{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3228159112375806, "eval_steps": 500, "global_step": 27000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.1956144860651132e-05, "grad_norm": 8.281915664672852, "learning_rate": 0.0, "loss": 0.675, "step": 1 }, { "epoch": 2.3912289721302265e-05, "grad_norm": 2.2243731021881104, "learning_rate": 3.984063745019921e-09, "loss": 0.635, "step": 2 }, { "epoch": 3.5868434581953393e-05, "grad_norm": 6.724419593811035, "learning_rate": 7.968127490039842e-09, "loss": 0.5883, "step": 3 }, { "epoch": 4.782457944260453e-05, "grad_norm": 5.863780498504639, "learning_rate": 1.195219123505976e-08, "loss": 0.6731, "step": 4 }, { "epoch": 5.978072430325566e-05, "grad_norm": 2.964402914047241, "learning_rate": 1.5936254980079684e-08, "loss": 0.5542, "step": 5 }, { "epoch": 7.173686916390679e-05, "grad_norm": 3.582288980484009, "learning_rate": 1.9920318725099604e-08, "loss": 0.5786, "step": 6 }, { "epoch": 8.369301402455792e-05, "grad_norm": 2.719247341156006, "learning_rate": 2.390438247011952e-08, "loss": 0.6377, "step": 7 }, { "epoch": 9.564915888520906e-05, "grad_norm": 3.3153321743011475, "learning_rate": 2.7888446215139448e-08, "loss": 0.686, "step": 8 }, { "epoch": 0.00010760530374586018, "grad_norm": 7.042778491973877, "learning_rate": 3.187250996015937e-08, "loss": 0.6608, "step": 9 }, { "epoch": 0.00011956144860651132, "grad_norm": 2.6105663776397705, "learning_rate": 3.585657370517929e-08, "loss": 0.7236, "step": 10 }, { "epoch": 0.00013151759346716245, "grad_norm": 5.052253723144531, "learning_rate": 3.984063745019921e-08, "loss": 0.6445, "step": 11 }, { "epoch": 0.00014347373832781357, "grad_norm": 18.513145446777344, "learning_rate": 4.382470119521913e-08, "loss": 0.6162, "step": 12 }, { "epoch": 0.00015542988318846472, "grad_norm": 2.6987192630767822, "learning_rate": 4.780876494023904e-08, "loss": 0.6407, "step": 13 }, { "epoch": 0.00016738602804911585, "grad_norm": 3.3696155548095703, "learning_rate": 5.179282868525897e-08, "loss": 0.6488, "step": 14 }, { "epoch": 0.00017934217290976697, "grad_norm": 2.4508731365203857, "learning_rate": 5.5776892430278896e-08, "loss": 0.6309, "step": 15 }, { "epoch": 0.00019129831777041812, "grad_norm": 4.175604820251465, "learning_rate": 5.97609561752988e-08, "loss": 0.558, "step": 16 }, { "epoch": 0.00020325446263106924, "grad_norm": 3.3811845779418945, "learning_rate": 6.374501992031874e-08, "loss": 0.6986, "step": 17 }, { "epoch": 0.00021521060749172036, "grad_norm": 5.431569576263428, "learning_rate": 6.772908366533864e-08, "loss": 0.6153, "step": 18 }, { "epoch": 0.0002271667523523715, "grad_norm": 2.432086706161499, "learning_rate": 7.171314741035858e-08, "loss": 0.7078, "step": 19 }, { "epoch": 0.00023912289721302263, "grad_norm": 4.247483253479004, "learning_rate": 7.56972111553785e-08, "loss": 0.6141, "step": 20 }, { "epoch": 0.0002510790420736738, "grad_norm": 2.947932481765747, "learning_rate": 7.968127490039842e-08, "loss": 0.6494, "step": 21 }, { "epoch": 0.0002630351869343249, "grad_norm": 2.068476915359497, "learning_rate": 8.366533864541834e-08, "loss": 0.6224, "step": 22 }, { "epoch": 0.000274991331794976, "grad_norm": 2.813786029815674, "learning_rate": 8.764940239043826e-08, "loss": 0.5628, "step": 23 }, { "epoch": 0.00028694747665562715, "grad_norm": 13.295516967773438, "learning_rate": 9.163346613545816e-08, "loss": 0.634, "step": 24 }, { "epoch": 0.00029890362151627827, "grad_norm": 4.653361797332764, "learning_rate": 9.561752988047808e-08, "loss": 0.6807, "step": 25 }, { "epoch": 0.00031085976637692945, "grad_norm": 4.991990089416504, "learning_rate": 9.960159362549802e-08, "loss": 0.5869, "step": 26 }, { "epoch": 0.00032281591123758057, "grad_norm": 3.068495273590088, "learning_rate": 1.0358565737051794e-07, "loss": 0.6526, "step": 27 }, { "epoch": 0.0003347720560982317, "grad_norm": 2.2823734283447266, "learning_rate": 1.0756972111553786e-07, "loss": 0.6056, "step": 28 }, { "epoch": 0.0003467282009588828, "grad_norm": 3.735952615737915, "learning_rate": 1.1155378486055779e-07, "loss": 0.6141, "step": 29 }, { "epoch": 0.00035868434581953393, "grad_norm": 2.1052253246307373, "learning_rate": 1.155378486055777e-07, "loss": 0.5309, "step": 30 }, { "epoch": 0.00037064049068018506, "grad_norm": 3.0795469284057617, "learning_rate": 1.195219123505976e-07, "loss": 0.7348, "step": 31 }, { "epoch": 0.00038259663554083623, "grad_norm": 5.698285102844238, "learning_rate": 1.2350597609561754e-07, "loss": 0.5882, "step": 32 }, { "epoch": 0.00039455278040148736, "grad_norm": 4.846653461456299, "learning_rate": 1.2749003984063747e-07, "loss": 0.6022, "step": 33 }, { "epoch": 0.0004065089252621385, "grad_norm": 2.8326926231384277, "learning_rate": 1.3147410358565738e-07, "loss": 0.5905, "step": 34 }, { "epoch": 0.0004184650701227896, "grad_norm": 3.668052911758423, "learning_rate": 1.3545816733067729e-07, "loss": 0.6539, "step": 35 }, { "epoch": 0.0004304212149834407, "grad_norm": 28.48043441772461, "learning_rate": 1.3944223107569722e-07, "loss": 0.6576, "step": 36 }, { "epoch": 0.00044237735984409184, "grad_norm": 2.002107620239258, "learning_rate": 1.4342629482071715e-07, "loss": 0.6504, "step": 37 }, { "epoch": 0.000454333504704743, "grad_norm": 1.765988826751709, "learning_rate": 1.4741035856573706e-07, "loss": 0.6015, "step": 38 }, { "epoch": 0.00046628964956539414, "grad_norm": 2.3654470443725586, "learning_rate": 1.51394422310757e-07, "loss": 0.6053, "step": 39 }, { "epoch": 0.00047824579442604526, "grad_norm": 2.4217445850372314, "learning_rate": 1.553784860557769e-07, "loss": 0.5702, "step": 40 }, { "epoch": 0.0004902019392866964, "grad_norm": 4.94151496887207, "learning_rate": 1.5936254980079683e-07, "loss": 0.6917, "step": 41 }, { "epoch": 0.0005021580841473476, "grad_norm": 2.1674466133117676, "learning_rate": 1.6334661354581674e-07, "loss": 0.6341, "step": 42 }, { "epoch": 0.0005141142290079986, "grad_norm": 2.8387680053710938, "learning_rate": 1.6733067729083667e-07, "loss": 0.6317, "step": 43 }, { "epoch": 0.0005260703738686498, "grad_norm": 5.974135398864746, "learning_rate": 1.713147410358566e-07, "loss": 0.6489, "step": 44 }, { "epoch": 0.0005380265187293009, "grad_norm": 2.540522813796997, "learning_rate": 1.7529880478087651e-07, "loss": 0.631, "step": 45 }, { "epoch": 0.000549982663589952, "grad_norm": 2.489535093307495, "learning_rate": 1.7928286852589645e-07, "loss": 0.6815, "step": 46 }, { "epoch": 0.0005619388084506032, "grad_norm": 2.5850136280059814, "learning_rate": 1.8326693227091633e-07, "loss": 0.6889, "step": 47 }, { "epoch": 0.0005738949533112543, "grad_norm": 2.816149950027466, "learning_rate": 1.8725099601593626e-07, "loss": 0.5418, "step": 48 }, { "epoch": 0.0005858510981719055, "grad_norm": 2.1104421615600586, "learning_rate": 1.9123505976095617e-07, "loss": 0.6071, "step": 49 }, { "epoch": 0.0005978072430325565, "grad_norm": 3.203007698059082, "learning_rate": 1.952191235059761e-07, "loss": 0.6749, "step": 50 }, { "epoch": 0.0006097633878932077, "grad_norm": 5.8410773277282715, "learning_rate": 1.9920318725099604e-07, "loss": 0.567, "step": 51 }, { "epoch": 0.0006217195327538589, "grad_norm": 3.0833675861358643, "learning_rate": 2.0318725099601594e-07, "loss": 0.5801, "step": 52 }, { "epoch": 0.00063367567761451, "grad_norm": 3.341923952102661, "learning_rate": 2.0717131474103588e-07, "loss": 0.6287, "step": 53 }, { "epoch": 0.0006456318224751611, "grad_norm": 2.5386393070220947, "learning_rate": 2.111553784860558e-07, "loss": 0.7094, "step": 54 }, { "epoch": 0.0006575879673358122, "grad_norm": 3.458578586578369, "learning_rate": 2.1513944223107572e-07, "loss": 0.6616, "step": 55 }, { "epoch": 0.0006695441121964634, "grad_norm": 2.555715322494507, "learning_rate": 2.1912350597609565e-07, "loss": 0.6394, "step": 56 }, { "epoch": 0.0006815002570571146, "grad_norm": 4.063467502593994, "learning_rate": 2.2310756972111558e-07, "loss": 0.621, "step": 57 }, { "epoch": 0.0006934564019177656, "grad_norm": 3.2541048526763916, "learning_rate": 2.2709163346613546e-07, "loss": 0.5341, "step": 58 }, { "epoch": 0.0007054125467784168, "grad_norm": 2.791187047958374, "learning_rate": 2.310756972111554e-07, "loss": 0.7512, "step": 59 }, { "epoch": 0.0007173686916390679, "grad_norm": 5.930652618408203, "learning_rate": 2.350597609561753e-07, "loss": 0.6493, "step": 60 }, { "epoch": 0.000729324836499719, "grad_norm": 2.409254789352417, "learning_rate": 2.390438247011952e-07, "loss": 0.6319, "step": 61 }, { "epoch": 0.0007412809813603701, "grad_norm": 3.2057526111602783, "learning_rate": 2.4302788844621514e-07, "loss": 0.6597, "step": 62 }, { "epoch": 0.0007532371262210213, "grad_norm": 2.1131157875061035, "learning_rate": 2.470119521912351e-07, "loss": 0.6898, "step": 63 }, { "epoch": 0.0007651932710816725, "grad_norm": 5.9026594161987305, "learning_rate": 2.50996015936255e-07, "loss": 0.6981, "step": 64 }, { "epoch": 0.0007771494159423235, "grad_norm": 5.33059549331665, "learning_rate": 2.5498007968127495e-07, "loss": 0.6403, "step": 65 }, { "epoch": 0.0007891055608029747, "grad_norm": 5.041219711303711, "learning_rate": 2.589641434262949e-07, "loss": 0.624, "step": 66 }, { "epoch": 0.0008010617056636258, "grad_norm": 2.339207410812378, "learning_rate": 2.6294820717131476e-07, "loss": 0.6386, "step": 67 }, { "epoch": 0.000813017850524277, "grad_norm": 2.333449125289917, "learning_rate": 2.669322709163347e-07, "loss": 0.6535, "step": 68 }, { "epoch": 0.0008249739953849281, "grad_norm": 2.4978506565093994, "learning_rate": 2.7091633466135457e-07, "loss": 0.5822, "step": 69 }, { "epoch": 0.0008369301402455792, "grad_norm": 2.885756015777588, "learning_rate": 2.749003984063745e-07, "loss": 0.6036, "step": 70 }, { "epoch": 0.0008488862851062304, "grad_norm": 24.555252075195312, "learning_rate": 2.7888446215139444e-07, "loss": 0.62, "step": 71 }, { "epoch": 0.0008608424299668814, "grad_norm": 2.458693265914917, "learning_rate": 2.8286852589641437e-07, "loss": 0.5451, "step": 72 }, { "epoch": 0.0008727985748275326, "grad_norm": 2.1554818153381348, "learning_rate": 2.868525896414343e-07, "loss": 0.5772, "step": 73 }, { "epoch": 0.0008847547196881837, "grad_norm": 1.847727656364441, "learning_rate": 2.908366533864542e-07, "loss": 0.6239, "step": 74 }, { "epoch": 0.0008967108645488349, "grad_norm": 5.327968597412109, "learning_rate": 2.948207171314741e-07, "loss": 0.5534, "step": 75 }, { "epoch": 0.000908667009409486, "grad_norm": 3.298921823501587, "learning_rate": 2.9880478087649405e-07, "loss": 0.705, "step": 76 }, { "epoch": 0.0009206231542701371, "grad_norm": 3.1388344764709473, "learning_rate": 3.02788844621514e-07, "loss": 0.6858, "step": 77 }, { "epoch": 0.0009325792991307883, "grad_norm": 3.1586194038391113, "learning_rate": 3.0677290836653387e-07, "loss": 0.5763, "step": 78 }, { "epoch": 0.0009445354439914394, "grad_norm": 11.576796531677246, "learning_rate": 3.107569721115538e-07, "loss": 0.63, "step": 79 }, { "epoch": 0.0009564915888520905, "grad_norm": 4.144306182861328, "learning_rate": 3.1474103585657373e-07, "loss": 0.5662, "step": 80 }, { "epoch": 0.0009684477337127417, "grad_norm": 2.475597381591797, "learning_rate": 3.1872509960159367e-07, "loss": 0.6159, "step": 81 }, { "epoch": 0.0009804038785733928, "grad_norm": 6.0133233070373535, "learning_rate": 3.227091633466136e-07, "loss": 0.6502, "step": 82 }, { "epoch": 0.0009923600234340438, "grad_norm": 3.3210318088531494, "learning_rate": 3.266932270916335e-07, "loss": 0.5855, "step": 83 }, { "epoch": 0.0010043161682946951, "grad_norm": 4.446418285369873, "learning_rate": 3.3067729083665336e-07, "loss": 0.5684, "step": 84 }, { "epoch": 0.0010162723131553462, "grad_norm": 2.204071283340454, "learning_rate": 3.3466135458167335e-07, "loss": 0.5827, "step": 85 }, { "epoch": 0.0010282284580159973, "grad_norm": 3.8412275314331055, "learning_rate": 3.3864541832669323e-07, "loss": 0.6488, "step": 86 }, { "epoch": 0.0010401846028766485, "grad_norm": 3.9793343544006348, "learning_rate": 3.426294820717132e-07, "loss": 0.6254, "step": 87 }, { "epoch": 0.0010521407477372996, "grad_norm": 2.3755710124969482, "learning_rate": 3.466135458167331e-07, "loss": 0.6428, "step": 88 }, { "epoch": 0.0010640968925979507, "grad_norm": 2.402578830718994, "learning_rate": 3.5059760956175303e-07, "loss": 0.6226, "step": 89 }, { "epoch": 0.0010760530374586018, "grad_norm": 6.0913405418396, "learning_rate": 3.545816733067729e-07, "loss": 0.6009, "step": 90 }, { "epoch": 0.001088009182319253, "grad_norm": 19.299057006835938, "learning_rate": 3.585657370517929e-07, "loss": 0.5903, "step": 91 }, { "epoch": 0.001099965327179904, "grad_norm": 4.0869669914245605, "learning_rate": 3.625498007968128e-07, "loss": 0.6386, "step": 92 }, { "epoch": 0.0011119214720405552, "grad_norm": 2.3265788555145264, "learning_rate": 3.6653386454183266e-07, "loss": 0.5269, "step": 93 }, { "epoch": 0.0011238776169012065, "grad_norm": 11.968341827392578, "learning_rate": 3.7051792828685264e-07, "loss": 0.6705, "step": 94 }, { "epoch": 0.0011358337617618575, "grad_norm": 2.2205970287323, "learning_rate": 3.745019920318725e-07, "loss": 0.674, "step": 95 }, { "epoch": 0.0011477899066225086, "grad_norm": 2.481182813644409, "learning_rate": 3.7848605577689246e-07, "loss": 0.6599, "step": 96 }, { "epoch": 0.0011597460514831599, "grad_norm": 2.4688825607299805, "learning_rate": 3.8247011952191234e-07, "loss": 0.6395, "step": 97 }, { "epoch": 0.001171702196343811, "grad_norm": 2.757944345474243, "learning_rate": 3.864541832669323e-07, "loss": 0.6308, "step": 98 }, { "epoch": 0.001183658341204462, "grad_norm": 12.570455551147461, "learning_rate": 3.904382470119522e-07, "loss": 0.6784, "step": 99 }, { "epoch": 0.001195614486065113, "grad_norm": 3.1075189113616943, "learning_rate": 3.944223107569722e-07, "loss": 0.6774, "step": 100 }, { "epoch": 0.0012075706309257644, "grad_norm": 1.8855059146881104, "learning_rate": 3.9840637450199207e-07, "loss": 0.6356, "step": 101 }, { "epoch": 0.0012195267757864154, "grad_norm": 3.7021212577819824, "learning_rate": 4.02390438247012e-07, "loss": 0.7749, "step": 102 }, { "epoch": 0.0012314829206470665, "grad_norm": 3.260483741760254, "learning_rate": 4.063745019920319e-07, "loss": 0.5795, "step": 103 }, { "epoch": 0.0012434390655077178, "grad_norm": 2.645350456237793, "learning_rate": 4.103585657370518e-07, "loss": 0.6467, "step": 104 }, { "epoch": 0.0012553952103683689, "grad_norm": 3.132094144821167, "learning_rate": 4.1434262948207175e-07, "loss": 0.6186, "step": 105 }, { "epoch": 0.00126735135522902, "grad_norm": 2.811375379562378, "learning_rate": 4.1832669322709163e-07, "loss": 0.5564, "step": 106 }, { "epoch": 0.001279307500089671, "grad_norm": 3.2294650077819824, "learning_rate": 4.223107569721116e-07, "loss": 0.5417, "step": 107 }, { "epoch": 0.0012912636449503223, "grad_norm": 2.551548719406128, "learning_rate": 4.262948207171315e-07, "loss": 0.7048, "step": 108 }, { "epoch": 0.0013032197898109733, "grad_norm": 2.898554563522339, "learning_rate": 4.3027888446215143e-07, "loss": 0.5859, "step": 109 }, { "epoch": 0.0013151759346716244, "grad_norm": 12.302570343017578, "learning_rate": 4.342629482071713e-07, "loss": 0.5206, "step": 110 }, { "epoch": 0.0013271320795322757, "grad_norm": 2.2563657760620117, "learning_rate": 4.382470119521913e-07, "loss": 0.6273, "step": 111 }, { "epoch": 0.0013390882243929268, "grad_norm": 2.8448922634124756, "learning_rate": 4.422310756972112e-07, "loss": 0.5943, "step": 112 }, { "epoch": 0.0013510443692535778, "grad_norm": 3.6511847972869873, "learning_rate": 4.4621513944223117e-07, "loss": 0.6005, "step": 113 }, { "epoch": 0.0013630005141142291, "grad_norm": 5.221853733062744, "learning_rate": 4.5019920318725105e-07, "loss": 0.6212, "step": 114 }, { "epoch": 0.0013749566589748802, "grad_norm": 3.313739538192749, "learning_rate": 4.5418326693227093e-07, "loss": 0.5089, "step": 115 }, { "epoch": 0.0013869128038355313, "grad_norm": 3.534194231033325, "learning_rate": 4.5816733067729086e-07, "loss": 0.6014, "step": 116 }, { "epoch": 0.0013988689486961823, "grad_norm": 3.61918568611145, "learning_rate": 4.621513944223108e-07, "loss": 0.5483, "step": 117 }, { "epoch": 0.0014108250935568336, "grad_norm": 9.90185260772705, "learning_rate": 4.6613545816733073e-07, "loss": 0.5919, "step": 118 }, { "epoch": 0.0014227812384174847, "grad_norm": 2.6455295085906982, "learning_rate": 4.701195219123506e-07, "loss": 0.6023, "step": 119 }, { "epoch": 0.0014347373832781357, "grad_norm": 2.587740898132324, "learning_rate": 4.741035856573706e-07, "loss": 0.605, "step": 120 }, { "epoch": 0.001446693528138787, "grad_norm": 18.75286102294922, "learning_rate": 4.780876494023904e-07, "loss": 0.6233, "step": 121 }, { "epoch": 0.001458649672999438, "grad_norm": 3.8767635822296143, "learning_rate": 4.820717131474104e-07, "loss": 0.6534, "step": 122 }, { "epoch": 0.0014706058178600892, "grad_norm": 5.993529796600342, "learning_rate": 4.860557768924303e-07, "loss": 0.5645, "step": 123 }, { "epoch": 0.0014825619627207402, "grad_norm": 4.472458839416504, "learning_rate": 4.900398406374502e-07, "loss": 0.6017, "step": 124 }, { "epoch": 0.0014945181075813915, "grad_norm": 3.7625298500061035, "learning_rate": 4.940239043824702e-07, "loss": 0.6507, "step": 125 }, { "epoch": 0.0015064742524420426, "grad_norm": 2.1450583934783936, "learning_rate": 4.9800796812749e-07, "loss": 0.5922, "step": 126 }, { "epoch": 0.0015184303973026936, "grad_norm": 2.0735843181610107, "learning_rate": 5.0199203187251e-07, "loss": 0.7024, "step": 127 }, { "epoch": 0.001530386542163345, "grad_norm": 3.65265154838562, "learning_rate": 5.059760956175299e-07, "loss": 0.6337, "step": 128 }, { "epoch": 0.001542342687023996, "grad_norm": 3.7759337425231934, "learning_rate": 5.099601593625499e-07, "loss": 0.5915, "step": 129 }, { "epoch": 0.001554298831884647, "grad_norm": 5.956639289855957, "learning_rate": 5.139442231075698e-07, "loss": 0.618, "step": 130 }, { "epoch": 0.0015662549767452984, "grad_norm": 2.5389373302459717, "learning_rate": 5.179282868525898e-07, "loss": 0.57, "step": 131 }, { "epoch": 0.0015782111216059494, "grad_norm": 3.3699686527252197, "learning_rate": 5.219123505976096e-07, "loss": 0.5968, "step": 132 }, { "epoch": 0.0015901672664666005, "grad_norm": 1.8785090446472168, "learning_rate": 5.258964143426295e-07, "loss": 0.6161, "step": 133 }, { "epoch": 0.0016021234113272516, "grad_norm": 2.1050639152526855, "learning_rate": 5.298804780876494e-07, "loss": 0.5726, "step": 134 }, { "epoch": 0.0016140795561879028, "grad_norm": 4.957662582397461, "learning_rate": 5.338645418326694e-07, "loss": 0.5894, "step": 135 }, { "epoch": 0.001626035701048554, "grad_norm": 2.7053534984588623, "learning_rate": 5.378486055776893e-07, "loss": 0.6048, "step": 136 }, { "epoch": 0.001637991845909205, "grad_norm": 3.7089061737060547, "learning_rate": 5.418326693227091e-07, "loss": 0.5797, "step": 137 }, { "epoch": 0.0016499479907698563, "grad_norm": 2.7219386100769043, "learning_rate": 5.458167330677291e-07, "loss": 0.6484, "step": 138 }, { "epoch": 0.0016619041356305073, "grad_norm": 5.883847713470459, "learning_rate": 5.49800796812749e-07, "loss": 0.6251, "step": 139 }, { "epoch": 0.0016738602804911584, "grad_norm": 2.5216808319091797, "learning_rate": 5.53784860557769e-07, "loss": 0.6749, "step": 140 }, { "epoch": 0.0016858164253518095, "grad_norm": 9.71713924407959, "learning_rate": 5.577689243027889e-07, "loss": 0.6734, "step": 141 }, { "epoch": 0.0016977725702124608, "grad_norm": 2.0484812259674072, "learning_rate": 5.617529880478089e-07, "loss": 0.5905, "step": 142 }, { "epoch": 0.0017097287150731118, "grad_norm": 5.912993907928467, "learning_rate": 5.657370517928287e-07, "loss": 0.6491, "step": 143 }, { "epoch": 0.0017216848599337629, "grad_norm": 2.6053106784820557, "learning_rate": 5.697211155378486e-07, "loss": 0.5955, "step": 144 }, { "epoch": 0.0017336410047944142, "grad_norm": 15.644975662231445, "learning_rate": 5.737051792828686e-07, "loss": 0.5407, "step": 145 }, { "epoch": 0.0017455971496550652, "grad_norm": 3.377533197402954, "learning_rate": 5.776892430278885e-07, "loss": 0.5543, "step": 146 }, { "epoch": 0.0017575532945157163, "grad_norm": 3.7012298107147217, "learning_rate": 5.816733067729084e-07, "loss": 0.5697, "step": 147 }, { "epoch": 0.0017695094393763674, "grad_norm": 5.679858207702637, "learning_rate": 5.856573705179284e-07, "loss": 0.6362, "step": 148 }, { "epoch": 0.0017814655842370187, "grad_norm": 14.685141563415527, "learning_rate": 5.896414342629482e-07, "loss": 0.6023, "step": 149 }, { "epoch": 0.0017934217290976697, "grad_norm": 7.547567367553711, "learning_rate": 5.936254980079681e-07, "loss": 0.5409, "step": 150 }, { "epoch": 0.0018053778739583208, "grad_norm": 1.7791686058044434, "learning_rate": 5.976095617529881e-07, "loss": 0.5716, "step": 151 }, { "epoch": 0.001817334018818972, "grad_norm": 4.223321437835693, "learning_rate": 6.01593625498008e-07, "loss": 0.5656, "step": 152 }, { "epoch": 0.0018292901636796231, "grad_norm": 3.9956140518188477, "learning_rate": 6.05577689243028e-07, "loss": 0.6342, "step": 153 }, { "epoch": 0.0018412463085402742, "grad_norm": 2.0861587524414062, "learning_rate": 6.095617529880479e-07, "loss": 0.5484, "step": 154 }, { "epoch": 0.0018532024534009255, "grad_norm": 3.024629831314087, "learning_rate": 6.135458167330677e-07, "loss": 0.6379, "step": 155 }, { "epoch": 0.0018651585982615766, "grad_norm": 10.684629440307617, "learning_rate": 6.175298804780877e-07, "loss": 0.5774, "step": 156 }, { "epoch": 0.0018771147431222276, "grad_norm": 2.0529961585998535, "learning_rate": 6.215139442231076e-07, "loss": 0.5654, "step": 157 }, { "epoch": 0.0018890708879828787, "grad_norm": 2.205930471420288, "learning_rate": 6.254980079681275e-07, "loss": 0.6205, "step": 158 }, { "epoch": 0.00190102703284353, "grad_norm": 6.7924909591674805, "learning_rate": 6.294820717131475e-07, "loss": 0.6997, "step": 159 }, { "epoch": 0.001912983177704181, "grad_norm": 2.4288418292999268, "learning_rate": 6.334661354581674e-07, "loss": 0.6919, "step": 160 }, { "epoch": 0.0019249393225648321, "grad_norm": 3.5543711185455322, "learning_rate": 6.374501992031873e-07, "loss": 0.6819, "step": 161 }, { "epoch": 0.0019368954674254834, "grad_norm": 4.624143600463867, "learning_rate": 6.414342629482072e-07, "loss": 0.5893, "step": 162 }, { "epoch": 0.0019488516122861345, "grad_norm": 2.107473134994507, "learning_rate": 6.454183266932272e-07, "loss": 0.5786, "step": 163 }, { "epoch": 0.0019608077571467855, "grad_norm": 10.744111061096191, "learning_rate": 6.494023904382471e-07, "loss": 0.5633, "step": 164 }, { "epoch": 0.001972763902007437, "grad_norm": 2.0992021560668945, "learning_rate": 6.53386454183267e-07, "loss": 0.7155, "step": 165 }, { "epoch": 0.0019847200468680877, "grad_norm": 3.5176243782043457, "learning_rate": 6.573705179282868e-07, "loss": 0.7091, "step": 166 }, { "epoch": 0.001996676191728739, "grad_norm": 3.19327712059021, "learning_rate": 6.613545816733067e-07, "loss": 0.6568, "step": 167 }, { "epoch": 0.0020086323365893903, "grad_norm": 3.552905321121216, "learning_rate": 6.653386454183268e-07, "loss": 0.6886, "step": 168 }, { "epoch": 0.002020588481450041, "grad_norm": 2.4160890579223633, "learning_rate": 6.693227091633467e-07, "loss": 0.5374, "step": 169 }, { "epoch": 0.0020325446263106924, "grad_norm": 2.7349393367767334, "learning_rate": 6.733067729083666e-07, "loss": 0.6732, "step": 170 }, { "epoch": 0.0020445007711713437, "grad_norm": 5.870787143707275, "learning_rate": 6.772908366533865e-07, "loss": 0.701, "step": 171 }, { "epoch": 0.0020564569160319945, "grad_norm": 18.413467407226562, "learning_rate": 6.812749003984064e-07, "loss": 0.5687, "step": 172 }, { "epoch": 0.002068413060892646, "grad_norm": 3.7078421115875244, "learning_rate": 6.852589641434264e-07, "loss": 0.6042, "step": 173 }, { "epoch": 0.002080369205753297, "grad_norm": 3.8753371238708496, "learning_rate": 6.892430278884463e-07, "loss": 0.6659, "step": 174 }, { "epoch": 0.002092325350613948, "grad_norm": 2.9242584705352783, "learning_rate": 6.932270916334662e-07, "loss": 0.6097, "step": 175 }, { "epoch": 0.0021042814954745992, "grad_norm": 2.385404109954834, "learning_rate": 6.972111553784861e-07, "loss": 0.6751, "step": 176 }, { "epoch": 0.0021162376403352505, "grad_norm": 5.514831066131592, "learning_rate": 7.011952191235061e-07, "loss": 0.5772, "step": 177 }, { "epoch": 0.0021281937851959014, "grad_norm": 2.3125202655792236, "learning_rate": 7.051792828685259e-07, "loss": 0.5918, "step": 178 }, { "epoch": 0.0021401499300565527, "grad_norm": 4.539021968841553, "learning_rate": 7.091633466135458e-07, "loss": 0.6173, "step": 179 }, { "epoch": 0.0021521060749172035, "grad_norm": 3.3549814224243164, "learning_rate": 7.131474103585657e-07, "loss": 0.6208, "step": 180 }, { "epoch": 0.002164062219777855, "grad_norm": 10.03004264831543, "learning_rate": 7.171314741035858e-07, "loss": 0.6403, "step": 181 }, { "epoch": 0.002176018364638506, "grad_norm": 11.373364448547363, "learning_rate": 7.211155378486057e-07, "loss": 0.6255, "step": 182 }, { "epoch": 0.002187974509499157, "grad_norm": 4.839202404022217, "learning_rate": 7.250996015936256e-07, "loss": 0.5819, "step": 183 }, { "epoch": 0.002199930654359808, "grad_norm": 3.3126251697540283, "learning_rate": 7.290836653386454e-07, "loss": 0.7248, "step": 184 }, { "epoch": 0.0022118867992204595, "grad_norm": 7.53239631652832, "learning_rate": 7.330677290836653e-07, "loss": 0.6224, "step": 185 }, { "epoch": 0.0022238429440811103, "grad_norm": 3.0866527557373047, "learning_rate": 7.370517928286854e-07, "loss": 0.6588, "step": 186 }, { "epoch": 0.0022357990889417616, "grad_norm": 2.5056068897247314, "learning_rate": 7.410358565737053e-07, "loss": 0.5879, "step": 187 }, { "epoch": 0.002247755233802413, "grad_norm": 3.4184930324554443, "learning_rate": 7.450199203187252e-07, "loss": 0.5915, "step": 188 }, { "epoch": 0.0022597113786630638, "grad_norm": 2.1045708656311035, "learning_rate": 7.49003984063745e-07, "loss": 0.601, "step": 189 }, { "epoch": 0.002271667523523715, "grad_norm": 2.570817708969116, "learning_rate": 7.52988047808765e-07, "loss": 0.5171, "step": 190 }, { "epoch": 0.0022836236683843663, "grad_norm": 3.1002750396728516, "learning_rate": 7.569721115537849e-07, "loss": 0.6599, "step": 191 }, { "epoch": 0.002295579813245017, "grad_norm": 2.5435335636138916, "learning_rate": 7.609561752988048e-07, "loss": 0.6003, "step": 192 }, { "epoch": 0.0023075359581056685, "grad_norm": 2.346930742263794, "learning_rate": 7.649402390438247e-07, "loss": 0.6515, "step": 193 }, { "epoch": 0.0023194921029663198, "grad_norm": 3.608126401901245, "learning_rate": 7.689243027888448e-07, "loss": 0.5754, "step": 194 }, { "epoch": 0.0023314482478269706, "grad_norm": 2.3651926517486572, "learning_rate": 7.729083665338646e-07, "loss": 0.5174, "step": 195 }, { "epoch": 0.002343404392687622, "grad_norm": 3.0979831218719482, "learning_rate": 7.768924302788845e-07, "loss": 0.6439, "step": 196 }, { "epoch": 0.0023553605375482727, "grad_norm": 3.32070255279541, "learning_rate": 7.808764940239044e-07, "loss": 0.6154, "step": 197 }, { "epoch": 0.002367316682408924, "grad_norm": 4.118679046630859, "learning_rate": 7.848605577689243e-07, "loss": 0.5666, "step": 198 }, { "epoch": 0.0023792728272695753, "grad_norm": 3.542609453201294, "learning_rate": 7.888446215139444e-07, "loss": 0.6214, "step": 199 }, { "epoch": 0.002391228972130226, "grad_norm": 2.240926742553711, "learning_rate": 7.928286852589643e-07, "loss": 0.6505, "step": 200 }, { "epoch": 0.0024031851169908774, "grad_norm": 2.1999599933624268, "learning_rate": 7.968127490039841e-07, "loss": 0.5053, "step": 201 }, { "epoch": 0.0024151412618515287, "grad_norm": 2.6936540603637695, "learning_rate": 8.00796812749004e-07, "loss": 0.616, "step": 202 }, { "epoch": 0.0024270974067121796, "grad_norm": 7.226795196533203, "learning_rate": 8.04780876494024e-07, "loss": 0.6434, "step": 203 }, { "epoch": 0.002439053551572831, "grad_norm": 2.4717679023742676, "learning_rate": 8.087649402390439e-07, "loss": 0.6553, "step": 204 }, { "epoch": 0.002451009696433482, "grad_norm": 12.725118637084961, "learning_rate": 8.127490039840638e-07, "loss": 0.5383, "step": 205 }, { "epoch": 0.002462965841294133, "grad_norm": 5.007820129394531, "learning_rate": 8.167330677290837e-07, "loss": 0.5385, "step": 206 }, { "epoch": 0.0024749219861547843, "grad_norm": 2.5047085285186768, "learning_rate": 8.207171314741036e-07, "loss": 0.6514, "step": 207 }, { "epoch": 0.0024868781310154356, "grad_norm": 1.8447202444076538, "learning_rate": 8.247011952191236e-07, "loss": 0.6658, "step": 208 }, { "epoch": 0.0024988342758760864, "grad_norm": 2.7734522819519043, "learning_rate": 8.286852589641435e-07, "loss": 0.6255, "step": 209 }, { "epoch": 0.0025107904207367377, "grad_norm": 52.264774322509766, "learning_rate": 8.326693227091634e-07, "loss": 0.6116, "step": 210 }, { "epoch": 0.002522746565597389, "grad_norm": 4.085443019866943, "learning_rate": 8.366533864541833e-07, "loss": 0.6235, "step": 211 }, { "epoch": 0.00253470271045804, "grad_norm": 2.0360307693481445, "learning_rate": 8.406374501992034e-07, "loss": 0.5311, "step": 212 }, { "epoch": 0.002546658855318691, "grad_norm": 3.649291515350342, "learning_rate": 8.446215139442232e-07, "loss": 0.6523, "step": 213 }, { "epoch": 0.002558615000179342, "grad_norm": 2.567530393600464, "learning_rate": 8.486055776892431e-07, "loss": 0.7452, "step": 214 }, { "epoch": 0.0025705711450399933, "grad_norm": 2.1386313438415527, "learning_rate": 8.52589641434263e-07, "loss": 0.6536, "step": 215 }, { "epoch": 0.0025825272899006445, "grad_norm": 2.1159138679504395, "learning_rate": 8.565737051792829e-07, "loss": 0.5599, "step": 216 }, { "epoch": 0.0025944834347612954, "grad_norm": 3.944129705429077, "learning_rate": 8.605577689243029e-07, "loss": 0.5884, "step": 217 }, { "epoch": 0.0026064395796219467, "grad_norm": 3.4009296894073486, "learning_rate": 8.645418326693227e-07, "loss": 0.6237, "step": 218 }, { "epoch": 0.002618395724482598, "grad_norm": 6.415689468383789, "learning_rate": 8.685258964143426e-07, "loss": 0.5343, "step": 219 }, { "epoch": 0.002630351869343249, "grad_norm": 2.9880189895629883, "learning_rate": 8.725099601593626e-07, "loss": 0.607, "step": 220 }, { "epoch": 0.0026423080142039, "grad_norm": 2.950915813446045, "learning_rate": 8.764940239043826e-07, "loss": 0.6779, "step": 221 }, { "epoch": 0.0026542641590645514, "grad_norm": 3.10306978225708, "learning_rate": 8.804780876494025e-07, "loss": 0.5861, "step": 222 }, { "epoch": 0.0026662203039252022, "grad_norm": 2.713804244995117, "learning_rate": 8.844621513944224e-07, "loss": 0.6888, "step": 223 }, { "epoch": 0.0026781764487858535, "grad_norm": 4.404238700866699, "learning_rate": 8.884462151394422e-07, "loss": 0.6753, "step": 224 }, { "epoch": 0.002690132593646505, "grad_norm": 3.1779658794403076, "learning_rate": 8.924302788844623e-07, "loss": 0.624, "step": 225 }, { "epoch": 0.0027020887385071557, "grad_norm": 4.623492240905762, "learning_rate": 8.964143426294822e-07, "loss": 0.571, "step": 226 }, { "epoch": 0.002714044883367807, "grad_norm": 3.2106754779815674, "learning_rate": 9.003984063745021e-07, "loss": 0.5344, "step": 227 }, { "epoch": 0.0027260010282284582, "grad_norm": 2.601588249206543, "learning_rate": 9.04382470119522e-07, "loss": 0.5896, "step": 228 }, { "epoch": 0.002737957173089109, "grad_norm": 1.9262821674346924, "learning_rate": 9.083665338645419e-07, "loss": 0.598, "step": 229 }, { "epoch": 0.0027499133179497604, "grad_norm": 2.28753662109375, "learning_rate": 9.123505976095618e-07, "loss": 0.6253, "step": 230 }, { "epoch": 0.0027618694628104112, "grad_norm": 2.403655529022217, "learning_rate": 9.163346613545817e-07, "loss": 0.667, "step": 231 }, { "epoch": 0.0027738256076710625, "grad_norm": 2.5908617973327637, "learning_rate": 9.203187250996016e-07, "loss": 0.5764, "step": 232 }, { "epoch": 0.002785781752531714, "grad_norm": 3.066962480545044, "learning_rate": 9.243027888446216e-07, "loss": 0.6096, "step": 233 }, { "epoch": 0.0027977378973923646, "grad_norm": 3.3061864376068115, "learning_rate": 9.282868525896416e-07, "loss": 0.6435, "step": 234 }, { "epoch": 0.002809694042253016, "grad_norm": 2.818026542663574, "learning_rate": 9.322709163346615e-07, "loss": 0.6313, "step": 235 }, { "epoch": 0.002821650187113667, "grad_norm": 2.176065444946289, "learning_rate": 9.362549800796813e-07, "loss": 0.5997, "step": 236 }, { "epoch": 0.002833606331974318, "grad_norm": 2.7497730255126953, "learning_rate": 9.402390438247012e-07, "loss": 0.5954, "step": 237 }, { "epoch": 0.0028455624768349693, "grad_norm": 2.7300431728363037, "learning_rate": 9.442231075697211e-07, "loss": 0.5087, "step": 238 }, { "epoch": 0.0028575186216956206, "grad_norm": 4.16013765335083, "learning_rate": 9.482071713147412e-07, "loss": 0.6495, "step": 239 }, { "epoch": 0.0028694747665562715, "grad_norm": 2.1266028881073, "learning_rate": 9.521912350597611e-07, "loss": 0.5966, "step": 240 }, { "epoch": 0.0028814309114169228, "grad_norm": 2.1658823490142822, "learning_rate": 9.561752988047808e-07, "loss": 0.6892, "step": 241 }, { "epoch": 0.002893387056277574, "grad_norm": 5.249494552612305, "learning_rate": 9.601593625498007e-07, "loss": 0.5293, "step": 242 }, { "epoch": 0.002905343201138225, "grad_norm": 2.064868927001953, "learning_rate": 9.641434262948208e-07, "loss": 0.5365, "step": 243 }, { "epoch": 0.002917299345998876, "grad_norm": 5.08155632019043, "learning_rate": 9.681274900398407e-07, "loss": 0.5832, "step": 244 }, { "epoch": 0.0029292554908595275, "grad_norm": 5.900619029998779, "learning_rate": 9.721115537848606e-07, "loss": 0.6991, "step": 245 }, { "epoch": 0.0029412116357201783, "grad_norm": 6.338310241699219, "learning_rate": 9.760956175298805e-07, "loss": 0.5447, "step": 246 }, { "epoch": 0.0029531677805808296, "grad_norm": 3.5351412296295166, "learning_rate": 9.800796812749003e-07, "loss": 0.6804, "step": 247 }, { "epoch": 0.0029651239254414805, "grad_norm": 2.095778226852417, "learning_rate": 9.840637450199204e-07, "loss": 0.6355, "step": 248 }, { "epoch": 0.0029770800703021317, "grad_norm": 6.26090669631958, "learning_rate": 9.880478087649403e-07, "loss": 0.6646, "step": 249 }, { "epoch": 0.002989036215162783, "grad_norm": 2.253544330596924, "learning_rate": 9.920318725099602e-07, "loss": 0.6839, "step": 250 }, { "epoch": 0.003000992360023434, "grad_norm": 5.659281253814697, "learning_rate": 9.9601593625498e-07, "loss": 0.6858, "step": 251 }, { "epoch": 0.003012948504884085, "grad_norm": 7.331029415130615, "learning_rate": 1.0000000000000002e-06, "loss": 0.6697, "step": 252 }, { "epoch": 0.0030249046497447364, "grad_norm": 3.0956175327301025, "learning_rate": 1.00398406374502e-06, "loss": 0.5343, "step": 253 }, { "epoch": 0.0030368607946053873, "grad_norm": 2.1722252368927, "learning_rate": 1.00796812749004e-06, "loss": 0.6358, "step": 254 }, { "epoch": 0.0030488169394660386, "grad_norm": 12.982783317565918, "learning_rate": 1.0119521912350598e-06, "loss": 0.5051, "step": 255 }, { "epoch": 0.00306077308432669, "grad_norm": 5.526536464691162, "learning_rate": 1.01593625498008e-06, "loss": 0.7766, "step": 256 }, { "epoch": 0.0030727292291873407, "grad_norm": 3.0079619884490967, "learning_rate": 1.0199203187250998e-06, "loss": 0.5857, "step": 257 }, { "epoch": 0.003084685374047992, "grad_norm": 2.3801612854003906, "learning_rate": 1.0239043824701197e-06, "loss": 0.6114, "step": 258 }, { "epoch": 0.0030966415189086433, "grad_norm": 3.633087635040283, "learning_rate": 1.0278884462151395e-06, "loss": 0.6411, "step": 259 }, { "epoch": 0.003108597663769294, "grad_norm": 2.7306816577911377, "learning_rate": 1.0318725099601594e-06, "loss": 0.521, "step": 260 }, { "epoch": 0.0031205538086299454, "grad_norm": 1.8525307178497314, "learning_rate": 1.0358565737051795e-06, "loss": 0.5879, "step": 261 }, { "epoch": 0.0031325099534905967, "grad_norm": 3.587998390197754, "learning_rate": 1.0398406374501994e-06, "loss": 0.7164, "step": 262 }, { "epoch": 0.0031444660983512476, "grad_norm": 2.36247181892395, "learning_rate": 1.0438247011952193e-06, "loss": 0.6148, "step": 263 }, { "epoch": 0.003156422243211899, "grad_norm": 3.7783656120300293, "learning_rate": 1.0478087649402392e-06, "loss": 0.6819, "step": 264 }, { "epoch": 0.0031683783880725497, "grad_norm": 2.1224772930145264, "learning_rate": 1.051792828685259e-06, "loss": 0.6367, "step": 265 }, { "epoch": 0.003180334532933201, "grad_norm": 4.1481170654296875, "learning_rate": 1.055776892430279e-06, "loss": 0.5649, "step": 266 }, { "epoch": 0.0031922906777938523, "grad_norm": 2.2729272842407227, "learning_rate": 1.0597609561752988e-06, "loss": 0.6196, "step": 267 }, { "epoch": 0.003204246822654503, "grad_norm": 1.8793522119522095, "learning_rate": 1.0637450199203187e-06, "loss": 0.5706, "step": 268 }, { "epoch": 0.0032162029675151544, "grad_norm": 4.486626625061035, "learning_rate": 1.0677290836653388e-06, "loss": 0.5888, "step": 269 }, { "epoch": 0.0032281591123758057, "grad_norm": 3.282264471054077, "learning_rate": 1.0717131474103587e-06, "loss": 0.5845, "step": 270 }, { "epoch": 0.0032401152572364565, "grad_norm": 2.2703280448913574, "learning_rate": 1.0756972111553785e-06, "loss": 0.6132, "step": 271 }, { "epoch": 0.003252071402097108, "grad_norm": 1.9655585289001465, "learning_rate": 1.0796812749003984e-06, "loss": 0.5809, "step": 272 }, { "epoch": 0.003264027546957759, "grad_norm": 2.5767483711242676, "learning_rate": 1.0836653386454183e-06, "loss": 0.6659, "step": 273 }, { "epoch": 0.00327598369181841, "grad_norm": 2.6953415870666504, "learning_rate": 1.0876494023904384e-06, "loss": 0.601, "step": 274 }, { "epoch": 0.0032879398366790612, "grad_norm": 3.4061903953552246, "learning_rate": 1.0916334661354583e-06, "loss": 0.6601, "step": 275 }, { "epoch": 0.0032998959815397125, "grad_norm": 2.6303622722625732, "learning_rate": 1.0956175298804781e-06, "loss": 0.634, "step": 276 }, { "epoch": 0.0033118521264003634, "grad_norm": 6.425652027130127, "learning_rate": 1.099601593625498e-06, "loss": 0.5885, "step": 277 }, { "epoch": 0.0033238082712610147, "grad_norm": 4.080038070678711, "learning_rate": 1.103585657370518e-06, "loss": 0.5463, "step": 278 }, { "epoch": 0.0033357644161216655, "grad_norm": 3.640110969543457, "learning_rate": 1.107569721115538e-06, "loss": 0.5386, "step": 279 }, { "epoch": 0.003347720560982317, "grad_norm": 3.1754281520843506, "learning_rate": 1.1115537848605579e-06, "loss": 0.5783, "step": 280 }, { "epoch": 0.003359676705842968, "grad_norm": 2.1220040321350098, "learning_rate": 1.1155378486055778e-06, "loss": 0.6178, "step": 281 }, { "epoch": 0.003371632850703619, "grad_norm": 6.605536937713623, "learning_rate": 1.1195219123505976e-06, "loss": 0.5868, "step": 282 }, { "epoch": 0.0033835889955642702, "grad_norm": 2.9497900009155273, "learning_rate": 1.1235059760956177e-06, "loss": 0.6095, "step": 283 }, { "epoch": 0.0033955451404249215, "grad_norm": 1.9561787843704224, "learning_rate": 1.1274900398406376e-06, "loss": 0.5487, "step": 284 }, { "epoch": 0.0034075012852855724, "grad_norm": 1.9195724725723267, "learning_rate": 1.1314741035856575e-06, "loss": 0.4386, "step": 285 }, { "epoch": 0.0034194574301462236, "grad_norm": 5.0757012367248535, "learning_rate": 1.1354581673306774e-06, "loss": 0.586, "step": 286 }, { "epoch": 0.003431413575006875, "grad_norm": 2.3229618072509766, "learning_rate": 1.1394422310756973e-06, "loss": 0.5046, "step": 287 }, { "epoch": 0.0034433697198675258, "grad_norm": 2.297020196914673, "learning_rate": 1.1434262948207173e-06, "loss": 0.5608, "step": 288 }, { "epoch": 0.003455325864728177, "grad_norm": 3.1345741748809814, "learning_rate": 1.1474103585657372e-06, "loss": 0.5821, "step": 289 }, { "epoch": 0.0034672820095888283, "grad_norm": 9.63503646850586, "learning_rate": 1.1513944223107571e-06, "loss": 0.5617, "step": 290 }, { "epoch": 0.003479238154449479, "grad_norm": 2.7317798137664795, "learning_rate": 1.155378486055777e-06, "loss": 0.5918, "step": 291 }, { "epoch": 0.0034911942993101305, "grad_norm": 1.6857951879501343, "learning_rate": 1.1593625498007969e-06, "loss": 0.5818, "step": 292 }, { "epoch": 0.0035031504441707818, "grad_norm": 2.5531468391418457, "learning_rate": 1.1633466135458167e-06, "loss": 0.6759, "step": 293 }, { "epoch": 0.0035151065890314326, "grad_norm": 3.223026990890503, "learning_rate": 1.1673306772908368e-06, "loss": 0.5963, "step": 294 }, { "epoch": 0.003527062733892084, "grad_norm": 2.0818865299224854, "learning_rate": 1.1713147410358567e-06, "loss": 0.5337, "step": 295 }, { "epoch": 0.0035390188787527348, "grad_norm": 8.669801712036133, "learning_rate": 1.1752988047808766e-06, "loss": 0.5483, "step": 296 }, { "epoch": 0.003550975023613386, "grad_norm": 3.6024303436279297, "learning_rate": 1.1792828685258965e-06, "loss": 0.5093, "step": 297 }, { "epoch": 0.0035629311684740373, "grad_norm": 2.048882484436035, "learning_rate": 1.1832669322709164e-06, "loss": 0.5921, "step": 298 }, { "epoch": 0.003574887313334688, "grad_norm": 3.1579678058624268, "learning_rate": 1.1872509960159362e-06, "loss": 0.6028, "step": 299 }, { "epoch": 0.0035868434581953395, "grad_norm": 1.8443363904953003, "learning_rate": 1.1912350597609561e-06, "loss": 0.616, "step": 300 }, { "epoch": 0.0035987996030559907, "grad_norm": 2.9855432510375977, "learning_rate": 1.1952191235059762e-06, "loss": 0.5469, "step": 301 }, { "epoch": 0.0036107557479166416, "grad_norm": 2.665088653564453, "learning_rate": 1.199203187250996e-06, "loss": 0.5593, "step": 302 }, { "epoch": 0.003622711892777293, "grad_norm": 1.9710432291030884, "learning_rate": 1.203187250996016e-06, "loss": 0.5601, "step": 303 }, { "epoch": 0.003634668037637944, "grad_norm": 2.838495969772339, "learning_rate": 1.2071713147410359e-06, "loss": 0.5562, "step": 304 }, { "epoch": 0.003646624182498595, "grad_norm": 3.006420612335205, "learning_rate": 1.211155378486056e-06, "loss": 0.5461, "step": 305 }, { "epoch": 0.0036585803273592463, "grad_norm": 2.091352939605713, "learning_rate": 1.2151394422310758e-06, "loss": 0.6821, "step": 306 }, { "epoch": 0.0036705364722198976, "grad_norm": 1.7388992309570312, "learning_rate": 1.2191235059760957e-06, "loss": 0.61, "step": 307 }, { "epoch": 0.0036824926170805484, "grad_norm": 2.556626319885254, "learning_rate": 1.2231075697211156e-06, "loss": 0.5966, "step": 308 }, { "epoch": 0.0036944487619411997, "grad_norm": 2.8204803466796875, "learning_rate": 1.2270916334661355e-06, "loss": 0.5889, "step": 309 }, { "epoch": 0.003706404906801851, "grad_norm": 43.53572463989258, "learning_rate": 1.2310756972111556e-06, "loss": 0.6996, "step": 310 }, { "epoch": 0.003718361051662502, "grad_norm": 3.234405040740967, "learning_rate": 1.2350597609561754e-06, "loss": 0.5555, "step": 311 }, { "epoch": 0.003730317196523153, "grad_norm": 5.439522743225098, "learning_rate": 1.2390438247011953e-06, "loss": 0.5674, "step": 312 }, { "epoch": 0.003742273341383804, "grad_norm": 5.248721599578857, "learning_rate": 1.2430278884462152e-06, "loss": 0.616, "step": 313 }, { "epoch": 0.0037542294862444553, "grad_norm": 3.842771291732788, "learning_rate": 1.2470119521912353e-06, "loss": 0.6807, "step": 314 }, { "epoch": 0.0037661856311051066, "grad_norm": 2.2457001209259033, "learning_rate": 1.250996015936255e-06, "loss": 0.5717, "step": 315 }, { "epoch": 0.0037781417759657574, "grad_norm": 4.346931457519531, "learning_rate": 1.254980079681275e-06, "loss": 0.5801, "step": 316 }, { "epoch": 0.0037900979208264087, "grad_norm": 2.056455135345459, "learning_rate": 1.258964143426295e-06, "loss": 0.573, "step": 317 }, { "epoch": 0.00380205406568706, "grad_norm": 2.583660125732422, "learning_rate": 1.2629482071713148e-06, "loss": 0.6082, "step": 318 }, { "epoch": 0.003814010210547711, "grad_norm": 2.7873573303222656, "learning_rate": 1.2669322709163347e-06, "loss": 0.5947, "step": 319 }, { "epoch": 0.003825966355408362, "grad_norm": 3.622262716293335, "learning_rate": 1.2709163346613546e-06, "loss": 0.7028, "step": 320 }, { "epoch": 0.0038379225002690134, "grad_norm": 8.15798282623291, "learning_rate": 1.2749003984063747e-06, "loss": 0.7397, "step": 321 }, { "epoch": 0.0038498786451296643, "grad_norm": 2.2317450046539307, "learning_rate": 1.2788844621513946e-06, "loss": 0.6263, "step": 322 }, { "epoch": 0.0038618347899903155, "grad_norm": 5.6608405113220215, "learning_rate": 1.2828685258964144e-06, "loss": 0.6429, "step": 323 }, { "epoch": 0.003873790934850967, "grad_norm": 2.805375337600708, "learning_rate": 1.2868525896414343e-06, "loss": 0.584, "step": 324 }, { "epoch": 0.0038857470797116177, "grad_norm": 2.5959551334381104, "learning_rate": 1.2908366533864544e-06, "loss": 0.636, "step": 325 }, { "epoch": 0.003897703224572269, "grad_norm": 3.2395691871643066, "learning_rate": 1.294820717131474e-06, "loss": 0.6014, "step": 326 }, { "epoch": 0.00390965936943292, "grad_norm": 2.4321680068969727, "learning_rate": 1.2988047808764942e-06, "loss": 0.7173, "step": 327 }, { "epoch": 0.003921615514293571, "grad_norm": 2.6548385620117188, "learning_rate": 1.302788844621514e-06, "loss": 0.4831, "step": 328 }, { "epoch": 0.003933571659154222, "grad_norm": 3.5677108764648438, "learning_rate": 1.306772908366534e-06, "loss": 0.6584, "step": 329 }, { "epoch": 0.003945527804014874, "grad_norm": 3.642611026763916, "learning_rate": 1.310756972111554e-06, "loss": 0.5617, "step": 330 }, { "epoch": 0.0039574839488755245, "grad_norm": 2.510988235473633, "learning_rate": 1.3147410358565737e-06, "loss": 0.6135, "step": 331 }, { "epoch": 0.003969440093736175, "grad_norm": 2.1080636978149414, "learning_rate": 1.3187250996015938e-06, "loss": 0.6742, "step": 332 }, { "epoch": 0.003981396238596827, "grad_norm": 17.12091064453125, "learning_rate": 1.3227091633466135e-06, "loss": 0.5629, "step": 333 }, { "epoch": 0.003993352383457478, "grad_norm": 5.620301246643066, "learning_rate": 1.3266932270916335e-06, "loss": 0.6268, "step": 334 }, { "epoch": 0.004005308528318129, "grad_norm": 2.3697006702423096, "learning_rate": 1.3306772908366536e-06, "loss": 0.6326, "step": 335 }, { "epoch": 0.0040172646731787805, "grad_norm": 2.335782051086426, "learning_rate": 1.3346613545816733e-06, "loss": 0.6837, "step": 336 }, { "epoch": 0.004029220818039431, "grad_norm": 2.3949687480926514, "learning_rate": 1.3386454183266934e-06, "loss": 0.5974, "step": 337 }, { "epoch": 0.004041176962900082, "grad_norm": 3.6677498817443848, "learning_rate": 1.342629482071713e-06, "loss": 0.652, "step": 338 }, { "epoch": 0.004053133107760734, "grad_norm": 2.630796194076538, "learning_rate": 1.3466135458167332e-06, "loss": 0.6396, "step": 339 }, { "epoch": 0.004065089252621385, "grad_norm": 3.347020387649536, "learning_rate": 1.3505976095617532e-06, "loss": 0.5615, "step": 340 }, { "epoch": 0.004077045397482036, "grad_norm": 5.3495001792907715, "learning_rate": 1.354581673306773e-06, "loss": 0.5433, "step": 341 }, { "epoch": 0.004089001542342687, "grad_norm": 2.0846033096313477, "learning_rate": 1.358565737051793e-06, "loss": 0.6002, "step": 342 }, { "epoch": 0.004100957687203338, "grad_norm": 4.729696273803711, "learning_rate": 1.3625498007968129e-06, "loss": 0.6144, "step": 343 }, { "epoch": 0.004112913832063989, "grad_norm": 2.3299360275268555, "learning_rate": 1.3665338645418328e-06, "loss": 0.6448, "step": 344 }, { "epoch": 0.004124869976924641, "grad_norm": 3.191418170928955, "learning_rate": 1.3705179282868529e-06, "loss": 0.6316, "step": 345 }, { "epoch": 0.004136826121785292, "grad_norm": 3.2563514709472656, "learning_rate": 1.3745019920318725e-06, "loss": 0.6994, "step": 346 }, { "epoch": 0.0041487822666459425, "grad_norm": 3.3923192024230957, "learning_rate": 1.3784860557768926e-06, "loss": 0.6424, "step": 347 }, { "epoch": 0.004160738411506594, "grad_norm": 3.574226140975952, "learning_rate": 1.3824701195219125e-06, "loss": 0.7236, "step": 348 }, { "epoch": 0.004172694556367245, "grad_norm": 2.0508503913879395, "learning_rate": 1.3864541832669324e-06, "loss": 0.6052, "step": 349 }, { "epoch": 0.004184650701227896, "grad_norm": 2.6652092933654785, "learning_rate": 1.3904382470119523e-06, "loss": 0.6978, "step": 350 }, { "epoch": 0.004196606846088548, "grad_norm": 2.9180409908294678, "learning_rate": 1.3944223107569721e-06, "loss": 0.4954, "step": 351 }, { "epoch": 0.0042085629909491985, "grad_norm": 2.315725564956665, "learning_rate": 1.398406374501992e-06, "loss": 0.6413, "step": 352 }, { "epoch": 0.004220519135809849, "grad_norm": 2.364490032196045, "learning_rate": 1.4023904382470121e-06, "loss": 0.6229, "step": 353 }, { "epoch": 0.004232475280670501, "grad_norm": 12.342510223388672, "learning_rate": 1.406374501992032e-06, "loss": 0.6292, "step": 354 }, { "epoch": 0.004244431425531152, "grad_norm": 8.868651390075684, "learning_rate": 1.4103585657370519e-06, "loss": 0.6463, "step": 355 }, { "epoch": 0.004256387570391803, "grad_norm": 3.226238489151001, "learning_rate": 1.414342629482072e-06, "loss": 0.6449, "step": 356 }, { "epoch": 0.004268343715252454, "grad_norm": 12.168782234191895, "learning_rate": 1.4183266932270916e-06, "loss": 0.6743, "step": 357 }, { "epoch": 0.004280299860113105, "grad_norm": 2.2613635063171387, "learning_rate": 1.4223107569721117e-06, "loss": 0.5573, "step": 358 }, { "epoch": 0.004292256004973756, "grad_norm": 8.033830642700195, "learning_rate": 1.4262948207171314e-06, "loss": 0.7412, "step": 359 }, { "epoch": 0.004304212149834407, "grad_norm": 11.485153198242188, "learning_rate": 1.4302788844621515e-06, "loss": 0.4981, "step": 360 }, { "epoch": 0.004316168294695059, "grad_norm": 6.428365230560303, "learning_rate": 1.4342629482071716e-06, "loss": 0.6507, "step": 361 }, { "epoch": 0.00432812443955571, "grad_norm": 2.6946098804473877, "learning_rate": 1.4382470119521913e-06, "loss": 0.5491, "step": 362 }, { "epoch": 0.00434008058441636, "grad_norm": 3.839597225189209, "learning_rate": 1.4422310756972113e-06, "loss": 0.4725, "step": 363 }, { "epoch": 0.004352036729277012, "grad_norm": 6.518536567687988, "learning_rate": 1.446215139442231e-06, "loss": 0.6416, "step": 364 }, { "epoch": 0.004363992874137663, "grad_norm": 3.195732593536377, "learning_rate": 1.4501992031872511e-06, "loss": 0.6437, "step": 365 }, { "epoch": 0.004375949018998314, "grad_norm": 2.3940629959106445, "learning_rate": 1.4541832669322712e-06, "loss": 0.6079, "step": 366 }, { "epoch": 0.0043879051638589656, "grad_norm": 3.5171213150024414, "learning_rate": 1.4581673306772909e-06, "loss": 0.5898, "step": 367 }, { "epoch": 0.004399861308719616, "grad_norm": 3.437568187713623, "learning_rate": 1.462151394422311e-06, "loss": 0.6991, "step": 368 }, { "epoch": 0.004411817453580267, "grad_norm": 7.509113311767578, "learning_rate": 1.4661354581673306e-06, "loss": 0.6477, "step": 369 }, { "epoch": 0.004423773598440919, "grad_norm": 35.664085388183594, "learning_rate": 1.4701195219123507e-06, "loss": 0.5134, "step": 370 }, { "epoch": 0.00443572974330157, "grad_norm": 2.107360601425171, "learning_rate": 1.4741035856573708e-06, "loss": 0.584, "step": 371 }, { "epoch": 0.004447685888162221, "grad_norm": 3.781277656555176, "learning_rate": 1.4780876494023905e-06, "loss": 0.6537, "step": 372 }, { "epoch": 0.004459642033022872, "grad_norm": 3.5189874172210693, "learning_rate": 1.4820717131474106e-06, "loss": 0.6214, "step": 373 }, { "epoch": 0.004471598177883523, "grad_norm": 1.8707499504089355, "learning_rate": 1.4860557768924305e-06, "loss": 0.581, "step": 374 }, { "epoch": 0.004483554322744174, "grad_norm": 3.5987260341644287, "learning_rate": 1.4900398406374503e-06, "loss": 0.5972, "step": 375 }, { "epoch": 0.004495510467604826, "grad_norm": 2.8149988651275635, "learning_rate": 1.4940239043824702e-06, "loss": 0.5806, "step": 376 }, { "epoch": 0.004507466612465477, "grad_norm": 4.638751029968262, "learning_rate": 1.49800796812749e-06, "loss": 0.5905, "step": 377 }, { "epoch": 0.0045194227573261275, "grad_norm": 3.740558385848999, "learning_rate": 1.50199203187251e-06, "loss": 0.5663, "step": 378 }, { "epoch": 0.004531378902186779, "grad_norm": 4.882675647735596, "learning_rate": 1.50597609561753e-06, "loss": 0.5919, "step": 379 }, { "epoch": 0.00454333504704743, "grad_norm": 2.0253195762634277, "learning_rate": 1.50996015936255e-06, "loss": 0.6295, "step": 380 }, { "epoch": 0.004555291191908081, "grad_norm": 2.689474105834961, "learning_rate": 1.5139442231075698e-06, "loss": 0.7, "step": 381 }, { "epoch": 0.004567247336768733, "grad_norm": 2.135258197784424, "learning_rate": 1.5179282868525897e-06, "loss": 0.5101, "step": 382 }, { "epoch": 0.0045792034816293835, "grad_norm": 2.0518224239349365, "learning_rate": 1.5219123505976096e-06, "loss": 0.7083, "step": 383 }, { "epoch": 0.004591159626490034, "grad_norm": 2.8723151683807373, "learning_rate": 1.5258964143426297e-06, "loss": 0.5321, "step": 384 }, { "epoch": 0.004603115771350686, "grad_norm": 5.772395610809326, "learning_rate": 1.5298804780876494e-06, "loss": 0.6409, "step": 385 }, { "epoch": 0.004615071916211337, "grad_norm": 4.158657073974609, "learning_rate": 1.5338645418326694e-06, "loss": 0.707, "step": 386 }, { "epoch": 0.004627028061071988, "grad_norm": 1.8599377870559692, "learning_rate": 1.5378486055776895e-06, "loss": 0.6873, "step": 387 }, { "epoch": 0.0046389842059326395, "grad_norm": 2.3312408924102783, "learning_rate": 1.5418326693227092e-06, "loss": 0.5555, "step": 388 }, { "epoch": 0.00465094035079329, "grad_norm": 3.3119912147521973, "learning_rate": 1.5458167330677293e-06, "loss": 0.6157, "step": 389 }, { "epoch": 0.004662896495653941, "grad_norm": 9.601078033447266, "learning_rate": 1.549800796812749e-06, "loss": 0.5456, "step": 390 }, { "epoch": 0.004674852640514592, "grad_norm": 3.4488584995269775, "learning_rate": 1.553784860557769e-06, "loss": 0.6793, "step": 391 }, { "epoch": 0.004686808785375244, "grad_norm": 3.438826084136963, "learning_rate": 1.5577689243027892e-06, "loss": 0.6219, "step": 392 }, { "epoch": 0.004698764930235895, "grad_norm": 5.0683183670043945, "learning_rate": 1.5617529880478088e-06, "loss": 0.6295, "step": 393 }, { "epoch": 0.0047107210750965455, "grad_norm": 2.9052722454071045, "learning_rate": 1.565737051792829e-06, "loss": 0.6428, "step": 394 }, { "epoch": 0.004722677219957197, "grad_norm": 3.745213031768799, "learning_rate": 1.5697211155378486e-06, "loss": 0.5817, "step": 395 }, { "epoch": 0.004734633364817848, "grad_norm": 2.743943929672241, "learning_rate": 1.5737051792828687e-06, "loss": 0.6116, "step": 396 }, { "epoch": 0.004746589509678499, "grad_norm": 3.9678049087524414, "learning_rate": 1.5776892430278888e-06, "loss": 0.5759, "step": 397 }, { "epoch": 0.004758545654539151, "grad_norm": 3.508559465408325, "learning_rate": 1.5816733067729084e-06, "loss": 0.6921, "step": 398 }, { "epoch": 0.0047705017993998015, "grad_norm": 2.983611822128296, "learning_rate": 1.5856573705179285e-06, "loss": 0.6176, "step": 399 }, { "epoch": 0.004782457944260452, "grad_norm": 8.668009757995605, "learning_rate": 1.5896414342629482e-06, "loss": 0.7149, "step": 400 }, { "epoch": 0.004794414089121104, "grad_norm": 8.530879020690918, "learning_rate": 1.5936254980079683e-06, "loss": 0.6488, "step": 401 }, { "epoch": 0.004806370233981755, "grad_norm": 4.186720848083496, "learning_rate": 1.5976095617529882e-06, "loss": 0.5886, "step": 402 }, { "epoch": 0.004818326378842406, "grad_norm": 2.7108569145202637, "learning_rate": 1.601593625498008e-06, "loss": 0.6255, "step": 403 }, { "epoch": 0.0048302825237030575, "grad_norm": 2.6656413078308105, "learning_rate": 1.6055776892430281e-06, "loss": 0.6347, "step": 404 }, { "epoch": 0.004842238668563708, "grad_norm": 4.754838943481445, "learning_rate": 1.609561752988048e-06, "loss": 0.761, "step": 405 }, { "epoch": 0.004854194813424359, "grad_norm": 2.2202610969543457, "learning_rate": 1.613545816733068e-06, "loss": 0.5824, "step": 406 }, { "epoch": 0.004866150958285011, "grad_norm": 2.3735404014587402, "learning_rate": 1.6175298804780878e-06, "loss": 0.7521, "step": 407 }, { "epoch": 0.004878107103145662, "grad_norm": 2.9046406745910645, "learning_rate": 1.6215139442231077e-06, "loss": 0.5868, "step": 408 }, { "epoch": 0.004890063248006313, "grad_norm": 2.2074451446533203, "learning_rate": 1.6254980079681275e-06, "loss": 0.6168, "step": 409 }, { "epoch": 0.004902019392866964, "grad_norm": 2.2736849784851074, "learning_rate": 1.6294820717131476e-06, "loss": 0.6084, "step": 410 }, { "epoch": 0.004913975537727615, "grad_norm": 3.231593132019043, "learning_rate": 1.6334661354581673e-06, "loss": 0.5411, "step": 411 }, { "epoch": 0.004925931682588266, "grad_norm": 18.04610252380371, "learning_rate": 1.6374501992031874e-06, "loss": 0.6551, "step": 412 }, { "epoch": 0.004937887827448918, "grad_norm": 6.906659126281738, "learning_rate": 1.6414342629482073e-06, "loss": 0.6828, "step": 413 }, { "epoch": 0.004949843972309569, "grad_norm": 1.7476248741149902, "learning_rate": 1.6454183266932272e-06, "loss": 0.5359, "step": 414 }, { "epoch": 0.004961800117170219, "grad_norm": 2.060307025909424, "learning_rate": 1.6494023904382473e-06, "loss": 0.5604, "step": 415 }, { "epoch": 0.004973756262030871, "grad_norm": 15.75240707397461, "learning_rate": 1.653386454183267e-06, "loss": 0.6835, "step": 416 }, { "epoch": 0.004985712406891522, "grad_norm": 2.9443187713623047, "learning_rate": 1.657370517928287e-06, "loss": 0.568, "step": 417 }, { "epoch": 0.004997668551752173, "grad_norm": 13.689769744873047, "learning_rate": 1.661354581673307e-06, "loss": 0.6069, "step": 418 }, { "epoch": 0.0050096246966128246, "grad_norm": 1.999322772026062, "learning_rate": 1.6653386454183268e-06, "loss": 0.5981, "step": 419 }, { "epoch": 0.005021580841473475, "grad_norm": 14.104362487792969, "learning_rate": 1.6693227091633469e-06, "loss": 0.5906, "step": 420 }, { "epoch": 0.005033536986334126, "grad_norm": 9.55416202545166, "learning_rate": 1.6733067729083665e-06, "loss": 0.5935, "step": 421 }, { "epoch": 0.005045493131194778, "grad_norm": 2.316645860671997, "learning_rate": 1.6772908366533866e-06, "loss": 0.6427, "step": 422 }, { "epoch": 0.005057449276055429, "grad_norm": 2.656871795654297, "learning_rate": 1.6812749003984067e-06, "loss": 0.558, "step": 423 }, { "epoch": 0.00506940542091608, "grad_norm": 2.906733274459839, "learning_rate": 1.6852589641434264e-06, "loss": 0.6164, "step": 424 }, { "epoch": 0.0050813615657767305, "grad_norm": 2.7714757919311523, "learning_rate": 1.6892430278884465e-06, "loss": 0.6311, "step": 425 }, { "epoch": 0.005093317710637382, "grad_norm": 10.447349548339844, "learning_rate": 1.6932270916334661e-06, "loss": 0.6735, "step": 426 }, { "epoch": 0.005105273855498033, "grad_norm": 4.201689720153809, "learning_rate": 1.6972111553784862e-06, "loss": 0.5109, "step": 427 }, { "epoch": 0.005117230000358684, "grad_norm": 2.6372032165527344, "learning_rate": 1.7011952191235061e-06, "loss": 0.6589, "step": 428 }, { "epoch": 0.005129186145219336, "grad_norm": 5.500724792480469, "learning_rate": 1.705179282868526e-06, "loss": 0.637, "step": 429 }, { "epoch": 0.0051411422900799865, "grad_norm": 29.911806106567383, "learning_rate": 1.709163346613546e-06, "loss": 0.6286, "step": 430 }, { "epoch": 0.005153098434940637, "grad_norm": 2.381371259689331, "learning_rate": 1.7131474103585658e-06, "loss": 0.6174, "step": 431 }, { "epoch": 0.005165054579801289, "grad_norm": 2.347219705581665, "learning_rate": 1.7171314741035859e-06, "loss": 0.7016, "step": 432 }, { "epoch": 0.00517701072466194, "grad_norm": 1.9343236684799194, "learning_rate": 1.7211155378486057e-06, "loss": 0.5212, "step": 433 }, { "epoch": 0.005188966869522591, "grad_norm": 3.0508241653442383, "learning_rate": 1.7250996015936256e-06, "loss": 0.5296, "step": 434 }, { "epoch": 0.0052009230143832425, "grad_norm": 2.814840316772461, "learning_rate": 1.7290836653386455e-06, "loss": 0.5472, "step": 435 }, { "epoch": 0.005212879159243893, "grad_norm": 4.275877952575684, "learning_rate": 1.7330677290836656e-06, "loss": 0.5903, "step": 436 }, { "epoch": 0.005224835304104544, "grad_norm": 2.200486421585083, "learning_rate": 1.7370517928286853e-06, "loss": 0.5852, "step": 437 }, { "epoch": 0.005236791448965196, "grad_norm": 2.908604383468628, "learning_rate": 1.7410358565737053e-06, "loss": 0.5392, "step": 438 }, { "epoch": 0.005248747593825847, "grad_norm": 3.824888229370117, "learning_rate": 1.7450199203187252e-06, "loss": 0.611, "step": 439 }, { "epoch": 0.005260703738686498, "grad_norm": 2.9803404808044434, "learning_rate": 1.7490039840637451e-06, "loss": 0.5851, "step": 440 }, { "epoch": 0.005272659883547149, "grad_norm": 1.9701405763626099, "learning_rate": 1.7529880478087652e-06, "loss": 0.5883, "step": 441 }, { "epoch": 0.0052846160284078, "grad_norm": 2.8704569339752197, "learning_rate": 1.7569721115537849e-06, "loss": 0.5397, "step": 442 }, { "epoch": 0.005296572173268451, "grad_norm": 3.0760538578033447, "learning_rate": 1.760956175298805e-06, "loss": 0.572, "step": 443 }, { "epoch": 0.005308528318129103, "grad_norm": 6.2929487228393555, "learning_rate": 1.7649402390438246e-06, "loss": 0.5768, "step": 444 }, { "epoch": 0.005320484462989754, "grad_norm": 2.8788695335388184, "learning_rate": 1.7689243027888447e-06, "loss": 0.5163, "step": 445 }, { "epoch": 0.0053324406078504045, "grad_norm": 2.5227088928222656, "learning_rate": 1.7729083665338648e-06, "loss": 0.5685, "step": 446 }, { "epoch": 0.005344396752711056, "grad_norm": 6.585536479949951, "learning_rate": 1.7768924302788845e-06, "loss": 0.6147, "step": 447 }, { "epoch": 0.005356352897571707, "grad_norm": 2.3506505489349365, "learning_rate": 1.7808764940239046e-06, "loss": 0.6086, "step": 448 }, { "epoch": 0.005368309042432358, "grad_norm": 3.9060568809509277, "learning_rate": 1.7848605577689247e-06, "loss": 0.6468, "step": 449 }, { "epoch": 0.00538026518729301, "grad_norm": 3.9018521308898926, "learning_rate": 1.7888446215139443e-06, "loss": 0.5899, "step": 450 }, { "epoch": 0.0053922213321536605, "grad_norm": 3.615940809249878, "learning_rate": 1.7928286852589644e-06, "loss": 0.5493, "step": 451 }, { "epoch": 0.005404177477014311, "grad_norm": 3.1107864379882812, "learning_rate": 1.796812749003984e-06, "loss": 0.5999, "step": 452 }, { "epoch": 0.005416133621874963, "grad_norm": 2.948288679122925, "learning_rate": 1.8007968127490042e-06, "loss": 0.5827, "step": 453 }, { "epoch": 0.005428089766735614, "grad_norm": 7.671849727630615, "learning_rate": 1.804780876494024e-06, "loss": 0.6099, "step": 454 }, { "epoch": 0.005440045911596265, "grad_norm": 3.85093355178833, "learning_rate": 1.808764940239044e-06, "loss": 0.6379, "step": 455 }, { "epoch": 0.0054520020564569165, "grad_norm": 4.867312431335449, "learning_rate": 1.812749003984064e-06, "loss": 0.6027, "step": 456 }, { "epoch": 0.005463958201317567, "grad_norm": 2.204057216644287, "learning_rate": 1.8167330677290837e-06, "loss": 0.5876, "step": 457 }, { "epoch": 0.005475914346178218, "grad_norm": 3.650845766067505, "learning_rate": 1.8207171314741038e-06, "loss": 0.6836, "step": 458 }, { "epoch": 0.005487870491038869, "grad_norm": 5.116706848144531, "learning_rate": 1.8247011952191237e-06, "loss": 0.5292, "step": 459 }, { "epoch": 0.005499826635899521, "grad_norm": 2.216684579849243, "learning_rate": 1.8286852589641436e-06, "loss": 0.6103, "step": 460 }, { "epoch": 0.005511782780760172, "grad_norm": 4.124785423278809, "learning_rate": 1.8326693227091634e-06, "loss": 0.5283, "step": 461 }, { "epoch": 0.0055237389256208224, "grad_norm": 2.13277268409729, "learning_rate": 1.8366533864541833e-06, "loss": 0.6175, "step": 462 }, { "epoch": 0.005535695070481474, "grad_norm": 3.1168925762176514, "learning_rate": 1.8406374501992032e-06, "loss": 0.6677, "step": 463 }, { "epoch": 0.005547651215342125, "grad_norm": 2.801405429840088, "learning_rate": 1.8446215139442233e-06, "loss": 0.6528, "step": 464 }, { "epoch": 0.005559607360202776, "grad_norm": 2.3162481784820557, "learning_rate": 1.8486055776892432e-06, "loss": 0.6081, "step": 465 }, { "epoch": 0.005571563505063428, "grad_norm": 20.240800857543945, "learning_rate": 1.852589641434263e-06, "loss": 0.6867, "step": 466 }, { "epoch": 0.005583519649924078, "grad_norm": 2.28456449508667, "learning_rate": 1.8565737051792832e-06, "loss": 0.5601, "step": 467 }, { "epoch": 0.005595475794784729, "grad_norm": 2.5733277797698975, "learning_rate": 1.8605577689243028e-06, "loss": 0.5726, "step": 468 }, { "epoch": 0.005607431939645381, "grad_norm": 2.225593328475952, "learning_rate": 1.864541832669323e-06, "loss": 0.6017, "step": 469 }, { "epoch": 0.005619388084506032, "grad_norm": 3.040172815322876, "learning_rate": 1.8685258964143426e-06, "loss": 0.6611, "step": 470 }, { "epoch": 0.005631344229366683, "grad_norm": 7.695122718811035, "learning_rate": 1.8725099601593627e-06, "loss": 0.6654, "step": 471 }, { "epoch": 0.005643300374227334, "grad_norm": 5.5286478996276855, "learning_rate": 1.8764940239043828e-06, "loss": 0.6416, "step": 472 }, { "epoch": 0.005655256519087985, "grad_norm": 23.940027236938477, "learning_rate": 1.8804780876494024e-06, "loss": 0.6488, "step": 473 }, { "epoch": 0.005667212663948636, "grad_norm": 2.868393898010254, "learning_rate": 1.8844621513944225e-06, "loss": 0.6361, "step": 474 }, { "epoch": 0.005679168808809288, "grad_norm": 3.514009714126587, "learning_rate": 1.8884462151394422e-06, "loss": 0.6924, "step": 475 }, { "epoch": 0.005691124953669939, "grad_norm": 3.0047526359558105, "learning_rate": 1.8924302788844623e-06, "loss": 0.5693, "step": 476 }, { "epoch": 0.0057030810985305895, "grad_norm": 1.9315518140792847, "learning_rate": 1.8964143426294824e-06, "loss": 0.5919, "step": 477 }, { "epoch": 0.005715037243391241, "grad_norm": 2.3383705615997314, "learning_rate": 1.900398406374502e-06, "loss": 0.5463, "step": 478 }, { "epoch": 0.005726993388251892, "grad_norm": 2.710980176925659, "learning_rate": 1.9043824701195221e-06, "loss": 0.62, "step": 479 }, { "epoch": 0.005738949533112543, "grad_norm": 2.143400192260742, "learning_rate": 1.9083665338645422e-06, "loss": 0.5932, "step": 480 }, { "epoch": 0.005750905677973195, "grad_norm": 2.961517572402954, "learning_rate": 1.9123505976095617e-06, "loss": 0.5829, "step": 481 }, { "epoch": 0.0057628618228338455, "grad_norm": 2.7419238090515137, "learning_rate": 1.916334661354582e-06, "loss": 0.5774, "step": 482 }, { "epoch": 0.005774817967694496, "grad_norm": 1.7525485754013062, "learning_rate": 1.9203187250996015e-06, "loss": 0.6153, "step": 483 }, { "epoch": 0.005786774112555148, "grad_norm": 2.339937448501587, "learning_rate": 1.9243027888446218e-06, "loss": 0.6094, "step": 484 }, { "epoch": 0.005798730257415799, "grad_norm": 3.6746363639831543, "learning_rate": 1.9282868525896416e-06, "loss": 0.5717, "step": 485 }, { "epoch": 0.00581068640227645, "grad_norm": 17.691585540771484, "learning_rate": 1.9322709163346615e-06, "loss": 0.6807, "step": 486 }, { "epoch": 0.0058226425471371015, "grad_norm": 4.649540901184082, "learning_rate": 1.9362549800796814e-06, "loss": 0.6757, "step": 487 }, { "epoch": 0.005834598691997752, "grad_norm": 3.776681900024414, "learning_rate": 1.9402390438247013e-06, "loss": 0.6467, "step": 488 }, { "epoch": 0.005846554836858403, "grad_norm": 4.002926826477051, "learning_rate": 1.944223107569721e-06, "loss": 0.5781, "step": 489 }, { "epoch": 0.005858510981719055, "grad_norm": 3.1153922080993652, "learning_rate": 1.9482071713147415e-06, "loss": 0.5808, "step": 490 }, { "epoch": 0.005870467126579706, "grad_norm": 2.800882577896118, "learning_rate": 1.952191235059761e-06, "loss": 0.6988, "step": 491 }, { "epoch": 0.005882423271440357, "grad_norm": 4.683854579925537, "learning_rate": 1.9561752988047812e-06, "loss": 0.6266, "step": 492 }, { "epoch": 0.0058943794163010075, "grad_norm": 3.182827949523926, "learning_rate": 1.9601593625498007e-06, "loss": 0.5473, "step": 493 }, { "epoch": 0.005906335561161659, "grad_norm": 3.339991569519043, "learning_rate": 1.964143426294821e-06, "loss": 0.7056, "step": 494 }, { "epoch": 0.00591829170602231, "grad_norm": 3.00852632522583, "learning_rate": 1.968127490039841e-06, "loss": 0.6529, "step": 495 }, { "epoch": 0.005930247850882961, "grad_norm": 3.839061737060547, "learning_rate": 1.9721115537848607e-06, "loss": 0.5351, "step": 496 }, { "epoch": 0.005942203995743613, "grad_norm": 2.603564739227295, "learning_rate": 1.9760956175298806e-06, "loss": 0.6061, "step": 497 }, { "epoch": 0.0059541601406042635, "grad_norm": 16.8055419921875, "learning_rate": 1.9800796812749005e-06, "loss": 0.5406, "step": 498 }, { "epoch": 0.005966116285464914, "grad_norm": 4.038771629333496, "learning_rate": 1.9840637450199204e-06, "loss": 0.5677, "step": 499 }, { "epoch": 0.005978072430325566, "grad_norm": 1.7186826467514038, "learning_rate": 1.9880478087649403e-06, "loss": 0.6775, "step": 500 }, { "epoch": 0.005990028575186217, "grad_norm": 2.82956862449646, "learning_rate": 1.99203187250996e-06, "loss": 0.6717, "step": 501 }, { "epoch": 0.006001984720046868, "grad_norm": 4.337560176849365, "learning_rate": 1.9960159362549805e-06, "loss": 0.7357, "step": 502 }, { "epoch": 0.0060139408649075195, "grad_norm": 2.9506635665893555, "learning_rate": 2.0000000000000003e-06, "loss": 0.594, "step": 503 }, { "epoch": 0.00602589700976817, "grad_norm": 4.692884922027588, "learning_rate": 2.0039840637450202e-06, "loss": 0.6408, "step": 504 }, { "epoch": 0.006037853154628821, "grad_norm": 4.957040309906006, "learning_rate": 2.00796812749004e-06, "loss": 0.6745, "step": 505 }, { "epoch": 0.006049809299489473, "grad_norm": 3.080925703048706, "learning_rate": 2.01195219123506e-06, "loss": 0.6319, "step": 506 }, { "epoch": 0.006061765444350124, "grad_norm": 2.463798999786377, "learning_rate": 2.01593625498008e-06, "loss": 0.6241, "step": 507 }, { "epoch": 0.006073721589210775, "grad_norm": 3.3488757610321045, "learning_rate": 2.0199203187250997e-06, "loss": 0.5656, "step": 508 }, { "epoch": 0.006085677734071426, "grad_norm": 2.65907883644104, "learning_rate": 2.0239043824701196e-06, "loss": 0.6333, "step": 509 }, { "epoch": 0.006097633878932077, "grad_norm": 10.242619514465332, "learning_rate": 2.0278884462151395e-06, "loss": 0.7725, "step": 510 }, { "epoch": 0.006109590023792728, "grad_norm": 1.4176795482635498, "learning_rate": 2.03187250996016e-06, "loss": 0.5109, "step": 511 }, { "epoch": 0.00612154616865338, "grad_norm": 2.956590175628662, "learning_rate": 2.0358565737051793e-06, "loss": 0.585, "step": 512 }, { "epoch": 0.006133502313514031, "grad_norm": 3.736984968185425, "learning_rate": 2.0398406374501996e-06, "loss": 0.6011, "step": 513 }, { "epoch": 0.0061454584583746814, "grad_norm": 2.462505578994751, "learning_rate": 2.043824701195219e-06, "loss": 0.6276, "step": 514 }, { "epoch": 0.006157414603235333, "grad_norm": 2.5571954250335693, "learning_rate": 2.0478087649402393e-06, "loss": 0.5876, "step": 515 }, { "epoch": 0.006169370748095984, "grad_norm": 3.4475653171539307, "learning_rate": 2.051792828685259e-06, "loss": 0.5392, "step": 516 }, { "epoch": 0.006181326892956635, "grad_norm": 3.0776193141937256, "learning_rate": 2.055776892430279e-06, "loss": 0.6265, "step": 517 }, { "epoch": 0.006193283037817287, "grad_norm": 12.323545455932617, "learning_rate": 2.059760956175299e-06, "loss": 0.6188, "step": 518 }, { "epoch": 0.006205239182677937, "grad_norm": 2.2354817390441895, "learning_rate": 2.063745019920319e-06, "loss": 0.6143, "step": 519 }, { "epoch": 0.006217195327538588, "grad_norm": 6.51090669631958, "learning_rate": 2.0677290836653387e-06, "loss": 0.5476, "step": 520 }, { "epoch": 0.00622915147239924, "grad_norm": 4.245558261871338, "learning_rate": 2.071713147410359e-06, "loss": 0.5836, "step": 521 }, { "epoch": 0.006241107617259891, "grad_norm": 3.209362268447876, "learning_rate": 2.0756972111553785e-06, "loss": 0.6243, "step": 522 }, { "epoch": 0.006253063762120542, "grad_norm": 2.6769955158233643, "learning_rate": 2.079681274900399e-06, "loss": 0.6262, "step": 523 }, { "epoch": 0.006265019906981193, "grad_norm": 2.435323715209961, "learning_rate": 2.0836653386454182e-06, "loss": 0.638, "step": 524 }, { "epoch": 0.006276976051841844, "grad_norm": 3.504957675933838, "learning_rate": 2.0876494023904386e-06, "loss": 0.5735, "step": 525 }, { "epoch": 0.006288932196702495, "grad_norm": 3.0456044673919678, "learning_rate": 2.0916334661354584e-06, "loss": 0.746, "step": 526 }, { "epoch": 0.006300888341563146, "grad_norm": 3.3731539249420166, "learning_rate": 2.0956175298804783e-06, "loss": 0.64, "step": 527 }, { "epoch": 0.006312844486423798, "grad_norm": 3.406684398651123, "learning_rate": 2.099601593625498e-06, "loss": 0.6069, "step": 528 }, { "epoch": 0.0063248006312844485, "grad_norm": 2.297210216522217, "learning_rate": 2.103585657370518e-06, "loss": 0.6159, "step": 529 }, { "epoch": 0.006336756776145099, "grad_norm": 3.2944090366363525, "learning_rate": 2.107569721115538e-06, "loss": 0.5688, "step": 530 }, { "epoch": 0.006348712921005751, "grad_norm": 3.1769607067108154, "learning_rate": 2.111553784860558e-06, "loss": 0.6946, "step": 531 }, { "epoch": 0.006360669065866402, "grad_norm": 2.349457263946533, "learning_rate": 2.1155378486055777e-06, "loss": 0.6436, "step": 532 }, { "epoch": 0.006372625210727053, "grad_norm": 7.7066650390625, "learning_rate": 2.1195219123505976e-06, "loss": 0.5362, "step": 533 }, { "epoch": 0.0063845813555877045, "grad_norm": 2.8872241973876953, "learning_rate": 2.123505976095618e-06, "loss": 0.6351, "step": 534 }, { "epoch": 0.006396537500448355, "grad_norm": 2.2838363647460938, "learning_rate": 2.1274900398406374e-06, "loss": 0.6586, "step": 535 }, { "epoch": 0.006408493645309006, "grad_norm": 3.4239983558654785, "learning_rate": 2.1314741035856577e-06, "loss": 0.5482, "step": 536 }, { "epoch": 0.006420449790169658, "grad_norm": 2.2501633167266846, "learning_rate": 2.1354581673306775e-06, "loss": 0.602, "step": 537 }, { "epoch": 0.006432405935030309, "grad_norm": 6.2422027587890625, "learning_rate": 2.1394422310756974e-06, "loss": 0.6699, "step": 538 }, { "epoch": 0.00644436207989096, "grad_norm": 6.866302490234375, "learning_rate": 2.1434262948207173e-06, "loss": 0.6083, "step": 539 }, { "epoch": 0.006456318224751611, "grad_norm": 3.7869715690612793, "learning_rate": 2.147410358565737e-06, "loss": 0.6312, "step": 540 }, { "epoch": 0.006468274369612262, "grad_norm": 4.101195335388184, "learning_rate": 2.151394422310757e-06, "loss": 0.6686, "step": 541 }, { "epoch": 0.006480230514472913, "grad_norm": 1.872239589691162, "learning_rate": 2.155378486055777e-06, "loss": 0.5401, "step": 542 }, { "epoch": 0.006492186659333565, "grad_norm": 3.9436657428741455, "learning_rate": 2.159362549800797e-06, "loss": 0.5957, "step": 543 }, { "epoch": 0.006504142804194216, "grad_norm": 9.704703330993652, "learning_rate": 2.163346613545817e-06, "loss": 0.5656, "step": 544 }, { "epoch": 0.0065160989490548665, "grad_norm": 2.666748046875, "learning_rate": 2.1673306772908366e-06, "loss": 0.6748, "step": 545 }, { "epoch": 0.006528055093915518, "grad_norm": 2.9308207035064697, "learning_rate": 2.171314741035857e-06, "loss": 0.5012, "step": 546 }, { "epoch": 0.006540011238776169, "grad_norm": 8.536152839660645, "learning_rate": 2.1752988047808768e-06, "loss": 0.7766, "step": 547 }, { "epoch": 0.00655196738363682, "grad_norm": 3.1698124408721924, "learning_rate": 2.1792828685258966e-06, "loss": 0.6341, "step": 548 }, { "epoch": 0.006563923528497472, "grad_norm": 4.566768169403076, "learning_rate": 2.1832669322709165e-06, "loss": 0.7066, "step": 549 }, { "epoch": 0.0065758796733581225, "grad_norm": 2.7324764728546143, "learning_rate": 2.1872509960159364e-06, "loss": 0.6064, "step": 550 }, { "epoch": 0.006587835818218773, "grad_norm": 2.3423569202423096, "learning_rate": 2.1912350597609563e-06, "loss": 0.5979, "step": 551 }, { "epoch": 0.006599791963079425, "grad_norm": 4.266001224517822, "learning_rate": 2.195219123505976e-06, "loss": 0.6273, "step": 552 }, { "epoch": 0.006611748107940076, "grad_norm": 3.0815951824188232, "learning_rate": 2.199203187250996e-06, "loss": 0.5564, "step": 553 }, { "epoch": 0.006623704252800727, "grad_norm": 1.7918440103530884, "learning_rate": 2.2031872509960164e-06, "loss": 0.6201, "step": 554 }, { "epoch": 0.0066356603976613785, "grad_norm": 2.0096254348754883, "learning_rate": 2.207171314741036e-06, "loss": 0.5821, "step": 555 }, { "epoch": 0.006647616542522029, "grad_norm": 7.875658988952637, "learning_rate": 2.211155378486056e-06, "loss": 0.6091, "step": 556 }, { "epoch": 0.00665957268738268, "grad_norm": 2.793358325958252, "learning_rate": 2.215139442231076e-06, "loss": 0.5799, "step": 557 }, { "epoch": 0.006671528832243331, "grad_norm": 4.2700371742248535, "learning_rate": 2.219123505976096e-06, "loss": 0.5875, "step": 558 }, { "epoch": 0.006683484977103983, "grad_norm": 2.816148281097412, "learning_rate": 2.2231075697211158e-06, "loss": 0.5633, "step": 559 }, { "epoch": 0.006695441121964634, "grad_norm": 2.091918706893921, "learning_rate": 2.2270916334661356e-06, "loss": 0.5974, "step": 560 }, { "epoch": 0.0067073972668252844, "grad_norm": 2.9063620567321777, "learning_rate": 2.2310756972111555e-06, "loss": 0.643, "step": 561 }, { "epoch": 0.006719353411685936, "grad_norm": 2.2484545707702637, "learning_rate": 2.2350597609561754e-06, "loss": 0.5937, "step": 562 }, { "epoch": 0.006731309556546587, "grad_norm": 17.929553985595703, "learning_rate": 2.2390438247011953e-06, "loss": 0.6493, "step": 563 }, { "epoch": 0.006743265701407238, "grad_norm": 2.5306971073150635, "learning_rate": 2.243027888446215e-06, "loss": 0.746, "step": 564 }, { "epoch": 0.00675522184626789, "grad_norm": 11.50333023071289, "learning_rate": 2.2470119521912355e-06, "loss": 0.6289, "step": 565 }, { "epoch": 0.0067671779911285404, "grad_norm": 4.726577281951904, "learning_rate": 2.250996015936255e-06, "loss": 0.6717, "step": 566 }, { "epoch": 0.006779134135989191, "grad_norm": 7.345578670501709, "learning_rate": 2.2549800796812752e-06, "loss": 0.4623, "step": 567 }, { "epoch": 0.006791090280849843, "grad_norm": 2.684528350830078, "learning_rate": 2.2589641434262947e-06, "loss": 0.6161, "step": 568 }, { "epoch": 0.006803046425710494, "grad_norm": 4.8548264503479, "learning_rate": 2.262948207171315e-06, "loss": 0.65, "step": 569 }, { "epoch": 0.006815002570571145, "grad_norm": 2.8866195678710938, "learning_rate": 2.266932270916335e-06, "loss": 0.6396, "step": 570 }, { "epoch": 0.006826958715431796, "grad_norm": 2.1547319889068604, "learning_rate": 2.2709163346613547e-06, "loss": 0.7015, "step": 571 }, { "epoch": 0.006838914860292447, "grad_norm": 2.5240330696105957, "learning_rate": 2.2749003984063746e-06, "loss": 0.6581, "step": 572 }, { "epoch": 0.006850871005153098, "grad_norm": 2.6745407581329346, "learning_rate": 2.2788844621513945e-06, "loss": 0.6471, "step": 573 }, { "epoch": 0.00686282715001375, "grad_norm": 3.0928378105163574, "learning_rate": 2.2828685258964144e-06, "loss": 0.5684, "step": 574 }, { "epoch": 0.006874783294874401, "grad_norm": 1.7944376468658447, "learning_rate": 2.2868525896414347e-06, "loss": 0.5593, "step": 575 }, { "epoch": 0.0068867394397350516, "grad_norm": 3.3775956630706787, "learning_rate": 2.290836653386454e-06, "loss": 0.5985, "step": 576 }, { "epoch": 0.006898695584595703, "grad_norm": 3.360578775405884, "learning_rate": 2.2948207171314745e-06, "loss": 0.5539, "step": 577 }, { "epoch": 0.006910651729456354, "grad_norm": 6.445967674255371, "learning_rate": 2.2988047808764943e-06, "loss": 0.5825, "step": 578 }, { "epoch": 0.006922607874317005, "grad_norm": 2.8052563667297363, "learning_rate": 2.3027888446215142e-06, "loss": 0.5915, "step": 579 }, { "epoch": 0.006934564019177657, "grad_norm": 3.6306819915771484, "learning_rate": 2.306772908366534e-06, "loss": 0.5639, "step": 580 }, { "epoch": 0.0069465201640383075, "grad_norm": 2.566671848297119, "learning_rate": 2.310756972111554e-06, "loss": 0.5509, "step": 581 }, { "epoch": 0.006958476308898958, "grad_norm": 1.7922649383544922, "learning_rate": 2.314741035856574e-06, "loss": 0.6013, "step": 582 }, { "epoch": 0.00697043245375961, "grad_norm": 34.18691635131836, "learning_rate": 2.3187250996015937e-06, "loss": 0.6116, "step": 583 }, { "epoch": 0.006982388598620261, "grad_norm": 2.453522205352783, "learning_rate": 2.3227091633466136e-06, "loss": 0.6358, "step": 584 }, { "epoch": 0.006994344743480912, "grad_norm": 6.983034133911133, "learning_rate": 2.3266932270916335e-06, "loss": 0.6969, "step": 585 }, { "epoch": 0.0070063008883415635, "grad_norm": 2.8727598190307617, "learning_rate": 2.3306772908366534e-06, "loss": 0.6577, "step": 586 }, { "epoch": 0.007018257033202214, "grad_norm": 2.6354141235351562, "learning_rate": 2.3346613545816737e-06, "loss": 0.6112, "step": 587 }, { "epoch": 0.007030213178062865, "grad_norm": 2.2347676753997803, "learning_rate": 2.3386454183266936e-06, "loss": 0.5725, "step": 588 }, { "epoch": 0.007042169322923517, "grad_norm": 7.682505130767822, "learning_rate": 2.3426294820717134e-06, "loss": 0.6194, "step": 589 }, { "epoch": 0.007054125467784168, "grad_norm": 2.4905824661254883, "learning_rate": 2.3466135458167333e-06, "loss": 0.5114, "step": 590 }, { "epoch": 0.007066081612644819, "grad_norm": 5.869137287139893, "learning_rate": 2.350597609561753e-06, "loss": 0.5701, "step": 591 }, { "epoch": 0.0070780377575054695, "grad_norm": 3.4987213611602783, "learning_rate": 2.354581673306773e-06, "loss": 0.5979, "step": 592 }, { "epoch": 0.007089993902366121, "grad_norm": 3.6865906715393066, "learning_rate": 2.358565737051793e-06, "loss": 0.6084, "step": 593 }, { "epoch": 0.007101950047226772, "grad_norm": 2.1118905544281006, "learning_rate": 2.362549800796813e-06, "loss": 0.575, "step": 594 }, { "epoch": 0.007113906192087423, "grad_norm": 2.926342010498047, "learning_rate": 2.3665338645418327e-06, "loss": 0.6519, "step": 595 }, { "epoch": 0.007125862336948075, "grad_norm": 2.353996753692627, "learning_rate": 2.370517928286853e-06, "loss": 0.6183, "step": 596 }, { "epoch": 0.0071378184818087255, "grad_norm": 3.181741237640381, "learning_rate": 2.3745019920318725e-06, "loss": 0.6096, "step": 597 }, { "epoch": 0.007149774626669376, "grad_norm": 1.9238253831863403, "learning_rate": 2.378486055776893e-06, "loss": 0.6482, "step": 598 }, { "epoch": 0.007161730771530028, "grad_norm": 2.5291082859039307, "learning_rate": 2.3824701195219122e-06, "loss": 0.5538, "step": 599 }, { "epoch": 0.007173686916390679, "grad_norm": 3.4158456325531006, "learning_rate": 2.3864541832669326e-06, "loss": 0.5545, "step": 600 }, { "epoch": 0.00718564306125133, "grad_norm": 3.0662682056427, "learning_rate": 2.3904382470119524e-06, "loss": 0.6897, "step": 601 }, { "epoch": 0.0071975992061119815, "grad_norm": 2.8201236724853516, "learning_rate": 2.3944223107569723e-06, "loss": 0.6057, "step": 602 }, { "epoch": 0.007209555350972632, "grad_norm": 2.2035071849823, "learning_rate": 2.398406374501992e-06, "loss": 0.63, "step": 603 }, { "epoch": 0.007221511495833283, "grad_norm": 3.0700390338897705, "learning_rate": 2.402390438247012e-06, "loss": 0.664, "step": 604 }, { "epoch": 0.007233467640693935, "grad_norm": 3.1296443939208984, "learning_rate": 2.406374501992032e-06, "loss": 0.5869, "step": 605 }, { "epoch": 0.007245423785554586, "grad_norm": 3.764727830886841, "learning_rate": 2.4103585657370523e-06, "loss": 0.5569, "step": 606 }, { "epoch": 0.007257379930415237, "grad_norm": 2.796762228012085, "learning_rate": 2.4143426294820717e-06, "loss": 0.6398, "step": 607 }, { "epoch": 0.007269336075275888, "grad_norm": 5.808300018310547, "learning_rate": 2.418326693227092e-06, "loss": 0.6628, "step": 608 }, { "epoch": 0.007281292220136539, "grad_norm": 2.535370111465454, "learning_rate": 2.422310756972112e-06, "loss": 0.542, "step": 609 }, { "epoch": 0.00729324836499719, "grad_norm": 5.474642276763916, "learning_rate": 2.4262948207171318e-06, "loss": 0.6217, "step": 610 }, { "epoch": 0.007305204509857842, "grad_norm": 2.598459243774414, "learning_rate": 2.4302788844621517e-06, "loss": 0.4962, "step": 611 }, { "epoch": 0.007317160654718493, "grad_norm": 2.152977466583252, "learning_rate": 2.4342629482071715e-06, "loss": 0.5835, "step": 612 }, { "epoch": 0.0073291167995791435, "grad_norm": 3.558764696121216, "learning_rate": 2.4382470119521914e-06, "loss": 0.6092, "step": 613 }, { "epoch": 0.007341072944439795, "grad_norm": 2.1955482959747314, "learning_rate": 2.4422310756972113e-06, "loss": 0.5362, "step": 614 }, { "epoch": 0.007353029089300446, "grad_norm": 6.157235622406006, "learning_rate": 2.446215139442231e-06, "loss": 0.6072, "step": 615 }, { "epoch": 0.007364985234161097, "grad_norm": 2.6662797927856445, "learning_rate": 2.450199203187251e-06, "loss": 0.6686, "step": 616 }, { "epoch": 0.007376941379021749, "grad_norm": 4.260572910308838, "learning_rate": 2.454183266932271e-06, "loss": 0.6293, "step": 617 }, { "epoch": 0.0073888975238823994, "grad_norm": 4.273194789886475, "learning_rate": 2.458167330677291e-06, "loss": 0.6064, "step": 618 }, { "epoch": 0.00740085366874305, "grad_norm": 3.6622347831726074, "learning_rate": 2.462151394422311e-06, "loss": 0.5638, "step": 619 }, { "epoch": 0.007412809813603702, "grad_norm": 7.218562602996826, "learning_rate": 2.466135458167331e-06, "loss": 0.5431, "step": 620 }, { "epoch": 0.007424765958464353, "grad_norm": 7.523820400238037, "learning_rate": 2.470119521912351e-06, "loss": 0.6307, "step": 621 }, { "epoch": 0.007436722103325004, "grad_norm": 2.4929580688476562, "learning_rate": 2.4741035856573708e-06, "loss": 0.5918, "step": 622 }, { "epoch": 0.007448678248185655, "grad_norm": 2.0936226844787598, "learning_rate": 2.4780876494023907e-06, "loss": 0.5457, "step": 623 }, { "epoch": 0.007460634393046306, "grad_norm": 4.11259126663208, "learning_rate": 2.4820717131474105e-06, "loss": 0.5247, "step": 624 }, { "epoch": 0.007472590537906957, "grad_norm": 3.926480531692505, "learning_rate": 2.4860557768924304e-06, "loss": 0.6491, "step": 625 }, { "epoch": 0.007484546682767608, "grad_norm": 17.332439422607422, "learning_rate": 2.4900398406374503e-06, "loss": 0.5957, "step": 626 }, { "epoch": 0.00749650282762826, "grad_norm": 5.788688659667969, "learning_rate": 2.4940239043824706e-06, "loss": 0.6328, "step": 627 }, { "epoch": 0.0075084589724889106, "grad_norm": 2.184000015258789, "learning_rate": 2.49800796812749e-06, "loss": 0.6134, "step": 628 }, { "epoch": 0.007520415117349561, "grad_norm": 5.181049346923828, "learning_rate": 2.50199203187251e-06, "loss": 0.6625, "step": 629 }, { "epoch": 0.007532371262210213, "grad_norm": 2.400646209716797, "learning_rate": 2.50597609561753e-06, "loss": 0.6134, "step": 630 }, { "epoch": 0.007544327407070864, "grad_norm": 3.1562039852142334, "learning_rate": 2.50996015936255e-06, "loss": 0.6355, "step": 631 }, { "epoch": 0.007556283551931515, "grad_norm": 4.1683759689331055, "learning_rate": 2.51394422310757e-06, "loss": 0.5592, "step": 632 }, { "epoch": 0.0075682396967921665, "grad_norm": 2.1443259716033936, "learning_rate": 2.51792828685259e-06, "loss": 0.5, "step": 633 }, { "epoch": 0.007580195841652817, "grad_norm": 1.8505313396453857, "learning_rate": 2.5219123505976093e-06, "loss": 0.5621, "step": 634 }, { "epoch": 0.007592151986513468, "grad_norm": 2.567841053009033, "learning_rate": 2.5258964143426296e-06, "loss": 0.6129, "step": 635 }, { "epoch": 0.00760410813137412, "grad_norm": 6.667324066162109, "learning_rate": 2.5298804780876495e-06, "loss": 0.5757, "step": 636 }, { "epoch": 0.007616064276234771, "grad_norm": 2.6821794509887695, "learning_rate": 2.5338645418326694e-06, "loss": 0.5145, "step": 637 }, { "epoch": 0.007628020421095422, "grad_norm": 2.4973878860473633, "learning_rate": 2.5378486055776897e-06, "loss": 0.5811, "step": 638 }, { "epoch": 0.007639976565956073, "grad_norm": 3.047931432723999, "learning_rate": 2.541832669322709e-06, "loss": 0.6058, "step": 639 }, { "epoch": 0.007651932710816724, "grad_norm": 10.458799362182617, "learning_rate": 2.545816733067729e-06, "loss": 0.5319, "step": 640 }, { "epoch": 0.007663888855677375, "grad_norm": 3.472581624984741, "learning_rate": 2.5498007968127493e-06, "loss": 0.5968, "step": 641 }, { "epoch": 0.007675845000538027, "grad_norm": 4.702916622161865, "learning_rate": 2.5537848605577692e-06, "loss": 0.5663, "step": 642 }, { "epoch": 0.007687801145398678, "grad_norm": 2.195359230041504, "learning_rate": 2.557768924302789e-06, "loss": 0.527, "step": 643 }, { "epoch": 0.0076997572902593285, "grad_norm": 8.264177322387695, "learning_rate": 2.5617529880478086e-06, "loss": 0.6892, "step": 644 }, { "epoch": 0.00771171343511998, "grad_norm": 8.789841651916504, "learning_rate": 2.565737051792829e-06, "loss": 0.5186, "step": 645 }, { "epoch": 0.007723669579980631, "grad_norm": 2.2718214988708496, "learning_rate": 2.5697211155378487e-06, "loss": 0.6505, "step": 646 }, { "epoch": 0.007735625724841282, "grad_norm": 3.472351551055908, "learning_rate": 2.5737051792828686e-06, "loss": 0.5262, "step": 647 }, { "epoch": 0.007747581869701934, "grad_norm": 3.9554030895233154, "learning_rate": 2.577689243027889e-06, "loss": 0.6317, "step": 648 }, { "epoch": 0.0077595380145625845, "grad_norm": 1.9575397968292236, "learning_rate": 2.581673306772909e-06, "loss": 0.5594, "step": 649 }, { "epoch": 0.007771494159423235, "grad_norm": 5.045785903930664, "learning_rate": 2.5856573705179283e-06, "loss": 0.6619, "step": 650 }, { "epoch": 0.007783450304283887, "grad_norm": 14.329011917114258, "learning_rate": 2.589641434262948e-06, "loss": 0.582, "step": 651 }, { "epoch": 0.007795406449144538, "grad_norm": 2.7192189693450928, "learning_rate": 2.5936254980079685e-06, "loss": 0.5945, "step": 652 }, { "epoch": 0.007807362594005189, "grad_norm": 2.8335793018341064, "learning_rate": 2.5976095617529883e-06, "loss": 0.583, "step": 653 }, { "epoch": 0.00781931873886584, "grad_norm": 2.237535238265991, "learning_rate": 2.6015936254980082e-06, "loss": 0.5992, "step": 654 }, { "epoch": 0.00783127488372649, "grad_norm": 3.38006854057312, "learning_rate": 2.605577689243028e-06, "loss": 0.6612, "step": 655 }, { "epoch": 0.007843231028587142, "grad_norm": 2.1356146335601807, "learning_rate": 2.609561752988048e-06, "loss": 0.5128, "step": 656 }, { "epoch": 0.007855187173447794, "grad_norm": 3.567044496536255, "learning_rate": 2.613545816733068e-06, "loss": 0.5974, "step": 657 }, { "epoch": 0.007867143318308444, "grad_norm": 5.012941837310791, "learning_rate": 2.617529880478088e-06, "loss": 0.5586, "step": 658 }, { "epoch": 0.007879099463169096, "grad_norm": 2.2518763542175293, "learning_rate": 2.621513944223108e-06, "loss": 0.635, "step": 659 }, { "epoch": 0.007891055608029747, "grad_norm": 2.2403931617736816, "learning_rate": 2.6254980079681275e-06, "loss": 0.6148, "step": 660 }, { "epoch": 0.007903011752890397, "grad_norm": 2.3301031589508057, "learning_rate": 2.6294820717131474e-06, "loss": 0.5238, "step": 661 }, { "epoch": 0.007914967897751049, "grad_norm": 2.7779150009155273, "learning_rate": 2.6334661354581677e-06, "loss": 0.6034, "step": 662 }, { "epoch": 0.0079269240426117, "grad_norm": 2.5036261081695557, "learning_rate": 2.6374501992031876e-06, "loss": 0.6781, "step": 663 }, { "epoch": 0.00793888018747235, "grad_norm": 2.7449331283569336, "learning_rate": 2.6414342629482074e-06, "loss": 0.6254, "step": 664 }, { "epoch": 0.007950836332333002, "grad_norm": 5.614695072174072, "learning_rate": 2.645418326693227e-06, "loss": 0.6916, "step": 665 }, { "epoch": 0.007962792477193654, "grad_norm": 3.462272882461548, "learning_rate": 2.649402390438247e-06, "loss": 0.5239, "step": 666 }, { "epoch": 0.007974748622054304, "grad_norm": 3.5175631046295166, "learning_rate": 2.653386454183267e-06, "loss": 0.5076, "step": 667 }, { "epoch": 0.007986704766914956, "grad_norm": 3.086529493331909, "learning_rate": 2.657370517928287e-06, "loss": 0.5596, "step": 668 }, { "epoch": 0.007998660911775608, "grad_norm": 2.7565324306488037, "learning_rate": 2.6613545816733073e-06, "loss": 0.555, "step": 669 }, { "epoch": 0.008010617056636258, "grad_norm": 6.311530590057373, "learning_rate": 2.6653386454183267e-06, "loss": 0.4997, "step": 670 }, { "epoch": 0.00802257320149691, "grad_norm": 4.448373317718506, "learning_rate": 2.6693227091633466e-06, "loss": 0.5859, "step": 671 }, { "epoch": 0.008034529346357561, "grad_norm": 1.9943597316741943, "learning_rate": 2.673306772908367e-06, "loss": 0.5704, "step": 672 }, { "epoch": 0.008046485491218211, "grad_norm": 3.7613258361816406, "learning_rate": 2.677290836653387e-06, "loss": 0.5949, "step": 673 }, { "epoch": 0.008058441636078863, "grad_norm": 4.395104885101318, "learning_rate": 2.6812749003984067e-06, "loss": 0.6218, "step": 674 }, { "epoch": 0.008070397780939514, "grad_norm": 3.5562353134155273, "learning_rate": 2.685258964143426e-06, "loss": 0.4797, "step": 675 }, { "epoch": 0.008082353925800164, "grad_norm": 4.075994968414307, "learning_rate": 2.6892430278884464e-06, "loss": 0.6124, "step": 676 }, { "epoch": 0.008094310070660816, "grad_norm": 4.551362991333008, "learning_rate": 2.6932270916334663e-06, "loss": 0.6159, "step": 677 }, { "epoch": 0.008106266215521468, "grad_norm": 3.9430768489837646, "learning_rate": 2.697211155378486e-06, "loss": 0.5608, "step": 678 }, { "epoch": 0.008118222360382118, "grad_norm": 4.9685235023498535, "learning_rate": 2.7011952191235065e-06, "loss": 0.5609, "step": 679 }, { "epoch": 0.00813017850524277, "grad_norm": 5.689269065856934, "learning_rate": 2.7051792828685264e-06, "loss": 0.5465, "step": 680 }, { "epoch": 0.008142134650103421, "grad_norm": 9.349745750427246, "learning_rate": 2.709163346613546e-06, "loss": 0.6439, "step": 681 }, { "epoch": 0.008154090794964071, "grad_norm": 6.163567543029785, "learning_rate": 2.7131474103585657e-06, "loss": 0.5368, "step": 682 }, { "epoch": 0.008166046939824723, "grad_norm": 2.361847162246704, "learning_rate": 2.717131474103586e-06, "loss": 0.5985, "step": 683 }, { "epoch": 0.008178003084685375, "grad_norm": 5.960029602050781, "learning_rate": 2.721115537848606e-06, "loss": 0.6232, "step": 684 }, { "epoch": 0.008189959229546025, "grad_norm": 2.998408794403076, "learning_rate": 2.7250996015936258e-06, "loss": 0.6392, "step": 685 }, { "epoch": 0.008201915374406676, "grad_norm": 2.11149001121521, "learning_rate": 2.7290836653386452e-06, "loss": 0.6016, "step": 686 }, { "epoch": 0.008213871519267328, "grad_norm": 7.101865768432617, "learning_rate": 2.7330677290836655e-06, "loss": 0.5324, "step": 687 }, { "epoch": 0.008225827664127978, "grad_norm": 2.9449687004089355, "learning_rate": 2.7370517928286854e-06, "loss": 0.5191, "step": 688 }, { "epoch": 0.00823778380898863, "grad_norm": 2.992802143096924, "learning_rate": 2.7410358565737057e-06, "loss": 0.6589, "step": 689 }, { "epoch": 0.008249739953849282, "grad_norm": 3.8117406368255615, "learning_rate": 2.7450199203187256e-06, "loss": 0.7527, "step": 690 }, { "epoch": 0.008261696098709932, "grad_norm": 4.716691493988037, "learning_rate": 2.749003984063745e-06, "loss": 0.657, "step": 691 }, { "epoch": 0.008273652243570583, "grad_norm": 4.620551109313965, "learning_rate": 2.752988047808765e-06, "loss": 0.5629, "step": 692 }, { "epoch": 0.008285608388431235, "grad_norm": 5.8595051765441895, "learning_rate": 2.7569721115537852e-06, "loss": 0.7242, "step": 693 }, { "epoch": 0.008297564533291885, "grad_norm": 1.9338246583938599, "learning_rate": 2.760956175298805e-06, "loss": 0.5289, "step": 694 }, { "epoch": 0.008309520678152537, "grad_norm": 2.014127254486084, "learning_rate": 2.764940239043825e-06, "loss": 0.6018, "step": 695 }, { "epoch": 0.008321476823013188, "grad_norm": 2.3885202407836914, "learning_rate": 2.7689243027888445e-06, "loss": 0.608, "step": 696 }, { "epoch": 0.008333432967873838, "grad_norm": 2.8061747550964355, "learning_rate": 2.7729083665338648e-06, "loss": 0.6474, "step": 697 }, { "epoch": 0.00834538911273449, "grad_norm": 3.5888564586639404, "learning_rate": 2.7768924302788847e-06, "loss": 0.6162, "step": 698 }, { "epoch": 0.008357345257595142, "grad_norm": 5.757090091705322, "learning_rate": 2.7808764940239045e-06, "loss": 0.6511, "step": 699 }, { "epoch": 0.008369301402455792, "grad_norm": 1.8314038515090942, "learning_rate": 2.784860557768925e-06, "loss": 0.6486, "step": 700 }, { "epoch": 0.008381257547316443, "grad_norm": 2.146956443786621, "learning_rate": 2.7888446215139443e-06, "loss": 0.6526, "step": 701 }, { "epoch": 0.008393213692177095, "grad_norm": 26.15460968017578, "learning_rate": 2.792828685258964e-06, "loss": 0.5054, "step": 702 }, { "epoch": 0.008405169837037745, "grad_norm": 20.61722183227539, "learning_rate": 2.796812749003984e-06, "loss": 0.614, "step": 703 }, { "epoch": 0.008417125981898397, "grad_norm": 14.169990539550781, "learning_rate": 2.8007968127490044e-06, "loss": 0.6038, "step": 704 }, { "epoch": 0.008429082126759049, "grad_norm": 11.269041061401367, "learning_rate": 2.8047808764940242e-06, "loss": 0.5777, "step": 705 }, { "epoch": 0.008441038271619699, "grad_norm": 2.0930020809173584, "learning_rate": 2.8087649402390437e-06, "loss": 0.6167, "step": 706 }, { "epoch": 0.00845299441648035, "grad_norm": 1.9310641288757324, "learning_rate": 2.812749003984064e-06, "loss": 0.6465, "step": 707 }, { "epoch": 0.008464950561341002, "grad_norm": 2.0106091499328613, "learning_rate": 2.816733067729084e-06, "loss": 0.5966, "step": 708 }, { "epoch": 0.008476906706201652, "grad_norm": 5.409741401672363, "learning_rate": 2.8207171314741038e-06, "loss": 0.5753, "step": 709 }, { "epoch": 0.008488862851062304, "grad_norm": 4.120572566986084, "learning_rate": 2.824701195219124e-06, "loss": 0.6683, "step": 710 }, { "epoch": 0.008500818995922954, "grad_norm": 3.2932417392730713, "learning_rate": 2.828685258964144e-06, "loss": 0.7403, "step": 711 }, { "epoch": 0.008512775140783605, "grad_norm": 2.8430142402648926, "learning_rate": 2.8326693227091634e-06, "loss": 0.5426, "step": 712 }, { "epoch": 0.008524731285644257, "grad_norm": 2.3712897300720215, "learning_rate": 2.8366533864541833e-06, "loss": 0.595, "step": 713 }, { "epoch": 0.008536687430504907, "grad_norm": 3.058379650115967, "learning_rate": 2.8406374501992036e-06, "loss": 0.6009, "step": 714 }, { "epoch": 0.008548643575365559, "grad_norm": 6.076507568359375, "learning_rate": 2.8446215139442235e-06, "loss": 0.589, "step": 715 }, { "epoch": 0.00856059972022621, "grad_norm": 6.367613315582275, "learning_rate": 2.8486055776892433e-06, "loss": 0.6666, "step": 716 }, { "epoch": 0.00857255586508686, "grad_norm": 2.3042752742767334, "learning_rate": 2.852589641434263e-06, "loss": 0.6307, "step": 717 }, { "epoch": 0.008584512009947512, "grad_norm": 3.6338603496551514, "learning_rate": 2.856573705179283e-06, "loss": 0.6932, "step": 718 }, { "epoch": 0.008596468154808164, "grad_norm": 4.831274032592773, "learning_rate": 2.860557768924303e-06, "loss": 0.7149, "step": 719 }, { "epoch": 0.008608424299668814, "grad_norm": 3.1764142513275146, "learning_rate": 2.864541832669323e-06, "loss": 0.5523, "step": 720 }, { "epoch": 0.008620380444529466, "grad_norm": 2.646530866622925, "learning_rate": 2.868525896414343e-06, "loss": 0.6596, "step": 721 }, { "epoch": 0.008632336589390117, "grad_norm": 5.467514514923096, "learning_rate": 2.8725099601593626e-06, "loss": 0.5879, "step": 722 }, { "epoch": 0.008644292734250767, "grad_norm": 1.7304012775421143, "learning_rate": 2.8764940239043825e-06, "loss": 0.5254, "step": 723 }, { "epoch": 0.00865624887911142, "grad_norm": 2.2873966693878174, "learning_rate": 2.880478087649403e-06, "loss": 0.6324, "step": 724 }, { "epoch": 0.00866820502397207, "grad_norm": 2.5273618698120117, "learning_rate": 2.8844621513944227e-06, "loss": 0.6232, "step": 725 }, { "epoch": 0.00868016116883272, "grad_norm": 2.569417715072632, "learning_rate": 2.8884462151394426e-06, "loss": 0.6627, "step": 726 }, { "epoch": 0.008692117313693373, "grad_norm": 6.029114246368408, "learning_rate": 2.892430278884462e-06, "loss": 0.6561, "step": 727 }, { "epoch": 0.008704073458554024, "grad_norm": 2.825162649154663, "learning_rate": 2.8964143426294823e-06, "loss": 0.5747, "step": 728 }, { "epoch": 0.008716029603414674, "grad_norm": 2.687380790710449, "learning_rate": 2.9003984063745022e-06, "loss": 0.6625, "step": 729 }, { "epoch": 0.008727985748275326, "grad_norm": 1.7377523183822632, "learning_rate": 2.904382470119522e-06, "loss": 0.5616, "step": 730 }, { "epoch": 0.008739941893135978, "grad_norm": 3.856294631958008, "learning_rate": 2.9083665338645424e-06, "loss": 0.7348, "step": 731 }, { "epoch": 0.008751898037996628, "grad_norm": 2.7162301540374756, "learning_rate": 2.912350597609562e-06, "loss": 0.6304, "step": 732 }, { "epoch": 0.00876385418285728, "grad_norm": 7.310739517211914, "learning_rate": 2.9163346613545817e-06, "loss": 0.6331, "step": 733 }, { "epoch": 0.008775810327717931, "grad_norm": 3.8171963691711426, "learning_rate": 2.9203187250996016e-06, "loss": 0.6123, "step": 734 }, { "epoch": 0.008787766472578581, "grad_norm": 3.3173775672912598, "learning_rate": 2.924302788844622e-06, "loss": 0.5975, "step": 735 }, { "epoch": 0.008799722617439233, "grad_norm": 2.9869792461395264, "learning_rate": 2.928286852589642e-06, "loss": 0.5521, "step": 736 }, { "epoch": 0.008811678762299885, "grad_norm": 1.785273551940918, "learning_rate": 2.9322709163346613e-06, "loss": 0.718, "step": 737 }, { "epoch": 0.008823634907160535, "grad_norm": 3.4841148853302, "learning_rate": 2.936254980079681e-06, "loss": 0.7118, "step": 738 }, { "epoch": 0.008835591052021186, "grad_norm": 4.080002784729004, "learning_rate": 2.9402390438247014e-06, "loss": 0.5065, "step": 739 }, { "epoch": 0.008847547196881838, "grad_norm": 3.114231824874878, "learning_rate": 2.9442231075697213e-06, "loss": 0.6377, "step": 740 }, { "epoch": 0.008859503341742488, "grad_norm": 3.142202854156494, "learning_rate": 2.9482071713147416e-06, "loss": 0.5778, "step": 741 }, { "epoch": 0.00887145948660314, "grad_norm": 3.488997220993042, "learning_rate": 2.9521912350597615e-06, "loss": 0.5488, "step": 742 }, { "epoch": 0.008883415631463791, "grad_norm": 5.549143314361572, "learning_rate": 2.956175298804781e-06, "loss": 0.5985, "step": 743 }, { "epoch": 0.008895371776324441, "grad_norm": 3.011340379714966, "learning_rate": 2.960159362549801e-06, "loss": 0.645, "step": 744 }, { "epoch": 0.008907327921185093, "grad_norm": 2.0511531829833984, "learning_rate": 2.964143426294821e-06, "loss": 0.5589, "step": 745 }, { "epoch": 0.008919284066045745, "grad_norm": 9.25084114074707, "learning_rate": 2.968127490039841e-06, "loss": 0.6814, "step": 746 }, { "epoch": 0.008931240210906395, "grad_norm": 3.3742926120758057, "learning_rate": 2.972111553784861e-06, "loss": 0.5899, "step": 747 }, { "epoch": 0.008943196355767047, "grad_norm": 3.0930027961730957, "learning_rate": 2.9760956175298804e-06, "loss": 0.5905, "step": 748 }, { "epoch": 0.008955152500627698, "grad_norm": 4.867155075073242, "learning_rate": 2.9800796812749007e-06, "loss": 0.5438, "step": 749 }, { "epoch": 0.008967108645488348, "grad_norm": 1.553344488143921, "learning_rate": 2.9840637450199206e-06, "loss": 0.6627, "step": 750 }, { "epoch": 0.008979064790349, "grad_norm": 4.3109211921691895, "learning_rate": 2.9880478087649404e-06, "loss": 0.4881, "step": 751 }, { "epoch": 0.008991020935209652, "grad_norm": 3.134003162384033, "learning_rate": 2.9920318725099607e-06, "loss": 0.5857, "step": 752 }, { "epoch": 0.009002977080070302, "grad_norm": 2.9911351203918457, "learning_rate": 2.99601593625498e-06, "loss": 0.7037, "step": 753 }, { "epoch": 0.009014933224930953, "grad_norm": 2.1079506874084473, "learning_rate": 3e-06, "loss": 0.5492, "step": 754 }, { "epoch": 0.009026889369791605, "grad_norm": 3.3136041164398193, "learning_rate": 3.00398406374502e-06, "loss": 0.6966, "step": 755 }, { "epoch": 0.009038845514652255, "grad_norm": 2.978553056716919, "learning_rate": 3.0079681274900403e-06, "loss": 0.6409, "step": 756 }, { "epoch": 0.009050801659512907, "grad_norm": 2.1096153259277344, "learning_rate": 3.01195219123506e-06, "loss": 0.6876, "step": 757 }, { "epoch": 0.009062757804373558, "grad_norm": 2.466346502304077, "learning_rate": 3.0159362549800796e-06, "loss": 0.5847, "step": 758 }, { "epoch": 0.009074713949234208, "grad_norm": 2.0518336296081543, "learning_rate": 3.0199203187251e-06, "loss": 0.655, "step": 759 }, { "epoch": 0.00908667009409486, "grad_norm": 3.2549498081207275, "learning_rate": 3.0239043824701198e-06, "loss": 0.7347, "step": 760 }, { "epoch": 0.009098626238955512, "grad_norm": 3.001386880874634, "learning_rate": 3.0278884462151397e-06, "loss": 0.6353, "step": 761 }, { "epoch": 0.009110582383816162, "grad_norm": 1.9652817249298096, "learning_rate": 3.03187250996016e-06, "loss": 0.4652, "step": 762 }, { "epoch": 0.009122538528676814, "grad_norm": 1.9268333911895752, "learning_rate": 3.0358565737051794e-06, "loss": 0.5824, "step": 763 }, { "epoch": 0.009134494673537465, "grad_norm": 5.720367908477783, "learning_rate": 3.0398406374501993e-06, "loss": 0.6122, "step": 764 }, { "epoch": 0.009146450818398115, "grad_norm": 3.376612901687622, "learning_rate": 3.043824701195219e-06, "loss": 0.6101, "step": 765 }, { "epoch": 0.009158406963258767, "grad_norm": 3.131117105484009, "learning_rate": 3.0478087649402395e-06, "loss": 0.6511, "step": 766 }, { "epoch": 0.009170363108119419, "grad_norm": 2.7709267139434814, "learning_rate": 3.0517928286852594e-06, "loss": 0.5872, "step": 767 }, { "epoch": 0.009182319252980069, "grad_norm": 2.903388023376465, "learning_rate": 3.055776892430279e-06, "loss": 0.6131, "step": 768 }, { "epoch": 0.00919427539784072, "grad_norm": 4.453618049621582, "learning_rate": 3.0597609561752987e-06, "loss": 0.6301, "step": 769 }, { "epoch": 0.009206231542701372, "grad_norm": 3.1656289100646973, "learning_rate": 3.063745019920319e-06, "loss": 0.692, "step": 770 }, { "epoch": 0.009218187687562022, "grad_norm": 6.54768180847168, "learning_rate": 3.067729083665339e-06, "loss": 0.609, "step": 771 }, { "epoch": 0.009230143832422674, "grad_norm": 2.1283278465270996, "learning_rate": 3.0717131474103588e-06, "loss": 0.5404, "step": 772 }, { "epoch": 0.009242099977283326, "grad_norm": 3.2237443923950195, "learning_rate": 3.075697211155379e-06, "loss": 0.6138, "step": 773 }, { "epoch": 0.009254056122143976, "grad_norm": 7.502298355102539, "learning_rate": 3.0796812749003985e-06, "loss": 0.5525, "step": 774 }, { "epoch": 0.009266012267004627, "grad_norm": 3.3139214515686035, "learning_rate": 3.0836653386454184e-06, "loss": 0.5761, "step": 775 }, { "epoch": 0.009277968411865279, "grad_norm": 2.2870049476623535, "learning_rate": 3.0876494023904387e-06, "loss": 0.6045, "step": 776 }, { "epoch": 0.009289924556725929, "grad_norm": 5.376852512359619, "learning_rate": 3.0916334661354586e-06, "loss": 0.584, "step": 777 }, { "epoch": 0.00930188070158658, "grad_norm": 2.4891321659088135, "learning_rate": 3.0956175298804785e-06, "loss": 0.5969, "step": 778 }, { "epoch": 0.00931383684644723, "grad_norm": 2.8757269382476807, "learning_rate": 3.099601593625498e-06, "loss": 0.5388, "step": 779 }, { "epoch": 0.009325792991307882, "grad_norm": 2.7626850605010986, "learning_rate": 3.1035856573705182e-06, "loss": 0.5366, "step": 780 }, { "epoch": 0.009337749136168534, "grad_norm": 5.705322265625, "learning_rate": 3.107569721115538e-06, "loss": 0.6538, "step": 781 }, { "epoch": 0.009349705281029184, "grad_norm": 3.2191925048828125, "learning_rate": 3.111553784860558e-06, "loss": 0.6028, "step": 782 }, { "epoch": 0.009361661425889836, "grad_norm": 4.821744441986084, "learning_rate": 3.1155378486055783e-06, "loss": 0.5503, "step": 783 }, { "epoch": 0.009373617570750488, "grad_norm": 2.0268971920013428, "learning_rate": 3.1195219123505978e-06, "loss": 0.5964, "step": 784 }, { "epoch": 0.009385573715611138, "grad_norm": 1.8132292032241821, "learning_rate": 3.1235059760956176e-06, "loss": 0.6311, "step": 785 }, { "epoch": 0.00939752986047179, "grad_norm": 2.4323508739471436, "learning_rate": 3.1274900398406375e-06, "loss": 0.5437, "step": 786 }, { "epoch": 0.009409486005332441, "grad_norm": 1.7992045879364014, "learning_rate": 3.131474103585658e-06, "loss": 0.6247, "step": 787 }, { "epoch": 0.009421442150193091, "grad_norm": 2.4685096740722656, "learning_rate": 3.1354581673306777e-06, "loss": 0.697, "step": 788 }, { "epoch": 0.009433398295053743, "grad_norm": 7.6651458740234375, "learning_rate": 3.139442231075697e-06, "loss": 0.5605, "step": 789 }, { "epoch": 0.009445354439914394, "grad_norm": 2.237133502960205, "learning_rate": 3.1434262948207175e-06, "loss": 0.5741, "step": 790 }, { "epoch": 0.009457310584775044, "grad_norm": 3.4561078548431396, "learning_rate": 3.1474103585657373e-06, "loss": 0.6517, "step": 791 }, { "epoch": 0.009469266729635696, "grad_norm": 1.9379706382751465, "learning_rate": 3.1513944223107572e-06, "loss": 0.5706, "step": 792 }, { "epoch": 0.009481222874496348, "grad_norm": 2.2410409450531006, "learning_rate": 3.1553784860557775e-06, "loss": 0.6334, "step": 793 }, { "epoch": 0.009493179019356998, "grad_norm": 2.8185179233551025, "learning_rate": 3.159362549800797e-06, "loss": 0.5973, "step": 794 }, { "epoch": 0.00950513516421765, "grad_norm": 5.916341304779053, "learning_rate": 3.163346613545817e-06, "loss": 0.525, "step": 795 }, { "epoch": 0.009517091309078301, "grad_norm": 2.0439414978027344, "learning_rate": 3.1673306772908368e-06, "loss": 0.5323, "step": 796 }, { "epoch": 0.009529047453938951, "grad_norm": 4.355063438415527, "learning_rate": 3.171314741035857e-06, "loss": 0.6917, "step": 797 }, { "epoch": 0.009541003598799603, "grad_norm": 4.568106174468994, "learning_rate": 3.175298804780877e-06, "loss": 0.6751, "step": 798 }, { "epoch": 0.009552959743660255, "grad_norm": 2.715838670730591, "learning_rate": 3.1792828685258964e-06, "loss": 0.571, "step": 799 }, { "epoch": 0.009564915888520905, "grad_norm": 84.95951080322266, "learning_rate": 3.1832669322709163e-06, "loss": 0.6352, "step": 800 }, { "epoch": 0.009576872033381556, "grad_norm": 1.59367036819458, "learning_rate": 3.1872509960159366e-06, "loss": 0.6356, "step": 801 }, { "epoch": 0.009588828178242208, "grad_norm": 3.053346872329712, "learning_rate": 3.1912350597609565e-06, "loss": 0.5438, "step": 802 }, { "epoch": 0.009600784323102858, "grad_norm": 4.631146430969238, "learning_rate": 3.1952191235059763e-06, "loss": 0.5083, "step": 803 }, { "epoch": 0.00961274046796351, "grad_norm": 4.14524507522583, "learning_rate": 3.1992031872509966e-06, "loss": 0.6534, "step": 804 }, { "epoch": 0.009624696612824162, "grad_norm": 2.639374256134033, "learning_rate": 3.203187250996016e-06, "loss": 0.7023, "step": 805 }, { "epoch": 0.009636652757684811, "grad_norm": 7.0635223388671875, "learning_rate": 3.207171314741036e-06, "loss": 0.5843, "step": 806 }, { "epoch": 0.009648608902545463, "grad_norm": 3.5860469341278076, "learning_rate": 3.2111553784860563e-06, "loss": 0.5789, "step": 807 }, { "epoch": 0.009660565047406115, "grad_norm": 2.1966910362243652, "learning_rate": 3.215139442231076e-06, "loss": 0.6655, "step": 808 }, { "epoch": 0.009672521192266765, "grad_norm": 2.9081287384033203, "learning_rate": 3.219123505976096e-06, "loss": 0.6385, "step": 809 }, { "epoch": 0.009684477337127417, "grad_norm": 3.203204393386841, "learning_rate": 3.2231075697211155e-06, "loss": 0.6331, "step": 810 }, { "epoch": 0.009696433481988068, "grad_norm": 3.2124452590942383, "learning_rate": 3.227091633466136e-06, "loss": 0.5823, "step": 811 }, { "epoch": 0.009708389626848718, "grad_norm": 17.98335838317871, "learning_rate": 3.2310756972111557e-06, "loss": 0.637, "step": 812 }, { "epoch": 0.00972034577170937, "grad_norm": 3.7247121334075928, "learning_rate": 3.2350597609561756e-06, "loss": 0.6193, "step": 813 }, { "epoch": 0.009732301916570022, "grad_norm": 8.30686092376709, "learning_rate": 3.239043824701196e-06, "loss": 0.5676, "step": 814 }, { "epoch": 0.009744258061430672, "grad_norm": 2.8532869815826416, "learning_rate": 3.2430278884462153e-06, "loss": 0.6366, "step": 815 }, { "epoch": 0.009756214206291323, "grad_norm": 2.858046531677246, "learning_rate": 3.247011952191235e-06, "loss": 0.5819, "step": 816 }, { "epoch": 0.009768170351151975, "grad_norm": 3.1802966594696045, "learning_rate": 3.250996015936255e-06, "loss": 0.6587, "step": 817 }, { "epoch": 0.009780126496012625, "grad_norm": 13.058235168457031, "learning_rate": 3.2549800796812754e-06, "loss": 0.6194, "step": 818 }, { "epoch": 0.009792082640873277, "grad_norm": 3.139280319213867, "learning_rate": 3.2589641434262953e-06, "loss": 0.645, "step": 819 }, { "epoch": 0.009804038785733929, "grad_norm": 13.168118476867676, "learning_rate": 3.2629482071713147e-06, "loss": 0.6665, "step": 820 }, { "epoch": 0.009815994930594579, "grad_norm": 3.3194739818573, "learning_rate": 3.2669322709163346e-06, "loss": 0.5283, "step": 821 }, { "epoch": 0.00982795107545523, "grad_norm": 2.7571921348571777, "learning_rate": 3.270916334661355e-06, "loss": 0.4817, "step": 822 }, { "epoch": 0.009839907220315882, "grad_norm": 2.809680461883545, "learning_rate": 3.274900398406375e-06, "loss": 0.6706, "step": 823 }, { "epoch": 0.009851863365176532, "grad_norm": 2.106907367706299, "learning_rate": 3.278884462151395e-06, "loss": 0.6272, "step": 824 }, { "epoch": 0.009863819510037184, "grad_norm": 2.750291347503662, "learning_rate": 3.2828685258964146e-06, "loss": 0.6231, "step": 825 }, { "epoch": 0.009875775654897835, "grad_norm": 2.4959239959716797, "learning_rate": 3.2868525896414344e-06, "loss": 0.7048, "step": 826 }, { "epoch": 0.009887731799758485, "grad_norm": 2.2633445262908936, "learning_rate": 3.2908366533864543e-06, "loss": 0.5731, "step": 827 }, { "epoch": 0.009899687944619137, "grad_norm": 2.2871909141540527, "learning_rate": 3.2948207171314746e-06, "loss": 0.6147, "step": 828 }, { "epoch": 0.009911644089479789, "grad_norm": 4.553147792816162, "learning_rate": 3.2988047808764945e-06, "loss": 0.5511, "step": 829 }, { "epoch": 0.009923600234340439, "grad_norm": 5.879729747772217, "learning_rate": 3.302788844621514e-06, "loss": 0.6082, "step": 830 }, { "epoch": 0.00993555637920109, "grad_norm": 1.735573172569275, "learning_rate": 3.306772908366534e-06, "loss": 0.5664, "step": 831 }, { "epoch": 0.009947512524061742, "grad_norm": 2.569685459136963, "learning_rate": 3.310756972111554e-06, "loss": 0.4917, "step": 832 }, { "epoch": 0.009959468668922392, "grad_norm": 4.865814685821533, "learning_rate": 3.314741035856574e-06, "loss": 0.576, "step": 833 }, { "epoch": 0.009971424813783044, "grad_norm": 3.881535768508911, "learning_rate": 3.318725099601594e-06, "loss": 0.5981, "step": 834 }, { "epoch": 0.009983380958643696, "grad_norm": 3.489556312561035, "learning_rate": 3.322709163346614e-06, "loss": 0.6748, "step": 835 }, { "epoch": 0.009995337103504346, "grad_norm": 5.280672073364258, "learning_rate": 3.3266932270916337e-06, "loss": 0.6418, "step": 836 }, { "epoch": 0.010007293248364997, "grad_norm": 2.654426097869873, "learning_rate": 3.3306772908366535e-06, "loss": 0.7072, "step": 837 }, { "epoch": 0.010019249393225649, "grad_norm": 6.188748836517334, "learning_rate": 3.3346613545816734e-06, "loss": 0.5603, "step": 838 }, { "epoch": 0.010031205538086299, "grad_norm": 2.7148702144622803, "learning_rate": 3.3386454183266937e-06, "loss": 0.5299, "step": 839 }, { "epoch": 0.01004316168294695, "grad_norm": 5.974229335784912, "learning_rate": 3.3426294820717136e-06, "loss": 0.6069, "step": 840 }, { "epoch": 0.010055117827807603, "grad_norm": 1.8519331216812134, "learning_rate": 3.346613545816733e-06, "loss": 0.5175, "step": 841 }, { "epoch": 0.010067073972668253, "grad_norm": 2.1145401000976562, "learning_rate": 3.3505976095617534e-06, "loss": 0.6063, "step": 842 }, { "epoch": 0.010079030117528904, "grad_norm": 2.136669158935547, "learning_rate": 3.3545816733067733e-06, "loss": 0.6726, "step": 843 }, { "epoch": 0.010090986262389556, "grad_norm": 2.6276919841766357, "learning_rate": 3.358565737051793e-06, "loss": 0.6145, "step": 844 }, { "epoch": 0.010102942407250206, "grad_norm": 2.5445220470428467, "learning_rate": 3.3625498007968134e-06, "loss": 0.563, "step": 845 }, { "epoch": 0.010114898552110858, "grad_norm": 11.301918029785156, "learning_rate": 3.366533864541833e-06, "loss": 0.6127, "step": 846 }, { "epoch": 0.010126854696971508, "grad_norm": 2.969223976135254, "learning_rate": 3.3705179282868528e-06, "loss": 0.623, "step": 847 }, { "epoch": 0.01013881084183216, "grad_norm": 5.278745651245117, "learning_rate": 3.3745019920318727e-06, "loss": 0.6119, "step": 848 }, { "epoch": 0.010150766986692811, "grad_norm": 4.360371112823486, "learning_rate": 3.378486055776893e-06, "loss": 0.6178, "step": 849 }, { "epoch": 0.010162723131553461, "grad_norm": 3.2826409339904785, "learning_rate": 3.382470119521913e-06, "loss": 0.7183, "step": 850 }, { "epoch": 0.010174679276414113, "grad_norm": 3.4957005977630615, "learning_rate": 3.3864541832669323e-06, "loss": 0.6375, "step": 851 }, { "epoch": 0.010186635421274765, "grad_norm": 2.281708002090454, "learning_rate": 3.390438247011952e-06, "loss": 0.6076, "step": 852 }, { "epoch": 0.010198591566135414, "grad_norm": 3.9331281185150146, "learning_rate": 3.3944223107569725e-06, "loss": 0.5906, "step": 853 }, { "epoch": 0.010210547710996066, "grad_norm": 3.9594078063964844, "learning_rate": 3.3984063745019924e-06, "loss": 0.6519, "step": 854 }, { "epoch": 0.010222503855856718, "grad_norm": 2.3019888401031494, "learning_rate": 3.4023904382470122e-06, "loss": 0.5954, "step": 855 }, { "epoch": 0.010234460000717368, "grad_norm": 1.9162731170654297, "learning_rate": 3.4063745019920317e-06, "loss": 0.586, "step": 856 }, { "epoch": 0.01024641614557802, "grad_norm": 2.537925958633423, "learning_rate": 3.410358565737052e-06, "loss": 0.5431, "step": 857 }, { "epoch": 0.010258372290438671, "grad_norm": 2.002495765686035, "learning_rate": 3.414342629482072e-06, "loss": 0.5874, "step": 858 }, { "epoch": 0.010270328435299321, "grad_norm": 2.452537775039673, "learning_rate": 3.418326693227092e-06, "loss": 0.5679, "step": 859 }, { "epoch": 0.010282284580159973, "grad_norm": 3.5125083923339844, "learning_rate": 3.422310756972112e-06, "loss": 0.5528, "step": 860 }, { "epoch": 0.010294240725020625, "grad_norm": 5.289725303649902, "learning_rate": 3.4262948207171315e-06, "loss": 0.5924, "step": 861 }, { "epoch": 0.010306196869881275, "grad_norm": 3.782355308532715, "learning_rate": 3.4302788844621514e-06, "loss": 0.6606, "step": 862 }, { "epoch": 0.010318153014741926, "grad_norm": 2.8694748878479004, "learning_rate": 3.4342629482071717e-06, "loss": 0.6517, "step": 863 }, { "epoch": 0.010330109159602578, "grad_norm": 2.1115686893463135, "learning_rate": 3.4382470119521916e-06, "loss": 0.6059, "step": 864 }, { "epoch": 0.010342065304463228, "grad_norm": 6.521465301513672, "learning_rate": 3.4422310756972115e-06, "loss": 0.5947, "step": 865 }, { "epoch": 0.01035402144932388, "grad_norm": 4.284839630126953, "learning_rate": 3.4462151394422318e-06, "loss": 0.6417, "step": 866 }, { "epoch": 0.010365977594184532, "grad_norm": 3.247042655944824, "learning_rate": 3.4501992031872512e-06, "loss": 0.5927, "step": 867 }, { "epoch": 0.010377933739045182, "grad_norm": 2.0503172874450684, "learning_rate": 3.454183266932271e-06, "loss": 0.6498, "step": 868 }, { "epoch": 0.010389889883905833, "grad_norm": 8.487083435058594, "learning_rate": 3.458167330677291e-06, "loss": 0.6641, "step": 869 }, { "epoch": 0.010401846028766485, "grad_norm": 2.2572884559631348, "learning_rate": 3.4621513944223113e-06, "loss": 0.576, "step": 870 }, { "epoch": 0.010413802173627135, "grad_norm": 3.791743278503418, "learning_rate": 3.466135458167331e-06, "loss": 0.5378, "step": 871 }, { "epoch": 0.010425758318487787, "grad_norm": 1.886162519454956, "learning_rate": 3.4701195219123506e-06, "loss": 0.5275, "step": 872 }, { "epoch": 0.010437714463348438, "grad_norm": 1.7970168590545654, "learning_rate": 3.4741035856573705e-06, "loss": 0.5377, "step": 873 }, { "epoch": 0.010449670608209088, "grad_norm": 3.0362071990966797, "learning_rate": 3.478087649402391e-06, "loss": 0.6948, "step": 874 }, { "epoch": 0.01046162675306974, "grad_norm": 2.6510584354400635, "learning_rate": 3.4820717131474107e-06, "loss": 0.5964, "step": 875 }, { "epoch": 0.010473582897930392, "grad_norm": 7.3050642013549805, "learning_rate": 3.486055776892431e-06, "loss": 0.622, "step": 876 }, { "epoch": 0.010485539042791042, "grad_norm": 4.341336250305176, "learning_rate": 3.4900398406374505e-06, "loss": 0.7102, "step": 877 }, { "epoch": 0.010497495187651694, "grad_norm": 2.6530401706695557, "learning_rate": 3.4940239043824703e-06, "loss": 0.6036, "step": 878 }, { "epoch": 0.010509451332512345, "grad_norm": 2.263241767883301, "learning_rate": 3.4980079681274902e-06, "loss": 0.5492, "step": 879 }, { "epoch": 0.010521407477372995, "grad_norm": 1.9562212228775024, "learning_rate": 3.5019920318725105e-06, "loss": 0.6327, "step": 880 }, { "epoch": 0.010533363622233647, "grad_norm": 2.558150053024292, "learning_rate": 3.5059760956175304e-06, "loss": 0.5947, "step": 881 }, { "epoch": 0.010545319767094299, "grad_norm": 2.3027660846710205, "learning_rate": 3.50996015936255e-06, "loss": 0.6513, "step": 882 }, { "epoch": 0.010557275911954949, "grad_norm": 26.967342376708984, "learning_rate": 3.5139442231075697e-06, "loss": 0.6394, "step": 883 }, { "epoch": 0.0105692320568156, "grad_norm": 2.6922333240509033, "learning_rate": 3.51792828685259e-06, "loss": 0.6377, "step": 884 }, { "epoch": 0.010581188201676252, "grad_norm": 2.756223201751709, "learning_rate": 3.52191235059761e-06, "loss": 0.5912, "step": 885 }, { "epoch": 0.010593144346536902, "grad_norm": 3.0659713745117188, "learning_rate": 3.52589641434263e-06, "loss": 0.6566, "step": 886 }, { "epoch": 0.010605100491397554, "grad_norm": 1.846001386642456, "learning_rate": 3.5298804780876493e-06, "loss": 0.6194, "step": 887 }, { "epoch": 0.010617056636258206, "grad_norm": 2.553508996963501, "learning_rate": 3.5338645418326696e-06, "loss": 0.595, "step": 888 }, { "epoch": 0.010629012781118856, "grad_norm": 2.560537099838257, "learning_rate": 3.5378486055776894e-06, "loss": 0.6001, "step": 889 }, { "epoch": 0.010640968925979507, "grad_norm": 3.7705397605895996, "learning_rate": 3.5418326693227093e-06, "loss": 0.6412, "step": 890 }, { "epoch": 0.010652925070840159, "grad_norm": 4.201830863952637, "learning_rate": 3.5458167330677296e-06, "loss": 0.5685, "step": 891 }, { "epoch": 0.010664881215700809, "grad_norm": 2.2653496265411377, "learning_rate": 3.549800796812749e-06, "loss": 0.5964, "step": 892 }, { "epoch": 0.01067683736056146, "grad_norm": 2.355717897415161, "learning_rate": 3.553784860557769e-06, "loss": 0.6402, "step": 893 }, { "epoch": 0.010688793505422112, "grad_norm": 2.9453208446502686, "learning_rate": 3.5577689243027893e-06, "loss": 0.6056, "step": 894 }, { "epoch": 0.010700749650282762, "grad_norm": 1.8498308658599854, "learning_rate": 3.561752988047809e-06, "loss": 0.7217, "step": 895 }, { "epoch": 0.010712705795143414, "grad_norm": 3.4790773391723633, "learning_rate": 3.565737051792829e-06, "loss": 0.5602, "step": 896 }, { "epoch": 0.010724661940004066, "grad_norm": 3.5169501304626465, "learning_rate": 3.5697211155378493e-06, "loss": 0.648, "step": 897 }, { "epoch": 0.010736618084864716, "grad_norm": 3.5500078201293945, "learning_rate": 3.573705179282869e-06, "loss": 0.6224, "step": 898 }, { "epoch": 0.010748574229725368, "grad_norm": 4.1416916847229, "learning_rate": 3.5776892430278887e-06, "loss": 0.6684, "step": 899 }, { "epoch": 0.01076053037458602, "grad_norm": 2.1082730293273926, "learning_rate": 3.5816733067729086e-06, "loss": 0.5331, "step": 900 }, { "epoch": 0.01077248651944667, "grad_norm": 5.613128662109375, "learning_rate": 3.585657370517929e-06, "loss": 0.7004, "step": 901 }, { "epoch": 0.010784442664307321, "grad_norm": 11.988324165344238, "learning_rate": 3.5896414342629487e-06, "loss": 0.6097, "step": 902 }, { "epoch": 0.010796398809167973, "grad_norm": 3.3382434844970703, "learning_rate": 3.593625498007968e-06, "loss": 0.5895, "step": 903 }, { "epoch": 0.010808354954028623, "grad_norm": 4.607828617095947, "learning_rate": 3.597609561752988e-06, "loss": 0.7454, "step": 904 }, { "epoch": 0.010820311098889274, "grad_norm": 5.347064971923828, "learning_rate": 3.6015936254980084e-06, "loss": 0.546, "step": 905 }, { "epoch": 0.010832267243749926, "grad_norm": 10.6771879196167, "learning_rate": 3.6055776892430283e-06, "loss": 0.6115, "step": 906 }, { "epoch": 0.010844223388610576, "grad_norm": 2.9641239643096924, "learning_rate": 3.609561752988048e-06, "loss": 0.6812, "step": 907 }, { "epoch": 0.010856179533471228, "grad_norm": 3.6358847618103027, "learning_rate": 3.613545816733068e-06, "loss": 0.6107, "step": 908 }, { "epoch": 0.01086813567833188, "grad_norm": 2.751537561416626, "learning_rate": 3.617529880478088e-06, "loss": 0.5698, "step": 909 }, { "epoch": 0.01088009182319253, "grad_norm": 2.3409242630004883, "learning_rate": 3.6215139442231078e-06, "loss": 0.685, "step": 910 }, { "epoch": 0.010892047968053181, "grad_norm": 2.3902812004089355, "learning_rate": 3.625498007968128e-06, "loss": 0.5073, "step": 911 }, { "epoch": 0.010904004112913833, "grad_norm": 2.580371379852295, "learning_rate": 3.629482071713148e-06, "loss": 0.5481, "step": 912 }, { "epoch": 0.010915960257774483, "grad_norm": 2.2168586254119873, "learning_rate": 3.6334661354581674e-06, "loss": 0.6085, "step": 913 }, { "epoch": 0.010927916402635135, "grad_norm": 1.9476962089538574, "learning_rate": 3.6374501992031873e-06, "loss": 0.598, "step": 914 }, { "epoch": 0.010939872547495785, "grad_norm": 2.7324955463409424, "learning_rate": 3.6414342629482076e-06, "loss": 0.5976, "step": 915 }, { "epoch": 0.010951828692356436, "grad_norm": 2.2498486042022705, "learning_rate": 3.6454183266932275e-06, "loss": 0.6125, "step": 916 }, { "epoch": 0.010963784837217088, "grad_norm": 7.4951019287109375, "learning_rate": 3.6494023904382474e-06, "loss": 0.6416, "step": 917 }, { "epoch": 0.010975740982077738, "grad_norm": 8.924098014831543, "learning_rate": 3.653386454183267e-06, "loss": 0.6342, "step": 918 }, { "epoch": 0.01098769712693839, "grad_norm": 24.56004524230957, "learning_rate": 3.657370517928287e-06, "loss": 0.6021, "step": 919 }, { "epoch": 0.010999653271799041, "grad_norm": 4.302426338195801, "learning_rate": 3.661354581673307e-06, "loss": 0.6218, "step": 920 }, { "epoch": 0.011011609416659691, "grad_norm": 5.646909236907959, "learning_rate": 3.665338645418327e-06, "loss": 0.6104, "step": 921 }, { "epoch": 0.011023565561520343, "grad_norm": 6.207045555114746, "learning_rate": 3.669322709163347e-06, "loss": 0.6106, "step": 922 }, { "epoch": 0.011035521706380995, "grad_norm": 3.4340102672576904, "learning_rate": 3.6733067729083667e-06, "loss": 0.5555, "step": 923 }, { "epoch": 0.011047477851241645, "grad_norm": 2.345381498336792, "learning_rate": 3.6772908366533865e-06, "loss": 0.5297, "step": 924 }, { "epoch": 0.011059433996102297, "grad_norm": 2.268197536468506, "learning_rate": 3.6812749003984064e-06, "loss": 0.533, "step": 925 }, { "epoch": 0.011071390140962948, "grad_norm": 3.158277988433838, "learning_rate": 3.6852589641434267e-06, "loss": 0.6676, "step": 926 }, { "epoch": 0.011083346285823598, "grad_norm": 3.160508871078491, "learning_rate": 3.6892430278884466e-06, "loss": 0.6046, "step": 927 }, { "epoch": 0.01109530243068425, "grad_norm": 2.4299142360687256, "learning_rate": 3.693227091633467e-06, "loss": 0.6642, "step": 928 }, { "epoch": 0.011107258575544902, "grad_norm": 1.8340682983398438, "learning_rate": 3.6972111553784864e-06, "loss": 0.6127, "step": 929 }, { "epoch": 0.011119214720405552, "grad_norm": 2.5088629722595215, "learning_rate": 3.7011952191235062e-06, "loss": 0.5889, "step": 930 }, { "epoch": 0.011131170865266203, "grad_norm": 3.8708715438842773, "learning_rate": 3.705179282868526e-06, "loss": 0.6018, "step": 931 }, { "epoch": 0.011143127010126855, "grad_norm": 2.4244883060455322, "learning_rate": 3.7091633466135464e-06, "loss": 0.564, "step": 932 }, { "epoch": 0.011155083154987505, "grad_norm": 5.095261573791504, "learning_rate": 3.7131474103585663e-06, "loss": 0.5695, "step": 933 }, { "epoch": 0.011167039299848157, "grad_norm": 2.7075083255767822, "learning_rate": 3.7171314741035858e-06, "loss": 0.5437, "step": 934 }, { "epoch": 0.011178995444708809, "grad_norm": 2.5026166439056396, "learning_rate": 3.7211155378486056e-06, "loss": 0.6178, "step": 935 }, { "epoch": 0.011190951589569459, "grad_norm": 2.1219987869262695, "learning_rate": 3.725099601593626e-06, "loss": 0.5676, "step": 936 }, { "epoch": 0.01120290773443011, "grad_norm": 5.051114082336426, "learning_rate": 3.729083665338646e-06, "loss": 0.6155, "step": 937 }, { "epoch": 0.011214863879290762, "grad_norm": 2.12709641456604, "learning_rate": 3.7330677290836657e-06, "loss": 0.5439, "step": 938 }, { "epoch": 0.011226820024151412, "grad_norm": 3.3887994289398193, "learning_rate": 3.737051792828685e-06, "loss": 0.6473, "step": 939 }, { "epoch": 0.011238776169012064, "grad_norm": 3.1317696571350098, "learning_rate": 3.7410358565737055e-06, "loss": 0.5159, "step": 940 }, { "epoch": 0.011250732313872715, "grad_norm": 2.429022789001465, "learning_rate": 3.7450199203187254e-06, "loss": 0.5655, "step": 941 }, { "epoch": 0.011262688458733365, "grad_norm": 5.263227462768555, "learning_rate": 3.7490039840637452e-06, "loss": 0.7357, "step": 942 }, { "epoch": 0.011274644603594017, "grad_norm": 4.630619049072266, "learning_rate": 3.7529880478087655e-06, "loss": 0.6964, "step": 943 }, { "epoch": 0.011286600748454669, "grad_norm": 2.8061726093292236, "learning_rate": 3.756972111553785e-06, "loss": 0.6107, "step": 944 }, { "epoch": 0.011298556893315319, "grad_norm": 7.5787353515625, "learning_rate": 3.760956175298805e-06, "loss": 0.5627, "step": 945 }, { "epoch": 0.01131051303817597, "grad_norm": 2.0542657375335693, "learning_rate": 3.764940239043825e-06, "loss": 0.5504, "step": 946 }, { "epoch": 0.011322469183036622, "grad_norm": 6.042245388031006, "learning_rate": 3.768924302788845e-06, "loss": 0.6323, "step": 947 }, { "epoch": 0.011334425327897272, "grad_norm": 2.257362127304077, "learning_rate": 3.772908366533865e-06, "loss": 0.5657, "step": 948 }, { "epoch": 0.011346381472757924, "grad_norm": 2.7010514736175537, "learning_rate": 3.7768924302788844e-06, "loss": 0.6077, "step": 949 }, { "epoch": 0.011358337617618576, "grad_norm": 5.845555782318115, "learning_rate": 3.7808764940239047e-06, "loss": 0.6127, "step": 950 }, { "epoch": 0.011370293762479226, "grad_norm": 2.4810738563537598, "learning_rate": 3.7848605577689246e-06, "loss": 0.5521, "step": 951 }, { "epoch": 0.011382249907339877, "grad_norm": 4.88818883895874, "learning_rate": 3.7888446215139445e-06, "loss": 0.524, "step": 952 }, { "epoch": 0.011394206052200529, "grad_norm": 3.2904212474823, "learning_rate": 3.7928286852589648e-06, "loss": 0.6383, "step": 953 }, { "epoch": 0.011406162197061179, "grad_norm": 5.3746843338012695, "learning_rate": 3.7968127490039842e-06, "loss": 0.5871, "step": 954 }, { "epoch": 0.01141811834192183, "grad_norm": 2.927089214324951, "learning_rate": 3.800796812749004e-06, "loss": 0.5975, "step": 955 }, { "epoch": 0.011430074486782483, "grad_norm": 2.0333197116851807, "learning_rate": 3.804780876494024e-06, "loss": 0.5627, "step": 956 }, { "epoch": 0.011442030631643132, "grad_norm": 4.667473793029785, "learning_rate": 3.8087649402390443e-06, "loss": 0.6245, "step": 957 }, { "epoch": 0.011453986776503784, "grad_norm": 2.5350146293640137, "learning_rate": 3.812749003984064e-06, "loss": 0.6697, "step": 958 }, { "epoch": 0.011465942921364436, "grad_norm": 2.745485782623291, "learning_rate": 3.8167330677290845e-06, "loss": 0.6872, "step": 959 }, { "epoch": 0.011477899066225086, "grad_norm": 3.142338752746582, "learning_rate": 3.820717131474104e-06, "loss": 0.6495, "step": 960 }, { "epoch": 0.011489855211085738, "grad_norm": 1.7858214378356934, "learning_rate": 3.824701195219123e-06, "loss": 0.5854, "step": 961 }, { "epoch": 0.01150181135594639, "grad_norm": 2.743682384490967, "learning_rate": 3.828685258964144e-06, "loss": 0.5602, "step": 962 }, { "epoch": 0.01151376750080704, "grad_norm": 4.286677360534668, "learning_rate": 3.832669322709164e-06, "loss": 0.66, "step": 963 }, { "epoch": 0.011525723645667691, "grad_norm": 3.228344440460205, "learning_rate": 3.8366533864541834e-06, "loss": 0.6809, "step": 964 }, { "epoch": 0.011537679790528343, "grad_norm": 5.438412666320801, "learning_rate": 3.840637450199203e-06, "loss": 0.6661, "step": 965 }, { "epoch": 0.011549635935388993, "grad_norm": 2.525089979171753, "learning_rate": 3.844621513944223e-06, "loss": 0.6067, "step": 966 }, { "epoch": 0.011561592080249644, "grad_norm": 3.8954508304595947, "learning_rate": 3.8486055776892435e-06, "loss": 0.6273, "step": 967 }, { "epoch": 0.011573548225110296, "grad_norm": 3.7138235569000244, "learning_rate": 3.852589641434264e-06, "loss": 0.6161, "step": 968 }, { "epoch": 0.011585504369970946, "grad_norm": 2.2086329460144043, "learning_rate": 3.856573705179283e-06, "loss": 0.6196, "step": 969 }, { "epoch": 0.011597460514831598, "grad_norm": 4.910168170928955, "learning_rate": 3.860557768924303e-06, "loss": 0.6372, "step": 970 }, { "epoch": 0.01160941665969225, "grad_norm": 3.0504872798919678, "learning_rate": 3.864541832669323e-06, "loss": 0.6708, "step": 971 }, { "epoch": 0.0116213728045529, "grad_norm": 2.8260111808776855, "learning_rate": 3.868525896414343e-06, "loss": 0.6287, "step": 972 }, { "epoch": 0.011633328949413551, "grad_norm": 2.6078238487243652, "learning_rate": 3.872509960159363e-06, "loss": 0.5826, "step": 973 }, { "epoch": 0.011645285094274203, "grad_norm": 4.870280742645264, "learning_rate": 3.876494023904383e-06, "loss": 0.5923, "step": 974 }, { "epoch": 0.011657241239134853, "grad_norm": 11.153132438659668, "learning_rate": 3.8804780876494026e-06, "loss": 0.7125, "step": 975 }, { "epoch": 0.011669197383995505, "grad_norm": 2.2903928756713867, "learning_rate": 3.884462151394423e-06, "loss": 0.6313, "step": 976 }, { "epoch": 0.011681153528856156, "grad_norm": 2.9493753910064697, "learning_rate": 3.888446215139442e-06, "loss": 0.5889, "step": 977 }, { "epoch": 0.011693109673716806, "grad_norm": 2.5555520057678223, "learning_rate": 3.892430278884463e-06, "loss": 0.6699, "step": 978 }, { "epoch": 0.011705065818577458, "grad_norm": 2.3673479557037354, "learning_rate": 3.896414342629483e-06, "loss": 0.4779, "step": 979 }, { "epoch": 0.01171702196343811, "grad_norm": 2.469217300415039, "learning_rate": 3.900398406374502e-06, "loss": 0.6229, "step": 980 }, { "epoch": 0.01172897810829876, "grad_norm": 29.14510154724121, "learning_rate": 3.904382470119522e-06, "loss": 0.6586, "step": 981 }, { "epoch": 0.011740934253159412, "grad_norm": 7.692104339599609, "learning_rate": 3.908366533864542e-06, "loss": 0.6358, "step": 982 }, { "epoch": 0.011752890398020062, "grad_norm": 3.5953385829925537, "learning_rate": 3.9123505976095624e-06, "loss": 0.6928, "step": 983 }, { "epoch": 0.011764846542880713, "grad_norm": 2.038965940475464, "learning_rate": 3.916334661354582e-06, "loss": 0.7147, "step": 984 }, { "epoch": 0.011776802687741365, "grad_norm": 2.5072576999664307, "learning_rate": 3.920318725099601e-06, "loss": 0.5707, "step": 985 }, { "epoch": 0.011788758832602015, "grad_norm": 3.755358934402466, "learning_rate": 3.924302788844622e-06, "loss": 0.5603, "step": 986 }, { "epoch": 0.011800714977462667, "grad_norm": 2.689211845397949, "learning_rate": 3.928286852589642e-06, "loss": 0.6095, "step": 987 }, { "epoch": 0.011812671122323318, "grad_norm": 5.581035137176514, "learning_rate": 3.9322709163346614e-06, "loss": 0.5709, "step": 988 }, { "epoch": 0.011824627267183968, "grad_norm": 3.1372275352478027, "learning_rate": 3.936254980079682e-06, "loss": 0.5747, "step": 989 }, { "epoch": 0.01183658341204462, "grad_norm": 2.4512643814086914, "learning_rate": 3.940239043824702e-06, "loss": 0.6306, "step": 990 }, { "epoch": 0.011848539556905272, "grad_norm": 3.8730721473693848, "learning_rate": 3.9442231075697215e-06, "loss": 0.6774, "step": 991 }, { "epoch": 0.011860495701765922, "grad_norm": 2.6459505558013916, "learning_rate": 3.948207171314741e-06, "loss": 0.5836, "step": 992 }, { "epoch": 0.011872451846626574, "grad_norm": 9.233612060546875, "learning_rate": 3.952191235059761e-06, "loss": 0.6992, "step": 993 }, { "epoch": 0.011884407991487225, "grad_norm": 1.8401297330856323, "learning_rate": 3.9561752988047816e-06, "loss": 0.5542, "step": 994 }, { "epoch": 0.011896364136347875, "grad_norm": 4.044528484344482, "learning_rate": 3.960159362549801e-06, "loss": 0.5944, "step": 995 }, { "epoch": 0.011908320281208527, "grad_norm": 3.205629825592041, "learning_rate": 3.9641434262948205e-06, "loss": 0.6052, "step": 996 }, { "epoch": 0.011920276426069179, "grad_norm": 13.686964988708496, "learning_rate": 3.968127490039841e-06, "loss": 0.6533, "step": 997 }, { "epoch": 0.011932232570929829, "grad_norm": 3.060673952102661, "learning_rate": 3.972111553784861e-06, "loss": 0.7363, "step": 998 }, { "epoch": 0.01194418871579048, "grad_norm": 2.3775651454925537, "learning_rate": 3.9760956175298805e-06, "loss": 0.735, "step": 999 }, { "epoch": 0.011956144860651132, "grad_norm": 2.568477153778076, "learning_rate": 3.980079681274901e-06, "loss": 0.5563, "step": 1000 }, { "epoch": 0.011968101005511782, "grad_norm": 2.946596145629883, "learning_rate": 3.98406374501992e-06, "loss": 0.618, "step": 1001 }, { "epoch": 0.011980057150372434, "grad_norm": 2.561676263809204, "learning_rate": 3.988047808764941e-06, "loss": 0.682, "step": 1002 }, { "epoch": 0.011992013295233086, "grad_norm": 2.966110944747925, "learning_rate": 3.992031872509961e-06, "loss": 0.6441, "step": 1003 }, { "epoch": 0.012003969440093736, "grad_norm": 10.059952735900879, "learning_rate": 3.99601593625498e-06, "loss": 0.6074, "step": 1004 }, { "epoch": 0.012015925584954387, "grad_norm": 3.723149538040161, "learning_rate": 4.000000000000001e-06, "loss": 0.6262, "step": 1005 }, { "epoch": 0.012027881729815039, "grad_norm": 2.1189255714416504, "learning_rate": 4.00398406374502e-06, "loss": 0.6116, "step": 1006 }, { "epoch": 0.012039837874675689, "grad_norm": 3.144559860229492, "learning_rate": 4.0079681274900404e-06, "loss": 0.6685, "step": 1007 }, { "epoch": 0.01205179401953634, "grad_norm": 1.8019880056381226, "learning_rate": 4.01195219123506e-06, "loss": 0.6488, "step": 1008 }, { "epoch": 0.012063750164396992, "grad_norm": 2.5154006481170654, "learning_rate": 4.01593625498008e-06, "loss": 0.6072, "step": 1009 }, { "epoch": 0.012075706309257642, "grad_norm": 10.925771713256836, "learning_rate": 4.0199203187251005e-06, "loss": 0.6995, "step": 1010 }, { "epoch": 0.012087662454118294, "grad_norm": 1.9585793018341064, "learning_rate": 4.02390438247012e-06, "loss": 0.6265, "step": 1011 }, { "epoch": 0.012099618598978946, "grad_norm": 7.514761924743652, "learning_rate": 4.027888446215139e-06, "loss": 0.6944, "step": 1012 }, { "epoch": 0.012111574743839596, "grad_norm": 9.30722713470459, "learning_rate": 4.03187250996016e-06, "loss": 0.6561, "step": 1013 }, { "epoch": 0.012123530888700247, "grad_norm": 55.4019775390625, "learning_rate": 4.03585657370518e-06, "loss": 0.6349, "step": 1014 }, { "epoch": 0.0121354870335609, "grad_norm": 5.0749430656433105, "learning_rate": 4.0398406374501995e-06, "loss": 0.5413, "step": 1015 }, { "epoch": 0.01214744317842155, "grad_norm": 4.924924373626709, "learning_rate": 4.043824701195219e-06, "loss": 0.6369, "step": 1016 }, { "epoch": 0.012159399323282201, "grad_norm": 3.826812744140625, "learning_rate": 4.047808764940239e-06, "loss": 0.5996, "step": 1017 }, { "epoch": 0.012171355468142853, "grad_norm": 2.2324962615966797, "learning_rate": 4.0517928286852595e-06, "loss": 0.5748, "step": 1018 }, { "epoch": 0.012183311613003503, "grad_norm": 2.202977180480957, "learning_rate": 4.055776892430279e-06, "loss": 0.5569, "step": 1019 }, { "epoch": 0.012195267757864154, "grad_norm": 2.4484729766845703, "learning_rate": 4.059760956175299e-06, "loss": 0.5779, "step": 1020 }, { "epoch": 0.012207223902724806, "grad_norm": 2.0908052921295166, "learning_rate": 4.06374501992032e-06, "loss": 0.646, "step": 1021 }, { "epoch": 0.012219180047585456, "grad_norm": 1.8818632364273071, "learning_rate": 4.067729083665339e-06, "loss": 0.5974, "step": 1022 }, { "epoch": 0.012231136192446108, "grad_norm": 4.649717330932617, "learning_rate": 4.0717131474103585e-06, "loss": 0.6611, "step": 1023 }, { "epoch": 0.01224309233730676, "grad_norm": 3.071591854095459, "learning_rate": 4.075697211155379e-06, "loss": 0.7174, "step": 1024 }, { "epoch": 0.01225504848216741, "grad_norm": 3.1292295455932617, "learning_rate": 4.079681274900399e-06, "loss": 0.5457, "step": 1025 }, { "epoch": 0.012267004627028061, "grad_norm": 2.2812094688415527, "learning_rate": 4.083665338645419e-06, "loss": 0.6538, "step": 1026 }, { "epoch": 0.012278960771888713, "grad_norm": 1.9826544523239136, "learning_rate": 4.087649402390438e-06, "loss": 0.6534, "step": 1027 }, { "epoch": 0.012290916916749363, "grad_norm": 5.1631035804748535, "learning_rate": 4.091633466135458e-06, "loss": 0.6137, "step": 1028 }, { "epoch": 0.012302873061610015, "grad_norm": 4.097630500793457, "learning_rate": 4.095617529880479e-06, "loss": 0.612, "step": 1029 }, { "epoch": 0.012314829206470666, "grad_norm": 2.6273486614227295, "learning_rate": 4.099601593625498e-06, "loss": 0.5426, "step": 1030 }, { "epoch": 0.012326785351331316, "grad_norm": 9.122112274169922, "learning_rate": 4.103585657370518e-06, "loss": 0.6405, "step": 1031 }, { "epoch": 0.012338741496191968, "grad_norm": 3.294867992401123, "learning_rate": 4.107569721115538e-06, "loss": 0.5536, "step": 1032 }, { "epoch": 0.01235069764105262, "grad_norm": 13.260171890258789, "learning_rate": 4.111553784860558e-06, "loss": 0.5833, "step": 1033 }, { "epoch": 0.01236265378591327, "grad_norm": 2.9418277740478516, "learning_rate": 4.115537848605578e-06, "loss": 0.6192, "step": 1034 }, { "epoch": 0.012374609930773921, "grad_norm": 3.6689066886901855, "learning_rate": 4.119521912350598e-06, "loss": 0.6126, "step": 1035 }, { "epoch": 0.012386566075634573, "grad_norm": 3.2073216438293457, "learning_rate": 4.123505976095618e-06, "loss": 0.6223, "step": 1036 }, { "epoch": 0.012398522220495223, "grad_norm": 2.421572208404541, "learning_rate": 4.127490039840638e-06, "loss": 0.5966, "step": 1037 }, { "epoch": 0.012410478365355875, "grad_norm": 2.940391778945923, "learning_rate": 4.131474103585658e-06, "loss": 0.7638, "step": 1038 }, { "epoch": 0.012422434510216527, "grad_norm": 8.316088676452637, "learning_rate": 4.1354581673306774e-06, "loss": 0.5695, "step": 1039 }, { "epoch": 0.012434390655077177, "grad_norm": 3.7997987270355225, "learning_rate": 4.139442231075698e-06, "loss": 0.5889, "step": 1040 }, { "epoch": 0.012446346799937828, "grad_norm": 4.063072204589844, "learning_rate": 4.143426294820718e-06, "loss": 0.5389, "step": 1041 }, { "epoch": 0.01245830294479848, "grad_norm": 3.485712766647339, "learning_rate": 4.1474103585657375e-06, "loss": 0.6227, "step": 1042 }, { "epoch": 0.01247025908965913, "grad_norm": 2.594550371170044, "learning_rate": 4.151394422310757e-06, "loss": 0.7137, "step": 1043 }, { "epoch": 0.012482215234519782, "grad_norm": 1.8866002559661865, "learning_rate": 4.155378486055777e-06, "loss": 0.5899, "step": 1044 }, { "epoch": 0.012494171379380433, "grad_norm": 3.114626169204712, "learning_rate": 4.159362549800798e-06, "loss": 0.5455, "step": 1045 }, { "epoch": 0.012506127524241083, "grad_norm": 3.5817055702209473, "learning_rate": 4.163346613545817e-06, "loss": 0.6491, "step": 1046 }, { "epoch": 0.012518083669101735, "grad_norm": 2.6692543029785156, "learning_rate": 4.1673306772908365e-06, "loss": 0.6063, "step": 1047 }, { "epoch": 0.012530039813962387, "grad_norm": 3.9950389862060547, "learning_rate": 4.171314741035857e-06, "loss": 0.647, "step": 1048 }, { "epoch": 0.012541995958823037, "grad_norm": 2.464958429336548, "learning_rate": 4.175298804780877e-06, "loss": 0.6716, "step": 1049 }, { "epoch": 0.012553952103683689, "grad_norm": 6.463669776916504, "learning_rate": 4.1792828685258966e-06, "loss": 0.6322, "step": 1050 }, { "epoch": 0.012565908248544339, "grad_norm": 3.1137030124664307, "learning_rate": 4.183266932270917e-06, "loss": 0.6236, "step": 1051 }, { "epoch": 0.01257786439340499, "grad_norm": 2.3934543132781982, "learning_rate": 4.187250996015936e-06, "loss": 0.5632, "step": 1052 }, { "epoch": 0.012589820538265642, "grad_norm": 6.70931339263916, "learning_rate": 4.191235059760957e-06, "loss": 0.5898, "step": 1053 }, { "epoch": 0.012601776683126292, "grad_norm": 3.4116411209106445, "learning_rate": 4.195219123505976e-06, "loss": 0.533, "step": 1054 }, { "epoch": 0.012613732827986944, "grad_norm": 3.072580575942993, "learning_rate": 4.199203187250996e-06, "loss": 0.6163, "step": 1055 }, { "epoch": 0.012625688972847595, "grad_norm": 3.1769673824310303, "learning_rate": 4.203187250996017e-06, "loss": 0.6674, "step": 1056 }, { "epoch": 0.012637645117708245, "grad_norm": 2.3583147525787354, "learning_rate": 4.207171314741036e-06, "loss": 0.6605, "step": 1057 }, { "epoch": 0.012649601262568897, "grad_norm": 2.650838613510132, "learning_rate": 4.211155378486056e-06, "loss": 0.7108, "step": 1058 }, { "epoch": 0.012661557407429549, "grad_norm": 2.659783124923706, "learning_rate": 4.215139442231076e-06, "loss": 0.5219, "step": 1059 }, { "epoch": 0.012673513552290199, "grad_norm": 3.0953075885772705, "learning_rate": 4.219123505976096e-06, "loss": 0.5464, "step": 1060 }, { "epoch": 0.01268546969715085, "grad_norm": 4.7110371589660645, "learning_rate": 4.223107569721116e-06, "loss": 0.66, "step": 1061 }, { "epoch": 0.012697425842011502, "grad_norm": 3.4995031356811523, "learning_rate": 4.227091633466136e-06, "loss": 0.7088, "step": 1062 }, { "epoch": 0.012709381986872152, "grad_norm": 3.8129994869232178, "learning_rate": 4.2310756972111554e-06, "loss": 0.602, "step": 1063 }, { "epoch": 0.012721338131732804, "grad_norm": 5.277346611022949, "learning_rate": 4.235059760956176e-06, "loss": 0.6042, "step": 1064 }, { "epoch": 0.012733294276593456, "grad_norm": 5.032909393310547, "learning_rate": 4.239043824701195e-06, "loss": 0.5611, "step": 1065 }, { "epoch": 0.012745250421454106, "grad_norm": 1.8674393892288208, "learning_rate": 4.2430278884462155e-06, "loss": 0.5958, "step": 1066 }, { "epoch": 0.012757206566314757, "grad_norm": 3.049377918243408, "learning_rate": 4.247011952191236e-06, "loss": 0.8524, "step": 1067 }, { "epoch": 0.012769162711175409, "grad_norm": 4.373809814453125, "learning_rate": 4.250996015936255e-06, "loss": 0.6057, "step": 1068 }, { "epoch": 0.012781118856036059, "grad_norm": 2.795898199081421, "learning_rate": 4.254980079681275e-06, "loss": 0.5586, "step": 1069 }, { "epoch": 0.01279307500089671, "grad_norm": 1.8787779808044434, "learning_rate": 4.258964143426295e-06, "loss": 0.6504, "step": 1070 }, { "epoch": 0.012805031145757362, "grad_norm": 2.668640375137329, "learning_rate": 4.262948207171315e-06, "loss": 0.6033, "step": 1071 }, { "epoch": 0.012816987290618012, "grad_norm": 2.575009822845459, "learning_rate": 4.266932270916336e-06, "loss": 0.6511, "step": 1072 }, { "epoch": 0.012828943435478664, "grad_norm": 2.7227532863616943, "learning_rate": 4.270916334661355e-06, "loss": 0.5856, "step": 1073 }, { "epoch": 0.012840899580339316, "grad_norm": 2.942793130874634, "learning_rate": 4.2749003984063745e-06, "loss": 0.536, "step": 1074 }, { "epoch": 0.012852855725199966, "grad_norm": 3.2521605491638184, "learning_rate": 4.278884462151395e-06, "loss": 0.6245, "step": 1075 }, { "epoch": 0.012864811870060618, "grad_norm": 3.237274408340454, "learning_rate": 4.282868525896415e-06, "loss": 0.5812, "step": 1076 }, { "epoch": 0.01287676801492127, "grad_norm": 5.182527542114258, "learning_rate": 4.286852589641435e-06, "loss": 0.5889, "step": 1077 }, { "epoch": 0.01288872415978192, "grad_norm": 3.3248558044433594, "learning_rate": 4.290836653386454e-06, "loss": 0.6368, "step": 1078 }, { "epoch": 0.012900680304642571, "grad_norm": 5.4796671867370605, "learning_rate": 4.294820717131474e-06, "loss": 0.5814, "step": 1079 }, { "epoch": 0.012912636449503223, "grad_norm": 2.48132061958313, "learning_rate": 4.298804780876495e-06, "loss": 0.5994, "step": 1080 }, { "epoch": 0.012924592594363873, "grad_norm": 2.931792974472046, "learning_rate": 4.302788844621514e-06, "loss": 0.6246, "step": 1081 }, { "epoch": 0.012936548739224524, "grad_norm": 2.8545761108398438, "learning_rate": 4.3067729083665344e-06, "loss": 0.5861, "step": 1082 }, { "epoch": 0.012948504884085176, "grad_norm": 3.901155710220337, "learning_rate": 4.310756972111554e-06, "loss": 0.6807, "step": 1083 }, { "epoch": 0.012960461028945826, "grad_norm": 3.9243216514587402, "learning_rate": 4.314741035856574e-06, "loss": 0.7048, "step": 1084 }, { "epoch": 0.012972417173806478, "grad_norm": 11.361303329467773, "learning_rate": 4.318725099601594e-06, "loss": 0.6287, "step": 1085 }, { "epoch": 0.01298437331866713, "grad_norm": 5.5260396003723145, "learning_rate": 4.322709163346614e-06, "loss": 0.5981, "step": 1086 }, { "epoch": 0.01299632946352778, "grad_norm": 9.38637638092041, "learning_rate": 4.326693227091634e-06, "loss": 0.6248, "step": 1087 }, { "epoch": 0.013008285608388431, "grad_norm": 3.6934187412261963, "learning_rate": 4.330677290836654e-06, "loss": 0.5837, "step": 1088 }, { "epoch": 0.013020241753249083, "grad_norm": 4.839319229125977, "learning_rate": 4.334661354581673e-06, "loss": 0.6156, "step": 1089 }, { "epoch": 0.013032197898109733, "grad_norm": 2.3250222206115723, "learning_rate": 4.3386454183266935e-06, "loss": 0.5706, "step": 1090 }, { "epoch": 0.013044154042970385, "grad_norm": 6.6993560791015625, "learning_rate": 4.342629482071714e-06, "loss": 0.6284, "step": 1091 }, { "epoch": 0.013056110187831036, "grad_norm": 4.015944004058838, "learning_rate": 4.346613545816733e-06, "loss": 0.5989, "step": 1092 }, { "epoch": 0.013068066332691686, "grad_norm": 4.128204345703125, "learning_rate": 4.3505976095617535e-06, "loss": 0.6091, "step": 1093 }, { "epoch": 0.013080022477552338, "grad_norm": 2.4962284564971924, "learning_rate": 4.354581673306773e-06, "loss": 0.6211, "step": 1094 }, { "epoch": 0.01309197862241299, "grad_norm": 3.9403207302093506, "learning_rate": 4.358565737051793e-06, "loss": 0.5609, "step": 1095 }, { "epoch": 0.01310393476727364, "grad_norm": 2.899765968322754, "learning_rate": 4.362549800796813e-06, "loss": 0.6086, "step": 1096 }, { "epoch": 0.013115890912134292, "grad_norm": 4.711203575134277, "learning_rate": 4.366533864541833e-06, "loss": 0.5871, "step": 1097 }, { "epoch": 0.013127847056994943, "grad_norm": 2.1791634559631348, "learning_rate": 4.370517928286853e-06, "loss": 0.5644, "step": 1098 }, { "epoch": 0.013139803201855593, "grad_norm": 4.95035457611084, "learning_rate": 4.374501992031873e-06, "loss": 0.5871, "step": 1099 }, { "epoch": 0.013151759346716245, "grad_norm": 4.415274143218994, "learning_rate": 4.378486055776892e-06, "loss": 0.643, "step": 1100 }, { "epoch": 0.013163715491576897, "grad_norm": 2.3872103691101074, "learning_rate": 4.382470119521913e-06, "loss": 0.6529, "step": 1101 }, { "epoch": 0.013175671636437547, "grad_norm": 6.267696380615234, "learning_rate": 4.386454183266933e-06, "loss": 0.6115, "step": 1102 }, { "epoch": 0.013187627781298198, "grad_norm": 2.164212703704834, "learning_rate": 4.390438247011952e-06, "loss": 0.5983, "step": 1103 }, { "epoch": 0.01319958392615885, "grad_norm": 4.328146934509277, "learning_rate": 4.394422310756973e-06, "loss": 0.6365, "step": 1104 }, { "epoch": 0.0132115400710195, "grad_norm": 6.055401802062988, "learning_rate": 4.398406374501992e-06, "loss": 0.6167, "step": 1105 }, { "epoch": 0.013223496215880152, "grad_norm": 3.04278302192688, "learning_rate": 4.402390438247012e-06, "loss": 0.5341, "step": 1106 }, { "epoch": 0.013235452360740804, "grad_norm": 2.1351478099823, "learning_rate": 4.406374501992033e-06, "loss": 0.5782, "step": 1107 }, { "epoch": 0.013247408505601454, "grad_norm": 8.378867149353027, "learning_rate": 4.410358565737052e-06, "loss": 0.5535, "step": 1108 }, { "epoch": 0.013259364650462105, "grad_norm": 2.96771502494812, "learning_rate": 4.414342629482072e-06, "loss": 0.6208, "step": 1109 }, { "epoch": 0.013271320795322757, "grad_norm": 2.0840351581573486, "learning_rate": 4.418326693227092e-06, "loss": 0.5753, "step": 1110 }, { "epoch": 0.013283276940183407, "grad_norm": 2.67232608795166, "learning_rate": 4.422310756972112e-06, "loss": 0.6261, "step": 1111 }, { "epoch": 0.013295233085044059, "grad_norm": 1.8877049684524536, "learning_rate": 4.426294820717132e-06, "loss": 0.5527, "step": 1112 }, { "epoch": 0.01330718922990471, "grad_norm": 2.249136447906494, "learning_rate": 4.430278884462152e-06, "loss": 0.669, "step": 1113 }, { "epoch": 0.01331914537476536, "grad_norm": 3.411992311477661, "learning_rate": 4.4342629482071715e-06, "loss": 0.6546, "step": 1114 }, { "epoch": 0.013331101519626012, "grad_norm": 2.9736647605895996, "learning_rate": 4.438247011952192e-06, "loss": 0.6008, "step": 1115 }, { "epoch": 0.013343057664486662, "grad_norm": 3.84853458404541, "learning_rate": 4.442231075697211e-06, "loss": 0.6679, "step": 1116 }, { "epoch": 0.013355013809347314, "grad_norm": 3.1385200023651123, "learning_rate": 4.4462151394422315e-06, "loss": 0.6099, "step": 1117 }, { "epoch": 0.013366969954207965, "grad_norm": 11.823033332824707, "learning_rate": 4.450199203187252e-06, "loss": 0.6058, "step": 1118 }, { "epoch": 0.013378926099068615, "grad_norm": 2.6303458213806152, "learning_rate": 4.454183266932271e-06, "loss": 0.6336, "step": 1119 }, { "epoch": 0.013390882243929267, "grad_norm": 2.3215203285217285, "learning_rate": 4.458167330677291e-06, "loss": 0.7752, "step": 1120 }, { "epoch": 0.013402838388789919, "grad_norm": 2.9551568031311035, "learning_rate": 4.462151394422311e-06, "loss": 0.5879, "step": 1121 }, { "epoch": 0.013414794533650569, "grad_norm": 3.2952463626861572, "learning_rate": 4.466135458167331e-06, "loss": 0.6598, "step": 1122 }, { "epoch": 0.01342675067851122, "grad_norm": 2.959428548812866, "learning_rate": 4.470119521912351e-06, "loss": 0.6279, "step": 1123 }, { "epoch": 0.013438706823371872, "grad_norm": 1.7788184881210327, "learning_rate": 4.474103585657371e-06, "loss": 0.6166, "step": 1124 }, { "epoch": 0.013450662968232522, "grad_norm": 2.5638198852539062, "learning_rate": 4.4780876494023906e-06, "loss": 0.5339, "step": 1125 }, { "epoch": 0.013462619113093174, "grad_norm": 2.254628896713257, "learning_rate": 4.482071713147411e-06, "loss": 0.5408, "step": 1126 }, { "epoch": 0.013474575257953826, "grad_norm": 3.793682098388672, "learning_rate": 4.48605577689243e-06, "loss": 0.6641, "step": 1127 }, { "epoch": 0.013486531402814476, "grad_norm": 4.533689022064209, "learning_rate": 4.490039840637451e-06, "loss": 0.5776, "step": 1128 }, { "epoch": 0.013498487547675127, "grad_norm": 3.948157548904419, "learning_rate": 4.494023904382471e-06, "loss": 0.6511, "step": 1129 }, { "epoch": 0.01351044369253578, "grad_norm": 14.420726776123047, "learning_rate": 4.49800796812749e-06, "loss": 0.5986, "step": 1130 }, { "epoch": 0.01352239983739643, "grad_norm": 1.8984298706054688, "learning_rate": 4.50199203187251e-06, "loss": 0.5764, "step": 1131 }, { "epoch": 0.013534355982257081, "grad_norm": 4.506573677062988, "learning_rate": 4.50597609561753e-06, "loss": 0.5755, "step": 1132 }, { "epoch": 0.013546312127117733, "grad_norm": 6.3094916343688965, "learning_rate": 4.5099601593625505e-06, "loss": 0.5733, "step": 1133 }, { "epoch": 0.013558268271978383, "grad_norm": 1.6947474479675293, "learning_rate": 4.51394422310757e-06, "loss": 0.5691, "step": 1134 }, { "epoch": 0.013570224416839034, "grad_norm": 5.873553276062012, "learning_rate": 4.517928286852589e-06, "loss": 0.6197, "step": 1135 }, { "epoch": 0.013582180561699686, "grad_norm": 6.817983627319336, "learning_rate": 4.52191235059761e-06, "loss": 0.6057, "step": 1136 }, { "epoch": 0.013594136706560336, "grad_norm": 1.8363597393035889, "learning_rate": 4.52589641434263e-06, "loss": 0.5708, "step": 1137 }, { "epoch": 0.013606092851420988, "grad_norm": 9.307336807250977, "learning_rate": 4.52988047808765e-06, "loss": 0.5803, "step": 1138 }, { "epoch": 0.01361804899628164, "grad_norm": 4.890499591827393, "learning_rate": 4.53386454183267e-06, "loss": 0.6823, "step": 1139 }, { "epoch": 0.01363000514114229, "grad_norm": 5.982378959655762, "learning_rate": 4.537848605577689e-06, "loss": 0.7252, "step": 1140 }, { "epoch": 0.013641961286002941, "grad_norm": 1.988491177558899, "learning_rate": 4.5418326693227095e-06, "loss": 0.6242, "step": 1141 }, { "epoch": 0.013653917430863593, "grad_norm": 4.198664665222168, "learning_rate": 4.54581673306773e-06, "loss": 0.7179, "step": 1142 }, { "epoch": 0.013665873575724243, "grad_norm": 2.623972177505493, "learning_rate": 4.549800796812749e-06, "loss": 0.5775, "step": 1143 }, { "epoch": 0.013677829720584895, "grad_norm": 4.358140468597412, "learning_rate": 4.5537848605577696e-06, "loss": 0.6542, "step": 1144 }, { "epoch": 0.013689785865445546, "grad_norm": 2.3158469200134277, "learning_rate": 4.557768924302789e-06, "loss": 0.6173, "step": 1145 }, { "epoch": 0.013701742010306196, "grad_norm": 2.253124237060547, "learning_rate": 4.561752988047809e-06, "loss": 0.5642, "step": 1146 }, { "epoch": 0.013713698155166848, "grad_norm": 6.202012538909912, "learning_rate": 4.565737051792829e-06, "loss": 0.6668, "step": 1147 }, { "epoch": 0.0137256543000275, "grad_norm": 3.1758124828338623, "learning_rate": 4.569721115537849e-06, "loss": 0.6692, "step": 1148 }, { "epoch": 0.01373761044488815, "grad_norm": 2.526048421859741, "learning_rate": 4.573705179282869e-06, "loss": 0.55, "step": 1149 }, { "epoch": 0.013749566589748801, "grad_norm": 5.753032207489014, "learning_rate": 4.577689243027889e-06, "loss": 0.7243, "step": 1150 }, { "epoch": 0.013761522734609453, "grad_norm": 2.7035441398620605, "learning_rate": 4.581673306772908e-06, "loss": 0.5931, "step": 1151 }, { "epoch": 0.013773478879470103, "grad_norm": 2.353694438934326, "learning_rate": 4.585657370517929e-06, "loss": 0.5506, "step": 1152 }, { "epoch": 0.013785435024330755, "grad_norm": 18.091203689575195, "learning_rate": 4.589641434262949e-06, "loss": 0.6891, "step": 1153 }, { "epoch": 0.013797391169191407, "grad_norm": 4.310135364532471, "learning_rate": 4.593625498007968e-06, "loss": 0.5818, "step": 1154 }, { "epoch": 0.013809347314052057, "grad_norm": 3.1969032287597656, "learning_rate": 4.597609561752989e-06, "loss": 0.645, "step": 1155 }, { "epoch": 0.013821303458912708, "grad_norm": 3.621730327606201, "learning_rate": 4.601593625498008e-06, "loss": 0.6251, "step": 1156 }, { "epoch": 0.01383325960377336, "grad_norm": 2.061882972717285, "learning_rate": 4.6055776892430284e-06, "loss": 0.5626, "step": 1157 }, { "epoch": 0.01384521574863401, "grad_norm": 21.569915771484375, "learning_rate": 4.609561752988048e-06, "loss": 0.6314, "step": 1158 }, { "epoch": 0.013857171893494662, "grad_norm": 2.4054043292999268, "learning_rate": 4.613545816733068e-06, "loss": 0.6927, "step": 1159 }, { "epoch": 0.013869128038355313, "grad_norm": 2.6213462352752686, "learning_rate": 4.6175298804780885e-06, "loss": 0.6746, "step": 1160 }, { "epoch": 0.013881084183215963, "grad_norm": 2.982863664627075, "learning_rate": 4.621513944223108e-06, "loss": 0.6371, "step": 1161 }, { "epoch": 0.013893040328076615, "grad_norm": 4.794187545776367, "learning_rate": 4.625498007968127e-06, "loss": 0.6129, "step": 1162 }, { "epoch": 0.013904996472937267, "grad_norm": 3.6465229988098145, "learning_rate": 4.629482071713148e-06, "loss": 0.5497, "step": 1163 }, { "epoch": 0.013916952617797917, "grad_norm": 2.7238576412200928, "learning_rate": 4.633466135458168e-06, "loss": 0.6289, "step": 1164 }, { "epoch": 0.013928908762658569, "grad_norm": 3.7758429050445557, "learning_rate": 4.6374501992031875e-06, "loss": 0.6223, "step": 1165 }, { "epoch": 0.01394086490751922, "grad_norm": 3.0295441150665283, "learning_rate": 4.641434262948207e-06, "loss": 0.6065, "step": 1166 }, { "epoch": 0.01395282105237987, "grad_norm": 2.8090341091156006, "learning_rate": 4.645418326693227e-06, "loss": 0.6276, "step": 1167 }, { "epoch": 0.013964777197240522, "grad_norm": 4.357777118682861, "learning_rate": 4.6494023904382475e-06, "loss": 0.5598, "step": 1168 }, { "epoch": 0.013976733342101174, "grad_norm": 4.840453624725342, "learning_rate": 4.653386454183267e-06, "loss": 0.5989, "step": 1169 }, { "epoch": 0.013988689486961824, "grad_norm": 1.9254359006881714, "learning_rate": 4.657370517928287e-06, "loss": 0.521, "step": 1170 }, { "epoch": 0.014000645631822475, "grad_norm": 1.9523563385009766, "learning_rate": 4.661354581673307e-06, "loss": 0.6017, "step": 1171 }, { "epoch": 0.014012601776683127, "grad_norm": 3.2696526050567627, "learning_rate": 4.665338645418327e-06, "loss": 0.6712, "step": 1172 }, { "epoch": 0.014024557921543777, "grad_norm": 1.8756120204925537, "learning_rate": 4.669322709163347e-06, "loss": 0.6549, "step": 1173 }, { "epoch": 0.014036514066404429, "grad_norm": 2.0684666633605957, "learning_rate": 4.673306772908367e-06, "loss": 0.6485, "step": 1174 }, { "epoch": 0.01404847021126508, "grad_norm": 9.750511169433594, "learning_rate": 4.677290836653387e-06, "loss": 0.4967, "step": 1175 }, { "epoch": 0.01406042635612573, "grad_norm": 14.123799324035645, "learning_rate": 4.681274900398407e-06, "loss": 0.5529, "step": 1176 }, { "epoch": 0.014072382500986382, "grad_norm": 2.4078729152679443, "learning_rate": 4.685258964143427e-06, "loss": 0.5535, "step": 1177 }, { "epoch": 0.014084338645847034, "grad_norm": 4.426707744598389, "learning_rate": 4.689243027888446e-06, "loss": 0.5965, "step": 1178 }, { "epoch": 0.014096294790707684, "grad_norm": 3.081785202026367, "learning_rate": 4.693227091633467e-06, "loss": 0.5988, "step": 1179 }, { "epoch": 0.014108250935568336, "grad_norm": 2.9033806324005127, "learning_rate": 4.697211155378487e-06, "loss": 0.5767, "step": 1180 }, { "epoch": 0.014120207080428987, "grad_norm": 4.61467981338501, "learning_rate": 4.701195219123506e-06, "loss": 0.6408, "step": 1181 }, { "epoch": 0.014132163225289637, "grad_norm": 5.213294982910156, "learning_rate": 4.705179282868526e-06, "loss": 0.5677, "step": 1182 }, { "epoch": 0.014144119370150289, "grad_norm": 2.2029571533203125, "learning_rate": 4.709163346613546e-06, "loss": 0.6716, "step": 1183 }, { "epoch": 0.014156075515010939, "grad_norm": 1.6170841455459595, "learning_rate": 4.7131474103585665e-06, "loss": 0.5738, "step": 1184 }, { "epoch": 0.01416803165987159, "grad_norm": 2.823772668838501, "learning_rate": 4.717131474103586e-06, "loss": 0.6157, "step": 1185 }, { "epoch": 0.014179987804732242, "grad_norm": 2.675567388534546, "learning_rate": 4.721115537848606e-06, "loss": 0.621, "step": 1186 }, { "epoch": 0.014191943949592892, "grad_norm": 2.1109797954559326, "learning_rate": 4.725099601593626e-06, "loss": 0.6329, "step": 1187 }, { "epoch": 0.014203900094453544, "grad_norm": 2.0357015132904053, "learning_rate": 4.729083665338646e-06, "loss": 0.6182, "step": 1188 }, { "epoch": 0.014215856239314196, "grad_norm": 8.018484115600586, "learning_rate": 4.7330677290836655e-06, "loss": 0.6424, "step": 1189 }, { "epoch": 0.014227812384174846, "grad_norm": 4.169000148773193, "learning_rate": 4.737051792828686e-06, "loss": 0.5491, "step": 1190 }, { "epoch": 0.014239768529035498, "grad_norm": 10.127303123474121, "learning_rate": 4.741035856573706e-06, "loss": 0.6383, "step": 1191 }, { "epoch": 0.01425172467389615, "grad_norm": 2.1262245178222656, "learning_rate": 4.7450199203187255e-06, "loss": 0.6323, "step": 1192 }, { "epoch": 0.0142636808187568, "grad_norm": 4.644949436187744, "learning_rate": 4.749003984063745e-06, "loss": 0.6445, "step": 1193 }, { "epoch": 0.014275636963617451, "grad_norm": 9.216053009033203, "learning_rate": 4.752988047808765e-06, "loss": 0.6065, "step": 1194 }, { "epoch": 0.014287593108478103, "grad_norm": 2.8398923873901367, "learning_rate": 4.756972111553786e-06, "loss": 0.5527, "step": 1195 }, { "epoch": 0.014299549253338753, "grad_norm": 6.084990501403809, "learning_rate": 4.760956175298805e-06, "loss": 0.6484, "step": 1196 }, { "epoch": 0.014311505398199404, "grad_norm": 2.64567494392395, "learning_rate": 4.7649402390438245e-06, "loss": 0.7087, "step": 1197 }, { "epoch": 0.014323461543060056, "grad_norm": 4.322199821472168, "learning_rate": 4.768924302788845e-06, "loss": 0.6614, "step": 1198 }, { "epoch": 0.014335417687920706, "grad_norm": 2.940920114517212, "learning_rate": 4.772908366533865e-06, "loss": 0.5218, "step": 1199 }, { "epoch": 0.014347373832781358, "grad_norm": 4.545740127563477, "learning_rate": 4.7768924302788846e-06, "loss": 0.6484, "step": 1200 }, { "epoch": 0.01435932997764201, "grad_norm": 27.10830307006836, "learning_rate": 4.780876494023905e-06, "loss": 0.6821, "step": 1201 }, { "epoch": 0.01437128612250266, "grad_norm": 2.85233736038208, "learning_rate": 4.784860557768924e-06, "loss": 0.686, "step": 1202 }, { "epoch": 0.014383242267363311, "grad_norm": 7.918511867523193, "learning_rate": 4.788844621513945e-06, "loss": 0.5756, "step": 1203 }, { "epoch": 0.014395198412223963, "grad_norm": 4.441627025604248, "learning_rate": 4.792828685258964e-06, "loss": 0.6643, "step": 1204 }, { "epoch": 0.014407154557084613, "grad_norm": 4.900398254394531, "learning_rate": 4.796812749003984e-06, "loss": 0.5507, "step": 1205 }, { "epoch": 0.014419110701945265, "grad_norm": 2.1385254859924316, "learning_rate": 4.800796812749005e-06, "loss": 0.6004, "step": 1206 }, { "epoch": 0.014431066846805916, "grad_norm": 10.633808135986328, "learning_rate": 4.804780876494024e-06, "loss": 0.601, "step": 1207 }, { "epoch": 0.014443022991666566, "grad_norm": 2.320772171020508, "learning_rate": 4.8087649402390445e-06, "loss": 0.5812, "step": 1208 }, { "epoch": 0.014454979136527218, "grad_norm": 4.752778053283691, "learning_rate": 4.812749003984064e-06, "loss": 0.6661, "step": 1209 }, { "epoch": 0.01446693528138787, "grad_norm": 3.1809329986572266, "learning_rate": 4.816733067729084e-06, "loss": 0.61, "step": 1210 }, { "epoch": 0.01447889142624852, "grad_norm": 5.469424724578857, "learning_rate": 4.8207171314741045e-06, "loss": 0.6425, "step": 1211 }, { "epoch": 0.014490847571109172, "grad_norm": 1.9128929376602173, "learning_rate": 4.824701195219124e-06, "loss": 0.6103, "step": 1212 }, { "epoch": 0.014502803715969823, "grad_norm": 3.4402546882629395, "learning_rate": 4.8286852589641434e-06, "loss": 0.5377, "step": 1213 }, { "epoch": 0.014514759860830473, "grad_norm": 2.4159674644470215, "learning_rate": 4.832669322709164e-06, "loss": 0.5604, "step": 1214 }, { "epoch": 0.014526716005691125, "grad_norm": 6.583316802978516, "learning_rate": 4.836653386454184e-06, "loss": 0.6936, "step": 1215 }, { "epoch": 0.014538672150551777, "grad_norm": 2.6145880222320557, "learning_rate": 4.8406374501992035e-06, "loss": 0.6664, "step": 1216 }, { "epoch": 0.014550628295412427, "grad_norm": 1.9042543172836304, "learning_rate": 4.844621513944224e-06, "loss": 0.5917, "step": 1217 }, { "epoch": 0.014562584440273078, "grad_norm": 3.344801902770996, "learning_rate": 4.848605577689243e-06, "loss": 0.5534, "step": 1218 }, { "epoch": 0.01457454058513373, "grad_norm": 3.364349603652954, "learning_rate": 4.8525896414342636e-06, "loss": 0.5927, "step": 1219 }, { "epoch": 0.01458649672999438, "grad_norm": 3.504836320877075, "learning_rate": 4.856573705179283e-06, "loss": 0.5795, "step": 1220 }, { "epoch": 0.014598452874855032, "grad_norm": 2.5324339866638184, "learning_rate": 4.860557768924303e-06, "loss": 0.5525, "step": 1221 }, { "epoch": 0.014610409019715683, "grad_norm": 2.2293410301208496, "learning_rate": 4.864541832669324e-06, "loss": 0.536, "step": 1222 }, { "epoch": 0.014622365164576333, "grad_norm": 2.552079677581787, "learning_rate": 4.868525896414343e-06, "loss": 0.6724, "step": 1223 }, { "epoch": 0.014634321309436985, "grad_norm": 1.8242566585540771, "learning_rate": 4.8725099601593625e-06, "loss": 0.6213, "step": 1224 }, { "epoch": 0.014646277454297637, "grad_norm": 2.157539129257202, "learning_rate": 4.876494023904383e-06, "loss": 0.5182, "step": 1225 }, { "epoch": 0.014658233599158287, "grad_norm": 2.7398736476898193, "learning_rate": 4.880478087649403e-06, "loss": 0.7051, "step": 1226 }, { "epoch": 0.014670189744018939, "grad_norm": 2.409463882446289, "learning_rate": 4.884462151394423e-06, "loss": 0.6019, "step": 1227 }, { "epoch": 0.01468214588887959, "grad_norm": 1.9640836715698242, "learning_rate": 4.888446215139442e-06, "loss": 0.655, "step": 1228 }, { "epoch": 0.01469410203374024, "grad_norm": 15.203811645507812, "learning_rate": 4.892430278884462e-06, "loss": 0.5978, "step": 1229 }, { "epoch": 0.014706058178600892, "grad_norm": 2.4618380069732666, "learning_rate": 4.896414342629483e-06, "loss": 0.5816, "step": 1230 }, { "epoch": 0.014718014323461544, "grad_norm": 4.0416717529296875, "learning_rate": 4.900398406374502e-06, "loss": 0.7051, "step": 1231 }, { "epoch": 0.014729970468322194, "grad_norm": 2.2318105697631836, "learning_rate": 4.9043824701195224e-06, "loss": 0.6039, "step": 1232 }, { "epoch": 0.014741926613182845, "grad_norm": 1.8590197563171387, "learning_rate": 4.908366533864542e-06, "loss": 0.552, "step": 1233 }, { "epoch": 0.014753882758043497, "grad_norm": 4.6944732666015625, "learning_rate": 4.912350597609562e-06, "loss": 0.4609, "step": 1234 }, { "epoch": 0.014765838902904147, "grad_norm": 4.058207988739014, "learning_rate": 4.916334661354582e-06, "loss": 0.6419, "step": 1235 }, { "epoch": 0.014777795047764799, "grad_norm": 2.2022688388824463, "learning_rate": 4.920318725099602e-06, "loss": 0.6563, "step": 1236 }, { "epoch": 0.01478975119262545, "grad_norm": 3.0761051177978516, "learning_rate": 4.924302788844622e-06, "loss": 0.6962, "step": 1237 }, { "epoch": 0.0148017073374861, "grad_norm": 2.9549715518951416, "learning_rate": 4.928286852589642e-06, "loss": 0.546, "step": 1238 }, { "epoch": 0.014813663482346752, "grad_norm": 6.659189701080322, "learning_rate": 4.932270916334662e-06, "loss": 0.7131, "step": 1239 }, { "epoch": 0.014825619627207404, "grad_norm": 3.252342462539673, "learning_rate": 4.9362549800796815e-06, "loss": 0.7314, "step": 1240 }, { "epoch": 0.014837575772068054, "grad_norm": 4.688808441162109, "learning_rate": 4.940239043824702e-06, "loss": 0.6269, "step": 1241 }, { "epoch": 0.014849531916928706, "grad_norm": 3.5115110874176025, "learning_rate": 4.944223107569722e-06, "loss": 0.68, "step": 1242 }, { "epoch": 0.014861488061789357, "grad_norm": 2.30515193939209, "learning_rate": 4.9482071713147415e-06, "loss": 0.6121, "step": 1243 }, { "epoch": 0.014873444206650007, "grad_norm": 2.5191493034362793, "learning_rate": 4.952191235059761e-06, "loss": 0.6365, "step": 1244 }, { "epoch": 0.01488540035151066, "grad_norm": 4.104251861572266, "learning_rate": 4.956175298804781e-06, "loss": 0.6271, "step": 1245 }, { "epoch": 0.01489735649637131, "grad_norm": 5.524101734161377, "learning_rate": 4.960159362549802e-06, "loss": 0.714, "step": 1246 }, { "epoch": 0.01490931264123196, "grad_norm": 6.105603218078613, "learning_rate": 4.964143426294821e-06, "loss": 0.5292, "step": 1247 }, { "epoch": 0.014921268786092613, "grad_norm": 2.813429594039917, "learning_rate": 4.968127490039841e-06, "loss": 0.6182, "step": 1248 }, { "epoch": 0.014933224930953264, "grad_norm": 2.4086081981658936, "learning_rate": 4.972111553784861e-06, "loss": 0.6513, "step": 1249 }, { "epoch": 0.014945181075813914, "grad_norm": 2.4628007411956787, "learning_rate": 4.976095617529881e-06, "loss": 0.5877, "step": 1250 }, { "epoch": 0.014957137220674566, "grad_norm": 5.432663440704346, "learning_rate": 4.980079681274901e-06, "loss": 0.6011, "step": 1251 }, { "epoch": 0.014969093365535216, "grad_norm": 5.650063514709473, "learning_rate": 4.984063745019921e-06, "loss": 0.5696, "step": 1252 }, { "epoch": 0.014981049510395868, "grad_norm": 3.3218016624450684, "learning_rate": 4.988047808764941e-06, "loss": 0.6833, "step": 1253 }, { "epoch": 0.01499300565525652, "grad_norm": 2.4692981243133545, "learning_rate": 4.992031872509961e-06, "loss": 0.5997, "step": 1254 }, { "epoch": 0.01500496180011717, "grad_norm": 3.446763038635254, "learning_rate": 4.99601593625498e-06, "loss": 0.658, "step": 1255 }, { "epoch": 0.015016917944977821, "grad_norm": 2.618547201156616, "learning_rate": 5e-06, "loss": 0.685, "step": 1256 }, { "epoch": 0.015028874089838473, "grad_norm": 4.172295570373535, "learning_rate": 5.00398406374502e-06, "loss": 0.5909, "step": 1257 }, { "epoch": 0.015040830234699123, "grad_norm": 3.100297212600708, "learning_rate": 5.00796812749004e-06, "loss": 0.5944, "step": 1258 }, { "epoch": 0.015052786379559775, "grad_norm": 3.016444206237793, "learning_rate": 5.01195219123506e-06, "loss": 0.6572, "step": 1259 }, { "epoch": 0.015064742524420426, "grad_norm": 4.1627912521362305, "learning_rate": 5.015936254980081e-06, "loss": 0.5341, "step": 1260 }, { "epoch": 0.015076698669281076, "grad_norm": 35.824153900146484, "learning_rate": 5.0199203187251e-06, "loss": 0.6564, "step": 1261 }, { "epoch": 0.015088654814141728, "grad_norm": 2.363001823425293, "learning_rate": 5.02390438247012e-06, "loss": 0.7272, "step": 1262 }, { "epoch": 0.01510061095900238, "grad_norm": 1.9827609062194824, "learning_rate": 5.02788844621514e-06, "loss": 0.5293, "step": 1263 }, { "epoch": 0.01511256710386303, "grad_norm": 3.7790775299072266, "learning_rate": 5.0318725099601595e-06, "loss": 0.6815, "step": 1264 }, { "epoch": 0.015124523248723681, "grad_norm": 11.890880584716797, "learning_rate": 5.03585657370518e-06, "loss": 0.6725, "step": 1265 }, { "epoch": 0.015136479393584333, "grad_norm": 1.784464716911316, "learning_rate": 5.039840637450199e-06, "loss": 0.6184, "step": 1266 }, { "epoch": 0.015148435538444983, "grad_norm": 2.225414991378784, "learning_rate": 5.043824701195219e-06, "loss": 0.6474, "step": 1267 }, { "epoch": 0.015160391683305635, "grad_norm": 2.1132938861846924, "learning_rate": 5.04780876494024e-06, "loss": 0.5742, "step": 1268 }, { "epoch": 0.015172347828166287, "grad_norm": 3.483448028564453, "learning_rate": 5.051792828685259e-06, "loss": 0.5803, "step": 1269 }, { "epoch": 0.015184303973026936, "grad_norm": 3.1404190063476562, "learning_rate": 5.05577689243028e-06, "loss": 0.516, "step": 1270 }, { "epoch": 0.015196260117887588, "grad_norm": 3.0074946880340576, "learning_rate": 5.059760956175299e-06, "loss": 0.6193, "step": 1271 }, { "epoch": 0.01520821626274824, "grad_norm": 3.0275211334228516, "learning_rate": 5.0637450199203185e-06, "loss": 0.6752, "step": 1272 }, { "epoch": 0.01522017240760889, "grad_norm": 3.3236286640167236, "learning_rate": 5.067729083665339e-06, "loss": 0.6455, "step": 1273 }, { "epoch": 0.015232128552469542, "grad_norm": 5.157296657562256, "learning_rate": 5.071713147410359e-06, "loss": 0.5556, "step": 1274 }, { "epoch": 0.015244084697330193, "grad_norm": 6.796929836273193, "learning_rate": 5.075697211155379e-06, "loss": 0.5962, "step": 1275 }, { "epoch": 0.015256040842190843, "grad_norm": 2.267699956893921, "learning_rate": 5.079681274900399e-06, "loss": 0.6068, "step": 1276 }, { "epoch": 0.015267996987051495, "grad_norm": 4.780969142913818, "learning_rate": 5.083665338645418e-06, "loss": 0.6902, "step": 1277 }, { "epoch": 0.015279953131912147, "grad_norm": 2.5600388050079346, "learning_rate": 5.087649402390439e-06, "loss": 0.6248, "step": 1278 }, { "epoch": 0.015291909276772797, "grad_norm": 2.49015212059021, "learning_rate": 5.091633466135458e-06, "loss": 0.6786, "step": 1279 }, { "epoch": 0.015303865421633448, "grad_norm": 3.5681564807891846, "learning_rate": 5.095617529880479e-06, "loss": 0.5513, "step": 1280 }, { "epoch": 0.0153158215664941, "grad_norm": 2.1869702339172363, "learning_rate": 5.099601593625499e-06, "loss": 0.599, "step": 1281 }, { "epoch": 0.01532777771135475, "grad_norm": 1.8578815460205078, "learning_rate": 5.103585657370518e-06, "loss": 0.4976, "step": 1282 }, { "epoch": 0.015339733856215402, "grad_norm": 3.5075886249542236, "learning_rate": 5.1075697211155385e-06, "loss": 0.5936, "step": 1283 }, { "epoch": 0.015351690001076054, "grad_norm": 14.082140922546387, "learning_rate": 5.111553784860558e-06, "loss": 0.5565, "step": 1284 }, { "epoch": 0.015363646145936704, "grad_norm": 3.5327796936035156, "learning_rate": 5.115537848605578e-06, "loss": 0.6177, "step": 1285 }, { "epoch": 0.015375602290797355, "grad_norm": 1.2969328165054321, "learning_rate": 5.119521912350598e-06, "loss": 0.4568, "step": 1286 }, { "epoch": 0.015387558435658007, "grad_norm": 2.030538320541382, "learning_rate": 5.123505976095617e-06, "loss": 0.6193, "step": 1287 }, { "epoch": 0.015399514580518657, "grad_norm": 3.3604226112365723, "learning_rate": 5.127490039840638e-06, "loss": 0.5121, "step": 1288 }, { "epoch": 0.015411470725379309, "grad_norm": 3.9314124584198, "learning_rate": 5.131474103585658e-06, "loss": 0.5617, "step": 1289 }, { "epoch": 0.01542342687023996, "grad_norm": 4.396551132202148, "learning_rate": 5.135458167330678e-06, "loss": 0.6511, "step": 1290 }, { "epoch": 0.01543538301510061, "grad_norm": 3.8369300365448, "learning_rate": 5.1394422310756975e-06, "loss": 0.596, "step": 1291 }, { "epoch": 0.015447339159961262, "grad_norm": 3.819929361343384, "learning_rate": 5.143426294820718e-06, "loss": 0.5637, "step": 1292 }, { "epoch": 0.015459295304821914, "grad_norm": 2.671651840209961, "learning_rate": 5.147410358565737e-06, "loss": 0.5613, "step": 1293 }, { "epoch": 0.015471251449682564, "grad_norm": 4.365366458892822, "learning_rate": 5.151394422310757e-06, "loss": 0.6425, "step": 1294 }, { "epoch": 0.015483207594543216, "grad_norm": 1.6756280660629272, "learning_rate": 5.155378486055778e-06, "loss": 0.6595, "step": 1295 }, { "epoch": 0.015495163739403867, "grad_norm": 4.575931072235107, "learning_rate": 5.159362549800797e-06, "loss": 0.6597, "step": 1296 }, { "epoch": 0.015507119884264517, "grad_norm": 2.662700891494751, "learning_rate": 5.163346613545818e-06, "loss": 0.5845, "step": 1297 }, { "epoch": 0.015519076029125169, "grad_norm": 3.6372532844543457, "learning_rate": 5.167330677290837e-06, "loss": 0.5702, "step": 1298 }, { "epoch": 0.01553103217398582, "grad_norm": 29.753658294677734, "learning_rate": 5.1713147410358565e-06, "loss": 0.6034, "step": 1299 }, { "epoch": 0.01554298831884647, "grad_norm": 3.348196268081665, "learning_rate": 5.175298804780877e-06, "loss": 0.6812, "step": 1300 }, { "epoch": 0.015554944463707122, "grad_norm": 2.9773151874542236, "learning_rate": 5.179282868525896e-06, "loss": 0.6101, "step": 1301 }, { "epoch": 0.015566900608567774, "grad_norm": 6.328124523162842, "learning_rate": 5.1832669322709175e-06, "loss": 0.658, "step": 1302 }, { "epoch": 0.015578856753428424, "grad_norm": 6.011706829071045, "learning_rate": 5.187250996015937e-06, "loss": 0.5527, "step": 1303 }, { "epoch": 0.015590812898289076, "grad_norm": 10.000548362731934, "learning_rate": 5.191235059760956e-06, "loss": 0.5887, "step": 1304 }, { "epoch": 0.015602769043149728, "grad_norm": 18.218984603881836, "learning_rate": 5.195219123505977e-06, "loss": 0.5392, "step": 1305 }, { "epoch": 0.015614725188010378, "grad_norm": 4.064883708953857, "learning_rate": 5.199203187250996e-06, "loss": 0.617, "step": 1306 }, { "epoch": 0.01562668133287103, "grad_norm": 2.9113922119140625, "learning_rate": 5.2031872509960164e-06, "loss": 0.636, "step": 1307 }, { "epoch": 0.01563863747773168, "grad_norm": 3.4781527519226074, "learning_rate": 5.207171314741037e-06, "loss": 0.6021, "step": 1308 }, { "epoch": 0.015650593622592333, "grad_norm": 3.8859755992889404, "learning_rate": 5.211155378486056e-06, "loss": 0.5701, "step": 1309 }, { "epoch": 0.01566254976745298, "grad_norm": 3.782304048538208, "learning_rate": 5.2151394422310765e-06, "loss": 0.5937, "step": 1310 }, { "epoch": 0.015674505912313633, "grad_norm": 2.0265965461730957, "learning_rate": 5.219123505976096e-06, "loss": 0.555, "step": 1311 }, { "epoch": 0.015686462057174284, "grad_norm": 3.0067873001098633, "learning_rate": 5.223107569721116e-06, "loss": 0.5706, "step": 1312 }, { "epoch": 0.015698418202034936, "grad_norm": 2.3450374603271484, "learning_rate": 5.227091633466136e-06, "loss": 0.6586, "step": 1313 }, { "epoch": 0.015710374346895588, "grad_norm": 1.6421220302581787, "learning_rate": 5.231075697211155e-06, "loss": 0.5407, "step": 1314 }, { "epoch": 0.01572233049175624, "grad_norm": 2.3334808349609375, "learning_rate": 5.235059760956176e-06, "loss": 0.5365, "step": 1315 }, { "epoch": 0.015734286636616888, "grad_norm": 3.3220889568328857, "learning_rate": 5.239043824701196e-06, "loss": 0.5988, "step": 1316 }, { "epoch": 0.01574624278147754, "grad_norm": 2.2675139904022217, "learning_rate": 5.243027888446216e-06, "loss": 0.6247, "step": 1317 }, { "epoch": 0.01575819892633819, "grad_norm": 2.2263545989990234, "learning_rate": 5.2470119521912355e-06, "loss": 0.661, "step": 1318 }, { "epoch": 0.015770155071198843, "grad_norm": 8.386923789978027, "learning_rate": 5.250996015936255e-06, "loss": 0.6637, "step": 1319 }, { "epoch": 0.015782111216059495, "grad_norm": 2.374720335006714, "learning_rate": 5.254980079681275e-06, "loss": 0.5617, "step": 1320 }, { "epoch": 0.015794067360920146, "grad_norm": 2.127033233642578, "learning_rate": 5.258964143426295e-06, "loss": 0.5632, "step": 1321 }, { "epoch": 0.015806023505780795, "grad_norm": 3.1841235160827637, "learning_rate": 5.262948207171316e-06, "loss": 0.6608, "step": 1322 }, { "epoch": 0.015817979650641446, "grad_norm": 3.162280797958374, "learning_rate": 5.266932270916335e-06, "loss": 0.6064, "step": 1323 }, { "epoch": 0.015829935795502098, "grad_norm": 2.6855456829071045, "learning_rate": 5.270916334661355e-06, "loss": 0.543, "step": 1324 }, { "epoch": 0.01584189194036275, "grad_norm": 3.199907064437866, "learning_rate": 5.274900398406375e-06, "loss": 0.5755, "step": 1325 }, { "epoch": 0.0158538480852234, "grad_norm": 4.44535493850708, "learning_rate": 5.278884462151395e-06, "loss": 0.5532, "step": 1326 }, { "epoch": 0.015865804230084053, "grad_norm": 3.333646535873413, "learning_rate": 5.282868525896415e-06, "loss": 0.67, "step": 1327 }, { "epoch": 0.0158777603749447, "grad_norm": 2.488429307937622, "learning_rate": 5.286852589641434e-06, "loss": 0.6436, "step": 1328 }, { "epoch": 0.015889716519805353, "grad_norm": 2.272552013397217, "learning_rate": 5.290836653386454e-06, "loss": 0.546, "step": 1329 }, { "epoch": 0.015901672664666005, "grad_norm": 3.398350477218628, "learning_rate": 5.294820717131475e-06, "loss": 0.587, "step": 1330 }, { "epoch": 0.015913628809526657, "grad_norm": 2.050680160522461, "learning_rate": 5.298804780876494e-06, "loss": 0.6113, "step": 1331 }, { "epoch": 0.01592558495438731, "grad_norm": 4.9863810539245605, "learning_rate": 5.302788844621515e-06, "loss": 0.6147, "step": 1332 }, { "epoch": 0.01593754109924796, "grad_norm": 2.2828803062438965, "learning_rate": 5.306772908366534e-06, "loss": 0.6891, "step": 1333 }, { "epoch": 0.01594949724410861, "grad_norm": 3.2066287994384766, "learning_rate": 5.310756972111554e-06, "loss": 0.5207, "step": 1334 }, { "epoch": 0.01596145338896926, "grad_norm": 1.9513922929763794, "learning_rate": 5.314741035856574e-06, "loss": 0.5567, "step": 1335 }, { "epoch": 0.015973409533829912, "grad_norm": 2.2607316970825195, "learning_rate": 5.318725099601593e-06, "loss": 0.5546, "step": 1336 }, { "epoch": 0.015985365678690563, "grad_norm": 2.073347806930542, "learning_rate": 5.3227091633466145e-06, "loss": 0.5797, "step": 1337 }, { "epoch": 0.015997321823551215, "grad_norm": 3.9405829906463623, "learning_rate": 5.326693227091634e-06, "loss": 0.5808, "step": 1338 }, { "epoch": 0.016009277968411867, "grad_norm": 4.322659969329834, "learning_rate": 5.3306772908366535e-06, "loss": 0.6723, "step": 1339 }, { "epoch": 0.016021234113272515, "grad_norm": 5.628212928771973, "learning_rate": 5.334661354581674e-06, "loss": 0.5871, "step": 1340 }, { "epoch": 0.016033190258133167, "grad_norm": 2.7447378635406494, "learning_rate": 5.338645418326693e-06, "loss": 0.5618, "step": 1341 }, { "epoch": 0.01604514640299382, "grad_norm": 6.6985015869140625, "learning_rate": 5.342629482071714e-06, "loss": 0.5924, "step": 1342 }, { "epoch": 0.01605710254785447, "grad_norm": 14.390438079833984, "learning_rate": 5.346613545816734e-06, "loss": 0.6545, "step": 1343 }, { "epoch": 0.016069058692715122, "grad_norm": 3.265894889831543, "learning_rate": 5.350597609561753e-06, "loss": 0.6911, "step": 1344 }, { "epoch": 0.016081014837575774, "grad_norm": 2.4849483966827393, "learning_rate": 5.354581673306774e-06, "loss": 0.5411, "step": 1345 }, { "epoch": 0.016092970982436422, "grad_norm": 4.481477737426758, "learning_rate": 5.358565737051793e-06, "loss": 0.6598, "step": 1346 }, { "epoch": 0.016104927127297074, "grad_norm": 3.023721218109131, "learning_rate": 5.362549800796813e-06, "loss": 0.5879, "step": 1347 }, { "epoch": 0.016116883272157725, "grad_norm": 64.32624053955078, "learning_rate": 5.366533864541833e-06, "loss": 0.5623, "step": 1348 }, { "epoch": 0.016128839417018377, "grad_norm": 2.7158029079437256, "learning_rate": 5.370517928286852e-06, "loss": 0.5724, "step": 1349 }, { "epoch": 0.01614079556187903, "grad_norm": 5.302383899688721, "learning_rate": 5.374501992031873e-06, "loss": 0.6258, "step": 1350 }, { "epoch": 0.01615275170673968, "grad_norm": 2.8108909130096436, "learning_rate": 5.378486055776893e-06, "loss": 0.5332, "step": 1351 }, { "epoch": 0.01616470785160033, "grad_norm": 1.5176169872283936, "learning_rate": 5.382470119521913e-06, "loss": 0.4917, "step": 1352 }, { "epoch": 0.01617666399646098, "grad_norm": 2.483142137527466, "learning_rate": 5.386454183266933e-06, "loss": 0.6453, "step": 1353 }, { "epoch": 0.016188620141321632, "grad_norm": 3.8153269290924072, "learning_rate": 5.390438247011953e-06, "loss": 0.6017, "step": 1354 }, { "epoch": 0.016200576286182284, "grad_norm": 2.1192190647125244, "learning_rate": 5.394422310756972e-06, "loss": 0.5778, "step": 1355 }, { "epoch": 0.016212532431042936, "grad_norm": 4.636434555053711, "learning_rate": 5.398406374501992e-06, "loss": 0.5977, "step": 1356 }, { "epoch": 0.016224488575903584, "grad_norm": 3.327235460281372, "learning_rate": 5.402390438247013e-06, "loss": 0.6785, "step": 1357 }, { "epoch": 0.016236444720764236, "grad_norm": 1.99201238155365, "learning_rate": 5.4063745019920325e-06, "loss": 0.6187, "step": 1358 }, { "epoch": 0.016248400865624887, "grad_norm": 4.23765230178833, "learning_rate": 5.410358565737053e-06, "loss": 0.6204, "step": 1359 }, { "epoch": 0.01626035701048554, "grad_norm": 3.1849286556243896, "learning_rate": 5.414342629482072e-06, "loss": 0.6585, "step": 1360 }, { "epoch": 0.01627231315534619, "grad_norm": 6.455205917358398, "learning_rate": 5.418326693227092e-06, "loss": 0.5574, "step": 1361 }, { "epoch": 0.016284269300206843, "grad_norm": 2.568089485168457, "learning_rate": 5.422310756972112e-06, "loss": 0.6848, "step": 1362 }, { "epoch": 0.01629622544506749, "grad_norm": 3.5947585105895996, "learning_rate": 5.4262948207171314e-06, "loss": 0.5987, "step": 1363 }, { "epoch": 0.016308181589928143, "grad_norm": 3.2483489513397217, "learning_rate": 5.430278884462153e-06, "loss": 0.5767, "step": 1364 }, { "epoch": 0.016320137734788794, "grad_norm": 5.039371967315674, "learning_rate": 5.434262948207172e-06, "loss": 0.5645, "step": 1365 }, { "epoch": 0.016332093879649446, "grad_norm": 4.551858425140381, "learning_rate": 5.4382470119521915e-06, "loss": 0.6044, "step": 1366 }, { "epoch": 0.016344050024510098, "grad_norm": 2.723836898803711, "learning_rate": 5.442231075697212e-06, "loss": 0.5474, "step": 1367 }, { "epoch": 0.01635600616937075, "grad_norm": 3.236194133758545, "learning_rate": 5.446215139442231e-06, "loss": 0.5935, "step": 1368 }, { "epoch": 0.016367962314231398, "grad_norm": 2.988379716873169, "learning_rate": 5.4501992031872516e-06, "loss": 0.5147, "step": 1369 }, { "epoch": 0.01637991845909205, "grad_norm": 1.9235657453536987, "learning_rate": 5.454183266932271e-06, "loss": 0.6556, "step": 1370 }, { "epoch": 0.0163918746039527, "grad_norm": 1.9208550453186035, "learning_rate": 5.4581673306772905e-06, "loss": 0.5272, "step": 1371 }, { "epoch": 0.016403830748813353, "grad_norm": 4.167370319366455, "learning_rate": 5.462151394422312e-06, "loss": 0.6505, "step": 1372 }, { "epoch": 0.016415786893674005, "grad_norm": 1.9510821104049683, "learning_rate": 5.466135458167331e-06, "loss": 0.5767, "step": 1373 }, { "epoch": 0.016427743038534656, "grad_norm": 3.5695078372955322, "learning_rate": 5.470119521912351e-06, "loss": 0.6129, "step": 1374 }, { "epoch": 0.016439699183395304, "grad_norm": 5.0915937423706055, "learning_rate": 5.474103585657371e-06, "loss": 0.7524, "step": 1375 }, { "epoch": 0.016451655328255956, "grad_norm": 2.834230422973633, "learning_rate": 5.47808764940239e-06, "loss": 0.6869, "step": 1376 }, { "epoch": 0.016463611473116608, "grad_norm": 2.938239574432373, "learning_rate": 5.4820717131474115e-06, "loss": 0.5726, "step": 1377 }, { "epoch": 0.01647556761797726, "grad_norm": 2.14595365524292, "learning_rate": 5.486055776892431e-06, "loss": 0.6933, "step": 1378 }, { "epoch": 0.01648752376283791, "grad_norm": 2.7606921195983887, "learning_rate": 5.490039840637451e-06, "loss": 0.5983, "step": 1379 }, { "epoch": 0.016499479907698563, "grad_norm": 2.7387337684631348, "learning_rate": 5.494023904382471e-06, "loss": 0.6085, "step": 1380 }, { "epoch": 0.01651143605255921, "grad_norm": 2.383375644683838, "learning_rate": 5.49800796812749e-06, "loss": 0.5828, "step": 1381 }, { "epoch": 0.016523392197419863, "grad_norm": 2.9104504585266113, "learning_rate": 5.5019920318725104e-06, "loss": 0.5746, "step": 1382 }, { "epoch": 0.016535348342280515, "grad_norm": 2.8906352519989014, "learning_rate": 5.50597609561753e-06, "loss": 0.6129, "step": 1383 }, { "epoch": 0.016547304487141166, "grad_norm": 3.312849521636963, "learning_rate": 5.509960159362551e-06, "loss": 0.6352, "step": 1384 }, { "epoch": 0.016559260632001818, "grad_norm": 3.440714120864868, "learning_rate": 5.5139442231075705e-06, "loss": 0.568, "step": 1385 }, { "epoch": 0.01657121677686247, "grad_norm": 5.588242530822754, "learning_rate": 5.51792828685259e-06, "loss": 0.658, "step": 1386 }, { "epoch": 0.016583172921723118, "grad_norm": 2.101017713546753, "learning_rate": 5.52191235059761e-06, "loss": 0.6588, "step": 1387 }, { "epoch": 0.01659512906658377, "grad_norm": 2.8699629306793213, "learning_rate": 5.52589641434263e-06, "loss": 0.5986, "step": 1388 }, { "epoch": 0.01660708521144442, "grad_norm": 2.740004062652588, "learning_rate": 5.52988047808765e-06, "loss": 0.6477, "step": 1389 }, { "epoch": 0.016619041356305073, "grad_norm": 173.08035278320312, "learning_rate": 5.5338645418326695e-06, "loss": 0.5835, "step": 1390 }, { "epoch": 0.016630997501165725, "grad_norm": 2.729294776916504, "learning_rate": 5.537848605577689e-06, "loss": 0.6198, "step": 1391 }, { "epoch": 0.016642953646026377, "grad_norm": 2.029137372970581, "learning_rate": 5.54183266932271e-06, "loss": 0.552, "step": 1392 }, { "epoch": 0.016654909790887025, "grad_norm": 2.556058168411255, "learning_rate": 5.5458167330677295e-06, "loss": 0.5726, "step": 1393 }, { "epoch": 0.016666865935747677, "grad_norm": 1.8069993257522583, "learning_rate": 5.54980079681275e-06, "loss": 0.6504, "step": 1394 }, { "epoch": 0.01667882208060833, "grad_norm": 2.065058708190918, "learning_rate": 5.553784860557769e-06, "loss": 0.5227, "step": 1395 }, { "epoch": 0.01669077822546898, "grad_norm": 5.274840354919434, "learning_rate": 5.557768924302789e-06, "loss": 0.6996, "step": 1396 }, { "epoch": 0.016702734370329632, "grad_norm": 2.412572145462036, "learning_rate": 5.561752988047809e-06, "loss": 0.6505, "step": 1397 }, { "epoch": 0.016714690515190284, "grad_norm": 4.441572666168213, "learning_rate": 5.5657370517928285e-06, "loss": 0.634, "step": 1398 }, { "epoch": 0.016726646660050932, "grad_norm": 2.830500364303589, "learning_rate": 5.56972111553785e-06, "loss": 0.6271, "step": 1399 }, { "epoch": 0.016738602804911584, "grad_norm": 2.7772696018218994, "learning_rate": 5.573705179282869e-06, "loss": 0.5517, "step": 1400 }, { "epoch": 0.016750558949772235, "grad_norm": 3.759178400039673, "learning_rate": 5.577689243027889e-06, "loss": 0.6897, "step": 1401 }, { "epoch": 0.016762515094632887, "grad_norm": 3.1488802433013916, "learning_rate": 5.581673306772909e-06, "loss": 0.6268, "step": 1402 }, { "epoch": 0.01677447123949354, "grad_norm": 2.866044759750366, "learning_rate": 5.585657370517928e-06, "loss": 0.5839, "step": 1403 }, { "epoch": 0.01678642738435419, "grad_norm": 3.0665557384490967, "learning_rate": 5.589641434262949e-06, "loss": 0.7165, "step": 1404 }, { "epoch": 0.01679838352921484, "grad_norm": 2.173129081726074, "learning_rate": 5.593625498007968e-06, "loss": 0.6209, "step": 1405 }, { "epoch": 0.01681033967407549, "grad_norm": 3.857165575027466, "learning_rate": 5.5976095617529876e-06, "loss": 0.5843, "step": 1406 }, { "epoch": 0.016822295818936142, "grad_norm": 2.2808618545532227, "learning_rate": 5.601593625498009e-06, "loss": 0.6676, "step": 1407 }, { "epoch": 0.016834251963796794, "grad_norm": 3.2252955436706543, "learning_rate": 5.605577689243028e-06, "loss": 0.6522, "step": 1408 }, { "epoch": 0.016846208108657446, "grad_norm": 6.077783584594727, "learning_rate": 5.6095617529880485e-06, "loss": 0.599, "step": 1409 }, { "epoch": 0.016858164253518097, "grad_norm": 3.1703596115112305, "learning_rate": 5.613545816733068e-06, "loss": 0.6512, "step": 1410 }, { "epoch": 0.016870120398378746, "grad_norm": 6.971599578857422, "learning_rate": 5.617529880478087e-06, "loss": 0.6731, "step": 1411 }, { "epoch": 0.016882076543239397, "grad_norm": 2.7691237926483154, "learning_rate": 5.6215139442231085e-06, "loss": 0.5969, "step": 1412 }, { "epoch": 0.01689403268810005, "grad_norm": 3.613252878189087, "learning_rate": 5.625498007968128e-06, "loss": 0.5956, "step": 1413 }, { "epoch": 0.0169059888329607, "grad_norm": 3.1268067359924316, "learning_rate": 5.629482071713148e-06, "loss": 0.5714, "step": 1414 }, { "epoch": 0.016917944977821352, "grad_norm": 11.05225658416748, "learning_rate": 5.633466135458168e-06, "loss": 0.5845, "step": 1415 }, { "epoch": 0.016929901122682004, "grad_norm": 4.185645580291748, "learning_rate": 5.637450199203188e-06, "loss": 0.6313, "step": 1416 }, { "epoch": 0.016941857267542652, "grad_norm": 2.8294365406036377, "learning_rate": 5.6414342629482075e-06, "loss": 0.5231, "step": 1417 }, { "epoch": 0.016953813412403304, "grad_norm": 8.486681938171387, "learning_rate": 5.645418326693227e-06, "loss": 0.5999, "step": 1418 }, { "epoch": 0.016965769557263956, "grad_norm": 3.0505619049072266, "learning_rate": 5.649402390438248e-06, "loss": 0.6149, "step": 1419 }, { "epoch": 0.016977725702124608, "grad_norm": 9.756336212158203, "learning_rate": 5.653386454183268e-06, "loss": 0.7034, "step": 1420 }, { "epoch": 0.01698968184698526, "grad_norm": 2.5217742919921875, "learning_rate": 5.657370517928288e-06, "loss": 0.5445, "step": 1421 }, { "epoch": 0.017001637991845907, "grad_norm": 2.358799457550049, "learning_rate": 5.661354581673307e-06, "loss": 0.6095, "step": 1422 }, { "epoch": 0.01701359413670656, "grad_norm": 3.3012523651123047, "learning_rate": 5.665338645418327e-06, "loss": 0.5796, "step": 1423 }, { "epoch": 0.01702555028156721, "grad_norm": 10.254622459411621, "learning_rate": 5.669322709163347e-06, "loss": 0.6539, "step": 1424 }, { "epoch": 0.017037506426427863, "grad_norm": 3.0901918411254883, "learning_rate": 5.6733067729083666e-06, "loss": 0.574, "step": 1425 }, { "epoch": 0.017049462571288514, "grad_norm": 1.9121382236480713, "learning_rate": 5.677290836653388e-06, "loss": 0.5975, "step": 1426 }, { "epoch": 0.017061418716149166, "grad_norm": 2.4858696460723877, "learning_rate": 5.681274900398407e-06, "loss": 0.6944, "step": 1427 }, { "epoch": 0.017073374861009814, "grad_norm": 5.438307285308838, "learning_rate": 5.685258964143427e-06, "loss": 0.6972, "step": 1428 }, { "epoch": 0.017085331005870466, "grad_norm": 1.9493458271026611, "learning_rate": 5.689243027888447e-06, "loss": 0.6014, "step": 1429 }, { "epoch": 0.017097287150731118, "grad_norm": 4.11181116104126, "learning_rate": 5.693227091633466e-06, "loss": 0.713, "step": 1430 }, { "epoch": 0.01710924329559177, "grad_norm": 3.0521130561828613, "learning_rate": 5.697211155378487e-06, "loss": 0.682, "step": 1431 }, { "epoch": 0.01712119944045242, "grad_norm": 4.300343036651611, "learning_rate": 5.701195219123506e-06, "loss": 0.5615, "step": 1432 }, { "epoch": 0.017133155585313073, "grad_norm": 14.04987621307373, "learning_rate": 5.705179282868526e-06, "loss": 0.7515, "step": 1433 }, { "epoch": 0.01714511173017372, "grad_norm": 2.781975030899048, "learning_rate": 5.709163346613547e-06, "loss": 0.5415, "step": 1434 }, { "epoch": 0.017157067875034373, "grad_norm": 3.049262046813965, "learning_rate": 5.713147410358566e-06, "loss": 0.5825, "step": 1435 }, { "epoch": 0.017169024019895025, "grad_norm": 3.9167027473449707, "learning_rate": 5.7171314741035865e-06, "loss": 0.6389, "step": 1436 }, { "epoch": 0.017180980164755676, "grad_norm": 2.2509586811065674, "learning_rate": 5.721115537848606e-06, "loss": 0.6217, "step": 1437 }, { "epoch": 0.017192936309616328, "grad_norm": 4.197068691253662, "learning_rate": 5.7250996015936254e-06, "loss": 0.7059, "step": 1438 }, { "epoch": 0.01720489245447698, "grad_norm": 3.9123141765594482, "learning_rate": 5.729083665338646e-06, "loss": 0.5473, "step": 1439 }, { "epoch": 0.017216848599337628, "grad_norm": 7.273991584777832, "learning_rate": 5.733067729083665e-06, "loss": 0.6331, "step": 1440 }, { "epoch": 0.01722880474419828, "grad_norm": 3.1684365272521973, "learning_rate": 5.737051792828686e-06, "loss": 0.572, "step": 1441 }, { "epoch": 0.01724076088905893, "grad_norm": 27.898895263671875, "learning_rate": 5.741035856573706e-06, "loss": 0.6995, "step": 1442 }, { "epoch": 0.017252717033919583, "grad_norm": 4.443609237670898, "learning_rate": 5.745019920318725e-06, "loss": 0.5395, "step": 1443 }, { "epoch": 0.017264673178780235, "grad_norm": 3.819740056991577, "learning_rate": 5.7490039840637456e-06, "loss": 0.6068, "step": 1444 }, { "epoch": 0.017276629323640887, "grad_norm": 2.3859264850616455, "learning_rate": 5.752988047808765e-06, "loss": 0.5526, "step": 1445 }, { "epoch": 0.017288585468501535, "grad_norm": 2.570202112197876, "learning_rate": 5.756972111553786e-06, "loss": 0.5766, "step": 1446 }, { "epoch": 0.017300541613362187, "grad_norm": 9.875826835632324, "learning_rate": 5.760956175298806e-06, "loss": 0.7349, "step": 1447 }, { "epoch": 0.01731249775822284, "grad_norm": 4.129602432250977, "learning_rate": 5.764940239043825e-06, "loss": 0.6014, "step": 1448 }, { "epoch": 0.01732445390308349, "grad_norm": 6.308219909667969, "learning_rate": 5.768924302788845e-06, "loss": 0.574, "step": 1449 }, { "epoch": 0.01733641004794414, "grad_norm": 2.4226837158203125, "learning_rate": 5.772908366533865e-06, "loss": 0.5857, "step": 1450 }, { "epoch": 0.017348366192804793, "grad_norm": 4.148144245147705, "learning_rate": 5.776892430278885e-06, "loss": 0.6196, "step": 1451 }, { "epoch": 0.01736032233766544, "grad_norm": 2.8323140144348145, "learning_rate": 5.780876494023905e-06, "loss": 0.7715, "step": 1452 }, { "epoch": 0.017372278482526093, "grad_norm": 2.0304932594299316, "learning_rate": 5.784860557768924e-06, "loss": 0.6367, "step": 1453 }, { "epoch": 0.017384234627386745, "grad_norm": 3.0531861782073975, "learning_rate": 5.788844621513945e-06, "loss": 0.6369, "step": 1454 }, { "epoch": 0.017396190772247397, "grad_norm": 5.404560565948486, "learning_rate": 5.792828685258965e-06, "loss": 0.7015, "step": 1455 }, { "epoch": 0.01740814691710805, "grad_norm": 2.821167469024658, "learning_rate": 5.796812749003985e-06, "loss": 0.6183, "step": 1456 }, { "epoch": 0.0174201030619687, "grad_norm": 3.3094234466552734, "learning_rate": 5.8007968127490044e-06, "loss": 0.6597, "step": 1457 }, { "epoch": 0.01743205920682935, "grad_norm": 2.2913572788238525, "learning_rate": 5.804780876494024e-06, "loss": 0.6218, "step": 1458 }, { "epoch": 0.01744401535169, "grad_norm": 3.5480904579162598, "learning_rate": 5.808764940239044e-06, "loss": 0.5521, "step": 1459 }, { "epoch": 0.017455971496550652, "grad_norm": 3.0802791118621826, "learning_rate": 5.812749003984064e-06, "loss": 0.5531, "step": 1460 }, { "epoch": 0.017467927641411304, "grad_norm": 2.8316144943237305, "learning_rate": 5.816733067729085e-06, "loss": 0.5764, "step": 1461 }, { "epoch": 0.017479883786271955, "grad_norm": 2.9610538482666016, "learning_rate": 5.820717131474104e-06, "loss": 0.6261, "step": 1462 }, { "epoch": 0.017491839931132607, "grad_norm": 2.277621030807495, "learning_rate": 5.824701195219124e-06, "loss": 0.5751, "step": 1463 }, { "epoch": 0.017503796075993255, "grad_norm": 5.968082904815674, "learning_rate": 5.828685258964144e-06, "loss": 0.6058, "step": 1464 }, { "epoch": 0.017515752220853907, "grad_norm": 5.082950592041016, "learning_rate": 5.8326693227091635e-06, "loss": 0.6422, "step": 1465 }, { "epoch": 0.01752770836571456, "grad_norm": 2.347637414932251, "learning_rate": 5.836653386454184e-06, "loss": 0.5885, "step": 1466 }, { "epoch": 0.01753966451057521, "grad_norm": 1.8606735467910767, "learning_rate": 5.840637450199203e-06, "loss": 0.5941, "step": 1467 }, { "epoch": 0.017551620655435862, "grad_norm": 6.392658233642578, "learning_rate": 5.844621513944223e-06, "loss": 0.6144, "step": 1468 }, { "epoch": 0.017563576800296514, "grad_norm": 18.1472110748291, "learning_rate": 5.848605577689244e-06, "loss": 0.6195, "step": 1469 }, { "epoch": 0.017575532945157162, "grad_norm": 2.773930311203003, "learning_rate": 5.852589641434263e-06, "loss": 0.5472, "step": 1470 }, { "epoch": 0.017587489090017814, "grad_norm": 2.8204562664031982, "learning_rate": 5.856573705179284e-06, "loss": 0.7443, "step": 1471 }, { "epoch": 0.017599445234878466, "grad_norm": 3.0278561115264893, "learning_rate": 5.860557768924303e-06, "loss": 0.5621, "step": 1472 }, { "epoch": 0.017611401379739117, "grad_norm": 5.1454548835754395, "learning_rate": 5.8645418326693225e-06, "loss": 0.7125, "step": 1473 }, { "epoch": 0.01762335752459977, "grad_norm": 3.5486197471618652, "learning_rate": 5.868525896414343e-06, "loss": 0.6951, "step": 1474 }, { "epoch": 0.01763531366946042, "grad_norm": 3.57377290725708, "learning_rate": 5.872509960159362e-06, "loss": 0.5505, "step": 1475 }, { "epoch": 0.01764726981432107, "grad_norm": 21.592845916748047, "learning_rate": 5.8764940239043834e-06, "loss": 0.6246, "step": 1476 }, { "epoch": 0.01765922595918172, "grad_norm": 3.4898242950439453, "learning_rate": 5.880478087649403e-06, "loss": 0.603, "step": 1477 }, { "epoch": 0.017671182104042372, "grad_norm": 2.8028693199157715, "learning_rate": 5.884462151394423e-06, "loss": 0.6813, "step": 1478 }, { "epoch": 0.017683138248903024, "grad_norm": 3.096858501434326, "learning_rate": 5.888446215139443e-06, "loss": 0.7032, "step": 1479 }, { "epoch": 0.017695094393763676, "grad_norm": 2.3198485374450684, "learning_rate": 5.892430278884462e-06, "loss": 0.5772, "step": 1480 }, { "epoch": 0.017707050538624328, "grad_norm": 3.4935302734375, "learning_rate": 5.896414342629483e-06, "loss": 0.6418, "step": 1481 }, { "epoch": 0.017719006683484976, "grad_norm": 1.865674614906311, "learning_rate": 5.900398406374503e-06, "loss": 0.5341, "step": 1482 }, { "epoch": 0.017730962828345628, "grad_norm": 2.5818326473236084, "learning_rate": 5.904382470119523e-06, "loss": 0.6025, "step": 1483 }, { "epoch": 0.01774291897320628, "grad_norm": 2.9566822052001953, "learning_rate": 5.9083665338645425e-06, "loss": 0.5628, "step": 1484 }, { "epoch": 0.01775487511806693, "grad_norm": 4.402697563171387, "learning_rate": 5.912350597609562e-06, "loss": 0.5875, "step": 1485 }, { "epoch": 0.017766831262927583, "grad_norm": 8.134008407592773, "learning_rate": 5.916334661354582e-06, "loss": 0.6829, "step": 1486 }, { "epoch": 0.017778787407788234, "grad_norm": 3.6130316257476807, "learning_rate": 5.920318725099602e-06, "loss": 0.7363, "step": 1487 }, { "epoch": 0.017790743552648883, "grad_norm": 9.21349811553955, "learning_rate": 5.924302788844623e-06, "loss": 0.582, "step": 1488 }, { "epoch": 0.017802699697509534, "grad_norm": 3.600539207458496, "learning_rate": 5.928286852589642e-06, "loss": 0.564, "step": 1489 }, { "epoch": 0.017814655842370186, "grad_norm": 3.205430030822754, "learning_rate": 5.932270916334662e-06, "loss": 0.5547, "step": 1490 }, { "epoch": 0.017826611987230838, "grad_norm": 17.603242874145508, "learning_rate": 5.936254980079682e-06, "loss": 0.7282, "step": 1491 }, { "epoch": 0.01783856813209149, "grad_norm": 2.1132402420043945, "learning_rate": 5.9402390438247015e-06, "loss": 0.6677, "step": 1492 }, { "epoch": 0.017850524276952138, "grad_norm": 2.5863263607025146, "learning_rate": 5.944223107569722e-06, "loss": 0.6315, "step": 1493 }, { "epoch": 0.01786248042181279, "grad_norm": 4.101841449737549, "learning_rate": 5.948207171314741e-06, "loss": 0.602, "step": 1494 }, { "epoch": 0.01787443656667344, "grad_norm": 10.392157554626465, "learning_rate": 5.952191235059761e-06, "loss": 0.6667, "step": 1495 }, { "epoch": 0.017886392711534093, "grad_norm": 1.9971224069595337, "learning_rate": 5.956175298804782e-06, "loss": 0.6026, "step": 1496 }, { "epoch": 0.017898348856394745, "grad_norm": 2.775780439376831, "learning_rate": 5.960159362549801e-06, "loss": 0.6297, "step": 1497 }, { "epoch": 0.017910305001255396, "grad_norm": 2.796347141265869, "learning_rate": 5.964143426294822e-06, "loss": 0.6558, "step": 1498 }, { "epoch": 0.017922261146116045, "grad_norm": 3.424852132797241, "learning_rate": 5.968127490039841e-06, "loss": 0.5726, "step": 1499 }, { "epoch": 0.017934217290976696, "grad_norm": 4.988128662109375, "learning_rate": 5.9721115537848606e-06, "loss": 0.6647, "step": 1500 }, { "epoch": 0.017946173435837348, "grad_norm": 7.975927352905273, "learning_rate": 5.976095617529881e-06, "loss": 0.631, "step": 1501 }, { "epoch": 0.017958129580698, "grad_norm": 3.3226418495178223, "learning_rate": 5.9800796812749e-06, "loss": 0.7031, "step": 1502 }, { "epoch": 0.01797008572555865, "grad_norm": 3.3791656494140625, "learning_rate": 5.9840637450199215e-06, "loss": 0.6644, "step": 1503 }, { "epoch": 0.017982041870419303, "grad_norm": 2.420731544494629, "learning_rate": 5.988047808764941e-06, "loss": 0.5081, "step": 1504 }, { "epoch": 0.01799399801527995, "grad_norm": 2.5095326900482178, "learning_rate": 5.99203187250996e-06, "loss": 0.6861, "step": 1505 }, { "epoch": 0.018005954160140603, "grad_norm": 2.4759984016418457, "learning_rate": 5.996015936254981e-06, "loss": 0.562, "step": 1506 }, { "epoch": 0.018017910305001255, "grad_norm": 2.262936592102051, "learning_rate": 6e-06, "loss": 0.6414, "step": 1507 }, { "epoch": 0.018029866449861907, "grad_norm": 2.967951536178589, "learning_rate": 6.0039840637450205e-06, "loss": 0.6433, "step": 1508 }, { "epoch": 0.01804182259472256, "grad_norm": 2.9385745525360107, "learning_rate": 6.00796812749004e-06, "loss": 0.5961, "step": 1509 }, { "epoch": 0.01805377873958321, "grad_norm": 7.451219081878662, "learning_rate": 6.01195219123506e-06, "loss": 0.5833, "step": 1510 }, { "epoch": 0.01806573488444386, "grad_norm": 2.005272150039673, "learning_rate": 6.0159362549800805e-06, "loss": 0.5535, "step": 1511 }, { "epoch": 0.01807769102930451, "grad_norm": 6.768627166748047, "learning_rate": 6.0199203187251e-06, "loss": 0.6295, "step": 1512 }, { "epoch": 0.018089647174165162, "grad_norm": 2.0070383548736572, "learning_rate": 6.02390438247012e-06, "loss": 0.5971, "step": 1513 }, { "epoch": 0.018101603319025814, "grad_norm": 1.9783042669296265, "learning_rate": 6.02788844621514e-06, "loss": 0.6512, "step": 1514 }, { "epoch": 0.018113559463886465, "grad_norm": 3.224902391433716, "learning_rate": 6.031872509960159e-06, "loss": 0.6194, "step": 1515 }, { "epoch": 0.018125515608747117, "grad_norm": 2.074903964996338, "learning_rate": 6.03585657370518e-06, "loss": 0.6073, "step": 1516 }, { "epoch": 0.018137471753607765, "grad_norm": 6.372629165649414, "learning_rate": 6.0398406374502e-06, "loss": 0.6676, "step": 1517 }, { "epoch": 0.018149427898468417, "grad_norm": 2.9098873138427734, "learning_rate": 6.04382470119522e-06, "loss": 0.6069, "step": 1518 }, { "epoch": 0.01816138404332907, "grad_norm": 4.32054328918457, "learning_rate": 6.0478087649402396e-06, "loss": 0.6877, "step": 1519 }, { "epoch": 0.01817334018818972, "grad_norm": 1.6190193891525269, "learning_rate": 6.051792828685259e-06, "loss": 0.5941, "step": 1520 }, { "epoch": 0.018185296333050372, "grad_norm": 3.023528575897217, "learning_rate": 6.055776892430279e-06, "loss": 0.6453, "step": 1521 }, { "epoch": 0.018197252477911024, "grad_norm": 3.836082696914673, "learning_rate": 6.059760956175299e-06, "loss": 0.6597, "step": 1522 }, { "epoch": 0.018209208622771672, "grad_norm": 4.810429096221924, "learning_rate": 6.06374501992032e-06, "loss": 0.6121, "step": 1523 }, { "epoch": 0.018221164767632324, "grad_norm": 3.1813647747039795, "learning_rate": 6.067729083665339e-06, "loss": 0.6796, "step": 1524 }, { "epoch": 0.018233120912492976, "grad_norm": 2.868051290512085, "learning_rate": 6.071713147410359e-06, "loss": 0.6324, "step": 1525 }, { "epoch": 0.018245077057353627, "grad_norm": 1.72219717502594, "learning_rate": 6.075697211155379e-06, "loss": 0.6217, "step": 1526 }, { "epoch": 0.01825703320221428, "grad_norm": 3.1518943309783936, "learning_rate": 6.079681274900399e-06, "loss": 0.5117, "step": 1527 }, { "epoch": 0.01826898934707493, "grad_norm": 2.350059986114502, "learning_rate": 6.083665338645419e-06, "loss": 0.5766, "step": 1528 }, { "epoch": 0.01828094549193558, "grad_norm": 4.1885600090026855, "learning_rate": 6.087649402390438e-06, "loss": 0.6912, "step": 1529 }, { "epoch": 0.01829290163679623, "grad_norm": 2.310265302658081, "learning_rate": 6.091633466135458e-06, "loss": 0.502, "step": 1530 }, { "epoch": 0.018304857781656882, "grad_norm": 1.802080750465393, "learning_rate": 6.095617529880479e-06, "loss": 0.6988, "step": 1531 }, { "epoch": 0.018316813926517534, "grad_norm": 2.2415237426757812, "learning_rate": 6.0996015936254984e-06, "loss": 0.5712, "step": 1532 }, { "epoch": 0.018328770071378186, "grad_norm": 6.272188186645508, "learning_rate": 6.103585657370519e-06, "loss": 0.6355, "step": 1533 }, { "epoch": 0.018340726216238838, "grad_norm": 2.69877552986145, "learning_rate": 6.107569721115538e-06, "loss": 0.4778, "step": 1534 }, { "epoch": 0.018352682361099486, "grad_norm": 2.3542640209198, "learning_rate": 6.111553784860558e-06, "loss": 0.5909, "step": 1535 }, { "epoch": 0.018364638505960137, "grad_norm": 2.712921380996704, "learning_rate": 6.115537848605578e-06, "loss": 0.6434, "step": 1536 }, { "epoch": 0.01837659465082079, "grad_norm": 4.178050518035889, "learning_rate": 6.119521912350597e-06, "loss": 0.621, "step": 1537 }, { "epoch": 0.01838855079568144, "grad_norm": 3.085336208343506, "learning_rate": 6.1235059760956186e-06, "loss": 0.5493, "step": 1538 }, { "epoch": 0.018400506940542093, "grad_norm": 2.366023302078247, "learning_rate": 6.127490039840638e-06, "loss": 0.5975, "step": 1539 }, { "epoch": 0.018412463085402744, "grad_norm": 7.802915573120117, "learning_rate": 6.131474103585658e-06, "loss": 0.6329, "step": 1540 }, { "epoch": 0.018424419230263393, "grad_norm": 3.5023655891418457, "learning_rate": 6.135458167330678e-06, "loss": 0.6047, "step": 1541 }, { "epoch": 0.018436375375124044, "grad_norm": 7.526886940002441, "learning_rate": 6.139442231075697e-06, "loss": 0.7187, "step": 1542 }, { "epoch": 0.018448331519984696, "grad_norm": 3.335608959197998, "learning_rate": 6.1434262948207175e-06, "loss": 0.6585, "step": 1543 }, { "epoch": 0.018460287664845348, "grad_norm": 5.043821334838867, "learning_rate": 6.147410358565737e-06, "loss": 0.6115, "step": 1544 }, { "epoch": 0.018472243809706, "grad_norm": 3.4783310890197754, "learning_rate": 6.151394422310758e-06, "loss": 0.5874, "step": 1545 }, { "epoch": 0.01848419995456665, "grad_norm": 4.489885330200195, "learning_rate": 6.155378486055778e-06, "loss": 0.6074, "step": 1546 }, { "epoch": 0.0184961560994273, "grad_norm": 3.6084468364715576, "learning_rate": 6.159362549800797e-06, "loss": 0.6167, "step": 1547 }, { "epoch": 0.01850811224428795, "grad_norm": 4.231141090393066, "learning_rate": 6.163346613545817e-06, "loss": 0.5488, "step": 1548 }, { "epoch": 0.018520068389148603, "grad_norm": 1.775104284286499, "learning_rate": 6.167330677290837e-06, "loss": 0.5374, "step": 1549 }, { "epoch": 0.018532024534009255, "grad_norm": 4.180036544799805, "learning_rate": 6.171314741035858e-06, "loss": 0.6245, "step": 1550 }, { "epoch": 0.018543980678869906, "grad_norm": 7.816629409790039, "learning_rate": 6.1752988047808774e-06, "loss": 0.6338, "step": 1551 }, { "epoch": 0.018555936823730558, "grad_norm": 3.130089044570923, "learning_rate": 6.179282868525897e-06, "loss": 0.5656, "step": 1552 }, { "epoch": 0.018567892968591206, "grad_norm": 3.1532602310180664, "learning_rate": 6.183266932270917e-06, "loss": 0.6497, "step": 1553 }, { "epoch": 0.018579849113451858, "grad_norm": 2.406266689300537, "learning_rate": 6.187250996015937e-06, "loss": 0.5719, "step": 1554 }, { "epoch": 0.01859180525831251, "grad_norm": 2.6379523277282715, "learning_rate": 6.191235059760957e-06, "loss": 0.5407, "step": 1555 }, { "epoch": 0.01860376140317316, "grad_norm": 2.707315444946289, "learning_rate": 6.195219123505976e-06, "loss": 0.6455, "step": 1556 }, { "epoch": 0.018615717548033813, "grad_norm": 9.099081039428711, "learning_rate": 6.199203187250996e-06, "loss": 0.6158, "step": 1557 }, { "epoch": 0.01862767369289446, "grad_norm": 1.9773615598678589, "learning_rate": 6.203187250996017e-06, "loss": 0.5689, "step": 1558 }, { "epoch": 0.018639629837755113, "grad_norm": 5.86233377456665, "learning_rate": 6.2071713147410365e-06, "loss": 0.6081, "step": 1559 }, { "epoch": 0.018651585982615765, "grad_norm": 3.45302414894104, "learning_rate": 6.211155378486057e-06, "loss": 0.5854, "step": 1560 }, { "epoch": 0.018663542127476417, "grad_norm": 4.1872687339782715, "learning_rate": 6.215139442231076e-06, "loss": 0.6924, "step": 1561 }, { "epoch": 0.01867549827233707, "grad_norm": 3.301135778427124, "learning_rate": 6.219123505976096e-06, "loss": 0.5993, "step": 1562 }, { "epoch": 0.01868745441719772, "grad_norm": 2.291341781616211, "learning_rate": 6.223107569721116e-06, "loss": 0.6441, "step": 1563 }, { "epoch": 0.018699410562058368, "grad_norm": 2.519371271133423, "learning_rate": 6.2270916334661355e-06, "loss": 0.7263, "step": 1564 }, { "epoch": 0.01871136670691902, "grad_norm": 3.7151570320129395, "learning_rate": 6.231075697211157e-06, "loss": 0.6519, "step": 1565 }, { "epoch": 0.01872332285177967, "grad_norm": 5.323541164398193, "learning_rate": 6.235059760956176e-06, "loss": 0.5833, "step": 1566 }, { "epoch": 0.018735278996640323, "grad_norm": 2.8475542068481445, "learning_rate": 6.2390438247011955e-06, "loss": 0.6934, "step": 1567 }, { "epoch": 0.018747235141500975, "grad_norm": 3.024040937423706, "learning_rate": 6.243027888446216e-06, "loss": 0.6449, "step": 1568 }, { "epoch": 0.018759191286361627, "grad_norm": 13.187732696533203, "learning_rate": 6.247011952191235e-06, "loss": 0.6437, "step": 1569 }, { "epoch": 0.018771147431222275, "grad_norm": 2.405819892883301, "learning_rate": 6.250996015936256e-06, "loss": 0.4954, "step": 1570 }, { "epoch": 0.018783103576082927, "grad_norm": 9.414052963256836, "learning_rate": 6.254980079681275e-06, "loss": 0.5641, "step": 1571 }, { "epoch": 0.01879505972094358, "grad_norm": 3.921330451965332, "learning_rate": 6.2589641434262945e-06, "loss": 0.6909, "step": 1572 }, { "epoch": 0.01880701586580423, "grad_norm": 2.887728691101074, "learning_rate": 6.262948207171316e-06, "loss": 0.5228, "step": 1573 }, { "epoch": 0.018818972010664882, "grad_norm": 4.751153469085693, "learning_rate": 6.266932270916335e-06, "loss": 0.6352, "step": 1574 }, { "epoch": 0.018830928155525534, "grad_norm": 2.8663179874420166, "learning_rate": 6.270916334661355e-06, "loss": 0.6468, "step": 1575 }, { "epoch": 0.018842884300386182, "grad_norm": 2.2574729919433594, "learning_rate": 6.274900398406375e-06, "loss": 0.598, "step": 1576 }, { "epoch": 0.018854840445246834, "grad_norm": 2.0254464149475098, "learning_rate": 6.278884462151394e-06, "loss": 0.6062, "step": 1577 }, { "epoch": 0.018866796590107485, "grad_norm": 3.1751437187194824, "learning_rate": 6.282868525896415e-06, "loss": 0.5834, "step": 1578 }, { "epoch": 0.018878752734968137, "grad_norm": 3.7140209674835205, "learning_rate": 6.286852589641435e-06, "loss": 0.5236, "step": 1579 }, { "epoch": 0.01889070887982879, "grad_norm": 2.3231351375579834, "learning_rate": 6.290836653386455e-06, "loss": 0.6341, "step": 1580 }, { "epoch": 0.01890266502468944, "grad_norm": 3.728363037109375, "learning_rate": 6.294820717131475e-06, "loss": 0.5775, "step": 1581 }, { "epoch": 0.01891462116955009, "grad_norm": 3.120762825012207, "learning_rate": 6.298804780876494e-06, "loss": 0.5472, "step": 1582 }, { "epoch": 0.01892657731441074, "grad_norm": 3.2702200412750244, "learning_rate": 6.3027888446215145e-06, "loss": 0.6092, "step": 1583 }, { "epoch": 0.018938533459271392, "grad_norm": 2.052210569381714, "learning_rate": 6.306772908366534e-06, "loss": 0.5443, "step": 1584 }, { "epoch": 0.018950489604132044, "grad_norm": 2.5791540145874023, "learning_rate": 6.310756972111555e-06, "loss": 0.5487, "step": 1585 }, { "epoch": 0.018962445748992696, "grad_norm": 3.838392496109009, "learning_rate": 6.3147410358565745e-06, "loss": 0.5228, "step": 1586 }, { "epoch": 0.018974401893853347, "grad_norm": 2.098471164703369, "learning_rate": 6.318725099601594e-06, "loss": 0.513, "step": 1587 }, { "epoch": 0.018986358038713996, "grad_norm": 2.538177251815796, "learning_rate": 6.322709163346614e-06, "loss": 0.569, "step": 1588 }, { "epoch": 0.018998314183574647, "grad_norm": 3.4122190475463867, "learning_rate": 6.326693227091634e-06, "loss": 0.5675, "step": 1589 }, { "epoch": 0.0190102703284353, "grad_norm": 8.815472602844238, "learning_rate": 6.330677290836654e-06, "loss": 0.6784, "step": 1590 }, { "epoch": 0.01902222647329595, "grad_norm": 3.8084349632263184, "learning_rate": 6.3346613545816735e-06, "loss": 0.6488, "step": 1591 }, { "epoch": 0.019034182618156602, "grad_norm": 3.8218753337860107, "learning_rate": 6.338645418326693e-06, "loss": 0.6471, "step": 1592 }, { "epoch": 0.019046138763017254, "grad_norm": 5.982041835784912, "learning_rate": 6.342629482071714e-06, "loss": 0.6452, "step": 1593 }, { "epoch": 0.019058094907877902, "grad_norm": 3.814605951309204, "learning_rate": 6.3466135458167336e-06, "loss": 0.59, "step": 1594 }, { "epoch": 0.019070051052738554, "grad_norm": 6.7163496017456055, "learning_rate": 6.350597609561754e-06, "loss": 0.5794, "step": 1595 }, { "epoch": 0.019082007197599206, "grad_norm": 5.977344512939453, "learning_rate": 6.354581673306773e-06, "loss": 0.6581, "step": 1596 }, { "epoch": 0.019093963342459858, "grad_norm": 4.663581371307373, "learning_rate": 6.358565737051793e-06, "loss": 0.7006, "step": 1597 }, { "epoch": 0.01910591948732051, "grad_norm": 5.5919928550720215, "learning_rate": 6.362549800796813e-06, "loss": 0.6183, "step": 1598 }, { "epoch": 0.01911787563218116, "grad_norm": 3.2666826248168945, "learning_rate": 6.3665338645418325e-06, "loss": 0.5674, "step": 1599 }, { "epoch": 0.01912983177704181, "grad_norm": 1.7909119129180908, "learning_rate": 6.370517928286854e-06, "loss": 0.5127, "step": 1600 }, { "epoch": 0.01914178792190246, "grad_norm": 2.7124037742614746, "learning_rate": 6.374501992031873e-06, "loss": 0.5264, "step": 1601 }, { "epoch": 0.019153744066763113, "grad_norm": 2.7174370288848877, "learning_rate": 6.3784860557768935e-06, "loss": 0.5163, "step": 1602 }, { "epoch": 0.019165700211623764, "grad_norm": 3.340388298034668, "learning_rate": 6.382470119521913e-06, "loss": 0.5506, "step": 1603 }, { "epoch": 0.019177656356484416, "grad_norm": 4.469993591308594, "learning_rate": 6.386454183266932e-06, "loss": 0.5439, "step": 1604 }, { "epoch": 0.019189612501345068, "grad_norm": 6.141533374786377, "learning_rate": 6.390438247011953e-06, "loss": 0.6173, "step": 1605 }, { "epoch": 0.019201568646205716, "grad_norm": 6.773006916046143, "learning_rate": 6.394422310756972e-06, "loss": 0.5486, "step": 1606 }, { "epoch": 0.019213524791066368, "grad_norm": 4.848293781280518, "learning_rate": 6.398406374501993e-06, "loss": 0.6054, "step": 1607 }, { "epoch": 0.01922548093592702, "grad_norm": 3.0901505947113037, "learning_rate": 6.402390438247013e-06, "loss": 0.5665, "step": 1608 }, { "epoch": 0.01923743708078767, "grad_norm": 2.347921848297119, "learning_rate": 6.406374501992032e-06, "loss": 0.6345, "step": 1609 }, { "epoch": 0.019249393225648323, "grad_norm": 3.318948745727539, "learning_rate": 6.4103585657370525e-06, "loss": 0.5927, "step": 1610 }, { "epoch": 0.019261349370508975, "grad_norm": 3.530580520629883, "learning_rate": 6.414342629482072e-06, "loss": 0.6135, "step": 1611 }, { "epoch": 0.019273305515369623, "grad_norm": 7.211960792541504, "learning_rate": 6.418326693227092e-06, "loss": 0.5955, "step": 1612 }, { "epoch": 0.019285261660230275, "grad_norm": 37.70256042480469, "learning_rate": 6.4223107569721126e-06, "loss": 0.6788, "step": 1613 }, { "epoch": 0.019297217805090926, "grad_norm": 2.3048064708709717, "learning_rate": 6.426294820717132e-06, "loss": 0.6047, "step": 1614 }, { "epoch": 0.019309173949951578, "grad_norm": 2.5912773609161377, "learning_rate": 6.430278884462152e-06, "loss": 0.619, "step": 1615 }, { "epoch": 0.01932113009481223, "grad_norm": 18.835657119750977, "learning_rate": 6.434262948207172e-06, "loss": 0.608, "step": 1616 }, { "epoch": 0.01933308623967288, "grad_norm": 3.1745665073394775, "learning_rate": 6.438247011952192e-06, "loss": 0.6676, "step": 1617 }, { "epoch": 0.01934504238453353, "grad_norm": 10.955490112304688, "learning_rate": 6.4422310756972115e-06, "loss": 0.6177, "step": 1618 }, { "epoch": 0.01935699852939418, "grad_norm": 3.5214180946350098, "learning_rate": 6.446215139442231e-06, "loss": 0.6784, "step": 1619 }, { "epoch": 0.019368954674254833, "grad_norm": 3.1631269454956055, "learning_rate": 6.450199203187252e-06, "loss": 0.6, "step": 1620 }, { "epoch": 0.019380910819115485, "grad_norm": 3.14186954498291, "learning_rate": 6.454183266932272e-06, "loss": 0.6092, "step": 1621 }, { "epoch": 0.019392866963976137, "grad_norm": 3.4968061447143555, "learning_rate": 6.458167330677292e-06, "loss": 0.6572, "step": 1622 }, { "epoch": 0.019404823108836785, "grad_norm": 5.06234884262085, "learning_rate": 6.462151394422311e-06, "loss": 0.6651, "step": 1623 }, { "epoch": 0.019416779253697437, "grad_norm": 7.455443382263184, "learning_rate": 6.466135458167331e-06, "loss": 0.5648, "step": 1624 }, { "epoch": 0.01942873539855809, "grad_norm": 2.9488749504089355, "learning_rate": 6.470119521912351e-06, "loss": 0.6132, "step": 1625 }, { "epoch": 0.01944069154341874, "grad_norm": 3.7290797233581543, "learning_rate": 6.474103585657371e-06, "loss": 0.5872, "step": 1626 }, { "epoch": 0.019452647688279392, "grad_norm": 3.3815224170684814, "learning_rate": 6.478087649402392e-06, "loss": 0.6267, "step": 1627 }, { "epoch": 0.019464603833140044, "grad_norm": 4.194208145141602, "learning_rate": 6.482071713147411e-06, "loss": 0.6698, "step": 1628 }, { "epoch": 0.019476559978000692, "grad_norm": 2.3379323482513428, "learning_rate": 6.486055776892431e-06, "loss": 0.6223, "step": 1629 }, { "epoch": 0.019488516122861343, "grad_norm": 5.780078887939453, "learning_rate": 6.490039840637451e-06, "loss": 0.6229, "step": 1630 }, { "epoch": 0.019500472267721995, "grad_norm": 11.940969467163086, "learning_rate": 6.49402390438247e-06, "loss": 0.6282, "step": 1631 }, { "epoch": 0.019512428412582647, "grad_norm": 5.291007995605469, "learning_rate": 6.498007968127491e-06, "loss": 0.5988, "step": 1632 }, { "epoch": 0.0195243845574433, "grad_norm": 2.3197853565216064, "learning_rate": 6.50199203187251e-06, "loss": 0.6513, "step": 1633 }, { "epoch": 0.01953634070230395, "grad_norm": 4.958611011505127, "learning_rate": 6.50597609561753e-06, "loss": 0.5834, "step": 1634 }, { "epoch": 0.0195482968471646, "grad_norm": 2.6364972591400146, "learning_rate": 6.509960159362551e-06, "loss": 0.5741, "step": 1635 }, { "epoch": 0.01956025299202525, "grad_norm": 3.6147193908691406, "learning_rate": 6.51394422310757e-06, "loss": 0.5531, "step": 1636 }, { "epoch": 0.019572209136885902, "grad_norm": 3.1408915519714355, "learning_rate": 6.5179282868525905e-06, "loss": 0.5729, "step": 1637 }, { "epoch": 0.019584165281746554, "grad_norm": 4.9756083488464355, "learning_rate": 6.52191235059761e-06, "loss": 0.6461, "step": 1638 }, { "epoch": 0.019596121426607205, "grad_norm": 7.2606425285339355, "learning_rate": 6.5258964143426295e-06, "loss": 0.5264, "step": 1639 }, { "epoch": 0.019608077571467857, "grad_norm": 8.499658584594727, "learning_rate": 6.52988047808765e-06, "loss": 0.6112, "step": 1640 }, { "epoch": 0.019620033716328505, "grad_norm": 2.8668341636657715, "learning_rate": 6.533864541832669e-06, "loss": 0.6445, "step": 1641 }, { "epoch": 0.019631989861189157, "grad_norm": 5.652283191680908, "learning_rate": 6.53784860557769e-06, "loss": 0.6857, "step": 1642 }, { "epoch": 0.01964394600604981, "grad_norm": 3.042999505996704, "learning_rate": 6.54183266932271e-06, "loss": 0.6115, "step": 1643 }, { "epoch": 0.01965590215091046, "grad_norm": 2.5026307106018066, "learning_rate": 6.545816733067729e-06, "loss": 0.6292, "step": 1644 }, { "epoch": 0.019667858295771112, "grad_norm": 3.664877414703369, "learning_rate": 6.54980079681275e-06, "loss": 0.6484, "step": 1645 }, { "epoch": 0.019679814440631764, "grad_norm": 3.8121960163116455, "learning_rate": 6.553784860557769e-06, "loss": 0.491, "step": 1646 }, { "epoch": 0.019691770585492412, "grad_norm": 3.2863852977752686, "learning_rate": 6.55776892430279e-06, "loss": 0.6058, "step": 1647 }, { "epoch": 0.019703726730353064, "grad_norm": 2.6199276447296143, "learning_rate": 6.56175298804781e-06, "loss": 0.6883, "step": 1648 }, { "epoch": 0.019715682875213716, "grad_norm": 1.7531942129135132, "learning_rate": 6.565737051792829e-06, "loss": 0.6099, "step": 1649 }, { "epoch": 0.019727639020074367, "grad_norm": 3.258004903793335, "learning_rate": 6.569721115537849e-06, "loss": 0.5761, "step": 1650 }, { "epoch": 0.01973959516493502, "grad_norm": 2.3419978618621826, "learning_rate": 6.573705179282869e-06, "loss": 0.5254, "step": 1651 }, { "epoch": 0.01975155130979567, "grad_norm": 3.1669909954071045, "learning_rate": 6.577689243027889e-06, "loss": 0.5483, "step": 1652 }, { "epoch": 0.01976350745465632, "grad_norm": 4.199524402618408, "learning_rate": 6.581673306772909e-06, "loss": 0.5836, "step": 1653 }, { "epoch": 0.01977546359951697, "grad_norm": 7.777528762817383, "learning_rate": 6.585657370517928e-06, "loss": 0.5939, "step": 1654 }, { "epoch": 0.019787419744377623, "grad_norm": 4.104794502258301, "learning_rate": 6.589641434262949e-06, "loss": 0.5619, "step": 1655 }, { "epoch": 0.019799375889238274, "grad_norm": 3.5862648487091064, "learning_rate": 6.593625498007969e-06, "loss": 0.6221, "step": 1656 }, { "epoch": 0.019811332034098926, "grad_norm": 2.965965986251831, "learning_rate": 6.597609561752989e-06, "loss": 0.5287, "step": 1657 }, { "epoch": 0.019823288178959578, "grad_norm": 5.7555341720581055, "learning_rate": 6.6015936254980085e-06, "loss": 0.5932, "step": 1658 }, { "epoch": 0.019835244323820226, "grad_norm": 2.7030177116394043, "learning_rate": 6.605577689243028e-06, "loss": 0.4948, "step": 1659 }, { "epoch": 0.019847200468680878, "grad_norm": 3.4124915599823, "learning_rate": 6.609561752988048e-06, "loss": 0.542, "step": 1660 }, { "epoch": 0.01985915661354153, "grad_norm": 2.1752045154571533, "learning_rate": 6.613545816733068e-06, "loss": 0.6517, "step": 1661 }, { "epoch": 0.01987111275840218, "grad_norm": 4.373977184295654, "learning_rate": 6.617529880478089e-06, "loss": 0.6384, "step": 1662 }, { "epoch": 0.019883068903262833, "grad_norm": 3.8476688861846924, "learning_rate": 6.621513944223108e-06, "loss": 0.6136, "step": 1663 }, { "epoch": 0.019895025048123485, "grad_norm": 2.633606195449829, "learning_rate": 6.625498007968129e-06, "loss": 0.7014, "step": 1664 }, { "epoch": 0.019906981192984133, "grad_norm": 2.802525758743286, "learning_rate": 6.629482071713148e-06, "loss": 0.6616, "step": 1665 }, { "epoch": 0.019918937337844785, "grad_norm": 5.0060553550720215, "learning_rate": 6.6334661354581675e-06, "loss": 0.5649, "step": 1666 }, { "epoch": 0.019930893482705436, "grad_norm": 1.8177063465118408, "learning_rate": 6.637450199203188e-06, "loss": 0.6176, "step": 1667 }, { "epoch": 0.019942849627566088, "grad_norm": 1.9225459098815918, "learning_rate": 6.641434262948207e-06, "loss": 0.5334, "step": 1668 }, { "epoch": 0.01995480577242674, "grad_norm": 3.2142341136932373, "learning_rate": 6.645418326693228e-06, "loss": 0.6144, "step": 1669 }, { "epoch": 0.01996676191728739, "grad_norm": 25.93622398376465, "learning_rate": 6.649402390438248e-06, "loss": 0.5841, "step": 1670 }, { "epoch": 0.01997871806214804, "grad_norm": 2.906087636947632, "learning_rate": 6.653386454183267e-06, "loss": 0.6265, "step": 1671 }, { "epoch": 0.01999067420700869, "grad_norm": 2.8338427543640137, "learning_rate": 6.657370517928288e-06, "loss": 0.631, "step": 1672 }, { "epoch": 0.020002630351869343, "grad_norm": 1.819858431816101, "learning_rate": 6.661354581673307e-06, "loss": 0.63, "step": 1673 }, { "epoch": 0.020014586496729995, "grad_norm": 2.7481536865234375, "learning_rate": 6.665338645418327e-06, "loss": 0.5566, "step": 1674 }, { "epoch": 0.020026542641590647, "grad_norm": 4.2694783210754395, "learning_rate": 6.669322709163347e-06, "loss": 0.5848, "step": 1675 }, { "epoch": 0.020038498786451298, "grad_norm": 2.358318567276001, "learning_rate": 6.673306772908366e-06, "loss": 0.621, "step": 1676 }, { "epoch": 0.020050454931311947, "grad_norm": 1.851017713546753, "learning_rate": 6.6772908366533875e-06, "loss": 0.5266, "step": 1677 }, { "epoch": 0.020062411076172598, "grad_norm": 2.4650661945343018, "learning_rate": 6.681274900398407e-06, "loss": 0.5892, "step": 1678 }, { "epoch": 0.02007436722103325, "grad_norm": 3.6759002208709717, "learning_rate": 6.685258964143427e-06, "loss": 0.6875, "step": 1679 }, { "epoch": 0.0200863233658939, "grad_norm": 4.40215539932251, "learning_rate": 6.689243027888447e-06, "loss": 0.644, "step": 1680 }, { "epoch": 0.020098279510754553, "grad_norm": 5.070871353149414, "learning_rate": 6.693227091633466e-06, "loss": 0.5565, "step": 1681 }, { "epoch": 0.020110235655615205, "grad_norm": 5.697808265686035, "learning_rate": 6.697211155378487e-06, "loss": 0.66, "step": 1682 }, { "epoch": 0.020122191800475853, "grad_norm": 3.305408000946045, "learning_rate": 6.701195219123507e-06, "loss": 0.611, "step": 1683 }, { "epoch": 0.020134147945336505, "grad_norm": 3.497288703918457, "learning_rate": 6.705179282868527e-06, "loss": 0.6184, "step": 1684 }, { "epoch": 0.020146104090197157, "grad_norm": 3.0454535484313965, "learning_rate": 6.7091633466135465e-06, "loss": 0.6823, "step": 1685 }, { "epoch": 0.02015806023505781, "grad_norm": 5.355257034301758, "learning_rate": 6.713147410358566e-06, "loss": 0.496, "step": 1686 }, { "epoch": 0.02017001637991846, "grad_norm": 2.398993492126465, "learning_rate": 6.717131474103586e-06, "loss": 0.6019, "step": 1687 }, { "epoch": 0.020181972524779112, "grad_norm": 5.306380748748779, "learning_rate": 6.721115537848606e-06, "loss": 0.5611, "step": 1688 }, { "epoch": 0.02019392866963976, "grad_norm": 18.934906005859375, "learning_rate": 6.725099601593627e-06, "loss": 0.6166, "step": 1689 }, { "epoch": 0.020205884814500412, "grad_norm": 3.7968173027038574, "learning_rate": 6.729083665338646e-06, "loss": 0.6985, "step": 1690 }, { "epoch": 0.020217840959361064, "grad_norm": 2.5993945598602295, "learning_rate": 6.733067729083666e-06, "loss": 0.636, "step": 1691 }, { "epoch": 0.020229797104221715, "grad_norm": 2.4165899753570557, "learning_rate": 6.737051792828686e-06, "loss": 0.5993, "step": 1692 }, { "epoch": 0.020241753249082367, "grad_norm": 8.953144073486328, "learning_rate": 6.7410358565737055e-06, "loss": 0.5467, "step": 1693 }, { "epoch": 0.020253709393943015, "grad_norm": 5.800170421600342, "learning_rate": 6.745019920318726e-06, "loss": 0.5692, "step": 1694 }, { "epoch": 0.020265665538803667, "grad_norm": 6.038553237915039, "learning_rate": 6.749003984063745e-06, "loss": 0.5952, "step": 1695 }, { "epoch": 0.02027762168366432, "grad_norm": 52.7071418762207, "learning_rate": 6.752988047808765e-06, "loss": 0.56, "step": 1696 }, { "epoch": 0.02028957782852497, "grad_norm": 8.21235466003418, "learning_rate": 6.756972111553786e-06, "loss": 0.5979, "step": 1697 }, { "epoch": 0.020301533973385622, "grad_norm": 3.2275049686431885, "learning_rate": 6.760956175298805e-06, "loss": 0.64, "step": 1698 }, { "epoch": 0.020313490118246274, "grad_norm": 4.830429553985596, "learning_rate": 6.764940239043826e-06, "loss": 0.5749, "step": 1699 }, { "epoch": 0.020325446263106922, "grad_norm": 4.660829544067383, "learning_rate": 6.768924302788845e-06, "loss": 0.582, "step": 1700 }, { "epoch": 0.020337402407967574, "grad_norm": 3.248053550720215, "learning_rate": 6.772908366533865e-06, "loss": 0.5684, "step": 1701 }, { "epoch": 0.020349358552828226, "grad_norm": 9.033885955810547, "learning_rate": 6.776892430278885e-06, "loss": 0.6011, "step": 1702 }, { "epoch": 0.020361314697688877, "grad_norm": 2.358185052871704, "learning_rate": 6.780876494023904e-06, "loss": 0.6057, "step": 1703 }, { "epoch": 0.02037327084254953, "grad_norm": 2.1319332122802734, "learning_rate": 6.7848605577689255e-06, "loss": 0.7089, "step": 1704 }, { "epoch": 0.02038522698741018, "grad_norm": 2.348480701446533, "learning_rate": 6.788844621513945e-06, "loss": 0.626, "step": 1705 }, { "epoch": 0.02039718313227083, "grad_norm": 2.4737627506256104, "learning_rate": 6.792828685258964e-06, "loss": 0.6711, "step": 1706 }, { "epoch": 0.02040913927713148, "grad_norm": 3.062333106994629, "learning_rate": 6.796812749003985e-06, "loss": 0.579, "step": 1707 }, { "epoch": 0.020421095421992132, "grad_norm": 1.9971312284469604, "learning_rate": 6.800796812749004e-06, "loss": 0.5696, "step": 1708 }, { "epoch": 0.020433051566852784, "grad_norm": 9.633170127868652, "learning_rate": 6.8047808764940245e-06, "loss": 0.6538, "step": 1709 }, { "epoch": 0.020445007711713436, "grad_norm": 5.392826557159424, "learning_rate": 6.808764940239044e-06, "loss": 0.5221, "step": 1710 }, { "epoch": 0.020456963856574088, "grad_norm": 3.957000255584717, "learning_rate": 6.812749003984063e-06, "loss": 0.6176, "step": 1711 }, { "epoch": 0.020468920001434736, "grad_norm": 2.6054768562316895, "learning_rate": 6.8167330677290845e-06, "loss": 0.637, "step": 1712 }, { "epoch": 0.020480876146295388, "grad_norm": 10.282151222229004, "learning_rate": 6.820717131474104e-06, "loss": 0.6295, "step": 1713 }, { "epoch": 0.02049283229115604, "grad_norm": 5.345904350280762, "learning_rate": 6.824701195219124e-06, "loss": 0.6323, "step": 1714 }, { "epoch": 0.02050478843601669, "grad_norm": 2.949364423751831, "learning_rate": 6.828685258964144e-06, "loss": 0.6425, "step": 1715 }, { "epoch": 0.020516744580877343, "grad_norm": 2.818267345428467, "learning_rate": 6.832669322709163e-06, "loss": 0.5923, "step": 1716 }, { "epoch": 0.020528700725737994, "grad_norm": 2.9152424335479736, "learning_rate": 6.836653386454184e-06, "loss": 0.6054, "step": 1717 }, { "epoch": 0.020540656870598643, "grad_norm": 2.4489552974700928, "learning_rate": 6.840637450199204e-06, "loss": 0.6205, "step": 1718 }, { "epoch": 0.020552613015459294, "grad_norm": 3.7842366695404053, "learning_rate": 6.844621513944224e-06, "loss": 0.654, "step": 1719 }, { "epoch": 0.020564569160319946, "grad_norm": 2.5779497623443604, "learning_rate": 6.848605577689244e-06, "loss": 0.5488, "step": 1720 }, { "epoch": 0.020576525305180598, "grad_norm": 3.609497547149658, "learning_rate": 6.852589641434263e-06, "loss": 0.6364, "step": 1721 }, { "epoch": 0.02058848145004125, "grad_norm": 2.5441408157348633, "learning_rate": 6.856573705179283e-06, "loss": 0.5741, "step": 1722 }, { "epoch": 0.0206004375949019, "grad_norm": 15.69640064239502, "learning_rate": 6.860557768924303e-06, "loss": 0.5707, "step": 1723 }, { "epoch": 0.02061239373976255, "grad_norm": 4.088575839996338, "learning_rate": 6.864541832669324e-06, "loss": 0.5514, "step": 1724 }, { "epoch": 0.0206243498846232, "grad_norm": 1.8565384149551392, "learning_rate": 6.868525896414343e-06, "loss": 0.6128, "step": 1725 }, { "epoch": 0.020636306029483853, "grad_norm": 2.1097562313079834, "learning_rate": 6.872509960159363e-06, "loss": 0.6644, "step": 1726 }, { "epoch": 0.020648262174344505, "grad_norm": 4.557736873626709, "learning_rate": 6.876494023904383e-06, "loss": 0.6084, "step": 1727 }, { "epoch": 0.020660218319205156, "grad_norm": 3.3428382873535156, "learning_rate": 6.880478087649403e-06, "loss": 0.6303, "step": 1728 }, { "epoch": 0.020672174464065808, "grad_norm": 3.152345895767212, "learning_rate": 6.884462151394423e-06, "loss": 0.6394, "step": 1729 }, { "epoch": 0.020684130608926456, "grad_norm": 3.068567991256714, "learning_rate": 6.888446215139442e-06, "loss": 0.6437, "step": 1730 }, { "epoch": 0.020696086753787108, "grad_norm": 8.64222240447998, "learning_rate": 6.8924302788844635e-06, "loss": 0.6383, "step": 1731 }, { "epoch": 0.02070804289864776, "grad_norm": 3.534235954284668, "learning_rate": 6.896414342629483e-06, "loss": 0.5228, "step": 1732 }, { "epoch": 0.02071999904350841, "grad_norm": 2.6424412727355957, "learning_rate": 6.9003984063745025e-06, "loss": 0.6593, "step": 1733 }, { "epoch": 0.020731955188369063, "grad_norm": 3.93733549118042, "learning_rate": 6.904382470119523e-06, "loss": 0.5813, "step": 1734 }, { "epoch": 0.020743911333229715, "grad_norm": 9.104530334472656, "learning_rate": 6.908366533864542e-06, "loss": 0.5978, "step": 1735 }, { "epoch": 0.020755867478090363, "grad_norm": 2.668217420578003, "learning_rate": 6.9123505976095625e-06, "loss": 0.6427, "step": 1736 }, { "epoch": 0.020767823622951015, "grad_norm": 1.9922280311584473, "learning_rate": 6.916334661354582e-06, "loss": 0.6818, "step": 1737 }, { "epoch": 0.020779779767811667, "grad_norm": 4.671021461486816, "learning_rate": 6.9203187250996014e-06, "loss": 0.5871, "step": 1738 }, { "epoch": 0.02079173591267232, "grad_norm": 2.419642210006714, "learning_rate": 6.924302788844623e-06, "loss": 0.6415, "step": 1739 }, { "epoch": 0.02080369205753297, "grad_norm": 2.791883707046509, "learning_rate": 6.928286852589642e-06, "loss": 0.5247, "step": 1740 }, { "epoch": 0.020815648202393622, "grad_norm": 2.2002100944519043, "learning_rate": 6.932270916334662e-06, "loss": 0.551, "step": 1741 }, { "epoch": 0.02082760434725427, "grad_norm": 3.6831822395324707, "learning_rate": 6.936254980079682e-06, "loss": 0.5858, "step": 1742 }, { "epoch": 0.020839560492114922, "grad_norm": 2.11665415763855, "learning_rate": 6.940239043824701e-06, "loss": 0.5617, "step": 1743 }, { "epoch": 0.020851516636975573, "grad_norm": 3.0800139904022217, "learning_rate": 6.9442231075697216e-06, "loss": 0.677, "step": 1744 }, { "epoch": 0.020863472781836225, "grad_norm": 2.144221305847168, "learning_rate": 6.948207171314741e-06, "loss": 0.628, "step": 1745 }, { "epoch": 0.020875428926696877, "grad_norm": 2.6597208976745605, "learning_rate": 6.952191235059762e-06, "loss": 0.6124, "step": 1746 }, { "epoch": 0.02088738507155753, "grad_norm": 2.5091216564178467, "learning_rate": 6.956175298804782e-06, "loss": 0.5918, "step": 1747 }, { "epoch": 0.020899341216418177, "grad_norm": 2.6516590118408203, "learning_rate": 6.960159362549801e-06, "loss": 0.6385, "step": 1748 }, { "epoch": 0.02091129736127883, "grad_norm": 3.504246473312378, "learning_rate": 6.964143426294821e-06, "loss": 0.5717, "step": 1749 }, { "epoch": 0.02092325350613948, "grad_norm": 2.513498306274414, "learning_rate": 6.968127490039841e-06, "loss": 0.6422, "step": 1750 }, { "epoch": 0.020935209651000132, "grad_norm": 3.479144334793091, "learning_rate": 6.972111553784862e-06, "loss": 0.6221, "step": 1751 }, { "epoch": 0.020947165795860784, "grad_norm": 2.2373812198638916, "learning_rate": 6.9760956175298815e-06, "loss": 0.5526, "step": 1752 }, { "epoch": 0.020959121940721435, "grad_norm": 3.1240406036376953, "learning_rate": 6.980079681274901e-06, "loss": 0.6344, "step": 1753 }, { "epoch": 0.020971078085582084, "grad_norm": 2.6757631301879883, "learning_rate": 6.984063745019921e-06, "loss": 0.6026, "step": 1754 }, { "epoch": 0.020983034230442735, "grad_norm": 2.2144041061401367, "learning_rate": 6.988047808764941e-06, "loss": 0.6143, "step": 1755 }, { "epoch": 0.020994990375303387, "grad_norm": 3.7530083656311035, "learning_rate": 6.992031872509961e-06, "loss": 0.6012, "step": 1756 }, { "epoch": 0.02100694652016404, "grad_norm": 111.1375961303711, "learning_rate": 6.9960159362549804e-06, "loss": 0.6074, "step": 1757 }, { "epoch": 0.02101890266502469, "grad_norm": 16.160009384155273, "learning_rate": 7e-06, "loss": 0.7606, "step": 1758 }, { "epoch": 0.02103085880988534, "grad_norm": 2.909149646759033, "learning_rate": 7.003984063745021e-06, "loss": 0.4986, "step": 1759 }, { "epoch": 0.02104281495474599, "grad_norm": 6.398083209991455, "learning_rate": 7.0079681274900405e-06, "loss": 0.5902, "step": 1760 }, { "epoch": 0.021054771099606642, "grad_norm": 4.9607439041137695, "learning_rate": 7.011952191235061e-06, "loss": 0.6008, "step": 1761 }, { "epoch": 0.021066727244467294, "grad_norm": 2.399857997894287, "learning_rate": 7.01593625498008e-06, "loss": 0.5678, "step": 1762 }, { "epoch": 0.021078683389327946, "grad_norm": 3.9231367111206055, "learning_rate": 7.0199203187251e-06, "loss": 0.605, "step": 1763 }, { "epoch": 0.021090639534188597, "grad_norm": 2.00008487701416, "learning_rate": 7.02390438247012e-06, "loss": 0.7441, "step": 1764 }, { "epoch": 0.021102595679049246, "grad_norm": 2.7103774547576904, "learning_rate": 7.0278884462151395e-06, "loss": 0.609, "step": 1765 }, { "epoch": 0.021114551823909897, "grad_norm": 2.299269437789917, "learning_rate": 7.031872509960161e-06, "loss": 0.6049, "step": 1766 }, { "epoch": 0.02112650796877055, "grad_norm": 2.310527801513672, "learning_rate": 7.03585657370518e-06, "loss": 0.5624, "step": 1767 }, { "epoch": 0.0211384641136312, "grad_norm": 2.6075146198272705, "learning_rate": 7.0398406374501995e-06, "loss": 0.5926, "step": 1768 }, { "epoch": 0.021150420258491853, "grad_norm": 11.86817455291748, "learning_rate": 7.04382470119522e-06, "loss": 0.5829, "step": 1769 }, { "epoch": 0.021162376403352504, "grad_norm": 3.186122179031372, "learning_rate": 7.047808764940239e-06, "loss": 0.5862, "step": 1770 }, { "epoch": 0.021174332548213153, "grad_norm": 17.10913848876953, "learning_rate": 7.05179282868526e-06, "loss": 0.4816, "step": 1771 }, { "epoch": 0.021186288693073804, "grad_norm": 3.6844756603240967, "learning_rate": 7.055776892430279e-06, "loss": 0.6346, "step": 1772 }, { "epoch": 0.021198244837934456, "grad_norm": 3.542786121368408, "learning_rate": 7.0597609561752985e-06, "loss": 0.6676, "step": 1773 }, { "epoch": 0.021210200982795108, "grad_norm": 4.164369583129883, "learning_rate": 7.06374501992032e-06, "loss": 0.5707, "step": 1774 }, { "epoch": 0.02122215712765576, "grad_norm": 3.136734962463379, "learning_rate": 7.067729083665339e-06, "loss": 0.6009, "step": 1775 }, { "epoch": 0.02123411327251641, "grad_norm": 3.456685781478882, "learning_rate": 7.0717131474103594e-06, "loss": 0.6087, "step": 1776 }, { "epoch": 0.02124606941737706, "grad_norm": 2.5078253746032715, "learning_rate": 7.075697211155379e-06, "loss": 0.6364, "step": 1777 }, { "epoch": 0.02125802556223771, "grad_norm": 2.502863645553589, "learning_rate": 7.079681274900398e-06, "loss": 0.636, "step": 1778 }, { "epoch": 0.021269981707098363, "grad_norm": 2.0643532276153564, "learning_rate": 7.083665338645419e-06, "loss": 0.6749, "step": 1779 }, { "epoch": 0.021281937851959015, "grad_norm": 3.571333885192871, "learning_rate": 7.087649402390438e-06, "loss": 0.6642, "step": 1780 }, { "epoch": 0.021293893996819666, "grad_norm": 2.9632978439331055, "learning_rate": 7.091633466135459e-06, "loss": 0.7056, "step": 1781 }, { "epoch": 0.021305850141680318, "grad_norm": 3.6081831455230713, "learning_rate": 7.095617529880479e-06, "loss": 0.567, "step": 1782 }, { "epoch": 0.021317806286540966, "grad_norm": 3.102193832397461, "learning_rate": 7.099601593625498e-06, "loss": 0.4773, "step": 1783 }, { "epoch": 0.021329762431401618, "grad_norm": 3.7411534786224365, "learning_rate": 7.1035856573705185e-06, "loss": 0.6832, "step": 1784 }, { "epoch": 0.02134171857626227, "grad_norm": 5.61759614944458, "learning_rate": 7.107569721115538e-06, "loss": 0.6518, "step": 1785 }, { "epoch": 0.02135367472112292, "grad_norm": 4.624063491821289, "learning_rate": 7.111553784860559e-06, "loss": 0.6809, "step": 1786 }, { "epoch": 0.021365630865983573, "grad_norm": 3.0231752395629883, "learning_rate": 7.1155378486055785e-06, "loss": 0.5384, "step": 1787 }, { "epoch": 0.021377587010844225, "grad_norm": 8.593154907226562, "learning_rate": 7.119521912350598e-06, "loss": 0.6584, "step": 1788 }, { "epoch": 0.021389543155704873, "grad_norm": 2.9669859409332275, "learning_rate": 7.123505976095618e-06, "loss": 0.5986, "step": 1789 }, { "epoch": 0.021401499300565525, "grad_norm": 4.316876411437988, "learning_rate": 7.127490039840638e-06, "loss": 0.684, "step": 1790 }, { "epoch": 0.021413455445426176, "grad_norm": 2.482285261154175, "learning_rate": 7.131474103585658e-06, "loss": 0.6203, "step": 1791 }, { "epoch": 0.021425411590286828, "grad_norm": 3.0702013969421387, "learning_rate": 7.1354581673306775e-06, "loss": 0.579, "step": 1792 }, { "epoch": 0.02143736773514748, "grad_norm": 4.330074787139893, "learning_rate": 7.139442231075699e-06, "loss": 0.5353, "step": 1793 }, { "epoch": 0.02144932388000813, "grad_norm": 2.1230554580688477, "learning_rate": 7.143426294820718e-06, "loss": 0.5846, "step": 1794 }, { "epoch": 0.02146128002486878, "grad_norm": 2.1348817348480225, "learning_rate": 7.147410358565738e-06, "loss": 0.6082, "step": 1795 }, { "epoch": 0.02147323616972943, "grad_norm": 2.3453359603881836, "learning_rate": 7.151394422310758e-06, "loss": 0.5719, "step": 1796 }, { "epoch": 0.021485192314590083, "grad_norm": 3.3123395442962646, "learning_rate": 7.155378486055777e-06, "loss": 0.6876, "step": 1797 }, { "epoch": 0.021497148459450735, "grad_norm": 3.6698877811431885, "learning_rate": 7.159362549800798e-06, "loss": 0.6696, "step": 1798 }, { "epoch": 0.021509104604311387, "grad_norm": 17.222429275512695, "learning_rate": 7.163346613545817e-06, "loss": 0.7082, "step": 1799 }, { "epoch": 0.02152106074917204, "grad_norm": 12.57965087890625, "learning_rate": 7.1673306772908366e-06, "loss": 0.6216, "step": 1800 }, { "epoch": 0.021533016894032687, "grad_norm": 2.6170287132263184, "learning_rate": 7.171314741035858e-06, "loss": 0.5597, "step": 1801 }, { "epoch": 0.02154497303889334, "grad_norm": 3.543001890182495, "learning_rate": 7.175298804780877e-06, "loss": 0.6995, "step": 1802 }, { "epoch": 0.02155692918375399, "grad_norm": 3.7029571533203125, "learning_rate": 7.1792828685258975e-06, "loss": 0.6389, "step": 1803 }, { "epoch": 0.021568885328614642, "grad_norm": 19.061365127563477, "learning_rate": 7.183266932270917e-06, "loss": 0.6385, "step": 1804 }, { "epoch": 0.021580841473475294, "grad_norm": 5.967075824737549, "learning_rate": 7.187250996015936e-06, "loss": 0.6509, "step": 1805 }, { "epoch": 0.021592797618335945, "grad_norm": 3.8520994186401367, "learning_rate": 7.191235059760957e-06, "loss": 0.5245, "step": 1806 }, { "epoch": 0.021604753763196594, "grad_norm": 4.560391902923584, "learning_rate": 7.195219123505976e-06, "loss": 0.6175, "step": 1807 }, { "epoch": 0.021616709908057245, "grad_norm": 3.3348443508148193, "learning_rate": 7.199203187250997e-06, "loss": 0.6888, "step": 1808 }, { "epoch": 0.021628666052917897, "grad_norm": 2.5902717113494873, "learning_rate": 7.203187250996017e-06, "loss": 0.6704, "step": 1809 }, { "epoch": 0.02164062219777855, "grad_norm": 2.2598416805267334, "learning_rate": 7.207171314741036e-06, "loss": 0.5584, "step": 1810 }, { "epoch": 0.0216525783426392, "grad_norm": 2.4901604652404785, "learning_rate": 7.2111553784860565e-06, "loss": 0.6352, "step": 1811 }, { "epoch": 0.021664534487499852, "grad_norm": 9.43652629852295, "learning_rate": 7.215139442231076e-06, "loss": 0.6142, "step": 1812 }, { "epoch": 0.0216764906323605, "grad_norm": 3.7340919971466064, "learning_rate": 7.219123505976096e-06, "loss": 0.6014, "step": 1813 }, { "epoch": 0.021688446777221152, "grad_norm": 2.8167598247528076, "learning_rate": 7.223107569721116e-06, "loss": 0.6154, "step": 1814 }, { "epoch": 0.021700402922081804, "grad_norm": 2.899843215942383, "learning_rate": 7.227091633466136e-06, "loss": 0.6448, "step": 1815 }, { "epoch": 0.021712359066942456, "grad_norm": 2.492624282836914, "learning_rate": 7.231075697211156e-06, "loss": 0.6192, "step": 1816 }, { "epoch": 0.021724315211803107, "grad_norm": 2.003220558166504, "learning_rate": 7.235059760956176e-06, "loss": 0.585, "step": 1817 }, { "epoch": 0.02173627135666376, "grad_norm": 5.077952861785889, "learning_rate": 7.239043824701196e-06, "loss": 0.5978, "step": 1818 }, { "epoch": 0.021748227501524407, "grad_norm": 3.789616346359253, "learning_rate": 7.2430278884462156e-06, "loss": 0.587, "step": 1819 }, { "epoch": 0.02176018364638506, "grad_norm": 3.1331326961517334, "learning_rate": 7.247011952191235e-06, "loss": 0.592, "step": 1820 }, { "epoch": 0.02177213979124571, "grad_norm": 2.4667303562164307, "learning_rate": 7.250996015936256e-06, "loss": 0.6041, "step": 1821 }, { "epoch": 0.021784095936106362, "grad_norm": 3.5515146255493164, "learning_rate": 7.254980079681276e-06, "loss": 0.7224, "step": 1822 }, { "epoch": 0.021796052080967014, "grad_norm": 4.528810501098633, "learning_rate": 7.258964143426296e-06, "loss": 0.55, "step": 1823 }, { "epoch": 0.021808008225827666, "grad_norm": 3.794214963912964, "learning_rate": 7.262948207171315e-06, "loss": 0.4704, "step": 1824 }, { "epoch": 0.021819964370688314, "grad_norm": 3.416506052017212, "learning_rate": 7.266932270916335e-06, "loss": 0.5956, "step": 1825 }, { "epoch": 0.021831920515548966, "grad_norm": 2.2517507076263428, "learning_rate": 7.270916334661355e-06, "loss": 0.6704, "step": 1826 }, { "epoch": 0.021843876660409618, "grad_norm": 4.865292549133301, "learning_rate": 7.274900398406375e-06, "loss": 0.7401, "step": 1827 }, { "epoch": 0.02185583280527027, "grad_norm": 2.479693651199341, "learning_rate": 7.278884462151396e-06, "loss": 0.5804, "step": 1828 }, { "epoch": 0.02186778895013092, "grad_norm": 2.2186591625213623, "learning_rate": 7.282868525896415e-06, "loss": 0.6899, "step": 1829 }, { "epoch": 0.02187974509499157, "grad_norm": 6.017975807189941, "learning_rate": 7.286852589641435e-06, "loss": 0.5713, "step": 1830 }, { "epoch": 0.02189170123985222, "grad_norm": 2.4508249759674072, "learning_rate": 7.290836653386455e-06, "loss": 0.6149, "step": 1831 }, { "epoch": 0.021903657384712873, "grad_norm": 2.8255815505981445, "learning_rate": 7.2948207171314744e-06, "loss": 0.6434, "step": 1832 }, { "epoch": 0.021915613529573524, "grad_norm": 8.167525291442871, "learning_rate": 7.298804780876495e-06, "loss": 0.5965, "step": 1833 }, { "epoch": 0.021927569674434176, "grad_norm": 3.485678195953369, "learning_rate": 7.302788844621514e-06, "loss": 0.5345, "step": 1834 }, { "epoch": 0.021939525819294828, "grad_norm": 2.4030165672302246, "learning_rate": 7.306772908366534e-06, "loss": 0.6383, "step": 1835 }, { "epoch": 0.021951481964155476, "grad_norm": 3.002675771713257, "learning_rate": 7.310756972111555e-06, "loss": 0.5488, "step": 1836 }, { "epoch": 0.021963438109016128, "grad_norm": 2.2638094425201416, "learning_rate": 7.314741035856574e-06, "loss": 0.6477, "step": 1837 }, { "epoch": 0.02197539425387678, "grad_norm": 2.3954734802246094, "learning_rate": 7.3187250996015946e-06, "loss": 0.6289, "step": 1838 }, { "epoch": 0.02198735039873743, "grad_norm": 3.2870209217071533, "learning_rate": 7.322709163346614e-06, "loss": 0.7237, "step": 1839 }, { "epoch": 0.021999306543598083, "grad_norm": 11.648282051086426, "learning_rate": 7.3266932270916335e-06, "loss": 0.6797, "step": 1840 }, { "epoch": 0.022011262688458735, "grad_norm": 3.019148588180542, "learning_rate": 7.330677290836654e-06, "loss": 0.6172, "step": 1841 }, { "epoch": 0.022023218833319383, "grad_norm": 2.697540283203125, "learning_rate": 7.334661354581673e-06, "loss": 0.6311, "step": 1842 }, { "epoch": 0.022035174978180035, "grad_norm": 14.218124389648438, "learning_rate": 7.338645418326694e-06, "loss": 0.6869, "step": 1843 }, { "epoch": 0.022047131123040686, "grad_norm": 2.10453200340271, "learning_rate": 7.342629482071714e-06, "loss": 0.6352, "step": 1844 }, { "epoch": 0.022059087267901338, "grad_norm": 2.5744471549987793, "learning_rate": 7.346613545816733e-06, "loss": 0.606, "step": 1845 }, { "epoch": 0.02207104341276199, "grad_norm": 3.4382121562957764, "learning_rate": 7.350597609561754e-06, "loss": 0.6255, "step": 1846 }, { "epoch": 0.02208299955762264, "grad_norm": 1.920362114906311, "learning_rate": 7.354581673306773e-06, "loss": 0.5546, "step": 1847 }, { "epoch": 0.02209495570248329, "grad_norm": 2.580486297607422, "learning_rate": 7.358565737051793e-06, "loss": 0.6086, "step": 1848 }, { "epoch": 0.02210691184734394, "grad_norm": 3.2225265502929688, "learning_rate": 7.362549800796813e-06, "loss": 0.6391, "step": 1849 }, { "epoch": 0.022118867992204593, "grad_norm": 2.2031753063201904, "learning_rate": 7.366533864541833e-06, "loss": 0.5693, "step": 1850 }, { "epoch": 0.022130824137065245, "grad_norm": 5.130324363708496, "learning_rate": 7.3705179282868534e-06, "loss": 0.6497, "step": 1851 }, { "epoch": 0.022142780281925897, "grad_norm": 1.9957531690597534, "learning_rate": 7.374501992031873e-06, "loss": 0.7349, "step": 1852 }, { "epoch": 0.02215473642678655, "grad_norm": 2.2291975021362305, "learning_rate": 7.378486055776893e-06, "loss": 0.587, "step": 1853 }, { "epoch": 0.022166692571647197, "grad_norm": 2.4869279861450195, "learning_rate": 7.382470119521913e-06, "loss": 0.6201, "step": 1854 }, { "epoch": 0.02217864871650785, "grad_norm": 2.040163516998291, "learning_rate": 7.386454183266934e-06, "loss": 0.6065, "step": 1855 }, { "epoch": 0.0221906048613685, "grad_norm": 4.891413688659668, "learning_rate": 7.390438247011953e-06, "loss": 0.5685, "step": 1856 }, { "epoch": 0.022202561006229152, "grad_norm": 7.965510368347168, "learning_rate": 7.394422310756973e-06, "loss": 0.6634, "step": 1857 }, { "epoch": 0.022214517151089803, "grad_norm": 25.459049224853516, "learning_rate": 7.398406374501993e-06, "loss": 0.5467, "step": 1858 }, { "epoch": 0.022226473295950455, "grad_norm": 2.5192642211914062, "learning_rate": 7.4023904382470125e-06, "loss": 0.6417, "step": 1859 }, { "epoch": 0.022238429440811103, "grad_norm": 2.798794746398926, "learning_rate": 7.406374501992033e-06, "loss": 0.5879, "step": 1860 }, { "epoch": 0.022250385585671755, "grad_norm": 3.5685875415802, "learning_rate": 7.410358565737052e-06, "loss": 0.6023, "step": 1861 }, { "epoch": 0.022262341730532407, "grad_norm": 2.9324288368225098, "learning_rate": 7.414342629482072e-06, "loss": 0.5533, "step": 1862 }, { "epoch": 0.02227429787539306, "grad_norm": 2.3137590885162354, "learning_rate": 7.418326693227093e-06, "loss": 0.5986, "step": 1863 }, { "epoch": 0.02228625402025371, "grad_norm": 2.7978806495666504, "learning_rate": 7.422310756972112e-06, "loss": 0.6822, "step": 1864 }, { "epoch": 0.022298210165114362, "grad_norm": 3.3063676357269287, "learning_rate": 7.426294820717133e-06, "loss": 0.6037, "step": 1865 }, { "epoch": 0.02231016630997501, "grad_norm": 3.602809429168701, "learning_rate": 7.430278884462152e-06, "loss": 0.5924, "step": 1866 }, { "epoch": 0.022322122454835662, "grad_norm": 5.754494667053223, "learning_rate": 7.4342629482071715e-06, "loss": 0.6444, "step": 1867 }, { "epoch": 0.022334078599696314, "grad_norm": 2.8779380321502686, "learning_rate": 7.438247011952192e-06, "loss": 0.5556, "step": 1868 }, { "epoch": 0.022346034744556965, "grad_norm": 4.971451759338379, "learning_rate": 7.442231075697211e-06, "loss": 0.5241, "step": 1869 }, { "epoch": 0.022357990889417617, "grad_norm": 3.084185838699341, "learning_rate": 7.4462151394422324e-06, "loss": 0.6047, "step": 1870 }, { "epoch": 0.02236994703427827, "grad_norm": 2.6261484622955322, "learning_rate": 7.450199203187252e-06, "loss": 0.609, "step": 1871 }, { "epoch": 0.022381903179138917, "grad_norm": 10.0150146484375, "learning_rate": 7.454183266932271e-06, "loss": 0.6361, "step": 1872 }, { "epoch": 0.02239385932399957, "grad_norm": 4.313632965087891, "learning_rate": 7.458167330677292e-06, "loss": 0.6364, "step": 1873 }, { "epoch": 0.02240581546886022, "grad_norm": 3.6235992908477783, "learning_rate": 7.462151394422311e-06, "loss": 0.7098, "step": 1874 }, { "epoch": 0.022417771613720872, "grad_norm": 4.725368976593018, "learning_rate": 7.466135458167331e-06, "loss": 0.6007, "step": 1875 }, { "epoch": 0.022429727758581524, "grad_norm": 7.7260332107543945, "learning_rate": 7.470119521912351e-06, "loss": 0.6786, "step": 1876 }, { "epoch": 0.022441683903442176, "grad_norm": 2.2681984901428223, "learning_rate": 7.47410358565737e-06, "loss": 0.5661, "step": 1877 }, { "epoch": 0.022453640048302824, "grad_norm": 4.230676174163818, "learning_rate": 7.4780876494023915e-06, "loss": 0.559, "step": 1878 }, { "epoch": 0.022465596193163476, "grad_norm": 3.1431572437286377, "learning_rate": 7.482071713147411e-06, "loss": 0.5425, "step": 1879 }, { "epoch": 0.022477552338024127, "grad_norm": 3.0440256595611572, "learning_rate": 7.486055776892431e-06, "loss": 0.6408, "step": 1880 }, { "epoch": 0.02248950848288478, "grad_norm": 5.507454872131348, "learning_rate": 7.490039840637451e-06, "loss": 0.715, "step": 1881 }, { "epoch": 0.02250146462774543, "grad_norm": 12.042387008666992, "learning_rate": 7.49402390438247e-06, "loss": 0.6604, "step": 1882 }, { "epoch": 0.022513420772606083, "grad_norm": 5.298930644989014, "learning_rate": 7.4980079681274905e-06, "loss": 0.6562, "step": 1883 }, { "epoch": 0.02252537691746673, "grad_norm": 4.513965129852295, "learning_rate": 7.501992031872511e-06, "loss": 0.48, "step": 1884 }, { "epoch": 0.022537333062327383, "grad_norm": 9.67398452758789, "learning_rate": 7.505976095617531e-06, "loss": 0.6862, "step": 1885 }, { "epoch": 0.022549289207188034, "grad_norm": 6.3711371421813965, "learning_rate": 7.5099601593625505e-06, "loss": 0.6527, "step": 1886 }, { "epoch": 0.022561245352048686, "grad_norm": 2.802595853805542, "learning_rate": 7.51394422310757e-06, "loss": 0.5867, "step": 1887 }, { "epoch": 0.022573201496909338, "grad_norm": 3.635427474975586, "learning_rate": 7.51792828685259e-06, "loss": 0.6645, "step": 1888 }, { "epoch": 0.02258515764176999, "grad_norm": 4.181349754333496, "learning_rate": 7.52191235059761e-06, "loss": 0.4928, "step": 1889 }, { "epoch": 0.022597113786630638, "grad_norm": 3.4055912494659424, "learning_rate": 7.525896414342631e-06, "loss": 0.7367, "step": 1890 }, { "epoch": 0.02260906993149129, "grad_norm": 4.3272857666015625, "learning_rate": 7.52988047808765e-06, "loss": 0.6488, "step": 1891 }, { "epoch": 0.02262102607635194, "grad_norm": 2.1717300415039062, "learning_rate": 7.53386454183267e-06, "loss": 0.5486, "step": 1892 }, { "epoch": 0.022632982221212593, "grad_norm": 2.679853677749634, "learning_rate": 7.53784860557769e-06, "loss": 0.6246, "step": 1893 }, { "epoch": 0.022644938366073245, "grad_norm": 4.579233169555664, "learning_rate": 7.5418326693227096e-06, "loss": 0.7011, "step": 1894 }, { "epoch": 0.022656894510933893, "grad_norm": 2.435987710952759, "learning_rate": 7.54581673306773e-06, "loss": 0.4947, "step": 1895 }, { "epoch": 0.022668850655794544, "grad_norm": 2.4800164699554443, "learning_rate": 7.549800796812749e-06, "loss": 0.617, "step": 1896 }, { "epoch": 0.022680806800655196, "grad_norm": 1.5759333372116089, "learning_rate": 7.553784860557769e-06, "loss": 0.581, "step": 1897 }, { "epoch": 0.022692762945515848, "grad_norm": 3.1818861961364746, "learning_rate": 7.55776892430279e-06, "loss": 0.5917, "step": 1898 }, { "epoch": 0.0227047190903765, "grad_norm": 2.258632183074951, "learning_rate": 7.561752988047809e-06, "loss": 0.6657, "step": 1899 }, { "epoch": 0.02271667523523715, "grad_norm": 3.573896884918213, "learning_rate": 7.56573705179283e-06, "loss": 0.5191, "step": 1900 }, { "epoch": 0.0227286313800978, "grad_norm": 1.8010554313659668, "learning_rate": 7.569721115537849e-06, "loss": 0.5357, "step": 1901 }, { "epoch": 0.02274058752495845, "grad_norm": 2.450855255126953, "learning_rate": 7.573705179282869e-06, "loss": 0.6519, "step": 1902 }, { "epoch": 0.022752543669819103, "grad_norm": 1.9982960224151611, "learning_rate": 7.577689243027889e-06, "loss": 0.6095, "step": 1903 }, { "epoch": 0.022764499814679755, "grad_norm": 6.321815013885498, "learning_rate": 7.581673306772908e-06, "loss": 0.5626, "step": 1904 }, { "epoch": 0.022776455959540406, "grad_norm": 3.159575939178467, "learning_rate": 7.5856573705179295e-06, "loss": 0.5978, "step": 1905 }, { "epoch": 0.022788412104401058, "grad_norm": 3.856902837753296, "learning_rate": 7.589641434262949e-06, "loss": 0.6001, "step": 1906 }, { "epoch": 0.022800368249261706, "grad_norm": 1.880581259727478, "learning_rate": 7.5936254980079684e-06, "loss": 0.6965, "step": 1907 }, { "epoch": 0.022812324394122358, "grad_norm": 4.580042839050293, "learning_rate": 7.597609561752989e-06, "loss": 0.6105, "step": 1908 }, { "epoch": 0.02282428053898301, "grad_norm": 2.8010072708129883, "learning_rate": 7.601593625498008e-06, "loss": 0.5905, "step": 1909 }, { "epoch": 0.02283623668384366, "grad_norm": 12.976314544677734, "learning_rate": 7.6055776892430285e-06, "loss": 0.609, "step": 1910 }, { "epoch": 0.022848192828704313, "grad_norm": 5.628698348999023, "learning_rate": 7.609561752988048e-06, "loss": 0.6727, "step": 1911 }, { "epoch": 0.022860148973564965, "grad_norm": 3.8300628662109375, "learning_rate": 7.613545816733067e-06, "loss": 0.6253, "step": 1912 }, { "epoch": 0.022872105118425613, "grad_norm": 7.121125221252441, "learning_rate": 7.6175298804780886e-06, "loss": 0.5621, "step": 1913 }, { "epoch": 0.022884061263286265, "grad_norm": 2.3174312114715576, "learning_rate": 7.621513944223108e-06, "loss": 0.5642, "step": 1914 }, { "epoch": 0.022896017408146917, "grad_norm": 4.565451145172119, "learning_rate": 7.625498007968128e-06, "loss": 0.7316, "step": 1915 }, { "epoch": 0.02290797355300757, "grad_norm": 21.00645637512207, "learning_rate": 7.629482071713148e-06, "loss": 0.5371, "step": 1916 }, { "epoch": 0.02291992969786822, "grad_norm": 3.3689262866973877, "learning_rate": 7.633466135458169e-06, "loss": 0.5953, "step": 1917 }, { "epoch": 0.022931885842728872, "grad_norm": 3.5768139362335205, "learning_rate": 7.637450199203188e-06, "loss": 0.6022, "step": 1918 }, { "epoch": 0.02294384198758952, "grad_norm": 23.228496551513672, "learning_rate": 7.641434262948208e-06, "loss": 0.66, "step": 1919 }, { "epoch": 0.022955798132450172, "grad_norm": 2.2018492221832275, "learning_rate": 7.645418326693227e-06, "loss": 0.5474, "step": 1920 }, { "epoch": 0.022967754277310824, "grad_norm": 3.2055606842041016, "learning_rate": 7.649402390438247e-06, "loss": 0.5595, "step": 1921 }, { "epoch": 0.022979710422171475, "grad_norm": 2.9324748516082764, "learning_rate": 7.653386454183268e-06, "loss": 0.6741, "step": 1922 }, { "epoch": 0.022991666567032127, "grad_norm": 3.439958333969116, "learning_rate": 7.657370517928287e-06, "loss": 0.5892, "step": 1923 }, { "epoch": 0.02300362271189278, "grad_norm": 3.198516368865967, "learning_rate": 7.661354581673307e-06, "loss": 0.6087, "step": 1924 }, { "epoch": 0.023015578856753427, "grad_norm": 4.191287994384766, "learning_rate": 7.665338645418328e-06, "loss": 0.6958, "step": 1925 }, { "epoch": 0.02302753500161408, "grad_norm": 2.5139577388763428, "learning_rate": 7.669322709163347e-06, "loss": 0.6348, "step": 1926 }, { "epoch": 0.02303949114647473, "grad_norm": 3.3257322311401367, "learning_rate": 7.673306772908367e-06, "loss": 0.5923, "step": 1927 }, { "epoch": 0.023051447291335382, "grad_norm": 4.690308094024658, "learning_rate": 7.677290836653386e-06, "loss": 0.5092, "step": 1928 }, { "epoch": 0.023063403436196034, "grad_norm": 2.385418176651001, "learning_rate": 7.681274900398406e-06, "loss": 0.6334, "step": 1929 }, { "epoch": 0.023075359581056686, "grad_norm": 4.268050193786621, "learning_rate": 7.685258964143427e-06, "loss": 0.568, "step": 1930 }, { "epoch": 0.023087315725917334, "grad_norm": 5.463531970977783, "learning_rate": 7.689243027888446e-06, "loss": 0.6933, "step": 1931 }, { "epoch": 0.023099271870777986, "grad_norm": 2.817384719848633, "learning_rate": 7.693227091633468e-06, "loss": 0.5982, "step": 1932 }, { "epoch": 0.023111228015638637, "grad_norm": 2.9751687049865723, "learning_rate": 7.697211155378487e-06, "loss": 0.6597, "step": 1933 }, { "epoch": 0.02312318416049929, "grad_norm": 4.621806621551514, "learning_rate": 7.701195219123506e-06, "loss": 0.6282, "step": 1934 }, { "epoch": 0.02313514030535994, "grad_norm": 6.270543098449707, "learning_rate": 7.705179282868528e-06, "loss": 0.5931, "step": 1935 }, { "epoch": 0.023147096450220592, "grad_norm": 1.7781710624694824, "learning_rate": 7.709163346613547e-06, "loss": 0.5563, "step": 1936 }, { "epoch": 0.02315905259508124, "grad_norm": 3.293503999710083, "learning_rate": 7.713147410358567e-06, "loss": 0.5778, "step": 1937 }, { "epoch": 0.023171008739941892, "grad_norm": 2.646395683288574, "learning_rate": 7.717131474103586e-06, "loss": 0.5994, "step": 1938 }, { "epoch": 0.023182964884802544, "grad_norm": 2.9334239959716797, "learning_rate": 7.721115537848605e-06, "loss": 0.707, "step": 1939 }, { "epoch": 0.023194921029663196, "grad_norm": 3.865129232406616, "learning_rate": 7.725099601593627e-06, "loss": 0.5524, "step": 1940 }, { "epoch": 0.023206877174523848, "grad_norm": 2.8780574798583984, "learning_rate": 7.729083665338646e-06, "loss": 0.6411, "step": 1941 }, { "epoch": 0.0232188333193845, "grad_norm": 2.452472448348999, "learning_rate": 7.733067729083667e-06, "loss": 0.6444, "step": 1942 }, { "epoch": 0.023230789464245147, "grad_norm": 1.9886432886123657, "learning_rate": 7.737051792828687e-06, "loss": 0.5365, "step": 1943 }, { "epoch": 0.0232427456091058, "grad_norm": 3.3282580375671387, "learning_rate": 7.741035856573706e-06, "loss": 0.519, "step": 1944 }, { "epoch": 0.02325470175396645, "grad_norm": 3.535494804382324, "learning_rate": 7.745019920318726e-06, "loss": 0.609, "step": 1945 }, { "epoch": 0.023266657898827103, "grad_norm": 3.0611236095428467, "learning_rate": 7.749003984063745e-06, "loss": 0.5728, "step": 1946 }, { "epoch": 0.023278614043687754, "grad_norm": 3.400216817855835, "learning_rate": 7.752988047808766e-06, "loss": 0.7153, "step": 1947 }, { "epoch": 0.023290570188548406, "grad_norm": 4.561316967010498, "learning_rate": 7.756972111553786e-06, "loss": 0.609, "step": 1948 }, { "epoch": 0.023302526333409054, "grad_norm": 2.2776451110839844, "learning_rate": 7.760956175298805e-06, "loss": 0.5564, "step": 1949 }, { "epoch": 0.023314482478269706, "grad_norm": 2.9629921913146973, "learning_rate": 7.764940239043826e-06, "loss": 0.6036, "step": 1950 }, { "epoch": 0.023326438623130358, "grad_norm": 3.709559679031372, "learning_rate": 7.768924302788846e-06, "loss": 0.6599, "step": 1951 }, { "epoch": 0.02333839476799101, "grad_norm": 6.153728008270264, "learning_rate": 7.772908366533865e-06, "loss": 0.521, "step": 1952 }, { "epoch": 0.02335035091285166, "grad_norm": 2.885913610458374, "learning_rate": 7.776892430278885e-06, "loss": 0.6527, "step": 1953 }, { "epoch": 0.023362307057712313, "grad_norm": 1.6981905698776245, "learning_rate": 7.780876494023904e-06, "loss": 0.5139, "step": 1954 }, { "epoch": 0.02337426320257296, "grad_norm": 2.2737879753112793, "learning_rate": 7.784860557768925e-06, "loss": 0.6203, "step": 1955 }, { "epoch": 0.023386219347433613, "grad_norm": 7.897160053253174, "learning_rate": 7.788844621513945e-06, "loss": 0.6228, "step": 1956 }, { "epoch": 0.023398175492294265, "grad_norm": 2.8883607387542725, "learning_rate": 7.792828685258966e-06, "loss": 0.5603, "step": 1957 }, { "epoch": 0.023410131637154916, "grad_norm": 2.9673478603363037, "learning_rate": 7.796812749003985e-06, "loss": 0.5879, "step": 1958 }, { "epoch": 0.023422087782015568, "grad_norm": 2.1061484813690186, "learning_rate": 7.800796812749005e-06, "loss": 0.5743, "step": 1959 }, { "epoch": 0.02343404392687622, "grad_norm": 8.013120651245117, "learning_rate": 7.804780876494024e-06, "loss": 0.5976, "step": 1960 }, { "epoch": 0.023446000071736868, "grad_norm": 2.7415895462036133, "learning_rate": 7.808764940239044e-06, "loss": 0.5412, "step": 1961 }, { "epoch": 0.02345795621659752, "grad_norm": 4.568814754486084, "learning_rate": 7.812749003984065e-06, "loss": 0.5679, "step": 1962 }, { "epoch": 0.02346991236145817, "grad_norm": 4.344545841217041, "learning_rate": 7.816733067729084e-06, "loss": 0.5749, "step": 1963 }, { "epoch": 0.023481868506318823, "grad_norm": 7.3152875900268555, "learning_rate": 7.820717131474104e-06, "loss": 0.5282, "step": 1964 }, { "epoch": 0.023493824651179475, "grad_norm": 7.224410533905029, "learning_rate": 7.824701195219125e-06, "loss": 0.5909, "step": 1965 }, { "epoch": 0.023505780796040123, "grad_norm": 2.1473546028137207, "learning_rate": 7.828685258964144e-06, "loss": 0.669, "step": 1966 }, { "epoch": 0.023517736940900775, "grad_norm": 4.831884860992432, "learning_rate": 7.832669322709164e-06, "loss": 0.6766, "step": 1967 }, { "epoch": 0.023529693085761427, "grad_norm": 2.4630470275878906, "learning_rate": 7.836653386454183e-06, "loss": 0.586, "step": 1968 }, { "epoch": 0.02354164923062208, "grad_norm": 1.7735775709152222, "learning_rate": 7.840637450199203e-06, "loss": 0.5728, "step": 1969 }, { "epoch": 0.02355360537548273, "grad_norm": 1.8292349576950073, "learning_rate": 7.844621513944224e-06, "loss": 0.5413, "step": 1970 }, { "epoch": 0.02356556152034338, "grad_norm": 2.485727310180664, "learning_rate": 7.848605577689243e-06, "loss": 0.5764, "step": 1971 }, { "epoch": 0.02357751766520403, "grad_norm": 3.238784074783325, "learning_rate": 7.852589641434264e-06, "loss": 0.5805, "step": 1972 }, { "epoch": 0.02358947381006468, "grad_norm": 3.137174129486084, "learning_rate": 7.856573705179284e-06, "loss": 0.6412, "step": 1973 }, { "epoch": 0.023601429954925333, "grad_norm": 2.3269989490509033, "learning_rate": 7.860557768924303e-06, "loss": 0.5406, "step": 1974 }, { "epoch": 0.023613386099785985, "grad_norm": 2.4762935638427734, "learning_rate": 7.864541832669323e-06, "loss": 0.627, "step": 1975 }, { "epoch": 0.023625342244646637, "grad_norm": 4.117007732391357, "learning_rate": 7.868525896414342e-06, "loss": 0.5559, "step": 1976 }, { "epoch": 0.02363729838950729, "grad_norm": 2.2345285415649414, "learning_rate": 7.872509960159363e-06, "loss": 0.6163, "step": 1977 }, { "epoch": 0.023649254534367937, "grad_norm": 7.244786262512207, "learning_rate": 7.876494023904383e-06, "loss": 0.54, "step": 1978 }, { "epoch": 0.02366121067922859, "grad_norm": 2.0755274295806885, "learning_rate": 7.880478087649404e-06, "loss": 0.7084, "step": 1979 }, { "epoch": 0.02367316682408924, "grad_norm": 3.635838747024536, "learning_rate": 7.884462151394424e-06, "loss": 0.6817, "step": 1980 }, { "epoch": 0.023685122968949892, "grad_norm": 1.98629629611969, "learning_rate": 7.888446215139443e-06, "loss": 0.5713, "step": 1981 }, { "epoch": 0.023697079113810544, "grad_norm": 3.0280046463012695, "learning_rate": 7.892430278884462e-06, "loss": 0.6511, "step": 1982 }, { "epoch": 0.023709035258671195, "grad_norm": 3.117682456970215, "learning_rate": 7.896414342629482e-06, "loss": 0.6304, "step": 1983 }, { "epoch": 0.023720991403531844, "grad_norm": 3.264375686645508, "learning_rate": 7.900398406374503e-06, "loss": 0.8683, "step": 1984 }, { "epoch": 0.023732947548392495, "grad_norm": 7.250393390655518, "learning_rate": 7.904382470119523e-06, "loss": 0.6683, "step": 1985 }, { "epoch": 0.023744903693253147, "grad_norm": 3.4271469116210938, "learning_rate": 7.908366533864542e-06, "loss": 0.6152, "step": 1986 }, { "epoch": 0.0237568598381138, "grad_norm": 3.8495709896087646, "learning_rate": 7.912350597609563e-06, "loss": 0.6061, "step": 1987 }, { "epoch": 0.02376881598297445, "grad_norm": 5.935781478881836, "learning_rate": 7.916334661354583e-06, "loss": 0.568, "step": 1988 }, { "epoch": 0.023780772127835102, "grad_norm": 8.48680305480957, "learning_rate": 7.920318725099602e-06, "loss": 0.7166, "step": 1989 }, { "epoch": 0.02379272827269575, "grad_norm": 6.490087032318115, "learning_rate": 7.924302788844621e-06, "loss": 0.6308, "step": 1990 }, { "epoch": 0.023804684417556402, "grad_norm": 3.6128575801849365, "learning_rate": 7.928286852589641e-06, "loss": 0.6338, "step": 1991 }, { "epoch": 0.023816640562417054, "grad_norm": 6.792446613311768, "learning_rate": 7.932270916334662e-06, "loss": 0.6753, "step": 1992 }, { "epoch": 0.023828596707277706, "grad_norm": 34.451446533203125, "learning_rate": 7.936254980079682e-06, "loss": 0.5721, "step": 1993 }, { "epoch": 0.023840552852138357, "grad_norm": 4.084313869476318, "learning_rate": 7.940239043824703e-06, "loss": 0.5752, "step": 1994 }, { "epoch": 0.02385250899699901, "grad_norm": 4.369272708892822, "learning_rate": 7.944223107569722e-06, "loss": 0.5957, "step": 1995 }, { "epoch": 0.023864465141859657, "grad_norm": 3.6870646476745605, "learning_rate": 7.948207171314742e-06, "loss": 0.524, "step": 1996 }, { "epoch": 0.02387642128672031, "grad_norm": 5.83354377746582, "learning_rate": 7.952191235059761e-06, "loss": 0.5953, "step": 1997 }, { "epoch": 0.02388837743158096, "grad_norm": 1.901473879814148, "learning_rate": 7.95617529880478e-06, "loss": 0.6257, "step": 1998 }, { "epoch": 0.023900333576441612, "grad_norm": 2.9630346298217773, "learning_rate": 7.960159362549802e-06, "loss": 0.5453, "step": 1999 }, { "epoch": 0.023912289721302264, "grad_norm": 54.307106018066406, "learning_rate": 7.964143426294821e-06, "loss": 0.6794, "step": 2000 }, { "epoch": 0.023924245866162916, "grad_norm": 7.221742153167725, "learning_rate": 7.96812749003984e-06, "loss": 0.566, "step": 2001 }, { "epoch": 0.023936202011023564, "grad_norm": 1.9305797815322876, "learning_rate": 7.972111553784862e-06, "loss": 0.6272, "step": 2002 }, { "epoch": 0.023948158155884216, "grad_norm": 4.937914848327637, "learning_rate": 7.976095617529881e-06, "loss": 0.6026, "step": 2003 }, { "epoch": 0.023960114300744868, "grad_norm": 5.069412708282471, "learning_rate": 7.980079681274902e-06, "loss": 0.6683, "step": 2004 }, { "epoch": 0.02397207044560552, "grad_norm": 9.119441032409668, "learning_rate": 7.984063745019922e-06, "loss": 0.7109, "step": 2005 }, { "epoch": 0.02398402659046617, "grad_norm": 4.975715637207031, "learning_rate": 7.988047808764941e-06, "loss": 0.6601, "step": 2006 }, { "epoch": 0.023995982735326823, "grad_norm": 5.574361801147461, "learning_rate": 7.99203187250996e-06, "loss": 0.7149, "step": 2007 }, { "epoch": 0.02400793888018747, "grad_norm": 31.46563720703125, "learning_rate": 7.99601593625498e-06, "loss": 0.553, "step": 2008 }, { "epoch": 0.024019895025048123, "grad_norm": 22.67392921447754, "learning_rate": 8.000000000000001e-06, "loss": 0.6091, "step": 2009 }, { "epoch": 0.024031851169908774, "grad_norm": 2.834951400756836, "learning_rate": 8.00398406374502e-06, "loss": 0.6405, "step": 2010 }, { "epoch": 0.024043807314769426, "grad_norm": 4.549587249755859, "learning_rate": 8.00796812749004e-06, "loss": 0.7211, "step": 2011 }, { "epoch": 0.024055763459630078, "grad_norm": 5.139986991882324, "learning_rate": 8.011952191235061e-06, "loss": 0.6103, "step": 2012 }, { "epoch": 0.02406771960449073, "grad_norm": 3.8261406421661377, "learning_rate": 8.015936254980081e-06, "loss": 0.694, "step": 2013 }, { "epoch": 0.024079675749351378, "grad_norm": 2.8838894367218018, "learning_rate": 8.0199203187251e-06, "loss": 0.566, "step": 2014 }, { "epoch": 0.02409163189421203, "grad_norm": 3.6197168827056885, "learning_rate": 8.02390438247012e-06, "loss": 0.6619, "step": 2015 }, { "epoch": 0.02410358803907268, "grad_norm": 3.2014286518096924, "learning_rate": 8.02788844621514e-06, "loss": 0.5593, "step": 2016 }, { "epoch": 0.024115544183933333, "grad_norm": 3.9735751152038574, "learning_rate": 8.03187250996016e-06, "loss": 0.5817, "step": 2017 }, { "epoch": 0.024127500328793985, "grad_norm": 3.4088492393493652, "learning_rate": 8.03585657370518e-06, "loss": 0.7089, "step": 2018 }, { "epoch": 0.024139456473654636, "grad_norm": 3.579190254211426, "learning_rate": 8.039840637450201e-06, "loss": 0.5739, "step": 2019 }, { "epoch": 0.024151412618515285, "grad_norm": 4.920816421508789, "learning_rate": 8.04382470119522e-06, "loss": 0.6676, "step": 2020 }, { "epoch": 0.024163368763375936, "grad_norm": 2.8770995140075684, "learning_rate": 8.04780876494024e-06, "loss": 0.629, "step": 2021 }, { "epoch": 0.024175324908236588, "grad_norm": 6.239623546600342, "learning_rate": 8.05179282868526e-06, "loss": 0.7457, "step": 2022 }, { "epoch": 0.02418728105309724, "grad_norm": 2.3085248470306396, "learning_rate": 8.055776892430279e-06, "loss": 0.6056, "step": 2023 }, { "epoch": 0.02419923719795789, "grad_norm": 2.42034912109375, "learning_rate": 8.0597609561753e-06, "loss": 0.586, "step": 2024 }, { "epoch": 0.024211193342818543, "grad_norm": 4.265187740325928, "learning_rate": 8.06374501992032e-06, "loss": 0.6732, "step": 2025 }, { "epoch": 0.02422314948767919, "grad_norm": 3.531712532043457, "learning_rate": 8.067729083665339e-06, "loss": 0.5715, "step": 2026 }, { "epoch": 0.024235105632539843, "grad_norm": 8.721763610839844, "learning_rate": 8.07171314741036e-06, "loss": 0.6997, "step": 2027 }, { "epoch": 0.024247061777400495, "grad_norm": 3.2687153816223145, "learning_rate": 8.07569721115538e-06, "loss": 0.605, "step": 2028 }, { "epoch": 0.024259017922261147, "grad_norm": 4.963223457336426, "learning_rate": 8.079681274900399e-06, "loss": 0.7227, "step": 2029 }, { "epoch": 0.0242709740671218, "grad_norm": 4.0324811935424805, "learning_rate": 8.083665338645418e-06, "loss": 0.583, "step": 2030 }, { "epoch": 0.024282930211982447, "grad_norm": 3.3050081729888916, "learning_rate": 8.087649402390438e-06, "loss": 0.7316, "step": 2031 }, { "epoch": 0.0242948863568431, "grad_norm": 3.6362721920013428, "learning_rate": 8.091633466135459e-06, "loss": 0.5923, "step": 2032 }, { "epoch": 0.02430684250170375, "grad_norm": 4.2233662605285645, "learning_rate": 8.095617529880478e-06, "loss": 0.5661, "step": 2033 }, { "epoch": 0.024318798646564402, "grad_norm": 5.9621901512146, "learning_rate": 8.0996015936255e-06, "loss": 0.6389, "step": 2034 }, { "epoch": 0.024330754791425054, "grad_norm": 9.16396427154541, "learning_rate": 8.103585657370519e-06, "loss": 0.6364, "step": 2035 }, { "epoch": 0.024342710936285705, "grad_norm": 7.216876983642578, "learning_rate": 8.107569721115539e-06, "loss": 0.6325, "step": 2036 }, { "epoch": 0.024354667081146354, "grad_norm": 3.749551296234131, "learning_rate": 8.111553784860558e-06, "loss": 0.6693, "step": 2037 }, { "epoch": 0.024366623226007005, "grad_norm": 2.3959953784942627, "learning_rate": 8.115537848605577e-06, "loss": 0.5944, "step": 2038 }, { "epoch": 0.024378579370867657, "grad_norm": 5.384545803070068, "learning_rate": 8.119521912350599e-06, "loss": 0.55, "step": 2039 }, { "epoch": 0.02439053551572831, "grad_norm": 2.150963544845581, "learning_rate": 8.123505976095618e-06, "loss": 0.573, "step": 2040 }, { "epoch": 0.02440249166058896, "grad_norm": 2.3716375827789307, "learning_rate": 8.12749003984064e-06, "loss": 0.5527, "step": 2041 }, { "epoch": 0.024414447805449612, "grad_norm": 2.8788208961486816, "learning_rate": 8.131474103585659e-06, "loss": 0.6702, "step": 2042 }, { "epoch": 0.02442640395031026, "grad_norm": 3.672633647918701, "learning_rate": 8.135458167330678e-06, "loss": 0.7252, "step": 2043 }, { "epoch": 0.024438360095170912, "grad_norm": 16.229740142822266, "learning_rate": 8.139442231075698e-06, "loss": 0.6779, "step": 2044 }, { "epoch": 0.024450316240031564, "grad_norm": 2.712404251098633, "learning_rate": 8.143426294820717e-06, "loss": 0.6464, "step": 2045 }, { "epoch": 0.024462272384892216, "grad_norm": 5.786602973937988, "learning_rate": 8.147410358565738e-06, "loss": 0.626, "step": 2046 }, { "epoch": 0.024474228529752867, "grad_norm": 4.284975051879883, "learning_rate": 8.151394422310758e-06, "loss": 0.6176, "step": 2047 }, { "epoch": 0.02448618467461352, "grad_norm": 1.852618932723999, "learning_rate": 8.155378486055777e-06, "loss": 0.5682, "step": 2048 }, { "epoch": 0.024498140819474167, "grad_norm": 10.674643516540527, "learning_rate": 8.159362549800798e-06, "loss": 0.6303, "step": 2049 }, { "epoch": 0.02451009696433482, "grad_norm": 3.1835215091705322, "learning_rate": 8.163346613545818e-06, "loss": 0.6726, "step": 2050 }, { "epoch": 0.02452205310919547, "grad_norm": 2.6643288135528564, "learning_rate": 8.167330677290837e-06, "loss": 0.6735, "step": 2051 }, { "epoch": 0.024534009254056122, "grad_norm": 2.626784324645996, "learning_rate": 8.171314741035857e-06, "loss": 0.5256, "step": 2052 }, { "epoch": 0.024545965398916774, "grad_norm": 1.954952597618103, "learning_rate": 8.175298804780876e-06, "loss": 0.6339, "step": 2053 }, { "epoch": 0.024557921543777426, "grad_norm": 1.999113917350769, "learning_rate": 8.179282868525897e-06, "loss": 0.6987, "step": 2054 }, { "epoch": 0.024569877688638074, "grad_norm": 1.771098017692566, "learning_rate": 8.183266932270917e-06, "loss": 0.5693, "step": 2055 }, { "epoch": 0.024581833833498726, "grad_norm": 2.4881339073181152, "learning_rate": 8.187250996015938e-06, "loss": 0.7096, "step": 2056 }, { "epoch": 0.024593789978359377, "grad_norm": 2.113950490951538, "learning_rate": 8.191235059760957e-06, "loss": 0.655, "step": 2057 }, { "epoch": 0.02460574612322003, "grad_norm": 5.079489231109619, "learning_rate": 8.195219123505977e-06, "loss": 0.5667, "step": 2058 }, { "epoch": 0.02461770226808068, "grad_norm": 2.3118791580200195, "learning_rate": 8.199203187250996e-06, "loss": 0.6717, "step": 2059 }, { "epoch": 0.024629658412941333, "grad_norm": 3.608015298843384, "learning_rate": 8.203187250996016e-06, "loss": 0.6694, "step": 2060 }, { "epoch": 0.02464161455780198, "grad_norm": 4.211339950561523, "learning_rate": 8.207171314741037e-06, "loss": 0.6379, "step": 2061 }, { "epoch": 0.024653570702662633, "grad_norm": 2.0472872257232666, "learning_rate": 8.211155378486056e-06, "loss": 0.5966, "step": 2062 }, { "epoch": 0.024665526847523284, "grad_norm": 5.376012325286865, "learning_rate": 8.215139442231076e-06, "loss": 0.5927, "step": 2063 }, { "epoch": 0.024677482992383936, "grad_norm": 11.814934730529785, "learning_rate": 8.219123505976097e-06, "loss": 0.5634, "step": 2064 }, { "epoch": 0.024689439137244588, "grad_norm": 5.095673561096191, "learning_rate": 8.223107569721116e-06, "loss": 0.6204, "step": 2065 }, { "epoch": 0.02470139528210524, "grad_norm": 9.69583797454834, "learning_rate": 8.227091633466136e-06, "loss": 0.5664, "step": 2066 }, { "epoch": 0.024713351426965888, "grad_norm": 5.050361156463623, "learning_rate": 8.231075697211155e-06, "loss": 0.6029, "step": 2067 }, { "epoch": 0.02472530757182654, "grad_norm": 3.0190069675445557, "learning_rate": 8.235059760956175e-06, "loss": 0.6457, "step": 2068 }, { "epoch": 0.02473726371668719, "grad_norm": 2.5061533451080322, "learning_rate": 8.239043824701196e-06, "loss": 0.6003, "step": 2069 }, { "epoch": 0.024749219861547843, "grad_norm": 3.402078628540039, "learning_rate": 8.243027888446215e-06, "loss": 0.5349, "step": 2070 }, { "epoch": 0.024761176006408495, "grad_norm": 3.3064754009246826, "learning_rate": 8.247011952191236e-06, "loss": 0.613, "step": 2071 }, { "epoch": 0.024773132151269146, "grad_norm": 7.338658332824707, "learning_rate": 8.250996015936256e-06, "loss": 0.5703, "step": 2072 }, { "epoch": 0.024785088296129795, "grad_norm": 2.571244478225708, "learning_rate": 8.254980079681275e-06, "loss": 0.6866, "step": 2073 }, { "epoch": 0.024797044440990446, "grad_norm": 2.9027023315429688, "learning_rate": 8.258964143426297e-06, "loss": 0.6763, "step": 2074 }, { "epoch": 0.024809000585851098, "grad_norm": 3.1282739639282227, "learning_rate": 8.262948207171316e-06, "loss": 0.5761, "step": 2075 }, { "epoch": 0.02482095673071175, "grad_norm": 2.718775987625122, "learning_rate": 8.266932270916335e-06, "loss": 0.6672, "step": 2076 }, { "epoch": 0.0248329128755724, "grad_norm": 5.638730525970459, "learning_rate": 8.270916334661355e-06, "loss": 0.6539, "step": 2077 }, { "epoch": 0.024844869020433053, "grad_norm": 2.723865032196045, "learning_rate": 8.274900398406374e-06, "loss": 0.6633, "step": 2078 }, { "epoch": 0.0248568251652937, "grad_norm": 2.605304718017578, "learning_rate": 8.278884462151396e-06, "loss": 0.6272, "step": 2079 }, { "epoch": 0.024868781310154353, "grad_norm": 4.508316516876221, "learning_rate": 8.282868525896415e-06, "loss": 0.7011, "step": 2080 }, { "epoch": 0.024880737455015005, "grad_norm": 2.9478821754455566, "learning_rate": 8.286852589641436e-06, "loss": 0.6012, "step": 2081 }, { "epoch": 0.024892693599875657, "grad_norm": 2.138725996017456, "learning_rate": 8.290836653386456e-06, "loss": 0.7232, "step": 2082 }, { "epoch": 0.02490464974473631, "grad_norm": 2.1982650756835938, "learning_rate": 8.294820717131475e-06, "loss": 0.6522, "step": 2083 }, { "epoch": 0.02491660588959696, "grad_norm": 3.6039817333221436, "learning_rate": 8.298804780876494e-06, "loss": 0.6213, "step": 2084 }, { "epoch": 0.024928562034457608, "grad_norm": 3.3601322174072266, "learning_rate": 8.302788844621514e-06, "loss": 0.6317, "step": 2085 }, { "epoch": 0.02494051817931826, "grad_norm": 7.35615348815918, "learning_rate": 8.306772908366535e-06, "loss": 0.5815, "step": 2086 }, { "epoch": 0.02495247432417891, "grad_norm": 2.8367838859558105, "learning_rate": 8.310756972111555e-06, "loss": 0.6154, "step": 2087 }, { "epoch": 0.024964430469039563, "grad_norm": 2.096393346786499, "learning_rate": 8.314741035856574e-06, "loss": 0.6573, "step": 2088 }, { "epoch": 0.024976386613900215, "grad_norm": 2.4790265560150146, "learning_rate": 8.318725099601595e-06, "loss": 0.5984, "step": 2089 }, { "epoch": 0.024988342758760867, "grad_norm": 2.3577420711517334, "learning_rate": 8.322709163346615e-06, "loss": 0.6097, "step": 2090 }, { "epoch": 0.025000298903621515, "grad_norm": 2.391491174697876, "learning_rate": 8.326693227091634e-06, "loss": 0.6618, "step": 2091 }, { "epoch": 0.025012255048482167, "grad_norm": 2.78153133392334, "learning_rate": 8.330677290836654e-06, "loss": 0.6634, "step": 2092 }, { "epoch": 0.02502421119334282, "grad_norm": 2.7327747344970703, "learning_rate": 8.334661354581673e-06, "loss": 0.5796, "step": 2093 }, { "epoch": 0.02503616733820347, "grad_norm": 2.51373553276062, "learning_rate": 8.338645418326694e-06, "loss": 0.6036, "step": 2094 }, { "epoch": 0.025048123483064122, "grad_norm": 4.751618385314941, "learning_rate": 8.342629482071714e-06, "loss": 0.689, "step": 2095 }, { "epoch": 0.025060079627924774, "grad_norm": 3.741623640060425, "learning_rate": 8.346613545816735e-06, "loss": 0.6185, "step": 2096 }, { "epoch": 0.025072035772785422, "grad_norm": 3.956707000732422, "learning_rate": 8.350597609561754e-06, "loss": 0.6021, "step": 2097 }, { "epoch": 0.025083991917646074, "grad_norm": 3.701071262359619, "learning_rate": 8.354581673306774e-06, "loss": 0.7509, "step": 2098 }, { "epoch": 0.025095948062506725, "grad_norm": 2.1823232173919678, "learning_rate": 8.358565737051793e-06, "loss": 0.6021, "step": 2099 }, { "epoch": 0.025107904207367377, "grad_norm": 3.1969287395477295, "learning_rate": 8.362549800796813e-06, "loss": 0.6161, "step": 2100 }, { "epoch": 0.02511986035222803, "grad_norm": 3.4166765213012695, "learning_rate": 8.366533864541834e-06, "loss": 0.6624, "step": 2101 }, { "epoch": 0.025131816497088677, "grad_norm": 3.470127582550049, "learning_rate": 8.370517928286853e-06, "loss": 0.5294, "step": 2102 }, { "epoch": 0.02514377264194933, "grad_norm": 3.312999963760376, "learning_rate": 8.374501992031873e-06, "loss": 0.5945, "step": 2103 }, { "epoch": 0.02515572878680998, "grad_norm": 2.2389724254608154, "learning_rate": 8.378486055776894e-06, "loss": 0.6209, "step": 2104 }, { "epoch": 0.025167684931670632, "grad_norm": 2.255262613296509, "learning_rate": 8.382470119521913e-06, "loss": 0.597, "step": 2105 }, { "epoch": 0.025179641076531284, "grad_norm": 2.713876485824585, "learning_rate": 8.386454183266933e-06, "loss": 0.6794, "step": 2106 }, { "epoch": 0.025191597221391936, "grad_norm": 3.7782039642333984, "learning_rate": 8.390438247011952e-06, "loss": 0.6638, "step": 2107 }, { "epoch": 0.025203553366252584, "grad_norm": 2.4310245513916016, "learning_rate": 8.394422310756973e-06, "loss": 0.6795, "step": 2108 }, { "epoch": 0.025215509511113236, "grad_norm": 2.467538833618164, "learning_rate": 8.398406374501993e-06, "loss": 0.6525, "step": 2109 }, { "epoch": 0.025227465655973887, "grad_norm": 4.4168291091918945, "learning_rate": 8.402390438247012e-06, "loss": 0.6572, "step": 2110 }, { "epoch": 0.02523942180083454, "grad_norm": 5.261466026306152, "learning_rate": 8.406374501992033e-06, "loss": 0.6168, "step": 2111 }, { "epoch": 0.02525137794569519, "grad_norm": 2.1795194149017334, "learning_rate": 8.410358565737053e-06, "loss": 0.6651, "step": 2112 }, { "epoch": 0.025263334090555842, "grad_norm": 2.4134042263031006, "learning_rate": 8.414342629482072e-06, "loss": 0.6247, "step": 2113 }, { "epoch": 0.02527529023541649, "grad_norm": 6.2045440673828125, "learning_rate": 8.418326693227092e-06, "loss": 0.5151, "step": 2114 }, { "epoch": 0.025287246380277142, "grad_norm": 3.4299817085266113, "learning_rate": 8.422310756972111e-06, "loss": 0.6478, "step": 2115 }, { "epoch": 0.025299202525137794, "grad_norm": 7.977042198181152, "learning_rate": 8.426294820717132e-06, "loss": 0.6646, "step": 2116 }, { "epoch": 0.025311158669998446, "grad_norm": 2.5242230892181396, "learning_rate": 8.430278884462152e-06, "loss": 0.6403, "step": 2117 }, { "epoch": 0.025323114814859098, "grad_norm": 2.4200663566589355, "learning_rate": 8.434262948207173e-06, "loss": 0.5855, "step": 2118 }, { "epoch": 0.02533507095971975, "grad_norm": 3.2234232425689697, "learning_rate": 8.438247011952192e-06, "loss": 0.6677, "step": 2119 }, { "epoch": 0.025347027104580398, "grad_norm": 3.601137161254883, "learning_rate": 8.442231075697212e-06, "loss": 0.6811, "step": 2120 }, { "epoch": 0.02535898324944105, "grad_norm": 2.2758824825286865, "learning_rate": 8.446215139442231e-06, "loss": 0.6477, "step": 2121 }, { "epoch": 0.0253709393943017, "grad_norm": 2.490654945373535, "learning_rate": 8.45019920318725e-06, "loss": 0.6529, "step": 2122 }, { "epoch": 0.025382895539162353, "grad_norm": 3.0935888290405273, "learning_rate": 8.454183266932272e-06, "loss": 0.5537, "step": 2123 }, { "epoch": 0.025394851684023004, "grad_norm": 2.7616186141967773, "learning_rate": 8.458167330677291e-06, "loss": 0.5761, "step": 2124 }, { "epoch": 0.025406807828883656, "grad_norm": 5.3939900398254395, "learning_rate": 8.462151394422311e-06, "loss": 0.5416, "step": 2125 }, { "epoch": 0.025418763973744304, "grad_norm": 3.3714072704315186, "learning_rate": 8.466135458167332e-06, "loss": 0.6001, "step": 2126 }, { "epoch": 0.025430720118604956, "grad_norm": 4.173433303833008, "learning_rate": 8.470119521912351e-06, "loss": 0.7211, "step": 2127 }, { "epoch": 0.025442676263465608, "grad_norm": 2.8147544860839844, "learning_rate": 8.474103585657371e-06, "loss": 0.5801, "step": 2128 }, { "epoch": 0.02545463240832626, "grad_norm": 4.175613880157471, "learning_rate": 8.47808764940239e-06, "loss": 0.5734, "step": 2129 }, { "epoch": 0.02546658855318691, "grad_norm": 3.080590009689331, "learning_rate": 8.48207171314741e-06, "loss": 0.6383, "step": 2130 }, { "epoch": 0.025478544698047563, "grad_norm": 1.955857515335083, "learning_rate": 8.486055776892431e-06, "loss": 0.6231, "step": 2131 }, { "epoch": 0.02549050084290821, "grad_norm": 2.0832345485687256, "learning_rate": 8.49003984063745e-06, "loss": 0.5958, "step": 2132 }, { "epoch": 0.025502456987768863, "grad_norm": 2.770984411239624, "learning_rate": 8.494023904382472e-06, "loss": 0.7223, "step": 2133 }, { "epoch": 0.025514413132629515, "grad_norm": 2.4707067012786865, "learning_rate": 8.498007968127491e-06, "loss": 0.6706, "step": 2134 }, { "epoch": 0.025526369277490166, "grad_norm": 2.191685914993286, "learning_rate": 8.50199203187251e-06, "loss": 0.6024, "step": 2135 }, { "epoch": 0.025538325422350818, "grad_norm": 16.148035049438477, "learning_rate": 8.50597609561753e-06, "loss": 0.6042, "step": 2136 }, { "epoch": 0.02555028156721147, "grad_norm": 2.0021846294403076, "learning_rate": 8.50996015936255e-06, "loss": 0.6749, "step": 2137 }, { "epoch": 0.025562237712072118, "grad_norm": 4.471226215362549, "learning_rate": 8.51394422310757e-06, "loss": 0.5934, "step": 2138 }, { "epoch": 0.02557419385693277, "grad_norm": 6.237079620361328, "learning_rate": 8.51792828685259e-06, "loss": 0.6367, "step": 2139 }, { "epoch": 0.02558615000179342, "grad_norm": 4.5481486320495605, "learning_rate": 8.52191235059761e-06, "loss": 0.6292, "step": 2140 }, { "epoch": 0.025598106146654073, "grad_norm": 2.0885229110717773, "learning_rate": 8.52589641434263e-06, "loss": 0.6293, "step": 2141 }, { "epoch": 0.025610062291514725, "grad_norm": 4.487415790557861, "learning_rate": 8.52988047808765e-06, "loss": 0.6677, "step": 2142 }, { "epoch": 0.025622018436375377, "grad_norm": 5.294354438781738, "learning_rate": 8.533864541832671e-06, "loss": 0.6679, "step": 2143 }, { "epoch": 0.025633974581236025, "grad_norm": 17.718820571899414, "learning_rate": 8.53784860557769e-06, "loss": 0.5751, "step": 2144 }, { "epoch": 0.025645930726096677, "grad_norm": 3.095719814300537, "learning_rate": 8.54183266932271e-06, "loss": 0.5218, "step": 2145 }, { "epoch": 0.02565788687095733, "grad_norm": 3.172757387161255, "learning_rate": 8.54581673306773e-06, "loss": 0.7042, "step": 2146 }, { "epoch": 0.02566984301581798, "grad_norm": 5.56716251373291, "learning_rate": 8.549800796812749e-06, "loss": 0.6421, "step": 2147 }, { "epoch": 0.025681799160678632, "grad_norm": 1.7170464992523193, "learning_rate": 8.55378486055777e-06, "loss": 0.5901, "step": 2148 }, { "epoch": 0.025693755305539284, "grad_norm": 2.0941591262817383, "learning_rate": 8.55776892430279e-06, "loss": 0.6855, "step": 2149 }, { "epoch": 0.025705711450399932, "grad_norm": 1.7777528762817383, "learning_rate": 8.561752988047809e-06, "loss": 0.5205, "step": 2150 }, { "epoch": 0.025717667595260583, "grad_norm": 10.899077415466309, "learning_rate": 8.56573705179283e-06, "loss": 0.5356, "step": 2151 }, { "epoch": 0.025729623740121235, "grad_norm": 2.139779567718506, "learning_rate": 8.56972111553785e-06, "loss": 0.6313, "step": 2152 }, { "epoch": 0.025741579884981887, "grad_norm": 3.2692201137542725, "learning_rate": 8.57370517928287e-06, "loss": 0.5566, "step": 2153 }, { "epoch": 0.02575353602984254, "grad_norm": 9.280288696289062, "learning_rate": 8.577689243027889e-06, "loss": 0.6748, "step": 2154 }, { "epoch": 0.02576549217470319, "grad_norm": 3.2047617435455322, "learning_rate": 8.581673306772908e-06, "loss": 0.583, "step": 2155 }, { "epoch": 0.02577744831956384, "grad_norm": 3.7872653007507324, "learning_rate": 8.58565737051793e-06, "loss": 0.5832, "step": 2156 }, { "epoch": 0.02578940446442449, "grad_norm": 3.186504364013672, "learning_rate": 8.589641434262949e-06, "loss": 0.5654, "step": 2157 }, { "epoch": 0.025801360609285142, "grad_norm": 3.33258056640625, "learning_rate": 8.59362549800797e-06, "loss": 0.6497, "step": 2158 }, { "epoch": 0.025813316754145794, "grad_norm": 3.080282211303711, "learning_rate": 8.59760956175299e-06, "loss": 0.645, "step": 2159 }, { "epoch": 0.025825272899006445, "grad_norm": 2.9060819149017334, "learning_rate": 8.601593625498009e-06, "loss": 0.6336, "step": 2160 }, { "epoch": 0.025837229043867097, "grad_norm": 2.9220452308654785, "learning_rate": 8.605577689243028e-06, "loss": 0.6286, "step": 2161 }, { "epoch": 0.025849185188727745, "grad_norm": 3.7544426918029785, "learning_rate": 8.609561752988048e-06, "loss": 0.6543, "step": 2162 }, { "epoch": 0.025861141333588397, "grad_norm": 2.5993192195892334, "learning_rate": 8.613545816733069e-06, "loss": 0.6326, "step": 2163 }, { "epoch": 0.02587309747844905, "grad_norm": 5.373326778411865, "learning_rate": 8.617529880478088e-06, "loss": 0.644, "step": 2164 }, { "epoch": 0.0258850536233097, "grad_norm": 3.161445140838623, "learning_rate": 8.621513944223108e-06, "loss": 0.5851, "step": 2165 }, { "epoch": 0.025897009768170352, "grad_norm": 2.2715699672698975, "learning_rate": 8.625498007968129e-06, "loss": 0.674, "step": 2166 }, { "epoch": 0.025908965913031, "grad_norm": 28.575407028198242, "learning_rate": 8.629482071713148e-06, "loss": 0.6571, "step": 2167 }, { "epoch": 0.025920922057891652, "grad_norm": 13.873042106628418, "learning_rate": 8.633466135458168e-06, "loss": 0.5126, "step": 2168 }, { "epoch": 0.025932878202752304, "grad_norm": 14.24766731262207, "learning_rate": 8.637450199203187e-06, "loss": 0.6626, "step": 2169 }, { "epoch": 0.025944834347612956, "grad_norm": 62.56419372558594, "learning_rate": 8.641434262948208e-06, "loss": 0.6489, "step": 2170 }, { "epoch": 0.025956790492473607, "grad_norm": 4.632914066314697, "learning_rate": 8.645418326693228e-06, "loss": 0.7125, "step": 2171 }, { "epoch": 0.02596874663733426, "grad_norm": 5.390468597412109, "learning_rate": 8.649402390438247e-06, "loss": 0.554, "step": 2172 }, { "epoch": 0.025980702782194907, "grad_norm": 2.4185292720794678, "learning_rate": 8.653386454183269e-06, "loss": 0.606, "step": 2173 }, { "epoch": 0.02599265892705556, "grad_norm": 3.2522499561309814, "learning_rate": 8.657370517928288e-06, "loss": 0.5333, "step": 2174 }, { "epoch": 0.02600461507191621, "grad_norm": 5.110065460205078, "learning_rate": 8.661354581673307e-06, "loss": 0.559, "step": 2175 }, { "epoch": 0.026016571216776863, "grad_norm": 2.1393277645111084, "learning_rate": 8.665338645418327e-06, "loss": 0.6368, "step": 2176 }, { "epoch": 0.026028527361637514, "grad_norm": 3.189138889312744, "learning_rate": 8.669322709163346e-06, "loss": 0.6443, "step": 2177 }, { "epoch": 0.026040483506498166, "grad_norm": 3.709054470062256, "learning_rate": 8.673306772908367e-06, "loss": 0.6074, "step": 2178 }, { "epoch": 0.026052439651358814, "grad_norm": 5.243879795074463, "learning_rate": 8.677290836653387e-06, "loss": 0.6529, "step": 2179 }, { "epoch": 0.026064395796219466, "grad_norm": 9.775703430175781, "learning_rate": 8.681274900398408e-06, "loss": 0.6863, "step": 2180 }, { "epoch": 0.026076351941080118, "grad_norm": 3.770963191986084, "learning_rate": 8.685258964143428e-06, "loss": 0.5919, "step": 2181 }, { "epoch": 0.02608830808594077, "grad_norm": 2.288667678833008, "learning_rate": 8.689243027888447e-06, "loss": 0.6392, "step": 2182 }, { "epoch": 0.02610026423080142, "grad_norm": 9.38032054901123, "learning_rate": 8.693227091633466e-06, "loss": 0.5878, "step": 2183 }, { "epoch": 0.026112220375662073, "grad_norm": 4.067906856536865, "learning_rate": 8.697211155378486e-06, "loss": 0.6283, "step": 2184 }, { "epoch": 0.02612417652052272, "grad_norm": 2.066211462020874, "learning_rate": 8.701195219123507e-06, "loss": 0.5035, "step": 2185 }, { "epoch": 0.026136132665383373, "grad_norm": 3.5599098205566406, "learning_rate": 8.705179282868527e-06, "loss": 0.5921, "step": 2186 }, { "epoch": 0.026148088810244025, "grad_norm": 2.30873966217041, "learning_rate": 8.709163346613546e-06, "loss": 0.6203, "step": 2187 }, { "epoch": 0.026160044955104676, "grad_norm": 10.902985572814941, "learning_rate": 8.713147410358567e-06, "loss": 0.6139, "step": 2188 }, { "epoch": 0.026172001099965328, "grad_norm": 2.4275381565093994, "learning_rate": 8.717131474103587e-06, "loss": 0.4997, "step": 2189 }, { "epoch": 0.02618395724482598, "grad_norm": 6.719567775726318, "learning_rate": 8.721115537848606e-06, "loss": 0.5469, "step": 2190 }, { "epoch": 0.026195913389686628, "grad_norm": 3.059718370437622, "learning_rate": 8.725099601593626e-06, "loss": 0.5871, "step": 2191 }, { "epoch": 0.02620786953454728, "grad_norm": 3.05769944190979, "learning_rate": 8.729083665338645e-06, "loss": 0.6629, "step": 2192 }, { "epoch": 0.02621982567940793, "grad_norm": 2.0894107818603516, "learning_rate": 8.733067729083666e-06, "loss": 0.6362, "step": 2193 }, { "epoch": 0.026231781824268583, "grad_norm": 2.244966506958008, "learning_rate": 8.737051792828686e-06, "loss": 0.6797, "step": 2194 }, { "epoch": 0.026243737969129235, "grad_norm": 2.522423028945923, "learning_rate": 8.741035856573707e-06, "loss": 0.6468, "step": 2195 }, { "epoch": 0.026255694113989887, "grad_norm": 2.844834566116333, "learning_rate": 8.745019920318726e-06, "loss": 0.5429, "step": 2196 }, { "epoch": 0.026267650258850535, "grad_norm": 2.7014307975769043, "learning_rate": 8.749003984063746e-06, "loss": 0.6393, "step": 2197 }, { "epoch": 0.026279606403711187, "grad_norm": 8.144770622253418, "learning_rate": 8.752988047808765e-06, "loss": 0.6568, "step": 2198 }, { "epoch": 0.026291562548571838, "grad_norm": 2.539808988571167, "learning_rate": 8.756972111553785e-06, "loss": 0.5745, "step": 2199 }, { "epoch": 0.02630351869343249, "grad_norm": 3.942612886428833, "learning_rate": 8.760956175298806e-06, "loss": 0.5845, "step": 2200 }, { "epoch": 0.02631547483829314, "grad_norm": 2.178218126296997, "learning_rate": 8.764940239043825e-06, "loss": 0.5821, "step": 2201 }, { "epoch": 0.026327430983153793, "grad_norm": 4.256417274475098, "learning_rate": 8.768924302788845e-06, "loss": 0.541, "step": 2202 }, { "epoch": 0.02633938712801444, "grad_norm": 3.7187955379486084, "learning_rate": 8.772908366533866e-06, "loss": 0.4676, "step": 2203 }, { "epoch": 0.026351343272875093, "grad_norm": 3.2611730098724365, "learning_rate": 8.776892430278885e-06, "loss": 0.7047, "step": 2204 }, { "epoch": 0.026363299417735745, "grad_norm": 4.028233051300049, "learning_rate": 8.780876494023905e-06, "loss": 0.6365, "step": 2205 }, { "epoch": 0.026375255562596397, "grad_norm": 5.113590717315674, "learning_rate": 8.784860557768926e-06, "loss": 0.6747, "step": 2206 }, { "epoch": 0.02638721170745705, "grad_norm": 2.789489507675171, "learning_rate": 8.788844621513945e-06, "loss": 0.5109, "step": 2207 }, { "epoch": 0.0263991678523177, "grad_norm": 9.324646949768066, "learning_rate": 8.792828685258965e-06, "loss": 0.66, "step": 2208 }, { "epoch": 0.02641112399717835, "grad_norm": 3.23132586479187, "learning_rate": 8.796812749003984e-06, "loss": 0.5858, "step": 2209 }, { "epoch": 0.026423080142039, "grad_norm": 5.384922981262207, "learning_rate": 8.800796812749005e-06, "loss": 0.5996, "step": 2210 }, { "epoch": 0.026435036286899652, "grad_norm": 3.539499521255493, "learning_rate": 8.804780876494025e-06, "loss": 0.6193, "step": 2211 }, { "epoch": 0.026446992431760304, "grad_norm": 5.950798988342285, "learning_rate": 8.808764940239044e-06, "loss": 0.6291, "step": 2212 }, { "epoch": 0.026458948576620955, "grad_norm": 2.7193238735198975, "learning_rate": 8.812749003984065e-06, "loss": 0.641, "step": 2213 }, { "epoch": 0.026470904721481607, "grad_norm": 3.2025365829467773, "learning_rate": 8.816733067729085e-06, "loss": 0.6221, "step": 2214 }, { "epoch": 0.026482860866342255, "grad_norm": 1.8843058347702026, "learning_rate": 8.820717131474104e-06, "loss": 0.6118, "step": 2215 }, { "epoch": 0.026494817011202907, "grad_norm": 3.726104497909546, "learning_rate": 8.824701195219124e-06, "loss": 0.6942, "step": 2216 }, { "epoch": 0.02650677315606356, "grad_norm": 2.770479440689087, "learning_rate": 8.828685258964143e-06, "loss": 0.6592, "step": 2217 }, { "epoch": 0.02651872930092421, "grad_norm": 2.6081855297088623, "learning_rate": 8.832669322709164e-06, "loss": 0.6615, "step": 2218 }, { "epoch": 0.026530685445784862, "grad_norm": 3.3560214042663574, "learning_rate": 8.836653386454184e-06, "loss": 0.6234, "step": 2219 }, { "epoch": 0.026542641590645514, "grad_norm": 8.551736831665039, "learning_rate": 8.840637450199205e-06, "loss": 0.62, "step": 2220 }, { "epoch": 0.026554597735506162, "grad_norm": 9.55764102935791, "learning_rate": 8.844621513944224e-06, "loss": 0.6326, "step": 2221 }, { "epoch": 0.026566553880366814, "grad_norm": 4.420186996459961, "learning_rate": 8.848605577689244e-06, "loss": 0.5605, "step": 2222 }, { "epoch": 0.026578510025227466, "grad_norm": 2.1574642658233643, "learning_rate": 8.852589641434263e-06, "loss": 0.5934, "step": 2223 }, { "epoch": 0.026590466170088117, "grad_norm": 3.902971029281616, "learning_rate": 8.856573705179283e-06, "loss": 0.7194, "step": 2224 }, { "epoch": 0.02660242231494877, "grad_norm": 2.288123369216919, "learning_rate": 8.860557768924304e-06, "loss": 0.6275, "step": 2225 }, { "epoch": 0.02661437845980942, "grad_norm": 3.2049760818481445, "learning_rate": 8.864541832669323e-06, "loss": 0.5541, "step": 2226 }, { "epoch": 0.02662633460467007, "grad_norm": 5.230268478393555, "learning_rate": 8.868525896414343e-06, "loss": 0.7173, "step": 2227 }, { "epoch": 0.02663829074953072, "grad_norm": 3.3245558738708496, "learning_rate": 8.872509960159364e-06, "loss": 0.6329, "step": 2228 }, { "epoch": 0.026650246894391372, "grad_norm": 13.131962776184082, "learning_rate": 8.876494023904384e-06, "loss": 0.7199, "step": 2229 }, { "epoch": 0.026662203039252024, "grad_norm": 4.496532440185547, "learning_rate": 8.880478087649403e-06, "loss": 0.6304, "step": 2230 }, { "epoch": 0.026674159184112676, "grad_norm": 3.6243035793304443, "learning_rate": 8.884462151394422e-06, "loss": 0.6433, "step": 2231 }, { "epoch": 0.026686115328973324, "grad_norm": 4.3567585945129395, "learning_rate": 8.888446215139444e-06, "loss": 0.6334, "step": 2232 }, { "epoch": 0.026698071473833976, "grad_norm": 4.795907497406006, "learning_rate": 8.892430278884463e-06, "loss": 0.5977, "step": 2233 }, { "epoch": 0.026710027618694628, "grad_norm": 4.797717571258545, "learning_rate": 8.896414342629482e-06, "loss": 0.5783, "step": 2234 }, { "epoch": 0.02672198376355528, "grad_norm": 2.3785457611083984, "learning_rate": 8.900398406374504e-06, "loss": 0.5806, "step": 2235 }, { "epoch": 0.02673393990841593, "grad_norm": 2.62162709236145, "learning_rate": 8.904382470119523e-06, "loss": 0.6901, "step": 2236 }, { "epoch": 0.026745896053276583, "grad_norm": 3.9801697731018066, "learning_rate": 8.908366533864543e-06, "loss": 0.6289, "step": 2237 }, { "epoch": 0.02675785219813723, "grad_norm": 13.02430534362793, "learning_rate": 8.912350597609562e-06, "loss": 0.6376, "step": 2238 }, { "epoch": 0.026769808342997883, "grad_norm": 4.421365737915039, "learning_rate": 8.916334661354581e-06, "loss": 0.7293, "step": 2239 }, { "epoch": 0.026781764487858534, "grad_norm": 6.403133392333984, "learning_rate": 8.920318725099603e-06, "loss": 0.5632, "step": 2240 }, { "epoch": 0.026793720632719186, "grad_norm": 6.047699928283691, "learning_rate": 8.924302788844622e-06, "loss": 0.6, "step": 2241 }, { "epoch": 0.026805676777579838, "grad_norm": 4.099193096160889, "learning_rate": 8.928286852589643e-06, "loss": 0.6079, "step": 2242 }, { "epoch": 0.02681763292244049, "grad_norm": 2.4870290756225586, "learning_rate": 8.932270916334663e-06, "loss": 0.6655, "step": 2243 }, { "epoch": 0.026829589067301138, "grad_norm": 4.839480876922607, "learning_rate": 8.936254980079682e-06, "loss": 0.652, "step": 2244 }, { "epoch": 0.02684154521216179, "grad_norm": 2.9557387828826904, "learning_rate": 8.940239043824702e-06, "loss": 0.5878, "step": 2245 }, { "epoch": 0.02685350135702244, "grad_norm": 4.958639144897461, "learning_rate": 8.944223107569721e-06, "loss": 0.6151, "step": 2246 }, { "epoch": 0.026865457501883093, "grad_norm": 5.24459981918335, "learning_rate": 8.948207171314742e-06, "loss": 0.6255, "step": 2247 }, { "epoch": 0.026877413646743745, "grad_norm": 2.2709908485412598, "learning_rate": 8.952191235059762e-06, "loss": 0.5993, "step": 2248 }, { "epoch": 0.026889369791604396, "grad_norm": 2.1620399951934814, "learning_rate": 8.956175298804781e-06, "loss": 0.5757, "step": 2249 }, { "epoch": 0.026901325936465045, "grad_norm": 3.1912994384765625, "learning_rate": 8.960159362549802e-06, "loss": 0.5815, "step": 2250 }, { "epoch": 0.026913282081325696, "grad_norm": 3.3920159339904785, "learning_rate": 8.964143426294822e-06, "loss": 0.4775, "step": 2251 }, { "epoch": 0.026925238226186348, "grad_norm": 28.702852249145508, "learning_rate": 8.968127490039841e-06, "loss": 0.6006, "step": 2252 }, { "epoch": 0.026937194371047, "grad_norm": 2.8867998123168945, "learning_rate": 8.97211155378486e-06, "loss": 0.6312, "step": 2253 }, { "epoch": 0.02694915051590765, "grad_norm": 3.048616647720337, "learning_rate": 8.97609561752988e-06, "loss": 0.6704, "step": 2254 }, { "epoch": 0.026961106660768303, "grad_norm": 3.080747604370117, "learning_rate": 8.980079681274901e-06, "loss": 0.6111, "step": 2255 }, { "epoch": 0.02697306280562895, "grad_norm": 3.4781668186187744, "learning_rate": 8.98406374501992e-06, "loss": 0.6946, "step": 2256 }, { "epoch": 0.026985018950489603, "grad_norm": 2.454845428466797, "learning_rate": 8.988047808764942e-06, "loss": 0.6509, "step": 2257 }, { "epoch": 0.026996975095350255, "grad_norm": 4.653334617614746, "learning_rate": 8.992031872509961e-06, "loss": 0.7103, "step": 2258 }, { "epoch": 0.027008931240210907, "grad_norm": 4.757311820983887, "learning_rate": 8.99601593625498e-06, "loss": 0.614, "step": 2259 }, { "epoch": 0.02702088738507156, "grad_norm": 2.7898528575897217, "learning_rate": 9e-06, "loss": 0.5933, "step": 2260 }, { "epoch": 0.02703284352993221, "grad_norm": 3.347402811050415, "learning_rate": 9.00398406374502e-06, "loss": 0.6196, "step": 2261 }, { "epoch": 0.02704479967479286, "grad_norm": 10.086821556091309, "learning_rate": 9.00796812749004e-06, "loss": 0.5528, "step": 2262 }, { "epoch": 0.02705675581965351, "grad_norm": 3.3918824195861816, "learning_rate": 9.01195219123506e-06, "loss": 0.6213, "step": 2263 }, { "epoch": 0.027068711964514162, "grad_norm": 4.052858352661133, "learning_rate": 9.01593625498008e-06, "loss": 0.6281, "step": 2264 }, { "epoch": 0.027080668109374813, "grad_norm": 2.692173957824707, "learning_rate": 9.019920318725101e-06, "loss": 0.5463, "step": 2265 }, { "epoch": 0.027092624254235465, "grad_norm": 4.007496356964111, "learning_rate": 9.02390438247012e-06, "loss": 0.615, "step": 2266 }, { "epoch": 0.027104580399096117, "grad_norm": 9.15587043762207, "learning_rate": 9.02788844621514e-06, "loss": 0.5621, "step": 2267 }, { "epoch": 0.027116536543956765, "grad_norm": 2.5440239906311035, "learning_rate": 9.03187250996016e-06, "loss": 0.6338, "step": 2268 }, { "epoch": 0.027128492688817417, "grad_norm": 7.4456071853637695, "learning_rate": 9.035856573705179e-06, "loss": 0.652, "step": 2269 }, { "epoch": 0.02714044883367807, "grad_norm": 3.6477291584014893, "learning_rate": 9.0398406374502e-06, "loss": 0.494, "step": 2270 }, { "epoch": 0.02715240497853872, "grad_norm": 2.5637731552124023, "learning_rate": 9.04382470119522e-06, "loss": 0.6437, "step": 2271 }, { "epoch": 0.027164361123399372, "grad_norm": 3.5715131759643555, "learning_rate": 9.04780876494024e-06, "loss": 0.5543, "step": 2272 }, { "epoch": 0.027176317268260024, "grad_norm": 4.902973651885986, "learning_rate": 9.05179282868526e-06, "loss": 0.6317, "step": 2273 }, { "epoch": 0.027188273413120672, "grad_norm": 2.1492695808410645, "learning_rate": 9.05577689243028e-06, "loss": 0.5857, "step": 2274 }, { "epoch": 0.027200229557981324, "grad_norm": 5.455619812011719, "learning_rate": 9.0597609561753e-06, "loss": 0.5746, "step": 2275 }, { "epoch": 0.027212185702841975, "grad_norm": 2.8152542114257812, "learning_rate": 9.06374501992032e-06, "loss": 0.6491, "step": 2276 }, { "epoch": 0.027224141847702627, "grad_norm": 2.144813060760498, "learning_rate": 9.06772908366534e-06, "loss": 0.6837, "step": 2277 }, { "epoch": 0.02723609799256328, "grad_norm": 2.838203191757202, "learning_rate": 9.071713147410359e-06, "loss": 0.6464, "step": 2278 }, { "epoch": 0.02724805413742393, "grad_norm": 3.7403855323791504, "learning_rate": 9.075697211155378e-06, "loss": 0.5753, "step": 2279 }, { "epoch": 0.02726001028228458, "grad_norm": 4.477349758148193, "learning_rate": 9.0796812749004e-06, "loss": 0.633, "step": 2280 }, { "epoch": 0.02727196642714523, "grad_norm": 2.674389600753784, "learning_rate": 9.083665338645419e-06, "loss": 0.677, "step": 2281 }, { "epoch": 0.027283922572005882, "grad_norm": 3.7610926628112793, "learning_rate": 9.08764940239044e-06, "loss": 0.571, "step": 2282 }, { "epoch": 0.027295878716866534, "grad_norm": 5.085848331451416, "learning_rate": 9.09163346613546e-06, "loss": 0.5875, "step": 2283 }, { "epoch": 0.027307834861727186, "grad_norm": 3.509190082550049, "learning_rate": 9.095617529880479e-06, "loss": 0.6393, "step": 2284 }, { "epoch": 0.027319791006587837, "grad_norm": 2.931629180908203, "learning_rate": 9.099601593625499e-06, "loss": 0.6457, "step": 2285 }, { "epoch": 0.027331747151448486, "grad_norm": 2.5835254192352295, "learning_rate": 9.103585657370518e-06, "loss": 0.5091, "step": 2286 }, { "epoch": 0.027343703296309137, "grad_norm": 2.1661574840545654, "learning_rate": 9.107569721115539e-06, "loss": 0.6887, "step": 2287 }, { "epoch": 0.02735565944116979, "grad_norm": 4.206492900848389, "learning_rate": 9.111553784860559e-06, "loss": 0.6619, "step": 2288 }, { "epoch": 0.02736761558603044, "grad_norm": 3.3259358406066895, "learning_rate": 9.115537848605578e-06, "loss": 0.5493, "step": 2289 }, { "epoch": 0.027379571730891093, "grad_norm": 3.4881951808929443, "learning_rate": 9.1195219123506e-06, "loss": 0.6297, "step": 2290 }, { "epoch": 0.027391527875751744, "grad_norm": 2.8682472705841064, "learning_rate": 9.123505976095619e-06, "loss": 0.7281, "step": 2291 }, { "epoch": 0.027403484020612393, "grad_norm": 2.211468458175659, "learning_rate": 9.127490039840638e-06, "loss": 0.7198, "step": 2292 }, { "epoch": 0.027415440165473044, "grad_norm": 2.2252323627471924, "learning_rate": 9.131474103585658e-06, "loss": 0.6707, "step": 2293 }, { "epoch": 0.027427396310333696, "grad_norm": 4.595821380615234, "learning_rate": 9.135458167330679e-06, "loss": 0.6092, "step": 2294 }, { "epoch": 0.027439352455194348, "grad_norm": 2.147493839263916, "learning_rate": 9.139442231075698e-06, "loss": 0.6345, "step": 2295 }, { "epoch": 0.027451308600055, "grad_norm": 2.301769495010376, "learning_rate": 9.143426294820718e-06, "loss": 0.5672, "step": 2296 }, { "epoch": 0.02746326474491565, "grad_norm": 3.4422895908355713, "learning_rate": 9.147410358565739e-06, "loss": 0.5938, "step": 2297 }, { "epoch": 0.0274752208897763, "grad_norm": 6.582071304321289, "learning_rate": 9.151394422310758e-06, "loss": 0.5211, "step": 2298 }, { "epoch": 0.02748717703463695, "grad_norm": 1.9629184007644653, "learning_rate": 9.155378486055778e-06, "loss": 0.5912, "step": 2299 }, { "epoch": 0.027499133179497603, "grad_norm": 2.036191940307617, "learning_rate": 9.159362549800797e-06, "loss": 0.5836, "step": 2300 }, { "epoch": 0.027511089324358255, "grad_norm": 10.96713924407959, "learning_rate": 9.163346613545817e-06, "loss": 0.5732, "step": 2301 }, { "epoch": 0.027523045469218906, "grad_norm": 2.6516196727752686, "learning_rate": 9.167330677290838e-06, "loss": 0.5609, "step": 2302 }, { "epoch": 0.027535001614079554, "grad_norm": 36.73719024658203, "learning_rate": 9.171314741035857e-06, "loss": 0.6613, "step": 2303 }, { "epoch": 0.027546957758940206, "grad_norm": 3.227973461151123, "learning_rate": 9.175298804780878e-06, "loss": 0.6091, "step": 2304 }, { "epoch": 0.027558913903800858, "grad_norm": 4.332701206207275, "learning_rate": 9.179282868525898e-06, "loss": 0.6093, "step": 2305 }, { "epoch": 0.02757087004866151, "grad_norm": 3.3023993968963623, "learning_rate": 9.183266932270917e-06, "loss": 0.6308, "step": 2306 }, { "epoch": 0.02758282619352216, "grad_norm": 3.150982618331909, "learning_rate": 9.187250996015937e-06, "loss": 0.7281, "step": 2307 }, { "epoch": 0.027594782338382813, "grad_norm": 3.343302011489868, "learning_rate": 9.191235059760956e-06, "loss": 0.6502, "step": 2308 }, { "epoch": 0.02760673848324346, "grad_norm": 2.9322168827056885, "learning_rate": 9.195219123505977e-06, "loss": 0.7246, "step": 2309 }, { "epoch": 0.027618694628104113, "grad_norm": 2.1397509574890137, "learning_rate": 9.199203187250997e-06, "loss": 0.6845, "step": 2310 }, { "epoch": 0.027630650772964765, "grad_norm": 2.848223924636841, "learning_rate": 9.203187250996016e-06, "loss": 0.5417, "step": 2311 }, { "epoch": 0.027642606917825416, "grad_norm": 3.5456838607788086, "learning_rate": 9.207171314741037e-06, "loss": 0.7059, "step": 2312 }, { "epoch": 0.027654563062686068, "grad_norm": 1.861157774925232, "learning_rate": 9.211155378486057e-06, "loss": 0.6214, "step": 2313 }, { "epoch": 0.02766651920754672, "grad_norm": 8.155801773071289, "learning_rate": 9.215139442231076e-06, "loss": 0.6019, "step": 2314 }, { "epoch": 0.027678475352407368, "grad_norm": 3.2586658000946045, "learning_rate": 9.219123505976096e-06, "loss": 0.801, "step": 2315 }, { "epoch": 0.02769043149726802, "grad_norm": 3.584092378616333, "learning_rate": 9.223107569721115e-06, "loss": 0.5715, "step": 2316 }, { "epoch": 0.02770238764212867, "grad_norm": 8.54874038696289, "learning_rate": 9.227091633466136e-06, "loss": 0.6417, "step": 2317 }, { "epoch": 0.027714343786989323, "grad_norm": 8.010350227355957, "learning_rate": 9.231075697211156e-06, "loss": 0.7416, "step": 2318 }, { "epoch": 0.027726299931849975, "grad_norm": 2.423797369003296, "learning_rate": 9.235059760956177e-06, "loss": 0.5505, "step": 2319 }, { "epoch": 0.027738256076710627, "grad_norm": 2.348242998123169, "learning_rate": 9.239043824701196e-06, "loss": 0.6204, "step": 2320 }, { "epoch": 0.027750212221571275, "grad_norm": 4.952844142913818, "learning_rate": 9.243027888446216e-06, "loss": 0.5558, "step": 2321 }, { "epoch": 0.027762168366431927, "grad_norm": 2.582587242126465, "learning_rate": 9.247011952191235e-06, "loss": 0.5882, "step": 2322 }, { "epoch": 0.02777412451129258, "grad_norm": 4.784007549285889, "learning_rate": 9.250996015936255e-06, "loss": 0.7639, "step": 2323 }, { "epoch": 0.02778608065615323, "grad_norm": 2.632683753967285, "learning_rate": 9.254980079681276e-06, "loss": 0.7364, "step": 2324 }, { "epoch": 0.027798036801013882, "grad_norm": 2.130492687225342, "learning_rate": 9.258964143426295e-06, "loss": 0.6522, "step": 2325 }, { "epoch": 0.027809992945874534, "grad_norm": 3.070277452468872, "learning_rate": 9.262948207171315e-06, "loss": 0.5735, "step": 2326 }, { "epoch": 0.027821949090735182, "grad_norm": 2.6272952556610107, "learning_rate": 9.266932270916336e-06, "loss": 0.631, "step": 2327 }, { "epoch": 0.027833905235595834, "grad_norm": 3.1358964443206787, "learning_rate": 9.270916334661355e-06, "loss": 0.727, "step": 2328 }, { "epoch": 0.027845861380456485, "grad_norm": 2.4472875595092773, "learning_rate": 9.274900398406375e-06, "loss": 0.5879, "step": 2329 }, { "epoch": 0.027857817525317137, "grad_norm": 3.8149120807647705, "learning_rate": 9.278884462151394e-06, "loss": 0.6669, "step": 2330 }, { "epoch": 0.02786977367017779, "grad_norm": 4.180617809295654, "learning_rate": 9.282868525896414e-06, "loss": 0.6851, "step": 2331 }, { "epoch": 0.02788172981503844, "grad_norm": 5.806421279907227, "learning_rate": 9.286852589641435e-06, "loss": 0.6017, "step": 2332 }, { "epoch": 0.02789368595989909, "grad_norm": 11.095144271850586, "learning_rate": 9.290836653386454e-06, "loss": 0.6037, "step": 2333 }, { "epoch": 0.02790564210475974, "grad_norm": 9.469705581665039, "learning_rate": 9.294820717131476e-06, "loss": 0.6989, "step": 2334 }, { "epoch": 0.027917598249620392, "grad_norm": 2.0552256107330322, "learning_rate": 9.298804780876495e-06, "loss": 0.6432, "step": 2335 }, { "epoch": 0.027929554394481044, "grad_norm": 6.87484884262085, "learning_rate": 9.302788844621515e-06, "loss": 0.7549, "step": 2336 }, { "epoch": 0.027941510539341696, "grad_norm": 4.010448455810547, "learning_rate": 9.306772908366534e-06, "loss": 0.6978, "step": 2337 }, { "epoch": 0.027953466684202347, "grad_norm": 2.3911209106445312, "learning_rate": 9.310756972111553e-06, "loss": 0.5456, "step": 2338 }, { "epoch": 0.027965422829062996, "grad_norm": 2.8002870082855225, "learning_rate": 9.314741035856575e-06, "loss": 0.629, "step": 2339 }, { "epoch": 0.027977378973923647, "grad_norm": 14.886402130126953, "learning_rate": 9.318725099601594e-06, "loss": 0.5899, "step": 2340 }, { "epoch": 0.0279893351187843, "grad_norm": 2.118403673171997, "learning_rate": 9.322709163346614e-06, "loss": 0.6629, "step": 2341 }, { "epoch": 0.02800129126364495, "grad_norm": 2.1793324947357178, "learning_rate": 9.326693227091635e-06, "loss": 0.5431, "step": 2342 }, { "epoch": 0.028013247408505602, "grad_norm": 10.583122253417969, "learning_rate": 9.330677290836654e-06, "loss": 0.6355, "step": 2343 }, { "epoch": 0.028025203553366254, "grad_norm": 2.0454230308532715, "learning_rate": 9.334661354581675e-06, "loss": 0.6789, "step": 2344 }, { "epoch": 0.028037159698226902, "grad_norm": 2.991741418838501, "learning_rate": 9.338645418326695e-06, "loss": 0.5522, "step": 2345 }, { "epoch": 0.028049115843087554, "grad_norm": 3.8834476470947266, "learning_rate": 9.342629482071714e-06, "loss": 0.6272, "step": 2346 }, { "epoch": 0.028061071987948206, "grad_norm": 5.432900905609131, "learning_rate": 9.346613545816734e-06, "loss": 0.5485, "step": 2347 }, { "epoch": 0.028073028132808858, "grad_norm": 7.405416011810303, "learning_rate": 9.350597609561753e-06, "loss": 0.7181, "step": 2348 }, { "epoch": 0.02808498427766951, "grad_norm": 3.0826711654663086, "learning_rate": 9.354581673306774e-06, "loss": 0.6531, "step": 2349 }, { "epoch": 0.02809694042253016, "grad_norm": 8.82320499420166, "learning_rate": 9.358565737051794e-06, "loss": 0.705, "step": 2350 }, { "epoch": 0.02810889656739081, "grad_norm": 9.836735725402832, "learning_rate": 9.362549800796813e-06, "loss": 0.5992, "step": 2351 }, { "epoch": 0.02812085271225146, "grad_norm": 2.3661868572235107, "learning_rate": 9.366533864541834e-06, "loss": 0.6248, "step": 2352 }, { "epoch": 0.028132808857112113, "grad_norm": 2.8669657707214355, "learning_rate": 9.370517928286854e-06, "loss": 0.6256, "step": 2353 }, { "epoch": 0.028144765001972764, "grad_norm": 4.278280735015869, "learning_rate": 9.374501992031873e-06, "loss": 0.6635, "step": 2354 }, { "epoch": 0.028156721146833416, "grad_norm": 2.326326608657837, "learning_rate": 9.378486055776893e-06, "loss": 0.6253, "step": 2355 }, { "epoch": 0.028168677291694068, "grad_norm": 4.188565254211426, "learning_rate": 9.382470119521914e-06, "loss": 0.5619, "step": 2356 }, { "epoch": 0.028180633436554716, "grad_norm": 2.8233351707458496, "learning_rate": 9.386454183266933e-06, "loss": 0.6475, "step": 2357 }, { "epoch": 0.028192589581415368, "grad_norm": 2.332636594772339, "learning_rate": 9.390438247011953e-06, "loss": 0.6067, "step": 2358 }, { "epoch": 0.02820454572627602, "grad_norm": 4.460232257843018, "learning_rate": 9.394422310756974e-06, "loss": 0.6732, "step": 2359 }, { "epoch": 0.02821650187113667, "grad_norm": 2.952653646469116, "learning_rate": 9.398406374501993e-06, "loss": 0.5556, "step": 2360 }, { "epoch": 0.028228458015997323, "grad_norm": 2.092383623123169, "learning_rate": 9.402390438247013e-06, "loss": 0.6077, "step": 2361 }, { "epoch": 0.028240414160857975, "grad_norm": 3.100703239440918, "learning_rate": 9.406374501992032e-06, "loss": 0.6795, "step": 2362 }, { "epoch": 0.028252370305718623, "grad_norm": 2.2570948600769043, "learning_rate": 9.410358565737052e-06, "loss": 0.5867, "step": 2363 }, { "epoch": 0.028264326450579275, "grad_norm": 2.3468542098999023, "learning_rate": 9.414342629482073e-06, "loss": 0.6673, "step": 2364 }, { "epoch": 0.028276282595439926, "grad_norm": 11.14476203918457, "learning_rate": 9.418326693227092e-06, "loss": 0.5467, "step": 2365 }, { "epoch": 0.028288238740300578, "grad_norm": 4.893223762512207, "learning_rate": 9.422310756972113e-06, "loss": 0.6028, "step": 2366 }, { "epoch": 0.02830019488516123, "grad_norm": 3.6984169483184814, "learning_rate": 9.426294820717133e-06, "loss": 0.5448, "step": 2367 }, { "epoch": 0.028312151030021878, "grad_norm": 4.836824893951416, "learning_rate": 9.430278884462152e-06, "loss": 0.6312, "step": 2368 }, { "epoch": 0.02832410717488253, "grad_norm": 2.9135236740112305, "learning_rate": 9.434262948207172e-06, "loss": 0.6768, "step": 2369 }, { "epoch": 0.02833606331974318, "grad_norm": 3.1607320308685303, "learning_rate": 9.438247011952191e-06, "loss": 0.6228, "step": 2370 }, { "epoch": 0.028348019464603833, "grad_norm": 6.590402126312256, "learning_rate": 9.442231075697212e-06, "loss": 0.6434, "step": 2371 }, { "epoch": 0.028359975609464485, "grad_norm": 2.46219539642334, "learning_rate": 9.446215139442232e-06, "loss": 0.7061, "step": 2372 }, { "epoch": 0.028371931754325137, "grad_norm": 2.934612274169922, "learning_rate": 9.450199203187251e-06, "loss": 0.6038, "step": 2373 }, { "epoch": 0.028383887899185785, "grad_norm": 5.552829265594482, "learning_rate": 9.454183266932273e-06, "loss": 0.6437, "step": 2374 }, { "epoch": 0.028395844044046437, "grad_norm": 2.5818052291870117, "learning_rate": 9.458167330677292e-06, "loss": 0.6076, "step": 2375 }, { "epoch": 0.02840780018890709, "grad_norm": 8.871711730957031, "learning_rate": 9.462151394422311e-06, "loss": 0.7008, "step": 2376 }, { "epoch": 0.02841975633376774, "grad_norm": 2.0111947059631348, "learning_rate": 9.466135458167331e-06, "loss": 0.5783, "step": 2377 }, { "epoch": 0.028431712478628392, "grad_norm": 2.5405445098876953, "learning_rate": 9.47011952191235e-06, "loss": 0.6474, "step": 2378 }, { "epoch": 0.028443668623489043, "grad_norm": 3.691519021987915, "learning_rate": 9.474103585657372e-06, "loss": 0.6066, "step": 2379 }, { "epoch": 0.02845562476834969, "grad_norm": 3.7957870960235596, "learning_rate": 9.478087649402391e-06, "loss": 0.4878, "step": 2380 }, { "epoch": 0.028467580913210343, "grad_norm": 2.43808650970459, "learning_rate": 9.482071713147412e-06, "loss": 0.5567, "step": 2381 }, { "epoch": 0.028479537058070995, "grad_norm": 3.9693942070007324, "learning_rate": 9.486055776892432e-06, "loss": 0.6311, "step": 2382 }, { "epoch": 0.028491493202931647, "grad_norm": 3.483768939971924, "learning_rate": 9.490039840637451e-06, "loss": 0.6326, "step": 2383 }, { "epoch": 0.0285034493477923, "grad_norm": 10.193144798278809, "learning_rate": 9.49402390438247e-06, "loss": 0.6209, "step": 2384 }, { "epoch": 0.02851540549265295, "grad_norm": 3.970357656478882, "learning_rate": 9.49800796812749e-06, "loss": 0.6287, "step": 2385 }, { "epoch": 0.0285273616375136, "grad_norm": 2.2269864082336426, "learning_rate": 9.501992031872511e-06, "loss": 0.5296, "step": 2386 }, { "epoch": 0.02853931778237425, "grad_norm": 4.519774436950684, "learning_rate": 9.50597609561753e-06, "loss": 0.6398, "step": 2387 }, { "epoch": 0.028551273927234902, "grad_norm": 5.210847854614258, "learning_rate": 9.50996015936255e-06, "loss": 0.6523, "step": 2388 }, { "epoch": 0.028563230072095554, "grad_norm": 2.646052360534668, "learning_rate": 9.513944223107571e-06, "loss": 0.6121, "step": 2389 }, { "epoch": 0.028575186216956205, "grad_norm": 2.9111249446868896, "learning_rate": 9.51792828685259e-06, "loss": 0.6476, "step": 2390 }, { "epoch": 0.028587142361816857, "grad_norm": 3.85366153717041, "learning_rate": 9.52191235059761e-06, "loss": 0.7126, "step": 2391 }, { "epoch": 0.028599098506677505, "grad_norm": 2.649512767791748, "learning_rate": 9.52589641434263e-06, "loss": 0.5507, "step": 2392 }, { "epoch": 0.028611054651538157, "grad_norm": 5.216139316558838, "learning_rate": 9.529880478087649e-06, "loss": 0.6077, "step": 2393 }, { "epoch": 0.02862301079639881, "grad_norm": 2.6003010272979736, "learning_rate": 9.53386454183267e-06, "loss": 0.591, "step": 2394 }, { "epoch": 0.02863496694125946, "grad_norm": 3.6465413570404053, "learning_rate": 9.53784860557769e-06, "loss": 0.6823, "step": 2395 }, { "epoch": 0.028646923086120112, "grad_norm": 2.2329158782958984, "learning_rate": 9.54183266932271e-06, "loss": 0.6677, "step": 2396 }, { "epoch": 0.028658879230980764, "grad_norm": 4.9390997886657715, "learning_rate": 9.54581673306773e-06, "loss": 0.6846, "step": 2397 }, { "epoch": 0.028670835375841412, "grad_norm": 2.144343614578247, "learning_rate": 9.54980079681275e-06, "loss": 0.5754, "step": 2398 }, { "epoch": 0.028682791520702064, "grad_norm": 4.741141319274902, "learning_rate": 9.553784860557769e-06, "loss": 0.5525, "step": 2399 }, { "epoch": 0.028694747665562716, "grad_norm": 2.7986903190612793, "learning_rate": 9.557768924302789e-06, "loss": 0.6217, "step": 2400 }, { "epoch": 0.028706703810423367, "grad_norm": 2.472094774246216, "learning_rate": 9.56175298804781e-06, "loss": 0.6924, "step": 2401 }, { "epoch": 0.02871865995528402, "grad_norm": 3.5998644828796387, "learning_rate": 9.56573705179283e-06, "loss": 0.5798, "step": 2402 }, { "epoch": 0.02873061610014467, "grad_norm": 2.986199378967285, "learning_rate": 9.569721115537849e-06, "loss": 0.676, "step": 2403 }, { "epoch": 0.02874257224500532, "grad_norm": 3.1960301399230957, "learning_rate": 9.57370517928287e-06, "loss": 0.6628, "step": 2404 }, { "epoch": 0.02875452838986597, "grad_norm": 2.356313705444336, "learning_rate": 9.57768924302789e-06, "loss": 0.5377, "step": 2405 }, { "epoch": 0.028766484534726623, "grad_norm": 5.2787675857543945, "learning_rate": 9.581673306772909e-06, "loss": 0.5364, "step": 2406 }, { "epoch": 0.028778440679587274, "grad_norm": 4.088493824005127, "learning_rate": 9.585657370517928e-06, "loss": 0.6656, "step": 2407 }, { "epoch": 0.028790396824447926, "grad_norm": 2.347421884536743, "learning_rate": 9.589641434262948e-06, "loss": 0.6363, "step": 2408 }, { "epoch": 0.028802352969308578, "grad_norm": 3.730252265930176, "learning_rate": 9.593625498007969e-06, "loss": 0.6525, "step": 2409 }, { "epoch": 0.028814309114169226, "grad_norm": 3.5679101943969727, "learning_rate": 9.597609561752988e-06, "loss": 0.6246, "step": 2410 }, { "epoch": 0.028826265259029878, "grad_norm": 2.995436191558838, "learning_rate": 9.60159362549801e-06, "loss": 0.5444, "step": 2411 }, { "epoch": 0.02883822140389053, "grad_norm": 3.3428375720977783, "learning_rate": 9.605577689243029e-06, "loss": 0.5559, "step": 2412 }, { "epoch": 0.02885017754875118, "grad_norm": 2.7117180824279785, "learning_rate": 9.609561752988048e-06, "loss": 0.6131, "step": 2413 }, { "epoch": 0.028862133693611833, "grad_norm": 1.8930305242538452, "learning_rate": 9.61354581673307e-06, "loss": 0.5612, "step": 2414 }, { "epoch": 0.028874089838472485, "grad_norm": 3.7798068523406982, "learning_rate": 9.617529880478089e-06, "loss": 0.6593, "step": 2415 }, { "epoch": 0.028886045983333133, "grad_norm": 2.515836715698242, "learning_rate": 9.621513944223108e-06, "loss": 0.6865, "step": 2416 }, { "epoch": 0.028898002128193784, "grad_norm": 1.77298903465271, "learning_rate": 9.625498007968128e-06, "loss": 0.626, "step": 2417 }, { "epoch": 0.028909958273054436, "grad_norm": 4.848187446594238, "learning_rate": 9.629482071713149e-06, "loss": 0.6829, "step": 2418 }, { "epoch": 0.028921914417915088, "grad_norm": 5.767956733703613, "learning_rate": 9.633466135458168e-06, "loss": 0.71, "step": 2419 }, { "epoch": 0.02893387056277574, "grad_norm": 2.637265682220459, "learning_rate": 9.637450199203188e-06, "loss": 0.6445, "step": 2420 }, { "epoch": 0.02894582670763639, "grad_norm": 2.623086452484131, "learning_rate": 9.641434262948209e-06, "loss": 0.6854, "step": 2421 }, { "epoch": 0.02895778285249704, "grad_norm": 2.6845602989196777, "learning_rate": 9.645418326693228e-06, "loss": 0.7534, "step": 2422 }, { "epoch": 0.02896973899735769, "grad_norm": 3.273261308670044, "learning_rate": 9.649402390438248e-06, "loss": 0.694, "step": 2423 }, { "epoch": 0.028981695142218343, "grad_norm": 5.098624229431152, "learning_rate": 9.653386454183267e-06, "loss": 0.6302, "step": 2424 }, { "epoch": 0.028993651287078995, "grad_norm": 3.59979248046875, "learning_rate": 9.657370517928287e-06, "loss": 0.6122, "step": 2425 }, { "epoch": 0.029005607431939646, "grad_norm": 4.65544319152832, "learning_rate": 9.661354581673308e-06, "loss": 0.5856, "step": 2426 }, { "epoch": 0.029017563576800298, "grad_norm": 2.9141695499420166, "learning_rate": 9.665338645418327e-06, "loss": 0.7059, "step": 2427 }, { "epoch": 0.029029519721660946, "grad_norm": 2.5356271266937256, "learning_rate": 9.669322709163349e-06, "loss": 0.6667, "step": 2428 }, { "epoch": 0.029041475866521598, "grad_norm": 4.435794353485107, "learning_rate": 9.673306772908368e-06, "loss": 0.6979, "step": 2429 }, { "epoch": 0.02905343201138225, "grad_norm": 2.332937717437744, "learning_rate": 9.677290836653388e-06, "loss": 0.7046, "step": 2430 }, { "epoch": 0.0290653881562429, "grad_norm": 6.034387588500977, "learning_rate": 9.681274900398407e-06, "loss": 0.5827, "step": 2431 }, { "epoch": 0.029077344301103553, "grad_norm": 2.079131603240967, "learning_rate": 9.685258964143426e-06, "loss": 0.5923, "step": 2432 }, { "epoch": 0.029089300445964205, "grad_norm": 4.201067924499512, "learning_rate": 9.689243027888448e-06, "loss": 0.6968, "step": 2433 }, { "epoch": 0.029101256590824853, "grad_norm": 16.457714080810547, "learning_rate": 9.693227091633467e-06, "loss": 0.632, "step": 2434 }, { "epoch": 0.029113212735685505, "grad_norm": 3.970318555831909, "learning_rate": 9.697211155378487e-06, "loss": 0.6747, "step": 2435 }, { "epoch": 0.029125168880546157, "grad_norm": 1.9693411588668823, "learning_rate": 9.701195219123508e-06, "loss": 0.6406, "step": 2436 }, { "epoch": 0.02913712502540681, "grad_norm": 7.602905750274658, "learning_rate": 9.705179282868527e-06, "loss": 0.596, "step": 2437 }, { "epoch": 0.02914908117026746, "grad_norm": 1.8944296836853027, "learning_rate": 9.709163346613547e-06, "loss": 0.61, "step": 2438 }, { "epoch": 0.02916103731512811, "grad_norm": 2.3916072845458984, "learning_rate": 9.713147410358566e-06, "loss": 0.6863, "step": 2439 }, { "epoch": 0.02917299345998876, "grad_norm": 2.3298490047454834, "learning_rate": 9.717131474103585e-06, "loss": 0.5491, "step": 2440 }, { "epoch": 0.029184949604849412, "grad_norm": 8.128369331359863, "learning_rate": 9.721115537848607e-06, "loss": 0.6023, "step": 2441 }, { "epoch": 0.029196905749710064, "grad_norm": 2.869765520095825, "learning_rate": 9.725099601593626e-06, "loss": 0.6131, "step": 2442 }, { "epoch": 0.029208861894570715, "grad_norm": 5.061326026916504, "learning_rate": 9.729083665338647e-06, "loss": 0.6934, "step": 2443 }, { "epoch": 0.029220818039431367, "grad_norm": 2.3420827388763428, "learning_rate": 9.733067729083667e-06, "loss": 0.5649, "step": 2444 }, { "epoch": 0.029232774184292015, "grad_norm": 1.9590967893600464, "learning_rate": 9.737051792828686e-06, "loss": 0.5917, "step": 2445 }, { "epoch": 0.029244730329152667, "grad_norm": 2.1120197772979736, "learning_rate": 9.741035856573706e-06, "loss": 0.5796, "step": 2446 }, { "epoch": 0.02925668647401332, "grad_norm": 1.686461329460144, "learning_rate": 9.745019920318725e-06, "loss": 0.6667, "step": 2447 }, { "epoch": 0.02926864261887397, "grad_norm": 3.2307872772216797, "learning_rate": 9.749003984063746e-06, "loss": 0.6893, "step": 2448 }, { "epoch": 0.029280598763734622, "grad_norm": 2.4536454677581787, "learning_rate": 9.752988047808766e-06, "loss": 0.6752, "step": 2449 }, { "epoch": 0.029292554908595274, "grad_norm": 2.5526833534240723, "learning_rate": 9.756972111553785e-06, "loss": 0.7112, "step": 2450 }, { "epoch": 0.029304511053455922, "grad_norm": 2.726898193359375, "learning_rate": 9.760956175298806e-06, "loss": 0.6042, "step": 2451 }, { "epoch": 0.029316467198316574, "grad_norm": 29.50424575805664, "learning_rate": 9.764940239043826e-06, "loss": 0.6059, "step": 2452 }, { "epoch": 0.029328423343177226, "grad_norm": 7.970520496368408, "learning_rate": 9.768924302788845e-06, "loss": 0.531, "step": 2453 }, { "epoch": 0.029340379488037877, "grad_norm": 1.8335587978363037, "learning_rate": 9.772908366533865e-06, "loss": 0.6337, "step": 2454 }, { "epoch": 0.02935233563289853, "grad_norm": 9.245265007019043, "learning_rate": 9.776892430278884e-06, "loss": 0.7211, "step": 2455 }, { "epoch": 0.02936429177775918, "grad_norm": 4.730400562286377, "learning_rate": 9.780876494023905e-06, "loss": 0.6238, "step": 2456 }, { "epoch": 0.02937624792261983, "grad_norm": 2.5002384185791016, "learning_rate": 9.784860557768925e-06, "loss": 0.6435, "step": 2457 }, { "epoch": 0.02938820406748048, "grad_norm": 2.9671077728271484, "learning_rate": 9.788844621513946e-06, "loss": 0.7122, "step": 2458 }, { "epoch": 0.029400160212341132, "grad_norm": 1.983014464378357, "learning_rate": 9.792828685258965e-06, "loss": 0.5713, "step": 2459 }, { "epoch": 0.029412116357201784, "grad_norm": 7.657238006591797, "learning_rate": 9.796812749003985e-06, "loss": 0.6173, "step": 2460 }, { "epoch": 0.029424072502062436, "grad_norm": 2.59271240234375, "learning_rate": 9.800796812749004e-06, "loss": 0.5907, "step": 2461 }, { "epoch": 0.029436028646923088, "grad_norm": 2.4436676502227783, "learning_rate": 9.804780876494024e-06, "loss": 0.6544, "step": 2462 }, { "epoch": 0.029447984791783736, "grad_norm": 2.6792995929718018, "learning_rate": 9.808764940239045e-06, "loss": 0.5314, "step": 2463 }, { "epoch": 0.029459940936644387, "grad_norm": 28.005159378051758, "learning_rate": 9.812749003984064e-06, "loss": 0.7423, "step": 2464 }, { "epoch": 0.02947189708150504, "grad_norm": 3.144113540649414, "learning_rate": 9.816733067729084e-06, "loss": 0.5572, "step": 2465 }, { "epoch": 0.02948385322636569, "grad_norm": 2.8699023723602295, "learning_rate": 9.820717131474105e-06, "loss": 0.716, "step": 2466 }, { "epoch": 0.029495809371226343, "grad_norm": 4.7538604736328125, "learning_rate": 9.824701195219124e-06, "loss": 0.6492, "step": 2467 }, { "epoch": 0.029507765516086994, "grad_norm": 3.400416374206543, "learning_rate": 9.828685258964144e-06, "loss": 0.6659, "step": 2468 }, { "epoch": 0.029519721660947643, "grad_norm": 2.1143436431884766, "learning_rate": 9.832669322709163e-06, "loss": 0.6073, "step": 2469 }, { "epoch": 0.029531677805808294, "grad_norm": 4.806789875030518, "learning_rate": 9.836653386454183e-06, "loss": 0.671, "step": 2470 }, { "epoch": 0.029543633950668946, "grad_norm": 3.570136308670044, "learning_rate": 9.840637450199204e-06, "loss": 0.6805, "step": 2471 }, { "epoch": 0.029555590095529598, "grad_norm": 4.807948112487793, "learning_rate": 9.844621513944223e-06, "loss": 0.6235, "step": 2472 }, { "epoch": 0.02956754624039025, "grad_norm": 3.02740478515625, "learning_rate": 9.848605577689245e-06, "loss": 0.5989, "step": 2473 }, { "epoch": 0.0295795023852509, "grad_norm": 3.1111230850219727, "learning_rate": 9.852589641434264e-06, "loss": 0.6102, "step": 2474 }, { "epoch": 0.02959145853011155, "grad_norm": 2.7858598232269287, "learning_rate": 9.856573705179283e-06, "loss": 0.6757, "step": 2475 }, { "epoch": 0.0296034146749722, "grad_norm": 3.5040321350097656, "learning_rate": 9.860557768924303e-06, "loss": 0.5405, "step": 2476 }, { "epoch": 0.029615370819832853, "grad_norm": 3.0775094032287598, "learning_rate": 9.864541832669324e-06, "loss": 0.5909, "step": 2477 }, { "epoch": 0.029627326964693505, "grad_norm": 14.692625999450684, "learning_rate": 9.868525896414343e-06, "loss": 0.5965, "step": 2478 }, { "epoch": 0.029639283109554156, "grad_norm": 2.421757221221924, "learning_rate": 9.872509960159363e-06, "loss": 0.5934, "step": 2479 }, { "epoch": 0.029651239254414808, "grad_norm": 4.195242404937744, "learning_rate": 9.876494023904384e-06, "loss": 0.7132, "step": 2480 }, { "epoch": 0.029663195399275456, "grad_norm": 2.9157590866088867, "learning_rate": 9.880478087649404e-06, "loss": 0.733, "step": 2481 }, { "epoch": 0.029675151544136108, "grad_norm": 6.653414249420166, "learning_rate": 9.884462151394423e-06, "loss": 0.6528, "step": 2482 }, { "epoch": 0.02968710768899676, "grad_norm": 4.512192249298096, "learning_rate": 9.888446215139444e-06, "loss": 0.5903, "step": 2483 }, { "epoch": 0.02969906383385741, "grad_norm": 1.931985855102539, "learning_rate": 9.892430278884464e-06, "loss": 0.6882, "step": 2484 }, { "epoch": 0.029711019978718063, "grad_norm": 2.0187973976135254, "learning_rate": 9.896414342629483e-06, "loss": 0.5934, "step": 2485 }, { "epoch": 0.029722976123578715, "grad_norm": 3.604844808578491, "learning_rate": 9.900398406374503e-06, "loss": 0.6034, "step": 2486 }, { "epoch": 0.029734932268439363, "grad_norm": 7.048389434814453, "learning_rate": 9.904382470119522e-06, "loss": 0.7184, "step": 2487 }, { "epoch": 0.029746888413300015, "grad_norm": 2.7256577014923096, "learning_rate": 9.908366533864543e-06, "loss": 0.6884, "step": 2488 }, { "epoch": 0.029758844558160667, "grad_norm": 7.948963165283203, "learning_rate": 9.912350597609563e-06, "loss": 0.6529, "step": 2489 }, { "epoch": 0.02977080070302132, "grad_norm": 2.3187005519866943, "learning_rate": 9.916334661354584e-06, "loss": 0.6325, "step": 2490 }, { "epoch": 0.02978275684788197, "grad_norm": 4.097919464111328, "learning_rate": 9.920318725099603e-06, "loss": 0.6417, "step": 2491 }, { "epoch": 0.02979471299274262, "grad_norm": 6.216172218322754, "learning_rate": 9.924302788844623e-06, "loss": 0.6362, "step": 2492 }, { "epoch": 0.02980666913760327, "grad_norm": 3.443023681640625, "learning_rate": 9.928286852589642e-06, "loss": 0.7036, "step": 2493 }, { "epoch": 0.02981862528246392, "grad_norm": 2.274427890777588, "learning_rate": 9.932270916334662e-06, "loss": 0.5534, "step": 2494 }, { "epoch": 0.029830581427324573, "grad_norm": 2.036921262741089, "learning_rate": 9.936254980079683e-06, "loss": 0.6596, "step": 2495 }, { "epoch": 0.029842537572185225, "grad_norm": 4.884960651397705, "learning_rate": 9.940239043824702e-06, "loss": 0.6406, "step": 2496 }, { "epoch": 0.029854493717045877, "grad_norm": 6.388926982879639, "learning_rate": 9.944223107569722e-06, "loss": 0.6731, "step": 2497 }, { "epoch": 0.02986644986190653, "grad_norm": 3.7689106464385986, "learning_rate": 9.948207171314743e-06, "loss": 0.6763, "step": 2498 }, { "epoch": 0.029878406006767177, "grad_norm": 2.7940752506256104, "learning_rate": 9.952191235059762e-06, "loss": 0.6772, "step": 2499 }, { "epoch": 0.02989036215162783, "grad_norm": 1.976894497871399, "learning_rate": 9.956175298804782e-06, "loss": 0.5535, "step": 2500 }, { "epoch": 0.02990231829648848, "grad_norm": 2.8885343074798584, "learning_rate": 9.960159362549801e-06, "loss": 0.6764, "step": 2501 }, { "epoch": 0.029914274441349132, "grad_norm": 3.3684840202331543, "learning_rate": 9.96414342629482e-06, "loss": 0.6387, "step": 2502 }, { "epoch": 0.029926230586209784, "grad_norm": 2.993103504180908, "learning_rate": 9.968127490039842e-06, "loss": 0.5648, "step": 2503 }, { "epoch": 0.029938186731070432, "grad_norm": 2.9717793464660645, "learning_rate": 9.972111553784861e-06, "loss": 0.6565, "step": 2504 }, { "epoch": 0.029950142875931084, "grad_norm": 3.6740331649780273, "learning_rate": 9.976095617529882e-06, "loss": 0.6297, "step": 2505 }, { "epoch": 0.029962099020791735, "grad_norm": 5.154743671417236, "learning_rate": 9.980079681274902e-06, "loss": 0.593, "step": 2506 }, { "epoch": 0.029974055165652387, "grad_norm": 2.4528188705444336, "learning_rate": 9.984063745019921e-06, "loss": 0.6346, "step": 2507 }, { "epoch": 0.02998601131051304, "grad_norm": 3.111107349395752, "learning_rate": 9.98804780876494e-06, "loss": 0.5675, "step": 2508 }, { "epoch": 0.02999796745537369, "grad_norm": 2.2751801013946533, "learning_rate": 9.99203187250996e-06, "loss": 0.6901, "step": 2509 }, { "epoch": 0.03000992360023434, "grad_norm": 6.316036224365234, "learning_rate": 9.996015936254981e-06, "loss": 0.6353, "step": 2510 }, { "epoch": 0.03002187974509499, "grad_norm": 6.3455376625061035, "learning_rate": 1e-05, "loss": 0.648, "step": 2511 }, { "epoch": 0.030033835889955642, "grad_norm": 4.518802165985107, "learning_rate": 9.999999996251241e-06, "loss": 0.5871, "step": 2512 }, { "epoch": 0.030045792034816294, "grad_norm": 1.917385220527649, "learning_rate": 9.999999985004965e-06, "loss": 0.5706, "step": 2513 }, { "epoch": 0.030057748179676946, "grad_norm": 2.7319891452789307, "learning_rate": 9.99999996626117e-06, "loss": 0.6285, "step": 2514 }, { "epoch": 0.030069704324537597, "grad_norm": 2.911472797393799, "learning_rate": 9.999999940019858e-06, "loss": 0.6722, "step": 2515 }, { "epoch": 0.030081660469398246, "grad_norm": 5.470757484436035, "learning_rate": 9.999999906281028e-06, "loss": 0.684, "step": 2516 }, { "epoch": 0.030093616614258897, "grad_norm": 5.070748329162598, "learning_rate": 9.999999865044682e-06, "loss": 0.6061, "step": 2517 }, { "epoch": 0.03010557275911955, "grad_norm": 3.673640251159668, "learning_rate": 9.999999816310816e-06, "loss": 0.6779, "step": 2518 }, { "epoch": 0.0301175289039802, "grad_norm": 2.9344208240509033, "learning_rate": 9.999999760079435e-06, "loss": 0.6571, "step": 2519 }, { "epoch": 0.030129485048840852, "grad_norm": 2.908320188522339, "learning_rate": 9.999999696350534e-06, "loss": 0.7358, "step": 2520 }, { "epoch": 0.030141441193701504, "grad_norm": 2.4041907787323, "learning_rate": 9.999999625124116e-06, "loss": 0.6313, "step": 2521 }, { "epoch": 0.030153397338562152, "grad_norm": 3.1848912239074707, "learning_rate": 9.999999546400183e-06, "loss": 0.6553, "step": 2522 }, { "epoch": 0.030165353483422804, "grad_norm": 5.664486885070801, "learning_rate": 9.999999460178731e-06, "loss": 0.6732, "step": 2523 }, { "epoch": 0.030177309628283456, "grad_norm": 1.971721887588501, "learning_rate": 9.999999366459763e-06, "loss": 0.705, "step": 2524 }, { "epoch": 0.030189265773144108, "grad_norm": 4.304393768310547, "learning_rate": 9.999999265243276e-06, "loss": 0.5927, "step": 2525 }, { "epoch": 0.03020122191800476, "grad_norm": 4.228959560394287, "learning_rate": 9.999999156529274e-06, "loss": 0.612, "step": 2526 }, { "epoch": 0.03021317806286541, "grad_norm": 3.480339527130127, "learning_rate": 9.999999040317757e-06, "loss": 0.7016, "step": 2527 }, { "epoch": 0.03022513420772606, "grad_norm": 2.2512950897216797, "learning_rate": 9.999998916608722e-06, "loss": 0.618, "step": 2528 }, { "epoch": 0.03023709035258671, "grad_norm": 6.8762617111206055, "learning_rate": 9.999998785402171e-06, "loss": 0.6037, "step": 2529 }, { "epoch": 0.030249046497447363, "grad_norm": 2.3654651641845703, "learning_rate": 9.999998646698104e-06, "loss": 0.5764, "step": 2530 }, { "epoch": 0.030261002642308014, "grad_norm": 5.1642985343933105, "learning_rate": 9.999998500496522e-06, "loss": 0.6148, "step": 2531 }, { "epoch": 0.030272958787168666, "grad_norm": 3.1468496322631836, "learning_rate": 9.999998346797422e-06, "loss": 0.7504, "step": 2532 }, { "epoch": 0.030284914932029318, "grad_norm": 2.2844510078430176, "learning_rate": 9.99999818560081e-06, "loss": 0.6382, "step": 2533 }, { "epoch": 0.030296871076889966, "grad_norm": 9.58601188659668, "learning_rate": 9.99999801690668e-06, "loss": 0.587, "step": 2534 }, { "epoch": 0.030308827221750618, "grad_norm": 2.428617477416992, "learning_rate": 9.999997840715036e-06, "loss": 0.6782, "step": 2535 }, { "epoch": 0.03032078336661127, "grad_norm": 5.421367168426514, "learning_rate": 9.999997657025879e-06, "loss": 0.5727, "step": 2536 }, { "epoch": 0.03033273951147192, "grad_norm": 5.0048909187316895, "learning_rate": 9.999997465839208e-06, "loss": 0.7138, "step": 2537 }, { "epoch": 0.030344695656332573, "grad_norm": 4.400643825531006, "learning_rate": 9.999997267155021e-06, "loss": 0.5688, "step": 2538 }, { "epoch": 0.030356651801193225, "grad_norm": 2.3242928981781006, "learning_rate": 9.99999706097332e-06, "loss": 0.6253, "step": 2539 }, { "epoch": 0.030368607946053873, "grad_norm": 3.213949203491211, "learning_rate": 9.99999684729411e-06, "loss": 0.683, "step": 2540 }, { "epoch": 0.030380564090914525, "grad_norm": 7.266734600067139, "learning_rate": 9.999996626117381e-06, "loss": 0.5079, "step": 2541 }, { "epoch": 0.030392520235775176, "grad_norm": 4.823323726654053, "learning_rate": 9.999996397443144e-06, "loss": 0.589, "step": 2542 }, { "epoch": 0.030404476380635828, "grad_norm": 2.702342987060547, "learning_rate": 9.999996161271392e-06, "loss": 0.645, "step": 2543 }, { "epoch": 0.03041643252549648, "grad_norm": 4.5614423751831055, "learning_rate": 9.999995917602129e-06, "loss": 0.6892, "step": 2544 }, { "epoch": 0.03042838867035713, "grad_norm": 1.8925628662109375, "learning_rate": 9.999995666435354e-06, "loss": 0.5717, "step": 2545 }, { "epoch": 0.03044034481521778, "grad_norm": 4.575571060180664, "learning_rate": 9.999995407771068e-06, "loss": 0.6124, "step": 2546 }, { "epoch": 0.03045230096007843, "grad_norm": 11.184406280517578, "learning_rate": 9.99999514160927e-06, "loss": 0.6585, "step": 2547 }, { "epoch": 0.030464257104939083, "grad_norm": 8.79327392578125, "learning_rate": 9.999994867949963e-06, "loss": 0.6338, "step": 2548 }, { "epoch": 0.030476213249799735, "grad_norm": 4.940805912017822, "learning_rate": 9.999994586793144e-06, "loss": 0.5631, "step": 2549 }, { "epoch": 0.030488169394660387, "grad_norm": 3.660252094268799, "learning_rate": 9.999994298138817e-06, "loss": 0.6132, "step": 2550 }, { "epoch": 0.03050012553952104, "grad_norm": 2.0032906532287598, "learning_rate": 9.999994001986981e-06, "loss": 0.7266, "step": 2551 }, { "epoch": 0.030512081684381687, "grad_norm": 2.4614357948303223, "learning_rate": 9.999993698337635e-06, "loss": 0.6013, "step": 2552 }, { "epoch": 0.03052403782924234, "grad_norm": 3.8270342350006104, "learning_rate": 9.999993387190783e-06, "loss": 0.6071, "step": 2553 }, { "epoch": 0.03053599397410299, "grad_norm": 2.4753034114837646, "learning_rate": 9.99999306854642e-06, "loss": 0.5811, "step": 2554 }, { "epoch": 0.030547950118963642, "grad_norm": 3.363007068634033, "learning_rate": 9.999992742404552e-06, "loss": 0.6761, "step": 2555 }, { "epoch": 0.030559906263824294, "grad_norm": 3.3770859241485596, "learning_rate": 9.999992408765175e-06, "loss": 0.6579, "step": 2556 }, { "epoch": 0.030571862408684945, "grad_norm": 6.929166793823242, "learning_rate": 9.999992067628294e-06, "loss": 0.5668, "step": 2557 }, { "epoch": 0.030583818553545594, "grad_norm": 7.019585132598877, "learning_rate": 9.999991718993905e-06, "loss": 0.5861, "step": 2558 }, { "epoch": 0.030595774698406245, "grad_norm": 3.18325138092041, "learning_rate": 9.99999136286201e-06, "loss": 0.5323, "step": 2559 }, { "epoch": 0.030607730843266897, "grad_norm": 2.54557204246521, "learning_rate": 9.999990999232613e-06, "loss": 0.5915, "step": 2560 }, { "epoch": 0.03061968698812755, "grad_norm": 2.120044469833374, "learning_rate": 9.999990628105712e-06, "loss": 0.5984, "step": 2561 }, { "epoch": 0.0306316431329882, "grad_norm": 2.1122539043426514, "learning_rate": 9.999990249481305e-06, "loss": 0.6618, "step": 2562 }, { "epoch": 0.030643599277848852, "grad_norm": 2.0514919757843018, "learning_rate": 9.999989863359396e-06, "loss": 0.6346, "step": 2563 }, { "epoch": 0.0306555554227095, "grad_norm": 3.8402204513549805, "learning_rate": 9.999989469739984e-06, "loss": 0.6324, "step": 2564 }, { "epoch": 0.030667511567570152, "grad_norm": 7.366321563720703, "learning_rate": 9.999989068623071e-06, "loss": 0.6405, "step": 2565 }, { "epoch": 0.030679467712430804, "grad_norm": 2.911168336868286, "learning_rate": 9.999988660008655e-06, "loss": 0.6303, "step": 2566 }, { "epoch": 0.030691423857291456, "grad_norm": 3.005887031555176, "learning_rate": 9.999988243896738e-06, "loss": 0.4576, "step": 2567 }, { "epoch": 0.030703380002152107, "grad_norm": 14.127195358276367, "learning_rate": 9.999987820287323e-06, "loss": 0.6325, "step": 2568 }, { "epoch": 0.03071533614701276, "grad_norm": 2.3557002544403076, "learning_rate": 9.999987389180406e-06, "loss": 0.5975, "step": 2569 }, { "epoch": 0.030727292291873407, "grad_norm": 5.190896034240723, "learning_rate": 9.999986950575992e-06, "loss": 0.6727, "step": 2570 }, { "epoch": 0.03073924843673406, "grad_norm": 3.539522409439087, "learning_rate": 9.99998650447408e-06, "loss": 0.6136, "step": 2571 }, { "epoch": 0.03075120458159471, "grad_norm": 2.176340341567993, "learning_rate": 9.999986050874668e-06, "loss": 0.6271, "step": 2572 }, { "epoch": 0.030763160726455362, "grad_norm": 7.498026371002197, "learning_rate": 9.99998558977776e-06, "loss": 0.6724, "step": 2573 }, { "epoch": 0.030775116871316014, "grad_norm": 2.3796586990356445, "learning_rate": 9.999985121183357e-06, "loss": 0.5362, "step": 2574 }, { "epoch": 0.030787073016176662, "grad_norm": 2.1011221408843994, "learning_rate": 9.99998464509146e-06, "loss": 0.6284, "step": 2575 }, { "epoch": 0.030799029161037314, "grad_norm": 2.964359760284424, "learning_rate": 9.999984161502065e-06, "loss": 0.6673, "step": 2576 }, { "epoch": 0.030810985305897966, "grad_norm": 4.713006019592285, "learning_rate": 9.99998367041518e-06, "loss": 0.6642, "step": 2577 }, { "epoch": 0.030822941450758617, "grad_norm": 3.7324514389038086, "learning_rate": 9.999983171830799e-06, "loss": 0.6667, "step": 2578 }, { "epoch": 0.03083489759561927, "grad_norm": 3.244515895843506, "learning_rate": 9.999982665748925e-06, "loss": 0.6366, "step": 2579 }, { "epoch": 0.03084685374047992, "grad_norm": 3.319119930267334, "learning_rate": 9.999982152169559e-06, "loss": 0.6231, "step": 2580 }, { "epoch": 0.03085880988534057, "grad_norm": 2.8889880180358887, "learning_rate": 9.999981631092703e-06, "loss": 0.6697, "step": 2581 }, { "epoch": 0.03087076603020122, "grad_norm": 3.904567241668701, "learning_rate": 9.999981102518356e-06, "loss": 0.6465, "step": 2582 }, { "epoch": 0.030882722175061873, "grad_norm": 2.2818191051483154, "learning_rate": 9.999980566446522e-06, "loss": 0.5947, "step": 2583 }, { "epoch": 0.030894678319922524, "grad_norm": 2.4027440547943115, "learning_rate": 9.999980022877196e-06, "loss": 0.6722, "step": 2584 }, { "epoch": 0.030906634464783176, "grad_norm": 3.542248249053955, "learning_rate": 9.999979471810384e-06, "loss": 0.7004, "step": 2585 }, { "epoch": 0.030918590609643828, "grad_norm": 3.259082794189453, "learning_rate": 9.999978913246084e-06, "loss": 0.6741, "step": 2586 }, { "epoch": 0.030930546754504476, "grad_norm": 3.086273670196533, "learning_rate": 9.999978347184298e-06, "loss": 0.6652, "step": 2587 }, { "epoch": 0.030942502899365128, "grad_norm": 2.733593463897705, "learning_rate": 9.999977773625028e-06, "loss": 0.609, "step": 2588 }, { "epoch": 0.03095445904422578, "grad_norm": 6.0069451332092285, "learning_rate": 9.999977192568273e-06, "loss": 0.6224, "step": 2589 }, { "epoch": 0.03096641518908643, "grad_norm": 2.9050965309143066, "learning_rate": 9.999976604014035e-06, "loss": 0.6831, "step": 2590 }, { "epoch": 0.030978371333947083, "grad_norm": 2.2193307876586914, "learning_rate": 9.999976007962313e-06, "loss": 0.6128, "step": 2591 }, { "epoch": 0.030990327478807735, "grad_norm": 5.167126655578613, "learning_rate": 9.99997540441311e-06, "loss": 0.621, "step": 2592 }, { "epoch": 0.031002283623668383, "grad_norm": 1.9541258811950684, "learning_rate": 9.999974793366425e-06, "loss": 0.5947, "step": 2593 }, { "epoch": 0.031014239768529035, "grad_norm": 3.8924028873443604, "learning_rate": 9.99997417482226e-06, "loss": 0.6667, "step": 2594 }, { "epoch": 0.031026195913389686, "grad_norm": 2.9559645652770996, "learning_rate": 9.999973548780617e-06, "loss": 0.5658, "step": 2595 }, { "epoch": 0.031038152058250338, "grad_norm": 3.686473846435547, "learning_rate": 9.999972915241496e-06, "loss": 0.731, "step": 2596 }, { "epoch": 0.03105010820311099, "grad_norm": 5.067824363708496, "learning_rate": 9.999972274204898e-06, "loss": 0.6694, "step": 2597 }, { "epoch": 0.03106206434797164, "grad_norm": 2.6068599224090576, "learning_rate": 9.999971625670824e-06, "loss": 0.6582, "step": 2598 }, { "epoch": 0.03107402049283229, "grad_norm": 2.3180367946624756, "learning_rate": 9.999970969639273e-06, "loss": 0.6079, "step": 2599 }, { "epoch": 0.03108597663769294, "grad_norm": 5.363174915313721, "learning_rate": 9.999970306110249e-06, "loss": 0.5564, "step": 2600 }, { "epoch": 0.031097932782553593, "grad_norm": 3.2528421878814697, "learning_rate": 9.999969635083753e-06, "loss": 0.6438, "step": 2601 }, { "epoch": 0.031109888927414245, "grad_norm": 3.6410562992095947, "learning_rate": 9.999968956559782e-06, "loss": 0.5765, "step": 2602 }, { "epoch": 0.031121845072274897, "grad_norm": 2.9611053466796875, "learning_rate": 9.999968270538342e-06, "loss": 0.5346, "step": 2603 }, { "epoch": 0.03113380121713555, "grad_norm": 5.679759979248047, "learning_rate": 9.99996757701943e-06, "loss": 0.618, "step": 2604 }, { "epoch": 0.031145757361996197, "grad_norm": 4.36961030960083, "learning_rate": 9.99996687600305e-06, "loss": 0.669, "step": 2605 }, { "epoch": 0.031157713506856848, "grad_norm": 4.737902641296387, "learning_rate": 9.999966167489201e-06, "loss": 0.5353, "step": 2606 }, { "epoch": 0.0311696696517175, "grad_norm": 3.8655130863189697, "learning_rate": 9.999965451477887e-06, "loss": 0.6617, "step": 2607 }, { "epoch": 0.03118162579657815, "grad_norm": 3.3979265689849854, "learning_rate": 9.999964727969104e-06, "loss": 0.6153, "step": 2608 }, { "epoch": 0.031193581941438803, "grad_norm": 2.585350275039673, "learning_rate": 9.999963996962859e-06, "loss": 0.5199, "step": 2609 }, { "epoch": 0.031205538086299455, "grad_norm": 3.2611382007598877, "learning_rate": 9.999963258459149e-06, "loss": 0.5649, "step": 2610 }, { "epoch": 0.031217494231160103, "grad_norm": 2.4220972061157227, "learning_rate": 9.999962512457978e-06, "loss": 0.7057, "step": 2611 }, { "epoch": 0.031229450376020755, "grad_norm": 3.3002266883850098, "learning_rate": 9.999961758959343e-06, "loss": 0.5288, "step": 2612 }, { "epoch": 0.031241406520881407, "grad_norm": 2.702742338180542, "learning_rate": 9.999960997963247e-06, "loss": 0.6912, "step": 2613 }, { "epoch": 0.03125336266574206, "grad_norm": 5.587218284606934, "learning_rate": 9.999960229469694e-06, "loss": 0.7447, "step": 2614 }, { "epoch": 0.03126531881060271, "grad_norm": 1.9136896133422852, "learning_rate": 9.999959453478682e-06, "loss": 0.5746, "step": 2615 }, { "epoch": 0.03127727495546336, "grad_norm": 5.006978511810303, "learning_rate": 9.999958669990213e-06, "loss": 0.6437, "step": 2616 }, { "epoch": 0.031289231100324014, "grad_norm": 2.5171282291412354, "learning_rate": 9.999957879004287e-06, "loss": 0.6645, "step": 2617 }, { "epoch": 0.031301187245184665, "grad_norm": 2.660590887069702, "learning_rate": 9.999957080520908e-06, "loss": 0.67, "step": 2618 }, { "epoch": 0.03131314339004532, "grad_norm": 3.6629250049591064, "learning_rate": 9.999956274540075e-06, "loss": 0.6697, "step": 2619 }, { "epoch": 0.03132509953490596, "grad_norm": 8.97378158569336, "learning_rate": 9.99995546106179e-06, "loss": 0.6306, "step": 2620 }, { "epoch": 0.031337055679766614, "grad_norm": 5.133192539215088, "learning_rate": 9.999954640086054e-06, "loss": 0.5802, "step": 2621 }, { "epoch": 0.031349011824627265, "grad_norm": 5.135898590087891, "learning_rate": 9.999953811612868e-06, "loss": 0.7181, "step": 2622 }, { "epoch": 0.03136096796948792, "grad_norm": 2.508007764816284, "learning_rate": 9.999952975642235e-06, "loss": 0.6988, "step": 2623 }, { "epoch": 0.03137292411434857, "grad_norm": 3.8967978954315186, "learning_rate": 9.999952132174153e-06, "loss": 0.6946, "step": 2624 }, { "epoch": 0.03138488025920922, "grad_norm": 2.0457112789154053, "learning_rate": 9.999951281208625e-06, "loss": 0.7275, "step": 2625 }, { "epoch": 0.03139683640406987, "grad_norm": 3.1233859062194824, "learning_rate": 9.999950422745652e-06, "loss": 0.6197, "step": 2626 }, { "epoch": 0.031408792548930524, "grad_norm": 4.25128698348999, "learning_rate": 9.999949556785237e-06, "loss": 0.583, "step": 2627 }, { "epoch": 0.031420748693791176, "grad_norm": 2.2213134765625, "learning_rate": 9.99994868332738e-06, "loss": 0.4846, "step": 2628 }, { "epoch": 0.03143270483865183, "grad_norm": 2.029477834701538, "learning_rate": 9.99994780237208e-06, "loss": 0.5857, "step": 2629 }, { "epoch": 0.03144466098351248, "grad_norm": 2.24294114112854, "learning_rate": 9.999946913919342e-06, "loss": 0.589, "step": 2630 }, { "epoch": 0.03145661712837313, "grad_norm": 7.485584259033203, "learning_rate": 9.999946017969167e-06, "loss": 0.6892, "step": 2631 }, { "epoch": 0.031468573273233776, "grad_norm": 2.9091012477874756, "learning_rate": 9.999945114521553e-06, "loss": 0.641, "step": 2632 }, { "epoch": 0.03148052941809443, "grad_norm": 4.5997724533081055, "learning_rate": 9.999944203576504e-06, "loss": 0.6924, "step": 2633 }, { "epoch": 0.03149248556295508, "grad_norm": 3.068660259246826, "learning_rate": 9.999943285134021e-06, "loss": 0.6455, "step": 2634 }, { "epoch": 0.03150444170781573, "grad_norm": 2.639040946960449, "learning_rate": 9.999942359194105e-06, "loss": 0.6509, "step": 2635 }, { "epoch": 0.03151639785267638, "grad_norm": 5.537361145019531, "learning_rate": 9.99994142575676e-06, "loss": 0.7432, "step": 2636 }, { "epoch": 0.031528353997537034, "grad_norm": 1.618523120880127, "learning_rate": 9.999940484821982e-06, "loss": 0.6585, "step": 2637 }, { "epoch": 0.031540310142397686, "grad_norm": 1.8804465532302856, "learning_rate": 9.999939536389777e-06, "loss": 0.6484, "step": 2638 }, { "epoch": 0.03155226628725834, "grad_norm": 4.044662952423096, "learning_rate": 9.999938580460145e-06, "loss": 0.5891, "step": 2639 }, { "epoch": 0.03156422243211899, "grad_norm": 8.830406188964844, "learning_rate": 9.999937617033087e-06, "loss": 0.6297, "step": 2640 }, { "epoch": 0.03157617857697964, "grad_norm": 2.3157670497894287, "learning_rate": 9.999936646108605e-06, "loss": 0.6186, "step": 2641 }, { "epoch": 0.03158813472184029, "grad_norm": 2.235710859298706, "learning_rate": 9.9999356676867e-06, "loss": 0.7262, "step": 2642 }, { "epoch": 0.03160009086670094, "grad_norm": 1.912919044494629, "learning_rate": 9.999934681767373e-06, "loss": 0.6443, "step": 2643 }, { "epoch": 0.03161204701156159, "grad_norm": 5.884372711181641, "learning_rate": 9.999933688350628e-06, "loss": 0.5918, "step": 2644 }, { "epoch": 0.03162400315642224, "grad_norm": 3.530395030975342, "learning_rate": 9.999932687436463e-06, "loss": 0.6304, "step": 2645 }, { "epoch": 0.03163595930128289, "grad_norm": 3.0789530277252197, "learning_rate": 9.99993167902488e-06, "loss": 0.6823, "step": 2646 }, { "epoch": 0.031647915446143544, "grad_norm": 2.037766933441162, "learning_rate": 9.999930663115885e-06, "loss": 0.7113, "step": 2647 }, { "epoch": 0.031659871591004196, "grad_norm": 3.0297887325286865, "learning_rate": 9.999929639709473e-06, "loss": 0.617, "step": 2648 }, { "epoch": 0.03167182773586485, "grad_norm": 4.595581531524658, "learning_rate": 9.999928608805651e-06, "loss": 0.6959, "step": 2649 }, { "epoch": 0.0316837838807255, "grad_norm": 2.3533599376678467, "learning_rate": 9.999927570404417e-06, "loss": 0.6165, "step": 2650 }, { "epoch": 0.03169574002558615, "grad_norm": 2.536435842514038, "learning_rate": 9.999926524505774e-06, "loss": 0.649, "step": 2651 }, { "epoch": 0.0317076961704468, "grad_norm": 6.011038303375244, "learning_rate": 9.999925471109724e-06, "loss": 0.5989, "step": 2652 }, { "epoch": 0.031719652315307455, "grad_norm": 2.0178723335266113, "learning_rate": 9.999924410216269e-06, "loss": 0.6244, "step": 2653 }, { "epoch": 0.031731608460168106, "grad_norm": 2.3109593391418457, "learning_rate": 9.999923341825408e-06, "loss": 0.6219, "step": 2654 }, { "epoch": 0.03174356460502875, "grad_norm": 2.332929849624634, "learning_rate": 9.999922265937145e-06, "loss": 0.6, "step": 2655 }, { "epoch": 0.0317555207498894, "grad_norm": 3.8108198642730713, "learning_rate": 9.99992118255148e-06, "loss": 0.6185, "step": 2656 }, { "epoch": 0.031767476894750055, "grad_norm": 3.0044660568237305, "learning_rate": 9.999920091668416e-06, "loss": 0.6278, "step": 2657 }, { "epoch": 0.031779433039610706, "grad_norm": 1.6206421852111816, "learning_rate": 9.999918993287953e-06, "loss": 0.5849, "step": 2658 }, { "epoch": 0.03179138918447136, "grad_norm": 2.708960771560669, "learning_rate": 9.999917887410095e-06, "loss": 0.6084, "step": 2659 }, { "epoch": 0.03180334532933201, "grad_norm": 9.726009368896484, "learning_rate": 9.99991677403484e-06, "loss": 0.5653, "step": 2660 }, { "epoch": 0.03181530147419266, "grad_norm": 1.9639040231704712, "learning_rate": 9.999915653162196e-06, "loss": 0.6564, "step": 2661 }, { "epoch": 0.03182725761905331, "grad_norm": 2.3634533882141113, "learning_rate": 9.999914524792158e-06, "loss": 0.6069, "step": 2662 }, { "epoch": 0.031839213763913965, "grad_norm": 2.884617328643799, "learning_rate": 9.99991338892473e-06, "loss": 0.519, "step": 2663 }, { "epoch": 0.03185116990877462, "grad_norm": 3.7724087238311768, "learning_rate": 9.999912245559915e-06, "loss": 0.586, "step": 2664 }, { "epoch": 0.03186312605363527, "grad_norm": 3.3614771366119385, "learning_rate": 9.999911094697713e-06, "loss": 0.6338, "step": 2665 }, { "epoch": 0.03187508219849592, "grad_norm": 2.4052510261535645, "learning_rate": 9.999909936338128e-06, "loss": 0.5246, "step": 2666 }, { "epoch": 0.031887038343356565, "grad_norm": 2.8012523651123047, "learning_rate": 9.99990877048116e-06, "loss": 0.6502, "step": 2667 }, { "epoch": 0.03189899448821722, "grad_norm": 1.4569710493087769, "learning_rate": 9.99990759712681e-06, "loss": 0.5905, "step": 2668 }, { "epoch": 0.03191095063307787, "grad_norm": 2.5719785690307617, "learning_rate": 9.999906416275083e-06, "loss": 0.6657, "step": 2669 }, { "epoch": 0.03192290677793852, "grad_norm": 2.454807758331299, "learning_rate": 9.999905227925977e-06, "loss": 0.6553, "step": 2670 }, { "epoch": 0.03193486292279917, "grad_norm": 2.630730152130127, "learning_rate": 9.999904032079496e-06, "loss": 0.7196, "step": 2671 }, { "epoch": 0.031946819067659823, "grad_norm": 1.727769374847412, "learning_rate": 9.999902828735639e-06, "loss": 0.563, "step": 2672 }, { "epoch": 0.031958775212520475, "grad_norm": 4.1211113929748535, "learning_rate": 9.999901617894413e-06, "loss": 0.6115, "step": 2673 }, { "epoch": 0.03197073135738113, "grad_norm": 4.475314140319824, "learning_rate": 9.999900399555813e-06, "loss": 0.6726, "step": 2674 }, { "epoch": 0.03198268750224178, "grad_norm": 3.3535313606262207, "learning_rate": 9.999899173719847e-06, "loss": 0.6741, "step": 2675 }, { "epoch": 0.03199464364710243, "grad_norm": 2.154850959777832, "learning_rate": 9.999897940386514e-06, "loss": 0.5628, "step": 2676 }, { "epoch": 0.03200659979196308, "grad_norm": 1.959497332572937, "learning_rate": 9.999896699555819e-06, "loss": 0.5547, "step": 2677 }, { "epoch": 0.032018555936823734, "grad_norm": 3.8922343254089355, "learning_rate": 9.999895451227756e-06, "loss": 0.6335, "step": 2678 }, { "epoch": 0.03203051208168438, "grad_norm": 2.240907669067383, "learning_rate": 9.999894195402335e-06, "loss": 0.5891, "step": 2679 }, { "epoch": 0.03204246822654503, "grad_norm": 2.2557928562164307, "learning_rate": 9.999892932079555e-06, "loss": 0.6485, "step": 2680 }, { "epoch": 0.03205442437140568, "grad_norm": 1.96077299118042, "learning_rate": 9.999891661259417e-06, "loss": 0.5691, "step": 2681 }, { "epoch": 0.032066380516266334, "grad_norm": 2.7009127140045166, "learning_rate": 9.999890382941923e-06, "loss": 0.5995, "step": 2682 }, { "epoch": 0.032078336661126985, "grad_norm": 2.061997890472412, "learning_rate": 9.999889097127078e-06, "loss": 0.6059, "step": 2683 }, { "epoch": 0.03209029280598764, "grad_norm": 2.0601723194122314, "learning_rate": 9.99988780381488e-06, "loss": 0.6804, "step": 2684 }, { "epoch": 0.03210224895084829, "grad_norm": 1.7374191284179688, "learning_rate": 9.99988650300533e-06, "loss": 0.602, "step": 2685 }, { "epoch": 0.03211420509570894, "grad_norm": 1.9458280801773071, "learning_rate": 9.999885194698437e-06, "loss": 0.6238, "step": 2686 }, { "epoch": 0.03212616124056959, "grad_norm": 3.9707634449005127, "learning_rate": 9.999883878894197e-06, "loss": 0.6238, "step": 2687 }, { "epoch": 0.032138117385430244, "grad_norm": 3.100451946258545, "learning_rate": 9.999882555592611e-06, "loss": 0.6685, "step": 2688 }, { "epoch": 0.032150073530290896, "grad_norm": 3.9390757083892822, "learning_rate": 9.999881224793686e-06, "loss": 0.6871, "step": 2689 }, { "epoch": 0.03216202967515155, "grad_norm": 2.411162853240967, "learning_rate": 9.99987988649742e-06, "loss": 0.7438, "step": 2690 }, { "epoch": 0.03217398582001219, "grad_norm": 2.4822804927825928, "learning_rate": 9.999878540703816e-06, "loss": 0.6867, "step": 2691 }, { "epoch": 0.032185941964872844, "grad_norm": 2.145552396774292, "learning_rate": 9.999877187412878e-06, "loss": 0.6035, "step": 2692 }, { "epoch": 0.032197898109733496, "grad_norm": 2.1430227756500244, "learning_rate": 9.999875826624605e-06, "loss": 0.7555, "step": 2693 }, { "epoch": 0.03220985425459415, "grad_norm": 2.1833419799804688, "learning_rate": 9.999874458339e-06, "loss": 0.6494, "step": 2694 }, { "epoch": 0.0322218103994548, "grad_norm": 2.4463798999786377, "learning_rate": 9.999873082556066e-06, "loss": 0.6352, "step": 2695 }, { "epoch": 0.03223376654431545, "grad_norm": 2.037466287612915, "learning_rate": 9.999871699275806e-06, "loss": 0.6582, "step": 2696 }, { "epoch": 0.0322457226891761, "grad_norm": 1.7743898630142212, "learning_rate": 9.999870308498219e-06, "loss": 0.5837, "step": 2697 }, { "epoch": 0.032257678834036754, "grad_norm": 2.3305535316467285, "learning_rate": 9.999868910223309e-06, "loss": 0.6186, "step": 2698 }, { "epoch": 0.032269634978897406, "grad_norm": 2.2102811336517334, "learning_rate": 9.999867504451079e-06, "loss": 0.6012, "step": 2699 }, { "epoch": 0.03228159112375806, "grad_norm": 1.5972883701324463, "learning_rate": 9.999866091181529e-06, "loss": 0.6047, "step": 2700 }, { "epoch": 0.03229354726861871, "grad_norm": 2.2009201049804688, "learning_rate": 9.99986467041466e-06, "loss": 0.5999, "step": 2701 }, { "epoch": 0.03230550341347936, "grad_norm": 5.116009712219238, "learning_rate": 9.999863242150478e-06, "loss": 0.6513, "step": 2702 }, { "epoch": 0.032317459558340006, "grad_norm": 1.9836708307266235, "learning_rate": 9.999861806388983e-06, "loss": 0.675, "step": 2703 }, { "epoch": 0.03232941570320066, "grad_norm": 2.2142724990844727, "learning_rate": 9.999860363130178e-06, "loss": 0.7145, "step": 2704 }, { "epoch": 0.03234137184806131, "grad_norm": 2.1707570552825928, "learning_rate": 9.999858912374066e-06, "loss": 0.6048, "step": 2705 }, { "epoch": 0.03235332799292196, "grad_norm": 3.2422633171081543, "learning_rate": 9.999857454120644e-06, "loss": 0.6916, "step": 2706 }, { "epoch": 0.03236528413778261, "grad_norm": 3.5662472248077393, "learning_rate": 9.999855988369923e-06, "loss": 0.5991, "step": 2707 }, { "epoch": 0.032377240282643265, "grad_norm": 2.7405266761779785, "learning_rate": 9.999854515121896e-06, "loss": 0.5971, "step": 2708 }, { "epoch": 0.032389196427503916, "grad_norm": 7.447633266448975, "learning_rate": 9.999853034376572e-06, "loss": 0.6516, "step": 2709 }, { "epoch": 0.03240115257236457, "grad_norm": 1.7068496942520142, "learning_rate": 9.999851546133947e-06, "loss": 0.5468, "step": 2710 }, { "epoch": 0.03241310871722522, "grad_norm": 1.982132911682129, "learning_rate": 9.999850050394031e-06, "loss": 0.6223, "step": 2711 }, { "epoch": 0.03242506486208587, "grad_norm": 4.9598708152771, "learning_rate": 9.99984854715682e-06, "loss": 0.6571, "step": 2712 }, { "epoch": 0.03243702100694652, "grad_norm": 2.7311582565307617, "learning_rate": 9.999847036422319e-06, "loss": 0.6043, "step": 2713 }, { "epoch": 0.03244897715180717, "grad_norm": 1.940902590751648, "learning_rate": 9.999845518190527e-06, "loss": 0.5899, "step": 2714 }, { "epoch": 0.03246093329666782, "grad_norm": 5.634023189544678, "learning_rate": 9.999843992461452e-06, "loss": 0.7704, "step": 2715 }, { "epoch": 0.03247288944152847, "grad_norm": 1.9840623140335083, "learning_rate": 9.999842459235091e-06, "loss": 0.673, "step": 2716 }, { "epoch": 0.03248484558638912, "grad_norm": 2.1224892139434814, "learning_rate": 9.99984091851145e-06, "loss": 0.7768, "step": 2717 }, { "epoch": 0.032496801731249775, "grad_norm": 2.15083909034729, "learning_rate": 9.999839370290531e-06, "loss": 0.7227, "step": 2718 }, { "epoch": 0.032508757876110426, "grad_norm": 4.278746604919434, "learning_rate": 9.999837814572332e-06, "loss": 0.612, "step": 2719 }, { "epoch": 0.03252071402097108, "grad_norm": 8.04060173034668, "learning_rate": 9.99983625135686e-06, "loss": 0.6832, "step": 2720 }, { "epoch": 0.03253267016583173, "grad_norm": 2.3124349117279053, "learning_rate": 9.999834680644116e-06, "loss": 0.6932, "step": 2721 }, { "epoch": 0.03254462631069238, "grad_norm": 7.148108959197998, "learning_rate": 9.999833102434102e-06, "loss": 0.6784, "step": 2722 }, { "epoch": 0.03255658245555303, "grad_norm": 4.323217868804932, "learning_rate": 9.999831516726822e-06, "loss": 0.6856, "step": 2723 }, { "epoch": 0.032568538600413685, "grad_norm": 7.946097373962402, "learning_rate": 9.999829923522274e-06, "loss": 0.6631, "step": 2724 }, { "epoch": 0.03258049474527434, "grad_norm": 4.151355266571045, "learning_rate": 9.999828322820464e-06, "loss": 0.6181, "step": 2725 }, { "epoch": 0.03259245089013498, "grad_norm": 1.6853684186935425, "learning_rate": 9.999826714621394e-06, "loss": 0.6049, "step": 2726 }, { "epoch": 0.03260440703499563, "grad_norm": 1.8687469959259033, "learning_rate": 9.999825098925066e-06, "loss": 0.5844, "step": 2727 }, { "epoch": 0.032616363179856285, "grad_norm": 2.515462636947632, "learning_rate": 9.999823475731482e-06, "loss": 0.613, "step": 2728 }, { "epoch": 0.03262831932471694, "grad_norm": 2.34548282623291, "learning_rate": 9.999821845040647e-06, "loss": 0.624, "step": 2729 }, { "epoch": 0.03264027546957759, "grad_norm": 3.2850098609924316, "learning_rate": 9.99982020685256e-06, "loss": 0.5896, "step": 2730 }, { "epoch": 0.03265223161443824, "grad_norm": 3.394498825073242, "learning_rate": 9.999818561167225e-06, "loss": 0.6844, "step": 2731 }, { "epoch": 0.03266418775929889, "grad_norm": 1.8169364929199219, "learning_rate": 9.999816907984643e-06, "loss": 0.6041, "step": 2732 }, { "epoch": 0.032676143904159544, "grad_norm": 1.7787981033325195, "learning_rate": 9.99981524730482e-06, "loss": 0.5709, "step": 2733 }, { "epoch": 0.032688100049020195, "grad_norm": 4.290199279785156, "learning_rate": 9.999813579127755e-06, "loss": 0.6482, "step": 2734 }, { "epoch": 0.03270005619388085, "grad_norm": 3.281444787979126, "learning_rate": 9.999811903453452e-06, "loss": 0.6898, "step": 2735 }, { "epoch": 0.0327120123387415, "grad_norm": 4.342289924621582, "learning_rate": 9.999810220281912e-06, "loss": 0.6126, "step": 2736 }, { "epoch": 0.03272396848360215, "grad_norm": 3.3798463344573975, "learning_rate": 9.99980852961314e-06, "loss": 0.5857, "step": 2737 }, { "epoch": 0.032735924628462795, "grad_norm": 6.834131240844727, "learning_rate": 9.999806831447138e-06, "loss": 0.6353, "step": 2738 }, { "epoch": 0.03274788077332345, "grad_norm": 1.653406023979187, "learning_rate": 9.999805125783907e-06, "loss": 0.6294, "step": 2739 }, { "epoch": 0.0327598369181841, "grad_norm": 2.230257511138916, "learning_rate": 9.99980341262345e-06, "loss": 0.6192, "step": 2740 }, { "epoch": 0.03277179306304475, "grad_norm": 5.131825923919678, "learning_rate": 9.999801691965772e-06, "loss": 0.632, "step": 2741 }, { "epoch": 0.0327837492079054, "grad_norm": 3.78226375579834, "learning_rate": 9.999799963810872e-06, "loss": 0.7088, "step": 2742 }, { "epoch": 0.032795705352766054, "grad_norm": 4.447755813598633, "learning_rate": 9.999798228158755e-06, "loss": 0.4781, "step": 2743 }, { "epoch": 0.032807661497626706, "grad_norm": 4.552968502044678, "learning_rate": 9.99979648500942e-06, "loss": 0.6357, "step": 2744 }, { "epoch": 0.03281961764248736, "grad_norm": 1.85200834274292, "learning_rate": 9.999794734362875e-06, "loss": 0.716, "step": 2745 }, { "epoch": 0.03283157378734801, "grad_norm": 6.330460071563721, "learning_rate": 9.999792976219121e-06, "loss": 0.5801, "step": 2746 }, { "epoch": 0.03284352993220866, "grad_norm": 1.9104924201965332, "learning_rate": 9.999791210578158e-06, "loss": 0.561, "step": 2747 }, { "epoch": 0.03285548607706931, "grad_norm": 2.212266445159912, "learning_rate": 9.999789437439991e-06, "loss": 0.5936, "step": 2748 }, { "epoch": 0.032867442221929964, "grad_norm": 3.0306220054626465, "learning_rate": 9.99978765680462e-06, "loss": 0.5834, "step": 2749 }, { "epoch": 0.03287939836679061, "grad_norm": 2.3617043495178223, "learning_rate": 9.999785868672052e-06, "loss": 0.7151, "step": 2750 }, { "epoch": 0.03289135451165126, "grad_norm": 2.100024938583374, "learning_rate": 9.999784073042287e-06, "loss": 0.6474, "step": 2751 }, { "epoch": 0.03290331065651191, "grad_norm": 5.634548664093018, "learning_rate": 9.999782269915327e-06, "loss": 0.6605, "step": 2752 }, { "epoch": 0.032915266801372564, "grad_norm": 3.0894951820373535, "learning_rate": 9.999780459291177e-06, "loss": 0.6507, "step": 2753 }, { "epoch": 0.032927222946233216, "grad_norm": 3.5052030086517334, "learning_rate": 9.999778641169838e-06, "loss": 0.5986, "step": 2754 }, { "epoch": 0.03293917909109387, "grad_norm": 2.5783727169036865, "learning_rate": 9.999776815551312e-06, "loss": 0.5574, "step": 2755 }, { "epoch": 0.03295113523595452, "grad_norm": 4.313648223876953, "learning_rate": 9.999774982435603e-06, "loss": 0.5903, "step": 2756 }, { "epoch": 0.03296309138081517, "grad_norm": 4.1959547996521, "learning_rate": 9.999773141822716e-06, "loss": 0.5936, "step": 2757 }, { "epoch": 0.03297504752567582, "grad_norm": 2.4739577770233154, "learning_rate": 9.999771293712648e-06, "loss": 0.67, "step": 2758 }, { "epoch": 0.032987003670536474, "grad_norm": 2.4302256107330322, "learning_rate": 9.999769438105407e-06, "loss": 0.7155, "step": 2759 }, { "epoch": 0.032998959815397126, "grad_norm": 1.5461018085479736, "learning_rate": 9.999767575000993e-06, "loss": 0.658, "step": 2760 }, { "epoch": 0.03301091596025778, "grad_norm": 3.520951747894287, "learning_rate": 9.99976570439941e-06, "loss": 0.5626, "step": 2761 }, { "epoch": 0.03302287210511842, "grad_norm": 2.4729056358337402, "learning_rate": 9.999763826300663e-06, "loss": 0.6756, "step": 2762 }, { "epoch": 0.033034828249979074, "grad_norm": 1.9369823932647705, "learning_rate": 9.999761940704751e-06, "loss": 0.6025, "step": 2763 }, { "epoch": 0.033046784394839726, "grad_norm": 2.696443557739258, "learning_rate": 9.999760047611677e-06, "loss": 0.6473, "step": 2764 }, { "epoch": 0.03305874053970038, "grad_norm": 2.8941397666931152, "learning_rate": 9.999758147021445e-06, "loss": 0.6376, "step": 2765 }, { "epoch": 0.03307069668456103, "grad_norm": 34.785804748535156, "learning_rate": 9.999756238934059e-06, "loss": 0.7211, "step": 2766 }, { "epoch": 0.03308265282942168, "grad_norm": 2.991138219833374, "learning_rate": 9.999754323349521e-06, "loss": 0.6404, "step": 2767 }, { "epoch": 0.03309460897428233, "grad_norm": 3.563504695892334, "learning_rate": 9.999752400267832e-06, "loss": 0.6698, "step": 2768 }, { "epoch": 0.033106565119142985, "grad_norm": 3.7463595867156982, "learning_rate": 9.999750469688999e-06, "loss": 0.6185, "step": 2769 }, { "epoch": 0.033118521264003636, "grad_norm": 1.9778703451156616, "learning_rate": 9.99974853161302e-06, "loss": 0.6143, "step": 2770 }, { "epoch": 0.03313047740886429, "grad_norm": 4.295238494873047, "learning_rate": 9.999746586039902e-06, "loss": 0.5715, "step": 2771 }, { "epoch": 0.03314243355372494, "grad_norm": 2.4599006175994873, "learning_rate": 9.999744632969646e-06, "loss": 0.614, "step": 2772 }, { "epoch": 0.033154389698585585, "grad_norm": 2.3000409603118896, "learning_rate": 9.999742672402253e-06, "loss": 0.6483, "step": 2773 }, { "epoch": 0.033166345843446236, "grad_norm": 4.304224491119385, "learning_rate": 9.999740704337731e-06, "loss": 0.574, "step": 2774 }, { "epoch": 0.03317830198830689, "grad_norm": 2.4431679248809814, "learning_rate": 9.999738728776078e-06, "loss": 0.558, "step": 2775 }, { "epoch": 0.03319025813316754, "grad_norm": 2.192800760269165, "learning_rate": 9.9997367457173e-06, "loss": 0.6964, "step": 2776 }, { "epoch": 0.03320221427802819, "grad_norm": 2.542855978012085, "learning_rate": 9.999734755161399e-06, "loss": 0.6177, "step": 2777 }, { "epoch": 0.03321417042288884, "grad_norm": 2.5945770740509033, "learning_rate": 9.999732757108378e-06, "loss": 0.6709, "step": 2778 }, { "epoch": 0.033226126567749495, "grad_norm": 4.1578688621521, "learning_rate": 9.999730751558238e-06, "loss": 0.615, "step": 2779 }, { "epoch": 0.03323808271261015, "grad_norm": 4.155785083770752, "learning_rate": 9.999728738510988e-06, "loss": 0.6015, "step": 2780 }, { "epoch": 0.0332500388574708, "grad_norm": 3.088036298751831, "learning_rate": 9.999726717966623e-06, "loss": 0.5898, "step": 2781 }, { "epoch": 0.03326199500233145, "grad_norm": 4.969325542449951, "learning_rate": 9.999724689925152e-06, "loss": 0.5505, "step": 2782 }, { "epoch": 0.0332739511471921, "grad_norm": 4.022447109222412, "learning_rate": 9.999722654386576e-06, "loss": 0.6626, "step": 2783 }, { "epoch": 0.033285907292052754, "grad_norm": 3.5205886363983154, "learning_rate": 9.999720611350898e-06, "loss": 0.6846, "step": 2784 }, { "epoch": 0.0332978634369134, "grad_norm": 3.3655292987823486, "learning_rate": 9.999718560818122e-06, "loss": 0.579, "step": 2785 }, { "epoch": 0.03330981958177405, "grad_norm": 2.4645426273345947, "learning_rate": 9.999716502788248e-06, "loss": 0.6476, "step": 2786 }, { "epoch": 0.0333217757266347, "grad_norm": 2.7264444828033447, "learning_rate": 9.999714437261282e-06, "loss": 0.5984, "step": 2787 }, { "epoch": 0.03333373187149535, "grad_norm": 10.934104919433594, "learning_rate": 9.999712364237227e-06, "loss": 0.6496, "step": 2788 }, { "epoch": 0.033345688016356005, "grad_norm": 3.2413249015808105, "learning_rate": 9.999710283716087e-06, "loss": 0.6551, "step": 2789 }, { "epoch": 0.03335764416121666, "grad_norm": 3.467144727706909, "learning_rate": 9.99970819569786e-06, "loss": 0.5693, "step": 2790 }, { "epoch": 0.03336960030607731, "grad_norm": 3.8741726875305176, "learning_rate": 9.999706100182556e-06, "loss": 0.7448, "step": 2791 }, { "epoch": 0.03338155645093796, "grad_norm": 4.108693599700928, "learning_rate": 9.999703997170174e-06, "loss": 0.53, "step": 2792 }, { "epoch": 0.03339351259579861, "grad_norm": 2.275545835494995, "learning_rate": 9.999701886660717e-06, "loss": 0.7485, "step": 2793 }, { "epoch": 0.033405468740659264, "grad_norm": 10.123970985412598, "learning_rate": 9.999699768654193e-06, "loss": 0.6306, "step": 2794 }, { "epoch": 0.033417424885519915, "grad_norm": 3.300619602203369, "learning_rate": 9.999697643150596e-06, "loss": 0.6962, "step": 2795 }, { "epoch": 0.03342938103038057, "grad_norm": 2.7045400142669678, "learning_rate": 9.999695510149938e-06, "loss": 0.6539, "step": 2796 }, { "epoch": 0.03344133717524121, "grad_norm": 3.069793462753296, "learning_rate": 9.999693369652218e-06, "loss": 0.5989, "step": 2797 }, { "epoch": 0.033453293320101864, "grad_norm": 2.9733448028564453, "learning_rate": 9.99969122165744e-06, "loss": 0.6862, "step": 2798 }, { "epoch": 0.033465249464962515, "grad_norm": 4.68408727645874, "learning_rate": 9.999689066165608e-06, "loss": 0.6389, "step": 2799 }, { "epoch": 0.03347720560982317, "grad_norm": 2.2062478065490723, "learning_rate": 9.999686903176724e-06, "loss": 0.6794, "step": 2800 }, { "epoch": 0.03348916175468382, "grad_norm": 5.21112060546875, "learning_rate": 9.99968473269079e-06, "loss": 0.6301, "step": 2801 }, { "epoch": 0.03350111789954447, "grad_norm": 2.4104113578796387, "learning_rate": 9.999682554707814e-06, "loss": 0.6954, "step": 2802 }, { "epoch": 0.03351307404440512, "grad_norm": 2.3401575088500977, "learning_rate": 9.999680369227795e-06, "loss": 0.6859, "step": 2803 }, { "epoch": 0.033525030189265774, "grad_norm": 1.9425541162490845, "learning_rate": 9.999678176250736e-06, "loss": 0.5424, "step": 2804 }, { "epoch": 0.033536986334126426, "grad_norm": 1.7275567054748535, "learning_rate": 9.999675975776644e-06, "loss": 0.6407, "step": 2805 }, { "epoch": 0.03354894247898708, "grad_norm": 1.9962735176086426, "learning_rate": 9.999673767805519e-06, "loss": 0.6578, "step": 2806 }, { "epoch": 0.03356089862384773, "grad_norm": 6.225343704223633, "learning_rate": 9.999671552337365e-06, "loss": 0.7029, "step": 2807 }, { "epoch": 0.03357285476870838, "grad_norm": 10.377470970153809, "learning_rate": 9.999669329372186e-06, "loss": 0.6313, "step": 2808 }, { "epoch": 0.033584810913569026, "grad_norm": 2.046767473220825, "learning_rate": 9.999667098909987e-06, "loss": 0.7014, "step": 2809 }, { "epoch": 0.03359676705842968, "grad_norm": 2.512214183807373, "learning_rate": 9.999664860950766e-06, "loss": 0.7051, "step": 2810 }, { "epoch": 0.03360872320329033, "grad_norm": 5.707371711730957, "learning_rate": 9.999662615494532e-06, "loss": 0.6428, "step": 2811 }, { "epoch": 0.03362067934815098, "grad_norm": 2.484419822692871, "learning_rate": 9.999660362541286e-06, "loss": 0.6493, "step": 2812 }, { "epoch": 0.03363263549301163, "grad_norm": 2.1308095455169678, "learning_rate": 9.999658102091031e-06, "loss": 0.6562, "step": 2813 }, { "epoch": 0.033644591637872284, "grad_norm": 2.141496181488037, "learning_rate": 9.999655834143771e-06, "loss": 0.6094, "step": 2814 }, { "epoch": 0.033656547782732936, "grad_norm": 2.092095136642456, "learning_rate": 9.99965355869951e-06, "loss": 0.5727, "step": 2815 }, { "epoch": 0.03366850392759359, "grad_norm": 6.173760414123535, "learning_rate": 9.99965127575825e-06, "loss": 0.6118, "step": 2816 }, { "epoch": 0.03368046007245424, "grad_norm": 4.408583641052246, "learning_rate": 9.999648985319995e-06, "loss": 0.6321, "step": 2817 }, { "epoch": 0.03369241621731489, "grad_norm": 2.7519302368164062, "learning_rate": 9.999646687384749e-06, "loss": 0.6488, "step": 2818 }, { "epoch": 0.03370437236217554, "grad_norm": 4.10220193862915, "learning_rate": 9.999644381952515e-06, "loss": 0.6358, "step": 2819 }, { "epoch": 0.033716328507036195, "grad_norm": 4.262170314788818, "learning_rate": 9.999642069023297e-06, "loss": 0.6679, "step": 2820 }, { "epoch": 0.03372828465189684, "grad_norm": 10.419776916503906, "learning_rate": 9.999639748597097e-06, "loss": 0.6173, "step": 2821 }, { "epoch": 0.03374024079675749, "grad_norm": 4.371108531951904, "learning_rate": 9.99963742067392e-06, "loss": 0.6108, "step": 2822 }, { "epoch": 0.03375219694161814, "grad_norm": 1.7569501399993896, "learning_rate": 9.999635085253767e-06, "loss": 0.5487, "step": 2823 }, { "epoch": 0.033764153086478794, "grad_norm": 7.983743667602539, "learning_rate": 9.999632742336645e-06, "loss": 0.6721, "step": 2824 }, { "epoch": 0.033776109231339446, "grad_norm": 13.28775691986084, "learning_rate": 9.999630391922557e-06, "loss": 0.7178, "step": 2825 }, { "epoch": 0.0337880653762001, "grad_norm": 2.132967948913574, "learning_rate": 9.999628034011503e-06, "loss": 0.5879, "step": 2826 }, { "epoch": 0.03380002152106075, "grad_norm": 2.213554620742798, "learning_rate": 9.999625668603492e-06, "loss": 0.6334, "step": 2827 }, { "epoch": 0.0338119776659214, "grad_norm": 2.1290483474731445, "learning_rate": 9.999623295698522e-06, "loss": 0.6233, "step": 2828 }, { "epoch": 0.03382393381078205, "grad_norm": 5.246593952178955, "learning_rate": 9.999620915296601e-06, "loss": 0.8251, "step": 2829 }, { "epoch": 0.033835889955642705, "grad_norm": 2.0594167709350586, "learning_rate": 9.99961852739773e-06, "loss": 0.747, "step": 2830 }, { "epoch": 0.033847846100503357, "grad_norm": 2.3606083393096924, "learning_rate": 9.999616132001913e-06, "loss": 0.6344, "step": 2831 }, { "epoch": 0.03385980224536401, "grad_norm": 6.030673980712891, "learning_rate": 9.999613729109153e-06, "loss": 0.6524, "step": 2832 }, { "epoch": 0.03387175839022465, "grad_norm": 1.9850668907165527, "learning_rate": 9.999611318719455e-06, "loss": 0.5052, "step": 2833 }, { "epoch": 0.033883714535085305, "grad_norm": 1.6368955373764038, "learning_rate": 9.999608900832823e-06, "loss": 0.6701, "step": 2834 }, { "epoch": 0.033895670679945956, "grad_norm": 2.785374879837036, "learning_rate": 9.99960647544926e-06, "loss": 0.6068, "step": 2835 }, { "epoch": 0.03390762682480661, "grad_norm": 1.8793070316314697, "learning_rate": 9.999604042568767e-06, "loss": 0.545, "step": 2836 }, { "epoch": 0.03391958296966726, "grad_norm": 2.2773585319519043, "learning_rate": 9.999601602191351e-06, "loss": 0.6493, "step": 2837 }, { "epoch": 0.03393153911452791, "grad_norm": 9.851921081542969, "learning_rate": 9.999599154317015e-06, "loss": 0.6878, "step": 2838 }, { "epoch": 0.03394349525938856, "grad_norm": 1.5989705324172974, "learning_rate": 9.999596698945763e-06, "loss": 0.5399, "step": 2839 }, { "epoch": 0.033955451404249215, "grad_norm": 2.2291603088378906, "learning_rate": 9.999594236077596e-06, "loss": 0.6371, "step": 2840 }, { "epoch": 0.03396740754910987, "grad_norm": 4.7102861404418945, "learning_rate": 9.999591765712521e-06, "loss": 0.6087, "step": 2841 }, { "epoch": 0.03397936369397052, "grad_norm": 2.370140314102173, "learning_rate": 9.999589287850542e-06, "loss": 0.6848, "step": 2842 }, { "epoch": 0.03399131983883117, "grad_norm": 8.185128211975098, "learning_rate": 9.999586802491659e-06, "loss": 0.6872, "step": 2843 }, { "epoch": 0.034003275983691815, "grad_norm": 1.8931453227996826, "learning_rate": 9.999584309635877e-06, "loss": 0.7026, "step": 2844 }, { "epoch": 0.03401523212855247, "grad_norm": 2.3976519107818604, "learning_rate": 9.999581809283202e-06, "loss": 0.6541, "step": 2845 }, { "epoch": 0.03402718827341312, "grad_norm": 3.2534873485565186, "learning_rate": 9.999579301433636e-06, "loss": 0.5946, "step": 2846 }, { "epoch": 0.03403914441827377, "grad_norm": 4.870706558227539, "learning_rate": 9.999576786087184e-06, "loss": 0.619, "step": 2847 }, { "epoch": 0.03405110056313442, "grad_norm": 22.692989349365234, "learning_rate": 9.999574263243848e-06, "loss": 0.6094, "step": 2848 }, { "epoch": 0.034063056707995074, "grad_norm": 2.7853572368621826, "learning_rate": 9.999571732903632e-06, "loss": 0.4889, "step": 2849 }, { "epoch": 0.034075012852855725, "grad_norm": 2.0512008666992188, "learning_rate": 9.999569195066541e-06, "loss": 0.5746, "step": 2850 }, { "epoch": 0.03408696899771638, "grad_norm": 2.3082070350646973, "learning_rate": 9.999566649732577e-06, "loss": 0.5795, "step": 2851 }, { "epoch": 0.03409892514257703, "grad_norm": 3.0369808673858643, "learning_rate": 9.999564096901746e-06, "loss": 0.6882, "step": 2852 }, { "epoch": 0.03411088128743768, "grad_norm": 7.741317272186279, "learning_rate": 9.999561536574052e-06, "loss": 0.7248, "step": 2853 }, { "epoch": 0.03412283743229833, "grad_norm": 2.426387071609497, "learning_rate": 9.999558968749496e-06, "loss": 0.6479, "step": 2854 }, { "epoch": 0.034134793577158984, "grad_norm": 2.602217197418213, "learning_rate": 9.999556393428084e-06, "loss": 0.7317, "step": 2855 }, { "epoch": 0.03414674972201963, "grad_norm": 3.6315159797668457, "learning_rate": 9.99955381060982e-06, "loss": 0.6589, "step": 2856 }, { "epoch": 0.03415870586688028, "grad_norm": 4.0704193115234375, "learning_rate": 9.999551220294707e-06, "loss": 0.5722, "step": 2857 }, { "epoch": 0.03417066201174093, "grad_norm": 3.1587905883789062, "learning_rate": 9.99954862248275e-06, "loss": 0.6328, "step": 2858 }, { "epoch": 0.034182618156601584, "grad_norm": 2.586371660232544, "learning_rate": 9.999546017173951e-06, "loss": 0.6036, "step": 2859 }, { "epoch": 0.034194574301462236, "grad_norm": 2.2583582401275635, "learning_rate": 9.999543404368315e-06, "loss": 0.7462, "step": 2860 }, { "epoch": 0.03420653044632289, "grad_norm": 2.2699403762817383, "learning_rate": 9.999540784065847e-06, "loss": 0.6511, "step": 2861 }, { "epoch": 0.03421848659118354, "grad_norm": 3.0110626220703125, "learning_rate": 9.999538156266549e-06, "loss": 0.6989, "step": 2862 }, { "epoch": 0.03423044273604419, "grad_norm": 2.4731335639953613, "learning_rate": 9.999535520970425e-06, "loss": 0.7133, "step": 2863 }, { "epoch": 0.03424239888090484, "grad_norm": 3.404634475708008, "learning_rate": 9.999532878177482e-06, "loss": 0.6599, "step": 2864 }, { "epoch": 0.034254355025765494, "grad_norm": 2.023665189743042, "learning_rate": 9.99953022788772e-06, "loss": 0.604, "step": 2865 }, { "epoch": 0.034266311170626146, "grad_norm": 5.740788459777832, "learning_rate": 9.999527570101145e-06, "loss": 0.6523, "step": 2866 }, { "epoch": 0.0342782673154868, "grad_norm": 14.818975448608398, "learning_rate": 9.999524904817759e-06, "loss": 0.5654, "step": 2867 }, { "epoch": 0.03429022346034744, "grad_norm": 2.310572624206543, "learning_rate": 9.99952223203757e-06, "loss": 0.6625, "step": 2868 }, { "epoch": 0.034302179605208094, "grad_norm": 7.078763008117676, "learning_rate": 9.999519551760579e-06, "loss": 0.582, "step": 2869 }, { "epoch": 0.034314135750068746, "grad_norm": 4.149036407470703, "learning_rate": 9.99951686398679e-06, "loss": 0.6831, "step": 2870 }, { "epoch": 0.0343260918949294, "grad_norm": 2.505054473876953, "learning_rate": 9.999514168716208e-06, "loss": 0.6219, "step": 2871 }, { "epoch": 0.03433804803979005, "grad_norm": 2.509923219680786, "learning_rate": 9.999511465948836e-06, "loss": 0.6912, "step": 2872 }, { "epoch": 0.0343500041846507, "grad_norm": 1.6757783889770508, "learning_rate": 9.999508755684679e-06, "loss": 0.5433, "step": 2873 }, { "epoch": 0.03436196032951135, "grad_norm": 2.3895456790924072, "learning_rate": 9.999506037923743e-06, "loss": 0.6477, "step": 2874 }, { "epoch": 0.034373916474372004, "grad_norm": 13.698076248168945, "learning_rate": 9.999503312666029e-06, "loss": 0.7172, "step": 2875 }, { "epoch": 0.034385872619232656, "grad_norm": 2.1941514015197754, "learning_rate": 9.99950057991154e-06, "loss": 0.619, "step": 2876 }, { "epoch": 0.03439782876409331, "grad_norm": 10.23376178741455, "learning_rate": 9.999497839660284e-06, "loss": 0.634, "step": 2877 }, { "epoch": 0.03440978490895396, "grad_norm": 3.7901957035064697, "learning_rate": 9.999495091912263e-06, "loss": 0.6969, "step": 2878 }, { "epoch": 0.03442174105381461, "grad_norm": 2.6814537048339844, "learning_rate": 9.99949233666748e-06, "loss": 0.7131, "step": 2879 }, { "epoch": 0.034433697198675256, "grad_norm": 7.405774116516113, "learning_rate": 9.999489573925942e-06, "loss": 0.6371, "step": 2880 }, { "epoch": 0.03444565334353591, "grad_norm": 2.476966142654419, "learning_rate": 9.99948680368765e-06, "loss": 0.5864, "step": 2881 }, { "epoch": 0.03445760948839656, "grad_norm": 4.381944179534912, "learning_rate": 9.999484025952611e-06, "loss": 0.6795, "step": 2882 }, { "epoch": 0.03446956563325721, "grad_norm": 6.803243160247803, "learning_rate": 9.999481240720829e-06, "loss": 0.6585, "step": 2883 }, { "epoch": 0.03448152177811786, "grad_norm": 2.282975912094116, "learning_rate": 9.999478447992305e-06, "loss": 0.6033, "step": 2884 }, { "epoch": 0.034493477922978515, "grad_norm": 2.045793056488037, "learning_rate": 9.999475647767047e-06, "loss": 0.6264, "step": 2885 }, { "epoch": 0.034505434067839166, "grad_norm": 2.1954376697540283, "learning_rate": 9.999472840045057e-06, "loss": 0.6805, "step": 2886 }, { "epoch": 0.03451739021269982, "grad_norm": 2.593656063079834, "learning_rate": 9.999470024826338e-06, "loss": 0.6583, "step": 2887 }, { "epoch": 0.03452934635756047, "grad_norm": 2.147023916244507, "learning_rate": 9.999467202110899e-06, "loss": 0.6886, "step": 2888 }, { "epoch": 0.03454130250242112, "grad_norm": 5.101837158203125, "learning_rate": 9.99946437189874e-06, "loss": 0.6379, "step": 2889 }, { "epoch": 0.03455325864728177, "grad_norm": 4.631126880645752, "learning_rate": 9.999461534189864e-06, "loss": 0.6681, "step": 2890 }, { "epoch": 0.034565214792142425, "grad_norm": 2.4419846534729004, "learning_rate": 9.99945868898428e-06, "loss": 0.6571, "step": 2891 }, { "epoch": 0.03457717093700307, "grad_norm": 3.092172622680664, "learning_rate": 9.99945583628199e-06, "loss": 0.6012, "step": 2892 }, { "epoch": 0.03458912708186372, "grad_norm": 2.878908157348633, "learning_rate": 9.999452976082998e-06, "loss": 0.6596, "step": 2893 }, { "epoch": 0.03460108322672437, "grad_norm": 11.869346618652344, "learning_rate": 9.999450108387308e-06, "loss": 0.6543, "step": 2894 }, { "epoch": 0.034613039371585025, "grad_norm": 1.9502217769622803, "learning_rate": 9.999447233194924e-06, "loss": 0.6935, "step": 2895 }, { "epoch": 0.03462499551644568, "grad_norm": 4.272416114807129, "learning_rate": 9.999444350505852e-06, "loss": 0.6045, "step": 2896 }, { "epoch": 0.03463695166130633, "grad_norm": 2.1907811164855957, "learning_rate": 9.999441460320096e-06, "loss": 0.6036, "step": 2897 }, { "epoch": 0.03464890780616698, "grad_norm": 2.435926675796509, "learning_rate": 9.99943856263766e-06, "loss": 0.5593, "step": 2898 }, { "epoch": 0.03466086395102763, "grad_norm": 2.379211187362671, "learning_rate": 9.999435657458548e-06, "loss": 0.6404, "step": 2899 }, { "epoch": 0.03467282009588828, "grad_norm": 2.4022953510284424, "learning_rate": 9.999432744782762e-06, "loss": 0.6252, "step": 2900 }, { "epoch": 0.034684776240748935, "grad_norm": 2.288912057876587, "learning_rate": 9.999429824610312e-06, "loss": 0.5642, "step": 2901 }, { "epoch": 0.03469673238560959, "grad_norm": 5.882928371429443, "learning_rate": 9.999426896941198e-06, "loss": 0.6519, "step": 2902 }, { "epoch": 0.03470868853047024, "grad_norm": 3.0813443660736084, "learning_rate": 9.999423961775426e-06, "loss": 0.644, "step": 2903 }, { "epoch": 0.03472064467533088, "grad_norm": 2.2758665084838867, "learning_rate": 9.999421019112999e-06, "loss": 0.6649, "step": 2904 }, { "epoch": 0.034732600820191535, "grad_norm": 3.242558717727661, "learning_rate": 9.99941806895392e-06, "loss": 0.6291, "step": 2905 }, { "epoch": 0.03474455696505219, "grad_norm": 2.771169424057007, "learning_rate": 9.9994151112982e-06, "loss": 0.6236, "step": 2906 }, { "epoch": 0.03475651310991284, "grad_norm": 19.01926040649414, "learning_rate": 9.999412146145839e-06, "loss": 0.5771, "step": 2907 }, { "epoch": 0.03476846925477349, "grad_norm": 4.678402900695801, "learning_rate": 9.99940917349684e-06, "loss": 0.6803, "step": 2908 }, { "epoch": 0.03478042539963414, "grad_norm": 2.8216781616210938, "learning_rate": 9.99940619335121e-06, "loss": 0.6267, "step": 2909 }, { "epoch": 0.034792381544494794, "grad_norm": 3.13545560836792, "learning_rate": 9.999403205708952e-06, "loss": 0.6797, "step": 2910 }, { "epoch": 0.034804337689355445, "grad_norm": 1.791561484336853, "learning_rate": 9.999400210570071e-06, "loss": 0.6261, "step": 2911 }, { "epoch": 0.0348162938342161, "grad_norm": 40.832191467285156, "learning_rate": 9.999397207934574e-06, "loss": 0.696, "step": 2912 }, { "epoch": 0.03482824997907675, "grad_norm": 2.2678980827331543, "learning_rate": 9.99939419780246e-06, "loss": 0.4794, "step": 2913 }, { "epoch": 0.0348402061239374, "grad_norm": 2.2148289680480957, "learning_rate": 9.999391180173738e-06, "loss": 0.577, "step": 2914 }, { "epoch": 0.034852162268798045, "grad_norm": 2.8435254096984863, "learning_rate": 9.999388155048414e-06, "loss": 0.7567, "step": 2915 }, { "epoch": 0.0348641184136587, "grad_norm": 2.843217372894287, "learning_rate": 9.999385122426485e-06, "loss": 0.5623, "step": 2916 }, { "epoch": 0.03487607455851935, "grad_norm": 2.0737576484680176, "learning_rate": 9.999382082307962e-06, "loss": 0.5874, "step": 2917 }, { "epoch": 0.03488803070338, "grad_norm": 3.533231735229492, "learning_rate": 9.999379034692847e-06, "loss": 0.7164, "step": 2918 }, { "epoch": 0.03489998684824065, "grad_norm": 3.354889392852783, "learning_rate": 9.999375979581148e-06, "loss": 0.5446, "step": 2919 }, { "epoch": 0.034911942993101304, "grad_norm": 2.451727867126465, "learning_rate": 9.999372916972866e-06, "loss": 0.7146, "step": 2920 }, { "epoch": 0.034923899137961956, "grad_norm": 9.82259750366211, "learning_rate": 9.999369846868003e-06, "loss": 0.7245, "step": 2921 }, { "epoch": 0.03493585528282261, "grad_norm": 166.68359375, "learning_rate": 9.999366769266571e-06, "loss": 0.5885, "step": 2922 }, { "epoch": 0.03494781142768326, "grad_norm": 2.8587372303009033, "learning_rate": 9.99936368416857e-06, "loss": 0.6463, "step": 2923 }, { "epoch": 0.03495976757254391, "grad_norm": 1.9701673984527588, "learning_rate": 9.999360591574004e-06, "loss": 0.5563, "step": 2924 }, { "epoch": 0.03497172371740456, "grad_norm": 2.471956253051758, "learning_rate": 9.99935749148288e-06, "loss": 0.5571, "step": 2925 }, { "epoch": 0.034983679862265214, "grad_norm": 2.5457842350006104, "learning_rate": 9.999354383895202e-06, "loss": 0.6431, "step": 2926 }, { "epoch": 0.03499563600712586, "grad_norm": 1.641899585723877, "learning_rate": 9.999351268810975e-06, "loss": 0.5904, "step": 2927 }, { "epoch": 0.03500759215198651, "grad_norm": 4.4447550773620605, "learning_rate": 9.999348146230202e-06, "loss": 0.6279, "step": 2928 }, { "epoch": 0.03501954829684716, "grad_norm": 1.9947870969772339, "learning_rate": 9.999345016152889e-06, "loss": 0.6531, "step": 2929 }, { "epoch": 0.035031504441707814, "grad_norm": 2.7093074321746826, "learning_rate": 9.999341878579039e-06, "loss": 0.6082, "step": 2930 }, { "epoch": 0.035043460586568466, "grad_norm": 2.419909715652466, "learning_rate": 9.999338733508658e-06, "loss": 0.6307, "step": 2931 }, { "epoch": 0.03505541673142912, "grad_norm": 5.115073204040527, "learning_rate": 9.999335580941753e-06, "loss": 0.6357, "step": 2932 }, { "epoch": 0.03506737287628977, "grad_norm": 2.149034261703491, "learning_rate": 9.999332420878325e-06, "loss": 0.6165, "step": 2933 }, { "epoch": 0.03507932902115042, "grad_norm": 3.9641408920288086, "learning_rate": 9.999329253318381e-06, "loss": 0.5763, "step": 2934 }, { "epoch": 0.03509128516601107, "grad_norm": 3.068549871444702, "learning_rate": 9.999326078261926e-06, "loss": 0.66, "step": 2935 }, { "epoch": 0.035103241310871725, "grad_norm": 4.284188747406006, "learning_rate": 9.999322895708962e-06, "loss": 0.5485, "step": 2936 }, { "epoch": 0.035115197455732376, "grad_norm": 1.8107022047042847, "learning_rate": 9.999319705659496e-06, "loss": 0.6591, "step": 2937 }, { "epoch": 0.03512715360059303, "grad_norm": 6.055198669433594, "learning_rate": 9.999316508113533e-06, "loss": 0.6821, "step": 2938 }, { "epoch": 0.03513910974545367, "grad_norm": 4.419515132904053, "learning_rate": 9.999313303071077e-06, "loss": 0.583, "step": 2939 }, { "epoch": 0.035151065890314324, "grad_norm": 2.063359498977661, "learning_rate": 9.999310090532132e-06, "loss": 0.6327, "step": 2940 }, { "epoch": 0.035163022035174976, "grad_norm": 1.9833447933197021, "learning_rate": 9.999306870496703e-06, "loss": 0.5983, "step": 2941 }, { "epoch": 0.03517497818003563, "grad_norm": 5.446516036987305, "learning_rate": 9.999303642964796e-06, "loss": 0.6271, "step": 2942 }, { "epoch": 0.03518693432489628, "grad_norm": 2.069230794906616, "learning_rate": 9.999300407936418e-06, "loss": 0.7205, "step": 2943 }, { "epoch": 0.03519889046975693, "grad_norm": 3.1895880699157715, "learning_rate": 9.99929716541157e-06, "loss": 0.5816, "step": 2944 }, { "epoch": 0.03521084661461758, "grad_norm": 2.8232109546661377, "learning_rate": 9.999293915390258e-06, "loss": 0.5241, "step": 2945 }, { "epoch": 0.035222802759478235, "grad_norm": 2.994615316390991, "learning_rate": 9.999290657872486e-06, "loss": 0.668, "step": 2946 }, { "epoch": 0.035234758904338886, "grad_norm": 2.119180679321289, "learning_rate": 9.99928739285826e-06, "loss": 0.6177, "step": 2947 }, { "epoch": 0.03524671504919954, "grad_norm": 2.466768503189087, "learning_rate": 9.999284120347585e-06, "loss": 0.683, "step": 2948 }, { "epoch": 0.03525867119406019, "grad_norm": 2.4367778301239014, "learning_rate": 9.999280840340467e-06, "loss": 0.6702, "step": 2949 }, { "epoch": 0.03527062733892084, "grad_norm": 3.8022375106811523, "learning_rate": 9.999277552836908e-06, "loss": 0.5618, "step": 2950 }, { "epoch": 0.035282583483781486, "grad_norm": 2.5073771476745605, "learning_rate": 9.999274257836915e-06, "loss": 0.614, "step": 2951 }, { "epoch": 0.03529453962864214, "grad_norm": 2.277442693710327, "learning_rate": 9.999270955340493e-06, "loss": 0.648, "step": 2952 }, { "epoch": 0.03530649577350279, "grad_norm": 7.208672523498535, "learning_rate": 9.999267645347646e-06, "loss": 0.6357, "step": 2953 }, { "epoch": 0.03531845191836344, "grad_norm": 2.8061764240264893, "learning_rate": 9.99926432785838e-06, "loss": 0.6725, "step": 2954 }, { "epoch": 0.03533040806322409, "grad_norm": 3.2003378868103027, "learning_rate": 9.999261002872698e-06, "loss": 0.6448, "step": 2955 }, { "epoch": 0.035342364208084745, "grad_norm": 2.2591397762298584, "learning_rate": 9.999257670390608e-06, "loss": 0.5474, "step": 2956 }, { "epoch": 0.0353543203529454, "grad_norm": 3.3136138916015625, "learning_rate": 9.999254330412111e-06, "loss": 0.4799, "step": 2957 }, { "epoch": 0.03536627649780605, "grad_norm": 3.2868494987487793, "learning_rate": 9.999250982937216e-06, "loss": 0.6627, "step": 2958 }, { "epoch": 0.0353782326426667, "grad_norm": 2.051358461380005, "learning_rate": 9.999247627965927e-06, "loss": 0.6877, "step": 2959 }, { "epoch": 0.03539018878752735, "grad_norm": 6.416441440582275, "learning_rate": 9.999244265498248e-06, "loss": 0.6817, "step": 2960 }, { "epoch": 0.035402144932388004, "grad_norm": 2.656139850616455, "learning_rate": 9.999240895534183e-06, "loss": 0.6502, "step": 2961 }, { "epoch": 0.035414101077248655, "grad_norm": 1.782944679260254, "learning_rate": 9.999237518073742e-06, "loss": 0.5696, "step": 2962 }, { "epoch": 0.0354260572221093, "grad_norm": 3.0498507022857666, "learning_rate": 9.999234133116924e-06, "loss": 0.5581, "step": 2963 }, { "epoch": 0.03543801336696995, "grad_norm": 4.151432514190674, "learning_rate": 9.999230740663737e-06, "loss": 0.6299, "step": 2964 }, { "epoch": 0.035449969511830604, "grad_norm": 9.825019836425781, "learning_rate": 9.999227340714185e-06, "loss": 0.5353, "step": 2965 }, { "epoch": 0.035461925656691255, "grad_norm": 3.706711530685425, "learning_rate": 9.999223933268274e-06, "loss": 0.5648, "step": 2966 }, { "epoch": 0.03547388180155191, "grad_norm": 2.795560359954834, "learning_rate": 9.999220518326009e-06, "loss": 0.6506, "step": 2967 }, { "epoch": 0.03548583794641256, "grad_norm": 2.511411190032959, "learning_rate": 9.999217095887396e-06, "loss": 0.631, "step": 2968 }, { "epoch": 0.03549779409127321, "grad_norm": 2.475759983062744, "learning_rate": 9.99921366595244e-06, "loss": 0.6117, "step": 2969 }, { "epoch": 0.03550975023613386, "grad_norm": 2.2975242137908936, "learning_rate": 9.999210228521144e-06, "loss": 0.6181, "step": 2970 }, { "epoch": 0.035521706380994514, "grad_norm": 4.4741291999816895, "learning_rate": 9.999206783593514e-06, "loss": 0.6473, "step": 2971 }, { "epoch": 0.035533662525855166, "grad_norm": 2.771059036254883, "learning_rate": 9.999203331169556e-06, "loss": 0.6233, "step": 2972 }, { "epoch": 0.03554561867071582, "grad_norm": 3.0306620597839355, "learning_rate": 9.999199871249275e-06, "loss": 0.6717, "step": 2973 }, { "epoch": 0.03555757481557647, "grad_norm": 3.170233726501465, "learning_rate": 9.999196403832675e-06, "loss": 0.7137, "step": 2974 }, { "epoch": 0.035569530960437114, "grad_norm": 2.5243594646453857, "learning_rate": 9.999192928919763e-06, "loss": 0.5737, "step": 2975 }, { "epoch": 0.035581487105297765, "grad_norm": 2.6740386486053467, "learning_rate": 9.999189446510544e-06, "loss": 0.6422, "step": 2976 }, { "epoch": 0.03559344325015842, "grad_norm": 3.922355890274048, "learning_rate": 9.999185956605022e-06, "loss": 0.666, "step": 2977 }, { "epoch": 0.03560539939501907, "grad_norm": 2.8741867542266846, "learning_rate": 9.999182459203203e-06, "loss": 0.6473, "step": 2978 }, { "epoch": 0.03561735553987972, "grad_norm": 2.461975574493408, "learning_rate": 9.999178954305093e-06, "loss": 0.6576, "step": 2979 }, { "epoch": 0.03562931168474037, "grad_norm": 2.7321832180023193, "learning_rate": 9.999175441910696e-06, "loss": 0.6576, "step": 2980 }, { "epoch": 0.035641267829601024, "grad_norm": 2.4454264640808105, "learning_rate": 9.999171922020017e-06, "loss": 0.7351, "step": 2981 }, { "epoch": 0.035653223974461676, "grad_norm": 4.3654279708862305, "learning_rate": 9.999168394633063e-06, "loss": 0.6151, "step": 2982 }, { "epoch": 0.03566518011932233, "grad_norm": 3.7859103679656982, "learning_rate": 9.999164859749836e-06, "loss": 0.5999, "step": 2983 }, { "epoch": 0.03567713626418298, "grad_norm": 2.6482460498809814, "learning_rate": 9.999161317370346e-06, "loss": 0.6993, "step": 2984 }, { "epoch": 0.03568909240904363, "grad_norm": 3.77563214302063, "learning_rate": 9.999157767494595e-06, "loss": 0.7618, "step": 2985 }, { "epoch": 0.035701048553904276, "grad_norm": 3.340675115585327, "learning_rate": 9.999154210122589e-06, "loss": 0.5998, "step": 2986 }, { "epoch": 0.03571300469876493, "grad_norm": 2.663008689880371, "learning_rate": 9.999150645254335e-06, "loss": 0.5962, "step": 2987 }, { "epoch": 0.03572496084362558, "grad_norm": 3.2886109352111816, "learning_rate": 9.999147072889834e-06, "loss": 0.6966, "step": 2988 }, { "epoch": 0.03573691698848623, "grad_norm": 2.0764670372009277, "learning_rate": 9.999143493029095e-06, "loss": 0.73, "step": 2989 }, { "epoch": 0.03574887313334688, "grad_norm": 1.885423183441162, "learning_rate": 9.999139905672124e-06, "loss": 0.5414, "step": 2990 }, { "epoch": 0.035760829278207534, "grad_norm": 3.621326446533203, "learning_rate": 9.999136310818924e-06, "loss": 0.6242, "step": 2991 }, { "epoch": 0.035772785423068186, "grad_norm": 1.4798086881637573, "learning_rate": 9.9991327084695e-06, "loss": 0.6168, "step": 2992 }, { "epoch": 0.03578474156792884, "grad_norm": 2.144620656967163, "learning_rate": 9.999129098623861e-06, "loss": 0.5855, "step": 2993 }, { "epoch": 0.03579669771278949, "grad_norm": 3.529639959335327, "learning_rate": 9.999125481282009e-06, "loss": 0.6501, "step": 2994 }, { "epoch": 0.03580865385765014, "grad_norm": 5.600011348724365, "learning_rate": 9.999121856443952e-06, "loss": 0.6858, "step": 2995 }, { "epoch": 0.03582061000251079, "grad_norm": 3.923041582107544, "learning_rate": 9.999118224109691e-06, "loss": 0.5689, "step": 2996 }, { "epoch": 0.035832566147371445, "grad_norm": 8.046813011169434, "learning_rate": 9.999114584279236e-06, "loss": 0.6569, "step": 2997 }, { "epoch": 0.03584452229223209, "grad_norm": 2.307856798171997, "learning_rate": 9.999110936952592e-06, "loss": 0.6051, "step": 2998 }, { "epoch": 0.03585647843709274, "grad_norm": 2.2522764205932617, "learning_rate": 9.999107282129763e-06, "loss": 0.6492, "step": 2999 }, { "epoch": 0.03586843458195339, "grad_norm": 2.476637125015259, "learning_rate": 9.999103619810756e-06, "loss": 0.5598, "step": 3000 }, { "epoch": 0.035880390726814045, "grad_norm": 2.269641637802124, "learning_rate": 9.999099949995571e-06, "loss": 0.7121, "step": 3001 }, { "epoch": 0.035892346871674696, "grad_norm": 2.1452910900115967, "learning_rate": 9.999096272684222e-06, "loss": 0.6888, "step": 3002 }, { "epoch": 0.03590430301653535, "grad_norm": 2.5443496704101562, "learning_rate": 9.999092587876709e-06, "loss": 0.6831, "step": 3003 }, { "epoch": 0.035916259161396, "grad_norm": 1.7778065204620361, "learning_rate": 9.999088895573039e-06, "loss": 0.5934, "step": 3004 }, { "epoch": 0.03592821530625665, "grad_norm": 4.783661842346191, "learning_rate": 9.999085195773217e-06, "loss": 0.668, "step": 3005 }, { "epoch": 0.0359401714511173, "grad_norm": 2.648796558380127, "learning_rate": 9.99908148847725e-06, "loss": 0.4922, "step": 3006 }, { "epoch": 0.035952127595977955, "grad_norm": 1.6432623863220215, "learning_rate": 9.999077773685142e-06, "loss": 0.6373, "step": 3007 }, { "epoch": 0.03596408374083861, "grad_norm": 7.845060348510742, "learning_rate": 9.999074051396898e-06, "loss": 0.6889, "step": 3008 }, { "epoch": 0.03597603988569926, "grad_norm": 12.688963890075684, "learning_rate": 9.999070321612527e-06, "loss": 0.6337, "step": 3009 }, { "epoch": 0.0359879960305599, "grad_norm": 1.6567596197128296, "learning_rate": 9.99906658433203e-06, "loss": 0.5793, "step": 3010 }, { "epoch": 0.035999952175420555, "grad_norm": 1.965720772743225, "learning_rate": 9.999062839555415e-06, "loss": 0.5023, "step": 3011 }, { "epoch": 0.036011908320281207, "grad_norm": 3.016496181488037, "learning_rate": 9.99905908728269e-06, "loss": 0.6469, "step": 3012 }, { "epoch": 0.03602386446514186, "grad_norm": 2.278942108154297, "learning_rate": 9.999055327513855e-06, "loss": 0.6487, "step": 3013 }, { "epoch": 0.03603582061000251, "grad_norm": 2.386136770248413, "learning_rate": 9.999051560248921e-06, "loss": 0.7561, "step": 3014 }, { "epoch": 0.03604777675486316, "grad_norm": 2.6100804805755615, "learning_rate": 9.999047785487891e-06, "loss": 0.6212, "step": 3015 }, { "epoch": 0.03605973289972381, "grad_norm": 4.789152145385742, "learning_rate": 9.99904400323077e-06, "loss": 0.7721, "step": 3016 }, { "epoch": 0.036071689044584465, "grad_norm": 5.474686622619629, "learning_rate": 9.999040213477566e-06, "loss": 0.6863, "step": 3017 }, { "epoch": 0.03608364518944512, "grad_norm": 3.0591001510620117, "learning_rate": 9.999036416228282e-06, "loss": 0.6246, "step": 3018 }, { "epoch": 0.03609560133430577, "grad_norm": 1.4856796264648438, "learning_rate": 9.999032611482927e-06, "loss": 0.6372, "step": 3019 }, { "epoch": 0.03610755747916642, "grad_norm": 4.290282249450684, "learning_rate": 9.999028799241504e-06, "loss": 0.672, "step": 3020 }, { "epoch": 0.03611951362402707, "grad_norm": 6.366616725921631, "learning_rate": 9.999024979504019e-06, "loss": 0.7057, "step": 3021 }, { "epoch": 0.03613146976888772, "grad_norm": 1.7721920013427734, "learning_rate": 9.999021152270479e-06, "loss": 0.587, "step": 3022 }, { "epoch": 0.03614342591374837, "grad_norm": 5.113866806030273, "learning_rate": 9.999017317540888e-06, "loss": 0.7062, "step": 3023 }, { "epoch": 0.03615538205860902, "grad_norm": 2.2067270278930664, "learning_rate": 9.999013475315255e-06, "loss": 0.6192, "step": 3024 }, { "epoch": 0.03616733820346967, "grad_norm": 1.6206034421920776, "learning_rate": 9.999009625593581e-06, "loss": 0.6097, "step": 3025 }, { "epoch": 0.036179294348330324, "grad_norm": 1.7866989374160767, "learning_rate": 9.999005768375876e-06, "loss": 0.5851, "step": 3026 }, { "epoch": 0.036191250493190975, "grad_norm": 2.6060433387756348, "learning_rate": 9.999001903662143e-06, "loss": 0.6249, "step": 3027 }, { "epoch": 0.03620320663805163, "grad_norm": 2.201348066329956, "learning_rate": 9.99899803145239e-06, "loss": 0.6904, "step": 3028 }, { "epoch": 0.03621516278291228, "grad_norm": 1.5015029907226562, "learning_rate": 9.99899415174662e-06, "loss": 0.6596, "step": 3029 }, { "epoch": 0.03622711892777293, "grad_norm": 15.0336275100708, "learning_rate": 9.998990264544842e-06, "loss": 0.5677, "step": 3030 }, { "epoch": 0.03623907507263358, "grad_norm": 1.7577252388000488, "learning_rate": 9.998986369847059e-06, "loss": 0.5877, "step": 3031 }, { "epoch": 0.036251031217494234, "grad_norm": 3.2858121395111084, "learning_rate": 9.99898246765328e-06, "loss": 0.6937, "step": 3032 }, { "epoch": 0.036262987362354886, "grad_norm": 2.380202531814575, "learning_rate": 9.998978557963508e-06, "loss": 0.7046, "step": 3033 }, { "epoch": 0.03627494350721553, "grad_norm": 2.276484966278076, "learning_rate": 9.99897464077775e-06, "loss": 0.666, "step": 3034 }, { "epoch": 0.03628689965207618, "grad_norm": 2.548336982727051, "learning_rate": 9.99897071609601e-06, "loss": 0.6231, "step": 3035 }, { "epoch": 0.036298855796936834, "grad_norm": 4.482124328613281, "learning_rate": 9.998966783918299e-06, "loss": 0.5907, "step": 3036 }, { "epoch": 0.036310811941797486, "grad_norm": 3.0901682376861572, "learning_rate": 9.998962844244618e-06, "loss": 0.639, "step": 3037 }, { "epoch": 0.03632276808665814, "grad_norm": 2.319793462753296, "learning_rate": 9.998958897074973e-06, "loss": 0.6346, "step": 3038 }, { "epoch": 0.03633472423151879, "grad_norm": 1.9079053401947021, "learning_rate": 9.998954942409373e-06, "loss": 0.6396, "step": 3039 }, { "epoch": 0.03634668037637944, "grad_norm": 2.3056724071502686, "learning_rate": 9.998950980247821e-06, "loss": 0.6453, "step": 3040 }, { "epoch": 0.03635863652124009, "grad_norm": 3.522794246673584, "learning_rate": 9.998947010590326e-06, "loss": 0.644, "step": 3041 }, { "epoch": 0.036370592666100744, "grad_norm": 4.901121616363525, "learning_rate": 9.998943033436893e-06, "loss": 0.627, "step": 3042 }, { "epoch": 0.036382548810961396, "grad_norm": 2.564037799835205, "learning_rate": 9.998939048787524e-06, "loss": 0.5651, "step": 3043 }, { "epoch": 0.03639450495582205, "grad_norm": 2.583951711654663, "learning_rate": 9.998935056642229e-06, "loss": 0.7152, "step": 3044 }, { "epoch": 0.03640646110068269, "grad_norm": 2.808760643005371, "learning_rate": 9.998931057001013e-06, "loss": 0.6857, "step": 3045 }, { "epoch": 0.036418417245543344, "grad_norm": 1.7957942485809326, "learning_rate": 9.998927049863885e-06, "loss": 0.6481, "step": 3046 }, { "epoch": 0.036430373390403996, "grad_norm": 2.0787270069122314, "learning_rate": 9.998923035230846e-06, "loss": 0.5412, "step": 3047 }, { "epoch": 0.03644232953526465, "grad_norm": 3.1925125122070312, "learning_rate": 9.998919013101903e-06, "loss": 0.6491, "step": 3048 }, { "epoch": 0.0364542856801253, "grad_norm": 3.4986765384674072, "learning_rate": 9.998914983477063e-06, "loss": 0.6414, "step": 3049 }, { "epoch": 0.03646624182498595, "grad_norm": 5.725205421447754, "learning_rate": 9.998910946356335e-06, "loss": 0.6464, "step": 3050 }, { "epoch": 0.0364781979698466, "grad_norm": 1.6841936111450195, "learning_rate": 9.99890690173972e-06, "loss": 0.5318, "step": 3051 }, { "epoch": 0.036490154114707254, "grad_norm": 2.8201053142547607, "learning_rate": 9.998902849627228e-06, "loss": 0.6724, "step": 3052 }, { "epoch": 0.036502110259567906, "grad_norm": 2.33048415184021, "learning_rate": 9.998898790018863e-06, "loss": 0.6238, "step": 3053 }, { "epoch": 0.03651406640442856, "grad_norm": 2.432410717010498, "learning_rate": 9.99889472291463e-06, "loss": 0.6621, "step": 3054 }, { "epoch": 0.03652602254928921, "grad_norm": 2.573776960372925, "learning_rate": 9.998890648314538e-06, "loss": 0.6842, "step": 3055 }, { "epoch": 0.03653797869414986, "grad_norm": 1.8835684061050415, "learning_rate": 9.998886566218591e-06, "loss": 0.5831, "step": 3056 }, { "epoch": 0.036549934839010506, "grad_norm": 2.38100528717041, "learning_rate": 9.998882476626797e-06, "loss": 0.7064, "step": 3057 }, { "epoch": 0.03656189098387116, "grad_norm": 3.123730421066284, "learning_rate": 9.998878379539158e-06, "loss": 0.6688, "step": 3058 }, { "epoch": 0.03657384712873181, "grad_norm": 3.3760087490081787, "learning_rate": 9.998874274955686e-06, "loss": 0.4913, "step": 3059 }, { "epoch": 0.03658580327359246, "grad_norm": 1.7924984693527222, "learning_rate": 9.998870162876384e-06, "loss": 0.6132, "step": 3060 }, { "epoch": 0.03659775941845311, "grad_norm": 1.7475663423538208, "learning_rate": 9.998866043301258e-06, "loss": 0.6729, "step": 3061 }, { "epoch": 0.036609715563313765, "grad_norm": 4.576251029968262, "learning_rate": 9.998861916230313e-06, "loss": 0.6375, "step": 3062 }, { "epoch": 0.036621671708174416, "grad_norm": 3.365741491317749, "learning_rate": 9.99885778166356e-06, "loss": 0.6311, "step": 3063 }, { "epoch": 0.03663362785303507, "grad_norm": 1.8585469722747803, "learning_rate": 9.998853639600999e-06, "loss": 0.6405, "step": 3064 }, { "epoch": 0.03664558399789572, "grad_norm": 2.7445857524871826, "learning_rate": 9.998849490042642e-06, "loss": 0.7205, "step": 3065 }, { "epoch": 0.03665754014275637, "grad_norm": 4.545765399932861, "learning_rate": 9.998845332988489e-06, "loss": 0.5943, "step": 3066 }, { "epoch": 0.03666949628761702, "grad_norm": 2.3330302238464355, "learning_rate": 9.998841168438551e-06, "loss": 0.6129, "step": 3067 }, { "epoch": 0.036681452432477675, "grad_norm": 2.694901466369629, "learning_rate": 9.998836996392835e-06, "loss": 0.6639, "step": 3068 }, { "epoch": 0.03669340857733832, "grad_norm": 3.7429566383361816, "learning_rate": 9.998832816851343e-06, "loss": 0.6635, "step": 3069 }, { "epoch": 0.03670536472219897, "grad_norm": 2.8503057956695557, "learning_rate": 9.998828629814084e-06, "loss": 0.6261, "step": 3070 }, { "epoch": 0.03671732086705962, "grad_norm": 4.159675121307373, "learning_rate": 9.998824435281064e-06, "loss": 0.6461, "step": 3071 }, { "epoch": 0.036729277011920275, "grad_norm": 1.8400678634643555, "learning_rate": 9.998820233252289e-06, "loss": 0.7484, "step": 3072 }, { "epoch": 0.03674123315678093, "grad_norm": 1.9352009296417236, "learning_rate": 9.998816023727765e-06, "loss": 0.5671, "step": 3073 }, { "epoch": 0.03675318930164158, "grad_norm": 2.2983415126800537, "learning_rate": 9.998811806707498e-06, "loss": 0.6067, "step": 3074 }, { "epoch": 0.03676514544650223, "grad_norm": 1.9772355556488037, "learning_rate": 9.998807582191493e-06, "loss": 0.6246, "step": 3075 }, { "epoch": 0.03677710159136288, "grad_norm": 1.5588816404342651, "learning_rate": 9.998803350179763e-06, "loss": 0.6571, "step": 3076 }, { "epoch": 0.036789057736223534, "grad_norm": 1.6621464490890503, "learning_rate": 9.998799110672307e-06, "loss": 0.6156, "step": 3077 }, { "epoch": 0.036801013881084185, "grad_norm": 4.605578422546387, "learning_rate": 9.998794863669135e-06, "loss": 0.6777, "step": 3078 }, { "epoch": 0.03681297002594484, "grad_norm": 2.8217546939849854, "learning_rate": 9.998790609170252e-06, "loss": 0.6493, "step": 3079 }, { "epoch": 0.03682492617080549, "grad_norm": 2.4881656169891357, "learning_rate": 9.998786347175664e-06, "loss": 0.6571, "step": 3080 }, { "epoch": 0.036836882315666133, "grad_norm": 2.359994888305664, "learning_rate": 9.998782077685378e-06, "loss": 0.6282, "step": 3081 }, { "epoch": 0.036848838460526785, "grad_norm": 1.9706748723983765, "learning_rate": 9.9987778006994e-06, "loss": 0.7371, "step": 3082 }, { "epoch": 0.03686079460538744, "grad_norm": 2.57083797454834, "learning_rate": 9.998773516217739e-06, "loss": 0.7628, "step": 3083 }, { "epoch": 0.03687275075024809, "grad_norm": 3.9929373264312744, "learning_rate": 9.998769224240397e-06, "loss": 0.6866, "step": 3084 }, { "epoch": 0.03688470689510874, "grad_norm": 2.8523335456848145, "learning_rate": 9.998764924767385e-06, "loss": 0.697, "step": 3085 }, { "epoch": 0.03689666303996939, "grad_norm": 3.134875535964966, "learning_rate": 9.998760617798707e-06, "loss": 0.6238, "step": 3086 }, { "epoch": 0.036908619184830044, "grad_norm": 1.7108198404312134, "learning_rate": 9.998756303334368e-06, "loss": 0.7608, "step": 3087 }, { "epoch": 0.036920575329690695, "grad_norm": 6.498095512390137, "learning_rate": 9.998751981374378e-06, "loss": 0.6213, "step": 3088 }, { "epoch": 0.03693253147455135, "grad_norm": 1.694605827331543, "learning_rate": 9.99874765191874e-06, "loss": 0.6652, "step": 3089 }, { "epoch": 0.036944487619412, "grad_norm": 2.6997127532958984, "learning_rate": 9.998743314967462e-06, "loss": 0.6286, "step": 3090 }, { "epoch": 0.03695644376427265, "grad_norm": 2.538947820663452, "learning_rate": 9.998738970520551e-06, "loss": 0.7018, "step": 3091 }, { "epoch": 0.0369683999091333, "grad_norm": 1.7353999614715576, "learning_rate": 9.998734618578015e-06, "loss": 0.5123, "step": 3092 }, { "epoch": 0.03698035605399395, "grad_norm": 2.0803000926971436, "learning_rate": 9.998730259139857e-06, "loss": 0.5885, "step": 3093 }, { "epoch": 0.0369923121988546, "grad_norm": 2.181100368499756, "learning_rate": 9.998725892206085e-06, "loss": 0.666, "step": 3094 }, { "epoch": 0.03700426834371525, "grad_norm": 2.131819725036621, "learning_rate": 9.998721517776706e-06, "loss": 0.6592, "step": 3095 }, { "epoch": 0.0370162244885759, "grad_norm": 3.318542957305908, "learning_rate": 9.998717135851728e-06, "loss": 0.6656, "step": 3096 }, { "epoch": 0.037028180633436554, "grad_norm": 3.174013614654541, "learning_rate": 9.998712746431153e-06, "loss": 0.6203, "step": 3097 }, { "epoch": 0.037040136778297206, "grad_norm": 2.054413318634033, "learning_rate": 9.998708349514993e-06, "loss": 0.6204, "step": 3098 }, { "epoch": 0.03705209292315786, "grad_norm": 1.8056665658950806, "learning_rate": 9.998703945103252e-06, "loss": 0.5817, "step": 3099 }, { "epoch": 0.03706404906801851, "grad_norm": 8.01854133605957, "learning_rate": 9.998699533195934e-06, "loss": 0.6581, "step": 3100 }, { "epoch": 0.03707600521287916, "grad_norm": 3.060629367828369, "learning_rate": 9.998695113793052e-06, "loss": 0.6597, "step": 3101 }, { "epoch": 0.03708796135773981, "grad_norm": 3.2511355876922607, "learning_rate": 9.998690686894605e-06, "loss": 0.6286, "step": 3102 }, { "epoch": 0.037099917502600464, "grad_norm": 2.4768474102020264, "learning_rate": 9.998686252500606e-06, "loss": 0.6315, "step": 3103 }, { "epoch": 0.037111873647461116, "grad_norm": 2.3055219650268555, "learning_rate": 9.998681810611058e-06, "loss": 0.6502, "step": 3104 }, { "epoch": 0.03712382979232176, "grad_norm": 2.2496933937072754, "learning_rate": 9.99867736122597e-06, "loss": 0.6746, "step": 3105 }, { "epoch": 0.03713578593718241, "grad_norm": 2.229970693588257, "learning_rate": 9.998672904345346e-06, "loss": 0.57, "step": 3106 }, { "epoch": 0.037147742082043064, "grad_norm": 2.444423198699951, "learning_rate": 9.998668439969196e-06, "loss": 0.6487, "step": 3107 }, { "epoch": 0.037159698226903716, "grad_norm": 1.7109678983688354, "learning_rate": 9.998663968097523e-06, "loss": 0.6378, "step": 3108 }, { "epoch": 0.03717165437176437, "grad_norm": 4.471859931945801, "learning_rate": 9.998659488730337e-06, "loss": 0.6254, "step": 3109 }, { "epoch": 0.03718361051662502, "grad_norm": 1.8398572206497192, "learning_rate": 9.998655001867643e-06, "loss": 0.6092, "step": 3110 }, { "epoch": 0.03719556666148567, "grad_norm": 2.168067693710327, "learning_rate": 9.99865050750945e-06, "loss": 0.5981, "step": 3111 }, { "epoch": 0.03720752280634632, "grad_norm": 2.3618485927581787, "learning_rate": 9.99864600565576e-06, "loss": 0.6998, "step": 3112 }, { "epoch": 0.037219478951206975, "grad_norm": 2.5886027812957764, "learning_rate": 9.998641496306582e-06, "loss": 0.7139, "step": 3113 }, { "epoch": 0.037231435096067626, "grad_norm": 2.3845701217651367, "learning_rate": 9.998636979461927e-06, "loss": 0.6545, "step": 3114 }, { "epoch": 0.03724339124092828, "grad_norm": 2.527968406677246, "learning_rate": 9.998632455121796e-06, "loss": 0.7566, "step": 3115 }, { "epoch": 0.03725534738578892, "grad_norm": 2.065200090408325, "learning_rate": 9.998627923286196e-06, "loss": 0.7226, "step": 3116 }, { "epoch": 0.037267303530649575, "grad_norm": 14.389434814453125, "learning_rate": 9.998623383955136e-06, "loss": 0.6654, "step": 3117 }, { "epoch": 0.037279259675510226, "grad_norm": 3.3756067752838135, "learning_rate": 9.998618837128625e-06, "loss": 0.6111, "step": 3118 }, { "epoch": 0.03729121582037088, "grad_norm": 1.876871943473816, "learning_rate": 9.998614282806667e-06, "loss": 0.5613, "step": 3119 }, { "epoch": 0.03730317196523153, "grad_norm": 4.704133033752441, "learning_rate": 9.998609720989267e-06, "loss": 0.6699, "step": 3120 }, { "epoch": 0.03731512811009218, "grad_norm": 3.3526439666748047, "learning_rate": 9.998605151676436e-06, "loss": 0.6508, "step": 3121 }, { "epoch": 0.03732708425495283, "grad_norm": 1.9946093559265137, "learning_rate": 9.998600574868177e-06, "loss": 0.5825, "step": 3122 }, { "epoch": 0.037339040399813485, "grad_norm": 2.6298043727874756, "learning_rate": 9.9985959905645e-06, "loss": 0.6789, "step": 3123 }, { "epoch": 0.03735099654467414, "grad_norm": 2.444206953048706, "learning_rate": 9.99859139876541e-06, "loss": 0.6693, "step": 3124 }, { "epoch": 0.03736295268953479, "grad_norm": 2.437866687774658, "learning_rate": 9.998586799470915e-06, "loss": 0.617, "step": 3125 }, { "epoch": 0.03737490883439544, "grad_norm": 1.8024855852127075, "learning_rate": 9.99858219268102e-06, "loss": 0.5763, "step": 3126 }, { "epoch": 0.03738686497925609, "grad_norm": 2.2542638778686523, "learning_rate": 9.998577578395736e-06, "loss": 0.5659, "step": 3127 }, { "epoch": 0.037398821124116736, "grad_norm": 2.739990234375, "learning_rate": 9.998572956615065e-06, "loss": 0.6422, "step": 3128 }, { "epoch": 0.03741077726897739, "grad_norm": 1.9761779308319092, "learning_rate": 9.998568327339015e-06, "loss": 0.7108, "step": 3129 }, { "epoch": 0.03742273341383804, "grad_norm": 11.00893497467041, "learning_rate": 9.998563690567596e-06, "loss": 0.5823, "step": 3130 }, { "epoch": 0.03743468955869869, "grad_norm": 2.067399024963379, "learning_rate": 9.998559046300814e-06, "loss": 0.6016, "step": 3131 }, { "epoch": 0.03744664570355934, "grad_norm": 1.74542236328125, "learning_rate": 9.998554394538674e-06, "loss": 0.6098, "step": 3132 }, { "epoch": 0.037458601848419995, "grad_norm": 4.99261474609375, "learning_rate": 9.998549735281182e-06, "loss": 0.5932, "step": 3133 }, { "epoch": 0.03747055799328065, "grad_norm": 14.886603355407715, "learning_rate": 9.998545068528347e-06, "loss": 0.7143, "step": 3134 }, { "epoch": 0.0374825141381413, "grad_norm": 1.5985560417175293, "learning_rate": 9.998540394280178e-06, "loss": 0.6924, "step": 3135 }, { "epoch": 0.03749447028300195, "grad_norm": 2.036081314086914, "learning_rate": 9.998535712536678e-06, "loss": 0.6596, "step": 3136 }, { "epoch": 0.0375064264278626, "grad_norm": 1.7861242294311523, "learning_rate": 9.998531023297857e-06, "loss": 0.6445, "step": 3137 }, { "epoch": 0.037518382572723254, "grad_norm": 2.3334801197052, "learning_rate": 9.998526326563723e-06, "loss": 0.6674, "step": 3138 }, { "epoch": 0.037530338717583905, "grad_norm": 3.846081018447876, "learning_rate": 9.998521622334278e-06, "loss": 0.6767, "step": 3139 }, { "epoch": 0.03754229486244455, "grad_norm": 2.4362075328826904, "learning_rate": 9.998516910609532e-06, "loss": 0.7036, "step": 3140 }, { "epoch": 0.0375542510073052, "grad_norm": 3.3802263736724854, "learning_rate": 9.998512191389494e-06, "loss": 0.6649, "step": 3141 }, { "epoch": 0.037566207152165854, "grad_norm": 2.0462136268615723, "learning_rate": 9.998507464674167e-06, "loss": 0.6438, "step": 3142 }, { "epoch": 0.037578163297026505, "grad_norm": 2.4879777431488037, "learning_rate": 9.998502730463563e-06, "loss": 0.6807, "step": 3143 }, { "epoch": 0.03759011944188716, "grad_norm": 3.2197182178497314, "learning_rate": 9.998497988757684e-06, "loss": 0.68, "step": 3144 }, { "epoch": 0.03760207558674781, "grad_norm": 1.7228872776031494, "learning_rate": 9.998493239556542e-06, "loss": 0.6404, "step": 3145 }, { "epoch": 0.03761403173160846, "grad_norm": 2.2860097885131836, "learning_rate": 9.998488482860139e-06, "loss": 0.6673, "step": 3146 }, { "epoch": 0.03762598787646911, "grad_norm": 3.8740599155426025, "learning_rate": 9.998483718668487e-06, "loss": 0.5981, "step": 3147 }, { "epoch": 0.037637944021329764, "grad_norm": 2.0094046592712402, "learning_rate": 9.998478946981589e-06, "loss": 0.7237, "step": 3148 }, { "epoch": 0.037649900166190416, "grad_norm": 6.138914585113525, "learning_rate": 9.998474167799454e-06, "loss": 0.5928, "step": 3149 }, { "epoch": 0.03766185631105107, "grad_norm": 7.270077228546143, "learning_rate": 9.998469381122091e-06, "loss": 0.7222, "step": 3150 }, { "epoch": 0.03767381245591172, "grad_norm": 2.3469598293304443, "learning_rate": 9.998464586949503e-06, "loss": 0.6347, "step": 3151 }, { "epoch": 0.037685768600772364, "grad_norm": 1.9662703275680542, "learning_rate": 9.998459785281703e-06, "loss": 0.7293, "step": 3152 }, { "epoch": 0.037697724745633016, "grad_norm": 4.31898832321167, "learning_rate": 9.998454976118692e-06, "loss": 0.6037, "step": 3153 }, { "epoch": 0.03770968089049367, "grad_norm": 2.274839162826538, "learning_rate": 9.99845015946048e-06, "loss": 0.7291, "step": 3154 }, { "epoch": 0.03772163703535432, "grad_norm": 2.638073205947876, "learning_rate": 9.998445335307076e-06, "loss": 0.656, "step": 3155 }, { "epoch": 0.03773359318021497, "grad_norm": 1.968026041984558, "learning_rate": 9.998440503658484e-06, "loss": 0.637, "step": 3156 }, { "epoch": 0.03774554932507562, "grad_norm": 3.4583637714385986, "learning_rate": 9.998435664514713e-06, "loss": 0.5836, "step": 3157 }, { "epoch": 0.037757505469936274, "grad_norm": 1.740567922592163, "learning_rate": 9.99843081787577e-06, "loss": 0.637, "step": 3158 }, { "epoch": 0.037769461614796926, "grad_norm": 1.8880059719085693, "learning_rate": 9.998425963741662e-06, "loss": 0.7528, "step": 3159 }, { "epoch": 0.03778141775965758, "grad_norm": 2.6695683002471924, "learning_rate": 9.998421102112397e-06, "loss": 0.6381, "step": 3160 }, { "epoch": 0.03779337390451823, "grad_norm": 3.7849559783935547, "learning_rate": 9.998416232987982e-06, "loss": 0.6462, "step": 3161 }, { "epoch": 0.03780533004937888, "grad_norm": 10.766056060791016, "learning_rate": 9.998411356368422e-06, "loss": 0.6454, "step": 3162 }, { "epoch": 0.03781728619423953, "grad_norm": 7.440525531768799, "learning_rate": 9.99840647225373e-06, "loss": 0.6278, "step": 3163 }, { "epoch": 0.03782924233910018, "grad_norm": 2.3357555866241455, "learning_rate": 9.998401580643908e-06, "loss": 0.6902, "step": 3164 }, { "epoch": 0.03784119848396083, "grad_norm": 2.7184715270996094, "learning_rate": 9.998396681538964e-06, "loss": 0.5798, "step": 3165 }, { "epoch": 0.03785315462882148, "grad_norm": 2.825713872909546, "learning_rate": 9.998391774938906e-06, "loss": 0.732, "step": 3166 }, { "epoch": 0.03786511077368213, "grad_norm": 1.6617249250411987, "learning_rate": 9.998386860843746e-06, "loss": 0.5393, "step": 3167 }, { "epoch": 0.037877066918542784, "grad_norm": 2.620476484298706, "learning_rate": 9.998381939253484e-06, "loss": 0.5785, "step": 3168 }, { "epoch": 0.037889023063403436, "grad_norm": 1.8102092742919922, "learning_rate": 9.99837701016813e-06, "loss": 0.6116, "step": 3169 }, { "epoch": 0.03790097920826409, "grad_norm": 1.6375149488449097, "learning_rate": 9.998372073587692e-06, "loss": 0.6159, "step": 3170 }, { "epoch": 0.03791293535312474, "grad_norm": 1.8315610885620117, "learning_rate": 9.998367129512178e-06, "loss": 0.6196, "step": 3171 }, { "epoch": 0.03792489149798539, "grad_norm": 2.255734443664551, "learning_rate": 9.998362177941593e-06, "loss": 0.6124, "step": 3172 }, { "epoch": 0.03793684764284604, "grad_norm": 4.305183410644531, "learning_rate": 9.99835721887595e-06, "loss": 0.5916, "step": 3173 }, { "epoch": 0.037948803787706695, "grad_norm": 2.5515999794006348, "learning_rate": 9.99835225231525e-06, "loss": 0.5954, "step": 3174 }, { "epoch": 0.037960759932567346, "grad_norm": 2.3824329376220703, "learning_rate": 9.998347278259504e-06, "loss": 0.6711, "step": 3175 }, { "epoch": 0.03797271607742799, "grad_norm": 1.7843146324157715, "learning_rate": 9.998342296708718e-06, "loss": 0.6036, "step": 3176 }, { "epoch": 0.03798467222228864, "grad_norm": 3.4871432781219482, "learning_rate": 9.9983373076629e-06, "loss": 0.6895, "step": 3177 }, { "epoch": 0.037996628367149295, "grad_norm": 6.771928787231445, "learning_rate": 9.998332311122057e-06, "loss": 0.5975, "step": 3178 }, { "epoch": 0.038008584512009946, "grad_norm": 2.256870746612549, "learning_rate": 9.998327307086197e-06, "loss": 0.5517, "step": 3179 }, { "epoch": 0.0380205406568706, "grad_norm": 6.185461044311523, "learning_rate": 9.998322295555327e-06, "loss": 0.5819, "step": 3180 }, { "epoch": 0.03803249680173125, "grad_norm": 2.4141335487365723, "learning_rate": 9.998317276529457e-06, "loss": 0.7482, "step": 3181 }, { "epoch": 0.0380444529465919, "grad_norm": 1.9487359523773193, "learning_rate": 9.998312250008591e-06, "loss": 0.637, "step": 3182 }, { "epoch": 0.03805640909145255, "grad_norm": 2.0949246883392334, "learning_rate": 9.998307215992737e-06, "loss": 0.6971, "step": 3183 }, { "epoch": 0.038068365236313205, "grad_norm": 4.486485958099365, "learning_rate": 9.998302174481903e-06, "loss": 0.7397, "step": 3184 }, { "epoch": 0.03808032138117386, "grad_norm": 3.1561408042907715, "learning_rate": 9.9982971254761e-06, "loss": 0.6864, "step": 3185 }, { "epoch": 0.03809227752603451, "grad_norm": 2.72377610206604, "learning_rate": 9.998292068975333e-06, "loss": 0.6639, "step": 3186 }, { "epoch": 0.03810423367089515, "grad_norm": 1.8358945846557617, "learning_rate": 9.998287004979607e-06, "loss": 0.5428, "step": 3187 }, { "epoch": 0.038116189815755805, "grad_norm": 5.338199138641357, "learning_rate": 9.998281933488933e-06, "loss": 0.6874, "step": 3188 }, { "epoch": 0.03812814596061646, "grad_norm": 4.031261444091797, "learning_rate": 9.998276854503317e-06, "loss": 0.5951, "step": 3189 }, { "epoch": 0.03814010210547711, "grad_norm": 2.1197288036346436, "learning_rate": 9.998271768022767e-06, "loss": 0.6158, "step": 3190 }, { "epoch": 0.03815205825033776, "grad_norm": 2.103173017501831, "learning_rate": 9.99826667404729e-06, "loss": 0.6653, "step": 3191 }, { "epoch": 0.03816401439519841, "grad_norm": 3.6599090099334717, "learning_rate": 9.998261572576897e-06, "loss": 0.6222, "step": 3192 }, { "epoch": 0.038175970540059063, "grad_norm": 1.9637978076934814, "learning_rate": 9.998256463611591e-06, "loss": 0.6894, "step": 3193 }, { "epoch": 0.038187926684919715, "grad_norm": 2.4563164710998535, "learning_rate": 9.998251347151383e-06, "loss": 0.6618, "step": 3194 }, { "epoch": 0.03819988282978037, "grad_norm": 3.654268264770508, "learning_rate": 9.998246223196276e-06, "loss": 0.6566, "step": 3195 }, { "epoch": 0.03821183897464102, "grad_norm": 2.29573655128479, "learning_rate": 9.998241091746283e-06, "loss": 0.7212, "step": 3196 }, { "epoch": 0.03822379511950167, "grad_norm": 4.181426525115967, "learning_rate": 9.99823595280141e-06, "loss": 0.4886, "step": 3197 }, { "epoch": 0.03823575126436232, "grad_norm": 2.439606189727783, "learning_rate": 9.998230806361665e-06, "loss": 0.5754, "step": 3198 }, { "epoch": 0.03824770740922297, "grad_norm": 3.3339717388153076, "learning_rate": 9.998225652427055e-06, "loss": 0.7703, "step": 3199 }, { "epoch": 0.03825966355408362, "grad_norm": 2.031031370162964, "learning_rate": 9.998220490997588e-06, "loss": 0.6699, "step": 3200 }, { "epoch": 0.03827161969894427, "grad_norm": 3.0242061614990234, "learning_rate": 9.99821532207327e-06, "loss": 0.6236, "step": 3201 }, { "epoch": 0.03828357584380492, "grad_norm": 4.652510643005371, "learning_rate": 9.998210145654113e-06, "loss": 0.6178, "step": 3202 }, { "epoch": 0.038295531988665574, "grad_norm": 3.11165714263916, "learning_rate": 9.998204961740119e-06, "loss": 0.6596, "step": 3203 }, { "epoch": 0.038307488133526225, "grad_norm": 3.506838083267212, "learning_rate": 9.998199770331301e-06, "loss": 0.6354, "step": 3204 }, { "epoch": 0.03831944427838688, "grad_norm": 2.3955507278442383, "learning_rate": 9.998194571427665e-06, "loss": 0.6746, "step": 3205 }, { "epoch": 0.03833140042324753, "grad_norm": 2.8562817573547363, "learning_rate": 9.998189365029216e-06, "loss": 0.6506, "step": 3206 }, { "epoch": 0.03834335656810818, "grad_norm": 1.757738709449768, "learning_rate": 9.998184151135965e-06, "loss": 0.5578, "step": 3207 }, { "epoch": 0.03835531271296883, "grad_norm": 3.73445463180542, "learning_rate": 9.998178929747922e-06, "loss": 0.6251, "step": 3208 }, { "epoch": 0.038367268857829484, "grad_norm": 2.5573694705963135, "learning_rate": 9.99817370086509e-06, "loss": 0.6283, "step": 3209 }, { "epoch": 0.038379225002690136, "grad_norm": 2.918928384780884, "learning_rate": 9.998168464487477e-06, "loss": 0.6035, "step": 3210 }, { "epoch": 0.03839118114755078, "grad_norm": 3.6285691261291504, "learning_rate": 9.998163220615095e-06, "loss": 0.5866, "step": 3211 }, { "epoch": 0.03840313729241143, "grad_norm": 8.731590270996094, "learning_rate": 9.998157969247949e-06, "loss": 0.5701, "step": 3212 }, { "epoch": 0.038415093437272084, "grad_norm": 5.115332126617432, "learning_rate": 9.998152710386047e-06, "loss": 0.7193, "step": 3213 }, { "epoch": 0.038427049582132736, "grad_norm": 2.5980660915374756, "learning_rate": 9.998147444029398e-06, "loss": 0.6642, "step": 3214 }, { "epoch": 0.03843900572699339, "grad_norm": 2.446631669998169, "learning_rate": 9.998142170178008e-06, "loss": 0.614, "step": 3215 }, { "epoch": 0.03845096187185404, "grad_norm": 1.8532994985580444, "learning_rate": 9.998136888831887e-06, "loss": 0.5933, "step": 3216 }, { "epoch": 0.03846291801671469, "grad_norm": 1.66806960105896, "learning_rate": 9.998131599991041e-06, "loss": 0.6771, "step": 3217 }, { "epoch": 0.03847487416157534, "grad_norm": 3.092822551727295, "learning_rate": 9.99812630365548e-06, "loss": 0.5949, "step": 3218 }, { "epoch": 0.038486830306435994, "grad_norm": 3.5385830402374268, "learning_rate": 9.998120999825209e-06, "loss": 0.5911, "step": 3219 }, { "epoch": 0.038498786451296646, "grad_norm": 2.9018983840942383, "learning_rate": 9.99811568850024e-06, "loss": 0.6206, "step": 3220 }, { "epoch": 0.0385107425961573, "grad_norm": 2.8151869773864746, "learning_rate": 9.998110369680578e-06, "loss": 0.6175, "step": 3221 }, { "epoch": 0.03852269874101795, "grad_norm": 2.5907115936279297, "learning_rate": 9.99810504336623e-06, "loss": 0.6977, "step": 3222 }, { "epoch": 0.038534654885878594, "grad_norm": 4.3883209228515625, "learning_rate": 9.998099709557208e-06, "loss": 0.583, "step": 3223 }, { "epoch": 0.038546611030739246, "grad_norm": 3.5276057720184326, "learning_rate": 9.998094368253517e-06, "loss": 0.6875, "step": 3224 }, { "epoch": 0.0385585671755999, "grad_norm": 2.9560599327087402, "learning_rate": 9.998089019455166e-06, "loss": 0.7088, "step": 3225 }, { "epoch": 0.03857052332046055, "grad_norm": 3.1771397590637207, "learning_rate": 9.998083663162163e-06, "loss": 0.6066, "step": 3226 }, { "epoch": 0.0385824794653212, "grad_norm": 3.9236903190612793, "learning_rate": 9.998078299374515e-06, "loss": 0.6472, "step": 3227 }, { "epoch": 0.03859443561018185, "grad_norm": 2.8862714767456055, "learning_rate": 9.99807292809223e-06, "loss": 0.6546, "step": 3228 }, { "epoch": 0.038606391755042505, "grad_norm": 3.115734815597534, "learning_rate": 9.998067549315318e-06, "loss": 0.6335, "step": 3229 }, { "epoch": 0.038618347899903156, "grad_norm": 3.419088363647461, "learning_rate": 9.998062163043787e-06, "loss": 0.7579, "step": 3230 }, { "epoch": 0.03863030404476381, "grad_norm": 2.612513303756714, "learning_rate": 9.998056769277642e-06, "loss": 0.6986, "step": 3231 }, { "epoch": 0.03864226018962446, "grad_norm": 1.6849392652511597, "learning_rate": 9.998051368016894e-06, "loss": 0.659, "step": 3232 }, { "epoch": 0.03865421633448511, "grad_norm": 1.9394950866699219, "learning_rate": 9.998045959261551e-06, "loss": 0.7057, "step": 3233 }, { "epoch": 0.03866617247934576, "grad_norm": 2.1480555534362793, "learning_rate": 9.99804054301162e-06, "loss": 0.6636, "step": 3234 }, { "epoch": 0.03867812862420641, "grad_norm": 3.5098109245300293, "learning_rate": 9.998035119267108e-06, "loss": 0.5988, "step": 3235 }, { "epoch": 0.03869008476906706, "grad_norm": 12.52420711517334, "learning_rate": 9.998029688028027e-06, "loss": 0.6207, "step": 3236 }, { "epoch": 0.03870204091392771, "grad_norm": 1.7802683115005493, "learning_rate": 9.99802424929438e-06, "loss": 0.6055, "step": 3237 }, { "epoch": 0.03871399705878836, "grad_norm": 2.135146141052246, "learning_rate": 9.99801880306618e-06, "loss": 0.5302, "step": 3238 }, { "epoch": 0.038725953203649015, "grad_norm": 2.2546396255493164, "learning_rate": 9.998013349343432e-06, "loss": 0.6465, "step": 3239 }, { "epoch": 0.038737909348509666, "grad_norm": 2.3599441051483154, "learning_rate": 9.998007888126145e-06, "loss": 0.7121, "step": 3240 }, { "epoch": 0.03874986549337032, "grad_norm": 2.522245407104492, "learning_rate": 9.998002419414327e-06, "loss": 0.6548, "step": 3241 }, { "epoch": 0.03876182163823097, "grad_norm": 2.805453300476074, "learning_rate": 9.997996943207988e-06, "loss": 0.6866, "step": 3242 }, { "epoch": 0.03877377778309162, "grad_norm": 2.2446367740631104, "learning_rate": 9.997991459507134e-06, "loss": 0.7118, "step": 3243 }, { "epoch": 0.03878573392795227, "grad_norm": 2.6018564701080322, "learning_rate": 9.997985968311776e-06, "loss": 0.7493, "step": 3244 }, { "epoch": 0.038797690072812925, "grad_norm": 2.861995220184326, "learning_rate": 9.997980469621917e-06, "loss": 0.7001, "step": 3245 }, { "epoch": 0.03880964621767357, "grad_norm": 3.43359375, "learning_rate": 9.997974963437572e-06, "loss": 0.7199, "step": 3246 }, { "epoch": 0.03882160236253422, "grad_norm": 6.615157127380371, "learning_rate": 9.997969449758742e-06, "loss": 0.5707, "step": 3247 }, { "epoch": 0.03883355850739487, "grad_norm": 1.9739534854888916, "learning_rate": 9.99796392858544e-06, "loss": 0.6187, "step": 3248 }, { "epoch": 0.038845514652255525, "grad_norm": 2.025477409362793, "learning_rate": 9.997958399917675e-06, "loss": 0.6555, "step": 3249 }, { "epoch": 0.03885747079711618, "grad_norm": 2.3909497261047363, "learning_rate": 9.997952863755453e-06, "loss": 0.6299, "step": 3250 }, { "epoch": 0.03886942694197683, "grad_norm": 2.1115481853485107, "learning_rate": 9.997947320098784e-06, "loss": 0.6355, "step": 3251 }, { "epoch": 0.03888138308683748, "grad_norm": 2.65427827835083, "learning_rate": 9.997941768947674e-06, "loss": 0.6857, "step": 3252 }, { "epoch": 0.03889333923169813, "grad_norm": 7.323573112487793, "learning_rate": 9.997936210302133e-06, "loss": 0.5873, "step": 3253 }, { "epoch": 0.038905295376558784, "grad_norm": 3.8533031940460205, "learning_rate": 9.997930644162166e-06, "loss": 0.6867, "step": 3254 }, { "epoch": 0.038917251521419435, "grad_norm": 55.68395233154297, "learning_rate": 9.997925070527788e-06, "loss": 0.7269, "step": 3255 }, { "epoch": 0.03892920766628009, "grad_norm": 9.466423988342285, "learning_rate": 9.997919489399001e-06, "loss": 0.6435, "step": 3256 }, { "epoch": 0.03894116381114074, "grad_norm": 6.907632827758789, "learning_rate": 9.997913900775819e-06, "loss": 0.5344, "step": 3257 }, { "epoch": 0.038953119956001384, "grad_norm": 1.843915581703186, "learning_rate": 9.997908304658245e-06, "loss": 0.5203, "step": 3258 }, { "epoch": 0.038965076100862035, "grad_norm": 2.2782201766967773, "learning_rate": 9.99790270104629e-06, "loss": 0.5841, "step": 3259 }, { "epoch": 0.03897703224572269, "grad_norm": 7.788578033447266, "learning_rate": 9.997897089939962e-06, "loss": 0.6269, "step": 3260 }, { "epoch": 0.03898898839058334, "grad_norm": 2.1743297576904297, "learning_rate": 9.997891471339271e-06, "loss": 0.5773, "step": 3261 }, { "epoch": 0.03900094453544399, "grad_norm": 3.4351541996002197, "learning_rate": 9.997885845244222e-06, "loss": 0.6046, "step": 3262 }, { "epoch": 0.03901290068030464, "grad_norm": 1.6800439357757568, "learning_rate": 9.997880211654828e-06, "loss": 0.7678, "step": 3263 }, { "epoch": 0.039024856825165294, "grad_norm": 2.3985607624053955, "learning_rate": 9.997874570571092e-06, "loss": 0.6963, "step": 3264 }, { "epoch": 0.039036812970025946, "grad_norm": 2.038769483566284, "learning_rate": 9.997868921993027e-06, "loss": 0.5962, "step": 3265 }, { "epoch": 0.0390487691148866, "grad_norm": 2.2526192665100098, "learning_rate": 9.99786326592064e-06, "loss": 0.6041, "step": 3266 }, { "epoch": 0.03906072525974725, "grad_norm": 2.201413869857788, "learning_rate": 9.997857602353937e-06, "loss": 0.6443, "step": 3267 }, { "epoch": 0.0390726814046079, "grad_norm": 2.4368574619293213, "learning_rate": 9.997851931292931e-06, "loss": 0.7209, "step": 3268 }, { "epoch": 0.03908463754946855, "grad_norm": 1.8813812732696533, "learning_rate": 9.997846252737627e-06, "loss": 0.6526, "step": 3269 }, { "epoch": 0.0390965936943292, "grad_norm": 2.2750632762908936, "learning_rate": 9.997840566688036e-06, "loss": 0.6852, "step": 3270 }, { "epoch": 0.03910854983918985, "grad_norm": 1.7521123886108398, "learning_rate": 9.997834873144165e-06, "loss": 0.6903, "step": 3271 }, { "epoch": 0.0391205059840505, "grad_norm": 5.80546236038208, "learning_rate": 9.997829172106021e-06, "loss": 0.5892, "step": 3272 }, { "epoch": 0.03913246212891115, "grad_norm": 6.002604961395264, "learning_rate": 9.997823463573618e-06, "loss": 0.5602, "step": 3273 }, { "epoch": 0.039144418273771804, "grad_norm": 3.022798776626587, "learning_rate": 9.997817747546958e-06, "loss": 0.6417, "step": 3274 }, { "epoch": 0.039156374418632456, "grad_norm": 10.627787590026855, "learning_rate": 9.997812024026053e-06, "loss": 0.7297, "step": 3275 }, { "epoch": 0.03916833056349311, "grad_norm": 2.4314372539520264, "learning_rate": 9.997806293010912e-06, "loss": 0.5597, "step": 3276 }, { "epoch": 0.03918028670835376, "grad_norm": 3.688227415084839, "learning_rate": 9.997800554501543e-06, "loss": 0.6278, "step": 3277 }, { "epoch": 0.03919224285321441, "grad_norm": 1.6380062103271484, "learning_rate": 9.997794808497952e-06, "loss": 0.6711, "step": 3278 }, { "epoch": 0.03920419899807506, "grad_norm": 1.942111611366272, "learning_rate": 9.997789055000152e-06, "loss": 0.6338, "step": 3279 }, { "epoch": 0.039216155142935714, "grad_norm": 2.969208240509033, "learning_rate": 9.99778329400815e-06, "loss": 0.6154, "step": 3280 }, { "epoch": 0.039228111287796366, "grad_norm": 3.068275213241577, "learning_rate": 9.997777525521953e-06, "loss": 0.6748, "step": 3281 }, { "epoch": 0.03924006743265701, "grad_norm": 1.5220494270324707, "learning_rate": 9.99777174954157e-06, "loss": 0.6708, "step": 3282 }, { "epoch": 0.03925202357751766, "grad_norm": 2.102531671524048, "learning_rate": 9.997765966067013e-06, "loss": 0.6692, "step": 3283 }, { "epoch": 0.039263979722378314, "grad_norm": 3.4618992805480957, "learning_rate": 9.997760175098285e-06, "loss": 0.602, "step": 3284 }, { "epoch": 0.039275935867238966, "grad_norm": 3.172475576400757, "learning_rate": 9.9977543766354e-06, "loss": 0.7491, "step": 3285 }, { "epoch": 0.03928789201209962, "grad_norm": 2.100855827331543, "learning_rate": 9.997748570678364e-06, "loss": 0.5729, "step": 3286 }, { "epoch": 0.03929984815696027, "grad_norm": 2.2633910179138184, "learning_rate": 9.997742757227186e-06, "loss": 0.6362, "step": 3287 }, { "epoch": 0.03931180430182092, "grad_norm": 2.253387689590454, "learning_rate": 9.997736936281876e-06, "loss": 0.6492, "step": 3288 }, { "epoch": 0.03932376044668157, "grad_norm": 4.330502510070801, "learning_rate": 9.997731107842441e-06, "loss": 0.6926, "step": 3289 }, { "epoch": 0.039335716591542225, "grad_norm": 2.4754674434661865, "learning_rate": 9.99772527190889e-06, "loss": 0.7028, "step": 3290 }, { "epoch": 0.039347672736402876, "grad_norm": 3.994180202484131, "learning_rate": 9.997719428481231e-06, "loss": 0.5962, "step": 3291 }, { "epoch": 0.03935962888126353, "grad_norm": 5.899893283843994, "learning_rate": 9.997713577559478e-06, "loss": 0.573, "step": 3292 }, { "epoch": 0.03937158502612418, "grad_norm": 2.418148994445801, "learning_rate": 9.997707719143632e-06, "loss": 0.6113, "step": 3293 }, { "epoch": 0.039383541170984825, "grad_norm": 1.6102198362350464, "learning_rate": 9.997701853233708e-06, "loss": 0.613, "step": 3294 }, { "epoch": 0.039395497315845476, "grad_norm": 3.9448301792144775, "learning_rate": 9.99769597982971e-06, "loss": 0.7065, "step": 3295 }, { "epoch": 0.03940745346070613, "grad_norm": 3.288926601409912, "learning_rate": 9.997690098931651e-06, "loss": 0.7592, "step": 3296 }, { "epoch": 0.03941940960556678, "grad_norm": 12.898507118225098, "learning_rate": 9.997684210539536e-06, "loss": 0.5655, "step": 3297 }, { "epoch": 0.03943136575042743, "grad_norm": 2.2464470863342285, "learning_rate": 9.997678314653377e-06, "loss": 0.6809, "step": 3298 }, { "epoch": 0.03944332189528808, "grad_norm": 2.238581895828247, "learning_rate": 9.997672411273183e-06, "loss": 0.6705, "step": 3299 }, { "epoch": 0.039455278040148735, "grad_norm": 2.2073185443878174, "learning_rate": 9.997666500398958e-06, "loss": 0.5661, "step": 3300 }, { "epoch": 0.03946723418500939, "grad_norm": 3.816171407699585, "learning_rate": 9.997660582030717e-06, "loss": 0.5553, "step": 3301 }, { "epoch": 0.03947919032987004, "grad_norm": 2.372298240661621, "learning_rate": 9.997654656168465e-06, "loss": 0.5882, "step": 3302 }, { "epoch": 0.03949114647473069, "grad_norm": 1.9635770320892334, "learning_rate": 9.997648722812213e-06, "loss": 0.6268, "step": 3303 }, { "epoch": 0.03950310261959134, "grad_norm": 2.984178066253662, "learning_rate": 9.99764278196197e-06, "loss": 0.6744, "step": 3304 }, { "epoch": 0.039515058764451994, "grad_norm": 3.196194887161255, "learning_rate": 9.99763683361774e-06, "loss": 0.6656, "step": 3305 }, { "epoch": 0.03952701490931264, "grad_norm": 8.117948532104492, "learning_rate": 9.99763087777954e-06, "loss": 0.686, "step": 3306 }, { "epoch": 0.03953897105417329, "grad_norm": 3.771599054336548, "learning_rate": 9.997624914447372e-06, "loss": 0.5573, "step": 3307 }, { "epoch": 0.03955092719903394, "grad_norm": 2.7609376907348633, "learning_rate": 9.99761894362125e-06, "loss": 0.5995, "step": 3308 }, { "epoch": 0.03956288334389459, "grad_norm": 2.5072598457336426, "learning_rate": 9.997612965301178e-06, "loss": 0.7027, "step": 3309 }, { "epoch": 0.039574839488755245, "grad_norm": 2.406374216079712, "learning_rate": 9.997606979487169e-06, "loss": 0.6287, "step": 3310 }, { "epoch": 0.0395867956336159, "grad_norm": 2.7861461639404297, "learning_rate": 9.99760098617923e-06, "loss": 0.6171, "step": 3311 }, { "epoch": 0.03959875177847655, "grad_norm": 4.031463623046875, "learning_rate": 9.997594985377372e-06, "loss": 0.6268, "step": 3312 }, { "epoch": 0.0396107079233372, "grad_norm": 2.7598226070404053, "learning_rate": 9.997588977081601e-06, "loss": 0.6274, "step": 3313 }, { "epoch": 0.03962266406819785, "grad_norm": 5.466834545135498, "learning_rate": 9.997582961291928e-06, "loss": 0.5918, "step": 3314 }, { "epoch": 0.039634620213058504, "grad_norm": 4.340938091278076, "learning_rate": 9.997576938008362e-06, "loss": 0.6505, "step": 3315 }, { "epoch": 0.039646576357919155, "grad_norm": 2.047553300857544, "learning_rate": 9.997570907230912e-06, "loss": 0.5912, "step": 3316 }, { "epoch": 0.0396585325027798, "grad_norm": 4.35520076751709, "learning_rate": 9.997564868959587e-06, "loss": 0.6534, "step": 3317 }, { "epoch": 0.03967048864764045, "grad_norm": 4.495573997497559, "learning_rate": 9.997558823194393e-06, "loss": 0.6163, "step": 3318 }, { "epoch": 0.039682444792501104, "grad_norm": 4.262589931488037, "learning_rate": 9.997552769935344e-06, "loss": 0.6134, "step": 3319 }, { "epoch": 0.039694400937361755, "grad_norm": 2.4889464378356934, "learning_rate": 9.997546709182446e-06, "loss": 0.5857, "step": 3320 }, { "epoch": 0.03970635708222241, "grad_norm": 2.0820677280426025, "learning_rate": 9.99754064093571e-06, "loss": 0.7428, "step": 3321 }, { "epoch": 0.03971831322708306, "grad_norm": 6.201931476593018, "learning_rate": 9.997534565195143e-06, "loss": 0.6016, "step": 3322 }, { "epoch": 0.03973026937194371, "grad_norm": 2.9267873764038086, "learning_rate": 9.997528481960755e-06, "loss": 0.667, "step": 3323 }, { "epoch": 0.03974222551680436, "grad_norm": 1.8727079629898071, "learning_rate": 9.997522391232555e-06, "loss": 0.6185, "step": 3324 }, { "epoch": 0.039754181661665014, "grad_norm": 2.1358799934387207, "learning_rate": 9.997516293010554e-06, "loss": 0.7254, "step": 3325 }, { "epoch": 0.039766137806525666, "grad_norm": 4.247419357299805, "learning_rate": 9.99751018729476e-06, "loss": 0.755, "step": 3326 }, { "epoch": 0.03977809395138632, "grad_norm": 2.6248607635498047, "learning_rate": 9.997504074085179e-06, "loss": 0.6273, "step": 3327 }, { "epoch": 0.03979005009624697, "grad_norm": 2.7870421409606934, "learning_rate": 9.997497953381826e-06, "loss": 0.6668, "step": 3328 }, { "epoch": 0.039802006241107614, "grad_norm": 7.982061862945557, "learning_rate": 9.997491825184704e-06, "loss": 0.6971, "step": 3329 }, { "epoch": 0.039813962385968266, "grad_norm": 3.4910764694213867, "learning_rate": 9.997485689493826e-06, "loss": 0.5835, "step": 3330 }, { "epoch": 0.03982591853082892, "grad_norm": 26.964078903198242, "learning_rate": 9.997479546309201e-06, "loss": 0.5191, "step": 3331 }, { "epoch": 0.03983787467568957, "grad_norm": 2.557546615600586, "learning_rate": 9.997473395630838e-06, "loss": 0.6588, "step": 3332 }, { "epoch": 0.03984983082055022, "grad_norm": 1.9840587377548218, "learning_rate": 9.997467237458746e-06, "loss": 0.707, "step": 3333 }, { "epoch": 0.03986178696541087, "grad_norm": 2.466747999191284, "learning_rate": 9.997461071792932e-06, "loss": 0.6407, "step": 3334 }, { "epoch": 0.039873743110271524, "grad_norm": 2.458381414413452, "learning_rate": 9.99745489863341e-06, "loss": 0.6692, "step": 3335 }, { "epoch": 0.039885699255132176, "grad_norm": 3.5417494773864746, "learning_rate": 9.997448717980186e-06, "loss": 0.7096, "step": 3336 }, { "epoch": 0.03989765539999283, "grad_norm": 1.9473538398742676, "learning_rate": 9.99744252983327e-06, "loss": 0.5713, "step": 3337 }, { "epoch": 0.03990961154485348, "grad_norm": 2.290834903717041, "learning_rate": 9.99743633419267e-06, "loss": 0.6729, "step": 3338 }, { "epoch": 0.03992156768971413, "grad_norm": 2.764821767807007, "learning_rate": 9.997430131058398e-06, "loss": 0.6217, "step": 3339 }, { "epoch": 0.03993352383457478, "grad_norm": 3.297168731689453, "learning_rate": 9.99742392043046e-06, "loss": 0.6138, "step": 3340 }, { "epoch": 0.03994547997943543, "grad_norm": 2.378842830657959, "learning_rate": 9.997417702308869e-06, "loss": 0.6482, "step": 3341 }, { "epoch": 0.03995743612429608, "grad_norm": 2.3685977458953857, "learning_rate": 9.997411476693631e-06, "loss": 0.5583, "step": 3342 }, { "epoch": 0.03996939226915673, "grad_norm": 10.94512939453125, "learning_rate": 9.997405243584757e-06, "loss": 0.6748, "step": 3343 }, { "epoch": 0.03998134841401738, "grad_norm": 3.767624616622925, "learning_rate": 9.997399002982255e-06, "loss": 0.5919, "step": 3344 }, { "epoch": 0.039993304558878034, "grad_norm": 1.8496938943862915, "learning_rate": 9.997392754886137e-06, "loss": 0.6204, "step": 3345 }, { "epoch": 0.040005260703738686, "grad_norm": 2.4548072814941406, "learning_rate": 9.99738649929641e-06, "loss": 0.6221, "step": 3346 }, { "epoch": 0.04001721684859934, "grad_norm": 2.822194814682007, "learning_rate": 9.997380236213086e-06, "loss": 0.5333, "step": 3347 }, { "epoch": 0.04002917299345999, "grad_norm": 3.3473963737487793, "learning_rate": 9.99737396563617e-06, "loss": 0.6492, "step": 3348 }, { "epoch": 0.04004112913832064, "grad_norm": 3.5224742889404297, "learning_rate": 9.997367687565676e-06, "loss": 0.6037, "step": 3349 }, { "epoch": 0.04005308528318129, "grad_norm": 1.6891463994979858, "learning_rate": 9.99736140200161e-06, "loss": 0.6377, "step": 3350 }, { "epoch": 0.040065041428041945, "grad_norm": 1.5085369348526, "learning_rate": 9.997355108943984e-06, "loss": 0.609, "step": 3351 }, { "epoch": 0.040076997572902597, "grad_norm": 2.4327166080474854, "learning_rate": 9.997348808392806e-06, "loss": 0.6635, "step": 3352 }, { "epoch": 0.04008895371776324, "grad_norm": 3.5197560787200928, "learning_rate": 9.997342500348084e-06, "loss": 0.6443, "step": 3353 }, { "epoch": 0.04010090986262389, "grad_norm": 8.336867332458496, "learning_rate": 9.997336184809831e-06, "loss": 0.6209, "step": 3354 }, { "epoch": 0.040112866007484545, "grad_norm": 3.5589334964752197, "learning_rate": 9.997329861778054e-06, "loss": 0.5983, "step": 3355 }, { "epoch": 0.040124822152345196, "grad_norm": 3.086358070373535, "learning_rate": 9.997323531252764e-06, "loss": 0.6211, "step": 3356 }, { "epoch": 0.04013677829720585, "grad_norm": 3.9778263568878174, "learning_rate": 9.997317193233968e-06, "loss": 0.6525, "step": 3357 }, { "epoch": 0.0401487344420665, "grad_norm": 6.095578193664551, "learning_rate": 9.997310847721679e-06, "loss": 0.681, "step": 3358 }, { "epoch": 0.04016069058692715, "grad_norm": 7.306616306304932, "learning_rate": 9.997304494715902e-06, "loss": 0.5932, "step": 3359 }, { "epoch": 0.0401726467317878, "grad_norm": 3.877995729446411, "learning_rate": 9.997298134216651e-06, "loss": 0.6304, "step": 3360 }, { "epoch": 0.040184602876648455, "grad_norm": 3.102872133255005, "learning_rate": 9.997291766223933e-06, "loss": 0.7222, "step": 3361 }, { "epoch": 0.04019655902150911, "grad_norm": 3.4543545246124268, "learning_rate": 9.99728539073776e-06, "loss": 0.6849, "step": 3362 }, { "epoch": 0.04020851516636976, "grad_norm": 14.648721694946289, "learning_rate": 9.997279007758138e-06, "loss": 0.7108, "step": 3363 }, { "epoch": 0.04022047131123041, "grad_norm": 4.660802364349365, "learning_rate": 9.99727261728508e-06, "loss": 0.6647, "step": 3364 }, { "epoch": 0.040232427456091055, "grad_norm": 2.163905382156372, "learning_rate": 9.99726621931859e-06, "loss": 0.5247, "step": 3365 }, { "epoch": 0.04024438360095171, "grad_norm": 2.874906063079834, "learning_rate": 9.997259813858686e-06, "loss": 0.6481, "step": 3366 }, { "epoch": 0.04025633974581236, "grad_norm": 1.993010401725769, "learning_rate": 9.997253400905371e-06, "loss": 0.5835, "step": 3367 }, { "epoch": 0.04026829589067301, "grad_norm": 2.600632667541504, "learning_rate": 9.997246980458657e-06, "loss": 0.6273, "step": 3368 }, { "epoch": 0.04028025203553366, "grad_norm": 3.088770627975464, "learning_rate": 9.997240552518554e-06, "loss": 0.5834, "step": 3369 }, { "epoch": 0.040292208180394314, "grad_norm": 1.6287038326263428, "learning_rate": 9.99723411708507e-06, "loss": 0.6324, "step": 3370 }, { "epoch": 0.040304164325254965, "grad_norm": 2.5592234134674072, "learning_rate": 9.997227674158216e-06, "loss": 0.722, "step": 3371 }, { "epoch": 0.04031612047011562, "grad_norm": 2.0013582706451416, "learning_rate": 9.997221223738002e-06, "loss": 0.7129, "step": 3372 }, { "epoch": 0.04032807661497627, "grad_norm": 2.1012165546417236, "learning_rate": 9.997214765824437e-06, "loss": 0.5993, "step": 3373 }, { "epoch": 0.04034003275983692, "grad_norm": 2.271540641784668, "learning_rate": 9.99720830041753e-06, "loss": 0.5899, "step": 3374 }, { "epoch": 0.04035198890469757, "grad_norm": 4.42039680480957, "learning_rate": 9.99720182751729e-06, "loss": 0.5763, "step": 3375 }, { "epoch": 0.040363945049558224, "grad_norm": 2.108081817626953, "learning_rate": 9.997195347123732e-06, "loss": 0.6009, "step": 3376 }, { "epoch": 0.04037590119441887, "grad_norm": 7.1400580406188965, "learning_rate": 9.99718885923686e-06, "loss": 0.6557, "step": 3377 }, { "epoch": 0.04038785733927952, "grad_norm": 9.168570518493652, "learning_rate": 9.997182363856683e-06, "loss": 0.6304, "step": 3378 }, { "epoch": 0.04039981348414017, "grad_norm": 2.1661319732666016, "learning_rate": 9.997175860983217e-06, "loss": 0.7044, "step": 3379 }, { "epoch": 0.040411769629000824, "grad_norm": 2.7652647495269775, "learning_rate": 9.997169350616466e-06, "loss": 0.6463, "step": 3380 }, { "epoch": 0.040423725773861476, "grad_norm": 3.146484375, "learning_rate": 9.997162832756442e-06, "loss": 0.6335, "step": 3381 }, { "epoch": 0.04043568191872213, "grad_norm": 3.1660540103912354, "learning_rate": 9.997156307403155e-06, "loss": 0.7061, "step": 3382 }, { "epoch": 0.04044763806358278, "grad_norm": 2.41609525680542, "learning_rate": 9.997149774556614e-06, "loss": 0.5923, "step": 3383 }, { "epoch": 0.04045959420844343, "grad_norm": 2.3338446617126465, "learning_rate": 9.99714323421683e-06, "loss": 0.6694, "step": 3384 }, { "epoch": 0.04047155035330408, "grad_norm": 3.563342571258545, "learning_rate": 9.99713668638381e-06, "loss": 0.6145, "step": 3385 }, { "epoch": 0.040483506498164734, "grad_norm": 1.5221203565597534, "learning_rate": 9.997130131057569e-06, "loss": 0.6156, "step": 3386 }, { "epoch": 0.040495462643025386, "grad_norm": 2.298593521118164, "learning_rate": 9.997123568238111e-06, "loss": 0.6819, "step": 3387 }, { "epoch": 0.04050741878788603, "grad_norm": 4.111684322357178, "learning_rate": 9.99711699792545e-06, "loss": 0.6341, "step": 3388 }, { "epoch": 0.04051937493274668, "grad_norm": 1.7223926782608032, "learning_rate": 9.99711042011959e-06, "loss": 0.6461, "step": 3389 }, { "epoch": 0.040531331077607334, "grad_norm": 2.3087821006774902, "learning_rate": 9.997103834820551e-06, "loss": 0.6205, "step": 3390 }, { "epoch": 0.040543287222467986, "grad_norm": 3.3493688106536865, "learning_rate": 9.997097242028334e-06, "loss": 0.634, "step": 3391 }, { "epoch": 0.04055524336732864, "grad_norm": 3.534496545791626, "learning_rate": 9.997090641742953e-06, "loss": 0.6929, "step": 3392 }, { "epoch": 0.04056719951218929, "grad_norm": 2.9574763774871826, "learning_rate": 9.997084033964416e-06, "loss": 0.6499, "step": 3393 }, { "epoch": 0.04057915565704994, "grad_norm": 1.6005213260650635, "learning_rate": 9.997077418692734e-06, "loss": 0.6146, "step": 3394 }, { "epoch": 0.04059111180191059, "grad_norm": 3.4733994007110596, "learning_rate": 9.997070795927917e-06, "loss": 0.5863, "step": 3395 }, { "epoch": 0.040603067946771244, "grad_norm": 1.714982271194458, "learning_rate": 9.997064165669974e-06, "loss": 0.6199, "step": 3396 }, { "epoch": 0.040615024091631896, "grad_norm": 2.3400487899780273, "learning_rate": 9.997057527918916e-06, "loss": 0.6452, "step": 3397 }, { "epoch": 0.04062698023649255, "grad_norm": 2.748823642730713, "learning_rate": 9.997050882674753e-06, "loss": 0.5815, "step": 3398 }, { "epoch": 0.0406389363813532, "grad_norm": 1.6902377605438232, "learning_rate": 9.997044229937493e-06, "loss": 0.5898, "step": 3399 }, { "epoch": 0.040650892526213844, "grad_norm": 1.739357352256775, "learning_rate": 9.99703756970715e-06, "loss": 0.569, "step": 3400 }, { "epoch": 0.040662848671074496, "grad_norm": 3.702754020690918, "learning_rate": 9.997030901983728e-06, "loss": 0.6904, "step": 3401 }, { "epoch": 0.04067480481593515, "grad_norm": 4.563330173492432, "learning_rate": 9.997024226767242e-06, "loss": 0.7119, "step": 3402 }, { "epoch": 0.0406867609607958, "grad_norm": 4.04089879989624, "learning_rate": 9.9970175440577e-06, "loss": 0.6414, "step": 3403 }, { "epoch": 0.04069871710565645, "grad_norm": 1.9022358655929565, "learning_rate": 9.997010853855113e-06, "loss": 0.6856, "step": 3404 }, { "epoch": 0.0407106732505171, "grad_norm": 1.439231276512146, "learning_rate": 9.997004156159489e-06, "loss": 0.5708, "step": 3405 }, { "epoch": 0.040722629395377755, "grad_norm": 2.9525575637817383, "learning_rate": 9.996997450970842e-06, "loss": 0.6845, "step": 3406 }, { "epoch": 0.040734585540238406, "grad_norm": 2.038160562515259, "learning_rate": 9.996990738289179e-06, "loss": 0.6082, "step": 3407 }, { "epoch": 0.04074654168509906, "grad_norm": 4.195492744445801, "learning_rate": 9.99698401811451e-06, "loss": 0.5822, "step": 3408 }, { "epoch": 0.04075849782995971, "grad_norm": 3.3849873542785645, "learning_rate": 9.996977290446845e-06, "loss": 0.694, "step": 3409 }, { "epoch": 0.04077045397482036, "grad_norm": 2.3608717918395996, "learning_rate": 9.996970555286195e-06, "loss": 0.7334, "step": 3410 }, { "epoch": 0.04078241011968101, "grad_norm": 2.088226556777954, "learning_rate": 9.99696381263257e-06, "loss": 0.7415, "step": 3411 }, { "epoch": 0.04079436626454166, "grad_norm": 3.5665054321289062, "learning_rate": 9.996957062485981e-06, "loss": 0.6126, "step": 3412 }, { "epoch": 0.04080632240940231, "grad_norm": 2.6549432277679443, "learning_rate": 9.996950304846437e-06, "loss": 0.7193, "step": 3413 }, { "epoch": 0.04081827855426296, "grad_norm": 2.807872772216797, "learning_rate": 9.996943539713948e-06, "loss": 0.6216, "step": 3414 }, { "epoch": 0.04083023469912361, "grad_norm": 3.3379602432250977, "learning_rate": 9.996936767088523e-06, "loss": 0.6586, "step": 3415 }, { "epoch": 0.040842190843984265, "grad_norm": 3.1137259006500244, "learning_rate": 9.996929986970175e-06, "loss": 0.6045, "step": 3416 }, { "epoch": 0.04085414698884492, "grad_norm": 3.5144734382629395, "learning_rate": 9.996923199358912e-06, "loss": 0.6064, "step": 3417 }, { "epoch": 0.04086610313370557, "grad_norm": 1.9693506956100464, "learning_rate": 9.996916404254746e-06, "loss": 0.65, "step": 3418 }, { "epoch": 0.04087805927856622, "grad_norm": 2.9329042434692383, "learning_rate": 9.996909601657684e-06, "loss": 0.6762, "step": 3419 }, { "epoch": 0.04089001542342687, "grad_norm": 2.301943063735962, "learning_rate": 9.99690279156774e-06, "loss": 0.6784, "step": 3420 }, { "epoch": 0.04090197156828752, "grad_norm": 1.9971978664398193, "learning_rate": 9.996895973984924e-06, "loss": 0.6483, "step": 3421 }, { "epoch": 0.040913927713148175, "grad_norm": 1.7804374694824219, "learning_rate": 9.996889148909242e-06, "loss": 0.5594, "step": 3422 }, { "epoch": 0.04092588385800883, "grad_norm": 4.045444011688232, "learning_rate": 9.996882316340709e-06, "loss": 0.7072, "step": 3423 }, { "epoch": 0.04093784000286947, "grad_norm": 2.8823132514953613, "learning_rate": 9.99687547627933e-06, "loss": 0.6967, "step": 3424 }, { "epoch": 0.04094979614773012, "grad_norm": 4.152271270751953, "learning_rate": 9.996868628725121e-06, "loss": 0.6552, "step": 3425 }, { "epoch": 0.040961752292590775, "grad_norm": 1.8859676122665405, "learning_rate": 9.99686177367809e-06, "loss": 0.7696, "step": 3426 }, { "epoch": 0.04097370843745143, "grad_norm": 15.314352035522461, "learning_rate": 9.996854911138245e-06, "loss": 0.5634, "step": 3427 }, { "epoch": 0.04098566458231208, "grad_norm": 2.687213182449341, "learning_rate": 9.9968480411056e-06, "loss": 0.623, "step": 3428 }, { "epoch": 0.04099762072717273, "grad_norm": 2.619744062423706, "learning_rate": 9.996841163580164e-06, "loss": 0.6572, "step": 3429 }, { "epoch": 0.04100957687203338, "grad_norm": 2.50750994682312, "learning_rate": 9.996834278561945e-06, "loss": 0.5755, "step": 3430 }, { "epoch": 0.041021533016894034, "grad_norm": 1.661442756652832, "learning_rate": 9.996827386050957e-06, "loss": 0.5959, "step": 3431 }, { "epoch": 0.041033489161754685, "grad_norm": 3.8441286087036133, "learning_rate": 9.996820486047207e-06, "loss": 0.7802, "step": 3432 }, { "epoch": 0.04104544530661534, "grad_norm": 2.639307975769043, "learning_rate": 9.996813578550707e-06, "loss": 0.5638, "step": 3433 }, { "epoch": 0.04105740145147599, "grad_norm": 2.6745054721832275, "learning_rate": 9.99680666356147e-06, "loss": 0.5303, "step": 3434 }, { "epoch": 0.04106935759633664, "grad_norm": 2.3834822177886963, "learning_rate": 9.996799741079501e-06, "loss": 0.5833, "step": 3435 }, { "epoch": 0.041081313741197285, "grad_norm": 1.7173179388046265, "learning_rate": 9.996792811104814e-06, "loss": 0.7265, "step": 3436 }, { "epoch": 0.04109326988605794, "grad_norm": 2.799539089202881, "learning_rate": 9.996785873637418e-06, "loss": 0.7095, "step": 3437 }, { "epoch": 0.04110522603091859, "grad_norm": 2.7774715423583984, "learning_rate": 9.996778928677323e-06, "loss": 0.637, "step": 3438 }, { "epoch": 0.04111718217577924, "grad_norm": 2.156935214996338, "learning_rate": 9.996771976224543e-06, "loss": 0.6352, "step": 3439 }, { "epoch": 0.04112913832063989, "grad_norm": 3.9171855449676514, "learning_rate": 9.996765016279084e-06, "loss": 0.6457, "step": 3440 }, { "epoch": 0.041141094465500544, "grad_norm": 3.560581684112549, "learning_rate": 9.996758048840957e-06, "loss": 0.5816, "step": 3441 }, { "epoch": 0.041153050610361196, "grad_norm": 1.9474072456359863, "learning_rate": 9.996751073910175e-06, "loss": 0.6841, "step": 3442 }, { "epoch": 0.04116500675522185, "grad_norm": 2.695364475250244, "learning_rate": 9.996744091486747e-06, "loss": 0.6944, "step": 3443 }, { "epoch": 0.0411769629000825, "grad_norm": 2.251676082611084, "learning_rate": 9.996737101570682e-06, "loss": 0.5721, "step": 3444 }, { "epoch": 0.04118891904494315, "grad_norm": 2.542073965072632, "learning_rate": 9.996730104161994e-06, "loss": 0.6256, "step": 3445 }, { "epoch": 0.0412008751898038, "grad_norm": 4.213515758514404, "learning_rate": 9.996723099260691e-06, "loss": 0.5945, "step": 3446 }, { "epoch": 0.041212831334664454, "grad_norm": 1.883506178855896, "learning_rate": 9.996716086866782e-06, "loss": 0.6143, "step": 3447 }, { "epoch": 0.0412247874795251, "grad_norm": 7.207956790924072, "learning_rate": 9.99670906698028e-06, "loss": 0.5683, "step": 3448 }, { "epoch": 0.04123674362438575, "grad_norm": 2.571798801422119, "learning_rate": 9.996702039601197e-06, "loss": 0.626, "step": 3449 }, { "epoch": 0.0412486997692464, "grad_norm": 2.5457658767700195, "learning_rate": 9.99669500472954e-06, "loss": 0.64, "step": 3450 }, { "epoch": 0.041260655914107054, "grad_norm": 2.7415037155151367, "learning_rate": 9.99668796236532e-06, "loss": 0.631, "step": 3451 }, { "epoch": 0.041272612058967706, "grad_norm": 1.8740036487579346, "learning_rate": 9.996680912508552e-06, "loss": 0.6125, "step": 3452 }, { "epoch": 0.04128456820382836, "grad_norm": 2.960400342941284, "learning_rate": 9.99667385515924e-06, "loss": 0.6814, "step": 3453 }, { "epoch": 0.04129652434868901, "grad_norm": 1.896972894668579, "learning_rate": 9.9966667903174e-06, "loss": 0.6133, "step": 3454 }, { "epoch": 0.04130848049354966, "grad_norm": 5.006804943084717, "learning_rate": 9.996659717983039e-06, "loss": 0.6995, "step": 3455 }, { "epoch": 0.04132043663841031, "grad_norm": 22.725675582885742, "learning_rate": 9.99665263815617e-06, "loss": 0.6396, "step": 3456 }, { "epoch": 0.041332392783270964, "grad_norm": 1.8337699174880981, "learning_rate": 9.996645550836803e-06, "loss": 0.6632, "step": 3457 }, { "epoch": 0.041344348928131616, "grad_norm": 1.8380496501922607, "learning_rate": 9.996638456024947e-06, "loss": 0.6036, "step": 3458 }, { "epoch": 0.04135630507299226, "grad_norm": 3.4797613620758057, "learning_rate": 9.996631353720613e-06, "loss": 0.5581, "step": 3459 }, { "epoch": 0.04136826121785291, "grad_norm": 4.267897129058838, "learning_rate": 9.996624243923814e-06, "loss": 0.6511, "step": 3460 }, { "epoch": 0.041380217362713564, "grad_norm": 7.625946521759033, "learning_rate": 9.99661712663456e-06, "loss": 0.6484, "step": 3461 }, { "epoch": 0.041392173507574216, "grad_norm": 3.4290788173675537, "learning_rate": 9.99661000185286e-06, "loss": 0.6182, "step": 3462 }, { "epoch": 0.04140412965243487, "grad_norm": 1.7795453071594238, "learning_rate": 9.996602869578724e-06, "loss": 0.621, "step": 3463 }, { "epoch": 0.04141608579729552, "grad_norm": 2.3227665424346924, "learning_rate": 9.996595729812166e-06, "loss": 0.6797, "step": 3464 }, { "epoch": 0.04142804194215617, "grad_norm": 12.194140434265137, "learning_rate": 9.996588582553195e-06, "loss": 0.6928, "step": 3465 }, { "epoch": 0.04143999808701682, "grad_norm": 2.254779577255249, "learning_rate": 9.996581427801822e-06, "loss": 0.5612, "step": 3466 }, { "epoch": 0.041451954231877475, "grad_norm": 2.8648178577423096, "learning_rate": 9.996574265558055e-06, "loss": 0.6134, "step": 3467 }, { "epoch": 0.041463910376738126, "grad_norm": 2.598484992980957, "learning_rate": 9.99656709582191e-06, "loss": 0.549, "step": 3468 }, { "epoch": 0.04147586652159878, "grad_norm": 2.4689502716064453, "learning_rate": 9.996559918593392e-06, "loss": 0.6624, "step": 3469 }, { "epoch": 0.04148782266645943, "grad_norm": 2.2675552368164062, "learning_rate": 9.996552733872517e-06, "loss": 0.5711, "step": 3470 }, { "epoch": 0.041499778811320075, "grad_norm": 2.095889091491699, "learning_rate": 9.996545541659292e-06, "loss": 0.6944, "step": 3471 }, { "epoch": 0.041511734956180726, "grad_norm": 1.8997561931610107, "learning_rate": 9.99653834195373e-06, "loss": 0.5701, "step": 3472 }, { "epoch": 0.04152369110104138, "grad_norm": 2.369982957839966, "learning_rate": 9.99653113475584e-06, "loss": 0.6768, "step": 3473 }, { "epoch": 0.04153564724590203, "grad_norm": 2.031592607498169, "learning_rate": 9.996523920065635e-06, "loss": 0.6205, "step": 3474 }, { "epoch": 0.04154760339076268, "grad_norm": 2.4496490955352783, "learning_rate": 9.996516697883126e-06, "loss": 0.6797, "step": 3475 }, { "epoch": 0.04155955953562333, "grad_norm": 1.7435052394866943, "learning_rate": 9.99650946820832e-06, "loss": 0.6586, "step": 3476 }, { "epoch": 0.041571515680483985, "grad_norm": 11.566227912902832, "learning_rate": 9.99650223104123e-06, "loss": 0.6809, "step": 3477 }, { "epoch": 0.04158347182534464, "grad_norm": 1.7942993640899658, "learning_rate": 9.996494986381868e-06, "loss": 0.5967, "step": 3478 }, { "epoch": 0.04159542797020529, "grad_norm": 5.024359226226807, "learning_rate": 9.996487734230245e-06, "loss": 0.6185, "step": 3479 }, { "epoch": 0.04160738411506594, "grad_norm": 1.3995225429534912, "learning_rate": 9.99648047458637e-06, "loss": 0.5948, "step": 3480 }, { "epoch": 0.04161934025992659, "grad_norm": 2.8233141899108887, "learning_rate": 9.996473207450255e-06, "loss": 0.554, "step": 3481 }, { "epoch": 0.041631296404787244, "grad_norm": 1.8156639337539673, "learning_rate": 9.99646593282191e-06, "loss": 0.5985, "step": 3482 }, { "epoch": 0.04164325254964789, "grad_norm": 2.2980268001556396, "learning_rate": 9.996458650701345e-06, "loss": 0.6658, "step": 3483 }, { "epoch": 0.04165520869450854, "grad_norm": 4.437527656555176, "learning_rate": 9.996451361088576e-06, "loss": 0.6377, "step": 3484 }, { "epoch": 0.04166716483936919, "grad_norm": 2.208730459213257, "learning_rate": 9.996444063983609e-06, "loss": 0.5628, "step": 3485 }, { "epoch": 0.041679120984229844, "grad_norm": 1.538411021232605, "learning_rate": 9.996436759386457e-06, "loss": 0.606, "step": 3486 }, { "epoch": 0.041691077129090495, "grad_norm": 3.322643995285034, "learning_rate": 9.99642944729713e-06, "loss": 0.4773, "step": 3487 }, { "epoch": 0.04170303327395115, "grad_norm": 3.10516095161438, "learning_rate": 9.996422127715638e-06, "loss": 0.7207, "step": 3488 }, { "epoch": 0.0417149894188118, "grad_norm": 1.9444342851638794, "learning_rate": 9.996414800641994e-06, "loss": 0.6502, "step": 3489 }, { "epoch": 0.04172694556367245, "grad_norm": 2.4518582820892334, "learning_rate": 9.996407466076207e-06, "loss": 0.7104, "step": 3490 }, { "epoch": 0.0417389017085331, "grad_norm": 2.0467021465301514, "learning_rate": 9.996400124018291e-06, "loss": 0.6235, "step": 3491 }, { "epoch": 0.041750857853393754, "grad_norm": 5.338706016540527, "learning_rate": 9.996392774468256e-06, "loss": 0.5563, "step": 3492 }, { "epoch": 0.041762813998254406, "grad_norm": 3.125300407409668, "learning_rate": 9.99638541742611e-06, "loss": 0.6045, "step": 3493 }, { "epoch": 0.04177477014311506, "grad_norm": 4.171721458435059, "learning_rate": 9.996378052891869e-06, "loss": 0.6573, "step": 3494 }, { "epoch": 0.0417867262879757, "grad_norm": 2.519282579421997, "learning_rate": 9.99637068086554e-06, "loss": 0.6692, "step": 3495 }, { "epoch": 0.041798682432836354, "grad_norm": 1.7942880392074585, "learning_rate": 9.996363301347135e-06, "loss": 0.5875, "step": 3496 }, { "epoch": 0.041810638577697005, "grad_norm": 3.0873615741729736, "learning_rate": 9.996355914336666e-06, "loss": 0.6732, "step": 3497 }, { "epoch": 0.04182259472255766, "grad_norm": 1.9570162296295166, "learning_rate": 9.996348519834143e-06, "loss": 0.6792, "step": 3498 }, { "epoch": 0.04183455086741831, "grad_norm": 1.7848362922668457, "learning_rate": 9.996341117839578e-06, "loss": 0.6907, "step": 3499 }, { "epoch": 0.04184650701227896, "grad_norm": 1.864080786705017, "learning_rate": 9.996333708352981e-06, "loss": 0.518, "step": 3500 }, { "epoch": 0.04185846315713961, "grad_norm": 2.6498491764068604, "learning_rate": 9.996326291374364e-06, "loss": 0.599, "step": 3501 }, { "epoch": 0.041870419302000264, "grad_norm": 2.870481252670288, "learning_rate": 9.996318866903738e-06, "loss": 0.542, "step": 3502 }, { "epoch": 0.041882375446860916, "grad_norm": 13.385187149047852, "learning_rate": 9.996311434941115e-06, "loss": 0.6454, "step": 3503 }, { "epoch": 0.04189433159172157, "grad_norm": 5.667015552520752, "learning_rate": 9.996303995486504e-06, "loss": 0.5956, "step": 3504 }, { "epoch": 0.04190628773658222, "grad_norm": 2.625899076461792, "learning_rate": 9.99629654853992e-06, "loss": 0.7174, "step": 3505 }, { "epoch": 0.04191824388144287, "grad_norm": 1.8077946901321411, "learning_rate": 9.996289094101369e-06, "loss": 0.6023, "step": 3506 }, { "epoch": 0.041930200026303516, "grad_norm": 1.8331620693206787, "learning_rate": 9.996281632170866e-06, "loss": 0.6999, "step": 3507 }, { "epoch": 0.04194215617116417, "grad_norm": 5.299564838409424, "learning_rate": 9.996274162748421e-06, "loss": 0.7547, "step": 3508 }, { "epoch": 0.04195411231602482, "grad_norm": 1.5562200546264648, "learning_rate": 9.996266685834044e-06, "loss": 0.4581, "step": 3509 }, { "epoch": 0.04196606846088547, "grad_norm": 3.057507038116455, "learning_rate": 9.996259201427747e-06, "loss": 0.628, "step": 3510 }, { "epoch": 0.04197802460574612, "grad_norm": 2.2751379013061523, "learning_rate": 9.996251709529543e-06, "loss": 0.6105, "step": 3511 }, { "epoch": 0.041989980750606774, "grad_norm": 3.424199104309082, "learning_rate": 9.996244210139441e-06, "loss": 0.654, "step": 3512 }, { "epoch": 0.042001936895467426, "grad_norm": 1.948136806488037, "learning_rate": 9.996236703257454e-06, "loss": 0.6155, "step": 3513 }, { "epoch": 0.04201389304032808, "grad_norm": 2.6257927417755127, "learning_rate": 9.99622918888359e-06, "loss": 0.5853, "step": 3514 }, { "epoch": 0.04202584918518873, "grad_norm": 3.3242249488830566, "learning_rate": 9.996221667017865e-06, "loss": 0.5847, "step": 3515 }, { "epoch": 0.04203780533004938, "grad_norm": 2.716691732406616, "learning_rate": 9.996214137660287e-06, "loss": 0.7044, "step": 3516 }, { "epoch": 0.04204976147491003, "grad_norm": 1.7949135303497314, "learning_rate": 9.996206600810868e-06, "loss": 0.659, "step": 3517 }, { "epoch": 0.04206171761977068, "grad_norm": 12.882911682128906, "learning_rate": 9.99619905646962e-06, "loss": 0.7219, "step": 3518 }, { "epoch": 0.04207367376463133, "grad_norm": 3.5204367637634277, "learning_rate": 9.996191504636553e-06, "loss": 0.7361, "step": 3519 }, { "epoch": 0.04208562990949198, "grad_norm": 2.032014846801758, "learning_rate": 9.996183945311679e-06, "loss": 0.642, "step": 3520 }, { "epoch": 0.04209758605435263, "grad_norm": 1.8460389375686646, "learning_rate": 9.996176378495011e-06, "loss": 0.5746, "step": 3521 }, { "epoch": 0.042109542199213285, "grad_norm": 1.8885018825531006, "learning_rate": 9.996168804186556e-06, "loss": 0.5275, "step": 3522 }, { "epoch": 0.042121498344073936, "grad_norm": 9.162095069885254, "learning_rate": 9.996161222386331e-06, "loss": 0.6205, "step": 3523 }, { "epoch": 0.04213345448893459, "grad_norm": 3.6782498359680176, "learning_rate": 9.996153633094343e-06, "loss": 0.6725, "step": 3524 }, { "epoch": 0.04214541063379524, "grad_norm": 1.84982430934906, "learning_rate": 9.996146036310605e-06, "loss": 0.6377, "step": 3525 }, { "epoch": 0.04215736677865589, "grad_norm": 1.6995548009872437, "learning_rate": 9.996138432035128e-06, "loss": 0.5767, "step": 3526 }, { "epoch": 0.04216932292351654, "grad_norm": 5.233714580535889, "learning_rate": 9.996130820267924e-06, "loss": 0.6414, "step": 3527 }, { "epoch": 0.042181279068377195, "grad_norm": 2.7908551692962646, "learning_rate": 9.996123201009004e-06, "loss": 0.7306, "step": 3528 }, { "epoch": 0.04219323521323785, "grad_norm": 1.9750062227249146, "learning_rate": 9.99611557425838e-06, "loss": 0.6446, "step": 3529 }, { "epoch": 0.04220519135809849, "grad_norm": 2.4040703773498535, "learning_rate": 9.996107940016062e-06, "loss": 0.6757, "step": 3530 }, { "epoch": 0.04221714750295914, "grad_norm": 2.03652286529541, "learning_rate": 9.996100298282064e-06, "loss": 0.6065, "step": 3531 }, { "epoch": 0.042229103647819795, "grad_norm": 1.7851072549819946, "learning_rate": 9.996092649056394e-06, "loss": 0.6655, "step": 3532 }, { "epoch": 0.042241059792680447, "grad_norm": 2.237440347671509, "learning_rate": 9.996084992339066e-06, "loss": 0.6102, "step": 3533 }, { "epoch": 0.0422530159375411, "grad_norm": 8.04793930053711, "learning_rate": 9.996077328130092e-06, "loss": 0.6141, "step": 3534 }, { "epoch": 0.04226497208240175, "grad_norm": 2.0128252506256104, "learning_rate": 9.996069656429481e-06, "loss": 0.5532, "step": 3535 }, { "epoch": 0.0422769282272624, "grad_norm": 3.4756691455841064, "learning_rate": 9.996061977237247e-06, "loss": 0.5644, "step": 3536 }, { "epoch": 0.04228888437212305, "grad_norm": 2.3354485034942627, "learning_rate": 9.996054290553399e-06, "loss": 0.5889, "step": 3537 }, { "epoch": 0.042300840516983705, "grad_norm": 3.369069814682007, "learning_rate": 9.99604659637795e-06, "loss": 0.6447, "step": 3538 }, { "epoch": 0.04231279666184436, "grad_norm": 2.0693917274475098, "learning_rate": 9.996038894710912e-06, "loss": 0.5575, "step": 3539 }, { "epoch": 0.04232475280670501, "grad_norm": 2.2888853549957275, "learning_rate": 9.996031185552293e-06, "loss": 0.6567, "step": 3540 }, { "epoch": 0.04233670895156566, "grad_norm": 5.941481590270996, "learning_rate": 9.99602346890211e-06, "loss": 0.715, "step": 3541 }, { "epoch": 0.042348665096426305, "grad_norm": 1.696004867553711, "learning_rate": 9.996015744760373e-06, "loss": 0.5494, "step": 3542 }, { "epoch": 0.04236062124128696, "grad_norm": 3.493809938430786, "learning_rate": 9.99600801312709e-06, "loss": 0.6351, "step": 3543 }, { "epoch": 0.04237257738614761, "grad_norm": 2.2271625995635986, "learning_rate": 9.996000274002277e-06, "loss": 0.5372, "step": 3544 }, { "epoch": 0.04238453353100826, "grad_norm": 2.504328727722168, "learning_rate": 9.995992527385946e-06, "loss": 0.6359, "step": 3545 }, { "epoch": 0.04239648967586891, "grad_norm": 2.9219348430633545, "learning_rate": 9.995984773278102e-06, "loss": 0.6133, "step": 3546 }, { "epoch": 0.042408445820729564, "grad_norm": 2.009822368621826, "learning_rate": 9.995977011678764e-06, "loss": 0.6553, "step": 3547 }, { "epoch": 0.042420401965590215, "grad_norm": 5.189429759979248, "learning_rate": 9.99596924258794e-06, "loss": 0.7008, "step": 3548 }, { "epoch": 0.04243235811045087, "grad_norm": 6.124334335327148, "learning_rate": 9.99596146600564e-06, "loss": 0.7052, "step": 3549 }, { "epoch": 0.04244431425531152, "grad_norm": 2.2421257495880127, "learning_rate": 9.995953681931882e-06, "loss": 0.6057, "step": 3550 }, { "epoch": 0.04245627040017217, "grad_norm": 2.3211207389831543, "learning_rate": 9.995945890366672e-06, "loss": 0.6096, "step": 3551 }, { "epoch": 0.04246822654503282, "grad_norm": 2.978369951248169, "learning_rate": 9.995938091310022e-06, "loss": 0.5796, "step": 3552 }, { "epoch": 0.042480182689893474, "grad_norm": 2.0722458362579346, "learning_rate": 9.995930284761947e-06, "loss": 0.588, "step": 3553 }, { "epoch": 0.04249213883475412, "grad_norm": 2.0474181175231934, "learning_rate": 9.995922470722457e-06, "loss": 0.4814, "step": 3554 }, { "epoch": 0.04250409497961477, "grad_norm": 2.69272780418396, "learning_rate": 9.995914649191562e-06, "loss": 0.6367, "step": 3555 }, { "epoch": 0.04251605112447542, "grad_norm": 5.56296443939209, "learning_rate": 9.995906820169277e-06, "loss": 0.6042, "step": 3556 }, { "epoch": 0.042528007269336074, "grad_norm": 3.1908175945281982, "learning_rate": 9.995898983655609e-06, "loss": 0.7137, "step": 3557 }, { "epoch": 0.042539963414196726, "grad_norm": 2.200023651123047, "learning_rate": 9.995891139650575e-06, "loss": 0.6754, "step": 3558 }, { "epoch": 0.04255191955905738, "grad_norm": 1.9878253936767578, "learning_rate": 9.995883288154185e-06, "loss": 0.6061, "step": 3559 }, { "epoch": 0.04256387570391803, "grad_norm": 3.4978508949279785, "learning_rate": 9.995875429166448e-06, "loss": 0.7415, "step": 3560 }, { "epoch": 0.04257583184877868, "grad_norm": 1.978635311126709, "learning_rate": 9.99586756268738e-06, "loss": 0.5191, "step": 3561 }, { "epoch": 0.04258778799363933, "grad_norm": 2.0805764198303223, "learning_rate": 9.99585968871699e-06, "loss": 0.6041, "step": 3562 }, { "epoch": 0.042599744138499984, "grad_norm": 2.0416746139526367, "learning_rate": 9.99585180725529e-06, "loss": 0.6846, "step": 3563 }, { "epoch": 0.042611700283360636, "grad_norm": 2.5832910537719727, "learning_rate": 9.995843918302295e-06, "loss": 0.6318, "step": 3564 }, { "epoch": 0.04262365642822129, "grad_norm": 1.734703779220581, "learning_rate": 9.995836021858012e-06, "loss": 0.6631, "step": 3565 }, { "epoch": 0.04263561257308193, "grad_norm": 2.516282558441162, "learning_rate": 9.995828117922455e-06, "loss": 0.6249, "step": 3566 }, { "epoch": 0.042647568717942584, "grad_norm": 1.6779346466064453, "learning_rate": 9.995820206495636e-06, "loss": 0.6843, "step": 3567 }, { "epoch": 0.042659524862803236, "grad_norm": 2.6678354740142822, "learning_rate": 9.995812287577568e-06, "loss": 0.6885, "step": 3568 }, { "epoch": 0.04267148100766389, "grad_norm": 2.568105459213257, "learning_rate": 9.995804361168262e-06, "loss": 0.5685, "step": 3569 }, { "epoch": 0.04268343715252454, "grad_norm": 1.8572001457214355, "learning_rate": 9.995796427267728e-06, "loss": 0.7103, "step": 3570 }, { "epoch": 0.04269539329738519, "grad_norm": 10.534331321716309, "learning_rate": 9.995788485875982e-06, "loss": 0.5533, "step": 3571 }, { "epoch": 0.04270734944224584, "grad_norm": 1.7638291120529175, "learning_rate": 9.99578053699303e-06, "loss": 0.5717, "step": 3572 }, { "epoch": 0.042719305587106494, "grad_norm": 8.037595748901367, "learning_rate": 9.99577258061889e-06, "loss": 0.7435, "step": 3573 }, { "epoch": 0.042731261731967146, "grad_norm": 2.770674705505371, "learning_rate": 9.99576461675357e-06, "loss": 0.7137, "step": 3574 }, { "epoch": 0.0427432178768278, "grad_norm": 1.8705754280090332, "learning_rate": 9.995756645397083e-06, "loss": 0.6189, "step": 3575 }, { "epoch": 0.04275517402168845, "grad_norm": 2.2870256900787354, "learning_rate": 9.995748666549441e-06, "loss": 0.625, "step": 3576 }, { "epoch": 0.0427671301665491, "grad_norm": 4.286918640136719, "learning_rate": 9.995740680210656e-06, "loss": 0.6601, "step": 3577 }, { "epoch": 0.042779086311409746, "grad_norm": 1.9188332557678223, "learning_rate": 9.995732686380742e-06, "loss": 0.6062, "step": 3578 }, { "epoch": 0.0427910424562704, "grad_norm": 5.821152210235596, "learning_rate": 9.99572468505971e-06, "loss": 0.6797, "step": 3579 }, { "epoch": 0.04280299860113105, "grad_norm": 1.5376465320587158, "learning_rate": 9.995716676247566e-06, "loss": 0.6206, "step": 3580 }, { "epoch": 0.0428149547459917, "grad_norm": 5.0086541175842285, "learning_rate": 9.995708659944332e-06, "loss": 0.6106, "step": 3581 }, { "epoch": 0.04282691089085235, "grad_norm": 1.9629392623901367, "learning_rate": 9.995700636150012e-06, "loss": 0.6566, "step": 3582 }, { "epoch": 0.042838867035713005, "grad_norm": 4.961541652679443, "learning_rate": 9.995692604864623e-06, "loss": 0.7002, "step": 3583 }, { "epoch": 0.042850823180573656, "grad_norm": 3.986117124557495, "learning_rate": 9.995684566088174e-06, "loss": 0.6249, "step": 3584 }, { "epoch": 0.04286277932543431, "grad_norm": 1.9208166599273682, "learning_rate": 9.995676519820678e-06, "loss": 0.6613, "step": 3585 }, { "epoch": 0.04287473547029496, "grad_norm": 2.817427635192871, "learning_rate": 9.99566846606215e-06, "loss": 0.6878, "step": 3586 }, { "epoch": 0.04288669161515561, "grad_norm": 2.7814629077911377, "learning_rate": 9.995660404812597e-06, "loss": 0.6576, "step": 3587 }, { "epoch": 0.04289864776001626, "grad_norm": 2.316150426864624, "learning_rate": 9.995652336072033e-06, "loss": 0.6526, "step": 3588 }, { "epoch": 0.04291060390487691, "grad_norm": 1.7741762399673462, "learning_rate": 9.995644259840472e-06, "loss": 0.646, "step": 3589 }, { "epoch": 0.04292256004973756, "grad_norm": 2.045794725418091, "learning_rate": 9.995636176117924e-06, "loss": 0.6532, "step": 3590 }, { "epoch": 0.04293451619459821, "grad_norm": 2.0693650245666504, "learning_rate": 9.995628084904404e-06, "loss": 0.6024, "step": 3591 }, { "epoch": 0.04294647233945886, "grad_norm": 3.256547451019287, "learning_rate": 9.995619986199918e-06, "loss": 0.6038, "step": 3592 }, { "epoch": 0.042958428484319515, "grad_norm": 1.835490345954895, "learning_rate": 9.995611880004484e-06, "loss": 0.5852, "step": 3593 }, { "epoch": 0.04297038462918017, "grad_norm": 2.6904447078704834, "learning_rate": 9.995603766318112e-06, "loss": 0.6108, "step": 3594 }, { "epoch": 0.04298234077404082, "grad_norm": 1.9943169355392456, "learning_rate": 9.995595645140814e-06, "loss": 0.6323, "step": 3595 }, { "epoch": 0.04299429691890147, "grad_norm": 6.353963851928711, "learning_rate": 9.995587516472604e-06, "loss": 0.7012, "step": 3596 }, { "epoch": 0.04300625306376212, "grad_norm": 2.548656463623047, "learning_rate": 9.995579380313491e-06, "loss": 0.6286, "step": 3597 }, { "epoch": 0.043018209208622774, "grad_norm": 8.442852973937988, "learning_rate": 9.995571236663491e-06, "loss": 0.6787, "step": 3598 }, { "epoch": 0.043030165353483425, "grad_norm": 1.9656739234924316, "learning_rate": 9.995563085522613e-06, "loss": 0.6064, "step": 3599 }, { "epoch": 0.04304212149834408, "grad_norm": 3.2461605072021484, "learning_rate": 9.99555492689087e-06, "loss": 0.6562, "step": 3600 }, { "epoch": 0.04305407764320472, "grad_norm": 6.602165222167969, "learning_rate": 9.995546760768275e-06, "loss": 0.738, "step": 3601 }, { "epoch": 0.04306603378806537, "grad_norm": 1.7987974882125854, "learning_rate": 9.99553858715484e-06, "loss": 0.5507, "step": 3602 }, { "epoch": 0.043077989932926025, "grad_norm": 2.032494068145752, "learning_rate": 9.995530406050578e-06, "loss": 0.6871, "step": 3603 }, { "epoch": 0.04308994607778668, "grad_norm": 1.9831379652023315, "learning_rate": 9.995522217455499e-06, "loss": 0.6861, "step": 3604 }, { "epoch": 0.04310190222264733, "grad_norm": 1.977353811264038, "learning_rate": 9.995514021369617e-06, "loss": 0.6253, "step": 3605 }, { "epoch": 0.04311385836750798, "grad_norm": 2.2753610610961914, "learning_rate": 9.995505817792943e-06, "loss": 0.6075, "step": 3606 }, { "epoch": 0.04312581451236863, "grad_norm": 3.6161916255950928, "learning_rate": 9.995497606725493e-06, "loss": 0.5787, "step": 3607 }, { "epoch": 0.043137770657229284, "grad_norm": 3.819291591644287, "learning_rate": 9.995489388167274e-06, "loss": 0.6895, "step": 3608 }, { "epoch": 0.043149726802089935, "grad_norm": 3.127654790878296, "learning_rate": 9.995481162118302e-06, "loss": 0.5321, "step": 3609 }, { "epoch": 0.04316168294695059, "grad_norm": 2.6179912090301514, "learning_rate": 9.995472928578589e-06, "loss": 0.6536, "step": 3610 }, { "epoch": 0.04317363909181124, "grad_norm": 2.526857614517212, "learning_rate": 9.995464687548144e-06, "loss": 0.6315, "step": 3611 }, { "epoch": 0.04318559523667189, "grad_norm": 2.022844076156616, "learning_rate": 9.995456439026984e-06, "loss": 0.681, "step": 3612 }, { "epoch": 0.043197551381532535, "grad_norm": 4.416643142700195, "learning_rate": 9.99544818301512e-06, "loss": 0.5542, "step": 3613 }, { "epoch": 0.04320950752639319, "grad_norm": 2.0261831283569336, "learning_rate": 9.995439919512561e-06, "loss": 0.5718, "step": 3614 }, { "epoch": 0.04322146367125384, "grad_norm": 1.7161133289337158, "learning_rate": 9.995431648519325e-06, "loss": 0.6268, "step": 3615 }, { "epoch": 0.04323341981611449, "grad_norm": 2.2099955081939697, "learning_rate": 9.99542337003542e-06, "loss": 0.6176, "step": 3616 }, { "epoch": 0.04324537596097514, "grad_norm": 2.176452159881592, "learning_rate": 9.99541508406086e-06, "loss": 0.6221, "step": 3617 }, { "epoch": 0.043257332105835794, "grad_norm": 2.358388662338257, "learning_rate": 9.995406790595658e-06, "loss": 0.5313, "step": 3618 }, { "epoch": 0.043269288250696446, "grad_norm": 2.0787603855133057, "learning_rate": 9.995398489639825e-06, "loss": 0.6071, "step": 3619 }, { "epoch": 0.0432812443955571, "grad_norm": 2.1786859035491943, "learning_rate": 9.995390181193373e-06, "loss": 0.6334, "step": 3620 }, { "epoch": 0.04329320054041775, "grad_norm": 2.448791265487671, "learning_rate": 9.995381865256318e-06, "loss": 0.5668, "step": 3621 }, { "epoch": 0.0433051566852784, "grad_norm": 2.4508635997772217, "learning_rate": 9.99537354182867e-06, "loss": 0.6523, "step": 3622 }, { "epoch": 0.04331711283013905, "grad_norm": 2.6078054904937744, "learning_rate": 9.99536521091044e-06, "loss": 0.6459, "step": 3623 }, { "epoch": 0.043329068974999704, "grad_norm": 3.8343875408172607, "learning_rate": 9.995356872501643e-06, "loss": 0.6016, "step": 3624 }, { "epoch": 0.04334102511986035, "grad_norm": 2.5383875370025635, "learning_rate": 9.995348526602291e-06, "loss": 0.577, "step": 3625 }, { "epoch": 0.043352981264721, "grad_norm": 2.4638752937316895, "learning_rate": 9.995340173212395e-06, "loss": 0.5789, "step": 3626 }, { "epoch": 0.04336493740958165, "grad_norm": 1.9109493494033813, "learning_rate": 9.99533181233197e-06, "loss": 0.6402, "step": 3627 }, { "epoch": 0.043376893554442304, "grad_norm": 2.253495216369629, "learning_rate": 9.995323443961027e-06, "loss": 0.7108, "step": 3628 }, { "epoch": 0.043388849699302956, "grad_norm": 3.6816518306732178, "learning_rate": 9.995315068099579e-06, "loss": 0.5949, "step": 3629 }, { "epoch": 0.04340080584416361, "grad_norm": 1.9308735132217407, "learning_rate": 9.995306684747637e-06, "loss": 0.655, "step": 3630 }, { "epoch": 0.04341276198902426, "grad_norm": 2.363546371459961, "learning_rate": 9.995298293905215e-06, "loss": 0.6558, "step": 3631 }, { "epoch": 0.04342471813388491, "grad_norm": 2.329704999923706, "learning_rate": 9.995289895572325e-06, "loss": 0.731, "step": 3632 }, { "epoch": 0.04343667427874556, "grad_norm": 70.99147033691406, "learning_rate": 9.995281489748982e-06, "loss": 0.5939, "step": 3633 }, { "epoch": 0.043448630423606215, "grad_norm": 2.8079583644866943, "learning_rate": 9.995273076435195e-06, "loss": 0.6612, "step": 3634 }, { "epoch": 0.043460586568466866, "grad_norm": 3.6552951335906982, "learning_rate": 9.995264655630978e-06, "loss": 0.6893, "step": 3635 }, { "epoch": 0.04347254271332752, "grad_norm": 7.753768444061279, "learning_rate": 9.995256227336345e-06, "loss": 0.709, "step": 3636 }, { "epoch": 0.04348449885818816, "grad_norm": 4.967518329620361, "learning_rate": 9.995247791551307e-06, "loss": 0.5903, "step": 3637 }, { "epoch": 0.043496455003048815, "grad_norm": 6.011589527130127, "learning_rate": 9.995239348275877e-06, "loss": 0.5839, "step": 3638 }, { "epoch": 0.043508411147909466, "grad_norm": 3.444666862487793, "learning_rate": 9.995230897510069e-06, "loss": 0.7391, "step": 3639 }, { "epoch": 0.04352036729277012, "grad_norm": 2.8829147815704346, "learning_rate": 9.995222439253896e-06, "loss": 0.6427, "step": 3640 }, { "epoch": 0.04353232343763077, "grad_norm": 2.909100294113159, "learning_rate": 9.995213973507364e-06, "loss": 0.6417, "step": 3641 }, { "epoch": 0.04354427958249142, "grad_norm": 3.621192216873169, "learning_rate": 9.995205500270495e-06, "loss": 0.5651, "step": 3642 }, { "epoch": 0.04355623572735207, "grad_norm": 3.152963161468506, "learning_rate": 9.995197019543297e-06, "loss": 0.6197, "step": 3643 }, { "epoch": 0.043568191872212725, "grad_norm": 3.1200733184814453, "learning_rate": 9.995188531325783e-06, "loss": 0.6741, "step": 3644 }, { "epoch": 0.04358014801707338, "grad_norm": 1.9052900075912476, "learning_rate": 9.995180035617967e-06, "loss": 0.6143, "step": 3645 }, { "epoch": 0.04359210416193403, "grad_norm": 2.631582260131836, "learning_rate": 9.995171532419858e-06, "loss": 0.5674, "step": 3646 }, { "epoch": 0.04360406030679468, "grad_norm": 7.958278656005859, "learning_rate": 9.995163021731473e-06, "loss": 0.6684, "step": 3647 }, { "epoch": 0.04361601645165533, "grad_norm": 11.7514009475708, "learning_rate": 9.995154503552825e-06, "loss": 0.5352, "step": 3648 }, { "epoch": 0.043627972596515976, "grad_norm": 1.9466354846954346, "learning_rate": 9.995145977883922e-06, "loss": 0.7159, "step": 3649 }, { "epoch": 0.04363992874137663, "grad_norm": 4.156196594238281, "learning_rate": 9.995137444724782e-06, "loss": 0.7113, "step": 3650 }, { "epoch": 0.04365188488623728, "grad_norm": 2.2689452171325684, "learning_rate": 9.995128904075416e-06, "loss": 0.589, "step": 3651 }, { "epoch": 0.04366384103109793, "grad_norm": 1.7307932376861572, "learning_rate": 9.995120355935835e-06, "loss": 0.6232, "step": 3652 }, { "epoch": 0.04367579717595858, "grad_norm": 3.727146625518799, "learning_rate": 9.995111800306053e-06, "loss": 0.6541, "step": 3653 }, { "epoch": 0.043687753320819235, "grad_norm": 1.803795576095581, "learning_rate": 9.995103237186083e-06, "loss": 0.6086, "step": 3654 }, { "epoch": 0.04369970946567989, "grad_norm": 1.5656466484069824, "learning_rate": 9.99509466657594e-06, "loss": 0.6967, "step": 3655 }, { "epoch": 0.04371166561054054, "grad_norm": 1.5460301637649536, "learning_rate": 9.995086088475634e-06, "loss": 0.5866, "step": 3656 }, { "epoch": 0.04372362175540119, "grad_norm": 2.3595211505889893, "learning_rate": 9.995077502885178e-06, "loss": 0.6998, "step": 3657 }, { "epoch": 0.04373557790026184, "grad_norm": 2.497842788696289, "learning_rate": 9.995068909804586e-06, "loss": 0.5498, "step": 3658 }, { "epoch": 0.043747534045122494, "grad_norm": 1.8206686973571777, "learning_rate": 9.99506030923387e-06, "loss": 0.6156, "step": 3659 }, { "epoch": 0.04375949018998314, "grad_norm": 5.586261749267578, "learning_rate": 9.995051701173043e-06, "loss": 0.5773, "step": 3660 }, { "epoch": 0.04377144633484379, "grad_norm": 1.7394459247589111, "learning_rate": 9.995043085622118e-06, "loss": 0.6914, "step": 3661 }, { "epoch": 0.04378340247970444, "grad_norm": 2.588442325592041, "learning_rate": 9.99503446258111e-06, "loss": 0.6625, "step": 3662 }, { "epoch": 0.043795358624565094, "grad_norm": 4.647327423095703, "learning_rate": 9.995025832050029e-06, "loss": 0.6317, "step": 3663 }, { "epoch": 0.043807314769425745, "grad_norm": 4.0684380531311035, "learning_rate": 9.995017194028888e-06, "loss": 0.7047, "step": 3664 }, { "epoch": 0.0438192709142864, "grad_norm": 2.412370204925537, "learning_rate": 9.995008548517702e-06, "loss": 0.6828, "step": 3665 }, { "epoch": 0.04383122705914705, "grad_norm": 2.4158201217651367, "learning_rate": 9.994999895516484e-06, "loss": 0.6621, "step": 3666 }, { "epoch": 0.0438431832040077, "grad_norm": 2.274902582168579, "learning_rate": 9.994991235025245e-06, "loss": 0.5693, "step": 3667 }, { "epoch": 0.04385513934886835, "grad_norm": 3.692723512649536, "learning_rate": 9.994982567043998e-06, "loss": 0.6424, "step": 3668 }, { "epoch": 0.043867095493729004, "grad_norm": 2.228174924850464, "learning_rate": 9.994973891572756e-06, "loss": 0.6518, "step": 3669 }, { "epoch": 0.043879051638589656, "grad_norm": 2.915571689605713, "learning_rate": 9.994965208611536e-06, "loss": 0.6547, "step": 3670 }, { "epoch": 0.04389100778345031, "grad_norm": 2.587721347808838, "learning_rate": 9.994956518160345e-06, "loss": 0.6823, "step": 3671 }, { "epoch": 0.04390296392831095, "grad_norm": 2.359659194946289, "learning_rate": 9.994947820219201e-06, "loss": 0.6234, "step": 3672 }, { "epoch": 0.043914920073171604, "grad_norm": 2.777923822402954, "learning_rate": 9.994939114788114e-06, "loss": 0.6486, "step": 3673 }, { "epoch": 0.043926876218032256, "grad_norm": 2.256174087524414, "learning_rate": 9.994930401867098e-06, "loss": 0.6123, "step": 3674 }, { "epoch": 0.04393883236289291, "grad_norm": 2.324954032897949, "learning_rate": 9.994921681456167e-06, "loss": 0.6394, "step": 3675 }, { "epoch": 0.04395078850775356, "grad_norm": 1.6785138845443726, "learning_rate": 9.994912953555332e-06, "loss": 0.5937, "step": 3676 }, { "epoch": 0.04396274465261421, "grad_norm": 1.9609752893447876, "learning_rate": 9.994904218164609e-06, "loss": 0.5988, "step": 3677 }, { "epoch": 0.04397470079747486, "grad_norm": 2.0450665950775146, "learning_rate": 9.994895475284008e-06, "loss": 0.6001, "step": 3678 }, { "epoch": 0.043986656942335514, "grad_norm": 1.5013939142227173, "learning_rate": 9.994886724913544e-06, "loss": 0.6123, "step": 3679 }, { "epoch": 0.043998613087196166, "grad_norm": 1.844622254371643, "learning_rate": 9.994877967053228e-06, "loss": 0.5985, "step": 3680 }, { "epoch": 0.04401056923205682, "grad_norm": 4.2838239669799805, "learning_rate": 9.994869201703077e-06, "loss": 0.6128, "step": 3681 }, { "epoch": 0.04402252537691747, "grad_norm": 6.846349716186523, "learning_rate": 9.9948604288631e-06, "loss": 0.6672, "step": 3682 }, { "epoch": 0.04403448152177812, "grad_norm": 2.560380697250366, "learning_rate": 9.994851648533313e-06, "loss": 0.6233, "step": 3683 }, { "epoch": 0.044046437666638766, "grad_norm": 2.9312539100646973, "learning_rate": 9.994842860713728e-06, "loss": 0.7711, "step": 3684 }, { "epoch": 0.04405839381149942, "grad_norm": 3.42417311668396, "learning_rate": 9.99483406540436e-06, "loss": 0.6745, "step": 3685 }, { "epoch": 0.04407034995636007, "grad_norm": 4.910491466522217, "learning_rate": 9.994825262605218e-06, "loss": 0.6296, "step": 3686 }, { "epoch": 0.04408230610122072, "grad_norm": 1.8385599851608276, "learning_rate": 9.994816452316319e-06, "loss": 0.6962, "step": 3687 }, { "epoch": 0.04409426224608137, "grad_norm": 4.12991189956665, "learning_rate": 9.994807634537675e-06, "loss": 0.6635, "step": 3688 }, { "epoch": 0.044106218390942024, "grad_norm": 2.296879291534424, "learning_rate": 9.994798809269299e-06, "loss": 0.5699, "step": 3689 }, { "epoch": 0.044118174535802676, "grad_norm": 2.4046263694763184, "learning_rate": 9.994789976511204e-06, "loss": 0.6355, "step": 3690 }, { "epoch": 0.04413013068066333, "grad_norm": 1.892086386680603, "learning_rate": 9.994781136263406e-06, "loss": 0.6802, "step": 3691 }, { "epoch": 0.04414208682552398, "grad_norm": 3.4636261463165283, "learning_rate": 9.994772288525912e-06, "loss": 0.7252, "step": 3692 }, { "epoch": 0.04415404297038463, "grad_norm": 2.1889188289642334, "learning_rate": 9.994763433298742e-06, "loss": 0.5793, "step": 3693 }, { "epoch": 0.04416599911524528, "grad_norm": 4.245351314544678, "learning_rate": 9.994754570581905e-06, "loss": 0.597, "step": 3694 }, { "epoch": 0.044177955260105935, "grad_norm": 3.3407764434814453, "learning_rate": 9.994745700375417e-06, "loss": 0.5806, "step": 3695 }, { "epoch": 0.04418991140496658, "grad_norm": 1.4527368545532227, "learning_rate": 9.99473682267929e-06, "loss": 0.6476, "step": 3696 }, { "epoch": 0.04420186754982723, "grad_norm": 3.907057046890259, "learning_rate": 9.994727937493535e-06, "loss": 0.6629, "step": 3697 }, { "epoch": 0.04421382369468788, "grad_norm": 2.4885149002075195, "learning_rate": 9.994719044818171e-06, "loss": 0.5789, "step": 3698 }, { "epoch": 0.044225779839548535, "grad_norm": 1.517757773399353, "learning_rate": 9.994710144653207e-06, "loss": 0.6631, "step": 3699 }, { "epoch": 0.044237735984409186, "grad_norm": 2.0116875171661377, "learning_rate": 9.994701236998657e-06, "loss": 0.73, "step": 3700 }, { "epoch": 0.04424969212926984, "grad_norm": 1.7248268127441406, "learning_rate": 9.994692321854535e-06, "loss": 0.6012, "step": 3701 }, { "epoch": 0.04426164827413049, "grad_norm": 2.832855224609375, "learning_rate": 9.994683399220854e-06, "loss": 0.7514, "step": 3702 }, { "epoch": 0.04427360441899114, "grad_norm": 2.8511545658111572, "learning_rate": 9.994674469097627e-06, "loss": 0.7208, "step": 3703 }, { "epoch": 0.04428556056385179, "grad_norm": 2.3442323207855225, "learning_rate": 9.994665531484869e-06, "loss": 0.5576, "step": 3704 }, { "epoch": 0.044297516708712445, "grad_norm": 2.3859291076660156, "learning_rate": 9.99465658638259e-06, "loss": 0.6239, "step": 3705 }, { "epoch": 0.0443094728535731, "grad_norm": 5.283908843994141, "learning_rate": 9.994647633790808e-06, "loss": 0.6012, "step": 3706 }, { "epoch": 0.04432142899843375, "grad_norm": 2.4575421810150146, "learning_rate": 9.994638673709533e-06, "loss": 0.6672, "step": 3707 }, { "epoch": 0.04433338514329439, "grad_norm": 1.7773293256759644, "learning_rate": 9.994629706138781e-06, "loss": 0.5619, "step": 3708 }, { "epoch": 0.044345341288155045, "grad_norm": 2.022212505340576, "learning_rate": 9.994620731078562e-06, "loss": 0.6496, "step": 3709 }, { "epoch": 0.0443572974330157, "grad_norm": 3.1545374393463135, "learning_rate": 9.994611748528893e-06, "loss": 0.598, "step": 3710 }, { "epoch": 0.04436925357787635, "grad_norm": 2.1966488361358643, "learning_rate": 9.994602758489786e-06, "loss": 0.5676, "step": 3711 }, { "epoch": 0.044381209722737, "grad_norm": 1.9253921508789062, "learning_rate": 9.994593760961254e-06, "loss": 0.6397, "step": 3712 }, { "epoch": 0.04439316586759765, "grad_norm": 1.718877911567688, "learning_rate": 9.994584755943309e-06, "loss": 0.5736, "step": 3713 }, { "epoch": 0.044405122012458303, "grad_norm": 1.7019366025924683, "learning_rate": 9.994575743435968e-06, "loss": 0.6539, "step": 3714 }, { "epoch": 0.044417078157318955, "grad_norm": 2.410724639892578, "learning_rate": 9.994566723439242e-06, "loss": 0.5914, "step": 3715 }, { "epoch": 0.04442903430217961, "grad_norm": 2.180173873901367, "learning_rate": 9.994557695953149e-06, "loss": 0.639, "step": 3716 }, { "epoch": 0.04444099044704026, "grad_norm": 1.7416571378707886, "learning_rate": 9.994548660977695e-06, "loss": 0.6146, "step": 3717 }, { "epoch": 0.04445294659190091, "grad_norm": 1.7657076120376587, "learning_rate": 9.9945396185129e-06, "loss": 0.5436, "step": 3718 }, { "epoch": 0.044464902736761555, "grad_norm": 2.24432635307312, "learning_rate": 9.994530568558773e-06, "loss": 0.6615, "step": 3719 }, { "epoch": 0.04447685888162221, "grad_norm": 1.9438642263412476, "learning_rate": 9.994521511115332e-06, "loss": 0.5278, "step": 3720 }, { "epoch": 0.04448881502648286, "grad_norm": 1.7249369621276855, "learning_rate": 9.994512446182586e-06, "loss": 0.6698, "step": 3721 }, { "epoch": 0.04450077117134351, "grad_norm": 2.345088243484497, "learning_rate": 9.994503373760551e-06, "loss": 0.6257, "step": 3722 }, { "epoch": 0.04451272731620416, "grad_norm": 2.3695590496063232, "learning_rate": 9.994494293849242e-06, "loss": 0.6015, "step": 3723 }, { "epoch": 0.044524683461064814, "grad_norm": 1.9154651165008545, "learning_rate": 9.99448520644867e-06, "loss": 0.6215, "step": 3724 }, { "epoch": 0.044536639605925465, "grad_norm": 2.633971691131592, "learning_rate": 9.99447611155885e-06, "loss": 0.5679, "step": 3725 }, { "epoch": 0.04454859575078612, "grad_norm": 2.8535995483398438, "learning_rate": 9.994467009179796e-06, "loss": 0.6839, "step": 3726 }, { "epoch": 0.04456055189564677, "grad_norm": 3.282543182373047, "learning_rate": 9.994457899311519e-06, "loss": 0.6707, "step": 3727 }, { "epoch": 0.04457250804050742, "grad_norm": 1.5307834148406982, "learning_rate": 9.994448781954035e-06, "loss": 0.6659, "step": 3728 }, { "epoch": 0.04458446418536807, "grad_norm": 1.4968843460083008, "learning_rate": 9.99443965710736e-06, "loss": 0.6461, "step": 3729 }, { "epoch": 0.044596420330228724, "grad_norm": 3.5693113803863525, "learning_rate": 9.994430524771503e-06, "loss": 0.6308, "step": 3730 }, { "epoch": 0.04460837647508937, "grad_norm": 1.7341409921646118, "learning_rate": 9.99442138494648e-06, "loss": 0.6883, "step": 3731 }, { "epoch": 0.04462033261995002, "grad_norm": 16.675621032714844, "learning_rate": 9.994412237632303e-06, "loss": 0.6021, "step": 3732 }, { "epoch": 0.04463228876481067, "grad_norm": 1.9101606607437134, "learning_rate": 9.994403082828989e-06, "loss": 0.6285, "step": 3733 }, { "epoch": 0.044644244909671324, "grad_norm": 2.089146614074707, "learning_rate": 9.99439392053655e-06, "loss": 0.5691, "step": 3734 }, { "epoch": 0.044656201054531976, "grad_norm": 2.20186185836792, "learning_rate": 9.994384750754998e-06, "loss": 0.6053, "step": 3735 }, { "epoch": 0.04466815719939263, "grad_norm": 2.069793224334717, "learning_rate": 9.994375573484348e-06, "loss": 0.6866, "step": 3736 }, { "epoch": 0.04468011334425328, "grad_norm": 1.5562822818756104, "learning_rate": 9.994366388724616e-06, "loss": 0.684, "step": 3737 }, { "epoch": 0.04469206948911393, "grad_norm": 1.902716040611267, "learning_rate": 9.994357196475812e-06, "loss": 0.6026, "step": 3738 }, { "epoch": 0.04470402563397458, "grad_norm": 2.5979764461517334, "learning_rate": 9.994347996737952e-06, "loss": 0.6243, "step": 3739 }, { "epoch": 0.044715981778835234, "grad_norm": 5.872543811798096, "learning_rate": 9.99433878951105e-06, "loss": 0.7052, "step": 3740 }, { "epoch": 0.044727937923695886, "grad_norm": 2.1748650074005127, "learning_rate": 9.994329574795121e-06, "loss": 0.7475, "step": 3741 }, { "epoch": 0.04473989406855654, "grad_norm": 2.0707623958587646, "learning_rate": 9.994320352590175e-06, "loss": 0.6748, "step": 3742 }, { "epoch": 0.04475185021341718, "grad_norm": 3.965376377105713, "learning_rate": 9.994311122896227e-06, "loss": 0.6402, "step": 3743 }, { "epoch": 0.044763806358277834, "grad_norm": 3.094024658203125, "learning_rate": 9.994301885713294e-06, "loss": 0.6518, "step": 3744 }, { "epoch": 0.044775762503138486, "grad_norm": 1.9036924839019775, "learning_rate": 9.994292641041385e-06, "loss": 0.6243, "step": 3745 }, { "epoch": 0.04478771864799914, "grad_norm": 1.4963105916976929, "learning_rate": 9.99428338888052e-06, "loss": 0.7066, "step": 3746 }, { "epoch": 0.04479967479285979, "grad_norm": 1.764169692993164, "learning_rate": 9.994274129230706e-06, "loss": 0.5783, "step": 3747 }, { "epoch": 0.04481163093772044, "grad_norm": 1.6624332666397095, "learning_rate": 9.99426486209196e-06, "loss": 0.653, "step": 3748 }, { "epoch": 0.04482358708258109, "grad_norm": 3.589024782180786, "learning_rate": 9.994255587464299e-06, "loss": 0.6569, "step": 3749 }, { "epoch": 0.044835543227441745, "grad_norm": 3.198943614959717, "learning_rate": 9.994246305347733e-06, "loss": 0.6635, "step": 3750 }, { "epoch": 0.044847499372302396, "grad_norm": 1.653827428817749, "learning_rate": 9.994237015742275e-06, "loss": 0.6469, "step": 3751 }, { "epoch": 0.04485945551716305, "grad_norm": 1.8048946857452393, "learning_rate": 9.994227718647942e-06, "loss": 0.6581, "step": 3752 }, { "epoch": 0.0448714116620237, "grad_norm": 1.8203139305114746, "learning_rate": 9.994218414064747e-06, "loss": 0.7164, "step": 3753 }, { "epoch": 0.04488336780688435, "grad_norm": 2.042024850845337, "learning_rate": 9.994209101992704e-06, "loss": 0.6021, "step": 3754 }, { "epoch": 0.044895323951744996, "grad_norm": 3.820385456085205, "learning_rate": 9.994199782431825e-06, "loss": 0.6248, "step": 3755 }, { "epoch": 0.04490728009660565, "grad_norm": 17.787670135498047, "learning_rate": 9.994190455382128e-06, "loss": 0.7052, "step": 3756 }, { "epoch": 0.0449192362414663, "grad_norm": 1.7057663202285767, "learning_rate": 9.994181120843625e-06, "loss": 0.5777, "step": 3757 }, { "epoch": 0.04493119238632695, "grad_norm": 1.814421534538269, "learning_rate": 9.994171778816328e-06, "loss": 0.6645, "step": 3758 }, { "epoch": 0.0449431485311876, "grad_norm": 1.99287748336792, "learning_rate": 9.994162429300253e-06, "loss": 0.6209, "step": 3759 }, { "epoch": 0.044955104676048255, "grad_norm": 2.486912250518799, "learning_rate": 9.994153072295414e-06, "loss": 0.6798, "step": 3760 }, { "epoch": 0.044967060820908906, "grad_norm": 5.505276203155518, "learning_rate": 9.994143707801823e-06, "loss": 0.5904, "step": 3761 }, { "epoch": 0.04497901696576956, "grad_norm": 1.7787379026412964, "learning_rate": 9.994134335819498e-06, "loss": 0.5872, "step": 3762 }, { "epoch": 0.04499097311063021, "grad_norm": 1.9153419733047485, "learning_rate": 9.99412495634845e-06, "loss": 0.5694, "step": 3763 }, { "epoch": 0.04500292925549086, "grad_norm": 6.7057881355285645, "learning_rate": 9.994115569388694e-06, "loss": 0.6608, "step": 3764 }, { "epoch": 0.04501488540035151, "grad_norm": 4.238050937652588, "learning_rate": 9.994106174940243e-06, "loss": 0.5839, "step": 3765 }, { "epoch": 0.045026841545212165, "grad_norm": 5.060512065887451, "learning_rate": 9.994096773003113e-06, "loss": 0.6563, "step": 3766 }, { "epoch": 0.04503879769007281, "grad_norm": 2.083056688308716, "learning_rate": 9.994087363577318e-06, "loss": 0.6365, "step": 3767 }, { "epoch": 0.04505075383493346, "grad_norm": 1.8944183588027954, "learning_rate": 9.99407794666287e-06, "loss": 0.6135, "step": 3768 }, { "epoch": 0.04506270997979411, "grad_norm": 2.3309853076934814, "learning_rate": 9.994068522259784e-06, "loss": 0.655, "step": 3769 }, { "epoch": 0.045074666124654765, "grad_norm": 1.7004883289337158, "learning_rate": 9.994059090368076e-06, "loss": 0.6634, "step": 3770 }, { "epoch": 0.04508662226951542, "grad_norm": 2.3437201976776123, "learning_rate": 9.994049650987756e-06, "loss": 0.5205, "step": 3771 }, { "epoch": 0.04509857841437607, "grad_norm": 1.8959510326385498, "learning_rate": 9.994040204118844e-06, "loss": 0.6188, "step": 3772 }, { "epoch": 0.04511053455923672, "grad_norm": 1.3233674764633179, "learning_rate": 9.99403074976135e-06, "loss": 0.6356, "step": 3773 }, { "epoch": 0.04512249070409737, "grad_norm": 2.6205272674560547, "learning_rate": 9.994021287915288e-06, "loss": 0.6079, "step": 3774 }, { "epoch": 0.045134446848958024, "grad_norm": 3.60349440574646, "learning_rate": 9.994011818580673e-06, "loss": 0.6632, "step": 3775 }, { "epoch": 0.045146402993818675, "grad_norm": 2.0366246700286865, "learning_rate": 9.994002341757521e-06, "loss": 0.6674, "step": 3776 }, { "epoch": 0.04515835913867933, "grad_norm": 2.915821075439453, "learning_rate": 9.993992857445844e-06, "loss": 0.6668, "step": 3777 }, { "epoch": 0.04517031528353998, "grad_norm": 4.271125316619873, "learning_rate": 9.99398336564566e-06, "loss": 0.5909, "step": 3778 }, { "epoch": 0.045182271428400624, "grad_norm": 1.6713894605636597, "learning_rate": 9.993973866356975e-06, "loss": 0.6448, "step": 3779 }, { "epoch": 0.045194227573261275, "grad_norm": 2.0300097465515137, "learning_rate": 9.99396435957981e-06, "loss": 0.6161, "step": 3780 }, { "epoch": 0.04520618371812193, "grad_norm": 1.7352542877197266, "learning_rate": 9.99395484531418e-06, "loss": 0.5997, "step": 3781 }, { "epoch": 0.04521813986298258, "grad_norm": 1.9552273750305176, "learning_rate": 9.993945323560095e-06, "loss": 0.6011, "step": 3782 }, { "epoch": 0.04523009600784323, "grad_norm": 1.772274374961853, "learning_rate": 9.993935794317572e-06, "loss": 0.6238, "step": 3783 }, { "epoch": 0.04524205215270388, "grad_norm": 1.6732449531555176, "learning_rate": 9.993926257586623e-06, "loss": 0.6886, "step": 3784 }, { "epoch": 0.045254008297564534, "grad_norm": 2.0135576725006104, "learning_rate": 9.993916713367267e-06, "loss": 0.7428, "step": 3785 }, { "epoch": 0.045265964442425186, "grad_norm": 2.9856598377227783, "learning_rate": 9.993907161659513e-06, "loss": 0.5397, "step": 3786 }, { "epoch": 0.04527792058728584, "grad_norm": 2.0692853927612305, "learning_rate": 9.993897602463378e-06, "loss": 0.6442, "step": 3787 }, { "epoch": 0.04528987673214649, "grad_norm": 1.7753099203109741, "learning_rate": 9.993888035778875e-06, "loss": 0.6499, "step": 3788 }, { "epoch": 0.04530183287700714, "grad_norm": 2.45055890083313, "learning_rate": 9.99387846160602e-06, "loss": 0.691, "step": 3789 }, { "epoch": 0.045313789021867786, "grad_norm": 3.8872501850128174, "learning_rate": 9.993868879944826e-06, "loss": 0.6965, "step": 3790 }, { "epoch": 0.04532574516672844, "grad_norm": 2.99759578704834, "learning_rate": 9.993859290795307e-06, "loss": 0.7431, "step": 3791 }, { "epoch": 0.04533770131158909, "grad_norm": 2.3633363246917725, "learning_rate": 9.99384969415748e-06, "loss": 0.6641, "step": 3792 }, { "epoch": 0.04534965745644974, "grad_norm": 1.661608338356018, "learning_rate": 9.993840090031357e-06, "loss": 0.5227, "step": 3793 }, { "epoch": 0.04536161360131039, "grad_norm": 2.5542118549346924, "learning_rate": 9.993830478416953e-06, "loss": 0.6197, "step": 3794 }, { "epoch": 0.045373569746171044, "grad_norm": 1.7044956684112549, "learning_rate": 9.993820859314282e-06, "loss": 0.7003, "step": 3795 }, { "epoch": 0.045385525891031696, "grad_norm": 1.757446050643921, "learning_rate": 9.993811232723359e-06, "loss": 0.5654, "step": 3796 }, { "epoch": 0.04539748203589235, "grad_norm": 2.0886993408203125, "learning_rate": 9.993801598644198e-06, "loss": 0.6419, "step": 3797 }, { "epoch": 0.045409438180753, "grad_norm": 1.8323495388031006, "learning_rate": 9.993791957076814e-06, "loss": 0.6797, "step": 3798 }, { "epoch": 0.04542139432561365, "grad_norm": 1.9049923419952393, "learning_rate": 9.993782308021222e-06, "loss": 0.5931, "step": 3799 }, { "epoch": 0.0454333504704743, "grad_norm": 1.63481867313385, "learning_rate": 9.993772651477435e-06, "loss": 0.6506, "step": 3800 }, { "epoch": 0.045445306615334954, "grad_norm": 1.9758437871932983, "learning_rate": 9.993762987445469e-06, "loss": 0.6242, "step": 3801 }, { "epoch": 0.0454572627601956, "grad_norm": 1.6422374248504639, "learning_rate": 9.993753315925336e-06, "loss": 0.6297, "step": 3802 }, { "epoch": 0.04546921890505625, "grad_norm": 2.8891899585723877, "learning_rate": 9.993743636917053e-06, "loss": 0.6369, "step": 3803 }, { "epoch": 0.0454811750499169, "grad_norm": 1.4598335027694702, "learning_rate": 9.993733950420634e-06, "loss": 0.7132, "step": 3804 }, { "epoch": 0.045493131194777554, "grad_norm": 2.012256145477295, "learning_rate": 9.993724256436092e-06, "loss": 0.6027, "step": 3805 }, { "epoch": 0.045505087339638206, "grad_norm": 2.3491907119750977, "learning_rate": 9.993714554963445e-06, "loss": 0.694, "step": 3806 }, { "epoch": 0.04551704348449886, "grad_norm": 2.213010311126709, "learning_rate": 9.993704846002704e-06, "loss": 0.6401, "step": 3807 }, { "epoch": 0.04552899962935951, "grad_norm": 1.3935884237289429, "learning_rate": 9.993695129553884e-06, "loss": 0.6679, "step": 3808 }, { "epoch": 0.04554095577422016, "grad_norm": 1.6020227670669556, "learning_rate": 9.993685405617002e-06, "loss": 0.6372, "step": 3809 }, { "epoch": 0.04555291191908081, "grad_norm": 3.0966503620147705, "learning_rate": 9.99367567419207e-06, "loss": 0.5997, "step": 3810 }, { "epoch": 0.045564868063941465, "grad_norm": 2.327995777130127, "learning_rate": 9.993665935279104e-06, "loss": 0.6638, "step": 3811 }, { "epoch": 0.045576824208802116, "grad_norm": 5.527585506439209, "learning_rate": 9.993656188878119e-06, "loss": 0.6099, "step": 3812 }, { "epoch": 0.04558878035366277, "grad_norm": 4.755916118621826, "learning_rate": 9.993646434989126e-06, "loss": 0.6804, "step": 3813 }, { "epoch": 0.04560073649852341, "grad_norm": 5.042958736419678, "learning_rate": 9.993636673612145e-06, "loss": 0.6138, "step": 3814 }, { "epoch": 0.045612692643384065, "grad_norm": 3.3305132389068604, "learning_rate": 9.993626904747187e-06, "loss": 0.6929, "step": 3815 }, { "epoch": 0.045624648788244716, "grad_norm": 4.090176105499268, "learning_rate": 9.993617128394269e-06, "loss": 0.6145, "step": 3816 }, { "epoch": 0.04563660493310537, "grad_norm": 1.8287672996520996, "learning_rate": 9.993607344553402e-06, "loss": 0.6499, "step": 3817 }, { "epoch": 0.04564856107796602, "grad_norm": 1.6539971828460693, "learning_rate": 9.993597553224605e-06, "loss": 0.7001, "step": 3818 }, { "epoch": 0.04566051722282667, "grad_norm": 2.506843328475952, "learning_rate": 9.99358775440789e-06, "loss": 0.6155, "step": 3819 }, { "epoch": 0.04567247336768732, "grad_norm": 1.6246944665908813, "learning_rate": 9.993577948103272e-06, "loss": 0.6139, "step": 3820 }, { "epoch": 0.045684429512547975, "grad_norm": 1.6816908121109009, "learning_rate": 9.993568134310766e-06, "loss": 0.7128, "step": 3821 }, { "epoch": 0.04569638565740863, "grad_norm": 2.1579935550689697, "learning_rate": 9.993558313030388e-06, "loss": 0.7342, "step": 3822 }, { "epoch": 0.04570834180226928, "grad_norm": 5.192943572998047, "learning_rate": 9.993548484262151e-06, "loss": 0.6483, "step": 3823 }, { "epoch": 0.04572029794712993, "grad_norm": 1.684693455696106, "learning_rate": 9.99353864800607e-06, "loss": 0.6943, "step": 3824 }, { "epoch": 0.04573225409199058, "grad_norm": 2.987999439239502, "learning_rate": 9.993528804262161e-06, "loss": 0.627, "step": 3825 }, { "epoch": 0.04574421023685123, "grad_norm": 2.696950674057007, "learning_rate": 9.993518953030436e-06, "loss": 0.691, "step": 3826 }, { "epoch": 0.04575616638171188, "grad_norm": 3.1195147037506104, "learning_rate": 9.993509094310913e-06, "loss": 0.6341, "step": 3827 }, { "epoch": 0.04576812252657253, "grad_norm": 1.9932520389556885, "learning_rate": 9.993499228103605e-06, "loss": 0.7238, "step": 3828 }, { "epoch": 0.04578007867143318, "grad_norm": 1.9849051237106323, "learning_rate": 9.993489354408528e-06, "loss": 0.5816, "step": 3829 }, { "epoch": 0.04579203481629383, "grad_norm": 2.204713821411133, "learning_rate": 9.993479473225696e-06, "loss": 0.7218, "step": 3830 }, { "epoch": 0.045803990961154485, "grad_norm": 1.8015128374099731, "learning_rate": 9.993469584555122e-06, "loss": 0.6015, "step": 3831 }, { "epoch": 0.04581594710601514, "grad_norm": 1.3151856660842896, "learning_rate": 9.993459688396824e-06, "loss": 0.5575, "step": 3832 }, { "epoch": 0.04582790325087579, "grad_norm": 1.5824772119522095, "learning_rate": 9.993449784750815e-06, "loss": 0.6874, "step": 3833 }, { "epoch": 0.04583985939573644, "grad_norm": 1.259934663772583, "learning_rate": 9.99343987361711e-06, "loss": 0.594, "step": 3834 }, { "epoch": 0.04585181554059709, "grad_norm": 1.6720242500305176, "learning_rate": 9.993429954995724e-06, "loss": 0.5935, "step": 3835 }, { "epoch": 0.045863771685457744, "grad_norm": 3.22834849357605, "learning_rate": 9.993420028886674e-06, "loss": 0.6848, "step": 3836 }, { "epoch": 0.045875727830318395, "grad_norm": 1.739479899406433, "learning_rate": 9.99341009528997e-06, "loss": 0.7405, "step": 3837 }, { "epoch": 0.04588768397517904, "grad_norm": 4.357768535614014, "learning_rate": 9.993400154205631e-06, "loss": 0.6697, "step": 3838 }, { "epoch": 0.04589964012003969, "grad_norm": 1.6372199058532715, "learning_rate": 9.993390205633671e-06, "loss": 0.7117, "step": 3839 }, { "epoch": 0.045911596264900344, "grad_norm": 3.4595625400543213, "learning_rate": 9.993380249574104e-06, "loss": 0.623, "step": 3840 }, { "epoch": 0.045923552409760995, "grad_norm": 6.878548622131348, "learning_rate": 9.993370286026947e-06, "loss": 0.6373, "step": 3841 }, { "epoch": 0.04593550855462165, "grad_norm": 2.7101807594299316, "learning_rate": 9.993360314992212e-06, "loss": 0.6587, "step": 3842 }, { "epoch": 0.0459474646994823, "grad_norm": 1.4371649026870728, "learning_rate": 9.993350336469916e-06, "loss": 0.6291, "step": 3843 }, { "epoch": 0.04595942084434295, "grad_norm": 3.0147764682769775, "learning_rate": 9.993340350460074e-06, "loss": 0.5896, "step": 3844 }, { "epoch": 0.0459713769892036, "grad_norm": 1.691982626914978, "learning_rate": 9.993330356962701e-06, "loss": 0.609, "step": 3845 }, { "epoch": 0.045983333134064254, "grad_norm": 2.0168440341949463, "learning_rate": 9.99332035597781e-06, "loss": 0.5994, "step": 3846 }, { "epoch": 0.045995289278924906, "grad_norm": 1.9427592754364014, "learning_rate": 9.993310347505418e-06, "loss": 0.7338, "step": 3847 }, { "epoch": 0.04600724542378556, "grad_norm": 2.9978439807891846, "learning_rate": 9.99330033154554e-06, "loss": 0.7468, "step": 3848 }, { "epoch": 0.04601920156864621, "grad_norm": 1.5933970212936401, "learning_rate": 9.993290308098188e-06, "loss": 0.6391, "step": 3849 }, { "epoch": 0.046031157713506854, "grad_norm": 2.066469669342041, "learning_rate": 9.993280277163382e-06, "loss": 0.6295, "step": 3850 }, { "epoch": 0.046043113858367506, "grad_norm": 2.5131516456604004, "learning_rate": 9.993270238741133e-06, "loss": 0.4947, "step": 3851 }, { "epoch": 0.04605507000322816, "grad_norm": 6.91212272644043, "learning_rate": 9.993260192831457e-06, "loss": 0.6271, "step": 3852 }, { "epoch": 0.04606702614808881, "grad_norm": 1.910334825515747, "learning_rate": 9.993250139434372e-06, "loss": 0.66, "step": 3853 }, { "epoch": 0.04607898229294946, "grad_norm": 2.4947102069854736, "learning_rate": 9.993240078549889e-06, "loss": 0.6371, "step": 3854 }, { "epoch": 0.04609093843781011, "grad_norm": 1.5790815353393555, "learning_rate": 9.993230010178024e-06, "loss": 0.6152, "step": 3855 }, { "epoch": 0.046102894582670764, "grad_norm": 1.6840566396713257, "learning_rate": 9.993219934318794e-06, "loss": 0.6021, "step": 3856 }, { "epoch": 0.046114850727531416, "grad_norm": 2.331005573272705, "learning_rate": 9.993209850972212e-06, "loss": 0.6116, "step": 3857 }, { "epoch": 0.04612680687239207, "grad_norm": 1.7717243432998657, "learning_rate": 9.993199760138295e-06, "loss": 0.6552, "step": 3858 }, { "epoch": 0.04613876301725272, "grad_norm": 1.9909292459487915, "learning_rate": 9.993189661817058e-06, "loss": 0.5081, "step": 3859 }, { "epoch": 0.04615071916211337, "grad_norm": 2.2274441719055176, "learning_rate": 9.993179556008514e-06, "loss": 0.7162, "step": 3860 }, { "epoch": 0.046162675306974016, "grad_norm": 21.229154586791992, "learning_rate": 9.993169442712681e-06, "loss": 0.6501, "step": 3861 }, { "epoch": 0.04617463145183467, "grad_norm": 2.114516258239746, "learning_rate": 9.993159321929572e-06, "loss": 0.6755, "step": 3862 }, { "epoch": 0.04618658759669532, "grad_norm": 1.7393662929534912, "learning_rate": 9.993149193659201e-06, "loss": 0.604, "step": 3863 }, { "epoch": 0.04619854374155597, "grad_norm": 3.1685400009155273, "learning_rate": 9.993139057901586e-06, "loss": 0.6784, "step": 3864 }, { "epoch": 0.04621049988641662, "grad_norm": 1.3724031448364258, "learning_rate": 9.993128914656742e-06, "loss": 0.615, "step": 3865 }, { "epoch": 0.046222456031277274, "grad_norm": 1.5165245532989502, "learning_rate": 9.993118763924683e-06, "loss": 0.614, "step": 3866 }, { "epoch": 0.046234412176137926, "grad_norm": 1.829667091369629, "learning_rate": 9.993108605705425e-06, "loss": 0.6135, "step": 3867 }, { "epoch": 0.04624636832099858, "grad_norm": 2.2555880546569824, "learning_rate": 9.993098439998983e-06, "loss": 0.6013, "step": 3868 }, { "epoch": 0.04625832446585923, "grad_norm": 1.655839204788208, "learning_rate": 9.993088266805371e-06, "loss": 0.6799, "step": 3869 }, { "epoch": 0.04627028061071988, "grad_norm": 2.7804641723632812, "learning_rate": 9.993078086124607e-06, "loss": 0.5664, "step": 3870 }, { "epoch": 0.04628223675558053, "grad_norm": 1.7624404430389404, "learning_rate": 9.993067897956703e-06, "loss": 0.6485, "step": 3871 }, { "epoch": 0.046294192900441185, "grad_norm": 1.7167731523513794, "learning_rate": 9.993057702301678e-06, "loss": 0.6439, "step": 3872 }, { "epoch": 0.04630614904530183, "grad_norm": 1.7166708707809448, "learning_rate": 9.993047499159544e-06, "loss": 0.5564, "step": 3873 }, { "epoch": 0.04631810519016248, "grad_norm": 1.7277536392211914, "learning_rate": 9.993037288530317e-06, "loss": 0.6985, "step": 3874 }, { "epoch": 0.04633006133502313, "grad_norm": 1.8597692251205444, "learning_rate": 9.993027070414014e-06, "loss": 0.6351, "step": 3875 }, { "epoch": 0.046342017479883785, "grad_norm": 2.4846596717834473, "learning_rate": 9.993016844810647e-06, "loss": 0.6182, "step": 3876 }, { "epoch": 0.046353973624744436, "grad_norm": 3.3460757732391357, "learning_rate": 9.993006611720236e-06, "loss": 0.7835, "step": 3877 }, { "epoch": 0.04636592976960509, "grad_norm": 3.2501654624938965, "learning_rate": 9.992996371142793e-06, "loss": 0.6546, "step": 3878 }, { "epoch": 0.04637788591446574, "grad_norm": 2.1534252166748047, "learning_rate": 9.992986123078335e-06, "loss": 0.5756, "step": 3879 }, { "epoch": 0.04638984205932639, "grad_norm": 3.8854141235351562, "learning_rate": 9.992975867526874e-06, "loss": 0.6987, "step": 3880 }, { "epoch": 0.04640179820418704, "grad_norm": 1.7745037078857422, "learning_rate": 9.99296560448843e-06, "loss": 0.55, "step": 3881 }, { "epoch": 0.046413754349047695, "grad_norm": 1.8737717866897583, "learning_rate": 9.992955333963017e-06, "loss": 0.526, "step": 3882 }, { "epoch": 0.04642571049390835, "grad_norm": 2.3442416191101074, "learning_rate": 9.99294505595065e-06, "loss": 0.6483, "step": 3883 }, { "epoch": 0.046437666638769, "grad_norm": 1.9054542779922485, "learning_rate": 9.992934770451342e-06, "loss": 0.7518, "step": 3884 }, { "epoch": 0.04644962278362964, "grad_norm": 2.0099103450775146, "learning_rate": 9.992924477465111e-06, "loss": 0.6949, "step": 3885 }, { "epoch": 0.046461578928490295, "grad_norm": 1.419437289237976, "learning_rate": 9.992914176991973e-06, "loss": 0.6668, "step": 3886 }, { "epoch": 0.04647353507335095, "grad_norm": 1.9994781017303467, "learning_rate": 9.99290386903194e-06, "loss": 0.7004, "step": 3887 }, { "epoch": 0.0464854912182116, "grad_norm": 1.7578462362289429, "learning_rate": 9.992893553585033e-06, "loss": 0.7587, "step": 3888 }, { "epoch": 0.04649744736307225, "grad_norm": 2.4676623344421387, "learning_rate": 9.992883230651262e-06, "loss": 0.697, "step": 3889 }, { "epoch": 0.0465094035079329, "grad_norm": 2.8596420288085938, "learning_rate": 9.992872900230647e-06, "loss": 0.5542, "step": 3890 }, { "epoch": 0.046521359652793554, "grad_norm": 2.4415576457977295, "learning_rate": 9.9928625623232e-06, "loss": 0.7101, "step": 3891 }, { "epoch": 0.046533315797654205, "grad_norm": 2.022611379623413, "learning_rate": 9.992852216928938e-06, "loss": 0.6413, "step": 3892 }, { "epoch": 0.04654527194251486, "grad_norm": 1.8974204063415527, "learning_rate": 9.992841864047877e-06, "loss": 0.6549, "step": 3893 }, { "epoch": 0.04655722808737551, "grad_norm": 1.7017062902450562, "learning_rate": 9.992831503680031e-06, "loss": 0.656, "step": 3894 }, { "epoch": 0.04656918423223616, "grad_norm": 2.136031150817871, "learning_rate": 9.992821135825417e-06, "loss": 0.7194, "step": 3895 }, { "epoch": 0.04658114037709681, "grad_norm": 2.0221729278564453, "learning_rate": 9.992810760484048e-06, "loss": 0.5694, "step": 3896 }, { "epoch": 0.04659309652195746, "grad_norm": 2.2627546787261963, "learning_rate": 9.992800377655945e-06, "loss": 0.5975, "step": 3897 }, { "epoch": 0.04660505266681811, "grad_norm": 2.6928045749664307, "learning_rate": 9.992789987341118e-06, "loss": 0.6976, "step": 3898 }, { "epoch": 0.04661700881167876, "grad_norm": 2.2342114448547363, "learning_rate": 9.992779589539584e-06, "loss": 0.6462, "step": 3899 }, { "epoch": 0.04662896495653941, "grad_norm": 16.035808563232422, "learning_rate": 9.99276918425136e-06, "loss": 0.6555, "step": 3900 }, { "epoch": 0.046640921101400064, "grad_norm": 1.5519530773162842, "learning_rate": 9.992758771476464e-06, "loss": 0.6594, "step": 3901 }, { "epoch": 0.046652877246260716, "grad_norm": 2.677327871322632, "learning_rate": 9.992748351214903e-06, "loss": 0.6255, "step": 3902 }, { "epoch": 0.04666483339112137, "grad_norm": 1.5221374034881592, "learning_rate": 9.992737923466703e-06, "loss": 0.6775, "step": 3903 }, { "epoch": 0.04667678953598202, "grad_norm": 1.8874497413635254, "learning_rate": 9.99272748823187e-06, "loss": 0.6827, "step": 3904 }, { "epoch": 0.04668874568084267, "grad_norm": 2.9348883628845215, "learning_rate": 9.992717045510428e-06, "loss": 0.5849, "step": 3905 }, { "epoch": 0.04670070182570332, "grad_norm": 4.2424235343933105, "learning_rate": 9.992706595302389e-06, "loss": 0.6735, "step": 3906 }, { "epoch": 0.046712657970563974, "grad_norm": 1.9950224161148071, "learning_rate": 9.992696137607767e-06, "loss": 0.6574, "step": 3907 }, { "epoch": 0.046724614115424626, "grad_norm": 5.656342506408691, "learning_rate": 9.992685672426579e-06, "loss": 0.6488, "step": 3908 }, { "epoch": 0.04673657026028527, "grad_norm": 1.6713027954101562, "learning_rate": 9.992675199758843e-06, "loss": 0.597, "step": 3909 }, { "epoch": 0.04674852640514592, "grad_norm": 1.540745735168457, "learning_rate": 9.992664719604572e-06, "loss": 0.5565, "step": 3910 }, { "epoch": 0.046760482550006574, "grad_norm": 1.4263052940368652, "learning_rate": 9.992654231963782e-06, "loss": 0.686, "step": 3911 }, { "epoch": 0.046772438694867226, "grad_norm": 2.9862911701202393, "learning_rate": 9.99264373683649e-06, "loss": 0.7147, "step": 3912 }, { "epoch": 0.04678439483972788, "grad_norm": 2.423187732696533, "learning_rate": 9.99263323422271e-06, "loss": 0.6471, "step": 3913 }, { "epoch": 0.04679635098458853, "grad_norm": 2.2815158367156982, "learning_rate": 9.992622724122458e-06, "loss": 0.6432, "step": 3914 }, { "epoch": 0.04680830712944918, "grad_norm": 2.0531506538391113, "learning_rate": 9.992612206535753e-06, "loss": 0.7231, "step": 3915 }, { "epoch": 0.04682026327430983, "grad_norm": 7.357848167419434, "learning_rate": 9.992601681462607e-06, "loss": 0.6911, "step": 3916 }, { "epoch": 0.046832219419170484, "grad_norm": 3.3852007389068604, "learning_rate": 9.992591148903037e-06, "loss": 0.5964, "step": 3917 }, { "epoch": 0.046844175564031136, "grad_norm": 3.3469388484954834, "learning_rate": 9.992580608857059e-06, "loss": 0.7074, "step": 3918 }, { "epoch": 0.04685613170889179, "grad_norm": 5.0789570808410645, "learning_rate": 9.992570061324688e-06, "loss": 0.642, "step": 3919 }, { "epoch": 0.04686808785375244, "grad_norm": 1.674613356590271, "learning_rate": 9.99255950630594e-06, "loss": 0.5866, "step": 3920 }, { "epoch": 0.046880043998613084, "grad_norm": 2.6176254749298096, "learning_rate": 9.992548943800833e-06, "loss": 0.6998, "step": 3921 }, { "epoch": 0.046892000143473736, "grad_norm": 2.6015992164611816, "learning_rate": 9.99253837380938e-06, "loss": 0.6122, "step": 3922 }, { "epoch": 0.04690395628833439, "grad_norm": 1.6901500225067139, "learning_rate": 9.9925277963316e-06, "loss": 0.6538, "step": 3923 }, { "epoch": 0.04691591243319504, "grad_norm": 1.9259785413742065, "learning_rate": 9.992517211367503e-06, "loss": 0.633, "step": 3924 }, { "epoch": 0.04692786857805569, "grad_norm": 2.0301175117492676, "learning_rate": 9.992506618917111e-06, "loss": 0.7116, "step": 3925 }, { "epoch": 0.04693982472291634, "grad_norm": 3.200169324874878, "learning_rate": 9.992496018980438e-06, "loss": 0.632, "step": 3926 }, { "epoch": 0.046951780867776995, "grad_norm": 4.035454750061035, "learning_rate": 9.992485411557499e-06, "loss": 0.5194, "step": 3927 }, { "epoch": 0.046963737012637646, "grad_norm": 1.9841853380203247, "learning_rate": 9.99247479664831e-06, "loss": 0.6219, "step": 3928 }, { "epoch": 0.0469756931574983, "grad_norm": 2.3361575603485107, "learning_rate": 9.992464174252888e-06, "loss": 0.6724, "step": 3929 }, { "epoch": 0.04698764930235895, "grad_norm": 2.262906312942505, "learning_rate": 9.992453544371247e-06, "loss": 0.5777, "step": 3930 }, { "epoch": 0.0469996054472196, "grad_norm": 1.6017884016036987, "learning_rate": 9.992442907003406e-06, "loss": 0.5989, "step": 3931 }, { "epoch": 0.047011561592080246, "grad_norm": 2.237645149230957, "learning_rate": 9.992432262149377e-06, "loss": 0.7055, "step": 3932 }, { "epoch": 0.0470235177369409, "grad_norm": 5.05493688583374, "learning_rate": 9.99242160980918e-06, "loss": 0.6492, "step": 3933 }, { "epoch": 0.04703547388180155, "grad_norm": 2.2323479652404785, "learning_rate": 9.992410949982828e-06, "loss": 0.6586, "step": 3934 }, { "epoch": 0.0470474300266622, "grad_norm": 2.251241683959961, "learning_rate": 9.992400282670339e-06, "loss": 0.566, "step": 3935 }, { "epoch": 0.04705938617152285, "grad_norm": 1.8064134120941162, "learning_rate": 9.992389607871727e-06, "loss": 0.6608, "step": 3936 }, { "epoch": 0.047071342316383505, "grad_norm": 3.0107197761535645, "learning_rate": 9.992378925587009e-06, "loss": 0.7197, "step": 3937 }, { "epoch": 0.04708329846124416, "grad_norm": 1.7044075727462769, "learning_rate": 9.9923682358162e-06, "loss": 0.706, "step": 3938 }, { "epoch": 0.04709525460610481, "grad_norm": 1.4633972644805908, "learning_rate": 9.992357538559318e-06, "loss": 0.6454, "step": 3939 }, { "epoch": 0.04710721075096546, "grad_norm": 1.5475085973739624, "learning_rate": 9.99234683381638e-06, "loss": 0.6572, "step": 3940 }, { "epoch": 0.04711916689582611, "grad_norm": 9.150354385375977, "learning_rate": 9.992336121587398e-06, "loss": 0.5601, "step": 3941 }, { "epoch": 0.04713112304068676, "grad_norm": 2.0468780994415283, "learning_rate": 9.99232540187239e-06, "loss": 0.6705, "step": 3942 }, { "epoch": 0.047143079185547415, "grad_norm": 2.8927688598632812, "learning_rate": 9.992314674671376e-06, "loss": 0.5958, "step": 3943 }, { "epoch": 0.04715503533040806, "grad_norm": 1.6416981220245361, "learning_rate": 9.992303939984365e-06, "loss": 0.6719, "step": 3944 }, { "epoch": 0.04716699147526871, "grad_norm": 89.73773193359375, "learning_rate": 9.992293197811375e-06, "loss": 0.6334, "step": 3945 }, { "epoch": 0.04717894762012936, "grad_norm": 2.1431589126586914, "learning_rate": 9.992282448152427e-06, "loss": 0.6865, "step": 3946 }, { "epoch": 0.047190903764990015, "grad_norm": 3.2867891788482666, "learning_rate": 9.992271691007533e-06, "loss": 0.5915, "step": 3947 }, { "epoch": 0.04720285990985067, "grad_norm": 2.73248553276062, "learning_rate": 9.992260926376709e-06, "loss": 0.5921, "step": 3948 }, { "epoch": 0.04721481605471132, "grad_norm": 2.2150933742523193, "learning_rate": 9.992250154259972e-06, "loss": 0.7276, "step": 3949 }, { "epoch": 0.04722677219957197, "grad_norm": 1.584510326385498, "learning_rate": 9.992239374657339e-06, "loss": 0.6272, "step": 3950 }, { "epoch": 0.04723872834443262, "grad_norm": 3.364670753479004, "learning_rate": 9.992228587568824e-06, "loss": 0.5891, "step": 3951 }, { "epoch": 0.047250684489293274, "grad_norm": 2.224553346633911, "learning_rate": 9.992217792994445e-06, "loss": 0.5721, "step": 3952 }, { "epoch": 0.047262640634153925, "grad_norm": 2.596911907196045, "learning_rate": 9.99220699093422e-06, "loss": 0.6986, "step": 3953 }, { "epoch": 0.04727459677901458, "grad_norm": 2.2718803882598877, "learning_rate": 9.99219618138816e-06, "loss": 0.6447, "step": 3954 }, { "epoch": 0.04728655292387523, "grad_norm": 2.4532248973846436, "learning_rate": 9.992185364356285e-06, "loss": 0.6354, "step": 3955 }, { "epoch": 0.047298509068735874, "grad_norm": 2.017366409301758, "learning_rate": 9.99217453983861e-06, "loss": 0.5293, "step": 3956 }, { "epoch": 0.047310465213596525, "grad_norm": 3.6403138637542725, "learning_rate": 9.992163707835152e-06, "loss": 0.6308, "step": 3957 }, { "epoch": 0.04732242135845718, "grad_norm": 2.2772042751312256, "learning_rate": 9.992152868345925e-06, "loss": 0.6321, "step": 3958 }, { "epoch": 0.04733437750331783, "grad_norm": 3.084902048110962, "learning_rate": 9.99214202137095e-06, "loss": 0.5974, "step": 3959 }, { "epoch": 0.04734633364817848, "grad_norm": 3.4805397987365723, "learning_rate": 9.99213116691024e-06, "loss": 0.6183, "step": 3960 }, { "epoch": 0.04735828979303913, "grad_norm": 1.5780079364776611, "learning_rate": 9.992120304963808e-06, "loss": 0.7294, "step": 3961 }, { "epoch": 0.047370245937899784, "grad_norm": 2.6635429859161377, "learning_rate": 9.992109435531677e-06, "loss": 0.593, "step": 3962 }, { "epoch": 0.047382202082760436, "grad_norm": 2.545809268951416, "learning_rate": 9.99209855861386e-06, "loss": 0.635, "step": 3963 }, { "epoch": 0.04739415822762109, "grad_norm": 1.8895965814590454, "learning_rate": 9.992087674210374e-06, "loss": 0.6214, "step": 3964 }, { "epoch": 0.04740611437248174, "grad_norm": 1.5818103551864624, "learning_rate": 9.992076782321232e-06, "loss": 0.7591, "step": 3965 }, { "epoch": 0.04741807051734239, "grad_norm": 2.787693500518799, "learning_rate": 9.992065882946456e-06, "loss": 0.6932, "step": 3966 }, { "epoch": 0.04743002666220304, "grad_norm": 8.854829788208008, "learning_rate": 9.992054976086058e-06, "loss": 0.643, "step": 3967 }, { "epoch": 0.04744198280706369, "grad_norm": 2.097079277038574, "learning_rate": 9.992044061740056e-06, "loss": 0.6256, "step": 3968 }, { "epoch": 0.04745393895192434, "grad_norm": 3.375302314758301, "learning_rate": 9.992033139908466e-06, "loss": 0.629, "step": 3969 }, { "epoch": 0.04746589509678499, "grad_norm": 3.372457504272461, "learning_rate": 9.992022210591305e-06, "loss": 0.6396, "step": 3970 }, { "epoch": 0.04747785124164564, "grad_norm": 2.7001285552978516, "learning_rate": 9.99201127378859e-06, "loss": 0.624, "step": 3971 }, { "epoch": 0.047489807386506294, "grad_norm": 3.1481635570526123, "learning_rate": 9.992000329500333e-06, "loss": 0.7564, "step": 3972 }, { "epoch": 0.047501763531366946, "grad_norm": 1.9204211235046387, "learning_rate": 9.991989377726557e-06, "loss": 0.6091, "step": 3973 }, { "epoch": 0.0475137196762276, "grad_norm": 2.529589891433716, "learning_rate": 9.991978418467275e-06, "loss": 0.5605, "step": 3974 }, { "epoch": 0.04752567582108825, "grad_norm": 8.346297264099121, "learning_rate": 9.991967451722501e-06, "loss": 0.673, "step": 3975 }, { "epoch": 0.0475376319659489, "grad_norm": 3.0377907752990723, "learning_rate": 9.991956477492256e-06, "loss": 0.6143, "step": 3976 }, { "epoch": 0.04754958811080955, "grad_norm": 2.8715760707855225, "learning_rate": 9.991945495776554e-06, "loss": 0.7155, "step": 3977 }, { "epoch": 0.047561544255670204, "grad_norm": 1.9776893854141235, "learning_rate": 9.991934506575413e-06, "loss": 0.6477, "step": 3978 }, { "epoch": 0.047573500400530856, "grad_norm": 2.2732226848602295, "learning_rate": 9.991923509888848e-06, "loss": 0.6375, "step": 3979 }, { "epoch": 0.0475854565453915, "grad_norm": 2.636084794998169, "learning_rate": 9.991912505716875e-06, "loss": 0.6247, "step": 3980 }, { "epoch": 0.04759741269025215, "grad_norm": 1.8271678686141968, "learning_rate": 9.991901494059513e-06, "loss": 0.638, "step": 3981 }, { "epoch": 0.047609368835112804, "grad_norm": 1.7751843929290771, "learning_rate": 9.991890474916775e-06, "loss": 0.7145, "step": 3982 }, { "epoch": 0.047621324979973456, "grad_norm": 1.5823441743850708, "learning_rate": 9.991879448288682e-06, "loss": 0.691, "step": 3983 }, { "epoch": 0.04763328112483411, "grad_norm": 2.004845380783081, "learning_rate": 9.991868414175246e-06, "loss": 0.6409, "step": 3984 }, { "epoch": 0.04764523726969476, "grad_norm": 1.8928433656692505, "learning_rate": 9.991857372576485e-06, "loss": 0.5967, "step": 3985 }, { "epoch": 0.04765719341455541, "grad_norm": 1.4582867622375488, "learning_rate": 9.991846323492419e-06, "loss": 0.5859, "step": 3986 }, { "epoch": 0.04766914955941606, "grad_norm": 1.9117133617401123, "learning_rate": 9.991835266923059e-06, "loss": 0.5893, "step": 3987 }, { "epoch": 0.047681105704276715, "grad_norm": 2.2440199851989746, "learning_rate": 9.991824202868425e-06, "loss": 0.7131, "step": 3988 }, { "epoch": 0.047693061849137366, "grad_norm": 1.9877296686172485, "learning_rate": 9.991813131328534e-06, "loss": 0.6786, "step": 3989 }, { "epoch": 0.04770501799399802, "grad_norm": 1.9922125339508057, "learning_rate": 9.9918020523034e-06, "loss": 0.6984, "step": 3990 }, { "epoch": 0.04771697413885866, "grad_norm": 3.4252800941467285, "learning_rate": 9.991790965793042e-06, "loss": 0.6178, "step": 3991 }, { "epoch": 0.047728930283719315, "grad_norm": 2.819753885269165, "learning_rate": 9.991779871797476e-06, "loss": 0.6676, "step": 3992 }, { "epoch": 0.047740886428579966, "grad_norm": 2.920839786529541, "learning_rate": 9.991768770316716e-06, "loss": 0.6685, "step": 3993 }, { "epoch": 0.04775284257344062, "grad_norm": 1.6761151552200317, "learning_rate": 9.991757661350785e-06, "loss": 0.6158, "step": 3994 }, { "epoch": 0.04776479871830127, "grad_norm": 3.6528573036193848, "learning_rate": 9.991746544899692e-06, "loss": 0.6931, "step": 3995 }, { "epoch": 0.04777675486316192, "grad_norm": 1.6802773475646973, "learning_rate": 9.991735420963459e-06, "loss": 0.608, "step": 3996 }, { "epoch": 0.04778871100802257, "grad_norm": 4.154604911804199, "learning_rate": 9.9917242895421e-06, "loss": 0.7149, "step": 3997 }, { "epoch": 0.047800667152883225, "grad_norm": 7.636675834655762, "learning_rate": 9.991713150635633e-06, "loss": 0.6811, "step": 3998 }, { "epoch": 0.04781262329774388, "grad_norm": 1.5718193054199219, "learning_rate": 9.991702004244076e-06, "loss": 0.5629, "step": 3999 }, { "epoch": 0.04782457944260453, "grad_norm": 1.9799062013626099, "learning_rate": 9.991690850367442e-06, "loss": 0.5683, "step": 4000 }, { "epoch": 0.04783653558746518, "grad_norm": 1.6273033618927002, "learning_rate": 9.991679689005751e-06, "loss": 0.59, "step": 4001 }, { "epoch": 0.04784849173232583, "grad_norm": 1.9516247510910034, "learning_rate": 9.991668520159018e-06, "loss": 0.6348, "step": 4002 }, { "epoch": 0.04786044787718648, "grad_norm": 3.3975868225097656, "learning_rate": 9.991657343827262e-06, "loss": 0.7221, "step": 4003 }, { "epoch": 0.04787240402204713, "grad_norm": 1.8347113132476807, "learning_rate": 9.991646160010496e-06, "loss": 0.5906, "step": 4004 }, { "epoch": 0.04788436016690778, "grad_norm": 4.62253999710083, "learning_rate": 9.99163496870874e-06, "loss": 0.6922, "step": 4005 }, { "epoch": 0.04789631631176843, "grad_norm": 2.086484432220459, "learning_rate": 9.991623769922008e-06, "loss": 0.7307, "step": 4006 }, { "epoch": 0.047908272456629084, "grad_norm": 3.494523048400879, "learning_rate": 9.99161256365032e-06, "loss": 0.7256, "step": 4007 }, { "epoch": 0.047920228601489735, "grad_norm": 2.9217641353607178, "learning_rate": 9.99160134989369e-06, "loss": 0.5861, "step": 4008 }, { "epoch": 0.04793218474635039, "grad_norm": 2.5078330039978027, "learning_rate": 9.991590128652137e-06, "loss": 0.6614, "step": 4009 }, { "epoch": 0.04794414089121104, "grad_norm": 1.6083630323410034, "learning_rate": 9.991578899925677e-06, "loss": 0.6246, "step": 4010 }, { "epoch": 0.04795609703607169, "grad_norm": 2.139240264892578, "learning_rate": 9.991567663714325e-06, "loss": 0.5865, "step": 4011 }, { "epoch": 0.04796805318093234, "grad_norm": 2.1991398334503174, "learning_rate": 9.991556420018101e-06, "loss": 0.7162, "step": 4012 }, { "epoch": 0.047980009325792994, "grad_norm": 1.6074769496917725, "learning_rate": 9.99154516883702e-06, "loss": 0.6506, "step": 4013 }, { "epoch": 0.047991965470653646, "grad_norm": 1.286074161529541, "learning_rate": 9.991533910171101e-06, "loss": 0.5989, "step": 4014 }, { "epoch": 0.04800392161551429, "grad_norm": 1.6158512830734253, "learning_rate": 9.991522644020357e-06, "loss": 0.6138, "step": 4015 }, { "epoch": 0.04801587776037494, "grad_norm": 1.583996057510376, "learning_rate": 9.991511370384808e-06, "loss": 0.6401, "step": 4016 }, { "epoch": 0.048027833905235594, "grad_norm": 1.911704182624817, "learning_rate": 9.99150008926447e-06, "loss": 0.6313, "step": 4017 }, { "epoch": 0.048039790050096245, "grad_norm": 3.2865679264068604, "learning_rate": 9.991488800659358e-06, "loss": 0.738, "step": 4018 }, { "epoch": 0.0480517461949569, "grad_norm": 2.0270261764526367, "learning_rate": 9.991477504569493e-06, "loss": 0.676, "step": 4019 }, { "epoch": 0.04806370233981755, "grad_norm": 1.774669885635376, "learning_rate": 9.991466200994889e-06, "loss": 0.6614, "step": 4020 }, { "epoch": 0.0480756584846782, "grad_norm": 2.0908823013305664, "learning_rate": 9.991454889935564e-06, "loss": 0.6609, "step": 4021 }, { "epoch": 0.04808761462953885, "grad_norm": 2.69893217086792, "learning_rate": 9.991443571391535e-06, "loss": 0.7079, "step": 4022 }, { "epoch": 0.048099570774399504, "grad_norm": 3.257875442504883, "learning_rate": 9.991432245362817e-06, "loss": 0.5643, "step": 4023 }, { "epoch": 0.048111526919260156, "grad_norm": 1.7042596340179443, "learning_rate": 9.99142091184943e-06, "loss": 0.5799, "step": 4024 }, { "epoch": 0.04812348306412081, "grad_norm": 2.1211743354797363, "learning_rate": 9.991409570851389e-06, "loss": 0.669, "step": 4025 }, { "epoch": 0.04813543920898146, "grad_norm": 4.308719158172607, "learning_rate": 9.991398222368712e-06, "loss": 0.7066, "step": 4026 }, { "epoch": 0.048147395353842104, "grad_norm": 1.660207986831665, "learning_rate": 9.991386866401415e-06, "loss": 0.5994, "step": 4027 }, { "epoch": 0.048159351498702756, "grad_norm": 2.651977300643921, "learning_rate": 9.991375502949518e-06, "loss": 0.5941, "step": 4028 }, { "epoch": 0.04817130764356341, "grad_norm": 12.130253791809082, "learning_rate": 9.991364132013033e-06, "loss": 0.6332, "step": 4029 }, { "epoch": 0.04818326378842406, "grad_norm": 2.6096763610839844, "learning_rate": 9.99135275359198e-06, "loss": 0.7154, "step": 4030 }, { "epoch": 0.04819521993328471, "grad_norm": 2.5155577659606934, "learning_rate": 9.991341367686376e-06, "loss": 0.646, "step": 4031 }, { "epoch": 0.04820717607814536, "grad_norm": 2.958888292312622, "learning_rate": 9.99132997429624e-06, "loss": 0.717, "step": 4032 }, { "epoch": 0.048219132223006014, "grad_norm": 4.198020935058594, "learning_rate": 9.991318573421584e-06, "loss": 0.6172, "step": 4033 }, { "epoch": 0.048231088367866666, "grad_norm": 2.6212832927703857, "learning_rate": 9.99130716506243e-06, "loss": 0.6477, "step": 4034 }, { "epoch": 0.04824304451272732, "grad_norm": 2.423884868621826, "learning_rate": 9.991295749218792e-06, "loss": 0.564, "step": 4035 }, { "epoch": 0.04825500065758797, "grad_norm": 1.698779821395874, "learning_rate": 9.991284325890688e-06, "loss": 0.6161, "step": 4036 }, { "epoch": 0.04826695680244862, "grad_norm": 1.6516499519348145, "learning_rate": 9.991272895078137e-06, "loss": 0.5695, "step": 4037 }, { "epoch": 0.04827891294730927, "grad_norm": 1.3477059602737427, "learning_rate": 9.991261456781154e-06, "loss": 0.6135, "step": 4038 }, { "epoch": 0.04829086909216992, "grad_norm": 1.7493013143539429, "learning_rate": 9.991250010999756e-06, "loss": 0.6497, "step": 4039 }, { "epoch": 0.04830282523703057, "grad_norm": 1.571705937385559, "learning_rate": 9.991238557733963e-06, "loss": 0.5834, "step": 4040 }, { "epoch": 0.04831478138189122, "grad_norm": 4.183403491973877, "learning_rate": 9.991227096983786e-06, "loss": 0.5598, "step": 4041 }, { "epoch": 0.04832673752675187, "grad_norm": 1.792851209640503, "learning_rate": 9.99121562874925e-06, "loss": 0.6137, "step": 4042 }, { "epoch": 0.048338693671612525, "grad_norm": 2.056753158569336, "learning_rate": 9.991204153030366e-06, "loss": 0.6445, "step": 4043 }, { "epoch": 0.048350649816473176, "grad_norm": 1.588340163230896, "learning_rate": 9.991192669827154e-06, "loss": 0.6846, "step": 4044 }, { "epoch": 0.04836260596133383, "grad_norm": 2.2255399227142334, "learning_rate": 9.991181179139633e-06, "loss": 0.6544, "step": 4045 }, { "epoch": 0.04837456210619448, "grad_norm": 1.838137149810791, "learning_rate": 9.991169680967817e-06, "loss": 0.6798, "step": 4046 }, { "epoch": 0.04838651825105513, "grad_norm": 3.6820571422576904, "learning_rate": 9.991158175311722e-06, "loss": 0.6112, "step": 4047 }, { "epoch": 0.04839847439591578, "grad_norm": 2.709584951400757, "learning_rate": 9.99114666217137e-06, "loss": 0.7213, "step": 4048 }, { "epoch": 0.048410430540776435, "grad_norm": 1.7724754810333252, "learning_rate": 9.991135141546775e-06, "loss": 0.5599, "step": 4049 }, { "epoch": 0.04842238668563709, "grad_norm": 1.5929689407348633, "learning_rate": 9.991123613437957e-06, "loss": 0.5314, "step": 4050 }, { "epoch": 0.04843434283049773, "grad_norm": 1.6482902765274048, "learning_rate": 9.99111207784493e-06, "loss": 0.6048, "step": 4051 }, { "epoch": 0.04844629897535838, "grad_norm": 2.3563430309295654, "learning_rate": 9.99110053476771e-06, "loss": 0.6688, "step": 4052 }, { "epoch": 0.048458255120219035, "grad_norm": 7.14281702041626, "learning_rate": 9.99108898420632e-06, "loss": 0.7033, "step": 4053 }, { "epoch": 0.048470211265079687, "grad_norm": 2.213866949081421, "learning_rate": 9.991077426160774e-06, "loss": 0.7254, "step": 4054 }, { "epoch": 0.04848216740994034, "grad_norm": 1.5216792821884155, "learning_rate": 9.99106586063109e-06, "loss": 0.6093, "step": 4055 }, { "epoch": 0.04849412355480099, "grad_norm": 1.4262776374816895, "learning_rate": 9.991054287617285e-06, "loss": 0.6178, "step": 4056 }, { "epoch": 0.04850607969966164, "grad_norm": 6.909708499908447, "learning_rate": 9.991042707119375e-06, "loss": 0.6931, "step": 4057 }, { "epoch": 0.04851803584452229, "grad_norm": 20.5408935546875, "learning_rate": 9.991031119137379e-06, "loss": 0.5595, "step": 4058 }, { "epoch": 0.048529991989382945, "grad_norm": 2.205620288848877, "learning_rate": 9.991019523671315e-06, "loss": 0.6277, "step": 4059 }, { "epoch": 0.0485419481342436, "grad_norm": 1.6963826417922974, "learning_rate": 9.991007920721199e-06, "loss": 0.6011, "step": 4060 }, { "epoch": 0.04855390427910425, "grad_norm": 2.8851680755615234, "learning_rate": 9.99099631028705e-06, "loss": 0.6963, "step": 4061 }, { "epoch": 0.04856586042396489, "grad_norm": 1.5179699659347534, "learning_rate": 9.990984692368883e-06, "loss": 0.5781, "step": 4062 }, { "epoch": 0.048577816568825545, "grad_norm": 2.1349778175354004, "learning_rate": 9.990973066966715e-06, "loss": 0.7441, "step": 4063 }, { "epoch": 0.0485897727136862, "grad_norm": 2.033345937728882, "learning_rate": 9.990961434080568e-06, "loss": 0.7547, "step": 4064 }, { "epoch": 0.04860172885854685, "grad_norm": 5.20054817199707, "learning_rate": 9.990949793710457e-06, "loss": 0.708, "step": 4065 }, { "epoch": 0.0486136850034075, "grad_norm": 3.0178940296173096, "learning_rate": 9.990938145856397e-06, "loss": 0.6321, "step": 4066 }, { "epoch": 0.04862564114826815, "grad_norm": 2.184189558029175, "learning_rate": 9.990926490518408e-06, "loss": 0.6258, "step": 4067 }, { "epoch": 0.048637597293128804, "grad_norm": 3.2529382705688477, "learning_rate": 9.990914827696508e-06, "loss": 0.6983, "step": 4068 }, { "epoch": 0.048649553437989455, "grad_norm": 1.9475963115692139, "learning_rate": 9.990903157390712e-06, "loss": 0.674, "step": 4069 }, { "epoch": 0.04866150958285011, "grad_norm": 2.032719373703003, "learning_rate": 9.99089147960104e-06, "loss": 0.64, "step": 4070 }, { "epoch": 0.04867346572771076, "grad_norm": 1.9594327211380005, "learning_rate": 9.990879794327508e-06, "loss": 0.5442, "step": 4071 }, { "epoch": 0.04868542187257141, "grad_norm": 2.4082343578338623, "learning_rate": 9.990868101570135e-06, "loss": 0.7596, "step": 4072 }, { "epoch": 0.04869737801743206, "grad_norm": 1.5257865190505981, "learning_rate": 9.990856401328936e-06, "loss": 0.6104, "step": 4073 }, { "epoch": 0.04870933416229271, "grad_norm": 2.8176774978637695, "learning_rate": 9.990844693603931e-06, "loss": 0.6138, "step": 4074 }, { "epoch": 0.04872129030715336, "grad_norm": 1.90521240234375, "learning_rate": 9.990832978395137e-06, "loss": 0.6483, "step": 4075 }, { "epoch": 0.04873324645201401, "grad_norm": 1.7514725923538208, "learning_rate": 9.99082125570257e-06, "loss": 0.5881, "step": 4076 }, { "epoch": 0.04874520259687466, "grad_norm": 3.303527593612671, "learning_rate": 9.990809525526251e-06, "loss": 0.6186, "step": 4077 }, { "epoch": 0.048757158741735314, "grad_norm": 2.3244521617889404, "learning_rate": 9.990797787866193e-06, "loss": 0.6443, "step": 4078 }, { "epoch": 0.048769114886595966, "grad_norm": 2.0362548828125, "learning_rate": 9.990786042722417e-06, "loss": 0.6105, "step": 4079 }, { "epoch": 0.04878107103145662, "grad_norm": 1.7584729194641113, "learning_rate": 9.99077429009494e-06, "loss": 0.6966, "step": 4080 }, { "epoch": 0.04879302717631727, "grad_norm": 3.0033786296844482, "learning_rate": 9.990762529983777e-06, "loss": 0.833, "step": 4081 }, { "epoch": 0.04880498332117792, "grad_norm": 1.7311627864837646, "learning_rate": 9.990750762388949e-06, "loss": 0.6625, "step": 4082 }, { "epoch": 0.04881693946603857, "grad_norm": 2.309131145477295, "learning_rate": 9.990738987310475e-06, "loss": 0.593, "step": 4083 }, { "epoch": 0.048828895610899224, "grad_norm": 2.426211357116699, "learning_rate": 9.990727204748368e-06, "loss": 0.6524, "step": 4084 }, { "epoch": 0.048840851755759876, "grad_norm": 1.8967972993850708, "learning_rate": 9.990715414702647e-06, "loss": 0.6492, "step": 4085 }, { "epoch": 0.04885280790062052, "grad_norm": 1.7551013231277466, "learning_rate": 9.990703617173332e-06, "loss": 0.6664, "step": 4086 }, { "epoch": 0.04886476404548117, "grad_norm": 1.798642873764038, "learning_rate": 9.990691812160438e-06, "loss": 0.5855, "step": 4087 }, { "epoch": 0.048876720190341824, "grad_norm": 5.979931831359863, "learning_rate": 9.990679999663986e-06, "loss": 0.6399, "step": 4088 }, { "epoch": 0.048888676335202476, "grad_norm": 1.8210376501083374, "learning_rate": 9.990668179683989e-06, "loss": 0.6402, "step": 4089 }, { "epoch": 0.04890063248006313, "grad_norm": 2.471921443939209, "learning_rate": 9.990656352220469e-06, "loss": 0.6719, "step": 4090 }, { "epoch": 0.04891258862492378, "grad_norm": 1.3996776342391968, "learning_rate": 9.990644517273442e-06, "loss": 0.5613, "step": 4091 }, { "epoch": 0.04892454476978443, "grad_norm": 4.358223915100098, "learning_rate": 9.990632674842924e-06, "loss": 0.7044, "step": 4092 }, { "epoch": 0.04893650091464508, "grad_norm": 1.658920407295227, "learning_rate": 9.990620824928937e-06, "loss": 0.675, "step": 4093 }, { "epoch": 0.048948457059505734, "grad_norm": 2.3158533573150635, "learning_rate": 9.990608967531494e-06, "loss": 0.5926, "step": 4094 }, { "epoch": 0.048960413204366386, "grad_norm": 2.5981993675231934, "learning_rate": 9.990597102650616e-06, "loss": 0.6206, "step": 4095 }, { "epoch": 0.04897236934922704, "grad_norm": 1.6941471099853516, "learning_rate": 9.99058523028632e-06, "loss": 0.6984, "step": 4096 }, { "epoch": 0.04898432549408769, "grad_norm": 17.807117462158203, "learning_rate": 9.990573350438625e-06, "loss": 0.7196, "step": 4097 }, { "epoch": 0.048996281638948334, "grad_norm": 7.135753154754639, "learning_rate": 9.990561463107546e-06, "loss": 0.5817, "step": 4098 }, { "epoch": 0.049008237783808986, "grad_norm": 1.9802786111831665, "learning_rate": 9.990549568293103e-06, "loss": 0.6984, "step": 4099 }, { "epoch": 0.04902019392866964, "grad_norm": 2.25692081451416, "learning_rate": 9.990537665995313e-06, "loss": 0.5994, "step": 4100 }, { "epoch": 0.04903215007353029, "grad_norm": 1.6132206916809082, "learning_rate": 9.990525756214194e-06, "loss": 0.5868, "step": 4101 }, { "epoch": 0.04904410621839094, "grad_norm": 1.6660926342010498, "learning_rate": 9.990513838949765e-06, "loss": 0.6127, "step": 4102 }, { "epoch": 0.04905606236325159, "grad_norm": 1.8782215118408203, "learning_rate": 9.990501914202041e-06, "loss": 0.6793, "step": 4103 }, { "epoch": 0.049068018508112245, "grad_norm": 2.460381269454956, "learning_rate": 9.990489981971042e-06, "loss": 0.721, "step": 4104 }, { "epoch": 0.049079974652972896, "grad_norm": 3.958552122116089, "learning_rate": 9.990478042256787e-06, "loss": 0.7387, "step": 4105 }, { "epoch": 0.04909193079783355, "grad_norm": 2.998687982559204, "learning_rate": 9.99046609505929e-06, "loss": 0.6953, "step": 4106 }, { "epoch": 0.0491038869426942, "grad_norm": 1.6778329610824585, "learning_rate": 9.990454140378573e-06, "loss": 0.6291, "step": 4107 }, { "epoch": 0.04911584308755485, "grad_norm": 2.556810140609741, "learning_rate": 9.990442178214651e-06, "loss": 0.6119, "step": 4108 }, { "epoch": 0.0491277992324155, "grad_norm": 2.8406012058258057, "learning_rate": 9.990430208567546e-06, "loss": 0.6137, "step": 4109 }, { "epoch": 0.04913975537727615, "grad_norm": 3.1339428424835205, "learning_rate": 9.990418231437271e-06, "loss": 0.6922, "step": 4110 }, { "epoch": 0.0491517115221368, "grad_norm": 9.607048034667969, "learning_rate": 9.990406246823846e-06, "loss": 0.5205, "step": 4111 }, { "epoch": 0.04916366766699745, "grad_norm": 1.7220550775527954, "learning_rate": 9.990394254727289e-06, "loss": 0.7083, "step": 4112 }, { "epoch": 0.0491756238118581, "grad_norm": 1.6447668075561523, "learning_rate": 9.990382255147619e-06, "loss": 0.6272, "step": 4113 }, { "epoch": 0.049187579956718755, "grad_norm": 2.8270277976989746, "learning_rate": 9.990370248084854e-06, "loss": 0.5008, "step": 4114 }, { "epoch": 0.04919953610157941, "grad_norm": 4.437140464782715, "learning_rate": 9.990358233539008e-06, "loss": 0.6924, "step": 4115 }, { "epoch": 0.04921149224644006, "grad_norm": 1.832180142402649, "learning_rate": 9.990346211510104e-06, "loss": 0.6099, "step": 4116 }, { "epoch": 0.04922344839130071, "grad_norm": 28.143911361694336, "learning_rate": 9.990334181998157e-06, "loss": 0.6332, "step": 4117 }, { "epoch": 0.04923540453616136, "grad_norm": 2.052863121032715, "learning_rate": 9.990322145003189e-06, "loss": 0.7426, "step": 4118 }, { "epoch": 0.049247360681022014, "grad_norm": 2.0438027381896973, "learning_rate": 9.990310100525212e-06, "loss": 0.5521, "step": 4119 }, { "epoch": 0.049259316825882665, "grad_norm": 2.0510387420654297, "learning_rate": 9.990298048564249e-06, "loss": 0.588, "step": 4120 }, { "epoch": 0.04927127297074332, "grad_norm": 3.7120819091796875, "learning_rate": 9.990285989120316e-06, "loss": 0.6075, "step": 4121 }, { "epoch": 0.04928322911560396, "grad_norm": 1.4272297620773315, "learning_rate": 9.990273922193433e-06, "loss": 0.6301, "step": 4122 }, { "epoch": 0.04929518526046461, "grad_norm": 2.2864503860473633, "learning_rate": 9.990261847783613e-06, "loss": 0.6119, "step": 4123 }, { "epoch": 0.049307141405325265, "grad_norm": 1.7650314569473267, "learning_rate": 9.99024976589088e-06, "loss": 0.5802, "step": 4124 }, { "epoch": 0.04931909755018592, "grad_norm": 2.0006985664367676, "learning_rate": 9.990237676515248e-06, "loss": 0.6204, "step": 4125 }, { "epoch": 0.04933105369504657, "grad_norm": 2.8140861988067627, "learning_rate": 9.990225579656738e-06, "loss": 0.5384, "step": 4126 }, { "epoch": 0.04934300983990722, "grad_norm": 1.6605215072631836, "learning_rate": 9.990213475315368e-06, "loss": 0.6793, "step": 4127 }, { "epoch": 0.04935496598476787, "grad_norm": 3.628575563430786, "learning_rate": 9.990201363491154e-06, "loss": 0.5853, "step": 4128 }, { "epoch": 0.049366922129628524, "grad_norm": 2.683485507965088, "learning_rate": 9.990189244184114e-06, "loss": 0.7075, "step": 4129 }, { "epoch": 0.049378878274489175, "grad_norm": 2.203747272491455, "learning_rate": 9.99017711739427e-06, "loss": 0.6928, "step": 4130 }, { "epoch": 0.04939083441934983, "grad_norm": 3.723292589187622, "learning_rate": 9.990164983121638e-06, "loss": 0.6543, "step": 4131 }, { "epoch": 0.04940279056421048, "grad_norm": 1.7043719291687012, "learning_rate": 9.990152841366234e-06, "loss": 0.6036, "step": 4132 }, { "epoch": 0.049414746709071124, "grad_norm": 1.7509973049163818, "learning_rate": 9.990140692128079e-06, "loss": 0.6645, "step": 4133 }, { "epoch": 0.049426702853931775, "grad_norm": 3.1536917686462402, "learning_rate": 9.99012853540719e-06, "loss": 0.608, "step": 4134 }, { "epoch": 0.04943865899879243, "grad_norm": 7.387462139129639, "learning_rate": 9.990116371203585e-06, "loss": 0.6907, "step": 4135 }, { "epoch": 0.04945061514365308, "grad_norm": 2.4544475078582764, "learning_rate": 9.990104199517283e-06, "loss": 0.6255, "step": 4136 }, { "epoch": 0.04946257128851373, "grad_norm": 1.8157892227172852, "learning_rate": 9.990092020348304e-06, "loss": 0.6323, "step": 4137 }, { "epoch": 0.04947452743337438, "grad_norm": 21.94256591796875, "learning_rate": 9.990079833696661e-06, "loss": 0.6047, "step": 4138 }, { "epoch": 0.049486483578235034, "grad_norm": 4.695467948913574, "learning_rate": 9.990067639562377e-06, "loss": 0.6778, "step": 4139 }, { "epoch": 0.049498439723095686, "grad_norm": 2.95280122756958, "learning_rate": 9.99005543794547e-06, "loss": 0.6788, "step": 4140 }, { "epoch": 0.04951039586795634, "grad_norm": 3.4357681274414062, "learning_rate": 9.990043228845954e-06, "loss": 0.5304, "step": 4141 }, { "epoch": 0.04952235201281699, "grad_norm": 1.6189872026443481, "learning_rate": 9.990031012263854e-06, "loss": 0.6192, "step": 4142 }, { "epoch": 0.04953430815767764, "grad_norm": 1.970755934715271, "learning_rate": 9.990018788199182e-06, "loss": 0.5998, "step": 4143 }, { "epoch": 0.04954626430253829, "grad_norm": 1.442761778831482, "learning_rate": 9.990006556651961e-06, "loss": 0.6277, "step": 4144 }, { "epoch": 0.04955822044739894, "grad_norm": 1.7070528268814087, "learning_rate": 9.989994317622206e-06, "loss": 0.6163, "step": 4145 }, { "epoch": 0.04957017659225959, "grad_norm": 1.8530436754226685, "learning_rate": 9.989982071109939e-06, "loss": 0.6101, "step": 4146 }, { "epoch": 0.04958213273712024, "grad_norm": 5.612704753875732, "learning_rate": 9.989969817115175e-06, "loss": 0.5594, "step": 4147 }, { "epoch": 0.04959408888198089, "grad_norm": 3.1651060581207275, "learning_rate": 9.989957555637933e-06, "loss": 0.6738, "step": 4148 }, { "epoch": 0.049606045026841544, "grad_norm": 3.253305196762085, "learning_rate": 9.98994528667823e-06, "loss": 0.6294, "step": 4149 }, { "epoch": 0.049618001171702196, "grad_norm": 2.2588701248168945, "learning_rate": 9.989933010236091e-06, "loss": 0.6702, "step": 4150 }, { "epoch": 0.04962995731656285, "grad_norm": 2.071676254272461, "learning_rate": 9.989920726311527e-06, "loss": 0.6224, "step": 4151 }, { "epoch": 0.0496419134614235, "grad_norm": 2.1625030040740967, "learning_rate": 9.989908434904558e-06, "loss": 0.6995, "step": 4152 }, { "epoch": 0.04965386960628415, "grad_norm": 2.7066802978515625, "learning_rate": 9.989896136015205e-06, "loss": 0.6617, "step": 4153 }, { "epoch": 0.0496658257511448, "grad_norm": 2.6401071548461914, "learning_rate": 9.989883829643485e-06, "loss": 0.7056, "step": 4154 }, { "epoch": 0.049677781896005455, "grad_norm": 4.5683488845825195, "learning_rate": 9.989871515789417e-06, "loss": 0.6439, "step": 4155 }, { "epoch": 0.049689738040866106, "grad_norm": 2.8914945125579834, "learning_rate": 9.989859194453018e-06, "loss": 0.6947, "step": 4156 }, { "epoch": 0.04970169418572675, "grad_norm": 2.9773573875427246, "learning_rate": 9.989846865634307e-06, "loss": 0.6565, "step": 4157 }, { "epoch": 0.0497136503305874, "grad_norm": 2.986801862716675, "learning_rate": 9.989834529333305e-06, "loss": 0.8054, "step": 4158 }, { "epoch": 0.049725606475448055, "grad_norm": 4.0778679847717285, "learning_rate": 9.989822185550025e-06, "loss": 0.6453, "step": 4159 }, { "epoch": 0.049737562620308706, "grad_norm": 5.084489822387695, "learning_rate": 9.989809834284492e-06, "loss": 0.5697, "step": 4160 }, { "epoch": 0.04974951876516936, "grad_norm": 2.051588773727417, "learning_rate": 9.98979747553672e-06, "loss": 0.6221, "step": 4161 }, { "epoch": 0.04976147491003001, "grad_norm": 6.4723944664001465, "learning_rate": 9.989785109306728e-06, "loss": 0.6556, "step": 4162 }, { "epoch": 0.04977343105489066, "grad_norm": 2.4319515228271484, "learning_rate": 9.989772735594539e-06, "loss": 0.6146, "step": 4163 }, { "epoch": 0.04978538719975131, "grad_norm": 1.9558149576187134, "learning_rate": 9.989760354400165e-06, "loss": 0.5931, "step": 4164 }, { "epoch": 0.049797343344611965, "grad_norm": 3.737590789794922, "learning_rate": 9.98974796572363e-06, "loss": 0.7172, "step": 4165 }, { "epoch": 0.04980929948947262, "grad_norm": 6.903892993927002, "learning_rate": 9.989735569564946e-06, "loss": 0.7116, "step": 4166 }, { "epoch": 0.04982125563433327, "grad_norm": 1.872367262840271, "learning_rate": 9.989723165924139e-06, "loss": 0.6271, "step": 4167 }, { "epoch": 0.04983321177919392, "grad_norm": 3.6308517456054688, "learning_rate": 9.989710754801223e-06, "loss": 0.7092, "step": 4168 }, { "epoch": 0.049845167924054565, "grad_norm": 1.97615647315979, "learning_rate": 9.989698336196218e-06, "loss": 0.7408, "step": 4169 }, { "epoch": 0.049857124068915216, "grad_norm": 1.9972221851348877, "learning_rate": 9.989685910109144e-06, "loss": 0.6272, "step": 4170 }, { "epoch": 0.04986908021377587, "grad_norm": 9.360368728637695, "learning_rate": 9.989673476540018e-06, "loss": 0.5565, "step": 4171 }, { "epoch": 0.04988103635863652, "grad_norm": 2.5517327785491943, "learning_rate": 9.989661035488858e-06, "loss": 0.6847, "step": 4172 }, { "epoch": 0.04989299250349717, "grad_norm": 2.1339240074157715, "learning_rate": 9.989648586955685e-06, "loss": 0.743, "step": 4173 }, { "epoch": 0.04990494864835782, "grad_norm": 1.9875495433807373, "learning_rate": 9.989636130940514e-06, "loss": 0.5983, "step": 4174 }, { "epoch": 0.049916904793218475, "grad_norm": 1.776881456375122, "learning_rate": 9.989623667443366e-06, "loss": 0.7752, "step": 4175 }, { "epoch": 0.04992886093807913, "grad_norm": 3.6939239501953125, "learning_rate": 9.989611196464261e-06, "loss": 0.6515, "step": 4176 }, { "epoch": 0.04994081708293978, "grad_norm": 1.7714451551437378, "learning_rate": 9.989598718003217e-06, "loss": 0.6524, "step": 4177 }, { "epoch": 0.04995277322780043, "grad_norm": 5.569236755371094, "learning_rate": 9.989586232060251e-06, "loss": 0.5496, "step": 4178 }, { "epoch": 0.04996472937266108, "grad_norm": 2.134251594543457, "learning_rate": 9.989573738635383e-06, "loss": 0.7661, "step": 4179 }, { "epoch": 0.049976685517521734, "grad_norm": 5.4550957679748535, "learning_rate": 9.98956123772863e-06, "loss": 0.6609, "step": 4180 }, { "epoch": 0.04998864166238238, "grad_norm": 2.082932949066162, "learning_rate": 9.989548729340012e-06, "loss": 0.5821, "step": 4181 }, { "epoch": 0.05000059780724303, "grad_norm": 1.6310052871704102, "learning_rate": 9.98953621346955e-06, "loss": 0.6277, "step": 4182 }, { "epoch": 0.05001255395210368, "grad_norm": 3.410567283630371, "learning_rate": 9.98952369011726e-06, "loss": 0.5554, "step": 4183 }, { "epoch": 0.050024510096964334, "grad_norm": 2.2320234775543213, "learning_rate": 9.989511159283162e-06, "loss": 0.6525, "step": 4184 }, { "epoch": 0.050036466241824985, "grad_norm": 3.1861326694488525, "learning_rate": 9.989498620967273e-06, "loss": 0.6025, "step": 4185 }, { "epoch": 0.05004842238668564, "grad_norm": 2.3092730045318604, "learning_rate": 9.989486075169613e-06, "loss": 0.6163, "step": 4186 }, { "epoch": 0.05006037853154629, "grad_norm": 2.5367438793182373, "learning_rate": 9.989473521890203e-06, "loss": 0.5732, "step": 4187 }, { "epoch": 0.05007233467640694, "grad_norm": 2.7819647789001465, "learning_rate": 9.989460961129058e-06, "loss": 0.5981, "step": 4188 }, { "epoch": 0.05008429082126759, "grad_norm": 2.5257372856140137, "learning_rate": 9.989448392886198e-06, "loss": 0.5504, "step": 4189 }, { "epoch": 0.050096246966128244, "grad_norm": 5.366401195526123, "learning_rate": 9.989435817161645e-06, "loss": 0.656, "step": 4190 }, { "epoch": 0.050108203110988896, "grad_norm": 2.173752784729004, "learning_rate": 9.989423233955414e-06, "loss": 0.6054, "step": 4191 }, { "epoch": 0.05012015925584955, "grad_norm": 2.811383008956909, "learning_rate": 9.989410643267524e-06, "loss": 0.642, "step": 4192 }, { "epoch": 0.05013211540071019, "grad_norm": 3.3314192295074463, "learning_rate": 9.989398045097995e-06, "loss": 0.6051, "step": 4193 }, { "epoch": 0.050144071545570844, "grad_norm": 2.209486722946167, "learning_rate": 9.989385439446848e-06, "loss": 0.578, "step": 4194 }, { "epoch": 0.050156027690431496, "grad_norm": 5.681656837463379, "learning_rate": 9.989372826314096e-06, "loss": 0.6053, "step": 4195 }, { "epoch": 0.05016798383529215, "grad_norm": 3.598808526992798, "learning_rate": 9.989360205699764e-06, "loss": 0.6068, "step": 4196 }, { "epoch": 0.0501799399801528, "grad_norm": 2.8712732791900635, "learning_rate": 9.989347577603869e-06, "loss": 0.679, "step": 4197 }, { "epoch": 0.05019189612501345, "grad_norm": 2.2553622722625732, "learning_rate": 9.98933494202643e-06, "loss": 0.5604, "step": 4198 }, { "epoch": 0.0502038522698741, "grad_norm": 4.94667911529541, "learning_rate": 9.989322298967466e-06, "loss": 0.6358, "step": 4199 }, { "epoch": 0.050215808414734754, "grad_norm": 1.6727471351623535, "learning_rate": 9.989309648426993e-06, "loss": 0.7028, "step": 4200 }, { "epoch": 0.050227764559595406, "grad_norm": 4.483667373657227, "learning_rate": 9.989296990405034e-06, "loss": 0.6825, "step": 4201 }, { "epoch": 0.05023972070445606, "grad_norm": 3.5550761222839355, "learning_rate": 9.989284324901607e-06, "loss": 0.6634, "step": 4202 }, { "epoch": 0.05025167684931671, "grad_norm": 1.6096324920654297, "learning_rate": 9.98927165191673e-06, "loss": 0.5767, "step": 4203 }, { "epoch": 0.050263632994177354, "grad_norm": 6.934179306030273, "learning_rate": 9.989258971450421e-06, "loss": 0.6556, "step": 4204 }, { "epoch": 0.050275589139038006, "grad_norm": 2.2618863582611084, "learning_rate": 9.989246283502702e-06, "loss": 0.6069, "step": 4205 }, { "epoch": 0.05028754528389866, "grad_norm": 2.057666063308716, "learning_rate": 9.989233588073589e-06, "loss": 0.7051, "step": 4206 }, { "epoch": 0.05029950142875931, "grad_norm": 1.5533215999603271, "learning_rate": 9.989220885163105e-06, "loss": 0.6386, "step": 4207 }, { "epoch": 0.05031145757361996, "grad_norm": 8.613237380981445, "learning_rate": 9.989208174771264e-06, "loss": 0.6384, "step": 4208 }, { "epoch": 0.05032341371848061, "grad_norm": 4.743224620819092, "learning_rate": 9.989195456898088e-06, "loss": 0.6126, "step": 4209 }, { "epoch": 0.050335369863341264, "grad_norm": 4.101738452911377, "learning_rate": 9.989182731543596e-06, "loss": 0.6136, "step": 4210 }, { "epoch": 0.050347326008201916, "grad_norm": 2.926628828048706, "learning_rate": 9.989169998707808e-06, "loss": 0.6199, "step": 4211 }, { "epoch": 0.05035928215306257, "grad_norm": 4.446949481964111, "learning_rate": 9.989157258390741e-06, "loss": 0.6349, "step": 4212 }, { "epoch": 0.05037123829792322, "grad_norm": 1.6214519739151, "learning_rate": 9.989144510592415e-06, "loss": 0.6654, "step": 4213 }, { "epoch": 0.05038319444278387, "grad_norm": 3.9429872035980225, "learning_rate": 9.989131755312849e-06, "loss": 0.657, "step": 4214 }, { "epoch": 0.05039515058764452, "grad_norm": 2.0571579933166504, "learning_rate": 9.98911899255206e-06, "loss": 0.5616, "step": 4215 }, { "epoch": 0.05040710673250517, "grad_norm": 2.3848557472229004, "learning_rate": 9.989106222310072e-06, "loss": 0.6813, "step": 4216 }, { "epoch": 0.05041906287736582, "grad_norm": 7.139404296875, "learning_rate": 9.9890934445869e-06, "loss": 0.6446, "step": 4217 }, { "epoch": 0.05043101902222647, "grad_norm": 2.8066132068634033, "learning_rate": 9.989080659382567e-06, "loss": 0.5953, "step": 4218 }, { "epoch": 0.05044297516708712, "grad_norm": 2.559018611907959, "learning_rate": 9.989067866697089e-06, "loss": 0.6413, "step": 4219 }, { "epoch": 0.050454931311947775, "grad_norm": 2.953406572341919, "learning_rate": 9.989055066530485e-06, "loss": 0.5662, "step": 4220 }, { "epoch": 0.050466887456808426, "grad_norm": 2.656737804412842, "learning_rate": 9.989042258882775e-06, "loss": 0.6583, "step": 4221 }, { "epoch": 0.05047884360166908, "grad_norm": 2.100693941116333, "learning_rate": 9.98902944375398e-06, "loss": 0.6435, "step": 4222 }, { "epoch": 0.05049079974652973, "grad_norm": 2.3203389644622803, "learning_rate": 9.989016621144116e-06, "loss": 0.5734, "step": 4223 }, { "epoch": 0.05050275589139038, "grad_norm": 2.298971652984619, "learning_rate": 9.989003791053204e-06, "loss": 0.6132, "step": 4224 }, { "epoch": 0.05051471203625103, "grad_norm": 2.1943252086639404, "learning_rate": 9.988990953481263e-06, "loss": 0.6663, "step": 4225 }, { "epoch": 0.050526668181111685, "grad_norm": 2.576463222503662, "learning_rate": 9.988978108428313e-06, "loss": 0.7021, "step": 4226 }, { "epoch": 0.05053862432597234, "grad_norm": 9.999451637268066, "learning_rate": 9.988965255894374e-06, "loss": 0.4764, "step": 4227 }, { "epoch": 0.05055058047083298, "grad_norm": 2.2627034187316895, "learning_rate": 9.988952395879461e-06, "loss": 0.6498, "step": 4228 }, { "epoch": 0.05056253661569363, "grad_norm": 6.968447208404541, "learning_rate": 9.988939528383598e-06, "loss": 0.5634, "step": 4229 }, { "epoch": 0.050574492760554285, "grad_norm": 2.0483651161193848, "learning_rate": 9.988926653406802e-06, "loss": 0.6318, "step": 4230 }, { "epoch": 0.05058644890541494, "grad_norm": 2.052664041519165, "learning_rate": 9.988913770949093e-06, "loss": 0.5997, "step": 4231 }, { "epoch": 0.05059840505027559, "grad_norm": 1.5834916830062866, "learning_rate": 9.98890088101049e-06, "loss": 0.5713, "step": 4232 }, { "epoch": 0.05061036119513624, "grad_norm": 2.1857638359069824, "learning_rate": 9.98888798359101e-06, "loss": 0.6574, "step": 4233 }, { "epoch": 0.05062231733999689, "grad_norm": 6.785651206970215, "learning_rate": 9.988875078690679e-06, "loss": 0.61, "step": 4234 }, { "epoch": 0.050634273484857543, "grad_norm": 1.6780084371566772, "learning_rate": 9.98886216630951e-06, "loss": 0.6245, "step": 4235 }, { "epoch": 0.050646229629718195, "grad_norm": 16.64753532409668, "learning_rate": 9.988849246447523e-06, "loss": 0.6276, "step": 4236 }, { "epoch": 0.05065818577457885, "grad_norm": 3.1827735900878906, "learning_rate": 9.98883631910474e-06, "loss": 0.617, "step": 4237 }, { "epoch": 0.0506701419194395, "grad_norm": 2.178919553756714, "learning_rate": 9.98882338428118e-06, "loss": 0.6725, "step": 4238 }, { "epoch": 0.05068209806430015, "grad_norm": 2.4562880992889404, "learning_rate": 9.988810441976861e-06, "loss": 0.5516, "step": 4239 }, { "epoch": 0.050694054209160795, "grad_norm": 2.572176933288574, "learning_rate": 9.988797492191803e-06, "loss": 0.643, "step": 4240 }, { "epoch": 0.05070601035402145, "grad_norm": 4.965910911560059, "learning_rate": 9.988784534926026e-06, "loss": 0.6279, "step": 4241 }, { "epoch": 0.0507179664988821, "grad_norm": 2.0960326194763184, "learning_rate": 9.988771570179548e-06, "loss": 0.6567, "step": 4242 }, { "epoch": 0.05072992264374275, "grad_norm": 2.2581629753112793, "learning_rate": 9.988758597952389e-06, "loss": 0.7164, "step": 4243 }, { "epoch": 0.0507418787886034, "grad_norm": 2.3770978450775146, "learning_rate": 9.98874561824457e-06, "loss": 0.5906, "step": 4244 }, { "epoch": 0.050753834933464054, "grad_norm": 3.030491828918457, "learning_rate": 9.988732631056108e-06, "loss": 0.6217, "step": 4245 }, { "epoch": 0.050765791078324705, "grad_norm": 3.1998589038848877, "learning_rate": 9.988719636387026e-06, "loss": 0.7065, "step": 4246 }, { "epoch": 0.05077774722318536, "grad_norm": 4.988858222961426, "learning_rate": 9.988706634237337e-06, "loss": 0.6611, "step": 4247 }, { "epoch": 0.05078970336804601, "grad_norm": 4.694711685180664, "learning_rate": 9.988693624607068e-06, "loss": 0.6656, "step": 4248 }, { "epoch": 0.05080165951290666, "grad_norm": 4.351840019226074, "learning_rate": 9.988680607496234e-06, "loss": 0.6192, "step": 4249 }, { "epoch": 0.05081361565776731, "grad_norm": 2.1312596797943115, "learning_rate": 9.988667582904857e-06, "loss": 0.6624, "step": 4250 }, { "epoch": 0.050825571802627964, "grad_norm": 2.8276286125183105, "learning_rate": 9.988654550832953e-06, "loss": 0.6637, "step": 4251 }, { "epoch": 0.05083752794748861, "grad_norm": 6.082199573516846, "learning_rate": 9.988641511280546e-06, "loss": 0.6665, "step": 4252 }, { "epoch": 0.05084948409234926, "grad_norm": 1.6334505081176758, "learning_rate": 9.988628464247652e-06, "loss": 0.6031, "step": 4253 }, { "epoch": 0.05086144023720991, "grad_norm": 2.5813043117523193, "learning_rate": 9.988615409734293e-06, "loss": 0.5584, "step": 4254 }, { "epoch": 0.050873396382070564, "grad_norm": 5.0190887451171875, "learning_rate": 9.988602347740486e-06, "loss": 0.674, "step": 4255 }, { "epoch": 0.050885352526931216, "grad_norm": 1.6210296154022217, "learning_rate": 9.988589278266252e-06, "loss": 0.5597, "step": 4256 }, { "epoch": 0.05089730867179187, "grad_norm": 4.278048038482666, "learning_rate": 9.988576201311612e-06, "loss": 0.6644, "step": 4257 }, { "epoch": 0.05090926481665252, "grad_norm": 2.43377947807312, "learning_rate": 9.988563116876583e-06, "loss": 0.6991, "step": 4258 }, { "epoch": 0.05092122096151317, "grad_norm": 2.05100154876709, "learning_rate": 9.988550024961186e-06, "loss": 0.6208, "step": 4259 }, { "epoch": 0.05093317710637382, "grad_norm": 2.481759786605835, "learning_rate": 9.988536925565442e-06, "loss": 0.6366, "step": 4260 }, { "epoch": 0.050945133251234474, "grad_norm": 4.571022987365723, "learning_rate": 9.988523818689367e-06, "loss": 0.573, "step": 4261 }, { "epoch": 0.050957089396095126, "grad_norm": 1.724494218826294, "learning_rate": 9.988510704332985e-06, "loss": 0.6363, "step": 4262 }, { "epoch": 0.05096904554095577, "grad_norm": 4.802210330963135, "learning_rate": 9.988497582496312e-06, "loss": 0.6325, "step": 4263 }, { "epoch": 0.05098100168581642, "grad_norm": 2.605304479598999, "learning_rate": 9.98848445317937e-06, "loss": 0.6297, "step": 4264 }, { "epoch": 0.050992957830677074, "grad_norm": 2.742007255554199, "learning_rate": 9.988471316382176e-06, "loss": 0.6081, "step": 4265 }, { "epoch": 0.051004913975537726, "grad_norm": 2.3891713619232178, "learning_rate": 9.988458172104753e-06, "loss": 0.5931, "step": 4266 }, { "epoch": 0.05101687012039838, "grad_norm": 3.174952745437622, "learning_rate": 9.98844502034712e-06, "loss": 0.6695, "step": 4267 }, { "epoch": 0.05102882626525903, "grad_norm": 4.460954666137695, "learning_rate": 9.988431861109294e-06, "loss": 0.673, "step": 4268 }, { "epoch": 0.05104078241011968, "grad_norm": 2.231980323791504, "learning_rate": 9.988418694391298e-06, "loss": 0.6551, "step": 4269 }, { "epoch": 0.05105273855498033, "grad_norm": 2.2440478801727295, "learning_rate": 9.988405520193151e-06, "loss": 0.6752, "step": 4270 }, { "epoch": 0.051064694699840985, "grad_norm": 1.8080133199691772, "learning_rate": 9.988392338514871e-06, "loss": 0.5918, "step": 4271 }, { "epoch": 0.051076650844701636, "grad_norm": 1.7044705152511597, "learning_rate": 9.98837914935648e-06, "loss": 0.5496, "step": 4272 }, { "epoch": 0.05108860698956229, "grad_norm": 2.181189775466919, "learning_rate": 9.988365952717995e-06, "loss": 0.6687, "step": 4273 }, { "epoch": 0.05110056313442294, "grad_norm": 3.5432324409484863, "learning_rate": 9.98835274859944e-06, "loss": 0.6027, "step": 4274 }, { "epoch": 0.051112519279283584, "grad_norm": 2.8524959087371826, "learning_rate": 9.98833953700083e-06, "loss": 0.6538, "step": 4275 }, { "epoch": 0.051124475424144236, "grad_norm": 4.060698986053467, "learning_rate": 9.988326317922189e-06, "loss": 0.6589, "step": 4276 }, { "epoch": 0.05113643156900489, "grad_norm": 1.481642484664917, "learning_rate": 9.988313091363533e-06, "loss": 0.6919, "step": 4277 }, { "epoch": 0.05114838771386554, "grad_norm": 4.385682106018066, "learning_rate": 9.988299857324885e-06, "loss": 0.7, "step": 4278 }, { "epoch": 0.05116034385872619, "grad_norm": 2.759732723236084, "learning_rate": 9.988286615806264e-06, "loss": 0.726, "step": 4279 }, { "epoch": 0.05117230000358684, "grad_norm": 1.7425702810287476, "learning_rate": 9.988273366807687e-06, "loss": 0.5579, "step": 4280 }, { "epoch": 0.051184256148447495, "grad_norm": 1.770742416381836, "learning_rate": 9.98826011032918e-06, "loss": 0.5676, "step": 4281 }, { "epoch": 0.051196212293308146, "grad_norm": 2.6414296627044678, "learning_rate": 9.988246846370757e-06, "loss": 0.657, "step": 4282 }, { "epoch": 0.0512081684381688, "grad_norm": 5.677974700927734, "learning_rate": 9.988233574932441e-06, "loss": 0.5377, "step": 4283 }, { "epoch": 0.05122012458302945, "grad_norm": 2.3579981327056885, "learning_rate": 9.98822029601425e-06, "loss": 0.5819, "step": 4284 }, { "epoch": 0.0512320807278901, "grad_norm": 1.9609153270721436, "learning_rate": 9.988207009616207e-06, "loss": 0.6583, "step": 4285 }, { "epoch": 0.05124403687275075, "grad_norm": 2.539764404296875, "learning_rate": 9.988193715738329e-06, "loss": 0.6069, "step": 4286 }, { "epoch": 0.0512559930176114, "grad_norm": 2.616342067718506, "learning_rate": 9.988180414380634e-06, "loss": 0.6714, "step": 4287 }, { "epoch": 0.05126794916247205, "grad_norm": 4.478177547454834, "learning_rate": 9.988167105543148e-06, "loss": 0.6407, "step": 4288 }, { "epoch": 0.0512799053073327, "grad_norm": 1.806096076965332, "learning_rate": 9.988153789225886e-06, "loss": 0.5982, "step": 4289 }, { "epoch": 0.05129186145219335, "grad_norm": 3.6100313663482666, "learning_rate": 9.988140465428871e-06, "loss": 0.568, "step": 4290 }, { "epoch": 0.051303817597054005, "grad_norm": 2.7534027099609375, "learning_rate": 9.988127134152122e-06, "loss": 0.6876, "step": 4291 }, { "epoch": 0.05131577374191466, "grad_norm": 2.0498135089874268, "learning_rate": 9.988113795395656e-06, "loss": 0.7043, "step": 4292 }, { "epoch": 0.05132772988677531, "grad_norm": 1.495947241783142, "learning_rate": 9.988100449159497e-06, "loss": 0.6903, "step": 4293 }, { "epoch": 0.05133968603163596, "grad_norm": 2.3987131118774414, "learning_rate": 9.988087095443664e-06, "loss": 0.6811, "step": 4294 }, { "epoch": 0.05135164217649661, "grad_norm": 1.7939523458480835, "learning_rate": 9.988073734248177e-06, "loss": 0.6418, "step": 4295 }, { "epoch": 0.051363598321357264, "grad_norm": 1.8228139877319336, "learning_rate": 9.988060365573056e-06, "loss": 0.5729, "step": 4296 }, { "epoch": 0.051375554466217915, "grad_norm": 4.312936305999756, "learning_rate": 9.988046989418319e-06, "loss": 0.6594, "step": 4297 }, { "epoch": 0.05138751061107857, "grad_norm": 2.356630802154541, "learning_rate": 9.988033605783988e-06, "loss": 0.7038, "step": 4298 }, { "epoch": 0.05139946675593921, "grad_norm": 2.444019317626953, "learning_rate": 9.988020214670085e-06, "loss": 0.6175, "step": 4299 }, { "epoch": 0.051411422900799864, "grad_norm": 3.428396224975586, "learning_rate": 9.988006816076627e-06, "loss": 0.6501, "step": 4300 }, { "epoch": 0.051423379045660515, "grad_norm": 3.1488499641418457, "learning_rate": 9.987993410003634e-06, "loss": 0.7117, "step": 4301 }, { "epoch": 0.05143533519052117, "grad_norm": 2.6346802711486816, "learning_rate": 9.987979996451129e-06, "loss": 0.6129, "step": 4302 }, { "epoch": 0.05144729133538182, "grad_norm": 2.0359463691711426, "learning_rate": 9.987966575419127e-06, "loss": 0.5772, "step": 4303 }, { "epoch": 0.05145924748024247, "grad_norm": 1.9652197360992432, "learning_rate": 9.987953146907654e-06, "loss": 0.6639, "step": 4304 }, { "epoch": 0.05147120362510312, "grad_norm": 2.0337893962860107, "learning_rate": 9.987939710916728e-06, "loss": 0.5767, "step": 4305 }, { "epoch": 0.051483159769963774, "grad_norm": 2.1194205284118652, "learning_rate": 9.987926267446368e-06, "loss": 0.571, "step": 4306 }, { "epoch": 0.051495115914824426, "grad_norm": 3.984856605529785, "learning_rate": 9.987912816496594e-06, "loss": 0.5216, "step": 4307 }, { "epoch": 0.05150707205968508, "grad_norm": 1.961742639541626, "learning_rate": 9.987899358067427e-06, "loss": 0.5395, "step": 4308 }, { "epoch": 0.05151902820454573, "grad_norm": 1.837952971458435, "learning_rate": 9.987885892158889e-06, "loss": 0.6064, "step": 4309 }, { "epoch": 0.05153098434940638, "grad_norm": 1.7874314785003662, "learning_rate": 9.987872418770997e-06, "loss": 0.7452, "step": 4310 }, { "epoch": 0.051542940494267026, "grad_norm": 2.0154635906219482, "learning_rate": 9.987858937903772e-06, "loss": 0.6774, "step": 4311 }, { "epoch": 0.05155489663912768, "grad_norm": 2.411166191101074, "learning_rate": 9.987845449557237e-06, "loss": 0.6651, "step": 4312 }, { "epoch": 0.05156685278398833, "grad_norm": 2.192809581756592, "learning_rate": 9.987831953731407e-06, "loss": 0.6351, "step": 4313 }, { "epoch": 0.05157880892884898, "grad_norm": 2.1991124153137207, "learning_rate": 9.987818450426308e-06, "loss": 0.6638, "step": 4314 }, { "epoch": 0.05159076507370963, "grad_norm": 3.8315908908843994, "learning_rate": 9.987804939641955e-06, "loss": 0.6774, "step": 4315 }, { "epoch": 0.051602721218570284, "grad_norm": 2.2422618865966797, "learning_rate": 9.987791421378373e-06, "loss": 0.5824, "step": 4316 }, { "epoch": 0.051614677363430936, "grad_norm": 3.5588150024414062, "learning_rate": 9.987777895635579e-06, "loss": 0.5887, "step": 4317 }, { "epoch": 0.05162663350829159, "grad_norm": 4.0330376625061035, "learning_rate": 9.987764362413595e-06, "loss": 0.6836, "step": 4318 }, { "epoch": 0.05163858965315224, "grad_norm": 3.7205264568328857, "learning_rate": 9.987750821712438e-06, "loss": 0.6333, "step": 4319 }, { "epoch": 0.05165054579801289, "grad_norm": 2.0276975631713867, "learning_rate": 9.987737273532135e-06, "loss": 0.6758, "step": 4320 }, { "epoch": 0.05166250194287354, "grad_norm": 1.8932613134384155, "learning_rate": 9.987723717872699e-06, "loss": 0.6586, "step": 4321 }, { "epoch": 0.051674458087734194, "grad_norm": 2.495030403137207, "learning_rate": 9.987710154734154e-06, "loss": 0.6763, "step": 4322 }, { "epoch": 0.05168641423259484, "grad_norm": 2.4770610332489014, "learning_rate": 9.987696584116523e-06, "loss": 0.691, "step": 4323 }, { "epoch": 0.05169837037745549, "grad_norm": 1.6108312606811523, "learning_rate": 9.987683006019819e-06, "loss": 0.5376, "step": 4324 }, { "epoch": 0.05171032652231614, "grad_norm": 5.904786586761475, "learning_rate": 9.987669420444068e-06, "loss": 0.7373, "step": 4325 }, { "epoch": 0.051722282667176794, "grad_norm": 2.6275248527526855, "learning_rate": 9.98765582738929e-06, "loss": 0.5896, "step": 4326 }, { "epoch": 0.051734238812037446, "grad_norm": 1.9509974718093872, "learning_rate": 9.987642226855504e-06, "loss": 0.6127, "step": 4327 }, { "epoch": 0.0517461949568981, "grad_norm": 1.5986404418945312, "learning_rate": 9.987628618842731e-06, "loss": 0.661, "step": 4328 }, { "epoch": 0.05175815110175875, "grad_norm": 1.7715219259262085, "learning_rate": 9.98761500335099e-06, "loss": 0.6625, "step": 4329 }, { "epoch": 0.0517701072466194, "grad_norm": 3.85546612739563, "learning_rate": 9.987601380380303e-06, "loss": 0.6462, "step": 4330 }, { "epoch": 0.05178206339148005, "grad_norm": 2.1290230751037598, "learning_rate": 9.98758774993069e-06, "loss": 0.5405, "step": 4331 }, { "epoch": 0.051794019536340705, "grad_norm": 1.7459065914154053, "learning_rate": 9.987574112002172e-06, "loss": 0.612, "step": 4332 }, { "epoch": 0.051805975681201356, "grad_norm": 2.1659626960754395, "learning_rate": 9.987560466594768e-06, "loss": 0.6932, "step": 4333 }, { "epoch": 0.051817931826062, "grad_norm": 2.67694091796875, "learning_rate": 9.987546813708499e-06, "loss": 0.6894, "step": 4334 }, { "epoch": 0.05182988797092265, "grad_norm": 2.065589427947998, "learning_rate": 9.987533153343388e-06, "loss": 0.6204, "step": 4335 }, { "epoch": 0.051841844115783305, "grad_norm": 1.494710087776184, "learning_rate": 9.987519485499452e-06, "loss": 0.6439, "step": 4336 }, { "epoch": 0.051853800260643956, "grad_norm": 3.274669885635376, "learning_rate": 9.987505810176713e-06, "loss": 0.6654, "step": 4337 }, { "epoch": 0.05186575640550461, "grad_norm": 3.024020195007324, "learning_rate": 9.987492127375192e-06, "loss": 0.6262, "step": 4338 }, { "epoch": 0.05187771255036526, "grad_norm": 1.6949267387390137, "learning_rate": 9.987478437094907e-06, "loss": 0.625, "step": 4339 }, { "epoch": 0.05188966869522591, "grad_norm": 2.8932271003723145, "learning_rate": 9.987464739335881e-06, "loss": 0.6615, "step": 4340 }, { "epoch": 0.05190162484008656, "grad_norm": 3.6234793663024902, "learning_rate": 9.987451034098134e-06, "loss": 0.61, "step": 4341 }, { "epoch": 0.051913580984947215, "grad_norm": 2.169951915740967, "learning_rate": 9.987437321381688e-06, "loss": 0.6738, "step": 4342 }, { "epoch": 0.05192553712980787, "grad_norm": 2.043224334716797, "learning_rate": 9.98742360118656e-06, "loss": 0.5617, "step": 4343 }, { "epoch": 0.05193749327466852, "grad_norm": 3.575864553451538, "learning_rate": 9.987409873512773e-06, "loss": 0.6122, "step": 4344 }, { "epoch": 0.05194944941952917, "grad_norm": 2.084603786468506, "learning_rate": 9.987396138360348e-06, "loss": 0.6718, "step": 4345 }, { "epoch": 0.051961405564389815, "grad_norm": 1.960955262184143, "learning_rate": 9.987382395729303e-06, "loss": 0.6932, "step": 4346 }, { "epoch": 0.05197336170925047, "grad_norm": 2.2090022563934326, "learning_rate": 9.987368645619662e-06, "loss": 0.6098, "step": 4347 }, { "epoch": 0.05198531785411112, "grad_norm": 2.143576145172119, "learning_rate": 9.987354888031444e-06, "loss": 0.6408, "step": 4348 }, { "epoch": 0.05199727399897177, "grad_norm": 5.183767795562744, "learning_rate": 9.98734112296467e-06, "loss": 0.5714, "step": 4349 }, { "epoch": 0.05200923014383242, "grad_norm": 2.456136465072632, "learning_rate": 9.987327350419359e-06, "loss": 0.5999, "step": 4350 }, { "epoch": 0.05202118628869307, "grad_norm": 2.3267626762390137, "learning_rate": 9.987313570395533e-06, "loss": 0.6428, "step": 4351 }, { "epoch": 0.052033142433553725, "grad_norm": 2.4492745399475098, "learning_rate": 9.987299782893214e-06, "loss": 0.7172, "step": 4352 }, { "epoch": 0.05204509857841438, "grad_norm": 2.4090795516967773, "learning_rate": 9.98728598791242e-06, "loss": 0.6381, "step": 4353 }, { "epoch": 0.05205705472327503, "grad_norm": 4.5653533935546875, "learning_rate": 9.987272185453173e-06, "loss": 0.6017, "step": 4354 }, { "epoch": 0.05206901086813568, "grad_norm": 2.1823439598083496, "learning_rate": 9.987258375515494e-06, "loss": 0.6388, "step": 4355 }, { "epoch": 0.05208096701299633, "grad_norm": 1.4643760919570923, "learning_rate": 9.987244558099403e-06, "loss": 0.6513, "step": 4356 }, { "epoch": 0.052092923157856984, "grad_norm": 1.697977066040039, "learning_rate": 9.98723073320492e-06, "loss": 0.7071, "step": 4357 }, { "epoch": 0.05210487930271763, "grad_norm": 2.5317842960357666, "learning_rate": 9.987216900832069e-06, "loss": 0.597, "step": 4358 }, { "epoch": 0.05211683544757828, "grad_norm": 1.7173810005187988, "learning_rate": 9.987203060980868e-06, "loss": 0.6499, "step": 4359 }, { "epoch": 0.05212879159243893, "grad_norm": 5.336909294128418, "learning_rate": 9.987189213651337e-06, "loss": 0.7659, "step": 4360 }, { "epoch": 0.052140747737299584, "grad_norm": 2.1232826709747314, "learning_rate": 9.987175358843499e-06, "loss": 0.5717, "step": 4361 }, { "epoch": 0.052152703882160235, "grad_norm": 1.8381487131118774, "learning_rate": 9.987161496557373e-06, "loss": 0.6043, "step": 4362 }, { "epoch": 0.05216466002702089, "grad_norm": 3.575185775756836, "learning_rate": 9.987147626792981e-06, "loss": 0.6306, "step": 4363 }, { "epoch": 0.05217661617188154, "grad_norm": 2.184562921524048, "learning_rate": 9.987133749550344e-06, "loss": 0.6545, "step": 4364 }, { "epoch": 0.05218857231674219, "grad_norm": 3.611359119415283, "learning_rate": 9.987119864829482e-06, "loss": 0.6488, "step": 4365 }, { "epoch": 0.05220052846160284, "grad_norm": 2.0350394248962402, "learning_rate": 9.987105972630415e-06, "loss": 0.7184, "step": 4366 }, { "epoch": 0.052212484606463494, "grad_norm": 8.877720832824707, "learning_rate": 9.987092072953167e-06, "loss": 0.7201, "step": 4367 }, { "epoch": 0.052224440751324146, "grad_norm": 1.884485125541687, "learning_rate": 9.987078165797754e-06, "loss": 0.6336, "step": 4368 }, { "epoch": 0.0522363968961848, "grad_norm": 1.7706260681152344, "learning_rate": 9.987064251164203e-06, "loss": 0.6259, "step": 4369 }, { "epoch": 0.05224835304104544, "grad_norm": 1.9715579748153687, "learning_rate": 9.987050329052528e-06, "loss": 0.7536, "step": 4370 }, { "epoch": 0.052260309185906094, "grad_norm": 1.8702186346054077, "learning_rate": 9.987036399462755e-06, "loss": 0.6346, "step": 4371 }, { "epoch": 0.052272265330766746, "grad_norm": 3.4645273685455322, "learning_rate": 9.987022462394903e-06, "loss": 0.6646, "step": 4372 }, { "epoch": 0.0522842214756274, "grad_norm": 2.0424423217773438, "learning_rate": 9.987008517848993e-06, "loss": 0.7165, "step": 4373 }, { "epoch": 0.05229617762048805, "grad_norm": 1.7236392498016357, "learning_rate": 9.986994565825046e-06, "loss": 0.5785, "step": 4374 }, { "epoch": 0.0523081337653487, "grad_norm": 23.343278884887695, "learning_rate": 9.986980606323083e-06, "loss": 0.6472, "step": 4375 }, { "epoch": 0.05232008991020935, "grad_norm": 2.1053826808929443, "learning_rate": 9.986966639343125e-06, "loss": 0.5605, "step": 4376 }, { "epoch": 0.052332046055070004, "grad_norm": 1.5783311128616333, "learning_rate": 9.986952664885191e-06, "loss": 0.5719, "step": 4377 }, { "epoch": 0.052344002199930656, "grad_norm": 2.156418800354004, "learning_rate": 9.986938682949307e-06, "loss": 0.5582, "step": 4378 }, { "epoch": 0.05235595834479131, "grad_norm": 1.9405760765075684, "learning_rate": 9.98692469353549e-06, "loss": 0.5958, "step": 4379 }, { "epoch": 0.05236791448965196, "grad_norm": 1.8944785594940186, "learning_rate": 9.98691069664376e-06, "loss": 0.5477, "step": 4380 }, { "epoch": 0.05237987063451261, "grad_norm": 3.561702251434326, "learning_rate": 9.986896692274141e-06, "loss": 0.6438, "step": 4381 }, { "epoch": 0.052391826779373256, "grad_norm": 1.7839945554733276, "learning_rate": 9.986882680426652e-06, "loss": 0.5736, "step": 4382 }, { "epoch": 0.05240378292423391, "grad_norm": 3.837193250656128, "learning_rate": 9.986868661101314e-06, "loss": 0.6014, "step": 4383 }, { "epoch": 0.05241573906909456, "grad_norm": 5.407747268676758, "learning_rate": 9.98685463429815e-06, "loss": 0.7055, "step": 4384 }, { "epoch": 0.05242769521395521, "grad_norm": 1.3325672149658203, "learning_rate": 9.98684060001718e-06, "loss": 0.5417, "step": 4385 }, { "epoch": 0.05243965135881586, "grad_norm": 2.4667751789093018, "learning_rate": 9.986826558258423e-06, "loss": 0.6667, "step": 4386 }, { "epoch": 0.052451607503676514, "grad_norm": 1.811112642288208, "learning_rate": 9.986812509021904e-06, "loss": 0.6042, "step": 4387 }, { "epoch": 0.052463563648537166, "grad_norm": 3.4418745040893555, "learning_rate": 9.98679845230764e-06, "loss": 0.6524, "step": 4388 }, { "epoch": 0.05247551979339782, "grad_norm": 8.019081115722656, "learning_rate": 9.986784388115655e-06, "loss": 0.6501, "step": 4389 }, { "epoch": 0.05248747593825847, "grad_norm": 2.007291078567505, "learning_rate": 9.986770316445968e-06, "loss": 0.6742, "step": 4390 }, { "epoch": 0.05249943208311912, "grad_norm": 1.5423468351364136, "learning_rate": 9.986756237298603e-06, "loss": 0.6663, "step": 4391 }, { "epoch": 0.05251138822797977, "grad_norm": 1.7521864175796509, "learning_rate": 9.986742150673579e-06, "loss": 0.7163, "step": 4392 }, { "epoch": 0.052523344372840425, "grad_norm": 1.5803279876708984, "learning_rate": 9.986728056570915e-06, "loss": 0.6163, "step": 4393 }, { "epoch": 0.05253530051770107, "grad_norm": 2.8202431201934814, "learning_rate": 9.986713954990638e-06, "loss": 0.6125, "step": 4394 }, { "epoch": 0.05254725666256172, "grad_norm": 4.020788192749023, "learning_rate": 9.986699845932764e-06, "loss": 0.6774, "step": 4395 }, { "epoch": 0.05255921280742237, "grad_norm": 4.260190486907959, "learning_rate": 9.986685729397316e-06, "loss": 0.5947, "step": 4396 }, { "epoch": 0.052571168952283025, "grad_norm": 5.485550880432129, "learning_rate": 9.986671605384315e-06, "loss": 0.7252, "step": 4397 }, { "epoch": 0.052583125097143676, "grad_norm": 2.251558780670166, "learning_rate": 9.986657473893781e-06, "loss": 0.6672, "step": 4398 }, { "epoch": 0.05259508124200433, "grad_norm": 2.9973692893981934, "learning_rate": 9.98664333492574e-06, "loss": 0.6647, "step": 4399 }, { "epoch": 0.05260703738686498, "grad_norm": 2.3897721767425537, "learning_rate": 9.986629188480206e-06, "loss": 0.6986, "step": 4400 }, { "epoch": 0.05261899353172563, "grad_norm": 3.0486464500427246, "learning_rate": 9.986615034557206e-06, "loss": 0.6495, "step": 4401 }, { "epoch": 0.05263094967658628, "grad_norm": 1.6636145114898682, "learning_rate": 9.986600873156758e-06, "loss": 0.5677, "step": 4402 }, { "epoch": 0.052642905821446935, "grad_norm": 23.670425415039062, "learning_rate": 9.986586704278883e-06, "loss": 0.7259, "step": 4403 }, { "epoch": 0.05265486196630759, "grad_norm": 4.2393035888671875, "learning_rate": 9.986572527923605e-06, "loss": 0.6467, "step": 4404 }, { "epoch": 0.05266681811116823, "grad_norm": 2.1122689247131348, "learning_rate": 9.986558344090945e-06, "loss": 0.5744, "step": 4405 }, { "epoch": 0.05267877425602888, "grad_norm": 2.001615285873413, "learning_rate": 9.986544152780919e-06, "loss": 0.6655, "step": 4406 }, { "epoch": 0.052690730400889535, "grad_norm": 2.244040012359619, "learning_rate": 9.986529953993555e-06, "loss": 0.6365, "step": 4407 }, { "epoch": 0.05270268654575019, "grad_norm": 1.8629612922668457, "learning_rate": 9.986515747728874e-06, "loss": 0.6739, "step": 4408 }, { "epoch": 0.05271464269061084, "grad_norm": 1.6316072940826416, "learning_rate": 9.986501533986891e-06, "loss": 0.7275, "step": 4409 }, { "epoch": 0.05272659883547149, "grad_norm": 3.3993325233459473, "learning_rate": 9.986487312767632e-06, "loss": 0.5941, "step": 4410 }, { "epoch": 0.05273855498033214, "grad_norm": 2.2333824634552, "learning_rate": 9.98647308407112e-06, "loss": 0.6778, "step": 4411 }, { "epoch": 0.052750511125192794, "grad_norm": 8.672926902770996, "learning_rate": 9.986458847897372e-06, "loss": 0.6559, "step": 4412 }, { "epoch": 0.052762467270053445, "grad_norm": 6.639893054962158, "learning_rate": 9.98644460424641e-06, "loss": 0.6576, "step": 4413 }, { "epoch": 0.0527744234149141, "grad_norm": 2.0536835193634033, "learning_rate": 9.986430353118258e-06, "loss": 0.6811, "step": 4414 }, { "epoch": 0.05278637955977475, "grad_norm": 2.479992628097534, "learning_rate": 9.986416094512936e-06, "loss": 0.6111, "step": 4415 }, { "epoch": 0.0527983357046354, "grad_norm": 3.972698211669922, "learning_rate": 9.986401828430465e-06, "loss": 0.5775, "step": 4416 }, { "epoch": 0.052810291849496045, "grad_norm": 5.404018878936768, "learning_rate": 9.986387554870866e-06, "loss": 0.6204, "step": 4417 }, { "epoch": 0.0528222479943567, "grad_norm": 1.9446812868118286, "learning_rate": 9.986373273834164e-06, "loss": 0.6119, "step": 4418 }, { "epoch": 0.05283420413921735, "grad_norm": 1.6876130104064941, "learning_rate": 9.986358985320375e-06, "loss": 0.6308, "step": 4419 }, { "epoch": 0.052846160284078, "grad_norm": 1.9789503812789917, "learning_rate": 9.986344689329524e-06, "loss": 0.6145, "step": 4420 }, { "epoch": 0.05285811642893865, "grad_norm": 1.7968722581863403, "learning_rate": 9.98633038586163e-06, "loss": 0.6377, "step": 4421 }, { "epoch": 0.052870072573799304, "grad_norm": 2.8880274295806885, "learning_rate": 9.986316074916718e-06, "loss": 0.6376, "step": 4422 }, { "epoch": 0.052882028718659956, "grad_norm": 8.696298599243164, "learning_rate": 9.986301756494806e-06, "loss": 0.6084, "step": 4423 }, { "epoch": 0.05289398486352061, "grad_norm": 2.1794564723968506, "learning_rate": 9.986287430595916e-06, "loss": 0.6846, "step": 4424 }, { "epoch": 0.05290594100838126, "grad_norm": 2.1246745586395264, "learning_rate": 9.986273097220072e-06, "loss": 0.5806, "step": 4425 }, { "epoch": 0.05291789715324191, "grad_norm": 3.2426323890686035, "learning_rate": 9.986258756367292e-06, "loss": 0.5848, "step": 4426 }, { "epoch": 0.05292985329810256, "grad_norm": 2.7610275745391846, "learning_rate": 9.986244408037602e-06, "loss": 0.6537, "step": 4427 }, { "epoch": 0.052941809442963214, "grad_norm": 8.104877471923828, "learning_rate": 9.98623005223102e-06, "loss": 0.6087, "step": 4428 }, { "epoch": 0.05295376558782386, "grad_norm": 1.6640592813491821, "learning_rate": 9.986215688947565e-06, "loss": 0.6515, "step": 4429 }, { "epoch": 0.05296572173268451, "grad_norm": 2.176973342895508, "learning_rate": 9.986201318187265e-06, "loss": 0.51, "step": 4430 }, { "epoch": 0.05297767787754516, "grad_norm": 2.1683967113494873, "learning_rate": 9.986186939950138e-06, "loss": 0.6445, "step": 4431 }, { "epoch": 0.052989634022405814, "grad_norm": 3.108530044555664, "learning_rate": 9.986172554236205e-06, "loss": 0.6466, "step": 4432 }, { "epoch": 0.053001590167266466, "grad_norm": 1.5700517892837524, "learning_rate": 9.98615816104549e-06, "loss": 0.6377, "step": 4433 }, { "epoch": 0.05301354631212712, "grad_norm": 11.873218536376953, "learning_rate": 9.986143760378012e-06, "loss": 0.7129, "step": 4434 }, { "epoch": 0.05302550245698777, "grad_norm": 3.226783514022827, "learning_rate": 9.986129352233793e-06, "loss": 0.6892, "step": 4435 }, { "epoch": 0.05303745860184842, "grad_norm": 3.0154688358306885, "learning_rate": 9.986114936612857e-06, "loss": 0.5784, "step": 4436 }, { "epoch": 0.05304941474670907, "grad_norm": 3.7917752265930176, "learning_rate": 9.986100513515223e-06, "loss": 0.6167, "step": 4437 }, { "epoch": 0.053061370891569724, "grad_norm": 1.836305022239685, "learning_rate": 9.986086082940915e-06, "loss": 0.7502, "step": 4438 }, { "epoch": 0.053073327036430376, "grad_norm": 2.15228271484375, "learning_rate": 9.986071644889952e-06, "loss": 0.6043, "step": 4439 }, { "epoch": 0.05308528318129103, "grad_norm": 13.560086250305176, "learning_rate": 9.986057199362355e-06, "loss": 0.6378, "step": 4440 }, { "epoch": 0.05309723932615167, "grad_norm": 1.6794663667678833, "learning_rate": 9.98604274635815e-06, "loss": 0.5401, "step": 4441 }, { "epoch": 0.053109195471012324, "grad_norm": 4.404685020446777, "learning_rate": 9.986028285877356e-06, "loss": 0.6143, "step": 4442 }, { "epoch": 0.053121151615872976, "grad_norm": 2.255295991897583, "learning_rate": 9.986013817919996e-06, "loss": 0.6478, "step": 4443 }, { "epoch": 0.05313310776073363, "grad_norm": 2.2726101875305176, "learning_rate": 9.985999342486088e-06, "loss": 0.6592, "step": 4444 }, { "epoch": 0.05314506390559428, "grad_norm": 2.468465566635132, "learning_rate": 9.985984859575657e-06, "loss": 0.657, "step": 4445 }, { "epoch": 0.05315702005045493, "grad_norm": 2.2914247512817383, "learning_rate": 9.985970369188724e-06, "loss": 0.6585, "step": 4446 }, { "epoch": 0.05316897619531558, "grad_norm": 1.7240780591964722, "learning_rate": 9.985955871325311e-06, "loss": 0.6174, "step": 4447 }, { "epoch": 0.053180932340176235, "grad_norm": 1.511856198310852, "learning_rate": 9.98594136598544e-06, "loss": 0.5728, "step": 4448 }, { "epoch": 0.053192888485036886, "grad_norm": 2.1759836673736572, "learning_rate": 9.98592685316913e-06, "loss": 0.6256, "step": 4449 }, { "epoch": 0.05320484462989754, "grad_norm": 2.816882848739624, "learning_rate": 9.985912332876406e-06, "loss": 0.6481, "step": 4450 }, { "epoch": 0.05321680077475819, "grad_norm": 1.7832953929901123, "learning_rate": 9.98589780510729e-06, "loss": 0.6925, "step": 4451 }, { "epoch": 0.05322875691961884, "grad_norm": 2.12833833694458, "learning_rate": 9.985883269861802e-06, "loss": 0.6707, "step": 4452 }, { "epoch": 0.053240713064479486, "grad_norm": 1.776882290840149, "learning_rate": 9.985868727139965e-06, "loss": 0.5861, "step": 4453 }, { "epoch": 0.05325266920934014, "grad_norm": 1.8683414459228516, "learning_rate": 9.985854176941797e-06, "loss": 0.6634, "step": 4454 }, { "epoch": 0.05326462535420079, "grad_norm": 2.8704745769500732, "learning_rate": 9.985839619267326e-06, "loss": 0.6584, "step": 4455 }, { "epoch": 0.05327658149906144, "grad_norm": 8.44536018371582, "learning_rate": 9.985825054116571e-06, "loss": 0.6897, "step": 4456 }, { "epoch": 0.05328853764392209, "grad_norm": 1.7582862377166748, "learning_rate": 9.985810481489552e-06, "loss": 0.6028, "step": 4457 }, { "epoch": 0.053300493788782745, "grad_norm": 3.93245267868042, "learning_rate": 9.985795901386292e-06, "loss": 0.667, "step": 4458 }, { "epoch": 0.0533124499336434, "grad_norm": 1.8690855503082275, "learning_rate": 9.985781313806815e-06, "loss": 0.6153, "step": 4459 }, { "epoch": 0.05332440607850405, "grad_norm": 2.540847063064575, "learning_rate": 9.98576671875114e-06, "loss": 0.6249, "step": 4460 }, { "epoch": 0.0533363622233647, "grad_norm": 3.3151323795318604, "learning_rate": 9.985752116219292e-06, "loss": 0.6183, "step": 4461 }, { "epoch": 0.05334831836822535, "grad_norm": 2.9396984577178955, "learning_rate": 9.985737506211288e-06, "loss": 0.6332, "step": 4462 }, { "epoch": 0.053360274513086, "grad_norm": 7.986015796661377, "learning_rate": 9.985722888727155e-06, "loss": 0.6606, "step": 4463 }, { "epoch": 0.05337223065794665, "grad_norm": 3.554666519165039, "learning_rate": 9.985708263766912e-06, "loss": 0.6073, "step": 4464 }, { "epoch": 0.0533841868028073, "grad_norm": 2.774259328842163, "learning_rate": 9.985693631330582e-06, "loss": 0.7049, "step": 4465 }, { "epoch": 0.05339614294766795, "grad_norm": 3.0809855461120605, "learning_rate": 9.985678991418187e-06, "loss": 0.6646, "step": 4466 }, { "epoch": 0.0534080990925286, "grad_norm": 1.5050172805786133, "learning_rate": 9.985664344029748e-06, "loss": 0.5471, "step": 4467 }, { "epoch": 0.053420055237389255, "grad_norm": 2.8826260566711426, "learning_rate": 9.985649689165288e-06, "loss": 0.5986, "step": 4468 }, { "epoch": 0.05343201138224991, "grad_norm": 4.1107869148254395, "learning_rate": 9.985635026824828e-06, "loss": 0.6464, "step": 4469 }, { "epoch": 0.05344396752711056, "grad_norm": 1.7605438232421875, "learning_rate": 9.985620357008391e-06, "loss": 0.7267, "step": 4470 }, { "epoch": 0.05345592367197121, "grad_norm": 1.91366446018219, "learning_rate": 9.985605679716e-06, "loss": 0.5623, "step": 4471 }, { "epoch": 0.05346787981683186, "grad_norm": 3.625335693359375, "learning_rate": 9.985590994947674e-06, "loss": 0.5921, "step": 4472 }, { "epoch": 0.053479835961692514, "grad_norm": 1.7665348052978516, "learning_rate": 9.985576302703435e-06, "loss": 0.5698, "step": 4473 }, { "epoch": 0.053491792106553165, "grad_norm": 2.184329032897949, "learning_rate": 9.985561602983309e-06, "loss": 0.6069, "step": 4474 }, { "epoch": 0.05350374825141382, "grad_norm": 1.9175379276275635, "learning_rate": 9.985546895787315e-06, "loss": 0.6647, "step": 4475 }, { "epoch": 0.05351570439627446, "grad_norm": 1.761094331741333, "learning_rate": 9.985532181115475e-06, "loss": 0.5844, "step": 4476 }, { "epoch": 0.053527660541135114, "grad_norm": 1.722346305847168, "learning_rate": 9.985517458967812e-06, "loss": 0.6663, "step": 4477 }, { "epoch": 0.053539616685995765, "grad_norm": 3.342707872390747, "learning_rate": 9.98550272934435e-06, "loss": 0.5844, "step": 4478 }, { "epoch": 0.05355157283085642, "grad_norm": 1.772740125656128, "learning_rate": 9.985487992245107e-06, "loss": 0.6306, "step": 4479 }, { "epoch": 0.05356352897571707, "grad_norm": 1.6971346139907837, "learning_rate": 9.985473247670109e-06, "loss": 0.6374, "step": 4480 }, { "epoch": 0.05357548512057772, "grad_norm": 2.4776220321655273, "learning_rate": 9.985458495619372e-06, "loss": 0.6316, "step": 4481 }, { "epoch": 0.05358744126543837, "grad_norm": 2.645589590072632, "learning_rate": 9.985443736092926e-06, "loss": 0.5517, "step": 4482 }, { "epoch": 0.053599397410299024, "grad_norm": 2.4025020599365234, "learning_rate": 9.985428969090787e-06, "loss": 0.6027, "step": 4483 }, { "epoch": 0.053611353555159676, "grad_norm": 3.090925455093384, "learning_rate": 9.985414194612981e-06, "loss": 0.5894, "step": 4484 }, { "epoch": 0.05362330970002033, "grad_norm": 1.3960665464401245, "learning_rate": 9.985399412659529e-06, "loss": 0.6219, "step": 4485 }, { "epoch": 0.05363526584488098, "grad_norm": 5.462957382202148, "learning_rate": 9.985384623230451e-06, "loss": 0.5829, "step": 4486 }, { "epoch": 0.05364722198974163, "grad_norm": 2.814084529876709, "learning_rate": 9.985369826325773e-06, "loss": 0.6343, "step": 4487 }, { "epoch": 0.053659178134602276, "grad_norm": 1.9401936531066895, "learning_rate": 9.985355021945514e-06, "loss": 0.6162, "step": 4488 }, { "epoch": 0.05367113427946293, "grad_norm": 3.6490325927734375, "learning_rate": 9.985340210089697e-06, "loss": 0.6551, "step": 4489 }, { "epoch": 0.05368309042432358, "grad_norm": 2.8672568798065186, "learning_rate": 9.985325390758348e-06, "loss": 0.6446, "step": 4490 }, { "epoch": 0.05369504656918423, "grad_norm": 5.960527420043945, "learning_rate": 9.985310563951482e-06, "loss": 0.5975, "step": 4491 }, { "epoch": 0.05370700271404488, "grad_norm": 1.788878083229065, "learning_rate": 9.985295729669126e-06, "loss": 0.6663, "step": 4492 }, { "epoch": 0.053718958858905534, "grad_norm": 4.89085578918457, "learning_rate": 9.985280887911302e-06, "loss": 0.5973, "step": 4493 }, { "epoch": 0.053730915003766186, "grad_norm": 1.9847303628921509, "learning_rate": 9.985266038678031e-06, "loss": 0.5688, "step": 4494 }, { "epoch": 0.05374287114862684, "grad_norm": 2.519498586654663, "learning_rate": 9.985251181969337e-06, "loss": 0.717, "step": 4495 }, { "epoch": 0.05375482729348749, "grad_norm": 1.744063377380371, "learning_rate": 9.98523631778524e-06, "loss": 0.6042, "step": 4496 }, { "epoch": 0.05376678343834814, "grad_norm": 2.1110646724700928, "learning_rate": 9.985221446125763e-06, "loss": 0.6945, "step": 4497 }, { "epoch": 0.05377873958320879, "grad_norm": 3.0291998386383057, "learning_rate": 9.985206566990931e-06, "loss": 0.5841, "step": 4498 }, { "epoch": 0.053790695728069444, "grad_norm": 3.1493608951568604, "learning_rate": 9.98519168038076e-06, "loss": 0.5963, "step": 4499 }, { "epoch": 0.05380265187293009, "grad_norm": 3.138462781906128, "learning_rate": 9.98517678629528e-06, "loss": 0.6755, "step": 4500 }, { "epoch": 0.05381460801779074, "grad_norm": 1.7676091194152832, "learning_rate": 9.98516188473451e-06, "loss": 0.6659, "step": 4501 }, { "epoch": 0.05382656416265139, "grad_norm": 3.068951368331909, "learning_rate": 9.98514697569847e-06, "loss": 0.7951, "step": 4502 }, { "epoch": 0.053838520307512044, "grad_norm": 2.1624228954315186, "learning_rate": 9.985132059187184e-06, "loss": 0.673, "step": 4503 }, { "epoch": 0.053850476452372696, "grad_norm": 1.786372184753418, "learning_rate": 9.985117135200675e-06, "loss": 0.7101, "step": 4504 }, { "epoch": 0.05386243259723335, "grad_norm": 2.1450417041778564, "learning_rate": 9.985102203738968e-06, "loss": 0.7981, "step": 4505 }, { "epoch": 0.053874388742094, "grad_norm": 1.9976755380630493, "learning_rate": 9.985087264802079e-06, "loss": 0.5946, "step": 4506 }, { "epoch": 0.05388634488695465, "grad_norm": 2.795271396636963, "learning_rate": 9.985072318390036e-06, "loss": 0.7054, "step": 4507 }, { "epoch": 0.0538983010318153, "grad_norm": 2.572566032409668, "learning_rate": 9.985057364502857e-06, "loss": 0.604, "step": 4508 }, { "epoch": 0.053910257176675955, "grad_norm": 2.66707444190979, "learning_rate": 9.985042403140571e-06, "loss": 0.6622, "step": 4509 }, { "epoch": 0.053922213321536606, "grad_norm": 1.8419454097747803, "learning_rate": 9.985027434303192e-06, "loss": 0.5835, "step": 4510 }, { "epoch": 0.05393416946639726, "grad_norm": 16.23214340209961, "learning_rate": 9.985012457990748e-06, "loss": 0.6279, "step": 4511 }, { "epoch": 0.0539461256112579, "grad_norm": 2.7798080444335938, "learning_rate": 9.984997474203263e-06, "loss": 0.7064, "step": 4512 }, { "epoch": 0.053958081756118555, "grad_norm": 1.9837572574615479, "learning_rate": 9.984982482940752e-06, "loss": 0.6043, "step": 4513 }, { "epoch": 0.053970037900979206, "grad_norm": 2.7231268882751465, "learning_rate": 9.984967484203243e-06, "loss": 0.6509, "step": 4514 }, { "epoch": 0.05398199404583986, "grad_norm": 4.03129243850708, "learning_rate": 9.984952477990759e-06, "loss": 0.6286, "step": 4515 }, { "epoch": 0.05399395019070051, "grad_norm": 1.625328779220581, "learning_rate": 9.984937464303322e-06, "loss": 0.6198, "step": 4516 }, { "epoch": 0.05400590633556116, "grad_norm": 1.7285865545272827, "learning_rate": 9.984922443140951e-06, "loss": 0.5924, "step": 4517 }, { "epoch": 0.05401786248042181, "grad_norm": 6.444177627563477, "learning_rate": 9.984907414503673e-06, "loss": 0.7369, "step": 4518 }, { "epoch": 0.054029818625282465, "grad_norm": 2.2201220989227295, "learning_rate": 9.984892378391506e-06, "loss": 0.6753, "step": 4519 }, { "epoch": 0.05404177477014312, "grad_norm": 4.180936813354492, "learning_rate": 9.984877334804479e-06, "loss": 0.5897, "step": 4520 }, { "epoch": 0.05405373091500377, "grad_norm": 3.864449977874756, "learning_rate": 9.984862283742608e-06, "loss": 0.6027, "step": 4521 }, { "epoch": 0.05406568705986442, "grad_norm": 3.9542813301086426, "learning_rate": 9.984847225205918e-06, "loss": 0.6782, "step": 4522 }, { "epoch": 0.05407764320472507, "grad_norm": 2.2943530082702637, "learning_rate": 9.984832159194434e-06, "loss": 0.7162, "step": 4523 }, { "epoch": 0.05408959934958572, "grad_norm": 2.240360736846924, "learning_rate": 9.984817085708175e-06, "loss": 0.6156, "step": 4524 }, { "epoch": 0.05410155549444637, "grad_norm": 2.105983018875122, "learning_rate": 9.984802004747166e-06, "loss": 0.5868, "step": 4525 }, { "epoch": 0.05411351163930702, "grad_norm": 1.764526605606079, "learning_rate": 9.984786916311428e-06, "loss": 0.6181, "step": 4526 }, { "epoch": 0.05412546778416767, "grad_norm": 2.7294437885284424, "learning_rate": 9.984771820400983e-06, "loss": 0.6082, "step": 4527 }, { "epoch": 0.054137423929028324, "grad_norm": 2.5054500102996826, "learning_rate": 9.984756717015857e-06, "loss": 0.5961, "step": 4528 }, { "epoch": 0.054149380073888975, "grad_norm": 1.5952153205871582, "learning_rate": 9.984741606156069e-06, "loss": 0.4947, "step": 4529 }, { "epoch": 0.05416133621874963, "grad_norm": 2.859182119369507, "learning_rate": 9.984726487821645e-06, "loss": 0.6025, "step": 4530 }, { "epoch": 0.05417329236361028, "grad_norm": 2.3785974979400635, "learning_rate": 9.984711362012605e-06, "loss": 0.6217, "step": 4531 }, { "epoch": 0.05418524850847093, "grad_norm": 2.490816116333008, "learning_rate": 9.984696228728972e-06, "loss": 0.5648, "step": 4532 }, { "epoch": 0.05419720465333158, "grad_norm": 3.4301624298095703, "learning_rate": 9.984681087970771e-06, "loss": 0.6033, "step": 4533 }, { "epoch": 0.054209160798192234, "grad_norm": 3.247847557067871, "learning_rate": 9.98466593973802e-06, "loss": 0.6062, "step": 4534 }, { "epoch": 0.05422111694305288, "grad_norm": 3.860616445541382, "learning_rate": 9.984650784030748e-06, "loss": 0.5429, "step": 4535 }, { "epoch": 0.05423307308791353, "grad_norm": 1.5414338111877441, "learning_rate": 9.984635620848972e-06, "loss": 0.5753, "step": 4536 }, { "epoch": 0.05424502923277418, "grad_norm": 2.0948078632354736, "learning_rate": 9.98462045019272e-06, "loss": 0.656, "step": 4537 }, { "epoch": 0.054256985377634834, "grad_norm": 2.116318702697754, "learning_rate": 9.984605272062008e-06, "loss": 0.6237, "step": 4538 }, { "epoch": 0.054268941522495485, "grad_norm": 2.1342275142669678, "learning_rate": 9.984590086456865e-06, "loss": 0.5892, "step": 4539 }, { "epoch": 0.05428089766735614, "grad_norm": 2.170741558074951, "learning_rate": 9.984574893377313e-06, "loss": 0.5685, "step": 4540 }, { "epoch": 0.05429285381221679, "grad_norm": 4.702277660369873, "learning_rate": 9.98455969282337e-06, "loss": 0.6348, "step": 4541 }, { "epoch": 0.05430480995707744, "grad_norm": 2.5931825637817383, "learning_rate": 9.984544484795065e-06, "loss": 0.6421, "step": 4542 }, { "epoch": 0.05431676610193809, "grad_norm": 3.9704396724700928, "learning_rate": 9.984529269292416e-06, "loss": 0.6616, "step": 4543 }, { "epoch": 0.054328722246798744, "grad_norm": 3.725382089614868, "learning_rate": 9.984514046315447e-06, "loss": 0.78, "step": 4544 }, { "epoch": 0.054340678391659396, "grad_norm": 7.4424262046813965, "learning_rate": 9.984498815864183e-06, "loss": 0.6136, "step": 4545 }, { "epoch": 0.05435263453652005, "grad_norm": 3.1309165954589844, "learning_rate": 9.984483577938645e-06, "loss": 0.6695, "step": 4546 }, { "epoch": 0.05436459068138069, "grad_norm": 3.33367919921875, "learning_rate": 9.984468332538855e-06, "loss": 0.663, "step": 4547 }, { "epoch": 0.054376546826241344, "grad_norm": 2.5590672492980957, "learning_rate": 9.984453079664838e-06, "loss": 0.5654, "step": 4548 }, { "epoch": 0.054388502971101996, "grad_norm": 2.147392988204956, "learning_rate": 9.984437819316616e-06, "loss": 0.618, "step": 4549 }, { "epoch": 0.05440045911596265, "grad_norm": 2.8029160499572754, "learning_rate": 9.984422551494211e-06, "loss": 0.6804, "step": 4550 }, { "epoch": 0.0544124152608233, "grad_norm": 1.9181796312332153, "learning_rate": 9.984407276197647e-06, "loss": 0.7208, "step": 4551 }, { "epoch": 0.05442437140568395, "grad_norm": 3.3839199542999268, "learning_rate": 9.984391993426947e-06, "loss": 0.6013, "step": 4552 }, { "epoch": 0.0544363275505446, "grad_norm": 3.494211196899414, "learning_rate": 9.984376703182134e-06, "loss": 0.6285, "step": 4553 }, { "epoch": 0.054448283695405254, "grad_norm": 2.0520477294921875, "learning_rate": 9.98436140546323e-06, "loss": 0.647, "step": 4554 }, { "epoch": 0.054460239840265906, "grad_norm": 3.4510836601257324, "learning_rate": 9.984346100270259e-06, "loss": 0.6182, "step": 4555 }, { "epoch": 0.05447219598512656, "grad_norm": 2.645266056060791, "learning_rate": 9.984330787603241e-06, "loss": 0.6226, "step": 4556 }, { "epoch": 0.05448415212998721, "grad_norm": 2.4351766109466553, "learning_rate": 9.984315467462203e-06, "loss": 0.6575, "step": 4557 }, { "epoch": 0.05449610827484786, "grad_norm": 1.840915560722351, "learning_rate": 9.984300139847164e-06, "loss": 0.6658, "step": 4558 }, { "epoch": 0.054508064419708506, "grad_norm": 2.480794906616211, "learning_rate": 9.984284804758153e-06, "loss": 0.7208, "step": 4559 }, { "epoch": 0.05452002056456916, "grad_norm": 1.6000025272369385, "learning_rate": 9.984269462195187e-06, "loss": 0.6521, "step": 4560 }, { "epoch": 0.05453197670942981, "grad_norm": 2.2050652503967285, "learning_rate": 9.984254112158292e-06, "loss": 0.6578, "step": 4561 }, { "epoch": 0.05454393285429046, "grad_norm": 2.4135003089904785, "learning_rate": 9.98423875464749e-06, "loss": 0.5296, "step": 4562 }, { "epoch": 0.05455588899915111, "grad_norm": 2.5929319858551025, "learning_rate": 9.984223389662804e-06, "loss": 0.6258, "step": 4563 }, { "epoch": 0.054567845144011765, "grad_norm": 2.1984140872955322, "learning_rate": 9.984208017204256e-06, "loss": 0.6402, "step": 4564 }, { "epoch": 0.054579801288872416, "grad_norm": 21.86024284362793, "learning_rate": 9.984192637271872e-06, "loss": 0.6674, "step": 4565 }, { "epoch": 0.05459175743373307, "grad_norm": 1.6419802904129028, "learning_rate": 9.984177249865674e-06, "loss": 0.7386, "step": 4566 }, { "epoch": 0.05460371357859372, "grad_norm": 3.647449016571045, "learning_rate": 9.984161854985684e-06, "loss": 0.6746, "step": 4567 }, { "epoch": 0.05461566972345437, "grad_norm": 5.6363067626953125, "learning_rate": 9.984146452631924e-06, "loss": 0.6622, "step": 4568 }, { "epoch": 0.05462762586831502, "grad_norm": 6.076606750488281, "learning_rate": 9.98413104280442e-06, "loss": 0.7168, "step": 4569 }, { "epoch": 0.054639582013175675, "grad_norm": 3.6142752170562744, "learning_rate": 9.984115625503195e-06, "loss": 0.6161, "step": 4570 }, { "epoch": 0.05465153815803632, "grad_norm": 1.7329800128936768, "learning_rate": 9.984100200728268e-06, "loss": 0.6397, "step": 4571 }, { "epoch": 0.05466349430289697, "grad_norm": 1.88148832321167, "learning_rate": 9.984084768479667e-06, "loss": 0.6216, "step": 4572 }, { "epoch": 0.05467545044775762, "grad_norm": 3.0044445991516113, "learning_rate": 9.984069328757414e-06, "loss": 0.5977, "step": 4573 }, { "epoch": 0.054687406592618275, "grad_norm": 1.7306365966796875, "learning_rate": 9.98405388156153e-06, "loss": 0.6215, "step": 4574 }, { "epoch": 0.054699362737478927, "grad_norm": 1.888891339302063, "learning_rate": 9.984038426892039e-06, "loss": 0.6642, "step": 4575 }, { "epoch": 0.05471131888233958, "grad_norm": 19.75623893737793, "learning_rate": 9.984022964748965e-06, "loss": 0.6893, "step": 4576 }, { "epoch": 0.05472327502720023, "grad_norm": 2.1849939823150635, "learning_rate": 9.98400749513233e-06, "loss": 0.6712, "step": 4577 }, { "epoch": 0.05473523117206088, "grad_norm": 2.045313835144043, "learning_rate": 9.983992018042162e-06, "loss": 0.6322, "step": 4578 }, { "epoch": 0.05474718731692153, "grad_norm": 5.598787784576416, "learning_rate": 9.983976533478476e-06, "loss": 0.7129, "step": 4579 }, { "epoch": 0.054759143461782185, "grad_norm": 2.7962183952331543, "learning_rate": 9.983961041441302e-06, "loss": 0.6431, "step": 4580 }, { "epoch": 0.05477109960664284, "grad_norm": 3.168152093887329, "learning_rate": 9.983945541930659e-06, "loss": 0.6587, "step": 4581 }, { "epoch": 0.05478305575150349, "grad_norm": 2.3288631439208984, "learning_rate": 9.983930034946572e-06, "loss": 0.6395, "step": 4582 }, { "epoch": 0.05479501189636413, "grad_norm": 2.7460203170776367, "learning_rate": 9.983914520489064e-06, "loss": 0.6286, "step": 4583 }, { "epoch": 0.054806968041224785, "grad_norm": 3.2084689140319824, "learning_rate": 9.983898998558158e-06, "loss": 0.5668, "step": 4584 }, { "epoch": 0.05481892418608544, "grad_norm": 2.060443878173828, "learning_rate": 9.983883469153882e-06, "loss": 0.6403, "step": 4585 }, { "epoch": 0.05483088033094609, "grad_norm": 1.7267146110534668, "learning_rate": 9.983867932276252e-06, "loss": 0.5808, "step": 4586 }, { "epoch": 0.05484283647580674, "grad_norm": 2.2475426197052, "learning_rate": 9.983852387925292e-06, "loss": 0.5523, "step": 4587 }, { "epoch": 0.05485479262066739, "grad_norm": 2.57719349861145, "learning_rate": 9.983836836101031e-06, "loss": 0.6436, "step": 4588 }, { "epoch": 0.054866748765528044, "grad_norm": 1.5641621351242065, "learning_rate": 9.983821276803488e-06, "loss": 0.6105, "step": 4589 }, { "epoch": 0.054878704910388695, "grad_norm": 2.362044095993042, "learning_rate": 9.983805710032688e-06, "loss": 0.6451, "step": 4590 }, { "epoch": 0.05489066105524935, "grad_norm": 7.5351691246032715, "learning_rate": 9.983790135788652e-06, "loss": 0.623, "step": 4591 }, { "epoch": 0.05490261720011, "grad_norm": 2.7910516262054443, "learning_rate": 9.983774554071406e-06, "loss": 0.5205, "step": 4592 }, { "epoch": 0.05491457334497065, "grad_norm": 2.8917396068573, "learning_rate": 9.983758964880973e-06, "loss": 0.6019, "step": 4593 }, { "epoch": 0.0549265294898313, "grad_norm": 6.13397216796875, "learning_rate": 9.983743368217374e-06, "loss": 0.6608, "step": 4594 }, { "epoch": 0.05493848563469195, "grad_norm": 2.1112568378448486, "learning_rate": 9.983727764080635e-06, "loss": 0.633, "step": 4595 }, { "epoch": 0.0549504417795526, "grad_norm": 1.7151652574539185, "learning_rate": 9.983712152470777e-06, "loss": 0.6055, "step": 4596 }, { "epoch": 0.05496239792441325, "grad_norm": 2.8608996868133545, "learning_rate": 9.983696533387827e-06, "loss": 0.6642, "step": 4597 }, { "epoch": 0.0549743540692739, "grad_norm": 3.674356460571289, "learning_rate": 9.983680906831806e-06, "loss": 0.6281, "step": 4598 }, { "epoch": 0.054986310214134554, "grad_norm": 2.8815810680389404, "learning_rate": 9.983665272802737e-06, "loss": 0.6998, "step": 4599 }, { "epoch": 0.054998266358995206, "grad_norm": 4.404606819152832, "learning_rate": 9.983649631300645e-06, "loss": 0.6473, "step": 4600 }, { "epoch": 0.05501022250385586, "grad_norm": 2.414586067199707, "learning_rate": 9.983633982325551e-06, "loss": 0.6286, "step": 4601 }, { "epoch": 0.05502217864871651, "grad_norm": 2.671548366546631, "learning_rate": 9.983618325877483e-06, "loss": 0.591, "step": 4602 }, { "epoch": 0.05503413479357716, "grad_norm": 3.0256409645080566, "learning_rate": 9.98360266195646e-06, "loss": 0.6207, "step": 4603 }, { "epoch": 0.05504609093843781, "grad_norm": 2.674586772918701, "learning_rate": 9.983586990562506e-06, "loss": 0.6832, "step": 4604 }, { "epoch": 0.055058047083298464, "grad_norm": 8.921581268310547, "learning_rate": 9.983571311695647e-06, "loss": 0.674, "step": 4605 }, { "epoch": 0.05507000322815911, "grad_norm": 2.0176360607147217, "learning_rate": 9.983555625355904e-06, "loss": 0.6415, "step": 4606 }, { "epoch": 0.05508195937301976, "grad_norm": 2.238365650177002, "learning_rate": 9.983539931543303e-06, "loss": 0.569, "step": 4607 }, { "epoch": 0.05509391551788041, "grad_norm": 4.270402431488037, "learning_rate": 9.983524230257866e-06, "loss": 0.5725, "step": 4608 }, { "epoch": 0.055105871662741064, "grad_norm": 2.369654417037964, "learning_rate": 9.983508521499616e-06, "loss": 0.6204, "step": 4609 }, { "epoch": 0.055117827807601716, "grad_norm": 2.699580669403076, "learning_rate": 9.983492805268578e-06, "loss": 0.6842, "step": 4610 }, { "epoch": 0.05512978395246237, "grad_norm": 2.0318987369537354, "learning_rate": 9.983477081564774e-06, "loss": 0.6116, "step": 4611 }, { "epoch": 0.05514174009732302, "grad_norm": 1.8153291940689087, "learning_rate": 9.98346135038823e-06, "loss": 0.6398, "step": 4612 }, { "epoch": 0.05515369624218367, "grad_norm": 2.3313732147216797, "learning_rate": 9.983445611738966e-06, "loss": 0.6242, "step": 4613 }, { "epoch": 0.05516565238704432, "grad_norm": 2.9463729858398438, "learning_rate": 9.983429865617009e-06, "loss": 0.6231, "step": 4614 }, { "epoch": 0.055177608531904974, "grad_norm": 2.634549140930176, "learning_rate": 9.98341411202238e-06, "loss": 0.5691, "step": 4615 }, { "epoch": 0.055189564676765626, "grad_norm": 3.0849251747131348, "learning_rate": 9.983398350955104e-06, "loss": 0.6051, "step": 4616 }, { "epoch": 0.05520152082162628, "grad_norm": 3.213970184326172, "learning_rate": 9.983382582415205e-06, "loss": 0.6602, "step": 4617 }, { "epoch": 0.05521347696648692, "grad_norm": 6.40494441986084, "learning_rate": 9.983366806402705e-06, "loss": 0.6311, "step": 4618 }, { "epoch": 0.055225433111347574, "grad_norm": 6.4380011558532715, "learning_rate": 9.983351022917631e-06, "loss": 0.6555, "step": 4619 }, { "epoch": 0.055237389256208226, "grad_norm": 2.1437487602233887, "learning_rate": 9.983335231960004e-06, "loss": 0.6751, "step": 4620 }, { "epoch": 0.05524934540106888, "grad_norm": 1.9801222085952759, "learning_rate": 9.983319433529845e-06, "loss": 0.6755, "step": 4621 }, { "epoch": 0.05526130154592953, "grad_norm": 2.224756956100464, "learning_rate": 9.983303627627185e-06, "loss": 0.684, "step": 4622 }, { "epoch": 0.05527325769079018, "grad_norm": 2.925811290740967, "learning_rate": 9.983287814252041e-06, "loss": 0.6561, "step": 4623 }, { "epoch": 0.05528521383565083, "grad_norm": 2.226475477218628, "learning_rate": 9.98327199340444e-06, "loss": 0.6299, "step": 4624 }, { "epoch": 0.055297169980511485, "grad_norm": 3.275902032852173, "learning_rate": 9.983256165084405e-06, "loss": 0.6151, "step": 4625 }, { "epoch": 0.055309126125372136, "grad_norm": 3.139376640319824, "learning_rate": 9.983240329291958e-06, "loss": 0.6309, "step": 4626 }, { "epoch": 0.05532108227023279, "grad_norm": 10.847182273864746, "learning_rate": 9.983224486027126e-06, "loss": 0.6094, "step": 4627 }, { "epoch": 0.05533303841509344, "grad_norm": 2.861914873123169, "learning_rate": 9.983208635289931e-06, "loss": 0.6684, "step": 4628 }, { "epoch": 0.05534499455995409, "grad_norm": 3.0521273612976074, "learning_rate": 9.983192777080397e-06, "loss": 0.7268, "step": 4629 }, { "epoch": 0.055356950704814736, "grad_norm": 1.5317082405090332, "learning_rate": 9.983176911398548e-06, "loss": 0.58, "step": 4630 }, { "epoch": 0.05536890684967539, "grad_norm": 2.219003915786743, "learning_rate": 9.983161038244406e-06, "loss": 0.7152, "step": 4631 }, { "epoch": 0.05538086299453604, "grad_norm": 3.365384101867676, "learning_rate": 9.983145157617999e-06, "loss": 0.6196, "step": 4632 }, { "epoch": 0.05539281913939669, "grad_norm": 2.4042673110961914, "learning_rate": 9.983129269519347e-06, "loss": 0.6115, "step": 4633 }, { "epoch": 0.05540477528425734, "grad_norm": 2.489457130432129, "learning_rate": 9.983113373948475e-06, "loss": 0.634, "step": 4634 }, { "epoch": 0.055416731429117995, "grad_norm": 20.576873779296875, "learning_rate": 9.983097470905405e-06, "loss": 0.6518, "step": 4635 }, { "epoch": 0.05542868757397865, "grad_norm": 2.2185988426208496, "learning_rate": 9.983081560390165e-06, "loss": 0.6883, "step": 4636 }, { "epoch": 0.0554406437188393, "grad_norm": 1.951161503791809, "learning_rate": 9.983065642402774e-06, "loss": 0.579, "step": 4637 }, { "epoch": 0.05545259986369995, "grad_norm": 2.7190046310424805, "learning_rate": 9.98304971694326e-06, "loss": 0.7136, "step": 4638 }, { "epoch": 0.0554645560085606, "grad_norm": 1.9243601560592651, "learning_rate": 9.983033784011644e-06, "loss": 0.6151, "step": 4639 }, { "epoch": 0.055476512153421254, "grad_norm": 3.239866256713867, "learning_rate": 9.983017843607953e-06, "loss": 0.6676, "step": 4640 }, { "epoch": 0.055488468298281905, "grad_norm": 2.1523499488830566, "learning_rate": 9.983001895732207e-06, "loss": 0.6523, "step": 4641 }, { "epoch": 0.05550042444314255, "grad_norm": 6.551025390625, "learning_rate": 9.982985940384435e-06, "loss": 0.5794, "step": 4642 }, { "epoch": 0.0555123805880032, "grad_norm": 3.219512462615967, "learning_rate": 9.982969977564656e-06, "loss": 0.6521, "step": 4643 }, { "epoch": 0.05552433673286385, "grad_norm": 1.819331169128418, "learning_rate": 9.982954007272894e-06, "loss": 0.6227, "step": 4644 }, { "epoch": 0.055536292877724505, "grad_norm": 8.143912315368652, "learning_rate": 9.982938029509177e-06, "loss": 0.6502, "step": 4645 }, { "epoch": 0.05554824902258516, "grad_norm": 5.124748706817627, "learning_rate": 9.982922044273527e-06, "loss": 0.651, "step": 4646 }, { "epoch": 0.05556020516744581, "grad_norm": 4.931238174438477, "learning_rate": 9.982906051565965e-06, "loss": 0.6714, "step": 4647 }, { "epoch": 0.05557216131230646, "grad_norm": 4.6188507080078125, "learning_rate": 9.98289005138652e-06, "loss": 0.6529, "step": 4648 }, { "epoch": 0.05558411745716711, "grad_norm": 2.00756573677063, "learning_rate": 9.982874043735214e-06, "loss": 0.6371, "step": 4649 }, { "epoch": 0.055596073602027764, "grad_norm": 3.7606918811798096, "learning_rate": 9.98285802861207e-06, "loss": 0.6826, "step": 4650 }, { "epoch": 0.055608029746888415, "grad_norm": 13.57259464263916, "learning_rate": 9.982842006017112e-06, "loss": 0.7231, "step": 4651 }, { "epoch": 0.05561998589174907, "grad_norm": 3.970874309539795, "learning_rate": 9.982825975950366e-06, "loss": 0.5976, "step": 4652 }, { "epoch": 0.05563194203660972, "grad_norm": 2.208850860595703, "learning_rate": 9.982809938411854e-06, "loss": 0.6438, "step": 4653 }, { "epoch": 0.055643898181470364, "grad_norm": 1.7017781734466553, "learning_rate": 9.9827938934016e-06, "loss": 0.6441, "step": 4654 }, { "epoch": 0.055655854326331015, "grad_norm": 1.7552297115325928, "learning_rate": 9.98277784091963e-06, "loss": 0.6749, "step": 4655 }, { "epoch": 0.05566781047119167, "grad_norm": 4.496758937835693, "learning_rate": 9.982761780965967e-06, "loss": 0.6411, "step": 4656 }, { "epoch": 0.05567976661605232, "grad_norm": 5.368988037109375, "learning_rate": 9.982745713540634e-06, "loss": 0.6111, "step": 4657 }, { "epoch": 0.05569172276091297, "grad_norm": 1.8793405294418335, "learning_rate": 9.982729638643658e-06, "loss": 0.6472, "step": 4658 }, { "epoch": 0.05570367890577362, "grad_norm": 1.9229105710983276, "learning_rate": 9.982713556275058e-06, "loss": 0.6781, "step": 4659 }, { "epoch": 0.055715635050634274, "grad_norm": 1.5736902952194214, "learning_rate": 9.982697466434864e-06, "loss": 0.65, "step": 4660 }, { "epoch": 0.055727591195494926, "grad_norm": 1.8550318479537964, "learning_rate": 9.982681369123097e-06, "loss": 0.6208, "step": 4661 }, { "epoch": 0.05573954734035558, "grad_norm": 2.5543859004974365, "learning_rate": 9.982665264339782e-06, "loss": 0.6922, "step": 4662 }, { "epoch": 0.05575150348521623, "grad_norm": 2.6168651580810547, "learning_rate": 9.98264915208494e-06, "loss": 0.7131, "step": 4663 }, { "epoch": 0.05576345963007688, "grad_norm": 4.277859210968018, "learning_rate": 9.9826330323586e-06, "loss": 0.7086, "step": 4664 }, { "epoch": 0.05577541577493753, "grad_norm": 2.2308905124664307, "learning_rate": 9.982616905160785e-06, "loss": 0.6107, "step": 4665 }, { "epoch": 0.05578737191979818, "grad_norm": 3.4126651287078857, "learning_rate": 9.982600770491516e-06, "loss": 0.6776, "step": 4666 }, { "epoch": 0.05579932806465883, "grad_norm": 2.793426275253296, "learning_rate": 9.98258462835082e-06, "loss": 0.6331, "step": 4667 }, { "epoch": 0.05581128420951948, "grad_norm": 2.910485029220581, "learning_rate": 9.982568478738723e-06, "loss": 0.6301, "step": 4668 }, { "epoch": 0.05582324035438013, "grad_norm": 2.7993359565734863, "learning_rate": 9.982552321655243e-06, "loss": 0.5507, "step": 4669 }, { "epoch": 0.055835196499240784, "grad_norm": 1.4645278453826904, "learning_rate": 9.98253615710041e-06, "loss": 0.6173, "step": 4670 }, { "epoch": 0.055847152644101436, "grad_norm": 6.4464192390441895, "learning_rate": 9.982519985074249e-06, "loss": 0.619, "step": 4671 }, { "epoch": 0.05585910878896209, "grad_norm": 6.019668102264404, "learning_rate": 9.982503805576778e-06, "loss": 0.639, "step": 4672 }, { "epoch": 0.05587106493382274, "grad_norm": 2.869384765625, "learning_rate": 9.982487618608025e-06, "loss": 0.6405, "step": 4673 }, { "epoch": 0.05588302107868339, "grad_norm": 2.5806057453155518, "learning_rate": 9.982471424168015e-06, "loss": 0.6384, "step": 4674 }, { "epoch": 0.05589497722354404, "grad_norm": 2.0246694087982178, "learning_rate": 9.982455222256771e-06, "loss": 0.6604, "step": 4675 }, { "epoch": 0.055906933368404695, "grad_norm": 10.496179580688477, "learning_rate": 9.98243901287432e-06, "loss": 0.5764, "step": 4676 }, { "epoch": 0.05591888951326534, "grad_norm": 2.0311641693115234, "learning_rate": 9.98242279602068e-06, "loss": 0.6057, "step": 4677 }, { "epoch": 0.05593084565812599, "grad_norm": 2.090251922607422, "learning_rate": 9.982406571695881e-06, "loss": 0.5773, "step": 4678 }, { "epoch": 0.05594280180298664, "grad_norm": 2.2950737476348877, "learning_rate": 9.982390339899946e-06, "loss": 0.6456, "step": 4679 }, { "epoch": 0.055954757947847295, "grad_norm": 2.850445032119751, "learning_rate": 9.9823741006329e-06, "loss": 0.6041, "step": 4680 }, { "epoch": 0.055966714092707946, "grad_norm": 3.5201668739318848, "learning_rate": 9.982357853894765e-06, "loss": 0.5905, "step": 4681 }, { "epoch": 0.0559786702375686, "grad_norm": 2.5840694904327393, "learning_rate": 9.982341599685567e-06, "loss": 0.6281, "step": 4682 }, { "epoch": 0.05599062638242925, "grad_norm": 1.7759883403778076, "learning_rate": 9.982325338005329e-06, "loss": 0.5875, "step": 4683 }, { "epoch": 0.0560025825272899, "grad_norm": 1.4810391664505005, "learning_rate": 9.982309068854077e-06, "loss": 0.6134, "step": 4684 }, { "epoch": 0.05601453867215055, "grad_norm": 2.221156120300293, "learning_rate": 9.982292792231836e-06, "loss": 0.7315, "step": 4685 }, { "epoch": 0.056026494817011205, "grad_norm": 2.771512508392334, "learning_rate": 9.982276508138628e-06, "loss": 0.6694, "step": 4686 }, { "epoch": 0.05603845096187186, "grad_norm": 4.556904315948486, "learning_rate": 9.98226021657448e-06, "loss": 0.6293, "step": 4687 }, { "epoch": 0.05605040710673251, "grad_norm": 1.5860233306884766, "learning_rate": 9.982243917539414e-06, "loss": 0.5902, "step": 4688 }, { "epoch": 0.05606236325159315, "grad_norm": 2.906820058822632, "learning_rate": 9.982227611033455e-06, "loss": 0.766, "step": 4689 }, { "epoch": 0.056074319396453805, "grad_norm": 2.1171348094940186, "learning_rate": 9.98221129705663e-06, "loss": 0.5821, "step": 4690 }, { "epoch": 0.056086275541314456, "grad_norm": 2.145570755004883, "learning_rate": 9.98219497560896e-06, "loss": 0.6004, "step": 4691 }, { "epoch": 0.05609823168617511, "grad_norm": 1.887944221496582, "learning_rate": 9.98217864669047e-06, "loss": 0.5902, "step": 4692 }, { "epoch": 0.05611018783103576, "grad_norm": 1.89643394947052, "learning_rate": 9.982162310301188e-06, "loss": 0.59, "step": 4693 }, { "epoch": 0.05612214397589641, "grad_norm": 4.891425609588623, "learning_rate": 9.982145966441135e-06, "loss": 0.5739, "step": 4694 }, { "epoch": 0.05613410012075706, "grad_norm": 1.927113652229309, "learning_rate": 9.982129615110335e-06, "loss": 0.6679, "step": 4695 }, { "epoch": 0.056146056265617715, "grad_norm": 2.490408420562744, "learning_rate": 9.982113256308815e-06, "loss": 0.6948, "step": 4696 }, { "epoch": 0.05615801241047837, "grad_norm": 2.5089149475097656, "learning_rate": 9.9820968900366e-06, "loss": 0.6268, "step": 4697 }, { "epoch": 0.05616996855533902, "grad_norm": 1.5309637784957886, "learning_rate": 9.98208051629371e-06, "loss": 0.5071, "step": 4698 }, { "epoch": 0.05618192470019967, "grad_norm": 1.75736665725708, "learning_rate": 9.982064135080173e-06, "loss": 0.509, "step": 4699 }, { "epoch": 0.05619388084506032, "grad_norm": 2.5284347534179688, "learning_rate": 9.982047746396015e-06, "loss": 0.5683, "step": 4700 }, { "epoch": 0.05620583698992097, "grad_norm": 3.263509511947632, "learning_rate": 9.982031350241258e-06, "loss": 0.6463, "step": 4701 }, { "epoch": 0.05621779313478162, "grad_norm": 1.9058735370635986, "learning_rate": 9.982014946615926e-06, "loss": 0.7034, "step": 4702 }, { "epoch": 0.05622974927964227, "grad_norm": 2.925149440765381, "learning_rate": 9.981998535520045e-06, "loss": 0.6203, "step": 4703 }, { "epoch": 0.05624170542450292, "grad_norm": 4.6055450439453125, "learning_rate": 9.981982116953642e-06, "loss": 0.6418, "step": 4704 }, { "epoch": 0.056253661569363574, "grad_norm": 1.9860491752624512, "learning_rate": 9.981965690916738e-06, "loss": 0.6215, "step": 4705 }, { "epoch": 0.056265617714224225, "grad_norm": 1.7463608980178833, "learning_rate": 9.981949257409356e-06, "loss": 0.6611, "step": 4706 }, { "epoch": 0.05627757385908488, "grad_norm": 1.6179105043411255, "learning_rate": 9.981932816431525e-06, "loss": 0.6837, "step": 4707 }, { "epoch": 0.05628953000394553, "grad_norm": 3.3482401371002197, "learning_rate": 9.98191636798327e-06, "loss": 0.6528, "step": 4708 }, { "epoch": 0.05630148614880618, "grad_norm": 3.761054039001465, "learning_rate": 9.981899912064612e-06, "loss": 0.6267, "step": 4709 }, { "epoch": 0.05631344229366683, "grad_norm": 2.2229628562927246, "learning_rate": 9.981883448675578e-06, "loss": 0.6526, "step": 4710 }, { "epoch": 0.056325398438527484, "grad_norm": 2.1287038326263428, "learning_rate": 9.981866977816192e-06, "loss": 0.5589, "step": 4711 }, { "epoch": 0.056337354583388136, "grad_norm": 3.6362383365631104, "learning_rate": 9.98185049948648e-06, "loss": 0.6232, "step": 4712 }, { "epoch": 0.05634931072824878, "grad_norm": 6.740583896636963, "learning_rate": 9.981834013686463e-06, "loss": 0.6219, "step": 4713 }, { "epoch": 0.05636126687310943, "grad_norm": 2.2804791927337646, "learning_rate": 9.981817520416168e-06, "loss": 0.617, "step": 4714 }, { "epoch": 0.056373223017970084, "grad_norm": 8.130289077758789, "learning_rate": 9.981801019675621e-06, "loss": 0.5992, "step": 4715 }, { "epoch": 0.056385179162830736, "grad_norm": 2.415987491607666, "learning_rate": 9.981784511464846e-06, "loss": 0.6122, "step": 4716 }, { "epoch": 0.05639713530769139, "grad_norm": 2.3733363151550293, "learning_rate": 9.981767995783867e-06, "loss": 0.6152, "step": 4717 }, { "epoch": 0.05640909145255204, "grad_norm": 2.2716288566589355, "learning_rate": 9.98175147263271e-06, "loss": 0.6142, "step": 4718 }, { "epoch": 0.05642104759741269, "grad_norm": 3.0499484539031982, "learning_rate": 9.981734942011398e-06, "loss": 0.576, "step": 4719 }, { "epoch": 0.05643300374227334, "grad_norm": 3.9415462017059326, "learning_rate": 9.981718403919957e-06, "loss": 0.585, "step": 4720 }, { "epoch": 0.056444959887133994, "grad_norm": 1.8694939613342285, "learning_rate": 9.98170185835841e-06, "loss": 0.6343, "step": 4721 }, { "epoch": 0.056456916031994646, "grad_norm": 2.3260979652404785, "learning_rate": 9.981685305326785e-06, "loss": 0.5322, "step": 4722 }, { "epoch": 0.0564688721768553, "grad_norm": 1.9252352714538574, "learning_rate": 9.981668744825106e-06, "loss": 0.5781, "step": 4723 }, { "epoch": 0.05648082832171595, "grad_norm": 3.550747871398926, "learning_rate": 9.981652176853397e-06, "loss": 0.6337, "step": 4724 }, { "epoch": 0.056492784466576594, "grad_norm": 2.0935347080230713, "learning_rate": 9.981635601411682e-06, "loss": 0.7493, "step": 4725 }, { "epoch": 0.056504740611437246, "grad_norm": 1.8418077230453491, "learning_rate": 9.981619018499985e-06, "loss": 0.5669, "step": 4726 }, { "epoch": 0.0565166967562979, "grad_norm": 1.9337857961654663, "learning_rate": 9.981602428118336e-06, "loss": 0.5238, "step": 4727 }, { "epoch": 0.05652865290115855, "grad_norm": 2.898946523666382, "learning_rate": 9.981585830266753e-06, "loss": 0.6662, "step": 4728 }, { "epoch": 0.0565406090460192, "grad_norm": 2.803790807723999, "learning_rate": 9.981569224945267e-06, "loss": 0.6182, "step": 4729 }, { "epoch": 0.05655256519087985, "grad_norm": 4.761957168579102, "learning_rate": 9.981552612153899e-06, "loss": 0.606, "step": 4730 }, { "epoch": 0.056564521335740504, "grad_norm": 5.045748233795166, "learning_rate": 9.981535991892674e-06, "loss": 0.6436, "step": 4731 }, { "epoch": 0.056576477480601156, "grad_norm": 5.137528419494629, "learning_rate": 9.98151936416162e-06, "loss": 0.5751, "step": 4732 }, { "epoch": 0.05658843362546181, "grad_norm": 3.9916441440582275, "learning_rate": 9.98150272896076e-06, "loss": 0.676, "step": 4733 }, { "epoch": 0.05660038977032246, "grad_norm": 2.114973306655884, "learning_rate": 9.981486086290119e-06, "loss": 0.6229, "step": 4734 }, { "epoch": 0.05661234591518311, "grad_norm": 2.097982883453369, "learning_rate": 9.98146943614972e-06, "loss": 0.6466, "step": 4735 }, { "epoch": 0.056624302060043756, "grad_norm": 2.7440438270568848, "learning_rate": 9.981452778539593e-06, "loss": 0.6901, "step": 4736 }, { "epoch": 0.05663625820490441, "grad_norm": 2.6170427799224854, "learning_rate": 9.981436113459757e-06, "loss": 0.6456, "step": 4737 }, { "epoch": 0.05664821434976506, "grad_norm": 3.669109344482422, "learning_rate": 9.98141944091024e-06, "loss": 0.6857, "step": 4738 }, { "epoch": 0.05666017049462571, "grad_norm": 1.9256361722946167, "learning_rate": 9.98140276089107e-06, "loss": 0.5516, "step": 4739 }, { "epoch": 0.05667212663948636, "grad_norm": 2.7320611476898193, "learning_rate": 9.981386073402267e-06, "loss": 0.6017, "step": 4740 }, { "epoch": 0.056684082784347015, "grad_norm": 2.513437032699585, "learning_rate": 9.981369378443856e-06, "loss": 0.7187, "step": 4741 }, { "epoch": 0.056696038929207666, "grad_norm": 2.915304660797119, "learning_rate": 9.981352676015865e-06, "loss": 0.5639, "step": 4742 }, { "epoch": 0.05670799507406832, "grad_norm": 2.783862590789795, "learning_rate": 9.98133596611832e-06, "loss": 0.5915, "step": 4743 }, { "epoch": 0.05671995121892897, "grad_norm": 4.050806999206543, "learning_rate": 9.981319248751241e-06, "loss": 0.6246, "step": 4744 }, { "epoch": 0.05673190736378962, "grad_norm": 2.9380481243133545, "learning_rate": 9.981302523914658e-06, "loss": 0.6199, "step": 4745 }, { "epoch": 0.05674386350865027, "grad_norm": 3.076007843017578, "learning_rate": 9.981285791608593e-06, "loss": 0.5709, "step": 4746 }, { "epoch": 0.056755819653510925, "grad_norm": 1.8788506984710693, "learning_rate": 9.981269051833074e-06, "loss": 0.632, "step": 4747 }, { "epoch": 0.05676777579837157, "grad_norm": 2.1935555934906006, "learning_rate": 9.981252304588122e-06, "loss": 0.7329, "step": 4748 }, { "epoch": 0.05677973194323222, "grad_norm": 3.5900654792785645, "learning_rate": 9.981235549873767e-06, "loss": 0.6391, "step": 4749 }, { "epoch": 0.05679168808809287, "grad_norm": 1.530396819114685, "learning_rate": 9.981218787690029e-06, "loss": 0.6469, "step": 4750 }, { "epoch": 0.056803644232953525, "grad_norm": 2.080352306365967, "learning_rate": 9.981202018036937e-06, "loss": 0.6759, "step": 4751 }, { "epoch": 0.05681560037781418, "grad_norm": 1.7339913845062256, "learning_rate": 9.981185240914514e-06, "loss": 0.5941, "step": 4752 }, { "epoch": 0.05682755652267483, "grad_norm": 1.9750337600708008, "learning_rate": 9.981168456322787e-06, "loss": 0.5934, "step": 4753 }, { "epoch": 0.05683951266753548, "grad_norm": 1.2223000526428223, "learning_rate": 9.981151664261782e-06, "loss": 0.5771, "step": 4754 }, { "epoch": 0.05685146881239613, "grad_norm": 2.024292230606079, "learning_rate": 9.981134864731519e-06, "loss": 0.7093, "step": 4755 }, { "epoch": 0.056863424957256783, "grad_norm": 1.8207459449768066, "learning_rate": 9.981118057732029e-06, "loss": 0.6582, "step": 4756 }, { "epoch": 0.056875381102117435, "grad_norm": 1.8804386854171753, "learning_rate": 9.981101243263334e-06, "loss": 0.6988, "step": 4757 }, { "epoch": 0.05688733724697809, "grad_norm": 2.658412456512451, "learning_rate": 9.981084421325458e-06, "loss": 0.6865, "step": 4758 }, { "epoch": 0.05689929339183874, "grad_norm": 3.2776496410369873, "learning_rate": 9.981067591918431e-06, "loss": 0.5726, "step": 4759 }, { "epoch": 0.05691124953669938, "grad_norm": 179.44456481933594, "learning_rate": 9.981050755042274e-06, "loss": 0.7274, "step": 4760 }, { "epoch": 0.056923205681560035, "grad_norm": 3.7128050327301025, "learning_rate": 9.981033910697014e-06, "loss": 0.6266, "step": 4761 }, { "epoch": 0.05693516182642069, "grad_norm": 4.967628002166748, "learning_rate": 9.981017058882676e-06, "loss": 0.6947, "step": 4762 }, { "epoch": 0.05694711797128134, "grad_norm": 1.8561701774597168, "learning_rate": 9.981000199599285e-06, "loss": 0.6414, "step": 4763 }, { "epoch": 0.05695907411614199, "grad_norm": 1.7567334175109863, "learning_rate": 9.980983332846867e-06, "loss": 0.5735, "step": 4764 }, { "epoch": 0.05697103026100264, "grad_norm": 1.896474003791809, "learning_rate": 9.980966458625447e-06, "loss": 0.6253, "step": 4765 }, { "epoch": 0.056982986405863294, "grad_norm": 2.504223585128784, "learning_rate": 9.980949576935049e-06, "loss": 0.6438, "step": 4766 }, { "epoch": 0.056994942550723945, "grad_norm": 2.2663867473602295, "learning_rate": 9.9809326877757e-06, "loss": 0.6478, "step": 4767 }, { "epoch": 0.0570068986955846, "grad_norm": 1.4630622863769531, "learning_rate": 9.980915791147425e-06, "loss": 0.5024, "step": 4768 }, { "epoch": 0.05701885484044525, "grad_norm": 1.3814679384231567, "learning_rate": 9.98089888705025e-06, "loss": 0.5765, "step": 4769 }, { "epoch": 0.0570308109853059, "grad_norm": 2.27667498588562, "learning_rate": 9.980881975484199e-06, "loss": 0.6664, "step": 4770 }, { "epoch": 0.05704276713016655, "grad_norm": 1.8115911483764648, "learning_rate": 9.980865056449297e-06, "loss": 0.6413, "step": 4771 }, { "epoch": 0.0570547232750272, "grad_norm": 4.2376227378845215, "learning_rate": 9.980848129945568e-06, "loss": 0.7173, "step": 4772 }, { "epoch": 0.05706667941988785, "grad_norm": 2.092362403869629, "learning_rate": 9.980831195973042e-06, "loss": 0.6463, "step": 4773 }, { "epoch": 0.0570786355647485, "grad_norm": 1.9211562871932983, "learning_rate": 9.980814254531741e-06, "loss": 0.5616, "step": 4774 }, { "epoch": 0.05709059170960915, "grad_norm": 1.631227970123291, "learning_rate": 9.980797305621693e-06, "loss": 0.6583, "step": 4775 }, { "epoch": 0.057102547854469804, "grad_norm": 2.555908203125, "learning_rate": 9.98078034924292e-06, "loss": 0.6219, "step": 4776 }, { "epoch": 0.057114503999330456, "grad_norm": 2.635154962539673, "learning_rate": 9.98076338539545e-06, "loss": 0.6344, "step": 4777 }, { "epoch": 0.05712646014419111, "grad_norm": 2.668195962905884, "learning_rate": 9.980746414079307e-06, "loss": 0.6213, "step": 4778 }, { "epoch": 0.05713841628905176, "grad_norm": 3.674835205078125, "learning_rate": 9.980729435294518e-06, "loss": 0.6631, "step": 4779 }, { "epoch": 0.05715037243391241, "grad_norm": 3.4832167625427246, "learning_rate": 9.980712449041107e-06, "loss": 0.7581, "step": 4780 }, { "epoch": 0.05716232857877306, "grad_norm": 1.9295629262924194, "learning_rate": 9.9806954553191e-06, "loss": 0.7396, "step": 4781 }, { "epoch": 0.057174284723633714, "grad_norm": 1.8520333766937256, "learning_rate": 9.980678454128523e-06, "loss": 0.6108, "step": 4782 }, { "epoch": 0.057186240868494366, "grad_norm": 1.9295179843902588, "learning_rate": 9.9806614454694e-06, "loss": 0.665, "step": 4783 }, { "epoch": 0.05719819701335501, "grad_norm": 2.0792553424835205, "learning_rate": 9.980644429341757e-06, "loss": 0.6114, "step": 4784 }, { "epoch": 0.05721015315821566, "grad_norm": 1.847039818763733, "learning_rate": 9.980627405745622e-06, "loss": 0.5874, "step": 4785 }, { "epoch": 0.057222109303076314, "grad_norm": 11.919577598571777, "learning_rate": 9.980610374681017e-06, "loss": 0.6192, "step": 4786 }, { "epoch": 0.057234065447936966, "grad_norm": 2.504206418991089, "learning_rate": 9.98059333614797e-06, "loss": 0.6631, "step": 4787 }, { "epoch": 0.05724602159279762, "grad_norm": 2.4603254795074463, "learning_rate": 9.980576290146504e-06, "loss": 0.6434, "step": 4788 }, { "epoch": 0.05725797773765827, "grad_norm": 3.044151782989502, "learning_rate": 9.980559236676647e-06, "loss": 0.6312, "step": 4789 }, { "epoch": 0.05726993388251892, "grad_norm": 1.7998427152633667, "learning_rate": 9.980542175738424e-06, "loss": 0.6009, "step": 4790 }, { "epoch": 0.05728189002737957, "grad_norm": 3.138192892074585, "learning_rate": 9.98052510733186e-06, "loss": 0.6598, "step": 4791 }, { "epoch": 0.057293846172240225, "grad_norm": 2.513514518737793, "learning_rate": 9.980508031456981e-06, "loss": 0.6445, "step": 4792 }, { "epoch": 0.057305802317100876, "grad_norm": 2.8931784629821777, "learning_rate": 9.980490948113812e-06, "loss": 0.6286, "step": 4793 }, { "epoch": 0.05731775846196153, "grad_norm": 1.953311800956726, "learning_rate": 9.98047385730238e-06, "loss": 0.6343, "step": 4794 }, { "epoch": 0.05732971460682218, "grad_norm": 4.640477657318115, "learning_rate": 9.980456759022709e-06, "loss": 0.6136, "step": 4795 }, { "epoch": 0.057341670751682824, "grad_norm": 3.1241469383239746, "learning_rate": 9.980439653274827e-06, "loss": 0.6074, "step": 4796 }, { "epoch": 0.057353626896543476, "grad_norm": 2.8299267292022705, "learning_rate": 9.980422540058756e-06, "loss": 0.614, "step": 4797 }, { "epoch": 0.05736558304140413, "grad_norm": 1.7444639205932617, "learning_rate": 9.980405419374524e-06, "loss": 0.5418, "step": 4798 }, { "epoch": 0.05737753918626478, "grad_norm": 2.974611520767212, "learning_rate": 9.980388291222157e-06, "loss": 0.6073, "step": 4799 }, { "epoch": 0.05738949533112543, "grad_norm": 3.1500024795532227, "learning_rate": 9.98037115560168e-06, "loss": 0.6299, "step": 4800 }, { "epoch": 0.05740145147598608, "grad_norm": 4.370059490203857, "learning_rate": 9.980354012513119e-06, "loss": 0.681, "step": 4801 }, { "epoch": 0.057413407620846735, "grad_norm": 2.2582178115844727, "learning_rate": 9.980336861956499e-06, "loss": 0.576, "step": 4802 }, { "epoch": 0.057425363765707386, "grad_norm": 2.057593584060669, "learning_rate": 9.980319703931847e-06, "loss": 0.6476, "step": 4803 }, { "epoch": 0.05743731991056804, "grad_norm": 7.289271354675293, "learning_rate": 9.980302538439185e-06, "loss": 0.6007, "step": 4804 }, { "epoch": 0.05744927605542869, "grad_norm": 1.919110894203186, "learning_rate": 9.980285365478546e-06, "loss": 0.6223, "step": 4805 }, { "epoch": 0.05746123220028934, "grad_norm": 2.0830037593841553, "learning_rate": 9.980268185049949e-06, "loss": 0.6333, "step": 4806 }, { "epoch": 0.057473188345149986, "grad_norm": 1.4553184509277344, "learning_rate": 9.980250997153423e-06, "loss": 0.59, "step": 4807 }, { "epoch": 0.05748514449001064, "grad_norm": 2.5763001441955566, "learning_rate": 9.98023380178899e-06, "loss": 0.7112, "step": 4808 }, { "epoch": 0.05749710063487129, "grad_norm": 2.5305097103118896, "learning_rate": 9.980216598956682e-06, "loss": 0.7492, "step": 4809 }, { "epoch": 0.05750905677973194, "grad_norm": 1.768227458000183, "learning_rate": 9.980199388656521e-06, "loss": 0.6503, "step": 4810 }, { "epoch": 0.05752101292459259, "grad_norm": 1.6451588869094849, "learning_rate": 9.980182170888533e-06, "loss": 0.6267, "step": 4811 }, { "epoch": 0.057532969069453245, "grad_norm": 3.8512446880340576, "learning_rate": 9.980164945652744e-06, "loss": 0.5684, "step": 4812 }, { "epoch": 0.0575449252143139, "grad_norm": 2.806607246398926, "learning_rate": 9.98014771294918e-06, "loss": 0.6656, "step": 4813 }, { "epoch": 0.05755688135917455, "grad_norm": 1.7092398405075073, "learning_rate": 9.980130472777865e-06, "loss": 0.6589, "step": 4814 }, { "epoch": 0.0575688375040352, "grad_norm": 2.2691004276275635, "learning_rate": 9.98011322513883e-06, "loss": 0.6702, "step": 4815 }, { "epoch": 0.05758079364889585, "grad_norm": 7.056223392486572, "learning_rate": 9.980095970032095e-06, "loss": 0.6074, "step": 4816 }, { "epoch": 0.057592749793756504, "grad_norm": 2.2517759799957275, "learning_rate": 9.98007870745769e-06, "loss": 0.6167, "step": 4817 }, { "epoch": 0.057604705938617155, "grad_norm": 1.9076234102249146, "learning_rate": 9.980061437415638e-06, "loss": 0.5835, "step": 4818 }, { "epoch": 0.0576166620834778, "grad_norm": 1.7458232641220093, "learning_rate": 9.980044159905968e-06, "loss": 0.5499, "step": 4819 }, { "epoch": 0.05762861822833845, "grad_norm": 6.246662139892578, "learning_rate": 9.9800268749287e-06, "loss": 0.6883, "step": 4820 }, { "epoch": 0.057640574373199104, "grad_norm": 4.88377571105957, "learning_rate": 9.980009582483869e-06, "loss": 0.6751, "step": 4821 }, { "epoch": 0.057652530518059755, "grad_norm": 1.7189313173294067, "learning_rate": 9.979992282571493e-06, "loss": 0.6419, "step": 4822 }, { "epoch": 0.05766448666292041, "grad_norm": 3.776881456375122, "learning_rate": 9.979974975191602e-06, "loss": 0.6814, "step": 4823 }, { "epoch": 0.05767644280778106, "grad_norm": 3.435744047164917, "learning_rate": 9.979957660344221e-06, "loss": 0.6228, "step": 4824 }, { "epoch": 0.05768839895264171, "grad_norm": 2.438178777694702, "learning_rate": 9.979940338029375e-06, "loss": 0.6623, "step": 4825 }, { "epoch": 0.05770035509750236, "grad_norm": 1.9473224878311157, "learning_rate": 9.97992300824709e-06, "loss": 0.604, "step": 4826 }, { "epoch": 0.057712311242363014, "grad_norm": 3.9037487506866455, "learning_rate": 9.979905670997394e-06, "loss": 0.6527, "step": 4827 }, { "epoch": 0.057724267387223666, "grad_norm": 1.860485553741455, "learning_rate": 9.979888326280311e-06, "loss": 0.5762, "step": 4828 }, { "epoch": 0.05773622353208432, "grad_norm": 2.41025447845459, "learning_rate": 9.979870974095869e-06, "loss": 0.4999, "step": 4829 }, { "epoch": 0.05774817967694497, "grad_norm": 3.4904305934906006, "learning_rate": 9.979853614444092e-06, "loss": 0.681, "step": 4830 }, { "epoch": 0.057760135821805614, "grad_norm": 2.390432596206665, "learning_rate": 9.979836247325007e-06, "loss": 0.6751, "step": 4831 }, { "epoch": 0.057772091966666266, "grad_norm": 3.4119393825531006, "learning_rate": 9.97981887273864e-06, "loss": 0.6041, "step": 4832 }, { "epoch": 0.05778404811152692, "grad_norm": 2.2563183307647705, "learning_rate": 9.979801490685016e-06, "loss": 0.6775, "step": 4833 }, { "epoch": 0.05779600425638757, "grad_norm": 2.5805137157440186, "learning_rate": 9.979784101164162e-06, "loss": 0.6124, "step": 4834 }, { "epoch": 0.05780796040124822, "grad_norm": 1.8987782001495361, "learning_rate": 9.979766704176107e-06, "loss": 0.6259, "step": 4835 }, { "epoch": 0.05781991654610887, "grad_norm": 2.1197218894958496, "learning_rate": 9.97974929972087e-06, "loss": 0.6832, "step": 4836 }, { "epoch": 0.057831872690969524, "grad_norm": 2.0091896057128906, "learning_rate": 9.979731887798483e-06, "loss": 0.5768, "step": 4837 }, { "epoch": 0.057843828835830176, "grad_norm": 2.2865915298461914, "learning_rate": 9.97971446840897e-06, "loss": 0.4995, "step": 4838 }, { "epoch": 0.05785578498069083, "grad_norm": 1.9112467765808105, "learning_rate": 9.979697041552359e-06, "loss": 0.5175, "step": 4839 }, { "epoch": 0.05786774112555148, "grad_norm": 2.1119775772094727, "learning_rate": 9.979679607228672e-06, "loss": 0.6921, "step": 4840 }, { "epoch": 0.05787969727041213, "grad_norm": 3.4452104568481445, "learning_rate": 9.97966216543794e-06, "loss": 0.6735, "step": 4841 }, { "epoch": 0.05789165341527278, "grad_norm": 3.2056684494018555, "learning_rate": 9.979644716180186e-06, "loss": 0.7007, "step": 4842 }, { "epoch": 0.05790360956013343, "grad_norm": 2.5143542289733887, "learning_rate": 9.979627259455435e-06, "loss": 0.6395, "step": 4843 }, { "epoch": 0.05791556570499408, "grad_norm": 3.376248598098755, "learning_rate": 9.979609795263718e-06, "loss": 0.6865, "step": 4844 }, { "epoch": 0.05792752184985473, "grad_norm": 2.7678563594818115, "learning_rate": 9.979592323605058e-06, "loss": 0.5855, "step": 4845 }, { "epoch": 0.05793947799471538, "grad_norm": 3.9773411750793457, "learning_rate": 9.97957484447948e-06, "loss": 0.6691, "step": 4846 }, { "epoch": 0.057951434139576034, "grad_norm": 2.5725393295288086, "learning_rate": 9.979557357887015e-06, "loss": 0.6711, "step": 4847 }, { "epoch": 0.057963390284436686, "grad_norm": 6.98379373550415, "learning_rate": 9.979539863827683e-06, "loss": 0.6971, "step": 4848 }, { "epoch": 0.05797534642929734, "grad_norm": 3.227825403213501, "learning_rate": 9.979522362301514e-06, "loss": 0.6613, "step": 4849 }, { "epoch": 0.05798730257415799, "grad_norm": 2.152064561843872, "learning_rate": 9.979504853308536e-06, "loss": 0.5574, "step": 4850 }, { "epoch": 0.05799925871901864, "grad_norm": 2.390798807144165, "learning_rate": 9.979487336848768e-06, "loss": 0.6069, "step": 4851 }, { "epoch": 0.05801121486387929, "grad_norm": 1.9696637392044067, "learning_rate": 9.979469812922244e-06, "loss": 0.581, "step": 4852 }, { "epoch": 0.058023171008739945, "grad_norm": 3.079498052597046, "learning_rate": 9.979452281528988e-06, "loss": 0.5536, "step": 4853 }, { "epoch": 0.058035127153600596, "grad_norm": 2.148668050765991, "learning_rate": 9.979434742669024e-06, "loss": 0.641, "step": 4854 }, { "epoch": 0.05804708329846124, "grad_norm": 4.837893962860107, "learning_rate": 9.97941719634238e-06, "loss": 0.5849, "step": 4855 }, { "epoch": 0.05805903944332189, "grad_norm": 6.048989772796631, "learning_rate": 9.979399642549084e-06, "loss": 0.7549, "step": 4856 }, { "epoch": 0.058070995588182545, "grad_norm": 2.6378231048583984, "learning_rate": 9.979382081289158e-06, "loss": 0.7079, "step": 4857 }, { "epoch": 0.058082951733043196, "grad_norm": 6.106777667999268, "learning_rate": 9.979364512562633e-06, "loss": 0.5853, "step": 4858 }, { "epoch": 0.05809490787790385, "grad_norm": 1.8805774450302124, "learning_rate": 9.979346936369531e-06, "loss": 0.7315, "step": 4859 }, { "epoch": 0.0581068640227645, "grad_norm": 1.7345839738845825, "learning_rate": 9.979329352709883e-06, "loss": 0.5698, "step": 4860 }, { "epoch": 0.05811882016762515, "grad_norm": 2.357900619506836, "learning_rate": 9.979311761583713e-06, "loss": 0.6383, "step": 4861 }, { "epoch": 0.0581307763124858, "grad_norm": 2.103759527206421, "learning_rate": 9.979294162991045e-06, "loss": 0.5628, "step": 4862 }, { "epoch": 0.058142732457346455, "grad_norm": 3.8643269538879395, "learning_rate": 9.97927655693191e-06, "loss": 0.6365, "step": 4863 }, { "epoch": 0.05815468860220711, "grad_norm": 2.1933560371398926, "learning_rate": 9.97925894340633e-06, "loss": 0.5956, "step": 4864 }, { "epoch": 0.05816664474706776, "grad_norm": 7.839820384979248, "learning_rate": 9.979241322414334e-06, "loss": 0.588, "step": 4865 }, { "epoch": 0.05817860089192841, "grad_norm": 2.140199899673462, "learning_rate": 9.979223693955949e-06, "loss": 0.6061, "step": 4866 }, { "epoch": 0.058190557036789055, "grad_norm": 2.0406150817871094, "learning_rate": 9.979206058031199e-06, "loss": 0.6691, "step": 4867 }, { "epoch": 0.05820251318164971, "grad_norm": 5.592079162597656, "learning_rate": 9.979188414640114e-06, "loss": 0.5602, "step": 4868 }, { "epoch": 0.05821446932651036, "grad_norm": 1.9610716104507446, "learning_rate": 9.979170763782714e-06, "loss": 0.7009, "step": 4869 }, { "epoch": 0.05822642547137101, "grad_norm": 2.7269155979156494, "learning_rate": 9.979153105459035e-06, "loss": 0.615, "step": 4870 }, { "epoch": 0.05823838161623166, "grad_norm": 2.5033352375030518, "learning_rate": 9.979135439669095e-06, "loss": 0.6638, "step": 4871 }, { "epoch": 0.05825033776109231, "grad_norm": 2.391191244125366, "learning_rate": 9.979117766412924e-06, "loss": 0.5379, "step": 4872 }, { "epoch": 0.058262293905952965, "grad_norm": 2.96516752243042, "learning_rate": 9.97910008569055e-06, "loss": 0.6015, "step": 4873 }, { "epoch": 0.05827425005081362, "grad_norm": 2.3294997215270996, "learning_rate": 9.979082397501996e-06, "loss": 0.5821, "step": 4874 }, { "epoch": 0.05828620619567427, "grad_norm": 6.128208160400391, "learning_rate": 9.979064701847291e-06, "loss": 0.632, "step": 4875 }, { "epoch": 0.05829816234053492, "grad_norm": 1.7639086246490479, "learning_rate": 9.97904699872646e-06, "loss": 0.6095, "step": 4876 }, { "epoch": 0.05831011848539557, "grad_norm": 1.690186619758606, "learning_rate": 9.979029288139531e-06, "loss": 0.5933, "step": 4877 }, { "epoch": 0.05832207463025622, "grad_norm": 2.0415585041046143, "learning_rate": 9.97901157008653e-06, "loss": 0.5472, "step": 4878 }, { "epoch": 0.05833403077511687, "grad_norm": 2.838627576828003, "learning_rate": 9.978993844567483e-06, "loss": 0.6191, "step": 4879 }, { "epoch": 0.05834598691997752, "grad_norm": 2.8255465030670166, "learning_rate": 9.978976111582416e-06, "loss": 0.6878, "step": 4880 }, { "epoch": 0.05835794306483817, "grad_norm": 2.9320762157440186, "learning_rate": 9.978958371131359e-06, "loss": 0.5859, "step": 4881 }, { "epoch": 0.058369899209698824, "grad_norm": 3.1235768795013428, "learning_rate": 9.978940623214335e-06, "loss": 0.6211, "step": 4882 }, { "epoch": 0.058381855354559475, "grad_norm": 1.9116222858428955, "learning_rate": 9.97892286783137e-06, "loss": 0.6281, "step": 4883 }, { "epoch": 0.05839381149942013, "grad_norm": 3.9194252490997314, "learning_rate": 9.978905104982496e-06, "loss": 0.5936, "step": 4884 }, { "epoch": 0.05840576764428078, "grad_norm": 4.942624568939209, "learning_rate": 9.978887334667735e-06, "loss": 0.6114, "step": 4885 }, { "epoch": 0.05841772378914143, "grad_norm": 3.2804832458496094, "learning_rate": 9.978869556887114e-06, "loss": 0.5821, "step": 4886 }, { "epoch": 0.05842967993400208, "grad_norm": 2.1258790493011475, "learning_rate": 9.97885177164066e-06, "loss": 0.7151, "step": 4887 }, { "epoch": 0.058441636078862734, "grad_norm": 2.9879887104034424, "learning_rate": 9.978833978928402e-06, "loss": 0.6489, "step": 4888 }, { "epoch": 0.058453592223723386, "grad_norm": 4.811061859130859, "learning_rate": 9.978816178750363e-06, "loss": 0.6086, "step": 4889 }, { "epoch": 0.05846554836858403, "grad_norm": 2.204819679260254, "learning_rate": 9.97879837110657e-06, "loss": 0.6717, "step": 4890 }, { "epoch": 0.05847750451344468, "grad_norm": 2.596039295196533, "learning_rate": 9.978780555997053e-06, "loss": 0.6414, "step": 4891 }, { "epoch": 0.058489460658305334, "grad_norm": 3.249354839324951, "learning_rate": 9.978762733421839e-06, "loss": 0.6593, "step": 4892 }, { "epoch": 0.058501416803165986, "grad_norm": 1.8400429487228394, "learning_rate": 9.97874490338095e-06, "loss": 0.5278, "step": 4893 }, { "epoch": 0.05851337294802664, "grad_norm": 1.67067551612854, "learning_rate": 9.978727065874415e-06, "loss": 0.6109, "step": 4894 }, { "epoch": 0.05852532909288729, "grad_norm": 3.4224977493286133, "learning_rate": 9.978709220902262e-06, "loss": 0.6807, "step": 4895 }, { "epoch": 0.05853728523774794, "grad_norm": 6.7061076164245605, "learning_rate": 9.978691368464518e-06, "loss": 0.64, "step": 4896 }, { "epoch": 0.05854924138260859, "grad_norm": 3.6152350902557373, "learning_rate": 9.978673508561205e-06, "loss": 0.5762, "step": 4897 }, { "epoch": 0.058561197527469244, "grad_norm": 38.75541687011719, "learning_rate": 9.978655641192357e-06, "loss": 0.6876, "step": 4898 }, { "epoch": 0.058573153672329896, "grad_norm": 2.3309273719787598, "learning_rate": 9.978637766357995e-06, "loss": 0.616, "step": 4899 }, { "epoch": 0.05858510981719055, "grad_norm": 4.293458938598633, "learning_rate": 9.97861988405815e-06, "loss": 0.6061, "step": 4900 }, { "epoch": 0.0585970659620512, "grad_norm": 4.521827697753906, "learning_rate": 9.978601994292845e-06, "loss": 0.6242, "step": 4901 }, { "epoch": 0.058609022106911844, "grad_norm": 1.7909440994262695, "learning_rate": 9.97858409706211e-06, "loss": 0.6404, "step": 4902 }, { "epoch": 0.058620978251772496, "grad_norm": 8.183095932006836, "learning_rate": 9.978566192365968e-06, "loss": 0.7155, "step": 4903 }, { "epoch": 0.05863293439663315, "grad_norm": 2.5050532817840576, "learning_rate": 9.97854828020445e-06, "loss": 0.6601, "step": 4904 }, { "epoch": 0.0586448905414938, "grad_norm": 2.053022861480713, "learning_rate": 9.97853036057758e-06, "loss": 0.702, "step": 4905 }, { "epoch": 0.05865684668635445, "grad_norm": 3.7848715782165527, "learning_rate": 9.978512433485388e-06, "loss": 0.6934, "step": 4906 }, { "epoch": 0.0586688028312151, "grad_norm": 2.120271921157837, "learning_rate": 9.978494498927898e-06, "loss": 0.5771, "step": 4907 }, { "epoch": 0.058680758976075754, "grad_norm": 2.1999526023864746, "learning_rate": 9.978476556905139e-06, "loss": 0.4973, "step": 4908 }, { "epoch": 0.058692715120936406, "grad_norm": 2.5671815872192383, "learning_rate": 9.978458607417135e-06, "loss": 0.6318, "step": 4909 }, { "epoch": 0.05870467126579706, "grad_norm": 2.530895709991455, "learning_rate": 9.978440650463914e-06, "loss": 0.6314, "step": 4910 }, { "epoch": 0.05871662741065771, "grad_norm": 1.8991230726242065, "learning_rate": 9.978422686045506e-06, "loss": 0.6852, "step": 4911 }, { "epoch": 0.05872858355551836, "grad_norm": 2.8582890033721924, "learning_rate": 9.978404714161933e-06, "loss": 0.6533, "step": 4912 }, { "epoch": 0.05874053970037901, "grad_norm": 4.9956841468811035, "learning_rate": 9.978386734813226e-06, "loss": 0.6525, "step": 4913 }, { "epoch": 0.05875249584523966, "grad_norm": 2.278949737548828, "learning_rate": 9.97836874799941e-06, "loss": 0.5459, "step": 4914 }, { "epoch": 0.05876445199010031, "grad_norm": 3.9381542205810547, "learning_rate": 9.978350753720512e-06, "loss": 0.5251, "step": 4915 }, { "epoch": 0.05877640813496096, "grad_norm": 4.112914085388184, "learning_rate": 9.97833275197656e-06, "loss": 0.5359, "step": 4916 }, { "epoch": 0.05878836427982161, "grad_norm": 3.4332683086395264, "learning_rate": 9.978314742767581e-06, "loss": 0.6236, "step": 4917 }, { "epoch": 0.058800320424682265, "grad_norm": 2.273002862930298, "learning_rate": 9.9782967260936e-06, "loss": 0.6566, "step": 4918 }, { "epoch": 0.058812276569542916, "grad_norm": 2.7978193759918213, "learning_rate": 9.978278701954645e-06, "loss": 0.578, "step": 4919 }, { "epoch": 0.05882423271440357, "grad_norm": 2.617382049560547, "learning_rate": 9.978260670350746e-06, "loss": 0.5782, "step": 4920 }, { "epoch": 0.05883618885926422, "grad_norm": 2.6442835330963135, "learning_rate": 9.978242631281925e-06, "loss": 0.6379, "step": 4921 }, { "epoch": 0.05884814500412487, "grad_norm": 3.5316002368927, "learning_rate": 9.978224584748212e-06, "loss": 0.6406, "step": 4922 }, { "epoch": 0.05886010114898552, "grad_norm": 3.1141138076782227, "learning_rate": 9.978206530749634e-06, "loss": 0.6639, "step": 4923 }, { "epoch": 0.058872057293846175, "grad_norm": 3.9901037216186523, "learning_rate": 9.978188469286217e-06, "loss": 0.5831, "step": 4924 }, { "epoch": 0.05888401343870683, "grad_norm": 3.517373561859131, "learning_rate": 9.978170400357988e-06, "loss": 0.6257, "step": 4925 }, { "epoch": 0.05889596958356747, "grad_norm": 2.5368664264678955, "learning_rate": 9.978152323964975e-06, "loss": 0.6331, "step": 4926 }, { "epoch": 0.05890792572842812, "grad_norm": 2.65817928314209, "learning_rate": 9.978134240107206e-06, "loss": 0.6689, "step": 4927 }, { "epoch": 0.058919881873288775, "grad_norm": 2.2711169719696045, "learning_rate": 9.978116148784707e-06, "loss": 0.6255, "step": 4928 }, { "epoch": 0.05893183801814943, "grad_norm": 6.961314678192139, "learning_rate": 9.978098049997505e-06, "loss": 0.6188, "step": 4929 }, { "epoch": 0.05894379416301008, "grad_norm": 9.446502685546875, "learning_rate": 9.978079943745627e-06, "loss": 0.6686, "step": 4930 }, { "epoch": 0.05895575030787073, "grad_norm": 3.510775089263916, "learning_rate": 9.9780618300291e-06, "loss": 0.6021, "step": 4931 }, { "epoch": 0.05896770645273138, "grad_norm": 3.486375093460083, "learning_rate": 9.978043708847953e-06, "loss": 0.646, "step": 4932 }, { "epoch": 0.058979662597592034, "grad_norm": 3.668557643890381, "learning_rate": 9.97802558020221e-06, "loss": 0.739, "step": 4933 }, { "epoch": 0.058991618742452685, "grad_norm": 2.749568462371826, "learning_rate": 9.978007444091901e-06, "loss": 0.7389, "step": 4934 }, { "epoch": 0.05900357488731334, "grad_norm": 2.0106208324432373, "learning_rate": 9.977989300517052e-06, "loss": 0.6606, "step": 4935 }, { "epoch": 0.05901553103217399, "grad_norm": 3.34846830368042, "learning_rate": 9.97797114947769e-06, "loss": 0.6358, "step": 4936 }, { "epoch": 0.059027487177034633, "grad_norm": 3.936018705368042, "learning_rate": 9.977952990973842e-06, "loss": 0.5727, "step": 4937 }, { "epoch": 0.059039443321895285, "grad_norm": 3.121781826019287, "learning_rate": 9.977934825005538e-06, "loss": 0.6273, "step": 4938 }, { "epoch": 0.05905139946675594, "grad_norm": 1.9393320083618164, "learning_rate": 9.9779166515728e-06, "loss": 0.6218, "step": 4939 }, { "epoch": 0.05906335561161659, "grad_norm": 2.140402317047119, "learning_rate": 9.977898470675662e-06, "loss": 0.6069, "step": 4940 }, { "epoch": 0.05907531175647724, "grad_norm": 2.989598512649536, "learning_rate": 9.977880282314145e-06, "loss": 0.6639, "step": 4941 }, { "epoch": 0.05908726790133789, "grad_norm": 1.7305110692977905, "learning_rate": 9.977862086488279e-06, "loss": 0.6341, "step": 4942 }, { "epoch": 0.059099224046198544, "grad_norm": 2.4295544624328613, "learning_rate": 9.977843883198092e-06, "loss": 0.6373, "step": 4943 }, { "epoch": 0.059111180191059196, "grad_norm": 3.9422082901000977, "learning_rate": 9.97782567244361e-06, "loss": 0.6462, "step": 4944 }, { "epoch": 0.05912313633591985, "grad_norm": 3.0328280925750732, "learning_rate": 9.977807454224859e-06, "loss": 0.6161, "step": 4945 }, { "epoch": 0.0591350924807805, "grad_norm": 2.1882941722869873, "learning_rate": 9.97778922854187e-06, "loss": 0.5884, "step": 4946 }, { "epoch": 0.05914704862564115, "grad_norm": 4.3416314125061035, "learning_rate": 9.977770995394669e-06, "loss": 0.6852, "step": 4947 }, { "epoch": 0.0591590047705018, "grad_norm": 4.227261543273926, "learning_rate": 9.97775275478328e-06, "loss": 0.606, "step": 4948 }, { "epoch": 0.05917096091536245, "grad_norm": 2.2838287353515625, "learning_rate": 9.977734506707736e-06, "loss": 0.5897, "step": 4949 }, { "epoch": 0.0591829170602231, "grad_norm": 3.476996898651123, "learning_rate": 9.977716251168059e-06, "loss": 0.61, "step": 4950 }, { "epoch": 0.05919487320508375, "grad_norm": 2.5331056118011475, "learning_rate": 9.97769798816428e-06, "loss": 0.6383, "step": 4951 }, { "epoch": 0.0592068293499444, "grad_norm": 2.5245981216430664, "learning_rate": 9.977679717696426e-06, "loss": 0.5572, "step": 4952 }, { "epoch": 0.059218785494805054, "grad_norm": 6.02830171585083, "learning_rate": 9.977661439764523e-06, "loss": 0.6803, "step": 4953 }, { "epoch": 0.059230741639665706, "grad_norm": 2.9115822315216064, "learning_rate": 9.977643154368599e-06, "loss": 0.5247, "step": 4954 }, { "epoch": 0.05924269778452636, "grad_norm": 3.1382880210876465, "learning_rate": 9.97762486150868e-06, "loss": 0.6124, "step": 4955 }, { "epoch": 0.05925465392938701, "grad_norm": 2.538830280303955, "learning_rate": 9.977606561184797e-06, "loss": 0.5356, "step": 4956 }, { "epoch": 0.05926661007424766, "grad_norm": 2.5671780109405518, "learning_rate": 9.977588253396972e-06, "loss": 0.6683, "step": 4957 }, { "epoch": 0.05927856621910831, "grad_norm": 1.7281934022903442, "learning_rate": 9.97756993814524e-06, "loss": 0.5913, "step": 4958 }, { "epoch": 0.059290522363968964, "grad_norm": 2.401216983795166, "learning_rate": 9.977551615429621e-06, "loss": 0.6526, "step": 4959 }, { "epoch": 0.059302478508829616, "grad_norm": 3.0264179706573486, "learning_rate": 9.977533285250147e-06, "loss": 0.6985, "step": 4960 }, { "epoch": 0.05931443465369026, "grad_norm": 8.91159725189209, "learning_rate": 9.977514947606843e-06, "loss": 0.6083, "step": 4961 }, { "epoch": 0.05932639079855091, "grad_norm": 2.367560625076294, "learning_rate": 9.97749660249974e-06, "loss": 0.572, "step": 4962 }, { "epoch": 0.059338346943411564, "grad_norm": 7.637314796447754, "learning_rate": 9.97747824992886e-06, "loss": 0.6381, "step": 4963 }, { "epoch": 0.059350303088272216, "grad_norm": 2.6776788234710693, "learning_rate": 9.977459889894236e-06, "loss": 0.6637, "step": 4964 }, { "epoch": 0.05936225923313287, "grad_norm": 2.4139277935028076, "learning_rate": 9.977441522395892e-06, "loss": 0.6217, "step": 4965 }, { "epoch": 0.05937421537799352, "grad_norm": 2.9425389766693115, "learning_rate": 9.977423147433859e-06, "loss": 0.7452, "step": 4966 }, { "epoch": 0.05938617152285417, "grad_norm": 1.6231483221054077, "learning_rate": 9.977404765008158e-06, "loss": 0.5669, "step": 4967 }, { "epoch": 0.05939812766771482, "grad_norm": 8.761285781860352, "learning_rate": 9.977386375118824e-06, "loss": 0.6353, "step": 4968 }, { "epoch": 0.059410083812575475, "grad_norm": 2.5760719776153564, "learning_rate": 9.977367977765881e-06, "loss": 0.6551, "step": 4969 }, { "epoch": 0.059422039957436126, "grad_norm": 3.2788684368133545, "learning_rate": 9.977349572949358e-06, "loss": 0.6415, "step": 4970 }, { "epoch": 0.05943399610229678, "grad_norm": 2.3282251358032227, "learning_rate": 9.977331160669279e-06, "loss": 0.6515, "step": 4971 }, { "epoch": 0.05944595224715743, "grad_norm": 2.8089864253997803, "learning_rate": 9.977312740925675e-06, "loss": 0.6381, "step": 4972 }, { "epoch": 0.059457908392018075, "grad_norm": 1.855934500694275, "learning_rate": 9.977294313718575e-06, "loss": 0.5497, "step": 4973 }, { "epoch": 0.059469864536878726, "grad_norm": 2.040499687194824, "learning_rate": 9.977275879048003e-06, "loss": 0.6042, "step": 4974 }, { "epoch": 0.05948182068173938, "grad_norm": 3.132507801055908, "learning_rate": 9.977257436913988e-06, "loss": 0.5882, "step": 4975 }, { "epoch": 0.05949377682660003, "grad_norm": 4.849552154541016, "learning_rate": 9.977238987316557e-06, "loss": 0.676, "step": 4976 }, { "epoch": 0.05950573297146068, "grad_norm": 2.7293059825897217, "learning_rate": 9.97722053025574e-06, "loss": 0.643, "step": 4977 }, { "epoch": 0.05951768911632133, "grad_norm": 1.7436680793762207, "learning_rate": 9.977202065731563e-06, "loss": 0.6167, "step": 4978 }, { "epoch": 0.059529645261181985, "grad_norm": 2.2325873374938965, "learning_rate": 9.977183593744053e-06, "loss": 0.659, "step": 4979 }, { "epoch": 0.05954160140604264, "grad_norm": 1.8592532873153687, "learning_rate": 9.97716511429324e-06, "loss": 0.6247, "step": 4980 }, { "epoch": 0.05955355755090329, "grad_norm": 6.232499122619629, "learning_rate": 9.977146627379148e-06, "loss": 0.6057, "step": 4981 }, { "epoch": 0.05956551369576394, "grad_norm": 1.6612917184829712, "learning_rate": 9.977128133001808e-06, "loss": 0.5437, "step": 4982 }, { "epoch": 0.05957746984062459, "grad_norm": 2.239025831222534, "learning_rate": 9.977109631161245e-06, "loss": 0.5368, "step": 4983 }, { "epoch": 0.05958942598548524, "grad_norm": 6.430713653564453, "learning_rate": 9.977091121857492e-06, "loss": 0.7316, "step": 4984 }, { "epoch": 0.05960138213034589, "grad_norm": 2.1257781982421875, "learning_rate": 9.977072605090571e-06, "loss": 0.671, "step": 4985 }, { "epoch": 0.05961333827520654, "grad_norm": 3.5430428981781006, "learning_rate": 9.977054080860512e-06, "loss": 0.5739, "step": 4986 }, { "epoch": 0.05962529442006719, "grad_norm": 2.5688576698303223, "learning_rate": 9.977035549167342e-06, "loss": 0.6342, "step": 4987 }, { "epoch": 0.05963725056492784, "grad_norm": 2.2837493419647217, "learning_rate": 9.977017010011092e-06, "loss": 0.5852, "step": 4988 }, { "epoch": 0.059649206709788495, "grad_norm": 2.4272055625915527, "learning_rate": 9.976998463391785e-06, "loss": 0.7761, "step": 4989 }, { "epoch": 0.05966116285464915, "grad_norm": 2.0152981281280518, "learning_rate": 9.976979909309453e-06, "loss": 0.8527, "step": 4990 }, { "epoch": 0.0596731189995098, "grad_norm": 2.5733935832977295, "learning_rate": 9.97696134776412e-06, "loss": 0.7186, "step": 4991 }, { "epoch": 0.05968507514437045, "grad_norm": 6.42813777923584, "learning_rate": 9.976942778755816e-06, "loss": 0.6855, "step": 4992 }, { "epoch": 0.0596970312892311, "grad_norm": 2.725186586380005, "learning_rate": 9.97692420228457e-06, "loss": 0.6066, "step": 4993 }, { "epoch": 0.059708987434091754, "grad_norm": 4.270514011383057, "learning_rate": 9.976905618350407e-06, "loss": 0.562, "step": 4994 }, { "epoch": 0.059720943578952405, "grad_norm": 3.1048502922058105, "learning_rate": 9.976887026953356e-06, "loss": 0.6194, "step": 4995 }, { "epoch": 0.05973289972381306, "grad_norm": 2.366347312927246, "learning_rate": 9.976868428093447e-06, "loss": 0.6141, "step": 4996 }, { "epoch": 0.0597448558686737, "grad_norm": 1.7467210292816162, "learning_rate": 9.976849821770704e-06, "loss": 0.6348, "step": 4997 }, { "epoch": 0.059756812013534354, "grad_norm": 3.7444770336151123, "learning_rate": 9.97683120798516e-06, "loss": 0.7069, "step": 4998 }, { "epoch": 0.059768768158395005, "grad_norm": 2.3549914360046387, "learning_rate": 9.976812586736838e-06, "loss": 0.6351, "step": 4999 }, { "epoch": 0.05978072430325566, "grad_norm": 1.8375251293182373, "learning_rate": 9.976793958025768e-06, "loss": 0.6468, "step": 5000 }, { "epoch": 0.05979268044811631, "grad_norm": 1.7071990966796875, "learning_rate": 9.976775321851977e-06, "loss": 0.6177, "step": 5001 }, { "epoch": 0.05980463659297696, "grad_norm": 3.247825860977173, "learning_rate": 9.976756678215495e-06, "loss": 0.6205, "step": 5002 }, { "epoch": 0.05981659273783761, "grad_norm": 2.0225796699523926, "learning_rate": 9.976738027116348e-06, "loss": 0.7615, "step": 5003 }, { "epoch": 0.059828548882698264, "grad_norm": 1.6540606021881104, "learning_rate": 9.976719368554565e-06, "loss": 0.5195, "step": 5004 }, { "epoch": 0.059840505027558916, "grad_norm": 4.037696361541748, "learning_rate": 9.976700702530174e-06, "loss": 0.6369, "step": 5005 }, { "epoch": 0.05985246117241957, "grad_norm": 2.168419599533081, "learning_rate": 9.976682029043201e-06, "loss": 0.6564, "step": 5006 }, { "epoch": 0.05986441731728022, "grad_norm": 3.669471025466919, "learning_rate": 9.976663348093678e-06, "loss": 0.7358, "step": 5007 }, { "epoch": 0.059876373462140864, "grad_norm": 3.191331148147583, "learning_rate": 9.976644659681628e-06, "loss": 0.5558, "step": 5008 }, { "epoch": 0.059888329607001516, "grad_norm": 4.066779613494873, "learning_rate": 9.976625963807083e-06, "loss": 0.6521, "step": 5009 }, { "epoch": 0.05990028575186217, "grad_norm": 3.6245806217193604, "learning_rate": 9.976607260470071e-06, "loss": 0.6914, "step": 5010 }, { "epoch": 0.05991224189672282, "grad_norm": 3.095350503921509, "learning_rate": 9.976588549670616e-06, "loss": 0.5851, "step": 5011 }, { "epoch": 0.05992419804158347, "grad_norm": 2.498077869415283, "learning_rate": 9.97656983140875e-06, "loss": 0.6969, "step": 5012 }, { "epoch": 0.05993615418644412, "grad_norm": 2.2256507873535156, "learning_rate": 9.9765511056845e-06, "loss": 0.6379, "step": 5013 }, { "epoch": 0.059948110331304774, "grad_norm": 3.1054296493530273, "learning_rate": 9.976532372497893e-06, "loss": 0.6934, "step": 5014 }, { "epoch": 0.059960066476165426, "grad_norm": 3.0713021755218506, "learning_rate": 9.97651363184896e-06, "loss": 0.6905, "step": 5015 }, { "epoch": 0.05997202262102608, "grad_norm": 1.9191174507141113, "learning_rate": 9.976494883737726e-06, "loss": 0.6425, "step": 5016 }, { "epoch": 0.05998397876588673, "grad_norm": 3.400681495666504, "learning_rate": 9.97647612816422e-06, "loss": 0.607, "step": 5017 }, { "epoch": 0.05999593491074738, "grad_norm": 2.033710241317749, "learning_rate": 9.97645736512847e-06, "loss": 0.7599, "step": 5018 }, { "epoch": 0.06000789105560803, "grad_norm": 4.311028957366943, "learning_rate": 9.976438594630506e-06, "loss": 0.6018, "step": 5019 }, { "epoch": 0.06001984720046868, "grad_norm": 2.3219125270843506, "learning_rate": 9.976419816670354e-06, "loss": 0.7297, "step": 5020 }, { "epoch": 0.06003180334532933, "grad_norm": 2.1302990913391113, "learning_rate": 9.976401031248043e-06, "loss": 0.6243, "step": 5021 }, { "epoch": 0.06004375949018998, "grad_norm": 2.330946922302246, "learning_rate": 9.9763822383636e-06, "loss": 0.6313, "step": 5022 }, { "epoch": 0.06005571563505063, "grad_norm": 2.3151137828826904, "learning_rate": 9.976363438017053e-06, "loss": 0.654, "step": 5023 }, { "epoch": 0.060067671779911284, "grad_norm": 3.8414573669433594, "learning_rate": 9.976344630208434e-06, "loss": 0.6517, "step": 5024 }, { "epoch": 0.060079627924771936, "grad_norm": 2.109626054763794, "learning_rate": 9.976325814937767e-06, "loss": 0.8222, "step": 5025 }, { "epoch": 0.06009158406963259, "grad_norm": 4.369717121124268, "learning_rate": 9.976306992205083e-06, "loss": 0.613, "step": 5026 }, { "epoch": 0.06010354021449324, "grad_norm": 2.8830792903900146, "learning_rate": 9.976288162010408e-06, "loss": 0.7125, "step": 5027 }, { "epoch": 0.06011549635935389, "grad_norm": 2.6515047550201416, "learning_rate": 9.97626932435377e-06, "loss": 0.678, "step": 5028 }, { "epoch": 0.06012745250421454, "grad_norm": 2.733583688735962, "learning_rate": 9.976250479235202e-06, "loss": 0.6646, "step": 5029 }, { "epoch": 0.060139408649075195, "grad_norm": 3.1751177310943604, "learning_rate": 9.976231626654725e-06, "loss": 0.6703, "step": 5030 }, { "epoch": 0.060151364793935846, "grad_norm": 1.5428798198699951, "learning_rate": 9.976212766612374e-06, "loss": 0.6898, "step": 5031 }, { "epoch": 0.06016332093879649, "grad_norm": 2.7156612873077393, "learning_rate": 9.976193899108172e-06, "loss": 0.7082, "step": 5032 }, { "epoch": 0.06017527708365714, "grad_norm": 2.1770310401916504, "learning_rate": 9.976175024142151e-06, "loss": 0.6264, "step": 5033 }, { "epoch": 0.060187233228517795, "grad_norm": 3.205160617828369, "learning_rate": 9.976156141714337e-06, "loss": 0.714, "step": 5034 }, { "epoch": 0.060199189373378446, "grad_norm": 3.3863775730133057, "learning_rate": 9.976137251824761e-06, "loss": 0.6638, "step": 5035 }, { "epoch": 0.0602111455182391, "grad_norm": 3.3966314792633057, "learning_rate": 9.976118354473446e-06, "loss": 0.6088, "step": 5036 }, { "epoch": 0.06022310166309975, "grad_norm": 4.299707889556885, "learning_rate": 9.976099449660427e-06, "loss": 0.7005, "step": 5037 }, { "epoch": 0.0602350578079604, "grad_norm": 1.7523925304412842, "learning_rate": 9.976080537385728e-06, "loss": 0.7437, "step": 5038 }, { "epoch": 0.06024701395282105, "grad_norm": 1.9499660730361938, "learning_rate": 9.976061617649377e-06, "loss": 0.6841, "step": 5039 }, { "epoch": 0.060258970097681705, "grad_norm": 5.247425079345703, "learning_rate": 9.976042690451406e-06, "loss": 0.7223, "step": 5040 }, { "epoch": 0.06027092624254236, "grad_norm": 2.209829092025757, "learning_rate": 9.976023755791841e-06, "loss": 0.5587, "step": 5041 }, { "epoch": 0.06028288238740301, "grad_norm": 2.4273853302001953, "learning_rate": 9.97600481367071e-06, "loss": 0.6257, "step": 5042 }, { "epoch": 0.06029483853226366, "grad_norm": 3.9978110790252686, "learning_rate": 9.975985864088042e-06, "loss": 0.621, "step": 5043 }, { "epoch": 0.060306794677124305, "grad_norm": 1.8895797729492188, "learning_rate": 9.975966907043867e-06, "loss": 0.5917, "step": 5044 }, { "epoch": 0.06031875082198496, "grad_norm": 3.4442310333251953, "learning_rate": 9.975947942538212e-06, "loss": 0.5735, "step": 5045 }, { "epoch": 0.06033070696684561, "grad_norm": 1.811698317527771, "learning_rate": 9.975928970571102e-06, "loss": 0.5726, "step": 5046 }, { "epoch": 0.06034266311170626, "grad_norm": 8.090716361999512, "learning_rate": 9.975909991142571e-06, "loss": 0.7195, "step": 5047 }, { "epoch": 0.06035461925656691, "grad_norm": 1.6213289499282837, "learning_rate": 9.975891004252647e-06, "loss": 0.6082, "step": 5048 }, { "epoch": 0.060366575401427564, "grad_norm": 2.2148492336273193, "learning_rate": 9.975872009901355e-06, "loss": 0.6599, "step": 5049 }, { "epoch": 0.060378531546288215, "grad_norm": 2.3079757690429688, "learning_rate": 9.975853008088724e-06, "loss": 0.6751, "step": 5050 }, { "epoch": 0.06039048769114887, "grad_norm": 7.676765441894531, "learning_rate": 9.975833998814787e-06, "loss": 0.5805, "step": 5051 }, { "epoch": 0.06040244383600952, "grad_norm": 1.9500004053115845, "learning_rate": 9.975814982079568e-06, "loss": 0.7742, "step": 5052 }, { "epoch": 0.06041439998087017, "grad_norm": 2.673940420150757, "learning_rate": 9.975795957883095e-06, "loss": 0.5192, "step": 5053 }, { "epoch": 0.06042635612573082, "grad_norm": 2.9968667030334473, "learning_rate": 9.9757769262254e-06, "loss": 0.5922, "step": 5054 }, { "epoch": 0.060438312270591474, "grad_norm": 3.8424882888793945, "learning_rate": 9.975757887106509e-06, "loss": 0.5539, "step": 5055 }, { "epoch": 0.06045026841545212, "grad_norm": 5.991318702697754, "learning_rate": 9.975738840526452e-06, "loss": 0.6369, "step": 5056 }, { "epoch": 0.06046222456031277, "grad_norm": 2.227992296218872, "learning_rate": 9.975719786485255e-06, "loss": 0.6052, "step": 5057 }, { "epoch": 0.06047418070517342, "grad_norm": 4.696300029754639, "learning_rate": 9.97570072498295e-06, "loss": 0.6595, "step": 5058 }, { "epoch": 0.060486136850034074, "grad_norm": 2.7955501079559326, "learning_rate": 9.975681656019565e-06, "loss": 0.6928, "step": 5059 }, { "epoch": 0.060498092994894725, "grad_norm": 2.48724102973938, "learning_rate": 9.975662579595126e-06, "loss": 0.6662, "step": 5060 }, { "epoch": 0.06051004913975538, "grad_norm": 2.400296688079834, "learning_rate": 9.975643495709665e-06, "loss": 0.5715, "step": 5061 }, { "epoch": 0.06052200528461603, "grad_norm": 2.943354368209839, "learning_rate": 9.975624404363208e-06, "loss": 0.6378, "step": 5062 }, { "epoch": 0.06053396142947668, "grad_norm": 2.38485050201416, "learning_rate": 9.975605305555784e-06, "loss": 0.6761, "step": 5063 }, { "epoch": 0.06054591757433733, "grad_norm": 4.496350288391113, "learning_rate": 9.975586199287423e-06, "loss": 0.5178, "step": 5064 }, { "epoch": 0.060557873719197984, "grad_norm": 5.7691826820373535, "learning_rate": 9.975567085558154e-06, "loss": 0.6567, "step": 5065 }, { "epoch": 0.060569829864058636, "grad_norm": 1.675719976425171, "learning_rate": 9.975547964368001e-06, "loss": 0.5324, "step": 5066 }, { "epoch": 0.06058178600891929, "grad_norm": 2.1099185943603516, "learning_rate": 9.975528835716998e-06, "loss": 0.6041, "step": 5067 }, { "epoch": 0.06059374215377993, "grad_norm": 3.791553258895874, "learning_rate": 9.975509699605173e-06, "loss": 0.6135, "step": 5068 }, { "epoch": 0.060605698298640584, "grad_norm": 2.3369967937469482, "learning_rate": 9.975490556032553e-06, "loss": 0.6038, "step": 5069 }, { "epoch": 0.060617654443501236, "grad_norm": 1.8031492233276367, "learning_rate": 9.975471404999165e-06, "loss": 0.5886, "step": 5070 }, { "epoch": 0.06062961058836189, "grad_norm": 1.934662103652954, "learning_rate": 9.975452246505041e-06, "loss": 0.5715, "step": 5071 }, { "epoch": 0.06064156673322254, "grad_norm": 3.940239429473877, "learning_rate": 9.97543308055021e-06, "loss": 0.6204, "step": 5072 }, { "epoch": 0.06065352287808319, "grad_norm": 4.3596296310424805, "learning_rate": 9.975413907134699e-06, "loss": 0.6619, "step": 5073 }, { "epoch": 0.06066547902294384, "grad_norm": 1.5940790176391602, "learning_rate": 9.975394726258536e-06, "loss": 0.7138, "step": 5074 }, { "epoch": 0.060677435167804494, "grad_norm": 1.7867958545684814, "learning_rate": 9.975375537921752e-06, "loss": 0.5412, "step": 5075 }, { "epoch": 0.060689391312665146, "grad_norm": 6.342235565185547, "learning_rate": 9.975356342124374e-06, "loss": 0.6446, "step": 5076 }, { "epoch": 0.0607013474575258, "grad_norm": 1.8683701753616333, "learning_rate": 9.975337138866432e-06, "loss": 0.6247, "step": 5077 }, { "epoch": 0.06071330360238645, "grad_norm": 3.0296449661254883, "learning_rate": 9.975317928147954e-06, "loss": 0.6235, "step": 5078 }, { "epoch": 0.060725259747247094, "grad_norm": 2.0198426246643066, "learning_rate": 9.97529870996897e-06, "loss": 0.6746, "step": 5079 }, { "epoch": 0.060737215892107746, "grad_norm": 3.05004620552063, "learning_rate": 9.975279484329507e-06, "loss": 0.6367, "step": 5080 }, { "epoch": 0.0607491720369684, "grad_norm": 2.7186667919158936, "learning_rate": 9.975260251229594e-06, "loss": 0.6082, "step": 5081 }, { "epoch": 0.06076112818182905, "grad_norm": 2.06990385055542, "learning_rate": 9.975241010669262e-06, "loss": 0.6479, "step": 5082 }, { "epoch": 0.0607730843266897, "grad_norm": 1.9914429187774658, "learning_rate": 9.975221762648538e-06, "loss": 0.5614, "step": 5083 }, { "epoch": 0.06078504047155035, "grad_norm": 1.9619487524032593, "learning_rate": 9.975202507167451e-06, "loss": 0.6569, "step": 5084 }, { "epoch": 0.060796996616411005, "grad_norm": 2.4119694232940674, "learning_rate": 9.97518324422603e-06, "loss": 0.6035, "step": 5085 }, { "epoch": 0.060808952761271656, "grad_norm": 2.3779335021972656, "learning_rate": 9.975163973824304e-06, "loss": 0.6624, "step": 5086 }, { "epoch": 0.06082090890613231, "grad_norm": 3.2618792057037354, "learning_rate": 9.975144695962303e-06, "loss": 0.6242, "step": 5087 }, { "epoch": 0.06083286505099296, "grad_norm": 3.1008877754211426, "learning_rate": 9.975125410640054e-06, "loss": 0.5828, "step": 5088 }, { "epoch": 0.06084482119585361, "grad_norm": 12.244906425476074, "learning_rate": 9.975106117857587e-06, "loss": 0.6713, "step": 5089 }, { "epoch": 0.06085677734071426, "grad_norm": 3.6650924682617188, "learning_rate": 9.975086817614931e-06, "loss": 0.6834, "step": 5090 }, { "epoch": 0.06086873348557491, "grad_norm": 2.0335705280303955, "learning_rate": 9.975067509912115e-06, "loss": 0.6694, "step": 5091 }, { "epoch": 0.06088068963043556, "grad_norm": 2.714301347732544, "learning_rate": 9.975048194749166e-06, "loss": 0.5983, "step": 5092 }, { "epoch": 0.06089264577529621, "grad_norm": 125.77906036376953, "learning_rate": 9.975028872126115e-06, "loss": 0.6573, "step": 5093 }, { "epoch": 0.06090460192015686, "grad_norm": 6.311562538146973, "learning_rate": 9.97500954204299e-06, "loss": 0.7223, "step": 5094 }, { "epoch": 0.060916558065017515, "grad_norm": 2.422255277633667, "learning_rate": 9.974990204499823e-06, "loss": 0.6921, "step": 5095 }, { "epoch": 0.060928514209878167, "grad_norm": 3.1647446155548096, "learning_rate": 9.974970859496637e-06, "loss": 0.7057, "step": 5096 }, { "epoch": 0.06094047035473882, "grad_norm": 9.222628593444824, "learning_rate": 9.974951507033468e-06, "loss": 0.6258, "step": 5097 }, { "epoch": 0.06095242649959947, "grad_norm": 2.7001898288726807, "learning_rate": 9.974932147110339e-06, "loss": 0.5733, "step": 5098 }, { "epoch": 0.06096438264446012, "grad_norm": 97.12152099609375, "learning_rate": 9.974912779727284e-06, "loss": 0.6655, "step": 5099 }, { "epoch": 0.06097633878932077, "grad_norm": 5.882547855377197, "learning_rate": 9.974893404884326e-06, "loss": 0.7496, "step": 5100 }, { "epoch": 0.060988294934181425, "grad_norm": 2.444798707962036, "learning_rate": 9.9748740225815e-06, "loss": 0.6101, "step": 5101 }, { "epoch": 0.06100025107904208, "grad_norm": 2.3375980854034424, "learning_rate": 9.974854632818834e-06, "loss": 0.6012, "step": 5102 }, { "epoch": 0.06101220722390272, "grad_norm": 2.6725990772247314, "learning_rate": 9.974835235596353e-06, "loss": 0.6731, "step": 5103 }, { "epoch": 0.06102416336876337, "grad_norm": 2.6909122467041016, "learning_rate": 9.97481583091409e-06, "loss": 0.6768, "step": 5104 }, { "epoch": 0.061036119513624025, "grad_norm": 2.147185802459717, "learning_rate": 9.974796418772073e-06, "loss": 0.6443, "step": 5105 }, { "epoch": 0.06104807565848468, "grad_norm": 2.716573476791382, "learning_rate": 9.974776999170333e-06, "loss": 0.6114, "step": 5106 }, { "epoch": 0.06106003180334533, "grad_norm": 5.767584800720215, "learning_rate": 9.974757572108896e-06, "loss": 0.6087, "step": 5107 }, { "epoch": 0.06107198794820598, "grad_norm": 2.123178482055664, "learning_rate": 9.97473813758779e-06, "loss": 0.6277, "step": 5108 }, { "epoch": 0.06108394409306663, "grad_norm": 1.8285002708435059, "learning_rate": 9.974718695607048e-06, "loss": 0.5622, "step": 5109 }, { "epoch": 0.061095900237927284, "grad_norm": 2.355424642562866, "learning_rate": 9.974699246166697e-06, "loss": 0.7264, "step": 5110 }, { "epoch": 0.061107856382787935, "grad_norm": 2.745872974395752, "learning_rate": 9.97467978926677e-06, "loss": 0.5867, "step": 5111 }, { "epoch": 0.06111981252764859, "grad_norm": 3.160128593444824, "learning_rate": 9.97466032490729e-06, "loss": 0.6061, "step": 5112 }, { "epoch": 0.06113176867250924, "grad_norm": 3.2545793056488037, "learning_rate": 9.974640853088288e-06, "loss": 0.6706, "step": 5113 }, { "epoch": 0.06114372481736989, "grad_norm": 1.9540327787399292, "learning_rate": 9.974621373809797e-06, "loss": 0.6535, "step": 5114 }, { "epoch": 0.061155680962230535, "grad_norm": 2.5194754600524902, "learning_rate": 9.974601887071843e-06, "loss": 0.6436, "step": 5115 }, { "epoch": 0.06116763710709119, "grad_norm": 3.7720694541931152, "learning_rate": 9.974582392874455e-06, "loss": 0.6102, "step": 5116 }, { "epoch": 0.06117959325195184, "grad_norm": 1.6237266063690186, "learning_rate": 9.974562891217666e-06, "loss": 0.5652, "step": 5117 }, { "epoch": 0.06119154939681249, "grad_norm": 1.9549293518066406, "learning_rate": 9.974543382101497e-06, "loss": 0.5345, "step": 5118 }, { "epoch": 0.06120350554167314, "grad_norm": 3.5446701049804688, "learning_rate": 9.974523865525988e-06, "loss": 0.6112, "step": 5119 }, { "epoch": 0.061215461686533794, "grad_norm": 3.744356393814087, "learning_rate": 9.974504341491158e-06, "loss": 0.5454, "step": 5120 }, { "epoch": 0.061227417831394446, "grad_norm": 1.8367564678192139, "learning_rate": 9.974484809997044e-06, "loss": 0.5539, "step": 5121 }, { "epoch": 0.0612393739762551, "grad_norm": 3.425933361053467, "learning_rate": 9.974465271043672e-06, "loss": 0.6222, "step": 5122 }, { "epoch": 0.06125133012111575, "grad_norm": 4.354777812957764, "learning_rate": 9.97444572463107e-06, "loss": 0.5464, "step": 5123 }, { "epoch": 0.0612632862659764, "grad_norm": 2.149897336959839, "learning_rate": 9.97442617075927e-06, "loss": 0.6945, "step": 5124 }, { "epoch": 0.06127524241083705, "grad_norm": 2.7963850498199463, "learning_rate": 9.9744066094283e-06, "loss": 0.6361, "step": 5125 }, { "epoch": 0.061287198555697704, "grad_norm": 14.449825286865234, "learning_rate": 9.974387040638191e-06, "loss": 0.6611, "step": 5126 }, { "epoch": 0.06129915470055835, "grad_norm": 2.2051916122436523, "learning_rate": 9.97436746438897e-06, "loss": 0.5605, "step": 5127 }, { "epoch": 0.061311110845419, "grad_norm": 3.8418726921081543, "learning_rate": 9.974347880680666e-06, "loss": 0.6091, "step": 5128 }, { "epoch": 0.06132306699027965, "grad_norm": 3.433102607727051, "learning_rate": 9.97432828951331e-06, "loss": 0.6441, "step": 5129 }, { "epoch": 0.061335023135140304, "grad_norm": 1.7201268672943115, "learning_rate": 9.974308690886934e-06, "loss": 0.6211, "step": 5130 }, { "epoch": 0.061346979280000956, "grad_norm": 2.734130382537842, "learning_rate": 9.974289084801563e-06, "loss": 0.5426, "step": 5131 }, { "epoch": 0.06135893542486161, "grad_norm": 2.0214736461639404, "learning_rate": 9.974269471257226e-06, "loss": 0.7104, "step": 5132 }, { "epoch": 0.06137089156972226, "grad_norm": 3.3398873805999756, "learning_rate": 9.974249850253957e-06, "loss": 0.6559, "step": 5133 }, { "epoch": 0.06138284771458291, "grad_norm": 3.8533987998962402, "learning_rate": 9.97423022179178e-06, "loss": 0.5071, "step": 5134 }, { "epoch": 0.06139480385944356, "grad_norm": 11.211297988891602, "learning_rate": 9.974210585870727e-06, "loss": 0.6821, "step": 5135 }, { "epoch": 0.061406760004304214, "grad_norm": 1.9946213960647583, "learning_rate": 9.97419094249083e-06, "loss": 0.6652, "step": 5136 }, { "epoch": 0.061418716149164866, "grad_norm": 1.8256951570510864, "learning_rate": 9.974171291652115e-06, "loss": 0.6223, "step": 5137 }, { "epoch": 0.06143067229402552, "grad_norm": 2.647596836090088, "learning_rate": 9.974151633354612e-06, "loss": 0.5889, "step": 5138 }, { "epoch": 0.06144262843888616, "grad_norm": 5.8188347816467285, "learning_rate": 9.974131967598351e-06, "loss": 0.653, "step": 5139 }, { "epoch": 0.061454584583746814, "grad_norm": 4.895063400268555, "learning_rate": 9.974112294383363e-06, "loss": 0.6713, "step": 5140 }, { "epoch": 0.061466540728607466, "grad_norm": 3.4063172340393066, "learning_rate": 9.974092613709673e-06, "loss": 0.5232, "step": 5141 }, { "epoch": 0.06147849687346812, "grad_norm": 3.6757888793945312, "learning_rate": 9.974072925577315e-06, "loss": 0.6149, "step": 5142 }, { "epoch": 0.06149045301832877, "grad_norm": 1.9899687767028809, "learning_rate": 9.974053229986318e-06, "loss": 0.6715, "step": 5143 }, { "epoch": 0.06150240916318942, "grad_norm": 2.8816885948181152, "learning_rate": 9.974033526936708e-06, "loss": 0.6088, "step": 5144 }, { "epoch": 0.06151436530805007, "grad_norm": 3.0365350246429443, "learning_rate": 9.974013816428519e-06, "loss": 0.5951, "step": 5145 }, { "epoch": 0.061526321452910725, "grad_norm": 2.580484628677368, "learning_rate": 9.97399409846178e-06, "loss": 0.6918, "step": 5146 }, { "epoch": 0.061538277597771376, "grad_norm": 4.536003112792969, "learning_rate": 9.973974373036515e-06, "loss": 0.5088, "step": 5147 }, { "epoch": 0.06155023374263203, "grad_norm": 2.0268325805664062, "learning_rate": 9.97395464015276e-06, "loss": 0.6171, "step": 5148 }, { "epoch": 0.06156218988749268, "grad_norm": 3.015756130218506, "learning_rate": 9.973934899810543e-06, "loss": 0.6617, "step": 5149 }, { "epoch": 0.061574146032353325, "grad_norm": 9.895890235900879, "learning_rate": 9.973915152009892e-06, "loss": 0.7217, "step": 5150 }, { "epoch": 0.061586102177213976, "grad_norm": 1.9308273792266846, "learning_rate": 9.973895396750838e-06, "loss": 0.6949, "step": 5151 }, { "epoch": 0.06159805832207463, "grad_norm": 2.7919561862945557, "learning_rate": 9.97387563403341e-06, "loss": 0.6502, "step": 5152 }, { "epoch": 0.06161001446693528, "grad_norm": 3.0165011882781982, "learning_rate": 9.973855863857637e-06, "loss": 0.6664, "step": 5153 }, { "epoch": 0.06162197061179593, "grad_norm": 1.898950219154358, "learning_rate": 9.973836086223552e-06, "loss": 0.6885, "step": 5154 }, { "epoch": 0.06163392675665658, "grad_norm": 3.5880045890808105, "learning_rate": 9.97381630113118e-06, "loss": 0.6354, "step": 5155 }, { "epoch": 0.061645882901517235, "grad_norm": 2.3300538063049316, "learning_rate": 9.973796508580554e-06, "loss": 0.6204, "step": 5156 }, { "epoch": 0.06165783904637789, "grad_norm": 5.59406852722168, "learning_rate": 9.9737767085717e-06, "loss": 0.6058, "step": 5157 }, { "epoch": 0.06166979519123854, "grad_norm": 2.5043797492980957, "learning_rate": 9.973756901104651e-06, "loss": 0.5927, "step": 5158 }, { "epoch": 0.06168175133609919, "grad_norm": 1.4190806150436401, "learning_rate": 9.973737086179438e-06, "loss": 0.6032, "step": 5159 }, { "epoch": 0.06169370748095984, "grad_norm": 8.611151695251465, "learning_rate": 9.973717263796088e-06, "loss": 0.7597, "step": 5160 }, { "epoch": 0.061705663625820494, "grad_norm": 2.4353840351104736, "learning_rate": 9.97369743395463e-06, "loss": 0.6489, "step": 5161 }, { "epoch": 0.06171761977068114, "grad_norm": 3.1014721393585205, "learning_rate": 9.973677596655096e-06, "loss": 0.6696, "step": 5162 }, { "epoch": 0.06172957591554179, "grad_norm": 2.7837893962860107, "learning_rate": 9.973657751897515e-06, "loss": 0.6833, "step": 5163 }, { "epoch": 0.06174153206040244, "grad_norm": 4.356460094451904, "learning_rate": 9.973637899681913e-06, "loss": 0.7095, "step": 5164 }, { "epoch": 0.06175348820526309, "grad_norm": 7.293306827545166, "learning_rate": 9.973618040008327e-06, "loss": 0.5821, "step": 5165 }, { "epoch": 0.061765444350123745, "grad_norm": 4.427361011505127, "learning_rate": 9.973598172876782e-06, "loss": 0.5874, "step": 5166 }, { "epoch": 0.0617774004949844, "grad_norm": 2.3548521995544434, "learning_rate": 9.97357829828731e-06, "loss": 0.6386, "step": 5167 }, { "epoch": 0.06178935663984505, "grad_norm": 3.2516112327575684, "learning_rate": 9.973558416239938e-06, "loss": 0.6797, "step": 5168 }, { "epoch": 0.0618013127847057, "grad_norm": 3.674880266189575, "learning_rate": 9.973538526734698e-06, "loss": 0.603, "step": 5169 }, { "epoch": 0.06181326892956635, "grad_norm": 4.8859381675720215, "learning_rate": 9.97351862977162e-06, "loss": 0.6961, "step": 5170 }, { "epoch": 0.061825225074427004, "grad_norm": 9.896313667297363, "learning_rate": 9.973498725350732e-06, "loss": 0.6244, "step": 5171 }, { "epoch": 0.061837181219287655, "grad_norm": 1.782745122909546, "learning_rate": 9.973478813472065e-06, "loss": 0.6381, "step": 5172 }, { "epoch": 0.06184913736414831, "grad_norm": 2.0587329864501953, "learning_rate": 9.97345889413565e-06, "loss": 0.7238, "step": 5173 }, { "epoch": 0.06186109350900895, "grad_norm": 1.7003865242004395, "learning_rate": 9.973438967341515e-06, "loss": 0.5896, "step": 5174 }, { "epoch": 0.061873049653869604, "grad_norm": 1.8177587985992432, "learning_rate": 9.973419033089691e-06, "loss": 0.6513, "step": 5175 }, { "epoch": 0.061885005798730255, "grad_norm": 4.57074499130249, "learning_rate": 9.973399091380207e-06, "loss": 0.6353, "step": 5176 }, { "epoch": 0.06189696194359091, "grad_norm": 2.021440267562866, "learning_rate": 9.973379142213094e-06, "loss": 0.6813, "step": 5177 }, { "epoch": 0.06190891808845156, "grad_norm": 8.39668083190918, "learning_rate": 9.973359185588381e-06, "loss": 0.6405, "step": 5178 }, { "epoch": 0.06192087423331221, "grad_norm": 2.789412021636963, "learning_rate": 9.973339221506097e-06, "loss": 0.592, "step": 5179 }, { "epoch": 0.06193283037817286, "grad_norm": 2.95936918258667, "learning_rate": 9.973319249966275e-06, "loss": 0.7227, "step": 5180 }, { "epoch": 0.061944786523033514, "grad_norm": 4.078803062438965, "learning_rate": 9.973299270968942e-06, "loss": 0.6373, "step": 5181 }, { "epoch": 0.061956742667894166, "grad_norm": 3.0676920413970947, "learning_rate": 9.973279284514131e-06, "loss": 0.5941, "step": 5182 }, { "epoch": 0.06196869881275482, "grad_norm": 2.195561408996582, "learning_rate": 9.973259290601867e-06, "loss": 0.6719, "step": 5183 }, { "epoch": 0.06198065495761547, "grad_norm": 4.09055233001709, "learning_rate": 9.973239289232185e-06, "loss": 0.5835, "step": 5184 }, { "epoch": 0.06199261110247612, "grad_norm": 4.675578594207764, "learning_rate": 9.973219280405112e-06, "loss": 0.6838, "step": 5185 }, { "epoch": 0.062004567247336766, "grad_norm": 2.8352930545806885, "learning_rate": 9.973199264120679e-06, "loss": 0.6155, "step": 5186 }, { "epoch": 0.06201652339219742, "grad_norm": 1.782660961151123, "learning_rate": 9.973179240378917e-06, "loss": 0.767, "step": 5187 }, { "epoch": 0.06202847953705807, "grad_norm": 2.6197915077209473, "learning_rate": 9.973159209179854e-06, "loss": 0.5666, "step": 5188 }, { "epoch": 0.06204043568191872, "grad_norm": 2.1814534664154053, "learning_rate": 9.973139170523522e-06, "loss": 0.6231, "step": 5189 }, { "epoch": 0.06205239182677937, "grad_norm": 9.502202033996582, "learning_rate": 9.97311912440995e-06, "loss": 0.7141, "step": 5190 }, { "epoch": 0.062064347971640024, "grad_norm": 2.82973313331604, "learning_rate": 9.973099070839168e-06, "loss": 0.6745, "step": 5191 }, { "epoch": 0.062076304116500676, "grad_norm": 2.3788235187530518, "learning_rate": 9.973079009811207e-06, "loss": 0.6582, "step": 5192 }, { "epoch": 0.06208826026136133, "grad_norm": 3.4869320392608643, "learning_rate": 9.973058941326094e-06, "loss": 0.707, "step": 5193 }, { "epoch": 0.06210021640622198, "grad_norm": 3.6468286514282227, "learning_rate": 9.973038865383863e-06, "loss": 0.6577, "step": 5194 }, { "epoch": 0.06211217255108263, "grad_norm": 15.65117073059082, "learning_rate": 9.973018781984544e-06, "loss": 0.8792, "step": 5195 }, { "epoch": 0.06212412869594328, "grad_norm": 5.937126159667969, "learning_rate": 9.972998691128163e-06, "loss": 0.665, "step": 5196 }, { "epoch": 0.062136084840803935, "grad_norm": 2.5050244331359863, "learning_rate": 9.972978592814755e-06, "loss": 0.6475, "step": 5197 }, { "epoch": 0.06214804098566458, "grad_norm": 2.038761854171753, "learning_rate": 9.972958487044345e-06, "loss": 0.5702, "step": 5198 }, { "epoch": 0.06215999713052523, "grad_norm": 2.1866116523742676, "learning_rate": 9.97293837381697e-06, "loss": 0.6223, "step": 5199 }, { "epoch": 0.06217195327538588, "grad_norm": 5.265787124633789, "learning_rate": 9.972918253132654e-06, "loss": 0.594, "step": 5200 }, { "epoch": 0.062183909420246535, "grad_norm": 2.3112633228302, "learning_rate": 9.972898124991431e-06, "loss": 0.6674, "step": 5201 }, { "epoch": 0.062195865565107186, "grad_norm": 7.042965888977051, "learning_rate": 9.972877989393327e-06, "loss": 0.5596, "step": 5202 }, { "epoch": 0.06220782170996784, "grad_norm": 6.169881820678711, "learning_rate": 9.972857846338375e-06, "loss": 0.6166, "step": 5203 }, { "epoch": 0.06221977785482849, "grad_norm": 2.537236213684082, "learning_rate": 9.972837695826607e-06, "loss": 0.6653, "step": 5204 }, { "epoch": 0.06223173399968914, "grad_norm": 3.4024744033813477, "learning_rate": 9.972817537858051e-06, "loss": 0.6061, "step": 5205 }, { "epoch": 0.06224369014454979, "grad_norm": 2.054271936416626, "learning_rate": 9.972797372432737e-06, "loss": 0.6344, "step": 5206 }, { "epoch": 0.062255646289410445, "grad_norm": 3.220165252685547, "learning_rate": 9.972777199550694e-06, "loss": 0.5549, "step": 5207 }, { "epoch": 0.0622676024342711, "grad_norm": 2.4941065311431885, "learning_rate": 9.972757019211955e-06, "loss": 0.6411, "step": 5208 }, { "epoch": 0.06227955857913174, "grad_norm": 1.6809163093566895, "learning_rate": 9.97273683141655e-06, "loss": 0.5084, "step": 5209 }, { "epoch": 0.06229151472399239, "grad_norm": 3.21966552734375, "learning_rate": 9.972716636164507e-06, "loss": 0.7341, "step": 5210 }, { "epoch": 0.062303470868853045, "grad_norm": 3.208524465560913, "learning_rate": 9.972696433455859e-06, "loss": 0.6284, "step": 5211 }, { "epoch": 0.062315427013713696, "grad_norm": 3.421278715133667, "learning_rate": 9.972676223290636e-06, "loss": 0.5873, "step": 5212 }, { "epoch": 0.06232738315857435, "grad_norm": 5.311954975128174, "learning_rate": 9.972656005668864e-06, "loss": 0.5357, "step": 5213 }, { "epoch": 0.062339339303435, "grad_norm": 2.163269281387329, "learning_rate": 9.972635780590579e-06, "loss": 0.5901, "step": 5214 }, { "epoch": 0.06235129544829565, "grad_norm": 3.1578121185302734, "learning_rate": 9.972615548055809e-06, "loss": 0.6122, "step": 5215 }, { "epoch": 0.0623632515931563, "grad_norm": 2.277895212173462, "learning_rate": 9.972595308064583e-06, "loss": 0.5823, "step": 5216 }, { "epoch": 0.062375207738016955, "grad_norm": 1.5294510126113892, "learning_rate": 9.972575060616934e-06, "loss": 0.6067, "step": 5217 }, { "epoch": 0.06238716388287761, "grad_norm": 2.9829156398773193, "learning_rate": 9.97255480571289e-06, "loss": 0.5956, "step": 5218 }, { "epoch": 0.06239912002773826, "grad_norm": 4.356801986694336, "learning_rate": 9.972534543352484e-06, "loss": 0.7423, "step": 5219 }, { "epoch": 0.06241107617259891, "grad_norm": 1.677293300628662, "learning_rate": 9.972514273535743e-06, "loss": 0.6554, "step": 5220 }, { "epoch": 0.062423032317459555, "grad_norm": 5.724884986877441, "learning_rate": 9.9724939962627e-06, "loss": 0.5904, "step": 5221 }, { "epoch": 0.06243498846232021, "grad_norm": 2.908208131790161, "learning_rate": 9.972473711533384e-06, "loss": 0.6833, "step": 5222 }, { "epoch": 0.06244694460718086, "grad_norm": 3.473623752593994, "learning_rate": 9.972453419347828e-06, "loss": 0.6246, "step": 5223 }, { "epoch": 0.06245890075204151, "grad_norm": 2.0380747318267822, "learning_rate": 9.972433119706057e-06, "loss": 0.5985, "step": 5224 }, { "epoch": 0.06247085689690216, "grad_norm": 2.1550605297088623, "learning_rate": 9.972412812608108e-06, "loss": 0.6748, "step": 5225 }, { "epoch": 0.062482813041762814, "grad_norm": 2.3263041973114014, "learning_rate": 9.972392498054007e-06, "loss": 0.7262, "step": 5226 }, { "epoch": 0.062494769186623465, "grad_norm": 2.2673656940460205, "learning_rate": 9.972372176043785e-06, "loss": 0.6652, "step": 5227 }, { "epoch": 0.06250672533148412, "grad_norm": 3.187966823577881, "learning_rate": 9.972351846577474e-06, "loss": 0.65, "step": 5228 }, { "epoch": 0.06251868147634476, "grad_norm": 2.2317376136779785, "learning_rate": 9.972331509655104e-06, "loss": 0.6846, "step": 5229 }, { "epoch": 0.06253063762120542, "grad_norm": 2.4270501136779785, "learning_rate": 9.972311165276705e-06, "loss": 0.6848, "step": 5230 }, { "epoch": 0.06254259376606607, "grad_norm": 2.0199131965637207, "learning_rate": 9.972290813442308e-06, "loss": 0.6135, "step": 5231 }, { "epoch": 0.06255454991092672, "grad_norm": 2.574392318725586, "learning_rate": 9.972270454151942e-06, "loss": 0.5707, "step": 5232 }, { "epoch": 0.06256650605578737, "grad_norm": 5.2907819747924805, "learning_rate": 9.97225008740564e-06, "loss": 0.6828, "step": 5233 }, { "epoch": 0.06257846220064803, "grad_norm": 2.276475191116333, "learning_rate": 9.972229713203432e-06, "loss": 0.5476, "step": 5234 }, { "epoch": 0.06259041834550867, "grad_norm": 15.184964179992676, "learning_rate": 9.972209331545345e-06, "loss": 0.6758, "step": 5235 }, { "epoch": 0.06260237449036933, "grad_norm": 5.994210720062256, "learning_rate": 9.972188942431415e-06, "loss": 0.6278, "step": 5236 }, { "epoch": 0.06261433063522998, "grad_norm": 2.8061838150024414, "learning_rate": 9.972168545861672e-06, "loss": 0.6645, "step": 5237 }, { "epoch": 0.06262628678009063, "grad_norm": 2.431769371032715, "learning_rate": 9.97214814183614e-06, "loss": 0.5905, "step": 5238 }, { "epoch": 0.06263824292495128, "grad_norm": 2.6749682426452637, "learning_rate": 9.972127730354856e-06, "loss": 0.6124, "step": 5239 }, { "epoch": 0.06265019906981192, "grad_norm": 1.5583562850952148, "learning_rate": 9.97210731141785e-06, "loss": 0.5399, "step": 5240 }, { "epoch": 0.06266215521467258, "grad_norm": 2.3257572650909424, "learning_rate": 9.972086885025149e-06, "loss": 0.6153, "step": 5241 }, { "epoch": 0.06267411135953323, "grad_norm": 2.4485597610473633, "learning_rate": 9.972066451176788e-06, "loss": 0.6511, "step": 5242 }, { "epoch": 0.06268606750439389, "grad_norm": 2.49698805809021, "learning_rate": 9.972046009872795e-06, "loss": 0.6694, "step": 5243 }, { "epoch": 0.06269802364925453, "grad_norm": 1.5669610500335693, "learning_rate": 9.972025561113202e-06, "loss": 0.6129, "step": 5244 }, { "epoch": 0.06270997979411519, "grad_norm": 2.970259666442871, "learning_rate": 9.97200510489804e-06, "loss": 0.6034, "step": 5245 }, { "epoch": 0.06272193593897583, "grad_norm": 2.4747166633605957, "learning_rate": 9.971984641227336e-06, "loss": 0.7531, "step": 5246 }, { "epoch": 0.06273389208383649, "grad_norm": 3.8257100582122803, "learning_rate": 9.971964170101125e-06, "loss": 0.655, "step": 5247 }, { "epoch": 0.06274584822869714, "grad_norm": 2.005650758743286, "learning_rate": 9.971943691519434e-06, "loss": 0.6127, "step": 5248 }, { "epoch": 0.0627578043735578, "grad_norm": 2.306084156036377, "learning_rate": 9.971923205482298e-06, "loss": 0.5891, "step": 5249 }, { "epoch": 0.06276976051841844, "grad_norm": 2.7476725578308105, "learning_rate": 9.971902711989745e-06, "loss": 0.6491, "step": 5250 }, { "epoch": 0.06278171666327909, "grad_norm": 2.8784897327423096, "learning_rate": 9.971882211041805e-06, "loss": 0.5766, "step": 5251 }, { "epoch": 0.06279367280813974, "grad_norm": 3.648928165435791, "learning_rate": 9.971861702638512e-06, "loss": 0.6928, "step": 5252 }, { "epoch": 0.06280562895300039, "grad_norm": 2.5164427757263184, "learning_rate": 9.971841186779893e-06, "loss": 0.689, "step": 5253 }, { "epoch": 0.06281758509786105, "grad_norm": 2.3002004623413086, "learning_rate": 9.97182066346598e-06, "loss": 0.7322, "step": 5254 }, { "epoch": 0.06282954124272169, "grad_norm": 3.0168399810791016, "learning_rate": 9.971800132696807e-06, "loss": 0.7214, "step": 5255 }, { "epoch": 0.06284149738758235, "grad_norm": 4.270264148712158, "learning_rate": 9.971779594472399e-06, "loss": 0.6562, "step": 5256 }, { "epoch": 0.062853453532443, "grad_norm": 2.196612596511841, "learning_rate": 9.971759048792794e-06, "loss": 0.6393, "step": 5257 }, { "epoch": 0.06286540967730365, "grad_norm": 2.6249165534973145, "learning_rate": 9.971738495658014e-06, "loss": 0.5739, "step": 5258 }, { "epoch": 0.0628773658221643, "grad_norm": 2.6663448810577393, "learning_rate": 9.971717935068096e-06, "loss": 0.5902, "step": 5259 }, { "epoch": 0.06288932196702496, "grad_norm": 1.8048628568649292, "learning_rate": 9.97169736702307e-06, "loss": 0.5762, "step": 5260 }, { "epoch": 0.0629012781118856, "grad_norm": 2.400952100753784, "learning_rate": 9.971676791522966e-06, "loss": 0.7418, "step": 5261 }, { "epoch": 0.06291323425674626, "grad_norm": 3.600595235824585, "learning_rate": 9.971656208567815e-06, "loss": 0.6563, "step": 5262 }, { "epoch": 0.0629251904016069, "grad_norm": 2.9563136100769043, "learning_rate": 9.971635618157647e-06, "loss": 0.6438, "step": 5263 }, { "epoch": 0.06293714654646755, "grad_norm": 2.0475170612335205, "learning_rate": 9.971615020292494e-06, "loss": 0.6856, "step": 5264 }, { "epoch": 0.06294910269132821, "grad_norm": 3.228921890258789, "learning_rate": 9.971594414972388e-06, "loss": 0.5299, "step": 5265 }, { "epoch": 0.06296105883618885, "grad_norm": 3.2320716381073, "learning_rate": 9.971573802197356e-06, "loss": 0.6666, "step": 5266 }, { "epoch": 0.06297301498104951, "grad_norm": 4.302029132843018, "learning_rate": 9.971553181967433e-06, "loss": 0.7074, "step": 5267 }, { "epoch": 0.06298497112591016, "grad_norm": 13.219015121459961, "learning_rate": 9.971532554282648e-06, "loss": 0.5852, "step": 5268 }, { "epoch": 0.06299692727077082, "grad_norm": 2.248190402984619, "learning_rate": 9.971511919143033e-06, "loss": 0.6538, "step": 5269 }, { "epoch": 0.06300888341563146, "grad_norm": 2.669259548187256, "learning_rate": 9.971491276548617e-06, "loss": 0.6317, "step": 5270 }, { "epoch": 0.06302083956049212, "grad_norm": 7.354964256286621, "learning_rate": 9.971470626499433e-06, "loss": 0.5596, "step": 5271 }, { "epoch": 0.06303279570535276, "grad_norm": 1.3890146017074585, "learning_rate": 9.97144996899551e-06, "loss": 0.5124, "step": 5272 }, { "epoch": 0.06304475185021342, "grad_norm": 2.0539116859436035, "learning_rate": 9.971429304036881e-06, "loss": 0.6518, "step": 5273 }, { "epoch": 0.06305670799507407, "grad_norm": 2.668933868408203, "learning_rate": 9.971408631623578e-06, "loss": 0.651, "step": 5274 }, { "epoch": 0.06306866413993471, "grad_norm": 4.307901382446289, "learning_rate": 9.971387951755627e-06, "loss": 0.6095, "step": 5275 }, { "epoch": 0.06308062028479537, "grad_norm": 2.743488311767578, "learning_rate": 9.971367264433063e-06, "loss": 0.6695, "step": 5276 }, { "epoch": 0.06309257642965602, "grad_norm": 9.700882911682129, "learning_rate": 9.971346569655916e-06, "loss": 0.6425, "step": 5277 }, { "epoch": 0.06310453257451668, "grad_norm": 2.465775489807129, "learning_rate": 9.971325867424217e-06, "loss": 0.59, "step": 5278 }, { "epoch": 0.06311648871937732, "grad_norm": 1.6862733364105225, "learning_rate": 9.971305157737998e-06, "loss": 0.6815, "step": 5279 }, { "epoch": 0.06312844486423798, "grad_norm": 2.76006817817688, "learning_rate": 9.97128444059729e-06, "loss": 0.6385, "step": 5280 }, { "epoch": 0.06314040100909862, "grad_norm": 2.0285325050354004, "learning_rate": 9.971263716002122e-06, "loss": 0.6469, "step": 5281 }, { "epoch": 0.06315235715395928, "grad_norm": 2.3700318336486816, "learning_rate": 9.971242983952527e-06, "loss": 0.6328, "step": 5282 }, { "epoch": 0.06316431329881993, "grad_norm": 3.8457884788513184, "learning_rate": 9.971222244448535e-06, "loss": 0.7249, "step": 5283 }, { "epoch": 0.06317626944368059, "grad_norm": 4.627010822296143, "learning_rate": 9.971201497490176e-06, "loss": 0.7219, "step": 5284 }, { "epoch": 0.06318822558854123, "grad_norm": 1.528124451637268, "learning_rate": 9.971180743077485e-06, "loss": 0.6865, "step": 5285 }, { "epoch": 0.06320018173340188, "grad_norm": 2.084505081176758, "learning_rate": 9.971159981210491e-06, "loss": 0.6738, "step": 5286 }, { "epoch": 0.06321213787826253, "grad_norm": 12.170857429504395, "learning_rate": 9.971139211889224e-06, "loss": 0.5981, "step": 5287 }, { "epoch": 0.06322409402312318, "grad_norm": 1.907710075378418, "learning_rate": 9.971118435113716e-06, "loss": 0.6538, "step": 5288 }, { "epoch": 0.06323605016798384, "grad_norm": 2.1493642330169678, "learning_rate": 9.971097650883998e-06, "loss": 0.7712, "step": 5289 }, { "epoch": 0.06324800631284448, "grad_norm": 1.6275808811187744, "learning_rate": 9.971076859200102e-06, "loss": 0.506, "step": 5290 }, { "epoch": 0.06325996245770514, "grad_norm": 2.8630025386810303, "learning_rate": 9.971056060062059e-06, "loss": 0.7004, "step": 5291 }, { "epoch": 0.06327191860256579, "grad_norm": 4.033011436462402, "learning_rate": 9.971035253469899e-06, "loss": 0.616, "step": 5292 }, { "epoch": 0.06328387474742644, "grad_norm": 3.7171123027801514, "learning_rate": 9.971014439423654e-06, "loss": 0.5775, "step": 5293 }, { "epoch": 0.06329583089228709, "grad_norm": 4.4597978591918945, "learning_rate": 9.970993617923355e-06, "loss": 0.6565, "step": 5294 }, { "epoch": 0.06330778703714775, "grad_norm": 5.735994815826416, "learning_rate": 9.970972788969034e-06, "loss": 0.657, "step": 5295 }, { "epoch": 0.06331974318200839, "grad_norm": 2.2632217407226562, "learning_rate": 9.97095195256072e-06, "loss": 0.6541, "step": 5296 }, { "epoch": 0.06333169932686905, "grad_norm": 2.256002426147461, "learning_rate": 9.970931108698448e-06, "loss": 0.6625, "step": 5297 }, { "epoch": 0.0633436554717297, "grad_norm": 2.439171552658081, "learning_rate": 9.970910257382247e-06, "loss": 0.6081, "step": 5298 }, { "epoch": 0.06335561161659034, "grad_norm": 3.1741371154785156, "learning_rate": 9.970889398612148e-06, "loss": 0.6047, "step": 5299 }, { "epoch": 0.063367567761451, "grad_norm": 2.3222815990448, "learning_rate": 9.970868532388182e-06, "loss": 0.6417, "step": 5300 }, { "epoch": 0.06337952390631164, "grad_norm": 3.255866289138794, "learning_rate": 9.97084765871038e-06, "loss": 0.5431, "step": 5301 }, { "epoch": 0.0633914800511723, "grad_norm": 2.2877564430236816, "learning_rate": 9.970826777578777e-06, "loss": 0.6497, "step": 5302 }, { "epoch": 0.06340343619603295, "grad_norm": 4.8455705642700195, "learning_rate": 9.9708058889934e-06, "loss": 0.5936, "step": 5303 }, { "epoch": 0.0634153923408936, "grad_norm": 1.855973482131958, "learning_rate": 9.970784992954283e-06, "loss": 0.6658, "step": 5304 }, { "epoch": 0.06342734848575425, "grad_norm": 1.7213176488876343, "learning_rate": 9.970764089461456e-06, "loss": 0.6378, "step": 5305 }, { "epoch": 0.06343930463061491, "grad_norm": 3.4242122173309326, "learning_rate": 9.970743178514948e-06, "loss": 0.6546, "step": 5306 }, { "epoch": 0.06345126077547555, "grad_norm": 1.8980131149291992, "learning_rate": 9.970722260114795e-06, "loss": 0.5925, "step": 5307 }, { "epoch": 0.06346321692033621, "grad_norm": 3.66754150390625, "learning_rate": 9.970701334261028e-06, "loss": 0.6637, "step": 5308 }, { "epoch": 0.06347517306519686, "grad_norm": 1.6263712644577026, "learning_rate": 9.970680400953674e-06, "loss": 0.5206, "step": 5309 }, { "epoch": 0.0634871292100575, "grad_norm": 2.434964656829834, "learning_rate": 9.970659460192767e-06, "loss": 0.6046, "step": 5310 }, { "epoch": 0.06349908535491816, "grad_norm": 11.529838562011719, "learning_rate": 9.97063851197834e-06, "loss": 0.6243, "step": 5311 }, { "epoch": 0.0635110414997788, "grad_norm": 2.50602126121521, "learning_rate": 9.970617556310422e-06, "loss": 0.6289, "step": 5312 }, { "epoch": 0.06352299764463946, "grad_norm": 2.128141164779663, "learning_rate": 9.970596593189046e-06, "loss": 0.6197, "step": 5313 }, { "epoch": 0.06353495378950011, "grad_norm": 2.8636980056762695, "learning_rate": 9.970575622614242e-06, "loss": 0.6583, "step": 5314 }, { "epoch": 0.06354690993436077, "grad_norm": 6.638113021850586, "learning_rate": 9.970554644586044e-06, "loss": 0.5617, "step": 5315 }, { "epoch": 0.06355886607922141, "grad_norm": 3.1139724254608154, "learning_rate": 9.97053365910448e-06, "loss": 0.6889, "step": 5316 }, { "epoch": 0.06357082222408207, "grad_norm": 1.5934860706329346, "learning_rate": 9.970512666169583e-06, "loss": 0.5176, "step": 5317 }, { "epoch": 0.06358277836894272, "grad_norm": 2.08504581451416, "learning_rate": 9.970491665781384e-06, "loss": 0.686, "step": 5318 }, { "epoch": 0.06359473451380337, "grad_norm": 1.8271112442016602, "learning_rate": 9.970470657939916e-06, "loss": 0.6563, "step": 5319 }, { "epoch": 0.06360669065866402, "grad_norm": 2.218472719192505, "learning_rate": 9.970449642645211e-06, "loss": 0.6219, "step": 5320 }, { "epoch": 0.06361864680352468, "grad_norm": 1.7943004369735718, "learning_rate": 9.970428619897297e-06, "loss": 0.6432, "step": 5321 }, { "epoch": 0.06363060294838532, "grad_norm": 2.4304258823394775, "learning_rate": 9.970407589696207e-06, "loss": 0.6092, "step": 5322 }, { "epoch": 0.06364255909324597, "grad_norm": 3.773233652114868, "learning_rate": 9.970386552041975e-06, "loss": 0.5997, "step": 5323 }, { "epoch": 0.06365451523810663, "grad_norm": 2.786588668823242, "learning_rate": 9.97036550693463e-06, "loss": 0.5667, "step": 5324 }, { "epoch": 0.06366647138296727, "grad_norm": 2.0833489894866943, "learning_rate": 9.970344454374205e-06, "loss": 0.5411, "step": 5325 }, { "epoch": 0.06367842752782793, "grad_norm": 2.495884656906128, "learning_rate": 9.97032339436073e-06, "loss": 0.61, "step": 5326 }, { "epoch": 0.06369038367268857, "grad_norm": 3.4894919395446777, "learning_rate": 9.970302326894237e-06, "loss": 0.6781, "step": 5327 }, { "epoch": 0.06370233981754923, "grad_norm": 3.258241653442383, "learning_rate": 9.970281251974758e-06, "loss": 0.6097, "step": 5328 }, { "epoch": 0.06371429596240988, "grad_norm": 2.5216639041900635, "learning_rate": 9.970260169602326e-06, "loss": 0.6183, "step": 5329 }, { "epoch": 0.06372625210727054, "grad_norm": 4.541905403137207, "learning_rate": 9.970239079776971e-06, "loss": 0.6649, "step": 5330 }, { "epoch": 0.06373820825213118, "grad_norm": 3.4771533012390137, "learning_rate": 9.970217982498723e-06, "loss": 0.6501, "step": 5331 }, { "epoch": 0.06375016439699184, "grad_norm": 8.719623565673828, "learning_rate": 9.970196877767618e-06, "loss": 0.7019, "step": 5332 }, { "epoch": 0.06376212054185249, "grad_norm": 4.731333255767822, "learning_rate": 9.970175765583684e-06, "loss": 0.5948, "step": 5333 }, { "epoch": 0.06377407668671313, "grad_norm": 2.8169846534729004, "learning_rate": 9.970154645946952e-06, "loss": 0.7141, "step": 5334 }, { "epoch": 0.06378603283157379, "grad_norm": 5.443726539611816, "learning_rate": 9.970133518857459e-06, "loss": 0.6975, "step": 5335 }, { "epoch": 0.06379798897643443, "grad_norm": 6.09377384185791, "learning_rate": 9.970112384315229e-06, "loss": 0.7161, "step": 5336 }, { "epoch": 0.06380994512129509, "grad_norm": 6.954132080078125, "learning_rate": 9.970091242320302e-06, "loss": 0.6325, "step": 5337 }, { "epoch": 0.06382190126615574, "grad_norm": 2.748914957046509, "learning_rate": 9.970070092872701e-06, "loss": 0.6271, "step": 5338 }, { "epoch": 0.0638338574110164, "grad_norm": 2.6740260124206543, "learning_rate": 9.970048935972465e-06, "loss": 0.5464, "step": 5339 }, { "epoch": 0.06384581355587704, "grad_norm": 4.565944194793701, "learning_rate": 9.970027771619624e-06, "loss": 0.5336, "step": 5340 }, { "epoch": 0.0638577697007377, "grad_norm": 3.786961317062378, "learning_rate": 9.970006599814205e-06, "loss": 0.5732, "step": 5341 }, { "epoch": 0.06386972584559834, "grad_norm": 1.6559942960739136, "learning_rate": 9.969985420556248e-06, "loss": 0.5961, "step": 5342 }, { "epoch": 0.063881681990459, "grad_norm": 2.070023536682129, "learning_rate": 9.969964233845777e-06, "loss": 0.6461, "step": 5343 }, { "epoch": 0.06389363813531965, "grad_norm": 1.9761067628860474, "learning_rate": 9.969943039682828e-06, "loss": 0.613, "step": 5344 }, { "epoch": 0.06390559428018029, "grad_norm": 3.6561458110809326, "learning_rate": 9.969921838067432e-06, "loss": 0.6578, "step": 5345 }, { "epoch": 0.06391755042504095, "grad_norm": 9.825369834899902, "learning_rate": 9.96990062899962e-06, "loss": 0.6458, "step": 5346 }, { "epoch": 0.0639295065699016, "grad_norm": 2.9570090770721436, "learning_rate": 9.969879412479424e-06, "loss": 0.6543, "step": 5347 }, { "epoch": 0.06394146271476225, "grad_norm": 2.1306939125061035, "learning_rate": 9.969858188506878e-06, "loss": 0.7049, "step": 5348 }, { "epoch": 0.0639534188596229, "grad_norm": 2.3005077838897705, "learning_rate": 9.969836957082011e-06, "loss": 0.5712, "step": 5349 }, { "epoch": 0.06396537500448356, "grad_norm": 3.5389370918273926, "learning_rate": 9.969815718204855e-06, "loss": 0.6521, "step": 5350 }, { "epoch": 0.0639773311493442, "grad_norm": 2.312328338623047, "learning_rate": 9.969794471875442e-06, "loss": 0.5848, "step": 5351 }, { "epoch": 0.06398928729420486, "grad_norm": 4.83831787109375, "learning_rate": 9.969773218093806e-06, "loss": 0.5352, "step": 5352 }, { "epoch": 0.0640012434390655, "grad_norm": 2.3958747386932373, "learning_rate": 9.969751956859978e-06, "loss": 0.664, "step": 5353 }, { "epoch": 0.06401319958392616, "grad_norm": 3.559093952178955, "learning_rate": 9.969730688173989e-06, "loss": 0.663, "step": 5354 }, { "epoch": 0.06402515572878681, "grad_norm": 5.514453411102295, "learning_rate": 9.969709412035869e-06, "loss": 0.6031, "step": 5355 }, { "epoch": 0.06403711187364747, "grad_norm": 1.598265290260315, "learning_rate": 9.969688128445654e-06, "loss": 0.6131, "step": 5356 }, { "epoch": 0.06404906801850811, "grad_norm": 7.90231466293335, "learning_rate": 9.969666837403373e-06, "loss": 0.5849, "step": 5357 }, { "epoch": 0.06406102416336876, "grad_norm": 4.264150619506836, "learning_rate": 9.969645538909061e-06, "loss": 0.6616, "step": 5358 }, { "epoch": 0.06407298030822942, "grad_norm": 1.8606680631637573, "learning_rate": 9.969624232962745e-06, "loss": 0.623, "step": 5359 }, { "epoch": 0.06408493645309006, "grad_norm": 4.604469299316406, "learning_rate": 9.969602919564462e-06, "loss": 0.7647, "step": 5360 }, { "epoch": 0.06409689259795072, "grad_norm": 3.5807278156280518, "learning_rate": 9.96958159871424e-06, "loss": 0.5846, "step": 5361 }, { "epoch": 0.06410884874281136, "grad_norm": 2.6060903072357178, "learning_rate": 9.969560270412115e-06, "loss": 0.6039, "step": 5362 }, { "epoch": 0.06412080488767202, "grad_norm": 5.340683460235596, "learning_rate": 9.969538934658113e-06, "loss": 0.6763, "step": 5363 }, { "epoch": 0.06413276103253267, "grad_norm": 3.888671636581421, "learning_rate": 9.969517591452273e-06, "loss": 0.6701, "step": 5364 }, { "epoch": 0.06414471717739333, "grad_norm": 2.889878034591675, "learning_rate": 9.969496240794624e-06, "loss": 0.6732, "step": 5365 }, { "epoch": 0.06415667332225397, "grad_norm": 11.582298278808594, "learning_rate": 9.969474882685196e-06, "loss": 0.6613, "step": 5366 }, { "epoch": 0.06416862946711463, "grad_norm": 2.5825586318969727, "learning_rate": 9.969453517124024e-06, "loss": 0.5549, "step": 5367 }, { "epoch": 0.06418058561197527, "grad_norm": 4.330772876739502, "learning_rate": 9.969432144111137e-06, "loss": 0.6844, "step": 5368 }, { "epoch": 0.06419254175683592, "grad_norm": 3.2986984252929688, "learning_rate": 9.969410763646569e-06, "loss": 0.6807, "step": 5369 }, { "epoch": 0.06420449790169658, "grad_norm": 2.913785219192505, "learning_rate": 9.969389375730354e-06, "loss": 0.676, "step": 5370 }, { "epoch": 0.06421645404655722, "grad_norm": 3.558772563934326, "learning_rate": 9.969367980362521e-06, "loss": 0.6908, "step": 5371 }, { "epoch": 0.06422841019141788, "grad_norm": 2.186279535293579, "learning_rate": 9.969346577543102e-06, "loss": 0.5858, "step": 5372 }, { "epoch": 0.06424036633627853, "grad_norm": 2.6925086975097656, "learning_rate": 9.969325167272132e-06, "loss": 0.6217, "step": 5373 }, { "epoch": 0.06425232248113918, "grad_norm": 2.7297921180725098, "learning_rate": 9.96930374954964e-06, "loss": 0.6527, "step": 5374 }, { "epoch": 0.06426427862599983, "grad_norm": 2.9101226329803467, "learning_rate": 9.969282324375659e-06, "loss": 0.6044, "step": 5375 }, { "epoch": 0.06427623477086049, "grad_norm": 6.396247386932373, "learning_rate": 9.969260891750222e-06, "loss": 0.5413, "step": 5376 }, { "epoch": 0.06428819091572113, "grad_norm": 7.791195392608643, "learning_rate": 9.969239451673361e-06, "loss": 0.6851, "step": 5377 }, { "epoch": 0.06430014706058179, "grad_norm": 5.095545768737793, "learning_rate": 9.969218004145107e-06, "loss": 0.6181, "step": 5378 }, { "epoch": 0.06431210320544244, "grad_norm": 4.053230285644531, "learning_rate": 9.969196549165494e-06, "loss": 0.6464, "step": 5379 }, { "epoch": 0.0643240593503031, "grad_norm": 2.0949816703796387, "learning_rate": 9.969175086734552e-06, "loss": 0.6737, "step": 5380 }, { "epoch": 0.06433601549516374, "grad_norm": 1.4832204580307007, "learning_rate": 9.969153616852314e-06, "loss": 0.6384, "step": 5381 }, { "epoch": 0.06434797164002438, "grad_norm": 3.489340305328369, "learning_rate": 9.969132139518813e-06, "loss": 0.6047, "step": 5382 }, { "epoch": 0.06435992778488504, "grad_norm": 3.7894885540008545, "learning_rate": 9.969110654734083e-06, "loss": 0.5805, "step": 5383 }, { "epoch": 0.06437188392974569, "grad_norm": 2.003155469894409, "learning_rate": 9.96908916249815e-06, "loss": 0.7382, "step": 5384 }, { "epoch": 0.06438384007460635, "grad_norm": 7.287209510803223, "learning_rate": 9.969067662811052e-06, "loss": 0.6115, "step": 5385 }, { "epoch": 0.06439579621946699, "grad_norm": 2.2765066623687744, "learning_rate": 9.96904615567282e-06, "loss": 0.5586, "step": 5386 }, { "epoch": 0.06440775236432765, "grad_norm": 1.7464637756347656, "learning_rate": 9.969024641083484e-06, "loss": 0.621, "step": 5387 }, { "epoch": 0.0644197085091883, "grad_norm": 2.2527987957000732, "learning_rate": 9.96900311904308e-06, "loss": 0.5716, "step": 5388 }, { "epoch": 0.06443166465404895, "grad_norm": 1.9511661529541016, "learning_rate": 9.968981589551637e-06, "loss": 0.6393, "step": 5389 }, { "epoch": 0.0644436207989096, "grad_norm": 3.5672879219055176, "learning_rate": 9.968960052609189e-06, "loss": 0.5634, "step": 5390 }, { "epoch": 0.06445557694377026, "grad_norm": 3.0825212001800537, "learning_rate": 9.968938508215766e-06, "loss": 0.6365, "step": 5391 }, { "epoch": 0.0644675330886309, "grad_norm": 2.9069559574127197, "learning_rate": 9.968916956371403e-06, "loss": 0.5984, "step": 5392 }, { "epoch": 0.06447948923349155, "grad_norm": 2.133923292160034, "learning_rate": 9.968895397076132e-06, "loss": 0.6482, "step": 5393 }, { "epoch": 0.0644914453783522, "grad_norm": 6.747178077697754, "learning_rate": 9.968873830329984e-06, "loss": 0.637, "step": 5394 }, { "epoch": 0.06450340152321285, "grad_norm": 2.6228079795837402, "learning_rate": 9.968852256132993e-06, "loss": 0.6696, "step": 5395 }, { "epoch": 0.06451535766807351, "grad_norm": 2.4971885681152344, "learning_rate": 9.968830674485191e-06, "loss": 0.5443, "step": 5396 }, { "epoch": 0.06452731381293415, "grad_norm": 1.9596072435379028, "learning_rate": 9.968809085386608e-06, "loss": 0.5672, "step": 5397 }, { "epoch": 0.06453926995779481, "grad_norm": 5.548653602600098, "learning_rate": 9.968787488837279e-06, "loss": 0.5695, "step": 5398 }, { "epoch": 0.06455122610265546, "grad_norm": 2.026762008666992, "learning_rate": 9.968765884837235e-06, "loss": 0.5754, "step": 5399 }, { "epoch": 0.06456318224751612, "grad_norm": 4.905057907104492, "learning_rate": 9.968744273386508e-06, "loss": 0.6944, "step": 5400 }, { "epoch": 0.06457513839237676, "grad_norm": 2.5534520149230957, "learning_rate": 9.968722654485132e-06, "loss": 0.5952, "step": 5401 }, { "epoch": 0.06458709453723742, "grad_norm": 4.966882705688477, "learning_rate": 9.968701028133138e-06, "loss": 0.7128, "step": 5402 }, { "epoch": 0.06459905068209806, "grad_norm": 3.002354621887207, "learning_rate": 9.968679394330562e-06, "loss": 0.5467, "step": 5403 }, { "epoch": 0.06461100682695872, "grad_norm": 2.3257429599761963, "learning_rate": 9.968657753077431e-06, "loss": 0.6249, "step": 5404 }, { "epoch": 0.06462296297181937, "grad_norm": 5.093574047088623, "learning_rate": 9.968636104373781e-06, "loss": 0.6192, "step": 5405 }, { "epoch": 0.06463491911668001, "grad_norm": 2.4904956817626953, "learning_rate": 9.968614448219644e-06, "loss": 0.6468, "step": 5406 }, { "epoch": 0.06464687526154067, "grad_norm": 7.766526222229004, "learning_rate": 9.96859278461505e-06, "loss": 0.6385, "step": 5407 }, { "epoch": 0.06465883140640132, "grad_norm": 5.826353073120117, "learning_rate": 9.968571113560036e-06, "loss": 0.7316, "step": 5408 }, { "epoch": 0.06467078755126197, "grad_norm": 2.2826554775238037, "learning_rate": 9.96854943505463e-06, "loss": 0.5438, "step": 5409 }, { "epoch": 0.06468274369612262, "grad_norm": 2.164120674133301, "learning_rate": 9.968527749098866e-06, "loss": 0.6598, "step": 5410 }, { "epoch": 0.06469469984098328, "grad_norm": 6.234490394592285, "learning_rate": 9.96850605569278e-06, "loss": 0.6666, "step": 5411 }, { "epoch": 0.06470665598584392, "grad_norm": 2.1636011600494385, "learning_rate": 9.968484354836399e-06, "loss": 0.7127, "step": 5412 }, { "epoch": 0.06471861213070458, "grad_norm": 3.232417345046997, "learning_rate": 9.968462646529758e-06, "loss": 0.5986, "step": 5413 }, { "epoch": 0.06473056827556523, "grad_norm": 3.1982522010803223, "learning_rate": 9.968440930772891e-06, "loss": 0.6956, "step": 5414 }, { "epoch": 0.06474252442042588, "grad_norm": 2.1698389053344727, "learning_rate": 9.968419207565828e-06, "loss": 0.5728, "step": 5415 }, { "epoch": 0.06475448056528653, "grad_norm": 4.674184799194336, "learning_rate": 9.968397476908604e-06, "loss": 0.6756, "step": 5416 }, { "epoch": 0.06476643671014717, "grad_norm": 1.9610826969146729, "learning_rate": 9.968375738801248e-06, "loss": 0.5672, "step": 5417 }, { "epoch": 0.06477839285500783, "grad_norm": 2.226125955581665, "learning_rate": 9.968353993243798e-06, "loss": 0.6876, "step": 5418 }, { "epoch": 0.06479034899986848, "grad_norm": 1.6608457565307617, "learning_rate": 9.96833224023628e-06, "loss": 0.6819, "step": 5419 }, { "epoch": 0.06480230514472914, "grad_norm": 2.0703537464141846, "learning_rate": 9.968310479778733e-06, "loss": 0.5992, "step": 5420 }, { "epoch": 0.06481426128958978, "grad_norm": 4.049479007720947, "learning_rate": 9.968288711871186e-06, "loss": 0.6581, "step": 5421 }, { "epoch": 0.06482621743445044, "grad_norm": 2.400970220565796, "learning_rate": 9.968266936513673e-06, "loss": 0.6438, "step": 5422 }, { "epoch": 0.06483817357931108, "grad_norm": 2.843123197555542, "learning_rate": 9.968245153706224e-06, "loss": 0.6669, "step": 5423 }, { "epoch": 0.06485012972417174, "grad_norm": 1.8209288120269775, "learning_rate": 9.968223363448876e-06, "loss": 0.596, "step": 5424 }, { "epoch": 0.06486208586903239, "grad_norm": 2.1305675506591797, "learning_rate": 9.968201565741657e-06, "loss": 0.6512, "step": 5425 }, { "epoch": 0.06487404201389305, "grad_norm": 1.680136799812317, "learning_rate": 9.968179760584603e-06, "loss": 0.6524, "step": 5426 }, { "epoch": 0.06488599815875369, "grad_norm": 6.305211544036865, "learning_rate": 9.968157947977748e-06, "loss": 0.5966, "step": 5427 }, { "epoch": 0.06489795430361434, "grad_norm": 1.993424654006958, "learning_rate": 9.96813612792112e-06, "loss": 0.6738, "step": 5428 }, { "epoch": 0.064909910448475, "grad_norm": 4.752613544464111, "learning_rate": 9.968114300414757e-06, "loss": 0.6826, "step": 5429 }, { "epoch": 0.06492186659333564, "grad_norm": 5.4697113037109375, "learning_rate": 9.968092465458686e-06, "loss": 0.555, "step": 5430 }, { "epoch": 0.0649338227381963, "grad_norm": 3.192777156829834, "learning_rate": 9.968070623052943e-06, "loss": 0.7244, "step": 5431 }, { "epoch": 0.06494577888305694, "grad_norm": 2.0016369819641113, "learning_rate": 9.968048773197561e-06, "loss": 0.6391, "step": 5432 }, { "epoch": 0.0649577350279176, "grad_norm": 2.5166056156158447, "learning_rate": 9.968026915892573e-06, "loss": 0.7058, "step": 5433 }, { "epoch": 0.06496969117277825, "grad_norm": 2.6231653690338135, "learning_rate": 9.96800505113801e-06, "loss": 0.6091, "step": 5434 }, { "epoch": 0.0649816473176389, "grad_norm": 3.053570032119751, "learning_rate": 9.967983178933906e-06, "loss": 0.5826, "step": 5435 }, { "epoch": 0.06499360346249955, "grad_norm": 7.127950668334961, "learning_rate": 9.967961299280295e-06, "loss": 0.6388, "step": 5436 }, { "epoch": 0.06500555960736021, "grad_norm": 2.834902763366699, "learning_rate": 9.967939412177206e-06, "loss": 0.6365, "step": 5437 }, { "epoch": 0.06501751575222085, "grad_norm": 2.6934714317321777, "learning_rate": 9.967917517624676e-06, "loss": 0.5788, "step": 5438 }, { "epoch": 0.06502947189708151, "grad_norm": 7.7189154624938965, "learning_rate": 9.967895615622734e-06, "loss": 0.648, "step": 5439 }, { "epoch": 0.06504142804194216, "grad_norm": 4.616116046905518, "learning_rate": 9.967873706171417e-06, "loss": 0.6166, "step": 5440 }, { "epoch": 0.0650533841868028, "grad_norm": 1.8306925296783447, "learning_rate": 9.967851789270754e-06, "loss": 0.6295, "step": 5441 }, { "epoch": 0.06506534033166346, "grad_norm": 7.007410049438477, "learning_rate": 9.967829864920781e-06, "loss": 0.5832, "step": 5442 }, { "epoch": 0.0650772964765241, "grad_norm": 3.3849971294403076, "learning_rate": 9.96780793312153e-06, "loss": 0.5815, "step": 5443 }, { "epoch": 0.06508925262138476, "grad_norm": 2.746803045272827, "learning_rate": 9.967785993873031e-06, "loss": 0.6684, "step": 5444 }, { "epoch": 0.06510120876624541, "grad_norm": 3.8915603160858154, "learning_rate": 9.967764047175322e-06, "loss": 0.6207, "step": 5445 }, { "epoch": 0.06511316491110607, "grad_norm": 6.383718013763428, "learning_rate": 9.96774209302843e-06, "loss": 0.7004, "step": 5446 }, { "epoch": 0.06512512105596671, "grad_norm": 3.6274161338806152, "learning_rate": 9.967720131432394e-06, "loss": 0.6492, "step": 5447 }, { "epoch": 0.06513707720082737, "grad_norm": 3.512925624847412, "learning_rate": 9.967698162387243e-06, "loss": 0.6981, "step": 5448 }, { "epoch": 0.06514903334568801, "grad_norm": 7.717514991760254, "learning_rate": 9.967676185893009e-06, "loss": 0.6477, "step": 5449 }, { "epoch": 0.06516098949054867, "grad_norm": 3.7567026615142822, "learning_rate": 9.96765420194973e-06, "loss": 0.6096, "step": 5450 }, { "epoch": 0.06517294563540932, "grad_norm": 4.893013000488281, "learning_rate": 9.967632210557434e-06, "loss": 0.7122, "step": 5451 }, { "epoch": 0.06518490178026996, "grad_norm": 3.3429136276245117, "learning_rate": 9.967610211716156e-06, "loss": 0.6304, "step": 5452 }, { "epoch": 0.06519685792513062, "grad_norm": 3.938373565673828, "learning_rate": 9.967588205425928e-06, "loss": 0.6813, "step": 5453 }, { "epoch": 0.06520881406999127, "grad_norm": 2.5087242126464844, "learning_rate": 9.967566191686786e-06, "loss": 0.6673, "step": 5454 }, { "epoch": 0.06522077021485193, "grad_norm": 2.544689178466797, "learning_rate": 9.96754417049876e-06, "loss": 0.667, "step": 5455 }, { "epoch": 0.06523272635971257, "grad_norm": 5.434605121612549, "learning_rate": 9.967522141861884e-06, "loss": 0.5809, "step": 5456 }, { "epoch": 0.06524468250457323, "grad_norm": 3.818453073501587, "learning_rate": 9.96750010577619e-06, "loss": 0.6759, "step": 5457 }, { "epoch": 0.06525663864943387, "grad_norm": 2.6589977741241455, "learning_rate": 9.967478062241713e-06, "loss": 0.7099, "step": 5458 }, { "epoch": 0.06526859479429453, "grad_norm": 3.6568233966827393, "learning_rate": 9.967456011258483e-06, "loss": 0.5329, "step": 5459 }, { "epoch": 0.06528055093915518, "grad_norm": 3.8940234184265137, "learning_rate": 9.967433952826538e-06, "loss": 0.6633, "step": 5460 }, { "epoch": 0.06529250708401584, "grad_norm": 11.371414184570312, "learning_rate": 9.967411886945906e-06, "loss": 0.806, "step": 5461 }, { "epoch": 0.06530446322887648, "grad_norm": 4.341534614562988, "learning_rate": 9.967389813616622e-06, "loss": 0.5764, "step": 5462 }, { "epoch": 0.06531641937373714, "grad_norm": 2.8165712356567383, "learning_rate": 9.96736773283872e-06, "loss": 0.5297, "step": 5463 }, { "epoch": 0.06532837551859778, "grad_norm": 277.0387878417969, "learning_rate": 9.967345644612234e-06, "loss": 0.5048, "step": 5464 }, { "epoch": 0.06534033166345843, "grad_norm": 3.428647994995117, "learning_rate": 9.967323548937193e-06, "loss": 0.6064, "step": 5465 }, { "epoch": 0.06535228780831909, "grad_norm": 10.464129447937012, "learning_rate": 9.967301445813635e-06, "loss": 0.673, "step": 5466 }, { "epoch": 0.06536424395317973, "grad_norm": 7.586685657501221, "learning_rate": 9.96727933524159e-06, "loss": 0.635, "step": 5467 }, { "epoch": 0.06537620009804039, "grad_norm": 1.7883321046829224, "learning_rate": 9.967257217221092e-06, "loss": 0.5467, "step": 5468 }, { "epoch": 0.06538815624290104, "grad_norm": 2.1594111919403076, "learning_rate": 9.967235091752172e-06, "loss": 0.6219, "step": 5469 }, { "epoch": 0.0654001123877617, "grad_norm": 2.2785966396331787, "learning_rate": 9.967212958834867e-06, "loss": 0.5679, "step": 5470 }, { "epoch": 0.06541206853262234, "grad_norm": 2.744800329208374, "learning_rate": 9.967190818469208e-06, "loss": 0.5912, "step": 5471 }, { "epoch": 0.065424024677483, "grad_norm": 12.100065231323242, "learning_rate": 9.96716867065523e-06, "loss": 0.6631, "step": 5472 }, { "epoch": 0.06543598082234364, "grad_norm": 14.273921966552734, "learning_rate": 9.967146515392964e-06, "loss": 0.6623, "step": 5473 }, { "epoch": 0.0654479369672043, "grad_norm": 3.49245548248291, "learning_rate": 9.967124352682444e-06, "loss": 0.6441, "step": 5474 }, { "epoch": 0.06545989311206495, "grad_norm": 3.8606302738189697, "learning_rate": 9.967102182523705e-06, "loss": 0.6129, "step": 5475 }, { "epoch": 0.06547184925692559, "grad_norm": 8.206043243408203, "learning_rate": 9.967080004916777e-06, "loss": 0.6598, "step": 5476 }, { "epoch": 0.06548380540178625, "grad_norm": 2.0992300510406494, "learning_rate": 9.967057819861694e-06, "loss": 0.6532, "step": 5477 }, { "epoch": 0.0654957615466469, "grad_norm": 5.219053745269775, "learning_rate": 9.967035627358492e-06, "loss": 0.6627, "step": 5478 }, { "epoch": 0.06550771769150755, "grad_norm": 3.981501817703247, "learning_rate": 9.967013427407203e-06, "loss": 0.5677, "step": 5479 }, { "epoch": 0.0655196738363682, "grad_norm": 7.744472026824951, "learning_rate": 9.966991220007858e-06, "loss": 0.659, "step": 5480 }, { "epoch": 0.06553162998122886, "grad_norm": 1.539760708808899, "learning_rate": 9.966969005160492e-06, "loss": 0.5938, "step": 5481 }, { "epoch": 0.0655435861260895, "grad_norm": 2.0588505268096924, "learning_rate": 9.966946782865138e-06, "loss": 0.6259, "step": 5482 }, { "epoch": 0.06555554227095016, "grad_norm": 2.2270257472991943, "learning_rate": 9.966924553121832e-06, "loss": 0.5873, "step": 5483 }, { "epoch": 0.0655674984158108, "grad_norm": 9.279703140258789, "learning_rate": 9.966902315930604e-06, "loss": 0.7239, "step": 5484 }, { "epoch": 0.06557945456067146, "grad_norm": 2.364999294281006, "learning_rate": 9.966880071291487e-06, "loss": 0.6431, "step": 5485 }, { "epoch": 0.06559141070553211, "grad_norm": 3.1272714138031006, "learning_rate": 9.966857819204517e-06, "loss": 0.6469, "step": 5486 }, { "epoch": 0.06560336685039275, "grad_norm": 3.6460044384002686, "learning_rate": 9.966835559669725e-06, "loss": 0.5956, "step": 5487 }, { "epoch": 0.06561532299525341, "grad_norm": 1.8845648765563965, "learning_rate": 9.966813292687146e-06, "loss": 0.5255, "step": 5488 }, { "epoch": 0.06562727914011406, "grad_norm": 2.394000291824341, "learning_rate": 9.966791018256812e-06, "loss": 0.5256, "step": 5489 }, { "epoch": 0.06563923528497471, "grad_norm": 2.6782922744750977, "learning_rate": 9.966768736378759e-06, "loss": 0.6262, "step": 5490 }, { "epoch": 0.06565119142983536, "grad_norm": 2.8035449981689453, "learning_rate": 9.966746447053017e-06, "loss": 0.5603, "step": 5491 }, { "epoch": 0.06566314757469602, "grad_norm": 5.25637149810791, "learning_rate": 9.966724150279623e-06, "loss": 0.8143, "step": 5492 }, { "epoch": 0.06567510371955666, "grad_norm": 2.7683355808258057, "learning_rate": 9.966701846058607e-06, "loss": 0.6688, "step": 5493 }, { "epoch": 0.06568705986441732, "grad_norm": 1.9774134159088135, "learning_rate": 9.966679534390003e-06, "loss": 0.7053, "step": 5494 }, { "epoch": 0.06569901600927797, "grad_norm": 1.94557785987854, "learning_rate": 9.966657215273847e-06, "loss": 0.577, "step": 5495 }, { "epoch": 0.06571097215413862, "grad_norm": 5.2494330406188965, "learning_rate": 9.96663488871017e-06, "loss": 0.7085, "step": 5496 }, { "epoch": 0.06572292829899927, "grad_norm": 2.076148509979248, "learning_rate": 9.966612554699007e-06, "loss": 0.6321, "step": 5497 }, { "epoch": 0.06573488444385993, "grad_norm": 4.205386161804199, "learning_rate": 9.96659021324039e-06, "loss": 0.7386, "step": 5498 }, { "epoch": 0.06574684058872057, "grad_norm": 4.355458736419678, "learning_rate": 9.966567864334353e-06, "loss": 0.6525, "step": 5499 }, { "epoch": 0.06575879673358122, "grad_norm": 1.6528081893920898, "learning_rate": 9.966545507980932e-06, "loss": 0.6003, "step": 5500 }, { "epoch": 0.06577075287844188, "grad_norm": 3.0377180576324463, "learning_rate": 9.966523144180156e-06, "loss": 0.6665, "step": 5501 }, { "epoch": 0.06578270902330252, "grad_norm": 10.326525688171387, "learning_rate": 9.966500772932062e-06, "loss": 0.6414, "step": 5502 }, { "epoch": 0.06579466516816318, "grad_norm": 2.7661356925964355, "learning_rate": 9.966478394236683e-06, "loss": 0.6699, "step": 5503 }, { "epoch": 0.06580662131302382, "grad_norm": 2.7588391304016113, "learning_rate": 9.96645600809405e-06, "loss": 0.7041, "step": 5504 }, { "epoch": 0.06581857745788448, "grad_norm": 5.3634724617004395, "learning_rate": 9.9664336145042e-06, "loss": 0.6367, "step": 5505 }, { "epoch": 0.06583053360274513, "grad_norm": 1.7949548959732056, "learning_rate": 9.966411213467167e-06, "loss": 0.64, "step": 5506 }, { "epoch": 0.06584248974760579, "grad_norm": 12.673669815063477, "learning_rate": 9.96638880498298e-06, "loss": 0.6621, "step": 5507 }, { "epoch": 0.06585444589246643, "grad_norm": 2.204928159713745, "learning_rate": 9.966366389051676e-06, "loss": 0.6782, "step": 5508 }, { "epoch": 0.06586640203732709, "grad_norm": 7.690549373626709, "learning_rate": 9.966343965673287e-06, "loss": 0.6317, "step": 5509 }, { "epoch": 0.06587835818218774, "grad_norm": 2.5043952465057373, "learning_rate": 9.966321534847849e-06, "loss": 0.6896, "step": 5510 }, { "epoch": 0.06589031432704838, "grad_norm": 2.5095577239990234, "learning_rate": 9.966299096575394e-06, "loss": 0.7324, "step": 5511 }, { "epoch": 0.06590227047190904, "grad_norm": 2.6758859157562256, "learning_rate": 9.966276650855955e-06, "loss": 0.6112, "step": 5512 }, { "epoch": 0.06591422661676968, "grad_norm": 16.89628791809082, "learning_rate": 9.966254197689567e-06, "loss": 0.5754, "step": 5513 }, { "epoch": 0.06592618276163034, "grad_norm": 3.3817267417907715, "learning_rate": 9.966231737076264e-06, "loss": 0.5931, "step": 5514 }, { "epoch": 0.06593813890649099, "grad_norm": 9.290541648864746, "learning_rate": 9.966209269016078e-06, "loss": 0.6024, "step": 5515 }, { "epoch": 0.06595009505135165, "grad_norm": 4.455134868621826, "learning_rate": 9.966186793509043e-06, "loss": 0.568, "step": 5516 }, { "epoch": 0.06596205119621229, "grad_norm": 3.1715118885040283, "learning_rate": 9.966164310555195e-06, "loss": 0.5146, "step": 5517 }, { "epoch": 0.06597400734107295, "grad_norm": 8.190625190734863, "learning_rate": 9.966141820154564e-06, "loss": 0.7518, "step": 5518 }, { "epoch": 0.0659859634859336, "grad_norm": 2.6289994716644287, "learning_rate": 9.966119322307186e-06, "loss": 0.5923, "step": 5519 }, { "epoch": 0.06599791963079425, "grad_norm": 4.731510639190674, "learning_rate": 9.966096817013097e-06, "loss": 0.591, "step": 5520 }, { "epoch": 0.0660098757756549, "grad_norm": 2.0950493812561035, "learning_rate": 9.966074304272325e-06, "loss": 0.5301, "step": 5521 }, { "epoch": 0.06602183192051556, "grad_norm": 1.9999473094940186, "learning_rate": 9.96605178408491e-06, "loss": 0.5592, "step": 5522 }, { "epoch": 0.0660337880653762, "grad_norm": 2.701526641845703, "learning_rate": 9.96602925645088e-06, "loss": 0.6294, "step": 5523 }, { "epoch": 0.06604574421023685, "grad_norm": 3.806466579437256, "learning_rate": 9.966006721370272e-06, "loss": 0.6229, "step": 5524 }, { "epoch": 0.0660577003550975, "grad_norm": 3.9496572017669678, "learning_rate": 9.96598417884312e-06, "loss": 0.7705, "step": 5525 }, { "epoch": 0.06606965649995815, "grad_norm": 15.656760215759277, "learning_rate": 9.965961628869457e-06, "loss": 0.6354, "step": 5526 }, { "epoch": 0.06608161264481881, "grad_norm": 2.3586618900299072, "learning_rate": 9.965939071449315e-06, "loss": 0.5665, "step": 5527 }, { "epoch": 0.06609356878967945, "grad_norm": 2.95466947555542, "learning_rate": 9.965916506582733e-06, "loss": 0.6856, "step": 5528 }, { "epoch": 0.06610552493454011, "grad_norm": 2.8403735160827637, "learning_rate": 9.96589393426974e-06, "loss": 0.6529, "step": 5529 }, { "epoch": 0.06611748107940076, "grad_norm": 3.7819664478302, "learning_rate": 9.965871354510371e-06, "loss": 0.628, "step": 5530 }, { "epoch": 0.06612943722426141, "grad_norm": 4.189505100250244, "learning_rate": 9.96584876730466e-06, "loss": 0.5978, "step": 5531 }, { "epoch": 0.06614139336912206, "grad_norm": 2.437903642654419, "learning_rate": 9.965826172652641e-06, "loss": 0.6389, "step": 5532 }, { "epoch": 0.06615334951398272, "grad_norm": 2.6476893424987793, "learning_rate": 9.96580357055435e-06, "loss": 0.6377, "step": 5533 }, { "epoch": 0.06616530565884336, "grad_norm": 2.7457635402679443, "learning_rate": 9.965780961009816e-06, "loss": 0.6267, "step": 5534 }, { "epoch": 0.06617726180370401, "grad_norm": 2.4619903564453125, "learning_rate": 9.965758344019078e-06, "loss": 0.6975, "step": 5535 }, { "epoch": 0.06618921794856467, "grad_norm": 2.857295513153076, "learning_rate": 9.965735719582166e-06, "loss": 0.5795, "step": 5536 }, { "epoch": 0.06620117409342531, "grad_norm": 2.6892640590667725, "learning_rate": 9.965713087699117e-06, "loss": 0.6152, "step": 5537 }, { "epoch": 0.06621313023828597, "grad_norm": 3.522047996520996, "learning_rate": 9.965690448369963e-06, "loss": 0.5615, "step": 5538 }, { "epoch": 0.06622508638314661, "grad_norm": 2.775930404663086, "learning_rate": 9.965667801594739e-06, "loss": 0.6622, "step": 5539 }, { "epoch": 0.06623704252800727, "grad_norm": 4.808686256408691, "learning_rate": 9.965645147373478e-06, "loss": 0.5983, "step": 5540 }, { "epoch": 0.06624899867286792, "grad_norm": 2.4949848651885986, "learning_rate": 9.965622485706214e-06, "loss": 0.5485, "step": 5541 }, { "epoch": 0.06626095481772858, "grad_norm": 2.4298737049102783, "learning_rate": 9.965599816592984e-06, "loss": 0.6587, "step": 5542 }, { "epoch": 0.06627291096258922, "grad_norm": 2.539639711380005, "learning_rate": 9.965577140033817e-06, "loss": 0.5587, "step": 5543 }, { "epoch": 0.06628486710744988, "grad_norm": 2.5409719944000244, "learning_rate": 9.96555445602875e-06, "loss": 0.615, "step": 5544 }, { "epoch": 0.06629682325231052, "grad_norm": 3.9240036010742188, "learning_rate": 9.965531764577816e-06, "loss": 0.7299, "step": 5545 }, { "epoch": 0.06630877939717117, "grad_norm": 2.164456605911255, "learning_rate": 9.96550906568105e-06, "loss": 0.5682, "step": 5546 }, { "epoch": 0.06632073554203183, "grad_norm": 10.615472793579102, "learning_rate": 9.965486359338486e-06, "loss": 0.6353, "step": 5547 }, { "epoch": 0.06633269168689247, "grad_norm": 3.0951035022735596, "learning_rate": 9.965463645550155e-06, "loss": 0.6041, "step": 5548 }, { "epoch": 0.06634464783175313, "grad_norm": 2.4362642765045166, "learning_rate": 9.965440924316097e-06, "loss": 0.5778, "step": 5549 }, { "epoch": 0.06635660397661378, "grad_norm": 8.054248809814453, "learning_rate": 9.96541819563634e-06, "loss": 0.6112, "step": 5550 }, { "epoch": 0.06636856012147443, "grad_norm": 5.1256561279296875, "learning_rate": 9.965395459510923e-06, "loss": 0.5545, "step": 5551 }, { "epoch": 0.06638051626633508, "grad_norm": 3.1702239513397217, "learning_rate": 9.965372715939877e-06, "loss": 0.6525, "step": 5552 }, { "epoch": 0.06639247241119574, "grad_norm": 4.411245822906494, "learning_rate": 9.965349964923237e-06, "loss": 0.6425, "step": 5553 }, { "epoch": 0.06640442855605638, "grad_norm": 1.9848010540008545, "learning_rate": 9.965327206461037e-06, "loss": 0.5607, "step": 5554 }, { "epoch": 0.06641638470091704, "grad_norm": 3.630268096923828, "learning_rate": 9.96530444055331e-06, "loss": 0.598, "step": 5555 }, { "epoch": 0.06642834084577769, "grad_norm": 3.3609580993652344, "learning_rate": 9.965281667200093e-06, "loss": 0.5892, "step": 5556 }, { "epoch": 0.06644029699063835, "grad_norm": 3.6775569915771484, "learning_rate": 9.965258886401418e-06, "loss": 0.7015, "step": 5557 }, { "epoch": 0.06645225313549899, "grad_norm": 2.0319929122924805, "learning_rate": 9.96523609815732e-06, "loss": 0.605, "step": 5558 }, { "epoch": 0.06646420928035963, "grad_norm": 2.027371406555176, "learning_rate": 9.965213302467832e-06, "loss": 0.6354, "step": 5559 }, { "epoch": 0.0664761654252203, "grad_norm": 2.365363597869873, "learning_rate": 9.965190499332988e-06, "loss": 0.8028, "step": 5560 }, { "epoch": 0.06648812157008094, "grad_norm": 2.356179714202881, "learning_rate": 9.965167688752826e-06, "loss": 0.6032, "step": 5561 }, { "epoch": 0.0665000777149416, "grad_norm": 2.3596527576446533, "learning_rate": 9.965144870727375e-06, "loss": 0.681, "step": 5562 }, { "epoch": 0.06651203385980224, "grad_norm": 2.0439794063568115, "learning_rate": 9.965122045256673e-06, "loss": 0.6684, "step": 5563 }, { "epoch": 0.0665239900046629, "grad_norm": 4.253358840942383, "learning_rate": 9.965099212340753e-06, "loss": 0.6343, "step": 5564 }, { "epoch": 0.06653594614952354, "grad_norm": 6.630643844604492, "learning_rate": 9.965076371979647e-06, "loss": 0.6087, "step": 5565 }, { "epoch": 0.0665479022943842, "grad_norm": 2.1052963733673096, "learning_rate": 9.965053524173394e-06, "loss": 0.6117, "step": 5566 }, { "epoch": 0.06655985843924485, "grad_norm": 12.601967811584473, "learning_rate": 9.965030668922024e-06, "loss": 0.741, "step": 5567 }, { "epoch": 0.06657181458410551, "grad_norm": 3.0036566257476807, "learning_rate": 9.965007806225573e-06, "loss": 0.6306, "step": 5568 }, { "epoch": 0.06658377072896615, "grad_norm": 3.544581174850464, "learning_rate": 9.964984936084077e-06, "loss": 0.6758, "step": 5569 }, { "epoch": 0.0665957268738268, "grad_norm": 2.5071117877960205, "learning_rate": 9.964962058497565e-06, "loss": 0.6752, "step": 5570 }, { "epoch": 0.06660768301868746, "grad_norm": 2.0332841873168945, "learning_rate": 9.964939173466078e-06, "loss": 0.651, "step": 5571 }, { "epoch": 0.0666196391635481, "grad_norm": 1.854785442352295, "learning_rate": 9.964916280989645e-06, "loss": 0.678, "step": 5572 }, { "epoch": 0.06663159530840876, "grad_norm": 3.2794137001037598, "learning_rate": 9.964893381068303e-06, "loss": 0.6684, "step": 5573 }, { "epoch": 0.0666435514532694, "grad_norm": 1.6947364807128906, "learning_rate": 9.964870473702086e-06, "loss": 0.5752, "step": 5574 }, { "epoch": 0.06665550759813006, "grad_norm": 1.6981315612792969, "learning_rate": 9.96484755889103e-06, "loss": 0.6357, "step": 5575 }, { "epoch": 0.0666674637429907, "grad_norm": 2.397930145263672, "learning_rate": 9.964824636635164e-06, "loss": 0.5591, "step": 5576 }, { "epoch": 0.06667941988785137, "grad_norm": 5.70659065246582, "learning_rate": 9.964801706934525e-06, "loss": 0.6845, "step": 5577 }, { "epoch": 0.06669137603271201, "grad_norm": 5.21384334564209, "learning_rate": 9.964778769789151e-06, "loss": 0.6353, "step": 5578 }, { "epoch": 0.06670333217757267, "grad_norm": 2.6601030826568604, "learning_rate": 9.964755825199074e-06, "loss": 0.7146, "step": 5579 }, { "epoch": 0.06671528832243331, "grad_norm": 4.730578899383545, "learning_rate": 9.964732873164326e-06, "loss": 0.6579, "step": 5580 }, { "epoch": 0.06672724446729397, "grad_norm": 2.6339516639709473, "learning_rate": 9.964709913684944e-06, "loss": 0.6695, "step": 5581 }, { "epoch": 0.06673920061215462, "grad_norm": 2.3384318351745605, "learning_rate": 9.964686946760963e-06, "loss": 0.7104, "step": 5582 }, { "epoch": 0.06675115675701526, "grad_norm": 2.7938647270202637, "learning_rate": 9.964663972392416e-06, "loss": 0.6663, "step": 5583 }, { "epoch": 0.06676311290187592, "grad_norm": 2.6252174377441406, "learning_rate": 9.964640990579337e-06, "loss": 0.5944, "step": 5584 }, { "epoch": 0.06677506904673657, "grad_norm": 2.089261054992676, "learning_rate": 9.964618001321761e-06, "loss": 0.6042, "step": 5585 }, { "epoch": 0.06678702519159722, "grad_norm": 3.312329053878784, "learning_rate": 9.964595004619724e-06, "loss": 0.6402, "step": 5586 }, { "epoch": 0.06679898133645787, "grad_norm": 2.6624555587768555, "learning_rate": 9.964572000473259e-06, "loss": 0.5703, "step": 5587 }, { "epoch": 0.06681093748131853, "grad_norm": 9.60278606414795, "learning_rate": 9.9645489888824e-06, "loss": 0.6374, "step": 5588 }, { "epoch": 0.06682289362617917, "grad_norm": 2.632676839828491, "learning_rate": 9.964525969847181e-06, "loss": 0.613, "step": 5589 }, { "epoch": 0.06683484977103983, "grad_norm": 2.7631802558898926, "learning_rate": 9.964502943367637e-06, "loss": 0.6432, "step": 5590 }, { "epoch": 0.06684680591590048, "grad_norm": 1.8448219299316406, "learning_rate": 9.964479909443805e-06, "loss": 0.6606, "step": 5591 }, { "epoch": 0.06685876206076113, "grad_norm": 4.533482551574707, "learning_rate": 9.964456868075718e-06, "loss": 0.6626, "step": 5592 }, { "epoch": 0.06687071820562178, "grad_norm": 2.1740477085113525, "learning_rate": 9.96443381926341e-06, "loss": 0.6408, "step": 5593 }, { "epoch": 0.06688267435048242, "grad_norm": 3.4488420486450195, "learning_rate": 9.964410763006916e-06, "loss": 0.6645, "step": 5594 }, { "epoch": 0.06689463049534308, "grad_norm": 2.337921142578125, "learning_rate": 9.96438769930627e-06, "loss": 0.6773, "step": 5595 }, { "epoch": 0.06690658664020373, "grad_norm": 3.1890125274658203, "learning_rate": 9.964364628161506e-06, "loss": 0.6989, "step": 5596 }, { "epoch": 0.06691854278506439, "grad_norm": 2.3101160526275635, "learning_rate": 9.964341549572661e-06, "loss": 0.5503, "step": 5597 }, { "epoch": 0.06693049892992503, "grad_norm": 4.316457271575928, "learning_rate": 9.964318463539768e-06, "loss": 0.6643, "step": 5598 }, { "epoch": 0.06694245507478569, "grad_norm": 2.068575143814087, "learning_rate": 9.964295370062862e-06, "loss": 0.601, "step": 5599 }, { "epoch": 0.06695441121964633, "grad_norm": 3.347456216812134, "learning_rate": 9.964272269141977e-06, "loss": 0.6213, "step": 5600 }, { "epoch": 0.06696636736450699, "grad_norm": 3.1132686138153076, "learning_rate": 9.96424916077715e-06, "loss": 0.6051, "step": 5601 }, { "epoch": 0.06697832350936764, "grad_norm": 1.7638704776763916, "learning_rate": 9.96422604496841e-06, "loss": 0.5492, "step": 5602 }, { "epoch": 0.0669902796542283, "grad_norm": 2.1971523761749268, "learning_rate": 9.964202921715797e-06, "loss": 0.5593, "step": 5603 }, { "epoch": 0.06700223579908894, "grad_norm": 3.256058931350708, "learning_rate": 9.964179791019346e-06, "loss": 0.6131, "step": 5604 }, { "epoch": 0.0670141919439496, "grad_norm": 3.855966567993164, "learning_rate": 9.964156652879087e-06, "loss": 0.6724, "step": 5605 }, { "epoch": 0.06702614808881024, "grad_norm": 3.715662956237793, "learning_rate": 9.964133507295058e-06, "loss": 0.6236, "step": 5606 }, { "epoch": 0.06703810423367089, "grad_norm": 3.814788579940796, "learning_rate": 9.964110354267294e-06, "loss": 0.6141, "step": 5607 }, { "epoch": 0.06705006037853155, "grad_norm": 9.442773818969727, "learning_rate": 9.964087193795829e-06, "loss": 0.5915, "step": 5608 }, { "epoch": 0.06706201652339219, "grad_norm": 2.4333837032318115, "learning_rate": 9.964064025880697e-06, "loss": 0.695, "step": 5609 }, { "epoch": 0.06707397266825285, "grad_norm": 3.594252586364746, "learning_rate": 9.964040850521934e-06, "loss": 0.6439, "step": 5610 }, { "epoch": 0.0670859288131135, "grad_norm": 3.051504135131836, "learning_rate": 9.964017667719574e-06, "loss": 0.5775, "step": 5611 }, { "epoch": 0.06709788495797415, "grad_norm": 2.475717782974243, "learning_rate": 9.963994477473652e-06, "loss": 0.5361, "step": 5612 }, { "epoch": 0.0671098411028348, "grad_norm": 7.9899797439575195, "learning_rate": 9.963971279784202e-06, "loss": 0.6509, "step": 5613 }, { "epoch": 0.06712179724769546, "grad_norm": 2.22180438041687, "learning_rate": 9.96394807465126e-06, "loss": 0.624, "step": 5614 }, { "epoch": 0.0671337533925561, "grad_norm": 2.9368648529052734, "learning_rate": 9.963924862074858e-06, "loss": 0.6803, "step": 5615 }, { "epoch": 0.06714570953741676, "grad_norm": 2.4540903568267822, "learning_rate": 9.963901642055035e-06, "loss": 0.532, "step": 5616 }, { "epoch": 0.0671576656822774, "grad_norm": 4.055134296417236, "learning_rate": 9.963878414591826e-06, "loss": 0.5803, "step": 5617 }, { "epoch": 0.06716962182713805, "grad_norm": 28.21052360534668, "learning_rate": 9.96385517968526e-06, "loss": 0.5549, "step": 5618 }, { "epoch": 0.06718157797199871, "grad_norm": 4.8675150871276855, "learning_rate": 9.963831937335378e-06, "loss": 0.5653, "step": 5619 }, { "epoch": 0.06719353411685935, "grad_norm": 2.089961051940918, "learning_rate": 9.96380868754221e-06, "loss": 0.6587, "step": 5620 }, { "epoch": 0.06720549026172001, "grad_norm": 1.978148102760315, "learning_rate": 9.963785430305795e-06, "loss": 0.6734, "step": 5621 }, { "epoch": 0.06721744640658066, "grad_norm": 2.7067530155181885, "learning_rate": 9.963762165626165e-06, "loss": 0.7187, "step": 5622 }, { "epoch": 0.06722940255144132, "grad_norm": 4.08008337020874, "learning_rate": 9.963738893503356e-06, "loss": 0.6287, "step": 5623 }, { "epoch": 0.06724135869630196, "grad_norm": 1.947376012802124, "learning_rate": 9.963715613937405e-06, "loss": 0.6767, "step": 5624 }, { "epoch": 0.06725331484116262, "grad_norm": 5.050628185272217, "learning_rate": 9.963692326928343e-06, "loss": 0.7132, "step": 5625 }, { "epoch": 0.06726527098602327, "grad_norm": 2.155482769012451, "learning_rate": 9.963669032476206e-06, "loss": 0.5456, "step": 5626 }, { "epoch": 0.06727722713088392, "grad_norm": 2.245220422744751, "learning_rate": 9.96364573058103e-06, "loss": 0.7096, "step": 5627 }, { "epoch": 0.06728918327574457, "grad_norm": 1.9229096174240112, "learning_rate": 9.963622421242852e-06, "loss": 0.6301, "step": 5628 }, { "epoch": 0.06730113942060521, "grad_norm": 2.7410709857940674, "learning_rate": 9.963599104461703e-06, "loss": 0.6066, "step": 5629 }, { "epoch": 0.06731309556546587, "grad_norm": 2.2269530296325684, "learning_rate": 9.963575780237619e-06, "loss": 0.6492, "step": 5630 }, { "epoch": 0.06732505171032652, "grad_norm": 3.5296695232391357, "learning_rate": 9.963552448570635e-06, "loss": 0.6193, "step": 5631 }, { "epoch": 0.06733700785518718, "grad_norm": 6.246156215667725, "learning_rate": 9.963529109460788e-06, "loss": 0.661, "step": 5632 }, { "epoch": 0.06734896400004782, "grad_norm": 2.3369154930114746, "learning_rate": 9.96350576290811e-06, "loss": 0.6135, "step": 5633 }, { "epoch": 0.06736092014490848, "grad_norm": 3.166238784790039, "learning_rate": 9.963482408912637e-06, "loss": 0.6875, "step": 5634 }, { "epoch": 0.06737287628976912, "grad_norm": 1.913103461265564, "learning_rate": 9.963459047474406e-06, "loss": 0.5744, "step": 5635 }, { "epoch": 0.06738483243462978, "grad_norm": 1.9330435991287231, "learning_rate": 9.963435678593452e-06, "loss": 0.5925, "step": 5636 }, { "epoch": 0.06739678857949043, "grad_norm": 4.5255818367004395, "learning_rate": 9.963412302269806e-06, "loss": 0.6635, "step": 5637 }, { "epoch": 0.06740874472435109, "grad_norm": 7.446920394897461, "learning_rate": 9.963388918503506e-06, "loss": 0.7066, "step": 5638 }, { "epoch": 0.06742070086921173, "grad_norm": 4.3857269287109375, "learning_rate": 9.963365527294586e-06, "loss": 0.6762, "step": 5639 }, { "epoch": 0.06743265701407239, "grad_norm": 2.7003366947174072, "learning_rate": 9.963342128643083e-06, "loss": 0.665, "step": 5640 }, { "epoch": 0.06744461315893303, "grad_norm": 3.3768842220306396, "learning_rate": 9.96331872254903e-06, "loss": 0.5588, "step": 5641 }, { "epoch": 0.06745656930379368, "grad_norm": 1.6030250787734985, "learning_rate": 9.963295309012464e-06, "loss": 0.6082, "step": 5642 }, { "epoch": 0.06746852544865434, "grad_norm": 1.9810459613800049, "learning_rate": 9.963271888033418e-06, "loss": 0.5606, "step": 5643 }, { "epoch": 0.06748048159351498, "grad_norm": 2.1814253330230713, "learning_rate": 9.96324845961193e-06, "loss": 0.5231, "step": 5644 }, { "epoch": 0.06749243773837564, "grad_norm": 3.1701836585998535, "learning_rate": 9.96322502374803e-06, "loss": 0.6111, "step": 5645 }, { "epoch": 0.06750439388323629, "grad_norm": 2.088228940963745, "learning_rate": 9.96320158044176e-06, "loss": 0.5791, "step": 5646 }, { "epoch": 0.06751635002809694, "grad_norm": 4.23032808303833, "learning_rate": 9.96317812969315e-06, "loss": 0.6813, "step": 5647 }, { "epoch": 0.06752830617295759, "grad_norm": 2.9792447090148926, "learning_rate": 9.963154671502237e-06, "loss": 0.5925, "step": 5648 }, { "epoch": 0.06754026231781825, "grad_norm": 21.11087417602539, "learning_rate": 9.963131205869055e-06, "loss": 0.5808, "step": 5649 }, { "epoch": 0.06755221846267889, "grad_norm": 2.3923861980438232, "learning_rate": 9.963107732793643e-06, "loss": 0.7264, "step": 5650 }, { "epoch": 0.06756417460753955, "grad_norm": 3.776803970336914, "learning_rate": 9.96308425227603e-06, "loss": 0.6799, "step": 5651 }, { "epoch": 0.0675761307524002, "grad_norm": 1.7350012063980103, "learning_rate": 9.963060764316255e-06, "loss": 0.547, "step": 5652 }, { "epoch": 0.06758808689726084, "grad_norm": 2.5176775455474854, "learning_rate": 9.963037268914354e-06, "loss": 0.5736, "step": 5653 }, { "epoch": 0.0676000430421215, "grad_norm": 1.747978925704956, "learning_rate": 9.96301376607036e-06, "loss": 0.5668, "step": 5654 }, { "epoch": 0.06761199918698214, "grad_norm": 1.7461037635803223, "learning_rate": 9.96299025578431e-06, "loss": 0.6423, "step": 5655 }, { "epoch": 0.0676239553318428, "grad_norm": 2.7143146991729736, "learning_rate": 9.962966738056237e-06, "loss": 0.6131, "step": 5656 }, { "epoch": 0.06763591147670345, "grad_norm": 2.4991626739501953, "learning_rate": 9.96294321288618e-06, "loss": 0.7563, "step": 5657 }, { "epoch": 0.0676478676215641, "grad_norm": 2.397216320037842, "learning_rate": 9.96291968027417e-06, "loss": 0.5771, "step": 5658 }, { "epoch": 0.06765982376642475, "grad_norm": 4.055891036987305, "learning_rate": 9.962896140220245e-06, "loss": 0.6257, "step": 5659 }, { "epoch": 0.06767177991128541, "grad_norm": 4.331663131713867, "learning_rate": 9.96287259272444e-06, "loss": 0.6092, "step": 5660 }, { "epoch": 0.06768373605614605, "grad_norm": 1.6957029104232788, "learning_rate": 9.96284903778679e-06, "loss": 0.5645, "step": 5661 }, { "epoch": 0.06769569220100671, "grad_norm": 2.380824327468872, "learning_rate": 9.962825475407329e-06, "loss": 0.614, "step": 5662 }, { "epoch": 0.06770764834586736, "grad_norm": 4.051365375518799, "learning_rate": 9.962801905586095e-06, "loss": 0.6951, "step": 5663 }, { "epoch": 0.06771960449072802, "grad_norm": 3.534348964691162, "learning_rate": 9.96277832832312e-06, "loss": 0.6685, "step": 5664 }, { "epoch": 0.06773156063558866, "grad_norm": 5.558220386505127, "learning_rate": 9.962754743618444e-06, "loss": 0.72, "step": 5665 }, { "epoch": 0.0677435167804493, "grad_norm": 2.1636016368865967, "learning_rate": 9.962731151472098e-06, "loss": 0.6677, "step": 5666 }, { "epoch": 0.06775547292530996, "grad_norm": 3.178558111190796, "learning_rate": 9.962707551884118e-06, "loss": 0.6203, "step": 5667 }, { "epoch": 0.06776742907017061, "grad_norm": 2.4258227348327637, "learning_rate": 9.962683944854542e-06, "loss": 0.6046, "step": 5668 }, { "epoch": 0.06777938521503127, "grad_norm": 5.468891620635986, "learning_rate": 9.962660330383403e-06, "loss": 0.5802, "step": 5669 }, { "epoch": 0.06779134135989191, "grad_norm": 2.1343729496002197, "learning_rate": 9.962636708470738e-06, "loss": 0.6496, "step": 5670 }, { "epoch": 0.06780329750475257, "grad_norm": 2.4127838611602783, "learning_rate": 9.962613079116581e-06, "loss": 0.6957, "step": 5671 }, { "epoch": 0.06781525364961322, "grad_norm": 1.4991015195846558, "learning_rate": 9.962589442320968e-06, "loss": 0.61, "step": 5672 }, { "epoch": 0.06782720979447387, "grad_norm": 2.002437114715576, "learning_rate": 9.962565798083934e-06, "loss": 0.6363, "step": 5673 }, { "epoch": 0.06783916593933452, "grad_norm": 1.8952815532684326, "learning_rate": 9.962542146405516e-06, "loss": 0.6664, "step": 5674 }, { "epoch": 0.06785112208419518, "grad_norm": 2.630769729614258, "learning_rate": 9.962518487285748e-06, "loss": 0.5571, "step": 5675 }, { "epoch": 0.06786307822905582, "grad_norm": 2.858703374862671, "learning_rate": 9.962494820724667e-06, "loss": 0.6426, "step": 5676 }, { "epoch": 0.06787503437391647, "grad_norm": 5.271166801452637, "learning_rate": 9.962471146722306e-06, "loss": 0.6164, "step": 5677 }, { "epoch": 0.06788699051877713, "grad_norm": 2.803816795349121, "learning_rate": 9.962447465278701e-06, "loss": 0.5665, "step": 5678 }, { "epoch": 0.06789894666363777, "grad_norm": 2.0731136798858643, "learning_rate": 9.96242377639389e-06, "loss": 0.5623, "step": 5679 }, { "epoch": 0.06791090280849843, "grad_norm": 1.837058663368225, "learning_rate": 9.962400080067907e-06, "loss": 0.6041, "step": 5680 }, { "epoch": 0.06792285895335907, "grad_norm": 4.275301456451416, "learning_rate": 9.962376376300786e-06, "loss": 0.6417, "step": 5681 }, { "epoch": 0.06793481509821973, "grad_norm": 3.391655683517456, "learning_rate": 9.962352665092566e-06, "loss": 0.7576, "step": 5682 }, { "epoch": 0.06794677124308038, "grad_norm": 1.858091115951538, "learning_rate": 9.962328946443278e-06, "loss": 0.6106, "step": 5683 }, { "epoch": 0.06795872738794104, "grad_norm": 3.4052793979644775, "learning_rate": 9.962305220352963e-06, "loss": 0.5435, "step": 5684 }, { "epoch": 0.06797068353280168, "grad_norm": 1.8975749015808105, "learning_rate": 9.962281486821652e-06, "loss": 0.6373, "step": 5685 }, { "epoch": 0.06798263967766234, "grad_norm": 3.736069917678833, "learning_rate": 9.962257745849382e-06, "loss": 0.6439, "step": 5686 }, { "epoch": 0.06799459582252299, "grad_norm": 2.7999207973480225, "learning_rate": 9.962233997436191e-06, "loss": 0.5972, "step": 5687 }, { "epoch": 0.06800655196738363, "grad_norm": 2.8269643783569336, "learning_rate": 9.96221024158211e-06, "loss": 0.6073, "step": 5688 }, { "epoch": 0.06801850811224429, "grad_norm": 23.648391723632812, "learning_rate": 9.962186478287177e-06, "loss": 0.631, "step": 5689 }, { "epoch": 0.06803046425710493, "grad_norm": 2.2566287517547607, "learning_rate": 9.962162707551429e-06, "loss": 0.6751, "step": 5690 }, { "epoch": 0.06804242040196559, "grad_norm": 2.2643444538116455, "learning_rate": 9.9621389293749e-06, "loss": 0.726, "step": 5691 }, { "epoch": 0.06805437654682624, "grad_norm": 3.0906946659088135, "learning_rate": 9.962115143757627e-06, "loss": 0.5946, "step": 5692 }, { "epoch": 0.0680663326916869, "grad_norm": 3.706972122192383, "learning_rate": 9.962091350699642e-06, "loss": 0.6434, "step": 5693 }, { "epoch": 0.06807828883654754, "grad_norm": 1.8781154155731201, "learning_rate": 9.962067550200987e-06, "loss": 0.6278, "step": 5694 }, { "epoch": 0.0680902449814082, "grad_norm": 3.564347505569458, "learning_rate": 9.962043742261692e-06, "loss": 0.5483, "step": 5695 }, { "epoch": 0.06810220112626884, "grad_norm": 2.2825558185577393, "learning_rate": 9.962019926881794e-06, "loss": 0.6233, "step": 5696 }, { "epoch": 0.0681141572711295, "grad_norm": 2.289334297180176, "learning_rate": 9.961996104061331e-06, "loss": 0.5589, "step": 5697 }, { "epoch": 0.06812611341599015, "grad_norm": 4.204774379730225, "learning_rate": 9.961972273800336e-06, "loss": 0.6722, "step": 5698 }, { "epoch": 0.0681380695608508, "grad_norm": 3.0009050369262695, "learning_rate": 9.961948436098846e-06, "loss": 0.6261, "step": 5699 }, { "epoch": 0.06815002570571145, "grad_norm": 3.401057004928589, "learning_rate": 9.961924590956899e-06, "loss": 0.693, "step": 5700 }, { "epoch": 0.0681619818505721, "grad_norm": 2.9689114093780518, "learning_rate": 9.961900738374526e-06, "loss": 0.5611, "step": 5701 }, { "epoch": 0.06817393799543275, "grad_norm": 2.5069029331207275, "learning_rate": 9.961876878351766e-06, "loss": 0.6495, "step": 5702 }, { "epoch": 0.0681858941402934, "grad_norm": 2.6144707202911377, "learning_rate": 9.961853010888654e-06, "loss": 0.5777, "step": 5703 }, { "epoch": 0.06819785028515406, "grad_norm": 19.375823974609375, "learning_rate": 9.961829135985227e-06, "loss": 0.6985, "step": 5704 }, { "epoch": 0.0682098064300147, "grad_norm": 2.706449270248413, "learning_rate": 9.961805253641516e-06, "loss": 0.6727, "step": 5705 }, { "epoch": 0.06822176257487536, "grad_norm": 4.147083282470703, "learning_rate": 9.961781363857563e-06, "loss": 0.5821, "step": 5706 }, { "epoch": 0.068233718719736, "grad_norm": 3.28250789642334, "learning_rate": 9.9617574666334e-06, "loss": 0.6148, "step": 5707 }, { "epoch": 0.06824567486459666, "grad_norm": 2.834031343460083, "learning_rate": 9.961733561969067e-06, "loss": 0.5886, "step": 5708 }, { "epoch": 0.06825763100945731, "grad_norm": 31.1217041015625, "learning_rate": 9.961709649864595e-06, "loss": 0.6296, "step": 5709 }, { "epoch": 0.06826958715431797, "grad_norm": 8.795367240905762, "learning_rate": 9.96168573032002e-06, "loss": 0.6426, "step": 5710 }, { "epoch": 0.06828154329917861, "grad_norm": 2.355604648590088, "learning_rate": 9.961661803335383e-06, "loss": 0.7225, "step": 5711 }, { "epoch": 0.06829349944403926, "grad_norm": 2.0698070526123047, "learning_rate": 9.961637868910715e-06, "loss": 0.5855, "step": 5712 }, { "epoch": 0.06830545558889992, "grad_norm": 2.6266531944274902, "learning_rate": 9.961613927046051e-06, "loss": 0.5341, "step": 5713 }, { "epoch": 0.06831741173376056, "grad_norm": 3.171739339828491, "learning_rate": 9.961589977741433e-06, "loss": 0.6677, "step": 5714 }, { "epoch": 0.06832936787862122, "grad_norm": 2.563969135284424, "learning_rate": 9.961566020996891e-06, "loss": 0.6712, "step": 5715 }, { "epoch": 0.06834132402348186, "grad_norm": 1.6956700086593628, "learning_rate": 9.961542056812462e-06, "loss": 0.5956, "step": 5716 }, { "epoch": 0.06835328016834252, "grad_norm": 2.395526885986328, "learning_rate": 9.961518085188186e-06, "loss": 0.6174, "step": 5717 }, { "epoch": 0.06836523631320317, "grad_norm": 1.9566490650177002, "learning_rate": 9.961494106124094e-06, "loss": 0.6593, "step": 5718 }, { "epoch": 0.06837719245806383, "grad_norm": 2.0683436393737793, "learning_rate": 9.961470119620226e-06, "loss": 0.6528, "step": 5719 }, { "epoch": 0.06838914860292447, "grad_norm": 1.8349940776824951, "learning_rate": 9.961446125676615e-06, "loss": 0.6074, "step": 5720 }, { "epoch": 0.06840110474778513, "grad_norm": 2.2217209339141846, "learning_rate": 9.961422124293296e-06, "loss": 0.6048, "step": 5721 }, { "epoch": 0.06841306089264577, "grad_norm": 2.0590827465057373, "learning_rate": 9.961398115470308e-06, "loss": 0.7304, "step": 5722 }, { "epoch": 0.06842501703750643, "grad_norm": 1.9905999898910522, "learning_rate": 9.961374099207688e-06, "loss": 0.6086, "step": 5723 }, { "epoch": 0.06843697318236708, "grad_norm": 2.2728166580200195, "learning_rate": 9.961350075505466e-06, "loss": 0.662, "step": 5724 }, { "epoch": 0.06844892932722772, "grad_norm": 2.77797794342041, "learning_rate": 9.961326044363686e-06, "loss": 0.5555, "step": 5725 }, { "epoch": 0.06846088547208838, "grad_norm": 2.166762590408325, "learning_rate": 9.961302005782377e-06, "loss": 0.6011, "step": 5726 }, { "epoch": 0.06847284161694903, "grad_norm": 1.611331820487976, "learning_rate": 9.96127795976158e-06, "loss": 0.5367, "step": 5727 }, { "epoch": 0.06848479776180968, "grad_norm": 3.793795108795166, "learning_rate": 9.961253906301327e-06, "loss": 0.7293, "step": 5728 }, { "epoch": 0.06849675390667033, "grad_norm": 1.7156217098236084, "learning_rate": 9.961229845401658e-06, "loss": 0.5723, "step": 5729 }, { "epoch": 0.06850871005153099, "grad_norm": 1.4323853254318237, "learning_rate": 9.961205777062606e-06, "loss": 0.57, "step": 5730 }, { "epoch": 0.06852066619639163, "grad_norm": 1.711198329925537, "learning_rate": 9.961181701284208e-06, "loss": 0.6219, "step": 5731 }, { "epoch": 0.06853262234125229, "grad_norm": 1.6130255460739136, "learning_rate": 9.961157618066502e-06, "loss": 0.6493, "step": 5732 }, { "epoch": 0.06854457848611294, "grad_norm": 2.623318910598755, "learning_rate": 9.96113352740952e-06, "loss": 0.4851, "step": 5733 }, { "epoch": 0.0685565346309736, "grad_norm": 2.170769214630127, "learning_rate": 9.961109429313304e-06, "loss": 0.5955, "step": 5734 }, { "epoch": 0.06856849077583424, "grad_norm": 2.265770435333252, "learning_rate": 9.961085323777886e-06, "loss": 0.5675, "step": 5735 }, { "epoch": 0.06858044692069488, "grad_norm": 3.6470017433166504, "learning_rate": 9.9610612108033e-06, "loss": 0.6356, "step": 5736 }, { "epoch": 0.06859240306555554, "grad_norm": 5.179670810699463, "learning_rate": 9.961037090389587e-06, "loss": 0.5977, "step": 5737 }, { "epoch": 0.06860435921041619, "grad_norm": 2.2774767875671387, "learning_rate": 9.961012962536782e-06, "loss": 0.6837, "step": 5738 }, { "epoch": 0.06861631535527685, "grad_norm": 2.944148063659668, "learning_rate": 9.960988827244922e-06, "loss": 0.5524, "step": 5739 }, { "epoch": 0.06862827150013749, "grad_norm": 8.441520690917969, "learning_rate": 9.960964684514038e-06, "loss": 0.6831, "step": 5740 }, { "epoch": 0.06864022764499815, "grad_norm": 1.9199388027191162, "learning_rate": 9.960940534344171e-06, "loss": 0.6688, "step": 5741 }, { "epoch": 0.0686521837898588, "grad_norm": 1.9846992492675781, "learning_rate": 9.960916376735357e-06, "loss": 0.6639, "step": 5742 }, { "epoch": 0.06866413993471945, "grad_norm": 2.8433825969696045, "learning_rate": 9.96089221168763e-06, "loss": 0.7651, "step": 5743 }, { "epoch": 0.0686760960795801, "grad_norm": 9.151534080505371, "learning_rate": 9.960868039201028e-06, "loss": 0.6873, "step": 5744 }, { "epoch": 0.06868805222444076, "grad_norm": 2.1323728561401367, "learning_rate": 9.960843859275586e-06, "loss": 0.5394, "step": 5745 }, { "epoch": 0.0687000083693014, "grad_norm": 3.676156520843506, "learning_rate": 9.960819671911342e-06, "loss": 0.632, "step": 5746 }, { "epoch": 0.06871196451416206, "grad_norm": 4.861612796783447, "learning_rate": 9.960795477108332e-06, "loss": 0.6785, "step": 5747 }, { "epoch": 0.0687239206590227, "grad_norm": 2.3358118534088135, "learning_rate": 9.96077127486659e-06, "loss": 0.6264, "step": 5748 }, { "epoch": 0.06873587680388335, "grad_norm": 1.7167094945907593, "learning_rate": 9.960747065186154e-06, "loss": 0.5313, "step": 5749 }, { "epoch": 0.06874783294874401, "grad_norm": 1.8535656929016113, "learning_rate": 9.960722848067062e-06, "loss": 0.6796, "step": 5750 }, { "epoch": 0.06875978909360465, "grad_norm": 1.9110647439956665, "learning_rate": 9.960698623509345e-06, "loss": 0.5872, "step": 5751 }, { "epoch": 0.06877174523846531, "grad_norm": 2.6117780208587646, "learning_rate": 9.960674391513046e-06, "loss": 0.6493, "step": 5752 }, { "epoch": 0.06878370138332596, "grad_norm": 6.322932243347168, "learning_rate": 9.960650152078197e-06, "loss": 0.5917, "step": 5753 }, { "epoch": 0.06879565752818662, "grad_norm": 3.078599214553833, "learning_rate": 9.960625905204835e-06, "loss": 0.5805, "step": 5754 }, { "epoch": 0.06880761367304726, "grad_norm": 2.613879919052124, "learning_rate": 9.960601650892997e-06, "loss": 0.5736, "step": 5755 }, { "epoch": 0.06881956981790792, "grad_norm": 16.482709884643555, "learning_rate": 9.960577389142719e-06, "loss": 0.6137, "step": 5756 }, { "epoch": 0.06883152596276856, "grad_norm": 3.8603060245513916, "learning_rate": 9.960553119954037e-06, "loss": 0.5497, "step": 5757 }, { "epoch": 0.06884348210762922, "grad_norm": 4.545787811279297, "learning_rate": 9.96052884332699e-06, "loss": 0.6605, "step": 5758 }, { "epoch": 0.06885543825248987, "grad_norm": 2.313110113143921, "learning_rate": 9.960504559261611e-06, "loss": 0.6937, "step": 5759 }, { "epoch": 0.06886739439735051, "grad_norm": 2.2531840801239014, "learning_rate": 9.960480267757939e-06, "loss": 0.6362, "step": 5760 }, { "epoch": 0.06887935054221117, "grad_norm": 1.4272199869155884, "learning_rate": 9.960455968816008e-06, "loss": 0.5544, "step": 5761 }, { "epoch": 0.06889130668707182, "grad_norm": 2.861334800720215, "learning_rate": 9.960431662435855e-06, "loss": 0.6592, "step": 5762 }, { "epoch": 0.06890326283193247, "grad_norm": 2.8342349529266357, "learning_rate": 9.960407348617519e-06, "loss": 0.6672, "step": 5763 }, { "epoch": 0.06891521897679312, "grad_norm": 1.8104327917099, "learning_rate": 9.960383027361033e-06, "loss": 0.6641, "step": 5764 }, { "epoch": 0.06892717512165378, "grad_norm": 2.488438129425049, "learning_rate": 9.960358698666436e-06, "loss": 0.7457, "step": 5765 }, { "epoch": 0.06893913126651442, "grad_norm": 3.4714443683624268, "learning_rate": 9.960334362533764e-06, "loss": 0.6154, "step": 5766 }, { "epoch": 0.06895108741137508, "grad_norm": 2.9358696937561035, "learning_rate": 9.960310018963051e-06, "loss": 0.6121, "step": 5767 }, { "epoch": 0.06896304355623573, "grad_norm": 2.666870355606079, "learning_rate": 9.960285667954337e-06, "loss": 0.6198, "step": 5768 }, { "epoch": 0.06897499970109638, "grad_norm": 1.7849777936935425, "learning_rate": 9.960261309507655e-06, "loss": 0.6639, "step": 5769 }, { "epoch": 0.06898695584595703, "grad_norm": 2.9537346363067627, "learning_rate": 9.960236943623045e-06, "loss": 0.6985, "step": 5770 }, { "epoch": 0.06899891199081767, "grad_norm": 1.876112699508667, "learning_rate": 9.960212570300542e-06, "loss": 0.6585, "step": 5771 }, { "epoch": 0.06901086813567833, "grad_norm": 1.6918283700942993, "learning_rate": 9.960188189540184e-06, "loss": 0.6477, "step": 5772 }, { "epoch": 0.06902282428053898, "grad_norm": 15.061944007873535, "learning_rate": 9.960163801342005e-06, "loss": 0.6509, "step": 5773 }, { "epoch": 0.06903478042539964, "grad_norm": 2.2473950386047363, "learning_rate": 9.960139405706042e-06, "loss": 0.5724, "step": 5774 }, { "epoch": 0.06904673657026028, "grad_norm": 2.1004209518432617, "learning_rate": 9.960115002632334e-06, "loss": 0.6568, "step": 5775 }, { "epoch": 0.06905869271512094, "grad_norm": 2.109347343444824, "learning_rate": 9.960090592120915e-06, "loss": 0.6817, "step": 5776 }, { "epoch": 0.06907064885998158, "grad_norm": 3.902647018432617, "learning_rate": 9.960066174171822e-06, "loss": 0.5907, "step": 5777 }, { "epoch": 0.06908260500484224, "grad_norm": 2.618849515914917, "learning_rate": 9.960041748785093e-06, "loss": 0.612, "step": 5778 }, { "epoch": 0.06909456114970289, "grad_norm": 2.2558093070983887, "learning_rate": 9.960017315960763e-06, "loss": 0.6524, "step": 5779 }, { "epoch": 0.06910651729456355, "grad_norm": 8.398128509521484, "learning_rate": 9.959992875698869e-06, "loss": 0.6373, "step": 5780 }, { "epoch": 0.06911847343942419, "grad_norm": 4.388463973999023, "learning_rate": 9.959968427999448e-06, "loss": 0.6311, "step": 5781 }, { "epoch": 0.06913042958428485, "grad_norm": 4.3841328620910645, "learning_rate": 9.95994397286254e-06, "loss": 0.572, "step": 5782 }, { "epoch": 0.0691423857291455, "grad_norm": 3.9411230087280273, "learning_rate": 9.959919510288175e-06, "loss": 0.6193, "step": 5783 }, { "epoch": 0.06915434187400614, "grad_norm": 2.769516944885254, "learning_rate": 9.959895040276394e-06, "loss": 0.5776, "step": 5784 }, { "epoch": 0.0691662980188668, "grad_norm": 2.255693197250366, "learning_rate": 9.959870562827233e-06, "loss": 0.6606, "step": 5785 }, { "epoch": 0.06917825416372744, "grad_norm": 3.086094856262207, "learning_rate": 9.959846077940728e-06, "loss": 0.7161, "step": 5786 }, { "epoch": 0.0691902103085881, "grad_norm": 6.461404800415039, "learning_rate": 9.959821585616916e-06, "loss": 0.6849, "step": 5787 }, { "epoch": 0.06920216645344875, "grad_norm": 2.323394536972046, "learning_rate": 9.959797085855835e-06, "loss": 0.6546, "step": 5788 }, { "epoch": 0.0692141225983094, "grad_norm": 3.39654803276062, "learning_rate": 9.959772578657521e-06, "loss": 0.609, "step": 5789 }, { "epoch": 0.06922607874317005, "grad_norm": 3.361140251159668, "learning_rate": 9.959748064022008e-06, "loss": 0.4942, "step": 5790 }, { "epoch": 0.06923803488803071, "grad_norm": 4.776079177856445, "learning_rate": 9.959723541949337e-06, "loss": 0.5756, "step": 5791 }, { "epoch": 0.06924999103289135, "grad_norm": 3.1164960861206055, "learning_rate": 9.959699012439542e-06, "loss": 0.6474, "step": 5792 }, { "epoch": 0.06926194717775201, "grad_norm": 2.5424020290374756, "learning_rate": 9.959674475492661e-06, "loss": 0.5938, "step": 5793 }, { "epoch": 0.06927390332261266, "grad_norm": 1.7712523937225342, "learning_rate": 9.95964993110873e-06, "loss": 0.6249, "step": 5794 }, { "epoch": 0.0692858594674733, "grad_norm": 2.463571548461914, "learning_rate": 9.959625379287788e-06, "loss": 0.6875, "step": 5795 }, { "epoch": 0.06929781561233396, "grad_norm": 1.9117318391799927, "learning_rate": 9.95960082002987e-06, "loss": 0.5598, "step": 5796 }, { "epoch": 0.0693097717571946, "grad_norm": 3.4049370288848877, "learning_rate": 9.95957625333501e-06, "loss": 0.6988, "step": 5797 }, { "epoch": 0.06932172790205526, "grad_norm": 3.574488878250122, "learning_rate": 9.95955167920325e-06, "loss": 0.7414, "step": 5798 }, { "epoch": 0.06933368404691591, "grad_norm": 3.722231388092041, "learning_rate": 9.959527097634625e-06, "loss": 0.6279, "step": 5799 }, { "epoch": 0.06934564019177657, "grad_norm": 43.56584930419922, "learning_rate": 9.95950250862917e-06, "loss": 0.7618, "step": 5800 }, { "epoch": 0.06935759633663721, "grad_norm": 1.9836558103561401, "learning_rate": 9.959477912186924e-06, "loss": 0.6165, "step": 5801 }, { "epoch": 0.06936955248149787, "grad_norm": 2.167179584503174, "learning_rate": 9.959453308307924e-06, "loss": 0.5905, "step": 5802 }, { "epoch": 0.06938150862635852, "grad_norm": 2.50201678276062, "learning_rate": 9.959428696992206e-06, "loss": 0.6134, "step": 5803 }, { "epoch": 0.06939346477121917, "grad_norm": 1.7947320938110352, "learning_rate": 9.959404078239806e-06, "loss": 0.5703, "step": 5804 }, { "epoch": 0.06940542091607982, "grad_norm": 3.7300937175750732, "learning_rate": 9.959379452050763e-06, "loss": 0.5417, "step": 5805 }, { "epoch": 0.06941737706094048, "grad_norm": 3.61997127532959, "learning_rate": 9.959354818425112e-06, "loss": 0.5753, "step": 5806 }, { "epoch": 0.06942933320580112, "grad_norm": 1.9533883333206177, "learning_rate": 9.959330177362892e-06, "loss": 0.5647, "step": 5807 }, { "epoch": 0.06944128935066177, "grad_norm": 2.3148865699768066, "learning_rate": 9.959305528864138e-06, "loss": 0.6628, "step": 5808 }, { "epoch": 0.06945324549552243, "grad_norm": 2.718353033065796, "learning_rate": 9.959280872928887e-06, "loss": 0.6857, "step": 5809 }, { "epoch": 0.06946520164038307, "grad_norm": 2.4305121898651123, "learning_rate": 9.959256209557177e-06, "loss": 0.6014, "step": 5810 }, { "epoch": 0.06947715778524373, "grad_norm": 3.3787224292755127, "learning_rate": 9.959231538749046e-06, "loss": 0.5517, "step": 5811 }, { "epoch": 0.06948911393010437, "grad_norm": 1.569958209991455, "learning_rate": 9.95920686050453e-06, "loss": 0.5756, "step": 5812 }, { "epoch": 0.06950107007496503, "grad_norm": 4.644542694091797, "learning_rate": 9.959182174823664e-06, "loss": 0.6265, "step": 5813 }, { "epoch": 0.06951302621982568, "grad_norm": 2.6794207096099854, "learning_rate": 9.959157481706487e-06, "loss": 0.6669, "step": 5814 }, { "epoch": 0.06952498236468634, "grad_norm": 2.231602668762207, "learning_rate": 9.959132781153035e-06, "loss": 0.5961, "step": 5815 }, { "epoch": 0.06953693850954698, "grad_norm": 1.9581458568572998, "learning_rate": 9.959108073163348e-06, "loss": 0.5733, "step": 5816 }, { "epoch": 0.06954889465440764, "grad_norm": 2.3329014778137207, "learning_rate": 9.95908335773746e-06, "loss": 0.5963, "step": 5817 }, { "epoch": 0.06956085079926828, "grad_norm": 5.255395412445068, "learning_rate": 9.959058634875407e-06, "loss": 0.6424, "step": 5818 }, { "epoch": 0.06957280694412893, "grad_norm": 1.8971415758132935, "learning_rate": 9.95903390457723e-06, "loss": 0.6818, "step": 5819 }, { "epoch": 0.06958476308898959, "grad_norm": 2.721215009689331, "learning_rate": 9.959009166842963e-06, "loss": 0.5466, "step": 5820 }, { "epoch": 0.06959671923385023, "grad_norm": 1.9989738464355469, "learning_rate": 9.958984421672646e-06, "loss": 0.5597, "step": 5821 }, { "epoch": 0.06960867537871089, "grad_norm": 2.1252291202545166, "learning_rate": 9.95895966906631e-06, "loss": 0.6221, "step": 5822 }, { "epoch": 0.06962063152357154, "grad_norm": 2.3284752368927, "learning_rate": 9.958934909024e-06, "loss": 0.5934, "step": 5823 }, { "epoch": 0.0696325876684322, "grad_norm": 22.889501571655273, "learning_rate": 9.95891014154575e-06, "loss": 0.63, "step": 5824 }, { "epoch": 0.06964454381329284, "grad_norm": 1.4612972736358643, "learning_rate": 9.958885366631594e-06, "loss": 0.5872, "step": 5825 }, { "epoch": 0.0696564999581535, "grad_norm": 3.2530417442321777, "learning_rate": 9.958860584281573e-06, "loss": 0.6039, "step": 5826 }, { "epoch": 0.06966845610301414, "grad_norm": 3.307583808898926, "learning_rate": 9.958835794495723e-06, "loss": 0.7297, "step": 5827 }, { "epoch": 0.0696804122478748, "grad_norm": 5.147078514099121, "learning_rate": 9.958810997274082e-06, "loss": 0.6313, "step": 5828 }, { "epoch": 0.06969236839273545, "grad_norm": 2.119112730026245, "learning_rate": 9.958786192616685e-06, "loss": 0.6293, "step": 5829 }, { "epoch": 0.06970432453759609, "grad_norm": 1.994280219078064, "learning_rate": 9.95876138052357e-06, "loss": 0.6624, "step": 5830 }, { "epoch": 0.06971628068245675, "grad_norm": 2.043219566345215, "learning_rate": 9.958736560994776e-06, "loss": 0.6314, "step": 5831 }, { "epoch": 0.0697282368273174, "grad_norm": 2.178335189819336, "learning_rate": 9.958711734030338e-06, "loss": 0.643, "step": 5832 }, { "epoch": 0.06974019297217805, "grad_norm": 3.838217258453369, "learning_rate": 9.958686899630295e-06, "loss": 0.6224, "step": 5833 }, { "epoch": 0.0697521491170387, "grad_norm": 1.941333532333374, "learning_rate": 9.958662057794682e-06, "loss": 0.6319, "step": 5834 }, { "epoch": 0.06976410526189936, "grad_norm": 1.8038734197616577, "learning_rate": 9.95863720852354e-06, "loss": 0.6561, "step": 5835 }, { "epoch": 0.06977606140676, "grad_norm": 2.9548656940460205, "learning_rate": 9.958612351816901e-06, "loss": 0.6429, "step": 5836 }, { "epoch": 0.06978801755162066, "grad_norm": 3.1785504817962646, "learning_rate": 9.958587487674807e-06, "loss": 0.5679, "step": 5837 }, { "epoch": 0.0697999736964813, "grad_norm": 4.231451034545898, "learning_rate": 9.958562616097294e-06, "loss": 0.6401, "step": 5838 }, { "epoch": 0.06981192984134196, "grad_norm": 1.8247488737106323, "learning_rate": 9.958537737084397e-06, "loss": 0.6544, "step": 5839 }, { "epoch": 0.06982388598620261, "grad_norm": 2.1754000186920166, "learning_rate": 9.958512850636155e-06, "loss": 0.5523, "step": 5840 }, { "epoch": 0.06983584213106327, "grad_norm": 2.622117280960083, "learning_rate": 9.958487956752607e-06, "loss": 0.632, "step": 5841 }, { "epoch": 0.06984779827592391, "grad_norm": 2.044620990753174, "learning_rate": 9.958463055433787e-06, "loss": 0.5877, "step": 5842 }, { "epoch": 0.06985975442078456, "grad_norm": 3.5546088218688965, "learning_rate": 9.958438146679735e-06, "loss": 0.6762, "step": 5843 }, { "epoch": 0.06987171056564521, "grad_norm": 4.798818588256836, "learning_rate": 9.958413230490487e-06, "loss": 0.624, "step": 5844 }, { "epoch": 0.06988366671050586, "grad_norm": 22.890804290771484, "learning_rate": 9.958388306866081e-06, "loss": 0.5642, "step": 5845 }, { "epoch": 0.06989562285536652, "grad_norm": 4.069189548492432, "learning_rate": 9.958363375806555e-06, "loss": 0.6744, "step": 5846 }, { "epoch": 0.06990757900022716, "grad_norm": 2.2692975997924805, "learning_rate": 9.958338437311943e-06, "loss": 0.6235, "step": 5847 }, { "epoch": 0.06991953514508782, "grad_norm": 2.4349052906036377, "learning_rate": 9.958313491382288e-06, "loss": 0.6458, "step": 5848 }, { "epoch": 0.06993149128994847, "grad_norm": 3.086470603942871, "learning_rate": 9.958288538017621e-06, "loss": 0.61, "step": 5849 }, { "epoch": 0.06994344743480913, "grad_norm": 2.148563861846924, "learning_rate": 9.958263577217984e-06, "loss": 0.5941, "step": 5850 }, { "epoch": 0.06995540357966977, "grad_norm": 1.703374981880188, "learning_rate": 9.958238608983416e-06, "loss": 0.6621, "step": 5851 }, { "epoch": 0.06996735972453043, "grad_norm": 2.5638198852539062, "learning_rate": 9.958213633313947e-06, "loss": 0.7023, "step": 5852 }, { "epoch": 0.06997931586939107, "grad_norm": 2.7487099170684814, "learning_rate": 9.958188650209622e-06, "loss": 0.7276, "step": 5853 }, { "epoch": 0.06999127201425172, "grad_norm": 3.482043981552124, "learning_rate": 9.958163659670475e-06, "loss": 0.6393, "step": 5854 }, { "epoch": 0.07000322815911238, "grad_norm": 5.357461452484131, "learning_rate": 9.958138661696544e-06, "loss": 0.6275, "step": 5855 }, { "epoch": 0.07001518430397302, "grad_norm": 8.398303985595703, "learning_rate": 9.958113656287864e-06, "loss": 0.6248, "step": 5856 }, { "epoch": 0.07002714044883368, "grad_norm": 2.090773105621338, "learning_rate": 9.958088643444478e-06, "loss": 0.6154, "step": 5857 }, { "epoch": 0.07003909659369432, "grad_norm": 2.0930848121643066, "learning_rate": 9.95806362316642e-06, "loss": 0.6338, "step": 5858 }, { "epoch": 0.07005105273855498, "grad_norm": 4.558060169219971, "learning_rate": 9.958038595453727e-06, "loss": 0.6592, "step": 5859 }, { "epoch": 0.07006300888341563, "grad_norm": 3.625502347946167, "learning_rate": 9.958013560306437e-06, "loss": 0.583, "step": 5860 }, { "epoch": 0.07007496502827629, "grad_norm": 5.188382148742676, "learning_rate": 9.95798851772459e-06, "loss": 0.6223, "step": 5861 }, { "epoch": 0.07008692117313693, "grad_norm": 2.8220388889312744, "learning_rate": 9.957963467708221e-06, "loss": 0.6123, "step": 5862 }, { "epoch": 0.07009887731799759, "grad_norm": 2.2043185234069824, "learning_rate": 9.957938410257368e-06, "loss": 0.6358, "step": 5863 }, { "epoch": 0.07011083346285824, "grad_norm": 4.572438716888428, "learning_rate": 9.957913345372067e-06, "loss": 0.6851, "step": 5864 }, { "epoch": 0.0701227896077189, "grad_norm": 5.351919174194336, "learning_rate": 9.95788827305236e-06, "loss": 0.598, "step": 5865 }, { "epoch": 0.07013474575257954, "grad_norm": 4.720386981964111, "learning_rate": 9.95786319329828e-06, "loss": 0.638, "step": 5866 }, { "epoch": 0.07014670189744018, "grad_norm": 2.917670488357544, "learning_rate": 9.957838106109868e-06, "loss": 0.6219, "step": 5867 }, { "epoch": 0.07015865804230084, "grad_norm": 2.5467441082000732, "learning_rate": 9.957813011487158e-06, "loss": 0.6297, "step": 5868 }, { "epoch": 0.07017061418716149, "grad_norm": 2.0713155269622803, "learning_rate": 9.957787909430193e-06, "loss": 0.5867, "step": 5869 }, { "epoch": 0.07018257033202215, "grad_norm": 2.4978528022766113, "learning_rate": 9.957762799939004e-06, "loss": 0.7132, "step": 5870 }, { "epoch": 0.07019452647688279, "grad_norm": 2.421212673187256, "learning_rate": 9.957737683013634e-06, "loss": 0.578, "step": 5871 }, { "epoch": 0.07020648262174345, "grad_norm": 1.9775999784469604, "learning_rate": 9.957712558654118e-06, "loss": 0.6214, "step": 5872 }, { "epoch": 0.0702184387666041, "grad_norm": 1.8667014837265015, "learning_rate": 9.957687426860494e-06, "loss": 0.603, "step": 5873 }, { "epoch": 0.07023039491146475, "grad_norm": 10.113024711608887, "learning_rate": 9.957662287632801e-06, "loss": 0.6246, "step": 5874 }, { "epoch": 0.0702423510563254, "grad_norm": 5.652347564697266, "learning_rate": 9.957637140971076e-06, "loss": 0.7504, "step": 5875 }, { "epoch": 0.07025430720118606, "grad_norm": 20.049583435058594, "learning_rate": 9.957611986875355e-06, "loss": 0.6217, "step": 5876 }, { "epoch": 0.0702662633460467, "grad_norm": 6.736702919006348, "learning_rate": 9.95758682534568e-06, "loss": 0.6738, "step": 5877 }, { "epoch": 0.07027821949090735, "grad_norm": 3.2712268829345703, "learning_rate": 9.957561656382085e-06, "loss": 0.5746, "step": 5878 }, { "epoch": 0.070290175635768, "grad_norm": 2.3119301795959473, "learning_rate": 9.957536479984607e-06, "loss": 0.6466, "step": 5879 }, { "epoch": 0.07030213178062865, "grad_norm": 42.575225830078125, "learning_rate": 9.957511296153285e-06, "loss": 0.6322, "step": 5880 }, { "epoch": 0.07031408792548931, "grad_norm": 2.0172793865203857, "learning_rate": 9.957486104888158e-06, "loss": 0.6655, "step": 5881 }, { "epoch": 0.07032604407034995, "grad_norm": 3.931516408920288, "learning_rate": 9.957460906189264e-06, "loss": 0.6309, "step": 5882 }, { "epoch": 0.07033800021521061, "grad_norm": 2.0006866455078125, "learning_rate": 9.957435700056639e-06, "loss": 0.6692, "step": 5883 }, { "epoch": 0.07034995636007126, "grad_norm": 2.1617822647094727, "learning_rate": 9.957410486490322e-06, "loss": 0.6101, "step": 5884 }, { "epoch": 0.07036191250493191, "grad_norm": 2.268965244293213, "learning_rate": 9.95738526549035e-06, "loss": 0.6643, "step": 5885 }, { "epoch": 0.07037386864979256, "grad_norm": 1.9632824659347534, "learning_rate": 9.957360037056763e-06, "loss": 0.6071, "step": 5886 }, { "epoch": 0.07038582479465322, "grad_norm": 2.1660919189453125, "learning_rate": 9.957334801189595e-06, "loss": 0.6186, "step": 5887 }, { "epoch": 0.07039778093951386, "grad_norm": 1.6233696937561035, "learning_rate": 9.957309557888886e-06, "loss": 0.5947, "step": 5888 }, { "epoch": 0.07040973708437451, "grad_norm": 14.616938591003418, "learning_rate": 9.957284307154674e-06, "loss": 0.701, "step": 5889 }, { "epoch": 0.07042169322923517, "grad_norm": 2.9894490242004395, "learning_rate": 9.957259048986995e-06, "loss": 0.747, "step": 5890 }, { "epoch": 0.07043364937409581, "grad_norm": 1.5008548498153687, "learning_rate": 9.95723378338589e-06, "loss": 0.7429, "step": 5891 }, { "epoch": 0.07044560551895647, "grad_norm": 4.387363433837891, "learning_rate": 9.957208510351395e-06, "loss": 0.6576, "step": 5892 }, { "epoch": 0.07045756166381711, "grad_norm": 2.6175801753997803, "learning_rate": 9.95718322988355e-06, "loss": 0.6445, "step": 5893 }, { "epoch": 0.07046951780867777, "grad_norm": 2.9683241844177246, "learning_rate": 9.957157941982389e-06, "loss": 0.6724, "step": 5894 }, { "epoch": 0.07048147395353842, "grad_norm": 3.267814874649048, "learning_rate": 9.957132646647951e-06, "loss": 0.606, "step": 5895 }, { "epoch": 0.07049343009839908, "grad_norm": 1.8595889806747437, "learning_rate": 9.957107343880276e-06, "loss": 0.7223, "step": 5896 }, { "epoch": 0.07050538624325972, "grad_norm": 2.1285507678985596, "learning_rate": 9.957082033679401e-06, "loss": 0.5951, "step": 5897 }, { "epoch": 0.07051734238812038, "grad_norm": 1.818300724029541, "learning_rate": 9.957056716045366e-06, "loss": 0.6007, "step": 5898 }, { "epoch": 0.07052929853298102, "grad_norm": 2.3210983276367188, "learning_rate": 9.957031390978204e-06, "loss": 0.6461, "step": 5899 }, { "epoch": 0.07054125467784168, "grad_norm": 3.123194694519043, "learning_rate": 9.957006058477958e-06, "loss": 0.6912, "step": 5900 }, { "epoch": 0.07055321082270233, "grad_norm": 2.6888699531555176, "learning_rate": 9.956980718544663e-06, "loss": 0.6324, "step": 5901 }, { "epoch": 0.07056516696756297, "grad_norm": 2.3205740451812744, "learning_rate": 9.956955371178357e-06, "loss": 0.5771, "step": 5902 }, { "epoch": 0.07057712311242363, "grad_norm": 3.1205196380615234, "learning_rate": 9.95693001637908e-06, "loss": 0.6794, "step": 5903 }, { "epoch": 0.07058907925728428, "grad_norm": 2.658963680267334, "learning_rate": 9.956904654146868e-06, "loss": 0.6066, "step": 5904 }, { "epoch": 0.07060103540214493, "grad_norm": 1.7673399448394775, "learning_rate": 9.95687928448176e-06, "loss": 0.6308, "step": 5905 }, { "epoch": 0.07061299154700558, "grad_norm": 3.9478225708007812, "learning_rate": 9.956853907383793e-06, "loss": 0.606, "step": 5906 }, { "epoch": 0.07062494769186624, "grad_norm": 1.5423173904418945, "learning_rate": 9.956828522853007e-06, "loss": 0.4913, "step": 5907 }, { "epoch": 0.07063690383672688, "grad_norm": 1.991693139076233, "learning_rate": 9.956803130889439e-06, "loss": 0.605, "step": 5908 }, { "epoch": 0.07064885998158754, "grad_norm": 2.5434532165527344, "learning_rate": 9.956777731493126e-06, "loss": 0.6182, "step": 5909 }, { "epoch": 0.07066081612644819, "grad_norm": 4.229970455169678, "learning_rate": 9.95675232466411e-06, "loss": 0.5941, "step": 5910 }, { "epoch": 0.07067277227130885, "grad_norm": 6.030850410461426, "learning_rate": 9.956726910402423e-06, "loss": 0.6728, "step": 5911 }, { "epoch": 0.07068472841616949, "grad_norm": 1.9648330211639404, "learning_rate": 9.956701488708109e-06, "loss": 0.6712, "step": 5912 }, { "epoch": 0.07069668456103013, "grad_norm": 2.391429901123047, "learning_rate": 9.9566760595812e-06, "loss": 0.7041, "step": 5913 }, { "epoch": 0.0707086407058908, "grad_norm": 3.02351450920105, "learning_rate": 9.95665062302174e-06, "loss": 0.5896, "step": 5914 }, { "epoch": 0.07072059685075144, "grad_norm": 2.3360960483551025, "learning_rate": 9.956625179029767e-06, "loss": 0.6376, "step": 5915 }, { "epoch": 0.0707325529956121, "grad_norm": 3.415583610534668, "learning_rate": 9.956599727605314e-06, "loss": 0.5043, "step": 5916 }, { "epoch": 0.07074450914047274, "grad_norm": 1.4009796380996704, "learning_rate": 9.956574268748422e-06, "loss": 0.5636, "step": 5917 }, { "epoch": 0.0707564652853334, "grad_norm": 4.261534214019775, "learning_rate": 9.95654880245913e-06, "loss": 0.606, "step": 5918 }, { "epoch": 0.07076842143019405, "grad_norm": 3.804436445236206, "learning_rate": 9.956523328737475e-06, "loss": 0.6902, "step": 5919 }, { "epoch": 0.0707803775750547, "grad_norm": 2.065753221511841, "learning_rate": 9.956497847583495e-06, "loss": 0.6171, "step": 5920 }, { "epoch": 0.07079233371991535, "grad_norm": 3.997894763946533, "learning_rate": 9.95647235899723e-06, "loss": 0.6504, "step": 5921 }, { "epoch": 0.07080428986477601, "grad_norm": 3.1136298179626465, "learning_rate": 9.956446862978718e-06, "loss": 0.587, "step": 5922 }, { "epoch": 0.07081624600963665, "grad_norm": 2.71962833404541, "learning_rate": 9.956421359527994e-06, "loss": 0.5442, "step": 5923 }, { "epoch": 0.07082820215449731, "grad_norm": 1.6622185707092285, "learning_rate": 9.956395848645101e-06, "loss": 0.588, "step": 5924 }, { "epoch": 0.07084015829935796, "grad_norm": 3.1071856021881104, "learning_rate": 9.956370330330074e-06, "loss": 0.5388, "step": 5925 }, { "epoch": 0.0708521144442186, "grad_norm": 3.2727279663085938, "learning_rate": 9.956344804582951e-06, "loss": 0.675, "step": 5926 }, { "epoch": 0.07086407058907926, "grad_norm": 6.739810943603516, "learning_rate": 9.956319271403773e-06, "loss": 0.6213, "step": 5927 }, { "epoch": 0.0708760267339399, "grad_norm": 3.7562434673309326, "learning_rate": 9.956293730792575e-06, "loss": 0.5793, "step": 5928 }, { "epoch": 0.07088798287880056, "grad_norm": 1.9314969778060913, "learning_rate": 9.956268182749399e-06, "loss": 0.7066, "step": 5929 }, { "epoch": 0.07089993902366121, "grad_norm": 1.8444806337356567, "learning_rate": 9.956242627274279e-06, "loss": 0.6068, "step": 5930 }, { "epoch": 0.07091189516852187, "grad_norm": 4.009878635406494, "learning_rate": 9.956217064367256e-06, "loss": 0.58, "step": 5931 }, { "epoch": 0.07092385131338251, "grad_norm": 1.860573649406433, "learning_rate": 9.956191494028368e-06, "loss": 0.6242, "step": 5932 }, { "epoch": 0.07093580745824317, "grad_norm": 2.558016777038574, "learning_rate": 9.956165916257654e-06, "loss": 0.6841, "step": 5933 }, { "epoch": 0.07094776360310381, "grad_norm": 2.184670925140381, "learning_rate": 9.95614033105515e-06, "loss": 0.61, "step": 5934 }, { "epoch": 0.07095971974796447, "grad_norm": 1.971778154373169, "learning_rate": 9.956114738420898e-06, "loss": 0.5785, "step": 5935 }, { "epoch": 0.07097167589282512, "grad_norm": 1.7590941190719604, "learning_rate": 9.956089138354934e-06, "loss": 0.6679, "step": 5936 }, { "epoch": 0.07098363203768576, "grad_norm": 4.725027561187744, "learning_rate": 9.956063530857296e-06, "loss": 0.6842, "step": 5937 }, { "epoch": 0.07099558818254642, "grad_norm": 3.2802233695983887, "learning_rate": 9.956037915928022e-06, "loss": 0.6548, "step": 5938 }, { "epoch": 0.07100754432740707, "grad_norm": 2.508965253829956, "learning_rate": 9.956012293567154e-06, "loss": 0.7195, "step": 5939 }, { "epoch": 0.07101950047226772, "grad_norm": 2.528512954711914, "learning_rate": 9.955986663774726e-06, "loss": 0.5669, "step": 5940 }, { "epoch": 0.07103145661712837, "grad_norm": 5.633007526397705, "learning_rate": 9.95596102655078e-06, "loss": 0.6506, "step": 5941 }, { "epoch": 0.07104341276198903, "grad_norm": 4.470312118530273, "learning_rate": 9.955935381895351e-06, "loss": 0.6825, "step": 5942 }, { "epoch": 0.07105536890684967, "grad_norm": 5.210360527038574, "learning_rate": 9.955909729808482e-06, "loss": 0.578, "step": 5943 }, { "epoch": 0.07106732505171033, "grad_norm": 1.7935101985931396, "learning_rate": 9.955884070290206e-06, "loss": 0.611, "step": 5944 }, { "epoch": 0.07107928119657098, "grad_norm": 1.7563998699188232, "learning_rate": 9.955858403340566e-06, "loss": 0.7274, "step": 5945 }, { "epoch": 0.07109123734143163, "grad_norm": 1.3981050252914429, "learning_rate": 9.955832728959597e-06, "loss": 0.5086, "step": 5946 }, { "epoch": 0.07110319348629228, "grad_norm": 3.8572208881378174, "learning_rate": 9.95580704714734e-06, "loss": 0.7127, "step": 5947 }, { "epoch": 0.07111514963115294, "grad_norm": 3.2434377670288086, "learning_rate": 9.955781357903833e-06, "loss": 0.6105, "step": 5948 }, { "epoch": 0.07112710577601358, "grad_norm": 2.0232391357421875, "learning_rate": 9.955755661229114e-06, "loss": 0.5777, "step": 5949 }, { "epoch": 0.07113906192087423, "grad_norm": 1.6305015087127686, "learning_rate": 9.955729957123222e-06, "loss": 0.6952, "step": 5950 }, { "epoch": 0.07115101806573489, "grad_norm": 2.8220770359039307, "learning_rate": 9.955704245586195e-06, "loss": 0.6534, "step": 5951 }, { "epoch": 0.07116297421059553, "grad_norm": 3.655113697052002, "learning_rate": 9.955678526618073e-06, "loss": 0.5866, "step": 5952 }, { "epoch": 0.07117493035545619, "grad_norm": 3.834334135055542, "learning_rate": 9.955652800218892e-06, "loss": 0.6538, "step": 5953 }, { "epoch": 0.07118688650031683, "grad_norm": 2.587531089782715, "learning_rate": 9.955627066388693e-06, "loss": 0.6776, "step": 5954 }, { "epoch": 0.07119884264517749, "grad_norm": 1.4481250047683716, "learning_rate": 9.955601325127511e-06, "loss": 0.5457, "step": 5955 }, { "epoch": 0.07121079879003814, "grad_norm": 1.8621901273727417, "learning_rate": 9.95557557643539e-06, "loss": 0.6948, "step": 5956 }, { "epoch": 0.0712227549348988, "grad_norm": 2.90915584564209, "learning_rate": 9.955549820312365e-06, "loss": 0.6298, "step": 5957 }, { "epoch": 0.07123471107975944, "grad_norm": 1.5869090557098389, "learning_rate": 9.955524056758477e-06, "loss": 0.6466, "step": 5958 }, { "epoch": 0.0712466672246201, "grad_norm": 4.770732879638672, "learning_rate": 9.95549828577376e-06, "loss": 0.6132, "step": 5959 }, { "epoch": 0.07125862336948074, "grad_norm": 2.82401704788208, "learning_rate": 9.955472507358258e-06, "loss": 0.692, "step": 5960 }, { "epoch": 0.07127057951434139, "grad_norm": 1.801506757736206, "learning_rate": 9.955446721512006e-06, "loss": 0.6253, "step": 5961 }, { "epoch": 0.07128253565920205, "grad_norm": 2.351454496383667, "learning_rate": 9.955420928235044e-06, "loss": 0.7198, "step": 5962 }, { "epoch": 0.07129449180406269, "grad_norm": 2.8104639053344727, "learning_rate": 9.955395127527411e-06, "loss": 0.6712, "step": 5963 }, { "epoch": 0.07130644794892335, "grad_norm": 1.9448996782302856, "learning_rate": 9.955369319389148e-06, "loss": 0.6175, "step": 5964 }, { "epoch": 0.071318404093784, "grad_norm": 4.179165840148926, "learning_rate": 9.955343503820288e-06, "loss": 0.6364, "step": 5965 }, { "epoch": 0.07133036023864466, "grad_norm": 7.822927474975586, "learning_rate": 9.955317680820874e-06, "loss": 0.6319, "step": 5966 }, { "epoch": 0.0713423163835053, "grad_norm": 2.2331278324127197, "learning_rate": 9.955291850390943e-06, "loss": 0.6415, "step": 5967 }, { "epoch": 0.07135427252836596, "grad_norm": 1.615599274635315, "learning_rate": 9.955266012530534e-06, "loss": 0.5317, "step": 5968 }, { "epoch": 0.0713662286732266, "grad_norm": 1.750326156616211, "learning_rate": 9.955240167239686e-06, "loss": 0.6295, "step": 5969 }, { "epoch": 0.07137818481808726, "grad_norm": 1.8817005157470703, "learning_rate": 9.955214314518438e-06, "loss": 0.7138, "step": 5970 }, { "epoch": 0.0713901409629479, "grad_norm": 2.094482660293579, "learning_rate": 9.95518845436683e-06, "loss": 0.6878, "step": 5971 }, { "epoch": 0.07140209710780855, "grad_norm": 1.8286813497543335, "learning_rate": 9.955162586784898e-06, "loss": 0.5553, "step": 5972 }, { "epoch": 0.07141405325266921, "grad_norm": 2.2268404960632324, "learning_rate": 9.955136711772681e-06, "loss": 0.6696, "step": 5973 }, { "epoch": 0.07142600939752985, "grad_norm": 2.375042676925659, "learning_rate": 9.955110829330221e-06, "loss": 0.6213, "step": 5974 }, { "epoch": 0.07143796554239051, "grad_norm": 3.303842782974243, "learning_rate": 9.955084939457554e-06, "loss": 0.6279, "step": 5975 }, { "epoch": 0.07144992168725116, "grad_norm": 2.651611089706421, "learning_rate": 9.955059042154719e-06, "loss": 0.6389, "step": 5976 }, { "epoch": 0.07146187783211182, "grad_norm": 1.9468960762023926, "learning_rate": 9.955033137421755e-06, "loss": 0.6056, "step": 5977 }, { "epoch": 0.07147383397697246, "grad_norm": 2.189760684967041, "learning_rate": 9.955007225258701e-06, "loss": 0.6823, "step": 5978 }, { "epoch": 0.07148579012183312, "grad_norm": 2.884025812149048, "learning_rate": 9.954981305665598e-06, "loss": 0.8017, "step": 5979 }, { "epoch": 0.07149774626669377, "grad_norm": 4.968649864196777, "learning_rate": 9.954955378642481e-06, "loss": 0.6236, "step": 5980 }, { "epoch": 0.07150970241155442, "grad_norm": 1.9471535682678223, "learning_rate": 9.954929444189391e-06, "loss": 0.6119, "step": 5981 }, { "epoch": 0.07152165855641507, "grad_norm": 1.469705581665039, "learning_rate": 9.954903502306367e-06, "loss": 0.5974, "step": 5982 }, { "epoch": 0.07153361470127573, "grad_norm": 1.5871913433074951, "learning_rate": 9.95487755299345e-06, "loss": 0.6828, "step": 5983 }, { "epoch": 0.07154557084613637, "grad_norm": 2.3440866470336914, "learning_rate": 9.954851596250674e-06, "loss": 0.7626, "step": 5984 }, { "epoch": 0.07155752699099702, "grad_norm": 3.0166399478912354, "learning_rate": 9.95482563207808e-06, "loss": 0.5643, "step": 5985 }, { "epoch": 0.07156948313585768, "grad_norm": 3.6805434226989746, "learning_rate": 9.954799660475708e-06, "loss": 0.7139, "step": 5986 }, { "epoch": 0.07158143928071832, "grad_norm": 1.7438099384307861, "learning_rate": 9.954773681443598e-06, "loss": 0.6204, "step": 5987 }, { "epoch": 0.07159339542557898, "grad_norm": 4.073526382446289, "learning_rate": 9.954747694981783e-06, "loss": 0.6456, "step": 5988 }, { "epoch": 0.07160535157043962, "grad_norm": 1.9720553159713745, "learning_rate": 9.954721701090311e-06, "loss": 0.6374, "step": 5989 }, { "epoch": 0.07161730771530028, "grad_norm": 5.425780296325684, "learning_rate": 9.954695699769213e-06, "loss": 0.7029, "step": 5990 }, { "epoch": 0.07162926386016093, "grad_norm": 2.6930923461914062, "learning_rate": 9.954669691018533e-06, "loss": 0.8313, "step": 5991 }, { "epoch": 0.07164122000502159, "grad_norm": 5.532368183135986, "learning_rate": 9.954643674838307e-06, "loss": 0.5745, "step": 5992 }, { "epoch": 0.07165317614988223, "grad_norm": 3.8689253330230713, "learning_rate": 9.954617651228575e-06, "loss": 0.5451, "step": 5993 }, { "epoch": 0.07166513229474289, "grad_norm": 5.332041263580322, "learning_rate": 9.954591620189377e-06, "loss": 0.5853, "step": 5994 }, { "epoch": 0.07167708843960353, "grad_norm": 7.103998184204102, "learning_rate": 9.954565581720751e-06, "loss": 0.6272, "step": 5995 }, { "epoch": 0.07168904458446418, "grad_norm": 2.6539721488952637, "learning_rate": 9.954539535822737e-06, "loss": 0.6293, "step": 5996 }, { "epoch": 0.07170100072932484, "grad_norm": 3.056811571121216, "learning_rate": 9.95451348249537e-06, "loss": 0.7277, "step": 5997 }, { "epoch": 0.07171295687418548, "grad_norm": 2.676269769668579, "learning_rate": 9.954487421738698e-06, "loss": 0.7331, "step": 5998 }, { "epoch": 0.07172491301904614, "grad_norm": 2.5945885181427, "learning_rate": 9.954461353552751e-06, "loss": 0.6661, "step": 5999 }, { "epoch": 0.07173686916390679, "grad_norm": 3.7230873107910156, "learning_rate": 9.954435277937573e-06, "loss": 0.6368, "step": 6000 }, { "epoch": 0.07174882530876744, "grad_norm": 3.139536142349243, "learning_rate": 9.9544091948932e-06, "loss": 0.8042, "step": 6001 }, { "epoch": 0.07176078145362809, "grad_norm": 4.222895622253418, "learning_rate": 9.954383104419673e-06, "loss": 0.5962, "step": 6002 }, { "epoch": 0.07177273759848875, "grad_norm": 2.996396541595459, "learning_rate": 9.954357006517032e-06, "loss": 0.578, "step": 6003 }, { "epoch": 0.07178469374334939, "grad_norm": 1.9427566528320312, "learning_rate": 9.954330901185316e-06, "loss": 0.5524, "step": 6004 }, { "epoch": 0.07179664988821005, "grad_norm": 7.2847418785095215, "learning_rate": 9.95430478842456e-06, "loss": 0.5684, "step": 6005 }, { "epoch": 0.0718086060330707, "grad_norm": 1.953577995300293, "learning_rate": 9.95427866823481e-06, "loss": 0.6438, "step": 6006 }, { "epoch": 0.07182056217793135, "grad_norm": 2.9293789863586426, "learning_rate": 9.9542525406161e-06, "loss": 0.6854, "step": 6007 }, { "epoch": 0.071832518322792, "grad_norm": 1.9683877229690552, "learning_rate": 9.95422640556847e-06, "loss": 0.628, "step": 6008 }, { "epoch": 0.07184447446765264, "grad_norm": 2.280984878540039, "learning_rate": 9.954200263091959e-06, "loss": 0.7185, "step": 6009 }, { "epoch": 0.0718564306125133, "grad_norm": 4.260857582092285, "learning_rate": 9.95417411318661e-06, "loss": 0.6505, "step": 6010 }, { "epoch": 0.07186838675737395, "grad_norm": 1.8523144721984863, "learning_rate": 9.954147955852458e-06, "loss": 0.6964, "step": 6011 }, { "epoch": 0.0718803429022346, "grad_norm": 2.1204378604888916, "learning_rate": 9.954121791089544e-06, "loss": 0.5844, "step": 6012 }, { "epoch": 0.07189229904709525, "grad_norm": 3.0726234912872314, "learning_rate": 9.954095618897906e-06, "loss": 0.5677, "step": 6013 }, { "epoch": 0.07190425519195591, "grad_norm": 1.8753775358200073, "learning_rate": 9.954069439277582e-06, "loss": 0.5846, "step": 6014 }, { "epoch": 0.07191621133681655, "grad_norm": 1.6468397378921509, "learning_rate": 9.954043252228616e-06, "loss": 0.6148, "step": 6015 }, { "epoch": 0.07192816748167721, "grad_norm": 3.067070722579956, "learning_rate": 9.954017057751045e-06, "loss": 0.6598, "step": 6016 }, { "epoch": 0.07194012362653786, "grad_norm": 4.218221664428711, "learning_rate": 9.953990855844906e-06, "loss": 0.6034, "step": 6017 }, { "epoch": 0.07195207977139852, "grad_norm": 2.621734380722046, "learning_rate": 9.953964646510241e-06, "loss": 0.5782, "step": 6018 }, { "epoch": 0.07196403591625916, "grad_norm": 1.9424452781677246, "learning_rate": 9.953938429747087e-06, "loss": 0.6912, "step": 6019 }, { "epoch": 0.0719759920611198, "grad_norm": 1.7524224519729614, "learning_rate": 9.953912205555487e-06, "loss": 0.5273, "step": 6020 }, { "epoch": 0.07198794820598046, "grad_norm": 5.101604461669922, "learning_rate": 9.953885973935478e-06, "loss": 0.6197, "step": 6021 }, { "epoch": 0.07199990435084111, "grad_norm": 10.588607788085938, "learning_rate": 9.953859734887097e-06, "loss": 0.6236, "step": 6022 }, { "epoch": 0.07201186049570177, "grad_norm": 5.863710880279541, "learning_rate": 9.953833488410386e-06, "loss": 0.6316, "step": 6023 }, { "epoch": 0.07202381664056241, "grad_norm": 2.4667325019836426, "learning_rate": 9.953807234505387e-06, "loss": 0.7282, "step": 6024 }, { "epoch": 0.07203577278542307, "grad_norm": 3.6089236736297607, "learning_rate": 9.953780973172135e-06, "loss": 0.6971, "step": 6025 }, { "epoch": 0.07204772893028372, "grad_norm": 2.9184489250183105, "learning_rate": 9.95375470441067e-06, "loss": 0.6835, "step": 6026 }, { "epoch": 0.07205968507514438, "grad_norm": 2.212839365005493, "learning_rate": 9.953728428221033e-06, "loss": 0.7205, "step": 6027 }, { "epoch": 0.07207164122000502, "grad_norm": 9.284270286560059, "learning_rate": 9.95370214460326e-06, "loss": 0.6868, "step": 6028 }, { "epoch": 0.07208359736486568, "grad_norm": 2.3596205711364746, "learning_rate": 9.953675853557396e-06, "loss": 0.6322, "step": 6029 }, { "epoch": 0.07209555350972632, "grad_norm": 2.7576167583465576, "learning_rate": 9.953649555083477e-06, "loss": 0.7543, "step": 6030 }, { "epoch": 0.07210750965458697, "grad_norm": 2.0507092475891113, "learning_rate": 9.953623249181544e-06, "loss": 0.7247, "step": 6031 }, { "epoch": 0.07211946579944763, "grad_norm": 2.408937454223633, "learning_rate": 9.953596935851633e-06, "loss": 0.6649, "step": 6032 }, { "epoch": 0.07213142194430827, "grad_norm": 3.6923763751983643, "learning_rate": 9.953570615093787e-06, "loss": 0.6296, "step": 6033 }, { "epoch": 0.07214337808916893, "grad_norm": 1.8173434734344482, "learning_rate": 9.953544286908043e-06, "loss": 0.6544, "step": 6034 }, { "epoch": 0.07215533423402958, "grad_norm": 2.4804375171661377, "learning_rate": 9.953517951294445e-06, "loss": 0.6229, "step": 6035 }, { "epoch": 0.07216729037889023, "grad_norm": 3.5622894763946533, "learning_rate": 9.953491608253025e-06, "loss": 0.7024, "step": 6036 }, { "epoch": 0.07217924652375088, "grad_norm": 7.471817493438721, "learning_rate": 9.95346525778383e-06, "loss": 0.5761, "step": 6037 }, { "epoch": 0.07219120266861154, "grad_norm": 1.7691289186477661, "learning_rate": 9.953438899886894e-06, "loss": 0.5503, "step": 6038 }, { "epoch": 0.07220315881347218, "grad_norm": 8.314308166503906, "learning_rate": 9.95341253456226e-06, "loss": 0.6118, "step": 6039 }, { "epoch": 0.07221511495833284, "grad_norm": 2.5575919151306152, "learning_rate": 9.953386161809966e-06, "loss": 0.661, "step": 6040 }, { "epoch": 0.07222707110319349, "grad_norm": 3.1984338760375977, "learning_rate": 9.953359781630052e-06, "loss": 0.5612, "step": 6041 }, { "epoch": 0.07223902724805414, "grad_norm": 2.138012647628784, "learning_rate": 9.953333394022558e-06, "loss": 0.6041, "step": 6042 }, { "epoch": 0.07225098339291479, "grad_norm": 3.134141206741333, "learning_rate": 9.953306998987522e-06, "loss": 0.6899, "step": 6043 }, { "epoch": 0.07226293953777543, "grad_norm": 3.174820899963379, "learning_rate": 9.953280596524983e-06, "loss": 0.6776, "step": 6044 }, { "epoch": 0.07227489568263609, "grad_norm": 2.47835111618042, "learning_rate": 9.953254186634985e-06, "loss": 0.5957, "step": 6045 }, { "epoch": 0.07228685182749674, "grad_norm": 2.604290008544922, "learning_rate": 9.953227769317565e-06, "loss": 0.7377, "step": 6046 }, { "epoch": 0.0722988079723574, "grad_norm": 7.309451580047607, "learning_rate": 9.95320134457276e-06, "loss": 0.6475, "step": 6047 }, { "epoch": 0.07231076411721804, "grad_norm": 4.411806106567383, "learning_rate": 9.953174912400611e-06, "loss": 0.6459, "step": 6048 }, { "epoch": 0.0723227202620787, "grad_norm": 4.965823650360107, "learning_rate": 9.953148472801161e-06, "loss": 0.6465, "step": 6049 }, { "epoch": 0.07233467640693934, "grad_norm": 2.3492326736450195, "learning_rate": 9.953122025774448e-06, "loss": 0.7669, "step": 6050 }, { "epoch": 0.0723466325518, "grad_norm": 2.687840700149536, "learning_rate": 9.95309557132051e-06, "loss": 0.6026, "step": 6051 }, { "epoch": 0.07235858869666065, "grad_norm": 2.0951216220855713, "learning_rate": 9.953069109439386e-06, "loss": 0.7141, "step": 6052 }, { "epoch": 0.0723705448415213, "grad_norm": 2.7891504764556885, "learning_rate": 9.953042640131119e-06, "loss": 0.5229, "step": 6053 }, { "epoch": 0.07238250098638195, "grad_norm": 1.4767388105392456, "learning_rate": 9.953016163395747e-06, "loss": 0.6388, "step": 6054 }, { "epoch": 0.0723944571312426, "grad_norm": 5.081317901611328, "learning_rate": 9.952989679233309e-06, "loss": 0.8334, "step": 6055 }, { "epoch": 0.07240641327610325, "grad_norm": 4.6031975746154785, "learning_rate": 9.952963187643846e-06, "loss": 0.6326, "step": 6056 }, { "epoch": 0.0724183694209639, "grad_norm": 1.9609637260437012, "learning_rate": 9.952936688627396e-06, "loss": 0.6873, "step": 6057 }, { "epoch": 0.07243032556582456, "grad_norm": 2.0075523853302, "learning_rate": 9.952910182184002e-06, "loss": 0.7406, "step": 6058 }, { "epoch": 0.0724422817106852, "grad_norm": 2.3032493591308594, "learning_rate": 9.9528836683137e-06, "loss": 0.5839, "step": 6059 }, { "epoch": 0.07245423785554586, "grad_norm": 2.4885385036468506, "learning_rate": 9.95285714701653e-06, "loss": 0.6232, "step": 6060 }, { "epoch": 0.0724661940004065, "grad_norm": 2.7037813663482666, "learning_rate": 9.952830618292534e-06, "loss": 0.6755, "step": 6061 }, { "epoch": 0.07247815014526716, "grad_norm": 2.4103572368621826, "learning_rate": 9.952804082141752e-06, "loss": 0.566, "step": 6062 }, { "epoch": 0.07249010629012781, "grad_norm": 2.0285840034484863, "learning_rate": 9.952777538564222e-06, "loss": 0.6386, "step": 6063 }, { "epoch": 0.07250206243498847, "grad_norm": 5.5765156745910645, "learning_rate": 9.952750987559984e-06, "loss": 0.69, "step": 6064 }, { "epoch": 0.07251401857984911, "grad_norm": 2.3840293884277344, "learning_rate": 9.95272442912908e-06, "loss": 0.7008, "step": 6065 }, { "epoch": 0.07252597472470977, "grad_norm": 2.9226455688476562, "learning_rate": 9.952697863271547e-06, "loss": 0.6625, "step": 6066 }, { "epoch": 0.07253793086957042, "grad_norm": 2.483121633529663, "learning_rate": 9.952671289987426e-06, "loss": 0.6479, "step": 6067 }, { "epoch": 0.07254988701443106, "grad_norm": 2.2486419677734375, "learning_rate": 9.952644709276756e-06, "loss": 0.657, "step": 6068 }, { "epoch": 0.07256184315929172, "grad_norm": 1.410570502281189, "learning_rate": 9.952618121139579e-06, "loss": 0.5803, "step": 6069 }, { "epoch": 0.07257379930415236, "grad_norm": 2.0059165954589844, "learning_rate": 9.952591525575931e-06, "loss": 0.6443, "step": 6070 }, { "epoch": 0.07258575544901302, "grad_norm": 1.8311728239059448, "learning_rate": 9.952564922585857e-06, "loss": 0.5342, "step": 6071 }, { "epoch": 0.07259771159387367, "grad_norm": 2.551439046859741, "learning_rate": 9.952538312169392e-06, "loss": 0.6001, "step": 6072 }, { "epoch": 0.07260966773873433, "grad_norm": 2.946568250656128, "learning_rate": 9.952511694326581e-06, "loss": 0.6281, "step": 6073 }, { "epoch": 0.07262162388359497, "grad_norm": 3.1653268337249756, "learning_rate": 9.95248506905746e-06, "loss": 0.5554, "step": 6074 }, { "epoch": 0.07263358002845563, "grad_norm": 2.993626117706299, "learning_rate": 9.95245843636207e-06, "loss": 0.6287, "step": 6075 }, { "epoch": 0.07264553617331627, "grad_norm": 5.579105854034424, "learning_rate": 9.952431796240452e-06, "loss": 0.6347, "step": 6076 }, { "epoch": 0.07265749231817693, "grad_norm": 9.710591316223145, "learning_rate": 9.952405148692643e-06, "loss": 0.5766, "step": 6077 }, { "epoch": 0.07266944846303758, "grad_norm": 1.7538620233535767, "learning_rate": 9.952378493718686e-06, "loss": 0.5912, "step": 6078 }, { "epoch": 0.07268140460789822, "grad_norm": 2.4231717586517334, "learning_rate": 9.952351831318618e-06, "loss": 0.5951, "step": 6079 }, { "epoch": 0.07269336075275888, "grad_norm": 2.0763278007507324, "learning_rate": 9.952325161492484e-06, "loss": 0.6302, "step": 6080 }, { "epoch": 0.07270531689761953, "grad_norm": 2.4783735275268555, "learning_rate": 9.952298484240319e-06, "loss": 0.6698, "step": 6081 }, { "epoch": 0.07271727304248018, "grad_norm": 3.862812042236328, "learning_rate": 9.952271799562165e-06, "loss": 0.6772, "step": 6082 }, { "epoch": 0.07272922918734083, "grad_norm": 11.397578239440918, "learning_rate": 9.95224510745806e-06, "loss": 0.6231, "step": 6083 }, { "epoch": 0.07274118533220149, "grad_norm": 2.1568422317504883, "learning_rate": 9.952218407928049e-06, "loss": 0.6373, "step": 6084 }, { "epoch": 0.07275314147706213, "grad_norm": 1.7685620784759521, "learning_rate": 9.952191700972166e-06, "loss": 0.6765, "step": 6085 }, { "epoch": 0.07276509762192279, "grad_norm": 2.4237687587738037, "learning_rate": 9.952164986590457e-06, "loss": 0.6277, "step": 6086 }, { "epoch": 0.07277705376678344, "grad_norm": 6.972693920135498, "learning_rate": 9.952138264782958e-06, "loss": 0.654, "step": 6087 }, { "epoch": 0.0727890099116441, "grad_norm": 3.5503618717193604, "learning_rate": 9.952111535549709e-06, "loss": 0.6717, "step": 6088 }, { "epoch": 0.07280096605650474, "grad_norm": 7.495293617248535, "learning_rate": 9.952084798890753e-06, "loss": 0.7308, "step": 6089 }, { "epoch": 0.07281292220136538, "grad_norm": 3.2115538120269775, "learning_rate": 9.952058054806126e-06, "loss": 0.5892, "step": 6090 }, { "epoch": 0.07282487834622604, "grad_norm": 3.1999363899230957, "learning_rate": 9.95203130329587e-06, "loss": 0.5581, "step": 6091 }, { "epoch": 0.07283683449108669, "grad_norm": 6.071566581726074, "learning_rate": 9.952004544360028e-06, "loss": 0.6543, "step": 6092 }, { "epoch": 0.07284879063594735, "grad_norm": 2.5053844451904297, "learning_rate": 9.951977777998637e-06, "loss": 0.6313, "step": 6093 }, { "epoch": 0.07286074678080799, "grad_norm": 1.7652267217636108, "learning_rate": 9.951951004211736e-06, "loss": 0.6724, "step": 6094 }, { "epoch": 0.07287270292566865, "grad_norm": 9.117752075195312, "learning_rate": 9.951924222999368e-06, "loss": 0.6921, "step": 6095 }, { "epoch": 0.0728846590705293, "grad_norm": 2.0708417892456055, "learning_rate": 9.951897434361572e-06, "loss": 0.6279, "step": 6096 }, { "epoch": 0.07289661521538995, "grad_norm": 3.7194902896881104, "learning_rate": 9.95187063829839e-06, "loss": 0.6051, "step": 6097 }, { "epoch": 0.0729085713602506, "grad_norm": 2.77463960647583, "learning_rate": 9.951843834809856e-06, "loss": 0.5757, "step": 6098 }, { "epoch": 0.07292052750511126, "grad_norm": 1.7350729703903198, "learning_rate": 9.951817023896017e-06, "loss": 0.5694, "step": 6099 }, { "epoch": 0.0729324836499719, "grad_norm": 2.1359033584594727, "learning_rate": 9.951790205556913e-06, "loss": 0.6417, "step": 6100 }, { "epoch": 0.07294443979483256, "grad_norm": 1.516922116279602, "learning_rate": 9.951763379792578e-06, "loss": 0.6798, "step": 6101 }, { "epoch": 0.0729563959396932, "grad_norm": 2.177995443344116, "learning_rate": 9.95173654660306e-06, "loss": 0.6436, "step": 6102 }, { "epoch": 0.07296835208455385, "grad_norm": 2.044612407684326, "learning_rate": 9.951709705988393e-06, "loss": 0.6718, "step": 6103 }, { "epoch": 0.07298030822941451, "grad_norm": 2.4512205123901367, "learning_rate": 9.95168285794862e-06, "loss": 0.6665, "step": 6104 }, { "epoch": 0.07299226437427515, "grad_norm": 2.1021640300750732, "learning_rate": 9.95165600248378e-06, "loss": 0.6339, "step": 6105 }, { "epoch": 0.07300422051913581, "grad_norm": 2.1047089099884033, "learning_rate": 9.951629139593915e-06, "loss": 0.6142, "step": 6106 }, { "epoch": 0.07301617666399646, "grad_norm": 2.528425931930542, "learning_rate": 9.951602269279065e-06, "loss": 0.5552, "step": 6107 }, { "epoch": 0.07302813280885712, "grad_norm": 2.0171825885772705, "learning_rate": 9.95157539153927e-06, "loss": 0.7463, "step": 6108 }, { "epoch": 0.07304008895371776, "grad_norm": 2.9820148944854736, "learning_rate": 9.95154850637457e-06, "loss": 0.6485, "step": 6109 }, { "epoch": 0.07305204509857842, "grad_norm": 1.7486261129379272, "learning_rate": 9.951521613785005e-06, "loss": 0.5402, "step": 6110 }, { "epoch": 0.07306400124343906, "grad_norm": 2.289189577102661, "learning_rate": 9.951494713770617e-06, "loss": 0.6574, "step": 6111 }, { "epoch": 0.07307595738829972, "grad_norm": 3.9245903491973877, "learning_rate": 9.951467806331443e-06, "loss": 0.5611, "step": 6112 }, { "epoch": 0.07308791353316037, "grad_norm": 1.697652816772461, "learning_rate": 9.951440891467527e-06, "loss": 0.6914, "step": 6113 }, { "epoch": 0.07309986967802101, "grad_norm": 1.8944730758666992, "learning_rate": 9.951413969178906e-06, "loss": 0.6583, "step": 6114 }, { "epoch": 0.07311182582288167, "grad_norm": 1.8084542751312256, "learning_rate": 9.951387039465625e-06, "loss": 0.5774, "step": 6115 }, { "epoch": 0.07312378196774232, "grad_norm": 1.897133469581604, "learning_rate": 9.95136010232772e-06, "loss": 0.6878, "step": 6116 }, { "epoch": 0.07313573811260297, "grad_norm": 2.1765496730804443, "learning_rate": 9.951333157765232e-06, "loss": 0.7308, "step": 6117 }, { "epoch": 0.07314769425746362, "grad_norm": 2.186896324157715, "learning_rate": 9.951306205778203e-06, "loss": 0.655, "step": 6118 }, { "epoch": 0.07315965040232428, "grad_norm": 2.77543568611145, "learning_rate": 9.951279246366675e-06, "loss": 0.6776, "step": 6119 }, { "epoch": 0.07317160654718492, "grad_norm": 1.6854283809661865, "learning_rate": 9.951252279530682e-06, "loss": 0.6548, "step": 6120 }, { "epoch": 0.07318356269204558, "grad_norm": 1.3327885866165161, "learning_rate": 9.951225305270272e-06, "loss": 0.7097, "step": 6121 }, { "epoch": 0.07319551883690623, "grad_norm": 1.685437560081482, "learning_rate": 9.95119832358548e-06, "loss": 0.5596, "step": 6122 }, { "epoch": 0.07320747498176688, "grad_norm": 5.6591339111328125, "learning_rate": 9.95117133447635e-06, "loss": 0.5295, "step": 6123 }, { "epoch": 0.07321943112662753, "grad_norm": 2.104684591293335, "learning_rate": 9.95114433794292e-06, "loss": 0.6706, "step": 6124 }, { "epoch": 0.07323138727148819, "grad_norm": 1.8727290630340576, "learning_rate": 9.951117333985232e-06, "loss": 0.5385, "step": 6125 }, { "epoch": 0.07324334341634883, "grad_norm": 1.7001979351043701, "learning_rate": 9.951090322603324e-06, "loss": 0.6078, "step": 6126 }, { "epoch": 0.07325529956120948, "grad_norm": 2.187790632247925, "learning_rate": 9.95106330379724e-06, "loss": 0.7567, "step": 6127 }, { "epoch": 0.07326725570607014, "grad_norm": 2.6789705753326416, "learning_rate": 9.95103627756702e-06, "loss": 0.5273, "step": 6128 }, { "epoch": 0.07327921185093078, "grad_norm": 4.187442302703857, "learning_rate": 9.951009243912703e-06, "loss": 0.7036, "step": 6129 }, { "epoch": 0.07329116799579144, "grad_norm": 2.1390841007232666, "learning_rate": 9.950982202834329e-06, "loss": 0.6133, "step": 6130 }, { "epoch": 0.07330312414065208, "grad_norm": 2.9765985012054443, "learning_rate": 9.950955154331941e-06, "loss": 0.6698, "step": 6131 }, { "epoch": 0.07331508028551274, "grad_norm": 3.49654221534729, "learning_rate": 9.950928098405577e-06, "loss": 0.6352, "step": 6132 }, { "epoch": 0.07332703643037339, "grad_norm": 3.5943453311920166, "learning_rate": 9.950901035055278e-06, "loss": 0.646, "step": 6133 }, { "epoch": 0.07333899257523405, "grad_norm": 2.2129809856414795, "learning_rate": 9.950873964281087e-06, "loss": 0.7213, "step": 6134 }, { "epoch": 0.07335094872009469, "grad_norm": 2.176453113555908, "learning_rate": 9.950846886083044e-06, "loss": 0.632, "step": 6135 }, { "epoch": 0.07336290486495535, "grad_norm": 1.7904096841812134, "learning_rate": 9.950819800461185e-06, "loss": 0.7011, "step": 6136 }, { "epoch": 0.073374861009816, "grad_norm": 1.8367819786071777, "learning_rate": 9.950792707415556e-06, "loss": 0.592, "step": 6137 }, { "epoch": 0.07338681715467664, "grad_norm": 2.827770709991455, "learning_rate": 9.950765606946196e-06, "loss": 0.6273, "step": 6138 }, { "epoch": 0.0733987732995373, "grad_norm": 1.5729694366455078, "learning_rate": 9.950738499053146e-06, "loss": 0.5135, "step": 6139 }, { "epoch": 0.07341072944439794, "grad_norm": 2.4007999897003174, "learning_rate": 9.950711383736445e-06, "loss": 0.5536, "step": 6140 }, { "epoch": 0.0734226855892586, "grad_norm": 1.8474509716033936, "learning_rate": 9.950684260996134e-06, "loss": 0.6799, "step": 6141 }, { "epoch": 0.07343464173411925, "grad_norm": 2.470468521118164, "learning_rate": 9.950657130832255e-06, "loss": 0.6351, "step": 6142 }, { "epoch": 0.0734465978789799, "grad_norm": 3.6717710494995117, "learning_rate": 9.950629993244847e-06, "loss": 0.6527, "step": 6143 }, { "epoch": 0.07345855402384055, "grad_norm": 1.6730948686599731, "learning_rate": 9.950602848233955e-06, "loss": 0.5807, "step": 6144 }, { "epoch": 0.07347051016870121, "grad_norm": 1.9708329439163208, "learning_rate": 9.950575695799613e-06, "loss": 0.6586, "step": 6145 }, { "epoch": 0.07348246631356185, "grad_norm": 1.7116665840148926, "learning_rate": 9.950548535941866e-06, "loss": 0.7564, "step": 6146 }, { "epoch": 0.07349442245842251, "grad_norm": 5.4243245124816895, "learning_rate": 9.950521368660755e-06, "loss": 0.5933, "step": 6147 }, { "epoch": 0.07350637860328316, "grad_norm": 2.922591209411621, "learning_rate": 9.950494193956319e-06, "loss": 0.6039, "step": 6148 }, { "epoch": 0.07351833474814382, "grad_norm": 2.3394970893859863, "learning_rate": 9.950467011828599e-06, "loss": 0.6616, "step": 6149 }, { "epoch": 0.07353029089300446, "grad_norm": 4.8552422523498535, "learning_rate": 9.950439822277637e-06, "loss": 0.6034, "step": 6150 }, { "epoch": 0.0735422470378651, "grad_norm": 3.829866409301758, "learning_rate": 9.950412625303472e-06, "loss": 0.6329, "step": 6151 }, { "epoch": 0.07355420318272576, "grad_norm": 2.0475502014160156, "learning_rate": 9.950385420906146e-06, "loss": 0.5804, "step": 6152 }, { "epoch": 0.07356615932758641, "grad_norm": 7.188986301422119, "learning_rate": 9.9503582090857e-06, "loss": 0.6287, "step": 6153 }, { "epoch": 0.07357811547244707, "grad_norm": 2.553091049194336, "learning_rate": 9.950330989842173e-06, "loss": 0.612, "step": 6154 }, { "epoch": 0.07359007161730771, "grad_norm": 2.1444034576416016, "learning_rate": 9.950303763175606e-06, "loss": 0.6756, "step": 6155 }, { "epoch": 0.07360202776216837, "grad_norm": 2.5183029174804688, "learning_rate": 9.950276529086045e-06, "loss": 0.6503, "step": 6156 }, { "epoch": 0.07361398390702902, "grad_norm": 2.6800620555877686, "learning_rate": 9.950249287573524e-06, "loss": 0.5131, "step": 6157 }, { "epoch": 0.07362594005188967, "grad_norm": 1.908993124961853, "learning_rate": 9.950222038638086e-06, "loss": 0.6827, "step": 6158 }, { "epoch": 0.07363789619675032, "grad_norm": 1.7097705602645874, "learning_rate": 9.950194782279774e-06, "loss": 0.5731, "step": 6159 }, { "epoch": 0.07364985234161098, "grad_norm": 1.9809391498565674, "learning_rate": 9.950167518498626e-06, "loss": 0.6711, "step": 6160 }, { "epoch": 0.07366180848647162, "grad_norm": 3.16219162940979, "learning_rate": 9.950140247294687e-06, "loss": 0.5696, "step": 6161 }, { "epoch": 0.07367376463133227, "grad_norm": 3.7126598358154297, "learning_rate": 9.950112968667992e-06, "loss": 0.598, "step": 6162 }, { "epoch": 0.07368572077619293, "grad_norm": 4.384833335876465, "learning_rate": 9.950085682618586e-06, "loss": 0.5991, "step": 6163 }, { "epoch": 0.07369767692105357, "grad_norm": 2.9732298851013184, "learning_rate": 9.950058389146509e-06, "loss": 0.6172, "step": 6164 }, { "epoch": 0.07370963306591423, "grad_norm": 4.918410301208496, "learning_rate": 9.950031088251801e-06, "loss": 0.7391, "step": 6165 }, { "epoch": 0.07372158921077487, "grad_norm": 2.040252923965454, "learning_rate": 9.950003779934505e-06, "loss": 0.5994, "step": 6166 }, { "epoch": 0.07373354535563553, "grad_norm": 2.8255295753479004, "learning_rate": 9.949976464194662e-06, "loss": 0.6771, "step": 6167 }, { "epoch": 0.07374550150049618, "grad_norm": 1.7880572080612183, "learning_rate": 9.949949141032309e-06, "loss": 0.5563, "step": 6168 }, { "epoch": 0.07375745764535684, "grad_norm": 3.3803248405456543, "learning_rate": 9.949921810447492e-06, "loss": 0.6457, "step": 6169 }, { "epoch": 0.07376941379021748, "grad_norm": 3.6318202018737793, "learning_rate": 9.949894472440248e-06, "loss": 0.6288, "step": 6170 }, { "epoch": 0.07378136993507814, "grad_norm": 4.313760280609131, "learning_rate": 9.94986712701062e-06, "loss": 0.5622, "step": 6171 }, { "epoch": 0.07379332607993878, "grad_norm": 1.8814976215362549, "learning_rate": 9.949839774158648e-06, "loss": 0.6907, "step": 6172 }, { "epoch": 0.07380528222479943, "grad_norm": 2.699042558670044, "learning_rate": 9.949812413884374e-06, "loss": 0.5413, "step": 6173 }, { "epoch": 0.07381723836966009, "grad_norm": 28.402902603149414, "learning_rate": 9.949785046187839e-06, "loss": 0.6826, "step": 6174 }, { "epoch": 0.07382919451452073, "grad_norm": 3.442166805267334, "learning_rate": 9.949757671069084e-06, "loss": 0.603, "step": 6175 }, { "epoch": 0.07384115065938139, "grad_norm": 2.1138150691986084, "learning_rate": 9.94973028852815e-06, "loss": 0.6164, "step": 6176 }, { "epoch": 0.07385310680424204, "grad_norm": 2.0422637462615967, "learning_rate": 9.949702898565078e-06, "loss": 0.6158, "step": 6177 }, { "epoch": 0.0738650629491027, "grad_norm": 2.0292279720306396, "learning_rate": 9.949675501179907e-06, "loss": 0.6059, "step": 6178 }, { "epoch": 0.07387701909396334, "grad_norm": 3.8759686946868896, "learning_rate": 9.949648096372681e-06, "loss": 0.6979, "step": 6179 }, { "epoch": 0.073888975238824, "grad_norm": 2.4167983531951904, "learning_rate": 9.949620684143441e-06, "loss": 0.6259, "step": 6180 }, { "epoch": 0.07390093138368464, "grad_norm": 2.2275397777557373, "learning_rate": 9.949593264492227e-06, "loss": 0.5792, "step": 6181 }, { "epoch": 0.0739128875285453, "grad_norm": 2.680994749069214, "learning_rate": 9.949565837419079e-06, "loss": 0.5559, "step": 6182 }, { "epoch": 0.07392484367340595, "grad_norm": 2.2827742099761963, "learning_rate": 9.949538402924041e-06, "loss": 0.5957, "step": 6183 }, { "epoch": 0.0739367998182666, "grad_norm": 2.6963438987731934, "learning_rate": 9.949510961007152e-06, "loss": 0.5698, "step": 6184 }, { "epoch": 0.07394875596312725, "grad_norm": 2.9567534923553467, "learning_rate": 9.949483511668455e-06, "loss": 0.6822, "step": 6185 }, { "epoch": 0.0739607121079879, "grad_norm": 2.718195915222168, "learning_rate": 9.949456054907987e-06, "loss": 0.6084, "step": 6186 }, { "epoch": 0.07397266825284855, "grad_norm": 2.9624485969543457, "learning_rate": 9.949428590725793e-06, "loss": 0.4593, "step": 6187 }, { "epoch": 0.0739846243977092, "grad_norm": 3.9979615211486816, "learning_rate": 9.949401119121915e-06, "loss": 0.629, "step": 6188 }, { "epoch": 0.07399658054256986, "grad_norm": 1.7728742361068726, "learning_rate": 9.94937364009639e-06, "loss": 0.7132, "step": 6189 }, { "epoch": 0.0740085366874305, "grad_norm": 5.379919052124023, "learning_rate": 9.949346153649265e-06, "loss": 0.7039, "step": 6190 }, { "epoch": 0.07402049283229116, "grad_norm": 8.246184349060059, "learning_rate": 9.949318659780575e-06, "loss": 0.5943, "step": 6191 }, { "epoch": 0.0740324489771518, "grad_norm": 2.298865795135498, "learning_rate": 9.949291158490364e-06, "loss": 0.6087, "step": 6192 }, { "epoch": 0.07404440512201246, "grad_norm": 3.270174741744995, "learning_rate": 9.949263649778676e-06, "loss": 0.643, "step": 6193 }, { "epoch": 0.07405636126687311, "grad_norm": 2.040339469909668, "learning_rate": 9.949236133645548e-06, "loss": 0.5591, "step": 6194 }, { "epoch": 0.07406831741173377, "grad_norm": 1.8815834522247314, "learning_rate": 9.949208610091021e-06, "loss": 0.7124, "step": 6195 }, { "epoch": 0.07408027355659441, "grad_norm": 2.830552816390991, "learning_rate": 9.94918107911514e-06, "loss": 0.6131, "step": 6196 }, { "epoch": 0.07409222970145506, "grad_norm": 2.942094087600708, "learning_rate": 9.949153540717943e-06, "loss": 0.5949, "step": 6197 }, { "epoch": 0.07410418584631571, "grad_norm": 2.883533000946045, "learning_rate": 9.949125994899475e-06, "loss": 0.5589, "step": 6198 }, { "epoch": 0.07411614199117636, "grad_norm": 4.102441310882568, "learning_rate": 9.949098441659773e-06, "loss": 0.5363, "step": 6199 }, { "epoch": 0.07412809813603702, "grad_norm": 1.7884814739227295, "learning_rate": 9.949070880998881e-06, "loss": 0.5965, "step": 6200 }, { "epoch": 0.07414005428089766, "grad_norm": 1.682249903678894, "learning_rate": 9.949043312916839e-06, "loss": 0.6392, "step": 6201 }, { "epoch": 0.07415201042575832, "grad_norm": 2.4533002376556396, "learning_rate": 9.949015737413689e-06, "loss": 0.6159, "step": 6202 }, { "epoch": 0.07416396657061897, "grad_norm": 2.28291916847229, "learning_rate": 9.948988154489473e-06, "loss": 0.6184, "step": 6203 }, { "epoch": 0.07417592271547963, "grad_norm": 2.1919636726379395, "learning_rate": 9.948960564144232e-06, "loss": 0.5755, "step": 6204 }, { "epoch": 0.07418787886034027, "grad_norm": 2.562765121459961, "learning_rate": 9.948932966378007e-06, "loss": 0.6185, "step": 6205 }, { "epoch": 0.07419983500520093, "grad_norm": 2.406254529953003, "learning_rate": 9.94890536119084e-06, "loss": 0.6303, "step": 6206 }, { "epoch": 0.07421179115006157, "grad_norm": 1.7932875156402588, "learning_rate": 9.94887774858277e-06, "loss": 0.5838, "step": 6207 }, { "epoch": 0.07422374729492223, "grad_norm": 2.016721725463867, "learning_rate": 9.94885012855384e-06, "loss": 0.6266, "step": 6208 }, { "epoch": 0.07423570343978288, "grad_norm": 5.29292631149292, "learning_rate": 9.948822501104094e-06, "loss": 0.5861, "step": 6209 }, { "epoch": 0.07424765958464352, "grad_norm": 2.1857125759124756, "learning_rate": 9.948794866233569e-06, "loss": 0.5967, "step": 6210 }, { "epoch": 0.07425961572950418, "grad_norm": 1.7097722291946411, "learning_rate": 9.94876722394231e-06, "loss": 0.7187, "step": 6211 }, { "epoch": 0.07427157187436483, "grad_norm": 2.0230824947357178, "learning_rate": 9.948739574230356e-06, "loss": 0.6827, "step": 6212 }, { "epoch": 0.07428352801922548, "grad_norm": 1.8083739280700684, "learning_rate": 9.94871191709775e-06, "loss": 0.5745, "step": 6213 }, { "epoch": 0.07429548416408613, "grad_norm": 2.0363900661468506, "learning_rate": 9.948684252544531e-06, "loss": 0.5877, "step": 6214 }, { "epoch": 0.07430744030894679, "grad_norm": 3.659132480621338, "learning_rate": 9.948656580570744e-06, "loss": 0.6143, "step": 6215 }, { "epoch": 0.07431939645380743, "grad_norm": 2.623239040374756, "learning_rate": 9.94862890117643e-06, "loss": 0.6725, "step": 6216 }, { "epoch": 0.07433135259866809, "grad_norm": 6.952420234680176, "learning_rate": 9.948601214361627e-06, "loss": 0.7256, "step": 6217 }, { "epoch": 0.07434330874352874, "grad_norm": 1.6523698568344116, "learning_rate": 9.94857352012638e-06, "loss": 0.599, "step": 6218 }, { "epoch": 0.0743552648883894, "grad_norm": 2.7681076526641846, "learning_rate": 9.94854581847073e-06, "loss": 0.5915, "step": 6219 }, { "epoch": 0.07436722103325004, "grad_norm": 4.048276424407959, "learning_rate": 9.948518109394717e-06, "loss": 0.6829, "step": 6220 }, { "epoch": 0.07437917717811068, "grad_norm": 1.8758544921875, "learning_rate": 9.948490392898385e-06, "loss": 0.6952, "step": 6221 }, { "epoch": 0.07439113332297134, "grad_norm": 1.4123685359954834, "learning_rate": 9.948462668981772e-06, "loss": 0.519, "step": 6222 }, { "epoch": 0.07440308946783199, "grad_norm": 1.9394807815551758, "learning_rate": 9.948434937644921e-06, "loss": 0.6647, "step": 6223 }, { "epoch": 0.07441504561269265, "grad_norm": 5.482438564300537, "learning_rate": 9.948407198887876e-06, "loss": 0.5905, "step": 6224 }, { "epoch": 0.07442700175755329, "grad_norm": 1.3394263982772827, "learning_rate": 9.948379452710677e-06, "loss": 0.6385, "step": 6225 }, { "epoch": 0.07443895790241395, "grad_norm": 2.1614677906036377, "learning_rate": 9.948351699113365e-06, "loss": 0.7228, "step": 6226 }, { "epoch": 0.0744509140472746, "grad_norm": 3.8870465755462646, "learning_rate": 9.948323938095983e-06, "loss": 0.6126, "step": 6227 }, { "epoch": 0.07446287019213525, "grad_norm": 3.0502469539642334, "learning_rate": 9.948296169658572e-06, "loss": 0.6964, "step": 6228 }, { "epoch": 0.0744748263369959, "grad_norm": 1.9753602743148804, "learning_rate": 9.94826839380117e-06, "loss": 0.6451, "step": 6229 }, { "epoch": 0.07448678248185656, "grad_norm": 3.0560386180877686, "learning_rate": 9.948240610523825e-06, "loss": 0.6401, "step": 6230 }, { "epoch": 0.0744987386267172, "grad_norm": 2.04185152053833, "learning_rate": 9.948212819826574e-06, "loss": 0.6325, "step": 6231 }, { "epoch": 0.07451069477157785, "grad_norm": 2.4101243019104004, "learning_rate": 9.948185021709461e-06, "loss": 0.6541, "step": 6232 }, { "epoch": 0.0745226509164385, "grad_norm": 1.6734222173690796, "learning_rate": 9.948157216172527e-06, "loss": 0.6129, "step": 6233 }, { "epoch": 0.07453460706129915, "grad_norm": 1.5157253742218018, "learning_rate": 9.948129403215814e-06, "loss": 0.5586, "step": 6234 }, { "epoch": 0.07454656320615981, "grad_norm": 1.8908169269561768, "learning_rate": 9.948101582839363e-06, "loss": 0.5821, "step": 6235 }, { "epoch": 0.07455851935102045, "grad_norm": 2.2282166481018066, "learning_rate": 9.948073755043214e-06, "loss": 0.6266, "step": 6236 }, { "epoch": 0.07457047549588111, "grad_norm": 2.358556032180786, "learning_rate": 9.948045919827415e-06, "loss": 0.6644, "step": 6237 }, { "epoch": 0.07458243164074176, "grad_norm": 2.371953010559082, "learning_rate": 9.948018077192001e-06, "loss": 0.6985, "step": 6238 }, { "epoch": 0.07459438778560241, "grad_norm": 1.5561341047286987, "learning_rate": 9.947990227137016e-06, "loss": 0.6369, "step": 6239 }, { "epoch": 0.07460634393046306, "grad_norm": 4.187460899353027, "learning_rate": 9.947962369662503e-06, "loss": 0.7312, "step": 6240 }, { "epoch": 0.07461830007532372, "grad_norm": 2.4967257976531982, "learning_rate": 9.947934504768504e-06, "loss": 0.5746, "step": 6241 }, { "epoch": 0.07463025622018436, "grad_norm": 2.824702262878418, "learning_rate": 9.947906632455058e-06, "loss": 0.6555, "step": 6242 }, { "epoch": 0.07464221236504502, "grad_norm": 2.790933847427368, "learning_rate": 9.947878752722208e-06, "loss": 0.6577, "step": 6243 }, { "epoch": 0.07465416850990567, "grad_norm": 1.5729857683181763, "learning_rate": 9.947850865569997e-06, "loss": 0.543, "step": 6244 }, { "epoch": 0.07466612465476631, "grad_norm": 1.6851422786712646, "learning_rate": 9.947822970998465e-06, "loss": 0.6255, "step": 6245 }, { "epoch": 0.07467808079962697, "grad_norm": 2.2003118991851807, "learning_rate": 9.947795069007657e-06, "loss": 0.6159, "step": 6246 }, { "epoch": 0.07469003694448761, "grad_norm": 2.3739664554595947, "learning_rate": 9.94776715959761e-06, "loss": 0.5881, "step": 6247 }, { "epoch": 0.07470199308934827, "grad_norm": 1.897714614868164, "learning_rate": 9.94773924276837e-06, "loss": 0.546, "step": 6248 }, { "epoch": 0.07471394923420892, "grad_norm": 2.1229076385498047, "learning_rate": 9.947711318519979e-06, "loss": 0.6456, "step": 6249 }, { "epoch": 0.07472590537906958, "grad_norm": 1.9893699884414673, "learning_rate": 9.947683386852474e-06, "loss": 0.5249, "step": 6250 }, { "epoch": 0.07473786152393022, "grad_norm": 1.8736813068389893, "learning_rate": 9.947655447765902e-06, "loss": 0.745, "step": 6251 }, { "epoch": 0.07474981766879088, "grad_norm": 1.5470050573349, "learning_rate": 9.947627501260303e-06, "loss": 0.6098, "step": 6252 }, { "epoch": 0.07476177381365152, "grad_norm": 3.3843331336975098, "learning_rate": 9.94759954733572e-06, "loss": 0.6791, "step": 6253 }, { "epoch": 0.07477372995851218, "grad_norm": 4.174108505249023, "learning_rate": 9.947571585992192e-06, "loss": 0.6506, "step": 6254 }, { "epoch": 0.07478568610337283, "grad_norm": 1.5614532232284546, "learning_rate": 9.947543617229765e-06, "loss": 0.6485, "step": 6255 }, { "epoch": 0.07479764224823347, "grad_norm": 1.6826972961425781, "learning_rate": 9.947515641048477e-06, "loss": 0.6167, "step": 6256 }, { "epoch": 0.07480959839309413, "grad_norm": 2.8639163970947266, "learning_rate": 9.947487657448371e-06, "loss": 0.6208, "step": 6257 }, { "epoch": 0.07482155453795478, "grad_norm": 1.5641118288040161, "learning_rate": 9.94745966642949e-06, "loss": 0.661, "step": 6258 }, { "epoch": 0.07483351068281544, "grad_norm": 1.7738007307052612, "learning_rate": 9.947431667991879e-06, "loss": 0.6306, "step": 6259 }, { "epoch": 0.07484546682767608, "grad_norm": 2.029540777206421, "learning_rate": 9.947403662135572e-06, "loss": 0.5792, "step": 6260 }, { "epoch": 0.07485742297253674, "grad_norm": 3.492954730987549, "learning_rate": 9.947375648860618e-06, "loss": 0.6146, "step": 6261 }, { "epoch": 0.07486937911739738, "grad_norm": 3.3605287075042725, "learning_rate": 9.947347628167056e-06, "loss": 0.6914, "step": 6262 }, { "epoch": 0.07488133526225804, "grad_norm": 2.024521827697754, "learning_rate": 9.94731960005493e-06, "loss": 0.6574, "step": 6263 }, { "epoch": 0.07489329140711869, "grad_norm": 1.7261000871658325, "learning_rate": 9.94729156452428e-06, "loss": 0.5635, "step": 6264 }, { "epoch": 0.07490524755197935, "grad_norm": 2.370044469833374, "learning_rate": 9.947263521575148e-06, "loss": 0.6181, "step": 6265 }, { "epoch": 0.07491720369683999, "grad_norm": 3.9549694061279297, "learning_rate": 9.947235471207577e-06, "loss": 0.6556, "step": 6266 }, { "epoch": 0.07492915984170065, "grad_norm": 7.053286552429199, "learning_rate": 9.947207413421609e-06, "loss": 0.6283, "step": 6267 }, { "epoch": 0.0749411159865613, "grad_norm": 1.9387140274047852, "learning_rate": 9.947179348217285e-06, "loss": 0.6812, "step": 6268 }, { "epoch": 0.07495307213142194, "grad_norm": 2.697807788848877, "learning_rate": 9.947151275594649e-06, "loss": 0.6759, "step": 6269 }, { "epoch": 0.0749650282762826, "grad_norm": 3.6236894130706787, "learning_rate": 9.947123195553742e-06, "loss": 0.6158, "step": 6270 }, { "epoch": 0.07497698442114324, "grad_norm": 2.3167967796325684, "learning_rate": 9.947095108094607e-06, "loss": 0.7102, "step": 6271 }, { "epoch": 0.0749889405660039, "grad_norm": 2.121743679046631, "learning_rate": 9.947067013217285e-06, "loss": 0.6482, "step": 6272 }, { "epoch": 0.07500089671086455, "grad_norm": 1.76418137550354, "learning_rate": 9.947038910921817e-06, "loss": 0.5349, "step": 6273 }, { "epoch": 0.0750128528557252, "grad_norm": 2.403402805328369, "learning_rate": 9.947010801208248e-06, "loss": 0.6232, "step": 6274 }, { "epoch": 0.07502480900058585, "grad_norm": 2.3257179260253906, "learning_rate": 9.946982684076618e-06, "loss": 0.6226, "step": 6275 }, { "epoch": 0.07503676514544651, "grad_norm": 6.428072452545166, "learning_rate": 9.946954559526969e-06, "loss": 0.6413, "step": 6276 }, { "epoch": 0.07504872129030715, "grad_norm": 2.0106420516967773, "learning_rate": 9.946926427559344e-06, "loss": 0.5841, "step": 6277 }, { "epoch": 0.07506067743516781, "grad_norm": 7.443051338195801, "learning_rate": 9.946898288173788e-06, "loss": 0.6225, "step": 6278 }, { "epoch": 0.07507263358002846, "grad_norm": 3.373307943344116, "learning_rate": 9.946870141370338e-06, "loss": 0.5649, "step": 6279 }, { "epoch": 0.0750845897248891, "grad_norm": 4.546780109405518, "learning_rate": 9.946841987149039e-06, "loss": 0.6542, "step": 6280 }, { "epoch": 0.07509654586974976, "grad_norm": 2.363285779953003, "learning_rate": 9.946813825509933e-06, "loss": 0.7375, "step": 6281 }, { "epoch": 0.0751085020146104, "grad_norm": 3.234748363494873, "learning_rate": 9.946785656453063e-06, "loss": 0.6887, "step": 6282 }, { "epoch": 0.07512045815947106, "grad_norm": 4.8290228843688965, "learning_rate": 9.94675747997847e-06, "loss": 0.7075, "step": 6283 }, { "epoch": 0.07513241430433171, "grad_norm": 12.679448127746582, "learning_rate": 9.946729296086196e-06, "loss": 0.6826, "step": 6284 }, { "epoch": 0.07514437044919237, "grad_norm": 2.9270856380462646, "learning_rate": 9.946701104776283e-06, "loss": 0.6566, "step": 6285 }, { "epoch": 0.07515632659405301, "grad_norm": 2.0543673038482666, "learning_rate": 9.946672906048776e-06, "loss": 0.6139, "step": 6286 }, { "epoch": 0.07516828273891367, "grad_norm": 1.7320446968078613, "learning_rate": 9.946644699903715e-06, "loss": 0.6653, "step": 6287 }, { "epoch": 0.07518023888377431, "grad_norm": 2.136730909347534, "learning_rate": 9.946616486341142e-06, "loss": 0.6695, "step": 6288 }, { "epoch": 0.07519219502863497, "grad_norm": 4.394355297088623, "learning_rate": 9.9465882653611e-06, "loss": 0.6819, "step": 6289 }, { "epoch": 0.07520415117349562, "grad_norm": 3.798189878463745, "learning_rate": 9.946560036963631e-06, "loss": 0.6876, "step": 6290 }, { "epoch": 0.07521610731835626, "grad_norm": 2.4632534980773926, "learning_rate": 9.946531801148778e-06, "loss": 0.5265, "step": 6291 }, { "epoch": 0.07522806346321692, "grad_norm": 1.9656274318695068, "learning_rate": 9.946503557916584e-06, "loss": 0.6689, "step": 6292 }, { "epoch": 0.07524001960807757, "grad_norm": 2.6880834102630615, "learning_rate": 9.946475307267089e-06, "loss": 0.6104, "step": 6293 }, { "epoch": 0.07525197575293822, "grad_norm": 2.6804118156433105, "learning_rate": 9.946447049200338e-06, "loss": 0.6725, "step": 6294 }, { "epoch": 0.07526393189779887, "grad_norm": 1.9189660549163818, "learning_rate": 9.946418783716372e-06, "loss": 0.5602, "step": 6295 }, { "epoch": 0.07527588804265953, "grad_norm": 2.3863344192504883, "learning_rate": 9.946390510815231e-06, "loss": 0.6372, "step": 6296 }, { "epoch": 0.07528784418752017, "grad_norm": 1.893926739692688, "learning_rate": 9.946362230496962e-06, "loss": 0.5823, "step": 6297 }, { "epoch": 0.07529980033238083, "grad_norm": 3.899704694747925, "learning_rate": 9.946333942761606e-06, "loss": 0.6736, "step": 6298 }, { "epoch": 0.07531175647724148, "grad_norm": 3.3802969455718994, "learning_rate": 9.946305647609203e-06, "loss": 0.596, "step": 6299 }, { "epoch": 0.07532371262210213, "grad_norm": 6.952639579772949, "learning_rate": 9.946277345039796e-06, "loss": 0.6629, "step": 6300 }, { "epoch": 0.07533566876696278, "grad_norm": 10.647343635559082, "learning_rate": 9.94624903505343e-06, "loss": 0.6635, "step": 6301 }, { "epoch": 0.07534762491182344, "grad_norm": 2.2576825618743896, "learning_rate": 9.946220717650148e-06, "loss": 0.6527, "step": 6302 }, { "epoch": 0.07535958105668408, "grad_norm": 2.292731761932373, "learning_rate": 9.946192392829987e-06, "loss": 0.6865, "step": 6303 }, { "epoch": 0.07537153720154473, "grad_norm": 1.798912763595581, "learning_rate": 9.946164060592996e-06, "loss": 0.6267, "step": 6304 }, { "epoch": 0.07538349334640539, "grad_norm": 2.440615653991699, "learning_rate": 9.946135720939212e-06, "loss": 0.6302, "step": 6305 }, { "epoch": 0.07539544949126603, "grad_norm": 2.0950825214385986, "learning_rate": 9.946107373868681e-06, "loss": 0.6214, "step": 6306 }, { "epoch": 0.07540740563612669, "grad_norm": 2.140310287475586, "learning_rate": 9.946079019381444e-06, "loss": 0.6217, "step": 6307 }, { "epoch": 0.07541936178098733, "grad_norm": 1.5596897602081299, "learning_rate": 9.946050657477544e-06, "loss": 0.6339, "step": 6308 }, { "epoch": 0.075431317925848, "grad_norm": 2.3353688716888428, "learning_rate": 9.946022288157025e-06, "loss": 0.6347, "step": 6309 }, { "epoch": 0.07544327407070864, "grad_norm": 15.973440170288086, "learning_rate": 9.945993911419925e-06, "loss": 0.6526, "step": 6310 }, { "epoch": 0.0754552302155693, "grad_norm": 2.0291271209716797, "learning_rate": 9.945965527266292e-06, "loss": 0.7013, "step": 6311 }, { "epoch": 0.07546718636042994, "grad_norm": 2.0260159969329834, "learning_rate": 9.945937135696164e-06, "loss": 0.6656, "step": 6312 }, { "epoch": 0.0754791425052906, "grad_norm": 1.8895306587219238, "learning_rate": 9.945908736709589e-06, "loss": 0.6918, "step": 6313 }, { "epoch": 0.07549109865015124, "grad_norm": 134.8359375, "learning_rate": 9.945880330306603e-06, "loss": 0.7115, "step": 6314 }, { "epoch": 0.07550305479501189, "grad_norm": 1.7145894765853882, "learning_rate": 9.945851916487252e-06, "loss": 0.533, "step": 6315 }, { "epoch": 0.07551501093987255, "grad_norm": 2.5996055603027344, "learning_rate": 9.94582349525158e-06, "loss": 0.7257, "step": 6316 }, { "epoch": 0.0755269670847332, "grad_norm": 4.740416526794434, "learning_rate": 9.945795066599628e-06, "loss": 0.6817, "step": 6317 }, { "epoch": 0.07553892322959385, "grad_norm": 4.248735427856445, "learning_rate": 9.945766630531439e-06, "loss": 0.6455, "step": 6318 }, { "epoch": 0.0755508793744545, "grad_norm": 13.233107566833496, "learning_rate": 9.945738187047054e-06, "loss": 0.5589, "step": 6319 }, { "epoch": 0.07556283551931516, "grad_norm": 2.504511594772339, "learning_rate": 9.945709736146519e-06, "loss": 0.6537, "step": 6320 }, { "epoch": 0.0755747916641758, "grad_norm": 2.087932825088501, "learning_rate": 9.945681277829873e-06, "loss": 0.5877, "step": 6321 }, { "epoch": 0.07558674780903646, "grad_norm": 1.6850199699401855, "learning_rate": 9.94565281209716e-06, "loss": 0.582, "step": 6322 }, { "epoch": 0.0755987039538971, "grad_norm": 1.645393967628479, "learning_rate": 9.945624338948424e-06, "loss": 0.6306, "step": 6323 }, { "epoch": 0.07561066009875776, "grad_norm": 2.876868724822998, "learning_rate": 9.945595858383707e-06, "loss": 0.7088, "step": 6324 }, { "epoch": 0.0756226162436184, "grad_norm": 2.0417535305023193, "learning_rate": 9.94556737040305e-06, "loss": 0.6938, "step": 6325 }, { "epoch": 0.07563457238847907, "grad_norm": 2.2011239528656006, "learning_rate": 9.945538875006499e-06, "loss": 0.6567, "step": 6326 }, { "epoch": 0.07564652853333971, "grad_norm": 2.129897356033325, "learning_rate": 9.945510372194094e-06, "loss": 0.6543, "step": 6327 }, { "epoch": 0.07565848467820036, "grad_norm": 3.7298452854156494, "learning_rate": 9.945481861965879e-06, "loss": 0.6494, "step": 6328 }, { "epoch": 0.07567044082306101, "grad_norm": 4.982676982879639, "learning_rate": 9.945453344321895e-06, "loss": 0.6887, "step": 6329 }, { "epoch": 0.07568239696792166, "grad_norm": 2.1750152111053467, "learning_rate": 9.945424819262186e-06, "loss": 0.6831, "step": 6330 }, { "epoch": 0.07569435311278232, "grad_norm": 13.988712310791016, "learning_rate": 9.945396286786798e-06, "loss": 0.6147, "step": 6331 }, { "epoch": 0.07570630925764296, "grad_norm": 3.969475030899048, "learning_rate": 9.945367746895768e-06, "loss": 0.6775, "step": 6332 }, { "epoch": 0.07571826540250362, "grad_norm": 5.531850337982178, "learning_rate": 9.945339199589141e-06, "loss": 0.5643, "step": 6333 }, { "epoch": 0.07573022154736427, "grad_norm": 2.470705986022949, "learning_rate": 9.945310644866962e-06, "loss": 0.7031, "step": 6334 }, { "epoch": 0.07574217769222492, "grad_norm": 3.2088398933410645, "learning_rate": 9.945282082729271e-06, "loss": 0.647, "step": 6335 }, { "epoch": 0.07575413383708557, "grad_norm": 2.778027296066284, "learning_rate": 9.945253513176113e-06, "loss": 0.54, "step": 6336 }, { "epoch": 0.07576608998194623, "grad_norm": 7.666052341461182, "learning_rate": 9.945224936207529e-06, "loss": 0.6845, "step": 6337 }, { "epoch": 0.07577804612680687, "grad_norm": 2.833054304122925, "learning_rate": 9.945196351823563e-06, "loss": 0.5653, "step": 6338 }, { "epoch": 0.07579000227166752, "grad_norm": 1.8498796224594116, "learning_rate": 9.945167760024258e-06, "loss": 0.6707, "step": 6339 }, { "epoch": 0.07580195841652818, "grad_norm": 2.9582111835479736, "learning_rate": 9.945139160809656e-06, "loss": 0.7149, "step": 6340 }, { "epoch": 0.07581391456138882, "grad_norm": 10.242384910583496, "learning_rate": 9.9451105541798e-06, "loss": 0.5894, "step": 6341 }, { "epoch": 0.07582587070624948, "grad_norm": 2.925657033920288, "learning_rate": 9.945081940134733e-06, "loss": 0.6834, "step": 6342 }, { "epoch": 0.07583782685111012, "grad_norm": 1.6218948364257812, "learning_rate": 9.945053318674498e-06, "loss": 0.6803, "step": 6343 }, { "epoch": 0.07584978299597078, "grad_norm": 1.928827166557312, "learning_rate": 9.945024689799139e-06, "loss": 0.6611, "step": 6344 }, { "epoch": 0.07586173914083143, "grad_norm": 20.741657257080078, "learning_rate": 9.944996053508695e-06, "loss": 0.6083, "step": 6345 }, { "epoch": 0.07587369528569209, "grad_norm": 1.8142426013946533, "learning_rate": 9.944967409803214e-06, "loss": 0.5863, "step": 6346 }, { "epoch": 0.07588565143055273, "grad_norm": 1.8690617084503174, "learning_rate": 9.944938758682738e-06, "loss": 0.6768, "step": 6347 }, { "epoch": 0.07589760757541339, "grad_norm": 2.63417649269104, "learning_rate": 9.944910100147307e-06, "loss": 0.6972, "step": 6348 }, { "epoch": 0.07590956372027403, "grad_norm": 1.8521649837493896, "learning_rate": 9.944881434196966e-06, "loss": 0.6759, "step": 6349 }, { "epoch": 0.07592151986513469, "grad_norm": 2.34051775932312, "learning_rate": 9.944852760831758e-06, "loss": 0.6837, "step": 6350 }, { "epoch": 0.07593347600999534, "grad_norm": 3.0902953147888184, "learning_rate": 9.944824080051726e-06, "loss": 0.6152, "step": 6351 }, { "epoch": 0.07594543215485598, "grad_norm": 1.8264137506484985, "learning_rate": 9.944795391856911e-06, "loss": 0.683, "step": 6352 }, { "epoch": 0.07595738829971664, "grad_norm": 4.656239986419678, "learning_rate": 9.94476669624736e-06, "loss": 0.706, "step": 6353 }, { "epoch": 0.07596934444457729, "grad_norm": 4.134549617767334, "learning_rate": 9.944737993223112e-06, "loss": 0.6393, "step": 6354 }, { "epoch": 0.07598130058943794, "grad_norm": 2.644517421722412, "learning_rate": 9.944709282784212e-06, "loss": 0.6023, "step": 6355 }, { "epoch": 0.07599325673429859, "grad_norm": 5.277651786804199, "learning_rate": 9.944680564930705e-06, "loss": 0.6483, "step": 6356 }, { "epoch": 0.07600521287915925, "grad_norm": 3.9496169090270996, "learning_rate": 9.944651839662631e-06, "loss": 0.7166, "step": 6357 }, { "epoch": 0.07601716902401989, "grad_norm": 5.766467094421387, "learning_rate": 9.944623106980033e-06, "loss": 0.6159, "step": 6358 }, { "epoch": 0.07602912516888055, "grad_norm": 2.2539925575256348, "learning_rate": 9.944594366882956e-06, "loss": 0.5652, "step": 6359 }, { "epoch": 0.0760410813137412, "grad_norm": 2.494377374649048, "learning_rate": 9.94456561937144e-06, "loss": 0.5453, "step": 6360 }, { "epoch": 0.07605303745860185, "grad_norm": 2.87595534324646, "learning_rate": 9.944536864445534e-06, "loss": 0.6264, "step": 6361 }, { "epoch": 0.0760649936034625, "grad_norm": 1.8568754196166992, "learning_rate": 9.944508102105275e-06, "loss": 0.586, "step": 6362 }, { "epoch": 0.07607694974832314, "grad_norm": 6.342837333679199, "learning_rate": 9.944479332350707e-06, "loss": 0.58, "step": 6363 }, { "epoch": 0.0760889058931838, "grad_norm": 14.21863079071045, "learning_rate": 9.944450555181876e-06, "loss": 0.5712, "step": 6364 }, { "epoch": 0.07610086203804445, "grad_norm": 1.518114686012268, "learning_rate": 9.944421770598825e-06, "loss": 0.5327, "step": 6365 }, { "epoch": 0.0761128181829051, "grad_norm": 2.253364324569702, "learning_rate": 9.944392978601596e-06, "loss": 0.6591, "step": 6366 }, { "epoch": 0.07612477432776575, "grad_norm": 22.764169692993164, "learning_rate": 9.94436417919023e-06, "loss": 0.6044, "step": 6367 }, { "epoch": 0.07613673047262641, "grad_norm": 3.988262891769409, "learning_rate": 9.944335372364775e-06, "loss": 0.7094, "step": 6368 }, { "epoch": 0.07614868661748705, "grad_norm": 1.8425217866897583, "learning_rate": 9.944306558125271e-06, "loss": 0.6659, "step": 6369 }, { "epoch": 0.07616064276234771, "grad_norm": 3.30780291557312, "learning_rate": 9.944277736471762e-06, "loss": 0.5946, "step": 6370 }, { "epoch": 0.07617259890720836, "grad_norm": 2.6246886253356934, "learning_rate": 9.944248907404287e-06, "loss": 0.6645, "step": 6371 }, { "epoch": 0.07618455505206902, "grad_norm": 2.085556745529175, "learning_rate": 9.944220070922899e-06, "loss": 0.6524, "step": 6372 }, { "epoch": 0.07619651119692966, "grad_norm": 2.2854061126708984, "learning_rate": 9.944191227027631e-06, "loss": 0.6452, "step": 6373 }, { "epoch": 0.0762084673417903, "grad_norm": 2.5071210861206055, "learning_rate": 9.944162375718533e-06, "loss": 0.6771, "step": 6374 }, { "epoch": 0.07622042348665097, "grad_norm": 2.302858352661133, "learning_rate": 9.944133516995645e-06, "loss": 0.7104, "step": 6375 }, { "epoch": 0.07623237963151161, "grad_norm": 2.5543086528778076, "learning_rate": 9.944104650859012e-06, "loss": 0.6557, "step": 6376 }, { "epoch": 0.07624433577637227, "grad_norm": 3.528449058532715, "learning_rate": 9.944075777308675e-06, "loss": 0.7224, "step": 6377 }, { "epoch": 0.07625629192123291, "grad_norm": 1.9780689477920532, "learning_rate": 9.94404689634468e-06, "loss": 0.6123, "step": 6378 }, { "epoch": 0.07626824806609357, "grad_norm": 2.3811428546905518, "learning_rate": 9.94401800796707e-06, "loss": 0.6032, "step": 6379 }, { "epoch": 0.07628020421095422, "grad_norm": 2.742800712585449, "learning_rate": 9.943989112175885e-06, "loss": 0.5995, "step": 6380 }, { "epoch": 0.07629216035581488, "grad_norm": 2.214703321456909, "learning_rate": 9.943960208971172e-06, "loss": 0.6178, "step": 6381 }, { "epoch": 0.07630411650067552, "grad_norm": 3.0807011127471924, "learning_rate": 9.943931298352973e-06, "loss": 0.5776, "step": 6382 }, { "epoch": 0.07631607264553618, "grad_norm": 2.960099697113037, "learning_rate": 9.943902380321332e-06, "loss": 0.6758, "step": 6383 }, { "epoch": 0.07632802879039682, "grad_norm": 35.256568908691406, "learning_rate": 9.943873454876292e-06, "loss": 0.6619, "step": 6384 }, { "epoch": 0.07633998493525748, "grad_norm": 1.619225263595581, "learning_rate": 9.943844522017896e-06, "loss": 0.678, "step": 6385 }, { "epoch": 0.07635194108011813, "grad_norm": 3.274251937866211, "learning_rate": 9.943815581746186e-06, "loss": 0.6025, "step": 6386 }, { "epoch": 0.07636389722497877, "grad_norm": 3.599123001098633, "learning_rate": 9.943786634061208e-06, "loss": 0.5864, "step": 6387 }, { "epoch": 0.07637585336983943, "grad_norm": 3.448350429534912, "learning_rate": 9.943757678963005e-06, "loss": 0.6406, "step": 6388 }, { "epoch": 0.07638780951470008, "grad_norm": 6.9919233322143555, "learning_rate": 9.94372871645162e-06, "loss": 0.6274, "step": 6389 }, { "epoch": 0.07639976565956073, "grad_norm": 2.3095850944519043, "learning_rate": 9.943699746527094e-06, "loss": 0.6757, "step": 6390 }, { "epoch": 0.07641172180442138, "grad_norm": 3.624072313308716, "learning_rate": 9.943670769189475e-06, "loss": 0.5426, "step": 6391 }, { "epoch": 0.07642367794928204, "grad_norm": 2.7785637378692627, "learning_rate": 9.943641784438803e-06, "loss": 0.5109, "step": 6392 }, { "epoch": 0.07643563409414268, "grad_norm": 2.7238781452178955, "learning_rate": 9.943612792275124e-06, "loss": 0.6763, "step": 6393 }, { "epoch": 0.07644759023900334, "grad_norm": 3.388512372970581, "learning_rate": 9.943583792698478e-06, "loss": 0.6428, "step": 6394 }, { "epoch": 0.07645954638386399, "grad_norm": 1.5664602518081665, "learning_rate": 9.94355478570891e-06, "loss": 0.63, "step": 6395 }, { "epoch": 0.07647150252872464, "grad_norm": 4.619553089141846, "learning_rate": 9.943525771306468e-06, "loss": 0.6846, "step": 6396 }, { "epoch": 0.07648345867358529, "grad_norm": 1.9834444522857666, "learning_rate": 9.943496749491189e-06, "loss": 0.5847, "step": 6397 }, { "epoch": 0.07649541481844593, "grad_norm": 2.910675048828125, "learning_rate": 9.943467720263118e-06, "loss": 0.7449, "step": 6398 }, { "epoch": 0.07650737096330659, "grad_norm": 2.269421100616455, "learning_rate": 9.9434386836223e-06, "loss": 0.5876, "step": 6399 }, { "epoch": 0.07651932710816724, "grad_norm": 5.312558650970459, "learning_rate": 9.94340963956878e-06, "loss": 0.666, "step": 6400 }, { "epoch": 0.0765312832530279, "grad_norm": 2.2637622356414795, "learning_rate": 9.943380588102598e-06, "loss": 0.6444, "step": 6401 }, { "epoch": 0.07654323939788854, "grad_norm": 3.6002326011657715, "learning_rate": 9.9433515292238e-06, "loss": 0.6631, "step": 6402 }, { "epoch": 0.0765551955427492, "grad_norm": 5.153615474700928, "learning_rate": 9.943322462932427e-06, "loss": 0.7664, "step": 6403 }, { "epoch": 0.07656715168760984, "grad_norm": 51.88636779785156, "learning_rate": 9.943293389228527e-06, "loss": 0.6408, "step": 6404 }, { "epoch": 0.0765791078324705, "grad_norm": 1.9736058712005615, "learning_rate": 9.94326430811214e-06, "loss": 0.6579, "step": 6405 }, { "epoch": 0.07659106397733115, "grad_norm": 4.105018615722656, "learning_rate": 9.943235219583309e-06, "loss": 0.6752, "step": 6406 }, { "epoch": 0.0766030201221918, "grad_norm": 3.4086525440216064, "learning_rate": 9.943206123642082e-06, "loss": 0.629, "step": 6407 }, { "epoch": 0.07661497626705245, "grad_norm": 11.610920906066895, "learning_rate": 9.943177020288498e-06, "loss": 0.7019, "step": 6408 }, { "epoch": 0.07662693241191311, "grad_norm": 5.63117790222168, "learning_rate": 9.943147909522603e-06, "loss": 0.5853, "step": 6409 }, { "epoch": 0.07663888855677375, "grad_norm": 7.009944438934326, "learning_rate": 9.94311879134444e-06, "loss": 0.7012, "step": 6410 }, { "epoch": 0.0766508447016344, "grad_norm": 4.410702228546143, "learning_rate": 9.943089665754054e-06, "loss": 0.6691, "step": 6411 }, { "epoch": 0.07666280084649506, "grad_norm": 3.9848616123199463, "learning_rate": 9.943060532751484e-06, "loss": 0.6673, "step": 6412 }, { "epoch": 0.0766747569913557, "grad_norm": 1.966267704963684, "learning_rate": 9.94303139233678e-06, "loss": 0.7145, "step": 6413 }, { "epoch": 0.07668671313621636, "grad_norm": 2.594611167907715, "learning_rate": 9.943002244509982e-06, "loss": 0.6172, "step": 6414 }, { "epoch": 0.076698669281077, "grad_norm": 3.5945212841033936, "learning_rate": 9.942973089271136e-06, "loss": 0.5411, "step": 6415 }, { "epoch": 0.07671062542593766, "grad_norm": 2.3720576763153076, "learning_rate": 9.942943926620284e-06, "loss": 0.6919, "step": 6416 }, { "epoch": 0.07672258157079831, "grad_norm": 2.559091091156006, "learning_rate": 9.94291475655747e-06, "loss": 0.6657, "step": 6417 }, { "epoch": 0.07673453771565897, "grad_norm": 3.099548101425171, "learning_rate": 9.942885579082734e-06, "loss": 0.604, "step": 6418 }, { "epoch": 0.07674649386051961, "grad_norm": 6.076061725616455, "learning_rate": 9.942856394196127e-06, "loss": 0.6938, "step": 6419 }, { "epoch": 0.07675845000538027, "grad_norm": 3.206953287124634, "learning_rate": 9.94282720189769e-06, "loss": 0.6554, "step": 6420 }, { "epoch": 0.07677040615024092, "grad_norm": 4.3243408203125, "learning_rate": 9.942798002187465e-06, "loss": 0.6101, "step": 6421 }, { "epoch": 0.07678236229510156, "grad_norm": 1.6490445137023926, "learning_rate": 9.942768795065495e-06, "loss": 0.6478, "step": 6422 }, { "epoch": 0.07679431843996222, "grad_norm": 2.7432079315185547, "learning_rate": 9.942739580531829e-06, "loss": 0.5511, "step": 6423 }, { "epoch": 0.07680627458482286, "grad_norm": 2.4520843029022217, "learning_rate": 9.942710358586505e-06, "loss": 0.5795, "step": 6424 }, { "epoch": 0.07681823072968352, "grad_norm": 3.3074142932891846, "learning_rate": 9.94268112922957e-06, "loss": 0.6398, "step": 6425 }, { "epoch": 0.07683018687454417, "grad_norm": 2.328995943069458, "learning_rate": 9.942651892461069e-06, "loss": 0.6275, "step": 6426 }, { "epoch": 0.07684214301940483, "grad_norm": 4.130566596984863, "learning_rate": 9.94262264828104e-06, "loss": 0.629, "step": 6427 }, { "epoch": 0.07685409916426547, "grad_norm": 2.0767557621002197, "learning_rate": 9.942593396689532e-06, "loss": 0.6775, "step": 6428 }, { "epoch": 0.07686605530912613, "grad_norm": 3.4429872035980225, "learning_rate": 9.94256413768659e-06, "loss": 0.68, "step": 6429 }, { "epoch": 0.07687801145398677, "grad_norm": 2.4316153526306152, "learning_rate": 9.942534871272253e-06, "loss": 0.5802, "step": 6430 }, { "epoch": 0.07688996759884743, "grad_norm": 11.540600776672363, "learning_rate": 9.942505597446568e-06, "loss": 0.6473, "step": 6431 }, { "epoch": 0.07690192374370808, "grad_norm": 2.5819883346557617, "learning_rate": 9.942476316209579e-06, "loss": 0.6633, "step": 6432 }, { "epoch": 0.07691387988856872, "grad_norm": 3.2803711891174316, "learning_rate": 9.942447027561329e-06, "loss": 0.5479, "step": 6433 }, { "epoch": 0.07692583603342938, "grad_norm": 2.8651416301727295, "learning_rate": 9.942417731501861e-06, "loss": 0.6298, "step": 6434 }, { "epoch": 0.07693779217829003, "grad_norm": 3.253209114074707, "learning_rate": 9.942388428031222e-06, "loss": 0.7306, "step": 6435 }, { "epoch": 0.07694974832315069, "grad_norm": 2.7283823490142822, "learning_rate": 9.942359117149452e-06, "loss": 0.6971, "step": 6436 }, { "epoch": 0.07696170446801133, "grad_norm": 2.9717824459075928, "learning_rate": 9.942329798856599e-06, "loss": 0.6733, "step": 6437 }, { "epoch": 0.07697366061287199, "grad_norm": 7.690432071685791, "learning_rate": 9.942300473152702e-06, "loss": 0.6298, "step": 6438 }, { "epoch": 0.07698561675773263, "grad_norm": 2.35772442817688, "learning_rate": 9.942271140037811e-06, "loss": 0.6796, "step": 6439 }, { "epoch": 0.07699757290259329, "grad_norm": 4.455057621002197, "learning_rate": 9.942241799511964e-06, "loss": 0.6155, "step": 6440 }, { "epoch": 0.07700952904745394, "grad_norm": 2.0141184329986572, "learning_rate": 9.94221245157521e-06, "loss": 0.544, "step": 6441 }, { "epoch": 0.0770214851923146, "grad_norm": 18.50222396850586, "learning_rate": 9.942183096227591e-06, "loss": 0.6455, "step": 6442 }, { "epoch": 0.07703344133717524, "grad_norm": 5.9228410720825195, "learning_rate": 9.94215373346915e-06, "loss": 0.7075, "step": 6443 }, { "epoch": 0.0770453974820359, "grad_norm": 5.429414749145508, "learning_rate": 9.942124363299932e-06, "loss": 0.5513, "step": 6444 }, { "epoch": 0.07705735362689654, "grad_norm": 3.1335673332214355, "learning_rate": 9.94209498571998e-06, "loss": 0.6604, "step": 6445 }, { "epoch": 0.07706930977175719, "grad_norm": 7.891449928283691, "learning_rate": 9.942065600729341e-06, "loss": 0.5393, "step": 6446 }, { "epoch": 0.07708126591661785, "grad_norm": 3.5963521003723145, "learning_rate": 9.942036208328056e-06, "loss": 0.5587, "step": 6447 }, { "epoch": 0.07709322206147849, "grad_norm": 4.0077009201049805, "learning_rate": 9.94200680851617e-06, "loss": 0.6298, "step": 6448 }, { "epoch": 0.07710517820633915, "grad_norm": 3.1351304054260254, "learning_rate": 9.941977401293727e-06, "loss": 0.6763, "step": 6449 }, { "epoch": 0.0771171343511998, "grad_norm": 2.5864877700805664, "learning_rate": 9.941947986660771e-06, "loss": 0.6199, "step": 6450 }, { "epoch": 0.07712909049606045, "grad_norm": 3.6932766437530518, "learning_rate": 9.941918564617348e-06, "loss": 0.6058, "step": 6451 }, { "epoch": 0.0771410466409211, "grad_norm": 19.875261306762695, "learning_rate": 9.941889135163499e-06, "loss": 0.7246, "step": 6452 }, { "epoch": 0.07715300278578176, "grad_norm": 2.439713478088379, "learning_rate": 9.94185969829927e-06, "loss": 0.6761, "step": 6453 }, { "epoch": 0.0771649589306424, "grad_norm": 2.8751795291900635, "learning_rate": 9.941830254024706e-06, "loss": 0.6334, "step": 6454 }, { "epoch": 0.07717691507550306, "grad_norm": 2.98801326751709, "learning_rate": 9.941800802339849e-06, "loss": 0.5792, "step": 6455 }, { "epoch": 0.0771888712203637, "grad_norm": 1.7657020092010498, "learning_rate": 9.941771343244744e-06, "loss": 0.5358, "step": 6456 }, { "epoch": 0.07720082736522435, "grad_norm": 4.388984680175781, "learning_rate": 9.941741876739435e-06, "loss": 0.5896, "step": 6457 }, { "epoch": 0.07721278351008501, "grad_norm": 3.071460247039795, "learning_rate": 9.941712402823968e-06, "loss": 0.5735, "step": 6458 }, { "epoch": 0.07722473965494565, "grad_norm": 2.682860851287842, "learning_rate": 9.941682921498384e-06, "loss": 0.6568, "step": 6459 }, { "epoch": 0.07723669579980631, "grad_norm": 2.9951751232147217, "learning_rate": 9.94165343276273e-06, "loss": 0.6375, "step": 6460 }, { "epoch": 0.07724865194466696, "grad_norm": 3.501018762588501, "learning_rate": 9.941623936617049e-06, "loss": 0.6391, "step": 6461 }, { "epoch": 0.07726060808952762, "grad_norm": 2.507265329360962, "learning_rate": 9.941594433061385e-06, "loss": 0.6801, "step": 6462 }, { "epoch": 0.07727256423438826, "grad_norm": 3.585850715637207, "learning_rate": 9.941564922095782e-06, "loss": 0.6427, "step": 6463 }, { "epoch": 0.07728452037924892, "grad_norm": 2.3786096572875977, "learning_rate": 9.941535403720286e-06, "loss": 0.7001, "step": 6464 }, { "epoch": 0.07729647652410956, "grad_norm": 2.400672674179077, "learning_rate": 9.94150587793494e-06, "loss": 0.5908, "step": 6465 }, { "epoch": 0.07730843266897022, "grad_norm": 2.7530906200408936, "learning_rate": 9.941476344739786e-06, "loss": 0.6252, "step": 6466 }, { "epoch": 0.07732038881383087, "grad_norm": 3.26161527633667, "learning_rate": 9.941446804134874e-06, "loss": 0.6845, "step": 6467 }, { "epoch": 0.07733234495869153, "grad_norm": 4.783819675445557, "learning_rate": 9.941417256120243e-06, "loss": 0.6659, "step": 6468 }, { "epoch": 0.07734430110355217, "grad_norm": 2.241983652114868, "learning_rate": 9.941387700695941e-06, "loss": 0.6621, "step": 6469 }, { "epoch": 0.07735625724841282, "grad_norm": 2.242485523223877, "learning_rate": 9.94135813786201e-06, "loss": 0.6207, "step": 6470 }, { "epoch": 0.07736821339327347, "grad_norm": 2.9814932346343994, "learning_rate": 9.941328567618493e-06, "loss": 0.6566, "step": 6471 }, { "epoch": 0.07738016953813412, "grad_norm": 2.2010936737060547, "learning_rate": 9.941298989965438e-06, "loss": 0.6576, "step": 6472 }, { "epoch": 0.07739212568299478, "grad_norm": 3.1786954402923584, "learning_rate": 9.941269404902887e-06, "loss": 0.5629, "step": 6473 }, { "epoch": 0.07740408182785542, "grad_norm": 4.556948184967041, "learning_rate": 9.941239812430885e-06, "loss": 0.6626, "step": 6474 }, { "epoch": 0.07741603797271608, "grad_norm": 2.6319591999053955, "learning_rate": 9.941210212549477e-06, "loss": 0.6979, "step": 6475 }, { "epoch": 0.07742799411757673, "grad_norm": 131.9123077392578, "learning_rate": 9.941180605258705e-06, "loss": 0.5917, "step": 6476 }, { "epoch": 0.07743995026243738, "grad_norm": 2.1376760005950928, "learning_rate": 9.941150990558617e-06, "loss": 0.6319, "step": 6477 }, { "epoch": 0.07745190640729803, "grad_norm": 3.413839101791382, "learning_rate": 9.941121368449255e-06, "loss": 0.6739, "step": 6478 }, { "epoch": 0.07746386255215869, "grad_norm": 2.0550785064697266, "learning_rate": 9.941091738930665e-06, "loss": 0.582, "step": 6479 }, { "epoch": 0.07747581869701933, "grad_norm": 3.8470656871795654, "learning_rate": 9.941062102002887e-06, "loss": 0.6283, "step": 6480 }, { "epoch": 0.07748777484187998, "grad_norm": 2.0571138858795166, "learning_rate": 9.941032457665972e-06, "loss": 0.6631, "step": 6481 }, { "epoch": 0.07749973098674064, "grad_norm": 4.261889457702637, "learning_rate": 9.94100280591996e-06, "loss": 0.6502, "step": 6482 }, { "epoch": 0.07751168713160128, "grad_norm": 5.436319828033447, "learning_rate": 9.940973146764897e-06, "loss": 0.5947, "step": 6483 }, { "epoch": 0.07752364327646194, "grad_norm": 33.392704010009766, "learning_rate": 9.940943480200826e-06, "loss": 0.6656, "step": 6484 }, { "epoch": 0.07753559942132258, "grad_norm": 3.6640238761901855, "learning_rate": 9.940913806227795e-06, "loss": 0.6129, "step": 6485 }, { "epoch": 0.07754755556618324, "grad_norm": 5.809096336364746, "learning_rate": 9.940884124845844e-06, "loss": 0.536, "step": 6486 }, { "epoch": 0.07755951171104389, "grad_norm": 4.758578300476074, "learning_rate": 9.94085443605502e-06, "loss": 0.5668, "step": 6487 }, { "epoch": 0.07757146785590455, "grad_norm": 4.190311908721924, "learning_rate": 9.940824739855368e-06, "loss": 0.67, "step": 6488 }, { "epoch": 0.07758342400076519, "grad_norm": 3.5447194576263428, "learning_rate": 9.940795036246933e-06, "loss": 0.6453, "step": 6489 }, { "epoch": 0.07759538014562585, "grad_norm": 6.920644760131836, "learning_rate": 9.940765325229756e-06, "loss": 0.6694, "step": 6490 }, { "epoch": 0.0776073362904865, "grad_norm": 2.0940351486206055, "learning_rate": 9.940735606803885e-06, "loss": 0.5634, "step": 6491 }, { "epoch": 0.07761929243534714, "grad_norm": 7.8874969482421875, "learning_rate": 9.940705880969363e-06, "loss": 0.6805, "step": 6492 }, { "epoch": 0.0776312485802078, "grad_norm": 6.246784210205078, "learning_rate": 9.940676147726236e-06, "loss": 0.6552, "step": 6493 }, { "epoch": 0.07764320472506844, "grad_norm": 4.538883686065674, "learning_rate": 9.940646407074546e-06, "loss": 0.7188, "step": 6494 }, { "epoch": 0.0776551608699291, "grad_norm": 3.632016658782959, "learning_rate": 9.94061665901434e-06, "loss": 0.6729, "step": 6495 }, { "epoch": 0.07766711701478975, "grad_norm": 5.46764612197876, "learning_rate": 9.940586903545662e-06, "loss": 0.5299, "step": 6496 }, { "epoch": 0.0776790731596504, "grad_norm": 3.678119659423828, "learning_rate": 9.940557140668556e-06, "loss": 0.678, "step": 6497 }, { "epoch": 0.07769102930451105, "grad_norm": 3.917855739593506, "learning_rate": 9.940527370383065e-06, "loss": 0.6782, "step": 6498 }, { "epoch": 0.07770298544937171, "grad_norm": 6.608554840087891, "learning_rate": 9.94049759268924e-06, "loss": 0.6583, "step": 6499 }, { "epoch": 0.07771494159423235, "grad_norm": 2.1129634380340576, "learning_rate": 9.940467807587118e-06, "loss": 0.6255, "step": 6500 }, { "epoch": 0.07772689773909301, "grad_norm": 2.064195156097412, "learning_rate": 9.940438015076747e-06, "loss": 0.5007, "step": 6501 }, { "epoch": 0.07773885388395366, "grad_norm": 4.558048248291016, "learning_rate": 9.940408215158174e-06, "loss": 0.6496, "step": 6502 }, { "epoch": 0.07775081002881432, "grad_norm": 3.432546615600586, "learning_rate": 9.94037840783144e-06, "loss": 0.6589, "step": 6503 }, { "epoch": 0.07776276617367496, "grad_norm": 2.8581626415252686, "learning_rate": 9.94034859309659e-06, "loss": 0.6739, "step": 6504 }, { "epoch": 0.0777747223185356, "grad_norm": 2.5058538913726807, "learning_rate": 9.940318770953672e-06, "loss": 0.5569, "step": 6505 }, { "epoch": 0.07778667846339626, "grad_norm": 2.7992236614227295, "learning_rate": 9.940288941402725e-06, "loss": 0.6726, "step": 6506 }, { "epoch": 0.07779863460825691, "grad_norm": 3.058236598968506, "learning_rate": 9.9402591044438e-06, "loss": 0.6482, "step": 6507 }, { "epoch": 0.07781059075311757, "grad_norm": 2.4697370529174805, "learning_rate": 9.94022926007694e-06, "loss": 0.5797, "step": 6508 }, { "epoch": 0.07782254689797821, "grad_norm": 3.3467719554901123, "learning_rate": 9.940199408302185e-06, "loss": 0.5241, "step": 6509 }, { "epoch": 0.07783450304283887, "grad_norm": 1.79860258102417, "learning_rate": 9.940169549119585e-06, "loss": 0.6463, "step": 6510 }, { "epoch": 0.07784645918769952, "grad_norm": 1.7525888681411743, "learning_rate": 9.940139682529184e-06, "loss": 0.6689, "step": 6511 }, { "epoch": 0.07785841533256017, "grad_norm": 2.881824016571045, "learning_rate": 9.940109808531025e-06, "loss": 0.6737, "step": 6512 }, { "epoch": 0.07787037147742082, "grad_norm": 2.103806734085083, "learning_rate": 9.940079927125155e-06, "loss": 0.5795, "step": 6513 }, { "epoch": 0.07788232762228148, "grad_norm": 3.2914998531341553, "learning_rate": 9.940050038311617e-06, "loss": 0.6706, "step": 6514 }, { "epoch": 0.07789428376714212, "grad_norm": 2.389021635055542, "learning_rate": 9.940020142090456e-06, "loss": 0.6276, "step": 6515 }, { "epoch": 0.07790623991200277, "grad_norm": 12.676126480102539, "learning_rate": 9.939990238461717e-06, "loss": 0.7263, "step": 6516 }, { "epoch": 0.07791819605686343, "grad_norm": 2.1890878677368164, "learning_rate": 9.939960327425445e-06, "loss": 0.5499, "step": 6517 }, { "epoch": 0.07793015220172407, "grad_norm": 3.9188382625579834, "learning_rate": 9.939930408981686e-06, "loss": 0.7699, "step": 6518 }, { "epoch": 0.07794210834658473, "grad_norm": 3.001495361328125, "learning_rate": 9.939900483130483e-06, "loss": 0.6376, "step": 6519 }, { "epoch": 0.07795406449144537, "grad_norm": 6.356512069702148, "learning_rate": 9.939870549871882e-06, "loss": 0.6034, "step": 6520 }, { "epoch": 0.07796602063630603, "grad_norm": 3.307858467102051, "learning_rate": 9.939840609205927e-06, "loss": 0.6139, "step": 6521 }, { "epoch": 0.07797797678116668, "grad_norm": 2.510439157485962, "learning_rate": 9.939810661132665e-06, "loss": 0.6384, "step": 6522 }, { "epoch": 0.07798993292602734, "grad_norm": 3.009230852127075, "learning_rate": 9.939780705652138e-06, "loss": 0.6055, "step": 6523 }, { "epoch": 0.07800188907088798, "grad_norm": 2.699291706085205, "learning_rate": 9.939750742764392e-06, "loss": 0.6129, "step": 6524 }, { "epoch": 0.07801384521574864, "grad_norm": 3.102123975753784, "learning_rate": 9.939720772469473e-06, "loss": 0.8236, "step": 6525 }, { "epoch": 0.07802580136060928, "grad_norm": 2.1102757453918457, "learning_rate": 9.939690794767425e-06, "loss": 0.5495, "step": 6526 }, { "epoch": 0.07803775750546994, "grad_norm": 2.094028949737549, "learning_rate": 9.939660809658292e-06, "loss": 0.6137, "step": 6527 }, { "epoch": 0.07804971365033059, "grad_norm": 5.311180114746094, "learning_rate": 9.939630817142121e-06, "loss": 0.6416, "step": 6528 }, { "epoch": 0.07806166979519123, "grad_norm": 3.194873571395874, "learning_rate": 9.939600817218957e-06, "loss": 0.6564, "step": 6529 }, { "epoch": 0.07807362594005189, "grad_norm": 2.407290458679199, "learning_rate": 9.939570809888843e-06, "loss": 0.5429, "step": 6530 }, { "epoch": 0.07808558208491254, "grad_norm": 2.8722190856933594, "learning_rate": 9.939540795151825e-06, "loss": 0.5793, "step": 6531 }, { "epoch": 0.0780975382297732, "grad_norm": 4.480419158935547, "learning_rate": 9.939510773007947e-06, "loss": 0.5259, "step": 6532 }, { "epoch": 0.07810949437463384, "grad_norm": 1.9717106819152832, "learning_rate": 9.939480743457258e-06, "loss": 0.6829, "step": 6533 }, { "epoch": 0.0781214505194945, "grad_norm": 6.677964210510254, "learning_rate": 9.939450706499797e-06, "loss": 0.6739, "step": 6534 }, { "epoch": 0.07813340666435514, "grad_norm": 2.8501477241516113, "learning_rate": 9.939420662135612e-06, "loss": 0.6711, "step": 6535 }, { "epoch": 0.0781453628092158, "grad_norm": 1.903640866279602, "learning_rate": 9.939390610364752e-06, "loss": 0.5854, "step": 6536 }, { "epoch": 0.07815731895407645, "grad_norm": 2.37125563621521, "learning_rate": 9.939360551187253e-06, "loss": 0.6094, "step": 6537 }, { "epoch": 0.0781692750989371, "grad_norm": 2.069801092147827, "learning_rate": 9.939330484603169e-06, "loss": 0.619, "step": 6538 }, { "epoch": 0.07818123124379775, "grad_norm": 2.7038705348968506, "learning_rate": 9.93930041061254e-06, "loss": 0.5892, "step": 6539 }, { "epoch": 0.0781931873886584, "grad_norm": 2.137192487716675, "learning_rate": 9.939270329215414e-06, "loss": 0.5778, "step": 6540 }, { "epoch": 0.07820514353351905, "grad_norm": 5.592099666595459, "learning_rate": 9.939240240411834e-06, "loss": 0.5992, "step": 6541 }, { "epoch": 0.0782170996783797, "grad_norm": 4.332507133483887, "learning_rate": 9.939210144201844e-06, "loss": 0.6663, "step": 6542 }, { "epoch": 0.07822905582324036, "grad_norm": 3.3769185543060303, "learning_rate": 9.939180040585492e-06, "loss": 0.5572, "step": 6543 }, { "epoch": 0.078241011968101, "grad_norm": 3.293524980545044, "learning_rate": 9.939149929562822e-06, "loss": 0.5648, "step": 6544 }, { "epoch": 0.07825296811296166, "grad_norm": 2.8560054302215576, "learning_rate": 9.93911981113388e-06, "loss": 0.6318, "step": 6545 }, { "epoch": 0.0782649242578223, "grad_norm": 3.7667431831359863, "learning_rate": 9.939089685298709e-06, "loss": 0.5344, "step": 6546 }, { "epoch": 0.07827688040268296, "grad_norm": 3.275716781616211, "learning_rate": 9.939059552057354e-06, "loss": 0.7042, "step": 6547 }, { "epoch": 0.07828883654754361, "grad_norm": 3.1922338008880615, "learning_rate": 9.939029411409864e-06, "loss": 0.675, "step": 6548 }, { "epoch": 0.07830079269240427, "grad_norm": 4.158949375152588, "learning_rate": 9.938999263356282e-06, "loss": 0.7308, "step": 6549 }, { "epoch": 0.07831274883726491, "grad_norm": 2.4905388355255127, "learning_rate": 9.938969107896652e-06, "loss": 0.612, "step": 6550 }, { "epoch": 0.07832470498212557, "grad_norm": 1.9799256324768066, "learning_rate": 9.93893894503102e-06, "loss": 0.5927, "step": 6551 }, { "epoch": 0.07833666112698622, "grad_norm": 2.81760573387146, "learning_rate": 9.938908774759433e-06, "loss": 0.64, "step": 6552 }, { "epoch": 0.07834861727184686, "grad_norm": 10.02881145477295, "learning_rate": 9.938878597081933e-06, "loss": 0.5974, "step": 6553 }, { "epoch": 0.07836057341670752, "grad_norm": 3.6921279430389404, "learning_rate": 9.938848411998569e-06, "loss": 0.6378, "step": 6554 }, { "epoch": 0.07837252956156816, "grad_norm": 2.6037421226501465, "learning_rate": 9.938818219509382e-06, "loss": 0.5986, "step": 6555 }, { "epoch": 0.07838448570642882, "grad_norm": 2.6547725200653076, "learning_rate": 9.938788019614421e-06, "loss": 0.6546, "step": 6556 }, { "epoch": 0.07839644185128947, "grad_norm": 5.055820465087891, "learning_rate": 9.93875781231373e-06, "loss": 0.5774, "step": 6557 }, { "epoch": 0.07840839799615013, "grad_norm": 3.8027870655059814, "learning_rate": 9.938727597607354e-06, "loss": 0.6629, "step": 6558 }, { "epoch": 0.07842035414101077, "grad_norm": 3.4505362510681152, "learning_rate": 9.938697375495337e-06, "loss": 0.6054, "step": 6559 }, { "epoch": 0.07843231028587143, "grad_norm": 3.7927117347717285, "learning_rate": 9.938667145977728e-06, "loss": 0.696, "step": 6560 }, { "epoch": 0.07844426643073207, "grad_norm": 3.5218963623046875, "learning_rate": 9.938636909054568e-06, "loss": 0.5893, "step": 6561 }, { "epoch": 0.07845622257559273, "grad_norm": 2.7053656578063965, "learning_rate": 9.938606664725905e-06, "loss": 0.6311, "step": 6562 }, { "epoch": 0.07846817872045338, "grad_norm": 2.481308698654175, "learning_rate": 9.938576412991784e-06, "loss": 0.6367, "step": 6563 }, { "epoch": 0.07848013486531402, "grad_norm": 2.4102325439453125, "learning_rate": 9.93854615385225e-06, "loss": 0.6209, "step": 6564 }, { "epoch": 0.07849209101017468, "grad_norm": 4.183379173278809, "learning_rate": 9.938515887307347e-06, "loss": 0.5489, "step": 6565 }, { "epoch": 0.07850404715503533, "grad_norm": 3.7947945594787598, "learning_rate": 9.938485613357125e-06, "loss": 0.6711, "step": 6566 }, { "epoch": 0.07851600329989598, "grad_norm": 2.7077577114105225, "learning_rate": 9.938455332001625e-06, "loss": 0.6802, "step": 6567 }, { "epoch": 0.07852795944475663, "grad_norm": 5.270781517028809, "learning_rate": 9.938425043240893e-06, "loss": 0.6391, "step": 6568 }, { "epoch": 0.07853991558961729, "grad_norm": 5.137558460235596, "learning_rate": 9.938394747074975e-06, "loss": 0.6434, "step": 6569 }, { "epoch": 0.07855187173447793, "grad_norm": 2.995405435562134, "learning_rate": 9.938364443503916e-06, "loss": 0.6614, "step": 6570 }, { "epoch": 0.07856382787933859, "grad_norm": 26.503665924072266, "learning_rate": 9.938334132527763e-06, "loss": 0.6025, "step": 6571 }, { "epoch": 0.07857578402419924, "grad_norm": 3.2392966747283936, "learning_rate": 9.938303814146562e-06, "loss": 0.555, "step": 6572 }, { "epoch": 0.0785877401690599, "grad_norm": 8.320700645446777, "learning_rate": 9.938273488360353e-06, "loss": 0.6381, "step": 6573 }, { "epoch": 0.07859969631392054, "grad_norm": 4.080724239349365, "learning_rate": 9.938243155169189e-06, "loss": 0.6066, "step": 6574 }, { "epoch": 0.07861165245878118, "grad_norm": 15.4666166305542, "learning_rate": 9.93821281457311e-06, "loss": 0.6981, "step": 6575 }, { "epoch": 0.07862360860364184, "grad_norm": 2.7696690559387207, "learning_rate": 9.938182466572165e-06, "loss": 0.6316, "step": 6576 }, { "epoch": 0.07863556474850249, "grad_norm": 5.19983434677124, "learning_rate": 9.938152111166394e-06, "loss": 0.6707, "step": 6577 }, { "epoch": 0.07864752089336315, "grad_norm": 3.2047934532165527, "learning_rate": 9.93812174835585e-06, "loss": 0.6637, "step": 6578 }, { "epoch": 0.07865947703822379, "grad_norm": 4.515292644500732, "learning_rate": 9.938091378140572e-06, "loss": 0.7328, "step": 6579 }, { "epoch": 0.07867143318308445, "grad_norm": 6.404518127441406, "learning_rate": 9.93806100052061e-06, "loss": 0.6155, "step": 6580 }, { "epoch": 0.0786833893279451, "grad_norm": 5.297389030456543, "learning_rate": 9.93803061549601e-06, "loss": 0.6805, "step": 6581 }, { "epoch": 0.07869534547280575, "grad_norm": 4.072995662689209, "learning_rate": 9.938000223066813e-06, "loss": 0.5439, "step": 6582 }, { "epoch": 0.0787073016176664, "grad_norm": 3.744924783706665, "learning_rate": 9.937969823233069e-06, "loss": 0.6634, "step": 6583 }, { "epoch": 0.07871925776252706, "grad_norm": 6.726517200469971, "learning_rate": 9.93793941599482e-06, "loss": 0.644, "step": 6584 }, { "epoch": 0.0787312139073877, "grad_norm": 4.647032737731934, "learning_rate": 9.937909001352113e-06, "loss": 0.5667, "step": 6585 }, { "epoch": 0.07874317005224836, "grad_norm": 5.244523048400879, "learning_rate": 9.937878579304996e-06, "loss": 0.6345, "step": 6586 }, { "epoch": 0.078755126197109, "grad_norm": 2.966661214828491, "learning_rate": 9.937848149853511e-06, "loss": 0.6324, "step": 6587 }, { "epoch": 0.07876708234196965, "grad_norm": 2.1716840267181396, "learning_rate": 9.937817712997705e-06, "loss": 0.6926, "step": 6588 }, { "epoch": 0.07877903848683031, "grad_norm": 2.344963550567627, "learning_rate": 9.937787268737624e-06, "loss": 0.5942, "step": 6589 }, { "epoch": 0.07879099463169095, "grad_norm": 2.6910929679870605, "learning_rate": 9.937756817073314e-06, "loss": 0.7206, "step": 6590 }, { "epoch": 0.07880295077655161, "grad_norm": 4.207815647125244, "learning_rate": 9.93772635800482e-06, "loss": 0.6309, "step": 6591 }, { "epoch": 0.07881490692141226, "grad_norm": 2.0687382221221924, "learning_rate": 9.937695891532187e-06, "loss": 0.5519, "step": 6592 }, { "epoch": 0.07882686306627291, "grad_norm": 3.407088041305542, "learning_rate": 9.937665417655462e-06, "loss": 0.6649, "step": 6593 }, { "epoch": 0.07883881921113356, "grad_norm": 4.39151668548584, "learning_rate": 9.93763493637469e-06, "loss": 0.6998, "step": 6594 }, { "epoch": 0.07885077535599422, "grad_norm": 4.182395935058594, "learning_rate": 9.937604447689917e-06, "loss": 0.6417, "step": 6595 }, { "epoch": 0.07886273150085486, "grad_norm": 3.5960440635681152, "learning_rate": 9.93757395160119e-06, "loss": 0.6424, "step": 6596 }, { "epoch": 0.07887468764571552, "grad_norm": 6.520715236663818, "learning_rate": 9.937543448108551e-06, "loss": 0.6287, "step": 6597 }, { "epoch": 0.07888664379057617, "grad_norm": 2.4548470973968506, "learning_rate": 9.937512937212048e-06, "loss": 0.7338, "step": 6598 }, { "epoch": 0.07889859993543681, "grad_norm": 4.344684600830078, "learning_rate": 9.937482418911728e-06, "loss": 0.5828, "step": 6599 }, { "epoch": 0.07891055608029747, "grad_norm": 4.255831241607666, "learning_rate": 9.937451893207634e-06, "loss": 0.6853, "step": 6600 }, { "epoch": 0.07892251222515811, "grad_norm": 3.3338875770568848, "learning_rate": 9.937421360099816e-06, "loss": 0.5639, "step": 6601 }, { "epoch": 0.07893446837001877, "grad_norm": 3.831292152404785, "learning_rate": 9.937390819588316e-06, "loss": 0.5889, "step": 6602 }, { "epoch": 0.07894642451487942, "grad_norm": 5.613239765167236, "learning_rate": 9.93736027167318e-06, "loss": 0.6, "step": 6603 }, { "epoch": 0.07895838065974008, "grad_norm": 2.8349432945251465, "learning_rate": 9.937329716354456e-06, "loss": 0.576, "step": 6604 }, { "epoch": 0.07897033680460072, "grad_norm": 15.497817993164062, "learning_rate": 9.937299153632188e-06, "loss": 0.6629, "step": 6605 }, { "epoch": 0.07898229294946138, "grad_norm": 5.946347236633301, "learning_rate": 9.937268583506421e-06, "loss": 0.5972, "step": 6606 }, { "epoch": 0.07899424909432202, "grad_norm": 4.421408176422119, "learning_rate": 9.937238005977204e-06, "loss": 0.6148, "step": 6607 }, { "epoch": 0.07900620523918268, "grad_norm": 3.0279629230499268, "learning_rate": 9.937207421044582e-06, "loss": 0.603, "step": 6608 }, { "epoch": 0.07901816138404333, "grad_norm": 3.1661903858184814, "learning_rate": 9.937176828708597e-06, "loss": 0.573, "step": 6609 }, { "epoch": 0.07903011752890399, "grad_norm": 3.3916914463043213, "learning_rate": 9.937146228969298e-06, "loss": 0.6267, "step": 6610 }, { "epoch": 0.07904207367376463, "grad_norm": 3.8640780448913574, "learning_rate": 9.937115621826734e-06, "loss": 0.5646, "step": 6611 }, { "epoch": 0.07905402981862528, "grad_norm": 5.457558631896973, "learning_rate": 9.937085007280945e-06, "loss": 0.6784, "step": 6612 }, { "epoch": 0.07906598596348594, "grad_norm": 2.6575238704681396, "learning_rate": 9.93705438533198e-06, "loss": 0.6326, "step": 6613 }, { "epoch": 0.07907794210834658, "grad_norm": 5.92981481552124, "learning_rate": 9.937023755979885e-06, "loss": 0.6191, "step": 6614 }, { "epoch": 0.07908989825320724, "grad_norm": 6.767515659332275, "learning_rate": 9.936993119224704e-06, "loss": 0.6772, "step": 6615 }, { "epoch": 0.07910185439806788, "grad_norm": 3.499499797821045, "learning_rate": 9.936962475066485e-06, "loss": 0.6158, "step": 6616 }, { "epoch": 0.07911381054292854, "grad_norm": 13.04777717590332, "learning_rate": 9.936931823505273e-06, "loss": 0.6408, "step": 6617 }, { "epoch": 0.07912576668778919, "grad_norm": 3.8605751991271973, "learning_rate": 9.936901164541114e-06, "loss": 0.6377, "step": 6618 }, { "epoch": 0.07913772283264985, "grad_norm": 2.6122477054595947, "learning_rate": 9.936870498174055e-06, "loss": 0.6032, "step": 6619 }, { "epoch": 0.07914967897751049, "grad_norm": 3.696570873260498, "learning_rate": 9.936839824404141e-06, "loss": 0.6898, "step": 6620 }, { "epoch": 0.07916163512237115, "grad_norm": 2.9052085876464844, "learning_rate": 9.936809143231419e-06, "loss": 0.5861, "step": 6621 }, { "epoch": 0.0791735912672318, "grad_norm": 3.117539405822754, "learning_rate": 9.936778454655932e-06, "loss": 0.6085, "step": 6622 }, { "epoch": 0.07918554741209244, "grad_norm": 1.8225992918014526, "learning_rate": 9.936747758677729e-06, "loss": 0.7265, "step": 6623 }, { "epoch": 0.0791975035569531, "grad_norm": 5.604671001434326, "learning_rate": 9.936717055296854e-06, "loss": 0.5954, "step": 6624 }, { "epoch": 0.07920945970181374, "grad_norm": 8.358147621154785, "learning_rate": 9.936686344513356e-06, "loss": 0.5888, "step": 6625 }, { "epoch": 0.0792214158466744, "grad_norm": 5.698474884033203, "learning_rate": 9.936655626327279e-06, "loss": 0.7503, "step": 6626 }, { "epoch": 0.07923337199153505, "grad_norm": 3.703857898712158, "learning_rate": 9.93662490073867e-06, "loss": 0.6079, "step": 6627 }, { "epoch": 0.0792453281363957, "grad_norm": 2.9105911254882812, "learning_rate": 9.936594167747573e-06, "loss": 0.6004, "step": 6628 }, { "epoch": 0.07925728428125635, "grad_norm": 2.3452394008636475, "learning_rate": 9.936563427354035e-06, "loss": 0.6301, "step": 6629 }, { "epoch": 0.07926924042611701, "grad_norm": 4.932770729064941, "learning_rate": 9.936532679558104e-06, "loss": 0.6765, "step": 6630 }, { "epoch": 0.07928119657097765, "grad_norm": 2.812251567840576, "learning_rate": 9.936501924359825e-06, "loss": 0.6508, "step": 6631 }, { "epoch": 0.07929315271583831, "grad_norm": 3.63633131980896, "learning_rate": 9.936471161759243e-06, "loss": 0.5981, "step": 6632 }, { "epoch": 0.07930510886069896, "grad_norm": 1.8632732629776, "learning_rate": 9.936440391756405e-06, "loss": 0.5952, "step": 6633 }, { "epoch": 0.0793170650055596, "grad_norm": 5.584059238433838, "learning_rate": 9.936409614351358e-06, "loss": 0.615, "step": 6634 }, { "epoch": 0.07932902115042026, "grad_norm": 2.1888060569763184, "learning_rate": 9.936378829544146e-06, "loss": 0.7523, "step": 6635 }, { "epoch": 0.0793409772952809, "grad_norm": 3.142850875854492, "learning_rate": 9.936348037334818e-06, "loss": 0.594, "step": 6636 }, { "epoch": 0.07935293344014156, "grad_norm": 3.1229958534240723, "learning_rate": 9.936317237723417e-06, "loss": 0.574, "step": 6637 }, { "epoch": 0.07936488958500221, "grad_norm": 4.582483291625977, "learning_rate": 9.93628643070999e-06, "loss": 0.6994, "step": 6638 }, { "epoch": 0.07937684572986287, "grad_norm": 4.797295570373535, "learning_rate": 9.936255616294586e-06, "loss": 0.6366, "step": 6639 }, { "epoch": 0.07938880187472351, "grad_norm": 3.007127285003662, "learning_rate": 9.936224794477247e-06, "loss": 0.6204, "step": 6640 }, { "epoch": 0.07940075801958417, "grad_norm": 10.37288761138916, "learning_rate": 9.936193965258024e-06, "loss": 0.7617, "step": 6641 }, { "epoch": 0.07941271416444481, "grad_norm": 3.2921652793884277, "learning_rate": 9.93616312863696e-06, "loss": 0.5939, "step": 6642 }, { "epoch": 0.07942467030930547, "grad_norm": 5.793199062347412, "learning_rate": 9.936132284614099e-06, "loss": 0.5734, "step": 6643 }, { "epoch": 0.07943662645416612, "grad_norm": 3.4684810638427734, "learning_rate": 9.936101433189494e-06, "loss": 0.6681, "step": 6644 }, { "epoch": 0.07944858259902678, "grad_norm": 1.7570676803588867, "learning_rate": 9.936070574363185e-06, "loss": 0.6249, "step": 6645 }, { "epoch": 0.07946053874388742, "grad_norm": 2.131894111633301, "learning_rate": 9.936039708135222e-06, "loss": 0.7206, "step": 6646 }, { "epoch": 0.07947249488874807, "grad_norm": 2.9148125648498535, "learning_rate": 9.936008834505648e-06, "loss": 0.65, "step": 6647 }, { "epoch": 0.07948445103360872, "grad_norm": 3.1027257442474365, "learning_rate": 9.935977953474512e-06, "loss": 0.5917, "step": 6648 }, { "epoch": 0.07949640717846937, "grad_norm": 3.7577435970306396, "learning_rate": 9.93594706504186e-06, "loss": 0.6736, "step": 6649 }, { "epoch": 0.07950836332333003, "grad_norm": 1.8847345113754272, "learning_rate": 9.935916169207738e-06, "loss": 0.6937, "step": 6650 }, { "epoch": 0.07952031946819067, "grad_norm": 3.7275478839874268, "learning_rate": 9.93588526597219e-06, "loss": 0.7428, "step": 6651 }, { "epoch": 0.07953227561305133, "grad_norm": 2.2813708782196045, "learning_rate": 9.935854355335268e-06, "loss": 0.6231, "step": 6652 }, { "epoch": 0.07954423175791198, "grad_norm": 3.165748357772827, "learning_rate": 9.935823437297014e-06, "loss": 0.6699, "step": 6653 }, { "epoch": 0.07955618790277263, "grad_norm": 2.6432430744171143, "learning_rate": 9.935792511857474e-06, "loss": 0.6463, "step": 6654 }, { "epoch": 0.07956814404763328, "grad_norm": 5.476485729217529, "learning_rate": 9.935761579016695e-06, "loss": 0.6435, "step": 6655 }, { "epoch": 0.07958010019249394, "grad_norm": 2.1769604682922363, "learning_rate": 9.935730638774726e-06, "loss": 0.6814, "step": 6656 }, { "epoch": 0.07959205633735458, "grad_norm": 2.8077497482299805, "learning_rate": 9.93569969113161e-06, "loss": 0.636, "step": 6657 }, { "epoch": 0.07960401248221523, "grad_norm": 6.3382887840271, "learning_rate": 9.935668736087394e-06, "loss": 0.5895, "step": 6658 }, { "epoch": 0.07961596862707589, "grad_norm": 2.425668954849243, "learning_rate": 9.935637773642126e-06, "loss": 0.6679, "step": 6659 }, { "epoch": 0.07962792477193653, "grad_norm": 1.7974642515182495, "learning_rate": 9.935606803795852e-06, "loss": 0.5129, "step": 6660 }, { "epoch": 0.07963988091679719, "grad_norm": 2.754474639892578, "learning_rate": 9.935575826548618e-06, "loss": 0.6094, "step": 6661 }, { "epoch": 0.07965183706165783, "grad_norm": 3.7938120365142822, "learning_rate": 9.93554484190047e-06, "loss": 0.6979, "step": 6662 }, { "epoch": 0.0796637932065185, "grad_norm": 6.0935797691345215, "learning_rate": 9.935513849851457e-06, "loss": 0.6688, "step": 6663 }, { "epoch": 0.07967574935137914, "grad_norm": 2.368382692337036, "learning_rate": 9.935482850401622e-06, "loss": 0.5785, "step": 6664 }, { "epoch": 0.0796877054962398, "grad_norm": 2.643833637237549, "learning_rate": 9.935451843551011e-06, "loss": 0.6116, "step": 6665 }, { "epoch": 0.07969966164110044, "grad_norm": 2.6772546768188477, "learning_rate": 9.935420829299675e-06, "loss": 0.6602, "step": 6666 }, { "epoch": 0.0797116177859611, "grad_norm": 9.943100929260254, "learning_rate": 9.935389807647658e-06, "loss": 0.6815, "step": 6667 }, { "epoch": 0.07972357393082175, "grad_norm": 2.6302778720855713, "learning_rate": 9.935358778595004e-06, "loss": 0.6089, "step": 6668 }, { "epoch": 0.0797355300756824, "grad_norm": 2.240957736968994, "learning_rate": 9.935327742141764e-06, "loss": 0.6632, "step": 6669 }, { "epoch": 0.07974748622054305, "grad_norm": 2.856245279312134, "learning_rate": 9.935296698287982e-06, "loss": 0.6731, "step": 6670 }, { "epoch": 0.0797594423654037, "grad_norm": 3.3712193965911865, "learning_rate": 9.935265647033705e-06, "loss": 0.6217, "step": 6671 }, { "epoch": 0.07977139851026435, "grad_norm": 5.115405559539795, "learning_rate": 9.93523458837898e-06, "loss": 0.591, "step": 6672 }, { "epoch": 0.079783354655125, "grad_norm": 2.3465921878814697, "learning_rate": 9.935203522323854e-06, "loss": 0.66, "step": 6673 }, { "epoch": 0.07979531079998566, "grad_norm": 2.264711380004883, "learning_rate": 9.93517244886837e-06, "loss": 0.5477, "step": 6674 }, { "epoch": 0.0798072669448463, "grad_norm": 2.846557855606079, "learning_rate": 9.93514136801258e-06, "loss": 0.6377, "step": 6675 }, { "epoch": 0.07981922308970696, "grad_norm": 3.087743043899536, "learning_rate": 9.935110279756525e-06, "loss": 0.6371, "step": 6676 }, { "epoch": 0.0798311792345676, "grad_norm": 2.0945987701416016, "learning_rate": 9.935079184100258e-06, "loss": 0.6538, "step": 6677 }, { "epoch": 0.07984313537942826, "grad_norm": 2.3248085975646973, "learning_rate": 9.935048081043821e-06, "loss": 0.5876, "step": 6678 }, { "epoch": 0.07985509152428891, "grad_norm": 6.217248439788818, "learning_rate": 9.93501697058726e-06, "loss": 0.6957, "step": 6679 }, { "epoch": 0.07986704766914957, "grad_norm": 4.053992748260498, "learning_rate": 9.934985852730626e-06, "loss": 0.6418, "step": 6680 }, { "epoch": 0.07987900381401021, "grad_norm": 1.8573024272918701, "learning_rate": 9.934954727473962e-06, "loss": 0.6381, "step": 6681 }, { "epoch": 0.07989095995887086, "grad_norm": 3.1842541694641113, "learning_rate": 9.934923594817317e-06, "loss": 0.7003, "step": 6682 }, { "epoch": 0.07990291610373151, "grad_norm": 2.4023118019104004, "learning_rate": 9.934892454760736e-06, "loss": 0.6263, "step": 6683 }, { "epoch": 0.07991487224859216, "grad_norm": 4.209440231323242, "learning_rate": 9.934861307304265e-06, "loss": 0.5804, "step": 6684 }, { "epoch": 0.07992682839345282, "grad_norm": 3.018557071685791, "learning_rate": 9.934830152447954e-06, "loss": 0.7314, "step": 6685 }, { "epoch": 0.07993878453831346, "grad_norm": 2.595886468887329, "learning_rate": 9.934798990191845e-06, "loss": 0.6956, "step": 6686 }, { "epoch": 0.07995074068317412, "grad_norm": 1.4537659883499146, "learning_rate": 9.93476782053599e-06, "loss": 0.6461, "step": 6687 }, { "epoch": 0.07996269682803477, "grad_norm": 8.05805492401123, "learning_rate": 9.934736643480432e-06, "loss": 0.6786, "step": 6688 }, { "epoch": 0.07997465297289542, "grad_norm": 3.9278335571289062, "learning_rate": 9.934705459025218e-06, "loss": 0.7224, "step": 6689 }, { "epoch": 0.07998660911775607, "grad_norm": 2.4086506366729736, "learning_rate": 9.934674267170396e-06, "loss": 0.678, "step": 6690 }, { "epoch": 0.07999856526261673, "grad_norm": 2.4440155029296875, "learning_rate": 9.934643067916014e-06, "loss": 0.6294, "step": 6691 }, { "epoch": 0.08001052140747737, "grad_norm": 3.57401967048645, "learning_rate": 9.934611861262116e-06, "loss": 0.6359, "step": 6692 }, { "epoch": 0.08002247755233803, "grad_norm": 2.560149669647217, "learning_rate": 9.93458064720875e-06, "loss": 0.6232, "step": 6693 }, { "epoch": 0.08003443369719868, "grad_norm": 2.4684553146362305, "learning_rate": 9.934549425755963e-06, "loss": 0.7313, "step": 6694 }, { "epoch": 0.08004638984205932, "grad_norm": 2.478334903717041, "learning_rate": 9.934518196903802e-06, "loss": 0.5757, "step": 6695 }, { "epoch": 0.08005834598691998, "grad_norm": 11.201496124267578, "learning_rate": 9.934486960652312e-06, "loss": 0.703, "step": 6696 }, { "epoch": 0.08007030213178062, "grad_norm": 7.448606491088867, "learning_rate": 9.934455717001541e-06, "loss": 0.6105, "step": 6697 }, { "epoch": 0.08008225827664128, "grad_norm": 2.2861812114715576, "learning_rate": 9.934424465951538e-06, "loss": 0.6849, "step": 6698 }, { "epoch": 0.08009421442150193, "grad_norm": 2.5248982906341553, "learning_rate": 9.934393207502347e-06, "loss": 0.5906, "step": 6699 }, { "epoch": 0.08010617056636259, "grad_norm": 9.488547325134277, "learning_rate": 9.934361941654016e-06, "loss": 0.6336, "step": 6700 }, { "epoch": 0.08011812671122323, "grad_norm": 2.4614369869232178, "learning_rate": 9.934330668406592e-06, "loss": 0.7179, "step": 6701 }, { "epoch": 0.08013008285608389, "grad_norm": 3.4720797538757324, "learning_rate": 9.934299387760123e-06, "loss": 0.676, "step": 6702 }, { "epoch": 0.08014203900094453, "grad_norm": 2.171887159347534, "learning_rate": 9.934268099714653e-06, "loss": 0.6485, "step": 6703 }, { "epoch": 0.08015399514580519, "grad_norm": 2.1266133785247803, "learning_rate": 9.93423680427023e-06, "loss": 0.5771, "step": 6704 }, { "epoch": 0.08016595129066584, "grad_norm": 1.6852610111236572, "learning_rate": 9.9342055014269e-06, "loss": 0.6199, "step": 6705 }, { "epoch": 0.08017790743552648, "grad_norm": 2.338730812072754, "learning_rate": 9.934174191184714e-06, "loss": 0.6713, "step": 6706 }, { "epoch": 0.08018986358038714, "grad_norm": 2.7846920490264893, "learning_rate": 9.934142873543715e-06, "loss": 0.6458, "step": 6707 }, { "epoch": 0.08020181972524779, "grad_norm": 2.788141965866089, "learning_rate": 9.934111548503951e-06, "loss": 0.6499, "step": 6708 }, { "epoch": 0.08021377587010844, "grad_norm": 7.127279758453369, "learning_rate": 9.934080216065471e-06, "loss": 0.6362, "step": 6709 }, { "epoch": 0.08022573201496909, "grad_norm": 1.8895928859710693, "learning_rate": 9.934048876228318e-06, "loss": 0.6344, "step": 6710 }, { "epoch": 0.08023768815982975, "grad_norm": 2.2330918312072754, "learning_rate": 9.934017528992541e-06, "loss": 0.6496, "step": 6711 }, { "epoch": 0.08024964430469039, "grad_norm": 3.476715087890625, "learning_rate": 9.93398617435819e-06, "loss": 0.6974, "step": 6712 }, { "epoch": 0.08026160044955105, "grad_norm": 1.7189682722091675, "learning_rate": 9.933954812325307e-06, "loss": 0.575, "step": 6713 }, { "epoch": 0.0802735565944117, "grad_norm": 3.4662094116210938, "learning_rate": 9.93392344289394e-06, "loss": 0.6339, "step": 6714 }, { "epoch": 0.08028551273927235, "grad_norm": 5.593409538269043, "learning_rate": 9.93389206606414e-06, "loss": 0.7115, "step": 6715 }, { "epoch": 0.080297468884133, "grad_norm": 4.279990196228027, "learning_rate": 9.933860681835951e-06, "loss": 0.6227, "step": 6716 }, { "epoch": 0.08030942502899364, "grad_norm": 3.8407864570617676, "learning_rate": 9.93382929020942e-06, "loss": 0.5744, "step": 6717 }, { "epoch": 0.0803213811738543, "grad_norm": 1.9535996913909912, "learning_rate": 9.933797891184594e-06, "loss": 0.5926, "step": 6718 }, { "epoch": 0.08033333731871495, "grad_norm": 2.937849283218384, "learning_rate": 9.93376648476152e-06, "loss": 0.5291, "step": 6719 }, { "epoch": 0.0803452934635756, "grad_norm": 3.422567844390869, "learning_rate": 9.933735070940246e-06, "loss": 0.6304, "step": 6720 }, { "epoch": 0.08035724960843625, "grad_norm": 6.225504398345947, "learning_rate": 9.933703649720818e-06, "loss": 0.6374, "step": 6721 }, { "epoch": 0.08036920575329691, "grad_norm": 2.0102202892303467, "learning_rate": 9.933672221103285e-06, "loss": 0.6105, "step": 6722 }, { "epoch": 0.08038116189815755, "grad_norm": 1.926292896270752, "learning_rate": 9.933640785087693e-06, "loss": 0.6422, "step": 6723 }, { "epoch": 0.08039311804301821, "grad_norm": 2.37408185005188, "learning_rate": 9.933609341674089e-06, "loss": 0.6701, "step": 6724 }, { "epoch": 0.08040507418787886, "grad_norm": 6.122306823730469, "learning_rate": 9.933577890862521e-06, "loss": 0.5984, "step": 6725 }, { "epoch": 0.08041703033273952, "grad_norm": 2.9346935749053955, "learning_rate": 9.933546432653033e-06, "loss": 0.5744, "step": 6726 }, { "epoch": 0.08042898647760016, "grad_norm": 7.769791603088379, "learning_rate": 9.933514967045677e-06, "loss": 0.6897, "step": 6727 }, { "epoch": 0.08044094262246082, "grad_norm": 2.2210988998413086, "learning_rate": 9.933483494040497e-06, "loss": 0.5628, "step": 6728 }, { "epoch": 0.08045289876732147, "grad_norm": 4.243476867675781, "learning_rate": 9.933452013637542e-06, "loss": 0.6505, "step": 6729 }, { "epoch": 0.08046485491218211, "grad_norm": 8.03172779083252, "learning_rate": 9.933420525836856e-06, "loss": 0.6056, "step": 6730 }, { "epoch": 0.08047681105704277, "grad_norm": 3.911846876144409, "learning_rate": 9.93338903063849e-06, "loss": 0.6665, "step": 6731 }, { "epoch": 0.08048876720190341, "grad_norm": 3.966587543487549, "learning_rate": 9.933357528042487e-06, "loss": 0.6516, "step": 6732 }, { "epoch": 0.08050072334676407, "grad_norm": 5.504673004150391, "learning_rate": 9.9333260180489e-06, "loss": 0.6776, "step": 6733 }, { "epoch": 0.08051267949162472, "grad_norm": 1.6425223350524902, "learning_rate": 9.93329450065777e-06, "loss": 0.6871, "step": 6734 }, { "epoch": 0.08052463563648538, "grad_norm": 5.985398769378662, "learning_rate": 9.933262975869152e-06, "loss": 0.5919, "step": 6735 }, { "epoch": 0.08053659178134602, "grad_norm": 2.7056655883789062, "learning_rate": 9.933231443683084e-06, "loss": 0.6596, "step": 6736 }, { "epoch": 0.08054854792620668, "grad_norm": 2.4543652534484863, "learning_rate": 9.933199904099621e-06, "loss": 0.6003, "step": 6737 }, { "epoch": 0.08056050407106732, "grad_norm": 2.4298083782196045, "learning_rate": 9.933168357118805e-06, "loss": 0.6811, "step": 6738 }, { "epoch": 0.08057246021592798, "grad_norm": 2.0776162147521973, "learning_rate": 9.933136802740686e-06, "loss": 0.6266, "step": 6739 }, { "epoch": 0.08058441636078863, "grad_norm": 2.715099811553955, "learning_rate": 9.933105240965312e-06, "loss": 0.6234, "step": 6740 }, { "epoch": 0.08059637250564927, "grad_norm": 4.187997817993164, "learning_rate": 9.93307367179273e-06, "loss": 0.7374, "step": 6741 }, { "epoch": 0.08060832865050993, "grad_norm": 8.776756286621094, "learning_rate": 9.933042095222984e-06, "loss": 0.5922, "step": 6742 }, { "epoch": 0.08062028479537058, "grad_norm": 2.0381240844726562, "learning_rate": 9.933010511256124e-06, "loss": 0.5734, "step": 6743 }, { "epoch": 0.08063224094023123, "grad_norm": 3.710033893585205, "learning_rate": 9.9329789198922e-06, "loss": 0.7159, "step": 6744 }, { "epoch": 0.08064419708509188, "grad_norm": 3.866454601287842, "learning_rate": 9.932947321131254e-06, "loss": 0.6305, "step": 6745 }, { "epoch": 0.08065615322995254, "grad_norm": 2.790005922317505, "learning_rate": 9.932915714973337e-06, "loss": 0.7045, "step": 6746 }, { "epoch": 0.08066810937481318, "grad_norm": 1.6138092279434204, "learning_rate": 9.932884101418494e-06, "loss": 0.6028, "step": 6747 }, { "epoch": 0.08068006551967384, "grad_norm": 1.5362818241119385, "learning_rate": 9.932852480466774e-06, "loss": 0.5721, "step": 6748 }, { "epoch": 0.08069202166453449, "grad_norm": 2.666964530944824, "learning_rate": 9.932820852118225e-06, "loss": 0.5902, "step": 6749 }, { "epoch": 0.08070397780939514, "grad_norm": 3.8412420749664307, "learning_rate": 9.932789216372895e-06, "loss": 0.7403, "step": 6750 }, { "epoch": 0.08071593395425579, "grad_norm": 2.3100550174713135, "learning_rate": 9.932757573230828e-06, "loss": 0.5957, "step": 6751 }, { "epoch": 0.08072789009911645, "grad_norm": 2.625028371810913, "learning_rate": 9.932725922692075e-06, "loss": 0.6881, "step": 6752 }, { "epoch": 0.08073984624397709, "grad_norm": 2.931670904159546, "learning_rate": 9.93269426475668e-06, "loss": 0.5444, "step": 6753 }, { "epoch": 0.08075180238883774, "grad_norm": 1.5204296112060547, "learning_rate": 9.932662599424693e-06, "loss": 0.5886, "step": 6754 }, { "epoch": 0.0807637585336984, "grad_norm": 35.508182525634766, "learning_rate": 9.932630926696161e-06, "loss": 0.6704, "step": 6755 }, { "epoch": 0.08077571467855904, "grad_norm": 1.7924069166183472, "learning_rate": 9.932599246571133e-06, "loss": 0.6485, "step": 6756 }, { "epoch": 0.0807876708234197, "grad_norm": 5.466980934143066, "learning_rate": 9.932567559049653e-06, "loss": 0.6056, "step": 6757 }, { "epoch": 0.08079962696828034, "grad_norm": 2.27126407623291, "learning_rate": 9.932535864131772e-06, "loss": 0.6454, "step": 6758 }, { "epoch": 0.080811583113141, "grad_norm": 9.298717498779297, "learning_rate": 9.932504161817533e-06, "loss": 0.6302, "step": 6759 }, { "epoch": 0.08082353925800165, "grad_norm": 5.031497955322266, "learning_rate": 9.932472452106989e-06, "loss": 0.6641, "step": 6760 }, { "epoch": 0.0808354954028623, "grad_norm": 2.5067825317382812, "learning_rate": 9.932440735000185e-06, "loss": 0.6181, "step": 6761 }, { "epoch": 0.08084745154772295, "grad_norm": 5.606962203979492, "learning_rate": 9.932409010497169e-06, "loss": 0.64, "step": 6762 }, { "epoch": 0.08085940769258361, "grad_norm": 4.29669189453125, "learning_rate": 9.932377278597987e-06, "loss": 0.5272, "step": 6763 }, { "epoch": 0.08087136383744425, "grad_norm": 2.2229955196380615, "learning_rate": 9.932345539302687e-06, "loss": 0.685, "step": 6764 }, { "epoch": 0.0808833199823049, "grad_norm": 1.6077229976654053, "learning_rate": 9.932313792611318e-06, "loss": 0.6422, "step": 6765 }, { "epoch": 0.08089527612716556, "grad_norm": 3.8078858852386475, "learning_rate": 9.932282038523926e-06, "loss": 0.7132, "step": 6766 }, { "epoch": 0.0809072322720262, "grad_norm": 3.8190393447875977, "learning_rate": 9.932250277040563e-06, "loss": 0.6271, "step": 6767 }, { "epoch": 0.08091918841688686, "grad_norm": 9.306645393371582, "learning_rate": 9.93221850816127e-06, "loss": 0.5931, "step": 6768 }, { "epoch": 0.0809311445617475, "grad_norm": 1.5727331638336182, "learning_rate": 9.932186731886099e-06, "loss": 0.5221, "step": 6769 }, { "epoch": 0.08094310070660816, "grad_norm": 17.12554168701172, "learning_rate": 9.932154948215096e-06, "loss": 0.6458, "step": 6770 }, { "epoch": 0.08095505685146881, "grad_norm": 2.1204724311828613, "learning_rate": 9.932123157148309e-06, "loss": 0.6713, "step": 6771 }, { "epoch": 0.08096701299632947, "grad_norm": 2.9119832515716553, "learning_rate": 9.932091358685786e-06, "loss": 0.6335, "step": 6772 }, { "epoch": 0.08097896914119011, "grad_norm": 2.561190605163574, "learning_rate": 9.932059552827575e-06, "loss": 0.6293, "step": 6773 }, { "epoch": 0.08099092528605077, "grad_norm": 1.8019814491271973, "learning_rate": 9.932027739573722e-06, "loss": 0.5494, "step": 6774 }, { "epoch": 0.08100288143091142, "grad_norm": 3.079815149307251, "learning_rate": 9.931995918924278e-06, "loss": 0.6449, "step": 6775 }, { "epoch": 0.08101483757577206, "grad_norm": 2.46549654006958, "learning_rate": 9.931964090879287e-06, "loss": 0.6075, "step": 6776 }, { "epoch": 0.08102679372063272, "grad_norm": 2.135758399963379, "learning_rate": 9.931932255438797e-06, "loss": 0.7168, "step": 6777 }, { "epoch": 0.08103874986549336, "grad_norm": 2.175672769546509, "learning_rate": 9.93190041260286e-06, "loss": 0.5766, "step": 6778 }, { "epoch": 0.08105070601035402, "grad_norm": 11.364992141723633, "learning_rate": 9.931868562371518e-06, "loss": 0.6866, "step": 6779 }, { "epoch": 0.08106266215521467, "grad_norm": 3.48380446434021, "learning_rate": 9.931836704744825e-06, "loss": 0.6829, "step": 6780 }, { "epoch": 0.08107461830007533, "grad_norm": 2.893273115158081, "learning_rate": 9.931804839722822e-06, "loss": 0.59, "step": 6781 }, { "epoch": 0.08108657444493597, "grad_norm": 2.414409875869751, "learning_rate": 9.931772967305561e-06, "loss": 0.6757, "step": 6782 }, { "epoch": 0.08109853058979663, "grad_norm": 3.8767967224121094, "learning_rate": 9.93174108749309e-06, "loss": 0.5464, "step": 6783 }, { "epoch": 0.08111048673465727, "grad_norm": 2.2658042907714844, "learning_rate": 9.931709200285455e-06, "loss": 0.6954, "step": 6784 }, { "epoch": 0.08112244287951793, "grad_norm": 6.340396881103516, "learning_rate": 9.931677305682704e-06, "loss": 0.6629, "step": 6785 }, { "epoch": 0.08113439902437858, "grad_norm": 1.5020079612731934, "learning_rate": 9.931645403684886e-06, "loss": 0.4976, "step": 6786 }, { "epoch": 0.08114635516923924, "grad_norm": 1.9628307819366455, "learning_rate": 9.931613494292047e-06, "loss": 0.53, "step": 6787 }, { "epoch": 0.08115831131409988, "grad_norm": 1.668229579925537, "learning_rate": 9.931581577504237e-06, "loss": 0.6596, "step": 6788 }, { "epoch": 0.08117026745896053, "grad_norm": 2.5414717197418213, "learning_rate": 9.931549653321503e-06, "loss": 0.5815, "step": 6789 }, { "epoch": 0.08118222360382119, "grad_norm": 2.690585136413574, "learning_rate": 9.931517721743892e-06, "loss": 0.6164, "step": 6790 }, { "epoch": 0.08119417974868183, "grad_norm": 5.777253150939941, "learning_rate": 9.931485782771455e-06, "loss": 0.6655, "step": 6791 }, { "epoch": 0.08120613589354249, "grad_norm": 1.7497773170471191, "learning_rate": 9.931453836404237e-06, "loss": 0.6301, "step": 6792 }, { "epoch": 0.08121809203840313, "grad_norm": 2.710416793823242, "learning_rate": 9.931421882642285e-06, "loss": 0.6102, "step": 6793 }, { "epoch": 0.08123004818326379, "grad_norm": 1.7309622764587402, "learning_rate": 9.931389921485649e-06, "loss": 0.628, "step": 6794 }, { "epoch": 0.08124200432812444, "grad_norm": 1.8429526090621948, "learning_rate": 9.931357952934375e-06, "loss": 0.6745, "step": 6795 }, { "epoch": 0.0812539604729851, "grad_norm": 2.177818536758423, "learning_rate": 9.931325976988512e-06, "loss": 0.657, "step": 6796 }, { "epoch": 0.08126591661784574, "grad_norm": 2.2784852981567383, "learning_rate": 9.93129399364811e-06, "loss": 0.6735, "step": 6797 }, { "epoch": 0.0812778727627064, "grad_norm": 2.216860055923462, "learning_rate": 9.931262002913216e-06, "loss": 0.6243, "step": 6798 }, { "epoch": 0.08128982890756704, "grad_norm": 2.4049601554870605, "learning_rate": 9.931230004783876e-06, "loss": 0.6455, "step": 6799 }, { "epoch": 0.08130178505242769, "grad_norm": 3.4075262546539307, "learning_rate": 9.931197999260139e-06, "loss": 0.6532, "step": 6800 }, { "epoch": 0.08131374119728835, "grad_norm": 3.8164381980895996, "learning_rate": 9.93116598634205e-06, "loss": 0.6473, "step": 6801 }, { "epoch": 0.08132569734214899, "grad_norm": 1.914293646812439, "learning_rate": 9.931133966029664e-06, "loss": 0.509, "step": 6802 }, { "epoch": 0.08133765348700965, "grad_norm": 2.528331756591797, "learning_rate": 9.931101938323024e-06, "loss": 0.6758, "step": 6803 }, { "epoch": 0.0813496096318703, "grad_norm": 2.8921332359313965, "learning_rate": 9.93106990322218e-06, "loss": 0.5739, "step": 6804 }, { "epoch": 0.08136156577673095, "grad_norm": 2.2307522296905518, "learning_rate": 9.931037860727178e-06, "loss": 0.6783, "step": 6805 }, { "epoch": 0.0813735219215916, "grad_norm": 2.388693332672119, "learning_rate": 9.931005810838066e-06, "loss": 0.7201, "step": 6806 }, { "epoch": 0.08138547806645226, "grad_norm": 15.577055931091309, "learning_rate": 9.930973753554896e-06, "loss": 0.5733, "step": 6807 }, { "epoch": 0.0813974342113129, "grad_norm": 1.723615288734436, "learning_rate": 9.93094168887771e-06, "loss": 0.6587, "step": 6808 }, { "epoch": 0.08140939035617356, "grad_norm": 1.9215763807296753, "learning_rate": 9.930909616806563e-06, "loss": 0.6513, "step": 6809 }, { "epoch": 0.0814213465010342, "grad_norm": 1.6222434043884277, "learning_rate": 9.930877537341498e-06, "loss": 0.5791, "step": 6810 }, { "epoch": 0.08143330264589486, "grad_norm": 2.3388423919677734, "learning_rate": 9.930845450482565e-06, "loss": 0.5677, "step": 6811 }, { "epoch": 0.08144525879075551, "grad_norm": 2.2238097190856934, "learning_rate": 9.930813356229811e-06, "loss": 0.554, "step": 6812 }, { "epoch": 0.08145721493561615, "grad_norm": 2.353525400161743, "learning_rate": 9.930781254583285e-06, "loss": 0.6318, "step": 6813 }, { "epoch": 0.08146917108047681, "grad_norm": 25.878206253051758, "learning_rate": 9.930749145543036e-06, "loss": 0.661, "step": 6814 }, { "epoch": 0.08148112722533746, "grad_norm": 2.597310781478882, "learning_rate": 9.93071702910911e-06, "loss": 0.7111, "step": 6815 }, { "epoch": 0.08149308337019812, "grad_norm": 3.2513835430145264, "learning_rate": 9.930684905281556e-06, "loss": 0.6266, "step": 6816 }, { "epoch": 0.08150503951505876, "grad_norm": 2.773897886276245, "learning_rate": 9.930652774060424e-06, "loss": 0.6267, "step": 6817 }, { "epoch": 0.08151699565991942, "grad_norm": 2.388293981552124, "learning_rate": 9.930620635445758e-06, "loss": 0.564, "step": 6818 }, { "epoch": 0.08152895180478006, "grad_norm": 1.7002612352371216, "learning_rate": 9.93058848943761e-06, "loss": 0.6987, "step": 6819 }, { "epoch": 0.08154090794964072, "grad_norm": 3.9018659591674805, "learning_rate": 9.930556336036028e-06, "loss": 0.6707, "step": 6820 }, { "epoch": 0.08155286409450137, "grad_norm": 3.3632051944732666, "learning_rate": 9.930524175241059e-06, "loss": 0.7253, "step": 6821 }, { "epoch": 0.08156482023936203, "grad_norm": 2.5651845932006836, "learning_rate": 9.930492007052751e-06, "loss": 0.6014, "step": 6822 }, { "epoch": 0.08157677638422267, "grad_norm": 2.992471933364868, "learning_rate": 9.930459831471153e-06, "loss": 0.6312, "step": 6823 }, { "epoch": 0.08158873252908332, "grad_norm": 2.1112253665924072, "learning_rate": 9.930427648496313e-06, "loss": 0.6775, "step": 6824 }, { "epoch": 0.08160068867394397, "grad_norm": 3.736140012741089, "learning_rate": 9.93039545812828e-06, "loss": 0.6494, "step": 6825 }, { "epoch": 0.08161264481880462, "grad_norm": 4.3270063400268555, "learning_rate": 9.930363260367099e-06, "loss": 0.6374, "step": 6826 }, { "epoch": 0.08162460096366528, "grad_norm": 1.9415969848632812, "learning_rate": 9.930331055212823e-06, "loss": 0.6198, "step": 6827 }, { "epoch": 0.08163655710852592, "grad_norm": 2.2994818687438965, "learning_rate": 9.930298842665496e-06, "loss": 0.6699, "step": 6828 }, { "epoch": 0.08164851325338658, "grad_norm": 2.9821925163269043, "learning_rate": 9.930266622725171e-06, "loss": 0.6853, "step": 6829 }, { "epoch": 0.08166046939824723, "grad_norm": 1.9885547161102295, "learning_rate": 9.930234395391893e-06, "loss": 0.6057, "step": 6830 }, { "epoch": 0.08167242554310788, "grad_norm": 5.965402126312256, "learning_rate": 9.93020216066571e-06, "loss": 0.6781, "step": 6831 }, { "epoch": 0.08168438168796853, "grad_norm": 2.2967846393585205, "learning_rate": 9.93016991854667e-06, "loss": 0.6112, "step": 6832 }, { "epoch": 0.08169633783282919, "grad_norm": 2.229548215866089, "learning_rate": 9.930137669034824e-06, "loss": 0.597, "step": 6833 }, { "epoch": 0.08170829397768983, "grad_norm": 1.7451591491699219, "learning_rate": 9.930105412130222e-06, "loss": 0.6217, "step": 6834 }, { "epoch": 0.08172025012255048, "grad_norm": 2.499317169189453, "learning_rate": 9.930073147832905e-06, "loss": 0.6863, "step": 6835 }, { "epoch": 0.08173220626741114, "grad_norm": 2.184206962585449, "learning_rate": 9.930040876142928e-06, "loss": 0.5983, "step": 6836 }, { "epoch": 0.08174416241227178, "grad_norm": 1.736029028892517, "learning_rate": 9.930008597060336e-06, "loss": 0.7169, "step": 6837 }, { "epoch": 0.08175611855713244, "grad_norm": 2.382207155227661, "learning_rate": 9.929976310585182e-06, "loss": 0.6372, "step": 6838 }, { "epoch": 0.08176807470199308, "grad_norm": 1.9781262874603271, "learning_rate": 9.929944016717508e-06, "loss": 0.6733, "step": 6839 }, { "epoch": 0.08178003084685374, "grad_norm": 2.0177717208862305, "learning_rate": 9.929911715457366e-06, "loss": 0.5301, "step": 6840 }, { "epoch": 0.08179198699171439, "grad_norm": 2.082388162612915, "learning_rate": 9.929879406804804e-06, "loss": 0.5961, "step": 6841 }, { "epoch": 0.08180394313657505, "grad_norm": 2.427708625793457, "learning_rate": 9.92984709075987e-06, "loss": 0.6851, "step": 6842 }, { "epoch": 0.08181589928143569, "grad_norm": 9.534903526306152, "learning_rate": 9.929814767322611e-06, "loss": 0.5211, "step": 6843 }, { "epoch": 0.08182785542629635, "grad_norm": 1.5676696300506592, "learning_rate": 9.92978243649308e-06, "loss": 0.6546, "step": 6844 }, { "epoch": 0.081839811571157, "grad_norm": 1.8870124816894531, "learning_rate": 9.929750098271322e-06, "loss": 0.4896, "step": 6845 }, { "epoch": 0.08185176771601765, "grad_norm": 2.430440664291382, "learning_rate": 9.929717752657386e-06, "loss": 0.7775, "step": 6846 }, { "epoch": 0.0818637238608783, "grad_norm": 1.7065980434417725, "learning_rate": 9.92968539965132e-06, "loss": 0.5089, "step": 6847 }, { "epoch": 0.08187568000573894, "grad_norm": 2.0787508487701416, "learning_rate": 9.929653039253173e-06, "loss": 0.696, "step": 6848 }, { "epoch": 0.0818876361505996, "grad_norm": 7.291020393371582, "learning_rate": 9.929620671462996e-06, "loss": 0.7431, "step": 6849 }, { "epoch": 0.08189959229546025, "grad_norm": 3.7128002643585205, "learning_rate": 9.929588296280833e-06, "loss": 0.66, "step": 6850 }, { "epoch": 0.0819115484403209, "grad_norm": 2.0789871215820312, "learning_rate": 9.929555913706737e-06, "loss": 0.6075, "step": 6851 }, { "epoch": 0.08192350458518155, "grad_norm": 13.454051971435547, "learning_rate": 9.929523523740751e-06, "loss": 0.6008, "step": 6852 }, { "epoch": 0.08193546073004221, "grad_norm": 2.015479803085327, "learning_rate": 9.92949112638293e-06, "loss": 0.672, "step": 6853 }, { "epoch": 0.08194741687490285, "grad_norm": 5.207844257354736, "learning_rate": 9.929458721633318e-06, "loss": 0.6968, "step": 6854 }, { "epoch": 0.08195937301976351, "grad_norm": 5.6125288009643555, "learning_rate": 9.929426309491966e-06, "loss": 0.5971, "step": 6855 }, { "epoch": 0.08197132916462416, "grad_norm": 2.0419204235076904, "learning_rate": 9.929393889958923e-06, "loss": 0.5667, "step": 6856 }, { "epoch": 0.08198328530948482, "grad_norm": 3.385753631591797, "learning_rate": 9.929361463034234e-06, "loss": 0.5756, "step": 6857 }, { "epoch": 0.08199524145434546, "grad_norm": 1.557227373123169, "learning_rate": 9.929329028717952e-06, "loss": 0.5978, "step": 6858 }, { "epoch": 0.0820071975992061, "grad_norm": 2.519822835922241, "learning_rate": 9.92929658701012e-06, "loss": 0.6378, "step": 6859 }, { "epoch": 0.08201915374406676, "grad_norm": 2.2342300415039062, "learning_rate": 9.929264137910794e-06, "loss": 0.6831, "step": 6860 }, { "epoch": 0.08203110988892741, "grad_norm": 2.0400049686431885, "learning_rate": 9.929231681420018e-06, "loss": 0.6914, "step": 6861 }, { "epoch": 0.08204306603378807, "grad_norm": 2.894912004470825, "learning_rate": 9.92919921753784e-06, "loss": 0.6702, "step": 6862 }, { "epoch": 0.08205502217864871, "grad_norm": 8.743091583251953, "learning_rate": 9.92916674626431e-06, "loss": 0.5649, "step": 6863 }, { "epoch": 0.08206697832350937, "grad_norm": 2.655303716659546, "learning_rate": 9.92913426759948e-06, "loss": 0.7031, "step": 6864 }, { "epoch": 0.08207893446837002, "grad_norm": 3.3783462047576904, "learning_rate": 9.929101781543393e-06, "loss": 0.7231, "step": 6865 }, { "epoch": 0.08209089061323067, "grad_norm": 3.342103958129883, "learning_rate": 9.929069288096102e-06, "loss": 0.6501, "step": 6866 }, { "epoch": 0.08210284675809132, "grad_norm": 3.087238073348999, "learning_rate": 9.929036787257654e-06, "loss": 0.6392, "step": 6867 }, { "epoch": 0.08211480290295198, "grad_norm": 2.034318208694458, "learning_rate": 9.929004279028096e-06, "loss": 0.6226, "step": 6868 }, { "epoch": 0.08212675904781262, "grad_norm": 2.2730493545532227, "learning_rate": 9.92897176340748e-06, "loss": 0.6469, "step": 6869 }, { "epoch": 0.08213871519267328, "grad_norm": 3.1066787242889404, "learning_rate": 9.928939240395852e-06, "loss": 0.5844, "step": 6870 }, { "epoch": 0.08215067133753393, "grad_norm": 3.3516907691955566, "learning_rate": 9.928906709993264e-06, "loss": 0.5292, "step": 6871 }, { "epoch": 0.08216262748239457, "grad_norm": 6.739714622497559, "learning_rate": 9.928874172199762e-06, "loss": 0.6116, "step": 6872 }, { "epoch": 0.08217458362725523, "grad_norm": 3.49190092086792, "learning_rate": 9.928841627015395e-06, "loss": 0.6685, "step": 6873 }, { "epoch": 0.08218653977211587, "grad_norm": 7.572968482971191, "learning_rate": 9.928809074440213e-06, "loss": 0.5947, "step": 6874 }, { "epoch": 0.08219849591697653, "grad_norm": 2.286327362060547, "learning_rate": 9.928776514474265e-06, "loss": 0.5661, "step": 6875 }, { "epoch": 0.08221045206183718, "grad_norm": 7.757554531097412, "learning_rate": 9.928743947117598e-06, "loss": 0.5905, "step": 6876 }, { "epoch": 0.08222240820669784, "grad_norm": 3.047696113586426, "learning_rate": 9.928711372370263e-06, "loss": 0.5791, "step": 6877 }, { "epoch": 0.08223436435155848, "grad_norm": 1.9851601123809814, "learning_rate": 9.928678790232307e-06, "loss": 0.6605, "step": 6878 }, { "epoch": 0.08224632049641914, "grad_norm": 4.09773588180542, "learning_rate": 9.92864620070378e-06, "loss": 0.6162, "step": 6879 }, { "epoch": 0.08225827664127978, "grad_norm": 3.846578598022461, "learning_rate": 9.92861360378473e-06, "loss": 0.6083, "step": 6880 }, { "epoch": 0.08227023278614044, "grad_norm": 2.5864617824554443, "learning_rate": 9.928580999475207e-06, "loss": 0.6402, "step": 6881 }, { "epoch": 0.08228218893100109, "grad_norm": 4.157655239105225, "learning_rate": 9.928548387775259e-06, "loss": 0.6048, "step": 6882 }, { "epoch": 0.08229414507586173, "grad_norm": 2.1564714908599854, "learning_rate": 9.928515768684935e-06, "loss": 0.6907, "step": 6883 }, { "epoch": 0.08230610122072239, "grad_norm": 6.149139404296875, "learning_rate": 9.928483142204284e-06, "loss": 0.5718, "step": 6884 }, { "epoch": 0.08231805736558304, "grad_norm": 3.134655714035034, "learning_rate": 9.928450508333355e-06, "loss": 0.6259, "step": 6885 }, { "epoch": 0.0823300135104437, "grad_norm": 2.5980961322784424, "learning_rate": 9.928417867072198e-06, "loss": 0.5982, "step": 6886 }, { "epoch": 0.08234196965530434, "grad_norm": 6.273279666900635, "learning_rate": 9.92838521842086e-06, "loss": 0.6817, "step": 6887 }, { "epoch": 0.082353925800165, "grad_norm": 2.5879733562469482, "learning_rate": 9.92835256237939e-06, "loss": 0.564, "step": 6888 }, { "epoch": 0.08236588194502564, "grad_norm": 3.6386559009552, "learning_rate": 9.92831989894784e-06, "loss": 0.5288, "step": 6889 }, { "epoch": 0.0823778380898863, "grad_norm": 5.077692031860352, "learning_rate": 9.928287228126255e-06, "loss": 0.6127, "step": 6890 }, { "epoch": 0.08238979423474695, "grad_norm": 2.2974374294281006, "learning_rate": 9.928254549914686e-06, "loss": 0.6337, "step": 6891 }, { "epoch": 0.0824017503796076, "grad_norm": 2.851835250854492, "learning_rate": 9.928221864313183e-06, "loss": 0.5065, "step": 6892 }, { "epoch": 0.08241370652446825, "grad_norm": 2.7154157161712646, "learning_rate": 9.928189171321793e-06, "loss": 0.6149, "step": 6893 }, { "epoch": 0.08242566266932891, "grad_norm": 3.3665976524353027, "learning_rate": 9.928156470940565e-06, "loss": 0.7052, "step": 6894 }, { "epoch": 0.08243761881418955, "grad_norm": 2.0120060443878174, "learning_rate": 9.92812376316955e-06, "loss": 0.5699, "step": 6895 }, { "epoch": 0.0824495749590502, "grad_norm": 2.2712485790252686, "learning_rate": 9.928091048008795e-06, "loss": 0.6813, "step": 6896 }, { "epoch": 0.08246153110391086, "grad_norm": 3.635817050933838, "learning_rate": 9.928058325458352e-06, "loss": 0.6325, "step": 6897 }, { "epoch": 0.0824734872487715, "grad_norm": 2.8530659675598145, "learning_rate": 9.928025595518264e-06, "loss": 0.6589, "step": 6898 }, { "epoch": 0.08248544339363216, "grad_norm": 4.692137241363525, "learning_rate": 9.927992858188588e-06, "loss": 0.7158, "step": 6899 }, { "epoch": 0.0824973995384928, "grad_norm": 4.603412628173828, "learning_rate": 9.927960113469368e-06, "loss": 0.6562, "step": 6900 }, { "epoch": 0.08250935568335346, "grad_norm": 4.630460739135742, "learning_rate": 9.927927361360655e-06, "loss": 0.6822, "step": 6901 }, { "epoch": 0.08252131182821411, "grad_norm": 4.959305763244629, "learning_rate": 9.927894601862495e-06, "loss": 0.6725, "step": 6902 }, { "epoch": 0.08253326797307477, "grad_norm": 4.368716716766357, "learning_rate": 9.92786183497494e-06, "loss": 0.6377, "step": 6903 }, { "epoch": 0.08254522411793541, "grad_norm": 2.9065873622894287, "learning_rate": 9.927829060698042e-06, "loss": 0.6781, "step": 6904 }, { "epoch": 0.08255718026279607, "grad_norm": 2.0926969051361084, "learning_rate": 9.927796279031842e-06, "loss": 0.7371, "step": 6905 }, { "epoch": 0.08256913640765672, "grad_norm": 2.7553157806396484, "learning_rate": 9.927763489976399e-06, "loss": 0.646, "step": 6906 }, { "epoch": 0.08258109255251736, "grad_norm": 2.1020381450653076, "learning_rate": 9.927730693531755e-06, "loss": 0.6247, "step": 6907 }, { "epoch": 0.08259304869737802, "grad_norm": 4.319429874420166, "learning_rate": 9.92769788969796e-06, "loss": 0.6339, "step": 6908 }, { "epoch": 0.08260500484223866, "grad_norm": 2.114452600479126, "learning_rate": 9.927665078475066e-06, "loss": 0.6455, "step": 6909 }, { "epoch": 0.08261696098709932, "grad_norm": 2.1987948417663574, "learning_rate": 9.927632259863119e-06, "loss": 0.6571, "step": 6910 }, { "epoch": 0.08262891713195997, "grad_norm": 4.6240973472595215, "learning_rate": 9.92759943386217e-06, "loss": 0.61, "step": 6911 }, { "epoch": 0.08264087327682063, "grad_norm": 2.113765239715576, "learning_rate": 9.92756660047227e-06, "loss": 0.6261, "step": 6912 }, { "epoch": 0.08265282942168127, "grad_norm": 2.8409759998321533, "learning_rate": 9.927533759693465e-06, "loss": 0.6764, "step": 6913 }, { "epoch": 0.08266478556654193, "grad_norm": 2.2204723358154297, "learning_rate": 9.927500911525807e-06, "loss": 0.6667, "step": 6914 }, { "epoch": 0.08267674171140257, "grad_norm": 5.588034152984619, "learning_rate": 9.927468055969342e-06, "loss": 0.7088, "step": 6915 }, { "epoch": 0.08268869785626323, "grad_norm": 2.1092801094055176, "learning_rate": 9.927435193024123e-06, "loss": 0.6633, "step": 6916 }, { "epoch": 0.08270065400112388, "grad_norm": 3.781649112701416, "learning_rate": 9.927402322690196e-06, "loss": 0.6565, "step": 6917 }, { "epoch": 0.08271261014598452, "grad_norm": 3.1853299140930176, "learning_rate": 9.927369444967611e-06, "loss": 0.626, "step": 6918 }, { "epoch": 0.08272456629084518, "grad_norm": 5.138570785522461, "learning_rate": 9.92733655985642e-06, "loss": 0.7041, "step": 6919 }, { "epoch": 0.08273652243570583, "grad_norm": 2.960106611251831, "learning_rate": 9.92730366735667e-06, "loss": 0.6512, "step": 6920 }, { "epoch": 0.08274847858056648, "grad_norm": 4.162737846374512, "learning_rate": 9.92727076746841e-06, "loss": 0.6055, "step": 6921 }, { "epoch": 0.08276043472542713, "grad_norm": 2.6221067905426025, "learning_rate": 9.92723786019169e-06, "loss": 0.7468, "step": 6922 }, { "epoch": 0.08277239087028779, "grad_norm": 2.5839619636535645, "learning_rate": 9.92720494552656e-06, "loss": 0.5974, "step": 6923 }, { "epoch": 0.08278434701514843, "grad_norm": 2.2623751163482666, "learning_rate": 9.927172023473069e-06, "loss": 0.6516, "step": 6924 }, { "epoch": 0.08279630316000909, "grad_norm": 7.599514484405518, "learning_rate": 9.927139094031264e-06, "loss": 0.6605, "step": 6925 }, { "epoch": 0.08280825930486974, "grad_norm": 2.264474391937256, "learning_rate": 9.927106157201197e-06, "loss": 0.639, "step": 6926 }, { "epoch": 0.0828202154497304, "grad_norm": 2.824209451675415, "learning_rate": 9.927073212982919e-06, "loss": 0.6259, "step": 6927 }, { "epoch": 0.08283217159459104, "grad_norm": 3.4474570751190186, "learning_rate": 9.927040261376474e-06, "loss": 0.6225, "step": 6928 }, { "epoch": 0.0828441277394517, "grad_norm": 2.8605873584747314, "learning_rate": 9.927007302381917e-06, "loss": 0.6504, "step": 6929 }, { "epoch": 0.08285608388431234, "grad_norm": 5.160881996154785, "learning_rate": 9.926974335999294e-06, "loss": 0.6598, "step": 6930 }, { "epoch": 0.08286804002917299, "grad_norm": 2.013530731201172, "learning_rate": 9.926941362228655e-06, "loss": 0.66, "step": 6931 }, { "epoch": 0.08287999617403365, "grad_norm": 2.8263745307922363, "learning_rate": 9.92690838107005e-06, "loss": 0.6248, "step": 6932 }, { "epoch": 0.08289195231889429, "grad_norm": 4.08642053604126, "learning_rate": 9.92687539252353e-06, "loss": 0.7145, "step": 6933 }, { "epoch": 0.08290390846375495, "grad_norm": 5.237629413604736, "learning_rate": 9.926842396589143e-06, "loss": 0.6533, "step": 6934 }, { "epoch": 0.0829158646086156, "grad_norm": 2.449098825454712, "learning_rate": 9.926809393266936e-06, "loss": 0.6641, "step": 6935 }, { "epoch": 0.08292782075347625, "grad_norm": 2.7177846431732178, "learning_rate": 9.926776382556961e-06, "loss": 0.6554, "step": 6936 }, { "epoch": 0.0829397768983369, "grad_norm": 2.3089632987976074, "learning_rate": 9.926743364459268e-06, "loss": 0.554, "step": 6937 }, { "epoch": 0.08295173304319756, "grad_norm": 2.238557815551758, "learning_rate": 9.926710338973907e-06, "loss": 0.6024, "step": 6938 }, { "epoch": 0.0829636891880582, "grad_norm": 2.874864339828491, "learning_rate": 9.926677306100925e-06, "loss": 0.6415, "step": 6939 }, { "epoch": 0.08297564533291886, "grad_norm": 1.582768201828003, "learning_rate": 9.926644265840373e-06, "loss": 0.5969, "step": 6940 }, { "epoch": 0.0829876014777795, "grad_norm": 2.470899820327759, "learning_rate": 9.926611218192302e-06, "loss": 0.5849, "step": 6941 }, { "epoch": 0.08299955762264015, "grad_norm": 2.1055660247802734, "learning_rate": 9.926578163156757e-06, "loss": 0.567, "step": 6942 }, { "epoch": 0.08301151376750081, "grad_norm": 4.379851341247559, "learning_rate": 9.926545100733793e-06, "loss": 0.6392, "step": 6943 }, { "epoch": 0.08302346991236145, "grad_norm": 1.586596965789795, "learning_rate": 9.926512030923457e-06, "loss": 0.6265, "step": 6944 }, { "epoch": 0.08303542605722211, "grad_norm": 2.0541179180145264, "learning_rate": 9.926478953725797e-06, "loss": 0.6509, "step": 6945 }, { "epoch": 0.08304738220208276, "grad_norm": 7.7682671546936035, "learning_rate": 9.926445869140866e-06, "loss": 0.5648, "step": 6946 }, { "epoch": 0.08305933834694341, "grad_norm": 3.1984143257141113, "learning_rate": 9.92641277716871e-06, "loss": 0.5565, "step": 6947 }, { "epoch": 0.08307129449180406, "grad_norm": 5.042453289031982, "learning_rate": 9.926379677809384e-06, "loss": 0.6517, "step": 6948 }, { "epoch": 0.08308325063666472, "grad_norm": 2.5076863765716553, "learning_rate": 9.926346571062931e-06, "loss": 0.7112, "step": 6949 }, { "epoch": 0.08309520678152536, "grad_norm": 3.719693422317505, "learning_rate": 9.926313456929403e-06, "loss": 0.5784, "step": 6950 }, { "epoch": 0.08310716292638602, "grad_norm": 8.84671401977539, "learning_rate": 9.926280335408854e-06, "loss": 0.5995, "step": 6951 }, { "epoch": 0.08311911907124667, "grad_norm": 2.7193856239318848, "learning_rate": 9.926247206501327e-06, "loss": 0.5237, "step": 6952 }, { "epoch": 0.08313107521610733, "grad_norm": 2.5253493785858154, "learning_rate": 9.926214070206876e-06, "loss": 0.6831, "step": 6953 }, { "epoch": 0.08314303136096797, "grad_norm": 8.206076622009277, "learning_rate": 9.92618092652555e-06, "loss": 0.6494, "step": 6954 }, { "epoch": 0.08315498750582861, "grad_norm": 1.8476825952529907, "learning_rate": 9.926147775457398e-06, "loss": 0.6122, "step": 6955 }, { "epoch": 0.08316694365068927, "grad_norm": 4.7240447998046875, "learning_rate": 9.92611461700247e-06, "loss": 0.5614, "step": 6956 }, { "epoch": 0.08317889979554992, "grad_norm": 6.46399450302124, "learning_rate": 9.926081451160815e-06, "loss": 0.5722, "step": 6957 }, { "epoch": 0.08319085594041058, "grad_norm": 5.070662975311279, "learning_rate": 9.926048277932483e-06, "loss": 0.5907, "step": 6958 }, { "epoch": 0.08320281208527122, "grad_norm": 2.492964506149292, "learning_rate": 9.926015097317525e-06, "loss": 0.6565, "step": 6959 }, { "epoch": 0.08321476823013188, "grad_norm": 3.4305384159088135, "learning_rate": 9.925981909315989e-06, "loss": 0.7501, "step": 6960 }, { "epoch": 0.08322672437499253, "grad_norm": 2.9826767444610596, "learning_rate": 9.925948713927927e-06, "loss": 0.6145, "step": 6961 }, { "epoch": 0.08323868051985318, "grad_norm": 1.8483538627624512, "learning_rate": 9.925915511153386e-06, "loss": 0.6215, "step": 6962 }, { "epoch": 0.08325063666471383, "grad_norm": 2.3690333366394043, "learning_rate": 9.925882300992418e-06, "loss": 0.5561, "step": 6963 }, { "epoch": 0.08326259280957449, "grad_norm": 20.066816329956055, "learning_rate": 9.925849083445071e-06, "loss": 0.6869, "step": 6964 }, { "epoch": 0.08327454895443513, "grad_norm": 2.244993209838867, "learning_rate": 9.925815858511399e-06, "loss": 0.6765, "step": 6965 }, { "epoch": 0.08328650509929578, "grad_norm": 5.8659796714782715, "learning_rate": 9.925782626191444e-06, "loss": 0.615, "step": 6966 }, { "epoch": 0.08329846124415644, "grad_norm": 2.51651930809021, "learning_rate": 9.925749386485263e-06, "loss": 0.6233, "step": 6967 }, { "epoch": 0.08331041738901708, "grad_norm": 4.635591983795166, "learning_rate": 9.925716139392903e-06, "loss": 0.6102, "step": 6968 }, { "epoch": 0.08332237353387774, "grad_norm": 3.520745277404785, "learning_rate": 9.925682884914413e-06, "loss": 0.5689, "step": 6969 }, { "epoch": 0.08333432967873838, "grad_norm": 8.044305801391602, "learning_rate": 9.925649623049847e-06, "loss": 0.6469, "step": 6970 }, { "epoch": 0.08334628582359904, "grad_norm": 6.606931209564209, "learning_rate": 9.92561635379925e-06, "loss": 0.6464, "step": 6971 }, { "epoch": 0.08335824196845969, "grad_norm": 13.796091079711914, "learning_rate": 9.925583077162672e-06, "loss": 0.5397, "step": 6972 }, { "epoch": 0.08337019811332035, "grad_norm": 2.0864906311035156, "learning_rate": 9.925549793140168e-06, "loss": 0.657, "step": 6973 }, { "epoch": 0.08338215425818099, "grad_norm": 2.2465426921844482, "learning_rate": 9.925516501731783e-06, "loss": 0.5179, "step": 6974 }, { "epoch": 0.08339411040304165, "grad_norm": 2.8627333641052246, "learning_rate": 9.925483202937566e-06, "loss": 0.6336, "step": 6975 }, { "epoch": 0.0834060665479023, "grad_norm": 7.27118444442749, "learning_rate": 9.925449896757573e-06, "loss": 0.6673, "step": 6976 }, { "epoch": 0.08341802269276294, "grad_norm": 1.9184765815734863, "learning_rate": 9.925416583191849e-06, "loss": 0.6452, "step": 6977 }, { "epoch": 0.0834299788376236, "grad_norm": 2.086822748184204, "learning_rate": 9.925383262240447e-06, "loss": 0.6108, "step": 6978 }, { "epoch": 0.08344193498248424, "grad_norm": 2.083793878555298, "learning_rate": 9.925349933903413e-06, "loss": 0.6068, "step": 6979 }, { "epoch": 0.0834538911273449, "grad_norm": 2.8467066287994385, "learning_rate": 9.925316598180802e-06, "loss": 0.6183, "step": 6980 }, { "epoch": 0.08346584727220555, "grad_norm": 2.0240628719329834, "learning_rate": 9.925283255072657e-06, "loss": 0.6337, "step": 6981 }, { "epoch": 0.0834778034170662, "grad_norm": 2.082573890686035, "learning_rate": 9.925249904579034e-06, "loss": 0.6499, "step": 6982 }, { "epoch": 0.08348975956192685, "grad_norm": 6.975724697113037, "learning_rate": 9.925216546699982e-06, "loss": 0.629, "step": 6983 }, { "epoch": 0.08350171570678751, "grad_norm": 3.192070245742798, "learning_rate": 9.925183181435551e-06, "loss": 0.7286, "step": 6984 }, { "epoch": 0.08351367185164815, "grad_norm": 2.749953508377075, "learning_rate": 9.92514980878579e-06, "loss": 0.6801, "step": 6985 }, { "epoch": 0.08352562799650881, "grad_norm": 3.94209885597229, "learning_rate": 9.925116428750749e-06, "loss": 0.6007, "step": 6986 }, { "epoch": 0.08353758414136946, "grad_norm": 3.359048843383789, "learning_rate": 9.925083041330477e-06, "loss": 0.5191, "step": 6987 }, { "epoch": 0.08354954028623011, "grad_norm": 9.1320161819458, "learning_rate": 9.925049646525027e-06, "loss": 0.6393, "step": 6988 }, { "epoch": 0.08356149643109076, "grad_norm": 1.7492917776107788, "learning_rate": 9.925016244334447e-06, "loss": 0.7363, "step": 6989 }, { "epoch": 0.0835734525759514, "grad_norm": 1.8906803131103516, "learning_rate": 9.924982834758789e-06, "loss": 0.6405, "step": 6990 }, { "epoch": 0.08358540872081206, "grad_norm": 2.3889029026031494, "learning_rate": 9.9249494177981e-06, "loss": 0.6, "step": 6991 }, { "epoch": 0.08359736486567271, "grad_norm": 2.0014047622680664, "learning_rate": 9.924915993452433e-06, "loss": 0.6323, "step": 6992 }, { "epoch": 0.08360932101053337, "grad_norm": 23.077627182006836, "learning_rate": 9.924882561721837e-06, "loss": 0.5556, "step": 6993 }, { "epoch": 0.08362127715539401, "grad_norm": 14.441991806030273, "learning_rate": 9.92484912260636e-06, "loss": 0.5538, "step": 6994 }, { "epoch": 0.08363323330025467, "grad_norm": 2.3496592044830322, "learning_rate": 9.924815676106057e-06, "loss": 0.6713, "step": 6995 }, { "epoch": 0.08364518944511531, "grad_norm": 3.4021565914154053, "learning_rate": 9.924782222220973e-06, "loss": 0.5794, "step": 6996 }, { "epoch": 0.08365714558997597, "grad_norm": 2.5888314247131348, "learning_rate": 9.924748760951162e-06, "loss": 0.6187, "step": 6997 }, { "epoch": 0.08366910173483662, "grad_norm": 2.205249071121216, "learning_rate": 9.924715292296672e-06, "loss": 0.6481, "step": 6998 }, { "epoch": 0.08368105787969728, "grad_norm": 2.8074450492858887, "learning_rate": 9.924681816257555e-06, "loss": 0.5969, "step": 6999 }, { "epoch": 0.08369301402455792, "grad_norm": 4.656020164489746, "learning_rate": 9.92464833283386e-06, "loss": 0.672, "step": 7000 }, { "epoch": 0.08370497016941857, "grad_norm": 2.4934558868408203, "learning_rate": 9.924614842025636e-06, "loss": 0.641, "step": 7001 }, { "epoch": 0.08371692631427922, "grad_norm": 2.9320037364959717, "learning_rate": 9.924581343832935e-06, "loss": 0.6227, "step": 7002 }, { "epoch": 0.08372888245913987, "grad_norm": 2.5082058906555176, "learning_rate": 9.924547838255807e-06, "loss": 0.6858, "step": 7003 }, { "epoch": 0.08374083860400053, "grad_norm": 3.388328790664673, "learning_rate": 9.924514325294302e-06, "loss": 0.6578, "step": 7004 }, { "epoch": 0.08375279474886117, "grad_norm": 2.3583199977874756, "learning_rate": 9.924480804948471e-06, "loss": 0.6203, "step": 7005 }, { "epoch": 0.08376475089372183, "grad_norm": 2.316465377807617, "learning_rate": 9.924447277218364e-06, "loss": 0.7304, "step": 7006 }, { "epoch": 0.08377670703858248, "grad_norm": 3.248420238494873, "learning_rate": 9.924413742104029e-06, "loss": 0.6656, "step": 7007 }, { "epoch": 0.08378866318344314, "grad_norm": 2.579577922821045, "learning_rate": 9.924380199605518e-06, "loss": 0.6273, "step": 7008 }, { "epoch": 0.08380061932830378, "grad_norm": 2.002908229827881, "learning_rate": 9.924346649722882e-06, "loss": 0.6461, "step": 7009 }, { "epoch": 0.08381257547316444, "grad_norm": 3.0186123847961426, "learning_rate": 9.92431309245617e-06, "loss": 0.608, "step": 7010 }, { "epoch": 0.08382453161802508, "grad_norm": 1.4857317209243774, "learning_rate": 9.924279527805435e-06, "loss": 0.5942, "step": 7011 }, { "epoch": 0.08383648776288574, "grad_norm": 2.7480711936950684, "learning_rate": 9.924245955770724e-06, "loss": 0.6711, "step": 7012 }, { "epoch": 0.08384844390774639, "grad_norm": 3.311770439147949, "learning_rate": 9.924212376352089e-06, "loss": 0.6711, "step": 7013 }, { "epoch": 0.08386040005260703, "grad_norm": 10.769469261169434, "learning_rate": 9.92417878954958e-06, "loss": 0.6593, "step": 7014 }, { "epoch": 0.08387235619746769, "grad_norm": 2.3223447799682617, "learning_rate": 9.924145195363247e-06, "loss": 0.6056, "step": 7015 }, { "epoch": 0.08388431234232833, "grad_norm": 2.0327296257019043, "learning_rate": 9.924111593793143e-06, "loss": 0.6212, "step": 7016 }, { "epoch": 0.083896268487189, "grad_norm": 1.7233011722564697, "learning_rate": 9.924077984839313e-06, "loss": 0.6426, "step": 7017 }, { "epoch": 0.08390822463204964, "grad_norm": 3.9458539485931396, "learning_rate": 9.924044368501811e-06, "loss": 0.6204, "step": 7018 }, { "epoch": 0.0839201807769103, "grad_norm": 3.402679204940796, "learning_rate": 9.924010744780688e-06, "loss": 0.6763, "step": 7019 }, { "epoch": 0.08393213692177094, "grad_norm": 8.076944351196289, "learning_rate": 9.923977113675993e-06, "loss": 0.6729, "step": 7020 }, { "epoch": 0.0839440930666316, "grad_norm": 1.8394707441329956, "learning_rate": 9.923943475187776e-06, "loss": 0.5444, "step": 7021 }, { "epoch": 0.08395604921149225, "grad_norm": 2.399657964706421, "learning_rate": 9.92390982931609e-06, "loss": 0.5921, "step": 7022 }, { "epoch": 0.0839680053563529, "grad_norm": 1.8312287330627441, "learning_rate": 9.923876176060983e-06, "loss": 0.5803, "step": 7023 }, { "epoch": 0.08397996150121355, "grad_norm": 3.0773532390594482, "learning_rate": 9.923842515422508e-06, "loss": 0.6101, "step": 7024 }, { "epoch": 0.0839919176460742, "grad_norm": 3.588484525680542, "learning_rate": 9.92380884740071e-06, "loss": 0.6477, "step": 7025 }, { "epoch": 0.08400387379093485, "grad_norm": 2.723144292831421, "learning_rate": 9.923775171995645e-06, "loss": 0.6282, "step": 7026 }, { "epoch": 0.0840158299357955, "grad_norm": 1.982698678970337, "learning_rate": 9.923741489207362e-06, "loss": 0.5738, "step": 7027 }, { "epoch": 0.08402778608065616, "grad_norm": 3.1850643157958984, "learning_rate": 9.92370779903591e-06, "loss": 0.5763, "step": 7028 }, { "epoch": 0.0840397422255168, "grad_norm": 8.236279487609863, "learning_rate": 9.923674101481342e-06, "loss": 0.5525, "step": 7029 }, { "epoch": 0.08405169837037746, "grad_norm": 3.046189069747925, "learning_rate": 9.923640396543706e-06, "loss": 0.5787, "step": 7030 }, { "epoch": 0.0840636545152381, "grad_norm": 2.5019779205322266, "learning_rate": 9.923606684223055e-06, "loss": 0.5945, "step": 7031 }, { "epoch": 0.08407561066009876, "grad_norm": 5.318169593811035, "learning_rate": 9.923572964519437e-06, "loss": 0.7011, "step": 7032 }, { "epoch": 0.08408756680495941, "grad_norm": 2.2864739894866943, "learning_rate": 9.923539237432904e-06, "loss": 0.6369, "step": 7033 }, { "epoch": 0.08409952294982007, "grad_norm": 3.2323787212371826, "learning_rate": 9.923505502963508e-06, "loss": 0.653, "step": 7034 }, { "epoch": 0.08411147909468071, "grad_norm": 5.183863639831543, "learning_rate": 9.923471761111297e-06, "loss": 0.6583, "step": 7035 }, { "epoch": 0.08412343523954136, "grad_norm": 1.9942872524261475, "learning_rate": 9.923438011876321e-06, "loss": 0.6227, "step": 7036 }, { "epoch": 0.08413539138440201, "grad_norm": 1.7011711597442627, "learning_rate": 9.923404255258633e-06, "loss": 0.6741, "step": 7037 }, { "epoch": 0.08414734752926266, "grad_norm": 2.105159282684326, "learning_rate": 9.923370491258284e-06, "loss": 0.655, "step": 7038 }, { "epoch": 0.08415930367412332, "grad_norm": 3.218635082244873, "learning_rate": 9.923336719875323e-06, "loss": 0.5912, "step": 7039 }, { "epoch": 0.08417125981898396, "grad_norm": 3.8677453994750977, "learning_rate": 9.9233029411098e-06, "loss": 0.6459, "step": 7040 }, { "epoch": 0.08418321596384462, "grad_norm": 4.340719699859619, "learning_rate": 9.92326915496177e-06, "loss": 0.5819, "step": 7041 }, { "epoch": 0.08419517210870527, "grad_norm": 2.551069736480713, "learning_rate": 9.923235361431277e-06, "loss": 0.577, "step": 7042 }, { "epoch": 0.08420712825356592, "grad_norm": 12.909215927124023, "learning_rate": 9.923201560518376e-06, "loss": 0.6266, "step": 7043 }, { "epoch": 0.08421908439842657, "grad_norm": 2.164058208465576, "learning_rate": 9.923167752223118e-06, "loss": 0.6284, "step": 7044 }, { "epoch": 0.08423104054328723, "grad_norm": 2.6811625957489014, "learning_rate": 9.92313393654555e-06, "loss": 0.6571, "step": 7045 }, { "epoch": 0.08424299668814787, "grad_norm": 2.149256467819214, "learning_rate": 9.923100113485727e-06, "loss": 0.5789, "step": 7046 }, { "epoch": 0.08425495283300853, "grad_norm": 2.4091134071350098, "learning_rate": 9.923066283043698e-06, "loss": 0.5816, "step": 7047 }, { "epoch": 0.08426690897786918, "grad_norm": 3.373889923095703, "learning_rate": 9.923032445219512e-06, "loss": 0.5817, "step": 7048 }, { "epoch": 0.08427886512272982, "grad_norm": 12.640486717224121, "learning_rate": 9.922998600013223e-06, "loss": 0.6814, "step": 7049 }, { "epoch": 0.08429082126759048, "grad_norm": 12.946405410766602, "learning_rate": 9.92296474742488e-06, "loss": 0.6336, "step": 7050 }, { "epoch": 0.08430277741245112, "grad_norm": 3.078928232192993, "learning_rate": 9.922930887454532e-06, "loss": 0.5769, "step": 7051 }, { "epoch": 0.08431473355731178, "grad_norm": 1.7535336017608643, "learning_rate": 9.922897020102233e-06, "loss": 0.6314, "step": 7052 }, { "epoch": 0.08432668970217243, "grad_norm": 1.9223099946975708, "learning_rate": 9.922863145368033e-06, "loss": 0.6213, "step": 7053 }, { "epoch": 0.08433864584703309, "grad_norm": 3.8494813442230225, "learning_rate": 9.92282926325198e-06, "loss": 0.6354, "step": 7054 }, { "epoch": 0.08435060199189373, "grad_norm": 1.9680935144424438, "learning_rate": 9.92279537375413e-06, "loss": 0.6171, "step": 7055 }, { "epoch": 0.08436255813675439, "grad_norm": 3.1408987045288086, "learning_rate": 9.922761476874529e-06, "loss": 0.6873, "step": 7056 }, { "epoch": 0.08437451428161503, "grad_norm": 4.735689640045166, "learning_rate": 9.922727572613232e-06, "loss": 0.5973, "step": 7057 }, { "epoch": 0.0843864704264757, "grad_norm": 3.049884796142578, "learning_rate": 9.922693660970283e-06, "loss": 0.6129, "step": 7058 }, { "epoch": 0.08439842657133634, "grad_norm": 5.266820907592773, "learning_rate": 9.922659741945742e-06, "loss": 0.5673, "step": 7059 }, { "epoch": 0.08441038271619698, "grad_norm": 4.2140278816223145, "learning_rate": 9.922625815539653e-06, "loss": 0.6326, "step": 7060 }, { "epoch": 0.08442233886105764, "grad_norm": 1.7548270225524902, "learning_rate": 9.922591881752068e-06, "loss": 0.6103, "step": 7061 }, { "epoch": 0.08443429500591829, "grad_norm": 6.632153511047363, "learning_rate": 9.922557940583042e-06, "loss": 0.6806, "step": 7062 }, { "epoch": 0.08444625115077894, "grad_norm": 2.6726503372192383, "learning_rate": 9.92252399203262e-06, "loss": 0.5906, "step": 7063 }, { "epoch": 0.08445820729563959, "grad_norm": 3.1157002449035645, "learning_rate": 9.922490036100857e-06, "loss": 0.6293, "step": 7064 }, { "epoch": 0.08447016344050025, "grad_norm": 3.960623264312744, "learning_rate": 9.922456072787803e-06, "loss": 0.6055, "step": 7065 }, { "epoch": 0.08448211958536089, "grad_norm": 3.3224008083343506, "learning_rate": 9.922422102093508e-06, "loss": 0.6474, "step": 7066 }, { "epoch": 0.08449407573022155, "grad_norm": 56.263999938964844, "learning_rate": 9.922388124018023e-06, "loss": 0.6023, "step": 7067 }, { "epoch": 0.0845060318750822, "grad_norm": 2.0036253929138184, "learning_rate": 9.922354138561401e-06, "loss": 0.7359, "step": 7068 }, { "epoch": 0.08451798801994286, "grad_norm": 1.5160715579986572, "learning_rate": 9.92232014572369e-06, "loss": 0.6147, "step": 7069 }, { "epoch": 0.0845299441648035, "grad_norm": 3.679339647293091, "learning_rate": 9.922286145504944e-06, "loss": 0.6029, "step": 7070 }, { "epoch": 0.08454190030966416, "grad_norm": 3.988102912902832, "learning_rate": 9.92225213790521e-06, "loss": 0.6458, "step": 7071 }, { "epoch": 0.0845538564545248, "grad_norm": 2.3064870834350586, "learning_rate": 9.922218122924546e-06, "loss": 0.6788, "step": 7072 }, { "epoch": 0.08456581259938545, "grad_norm": 1.8912428617477417, "learning_rate": 9.922184100562993e-06, "loss": 0.6512, "step": 7073 }, { "epoch": 0.0845777687442461, "grad_norm": 2.6925086975097656, "learning_rate": 9.92215007082061e-06, "loss": 0.6184, "step": 7074 }, { "epoch": 0.08458972488910675, "grad_norm": 2.999572277069092, "learning_rate": 9.922116033697447e-06, "loss": 0.5897, "step": 7075 }, { "epoch": 0.08460168103396741, "grad_norm": 2.138619899749756, "learning_rate": 9.92208198919355e-06, "loss": 0.5906, "step": 7076 }, { "epoch": 0.08461363717882806, "grad_norm": 1.378121018409729, "learning_rate": 9.922047937308976e-06, "loss": 0.6477, "step": 7077 }, { "epoch": 0.08462559332368871, "grad_norm": 1.8780180215835571, "learning_rate": 9.922013878043774e-06, "loss": 0.614, "step": 7078 }, { "epoch": 0.08463754946854936, "grad_norm": 1.635550856590271, "learning_rate": 9.921979811397993e-06, "loss": 0.6066, "step": 7079 }, { "epoch": 0.08464950561341002, "grad_norm": 1.9930974245071411, "learning_rate": 9.921945737371688e-06, "loss": 0.6184, "step": 7080 }, { "epoch": 0.08466146175827066, "grad_norm": 7.396145820617676, "learning_rate": 9.921911655964905e-06, "loss": 0.5977, "step": 7081 }, { "epoch": 0.08467341790313132, "grad_norm": 5.27747106552124, "learning_rate": 9.9218775671777e-06, "loss": 0.5614, "step": 7082 }, { "epoch": 0.08468537404799197, "grad_norm": 3.675562858581543, "learning_rate": 9.921843471010122e-06, "loss": 0.6666, "step": 7083 }, { "epoch": 0.08469733019285261, "grad_norm": 2.3970372676849365, "learning_rate": 9.92180936746222e-06, "loss": 0.5991, "step": 7084 }, { "epoch": 0.08470928633771327, "grad_norm": 2.9836392402648926, "learning_rate": 9.92177525653405e-06, "loss": 0.6329, "step": 7085 }, { "epoch": 0.08472124248257391, "grad_norm": 1.9633641242980957, "learning_rate": 9.92174113822566e-06, "loss": 0.6728, "step": 7086 }, { "epoch": 0.08473319862743457, "grad_norm": 2.411041259765625, "learning_rate": 9.9217070125371e-06, "loss": 0.6859, "step": 7087 }, { "epoch": 0.08474515477229522, "grad_norm": 2.025724172592163, "learning_rate": 9.921672879468424e-06, "loss": 0.5951, "step": 7088 }, { "epoch": 0.08475711091715588, "grad_norm": 3.4898552894592285, "learning_rate": 9.921638739019683e-06, "loss": 0.5513, "step": 7089 }, { "epoch": 0.08476906706201652, "grad_norm": 2.378206968307495, "learning_rate": 9.921604591190927e-06, "loss": 0.7108, "step": 7090 }, { "epoch": 0.08478102320687718, "grad_norm": 2.2701289653778076, "learning_rate": 9.921570435982206e-06, "loss": 0.6926, "step": 7091 }, { "epoch": 0.08479297935173782, "grad_norm": 2.8601133823394775, "learning_rate": 9.921536273393573e-06, "loss": 0.6525, "step": 7092 }, { "epoch": 0.08480493549659848, "grad_norm": 2.0388312339782715, "learning_rate": 9.92150210342508e-06, "loss": 0.6817, "step": 7093 }, { "epoch": 0.08481689164145913, "grad_norm": 2.1708426475524902, "learning_rate": 9.921467926076777e-06, "loss": 0.7061, "step": 7094 }, { "epoch": 0.08482884778631979, "grad_norm": 6.917527198791504, "learning_rate": 9.921433741348714e-06, "loss": 0.6481, "step": 7095 }, { "epoch": 0.08484080393118043, "grad_norm": 2.309937000274658, "learning_rate": 9.921399549240944e-06, "loss": 0.5957, "step": 7096 }, { "epoch": 0.08485276007604108, "grad_norm": 2.914766550064087, "learning_rate": 9.92136534975352e-06, "loss": 0.5056, "step": 7097 }, { "epoch": 0.08486471622090173, "grad_norm": 1.758748173713684, "learning_rate": 9.921331142886488e-06, "loss": 0.6248, "step": 7098 }, { "epoch": 0.08487667236576238, "grad_norm": 2.696580410003662, "learning_rate": 9.921296928639905e-06, "loss": 0.6501, "step": 7099 }, { "epoch": 0.08488862851062304, "grad_norm": 2.9161012172698975, "learning_rate": 9.921262707013817e-06, "loss": 0.6919, "step": 7100 }, { "epoch": 0.08490058465548368, "grad_norm": 2.661868095397949, "learning_rate": 9.92122847800828e-06, "loss": 0.6142, "step": 7101 }, { "epoch": 0.08491254080034434, "grad_norm": 4.442163467407227, "learning_rate": 9.921194241623344e-06, "loss": 0.6268, "step": 7102 }, { "epoch": 0.08492449694520499, "grad_norm": 3.4840247631073, "learning_rate": 9.921159997859058e-06, "loss": 0.6687, "step": 7103 }, { "epoch": 0.08493645309006564, "grad_norm": 4.5542778968811035, "learning_rate": 9.921125746715477e-06, "loss": 0.6299, "step": 7104 }, { "epoch": 0.08494840923492629, "grad_norm": 50.46116256713867, "learning_rate": 9.92109148819265e-06, "loss": 0.6604, "step": 7105 }, { "epoch": 0.08496036537978695, "grad_norm": 2.8343007564544678, "learning_rate": 9.921057222290627e-06, "loss": 0.6564, "step": 7106 }, { "epoch": 0.08497232152464759, "grad_norm": 2.318294048309326, "learning_rate": 9.921022949009462e-06, "loss": 0.5737, "step": 7107 }, { "epoch": 0.08498427766950824, "grad_norm": 2.896919012069702, "learning_rate": 9.920988668349207e-06, "loss": 0.7525, "step": 7108 }, { "epoch": 0.0849962338143689, "grad_norm": 1.5757266283035278, "learning_rate": 9.92095438030991e-06, "loss": 0.6079, "step": 7109 }, { "epoch": 0.08500818995922954, "grad_norm": 7.649774074554443, "learning_rate": 9.920920084891626e-06, "loss": 0.5995, "step": 7110 }, { "epoch": 0.0850201461040902, "grad_norm": 2.738538980484009, "learning_rate": 9.920885782094403e-06, "loss": 0.6744, "step": 7111 }, { "epoch": 0.08503210224895084, "grad_norm": 2.361297369003296, "learning_rate": 9.920851471918296e-06, "loss": 0.66, "step": 7112 }, { "epoch": 0.0850440583938115, "grad_norm": 4.714890480041504, "learning_rate": 9.920817154363355e-06, "loss": 0.5356, "step": 7113 }, { "epoch": 0.08505601453867215, "grad_norm": 2.013611078262329, "learning_rate": 9.92078282942963e-06, "loss": 0.6149, "step": 7114 }, { "epoch": 0.0850679706835328, "grad_norm": 1.8672219514846802, "learning_rate": 9.920748497117175e-06, "loss": 0.6476, "step": 7115 }, { "epoch": 0.08507992682839345, "grad_norm": 3.182774543762207, "learning_rate": 9.920714157426038e-06, "loss": 0.6747, "step": 7116 }, { "epoch": 0.08509188297325411, "grad_norm": 2.7713074684143066, "learning_rate": 9.920679810356276e-06, "loss": 0.6719, "step": 7117 }, { "epoch": 0.08510383911811475, "grad_norm": 2.72638201713562, "learning_rate": 9.920645455907933e-06, "loss": 0.6357, "step": 7118 }, { "epoch": 0.0851157952629754, "grad_norm": 2.858069896697998, "learning_rate": 9.920611094081067e-06, "loss": 0.576, "step": 7119 }, { "epoch": 0.08512775140783606, "grad_norm": 2.0992748737335205, "learning_rate": 9.920576724875726e-06, "loss": 0.6454, "step": 7120 }, { "epoch": 0.0851397075526967, "grad_norm": 2.827908515930176, "learning_rate": 9.920542348291965e-06, "loss": 0.6082, "step": 7121 }, { "epoch": 0.08515166369755736, "grad_norm": 2.002798318862915, "learning_rate": 9.920507964329831e-06, "loss": 0.6884, "step": 7122 }, { "epoch": 0.085163619842418, "grad_norm": 5.2786712646484375, "learning_rate": 9.920473572989378e-06, "loss": 0.6038, "step": 7123 }, { "epoch": 0.08517557598727866, "grad_norm": 5.070843696594238, "learning_rate": 9.92043917427066e-06, "loss": 0.6817, "step": 7124 }, { "epoch": 0.08518753213213931, "grad_norm": 3.353097438812256, "learning_rate": 9.920404768173722e-06, "loss": 0.6295, "step": 7125 }, { "epoch": 0.08519948827699997, "grad_norm": 3.6323082447052, "learning_rate": 9.920370354698623e-06, "loss": 0.6998, "step": 7126 }, { "epoch": 0.08521144442186061, "grad_norm": 2.6316277980804443, "learning_rate": 9.920335933845408e-06, "loss": 0.564, "step": 7127 }, { "epoch": 0.08522340056672127, "grad_norm": 2.514695167541504, "learning_rate": 9.920301505614134e-06, "loss": 0.6155, "step": 7128 }, { "epoch": 0.08523535671158192, "grad_norm": 2.2413909435272217, "learning_rate": 9.92026707000485e-06, "loss": 0.6488, "step": 7129 }, { "epoch": 0.08524731285644258, "grad_norm": 2.166576623916626, "learning_rate": 9.920232627017607e-06, "loss": 0.6087, "step": 7130 }, { "epoch": 0.08525926900130322, "grad_norm": 2.9353137016296387, "learning_rate": 9.92019817665246e-06, "loss": 0.596, "step": 7131 }, { "epoch": 0.08527122514616386, "grad_norm": 2.145447015762329, "learning_rate": 9.920163718909457e-06, "loss": 0.6161, "step": 7132 }, { "epoch": 0.08528318129102452, "grad_norm": 3.4620578289031982, "learning_rate": 9.920129253788651e-06, "loss": 0.5976, "step": 7133 }, { "epoch": 0.08529513743588517, "grad_norm": 7.436400413513184, "learning_rate": 9.920094781290092e-06, "loss": 0.5764, "step": 7134 }, { "epoch": 0.08530709358074583, "grad_norm": 5.594358444213867, "learning_rate": 9.920060301413836e-06, "loss": 0.6487, "step": 7135 }, { "epoch": 0.08531904972560647, "grad_norm": 3.9957714080810547, "learning_rate": 9.920025814159932e-06, "loss": 0.6678, "step": 7136 }, { "epoch": 0.08533100587046713, "grad_norm": 3.397808790206909, "learning_rate": 9.919991319528431e-06, "loss": 0.6392, "step": 7137 }, { "epoch": 0.08534296201532778, "grad_norm": 5.737151145935059, "learning_rate": 9.919956817519385e-06, "loss": 0.6404, "step": 7138 }, { "epoch": 0.08535491816018843, "grad_norm": 7.444916248321533, "learning_rate": 9.919922308132846e-06, "loss": 0.6885, "step": 7139 }, { "epoch": 0.08536687430504908, "grad_norm": 1.9472482204437256, "learning_rate": 9.919887791368867e-06, "loss": 0.5498, "step": 7140 }, { "epoch": 0.08537883044990974, "grad_norm": 2.2031946182250977, "learning_rate": 9.9198532672275e-06, "loss": 0.5994, "step": 7141 }, { "epoch": 0.08539078659477038, "grad_norm": 3.009427785873413, "learning_rate": 9.919818735708793e-06, "loss": 0.6233, "step": 7142 }, { "epoch": 0.08540274273963103, "grad_norm": 8.207337379455566, "learning_rate": 9.9197841968128e-06, "loss": 0.6536, "step": 7143 }, { "epoch": 0.08541469888449169, "grad_norm": 5.45363187789917, "learning_rate": 9.919749650539576e-06, "loss": 0.7051, "step": 7144 }, { "epoch": 0.08542665502935233, "grad_norm": 1.8456380367279053, "learning_rate": 9.919715096889168e-06, "loss": 0.6505, "step": 7145 }, { "epoch": 0.08543861117421299, "grad_norm": 1.6582385301589966, "learning_rate": 9.919680535861631e-06, "loss": 0.6069, "step": 7146 }, { "epoch": 0.08545056731907363, "grad_norm": 1.8272536993026733, "learning_rate": 9.919645967457014e-06, "loss": 0.5561, "step": 7147 }, { "epoch": 0.08546252346393429, "grad_norm": 3.2762351036071777, "learning_rate": 9.919611391675371e-06, "loss": 0.5713, "step": 7148 }, { "epoch": 0.08547447960879494, "grad_norm": 2.057699680328369, "learning_rate": 9.919576808516753e-06, "loss": 0.5574, "step": 7149 }, { "epoch": 0.0854864357536556, "grad_norm": 2.4218459129333496, "learning_rate": 9.919542217981212e-06, "loss": 0.6196, "step": 7150 }, { "epoch": 0.08549839189851624, "grad_norm": 3.091395378112793, "learning_rate": 9.9195076200688e-06, "loss": 0.73, "step": 7151 }, { "epoch": 0.0855103480433769, "grad_norm": 6.691561222076416, "learning_rate": 9.91947301477957e-06, "loss": 0.6299, "step": 7152 }, { "epoch": 0.08552230418823754, "grad_norm": 2.537923812866211, "learning_rate": 9.919438402113573e-06, "loss": 0.6662, "step": 7153 }, { "epoch": 0.0855342603330982, "grad_norm": 3.6184160709381104, "learning_rate": 9.919403782070858e-06, "loss": 0.6425, "step": 7154 }, { "epoch": 0.08554621647795885, "grad_norm": 3.158665418624878, "learning_rate": 9.919369154651481e-06, "loss": 0.6007, "step": 7155 }, { "epoch": 0.08555817262281949, "grad_norm": 2.720548152923584, "learning_rate": 9.919334519855492e-06, "loss": 0.6489, "step": 7156 }, { "epoch": 0.08557012876768015, "grad_norm": 3.8368263244628906, "learning_rate": 9.919299877682944e-06, "loss": 0.5562, "step": 7157 }, { "epoch": 0.0855820849125408, "grad_norm": 1.8728150129318237, "learning_rate": 9.91926522813389e-06, "loss": 0.6487, "step": 7158 }, { "epoch": 0.08559404105740145, "grad_norm": 3.515838861465454, "learning_rate": 9.919230571208378e-06, "loss": 0.5984, "step": 7159 }, { "epoch": 0.0856059972022621, "grad_norm": 2.1142871379852295, "learning_rate": 9.919195906906463e-06, "loss": 0.6192, "step": 7160 }, { "epoch": 0.08561795334712276, "grad_norm": 2.577009916305542, "learning_rate": 9.919161235228197e-06, "loss": 0.5377, "step": 7161 }, { "epoch": 0.0856299094919834, "grad_norm": 3.3250732421875, "learning_rate": 9.919126556173628e-06, "loss": 0.7455, "step": 7162 }, { "epoch": 0.08564186563684406, "grad_norm": 1.8975334167480469, "learning_rate": 9.919091869742814e-06, "loss": 0.6967, "step": 7163 }, { "epoch": 0.0856538217817047, "grad_norm": 2.06524920463562, "learning_rate": 9.919057175935804e-06, "loss": 0.6533, "step": 7164 }, { "epoch": 0.08566577792656536, "grad_norm": 1.9299870729446411, "learning_rate": 9.91902247475265e-06, "loss": 0.6099, "step": 7165 }, { "epoch": 0.08567773407142601, "grad_norm": 2.4967243671417236, "learning_rate": 9.918987766193404e-06, "loss": 0.6099, "step": 7166 }, { "epoch": 0.08568969021628665, "grad_norm": 2.9994022846221924, "learning_rate": 9.91895305025812e-06, "loss": 0.7025, "step": 7167 }, { "epoch": 0.08570164636114731, "grad_norm": 2.034229040145874, "learning_rate": 9.918918326946847e-06, "loss": 0.6123, "step": 7168 }, { "epoch": 0.08571360250600796, "grad_norm": 2.749591588973999, "learning_rate": 9.918883596259639e-06, "loss": 0.6559, "step": 7169 }, { "epoch": 0.08572555865086862, "grad_norm": 2.362959623336792, "learning_rate": 9.918848858196547e-06, "loss": 0.6532, "step": 7170 }, { "epoch": 0.08573751479572926, "grad_norm": 2.1010828018188477, "learning_rate": 9.918814112757625e-06, "loss": 0.5749, "step": 7171 }, { "epoch": 0.08574947094058992, "grad_norm": 4.635754585266113, "learning_rate": 9.918779359942922e-06, "loss": 0.6083, "step": 7172 }, { "epoch": 0.08576142708545056, "grad_norm": 1.9417369365692139, "learning_rate": 9.918744599752492e-06, "loss": 0.5425, "step": 7173 }, { "epoch": 0.08577338323031122, "grad_norm": 2.220970392227173, "learning_rate": 9.918709832186388e-06, "loss": 0.5675, "step": 7174 }, { "epoch": 0.08578533937517187, "grad_norm": 5.192009449005127, "learning_rate": 9.91867505724466e-06, "loss": 0.6381, "step": 7175 }, { "epoch": 0.08579729552003253, "grad_norm": 3.3646934032440186, "learning_rate": 9.918640274927362e-06, "loss": 0.6851, "step": 7176 }, { "epoch": 0.08580925166489317, "grad_norm": 3.268855094909668, "learning_rate": 9.918605485234546e-06, "loss": 0.6222, "step": 7177 }, { "epoch": 0.08582120780975382, "grad_norm": 4.471719264984131, "learning_rate": 9.918570688166263e-06, "loss": 0.6128, "step": 7178 }, { "epoch": 0.08583316395461447, "grad_norm": 2.390328884124756, "learning_rate": 9.918535883722565e-06, "loss": 0.5597, "step": 7179 }, { "epoch": 0.08584512009947512, "grad_norm": 3.2651333808898926, "learning_rate": 9.918501071903506e-06, "loss": 0.6212, "step": 7180 }, { "epoch": 0.08585707624433578, "grad_norm": 3.127699375152588, "learning_rate": 9.918466252709136e-06, "loss": 0.6103, "step": 7181 }, { "epoch": 0.08586903238919642, "grad_norm": 5.999209403991699, "learning_rate": 9.91843142613951e-06, "loss": 0.5627, "step": 7182 }, { "epoch": 0.08588098853405708, "grad_norm": 2.8118209838867188, "learning_rate": 9.918396592194676e-06, "loss": 0.645, "step": 7183 }, { "epoch": 0.08589294467891773, "grad_norm": 2.3595592975616455, "learning_rate": 9.918361750874692e-06, "loss": 0.6722, "step": 7184 }, { "epoch": 0.08590490082377839, "grad_norm": 3.1268553733825684, "learning_rate": 9.918326902179606e-06, "loss": 0.6323, "step": 7185 }, { "epoch": 0.08591685696863903, "grad_norm": 6.343358993530273, "learning_rate": 9.91829204610947e-06, "loss": 0.696, "step": 7186 }, { "epoch": 0.08592881311349969, "grad_norm": 3.136829137802124, "learning_rate": 9.91825718266434e-06, "loss": 0.7018, "step": 7187 }, { "epoch": 0.08594076925836033, "grad_norm": 4.193990707397461, "learning_rate": 9.918222311844263e-06, "loss": 0.6401, "step": 7188 }, { "epoch": 0.08595272540322099, "grad_norm": 2.2710633277893066, "learning_rate": 9.918187433649297e-06, "loss": 0.6401, "step": 7189 }, { "epoch": 0.08596468154808164, "grad_norm": 4.411904811859131, "learning_rate": 9.918152548079489e-06, "loss": 0.6271, "step": 7190 }, { "epoch": 0.08597663769294228, "grad_norm": 2.381787061691284, "learning_rate": 9.918117655134896e-06, "loss": 0.612, "step": 7191 }, { "epoch": 0.08598859383780294, "grad_norm": 2.8648557662963867, "learning_rate": 9.918082754815567e-06, "loss": 0.709, "step": 7192 }, { "epoch": 0.08600054998266358, "grad_norm": 2.9010870456695557, "learning_rate": 9.918047847121555e-06, "loss": 0.5996, "step": 7193 }, { "epoch": 0.08601250612752424, "grad_norm": 1.9088174104690552, "learning_rate": 9.918012932052912e-06, "loss": 0.6349, "step": 7194 }, { "epoch": 0.08602446227238489, "grad_norm": 3.6210992336273193, "learning_rate": 9.917978009609693e-06, "loss": 0.5358, "step": 7195 }, { "epoch": 0.08603641841724555, "grad_norm": 1.7837475538253784, "learning_rate": 9.917943079791947e-06, "loss": 0.5775, "step": 7196 }, { "epoch": 0.08604837456210619, "grad_norm": 2.7192304134368896, "learning_rate": 9.91790814259973e-06, "loss": 0.6087, "step": 7197 }, { "epoch": 0.08606033070696685, "grad_norm": 3.122532367706299, "learning_rate": 9.91787319803309e-06, "loss": 0.6232, "step": 7198 }, { "epoch": 0.0860722868518275, "grad_norm": 11.826726913452148, "learning_rate": 9.917838246092083e-06, "loss": 0.6066, "step": 7199 }, { "epoch": 0.08608424299668815, "grad_norm": 5.41049337387085, "learning_rate": 9.91780328677676e-06, "loss": 0.6877, "step": 7200 }, { "epoch": 0.0860961991415488, "grad_norm": 10.110252380371094, "learning_rate": 9.917768320087174e-06, "loss": 0.6232, "step": 7201 }, { "epoch": 0.08610815528640944, "grad_norm": 2.199861764907837, "learning_rate": 9.917733346023374e-06, "loss": 0.6211, "step": 7202 }, { "epoch": 0.0861201114312701, "grad_norm": 2.0665955543518066, "learning_rate": 9.917698364585418e-06, "loss": 0.5481, "step": 7203 }, { "epoch": 0.08613206757613075, "grad_norm": 14.49657917022705, "learning_rate": 9.917663375773357e-06, "loss": 0.6705, "step": 7204 }, { "epoch": 0.0861440237209914, "grad_norm": 3.534691333770752, "learning_rate": 9.917628379587239e-06, "loss": 0.591, "step": 7205 }, { "epoch": 0.08615597986585205, "grad_norm": 2.137322425842285, "learning_rate": 9.917593376027121e-06, "loss": 0.7184, "step": 7206 }, { "epoch": 0.08616793601071271, "grad_norm": 2.3010175228118896, "learning_rate": 9.917558365093055e-06, "loss": 0.6411, "step": 7207 }, { "epoch": 0.08617989215557335, "grad_norm": 1.9309738874435425, "learning_rate": 9.917523346785091e-06, "loss": 0.6879, "step": 7208 }, { "epoch": 0.08619184830043401, "grad_norm": 27.114702224731445, "learning_rate": 9.917488321103285e-06, "loss": 0.7185, "step": 7209 }, { "epoch": 0.08620380444529466, "grad_norm": 2.3457202911376953, "learning_rate": 9.917453288047688e-06, "loss": 0.5865, "step": 7210 }, { "epoch": 0.08621576059015532, "grad_norm": 1.8411650657653809, "learning_rate": 9.917418247618353e-06, "loss": 0.6233, "step": 7211 }, { "epoch": 0.08622771673501596, "grad_norm": 1.8912010192871094, "learning_rate": 9.91738319981533e-06, "loss": 0.5886, "step": 7212 }, { "epoch": 0.08623967287987662, "grad_norm": 2.631436824798584, "learning_rate": 9.917348144638674e-06, "loss": 0.6849, "step": 7213 }, { "epoch": 0.08625162902473726, "grad_norm": 1.9452579021453857, "learning_rate": 9.917313082088436e-06, "loss": 0.6535, "step": 7214 }, { "epoch": 0.08626358516959791, "grad_norm": 1.800748586654663, "learning_rate": 9.917278012164671e-06, "loss": 0.4698, "step": 7215 }, { "epoch": 0.08627554131445857, "grad_norm": 1.7804332971572876, "learning_rate": 9.91724293486743e-06, "loss": 0.6105, "step": 7216 }, { "epoch": 0.08628749745931921, "grad_norm": 10.778916358947754, "learning_rate": 9.917207850196766e-06, "loss": 0.603, "step": 7217 }, { "epoch": 0.08629945360417987, "grad_norm": 3.333179235458374, "learning_rate": 9.917172758152729e-06, "loss": 0.5795, "step": 7218 }, { "epoch": 0.08631140974904052, "grad_norm": 2.236196994781494, "learning_rate": 9.917137658735377e-06, "loss": 0.5912, "step": 7219 }, { "epoch": 0.08632336589390117, "grad_norm": 3.3809654712677, "learning_rate": 9.917102551944757e-06, "loss": 0.6309, "step": 7220 }, { "epoch": 0.08633532203876182, "grad_norm": 6.035996437072754, "learning_rate": 9.917067437780927e-06, "loss": 0.6457, "step": 7221 }, { "epoch": 0.08634727818362248, "grad_norm": 7.994808197021484, "learning_rate": 9.917032316243934e-06, "loss": 0.6187, "step": 7222 }, { "epoch": 0.08635923432848312, "grad_norm": 1.6001988649368286, "learning_rate": 9.916997187333835e-06, "loss": 0.6633, "step": 7223 }, { "epoch": 0.08637119047334378, "grad_norm": 4.997723579406738, "learning_rate": 9.916962051050682e-06, "loss": 0.6037, "step": 7224 }, { "epoch": 0.08638314661820443, "grad_norm": 2.2140722274780273, "learning_rate": 9.916926907394527e-06, "loss": 0.6525, "step": 7225 }, { "epoch": 0.08639510276306507, "grad_norm": 5.395835876464844, "learning_rate": 9.916891756365421e-06, "loss": 0.62, "step": 7226 }, { "epoch": 0.08640705890792573, "grad_norm": 3.179123878479004, "learning_rate": 9.916856597963419e-06, "loss": 0.6406, "step": 7227 }, { "epoch": 0.08641901505278637, "grad_norm": 1.9715361595153809, "learning_rate": 9.916821432188572e-06, "loss": 0.7176, "step": 7228 }, { "epoch": 0.08643097119764703, "grad_norm": 2.346384286880493, "learning_rate": 9.916786259040935e-06, "loss": 0.5394, "step": 7229 }, { "epoch": 0.08644292734250768, "grad_norm": 2.4622974395751953, "learning_rate": 9.91675107852056e-06, "loss": 0.6828, "step": 7230 }, { "epoch": 0.08645488348736834, "grad_norm": 3.8034684658050537, "learning_rate": 9.916715890627499e-06, "loss": 0.6434, "step": 7231 }, { "epoch": 0.08646683963222898, "grad_norm": 3.5311875343322754, "learning_rate": 9.916680695361804e-06, "loss": 0.6326, "step": 7232 }, { "epoch": 0.08647879577708964, "grad_norm": 3.423234462738037, "learning_rate": 9.916645492723532e-06, "loss": 0.6095, "step": 7233 }, { "epoch": 0.08649075192195028, "grad_norm": 1.7055919170379639, "learning_rate": 9.916610282712728e-06, "loss": 0.555, "step": 7234 }, { "epoch": 0.08650270806681094, "grad_norm": 2.1701526641845703, "learning_rate": 9.916575065329453e-06, "loss": 0.5935, "step": 7235 }, { "epoch": 0.08651466421167159, "grad_norm": 1.8744016885757446, "learning_rate": 9.916539840573754e-06, "loss": 0.6168, "step": 7236 }, { "epoch": 0.08652662035653223, "grad_norm": 2.878229856491089, "learning_rate": 9.916504608445687e-06, "loss": 0.5851, "step": 7237 }, { "epoch": 0.08653857650139289, "grad_norm": 2.588104724884033, "learning_rate": 9.916469368945305e-06, "loss": 0.6217, "step": 7238 }, { "epoch": 0.08655053264625354, "grad_norm": 2.384091854095459, "learning_rate": 9.916434122072657e-06, "loss": 0.619, "step": 7239 }, { "epoch": 0.0865624887911142, "grad_norm": 4.985904693603516, "learning_rate": 9.9163988678278e-06, "loss": 0.6705, "step": 7240 }, { "epoch": 0.08657444493597484, "grad_norm": 2.319502830505371, "learning_rate": 9.916363606210787e-06, "loss": 0.6734, "step": 7241 }, { "epoch": 0.0865864010808355, "grad_norm": 1.7038400173187256, "learning_rate": 9.916328337221665e-06, "loss": 0.5876, "step": 7242 }, { "epoch": 0.08659835722569614, "grad_norm": 25.55879020690918, "learning_rate": 9.916293060860496e-06, "loss": 0.5211, "step": 7243 }, { "epoch": 0.0866103133705568, "grad_norm": 1.5751757621765137, "learning_rate": 9.916257777127324e-06, "loss": 0.6039, "step": 7244 }, { "epoch": 0.08662226951541745, "grad_norm": 2.588409900665283, "learning_rate": 9.91622248602221e-06, "loss": 0.6465, "step": 7245 }, { "epoch": 0.0866342256602781, "grad_norm": 4.0072808265686035, "learning_rate": 9.9161871875452e-06, "loss": 0.7532, "step": 7246 }, { "epoch": 0.08664618180513875, "grad_norm": 3.7504663467407227, "learning_rate": 9.91615188169635e-06, "loss": 0.6304, "step": 7247 }, { "epoch": 0.08665813794999941, "grad_norm": 1.5627026557922363, "learning_rate": 9.916116568475714e-06, "loss": 0.6256, "step": 7248 }, { "epoch": 0.08667009409486005, "grad_norm": 14.748623847961426, "learning_rate": 9.916081247883343e-06, "loss": 0.625, "step": 7249 }, { "epoch": 0.0866820502397207, "grad_norm": 2.328202247619629, "learning_rate": 9.916045919919291e-06, "loss": 0.6709, "step": 7250 }, { "epoch": 0.08669400638458136, "grad_norm": 3.191638946533203, "learning_rate": 9.91601058458361e-06, "loss": 0.6585, "step": 7251 }, { "epoch": 0.086705962529442, "grad_norm": 2.244408130645752, "learning_rate": 9.915975241876356e-06, "loss": 0.549, "step": 7252 }, { "epoch": 0.08671791867430266, "grad_norm": 3.3569676876068115, "learning_rate": 9.915939891797577e-06, "loss": 0.6489, "step": 7253 }, { "epoch": 0.0867298748191633, "grad_norm": 1.9475719928741455, "learning_rate": 9.91590453434733e-06, "loss": 0.6831, "step": 7254 }, { "epoch": 0.08674183096402396, "grad_norm": 5.317146301269531, "learning_rate": 9.915869169525667e-06, "loss": 0.5916, "step": 7255 }, { "epoch": 0.08675378710888461, "grad_norm": 3.4430577754974365, "learning_rate": 9.91583379733264e-06, "loss": 0.6259, "step": 7256 }, { "epoch": 0.08676574325374527, "grad_norm": 4.450260639190674, "learning_rate": 9.9157984177683e-06, "loss": 0.6679, "step": 7257 }, { "epoch": 0.08677769939860591, "grad_norm": 2.9585518836975098, "learning_rate": 9.915763030832709e-06, "loss": 0.6816, "step": 7258 }, { "epoch": 0.08678965554346657, "grad_norm": 3.559473991394043, "learning_rate": 9.91572763652591e-06, "loss": 0.6657, "step": 7259 }, { "epoch": 0.08680161168832722, "grad_norm": 1.6119495630264282, "learning_rate": 9.915692234847959e-06, "loss": 0.6364, "step": 7260 }, { "epoch": 0.08681356783318786, "grad_norm": 3.1277387142181396, "learning_rate": 9.915656825798912e-06, "loss": 0.6743, "step": 7261 }, { "epoch": 0.08682552397804852, "grad_norm": 2.221609115600586, "learning_rate": 9.915621409378819e-06, "loss": 0.6214, "step": 7262 }, { "epoch": 0.08683748012290916, "grad_norm": 1.7051469087600708, "learning_rate": 9.915585985587735e-06, "loss": 0.6131, "step": 7263 }, { "epoch": 0.08684943626776982, "grad_norm": 3.1471848487854004, "learning_rate": 9.91555055442571e-06, "loss": 0.7808, "step": 7264 }, { "epoch": 0.08686139241263047, "grad_norm": 2.572171449661255, "learning_rate": 9.915515115892802e-06, "loss": 0.714, "step": 7265 }, { "epoch": 0.08687334855749113, "grad_norm": 2.46962571144104, "learning_rate": 9.915479669989061e-06, "loss": 0.5514, "step": 7266 }, { "epoch": 0.08688530470235177, "grad_norm": 3.3070192337036133, "learning_rate": 9.915444216714542e-06, "loss": 0.6035, "step": 7267 }, { "epoch": 0.08689726084721243, "grad_norm": 6.324902057647705, "learning_rate": 9.915408756069297e-06, "loss": 0.6239, "step": 7268 }, { "epoch": 0.08690921699207307, "grad_norm": 4.213664531707764, "learning_rate": 9.915373288053375e-06, "loss": 0.5482, "step": 7269 }, { "epoch": 0.08692117313693373, "grad_norm": 11.074012756347656, "learning_rate": 9.915337812666836e-06, "loss": 0.635, "step": 7270 }, { "epoch": 0.08693312928179438, "grad_norm": 3.669570207595825, "learning_rate": 9.915302329909731e-06, "loss": 0.5997, "step": 7271 }, { "epoch": 0.08694508542665504, "grad_norm": 2.218086004257202, "learning_rate": 9.915266839782113e-06, "loss": 0.6329, "step": 7272 }, { "epoch": 0.08695704157151568, "grad_norm": 2.257094383239746, "learning_rate": 9.915231342284035e-06, "loss": 0.6017, "step": 7273 }, { "epoch": 0.08696899771637633, "grad_norm": 4.201932430267334, "learning_rate": 9.915195837415548e-06, "loss": 0.709, "step": 7274 }, { "epoch": 0.08698095386123698, "grad_norm": 4.357908248901367, "learning_rate": 9.91516032517671e-06, "loss": 0.5759, "step": 7275 }, { "epoch": 0.08699291000609763, "grad_norm": 4.949243068695068, "learning_rate": 9.915124805567569e-06, "loss": 0.7566, "step": 7276 }, { "epoch": 0.08700486615095829, "grad_norm": 2.770253896713257, "learning_rate": 9.915089278588184e-06, "loss": 0.593, "step": 7277 }, { "epoch": 0.08701682229581893, "grad_norm": 2.2643144130706787, "learning_rate": 9.915053744238603e-06, "loss": 0.6274, "step": 7278 }, { "epoch": 0.08702877844067959, "grad_norm": 3.4833805561065674, "learning_rate": 9.915018202518881e-06, "loss": 0.566, "step": 7279 }, { "epoch": 0.08704073458554024, "grad_norm": 2.6160831451416016, "learning_rate": 9.914982653429073e-06, "loss": 0.6756, "step": 7280 }, { "epoch": 0.0870526907304009, "grad_norm": 1.8739523887634277, "learning_rate": 9.91494709696923e-06, "loss": 0.6348, "step": 7281 }, { "epoch": 0.08706464687526154, "grad_norm": 1.9948227405548096, "learning_rate": 9.914911533139407e-06, "loss": 0.5267, "step": 7282 }, { "epoch": 0.0870766030201222, "grad_norm": 2.05692195892334, "learning_rate": 9.914875961939657e-06, "loss": 0.5707, "step": 7283 }, { "epoch": 0.08708855916498284, "grad_norm": 5.072434425354004, "learning_rate": 9.914840383370032e-06, "loss": 0.6877, "step": 7284 }, { "epoch": 0.08710051530984349, "grad_norm": 2.8316473960876465, "learning_rate": 9.914804797430589e-06, "loss": 0.677, "step": 7285 }, { "epoch": 0.08711247145470415, "grad_norm": 5.901924133300781, "learning_rate": 9.914769204121375e-06, "loss": 0.583, "step": 7286 }, { "epoch": 0.08712442759956479, "grad_norm": 8.489832878112793, "learning_rate": 9.914733603442449e-06, "loss": 0.5365, "step": 7287 }, { "epoch": 0.08713638374442545, "grad_norm": 1.848984956741333, "learning_rate": 9.914697995393863e-06, "loss": 0.6695, "step": 7288 }, { "epoch": 0.0871483398892861, "grad_norm": 3.316929817199707, "learning_rate": 9.914662379975668e-06, "loss": 0.5489, "step": 7289 }, { "epoch": 0.08716029603414675, "grad_norm": 2.9313108921051025, "learning_rate": 9.914626757187921e-06, "loss": 0.6667, "step": 7290 }, { "epoch": 0.0871722521790074, "grad_norm": 2.1517651081085205, "learning_rate": 9.914591127030673e-06, "loss": 0.6497, "step": 7291 }, { "epoch": 0.08718420832386806, "grad_norm": 3.1148247718811035, "learning_rate": 9.91455548950398e-06, "loss": 0.6662, "step": 7292 }, { "epoch": 0.0871961644687287, "grad_norm": 9.641478538513184, "learning_rate": 9.91451984460789e-06, "loss": 0.6824, "step": 7293 }, { "epoch": 0.08720812061358936, "grad_norm": 2.303103446960449, "learning_rate": 9.914484192342462e-06, "loss": 0.6252, "step": 7294 }, { "epoch": 0.08722007675845, "grad_norm": 2.294139862060547, "learning_rate": 9.914448532707747e-06, "loss": 0.671, "step": 7295 }, { "epoch": 0.08723203290331066, "grad_norm": 3.6716148853302, "learning_rate": 9.9144128657038e-06, "loss": 0.6351, "step": 7296 }, { "epoch": 0.08724398904817131, "grad_norm": 2.2611231803894043, "learning_rate": 9.914377191330673e-06, "loss": 0.7078, "step": 7297 }, { "epoch": 0.08725594519303195, "grad_norm": 4.78244161605835, "learning_rate": 9.914341509588421e-06, "loss": 0.5517, "step": 7298 }, { "epoch": 0.08726790133789261, "grad_norm": 1.6039838790893555, "learning_rate": 9.914305820477095e-06, "loss": 0.6199, "step": 7299 }, { "epoch": 0.08727985748275326, "grad_norm": 2.778078556060791, "learning_rate": 9.91427012399675e-06, "loss": 0.6294, "step": 7300 }, { "epoch": 0.08729181362761392, "grad_norm": 1.995784878730774, "learning_rate": 9.91423442014744e-06, "loss": 0.692, "step": 7301 }, { "epoch": 0.08730376977247456, "grad_norm": 2.6747779846191406, "learning_rate": 9.914198708929216e-06, "loss": 0.6574, "step": 7302 }, { "epoch": 0.08731572591733522, "grad_norm": 2.4314019680023193, "learning_rate": 9.914162990342137e-06, "loss": 0.5821, "step": 7303 }, { "epoch": 0.08732768206219586, "grad_norm": 3.339648962020874, "learning_rate": 9.914127264386252e-06, "loss": 0.6851, "step": 7304 }, { "epoch": 0.08733963820705652, "grad_norm": 2.944002866744995, "learning_rate": 9.914091531061614e-06, "loss": 0.6912, "step": 7305 }, { "epoch": 0.08735159435191717, "grad_norm": 3.063965320587158, "learning_rate": 9.91405579036828e-06, "loss": 0.6703, "step": 7306 }, { "epoch": 0.08736355049677783, "grad_norm": 2.123612880706787, "learning_rate": 9.9140200423063e-06, "loss": 0.6116, "step": 7307 }, { "epoch": 0.08737550664163847, "grad_norm": 3.2151191234588623, "learning_rate": 9.91398428687573e-06, "loss": 0.5694, "step": 7308 }, { "epoch": 0.08738746278649911, "grad_norm": 1.56192946434021, "learning_rate": 9.913948524076625e-06, "loss": 0.5751, "step": 7309 }, { "epoch": 0.08739941893135977, "grad_norm": 1.7452694177627563, "learning_rate": 9.913912753909037e-06, "loss": 0.6983, "step": 7310 }, { "epoch": 0.08741137507622042, "grad_norm": 7.860830783843994, "learning_rate": 9.913876976373018e-06, "loss": 0.7296, "step": 7311 }, { "epoch": 0.08742333122108108, "grad_norm": 1.9863559007644653, "learning_rate": 9.913841191468622e-06, "loss": 0.6586, "step": 7312 }, { "epoch": 0.08743528736594172, "grad_norm": 2.2134180068969727, "learning_rate": 9.913805399195903e-06, "loss": 0.5538, "step": 7313 }, { "epoch": 0.08744724351080238, "grad_norm": 4.063320159912109, "learning_rate": 9.91376959955492e-06, "loss": 0.6238, "step": 7314 }, { "epoch": 0.08745919965566303, "grad_norm": 1.922228455543518, "learning_rate": 9.913733792545716e-06, "loss": 0.5948, "step": 7315 }, { "epoch": 0.08747115580052368, "grad_norm": 3.1282548904418945, "learning_rate": 9.913697978168355e-06, "loss": 0.6169, "step": 7316 }, { "epoch": 0.08748311194538433, "grad_norm": 2.2993414402008057, "learning_rate": 9.913662156422885e-06, "loss": 0.5877, "step": 7317 }, { "epoch": 0.08749506809024499, "grad_norm": 3.5877676010131836, "learning_rate": 9.91362632730936e-06, "loss": 0.6433, "step": 7318 }, { "epoch": 0.08750702423510563, "grad_norm": 2.9529733657836914, "learning_rate": 9.913590490827836e-06, "loss": 0.6217, "step": 7319 }, { "epoch": 0.08751898037996628, "grad_norm": 7.886843204498291, "learning_rate": 9.913554646978367e-06, "loss": 0.6358, "step": 7320 }, { "epoch": 0.08753093652482694, "grad_norm": 2.3958210945129395, "learning_rate": 9.913518795761003e-06, "loss": 0.6023, "step": 7321 }, { "epoch": 0.08754289266968758, "grad_norm": 2.4724528789520264, "learning_rate": 9.913482937175803e-06, "loss": 0.6601, "step": 7322 }, { "epoch": 0.08755484881454824, "grad_norm": 2.4226315021514893, "learning_rate": 9.913447071222815e-06, "loss": 0.6783, "step": 7323 }, { "epoch": 0.08756680495940888, "grad_norm": 1.8808064460754395, "learning_rate": 9.913411197902097e-06, "loss": 0.6413, "step": 7324 }, { "epoch": 0.08757876110426954, "grad_norm": 1.8988555669784546, "learning_rate": 9.9133753172137e-06, "loss": 0.6493, "step": 7325 }, { "epoch": 0.08759071724913019, "grad_norm": 56.57615280151367, "learning_rate": 9.91333942915768e-06, "loss": 0.5135, "step": 7326 }, { "epoch": 0.08760267339399085, "grad_norm": 3.4764652252197266, "learning_rate": 9.913303533734092e-06, "loss": 0.6656, "step": 7327 }, { "epoch": 0.08761462953885149, "grad_norm": 4.604128360748291, "learning_rate": 9.913267630942986e-06, "loss": 0.6302, "step": 7328 }, { "epoch": 0.08762658568371215, "grad_norm": 3.5112476348876953, "learning_rate": 9.913231720784418e-06, "loss": 0.5606, "step": 7329 }, { "epoch": 0.0876385418285728, "grad_norm": 2.5441646575927734, "learning_rate": 9.91319580325844e-06, "loss": 0.6132, "step": 7330 }, { "epoch": 0.08765049797343345, "grad_norm": 1.4110455513000488, "learning_rate": 9.913159878365109e-06, "loss": 0.586, "step": 7331 }, { "epoch": 0.0876624541182941, "grad_norm": 2.4327566623687744, "learning_rate": 9.913123946104478e-06, "loss": 0.626, "step": 7332 }, { "epoch": 0.08767441026315474, "grad_norm": 2.629295587539673, "learning_rate": 9.913088006476599e-06, "loss": 0.6119, "step": 7333 }, { "epoch": 0.0876863664080154, "grad_norm": 2.6465048789978027, "learning_rate": 9.913052059481527e-06, "loss": 0.601, "step": 7334 }, { "epoch": 0.08769832255287605, "grad_norm": 10.314786911010742, "learning_rate": 9.913016105119318e-06, "loss": 0.6589, "step": 7335 }, { "epoch": 0.0877102786977367, "grad_norm": 2.931762933731079, "learning_rate": 9.912980143390022e-06, "loss": 0.5155, "step": 7336 }, { "epoch": 0.08772223484259735, "grad_norm": 1.9770358800888062, "learning_rate": 9.912944174293695e-06, "loss": 0.7828, "step": 7337 }, { "epoch": 0.08773419098745801, "grad_norm": 19.10394287109375, "learning_rate": 9.91290819783039e-06, "loss": 0.6354, "step": 7338 }, { "epoch": 0.08774614713231865, "grad_norm": 7.466893196105957, "learning_rate": 9.912872214000163e-06, "loss": 0.5658, "step": 7339 }, { "epoch": 0.08775810327717931, "grad_norm": 5.880064010620117, "learning_rate": 9.912836222803067e-06, "loss": 0.6225, "step": 7340 }, { "epoch": 0.08777005942203996, "grad_norm": 2.337657928466797, "learning_rate": 9.912800224239155e-06, "loss": 0.6972, "step": 7341 }, { "epoch": 0.08778201556690061, "grad_norm": 2.075437545776367, "learning_rate": 9.912764218308482e-06, "loss": 0.6105, "step": 7342 }, { "epoch": 0.08779397171176126, "grad_norm": 1.468254566192627, "learning_rate": 9.9127282050111e-06, "loss": 0.5402, "step": 7343 }, { "epoch": 0.0878059278566219, "grad_norm": 5.3188796043396, "learning_rate": 9.912692184347065e-06, "loss": 0.6138, "step": 7344 }, { "epoch": 0.08781788400148256, "grad_norm": 1.9456791877746582, "learning_rate": 9.912656156316431e-06, "loss": 0.6267, "step": 7345 }, { "epoch": 0.08782984014634321, "grad_norm": 1.5508836507797241, "learning_rate": 9.912620120919253e-06, "loss": 0.5552, "step": 7346 }, { "epoch": 0.08784179629120387, "grad_norm": 2.139676570892334, "learning_rate": 9.912584078155583e-06, "loss": 0.6186, "step": 7347 }, { "epoch": 0.08785375243606451, "grad_norm": 1.5216808319091797, "learning_rate": 9.912548028025474e-06, "loss": 0.6154, "step": 7348 }, { "epoch": 0.08786570858092517, "grad_norm": 3.6293535232543945, "learning_rate": 9.912511970528984e-06, "loss": 0.7126, "step": 7349 }, { "epoch": 0.08787766472578581, "grad_norm": 3.7635891437530518, "learning_rate": 9.912475905666164e-06, "loss": 0.6627, "step": 7350 }, { "epoch": 0.08788962087064647, "grad_norm": 2.013214111328125, "learning_rate": 9.912439833437069e-06, "loss": 0.7518, "step": 7351 }, { "epoch": 0.08790157701550712, "grad_norm": 1.8076449632644653, "learning_rate": 9.912403753841752e-06, "loss": 0.6814, "step": 7352 }, { "epoch": 0.08791353316036778, "grad_norm": 3.603057384490967, "learning_rate": 9.91236766688027e-06, "loss": 0.7044, "step": 7353 }, { "epoch": 0.08792548930522842, "grad_norm": 4.747870445251465, "learning_rate": 9.912331572552674e-06, "loss": 0.6979, "step": 7354 }, { "epoch": 0.08793744545008908, "grad_norm": 1.7405203580856323, "learning_rate": 9.912295470859018e-06, "loss": 0.6253, "step": 7355 }, { "epoch": 0.08794940159494972, "grad_norm": 2.3529610633850098, "learning_rate": 9.91225936179936e-06, "loss": 0.5156, "step": 7356 }, { "epoch": 0.08796135773981037, "grad_norm": 2.3980603218078613, "learning_rate": 9.912223245373752e-06, "loss": 0.6094, "step": 7357 }, { "epoch": 0.08797331388467103, "grad_norm": 2.1184327602386475, "learning_rate": 9.912187121582244e-06, "loss": 0.5733, "step": 7358 }, { "epoch": 0.08798527002953167, "grad_norm": 1.78804612159729, "learning_rate": 9.912150990424898e-06, "loss": 0.6383, "step": 7359 }, { "epoch": 0.08799722617439233, "grad_norm": 1.6877299547195435, "learning_rate": 9.912114851901762e-06, "loss": 0.6703, "step": 7360 }, { "epoch": 0.08800918231925298, "grad_norm": 1.7015156745910645, "learning_rate": 9.912078706012893e-06, "loss": 0.6075, "step": 7361 }, { "epoch": 0.08802113846411364, "grad_norm": 3.890847682952881, "learning_rate": 9.912042552758344e-06, "loss": 0.6634, "step": 7362 }, { "epoch": 0.08803309460897428, "grad_norm": 5.523667812347412, "learning_rate": 9.912006392138171e-06, "loss": 0.6131, "step": 7363 }, { "epoch": 0.08804505075383494, "grad_norm": 1.949358582496643, "learning_rate": 9.911970224152425e-06, "loss": 0.6472, "step": 7364 }, { "epoch": 0.08805700689869558, "grad_norm": 3.5028114318847656, "learning_rate": 9.911934048801163e-06, "loss": 0.6731, "step": 7365 }, { "epoch": 0.08806896304355624, "grad_norm": 2.2390902042388916, "learning_rate": 9.911897866084439e-06, "loss": 0.555, "step": 7366 }, { "epoch": 0.08808091918841689, "grad_norm": 1.9370561838150024, "learning_rate": 9.911861676002307e-06, "loss": 0.72, "step": 7367 }, { "epoch": 0.08809287533327753, "grad_norm": 2.993367910385132, "learning_rate": 9.91182547855482e-06, "loss": 0.5944, "step": 7368 }, { "epoch": 0.08810483147813819, "grad_norm": 2.4809343814849854, "learning_rate": 9.911789273742035e-06, "loss": 0.7553, "step": 7369 }, { "epoch": 0.08811678762299884, "grad_norm": 3.1585946083068848, "learning_rate": 9.911753061564002e-06, "loss": 0.6676, "step": 7370 }, { "epoch": 0.0881287437678595, "grad_norm": 2.3971874713897705, "learning_rate": 9.911716842020778e-06, "loss": 0.5183, "step": 7371 }, { "epoch": 0.08814069991272014, "grad_norm": 93.41990661621094, "learning_rate": 9.911680615112418e-06, "loss": 0.7106, "step": 7372 }, { "epoch": 0.0881526560575808, "grad_norm": 2.61110782623291, "learning_rate": 9.911644380838977e-06, "loss": 0.5974, "step": 7373 }, { "epoch": 0.08816461220244144, "grad_norm": 3.1942238807678223, "learning_rate": 9.911608139200505e-06, "loss": 0.5936, "step": 7374 }, { "epoch": 0.0881765683473021, "grad_norm": 2.0911922454833984, "learning_rate": 9.911571890197061e-06, "loss": 0.6719, "step": 7375 }, { "epoch": 0.08818852449216275, "grad_norm": 1.5049570798873901, "learning_rate": 9.911535633828695e-06, "loss": 0.5687, "step": 7376 }, { "epoch": 0.0882004806370234, "grad_norm": 4.165165424346924, "learning_rate": 9.911499370095468e-06, "loss": 0.7429, "step": 7377 }, { "epoch": 0.08821243678188405, "grad_norm": 2.3330395221710205, "learning_rate": 9.911463098997427e-06, "loss": 0.6142, "step": 7378 }, { "epoch": 0.0882243929267447, "grad_norm": 6.06402063369751, "learning_rate": 9.91142682053463e-06, "loss": 0.6996, "step": 7379 }, { "epoch": 0.08823634907160535, "grad_norm": 2.624755620956421, "learning_rate": 9.911390534707133e-06, "loss": 0.601, "step": 7380 }, { "epoch": 0.088248305216466, "grad_norm": 2.963489055633545, "learning_rate": 9.911354241514986e-06, "loss": 0.638, "step": 7381 }, { "epoch": 0.08826026136132666, "grad_norm": 2.9934470653533936, "learning_rate": 9.911317940958246e-06, "loss": 0.5672, "step": 7382 }, { "epoch": 0.0882722175061873, "grad_norm": 2.3683927059173584, "learning_rate": 9.911281633036969e-06, "loss": 0.682, "step": 7383 }, { "epoch": 0.08828417365104796, "grad_norm": 3.1994845867156982, "learning_rate": 9.911245317751207e-06, "loss": 0.597, "step": 7384 }, { "epoch": 0.0882961297959086, "grad_norm": 3.1208012104034424, "learning_rate": 9.911208995101016e-06, "loss": 0.6598, "step": 7385 }, { "epoch": 0.08830808594076926, "grad_norm": 2.25984787940979, "learning_rate": 9.911172665086448e-06, "loss": 0.6909, "step": 7386 }, { "epoch": 0.08832004208562991, "grad_norm": 5.838399410247803, "learning_rate": 9.91113632770756e-06, "loss": 0.6443, "step": 7387 }, { "epoch": 0.08833199823049057, "grad_norm": 2.3665637969970703, "learning_rate": 9.911099982964405e-06, "loss": 0.6303, "step": 7388 }, { "epoch": 0.08834395437535121, "grad_norm": 2.463268518447876, "learning_rate": 9.91106363085704e-06, "loss": 0.6601, "step": 7389 }, { "epoch": 0.08835591052021187, "grad_norm": 2.8457412719726562, "learning_rate": 9.911027271385515e-06, "loss": 0.5833, "step": 7390 }, { "epoch": 0.08836786666507251, "grad_norm": 1.991150140762329, "learning_rate": 9.91099090454989e-06, "loss": 0.6618, "step": 7391 }, { "epoch": 0.08837982280993316, "grad_norm": 3.5809364318847656, "learning_rate": 9.910954530350215e-06, "loss": 0.611, "step": 7392 }, { "epoch": 0.08839177895479382, "grad_norm": 1.8703696727752686, "learning_rate": 9.910918148786546e-06, "loss": 0.6914, "step": 7393 }, { "epoch": 0.08840373509965446, "grad_norm": 3.217754364013672, "learning_rate": 9.910881759858939e-06, "loss": 0.696, "step": 7394 }, { "epoch": 0.08841569124451512, "grad_norm": 4.438357353210449, "learning_rate": 9.910845363567446e-06, "loss": 0.696, "step": 7395 }, { "epoch": 0.08842764738937577, "grad_norm": 1.7664293050765991, "learning_rate": 9.910808959912122e-06, "loss": 0.6828, "step": 7396 }, { "epoch": 0.08843960353423642, "grad_norm": 1.8500796556472778, "learning_rate": 9.910772548893023e-06, "loss": 0.6355, "step": 7397 }, { "epoch": 0.08845155967909707, "grad_norm": 2.4287123680114746, "learning_rate": 9.910736130510206e-06, "loss": 0.6918, "step": 7398 }, { "epoch": 0.08846351582395773, "grad_norm": 5.107161521911621, "learning_rate": 9.91069970476372e-06, "loss": 0.6867, "step": 7399 }, { "epoch": 0.08847547196881837, "grad_norm": 4.40020227432251, "learning_rate": 9.910663271653624e-06, "loss": 0.5615, "step": 7400 }, { "epoch": 0.08848742811367903, "grad_norm": 4.79883337020874, "learning_rate": 9.91062683117997e-06, "loss": 0.737, "step": 7401 }, { "epoch": 0.08849938425853968, "grad_norm": 4.512155532836914, "learning_rate": 9.910590383342813e-06, "loss": 0.5839, "step": 7402 }, { "epoch": 0.08851134040340032, "grad_norm": 1.513220191001892, "learning_rate": 9.91055392814221e-06, "loss": 0.573, "step": 7403 }, { "epoch": 0.08852329654826098, "grad_norm": 2.1838791370391846, "learning_rate": 9.910517465578211e-06, "loss": 0.6802, "step": 7404 }, { "epoch": 0.08853525269312162, "grad_norm": 5.449561595916748, "learning_rate": 9.910480995650876e-06, "loss": 0.6333, "step": 7405 }, { "epoch": 0.08854720883798228, "grad_norm": 2.099292755126953, "learning_rate": 9.910444518360257e-06, "loss": 0.6232, "step": 7406 }, { "epoch": 0.08855916498284293, "grad_norm": 3.006930351257324, "learning_rate": 9.910408033706408e-06, "loss": 0.7212, "step": 7407 }, { "epoch": 0.08857112112770359, "grad_norm": 2.0830938816070557, "learning_rate": 9.910371541689386e-06, "loss": 0.662, "step": 7408 }, { "epoch": 0.08858307727256423, "grad_norm": 1.674604058265686, "learning_rate": 9.910335042309245e-06, "loss": 0.6385, "step": 7409 }, { "epoch": 0.08859503341742489, "grad_norm": 1.5621963739395142, "learning_rate": 9.910298535566036e-06, "loss": 0.5164, "step": 7410 }, { "epoch": 0.08860698956228553, "grad_norm": 2.382754325866699, "learning_rate": 9.91026202145982e-06, "loss": 0.618, "step": 7411 }, { "epoch": 0.0886189457071462, "grad_norm": 1.8155360221862793, "learning_rate": 9.910225499990647e-06, "loss": 0.5873, "step": 7412 }, { "epoch": 0.08863090185200684, "grad_norm": 2.7653093338012695, "learning_rate": 9.910188971158574e-06, "loss": 0.5624, "step": 7413 }, { "epoch": 0.0886428579968675, "grad_norm": 1.9127141237258911, "learning_rate": 9.910152434963655e-06, "loss": 0.5683, "step": 7414 }, { "epoch": 0.08865481414172814, "grad_norm": 4.263766288757324, "learning_rate": 9.910115891405945e-06, "loss": 0.6457, "step": 7415 }, { "epoch": 0.08866677028658879, "grad_norm": 2.4706013202667236, "learning_rate": 9.9100793404855e-06, "loss": 0.5665, "step": 7416 }, { "epoch": 0.08867872643144945, "grad_norm": 1.8903158903121948, "learning_rate": 9.910042782202372e-06, "loss": 0.6212, "step": 7417 }, { "epoch": 0.08869068257631009, "grad_norm": 1.5966805219650269, "learning_rate": 9.910006216556617e-06, "loss": 0.6753, "step": 7418 }, { "epoch": 0.08870263872117075, "grad_norm": 2.1086301803588867, "learning_rate": 9.909969643548291e-06, "loss": 0.6299, "step": 7419 }, { "epoch": 0.0887145948660314, "grad_norm": 1.9533954858779907, "learning_rate": 9.909933063177448e-06, "loss": 0.6057, "step": 7420 }, { "epoch": 0.08872655101089205, "grad_norm": 2.431795597076416, "learning_rate": 9.909896475444144e-06, "loss": 0.6629, "step": 7421 }, { "epoch": 0.0887385071557527, "grad_norm": 1.9378330707550049, "learning_rate": 9.909859880348431e-06, "loss": 0.5862, "step": 7422 }, { "epoch": 0.08875046330061336, "grad_norm": 4.891535758972168, "learning_rate": 9.909823277890367e-06, "loss": 0.6284, "step": 7423 }, { "epoch": 0.088762419445474, "grad_norm": 2.7221016883850098, "learning_rate": 9.909786668070005e-06, "loss": 0.5639, "step": 7424 }, { "epoch": 0.08877437559033466, "grad_norm": 4.280818462371826, "learning_rate": 9.909750050887401e-06, "loss": 0.6182, "step": 7425 }, { "epoch": 0.0887863317351953, "grad_norm": 25.2506160736084, "learning_rate": 9.909713426342609e-06, "loss": 0.5769, "step": 7426 }, { "epoch": 0.08879828788005595, "grad_norm": 2.215970516204834, "learning_rate": 9.909676794435683e-06, "loss": 0.5868, "step": 7427 }, { "epoch": 0.08881024402491661, "grad_norm": 1.9109456539154053, "learning_rate": 9.909640155166682e-06, "loss": 0.5902, "step": 7428 }, { "epoch": 0.08882220016977725, "grad_norm": 3.5144777297973633, "learning_rate": 9.909603508535656e-06, "loss": 0.6929, "step": 7429 }, { "epoch": 0.08883415631463791, "grad_norm": 1.4610556364059448, "learning_rate": 9.909566854542663e-06, "loss": 0.5986, "step": 7430 }, { "epoch": 0.08884611245949856, "grad_norm": 1.352588415145874, "learning_rate": 9.909530193187755e-06, "loss": 0.5777, "step": 7431 }, { "epoch": 0.08885806860435921, "grad_norm": 3.336120367050171, "learning_rate": 9.909493524470991e-06, "loss": 0.5541, "step": 7432 }, { "epoch": 0.08887002474921986, "grad_norm": 1.4329407215118408, "learning_rate": 9.909456848392423e-06, "loss": 0.697, "step": 7433 }, { "epoch": 0.08888198089408052, "grad_norm": 5.358066558837891, "learning_rate": 9.909420164952108e-06, "loss": 0.5338, "step": 7434 }, { "epoch": 0.08889393703894116, "grad_norm": 2.2753353118896484, "learning_rate": 9.9093834741501e-06, "loss": 0.755, "step": 7435 }, { "epoch": 0.08890589318380182, "grad_norm": 4.179793357849121, "learning_rate": 9.909346775986454e-06, "loss": 0.7321, "step": 7436 }, { "epoch": 0.08891784932866247, "grad_norm": 2.43270206451416, "learning_rate": 9.909310070461224e-06, "loss": 0.7032, "step": 7437 }, { "epoch": 0.08892980547352311, "grad_norm": 2.60408353805542, "learning_rate": 9.909273357574467e-06, "loss": 0.5462, "step": 7438 }, { "epoch": 0.08894176161838377, "grad_norm": 1.9975148439407349, "learning_rate": 9.909236637326238e-06, "loss": 0.5865, "step": 7439 }, { "epoch": 0.08895371776324441, "grad_norm": 3.066908597946167, "learning_rate": 9.90919990971659e-06, "loss": 0.5823, "step": 7440 }, { "epoch": 0.08896567390810507, "grad_norm": 2.2040624618530273, "learning_rate": 9.909163174745579e-06, "loss": 0.6023, "step": 7441 }, { "epoch": 0.08897763005296572, "grad_norm": 3.667607545852661, "learning_rate": 9.909126432413261e-06, "loss": 0.7402, "step": 7442 }, { "epoch": 0.08898958619782638, "grad_norm": 4.586547374725342, "learning_rate": 9.90908968271969e-06, "loss": 0.5681, "step": 7443 }, { "epoch": 0.08900154234268702, "grad_norm": 1.8391884565353394, "learning_rate": 9.909052925664922e-06, "loss": 0.6645, "step": 7444 }, { "epoch": 0.08901349848754768, "grad_norm": 2.0272939205169678, "learning_rate": 9.909016161249012e-06, "loss": 0.7187, "step": 7445 }, { "epoch": 0.08902545463240832, "grad_norm": 1.7470431327819824, "learning_rate": 9.908979389472012e-06, "loss": 0.5728, "step": 7446 }, { "epoch": 0.08903741077726898, "grad_norm": 1.7683683633804321, "learning_rate": 9.908942610333984e-06, "loss": 0.7085, "step": 7447 }, { "epoch": 0.08904936692212963, "grad_norm": 1.753969669342041, "learning_rate": 9.908905823834978e-06, "loss": 0.6429, "step": 7448 }, { "epoch": 0.08906132306699029, "grad_norm": 2.5242466926574707, "learning_rate": 9.908869029975049e-06, "loss": 0.6182, "step": 7449 }, { "epoch": 0.08907327921185093, "grad_norm": 8.07974910736084, "learning_rate": 9.908832228754254e-06, "loss": 0.6414, "step": 7450 }, { "epoch": 0.08908523535671158, "grad_norm": 1.992223858833313, "learning_rate": 9.908795420172647e-06, "loss": 0.6633, "step": 7451 }, { "epoch": 0.08909719150157223, "grad_norm": 2.024132013320923, "learning_rate": 9.908758604230285e-06, "loss": 0.6344, "step": 7452 }, { "epoch": 0.08910914764643288, "grad_norm": 6.8228068351745605, "learning_rate": 9.908721780927221e-06, "loss": 0.596, "step": 7453 }, { "epoch": 0.08912110379129354, "grad_norm": 2.084265947341919, "learning_rate": 9.908684950263512e-06, "loss": 0.6986, "step": 7454 }, { "epoch": 0.08913305993615418, "grad_norm": 1.9699918031692505, "learning_rate": 9.908648112239213e-06, "loss": 0.7074, "step": 7455 }, { "epoch": 0.08914501608101484, "grad_norm": 2.5483551025390625, "learning_rate": 9.908611266854378e-06, "loss": 0.5588, "step": 7456 }, { "epoch": 0.08915697222587549, "grad_norm": 2.9123497009277344, "learning_rate": 9.908574414109062e-06, "loss": 0.5767, "step": 7457 }, { "epoch": 0.08916892837073614, "grad_norm": 2.4108808040618896, "learning_rate": 9.908537554003323e-06, "loss": 0.5967, "step": 7458 }, { "epoch": 0.08918088451559679, "grad_norm": 2.1422533988952637, "learning_rate": 9.908500686537214e-06, "loss": 0.5965, "step": 7459 }, { "epoch": 0.08919284066045745, "grad_norm": 3.432642936706543, "learning_rate": 9.90846381171079e-06, "loss": 0.6713, "step": 7460 }, { "epoch": 0.08920479680531809, "grad_norm": 2.2761168479919434, "learning_rate": 9.908426929524108e-06, "loss": 0.6445, "step": 7461 }, { "epoch": 0.08921675295017874, "grad_norm": 3.3723561763763428, "learning_rate": 9.908390039977222e-06, "loss": 0.6197, "step": 7462 }, { "epoch": 0.0892287090950394, "grad_norm": 6.925197124481201, "learning_rate": 9.908353143070187e-06, "loss": 0.553, "step": 7463 }, { "epoch": 0.08924066523990004, "grad_norm": 3.2958033084869385, "learning_rate": 9.90831623880306e-06, "loss": 0.6008, "step": 7464 }, { "epoch": 0.0892526213847607, "grad_norm": 1.9848073720932007, "learning_rate": 9.908279327175895e-06, "loss": 0.634, "step": 7465 }, { "epoch": 0.08926457752962134, "grad_norm": 1.8723725080490112, "learning_rate": 9.908242408188748e-06, "loss": 0.6264, "step": 7466 }, { "epoch": 0.089276533674482, "grad_norm": 11.39486026763916, "learning_rate": 9.908205481841673e-06, "loss": 0.7521, "step": 7467 }, { "epoch": 0.08928848981934265, "grad_norm": 2.1700611114501953, "learning_rate": 9.908168548134727e-06, "loss": 0.6667, "step": 7468 }, { "epoch": 0.0893004459642033, "grad_norm": 2.2598867416381836, "learning_rate": 9.908131607067965e-06, "loss": 0.6953, "step": 7469 }, { "epoch": 0.08931240210906395, "grad_norm": 2.3968729972839355, "learning_rate": 9.908094658641444e-06, "loss": 0.6807, "step": 7470 }, { "epoch": 0.08932435825392461, "grad_norm": 1.6049250364303589, "learning_rate": 9.908057702855214e-06, "loss": 0.6415, "step": 7471 }, { "epoch": 0.08933631439878525, "grad_norm": 1.5178108215332031, "learning_rate": 9.908020739709337e-06, "loss": 0.6597, "step": 7472 }, { "epoch": 0.08934827054364591, "grad_norm": 9.570115089416504, "learning_rate": 9.907983769203864e-06, "loss": 0.7938, "step": 7473 }, { "epoch": 0.08936022668850656, "grad_norm": 1.6750283241271973, "learning_rate": 9.907946791338852e-06, "loss": 0.5718, "step": 7474 }, { "epoch": 0.0893721828333672, "grad_norm": 1.891967535018921, "learning_rate": 9.907909806114356e-06, "loss": 0.7083, "step": 7475 }, { "epoch": 0.08938413897822786, "grad_norm": 1.7717018127441406, "learning_rate": 9.907872813530433e-06, "loss": 0.6776, "step": 7476 }, { "epoch": 0.0893960951230885, "grad_norm": 2.025522232055664, "learning_rate": 9.907835813587136e-06, "loss": 0.596, "step": 7477 }, { "epoch": 0.08940805126794917, "grad_norm": 2.2020254135131836, "learning_rate": 9.907798806284521e-06, "loss": 0.5852, "step": 7478 }, { "epoch": 0.08942000741280981, "grad_norm": 5.975053310394287, "learning_rate": 9.907761791622646e-06, "loss": 0.6208, "step": 7479 }, { "epoch": 0.08943196355767047, "grad_norm": 3.4236955642700195, "learning_rate": 9.907724769601565e-06, "loss": 0.6128, "step": 7480 }, { "epoch": 0.08944391970253111, "grad_norm": 2.393770933151245, "learning_rate": 9.907687740221333e-06, "loss": 0.7349, "step": 7481 }, { "epoch": 0.08945587584739177, "grad_norm": 2.0271103382110596, "learning_rate": 9.907650703482005e-06, "loss": 0.6746, "step": 7482 }, { "epoch": 0.08946783199225242, "grad_norm": 1.9569282531738281, "learning_rate": 9.907613659383637e-06, "loss": 0.6203, "step": 7483 }, { "epoch": 0.08947978813711308, "grad_norm": 2.3316025733947754, "learning_rate": 9.907576607926286e-06, "loss": 0.6021, "step": 7484 }, { "epoch": 0.08949174428197372, "grad_norm": 1.9998576641082764, "learning_rate": 9.907539549110006e-06, "loss": 0.5815, "step": 7485 }, { "epoch": 0.08950370042683437, "grad_norm": 1.7003811597824097, "learning_rate": 9.907502482934852e-06, "loss": 0.6339, "step": 7486 }, { "epoch": 0.08951565657169502, "grad_norm": 2.2430121898651123, "learning_rate": 9.907465409400882e-06, "loss": 0.7045, "step": 7487 }, { "epoch": 0.08952761271655567, "grad_norm": 2.2348601818084717, "learning_rate": 9.90742832850815e-06, "loss": 0.6261, "step": 7488 }, { "epoch": 0.08953956886141633, "grad_norm": 2.5340023040771484, "learning_rate": 9.907391240256713e-06, "loss": 0.6009, "step": 7489 }, { "epoch": 0.08955152500627697, "grad_norm": 2.3941614627838135, "learning_rate": 9.907354144646624e-06, "loss": 0.5869, "step": 7490 }, { "epoch": 0.08956348115113763, "grad_norm": 3.418691635131836, "learning_rate": 9.90731704167794e-06, "loss": 0.7147, "step": 7491 }, { "epoch": 0.08957543729599828, "grad_norm": 3.3294055461883545, "learning_rate": 9.907279931350715e-06, "loss": 0.6579, "step": 7492 }, { "epoch": 0.08958739344085893, "grad_norm": 4.238478183746338, "learning_rate": 9.907242813665008e-06, "loss": 0.6752, "step": 7493 }, { "epoch": 0.08959934958571958, "grad_norm": 2.4473390579223633, "learning_rate": 9.907205688620874e-06, "loss": 0.6239, "step": 7494 }, { "epoch": 0.08961130573058024, "grad_norm": 1.7575422525405884, "learning_rate": 9.907168556218366e-06, "loss": 0.613, "step": 7495 }, { "epoch": 0.08962326187544088, "grad_norm": 3.2780635356903076, "learning_rate": 9.907131416457542e-06, "loss": 0.571, "step": 7496 }, { "epoch": 0.08963521802030154, "grad_norm": 2.1515798568725586, "learning_rate": 9.907094269338456e-06, "loss": 0.6837, "step": 7497 }, { "epoch": 0.08964717416516219, "grad_norm": 1.8984956741333008, "learning_rate": 9.907057114861165e-06, "loss": 0.5814, "step": 7498 }, { "epoch": 0.08965913031002283, "grad_norm": 4.4716596603393555, "learning_rate": 9.907019953025724e-06, "loss": 0.6117, "step": 7499 }, { "epoch": 0.08967108645488349, "grad_norm": 1.6515165567398071, "learning_rate": 9.906982783832191e-06, "loss": 0.5821, "step": 7500 }, { "epoch": 0.08968304259974413, "grad_norm": 2.0786163806915283, "learning_rate": 9.90694560728062e-06, "loss": 0.6243, "step": 7501 }, { "epoch": 0.08969499874460479, "grad_norm": 3.685098886489868, "learning_rate": 9.906908423371064e-06, "loss": 0.5405, "step": 7502 }, { "epoch": 0.08970695488946544, "grad_norm": 2.4742045402526855, "learning_rate": 9.906871232103584e-06, "loss": 0.6663, "step": 7503 }, { "epoch": 0.0897189110343261, "grad_norm": 2.529130220413208, "learning_rate": 9.906834033478231e-06, "loss": 0.5594, "step": 7504 }, { "epoch": 0.08973086717918674, "grad_norm": 1.4731580018997192, "learning_rate": 9.906796827495064e-06, "loss": 0.7269, "step": 7505 }, { "epoch": 0.0897428233240474, "grad_norm": 1.7909893989562988, "learning_rate": 9.90675961415414e-06, "loss": 0.619, "step": 7506 }, { "epoch": 0.08975477946890804, "grad_norm": 1.8113791942596436, "learning_rate": 9.90672239345551e-06, "loss": 0.6934, "step": 7507 }, { "epoch": 0.0897667356137687, "grad_norm": 2.549325704574585, "learning_rate": 9.906685165399231e-06, "loss": 0.7247, "step": 7508 }, { "epoch": 0.08977869175862935, "grad_norm": 2.3477590084075928, "learning_rate": 9.906647929985362e-06, "loss": 0.668, "step": 7509 }, { "epoch": 0.08979064790348999, "grad_norm": 2.8077099323272705, "learning_rate": 9.906610687213956e-06, "loss": 0.6862, "step": 7510 }, { "epoch": 0.08980260404835065, "grad_norm": 2.2482893466949463, "learning_rate": 9.906573437085071e-06, "loss": 0.5913, "step": 7511 }, { "epoch": 0.0898145601932113, "grad_norm": 2.0195415019989014, "learning_rate": 9.906536179598762e-06, "loss": 0.6437, "step": 7512 }, { "epoch": 0.08982651633807195, "grad_norm": 1.7676666975021362, "learning_rate": 9.906498914755083e-06, "loss": 0.5606, "step": 7513 }, { "epoch": 0.0898384724829326, "grad_norm": 7.75370979309082, "learning_rate": 9.906461642554092e-06, "loss": 0.6221, "step": 7514 }, { "epoch": 0.08985042862779326, "grad_norm": 2.622263193130493, "learning_rate": 9.906424362995845e-06, "loss": 0.611, "step": 7515 }, { "epoch": 0.0898623847726539, "grad_norm": 1.9646975994110107, "learning_rate": 9.906387076080398e-06, "loss": 0.5924, "step": 7516 }, { "epoch": 0.08987434091751456, "grad_norm": 2.3012566566467285, "learning_rate": 9.906349781807803e-06, "loss": 0.5958, "step": 7517 }, { "epoch": 0.0898862970623752, "grad_norm": 10.098994255065918, "learning_rate": 9.906312480178122e-06, "loss": 0.6406, "step": 7518 }, { "epoch": 0.08989825320723586, "grad_norm": 3.092404365539551, "learning_rate": 9.906275171191407e-06, "loss": 0.7226, "step": 7519 }, { "epoch": 0.08991020935209651, "grad_norm": 2.6348650455474854, "learning_rate": 9.906237854847715e-06, "loss": 0.5907, "step": 7520 }, { "epoch": 0.08992216549695715, "grad_norm": 1.8511544466018677, "learning_rate": 9.9062005311471e-06, "loss": 0.7265, "step": 7521 }, { "epoch": 0.08993412164181781, "grad_norm": 1.6613529920578003, "learning_rate": 9.906163200089622e-06, "loss": 0.5971, "step": 7522 }, { "epoch": 0.08994607778667846, "grad_norm": 3.20646333694458, "learning_rate": 9.906125861675335e-06, "loss": 0.6079, "step": 7523 }, { "epoch": 0.08995803393153912, "grad_norm": 2.308582305908203, "learning_rate": 9.906088515904296e-06, "loss": 0.556, "step": 7524 }, { "epoch": 0.08996999007639976, "grad_norm": 1.6212726831436157, "learning_rate": 9.906051162776557e-06, "loss": 0.5298, "step": 7525 }, { "epoch": 0.08998194622126042, "grad_norm": 2.3307318687438965, "learning_rate": 9.906013802292178e-06, "loss": 0.5862, "step": 7526 }, { "epoch": 0.08999390236612106, "grad_norm": 2.455832004547119, "learning_rate": 9.905976434451214e-06, "loss": 0.6296, "step": 7527 }, { "epoch": 0.09000585851098172, "grad_norm": 1.5878013372421265, "learning_rate": 9.90593905925372e-06, "loss": 0.6183, "step": 7528 }, { "epoch": 0.09001781465584237, "grad_norm": 2.4948372840881348, "learning_rate": 9.905901676699753e-06, "loss": 0.7219, "step": 7529 }, { "epoch": 0.09002977080070303, "grad_norm": 2.4832921028137207, "learning_rate": 9.90586428678937e-06, "loss": 0.5708, "step": 7530 }, { "epoch": 0.09004172694556367, "grad_norm": 1.4493533372879028, "learning_rate": 9.905826889522627e-06, "loss": 0.6635, "step": 7531 }, { "epoch": 0.09005368309042433, "grad_norm": 1.6931837797164917, "learning_rate": 9.905789484899577e-06, "loss": 0.6136, "step": 7532 }, { "epoch": 0.09006563923528497, "grad_norm": 4.664152145385742, "learning_rate": 9.90575207292028e-06, "loss": 0.6476, "step": 7533 }, { "epoch": 0.09007759538014562, "grad_norm": 5.3199143409729, "learning_rate": 9.905714653584789e-06, "loss": 0.6758, "step": 7534 }, { "epoch": 0.09008955152500628, "grad_norm": 2.090820789337158, "learning_rate": 9.905677226893162e-06, "loss": 0.6538, "step": 7535 }, { "epoch": 0.09010150766986692, "grad_norm": 17.436599731445312, "learning_rate": 9.905639792845455e-06, "loss": 0.6943, "step": 7536 }, { "epoch": 0.09011346381472758, "grad_norm": 1.41110098361969, "learning_rate": 9.905602351441723e-06, "loss": 0.5685, "step": 7537 }, { "epoch": 0.09012541995958823, "grad_norm": 6.0029706954956055, "learning_rate": 9.905564902682023e-06, "loss": 0.6006, "step": 7538 }, { "epoch": 0.09013737610444889, "grad_norm": 2.312920093536377, "learning_rate": 9.905527446566411e-06, "loss": 0.5806, "step": 7539 }, { "epoch": 0.09014933224930953, "grad_norm": 3.9939608573913574, "learning_rate": 9.905489983094943e-06, "loss": 0.6819, "step": 7540 }, { "epoch": 0.09016128839417019, "grad_norm": 1.9687025547027588, "learning_rate": 9.905452512267675e-06, "loss": 0.6656, "step": 7541 }, { "epoch": 0.09017324453903083, "grad_norm": 3.702829122543335, "learning_rate": 9.905415034084665e-06, "loss": 0.6344, "step": 7542 }, { "epoch": 0.09018520068389149, "grad_norm": 1.945479154586792, "learning_rate": 9.905377548545967e-06, "loss": 0.5752, "step": 7543 }, { "epoch": 0.09019715682875214, "grad_norm": 2.387176036834717, "learning_rate": 9.905340055651638e-06, "loss": 0.7078, "step": 7544 }, { "epoch": 0.09020911297361278, "grad_norm": 2.7207064628601074, "learning_rate": 9.905302555401732e-06, "loss": 0.6738, "step": 7545 }, { "epoch": 0.09022106911847344, "grad_norm": 1.7337325811386108, "learning_rate": 9.905265047796312e-06, "loss": 0.6189, "step": 7546 }, { "epoch": 0.09023302526333409, "grad_norm": 1.5751268863677979, "learning_rate": 9.905227532835423e-06, "loss": 0.5949, "step": 7547 }, { "epoch": 0.09024498140819474, "grad_norm": 4.217498779296875, "learning_rate": 9.905190010519133e-06, "loss": 0.6986, "step": 7548 }, { "epoch": 0.09025693755305539, "grad_norm": 25.584680557250977, "learning_rate": 9.905152480847494e-06, "loss": 0.5254, "step": 7549 }, { "epoch": 0.09026889369791605, "grad_norm": 2.342360496520996, "learning_rate": 9.905114943820557e-06, "loss": 0.6068, "step": 7550 }, { "epoch": 0.09028084984277669, "grad_norm": 3.519679307937622, "learning_rate": 9.905077399438385e-06, "loss": 0.6799, "step": 7551 }, { "epoch": 0.09029280598763735, "grad_norm": 3.698906898498535, "learning_rate": 9.905039847701032e-06, "loss": 0.619, "step": 7552 }, { "epoch": 0.090304762132498, "grad_norm": 1.6804248094558716, "learning_rate": 9.905002288608555e-06, "loss": 0.5292, "step": 7553 }, { "epoch": 0.09031671827735865, "grad_norm": 55.24628829956055, "learning_rate": 9.90496472216101e-06, "loss": 0.6309, "step": 7554 }, { "epoch": 0.0903286744222193, "grad_norm": 1.7641997337341309, "learning_rate": 9.90492714835845e-06, "loss": 0.6438, "step": 7555 }, { "epoch": 0.09034063056707996, "grad_norm": 1.6150699853897095, "learning_rate": 9.904889567200936e-06, "loss": 0.6185, "step": 7556 }, { "epoch": 0.0903525867119406, "grad_norm": 3.150195837020874, "learning_rate": 9.904851978688523e-06, "loss": 0.5956, "step": 7557 }, { "epoch": 0.09036454285680125, "grad_norm": 2.281062602996826, "learning_rate": 9.904814382821267e-06, "loss": 0.7514, "step": 7558 }, { "epoch": 0.0903764990016619, "grad_norm": 3.5735723972320557, "learning_rate": 9.904776779599226e-06, "loss": 0.6184, "step": 7559 }, { "epoch": 0.09038845514652255, "grad_norm": 1.6593437194824219, "learning_rate": 9.904739169022452e-06, "loss": 0.5292, "step": 7560 }, { "epoch": 0.09040041129138321, "grad_norm": 1.9355851411819458, "learning_rate": 9.904701551091006e-06, "loss": 0.7172, "step": 7561 }, { "epoch": 0.09041236743624385, "grad_norm": 3.8351430892944336, "learning_rate": 9.90466392580494e-06, "loss": 0.6252, "step": 7562 }, { "epoch": 0.09042432358110451, "grad_norm": 2.4159328937530518, "learning_rate": 9.904626293164316e-06, "loss": 0.6507, "step": 7563 }, { "epoch": 0.09043627972596516, "grad_norm": 2.204228401184082, "learning_rate": 9.904588653169187e-06, "loss": 0.5961, "step": 7564 }, { "epoch": 0.09044823587082582, "grad_norm": 2.414337635040283, "learning_rate": 9.904551005819609e-06, "loss": 0.7185, "step": 7565 }, { "epoch": 0.09046019201568646, "grad_norm": 1.887087345123291, "learning_rate": 9.904513351115638e-06, "loss": 0.5773, "step": 7566 }, { "epoch": 0.09047214816054712, "grad_norm": 1.6714489459991455, "learning_rate": 9.904475689057333e-06, "loss": 0.5693, "step": 7567 }, { "epoch": 0.09048410430540776, "grad_norm": 6.091268539428711, "learning_rate": 9.90443801964475e-06, "loss": 0.7113, "step": 7568 }, { "epoch": 0.09049606045026841, "grad_norm": 10.808797836303711, "learning_rate": 9.904400342877944e-06, "loss": 0.6517, "step": 7569 }, { "epoch": 0.09050801659512907, "grad_norm": 1.5205495357513428, "learning_rate": 9.904362658756973e-06, "loss": 0.5825, "step": 7570 }, { "epoch": 0.09051997273998971, "grad_norm": 1.826220154762268, "learning_rate": 9.904324967281893e-06, "loss": 0.7166, "step": 7571 }, { "epoch": 0.09053192888485037, "grad_norm": 1.9657772779464722, "learning_rate": 9.90428726845276e-06, "loss": 0.6121, "step": 7572 }, { "epoch": 0.09054388502971102, "grad_norm": 1.9276139736175537, "learning_rate": 9.90424956226963e-06, "loss": 0.6431, "step": 7573 }, { "epoch": 0.09055584117457167, "grad_norm": 1.773061752319336, "learning_rate": 9.90421184873256e-06, "loss": 0.7006, "step": 7574 }, { "epoch": 0.09056779731943232, "grad_norm": 1.5948331356048584, "learning_rate": 9.904174127841608e-06, "loss": 0.6466, "step": 7575 }, { "epoch": 0.09057975346429298, "grad_norm": 2.0507869720458984, "learning_rate": 9.904136399596828e-06, "loss": 0.6046, "step": 7576 }, { "epoch": 0.09059170960915362, "grad_norm": 2.1417672634124756, "learning_rate": 9.90409866399828e-06, "loss": 0.5204, "step": 7577 }, { "epoch": 0.09060366575401428, "grad_norm": 8.525518417358398, "learning_rate": 9.904060921046018e-06, "loss": 0.5877, "step": 7578 }, { "epoch": 0.09061562189887493, "grad_norm": 2.024395704269409, "learning_rate": 9.904023170740099e-06, "loss": 0.5887, "step": 7579 }, { "epoch": 0.09062757804373557, "grad_norm": 2.659195899963379, "learning_rate": 9.90398541308058e-06, "loss": 0.6889, "step": 7580 }, { "epoch": 0.09063953418859623, "grad_norm": 2.072808265686035, "learning_rate": 9.903947648067517e-06, "loss": 0.6731, "step": 7581 }, { "epoch": 0.09065149033345687, "grad_norm": 1.74069344997406, "learning_rate": 9.903909875700969e-06, "loss": 0.5811, "step": 7582 }, { "epoch": 0.09066344647831753, "grad_norm": 2.0429182052612305, "learning_rate": 9.903872095980988e-06, "loss": 0.6126, "step": 7583 }, { "epoch": 0.09067540262317818, "grad_norm": 2.2590413093566895, "learning_rate": 9.903834308907633e-06, "loss": 0.6356, "step": 7584 }, { "epoch": 0.09068735876803884, "grad_norm": 2.185730457305908, "learning_rate": 9.903796514480964e-06, "loss": 0.6658, "step": 7585 }, { "epoch": 0.09069931491289948, "grad_norm": 2.8915462493896484, "learning_rate": 9.903758712701033e-06, "loss": 0.586, "step": 7586 }, { "epoch": 0.09071127105776014, "grad_norm": 2.4965240955352783, "learning_rate": 9.903720903567898e-06, "loss": 0.659, "step": 7587 }, { "epoch": 0.09072322720262078, "grad_norm": 3.446505546569824, "learning_rate": 9.903683087081617e-06, "loss": 0.6422, "step": 7588 }, { "epoch": 0.09073518334748144, "grad_norm": 2.2998828887939453, "learning_rate": 9.903645263242245e-06, "loss": 0.6014, "step": 7589 }, { "epoch": 0.09074713949234209, "grad_norm": 4.571174621582031, "learning_rate": 9.90360743204984e-06, "loss": 0.7282, "step": 7590 }, { "epoch": 0.09075909563720275, "grad_norm": 1.7862603664398193, "learning_rate": 9.903569593504458e-06, "loss": 0.6324, "step": 7591 }, { "epoch": 0.09077105178206339, "grad_norm": 3.2332701683044434, "learning_rate": 9.903531747606157e-06, "loss": 0.6724, "step": 7592 }, { "epoch": 0.09078300792692404, "grad_norm": 1.9495222568511963, "learning_rate": 9.903493894354991e-06, "loss": 0.652, "step": 7593 }, { "epoch": 0.0907949640717847, "grad_norm": 2.5774643421173096, "learning_rate": 9.90345603375102e-06, "loss": 0.5918, "step": 7594 }, { "epoch": 0.09080692021664534, "grad_norm": 3.121248960494995, "learning_rate": 9.903418165794297e-06, "loss": 0.5827, "step": 7595 }, { "epoch": 0.090818876361506, "grad_norm": 3.017070770263672, "learning_rate": 9.903380290484883e-06, "loss": 0.6595, "step": 7596 }, { "epoch": 0.09083083250636664, "grad_norm": 4.209043025970459, "learning_rate": 9.903342407822832e-06, "loss": 0.579, "step": 7597 }, { "epoch": 0.0908427886512273, "grad_norm": 2.8958237171173096, "learning_rate": 9.903304517808201e-06, "loss": 0.5933, "step": 7598 }, { "epoch": 0.09085474479608795, "grad_norm": 1.9366310834884644, "learning_rate": 9.90326662044105e-06, "loss": 0.6986, "step": 7599 }, { "epoch": 0.0908667009409486, "grad_norm": 2.6557974815368652, "learning_rate": 9.903228715721431e-06, "loss": 0.7195, "step": 7600 }, { "epoch": 0.09087865708580925, "grad_norm": 1.9451587200164795, "learning_rate": 9.903190803649405e-06, "loss": 0.5665, "step": 7601 }, { "epoch": 0.09089061323066991, "grad_norm": 2.2968592643737793, "learning_rate": 9.903152884225025e-06, "loss": 0.6576, "step": 7602 }, { "epoch": 0.09090256937553055, "grad_norm": 2.221741199493408, "learning_rate": 9.90311495744835e-06, "loss": 0.6623, "step": 7603 }, { "epoch": 0.0909145255203912, "grad_norm": 2.4539382457733154, "learning_rate": 9.903077023319438e-06, "loss": 0.6108, "step": 7604 }, { "epoch": 0.09092648166525186, "grad_norm": 1.91224205493927, "learning_rate": 9.903039081838343e-06, "loss": 0.636, "step": 7605 }, { "epoch": 0.0909384378101125, "grad_norm": 1.9592385292053223, "learning_rate": 9.903001133005126e-06, "loss": 0.5541, "step": 7606 }, { "epoch": 0.09095039395497316, "grad_norm": 5.3636603355407715, "learning_rate": 9.902963176819839e-06, "loss": 0.7019, "step": 7607 }, { "epoch": 0.0909623500998338, "grad_norm": 2.3378777503967285, "learning_rate": 9.902925213282542e-06, "loss": 0.637, "step": 7608 }, { "epoch": 0.09097430624469446, "grad_norm": 2.25630259513855, "learning_rate": 9.902887242393292e-06, "loss": 0.68, "step": 7609 }, { "epoch": 0.09098626238955511, "grad_norm": 5.336178302764893, "learning_rate": 9.902849264152143e-06, "loss": 0.6263, "step": 7610 }, { "epoch": 0.09099821853441577, "grad_norm": 2.8736352920532227, "learning_rate": 9.902811278559156e-06, "loss": 0.6485, "step": 7611 }, { "epoch": 0.09101017467927641, "grad_norm": 2.5886008739471436, "learning_rate": 9.902773285614386e-06, "loss": 0.6524, "step": 7612 }, { "epoch": 0.09102213082413707, "grad_norm": 4.609886169433594, "learning_rate": 9.902735285317888e-06, "loss": 0.6657, "step": 7613 }, { "epoch": 0.09103408696899772, "grad_norm": 4.299375057220459, "learning_rate": 9.902697277669724e-06, "loss": 0.7107, "step": 7614 }, { "epoch": 0.09104604311385837, "grad_norm": 2.999009132385254, "learning_rate": 9.902659262669945e-06, "loss": 0.6677, "step": 7615 }, { "epoch": 0.09105799925871902, "grad_norm": 2.7311882972717285, "learning_rate": 9.902621240318612e-06, "loss": 0.6853, "step": 7616 }, { "epoch": 0.09106995540357966, "grad_norm": 1.9346213340759277, "learning_rate": 9.902583210615783e-06, "loss": 0.6183, "step": 7617 }, { "epoch": 0.09108191154844032, "grad_norm": 4.46799898147583, "learning_rate": 9.902545173561512e-06, "loss": 0.6469, "step": 7618 }, { "epoch": 0.09109386769330097, "grad_norm": 1.9823713302612305, "learning_rate": 9.902507129155856e-06, "loss": 0.6153, "step": 7619 }, { "epoch": 0.09110582383816163, "grad_norm": 2.163529396057129, "learning_rate": 9.902469077398875e-06, "loss": 0.7532, "step": 7620 }, { "epoch": 0.09111777998302227, "grad_norm": 1.6236572265625, "learning_rate": 9.902431018290623e-06, "loss": 0.7312, "step": 7621 }, { "epoch": 0.09112973612788293, "grad_norm": 5.248617172241211, "learning_rate": 9.902392951831158e-06, "loss": 0.6663, "step": 7622 }, { "epoch": 0.09114169227274357, "grad_norm": 2.9139344692230225, "learning_rate": 9.902354878020537e-06, "loss": 0.7139, "step": 7623 }, { "epoch": 0.09115364841760423, "grad_norm": 3.045245885848999, "learning_rate": 9.902316796858819e-06, "loss": 0.6072, "step": 7624 }, { "epoch": 0.09116560456246488, "grad_norm": 4.073487758636475, "learning_rate": 9.902278708346057e-06, "loss": 0.6706, "step": 7625 }, { "epoch": 0.09117756070732554, "grad_norm": 2.724938154220581, "learning_rate": 9.902240612482313e-06, "loss": 0.576, "step": 7626 }, { "epoch": 0.09118951685218618, "grad_norm": 1.7783451080322266, "learning_rate": 9.902202509267642e-06, "loss": 0.6757, "step": 7627 }, { "epoch": 0.09120147299704683, "grad_norm": 5.983331680297852, "learning_rate": 9.9021643987021e-06, "loss": 0.5689, "step": 7628 }, { "epoch": 0.09121342914190748, "grad_norm": 2.2774436473846436, "learning_rate": 9.902126280785744e-06, "loss": 0.5766, "step": 7629 }, { "epoch": 0.09122538528676813, "grad_norm": 7.370788097381592, "learning_rate": 9.902088155518634e-06, "loss": 0.6539, "step": 7630 }, { "epoch": 0.09123734143162879, "grad_norm": 3.8710033893585205, "learning_rate": 9.902050022900824e-06, "loss": 0.6013, "step": 7631 }, { "epoch": 0.09124929757648943, "grad_norm": 2.118142604827881, "learning_rate": 9.902011882932375e-06, "loss": 0.6407, "step": 7632 }, { "epoch": 0.09126125372135009, "grad_norm": 2.532271385192871, "learning_rate": 9.90197373561334e-06, "loss": 0.648, "step": 7633 }, { "epoch": 0.09127320986621074, "grad_norm": 2.468191623687744, "learning_rate": 9.901935580943776e-06, "loss": 0.6066, "step": 7634 }, { "epoch": 0.0912851660110714, "grad_norm": 2.52457857131958, "learning_rate": 9.901897418923745e-06, "loss": 0.7443, "step": 7635 }, { "epoch": 0.09129712215593204, "grad_norm": 1.5770715475082397, "learning_rate": 9.901859249553301e-06, "loss": 0.6377, "step": 7636 }, { "epoch": 0.0913090783007927, "grad_norm": 2.324695587158203, "learning_rate": 9.9018210728325e-06, "loss": 0.5855, "step": 7637 }, { "epoch": 0.09132103444565334, "grad_norm": 2.0710787773132324, "learning_rate": 9.901782888761402e-06, "loss": 0.5602, "step": 7638 }, { "epoch": 0.091332990590514, "grad_norm": 2.5311660766601562, "learning_rate": 9.901744697340063e-06, "loss": 0.6612, "step": 7639 }, { "epoch": 0.09134494673537465, "grad_norm": 5.5482683181762695, "learning_rate": 9.901706498568541e-06, "loss": 0.7041, "step": 7640 }, { "epoch": 0.09135690288023529, "grad_norm": 102.42597198486328, "learning_rate": 9.901668292446891e-06, "loss": 0.7917, "step": 7641 }, { "epoch": 0.09136885902509595, "grad_norm": 3.658595561981201, "learning_rate": 9.901630078975176e-06, "loss": 0.6432, "step": 7642 }, { "epoch": 0.0913808151699566, "grad_norm": 2.1661412715911865, "learning_rate": 9.901591858153445e-06, "loss": 0.5843, "step": 7643 }, { "epoch": 0.09139277131481725, "grad_norm": 2.3256475925445557, "learning_rate": 9.901553629981762e-06, "loss": 0.6127, "step": 7644 }, { "epoch": 0.0914047274596779, "grad_norm": 2.120335340499878, "learning_rate": 9.90151539446018e-06, "loss": 0.6518, "step": 7645 }, { "epoch": 0.09141668360453856, "grad_norm": 4.747528076171875, "learning_rate": 9.901477151588759e-06, "loss": 0.6069, "step": 7646 }, { "epoch": 0.0914286397493992, "grad_norm": 5.230048656463623, "learning_rate": 9.901438901367557e-06, "loss": 0.6478, "step": 7647 }, { "epoch": 0.09144059589425986, "grad_norm": 1.9570043087005615, "learning_rate": 9.901400643796627e-06, "loss": 0.6266, "step": 7648 }, { "epoch": 0.0914525520391205, "grad_norm": 1.7451815605163574, "learning_rate": 9.901362378876032e-06, "loss": 0.5959, "step": 7649 }, { "epoch": 0.09146450818398116, "grad_norm": 3.552478075027466, "learning_rate": 9.901324106605825e-06, "loss": 0.5612, "step": 7650 }, { "epoch": 0.09147646432884181, "grad_norm": 2.2528512477874756, "learning_rate": 9.901285826986066e-06, "loss": 0.6455, "step": 7651 }, { "epoch": 0.09148842047370245, "grad_norm": 1.7179754972457886, "learning_rate": 9.901247540016812e-06, "loss": 0.7283, "step": 7652 }, { "epoch": 0.09150037661856311, "grad_norm": 3.709017276763916, "learning_rate": 9.901209245698117e-06, "loss": 0.6165, "step": 7653 }, { "epoch": 0.09151233276342376, "grad_norm": 2.670349597930908, "learning_rate": 9.901170944030045e-06, "loss": 0.6082, "step": 7654 }, { "epoch": 0.09152428890828442, "grad_norm": 3.164015769958496, "learning_rate": 9.901132635012649e-06, "loss": 0.6393, "step": 7655 }, { "epoch": 0.09153624505314506, "grad_norm": 2.3761613368988037, "learning_rate": 9.901094318645984e-06, "loss": 0.6889, "step": 7656 }, { "epoch": 0.09154820119800572, "grad_norm": 1.6775412559509277, "learning_rate": 9.901055994930115e-06, "loss": 0.6307, "step": 7657 }, { "epoch": 0.09156015734286636, "grad_norm": 20.974546432495117, "learning_rate": 9.901017663865093e-06, "loss": 0.6857, "step": 7658 }, { "epoch": 0.09157211348772702, "grad_norm": 2.5648560523986816, "learning_rate": 9.900979325450978e-06, "loss": 0.6912, "step": 7659 }, { "epoch": 0.09158406963258767, "grad_norm": 2.0101449489593506, "learning_rate": 9.900940979687827e-06, "loss": 0.613, "step": 7660 }, { "epoch": 0.09159602577744833, "grad_norm": 1.7239313125610352, "learning_rate": 9.900902626575696e-06, "loss": 0.5993, "step": 7661 }, { "epoch": 0.09160798192230897, "grad_norm": 1.278254747390747, "learning_rate": 9.900864266114647e-06, "loss": 0.597, "step": 7662 }, { "epoch": 0.09161993806716962, "grad_norm": 2.3014864921569824, "learning_rate": 9.900825898304733e-06, "loss": 0.7094, "step": 7663 }, { "epoch": 0.09163189421203027, "grad_norm": 1.8244414329528809, "learning_rate": 9.900787523146013e-06, "loss": 0.5431, "step": 7664 }, { "epoch": 0.09164385035689092, "grad_norm": 3.5028743743896484, "learning_rate": 9.900749140638546e-06, "loss": 0.5569, "step": 7665 }, { "epoch": 0.09165580650175158, "grad_norm": 1.5809049606323242, "learning_rate": 9.900710750782387e-06, "loss": 0.6677, "step": 7666 }, { "epoch": 0.09166776264661222, "grad_norm": 1.8045250177383423, "learning_rate": 9.900672353577593e-06, "loss": 0.6494, "step": 7667 }, { "epoch": 0.09167971879147288, "grad_norm": 2.1960232257843018, "learning_rate": 9.900633949024227e-06, "loss": 0.734, "step": 7668 }, { "epoch": 0.09169167493633353, "grad_norm": 3.6015422344207764, "learning_rate": 9.900595537122341e-06, "loss": 0.6519, "step": 7669 }, { "epoch": 0.09170363108119418, "grad_norm": 2.0976972579956055, "learning_rate": 9.900557117871996e-06, "loss": 0.6874, "step": 7670 }, { "epoch": 0.09171558722605483, "grad_norm": 2.995205879211426, "learning_rate": 9.900518691273247e-06, "loss": 0.6182, "step": 7671 }, { "epoch": 0.09172754337091549, "grad_norm": 2.1146581172943115, "learning_rate": 9.900480257326153e-06, "loss": 0.6686, "step": 7672 }, { "epoch": 0.09173949951577613, "grad_norm": 3.0301496982574463, "learning_rate": 9.900441816030772e-06, "loss": 0.6599, "step": 7673 }, { "epoch": 0.09175145566063679, "grad_norm": 3.4927492141723633, "learning_rate": 9.900403367387159e-06, "loss": 0.6415, "step": 7674 }, { "epoch": 0.09176341180549744, "grad_norm": 1.9438222646713257, "learning_rate": 9.900364911395377e-06, "loss": 0.6474, "step": 7675 }, { "epoch": 0.09177536795035808, "grad_norm": 2.0113041400909424, "learning_rate": 9.900326448055477e-06, "loss": 0.5265, "step": 7676 }, { "epoch": 0.09178732409521874, "grad_norm": 2.1139028072357178, "learning_rate": 9.900287977367521e-06, "loss": 0.5961, "step": 7677 }, { "epoch": 0.09179928024007938, "grad_norm": 2.7748098373413086, "learning_rate": 9.900249499331567e-06, "loss": 0.6698, "step": 7678 }, { "epoch": 0.09181123638494004, "grad_norm": 2.129774332046509, "learning_rate": 9.900211013947672e-06, "loss": 0.6084, "step": 7679 }, { "epoch": 0.09182319252980069, "grad_norm": 6.0184831619262695, "learning_rate": 9.900172521215892e-06, "loss": 0.5735, "step": 7680 }, { "epoch": 0.09183514867466135, "grad_norm": 2.0469448566436768, "learning_rate": 9.900134021136286e-06, "loss": 0.6514, "step": 7681 }, { "epoch": 0.09184710481952199, "grad_norm": 2.502702474594116, "learning_rate": 9.900095513708911e-06, "loss": 0.6647, "step": 7682 }, { "epoch": 0.09185906096438265, "grad_norm": 3.3744399547576904, "learning_rate": 9.900056998933825e-06, "loss": 0.6566, "step": 7683 }, { "epoch": 0.0918710171092433, "grad_norm": 3.3047707080841064, "learning_rate": 9.900018476811088e-06, "loss": 0.6705, "step": 7684 }, { "epoch": 0.09188297325410395, "grad_norm": 1.901578426361084, "learning_rate": 9.899979947340755e-06, "loss": 0.6562, "step": 7685 }, { "epoch": 0.0918949293989646, "grad_norm": 3.362494945526123, "learning_rate": 9.899941410522884e-06, "loss": 0.5699, "step": 7686 }, { "epoch": 0.09190688554382524, "grad_norm": 13.683476448059082, "learning_rate": 9.899902866357535e-06, "loss": 0.5697, "step": 7687 }, { "epoch": 0.0919188416886859, "grad_norm": 3.839733123779297, "learning_rate": 9.899864314844762e-06, "loss": 0.6065, "step": 7688 }, { "epoch": 0.09193079783354655, "grad_norm": 8.15191650390625, "learning_rate": 9.899825755984627e-06, "loss": 0.7045, "step": 7689 }, { "epoch": 0.0919427539784072, "grad_norm": 1.708935260772705, "learning_rate": 9.899787189777185e-06, "loss": 0.601, "step": 7690 }, { "epoch": 0.09195471012326785, "grad_norm": 1.9922049045562744, "learning_rate": 9.899748616222496e-06, "loss": 0.6046, "step": 7691 }, { "epoch": 0.09196666626812851, "grad_norm": 2.084024667739868, "learning_rate": 9.899710035320615e-06, "loss": 0.6955, "step": 7692 }, { "epoch": 0.09197862241298915, "grad_norm": 2.9062957763671875, "learning_rate": 9.899671447071601e-06, "loss": 0.5897, "step": 7693 }, { "epoch": 0.09199057855784981, "grad_norm": 2.632495164871216, "learning_rate": 9.899632851475513e-06, "loss": 0.7178, "step": 7694 }, { "epoch": 0.09200253470271046, "grad_norm": 2.0650835037231445, "learning_rate": 9.899594248532409e-06, "loss": 0.5679, "step": 7695 }, { "epoch": 0.09201449084757111, "grad_norm": 2.314631938934326, "learning_rate": 9.899555638242344e-06, "loss": 0.5974, "step": 7696 }, { "epoch": 0.09202644699243176, "grad_norm": 2.19126558303833, "learning_rate": 9.89951702060538e-06, "loss": 0.7429, "step": 7697 }, { "epoch": 0.09203840313729242, "grad_norm": 2.637687921524048, "learning_rate": 9.899478395621572e-06, "loss": 0.6946, "step": 7698 }, { "epoch": 0.09205035928215306, "grad_norm": 2.3120434284210205, "learning_rate": 9.89943976329098e-06, "loss": 0.6163, "step": 7699 }, { "epoch": 0.09206231542701371, "grad_norm": 3.6410534381866455, "learning_rate": 9.899401123613657e-06, "loss": 0.5999, "step": 7700 }, { "epoch": 0.09207427157187437, "grad_norm": 1.742085576057434, "learning_rate": 9.899362476589667e-06, "loss": 0.6451, "step": 7701 }, { "epoch": 0.09208622771673501, "grad_norm": 3.487410306930542, "learning_rate": 9.899323822219066e-06, "loss": 0.5651, "step": 7702 }, { "epoch": 0.09209818386159567, "grad_norm": 3.279520273208618, "learning_rate": 9.89928516050191e-06, "loss": 0.6732, "step": 7703 }, { "epoch": 0.09211014000645631, "grad_norm": 4.211667060852051, "learning_rate": 9.899246491438261e-06, "loss": 0.6171, "step": 7704 }, { "epoch": 0.09212209615131697, "grad_norm": 2.44209623336792, "learning_rate": 9.89920781502817e-06, "loss": 0.7506, "step": 7705 }, { "epoch": 0.09213405229617762, "grad_norm": 1.4557033777236938, "learning_rate": 9.899169131271704e-06, "loss": 0.6547, "step": 7706 }, { "epoch": 0.09214600844103828, "grad_norm": 1.8404210805892944, "learning_rate": 9.899130440168915e-06, "loss": 0.5016, "step": 7707 }, { "epoch": 0.09215796458589892, "grad_norm": 3.6557376384735107, "learning_rate": 9.899091741719862e-06, "loss": 0.5977, "step": 7708 }, { "epoch": 0.09216992073075958, "grad_norm": 1.9674516916275024, "learning_rate": 9.899053035924603e-06, "loss": 0.61, "step": 7709 }, { "epoch": 0.09218187687562023, "grad_norm": 1.978095531463623, "learning_rate": 9.899014322783199e-06, "loss": 0.7153, "step": 7710 }, { "epoch": 0.09219383302048087, "grad_norm": 2.0285849571228027, "learning_rate": 9.898975602295704e-06, "loss": 0.6162, "step": 7711 }, { "epoch": 0.09220578916534153, "grad_norm": 4.68703556060791, "learning_rate": 9.898936874462178e-06, "loss": 0.6234, "step": 7712 }, { "epoch": 0.09221774531020217, "grad_norm": 2.9400484561920166, "learning_rate": 9.898898139282678e-06, "loss": 0.6798, "step": 7713 }, { "epoch": 0.09222970145506283, "grad_norm": 2.9696431159973145, "learning_rate": 9.898859396757263e-06, "loss": 0.6011, "step": 7714 }, { "epoch": 0.09224165759992348, "grad_norm": 3.3025176525115967, "learning_rate": 9.898820646885991e-06, "loss": 0.7197, "step": 7715 }, { "epoch": 0.09225361374478414, "grad_norm": 2.189232110977173, "learning_rate": 9.89878188966892e-06, "loss": 0.6262, "step": 7716 }, { "epoch": 0.09226556988964478, "grad_norm": 2.4798123836517334, "learning_rate": 9.898743125106109e-06, "loss": 0.5996, "step": 7717 }, { "epoch": 0.09227752603450544, "grad_norm": 1.6780868768692017, "learning_rate": 9.898704353197614e-06, "loss": 0.6755, "step": 7718 }, { "epoch": 0.09228948217936608, "grad_norm": 1.8862342834472656, "learning_rate": 9.898665573943496e-06, "loss": 0.6613, "step": 7719 }, { "epoch": 0.09230143832422674, "grad_norm": 6.2137322425842285, "learning_rate": 9.898626787343811e-06, "loss": 0.6425, "step": 7720 }, { "epoch": 0.09231339446908739, "grad_norm": 2.230727434158325, "learning_rate": 9.898587993398616e-06, "loss": 0.6291, "step": 7721 }, { "epoch": 0.09232535061394803, "grad_norm": 2.3623054027557373, "learning_rate": 9.898549192107973e-06, "loss": 0.693, "step": 7722 }, { "epoch": 0.09233730675880869, "grad_norm": 4.547327995300293, "learning_rate": 9.898510383471937e-06, "loss": 0.5516, "step": 7723 }, { "epoch": 0.09234926290366934, "grad_norm": 2.469712972640991, "learning_rate": 9.898471567490566e-06, "loss": 0.7144, "step": 7724 }, { "epoch": 0.09236121904853, "grad_norm": 1.61489737033844, "learning_rate": 9.898432744163922e-06, "loss": 0.6431, "step": 7725 }, { "epoch": 0.09237317519339064, "grad_norm": 1.6535664796829224, "learning_rate": 9.89839391349206e-06, "loss": 0.6652, "step": 7726 }, { "epoch": 0.0923851313382513, "grad_norm": 4.839949607849121, "learning_rate": 9.898355075475039e-06, "loss": 0.6513, "step": 7727 }, { "epoch": 0.09239708748311194, "grad_norm": 2.0558149814605713, "learning_rate": 9.898316230112916e-06, "loss": 0.5831, "step": 7728 }, { "epoch": 0.0924090436279726, "grad_norm": 6.268530368804932, "learning_rate": 9.898277377405752e-06, "loss": 0.7181, "step": 7729 }, { "epoch": 0.09242099977283325, "grad_norm": 1.5720202922821045, "learning_rate": 9.898238517353603e-06, "loss": 0.7599, "step": 7730 }, { "epoch": 0.0924329559176939, "grad_norm": 2.532158136367798, "learning_rate": 9.898199649956527e-06, "loss": 0.5879, "step": 7731 }, { "epoch": 0.09244491206255455, "grad_norm": 2.098667621612549, "learning_rate": 9.898160775214584e-06, "loss": 0.6156, "step": 7732 }, { "epoch": 0.09245686820741521, "grad_norm": 7.075351715087891, "learning_rate": 9.898121893127832e-06, "loss": 0.6132, "step": 7733 }, { "epoch": 0.09246882435227585, "grad_norm": 1.5373886823654175, "learning_rate": 9.898083003696328e-06, "loss": 0.5602, "step": 7734 }, { "epoch": 0.0924807804971365, "grad_norm": 5.454044342041016, "learning_rate": 9.89804410692013e-06, "loss": 0.6034, "step": 7735 }, { "epoch": 0.09249273664199716, "grad_norm": 2.9604737758636475, "learning_rate": 9.8980052027993e-06, "loss": 0.5956, "step": 7736 }, { "epoch": 0.0925046927868578, "grad_norm": 3.352515697479248, "learning_rate": 9.897966291333893e-06, "loss": 0.7235, "step": 7737 }, { "epoch": 0.09251664893171846, "grad_norm": 2.0192248821258545, "learning_rate": 9.897927372523968e-06, "loss": 0.577, "step": 7738 }, { "epoch": 0.0925286050765791, "grad_norm": 2.8665847778320312, "learning_rate": 9.897888446369583e-06, "loss": 0.6508, "step": 7739 }, { "epoch": 0.09254056122143976, "grad_norm": 1.4075918197631836, "learning_rate": 9.897849512870797e-06, "loss": 0.6573, "step": 7740 }, { "epoch": 0.09255251736630041, "grad_norm": 3.009925365447998, "learning_rate": 9.89781057202767e-06, "loss": 0.6008, "step": 7741 }, { "epoch": 0.09256447351116107, "grad_norm": 1.868281364440918, "learning_rate": 9.897771623840256e-06, "loss": 0.6075, "step": 7742 }, { "epoch": 0.09257642965602171, "grad_norm": 2.733433246612549, "learning_rate": 9.897732668308617e-06, "loss": 0.6326, "step": 7743 }, { "epoch": 0.09258838580088237, "grad_norm": 2.0672755241394043, "learning_rate": 9.897693705432809e-06, "loss": 0.6129, "step": 7744 }, { "epoch": 0.09260034194574301, "grad_norm": 8.268135070800781, "learning_rate": 9.897654735212893e-06, "loss": 0.5818, "step": 7745 }, { "epoch": 0.09261229809060366, "grad_norm": 1.5412042140960693, "learning_rate": 9.897615757648929e-06, "loss": 0.6982, "step": 7746 }, { "epoch": 0.09262425423546432, "grad_norm": 2.331069231033325, "learning_rate": 9.897576772740969e-06, "loss": 0.6594, "step": 7747 }, { "epoch": 0.09263621038032496, "grad_norm": 4.071032524108887, "learning_rate": 9.897537780489078e-06, "loss": 0.6575, "step": 7748 }, { "epoch": 0.09264816652518562, "grad_norm": 1.9711108207702637, "learning_rate": 9.897498780893309e-06, "loss": 0.6425, "step": 7749 }, { "epoch": 0.09266012267004627, "grad_norm": 2.7668378353118896, "learning_rate": 9.897459773953725e-06, "loss": 0.642, "step": 7750 }, { "epoch": 0.09267207881490692, "grad_norm": 2.9728569984436035, "learning_rate": 9.897420759670382e-06, "loss": 0.6298, "step": 7751 }, { "epoch": 0.09268403495976757, "grad_norm": 3.330298900604248, "learning_rate": 9.897381738043338e-06, "loss": 0.5653, "step": 7752 }, { "epoch": 0.09269599110462823, "grad_norm": 1.6737549304962158, "learning_rate": 9.897342709072655e-06, "loss": 0.5773, "step": 7753 }, { "epoch": 0.09270794724948887, "grad_norm": 3.9944300651550293, "learning_rate": 9.897303672758389e-06, "loss": 0.5636, "step": 7754 }, { "epoch": 0.09271990339434953, "grad_norm": 2.6370723247528076, "learning_rate": 9.897264629100598e-06, "loss": 0.6934, "step": 7755 }, { "epoch": 0.09273185953921018, "grad_norm": 2.896263360977173, "learning_rate": 9.89722557809934e-06, "loss": 0.6442, "step": 7756 }, { "epoch": 0.09274381568407083, "grad_norm": 1.9368464946746826, "learning_rate": 9.897186519754677e-06, "loss": 0.5644, "step": 7757 }, { "epoch": 0.09275577182893148, "grad_norm": 2.7363667488098145, "learning_rate": 9.897147454066664e-06, "loss": 0.6185, "step": 7758 }, { "epoch": 0.09276772797379212, "grad_norm": 2.1397950649261475, "learning_rate": 9.897108381035361e-06, "loss": 0.65, "step": 7759 }, { "epoch": 0.09277968411865278, "grad_norm": 2.0721096992492676, "learning_rate": 9.897069300660829e-06, "loss": 0.6744, "step": 7760 }, { "epoch": 0.09279164026351343, "grad_norm": 2.4665310382843018, "learning_rate": 9.897030212943121e-06, "loss": 0.6349, "step": 7761 }, { "epoch": 0.09280359640837409, "grad_norm": 2.0410637855529785, "learning_rate": 9.8969911178823e-06, "loss": 0.6501, "step": 7762 }, { "epoch": 0.09281555255323473, "grad_norm": 1.4894267320632935, "learning_rate": 9.896952015478423e-06, "loss": 0.4814, "step": 7763 }, { "epoch": 0.09282750869809539, "grad_norm": 2.8592004776000977, "learning_rate": 9.89691290573155e-06, "loss": 0.6102, "step": 7764 }, { "epoch": 0.09283946484295603, "grad_norm": 2.147301197052002, "learning_rate": 9.89687378864174e-06, "loss": 0.698, "step": 7765 }, { "epoch": 0.0928514209878167, "grad_norm": 3.1719956398010254, "learning_rate": 9.896834664209047e-06, "loss": 0.6245, "step": 7766 }, { "epoch": 0.09286337713267734, "grad_norm": 3.2760941982269287, "learning_rate": 9.896795532433534e-06, "loss": 0.6884, "step": 7767 }, { "epoch": 0.092875333277538, "grad_norm": 1.8268964290618896, "learning_rate": 9.89675639331526e-06, "loss": 0.7709, "step": 7768 }, { "epoch": 0.09288728942239864, "grad_norm": 3.230851650238037, "learning_rate": 9.896717246854282e-06, "loss": 0.5843, "step": 7769 }, { "epoch": 0.09289924556725929, "grad_norm": 2.157916307449341, "learning_rate": 9.896678093050658e-06, "loss": 0.5614, "step": 7770 }, { "epoch": 0.09291120171211995, "grad_norm": 4.445187568664551, "learning_rate": 9.89663893190445e-06, "loss": 0.6848, "step": 7771 }, { "epoch": 0.09292315785698059, "grad_norm": 1.4257919788360596, "learning_rate": 9.896599763415711e-06, "loss": 0.6203, "step": 7772 }, { "epoch": 0.09293511400184125, "grad_norm": 2.7305562496185303, "learning_rate": 9.896560587584506e-06, "loss": 0.6926, "step": 7773 }, { "epoch": 0.0929470701467019, "grad_norm": 5.106887340545654, "learning_rate": 9.89652140441089e-06, "loss": 0.644, "step": 7774 }, { "epoch": 0.09295902629156255, "grad_norm": 3.011841058731079, "learning_rate": 9.896482213894923e-06, "loss": 0.6415, "step": 7775 }, { "epoch": 0.0929709824364232, "grad_norm": 2.0439038276672363, "learning_rate": 9.896443016036665e-06, "loss": 0.6563, "step": 7776 }, { "epoch": 0.09298293858128386, "grad_norm": 17.9150390625, "learning_rate": 9.89640381083617e-06, "loss": 0.659, "step": 7777 }, { "epoch": 0.0929948947261445, "grad_norm": 2.0225448608398438, "learning_rate": 9.896364598293503e-06, "loss": 0.6326, "step": 7778 }, { "epoch": 0.09300685087100516, "grad_norm": 3.3732471466064453, "learning_rate": 9.896325378408719e-06, "loss": 0.6194, "step": 7779 }, { "epoch": 0.0930188070158658, "grad_norm": 1.5796678066253662, "learning_rate": 9.896286151181878e-06, "loss": 0.5941, "step": 7780 }, { "epoch": 0.09303076316072645, "grad_norm": 2.373258352279663, "learning_rate": 9.896246916613037e-06, "loss": 0.6243, "step": 7781 }, { "epoch": 0.09304271930558711, "grad_norm": 3.1704370975494385, "learning_rate": 9.896207674702257e-06, "loss": 0.637, "step": 7782 }, { "epoch": 0.09305467545044775, "grad_norm": 2.078186511993408, "learning_rate": 9.896168425449597e-06, "loss": 0.6148, "step": 7783 }, { "epoch": 0.09306663159530841, "grad_norm": 1.8883346319198608, "learning_rate": 9.896129168855114e-06, "loss": 0.6258, "step": 7784 }, { "epoch": 0.09307858774016906, "grad_norm": 2.1290690898895264, "learning_rate": 9.896089904918869e-06, "loss": 0.6627, "step": 7785 }, { "epoch": 0.09309054388502971, "grad_norm": 1.9491991996765137, "learning_rate": 9.89605063364092e-06, "loss": 0.6023, "step": 7786 }, { "epoch": 0.09310250002989036, "grad_norm": 1.5322842597961426, "learning_rate": 9.896011355021324e-06, "loss": 0.5863, "step": 7787 }, { "epoch": 0.09311445617475102, "grad_norm": 2.1354167461395264, "learning_rate": 9.895972069060143e-06, "loss": 0.6293, "step": 7788 }, { "epoch": 0.09312641231961166, "grad_norm": 3.2150533199310303, "learning_rate": 9.895932775757433e-06, "loss": 0.5817, "step": 7789 }, { "epoch": 0.09313836846447232, "grad_norm": 3.153437376022339, "learning_rate": 9.895893475113256e-06, "loss": 0.6306, "step": 7790 }, { "epoch": 0.09315032460933297, "grad_norm": 1.7278707027435303, "learning_rate": 9.895854167127668e-06, "loss": 0.6129, "step": 7791 }, { "epoch": 0.09316228075419362, "grad_norm": 1.6468888521194458, "learning_rate": 9.89581485180073e-06, "loss": 0.6139, "step": 7792 }, { "epoch": 0.09317423689905427, "grad_norm": 3.085078477859497, "learning_rate": 9.895775529132501e-06, "loss": 0.6174, "step": 7793 }, { "epoch": 0.09318619304391491, "grad_norm": 2.2479538917541504, "learning_rate": 9.895736199123039e-06, "loss": 0.5532, "step": 7794 }, { "epoch": 0.09319814918877557, "grad_norm": 30.72015953063965, "learning_rate": 9.895696861772401e-06, "loss": 0.6523, "step": 7795 }, { "epoch": 0.09321010533363622, "grad_norm": 2.1977994441986084, "learning_rate": 9.89565751708065e-06, "loss": 0.6447, "step": 7796 }, { "epoch": 0.09322206147849688, "grad_norm": 2.339855194091797, "learning_rate": 9.895618165047844e-06, "loss": 0.6676, "step": 7797 }, { "epoch": 0.09323401762335752, "grad_norm": 2.2927002906799316, "learning_rate": 9.89557880567404e-06, "loss": 0.7668, "step": 7798 }, { "epoch": 0.09324597376821818, "grad_norm": 3.2182743549346924, "learning_rate": 9.8955394389593e-06, "loss": 0.6656, "step": 7799 }, { "epoch": 0.09325792991307882, "grad_norm": 3.4047679901123047, "learning_rate": 9.895500064903676e-06, "loss": 0.5493, "step": 7800 }, { "epoch": 0.09326988605793948, "grad_norm": 3.3334128856658936, "learning_rate": 9.895460683507236e-06, "loss": 0.6592, "step": 7801 }, { "epoch": 0.09328184220280013, "grad_norm": 3.921396255493164, "learning_rate": 9.895421294770036e-06, "loss": 0.6037, "step": 7802 }, { "epoch": 0.09329379834766079, "grad_norm": 2.133439779281616, "learning_rate": 9.895381898692133e-06, "loss": 0.6703, "step": 7803 }, { "epoch": 0.09330575449252143, "grad_norm": 2.018712282180786, "learning_rate": 9.895342495273588e-06, "loss": 0.6107, "step": 7804 }, { "epoch": 0.09331771063738208, "grad_norm": 2.71637225151062, "learning_rate": 9.895303084514459e-06, "loss": 0.5504, "step": 7805 }, { "epoch": 0.09332966678224273, "grad_norm": 1.604149341583252, "learning_rate": 9.895263666414806e-06, "loss": 0.594, "step": 7806 }, { "epoch": 0.09334162292710338, "grad_norm": 2.1683194637298584, "learning_rate": 9.895224240974687e-06, "loss": 0.6335, "step": 7807 }, { "epoch": 0.09335357907196404, "grad_norm": 7.1353864669799805, "learning_rate": 9.895184808194163e-06, "loss": 0.6051, "step": 7808 }, { "epoch": 0.09336553521682468, "grad_norm": 3.237502336502075, "learning_rate": 9.895145368073293e-06, "loss": 0.6471, "step": 7809 }, { "epoch": 0.09337749136168534, "grad_norm": 3.6715939044952393, "learning_rate": 9.895105920612133e-06, "loss": 0.6052, "step": 7810 }, { "epoch": 0.09338944750654599, "grad_norm": 2.031033754348755, "learning_rate": 9.895066465810746e-06, "loss": 0.5917, "step": 7811 }, { "epoch": 0.09340140365140664, "grad_norm": 2.4825499057769775, "learning_rate": 9.895027003669186e-06, "loss": 0.6147, "step": 7812 }, { "epoch": 0.09341335979626729, "grad_norm": 2.2898170948028564, "learning_rate": 9.894987534187519e-06, "loss": 0.6615, "step": 7813 }, { "epoch": 0.09342531594112795, "grad_norm": 2.5237162113189697, "learning_rate": 9.8949480573658e-06, "loss": 0.6802, "step": 7814 }, { "epoch": 0.09343727208598859, "grad_norm": 6.668234348297119, "learning_rate": 9.894908573204088e-06, "loss": 0.669, "step": 7815 }, { "epoch": 0.09344922823084925, "grad_norm": 2.1220667362213135, "learning_rate": 9.894869081702444e-06, "loss": 0.6454, "step": 7816 }, { "epoch": 0.0934611843757099, "grad_norm": 3.2628448009490967, "learning_rate": 9.894829582860926e-06, "loss": 0.5364, "step": 7817 }, { "epoch": 0.09347314052057054, "grad_norm": 2.145108938217163, "learning_rate": 9.894790076679595e-06, "loss": 0.6821, "step": 7818 }, { "epoch": 0.0934850966654312, "grad_norm": 6.428374290466309, "learning_rate": 9.894750563158507e-06, "loss": 0.6059, "step": 7819 }, { "epoch": 0.09349705281029184, "grad_norm": 3.567707061767578, "learning_rate": 9.894711042297724e-06, "loss": 0.6854, "step": 7820 }, { "epoch": 0.0935090089551525, "grad_norm": 4.713679790496826, "learning_rate": 9.894671514097306e-06, "loss": 0.6312, "step": 7821 }, { "epoch": 0.09352096510001315, "grad_norm": 2.7189884185791016, "learning_rate": 9.894631978557308e-06, "loss": 0.666, "step": 7822 }, { "epoch": 0.0935329212448738, "grad_norm": 3.87255597114563, "learning_rate": 9.894592435677793e-06, "loss": 0.6019, "step": 7823 }, { "epoch": 0.09354487738973445, "grad_norm": 1.9390138387680054, "learning_rate": 9.89455288545882e-06, "loss": 0.6015, "step": 7824 }, { "epoch": 0.09355683353459511, "grad_norm": 3.177532911300659, "learning_rate": 9.894513327900446e-06, "loss": 0.6136, "step": 7825 }, { "epoch": 0.09356878967945575, "grad_norm": 1.8635889291763306, "learning_rate": 9.894473763002731e-06, "loss": 0.6488, "step": 7826 }, { "epoch": 0.09358074582431641, "grad_norm": 2.755133867263794, "learning_rate": 9.894434190765737e-06, "loss": 0.5973, "step": 7827 }, { "epoch": 0.09359270196917706, "grad_norm": 3.328840494155884, "learning_rate": 9.894394611189523e-06, "loss": 0.6609, "step": 7828 }, { "epoch": 0.0936046581140377, "grad_norm": 1.910842776298523, "learning_rate": 9.894355024274145e-06, "loss": 0.6191, "step": 7829 }, { "epoch": 0.09361661425889836, "grad_norm": 1.8197335004806519, "learning_rate": 9.894315430019666e-06, "loss": 0.593, "step": 7830 }, { "epoch": 0.093628570403759, "grad_norm": 1.4791979789733887, "learning_rate": 9.894275828426142e-06, "loss": 0.5793, "step": 7831 }, { "epoch": 0.09364052654861967, "grad_norm": 5.62432336807251, "learning_rate": 9.894236219493632e-06, "loss": 0.6201, "step": 7832 }, { "epoch": 0.09365248269348031, "grad_norm": 2.954965591430664, "learning_rate": 9.894196603222201e-06, "loss": 0.6139, "step": 7833 }, { "epoch": 0.09366443883834097, "grad_norm": 2.1632323265075684, "learning_rate": 9.894156979611902e-06, "loss": 0.5544, "step": 7834 }, { "epoch": 0.09367639498320161, "grad_norm": 6.466398239135742, "learning_rate": 9.894117348662799e-06, "loss": 0.6604, "step": 7835 }, { "epoch": 0.09368835112806227, "grad_norm": 2.875462293624878, "learning_rate": 9.89407771037495e-06, "loss": 0.572, "step": 7836 }, { "epoch": 0.09370030727292292, "grad_norm": 1.7121244668960571, "learning_rate": 9.894038064748413e-06, "loss": 0.5281, "step": 7837 }, { "epoch": 0.09371226341778358, "grad_norm": 1.976319432258606, "learning_rate": 9.893998411783248e-06, "loss": 0.6701, "step": 7838 }, { "epoch": 0.09372421956264422, "grad_norm": 7.4277167320251465, "learning_rate": 9.893958751479515e-06, "loss": 0.5627, "step": 7839 }, { "epoch": 0.09373617570750488, "grad_norm": 3.0505716800689697, "learning_rate": 9.893919083837275e-06, "loss": 0.5866, "step": 7840 }, { "epoch": 0.09374813185236552, "grad_norm": 2.535966634750366, "learning_rate": 9.893879408856586e-06, "loss": 0.6649, "step": 7841 }, { "epoch": 0.09376008799722617, "grad_norm": 3.566474676132202, "learning_rate": 9.893839726537506e-06, "loss": 0.7541, "step": 7842 }, { "epoch": 0.09377204414208683, "grad_norm": 2.6991236209869385, "learning_rate": 9.893800036880097e-06, "loss": 0.611, "step": 7843 }, { "epoch": 0.09378400028694747, "grad_norm": 2.0415196418762207, "learning_rate": 9.893760339884417e-06, "loss": 0.6441, "step": 7844 }, { "epoch": 0.09379595643180813, "grad_norm": 3.445955276489258, "learning_rate": 9.893720635550526e-06, "loss": 0.598, "step": 7845 }, { "epoch": 0.09380791257666878, "grad_norm": 2.240851640701294, "learning_rate": 9.893680923878483e-06, "loss": 0.7031, "step": 7846 }, { "epoch": 0.09381986872152943, "grad_norm": 2.553928852081299, "learning_rate": 9.893641204868349e-06, "loss": 0.6779, "step": 7847 }, { "epoch": 0.09383182486639008, "grad_norm": 2.3429651260375977, "learning_rate": 9.893601478520182e-06, "loss": 0.6468, "step": 7848 }, { "epoch": 0.09384378101125074, "grad_norm": 2.4427099227905273, "learning_rate": 9.893561744834041e-06, "loss": 0.6103, "step": 7849 }, { "epoch": 0.09385573715611138, "grad_norm": 2.1683173179626465, "learning_rate": 9.89352200380999e-06, "loss": 0.6659, "step": 7850 }, { "epoch": 0.09386769330097204, "grad_norm": 1.7550818920135498, "learning_rate": 9.893482255448083e-06, "loss": 0.6597, "step": 7851 }, { "epoch": 0.09387964944583269, "grad_norm": 4.095203876495361, "learning_rate": 9.893442499748383e-06, "loss": 0.6617, "step": 7852 }, { "epoch": 0.09389160559069333, "grad_norm": 3.602355718612671, "learning_rate": 9.893402736710946e-06, "loss": 0.7008, "step": 7853 }, { "epoch": 0.09390356173555399, "grad_norm": 3.0483314990997314, "learning_rate": 9.893362966335837e-06, "loss": 0.6496, "step": 7854 }, { "epoch": 0.09391551788041463, "grad_norm": 3.0423669815063477, "learning_rate": 9.893323188623113e-06, "loss": 0.6265, "step": 7855 }, { "epoch": 0.09392747402527529, "grad_norm": 3.6077916622161865, "learning_rate": 9.893283403572831e-06, "loss": 0.5567, "step": 7856 }, { "epoch": 0.09393943017013594, "grad_norm": 1.7735517024993896, "learning_rate": 9.893243611185054e-06, "loss": 0.5531, "step": 7857 }, { "epoch": 0.0939513863149966, "grad_norm": 1.9990595579147339, "learning_rate": 9.893203811459842e-06, "loss": 0.6938, "step": 7858 }, { "epoch": 0.09396334245985724, "grad_norm": 2.113534450531006, "learning_rate": 9.893164004397253e-06, "loss": 0.6658, "step": 7859 }, { "epoch": 0.0939752986047179, "grad_norm": 5.058884620666504, "learning_rate": 9.893124189997348e-06, "loss": 0.584, "step": 7860 }, { "epoch": 0.09398725474957854, "grad_norm": 1.955679178237915, "learning_rate": 9.893084368260186e-06, "loss": 0.5683, "step": 7861 }, { "epoch": 0.0939992108944392, "grad_norm": 4.709196090698242, "learning_rate": 9.893044539185826e-06, "loss": 0.7197, "step": 7862 }, { "epoch": 0.09401116703929985, "grad_norm": 4.089357852935791, "learning_rate": 9.893004702774326e-06, "loss": 0.6082, "step": 7863 }, { "epoch": 0.09402312318416049, "grad_norm": 1.964111328125, "learning_rate": 9.892964859025751e-06, "loss": 0.5294, "step": 7864 }, { "epoch": 0.09403507932902115, "grad_norm": 5.329721927642822, "learning_rate": 9.892925007940156e-06, "loss": 0.7122, "step": 7865 }, { "epoch": 0.0940470354738818, "grad_norm": 1.6660875082015991, "learning_rate": 9.892885149517604e-06, "loss": 0.588, "step": 7866 }, { "epoch": 0.09405899161874245, "grad_norm": 2.210984945297241, "learning_rate": 9.892845283758153e-06, "loss": 0.648, "step": 7867 }, { "epoch": 0.0940709477636031, "grad_norm": 2.6945912837982178, "learning_rate": 9.892805410661863e-06, "loss": 0.6892, "step": 7868 }, { "epoch": 0.09408290390846376, "grad_norm": 4.022794723510742, "learning_rate": 9.892765530228793e-06, "loss": 0.7128, "step": 7869 }, { "epoch": 0.0940948600533244, "grad_norm": 1.7846354246139526, "learning_rate": 9.892725642459004e-06, "loss": 0.6214, "step": 7870 }, { "epoch": 0.09410681619818506, "grad_norm": 4.808928966522217, "learning_rate": 9.892685747352557e-06, "loss": 0.7111, "step": 7871 }, { "epoch": 0.0941187723430457, "grad_norm": 3.1611971855163574, "learning_rate": 9.892645844909509e-06, "loss": 0.5774, "step": 7872 }, { "epoch": 0.09413072848790636, "grad_norm": 3.461644411087036, "learning_rate": 9.892605935129921e-06, "loss": 0.6127, "step": 7873 }, { "epoch": 0.09414268463276701, "grad_norm": 2.126833200454712, "learning_rate": 9.892566018013854e-06, "loss": 0.7471, "step": 7874 }, { "epoch": 0.09415464077762767, "grad_norm": 1.6408371925354004, "learning_rate": 9.892526093561365e-06, "loss": 0.7482, "step": 7875 }, { "epoch": 0.09416659692248831, "grad_norm": 2.6282832622528076, "learning_rate": 9.892486161772518e-06, "loss": 0.6594, "step": 7876 }, { "epoch": 0.09417855306734896, "grad_norm": 1.8632925748825073, "learning_rate": 9.89244622264737e-06, "loss": 0.614, "step": 7877 }, { "epoch": 0.09419050921220962, "grad_norm": 9.399202346801758, "learning_rate": 9.89240627618598e-06, "loss": 0.5469, "step": 7878 }, { "epoch": 0.09420246535707026, "grad_norm": 2.378422737121582, "learning_rate": 9.892366322388411e-06, "loss": 0.5514, "step": 7879 }, { "epoch": 0.09421442150193092, "grad_norm": 2.0956318378448486, "learning_rate": 9.89232636125472e-06, "loss": 0.6221, "step": 7880 }, { "epoch": 0.09422637764679156, "grad_norm": 2.549433708190918, "learning_rate": 9.892286392784968e-06, "loss": 0.4982, "step": 7881 }, { "epoch": 0.09423833379165222, "grad_norm": 2.043701410293579, "learning_rate": 9.892246416979218e-06, "loss": 0.6188, "step": 7882 }, { "epoch": 0.09425028993651287, "grad_norm": 2.6180379390716553, "learning_rate": 9.892206433837526e-06, "loss": 0.6379, "step": 7883 }, { "epoch": 0.09426224608137353, "grad_norm": 11.475248336791992, "learning_rate": 9.892166443359952e-06, "loss": 0.6619, "step": 7884 }, { "epoch": 0.09427420222623417, "grad_norm": 3.8189287185668945, "learning_rate": 9.892126445546556e-06, "loss": 0.6618, "step": 7885 }, { "epoch": 0.09428615837109483, "grad_norm": 4.8153462409973145, "learning_rate": 9.892086440397401e-06, "loss": 0.7137, "step": 7886 }, { "epoch": 0.09429811451595548, "grad_norm": 4.609544277191162, "learning_rate": 9.892046427912544e-06, "loss": 0.61, "step": 7887 }, { "epoch": 0.09431007066081612, "grad_norm": 3.362488031387329, "learning_rate": 9.892006408092047e-06, "loss": 0.7059, "step": 7888 }, { "epoch": 0.09432202680567678, "grad_norm": 4.850714683532715, "learning_rate": 9.89196638093597e-06, "loss": 0.6299, "step": 7889 }, { "epoch": 0.09433398295053742, "grad_norm": 1.9786739349365234, "learning_rate": 9.89192634644437e-06, "loss": 0.6023, "step": 7890 }, { "epoch": 0.09434593909539808, "grad_norm": 2.4524917602539062, "learning_rate": 9.891886304617308e-06, "loss": 0.6569, "step": 7891 }, { "epoch": 0.09435789524025873, "grad_norm": 9.293890953063965, "learning_rate": 9.891846255454847e-06, "loss": 0.7451, "step": 7892 }, { "epoch": 0.09436985138511939, "grad_norm": 3.7978386878967285, "learning_rate": 9.891806198957045e-06, "loss": 0.6235, "step": 7893 }, { "epoch": 0.09438180752998003, "grad_norm": 4.182805061340332, "learning_rate": 9.891766135123962e-06, "loss": 0.6564, "step": 7894 }, { "epoch": 0.09439376367484069, "grad_norm": 3.358330249786377, "learning_rate": 9.891726063955658e-06, "loss": 0.6363, "step": 7895 }, { "epoch": 0.09440571981970133, "grad_norm": 3.14145827293396, "learning_rate": 9.891685985452193e-06, "loss": 0.6951, "step": 7896 }, { "epoch": 0.09441767596456199, "grad_norm": 6.858057975769043, "learning_rate": 9.891645899613629e-06, "loss": 0.604, "step": 7897 }, { "epoch": 0.09442963210942264, "grad_norm": 2.8591344356536865, "learning_rate": 9.891605806440024e-06, "loss": 0.6103, "step": 7898 }, { "epoch": 0.0944415882542833, "grad_norm": 3.5078284740448, "learning_rate": 9.891565705931439e-06, "loss": 0.5543, "step": 7899 }, { "epoch": 0.09445354439914394, "grad_norm": 3.933568000793457, "learning_rate": 9.891525598087931e-06, "loss": 0.711, "step": 7900 }, { "epoch": 0.09446550054400459, "grad_norm": 16.570829391479492, "learning_rate": 9.891485482909566e-06, "loss": 0.6864, "step": 7901 }, { "epoch": 0.09447745668886524, "grad_norm": 4.49362325668335, "learning_rate": 9.891445360396401e-06, "loss": 0.6162, "step": 7902 }, { "epoch": 0.09448941283372589, "grad_norm": 3.2129104137420654, "learning_rate": 9.891405230548496e-06, "loss": 0.6649, "step": 7903 }, { "epoch": 0.09450136897858655, "grad_norm": 2.3410215377807617, "learning_rate": 9.89136509336591e-06, "loss": 0.6871, "step": 7904 }, { "epoch": 0.09451332512344719, "grad_norm": 16.739179611206055, "learning_rate": 9.891324948848707e-06, "loss": 0.6831, "step": 7905 }, { "epoch": 0.09452528126830785, "grad_norm": 4.538641452789307, "learning_rate": 9.891284796996943e-06, "loss": 0.6182, "step": 7906 }, { "epoch": 0.0945372374131685, "grad_norm": 3.5941483974456787, "learning_rate": 9.89124463781068e-06, "loss": 0.6216, "step": 7907 }, { "epoch": 0.09454919355802915, "grad_norm": 18.128934860229492, "learning_rate": 9.89120447128998e-06, "loss": 0.5798, "step": 7908 }, { "epoch": 0.0945611497028898, "grad_norm": 1.8748552799224854, "learning_rate": 9.8911642974349e-06, "loss": 0.5859, "step": 7909 }, { "epoch": 0.09457310584775046, "grad_norm": 1.81295907497406, "learning_rate": 9.891124116245502e-06, "loss": 0.6094, "step": 7910 }, { "epoch": 0.0945850619926111, "grad_norm": 2.178816795349121, "learning_rate": 9.891083927721846e-06, "loss": 0.6929, "step": 7911 }, { "epoch": 0.09459701813747175, "grad_norm": 2.3435635566711426, "learning_rate": 9.891043731863992e-06, "loss": 0.6145, "step": 7912 }, { "epoch": 0.0946089742823324, "grad_norm": 3.0695605278015137, "learning_rate": 9.891003528672001e-06, "loss": 0.6127, "step": 7913 }, { "epoch": 0.09462093042719305, "grad_norm": 2.046041488647461, "learning_rate": 9.890963318145934e-06, "loss": 0.6031, "step": 7914 }, { "epoch": 0.09463288657205371, "grad_norm": 5.048370838165283, "learning_rate": 9.890923100285848e-06, "loss": 0.5173, "step": 7915 }, { "epoch": 0.09464484271691435, "grad_norm": 1.8517308235168457, "learning_rate": 9.890882875091808e-06, "loss": 0.6397, "step": 7916 }, { "epoch": 0.09465679886177501, "grad_norm": 5.862598419189453, "learning_rate": 9.89084264256387e-06, "loss": 0.6141, "step": 7917 }, { "epoch": 0.09466875500663566, "grad_norm": 9.069984436035156, "learning_rate": 9.890802402702095e-06, "loss": 0.6758, "step": 7918 }, { "epoch": 0.09468071115149632, "grad_norm": 3.3513059616088867, "learning_rate": 9.890762155506546e-06, "loss": 0.5968, "step": 7919 }, { "epoch": 0.09469266729635696, "grad_norm": 5.42928409576416, "learning_rate": 9.89072190097728e-06, "loss": 0.7077, "step": 7920 }, { "epoch": 0.09470462344121762, "grad_norm": 1.7931897640228271, "learning_rate": 9.890681639114361e-06, "loss": 0.627, "step": 7921 }, { "epoch": 0.09471657958607826, "grad_norm": 2.8445956707000732, "learning_rate": 9.890641369917849e-06, "loss": 0.6203, "step": 7922 }, { "epoch": 0.09472853573093891, "grad_norm": 2.5856661796569824, "learning_rate": 9.890601093387799e-06, "loss": 0.6713, "step": 7923 }, { "epoch": 0.09474049187579957, "grad_norm": 1.5721626281738281, "learning_rate": 9.890560809524277e-06, "loss": 0.6404, "step": 7924 }, { "epoch": 0.09475244802066021, "grad_norm": 21.364286422729492, "learning_rate": 9.890520518327342e-06, "loss": 0.7153, "step": 7925 }, { "epoch": 0.09476440416552087, "grad_norm": 2.1979148387908936, "learning_rate": 9.890480219797054e-06, "loss": 0.5881, "step": 7926 }, { "epoch": 0.09477636031038152, "grad_norm": 3.2341928482055664, "learning_rate": 9.890439913933474e-06, "loss": 0.6594, "step": 7927 }, { "epoch": 0.09478831645524217, "grad_norm": 2.301703691482544, "learning_rate": 9.890399600736662e-06, "loss": 0.6235, "step": 7928 }, { "epoch": 0.09480027260010282, "grad_norm": 2.5075037479400635, "learning_rate": 9.890359280206677e-06, "loss": 0.6607, "step": 7929 }, { "epoch": 0.09481222874496348, "grad_norm": 3.081252098083496, "learning_rate": 9.890318952343583e-06, "loss": 0.6052, "step": 7930 }, { "epoch": 0.09482418488982412, "grad_norm": 3.9782040119171143, "learning_rate": 9.890278617147437e-06, "loss": 0.6324, "step": 7931 }, { "epoch": 0.09483614103468478, "grad_norm": 3.1880440711975098, "learning_rate": 9.8902382746183e-06, "loss": 0.5973, "step": 7932 }, { "epoch": 0.09484809717954543, "grad_norm": 3.6388230323791504, "learning_rate": 9.890197924756234e-06, "loss": 0.6487, "step": 7933 }, { "epoch": 0.09486005332440609, "grad_norm": 2.2151362895965576, "learning_rate": 9.890157567561299e-06, "loss": 0.5184, "step": 7934 }, { "epoch": 0.09487200946926673, "grad_norm": 4.744258880615234, "learning_rate": 9.890117203033555e-06, "loss": 0.6419, "step": 7935 }, { "epoch": 0.09488396561412737, "grad_norm": 2.064699411392212, "learning_rate": 9.890076831173063e-06, "loss": 0.5897, "step": 7936 }, { "epoch": 0.09489592175898803, "grad_norm": 3.1081693172454834, "learning_rate": 9.890036451979885e-06, "loss": 0.6607, "step": 7937 }, { "epoch": 0.09490787790384868, "grad_norm": 6.7648210525512695, "learning_rate": 9.889996065454079e-06, "loss": 0.6381, "step": 7938 }, { "epoch": 0.09491983404870934, "grad_norm": 3.136596918106079, "learning_rate": 9.889955671595706e-06, "loss": 0.664, "step": 7939 }, { "epoch": 0.09493179019356998, "grad_norm": 2.3351104259490967, "learning_rate": 9.889915270404826e-06, "loss": 0.6175, "step": 7940 }, { "epoch": 0.09494374633843064, "grad_norm": 2.7986650466918945, "learning_rate": 9.889874861881503e-06, "loss": 0.6343, "step": 7941 }, { "epoch": 0.09495570248329128, "grad_norm": 2.712440252304077, "learning_rate": 9.889834446025794e-06, "loss": 0.6283, "step": 7942 }, { "epoch": 0.09496765862815194, "grad_norm": 1.8368034362792969, "learning_rate": 9.889794022837762e-06, "loss": 0.5932, "step": 7943 }, { "epoch": 0.09497961477301259, "grad_norm": 1.9257714748382568, "learning_rate": 9.889753592317465e-06, "loss": 0.5376, "step": 7944 }, { "epoch": 0.09499157091787325, "grad_norm": 2.0177860260009766, "learning_rate": 9.889713154464966e-06, "loss": 0.6276, "step": 7945 }, { "epoch": 0.09500352706273389, "grad_norm": 2.9555537700653076, "learning_rate": 9.889672709280324e-06, "loss": 0.6409, "step": 7946 }, { "epoch": 0.09501548320759454, "grad_norm": 2.108341932296753, "learning_rate": 9.8896322567636e-06, "loss": 0.7509, "step": 7947 }, { "epoch": 0.0950274393524552, "grad_norm": 12.254411697387695, "learning_rate": 9.889591796914856e-06, "loss": 0.6027, "step": 7948 }, { "epoch": 0.09503939549731584, "grad_norm": 19.617828369140625, "learning_rate": 9.889551329734152e-06, "loss": 0.5939, "step": 7949 }, { "epoch": 0.0950513516421765, "grad_norm": 3.614063262939453, "learning_rate": 9.889510855221547e-06, "loss": 0.6316, "step": 7950 }, { "epoch": 0.09506330778703714, "grad_norm": 1.7050782442092896, "learning_rate": 9.889470373377104e-06, "loss": 0.5706, "step": 7951 }, { "epoch": 0.0950752639318978, "grad_norm": 1.8588355779647827, "learning_rate": 9.889429884200883e-06, "loss": 0.6892, "step": 7952 }, { "epoch": 0.09508722007675845, "grad_norm": 4.309784412384033, "learning_rate": 9.889389387692945e-06, "loss": 0.7598, "step": 7953 }, { "epoch": 0.0950991762216191, "grad_norm": 1.9267159700393677, "learning_rate": 9.88934888385335e-06, "loss": 0.6536, "step": 7954 }, { "epoch": 0.09511113236647975, "grad_norm": 1.7940353155136108, "learning_rate": 9.889308372682158e-06, "loss": 0.5722, "step": 7955 }, { "epoch": 0.09512308851134041, "grad_norm": 1.9626798629760742, "learning_rate": 9.889267854179432e-06, "loss": 0.5731, "step": 7956 }, { "epoch": 0.09513504465620105, "grad_norm": 1.7385344505310059, "learning_rate": 9.88922732834523e-06, "loss": 0.6265, "step": 7957 }, { "epoch": 0.09514700080106171, "grad_norm": 5.165024280548096, "learning_rate": 9.889186795179615e-06, "loss": 0.64, "step": 7958 }, { "epoch": 0.09515895694592236, "grad_norm": 1.7785744667053223, "learning_rate": 9.889146254682647e-06, "loss": 0.5928, "step": 7959 }, { "epoch": 0.095170913090783, "grad_norm": 2.5767133235931396, "learning_rate": 9.889105706854387e-06, "loss": 0.623, "step": 7960 }, { "epoch": 0.09518286923564366, "grad_norm": 2.4586706161499023, "learning_rate": 9.889065151694894e-06, "loss": 0.6495, "step": 7961 }, { "epoch": 0.0951948253805043, "grad_norm": 2.155238151550293, "learning_rate": 9.889024589204233e-06, "loss": 0.7789, "step": 7962 }, { "epoch": 0.09520678152536496, "grad_norm": 1.7104135751724243, "learning_rate": 9.888984019382461e-06, "loss": 0.6583, "step": 7963 }, { "epoch": 0.09521873767022561, "grad_norm": 2.118572473526001, "learning_rate": 9.888943442229641e-06, "loss": 0.7102, "step": 7964 }, { "epoch": 0.09523069381508627, "grad_norm": 8.495247840881348, "learning_rate": 9.888902857745832e-06, "loss": 0.6183, "step": 7965 }, { "epoch": 0.09524264995994691, "grad_norm": 5.060530662536621, "learning_rate": 9.888862265931095e-06, "loss": 0.5766, "step": 7966 }, { "epoch": 0.09525460610480757, "grad_norm": 4.1596174240112305, "learning_rate": 9.888821666785493e-06, "loss": 0.554, "step": 7967 }, { "epoch": 0.09526656224966822, "grad_norm": 2.6774699687957764, "learning_rate": 9.888781060309085e-06, "loss": 0.6373, "step": 7968 }, { "epoch": 0.09527851839452887, "grad_norm": 1.5642669200897217, "learning_rate": 9.888740446501933e-06, "loss": 0.6032, "step": 7969 }, { "epoch": 0.09529047453938952, "grad_norm": 2.0802087783813477, "learning_rate": 9.888699825364096e-06, "loss": 0.6647, "step": 7970 }, { "epoch": 0.09530243068425016, "grad_norm": 2.274367094039917, "learning_rate": 9.888659196895638e-06, "loss": 0.6303, "step": 7971 }, { "epoch": 0.09531438682911082, "grad_norm": 1.5325798988342285, "learning_rate": 9.888618561096616e-06, "loss": 0.5951, "step": 7972 }, { "epoch": 0.09532634297397147, "grad_norm": 1.6453431844711304, "learning_rate": 9.888577917967096e-06, "loss": 0.5003, "step": 7973 }, { "epoch": 0.09533829911883213, "grad_norm": 3.4015378952026367, "learning_rate": 9.888537267507133e-06, "loss": 0.5972, "step": 7974 }, { "epoch": 0.09535025526369277, "grad_norm": 1.8481366634368896, "learning_rate": 9.888496609716793e-06, "loss": 0.5942, "step": 7975 }, { "epoch": 0.09536221140855343, "grad_norm": 3.8052423000335693, "learning_rate": 9.888455944596134e-06, "loss": 0.601, "step": 7976 }, { "epoch": 0.09537416755341407, "grad_norm": 3.5992472171783447, "learning_rate": 9.888415272145217e-06, "loss": 0.6049, "step": 7977 }, { "epoch": 0.09538612369827473, "grad_norm": 5.2453436851501465, "learning_rate": 9.888374592364107e-06, "loss": 0.6561, "step": 7978 }, { "epoch": 0.09539807984313538, "grad_norm": 1.5473353862762451, "learning_rate": 9.888333905252859e-06, "loss": 0.5626, "step": 7979 }, { "epoch": 0.09541003598799604, "grad_norm": 2.6546106338500977, "learning_rate": 9.88829321081154e-06, "loss": 0.7498, "step": 7980 }, { "epoch": 0.09542199213285668, "grad_norm": 1.823543906211853, "learning_rate": 9.888252509040203e-06, "loss": 0.7032, "step": 7981 }, { "epoch": 0.09543394827771733, "grad_norm": 2.4412682056427, "learning_rate": 9.888211799938918e-06, "loss": 0.674, "step": 7982 }, { "epoch": 0.09544590442257798, "grad_norm": 4.818027019500732, "learning_rate": 9.888171083507742e-06, "loss": 0.6177, "step": 7983 }, { "epoch": 0.09545786056743863, "grad_norm": 1.7474888563156128, "learning_rate": 9.888130359746734e-06, "loss": 0.7295, "step": 7984 }, { "epoch": 0.09546981671229929, "grad_norm": 3.121070623397827, "learning_rate": 9.888089628655959e-06, "loss": 0.5461, "step": 7985 }, { "epoch": 0.09548177285715993, "grad_norm": 5.783577919006348, "learning_rate": 9.888048890235475e-06, "loss": 0.6483, "step": 7986 }, { "epoch": 0.09549372900202059, "grad_norm": 6.689357280731201, "learning_rate": 9.888008144485345e-06, "loss": 0.561, "step": 7987 }, { "epoch": 0.09550568514688124, "grad_norm": 3.7159266471862793, "learning_rate": 9.887967391405628e-06, "loss": 0.5671, "step": 7988 }, { "epoch": 0.0955176412917419, "grad_norm": 17.507322311401367, "learning_rate": 9.88792663099639e-06, "loss": 0.599, "step": 7989 }, { "epoch": 0.09552959743660254, "grad_norm": 8.079161643981934, "learning_rate": 9.887885863257685e-06, "loss": 0.6263, "step": 7990 }, { "epoch": 0.0955415535814632, "grad_norm": 1.7478196620941162, "learning_rate": 9.88784508818958e-06, "loss": 0.6469, "step": 7991 }, { "epoch": 0.09555350972632384, "grad_norm": 2.095010280609131, "learning_rate": 9.887804305792133e-06, "loss": 0.7042, "step": 7992 }, { "epoch": 0.0955654658711845, "grad_norm": 5.138412952423096, "learning_rate": 9.887763516065405e-06, "loss": 0.6251, "step": 7993 }, { "epoch": 0.09557742201604515, "grad_norm": 8.157208442687988, "learning_rate": 9.88772271900946e-06, "loss": 0.716, "step": 7994 }, { "epoch": 0.09558937816090579, "grad_norm": 2.4698214530944824, "learning_rate": 9.887681914624357e-06, "loss": 0.6049, "step": 7995 }, { "epoch": 0.09560133430576645, "grad_norm": 12.612133979797363, "learning_rate": 9.887641102910157e-06, "loss": 0.6052, "step": 7996 }, { "epoch": 0.0956132904506271, "grad_norm": 7.4723591804504395, "learning_rate": 9.887600283866922e-06, "loss": 0.6362, "step": 7997 }, { "epoch": 0.09562524659548775, "grad_norm": 2.914907217025757, "learning_rate": 9.887559457494714e-06, "loss": 0.5918, "step": 7998 }, { "epoch": 0.0956372027403484, "grad_norm": 3.102806568145752, "learning_rate": 9.887518623793591e-06, "loss": 0.7144, "step": 7999 }, { "epoch": 0.09564915888520906, "grad_norm": 2.2141566276550293, "learning_rate": 9.88747778276362e-06, "loss": 0.6094, "step": 8000 }, { "epoch": 0.0956611150300697, "grad_norm": 6.498046398162842, "learning_rate": 9.887436934404855e-06, "loss": 0.5655, "step": 8001 }, { "epoch": 0.09567307117493036, "grad_norm": 2.3252553939819336, "learning_rate": 9.887396078717362e-06, "loss": 0.7289, "step": 8002 }, { "epoch": 0.095685027319791, "grad_norm": 2.2116122245788574, "learning_rate": 9.887355215701202e-06, "loss": 0.5893, "step": 8003 }, { "epoch": 0.09569698346465166, "grad_norm": 2.530715227127075, "learning_rate": 9.887314345356437e-06, "loss": 0.5687, "step": 8004 }, { "epoch": 0.09570893960951231, "grad_norm": 2.0505754947662354, "learning_rate": 9.887273467683123e-06, "loss": 0.6545, "step": 8005 }, { "epoch": 0.09572089575437295, "grad_norm": 6.3080830574035645, "learning_rate": 9.887232582681327e-06, "loss": 0.7115, "step": 8006 }, { "epoch": 0.09573285189923361, "grad_norm": 2.2566606998443604, "learning_rate": 9.887191690351108e-06, "loss": 0.597, "step": 8007 }, { "epoch": 0.09574480804409426, "grad_norm": 3.2786664962768555, "learning_rate": 9.887150790692528e-06, "loss": 0.7069, "step": 8008 }, { "epoch": 0.09575676418895492, "grad_norm": 2.7024359703063965, "learning_rate": 9.887109883705647e-06, "loss": 0.6411, "step": 8009 }, { "epoch": 0.09576872033381556, "grad_norm": 2.9906134605407715, "learning_rate": 9.887068969390529e-06, "loss": 0.586, "step": 8010 }, { "epoch": 0.09578067647867622, "grad_norm": 13.894244194030762, "learning_rate": 9.887028047747234e-06, "loss": 0.6595, "step": 8011 }, { "epoch": 0.09579263262353686, "grad_norm": 2.091721296310425, "learning_rate": 9.886987118775822e-06, "loss": 0.5818, "step": 8012 }, { "epoch": 0.09580458876839752, "grad_norm": 3.182600498199463, "learning_rate": 9.886946182476356e-06, "loss": 0.7256, "step": 8013 }, { "epoch": 0.09581654491325817, "grad_norm": 2.8339481353759766, "learning_rate": 9.886905238848895e-06, "loss": 0.5937, "step": 8014 }, { "epoch": 0.09582850105811883, "grad_norm": 2.3518779277801514, "learning_rate": 9.886864287893505e-06, "loss": 0.5846, "step": 8015 }, { "epoch": 0.09584045720297947, "grad_norm": 2.5563626289367676, "learning_rate": 9.886823329610243e-06, "loss": 0.69, "step": 8016 }, { "epoch": 0.09585241334784013, "grad_norm": 8.374083518981934, "learning_rate": 9.886782363999172e-06, "loss": 0.5949, "step": 8017 }, { "epoch": 0.09586436949270077, "grad_norm": 1.7550252676010132, "learning_rate": 9.886741391060355e-06, "loss": 0.6612, "step": 8018 }, { "epoch": 0.09587632563756142, "grad_norm": 2.440687417984009, "learning_rate": 9.88670041079385e-06, "loss": 0.6723, "step": 8019 }, { "epoch": 0.09588828178242208, "grad_norm": 1.8880895376205444, "learning_rate": 9.886659423199721e-06, "loss": 0.703, "step": 8020 }, { "epoch": 0.09590023792728272, "grad_norm": 4.342343330383301, "learning_rate": 9.886618428278029e-06, "loss": 0.6043, "step": 8021 }, { "epoch": 0.09591219407214338, "grad_norm": 2.8343992233276367, "learning_rate": 9.886577426028836e-06, "loss": 0.5333, "step": 8022 }, { "epoch": 0.09592415021700403, "grad_norm": 2.9030327796936035, "learning_rate": 9.886536416452201e-06, "loss": 0.6086, "step": 8023 }, { "epoch": 0.09593610636186468, "grad_norm": 2.076124906539917, "learning_rate": 9.886495399548188e-06, "loss": 0.6363, "step": 8024 }, { "epoch": 0.09594806250672533, "grad_norm": 6.251008987426758, "learning_rate": 9.886454375316858e-06, "loss": 0.6834, "step": 8025 }, { "epoch": 0.09596001865158599, "grad_norm": 2.0503976345062256, "learning_rate": 9.886413343758274e-06, "loss": 0.6896, "step": 8026 }, { "epoch": 0.09597197479644663, "grad_norm": 14.322452545166016, "learning_rate": 9.886372304872493e-06, "loss": 0.6987, "step": 8027 }, { "epoch": 0.09598393094130729, "grad_norm": 2.8785126209259033, "learning_rate": 9.886331258659581e-06, "loss": 0.6464, "step": 8028 }, { "epoch": 0.09599588708616794, "grad_norm": 1.9627071619033813, "learning_rate": 9.886290205119597e-06, "loss": 0.6398, "step": 8029 }, { "epoch": 0.09600784323102858, "grad_norm": 3.2905948162078857, "learning_rate": 9.886249144252604e-06, "loss": 0.6524, "step": 8030 }, { "epoch": 0.09601979937588924, "grad_norm": 2.7824525833129883, "learning_rate": 9.886208076058663e-06, "loss": 0.6808, "step": 8031 }, { "epoch": 0.09603175552074988, "grad_norm": 6.44750452041626, "learning_rate": 9.886167000537835e-06, "loss": 0.6491, "step": 8032 }, { "epoch": 0.09604371166561054, "grad_norm": 2.8707921504974365, "learning_rate": 9.886125917690183e-06, "loss": 0.6816, "step": 8033 }, { "epoch": 0.09605566781047119, "grad_norm": 2.702943801879883, "learning_rate": 9.886084827515768e-06, "loss": 0.7261, "step": 8034 }, { "epoch": 0.09606762395533185, "grad_norm": 3.899552822113037, "learning_rate": 9.886043730014652e-06, "loss": 0.6214, "step": 8035 }, { "epoch": 0.09607958010019249, "grad_norm": 2.9851043224334717, "learning_rate": 9.886002625186894e-06, "loss": 0.5785, "step": 8036 }, { "epoch": 0.09609153624505315, "grad_norm": 7.392728805541992, "learning_rate": 9.885961513032559e-06, "loss": 0.6725, "step": 8037 }, { "epoch": 0.0961034923899138, "grad_norm": 2.939157009124756, "learning_rate": 9.885920393551707e-06, "loss": 0.5077, "step": 8038 }, { "epoch": 0.09611544853477445, "grad_norm": 4.128475666046143, "learning_rate": 9.885879266744401e-06, "loss": 0.6005, "step": 8039 }, { "epoch": 0.0961274046796351, "grad_norm": 2.707357883453369, "learning_rate": 9.8858381326107e-06, "loss": 0.5816, "step": 8040 }, { "epoch": 0.09613936082449576, "grad_norm": 2.3665122985839844, "learning_rate": 9.885796991150669e-06, "loss": 0.6252, "step": 8041 }, { "epoch": 0.0961513169693564, "grad_norm": 4.263649940490723, "learning_rate": 9.885755842364366e-06, "loss": 0.6083, "step": 8042 }, { "epoch": 0.09616327311421705, "grad_norm": 1.66368567943573, "learning_rate": 9.885714686251858e-06, "loss": 0.5481, "step": 8043 }, { "epoch": 0.0961752292590777, "grad_norm": 2.8498785495758057, "learning_rate": 9.8856735228132e-06, "loss": 0.6242, "step": 8044 }, { "epoch": 0.09618718540393835, "grad_norm": 3.4263930320739746, "learning_rate": 9.885632352048458e-06, "loss": 0.6357, "step": 8045 }, { "epoch": 0.09619914154879901, "grad_norm": 9.54056453704834, "learning_rate": 9.885591173957693e-06, "loss": 0.6349, "step": 8046 }, { "epoch": 0.09621109769365965, "grad_norm": 8.655048370361328, "learning_rate": 9.885549988540968e-06, "loss": 0.6297, "step": 8047 }, { "epoch": 0.09622305383852031, "grad_norm": 2.414638042449951, "learning_rate": 9.885508795798342e-06, "loss": 0.5979, "step": 8048 }, { "epoch": 0.09623500998338096, "grad_norm": 2.7810163497924805, "learning_rate": 9.885467595729881e-06, "loss": 0.5477, "step": 8049 }, { "epoch": 0.09624696612824162, "grad_norm": 1.9927690029144287, "learning_rate": 9.885426388335641e-06, "loss": 0.6682, "step": 8050 }, { "epoch": 0.09625892227310226, "grad_norm": 4.188233852386475, "learning_rate": 9.885385173615687e-06, "loss": 0.5915, "step": 8051 }, { "epoch": 0.09627087841796292, "grad_norm": 2.4011619091033936, "learning_rate": 9.885343951570082e-06, "loss": 0.611, "step": 8052 }, { "epoch": 0.09628283456282356, "grad_norm": 4.409858226776123, "learning_rate": 9.885302722198885e-06, "loss": 0.7367, "step": 8053 }, { "epoch": 0.09629479070768421, "grad_norm": 3.3504865169525146, "learning_rate": 9.88526148550216e-06, "loss": 0.7305, "step": 8054 }, { "epoch": 0.09630674685254487, "grad_norm": 3.7387924194335938, "learning_rate": 9.885220241479967e-06, "loss": 0.7292, "step": 8055 }, { "epoch": 0.09631870299740551, "grad_norm": 2.432325839996338, "learning_rate": 9.88517899013237e-06, "loss": 0.6508, "step": 8056 }, { "epoch": 0.09633065914226617, "grad_norm": 5.368996620178223, "learning_rate": 9.88513773145943e-06, "loss": 0.6475, "step": 8057 }, { "epoch": 0.09634261528712681, "grad_norm": 3.7554972171783447, "learning_rate": 9.885096465461208e-06, "loss": 0.6512, "step": 8058 }, { "epoch": 0.09635457143198747, "grad_norm": 2.9870638847351074, "learning_rate": 9.885055192137765e-06, "loss": 0.7156, "step": 8059 }, { "epoch": 0.09636652757684812, "grad_norm": 3.5841522216796875, "learning_rate": 9.885013911489168e-06, "loss": 0.5679, "step": 8060 }, { "epoch": 0.09637848372170878, "grad_norm": 2.271160364151001, "learning_rate": 9.884972623515472e-06, "loss": 0.5447, "step": 8061 }, { "epoch": 0.09639043986656942, "grad_norm": 2.3741252422332764, "learning_rate": 9.884931328216743e-06, "loss": 0.5846, "step": 8062 }, { "epoch": 0.09640239601143008, "grad_norm": 1.955203890800476, "learning_rate": 9.884890025593043e-06, "loss": 0.685, "step": 8063 }, { "epoch": 0.09641435215629073, "grad_norm": 1.9871457815170288, "learning_rate": 9.884848715644432e-06, "loss": 0.6847, "step": 8064 }, { "epoch": 0.09642630830115137, "grad_norm": 1.914300799369812, "learning_rate": 9.884807398370974e-06, "loss": 0.6053, "step": 8065 }, { "epoch": 0.09643826444601203, "grad_norm": 5.069474697113037, "learning_rate": 9.884766073772729e-06, "loss": 0.6177, "step": 8066 }, { "epoch": 0.09645022059087267, "grad_norm": 1.8554768562316895, "learning_rate": 9.88472474184976e-06, "loss": 0.4963, "step": 8067 }, { "epoch": 0.09646217673573333, "grad_norm": 3.888216018676758, "learning_rate": 9.88468340260213e-06, "loss": 0.4945, "step": 8068 }, { "epoch": 0.09647413288059398, "grad_norm": 4.843978404998779, "learning_rate": 9.884642056029898e-06, "loss": 0.6299, "step": 8069 }, { "epoch": 0.09648608902545464, "grad_norm": 2.59043550491333, "learning_rate": 9.884600702133129e-06, "loss": 0.7061, "step": 8070 }, { "epoch": 0.09649804517031528, "grad_norm": 2.5658650398254395, "learning_rate": 9.884559340911884e-06, "loss": 0.5906, "step": 8071 }, { "epoch": 0.09651000131517594, "grad_norm": 3.387754201889038, "learning_rate": 9.884517972366225e-06, "loss": 0.7039, "step": 8072 }, { "epoch": 0.09652195746003658, "grad_norm": 3.380180597305298, "learning_rate": 9.884476596496213e-06, "loss": 0.6492, "step": 8073 }, { "epoch": 0.09653391360489724, "grad_norm": 4.3487229347229, "learning_rate": 9.884435213301913e-06, "loss": 0.625, "step": 8074 }, { "epoch": 0.09654586974975789, "grad_norm": 3.5097949504852295, "learning_rate": 9.884393822783382e-06, "loss": 0.6146, "step": 8075 }, { "epoch": 0.09655782589461855, "grad_norm": 7.719720363616943, "learning_rate": 9.884352424940689e-06, "loss": 0.5804, "step": 8076 }, { "epoch": 0.09656978203947919, "grad_norm": 3.4470150470733643, "learning_rate": 9.884311019773889e-06, "loss": 0.6459, "step": 8077 }, { "epoch": 0.09658173818433984, "grad_norm": 9.775936126708984, "learning_rate": 9.884269607283048e-06, "loss": 0.7365, "step": 8078 }, { "epoch": 0.0965936943292005, "grad_norm": 2.141071319580078, "learning_rate": 9.884228187468228e-06, "loss": 0.6395, "step": 8079 }, { "epoch": 0.09660565047406114, "grad_norm": 3.766252279281616, "learning_rate": 9.88418676032949e-06, "loss": 0.668, "step": 8080 }, { "epoch": 0.0966176066189218, "grad_norm": 2.0314126014709473, "learning_rate": 9.884145325866896e-06, "loss": 0.5591, "step": 8081 }, { "epoch": 0.09662956276378244, "grad_norm": 1.9533954858779907, "learning_rate": 9.884103884080509e-06, "loss": 0.6292, "step": 8082 }, { "epoch": 0.0966415189086431, "grad_norm": 2.1235549449920654, "learning_rate": 9.884062434970392e-06, "loss": 0.681, "step": 8083 }, { "epoch": 0.09665347505350375, "grad_norm": 2.3640687465667725, "learning_rate": 9.884020978536607e-06, "loss": 0.6208, "step": 8084 }, { "epoch": 0.0966654311983644, "grad_norm": 1.5737876892089844, "learning_rate": 9.883979514779212e-06, "loss": 0.5489, "step": 8085 }, { "epoch": 0.09667738734322505, "grad_norm": 1.7034554481506348, "learning_rate": 9.883938043698273e-06, "loss": 0.5887, "step": 8086 }, { "epoch": 0.09668934348808571, "grad_norm": 6.513753890991211, "learning_rate": 9.883896565293854e-06, "loss": 0.6413, "step": 8087 }, { "epoch": 0.09670129963294635, "grad_norm": 2.6565847396850586, "learning_rate": 9.883855079566012e-06, "loss": 0.5806, "step": 8088 }, { "epoch": 0.096713255777807, "grad_norm": 2.0682854652404785, "learning_rate": 9.883813586514814e-06, "loss": 0.5663, "step": 8089 }, { "epoch": 0.09672521192266766, "grad_norm": 3.892158031463623, "learning_rate": 9.883772086140317e-06, "loss": 0.5555, "step": 8090 }, { "epoch": 0.0967371680675283, "grad_norm": 2.2044365406036377, "learning_rate": 9.88373057844259e-06, "loss": 0.611, "step": 8091 }, { "epoch": 0.09674912421238896, "grad_norm": 2.216157913208008, "learning_rate": 9.88368906342169e-06, "loss": 0.7208, "step": 8092 }, { "epoch": 0.0967610803572496, "grad_norm": 1.9920268058776855, "learning_rate": 9.88364754107768e-06, "loss": 0.5386, "step": 8093 }, { "epoch": 0.09677303650211026, "grad_norm": 2.101419448852539, "learning_rate": 9.883606011410624e-06, "loss": 0.6473, "step": 8094 }, { "epoch": 0.09678499264697091, "grad_norm": 3.695979595184326, "learning_rate": 9.883564474420585e-06, "loss": 0.655, "step": 8095 }, { "epoch": 0.09679694879183157, "grad_norm": 2.7508468627929688, "learning_rate": 9.883522930107623e-06, "loss": 0.5399, "step": 8096 }, { "epoch": 0.09680890493669221, "grad_norm": 15.944613456726074, "learning_rate": 9.8834813784718e-06, "loss": 0.7207, "step": 8097 }, { "epoch": 0.09682086108155287, "grad_norm": 3.65460467338562, "learning_rate": 9.88343981951318e-06, "loss": 0.5723, "step": 8098 }, { "epoch": 0.09683281722641351, "grad_norm": 2.7693076133728027, "learning_rate": 9.883398253231823e-06, "loss": 0.5335, "step": 8099 }, { "epoch": 0.09684477337127417, "grad_norm": 6.367344856262207, "learning_rate": 9.883356679627796e-06, "loss": 0.6986, "step": 8100 }, { "epoch": 0.09685672951613482, "grad_norm": 2.196565628051758, "learning_rate": 9.883315098701156e-06, "loss": 0.7135, "step": 8101 }, { "epoch": 0.09686868566099546, "grad_norm": 7.278653144836426, "learning_rate": 9.883273510451968e-06, "loss": 0.641, "step": 8102 }, { "epoch": 0.09688064180585612, "grad_norm": 1.910923957824707, "learning_rate": 9.883231914880295e-06, "loss": 0.7216, "step": 8103 }, { "epoch": 0.09689259795071677, "grad_norm": 3.252816915512085, "learning_rate": 9.883190311986197e-06, "loss": 0.6596, "step": 8104 }, { "epoch": 0.09690455409557742, "grad_norm": 1.5194847583770752, "learning_rate": 9.883148701769738e-06, "loss": 0.5807, "step": 8105 }, { "epoch": 0.09691651024043807, "grad_norm": 2.132413387298584, "learning_rate": 9.88310708423098e-06, "loss": 0.5665, "step": 8106 }, { "epoch": 0.09692846638529873, "grad_norm": 4.065640926361084, "learning_rate": 9.883065459369987e-06, "loss": 0.5854, "step": 8107 }, { "epoch": 0.09694042253015937, "grad_norm": 2.586683511734009, "learning_rate": 9.88302382718682e-06, "loss": 0.612, "step": 8108 }, { "epoch": 0.09695237867502003, "grad_norm": 3.5496184825897217, "learning_rate": 9.88298218768154e-06, "loss": 0.5984, "step": 8109 }, { "epoch": 0.09696433481988068, "grad_norm": 2.196901321411133, "learning_rate": 9.88294054085421e-06, "loss": 0.6761, "step": 8110 }, { "epoch": 0.09697629096474134, "grad_norm": 1.7007386684417725, "learning_rate": 9.882898886704897e-06, "loss": 0.598, "step": 8111 }, { "epoch": 0.09698824710960198, "grad_norm": 2.0169625282287598, "learning_rate": 9.882857225233657e-06, "loss": 0.5366, "step": 8112 }, { "epoch": 0.09700020325446262, "grad_norm": 2.241056442260742, "learning_rate": 9.882815556440555e-06, "loss": 0.6107, "step": 8113 }, { "epoch": 0.09701215939932328, "grad_norm": 7.520094394683838, "learning_rate": 9.882773880325656e-06, "loss": 0.6487, "step": 8114 }, { "epoch": 0.09702411554418393, "grad_norm": 1.864242434501648, "learning_rate": 9.882732196889018e-06, "loss": 0.5871, "step": 8115 }, { "epoch": 0.09703607168904459, "grad_norm": 2.47200870513916, "learning_rate": 9.882690506130706e-06, "loss": 0.5745, "step": 8116 }, { "epoch": 0.09704802783390523, "grad_norm": 6.770140171051025, "learning_rate": 9.882648808050784e-06, "loss": 0.6533, "step": 8117 }, { "epoch": 0.09705998397876589, "grad_norm": 2.108832836151123, "learning_rate": 9.88260710264931e-06, "loss": 0.6282, "step": 8118 }, { "epoch": 0.09707194012362654, "grad_norm": 11.769144058227539, "learning_rate": 9.88256538992635e-06, "loss": 0.5645, "step": 8119 }, { "epoch": 0.0970838962684872, "grad_norm": 3.6498425006866455, "learning_rate": 9.882523669881966e-06, "loss": 0.5497, "step": 8120 }, { "epoch": 0.09709585241334784, "grad_norm": 2.7563040256500244, "learning_rate": 9.882481942516222e-06, "loss": 0.6124, "step": 8121 }, { "epoch": 0.0971078085582085, "grad_norm": 2.4625837802886963, "learning_rate": 9.882440207829178e-06, "loss": 0.5738, "step": 8122 }, { "epoch": 0.09711976470306914, "grad_norm": 3.543978214263916, "learning_rate": 9.882398465820897e-06, "loss": 0.7114, "step": 8123 }, { "epoch": 0.09713172084792979, "grad_norm": 2.7218313217163086, "learning_rate": 9.882356716491443e-06, "loss": 0.7213, "step": 8124 }, { "epoch": 0.09714367699279045, "grad_norm": 3.73776912689209, "learning_rate": 9.882314959840877e-06, "loss": 0.6412, "step": 8125 }, { "epoch": 0.09715563313765109, "grad_norm": 2.8282124996185303, "learning_rate": 9.882273195869264e-06, "loss": 0.6265, "step": 8126 }, { "epoch": 0.09716758928251175, "grad_norm": 9.068857192993164, "learning_rate": 9.882231424576662e-06, "loss": 0.5543, "step": 8127 }, { "epoch": 0.0971795454273724, "grad_norm": 1.8715144395828247, "learning_rate": 9.88218964596314e-06, "loss": 0.6798, "step": 8128 }, { "epoch": 0.09719150157223305, "grad_norm": 2.2409534454345703, "learning_rate": 9.882147860028755e-06, "loss": 0.5941, "step": 8129 }, { "epoch": 0.0972034577170937, "grad_norm": 15.910430908203125, "learning_rate": 9.882106066773572e-06, "loss": 0.5787, "step": 8130 }, { "epoch": 0.09721541386195436, "grad_norm": 3.7872564792633057, "learning_rate": 9.882064266197656e-06, "loss": 0.718, "step": 8131 }, { "epoch": 0.097227370006815, "grad_norm": 2.020939350128174, "learning_rate": 9.882022458301064e-06, "loss": 0.6495, "step": 8132 }, { "epoch": 0.09723932615167566, "grad_norm": 3.4734816551208496, "learning_rate": 9.881980643083865e-06, "loss": 0.607, "step": 8133 }, { "epoch": 0.0972512822965363, "grad_norm": 2.686696767807007, "learning_rate": 9.881938820546115e-06, "loss": 0.7051, "step": 8134 }, { "epoch": 0.09726323844139696, "grad_norm": 3.280909538269043, "learning_rate": 9.881896990687882e-06, "loss": 0.6221, "step": 8135 }, { "epoch": 0.09727519458625761, "grad_norm": 2.7923789024353027, "learning_rate": 9.881855153509227e-06, "loss": 0.6873, "step": 8136 }, { "epoch": 0.09728715073111825, "grad_norm": 2.902660846710205, "learning_rate": 9.881813309010216e-06, "loss": 0.5808, "step": 8137 }, { "epoch": 0.09729910687597891, "grad_norm": 2.356475591659546, "learning_rate": 9.881771457190905e-06, "loss": 0.6781, "step": 8138 }, { "epoch": 0.09731106302083956, "grad_norm": 5.352368354797363, "learning_rate": 9.881729598051361e-06, "loss": 0.7204, "step": 8139 }, { "epoch": 0.09732301916570021, "grad_norm": 2.996846914291382, "learning_rate": 9.881687731591645e-06, "loss": 0.571, "step": 8140 }, { "epoch": 0.09733497531056086, "grad_norm": 2.7039403915405273, "learning_rate": 9.881645857811823e-06, "loss": 0.5951, "step": 8141 }, { "epoch": 0.09734693145542152, "grad_norm": 1.8201597929000854, "learning_rate": 9.881603976711955e-06, "loss": 0.6778, "step": 8142 }, { "epoch": 0.09735888760028216, "grad_norm": 4.39415168762207, "learning_rate": 9.881562088292105e-06, "loss": 0.6723, "step": 8143 }, { "epoch": 0.09737084374514282, "grad_norm": 4.029386043548584, "learning_rate": 9.881520192552334e-06, "loss": 0.6585, "step": 8144 }, { "epoch": 0.09738279989000347, "grad_norm": 2.281522274017334, "learning_rate": 9.881478289492707e-06, "loss": 0.5324, "step": 8145 }, { "epoch": 0.09739475603486412, "grad_norm": 6.821662425994873, "learning_rate": 9.881436379113287e-06, "loss": 0.5949, "step": 8146 }, { "epoch": 0.09740671217972477, "grad_norm": 1.5180938243865967, "learning_rate": 9.881394461414134e-06, "loss": 0.5745, "step": 8147 }, { "epoch": 0.09741866832458541, "grad_norm": 2.137707471847534, "learning_rate": 9.881352536395313e-06, "loss": 0.6998, "step": 8148 }, { "epoch": 0.09743062446944607, "grad_norm": 2.83695912361145, "learning_rate": 9.881310604056888e-06, "loss": 0.7138, "step": 8149 }, { "epoch": 0.09744258061430672, "grad_norm": 3.8807358741760254, "learning_rate": 9.881268664398919e-06, "loss": 0.6231, "step": 8150 }, { "epoch": 0.09745453675916738, "grad_norm": 3.7933926582336426, "learning_rate": 9.88122671742147e-06, "loss": 0.6941, "step": 8151 }, { "epoch": 0.09746649290402802, "grad_norm": 2.166343927383423, "learning_rate": 9.881184763124606e-06, "loss": 0.6038, "step": 8152 }, { "epoch": 0.09747844904888868, "grad_norm": 2.019770860671997, "learning_rate": 9.881142801508385e-06, "loss": 0.6239, "step": 8153 }, { "epoch": 0.09749040519374932, "grad_norm": 1.6618664264678955, "learning_rate": 9.881100832572874e-06, "loss": 0.5497, "step": 8154 }, { "epoch": 0.09750236133860998, "grad_norm": 1.7753578424453735, "learning_rate": 9.881058856318137e-06, "loss": 0.5495, "step": 8155 }, { "epoch": 0.09751431748347063, "grad_norm": 2.978029489517212, "learning_rate": 9.881016872744234e-06, "loss": 0.6874, "step": 8156 }, { "epoch": 0.09752627362833129, "grad_norm": 3.735454797744751, "learning_rate": 9.880974881851229e-06, "loss": 0.7439, "step": 8157 }, { "epoch": 0.09753822977319193, "grad_norm": 2.009902000427246, "learning_rate": 9.880932883639183e-06, "loss": 0.6557, "step": 8158 }, { "epoch": 0.09755018591805259, "grad_norm": 5.289623260498047, "learning_rate": 9.880890878108162e-06, "loss": 0.6693, "step": 8159 }, { "epoch": 0.09756214206291323, "grad_norm": 1.5596188306808472, "learning_rate": 9.880848865258228e-06, "loss": 0.6495, "step": 8160 }, { "epoch": 0.09757409820777388, "grad_norm": 2.5764381885528564, "learning_rate": 9.880806845089444e-06, "loss": 0.6405, "step": 8161 }, { "epoch": 0.09758605435263454, "grad_norm": 3.4138731956481934, "learning_rate": 9.880764817601873e-06, "loss": 0.6131, "step": 8162 }, { "epoch": 0.09759801049749518, "grad_norm": 1.8471125364303589, "learning_rate": 9.880722782795578e-06, "loss": 0.6751, "step": 8163 }, { "epoch": 0.09760996664235584, "grad_norm": 2.2482473850250244, "learning_rate": 9.88068074067062e-06, "loss": 0.6894, "step": 8164 }, { "epoch": 0.09762192278721649, "grad_norm": 2.3447370529174805, "learning_rate": 9.880638691227065e-06, "loss": 0.6324, "step": 8165 }, { "epoch": 0.09763387893207714, "grad_norm": 2.9033548831939697, "learning_rate": 9.880596634464975e-06, "loss": 0.5501, "step": 8166 }, { "epoch": 0.09764583507693779, "grad_norm": 4.2954559326171875, "learning_rate": 9.880554570384413e-06, "loss": 0.6042, "step": 8167 }, { "epoch": 0.09765779122179845, "grad_norm": 3.1373496055603027, "learning_rate": 9.880512498985441e-06, "loss": 0.5838, "step": 8168 }, { "epoch": 0.0976697473666591, "grad_norm": 2.5310659408569336, "learning_rate": 9.880470420268125e-06, "loss": 0.6379, "step": 8169 }, { "epoch": 0.09768170351151975, "grad_norm": 5.136441707611084, "learning_rate": 9.880428334232526e-06, "loss": 0.6346, "step": 8170 }, { "epoch": 0.0976936596563804, "grad_norm": 2.4411733150482178, "learning_rate": 9.880386240878707e-06, "loss": 0.5851, "step": 8171 }, { "epoch": 0.09770561580124104, "grad_norm": 3.8504507541656494, "learning_rate": 9.88034414020673e-06, "loss": 0.6408, "step": 8172 }, { "epoch": 0.0977175719461017, "grad_norm": 10.323537826538086, "learning_rate": 9.880302032216662e-06, "loss": 0.5552, "step": 8173 }, { "epoch": 0.09772952809096234, "grad_norm": 1.9172587394714355, "learning_rate": 9.880259916908563e-06, "loss": 0.5518, "step": 8174 }, { "epoch": 0.097741484235823, "grad_norm": 4.460681438446045, "learning_rate": 9.880217794282495e-06, "loss": 0.5423, "step": 8175 }, { "epoch": 0.09775344038068365, "grad_norm": 5.822517395019531, "learning_rate": 9.880175664338526e-06, "loss": 0.6288, "step": 8176 }, { "epoch": 0.0977653965255443, "grad_norm": 2.9857800006866455, "learning_rate": 9.880133527076715e-06, "loss": 0.615, "step": 8177 }, { "epoch": 0.09777735267040495, "grad_norm": 2.4065988063812256, "learning_rate": 9.880091382497125e-06, "loss": 0.6132, "step": 8178 }, { "epoch": 0.09778930881526561, "grad_norm": 2.410177230834961, "learning_rate": 9.880049230599822e-06, "loss": 0.5937, "step": 8179 }, { "epoch": 0.09780126496012626, "grad_norm": 3.802377700805664, "learning_rate": 9.880007071384868e-06, "loss": 0.5971, "step": 8180 }, { "epoch": 0.09781322110498691, "grad_norm": 2.6084039211273193, "learning_rate": 9.879964904852326e-06, "loss": 0.5667, "step": 8181 }, { "epoch": 0.09782517724984756, "grad_norm": 2.798362970352173, "learning_rate": 9.879922731002258e-06, "loss": 0.6381, "step": 8182 }, { "epoch": 0.0978371333947082, "grad_norm": 1.9902819395065308, "learning_rate": 9.879880549834731e-06, "loss": 0.6862, "step": 8183 }, { "epoch": 0.09784908953956886, "grad_norm": 2.2111635208129883, "learning_rate": 9.879838361349804e-06, "loss": 0.6791, "step": 8184 }, { "epoch": 0.0978610456844295, "grad_norm": 2.169788360595703, "learning_rate": 9.879796165547542e-06, "loss": 0.649, "step": 8185 }, { "epoch": 0.09787300182929017, "grad_norm": 2.435709238052368, "learning_rate": 9.87975396242801e-06, "loss": 0.6291, "step": 8186 }, { "epoch": 0.09788495797415081, "grad_norm": 2.1796154975891113, "learning_rate": 9.879711751991268e-06, "loss": 0.6204, "step": 8187 }, { "epoch": 0.09789691411901147, "grad_norm": 3.2554290294647217, "learning_rate": 9.879669534237381e-06, "loss": 0.6271, "step": 8188 }, { "epoch": 0.09790887026387211, "grad_norm": 2.298635721206665, "learning_rate": 9.879627309166412e-06, "loss": 0.6249, "step": 8189 }, { "epoch": 0.09792082640873277, "grad_norm": 2.235766649246216, "learning_rate": 9.879585076778424e-06, "loss": 0.5532, "step": 8190 }, { "epoch": 0.09793278255359342, "grad_norm": 3.8393943309783936, "learning_rate": 9.879542837073482e-06, "loss": 0.6623, "step": 8191 }, { "epoch": 0.09794473869845408, "grad_norm": 6.00780725479126, "learning_rate": 9.879500590051648e-06, "loss": 0.6131, "step": 8192 }, { "epoch": 0.09795669484331472, "grad_norm": 2.2362945079803467, "learning_rate": 9.879458335712985e-06, "loss": 0.5076, "step": 8193 }, { "epoch": 0.09796865098817538, "grad_norm": 2.097346305847168, "learning_rate": 9.879416074057557e-06, "loss": 0.5986, "step": 8194 }, { "epoch": 0.09798060713303602, "grad_norm": 1.5965096950531006, "learning_rate": 9.879373805085428e-06, "loss": 0.6545, "step": 8195 }, { "epoch": 0.09799256327789667, "grad_norm": 2.6586835384368896, "learning_rate": 9.87933152879666e-06, "loss": 0.629, "step": 8196 }, { "epoch": 0.09800451942275733, "grad_norm": 1.7935022115707397, "learning_rate": 9.879289245191317e-06, "loss": 0.7674, "step": 8197 }, { "epoch": 0.09801647556761797, "grad_norm": 5.489677429199219, "learning_rate": 9.879246954269464e-06, "loss": 0.5577, "step": 8198 }, { "epoch": 0.09802843171247863, "grad_norm": 3.366920232772827, "learning_rate": 9.87920465603116e-06, "loss": 0.6739, "step": 8199 }, { "epoch": 0.09804038785733928, "grad_norm": 2.0580432415008545, "learning_rate": 9.879162350476473e-06, "loss": 0.5819, "step": 8200 }, { "epoch": 0.09805234400219993, "grad_norm": 2.092149496078491, "learning_rate": 9.879120037605466e-06, "loss": 0.5773, "step": 8201 }, { "epoch": 0.09806430014706058, "grad_norm": 3.518760919570923, "learning_rate": 9.8790777174182e-06, "loss": 0.6572, "step": 8202 }, { "epoch": 0.09807625629192124, "grad_norm": 2.003483295440674, "learning_rate": 9.879035389914738e-06, "loss": 0.6254, "step": 8203 }, { "epoch": 0.09808821243678188, "grad_norm": 6.156469821929932, "learning_rate": 9.878993055095147e-06, "loss": 0.7261, "step": 8204 }, { "epoch": 0.09810016858164254, "grad_norm": 2.1292734146118164, "learning_rate": 9.878950712959488e-06, "loss": 0.5957, "step": 8205 }, { "epoch": 0.09811212472650319, "grad_norm": 2.7933261394500732, "learning_rate": 9.878908363507825e-06, "loss": 0.6331, "step": 8206 }, { "epoch": 0.09812408087136383, "grad_norm": 1.7483041286468506, "learning_rate": 9.87886600674022e-06, "loss": 0.6939, "step": 8207 }, { "epoch": 0.09813603701622449, "grad_norm": 2.3133509159088135, "learning_rate": 9.87882364265674e-06, "loss": 0.7336, "step": 8208 }, { "epoch": 0.09814799316108513, "grad_norm": 2.3438706398010254, "learning_rate": 9.878781271257447e-06, "loss": 0.7168, "step": 8209 }, { "epoch": 0.09815994930594579, "grad_norm": 2.322606086730957, "learning_rate": 9.878738892542403e-06, "loss": 0.58, "step": 8210 }, { "epoch": 0.09817190545080644, "grad_norm": 2.8234946727752686, "learning_rate": 9.878696506511674e-06, "loss": 0.6449, "step": 8211 }, { "epoch": 0.0981838615956671, "grad_norm": 2.761746644973755, "learning_rate": 9.87865411316532e-06, "loss": 0.6453, "step": 8212 }, { "epoch": 0.09819581774052774, "grad_norm": 3.096421718597412, "learning_rate": 9.878611712503408e-06, "loss": 0.5516, "step": 8213 }, { "epoch": 0.0982077738853884, "grad_norm": 3.7526581287384033, "learning_rate": 9.878569304526001e-06, "loss": 0.5764, "step": 8214 }, { "epoch": 0.09821973003024904, "grad_norm": 2.860227346420288, "learning_rate": 9.87852688923316e-06, "loss": 0.673, "step": 8215 }, { "epoch": 0.0982316861751097, "grad_norm": 4.765320301055908, "learning_rate": 9.878484466624953e-06, "loss": 0.6149, "step": 8216 }, { "epoch": 0.09824364231997035, "grad_norm": 3.65040922164917, "learning_rate": 9.878442036701441e-06, "loss": 0.4914, "step": 8217 }, { "epoch": 0.098255598464831, "grad_norm": 2.896780252456665, "learning_rate": 9.878399599462687e-06, "loss": 0.6405, "step": 8218 }, { "epoch": 0.09826755460969165, "grad_norm": 7.40220308303833, "learning_rate": 9.878357154908756e-06, "loss": 0.701, "step": 8219 }, { "epoch": 0.0982795107545523, "grad_norm": 10.201713562011719, "learning_rate": 9.87831470303971e-06, "loss": 0.645, "step": 8220 }, { "epoch": 0.09829146689941295, "grad_norm": 1.9567800760269165, "learning_rate": 9.878272243855615e-06, "loss": 0.6777, "step": 8221 }, { "epoch": 0.0983034230442736, "grad_norm": 2.102785348892212, "learning_rate": 9.878229777356531e-06, "loss": 0.6526, "step": 8222 }, { "epoch": 0.09831537918913426, "grad_norm": 1.846924066543579, "learning_rate": 9.878187303542528e-06, "loss": 0.7446, "step": 8223 }, { "epoch": 0.0983273353339949, "grad_norm": 1.9607723951339722, "learning_rate": 9.878144822413662e-06, "loss": 0.5773, "step": 8224 }, { "epoch": 0.09833929147885556, "grad_norm": 1.6578572988510132, "learning_rate": 9.878102333970004e-06, "loss": 0.5188, "step": 8225 }, { "epoch": 0.0983512476237162, "grad_norm": 2.5261921882629395, "learning_rate": 9.878059838211611e-06, "loss": 0.6354, "step": 8226 }, { "epoch": 0.09836320376857687, "grad_norm": 1.92131769657135, "learning_rate": 9.878017335138553e-06, "loss": 0.5911, "step": 8227 }, { "epoch": 0.09837515991343751, "grad_norm": 2.5953030586242676, "learning_rate": 9.877974824750888e-06, "loss": 0.6435, "step": 8228 }, { "epoch": 0.09838711605829817, "grad_norm": 1.7591590881347656, "learning_rate": 9.877932307048683e-06, "loss": 0.6234, "step": 8229 }, { "epoch": 0.09839907220315881, "grad_norm": 2.3958451747894287, "learning_rate": 9.877889782032002e-06, "loss": 0.6286, "step": 8230 }, { "epoch": 0.09841102834801946, "grad_norm": 1.5879817008972168, "learning_rate": 9.877847249700907e-06, "loss": 0.5597, "step": 8231 }, { "epoch": 0.09842298449288012, "grad_norm": 2.8717336654663086, "learning_rate": 9.877804710055465e-06, "loss": 0.6196, "step": 8232 }, { "epoch": 0.09843494063774076, "grad_norm": 2.347119092941284, "learning_rate": 9.877762163095735e-06, "loss": 0.634, "step": 8233 }, { "epoch": 0.09844689678260142, "grad_norm": 4.888736248016357, "learning_rate": 9.877719608821784e-06, "loss": 0.6213, "step": 8234 }, { "epoch": 0.09845885292746206, "grad_norm": 10.902676582336426, "learning_rate": 9.877677047233677e-06, "loss": 0.5876, "step": 8235 }, { "epoch": 0.09847080907232272, "grad_norm": 1.688210129737854, "learning_rate": 9.877634478331472e-06, "loss": 0.6508, "step": 8236 }, { "epoch": 0.09848276521718337, "grad_norm": 6.140114784240723, "learning_rate": 9.87759190211524e-06, "loss": 0.6984, "step": 8237 }, { "epoch": 0.09849472136204403, "grad_norm": 3.1737701892852783, "learning_rate": 9.87754931858504e-06, "loss": 0.7066, "step": 8238 }, { "epoch": 0.09850667750690467, "grad_norm": 2.3299427032470703, "learning_rate": 9.877506727740938e-06, "loss": 0.6572, "step": 8239 }, { "epoch": 0.09851863365176533, "grad_norm": 2.1092517375946045, "learning_rate": 9.877464129582996e-06, "loss": 0.5923, "step": 8240 }, { "epoch": 0.09853058979662598, "grad_norm": 2.8277337551116943, "learning_rate": 9.877421524111281e-06, "loss": 0.5854, "step": 8241 }, { "epoch": 0.09854254594148663, "grad_norm": 1.8046956062316895, "learning_rate": 9.877378911325854e-06, "loss": 0.5476, "step": 8242 }, { "epoch": 0.09855450208634728, "grad_norm": 1.5168238878250122, "learning_rate": 9.877336291226783e-06, "loss": 0.4923, "step": 8243 }, { "epoch": 0.09856645823120792, "grad_norm": 2.2537174224853516, "learning_rate": 9.877293663814126e-06, "loss": 0.5551, "step": 8244 }, { "epoch": 0.09857841437606858, "grad_norm": 1.760716199874878, "learning_rate": 9.87725102908795e-06, "loss": 0.5746, "step": 8245 }, { "epoch": 0.09859037052092923, "grad_norm": 3.210188865661621, "learning_rate": 9.877208387048318e-06, "loss": 0.5785, "step": 8246 }, { "epoch": 0.09860232666578989, "grad_norm": 2.6085727214813232, "learning_rate": 9.877165737695295e-06, "loss": 0.6107, "step": 8247 }, { "epoch": 0.09861428281065053, "grad_norm": 23.03697395324707, "learning_rate": 9.877123081028946e-06, "loss": 0.5438, "step": 8248 }, { "epoch": 0.09862623895551119, "grad_norm": 2.341468095779419, "learning_rate": 9.877080417049334e-06, "loss": 0.558, "step": 8249 }, { "epoch": 0.09863819510037183, "grad_norm": 4.283657550811768, "learning_rate": 9.877037745756521e-06, "loss": 0.7138, "step": 8250 }, { "epoch": 0.09865015124523249, "grad_norm": 5.896031379699707, "learning_rate": 9.876995067150573e-06, "loss": 0.6185, "step": 8251 }, { "epoch": 0.09866210739009314, "grad_norm": 2.1447904109954834, "learning_rate": 9.876952381231553e-06, "loss": 0.5574, "step": 8252 }, { "epoch": 0.0986740635349538, "grad_norm": 7.679489612579346, "learning_rate": 9.876909687999528e-06, "loss": 0.6493, "step": 8253 }, { "epoch": 0.09868601967981444, "grad_norm": 10.30223274230957, "learning_rate": 9.876866987454556e-06, "loss": 0.578, "step": 8254 }, { "epoch": 0.09869797582467509, "grad_norm": 5.0103936195373535, "learning_rate": 9.876824279596708e-06, "loss": 0.5886, "step": 8255 }, { "epoch": 0.09870993196953574, "grad_norm": 2.318094253540039, "learning_rate": 9.876781564426042e-06, "loss": 0.6431, "step": 8256 }, { "epoch": 0.09872188811439639, "grad_norm": 2.777979612350464, "learning_rate": 9.876738841942625e-06, "loss": 0.6653, "step": 8257 }, { "epoch": 0.09873384425925705, "grad_norm": 4.210792541503906, "learning_rate": 9.876696112146522e-06, "loss": 0.5758, "step": 8258 }, { "epoch": 0.09874580040411769, "grad_norm": 3.0184478759765625, "learning_rate": 9.876653375037796e-06, "loss": 0.72, "step": 8259 }, { "epoch": 0.09875775654897835, "grad_norm": 2.0014071464538574, "learning_rate": 9.87661063061651e-06, "loss": 0.7056, "step": 8260 }, { "epoch": 0.098769712693839, "grad_norm": 2.9388489723205566, "learning_rate": 9.87656787888273e-06, "loss": 0.6491, "step": 8261 }, { "epoch": 0.09878166883869965, "grad_norm": 3.6762936115264893, "learning_rate": 9.876525119836518e-06, "loss": 0.6838, "step": 8262 }, { "epoch": 0.0987936249835603, "grad_norm": 2.348259687423706, "learning_rate": 9.87648235347794e-06, "loss": 0.7148, "step": 8263 }, { "epoch": 0.09880558112842096, "grad_norm": 3.475623846054077, "learning_rate": 9.876439579807059e-06, "loss": 0.5559, "step": 8264 }, { "epoch": 0.0988175372732816, "grad_norm": 2.000411033630371, "learning_rate": 9.87639679882394e-06, "loss": 0.5966, "step": 8265 }, { "epoch": 0.09882949341814225, "grad_norm": 2.577413558959961, "learning_rate": 9.876354010528647e-06, "loss": 0.5849, "step": 8266 }, { "epoch": 0.0988414495630029, "grad_norm": 3.6599013805389404, "learning_rate": 9.876311214921243e-06, "loss": 0.5969, "step": 8267 }, { "epoch": 0.09885340570786355, "grad_norm": 2.887343406677246, "learning_rate": 9.876268412001792e-06, "loss": 0.6143, "step": 8268 }, { "epoch": 0.09886536185272421, "grad_norm": 4.77907657623291, "learning_rate": 9.87622560177036e-06, "loss": 0.6077, "step": 8269 }, { "epoch": 0.09887731799758485, "grad_norm": 3.076836347579956, "learning_rate": 9.87618278422701e-06, "loss": 0.6451, "step": 8270 }, { "epoch": 0.09888927414244551, "grad_norm": 12.090578079223633, "learning_rate": 9.876139959371808e-06, "loss": 0.6273, "step": 8271 }, { "epoch": 0.09890123028730616, "grad_norm": 2.1058080196380615, "learning_rate": 9.876097127204816e-06, "loss": 0.598, "step": 8272 }, { "epoch": 0.09891318643216682, "grad_norm": 2.789482355117798, "learning_rate": 9.8760542877261e-06, "loss": 0.5789, "step": 8273 }, { "epoch": 0.09892514257702746, "grad_norm": 3.1363861560821533, "learning_rate": 9.876011440935721e-06, "loss": 0.6788, "step": 8274 }, { "epoch": 0.09893709872188812, "grad_norm": 1.852441668510437, "learning_rate": 9.875968586833749e-06, "loss": 0.6101, "step": 8275 }, { "epoch": 0.09894905486674876, "grad_norm": 1.9940307140350342, "learning_rate": 9.87592572542024e-06, "loss": 0.7755, "step": 8276 }, { "epoch": 0.09896101101160942, "grad_norm": 42.760528564453125, "learning_rate": 9.875882856695268e-06, "loss": 0.7241, "step": 8277 }, { "epoch": 0.09897296715647007, "grad_norm": 1.9133962392807007, "learning_rate": 9.87583998065889e-06, "loss": 0.6648, "step": 8278 }, { "epoch": 0.09898492330133071, "grad_norm": 4.840386867523193, "learning_rate": 9.875797097311173e-06, "loss": 0.6391, "step": 8279 }, { "epoch": 0.09899687944619137, "grad_norm": 3.2658193111419678, "learning_rate": 9.875754206652179e-06, "loss": 0.6462, "step": 8280 }, { "epoch": 0.09900883559105202, "grad_norm": 2.1849048137664795, "learning_rate": 9.875711308681977e-06, "loss": 0.6491, "step": 8281 }, { "epoch": 0.09902079173591267, "grad_norm": 2.0841047763824463, "learning_rate": 9.875668403400627e-06, "loss": 0.7019, "step": 8282 }, { "epoch": 0.09903274788077332, "grad_norm": 2.4258720874786377, "learning_rate": 9.875625490808196e-06, "loss": 0.6151, "step": 8283 }, { "epoch": 0.09904470402563398, "grad_norm": 6.691239833831787, "learning_rate": 9.875582570904745e-06, "loss": 0.7018, "step": 8284 }, { "epoch": 0.09905666017049462, "grad_norm": 2.390450954437256, "learning_rate": 9.875539643690343e-06, "loss": 0.5786, "step": 8285 }, { "epoch": 0.09906861631535528, "grad_norm": 1.9811463356018066, "learning_rate": 9.875496709165052e-06, "loss": 0.5946, "step": 8286 }, { "epoch": 0.09908057246021593, "grad_norm": 2.3057165145874023, "learning_rate": 9.875453767328934e-06, "loss": 0.6227, "step": 8287 }, { "epoch": 0.09909252860507659, "grad_norm": 2.7615015506744385, "learning_rate": 9.875410818182057e-06, "loss": 0.6241, "step": 8288 }, { "epoch": 0.09910448474993723, "grad_norm": 3.380786657333374, "learning_rate": 9.875367861724485e-06, "loss": 0.5937, "step": 8289 }, { "epoch": 0.09911644089479787, "grad_norm": 2.1287782192230225, "learning_rate": 9.875324897956279e-06, "loss": 0.5823, "step": 8290 }, { "epoch": 0.09912839703965853, "grad_norm": 2.5239298343658447, "learning_rate": 9.875281926877508e-06, "loss": 0.5732, "step": 8291 }, { "epoch": 0.09914035318451918, "grad_norm": 2.186039686203003, "learning_rate": 9.875238948488237e-06, "loss": 0.5331, "step": 8292 }, { "epoch": 0.09915230932937984, "grad_norm": 3.3450684547424316, "learning_rate": 9.875195962788524e-06, "loss": 0.6379, "step": 8293 }, { "epoch": 0.09916426547424048, "grad_norm": 7.604887962341309, "learning_rate": 9.875152969778438e-06, "loss": 0.6465, "step": 8294 }, { "epoch": 0.09917622161910114, "grad_norm": 2.957421064376831, "learning_rate": 9.875109969458044e-06, "loss": 0.6245, "step": 8295 }, { "epoch": 0.09918817776396179, "grad_norm": 2.0779623985290527, "learning_rate": 9.875066961827404e-06, "loss": 0.5888, "step": 8296 }, { "epoch": 0.09920013390882244, "grad_norm": 3.136798858642578, "learning_rate": 9.875023946886584e-06, "loss": 0.5578, "step": 8297 }, { "epoch": 0.09921209005368309, "grad_norm": 2.3734850883483887, "learning_rate": 9.87498092463565e-06, "loss": 0.6361, "step": 8298 }, { "epoch": 0.09922404619854375, "grad_norm": 3.619598388671875, "learning_rate": 9.874937895074663e-06, "loss": 0.7017, "step": 8299 }, { "epoch": 0.09923600234340439, "grad_norm": 3.6200599670410156, "learning_rate": 9.87489485820369e-06, "loss": 0.6686, "step": 8300 }, { "epoch": 0.09924795848826505, "grad_norm": 3.9179749488830566, "learning_rate": 9.874851814022795e-06, "loss": 0.5658, "step": 8301 }, { "epoch": 0.0992599146331257, "grad_norm": 2.6906816959381104, "learning_rate": 9.87480876253204e-06, "loss": 0.6018, "step": 8302 }, { "epoch": 0.09927187077798634, "grad_norm": 10.362960815429688, "learning_rate": 9.874765703731495e-06, "loss": 0.654, "step": 8303 }, { "epoch": 0.099283826922847, "grad_norm": 4.720951080322266, "learning_rate": 9.87472263762122e-06, "loss": 0.6329, "step": 8304 }, { "epoch": 0.09929578306770764, "grad_norm": 1.7568886280059814, "learning_rate": 9.874679564201282e-06, "loss": 0.5506, "step": 8305 }, { "epoch": 0.0993077392125683, "grad_norm": 2.828716993331909, "learning_rate": 9.874636483471743e-06, "loss": 0.6659, "step": 8306 }, { "epoch": 0.09931969535742895, "grad_norm": 1.4302681684494019, "learning_rate": 9.87459339543267e-06, "loss": 0.743, "step": 8307 }, { "epoch": 0.0993316515022896, "grad_norm": 2.1201491355895996, "learning_rate": 9.874550300084127e-06, "loss": 0.6351, "step": 8308 }, { "epoch": 0.09934360764715025, "grad_norm": 7.703461170196533, "learning_rate": 9.87450719742618e-06, "loss": 0.5651, "step": 8309 }, { "epoch": 0.09935556379201091, "grad_norm": 3.457242012023926, "learning_rate": 9.874464087458891e-06, "loss": 0.6485, "step": 8310 }, { "epoch": 0.09936751993687155, "grad_norm": 2.1713972091674805, "learning_rate": 9.874420970182325e-06, "loss": 0.6306, "step": 8311 }, { "epoch": 0.09937947608173221, "grad_norm": 2.584381103515625, "learning_rate": 9.874377845596547e-06, "loss": 0.7315, "step": 8312 }, { "epoch": 0.09939143222659286, "grad_norm": 2.2367427349090576, "learning_rate": 9.874334713701622e-06, "loss": 0.645, "step": 8313 }, { "epoch": 0.0994033883714535, "grad_norm": 2.8091189861297607, "learning_rate": 9.874291574497618e-06, "loss": 0.6316, "step": 8314 }, { "epoch": 0.09941534451631416, "grad_norm": 1.5824239253997803, "learning_rate": 9.874248427984593e-06, "loss": 0.6071, "step": 8315 }, { "epoch": 0.0994273006611748, "grad_norm": 4.380946159362793, "learning_rate": 9.874205274162618e-06, "loss": 0.5953, "step": 8316 }, { "epoch": 0.09943925680603546, "grad_norm": 2.2272746562957764, "learning_rate": 9.874162113031753e-06, "loss": 0.6064, "step": 8317 }, { "epoch": 0.09945121295089611, "grad_norm": 2.817702531814575, "learning_rate": 9.874118944592064e-06, "loss": 0.656, "step": 8318 }, { "epoch": 0.09946316909575677, "grad_norm": 2.1944057941436768, "learning_rate": 9.874075768843618e-06, "loss": 0.649, "step": 8319 }, { "epoch": 0.09947512524061741, "grad_norm": 8.082052230834961, "learning_rate": 9.874032585786477e-06, "loss": 0.6077, "step": 8320 }, { "epoch": 0.09948708138547807, "grad_norm": 10.719785690307617, "learning_rate": 9.873989395420706e-06, "loss": 0.6751, "step": 8321 }, { "epoch": 0.09949903753033872, "grad_norm": 3.5114388465881348, "learning_rate": 9.873946197746373e-06, "loss": 0.6489, "step": 8322 }, { "epoch": 0.09951099367519937, "grad_norm": 6.680120944976807, "learning_rate": 9.87390299276354e-06, "loss": 0.6382, "step": 8323 }, { "epoch": 0.09952294982006002, "grad_norm": 2.8089098930358887, "learning_rate": 9.87385978047227e-06, "loss": 0.6471, "step": 8324 }, { "epoch": 0.09953490596492066, "grad_norm": 2.2570018768310547, "learning_rate": 9.873816560872631e-06, "loss": 0.6395, "step": 8325 }, { "epoch": 0.09954686210978132, "grad_norm": 1.901051640510559, "learning_rate": 9.873773333964687e-06, "loss": 0.5816, "step": 8326 }, { "epoch": 0.09955881825464197, "grad_norm": 2.527966260910034, "learning_rate": 9.873730099748503e-06, "loss": 0.6547, "step": 8327 }, { "epoch": 0.09957077439950263, "grad_norm": 2.8016228675842285, "learning_rate": 9.873686858224143e-06, "loss": 0.7065, "step": 8328 }, { "epoch": 0.09958273054436327, "grad_norm": 2.4857943058013916, "learning_rate": 9.873643609391671e-06, "loss": 0.6236, "step": 8329 }, { "epoch": 0.09959468668922393, "grad_norm": 2.8137731552124023, "learning_rate": 9.873600353251155e-06, "loss": 0.6518, "step": 8330 }, { "epoch": 0.09960664283408457, "grad_norm": 3.555382013320923, "learning_rate": 9.873557089802657e-06, "loss": 0.5817, "step": 8331 }, { "epoch": 0.09961859897894523, "grad_norm": 4.559761047363281, "learning_rate": 9.873513819046244e-06, "loss": 0.6284, "step": 8332 }, { "epoch": 0.09963055512380588, "grad_norm": 10.865852355957031, "learning_rate": 9.873470540981977e-06, "loss": 0.5553, "step": 8333 }, { "epoch": 0.09964251126866654, "grad_norm": 17.3044376373291, "learning_rate": 9.873427255609927e-06, "loss": 0.6068, "step": 8334 }, { "epoch": 0.09965446741352718, "grad_norm": 5.06025505065918, "learning_rate": 9.873383962930153e-06, "loss": 0.6232, "step": 8335 }, { "epoch": 0.09966642355838784, "grad_norm": 3.2557995319366455, "learning_rate": 9.873340662942723e-06, "loss": 0.6798, "step": 8336 }, { "epoch": 0.09967837970324848, "grad_norm": 5.189642429351807, "learning_rate": 9.873297355647702e-06, "loss": 0.6009, "step": 8337 }, { "epoch": 0.09969033584810913, "grad_norm": 3.0198378562927246, "learning_rate": 9.873254041045153e-06, "loss": 0.6025, "step": 8338 }, { "epoch": 0.09970229199296979, "grad_norm": 2.6017613410949707, "learning_rate": 9.873210719135143e-06, "loss": 0.649, "step": 8339 }, { "epoch": 0.09971424813783043, "grad_norm": 3.190279245376587, "learning_rate": 9.873167389917736e-06, "loss": 0.5635, "step": 8340 }, { "epoch": 0.09972620428269109, "grad_norm": 5.058901786804199, "learning_rate": 9.873124053392997e-06, "loss": 0.5118, "step": 8341 }, { "epoch": 0.09973816042755174, "grad_norm": 2.520937442779541, "learning_rate": 9.873080709560991e-06, "loss": 0.6133, "step": 8342 }, { "epoch": 0.0997501165724124, "grad_norm": 5.121374607086182, "learning_rate": 9.873037358421785e-06, "loss": 0.6437, "step": 8343 }, { "epoch": 0.09976207271727304, "grad_norm": 1.57228422164917, "learning_rate": 9.872993999975442e-06, "loss": 0.6067, "step": 8344 }, { "epoch": 0.0997740288621337, "grad_norm": 2.9443461894989014, "learning_rate": 9.872950634222025e-06, "loss": 0.6589, "step": 8345 }, { "epoch": 0.09978598500699434, "grad_norm": 2.979424476623535, "learning_rate": 9.872907261161602e-06, "loss": 0.6285, "step": 8346 }, { "epoch": 0.099797941151855, "grad_norm": 3.767017364501953, "learning_rate": 9.872863880794238e-06, "loss": 0.6826, "step": 8347 }, { "epoch": 0.09980989729671565, "grad_norm": 3.5922634601593018, "learning_rate": 9.872820493119996e-06, "loss": 0.7728, "step": 8348 }, { "epoch": 0.09982185344157629, "grad_norm": 1.6757243871688843, "learning_rate": 9.872777098138943e-06, "loss": 0.5204, "step": 8349 }, { "epoch": 0.09983380958643695, "grad_norm": 3.897172212600708, "learning_rate": 9.872733695851144e-06, "loss": 0.6371, "step": 8350 }, { "epoch": 0.0998457657312976, "grad_norm": 1.7002894878387451, "learning_rate": 9.872690286256664e-06, "loss": 0.649, "step": 8351 }, { "epoch": 0.09985772187615825, "grad_norm": 6.8296732902526855, "learning_rate": 9.872646869355567e-06, "loss": 0.6428, "step": 8352 }, { "epoch": 0.0998696780210189, "grad_norm": 2.501495599746704, "learning_rate": 9.872603445147917e-06, "loss": 0.5598, "step": 8353 }, { "epoch": 0.09988163416587956, "grad_norm": 3.782649040222168, "learning_rate": 9.872560013633783e-06, "loss": 0.628, "step": 8354 }, { "epoch": 0.0998935903107402, "grad_norm": 2.1735851764678955, "learning_rate": 9.872516574813227e-06, "loss": 0.7176, "step": 8355 }, { "epoch": 0.09990554645560086, "grad_norm": 2.2888333797454834, "learning_rate": 9.872473128686316e-06, "loss": 0.5489, "step": 8356 }, { "epoch": 0.0999175026004615, "grad_norm": 6.975987911224365, "learning_rate": 9.872429675253112e-06, "loss": 0.5578, "step": 8357 }, { "epoch": 0.09992945874532216, "grad_norm": 2.435758590698242, "learning_rate": 9.872386214513685e-06, "loss": 0.5868, "step": 8358 }, { "epoch": 0.09994141489018281, "grad_norm": 2.226369619369507, "learning_rate": 9.872342746468095e-06, "loss": 0.6338, "step": 8359 }, { "epoch": 0.09995337103504347, "grad_norm": 1.869877576828003, "learning_rate": 9.872299271116413e-06, "loss": 0.591, "step": 8360 }, { "epoch": 0.09996532717990411, "grad_norm": 13.803510665893555, "learning_rate": 9.872255788458698e-06, "loss": 0.5951, "step": 8361 }, { "epoch": 0.09997728332476476, "grad_norm": 2.336484909057617, "learning_rate": 9.872212298495018e-06, "loss": 0.6907, "step": 8362 }, { "epoch": 0.09998923946962542, "grad_norm": 6.02130651473999, "learning_rate": 9.87216880122544e-06, "loss": 0.571, "step": 8363 }, { "epoch": 0.10000119561448606, "grad_norm": 2.213456392288208, "learning_rate": 9.872125296650029e-06, "loss": 0.6239, "step": 8364 }, { "epoch": 0.10001315175934672, "grad_norm": 2.07210373878479, "learning_rate": 9.872081784768846e-06, "loss": 0.6698, "step": 8365 }, { "epoch": 0.10002510790420736, "grad_norm": 3.029043436050415, "learning_rate": 9.87203826558196e-06, "loss": 0.6463, "step": 8366 }, { "epoch": 0.10003706404906802, "grad_norm": 3.8493003845214844, "learning_rate": 9.871994739089435e-06, "loss": 0.5956, "step": 8367 }, { "epoch": 0.10004902019392867, "grad_norm": 6.604294776916504, "learning_rate": 9.871951205291337e-06, "loss": 0.6053, "step": 8368 }, { "epoch": 0.10006097633878933, "grad_norm": 5.050609588623047, "learning_rate": 9.87190766418773e-06, "loss": 0.7987, "step": 8369 }, { "epoch": 0.10007293248364997, "grad_norm": 3.37969970703125, "learning_rate": 9.87186411577868e-06, "loss": 0.6631, "step": 8370 }, { "epoch": 0.10008488862851063, "grad_norm": 3.401008367538452, "learning_rate": 9.871820560064254e-06, "loss": 0.7192, "step": 8371 }, { "epoch": 0.10009684477337127, "grad_norm": 2.594393730163574, "learning_rate": 9.871776997044515e-06, "loss": 0.6848, "step": 8372 }, { "epoch": 0.10010880091823192, "grad_norm": 4.820531368255615, "learning_rate": 9.871733426719528e-06, "loss": 0.648, "step": 8373 }, { "epoch": 0.10012075706309258, "grad_norm": 2.5735366344451904, "learning_rate": 9.871689849089361e-06, "loss": 0.6679, "step": 8374 }, { "epoch": 0.10013271320795322, "grad_norm": 5.725105285644531, "learning_rate": 9.871646264154078e-06, "loss": 0.6453, "step": 8375 }, { "epoch": 0.10014466935281388, "grad_norm": 2.853072166442871, "learning_rate": 9.871602671913743e-06, "loss": 0.5897, "step": 8376 }, { "epoch": 0.10015662549767453, "grad_norm": 1.885494351387024, "learning_rate": 9.871559072368421e-06, "loss": 0.6858, "step": 8377 }, { "epoch": 0.10016858164253518, "grad_norm": 1.9030539989471436, "learning_rate": 9.871515465518183e-06, "loss": 0.6059, "step": 8378 }, { "epoch": 0.10018053778739583, "grad_norm": 2.793280601501465, "learning_rate": 9.871471851363087e-06, "loss": 0.6502, "step": 8379 }, { "epoch": 0.10019249393225649, "grad_norm": 1.8676098585128784, "learning_rate": 9.871428229903202e-06, "loss": 0.6221, "step": 8380 }, { "epoch": 0.10020445007711713, "grad_norm": 1.9150904417037964, "learning_rate": 9.871384601138594e-06, "loss": 0.6076, "step": 8381 }, { "epoch": 0.10021640622197779, "grad_norm": 4.24594259262085, "learning_rate": 9.871340965069327e-06, "loss": 0.7748, "step": 8382 }, { "epoch": 0.10022836236683844, "grad_norm": 4.709903240203857, "learning_rate": 9.871297321695468e-06, "loss": 0.6549, "step": 8383 }, { "epoch": 0.1002403185116991, "grad_norm": 2.454446315765381, "learning_rate": 9.87125367101708e-06, "loss": 0.6087, "step": 8384 }, { "epoch": 0.10025227465655974, "grad_norm": 15.98055362701416, "learning_rate": 9.87121001303423e-06, "loss": 0.6931, "step": 8385 }, { "epoch": 0.10026423080142038, "grad_norm": 2.993704319000244, "learning_rate": 9.871166347746983e-06, "loss": 0.6512, "step": 8386 }, { "epoch": 0.10027618694628104, "grad_norm": 2.170793294906616, "learning_rate": 9.871122675155405e-06, "loss": 0.6204, "step": 8387 }, { "epoch": 0.10028814309114169, "grad_norm": 13.40664005279541, "learning_rate": 9.871078995259562e-06, "loss": 0.6519, "step": 8388 }, { "epoch": 0.10030009923600235, "grad_norm": 1.885006308555603, "learning_rate": 9.871035308059518e-06, "loss": 0.5888, "step": 8389 }, { "epoch": 0.10031205538086299, "grad_norm": 2.9115653038024902, "learning_rate": 9.870991613555339e-06, "loss": 0.5717, "step": 8390 }, { "epoch": 0.10032401152572365, "grad_norm": 1.9481664896011353, "learning_rate": 9.87094791174709e-06, "loss": 0.6043, "step": 8391 }, { "epoch": 0.1003359676705843, "grad_norm": 1.6102216243743896, "learning_rate": 9.87090420263484e-06, "loss": 0.6267, "step": 8392 }, { "epoch": 0.10034792381544495, "grad_norm": 2.920767307281494, "learning_rate": 9.87086048621865e-06, "loss": 0.7453, "step": 8393 }, { "epoch": 0.1003598799603056, "grad_norm": 2.39340877532959, "learning_rate": 9.870816762498587e-06, "loss": 0.706, "step": 8394 }, { "epoch": 0.10037183610516626, "grad_norm": 2.6857640743255615, "learning_rate": 9.87077303147472e-06, "loss": 0.5132, "step": 8395 }, { "epoch": 0.1003837922500269, "grad_norm": 1.7268550395965576, "learning_rate": 9.870729293147108e-06, "loss": 0.7042, "step": 8396 }, { "epoch": 0.10039574839488755, "grad_norm": 6.182360649108887, "learning_rate": 9.87068554751582e-06, "loss": 0.6297, "step": 8397 }, { "epoch": 0.1004077045397482, "grad_norm": 2.326770544052124, "learning_rate": 9.870641794580924e-06, "loss": 0.6217, "step": 8398 }, { "epoch": 0.10041966068460885, "grad_norm": 2.3992674350738525, "learning_rate": 9.870598034342481e-06, "loss": 0.5619, "step": 8399 }, { "epoch": 0.10043161682946951, "grad_norm": 4.361082077026367, "learning_rate": 9.870554266800561e-06, "loss": 0.6898, "step": 8400 }, { "epoch": 0.10044357297433015, "grad_norm": 2.7169225215911865, "learning_rate": 9.870510491955227e-06, "loss": 0.6894, "step": 8401 }, { "epoch": 0.10045552911919081, "grad_norm": 5.278655529022217, "learning_rate": 9.870466709806544e-06, "loss": 0.633, "step": 8402 }, { "epoch": 0.10046748526405146, "grad_norm": 2.6966543197631836, "learning_rate": 9.87042292035458e-06, "loss": 0.5796, "step": 8403 }, { "epoch": 0.10047944140891212, "grad_norm": 2.2322142124176025, "learning_rate": 9.870379123599399e-06, "loss": 0.6064, "step": 8404 }, { "epoch": 0.10049139755377276, "grad_norm": 1.9935663938522339, "learning_rate": 9.870335319541069e-06, "loss": 0.6325, "step": 8405 }, { "epoch": 0.10050335369863342, "grad_norm": 3.005647659301758, "learning_rate": 9.870291508179651e-06, "loss": 0.6139, "step": 8406 }, { "epoch": 0.10051530984349406, "grad_norm": 3.0250699520111084, "learning_rate": 9.870247689515215e-06, "loss": 0.5316, "step": 8407 }, { "epoch": 0.10052726598835471, "grad_norm": 1.7786093950271606, "learning_rate": 9.870203863547825e-06, "loss": 0.5772, "step": 8408 }, { "epoch": 0.10053922213321537, "grad_norm": 1.7126517295837402, "learning_rate": 9.870160030277546e-06, "loss": 0.6201, "step": 8409 }, { "epoch": 0.10055117827807601, "grad_norm": 1.8052477836608887, "learning_rate": 9.870116189704448e-06, "loss": 0.5543, "step": 8410 }, { "epoch": 0.10056313442293667, "grad_norm": 2.6013100147247314, "learning_rate": 9.870072341828591e-06, "loss": 0.5142, "step": 8411 }, { "epoch": 0.10057509056779732, "grad_norm": 6.715320587158203, "learning_rate": 9.870028486650042e-06, "loss": 0.6204, "step": 8412 }, { "epoch": 0.10058704671265797, "grad_norm": 2.7711093425750732, "learning_rate": 9.869984624168872e-06, "loss": 0.6637, "step": 8413 }, { "epoch": 0.10059900285751862, "grad_norm": 3.3011157512664795, "learning_rate": 9.869940754385139e-06, "loss": 0.7044, "step": 8414 }, { "epoch": 0.10061095900237928, "grad_norm": 3.316148042678833, "learning_rate": 9.869896877298914e-06, "loss": 0.5842, "step": 8415 }, { "epoch": 0.10062291514723992, "grad_norm": 1.7807365655899048, "learning_rate": 9.869852992910261e-06, "loss": 0.674, "step": 8416 }, { "epoch": 0.10063487129210058, "grad_norm": 1.897425651550293, "learning_rate": 9.869809101219248e-06, "loss": 0.6074, "step": 8417 }, { "epoch": 0.10064682743696123, "grad_norm": 1.8046483993530273, "learning_rate": 9.869765202225936e-06, "loss": 0.597, "step": 8418 }, { "epoch": 0.10065878358182188, "grad_norm": 7.487619876861572, "learning_rate": 9.869721295930395e-06, "loss": 0.6112, "step": 8419 }, { "epoch": 0.10067073972668253, "grad_norm": 8.356141090393066, "learning_rate": 9.86967738233269e-06, "loss": 0.6331, "step": 8420 }, { "epoch": 0.10068269587154317, "grad_norm": 2.4147980213165283, "learning_rate": 9.869633461432885e-06, "loss": 0.6985, "step": 8421 }, { "epoch": 0.10069465201640383, "grad_norm": 3.2369675636291504, "learning_rate": 9.86958953323105e-06, "loss": 0.6947, "step": 8422 }, { "epoch": 0.10070660816126448, "grad_norm": 3.227762222290039, "learning_rate": 9.869545597727247e-06, "loss": 0.6426, "step": 8423 }, { "epoch": 0.10071856430612514, "grad_norm": 4.359533786773682, "learning_rate": 9.869501654921542e-06, "loss": 0.6079, "step": 8424 }, { "epoch": 0.10073052045098578, "grad_norm": 2.0264158248901367, "learning_rate": 9.869457704814003e-06, "loss": 0.5824, "step": 8425 }, { "epoch": 0.10074247659584644, "grad_norm": 23.5228271484375, "learning_rate": 9.869413747404693e-06, "loss": 0.5904, "step": 8426 }, { "epoch": 0.10075443274070708, "grad_norm": 2.4983551502227783, "learning_rate": 9.869369782693682e-06, "loss": 0.5684, "step": 8427 }, { "epoch": 0.10076638888556774, "grad_norm": 4.009566783905029, "learning_rate": 9.869325810681033e-06, "loss": 0.6863, "step": 8428 }, { "epoch": 0.10077834503042839, "grad_norm": 2.4888546466827393, "learning_rate": 9.869281831366812e-06, "loss": 0.6426, "step": 8429 }, { "epoch": 0.10079030117528905, "grad_norm": 3.1252589225769043, "learning_rate": 9.869237844751088e-06, "loss": 0.6941, "step": 8430 }, { "epoch": 0.10080225732014969, "grad_norm": 8.091858863830566, "learning_rate": 9.869193850833922e-06, "loss": 0.5395, "step": 8431 }, { "epoch": 0.10081421346501034, "grad_norm": 4.016855716705322, "learning_rate": 9.869149849615382e-06, "loss": 0.6517, "step": 8432 }, { "epoch": 0.100826169609871, "grad_norm": 2.014024496078491, "learning_rate": 9.869105841095536e-06, "loss": 0.5912, "step": 8433 }, { "epoch": 0.10083812575473164, "grad_norm": 4.3577656745910645, "learning_rate": 9.869061825274448e-06, "loss": 0.6245, "step": 8434 }, { "epoch": 0.1008500818995923, "grad_norm": 2.4208874702453613, "learning_rate": 9.869017802152185e-06, "loss": 0.5499, "step": 8435 }, { "epoch": 0.10086203804445294, "grad_norm": 8.255447387695312, "learning_rate": 9.868973771728812e-06, "loss": 0.6816, "step": 8436 }, { "epoch": 0.1008739941893136, "grad_norm": 6.385929107666016, "learning_rate": 9.868929734004397e-06, "loss": 0.6194, "step": 8437 }, { "epoch": 0.10088595033417425, "grad_norm": 2.182781934738159, "learning_rate": 9.868885688979003e-06, "loss": 0.6713, "step": 8438 }, { "epoch": 0.1008979064790349, "grad_norm": 4.410092830657959, "learning_rate": 9.868841636652697e-06, "loss": 0.5876, "step": 8439 }, { "epoch": 0.10090986262389555, "grad_norm": 2.0375733375549316, "learning_rate": 9.868797577025547e-06, "loss": 0.5628, "step": 8440 }, { "epoch": 0.10092181876875621, "grad_norm": 8.54712963104248, "learning_rate": 9.868753510097618e-06, "loss": 0.5802, "step": 8441 }, { "epoch": 0.10093377491361685, "grad_norm": 1.5530840158462524, "learning_rate": 9.868709435868976e-06, "loss": 0.6399, "step": 8442 }, { "epoch": 0.10094573105847751, "grad_norm": 1.581055998802185, "learning_rate": 9.868665354339684e-06, "loss": 0.5202, "step": 8443 }, { "epoch": 0.10095768720333816, "grad_norm": 3.0450849533081055, "learning_rate": 9.868621265509814e-06, "loss": 0.6207, "step": 8444 }, { "epoch": 0.1009696433481988, "grad_norm": 58.96699905395508, "learning_rate": 9.868577169379428e-06, "loss": 0.6087, "step": 8445 }, { "epoch": 0.10098159949305946, "grad_norm": 2.573235511779785, "learning_rate": 9.868533065948593e-06, "loss": 0.5723, "step": 8446 }, { "epoch": 0.1009935556379201, "grad_norm": 3.8333170413970947, "learning_rate": 9.868488955217375e-06, "loss": 0.5969, "step": 8447 }, { "epoch": 0.10100551178278076, "grad_norm": 2.4542441368103027, "learning_rate": 9.868444837185843e-06, "loss": 0.6949, "step": 8448 }, { "epoch": 0.10101746792764141, "grad_norm": 2.1626336574554443, "learning_rate": 9.868400711854058e-06, "loss": 0.666, "step": 8449 }, { "epoch": 0.10102942407250207, "grad_norm": 6.519423484802246, "learning_rate": 9.86835657922209e-06, "loss": 0.6335, "step": 8450 }, { "epoch": 0.10104138021736271, "grad_norm": 4.111865520477295, "learning_rate": 9.868312439290004e-06, "loss": 0.6623, "step": 8451 }, { "epoch": 0.10105333636222337, "grad_norm": 3.2354025840759277, "learning_rate": 9.868268292057866e-06, "loss": 0.6361, "step": 8452 }, { "epoch": 0.10106529250708401, "grad_norm": 2.5113275051116943, "learning_rate": 9.868224137525742e-06, "loss": 0.6829, "step": 8453 }, { "epoch": 0.10107724865194467, "grad_norm": 2.765580892562866, "learning_rate": 9.868179975693699e-06, "loss": 0.5911, "step": 8454 }, { "epoch": 0.10108920479680532, "grad_norm": 3.343940258026123, "learning_rate": 9.868135806561801e-06, "loss": 0.7435, "step": 8455 }, { "epoch": 0.10110116094166596, "grad_norm": 8.980149269104004, "learning_rate": 9.868091630130119e-06, "loss": 0.6798, "step": 8456 }, { "epoch": 0.10111311708652662, "grad_norm": 4.4499592781066895, "learning_rate": 9.868047446398714e-06, "loss": 0.705, "step": 8457 }, { "epoch": 0.10112507323138727, "grad_norm": 9.00420093536377, "learning_rate": 9.868003255367655e-06, "loss": 0.7237, "step": 8458 }, { "epoch": 0.10113702937624792, "grad_norm": 3.1370158195495605, "learning_rate": 9.867959057037009e-06, "loss": 0.6648, "step": 8459 }, { "epoch": 0.10114898552110857, "grad_norm": 9.812984466552734, "learning_rate": 9.867914851406842e-06, "loss": 0.5207, "step": 8460 }, { "epoch": 0.10116094166596923, "grad_norm": 3.0635581016540527, "learning_rate": 9.867870638477217e-06, "loss": 0.5484, "step": 8461 }, { "epoch": 0.10117289781082987, "grad_norm": 4.904325008392334, "learning_rate": 9.867826418248203e-06, "loss": 0.5888, "step": 8462 }, { "epoch": 0.10118485395569053, "grad_norm": 3.1300973892211914, "learning_rate": 9.867782190719866e-06, "loss": 0.6577, "step": 8463 }, { "epoch": 0.10119681010055118, "grad_norm": 6.462874889373779, "learning_rate": 9.867737955892273e-06, "loss": 0.612, "step": 8464 }, { "epoch": 0.10120876624541184, "grad_norm": 2.3179771900177, "learning_rate": 9.867693713765489e-06, "loss": 0.6221, "step": 8465 }, { "epoch": 0.10122072239027248, "grad_norm": 3.57426381111145, "learning_rate": 9.867649464339583e-06, "loss": 0.5957, "step": 8466 }, { "epoch": 0.10123267853513312, "grad_norm": 3.5050675868988037, "learning_rate": 9.867605207614617e-06, "loss": 0.6591, "step": 8467 }, { "epoch": 0.10124463467999378, "grad_norm": 3.2200231552124023, "learning_rate": 9.86756094359066e-06, "loss": 0.6541, "step": 8468 }, { "epoch": 0.10125659082485443, "grad_norm": 2.372457504272461, "learning_rate": 9.867516672267779e-06, "loss": 0.5606, "step": 8469 }, { "epoch": 0.10126854696971509, "grad_norm": 1.6698393821716309, "learning_rate": 9.86747239364604e-06, "loss": 0.7161, "step": 8470 }, { "epoch": 0.10128050311457573, "grad_norm": 3.2862350940704346, "learning_rate": 9.867428107725508e-06, "loss": 0.6545, "step": 8471 }, { "epoch": 0.10129245925943639, "grad_norm": 11.13688850402832, "learning_rate": 9.867383814506248e-06, "loss": 0.5542, "step": 8472 }, { "epoch": 0.10130441540429704, "grad_norm": 3.424609661102295, "learning_rate": 9.867339513988332e-06, "loss": 0.6427, "step": 8473 }, { "epoch": 0.1013163715491577, "grad_norm": 2.5695810317993164, "learning_rate": 9.867295206171822e-06, "loss": 0.6418, "step": 8474 }, { "epoch": 0.10132832769401834, "grad_norm": 16.932832717895508, "learning_rate": 9.867250891056786e-06, "loss": 0.6365, "step": 8475 }, { "epoch": 0.101340283838879, "grad_norm": 3.830599308013916, "learning_rate": 9.867206568643288e-06, "loss": 0.5806, "step": 8476 }, { "epoch": 0.10135223998373964, "grad_norm": 2.0650830268859863, "learning_rate": 9.867162238931398e-06, "loss": 0.6417, "step": 8477 }, { "epoch": 0.1013641961286003, "grad_norm": 3.950514316558838, "learning_rate": 9.867117901921181e-06, "loss": 0.6557, "step": 8478 }, { "epoch": 0.10137615227346095, "grad_norm": 17.143278121948242, "learning_rate": 9.867073557612703e-06, "loss": 0.5666, "step": 8479 }, { "epoch": 0.10138810841832159, "grad_norm": 6.213132381439209, "learning_rate": 9.867029206006031e-06, "loss": 0.6158, "step": 8480 }, { "epoch": 0.10140006456318225, "grad_norm": 2.5161514282226562, "learning_rate": 9.86698484710123e-06, "loss": 0.6647, "step": 8481 }, { "epoch": 0.1014120207080429, "grad_norm": 2.895951271057129, "learning_rate": 9.86694048089837e-06, "loss": 0.6289, "step": 8482 }, { "epoch": 0.10142397685290355, "grad_norm": 4.6300249099731445, "learning_rate": 9.866896107397516e-06, "loss": 0.7107, "step": 8483 }, { "epoch": 0.1014359329977642, "grad_norm": 2.029090642929077, "learning_rate": 9.866851726598731e-06, "loss": 0.5698, "step": 8484 }, { "epoch": 0.10144788914262486, "grad_norm": 2.653939723968506, "learning_rate": 9.866807338502087e-06, "loss": 0.6447, "step": 8485 }, { "epoch": 0.1014598452874855, "grad_norm": 2.443223237991333, "learning_rate": 9.866762943107649e-06, "loss": 0.5968, "step": 8486 }, { "epoch": 0.10147180143234616, "grad_norm": 3.900740623474121, "learning_rate": 9.866718540415481e-06, "loss": 0.6887, "step": 8487 }, { "epoch": 0.1014837575772068, "grad_norm": 2.57814359664917, "learning_rate": 9.866674130425651e-06, "loss": 0.6933, "step": 8488 }, { "epoch": 0.10149571372206746, "grad_norm": 11.931685447692871, "learning_rate": 9.866629713138225e-06, "loss": 0.5956, "step": 8489 }, { "epoch": 0.10150766986692811, "grad_norm": 2.7185919284820557, "learning_rate": 9.866585288553271e-06, "loss": 0.7044, "step": 8490 }, { "epoch": 0.10151962601178875, "grad_norm": 1.6777942180633545, "learning_rate": 9.866540856670856e-06, "loss": 0.6421, "step": 8491 }, { "epoch": 0.10153158215664941, "grad_norm": 2.6913654804229736, "learning_rate": 9.866496417491047e-06, "loss": 0.6084, "step": 8492 }, { "epoch": 0.10154353830151006, "grad_norm": 7.644339561462402, "learning_rate": 9.866451971013907e-06, "loss": 0.7235, "step": 8493 }, { "epoch": 0.10155549444637071, "grad_norm": 5.529134273529053, "learning_rate": 9.866407517239505e-06, "loss": 0.5476, "step": 8494 }, { "epoch": 0.10156745059123136, "grad_norm": 4.576261043548584, "learning_rate": 9.86636305616791e-06, "loss": 0.569, "step": 8495 }, { "epoch": 0.10157940673609202, "grad_norm": 2.742349624633789, "learning_rate": 9.866318587799185e-06, "loss": 0.7016, "step": 8496 }, { "epoch": 0.10159136288095266, "grad_norm": 3.505256175994873, "learning_rate": 9.866274112133396e-06, "loss": 0.6863, "step": 8497 }, { "epoch": 0.10160331902581332, "grad_norm": 1.677038311958313, "learning_rate": 9.866229629170613e-06, "loss": 0.6975, "step": 8498 }, { "epoch": 0.10161527517067397, "grad_norm": 3.19919753074646, "learning_rate": 9.866185138910902e-06, "loss": 0.5945, "step": 8499 }, { "epoch": 0.10162723131553462, "grad_norm": 5.6127400398254395, "learning_rate": 9.86614064135433e-06, "loss": 0.6488, "step": 8500 }, { "epoch": 0.10163918746039527, "grad_norm": 3.0712695121765137, "learning_rate": 9.86609613650096e-06, "loss": 0.651, "step": 8501 }, { "epoch": 0.10165114360525593, "grad_norm": 2.3321774005889893, "learning_rate": 9.866051624350863e-06, "loss": 0.5387, "step": 8502 }, { "epoch": 0.10166309975011657, "grad_norm": 3.1818952560424805, "learning_rate": 9.866007104904105e-06, "loss": 0.68, "step": 8503 }, { "epoch": 0.10167505589497722, "grad_norm": 2.482150077819824, "learning_rate": 9.86596257816075e-06, "loss": 0.6938, "step": 8504 }, { "epoch": 0.10168701203983788, "grad_norm": 2.7882626056671143, "learning_rate": 9.86591804412087e-06, "loss": 0.5851, "step": 8505 }, { "epoch": 0.10169896818469852, "grad_norm": 3.6683695316314697, "learning_rate": 9.865873502784527e-06, "loss": 0.6513, "step": 8506 }, { "epoch": 0.10171092432955918, "grad_norm": 4.0045671463012695, "learning_rate": 9.865828954151788e-06, "loss": 0.6058, "step": 8507 }, { "epoch": 0.10172288047441982, "grad_norm": 9.62979507446289, "learning_rate": 9.865784398222722e-06, "loss": 0.5832, "step": 8508 }, { "epoch": 0.10173483661928048, "grad_norm": 3.3120810985565186, "learning_rate": 9.865739834997396e-06, "loss": 0.5882, "step": 8509 }, { "epoch": 0.10174679276414113, "grad_norm": 7.3277788162231445, "learning_rate": 9.865695264475876e-06, "loss": 0.5655, "step": 8510 }, { "epoch": 0.10175874890900179, "grad_norm": 3.0862667560577393, "learning_rate": 9.865650686658229e-06, "loss": 0.6695, "step": 8511 }, { "epoch": 0.10177070505386243, "grad_norm": 2.64388108253479, "learning_rate": 9.865606101544518e-06, "loss": 0.5775, "step": 8512 }, { "epoch": 0.10178266119872309, "grad_norm": 2.4916775226593018, "learning_rate": 9.865561509134817e-06, "loss": 0.6599, "step": 8513 }, { "epoch": 0.10179461734358373, "grad_norm": 3.600121021270752, "learning_rate": 9.865516909429187e-06, "loss": 0.6791, "step": 8514 }, { "epoch": 0.10180657348844438, "grad_norm": 37.124855041503906, "learning_rate": 9.865472302427699e-06, "loss": 0.6164, "step": 8515 }, { "epoch": 0.10181852963330504, "grad_norm": 3.001885414123535, "learning_rate": 9.865427688130416e-06, "loss": 0.627, "step": 8516 }, { "epoch": 0.10183048577816568, "grad_norm": 2.406872510910034, "learning_rate": 9.865383066537408e-06, "loss": 0.5981, "step": 8517 }, { "epoch": 0.10184244192302634, "grad_norm": 2.09121036529541, "learning_rate": 9.865338437648741e-06, "loss": 0.6292, "step": 8518 }, { "epoch": 0.10185439806788699, "grad_norm": 2.2349257469177246, "learning_rate": 9.865293801464482e-06, "loss": 0.6535, "step": 8519 }, { "epoch": 0.10186635421274765, "grad_norm": 8.054917335510254, "learning_rate": 9.865249157984696e-06, "loss": 0.5873, "step": 8520 }, { "epoch": 0.10187831035760829, "grad_norm": 17.3435115814209, "learning_rate": 9.865204507209454e-06, "loss": 0.6684, "step": 8521 }, { "epoch": 0.10189026650246895, "grad_norm": 20.03379249572754, "learning_rate": 9.86515984913882e-06, "loss": 0.5835, "step": 8522 }, { "epoch": 0.1019022226473296, "grad_norm": 7.909969329833984, "learning_rate": 9.86511518377286e-06, "loss": 0.6248, "step": 8523 }, { "epoch": 0.10191417879219025, "grad_norm": 3.2832260131835938, "learning_rate": 9.865070511111643e-06, "loss": 0.683, "step": 8524 }, { "epoch": 0.1019261349370509, "grad_norm": 2.185530424118042, "learning_rate": 9.865025831155235e-06, "loss": 0.5759, "step": 8525 }, { "epoch": 0.10193809108191154, "grad_norm": 6.215433597564697, "learning_rate": 9.864981143903704e-06, "loss": 0.6548, "step": 8526 }, { "epoch": 0.1019500472267722, "grad_norm": 3.7563674449920654, "learning_rate": 9.864936449357117e-06, "loss": 0.6315, "step": 8527 }, { "epoch": 0.10196200337163284, "grad_norm": 1.7058367729187012, "learning_rate": 9.86489174751554e-06, "loss": 0.5823, "step": 8528 }, { "epoch": 0.1019739595164935, "grad_norm": 2.441605567932129, "learning_rate": 9.864847038379042e-06, "loss": 0.5164, "step": 8529 }, { "epoch": 0.10198591566135415, "grad_norm": 1.8525785207748413, "learning_rate": 9.864802321947687e-06, "loss": 0.6458, "step": 8530 }, { "epoch": 0.10199787180621481, "grad_norm": 1.844197392463684, "learning_rate": 9.864757598221544e-06, "loss": 0.6487, "step": 8531 }, { "epoch": 0.10200982795107545, "grad_norm": 4.892889022827148, "learning_rate": 9.864712867200678e-06, "loss": 0.667, "step": 8532 }, { "epoch": 0.10202178409593611, "grad_norm": 3.8626317977905273, "learning_rate": 9.86466812888516e-06, "loss": 0.6801, "step": 8533 }, { "epoch": 0.10203374024079676, "grad_norm": 2.797569513320923, "learning_rate": 9.864623383275053e-06, "loss": 0.4694, "step": 8534 }, { "epoch": 0.10204569638565741, "grad_norm": 2.3902952671051025, "learning_rate": 9.864578630370429e-06, "loss": 0.6498, "step": 8535 }, { "epoch": 0.10205765253051806, "grad_norm": 11.614766120910645, "learning_rate": 9.86453387017135e-06, "loss": 0.613, "step": 8536 }, { "epoch": 0.10206960867537872, "grad_norm": 1.8661549091339111, "learning_rate": 9.864489102677886e-06, "loss": 0.6954, "step": 8537 }, { "epoch": 0.10208156482023936, "grad_norm": 5.738970756530762, "learning_rate": 9.864444327890102e-06, "loss": 0.7233, "step": 8538 }, { "epoch": 0.10209352096510001, "grad_norm": 2.4481804370880127, "learning_rate": 9.864399545808067e-06, "loss": 0.549, "step": 8539 }, { "epoch": 0.10210547710996067, "grad_norm": 1.6743348836898804, "learning_rate": 9.864354756431848e-06, "loss": 0.6287, "step": 8540 }, { "epoch": 0.10211743325482131, "grad_norm": 3.964491844177246, "learning_rate": 9.86430995976151e-06, "loss": 0.656, "step": 8541 }, { "epoch": 0.10212938939968197, "grad_norm": 1.6671711206436157, "learning_rate": 9.864265155797125e-06, "loss": 0.576, "step": 8542 }, { "epoch": 0.10214134554454261, "grad_norm": 2.1994073390960693, "learning_rate": 9.864220344538755e-06, "loss": 0.5343, "step": 8543 }, { "epoch": 0.10215330168940327, "grad_norm": 2.2884464263916016, "learning_rate": 9.86417552598647e-06, "loss": 0.6681, "step": 8544 }, { "epoch": 0.10216525783426392, "grad_norm": 1.7006745338439941, "learning_rate": 9.864130700140336e-06, "loss": 0.5831, "step": 8545 }, { "epoch": 0.10217721397912458, "grad_norm": 6.4492340087890625, "learning_rate": 9.86408586700042e-06, "loss": 0.5449, "step": 8546 }, { "epoch": 0.10218917012398522, "grad_norm": 1.661747932434082, "learning_rate": 9.864041026566792e-06, "loss": 0.624, "step": 8547 }, { "epoch": 0.10220112626884588, "grad_norm": 1.4810409545898438, "learning_rate": 9.863996178839517e-06, "loss": 0.625, "step": 8548 }, { "epoch": 0.10221308241370652, "grad_norm": 2.8088488578796387, "learning_rate": 9.863951323818661e-06, "loss": 0.6554, "step": 8549 }, { "epoch": 0.10222503855856717, "grad_norm": 1.933547854423523, "learning_rate": 9.863906461504294e-06, "loss": 0.6085, "step": 8550 }, { "epoch": 0.10223699470342783, "grad_norm": 2.5905981063842773, "learning_rate": 9.86386159189648e-06, "loss": 0.5502, "step": 8551 }, { "epoch": 0.10224895084828847, "grad_norm": 1.934030294418335, "learning_rate": 9.86381671499529e-06, "loss": 0.6068, "step": 8552 }, { "epoch": 0.10226090699314913, "grad_norm": 2.3888871669769287, "learning_rate": 9.863771830800788e-06, "loss": 0.6412, "step": 8553 }, { "epoch": 0.10227286313800978, "grad_norm": 1.6220405101776123, "learning_rate": 9.863726939313044e-06, "loss": 0.5458, "step": 8554 }, { "epoch": 0.10228481928287043, "grad_norm": 9.007343292236328, "learning_rate": 9.863682040532125e-06, "loss": 0.6525, "step": 8555 }, { "epoch": 0.10229677542773108, "grad_norm": 2.0416650772094727, "learning_rate": 9.863637134458098e-06, "loss": 0.6057, "step": 8556 }, { "epoch": 0.10230873157259174, "grad_norm": 1.7229928970336914, "learning_rate": 9.863592221091027e-06, "loss": 0.5634, "step": 8557 }, { "epoch": 0.10232068771745238, "grad_norm": 1.9104948043823242, "learning_rate": 9.863547300430985e-06, "loss": 0.6604, "step": 8558 }, { "epoch": 0.10233264386231304, "grad_norm": 3.466309070587158, "learning_rate": 9.863502372478033e-06, "loss": 0.683, "step": 8559 }, { "epoch": 0.10234460000717369, "grad_norm": 3.2492406368255615, "learning_rate": 9.863457437232245e-06, "loss": 0.5816, "step": 8560 }, { "epoch": 0.10235655615203434, "grad_norm": 1.844925045967102, "learning_rate": 9.863412494693684e-06, "loss": 0.5907, "step": 8561 }, { "epoch": 0.10236851229689499, "grad_norm": 2.486351251602173, "learning_rate": 9.86336754486242e-06, "loss": 0.6425, "step": 8562 }, { "epoch": 0.10238046844175563, "grad_norm": 2.439971446990967, "learning_rate": 9.863322587738518e-06, "loss": 0.6817, "step": 8563 }, { "epoch": 0.10239242458661629, "grad_norm": 2.032238483428955, "learning_rate": 9.863277623322047e-06, "loss": 0.5666, "step": 8564 }, { "epoch": 0.10240438073147694, "grad_norm": 3.257633686065674, "learning_rate": 9.863232651613073e-06, "loss": 0.5324, "step": 8565 }, { "epoch": 0.1024163368763376, "grad_norm": 1.9920040369033813, "learning_rate": 9.863187672611665e-06, "loss": 0.669, "step": 8566 }, { "epoch": 0.10242829302119824, "grad_norm": 5.07574462890625, "learning_rate": 9.863142686317889e-06, "loss": 0.6065, "step": 8567 }, { "epoch": 0.1024402491660589, "grad_norm": 2.9556853771209717, "learning_rate": 9.863097692731814e-06, "loss": 0.6346, "step": 8568 }, { "epoch": 0.10245220531091954, "grad_norm": 2.0267910957336426, "learning_rate": 9.863052691853507e-06, "loss": 0.6938, "step": 8569 }, { "epoch": 0.1024641614557802, "grad_norm": 3.7594730854034424, "learning_rate": 9.863007683683034e-06, "loss": 0.5918, "step": 8570 }, { "epoch": 0.10247611760064085, "grad_norm": 3.0206995010375977, "learning_rate": 9.862962668220465e-06, "loss": 0.6913, "step": 8571 }, { "epoch": 0.1024880737455015, "grad_norm": 2.668516159057617, "learning_rate": 9.862917645465865e-06, "loss": 0.6701, "step": 8572 }, { "epoch": 0.10250002989036215, "grad_norm": 2.9956700801849365, "learning_rate": 9.862872615419303e-06, "loss": 0.5985, "step": 8573 }, { "epoch": 0.1025119860352228, "grad_norm": 2.097180128097534, "learning_rate": 9.862827578080847e-06, "loss": 0.5953, "step": 8574 }, { "epoch": 0.10252394218008345, "grad_norm": 3.4309194087982178, "learning_rate": 9.862782533450563e-06, "loss": 0.614, "step": 8575 }, { "epoch": 0.1025358983249441, "grad_norm": 3.053821086883545, "learning_rate": 9.862737481528521e-06, "loss": 0.7139, "step": 8576 }, { "epoch": 0.10254785446980476, "grad_norm": 1.8679416179656982, "learning_rate": 9.862692422314786e-06, "loss": 0.6605, "step": 8577 }, { "epoch": 0.1025598106146654, "grad_norm": 2.5112783908843994, "learning_rate": 9.862647355809425e-06, "loss": 0.7234, "step": 8578 }, { "epoch": 0.10257176675952606, "grad_norm": 3.0254335403442383, "learning_rate": 9.862602282012508e-06, "loss": 0.6474, "step": 8579 }, { "epoch": 0.1025837229043867, "grad_norm": 4.616170883178711, "learning_rate": 9.862557200924103e-06, "loss": 0.5945, "step": 8580 }, { "epoch": 0.10259567904924737, "grad_norm": 7.6978983879089355, "learning_rate": 9.862512112544274e-06, "loss": 0.6392, "step": 8581 }, { "epoch": 0.10260763519410801, "grad_norm": 1.6895359754562378, "learning_rate": 9.862467016873092e-06, "loss": 0.6121, "step": 8582 }, { "epoch": 0.10261959133896867, "grad_norm": 2.463627338409424, "learning_rate": 9.862421913910623e-06, "loss": 0.6078, "step": 8583 }, { "epoch": 0.10263154748382931, "grad_norm": 1.8282978534698486, "learning_rate": 9.862376803656934e-06, "loss": 0.5919, "step": 8584 }, { "epoch": 0.10264350362868997, "grad_norm": 1.9798647165298462, "learning_rate": 9.862331686112095e-06, "loss": 0.6542, "step": 8585 }, { "epoch": 0.10265545977355062, "grad_norm": 77.09949493408203, "learning_rate": 9.862286561276173e-06, "loss": 0.6561, "step": 8586 }, { "epoch": 0.10266741591841126, "grad_norm": 2.472736358642578, "learning_rate": 9.862241429149233e-06, "loss": 0.6051, "step": 8587 }, { "epoch": 0.10267937206327192, "grad_norm": 3.276127576828003, "learning_rate": 9.862196289731346e-06, "loss": 0.6505, "step": 8588 }, { "epoch": 0.10269132820813257, "grad_norm": 4.110748767852783, "learning_rate": 9.862151143022578e-06, "loss": 0.7582, "step": 8589 }, { "epoch": 0.10270328435299322, "grad_norm": 2.0848288536071777, "learning_rate": 9.862105989022998e-06, "loss": 0.6657, "step": 8590 }, { "epoch": 0.10271524049785387, "grad_norm": 2.500988006591797, "learning_rate": 9.862060827732672e-06, "loss": 0.6436, "step": 8591 }, { "epoch": 0.10272719664271453, "grad_norm": 3.139066219329834, "learning_rate": 9.862015659151669e-06, "loss": 0.5938, "step": 8592 }, { "epoch": 0.10273915278757517, "grad_norm": 2.819973945617676, "learning_rate": 9.861970483280055e-06, "loss": 0.7249, "step": 8593 }, { "epoch": 0.10275110893243583, "grad_norm": 1.7528094053268433, "learning_rate": 9.861925300117901e-06, "loss": 0.6569, "step": 8594 }, { "epoch": 0.10276306507729648, "grad_norm": 2.6729466915130615, "learning_rate": 9.86188010966527e-06, "loss": 0.6009, "step": 8595 }, { "epoch": 0.10277502122215713, "grad_norm": 2.197174549102783, "learning_rate": 9.861834911922235e-06, "loss": 0.6986, "step": 8596 }, { "epoch": 0.10278697736701778, "grad_norm": 2.566784143447876, "learning_rate": 9.86178970688886e-06, "loss": 0.7337, "step": 8597 }, { "epoch": 0.10279893351187842, "grad_norm": 3.561297655105591, "learning_rate": 9.861744494565216e-06, "loss": 0.6829, "step": 8598 }, { "epoch": 0.10281088965673908, "grad_norm": 2.175508499145508, "learning_rate": 9.861699274951367e-06, "loss": 0.6305, "step": 8599 }, { "epoch": 0.10282284580159973, "grad_norm": 1.795798420906067, "learning_rate": 9.861654048047381e-06, "loss": 0.5956, "step": 8600 }, { "epoch": 0.10283480194646039, "grad_norm": 5.3902812004089355, "learning_rate": 9.86160881385333e-06, "loss": 0.6217, "step": 8601 }, { "epoch": 0.10284675809132103, "grad_norm": 2.210371494293213, "learning_rate": 9.86156357236928e-06, "loss": 0.6216, "step": 8602 }, { "epoch": 0.10285871423618169, "grad_norm": 2.199251890182495, "learning_rate": 9.861518323595296e-06, "loss": 0.6977, "step": 8603 }, { "epoch": 0.10287067038104233, "grad_norm": 2.9469220638275146, "learning_rate": 9.861473067531448e-06, "loss": 0.5549, "step": 8604 }, { "epoch": 0.10288262652590299, "grad_norm": 7.792548179626465, "learning_rate": 9.861427804177806e-06, "loss": 0.6378, "step": 8605 }, { "epoch": 0.10289458267076364, "grad_norm": 2.2966036796569824, "learning_rate": 9.861382533534433e-06, "loss": 0.63, "step": 8606 }, { "epoch": 0.1029065388156243, "grad_norm": 1.7001703977584839, "learning_rate": 9.8613372556014e-06, "loss": 0.5731, "step": 8607 }, { "epoch": 0.10291849496048494, "grad_norm": 5.245120525360107, "learning_rate": 9.861291970378776e-06, "loss": 0.6408, "step": 8608 }, { "epoch": 0.10293045110534559, "grad_norm": 2.272282361984253, "learning_rate": 9.861246677866628e-06, "loss": 0.6215, "step": 8609 }, { "epoch": 0.10294240725020624, "grad_norm": 2.1091268062591553, "learning_rate": 9.861201378065022e-06, "loss": 0.601, "step": 8610 }, { "epoch": 0.10295436339506689, "grad_norm": 2.0931167602539062, "learning_rate": 9.861156070974027e-06, "loss": 0.6031, "step": 8611 }, { "epoch": 0.10296631953992755, "grad_norm": 1.8449115753173828, "learning_rate": 9.861110756593713e-06, "loss": 0.6907, "step": 8612 }, { "epoch": 0.10297827568478819, "grad_norm": 2.861177921295166, "learning_rate": 9.861065434924144e-06, "loss": 0.6412, "step": 8613 }, { "epoch": 0.10299023182964885, "grad_norm": 1.9320240020751953, "learning_rate": 9.861020105965391e-06, "loss": 0.5668, "step": 8614 }, { "epoch": 0.1030021879745095, "grad_norm": 3.06352162361145, "learning_rate": 9.860974769717521e-06, "loss": 0.6913, "step": 8615 }, { "epoch": 0.10301414411937015, "grad_norm": 2.519618272781372, "learning_rate": 9.8609294261806e-06, "loss": 0.6988, "step": 8616 }, { "epoch": 0.1030261002642308, "grad_norm": 2.403012275695801, "learning_rate": 9.860884075354703e-06, "loss": 0.5789, "step": 8617 }, { "epoch": 0.10303805640909146, "grad_norm": 1.9348783493041992, "learning_rate": 9.86083871723989e-06, "loss": 0.5669, "step": 8618 }, { "epoch": 0.1030500125539521, "grad_norm": 2.1399385929107666, "learning_rate": 9.86079335183623e-06, "loss": 0.6163, "step": 8619 }, { "epoch": 0.10306196869881276, "grad_norm": 2.477910041809082, "learning_rate": 9.860747979143797e-06, "loss": 0.6178, "step": 8620 }, { "epoch": 0.1030739248436734, "grad_norm": 2.537537097930908, "learning_rate": 9.860702599162652e-06, "loss": 0.6372, "step": 8621 }, { "epoch": 0.10308588098853405, "grad_norm": 2.294264554977417, "learning_rate": 9.860657211892869e-06, "loss": 0.7155, "step": 8622 }, { "epoch": 0.10309783713339471, "grad_norm": 2.43778920173645, "learning_rate": 9.86061181733451e-06, "loss": 0.6383, "step": 8623 }, { "epoch": 0.10310979327825535, "grad_norm": 2.352642297744751, "learning_rate": 9.860566415487649e-06, "loss": 0.58, "step": 8624 }, { "epoch": 0.10312174942311601, "grad_norm": 3.0513179302215576, "learning_rate": 9.86052100635235e-06, "loss": 0.6656, "step": 8625 }, { "epoch": 0.10313370556797666, "grad_norm": 2.236861228942871, "learning_rate": 9.860475589928685e-06, "loss": 0.6379, "step": 8626 }, { "epoch": 0.10314566171283732, "grad_norm": 1.552506685256958, "learning_rate": 9.860430166216716e-06, "loss": 0.5975, "step": 8627 }, { "epoch": 0.10315761785769796, "grad_norm": 9.075822830200195, "learning_rate": 9.860384735216517e-06, "loss": 0.6658, "step": 8628 }, { "epoch": 0.10316957400255862, "grad_norm": 2.138094663619995, "learning_rate": 9.860339296928151e-06, "loss": 0.6158, "step": 8629 }, { "epoch": 0.10318153014741926, "grad_norm": 11.451545715332031, "learning_rate": 9.860293851351692e-06, "loss": 0.6027, "step": 8630 }, { "epoch": 0.10319348629227992, "grad_norm": 4.59876823425293, "learning_rate": 9.860248398487205e-06, "loss": 0.7103, "step": 8631 }, { "epoch": 0.10320544243714057, "grad_norm": 3.1474955081939697, "learning_rate": 9.860202938334757e-06, "loss": 0.6885, "step": 8632 }, { "epoch": 0.10321739858200121, "grad_norm": 1.7844219207763672, "learning_rate": 9.860157470894418e-06, "loss": 0.5914, "step": 8633 }, { "epoch": 0.10322935472686187, "grad_norm": 2.265829563140869, "learning_rate": 9.860111996166255e-06, "loss": 0.7005, "step": 8634 }, { "epoch": 0.10324131087172252, "grad_norm": 3.937669515609741, "learning_rate": 9.860066514150337e-06, "loss": 0.5769, "step": 8635 }, { "epoch": 0.10325326701658318, "grad_norm": 1.6142723560333252, "learning_rate": 9.860021024846732e-06, "loss": 0.6644, "step": 8636 }, { "epoch": 0.10326522316144382, "grad_norm": 2.1056923866271973, "learning_rate": 9.859975528255508e-06, "loss": 0.5894, "step": 8637 }, { "epoch": 0.10327717930630448, "grad_norm": 3.637579917907715, "learning_rate": 9.859930024376733e-06, "loss": 0.589, "step": 8638 }, { "epoch": 0.10328913545116512, "grad_norm": 2.9599130153656006, "learning_rate": 9.859884513210478e-06, "loss": 0.6864, "step": 8639 }, { "epoch": 0.10330109159602578, "grad_norm": 1.4724901914596558, "learning_rate": 9.859838994756807e-06, "loss": 0.7392, "step": 8640 }, { "epoch": 0.10331304774088643, "grad_norm": 2.817049980163574, "learning_rate": 9.859793469015791e-06, "loss": 0.6222, "step": 8641 }, { "epoch": 0.10332500388574709, "grad_norm": 4.152522563934326, "learning_rate": 9.859747935987495e-06, "loss": 0.6601, "step": 8642 }, { "epoch": 0.10333696003060773, "grad_norm": 1.692284107208252, "learning_rate": 9.859702395671992e-06, "loss": 0.6074, "step": 8643 }, { "epoch": 0.10334891617546839, "grad_norm": 2.164081335067749, "learning_rate": 9.859656848069349e-06, "loss": 0.6097, "step": 8644 }, { "epoch": 0.10336087232032903, "grad_norm": 3.7333970069885254, "learning_rate": 9.859611293179631e-06, "loss": 0.6739, "step": 8645 }, { "epoch": 0.10337282846518968, "grad_norm": 2.4833686351776123, "learning_rate": 9.859565731002909e-06, "loss": 0.6954, "step": 8646 }, { "epoch": 0.10338478461005034, "grad_norm": 1.5375412702560425, "learning_rate": 9.859520161539252e-06, "loss": 0.6446, "step": 8647 }, { "epoch": 0.10339674075491098, "grad_norm": 1.6340081691741943, "learning_rate": 9.859474584788726e-06, "loss": 0.7614, "step": 8648 }, { "epoch": 0.10340869689977164, "grad_norm": 1.599802851676941, "learning_rate": 9.859429000751401e-06, "loss": 0.618, "step": 8649 }, { "epoch": 0.10342065304463229, "grad_norm": 2.2410149574279785, "learning_rate": 9.859383409427345e-06, "loss": 0.594, "step": 8650 }, { "epoch": 0.10343260918949294, "grad_norm": 4.394484996795654, "learning_rate": 9.859337810816627e-06, "loss": 0.808, "step": 8651 }, { "epoch": 0.10344456533435359, "grad_norm": 3.020169973373413, "learning_rate": 9.859292204919315e-06, "loss": 0.7455, "step": 8652 }, { "epoch": 0.10345652147921425, "grad_norm": 1.575148344039917, "learning_rate": 9.859246591735477e-06, "loss": 0.6893, "step": 8653 }, { "epoch": 0.10346847762407489, "grad_norm": 3.3430302143096924, "learning_rate": 9.85920097126518e-06, "loss": 0.6377, "step": 8654 }, { "epoch": 0.10348043376893555, "grad_norm": 2.9879536628723145, "learning_rate": 9.859155343508495e-06, "loss": 0.7024, "step": 8655 }, { "epoch": 0.1034923899137962, "grad_norm": 5.328864574432373, "learning_rate": 9.859109708465489e-06, "loss": 0.6355, "step": 8656 }, { "epoch": 0.10350434605865684, "grad_norm": 3.0031142234802246, "learning_rate": 9.85906406613623e-06, "loss": 0.6218, "step": 8657 }, { "epoch": 0.1035163022035175, "grad_norm": 1.7938804626464844, "learning_rate": 9.859018416520787e-06, "loss": 0.6662, "step": 8658 }, { "epoch": 0.10352825834837814, "grad_norm": 1.7000125646591187, "learning_rate": 9.85897275961923e-06, "loss": 0.5741, "step": 8659 }, { "epoch": 0.1035402144932388, "grad_norm": 2.9170784950256348, "learning_rate": 9.858927095431625e-06, "loss": 0.7198, "step": 8660 }, { "epoch": 0.10355217063809945, "grad_norm": 1.757614254951477, "learning_rate": 9.858881423958042e-06, "loss": 0.7393, "step": 8661 }, { "epoch": 0.1035641267829601, "grad_norm": 5.616436004638672, "learning_rate": 9.858835745198549e-06, "loss": 0.6538, "step": 8662 }, { "epoch": 0.10357608292782075, "grad_norm": 1.7485955953598022, "learning_rate": 9.858790059153214e-06, "loss": 0.5931, "step": 8663 }, { "epoch": 0.10358803907268141, "grad_norm": 14.076956748962402, "learning_rate": 9.858744365822107e-06, "loss": 0.6389, "step": 8664 }, { "epoch": 0.10359999521754205, "grad_norm": 2.896860361099243, "learning_rate": 9.858698665205294e-06, "loss": 0.6053, "step": 8665 }, { "epoch": 0.10361195136240271, "grad_norm": 2.113140106201172, "learning_rate": 9.858652957302847e-06, "loss": 0.6251, "step": 8666 }, { "epoch": 0.10362390750726336, "grad_norm": 2.4897561073303223, "learning_rate": 9.858607242114832e-06, "loss": 0.7087, "step": 8667 }, { "epoch": 0.103635863652124, "grad_norm": 1.7663267850875854, "learning_rate": 9.858561519641317e-06, "loss": 0.6185, "step": 8668 }, { "epoch": 0.10364781979698466, "grad_norm": 2.268416404724121, "learning_rate": 9.858515789882372e-06, "loss": 0.6688, "step": 8669 }, { "epoch": 0.1036597759418453, "grad_norm": 1.5863256454467773, "learning_rate": 9.858470052838068e-06, "loss": 0.5665, "step": 8670 }, { "epoch": 0.10367173208670596, "grad_norm": 2.101346254348755, "learning_rate": 9.858424308508467e-06, "loss": 0.5646, "step": 8671 }, { "epoch": 0.10368368823156661, "grad_norm": 4.789974212646484, "learning_rate": 9.858378556893644e-06, "loss": 0.4977, "step": 8672 }, { "epoch": 0.10369564437642727, "grad_norm": 2.7700817584991455, "learning_rate": 9.858332797993664e-06, "loss": 0.615, "step": 8673 }, { "epoch": 0.10370760052128791, "grad_norm": 2.259012460708618, "learning_rate": 9.858287031808597e-06, "loss": 0.6241, "step": 8674 }, { "epoch": 0.10371955666614857, "grad_norm": 1.8144981861114502, "learning_rate": 9.858241258338512e-06, "loss": 0.6264, "step": 8675 }, { "epoch": 0.10373151281100922, "grad_norm": 3.740048885345459, "learning_rate": 9.858195477583477e-06, "loss": 0.5818, "step": 8676 }, { "epoch": 0.10374346895586987, "grad_norm": 2.928065538406372, "learning_rate": 9.85814968954356e-06, "loss": 0.6215, "step": 8677 }, { "epoch": 0.10375542510073052, "grad_norm": 3.508495807647705, "learning_rate": 9.85810389421883e-06, "loss": 0.6947, "step": 8678 }, { "epoch": 0.10376738124559118, "grad_norm": 2.4163389205932617, "learning_rate": 9.858058091609356e-06, "loss": 0.7357, "step": 8679 }, { "epoch": 0.10377933739045182, "grad_norm": 3.0286357402801514, "learning_rate": 9.858012281715207e-06, "loss": 0.5628, "step": 8680 }, { "epoch": 0.10379129353531247, "grad_norm": 3.8382880687713623, "learning_rate": 9.857966464536451e-06, "loss": 0.5848, "step": 8681 }, { "epoch": 0.10380324968017313, "grad_norm": 2.880901336669922, "learning_rate": 9.857920640073158e-06, "loss": 0.711, "step": 8682 }, { "epoch": 0.10381520582503377, "grad_norm": 1.6419180631637573, "learning_rate": 9.857874808325395e-06, "loss": 0.6436, "step": 8683 }, { "epoch": 0.10382716196989443, "grad_norm": 2.3056998252868652, "learning_rate": 9.857828969293231e-06, "loss": 0.7337, "step": 8684 }, { "epoch": 0.10383911811475507, "grad_norm": 20.751686096191406, "learning_rate": 9.857783122976735e-06, "loss": 0.6515, "step": 8685 }, { "epoch": 0.10385107425961573, "grad_norm": 2.063049554824829, "learning_rate": 9.857737269375978e-06, "loss": 0.6774, "step": 8686 }, { "epoch": 0.10386303040447638, "grad_norm": 2.343430757522583, "learning_rate": 9.857691408491023e-06, "loss": 0.6759, "step": 8687 }, { "epoch": 0.10387498654933704, "grad_norm": 1.932571530342102, "learning_rate": 9.857645540321944e-06, "loss": 0.6771, "step": 8688 }, { "epoch": 0.10388694269419768, "grad_norm": 1.7564440965652466, "learning_rate": 9.857599664868811e-06, "loss": 0.5815, "step": 8689 }, { "epoch": 0.10389889883905834, "grad_norm": 2.2266602516174316, "learning_rate": 9.857553782131689e-06, "loss": 0.6904, "step": 8690 }, { "epoch": 0.10391085498391898, "grad_norm": 3.7936625480651855, "learning_rate": 9.857507892110646e-06, "loss": 0.6404, "step": 8691 }, { "epoch": 0.10392281112877963, "grad_norm": 2.825941324234009, "learning_rate": 9.857461994805753e-06, "loss": 0.5715, "step": 8692 }, { "epoch": 0.10393476727364029, "grad_norm": 1.5289498567581177, "learning_rate": 9.85741609021708e-06, "loss": 0.5921, "step": 8693 }, { "epoch": 0.10394672341850093, "grad_norm": 3.702080249786377, "learning_rate": 9.857370178344692e-06, "loss": 0.6555, "step": 8694 }, { "epoch": 0.10395867956336159, "grad_norm": 1.7810519933700562, "learning_rate": 9.85732425918866e-06, "loss": 0.6134, "step": 8695 }, { "epoch": 0.10397063570822224, "grad_norm": 1.7678680419921875, "learning_rate": 9.857278332749056e-06, "loss": 0.6479, "step": 8696 }, { "epoch": 0.1039825918530829, "grad_norm": 1.5578622817993164, "learning_rate": 9.857232399025944e-06, "loss": 0.6606, "step": 8697 }, { "epoch": 0.10399454799794354, "grad_norm": 10.259832382202148, "learning_rate": 9.857186458019395e-06, "loss": 0.674, "step": 8698 }, { "epoch": 0.1040065041428042, "grad_norm": 9.216394424438477, "learning_rate": 9.857140509729478e-06, "loss": 0.6451, "step": 8699 }, { "epoch": 0.10401846028766484, "grad_norm": 4.0747222900390625, "learning_rate": 9.857094554156262e-06, "loss": 0.5584, "step": 8700 }, { "epoch": 0.1040304164325255, "grad_norm": 1.8366835117340088, "learning_rate": 9.857048591299815e-06, "loss": 0.6371, "step": 8701 }, { "epoch": 0.10404237257738615, "grad_norm": 1.9424879550933838, "learning_rate": 9.857002621160205e-06, "loss": 0.565, "step": 8702 }, { "epoch": 0.1040543287222468, "grad_norm": 29.99087142944336, "learning_rate": 9.856956643737503e-06, "loss": 0.6009, "step": 8703 }, { "epoch": 0.10406628486710745, "grad_norm": 2.4669110774993896, "learning_rate": 9.856910659031778e-06, "loss": 0.6448, "step": 8704 }, { "epoch": 0.1040782410119681, "grad_norm": 1.88704514503479, "learning_rate": 9.856864667043098e-06, "loss": 0.5744, "step": 8705 }, { "epoch": 0.10409019715682875, "grad_norm": 1.7018449306488037, "learning_rate": 9.85681866777153e-06, "loss": 0.6249, "step": 8706 }, { "epoch": 0.1041021533016894, "grad_norm": 1.8870513439178467, "learning_rate": 9.85677266121715e-06, "loss": 0.6234, "step": 8707 }, { "epoch": 0.10411410944655006, "grad_norm": 2.7444701194763184, "learning_rate": 9.856726647380017e-06, "loss": 0.4713, "step": 8708 }, { "epoch": 0.1041260655914107, "grad_norm": 1.8417404890060425, "learning_rate": 9.856680626260208e-06, "loss": 0.6552, "step": 8709 }, { "epoch": 0.10413802173627136, "grad_norm": 1.4982008934020996, "learning_rate": 9.856634597857788e-06, "loss": 0.6168, "step": 8710 }, { "epoch": 0.104149977881132, "grad_norm": 1.7359637022018433, "learning_rate": 9.856588562172828e-06, "loss": 0.6274, "step": 8711 }, { "epoch": 0.10416193402599266, "grad_norm": 1.7265979051589966, "learning_rate": 9.856542519205397e-06, "loss": 0.5261, "step": 8712 }, { "epoch": 0.10417389017085331, "grad_norm": 1.8391907215118408, "learning_rate": 9.856496468955561e-06, "loss": 0.5968, "step": 8713 }, { "epoch": 0.10418584631571397, "grad_norm": 1.960152268409729, "learning_rate": 9.856450411423392e-06, "loss": 0.594, "step": 8714 }, { "epoch": 0.10419780246057461, "grad_norm": 11.702497482299805, "learning_rate": 9.856404346608959e-06, "loss": 0.5436, "step": 8715 }, { "epoch": 0.10420975860543526, "grad_norm": 2.5406014919281006, "learning_rate": 9.85635827451233e-06, "loss": 0.7213, "step": 8716 }, { "epoch": 0.10422171475029592, "grad_norm": 2.6086716651916504, "learning_rate": 9.856312195133576e-06, "loss": 0.7297, "step": 8717 }, { "epoch": 0.10423367089515656, "grad_norm": 1.7164698839187622, "learning_rate": 9.856266108472762e-06, "loss": 0.6216, "step": 8718 }, { "epoch": 0.10424562704001722, "grad_norm": 2.105708122253418, "learning_rate": 9.85622001452996e-06, "loss": 0.6626, "step": 8719 }, { "epoch": 0.10425758318487786, "grad_norm": 1.724657654762268, "learning_rate": 9.85617391330524e-06, "loss": 0.6576, "step": 8720 }, { "epoch": 0.10426953932973852, "grad_norm": 2.4185194969177246, "learning_rate": 9.856127804798669e-06, "loss": 0.692, "step": 8721 }, { "epoch": 0.10428149547459917, "grad_norm": 2.584594964981079, "learning_rate": 9.85608168901032e-06, "loss": 0.6301, "step": 8722 }, { "epoch": 0.10429345161945983, "grad_norm": 2.566054344177246, "learning_rate": 9.856035565940257e-06, "loss": 0.6761, "step": 8723 }, { "epoch": 0.10430540776432047, "grad_norm": 2.36088490486145, "learning_rate": 9.855989435588552e-06, "loss": 0.7393, "step": 8724 }, { "epoch": 0.10431736390918113, "grad_norm": 2.1970014572143555, "learning_rate": 9.855943297955272e-06, "loss": 0.6055, "step": 8725 }, { "epoch": 0.10432932005404177, "grad_norm": 3.9937033653259277, "learning_rate": 9.855897153040488e-06, "loss": 0.6371, "step": 8726 }, { "epoch": 0.10434127619890242, "grad_norm": 42.50830841064453, "learning_rate": 9.85585100084427e-06, "loss": 0.6237, "step": 8727 }, { "epoch": 0.10435323234376308, "grad_norm": 1.370164155960083, "learning_rate": 9.855804841366685e-06, "loss": 0.6222, "step": 8728 }, { "epoch": 0.10436518848862372, "grad_norm": 1.728867769241333, "learning_rate": 9.855758674607805e-06, "loss": 0.5529, "step": 8729 }, { "epoch": 0.10437714463348438, "grad_norm": 3.682297468185425, "learning_rate": 9.855712500567697e-06, "loss": 0.5824, "step": 8730 }, { "epoch": 0.10438910077834503, "grad_norm": 1.91572904586792, "learning_rate": 9.855666319246431e-06, "loss": 0.6597, "step": 8731 }, { "epoch": 0.10440105692320568, "grad_norm": 2.189627170562744, "learning_rate": 9.855620130644077e-06, "loss": 0.6953, "step": 8732 }, { "epoch": 0.10441301306806633, "grad_norm": 2.0944857597351074, "learning_rate": 9.855573934760702e-06, "loss": 0.6592, "step": 8733 }, { "epoch": 0.10442496921292699, "grad_norm": 3.78204345703125, "learning_rate": 9.855527731596376e-06, "loss": 0.6728, "step": 8734 }, { "epoch": 0.10443692535778763, "grad_norm": 2.1951141357421875, "learning_rate": 9.85548152115117e-06, "loss": 0.6312, "step": 8735 }, { "epoch": 0.10444888150264829, "grad_norm": 2.607170581817627, "learning_rate": 9.855435303425152e-06, "loss": 0.6761, "step": 8736 }, { "epoch": 0.10446083764750894, "grad_norm": 3.747008800506592, "learning_rate": 9.855389078418392e-06, "loss": 0.6279, "step": 8737 }, { "epoch": 0.1044727937923696, "grad_norm": 3.4579005241394043, "learning_rate": 9.855342846130957e-06, "loss": 0.605, "step": 8738 }, { "epoch": 0.10448474993723024, "grad_norm": 1.418530821800232, "learning_rate": 9.85529660656292e-06, "loss": 0.6312, "step": 8739 }, { "epoch": 0.10449670608209088, "grad_norm": 1.9390748739242554, "learning_rate": 9.855250359714347e-06, "loss": 0.6528, "step": 8740 }, { "epoch": 0.10450866222695154, "grad_norm": 2.00681471824646, "learning_rate": 9.855204105585309e-06, "loss": 0.5793, "step": 8741 }, { "epoch": 0.10452061837181219, "grad_norm": 1.768215537071228, "learning_rate": 9.855157844175877e-06, "loss": 0.7167, "step": 8742 }, { "epoch": 0.10453257451667285, "grad_norm": 9.02173137664795, "learning_rate": 9.855111575486117e-06, "loss": 0.6958, "step": 8743 }, { "epoch": 0.10454453066153349, "grad_norm": 2.519392967224121, "learning_rate": 9.855065299516101e-06, "loss": 0.6824, "step": 8744 }, { "epoch": 0.10455648680639415, "grad_norm": 3.2483129501342773, "learning_rate": 9.855019016265897e-06, "loss": 0.7058, "step": 8745 }, { "epoch": 0.1045684429512548, "grad_norm": 1.646332859992981, "learning_rate": 9.854972725735574e-06, "loss": 0.6145, "step": 8746 }, { "epoch": 0.10458039909611545, "grad_norm": 1.7946608066558838, "learning_rate": 9.854926427925203e-06, "loss": 0.7398, "step": 8747 }, { "epoch": 0.1045923552409761, "grad_norm": 3.069772720336914, "learning_rate": 9.854880122834853e-06, "loss": 0.7418, "step": 8748 }, { "epoch": 0.10460431138583676, "grad_norm": 3.9319264888763428, "learning_rate": 9.854833810464591e-06, "loss": 0.6333, "step": 8749 }, { "epoch": 0.1046162675306974, "grad_norm": 1.9592846632003784, "learning_rate": 9.85478749081449e-06, "loss": 0.6961, "step": 8750 }, { "epoch": 0.10462822367555805, "grad_norm": 2.68068790435791, "learning_rate": 9.854741163884619e-06, "loss": 0.6766, "step": 8751 }, { "epoch": 0.1046401798204187, "grad_norm": 2.306544065475464, "learning_rate": 9.854694829675043e-06, "loss": 0.6233, "step": 8752 }, { "epoch": 0.10465213596527935, "grad_norm": 2.030527353286743, "learning_rate": 9.854648488185837e-06, "loss": 0.6155, "step": 8753 }, { "epoch": 0.10466409211014001, "grad_norm": 4.418360233306885, "learning_rate": 9.854602139417069e-06, "loss": 0.6346, "step": 8754 }, { "epoch": 0.10467604825500065, "grad_norm": 3.787688732147217, "learning_rate": 9.854555783368806e-06, "loss": 0.7939, "step": 8755 }, { "epoch": 0.10468800439986131, "grad_norm": 2.1530370712280273, "learning_rate": 9.854509420041121e-06, "loss": 0.6033, "step": 8756 }, { "epoch": 0.10469996054472196, "grad_norm": 1.9742351770401, "learning_rate": 9.85446304943408e-06, "loss": 0.6008, "step": 8757 }, { "epoch": 0.10471191668958262, "grad_norm": 2.301069736480713, "learning_rate": 9.854416671547755e-06, "loss": 0.6453, "step": 8758 }, { "epoch": 0.10472387283444326, "grad_norm": 2.275472640991211, "learning_rate": 9.854370286382215e-06, "loss": 0.5808, "step": 8759 }, { "epoch": 0.10473582897930392, "grad_norm": 6.112713813781738, "learning_rate": 9.854323893937532e-06, "loss": 0.6192, "step": 8760 }, { "epoch": 0.10474778512416456, "grad_norm": 1.822843074798584, "learning_rate": 9.85427749421377e-06, "loss": 0.6687, "step": 8761 }, { "epoch": 0.10475974126902522, "grad_norm": 1.7372809648513794, "learning_rate": 9.854231087211003e-06, "loss": 0.6193, "step": 8762 }, { "epoch": 0.10477169741388587, "grad_norm": 3.198592185974121, "learning_rate": 9.854184672929298e-06, "loss": 0.5368, "step": 8763 }, { "epoch": 0.10478365355874651, "grad_norm": 1.9240455627441406, "learning_rate": 9.854138251368728e-06, "loss": 0.6406, "step": 8764 }, { "epoch": 0.10479560970360717, "grad_norm": 2.994760036468506, "learning_rate": 9.85409182252936e-06, "loss": 0.5617, "step": 8765 }, { "epoch": 0.10480756584846782, "grad_norm": 2.4784979820251465, "learning_rate": 9.854045386411263e-06, "loss": 0.6233, "step": 8766 }, { "epoch": 0.10481952199332847, "grad_norm": 1.8983078002929688, "learning_rate": 9.853998943014506e-06, "loss": 0.641, "step": 8767 }, { "epoch": 0.10483147813818912, "grad_norm": 3.61796236038208, "learning_rate": 9.853952492339162e-06, "loss": 0.607, "step": 8768 }, { "epoch": 0.10484343428304978, "grad_norm": 9.173749923706055, "learning_rate": 9.8539060343853e-06, "loss": 0.6706, "step": 8769 }, { "epoch": 0.10485539042791042, "grad_norm": 4.736979007720947, "learning_rate": 9.853859569152988e-06, "loss": 0.6102, "step": 8770 }, { "epoch": 0.10486734657277108, "grad_norm": 9.380821228027344, "learning_rate": 9.853813096642296e-06, "loss": 0.6787, "step": 8771 }, { "epoch": 0.10487930271763173, "grad_norm": 3.2932982444763184, "learning_rate": 9.853766616853295e-06, "loss": 0.6659, "step": 8772 }, { "epoch": 0.10489125886249238, "grad_norm": 1.7844536304473877, "learning_rate": 9.853720129786052e-06, "loss": 0.6461, "step": 8773 }, { "epoch": 0.10490321500735303, "grad_norm": 4.795321941375732, "learning_rate": 9.853673635440639e-06, "loss": 0.6331, "step": 8774 }, { "epoch": 0.10491517115221367, "grad_norm": 2.508317470550537, "learning_rate": 9.853627133817125e-06, "loss": 0.7649, "step": 8775 }, { "epoch": 0.10492712729707433, "grad_norm": 2.5575363636016846, "learning_rate": 9.853580624915582e-06, "loss": 0.6462, "step": 8776 }, { "epoch": 0.10493908344193498, "grad_norm": 1.904383897781372, "learning_rate": 9.853534108736075e-06, "loss": 0.7083, "step": 8777 }, { "epoch": 0.10495103958679564, "grad_norm": 1.5583299398422241, "learning_rate": 9.853487585278678e-06, "loss": 0.6883, "step": 8778 }, { "epoch": 0.10496299573165628, "grad_norm": 3.9310715198516846, "learning_rate": 9.853441054543458e-06, "loss": 0.6444, "step": 8779 }, { "epoch": 0.10497495187651694, "grad_norm": 1.8472546339035034, "learning_rate": 9.853394516530486e-06, "loss": 0.6499, "step": 8780 }, { "epoch": 0.10498690802137758, "grad_norm": 4.303549289703369, "learning_rate": 9.853347971239832e-06, "loss": 0.6121, "step": 8781 }, { "epoch": 0.10499886416623824, "grad_norm": 5.456420421600342, "learning_rate": 9.853301418671566e-06, "loss": 0.7583, "step": 8782 }, { "epoch": 0.10501082031109889, "grad_norm": 3.3997559547424316, "learning_rate": 9.853254858825755e-06, "loss": 0.6034, "step": 8783 }, { "epoch": 0.10502277645595955, "grad_norm": 2.2004568576812744, "learning_rate": 9.853208291702475e-06, "loss": 0.7357, "step": 8784 }, { "epoch": 0.10503473260082019, "grad_norm": 2.2596278190612793, "learning_rate": 9.853161717301788e-06, "loss": 0.6506, "step": 8785 }, { "epoch": 0.10504668874568085, "grad_norm": 1.85458505153656, "learning_rate": 9.85311513562377e-06, "loss": 0.5477, "step": 8786 }, { "epoch": 0.1050586448905415, "grad_norm": 2.343553066253662, "learning_rate": 9.853068546668487e-06, "loss": 0.6268, "step": 8787 }, { "epoch": 0.10507060103540214, "grad_norm": 5.175838470458984, "learning_rate": 9.853021950436013e-06, "loss": 0.6854, "step": 8788 }, { "epoch": 0.1050825571802628, "grad_norm": 1.505475640296936, "learning_rate": 9.852975346926413e-06, "loss": 0.5776, "step": 8789 }, { "epoch": 0.10509451332512344, "grad_norm": 2.0276284217834473, "learning_rate": 9.85292873613976e-06, "loss": 0.6569, "step": 8790 }, { "epoch": 0.1051064694699841, "grad_norm": 7.182913780212402, "learning_rate": 9.852882118076124e-06, "loss": 0.6266, "step": 8791 }, { "epoch": 0.10511842561484475, "grad_norm": 1.9257293939590454, "learning_rate": 9.852835492735573e-06, "loss": 0.5834, "step": 8792 }, { "epoch": 0.1051303817597054, "grad_norm": 3.083528757095337, "learning_rate": 9.852788860118177e-06, "loss": 0.61, "step": 8793 }, { "epoch": 0.10514233790456605, "grad_norm": 3.3253064155578613, "learning_rate": 9.852742220224009e-06, "loss": 0.6194, "step": 8794 }, { "epoch": 0.10515429404942671, "grad_norm": 3.2821662425994873, "learning_rate": 9.852695573053135e-06, "loss": 0.6283, "step": 8795 }, { "epoch": 0.10516625019428735, "grad_norm": 4.148953914642334, "learning_rate": 9.852648918605628e-06, "loss": 0.7379, "step": 8796 }, { "epoch": 0.10517820633914801, "grad_norm": 2.5862209796905518, "learning_rate": 9.852602256881556e-06, "loss": 0.6895, "step": 8797 }, { "epoch": 0.10519016248400866, "grad_norm": 2.1282882690429688, "learning_rate": 9.852555587880989e-06, "loss": 0.6166, "step": 8798 }, { "epoch": 0.1052021186288693, "grad_norm": 6.166895389556885, "learning_rate": 9.852508911604e-06, "loss": 0.6557, "step": 8799 }, { "epoch": 0.10521407477372996, "grad_norm": 5.134453773498535, "learning_rate": 9.852462228050655e-06, "loss": 0.6337, "step": 8800 }, { "epoch": 0.1052260309185906, "grad_norm": 3.1248302459716797, "learning_rate": 9.852415537221025e-06, "loss": 0.5724, "step": 8801 }, { "epoch": 0.10523798706345126, "grad_norm": 1.7187315225601196, "learning_rate": 9.852368839115181e-06, "loss": 0.5142, "step": 8802 }, { "epoch": 0.10524994320831191, "grad_norm": 2.5016000270843506, "learning_rate": 9.852322133733194e-06, "loss": 0.5987, "step": 8803 }, { "epoch": 0.10526189935317257, "grad_norm": 1.7244443893432617, "learning_rate": 9.852275421075132e-06, "loss": 0.6304, "step": 8804 }, { "epoch": 0.10527385549803321, "grad_norm": 2.864678144454956, "learning_rate": 9.852228701141065e-06, "loss": 0.6024, "step": 8805 }, { "epoch": 0.10528581164289387, "grad_norm": 19.679283142089844, "learning_rate": 9.852181973931065e-06, "loss": 0.6745, "step": 8806 }, { "epoch": 0.10529776778775451, "grad_norm": 3.113215446472168, "learning_rate": 9.8521352394452e-06, "loss": 0.6054, "step": 8807 }, { "epoch": 0.10530972393261517, "grad_norm": 4.626001834869385, "learning_rate": 9.852088497683541e-06, "loss": 0.7086, "step": 8808 }, { "epoch": 0.10532168007747582, "grad_norm": 2.2572031021118164, "learning_rate": 9.85204174864616e-06, "loss": 0.6428, "step": 8809 }, { "epoch": 0.10533363622233646, "grad_norm": 2.5864434242248535, "learning_rate": 9.851994992333123e-06, "loss": 0.5934, "step": 8810 }, { "epoch": 0.10534559236719712, "grad_norm": 1.7159788608551025, "learning_rate": 9.851948228744501e-06, "loss": 0.647, "step": 8811 }, { "epoch": 0.10535754851205777, "grad_norm": 1.853111982345581, "learning_rate": 9.851901457880369e-06, "loss": 0.6311, "step": 8812 }, { "epoch": 0.10536950465691843, "grad_norm": 3.9399802684783936, "learning_rate": 9.851854679740791e-06, "loss": 0.6354, "step": 8813 }, { "epoch": 0.10538146080177907, "grad_norm": 3.1101977825164795, "learning_rate": 9.85180789432584e-06, "loss": 0.6549, "step": 8814 }, { "epoch": 0.10539341694663973, "grad_norm": 1.8057758808135986, "learning_rate": 9.851761101635586e-06, "loss": 0.6311, "step": 8815 }, { "epoch": 0.10540537309150037, "grad_norm": 3.669255256652832, "learning_rate": 9.851714301670099e-06, "loss": 0.6399, "step": 8816 }, { "epoch": 0.10541732923636103, "grad_norm": 2.9481241703033447, "learning_rate": 9.85166749442945e-06, "loss": 0.6004, "step": 8817 }, { "epoch": 0.10542928538122168, "grad_norm": 5.377033710479736, "learning_rate": 9.851620679913706e-06, "loss": 0.6851, "step": 8818 }, { "epoch": 0.10544124152608234, "grad_norm": 3.3484268188476562, "learning_rate": 9.851573858122941e-06, "loss": 0.6427, "step": 8819 }, { "epoch": 0.10545319767094298, "grad_norm": 4.318789005279541, "learning_rate": 9.851527029057224e-06, "loss": 0.6255, "step": 8820 }, { "epoch": 0.10546515381580364, "grad_norm": 5.517202854156494, "learning_rate": 9.851480192716625e-06, "loss": 0.6165, "step": 8821 }, { "epoch": 0.10547710996066428, "grad_norm": 1.8934948444366455, "learning_rate": 9.851433349101214e-06, "loss": 0.6345, "step": 8822 }, { "epoch": 0.10548906610552493, "grad_norm": 1.4861847162246704, "learning_rate": 9.851386498211061e-06, "loss": 0.5835, "step": 8823 }, { "epoch": 0.10550102225038559, "grad_norm": 1.8863046169281006, "learning_rate": 9.851339640046239e-06, "loss": 0.6453, "step": 8824 }, { "epoch": 0.10551297839524623, "grad_norm": 2.3803060054779053, "learning_rate": 9.851292774606814e-06, "loss": 0.6209, "step": 8825 }, { "epoch": 0.10552493454010689, "grad_norm": 1.8276697397232056, "learning_rate": 9.851245901892857e-06, "loss": 0.6361, "step": 8826 }, { "epoch": 0.10553689068496754, "grad_norm": 1.7391468286514282, "learning_rate": 9.851199021904442e-06, "loss": 0.6827, "step": 8827 }, { "epoch": 0.1055488468298282, "grad_norm": 5.213388442993164, "learning_rate": 9.851152134641635e-06, "loss": 0.6002, "step": 8828 }, { "epoch": 0.10556080297468884, "grad_norm": 1.6096800565719604, "learning_rate": 9.85110524010451e-06, "loss": 0.5758, "step": 8829 }, { "epoch": 0.1055727591195495, "grad_norm": 1.8038102388381958, "learning_rate": 9.851058338293134e-06, "loss": 0.5532, "step": 8830 }, { "epoch": 0.10558471526441014, "grad_norm": 2.0446295738220215, "learning_rate": 9.851011429207579e-06, "loss": 0.6014, "step": 8831 }, { "epoch": 0.1055966714092708, "grad_norm": 2.542569875717163, "learning_rate": 9.850964512847915e-06, "loss": 0.5958, "step": 8832 }, { "epoch": 0.10560862755413145, "grad_norm": 2.9451029300689697, "learning_rate": 9.850917589214213e-06, "loss": 0.6278, "step": 8833 }, { "epoch": 0.10562058369899209, "grad_norm": 1.8333288431167603, "learning_rate": 9.850870658306541e-06, "loss": 0.7579, "step": 8834 }, { "epoch": 0.10563253984385275, "grad_norm": 2.7528579235076904, "learning_rate": 9.850823720124974e-06, "loss": 0.6015, "step": 8835 }, { "epoch": 0.1056444959887134, "grad_norm": 1.9327692985534668, "learning_rate": 9.850776774669577e-06, "loss": 0.6266, "step": 8836 }, { "epoch": 0.10565645213357405, "grad_norm": 1.5593961477279663, "learning_rate": 9.850729821940426e-06, "loss": 0.6585, "step": 8837 }, { "epoch": 0.1056684082784347, "grad_norm": 1.5662130117416382, "learning_rate": 9.850682861937586e-06, "loss": 0.6818, "step": 8838 }, { "epoch": 0.10568036442329536, "grad_norm": 2.6330714225769043, "learning_rate": 9.850635894661131e-06, "loss": 0.5683, "step": 8839 }, { "epoch": 0.105692320568156, "grad_norm": 1.9378066062927246, "learning_rate": 9.850588920111129e-06, "loss": 0.5429, "step": 8840 }, { "epoch": 0.10570427671301666, "grad_norm": 2.869749069213867, "learning_rate": 9.850541938287653e-06, "loss": 0.6327, "step": 8841 }, { "epoch": 0.1057162328578773, "grad_norm": 8.808598518371582, "learning_rate": 9.850494949190772e-06, "loss": 0.5911, "step": 8842 }, { "epoch": 0.10572818900273796, "grad_norm": 4.133315563201904, "learning_rate": 9.850447952820556e-06, "loss": 0.5351, "step": 8843 }, { "epoch": 0.10574014514759861, "grad_norm": 4.361558437347412, "learning_rate": 9.850400949177075e-06, "loss": 0.7133, "step": 8844 }, { "epoch": 0.10575210129245927, "grad_norm": 3.2172086238861084, "learning_rate": 9.850353938260403e-06, "loss": 0.6797, "step": 8845 }, { "epoch": 0.10576405743731991, "grad_norm": 2.122426748275757, "learning_rate": 9.850306920070605e-06, "loss": 0.6065, "step": 8846 }, { "epoch": 0.10577601358218056, "grad_norm": 2.0256497859954834, "learning_rate": 9.850259894607756e-06, "loss": 0.6742, "step": 8847 }, { "epoch": 0.10578796972704121, "grad_norm": 1.7548946142196655, "learning_rate": 9.850212861871927e-06, "loss": 0.6161, "step": 8848 }, { "epoch": 0.10579992587190186, "grad_norm": 1.7363989353179932, "learning_rate": 9.850165821863184e-06, "loss": 0.6822, "step": 8849 }, { "epoch": 0.10581188201676252, "grad_norm": 3.183112859725952, "learning_rate": 9.850118774581601e-06, "loss": 0.7099, "step": 8850 }, { "epoch": 0.10582383816162316, "grad_norm": 7.7686333656311035, "learning_rate": 9.850071720027248e-06, "loss": 0.5819, "step": 8851 }, { "epoch": 0.10583579430648382, "grad_norm": 3.2659659385681152, "learning_rate": 9.850024658200195e-06, "loss": 0.5755, "step": 8852 }, { "epoch": 0.10584775045134447, "grad_norm": 2.800881862640381, "learning_rate": 9.849977589100513e-06, "loss": 0.6147, "step": 8853 }, { "epoch": 0.10585970659620512, "grad_norm": 4.0251336097717285, "learning_rate": 9.849930512728271e-06, "loss": 0.7495, "step": 8854 }, { "epoch": 0.10587166274106577, "grad_norm": 2.211214303970337, "learning_rate": 9.849883429083542e-06, "loss": 0.5884, "step": 8855 }, { "epoch": 0.10588361888592643, "grad_norm": 2.0736594200134277, "learning_rate": 9.849836338166397e-06, "loss": 0.6979, "step": 8856 }, { "epoch": 0.10589557503078707, "grad_norm": 2.894277334213257, "learning_rate": 9.849789239976904e-06, "loss": 0.7228, "step": 8857 }, { "epoch": 0.10590753117564772, "grad_norm": 2.3029069900512695, "learning_rate": 9.849742134515134e-06, "loss": 0.6726, "step": 8858 }, { "epoch": 0.10591948732050838, "grad_norm": 6.699684143066406, "learning_rate": 9.849695021781159e-06, "loss": 0.5606, "step": 8859 }, { "epoch": 0.10593144346536902, "grad_norm": 3.1800341606140137, "learning_rate": 9.849647901775048e-06, "loss": 0.6384, "step": 8860 }, { "epoch": 0.10594339961022968, "grad_norm": 2.0425705909729004, "learning_rate": 9.849600774496874e-06, "loss": 0.6473, "step": 8861 }, { "epoch": 0.10595535575509032, "grad_norm": 2.2282605171203613, "learning_rate": 9.849553639946706e-06, "loss": 0.6052, "step": 8862 }, { "epoch": 0.10596731189995098, "grad_norm": 1.9181265830993652, "learning_rate": 9.849506498124615e-06, "loss": 0.7083, "step": 8863 }, { "epoch": 0.10597926804481163, "grad_norm": 1.6589621305465698, "learning_rate": 9.849459349030672e-06, "loss": 0.6561, "step": 8864 }, { "epoch": 0.10599122418967229, "grad_norm": 2.3788211345672607, "learning_rate": 9.849412192664949e-06, "loss": 0.7229, "step": 8865 }, { "epoch": 0.10600318033453293, "grad_norm": 1.8623090982437134, "learning_rate": 9.849365029027513e-06, "loss": 0.7225, "step": 8866 }, { "epoch": 0.10601513647939359, "grad_norm": 1.9418282508850098, "learning_rate": 9.849317858118437e-06, "loss": 0.6276, "step": 8867 }, { "epoch": 0.10602709262425423, "grad_norm": 1.4843848943710327, "learning_rate": 9.849270679937792e-06, "loss": 0.5671, "step": 8868 }, { "epoch": 0.10603904876911488, "grad_norm": 2.035308361053467, "learning_rate": 9.84922349448565e-06, "loss": 0.6484, "step": 8869 }, { "epoch": 0.10605100491397554, "grad_norm": 1.747597336769104, "learning_rate": 9.849176301762077e-06, "loss": 0.5947, "step": 8870 }, { "epoch": 0.10606296105883618, "grad_norm": 2.478733777999878, "learning_rate": 9.849129101767149e-06, "loss": 0.5881, "step": 8871 }, { "epoch": 0.10607491720369684, "grad_norm": 17.976268768310547, "learning_rate": 9.849081894500934e-06, "loss": 0.6014, "step": 8872 }, { "epoch": 0.10608687334855749, "grad_norm": 2.4254097938537598, "learning_rate": 9.849034679963504e-06, "loss": 0.6825, "step": 8873 }, { "epoch": 0.10609882949341815, "grad_norm": 3.060711622238159, "learning_rate": 9.84898745815493e-06, "loss": 0.612, "step": 8874 }, { "epoch": 0.10611078563827879, "grad_norm": 2.2624828815460205, "learning_rate": 9.848940229075281e-06, "loss": 0.529, "step": 8875 }, { "epoch": 0.10612274178313945, "grad_norm": 2.946864604949951, "learning_rate": 9.848892992724628e-06, "loss": 0.5653, "step": 8876 }, { "epoch": 0.1061346979280001, "grad_norm": 5.207892894744873, "learning_rate": 9.848845749103044e-06, "loss": 0.582, "step": 8877 }, { "epoch": 0.10614665407286075, "grad_norm": 6.611482620239258, "learning_rate": 9.8487984982106e-06, "loss": 0.6521, "step": 8878 }, { "epoch": 0.1061586102177214, "grad_norm": 1.9687272310256958, "learning_rate": 9.848751240047362e-06, "loss": 0.6225, "step": 8879 }, { "epoch": 0.10617056636258206, "grad_norm": 5.549468040466309, "learning_rate": 9.848703974613405e-06, "loss": 0.593, "step": 8880 }, { "epoch": 0.1061825225074427, "grad_norm": 3.0584774017333984, "learning_rate": 9.848656701908801e-06, "loss": 0.6952, "step": 8881 }, { "epoch": 0.10619447865230335, "grad_norm": 3.1183173656463623, "learning_rate": 9.848609421933619e-06, "loss": 0.4956, "step": 8882 }, { "epoch": 0.106206434797164, "grad_norm": 1.996856689453125, "learning_rate": 9.848562134687927e-06, "loss": 0.6429, "step": 8883 }, { "epoch": 0.10621839094202465, "grad_norm": 2.102104425430298, "learning_rate": 9.848514840171801e-06, "loss": 0.5965, "step": 8884 }, { "epoch": 0.10623034708688531, "grad_norm": 1.8199267387390137, "learning_rate": 9.84846753838531e-06, "loss": 0.7156, "step": 8885 }, { "epoch": 0.10624230323174595, "grad_norm": 3.409797430038452, "learning_rate": 9.848420229328526e-06, "loss": 0.6221, "step": 8886 }, { "epoch": 0.10625425937660661, "grad_norm": 1.9705133438110352, "learning_rate": 9.848372913001515e-06, "loss": 0.7484, "step": 8887 }, { "epoch": 0.10626621552146726, "grad_norm": 2.002329111099243, "learning_rate": 9.848325589404355e-06, "loss": 0.5971, "step": 8888 }, { "epoch": 0.10627817166632791, "grad_norm": 5.290456771850586, "learning_rate": 9.848278258537112e-06, "loss": 0.6837, "step": 8889 }, { "epoch": 0.10629012781118856, "grad_norm": 1.6796596050262451, "learning_rate": 9.848230920399858e-06, "loss": 0.611, "step": 8890 }, { "epoch": 0.10630208395604922, "grad_norm": 2.019094228744507, "learning_rate": 9.848183574992665e-06, "loss": 0.6508, "step": 8891 }, { "epoch": 0.10631404010090986, "grad_norm": 5.388896942138672, "learning_rate": 9.848136222315605e-06, "loss": 0.7108, "step": 8892 }, { "epoch": 0.10632599624577051, "grad_norm": 3.6818978786468506, "learning_rate": 9.848088862368745e-06, "loss": 0.7067, "step": 8893 }, { "epoch": 0.10633795239063117, "grad_norm": 1.7349919080734253, "learning_rate": 9.84804149515216e-06, "loss": 0.6212, "step": 8894 }, { "epoch": 0.10634990853549181, "grad_norm": 1.9552597999572754, "learning_rate": 9.84799412066592e-06, "loss": 0.5778, "step": 8895 }, { "epoch": 0.10636186468035247, "grad_norm": 2.0004284381866455, "learning_rate": 9.847946738910095e-06, "loss": 0.7894, "step": 8896 }, { "epoch": 0.10637382082521311, "grad_norm": 2.378772258758545, "learning_rate": 9.847899349884755e-06, "loss": 0.5704, "step": 8897 }, { "epoch": 0.10638577697007377, "grad_norm": 3.911677598953247, "learning_rate": 9.847851953589975e-06, "loss": 0.6353, "step": 8898 }, { "epoch": 0.10639773311493442, "grad_norm": 2.372910261154175, "learning_rate": 9.847804550025822e-06, "loss": 0.6219, "step": 8899 }, { "epoch": 0.10640968925979508, "grad_norm": 2.6070239543914795, "learning_rate": 9.847757139192372e-06, "loss": 0.7422, "step": 8900 }, { "epoch": 0.10642164540465572, "grad_norm": 1.8766593933105469, "learning_rate": 9.847709721089689e-06, "loss": 0.6507, "step": 8901 }, { "epoch": 0.10643360154951638, "grad_norm": 2.811352014541626, "learning_rate": 9.84766229571785e-06, "loss": 0.6253, "step": 8902 }, { "epoch": 0.10644555769437702, "grad_norm": 1.5774595737457275, "learning_rate": 9.847614863076924e-06, "loss": 0.6257, "step": 8903 }, { "epoch": 0.10645751383923768, "grad_norm": 1.6120076179504395, "learning_rate": 9.847567423166981e-06, "loss": 0.5883, "step": 8904 }, { "epoch": 0.10646946998409833, "grad_norm": 1.552559494972229, "learning_rate": 9.847519975988096e-06, "loss": 0.5157, "step": 8905 }, { "epoch": 0.10648142612895897, "grad_norm": 2.3164849281311035, "learning_rate": 9.847472521540336e-06, "loss": 0.5908, "step": 8906 }, { "epoch": 0.10649338227381963, "grad_norm": 2.9948973655700684, "learning_rate": 9.847425059823774e-06, "loss": 0.6419, "step": 8907 }, { "epoch": 0.10650533841868028, "grad_norm": 5.8793840408325195, "learning_rate": 9.84737759083848e-06, "loss": 0.7448, "step": 8908 }, { "epoch": 0.10651729456354093, "grad_norm": 2.5303709506988525, "learning_rate": 9.847330114584527e-06, "loss": 0.7242, "step": 8909 }, { "epoch": 0.10652925070840158, "grad_norm": 2.297776937484741, "learning_rate": 9.847282631061986e-06, "loss": 0.6113, "step": 8910 }, { "epoch": 0.10654120685326224, "grad_norm": 2.6699368953704834, "learning_rate": 9.847235140270925e-06, "loss": 0.4751, "step": 8911 }, { "epoch": 0.10655316299812288, "grad_norm": 2.2790396213531494, "learning_rate": 9.84718764221142e-06, "loss": 0.6559, "step": 8912 }, { "epoch": 0.10656511914298354, "grad_norm": 2.1530377864837646, "learning_rate": 9.847140136883538e-06, "loss": 0.6402, "step": 8913 }, { "epoch": 0.10657707528784419, "grad_norm": 2.3305487632751465, "learning_rate": 9.847092624287354e-06, "loss": 0.6454, "step": 8914 }, { "epoch": 0.10658903143270484, "grad_norm": 2.025400400161743, "learning_rate": 9.847045104422937e-06, "loss": 0.66, "step": 8915 }, { "epoch": 0.10660098757756549, "grad_norm": 4.6160993576049805, "learning_rate": 9.846997577290358e-06, "loss": 0.6457, "step": 8916 }, { "epoch": 0.10661294372242613, "grad_norm": 5.899036407470703, "learning_rate": 9.846950042889688e-06, "loss": 0.585, "step": 8917 }, { "epoch": 0.1066248998672868, "grad_norm": 4.662954807281494, "learning_rate": 9.846902501221e-06, "loss": 0.6964, "step": 8918 }, { "epoch": 0.10663685601214744, "grad_norm": 2.1659977436065674, "learning_rate": 9.846854952284367e-06, "loss": 0.5999, "step": 8919 }, { "epoch": 0.1066488121570081, "grad_norm": 3.587841749191284, "learning_rate": 9.846807396079854e-06, "loss": 0.5895, "step": 8920 }, { "epoch": 0.10666076830186874, "grad_norm": 1.8851613998413086, "learning_rate": 9.846759832607539e-06, "loss": 0.6172, "step": 8921 }, { "epoch": 0.1066727244467294, "grad_norm": 1.8204383850097656, "learning_rate": 9.846712261867488e-06, "loss": 0.7385, "step": 8922 }, { "epoch": 0.10668468059159004, "grad_norm": 1.787131905555725, "learning_rate": 9.846664683859777e-06, "loss": 0.6974, "step": 8923 }, { "epoch": 0.1066966367364507, "grad_norm": 5.877430438995361, "learning_rate": 9.846617098584474e-06, "loss": 0.6744, "step": 8924 }, { "epoch": 0.10670859288131135, "grad_norm": 2.337738513946533, "learning_rate": 9.84656950604165e-06, "loss": 0.6748, "step": 8925 }, { "epoch": 0.106720549026172, "grad_norm": 1.759311318397522, "learning_rate": 9.84652190623138e-06, "loss": 0.6272, "step": 8926 }, { "epoch": 0.10673250517103265, "grad_norm": 1.5164754390716553, "learning_rate": 9.846474299153733e-06, "loss": 0.5995, "step": 8927 }, { "epoch": 0.1067444613158933, "grad_norm": 3.0869193077087402, "learning_rate": 9.84642668480878e-06, "loss": 0.5932, "step": 8928 }, { "epoch": 0.10675641746075396, "grad_norm": 8.140478134155273, "learning_rate": 9.846379063196592e-06, "loss": 0.6327, "step": 8929 }, { "epoch": 0.1067683736056146, "grad_norm": 2.1006064414978027, "learning_rate": 9.846331434317242e-06, "loss": 0.5984, "step": 8930 }, { "epoch": 0.10678032975047526, "grad_norm": 2.1739509105682373, "learning_rate": 9.8462837981708e-06, "loss": 0.6461, "step": 8931 }, { "epoch": 0.1067922858953359, "grad_norm": 2.163970708847046, "learning_rate": 9.84623615475734e-06, "loss": 0.6678, "step": 8932 }, { "epoch": 0.10680424204019656, "grad_norm": 2.1924402713775635, "learning_rate": 9.846188504076931e-06, "loss": 0.6444, "step": 8933 }, { "epoch": 0.1068161981850572, "grad_norm": 6.493037223815918, "learning_rate": 9.846140846129644e-06, "loss": 0.6349, "step": 8934 }, { "epoch": 0.10682815432991787, "grad_norm": 2.5841639041900635, "learning_rate": 9.846093180915553e-06, "loss": 0.6347, "step": 8935 }, { "epoch": 0.10684011047477851, "grad_norm": 3.08465838432312, "learning_rate": 9.846045508434728e-06, "loss": 0.6112, "step": 8936 }, { "epoch": 0.10685206661963917, "grad_norm": 2.963883638381958, "learning_rate": 9.84599782868724e-06, "loss": 0.6004, "step": 8937 }, { "epoch": 0.10686402276449981, "grad_norm": 2.0191948413848877, "learning_rate": 9.845950141673162e-06, "loss": 0.6822, "step": 8938 }, { "epoch": 0.10687597890936047, "grad_norm": 2.3925015926361084, "learning_rate": 9.845902447392564e-06, "loss": 0.5665, "step": 8939 }, { "epoch": 0.10688793505422112, "grad_norm": 2.1758575439453125, "learning_rate": 9.845854745845518e-06, "loss": 0.6779, "step": 8940 }, { "epoch": 0.10689989119908176, "grad_norm": 1.8373243808746338, "learning_rate": 9.845807037032095e-06, "loss": 0.6275, "step": 8941 }, { "epoch": 0.10691184734394242, "grad_norm": 2.508728504180908, "learning_rate": 9.845759320952365e-06, "loss": 0.6582, "step": 8942 }, { "epoch": 0.10692380348880307, "grad_norm": 2.4014055728912354, "learning_rate": 9.845711597606406e-06, "loss": 0.6064, "step": 8943 }, { "epoch": 0.10693575963366372, "grad_norm": 1.7323191165924072, "learning_rate": 9.845663866994283e-06, "loss": 0.7052, "step": 8944 }, { "epoch": 0.10694771577852437, "grad_norm": 4.892427444458008, "learning_rate": 9.845616129116071e-06, "loss": 0.7694, "step": 8945 }, { "epoch": 0.10695967192338503, "grad_norm": 3.2588236331939697, "learning_rate": 9.84556838397184e-06, "loss": 0.4858, "step": 8946 }, { "epoch": 0.10697162806824567, "grad_norm": 1.6996490955352783, "learning_rate": 9.84552063156166e-06, "loss": 0.5988, "step": 8947 }, { "epoch": 0.10698358421310633, "grad_norm": 3.0368378162384033, "learning_rate": 9.845472871885606e-06, "loss": 0.6744, "step": 8948 }, { "epoch": 0.10699554035796698, "grad_norm": 2.425356388092041, "learning_rate": 9.845425104943749e-06, "loss": 0.7219, "step": 8949 }, { "epoch": 0.10700749650282763, "grad_norm": 1.9660727977752686, "learning_rate": 9.845377330736158e-06, "loss": 0.6095, "step": 8950 }, { "epoch": 0.10701945264768828, "grad_norm": 1.6663892269134521, "learning_rate": 9.84532954926291e-06, "loss": 0.6224, "step": 8951 }, { "epoch": 0.10703140879254892, "grad_norm": 2.6024227142333984, "learning_rate": 9.84528176052407e-06, "loss": 0.6632, "step": 8952 }, { "epoch": 0.10704336493740958, "grad_norm": 3.355813980102539, "learning_rate": 9.845233964519714e-06, "loss": 0.6737, "step": 8953 }, { "epoch": 0.10705532108227023, "grad_norm": 3.3400893211364746, "learning_rate": 9.845186161249911e-06, "loss": 0.5966, "step": 8954 }, { "epoch": 0.10706727722713089, "grad_norm": 1.5358755588531494, "learning_rate": 9.845138350714736e-06, "loss": 0.6777, "step": 8955 }, { "epoch": 0.10707923337199153, "grad_norm": 15.95663070678711, "learning_rate": 9.845090532914258e-06, "loss": 0.5945, "step": 8956 }, { "epoch": 0.10709118951685219, "grad_norm": 3.255176067352295, "learning_rate": 9.845042707848549e-06, "loss": 0.6281, "step": 8957 }, { "epoch": 0.10710314566171283, "grad_norm": 1.85027277469635, "learning_rate": 9.844994875517681e-06, "loss": 0.606, "step": 8958 }, { "epoch": 0.10711510180657349, "grad_norm": 1.9813565015792847, "learning_rate": 9.844947035921727e-06, "loss": 0.5707, "step": 8959 }, { "epoch": 0.10712705795143414, "grad_norm": 1.785982608795166, "learning_rate": 9.844899189060758e-06, "loss": 0.5619, "step": 8960 }, { "epoch": 0.1071390140962948, "grad_norm": 2.031883955001831, "learning_rate": 9.844851334934844e-06, "loss": 0.6914, "step": 8961 }, { "epoch": 0.10715097024115544, "grad_norm": 2.777050495147705, "learning_rate": 9.84480347354406e-06, "loss": 0.7233, "step": 8962 }, { "epoch": 0.1071629263860161, "grad_norm": 1.7937711477279663, "learning_rate": 9.844755604888474e-06, "loss": 0.6615, "step": 8963 }, { "epoch": 0.10717488253087674, "grad_norm": 3.611921787261963, "learning_rate": 9.844707728968162e-06, "loss": 0.5963, "step": 8964 }, { "epoch": 0.10718683867573739, "grad_norm": 7.249114513397217, "learning_rate": 9.844659845783191e-06, "loss": 0.5489, "step": 8965 }, { "epoch": 0.10719879482059805, "grad_norm": 2.4556307792663574, "learning_rate": 9.844611955333637e-06, "loss": 0.7131, "step": 8966 }, { "epoch": 0.10721075096545869, "grad_norm": 3.515836000442505, "learning_rate": 9.84456405761957e-06, "loss": 0.6934, "step": 8967 }, { "epoch": 0.10722270711031935, "grad_norm": 3.0262084007263184, "learning_rate": 9.844516152641062e-06, "loss": 0.6333, "step": 8968 }, { "epoch": 0.10723466325518, "grad_norm": 4.212601184844971, "learning_rate": 9.844468240398184e-06, "loss": 0.6133, "step": 8969 }, { "epoch": 0.10724661940004065, "grad_norm": 1.5940550565719604, "learning_rate": 9.84442032089101e-06, "loss": 0.5928, "step": 8970 }, { "epoch": 0.1072585755449013, "grad_norm": 5.2043585777282715, "learning_rate": 9.84437239411961e-06, "loss": 0.7842, "step": 8971 }, { "epoch": 0.10727053168976196, "grad_norm": 1.5917187929153442, "learning_rate": 9.844324460084055e-06, "loss": 0.5862, "step": 8972 }, { "epoch": 0.1072824878346226, "grad_norm": 4.396997451782227, "learning_rate": 9.84427651878442e-06, "loss": 0.7491, "step": 8973 }, { "epoch": 0.10729444397948326, "grad_norm": 5.3897480964660645, "learning_rate": 9.844228570220774e-06, "loss": 0.6054, "step": 8974 }, { "epoch": 0.1073064001243439, "grad_norm": 3.628659248352051, "learning_rate": 9.84418061439319e-06, "loss": 0.6902, "step": 8975 }, { "epoch": 0.10731835626920455, "grad_norm": 1.508949637413025, "learning_rate": 9.844132651301741e-06, "loss": 0.6169, "step": 8976 }, { "epoch": 0.10733031241406521, "grad_norm": 2.42280650138855, "learning_rate": 9.844084680946497e-06, "loss": 0.6685, "step": 8977 }, { "epoch": 0.10734226855892585, "grad_norm": 4.090648174285889, "learning_rate": 9.84403670332753e-06, "loss": 0.6621, "step": 8978 }, { "epoch": 0.10735422470378651, "grad_norm": 1.6833316087722778, "learning_rate": 9.843988718444914e-06, "loss": 0.6397, "step": 8979 }, { "epoch": 0.10736618084864716, "grad_norm": 3.9932827949523926, "learning_rate": 9.843940726298721e-06, "loss": 0.4644, "step": 8980 }, { "epoch": 0.10737813699350782, "grad_norm": 1.8788951635360718, "learning_rate": 9.843892726889019e-06, "loss": 0.683, "step": 8981 }, { "epoch": 0.10739009313836846, "grad_norm": 1.3859171867370605, "learning_rate": 9.843844720215885e-06, "loss": 0.6626, "step": 8982 }, { "epoch": 0.10740204928322912, "grad_norm": 1.98267662525177, "learning_rate": 9.843796706279386e-06, "loss": 0.593, "step": 8983 }, { "epoch": 0.10741400542808976, "grad_norm": 6.605754375457764, "learning_rate": 9.843748685079599e-06, "loss": 0.6405, "step": 8984 }, { "epoch": 0.10742596157295042, "grad_norm": 1.903110384941101, "learning_rate": 9.843700656616592e-06, "loss": 0.5657, "step": 8985 }, { "epoch": 0.10743791771781107, "grad_norm": 2.0692522525787354, "learning_rate": 9.84365262089044e-06, "loss": 0.7306, "step": 8986 }, { "epoch": 0.10744987386267173, "grad_norm": 3.152780532836914, "learning_rate": 9.843604577901214e-06, "loss": 0.7357, "step": 8987 }, { "epoch": 0.10746183000753237, "grad_norm": 1.6276326179504395, "learning_rate": 9.843556527648986e-06, "loss": 0.6417, "step": 8988 }, { "epoch": 0.10747378615239302, "grad_norm": 2.2044074535369873, "learning_rate": 9.843508470133825e-06, "loss": 0.7743, "step": 8989 }, { "epoch": 0.10748574229725368, "grad_norm": 1.5401127338409424, "learning_rate": 9.843460405355808e-06, "loss": 0.68, "step": 8990 }, { "epoch": 0.10749769844211432, "grad_norm": 8.585537910461426, "learning_rate": 9.843412333315004e-06, "loss": 0.5947, "step": 8991 }, { "epoch": 0.10750965458697498, "grad_norm": 3.8772358894348145, "learning_rate": 9.843364254011487e-06, "loss": 0.5842, "step": 8992 }, { "epoch": 0.10752161073183562, "grad_norm": 9.549616813659668, "learning_rate": 9.843316167445327e-06, "loss": 0.6873, "step": 8993 }, { "epoch": 0.10753356687669628, "grad_norm": 2.2358052730560303, "learning_rate": 9.843268073616598e-06, "loss": 0.6924, "step": 8994 }, { "epoch": 0.10754552302155693, "grad_norm": 2.2045605182647705, "learning_rate": 9.84321997252537e-06, "loss": 0.6776, "step": 8995 }, { "epoch": 0.10755747916641759, "grad_norm": 4.091456413269043, "learning_rate": 9.843171864171717e-06, "loss": 0.5327, "step": 8996 }, { "epoch": 0.10756943531127823, "grad_norm": 1.7902461290359497, "learning_rate": 9.843123748555711e-06, "loss": 0.7788, "step": 8997 }, { "epoch": 0.10758139145613889, "grad_norm": 1.8031001091003418, "learning_rate": 9.843075625677425e-06, "loss": 0.6439, "step": 8998 }, { "epoch": 0.10759334760099953, "grad_norm": 3.8211944103240967, "learning_rate": 9.843027495536928e-06, "loss": 0.6362, "step": 8999 }, { "epoch": 0.10760530374586018, "grad_norm": 1.9732688665390015, "learning_rate": 9.842979358134295e-06, "loss": 0.5634, "step": 9000 }, { "epoch": 0.10761725989072084, "grad_norm": 2.8451144695281982, "learning_rate": 9.842931213469596e-06, "loss": 0.5636, "step": 9001 }, { "epoch": 0.10762921603558148, "grad_norm": 2.393007755279541, "learning_rate": 9.842883061542905e-06, "loss": 0.5331, "step": 9002 }, { "epoch": 0.10764117218044214, "grad_norm": 2.189347267150879, "learning_rate": 9.842834902354295e-06, "loss": 0.59, "step": 9003 }, { "epoch": 0.10765312832530279, "grad_norm": 4.492251396179199, "learning_rate": 9.842786735903836e-06, "loss": 0.6153, "step": 9004 }, { "epoch": 0.10766508447016344, "grad_norm": 2.1021032333374023, "learning_rate": 9.8427385621916e-06, "loss": 0.6911, "step": 9005 }, { "epoch": 0.10767704061502409, "grad_norm": 1.4869048595428467, "learning_rate": 9.842690381217662e-06, "loss": 0.5145, "step": 9006 }, { "epoch": 0.10768899675988475, "grad_norm": 2.928790807723999, "learning_rate": 9.84264219298209e-06, "loss": 0.7037, "step": 9007 }, { "epoch": 0.10770095290474539, "grad_norm": 1.6455186605453491, "learning_rate": 9.84259399748496e-06, "loss": 0.578, "step": 9008 }, { "epoch": 0.10771290904960605, "grad_norm": 1.9772318601608276, "learning_rate": 9.842545794726344e-06, "loss": 0.6157, "step": 9009 }, { "epoch": 0.1077248651944667, "grad_norm": 1.5935919284820557, "learning_rate": 9.842497584706314e-06, "loss": 0.5963, "step": 9010 }, { "epoch": 0.10773682133932734, "grad_norm": 1.7382973432540894, "learning_rate": 9.84244936742494e-06, "loss": 0.6316, "step": 9011 }, { "epoch": 0.107748777484188, "grad_norm": 1.716320514678955, "learning_rate": 9.842401142882297e-06, "loss": 0.6824, "step": 9012 }, { "epoch": 0.10776073362904864, "grad_norm": 2.4916396141052246, "learning_rate": 9.842352911078455e-06, "loss": 0.6789, "step": 9013 }, { "epoch": 0.1077726897739093, "grad_norm": 1.547133207321167, "learning_rate": 9.842304672013489e-06, "loss": 0.5622, "step": 9014 }, { "epoch": 0.10778464591876995, "grad_norm": 2.053164005279541, "learning_rate": 9.842256425687468e-06, "loss": 0.6344, "step": 9015 }, { "epoch": 0.1077966020636306, "grad_norm": 1.9605661630630493, "learning_rate": 9.84220817210047e-06, "loss": 0.5465, "step": 9016 }, { "epoch": 0.10780855820849125, "grad_norm": 1.765974998474121, "learning_rate": 9.84215991125256e-06, "loss": 0.6729, "step": 9017 }, { "epoch": 0.10782051435335191, "grad_norm": 2.043876886367798, "learning_rate": 9.842111643143816e-06, "loss": 0.6678, "step": 9018 }, { "epoch": 0.10783247049821255, "grad_norm": 3.016110420227051, "learning_rate": 9.842063367774307e-06, "loss": 0.6416, "step": 9019 }, { "epoch": 0.10784442664307321, "grad_norm": 1.981797218322754, "learning_rate": 9.842015085144107e-06, "loss": 0.6421, "step": 9020 }, { "epoch": 0.10785638278793386, "grad_norm": 24.24229621887207, "learning_rate": 9.84196679525329e-06, "loss": 0.6445, "step": 9021 }, { "epoch": 0.10786833893279452, "grad_norm": 1.717856764793396, "learning_rate": 9.841918498101925e-06, "loss": 0.617, "step": 9022 }, { "epoch": 0.10788029507765516, "grad_norm": 2.68576979637146, "learning_rate": 9.841870193690087e-06, "loss": 0.6837, "step": 9023 }, { "epoch": 0.1078922512225158, "grad_norm": 2.8357300758361816, "learning_rate": 9.841821882017846e-06, "loss": 0.6509, "step": 9024 }, { "epoch": 0.10790420736737646, "grad_norm": 1.739734411239624, "learning_rate": 9.841773563085274e-06, "loss": 0.7045, "step": 9025 }, { "epoch": 0.10791616351223711, "grad_norm": 2.756596565246582, "learning_rate": 9.84172523689245e-06, "loss": 0.6198, "step": 9026 }, { "epoch": 0.10792811965709777, "grad_norm": 1.8626919984817505, "learning_rate": 9.841676903439438e-06, "loss": 0.6222, "step": 9027 }, { "epoch": 0.10794007580195841, "grad_norm": 2.670145034790039, "learning_rate": 9.841628562726317e-06, "loss": 0.5894, "step": 9028 }, { "epoch": 0.10795203194681907, "grad_norm": 4.025383472442627, "learning_rate": 9.841580214753157e-06, "loss": 0.6196, "step": 9029 }, { "epoch": 0.10796398809167972, "grad_norm": 1.6495071649551392, "learning_rate": 9.841531859520028e-06, "loss": 0.5866, "step": 9030 }, { "epoch": 0.10797594423654037, "grad_norm": 1.7062982320785522, "learning_rate": 9.841483497027005e-06, "loss": 0.5613, "step": 9031 }, { "epoch": 0.10798790038140102, "grad_norm": 1.8025615215301514, "learning_rate": 9.841435127274162e-06, "loss": 0.7388, "step": 9032 }, { "epoch": 0.10799985652626168, "grad_norm": 32.39241409301758, "learning_rate": 9.841386750261568e-06, "loss": 0.6711, "step": 9033 }, { "epoch": 0.10801181267112232, "grad_norm": 14.708858489990234, "learning_rate": 9.841338365989298e-06, "loss": 0.5849, "step": 9034 }, { "epoch": 0.10802376881598297, "grad_norm": 5.374484062194824, "learning_rate": 9.841289974457424e-06, "loss": 0.7115, "step": 9035 }, { "epoch": 0.10803572496084363, "grad_norm": 2.1257317066192627, "learning_rate": 9.841241575666019e-06, "loss": 0.5918, "step": 9036 }, { "epoch": 0.10804768110570427, "grad_norm": 1.7319601774215698, "learning_rate": 9.841193169615153e-06, "loss": 0.5847, "step": 9037 }, { "epoch": 0.10805963725056493, "grad_norm": 8.838606834411621, "learning_rate": 9.841144756304904e-06, "loss": 0.5781, "step": 9038 }, { "epoch": 0.10807159339542557, "grad_norm": 1.7470498085021973, "learning_rate": 9.841096335735338e-06, "loss": 0.6788, "step": 9039 }, { "epoch": 0.10808354954028623, "grad_norm": 1.4656720161437988, "learning_rate": 9.841047907906533e-06, "loss": 0.5933, "step": 9040 }, { "epoch": 0.10809550568514688, "grad_norm": 1.4559905529022217, "learning_rate": 9.84099947281856e-06, "loss": 0.5958, "step": 9041 }, { "epoch": 0.10810746183000754, "grad_norm": 3.537717342376709, "learning_rate": 9.840951030471489e-06, "loss": 0.6537, "step": 9042 }, { "epoch": 0.10811941797486818, "grad_norm": 6.2266716957092285, "learning_rate": 9.840902580865395e-06, "loss": 0.6625, "step": 9043 }, { "epoch": 0.10813137411972884, "grad_norm": 2.485334634780884, "learning_rate": 9.840854124000352e-06, "loss": 0.61, "step": 9044 }, { "epoch": 0.10814333026458949, "grad_norm": 2.386610269546509, "learning_rate": 9.84080565987643e-06, "loss": 0.6164, "step": 9045 }, { "epoch": 0.10815528640945014, "grad_norm": 2.385754346847534, "learning_rate": 9.8407571884937e-06, "loss": 0.6862, "step": 9046 }, { "epoch": 0.10816724255431079, "grad_norm": 3.0329129695892334, "learning_rate": 9.840708709852242e-06, "loss": 0.6085, "step": 9047 }, { "epoch": 0.10817919869917143, "grad_norm": 1.7877497673034668, "learning_rate": 9.840660223952123e-06, "loss": 0.5953, "step": 9048 }, { "epoch": 0.10819115484403209, "grad_norm": 2.0056045055389404, "learning_rate": 9.840611730793415e-06, "loss": 0.6793, "step": 9049 }, { "epoch": 0.10820311098889274, "grad_norm": 1.6988672018051147, "learning_rate": 9.840563230376193e-06, "loss": 0.6197, "step": 9050 }, { "epoch": 0.1082150671337534, "grad_norm": 3.0106940269470215, "learning_rate": 9.84051472270053e-06, "loss": 0.5862, "step": 9051 }, { "epoch": 0.10822702327861404, "grad_norm": 1.7000552415847778, "learning_rate": 9.840466207766496e-06, "loss": 0.6254, "step": 9052 }, { "epoch": 0.1082389794234747, "grad_norm": 4.962863445281982, "learning_rate": 9.84041768557417e-06, "loss": 0.6515, "step": 9053 }, { "epoch": 0.10825093556833534, "grad_norm": 1.6279417276382446, "learning_rate": 9.840369156123617e-06, "loss": 0.6423, "step": 9054 }, { "epoch": 0.108262891713196, "grad_norm": 4.217915058135986, "learning_rate": 9.840320619414914e-06, "loss": 0.6282, "step": 9055 }, { "epoch": 0.10827484785805665, "grad_norm": 3.9491031169891357, "learning_rate": 9.840272075448133e-06, "loss": 0.5867, "step": 9056 }, { "epoch": 0.1082868040029173, "grad_norm": 1.5027457475662231, "learning_rate": 9.840223524223347e-06, "loss": 0.615, "step": 9057 }, { "epoch": 0.10829876014777795, "grad_norm": 2.3987765312194824, "learning_rate": 9.840174965740627e-06, "loss": 0.5833, "step": 9058 }, { "epoch": 0.1083107162926386, "grad_norm": 2.362229108810425, "learning_rate": 9.84012640000005e-06, "loss": 0.66, "step": 9059 }, { "epoch": 0.10832267243749925, "grad_norm": 2.795592784881592, "learning_rate": 9.840077827001687e-06, "loss": 0.6821, "step": 9060 }, { "epoch": 0.1083346285823599, "grad_norm": 1.5570428371429443, "learning_rate": 9.840029246745605e-06, "loss": 0.6061, "step": 9061 }, { "epoch": 0.10834658472722056, "grad_norm": 2.050787925720215, "learning_rate": 9.839980659231886e-06, "loss": 0.6966, "step": 9062 }, { "epoch": 0.1083585408720812, "grad_norm": 4.552443027496338, "learning_rate": 9.8399320644606e-06, "loss": 0.6334, "step": 9063 }, { "epoch": 0.10837049701694186, "grad_norm": 2.8635754585266113, "learning_rate": 9.839883462431816e-06, "loss": 0.8025, "step": 9064 }, { "epoch": 0.1083824531618025, "grad_norm": 5.058707237243652, "learning_rate": 9.83983485314561e-06, "loss": 0.5797, "step": 9065 }, { "epoch": 0.10839440930666316, "grad_norm": 2.3178508281707764, "learning_rate": 9.839786236602054e-06, "loss": 0.6033, "step": 9066 }, { "epoch": 0.10840636545152381, "grad_norm": 2.3009440898895264, "learning_rate": 9.839737612801223e-06, "loss": 0.64, "step": 9067 }, { "epoch": 0.10841832159638447, "grad_norm": 2.1985299587249756, "learning_rate": 9.839688981743187e-06, "loss": 0.6807, "step": 9068 }, { "epoch": 0.10843027774124511, "grad_norm": 3.6515839099884033, "learning_rate": 9.83964034342802e-06, "loss": 0.6342, "step": 9069 }, { "epoch": 0.10844223388610576, "grad_norm": 2.660618782043457, "learning_rate": 9.839591697855795e-06, "loss": 0.6439, "step": 9070 }, { "epoch": 0.10845419003096642, "grad_norm": 1.5134519338607788, "learning_rate": 9.839543045026586e-06, "loss": 0.613, "step": 9071 }, { "epoch": 0.10846614617582706, "grad_norm": 2.371825695037842, "learning_rate": 9.839494384940465e-06, "loss": 0.6146, "step": 9072 }, { "epoch": 0.10847810232068772, "grad_norm": 2.591392755508423, "learning_rate": 9.839445717597505e-06, "loss": 0.6416, "step": 9073 }, { "epoch": 0.10849005846554836, "grad_norm": 6.276492595672607, "learning_rate": 9.839397042997779e-06, "loss": 0.6558, "step": 9074 }, { "epoch": 0.10850201461040902, "grad_norm": 1.9784094095230103, "learning_rate": 9.83934836114136e-06, "loss": 0.6562, "step": 9075 }, { "epoch": 0.10851397075526967, "grad_norm": 2.858229160308838, "learning_rate": 9.83929967202832e-06, "loss": 0.5851, "step": 9076 }, { "epoch": 0.10852592690013033, "grad_norm": 2.8600261211395264, "learning_rate": 9.839250975658731e-06, "loss": 0.6522, "step": 9077 }, { "epoch": 0.10853788304499097, "grad_norm": 1.7795042991638184, "learning_rate": 9.83920227203267e-06, "loss": 0.706, "step": 9078 }, { "epoch": 0.10854983918985163, "grad_norm": 2.664503335952759, "learning_rate": 9.83915356115021e-06, "loss": 0.6607, "step": 9079 }, { "epoch": 0.10856179533471227, "grad_norm": 2.524913787841797, "learning_rate": 9.83910484301142e-06, "loss": 0.5595, "step": 9080 }, { "epoch": 0.10857375147957293, "grad_norm": 3.2561118602752686, "learning_rate": 9.839056117616376e-06, "loss": 0.5332, "step": 9081 }, { "epoch": 0.10858570762443358, "grad_norm": 1.4066399335861206, "learning_rate": 9.839007384965149e-06, "loss": 0.5911, "step": 9082 }, { "epoch": 0.10859766376929422, "grad_norm": 1.4213435649871826, "learning_rate": 9.838958645057814e-06, "loss": 0.5986, "step": 9083 }, { "epoch": 0.10860961991415488, "grad_norm": 1.6313937902450562, "learning_rate": 9.838909897894443e-06, "loss": 0.6705, "step": 9084 }, { "epoch": 0.10862157605901553, "grad_norm": 1.506576657295227, "learning_rate": 9.83886114347511e-06, "loss": 0.5491, "step": 9085 }, { "epoch": 0.10863353220387618, "grad_norm": 2.7937231063842773, "learning_rate": 9.838812381799887e-06, "loss": 0.6189, "step": 9086 }, { "epoch": 0.10864548834873683, "grad_norm": 2.096956491470337, "learning_rate": 9.838763612868846e-06, "loss": 0.5636, "step": 9087 }, { "epoch": 0.10865744449359749, "grad_norm": 4.846706390380859, "learning_rate": 9.838714836682063e-06, "loss": 0.5758, "step": 9088 }, { "epoch": 0.10866940063845813, "grad_norm": 1.6500117778778076, "learning_rate": 9.838666053239612e-06, "loss": 0.6391, "step": 9089 }, { "epoch": 0.10868135678331879, "grad_norm": 2.395261764526367, "learning_rate": 9.838617262541563e-06, "loss": 0.5482, "step": 9090 }, { "epoch": 0.10869331292817944, "grad_norm": 15.211037635803223, "learning_rate": 9.838568464587988e-06, "loss": 0.6831, "step": 9091 }, { "epoch": 0.1087052690730401, "grad_norm": 1.9864450693130493, "learning_rate": 9.838519659378965e-06, "loss": 0.6284, "step": 9092 }, { "epoch": 0.10871722521790074, "grad_norm": 1.645793080329895, "learning_rate": 9.838470846914563e-06, "loss": 0.5898, "step": 9093 }, { "epoch": 0.10872918136276138, "grad_norm": 3.243173837661743, "learning_rate": 9.838422027194857e-06, "loss": 0.6154, "step": 9094 }, { "epoch": 0.10874113750762204, "grad_norm": 5.5095109939575195, "learning_rate": 9.83837320021992e-06, "loss": 0.5657, "step": 9095 }, { "epoch": 0.10875309365248269, "grad_norm": 1.758573293685913, "learning_rate": 9.838324365989827e-06, "loss": 0.637, "step": 9096 }, { "epoch": 0.10876504979734335, "grad_norm": 6.192301273345947, "learning_rate": 9.838275524504646e-06, "loss": 0.602, "step": 9097 }, { "epoch": 0.10877700594220399, "grad_norm": 2.5611042976379395, "learning_rate": 9.838226675764457e-06, "loss": 0.7017, "step": 9098 }, { "epoch": 0.10878896208706465, "grad_norm": 1.840624451637268, "learning_rate": 9.838177819769327e-06, "loss": 0.5036, "step": 9099 }, { "epoch": 0.1088009182319253, "grad_norm": 1.7316577434539795, "learning_rate": 9.838128956519335e-06, "loss": 0.6216, "step": 9100 }, { "epoch": 0.10881287437678595, "grad_norm": 1.96091628074646, "learning_rate": 9.83808008601455e-06, "loss": 0.7736, "step": 9101 }, { "epoch": 0.1088248305216466, "grad_norm": 4.570071697235107, "learning_rate": 9.838031208255045e-06, "loss": 0.6779, "step": 9102 }, { "epoch": 0.10883678666650726, "grad_norm": 1.545535922050476, "learning_rate": 9.837982323240897e-06, "loss": 0.6525, "step": 9103 }, { "epoch": 0.1088487428113679, "grad_norm": 1.9318218231201172, "learning_rate": 9.837933430972178e-06, "loss": 0.664, "step": 9104 }, { "epoch": 0.10886069895622856, "grad_norm": 1.7460922002792358, "learning_rate": 9.83788453144896e-06, "loss": 0.6314, "step": 9105 }, { "epoch": 0.1088726551010892, "grad_norm": 1.3582360744476318, "learning_rate": 9.837835624671317e-06, "loss": 0.6391, "step": 9106 }, { "epoch": 0.10888461124594985, "grad_norm": 1.7638299465179443, "learning_rate": 9.83778671063932e-06, "loss": 0.5272, "step": 9107 }, { "epoch": 0.10889656739081051, "grad_norm": 4.027317523956299, "learning_rate": 9.837737789353048e-06, "loss": 0.5849, "step": 9108 }, { "epoch": 0.10890852353567115, "grad_norm": 1.5340908765792847, "learning_rate": 9.837688860812569e-06, "loss": 0.577, "step": 9109 }, { "epoch": 0.10892047968053181, "grad_norm": 1.8549168109893799, "learning_rate": 9.837639925017958e-06, "loss": 0.5738, "step": 9110 }, { "epoch": 0.10893243582539246, "grad_norm": 1.2311246395111084, "learning_rate": 9.83759098196929e-06, "loss": 0.5459, "step": 9111 }, { "epoch": 0.10894439197025312, "grad_norm": 9.7588472366333, "learning_rate": 9.837542031666637e-06, "loss": 0.5708, "step": 9112 }, { "epoch": 0.10895634811511376, "grad_norm": 1.3993034362792969, "learning_rate": 9.837493074110071e-06, "loss": 0.5964, "step": 9113 }, { "epoch": 0.10896830425997442, "grad_norm": 2.38218092918396, "learning_rate": 9.83744410929967e-06, "loss": 0.6953, "step": 9114 }, { "epoch": 0.10898026040483506, "grad_norm": 3.2473604679107666, "learning_rate": 9.837395137235502e-06, "loss": 0.6325, "step": 9115 }, { "epoch": 0.10899221654969572, "grad_norm": 2.231687545776367, "learning_rate": 9.837346157917644e-06, "loss": 0.659, "step": 9116 }, { "epoch": 0.10900417269455637, "grad_norm": 2.071270704269409, "learning_rate": 9.837297171346168e-06, "loss": 0.7281, "step": 9117 }, { "epoch": 0.10901612883941701, "grad_norm": 2.003109931945801, "learning_rate": 9.837248177521146e-06, "loss": 0.6331, "step": 9118 }, { "epoch": 0.10902808498427767, "grad_norm": 2.0874035358428955, "learning_rate": 9.837199176442655e-06, "loss": 0.6567, "step": 9119 }, { "epoch": 0.10904004112913832, "grad_norm": 1.898958683013916, "learning_rate": 9.837150168110766e-06, "loss": 0.6315, "step": 9120 }, { "epoch": 0.10905199727399897, "grad_norm": 1.445091962814331, "learning_rate": 9.837101152525554e-06, "loss": 0.6218, "step": 9121 }, { "epoch": 0.10906395341885962, "grad_norm": 1.2385379076004028, "learning_rate": 9.83705212968709e-06, "loss": 0.6356, "step": 9122 }, { "epoch": 0.10907590956372028, "grad_norm": 2.0162458419799805, "learning_rate": 9.837003099595451e-06, "loss": 0.5266, "step": 9123 }, { "epoch": 0.10908786570858092, "grad_norm": 4.162867546081543, "learning_rate": 9.836954062250708e-06, "loss": 0.6554, "step": 9124 }, { "epoch": 0.10909982185344158, "grad_norm": 1.9789639711380005, "learning_rate": 9.836905017652933e-06, "loss": 0.684, "step": 9125 }, { "epoch": 0.10911177799830223, "grad_norm": 1.5280287265777588, "learning_rate": 9.836855965802204e-06, "loss": 0.5048, "step": 9126 }, { "epoch": 0.10912373414316288, "grad_norm": 5.332552433013916, "learning_rate": 9.836806906698592e-06, "loss": 0.6013, "step": 9127 }, { "epoch": 0.10913569028802353, "grad_norm": 2.273592710494995, "learning_rate": 9.83675784034217e-06, "loss": 0.6765, "step": 9128 }, { "epoch": 0.10914764643288417, "grad_norm": 2.154555082321167, "learning_rate": 9.836708766733015e-06, "loss": 0.6076, "step": 9129 }, { "epoch": 0.10915960257774483, "grad_norm": 2.314476728439331, "learning_rate": 9.836659685871195e-06, "loss": 0.7092, "step": 9130 }, { "epoch": 0.10917155872260548, "grad_norm": 2.5336103439331055, "learning_rate": 9.836610597756788e-06, "loss": 0.6255, "step": 9131 }, { "epoch": 0.10918351486746614, "grad_norm": 1.874288558959961, "learning_rate": 9.836561502389866e-06, "loss": 0.6263, "step": 9132 }, { "epoch": 0.10919547101232678, "grad_norm": 4.235125541687012, "learning_rate": 9.836512399770502e-06, "loss": 0.5686, "step": 9133 }, { "epoch": 0.10920742715718744, "grad_norm": 2.6896426677703857, "learning_rate": 9.836463289898773e-06, "loss": 0.5602, "step": 9134 }, { "epoch": 0.10921938330204808, "grad_norm": 1.712402582168579, "learning_rate": 9.836414172774748e-06, "loss": 0.5605, "step": 9135 }, { "epoch": 0.10923133944690874, "grad_norm": 10.700676918029785, "learning_rate": 9.836365048398503e-06, "loss": 0.6376, "step": 9136 }, { "epoch": 0.10924329559176939, "grad_norm": 4.804296493530273, "learning_rate": 9.836315916770112e-06, "loss": 0.5409, "step": 9137 }, { "epoch": 0.10925525173663005, "grad_norm": 1.5260752439498901, "learning_rate": 9.836266777889647e-06, "loss": 0.552, "step": 9138 }, { "epoch": 0.10926720788149069, "grad_norm": 2.285155773162842, "learning_rate": 9.836217631757183e-06, "loss": 0.6276, "step": 9139 }, { "epoch": 0.10927916402635135, "grad_norm": 1.968634843826294, "learning_rate": 9.836168478372794e-06, "loss": 0.668, "step": 9140 }, { "epoch": 0.109291120171212, "grad_norm": 2.9699337482452393, "learning_rate": 9.836119317736554e-06, "loss": 0.6158, "step": 9141 }, { "epoch": 0.10930307631607264, "grad_norm": 2.7182323932647705, "learning_rate": 9.836070149848533e-06, "loss": 0.6569, "step": 9142 }, { "epoch": 0.1093150324609333, "grad_norm": 1.5642560720443726, "learning_rate": 9.83602097470881e-06, "loss": 0.6531, "step": 9143 }, { "epoch": 0.10932698860579394, "grad_norm": 2.7988245487213135, "learning_rate": 9.835971792317457e-06, "loss": 0.6614, "step": 9144 }, { "epoch": 0.1093389447506546, "grad_norm": 4.1617536544799805, "learning_rate": 9.835922602674545e-06, "loss": 0.6073, "step": 9145 }, { "epoch": 0.10935090089551525, "grad_norm": 3.7011191844940186, "learning_rate": 9.835873405780152e-06, "loss": 0.6766, "step": 9146 }, { "epoch": 0.1093628570403759, "grad_norm": 4.107875823974609, "learning_rate": 9.835824201634347e-06, "loss": 0.6923, "step": 9147 }, { "epoch": 0.10937481318523655, "grad_norm": 2.4842684268951416, "learning_rate": 9.835774990237208e-06, "loss": 0.6998, "step": 9148 }, { "epoch": 0.10938676933009721, "grad_norm": 1.7369225025177002, "learning_rate": 9.835725771588808e-06, "loss": 0.6181, "step": 9149 }, { "epoch": 0.10939872547495785, "grad_norm": 2.2656803131103516, "learning_rate": 9.835676545689218e-06, "loss": 0.641, "step": 9150 }, { "epoch": 0.10941068161981851, "grad_norm": 6.841588497161865, "learning_rate": 9.835627312538517e-06, "loss": 0.6635, "step": 9151 }, { "epoch": 0.10942263776467916, "grad_norm": 1.8513978719711304, "learning_rate": 9.835578072136773e-06, "loss": 0.5527, "step": 9152 }, { "epoch": 0.1094345939095398, "grad_norm": 2.387436866760254, "learning_rate": 9.835528824484064e-06, "loss": 0.7128, "step": 9153 }, { "epoch": 0.10944655005440046, "grad_norm": 4.858328819274902, "learning_rate": 9.835479569580462e-06, "loss": 0.5864, "step": 9154 }, { "epoch": 0.1094585061992611, "grad_norm": 1.7353827953338623, "learning_rate": 9.835430307426039e-06, "loss": 0.5883, "step": 9155 }, { "epoch": 0.10947046234412176, "grad_norm": 2.8116328716278076, "learning_rate": 9.835381038020873e-06, "loss": 0.6052, "step": 9156 }, { "epoch": 0.10948241848898241, "grad_norm": 3.1408798694610596, "learning_rate": 9.835331761365036e-06, "loss": 0.6527, "step": 9157 }, { "epoch": 0.10949437463384307, "grad_norm": 2.5967886447906494, "learning_rate": 9.835282477458603e-06, "loss": 0.668, "step": 9158 }, { "epoch": 0.10950633077870371, "grad_norm": 2.9802258014678955, "learning_rate": 9.835233186301645e-06, "loss": 0.6506, "step": 9159 }, { "epoch": 0.10951828692356437, "grad_norm": 1.6761380434036255, "learning_rate": 9.835183887894237e-06, "loss": 0.6452, "step": 9160 }, { "epoch": 0.10953024306842502, "grad_norm": 1.957054615020752, "learning_rate": 9.835134582236455e-06, "loss": 0.5963, "step": 9161 }, { "epoch": 0.10954219921328567, "grad_norm": 1.6871109008789062, "learning_rate": 9.83508526932837e-06, "loss": 0.6279, "step": 9162 }, { "epoch": 0.10955415535814632, "grad_norm": 1.8495770692825317, "learning_rate": 9.83503594917006e-06, "loss": 0.5934, "step": 9163 }, { "epoch": 0.10956611150300698, "grad_norm": 2.5976343154907227, "learning_rate": 9.834986621761594e-06, "loss": 0.7402, "step": 9164 }, { "epoch": 0.10957806764786762, "grad_norm": 1.6371279954910278, "learning_rate": 9.83493728710305e-06, "loss": 0.6246, "step": 9165 }, { "epoch": 0.10959002379272827, "grad_norm": 2.2495388984680176, "learning_rate": 9.8348879451945e-06, "loss": 0.5827, "step": 9166 }, { "epoch": 0.10960197993758893, "grad_norm": 4.4346723556518555, "learning_rate": 9.834838596036018e-06, "loss": 0.6344, "step": 9167 }, { "epoch": 0.10961393608244957, "grad_norm": 1.6490044593811035, "learning_rate": 9.834789239627678e-06, "loss": 0.5859, "step": 9168 }, { "epoch": 0.10962589222731023, "grad_norm": 2.3544678688049316, "learning_rate": 9.834739875969554e-06, "loss": 0.5753, "step": 9169 }, { "epoch": 0.10963784837217087, "grad_norm": 5.492090225219727, "learning_rate": 9.834690505061721e-06, "loss": 0.5554, "step": 9170 }, { "epoch": 0.10964980451703153, "grad_norm": 2.9932363033294678, "learning_rate": 9.834641126904253e-06, "loss": 0.697, "step": 9171 }, { "epoch": 0.10966176066189218, "grad_norm": 1.9110517501831055, "learning_rate": 9.834591741497222e-06, "loss": 0.6433, "step": 9172 }, { "epoch": 0.10967371680675284, "grad_norm": 2.020724296569824, "learning_rate": 9.834542348840706e-06, "loss": 0.7332, "step": 9173 }, { "epoch": 0.10968567295161348, "grad_norm": 5.665092945098877, "learning_rate": 9.834492948934774e-06, "loss": 0.5265, "step": 9174 }, { "epoch": 0.10969762909647414, "grad_norm": 4.052839756011963, "learning_rate": 9.834443541779503e-06, "loss": 0.6261, "step": 9175 }, { "epoch": 0.10970958524133478, "grad_norm": 1.4601749181747437, "learning_rate": 9.834394127374967e-06, "loss": 0.5572, "step": 9176 }, { "epoch": 0.10972154138619543, "grad_norm": 1.4443575143814087, "learning_rate": 9.834344705721242e-06, "loss": 0.6291, "step": 9177 }, { "epoch": 0.10973349753105609, "grad_norm": 8.96803092956543, "learning_rate": 9.834295276818397e-06, "loss": 0.7115, "step": 9178 }, { "epoch": 0.10974545367591673, "grad_norm": 2.728438377380371, "learning_rate": 9.83424584066651e-06, "loss": 0.5587, "step": 9179 }, { "epoch": 0.10975740982077739, "grad_norm": 1.8861677646636963, "learning_rate": 9.834196397265654e-06, "loss": 0.5532, "step": 9180 }, { "epoch": 0.10976936596563804, "grad_norm": 18.722253799438477, "learning_rate": 9.834146946615904e-06, "loss": 0.7665, "step": 9181 }, { "epoch": 0.1097813221104987, "grad_norm": 1.8640574216842651, "learning_rate": 9.834097488717334e-06, "loss": 0.6783, "step": 9182 }, { "epoch": 0.10979327825535934, "grad_norm": 6.855612754821777, "learning_rate": 9.834048023570016e-06, "loss": 0.6351, "step": 9183 }, { "epoch": 0.10980523440022, "grad_norm": 2.422372341156006, "learning_rate": 9.833998551174026e-06, "loss": 0.6147, "step": 9184 }, { "epoch": 0.10981719054508064, "grad_norm": 2.938323497772217, "learning_rate": 9.833949071529439e-06, "loss": 0.6334, "step": 9185 }, { "epoch": 0.1098291466899413, "grad_norm": 2.5547618865966797, "learning_rate": 9.833899584636328e-06, "loss": 0.659, "step": 9186 }, { "epoch": 0.10984110283480195, "grad_norm": 3.4885919094085693, "learning_rate": 9.833850090494766e-06, "loss": 0.5804, "step": 9187 }, { "epoch": 0.1098530589796626, "grad_norm": 9.908679962158203, "learning_rate": 9.83380058910483e-06, "loss": 0.6917, "step": 9188 }, { "epoch": 0.10986501512452325, "grad_norm": 3.870633602142334, "learning_rate": 9.833751080466593e-06, "loss": 0.6728, "step": 9189 }, { "epoch": 0.1098769712693839, "grad_norm": 2.885040760040283, "learning_rate": 9.83370156458013e-06, "loss": 0.5704, "step": 9190 }, { "epoch": 0.10988892741424455, "grad_norm": 23.390228271484375, "learning_rate": 9.833652041445511e-06, "loss": 0.6302, "step": 9191 }, { "epoch": 0.1099008835591052, "grad_norm": 7.360520839691162, "learning_rate": 9.833602511062816e-06, "loss": 0.622, "step": 9192 }, { "epoch": 0.10991283970396586, "grad_norm": 2.1083459854125977, "learning_rate": 9.833552973432117e-06, "loss": 0.6793, "step": 9193 }, { "epoch": 0.1099247958488265, "grad_norm": 3.4124677181243896, "learning_rate": 9.833503428553487e-06, "loss": 0.6563, "step": 9194 }, { "epoch": 0.10993675199368716, "grad_norm": 5.301535129547119, "learning_rate": 9.833453876427001e-06, "loss": 0.5931, "step": 9195 }, { "epoch": 0.1099487081385478, "grad_norm": 4.284478187561035, "learning_rate": 9.833404317052735e-06, "loss": 0.5952, "step": 9196 }, { "epoch": 0.10996066428340846, "grad_norm": 2.0511443614959717, "learning_rate": 9.833354750430761e-06, "loss": 0.7258, "step": 9197 }, { "epoch": 0.10997262042826911, "grad_norm": 1.61002516746521, "learning_rate": 9.833305176561156e-06, "loss": 0.6772, "step": 9198 }, { "epoch": 0.10998457657312977, "grad_norm": 1.5230555534362793, "learning_rate": 9.83325559544399e-06, "loss": 0.6457, "step": 9199 }, { "epoch": 0.10999653271799041, "grad_norm": 2.845982789993286, "learning_rate": 9.833206007079343e-06, "loss": 0.6619, "step": 9200 }, { "epoch": 0.11000848886285106, "grad_norm": 4.213005542755127, "learning_rate": 9.833156411467287e-06, "loss": 0.6445, "step": 9201 }, { "epoch": 0.11002044500771171, "grad_norm": 6.9164958000183105, "learning_rate": 9.833106808607893e-06, "loss": 0.6634, "step": 9202 }, { "epoch": 0.11003240115257236, "grad_norm": 2.942434310913086, "learning_rate": 9.83305719850124e-06, "loss": 0.6465, "step": 9203 }, { "epoch": 0.11004435729743302, "grad_norm": 4.588923931121826, "learning_rate": 9.833007581147399e-06, "loss": 0.6706, "step": 9204 }, { "epoch": 0.11005631344229366, "grad_norm": 3.241420269012451, "learning_rate": 9.832957956546446e-06, "loss": 0.6098, "step": 9205 }, { "epoch": 0.11006826958715432, "grad_norm": 2.0853328704833984, "learning_rate": 9.832908324698456e-06, "loss": 0.7306, "step": 9206 }, { "epoch": 0.11008022573201497, "grad_norm": 1.781930923461914, "learning_rate": 9.832858685603503e-06, "loss": 0.5475, "step": 9207 }, { "epoch": 0.11009218187687562, "grad_norm": 2.4412124156951904, "learning_rate": 9.83280903926166e-06, "loss": 0.5067, "step": 9208 }, { "epoch": 0.11010413802173627, "grad_norm": 2.5784146785736084, "learning_rate": 9.832759385673005e-06, "loss": 0.6245, "step": 9209 }, { "epoch": 0.11011609416659693, "grad_norm": 5.945041656494141, "learning_rate": 9.832709724837609e-06, "loss": 0.7042, "step": 9210 }, { "epoch": 0.11012805031145757, "grad_norm": 2.176421642303467, "learning_rate": 9.832660056755548e-06, "loss": 0.6782, "step": 9211 }, { "epoch": 0.11014000645631822, "grad_norm": 1.8405096530914307, "learning_rate": 9.832610381426895e-06, "loss": 0.6311, "step": 9212 }, { "epoch": 0.11015196260117888, "grad_norm": 2.6441969871520996, "learning_rate": 9.832560698851726e-06, "loss": 0.614, "step": 9213 }, { "epoch": 0.11016391874603952, "grad_norm": 2.3255927562713623, "learning_rate": 9.832511009030114e-06, "loss": 0.5659, "step": 9214 }, { "epoch": 0.11017587489090018, "grad_norm": 2.0767414569854736, "learning_rate": 9.832461311962137e-06, "loss": 0.5384, "step": 9215 }, { "epoch": 0.11018783103576082, "grad_norm": 3.7622499465942383, "learning_rate": 9.832411607647866e-06, "loss": 0.6248, "step": 9216 }, { "epoch": 0.11019978718062148, "grad_norm": 1.8949693441390991, "learning_rate": 9.832361896087377e-06, "loss": 0.5599, "step": 9217 }, { "epoch": 0.11021174332548213, "grad_norm": 2.557046890258789, "learning_rate": 9.832312177280743e-06, "loss": 0.6263, "step": 9218 }, { "epoch": 0.11022369947034279, "grad_norm": 3.274238109588623, "learning_rate": 9.832262451228041e-06, "loss": 0.6326, "step": 9219 }, { "epoch": 0.11023565561520343, "grad_norm": 1.6070746183395386, "learning_rate": 9.832212717929342e-06, "loss": 0.6152, "step": 9220 }, { "epoch": 0.11024761176006409, "grad_norm": 2.3163766860961914, "learning_rate": 9.832162977384724e-06, "loss": 0.6475, "step": 9221 }, { "epoch": 0.11025956790492474, "grad_norm": 2.3954241275787354, "learning_rate": 9.832113229594263e-06, "loss": 0.7065, "step": 9222 }, { "epoch": 0.1102715240497854, "grad_norm": 4.786742210388184, "learning_rate": 9.832063474558028e-06, "loss": 0.55, "step": 9223 }, { "epoch": 0.11028348019464604, "grad_norm": 2.6478488445281982, "learning_rate": 9.832013712276098e-06, "loss": 0.6732, "step": 9224 }, { "epoch": 0.11029543633950668, "grad_norm": 2.7561984062194824, "learning_rate": 9.831963942748547e-06, "loss": 0.6376, "step": 9225 }, { "epoch": 0.11030739248436734, "grad_norm": 1.9376758337020874, "learning_rate": 9.831914165975445e-06, "loss": 0.5729, "step": 9226 }, { "epoch": 0.11031934862922799, "grad_norm": 3.8816797733306885, "learning_rate": 9.831864381956875e-06, "loss": 0.6325, "step": 9227 }, { "epoch": 0.11033130477408865, "grad_norm": 5.205363750457764, "learning_rate": 9.831814590692905e-06, "loss": 0.5997, "step": 9228 }, { "epoch": 0.11034326091894929, "grad_norm": 1.7564212083816528, "learning_rate": 9.831764792183611e-06, "loss": 0.6179, "step": 9229 }, { "epoch": 0.11035521706380995, "grad_norm": 6.1676130294799805, "learning_rate": 9.83171498642907e-06, "loss": 0.644, "step": 9230 }, { "epoch": 0.1103671732086706, "grad_norm": 4.689080238342285, "learning_rate": 9.831665173429354e-06, "loss": 0.634, "step": 9231 }, { "epoch": 0.11037912935353125, "grad_norm": 1.850610613822937, "learning_rate": 9.83161535318454e-06, "loss": 0.6507, "step": 9232 }, { "epoch": 0.1103910854983919, "grad_norm": 21.357873916625977, "learning_rate": 9.831565525694702e-06, "loss": 0.6092, "step": 9233 }, { "epoch": 0.11040304164325256, "grad_norm": 2.0725252628326416, "learning_rate": 9.831515690959913e-06, "loss": 0.5932, "step": 9234 }, { "epoch": 0.1104149977881132, "grad_norm": 2.967609405517578, "learning_rate": 9.83146584898025e-06, "loss": 0.669, "step": 9235 }, { "epoch": 0.11042695393297385, "grad_norm": 3.1587576866149902, "learning_rate": 9.831415999755788e-06, "loss": 0.6554, "step": 9236 }, { "epoch": 0.1104389100778345, "grad_norm": 1.8942075967788696, "learning_rate": 9.831366143286598e-06, "loss": 0.6913, "step": 9237 }, { "epoch": 0.11045086622269515, "grad_norm": 197.5926055908203, "learning_rate": 9.83131627957276e-06, "loss": 0.6447, "step": 9238 }, { "epoch": 0.11046282236755581, "grad_norm": 2.9075803756713867, "learning_rate": 9.831266408614344e-06, "loss": 0.6202, "step": 9239 }, { "epoch": 0.11047477851241645, "grad_norm": 6.754037380218506, "learning_rate": 9.831216530411427e-06, "loss": 0.5607, "step": 9240 }, { "epoch": 0.11048673465727711, "grad_norm": 2.154277801513672, "learning_rate": 9.831166644964083e-06, "loss": 0.6254, "step": 9241 }, { "epoch": 0.11049869080213776, "grad_norm": 2.370417833328247, "learning_rate": 9.83111675227239e-06, "loss": 0.6964, "step": 9242 }, { "epoch": 0.11051064694699841, "grad_norm": 2.832512378692627, "learning_rate": 9.831066852336419e-06, "loss": 0.6291, "step": 9243 }, { "epoch": 0.11052260309185906, "grad_norm": 8.185296058654785, "learning_rate": 9.831016945156244e-06, "loss": 0.6521, "step": 9244 }, { "epoch": 0.11053455923671972, "grad_norm": 5.557229518890381, "learning_rate": 9.830967030731945e-06, "loss": 0.5776, "step": 9245 }, { "epoch": 0.11054651538158036, "grad_norm": 1.472629427909851, "learning_rate": 9.830917109063592e-06, "loss": 0.6987, "step": 9246 }, { "epoch": 0.11055847152644102, "grad_norm": 2.981797456741333, "learning_rate": 9.830867180151262e-06, "loss": 0.5512, "step": 9247 }, { "epoch": 0.11057042767130167, "grad_norm": 6.801027297973633, "learning_rate": 9.83081724399503e-06, "loss": 0.6816, "step": 9248 }, { "epoch": 0.11058238381616231, "grad_norm": 1.590405821800232, "learning_rate": 9.83076730059497e-06, "loss": 0.5913, "step": 9249 }, { "epoch": 0.11059433996102297, "grad_norm": 1.7091342210769653, "learning_rate": 9.830717349951158e-06, "loss": 0.6205, "step": 9250 }, { "epoch": 0.11060629610588361, "grad_norm": 1.5982856750488281, "learning_rate": 9.830667392063666e-06, "loss": 0.6545, "step": 9251 }, { "epoch": 0.11061825225074427, "grad_norm": 1.9747012853622437, "learning_rate": 9.830617426932573e-06, "loss": 0.6187, "step": 9252 }, { "epoch": 0.11063020839560492, "grad_norm": 5.000236511230469, "learning_rate": 9.830567454557951e-06, "loss": 0.5816, "step": 9253 }, { "epoch": 0.11064216454046558, "grad_norm": 2.4737110137939453, "learning_rate": 9.830517474939876e-06, "loss": 0.6559, "step": 9254 }, { "epoch": 0.11065412068532622, "grad_norm": 1.8394864797592163, "learning_rate": 9.830467488078424e-06, "loss": 0.7233, "step": 9255 }, { "epoch": 0.11066607683018688, "grad_norm": 1.4102020263671875, "learning_rate": 9.830417493973667e-06, "loss": 0.6, "step": 9256 }, { "epoch": 0.11067803297504752, "grad_norm": 2.3956799507141113, "learning_rate": 9.830367492625683e-06, "loss": 0.5705, "step": 9257 }, { "epoch": 0.11068998911990818, "grad_norm": 4.931099891662598, "learning_rate": 9.830317484034546e-06, "loss": 0.5398, "step": 9258 }, { "epoch": 0.11070194526476883, "grad_norm": 2.600820541381836, "learning_rate": 9.830267468200332e-06, "loss": 0.6575, "step": 9259 }, { "epoch": 0.11071390140962947, "grad_norm": 2.944159507751465, "learning_rate": 9.830217445123113e-06, "loss": 0.6659, "step": 9260 }, { "epoch": 0.11072585755449013, "grad_norm": 5.6642985343933105, "learning_rate": 9.830167414802965e-06, "loss": 0.6192, "step": 9261 }, { "epoch": 0.11073781369935078, "grad_norm": 2.441554069519043, "learning_rate": 9.830117377239964e-06, "loss": 0.5283, "step": 9262 }, { "epoch": 0.11074976984421143, "grad_norm": 4.470638275146484, "learning_rate": 9.830067332434186e-06, "loss": 0.6263, "step": 9263 }, { "epoch": 0.11076172598907208, "grad_norm": 3.5404374599456787, "learning_rate": 9.830017280385704e-06, "loss": 0.6238, "step": 9264 }, { "epoch": 0.11077368213393274, "grad_norm": 5.101003646850586, "learning_rate": 9.829967221094595e-06, "loss": 0.6355, "step": 9265 }, { "epoch": 0.11078563827879338, "grad_norm": 17.10088348388672, "learning_rate": 9.829917154560933e-06, "loss": 0.6044, "step": 9266 }, { "epoch": 0.11079759442365404, "grad_norm": 2.5956006050109863, "learning_rate": 9.829867080784792e-06, "loss": 0.604, "step": 9267 }, { "epoch": 0.11080955056851469, "grad_norm": 1.7573000192642212, "learning_rate": 9.829816999766248e-06, "loss": 0.638, "step": 9268 }, { "epoch": 0.11082150671337535, "grad_norm": 1.542655348777771, "learning_rate": 9.829766911505377e-06, "loss": 0.7483, "step": 9269 }, { "epoch": 0.11083346285823599, "grad_norm": 3.9848291873931885, "learning_rate": 9.829716816002253e-06, "loss": 0.5985, "step": 9270 }, { "epoch": 0.11084541900309663, "grad_norm": 5.464112281799316, "learning_rate": 9.829666713256952e-06, "loss": 0.5283, "step": 9271 }, { "epoch": 0.1108573751479573, "grad_norm": 5.839920520782471, "learning_rate": 9.829616603269549e-06, "loss": 0.5752, "step": 9272 }, { "epoch": 0.11086933129281794, "grad_norm": 2.6496310234069824, "learning_rate": 9.829566486040118e-06, "loss": 0.7019, "step": 9273 }, { "epoch": 0.1108812874376786, "grad_norm": 3.7161993980407715, "learning_rate": 9.829516361568734e-06, "loss": 0.5217, "step": 9274 }, { "epoch": 0.11089324358253924, "grad_norm": 4.587987899780273, "learning_rate": 9.829466229855473e-06, "loss": 0.6311, "step": 9275 }, { "epoch": 0.1109051997273999, "grad_norm": 2.6314172744750977, "learning_rate": 9.829416090900413e-06, "loss": 0.6467, "step": 9276 }, { "epoch": 0.11091715587226054, "grad_norm": 3.1047396659851074, "learning_rate": 9.829365944703624e-06, "loss": 0.6655, "step": 9277 }, { "epoch": 0.1109291120171212, "grad_norm": 1.7204017639160156, "learning_rate": 9.829315791265184e-06, "loss": 0.6841, "step": 9278 }, { "epoch": 0.11094106816198185, "grad_norm": 2.3723785877227783, "learning_rate": 9.829265630585167e-06, "loss": 0.5784, "step": 9279 }, { "epoch": 0.11095302430684251, "grad_norm": 14.934289932250977, "learning_rate": 9.82921546266365e-06, "loss": 0.6084, "step": 9280 }, { "epoch": 0.11096498045170315, "grad_norm": 4.038763999938965, "learning_rate": 9.829165287500709e-06, "loss": 0.5849, "step": 9281 }, { "epoch": 0.11097693659656381, "grad_norm": 2.854642629623413, "learning_rate": 9.829115105096414e-06, "loss": 0.667, "step": 9282 }, { "epoch": 0.11098889274142446, "grad_norm": 1.4993672370910645, "learning_rate": 9.829064915450846e-06, "loss": 0.6105, "step": 9283 }, { "epoch": 0.1110008488862851, "grad_norm": 2.7549638748168945, "learning_rate": 9.829014718564077e-06, "loss": 0.5274, "step": 9284 }, { "epoch": 0.11101280503114576, "grad_norm": 3.966512441635132, "learning_rate": 9.828964514436184e-06, "loss": 0.7304, "step": 9285 }, { "epoch": 0.1110247611760064, "grad_norm": 2.174015760421753, "learning_rate": 9.82891430306724e-06, "loss": 0.6586, "step": 9286 }, { "epoch": 0.11103671732086706, "grad_norm": 3.0645153522491455, "learning_rate": 9.828864084457323e-06, "loss": 0.6005, "step": 9287 }, { "epoch": 0.1110486734657277, "grad_norm": 5.229443550109863, "learning_rate": 9.828813858606508e-06, "loss": 0.5972, "step": 9288 }, { "epoch": 0.11106062961058837, "grad_norm": 18.02303695678711, "learning_rate": 9.828763625514868e-06, "loss": 0.6679, "step": 9289 }, { "epoch": 0.11107258575544901, "grad_norm": 2.0449182987213135, "learning_rate": 9.828713385182482e-06, "loss": 0.6884, "step": 9290 }, { "epoch": 0.11108454190030967, "grad_norm": 3.9975342750549316, "learning_rate": 9.82866313760942e-06, "loss": 0.6349, "step": 9291 }, { "epoch": 0.11109649804517031, "grad_norm": 1.9378528594970703, "learning_rate": 9.828612882795763e-06, "loss": 0.6313, "step": 9292 }, { "epoch": 0.11110845419003097, "grad_norm": 1.9131522178649902, "learning_rate": 9.828562620741582e-06, "loss": 0.6538, "step": 9293 }, { "epoch": 0.11112041033489162, "grad_norm": 2.1868348121643066, "learning_rate": 9.828512351446955e-06, "loss": 0.6693, "step": 9294 }, { "epoch": 0.11113236647975226, "grad_norm": 2.34153151512146, "learning_rate": 9.828462074911958e-06, "loss": 0.5768, "step": 9295 }, { "epoch": 0.11114432262461292, "grad_norm": 25.19501304626465, "learning_rate": 9.828411791136663e-06, "loss": 0.6526, "step": 9296 }, { "epoch": 0.11115627876947357, "grad_norm": 1.8602908849716187, "learning_rate": 9.828361500121148e-06, "loss": 0.6232, "step": 9297 }, { "epoch": 0.11116823491433422, "grad_norm": 2.130861282348633, "learning_rate": 9.828311201865487e-06, "loss": 0.5703, "step": 9298 }, { "epoch": 0.11118019105919487, "grad_norm": 2.6595542430877686, "learning_rate": 9.828260896369756e-06, "loss": 0.5883, "step": 9299 }, { "epoch": 0.11119214720405553, "grad_norm": 2.9225940704345703, "learning_rate": 9.828210583634033e-06, "loss": 0.5989, "step": 9300 }, { "epoch": 0.11120410334891617, "grad_norm": 2.1760189533233643, "learning_rate": 9.828160263658389e-06, "loss": 0.6887, "step": 9301 }, { "epoch": 0.11121605949377683, "grad_norm": 1.9331490993499756, "learning_rate": 9.828109936442902e-06, "loss": 0.5887, "step": 9302 }, { "epoch": 0.11122801563863748, "grad_norm": 4.6688737869262695, "learning_rate": 9.828059601987647e-06, "loss": 0.5923, "step": 9303 }, { "epoch": 0.11123997178349813, "grad_norm": 1.6026700735092163, "learning_rate": 9.828009260292699e-06, "loss": 0.7008, "step": 9304 }, { "epoch": 0.11125192792835878, "grad_norm": 2.041062116622925, "learning_rate": 9.827958911358135e-06, "loss": 0.6207, "step": 9305 }, { "epoch": 0.11126388407321944, "grad_norm": 4.186577796936035, "learning_rate": 9.82790855518403e-06, "loss": 0.6085, "step": 9306 }, { "epoch": 0.11127584021808008, "grad_norm": 1.8515125513076782, "learning_rate": 9.827858191770456e-06, "loss": 0.6843, "step": 9307 }, { "epoch": 0.11128779636294073, "grad_norm": 4.7046217918396, "learning_rate": 9.827807821117495e-06, "loss": 0.6246, "step": 9308 }, { "epoch": 0.11129975250780139, "grad_norm": 1.6653013229370117, "learning_rate": 9.827757443225216e-06, "loss": 0.6569, "step": 9309 }, { "epoch": 0.11131170865266203, "grad_norm": 2.8669228553771973, "learning_rate": 9.827707058093699e-06, "loss": 0.5787, "step": 9310 }, { "epoch": 0.11132366479752269, "grad_norm": 2.91414213180542, "learning_rate": 9.827656665723018e-06, "loss": 0.615, "step": 9311 }, { "epoch": 0.11133562094238333, "grad_norm": 2.9041364192962646, "learning_rate": 9.82760626611325e-06, "loss": 0.5853, "step": 9312 }, { "epoch": 0.11134757708724399, "grad_norm": 4.209662914276123, "learning_rate": 9.827555859264466e-06, "loss": 0.5903, "step": 9313 }, { "epoch": 0.11135953323210464, "grad_norm": 2.0700454711914062, "learning_rate": 9.827505445176748e-06, "loss": 0.5664, "step": 9314 }, { "epoch": 0.1113714893769653, "grad_norm": 1.6764353513717651, "learning_rate": 9.827455023850168e-06, "loss": 0.6716, "step": 9315 }, { "epoch": 0.11138344552182594, "grad_norm": 4.203566551208496, "learning_rate": 9.8274045952848e-06, "loss": 0.7086, "step": 9316 }, { "epoch": 0.1113954016666866, "grad_norm": 3.8113420009613037, "learning_rate": 9.827354159480723e-06, "loss": 0.7104, "step": 9317 }, { "epoch": 0.11140735781154724, "grad_norm": 1.7465996742248535, "learning_rate": 9.82730371643801e-06, "loss": 0.6293, "step": 9318 }, { "epoch": 0.11141931395640789, "grad_norm": 3.001103639602661, "learning_rate": 9.82725326615674e-06, "loss": 0.6692, "step": 9319 }, { "epoch": 0.11143127010126855, "grad_norm": 3.230804204940796, "learning_rate": 9.827202808636986e-06, "loss": 0.6277, "step": 9320 }, { "epoch": 0.11144322624612919, "grad_norm": 8.866036415100098, "learning_rate": 9.827152343878822e-06, "loss": 0.6203, "step": 9321 }, { "epoch": 0.11145518239098985, "grad_norm": 3.072680950164795, "learning_rate": 9.82710187188233e-06, "loss": 0.6592, "step": 9322 }, { "epoch": 0.1114671385358505, "grad_norm": 2.0895678997039795, "learning_rate": 9.827051392647578e-06, "loss": 0.7242, "step": 9323 }, { "epoch": 0.11147909468071115, "grad_norm": 4.461756229400635, "learning_rate": 9.827000906174647e-06, "loss": 0.6207, "step": 9324 }, { "epoch": 0.1114910508255718, "grad_norm": 34.16529083251953, "learning_rate": 9.826950412463611e-06, "loss": 0.6069, "step": 9325 }, { "epoch": 0.11150300697043246, "grad_norm": 2.5416295528411865, "learning_rate": 9.826899911514545e-06, "loss": 0.6095, "step": 9326 }, { "epoch": 0.1115149631152931, "grad_norm": 2.5738983154296875, "learning_rate": 9.826849403327526e-06, "loss": 0.6956, "step": 9327 }, { "epoch": 0.11152691926015376, "grad_norm": 3.09794020652771, "learning_rate": 9.82679888790263e-06, "loss": 0.5534, "step": 9328 }, { "epoch": 0.1115388754050144, "grad_norm": 1.2754162549972534, "learning_rate": 9.826748365239929e-06, "loss": 0.6277, "step": 9329 }, { "epoch": 0.11155083154987507, "grad_norm": 1.7749812602996826, "learning_rate": 9.826697835339505e-06, "loss": 0.6326, "step": 9330 }, { "epoch": 0.11156278769473571, "grad_norm": 1.7992390394210815, "learning_rate": 9.826647298201429e-06, "loss": 0.5982, "step": 9331 }, { "epoch": 0.11157474383959635, "grad_norm": 3.6616954803466797, "learning_rate": 9.826596753825776e-06, "loss": 0.6261, "step": 9332 }, { "epoch": 0.11158669998445701, "grad_norm": 2.242746591567993, "learning_rate": 9.826546202212629e-06, "loss": 0.6252, "step": 9333 }, { "epoch": 0.11159865612931766, "grad_norm": 10.502673149108887, "learning_rate": 9.826495643362055e-06, "loss": 0.6777, "step": 9334 }, { "epoch": 0.11161061227417832, "grad_norm": 2.965108633041382, "learning_rate": 9.826445077274134e-06, "loss": 0.7061, "step": 9335 }, { "epoch": 0.11162256841903896, "grad_norm": 2.2845540046691895, "learning_rate": 9.826394503948942e-06, "loss": 0.6311, "step": 9336 }, { "epoch": 0.11163452456389962, "grad_norm": 1.6468456983566284, "learning_rate": 9.826343923386554e-06, "loss": 0.6127, "step": 9337 }, { "epoch": 0.11164648070876027, "grad_norm": 9.824012756347656, "learning_rate": 9.826293335587048e-06, "loss": 0.6824, "step": 9338 }, { "epoch": 0.11165843685362092, "grad_norm": 4.290395736694336, "learning_rate": 9.826242740550495e-06, "loss": 0.5295, "step": 9339 }, { "epoch": 0.11167039299848157, "grad_norm": 2.0684878826141357, "learning_rate": 9.826192138276976e-06, "loss": 0.7225, "step": 9340 }, { "epoch": 0.11168234914334223, "grad_norm": 2.569200038909912, "learning_rate": 9.826141528766565e-06, "loss": 0.5547, "step": 9341 }, { "epoch": 0.11169430528820287, "grad_norm": 2.3653571605682373, "learning_rate": 9.826090912019334e-06, "loss": 0.6781, "step": 9342 }, { "epoch": 0.11170626143306352, "grad_norm": 2.388152599334717, "learning_rate": 9.826040288035366e-06, "loss": 0.7013, "step": 9343 }, { "epoch": 0.11171821757792418, "grad_norm": 6.200438499450684, "learning_rate": 9.825989656814733e-06, "loss": 0.6172, "step": 9344 }, { "epoch": 0.11173017372278482, "grad_norm": 4.071606159210205, "learning_rate": 9.825939018357512e-06, "loss": 0.5294, "step": 9345 }, { "epoch": 0.11174212986764548, "grad_norm": 1.656996488571167, "learning_rate": 9.825888372663777e-06, "loss": 0.6233, "step": 9346 }, { "epoch": 0.11175408601250612, "grad_norm": 2.083486318588257, "learning_rate": 9.825837719733604e-06, "loss": 0.6283, "step": 9347 }, { "epoch": 0.11176604215736678, "grad_norm": 1.722527027130127, "learning_rate": 9.825787059567073e-06, "loss": 0.6241, "step": 9348 }, { "epoch": 0.11177799830222743, "grad_norm": 6.132310390472412, "learning_rate": 9.825736392164255e-06, "loss": 0.5306, "step": 9349 }, { "epoch": 0.11178995444708809, "grad_norm": 2.2046258449554443, "learning_rate": 9.825685717525229e-06, "loss": 0.6308, "step": 9350 }, { "epoch": 0.11180191059194873, "grad_norm": 2.2033369541168213, "learning_rate": 9.82563503565007e-06, "loss": 0.5356, "step": 9351 }, { "epoch": 0.11181386673680939, "grad_norm": 6.540464401245117, "learning_rate": 9.825584346538854e-06, "loss": 0.6631, "step": 9352 }, { "epoch": 0.11182582288167003, "grad_norm": 6.007403373718262, "learning_rate": 9.825533650191658e-06, "loss": 0.7224, "step": 9353 }, { "epoch": 0.11183777902653068, "grad_norm": 3.9960715770721436, "learning_rate": 9.825482946608555e-06, "loss": 0.6106, "step": 9354 }, { "epoch": 0.11184973517139134, "grad_norm": 9.596895217895508, "learning_rate": 9.825432235789625e-06, "loss": 0.6208, "step": 9355 }, { "epoch": 0.11186169131625198, "grad_norm": 1.9804179668426514, "learning_rate": 9.825381517734942e-06, "loss": 0.5484, "step": 9356 }, { "epoch": 0.11187364746111264, "grad_norm": 9.0076322555542, "learning_rate": 9.825330792444584e-06, "loss": 0.6187, "step": 9357 }, { "epoch": 0.11188560360597329, "grad_norm": 4.274143695831299, "learning_rate": 9.825280059918622e-06, "loss": 0.6419, "step": 9358 }, { "epoch": 0.11189755975083394, "grad_norm": 3.815136194229126, "learning_rate": 9.825229320157137e-06, "loss": 0.67, "step": 9359 }, { "epoch": 0.11190951589569459, "grad_norm": 2.32119083404541, "learning_rate": 9.825178573160204e-06, "loss": 0.7123, "step": 9360 }, { "epoch": 0.11192147204055525, "grad_norm": 1.8991020917892456, "learning_rate": 9.825127818927898e-06, "loss": 0.5498, "step": 9361 }, { "epoch": 0.11193342818541589, "grad_norm": 1.4937591552734375, "learning_rate": 9.825077057460295e-06, "loss": 0.5863, "step": 9362 }, { "epoch": 0.11194538433027655, "grad_norm": 2.587038040161133, "learning_rate": 9.825026288757472e-06, "loss": 0.5681, "step": 9363 }, { "epoch": 0.1119573404751372, "grad_norm": 2.6420257091522217, "learning_rate": 9.824975512819506e-06, "loss": 0.6311, "step": 9364 }, { "epoch": 0.11196929661999785, "grad_norm": 2.28315806388855, "learning_rate": 9.82492472964647e-06, "loss": 0.6309, "step": 9365 }, { "epoch": 0.1119812527648585, "grad_norm": 1.6686736345291138, "learning_rate": 9.824873939238444e-06, "loss": 0.6023, "step": 9366 }, { "epoch": 0.11199320890971914, "grad_norm": 2.116636276245117, "learning_rate": 9.824823141595502e-06, "loss": 0.5126, "step": 9367 }, { "epoch": 0.1120051650545798, "grad_norm": 1.62738037109375, "learning_rate": 9.82477233671772e-06, "loss": 0.5923, "step": 9368 }, { "epoch": 0.11201712119944045, "grad_norm": 2.9639084339141846, "learning_rate": 9.824721524605177e-06, "loss": 0.6531, "step": 9369 }, { "epoch": 0.1120290773443011, "grad_norm": 6.6663055419921875, "learning_rate": 9.824670705257944e-06, "loss": 0.596, "step": 9370 }, { "epoch": 0.11204103348916175, "grad_norm": 1.9359617233276367, "learning_rate": 9.8246198786761e-06, "loss": 0.6716, "step": 9371 }, { "epoch": 0.11205298963402241, "grad_norm": 3.7210233211517334, "learning_rate": 9.824569044859725e-06, "loss": 0.6504, "step": 9372 }, { "epoch": 0.11206494577888305, "grad_norm": 5.97432804107666, "learning_rate": 9.824518203808888e-06, "loss": 0.5469, "step": 9373 }, { "epoch": 0.11207690192374371, "grad_norm": 6.413021087646484, "learning_rate": 9.82446735552367e-06, "loss": 0.6649, "step": 9374 }, { "epoch": 0.11208885806860436, "grad_norm": 3.5506012439727783, "learning_rate": 9.824416500004146e-06, "loss": 0.6137, "step": 9375 }, { "epoch": 0.11210081421346502, "grad_norm": 2.3584165573120117, "learning_rate": 9.824365637250392e-06, "loss": 0.5727, "step": 9376 }, { "epoch": 0.11211277035832566, "grad_norm": 6.957425594329834, "learning_rate": 9.824314767262484e-06, "loss": 0.626, "step": 9377 }, { "epoch": 0.1121247265031863, "grad_norm": 3.602938175201416, "learning_rate": 9.8242638900405e-06, "loss": 0.6542, "step": 9378 }, { "epoch": 0.11213668264804696, "grad_norm": 2.6245436668395996, "learning_rate": 9.824213005584516e-06, "loss": 0.558, "step": 9379 }, { "epoch": 0.11214863879290761, "grad_norm": 2.0918996334075928, "learning_rate": 9.824162113894606e-06, "loss": 0.7505, "step": 9380 }, { "epoch": 0.11216059493776827, "grad_norm": 2.326249837875366, "learning_rate": 9.824111214970848e-06, "loss": 0.7272, "step": 9381 }, { "epoch": 0.11217255108262891, "grad_norm": 1.6996806859970093, "learning_rate": 9.824060308813319e-06, "loss": 0.7144, "step": 9382 }, { "epoch": 0.11218450722748957, "grad_norm": 2.6038408279418945, "learning_rate": 9.824009395422094e-06, "loss": 0.5768, "step": 9383 }, { "epoch": 0.11219646337235022, "grad_norm": 2.076228618621826, "learning_rate": 9.823958474797249e-06, "loss": 0.6033, "step": 9384 }, { "epoch": 0.11220841951721088, "grad_norm": 2.1689417362213135, "learning_rate": 9.82390754693886e-06, "loss": 0.6597, "step": 9385 }, { "epoch": 0.11222037566207152, "grad_norm": 2.403465509414673, "learning_rate": 9.823856611847006e-06, "loss": 0.6303, "step": 9386 }, { "epoch": 0.11223233180693218, "grad_norm": 2.8713035583496094, "learning_rate": 9.823805669521763e-06, "loss": 0.5991, "step": 9387 }, { "epoch": 0.11224428795179282, "grad_norm": 2.010197639465332, "learning_rate": 9.823754719963206e-06, "loss": 0.6375, "step": 9388 }, { "epoch": 0.11225624409665348, "grad_norm": 2.4913718700408936, "learning_rate": 9.82370376317141e-06, "loss": 0.6567, "step": 9389 }, { "epoch": 0.11226820024151413, "grad_norm": 2.6205990314483643, "learning_rate": 9.823652799146454e-06, "loss": 0.6028, "step": 9390 }, { "epoch": 0.11228015638637477, "grad_norm": 1.9452813863754272, "learning_rate": 9.823601827888413e-06, "loss": 0.6066, "step": 9391 }, { "epoch": 0.11229211253123543, "grad_norm": 11.17236614227295, "learning_rate": 9.823550849397365e-06, "loss": 0.6303, "step": 9392 }, { "epoch": 0.11230406867609607, "grad_norm": 5.632646560668945, "learning_rate": 9.823499863673385e-06, "loss": 0.5975, "step": 9393 }, { "epoch": 0.11231602482095673, "grad_norm": 9.778059005737305, "learning_rate": 9.823448870716549e-06, "loss": 0.6546, "step": 9394 }, { "epoch": 0.11232798096581738, "grad_norm": 3.302389621734619, "learning_rate": 9.823397870526934e-06, "loss": 0.6781, "step": 9395 }, { "epoch": 0.11233993711067804, "grad_norm": 1.8038979768753052, "learning_rate": 9.823346863104617e-06, "loss": 0.5906, "step": 9396 }, { "epoch": 0.11235189325553868, "grad_norm": 2.369513750076294, "learning_rate": 9.823295848449675e-06, "loss": 0.5776, "step": 9397 }, { "epoch": 0.11236384940039934, "grad_norm": 13.993199348449707, "learning_rate": 9.823244826562183e-06, "loss": 0.6255, "step": 9398 }, { "epoch": 0.11237580554525999, "grad_norm": 2.03407883644104, "learning_rate": 9.82319379744222e-06, "loss": 0.5831, "step": 9399 }, { "epoch": 0.11238776169012064, "grad_norm": 2.275047540664673, "learning_rate": 9.82314276108986e-06, "loss": 0.639, "step": 9400 }, { "epoch": 0.11239971783498129, "grad_norm": 6.220311641693115, "learning_rate": 9.823091717505178e-06, "loss": 0.5842, "step": 9401 }, { "epoch": 0.11241167397984193, "grad_norm": 2.127753734588623, "learning_rate": 9.823040666688255e-06, "loss": 0.7207, "step": 9402 }, { "epoch": 0.11242363012470259, "grad_norm": 3.7544827461242676, "learning_rate": 9.822989608639164e-06, "loss": 0.7071, "step": 9403 }, { "epoch": 0.11243558626956324, "grad_norm": 2.4892568588256836, "learning_rate": 9.822938543357984e-06, "loss": 0.5931, "step": 9404 }, { "epoch": 0.1124475424144239, "grad_norm": 1.923351764678955, "learning_rate": 9.82288747084479e-06, "loss": 0.5993, "step": 9405 }, { "epoch": 0.11245949855928454, "grad_norm": 2.927292585372925, "learning_rate": 9.82283639109966e-06, "loss": 0.4763, "step": 9406 }, { "epoch": 0.1124714547041452, "grad_norm": 7.528247356414795, "learning_rate": 9.82278530412267e-06, "loss": 0.5792, "step": 9407 }, { "epoch": 0.11248341084900584, "grad_norm": 2.507443904876709, "learning_rate": 9.822734209913895e-06, "loss": 0.5708, "step": 9408 }, { "epoch": 0.1124953669938665, "grad_norm": 2.71543025970459, "learning_rate": 9.822683108473413e-06, "loss": 0.6892, "step": 9409 }, { "epoch": 0.11250732313872715, "grad_norm": 6.79202938079834, "learning_rate": 9.8226319998013e-06, "loss": 0.6582, "step": 9410 }, { "epoch": 0.1125192792835878, "grad_norm": 2.401536703109741, "learning_rate": 9.822580883897634e-06, "loss": 0.6325, "step": 9411 }, { "epoch": 0.11253123542844845, "grad_norm": 2.3678438663482666, "learning_rate": 9.822529760762491e-06, "loss": 0.5507, "step": 9412 }, { "epoch": 0.1125431915733091, "grad_norm": 2.8614964485168457, "learning_rate": 9.822478630395949e-06, "loss": 0.4779, "step": 9413 }, { "epoch": 0.11255514771816975, "grad_norm": 3.251365900039673, "learning_rate": 9.822427492798082e-06, "loss": 0.6251, "step": 9414 }, { "epoch": 0.1125671038630304, "grad_norm": 2.2042083740234375, "learning_rate": 9.822376347968967e-06, "loss": 0.608, "step": 9415 }, { "epoch": 0.11257906000789106, "grad_norm": 1.9262484312057495, "learning_rate": 9.82232519590868e-06, "loss": 0.6428, "step": 9416 }, { "epoch": 0.1125910161527517, "grad_norm": 2.243837833404541, "learning_rate": 9.822274036617302e-06, "loss": 0.6171, "step": 9417 }, { "epoch": 0.11260297229761236, "grad_norm": 2.7522239685058594, "learning_rate": 9.822222870094906e-06, "loss": 0.5959, "step": 9418 }, { "epoch": 0.112614928442473, "grad_norm": 2.539592742919922, "learning_rate": 9.822171696341569e-06, "loss": 0.6468, "step": 9419 }, { "epoch": 0.11262688458733366, "grad_norm": 2.3451595306396484, "learning_rate": 9.82212051535737e-06, "loss": 0.5443, "step": 9420 }, { "epoch": 0.11263884073219431, "grad_norm": 2.4338126182556152, "learning_rate": 9.822069327142383e-06, "loss": 0.5352, "step": 9421 }, { "epoch": 0.11265079687705497, "grad_norm": 2.2972326278686523, "learning_rate": 9.822018131696687e-06, "loss": 0.62, "step": 9422 }, { "epoch": 0.11266275302191561, "grad_norm": 1.8943514823913574, "learning_rate": 9.821966929020355e-06, "loss": 0.5563, "step": 9423 }, { "epoch": 0.11267470916677627, "grad_norm": 2.3463470935821533, "learning_rate": 9.821915719113468e-06, "loss": 0.6555, "step": 9424 }, { "epoch": 0.11268666531163692, "grad_norm": 8.756004333496094, "learning_rate": 9.821864501976102e-06, "loss": 0.6655, "step": 9425 }, { "epoch": 0.11269862145649756, "grad_norm": 2.280484199523926, "learning_rate": 9.821813277608333e-06, "loss": 0.6049, "step": 9426 }, { "epoch": 0.11271057760135822, "grad_norm": 3.4680898189544678, "learning_rate": 9.821762046010238e-06, "loss": 0.5076, "step": 9427 }, { "epoch": 0.11272253374621886, "grad_norm": 2.295928716659546, "learning_rate": 9.821710807181892e-06, "loss": 0.5745, "step": 9428 }, { "epoch": 0.11273448989107952, "grad_norm": 1.646726369857788, "learning_rate": 9.821659561123375e-06, "loss": 0.6431, "step": 9429 }, { "epoch": 0.11274644603594017, "grad_norm": 2.9679298400878906, "learning_rate": 9.821608307834763e-06, "loss": 0.6075, "step": 9430 }, { "epoch": 0.11275840218080083, "grad_norm": 2.364980459213257, "learning_rate": 9.821557047316131e-06, "loss": 0.6294, "step": 9431 }, { "epoch": 0.11277035832566147, "grad_norm": 2.198289155960083, "learning_rate": 9.821505779567557e-06, "loss": 0.6859, "step": 9432 }, { "epoch": 0.11278231447052213, "grad_norm": 2.322521209716797, "learning_rate": 9.821454504589117e-06, "loss": 0.6451, "step": 9433 }, { "epoch": 0.11279427061538277, "grad_norm": 1.5026675462722778, "learning_rate": 9.82140322238089e-06, "loss": 0.6789, "step": 9434 }, { "epoch": 0.11280622676024343, "grad_norm": 3.4405181407928467, "learning_rate": 9.821351932942952e-06, "loss": 0.6356, "step": 9435 }, { "epoch": 0.11281818290510408, "grad_norm": 2.538323402404785, "learning_rate": 9.821300636275379e-06, "loss": 0.6366, "step": 9436 }, { "epoch": 0.11283013904996472, "grad_norm": 10.77305793762207, "learning_rate": 9.821249332378248e-06, "loss": 0.5645, "step": 9437 }, { "epoch": 0.11284209519482538, "grad_norm": 1.596797227859497, "learning_rate": 9.82119802125164e-06, "loss": 0.5695, "step": 9438 }, { "epoch": 0.11285405133968603, "grad_norm": 1.455289363861084, "learning_rate": 9.821146702895625e-06, "loss": 0.5682, "step": 9439 }, { "epoch": 0.11286600748454668, "grad_norm": 6.787753105163574, "learning_rate": 9.821095377310284e-06, "loss": 0.6242, "step": 9440 }, { "epoch": 0.11287796362940733, "grad_norm": 3.582412004470825, "learning_rate": 9.821044044495693e-06, "loss": 0.6897, "step": 9441 }, { "epoch": 0.11288991977426799, "grad_norm": 3.326049327850342, "learning_rate": 9.82099270445193e-06, "loss": 0.6787, "step": 9442 }, { "epoch": 0.11290187591912863, "grad_norm": 4.704698085784912, "learning_rate": 9.82094135717907e-06, "loss": 0.6359, "step": 9443 }, { "epoch": 0.11291383206398929, "grad_norm": 19.22209930419922, "learning_rate": 9.820890002677194e-06, "loss": 0.6972, "step": 9444 }, { "epoch": 0.11292578820884994, "grad_norm": 3.8277134895324707, "learning_rate": 9.820838640946373e-06, "loss": 0.6899, "step": 9445 }, { "epoch": 0.1129377443537106, "grad_norm": 3.2548768520355225, "learning_rate": 9.820787271986691e-06, "loss": 0.6582, "step": 9446 }, { "epoch": 0.11294970049857124, "grad_norm": 2.881484031677246, "learning_rate": 9.82073589579822e-06, "loss": 0.6601, "step": 9447 }, { "epoch": 0.1129616566434319, "grad_norm": 1.5936931371688843, "learning_rate": 9.820684512381036e-06, "loss": 0.6436, "step": 9448 }, { "epoch": 0.11297361278829254, "grad_norm": 2.2573156356811523, "learning_rate": 9.820633121735222e-06, "loss": 0.6283, "step": 9449 }, { "epoch": 0.11298556893315319, "grad_norm": 2.572235345840454, "learning_rate": 9.820581723860848e-06, "loss": 0.5628, "step": 9450 }, { "epoch": 0.11299752507801385, "grad_norm": 1.4644107818603516, "learning_rate": 9.820530318757997e-06, "loss": 0.5134, "step": 9451 }, { "epoch": 0.11300948122287449, "grad_norm": 3.7236814498901367, "learning_rate": 9.820478906426742e-06, "loss": 0.6013, "step": 9452 }, { "epoch": 0.11302143736773515, "grad_norm": 4.2217936515808105, "learning_rate": 9.820427486867163e-06, "loss": 0.7151, "step": 9453 }, { "epoch": 0.1130333935125958, "grad_norm": 2.691728353500366, "learning_rate": 9.820376060079337e-06, "loss": 0.6404, "step": 9454 }, { "epoch": 0.11304534965745645, "grad_norm": 2.261425495147705, "learning_rate": 9.820324626063338e-06, "loss": 0.6509, "step": 9455 }, { "epoch": 0.1130573058023171, "grad_norm": 1.4431560039520264, "learning_rate": 9.820273184819244e-06, "loss": 0.6047, "step": 9456 }, { "epoch": 0.11306926194717776, "grad_norm": 3.3854119777679443, "learning_rate": 9.820221736347136e-06, "loss": 0.6016, "step": 9457 }, { "epoch": 0.1130812180920384, "grad_norm": 3.0847554206848145, "learning_rate": 9.820170280647088e-06, "loss": 0.6327, "step": 9458 }, { "epoch": 0.11309317423689906, "grad_norm": 4.152366638183594, "learning_rate": 9.820118817719175e-06, "loss": 0.6502, "step": 9459 }, { "epoch": 0.1131051303817597, "grad_norm": 4.934304714202881, "learning_rate": 9.82006734756348e-06, "loss": 0.6278, "step": 9460 }, { "epoch": 0.11311708652662035, "grad_norm": 59.139347076416016, "learning_rate": 9.820015870180076e-06, "loss": 0.5679, "step": 9461 }, { "epoch": 0.11312904267148101, "grad_norm": 2.858109474182129, "learning_rate": 9.819964385569039e-06, "loss": 0.6071, "step": 9462 }, { "epoch": 0.11314099881634165, "grad_norm": 1.5240015983581543, "learning_rate": 9.81991289373045e-06, "loss": 0.6211, "step": 9463 }, { "epoch": 0.11315295496120231, "grad_norm": 3.145592451095581, "learning_rate": 9.819861394664384e-06, "loss": 0.5299, "step": 9464 }, { "epoch": 0.11316491110606296, "grad_norm": 3.071621894836426, "learning_rate": 9.81980988837092e-06, "loss": 0.5811, "step": 9465 }, { "epoch": 0.11317686725092362, "grad_norm": 1.7718724012374878, "learning_rate": 9.819758374850132e-06, "loss": 0.6005, "step": 9466 }, { "epoch": 0.11318882339578426, "grad_norm": 2.411310911178589, "learning_rate": 9.8197068541021e-06, "loss": 0.5828, "step": 9467 }, { "epoch": 0.11320077954064492, "grad_norm": 2.878385543823242, "learning_rate": 9.819655326126901e-06, "loss": 0.6279, "step": 9468 }, { "epoch": 0.11321273568550556, "grad_norm": 2.9054341316223145, "learning_rate": 9.81960379092461e-06, "loss": 0.6505, "step": 9469 }, { "epoch": 0.11322469183036622, "grad_norm": 1.9299581050872803, "learning_rate": 9.819552248495308e-06, "loss": 0.7605, "step": 9470 }, { "epoch": 0.11323664797522687, "grad_norm": 2.3053882122039795, "learning_rate": 9.819500698839068e-06, "loss": 0.7023, "step": 9471 }, { "epoch": 0.11324860412008751, "grad_norm": 9.337142944335938, "learning_rate": 9.819449141955973e-06, "loss": 0.6269, "step": 9472 }, { "epoch": 0.11326056026494817, "grad_norm": 3.290987253189087, "learning_rate": 9.819397577846093e-06, "loss": 0.6014, "step": 9473 }, { "epoch": 0.11327251640980882, "grad_norm": 4.8813395500183105, "learning_rate": 9.819346006509511e-06, "loss": 0.5915, "step": 9474 }, { "epoch": 0.11328447255466947, "grad_norm": 1.8937662839889526, "learning_rate": 9.819294427946303e-06, "loss": 0.5326, "step": 9475 }, { "epoch": 0.11329642869953012, "grad_norm": 2.3839011192321777, "learning_rate": 9.819242842156545e-06, "loss": 0.6236, "step": 9476 }, { "epoch": 0.11330838484439078, "grad_norm": 1.8838108777999878, "learning_rate": 9.819191249140316e-06, "loss": 0.6146, "step": 9477 }, { "epoch": 0.11332034098925142, "grad_norm": 7.428898334503174, "learning_rate": 9.819139648897693e-06, "loss": 0.6579, "step": 9478 }, { "epoch": 0.11333229713411208, "grad_norm": 8.184623718261719, "learning_rate": 9.819088041428752e-06, "loss": 0.6784, "step": 9479 }, { "epoch": 0.11334425327897273, "grad_norm": 2.3809940814971924, "learning_rate": 9.819036426733571e-06, "loss": 0.6698, "step": 9480 }, { "epoch": 0.11335620942383338, "grad_norm": 1.727851390838623, "learning_rate": 9.818984804812229e-06, "loss": 0.5504, "step": 9481 }, { "epoch": 0.11336816556869403, "grad_norm": 7.191313743591309, "learning_rate": 9.818933175664801e-06, "loss": 0.6584, "step": 9482 }, { "epoch": 0.11338012171355469, "grad_norm": 2.3479013442993164, "learning_rate": 9.818881539291366e-06, "loss": 0.5224, "step": 9483 }, { "epoch": 0.11339207785841533, "grad_norm": 2.7560431957244873, "learning_rate": 9.818829895692001e-06, "loss": 0.6117, "step": 9484 }, { "epoch": 0.11340403400327598, "grad_norm": 4.996461868286133, "learning_rate": 9.818778244866784e-06, "loss": 0.5745, "step": 9485 }, { "epoch": 0.11341599014813664, "grad_norm": 2.400073528289795, "learning_rate": 9.818726586815792e-06, "loss": 0.6044, "step": 9486 }, { "epoch": 0.11342794629299728, "grad_norm": 2.9418725967407227, "learning_rate": 9.8186749215391e-06, "loss": 0.6772, "step": 9487 }, { "epoch": 0.11343990243785794, "grad_norm": 1.9731651544570923, "learning_rate": 9.818623249036791e-06, "loss": 0.7368, "step": 9488 }, { "epoch": 0.11345185858271858, "grad_norm": 2.0653529167175293, "learning_rate": 9.818571569308937e-06, "loss": 0.6448, "step": 9489 }, { "epoch": 0.11346381472757924, "grad_norm": 5.090723037719727, "learning_rate": 9.81851988235562e-06, "loss": 0.7066, "step": 9490 }, { "epoch": 0.11347577087243989, "grad_norm": 1.7860585451126099, "learning_rate": 9.818468188176912e-06, "loss": 0.6212, "step": 9491 }, { "epoch": 0.11348772701730055, "grad_norm": 1.8974026441574097, "learning_rate": 9.818416486772897e-06, "loss": 0.6409, "step": 9492 }, { "epoch": 0.11349968316216119, "grad_norm": 2.1161584854125977, "learning_rate": 9.818364778143648e-06, "loss": 0.6137, "step": 9493 }, { "epoch": 0.11351163930702185, "grad_norm": 3.161573886871338, "learning_rate": 9.818313062289244e-06, "loss": 0.5854, "step": 9494 }, { "epoch": 0.1135235954518825, "grad_norm": 1.5380768775939941, "learning_rate": 9.818261339209762e-06, "loss": 0.5367, "step": 9495 }, { "epoch": 0.11353555159674314, "grad_norm": 3.094346046447754, "learning_rate": 9.818209608905282e-06, "loss": 0.6906, "step": 9496 }, { "epoch": 0.1135475077416038, "grad_norm": 3.043971061706543, "learning_rate": 9.818157871375877e-06, "loss": 0.5411, "step": 9497 }, { "epoch": 0.11355946388646444, "grad_norm": 2.7702102661132812, "learning_rate": 9.818106126621629e-06, "loss": 0.6226, "step": 9498 }, { "epoch": 0.1135714200313251, "grad_norm": 1.9989361763000488, "learning_rate": 9.818054374642613e-06, "loss": 0.7271, "step": 9499 }, { "epoch": 0.11358337617618575, "grad_norm": 7.30267333984375, "learning_rate": 9.818002615438907e-06, "loss": 0.5753, "step": 9500 }, { "epoch": 0.1135953323210464, "grad_norm": 4.025777816772461, "learning_rate": 9.81795084901059e-06, "loss": 0.6728, "step": 9501 }, { "epoch": 0.11360728846590705, "grad_norm": 3.0600955486297607, "learning_rate": 9.817899075357738e-06, "loss": 0.6318, "step": 9502 }, { "epoch": 0.11361924461076771, "grad_norm": 2.276183605194092, "learning_rate": 9.817847294480428e-06, "loss": 0.6712, "step": 9503 }, { "epoch": 0.11363120075562835, "grad_norm": 3.124354839324951, "learning_rate": 9.81779550637874e-06, "loss": 0.6272, "step": 9504 }, { "epoch": 0.11364315690048901, "grad_norm": 2.825259208679199, "learning_rate": 9.81774371105275e-06, "loss": 0.6318, "step": 9505 }, { "epoch": 0.11365511304534966, "grad_norm": 5.41890287399292, "learning_rate": 9.817691908502536e-06, "loss": 0.7145, "step": 9506 }, { "epoch": 0.11366706919021032, "grad_norm": 2.814549207687378, "learning_rate": 9.817640098728177e-06, "loss": 0.6168, "step": 9507 }, { "epoch": 0.11367902533507096, "grad_norm": 4.054227828979492, "learning_rate": 9.81758828172975e-06, "loss": 0.6434, "step": 9508 }, { "epoch": 0.1136909814799316, "grad_norm": 2.334362268447876, "learning_rate": 9.81753645750733e-06, "loss": 0.6099, "step": 9509 }, { "epoch": 0.11370293762479226, "grad_norm": 1.846480131149292, "learning_rate": 9.817484626060998e-06, "loss": 0.6455, "step": 9510 }, { "epoch": 0.11371489376965291, "grad_norm": 5.194658279418945, "learning_rate": 9.817432787390831e-06, "loss": 0.6524, "step": 9511 }, { "epoch": 0.11372684991451357, "grad_norm": 2.1518969535827637, "learning_rate": 9.817380941496906e-06, "loss": 0.5992, "step": 9512 }, { "epoch": 0.11373880605937421, "grad_norm": 3.085878610610962, "learning_rate": 9.817329088379302e-06, "loss": 0.7425, "step": 9513 }, { "epoch": 0.11375076220423487, "grad_norm": 2.33493709564209, "learning_rate": 9.817277228038095e-06, "loss": 0.5468, "step": 9514 }, { "epoch": 0.11376271834909552, "grad_norm": 2.973081350326538, "learning_rate": 9.817225360473365e-06, "loss": 0.5614, "step": 9515 }, { "epoch": 0.11377467449395617, "grad_norm": 5.400787353515625, "learning_rate": 9.817173485685188e-06, "loss": 0.6073, "step": 9516 }, { "epoch": 0.11378663063881682, "grad_norm": 3.000640869140625, "learning_rate": 9.817121603673641e-06, "loss": 0.6229, "step": 9517 }, { "epoch": 0.11379858678367748, "grad_norm": 2.7221407890319824, "learning_rate": 9.817069714438804e-06, "loss": 0.5929, "step": 9518 }, { "epoch": 0.11381054292853812, "grad_norm": 2.7285983562469482, "learning_rate": 9.817017817980753e-06, "loss": 0.6938, "step": 9519 }, { "epoch": 0.11382249907339877, "grad_norm": 2.704789161682129, "learning_rate": 9.816965914299568e-06, "loss": 0.5906, "step": 9520 }, { "epoch": 0.11383445521825943, "grad_norm": 3.780355453491211, "learning_rate": 9.816914003395324e-06, "loss": 0.5884, "step": 9521 }, { "epoch": 0.11384641136312007, "grad_norm": 3.063164234161377, "learning_rate": 9.816862085268102e-06, "loss": 0.6797, "step": 9522 }, { "epoch": 0.11385836750798073, "grad_norm": 1.9103988409042358, "learning_rate": 9.816810159917977e-06, "loss": 0.7302, "step": 9523 }, { "epoch": 0.11387032365284137, "grad_norm": 3.046642541885376, "learning_rate": 9.816758227345028e-06, "loss": 0.7118, "step": 9524 }, { "epoch": 0.11388227979770203, "grad_norm": 2.5812134742736816, "learning_rate": 9.816706287549334e-06, "loss": 0.6697, "step": 9525 }, { "epoch": 0.11389423594256268, "grad_norm": 2.190225124359131, "learning_rate": 9.81665434053097e-06, "loss": 0.5631, "step": 9526 }, { "epoch": 0.11390619208742334, "grad_norm": 2.2543020248413086, "learning_rate": 9.816602386290017e-06, "loss": 0.588, "step": 9527 }, { "epoch": 0.11391814823228398, "grad_norm": 1.6380695104599, "learning_rate": 9.81655042482655e-06, "loss": 0.6063, "step": 9528 }, { "epoch": 0.11393010437714464, "grad_norm": 2.1930184364318848, "learning_rate": 9.81649845614065e-06, "loss": 0.6476, "step": 9529 }, { "epoch": 0.11394206052200528, "grad_norm": 3.140812873840332, "learning_rate": 9.816446480232393e-06, "loss": 0.559, "step": 9530 }, { "epoch": 0.11395401666686594, "grad_norm": 1.9404700994491577, "learning_rate": 9.81639449710186e-06, "loss": 0.5534, "step": 9531 }, { "epoch": 0.11396597281172659, "grad_norm": 2.6769659519195557, "learning_rate": 9.816342506749122e-06, "loss": 0.6045, "step": 9532 }, { "epoch": 0.11397792895658723, "grad_norm": 1.9118735790252686, "learning_rate": 9.816290509174265e-06, "loss": 0.6696, "step": 9533 }, { "epoch": 0.11398988510144789, "grad_norm": 3.638500452041626, "learning_rate": 9.816238504377361e-06, "loss": 0.5359, "step": 9534 }, { "epoch": 0.11400184124630854, "grad_norm": 2.8236379623413086, "learning_rate": 9.81618649235849e-06, "loss": 0.6439, "step": 9535 }, { "epoch": 0.1140137973911692, "grad_norm": 3.5118777751922607, "learning_rate": 9.81613447311773e-06, "loss": 0.6371, "step": 9536 }, { "epoch": 0.11402575353602984, "grad_norm": 2.0445094108581543, "learning_rate": 9.816082446655162e-06, "loss": 0.5614, "step": 9537 }, { "epoch": 0.1140377096808905, "grad_norm": 1.7991198301315308, "learning_rate": 9.816030412970858e-06, "loss": 0.5836, "step": 9538 }, { "epoch": 0.11404966582575114, "grad_norm": 5.334380626678467, "learning_rate": 9.815978372064901e-06, "loss": 0.552, "step": 9539 }, { "epoch": 0.1140616219706118, "grad_norm": 2.3204946517944336, "learning_rate": 9.815926323937368e-06, "loss": 0.6735, "step": 9540 }, { "epoch": 0.11407357811547245, "grad_norm": 10.369635581970215, "learning_rate": 9.815874268588334e-06, "loss": 0.644, "step": 9541 }, { "epoch": 0.1140855342603331, "grad_norm": 2.0630810260772705, "learning_rate": 9.815822206017881e-06, "loss": 0.5711, "step": 9542 }, { "epoch": 0.11409749040519375, "grad_norm": 4.475592136383057, "learning_rate": 9.815770136226086e-06, "loss": 0.658, "step": 9543 }, { "epoch": 0.1141094465500544, "grad_norm": 2.4035465717315674, "learning_rate": 9.815718059213027e-06, "loss": 0.652, "step": 9544 }, { "epoch": 0.11412140269491505, "grad_norm": 2.7674853801727295, "learning_rate": 9.815665974978779e-06, "loss": 0.6146, "step": 9545 }, { "epoch": 0.1141333588397757, "grad_norm": 47.06651306152344, "learning_rate": 9.815613883523425e-06, "loss": 0.6994, "step": 9546 }, { "epoch": 0.11414531498463636, "grad_norm": 1.9988223314285278, "learning_rate": 9.81556178484704e-06, "loss": 0.6104, "step": 9547 }, { "epoch": 0.114157271129497, "grad_norm": 6.447707176208496, "learning_rate": 9.815509678949704e-06, "loss": 0.4866, "step": 9548 }, { "epoch": 0.11416922727435766, "grad_norm": 2.780337333679199, "learning_rate": 9.815457565831492e-06, "loss": 0.5809, "step": 9549 }, { "epoch": 0.1141811834192183, "grad_norm": 1.7218492031097412, "learning_rate": 9.815405445492486e-06, "loss": 0.6611, "step": 9550 }, { "epoch": 0.11419313956407896, "grad_norm": 2.2483954429626465, "learning_rate": 9.815353317932762e-06, "loss": 0.6064, "step": 9551 }, { "epoch": 0.11420509570893961, "grad_norm": 15.418380737304688, "learning_rate": 9.8153011831524e-06, "loss": 0.7018, "step": 9552 }, { "epoch": 0.11421705185380027, "grad_norm": 3.3328325748443604, "learning_rate": 9.815249041151474e-06, "loss": 0.6512, "step": 9553 }, { "epoch": 0.11422900799866091, "grad_norm": 3.9039461612701416, "learning_rate": 9.815196891930067e-06, "loss": 0.6156, "step": 9554 }, { "epoch": 0.11424096414352156, "grad_norm": 3.5662789344787598, "learning_rate": 9.815144735488253e-06, "loss": 0.5844, "step": 9555 }, { "epoch": 0.11425292028838221, "grad_norm": 2.3934524059295654, "learning_rate": 9.815092571826115e-06, "loss": 0.6559, "step": 9556 }, { "epoch": 0.11426487643324286, "grad_norm": 4.236307144165039, "learning_rate": 9.815040400943728e-06, "loss": 0.6281, "step": 9557 }, { "epoch": 0.11427683257810352, "grad_norm": 2.5001943111419678, "learning_rate": 9.814988222841169e-06, "loss": 0.642, "step": 9558 }, { "epoch": 0.11428878872296416, "grad_norm": 1.6635655164718628, "learning_rate": 9.814936037518518e-06, "loss": 0.6043, "step": 9559 }, { "epoch": 0.11430074486782482, "grad_norm": 2.7030093669891357, "learning_rate": 9.814883844975855e-06, "loss": 0.5545, "step": 9560 }, { "epoch": 0.11431270101268547, "grad_norm": 2.345456838607788, "learning_rate": 9.814831645213256e-06, "loss": 0.5744, "step": 9561 }, { "epoch": 0.11432465715754613, "grad_norm": 2.4301671981811523, "learning_rate": 9.8147794382308e-06, "loss": 0.6367, "step": 9562 }, { "epoch": 0.11433661330240677, "grad_norm": 4.592338562011719, "learning_rate": 9.814727224028566e-06, "loss": 0.6542, "step": 9563 }, { "epoch": 0.11434856944726743, "grad_norm": 3.692448854446411, "learning_rate": 9.814675002606631e-06, "loss": 0.6846, "step": 9564 }, { "epoch": 0.11436052559212807, "grad_norm": 2.2592005729675293, "learning_rate": 9.814622773965072e-06, "loss": 0.6134, "step": 9565 }, { "epoch": 0.11437248173698873, "grad_norm": 1.9874123334884644, "learning_rate": 9.81457053810397e-06, "loss": 0.7104, "step": 9566 }, { "epoch": 0.11438443788184938, "grad_norm": 1.9497101306915283, "learning_rate": 9.814518295023403e-06, "loss": 0.6111, "step": 9567 }, { "epoch": 0.11439639402671002, "grad_norm": 2.51501727104187, "learning_rate": 9.814466044723447e-06, "loss": 0.624, "step": 9568 }, { "epoch": 0.11440835017157068, "grad_norm": 4.636082649230957, "learning_rate": 9.814413787204184e-06, "loss": 0.6487, "step": 9569 }, { "epoch": 0.11442030631643132, "grad_norm": 2.4143195152282715, "learning_rate": 9.814361522465689e-06, "loss": 0.5441, "step": 9570 }, { "epoch": 0.11443226246129198, "grad_norm": 2.377378225326538, "learning_rate": 9.814309250508042e-06, "loss": 0.7212, "step": 9571 }, { "epoch": 0.11444421860615263, "grad_norm": 6.81253719329834, "learning_rate": 9.814256971331322e-06, "loss": 0.5814, "step": 9572 }, { "epoch": 0.11445617475101329, "grad_norm": 1.8857372999191284, "learning_rate": 9.814204684935606e-06, "loss": 0.5931, "step": 9573 }, { "epoch": 0.11446813089587393, "grad_norm": 2.170750379562378, "learning_rate": 9.814152391320975e-06, "loss": 0.7023, "step": 9574 }, { "epoch": 0.11448008704073459, "grad_norm": 1.8933160305023193, "learning_rate": 9.814100090487503e-06, "loss": 0.5913, "step": 9575 }, { "epoch": 0.11449204318559524, "grad_norm": 4.144046783447266, "learning_rate": 9.81404778243527e-06, "loss": 0.6158, "step": 9576 }, { "epoch": 0.1145039993304559, "grad_norm": 3.7373909950256348, "learning_rate": 9.813995467164358e-06, "loss": 0.6894, "step": 9577 }, { "epoch": 0.11451595547531654, "grad_norm": 5.0499043464660645, "learning_rate": 9.81394314467484e-06, "loss": 0.6887, "step": 9578 }, { "epoch": 0.11452791162017718, "grad_norm": 8.92524528503418, "learning_rate": 9.8138908149668e-06, "loss": 0.6048, "step": 9579 }, { "epoch": 0.11453986776503784, "grad_norm": 2.3401551246643066, "learning_rate": 9.813838478040311e-06, "loss": 0.7307, "step": 9580 }, { "epoch": 0.11455182390989849, "grad_norm": 1.9991428852081299, "learning_rate": 9.813786133895456e-06, "loss": 0.7014, "step": 9581 }, { "epoch": 0.11456378005475915, "grad_norm": 4.576174736022949, "learning_rate": 9.81373378253231e-06, "loss": 0.7166, "step": 9582 }, { "epoch": 0.11457573619961979, "grad_norm": 2.749572992324829, "learning_rate": 9.813681423950954e-06, "loss": 0.5592, "step": 9583 }, { "epoch": 0.11458769234448045, "grad_norm": 2.0943970680236816, "learning_rate": 9.813629058151466e-06, "loss": 0.5568, "step": 9584 }, { "epoch": 0.1145996484893411, "grad_norm": 3.6775989532470703, "learning_rate": 9.813576685133924e-06, "loss": 0.532, "step": 9585 }, { "epoch": 0.11461160463420175, "grad_norm": 2.4377105236053467, "learning_rate": 9.813524304898406e-06, "loss": 0.6175, "step": 9586 }, { "epoch": 0.1146235607790624, "grad_norm": 16.35052490234375, "learning_rate": 9.81347191744499e-06, "loss": 0.6327, "step": 9587 }, { "epoch": 0.11463551692392306, "grad_norm": 1.9073938131332397, "learning_rate": 9.813419522773756e-06, "loss": 0.6657, "step": 9588 }, { "epoch": 0.1146474730687837, "grad_norm": 10.957284927368164, "learning_rate": 9.813367120884784e-06, "loss": 0.6367, "step": 9589 }, { "epoch": 0.11465942921364436, "grad_norm": 2.0847976207733154, "learning_rate": 9.81331471177815e-06, "loss": 0.6229, "step": 9590 }, { "epoch": 0.114671385358505, "grad_norm": 5.0240478515625, "learning_rate": 9.813262295453934e-06, "loss": 0.6694, "step": 9591 }, { "epoch": 0.11468334150336565, "grad_norm": 2.57969331741333, "learning_rate": 9.813209871912215e-06, "loss": 0.664, "step": 9592 }, { "epoch": 0.11469529764822631, "grad_norm": 2.3174519538879395, "learning_rate": 9.81315744115307e-06, "loss": 0.6186, "step": 9593 }, { "epoch": 0.11470725379308695, "grad_norm": 2.501035213470459, "learning_rate": 9.813105003176578e-06, "loss": 0.6793, "step": 9594 }, { "epoch": 0.11471920993794761, "grad_norm": 3.0136592388153076, "learning_rate": 9.813052557982816e-06, "loss": 0.6043, "step": 9595 }, { "epoch": 0.11473116608280826, "grad_norm": 2.9911227226257324, "learning_rate": 9.813000105571866e-06, "loss": 0.6325, "step": 9596 }, { "epoch": 0.11474312222766891, "grad_norm": 9.248387336730957, "learning_rate": 9.812947645943806e-06, "loss": 0.6793, "step": 9597 }, { "epoch": 0.11475507837252956, "grad_norm": 2.838104486465454, "learning_rate": 9.812895179098713e-06, "loss": 0.5959, "step": 9598 }, { "epoch": 0.11476703451739022, "grad_norm": 2.5344951152801514, "learning_rate": 9.812842705036668e-06, "loss": 0.6262, "step": 9599 }, { "epoch": 0.11477899066225086, "grad_norm": 3.4872639179229736, "learning_rate": 9.812790223757747e-06, "loss": 0.7499, "step": 9600 }, { "epoch": 0.11479094680711152, "grad_norm": 2.932562828063965, "learning_rate": 9.81273773526203e-06, "loss": 0.58, "step": 9601 }, { "epoch": 0.11480290295197217, "grad_norm": 3.2114155292510986, "learning_rate": 9.812685239549597e-06, "loss": 0.6707, "step": 9602 }, { "epoch": 0.11481485909683281, "grad_norm": 4.585883140563965, "learning_rate": 9.812632736620525e-06, "loss": 0.6413, "step": 9603 }, { "epoch": 0.11482681524169347, "grad_norm": 2.552703857421875, "learning_rate": 9.812580226474892e-06, "loss": 0.5468, "step": 9604 }, { "epoch": 0.11483877138655411, "grad_norm": 10.343838691711426, "learning_rate": 9.812527709112778e-06, "loss": 0.6944, "step": 9605 }, { "epoch": 0.11485072753141477, "grad_norm": 9.803153038024902, "learning_rate": 9.812475184534262e-06, "loss": 0.6711, "step": 9606 }, { "epoch": 0.11486268367627542, "grad_norm": 5.245515823364258, "learning_rate": 9.812422652739422e-06, "loss": 0.5884, "step": 9607 }, { "epoch": 0.11487463982113608, "grad_norm": 2.99809193611145, "learning_rate": 9.812370113728338e-06, "loss": 0.6382, "step": 9608 }, { "epoch": 0.11488659596599672, "grad_norm": 4.377040863037109, "learning_rate": 9.812317567501086e-06, "loss": 0.657, "step": 9609 }, { "epoch": 0.11489855211085738, "grad_norm": 3.0623435974121094, "learning_rate": 9.812265014057749e-06, "loss": 0.6475, "step": 9610 }, { "epoch": 0.11491050825571802, "grad_norm": 1.5079904794692993, "learning_rate": 9.812212453398402e-06, "loss": 0.6379, "step": 9611 }, { "epoch": 0.11492246440057868, "grad_norm": 3.070652723312378, "learning_rate": 9.812159885523124e-06, "loss": 0.662, "step": 9612 }, { "epoch": 0.11493442054543933, "grad_norm": 3.6367650032043457, "learning_rate": 9.812107310431998e-06, "loss": 0.5556, "step": 9613 }, { "epoch": 0.11494637669029997, "grad_norm": 4.4022722244262695, "learning_rate": 9.8120547281251e-06, "loss": 0.6058, "step": 9614 }, { "epoch": 0.11495833283516063, "grad_norm": 2.9332854747772217, "learning_rate": 9.812002138602507e-06, "loss": 0.6947, "step": 9615 }, { "epoch": 0.11497028898002128, "grad_norm": 2.7463107109069824, "learning_rate": 9.8119495418643e-06, "loss": 0.6402, "step": 9616 }, { "epoch": 0.11498224512488193, "grad_norm": 4.322770118713379, "learning_rate": 9.811896937910559e-06, "loss": 0.6182, "step": 9617 }, { "epoch": 0.11499420126974258, "grad_norm": 13.799166679382324, "learning_rate": 9.81184432674136e-06, "loss": 0.6687, "step": 9618 }, { "epoch": 0.11500615741460324, "grad_norm": 1.9831465482711792, "learning_rate": 9.811791708356784e-06, "loss": 0.6687, "step": 9619 }, { "epoch": 0.11501811355946388, "grad_norm": 2.390369176864624, "learning_rate": 9.811739082756907e-06, "loss": 0.6177, "step": 9620 }, { "epoch": 0.11503006970432454, "grad_norm": 6.5730109214782715, "learning_rate": 9.811686449941812e-06, "loss": 0.6316, "step": 9621 }, { "epoch": 0.11504202584918519, "grad_norm": 4.57522439956665, "learning_rate": 9.811633809911579e-06, "loss": 0.5679, "step": 9622 }, { "epoch": 0.11505398199404585, "grad_norm": 2.592074394226074, "learning_rate": 9.81158116266628e-06, "loss": 0.6313, "step": 9623 }, { "epoch": 0.11506593813890649, "grad_norm": 2.328939199447632, "learning_rate": 9.811528508205998e-06, "loss": 0.6993, "step": 9624 }, { "epoch": 0.11507789428376715, "grad_norm": 1.823785662651062, "learning_rate": 9.811475846530813e-06, "loss": 0.627, "step": 9625 }, { "epoch": 0.1150898504286278, "grad_norm": 44.005184173583984, "learning_rate": 9.811423177640804e-06, "loss": 0.6673, "step": 9626 }, { "epoch": 0.11510180657348844, "grad_norm": 2.8393073081970215, "learning_rate": 9.811370501536048e-06, "loss": 0.6298, "step": 9627 }, { "epoch": 0.1151137627183491, "grad_norm": 1.55304753780365, "learning_rate": 9.811317818216624e-06, "loss": 0.5765, "step": 9628 }, { "epoch": 0.11512571886320974, "grad_norm": 3.201592206954956, "learning_rate": 9.811265127682612e-06, "loss": 0.5237, "step": 9629 }, { "epoch": 0.1151376750080704, "grad_norm": 2.951786994934082, "learning_rate": 9.811212429934091e-06, "loss": 0.6149, "step": 9630 }, { "epoch": 0.11514963115293105, "grad_norm": 3.2887821197509766, "learning_rate": 9.81115972497114e-06, "loss": 0.6881, "step": 9631 }, { "epoch": 0.1151615872977917, "grad_norm": 3.3188371658325195, "learning_rate": 9.811107012793839e-06, "loss": 0.5549, "step": 9632 }, { "epoch": 0.11517354344265235, "grad_norm": 2.350040912628174, "learning_rate": 9.811054293402264e-06, "loss": 0.4765, "step": 9633 }, { "epoch": 0.11518549958751301, "grad_norm": 4.205013751983643, "learning_rate": 9.811001566796499e-06, "loss": 0.5535, "step": 9634 }, { "epoch": 0.11519745573237365, "grad_norm": 1.7698715925216675, "learning_rate": 9.810948832976616e-06, "loss": 0.6199, "step": 9635 }, { "epoch": 0.11520941187723431, "grad_norm": 3.665724754333496, "learning_rate": 9.810896091942701e-06, "loss": 0.6016, "step": 9636 }, { "epoch": 0.11522136802209496, "grad_norm": 3.075639009475708, "learning_rate": 9.81084334369483e-06, "loss": 0.6518, "step": 9637 }, { "epoch": 0.1152333241669556, "grad_norm": 1.6510707139968872, "learning_rate": 9.810790588233081e-06, "loss": 0.6282, "step": 9638 }, { "epoch": 0.11524528031181626, "grad_norm": 1.5980910062789917, "learning_rate": 9.810737825557535e-06, "loss": 0.6355, "step": 9639 }, { "epoch": 0.1152572364566769, "grad_norm": 13.986209869384766, "learning_rate": 9.81068505566827e-06, "loss": 0.6992, "step": 9640 }, { "epoch": 0.11526919260153756, "grad_norm": 2.1779367923736572, "learning_rate": 9.810632278565367e-06, "loss": 0.5965, "step": 9641 }, { "epoch": 0.11528114874639821, "grad_norm": 3.699455738067627, "learning_rate": 9.810579494248902e-06, "loss": 0.5179, "step": 9642 }, { "epoch": 0.11529310489125887, "grad_norm": 2.2032580375671387, "learning_rate": 9.810526702718956e-06, "loss": 0.7843, "step": 9643 }, { "epoch": 0.11530506103611951, "grad_norm": 2.5094199180603027, "learning_rate": 9.810473903975611e-06, "loss": 0.5609, "step": 9644 }, { "epoch": 0.11531701718098017, "grad_norm": 1.5073422193527222, "learning_rate": 9.81042109801894e-06, "loss": 0.509, "step": 9645 }, { "epoch": 0.11532897332584081, "grad_norm": 2.1440768241882324, "learning_rate": 9.810368284849028e-06, "loss": 0.6284, "step": 9646 }, { "epoch": 0.11534092947070147, "grad_norm": 2.0673904418945312, "learning_rate": 9.810315464465949e-06, "loss": 0.6144, "step": 9647 }, { "epoch": 0.11535288561556212, "grad_norm": 2.3796961307525635, "learning_rate": 9.810262636869788e-06, "loss": 0.7324, "step": 9648 }, { "epoch": 0.11536484176042278, "grad_norm": 2.1831438541412354, "learning_rate": 9.810209802060618e-06, "loss": 0.6746, "step": 9649 }, { "epoch": 0.11537679790528342, "grad_norm": 2.6343021392822266, "learning_rate": 9.810156960038524e-06, "loss": 0.6339, "step": 9650 }, { "epoch": 0.11538875405014407, "grad_norm": 5.893270015716553, "learning_rate": 9.81010411080358e-06, "loss": 0.6506, "step": 9651 }, { "epoch": 0.11540071019500472, "grad_norm": 2.1847596168518066, "learning_rate": 9.81005125435587e-06, "loss": 0.6328, "step": 9652 }, { "epoch": 0.11541266633986537, "grad_norm": 1.8838016986846924, "learning_rate": 9.80999839069547e-06, "loss": 0.6836, "step": 9653 }, { "epoch": 0.11542462248472603, "grad_norm": 3.164201259613037, "learning_rate": 9.80994551982246e-06, "loss": 0.5721, "step": 9654 }, { "epoch": 0.11543657862958667, "grad_norm": 2.1176319122314453, "learning_rate": 9.80989264173692e-06, "loss": 0.5267, "step": 9655 }, { "epoch": 0.11544853477444733, "grad_norm": 2.195465087890625, "learning_rate": 9.809839756438929e-06, "loss": 0.6984, "step": 9656 }, { "epoch": 0.11546049091930798, "grad_norm": 2.2614903450012207, "learning_rate": 9.809786863928565e-06, "loss": 0.6761, "step": 9657 }, { "epoch": 0.11547244706416863, "grad_norm": 3.2722110748291016, "learning_rate": 9.80973396420591e-06, "loss": 0.5884, "step": 9658 }, { "epoch": 0.11548440320902928, "grad_norm": 1.6085087060928345, "learning_rate": 9.809681057271042e-06, "loss": 0.5525, "step": 9659 }, { "epoch": 0.11549635935388994, "grad_norm": 1.558606505393982, "learning_rate": 9.80962814312404e-06, "loss": 0.7521, "step": 9660 }, { "epoch": 0.11550831549875058, "grad_norm": 2.2344322204589844, "learning_rate": 9.809575221764982e-06, "loss": 0.6104, "step": 9661 }, { "epoch": 0.11552027164361123, "grad_norm": 24.304380416870117, "learning_rate": 9.809522293193951e-06, "loss": 0.6769, "step": 9662 }, { "epoch": 0.11553222778847189, "grad_norm": 2.202345848083496, "learning_rate": 9.809469357411024e-06, "loss": 0.6565, "step": 9663 }, { "epoch": 0.11554418393333253, "grad_norm": 3.4900543689727783, "learning_rate": 9.809416414416279e-06, "loss": 0.6178, "step": 9664 }, { "epoch": 0.11555614007819319, "grad_norm": 1.929551124572754, "learning_rate": 9.809363464209797e-06, "loss": 0.6749, "step": 9665 }, { "epoch": 0.11556809622305383, "grad_norm": 1.8980950117111206, "learning_rate": 9.809310506791659e-06, "loss": 0.6107, "step": 9666 }, { "epoch": 0.1155800523679145, "grad_norm": 3.9760143756866455, "learning_rate": 9.80925754216194e-06, "loss": 0.6401, "step": 9667 }, { "epoch": 0.11559200851277514, "grad_norm": 2.0900955200195312, "learning_rate": 9.809204570320724e-06, "loss": 0.6921, "step": 9668 }, { "epoch": 0.1156039646576358, "grad_norm": 16.894290924072266, "learning_rate": 9.80915159126809e-06, "loss": 0.6477, "step": 9669 }, { "epoch": 0.11561592080249644, "grad_norm": 2.4357411861419678, "learning_rate": 9.809098605004115e-06, "loss": 0.612, "step": 9670 }, { "epoch": 0.1156278769473571, "grad_norm": 1.7679213285446167, "learning_rate": 9.80904561152888e-06, "loss": 0.6208, "step": 9671 }, { "epoch": 0.11563983309221774, "grad_norm": 1.4825242757797241, "learning_rate": 9.808992610842462e-06, "loss": 0.5145, "step": 9672 }, { "epoch": 0.11565178923707839, "grad_norm": 1.9338836669921875, "learning_rate": 9.808939602944945e-06, "loss": 0.6069, "step": 9673 }, { "epoch": 0.11566374538193905, "grad_norm": 13.258281707763672, "learning_rate": 9.808886587836405e-06, "loss": 0.713, "step": 9674 }, { "epoch": 0.11567570152679969, "grad_norm": 3.0932159423828125, "learning_rate": 9.808833565516923e-06, "loss": 0.5574, "step": 9675 }, { "epoch": 0.11568765767166035, "grad_norm": 2.21809720993042, "learning_rate": 9.808780535986576e-06, "loss": 0.687, "step": 9676 }, { "epoch": 0.115699613816521, "grad_norm": 2.558485507965088, "learning_rate": 9.808727499245448e-06, "loss": 0.5148, "step": 9677 }, { "epoch": 0.11571156996138166, "grad_norm": 3.335616111755371, "learning_rate": 9.808674455293615e-06, "loss": 0.6149, "step": 9678 }, { "epoch": 0.1157235261062423, "grad_norm": 2.1247150897979736, "learning_rate": 9.808621404131156e-06, "loss": 0.6361, "step": 9679 }, { "epoch": 0.11573548225110296, "grad_norm": 2.339757204055786, "learning_rate": 9.808568345758153e-06, "loss": 0.6616, "step": 9680 }, { "epoch": 0.1157474383959636, "grad_norm": 2.6449146270751953, "learning_rate": 9.808515280174686e-06, "loss": 0.6365, "step": 9681 }, { "epoch": 0.11575939454082426, "grad_norm": 2.883657932281494, "learning_rate": 9.808462207380834e-06, "loss": 0.6353, "step": 9682 }, { "epoch": 0.1157713506856849, "grad_norm": 2.4658043384552, "learning_rate": 9.808409127376673e-06, "loss": 0.5567, "step": 9683 }, { "epoch": 0.11578330683054557, "grad_norm": 1.8858720064163208, "learning_rate": 9.808356040162286e-06, "loss": 0.6284, "step": 9684 }, { "epoch": 0.11579526297540621, "grad_norm": 1.7565408945083618, "learning_rate": 9.808302945737754e-06, "loss": 0.5902, "step": 9685 }, { "epoch": 0.11580721912026685, "grad_norm": 1.799958348274231, "learning_rate": 9.808249844103153e-06, "loss": 0.6291, "step": 9686 }, { "epoch": 0.11581917526512751, "grad_norm": 1.5171704292297363, "learning_rate": 9.808196735258564e-06, "loss": 0.6306, "step": 9687 }, { "epoch": 0.11583113140998816, "grad_norm": 3.862544059753418, "learning_rate": 9.808143619204067e-06, "loss": 0.6884, "step": 9688 }, { "epoch": 0.11584308755484882, "grad_norm": 2.2799832820892334, "learning_rate": 9.808090495939743e-06, "loss": 0.6865, "step": 9689 }, { "epoch": 0.11585504369970946, "grad_norm": 1.6528749465942383, "learning_rate": 9.80803736546567e-06, "loss": 0.6206, "step": 9690 }, { "epoch": 0.11586699984457012, "grad_norm": 1.3369057178497314, "learning_rate": 9.807984227781926e-06, "loss": 0.6351, "step": 9691 }, { "epoch": 0.11587895598943077, "grad_norm": 1.383562684059143, "learning_rate": 9.807931082888594e-06, "loss": 0.6843, "step": 9692 }, { "epoch": 0.11589091213429142, "grad_norm": 1.974722981452942, "learning_rate": 9.807877930785753e-06, "loss": 0.578, "step": 9693 }, { "epoch": 0.11590286827915207, "grad_norm": 1.6154919862747192, "learning_rate": 9.80782477147348e-06, "loss": 0.6111, "step": 9694 }, { "epoch": 0.11591482442401273, "grad_norm": 9.635427474975586, "learning_rate": 9.807771604951858e-06, "loss": 0.6946, "step": 9695 }, { "epoch": 0.11592678056887337, "grad_norm": 2.246732234954834, "learning_rate": 9.807718431220965e-06, "loss": 0.6261, "step": 9696 }, { "epoch": 0.11593873671373402, "grad_norm": 1.939956784248352, "learning_rate": 9.80766525028088e-06, "loss": 0.6535, "step": 9697 }, { "epoch": 0.11595069285859468, "grad_norm": 2.667527437210083, "learning_rate": 9.807612062131686e-06, "loss": 0.5123, "step": 9698 }, { "epoch": 0.11596264900345532, "grad_norm": 7.279308795928955, "learning_rate": 9.80755886677346e-06, "loss": 0.6566, "step": 9699 }, { "epoch": 0.11597460514831598, "grad_norm": 2.6315155029296875, "learning_rate": 9.807505664206282e-06, "loss": 0.6044, "step": 9700 }, { "epoch": 0.11598656129317662, "grad_norm": 2.999305009841919, "learning_rate": 9.807452454430231e-06, "loss": 0.5838, "step": 9701 }, { "epoch": 0.11599851743803728, "grad_norm": 2.2879459857940674, "learning_rate": 9.80739923744539e-06, "loss": 0.6472, "step": 9702 }, { "epoch": 0.11601047358289793, "grad_norm": 2.376649856567383, "learning_rate": 9.807346013251835e-06, "loss": 0.6389, "step": 9703 }, { "epoch": 0.11602242972775859, "grad_norm": 2.936157703399658, "learning_rate": 9.807292781849649e-06, "loss": 0.578, "step": 9704 }, { "epoch": 0.11603438587261923, "grad_norm": 1.7345668077468872, "learning_rate": 9.80723954323891e-06, "loss": 0.7151, "step": 9705 }, { "epoch": 0.11604634201747989, "grad_norm": 3.2068071365356445, "learning_rate": 9.807186297419697e-06, "loss": 0.6405, "step": 9706 }, { "epoch": 0.11605829816234053, "grad_norm": 2.66317081451416, "learning_rate": 9.807133044392094e-06, "loss": 0.5951, "step": 9707 }, { "epoch": 0.11607025430720119, "grad_norm": 1.701862096786499, "learning_rate": 9.807079784156174e-06, "loss": 0.591, "step": 9708 }, { "epoch": 0.11608221045206184, "grad_norm": 2.179854154586792, "learning_rate": 9.807026516712023e-06, "loss": 0.6298, "step": 9709 }, { "epoch": 0.11609416659692248, "grad_norm": 4.1124587059021, "learning_rate": 9.806973242059718e-06, "loss": 0.6753, "step": 9710 }, { "epoch": 0.11610612274178314, "grad_norm": 3.5501646995544434, "learning_rate": 9.806919960199342e-06, "loss": 0.6832, "step": 9711 }, { "epoch": 0.11611807888664379, "grad_norm": 2.157010555267334, "learning_rate": 9.80686667113097e-06, "loss": 0.6078, "step": 9712 }, { "epoch": 0.11613003503150444, "grad_norm": 1.8554221391677856, "learning_rate": 9.806813374854684e-06, "loss": 0.5996, "step": 9713 }, { "epoch": 0.11614199117636509, "grad_norm": 1.7071181535720825, "learning_rate": 9.806760071370565e-06, "loss": 0.5658, "step": 9714 }, { "epoch": 0.11615394732122575, "grad_norm": 1.865553617477417, "learning_rate": 9.806706760678692e-06, "loss": 0.5726, "step": 9715 }, { "epoch": 0.11616590346608639, "grad_norm": 3.8969058990478516, "learning_rate": 9.806653442779147e-06, "loss": 0.5533, "step": 9716 }, { "epoch": 0.11617785961094705, "grad_norm": 2.4180972576141357, "learning_rate": 9.806600117672004e-06, "loss": 0.5437, "step": 9717 }, { "epoch": 0.1161898157558077, "grad_norm": 2.112992286682129, "learning_rate": 9.806546785357352e-06, "loss": 0.5437, "step": 9718 }, { "epoch": 0.11620177190066835, "grad_norm": 2.271026849746704, "learning_rate": 9.806493445835262e-06, "loss": 0.5886, "step": 9719 }, { "epoch": 0.116213728045529, "grad_norm": 2.0444133281707764, "learning_rate": 9.80644009910582e-06, "loss": 0.5531, "step": 9720 }, { "epoch": 0.11622568419038964, "grad_norm": 5.950747013092041, "learning_rate": 9.806386745169104e-06, "loss": 0.619, "step": 9721 }, { "epoch": 0.1162376403352503, "grad_norm": 1.816481590270996, "learning_rate": 9.806333384025192e-06, "loss": 0.6008, "step": 9722 }, { "epoch": 0.11624959648011095, "grad_norm": 1.6121690273284912, "learning_rate": 9.806280015674168e-06, "loss": 0.6418, "step": 9723 }, { "epoch": 0.1162615526249716, "grad_norm": 5.326763153076172, "learning_rate": 9.80622664011611e-06, "loss": 0.5959, "step": 9724 }, { "epoch": 0.11627350876983225, "grad_norm": 2.5870742797851562, "learning_rate": 9.806173257351097e-06, "loss": 0.534, "step": 9725 }, { "epoch": 0.11628546491469291, "grad_norm": 2.4577677249908447, "learning_rate": 9.806119867379212e-06, "loss": 0.6024, "step": 9726 }, { "epoch": 0.11629742105955355, "grad_norm": 2.2497079372406006, "learning_rate": 9.80606647020053e-06, "loss": 0.6434, "step": 9727 }, { "epoch": 0.11630937720441421, "grad_norm": 4.550412178039551, "learning_rate": 9.806013065815137e-06, "loss": 0.6381, "step": 9728 }, { "epoch": 0.11632133334927486, "grad_norm": 7.23227596282959, "learning_rate": 9.805959654223109e-06, "loss": 0.6234, "step": 9729 }, { "epoch": 0.11633328949413552, "grad_norm": 2.0861024856567383, "learning_rate": 9.805906235424529e-06, "loss": 0.6009, "step": 9730 }, { "epoch": 0.11634524563899616, "grad_norm": 8.530107498168945, "learning_rate": 9.805852809419474e-06, "loss": 0.6285, "step": 9731 }, { "epoch": 0.11635720178385682, "grad_norm": 5.909365177154541, "learning_rate": 9.805799376208026e-06, "loss": 0.6228, "step": 9732 }, { "epoch": 0.11636915792871746, "grad_norm": 2.839885950088501, "learning_rate": 9.805745935790263e-06, "loss": 0.6051, "step": 9733 }, { "epoch": 0.11638111407357811, "grad_norm": 1.8721388578414917, "learning_rate": 9.80569248816627e-06, "loss": 0.6563, "step": 9734 }, { "epoch": 0.11639307021843877, "grad_norm": 2.6270275115966797, "learning_rate": 9.805639033336123e-06, "loss": 0.6591, "step": 9735 }, { "epoch": 0.11640502636329941, "grad_norm": 4.902781963348389, "learning_rate": 9.805585571299903e-06, "loss": 0.6352, "step": 9736 }, { "epoch": 0.11641698250816007, "grad_norm": 3.3944308757781982, "learning_rate": 9.805532102057689e-06, "loss": 0.6898, "step": 9737 }, { "epoch": 0.11642893865302072, "grad_norm": 2.5007436275482178, "learning_rate": 9.805478625609564e-06, "loss": 0.622, "step": 9738 }, { "epoch": 0.11644089479788138, "grad_norm": 3.9260687828063965, "learning_rate": 9.805425141955606e-06, "loss": 0.6304, "step": 9739 }, { "epoch": 0.11645285094274202, "grad_norm": 1.936559796333313, "learning_rate": 9.805371651095897e-06, "loss": 0.5351, "step": 9740 }, { "epoch": 0.11646480708760268, "grad_norm": 2.660529136657715, "learning_rate": 9.805318153030515e-06, "loss": 0.6081, "step": 9741 }, { "epoch": 0.11647676323246332, "grad_norm": 5.374217510223389, "learning_rate": 9.805264647759543e-06, "loss": 0.6078, "step": 9742 }, { "epoch": 0.11648871937732398, "grad_norm": 6.0170512199401855, "learning_rate": 9.805211135283056e-06, "loss": 0.6919, "step": 9743 }, { "epoch": 0.11650067552218463, "grad_norm": 3.4183764457702637, "learning_rate": 9.805157615601142e-06, "loss": 0.6602, "step": 9744 }, { "epoch": 0.11651263166704527, "grad_norm": 2.6654810905456543, "learning_rate": 9.805104088713875e-06, "loss": 0.6468, "step": 9745 }, { "epoch": 0.11652458781190593, "grad_norm": 2.357140064239502, "learning_rate": 9.805050554621338e-06, "loss": 0.791, "step": 9746 }, { "epoch": 0.11653654395676658, "grad_norm": 1.55707848072052, "learning_rate": 9.80499701332361e-06, "loss": 0.6264, "step": 9747 }, { "epoch": 0.11654850010162723, "grad_norm": 1.907019019126892, "learning_rate": 9.804943464820772e-06, "loss": 0.5995, "step": 9748 }, { "epoch": 0.11656045624648788, "grad_norm": 1.5074976682662964, "learning_rate": 9.804889909112905e-06, "loss": 0.6266, "step": 9749 }, { "epoch": 0.11657241239134854, "grad_norm": 3.6671018600463867, "learning_rate": 9.804836346200087e-06, "loss": 0.6001, "step": 9750 }, { "epoch": 0.11658436853620918, "grad_norm": 2.4180948734283447, "learning_rate": 9.804782776082402e-06, "loss": 0.5909, "step": 9751 }, { "epoch": 0.11659632468106984, "grad_norm": 2.9908478260040283, "learning_rate": 9.804729198759926e-06, "loss": 0.6242, "step": 9752 }, { "epoch": 0.11660828082593049, "grad_norm": 1.627747654914856, "learning_rate": 9.804675614232741e-06, "loss": 0.6768, "step": 9753 }, { "epoch": 0.11662023697079114, "grad_norm": 1.8573532104492188, "learning_rate": 9.80462202250093e-06, "loss": 0.5681, "step": 9754 }, { "epoch": 0.11663219311565179, "grad_norm": 3.647146701812744, "learning_rate": 9.804568423564572e-06, "loss": 0.6278, "step": 9755 }, { "epoch": 0.11664414926051243, "grad_norm": 2.91744065284729, "learning_rate": 9.804514817423744e-06, "loss": 0.6502, "step": 9756 }, { "epoch": 0.11665610540537309, "grad_norm": 2.2800540924072266, "learning_rate": 9.80446120407853e-06, "loss": 0.5546, "step": 9757 }, { "epoch": 0.11666806155023374, "grad_norm": 3.8923778533935547, "learning_rate": 9.804407583529008e-06, "loss": 0.6057, "step": 9758 }, { "epoch": 0.1166800176950944, "grad_norm": 1.809029221534729, "learning_rate": 9.804353955775261e-06, "loss": 0.5692, "step": 9759 }, { "epoch": 0.11669197383995504, "grad_norm": 5.26913595199585, "learning_rate": 9.804300320817368e-06, "loss": 0.7092, "step": 9760 }, { "epoch": 0.1167039299848157, "grad_norm": 3.3101751804351807, "learning_rate": 9.80424667865541e-06, "loss": 0.5827, "step": 9761 }, { "epoch": 0.11671588612967634, "grad_norm": 2.1858372688293457, "learning_rate": 9.804193029289467e-06, "loss": 0.6715, "step": 9762 }, { "epoch": 0.116727842274537, "grad_norm": 3.163191080093384, "learning_rate": 9.804139372719618e-06, "loss": 0.5816, "step": 9763 }, { "epoch": 0.11673979841939765, "grad_norm": 1.8465865850448608, "learning_rate": 9.804085708945947e-06, "loss": 0.6751, "step": 9764 }, { "epoch": 0.1167517545642583, "grad_norm": 1.8893429040908813, "learning_rate": 9.804032037968532e-06, "loss": 0.6546, "step": 9765 }, { "epoch": 0.11676371070911895, "grad_norm": 1.6804287433624268, "learning_rate": 9.803978359787454e-06, "loss": 0.6453, "step": 9766 }, { "epoch": 0.11677566685397961, "grad_norm": 1.7368754148483276, "learning_rate": 9.803924674402793e-06, "loss": 0.5675, "step": 9767 }, { "epoch": 0.11678762299884025, "grad_norm": 2.0585594177246094, "learning_rate": 9.80387098181463e-06, "loss": 0.6704, "step": 9768 }, { "epoch": 0.1167995791437009, "grad_norm": 2.2766497135162354, "learning_rate": 9.803817282023044e-06, "loss": 0.5753, "step": 9769 }, { "epoch": 0.11681153528856156, "grad_norm": 2.699535608291626, "learning_rate": 9.803763575028118e-06, "loss": 0.7117, "step": 9770 }, { "epoch": 0.1168234914334222, "grad_norm": 1.9184436798095703, "learning_rate": 9.803709860829932e-06, "loss": 0.5656, "step": 9771 }, { "epoch": 0.11683544757828286, "grad_norm": 3.8560945987701416, "learning_rate": 9.803656139428566e-06, "loss": 0.6196, "step": 9772 }, { "epoch": 0.1168474037231435, "grad_norm": 2.543797492980957, "learning_rate": 9.8036024108241e-06, "loss": 0.6214, "step": 9773 }, { "epoch": 0.11685935986800416, "grad_norm": 4.085618019104004, "learning_rate": 9.803548675016614e-06, "loss": 0.6517, "step": 9774 }, { "epoch": 0.11687131601286481, "grad_norm": 2.76979398727417, "learning_rate": 9.803494932006192e-06, "loss": 0.6245, "step": 9775 }, { "epoch": 0.11688327215772547, "grad_norm": 1.8134230375289917, "learning_rate": 9.80344118179291e-06, "loss": 0.6411, "step": 9776 }, { "epoch": 0.11689522830258611, "grad_norm": 1.7357386350631714, "learning_rate": 9.803387424376852e-06, "loss": 0.5491, "step": 9777 }, { "epoch": 0.11690718444744677, "grad_norm": 1.633960247039795, "learning_rate": 9.803333659758098e-06, "loss": 0.6135, "step": 9778 }, { "epoch": 0.11691914059230742, "grad_norm": 2.250366687774658, "learning_rate": 9.803279887936728e-06, "loss": 0.6944, "step": 9779 }, { "epoch": 0.11693109673716806, "grad_norm": 1.6735011339187622, "learning_rate": 9.803226108912822e-06, "loss": 0.5638, "step": 9780 }, { "epoch": 0.11694305288202872, "grad_norm": 2.7350525856018066, "learning_rate": 9.80317232268646e-06, "loss": 0.5555, "step": 9781 }, { "epoch": 0.11695500902688936, "grad_norm": 1.4991127252578735, "learning_rate": 9.803118529257728e-06, "loss": 0.5036, "step": 9782 }, { "epoch": 0.11696696517175002, "grad_norm": 1.7310402393341064, "learning_rate": 9.803064728626699e-06, "loss": 0.6721, "step": 9783 }, { "epoch": 0.11697892131661067, "grad_norm": 4.965047359466553, "learning_rate": 9.803010920793459e-06, "loss": 0.6083, "step": 9784 }, { "epoch": 0.11699087746147133, "grad_norm": 2.1766176223754883, "learning_rate": 9.802957105758086e-06, "loss": 0.6772, "step": 9785 }, { "epoch": 0.11700283360633197, "grad_norm": 4.656555652618408, "learning_rate": 9.802903283520663e-06, "loss": 0.69, "step": 9786 }, { "epoch": 0.11701478975119263, "grad_norm": 1.548510193824768, "learning_rate": 9.802849454081269e-06, "loss": 0.6442, "step": 9787 }, { "epoch": 0.11702674589605327, "grad_norm": 3.3153138160705566, "learning_rate": 9.802795617439985e-06, "loss": 0.5317, "step": 9788 }, { "epoch": 0.11703870204091393, "grad_norm": 3.3805465698242188, "learning_rate": 9.802741773596892e-06, "loss": 0.7082, "step": 9789 }, { "epoch": 0.11705065818577458, "grad_norm": 3.1763100624084473, "learning_rate": 9.80268792255207e-06, "loss": 0.6164, "step": 9790 }, { "epoch": 0.11706261433063524, "grad_norm": 2.076094150543213, "learning_rate": 9.802634064305601e-06, "loss": 0.6627, "step": 9791 }, { "epoch": 0.11707457047549588, "grad_norm": 1.8125252723693848, "learning_rate": 9.802580198857564e-06, "loss": 0.7164, "step": 9792 }, { "epoch": 0.11708652662035653, "grad_norm": 2.1751768589019775, "learning_rate": 9.802526326208044e-06, "loss": 0.6472, "step": 9793 }, { "epoch": 0.11709848276521719, "grad_norm": 2.6524932384490967, "learning_rate": 9.802472446357117e-06, "loss": 0.5884, "step": 9794 }, { "epoch": 0.11711043891007783, "grad_norm": 4.292466163635254, "learning_rate": 9.802418559304865e-06, "loss": 0.59, "step": 9795 }, { "epoch": 0.11712239505493849, "grad_norm": 1.595616102218628, "learning_rate": 9.802364665051369e-06, "loss": 0.5661, "step": 9796 }, { "epoch": 0.11713435119979913, "grad_norm": 1.5665936470031738, "learning_rate": 9.802310763596712e-06, "loss": 0.6348, "step": 9797 }, { "epoch": 0.11714630734465979, "grad_norm": 1.345115303993225, "learning_rate": 9.802256854940971e-06, "loss": 0.5604, "step": 9798 }, { "epoch": 0.11715826348952044, "grad_norm": 3.0758206844329834, "learning_rate": 9.80220293908423e-06, "loss": 0.6449, "step": 9799 }, { "epoch": 0.1171702196343811, "grad_norm": 2.5747580528259277, "learning_rate": 9.802149016026568e-06, "loss": 0.529, "step": 9800 }, { "epoch": 0.11718217577924174, "grad_norm": 2.357712507247925, "learning_rate": 9.802095085768066e-06, "loss": 0.5757, "step": 9801 }, { "epoch": 0.1171941319241024, "grad_norm": 3.316683053970337, "learning_rate": 9.802041148308805e-06, "loss": 0.6488, "step": 9802 }, { "epoch": 0.11720608806896304, "grad_norm": 1.62416672706604, "learning_rate": 9.801987203648869e-06, "loss": 0.6015, "step": 9803 }, { "epoch": 0.11721804421382369, "grad_norm": 1.3242096900939941, "learning_rate": 9.801933251788333e-06, "loss": 0.544, "step": 9804 }, { "epoch": 0.11723000035868435, "grad_norm": 1.5728437900543213, "learning_rate": 9.801879292727283e-06, "loss": 0.6407, "step": 9805 }, { "epoch": 0.11724195650354499, "grad_norm": 2.1612753868103027, "learning_rate": 9.801825326465797e-06, "loss": 0.575, "step": 9806 }, { "epoch": 0.11725391264840565, "grad_norm": 1.7837527990341187, "learning_rate": 9.801771353003958e-06, "loss": 0.6213, "step": 9807 }, { "epoch": 0.1172658687932663, "grad_norm": 2.0684893131256104, "learning_rate": 9.801717372341844e-06, "loss": 0.6796, "step": 9808 }, { "epoch": 0.11727782493812695, "grad_norm": 4.979133129119873, "learning_rate": 9.801663384479539e-06, "loss": 0.6125, "step": 9809 }, { "epoch": 0.1172897810829876, "grad_norm": 3.294469118118286, "learning_rate": 9.801609389417122e-06, "loss": 0.6677, "step": 9810 }, { "epoch": 0.11730173722784826, "grad_norm": 2.524934768676758, "learning_rate": 9.801555387154675e-06, "loss": 0.6755, "step": 9811 }, { "epoch": 0.1173136933727089, "grad_norm": 1.6404825448989868, "learning_rate": 9.801501377692278e-06, "loss": 0.5112, "step": 9812 }, { "epoch": 0.11732564951756956, "grad_norm": 1.9749932289123535, "learning_rate": 9.801447361030014e-06, "loss": 0.632, "step": 9813 }, { "epoch": 0.1173376056624302, "grad_norm": 2.090158700942993, "learning_rate": 9.801393337167961e-06, "loss": 0.6471, "step": 9814 }, { "epoch": 0.11734956180729085, "grad_norm": 2.0689496994018555, "learning_rate": 9.801339306106203e-06, "loss": 0.6473, "step": 9815 }, { "epoch": 0.11736151795215151, "grad_norm": 2.150923252105713, "learning_rate": 9.80128526784482e-06, "loss": 0.6781, "step": 9816 }, { "epoch": 0.11737347409701215, "grad_norm": 2.5203464031219482, "learning_rate": 9.80123122238389e-06, "loss": 0.6149, "step": 9817 }, { "epoch": 0.11738543024187281, "grad_norm": 1.587053894996643, "learning_rate": 9.801177169723499e-06, "loss": 0.5641, "step": 9818 }, { "epoch": 0.11739738638673346, "grad_norm": 3.0551559925079346, "learning_rate": 9.801123109863725e-06, "loss": 0.6883, "step": 9819 }, { "epoch": 0.11740934253159412, "grad_norm": 4.4857988357543945, "learning_rate": 9.801069042804651e-06, "loss": 0.6022, "step": 9820 }, { "epoch": 0.11742129867645476, "grad_norm": 8.004732131958008, "learning_rate": 9.801014968546356e-06, "loss": 0.5942, "step": 9821 }, { "epoch": 0.11743325482131542, "grad_norm": 1.8630576133728027, "learning_rate": 9.800960887088922e-06, "loss": 0.6417, "step": 9822 }, { "epoch": 0.11744521096617606, "grad_norm": 3.001675605773926, "learning_rate": 9.800906798432429e-06, "loss": 0.5761, "step": 9823 }, { "epoch": 0.11745716711103672, "grad_norm": 1.9950209856033325, "learning_rate": 9.80085270257696e-06, "loss": 0.6685, "step": 9824 }, { "epoch": 0.11746912325589737, "grad_norm": 3.0449752807617188, "learning_rate": 9.800798599522596e-06, "loss": 0.5965, "step": 9825 }, { "epoch": 0.11748107940075803, "grad_norm": 3.119626760482788, "learning_rate": 9.800744489269417e-06, "loss": 0.6928, "step": 9826 }, { "epoch": 0.11749303554561867, "grad_norm": 3.196599245071411, "learning_rate": 9.800690371817504e-06, "loss": 0.5863, "step": 9827 }, { "epoch": 0.11750499169047932, "grad_norm": 1.466848611831665, "learning_rate": 9.800636247166939e-06, "loss": 0.5511, "step": 9828 }, { "epoch": 0.11751694783533997, "grad_norm": 1.5874334573745728, "learning_rate": 9.800582115317804e-06, "loss": 0.5586, "step": 9829 }, { "epoch": 0.11752890398020062, "grad_norm": 2.382607936859131, "learning_rate": 9.800527976270178e-06, "loss": 0.7063, "step": 9830 }, { "epoch": 0.11754086012506128, "grad_norm": 1.7833987474441528, "learning_rate": 9.800473830024144e-06, "loss": 0.6442, "step": 9831 }, { "epoch": 0.11755281626992192, "grad_norm": 2.809157133102417, "learning_rate": 9.800419676579781e-06, "loss": 0.6113, "step": 9832 }, { "epoch": 0.11756477241478258, "grad_norm": 3.385498285293579, "learning_rate": 9.800365515937173e-06, "loss": 0.5863, "step": 9833 }, { "epoch": 0.11757672855964323, "grad_norm": 8.774651527404785, "learning_rate": 9.8003113480964e-06, "loss": 0.5822, "step": 9834 }, { "epoch": 0.11758868470450388, "grad_norm": 1.466978907585144, "learning_rate": 9.800257173057541e-06, "loss": 0.6203, "step": 9835 }, { "epoch": 0.11760064084936453, "grad_norm": 1.8514047861099243, "learning_rate": 9.800202990820682e-06, "loss": 0.7238, "step": 9836 }, { "epoch": 0.11761259699422519, "grad_norm": 2.3066346645355225, "learning_rate": 9.8001488013859e-06, "loss": 0.7495, "step": 9837 }, { "epoch": 0.11762455313908583, "grad_norm": 2.0560288429260254, "learning_rate": 9.800094604753279e-06, "loss": 0.5846, "step": 9838 }, { "epoch": 0.11763650928394648, "grad_norm": 1.7779489755630493, "learning_rate": 9.800040400922897e-06, "loss": 0.6051, "step": 9839 }, { "epoch": 0.11764846542880714, "grad_norm": 2.1184957027435303, "learning_rate": 9.799986189894839e-06, "loss": 0.6304, "step": 9840 }, { "epoch": 0.11766042157366778, "grad_norm": 1.9809110164642334, "learning_rate": 9.799931971669184e-06, "loss": 0.7009, "step": 9841 }, { "epoch": 0.11767237771852844, "grad_norm": 2.440682888031006, "learning_rate": 9.799877746246014e-06, "loss": 0.7135, "step": 9842 }, { "epoch": 0.11768433386338908, "grad_norm": 2.209146022796631, "learning_rate": 9.79982351362541e-06, "loss": 0.6649, "step": 9843 }, { "epoch": 0.11769629000824974, "grad_norm": 2.598923921585083, "learning_rate": 9.799769273807453e-06, "loss": 0.7095, "step": 9844 }, { "epoch": 0.11770824615311039, "grad_norm": 2.2008559703826904, "learning_rate": 9.799715026792226e-06, "loss": 0.6498, "step": 9845 }, { "epoch": 0.11772020229797105, "grad_norm": 1.9016250371932983, "learning_rate": 9.79966077257981e-06, "loss": 0.6739, "step": 9846 }, { "epoch": 0.11773215844283169, "grad_norm": 1.4646973609924316, "learning_rate": 9.799606511170286e-06, "loss": 0.6198, "step": 9847 }, { "epoch": 0.11774411458769235, "grad_norm": 2.9718549251556396, "learning_rate": 9.799552242563733e-06, "loss": 0.6685, "step": 9848 }, { "epoch": 0.117756070732553, "grad_norm": 1.9941377639770508, "learning_rate": 9.799497966760234e-06, "loss": 0.5825, "step": 9849 }, { "epoch": 0.11776802687741365, "grad_norm": 1.928640604019165, "learning_rate": 9.799443683759873e-06, "loss": 0.5848, "step": 9850 }, { "epoch": 0.1177799830222743, "grad_norm": 2.227750778198242, "learning_rate": 9.799389393562727e-06, "loss": 0.6516, "step": 9851 }, { "epoch": 0.11779193916713494, "grad_norm": 1.9035860300064087, "learning_rate": 9.79933509616888e-06, "loss": 0.5875, "step": 9852 }, { "epoch": 0.1178038953119956, "grad_norm": 1.4588695764541626, "learning_rate": 9.799280791578413e-06, "loss": 0.636, "step": 9853 }, { "epoch": 0.11781585145685625, "grad_norm": 3.4392666816711426, "learning_rate": 9.799226479791408e-06, "loss": 0.6647, "step": 9854 }, { "epoch": 0.1178278076017169, "grad_norm": 5.366650581359863, "learning_rate": 9.799172160807946e-06, "loss": 0.7232, "step": 9855 }, { "epoch": 0.11783976374657755, "grad_norm": 1.7542901039123535, "learning_rate": 9.79911783462811e-06, "loss": 0.6618, "step": 9856 }, { "epoch": 0.11785171989143821, "grad_norm": 1.5247443914413452, "learning_rate": 9.799063501251977e-06, "loss": 0.6001, "step": 9857 }, { "epoch": 0.11786367603629885, "grad_norm": 4.224219799041748, "learning_rate": 9.799009160679631e-06, "loss": 0.6893, "step": 9858 }, { "epoch": 0.11787563218115951, "grad_norm": 2.6864421367645264, "learning_rate": 9.798954812911155e-06, "loss": 0.6, "step": 9859 }, { "epoch": 0.11788758832602016, "grad_norm": 1.8161154985427856, "learning_rate": 9.79890045794663e-06, "loss": 0.6489, "step": 9860 }, { "epoch": 0.11789954447088082, "grad_norm": 1.3491195440292358, "learning_rate": 9.798846095786134e-06, "loss": 0.6601, "step": 9861 }, { "epoch": 0.11791150061574146, "grad_norm": 2.2341160774230957, "learning_rate": 9.798791726429752e-06, "loss": 0.6477, "step": 9862 }, { "epoch": 0.1179234567606021, "grad_norm": 2.255483388900757, "learning_rate": 9.798737349877567e-06, "loss": 0.5953, "step": 9863 }, { "epoch": 0.11793541290546276, "grad_norm": 1.6031848192214966, "learning_rate": 9.798682966129656e-06, "loss": 0.5703, "step": 9864 }, { "epoch": 0.11794736905032341, "grad_norm": 1.867964267730713, "learning_rate": 9.798628575186105e-06, "loss": 0.5515, "step": 9865 }, { "epoch": 0.11795932519518407, "grad_norm": 1.9800220727920532, "learning_rate": 9.798574177046991e-06, "loss": 0.5999, "step": 9866 }, { "epoch": 0.11797128134004471, "grad_norm": 3.8845951557159424, "learning_rate": 9.798519771712399e-06, "loss": 0.6065, "step": 9867 }, { "epoch": 0.11798323748490537, "grad_norm": 1.7858649492263794, "learning_rate": 9.798465359182409e-06, "loss": 0.6969, "step": 9868 }, { "epoch": 0.11799519362976602, "grad_norm": 1.5688729286193848, "learning_rate": 9.798410939457103e-06, "loss": 0.7115, "step": 9869 }, { "epoch": 0.11800714977462667, "grad_norm": 2.13529109954834, "learning_rate": 9.798356512536564e-06, "loss": 0.6675, "step": 9870 }, { "epoch": 0.11801910591948732, "grad_norm": 1.7754215002059937, "learning_rate": 9.798302078420871e-06, "loss": 0.6198, "step": 9871 }, { "epoch": 0.11803106206434798, "grad_norm": 30.511869430541992, "learning_rate": 9.79824763711011e-06, "loss": 0.6542, "step": 9872 }, { "epoch": 0.11804301820920862, "grad_norm": 2.5930280685424805, "learning_rate": 9.798193188604357e-06, "loss": 0.62, "step": 9873 }, { "epoch": 0.11805497435406927, "grad_norm": 2.439178943634033, "learning_rate": 9.798138732903697e-06, "loss": 0.6925, "step": 9874 }, { "epoch": 0.11806693049892993, "grad_norm": 1.7357177734375, "learning_rate": 9.79808427000821e-06, "loss": 0.6579, "step": 9875 }, { "epoch": 0.11807888664379057, "grad_norm": 3.0454189777374268, "learning_rate": 9.798029799917979e-06, "loss": 0.5837, "step": 9876 }, { "epoch": 0.11809084278865123, "grad_norm": 2.474332332611084, "learning_rate": 9.797975322633084e-06, "loss": 0.6822, "step": 9877 }, { "epoch": 0.11810279893351187, "grad_norm": 1.908149242401123, "learning_rate": 9.79792083815361e-06, "loss": 0.5972, "step": 9878 }, { "epoch": 0.11811475507837253, "grad_norm": 1.5623924732208252, "learning_rate": 9.797866346479637e-06, "loss": 0.6571, "step": 9879 }, { "epoch": 0.11812671122323318, "grad_norm": 2.116046190261841, "learning_rate": 9.797811847611245e-06, "loss": 0.5676, "step": 9880 }, { "epoch": 0.11813866736809384, "grad_norm": 4.361329078674316, "learning_rate": 9.797757341548518e-06, "loss": 0.5768, "step": 9881 }, { "epoch": 0.11815062351295448, "grad_norm": 1.4692264795303345, "learning_rate": 9.797702828291535e-06, "loss": 0.6221, "step": 9882 }, { "epoch": 0.11816257965781514, "grad_norm": 2.3288795948028564, "learning_rate": 9.797648307840381e-06, "loss": 0.5822, "step": 9883 }, { "epoch": 0.11817453580267578, "grad_norm": 2.7566449642181396, "learning_rate": 9.797593780195136e-06, "loss": 0.5838, "step": 9884 }, { "epoch": 0.11818649194753644, "grad_norm": 1.6509414911270142, "learning_rate": 9.797539245355882e-06, "loss": 0.7369, "step": 9885 }, { "epoch": 0.11819844809239709, "grad_norm": 1.745222568511963, "learning_rate": 9.797484703322702e-06, "loss": 0.6174, "step": 9886 }, { "epoch": 0.11821040423725773, "grad_norm": 1.5193653106689453, "learning_rate": 9.797430154095675e-06, "loss": 0.6496, "step": 9887 }, { "epoch": 0.11822236038211839, "grad_norm": 1.8677486181259155, "learning_rate": 9.797375597674884e-06, "loss": 0.5885, "step": 9888 }, { "epoch": 0.11823431652697904, "grad_norm": 3.2201592922210693, "learning_rate": 9.797321034060414e-06, "loss": 0.5939, "step": 9889 }, { "epoch": 0.1182462726718397, "grad_norm": 2.4982621669769287, "learning_rate": 9.797266463252341e-06, "loss": 0.5939, "step": 9890 }, { "epoch": 0.11825822881670034, "grad_norm": 26.21946907043457, "learning_rate": 9.797211885250752e-06, "loss": 0.5859, "step": 9891 }, { "epoch": 0.118270184961561, "grad_norm": 2.825493097305298, "learning_rate": 9.797157300055727e-06, "loss": 0.627, "step": 9892 }, { "epoch": 0.11828214110642164, "grad_norm": 2.0808396339416504, "learning_rate": 9.797102707667345e-06, "loss": 0.5743, "step": 9893 }, { "epoch": 0.1182940972512823, "grad_norm": 1.6517200469970703, "learning_rate": 9.797048108085693e-06, "loss": 0.6844, "step": 9894 }, { "epoch": 0.11830605339614295, "grad_norm": 2.8390707969665527, "learning_rate": 9.796993501310848e-06, "loss": 0.5967, "step": 9895 }, { "epoch": 0.1183180095410036, "grad_norm": 1.9290605783462524, "learning_rate": 9.796938887342896e-06, "loss": 0.7159, "step": 9896 }, { "epoch": 0.11832996568586425, "grad_norm": 1.9531457424163818, "learning_rate": 9.796884266181917e-06, "loss": 0.5651, "step": 9897 }, { "epoch": 0.1183419218307249, "grad_norm": 2.8272910118103027, "learning_rate": 9.796829637827993e-06, "loss": 0.6523, "step": 9898 }, { "epoch": 0.11835387797558555, "grad_norm": 2.3638360500335693, "learning_rate": 9.796775002281204e-06, "loss": 0.7221, "step": 9899 }, { "epoch": 0.1183658341204462, "grad_norm": 2.6389966011047363, "learning_rate": 9.796720359541636e-06, "loss": 0.7234, "step": 9900 }, { "epoch": 0.11837779026530686, "grad_norm": 1.632899284362793, "learning_rate": 9.796665709609368e-06, "loss": 0.6241, "step": 9901 }, { "epoch": 0.1183897464101675, "grad_norm": 1.5358814001083374, "learning_rate": 9.796611052484484e-06, "loss": 0.7051, "step": 9902 }, { "epoch": 0.11840170255502816, "grad_norm": 1.6438087224960327, "learning_rate": 9.796556388167063e-06, "loss": 0.5828, "step": 9903 }, { "epoch": 0.1184136586998888, "grad_norm": 6.8106231689453125, "learning_rate": 9.79650171665719e-06, "loss": 0.5372, "step": 9904 }, { "epoch": 0.11842561484474946, "grad_norm": 2.3757119178771973, "learning_rate": 9.796447037954943e-06, "loss": 0.6006, "step": 9905 }, { "epoch": 0.11843757098961011, "grad_norm": 1.5956073999404907, "learning_rate": 9.796392352060408e-06, "loss": 0.6784, "step": 9906 }, { "epoch": 0.11844952713447077, "grad_norm": 7.104597568511963, "learning_rate": 9.796337658973667e-06, "loss": 0.6087, "step": 9907 }, { "epoch": 0.11846148327933141, "grad_norm": 2.117758274078369, "learning_rate": 9.796282958694799e-06, "loss": 0.686, "step": 9908 }, { "epoch": 0.11847343942419207, "grad_norm": 4.61106538772583, "learning_rate": 9.79622825122389e-06, "loss": 0.7116, "step": 9909 }, { "epoch": 0.11848539556905271, "grad_norm": 4.582176208496094, "learning_rate": 9.796173536561017e-06, "loss": 0.5931, "step": 9910 }, { "epoch": 0.11849735171391336, "grad_norm": 3.090940237045288, "learning_rate": 9.796118814706265e-06, "loss": 0.6118, "step": 9911 }, { "epoch": 0.11850930785877402, "grad_norm": 2.3247883319854736, "learning_rate": 9.796064085659718e-06, "loss": 0.6528, "step": 9912 }, { "epoch": 0.11852126400363466, "grad_norm": 1.9889612197875977, "learning_rate": 9.796009349421454e-06, "loss": 0.704, "step": 9913 }, { "epoch": 0.11853322014849532, "grad_norm": 3.408932685852051, "learning_rate": 9.795954605991557e-06, "loss": 0.5914, "step": 9914 }, { "epoch": 0.11854517629335597, "grad_norm": 1.7705105543136597, "learning_rate": 9.795899855370109e-06, "loss": 0.606, "step": 9915 }, { "epoch": 0.11855713243821663, "grad_norm": 1.7987873554229736, "learning_rate": 9.795845097557193e-06, "loss": 0.6362, "step": 9916 }, { "epoch": 0.11856908858307727, "grad_norm": 1.940970540046692, "learning_rate": 9.795790332552888e-06, "loss": 0.6063, "step": 9917 }, { "epoch": 0.11858104472793793, "grad_norm": 2.5334010124206543, "learning_rate": 9.795735560357281e-06, "loss": 0.7244, "step": 9918 }, { "epoch": 0.11859300087279857, "grad_norm": 1.510279655456543, "learning_rate": 9.795680780970451e-06, "loss": 0.5981, "step": 9919 }, { "epoch": 0.11860495701765923, "grad_norm": 2.008246421813965, "learning_rate": 9.795625994392479e-06, "loss": 0.5512, "step": 9920 }, { "epoch": 0.11861691316251988, "grad_norm": 1.9995996952056885, "learning_rate": 9.79557120062345e-06, "loss": 0.5775, "step": 9921 }, { "epoch": 0.11862886930738052, "grad_norm": 2.6718175411224365, "learning_rate": 9.795516399663445e-06, "loss": 0.628, "step": 9922 }, { "epoch": 0.11864082545224118, "grad_norm": 1.5830379724502563, "learning_rate": 9.795461591512546e-06, "loss": 0.5986, "step": 9923 }, { "epoch": 0.11865278159710183, "grad_norm": 1.635178804397583, "learning_rate": 9.795406776170834e-06, "loss": 0.6048, "step": 9924 }, { "epoch": 0.11866473774196248, "grad_norm": 1.7444069385528564, "learning_rate": 9.795351953638395e-06, "loss": 0.6056, "step": 9925 }, { "epoch": 0.11867669388682313, "grad_norm": 4.003472328186035, "learning_rate": 9.795297123915309e-06, "loss": 0.667, "step": 9926 }, { "epoch": 0.11868865003168379, "grad_norm": 2.698976993560791, "learning_rate": 9.795242287001655e-06, "loss": 0.6159, "step": 9927 }, { "epoch": 0.11870060617654443, "grad_norm": 3.6708338260650635, "learning_rate": 9.795187442897521e-06, "loss": 0.6293, "step": 9928 }, { "epoch": 0.11871256232140509, "grad_norm": 16.74312973022461, "learning_rate": 9.795132591602982e-06, "loss": 0.6248, "step": 9929 }, { "epoch": 0.11872451846626574, "grad_norm": 1.9107651710510254, "learning_rate": 9.79507773311813e-06, "loss": 0.6298, "step": 9930 }, { "epoch": 0.1187364746111264, "grad_norm": 1.7469488382339478, "learning_rate": 9.795022867443038e-06, "loss": 0.6338, "step": 9931 }, { "epoch": 0.11874843075598704, "grad_norm": 1.963135838508606, "learning_rate": 9.794967994577792e-06, "loss": 0.7822, "step": 9932 }, { "epoch": 0.1187603869008477, "grad_norm": 2.185763120651245, "learning_rate": 9.794913114522477e-06, "loss": 0.677, "step": 9933 }, { "epoch": 0.11877234304570834, "grad_norm": 1.6601792573928833, "learning_rate": 9.794858227277173e-06, "loss": 0.6583, "step": 9934 }, { "epoch": 0.11878429919056899, "grad_norm": 1.7312761545181274, "learning_rate": 9.79480333284196e-06, "loss": 0.5675, "step": 9935 }, { "epoch": 0.11879625533542965, "grad_norm": 2.0597238540649414, "learning_rate": 9.794748431216922e-06, "loss": 0.6217, "step": 9936 }, { "epoch": 0.11880821148029029, "grad_norm": 1.6097056865692139, "learning_rate": 9.794693522402145e-06, "loss": 0.6226, "step": 9937 }, { "epoch": 0.11882016762515095, "grad_norm": 1.8831685781478882, "learning_rate": 9.794638606397706e-06, "loss": 0.624, "step": 9938 }, { "epoch": 0.1188321237700116, "grad_norm": 1.5205312967300415, "learning_rate": 9.794583683203688e-06, "loss": 0.6251, "step": 9939 }, { "epoch": 0.11884407991487225, "grad_norm": 2.764853000640869, "learning_rate": 9.794528752820175e-06, "loss": 0.7114, "step": 9940 }, { "epoch": 0.1188560360597329, "grad_norm": 2.019794225692749, "learning_rate": 9.794473815247252e-06, "loss": 0.6862, "step": 9941 }, { "epoch": 0.11886799220459356, "grad_norm": 3.506032705307007, "learning_rate": 9.794418870484996e-06, "loss": 0.6418, "step": 9942 }, { "epoch": 0.1188799483494542, "grad_norm": 2.400061845779419, "learning_rate": 9.794363918533493e-06, "loss": 0.7794, "step": 9943 }, { "epoch": 0.11889190449431486, "grad_norm": 1.7864500284194946, "learning_rate": 9.794308959392824e-06, "loss": 0.6755, "step": 9944 }, { "epoch": 0.1189038606391755, "grad_norm": 1.5073020458221436, "learning_rate": 9.794253993063069e-06, "loss": 0.5456, "step": 9945 }, { "epoch": 0.11891581678403615, "grad_norm": 6.567835807800293, "learning_rate": 9.794199019544317e-06, "loss": 0.664, "step": 9946 }, { "epoch": 0.11892777292889681, "grad_norm": 1.629384994506836, "learning_rate": 9.794144038836646e-06, "loss": 0.5813, "step": 9947 }, { "epoch": 0.11893972907375745, "grad_norm": 2.220789670944214, "learning_rate": 9.794089050940136e-06, "loss": 0.5349, "step": 9948 }, { "epoch": 0.11895168521861811, "grad_norm": 2.261251211166382, "learning_rate": 9.794034055854875e-06, "loss": 0.7174, "step": 9949 }, { "epoch": 0.11896364136347876, "grad_norm": 1.4535096883773804, "learning_rate": 9.793979053580942e-06, "loss": 0.5139, "step": 9950 }, { "epoch": 0.11897559750833941, "grad_norm": 4.777159214019775, "learning_rate": 9.793924044118421e-06, "loss": 0.6291, "step": 9951 }, { "epoch": 0.11898755365320006, "grad_norm": 2.9758002758026123, "learning_rate": 9.793869027467393e-06, "loss": 0.5554, "step": 9952 }, { "epoch": 0.11899950979806072, "grad_norm": 1.7980279922485352, "learning_rate": 9.793814003627941e-06, "loss": 0.6992, "step": 9953 }, { "epoch": 0.11901146594292136, "grad_norm": 3.112833023071289, "learning_rate": 9.79375897260015e-06, "loss": 0.6687, "step": 9954 }, { "epoch": 0.11902342208778202, "grad_norm": 2.579681396484375, "learning_rate": 9.793703934384098e-06, "loss": 0.6404, "step": 9955 }, { "epoch": 0.11903537823264267, "grad_norm": 1.8757226467132568, "learning_rate": 9.793648888979872e-06, "loss": 0.6757, "step": 9956 }, { "epoch": 0.11904733437750331, "grad_norm": 1.7991321086883545, "learning_rate": 9.793593836387552e-06, "loss": 0.6371, "step": 9957 }, { "epoch": 0.11905929052236397, "grad_norm": 1.9493024349212646, "learning_rate": 9.79353877660722e-06, "loss": 0.6677, "step": 9958 }, { "epoch": 0.11907124666722461, "grad_norm": 2.7661778926849365, "learning_rate": 9.79348370963896e-06, "loss": 0.6024, "step": 9959 }, { "epoch": 0.11908320281208527, "grad_norm": 2.0272257328033447, "learning_rate": 9.793428635482854e-06, "loss": 0.6657, "step": 9960 }, { "epoch": 0.11909515895694592, "grad_norm": 2.1999030113220215, "learning_rate": 9.793373554138986e-06, "loss": 0.6095, "step": 9961 }, { "epoch": 0.11910711510180658, "grad_norm": 1.605230689048767, "learning_rate": 9.793318465607436e-06, "loss": 0.6693, "step": 9962 }, { "epoch": 0.11911907124666722, "grad_norm": 2.2927708625793457, "learning_rate": 9.793263369888285e-06, "loss": 0.5774, "step": 9963 }, { "epoch": 0.11913102739152788, "grad_norm": 1.6752443313598633, "learning_rate": 9.793208266981624e-06, "loss": 0.6867, "step": 9964 }, { "epoch": 0.11914298353638852, "grad_norm": 1.8663718700408936, "learning_rate": 9.793153156887527e-06, "loss": 0.7054, "step": 9965 }, { "epoch": 0.11915493968124918, "grad_norm": 2.672672986984253, "learning_rate": 9.793098039606082e-06, "loss": 0.6771, "step": 9966 }, { "epoch": 0.11916689582610983, "grad_norm": 2.1729042530059814, "learning_rate": 9.793042915137367e-06, "loss": 0.6633, "step": 9967 }, { "epoch": 0.11917885197097049, "grad_norm": 1.9095649719238281, "learning_rate": 9.792987783481468e-06, "loss": 0.5734, "step": 9968 }, { "epoch": 0.11919080811583113, "grad_norm": 2.6562740802764893, "learning_rate": 9.792932644638466e-06, "loss": 0.6274, "step": 9969 }, { "epoch": 0.11920276426069178, "grad_norm": 1.4797935485839844, "learning_rate": 9.792877498608446e-06, "loss": 0.5569, "step": 9970 }, { "epoch": 0.11921472040555244, "grad_norm": 1.456967830657959, "learning_rate": 9.792822345391488e-06, "loss": 0.6325, "step": 9971 }, { "epoch": 0.11922667655041308, "grad_norm": 1.4719725847244263, "learning_rate": 9.792767184987678e-06, "loss": 0.6237, "step": 9972 }, { "epoch": 0.11923863269527374, "grad_norm": 1.614929437637329, "learning_rate": 9.792712017397094e-06, "loss": 0.6929, "step": 9973 }, { "epoch": 0.11925058884013438, "grad_norm": 2.465122699737549, "learning_rate": 9.792656842619823e-06, "loss": 0.6688, "step": 9974 }, { "epoch": 0.11926254498499504, "grad_norm": 1.6776962280273438, "learning_rate": 9.792601660655945e-06, "loss": 0.5987, "step": 9975 }, { "epoch": 0.11927450112985569, "grad_norm": 2.1426055431365967, "learning_rate": 9.792546471505544e-06, "loss": 0.6298, "step": 9976 }, { "epoch": 0.11928645727471635, "grad_norm": 2.485830783843994, "learning_rate": 9.792491275168702e-06, "loss": 0.698, "step": 9977 }, { "epoch": 0.11929841341957699, "grad_norm": 2.0332441329956055, "learning_rate": 9.792436071645504e-06, "loss": 0.5407, "step": 9978 }, { "epoch": 0.11931036956443765, "grad_norm": 3.533198118209839, "learning_rate": 9.79238086093603e-06, "loss": 0.6127, "step": 9979 }, { "epoch": 0.1193223257092983, "grad_norm": 2.8458142280578613, "learning_rate": 9.792325643040364e-06, "loss": 0.6411, "step": 9980 }, { "epoch": 0.11933428185415894, "grad_norm": 1.8143417835235596, "learning_rate": 9.792270417958588e-06, "loss": 0.6669, "step": 9981 }, { "epoch": 0.1193462379990196, "grad_norm": 12.761820793151855, "learning_rate": 9.792215185690786e-06, "loss": 0.6813, "step": 9982 }, { "epoch": 0.11935819414388024, "grad_norm": 27.930299758911133, "learning_rate": 9.792159946237042e-06, "loss": 0.7351, "step": 9983 }, { "epoch": 0.1193701502887409, "grad_norm": 2.8655998706817627, "learning_rate": 9.792104699597434e-06, "loss": 0.6435, "step": 9984 }, { "epoch": 0.11938210643360155, "grad_norm": 10.761787414550781, "learning_rate": 9.792049445772049e-06, "loss": 0.6796, "step": 9985 }, { "epoch": 0.1193940625784622, "grad_norm": 1.468190312385559, "learning_rate": 9.791994184760968e-06, "loss": 0.5865, "step": 9986 }, { "epoch": 0.11940601872332285, "grad_norm": 4.630648136138916, "learning_rate": 9.791938916564277e-06, "loss": 0.5873, "step": 9987 }, { "epoch": 0.11941797486818351, "grad_norm": 2.726663589477539, "learning_rate": 9.791883641182056e-06, "loss": 0.6146, "step": 9988 }, { "epoch": 0.11942993101304415, "grad_norm": 1.8271862268447876, "learning_rate": 9.791828358614388e-06, "loss": 0.6068, "step": 9989 }, { "epoch": 0.11944188715790481, "grad_norm": 10.171838760375977, "learning_rate": 9.791773068861357e-06, "loss": 0.6211, "step": 9990 }, { "epoch": 0.11945384330276546, "grad_norm": 1.5983556509017944, "learning_rate": 9.791717771923044e-06, "loss": 0.6862, "step": 9991 }, { "epoch": 0.11946579944762611, "grad_norm": 2.289165735244751, "learning_rate": 9.791662467799533e-06, "loss": 0.6421, "step": 9992 }, { "epoch": 0.11947775559248676, "grad_norm": 4.053098678588867, "learning_rate": 9.791607156490908e-06, "loss": 0.6075, "step": 9993 }, { "epoch": 0.1194897117373474, "grad_norm": 2.845947265625, "learning_rate": 9.791551837997251e-06, "loss": 0.6422, "step": 9994 }, { "epoch": 0.11950166788220806, "grad_norm": 1.606744647026062, "learning_rate": 9.791496512318644e-06, "loss": 0.6219, "step": 9995 }, { "epoch": 0.11951362402706871, "grad_norm": 1.695676326751709, "learning_rate": 9.791441179455173e-06, "loss": 0.6827, "step": 9996 }, { "epoch": 0.11952558017192937, "grad_norm": 1.7261358499526978, "learning_rate": 9.791385839406918e-06, "loss": 0.6, "step": 9997 }, { "epoch": 0.11953753631679001, "grad_norm": 1.9449228048324585, "learning_rate": 9.79133049217396e-06, "loss": 0.6592, "step": 9998 }, { "epoch": 0.11954949246165067, "grad_norm": 3.967832326889038, "learning_rate": 9.791275137756389e-06, "loss": 0.5959, "step": 9999 }, { "epoch": 0.11956144860651131, "grad_norm": 1.8324236869812012, "learning_rate": 9.791219776154281e-06, "loss": 0.6115, "step": 10000 }, { "epoch": 0.11957340475137197, "grad_norm": 2.3273868560791016, "learning_rate": 9.791164407367723e-06, "loss": 0.6733, "step": 10001 }, { "epoch": 0.11958536089623262, "grad_norm": 1.6680320501327515, "learning_rate": 9.791109031396799e-06, "loss": 0.7262, "step": 10002 }, { "epoch": 0.11959731704109328, "grad_norm": 1.8684494495391846, "learning_rate": 9.791053648241589e-06, "loss": 0.697, "step": 10003 }, { "epoch": 0.11960927318595392, "grad_norm": 2.612466812133789, "learning_rate": 9.790998257902176e-06, "loss": 0.5456, "step": 10004 }, { "epoch": 0.11962122933081457, "grad_norm": 2.50219988822937, "learning_rate": 9.790942860378642e-06, "loss": 0.6531, "step": 10005 }, { "epoch": 0.11963318547567522, "grad_norm": 2.462672472000122, "learning_rate": 9.790887455671077e-06, "loss": 0.6462, "step": 10006 }, { "epoch": 0.11964514162053587, "grad_norm": 1.7419191598892212, "learning_rate": 9.790832043779554e-06, "loss": 0.5692, "step": 10007 }, { "epoch": 0.11965709776539653, "grad_norm": 1.6947253942489624, "learning_rate": 9.790776624704165e-06, "loss": 0.6491, "step": 10008 }, { "epoch": 0.11966905391025717, "grad_norm": 6.090564727783203, "learning_rate": 9.79072119844499e-06, "loss": 0.6686, "step": 10009 }, { "epoch": 0.11968101005511783, "grad_norm": 2.29640793800354, "learning_rate": 9.79066576500211e-06, "loss": 0.6611, "step": 10010 }, { "epoch": 0.11969296619997848, "grad_norm": 2.4835855960845947, "learning_rate": 9.790610324375609e-06, "loss": 0.6989, "step": 10011 }, { "epoch": 0.11970492234483913, "grad_norm": 2.5056731700897217, "learning_rate": 9.79055487656557e-06, "loss": 0.694, "step": 10012 }, { "epoch": 0.11971687848969978, "grad_norm": 2.4645347595214844, "learning_rate": 9.79049942157208e-06, "loss": 0.5507, "step": 10013 }, { "epoch": 0.11972883463456044, "grad_norm": 2.0422465801239014, "learning_rate": 9.790443959395216e-06, "loss": 0.6868, "step": 10014 }, { "epoch": 0.11974079077942108, "grad_norm": 1.8147549629211426, "learning_rate": 9.790388490035066e-06, "loss": 0.6309, "step": 10015 }, { "epoch": 0.11975274692428173, "grad_norm": 1.7876766920089722, "learning_rate": 9.79033301349171e-06, "loss": 0.696, "step": 10016 }, { "epoch": 0.11976470306914239, "grad_norm": 3.1066248416900635, "learning_rate": 9.790277529765236e-06, "loss": 0.6279, "step": 10017 }, { "epoch": 0.11977665921400303, "grad_norm": 2.5977113246917725, "learning_rate": 9.79022203885572e-06, "loss": 0.6936, "step": 10018 }, { "epoch": 0.11978861535886369, "grad_norm": 2.3549530506134033, "learning_rate": 9.79016654076325e-06, "loss": 0.6611, "step": 10019 }, { "epoch": 0.11980057150372433, "grad_norm": 1.6372123956680298, "learning_rate": 9.79011103548791e-06, "loss": 0.5955, "step": 10020 }, { "epoch": 0.119812527648585, "grad_norm": 2.1319878101348877, "learning_rate": 9.79005552302978e-06, "loss": 0.5194, "step": 10021 }, { "epoch": 0.11982448379344564, "grad_norm": 1.4797418117523193, "learning_rate": 9.790000003388946e-06, "loss": 0.5949, "step": 10022 }, { "epoch": 0.1198364399383063, "grad_norm": 9.20009708404541, "learning_rate": 9.789944476565488e-06, "loss": 0.5384, "step": 10023 }, { "epoch": 0.11984839608316694, "grad_norm": 3.165950059890747, "learning_rate": 9.789888942559491e-06, "loss": 0.5351, "step": 10024 }, { "epoch": 0.1198603522280276, "grad_norm": 5.535952091217041, "learning_rate": 9.78983340137104e-06, "loss": 0.5484, "step": 10025 }, { "epoch": 0.11987230837288824, "grad_norm": 2.9715778827667236, "learning_rate": 9.789777853000218e-06, "loss": 0.6651, "step": 10026 }, { "epoch": 0.1198842645177489, "grad_norm": 6.498599052429199, "learning_rate": 9.789722297447106e-06, "loss": 0.6389, "step": 10027 }, { "epoch": 0.11989622066260955, "grad_norm": 1.7919726371765137, "learning_rate": 9.789666734711787e-06, "loss": 0.6551, "step": 10028 }, { "epoch": 0.1199081768074702, "grad_norm": 1.422921061515808, "learning_rate": 9.789611164794346e-06, "loss": 0.714, "step": 10029 }, { "epoch": 0.11992013295233085, "grad_norm": 5.11022424697876, "learning_rate": 9.789555587694868e-06, "loss": 0.6877, "step": 10030 }, { "epoch": 0.1199320890971915, "grad_norm": 1.6063417196273804, "learning_rate": 9.789500003413433e-06, "loss": 0.5881, "step": 10031 }, { "epoch": 0.11994404524205216, "grad_norm": 1.7695248126983643, "learning_rate": 9.789444411950125e-06, "loss": 0.7268, "step": 10032 }, { "epoch": 0.1199560013869128, "grad_norm": 2.0294692516326904, "learning_rate": 9.789388813305029e-06, "loss": 0.6741, "step": 10033 }, { "epoch": 0.11996795753177346, "grad_norm": 1.6048494577407837, "learning_rate": 9.789333207478228e-06, "loss": 0.6939, "step": 10034 }, { "epoch": 0.1199799136766341, "grad_norm": 1.536004900932312, "learning_rate": 9.789277594469805e-06, "loss": 0.5876, "step": 10035 }, { "epoch": 0.11999186982149476, "grad_norm": 1.5353337526321411, "learning_rate": 9.78922197427984e-06, "loss": 0.5657, "step": 10036 }, { "epoch": 0.1200038259663554, "grad_norm": 1.785265326499939, "learning_rate": 9.789166346908423e-06, "loss": 0.6828, "step": 10037 }, { "epoch": 0.12001578211121607, "grad_norm": 1.8225977420806885, "learning_rate": 9.789110712355633e-06, "loss": 0.6098, "step": 10038 }, { "epoch": 0.12002773825607671, "grad_norm": 2.606274127960205, "learning_rate": 9.789055070621554e-06, "loss": 0.6811, "step": 10039 }, { "epoch": 0.12003969440093736, "grad_norm": 4.851946830749512, "learning_rate": 9.78899942170627e-06, "loss": 0.6376, "step": 10040 }, { "epoch": 0.12005165054579801, "grad_norm": 1.7900140285491943, "learning_rate": 9.788943765609867e-06, "loss": 0.5686, "step": 10041 }, { "epoch": 0.12006360669065866, "grad_norm": 1.8784250020980835, "learning_rate": 9.788888102332423e-06, "loss": 0.6418, "step": 10042 }, { "epoch": 0.12007556283551932, "grad_norm": 3.996488571166992, "learning_rate": 9.788832431874025e-06, "loss": 0.7255, "step": 10043 }, { "epoch": 0.12008751898037996, "grad_norm": 2.397174596786499, "learning_rate": 9.788776754234755e-06, "loss": 0.6208, "step": 10044 }, { "epoch": 0.12009947512524062, "grad_norm": 3.5616912841796875, "learning_rate": 9.788721069414698e-06, "loss": 0.573, "step": 10045 }, { "epoch": 0.12011143127010127, "grad_norm": 1.657454490661621, "learning_rate": 9.788665377413937e-06, "loss": 0.6397, "step": 10046 }, { "epoch": 0.12012338741496192, "grad_norm": 2.3646938800811768, "learning_rate": 9.788609678232555e-06, "loss": 0.5946, "step": 10047 }, { "epoch": 0.12013534355982257, "grad_norm": 2.057647228240967, "learning_rate": 9.788553971870634e-06, "loss": 0.5054, "step": 10048 }, { "epoch": 0.12014729970468323, "grad_norm": 1.2952525615692139, "learning_rate": 9.788498258328261e-06, "loss": 0.5378, "step": 10049 }, { "epoch": 0.12015925584954387, "grad_norm": 2.7694363594055176, "learning_rate": 9.788442537605517e-06, "loss": 0.7891, "step": 10050 }, { "epoch": 0.12017121199440453, "grad_norm": 1.4827470779418945, "learning_rate": 9.788386809702489e-06, "loss": 0.5761, "step": 10051 }, { "epoch": 0.12018316813926518, "grad_norm": 15.163520812988281, "learning_rate": 9.788331074619255e-06, "loss": 0.6179, "step": 10052 }, { "epoch": 0.12019512428412582, "grad_norm": 2.537848949432373, "learning_rate": 9.788275332355904e-06, "loss": 0.7045, "step": 10053 }, { "epoch": 0.12020708042898648, "grad_norm": 1.9722849130630493, "learning_rate": 9.788219582912515e-06, "loss": 0.6183, "step": 10054 }, { "epoch": 0.12021903657384712, "grad_norm": 2.47265887260437, "learning_rate": 9.788163826289174e-06, "loss": 0.607, "step": 10055 }, { "epoch": 0.12023099271870778, "grad_norm": 3.3268795013427734, "learning_rate": 9.788108062485965e-06, "loss": 0.629, "step": 10056 }, { "epoch": 0.12024294886356843, "grad_norm": 1.5271549224853516, "learning_rate": 9.788052291502971e-06, "loss": 0.6856, "step": 10057 }, { "epoch": 0.12025490500842909, "grad_norm": 18.629581451416016, "learning_rate": 9.787996513340275e-06, "loss": 0.6241, "step": 10058 }, { "epoch": 0.12026686115328973, "grad_norm": 3.355563163757324, "learning_rate": 9.787940727997961e-06, "loss": 0.7058, "step": 10059 }, { "epoch": 0.12027881729815039, "grad_norm": 1.427438735961914, "learning_rate": 9.787884935476112e-06, "loss": 0.5834, "step": 10060 }, { "epoch": 0.12029077344301103, "grad_norm": 1.8717572689056396, "learning_rate": 9.787829135774815e-06, "loss": 0.6247, "step": 10061 }, { "epoch": 0.12030272958787169, "grad_norm": 2.088318347930908, "learning_rate": 9.78777332889415e-06, "loss": 0.6256, "step": 10062 }, { "epoch": 0.12031468573273234, "grad_norm": 1.3313045501708984, "learning_rate": 9.787717514834202e-06, "loss": 0.6752, "step": 10063 }, { "epoch": 0.12032664187759298, "grad_norm": 2.561753034591675, "learning_rate": 9.787661693595056e-06, "loss": 0.555, "step": 10064 }, { "epoch": 0.12033859802245364, "grad_norm": 2.1154074668884277, "learning_rate": 9.787605865176792e-06, "loss": 0.657, "step": 10065 }, { "epoch": 0.12035055416731429, "grad_norm": 1.6162300109863281, "learning_rate": 9.787550029579496e-06, "loss": 0.6258, "step": 10066 }, { "epoch": 0.12036251031217494, "grad_norm": 2.3136141300201416, "learning_rate": 9.787494186803253e-06, "loss": 0.5475, "step": 10067 }, { "epoch": 0.12037446645703559, "grad_norm": 4.148295879364014, "learning_rate": 9.787438336848145e-06, "loss": 0.6747, "step": 10068 }, { "epoch": 0.12038642260189625, "grad_norm": 2.4601070880889893, "learning_rate": 9.787382479714256e-06, "loss": 0.7199, "step": 10069 }, { "epoch": 0.12039837874675689, "grad_norm": 2.0040016174316406, "learning_rate": 9.78732661540167e-06, "loss": 0.6611, "step": 10070 }, { "epoch": 0.12041033489161755, "grad_norm": 3.254009962081909, "learning_rate": 9.78727074391047e-06, "loss": 0.7215, "step": 10071 }, { "epoch": 0.1204222910364782, "grad_norm": 2.268467664718628, "learning_rate": 9.787214865240742e-06, "loss": 0.6463, "step": 10072 }, { "epoch": 0.12043424718133885, "grad_norm": 1.8876808881759644, "learning_rate": 9.787158979392569e-06, "loss": 0.5535, "step": 10073 }, { "epoch": 0.1204462033261995, "grad_norm": 1.4949277639389038, "learning_rate": 9.78710308636603e-06, "loss": 0.5834, "step": 10074 }, { "epoch": 0.12045815947106014, "grad_norm": 1.9407562017440796, "learning_rate": 9.787047186161218e-06, "loss": 0.5659, "step": 10075 }, { "epoch": 0.1204701156159208, "grad_norm": 2.493480682373047, "learning_rate": 9.786991278778208e-06, "loss": 0.6381, "step": 10076 }, { "epoch": 0.12048207176078145, "grad_norm": 1.8934992551803589, "learning_rate": 9.78693536421709e-06, "loss": 0.6517, "step": 10077 }, { "epoch": 0.1204940279056421, "grad_norm": 1.6339550018310547, "learning_rate": 9.786879442477943e-06, "loss": 0.583, "step": 10078 }, { "epoch": 0.12050598405050275, "grad_norm": 1.615662932395935, "learning_rate": 9.786823513560855e-06, "loss": 0.6004, "step": 10079 }, { "epoch": 0.12051794019536341, "grad_norm": 2.15263295173645, "learning_rate": 9.786767577465908e-06, "loss": 0.66, "step": 10080 }, { "epoch": 0.12052989634022405, "grad_norm": 1.7339560985565186, "learning_rate": 9.786711634193184e-06, "loss": 0.6011, "step": 10081 }, { "epoch": 0.12054185248508471, "grad_norm": 2.590611219406128, "learning_rate": 9.786655683742772e-06, "loss": 0.5915, "step": 10082 }, { "epoch": 0.12055380862994536, "grad_norm": 2.232562303543091, "learning_rate": 9.786599726114751e-06, "loss": 0.6597, "step": 10083 }, { "epoch": 0.12056576477480602, "grad_norm": 2.5175528526306152, "learning_rate": 9.786543761309206e-06, "loss": 0.5407, "step": 10084 }, { "epoch": 0.12057772091966666, "grad_norm": 2.828003406524658, "learning_rate": 9.786487789326224e-06, "loss": 0.5705, "step": 10085 }, { "epoch": 0.12058967706452732, "grad_norm": 1.8471894264221191, "learning_rate": 9.786431810165885e-06, "loss": 0.4812, "step": 10086 }, { "epoch": 0.12060163320938797, "grad_norm": 2.6397511959075928, "learning_rate": 9.786375823828275e-06, "loss": 0.6654, "step": 10087 }, { "epoch": 0.12061358935424861, "grad_norm": 2.125663995742798, "learning_rate": 9.786319830313476e-06, "loss": 0.5397, "step": 10088 }, { "epoch": 0.12062554549910927, "grad_norm": 3.32019305229187, "learning_rate": 9.786263829621575e-06, "loss": 0.5386, "step": 10089 }, { "epoch": 0.12063750164396991, "grad_norm": 2.4634196758270264, "learning_rate": 9.786207821752653e-06, "loss": 0.5947, "step": 10090 }, { "epoch": 0.12064945778883057, "grad_norm": 1.6150089502334595, "learning_rate": 9.786151806706797e-06, "loss": 0.6918, "step": 10091 }, { "epoch": 0.12066141393369122, "grad_norm": 1.959808588027954, "learning_rate": 9.786095784484088e-06, "loss": 0.607, "step": 10092 }, { "epoch": 0.12067337007855188, "grad_norm": 1.6297098398208618, "learning_rate": 9.786039755084613e-06, "loss": 0.5557, "step": 10093 }, { "epoch": 0.12068532622341252, "grad_norm": 2.3982093334198, "learning_rate": 9.785983718508453e-06, "loss": 0.6781, "step": 10094 }, { "epoch": 0.12069728236827318, "grad_norm": 2.119077205657959, "learning_rate": 9.785927674755695e-06, "loss": 0.6487, "step": 10095 }, { "epoch": 0.12070923851313382, "grad_norm": 2.0264339447021484, "learning_rate": 9.78587162382642e-06, "loss": 0.6532, "step": 10096 }, { "epoch": 0.12072119465799448, "grad_norm": 1.89907968044281, "learning_rate": 9.785815565720713e-06, "loss": 0.5268, "step": 10097 }, { "epoch": 0.12073315080285513, "grad_norm": 3.0335452556610107, "learning_rate": 9.78575950043866e-06, "loss": 0.625, "step": 10098 }, { "epoch": 0.12074510694771577, "grad_norm": 1.477772831916809, "learning_rate": 9.785703427980342e-06, "loss": 0.5521, "step": 10099 }, { "epoch": 0.12075706309257643, "grad_norm": 1.778596043586731, "learning_rate": 9.785647348345845e-06, "loss": 0.5889, "step": 10100 }, { "epoch": 0.12076901923743708, "grad_norm": 1.8689815998077393, "learning_rate": 9.785591261535255e-06, "loss": 0.6258, "step": 10101 }, { "epoch": 0.12078097538229773, "grad_norm": 2.417126417160034, "learning_rate": 9.78553516754865e-06, "loss": 0.6696, "step": 10102 }, { "epoch": 0.12079293152715838, "grad_norm": 1.9614157676696777, "learning_rate": 9.785479066386121e-06, "loss": 0.6111, "step": 10103 }, { "epoch": 0.12080488767201904, "grad_norm": 3.386664390563965, "learning_rate": 9.785422958047748e-06, "loss": 0.6334, "step": 10104 }, { "epoch": 0.12081684381687968, "grad_norm": 1.7407556772232056, "learning_rate": 9.785366842533617e-06, "loss": 0.5448, "step": 10105 }, { "epoch": 0.12082879996174034, "grad_norm": 1.4605985879898071, "learning_rate": 9.785310719843811e-06, "loss": 0.5858, "step": 10106 }, { "epoch": 0.12084075610660099, "grad_norm": 2.0955076217651367, "learning_rate": 9.785254589978414e-06, "loss": 0.5861, "step": 10107 }, { "epoch": 0.12085271225146164, "grad_norm": 4.172842979431152, "learning_rate": 9.785198452937513e-06, "loss": 0.6334, "step": 10108 }, { "epoch": 0.12086466839632229, "grad_norm": 3.33160400390625, "learning_rate": 9.785142308721187e-06, "loss": 0.6333, "step": 10109 }, { "epoch": 0.12087662454118295, "grad_norm": 1.8573083877563477, "learning_rate": 9.785086157329525e-06, "loss": 0.593, "step": 10110 }, { "epoch": 0.12088858068604359, "grad_norm": 1.3579485416412354, "learning_rate": 9.78502999876261e-06, "loss": 0.6069, "step": 10111 }, { "epoch": 0.12090053683090424, "grad_norm": 11.139016151428223, "learning_rate": 9.784973833020522e-06, "loss": 0.7399, "step": 10112 }, { "epoch": 0.1209124929757649, "grad_norm": 3.3482182025909424, "learning_rate": 9.784917660103352e-06, "loss": 0.6044, "step": 10113 }, { "epoch": 0.12092444912062554, "grad_norm": 2.298992395401001, "learning_rate": 9.78486148001118e-06, "loss": 0.7046, "step": 10114 }, { "epoch": 0.1209364052654862, "grad_norm": 1.747207522392273, "learning_rate": 9.78480529274409e-06, "loss": 0.6686, "step": 10115 }, { "epoch": 0.12094836141034684, "grad_norm": 2.5914766788482666, "learning_rate": 9.784749098302169e-06, "loss": 0.6215, "step": 10116 }, { "epoch": 0.1209603175552075, "grad_norm": 3.095956563949585, "learning_rate": 9.7846928966855e-06, "loss": 0.6551, "step": 10117 }, { "epoch": 0.12097227370006815, "grad_norm": 1.6087785959243774, "learning_rate": 9.784636687894165e-06, "loss": 0.7739, "step": 10118 }, { "epoch": 0.1209842298449288, "grad_norm": 3.2742128372192383, "learning_rate": 9.784580471928252e-06, "loss": 0.6343, "step": 10119 }, { "epoch": 0.12099618598978945, "grad_norm": 1.9332809448242188, "learning_rate": 9.784524248787844e-06, "loss": 0.5754, "step": 10120 }, { "epoch": 0.12100814213465011, "grad_norm": 1.7693021297454834, "learning_rate": 9.784468018473023e-06, "loss": 0.6739, "step": 10121 }, { "epoch": 0.12102009827951075, "grad_norm": 1.9590743780136108, "learning_rate": 9.784411780983875e-06, "loss": 0.6691, "step": 10122 }, { "epoch": 0.1210320544243714, "grad_norm": 4.0926055908203125, "learning_rate": 9.784355536320487e-06, "loss": 0.6389, "step": 10123 }, { "epoch": 0.12104401056923206, "grad_norm": 3.9503746032714844, "learning_rate": 9.784299284482938e-06, "loss": 0.6364, "step": 10124 }, { "epoch": 0.1210559667140927, "grad_norm": 1.3920514583587646, "learning_rate": 9.784243025471319e-06, "loss": 0.6604, "step": 10125 }, { "epoch": 0.12106792285895336, "grad_norm": 1.6376619338989258, "learning_rate": 9.784186759285708e-06, "loss": 0.7567, "step": 10126 }, { "epoch": 0.121079879003814, "grad_norm": 2.230164051055908, "learning_rate": 9.78413048592619e-06, "loss": 0.5635, "step": 10127 }, { "epoch": 0.12109183514867466, "grad_norm": 1.7923767566680908, "learning_rate": 9.784074205392854e-06, "loss": 0.6088, "step": 10128 }, { "epoch": 0.12110379129353531, "grad_norm": 2.88588285446167, "learning_rate": 9.784017917685781e-06, "loss": 0.6302, "step": 10129 }, { "epoch": 0.12111574743839597, "grad_norm": 1.7583004236221313, "learning_rate": 9.783961622805059e-06, "loss": 0.575, "step": 10130 }, { "epoch": 0.12112770358325661, "grad_norm": 1.9406545162200928, "learning_rate": 9.783905320750765e-06, "loss": 0.6306, "step": 10131 }, { "epoch": 0.12113965972811727, "grad_norm": 2.512287139892578, "learning_rate": 9.78384901152299e-06, "loss": 0.7367, "step": 10132 }, { "epoch": 0.12115161587297792, "grad_norm": 2.252845287322998, "learning_rate": 9.783792695121816e-06, "loss": 0.7223, "step": 10133 }, { "epoch": 0.12116357201783858, "grad_norm": 4.534407138824463, "learning_rate": 9.78373637154733e-06, "loss": 0.6065, "step": 10134 }, { "epoch": 0.12117552816269922, "grad_norm": 2.043168783187866, "learning_rate": 9.783680040799613e-06, "loss": 0.642, "step": 10135 }, { "epoch": 0.12118748430755986, "grad_norm": 1.5310066938400269, "learning_rate": 9.78362370287875e-06, "loss": 0.6567, "step": 10136 }, { "epoch": 0.12119944045242052, "grad_norm": 1.6254979372024536, "learning_rate": 9.783567357784827e-06, "loss": 0.6035, "step": 10137 }, { "epoch": 0.12121139659728117, "grad_norm": 2.8763349056243896, "learning_rate": 9.783511005517926e-06, "loss": 0.6422, "step": 10138 }, { "epoch": 0.12122335274214183, "grad_norm": 2.1777548789978027, "learning_rate": 9.783454646078134e-06, "loss": 0.6301, "step": 10139 }, { "epoch": 0.12123530888700247, "grad_norm": 1.709248423576355, "learning_rate": 9.783398279465537e-06, "loss": 0.6424, "step": 10140 }, { "epoch": 0.12124726503186313, "grad_norm": 2.2125184535980225, "learning_rate": 9.783341905680215e-06, "loss": 0.6612, "step": 10141 }, { "epoch": 0.12125922117672377, "grad_norm": 1.779623031616211, "learning_rate": 9.783285524722254e-06, "loss": 0.7257, "step": 10142 }, { "epoch": 0.12127117732158443, "grad_norm": 1.4851481914520264, "learning_rate": 9.783229136591743e-06, "loss": 0.6242, "step": 10143 }, { "epoch": 0.12128313346644508, "grad_norm": 2.549924373626709, "learning_rate": 9.78317274128876e-06, "loss": 0.5743, "step": 10144 }, { "epoch": 0.12129508961130574, "grad_norm": 4.317471027374268, "learning_rate": 9.783116338813392e-06, "loss": 0.5993, "step": 10145 }, { "epoch": 0.12130704575616638, "grad_norm": 1.3633955717086792, "learning_rate": 9.783059929165726e-06, "loss": 0.6587, "step": 10146 }, { "epoch": 0.12131900190102703, "grad_norm": 3.8114216327667236, "learning_rate": 9.783003512345843e-06, "loss": 0.6296, "step": 10147 }, { "epoch": 0.12133095804588769, "grad_norm": 2.919565200805664, "learning_rate": 9.782947088353832e-06, "loss": 0.5186, "step": 10148 }, { "epoch": 0.12134291419074833, "grad_norm": 3.86482572555542, "learning_rate": 9.782890657189772e-06, "loss": 0.8118, "step": 10149 }, { "epoch": 0.12135487033560899, "grad_norm": 1.4435087442398071, "learning_rate": 9.78283421885375e-06, "loss": 0.6312, "step": 10150 }, { "epoch": 0.12136682648046963, "grad_norm": 8.957514762878418, "learning_rate": 9.782777773345852e-06, "loss": 0.6697, "step": 10151 }, { "epoch": 0.12137878262533029, "grad_norm": 1.451817274093628, "learning_rate": 9.782721320666165e-06, "loss": 0.4904, "step": 10152 }, { "epoch": 0.12139073877019094, "grad_norm": 6.957592487335205, "learning_rate": 9.782664860814766e-06, "loss": 0.6566, "step": 10153 }, { "epoch": 0.1214026949150516, "grad_norm": 3.1980199813842773, "learning_rate": 9.782608393791747e-06, "loss": 0.4852, "step": 10154 }, { "epoch": 0.12141465105991224, "grad_norm": 7.657463550567627, "learning_rate": 9.782551919597187e-06, "loss": 0.6456, "step": 10155 }, { "epoch": 0.1214266072047729, "grad_norm": 1.875402569770813, "learning_rate": 9.782495438231174e-06, "loss": 0.6089, "step": 10156 }, { "epoch": 0.12143856334963354, "grad_norm": 1.8044052124023438, "learning_rate": 9.782438949693793e-06, "loss": 0.6704, "step": 10157 }, { "epoch": 0.12145051949449419, "grad_norm": 2.3712897300720215, "learning_rate": 9.782382453985128e-06, "loss": 0.6366, "step": 10158 }, { "epoch": 0.12146247563935485, "grad_norm": 1.7586686611175537, "learning_rate": 9.782325951105263e-06, "loss": 0.7111, "step": 10159 }, { "epoch": 0.12147443178421549, "grad_norm": 2.7304203510284424, "learning_rate": 9.782269441054284e-06, "loss": 0.5498, "step": 10160 }, { "epoch": 0.12148638792907615, "grad_norm": 1.9086201190948486, "learning_rate": 9.782212923832273e-06, "loss": 0.5825, "step": 10161 }, { "epoch": 0.1214983440739368, "grad_norm": 1.6212375164031982, "learning_rate": 9.782156399439319e-06, "loss": 0.5715, "step": 10162 }, { "epoch": 0.12151030021879745, "grad_norm": 1.8923012018203735, "learning_rate": 9.782099867875504e-06, "loss": 0.6798, "step": 10163 }, { "epoch": 0.1215222563636581, "grad_norm": 2.3499836921691895, "learning_rate": 9.782043329140911e-06, "loss": 0.6414, "step": 10164 }, { "epoch": 0.12153421250851876, "grad_norm": 1.528943419456482, "learning_rate": 9.781986783235629e-06, "loss": 0.6696, "step": 10165 }, { "epoch": 0.1215461686533794, "grad_norm": 2.059765100479126, "learning_rate": 9.781930230159742e-06, "loss": 0.6528, "step": 10166 }, { "epoch": 0.12155812479824006, "grad_norm": 2.4291799068450928, "learning_rate": 9.781873669913331e-06, "loss": 0.743, "step": 10167 }, { "epoch": 0.1215700809431007, "grad_norm": 2.0385375022888184, "learning_rate": 9.781817102496485e-06, "loss": 0.6727, "step": 10168 }, { "epoch": 0.12158203708796136, "grad_norm": 2.639988660812378, "learning_rate": 9.781760527909287e-06, "loss": 0.6707, "step": 10169 }, { "epoch": 0.12159399323282201, "grad_norm": 2.971503257751465, "learning_rate": 9.781703946151821e-06, "loss": 0.6294, "step": 10170 }, { "epoch": 0.12160594937768265, "grad_norm": 2.293793201446533, "learning_rate": 9.781647357224175e-06, "loss": 0.6482, "step": 10171 }, { "epoch": 0.12161790552254331, "grad_norm": 4.116691589355469, "learning_rate": 9.781590761126431e-06, "loss": 0.6284, "step": 10172 }, { "epoch": 0.12162986166740396, "grad_norm": 1.7061748504638672, "learning_rate": 9.781534157858675e-06, "loss": 0.623, "step": 10173 }, { "epoch": 0.12164181781226462, "grad_norm": 2.045443296432495, "learning_rate": 9.78147754742099e-06, "loss": 0.6504, "step": 10174 }, { "epoch": 0.12165377395712526, "grad_norm": 2.191903591156006, "learning_rate": 9.781420929813463e-06, "loss": 0.6425, "step": 10175 }, { "epoch": 0.12166573010198592, "grad_norm": 2.877938985824585, "learning_rate": 9.781364305036179e-06, "loss": 0.6345, "step": 10176 }, { "epoch": 0.12167768624684656, "grad_norm": 4.720987319946289, "learning_rate": 9.781307673089222e-06, "loss": 0.6887, "step": 10177 }, { "epoch": 0.12168964239170722, "grad_norm": 1.8935810327529907, "learning_rate": 9.781251033972675e-06, "loss": 0.5746, "step": 10178 }, { "epoch": 0.12170159853656787, "grad_norm": 1.7037512063980103, "learning_rate": 9.781194387686628e-06, "loss": 0.62, "step": 10179 }, { "epoch": 0.12171355468142853, "grad_norm": 2.375027656555176, "learning_rate": 9.781137734231164e-06, "loss": 0.62, "step": 10180 }, { "epoch": 0.12172551082628917, "grad_norm": 1.6103030443191528, "learning_rate": 9.781081073606365e-06, "loss": 0.7067, "step": 10181 }, { "epoch": 0.12173746697114982, "grad_norm": 1.7745556831359863, "learning_rate": 9.781024405812317e-06, "loss": 0.6102, "step": 10182 }, { "epoch": 0.12174942311601047, "grad_norm": 2.161591053009033, "learning_rate": 9.780967730849107e-06, "loss": 0.5226, "step": 10183 }, { "epoch": 0.12176137926087112, "grad_norm": 1.6048023700714111, "learning_rate": 9.780911048716819e-06, "loss": 0.6332, "step": 10184 }, { "epoch": 0.12177333540573178, "grad_norm": 1.7612210512161255, "learning_rate": 9.780854359415538e-06, "loss": 0.615, "step": 10185 }, { "epoch": 0.12178529155059242, "grad_norm": 4.545188903808594, "learning_rate": 9.780797662945349e-06, "loss": 0.5425, "step": 10186 }, { "epoch": 0.12179724769545308, "grad_norm": 1.8858574628829956, "learning_rate": 9.780740959306336e-06, "loss": 0.6108, "step": 10187 }, { "epoch": 0.12180920384031373, "grad_norm": 2.901719808578491, "learning_rate": 9.780684248498586e-06, "loss": 0.5141, "step": 10188 }, { "epoch": 0.12182115998517438, "grad_norm": 4.94465446472168, "learning_rate": 9.780627530522183e-06, "loss": 0.6031, "step": 10189 }, { "epoch": 0.12183311613003503, "grad_norm": 1.718333125114441, "learning_rate": 9.78057080537721e-06, "loss": 0.5962, "step": 10190 }, { "epoch": 0.12184507227489569, "grad_norm": 1.287495732307434, "learning_rate": 9.780514073063757e-06, "loss": 0.6331, "step": 10191 }, { "epoch": 0.12185702841975633, "grad_norm": 2.6070022583007812, "learning_rate": 9.780457333581903e-06, "loss": 0.6295, "step": 10192 }, { "epoch": 0.12186898456461699, "grad_norm": 1.9598627090454102, "learning_rate": 9.780400586931739e-06, "loss": 0.6653, "step": 10193 }, { "epoch": 0.12188094070947764, "grad_norm": 1.6082206964492798, "learning_rate": 9.780343833113346e-06, "loss": 0.71, "step": 10194 }, { "epoch": 0.12189289685433828, "grad_norm": 1.8673374652862549, "learning_rate": 9.78028707212681e-06, "loss": 0.5741, "step": 10195 }, { "epoch": 0.12190485299919894, "grad_norm": 3.25985050201416, "learning_rate": 9.780230303972217e-06, "loss": 0.6453, "step": 10196 }, { "epoch": 0.12191680914405958, "grad_norm": 2.1425821781158447, "learning_rate": 9.780173528649652e-06, "loss": 0.6197, "step": 10197 }, { "epoch": 0.12192876528892024, "grad_norm": 1.7935303449630737, "learning_rate": 9.7801167461592e-06, "loss": 0.5935, "step": 10198 }, { "epoch": 0.12194072143378089, "grad_norm": 2.3220064640045166, "learning_rate": 9.780059956500947e-06, "loss": 0.71, "step": 10199 }, { "epoch": 0.12195267757864155, "grad_norm": 1.3764969110488892, "learning_rate": 9.780003159674975e-06, "loss": 0.618, "step": 10200 }, { "epoch": 0.12196463372350219, "grad_norm": 1.5590083599090576, "learning_rate": 9.779946355681373e-06, "loss": 0.6502, "step": 10201 }, { "epoch": 0.12197658986836285, "grad_norm": 1.4500900506973267, "learning_rate": 9.779889544520222e-06, "loss": 0.5969, "step": 10202 }, { "epoch": 0.1219885460132235, "grad_norm": 1.56090247631073, "learning_rate": 9.779832726191612e-06, "loss": 0.6275, "step": 10203 }, { "epoch": 0.12200050215808415, "grad_norm": 2.5142805576324463, "learning_rate": 9.779775900695624e-06, "loss": 0.5523, "step": 10204 }, { "epoch": 0.1220124583029448, "grad_norm": 1.755753755569458, "learning_rate": 9.779719068032347e-06, "loss": 0.6062, "step": 10205 }, { "epoch": 0.12202441444780544, "grad_norm": 2.6396379470825195, "learning_rate": 9.779662228201864e-06, "loss": 0.6823, "step": 10206 }, { "epoch": 0.1220363705926661, "grad_norm": 18.965848922729492, "learning_rate": 9.77960538120426e-06, "loss": 0.6953, "step": 10207 }, { "epoch": 0.12204832673752675, "grad_norm": 5.43349027633667, "learning_rate": 9.779548527039619e-06, "loss": 0.6988, "step": 10208 }, { "epoch": 0.1220602828823874, "grad_norm": 2.019899606704712, "learning_rate": 9.77949166570803e-06, "loss": 0.6194, "step": 10209 }, { "epoch": 0.12207223902724805, "grad_norm": 2.377228021621704, "learning_rate": 9.779434797209576e-06, "loss": 0.6815, "step": 10210 }, { "epoch": 0.12208419517210871, "grad_norm": 1.9794453382492065, "learning_rate": 9.779377921544342e-06, "loss": 0.779, "step": 10211 }, { "epoch": 0.12209615131696935, "grad_norm": 1.5978801250457764, "learning_rate": 9.779321038712415e-06, "loss": 0.6364, "step": 10212 }, { "epoch": 0.12210810746183001, "grad_norm": 2.678226947784424, "learning_rate": 9.779264148713879e-06, "loss": 0.6829, "step": 10213 }, { "epoch": 0.12212006360669066, "grad_norm": 1.9716390371322632, "learning_rate": 9.779207251548816e-06, "loss": 0.5786, "step": 10214 }, { "epoch": 0.12213201975155132, "grad_norm": 2.50612735748291, "learning_rate": 9.779150347217319e-06, "loss": 0.5662, "step": 10215 }, { "epoch": 0.12214397589641196, "grad_norm": 3.2699310779571533, "learning_rate": 9.779093435719467e-06, "loss": 0.6206, "step": 10216 }, { "epoch": 0.1221559320412726, "grad_norm": 1.854824423789978, "learning_rate": 9.779036517055348e-06, "loss": 0.615, "step": 10217 }, { "epoch": 0.12216788818613326, "grad_norm": 3.8991501331329346, "learning_rate": 9.778979591225047e-06, "loss": 0.7136, "step": 10218 }, { "epoch": 0.12217984433099391, "grad_norm": 1.969115138053894, "learning_rate": 9.778922658228648e-06, "loss": 0.6906, "step": 10219 }, { "epoch": 0.12219180047585457, "grad_norm": 1.9995005130767822, "learning_rate": 9.77886571806624e-06, "loss": 0.6162, "step": 10220 }, { "epoch": 0.12220375662071521, "grad_norm": 5.227531909942627, "learning_rate": 9.778808770737903e-06, "loss": 0.683, "step": 10221 }, { "epoch": 0.12221571276557587, "grad_norm": 1.7837313413619995, "learning_rate": 9.778751816243726e-06, "loss": 0.6536, "step": 10222 }, { "epoch": 0.12222766891043652, "grad_norm": 1.9643266201019287, "learning_rate": 9.778694854583794e-06, "loss": 0.7156, "step": 10223 }, { "epoch": 0.12223962505529717, "grad_norm": 4.677948951721191, "learning_rate": 9.778637885758194e-06, "loss": 0.6118, "step": 10224 }, { "epoch": 0.12225158120015782, "grad_norm": 1.6052640676498413, "learning_rate": 9.778580909767006e-06, "loss": 0.6458, "step": 10225 }, { "epoch": 0.12226353734501848, "grad_norm": 2.6261141300201416, "learning_rate": 9.77852392661032e-06, "loss": 0.6087, "step": 10226 }, { "epoch": 0.12227549348987912, "grad_norm": 1.9752790927886963, "learning_rate": 9.778466936288222e-06, "loss": 0.5518, "step": 10227 }, { "epoch": 0.12228744963473978, "grad_norm": 2.190981149673462, "learning_rate": 9.778409938800794e-06, "loss": 0.7196, "step": 10228 }, { "epoch": 0.12229940577960043, "grad_norm": 1.8409905433654785, "learning_rate": 9.778352934148124e-06, "loss": 0.6695, "step": 10229 }, { "epoch": 0.12231136192446107, "grad_norm": 1.7231147289276123, "learning_rate": 9.778295922330297e-06, "loss": 0.5952, "step": 10230 }, { "epoch": 0.12232331806932173, "grad_norm": 2.1441385746002197, "learning_rate": 9.778238903347399e-06, "loss": 0.7238, "step": 10231 }, { "epoch": 0.12233527421418237, "grad_norm": 4.3475542068481445, "learning_rate": 9.778181877199513e-06, "loss": 0.6173, "step": 10232 }, { "epoch": 0.12234723035904303, "grad_norm": 4.046888828277588, "learning_rate": 9.778124843886727e-06, "loss": 0.6229, "step": 10233 }, { "epoch": 0.12235918650390368, "grad_norm": 1.40402090549469, "learning_rate": 9.778067803409128e-06, "loss": 0.5749, "step": 10234 }, { "epoch": 0.12237114264876434, "grad_norm": 1.82724928855896, "learning_rate": 9.778010755766796e-06, "loss": 0.5681, "step": 10235 }, { "epoch": 0.12238309879362498, "grad_norm": 4.757280349731445, "learning_rate": 9.777953700959822e-06, "loss": 0.5876, "step": 10236 }, { "epoch": 0.12239505493848564, "grad_norm": 2.3675332069396973, "learning_rate": 9.777896638988288e-06, "loss": 0.7137, "step": 10237 }, { "epoch": 0.12240701108334628, "grad_norm": 2.2181689739227295, "learning_rate": 9.777839569852283e-06, "loss": 0.6397, "step": 10238 }, { "epoch": 0.12241896722820694, "grad_norm": 2.5750248432159424, "learning_rate": 9.777782493551889e-06, "loss": 0.6155, "step": 10239 }, { "epoch": 0.12243092337306759, "grad_norm": 1.685733675956726, "learning_rate": 9.777725410087193e-06, "loss": 0.6798, "step": 10240 }, { "epoch": 0.12244287951792823, "grad_norm": 2.30293869972229, "learning_rate": 9.777668319458282e-06, "loss": 0.7042, "step": 10241 }, { "epoch": 0.12245483566278889, "grad_norm": 4.3368353843688965, "learning_rate": 9.777611221665239e-06, "loss": 0.619, "step": 10242 }, { "epoch": 0.12246679180764954, "grad_norm": 2.0291690826416016, "learning_rate": 9.777554116708152e-06, "loss": 0.5148, "step": 10243 }, { "epoch": 0.1224787479525102, "grad_norm": 1.8493640422821045, "learning_rate": 9.777497004587104e-06, "loss": 0.6112, "step": 10244 }, { "epoch": 0.12249070409737084, "grad_norm": 1.8086658716201782, "learning_rate": 9.777439885302184e-06, "loss": 0.5904, "step": 10245 }, { "epoch": 0.1225026602422315, "grad_norm": 1.7197860479354858, "learning_rate": 9.777382758853475e-06, "loss": 0.6015, "step": 10246 }, { "epoch": 0.12251461638709214, "grad_norm": 2.465226173400879, "learning_rate": 9.777325625241065e-06, "loss": 0.6219, "step": 10247 }, { "epoch": 0.1225265725319528, "grad_norm": 2.141132354736328, "learning_rate": 9.777268484465037e-06, "loss": 0.661, "step": 10248 }, { "epoch": 0.12253852867681345, "grad_norm": 1.8144123554229736, "learning_rate": 9.777211336525479e-06, "loss": 0.5996, "step": 10249 }, { "epoch": 0.1225504848216741, "grad_norm": 2.6103742122650146, "learning_rate": 9.777154181422473e-06, "loss": 0.6811, "step": 10250 }, { "epoch": 0.12256244096653475, "grad_norm": 3.3755898475646973, "learning_rate": 9.77709701915611e-06, "loss": 0.5483, "step": 10251 }, { "epoch": 0.12257439711139541, "grad_norm": 1.6372170448303223, "learning_rate": 9.777039849726471e-06, "loss": 0.6492, "step": 10252 }, { "epoch": 0.12258635325625605, "grad_norm": 1.913658618927002, "learning_rate": 9.776982673133646e-06, "loss": 0.5198, "step": 10253 }, { "epoch": 0.1225983094011167, "grad_norm": 1.6934502124786377, "learning_rate": 9.776925489377718e-06, "loss": 0.6163, "step": 10254 }, { "epoch": 0.12261026554597736, "grad_norm": 7.760000228881836, "learning_rate": 9.776868298458773e-06, "loss": 0.674, "step": 10255 }, { "epoch": 0.122622221690838, "grad_norm": 2.188828945159912, "learning_rate": 9.776811100376895e-06, "loss": 0.6942, "step": 10256 }, { "epoch": 0.12263417783569866, "grad_norm": 51.468929290771484, "learning_rate": 9.776753895132175e-06, "loss": 0.7411, "step": 10257 }, { "epoch": 0.1226461339805593, "grad_norm": 5.468079090118408, "learning_rate": 9.776696682724693e-06, "loss": 0.6276, "step": 10258 }, { "epoch": 0.12265809012541996, "grad_norm": 1.3494040966033936, "learning_rate": 9.776639463154538e-06, "loss": 0.5415, "step": 10259 }, { "epoch": 0.12267004627028061, "grad_norm": 2.7570343017578125, "learning_rate": 9.776582236421796e-06, "loss": 0.6495, "step": 10260 }, { "epoch": 0.12268200241514127, "grad_norm": 2.33180570602417, "learning_rate": 9.77652500252655e-06, "loss": 0.6809, "step": 10261 }, { "epoch": 0.12269395856000191, "grad_norm": 1.8441884517669678, "learning_rate": 9.776467761468891e-06, "loss": 0.5788, "step": 10262 }, { "epoch": 0.12270591470486257, "grad_norm": 2.942655324935913, "learning_rate": 9.7764105132489e-06, "loss": 0.5987, "step": 10263 }, { "epoch": 0.12271787084972322, "grad_norm": 2.738664388656616, "learning_rate": 9.776353257866665e-06, "loss": 0.6765, "step": 10264 }, { "epoch": 0.12272982699458386, "grad_norm": 2.3197531700134277, "learning_rate": 9.77629599532227e-06, "loss": 0.5999, "step": 10265 }, { "epoch": 0.12274178313944452, "grad_norm": 1.6015933752059937, "learning_rate": 9.776238725615803e-06, "loss": 0.6729, "step": 10266 }, { "epoch": 0.12275373928430516, "grad_norm": 1.8710979223251343, "learning_rate": 9.776181448747348e-06, "loss": 0.564, "step": 10267 }, { "epoch": 0.12276569542916582, "grad_norm": 4.5490264892578125, "learning_rate": 9.776124164716992e-06, "loss": 0.566, "step": 10268 }, { "epoch": 0.12277765157402647, "grad_norm": 2.0957889556884766, "learning_rate": 9.776066873524822e-06, "loss": 0.5207, "step": 10269 }, { "epoch": 0.12278960771888713, "grad_norm": 1.367124319076538, "learning_rate": 9.776009575170923e-06, "loss": 0.5965, "step": 10270 }, { "epoch": 0.12280156386374777, "grad_norm": 4.061791896820068, "learning_rate": 9.77595226965538e-06, "loss": 0.6628, "step": 10271 }, { "epoch": 0.12281352000860843, "grad_norm": 5.44180965423584, "learning_rate": 9.775894956978279e-06, "loss": 0.6619, "step": 10272 }, { "epoch": 0.12282547615346907, "grad_norm": 1.9670759439468384, "learning_rate": 9.775837637139708e-06, "loss": 0.6246, "step": 10273 }, { "epoch": 0.12283743229832973, "grad_norm": 1.6009021997451782, "learning_rate": 9.77578031013975e-06, "loss": 0.5606, "step": 10274 }, { "epoch": 0.12284938844319038, "grad_norm": 2.69746470451355, "learning_rate": 9.775722975978493e-06, "loss": 0.7939, "step": 10275 }, { "epoch": 0.12286134458805104, "grad_norm": 1.9387000799179077, "learning_rate": 9.775665634656022e-06, "loss": 0.6153, "step": 10276 }, { "epoch": 0.12287330073291168, "grad_norm": 2.279816150665283, "learning_rate": 9.775608286172424e-06, "loss": 0.6511, "step": 10277 }, { "epoch": 0.12288525687777233, "grad_norm": 1.8392961025238037, "learning_rate": 9.775550930527783e-06, "loss": 0.608, "step": 10278 }, { "epoch": 0.12289721302263298, "grad_norm": 1.8147493600845337, "learning_rate": 9.77549356772219e-06, "loss": 0.6687, "step": 10279 }, { "epoch": 0.12290916916749363, "grad_norm": 1.8336207866668701, "learning_rate": 9.775436197755725e-06, "loss": 0.5779, "step": 10280 }, { "epoch": 0.12292112531235429, "grad_norm": 1.885763168334961, "learning_rate": 9.775378820628477e-06, "loss": 0.6359, "step": 10281 }, { "epoch": 0.12293308145721493, "grad_norm": 1.6180968284606934, "learning_rate": 9.77532143634053e-06, "loss": 0.5504, "step": 10282 }, { "epoch": 0.12294503760207559, "grad_norm": 1.8232474327087402, "learning_rate": 9.775264044891973e-06, "loss": 0.6969, "step": 10283 }, { "epoch": 0.12295699374693624, "grad_norm": 1.4940255880355835, "learning_rate": 9.77520664628289e-06, "loss": 0.653, "step": 10284 }, { "epoch": 0.1229689498917969, "grad_norm": 6.081596851348877, "learning_rate": 9.775149240513369e-06, "loss": 0.6706, "step": 10285 }, { "epoch": 0.12298090603665754, "grad_norm": 1.9654011726379395, "learning_rate": 9.775091827583493e-06, "loss": 0.6546, "step": 10286 }, { "epoch": 0.1229928621815182, "grad_norm": 2.459813117980957, "learning_rate": 9.77503440749335e-06, "loss": 0.6197, "step": 10287 }, { "epoch": 0.12300481832637884, "grad_norm": 1.7073397636413574, "learning_rate": 9.774976980243027e-06, "loss": 0.5939, "step": 10288 }, { "epoch": 0.12301677447123949, "grad_norm": 5.381610870361328, "learning_rate": 9.774919545832609e-06, "loss": 0.6346, "step": 10289 }, { "epoch": 0.12302873061610015, "grad_norm": 1.6840882301330566, "learning_rate": 9.774862104262181e-06, "loss": 0.5776, "step": 10290 }, { "epoch": 0.12304068676096079, "grad_norm": 2.0590908527374268, "learning_rate": 9.774804655531831e-06, "loss": 0.7865, "step": 10291 }, { "epoch": 0.12305264290582145, "grad_norm": 2.8985824584960938, "learning_rate": 9.774747199641644e-06, "loss": 0.6317, "step": 10292 }, { "epoch": 0.1230645990506821, "grad_norm": 1.6501855850219727, "learning_rate": 9.774689736591707e-06, "loss": 0.6673, "step": 10293 }, { "epoch": 0.12307655519554275, "grad_norm": 1.4708932638168335, "learning_rate": 9.774632266382105e-06, "loss": 0.6817, "step": 10294 }, { "epoch": 0.1230885113404034, "grad_norm": 2.3132376670837402, "learning_rate": 9.774574789012927e-06, "loss": 0.7706, "step": 10295 }, { "epoch": 0.12310046748526406, "grad_norm": 1.7852944135665894, "learning_rate": 9.774517304484254e-06, "loss": 0.6607, "step": 10296 }, { "epoch": 0.1231124236301247, "grad_norm": 6.922220706939697, "learning_rate": 9.774459812796178e-06, "loss": 0.6524, "step": 10297 }, { "epoch": 0.12312437977498536, "grad_norm": 2.526808738708496, "learning_rate": 9.77440231394878e-06, "loss": 0.6468, "step": 10298 }, { "epoch": 0.123136335919846, "grad_norm": 3.1638448238372803, "learning_rate": 9.774344807942152e-06, "loss": 0.6175, "step": 10299 }, { "epoch": 0.12314829206470665, "grad_norm": 1.9544930458068848, "learning_rate": 9.774287294776375e-06, "loss": 0.6185, "step": 10300 }, { "epoch": 0.12316024820956731, "grad_norm": 2.048637866973877, "learning_rate": 9.774229774451537e-06, "loss": 0.5754, "step": 10301 }, { "epoch": 0.12317220435442795, "grad_norm": 1.9787614345550537, "learning_rate": 9.774172246967727e-06, "loss": 0.6072, "step": 10302 }, { "epoch": 0.12318416049928861, "grad_norm": 8.909947395324707, "learning_rate": 9.774114712325025e-06, "loss": 0.6845, "step": 10303 }, { "epoch": 0.12319611664414926, "grad_norm": 4.8996100425720215, "learning_rate": 9.774057170523524e-06, "loss": 0.601, "step": 10304 }, { "epoch": 0.12320807278900991, "grad_norm": 1.8812857866287231, "learning_rate": 9.773999621563306e-06, "loss": 0.6574, "step": 10305 }, { "epoch": 0.12322002893387056, "grad_norm": 2.137253522872925, "learning_rate": 9.773942065444459e-06, "loss": 0.6245, "step": 10306 }, { "epoch": 0.12323198507873122, "grad_norm": 4.0285773277282715, "learning_rate": 9.773884502167068e-06, "loss": 0.6304, "step": 10307 }, { "epoch": 0.12324394122359186, "grad_norm": 2.1329052448272705, "learning_rate": 9.773826931731221e-06, "loss": 0.6241, "step": 10308 }, { "epoch": 0.12325589736845252, "grad_norm": 2.2550368309020996, "learning_rate": 9.773769354137004e-06, "loss": 0.4966, "step": 10309 }, { "epoch": 0.12326785351331317, "grad_norm": 3.0914456844329834, "learning_rate": 9.773711769384501e-06, "loss": 0.6957, "step": 10310 }, { "epoch": 0.12327980965817383, "grad_norm": 1.8203248977661133, "learning_rate": 9.773654177473803e-06, "loss": 0.7258, "step": 10311 }, { "epoch": 0.12329176580303447, "grad_norm": 1.4680451154708862, "learning_rate": 9.773596578404991e-06, "loss": 0.6345, "step": 10312 }, { "epoch": 0.12330372194789511, "grad_norm": 2.0680007934570312, "learning_rate": 9.773538972178156e-06, "loss": 0.596, "step": 10313 }, { "epoch": 0.12331567809275577, "grad_norm": 4.93080472946167, "learning_rate": 9.773481358793382e-06, "loss": 0.5281, "step": 10314 }, { "epoch": 0.12332763423761642, "grad_norm": 1.890949010848999, "learning_rate": 9.773423738250755e-06, "loss": 0.6619, "step": 10315 }, { "epoch": 0.12333959038247708, "grad_norm": 2.197277545928955, "learning_rate": 9.773366110550362e-06, "loss": 0.5827, "step": 10316 }, { "epoch": 0.12335154652733772, "grad_norm": 1.9532228708267212, "learning_rate": 9.773308475692289e-06, "loss": 0.7232, "step": 10317 }, { "epoch": 0.12336350267219838, "grad_norm": 4.608822822570801, "learning_rate": 9.773250833676623e-06, "loss": 0.6075, "step": 10318 }, { "epoch": 0.12337545881705902, "grad_norm": 2.7383780479431152, "learning_rate": 9.773193184503453e-06, "loss": 0.6189, "step": 10319 }, { "epoch": 0.12338741496191968, "grad_norm": 1.8354140520095825, "learning_rate": 9.773135528172861e-06, "loss": 0.6951, "step": 10320 }, { "epoch": 0.12339937110678033, "grad_norm": 2.5957436561584473, "learning_rate": 9.773077864684936e-06, "loss": 0.5696, "step": 10321 }, { "epoch": 0.12341132725164099, "grad_norm": 5.849056243896484, "learning_rate": 9.773020194039764e-06, "loss": 0.6727, "step": 10322 }, { "epoch": 0.12342328339650163, "grad_norm": 1.9024457931518555, "learning_rate": 9.77296251623743e-06, "loss": 0.6611, "step": 10323 }, { "epoch": 0.12343523954136228, "grad_norm": 1.603065848350525, "learning_rate": 9.772904831278021e-06, "loss": 0.7334, "step": 10324 }, { "epoch": 0.12344719568622294, "grad_norm": 2.8368895053863525, "learning_rate": 9.772847139161626e-06, "loss": 0.5927, "step": 10325 }, { "epoch": 0.12345915183108358, "grad_norm": 5.778454780578613, "learning_rate": 9.772789439888329e-06, "loss": 0.6143, "step": 10326 }, { "epoch": 0.12347110797594424, "grad_norm": 1.8102771043777466, "learning_rate": 9.772731733458218e-06, "loss": 0.7877, "step": 10327 }, { "epoch": 0.12348306412080488, "grad_norm": 1.6891919374465942, "learning_rate": 9.772674019871377e-06, "loss": 0.6759, "step": 10328 }, { "epoch": 0.12349502026566554, "grad_norm": 1.7329894304275513, "learning_rate": 9.772616299127895e-06, "loss": 0.6046, "step": 10329 }, { "epoch": 0.12350697641052619, "grad_norm": 1.6118583679199219, "learning_rate": 9.77255857122786e-06, "loss": 0.5641, "step": 10330 }, { "epoch": 0.12351893255538685, "grad_norm": 2.3649511337280273, "learning_rate": 9.772500836171354e-06, "loss": 0.7388, "step": 10331 }, { "epoch": 0.12353088870024749, "grad_norm": 1.5106626749038696, "learning_rate": 9.772443093958469e-06, "loss": 0.4863, "step": 10332 }, { "epoch": 0.12354284484510815, "grad_norm": 2.112171173095703, "learning_rate": 9.772385344589285e-06, "loss": 0.7027, "step": 10333 }, { "epoch": 0.1235548009899688, "grad_norm": 7.0317230224609375, "learning_rate": 9.772327588063894e-06, "loss": 0.5885, "step": 10334 }, { "epoch": 0.12356675713482945, "grad_norm": 1.5597679615020752, "learning_rate": 9.772269824382381e-06, "loss": 0.6904, "step": 10335 }, { "epoch": 0.1235787132796901, "grad_norm": 2.0310769081115723, "learning_rate": 9.772212053544831e-06, "loss": 0.6031, "step": 10336 }, { "epoch": 0.12359066942455074, "grad_norm": 2.1096384525299072, "learning_rate": 9.772154275551334e-06, "loss": 0.5414, "step": 10337 }, { "epoch": 0.1236026255694114, "grad_norm": 2.2341814041137695, "learning_rate": 9.772096490401974e-06, "loss": 0.5765, "step": 10338 }, { "epoch": 0.12361458171427205, "grad_norm": 2.7917447090148926, "learning_rate": 9.772038698096838e-06, "loss": 0.6305, "step": 10339 }, { "epoch": 0.1236265378591327, "grad_norm": 1.919108271598816, "learning_rate": 9.771980898636013e-06, "loss": 0.5302, "step": 10340 }, { "epoch": 0.12363849400399335, "grad_norm": 3.3363595008850098, "learning_rate": 9.771923092019586e-06, "loss": 0.6432, "step": 10341 }, { "epoch": 0.12365045014885401, "grad_norm": 1.5786356925964355, "learning_rate": 9.771865278247643e-06, "loss": 0.6693, "step": 10342 }, { "epoch": 0.12366240629371465, "grad_norm": 5.677303791046143, "learning_rate": 9.771807457320272e-06, "loss": 0.7299, "step": 10343 }, { "epoch": 0.12367436243857531, "grad_norm": 1.9772577285766602, "learning_rate": 9.771749629237558e-06, "loss": 0.5843, "step": 10344 }, { "epoch": 0.12368631858343596, "grad_norm": 1.6353139877319336, "learning_rate": 9.771691793999588e-06, "loss": 0.7024, "step": 10345 }, { "epoch": 0.12369827472829661, "grad_norm": 1.3977482318878174, "learning_rate": 9.771633951606449e-06, "loss": 0.5488, "step": 10346 }, { "epoch": 0.12371023087315726, "grad_norm": 3.00504732131958, "learning_rate": 9.771576102058228e-06, "loss": 0.7189, "step": 10347 }, { "epoch": 0.1237221870180179, "grad_norm": 3.652174711227417, "learning_rate": 9.771518245355015e-06, "loss": 0.6734, "step": 10348 }, { "epoch": 0.12373414316287856, "grad_norm": 1.899611234664917, "learning_rate": 9.771460381496889e-06, "loss": 0.6667, "step": 10349 }, { "epoch": 0.12374609930773921, "grad_norm": 4.923280715942383, "learning_rate": 9.771402510483943e-06, "loss": 0.6257, "step": 10350 }, { "epoch": 0.12375805545259987, "grad_norm": 3.794877290725708, "learning_rate": 9.771344632316263e-06, "loss": 0.6113, "step": 10351 }, { "epoch": 0.12377001159746051, "grad_norm": 6.2700324058532715, "learning_rate": 9.771286746993934e-06, "loss": 0.5312, "step": 10352 }, { "epoch": 0.12378196774232117, "grad_norm": 2.1733031272888184, "learning_rate": 9.771228854517043e-06, "loss": 0.5234, "step": 10353 }, { "epoch": 0.12379392388718181, "grad_norm": 6.977395057678223, "learning_rate": 9.771170954885676e-06, "loss": 0.6635, "step": 10354 }, { "epoch": 0.12380588003204247, "grad_norm": 3.974541425704956, "learning_rate": 9.771113048099924e-06, "loss": 0.5612, "step": 10355 }, { "epoch": 0.12381783617690312, "grad_norm": 2.7379887104034424, "learning_rate": 9.77105513415987e-06, "loss": 0.6245, "step": 10356 }, { "epoch": 0.12382979232176378, "grad_norm": 5.018599510192871, "learning_rate": 9.7709972130656e-06, "loss": 0.6032, "step": 10357 }, { "epoch": 0.12384174846662442, "grad_norm": 2.1933650970458984, "learning_rate": 9.770939284817205e-06, "loss": 0.5542, "step": 10358 }, { "epoch": 0.12385370461148507, "grad_norm": 2.026376485824585, "learning_rate": 9.77088134941477e-06, "loss": 0.7293, "step": 10359 }, { "epoch": 0.12386566075634572, "grad_norm": 1.8739019632339478, "learning_rate": 9.77082340685838e-06, "loss": 0.6908, "step": 10360 }, { "epoch": 0.12387761690120637, "grad_norm": 1.8943687677383423, "learning_rate": 9.770765457148124e-06, "loss": 0.5126, "step": 10361 }, { "epoch": 0.12388957304606703, "grad_norm": 2.0219459533691406, "learning_rate": 9.770707500284088e-06, "loss": 0.5545, "step": 10362 }, { "epoch": 0.12390152919092767, "grad_norm": 2.5624160766601562, "learning_rate": 9.770649536266359e-06, "loss": 0.5736, "step": 10363 }, { "epoch": 0.12391348533578833, "grad_norm": 1.8061985969543457, "learning_rate": 9.770591565095025e-06, "loss": 0.6426, "step": 10364 }, { "epoch": 0.12392544148064898, "grad_norm": 13.478211402893066, "learning_rate": 9.77053358677017e-06, "loss": 0.6237, "step": 10365 }, { "epoch": 0.12393739762550963, "grad_norm": 1.684281587600708, "learning_rate": 9.770475601291885e-06, "loss": 0.7024, "step": 10366 }, { "epoch": 0.12394935377037028, "grad_norm": 2.7250847816467285, "learning_rate": 9.770417608660255e-06, "loss": 0.6288, "step": 10367 }, { "epoch": 0.12396130991523094, "grad_norm": 1.9590418338775635, "learning_rate": 9.770359608875365e-06, "loss": 0.5878, "step": 10368 }, { "epoch": 0.12397326606009158, "grad_norm": 2.9960732460021973, "learning_rate": 9.770301601937304e-06, "loss": 0.6494, "step": 10369 }, { "epoch": 0.12398522220495224, "grad_norm": 1.918479323387146, "learning_rate": 9.77024358784616e-06, "loss": 0.6548, "step": 10370 }, { "epoch": 0.12399717834981289, "grad_norm": 9.732447624206543, "learning_rate": 9.770185566602018e-06, "loss": 0.5598, "step": 10371 }, { "epoch": 0.12400913449467353, "grad_norm": 2.05941104888916, "learning_rate": 9.770127538204967e-06, "loss": 0.5945, "step": 10372 }, { "epoch": 0.12402109063953419, "grad_norm": 27.28643035888672, "learning_rate": 9.77006950265509e-06, "loss": 0.7551, "step": 10373 }, { "epoch": 0.12403304678439483, "grad_norm": 1.5049786567687988, "learning_rate": 9.77001145995248e-06, "loss": 0.6474, "step": 10374 }, { "epoch": 0.1240450029292555, "grad_norm": 1.8400704860687256, "learning_rate": 9.76995341009722e-06, "loss": 0.6911, "step": 10375 }, { "epoch": 0.12405695907411614, "grad_norm": 2.7580454349517822, "learning_rate": 9.769895353089397e-06, "loss": 0.6647, "step": 10376 }, { "epoch": 0.1240689152189768, "grad_norm": 1.8841886520385742, "learning_rate": 9.7698372889291e-06, "loss": 0.6267, "step": 10377 }, { "epoch": 0.12408087136383744, "grad_norm": 1.6378422975540161, "learning_rate": 9.769779217616413e-06, "loss": 0.7685, "step": 10378 }, { "epoch": 0.1240928275086981, "grad_norm": 1.7505912780761719, "learning_rate": 9.769721139151426e-06, "loss": 0.6131, "step": 10379 }, { "epoch": 0.12410478365355875, "grad_norm": 1.797037959098816, "learning_rate": 9.769663053534227e-06, "loss": 0.6188, "step": 10380 }, { "epoch": 0.1241167397984194, "grad_norm": 1.6774307489395142, "learning_rate": 9.7696049607649e-06, "loss": 0.6019, "step": 10381 }, { "epoch": 0.12412869594328005, "grad_norm": 3.0432169437408447, "learning_rate": 9.769546860843534e-06, "loss": 0.5363, "step": 10382 }, { "epoch": 0.1241406520881407, "grad_norm": 3.069334030151367, "learning_rate": 9.769488753770216e-06, "loss": 0.595, "step": 10383 }, { "epoch": 0.12415260823300135, "grad_norm": 2.274245500564575, "learning_rate": 9.769430639545031e-06, "loss": 0.6232, "step": 10384 }, { "epoch": 0.124164564377862, "grad_norm": 1.9694507122039795, "learning_rate": 9.769372518168067e-06, "loss": 0.7494, "step": 10385 }, { "epoch": 0.12417652052272266, "grad_norm": 2.371670961380005, "learning_rate": 9.769314389639414e-06, "loss": 0.6606, "step": 10386 }, { "epoch": 0.1241884766675833, "grad_norm": 1.616750955581665, "learning_rate": 9.769256253959156e-06, "loss": 0.6441, "step": 10387 }, { "epoch": 0.12420043281244396, "grad_norm": 2.1949737071990967, "learning_rate": 9.769198111127383e-06, "loss": 0.6012, "step": 10388 }, { "epoch": 0.1242123889573046, "grad_norm": 1.7678961753845215, "learning_rate": 9.769139961144178e-06, "loss": 0.6083, "step": 10389 }, { "epoch": 0.12422434510216526, "grad_norm": 1.3266890048980713, "learning_rate": 9.769081804009632e-06, "loss": 0.5495, "step": 10390 }, { "epoch": 0.12423630124702591, "grad_norm": 3.548865556716919, "learning_rate": 9.769023639723831e-06, "loss": 0.5779, "step": 10391 }, { "epoch": 0.12424825739188657, "grad_norm": 1.9889460802078247, "learning_rate": 9.768965468286863e-06, "loss": 0.6461, "step": 10392 }, { "epoch": 0.12426021353674721, "grad_norm": 8.227648735046387, "learning_rate": 9.768907289698812e-06, "loss": 0.663, "step": 10393 }, { "epoch": 0.12427216968160787, "grad_norm": 2.56335186958313, "learning_rate": 9.76884910395977e-06, "loss": 0.5881, "step": 10394 }, { "epoch": 0.12428412582646851, "grad_norm": 2.8194313049316406, "learning_rate": 9.76879091106982e-06, "loss": 0.6106, "step": 10395 }, { "epoch": 0.12429608197132916, "grad_norm": 3.6535794734954834, "learning_rate": 9.768732711029052e-06, "loss": 0.6641, "step": 10396 }, { "epoch": 0.12430803811618982, "grad_norm": 3.7435097694396973, "learning_rate": 9.768674503837553e-06, "loss": 0.5987, "step": 10397 }, { "epoch": 0.12431999426105046, "grad_norm": 2.2154715061187744, "learning_rate": 9.768616289495408e-06, "loss": 0.614, "step": 10398 }, { "epoch": 0.12433195040591112, "grad_norm": 2.3035385608673096, "learning_rate": 9.768558068002707e-06, "loss": 0.5871, "step": 10399 }, { "epoch": 0.12434390655077177, "grad_norm": 1.8140766620635986, "learning_rate": 9.768499839359536e-06, "loss": 0.6878, "step": 10400 }, { "epoch": 0.12435586269563242, "grad_norm": 3.209300994873047, "learning_rate": 9.768441603565983e-06, "loss": 0.6119, "step": 10401 }, { "epoch": 0.12436781884049307, "grad_norm": 3.6910133361816406, "learning_rate": 9.768383360622134e-06, "loss": 0.6413, "step": 10402 }, { "epoch": 0.12437977498535373, "grad_norm": 2.0276310443878174, "learning_rate": 9.768325110528077e-06, "loss": 0.6353, "step": 10403 }, { "epoch": 0.12439173113021437, "grad_norm": 8.227266311645508, "learning_rate": 9.768266853283901e-06, "loss": 0.6348, "step": 10404 }, { "epoch": 0.12440368727507503, "grad_norm": 2.066537380218506, "learning_rate": 9.76820858888969e-06, "loss": 0.5358, "step": 10405 }, { "epoch": 0.12441564341993568, "grad_norm": 1.604115605354309, "learning_rate": 9.768150317345534e-06, "loss": 0.5368, "step": 10406 }, { "epoch": 0.12442759956479632, "grad_norm": 6.349122524261475, "learning_rate": 9.76809203865152e-06, "loss": 0.5928, "step": 10407 }, { "epoch": 0.12443955570965698, "grad_norm": 6.21234130859375, "learning_rate": 9.768033752807736e-06, "loss": 0.5718, "step": 10408 }, { "epoch": 0.12445151185451762, "grad_norm": 1.9208827018737793, "learning_rate": 9.767975459814267e-06, "loss": 0.6513, "step": 10409 }, { "epoch": 0.12446346799937828, "grad_norm": 2.226807117462158, "learning_rate": 9.767917159671203e-06, "loss": 0.5808, "step": 10410 }, { "epoch": 0.12447542414423893, "grad_norm": 1.8190635442733765, "learning_rate": 9.767858852378628e-06, "loss": 0.594, "step": 10411 }, { "epoch": 0.12448738028909959, "grad_norm": 2.169145107269287, "learning_rate": 9.767800537936633e-06, "loss": 0.5287, "step": 10412 }, { "epoch": 0.12449933643396023, "grad_norm": 1.5957144498825073, "learning_rate": 9.767742216345304e-06, "loss": 0.6242, "step": 10413 }, { "epoch": 0.12451129257882089, "grad_norm": 2.626509189605713, "learning_rate": 9.76768388760473e-06, "loss": 0.649, "step": 10414 }, { "epoch": 0.12452324872368153, "grad_norm": 3.408158302307129, "learning_rate": 9.767625551714996e-06, "loss": 0.6048, "step": 10415 }, { "epoch": 0.1245352048685422, "grad_norm": 1.7339726686477661, "learning_rate": 9.76756720867619e-06, "loss": 0.6633, "step": 10416 }, { "epoch": 0.12454716101340284, "grad_norm": 2.4038162231445312, "learning_rate": 9.767508858488402e-06, "loss": 0.6153, "step": 10417 }, { "epoch": 0.12455911715826348, "grad_norm": 4.538910388946533, "learning_rate": 9.767450501151715e-06, "loss": 0.6268, "step": 10418 }, { "epoch": 0.12457107330312414, "grad_norm": 1.9200001955032349, "learning_rate": 9.76739213666622e-06, "loss": 0.6547, "step": 10419 }, { "epoch": 0.12458302944798479, "grad_norm": 1.9174598455429077, "learning_rate": 9.767333765032003e-06, "loss": 0.5433, "step": 10420 }, { "epoch": 0.12459498559284544, "grad_norm": 1.5162262916564941, "learning_rate": 9.767275386249154e-06, "loss": 0.7033, "step": 10421 }, { "epoch": 0.12460694173770609, "grad_norm": 1.7874442338943481, "learning_rate": 9.767217000317756e-06, "loss": 0.5586, "step": 10422 }, { "epoch": 0.12461889788256675, "grad_norm": 3.6703476905822754, "learning_rate": 9.767158607237902e-06, "loss": 0.6555, "step": 10423 }, { "epoch": 0.12463085402742739, "grad_norm": 3.199965000152588, "learning_rate": 9.767100207009674e-06, "loss": 0.74, "step": 10424 }, { "epoch": 0.12464281017228805, "grad_norm": 1.937319040298462, "learning_rate": 9.767041799633163e-06, "loss": 0.8061, "step": 10425 }, { "epoch": 0.1246547663171487, "grad_norm": 1.9150440692901611, "learning_rate": 9.766983385108458e-06, "loss": 0.6177, "step": 10426 }, { "epoch": 0.12466672246200936, "grad_norm": 1.4589608907699585, "learning_rate": 9.766924963435642e-06, "loss": 0.6036, "step": 10427 }, { "epoch": 0.12467867860687, "grad_norm": 2.0728743076324463, "learning_rate": 9.766866534614806e-06, "loss": 0.5022, "step": 10428 }, { "epoch": 0.12469063475173066, "grad_norm": 1.5407967567443848, "learning_rate": 9.766808098646036e-06, "loss": 0.6542, "step": 10429 }, { "epoch": 0.1247025908965913, "grad_norm": 2.0851964950561523, "learning_rate": 9.766749655529421e-06, "loss": 0.6302, "step": 10430 }, { "epoch": 0.12471454704145195, "grad_norm": 3.4186227321624756, "learning_rate": 9.766691205265048e-06, "loss": 0.5756, "step": 10431 }, { "epoch": 0.1247265031863126, "grad_norm": 16.72058868408203, "learning_rate": 9.766632747853004e-06, "loss": 0.5755, "step": 10432 }, { "epoch": 0.12473845933117325, "grad_norm": 2.291978120803833, "learning_rate": 9.766574283293379e-06, "loss": 0.5993, "step": 10433 }, { "epoch": 0.12475041547603391, "grad_norm": 3.9302055835723877, "learning_rate": 9.766515811586256e-06, "loss": 0.5316, "step": 10434 }, { "epoch": 0.12476237162089455, "grad_norm": 5.270977020263672, "learning_rate": 9.766457332731729e-06, "loss": 0.5832, "step": 10435 }, { "epoch": 0.12477432776575521, "grad_norm": 2.074537515640259, "learning_rate": 9.76639884672988e-06, "loss": 0.6257, "step": 10436 }, { "epoch": 0.12478628391061586, "grad_norm": 2.0001437664031982, "learning_rate": 9.7663403535808e-06, "loss": 0.5621, "step": 10437 }, { "epoch": 0.12479824005547652, "grad_norm": 1.8424451351165771, "learning_rate": 9.766281853284575e-06, "loss": 0.6714, "step": 10438 }, { "epoch": 0.12481019620033716, "grad_norm": 3.9293582439422607, "learning_rate": 9.766223345841292e-06, "loss": 0.5687, "step": 10439 }, { "epoch": 0.12482215234519782, "grad_norm": 2.602269411087036, "learning_rate": 9.766164831251044e-06, "loss": 0.7139, "step": 10440 }, { "epoch": 0.12483410849005847, "grad_norm": 2.971792459487915, "learning_rate": 9.766106309513912e-06, "loss": 0.6753, "step": 10441 }, { "epoch": 0.12484606463491911, "grad_norm": 2.6740036010742188, "learning_rate": 9.766047780629987e-06, "loss": 0.662, "step": 10442 }, { "epoch": 0.12485802077977977, "grad_norm": 1.8948462009429932, "learning_rate": 9.765989244599357e-06, "loss": 0.6657, "step": 10443 }, { "epoch": 0.12486997692464041, "grad_norm": 1.4596470594406128, "learning_rate": 9.765930701422108e-06, "loss": 0.6091, "step": 10444 }, { "epoch": 0.12488193306950107, "grad_norm": 2.288944959640503, "learning_rate": 9.76587215109833e-06, "loss": 0.6563, "step": 10445 }, { "epoch": 0.12489388921436172, "grad_norm": 1.6559417247772217, "learning_rate": 9.76581359362811e-06, "loss": 0.6399, "step": 10446 }, { "epoch": 0.12490584535922238, "grad_norm": 2.4728543758392334, "learning_rate": 9.765755029011535e-06, "loss": 0.7033, "step": 10447 }, { "epoch": 0.12491780150408302, "grad_norm": 2.7260653972625732, "learning_rate": 9.765696457248694e-06, "loss": 0.5328, "step": 10448 }, { "epoch": 0.12492975764894368, "grad_norm": 2.109912633895874, "learning_rate": 9.765637878339674e-06, "loss": 0.6012, "step": 10449 }, { "epoch": 0.12494171379380432, "grad_norm": 2.093210458755493, "learning_rate": 9.765579292284564e-06, "loss": 0.6787, "step": 10450 }, { "epoch": 0.12495366993866498, "grad_norm": 2.1779942512512207, "learning_rate": 9.765520699083449e-06, "loss": 0.6305, "step": 10451 }, { "epoch": 0.12496562608352563, "grad_norm": 1.5953550338745117, "learning_rate": 9.765462098736421e-06, "loss": 0.6773, "step": 10452 }, { "epoch": 0.12497758222838629, "grad_norm": 1.8893030881881714, "learning_rate": 9.765403491243562e-06, "loss": 0.6697, "step": 10453 }, { "epoch": 0.12498953837324693, "grad_norm": 2.489211320877075, "learning_rate": 9.765344876604966e-06, "loss": 0.6949, "step": 10454 }, { "epoch": 0.12500149451810758, "grad_norm": 1.9564363956451416, "learning_rate": 9.765286254820719e-06, "loss": 0.7096, "step": 10455 }, { "epoch": 0.12501345066296823, "grad_norm": 1.8560092449188232, "learning_rate": 9.765227625890907e-06, "loss": 0.6057, "step": 10456 }, { "epoch": 0.1250254068078289, "grad_norm": 3.2912306785583496, "learning_rate": 9.76516898981562e-06, "loss": 0.5839, "step": 10457 }, { "epoch": 0.12503736295268952, "grad_norm": 2.2596166133880615, "learning_rate": 9.765110346594945e-06, "loss": 0.6344, "step": 10458 }, { "epoch": 0.12504931909755018, "grad_norm": 2.203244209289551, "learning_rate": 9.76505169622897e-06, "loss": 0.6868, "step": 10459 }, { "epoch": 0.12506127524241084, "grad_norm": 2.2489826679229736, "learning_rate": 9.764993038717783e-06, "loss": 0.6216, "step": 10460 }, { "epoch": 0.1250732313872715, "grad_norm": 1.7769227027893066, "learning_rate": 9.764934374061472e-06, "loss": 0.6309, "step": 10461 }, { "epoch": 0.12508518753213213, "grad_norm": 1.6631357669830322, "learning_rate": 9.764875702260124e-06, "loss": 0.5544, "step": 10462 }, { "epoch": 0.1250971436769928, "grad_norm": 5.724848747253418, "learning_rate": 9.764817023313831e-06, "loss": 0.6037, "step": 10463 }, { "epoch": 0.12510909982185345, "grad_norm": 1.59425950050354, "learning_rate": 9.764758337222675e-06, "loss": 0.6188, "step": 10464 }, { "epoch": 0.1251210559667141, "grad_norm": 2.577643871307373, "learning_rate": 9.764699643986748e-06, "loss": 0.5948, "step": 10465 }, { "epoch": 0.12513301211157474, "grad_norm": 1.9859330654144287, "learning_rate": 9.764640943606136e-06, "loss": 0.5973, "step": 10466 }, { "epoch": 0.1251449682564354, "grad_norm": 1.6084003448486328, "learning_rate": 9.764582236080929e-06, "loss": 0.5271, "step": 10467 }, { "epoch": 0.12515692440129605, "grad_norm": 1.748194694519043, "learning_rate": 9.764523521411213e-06, "loss": 0.5635, "step": 10468 }, { "epoch": 0.12516888054615669, "grad_norm": 1.549363613128662, "learning_rate": 9.764464799597079e-06, "loss": 0.6656, "step": 10469 }, { "epoch": 0.12518083669101734, "grad_norm": 2.1984143257141113, "learning_rate": 9.764406070638613e-06, "loss": 0.7166, "step": 10470 }, { "epoch": 0.125192792835878, "grad_norm": 3.6363115310668945, "learning_rate": 9.7643473345359e-06, "loss": 0.6856, "step": 10471 }, { "epoch": 0.12520474898073866, "grad_norm": 1.8699612617492676, "learning_rate": 9.764288591289034e-06, "loss": 0.6609, "step": 10472 }, { "epoch": 0.1252167051255993, "grad_norm": 2.842200994491577, "learning_rate": 9.7642298408981e-06, "loss": 0.5979, "step": 10473 }, { "epoch": 0.12522866127045995, "grad_norm": 2.8319380283355713, "learning_rate": 9.764171083363185e-06, "loss": 0.5836, "step": 10474 }, { "epoch": 0.1252406174153206, "grad_norm": 2.9916434288024902, "learning_rate": 9.764112318684381e-06, "loss": 0.598, "step": 10475 }, { "epoch": 0.12525257356018127, "grad_norm": 2.408942461013794, "learning_rate": 9.764053546861772e-06, "loss": 0.5033, "step": 10476 }, { "epoch": 0.1252645297050419, "grad_norm": 1.948417067527771, "learning_rate": 9.763994767895449e-06, "loss": 0.595, "step": 10477 }, { "epoch": 0.12527648584990256, "grad_norm": 1.7078981399536133, "learning_rate": 9.763935981785497e-06, "loss": 0.6065, "step": 10478 }, { "epoch": 0.12528844199476322, "grad_norm": 2.30428147315979, "learning_rate": 9.763877188532007e-06, "loss": 0.6353, "step": 10479 }, { "epoch": 0.12530039813962385, "grad_norm": 2.1712565422058105, "learning_rate": 9.763818388135066e-06, "loss": 0.6615, "step": 10480 }, { "epoch": 0.1253123542844845, "grad_norm": 1.9105249643325806, "learning_rate": 9.763759580594762e-06, "loss": 0.6305, "step": 10481 }, { "epoch": 0.12532431042934516, "grad_norm": 1.5618447065353394, "learning_rate": 9.763700765911183e-06, "loss": 0.5295, "step": 10482 }, { "epoch": 0.12533626657420582, "grad_norm": 2.119677782058716, "learning_rate": 9.763641944084422e-06, "loss": 0.6463, "step": 10483 }, { "epoch": 0.12534822271906645, "grad_norm": 1.9161419868469238, "learning_rate": 9.763583115114559e-06, "loss": 0.6384, "step": 10484 }, { "epoch": 0.1253601788639271, "grad_norm": 2.7905263900756836, "learning_rate": 9.763524279001687e-06, "loss": 0.6913, "step": 10485 }, { "epoch": 0.12537213500878777, "grad_norm": 2.112334966659546, "learning_rate": 9.763465435745893e-06, "loss": 0.645, "step": 10486 }, { "epoch": 0.12538409115364843, "grad_norm": 4.878203392028809, "learning_rate": 9.763406585347267e-06, "loss": 0.6863, "step": 10487 }, { "epoch": 0.12539604729850906, "grad_norm": 1.5609831809997559, "learning_rate": 9.763347727805894e-06, "loss": 0.5455, "step": 10488 }, { "epoch": 0.12540800344336972, "grad_norm": 2.5092692375183105, "learning_rate": 9.763288863121864e-06, "loss": 0.6314, "step": 10489 }, { "epoch": 0.12541995958823038, "grad_norm": 2.4460246562957764, "learning_rate": 9.763229991295268e-06, "loss": 0.5652, "step": 10490 }, { "epoch": 0.125431915733091, "grad_norm": 2.2123031616210938, "learning_rate": 9.76317111232619e-06, "loss": 0.7439, "step": 10491 }, { "epoch": 0.12544387187795167, "grad_norm": 3.4299569129943848, "learning_rate": 9.76311222621472e-06, "loss": 0.5773, "step": 10492 }, { "epoch": 0.12545582802281233, "grad_norm": 1.5900236368179321, "learning_rate": 9.763053332960946e-06, "loss": 0.7072, "step": 10493 }, { "epoch": 0.12546778416767299, "grad_norm": 1.6118214130401611, "learning_rate": 9.762994432564959e-06, "loss": 0.5725, "step": 10494 }, { "epoch": 0.12547974031253362, "grad_norm": 4.40976095199585, "learning_rate": 9.762935525026842e-06, "loss": 0.551, "step": 10495 }, { "epoch": 0.12549169645739428, "grad_norm": 1.9826171398162842, "learning_rate": 9.762876610346687e-06, "loss": 0.5871, "step": 10496 }, { "epoch": 0.12550365260225493, "grad_norm": 2.045341730117798, "learning_rate": 9.762817688524582e-06, "loss": 0.6116, "step": 10497 }, { "epoch": 0.1255156087471156, "grad_norm": 1.8572520017623901, "learning_rate": 9.762758759560615e-06, "loss": 0.6799, "step": 10498 }, { "epoch": 0.12552756489197622, "grad_norm": 1.7734689712524414, "learning_rate": 9.762699823454875e-06, "loss": 0.6731, "step": 10499 }, { "epoch": 0.12553952103683688, "grad_norm": 3.134324789047241, "learning_rate": 9.762640880207448e-06, "loss": 0.5883, "step": 10500 }, { "epoch": 0.12555147718169754, "grad_norm": 2.854767084121704, "learning_rate": 9.762581929818426e-06, "loss": 0.5458, "step": 10501 }, { "epoch": 0.12556343332655817, "grad_norm": 4.6460371017456055, "learning_rate": 9.762522972287894e-06, "loss": 0.5502, "step": 10502 }, { "epoch": 0.12557538947141883, "grad_norm": 2.1550452709198, "learning_rate": 9.762464007615941e-06, "loss": 0.5691, "step": 10503 }, { "epoch": 0.1255873456162795, "grad_norm": 11.264376640319824, "learning_rate": 9.762405035802658e-06, "loss": 0.7176, "step": 10504 }, { "epoch": 0.12559930176114015, "grad_norm": 3.39632248878479, "learning_rate": 9.762346056848132e-06, "loss": 0.6766, "step": 10505 }, { "epoch": 0.12561125790600078, "grad_norm": 5.768178462982178, "learning_rate": 9.76228707075245e-06, "loss": 0.5367, "step": 10506 }, { "epoch": 0.12562321405086144, "grad_norm": 1.8862228393554688, "learning_rate": 9.762228077515703e-06, "loss": 0.6356, "step": 10507 }, { "epoch": 0.1256351701957221, "grad_norm": 3.8792548179626465, "learning_rate": 9.762169077137976e-06, "loss": 0.6047, "step": 10508 }, { "epoch": 0.12564712634058275, "grad_norm": 2.0504441261291504, "learning_rate": 9.76211006961936e-06, "loss": 0.7071, "step": 10509 }, { "epoch": 0.12565908248544339, "grad_norm": 1.5116462707519531, "learning_rate": 9.762051054959945e-06, "loss": 0.616, "step": 10510 }, { "epoch": 0.12567103863030404, "grad_norm": 1.8240796327590942, "learning_rate": 9.761992033159816e-06, "loss": 0.648, "step": 10511 }, { "epoch": 0.1256829947751647, "grad_norm": 3.1947784423828125, "learning_rate": 9.761933004219063e-06, "loss": 0.4713, "step": 10512 }, { "epoch": 0.12569495092002533, "grad_norm": 2.105482339859009, "learning_rate": 9.761873968137774e-06, "loss": 0.5598, "step": 10513 }, { "epoch": 0.125706907064886, "grad_norm": 4.436686038970947, "learning_rate": 9.76181492491604e-06, "loss": 0.7584, "step": 10514 }, { "epoch": 0.12571886320974665, "grad_norm": 1.6810575723648071, "learning_rate": 9.761755874553946e-06, "loss": 0.6347, "step": 10515 }, { "epoch": 0.1257308193546073, "grad_norm": 1.898422360420227, "learning_rate": 9.761696817051582e-06, "loss": 0.6749, "step": 10516 }, { "epoch": 0.12574277549946794, "grad_norm": 4.023463249206543, "learning_rate": 9.76163775240904e-06, "loss": 0.5859, "step": 10517 }, { "epoch": 0.1257547316443286, "grad_norm": 4.3580193519592285, "learning_rate": 9.7615786806264e-06, "loss": 0.6687, "step": 10518 }, { "epoch": 0.12576668778918926, "grad_norm": 2.4818570613861084, "learning_rate": 9.761519601703759e-06, "loss": 0.6573, "step": 10519 }, { "epoch": 0.12577864393404992, "grad_norm": 1.6916685104370117, "learning_rate": 9.761460515641202e-06, "loss": 0.6588, "step": 10520 }, { "epoch": 0.12579060007891055, "grad_norm": 1.7333495616912842, "learning_rate": 9.761401422438818e-06, "loss": 0.5016, "step": 10521 }, { "epoch": 0.1258025562237712, "grad_norm": 2.8615148067474365, "learning_rate": 9.761342322096695e-06, "loss": 0.6386, "step": 10522 }, { "epoch": 0.12581451236863186, "grad_norm": 1.9659889936447144, "learning_rate": 9.761283214614923e-06, "loss": 0.5913, "step": 10523 }, { "epoch": 0.12582646851349252, "grad_norm": 1.9658598899841309, "learning_rate": 9.76122409999359e-06, "loss": 0.5227, "step": 10524 }, { "epoch": 0.12583842465835315, "grad_norm": 2.1173529624938965, "learning_rate": 9.761164978232785e-06, "loss": 0.666, "step": 10525 }, { "epoch": 0.1258503808032138, "grad_norm": 2.0222511291503906, "learning_rate": 9.761105849332593e-06, "loss": 0.7032, "step": 10526 }, { "epoch": 0.12586233694807447, "grad_norm": 2.043881416320801, "learning_rate": 9.76104671329311e-06, "loss": 0.6378, "step": 10527 }, { "epoch": 0.1258742930929351, "grad_norm": 2.672985076904297, "learning_rate": 9.760987570114417e-06, "loss": 0.6239, "step": 10528 }, { "epoch": 0.12588624923779576, "grad_norm": 5.14995813369751, "learning_rate": 9.76092841979661e-06, "loss": 0.5388, "step": 10529 }, { "epoch": 0.12589820538265642, "grad_norm": 1.9092109203338623, "learning_rate": 9.760869262339772e-06, "loss": 0.6137, "step": 10530 }, { "epoch": 0.12591016152751708, "grad_norm": 12.134050369262695, "learning_rate": 9.760810097743993e-06, "loss": 0.6422, "step": 10531 }, { "epoch": 0.1259221176723777, "grad_norm": 1.9016797542572021, "learning_rate": 9.760750926009362e-06, "loss": 0.6759, "step": 10532 }, { "epoch": 0.12593407381723837, "grad_norm": 3.547908306121826, "learning_rate": 9.760691747135968e-06, "loss": 0.5797, "step": 10533 }, { "epoch": 0.12594602996209903, "grad_norm": 3.2465250492095947, "learning_rate": 9.760632561123901e-06, "loss": 0.7089, "step": 10534 }, { "epoch": 0.12595798610695969, "grad_norm": 2.0741255283355713, "learning_rate": 9.76057336797325e-06, "loss": 0.74, "step": 10535 }, { "epoch": 0.12596994225182032, "grad_norm": 1.8563673496246338, "learning_rate": 9.7605141676841e-06, "loss": 0.63, "step": 10536 }, { "epoch": 0.12598189839668097, "grad_norm": 1.824963927268982, "learning_rate": 9.760454960256543e-06, "loss": 0.5421, "step": 10537 }, { "epoch": 0.12599385454154163, "grad_norm": 3.17681884765625, "learning_rate": 9.760395745690665e-06, "loss": 0.5982, "step": 10538 }, { "epoch": 0.12600581068640226, "grad_norm": 2.7015933990478516, "learning_rate": 9.760336523986559e-06, "loss": 0.6687, "step": 10539 }, { "epoch": 0.12601776683126292, "grad_norm": 1.692436933517456, "learning_rate": 9.760277295144311e-06, "loss": 0.4986, "step": 10540 }, { "epoch": 0.12602972297612358, "grad_norm": 2.0399158000946045, "learning_rate": 9.760218059164009e-06, "loss": 0.6724, "step": 10541 }, { "epoch": 0.12604167912098424, "grad_norm": 2.85184383392334, "learning_rate": 9.760158816045745e-06, "loss": 0.6001, "step": 10542 }, { "epoch": 0.12605363526584487, "grad_norm": 1.6864469051361084, "learning_rate": 9.760099565789605e-06, "loss": 0.6072, "step": 10543 }, { "epoch": 0.12606559141070553, "grad_norm": 2.877640962600708, "learning_rate": 9.760040308395678e-06, "loss": 0.7052, "step": 10544 }, { "epoch": 0.1260775475555662, "grad_norm": 3.119887590408325, "learning_rate": 9.759981043864055e-06, "loss": 0.6923, "step": 10545 }, { "epoch": 0.12608950370042685, "grad_norm": 2.2645061016082764, "learning_rate": 9.759921772194822e-06, "loss": 0.5862, "step": 10546 }, { "epoch": 0.12610145984528748, "grad_norm": 3.5571134090423584, "learning_rate": 9.75986249338807e-06, "loss": 0.6443, "step": 10547 }, { "epoch": 0.12611341599014814, "grad_norm": 27.530532836914062, "learning_rate": 9.759803207443887e-06, "loss": 0.5881, "step": 10548 }, { "epoch": 0.1261253721350088, "grad_norm": 1.837410569190979, "learning_rate": 9.759743914362365e-06, "loss": 0.6419, "step": 10549 }, { "epoch": 0.12613732827986943, "grad_norm": 3.2122464179992676, "learning_rate": 9.759684614143587e-06, "loss": 0.6415, "step": 10550 }, { "epoch": 0.12614928442473008, "grad_norm": 2.9023101329803467, "learning_rate": 9.759625306787646e-06, "loss": 0.5993, "step": 10551 }, { "epoch": 0.12616124056959074, "grad_norm": 1.4525477886199951, "learning_rate": 9.759565992294629e-06, "loss": 0.7047, "step": 10552 }, { "epoch": 0.1261731967144514, "grad_norm": 2.2123970985412598, "learning_rate": 9.759506670664626e-06, "loss": 0.6212, "step": 10553 }, { "epoch": 0.12618515285931203, "grad_norm": 1.7107006311416626, "learning_rate": 9.759447341897728e-06, "loss": 0.5406, "step": 10554 }, { "epoch": 0.1261971090041727, "grad_norm": 2.270747423171997, "learning_rate": 9.759388005994019e-06, "loss": 0.5798, "step": 10555 }, { "epoch": 0.12620906514903335, "grad_norm": 1.4932290315628052, "learning_rate": 9.759328662953592e-06, "loss": 0.7184, "step": 10556 }, { "epoch": 0.126221021293894, "grad_norm": 2.173673152923584, "learning_rate": 9.759269312776536e-06, "loss": 0.5975, "step": 10557 }, { "epoch": 0.12623297743875464, "grad_norm": 2.803189516067505, "learning_rate": 9.759209955462937e-06, "loss": 0.6127, "step": 10558 }, { "epoch": 0.1262449335836153, "grad_norm": 1.67196786403656, "learning_rate": 9.759150591012886e-06, "loss": 0.6311, "step": 10559 }, { "epoch": 0.12625688972847596, "grad_norm": 1.6688854694366455, "learning_rate": 9.759091219426472e-06, "loss": 0.6708, "step": 10560 }, { "epoch": 0.1262688458733366, "grad_norm": 1.6029179096221924, "learning_rate": 9.759031840703784e-06, "loss": 0.6634, "step": 10561 }, { "epoch": 0.12628080201819725, "grad_norm": 2.0135037899017334, "learning_rate": 9.75897245484491e-06, "loss": 0.6691, "step": 10562 }, { "epoch": 0.1262927581630579, "grad_norm": 2.15911865234375, "learning_rate": 9.758913061849942e-06, "loss": 0.6167, "step": 10563 }, { "epoch": 0.12630471430791856, "grad_norm": 1.4254424571990967, "learning_rate": 9.758853661718964e-06, "loss": 0.667, "step": 10564 }, { "epoch": 0.1263166704527792, "grad_norm": 1.7540780305862427, "learning_rate": 9.75879425445207e-06, "loss": 0.6252, "step": 10565 }, { "epoch": 0.12632862659763985, "grad_norm": 1.6378978490829468, "learning_rate": 9.758734840049347e-06, "loss": 0.6864, "step": 10566 }, { "epoch": 0.1263405827425005, "grad_norm": 2.480186939239502, "learning_rate": 9.758675418510884e-06, "loss": 0.6681, "step": 10567 }, { "epoch": 0.12635253888736117, "grad_norm": 1.401589035987854, "learning_rate": 9.758615989836769e-06, "loss": 0.5864, "step": 10568 }, { "epoch": 0.1263644950322218, "grad_norm": 2.3884594440460205, "learning_rate": 9.758556554027095e-06, "loss": 0.5884, "step": 10569 }, { "epoch": 0.12637645117708246, "grad_norm": 1.8416247367858887, "learning_rate": 9.758497111081945e-06, "loss": 0.621, "step": 10570 }, { "epoch": 0.12638840732194312, "grad_norm": 1.8964754343032837, "learning_rate": 9.758437661001414e-06, "loss": 0.59, "step": 10571 }, { "epoch": 0.12640036346680375, "grad_norm": 1.8756128549575806, "learning_rate": 9.75837820378559e-06, "loss": 0.644, "step": 10572 }, { "epoch": 0.1264123196116644, "grad_norm": 1.5054634809494019, "learning_rate": 9.758318739434557e-06, "loss": 0.6731, "step": 10573 }, { "epoch": 0.12642427575652507, "grad_norm": 6.383187294006348, "learning_rate": 9.758259267948413e-06, "loss": 0.6103, "step": 10574 }, { "epoch": 0.12643623190138573, "grad_norm": 2.4160516262054443, "learning_rate": 9.758199789327238e-06, "loss": 0.6413, "step": 10575 }, { "epoch": 0.12644818804624636, "grad_norm": 3.578925848007202, "learning_rate": 9.758140303571128e-06, "loss": 0.6205, "step": 10576 }, { "epoch": 0.12646014419110702, "grad_norm": 2.1016314029693604, "learning_rate": 9.75808081068017e-06, "loss": 0.6236, "step": 10577 }, { "epoch": 0.12647210033596767, "grad_norm": 2.7151525020599365, "learning_rate": 9.758021310654452e-06, "loss": 0.6089, "step": 10578 }, { "epoch": 0.12648405648082833, "grad_norm": 1.6594583988189697, "learning_rate": 9.757961803494063e-06, "loss": 0.7028, "step": 10579 }, { "epoch": 0.12649601262568896, "grad_norm": 2.471872329711914, "learning_rate": 9.757902289199095e-06, "loss": 0.6432, "step": 10580 }, { "epoch": 0.12650796877054962, "grad_norm": 1.4320564270019531, "learning_rate": 9.757842767769637e-06, "loss": 0.6642, "step": 10581 }, { "epoch": 0.12651992491541028, "grad_norm": 1.691129446029663, "learning_rate": 9.757783239205774e-06, "loss": 0.7225, "step": 10582 }, { "epoch": 0.12653188106027094, "grad_norm": 1.9505161046981812, "learning_rate": 9.7577237035076e-06, "loss": 0.5275, "step": 10583 }, { "epoch": 0.12654383720513157, "grad_norm": 1.8628901243209839, "learning_rate": 9.7576641606752e-06, "loss": 0.6142, "step": 10584 }, { "epoch": 0.12655579334999223, "grad_norm": 1.6823269128799438, "learning_rate": 9.757604610708667e-06, "loss": 0.6328, "step": 10585 }, { "epoch": 0.1265677494948529, "grad_norm": 2.6317451000213623, "learning_rate": 9.75754505360809e-06, "loss": 0.5872, "step": 10586 }, { "epoch": 0.12657970563971352, "grad_norm": 1.9427794218063354, "learning_rate": 9.757485489373556e-06, "loss": 0.5732, "step": 10587 }, { "epoch": 0.12659166178457418, "grad_norm": 2.1491715908050537, "learning_rate": 9.757425918005156e-06, "loss": 0.6453, "step": 10588 }, { "epoch": 0.12660361792943484, "grad_norm": 3.0314249992370605, "learning_rate": 9.75736633950298e-06, "loss": 0.5912, "step": 10589 }, { "epoch": 0.1266155740742955, "grad_norm": 1.9606027603149414, "learning_rate": 9.757306753867114e-06, "loss": 0.6032, "step": 10590 }, { "epoch": 0.12662753021915613, "grad_norm": 4.493899345397949, "learning_rate": 9.757247161097651e-06, "loss": 0.6883, "step": 10591 }, { "epoch": 0.12663948636401678, "grad_norm": 2.372743606567383, "learning_rate": 9.757187561194679e-06, "loss": 0.617, "step": 10592 }, { "epoch": 0.12665144250887744, "grad_norm": 1.8213776350021362, "learning_rate": 9.757127954158287e-06, "loss": 0.5795, "step": 10593 }, { "epoch": 0.1266633986537381, "grad_norm": 3.568471670150757, "learning_rate": 9.757068339988565e-06, "loss": 0.6266, "step": 10594 }, { "epoch": 0.12667535479859873, "grad_norm": 2.095852851867676, "learning_rate": 9.757008718685602e-06, "loss": 0.6678, "step": 10595 }, { "epoch": 0.1266873109434594, "grad_norm": 2.2193071842193604, "learning_rate": 9.756949090249487e-06, "loss": 0.5325, "step": 10596 }, { "epoch": 0.12669926708832005, "grad_norm": 1.6290414333343506, "learning_rate": 9.756889454680311e-06, "loss": 0.617, "step": 10597 }, { "epoch": 0.12671122323318068, "grad_norm": 1.573713779449463, "learning_rate": 9.75682981197816e-06, "loss": 0.5177, "step": 10598 }, { "epoch": 0.12672317937804134, "grad_norm": 13.048746109008789, "learning_rate": 9.75677016214313e-06, "loss": 0.6606, "step": 10599 }, { "epoch": 0.126735135522902, "grad_norm": 1.4023078680038452, "learning_rate": 9.756710505175305e-06, "loss": 0.5684, "step": 10600 }, { "epoch": 0.12674709166776266, "grad_norm": 1.5036330223083496, "learning_rate": 9.756650841074773e-06, "loss": 0.6704, "step": 10601 }, { "epoch": 0.1267590478126233, "grad_norm": 1.6365392208099365, "learning_rate": 9.756591169841627e-06, "loss": 0.6118, "step": 10602 }, { "epoch": 0.12677100395748395, "grad_norm": 4.630569934844971, "learning_rate": 9.756531491475957e-06, "loss": 0.5704, "step": 10603 }, { "epoch": 0.1267829601023446, "grad_norm": 3.612438201904297, "learning_rate": 9.75647180597785e-06, "loss": 0.6031, "step": 10604 }, { "epoch": 0.12679491624720526, "grad_norm": 1.4865946769714355, "learning_rate": 9.756412113347398e-06, "loss": 0.6828, "step": 10605 }, { "epoch": 0.1268068723920659, "grad_norm": 1.8600951433181763, "learning_rate": 9.756352413584688e-06, "loss": 0.5782, "step": 10606 }, { "epoch": 0.12681882853692655, "grad_norm": 2.320458173751831, "learning_rate": 9.75629270668981e-06, "loss": 0.6226, "step": 10607 }, { "epoch": 0.1268307846817872, "grad_norm": 3.930241584777832, "learning_rate": 9.756232992662855e-06, "loss": 0.6033, "step": 10608 }, { "epoch": 0.12684274082664784, "grad_norm": 2.2318687438964844, "learning_rate": 9.756173271503912e-06, "loss": 0.7014, "step": 10609 }, { "epoch": 0.1268546969715085, "grad_norm": 1.5253136157989502, "learning_rate": 9.756113543213071e-06, "loss": 0.6243, "step": 10610 }, { "epoch": 0.12686665311636916, "grad_norm": 2.028050184249878, "learning_rate": 9.75605380779042e-06, "loss": 0.6046, "step": 10611 }, { "epoch": 0.12687860926122982, "grad_norm": 1.5131959915161133, "learning_rate": 9.755994065236048e-06, "loss": 0.6232, "step": 10612 }, { "epoch": 0.12689056540609045, "grad_norm": 2.274738311767578, "learning_rate": 9.755934315550048e-06, "loss": 0.7971, "step": 10613 }, { "epoch": 0.1269025215509511, "grad_norm": 1.7251431941986084, "learning_rate": 9.755874558732507e-06, "loss": 0.6753, "step": 10614 }, { "epoch": 0.12691447769581177, "grad_norm": 3.493788719177246, "learning_rate": 9.755814794783514e-06, "loss": 0.5952, "step": 10615 }, { "epoch": 0.12692643384067243, "grad_norm": 3.392761707305908, "learning_rate": 9.755755023703163e-06, "loss": 0.6634, "step": 10616 }, { "epoch": 0.12693838998553306, "grad_norm": 1.7803544998168945, "learning_rate": 9.755695245491536e-06, "loss": 0.5889, "step": 10617 }, { "epoch": 0.12695034613039372, "grad_norm": 2.050837993621826, "learning_rate": 9.75563546014873e-06, "loss": 0.6869, "step": 10618 }, { "epoch": 0.12696230227525437, "grad_norm": 1.850067138671875, "learning_rate": 9.75557566767483e-06, "loss": 0.7073, "step": 10619 }, { "epoch": 0.126974258420115, "grad_norm": 2.398771047592163, "learning_rate": 9.75551586806993e-06, "loss": 0.5979, "step": 10620 }, { "epoch": 0.12698621456497566, "grad_norm": 2.2264671325683594, "learning_rate": 9.755456061334114e-06, "loss": 0.576, "step": 10621 }, { "epoch": 0.12699817070983632, "grad_norm": 3.0129308700561523, "learning_rate": 9.755396247467476e-06, "loss": 0.6495, "step": 10622 }, { "epoch": 0.12701012685469698, "grad_norm": 2.303879737854004, "learning_rate": 9.755336426470106e-06, "loss": 0.6418, "step": 10623 }, { "epoch": 0.1270220829995576, "grad_norm": 1.2425775527954102, "learning_rate": 9.75527659834209e-06, "loss": 0.6223, "step": 10624 }, { "epoch": 0.12703403914441827, "grad_norm": 3.1325523853302, "learning_rate": 9.755216763083522e-06, "loss": 0.5897, "step": 10625 }, { "epoch": 0.12704599528927893, "grad_norm": 1.7377852201461792, "learning_rate": 9.755156920694487e-06, "loss": 0.6146, "step": 10626 }, { "epoch": 0.1270579514341396, "grad_norm": 7.169368743896484, "learning_rate": 9.755097071175078e-06, "loss": 0.568, "step": 10627 }, { "epoch": 0.12706990757900022, "grad_norm": 1.6907604932785034, "learning_rate": 9.755037214525385e-06, "loss": 0.5426, "step": 10628 }, { "epoch": 0.12708186372386088, "grad_norm": 3.0970566272735596, "learning_rate": 9.754977350745497e-06, "loss": 0.7054, "step": 10629 }, { "epoch": 0.12709381986872154, "grad_norm": 2.0122947692871094, "learning_rate": 9.754917479835503e-06, "loss": 0.6736, "step": 10630 }, { "epoch": 0.12710577601358217, "grad_norm": 2.1901159286499023, "learning_rate": 9.754857601795494e-06, "loss": 0.6273, "step": 10631 }, { "epoch": 0.12711773215844283, "grad_norm": 2.365344762802124, "learning_rate": 9.754797716625557e-06, "loss": 0.6171, "step": 10632 }, { "epoch": 0.12712968830330348, "grad_norm": 2.7757766246795654, "learning_rate": 9.754737824325786e-06, "loss": 0.6962, "step": 10633 }, { "epoch": 0.12714164444816414, "grad_norm": 1.943508505821228, "learning_rate": 9.754677924896268e-06, "loss": 0.6118, "step": 10634 }, { "epoch": 0.12715360059302477, "grad_norm": 2.130680799484253, "learning_rate": 9.754618018337093e-06, "loss": 0.6241, "step": 10635 }, { "epoch": 0.12716555673788543, "grad_norm": 2.3558194637298584, "learning_rate": 9.754558104648352e-06, "loss": 0.6235, "step": 10636 }, { "epoch": 0.1271775128827461, "grad_norm": 2.8152034282684326, "learning_rate": 9.754498183830135e-06, "loss": 0.7263, "step": 10637 }, { "epoch": 0.12718946902760675, "grad_norm": 1.8332610130310059, "learning_rate": 9.754438255882531e-06, "loss": 0.5265, "step": 10638 }, { "epoch": 0.12720142517246738, "grad_norm": 1.6449851989746094, "learning_rate": 9.754378320805627e-06, "loss": 0.6663, "step": 10639 }, { "epoch": 0.12721338131732804, "grad_norm": 2.96403169631958, "learning_rate": 9.75431837859952e-06, "loss": 0.687, "step": 10640 }, { "epoch": 0.1272253374621887, "grad_norm": 1.8275948762893677, "learning_rate": 9.754258429264291e-06, "loss": 0.7395, "step": 10641 }, { "epoch": 0.12723729360704936, "grad_norm": 3.824798345565796, "learning_rate": 9.754198472800039e-06, "loss": 0.7651, "step": 10642 }, { "epoch": 0.12724924975191, "grad_norm": 1.4299238920211792, "learning_rate": 9.754138509206847e-06, "loss": 0.6637, "step": 10643 }, { "epoch": 0.12726120589677065, "grad_norm": 2.01550030708313, "learning_rate": 9.754078538484808e-06, "loss": 0.627, "step": 10644 }, { "epoch": 0.1272731620416313, "grad_norm": 1.583274245262146, "learning_rate": 9.754018560634012e-06, "loss": 0.6562, "step": 10645 }, { "epoch": 0.12728511818649194, "grad_norm": 1.546805739402771, "learning_rate": 9.753958575654547e-06, "loss": 0.7027, "step": 10646 }, { "epoch": 0.1272970743313526, "grad_norm": 1.9095048904418945, "learning_rate": 9.753898583546506e-06, "loss": 0.665, "step": 10647 }, { "epoch": 0.12730903047621325, "grad_norm": 1.5602272748947144, "learning_rate": 9.753838584309977e-06, "loss": 0.5323, "step": 10648 }, { "epoch": 0.1273209866210739, "grad_norm": 1.6733241081237793, "learning_rate": 9.753778577945048e-06, "loss": 0.614, "step": 10649 }, { "epoch": 0.12733294276593454, "grad_norm": 1.8821449279785156, "learning_rate": 9.753718564451813e-06, "loss": 0.5704, "step": 10650 }, { "epoch": 0.1273448989107952, "grad_norm": 1.7716838121414185, "learning_rate": 9.75365854383036e-06, "loss": 0.7186, "step": 10651 }, { "epoch": 0.12735685505565586, "grad_norm": 1.7195643186569214, "learning_rate": 9.753598516080778e-06, "loss": 0.6599, "step": 10652 }, { "epoch": 0.12736881120051652, "grad_norm": 3.319416046142578, "learning_rate": 9.75353848120316e-06, "loss": 0.677, "step": 10653 }, { "epoch": 0.12738076734537715, "grad_norm": 1.9863901138305664, "learning_rate": 9.753478439197592e-06, "loss": 0.6026, "step": 10654 }, { "epoch": 0.1273927234902378, "grad_norm": 1.6221518516540527, "learning_rate": 9.753418390064166e-06, "loss": 0.6651, "step": 10655 }, { "epoch": 0.12740467963509847, "grad_norm": 2.542707681655884, "learning_rate": 9.753358333802973e-06, "loss": 0.6174, "step": 10656 }, { "epoch": 0.1274166357799591, "grad_norm": 1.907240629196167, "learning_rate": 9.753298270414103e-06, "loss": 0.6241, "step": 10657 }, { "epoch": 0.12742859192481976, "grad_norm": 1.6627205610275269, "learning_rate": 9.753238199897645e-06, "loss": 0.6575, "step": 10658 }, { "epoch": 0.12744054806968041, "grad_norm": 1.834363579750061, "learning_rate": 9.75317812225369e-06, "loss": 0.6262, "step": 10659 }, { "epoch": 0.12745250421454107, "grad_norm": 1.941243290901184, "learning_rate": 9.753118037482327e-06, "loss": 0.5772, "step": 10660 }, { "epoch": 0.1274644603594017, "grad_norm": 1.620941162109375, "learning_rate": 9.753057945583646e-06, "loss": 0.6661, "step": 10661 }, { "epoch": 0.12747641650426236, "grad_norm": 1.9410325288772583, "learning_rate": 9.75299784655774e-06, "loss": 0.6673, "step": 10662 }, { "epoch": 0.12748837264912302, "grad_norm": 1.9927343130111694, "learning_rate": 9.752937740404693e-06, "loss": 0.6285, "step": 10663 }, { "epoch": 0.12750032879398368, "grad_norm": 1.5734772682189941, "learning_rate": 9.752877627124602e-06, "loss": 0.5713, "step": 10664 }, { "epoch": 0.1275122849388443, "grad_norm": 1.5806562900543213, "learning_rate": 9.752817506717553e-06, "loss": 0.6004, "step": 10665 }, { "epoch": 0.12752424108370497, "grad_norm": 4.266246318817139, "learning_rate": 9.752757379183636e-06, "loss": 0.644, "step": 10666 }, { "epoch": 0.12753619722856563, "grad_norm": 1.8966879844665527, "learning_rate": 9.752697244522946e-06, "loss": 0.6856, "step": 10667 }, { "epoch": 0.12754815337342626, "grad_norm": 1.575308918952942, "learning_rate": 9.752637102735566e-06, "loss": 0.5855, "step": 10668 }, { "epoch": 0.12756010951828692, "grad_norm": 1.9433180093765259, "learning_rate": 9.75257695382159e-06, "loss": 0.5824, "step": 10669 }, { "epoch": 0.12757206566314758, "grad_norm": 3.4611802101135254, "learning_rate": 9.752516797781111e-06, "loss": 0.6759, "step": 10670 }, { "epoch": 0.12758402180800824, "grad_norm": 2.7442479133605957, "learning_rate": 9.752456634614214e-06, "loss": 0.6754, "step": 10671 }, { "epoch": 0.12759597795286887, "grad_norm": 1.5936020612716675, "learning_rate": 9.75239646432099e-06, "loss": 0.6022, "step": 10672 }, { "epoch": 0.12760793409772953, "grad_norm": 2.2136592864990234, "learning_rate": 9.752336286901533e-06, "loss": 0.6015, "step": 10673 }, { "epoch": 0.12761989024259018, "grad_norm": 1.730772852897644, "learning_rate": 9.752276102355931e-06, "loss": 0.6217, "step": 10674 }, { "epoch": 0.12763184638745084, "grad_norm": 1.8512037992477417, "learning_rate": 9.752215910684272e-06, "loss": 0.6403, "step": 10675 }, { "epoch": 0.12764380253231147, "grad_norm": 3.1981587409973145, "learning_rate": 9.75215571188665e-06, "loss": 0.6083, "step": 10676 }, { "epoch": 0.12765575867717213, "grad_norm": 3.26278018951416, "learning_rate": 9.752095505963155e-06, "loss": 0.6254, "step": 10677 }, { "epoch": 0.1276677148220328, "grad_norm": 3.6972830295562744, "learning_rate": 9.752035292913873e-06, "loss": 0.7553, "step": 10678 }, { "epoch": 0.12767967096689342, "grad_norm": 1.9430077075958252, "learning_rate": 9.7519750727389e-06, "loss": 0.6584, "step": 10679 }, { "epoch": 0.12769162711175408, "grad_norm": 1.9456384181976318, "learning_rate": 9.75191484543832e-06, "loss": 0.6833, "step": 10680 }, { "epoch": 0.12770358325661474, "grad_norm": 1.6684494018554688, "learning_rate": 9.751854611012227e-06, "loss": 0.6584, "step": 10681 }, { "epoch": 0.1277155394014754, "grad_norm": 1.6152445077896118, "learning_rate": 9.751794369460714e-06, "loss": 0.6111, "step": 10682 }, { "epoch": 0.12772749554633603, "grad_norm": 2.4009921550750732, "learning_rate": 9.751734120783867e-06, "loss": 0.6412, "step": 10683 }, { "epoch": 0.1277394516911967, "grad_norm": 1.7472389936447144, "learning_rate": 9.751673864981778e-06, "loss": 0.5846, "step": 10684 }, { "epoch": 0.12775140783605735, "grad_norm": 1.3991515636444092, "learning_rate": 9.751613602054538e-06, "loss": 0.5109, "step": 10685 }, { "epoch": 0.127763363980918, "grad_norm": 9.423437118530273, "learning_rate": 9.751553332002234e-06, "loss": 0.6367, "step": 10686 }, { "epoch": 0.12777532012577864, "grad_norm": 3.9933321475982666, "learning_rate": 9.75149305482496e-06, "loss": 0.6671, "step": 10687 }, { "epoch": 0.1277872762706393, "grad_norm": 7.981260776519775, "learning_rate": 9.751432770522808e-06, "loss": 0.6002, "step": 10688 }, { "epoch": 0.12779923241549995, "grad_norm": 38.15574264526367, "learning_rate": 9.751372479095861e-06, "loss": 0.5332, "step": 10689 }, { "epoch": 0.12781118856036058, "grad_norm": 2.1176092624664307, "learning_rate": 9.751312180544218e-06, "loss": 0.5369, "step": 10690 }, { "epoch": 0.12782314470522124, "grad_norm": 2.1695313453674316, "learning_rate": 9.751251874867965e-06, "loss": 0.6729, "step": 10691 }, { "epoch": 0.1278351008500819, "grad_norm": 2.325031280517578, "learning_rate": 9.75119156206719e-06, "loss": 0.56, "step": 10692 }, { "epoch": 0.12784705699494256, "grad_norm": 3.0267436504364014, "learning_rate": 9.751131242141988e-06, "loss": 0.6354, "step": 10693 }, { "epoch": 0.1278590131398032, "grad_norm": 2.497232437133789, "learning_rate": 9.75107091509245e-06, "loss": 0.7144, "step": 10694 }, { "epoch": 0.12787096928466385, "grad_norm": 1.8878459930419922, "learning_rate": 9.751010580918662e-06, "loss": 0.5796, "step": 10695 }, { "epoch": 0.1278829254295245, "grad_norm": 3.784933567047119, "learning_rate": 9.750950239620717e-06, "loss": 0.6028, "step": 10696 }, { "epoch": 0.12789488157438517, "grad_norm": 2.1021780967712402, "learning_rate": 9.750889891198705e-06, "loss": 0.61, "step": 10697 }, { "epoch": 0.1279068377192458, "grad_norm": 3.289832592010498, "learning_rate": 9.750829535652717e-06, "loss": 0.6055, "step": 10698 }, { "epoch": 0.12791879386410646, "grad_norm": 2.193938970565796, "learning_rate": 9.750769172982844e-06, "loss": 0.6188, "step": 10699 }, { "epoch": 0.12793075000896711, "grad_norm": 2.1400516033172607, "learning_rate": 9.750708803189175e-06, "loss": 0.7148, "step": 10700 }, { "epoch": 0.12794270615382777, "grad_norm": 1.5521317720413208, "learning_rate": 9.7506484262718e-06, "loss": 0.6364, "step": 10701 }, { "epoch": 0.1279546622986884, "grad_norm": 2.6890177726745605, "learning_rate": 9.750588042230813e-06, "loss": 0.5965, "step": 10702 }, { "epoch": 0.12796661844354906, "grad_norm": 1.7221949100494385, "learning_rate": 9.750527651066301e-06, "loss": 0.6703, "step": 10703 }, { "epoch": 0.12797857458840972, "grad_norm": 1.7636383771896362, "learning_rate": 9.750467252778357e-06, "loss": 0.7093, "step": 10704 }, { "epoch": 0.12799053073327035, "grad_norm": 2.53108549118042, "learning_rate": 9.75040684736707e-06, "loss": 0.628, "step": 10705 }, { "epoch": 0.128002486878131, "grad_norm": 1.8601891994476318, "learning_rate": 9.75034643483253e-06, "loss": 0.6675, "step": 10706 }, { "epoch": 0.12801444302299167, "grad_norm": 2.179837226867676, "learning_rate": 9.75028601517483e-06, "loss": 0.6263, "step": 10707 }, { "epoch": 0.12802639916785233, "grad_norm": 1.9479238986968994, "learning_rate": 9.75022558839406e-06, "loss": 0.5546, "step": 10708 }, { "epoch": 0.12803835531271296, "grad_norm": 2.148580312728882, "learning_rate": 9.750165154490308e-06, "loss": 0.7188, "step": 10709 }, { "epoch": 0.12805031145757362, "grad_norm": 4.615427494049072, "learning_rate": 9.750104713463666e-06, "loss": 0.6097, "step": 10710 }, { "epoch": 0.12806226760243428, "grad_norm": 2.162201166152954, "learning_rate": 9.750044265314226e-06, "loss": 0.5938, "step": 10711 }, { "epoch": 0.12807422374729494, "grad_norm": 1.4936879873275757, "learning_rate": 9.749983810042078e-06, "loss": 0.735, "step": 10712 }, { "epoch": 0.12808617989215557, "grad_norm": 1.491346001625061, "learning_rate": 9.749923347647312e-06, "loss": 0.5788, "step": 10713 }, { "epoch": 0.12809813603701622, "grad_norm": 1.3443259000778198, "learning_rate": 9.749862878130021e-06, "loss": 0.5025, "step": 10714 }, { "epoch": 0.12811009218187688, "grad_norm": 1.7124186754226685, "learning_rate": 9.749802401490291e-06, "loss": 0.6091, "step": 10715 }, { "epoch": 0.12812204832673751, "grad_norm": 3.2540433406829834, "learning_rate": 9.749741917728217e-06, "loss": 0.6013, "step": 10716 }, { "epoch": 0.12813400447159817, "grad_norm": 2.0158450603485107, "learning_rate": 9.749681426843887e-06, "loss": 0.6055, "step": 10717 }, { "epoch": 0.12814596061645883, "grad_norm": 1.7612895965576172, "learning_rate": 9.749620928837394e-06, "loss": 0.6956, "step": 10718 }, { "epoch": 0.1281579167613195, "grad_norm": 1.5024194717407227, "learning_rate": 9.749560423708826e-06, "loss": 0.63, "step": 10719 }, { "epoch": 0.12816987290618012, "grad_norm": 1.740051507949829, "learning_rate": 9.749499911458276e-06, "loss": 0.6118, "step": 10720 }, { "epoch": 0.12818182905104078, "grad_norm": 1.769309163093567, "learning_rate": 9.749439392085836e-06, "loss": 0.6415, "step": 10721 }, { "epoch": 0.12819378519590144, "grad_norm": 2.061086893081665, "learning_rate": 9.749378865591592e-06, "loss": 0.5765, "step": 10722 }, { "epoch": 0.1282057413407621, "grad_norm": 1.6337924003601074, "learning_rate": 9.749318331975638e-06, "loss": 0.6827, "step": 10723 }, { "epoch": 0.12821769748562273, "grad_norm": 1.8747525215148926, "learning_rate": 9.749257791238064e-06, "loss": 0.6672, "step": 10724 }, { "epoch": 0.1282296536304834, "grad_norm": 17.315019607543945, "learning_rate": 9.749197243378963e-06, "loss": 0.625, "step": 10725 }, { "epoch": 0.12824160977534405, "grad_norm": 4.225539207458496, "learning_rate": 9.749136688398421e-06, "loss": 0.7067, "step": 10726 }, { "epoch": 0.12825356592020468, "grad_norm": 4.708991527557373, "learning_rate": 9.749076126296535e-06, "loss": 0.5387, "step": 10727 }, { "epoch": 0.12826552206506533, "grad_norm": 2.088223934173584, "learning_rate": 9.749015557073392e-06, "loss": 0.5876, "step": 10728 }, { "epoch": 0.128277478209926, "grad_norm": 5.6320061683654785, "learning_rate": 9.74895498072908e-06, "loss": 0.7192, "step": 10729 }, { "epoch": 0.12828943435478665, "grad_norm": 2.127603054046631, "learning_rate": 9.748894397263695e-06, "loss": 0.6474, "step": 10730 }, { "epoch": 0.12830139049964728, "grad_norm": 2.890852689743042, "learning_rate": 9.748833806677327e-06, "loss": 0.6499, "step": 10731 }, { "epoch": 0.12831334664450794, "grad_norm": 2.2552688121795654, "learning_rate": 9.748773208970065e-06, "loss": 0.6187, "step": 10732 }, { "epoch": 0.1283253027893686, "grad_norm": 1.8771673440933228, "learning_rate": 9.748712604142e-06, "loss": 0.57, "step": 10733 }, { "epoch": 0.12833725893422926, "grad_norm": 3.6720635890960693, "learning_rate": 9.748651992193225e-06, "loss": 0.541, "step": 10734 }, { "epoch": 0.1283492150790899, "grad_norm": 1.5469356775283813, "learning_rate": 9.748591373123828e-06, "loss": 0.6071, "step": 10735 }, { "epoch": 0.12836117122395055, "grad_norm": 3.6883552074432373, "learning_rate": 9.748530746933902e-06, "loss": 0.6505, "step": 10736 }, { "epoch": 0.1283731273688112, "grad_norm": 3.143218755722046, "learning_rate": 9.748470113623538e-06, "loss": 0.6503, "step": 10737 }, { "epoch": 0.12838508351367184, "grad_norm": 2.6460444927215576, "learning_rate": 9.748409473192826e-06, "loss": 0.5983, "step": 10738 }, { "epoch": 0.1283970396585325, "grad_norm": 3.103961229324341, "learning_rate": 9.748348825641856e-06, "loss": 0.6332, "step": 10739 }, { "epoch": 0.12840899580339316, "grad_norm": 2.283560037612915, "learning_rate": 9.74828817097072e-06, "loss": 0.6665, "step": 10740 }, { "epoch": 0.12842095194825381, "grad_norm": 1.733534812927246, "learning_rate": 9.74822750917951e-06, "loss": 0.5205, "step": 10741 }, { "epoch": 0.12843290809311445, "grad_norm": 5.936001777648926, "learning_rate": 9.748166840268316e-06, "loss": 0.5844, "step": 10742 }, { "epoch": 0.1284448642379751, "grad_norm": 2.0073142051696777, "learning_rate": 9.74810616423723e-06, "loss": 0.626, "step": 10743 }, { "epoch": 0.12845682038283576, "grad_norm": 1.5439796447753906, "learning_rate": 9.748045481086339e-06, "loss": 0.5971, "step": 10744 }, { "epoch": 0.12846877652769642, "grad_norm": 4.792913436889648, "learning_rate": 9.747984790815738e-06, "loss": 0.5935, "step": 10745 }, { "epoch": 0.12848073267255705, "grad_norm": 1.5881612300872803, "learning_rate": 9.747924093425518e-06, "loss": 0.5426, "step": 10746 }, { "epoch": 0.1284926888174177, "grad_norm": 1.8887085914611816, "learning_rate": 9.747863388915768e-06, "loss": 0.7071, "step": 10747 }, { "epoch": 0.12850464496227837, "grad_norm": 3.062202215194702, "learning_rate": 9.747802677286581e-06, "loss": 0.579, "step": 10748 }, { "epoch": 0.12851660110713903, "grad_norm": 2.0291802883148193, "learning_rate": 9.747741958538047e-06, "loss": 0.6968, "step": 10749 }, { "epoch": 0.12852855725199966, "grad_norm": 4.71274995803833, "learning_rate": 9.747681232670257e-06, "loss": 0.6534, "step": 10750 }, { "epoch": 0.12854051339686032, "grad_norm": 3.2694437503814697, "learning_rate": 9.7476204996833e-06, "loss": 0.5656, "step": 10751 }, { "epoch": 0.12855246954172098, "grad_norm": 2.2250046730041504, "learning_rate": 9.74755975957727e-06, "loss": 0.63, "step": 10752 }, { "epoch": 0.1285644256865816, "grad_norm": 2.190319538116455, "learning_rate": 9.74749901235226e-06, "loss": 0.635, "step": 10753 }, { "epoch": 0.12857638183144227, "grad_norm": 2.438296318054199, "learning_rate": 9.747438258008355e-06, "loss": 0.5729, "step": 10754 }, { "epoch": 0.12858833797630292, "grad_norm": 1.7922687530517578, "learning_rate": 9.747377496545652e-06, "loss": 0.5923, "step": 10755 }, { "epoch": 0.12860029412116358, "grad_norm": 1.3090349435806274, "learning_rate": 9.747316727964238e-06, "loss": 0.6074, "step": 10756 }, { "epoch": 0.12861225026602421, "grad_norm": 2.2882282733917236, "learning_rate": 9.747255952264205e-06, "loss": 0.5842, "step": 10757 }, { "epoch": 0.12862420641088487, "grad_norm": 2.1508612632751465, "learning_rate": 9.747195169445646e-06, "loss": 0.6443, "step": 10758 }, { "epoch": 0.12863616255574553, "grad_norm": 2.283433675765991, "learning_rate": 9.747134379508651e-06, "loss": 0.6235, "step": 10759 }, { "epoch": 0.1286481187006062, "grad_norm": 2.221569061279297, "learning_rate": 9.74707358245331e-06, "loss": 0.5552, "step": 10760 }, { "epoch": 0.12866007484546682, "grad_norm": 6.231150150299072, "learning_rate": 9.747012778279718e-06, "loss": 0.5903, "step": 10761 }, { "epoch": 0.12867203099032748, "grad_norm": 3.0258233547210693, "learning_rate": 9.74695196698796e-06, "loss": 0.7034, "step": 10762 }, { "epoch": 0.12868398713518814, "grad_norm": 1.6315399408340454, "learning_rate": 9.746891148578132e-06, "loss": 0.6434, "step": 10763 }, { "epoch": 0.12869594328004877, "grad_norm": 1.832077980041504, "learning_rate": 9.746830323050326e-06, "loss": 0.6657, "step": 10764 }, { "epoch": 0.12870789942490943, "grad_norm": 2.6471476554870605, "learning_rate": 9.746769490404629e-06, "loss": 0.6227, "step": 10765 }, { "epoch": 0.1287198555697701, "grad_norm": 6.651586532592773, "learning_rate": 9.746708650641135e-06, "loss": 0.5743, "step": 10766 }, { "epoch": 0.12873181171463075, "grad_norm": 1.894771695137024, "learning_rate": 9.746647803759932e-06, "loss": 0.6632, "step": 10767 }, { "epoch": 0.12874376785949138, "grad_norm": 1.9551234245300293, "learning_rate": 9.746586949761116e-06, "loss": 0.5855, "step": 10768 }, { "epoch": 0.12875572400435203, "grad_norm": 1.5384074449539185, "learning_rate": 9.746526088644776e-06, "loss": 0.5192, "step": 10769 }, { "epoch": 0.1287676801492127, "grad_norm": 1.8768093585968018, "learning_rate": 9.746465220411003e-06, "loss": 0.5289, "step": 10770 }, { "epoch": 0.12877963629407335, "grad_norm": 2.133559226989746, "learning_rate": 9.74640434505989e-06, "loss": 0.6199, "step": 10771 }, { "epoch": 0.12879159243893398, "grad_norm": 10.592790603637695, "learning_rate": 9.746343462591526e-06, "loss": 0.6787, "step": 10772 }, { "epoch": 0.12880354858379464, "grad_norm": 2.8906190395355225, "learning_rate": 9.746282573006002e-06, "loss": 0.5987, "step": 10773 }, { "epoch": 0.1288155047286553, "grad_norm": 1.685454249382019, "learning_rate": 9.746221676303412e-06, "loss": 0.6174, "step": 10774 }, { "epoch": 0.12882746087351593, "grad_norm": 1.6070566177368164, "learning_rate": 9.746160772483843e-06, "loss": 0.5514, "step": 10775 }, { "epoch": 0.1288394170183766, "grad_norm": 3.038470506668091, "learning_rate": 9.74609986154739e-06, "loss": 0.6286, "step": 10776 }, { "epoch": 0.12885137316323725, "grad_norm": 2.77237606048584, "learning_rate": 9.746038943494145e-06, "loss": 0.583, "step": 10777 }, { "epoch": 0.1288633293080979, "grad_norm": 2.1556856632232666, "learning_rate": 9.745978018324195e-06, "loss": 0.5972, "step": 10778 }, { "epoch": 0.12887528545295854, "grad_norm": 2.3480193614959717, "learning_rate": 9.745917086037637e-06, "loss": 0.5909, "step": 10779 }, { "epoch": 0.1288872415978192, "grad_norm": 1.7239972352981567, "learning_rate": 9.74585614663456e-06, "loss": 0.6323, "step": 10780 }, { "epoch": 0.12889919774267986, "grad_norm": 1.4382425546646118, "learning_rate": 9.745795200115053e-06, "loss": 0.6903, "step": 10781 }, { "epoch": 0.1289111538875405, "grad_norm": 2.647271156311035, "learning_rate": 9.745734246479209e-06, "loss": 0.6811, "step": 10782 }, { "epoch": 0.12892311003240114, "grad_norm": 2.5920217037200928, "learning_rate": 9.74567328572712e-06, "loss": 0.6686, "step": 10783 }, { "epoch": 0.1289350661772618, "grad_norm": 1.5954385995864868, "learning_rate": 9.745612317858876e-06, "loss": 0.6784, "step": 10784 }, { "epoch": 0.12894702232212246, "grad_norm": 4.610267162322998, "learning_rate": 9.745551342874571e-06, "loss": 0.6956, "step": 10785 }, { "epoch": 0.1289589784669831, "grad_norm": 3.269594192504883, "learning_rate": 9.745490360774295e-06, "loss": 0.6278, "step": 10786 }, { "epoch": 0.12897093461184375, "grad_norm": 1.6836884021759033, "learning_rate": 9.745429371558139e-06, "loss": 0.7041, "step": 10787 }, { "epoch": 0.1289828907567044, "grad_norm": 2.228140354156494, "learning_rate": 9.745368375226194e-06, "loss": 0.6903, "step": 10788 }, { "epoch": 0.12899484690156507, "grad_norm": 2.4261016845703125, "learning_rate": 9.745307371778553e-06, "loss": 0.628, "step": 10789 }, { "epoch": 0.1290068030464257, "grad_norm": 1.9669277667999268, "learning_rate": 9.745246361215307e-06, "loss": 0.5688, "step": 10790 }, { "epoch": 0.12901875919128636, "grad_norm": 2.057810068130493, "learning_rate": 9.745185343536546e-06, "loss": 0.6242, "step": 10791 }, { "epoch": 0.12903071533614702, "grad_norm": 2.5711565017700195, "learning_rate": 9.745124318742364e-06, "loss": 0.5782, "step": 10792 }, { "epoch": 0.12904267148100768, "grad_norm": 2.1078310012817383, "learning_rate": 9.74506328683285e-06, "loss": 0.6144, "step": 10793 }, { "epoch": 0.1290546276258683, "grad_norm": 2.5002028942108154, "learning_rate": 9.745002247808097e-06, "loss": 0.6921, "step": 10794 }, { "epoch": 0.12906658377072897, "grad_norm": 6.384228706359863, "learning_rate": 9.744941201668198e-06, "loss": 0.6358, "step": 10795 }, { "epoch": 0.12907853991558962, "grad_norm": 3.6248812675476074, "learning_rate": 9.744880148413241e-06, "loss": 0.5485, "step": 10796 }, { "epoch": 0.12909049606045025, "grad_norm": 1.8620195388793945, "learning_rate": 9.74481908804332e-06, "loss": 0.6694, "step": 10797 }, { "epoch": 0.1291024522053109, "grad_norm": 3.5742263793945312, "learning_rate": 9.744758020558525e-06, "loss": 0.6227, "step": 10798 }, { "epoch": 0.12911440835017157, "grad_norm": 2.634983539581299, "learning_rate": 9.74469694595895e-06, "loss": 0.6503, "step": 10799 }, { "epoch": 0.12912636449503223, "grad_norm": 2.0026535987854004, "learning_rate": 9.744635864244684e-06, "loss": 0.6154, "step": 10800 }, { "epoch": 0.12913832063989286, "grad_norm": 1.764339566230774, "learning_rate": 9.74457477541582e-06, "loss": 0.6087, "step": 10801 }, { "epoch": 0.12915027678475352, "grad_norm": 2.299105167388916, "learning_rate": 9.74451367947245e-06, "loss": 0.576, "step": 10802 }, { "epoch": 0.12916223292961418, "grad_norm": 2.341822624206543, "learning_rate": 9.744452576414664e-06, "loss": 0.7175, "step": 10803 }, { "epoch": 0.12917418907447484, "grad_norm": 1.5136522054672241, "learning_rate": 9.744391466242555e-06, "loss": 0.5734, "step": 10804 }, { "epoch": 0.12918614521933547, "grad_norm": 1.9489223957061768, "learning_rate": 9.744330348956212e-06, "loss": 0.7112, "step": 10805 }, { "epoch": 0.12919810136419613, "grad_norm": 2.5916473865509033, "learning_rate": 9.744269224555732e-06, "loss": 0.6825, "step": 10806 }, { "epoch": 0.12921005750905679, "grad_norm": 3.1871888637542725, "learning_rate": 9.744208093041202e-06, "loss": 0.7426, "step": 10807 }, { "epoch": 0.12922201365391744, "grad_norm": 2.3323090076446533, "learning_rate": 9.744146954412715e-06, "loss": 0.5656, "step": 10808 }, { "epoch": 0.12923396979877808, "grad_norm": 4.843266010284424, "learning_rate": 9.744085808670362e-06, "loss": 0.6264, "step": 10809 }, { "epoch": 0.12924592594363873, "grad_norm": 1.612907886505127, "learning_rate": 9.744024655814237e-06, "loss": 0.6614, "step": 10810 }, { "epoch": 0.1292578820884994, "grad_norm": 1.4262257814407349, "learning_rate": 9.743963495844428e-06, "loss": 0.5549, "step": 10811 }, { "epoch": 0.12926983823336002, "grad_norm": 2.2245664596557617, "learning_rate": 9.743902328761031e-06, "loss": 0.7058, "step": 10812 }, { "epoch": 0.12928179437822068, "grad_norm": 2.009894847869873, "learning_rate": 9.743841154564135e-06, "loss": 0.611, "step": 10813 }, { "epoch": 0.12929375052308134, "grad_norm": 8.514265060424805, "learning_rate": 9.743779973253834e-06, "loss": 0.6424, "step": 10814 }, { "epoch": 0.129305706667942, "grad_norm": 2.5112576484680176, "learning_rate": 9.743718784830214e-06, "loss": 0.5219, "step": 10815 }, { "epoch": 0.12931766281280263, "grad_norm": 3.8248233795166016, "learning_rate": 9.743657589293373e-06, "loss": 0.6171, "step": 10816 }, { "epoch": 0.1293296189576633, "grad_norm": 2.41286563873291, "learning_rate": 9.743596386643402e-06, "loss": 0.6776, "step": 10817 }, { "epoch": 0.12934157510252395, "grad_norm": 1.519614815711975, "learning_rate": 9.74353517688039e-06, "loss": 0.5903, "step": 10818 }, { "epoch": 0.1293535312473846, "grad_norm": 3.281006336212158, "learning_rate": 9.74347396000443e-06, "loss": 0.678, "step": 10819 }, { "epoch": 0.12936548739224524, "grad_norm": 1.943128228187561, "learning_rate": 9.743412736015614e-06, "loss": 0.5649, "step": 10820 }, { "epoch": 0.1293774435371059, "grad_norm": 3.9865305423736572, "learning_rate": 9.743351504914034e-06, "loss": 0.6439, "step": 10821 }, { "epoch": 0.12938939968196655, "grad_norm": 2.0047190189361572, "learning_rate": 9.74329026669978e-06, "loss": 0.7419, "step": 10822 }, { "epoch": 0.12940135582682719, "grad_norm": 1.8444712162017822, "learning_rate": 9.743229021372948e-06, "loss": 0.6924, "step": 10823 }, { "epoch": 0.12941331197168784, "grad_norm": 1.9102320671081543, "learning_rate": 9.743167768933626e-06, "loss": 0.6831, "step": 10824 }, { "epoch": 0.1294252681165485, "grad_norm": 1.4795700311660767, "learning_rate": 9.743106509381907e-06, "loss": 0.6134, "step": 10825 }, { "epoch": 0.12943722426140916, "grad_norm": 2.3324527740478516, "learning_rate": 9.743045242717883e-06, "loss": 0.5229, "step": 10826 }, { "epoch": 0.1294491804062698, "grad_norm": 1.4344053268432617, "learning_rate": 9.742983968941646e-06, "loss": 0.6353, "step": 10827 }, { "epoch": 0.12946113655113045, "grad_norm": 2.469487190246582, "learning_rate": 9.742922688053287e-06, "loss": 0.6654, "step": 10828 }, { "epoch": 0.1294730926959911, "grad_norm": 2.001683235168457, "learning_rate": 9.7428614000529e-06, "loss": 0.6751, "step": 10829 }, { "epoch": 0.12948504884085177, "grad_norm": 2.0514698028564453, "learning_rate": 9.742800104940575e-06, "loss": 0.6052, "step": 10830 }, { "epoch": 0.1294970049857124, "grad_norm": 1.8552818298339844, "learning_rate": 9.742738802716403e-06, "loss": 0.6726, "step": 10831 }, { "epoch": 0.12950896113057306, "grad_norm": 2.0289690494537354, "learning_rate": 9.742677493380479e-06, "loss": 0.5896, "step": 10832 }, { "epoch": 0.12952091727543372, "grad_norm": 2.2668051719665527, "learning_rate": 9.742616176932893e-06, "loss": 0.5998, "step": 10833 }, { "epoch": 0.12953287342029435, "grad_norm": 1.8241593837738037, "learning_rate": 9.742554853373736e-06, "loss": 0.6572, "step": 10834 }, { "epoch": 0.129544829565155, "grad_norm": 1.7252883911132812, "learning_rate": 9.742493522703103e-06, "loss": 0.6487, "step": 10835 }, { "epoch": 0.12955678571001567, "grad_norm": 1.8387507200241089, "learning_rate": 9.742432184921083e-06, "loss": 0.6196, "step": 10836 }, { "epoch": 0.12956874185487632, "grad_norm": 1.387923240661621, "learning_rate": 9.742370840027768e-06, "loss": 0.6468, "step": 10837 }, { "epoch": 0.12958069799973695, "grad_norm": 1.3982433080673218, "learning_rate": 9.742309488023251e-06, "loss": 0.6102, "step": 10838 }, { "epoch": 0.1295926541445976, "grad_norm": 3.0036733150482178, "learning_rate": 9.742248128907626e-06, "loss": 0.7472, "step": 10839 }, { "epoch": 0.12960461028945827, "grad_norm": 4.752432823181152, "learning_rate": 9.742186762680983e-06, "loss": 0.6851, "step": 10840 }, { "epoch": 0.12961656643431893, "grad_norm": 2.65255069732666, "learning_rate": 9.742125389343413e-06, "loss": 0.6508, "step": 10841 }, { "epoch": 0.12962852257917956, "grad_norm": 1.7192100286483765, "learning_rate": 9.742064008895009e-06, "loss": 0.7341, "step": 10842 }, { "epoch": 0.12964047872404022, "grad_norm": 3.2475414276123047, "learning_rate": 9.742002621335865e-06, "loss": 0.6069, "step": 10843 }, { "epoch": 0.12965243486890088, "grad_norm": 2.0343241691589355, "learning_rate": 9.741941226666069e-06, "loss": 0.706, "step": 10844 }, { "epoch": 0.1296643910137615, "grad_norm": 3.042978286743164, "learning_rate": 9.741879824885716e-06, "loss": 0.7216, "step": 10845 }, { "epoch": 0.12967634715862217, "grad_norm": 2.1485037803649902, "learning_rate": 9.741818415994897e-06, "loss": 0.6619, "step": 10846 }, { "epoch": 0.12968830330348283, "grad_norm": 1.969496726989746, "learning_rate": 9.741756999993706e-06, "loss": 0.5986, "step": 10847 }, { "epoch": 0.12970025944834349, "grad_norm": 2.249037265777588, "learning_rate": 9.741695576882231e-06, "loss": 0.7022, "step": 10848 }, { "epoch": 0.12971221559320412, "grad_norm": 3.785463571548462, "learning_rate": 9.741634146660568e-06, "loss": 0.6277, "step": 10849 }, { "epoch": 0.12972417173806478, "grad_norm": 2.079209089279175, "learning_rate": 9.741572709328809e-06, "loss": 0.5974, "step": 10850 }, { "epoch": 0.12973612788292543, "grad_norm": 2.0413122177124023, "learning_rate": 9.741511264887042e-06, "loss": 0.5469, "step": 10851 }, { "epoch": 0.1297480840277861, "grad_norm": 4.348143577575684, "learning_rate": 9.741449813335365e-06, "loss": 0.733, "step": 10852 }, { "epoch": 0.12976004017264672, "grad_norm": 1.8424137830734253, "learning_rate": 9.741388354673866e-06, "loss": 0.5414, "step": 10853 }, { "epoch": 0.12977199631750738, "grad_norm": 3.5980441570281982, "learning_rate": 9.741326888902638e-06, "loss": 0.7246, "step": 10854 }, { "epoch": 0.12978395246236804, "grad_norm": 3.9045872688293457, "learning_rate": 9.741265416021772e-06, "loss": 0.693, "step": 10855 }, { "epoch": 0.12979590860722867, "grad_norm": 2.038332462310791, "learning_rate": 9.741203936031366e-06, "loss": 0.5145, "step": 10856 }, { "epoch": 0.12980786475208933, "grad_norm": 1.9294981956481934, "learning_rate": 9.741142448931503e-06, "loss": 0.7145, "step": 10857 }, { "epoch": 0.12981982089695, "grad_norm": 7.222982406616211, "learning_rate": 9.741080954722281e-06, "loss": 0.6633, "step": 10858 }, { "epoch": 0.12983177704181065, "grad_norm": 1.8218052387237549, "learning_rate": 9.741019453403793e-06, "loss": 0.5621, "step": 10859 }, { "epoch": 0.12984373318667128, "grad_norm": 2.9821865558624268, "learning_rate": 9.74095794497613e-06, "loss": 0.6761, "step": 10860 }, { "epoch": 0.12985568933153194, "grad_norm": 2.068222761154175, "learning_rate": 9.740896429439381e-06, "loss": 0.6083, "step": 10861 }, { "epoch": 0.1298676454763926, "grad_norm": 35.035457611083984, "learning_rate": 9.740834906793644e-06, "loss": 0.5806, "step": 10862 }, { "epoch": 0.12987960162125325, "grad_norm": 1.2088383436203003, "learning_rate": 9.740773377039006e-06, "loss": 0.5499, "step": 10863 }, { "epoch": 0.12989155776611389, "grad_norm": 3.755887746810913, "learning_rate": 9.740711840175563e-06, "loss": 0.651, "step": 10864 }, { "epoch": 0.12990351391097454, "grad_norm": 3.793349266052246, "learning_rate": 9.740650296203405e-06, "loss": 0.7227, "step": 10865 }, { "epoch": 0.1299154700558352, "grad_norm": 1.7928928136825562, "learning_rate": 9.740588745122624e-06, "loss": 0.6893, "step": 10866 }, { "epoch": 0.12992742620069586, "grad_norm": 2.152623176574707, "learning_rate": 9.740527186933315e-06, "loss": 0.6029, "step": 10867 }, { "epoch": 0.1299393823455565, "grad_norm": 1.3460596799850464, "learning_rate": 9.74046562163557e-06, "loss": 0.4994, "step": 10868 }, { "epoch": 0.12995133849041715, "grad_norm": 2.8730828762054443, "learning_rate": 9.740404049229475e-06, "loss": 0.6358, "step": 10869 }, { "epoch": 0.1299632946352778, "grad_norm": 1.8078715801239014, "learning_rate": 9.740342469715132e-06, "loss": 0.7655, "step": 10870 }, { "epoch": 0.12997525078013844, "grad_norm": 1.6561280488967896, "learning_rate": 9.740280883092625e-06, "loss": 0.5424, "step": 10871 }, { "epoch": 0.1299872069249991, "grad_norm": 2.172760009765625, "learning_rate": 9.740219289362051e-06, "loss": 0.5663, "step": 10872 }, { "epoch": 0.12999916306985976, "grad_norm": 2.9119489192962646, "learning_rate": 9.740157688523504e-06, "loss": 0.649, "step": 10873 }, { "epoch": 0.13001111921472042, "grad_norm": 3.888061285018921, "learning_rate": 9.740096080577071e-06, "loss": 0.672, "step": 10874 }, { "epoch": 0.13002307535958105, "grad_norm": 2.283522844314575, "learning_rate": 9.740034465522848e-06, "loss": 0.6154, "step": 10875 }, { "epoch": 0.1300350315044417, "grad_norm": 2.244656562805176, "learning_rate": 9.739972843360926e-06, "loss": 0.6467, "step": 10876 }, { "epoch": 0.13004698764930236, "grad_norm": 2.476680040359497, "learning_rate": 9.739911214091399e-06, "loss": 0.5448, "step": 10877 }, { "epoch": 0.13005894379416302, "grad_norm": 2.9441142082214355, "learning_rate": 9.739849577714358e-06, "loss": 0.647, "step": 10878 }, { "epoch": 0.13007089993902365, "grad_norm": 2.3589072227478027, "learning_rate": 9.739787934229894e-06, "loss": 0.7004, "step": 10879 }, { "epoch": 0.1300828560838843, "grad_norm": 5.120180606842041, "learning_rate": 9.739726283638102e-06, "loss": 0.6621, "step": 10880 }, { "epoch": 0.13009481222874497, "grad_norm": 2.065859079360962, "learning_rate": 9.739664625939075e-06, "loss": 0.6271, "step": 10881 }, { "epoch": 0.1301067683736056, "grad_norm": 1.7779723405838013, "learning_rate": 9.739602961132903e-06, "loss": 0.6092, "step": 10882 }, { "epoch": 0.13011872451846626, "grad_norm": 1.654732584953308, "learning_rate": 9.739541289219679e-06, "loss": 0.5657, "step": 10883 }, { "epoch": 0.13013068066332692, "grad_norm": 2.558116912841797, "learning_rate": 9.739479610199496e-06, "loss": 0.6484, "step": 10884 }, { "epoch": 0.13014263680818758, "grad_norm": 1.4437313079833984, "learning_rate": 9.739417924072447e-06, "loss": 0.5515, "step": 10885 }, { "epoch": 0.1301545929530482, "grad_norm": 1.741341233253479, "learning_rate": 9.739356230838625e-06, "loss": 0.5754, "step": 10886 }, { "epoch": 0.13016654909790887, "grad_norm": 1.9058682918548584, "learning_rate": 9.739294530498121e-06, "loss": 0.5846, "step": 10887 }, { "epoch": 0.13017850524276953, "grad_norm": 2.9233295917510986, "learning_rate": 9.739232823051026e-06, "loss": 0.6822, "step": 10888 }, { "epoch": 0.13019046138763019, "grad_norm": 1.8511337041854858, "learning_rate": 9.739171108497437e-06, "loss": 0.643, "step": 10889 }, { "epoch": 0.13020241753249082, "grad_norm": 2.429535388946533, "learning_rate": 9.739109386837442e-06, "loss": 0.6554, "step": 10890 }, { "epoch": 0.13021437367735147, "grad_norm": 1.5297791957855225, "learning_rate": 9.739047658071137e-06, "loss": 0.6382, "step": 10891 }, { "epoch": 0.13022632982221213, "grad_norm": 2.0901131629943848, "learning_rate": 9.738985922198612e-06, "loss": 0.6681, "step": 10892 }, { "epoch": 0.13023828596707276, "grad_norm": 2.120504856109619, "learning_rate": 9.738924179219962e-06, "loss": 0.6079, "step": 10893 }, { "epoch": 0.13025024211193342, "grad_norm": 1.7680610418319702, "learning_rate": 9.738862429135279e-06, "loss": 0.5995, "step": 10894 }, { "epoch": 0.13026219825679408, "grad_norm": 20.151254653930664, "learning_rate": 9.738800671944653e-06, "loss": 0.6456, "step": 10895 }, { "epoch": 0.13027415440165474, "grad_norm": 3.4038898944854736, "learning_rate": 9.73873890764818e-06, "loss": 0.5757, "step": 10896 }, { "epoch": 0.13028611054651537, "grad_norm": 5.040280818939209, "learning_rate": 9.73867713624595e-06, "loss": 0.6226, "step": 10897 }, { "epoch": 0.13029806669137603, "grad_norm": 6.386504173278809, "learning_rate": 9.738615357738058e-06, "loss": 0.7037, "step": 10898 }, { "epoch": 0.1303100228362367, "grad_norm": 2.2377116680145264, "learning_rate": 9.738553572124594e-06, "loss": 0.5751, "step": 10899 }, { "epoch": 0.13032197898109735, "grad_norm": 1.7428044080734253, "learning_rate": 9.738491779405652e-06, "loss": 0.7684, "step": 10900 }, { "epoch": 0.13033393512595798, "grad_norm": 2.435892343521118, "learning_rate": 9.738429979581327e-06, "loss": 0.6157, "step": 10901 }, { "epoch": 0.13034589127081864, "grad_norm": 1.8967944383621216, "learning_rate": 9.738368172651709e-06, "loss": 0.5718, "step": 10902 }, { "epoch": 0.1303578474156793, "grad_norm": 3.017778158187866, "learning_rate": 9.73830635861689e-06, "loss": 0.5101, "step": 10903 }, { "epoch": 0.13036980356053993, "grad_norm": 2.9057514667510986, "learning_rate": 9.738244537476964e-06, "loss": 0.6152, "step": 10904 }, { "epoch": 0.13038175970540059, "grad_norm": 3.1553871631622314, "learning_rate": 9.738182709232022e-06, "loss": 0.5678, "step": 10905 }, { "epoch": 0.13039371585026124, "grad_norm": 2.2275354862213135, "learning_rate": 9.73812087388216e-06, "loss": 0.614, "step": 10906 }, { "epoch": 0.1304056719951219, "grad_norm": 2.247955799102783, "learning_rate": 9.738059031427469e-06, "loss": 0.783, "step": 10907 }, { "epoch": 0.13041762813998253, "grad_norm": 1.7862651348114014, "learning_rate": 9.73799718186804e-06, "loss": 0.5786, "step": 10908 }, { "epoch": 0.1304295842848432, "grad_norm": 3.2262253761291504, "learning_rate": 9.73793532520397e-06, "loss": 0.5545, "step": 10909 }, { "epoch": 0.13044154042970385, "grad_norm": 1.7306995391845703, "learning_rate": 9.737873461435348e-06, "loss": 0.5286, "step": 10910 }, { "epoch": 0.1304534965745645, "grad_norm": 2.9520986080169678, "learning_rate": 9.737811590562267e-06, "loss": 0.6521, "step": 10911 }, { "epoch": 0.13046545271942514, "grad_norm": 10.932717323303223, "learning_rate": 9.737749712584821e-06, "loss": 0.6374, "step": 10912 }, { "epoch": 0.1304774088642858, "grad_norm": 2.495159149169922, "learning_rate": 9.737687827503101e-06, "loss": 0.6246, "step": 10913 }, { "epoch": 0.13048936500914646, "grad_norm": 1.8366174697875977, "learning_rate": 9.737625935317204e-06, "loss": 0.6457, "step": 10914 }, { "epoch": 0.1305013211540071, "grad_norm": 3.2829089164733887, "learning_rate": 9.73756403602722e-06, "loss": 0.6808, "step": 10915 }, { "epoch": 0.13051327729886775, "grad_norm": 2.4545695781707764, "learning_rate": 9.737502129633241e-06, "loss": 0.6101, "step": 10916 }, { "epoch": 0.1305252334437284, "grad_norm": 2.858919620513916, "learning_rate": 9.73744021613536e-06, "loss": 0.6958, "step": 10917 }, { "epoch": 0.13053718958858906, "grad_norm": 1.9492610692977905, "learning_rate": 9.73737829553367e-06, "loss": 0.6562, "step": 10918 }, { "epoch": 0.1305491457334497, "grad_norm": 25.414485931396484, "learning_rate": 9.737316367828267e-06, "loss": 0.6774, "step": 10919 }, { "epoch": 0.13056110187831035, "grad_norm": 1.7833219766616821, "learning_rate": 9.737254433019238e-06, "loss": 0.584, "step": 10920 }, { "epoch": 0.130573058023171, "grad_norm": 3.3500373363494873, "learning_rate": 9.737192491106681e-06, "loss": 0.6271, "step": 10921 }, { "epoch": 0.13058501416803167, "grad_norm": 2.136866807937622, "learning_rate": 9.737130542090689e-06, "loss": 0.6453, "step": 10922 }, { "epoch": 0.1305969703128923, "grad_norm": 3.9403204917907715, "learning_rate": 9.73706858597135e-06, "loss": 0.5722, "step": 10923 }, { "epoch": 0.13060892645775296, "grad_norm": 3.438832998275757, "learning_rate": 9.737006622748762e-06, "loss": 0.6828, "step": 10924 }, { "epoch": 0.13062088260261362, "grad_norm": 1.5164037942886353, "learning_rate": 9.736944652423013e-06, "loss": 0.6042, "step": 10925 }, { "epoch": 0.13063283874747428, "grad_norm": 2.2114369869232178, "learning_rate": 9.736882674994202e-06, "loss": 0.6413, "step": 10926 }, { "epoch": 0.1306447948923349, "grad_norm": 2.8326025009155273, "learning_rate": 9.736820690462416e-06, "loss": 0.6684, "step": 10927 }, { "epoch": 0.13065675103719557, "grad_norm": 1.6981217861175537, "learning_rate": 9.736758698827752e-06, "loss": 0.5269, "step": 10928 }, { "epoch": 0.13066870718205623, "grad_norm": 1.3189202547073364, "learning_rate": 9.7366967000903e-06, "loss": 0.5235, "step": 10929 }, { "epoch": 0.13068066332691686, "grad_norm": 1.844080924987793, "learning_rate": 9.736634694250154e-06, "loss": 0.7026, "step": 10930 }, { "epoch": 0.13069261947177752, "grad_norm": 2.536524534225464, "learning_rate": 9.73657268130741e-06, "loss": 0.6574, "step": 10931 }, { "epoch": 0.13070457561663817, "grad_norm": 2.1169025897979736, "learning_rate": 9.736510661262157e-06, "loss": 0.691, "step": 10932 }, { "epoch": 0.13071653176149883, "grad_norm": 2.5295205116271973, "learning_rate": 9.736448634114489e-06, "loss": 0.6149, "step": 10933 }, { "epoch": 0.13072848790635946, "grad_norm": 1.7917746305465698, "learning_rate": 9.7363865998645e-06, "loss": 0.5975, "step": 10934 }, { "epoch": 0.13074044405122012, "grad_norm": 1.5884790420532227, "learning_rate": 9.736324558512283e-06, "loss": 0.6233, "step": 10935 }, { "epoch": 0.13075240019608078, "grad_norm": 2.3004658222198486, "learning_rate": 9.73626251005793e-06, "loss": 0.613, "step": 10936 }, { "epoch": 0.13076435634094144, "grad_norm": 1.085532784461975, "learning_rate": 9.736200454501534e-06, "loss": 0.6402, "step": 10937 }, { "epoch": 0.13077631248580207, "grad_norm": 4.348695755004883, "learning_rate": 9.736138391843188e-06, "loss": 0.5768, "step": 10938 }, { "epoch": 0.13078826863066273, "grad_norm": 1.54825758934021, "learning_rate": 9.736076322082988e-06, "loss": 0.5807, "step": 10939 }, { "epoch": 0.1308002247755234, "grad_norm": 2.219569206237793, "learning_rate": 9.736014245221023e-06, "loss": 0.6013, "step": 10940 }, { "epoch": 0.13081218092038402, "grad_norm": 1.538943886756897, "learning_rate": 9.735952161257387e-06, "loss": 0.6109, "step": 10941 }, { "epoch": 0.13082413706524468, "grad_norm": 1.9657518863677979, "learning_rate": 9.735890070192176e-06, "loss": 0.6423, "step": 10942 }, { "epoch": 0.13083609321010534, "grad_norm": 2.9332690238952637, "learning_rate": 9.73582797202548e-06, "loss": 0.6626, "step": 10943 }, { "epoch": 0.130848049354966, "grad_norm": 2.975848913192749, "learning_rate": 9.735765866757394e-06, "loss": 0.6972, "step": 10944 }, { "epoch": 0.13086000549982663, "grad_norm": 2.1431756019592285, "learning_rate": 9.735703754388008e-06, "loss": 0.5873, "step": 10945 }, { "epoch": 0.13087196164468728, "grad_norm": 1.6446640491485596, "learning_rate": 9.73564163491742e-06, "loss": 0.6327, "step": 10946 }, { "epoch": 0.13088391778954794, "grad_norm": 12.239595413208008, "learning_rate": 9.735579508345718e-06, "loss": 0.6632, "step": 10947 }, { "epoch": 0.1308958739344086, "grad_norm": 2.016218662261963, "learning_rate": 9.735517374673e-06, "loss": 0.6143, "step": 10948 }, { "epoch": 0.13090783007926923, "grad_norm": 1.7404673099517822, "learning_rate": 9.735455233899355e-06, "loss": 0.6454, "step": 10949 }, { "epoch": 0.1309197862241299, "grad_norm": 1.9292166233062744, "learning_rate": 9.73539308602488e-06, "loss": 0.616, "step": 10950 }, { "epoch": 0.13093174236899055, "grad_norm": 2.6539411544799805, "learning_rate": 9.735330931049664e-06, "loss": 0.6214, "step": 10951 }, { "epoch": 0.13094369851385118, "grad_norm": 4.3664374351501465, "learning_rate": 9.735268768973805e-06, "loss": 0.6471, "step": 10952 }, { "epoch": 0.13095565465871184, "grad_norm": 4.896920680999756, "learning_rate": 9.735206599797392e-06, "loss": 0.6043, "step": 10953 }, { "epoch": 0.1309676108035725, "grad_norm": 4.337604522705078, "learning_rate": 9.735144423520519e-06, "loss": 0.5595, "step": 10954 }, { "epoch": 0.13097956694843316, "grad_norm": 1.3706903457641602, "learning_rate": 9.735082240143283e-06, "loss": 0.5253, "step": 10955 }, { "epoch": 0.1309915230932938, "grad_norm": 1.973991870880127, "learning_rate": 9.735020049665772e-06, "loss": 0.6035, "step": 10956 }, { "epoch": 0.13100347923815445, "grad_norm": 2.7562437057495117, "learning_rate": 9.734957852088082e-06, "loss": 0.4982, "step": 10957 }, { "epoch": 0.1310154353830151, "grad_norm": 2.628206253051758, "learning_rate": 9.734895647410306e-06, "loss": 0.6543, "step": 10958 }, { "epoch": 0.13102739152787576, "grad_norm": 3.1460299491882324, "learning_rate": 9.734833435632539e-06, "loss": 0.6404, "step": 10959 }, { "epoch": 0.1310393476727364, "grad_norm": 1.965890884399414, "learning_rate": 9.73477121675487e-06, "loss": 0.5608, "step": 10960 }, { "epoch": 0.13105130381759705, "grad_norm": 3.398892879486084, "learning_rate": 9.734708990777395e-06, "loss": 0.6462, "step": 10961 }, { "epoch": 0.1310632599624577, "grad_norm": 2.66432523727417, "learning_rate": 9.734646757700208e-06, "loss": 0.5712, "step": 10962 }, { "epoch": 0.13107521610731834, "grad_norm": 2.4002881050109863, "learning_rate": 9.7345845175234e-06, "loss": 0.6495, "step": 10963 }, { "epoch": 0.131087172252179, "grad_norm": 2.4191975593566895, "learning_rate": 9.734522270247066e-06, "loss": 0.6671, "step": 10964 }, { "epoch": 0.13109912839703966, "grad_norm": 1.206496000289917, "learning_rate": 9.7344600158713e-06, "loss": 0.4943, "step": 10965 }, { "epoch": 0.13111108454190032, "grad_norm": 3.435952663421631, "learning_rate": 9.734397754396194e-06, "loss": 0.6648, "step": 10966 }, { "epoch": 0.13112304068676095, "grad_norm": 2.0461585521698, "learning_rate": 9.734335485821842e-06, "loss": 0.602, "step": 10967 }, { "epoch": 0.1311349968316216, "grad_norm": 1.8092502355575562, "learning_rate": 9.734273210148336e-06, "loss": 0.5793, "step": 10968 }, { "epoch": 0.13114695297648227, "grad_norm": 2.9005978107452393, "learning_rate": 9.734210927375772e-06, "loss": 0.5689, "step": 10969 }, { "epoch": 0.13115890912134293, "grad_norm": 1.3371461629867554, "learning_rate": 9.734148637504242e-06, "loss": 0.5706, "step": 10970 }, { "epoch": 0.13117086526620356, "grad_norm": 2.5981128215789795, "learning_rate": 9.734086340533837e-06, "loss": 0.5478, "step": 10971 }, { "epoch": 0.13118282141106422, "grad_norm": 3.7104380130767822, "learning_rate": 9.734024036464654e-06, "loss": 0.5846, "step": 10972 }, { "epoch": 0.13119477755592487, "grad_norm": 1.7297817468643188, "learning_rate": 9.733961725296786e-06, "loss": 0.6399, "step": 10973 }, { "epoch": 0.1312067337007855, "grad_norm": 1.693207859992981, "learning_rate": 9.733899407030325e-06, "loss": 0.6612, "step": 10974 }, { "epoch": 0.13121868984564616, "grad_norm": 2.306123971939087, "learning_rate": 9.733837081665365e-06, "loss": 0.7065, "step": 10975 }, { "epoch": 0.13123064599050682, "grad_norm": 1.9855194091796875, "learning_rate": 9.733774749202e-06, "loss": 0.6132, "step": 10976 }, { "epoch": 0.13124260213536748, "grad_norm": 1.9559762477874756, "learning_rate": 9.733712409640321e-06, "loss": 0.5497, "step": 10977 }, { "epoch": 0.1312545582802281, "grad_norm": 2.2270219326019287, "learning_rate": 9.733650062980425e-06, "loss": 0.67, "step": 10978 }, { "epoch": 0.13126651442508877, "grad_norm": 3.8823890686035156, "learning_rate": 9.733587709222403e-06, "loss": 0.684, "step": 10979 }, { "epoch": 0.13127847056994943, "grad_norm": 5.467769622802734, "learning_rate": 9.733525348366351e-06, "loss": 0.6972, "step": 10980 }, { "epoch": 0.1312904267148101, "grad_norm": 1.8779183626174927, "learning_rate": 9.733462980412358e-06, "loss": 0.6592, "step": 10981 }, { "epoch": 0.13130238285967072, "grad_norm": 2.08913516998291, "learning_rate": 9.733400605360523e-06, "loss": 0.7198, "step": 10982 }, { "epoch": 0.13131433900453138, "grad_norm": 1.6281424760818481, "learning_rate": 9.733338223210937e-06, "loss": 0.6609, "step": 10983 }, { "epoch": 0.13132629514939204, "grad_norm": 2.0760679244995117, "learning_rate": 9.733275833963692e-06, "loss": 0.631, "step": 10984 }, { "epoch": 0.1313382512942527, "grad_norm": 2.0554497241973877, "learning_rate": 9.733213437618885e-06, "loss": 0.5712, "step": 10985 }, { "epoch": 0.13135020743911333, "grad_norm": 2.001600503921509, "learning_rate": 9.733151034176605e-06, "loss": 0.574, "step": 10986 }, { "epoch": 0.13136216358397398, "grad_norm": 1.9414007663726807, "learning_rate": 9.733088623636951e-06, "loss": 0.5839, "step": 10987 }, { "epoch": 0.13137411972883464, "grad_norm": 4.8392510414123535, "learning_rate": 9.733026206000011e-06, "loss": 0.6727, "step": 10988 }, { "epoch": 0.13138607587369527, "grad_norm": 2.1441941261291504, "learning_rate": 9.732963781265882e-06, "loss": 0.6045, "step": 10989 }, { "epoch": 0.13139803201855593, "grad_norm": 4.122856616973877, "learning_rate": 9.732901349434661e-06, "loss": 0.6188, "step": 10990 }, { "epoch": 0.1314099881634166, "grad_norm": 5.430914402008057, "learning_rate": 9.732838910506432e-06, "loss": 0.6783, "step": 10991 }, { "epoch": 0.13142194430827725, "grad_norm": 2.180816888809204, "learning_rate": 9.732776464481296e-06, "loss": 0.7401, "step": 10992 }, { "epoch": 0.13143390045313788, "grad_norm": 4.721336364746094, "learning_rate": 9.732714011359348e-06, "loss": 0.6855, "step": 10993 }, { "epoch": 0.13144585659799854, "grad_norm": 2.0347561836242676, "learning_rate": 9.732651551140675e-06, "loss": 0.7018, "step": 10994 }, { "epoch": 0.1314578127428592, "grad_norm": 1.8720203638076782, "learning_rate": 9.732589083825376e-06, "loss": 0.6349, "step": 10995 }, { "epoch": 0.13146976888771986, "grad_norm": 1.9303683042526245, "learning_rate": 9.732526609413543e-06, "loss": 0.6743, "step": 10996 }, { "epoch": 0.1314817250325805, "grad_norm": 1.5568407773971558, "learning_rate": 9.732464127905269e-06, "loss": 0.5745, "step": 10997 }, { "epoch": 0.13149368117744115, "grad_norm": 3.123119831085205, "learning_rate": 9.732401639300646e-06, "loss": 0.6427, "step": 10998 }, { "epoch": 0.1315056373223018, "grad_norm": 2.6999871730804443, "learning_rate": 9.732339143599772e-06, "loss": 0.5529, "step": 10999 }, { "epoch": 0.13151759346716244, "grad_norm": 2.1353931427001953, "learning_rate": 9.732276640802738e-06, "loss": 0.6588, "step": 11000 }, { "epoch": 0.1315295496120231, "grad_norm": 3.2050466537475586, "learning_rate": 9.732214130909638e-06, "loss": 0.5677, "step": 11001 }, { "epoch": 0.13154150575688375, "grad_norm": 52.5003662109375, "learning_rate": 9.732151613920567e-06, "loss": 0.5807, "step": 11002 }, { "epoch": 0.1315534619017444, "grad_norm": 1.893639087677002, "learning_rate": 9.73208908983562e-06, "loss": 0.6011, "step": 11003 }, { "epoch": 0.13156541804660504, "grad_norm": 2.067277669906616, "learning_rate": 9.732026558654885e-06, "loss": 0.6066, "step": 11004 }, { "epoch": 0.1315773741914657, "grad_norm": 2.101613998413086, "learning_rate": 9.73196402037846e-06, "loss": 0.6772, "step": 11005 }, { "epoch": 0.13158933033632636, "grad_norm": 2.59812593460083, "learning_rate": 9.73190147500644e-06, "loss": 0.6726, "step": 11006 }, { "epoch": 0.13160128648118702, "grad_norm": 2.19071364402771, "learning_rate": 9.731838922538914e-06, "loss": 0.643, "step": 11007 }, { "epoch": 0.13161324262604765, "grad_norm": 2.548711061477661, "learning_rate": 9.731776362975981e-06, "loss": 0.6768, "step": 11008 }, { "epoch": 0.1316251987709083, "grad_norm": 2.1344521045684814, "learning_rate": 9.731713796317731e-06, "loss": 0.6243, "step": 11009 }, { "epoch": 0.13163715491576897, "grad_norm": 2.9235832691192627, "learning_rate": 9.731651222564261e-06, "loss": 0.539, "step": 11010 }, { "epoch": 0.1316491110606296, "grad_norm": 2.9879307746887207, "learning_rate": 9.731588641715663e-06, "loss": 0.6678, "step": 11011 }, { "epoch": 0.13166106720549026, "grad_norm": 1.4885005950927734, "learning_rate": 9.73152605377203e-06, "loss": 0.6354, "step": 11012 }, { "epoch": 0.13167302335035092, "grad_norm": 1.751492977142334, "learning_rate": 9.731463458733456e-06, "loss": 0.6279, "step": 11013 }, { "epoch": 0.13168497949521157, "grad_norm": 2.393454074859619, "learning_rate": 9.731400856600037e-06, "loss": 0.5846, "step": 11014 }, { "epoch": 0.1316969356400722, "grad_norm": 1.869242548942566, "learning_rate": 9.731338247371866e-06, "loss": 0.6462, "step": 11015 }, { "epoch": 0.13170889178493286, "grad_norm": 2.3940846920013428, "learning_rate": 9.731275631049037e-06, "loss": 0.6778, "step": 11016 }, { "epoch": 0.13172084792979352, "grad_norm": 8.482132911682129, "learning_rate": 9.731213007631643e-06, "loss": 0.7273, "step": 11017 }, { "epoch": 0.13173280407465418, "grad_norm": 1.5275020599365234, "learning_rate": 9.731150377119777e-06, "loss": 0.6551, "step": 11018 }, { "epoch": 0.1317447602195148, "grad_norm": 4.058250904083252, "learning_rate": 9.731087739513534e-06, "loss": 0.6067, "step": 11019 }, { "epoch": 0.13175671636437547, "grad_norm": 1.8374264240264893, "learning_rate": 9.73102509481301e-06, "loss": 0.5483, "step": 11020 }, { "epoch": 0.13176867250923613, "grad_norm": 1.8866713047027588, "learning_rate": 9.730962443018296e-06, "loss": 0.5615, "step": 11021 }, { "epoch": 0.13178062865409676, "grad_norm": 2.1471047401428223, "learning_rate": 9.730899784129488e-06, "loss": 0.6319, "step": 11022 }, { "epoch": 0.13179258479895742, "grad_norm": 1.6271638870239258, "learning_rate": 9.730837118146676e-06, "loss": 0.6402, "step": 11023 }, { "epoch": 0.13180454094381808, "grad_norm": 3.3170695304870605, "learning_rate": 9.730774445069961e-06, "loss": 0.6247, "step": 11024 }, { "epoch": 0.13181649708867874, "grad_norm": 1.6302422285079956, "learning_rate": 9.73071176489943e-06, "loss": 0.6152, "step": 11025 }, { "epoch": 0.13182845323353937, "grad_norm": 1.877415657043457, "learning_rate": 9.730649077635181e-06, "loss": 0.6431, "step": 11026 }, { "epoch": 0.13184040937840003, "grad_norm": 2.3656792640686035, "learning_rate": 9.730586383277307e-06, "loss": 0.6135, "step": 11027 }, { "epoch": 0.13185236552326068, "grad_norm": 1.7431025505065918, "learning_rate": 9.7305236818259e-06, "loss": 0.6003, "step": 11028 }, { "epoch": 0.13186432166812134, "grad_norm": 2.654823064804077, "learning_rate": 9.730460973281058e-06, "loss": 0.6122, "step": 11029 }, { "epoch": 0.13187627781298197, "grad_norm": 41.56131362915039, "learning_rate": 9.730398257642873e-06, "loss": 0.5756, "step": 11030 }, { "epoch": 0.13188823395784263, "grad_norm": 2.703920364379883, "learning_rate": 9.73033553491144e-06, "loss": 0.648, "step": 11031 }, { "epoch": 0.1319001901027033, "grad_norm": 3.4672062397003174, "learning_rate": 9.730272805086848e-06, "loss": 0.6828, "step": 11032 }, { "epoch": 0.13191214624756392, "grad_norm": 1.5284684896469116, "learning_rate": 9.7302100681692e-06, "loss": 0.5616, "step": 11033 }, { "epoch": 0.13192410239242458, "grad_norm": 2.6779887676239014, "learning_rate": 9.730147324158581e-06, "loss": 0.6396, "step": 11034 }, { "epoch": 0.13193605853728524, "grad_norm": 3.174511432647705, "learning_rate": 9.730084573055092e-06, "loss": 0.6307, "step": 11035 }, { "epoch": 0.1319480146821459, "grad_norm": 2.669668436050415, "learning_rate": 9.730021814858823e-06, "loss": 0.603, "step": 11036 }, { "epoch": 0.13195997082700653, "grad_norm": 1.6502493619918823, "learning_rate": 9.72995904956987e-06, "loss": 0.656, "step": 11037 }, { "epoch": 0.1319719269718672, "grad_norm": 1.794772982597351, "learning_rate": 9.729896277188327e-06, "loss": 0.5311, "step": 11038 }, { "epoch": 0.13198388311672785, "grad_norm": 2.6480493545532227, "learning_rate": 9.729833497714288e-06, "loss": 0.6905, "step": 11039 }, { "epoch": 0.1319958392615885, "grad_norm": 4.233588218688965, "learning_rate": 9.729770711147845e-06, "loss": 0.6521, "step": 11040 }, { "epoch": 0.13200779540644914, "grad_norm": 1.8528023958206177, "learning_rate": 9.729707917489095e-06, "loss": 0.6712, "step": 11041 }, { "epoch": 0.1320197515513098, "grad_norm": 1.6168365478515625, "learning_rate": 9.72964511673813e-06, "loss": 0.6781, "step": 11042 }, { "epoch": 0.13203170769617045, "grad_norm": 2.693791627883911, "learning_rate": 9.729582308895048e-06, "loss": 0.6998, "step": 11043 }, { "epoch": 0.1320436638410311, "grad_norm": 1.8774336576461792, "learning_rate": 9.729519493959939e-06, "loss": 0.6334, "step": 11044 }, { "epoch": 0.13205561998589174, "grad_norm": 1.9673670530319214, "learning_rate": 9.729456671932899e-06, "loss": 0.7595, "step": 11045 }, { "epoch": 0.1320675761307524, "grad_norm": 2.001833438873291, "learning_rate": 9.729393842814021e-06, "loss": 0.7118, "step": 11046 }, { "epoch": 0.13207953227561306, "grad_norm": 1.7144646644592285, "learning_rate": 9.729331006603401e-06, "loss": 0.5962, "step": 11047 }, { "epoch": 0.1320914884204737, "grad_norm": 1.423709511756897, "learning_rate": 9.729268163301133e-06, "loss": 0.6867, "step": 11048 }, { "epoch": 0.13210344456533435, "grad_norm": 2.430960178375244, "learning_rate": 9.72920531290731e-06, "loss": 0.6305, "step": 11049 }, { "epoch": 0.132115400710195, "grad_norm": 1.995318055152893, "learning_rate": 9.729142455422026e-06, "loss": 0.682, "step": 11050 }, { "epoch": 0.13212735685505567, "grad_norm": 2.425807476043701, "learning_rate": 9.729079590845377e-06, "loss": 0.538, "step": 11051 }, { "epoch": 0.1321393129999163, "grad_norm": 1.7783125638961792, "learning_rate": 9.729016719177456e-06, "loss": 0.5528, "step": 11052 }, { "epoch": 0.13215126914477696, "grad_norm": 2.0978612899780273, "learning_rate": 9.728953840418358e-06, "loss": 0.7282, "step": 11053 }, { "epoch": 0.13216322528963761, "grad_norm": 1.914627194404602, "learning_rate": 9.728890954568175e-06, "loss": 0.6002, "step": 11054 }, { "epoch": 0.13217518143449827, "grad_norm": 3.010256767272949, "learning_rate": 9.728828061627006e-06, "loss": 0.6011, "step": 11055 }, { "epoch": 0.1321871375793589, "grad_norm": 2.645751476287842, "learning_rate": 9.728765161594942e-06, "loss": 0.5874, "step": 11056 }, { "epoch": 0.13219909372421956, "grad_norm": 1.5017004013061523, "learning_rate": 9.728702254472075e-06, "loss": 0.601, "step": 11057 }, { "epoch": 0.13221104986908022, "grad_norm": 1.6420296430587769, "learning_rate": 9.728639340258505e-06, "loss": 0.575, "step": 11058 }, { "epoch": 0.13222300601394085, "grad_norm": 2.4947474002838135, "learning_rate": 9.728576418954323e-06, "loss": 0.7387, "step": 11059 }, { "epoch": 0.1322349621588015, "grad_norm": 1.9412509202957153, "learning_rate": 9.728513490559623e-06, "loss": 0.5768, "step": 11060 }, { "epoch": 0.13224691830366217, "grad_norm": 2.001514196395874, "learning_rate": 9.728450555074502e-06, "loss": 0.5791, "step": 11061 }, { "epoch": 0.13225887444852283, "grad_norm": 2.2711048126220703, "learning_rate": 9.728387612499051e-06, "loss": 0.5754, "step": 11062 }, { "epoch": 0.13227083059338346, "grad_norm": 8.712011337280273, "learning_rate": 9.728324662833366e-06, "loss": 0.5599, "step": 11063 }, { "epoch": 0.13228278673824412, "grad_norm": 1.9246503114700317, "learning_rate": 9.728261706077542e-06, "loss": 0.5176, "step": 11064 }, { "epoch": 0.13229474288310478, "grad_norm": 2.7696192264556885, "learning_rate": 9.728198742231672e-06, "loss": 0.7005, "step": 11065 }, { "epoch": 0.13230669902796544, "grad_norm": 1.489565372467041, "learning_rate": 9.728135771295852e-06, "loss": 0.6797, "step": 11066 }, { "epoch": 0.13231865517282607, "grad_norm": 1.9712377786636353, "learning_rate": 9.728072793270176e-06, "loss": 0.6891, "step": 11067 }, { "epoch": 0.13233061131768672, "grad_norm": 2.260329484939575, "learning_rate": 9.728009808154737e-06, "loss": 0.6054, "step": 11068 }, { "epoch": 0.13234256746254738, "grad_norm": 1.352241039276123, "learning_rate": 9.727946815949631e-06, "loss": 0.5135, "step": 11069 }, { "epoch": 0.13235452360740801, "grad_norm": 1.9873402118682861, "learning_rate": 9.727883816654952e-06, "loss": 0.683, "step": 11070 }, { "epoch": 0.13236647975226867, "grad_norm": 2.685455322265625, "learning_rate": 9.727820810270794e-06, "loss": 0.6065, "step": 11071 }, { "epoch": 0.13237843589712933, "grad_norm": 1.389127254486084, "learning_rate": 9.727757796797252e-06, "loss": 0.6349, "step": 11072 }, { "epoch": 0.13239039204199, "grad_norm": 3.170175552368164, "learning_rate": 9.72769477623442e-06, "loss": 0.5708, "step": 11073 }, { "epoch": 0.13240234818685062, "grad_norm": 2.164327621459961, "learning_rate": 9.727631748582394e-06, "loss": 0.6715, "step": 11074 }, { "epoch": 0.13241430433171128, "grad_norm": 5.489250659942627, "learning_rate": 9.727568713841267e-06, "loss": 0.5866, "step": 11075 }, { "epoch": 0.13242626047657194, "grad_norm": 5.292828559875488, "learning_rate": 9.727505672011132e-06, "loss": 0.5441, "step": 11076 }, { "epoch": 0.1324382166214326, "grad_norm": 2.101182222366333, "learning_rate": 9.727442623092089e-06, "loss": 0.5779, "step": 11077 }, { "epoch": 0.13245017276629323, "grad_norm": 6.0254316329956055, "learning_rate": 9.727379567084227e-06, "loss": 0.642, "step": 11078 }, { "epoch": 0.1324621289111539, "grad_norm": 2.0617265701293945, "learning_rate": 9.727316503987642e-06, "loss": 0.6497, "step": 11079 }, { "epoch": 0.13247408505601455, "grad_norm": 1.7932243347167969, "learning_rate": 9.727253433802429e-06, "loss": 0.599, "step": 11080 }, { "epoch": 0.13248604120087518, "grad_norm": 2.0618364810943604, "learning_rate": 9.727190356528683e-06, "loss": 0.6637, "step": 11081 }, { "epoch": 0.13249799734573584, "grad_norm": 2.1793723106384277, "learning_rate": 9.727127272166498e-06, "loss": 0.5747, "step": 11082 }, { "epoch": 0.1325099534905965, "grad_norm": 2.1810829639434814, "learning_rate": 9.72706418071597e-06, "loss": 0.647, "step": 11083 }, { "epoch": 0.13252190963545715, "grad_norm": 2.094719648361206, "learning_rate": 9.727001082177191e-06, "loss": 0.6611, "step": 11084 }, { "epoch": 0.13253386578031778, "grad_norm": 2.808091402053833, "learning_rate": 9.72693797655026e-06, "loss": 0.7547, "step": 11085 }, { "epoch": 0.13254582192517844, "grad_norm": 20.242795944213867, "learning_rate": 9.726874863835268e-06, "loss": 0.6142, "step": 11086 }, { "epoch": 0.1325577780700391, "grad_norm": 2.104194402694702, "learning_rate": 9.726811744032308e-06, "loss": 0.6054, "step": 11087 }, { "epoch": 0.13256973421489976, "grad_norm": 2.1870973110198975, "learning_rate": 9.726748617141477e-06, "loss": 0.5871, "step": 11088 }, { "epoch": 0.1325816903597604, "grad_norm": 2.7698683738708496, "learning_rate": 9.726685483162872e-06, "loss": 0.6072, "step": 11089 }, { "epoch": 0.13259364650462105, "grad_norm": 2.3012051582336426, "learning_rate": 9.726622342096583e-06, "loss": 0.6009, "step": 11090 }, { "epoch": 0.1326056026494817, "grad_norm": 42.49261474609375, "learning_rate": 9.726559193942709e-06, "loss": 0.6726, "step": 11091 }, { "epoch": 0.13261755879434234, "grad_norm": 1.626715064048767, "learning_rate": 9.72649603870134e-06, "loss": 0.6479, "step": 11092 }, { "epoch": 0.132629514939203, "grad_norm": 1.6848056316375732, "learning_rate": 9.726432876372576e-06, "loss": 0.5728, "step": 11093 }, { "epoch": 0.13264147108406366, "grad_norm": 3.2489519119262695, "learning_rate": 9.72636970695651e-06, "loss": 0.6368, "step": 11094 }, { "epoch": 0.13265342722892431, "grad_norm": 2.624115467071533, "learning_rate": 9.726306530453233e-06, "loss": 0.7087, "step": 11095 }, { "epoch": 0.13266538337378495, "grad_norm": 1.7242839336395264, "learning_rate": 9.726243346862842e-06, "loss": 0.6187, "step": 11096 }, { "epoch": 0.1326773395186456, "grad_norm": 2.4557242393493652, "learning_rate": 9.726180156185434e-06, "loss": 0.6791, "step": 11097 }, { "epoch": 0.13268929566350626, "grad_norm": 2.0763890743255615, "learning_rate": 9.726116958421102e-06, "loss": 0.6106, "step": 11098 }, { "epoch": 0.13270125180836692, "grad_norm": 2.743424892425537, "learning_rate": 9.726053753569942e-06, "loss": 0.6722, "step": 11099 }, { "epoch": 0.13271320795322755, "grad_norm": 3.2762956619262695, "learning_rate": 9.725990541632046e-06, "loss": 0.6561, "step": 11100 }, { "epoch": 0.1327251640980882, "grad_norm": 1.7174265384674072, "learning_rate": 9.72592732260751e-06, "loss": 0.6429, "step": 11101 }, { "epoch": 0.13273712024294887, "grad_norm": 1.9170297384262085, "learning_rate": 9.72586409649643e-06, "loss": 0.6997, "step": 11102 }, { "epoch": 0.13274907638780953, "grad_norm": 1.6590908765792847, "learning_rate": 9.7258008632989e-06, "loss": 0.6129, "step": 11103 }, { "epoch": 0.13276103253267016, "grad_norm": 1.5079292058944702, "learning_rate": 9.725737623015015e-06, "loss": 0.621, "step": 11104 }, { "epoch": 0.13277298867753082, "grad_norm": 2.9143340587615967, "learning_rate": 9.72567437564487e-06, "loss": 0.5183, "step": 11105 }, { "epoch": 0.13278494482239148, "grad_norm": 2.063253164291382, "learning_rate": 9.725611121188559e-06, "loss": 0.6382, "step": 11106 }, { "epoch": 0.1327969009672521, "grad_norm": 2.862692356109619, "learning_rate": 9.725547859646176e-06, "loss": 0.6511, "step": 11107 }, { "epoch": 0.13280885711211277, "grad_norm": 1.5473014116287231, "learning_rate": 9.725484591017818e-06, "loss": 0.644, "step": 11108 }, { "epoch": 0.13282081325697342, "grad_norm": 1.9865620136260986, "learning_rate": 9.72542131530358e-06, "loss": 0.5472, "step": 11109 }, { "epoch": 0.13283276940183408, "grad_norm": 1.7571338415145874, "learning_rate": 9.725358032503556e-06, "loss": 0.6064, "step": 11110 }, { "epoch": 0.13284472554669471, "grad_norm": 6.083277702331543, "learning_rate": 9.725294742617839e-06, "loss": 0.6527, "step": 11111 }, { "epoch": 0.13285668169155537, "grad_norm": 5.748186111450195, "learning_rate": 9.725231445646527e-06, "loss": 0.7432, "step": 11112 }, { "epoch": 0.13286863783641603, "grad_norm": 2.139500141143799, "learning_rate": 9.725168141589713e-06, "loss": 0.5668, "step": 11113 }, { "epoch": 0.1328805939812767, "grad_norm": 5.72319221496582, "learning_rate": 9.725104830447494e-06, "loss": 0.6609, "step": 11114 }, { "epoch": 0.13289255012613732, "grad_norm": 2.7803971767425537, "learning_rate": 9.725041512219962e-06, "loss": 0.6644, "step": 11115 }, { "epoch": 0.13290450627099798, "grad_norm": 1.6147798299789429, "learning_rate": 9.724978186907214e-06, "loss": 0.6599, "step": 11116 }, { "epoch": 0.13291646241585864, "grad_norm": 2.257392168045044, "learning_rate": 9.724914854509345e-06, "loss": 0.5907, "step": 11117 }, { "epoch": 0.13292841856071927, "grad_norm": 1.3940975666046143, "learning_rate": 9.724851515026449e-06, "loss": 0.658, "step": 11118 }, { "epoch": 0.13294037470557993, "grad_norm": 1.7339237928390503, "learning_rate": 9.724788168458622e-06, "loss": 0.6952, "step": 11119 }, { "epoch": 0.1329523308504406, "grad_norm": 4.465059280395508, "learning_rate": 9.724724814805957e-06, "loss": 0.5997, "step": 11120 }, { "epoch": 0.13296428699530125, "grad_norm": 1.7330929040908813, "learning_rate": 9.72466145406855e-06, "loss": 0.5806, "step": 11121 }, { "epoch": 0.13297624314016188, "grad_norm": 2.370117425918579, "learning_rate": 9.724598086246497e-06, "loss": 0.5538, "step": 11122 }, { "epoch": 0.13298819928502253, "grad_norm": 1.6161186695098877, "learning_rate": 9.724534711339893e-06, "loss": 0.6475, "step": 11123 }, { "epoch": 0.1330001554298832, "grad_norm": 1.687996506690979, "learning_rate": 9.724471329348833e-06, "loss": 0.6652, "step": 11124 }, { "epoch": 0.13301211157474385, "grad_norm": 2.5019919872283936, "learning_rate": 9.72440794027341e-06, "loss": 0.6299, "step": 11125 }, { "epoch": 0.13302406771960448, "grad_norm": 6.387680530548096, "learning_rate": 9.724344544113723e-06, "loss": 0.5655, "step": 11126 }, { "epoch": 0.13303602386446514, "grad_norm": 2.0747880935668945, "learning_rate": 9.72428114086986e-06, "loss": 0.6671, "step": 11127 }, { "epoch": 0.1330479800093258, "grad_norm": 2.0147085189819336, "learning_rate": 9.724217730541925e-06, "loss": 0.6358, "step": 11128 }, { "epoch": 0.13305993615418643, "grad_norm": 3.100689649581909, "learning_rate": 9.724154313130005e-06, "loss": 0.6989, "step": 11129 }, { "epoch": 0.1330718922990471, "grad_norm": 4.46273136138916, "learning_rate": 9.724090888634201e-06, "loss": 0.7128, "step": 11130 }, { "epoch": 0.13308384844390775, "grad_norm": 4.00037956237793, "learning_rate": 9.724027457054605e-06, "loss": 0.6326, "step": 11131 }, { "epoch": 0.1330958045887684, "grad_norm": 2.562730550765991, "learning_rate": 9.723964018391315e-06, "loss": 0.6663, "step": 11132 }, { "epoch": 0.13310776073362904, "grad_norm": 1.6252377033233643, "learning_rate": 9.723900572644422e-06, "loss": 0.583, "step": 11133 }, { "epoch": 0.1331197168784897, "grad_norm": 2.54893159866333, "learning_rate": 9.723837119814026e-06, "loss": 0.6233, "step": 11134 }, { "epoch": 0.13313167302335036, "grad_norm": 1.8996864557266235, "learning_rate": 9.723773659900218e-06, "loss": 0.6203, "step": 11135 }, { "epoch": 0.13314362916821101, "grad_norm": 2.027787446975708, "learning_rate": 9.723710192903092e-06, "loss": 0.5638, "step": 11136 }, { "epoch": 0.13315558531307164, "grad_norm": 1.8533111810684204, "learning_rate": 9.72364671882275e-06, "loss": 0.6266, "step": 11137 }, { "epoch": 0.1331675414579323, "grad_norm": 1.7997386455535889, "learning_rate": 9.72358323765928e-06, "loss": 0.6515, "step": 11138 }, { "epoch": 0.13317949760279296, "grad_norm": 1.6280556917190552, "learning_rate": 9.723519749412781e-06, "loss": 0.4987, "step": 11139 }, { "epoch": 0.1331914537476536, "grad_norm": 3.4740092754364014, "learning_rate": 9.723456254083347e-06, "loss": 0.6865, "step": 11140 }, { "epoch": 0.13320340989251425, "grad_norm": 5.547384262084961, "learning_rate": 9.723392751671075e-06, "loss": 0.6269, "step": 11141 }, { "epoch": 0.1332153660373749, "grad_norm": 1.9257450103759766, "learning_rate": 9.723329242176057e-06, "loss": 0.6722, "step": 11142 }, { "epoch": 0.13322732218223557, "grad_norm": 3.2781264781951904, "learning_rate": 9.72326572559839e-06, "loss": 0.6025, "step": 11143 }, { "epoch": 0.1332392783270962, "grad_norm": 1.875041127204895, "learning_rate": 9.723202201938171e-06, "loss": 0.7286, "step": 11144 }, { "epoch": 0.13325123447195686, "grad_norm": 2.152294874191284, "learning_rate": 9.723138671195493e-06, "loss": 0.6895, "step": 11145 }, { "epoch": 0.13326319061681752, "grad_norm": 2.7056026458740234, "learning_rate": 9.72307513337045e-06, "loss": 0.6175, "step": 11146 }, { "epoch": 0.13327514676167818, "grad_norm": 1.8666592836380005, "learning_rate": 9.723011588463142e-06, "loss": 0.63, "step": 11147 }, { "epoch": 0.1332871029065388, "grad_norm": 2.507448673248291, "learning_rate": 9.722948036473658e-06, "loss": 0.5583, "step": 11148 }, { "epoch": 0.13329905905139947, "grad_norm": 2.2387044429779053, "learning_rate": 9.722884477402098e-06, "loss": 0.6597, "step": 11149 }, { "epoch": 0.13331101519626012, "grad_norm": 9.258249282836914, "learning_rate": 9.722820911248559e-06, "loss": 0.5989, "step": 11150 }, { "epoch": 0.13332297134112078, "grad_norm": 2.0190176963806152, "learning_rate": 9.72275733801313e-06, "loss": 0.6598, "step": 11151 }, { "epoch": 0.1333349274859814, "grad_norm": 1.3095285892486572, "learning_rate": 9.72269375769591e-06, "loss": 0.5745, "step": 11152 }, { "epoch": 0.13334688363084207, "grad_norm": 2.0403003692626953, "learning_rate": 9.722630170296993e-06, "loss": 0.6689, "step": 11153 }, { "epoch": 0.13335883977570273, "grad_norm": 3.282357692718506, "learning_rate": 9.722566575816477e-06, "loss": 0.5453, "step": 11154 }, { "epoch": 0.13337079592056336, "grad_norm": 2.55733585357666, "learning_rate": 9.722502974254456e-06, "loss": 0.7457, "step": 11155 }, { "epoch": 0.13338275206542402, "grad_norm": 2.4795186519622803, "learning_rate": 9.722439365611023e-06, "loss": 0.7448, "step": 11156 }, { "epoch": 0.13339470821028468, "grad_norm": 3.2338404655456543, "learning_rate": 9.722375749886276e-06, "loss": 0.6635, "step": 11157 }, { "epoch": 0.13340666435514534, "grad_norm": 2.4605369567871094, "learning_rate": 9.72231212708031e-06, "loss": 0.6549, "step": 11158 }, { "epoch": 0.13341862050000597, "grad_norm": 1.6534067392349243, "learning_rate": 9.72224849719322e-06, "loss": 0.7455, "step": 11159 }, { "epoch": 0.13343057664486663, "grad_norm": 2.5424082279205322, "learning_rate": 9.722184860225102e-06, "loss": 0.6754, "step": 11160 }, { "epoch": 0.1334425327897273, "grad_norm": 1.9436863660812378, "learning_rate": 9.722121216176051e-06, "loss": 0.6382, "step": 11161 }, { "epoch": 0.13345448893458794, "grad_norm": 1.9008084535598755, "learning_rate": 9.722057565046164e-06, "loss": 0.6829, "step": 11162 }, { "epoch": 0.13346644507944858, "grad_norm": 2.461606740951538, "learning_rate": 9.721993906835532e-06, "loss": 0.5561, "step": 11163 }, { "epoch": 0.13347840122430923, "grad_norm": 3.2692010402679443, "learning_rate": 9.721930241544255e-06, "loss": 0.6496, "step": 11164 }, { "epoch": 0.1334903573691699, "grad_norm": 2.4604947566986084, "learning_rate": 9.721866569172426e-06, "loss": 0.6052, "step": 11165 }, { "epoch": 0.13350231351403052, "grad_norm": 1.829262375831604, "learning_rate": 9.721802889720143e-06, "loss": 0.6244, "step": 11166 }, { "epoch": 0.13351426965889118, "grad_norm": 1.8638646602630615, "learning_rate": 9.721739203187497e-06, "loss": 0.5851, "step": 11167 }, { "epoch": 0.13352622580375184, "grad_norm": 2.2778546810150146, "learning_rate": 9.721675509574588e-06, "loss": 0.6374, "step": 11168 }, { "epoch": 0.1335381819486125, "grad_norm": 6.297852516174316, "learning_rate": 9.72161180888151e-06, "loss": 0.7556, "step": 11169 }, { "epoch": 0.13355013809347313, "grad_norm": 1.5356677770614624, "learning_rate": 9.721548101108357e-06, "loss": 0.5283, "step": 11170 }, { "epoch": 0.1335620942383338, "grad_norm": 3.015545606613159, "learning_rate": 9.721484386255226e-06, "loss": 0.6898, "step": 11171 }, { "epoch": 0.13357405038319445, "grad_norm": 3.730712652206421, "learning_rate": 9.721420664322214e-06, "loss": 0.6609, "step": 11172 }, { "epoch": 0.1335860065280551, "grad_norm": 2.870150327682495, "learning_rate": 9.721356935309414e-06, "loss": 0.6735, "step": 11173 }, { "epoch": 0.13359796267291574, "grad_norm": 2.212625741958618, "learning_rate": 9.721293199216923e-06, "loss": 0.663, "step": 11174 }, { "epoch": 0.1336099188177764, "grad_norm": 4.359799385070801, "learning_rate": 9.721229456044836e-06, "loss": 0.6226, "step": 11175 }, { "epoch": 0.13362187496263705, "grad_norm": 2.4373364448547363, "learning_rate": 9.721165705793248e-06, "loss": 0.6339, "step": 11176 }, { "epoch": 0.13363383110749769, "grad_norm": 2.4686715602874756, "learning_rate": 9.721101948462255e-06, "loss": 0.6531, "step": 11177 }, { "epoch": 0.13364578725235834, "grad_norm": 2.555241107940674, "learning_rate": 9.721038184051955e-06, "loss": 0.5819, "step": 11178 }, { "epoch": 0.133657743397219, "grad_norm": 1.953810453414917, "learning_rate": 9.720974412562438e-06, "loss": 0.6693, "step": 11179 }, { "epoch": 0.13366969954207966, "grad_norm": 1.9524273872375488, "learning_rate": 9.720910633993805e-06, "loss": 0.5617, "step": 11180 }, { "epoch": 0.1336816556869403, "grad_norm": 1.9474865198135376, "learning_rate": 9.720846848346151e-06, "loss": 0.5738, "step": 11181 }, { "epoch": 0.13369361183180095, "grad_norm": 8.443556785583496, "learning_rate": 9.720783055619568e-06, "loss": 0.7215, "step": 11182 }, { "epoch": 0.1337055679766616, "grad_norm": 1.9735281467437744, "learning_rate": 9.720719255814155e-06, "loss": 0.5031, "step": 11183 }, { "epoch": 0.13371752412152227, "grad_norm": 2.2900705337524414, "learning_rate": 9.720655448930007e-06, "loss": 0.5798, "step": 11184 }, { "epoch": 0.1337294802663829, "grad_norm": 4.720205307006836, "learning_rate": 9.720591634967219e-06, "loss": 0.6184, "step": 11185 }, { "epoch": 0.13374143641124356, "grad_norm": 1.8302804231643677, "learning_rate": 9.720527813925887e-06, "loss": 0.5795, "step": 11186 }, { "epoch": 0.13375339255610422, "grad_norm": 2.4052951335906982, "learning_rate": 9.720463985806107e-06, "loss": 0.6197, "step": 11187 }, { "epoch": 0.13376534870096485, "grad_norm": 1.6155554056167603, "learning_rate": 9.720400150607973e-06, "loss": 0.6172, "step": 11188 }, { "epoch": 0.1337773048458255, "grad_norm": 1.8653485774993896, "learning_rate": 9.720336308331585e-06, "loss": 0.5779, "step": 11189 }, { "epoch": 0.13378926099068617, "grad_norm": 2.158721685409546, "learning_rate": 9.720272458977034e-06, "loss": 0.6923, "step": 11190 }, { "epoch": 0.13380121713554682, "grad_norm": 2.378004789352417, "learning_rate": 9.720208602544418e-06, "loss": 0.6386, "step": 11191 }, { "epoch": 0.13381317328040745, "grad_norm": 2.605581283569336, "learning_rate": 9.720144739033833e-06, "loss": 0.6781, "step": 11192 }, { "epoch": 0.1338251294252681, "grad_norm": 2.014857530593872, "learning_rate": 9.720080868445373e-06, "loss": 0.6306, "step": 11193 }, { "epoch": 0.13383708557012877, "grad_norm": 1.8478894233703613, "learning_rate": 9.720016990779137e-06, "loss": 0.6093, "step": 11194 }, { "epoch": 0.13384904171498943, "grad_norm": 3.1253600120544434, "learning_rate": 9.719953106035216e-06, "loss": 0.5933, "step": 11195 }, { "epoch": 0.13386099785985006, "grad_norm": 2.350940465927124, "learning_rate": 9.71988921421371e-06, "loss": 0.6069, "step": 11196 }, { "epoch": 0.13387295400471072, "grad_norm": 2.7970404624938965, "learning_rate": 9.719825315314714e-06, "loss": 0.7153, "step": 11197 }, { "epoch": 0.13388491014957138, "grad_norm": 9.621453285217285, "learning_rate": 9.719761409338323e-06, "loss": 0.7361, "step": 11198 }, { "epoch": 0.133896866294432, "grad_norm": 1.7013407945632935, "learning_rate": 9.719697496284633e-06, "loss": 0.5155, "step": 11199 }, { "epoch": 0.13390882243929267, "grad_norm": 1.5994832515716553, "learning_rate": 9.71963357615374e-06, "loss": 0.6389, "step": 11200 }, { "epoch": 0.13392077858415333, "grad_norm": 2.7546796798706055, "learning_rate": 9.719569648945739e-06, "loss": 0.6036, "step": 11201 }, { "epoch": 0.13393273472901399, "grad_norm": 2.1592302322387695, "learning_rate": 9.719505714660726e-06, "loss": 0.6195, "step": 11202 }, { "epoch": 0.13394469087387462, "grad_norm": 3.9784364700317383, "learning_rate": 9.719441773298798e-06, "loss": 0.6813, "step": 11203 }, { "epoch": 0.13395664701873528, "grad_norm": 2.475349187850952, "learning_rate": 9.71937782486005e-06, "loss": 0.5669, "step": 11204 }, { "epoch": 0.13396860316359593, "grad_norm": 1.9019349813461304, "learning_rate": 9.719313869344577e-06, "loss": 0.4982, "step": 11205 }, { "epoch": 0.1339805593084566, "grad_norm": 1.6051234006881714, "learning_rate": 9.719249906752479e-06, "loss": 0.6139, "step": 11206 }, { "epoch": 0.13399251545331722, "grad_norm": 1.9514141082763672, "learning_rate": 9.719185937083846e-06, "loss": 0.6227, "step": 11207 }, { "epoch": 0.13400447159817788, "grad_norm": 3.0903356075286865, "learning_rate": 9.71912196033878e-06, "loss": 0.6484, "step": 11208 }, { "epoch": 0.13401642774303854, "grad_norm": 3.124501943588257, "learning_rate": 9.719057976517372e-06, "loss": 0.6, "step": 11209 }, { "epoch": 0.1340283838878992, "grad_norm": 10.502242088317871, "learning_rate": 9.71899398561972e-06, "loss": 0.6056, "step": 11210 }, { "epoch": 0.13404034003275983, "grad_norm": 2.3142476081848145, "learning_rate": 9.71892998764592e-06, "loss": 0.6476, "step": 11211 }, { "epoch": 0.1340522961776205, "grad_norm": 1.834995150566101, "learning_rate": 9.718865982596068e-06, "loss": 0.5349, "step": 11212 }, { "epoch": 0.13406425232248115, "grad_norm": 1.966001272201538, "learning_rate": 9.71880197047026e-06, "loss": 0.6171, "step": 11213 }, { "epoch": 0.13407620846734178, "grad_norm": 1.6168508529663086, "learning_rate": 9.718737951268588e-06, "loss": 0.6859, "step": 11214 }, { "epoch": 0.13408816461220244, "grad_norm": 2.360334634780884, "learning_rate": 9.718673924991156e-06, "loss": 0.5008, "step": 11215 }, { "epoch": 0.1341001207570631, "grad_norm": 1.5503275394439697, "learning_rate": 9.718609891638053e-06, "loss": 0.5908, "step": 11216 }, { "epoch": 0.13411207690192375, "grad_norm": 2.409785747528076, "learning_rate": 9.71854585120938e-06, "loss": 0.6908, "step": 11217 }, { "epoch": 0.13412403304678439, "grad_norm": 1.7903913259506226, "learning_rate": 9.71848180370523e-06, "loss": 0.5449, "step": 11218 }, { "epoch": 0.13413598919164504, "grad_norm": 2.066823959350586, "learning_rate": 9.7184177491257e-06, "loss": 0.5977, "step": 11219 }, { "epoch": 0.1341479453365057, "grad_norm": 2.0082061290740967, "learning_rate": 9.718353687470884e-06, "loss": 0.5434, "step": 11220 }, { "epoch": 0.13415990148136636, "grad_norm": 1.3946034908294678, "learning_rate": 9.718289618740881e-06, "loss": 0.5816, "step": 11221 }, { "epoch": 0.134171857626227, "grad_norm": 2.223170280456543, "learning_rate": 9.718225542935786e-06, "loss": 0.5776, "step": 11222 }, { "epoch": 0.13418381377108765, "grad_norm": 9.76098918914795, "learning_rate": 9.718161460055696e-06, "loss": 0.608, "step": 11223 }, { "epoch": 0.1341957699159483, "grad_norm": 2.5352256298065186, "learning_rate": 9.718097370100705e-06, "loss": 0.6523, "step": 11224 }, { "epoch": 0.13420772606080894, "grad_norm": 1.4654943943023682, "learning_rate": 9.718033273070911e-06, "loss": 0.5904, "step": 11225 }, { "epoch": 0.1342196822056696, "grad_norm": 2.164503812789917, "learning_rate": 9.717969168966408e-06, "loss": 0.592, "step": 11226 }, { "epoch": 0.13423163835053026, "grad_norm": 2.2809648513793945, "learning_rate": 9.717905057787294e-06, "loss": 0.5817, "step": 11227 }, { "epoch": 0.13424359449539092, "grad_norm": 1.7349318265914917, "learning_rate": 9.717840939533665e-06, "loss": 0.6142, "step": 11228 }, { "epoch": 0.13425555064025155, "grad_norm": 2.9065983295440674, "learning_rate": 9.717776814205616e-06, "loss": 0.6028, "step": 11229 }, { "epoch": 0.1342675067851122, "grad_norm": 1.74582839012146, "learning_rate": 9.717712681803245e-06, "loss": 0.6422, "step": 11230 }, { "epoch": 0.13427946292997286, "grad_norm": 1.952908992767334, "learning_rate": 9.717648542326646e-06, "loss": 0.6157, "step": 11231 }, { "epoch": 0.13429141907483352, "grad_norm": 6.339206218719482, "learning_rate": 9.717584395775917e-06, "loss": 0.6315, "step": 11232 }, { "epoch": 0.13430337521969415, "grad_norm": 2.3927111625671387, "learning_rate": 9.717520242151152e-06, "loss": 0.6184, "step": 11233 }, { "epoch": 0.1343153313645548, "grad_norm": 2.6404850482940674, "learning_rate": 9.717456081452451e-06, "loss": 0.5938, "step": 11234 }, { "epoch": 0.13432728750941547, "grad_norm": 2.300922393798828, "learning_rate": 9.717391913679907e-06, "loss": 0.6787, "step": 11235 }, { "epoch": 0.1343392436542761, "grad_norm": 3.0622141361236572, "learning_rate": 9.717327738833616e-06, "loss": 0.626, "step": 11236 }, { "epoch": 0.13435119979913676, "grad_norm": 2.4464707374572754, "learning_rate": 9.717263556913676e-06, "loss": 0.5512, "step": 11237 }, { "epoch": 0.13436315594399742, "grad_norm": 2.577791690826416, "learning_rate": 9.717199367920183e-06, "loss": 0.6873, "step": 11238 }, { "epoch": 0.13437511208885808, "grad_norm": 3.9037442207336426, "learning_rate": 9.717135171853232e-06, "loss": 0.542, "step": 11239 }, { "epoch": 0.1343870682337187, "grad_norm": 1.996744990348816, "learning_rate": 9.71707096871292e-06, "loss": 0.5391, "step": 11240 }, { "epoch": 0.13439902437857937, "grad_norm": 1.717472791671753, "learning_rate": 9.717006758499342e-06, "loss": 0.6366, "step": 11241 }, { "epoch": 0.13441098052344003, "grad_norm": 2.988111734390259, "learning_rate": 9.7169425412126e-06, "loss": 0.6425, "step": 11242 }, { "epoch": 0.13442293666830069, "grad_norm": 1.9417171478271484, "learning_rate": 9.716878316852782e-06, "loss": 0.5459, "step": 11243 }, { "epoch": 0.13443489281316132, "grad_norm": 2.0728847980499268, "learning_rate": 9.71681408541999e-06, "loss": 0.6691, "step": 11244 }, { "epoch": 0.13444684895802197, "grad_norm": 1.7115017175674438, "learning_rate": 9.716749846914317e-06, "loss": 0.6538, "step": 11245 }, { "epoch": 0.13445880510288263, "grad_norm": 1.8346107006072998, "learning_rate": 9.716685601335862e-06, "loss": 0.6033, "step": 11246 }, { "epoch": 0.13447076124774326, "grad_norm": 5.1946234703063965, "learning_rate": 9.716621348684721e-06, "loss": 0.6495, "step": 11247 }, { "epoch": 0.13448271739260392, "grad_norm": 1.3617308139801025, "learning_rate": 9.716557088960987e-06, "loss": 0.6175, "step": 11248 }, { "epoch": 0.13449467353746458, "grad_norm": 1.9802030324935913, "learning_rate": 9.71649282216476e-06, "loss": 0.6117, "step": 11249 }, { "epoch": 0.13450662968232524, "grad_norm": 1.5672956705093384, "learning_rate": 9.716428548296137e-06, "loss": 0.6533, "step": 11250 }, { "epoch": 0.13451858582718587, "grad_norm": 5.729554653167725, "learning_rate": 9.71636426735521e-06, "loss": 0.6255, "step": 11251 }, { "epoch": 0.13453054197204653, "grad_norm": 6.4374799728393555, "learning_rate": 9.716299979342078e-06, "loss": 0.6095, "step": 11252 }, { "epoch": 0.1345424981169072, "grad_norm": 3.7788097858428955, "learning_rate": 9.716235684256841e-06, "loss": 0.7277, "step": 11253 }, { "epoch": 0.13455445426176785, "grad_norm": 2.714132308959961, "learning_rate": 9.716171382099589e-06, "loss": 0.6251, "step": 11254 }, { "epoch": 0.13456641040662848, "grad_norm": 18.024635314941406, "learning_rate": 9.716107072870421e-06, "loss": 0.5915, "step": 11255 }, { "epoch": 0.13457836655148914, "grad_norm": 2.393558979034424, "learning_rate": 9.716042756569435e-06, "loss": 0.686, "step": 11256 }, { "epoch": 0.1345903226963498, "grad_norm": 2.8498153686523438, "learning_rate": 9.715978433196725e-06, "loss": 0.6719, "step": 11257 }, { "epoch": 0.13460227884121043, "grad_norm": 4.955181121826172, "learning_rate": 9.715914102752389e-06, "loss": 0.6366, "step": 11258 }, { "epoch": 0.13461423498607109, "grad_norm": 4.879579067230225, "learning_rate": 9.715849765236522e-06, "loss": 0.6005, "step": 11259 }, { "epoch": 0.13462619113093174, "grad_norm": 1.7787822484970093, "learning_rate": 9.715785420649225e-06, "loss": 0.6124, "step": 11260 }, { "epoch": 0.1346381472757924, "grad_norm": 2.1170897483825684, "learning_rate": 9.715721068990586e-06, "loss": 0.5678, "step": 11261 }, { "epoch": 0.13465010342065303, "grad_norm": 7.865562438964844, "learning_rate": 9.71565671026071e-06, "loss": 0.5759, "step": 11262 }, { "epoch": 0.1346620595655137, "grad_norm": 2.3042314052581787, "learning_rate": 9.71559234445969e-06, "loss": 0.6136, "step": 11263 }, { "epoch": 0.13467401571037435, "grad_norm": 2.9048569202423096, "learning_rate": 9.715527971587622e-06, "loss": 0.6052, "step": 11264 }, { "epoch": 0.134685971855235, "grad_norm": 2.3836028575897217, "learning_rate": 9.715463591644602e-06, "loss": 0.6387, "step": 11265 }, { "epoch": 0.13469792800009564, "grad_norm": 6.459034442901611, "learning_rate": 9.715399204630728e-06, "loss": 0.6403, "step": 11266 }, { "epoch": 0.1347098841449563, "grad_norm": 1.6462740898132324, "learning_rate": 9.715334810546095e-06, "loss": 0.5871, "step": 11267 }, { "epoch": 0.13472184028981696, "grad_norm": 1.8355592489242554, "learning_rate": 9.715270409390803e-06, "loss": 0.6286, "step": 11268 }, { "epoch": 0.13473379643467762, "grad_norm": 1.6852200031280518, "learning_rate": 9.715206001164946e-06, "loss": 0.6622, "step": 11269 }, { "epoch": 0.13474575257953825, "grad_norm": 1.5220882892608643, "learning_rate": 9.715141585868619e-06, "loss": 0.5978, "step": 11270 }, { "epoch": 0.1347577087243989, "grad_norm": 3.042508363723755, "learning_rate": 9.715077163501923e-06, "loss": 0.6457, "step": 11271 }, { "epoch": 0.13476966486925956, "grad_norm": 11.183775901794434, "learning_rate": 9.71501273406495e-06, "loss": 0.6008, "step": 11272 }, { "epoch": 0.1347816210141202, "grad_norm": 1.3537176847457886, "learning_rate": 9.7149482975578e-06, "loss": 0.5723, "step": 11273 }, { "epoch": 0.13479357715898085, "grad_norm": 2.196078300476074, "learning_rate": 9.714883853980567e-06, "loss": 0.6843, "step": 11274 }, { "epoch": 0.1348055333038415, "grad_norm": 1.7810487747192383, "learning_rate": 9.714819403333348e-06, "loss": 0.7161, "step": 11275 }, { "epoch": 0.13481748944870217, "grad_norm": 2.0112576484680176, "learning_rate": 9.714754945616242e-06, "loss": 0.6631, "step": 11276 }, { "epoch": 0.1348294455935628, "grad_norm": 2.2338883876800537, "learning_rate": 9.714690480829345e-06, "loss": 0.6277, "step": 11277 }, { "epoch": 0.13484140173842346, "grad_norm": 2.6474623680114746, "learning_rate": 9.714626008972752e-06, "loss": 0.6053, "step": 11278 }, { "epoch": 0.13485335788328412, "grad_norm": 2.065941095352173, "learning_rate": 9.71456153004656e-06, "loss": 0.6663, "step": 11279 }, { "epoch": 0.13486531402814478, "grad_norm": 2.0219898223876953, "learning_rate": 9.714497044050868e-06, "loss": 0.6413, "step": 11280 }, { "epoch": 0.1348772701730054, "grad_norm": 1.7476778030395508, "learning_rate": 9.714432550985768e-06, "loss": 0.5527, "step": 11281 }, { "epoch": 0.13488922631786607, "grad_norm": 1.5211330652236938, "learning_rate": 9.714368050851363e-06, "loss": 0.6861, "step": 11282 }, { "epoch": 0.13490118246272673, "grad_norm": 1.953602910041809, "learning_rate": 9.714303543647746e-06, "loss": 0.6508, "step": 11283 }, { "epoch": 0.13491313860758736, "grad_norm": 2.896883249282837, "learning_rate": 9.71423902937501e-06, "loss": 0.6157, "step": 11284 }, { "epoch": 0.13492509475244802, "grad_norm": 1.6862236261367798, "learning_rate": 9.71417450803326e-06, "loss": 0.6218, "step": 11285 }, { "epoch": 0.13493705089730867, "grad_norm": 2.725972890853882, "learning_rate": 9.714109979622588e-06, "loss": 0.5873, "step": 11286 }, { "epoch": 0.13494900704216933, "grad_norm": 1.8616262674331665, "learning_rate": 9.71404544414309e-06, "loss": 0.6601, "step": 11287 }, { "epoch": 0.13496096318702996, "grad_norm": 5.1855597496032715, "learning_rate": 9.713980901594863e-06, "loss": 0.5981, "step": 11288 }, { "epoch": 0.13497291933189062, "grad_norm": 3.021267890930176, "learning_rate": 9.713916351978008e-06, "loss": 0.5986, "step": 11289 }, { "epoch": 0.13498487547675128, "grad_norm": 5.968560218811035, "learning_rate": 9.713851795292617e-06, "loss": 0.6195, "step": 11290 }, { "epoch": 0.13499683162161194, "grad_norm": 1.7717885971069336, "learning_rate": 9.713787231538788e-06, "loss": 0.5238, "step": 11291 }, { "epoch": 0.13500878776647257, "grad_norm": 2.127230644226074, "learning_rate": 9.713722660716619e-06, "loss": 0.6454, "step": 11292 }, { "epoch": 0.13502074391133323, "grad_norm": 1.767717957496643, "learning_rate": 9.713658082826207e-06, "loss": 0.6172, "step": 11293 }, { "epoch": 0.1350327000561939, "grad_norm": 2.5644915103912354, "learning_rate": 9.713593497867646e-06, "loss": 0.5986, "step": 11294 }, { "epoch": 0.13504465620105452, "grad_norm": 1.5821996927261353, "learning_rate": 9.713528905841036e-06, "loss": 0.7127, "step": 11295 }, { "epoch": 0.13505661234591518, "grad_norm": 1.5520588159561157, "learning_rate": 9.713464306746473e-06, "loss": 0.5584, "step": 11296 }, { "epoch": 0.13506856849077584, "grad_norm": 1.6777089834213257, "learning_rate": 9.713399700584052e-06, "loss": 0.6468, "step": 11297 }, { "epoch": 0.1350805246356365, "grad_norm": 1.6921555995941162, "learning_rate": 9.713335087353872e-06, "loss": 0.7249, "step": 11298 }, { "epoch": 0.13509248078049713, "grad_norm": 1.7689334154129028, "learning_rate": 9.71327046705603e-06, "loss": 0.5403, "step": 11299 }, { "epoch": 0.13510443692535778, "grad_norm": 12.469799041748047, "learning_rate": 9.713205839690623e-06, "loss": 0.5639, "step": 11300 }, { "epoch": 0.13511639307021844, "grad_norm": 1.6911661624908447, "learning_rate": 9.713141205257746e-06, "loss": 0.554, "step": 11301 }, { "epoch": 0.1351283492150791, "grad_norm": 2.263254165649414, "learning_rate": 9.713076563757496e-06, "loss": 0.6584, "step": 11302 }, { "epoch": 0.13514030535993973, "grad_norm": 1.5916030406951904, "learning_rate": 9.713011915189972e-06, "loss": 0.559, "step": 11303 }, { "epoch": 0.1351522615048004, "grad_norm": 3.475529432296753, "learning_rate": 9.71294725955527e-06, "loss": 0.6261, "step": 11304 }, { "epoch": 0.13516421764966105, "grad_norm": 2.378221273422241, "learning_rate": 9.712882596853486e-06, "loss": 0.6635, "step": 11305 }, { "epoch": 0.13517617379452168, "grad_norm": 1.3247172832489014, "learning_rate": 9.71281792708472e-06, "loss": 0.6114, "step": 11306 }, { "epoch": 0.13518812993938234, "grad_norm": 2.400073289871216, "learning_rate": 9.712753250249063e-06, "loss": 0.5463, "step": 11307 }, { "epoch": 0.135200086084243, "grad_norm": 1.4906494617462158, "learning_rate": 9.712688566346618e-06, "loss": 0.635, "step": 11308 }, { "epoch": 0.13521204222910366, "grad_norm": 2.9904608726501465, "learning_rate": 9.71262387537748e-06, "loss": 0.643, "step": 11309 }, { "epoch": 0.1352239983739643, "grad_norm": 1.6726341247558594, "learning_rate": 9.712559177341745e-06, "loss": 0.5981, "step": 11310 }, { "epoch": 0.13523595451882495, "grad_norm": 1.5448840856552124, "learning_rate": 9.712494472239512e-06, "loss": 0.6445, "step": 11311 }, { "epoch": 0.1352479106636856, "grad_norm": 1.5828447341918945, "learning_rate": 9.712429760070876e-06, "loss": 0.6736, "step": 11312 }, { "epoch": 0.13525986680854626, "grad_norm": 1.7406929731369019, "learning_rate": 9.712365040835934e-06, "loss": 0.6207, "step": 11313 }, { "epoch": 0.1352718229534069, "grad_norm": 1.7429940700531006, "learning_rate": 9.712300314534786e-06, "loss": 0.6269, "step": 11314 }, { "epoch": 0.13528377909826755, "grad_norm": 9.846110343933105, "learning_rate": 9.712235581167525e-06, "loss": 0.6115, "step": 11315 }, { "epoch": 0.1352957352431282, "grad_norm": 3.1195566654205322, "learning_rate": 9.712170840734249e-06, "loss": 0.7019, "step": 11316 }, { "epoch": 0.13530769138798884, "grad_norm": 1.9375675916671753, "learning_rate": 9.712106093235059e-06, "loss": 0.7063, "step": 11317 }, { "epoch": 0.1353196475328495, "grad_norm": 2.6734888553619385, "learning_rate": 9.712041338670049e-06, "loss": 0.5461, "step": 11318 }, { "epoch": 0.13533160367771016, "grad_norm": 1.6558629274368286, "learning_rate": 9.711976577039314e-06, "loss": 0.7016, "step": 11319 }, { "epoch": 0.13534355982257082, "grad_norm": 3.4969937801361084, "learning_rate": 9.711911808342955e-06, "loss": 0.6661, "step": 11320 }, { "epoch": 0.13535551596743145, "grad_norm": 1.909807801246643, "learning_rate": 9.711847032581066e-06, "loss": 0.5943, "step": 11321 }, { "epoch": 0.1353674721122921, "grad_norm": 3.159214973449707, "learning_rate": 9.711782249753748e-06, "loss": 0.5435, "step": 11322 }, { "epoch": 0.13537942825715277, "grad_norm": 1.6953495740890503, "learning_rate": 9.711717459861092e-06, "loss": 0.5582, "step": 11323 }, { "epoch": 0.13539138440201343, "grad_norm": 1.9669286012649536, "learning_rate": 9.711652662903202e-06, "loss": 0.6232, "step": 11324 }, { "epoch": 0.13540334054687406, "grad_norm": 1.9967600107192993, "learning_rate": 9.711587858880172e-06, "loss": 0.539, "step": 11325 }, { "epoch": 0.13541529669173472, "grad_norm": 1.826926827430725, "learning_rate": 9.711523047792097e-06, "loss": 0.6215, "step": 11326 }, { "epoch": 0.13542725283659537, "grad_norm": 3.1947264671325684, "learning_rate": 9.711458229639079e-06, "loss": 0.7418, "step": 11327 }, { "epoch": 0.13543920898145603, "grad_norm": 2.4400548934936523, "learning_rate": 9.711393404421213e-06, "loss": 0.5915, "step": 11328 }, { "epoch": 0.13545116512631666, "grad_norm": 1.6938436031341553, "learning_rate": 9.711328572138594e-06, "loss": 0.5715, "step": 11329 }, { "epoch": 0.13546312127117732, "grad_norm": 1.949167251586914, "learning_rate": 9.71126373279132e-06, "loss": 0.5645, "step": 11330 }, { "epoch": 0.13547507741603798, "grad_norm": 2.548959255218506, "learning_rate": 9.711198886379492e-06, "loss": 0.6618, "step": 11331 }, { "epoch": 0.1354870335608986, "grad_norm": 1.5527983903884888, "learning_rate": 9.711134032903202e-06, "loss": 0.6223, "step": 11332 }, { "epoch": 0.13549898970575927, "grad_norm": 1.4543523788452148, "learning_rate": 9.711069172362553e-06, "loss": 0.5727, "step": 11333 }, { "epoch": 0.13551094585061993, "grad_norm": 1.3923310041427612, "learning_rate": 9.711004304757636e-06, "loss": 0.6488, "step": 11334 }, { "epoch": 0.1355229019954806, "grad_norm": 2.0404858589172363, "learning_rate": 9.710939430088554e-06, "loss": 0.5517, "step": 11335 }, { "epoch": 0.13553485814034122, "grad_norm": 3.080354928970337, "learning_rate": 9.710874548355399e-06, "loss": 0.5657, "step": 11336 }, { "epoch": 0.13554681428520188, "grad_norm": 15.795023918151855, "learning_rate": 9.71080965955827e-06, "loss": 0.6597, "step": 11337 }, { "epoch": 0.13555877043006254, "grad_norm": 2.438948154449463, "learning_rate": 9.710744763697269e-06, "loss": 0.6563, "step": 11338 }, { "epoch": 0.1355707265749232, "grad_norm": 1.9758135080337524, "learning_rate": 9.710679860772487e-06, "loss": 0.7783, "step": 11339 }, { "epoch": 0.13558268271978383, "grad_norm": 1.5236260890960693, "learning_rate": 9.710614950784026e-06, "loss": 0.6257, "step": 11340 }, { "epoch": 0.13559463886464448, "grad_norm": 1.4211437702178955, "learning_rate": 9.710550033731977e-06, "loss": 0.5991, "step": 11341 }, { "epoch": 0.13560659500950514, "grad_norm": 2.470982313156128, "learning_rate": 9.710485109616445e-06, "loss": 0.6158, "step": 11342 }, { "epoch": 0.13561855115436577, "grad_norm": 1.9530932903289795, "learning_rate": 9.710420178437523e-06, "loss": 0.7139, "step": 11343 }, { "epoch": 0.13563050729922643, "grad_norm": 1.8246427774429321, "learning_rate": 9.71035524019531e-06, "loss": 0.6912, "step": 11344 }, { "epoch": 0.1356424634440871, "grad_norm": 11.311647415161133, "learning_rate": 9.710290294889901e-06, "loss": 0.5572, "step": 11345 }, { "epoch": 0.13565441958894775, "grad_norm": 1.501058578491211, "learning_rate": 9.710225342521395e-06, "loss": 0.6397, "step": 11346 }, { "epoch": 0.13566637573380838, "grad_norm": 2.399970531463623, "learning_rate": 9.710160383089891e-06, "loss": 0.5971, "step": 11347 }, { "epoch": 0.13567833187866904, "grad_norm": 2.1604504585266113, "learning_rate": 9.710095416595483e-06, "loss": 0.6794, "step": 11348 }, { "epoch": 0.1356902880235297, "grad_norm": 1.4229506254196167, "learning_rate": 9.710030443038271e-06, "loss": 0.6338, "step": 11349 }, { "epoch": 0.13570224416839036, "grad_norm": 2.1641581058502197, "learning_rate": 9.709965462418352e-06, "loss": 0.7208, "step": 11350 }, { "epoch": 0.135714200313251, "grad_norm": 2.3608603477478027, "learning_rate": 9.709900474735822e-06, "loss": 0.6515, "step": 11351 }, { "epoch": 0.13572615645811165, "grad_norm": 1.4874887466430664, "learning_rate": 9.70983547999078e-06, "loss": 0.5949, "step": 11352 }, { "epoch": 0.1357381126029723, "grad_norm": 1.849947214126587, "learning_rate": 9.709770478183322e-06, "loss": 0.6062, "step": 11353 }, { "epoch": 0.13575006874783294, "grad_norm": 1.522294044494629, "learning_rate": 9.709705469313548e-06, "loss": 0.671, "step": 11354 }, { "epoch": 0.1357620248926936, "grad_norm": 2.7805423736572266, "learning_rate": 9.709640453381553e-06, "loss": 0.5153, "step": 11355 }, { "epoch": 0.13577398103755425, "grad_norm": 1.215564489364624, "learning_rate": 9.709575430387436e-06, "loss": 0.5301, "step": 11356 }, { "epoch": 0.1357859371824149, "grad_norm": 1.6369034051895142, "learning_rate": 9.709510400331294e-06, "loss": 0.6619, "step": 11357 }, { "epoch": 0.13579789332727554, "grad_norm": 1.5105968713760376, "learning_rate": 9.709445363213224e-06, "loss": 0.6373, "step": 11358 }, { "epoch": 0.1358098494721362, "grad_norm": 1.8402884006500244, "learning_rate": 9.709380319033324e-06, "loss": 0.6055, "step": 11359 }, { "epoch": 0.13582180561699686, "grad_norm": 1.4808074235916138, "learning_rate": 9.709315267791692e-06, "loss": 0.7275, "step": 11360 }, { "epoch": 0.13583376176185752, "grad_norm": 2.6971659660339355, "learning_rate": 9.709250209488423e-06, "loss": 0.51, "step": 11361 }, { "epoch": 0.13584571790671815, "grad_norm": 2.2987420558929443, "learning_rate": 9.709185144123619e-06, "loss": 0.6727, "step": 11362 }, { "epoch": 0.1358576740515788, "grad_norm": 2.3297650814056396, "learning_rate": 9.709120071697373e-06, "loss": 0.5935, "step": 11363 }, { "epoch": 0.13586963019643947, "grad_norm": 4.639767169952393, "learning_rate": 9.709054992209787e-06, "loss": 0.6162, "step": 11364 }, { "epoch": 0.1358815863413001, "grad_norm": 1.4259450435638428, "learning_rate": 9.708989905660956e-06, "loss": 0.6789, "step": 11365 }, { "epoch": 0.13589354248616076, "grad_norm": 5.344219207763672, "learning_rate": 9.708924812050976e-06, "loss": 0.5981, "step": 11366 }, { "epoch": 0.13590549863102142, "grad_norm": 3.180847644805908, "learning_rate": 9.708859711379947e-06, "loss": 0.6437, "step": 11367 }, { "epoch": 0.13591745477588207, "grad_norm": 1.735133409500122, "learning_rate": 9.708794603647967e-06, "loss": 0.741, "step": 11368 }, { "epoch": 0.1359294109207427, "grad_norm": 2.103574275970459, "learning_rate": 9.708729488855133e-06, "loss": 0.6092, "step": 11369 }, { "epoch": 0.13594136706560336, "grad_norm": 1.9120423793792725, "learning_rate": 9.708664367001542e-06, "loss": 0.4991, "step": 11370 }, { "epoch": 0.13595332321046402, "grad_norm": 1.5596692562103271, "learning_rate": 9.708599238087291e-06, "loss": 0.7261, "step": 11371 }, { "epoch": 0.13596527935532468, "grad_norm": 2.191087484359741, "learning_rate": 9.70853410211248e-06, "loss": 0.714, "step": 11372 }, { "epoch": 0.1359772355001853, "grad_norm": 2.5931344032287598, "learning_rate": 9.708468959077205e-06, "loss": 0.6792, "step": 11373 }, { "epoch": 0.13598919164504597, "grad_norm": 2.043804168701172, "learning_rate": 9.708403808981564e-06, "loss": 0.5945, "step": 11374 }, { "epoch": 0.13600114778990663, "grad_norm": 1.9481953382492065, "learning_rate": 9.708338651825655e-06, "loss": 0.6134, "step": 11375 }, { "epoch": 0.13601310393476726, "grad_norm": 2.0909900665283203, "learning_rate": 9.708273487609576e-06, "loss": 0.6061, "step": 11376 }, { "epoch": 0.13602506007962792, "grad_norm": 1.554246187210083, "learning_rate": 9.708208316333422e-06, "loss": 0.5412, "step": 11377 }, { "epoch": 0.13603701622448858, "grad_norm": 2.0921592712402344, "learning_rate": 9.708143137997295e-06, "loss": 0.586, "step": 11378 }, { "epoch": 0.13604897236934924, "grad_norm": 1.5539366006851196, "learning_rate": 9.708077952601289e-06, "loss": 0.5192, "step": 11379 }, { "epoch": 0.13606092851420987, "grad_norm": 1.6309736967086792, "learning_rate": 9.708012760145504e-06, "loss": 0.5982, "step": 11380 }, { "epoch": 0.13607288465907053, "grad_norm": 3.4968345165252686, "learning_rate": 9.707947560630038e-06, "loss": 0.6031, "step": 11381 }, { "epoch": 0.13608484080393118, "grad_norm": 2.072263479232788, "learning_rate": 9.707882354054988e-06, "loss": 0.6662, "step": 11382 }, { "epoch": 0.13609679694879184, "grad_norm": 1.5747010707855225, "learning_rate": 9.70781714042045e-06, "loss": 0.6555, "step": 11383 }, { "epoch": 0.13610875309365247, "grad_norm": 2.8080341815948486, "learning_rate": 9.707751919726524e-06, "loss": 0.6901, "step": 11384 }, { "epoch": 0.13612070923851313, "grad_norm": 1.3777375221252441, "learning_rate": 9.707686691973309e-06, "loss": 0.535, "step": 11385 }, { "epoch": 0.1361326653833738, "grad_norm": 5.3679423332214355, "learning_rate": 9.7076214571609e-06, "loss": 0.5617, "step": 11386 }, { "epoch": 0.13614462152823445, "grad_norm": 22.851253509521484, "learning_rate": 9.707556215289395e-06, "loss": 0.6032, "step": 11387 }, { "epoch": 0.13615657767309508, "grad_norm": 1.46553373336792, "learning_rate": 9.707490966358894e-06, "loss": 0.6355, "step": 11388 }, { "epoch": 0.13616853381795574, "grad_norm": 1.5194065570831299, "learning_rate": 9.707425710369493e-06, "loss": 0.6127, "step": 11389 }, { "epoch": 0.1361804899628164, "grad_norm": 1.8612685203552246, "learning_rate": 9.70736044732129e-06, "loss": 0.6831, "step": 11390 }, { "epoch": 0.13619244610767703, "grad_norm": 2.041844367980957, "learning_rate": 9.707295177214384e-06, "loss": 0.6211, "step": 11391 }, { "epoch": 0.1362044022525377, "grad_norm": 4.39743185043335, "learning_rate": 9.707229900048872e-06, "loss": 0.7005, "step": 11392 }, { "epoch": 0.13621635839739835, "grad_norm": 1.8662891387939453, "learning_rate": 9.707164615824851e-06, "loss": 0.5563, "step": 11393 }, { "epoch": 0.136228314542259, "grad_norm": 1.8081313371658325, "learning_rate": 9.707099324542422e-06, "loss": 0.8195, "step": 11394 }, { "epoch": 0.13624027068711964, "grad_norm": 2.367034912109375, "learning_rate": 9.707034026201678e-06, "loss": 0.7425, "step": 11395 }, { "epoch": 0.1362522268319803, "grad_norm": 5.823318958282471, "learning_rate": 9.706968720802723e-06, "loss": 0.5854, "step": 11396 }, { "epoch": 0.13626418297684095, "grad_norm": 1.7575933933258057, "learning_rate": 9.70690340834565e-06, "loss": 0.5862, "step": 11397 }, { "epoch": 0.1362761391217016, "grad_norm": 1.4155539274215698, "learning_rate": 9.706838088830558e-06, "loss": 0.5996, "step": 11398 }, { "epoch": 0.13628809526656224, "grad_norm": 2.346942186355591, "learning_rate": 9.706772762257546e-06, "loss": 0.6224, "step": 11399 }, { "epoch": 0.1363000514114229, "grad_norm": 2.29203200340271, "learning_rate": 9.706707428626712e-06, "loss": 0.4626, "step": 11400 }, { "epoch": 0.13631200755628356, "grad_norm": 1.723648190498352, "learning_rate": 9.706642087938154e-06, "loss": 0.6703, "step": 11401 }, { "epoch": 0.1363239637011442, "grad_norm": 2.652086019515991, "learning_rate": 9.70657674019197e-06, "loss": 0.6141, "step": 11402 }, { "epoch": 0.13633591984600485, "grad_norm": 5.692120552062988, "learning_rate": 9.706511385388254e-06, "loss": 0.6428, "step": 11403 }, { "epoch": 0.1363478759908655, "grad_norm": 3.364034652709961, "learning_rate": 9.706446023527109e-06, "loss": 0.563, "step": 11404 }, { "epoch": 0.13635983213572617, "grad_norm": 2.6643447875976562, "learning_rate": 9.706380654608633e-06, "loss": 0.5896, "step": 11405 }, { "epoch": 0.1363717882805868, "grad_norm": 1.6742002964019775, "learning_rate": 9.706315278632921e-06, "loss": 0.6421, "step": 11406 }, { "epoch": 0.13638374442544746, "grad_norm": 2.346886396408081, "learning_rate": 9.706249895600073e-06, "loss": 0.6505, "step": 11407 }, { "epoch": 0.13639570057030811, "grad_norm": 6.28955602645874, "learning_rate": 9.706184505510186e-06, "loss": 0.6225, "step": 11408 }, { "epoch": 0.13640765671516877, "grad_norm": 2.782257556915283, "learning_rate": 9.70611910836336e-06, "loss": 0.5774, "step": 11409 }, { "epoch": 0.1364196128600294, "grad_norm": 2.9075217247009277, "learning_rate": 9.70605370415969e-06, "loss": 0.7014, "step": 11410 }, { "epoch": 0.13643156900489006, "grad_norm": 2.413691997528076, "learning_rate": 9.705988292899277e-06, "loss": 0.6877, "step": 11411 }, { "epoch": 0.13644352514975072, "grad_norm": 1.7169963121414185, "learning_rate": 9.705922874582218e-06, "loss": 0.6787, "step": 11412 }, { "epoch": 0.13645548129461135, "grad_norm": 1.5631089210510254, "learning_rate": 9.70585744920861e-06, "loss": 0.5386, "step": 11413 }, { "epoch": 0.136467437439472, "grad_norm": 1.619462490081787, "learning_rate": 9.705792016778553e-06, "loss": 0.6955, "step": 11414 }, { "epoch": 0.13647939358433267, "grad_norm": 1.9405509233474731, "learning_rate": 9.705726577292143e-06, "loss": 0.7032, "step": 11415 }, { "epoch": 0.13649134972919333, "grad_norm": 1.4883508682250977, "learning_rate": 9.70566113074948e-06, "loss": 0.6521, "step": 11416 }, { "epoch": 0.13650330587405396, "grad_norm": 3.8775081634521484, "learning_rate": 9.705595677150661e-06, "loss": 0.622, "step": 11417 }, { "epoch": 0.13651526201891462, "grad_norm": 1.565201997756958, "learning_rate": 9.705530216495784e-06, "loss": 0.6354, "step": 11418 }, { "epoch": 0.13652721816377528, "grad_norm": 2.441361665725708, "learning_rate": 9.705464748784949e-06, "loss": 0.6985, "step": 11419 }, { "epoch": 0.13653917430863594, "grad_norm": 2.1345205307006836, "learning_rate": 9.705399274018253e-06, "loss": 0.5953, "step": 11420 }, { "epoch": 0.13655113045349657, "grad_norm": 2.8087854385375977, "learning_rate": 9.705333792195792e-06, "loss": 0.6757, "step": 11421 }, { "epoch": 0.13656308659835723, "grad_norm": 1.5413109064102173, "learning_rate": 9.705268303317668e-06, "loss": 0.6437, "step": 11422 }, { "epoch": 0.13657504274321788, "grad_norm": 2.37780499458313, "learning_rate": 9.705202807383977e-06, "loss": 0.6256, "step": 11423 }, { "epoch": 0.13658699888807851, "grad_norm": 1.5879031419754028, "learning_rate": 9.705137304394818e-06, "loss": 0.6543, "step": 11424 }, { "epoch": 0.13659895503293917, "grad_norm": 1.876758098602295, "learning_rate": 9.70507179435029e-06, "loss": 0.7161, "step": 11425 }, { "epoch": 0.13661091117779983, "grad_norm": 1.8164258003234863, "learning_rate": 9.705006277250488e-06, "loss": 0.6493, "step": 11426 }, { "epoch": 0.1366228673226605, "grad_norm": 3.182133913040161, "learning_rate": 9.704940753095513e-06, "loss": 0.6354, "step": 11427 }, { "epoch": 0.13663482346752112, "grad_norm": 1.3855056762695312, "learning_rate": 9.704875221885462e-06, "loss": 0.6338, "step": 11428 }, { "epoch": 0.13664677961238178, "grad_norm": 2.5410966873168945, "learning_rate": 9.704809683620436e-06, "loss": 0.6715, "step": 11429 }, { "epoch": 0.13665873575724244, "grad_norm": 3.0942370891571045, "learning_rate": 9.704744138300528e-06, "loss": 0.646, "step": 11430 }, { "epoch": 0.1366706919021031, "grad_norm": 2.950768232345581, "learning_rate": 9.704678585925841e-06, "loss": 0.6073, "step": 11431 }, { "epoch": 0.13668264804696373, "grad_norm": 1.814956545829773, "learning_rate": 9.704613026496472e-06, "loss": 0.6432, "step": 11432 }, { "epoch": 0.1366946041918244, "grad_norm": 2.0495145320892334, "learning_rate": 9.70454746001252e-06, "loss": 0.5586, "step": 11433 }, { "epoch": 0.13670656033668505, "grad_norm": 1.6626096963882446, "learning_rate": 9.704481886474082e-06, "loss": 0.5576, "step": 11434 }, { "epoch": 0.13671851648154568, "grad_norm": 3.8505170345306396, "learning_rate": 9.704416305881255e-06, "loss": 0.5687, "step": 11435 }, { "epoch": 0.13673047262640634, "grad_norm": 3.1494903564453125, "learning_rate": 9.704350718234142e-06, "loss": 0.6559, "step": 11436 }, { "epoch": 0.136742428771267, "grad_norm": 1.707426905632019, "learning_rate": 9.704285123532835e-06, "loss": 0.6396, "step": 11437 }, { "epoch": 0.13675438491612765, "grad_norm": 1.8945634365081787, "learning_rate": 9.704219521777437e-06, "loss": 0.5184, "step": 11438 }, { "epoch": 0.13676634106098828, "grad_norm": 2.907740831375122, "learning_rate": 9.704153912968046e-06, "loss": 0.5734, "step": 11439 }, { "epoch": 0.13677829720584894, "grad_norm": 2.6629087924957275, "learning_rate": 9.70408829710476e-06, "loss": 0.6135, "step": 11440 }, { "epoch": 0.1367902533507096, "grad_norm": 11.480559349060059, "learning_rate": 9.704022674187674e-06, "loss": 0.6964, "step": 11441 }, { "epoch": 0.13680220949557026, "grad_norm": 2.5617125034332275, "learning_rate": 9.703957044216891e-06, "loss": 0.5767, "step": 11442 }, { "epoch": 0.1368141656404309, "grad_norm": 2.028416633605957, "learning_rate": 9.703891407192508e-06, "loss": 0.5686, "step": 11443 }, { "epoch": 0.13682612178529155, "grad_norm": 2.5454843044281006, "learning_rate": 9.703825763114622e-06, "loss": 0.5151, "step": 11444 }, { "epoch": 0.1368380779301522, "grad_norm": 2.1078531742095947, "learning_rate": 9.703760111983333e-06, "loss": 0.6709, "step": 11445 }, { "epoch": 0.13685003407501287, "grad_norm": 4.0168328285217285, "learning_rate": 9.70369445379874e-06, "loss": 0.6157, "step": 11446 }, { "epoch": 0.1368619902198735, "grad_norm": 1.903971552848816, "learning_rate": 9.70362878856094e-06, "loss": 0.6229, "step": 11447 }, { "epoch": 0.13687394636473416, "grad_norm": 2.0234291553497314, "learning_rate": 9.703563116270031e-06, "loss": 0.7235, "step": 11448 }, { "epoch": 0.13688590250959481, "grad_norm": 2.000746011734009, "learning_rate": 9.703497436926113e-06, "loss": 0.6336, "step": 11449 }, { "epoch": 0.13689785865445545, "grad_norm": 1.5172932147979736, "learning_rate": 9.703431750529283e-06, "loss": 0.5904, "step": 11450 }, { "epoch": 0.1369098147993161, "grad_norm": 1.7873467206954956, "learning_rate": 9.703366057079642e-06, "loss": 0.5952, "step": 11451 }, { "epoch": 0.13692177094417676, "grad_norm": 3.058720111846924, "learning_rate": 9.703300356577287e-06, "loss": 0.6575, "step": 11452 }, { "epoch": 0.13693372708903742, "grad_norm": 2.945603609085083, "learning_rate": 9.703234649022315e-06, "loss": 0.6569, "step": 11453 }, { "epoch": 0.13694568323389805, "grad_norm": 1.5756940841674805, "learning_rate": 9.703168934414826e-06, "loss": 0.6807, "step": 11454 }, { "epoch": 0.1369576393787587, "grad_norm": 1.5085489749908447, "learning_rate": 9.70310321275492e-06, "loss": 0.6558, "step": 11455 }, { "epoch": 0.13696959552361937, "grad_norm": 1.497671365737915, "learning_rate": 9.703037484042691e-06, "loss": 0.6194, "step": 11456 }, { "epoch": 0.13698155166848003, "grad_norm": 3.2933740615844727, "learning_rate": 9.702971748278244e-06, "loss": 0.6213, "step": 11457 }, { "epoch": 0.13699350781334066, "grad_norm": 1.6388572454452515, "learning_rate": 9.702906005461673e-06, "loss": 0.5969, "step": 11458 }, { "epoch": 0.13700546395820132, "grad_norm": 1.766175627708435, "learning_rate": 9.702840255593079e-06, "loss": 0.5252, "step": 11459 }, { "epoch": 0.13701742010306198, "grad_norm": 1.9585336446762085, "learning_rate": 9.702774498672555e-06, "loss": 0.5955, "step": 11460 }, { "epoch": 0.1370293762479226, "grad_norm": 1.9141888618469238, "learning_rate": 9.702708734700208e-06, "loss": 0.619, "step": 11461 }, { "epoch": 0.13704133239278327, "grad_norm": 3.0218346118927, "learning_rate": 9.70264296367613e-06, "loss": 0.6568, "step": 11462 }, { "epoch": 0.13705328853764392, "grad_norm": 1.5782907009124756, "learning_rate": 9.702577185600424e-06, "loss": 0.6806, "step": 11463 }, { "epoch": 0.13706524468250458, "grad_norm": 4.594339370727539, "learning_rate": 9.702511400473187e-06, "loss": 0.5971, "step": 11464 }, { "epoch": 0.13707720082736521, "grad_norm": 2.6515727043151855, "learning_rate": 9.702445608294516e-06, "loss": 0.5636, "step": 11465 }, { "epoch": 0.13708915697222587, "grad_norm": 1.8653546571731567, "learning_rate": 9.702379809064512e-06, "loss": 0.6575, "step": 11466 }, { "epoch": 0.13710111311708653, "grad_norm": 5.766881465911865, "learning_rate": 9.702314002783273e-06, "loss": 0.6331, "step": 11467 }, { "epoch": 0.1371130692619472, "grad_norm": 1.7982174158096313, "learning_rate": 9.702248189450895e-06, "loss": 0.6526, "step": 11468 }, { "epoch": 0.13712502540680782, "grad_norm": 1.965273141860962, "learning_rate": 9.702182369067482e-06, "loss": 0.654, "step": 11469 }, { "epoch": 0.13713698155166848, "grad_norm": 1.6842026710510254, "learning_rate": 9.702116541633128e-06, "loss": 0.5913, "step": 11470 }, { "epoch": 0.13714893769652914, "grad_norm": 3.925405740737915, "learning_rate": 9.702050707147936e-06, "loss": 0.6222, "step": 11471 }, { "epoch": 0.13716089384138977, "grad_norm": 3.479867696762085, "learning_rate": 9.701984865612e-06, "loss": 0.5798, "step": 11472 }, { "epoch": 0.13717284998625043, "grad_norm": 3.0625662803649902, "learning_rate": 9.701919017025421e-06, "loss": 0.6661, "step": 11473 }, { "epoch": 0.1371848061311111, "grad_norm": 2.777411460876465, "learning_rate": 9.701853161388299e-06, "loss": 0.6021, "step": 11474 }, { "epoch": 0.13719676227597175, "grad_norm": 1.969154953956604, "learning_rate": 9.701787298700731e-06, "loss": 0.6473, "step": 11475 }, { "epoch": 0.13720871842083238, "grad_norm": 2.4055473804473877, "learning_rate": 9.701721428962816e-06, "loss": 0.6267, "step": 11476 }, { "epoch": 0.13722067456569303, "grad_norm": 3.111905813217163, "learning_rate": 9.701655552174653e-06, "loss": 0.6333, "step": 11477 }, { "epoch": 0.1372326307105537, "grad_norm": 1.5379480123519897, "learning_rate": 9.70158966833634e-06, "loss": 0.6678, "step": 11478 }, { "epoch": 0.13724458685541435, "grad_norm": 2.889191150665283, "learning_rate": 9.70152377744798e-06, "loss": 0.6174, "step": 11479 }, { "epoch": 0.13725654300027498, "grad_norm": 1.782362461090088, "learning_rate": 9.701457879509664e-06, "loss": 0.5863, "step": 11480 }, { "epoch": 0.13726849914513564, "grad_norm": 13.851956367492676, "learning_rate": 9.701391974521496e-06, "loss": 0.6398, "step": 11481 }, { "epoch": 0.1372804552899963, "grad_norm": 2.336916923522949, "learning_rate": 9.701326062483577e-06, "loss": 0.68, "step": 11482 }, { "epoch": 0.13729241143485693, "grad_norm": 1.9107264280319214, "learning_rate": 9.701260143396e-06, "loss": 0.6487, "step": 11483 }, { "epoch": 0.1373043675797176, "grad_norm": 1.7749131917953491, "learning_rate": 9.701194217258867e-06, "loss": 0.5666, "step": 11484 }, { "epoch": 0.13731632372457825, "grad_norm": 9.593171119689941, "learning_rate": 9.701128284072278e-06, "loss": 0.6556, "step": 11485 }, { "epoch": 0.1373282798694389, "grad_norm": 3.6545841693878174, "learning_rate": 9.701062343836329e-06, "loss": 0.616, "step": 11486 }, { "epoch": 0.13734023601429954, "grad_norm": 3.2853646278381348, "learning_rate": 9.70099639655112e-06, "loss": 0.5694, "step": 11487 }, { "epoch": 0.1373521921591602, "grad_norm": 1.339328408241272, "learning_rate": 9.700930442216751e-06, "loss": 0.6103, "step": 11488 }, { "epoch": 0.13736414830402086, "grad_norm": 3.844456434249878, "learning_rate": 9.70086448083332e-06, "loss": 0.6058, "step": 11489 }, { "epoch": 0.13737610444888151, "grad_norm": 1.2693089246749878, "learning_rate": 9.700798512400928e-06, "loss": 0.5629, "step": 11490 }, { "epoch": 0.13738806059374215, "grad_norm": 2.2646424770355225, "learning_rate": 9.70073253691967e-06, "loss": 0.6284, "step": 11491 }, { "epoch": 0.1374000167386028, "grad_norm": 2.3414571285247803, "learning_rate": 9.700666554389646e-06, "loss": 0.6216, "step": 11492 }, { "epoch": 0.13741197288346346, "grad_norm": 2.5605525970458984, "learning_rate": 9.700600564810958e-06, "loss": 0.5766, "step": 11493 }, { "epoch": 0.13742392902832412, "grad_norm": 1.448105812072754, "learning_rate": 9.7005345681837e-06, "loss": 0.5791, "step": 11494 }, { "epoch": 0.13743588517318475, "grad_norm": 4.262363910675049, "learning_rate": 9.700468564507976e-06, "loss": 0.7127, "step": 11495 }, { "epoch": 0.1374478413180454, "grad_norm": 2.057831048965454, "learning_rate": 9.700402553783881e-06, "loss": 0.618, "step": 11496 }, { "epoch": 0.13745979746290607, "grad_norm": 2.153756856918335, "learning_rate": 9.700336536011517e-06, "loss": 0.6561, "step": 11497 }, { "epoch": 0.1374717536077667, "grad_norm": 3.81504487991333, "learning_rate": 9.700270511190981e-06, "loss": 0.684, "step": 11498 }, { "epoch": 0.13748370975262736, "grad_norm": 2.0466809272766113, "learning_rate": 9.700204479322372e-06, "loss": 0.603, "step": 11499 }, { "epoch": 0.13749566589748802, "grad_norm": 2.6612813472747803, "learning_rate": 9.70013844040579e-06, "loss": 0.5854, "step": 11500 }, { "epoch": 0.13750762204234868, "grad_norm": 2.001620054244995, "learning_rate": 9.700072394441335e-06, "loss": 0.6124, "step": 11501 }, { "epoch": 0.1375195781872093, "grad_norm": 6.302332401275635, "learning_rate": 9.700006341429105e-06, "loss": 0.6656, "step": 11502 }, { "epoch": 0.13753153433206997, "grad_norm": 1.9425331354141235, "learning_rate": 9.699940281369198e-06, "loss": 0.6055, "step": 11503 }, { "epoch": 0.13754349047693062, "grad_norm": 3.7656636238098145, "learning_rate": 9.699874214261713e-06, "loss": 0.5383, "step": 11504 }, { "epoch": 0.13755544662179128, "grad_norm": 2.3315281867980957, "learning_rate": 9.699808140106751e-06, "loss": 0.6002, "step": 11505 }, { "epoch": 0.13756740276665191, "grad_norm": 2.394003391265869, "learning_rate": 9.699742058904409e-06, "loss": 0.6499, "step": 11506 }, { "epoch": 0.13757935891151257, "grad_norm": 2.0514538288116455, "learning_rate": 9.699675970654786e-06, "loss": 0.7159, "step": 11507 }, { "epoch": 0.13759131505637323, "grad_norm": 1.7715014219284058, "learning_rate": 9.699609875357986e-06, "loss": 0.6335, "step": 11508 }, { "epoch": 0.13760327120123386, "grad_norm": 2.0623977184295654, "learning_rate": 9.699543773014103e-06, "loss": 0.7627, "step": 11509 }, { "epoch": 0.13761522734609452, "grad_norm": 2.4961588382720947, "learning_rate": 9.699477663623234e-06, "loss": 0.5753, "step": 11510 }, { "epoch": 0.13762718349095518, "grad_norm": 2.0347557067871094, "learning_rate": 9.699411547185484e-06, "loss": 0.6304, "step": 11511 }, { "epoch": 0.13763913963581584, "grad_norm": 3.063688039779663, "learning_rate": 9.69934542370095e-06, "loss": 0.6797, "step": 11512 }, { "epoch": 0.13765109578067647, "grad_norm": 2.386211633682251, "learning_rate": 9.69927929316973e-06, "loss": 0.6315, "step": 11513 }, { "epoch": 0.13766305192553713, "grad_norm": 1.9524905681610107, "learning_rate": 9.699213155591925e-06, "loss": 0.6658, "step": 11514 }, { "epoch": 0.1376750080703978, "grad_norm": 3.112107038497925, "learning_rate": 9.699147010967632e-06, "loss": 0.6132, "step": 11515 }, { "epoch": 0.13768696421525844, "grad_norm": 2.014417886734009, "learning_rate": 9.699080859296951e-06, "loss": 0.6031, "step": 11516 }, { "epoch": 0.13769892036011908, "grad_norm": 1.501318097114563, "learning_rate": 9.699014700579983e-06, "loss": 0.676, "step": 11517 }, { "epoch": 0.13771087650497973, "grad_norm": 1.6664726734161377, "learning_rate": 9.698948534816826e-06, "loss": 0.6477, "step": 11518 }, { "epoch": 0.1377228326498404, "grad_norm": 1.6548768281936646, "learning_rate": 9.698882362007577e-06, "loss": 0.6218, "step": 11519 }, { "epoch": 0.13773478879470102, "grad_norm": 2.281628131866455, "learning_rate": 9.69881618215234e-06, "loss": 0.6335, "step": 11520 }, { "epoch": 0.13774674493956168, "grad_norm": 3.559626340866089, "learning_rate": 9.69874999525121e-06, "loss": 0.5508, "step": 11521 }, { "epoch": 0.13775870108442234, "grad_norm": 6.362987041473389, "learning_rate": 9.698683801304287e-06, "loss": 0.5881, "step": 11522 }, { "epoch": 0.137770657229283, "grad_norm": 1.749938726425171, "learning_rate": 9.698617600311673e-06, "loss": 0.5833, "step": 11523 }, { "epoch": 0.13778261337414363, "grad_norm": 1.6011409759521484, "learning_rate": 9.698551392273463e-06, "loss": 0.6004, "step": 11524 }, { "epoch": 0.1377945695190043, "grad_norm": 1.582655668258667, "learning_rate": 9.69848517718976e-06, "loss": 0.6035, "step": 11525 }, { "epoch": 0.13780652566386495, "grad_norm": 2.0793850421905518, "learning_rate": 9.69841895506066e-06, "loss": 0.5746, "step": 11526 }, { "epoch": 0.1378184818087256, "grad_norm": 1.6807328462600708, "learning_rate": 9.698352725886264e-06, "loss": 0.5906, "step": 11527 }, { "epoch": 0.13783043795358624, "grad_norm": 2.169686794281006, "learning_rate": 9.698286489666672e-06, "loss": 0.7173, "step": 11528 }, { "epoch": 0.1378423940984469, "grad_norm": 3.0306930541992188, "learning_rate": 9.698220246401985e-06, "loss": 0.6162, "step": 11529 }, { "epoch": 0.13785435024330756, "grad_norm": 6.446970462799072, "learning_rate": 9.698153996092297e-06, "loss": 0.6811, "step": 11530 }, { "epoch": 0.13786630638816819, "grad_norm": 3.633711814880371, "learning_rate": 9.698087738737712e-06, "loss": 0.6289, "step": 11531 }, { "epoch": 0.13787826253302884, "grad_norm": 1.5331518650054932, "learning_rate": 9.698021474338329e-06, "loss": 0.5969, "step": 11532 }, { "epoch": 0.1378902186778895, "grad_norm": 2.7780165672302246, "learning_rate": 9.697955202894243e-06, "loss": 0.7449, "step": 11533 }, { "epoch": 0.13790217482275016, "grad_norm": 1.897887110710144, "learning_rate": 9.697888924405558e-06, "loss": 0.6661, "step": 11534 }, { "epoch": 0.1379141309676108, "grad_norm": 1.7044146060943604, "learning_rate": 9.697822638872372e-06, "loss": 0.4892, "step": 11535 }, { "epoch": 0.13792608711247145, "grad_norm": 2.2421672344207764, "learning_rate": 9.697756346294784e-06, "loss": 0.7077, "step": 11536 }, { "epoch": 0.1379380432573321, "grad_norm": 1.8015702962875366, "learning_rate": 9.697690046672894e-06, "loss": 0.6187, "step": 11537 }, { "epoch": 0.13794999940219277, "grad_norm": 2.3223016262054443, "learning_rate": 9.6976237400068e-06, "loss": 0.5287, "step": 11538 }, { "epoch": 0.1379619555470534, "grad_norm": 1.9086490869522095, "learning_rate": 9.697557426296603e-06, "loss": 0.6626, "step": 11539 }, { "epoch": 0.13797391169191406, "grad_norm": 28.731828689575195, "learning_rate": 9.697491105542403e-06, "loss": 0.6161, "step": 11540 }, { "epoch": 0.13798586783677472, "grad_norm": 2.449702024459839, "learning_rate": 9.697424777744297e-06, "loss": 0.6476, "step": 11541 }, { "epoch": 0.13799782398163535, "grad_norm": 1.9882829189300537, "learning_rate": 9.697358442902388e-06, "loss": 0.6477, "step": 11542 }, { "epoch": 0.138009780126496, "grad_norm": 2.7148818969726562, "learning_rate": 9.69729210101677e-06, "loss": 0.6979, "step": 11543 }, { "epoch": 0.13802173627135667, "grad_norm": 2.071168899536133, "learning_rate": 9.697225752087548e-06, "loss": 0.5605, "step": 11544 }, { "epoch": 0.13803369241621732, "grad_norm": 2.494795560836792, "learning_rate": 9.69715939611482e-06, "loss": 0.6299, "step": 11545 }, { "epoch": 0.13804564856107795, "grad_norm": 1.8030288219451904, "learning_rate": 9.697093033098683e-06, "loss": 0.5841, "step": 11546 }, { "epoch": 0.1380576047059386, "grad_norm": 3.14479923248291, "learning_rate": 9.697026663039237e-06, "loss": 0.5066, "step": 11547 }, { "epoch": 0.13806956085079927, "grad_norm": 2.8605306148529053, "learning_rate": 9.696960285936586e-06, "loss": 0.6566, "step": 11548 }, { "epoch": 0.13808151699565993, "grad_norm": 3.388899087905884, "learning_rate": 9.696893901790825e-06, "loss": 0.6017, "step": 11549 }, { "epoch": 0.13809347314052056, "grad_norm": 3.3027844429016113, "learning_rate": 9.696827510602054e-06, "loss": 0.6352, "step": 11550 }, { "epoch": 0.13810542928538122, "grad_norm": 2.6827456951141357, "learning_rate": 9.696761112370376e-06, "loss": 0.6767, "step": 11551 }, { "epoch": 0.13811738543024188, "grad_norm": 2.360250234603882, "learning_rate": 9.696694707095886e-06, "loss": 0.6335, "step": 11552 }, { "epoch": 0.13812934157510254, "grad_norm": 1.895833969116211, "learning_rate": 9.696628294778687e-06, "loss": 0.548, "step": 11553 }, { "epoch": 0.13814129771996317, "grad_norm": 1.9088187217712402, "learning_rate": 9.696561875418874e-06, "loss": 0.5953, "step": 11554 }, { "epoch": 0.13815325386482383, "grad_norm": 1.7221009731292725, "learning_rate": 9.696495449016554e-06, "loss": 0.6787, "step": 11555 }, { "epoch": 0.13816521000968449, "grad_norm": 1.5057568550109863, "learning_rate": 9.69642901557182e-06, "loss": 0.6053, "step": 11556 }, { "epoch": 0.13817716615454512, "grad_norm": 2.4349215030670166, "learning_rate": 9.696362575084775e-06, "loss": 0.5972, "step": 11557 }, { "epoch": 0.13818912229940578, "grad_norm": 3.0349583625793457, "learning_rate": 9.696296127555516e-06, "loss": 0.5702, "step": 11558 }, { "epoch": 0.13820107844426643, "grad_norm": 2.1424291133880615, "learning_rate": 9.696229672984146e-06, "loss": 0.6195, "step": 11559 }, { "epoch": 0.1382130345891271, "grad_norm": 2.4108641147613525, "learning_rate": 9.69616321137076e-06, "loss": 0.6686, "step": 11560 }, { "epoch": 0.13822499073398772, "grad_norm": 2.1037518978118896, "learning_rate": 9.696096742715464e-06, "loss": 0.759, "step": 11561 }, { "epoch": 0.13823694687884838, "grad_norm": 2.3961188793182373, "learning_rate": 9.696030267018353e-06, "loss": 0.6838, "step": 11562 }, { "epoch": 0.13824890302370904, "grad_norm": 2.5030856132507324, "learning_rate": 9.695963784279528e-06, "loss": 0.6882, "step": 11563 }, { "epoch": 0.1382608591685697, "grad_norm": 2.487705707550049, "learning_rate": 9.695897294499088e-06, "loss": 0.7276, "step": 11564 }, { "epoch": 0.13827281531343033, "grad_norm": 2.0581393241882324, "learning_rate": 9.695830797677135e-06, "loss": 0.638, "step": 11565 }, { "epoch": 0.138284771458291, "grad_norm": 2.0455806255340576, "learning_rate": 9.695764293813765e-06, "loss": 0.6613, "step": 11566 }, { "epoch": 0.13829672760315165, "grad_norm": 2.644404649734497, "learning_rate": 9.695697782909081e-06, "loss": 0.5724, "step": 11567 }, { "epoch": 0.13830868374801228, "grad_norm": 1.8259544372558594, "learning_rate": 9.69563126496318e-06, "loss": 0.573, "step": 11568 }, { "epoch": 0.13832063989287294, "grad_norm": 2.5940511226654053, "learning_rate": 9.695564739976166e-06, "loss": 0.6718, "step": 11569 }, { "epoch": 0.1383325960377336, "grad_norm": 2.803034543991089, "learning_rate": 9.695498207948134e-06, "loss": 0.7125, "step": 11570 }, { "epoch": 0.13834455218259425, "grad_norm": 1.9959205389022827, "learning_rate": 9.695431668879184e-06, "loss": 0.6065, "step": 11571 }, { "epoch": 0.13835650832745489, "grad_norm": 48.11326599121094, "learning_rate": 9.695365122769421e-06, "loss": 0.6378, "step": 11572 }, { "epoch": 0.13836846447231554, "grad_norm": 4.074028968811035, "learning_rate": 9.69529856961894e-06, "loss": 0.5972, "step": 11573 }, { "epoch": 0.1383804206171762, "grad_norm": 2.0128188133239746, "learning_rate": 9.695232009427842e-06, "loss": 0.6532, "step": 11574 }, { "epoch": 0.13839237676203686, "grad_norm": 3.8865673542022705, "learning_rate": 9.695165442196228e-06, "loss": 0.6106, "step": 11575 }, { "epoch": 0.1384043329068975, "grad_norm": 2.3316798210144043, "learning_rate": 9.695098867924196e-06, "loss": 0.7027, "step": 11576 }, { "epoch": 0.13841628905175815, "grad_norm": 2.2353804111480713, "learning_rate": 9.695032286611845e-06, "loss": 0.6662, "step": 11577 }, { "epoch": 0.1384282451966188, "grad_norm": 2.1359035968780518, "learning_rate": 9.694965698259278e-06, "loss": 0.6622, "step": 11578 }, { "epoch": 0.13844020134147944, "grad_norm": 1.7466291189193726, "learning_rate": 9.694899102866593e-06, "loss": 0.5436, "step": 11579 }, { "epoch": 0.1384521574863401, "grad_norm": 2.0538530349731445, "learning_rate": 9.694832500433889e-06, "loss": 0.6375, "step": 11580 }, { "epoch": 0.13846411363120076, "grad_norm": 2.9769201278686523, "learning_rate": 9.694765890961269e-06, "loss": 0.6044, "step": 11581 }, { "epoch": 0.13847606977606142, "grad_norm": 3.209665060043335, "learning_rate": 9.694699274448829e-06, "loss": 0.6736, "step": 11582 }, { "epoch": 0.13848802592092205, "grad_norm": 1.6316134929656982, "learning_rate": 9.69463265089667e-06, "loss": 0.7216, "step": 11583 }, { "epoch": 0.1384999820657827, "grad_norm": 1.6095980405807495, "learning_rate": 9.694566020304897e-06, "loss": 0.573, "step": 11584 }, { "epoch": 0.13851193821064336, "grad_norm": 3.0202529430389404, "learning_rate": 9.694499382673601e-06, "loss": 0.7294, "step": 11585 }, { "epoch": 0.13852389435550402, "grad_norm": 1.4727102518081665, "learning_rate": 9.694432738002887e-06, "loss": 0.6188, "step": 11586 }, { "epoch": 0.13853585050036465, "grad_norm": 1.7186195850372314, "learning_rate": 9.694366086292857e-06, "loss": 0.5674, "step": 11587 }, { "epoch": 0.1385478066452253, "grad_norm": 1.8965184688568115, "learning_rate": 9.694299427543607e-06, "loss": 0.5446, "step": 11588 }, { "epoch": 0.13855976279008597, "grad_norm": 4.55774450302124, "learning_rate": 9.69423276175524e-06, "loss": 0.6755, "step": 11589 }, { "epoch": 0.1385717189349466, "grad_norm": 2.1331095695495605, "learning_rate": 9.694166088927853e-06, "loss": 0.634, "step": 11590 }, { "epoch": 0.13858367507980726, "grad_norm": 2.5713188648223877, "learning_rate": 9.694099409061546e-06, "loss": 0.5832, "step": 11591 }, { "epoch": 0.13859563122466792, "grad_norm": 2.128633975982666, "learning_rate": 9.694032722156421e-06, "loss": 0.6251, "step": 11592 }, { "epoch": 0.13860758736952858, "grad_norm": 54.25904846191406, "learning_rate": 9.693966028212576e-06, "loss": 0.5906, "step": 11593 }, { "epoch": 0.1386195435143892, "grad_norm": 2.387110948562622, "learning_rate": 9.693899327230115e-06, "loss": 0.6366, "step": 11594 }, { "epoch": 0.13863149965924987, "grad_norm": 2.1254780292510986, "learning_rate": 9.693832619209134e-06, "loss": 0.656, "step": 11595 }, { "epoch": 0.13864345580411053, "grad_norm": 5.120186805725098, "learning_rate": 9.693765904149735e-06, "loss": 0.6564, "step": 11596 }, { "epoch": 0.13865541194897119, "grad_norm": 5.263539791107178, "learning_rate": 9.693699182052016e-06, "loss": 0.5841, "step": 11597 }, { "epoch": 0.13866736809383182, "grad_norm": 2.1070477962493896, "learning_rate": 9.693632452916078e-06, "loss": 0.7566, "step": 11598 }, { "epoch": 0.13867932423869248, "grad_norm": 46.334083557128906, "learning_rate": 9.693565716742023e-06, "loss": 0.5875, "step": 11599 }, { "epoch": 0.13869128038355313, "grad_norm": 2.155073642730713, "learning_rate": 9.69349897352995e-06, "loss": 0.6003, "step": 11600 }, { "epoch": 0.13870323652841376, "grad_norm": 2.5311193466186523, "learning_rate": 9.693432223279955e-06, "loss": 0.5882, "step": 11601 }, { "epoch": 0.13871519267327442, "grad_norm": 2.8779609203338623, "learning_rate": 9.693365465992146e-06, "loss": 0.5875, "step": 11602 }, { "epoch": 0.13872714881813508, "grad_norm": 1.9836667776107788, "learning_rate": 9.693298701666615e-06, "loss": 0.5876, "step": 11603 }, { "epoch": 0.13873910496299574, "grad_norm": 1.8057148456573486, "learning_rate": 9.69323193030347e-06, "loss": 0.6115, "step": 11604 }, { "epoch": 0.13875106110785637, "grad_norm": 2.005199670791626, "learning_rate": 9.693165151902804e-06, "loss": 0.6295, "step": 11605 }, { "epoch": 0.13876301725271703, "grad_norm": 1.448984980583191, "learning_rate": 9.693098366464722e-06, "loss": 0.5024, "step": 11606 }, { "epoch": 0.1387749733975777, "grad_norm": 2.1831114292144775, "learning_rate": 9.69303157398932e-06, "loss": 0.7042, "step": 11607 }, { "epoch": 0.13878692954243835, "grad_norm": 2.102656364440918, "learning_rate": 9.692964774476703e-06, "loss": 0.6624, "step": 11608 }, { "epoch": 0.13879888568729898, "grad_norm": 3.502162218093872, "learning_rate": 9.692897967926968e-06, "loss": 0.6616, "step": 11609 }, { "epoch": 0.13881084183215964, "grad_norm": 2.5528581142425537, "learning_rate": 9.692831154340216e-06, "loss": 0.5542, "step": 11610 }, { "epoch": 0.1388227979770203, "grad_norm": 2.505826234817505, "learning_rate": 9.692764333716546e-06, "loss": 0.624, "step": 11611 }, { "epoch": 0.13883475412188095, "grad_norm": 1.7689802646636963, "learning_rate": 9.69269750605606e-06, "loss": 0.5773, "step": 11612 }, { "epoch": 0.13884671026674159, "grad_norm": 3.2095940113067627, "learning_rate": 9.692630671358856e-06, "loss": 0.6964, "step": 11613 }, { "epoch": 0.13885866641160224, "grad_norm": 1.5904816389083862, "learning_rate": 9.692563829625038e-06, "loss": 0.6841, "step": 11614 }, { "epoch": 0.1388706225564629, "grad_norm": 5.768423557281494, "learning_rate": 9.692496980854702e-06, "loss": 0.6215, "step": 11615 }, { "epoch": 0.13888257870132353, "grad_norm": 4.15388298034668, "learning_rate": 9.692430125047951e-06, "loss": 0.5489, "step": 11616 }, { "epoch": 0.1388945348461842, "grad_norm": 1.761753797531128, "learning_rate": 9.692363262204883e-06, "loss": 0.5776, "step": 11617 }, { "epoch": 0.13890649099104485, "grad_norm": 2.0187320709228516, "learning_rate": 9.692296392325601e-06, "loss": 0.5806, "step": 11618 }, { "epoch": 0.1389184471359055, "grad_norm": 1.633256196975708, "learning_rate": 9.692229515410203e-06, "loss": 0.6759, "step": 11619 }, { "epoch": 0.13893040328076614, "grad_norm": 1.9803820848464966, "learning_rate": 9.692162631458791e-06, "loss": 0.5649, "step": 11620 }, { "epoch": 0.1389423594256268, "grad_norm": 1.6981689929962158, "learning_rate": 9.692095740471464e-06, "loss": 0.6468, "step": 11621 }, { "epoch": 0.13895431557048746, "grad_norm": 1.585163950920105, "learning_rate": 9.692028842448322e-06, "loss": 0.5757, "step": 11622 }, { "epoch": 0.13896627171534812, "grad_norm": 2.945787191390991, "learning_rate": 9.691961937389468e-06, "loss": 0.5885, "step": 11623 }, { "epoch": 0.13897822786020875, "grad_norm": 2.2090957164764404, "learning_rate": 9.691895025295e-06, "loss": 0.677, "step": 11624 }, { "epoch": 0.1389901840050694, "grad_norm": 1.4794977903366089, "learning_rate": 9.691828106165018e-06, "loss": 0.5504, "step": 11625 }, { "epoch": 0.13900214014993006, "grad_norm": 2.3195345401763916, "learning_rate": 9.691761179999622e-06, "loss": 0.6426, "step": 11626 }, { "epoch": 0.1390140962947907, "grad_norm": 1.688977599143982, "learning_rate": 9.691694246798915e-06, "loss": 0.6844, "step": 11627 }, { "epoch": 0.13902605243965135, "grad_norm": 1.7543294429779053, "learning_rate": 9.691627306562995e-06, "loss": 0.696, "step": 11628 }, { "epoch": 0.139038008584512, "grad_norm": 1.8932702541351318, "learning_rate": 9.691560359291963e-06, "loss": 0.6766, "step": 11629 }, { "epoch": 0.13904996472937267, "grad_norm": 1.989106297492981, "learning_rate": 9.691493404985921e-06, "loss": 0.5567, "step": 11630 }, { "epoch": 0.1390619208742333, "grad_norm": 3.2129552364349365, "learning_rate": 9.691426443644966e-06, "loss": 0.6826, "step": 11631 }, { "epoch": 0.13907387701909396, "grad_norm": 2.1641392707824707, "learning_rate": 9.691359475269202e-06, "loss": 0.5638, "step": 11632 }, { "epoch": 0.13908583316395462, "grad_norm": 2.8187918663024902, "learning_rate": 9.691292499858728e-06, "loss": 0.6382, "step": 11633 }, { "epoch": 0.13909778930881528, "grad_norm": 1.658607006072998, "learning_rate": 9.691225517413643e-06, "loss": 0.6277, "step": 11634 }, { "epoch": 0.1391097454536759, "grad_norm": 3.823063611984253, "learning_rate": 9.69115852793405e-06, "loss": 0.5803, "step": 11635 }, { "epoch": 0.13912170159853657, "grad_norm": 1.9164940118789673, "learning_rate": 9.691091531420046e-06, "loss": 0.6882, "step": 11636 }, { "epoch": 0.13913365774339723, "grad_norm": 1.713491439819336, "learning_rate": 9.691024527871736e-06, "loss": 0.5967, "step": 11637 }, { "epoch": 0.13914561388825786, "grad_norm": 1.577439308166504, "learning_rate": 9.690957517289216e-06, "loss": 0.6105, "step": 11638 }, { "epoch": 0.13915757003311852, "grad_norm": 3.919673442840576, "learning_rate": 9.69089049967259e-06, "loss": 0.6956, "step": 11639 }, { "epoch": 0.13916952617797917, "grad_norm": 2.3169736862182617, "learning_rate": 9.690823475021955e-06, "loss": 0.6659, "step": 11640 }, { "epoch": 0.13918148232283983, "grad_norm": 2.2384250164031982, "learning_rate": 9.690756443337416e-06, "loss": 0.6784, "step": 11641 }, { "epoch": 0.13919343846770046, "grad_norm": 3.1548945903778076, "learning_rate": 9.690689404619069e-06, "loss": 0.6064, "step": 11642 }, { "epoch": 0.13920539461256112, "grad_norm": 2.3424081802368164, "learning_rate": 9.690622358867018e-06, "loss": 0.6773, "step": 11643 }, { "epoch": 0.13921735075742178, "grad_norm": 1.3937300443649292, "learning_rate": 9.690555306081361e-06, "loss": 0.6417, "step": 11644 }, { "epoch": 0.13922930690228244, "grad_norm": 2.0838301181793213, "learning_rate": 9.6904882462622e-06, "loss": 0.5652, "step": 11645 }, { "epoch": 0.13924126304714307, "grad_norm": 1.6454784870147705, "learning_rate": 9.690421179409637e-06, "loss": 0.5865, "step": 11646 }, { "epoch": 0.13925321919200373, "grad_norm": 2.172316312789917, "learning_rate": 9.69035410552377e-06, "loss": 0.6451, "step": 11647 }, { "epoch": 0.1392651753368644, "grad_norm": 1.8326510190963745, "learning_rate": 9.690287024604698e-06, "loss": 0.6133, "step": 11648 }, { "epoch": 0.13927713148172502, "grad_norm": 1.9065967798233032, "learning_rate": 9.690219936652526e-06, "loss": 0.6413, "step": 11649 }, { "epoch": 0.13928908762658568, "grad_norm": 3.824493408203125, "learning_rate": 9.690152841667352e-06, "loss": 0.6676, "step": 11650 }, { "epoch": 0.13930104377144634, "grad_norm": 1.8865454196929932, "learning_rate": 9.690085739649278e-06, "loss": 0.6101, "step": 11651 }, { "epoch": 0.139312999916307, "grad_norm": 8.242175102233887, "learning_rate": 9.6900186305984e-06, "loss": 0.5929, "step": 11652 }, { "epoch": 0.13932495606116763, "grad_norm": 1.757897973060608, "learning_rate": 9.689951514514827e-06, "loss": 0.5352, "step": 11653 }, { "epoch": 0.13933691220602828, "grad_norm": 5.983354568481445, "learning_rate": 9.689884391398654e-06, "loss": 0.5689, "step": 11654 }, { "epoch": 0.13934886835088894, "grad_norm": 6.511047840118408, "learning_rate": 9.689817261249982e-06, "loss": 0.6242, "step": 11655 }, { "epoch": 0.1393608244957496, "grad_norm": 3.993267774581909, "learning_rate": 9.689750124068914e-06, "loss": 0.6744, "step": 11656 }, { "epoch": 0.13937278064061023, "grad_norm": 2.0713846683502197, "learning_rate": 9.689682979855546e-06, "loss": 0.5867, "step": 11657 }, { "epoch": 0.1393847367854709, "grad_norm": 2.099926233291626, "learning_rate": 9.689615828609983e-06, "loss": 0.7039, "step": 11658 }, { "epoch": 0.13939669293033155, "grad_norm": 42.548370361328125, "learning_rate": 9.689548670332326e-06, "loss": 0.6284, "step": 11659 }, { "epoch": 0.13940864907519218, "grad_norm": 6.959231376647949, "learning_rate": 9.68948150502267e-06, "loss": 0.6203, "step": 11660 }, { "epoch": 0.13942060522005284, "grad_norm": 1.9177101850509644, "learning_rate": 9.689414332681124e-06, "loss": 0.6429, "step": 11661 }, { "epoch": 0.1394325613649135, "grad_norm": 4.086327075958252, "learning_rate": 9.689347153307782e-06, "loss": 0.6221, "step": 11662 }, { "epoch": 0.13944451750977416, "grad_norm": 1.857329249382019, "learning_rate": 9.68927996690275e-06, "loss": 0.7423, "step": 11663 }, { "epoch": 0.1394564736546348, "grad_norm": 2.1349270343780518, "learning_rate": 9.689212773466123e-06, "loss": 0.6181, "step": 11664 }, { "epoch": 0.13946842979949545, "grad_norm": 2.0918753147125244, "learning_rate": 9.689145572998007e-06, "loss": 0.579, "step": 11665 }, { "epoch": 0.1394803859443561, "grad_norm": 2.2970809936523438, "learning_rate": 9.6890783654985e-06, "loss": 0.6985, "step": 11666 }, { "epoch": 0.13949234208921676, "grad_norm": 2.2584593296051025, "learning_rate": 9.689011150967702e-06, "loss": 0.6851, "step": 11667 }, { "epoch": 0.1395042982340774, "grad_norm": 1.9800485372543335, "learning_rate": 9.688943929405716e-06, "loss": 0.6355, "step": 11668 }, { "epoch": 0.13951625437893805, "grad_norm": 2.057602643966675, "learning_rate": 9.688876700812642e-06, "loss": 0.5757, "step": 11669 }, { "epoch": 0.1395282105237987, "grad_norm": 3.147423505783081, "learning_rate": 9.68880946518858e-06, "loss": 0.6376, "step": 11670 }, { "epoch": 0.13954016666865937, "grad_norm": 2.086771249771118, "learning_rate": 9.688742222533632e-06, "loss": 0.622, "step": 11671 }, { "epoch": 0.13955212281352, "grad_norm": 3.01045560836792, "learning_rate": 9.688674972847899e-06, "loss": 0.7003, "step": 11672 }, { "epoch": 0.13956407895838066, "grad_norm": 1.6335850954055786, "learning_rate": 9.688607716131481e-06, "loss": 0.6466, "step": 11673 }, { "epoch": 0.13957603510324132, "grad_norm": 3.581061840057373, "learning_rate": 9.688540452384478e-06, "loss": 0.5999, "step": 11674 }, { "epoch": 0.13958799124810195, "grad_norm": 8.701537132263184, "learning_rate": 9.688473181606993e-06, "loss": 0.5186, "step": 11675 }, { "epoch": 0.1395999473929626, "grad_norm": 3.636955976486206, "learning_rate": 9.688405903799126e-06, "loss": 0.6029, "step": 11676 }, { "epoch": 0.13961190353782327, "grad_norm": 2.2139551639556885, "learning_rate": 9.688338618960977e-06, "loss": 0.6352, "step": 11677 }, { "epoch": 0.13962385968268393, "grad_norm": 1.9526557922363281, "learning_rate": 9.68827132709265e-06, "loss": 0.528, "step": 11678 }, { "epoch": 0.13963581582754456, "grad_norm": 7.70900821685791, "learning_rate": 9.68820402819424e-06, "loss": 0.6659, "step": 11679 }, { "epoch": 0.13964777197240522, "grad_norm": 1.7344770431518555, "learning_rate": 9.688136722265853e-06, "loss": 0.6263, "step": 11680 }, { "epoch": 0.13965972811726587, "grad_norm": 3.657980442047119, "learning_rate": 9.688069409307587e-06, "loss": 0.7403, "step": 11681 }, { "epoch": 0.13967168426212653, "grad_norm": 2.1746649742126465, "learning_rate": 9.688002089319546e-06, "loss": 0.5728, "step": 11682 }, { "epoch": 0.13968364040698716, "grad_norm": 3.986992835998535, "learning_rate": 9.687934762301826e-06, "loss": 0.6472, "step": 11683 }, { "epoch": 0.13969559655184782, "grad_norm": 2.2000460624694824, "learning_rate": 9.687867428254536e-06, "loss": 0.5736, "step": 11684 }, { "epoch": 0.13970755269670848, "grad_norm": 1.9018206596374512, "learning_rate": 9.68780008717777e-06, "loss": 0.763, "step": 11685 }, { "epoch": 0.1397195088415691, "grad_norm": 2.3627119064331055, "learning_rate": 9.68773273907163e-06, "loss": 0.6246, "step": 11686 }, { "epoch": 0.13973146498642977, "grad_norm": 2.481328248977661, "learning_rate": 9.68766538393622e-06, "loss": 0.5839, "step": 11687 }, { "epoch": 0.13974342113129043, "grad_norm": 1.461557388305664, "learning_rate": 9.687598021771638e-06, "loss": 0.6242, "step": 11688 }, { "epoch": 0.1397553772761511, "grad_norm": 2.4673397541046143, "learning_rate": 9.687530652577986e-06, "loss": 0.7151, "step": 11689 }, { "epoch": 0.13976733342101172, "grad_norm": 2.0775303840637207, "learning_rate": 9.687463276355365e-06, "loss": 0.6265, "step": 11690 }, { "epoch": 0.13977928956587238, "grad_norm": 3.606431245803833, "learning_rate": 9.687395893103877e-06, "loss": 0.6681, "step": 11691 }, { "epoch": 0.13979124571073304, "grad_norm": 1.6350739002227783, "learning_rate": 9.68732850282362e-06, "loss": 0.6605, "step": 11692 }, { "epoch": 0.1398032018555937, "grad_norm": 10.240187644958496, "learning_rate": 9.6872611055147e-06, "loss": 0.5741, "step": 11693 }, { "epoch": 0.13981515800045433, "grad_norm": 2.091733455657959, "learning_rate": 9.687193701177213e-06, "loss": 0.5598, "step": 11694 }, { "epoch": 0.13982711414531498, "grad_norm": 3.56880784034729, "learning_rate": 9.687126289811264e-06, "loss": 0.6731, "step": 11695 }, { "epoch": 0.13983907029017564, "grad_norm": 1.3756240606307983, "learning_rate": 9.687058871416952e-06, "loss": 0.6576, "step": 11696 }, { "epoch": 0.13985102643503627, "grad_norm": 1.9879697561264038, "learning_rate": 9.68699144599438e-06, "loss": 0.6458, "step": 11697 }, { "epoch": 0.13986298257989693, "grad_norm": 2.9378812313079834, "learning_rate": 9.686924013543646e-06, "loss": 0.6883, "step": 11698 }, { "epoch": 0.1398749387247576, "grad_norm": 2.079179048538208, "learning_rate": 9.686856574064854e-06, "loss": 0.6015, "step": 11699 }, { "epoch": 0.13988689486961825, "grad_norm": 7.014050006866455, "learning_rate": 9.686789127558102e-06, "loss": 0.6125, "step": 11700 }, { "epoch": 0.13989885101447888, "grad_norm": 1.4056378602981567, "learning_rate": 9.686721674023494e-06, "loss": 0.6022, "step": 11701 }, { "epoch": 0.13991080715933954, "grad_norm": 2.516512393951416, "learning_rate": 9.686654213461131e-06, "loss": 0.6359, "step": 11702 }, { "epoch": 0.1399227633042002, "grad_norm": 3.0782620906829834, "learning_rate": 9.686586745871111e-06, "loss": 0.6254, "step": 11703 }, { "epoch": 0.13993471944906086, "grad_norm": 2.7479913234710693, "learning_rate": 9.686519271253541e-06, "loss": 0.6967, "step": 11704 }, { "epoch": 0.1399466755939215, "grad_norm": 5.484256267547607, "learning_rate": 9.686451789608515e-06, "loss": 0.6118, "step": 11705 }, { "epoch": 0.13995863173878215, "grad_norm": 2.0976078510284424, "learning_rate": 9.68638430093614e-06, "loss": 0.5911, "step": 11706 }, { "epoch": 0.1399705878836428, "grad_norm": 2.1180524826049805, "learning_rate": 9.686316805236516e-06, "loss": 0.6246, "step": 11707 }, { "epoch": 0.13998254402850344, "grad_norm": 2.6915435791015625, "learning_rate": 9.686249302509741e-06, "loss": 0.5969, "step": 11708 }, { "epoch": 0.1399945001733641, "grad_norm": 4.0999836921691895, "learning_rate": 9.68618179275592e-06, "loss": 0.5663, "step": 11709 }, { "epoch": 0.14000645631822475, "grad_norm": 3.182072877883911, "learning_rate": 9.68611427597515e-06, "loss": 0.6239, "step": 11710 }, { "epoch": 0.1400184124630854, "grad_norm": 2.5876262187957764, "learning_rate": 9.68604675216754e-06, "loss": 0.648, "step": 11711 }, { "epoch": 0.14003036860794604, "grad_norm": 4.149425983428955, "learning_rate": 9.685979221333183e-06, "loss": 0.551, "step": 11712 }, { "epoch": 0.1400423247528067, "grad_norm": 2.1430115699768066, "learning_rate": 9.685911683472184e-06, "loss": 0.6871, "step": 11713 }, { "epoch": 0.14005428089766736, "grad_norm": 4.594332218170166, "learning_rate": 9.685844138584643e-06, "loss": 0.6471, "step": 11714 }, { "epoch": 0.14006623704252802, "grad_norm": 1.9707612991333008, "learning_rate": 9.685776586670663e-06, "loss": 0.6442, "step": 11715 }, { "epoch": 0.14007819318738865, "grad_norm": 1.6520962715148926, "learning_rate": 9.685709027730345e-06, "loss": 0.6994, "step": 11716 }, { "epoch": 0.1400901493322493, "grad_norm": 3.0025100708007812, "learning_rate": 9.685641461763789e-06, "loss": 0.6024, "step": 11717 }, { "epoch": 0.14010210547710997, "grad_norm": 6.800235271453857, "learning_rate": 9.685573888771096e-06, "loss": 0.6593, "step": 11718 }, { "epoch": 0.1401140616219706, "grad_norm": 5.025818347930908, "learning_rate": 9.68550630875237e-06, "loss": 0.536, "step": 11719 }, { "epoch": 0.14012601776683126, "grad_norm": 2.3696603775024414, "learning_rate": 9.68543872170771e-06, "loss": 0.5909, "step": 11720 }, { "epoch": 0.14013797391169192, "grad_norm": 2.3635380268096924, "learning_rate": 9.685371127637217e-06, "loss": 0.6322, "step": 11721 }, { "epoch": 0.14014993005655257, "grad_norm": 1.384291648864746, "learning_rate": 9.685303526540995e-06, "loss": 0.6671, "step": 11722 }, { "epoch": 0.1401618862014132, "grad_norm": 3.0594420433044434, "learning_rate": 9.685235918419142e-06, "loss": 0.6689, "step": 11723 }, { "epoch": 0.14017384234627386, "grad_norm": 2.1178276538848877, "learning_rate": 9.685168303271762e-06, "loss": 0.5976, "step": 11724 }, { "epoch": 0.14018579849113452, "grad_norm": 10.6519136428833, "learning_rate": 9.685100681098956e-06, "loss": 0.648, "step": 11725 }, { "epoch": 0.14019775463599518, "grad_norm": 2.6461727619171143, "learning_rate": 9.685033051900822e-06, "loss": 0.6797, "step": 11726 }, { "epoch": 0.1402097107808558, "grad_norm": 2.9607889652252197, "learning_rate": 9.684965415677467e-06, "loss": 0.6681, "step": 11727 }, { "epoch": 0.14022166692571647, "grad_norm": 1.712568759918213, "learning_rate": 9.68489777242899e-06, "loss": 0.6506, "step": 11728 }, { "epoch": 0.14023362307057713, "grad_norm": 2.186082601547241, "learning_rate": 9.684830122155491e-06, "loss": 0.7798, "step": 11729 }, { "epoch": 0.1402455792154378, "grad_norm": 2.9689934253692627, "learning_rate": 9.684762464857074e-06, "loss": 0.6385, "step": 11730 }, { "epoch": 0.14025753536029842, "grad_norm": 6.3809404373168945, "learning_rate": 9.684694800533838e-06, "loss": 0.712, "step": 11731 }, { "epoch": 0.14026949150515908, "grad_norm": 3.3172366619110107, "learning_rate": 9.684627129185885e-06, "loss": 0.5624, "step": 11732 }, { "epoch": 0.14028144765001974, "grad_norm": 2.000793933868408, "learning_rate": 9.684559450813318e-06, "loss": 0.5879, "step": 11733 }, { "epoch": 0.14029340379488037, "grad_norm": 1.6574809551239014, "learning_rate": 9.684491765416236e-06, "loss": 0.6173, "step": 11734 }, { "epoch": 0.14030535993974103, "grad_norm": 2.017336368560791, "learning_rate": 9.684424072994742e-06, "loss": 0.6549, "step": 11735 }, { "epoch": 0.14031731608460168, "grad_norm": 2.8769168853759766, "learning_rate": 9.68435637354894e-06, "loss": 0.6691, "step": 11736 }, { "epoch": 0.14032927222946234, "grad_norm": 1.9894685745239258, "learning_rate": 9.684288667078928e-06, "loss": 0.6688, "step": 11737 }, { "epoch": 0.14034122837432297, "grad_norm": 4.577498435974121, "learning_rate": 9.684220953584806e-06, "loss": 0.6375, "step": 11738 }, { "epoch": 0.14035318451918363, "grad_norm": 1.7876133918762207, "learning_rate": 9.68415323306668e-06, "loss": 0.7256, "step": 11739 }, { "epoch": 0.1403651406640443, "grad_norm": 3.0624351501464844, "learning_rate": 9.68408550552465e-06, "loss": 0.5642, "step": 11740 }, { "epoch": 0.14037709680890495, "grad_norm": 3.148740530014038, "learning_rate": 9.684017770958815e-06, "loss": 0.7128, "step": 11741 }, { "epoch": 0.14038905295376558, "grad_norm": 1.8505327701568604, "learning_rate": 9.68395002936928e-06, "loss": 0.651, "step": 11742 }, { "epoch": 0.14040100909862624, "grad_norm": 2.5181100368499756, "learning_rate": 9.683882280756148e-06, "loss": 0.6429, "step": 11743 }, { "epoch": 0.1404129652434869, "grad_norm": 1.80772864818573, "learning_rate": 9.683814525119514e-06, "loss": 0.5813, "step": 11744 }, { "epoch": 0.14042492138834753, "grad_norm": 9.260807037353516, "learning_rate": 9.683746762459485e-06, "loss": 0.6324, "step": 11745 }, { "epoch": 0.1404368775332082, "grad_norm": 2.0813210010528564, "learning_rate": 9.68367899277616e-06, "loss": 0.5624, "step": 11746 }, { "epoch": 0.14044883367806885, "grad_norm": 2.0476131439208984, "learning_rate": 9.683611216069643e-06, "loss": 0.6901, "step": 11747 }, { "epoch": 0.1404607898229295, "grad_norm": 5.363970756530762, "learning_rate": 9.683543432340035e-06, "loss": 0.5663, "step": 11748 }, { "epoch": 0.14047274596779014, "grad_norm": 1.7328122854232788, "learning_rate": 9.683475641587435e-06, "loss": 0.6102, "step": 11749 }, { "epoch": 0.1404847021126508, "grad_norm": 1.9877738952636719, "learning_rate": 9.683407843811947e-06, "loss": 0.6708, "step": 11750 }, { "epoch": 0.14049665825751145, "grad_norm": 1.8393447399139404, "learning_rate": 9.683340039013674e-06, "loss": 0.5896, "step": 11751 }, { "epoch": 0.1405086144023721, "grad_norm": 1.8050272464752197, "learning_rate": 9.683272227192715e-06, "loss": 0.647, "step": 11752 }, { "epoch": 0.14052057054723274, "grad_norm": 2.157478094100952, "learning_rate": 9.683204408349171e-06, "loss": 0.6281, "step": 11753 }, { "epoch": 0.1405325266920934, "grad_norm": 2.612136125564575, "learning_rate": 9.683136582483147e-06, "loss": 0.5813, "step": 11754 }, { "epoch": 0.14054448283695406, "grad_norm": 3.0846173763275146, "learning_rate": 9.683068749594744e-06, "loss": 0.5082, "step": 11755 }, { "epoch": 0.1405564389818147, "grad_norm": 2.707615613937378, "learning_rate": 9.683000909684061e-06, "loss": 0.6376, "step": 11756 }, { "epoch": 0.14056839512667535, "grad_norm": 2.0774080753326416, "learning_rate": 9.682933062751203e-06, "loss": 0.5793, "step": 11757 }, { "epoch": 0.140580351271536, "grad_norm": 4.037118911743164, "learning_rate": 9.682865208796269e-06, "loss": 0.6272, "step": 11758 }, { "epoch": 0.14059230741639667, "grad_norm": 4.4383063316345215, "learning_rate": 9.682797347819362e-06, "loss": 0.6336, "step": 11759 }, { "epoch": 0.1406042635612573, "grad_norm": 1.9589359760284424, "learning_rate": 9.682729479820584e-06, "loss": 0.5845, "step": 11760 }, { "epoch": 0.14061621970611796, "grad_norm": 2.79313325881958, "learning_rate": 9.682661604800037e-06, "loss": 0.5785, "step": 11761 }, { "epoch": 0.14062817585097862, "grad_norm": 2.0417897701263428, "learning_rate": 9.682593722757822e-06, "loss": 0.6395, "step": 11762 }, { "epoch": 0.14064013199583927, "grad_norm": 2.6022496223449707, "learning_rate": 9.682525833694041e-06, "loss": 0.6825, "step": 11763 }, { "epoch": 0.1406520881406999, "grad_norm": 2.410752773284912, "learning_rate": 9.682457937608796e-06, "loss": 0.6511, "step": 11764 }, { "epoch": 0.14066404428556056, "grad_norm": 1.6982171535491943, "learning_rate": 9.682390034502186e-06, "loss": 0.7439, "step": 11765 }, { "epoch": 0.14067600043042122, "grad_norm": 2.4851419925689697, "learning_rate": 9.682322124374319e-06, "loss": 0.692, "step": 11766 }, { "epoch": 0.14068795657528185, "grad_norm": 2.8917396068573, "learning_rate": 9.682254207225292e-06, "loss": 0.5637, "step": 11767 }, { "epoch": 0.1406999127201425, "grad_norm": 1.8220957517623901, "learning_rate": 9.682186283055208e-06, "loss": 0.5528, "step": 11768 }, { "epoch": 0.14071186886500317, "grad_norm": 1.9007128477096558, "learning_rate": 9.68211835186417e-06, "loss": 0.6522, "step": 11769 }, { "epoch": 0.14072382500986383, "grad_norm": 1.8496040105819702, "learning_rate": 9.682050413652279e-06, "loss": 0.6013, "step": 11770 }, { "epoch": 0.14073578115472446, "grad_norm": 1.6651785373687744, "learning_rate": 9.681982468419636e-06, "loss": 0.6417, "step": 11771 }, { "epoch": 0.14074773729958512, "grad_norm": 1.619741678237915, "learning_rate": 9.681914516166345e-06, "loss": 0.6402, "step": 11772 }, { "epoch": 0.14075969344444578, "grad_norm": 1.6117229461669922, "learning_rate": 9.681846556892504e-06, "loss": 0.5417, "step": 11773 }, { "epoch": 0.14077164958930644, "grad_norm": 1.5976811647415161, "learning_rate": 9.68177859059822e-06, "loss": 0.5464, "step": 11774 }, { "epoch": 0.14078360573416707, "grad_norm": 1.735327959060669, "learning_rate": 9.68171061728359e-06, "loss": 0.6407, "step": 11775 }, { "epoch": 0.14079556187902773, "grad_norm": 2.8739876747131348, "learning_rate": 9.681642636948719e-06, "loss": 0.6845, "step": 11776 }, { "epoch": 0.14080751802388838, "grad_norm": 2.1324684619903564, "learning_rate": 9.681574649593709e-06, "loss": 0.6629, "step": 11777 }, { "epoch": 0.14081947416874901, "grad_norm": 1.7429102659225464, "learning_rate": 9.68150665521866e-06, "loss": 0.5724, "step": 11778 }, { "epoch": 0.14083143031360967, "grad_norm": 1.9513335227966309, "learning_rate": 9.681438653823676e-06, "loss": 0.6349, "step": 11779 }, { "epoch": 0.14084338645847033, "grad_norm": 1.9395406246185303, "learning_rate": 9.681370645408858e-06, "loss": 0.5961, "step": 11780 }, { "epoch": 0.140855342603331, "grad_norm": 3.171978712081909, "learning_rate": 9.681302629974309e-06, "loss": 0.6408, "step": 11781 }, { "epoch": 0.14086729874819162, "grad_norm": 2.4880030155181885, "learning_rate": 9.681234607520129e-06, "loss": 0.5893, "step": 11782 }, { "epoch": 0.14087925489305228, "grad_norm": 3.684330463409424, "learning_rate": 9.681166578046422e-06, "loss": 0.638, "step": 11783 }, { "epoch": 0.14089121103791294, "grad_norm": 2.690856456756592, "learning_rate": 9.681098541553287e-06, "loss": 0.6118, "step": 11784 }, { "epoch": 0.1409031671827736, "grad_norm": 2.3093855381011963, "learning_rate": 9.681030498040829e-06, "loss": 0.5634, "step": 11785 }, { "epoch": 0.14091512332763423, "grad_norm": 2.88362455368042, "learning_rate": 9.68096244750915e-06, "loss": 0.5675, "step": 11786 }, { "epoch": 0.1409270794724949, "grad_norm": 1.9614564180374146, "learning_rate": 9.68089438995835e-06, "loss": 0.6342, "step": 11787 }, { "epoch": 0.14093903561735555, "grad_norm": 1.8508919477462769, "learning_rate": 9.680826325388533e-06, "loss": 0.7326, "step": 11788 }, { "epoch": 0.1409509917622162, "grad_norm": 2.9383716583251953, "learning_rate": 9.6807582537998e-06, "loss": 0.5966, "step": 11789 }, { "epoch": 0.14096294790707684, "grad_norm": 10.46573257446289, "learning_rate": 9.680690175192254e-06, "loss": 0.615, "step": 11790 }, { "epoch": 0.1409749040519375, "grad_norm": 3.443286180496216, "learning_rate": 9.680622089565996e-06, "loss": 0.608, "step": 11791 }, { "epoch": 0.14098686019679815, "grad_norm": 2.8372902870178223, "learning_rate": 9.68055399692113e-06, "loss": 0.7145, "step": 11792 }, { "epoch": 0.14099881634165878, "grad_norm": 2.8991100788116455, "learning_rate": 9.680485897257755e-06, "loss": 0.6664, "step": 11793 }, { "epoch": 0.14101077248651944, "grad_norm": 4.461805820465088, "learning_rate": 9.680417790575975e-06, "loss": 0.6606, "step": 11794 }, { "epoch": 0.1410227286313801, "grad_norm": 2.3884682655334473, "learning_rate": 9.68034967687589e-06, "loss": 0.6397, "step": 11795 }, { "epoch": 0.14103468477624076, "grad_norm": 1.7251253128051758, "learning_rate": 9.680281556157607e-06, "loss": 0.602, "step": 11796 }, { "epoch": 0.1410466409211014, "grad_norm": 1.788934350013733, "learning_rate": 9.680213428421223e-06, "loss": 0.6761, "step": 11797 }, { "epoch": 0.14105859706596205, "grad_norm": 2.112365961074829, "learning_rate": 9.680145293666845e-06, "loss": 0.5883, "step": 11798 }, { "epoch": 0.1410705532108227, "grad_norm": 2.533700466156006, "learning_rate": 9.68007715189457e-06, "loss": 0.5947, "step": 11799 }, { "epoch": 0.14108250935568337, "grad_norm": 4.420506477355957, "learning_rate": 9.680009003104503e-06, "loss": 0.5969, "step": 11800 }, { "epoch": 0.141094465500544, "grad_norm": 2.3617053031921387, "learning_rate": 9.679940847296747e-06, "loss": 0.6287, "step": 11801 }, { "epoch": 0.14110642164540466, "grad_norm": 4.507275104522705, "learning_rate": 9.679872684471402e-06, "loss": 0.5125, "step": 11802 }, { "epoch": 0.14111837779026531, "grad_norm": 2.9732284545898438, "learning_rate": 9.679804514628573e-06, "loss": 0.662, "step": 11803 }, { "epoch": 0.14113033393512595, "grad_norm": 2.3233251571655273, "learning_rate": 9.679736337768359e-06, "loss": 0.5539, "step": 11804 }, { "epoch": 0.1411422900799866, "grad_norm": 2.2782905101776123, "learning_rate": 9.679668153890862e-06, "loss": 0.6612, "step": 11805 }, { "epoch": 0.14115424622484726, "grad_norm": 2.782386064529419, "learning_rate": 9.67959996299619e-06, "loss": 0.6102, "step": 11806 }, { "epoch": 0.14116620236970792, "grad_norm": 2.766601800918579, "learning_rate": 9.679531765084438e-06, "loss": 0.6221, "step": 11807 }, { "epoch": 0.14117815851456855, "grad_norm": 3.1225333213806152, "learning_rate": 9.679463560155714e-06, "loss": 0.596, "step": 11808 }, { "epoch": 0.1411901146594292, "grad_norm": 1.5077767372131348, "learning_rate": 9.679395348210116e-06, "loss": 0.6772, "step": 11809 }, { "epoch": 0.14120207080428987, "grad_norm": 5.490828037261963, "learning_rate": 9.679327129247749e-06, "loss": 0.6589, "step": 11810 }, { "epoch": 0.14121402694915053, "grad_norm": 1.4480750560760498, "learning_rate": 9.679258903268714e-06, "loss": 0.6234, "step": 11811 }, { "epoch": 0.14122598309401116, "grad_norm": 2.7751011848449707, "learning_rate": 9.679190670273112e-06, "loss": 0.7072, "step": 11812 }, { "epoch": 0.14123793923887182, "grad_norm": 2.220172166824341, "learning_rate": 9.679122430261048e-06, "loss": 0.5494, "step": 11813 }, { "epoch": 0.14124989538373248, "grad_norm": 2.1505517959594727, "learning_rate": 9.679054183232625e-06, "loss": 0.6418, "step": 11814 }, { "epoch": 0.1412618515285931, "grad_norm": 2.4102137088775635, "learning_rate": 9.678985929187942e-06, "loss": 0.6522, "step": 11815 }, { "epoch": 0.14127380767345377, "grad_norm": 2.0029656887054443, "learning_rate": 9.678917668127103e-06, "loss": 0.6061, "step": 11816 }, { "epoch": 0.14128576381831442, "grad_norm": 42.25804138183594, "learning_rate": 9.67884940005021e-06, "loss": 0.5079, "step": 11817 }, { "epoch": 0.14129771996317508, "grad_norm": 2.193974018096924, "learning_rate": 9.678781124957366e-06, "loss": 0.5426, "step": 11818 }, { "epoch": 0.14130967610803571, "grad_norm": 2.80544114112854, "learning_rate": 9.678712842848674e-06, "loss": 0.5842, "step": 11819 }, { "epoch": 0.14132163225289637, "grad_norm": 2.6885673999786377, "learning_rate": 9.678644553724234e-06, "loss": 0.6987, "step": 11820 }, { "epoch": 0.14133358839775703, "grad_norm": 2.3614132404327393, "learning_rate": 9.678576257584151e-06, "loss": 0.6521, "step": 11821 }, { "epoch": 0.1413455445426177, "grad_norm": 2.3937106132507324, "learning_rate": 9.678507954428527e-06, "loss": 0.5315, "step": 11822 }, { "epoch": 0.14135750068747832, "grad_norm": 1.8006184101104736, "learning_rate": 9.678439644257463e-06, "loss": 0.6959, "step": 11823 }, { "epoch": 0.14136945683233898, "grad_norm": 1.9767295122146606, "learning_rate": 9.67837132707106e-06, "loss": 0.5228, "step": 11824 }, { "epoch": 0.14138141297719964, "grad_norm": 2.0423636436462402, "learning_rate": 9.678303002869424e-06, "loss": 0.6337, "step": 11825 }, { "epoch": 0.14139336912206027, "grad_norm": 1.852014422416687, "learning_rate": 9.678234671652658e-06, "loss": 0.5236, "step": 11826 }, { "epoch": 0.14140532526692093, "grad_norm": 2.935006618499756, "learning_rate": 9.678166333420859e-06, "loss": 0.5518, "step": 11827 }, { "epoch": 0.1414172814117816, "grad_norm": 27.324695587158203, "learning_rate": 9.678097988174135e-06, "loss": 0.5879, "step": 11828 }, { "epoch": 0.14142923755664225, "grad_norm": 14.879343032836914, "learning_rate": 9.678029635912588e-06, "loss": 0.5833, "step": 11829 }, { "epoch": 0.14144119370150288, "grad_norm": 2.7880940437316895, "learning_rate": 9.677961276636317e-06, "loss": 0.5974, "step": 11830 }, { "epoch": 0.14145314984636354, "grad_norm": 3.5576529502868652, "learning_rate": 9.677892910345425e-06, "loss": 0.6814, "step": 11831 }, { "epoch": 0.1414651059912242, "grad_norm": 2.3341965675354004, "learning_rate": 9.677824537040018e-06, "loss": 0.6971, "step": 11832 }, { "epoch": 0.14147706213608485, "grad_norm": 8.638009071350098, "learning_rate": 9.677756156720198e-06, "loss": 0.615, "step": 11833 }, { "epoch": 0.14148901828094548, "grad_norm": 1.7684866189956665, "learning_rate": 9.677687769386062e-06, "loss": 0.547, "step": 11834 }, { "epoch": 0.14150097442580614, "grad_norm": 1.9415003061294556, "learning_rate": 9.67761937503772e-06, "loss": 0.6096, "step": 11835 }, { "epoch": 0.1415129305706668, "grad_norm": 1.517182469367981, "learning_rate": 9.67755097367527e-06, "loss": 0.5496, "step": 11836 }, { "epoch": 0.14152488671552743, "grad_norm": 2.48344087600708, "learning_rate": 9.677482565298814e-06, "loss": 0.6685, "step": 11837 }, { "epoch": 0.1415368428603881, "grad_norm": 2.0946695804595947, "learning_rate": 9.677414149908457e-06, "loss": 0.6001, "step": 11838 }, { "epoch": 0.14154879900524875, "grad_norm": 2.1071951389312744, "learning_rate": 9.677345727504302e-06, "loss": 0.5962, "step": 11839 }, { "epoch": 0.1415607551501094, "grad_norm": 2.9493801593780518, "learning_rate": 9.677277298086449e-06, "loss": 0.7203, "step": 11840 }, { "epoch": 0.14157271129497004, "grad_norm": 3.4158685207366943, "learning_rate": 9.677208861655004e-06, "loss": 0.6794, "step": 11841 }, { "epoch": 0.1415846674398307, "grad_norm": 2.727696657180786, "learning_rate": 9.677140418210067e-06, "loss": 0.6325, "step": 11842 }, { "epoch": 0.14159662358469136, "grad_norm": 2.257948398590088, "learning_rate": 9.67707196775174e-06, "loss": 0.7191, "step": 11843 }, { "epoch": 0.14160857972955201, "grad_norm": 2.362983465194702, "learning_rate": 9.677003510280129e-06, "loss": 0.579, "step": 11844 }, { "epoch": 0.14162053587441265, "grad_norm": 2.2846240997314453, "learning_rate": 9.676935045795334e-06, "loss": 0.6101, "step": 11845 }, { "epoch": 0.1416324920192733, "grad_norm": 1.7878187894821167, "learning_rate": 9.676866574297458e-06, "loss": 0.5606, "step": 11846 }, { "epoch": 0.14164444816413396, "grad_norm": 1.6814296245574951, "learning_rate": 9.676798095786604e-06, "loss": 0.6828, "step": 11847 }, { "epoch": 0.14165640430899462, "grad_norm": 4.49492883682251, "learning_rate": 9.676729610262874e-06, "loss": 0.6148, "step": 11848 }, { "epoch": 0.14166836045385525, "grad_norm": 1.372219443321228, "learning_rate": 9.676661117726372e-06, "loss": 0.6816, "step": 11849 }, { "epoch": 0.1416803165987159, "grad_norm": 2.1452925205230713, "learning_rate": 9.6765926181772e-06, "loss": 0.6018, "step": 11850 }, { "epoch": 0.14169227274357657, "grad_norm": 5.814630031585693, "learning_rate": 9.67652411161546e-06, "loss": 0.5791, "step": 11851 }, { "epoch": 0.1417042288884372, "grad_norm": 1.5605971813201904, "learning_rate": 9.676455598041256e-06, "loss": 0.5748, "step": 11852 }, { "epoch": 0.14171618503329786, "grad_norm": 2.3237600326538086, "learning_rate": 9.67638707745469e-06, "loss": 0.6307, "step": 11853 }, { "epoch": 0.14172814117815852, "grad_norm": 1.8539079427719116, "learning_rate": 9.676318549855866e-06, "loss": 0.6705, "step": 11854 }, { "epoch": 0.14174009732301918, "grad_norm": 2.5494794845581055, "learning_rate": 9.676250015244884e-06, "loss": 0.6177, "step": 11855 }, { "epoch": 0.1417520534678798, "grad_norm": 3.951120138168335, "learning_rate": 9.676181473621849e-06, "loss": 0.6984, "step": 11856 }, { "epoch": 0.14176400961274047, "grad_norm": 1.9230537414550781, "learning_rate": 9.676112924986865e-06, "loss": 0.6248, "step": 11857 }, { "epoch": 0.14177596575760112, "grad_norm": 16.581130981445312, "learning_rate": 9.676044369340032e-06, "loss": 0.6358, "step": 11858 }, { "epoch": 0.14178792190246178, "grad_norm": 2.017932653427124, "learning_rate": 9.675975806681453e-06, "loss": 0.5829, "step": 11859 }, { "epoch": 0.14179987804732241, "grad_norm": 2.143110990524292, "learning_rate": 9.675907237011232e-06, "loss": 0.603, "step": 11860 }, { "epoch": 0.14181183419218307, "grad_norm": 2.8293163776397705, "learning_rate": 9.675838660329471e-06, "loss": 0.6211, "step": 11861 }, { "epoch": 0.14182379033704373, "grad_norm": 3.6212313175201416, "learning_rate": 9.675770076636274e-06, "loss": 0.5826, "step": 11862 }, { "epoch": 0.14183574648190436, "grad_norm": 1.7882403135299683, "learning_rate": 9.675701485931744e-06, "loss": 0.643, "step": 11863 }, { "epoch": 0.14184770262676502, "grad_norm": 2.92830753326416, "learning_rate": 9.675632888215982e-06, "loss": 0.6002, "step": 11864 }, { "epoch": 0.14185965877162568, "grad_norm": 6.255271911621094, "learning_rate": 9.675564283489092e-06, "loss": 0.679, "step": 11865 }, { "epoch": 0.14187161491648634, "grad_norm": 1.5176067352294922, "learning_rate": 9.675495671751177e-06, "loss": 0.7002, "step": 11866 }, { "epoch": 0.14188357106134697, "grad_norm": 1.4769726991653442, "learning_rate": 9.675427053002339e-06, "loss": 0.527, "step": 11867 }, { "epoch": 0.14189552720620763, "grad_norm": 2.3443472385406494, "learning_rate": 9.675358427242682e-06, "loss": 0.5933, "step": 11868 }, { "epoch": 0.1419074833510683, "grad_norm": 2.040989637374878, "learning_rate": 9.675289794472309e-06, "loss": 0.6164, "step": 11869 }, { "epoch": 0.14191943949592895, "grad_norm": 1.8176575899124146, "learning_rate": 9.67522115469132e-06, "loss": 0.5754, "step": 11870 }, { "epoch": 0.14193139564078958, "grad_norm": 4.196867942810059, "learning_rate": 9.675152507899822e-06, "loss": 0.7384, "step": 11871 }, { "epoch": 0.14194335178565023, "grad_norm": 2.625760555267334, "learning_rate": 9.675083854097917e-06, "loss": 0.6012, "step": 11872 }, { "epoch": 0.1419553079305109, "grad_norm": 1.861846923828125, "learning_rate": 9.675015193285706e-06, "loss": 0.6746, "step": 11873 }, { "epoch": 0.14196726407537152, "grad_norm": 4.02211332321167, "learning_rate": 9.674946525463293e-06, "loss": 0.6317, "step": 11874 }, { "epoch": 0.14197922022023218, "grad_norm": 4.074284553527832, "learning_rate": 9.674877850630782e-06, "loss": 0.6439, "step": 11875 }, { "epoch": 0.14199117636509284, "grad_norm": 1.6548149585723877, "learning_rate": 9.674809168788275e-06, "loss": 0.6077, "step": 11876 }, { "epoch": 0.1420031325099535, "grad_norm": 4.123317718505859, "learning_rate": 9.674740479935873e-06, "loss": 0.6809, "step": 11877 }, { "epoch": 0.14201508865481413, "grad_norm": 1.7966179847717285, "learning_rate": 9.674671784073684e-06, "loss": 0.6196, "step": 11878 }, { "epoch": 0.1420270447996748, "grad_norm": 3.884183168411255, "learning_rate": 9.674603081201805e-06, "loss": 0.5706, "step": 11879 }, { "epoch": 0.14203900094453545, "grad_norm": 1.887467861175537, "learning_rate": 9.674534371320345e-06, "loss": 0.5837, "step": 11880 }, { "epoch": 0.1420509570893961, "grad_norm": 2.8857169151306152, "learning_rate": 9.674465654429401e-06, "loss": 0.6053, "step": 11881 }, { "epoch": 0.14206291323425674, "grad_norm": 1.9701228141784668, "learning_rate": 9.674396930529082e-06, "loss": 0.5875, "step": 11882 }, { "epoch": 0.1420748693791174, "grad_norm": 1.7024836540222168, "learning_rate": 9.674328199619487e-06, "loss": 0.5522, "step": 11883 }, { "epoch": 0.14208682552397806, "grad_norm": 1.413533329963684, "learning_rate": 9.67425946170072e-06, "loss": 0.5735, "step": 11884 }, { "epoch": 0.1420987816688387, "grad_norm": 4.047450542449951, "learning_rate": 9.674190716772884e-06, "loss": 0.5703, "step": 11885 }, { "epoch": 0.14211073781369934, "grad_norm": 2.3106131553649902, "learning_rate": 9.674121964836084e-06, "loss": 0.6042, "step": 11886 }, { "epoch": 0.14212269395856, "grad_norm": 1.91390860080719, "learning_rate": 9.674053205890422e-06, "loss": 0.5517, "step": 11887 }, { "epoch": 0.14213465010342066, "grad_norm": 6.571054458618164, "learning_rate": 9.673984439935998e-06, "loss": 0.6196, "step": 11888 }, { "epoch": 0.1421466062482813, "grad_norm": 1.849228858947754, "learning_rate": 9.673915666972918e-06, "loss": 0.6179, "step": 11889 }, { "epoch": 0.14215856239314195, "grad_norm": 2.6151115894317627, "learning_rate": 9.673846887001286e-06, "loss": 0.646, "step": 11890 }, { "epoch": 0.1421705185380026, "grad_norm": 2.1769213676452637, "learning_rate": 9.673778100021203e-06, "loss": 0.6811, "step": 11891 }, { "epoch": 0.14218247468286327, "grad_norm": 5.69650936126709, "learning_rate": 9.673709306032775e-06, "loss": 0.7248, "step": 11892 }, { "epoch": 0.1421944308277239, "grad_norm": 2.1218833923339844, "learning_rate": 9.673640505036101e-06, "loss": 0.6091, "step": 11893 }, { "epoch": 0.14220638697258456, "grad_norm": 1.6151988506317139, "learning_rate": 9.673571697031287e-06, "loss": 0.5641, "step": 11894 }, { "epoch": 0.14221834311744522, "grad_norm": 3.531388282775879, "learning_rate": 9.673502882018437e-06, "loss": 0.5916, "step": 11895 }, { "epoch": 0.14223029926230588, "grad_norm": 3.6267223358154297, "learning_rate": 9.673434059997654e-06, "loss": 0.6602, "step": 11896 }, { "epoch": 0.1422422554071665, "grad_norm": 1.9016317129135132, "learning_rate": 9.673365230969036e-06, "loss": 0.6669, "step": 11897 }, { "epoch": 0.14225421155202717, "grad_norm": 1.849643349647522, "learning_rate": 9.673296394932691e-06, "loss": 0.5576, "step": 11898 }, { "epoch": 0.14226616769688782, "grad_norm": 3.345181703567505, "learning_rate": 9.673227551888724e-06, "loss": 0.6572, "step": 11899 }, { "epoch": 0.14227812384174846, "grad_norm": 3.6269137859344482, "learning_rate": 9.673158701837235e-06, "loss": 0.553, "step": 11900 }, { "epoch": 0.1422900799866091, "grad_norm": 2.9048826694488525, "learning_rate": 9.673089844778327e-06, "loss": 0.6098, "step": 11901 }, { "epoch": 0.14230203613146977, "grad_norm": 2.1914222240448, "learning_rate": 9.673020980712104e-06, "loss": 0.5552, "step": 11902 }, { "epoch": 0.14231399227633043, "grad_norm": 1.8387830257415771, "learning_rate": 9.67295210963867e-06, "loss": 0.5692, "step": 11903 }, { "epoch": 0.14232594842119106, "grad_norm": 7.120871543884277, "learning_rate": 9.672883231558127e-06, "loss": 0.5842, "step": 11904 }, { "epoch": 0.14233790456605172, "grad_norm": 7.437304973602295, "learning_rate": 9.67281434647058e-06, "loss": 0.6254, "step": 11905 }, { "epoch": 0.14234986071091238, "grad_norm": 3.3187570571899414, "learning_rate": 9.672745454376131e-06, "loss": 0.5912, "step": 11906 }, { "epoch": 0.14236181685577304, "grad_norm": 2.5351486206054688, "learning_rate": 9.672676555274882e-06, "loss": 0.5768, "step": 11907 }, { "epoch": 0.14237377300063367, "grad_norm": 1.5648629665374756, "learning_rate": 9.67260764916694e-06, "loss": 0.538, "step": 11908 }, { "epoch": 0.14238572914549433, "grad_norm": 6.113496780395508, "learning_rate": 9.672538736052405e-06, "loss": 0.6724, "step": 11909 }, { "epoch": 0.14239768529035499, "grad_norm": 8.437667846679688, "learning_rate": 9.672469815931383e-06, "loss": 0.626, "step": 11910 }, { "epoch": 0.14240964143521562, "grad_norm": 2.3369197845458984, "learning_rate": 9.672400888803975e-06, "loss": 0.5329, "step": 11911 }, { "epoch": 0.14242159758007628, "grad_norm": 1.516746163368225, "learning_rate": 9.672331954670286e-06, "loss": 0.6309, "step": 11912 }, { "epoch": 0.14243355372493693, "grad_norm": 2.5442564487457275, "learning_rate": 9.672263013530416e-06, "loss": 0.5891, "step": 11913 }, { "epoch": 0.1424455098697976, "grad_norm": 2.2745096683502197, "learning_rate": 9.672194065384474e-06, "loss": 0.6197, "step": 11914 }, { "epoch": 0.14245746601465822, "grad_norm": 1.7926480770111084, "learning_rate": 9.67212511023256e-06, "loss": 0.6394, "step": 11915 }, { "epoch": 0.14246942215951888, "grad_norm": 2.55853271484375, "learning_rate": 9.672056148074776e-06, "loss": 0.6298, "step": 11916 }, { "epoch": 0.14248137830437954, "grad_norm": 6.633262634277344, "learning_rate": 9.671987178911228e-06, "loss": 0.6502, "step": 11917 }, { "epoch": 0.1424933344492402, "grad_norm": 3.0736441612243652, "learning_rate": 9.671918202742019e-06, "loss": 0.7303, "step": 11918 }, { "epoch": 0.14250529059410083, "grad_norm": 5.34234094619751, "learning_rate": 9.671849219567252e-06, "loss": 0.6211, "step": 11919 }, { "epoch": 0.1425172467389615, "grad_norm": 1.8059769868850708, "learning_rate": 9.671780229387032e-06, "loss": 0.7114, "step": 11920 }, { "epoch": 0.14252920288382215, "grad_norm": 3.2004756927490234, "learning_rate": 9.671711232201458e-06, "loss": 0.5622, "step": 11921 }, { "epoch": 0.14254115902868278, "grad_norm": 2.3074915409088135, "learning_rate": 9.671642228010638e-06, "loss": 0.7057, "step": 11922 }, { "epoch": 0.14255311517354344, "grad_norm": 3.052676200866699, "learning_rate": 9.671573216814673e-06, "loss": 0.71, "step": 11923 }, { "epoch": 0.1425650713184041, "grad_norm": 2.19689679145813, "learning_rate": 9.671504198613668e-06, "loss": 0.5375, "step": 11924 }, { "epoch": 0.14257702746326475, "grad_norm": 1.6835373640060425, "learning_rate": 9.671435173407727e-06, "loss": 0.6263, "step": 11925 }, { "epoch": 0.14258898360812539, "grad_norm": 2.558573007583618, "learning_rate": 9.671366141196949e-06, "loss": 0.5907, "step": 11926 }, { "epoch": 0.14260093975298604, "grad_norm": 3.1984710693359375, "learning_rate": 9.671297101981446e-06, "loss": 0.6758, "step": 11927 }, { "epoch": 0.1426128958978467, "grad_norm": 2.163466453552246, "learning_rate": 9.671228055761312e-06, "loss": 0.6285, "step": 11928 }, { "epoch": 0.14262485204270736, "grad_norm": 3.0346572399139404, "learning_rate": 9.671159002536656e-06, "loss": 0.6195, "step": 11929 }, { "epoch": 0.142636808187568, "grad_norm": 4.756256103515625, "learning_rate": 9.67108994230758e-06, "loss": 0.7145, "step": 11930 }, { "epoch": 0.14264876433242865, "grad_norm": 2.2384629249572754, "learning_rate": 9.671020875074189e-06, "loss": 0.6406, "step": 11931 }, { "epoch": 0.1426607204772893, "grad_norm": 7.147006988525391, "learning_rate": 9.670951800836586e-06, "loss": 0.6032, "step": 11932 }, { "epoch": 0.14267267662214994, "grad_norm": 1.9587618112564087, "learning_rate": 9.670882719594873e-06, "loss": 0.6546, "step": 11933 }, { "epoch": 0.1426846327670106, "grad_norm": 3.5533525943756104, "learning_rate": 9.670813631349156e-06, "loss": 0.6696, "step": 11934 }, { "epoch": 0.14269658891187126, "grad_norm": 3.178007125854492, "learning_rate": 9.670744536099537e-06, "loss": 0.4696, "step": 11935 }, { "epoch": 0.14270854505673192, "grad_norm": 2.256636381149292, "learning_rate": 9.670675433846119e-06, "loss": 0.5918, "step": 11936 }, { "epoch": 0.14272050120159255, "grad_norm": 5.167954444885254, "learning_rate": 9.670606324589007e-06, "loss": 0.6114, "step": 11937 }, { "epoch": 0.1427324573464532, "grad_norm": 5.378835201263428, "learning_rate": 9.670537208328305e-06, "loss": 0.6884, "step": 11938 }, { "epoch": 0.14274441349131387, "grad_norm": 2.3368687629699707, "learning_rate": 9.670468085064115e-06, "loss": 0.6055, "step": 11939 }, { "epoch": 0.14275636963617452, "grad_norm": 5.557821273803711, "learning_rate": 9.670398954796542e-06, "loss": 0.589, "step": 11940 }, { "epoch": 0.14276832578103515, "grad_norm": 2.971118450164795, "learning_rate": 9.67032981752569e-06, "loss": 0.5154, "step": 11941 }, { "epoch": 0.1427802819258958, "grad_norm": 1.9280892610549927, "learning_rate": 9.67026067325166e-06, "loss": 0.6619, "step": 11942 }, { "epoch": 0.14279223807075647, "grad_norm": 1.8357102870941162, "learning_rate": 9.670191521974558e-06, "loss": 0.6334, "step": 11943 }, { "epoch": 0.1428041942156171, "grad_norm": 4.204207897186279, "learning_rate": 9.670122363694488e-06, "loss": 0.5268, "step": 11944 }, { "epoch": 0.14281615036047776, "grad_norm": 2.2864251136779785, "learning_rate": 9.670053198411553e-06, "loss": 0.6485, "step": 11945 }, { "epoch": 0.14282810650533842, "grad_norm": 9.872477531433105, "learning_rate": 9.669984026125856e-06, "loss": 0.6756, "step": 11946 }, { "epoch": 0.14284006265019908, "grad_norm": 4.55291223526001, "learning_rate": 9.669914846837502e-06, "loss": 0.5896, "step": 11947 }, { "epoch": 0.1428520187950597, "grad_norm": 2.309798240661621, "learning_rate": 9.669845660546592e-06, "loss": 0.6768, "step": 11948 }, { "epoch": 0.14286397493992037, "grad_norm": 1.5126044750213623, "learning_rate": 9.669776467253234e-06, "loss": 0.6411, "step": 11949 }, { "epoch": 0.14287593108478103, "grad_norm": 1.629658818244934, "learning_rate": 9.669707266957528e-06, "loss": 0.6125, "step": 11950 }, { "epoch": 0.14288788722964169, "grad_norm": 2.4076273441314697, "learning_rate": 9.669638059659582e-06, "loss": 0.5829, "step": 11951 }, { "epoch": 0.14289984337450232, "grad_norm": 2.1086230278015137, "learning_rate": 9.669568845359495e-06, "loss": 0.6761, "step": 11952 }, { "epoch": 0.14291179951936298, "grad_norm": 1.816620111465454, "learning_rate": 9.669499624057374e-06, "loss": 0.6524, "step": 11953 }, { "epoch": 0.14292375566422363, "grad_norm": 2.203251361846924, "learning_rate": 9.66943039575332e-06, "loss": 0.5236, "step": 11954 }, { "epoch": 0.1429357118090843, "grad_norm": 1.7228004932403564, "learning_rate": 9.66936116044744e-06, "loss": 0.6856, "step": 11955 }, { "epoch": 0.14294766795394492, "grad_norm": 4.704005241394043, "learning_rate": 9.669291918139836e-06, "loss": 0.6132, "step": 11956 }, { "epoch": 0.14295962409880558, "grad_norm": 3.690857410430908, "learning_rate": 9.669222668830613e-06, "loss": 0.5462, "step": 11957 }, { "epoch": 0.14297158024366624, "grad_norm": 1.8087342977523804, "learning_rate": 9.669153412519872e-06, "loss": 0.5755, "step": 11958 }, { "epoch": 0.14298353638852687, "grad_norm": 4.44474983215332, "learning_rate": 9.669084149207722e-06, "loss": 0.6046, "step": 11959 }, { "epoch": 0.14299549253338753, "grad_norm": 2.471717119216919, "learning_rate": 9.669014878894261e-06, "loss": 0.5692, "step": 11960 }, { "epoch": 0.1430074486782482, "grad_norm": 1.5697779655456543, "learning_rate": 9.668945601579595e-06, "loss": 0.6089, "step": 11961 }, { "epoch": 0.14301940482310885, "grad_norm": 3.0107581615448, "learning_rate": 9.668876317263832e-06, "loss": 0.6483, "step": 11962 }, { "epoch": 0.14303136096796948, "grad_norm": 4.370799541473389, "learning_rate": 9.66880702594707e-06, "loss": 0.6532, "step": 11963 }, { "epoch": 0.14304331711283014, "grad_norm": 27.980186462402344, "learning_rate": 9.668737727629414e-06, "loss": 0.7057, "step": 11964 }, { "epoch": 0.1430552732576908, "grad_norm": 1.910394549369812, "learning_rate": 9.66866842231097e-06, "loss": 0.6294, "step": 11965 }, { "epoch": 0.14306722940255145, "grad_norm": 2.931441307067871, "learning_rate": 9.668599109991843e-06, "loss": 0.7272, "step": 11966 }, { "epoch": 0.14307918554741209, "grad_norm": 3.143589973449707, "learning_rate": 9.668529790672134e-06, "loss": 0.6273, "step": 11967 }, { "epoch": 0.14309114169227274, "grad_norm": 8.15556812286377, "learning_rate": 9.668460464351947e-06, "loss": 0.6172, "step": 11968 }, { "epoch": 0.1431030978371334, "grad_norm": 3.5476877689361572, "learning_rate": 9.668391131031387e-06, "loss": 0.5679, "step": 11969 }, { "epoch": 0.14311505398199403, "grad_norm": 2.380096912384033, "learning_rate": 9.66832179071056e-06, "loss": 0.5916, "step": 11970 }, { "epoch": 0.1431270101268547, "grad_norm": 10.356534004211426, "learning_rate": 9.668252443389565e-06, "loss": 0.6677, "step": 11971 }, { "epoch": 0.14313896627171535, "grad_norm": 3.65527606010437, "learning_rate": 9.668183089068511e-06, "loss": 0.5875, "step": 11972 }, { "epoch": 0.143150922416576, "grad_norm": 7.653534889221191, "learning_rate": 9.668113727747498e-06, "loss": 0.6341, "step": 11973 }, { "epoch": 0.14316287856143664, "grad_norm": 2.660961389541626, "learning_rate": 9.668044359426634e-06, "loss": 0.5149, "step": 11974 }, { "epoch": 0.1431748347062973, "grad_norm": 2.7531280517578125, "learning_rate": 9.667974984106021e-06, "loss": 0.6754, "step": 11975 }, { "epoch": 0.14318679085115796, "grad_norm": 1.7244356870651245, "learning_rate": 9.667905601785762e-06, "loss": 0.6482, "step": 11976 }, { "epoch": 0.14319874699601862, "grad_norm": 2.9687371253967285, "learning_rate": 9.667836212465963e-06, "loss": 0.5881, "step": 11977 }, { "epoch": 0.14321070314087925, "grad_norm": 1.6508365869522095, "learning_rate": 9.667766816146724e-06, "loss": 0.6098, "step": 11978 }, { "epoch": 0.1432226592857399, "grad_norm": 4.493250370025635, "learning_rate": 9.667697412828154e-06, "loss": 0.6546, "step": 11979 }, { "epoch": 0.14323461543060056, "grad_norm": 2.8547770977020264, "learning_rate": 9.667628002510355e-06, "loss": 0.6711, "step": 11980 }, { "epoch": 0.1432465715754612, "grad_norm": 1.8744678497314453, "learning_rate": 9.66755858519343e-06, "loss": 0.6077, "step": 11981 }, { "epoch": 0.14325852772032185, "grad_norm": 2.1531546115875244, "learning_rate": 9.667489160877486e-06, "loss": 0.5942, "step": 11982 }, { "epoch": 0.1432704838651825, "grad_norm": 1.9329569339752197, "learning_rate": 9.667419729562625e-06, "loss": 0.6749, "step": 11983 }, { "epoch": 0.14328244001004317, "grad_norm": 3.1080713272094727, "learning_rate": 9.667350291248953e-06, "loss": 0.6382, "step": 11984 }, { "epoch": 0.1432943961549038, "grad_norm": 2.2825610637664795, "learning_rate": 9.667280845936572e-06, "loss": 0.6711, "step": 11985 }, { "epoch": 0.14330635229976446, "grad_norm": 3.153386354446411, "learning_rate": 9.667211393625583e-06, "loss": 0.6829, "step": 11986 }, { "epoch": 0.14331830844462512, "grad_norm": 2.0129246711730957, "learning_rate": 9.6671419343161e-06, "loss": 0.5542, "step": 11987 }, { "epoch": 0.14333026458948578, "grad_norm": 2.0590596199035645, "learning_rate": 9.667072468008217e-06, "loss": 0.5688, "step": 11988 }, { "epoch": 0.1433422207343464, "grad_norm": 1.9322035312652588, "learning_rate": 9.667002994702043e-06, "loss": 0.6586, "step": 11989 }, { "epoch": 0.14335417687920707, "grad_norm": 2.213268518447876, "learning_rate": 9.66693351439768e-06, "loss": 0.5487, "step": 11990 }, { "epoch": 0.14336613302406773, "grad_norm": 3.0982749462127686, "learning_rate": 9.666864027095238e-06, "loss": 0.5757, "step": 11991 }, { "epoch": 0.14337808916892836, "grad_norm": 2.374363660812378, "learning_rate": 9.666794532794812e-06, "loss": 0.582, "step": 11992 }, { "epoch": 0.14339004531378902, "grad_norm": 6.939388751983643, "learning_rate": 9.666725031496514e-06, "loss": 0.6143, "step": 11993 }, { "epoch": 0.14340200145864967, "grad_norm": 3.2683401107788086, "learning_rate": 9.666655523200443e-06, "loss": 0.6077, "step": 11994 }, { "epoch": 0.14341395760351033, "grad_norm": 2.8359413146972656, "learning_rate": 9.666586007906707e-06, "loss": 0.6716, "step": 11995 }, { "epoch": 0.14342591374837096, "grad_norm": 8.391817092895508, "learning_rate": 9.666516485615408e-06, "loss": 0.6172, "step": 11996 }, { "epoch": 0.14343786989323162, "grad_norm": 2.3769664764404297, "learning_rate": 9.666446956326652e-06, "loss": 0.5967, "step": 11997 }, { "epoch": 0.14344982603809228, "grad_norm": 5.8048996925354, "learning_rate": 9.666377420040542e-06, "loss": 0.5892, "step": 11998 }, { "epoch": 0.14346178218295294, "grad_norm": 5.195908546447754, "learning_rate": 9.666307876757182e-06, "loss": 0.5646, "step": 11999 }, { "epoch": 0.14347373832781357, "grad_norm": 2.1442415714263916, "learning_rate": 9.666238326476677e-06, "loss": 0.6332, "step": 12000 }, { "epoch": 0.14348569447267423, "grad_norm": 1.9437732696533203, "learning_rate": 9.666168769199132e-06, "loss": 0.5111, "step": 12001 }, { "epoch": 0.1434976506175349, "grad_norm": 1.8053011894226074, "learning_rate": 9.666099204924649e-06, "loss": 0.5776, "step": 12002 }, { "epoch": 0.14350960676239552, "grad_norm": 1.8809216022491455, "learning_rate": 9.666029633653333e-06, "loss": 0.5818, "step": 12003 }, { "epoch": 0.14352156290725618, "grad_norm": 5.3856306076049805, "learning_rate": 9.66596005538529e-06, "loss": 0.6678, "step": 12004 }, { "epoch": 0.14353351905211684, "grad_norm": 2.039822816848755, "learning_rate": 9.665890470120622e-06, "loss": 0.6356, "step": 12005 }, { "epoch": 0.1435454751969775, "grad_norm": 2.250476360321045, "learning_rate": 9.665820877859437e-06, "loss": 0.6018, "step": 12006 }, { "epoch": 0.14355743134183813, "grad_norm": 1.6064485311508179, "learning_rate": 9.665751278601836e-06, "loss": 0.5654, "step": 12007 }, { "epoch": 0.14356938748669879, "grad_norm": 4.001762866973877, "learning_rate": 9.665681672347924e-06, "loss": 0.5558, "step": 12008 }, { "epoch": 0.14358134363155944, "grad_norm": 1.9607759714126587, "learning_rate": 9.665612059097806e-06, "loss": 0.604, "step": 12009 }, { "epoch": 0.1435932997764201, "grad_norm": 5.178315162658691, "learning_rate": 9.665542438851586e-06, "loss": 0.6562, "step": 12010 }, { "epoch": 0.14360525592128073, "grad_norm": 1.7825323343276978, "learning_rate": 9.665472811609368e-06, "loss": 0.6382, "step": 12011 }, { "epoch": 0.1436172120661414, "grad_norm": 2.7686383724212646, "learning_rate": 9.665403177371258e-06, "loss": 0.7191, "step": 12012 }, { "epoch": 0.14362916821100205, "grad_norm": 2.7405753135681152, "learning_rate": 9.665333536137358e-06, "loss": 0.6367, "step": 12013 }, { "epoch": 0.1436411243558627, "grad_norm": 1.8303135633468628, "learning_rate": 9.665263887907775e-06, "loss": 0.6334, "step": 12014 }, { "epoch": 0.14365308050072334, "grad_norm": 2.697141408920288, "learning_rate": 9.665194232682612e-06, "loss": 0.6597, "step": 12015 }, { "epoch": 0.143665036645584, "grad_norm": 3.4159276485443115, "learning_rate": 9.665124570461973e-06, "loss": 0.6839, "step": 12016 }, { "epoch": 0.14367699279044466, "grad_norm": 1.758531928062439, "learning_rate": 9.665054901245965e-06, "loss": 0.6363, "step": 12017 }, { "epoch": 0.1436889489353053, "grad_norm": 3.22629976272583, "learning_rate": 9.66498522503469e-06, "loss": 0.6499, "step": 12018 }, { "epoch": 0.14370090508016595, "grad_norm": 2.1706302165985107, "learning_rate": 9.66491554182825e-06, "loss": 0.5441, "step": 12019 }, { "epoch": 0.1437128612250266, "grad_norm": 6.337170124053955, "learning_rate": 9.664845851626757e-06, "loss": 0.7069, "step": 12020 }, { "epoch": 0.14372481736988726, "grad_norm": 3.215461254119873, "learning_rate": 9.664776154430307e-06, "loss": 0.6565, "step": 12021 }, { "epoch": 0.1437367735147479, "grad_norm": 1.6946024894714355, "learning_rate": 9.66470645023901e-06, "loss": 0.5369, "step": 12022 }, { "epoch": 0.14374872965960855, "grad_norm": 2.7265384197235107, "learning_rate": 9.664636739052971e-06, "loss": 0.6189, "step": 12023 }, { "epoch": 0.1437606858044692, "grad_norm": 3.9048209190368652, "learning_rate": 9.664567020872291e-06, "loss": 0.6331, "step": 12024 }, { "epoch": 0.14377264194932987, "grad_norm": 2.5056803226470947, "learning_rate": 9.664497295697076e-06, "loss": 0.6531, "step": 12025 }, { "epoch": 0.1437845980941905, "grad_norm": 1.6206583976745605, "learning_rate": 9.664427563527432e-06, "loss": 0.6017, "step": 12026 }, { "epoch": 0.14379655423905116, "grad_norm": 1.9136621952056885, "learning_rate": 9.664357824363461e-06, "loss": 0.6576, "step": 12027 }, { "epoch": 0.14380851038391182, "grad_norm": 2.3875834941864014, "learning_rate": 9.66428807820527e-06, "loss": 0.6352, "step": 12028 }, { "epoch": 0.14382046652877245, "grad_norm": 3.0637807846069336, "learning_rate": 9.664218325052962e-06, "loss": 0.6608, "step": 12029 }, { "epoch": 0.1438324226736331, "grad_norm": 2.4810314178466797, "learning_rate": 9.664148564906641e-06, "loss": 0.7024, "step": 12030 }, { "epoch": 0.14384437881849377, "grad_norm": 1.947104811668396, "learning_rate": 9.664078797766414e-06, "loss": 0.6216, "step": 12031 }, { "epoch": 0.14385633496335443, "grad_norm": 1.587724208831787, "learning_rate": 9.664009023632384e-06, "loss": 0.6067, "step": 12032 }, { "epoch": 0.14386829110821506, "grad_norm": 4.276402473449707, "learning_rate": 9.663939242504654e-06, "loss": 0.6605, "step": 12033 }, { "epoch": 0.14388024725307572, "grad_norm": 3.5015177726745605, "learning_rate": 9.663869454383333e-06, "loss": 0.6222, "step": 12034 }, { "epoch": 0.14389220339793637, "grad_norm": 2.528930425643921, "learning_rate": 9.663799659268522e-06, "loss": 0.5673, "step": 12035 }, { "epoch": 0.14390415954279703, "grad_norm": 3.514305830001831, "learning_rate": 9.663729857160327e-06, "loss": 0.5737, "step": 12036 }, { "epoch": 0.14391611568765766, "grad_norm": 2.493802547454834, "learning_rate": 9.663660048058853e-06, "loss": 0.6841, "step": 12037 }, { "epoch": 0.14392807183251832, "grad_norm": 3.0346860885620117, "learning_rate": 9.663590231964204e-06, "loss": 0.5852, "step": 12038 }, { "epoch": 0.14394002797737898, "grad_norm": 20.653522491455078, "learning_rate": 9.663520408876485e-06, "loss": 0.588, "step": 12039 }, { "epoch": 0.1439519841222396, "grad_norm": 1.5184954404830933, "learning_rate": 9.663450578795798e-06, "loss": 0.5843, "step": 12040 }, { "epoch": 0.14396394026710027, "grad_norm": 3.2878611087799072, "learning_rate": 9.663380741722254e-06, "loss": 0.6052, "step": 12041 }, { "epoch": 0.14397589641196093, "grad_norm": 6.808273792266846, "learning_rate": 9.663310897655953e-06, "loss": 0.6865, "step": 12042 }, { "epoch": 0.1439878525568216, "grad_norm": 2.8418967723846436, "learning_rate": 9.663241046597e-06, "loss": 0.538, "step": 12043 }, { "epoch": 0.14399980870168222, "grad_norm": 2.1010589599609375, "learning_rate": 9.6631711885455e-06, "loss": 0.5951, "step": 12044 }, { "epoch": 0.14401176484654288, "grad_norm": 4.310869216918945, "learning_rate": 9.663101323501561e-06, "loss": 0.6097, "step": 12045 }, { "epoch": 0.14402372099140354, "grad_norm": 2.2879984378814697, "learning_rate": 9.663031451465281e-06, "loss": 0.6571, "step": 12046 }, { "epoch": 0.1440356771362642, "grad_norm": 2.538907051086426, "learning_rate": 9.66296157243677e-06, "loss": 0.7218, "step": 12047 }, { "epoch": 0.14404763328112483, "grad_norm": 3.173971652984619, "learning_rate": 9.662891686416135e-06, "loss": 0.6189, "step": 12048 }, { "epoch": 0.14405958942598548, "grad_norm": 2.8941242694854736, "learning_rate": 9.662821793403472e-06, "loss": 0.622, "step": 12049 }, { "epoch": 0.14407154557084614, "grad_norm": 4.152329444885254, "learning_rate": 9.662751893398895e-06, "loss": 0.6326, "step": 12050 }, { "epoch": 0.14408350171570677, "grad_norm": 2.199585199356079, "learning_rate": 9.662681986402503e-06, "loss": 0.6134, "step": 12051 }, { "epoch": 0.14409545786056743, "grad_norm": 3.5323667526245117, "learning_rate": 9.662612072414404e-06, "loss": 0.7558, "step": 12052 }, { "epoch": 0.1441074140054281, "grad_norm": 3.4345157146453857, "learning_rate": 9.662542151434701e-06, "loss": 0.6199, "step": 12053 }, { "epoch": 0.14411937015028875, "grad_norm": 2.133448362350464, "learning_rate": 9.6624722234635e-06, "loss": 0.5277, "step": 12054 }, { "epoch": 0.14413132629514938, "grad_norm": 5.00504732131958, "learning_rate": 9.662402288500905e-06, "loss": 0.6261, "step": 12055 }, { "epoch": 0.14414328244001004, "grad_norm": 6.338324069976807, "learning_rate": 9.662332346547022e-06, "loss": 0.6421, "step": 12056 }, { "epoch": 0.1441552385848707, "grad_norm": 2.0175094604492188, "learning_rate": 9.662262397601953e-06, "loss": 0.5835, "step": 12057 }, { "epoch": 0.14416719472973136, "grad_norm": 4.679686069488525, "learning_rate": 9.662192441665806e-06, "loss": 0.6489, "step": 12058 }, { "epoch": 0.144179150874592, "grad_norm": 2.9877049922943115, "learning_rate": 9.662122478738685e-06, "loss": 0.6317, "step": 12059 }, { "epoch": 0.14419110701945265, "grad_norm": 2.1502022743225098, "learning_rate": 9.662052508820695e-06, "loss": 0.6388, "step": 12060 }, { "epoch": 0.1442030631643133, "grad_norm": 2.6568586826324463, "learning_rate": 9.66198253191194e-06, "loss": 0.5787, "step": 12061 }, { "epoch": 0.14421501930917394, "grad_norm": 1.6755733489990234, "learning_rate": 9.661912548012527e-06, "loss": 0.5529, "step": 12062 }, { "epoch": 0.1442269754540346, "grad_norm": 2.261878728866577, "learning_rate": 9.661842557122558e-06, "loss": 0.5089, "step": 12063 }, { "epoch": 0.14423893159889525, "grad_norm": 4.294517517089844, "learning_rate": 9.66177255924214e-06, "loss": 0.5754, "step": 12064 }, { "epoch": 0.1442508877437559, "grad_norm": 5.343157768249512, "learning_rate": 9.661702554371378e-06, "loss": 0.6487, "step": 12065 }, { "epoch": 0.14426284388861654, "grad_norm": 2.2078397274017334, "learning_rate": 9.661632542510376e-06, "loss": 0.6368, "step": 12066 }, { "epoch": 0.1442748000334772, "grad_norm": 20.19651222229004, "learning_rate": 9.66156252365924e-06, "loss": 0.637, "step": 12067 }, { "epoch": 0.14428675617833786, "grad_norm": 1.6525760889053345, "learning_rate": 9.661492497818074e-06, "loss": 0.6061, "step": 12068 }, { "epoch": 0.14429871232319852, "grad_norm": 4.933875560760498, "learning_rate": 9.661422464986982e-06, "loss": 0.6074, "step": 12069 }, { "epoch": 0.14431066846805915, "grad_norm": 1.806801199913025, "learning_rate": 9.661352425166073e-06, "loss": 0.6336, "step": 12070 }, { "epoch": 0.1443226246129198, "grad_norm": 2.5719964504241943, "learning_rate": 9.661282378355448e-06, "loss": 0.6894, "step": 12071 }, { "epoch": 0.14433458075778047, "grad_norm": 5.5565290451049805, "learning_rate": 9.661212324555213e-06, "loss": 0.6836, "step": 12072 }, { "epoch": 0.14434653690264113, "grad_norm": 2.302079916000366, "learning_rate": 9.661142263765475e-06, "loss": 0.6912, "step": 12073 }, { "epoch": 0.14435849304750176, "grad_norm": 2.1954729557037354, "learning_rate": 9.661072195986337e-06, "loss": 0.5908, "step": 12074 }, { "epoch": 0.14437044919236242, "grad_norm": 3.7473535537719727, "learning_rate": 9.661002121217906e-06, "loss": 0.6707, "step": 12075 }, { "epoch": 0.14438240533722307, "grad_norm": 2.2543346881866455, "learning_rate": 9.660932039460284e-06, "loss": 0.558, "step": 12076 }, { "epoch": 0.1443943614820837, "grad_norm": 2.6777420043945312, "learning_rate": 9.660861950713577e-06, "loss": 0.5775, "step": 12077 }, { "epoch": 0.14440631762694436, "grad_norm": 4.698396682739258, "learning_rate": 9.660791854977894e-06, "loss": 0.5203, "step": 12078 }, { "epoch": 0.14441827377180502, "grad_norm": 2.191171884536743, "learning_rate": 9.660721752253335e-06, "loss": 0.5764, "step": 12079 }, { "epoch": 0.14443022991666568, "grad_norm": 2.1557846069335938, "learning_rate": 9.660651642540007e-06, "loss": 0.6665, "step": 12080 }, { "epoch": 0.1444421860615263, "grad_norm": 2.0072739124298096, "learning_rate": 9.660581525838015e-06, "loss": 0.602, "step": 12081 }, { "epoch": 0.14445414220638697, "grad_norm": 11.276708602905273, "learning_rate": 9.660511402147466e-06, "loss": 0.6685, "step": 12082 }, { "epoch": 0.14446609835124763, "grad_norm": 2.3162660598754883, "learning_rate": 9.660441271468461e-06, "loss": 0.6992, "step": 12083 }, { "epoch": 0.1444780544961083, "grad_norm": 8.892436027526855, "learning_rate": 9.660371133801111e-06, "loss": 0.4931, "step": 12084 }, { "epoch": 0.14449001064096892, "grad_norm": 2.9984521865844727, "learning_rate": 9.660300989145517e-06, "loss": 0.6171, "step": 12085 }, { "epoch": 0.14450196678582958, "grad_norm": 3.292050361633301, "learning_rate": 9.660230837501783e-06, "loss": 0.5366, "step": 12086 }, { "epoch": 0.14451392293069024, "grad_norm": 1.743385672569275, "learning_rate": 9.660160678870019e-06, "loss": 0.6892, "step": 12087 }, { "epoch": 0.14452587907555087, "grad_norm": 1.8471957445144653, "learning_rate": 9.660090513250326e-06, "loss": 0.5597, "step": 12088 }, { "epoch": 0.14453783522041153, "grad_norm": 3.28135347366333, "learning_rate": 9.660020340642811e-06, "loss": 0.5373, "step": 12089 }, { "epoch": 0.14454979136527218, "grad_norm": 3.9404852390289307, "learning_rate": 9.65995016104758e-06, "loss": 0.6138, "step": 12090 }, { "epoch": 0.14456174751013284, "grad_norm": 2.459768295288086, "learning_rate": 9.659879974464734e-06, "loss": 0.5583, "step": 12091 }, { "epoch": 0.14457370365499347, "grad_norm": 4.3247599601745605, "learning_rate": 9.659809780894385e-06, "loss": 0.7009, "step": 12092 }, { "epoch": 0.14458565979985413, "grad_norm": 3.4222538471221924, "learning_rate": 9.659739580336633e-06, "loss": 0.6363, "step": 12093 }, { "epoch": 0.1445976159447148, "grad_norm": 4.706803321838379, "learning_rate": 9.659669372791584e-06, "loss": 0.619, "step": 12094 }, { "epoch": 0.14460957208957545, "grad_norm": 2.6185226440429688, "learning_rate": 9.659599158259345e-06, "loss": 0.5908, "step": 12095 }, { "epoch": 0.14462152823443608, "grad_norm": 1.4849448204040527, "learning_rate": 9.659528936740021e-06, "loss": 0.6137, "step": 12096 }, { "epoch": 0.14463348437929674, "grad_norm": 1.6277518272399902, "learning_rate": 9.659458708233717e-06, "loss": 0.5106, "step": 12097 }, { "epoch": 0.1446454405241574, "grad_norm": 5.689572334289551, "learning_rate": 9.659388472740538e-06, "loss": 0.6688, "step": 12098 }, { "epoch": 0.14465739666901803, "grad_norm": 1.711239218711853, "learning_rate": 9.659318230260589e-06, "loss": 0.5786, "step": 12099 }, { "epoch": 0.1446693528138787, "grad_norm": 11.4483060836792, "learning_rate": 9.659247980793975e-06, "loss": 0.6428, "step": 12100 }, { "epoch": 0.14468130895873935, "grad_norm": 2.2718498706817627, "learning_rate": 9.659177724340805e-06, "loss": 0.5484, "step": 12101 }, { "epoch": 0.1446932651036, "grad_norm": 2.6207666397094727, "learning_rate": 9.659107460901179e-06, "loss": 0.5594, "step": 12102 }, { "epoch": 0.14470522124846064, "grad_norm": 4.746756553649902, "learning_rate": 9.659037190475204e-06, "loss": 0.5501, "step": 12103 }, { "epoch": 0.1447171773933213, "grad_norm": 3.294936418533325, "learning_rate": 9.658966913062988e-06, "loss": 0.696, "step": 12104 }, { "epoch": 0.14472913353818195, "grad_norm": 2.7327826023101807, "learning_rate": 9.658896628664634e-06, "loss": 0.5778, "step": 12105 }, { "epoch": 0.1447410896830426, "grad_norm": 1.7385073900222778, "learning_rate": 9.658826337280247e-06, "loss": 0.6029, "step": 12106 }, { "epoch": 0.14475304582790324, "grad_norm": 1.9942681789398193, "learning_rate": 9.658756038909935e-06, "loss": 0.6357, "step": 12107 }, { "epoch": 0.1447650019727639, "grad_norm": 2.7786619663238525, "learning_rate": 9.6586857335538e-06, "loss": 0.6995, "step": 12108 }, { "epoch": 0.14477695811762456, "grad_norm": 1.8100517988204956, "learning_rate": 9.658615421211951e-06, "loss": 0.5457, "step": 12109 }, { "epoch": 0.1447889142624852, "grad_norm": 1.6646946668624878, "learning_rate": 9.658545101884491e-06, "loss": 0.6366, "step": 12110 }, { "epoch": 0.14480087040734585, "grad_norm": 3.0255985260009766, "learning_rate": 9.658474775571527e-06, "loss": 0.5958, "step": 12111 }, { "epoch": 0.1448128265522065, "grad_norm": 2.945713996887207, "learning_rate": 9.658404442273161e-06, "loss": 0.6303, "step": 12112 }, { "epoch": 0.14482478269706717, "grad_norm": 3.7372307777404785, "learning_rate": 9.658334101989502e-06, "loss": 0.6952, "step": 12113 }, { "epoch": 0.1448367388419278, "grad_norm": 2.1540708541870117, "learning_rate": 9.658263754720656e-06, "loss": 0.7044, "step": 12114 }, { "epoch": 0.14484869498678846, "grad_norm": 4.372424602508545, "learning_rate": 9.658193400466726e-06, "loss": 0.6567, "step": 12115 }, { "epoch": 0.14486065113164912, "grad_norm": 5.767359733581543, "learning_rate": 9.658123039227818e-06, "loss": 0.7015, "step": 12116 }, { "epoch": 0.14487260727650977, "grad_norm": 3.40080189704895, "learning_rate": 9.658052671004038e-06, "loss": 0.5875, "step": 12117 }, { "epoch": 0.1448845634213704, "grad_norm": 1.9119057655334473, "learning_rate": 9.657982295795491e-06, "loss": 0.6621, "step": 12118 }, { "epoch": 0.14489651956623106, "grad_norm": 2.9130496978759766, "learning_rate": 9.657911913602284e-06, "loss": 0.6366, "step": 12119 }, { "epoch": 0.14490847571109172, "grad_norm": 3.143444776535034, "learning_rate": 9.65784152442452e-06, "loss": 0.6948, "step": 12120 }, { "epoch": 0.14492043185595235, "grad_norm": 2.919741153717041, "learning_rate": 9.657771128262308e-06, "loss": 0.6658, "step": 12121 }, { "epoch": 0.144932388000813, "grad_norm": 9.195216178894043, "learning_rate": 9.65770072511575e-06, "loss": 0.5206, "step": 12122 }, { "epoch": 0.14494434414567367, "grad_norm": 3.1813132762908936, "learning_rate": 9.657630314984955e-06, "loss": 0.6056, "step": 12123 }, { "epoch": 0.14495630029053433, "grad_norm": 12.619887351989746, "learning_rate": 9.657559897870025e-06, "loss": 0.6084, "step": 12124 }, { "epoch": 0.14496825643539496, "grad_norm": 29.745756149291992, "learning_rate": 9.657489473771069e-06, "loss": 0.6791, "step": 12125 }, { "epoch": 0.14498021258025562, "grad_norm": 2.0506174564361572, "learning_rate": 9.65741904268819e-06, "loss": 0.6658, "step": 12126 }, { "epoch": 0.14499216872511628, "grad_norm": 2.7219674587249756, "learning_rate": 9.657348604621496e-06, "loss": 0.549, "step": 12127 }, { "epoch": 0.14500412486997694, "grad_norm": 2.9198548793792725, "learning_rate": 9.657278159571088e-06, "loss": 0.6762, "step": 12128 }, { "epoch": 0.14501608101483757, "grad_norm": 4.996282577514648, "learning_rate": 9.657207707537077e-06, "loss": 0.6406, "step": 12129 }, { "epoch": 0.14502803715969823, "grad_norm": 2.1430556774139404, "learning_rate": 9.657137248519567e-06, "loss": 0.6112, "step": 12130 }, { "epoch": 0.14503999330455888, "grad_norm": 3.5439183712005615, "learning_rate": 9.657066782518662e-06, "loss": 0.6795, "step": 12131 }, { "epoch": 0.14505194944941954, "grad_norm": 2.54667067527771, "learning_rate": 9.65699630953447e-06, "loss": 0.6414, "step": 12132 }, { "epoch": 0.14506390559428017, "grad_norm": 1.5692100524902344, "learning_rate": 9.656925829567095e-06, "loss": 0.5714, "step": 12133 }, { "epoch": 0.14507586173914083, "grad_norm": 3.5074727535247803, "learning_rate": 9.656855342616642e-06, "loss": 0.5854, "step": 12134 }, { "epoch": 0.1450878178840015, "grad_norm": 2.051185131072998, "learning_rate": 9.65678484868322e-06, "loss": 0.568, "step": 12135 }, { "epoch": 0.14509977402886212, "grad_norm": 1.7285791635513306, "learning_rate": 9.65671434776693e-06, "loss": 0.6505, "step": 12136 }, { "epoch": 0.14511173017372278, "grad_norm": 6.297573089599609, "learning_rate": 9.656643839867883e-06, "loss": 0.616, "step": 12137 }, { "epoch": 0.14512368631858344, "grad_norm": 2.924131393432617, "learning_rate": 9.65657332498618e-06, "loss": 0.6345, "step": 12138 }, { "epoch": 0.1451356424634441, "grad_norm": 2.0840065479278564, "learning_rate": 9.65650280312193e-06, "loss": 0.5433, "step": 12139 }, { "epoch": 0.14514759860830473, "grad_norm": 3.5074462890625, "learning_rate": 9.656432274275235e-06, "loss": 0.6046, "step": 12140 }, { "epoch": 0.1451595547531654, "grad_norm": 1.9895484447479248, "learning_rate": 9.656361738446208e-06, "loss": 0.5441, "step": 12141 }, { "epoch": 0.14517151089802605, "grad_norm": 2.734060764312744, "learning_rate": 9.656291195634946e-06, "loss": 0.736, "step": 12142 }, { "epoch": 0.1451834670428867, "grad_norm": 2.0696139335632324, "learning_rate": 9.65622064584156e-06, "loss": 0.6767, "step": 12143 }, { "epoch": 0.14519542318774734, "grad_norm": 4.895262241363525, "learning_rate": 9.656150089066154e-06, "loss": 0.6461, "step": 12144 }, { "epoch": 0.145207379332608, "grad_norm": 2.5677785873413086, "learning_rate": 9.656079525308834e-06, "loss": 0.6989, "step": 12145 }, { "epoch": 0.14521933547746865, "grad_norm": 3.592329502105713, "learning_rate": 9.656008954569708e-06, "loss": 0.6041, "step": 12146 }, { "epoch": 0.14523129162232928, "grad_norm": 2.8164563179016113, "learning_rate": 9.655938376848879e-06, "loss": 0.5978, "step": 12147 }, { "epoch": 0.14524324776718994, "grad_norm": 2.9268851280212402, "learning_rate": 9.655867792146454e-06, "loss": 0.6036, "step": 12148 }, { "epoch": 0.1452552039120506, "grad_norm": 3.1613216400146484, "learning_rate": 9.65579720046254e-06, "loss": 0.6752, "step": 12149 }, { "epoch": 0.14526716005691126, "grad_norm": 3.9779553413391113, "learning_rate": 9.65572660179724e-06, "loss": 0.6227, "step": 12150 }, { "epoch": 0.1452791162017719, "grad_norm": 5.241827011108398, "learning_rate": 9.655655996150662e-06, "loss": 0.6445, "step": 12151 }, { "epoch": 0.14529107234663255, "grad_norm": 2.1098246574401855, "learning_rate": 9.65558538352291e-06, "loss": 0.6162, "step": 12152 }, { "epoch": 0.1453030284914932, "grad_norm": 2.410820245742798, "learning_rate": 9.655514763914093e-06, "loss": 0.7917, "step": 12153 }, { "epoch": 0.14531498463635387, "grad_norm": 2.160984754562378, "learning_rate": 9.655444137324313e-06, "loss": 0.5993, "step": 12154 }, { "epoch": 0.1453269407812145, "grad_norm": 2.3066444396972656, "learning_rate": 9.65537350375368e-06, "loss": 0.5625, "step": 12155 }, { "epoch": 0.14533889692607516, "grad_norm": 2.142503261566162, "learning_rate": 9.655302863202296e-06, "loss": 0.673, "step": 12156 }, { "epoch": 0.14535085307093581, "grad_norm": 2.02131724357605, "learning_rate": 9.65523221567027e-06, "loss": 0.5753, "step": 12157 }, { "epoch": 0.14536280921579645, "grad_norm": 1.8850610256195068, "learning_rate": 9.655161561157707e-06, "loss": 0.699, "step": 12158 }, { "epoch": 0.1453747653606571, "grad_norm": 2.7951571941375732, "learning_rate": 9.655090899664713e-06, "loss": 0.6374, "step": 12159 }, { "epoch": 0.14538672150551776, "grad_norm": 2.4377810955047607, "learning_rate": 9.655020231191391e-06, "loss": 0.6058, "step": 12160 }, { "epoch": 0.14539867765037842, "grad_norm": 2.73655104637146, "learning_rate": 9.654949555737853e-06, "loss": 0.6461, "step": 12161 }, { "epoch": 0.14541063379523905, "grad_norm": 2.9206552505493164, "learning_rate": 9.6548788733042e-06, "loss": 0.6003, "step": 12162 }, { "epoch": 0.1454225899400997, "grad_norm": 5.678301811218262, "learning_rate": 9.65480818389054e-06, "loss": 0.5842, "step": 12163 }, { "epoch": 0.14543454608496037, "grad_norm": 2.4514429569244385, "learning_rate": 9.654737487496977e-06, "loss": 0.6159, "step": 12164 }, { "epoch": 0.14544650222982103, "grad_norm": 2.5183258056640625, "learning_rate": 9.65466678412362e-06, "loss": 0.7308, "step": 12165 }, { "epoch": 0.14545845837468166, "grad_norm": 1.8244808912277222, "learning_rate": 9.654596073770574e-06, "loss": 0.6145, "step": 12166 }, { "epoch": 0.14547041451954232, "grad_norm": 1.754957914352417, "learning_rate": 9.654525356437945e-06, "loss": 0.6224, "step": 12167 }, { "epoch": 0.14548237066440298, "grad_norm": 2.058987855911255, "learning_rate": 9.654454632125838e-06, "loss": 0.5463, "step": 12168 }, { "epoch": 0.1454943268092636, "grad_norm": 1.7366596460342407, "learning_rate": 9.65438390083436e-06, "loss": 0.6135, "step": 12169 }, { "epoch": 0.14550628295412427, "grad_norm": 4.338596343994141, "learning_rate": 9.654313162563616e-06, "loss": 0.56, "step": 12170 }, { "epoch": 0.14551823909898493, "grad_norm": 1.7116674184799194, "learning_rate": 9.654242417313712e-06, "loss": 0.6454, "step": 12171 }, { "epoch": 0.14553019524384558, "grad_norm": 3.3396716117858887, "learning_rate": 9.654171665084757e-06, "loss": 0.615, "step": 12172 }, { "epoch": 0.14554215138870621, "grad_norm": 2.184091806411743, "learning_rate": 9.654100905876854e-06, "loss": 0.5907, "step": 12173 }, { "epoch": 0.14555410753356687, "grad_norm": 3.855790853500366, "learning_rate": 9.65403013969011e-06, "loss": 0.6077, "step": 12174 }, { "epoch": 0.14556606367842753, "grad_norm": 2.663055896759033, "learning_rate": 9.653959366524631e-06, "loss": 0.6934, "step": 12175 }, { "epoch": 0.1455780198232882, "grad_norm": 2.9903242588043213, "learning_rate": 9.653888586380526e-06, "loss": 0.6544, "step": 12176 }, { "epoch": 0.14558997596814882, "grad_norm": 5.75654411315918, "learning_rate": 9.653817799257896e-06, "loss": 0.6231, "step": 12177 }, { "epoch": 0.14560193211300948, "grad_norm": 1.9176580905914307, "learning_rate": 9.65374700515685e-06, "loss": 0.6237, "step": 12178 }, { "epoch": 0.14561388825787014, "grad_norm": 15.686559677124023, "learning_rate": 9.653676204077494e-06, "loss": 0.6711, "step": 12179 }, { "epoch": 0.14562584440273077, "grad_norm": 2.78383731842041, "learning_rate": 9.653605396019933e-06, "loss": 0.6682, "step": 12180 }, { "epoch": 0.14563780054759143, "grad_norm": 2.579118013381958, "learning_rate": 9.653534580984276e-06, "loss": 0.5731, "step": 12181 }, { "epoch": 0.1456497566924521, "grad_norm": 2.1052701473236084, "learning_rate": 9.653463758970627e-06, "loss": 0.5344, "step": 12182 }, { "epoch": 0.14566171283731275, "grad_norm": 2.5640599727630615, "learning_rate": 9.653392929979094e-06, "loss": 0.7313, "step": 12183 }, { "epoch": 0.14567366898217338, "grad_norm": 3.1340088844299316, "learning_rate": 9.653322094009778e-06, "loss": 0.5804, "step": 12184 }, { "epoch": 0.14568562512703404, "grad_norm": 4.673832416534424, "learning_rate": 9.65325125106279e-06, "loss": 0.7402, "step": 12185 }, { "epoch": 0.1456975812718947, "grad_norm": 2.4165890216827393, "learning_rate": 9.653180401138237e-06, "loss": 0.5458, "step": 12186 }, { "epoch": 0.14570953741675535, "grad_norm": 1.6729336977005005, "learning_rate": 9.653109544236223e-06, "loss": 0.5555, "step": 12187 }, { "epoch": 0.14572149356161598, "grad_norm": 2.556478500366211, "learning_rate": 9.653038680356854e-06, "loss": 0.5586, "step": 12188 }, { "epoch": 0.14573344970647664, "grad_norm": 5.633881568908691, "learning_rate": 9.652967809500238e-06, "loss": 0.6446, "step": 12189 }, { "epoch": 0.1457454058513373, "grad_norm": 1.8351688385009766, "learning_rate": 9.652896931666478e-06, "loss": 0.6649, "step": 12190 }, { "epoch": 0.14575736199619796, "grad_norm": 1.856810450553894, "learning_rate": 9.652826046855684e-06, "loss": 0.5906, "step": 12191 }, { "epoch": 0.1457693181410586, "grad_norm": 2.1920840740203857, "learning_rate": 9.65275515506796e-06, "loss": 0.6813, "step": 12192 }, { "epoch": 0.14578127428591925, "grad_norm": 3.092146158218384, "learning_rate": 9.652684256303414e-06, "loss": 0.5294, "step": 12193 }, { "epoch": 0.1457932304307799, "grad_norm": 2.395385980606079, "learning_rate": 9.652613350562153e-06, "loss": 0.6325, "step": 12194 }, { "epoch": 0.14580518657564054, "grad_norm": 1.370983600616455, "learning_rate": 9.65254243784428e-06, "loss": 0.5327, "step": 12195 }, { "epoch": 0.1458171427205012, "grad_norm": 2.052978277206421, "learning_rate": 9.652471518149902e-06, "loss": 0.5818, "step": 12196 }, { "epoch": 0.14582909886536186, "grad_norm": 3.2379708290100098, "learning_rate": 9.652400591479127e-06, "loss": 0.6215, "step": 12197 }, { "epoch": 0.14584105501022251, "grad_norm": 2.7501144409179688, "learning_rate": 9.652329657832062e-06, "loss": 0.5579, "step": 12198 }, { "epoch": 0.14585301115508315, "grad_norm": 2.025057554244995, "learning_rate": 9.652258717208811e-06, "loss": 0.5273, "step": 12199 }, { "epoch": 0.1458649672999438, "grad_norm": 2.703827381134033, "learning_rate": 9.652187769609482e-06, "loss": 0.6676, "step": 12200 }, { "epoch": 0.14587692344480446, "grad_norm": 2.4866385459899902, "learning_rate": 9.652116815034181e-06, "loss": 0.6526, "step": 12201 }, { "epoch": 0.14588887958966512, "grad_norm": 2.807263135910034, "learning_rate": 9.652045853483015e-06, "loss": 0.5736, "step": 12202 }, { "epoch": 0.14590083573452575, "grad_norm": 5.477419853210449, "learning_rate": 9.651974884956088e-06, "loss": 0.5934, "step": 12203 }, { "epoch": 0.1459127918793864, "grad_norm": 5.9982476234436035, "learning_rate": 9.651903909453509e-06, "loss": 0.5373, "step": 12204 }, { "epoch": 0.14592474802424707, "grad_norm": 1.9741255044937134, "learning_rate": 9.651832926975384e-06, "loss": 0.5662, "step": 12205 }, { "epoch": 0.1459367041691077, "grad_norm": 1.8938696384429932, "learning_rate": 9.651761937521817e-06, "loss": 0.6717, "step": 12206 }, { "epoch": 0.14594866031396836, "grad_norm": 3.7289206981658936, "learning_rate": 9.651690941092919e-06, "loss": 0.5868, "step": 12207 }, { "epoch": 0.14596061645882902, "grad_norm": 2.316648006439209, "learning_rate": 9.651619937688793e-06, "loss": 0.6699, "step": 12208 }, { "epoch": 0.14597257260368968, "grad_norm": 8.760603904724121, "learning_rate": 9.651548927309546e-06, "loss": 0.5805, "step": 12209 }, { "epoch": 0.1459845287485503, "grad_norm": 4.567224025726318, "learning_rate": 9.651477909955284e-06, "loss": 0.6326, "step": 12210 }, { "epoch": 0.14599648489341097, "grad_norm": 2.2161154747009277, "learning_rate": 9.651406885626117e-06, "loss": 0.5711, "step": 12211 }, { "epoch": 0.14600844103827162, "grad_norm": 2.0435006618499756, "learning_rate": 9.651335854322146e-06, "loss": 0.5869, "step": 12212 }, { "epoch": 0.14602039718313228, "grad_norm": 2.155184030532837, "learning_rate": 9.651264816043481e-06, "loss": 0.6881, "step": 12213 }, { "epoch": 0.14603235332799291, "grad_norm": 1.5493757724761963, "learning_rate": 9.65119377079023e-06, "loss": 0.6156, "step": 12214 }, { "epoch": 0.14604430947285357, "grad_norm": 1.7664239406585693, "learning_rate": 9.651122718562496e-06, "loss": 0.6642, "step": 12215 }, { "epoch": 0.14605626561771423, "grad_norm": 1.7785416841506958, "learning_rate": 9.651051659360386e-06, "loss": 0.639, "step": 12216 }, { "epoch": 0.14606822176257486, "grad_norm": 2.720109462738037, "learning_rate": 9.65098059318401e-06, "loss": 0.6777, "step": 12217 }, { "epoch": 0.14608017790743552, "grad_norm": 2.5546255111694336, "learning_rate": 9.65090952003347e-06, "loss": 0.5699, "step": 12218 }, { "epoch": 0.14609213405229618, "grad_norm": 4.042491912841797, "learning_rate": 9.650838439908873e-06, "loss": 0.6507, "step": 12219 }, { "epoch": 0.14610409019715684, "grad_norm": 2.2517714500427246, "learning_rate": 9.650767352810331e-06, "loss": 0.6598, "step": 12220 }, { "epoch": 0.14611604634201747, "grad_norm": 19.37611198425293, "learning_rate": 9.650696258737943e-06, "loss": 0.5995, "step": 12221 }, { "epoch": 0.14612800248687813, "grad_norm": 1.9503090381622314, "learning_rate": 9.650625157691823e-06, "loss": 0.6049, "step": 12222 }, { "epoch": 0.1461399586317388, "grad_norm": 5.078479290008545, "learning_rate": 9.650554049672073e-06, "loss": 0.6219, "step": 12223 }, { "epoch": 0.14615191477659945, "grad_norm": 2.335904598236084, "learning_rate": 9.6504829346788e-06, "loss": 0.6414, "step": 12224 }, { "epoch": 0.14616387092146008, "grad_norm": 8.885790824890137, "learning_rate": 9.65041181271211e-06, "loss": 0.5855, "step": 12225 }, { "epoch": 0.14617582706632073, "grad_norm": 2.7520751953125, "learning_rate": 9.650340683772115e-06, "loss": 0.5637, "step": 12226 }, { "epoch": 0.1461877832111814, "grad_norm": 1.9807037115097046, "learning_rate": 9.650269547858915e-06, "loss": 0.6247, "step": 12227 }, { "epoch": 0.14619973935604202, "grad_norm": 2.484147787094116, "learning_rate": 9.65019840497262e-06, "loss": 0.6548, "step": 12228 }, { "epoch": 0.14621169550090268, "grad_norm": 3.3907439708709717, "learning_rate": 9.650127255113333e-06, "loss": 0.6672, "step": 12229 }, { "epoch": 0.14622365164576334, "grad_norm": 1.7532973289489746, "learning_rate": 9.650056098281166e-06, "loss": 0.5817, "step": 12230 }, { "epoch": 0.146235607790624, "grad_norm": 23.207307815551758, "learning_rate": 9.649984934476224e-06, "loss": 0.6401, "step": 12231 }, { "epoch": 0.14624756393548463, "grad_norm": 2.296457290649414, "learning_rate": 9.649913763698613e-06, "loss": 0.6196, "step": 12232 }, { "epoch": 0.1462595200803453, "grad_norm": 2.0429487228393555, "learning_rate": 9.64984258594844e-06, "loss": 0.6219, "step": 12233 }, { "epoch": 0.14627147622520595, "grad_norm": 3.725048780441284, "learning_rate": 9.64977140122581e-06, "loss": 0.6469, "step": 12234 }, { "epoch": 0.1462834323700666, "grad_norm": 2.7142221927642822, "learning_rate": 9.649700209530832e-06, "loss": 0.6031, "step": 12235 }, { "epoch": 0.14629538851492724, "grad_norm": 2.5148866176605225, "learning_rate": 9.649629010863612e-06, "loss": 0.5919, "step": 12236 }, { "epoch": 0.1463073446597879, "grad_norm": 1.799542784690857, "learning_rate": 9.649557805224257e-06, "loss": 0.6928, "step": 12237 }, { "epoch": 0.14631930080464856, "grad_norm": 2.4811861515045166, "learning_rate": 9.649486592612873e-06, "loss": 0.5846, "step": 12238 }, { "epoch": 0.1463312569495092, "grad_norm": 2.8323545455932617, "learning_rate": 9.649415373029567e-06, "loss": 0.6928, "step": 12239 }, { "epoch": 0.14634321309436985, "grad_norm": 1.9768089056015015, "learning_rate": 9.649344146474448e-06, "loss": 0.5794, "step": 12240 }, { "epoch": 0.1463551692392305, "grad_norm": 2.5514538288116455, "learning_rate": 9.649272912947619e-06, "loss": 0.6729, "step": 12241 }, { "epoch": 0.14636712538409116, "grad_norm": 4.669729232788086, "learning_rate": 9.64920167244919e-06, "loss": 0.6413, "step": 12242 }, { "epoch": 0.1463790815289518, "grad_norm": 2.4387569427490234, "learning_rate": 9.649130424979265e-06, "loss": 0.567, "step": 12243 }, { "epoch": 0.14639103767381245, "grad_norm": 1.4886194467544556, "learning_rate": 9.649059170537953e-06, "loss": 0.5862, "step": 12244 }, { "epoch": 0.1464029938186731, "grad_norm": 1.890250325202942, "learning_rate": 9.648987909125361e-06, "loss": 0.7011, "step": 12245 }, { "epoch": 0.14641494996353377, "grad_norm": 1.4077471494674683, "learning_rate": 9.648916640741595e-06, "loss": 0.4981, "step": 12246 }, { "epoch": 0.1464269061083944, "grad_norm": 3.8713221549987793, "learning_rate": 9.648845365386762e-06, "loss": 0.7856, "step": 12247 }, { "epoch": 0.14643886225325506, "grad_norm": 1.9004625082015991, "learning_rate": 9.648774083060968e-06, "loss": 0.6836, "step": 12248 }, { "epoch": 0.14645081839811572, "grad_norm": 1.5610114336013794, "learning_rate": 9.648702793764323e-06, "loss": 0.6446, "step": 12249 }, { "epoch": 0.14646277454297638, "grad_norm": 1.996567964553833, "learning_rate": 9.648631497496928e-06, "loss": 0.6457, "step": 12250 }, { "epoch": 0.146474730687837, "grad_norm": 1.4669016599655151, "learning_rate": 9.648560194258895e-06, "loss": 0.6494, "step": 12251 }, { "epoch": 0.14648668683269767, "grad_norm": 2.4442074298858643, "learning_rate": 9.648488884050331e-06, "loss": 0.5861, "step": 12252 }, { "epoch": 0.14649864297755832, "grad_norm": 2.7770590782165527, "learning_rate": 9.64841756687134e-06, "loss": 0.5651, "step": 12253 }, { "epoch": 0.14651059912241896, "grad_norm": 4.204647064208984, "learning_rate": 9.64834624272203e-06, "loss": 0.7033, "step": 12254 }, { "epoch": 0.1465225552672796, "grad_norm": 2.247649669647217, "learning_rate": 9.64827491160251e-06, "loss": 0.7223, "step": 12255 }, { "epoch": 0.14653451141214027, "grad_norm": 2.2372348308563232, "learning_rate": 9.648203573512885e-06, "loss": 0.6629, "step": 12256 }, { "epoch": 0.14654646755700093, "grad_norm": 3.9006898403167725, "learning_rate": 9.64813222845326e-06, "loss": 0.6176, "step": 12257 }, { "epoch": 0.14655842370186156, "grad_norm": 2.0366828441619873, "learning_rate": 9.648060876423746e-06, "loss": 0.6526, "step": 12258 }, { "epoch": 0.14657037984672222, "grad_norm": 2.647515296936035, "learning_rate": 9.647989517424448e-06, "loss": 0.6039, "step": 12259 }, { "epoch": 0.14658233599158288, "grad_norm": 2.833207845687866, "learning_rate": 9.647918151455473e-06, "loss": 0.5898, "step": 12260 }, { "epoch": 0.14659429213644354, "grad_norm": 2.4383151531219482, "learning_rate": 9.64784677851693e-06, "loss": 0.6061, "step": 12261 }, { "epoch": 0.14660624828130417, "grad_norm": 2.024616241455078, "learning_rate": 9.64777539860892e-06, "loss": 0.6018, "step": 12262 }, { "epoch": 0.14661820442616483, "grad_norm": 11.349255561828613, "learning_rate": 9.647704011731557e-06, "loss": 0.674, "step": 12263 }, { "epoch": 0.1466301605710255, "grad_norm": 2.113938331604004, "learning_rate": 9.647632617884946e-06, "loss": 0.7118, "step": 12264 }, { "epoch": 0.14664211671588612, "grad_norm": 2.5702269077301025, "learning_rate": 9.647561217069192e-06, "loss": 0.5622, "step": 12265 }, { "epoch": 0.14665407286074678, "grad_norm": 2.749786615371704, "learning_rate": 9.647489809284404e-06, "loss": 0.6949, "step": 12266 }, { "epoch": 0.14666602900560743, "grad_norm": 2.9114625453948975, "learning_rate": 9.647418394530689e-06, "loss": 0.7675, "step": 12267 }, { "epoch": 0.1466779851504681, "grad_norm": 1.3088443279266357, "learning_rate": 9.647346972808152e-06, "loss": 0.6468, "step": 12268 }, { "epoch": 0.14668994129532872, "grad_norm": 2.5861384868621826, "learning_rate": 9.647275544116903e-06, "loss": 0.6921, "step": 12269 }, { "epoch": 0.14670189744018938, "grad_norm": 1.992526888847351, "learning_rate": 9.647204108457049e-06, "loss": 0.6309, "step": 12270 }, { "epoch": 0.14671385358505004, "grad_norm": 5.659360885620117, "learning_rate": 9.647132665828694e-06, "loss": 0.594, "step": 12271 }, { "epoch": 0.1467258097299107, "grad_norm": 2.308206081390381, "learning_rate": 9.647061216231946e-06, "loss": 0.6372, "step": 12272 }, { "epoch": 0.14673776587477133, "grad_norm": 5.105327129364014, "learning_rate": 9.646989759666916e-06, "loss": 0.6509, "step": 12273 }, { "epoch": 0.146749722019632, "grad_norm": 2.1397507190704346, "learning_rate": 9.646918296133706e-06, "loss": 0.6334, "step": 12274 }, { "epoch": 0.14676167816449265, "grad_norm": 4.952632904052734, "learning_rate": 9.646846825632426e-06, "loss": 0.6402, "step": 12275 }, { "epoch": 0.14677363430935328, "grad_norm": 3.1983582973480225, "learning_rate": 9.646775348163185e-06, "loss": 0.632, "step": 12276 }, { "epoch": 0.14678559045421394, "grad_norm": 2.3566510677337646, "learning_rate": 9.646703863726086e-06, "loss": 0.6451, "step": 12277 }, { "epoch": 0.1467975465990746, "grad_norm": 1.8043473958969116, "learning_rate": 9.646632372321236e-06, "loss": 0.6217, "step": 12278 }, { "epoch": 0.14680950274393526, "grad_norm": 4.843038082122803, "learning_rate": 9.646560873948746e-06, "loss": 0.7341, "step": 12279 }, { "epoch": 0.14682145888879589, "grad_norm": 1.828385591506958, "learning_rate": 9.646489368608723e-06, "loss": 0.6776, "step": 12280 }, { "epoch": 0.14683341503365654, "grad_norm": 2.8430566787719727, "learning_rate": 9.646417856301271e-06, "loss": 0.5833, "step": 12281 }, { "epoch": 0.1468453711785172, "grad_norm": 1.6285334825515747, "learning_rate": 9.6463463370265e-06, "loss": 0.5995, "step": 12282 }, { "epoch": 0.14685732732337786, "grad_norm": 11.16401481628418, "learning_rate": 9.646274810784514e-06, "loss": 0.5667, "step": 12283 }, { "epoch": 0.1468692834682385, "grad_norm": 1.7113831043243408, "learning_rate": 9.646203277575425e-06, "loss": 0.6546, "step": 12284 }, { "epoch": 0.14688123961309915, "grad_norm": 1.9764800071716309, "learning_rate": 9.646131737399335e-06, "loss": 0.667, "step": 12285 }, { "epoch": 0.1468931957579598, "grad_norm": 1.8797376155853271, "learning_rate": 9.646060190256356e-06, "loss": 0.5471, "step": 12286 }, { "epoch": 0.14690515190282044, "grad_norm": 3.051722288131714, "learning_rate": 9.645988636146593e-06, "loss": 0.6624, "step": 12287 }, { "epoch": 0.1469171080476811, "grad_norm": 2.225170135498047, "learning_rate": 9.645917075070153e-06, "loss": 0.5622, "step": 12288 }, { "epoch": 0.14692906419254176, "grad_norm": 1.929086685180664, "learning_rate": 9.645845507027146e-06, "loss": 0.5746, "step": 12289 }, { "epoch": 0.14694102033740242, "grad_norm": 2.783447742462158, "learning_rate": 9.645773932017675e-06, "loss": 0.6901, "step": 12290 }, { "epoch": 0.14695297648226305, "grad_norm": 2.1976051330566406, "learning_rate": 9.64570235004185e-06, "loss": 0.6342, "step": 12291 }, { "epoch": 0.1469649326271237, "grad_norm": 2.7896645069122314, "learning_rate": 9.645630761099776e-06, "loss": 0.5926, "step": 12292 }, { "epoch": 0.14697688877198437, "grad_norm": 2.455613136291504, "learning_rate": 9.645559165191565e-06, "loss": 0.6899, "step": 12293 }, { "epoch": 0.14698884491684502, "grad_norm": 1.8376456499099731, "learning_rate": 9.645487562317319e-06, "loss": 0.567, "step": 12294 }, { "epoch": 0.14700080106170565, "grad_norm": 3.7377195358276367, "learning_rate": 9.645415952477148e-06, "loss": 0.6994, "step": 12295 }, { "epoch": 0.1470127572065663, "grad_norm": 3.847325086593628, "learning_rate": 9.645344335671162e-06, "loss": 0.6037, "step": 12296 }, { "epoch": 0.14702471335142697, "grad_norm": 1.9408539533615112, "learning_rate": 9.645272711899463e-06, "loss": 0.5574, "step": 12297 }, { "epoch": 0.14703666949628763, "grad_norm": 3.263307809829712, "learning_rate": 9.645201081162163e-06, "loss": 0.5884, "step": 12298 }, { "epoch": 0.14704862564114826, "grad_norm": 2.4767062664031982, "learning_rate": 9.645129443459368e-06, "loss": 0.6816, "step": 12299 }, { "epoch": 0.14706058178600892, "grad_norm": 5.126768112182617, "learning_rate": 9.645057798791183e-06, "loss": 0.6974, "step": 12300 }, { "epoch": 0.14707253793086958, "grad_norm": 1.8349369764328003, "learning_rate": 9.644986147157716e-06, "loss": 0.733, "step": 12301 }, { "epoch": 0.1470844940757302, "grad_norm": 3.8804080486297607, "learning_rate": 9.644914488559078e-06, "loss": 0.5435, "step": 12302 }, { "epoch": 0.14709645022059087, "grad_norm": 8.281874656677246, "learning_rate": 9.644842822995373e-06, "loss": 0.5973, "step": 12303 }, { "epoch": 0.14710840636545153, "grad_norm": 1.7449191808700562, "learning_rate": 9.644771150466711e-06, "loss": 0.5873, "step": 12304 }, { "epoch": 0.14712036251031219, "grad_norm": 1.7846555709838867, "learning_rate": 9.644699470973197e-06, "loss": 0.598, "step": 12305 }, { "epoch": 0.14713231865517282, "grad_norm": 1.735615849494934, "learning_rate": 9.644627784514941e-06, "loss": 0.5872, "step": 12306 }, { "epoch": 0.14714427480003348, "grad_norm": 144.98155212402344, "learning_rate": 9.644556091092048e-06, "loss": 0.5793, "step": 12307 }, { "epoch": 0.14715623094489413, "grad_norm": 1.8464651107788086, "learning_rate": 9.644484390704626e-06, "loss": 0.4854, "step": 12308 }, { "epoch": 0.1471681870897548, "grad_norm": 1.3765449523925781, "learning_rate": 9.644412683352783e-06, "loss": 0.4617, "step": 12309 }, { "epoch": 0.14718014323461542, "grad_norm": 3.027583122253418, "learning_rate": 9.64434096903663e-06, "loss": 0.6335, "step": 12310 }, { "epoch": 0.14719209937947608, "grad_norm": 3.241243362426758, "learning_rate": 9.644269247756268e-06, "loss": 0.6277, "step": 12311 }, { "epoch": 0.14720405552433674, "grad_norm": 2.0988614559173584, "learning_rate": 9.644197519511809e-06, "loss": 0.5957, "step": 12312 }, { "epoch": 0.14721601166919737, "grad_norm": 2.437953233718872, "learning_rate": 9.644125784303359e-06, "loss": 0.6225, "step": 12313 }, { "epoch": 0.14722796781405803, "grad_norm": 4.226492404937744, "learning_rate": 9.644054042131026e-06, "loss": 0.6381, "step": 12314 }, { "epoch": 0.1472399239589187, "grad_norm": 3.801077365875244, "learning_rate": 9.643982292994917e-06, "loss": 0.7796, "step": 12315 }, { "epoch": 0.14725188010377935, "grad_norm": 1.7404457330703735, "learning_rate": 9.64391053689514e-06, "loss": 0.651, "step": 12316 }, { "epoch": 0.14726383624863998, "grad_norm": 2.839564085006714, "learning_rate": 9.643838773831804e-06, "loss": 0.6233, "step": 12317 }, { "epoch": 0.14727579239350064, "grad_norm": 1.9852039813995361, "learning_rate": 9.643767003805013e-06, "loss": 0.6258, "step": 12318 }, { "epoch": 0.1472877485383613, "grad_norm": 1.4510409832000732, "learning_rate": 9.64369522681488e-06, "loss": 0.588, "step": 12319 }, { "epoch": 0.14729970468322195, "grad_norm": 1.9568840265274048, "learning_rate": 9.643623442861507e-06, "loss": 0.5394, "step": 12320 }, { "epoch": 0.14731166082808259, "grad_norm": 2.49253249168396, "learning_rate": 9.643551651945005e-06, "loss": 0.6078, "step": 12321 }, { "epoch": 0.14732361697294324, "grad_norm": 2.769991874694824, "learning_rate": 9.64347985406548e-06, "loss": 0.654, "step": 12322 }, { "epoch": 0.1473355731178039, "grad_norm": 3.329822301864624, "learning_rate": 9.643408049223041e-06, "loss": 0.5782, "step": 12323 }, { "epoch": 0.14734752926266453, "grad_norm": 3.6241257190704346, "learning_rate": 9.643336237417795e-06, "loss": 0.7035, "step": 12324 }, { "epoch": 0.1473594854075252, "grad_norm": 2.592251777648926, "learning_rate": 9.64326441864985e-06, "loss": 0.5639, "step": 12325 }, { "epoch": 0.14737144155238585, "grad_norm": 1.5847740173339844, "learning_rate": 9.643192592919315e-06, "loss": 0.6066, "step": 12326 }, { "epoch": 0.1473833976972465, "grad_norm": 2.796781063079834, "learning_rate": 9.643120760226294e-06, "loss": 0.6082, "step": 12327 }, { "epoch": 0.14739535384210714, "grad_norm": 3.5096492767333984, "learning_rate": 9.643048920570896e-06, "loss": 0.7001, "step": 12328 }, { "epoch": 0.1474073099869678, "grad_norm": 2.1439476013183594, "learning_rate": 9.642977073953233e-06, "loss": 0.6171, "step": 12329 }, { "epoch": 0.14741926613182846, "grad_norm": 1.612988829612732, "learning_rate": 9.642905220373406e-06, "loss": 0.5127, "step": 12330 }, { "epoch": 0.14743122227668912, "grad_norm": 1.6734795570373535, "learning_rate": 9.64283335983153e-06, "loss": 0.5569, "step": 12331 }, { "epoch": 0.14744317842154975, "grad_norm": 2.7687630653381348, "learning_rate": 9.642761492327706e-06, "loss": 0.5676, "step": 12332 }, { "epoch": 0.1474551345664104, "grad_norm": 4.591479301452637, "learning_rate": 9.642689617862044e-06, "loss": 0.6462, "step": 12333 }, { "epoch": 0.14746709071127106, "grad_norm": 2.490482807159424, "learning_rate": 9.642617736434654e-06, "loss": 0.5871, "step": 12334 }, { "epoch": 0.1474790468561317, "grad_norm": 1.6462444067001343, "learning_rate": 9.642545848045642e-06, "loss": 0.6771, "step": 12335 }, { "epoch": 0.14749100300099235, "grad_norm": 4.139084339141846, "learning_rate": 9.642473952695116e-06, "loss": 0.689, "step": 12336 }, { "epoch": 0.147502959145853, "grad_norm": 2.549785614013672, "learning_rate": 9.642402050383183e-06, "loss": 0.6223, "step": 12337 }, { "epoch": 0.14751491529071367, "grad_norm": 1.515184998512268, "learning_rate": 9.642330141109952e-06, "loss": 0.6053, "step": 12338 }, { "epoch": 0.1475268714355743, "grad_norm": 18.028331756591797, "learning_rate": 9.64225822487553e-06, "loss": 0.6007, "step": 12339 }, { "epoch": 0.14753882758043496, "grad_norm": 9.570018768310547, "learning_rate": 9.642186301680028e-06, "loss": 0.566, "step": 12340 }, { "epoch": 0.14755078372529562, "grad_norm": 1.9376156330108643, "learning_rate": 9.642114371523549e-06, "loss": 0.5928, "step": 12341 }, { "epoch": 0.14756273987015628, "grad_norm": 4.528834342956543, "learning_rate": 9.642042434406202e-06, "loss": 0.6809, "step": 12342 }, { "epoch": 0.1475746960150169, "grad_norm": 1.7886226177215576, "learning_rate": 9.641970490328098e-06, "loss": 0.5803, "step": 12343 }, { "epoch": 0.14758665215987757, "grad_norm": 3.4774982929229736, "learning_rate": 9.641898539289343e-06, "loss": 0.6342, "step": 12344 }, { "epoch": 0.14759860830473823, "grad_norm": 1.9155018329620361, "learning_rate": 9.641826581290042e-06, "loss": 0.547, "step": 12345 }, { "epoch": 0.14761056444959886, "grad_norm": 2.188443899154663, "learning_rate": 9.641754616330308e-06, "loss": 0.5176, "step": 12346 }, { "epoch": 0.14762252059445952, "grad_norm": 3.250804901123047, "learning_rate": 9.641682644410244e-06, "loss": 0.5573, "step": 12347 }, { "epoch": 0.14763447673932018, "grad_norm": 3.910067558288574, "learning_rate": 9.641610665529962e-06, "loss": 0.6913, "step": 12348 }, { "epoch": 0.14764643288418083, "grad_norm": 1.8038463592529297, "learning_rate": 9.641538679689569e-06, "loss": 0.676, "step": 12349 }, { "epoch": 0.14765838902904146, "grad_norm": 1.393418550491333, "learning_rate": 9.64146668688917e-06, "loss": 0.5934, "step": 12350 }, { "epoch": 0.14767034517390212, "grad_norm": 2.5248100757598877, "learning_rate": 9.641394687128877e-06, "loss": 0.7019, "step": 12351 }, { "epoch": 0.14768230131876278, "grad_norm": 1.6147390604019165, "learning_rate": 9.641322680408795e-06, "loss": 0.5436, "step": 12352 }, { "epoch": 0.14769425746362344, "grad_norm": 1.3388241529464722, "learning_rate": 9.641250666729035e-06, "loss": 0.6012, "step": 12353 }, { "epoch": 0.14770621360848407, "grad_norm": 3.3507015705108643, "learning_rate": 9.641178646089702e-06, "loss": 0.5735, "step": 12354 }, { "epoch": 0.14771816975334473, "grad_norm": 2.1703171730041504, "learning_rate": 9.641106618490905e-06, "loss": 0.5045, "step": 12355 }, { "epoch": 0.1477301258982054, "grad_norm": 2.7050554752349854, "learning_rate": 9.641034583932753e-06, "loss": 0.6221, "step": 12356 }, { "epoch": 0.14774208204306605, "grad_norm": 2.4028077125549316, "learning_rate": 9.640962542415351e-06, "loss": 0.58, "step": 12357 }, { "epoch": 0.14775403818792668, "grad_norm": 10.712141990661621, "learning_rate": 9.640890493938812e-06, "loss": 0.5955, "step": 12358 }, { "epoch": 0.14776599433278734, "grad_norm": 1.8407751321792603, "learning_rate": 9.64081843850324e-06, "loss": 0.6204, "step": 12359 }, { "epoch": 0.147777950477648, "grad_norm": 1.807170033454895, "learning_rate": 9.640746376108742e-06, "loss": 0.6158, "step": 12360 }, { "epoch": 0.14778990662250863, "grad_norm": 1.9326084852218628, "learning_rate": 9.640674306755433e-06, "loss": 0.4975, "step": 12361 }, { "epoch": 0.14780186276736929, "grad_norm": 2.639453411102295, "learning_rate": 9.640602230443412e-06, "loss": 0.5858, "step": 12362 }, { "epoch": 0.14781381891222994, "grad_norm": 2.3792734146118164, "learning_rate": 9.640530147172794e-06, "loss": 0.6865, "step": 12363 }, { "epoch": 0.1478257750570906, "grad_norm": 2.006394147872925, "learning_rate": 9.640458056943684e-06, "loss": 0.6047, "step": 12364 }, { "epoch": 0.14783773120195123, "grad_norm": 2.145650625228882, "learning_rate": 9.640385959756189e-06, "loss": 0.6667, "step": 12365 }, { "epoch": 0.1478496873468119, "grad_norm": 1.9690451622009277, "learning_rate": 9.640313855610422e-06, "loss": 0.6025, "step": 12366 }, { "epoch": 0.14786164349167255, "grad_norm": 2.7345736026763916, "learning_rate": 9.640241744506484e-06, "loss": 0.6059, "step": 12367 }, { "epoch": 0.1478735996365332, "grad_norm": 7.749884128570557, "learning_rate": 9.64016962644449e-06, "loss": 0.6947, "step": 12368 }, { "epoch": 0.14788555578139384, "grad_norm": 1.8846229314804077, "learning_rate": 9.640097501424544e-06, "loss": 0.6165, "step": 12369 }, { "epoch": 0.1478975119262545, "grad_norm": 2.139125108718872, "learning_rate": 9.640025369446755e-06, "loss": 0.515, "step": 12370 }, { "epoch": 0.14790946807111516, "grad_norm": 1.7274184226989746, "learning_rate": 9.639953230511232e-06, "loss": 0.6203, "step": 12371 }, { "epoch": 0.1479214242159758, "grad_norm": 1.8396995067596436, "learning_rate": 9.639881084618084e-06, "loss": 0.6134, "step": 12372 }, { "epoch": 0.14793338036083645, "grad_norm": 1.717624545097351, "learning_rate": 9.639808931767415e-06, "loss": 0.6168, "step": 12373 }, { "epoch": 0.1479453365056971, "grad_norm": 2.0808839797973633, "learning_rate": 9.639736771959338e-06, "loss": 0.5666, "step": 12374 }, { "epoch": 0.14795729265055776, "grad_norm": 3.6190595626831055, "learning_rate": 9.639664605193958e-06, "loss": 0.6714, "step": 12375 }, { "epoch": 0.1479692487954184, "grad_norm": 2.9101016521453857, "learning_rate": 9.639592431471385e-06, "loss": 0.622, "step": 12376 }, { "epoch": 0.14798120494027905, "grad_norm": 2.0680935382843018, "learning_rate": 9.639520250791727e-06, "loss": 0.61, "step": 12377 }, { "epoch": 0.1479931610851397, "grad_norm": 1.636496663093567, "learning_rate": 9.639448063155093e-06, "loss": 0.5588, "step": 12378 }, { "epoch": 0.14800511723000037, "grad_norm": 2.0985586643218994, "learning_rate": 9.639375868561588e-06, "loss": 0.6105, "step": 12379 }, { "epoch": 0.148017073374861, "grad_norm": 1.986377239227295, "learning_rate": 9.639303667011323e-06, "loss": 0.6974, "step": 12380 }, { "epoch": 0.14802902951972166, "grad_norm": 2.0354509353637695, "learning_rate": 9.639231458504405e-06, "loss": 0.6628, "step": 12381 }, { "epoch": 0.14804098566458232, "grad_norm": 1.8110437393188477, "learning_rate": 9.639159243040946e-06, "loss": 0.6449, "step": 12382 }, { "epoch": 0.14805294180944295, "grad_norm": 1.8502854108810425, "learning_rate": 9.639087020621048e-06, "loss": 0.5302, "step": 12383 }, { "epoch": 0.1480648979543036, "grad_norm": 2.434206247329712, "learning_rate": 9.639014791244823e-06, "loss": 0.5688, "step": 12384 }, { "epoch": 0.14807685409916427, "grad_norm": 3.1172614097595215, "learning_rate": 9.63894255491238e-06, "loss": 0.7538, "step": 12385 }, { "epoch": 0.14808881024402493, "grad_norm": 1.807085394859314, "learning_rate": 9.638870311623824e-06, "loss": 0.5893, "step": 12386 }, { "epoch": 0.14810076638888556, "grad_norm": 2.901029348373413, "learning_rate": 9.638798061379268e-06, "loss": 0.5984, "step": 12387 }, { "epoch": 0.14811272253374622, "grad_norm": 1.883047103881836, "learning_rate": 9.638725804178816e-06, "loss": 0.6461, "step": 12388 }, { "epoch": 0.14812467867860687, "grad_norm": 2.354116678237915, "learning_rate": 9.63865354002258e-06, "loss": 0.5426, "step": 12389 }, { "epoch": 0.14813663482346753, "grad_norm": 1.737852692604065, "learning_rate": 9.638581268910664e-06, "loss": 0.6492, "step": 12390 }, { "epoch": 0.14814859096832816, "grad_norm": 2.1992435455322266, "learning_rate": 9.638508990843181e-06, "loss": 0.6289, "step": 12391 }, { "epoch": 0.14816054711318882, "grad_norm": 4.518675327301025, "learning_rate": 9.638436705820238e-06, "loss": 0.5695, "step": 12392 }, { "epoch": 0.14817250325804948, "grad_norm": 1.9738092422485352, "learning_rate": 9.63836441384194e-06, "loss": 0.6757, "step": 12393 }, { "epoch": 0.1481844594029101, "grad_norm": 7.16920804977417, "learning_rate": 9.6382921149084e-06, "loss": 0.6917, "step": 12394 }, { "epoch": 0.14819641554777077, "grad_norm": 20.234180450439453, "learning_rate": 9.638219809019724e-06, "loss": 0.6501, "step": 12395 }, { "epoch": 0.14820837169263143, "grad_norm": 3.1323800086975098, "learning_rate": 9.63814749617602e-06, "loss": 0.545, "step": 12396 }, { "epoch": 0.1482203278374921, "grad_norm": 3.8017611503601074, "learning_rate": 9.6380751763774e-06, "loss": 0.5464, "step": 12397 }, { "epoch": 0.14823228398235272, "grad_norm": 1.6113656759262085, "learning_rate": 9.638002849623967e-06, "loss": 0.6564, "step": 12398 }, { "epoch": 0.14824424012721338, "grad_norm": 9.052617073059082, "learning_rate": 9.637930515915834e-06, "loss": 0.6387, "step": 12399 }, { "epoch": 0.14825619627207404, "grad_norm": 2.3273744583129883, "learning_rate": 9.637858175253105e-06, "loss": 0.7138, "step": 12400 }, { "epoch": 0.1482681524169347, "grad_norm": 6.923782825469971, "learning_rate": 9.637785827635895e-06, "loss": 0.6912, "step": 12401 }, { "epoch": 0.14828010856179533, "grad_norm": 1.6546000242233276, "learning_rate": 9.637713473064304e-06, "loss": 0.6602, "step": 12402 }, { "epoch": 0.14829206470665598, "grad_norm": 19.65342903137207, "learning_rate": 9.63764111153845e-06, "loss": 0.5627, "step": 12403 }, { "epoch": 0.14830402085151664, "grad_norm": 2.4274628162384033, "learning_rate": 9.637568743058435e-06, "loss": 0.5413, "step": 12404 }, { "epoch": 0.14831597699637727, "grad_norm": 3.9307878017425537, "learning_rate": 9.637496367624369e-06, "loss": 0.6098, "step": 12405 }, { "epoch": 0.14832793314123793, "grad_norm": 2.1768798828125, "learning_rate": 9.637423985236359e-06, "loss": 0.6926, "step": 12406 }, { "epoch": 0.1483398892860986, "grad_norm": 5.217077255249023, "learning_rate": 9.637351595894517e-06, "loss": 0.6526, "step": 12407 }, { "epoch": 0.14835184543095925, "grad_norm": 2.734431505203247, "learning_rate": 9.63727919959895e-06, "loss": 0.5908, "step": 12408 }, { "epoch": 0.14836380157581988, "grad_norm": 2.0874409675598145, "learning_rate": 9.637206796349765e-06, "loss": 0.6489, "step": 12409 }, { "epoch": 0.14837575772068054, "grad_norm": 2.116809844970703, "learning_rate": 9.637134386147073e-06, "loss": 0.5729, "step": 12410 }, { "epoch": 0.1483877138655412, "grad_norm": 2.0838773250579834, "learning_rate": 9.637061968990982e-06, "loss": 0.4909, "step": 12411 }, { "epoch": 0.14839967001040186, "grad_norm": 4.630552291870117, "learning_rate": 9.636989544881598e-06, "loss": 0.6025, "step": 12412 }, { "epoch": 0.1484116261552625, "grad_norm": 3.284698486328125, "learning_rate": 9.636917113819035e-06, "loss": 0.7147, "step": 12413 }, { "epoch": 0.14842358230012315, "grad_norm": 2.1399734020233154, "learning_rate": 9.636844675803397e-06, "loss": 0.7668, "step": 12414 }, { "epoch": 0.1484355384449838, "grad_norm": 1.9065699577331543, "learning_rate": 9.636772230834792e-06, "loss": 0.6623, "step": 12415 }, { "epoch": 0.14844749458984446, "grad_norm": 2.0781195163726807, "learning_rate": 9.636699778913332e-06, "loss": 0.633, "step": 12416 }, { "epoch": 0.1484594507347051, "grad_norm": 2.222745656967163, "learning_rate": 9.636627320039123e-06, "loss": 0.5898, "step": 12417 }, { "epoch": 0.14847140687956575, "grad_norm": 1.8598474264144897, "learning_rate": 9.636554854212276e-06, "loss": 0.6669, "step": 12418 }, { "epoch": 0.1484833630244264, "grad_norm": 1.8094873428344727, "learning_rate": 9.6364823814329e-06, "loss": 0.6439, "step": 12419 }, { "epoch": 0.14849531916928704, "grad_norm": 1.7454577684402466, "learning_rate": 9.6364099017011e-06, "loss": 0.5551, "step": 12420 }, { "epoch": 0.1485072753141477, "grad_norm": 3.8071556091308594, "learning_rate": 9.636337415016987e-06, "loss": 0.6202, "step": 12421 }, { "epoch": 0.14851923145900836, "grad_norm": 3.030641794204712, "learning_rate": 9.63626492138067e-06, "loss": 0.562, "step": 12422 }, { "epoch": 0.14853118760386902, "grad_norm": 7.841597557067871, "learning_rate": 9.636192420792257e-06, "loss": 0.6526, "step": 12423 }, { "epoch": 0.14854314374872965, "grad_norm": 1.917091727256775, "learning_rate": 9.636119913251858e-06, "loss": 0.5884, "step": 12424 }, { "epoch": 0.1485550998935903, "grad_norm": 2.3810129165649414, "learning_rate": 9.63604739875958e-06, "loss": 0.635, "step": 12425 }, { "epoch": 0.14856705603845097, "grad_norm": 2.4234061241149902, "learning_rate": 9.635974877315533e-06, "loss": 0.7306, "step": 12426 }, { "epoch": 0.14857901218331163, "grad_norm": 1.8564033508300781, "learning_rate": 9.635902348919824e-06, "loss": 0.7025, "step": 12427 }, { "epoch": 0.14859096832817226, "grad_norm": 3.6599621772766113, "learning_rate": 9.635829813572564e-06, "loss": 0.6164, "step": 12428 }, { "epoch": 0.14860292447303292, "grad_norm": 3.151054620742798, "learning_rate": 9.63575727127386e-06, "loss": 0.7585, "step": 12429 }, { "epoch": 0.14861488061789357, "grad_norm": 1.8834872245788574, "learning_rate": 9.635684722023821e-06, "loss": 0.6331, "step": 12430 }, { "epoch": 0.1486268367627542, "grad_norm": 1.823519229888916, "learning_rate": 9.635612165822558e-06, "loss": 0.6942, "step": 12431 }, { "epoch": 0.14863879290761486, "grad_norm": 1.6314629316329956, "learning_rate": 9.635539602670177e-06, "loss": 0.6143, "step": 12432 }, { "epoch": 0.14865074905247552, "grad_norm": 2.630303144454956, "learning_rate": 9.635467032566788e-06, "loss": 0.6327, "step": 12433 }, { "epoch": 0.14866270519733618, "grad_norm": 2.0802125930786133, "learning_rate": 9.6353944555125e-06, "loss": 0.6067, "step": 12434 }, { "epoch": 0.1486746613421968, "grad_norm": 5.811675071716309, "learning_rate": 9.635321871507422e-06, "loss": 0.6443, "step": 12435 }, { "epoch": 0.14868661748705747, "grad_norm": 1.6204098463058472, "learning_rate": 9.635249280551662e-06, "loss": 0.5524, "step": 12436 }, { "epoch": 0.14869857363191813, "grad_norm": 2.448615312576294, "learning_rate": 9.63517668264533e-06, "loss": 0.7175, "step": 12437 }, { "epoch": 0.1487105297767788, "grad_norm": 2.31040096282959, "learning_rate": 9.63510407778853e-06, "loss": 0.6614, "step": 12438 }, { "epoch": 0.14872248592163942, "grad_norm": 2.928218126296997, "learning_rate": 9.63503146598138e-06, "loss": 0.6772, "step": 12439 }, { "epoch": 0.14873444206650008, "grad_norm": 2.1594786643981934, "learning_rate": 9.63495884722398e-06, "loss": 0.6704, "step": 12440 }, { "epoch": 0.14874639821136074, "grad_norm": 1.642669439315796, "learning_rate": 9.634886221516446e-06, "loss": 0.5857, "step": 12441 }, { "epoch": 0.14875835435622137, "grad_norm": 2.5279698371887207, "learning_rate": 9.634813588858882e-06, "loss": 0.6867, "step": 12442 }, { "epoch": 0.14877031050108203, "grad_norm": 3.3196122646331787, "learning_rate": 9.634740949251398e-06, "loss": 0.5594, "step": 12443 }, { "epoch": 0.14878226664594268, "grad_norm": 4.460806369781494, "learning_rate": 9.634668302694105e-06, "loss": 0.6786, "step": 12444 }, { "epoch": 0.14879422279080334, "grad_norm": 1.5093098878860474, "learning_rate": 9.63459564918711e-06, "loss": 0.5669, "step": 12445 }, { "epoch": 0.14880617893566397, "grad_norm": 3.076258897781372, "learning_rate": 9.634522988730522e-06, "loss": 0.668, "step": 12446 }, { "epoch": 0.14881813508052463, "grad_norm": 1.7646448612213135, "learning_rate": 9.63445032132445e-06, "loss": 0.6461, "step": 12447 }, { "epoch": 0.1488300912253853, "grad_norm": 1.9873040914535522, "learning_rate": 9.634377646969002e-06, "loss": 0.6818, "step": 12448 }, { "epoch": 0.14884204737024595, "grad_norm": 5.026721000671387, "learning_rate": 9.634304965664292e-06, "loss": 0.663, "step": 12449 }, { "epoch": 0.14885400351510658, "grad_norm": 2.474055528640747, "learning_rate": 9.634232277410423e-06, "loss": 0.6193, "step": 12450 }, { "epoch": 0.14886595965996724, "grad_norm": 1.8851449489593506, "learning_rate": 9.634159582207505e-06, "loss": 0.6982, "step": 12451 }, { "epoch": 0.1488779158048279, "grad_norm": 2.25059175491333, "learning_rate": 9.63408688005565e-06, "loss": 0.614, "step": 12452 }, { "epoch": 0.14888987194968853, "grad_norm": 3.7491631507873535, "learning_rate": 9.634014170954964e-06, "loss": 0.6384, "step": 12453 }, { "epoch": 0.1489018280945492, "grad_norm": 1.9875942468643188, "learning_rate": 9.633941454905558e-06, "loss": 0.5459, "step": 12454 }, { "epoch": 0.14891378423940985, "grad_norm": 2.9950876235961914, "learning_rate": 9.633868731907541e-06, "loss": 0.5303, "step": 12455 }, { "epoch": 0.1489257403842705, "grad_norm": 4.0374908447265625, "learning_rate": 9.63379600196102e-06, "loss": 0.5213, "step": 12456 }, { "epoch": 0.14893769652913114, "grad_norm": 3.307770252227783, "learning_rate": 9.633723265066106e-06, "loss": 0.5509, "step": 12457 }, { "epoch": 0.1489496526739918, "grad_norm": 8.65832805633545, "learning_rate": 9.633650521222909e-06, "loss": 0.5714, "step": 12458 }, { "epoch": 0.14896160881885245, "grad_norm": 2.2963719367980957, "learning_rate": 9.633577770431535e-06, "loss": 0.533, "step": 12459 }, { "epoch": 0.1489735649637131, "grad_norm": 4.606659889221191, "learning_rate": 9.633505012692095e-06, "loss": 0.5627, "step": 12460 }, { "epoch": 0.14898552110857374, "grad_norm": 1.7488226890563965, "learning_rate": 9.633432248004697e-06, "loss": 0.6208, "step": 12461 }, { "epoch": 0.1489974772534344, "grad_norm": 1.9346673488616943, "learning_rate": 9.633359476369452e-06, "loss": 0.6314, "step": 12462 }, { "epoch": 0.14900943339829506, "grad_norm": 2.1313936710357666, "learning_rate": 9.633286697786467e-06, "loss": 0.5999, "step": 12463 }, { "epoch": 0.1490213895431557, "grad_norm": 1.710364580154419, "learning_rate": 9.633213912255851e-06, "loss": 0.5905, "step": 12464 }, { "epoch": 0.14903334568801635, "grad_norm": 3.5772571563720703, "learning_rate": 9.633141119777718e-06, "loss": 0.674, "step": 12465 }, { "epoch": 0.149045301832877, "grad_norm": 1.735060214996338, "learning_rate": 9.63306832035217e-06, "loss": 0.5633, "step": 12466 }, { "epoch": 0.14905725797773767, "grad_norm": 2.567551374435425, "learning_rate": 9.632995513979322e-06, "loss": 0.6493, "step": 12467 }, { "epoch": 0.1490692141225983, "grad_norm": 3.5277998447418213, "learning_rate": 9.63292270065928e-06, "loss": 0.5723, "step": 12468 }, { "epoch": 0.14908117026745896, "grad_norm": 4.070736885070801, "learning_rate": 9.632849880392152e-06, "loss": 0.5548, "step": 12469 }, { "epoch": 0.14909312641231962, "grad_norm": 2.4151477813720703, "learning_rate": 9.632777053178052e-06, "loss": 0.6787, "step": 12470 }, { "epoch": 0.14910508255718027, "grad_norm": 1.5766592025756836, "learning_rate": 9.632704219017083e-06, "loss": 0.5774, "step": 12471 }, { "epoch": 0.1491170387020409, "grad_norm": 2.8798937797546387, "learning_rate": 9.63263137790936e-06, "loss": 0.6437, "step": 12472 }, { "epoch": 0.14912899484690156, "grad_norm": 2.1755776405334473, "learning_rate": 9.63255852985499e-06, "loss": 0.6222, "step": 12473 }, { "epoch": 0.14914095099176222, "grad_norm": 3.2638607025146484, "learning_rate": 9.632485674854082e-06, "loss": 0.472, "step": 12474 }, { "epoch": 0.14915290713662288, "grad_norm": 1.3447911739349365, "learning_rate": 9.632412812906744e-06, "loss": 0.6167, "step": 12475 }, { "epoch": 0.1491648632814835, "grad_norm": 4.010525226593018, "learning_rate": 9.632339944013088e-06, "loss": 0.611, "step": 12476 }, { "epoch": 0.14917681942634417, "grad_norm": 2.029519557952881, "learning_rate": 9.63226706817322e-06, "loss": 0.6103, "step": 12477 }, { "epoch": 0.14918877557120483, "grad_norm": 5.98759126663208, "learning_rate": 9.632194185387254e-06, "loss": 0.5233, "step": 12478 }, { "epoch": 0.14920073171606546, "grad_norm": 4.943129539489746, "learning_rate": 9.632121295655293e-06, "loss": 0.6204, "step": 12479 }, { "epoch": 0.14921268786092612, "grad_norm": 6.273941516876221, "learning_rate": 9.632048398977451e-06, "loss": 0.6979, "step": 12480 }, { "epoch": 0.14922464400578678, "grad_norm": 5.500368118286133, "learning_rate": 9.631975495353836e-06, "loss": 0.6506, "step": 12481 }, { "epoch": 0.14923660015064744, "grad_norm": 17.0708065032959, "learning_rate": 9.631902584784558e-06, "loss": 0.6037, "step": 12482 }, { "epoch": 0.14924855629550807, "grad_norm": 2.0894293785095215, "learning_rate": 9.631829667269726e-06, "loss": 0.6487, "step": 12483 }, { "epoch": 0.14926051244036873, "grad_norm": 1.9711215496063232, "learning_rate": 9.631756742809446e-06, "loss": 0.7024, "step": 12484 }, { "epoch": 0.14927246858522938, "grad_norm": 3.7077741622924805, "learning_rate": 9.631683811403833e-06, "loss": 0.57, "step": 12485 }, { "epoch": 0.14928442473009004, "grad_norm": 4.299538612365723, "learning_rate": 9.631610873052991e-06, "loss": 0.7265, "step": 12486 }, { "epoch": 0.14929638087495067, "grad_norm": 1.5792657136917114, "learning_rate": 9.631537927757036e-06, "loss": 0.5579, "step": 12487 }, { "epoch": 0.14930833701981133, "grad_norm": 1.870671272277832, "learning_rate": 9.63146497551607e-06, "loss": 0.5727, "step": 12488 }, { "epoch": 0.149320293164672, "grad_norm": 2.1849193572998047, "learning_rate": 9.631392016330208e-06, "loss": 0.6096, "step": 12489 }, { "epoch": 0.14933224930953262, "grad_norm": 4.248756408691406, "learning_rate": 9.631319050199557e-06, "loss": 0.5357, "step": 12490 }, { "epoch": 0.14934420545439328, "grad_norm": 2.0609920024871826, "learning_rate": 9.631246077124226e-06, "loss": 0.5907, "step": 12491 }, { "epoch": 0.14935616159925394, "grad_norm": 7.343037128448486, "learning_rate": 9.631173097104324e-06, "loss": 0.6225, "step": 12492 }, { "epoch": 0.1493681177441146, "grad_norm": 2.0933589935302734, "learning_rate": 9.631100110139962e-06, "loss": 0.6222, "step": 12493 }, { "epoch": 0.14938007388897523, "grad_norm": 1.9504282474517822, "learning_rate": 9.63102711623125e-06, "loss": 0.7758, "step": 12494 }, { "epoch": 0.1493920300338359, "grad_norm": 2.0896873474121094, "learning_rate": 9.630954115378295e-06, "loss": 0.6152, "step": 12495 }, { "epoch": 0.14940398617869655, "grad_norm": 1.310685396194458, "learning_rate": 9.63088110758121e-06, "loss": 0.5694, "step": 12496 }, { "epoch": 0.1494159423235572, "grad_norm": 2.0396673679351807, "learning_rate": 9.630808092840101e-06, "loss": 0.5879, "step": 12497 }, { "epoch": 0.14942789846841784, "grad_norm": 2.2501208782196045, "learning_rate": 9.630735071155077e-06, "loss": 0.6416, "step": 12498 }, { "epoch": 0.1494398546132785, "grad_norm": 5.265336036682129, "learning_rate": 9.630662042526252e-06, "loss": 0.6257, "step": 12499 }, { "epoch": 0.14945181075813915, "grad_norm": 6.525341510772705, "learning_rate": 9.630589006953733e-06, "loss": 0.6685, "step": 12500 }, { "epoch": 0.14946376690299978, "grad_norm": 1.9800543785095215, "learning_rate": 9.630515964437627e-06, "loss": 0.6556, "step": 12501 }, { "epoch": 0.14947572304786044, "grad_norm": 3.039193868637085, "learning_rate": 9.630442914978046e-06, "loss": 0.5208, "step": 12502 }, { "epoch": 0.1494876791927211, "grad_norm": 2.3135690689086914, "learning_rate": 9.630369858575101e-06, "loss": 0.6383, "step": 12503 }, { "epoch": 0.14949963533758176, "grad_norm": 2.578392267227173, "learning_rate": 9.630296795228899e-06, "loss": 0.6624, "step": 12504 }, { "epoch": 0.1495115914824424, "grad_norm": 2.440377950668335, "learning_rate": 9.63022372493955e-06, "loss": 0.6459, "step": 12505 }, { "epoch": 0.14952354762730305, "grad_norm": 3.5462472438812256, "learning_rate": 9.630150647707167e-06, "loss": 0.6459, "step": 12506 }, { "epoch": 0.1495355037721637, "grad_norm": 2.038532018661499, "learning_rate": 9.630077563531855e-06, "loss": 0.5746, "step": 12507 }, { "epoch": 0.14954745991702437, "grad_norm": 2.7455952167510986, "learning_rate": 9.630004472413722e-06, "loss": 0.5291, "step": 12508 }, { "epoch": 0.149559416061885, "grad_norm": 2.012985944747925, "learning_rate": 9.629931374352886e-06, "loss": 0.7119, "step": 12509 }, { "epoch": 0.14957137220674566, "grad_norm": 1.9256261587142944, "learning_rate": 9.629858269349448e-06, "loss": 0.6484, "step": 12510 }, { "epoch": 0.14958332835160632, "grad_norm": 1.5410363674163818, "learning_rate": 9.629785157403522e-06, "loss": 0.6365, "step": 12511 }, { "epoch": 0.14959528449646695, "grad_norm": 2.626016139984131, "learning_rate": 9.629712038515218e-06, "loss": 0.7474, "step": 12512 }, { "epoch": 0.1496072406413276, "grad_norm": 4.996392726898193, "learning_rate": 9.629638912684642e-06, "loss": 0.6372, "step": 12513 }, { "epoch": 0.14961919678618826, "grad_norm": 1.7569615840911865, "learning_rate": 9.629565779911906e-06, "loss": 0.5451, "step": 12514 }, { "epoch": 0.14963115293104892, "grad_norm": 5.184755802154541, "learning_rate": 9.629492640197122e-06, "loss": 0.6067, "step": 12515 }, { "epoch": 0.14964310907590955, "grad_norm": 5.000289440155029, "learning_rate": 9.629419493540396e-06, "loss": 0.5921, "step": 12516 }, { "epoch": 0.1496550652207702, "grad_norm": 2.6846046447753906, "learning_rate": 9.629346339941839e-06, "loss": 0.6147, "step": 12517 }, { "epoch": 0.14966702136563087, "grad_norm": 1.9734262228012085, "learning_rate": 9.629273179401562e-06, "loss": 0.6263, "step": 12518 }, { "epoch": 0.14967897751049153, "grad_norm": 19.4126033782959, "learning_rate": 9.629200011919672e-06, "loss": 0.5834, "step": 12519 }, { "epoch": 0.14969093365535216, "grad_norm": 3.4234225749969482, "learning_rate": 9.62912683749628e-06, "loss": 0.5717, "step": 12520 }, { "epoch": 0.14970288980021282, "grad_norm": 1.7363955974578857, "learning_rate": 9.629053656131496e-06, "loss": 0.5894, "step": 12521 }, { "epoch": 0.14971484594507348, "grad_norm": 3.000821113586426, "learning_rate": 9.62898046782543e-06, "loss": 0.5962, "step": 12522 }, { "epoch": 0.1497268020899341, "grad_norm": 2.3757355213165283, "learning_rate": 9.628907272578192e-06, "loss": 0.6048, "step": 12523 }, { "epoch": 0.14973875823479477, "grad_norm": 3.249439001083374, "learning_rate": 9.62883407038989e-06, "loss": 0.598, "step": 12524 }, { "epoch": 0.14975071437965543, "grad_norm": 6.317228317260742, "learning_rate": 9.628760861260635e-06, "loss": 0.6453, "step": 12525 }, { "epoch": 0.14976267052451608, "grad_norm": 1.9457124471664429, "learning_rate": 9.628687645190536e-06, "loss": 0.5798, "step": 12526 }, { "epoch": 0.14977462666937671, "grad_norm": 29.68307876586914, "learning_rate": 9.628614422179704e-06, "loss": 0.6148, "step": 12527 }, { "epoch": 0.14978658281423737, "grad_norm": 2.5040321350097656, "learning_rate": 9.628541192228249e-06, "loss": 0.5461, "step": 12528 }, { "epoch": 0.14979853895909803, "grad_norm": 4.332046031951904, "learning_rate": 9.62846795533628e-06, "loss": 0.575, "step": 12529 }, { "epoch": 0.1498104951039587, "grad_norm": 1.8590620756149292, "learning_rate": 9.628394711503906e-06, "loss": 0.6357, "step": 12530 }, { "epoch": 0.14982245124881932, "grad_norm": 12.475417137145996, "learning_rate": 9.628321460731239e-06, "loss": 0.5873, "step": 12531 }, { "epoch": 0.14983440739367998, "grad_norm": 5.933927536010742, "learning_rate": 9.628248203018385e-06, "loss": 0.583, "step": 12532 }, { "epoch": 0.14984636353854064, "grad_norm": 2.376710891723633, "learning_rate": 9.628174938365458e-06, "loss": 0.7174, "step": 12533 }, { "epoch": 0.1498583196834013, "grad_norm": 2.169128894805908, "learning_rate": 9.628101666772565e-06, "loss": 0.602, "step": 12534 }, { "epoch": 0.14987027582826193, "grad_norm": 3.4540293216705322, "learning_rate": 9.628028388239818e-06, "loss": 0.5485, "step": 12535 }, { "epoch": 0.1498822319731226, "grad_norm": 2.1252458095550537, "learning_rate": 9.627955102767328e-06, "loss": 0.6014, "step": 12536 }, { "epoch": 0.14989418811798325, "grad_norm": 1.8708142042160034, "learning_rate": 9.6278818103552e-06, "loss": 0.5868, "step": 12537 }, { "epoch": 0.14990614426284388, "grad_norm": 3.1591734886169434, "learning_rate": 9.627808511003549e-06, "loss": 0.5974, "step": 12538 }, { "epoch": 0.14991810040770454, "grad_norm": 3.020127058029175, "learning_rate": 9.627735204712481e-06, "loss": 0.632, "step": 12539 }, { "epoch": 0.1499300565525652, "grad_norm": 2.8198349475860596, "learning_rate": 9.627661891482107e-06, "loss": 0.7603, "step": 12540 }, { "epoch": 0.14994201269742585, "grad_norm": 2.6998980045318604, "learning_rate": 9.62758857131254e-06, "loss": 0.6267, "step": 12541 }, { "epoch": 0.14995396884228648, "grad_norm": 3.1341800689697266, "learning_rate": 9.627515244203886e-06, "loss": 0.6159, "step": 12542 }, { "epoch": 0.14996592498714714, "grad_norm": 7.449032306671143, "learning_rate": 9.627441910156257e-06, "loss": 0.7077, "step": 12543 }, { "epoch": 0.1499778811320078, "grad_norm": 1.7688554525375366, "learning_rate": 9.627368569169762e-06, "loss": 0.5817, "step": 12544 }, { "epoch": 0.14998983727686846, "grad_norm": 2.124858856201172, "learning_rate": 9.627295221244511e-06, "loss": 0.5834, "step": 12545 }, { "epoch": 0.1500017934217291, "grad_norm": 4.217347145080566, "learning_rate": 9.627221866380614e-06, "loss": 0.7022, "step": 12546 }, { "epoch": 0.15001374956658975, "grad_norm": 2.044374465942383, "learning_rate": 9.627148504578183e-06, "loss": 0.6907, "step": 12547 }, { "epoch": 0.1500257057114504, "grad_norm": 2.29514479637146, "learning_rate": 9.627075135837325e-06, "loss": 0.586, "step": 12548 }, { "epoch": 0.15003766185631104, "grad_norm": 1.892309308052063, "learning_rate": 9.627001760158152e-06, "loss": 0.6707, "step": 12549 }, { "epoch": 0.1500496180011717, "grad_norm": 1.9279148578643799, "learning_rate": 9.626928377540773e-06, "loss": 0.6549, "step": 12550 }, { "epoch": 0.15006157414603236, "grad_norm": 2.786343574523926, "learning_rate": 9.6268549879853e-06, "loss": 0.5633, "step": 12551 }, { "epoch": 0.15007353029089301, "grad_norm": 3.3091421127319336, "learning_rate": 9.626781591491839e-06, "loss": 0.627, "step": 12552 }, { "epoch": 0.15008548643575365, "grad_norm": 3.3603124618530273, "learning_rate": 9.626708188060504e-06, "loss": 0.6047, "step": 12553 }, { "epoch": 0.1500974425806143, "grad_norm": 5.721898078918457, "learning_rate": 9.626634777691403e-06, "loss": 0.5789, "step": 12554 }, { "epoch": 0.15010939872547496, "grad_norm": 2.2039480209350586, "learning_rate": 9.626561360384647e-06, "loss": 0.6193, "step": 12555 }, { "epoch": 0.15012135487033562, "grad_norm": 7.829267978668213, "learning_rate": 9.626487936140347e-06, "loss": 0.5407, "step": 12556 }, { "epoch": 0.15013331101519625, "grad_norm": 1.7822914123535156, "learning_rate": 9.62641450495861e-06, "loss": 0.5768, "step": 12557 }, { "epoch": 0.1501452671600569, "grad_norm": 2.2143936157226562, "learning_rate": 9.626341066839548e-06, "loss": 0.6702, "step": 12558 }, { "epoch": 0.15015722330491757, "grad_norm": 2.1436550617218018, "learning_rate": 9.626267621783272e-06, "loss": 0.5631, "step": 12559 }, { "epoch": 0.1501691794497782, "grad_norm": 3.5254404544830322, "learning_rate": 9.626194169789892e-06, "loss": 0.6212, "step": 12560 }, { "epoch": 0.15018113559463886, "grad_norm": 4.891641616821289, "learning_rate": 9.626120710859517e-06, "loss": 0.5784, "step": 12561 }, { "epoch": 0.15019309173949952, "grad_norm": 2.359405755996704, "learning_rate": 9.626047244992256e-06, "loss": 0.6204, "step": 12562 }, { "epoch": 0.15020504788436018, "grad_norm": 3.717867136001587, "learning_rate": 9.625973772188222e-06, "loss": 0.6449, "step": 12563 }, { "epoch": 0.1502170040292208, "grad_norm": 6.414705753326416, "learning_rate": 9.625900292447524e-06, "loss": 0.5583, "step": 12564 }, { "epoch": 0.15022896017408147, "grad_norm": 2.185304880142212, "learning_rate": 9.625826805770272e-06, "loss": 0.5921, "step": 12565 }, { "epoch": 0.15024091631894212, "grad_norm": 2.9191477298736572, "learning_rate": 9.625753312156576e-06, "loss": 0.68, "step": 12566 }, { "epoch": 0.15025287246380278, "grad_norm": 7.350907325744629, "learning_rate": 9.625679811606546e-06, "loss": 0.598, "step": 12567 }, { "epoch": 0.15026482860866341, "grad_norm": 2.045257568359375, "learning_rate": 9.625606304120295e-06, "loss": 0.5572, "step": 12568 }, { "epoch": 0.15027678475352407, "grad_norm": 2.565277099609375, "learning_rate": 9.625532789697928e-06, "loss": 0.657, "step": 12569 }, { "epoch": 0.15028874089838473, "grad_norm": 2.0452287197113037, "learning_rate": 9.625459268339559e-06, "loss": 0.5328, "step": 12570 }, { "epoch": 0.15030069704324536, "grad_norm": 3.0931310653686523, "learning_rate": 9.625385740045298e-06, "loss": 0.6355, "step": 12571 }, { "epoch": 0.15031265318810602, "grad_norm": 3.0228798389434814, "learning_rate": 9.625312204815254e-06, "loss": 0.6233, "step": 12572 }, { "epoch": 0.15032460933296668, "grad_norm": 4.855648517608643, "learning_rate": 9.62523866264954e-06, "loss": 0.6194, "step": 12573 }, { "epoch": 0.15033656547782734, "grad_norm": 1.638754963874817, "learning_rate": 9.62516511354826e-06, "loss": 0.6172, "step": 12574 }, { "epoch": 0.15034852162268797, "grad_norm": 2.7073745727539062, "learning_rate": 9.625091557511532e-06, "loss": 0.6539, "step": 12575 }, { "epoch": 0.15036047776754863, "grad_norm": 2.2196555137634277, "learning_rate": 9.625017994539461e-06, "loss": 0.5806, "step": 12576 }, { "epoch": 0.1503724339124093, "grad_norm": 2.187113046646118, "learning_rate": 9.62494442463216e-06, "loss": 0.6054, "step": 12577 }, { "epoch": 0.15038439005726995, "grad_norm": 3.1580073833465576, "learning_rate": 9.624870847789739e-06, "loss": 0.6274, "step": 12578 }, { "epoch": 0.15039634620213058, "grad_norm": 2.805002450942993, "learning_rate": 9.624797264012307e-06, "loss": 0.5756, "step": 12579 }, { "epoch": 0.15040830234699124, "grad_norm": 2.4228768348693848, "learning_rate": 9.624723673299973e-06, "loss": 0.6558, "step": 12580 }, { "epoch": 0.1504202584918519, "grad_norm": 2.6360559463500977, "learning_rate": 9.624650075652852e-06, "loss": 0.6193, "step": 12581 }, { "epoch": 0.15043221463671252, "grad_norm": 2.8783857822418213, "learning_rate": 9.624576471071052e-06, "loss": 0.585, "step": 12582 }, { "epoch": 0.15044417078157318, "grad_norm": 2.6625845432281494, "learning_rate": 9.62450285955468e-06, "loss": 0.6494, "step": 12583 }, { "epoch": 0.15045612692643384, "grad_norm": 2.5921568870544434, "learning_rate": 9.624429241103852e-06, "loss": 0.6514, "step": 12584 }, { "epoch": 0.1504680830712945, "grad_norm": 4.212170124053955, "learning_rate": 9.624355615718677e-06, "loss": 0.5447, "step": 12585 }, { "epoch": 0.15048003921615513, "grad_norm": 3.782052516937256, "learning_rate": 9.624281983399263e-06, "loss": 0.5942, "step": 12586 }, { "epoch": 0.1504919953610158, "grad_norm": 1.729050874710083, "learning_rate": 9.624208344145721e-06, "loss": 0.5325, "step": 12587 }, { "epoch": 0.15050395150587645, "grad_norm": 2.83213210105896, "learning_rate": 9.624134697958164e-06, "loss": 0.6978, "step": 12588 }, { "epoch": 0.1505159076507371, "grad_norm": 2.4052069187164307, "learning_rate": 9.6240610448367e-06, "loss": 0.5735, "step": 12589 }, { "epoch": 0.15052786379559774, "grad_norm": 2.0294554233551025, "learning_rate": 9.623987384781438e-06, "loss": 0.639, "step": 12590 }, { "epoch": 0.1505398199404584, "grad_norm": 3.64516544342041, "learning_rate": 9.623913717792492e-06, "loss": 0.7506, "step": 12591 }, { "epoch": 0.15055177608531906, "grad_norm": 2.830146312713623, "learning_rate": 9.623840043869971e-06, "loss": 0.6299, "step": 12592 }, { "epoch": 0.15056373223017971, "grad_norm": 2.4432618618011475, "learning_rate": 9.623766363013984e-06, "loss": 0.5839, "step": 12593 }, { "epoch": 0.15057568837504035, "grad_norm": 2.3524186611175537, "learning_rate": 9.623692675224645e-06, "loss": 0.6548, "step": 12594 }, { "epoch": 0.150587644519901, "grad_norm": 1.8698844909667969, "learning_rate": 9.623618980502061e-06, "loss": 0.6069, "step": 12595 }, { "epoch": 0.15059960066476166, "grad_norm": 17.31698226928711, "learning_rate": 9.623545278846346e-06, "loss": 0.832, "step": 12596 }, { "epoch": 0.1506115568096223, "grad_norm": 2.1026220321655273, "learning_rate": 9.623471570257606e-06, "loss": 0.5332, "step": 12597 }, { "epoch": 0.15062351295448295, "grad_norm": 5.596242904663086, "learning_rate": 9.623397854735955e-06, "loss": 0.6315, "step": 12598 }, { "epoch": 0.1506354690993436, "grad_norm": 2.781741142272949, "learning_rate": 9.623324132281502e-06, "loss": 0.5718, "step": 12599 }, { "epoch": 0.15064742524420427, "grad_norm": 1.9452122449874878, "learning_rate": 9.62325040289436e-06, "loss": 0.6305, "step": 12600 }, { "epoch": 0.1506593813890649, "grad_norm": 3.486114740371704, "learning_rate": 9.623176666574635e-06, "loss": 0.762, "step": 12601 }, { "epoch": 0.15067133753392556, "grad_norm": 2.66916823387146, "learning_rate": 9.62310292332244e-06, "loss": 0.7233, "step": 12602 }, { "epoch": 0.15068329367878622, "grad_norm": 2.1989269256591797, "learning_rate": 9.623029173137888e-06, "loss": 0.685, "step": 12603 }, { "epoch": 0.15069524982364688, "grad_norm": 3.6451127529144287, "learning_rate": 9.622955416021087e-06, "loss": 0.5962, "step": 12604 }, { "epoch": 0.1507072059685075, "grad_norm": 8.639973640441895, "learning_rate": 9.622881651972148e-06, "loss": 0.6501, "step": 12605 }, { "epoch": 0.15071916211336817, "grad_norm": 3.800149440765381, "learning_rate": 9.622807880991179e-06, "loss": 0.5819, "step": 12606 }, { "epoch": 0.15073111825822882, "grad_norm": 2.9820358753204346, "learning_rate": 9.622734103078295e-06, "loss": 0.613, "step": 12607 }, { "epoch": 0.15074307440308946, "grad_norm": 3.654925584793091, "learning_rate": 9.622660318233605e-06, "loss": 0.5934, "step": 12608 }, { "epoch": 0.15075503054795011, "grad_norm": 7.812982559204102, "learning_rate": 9.622586526457218e-06, "loss": 0.6333, "step": 12609 }, { "epoch": 0.15076698669281077, "grad_norm": 4.937738418579102, "learning_rate": 9.622512727749247e-06, "loss": 0.6364, "step": 12610 }, { "epoch": 0.15077894283767143, "grad_norm": 2.560157537460327, "learning_rate": 9.622438922109802e-06, "loss": 0.6478, "step": 12611 }, { "epoch": 0.15079089898253206, "grad_norm": 2.470961332321167, "learning_rate": 9.622365109538994e-06, "loss": 0.4984, "step": 12612 }, { "epoch": 0.15080285512739272, "grad_norm": 2.001844644546509, "learning_rate": 9.622291290036932e-06, "loss": 0.6017, "step": 12613 }, { "epoch": 0.15081481127225338, "grad_norm": 2.034212589263916, "learning_rate": 9.622217463603726e-06, "loss": 0.6325, "step": 12614 }, { "epoch": 0.15082676741711404, "grad_norm": 4.2667717933654785, "learning_rate": 9.622143630239492e-06, "loss": 0.5574, "step": 12615 }, { "epoch": 0.15083872356197467, "grad_norm": 1.9657093286514282, "learning_rate": 9.622069789944337e-06, "loss": 0.7065, "step": 12616 }, { "epoch": 0.15085067970683533, "grad_norm": 3.0827815532684326, "learning_rate": 9.62199594271837e-06, "loss": 0.5865, "step": 12617 }, { "epoch": 0.150862635851696, "grad_norm": 2.870894193649292, "learning_rate": 9.621922088561703e-06, "loss": 0.5756, "step": 12618 }, { "epoch": 0.15087459199655662, "grad_norm": 5.0910325050354, "learning_rate": 9.62184822747445e-06, "loss": 0.6166, "step": 12619 }, { "epoch": 0.15088654814141728, "grad_norm": 2.9536397457122803, "learning_rate": 9.621774359456717e-06, "loss": 0.5831, "step": 12620 }, { "epoch": 0.15089850428627793, "grad_norm": 1.9817330837249756, "learning_rate": 9.621700484508619e-06, "loss": 0.6848, "step": 12621 }, { "epoch": 0.1509104604311386, "grad_norm": 3.269141435623169, "learning_rate": 9.621626602630263e-06, "loss": 0.6699, "step": 12622 }, { "epoch": 0.15092241657599922, "grad_norm": 3.0060789585113525, "learning_rate": 9.621552713821763e-06, "loss": 0.6725, "step": 12623 }, { "epoch": 0.15093437272085988, "grad_norm": 2.9360287189483643, "learning_rate": 9.621478818083227e-06, "loss": 0.5466, "step": 12624 }, { "epoch": 0.15094632886572054, "grad_norm": 12.117486953735352, "learning_rate": 9.621404915414768e-06, "loss": 0.6767, "step": 12625 }, { "epoch": 0.1509582850105812, "grad_norm": 4.508894920349121, "learning_rate": 9.621331005816494e-06, "loss": 0.5652, "step": 12626 }, { "epoch": 0.15097024115544183, "grad_norm": 3.744295835494995, "learning_rate": 9.621257089288517e-06, "loss": 0.6861, "step": 12627 }, { "epoch": 0.1509821973003025, "grad_norm": 1.8443810939788818, "learning_rate": 9.621183165830952e-06, "loss": 0.6322, "step": 12628 }, { "epoch": 0.15099415344516315, "grad_norm": 6.218006134033203, "learning_rate": 9.621109235443905e-06, "loss": 0.6312, "step": 12629 }, { "epoch": 0.15100610959002378, "grad_norm": 3.3514907360076904, "learning_rate": 9.621035298127488e-06, "loss": 0.6351, "step": 12630 }, { "epoch": 0.15101806573488444, "grad_norm": 2.1345248222351074, "learning_rate": 9.620961353881812e-06, "loss": 0.6041, "step": 12631 }, { "epoch": 0.1510300218797451, "grad_norm": 2.370826482772827, "learning_rate": 9.620887402706989e-06, "loss": 0.5893, "step": 12632 }, { "epoch": 0.15104197802460576, "grad_norm": 5.302665710449219, "learning_rate": 9.620813444603128e-06, "loss": 0.6396, "step": 12633 }, { "epoch": 0.1510539341694664, "grad_norm": 2.2714133262634277, "learning_rate": 9.620739479570342e-06, "loss": 0.5769, "step": 12634 }, { "epoch": 0.15106589031432704, "grad_norm": 3.3809103965759277, "learning_rate": 9.62066550760874e-06, "loss": 0.5911, "step": 12635 }, { "epoch": 0.1510778464591877, "grad_norm": 3.3686599731445312, "learning_rate": 9.620591528718433e-06, "loss": 0.6358, "step": 12636 }, { "epoch": 0.15108980260404836, "grad_norm": 2.256948471069336, "learning_rate": 9.620517542899532e-06, "loss": 0.5579, "step": 12637 }, { "epoch": 0.151101758748909, "grad_norm": 2.7761480808258057, "learning_rate": 9.62044355015215e-06, "loss": 0.6715, "step": 12638 }, { "epoch": 0.15111371489376965, "grad_norm": 3.0548489093780518, "learning_rate": 9.620369550476396e-06, "loss": 0.5524, "step": 12639 }, { "epoch": 0.1511256710386303, "grad_norm": 1.7897379398345947, "learning_rate": 9.620295543872383e-06, "loss": 0.6999, "step": 12640 }, { "epoch": 0.15113762718349097, "grad_norm": 8.898972511291504, "learning_rate": 9.620221530340217e-06, "loss": 0.5548, "step": 12641 }, { "epoch": 0.1511495833283516, "grad_norm": 3.010362148284912, "learning_rate": 9.620147509880015e-06, "loss": 0.5881, "step": 12642 }, { "epoch": 0.15116153947321226, "grad_norm": 3.4900829792022705, "learning_rate": 9.620073482491885e-06, "loss": 0.5803, "step": 12643 }, { "epoch": 0.15117349561807292, "grad_norm": 12.437516212463379, "learning_rate": 9.619999448175939e-06, "loss": 0.594, "step": 12644 }, { "epoch": 0.15118545176293355, "grad_norm": 3.263551712036133, "learning_rate": 9.619925406932286e-06, "loss": 0.5808, "step": 12645 }, { "epoch": 0.1511974079077942, "grad_norm": 2.8746235370635986, "learning_rate": 9.619851358761039e-06, "loss": 0.595, "step": 12646 }, { "epoch": 0.15120936405265487, "grad_norm": 3.828655242919922, "learning_rate": 9.619777303662307e-06, "loss": 0.6431, "step": 12647 }, { "epoch": 0.15122132019751552, "grad_norm": 1.8766512870788574, "learning_rate": 9.619703241636205e-06, "loss": 0.5973, "step": 12648 }, { "epoch": 0.15123327634237616, "grad_norm": 3.5890421867370605, "learning_rate": 9.619629172682841e-06, "loss": 0.6074, "step": 12649 }, { "epoch": 0.1512452324872368, "grad_norm": 3.732734203338623, "learning_rate": 9.619555096802326e-06, "loss": 0.5426, "step": 12650 }, { "epoch": 0.15125718863209747, "grad_norm": 2.9075865745544434, "learning_rate": 9.619481013994771e-06, "loss": 0.5761, "step": 12651 }, { "epoch": 0.15126914477695813, "grad_norm": 2.2976489067077637, "learning_rate": 9.619406924260288e-06, "loss": 0.6431, "step": 12652 }, { "epoch": 0.15128110092181876, "grad_norm": 2.4507737159729004, "learning_rate": 9.61933282759899e-06, "loss": 0.7044, "step": 12653 }, { "epoch": 0.15129305706667942, "grad_norm": 3.096947193145752, "learning_rate": 9.619258724010984e-06, "loss": 0.6322, "step": 12654 }, { "epoch": 0.15130501321154008, "grad_norm": 26.125070571899414, "learning_rate": 9.619184613496384e-06, "loss": 0.6036, "step": 12655 }, { "epoch": 0.1513169693564007, "grad_norm": 9.66930866241455, "learning_rate": 9.6191104960553e-06, "loss": 0.5549, "step": 12656 }, { "epoch": 0.15132892550126137, "grad_norm": 2.0228793621063232, "learning_rate": 9.619036371687843e-06, "loss": 0.6767, "step": 12657 }, { "epoch": 0.15134088164612203, "grad_norm": 3.908414840698242, "learning_rate": 9.618962240394125e-06, "loss": 0.6533, "step": 12658 }, { "epoch": 0.15135283779098269, "grad_norm": 3.700629472732544, "learning_rate": 9.618888102174256e-06, "loss": 0.6312, "step": 12659 }, { "epoch": 0.15136479393584332, "grad_norm": 2.049924373626709, "learning_rate": 9.618813957028348e-06, "loss": 0.6281, "step": 12660 }, { "epoch": 0.15137675008070398, "grad_norm": 8.674800872802734, "learning_rate": 9.618739804956514e-06, "loss": 0.566, "step": 12661 }, { "epoch": 0.15138870622556463, "grad_norm": 2.7558388710021973, "learning_rate": 9.61866564595886e-06, "loss": 0.5777, "step": 12662 }, { "epoch": 0.1514006623704253, "grad_norm": 2.2105953693389893, "learning_rate": 9.618591480035503e-06, "loss": 0.5601, "step": 12663 }, { "epoch": 0.15141261851528592, "grad_norm": 7.728433609008789, "learning_rate": 9.61851730718655e-06, "loss": 0.6076, "step": 12664 }, { "epoch": 0.15142457466014658, "grad_norm": 1.9347987174987793, "learning_rate": 9.618443127412116e-06, "loss": 0.6085, "step": 12665 }, { "epoch": 0.15143653080500724, "grad_norm": 4.0980000495910645, "learning_rate": 9.618368940712308e-06, "loss": 0.5518, "step": 12666 }, { "epoch": 0.15144848694986787, "grad_norm": 2.7257070541381836, "learning_rate": 9.61829474708724e-06, "loss": 0.7504, "step": 12667 }, { "epoch": 0.15146044309472853, "grad_norm": 3.3421192169189453, "learning_rate": 9.618220546537023e-06, "loss": 0.6717, "step": 12668 }, { "epoch": 0.1514723992395892, "grad_norm": 2.233494997024536, "learning_rate": 9.618146339061767e-06, "loss": 0.6886, "step": 12669 }, { "epoch": 0.15148435538444985, "grad_norm": 2.3586597442626953, "learning_rate": 9.618072124661586e-06, "loss": 0.5966, "step": 12670 }, { "epoch": 0.15149631152931048, "grad_norm": 3.3206698894500732, "learning_rate": 9.617997903336588e-06, "loss": 0.6496, "step": 12671 }, { "epoch": 0.15150826767417114, "grad_norm": 4.091821670532227, "learning_rate": 9.617923675086886e-06, "loss": 0.6311, "step": 12672 }, { "epoch": 0.1515202238190318, "grad_norm": 2.0311124324798584, "learning_rate": 9.61784943991259e-06, "loss": 0.6482, "step": 12673 }, { "epoch": 0.15153217996389245, "grad_norm": 3.3893117904663086, "learning_rate": 9.617775197813814e-06, "loss": 0.6883, "step": 12674 }, { "epoch": 0.15154413610875309, "grad_norm": 2.8804290294647217, "learning_rate": 9.617700948790667e-06, "loss": 0.6477, "step": 12675 }, { "epoch": 0.15155609225361374, "grad_norm": 5.9913554191589355, "learning_rate": 9.617626692843261e-06, "loss": 0.5854, "step": 12676 }, { "epoch": 0.1515680483984744, "grad_norm": 6.7781901359558105, "learning_rate": 9.617552429971708e-06, "loss": 0.6496, "step": 12677 }, { "epoch": 0.15158000454333503, "grad_norm": 1.9535307884216309, "learning_rate": 9.617478160176116e-06, "loss": 0.5957, "step": 12678 }, { "epoch": 0.1515919606881957, "grad_norm": 3.757213592529297, "learning_rate": 9.617403883456603e-06, "loss": 0.7125, "step": 12679 }, { "epoch": 0.15160391683305635, "grad_norm": 2.9110231399536133, "learning_rate": 9.617329599813274e-06, "loss": 0.604, "step": 12680 }, { "epoch": 0.151615872977917, "grad_norm": 5.967161178588867, "learning_rate": 9.617255309246244e-06, "loss": 0.6009, "step": 12681 }, { "epoch": 0.15162782912277764, "grad_norm": 3.4029459953308105, "learning_rate": 9.617181011755623e-06, "loss": 0.6407, "step": 12682 }, { "epoch": 0.1516397852676383, "grad_norm": 5.98478364944458, "learning_rate": 9.617106707341523e-06, "loss": 0.5765, "step": 12683 }, { "epoch": 0.15165174141249896, "grad_norm": 4.164590358734131, "learning_rate": 9.617032396004053e-06, "loss": 0.5834, "step": 12684 }, { "epoch": 0.15166369755735962, "grad_norm": 2.6427860260009766, "learning_rate": 9.61695807774333e-06, "loss": 0.5814, "step": 12685 }, { "epoch": 0.15167565370222025, "grad_norm": 4.449002265930176, "learning_rate": 9.616883752559459e-06, "loss": 0.6756, "step": 12686 }, { "epoch": 0.1516876098470809, "grad_norm": 2.2361176013946533, "learning_rate": 9.616809420452556e-06, "loss": 0.6126, "step": 12687 }, { "epoch": 0.15169956599194157, "grad_norm": 2.4123709201812744, "learning_rate": 9.61673508142273e-06, "loss": 0.5889, "step": 12688 }, { "epoch": 0.1517115221368022, "grad_norm": 4.084651470184326, "learning_rate": 9.616660735470093e-06, "loss": 0.5536, "step": 12689 }, { "epoch": 0.15172347828166285, "grad_norm": 1.7718480825424194, "learning_rate": 9.616586382594757e-06, "loss": 0.631, "step": 12690 }, { "epoch": 0.1517354344265235, "grad_norm": 3.1271157264709473, "learning_rate": 9.616512022796833e-06, "loss": 0.5354, "step": 12691 }, { "epoch": 0.15174739057138417, "grad_norm": 3.6593902111053467, "learning_rate": 9.616437656076433e-06, "loss": 0.5939, "step": 12692 }, { "epoch": 0.1517593467162448, "grad_norm": 3.8634605407714844, "learning_rate": 9.61636328243367e-06, "loss": 0.6689, "step": 12693 }, { "epoch": 0.15177130286110546, "grad_norm": 5.121244430541992, "learning_rate": 9.616288901868653e-06, "loss": 0.6927, "step": 12694 }, { "epoch": 0.15178325900596612, "grad_norm": 5.274718761444092, "learning_rate": 9.616214514381494e-06, "loss": 0.6069, "step": 12695 }, { "epoch": 0.15179521515082678, "grad_norm": 7.564780235290527, "learning_rate": 9.616140119972305e-06, "loss": 0.6547, "step": 12696 }, { "epoch": 0.1518071712956874, "grad_norm": 3.2986934185028076, "learning_rate": 9.616065718641198e-06, "loss": 0.5266, "step": 12697 }, { "epoch": 0.15181912744054807, "grad_norm": 2.2283642292022705, "learning_rate": 9.615991310388284e-06, "loss": 0.6405, "step": 12698 }, { "epoch": 0.15183108358540873, "grad_norm": 5.289634704589844, "learning_rate": 9.615916895213673e-06, "loss": 0.6981, "step": 12699 }, { "epoch": 0.15184303973026939, "grad_norm": 3.545175313949585, "learning_rate": 9.61584247311748e-06, "loss": 0.6818, "step": 12700 }, { "epoch": 0.15185499587513002, "grad_norm": 8.53524112701416, "learning_rate": 9.615768044099814e-06, "loss": 0.6134, "step": 12701 }, { "epoch": 0.15186695201999068, "grad_norm": 3.3082001209259033, "learning_rate": 9.615693608160787e-06, "loss": 0.6349, "step": 12702 }, { "epoch": 0.15187890816485133, "grad_norm": 2.0208146572113037, "learning_rate": 9.615619165300511e-06, "loss": 0.5764, "step": 12703 }, { "epoch": 0.15189086430971196, "grad_norm": 2.634232759475708, "learning_rate": 9.6155447155191e-06, "loss": 0.6199, "step": 12704 }, { "epoch": 0.15190282045457262, "grad_norm": 2.491339683532715, "learning_rate": 9.615470258816661e-06, "loss": 0.6195, "step": 12705 }, { "epoch": 0.15191477659943328, "grad_norm": 8.386377334594727, "learning_rate": 9.615395795193309e-06, "loss": 0.6186, "step": 12706 }, { "epoch": 0.15192673274429394, "grad_norm": 2.839181661605835, "learning_rate": 9.615321324649153e-06, "loss": 0.617, "step": 12707 }, { "epoch": 0.15193868888915457, "grad_norm": 3.039066791534424, "learning_rate": 9.615246847184307e-06, "loss": 0.6415, "step": 12708 }, { "epoch": 0.15195064503401523, "grad_norm": 3.3631210327148438, "learning_rate": 9.615172362798884e-06, "loss": 0.5985, "step": 12709 }, { "epoch": 0.1519626011788759, "grad_norm": 1.973253607749939, "learning_rate": 9.615097871492991e-06, "loss": 0.683, "step": 12710 }, { "epoch": 0.15197455732373655, "grad_norm": 4.157123565673828, "learning_rate": 9.615023373266744e-06, "loss": 0.6629, "step": 12711 }, { "epoch": 0.15198651346859718, "grad_norm": 1.6014500856399536, "learning_rate": 9.614948868120252e-06, "loss": 0.6065, "step": 12712 }, { "epoch": 0.15199846961345784, "grad_norm": 6.428302764892578, "learning_rate": 9.614874356053627e-06, "loss": 0.6792, "step": 12713 }, { "epoch": 0.1520104257583185, "grad_norm": 2.1761717796325684, "learning_rate": 9.614799837066983e-06, "loss": 0.6772, "step": 12714 }, { "epoch": 0.15202238190317913, "grad_norm": 5.107579708099365, "learning_rate": 9.61472531116043e-06, "loss": 0.6489, "step": 12715 }, { "epoch": 0.15203433804803979, "grad_norm": 2.48284912109375, "learning_rate": 9.61465077833408e-06, "loss": 0.5334, "step": 12716 }, { "epoch": 0.15204629419290044, "grad_norm": 8.091484069824219, "learning_rate": 9.614576238588045e-06, "loss": 0.7183, "step": 12717 }, { "epoch": 0.1520582503377611, "grad_norm": 3.173248052597046, "learning_rate": 9.614501691922436e-06, "loss": 0.6146, "step": 12718 }, { "epoch": 0.15207020648262173, "grad_norm": 8.37496280670166, "learning_rate": 9.614427138337366e-06, "loss": 0.4939, "step": 12719 }, { "epoch": 0.1520821626274824, "grad_norm": 5.317255973815918, "learning_rate": 9.614352577832945e-06, "loss": 0.6229, "step": 12720 }, { "epoch": 0.15209411877234305, "grad_norm": 6.0301737785339355, "learning_rate": 9.614278010409286e-06, "loss": 0.6007, "step": 12721 }, { "epoch": 0.1521060749172037, "grad_norm": 4.388609886169434, "learning_rate": 9.6142034360665e-06, "loss": 0.6355, "step": 12722 }, { "epoch": 0.15211803106206434, "grad_norm": 4.240759372711182, "learning_rate": 9.614128854804701e-06, "loss": 0.5706, "step": 12723 }, { "epoch": 0.152129987206925, "grad_norm": 3.654784917831421, "learning_rate": 9.614054266624e-06, "loss": 0.6442, "step": 12724 }, { "epoch": 0.15214194335178566, "grad_norm": 3.2789905071258545, "learning_rate": 9.613979671524508e-06, "loss": 0.6011, "step": 12725 }, { "epoch": 0.1521538994966463, "grad_norm": 3.1225671768188477, "learning_rate": 9.613905069506336e-06, "loss": 0.628, "step": 12726 }, { "epoch": 0.15216585564150695, "grad_norm": 2.0944671630859375, "learning_rate": 9.613830460569598e-06, "loss": 0.6088, "step": 12727 }, { "epoch": 0.1521778117863676, "grad_norm": 3.539763927459717, "learning_rate": 9.613755844714403e-06, "loss": 0.6564, "step": 12728 }, { "epoch": 0.15218976793122826, "grad_norm": 3.2152469158172607, "learning_rate": 9.613681221940866e-06, "loss": 0.6716, "step": 12729 }, { "epoch": 0.1522017240760889, "grad_norm": 2.959040403366089, "learning_rate": 9.613606592249098e-06, "loss": 0.574, "step": 12730 }, { "epoch": 0.15221368022094955, "grad_norm": 3.8595657348632812, "learning_rate": 9.613531955639209e-06, "loss": 0.721, "step": 12731 }, { "epoch": 0.1522256363658102, "grad_norm": 2.238938331604004, "learning_rate": 9.613457312111313e-06, "loss": 0.6216, "step": 12732 }, { "epoch": 0.15223759251067087, "grad_norm": 3.450186252593994, "learning_rate": 9.613382661665522e-06, "loss": 0.5299, "step": 12733 }, { "epoch": 0.1522495486555315, "grad_norm": 7.777019023895264, "learning_rate": 9.613308004301948e-06, "loss": 0.5457, "step": 12734 }, { "epoch": 0.15226150480039216, "grad_norm": 6.568169593811035, "learning_rate": 9.6132333400207e-06, "loss": 0.5861, "step": 12735 }, { "epoch": 0.15227346094525282, "grad_norm": 1.9438420534133911, "learning_rate": 9.613158668821894e-06, "loss": 0.5951, "step": 12736 }, { "epoch": 0.15228541709011345, "grad_norm": 1.8369569778442383, "learning_rate": 9.61308399070564e-06, "loss": 0.6531, "step": 12737 }, { "epoch": 0.1522973732349741, "grad_norm": 8.572121620178223, "learning_rate": 9.61300930567205e-06, "loss": 0.6312, "step": 12738 }, { "epoch": 0.15230932937983477, "grad_norm": 2.7413196563720703, "learning_rate": 9.612934613721236e-06, "loss": 0.5998, "step": 12739 }, { "epoch": 0.15232128552469543, "grad_norm": 10.719255447387695, "learning_rate": 9.61285991485331e-06, "loss": 0.6031, "step": 12740 }, { "epoch": 0.15233324166955606, "grad_norm": 5.800154685974121, "learning_rate": 9.612785209068385e-06, "loss": 0.6573, "step": 12741 }, { "epoch": 0.15234519781441672, "grad_norm": 3.0842528343200684, "learning_rate": 9.612710496366572e-06, "loss": 0.5875, "step": 12742 }, { "epoch": 0.15235715395927737, "grad_norm": 3.0501585006713867, "learning_rate": 9.612635776747982e-06, "loss": 0.6025, "step": 12743 }, { "epoch": 0.15236911010413803, "grad_norm": 3.7758350372314453, "learning_rate": 9.61256105021273e-06, "loss": 0.6655, "step": 12744 }, { "epoch": 0.15238106624899866, "grad_norm": 1.6978566646575928, "learning_rate": 9.612486316760925e-06, "loss": 0.6795, "step": 12745 }, { "epoch": 0.15239302239385932, "grad_norm": 4.0811076164245605, "learning_rate": 9.61241157639268e-06, "loss": 0.6088, "step": 12746 }, { "epoch": 0.15240497853871998, "grad_norm": 3.0329248905181885, "learning_rate": 9.612336829108108e-06, "loss": 0.6401, "step": 12747 }, { "epoch": 0.1524169346835806, "grad_norm": 3.6453516483306885, "learning_rate": 9.612262074907322e-06, "loss": 0.6301, "step": 12748 }, { "epoch": 0.15242889082844127, "grad_norm": 2.7193541526794434, "learning_rate": 9.61218731379043e-06, "loss": 0.7559, "step": 12749 }, { "epoch": 0.15244084697330193, "grad_norm": 2.2587406635284424, "learning_rate": 9.612112545757548e-06, "loss": 0.5913, "step": 12750 }, { "epoch": 0.1524528031181626, "grad_norm": 2.8371052742004395, "learning_rate": 9.612037770808786e-06, "loss": 0.6262, "step": 12751 }, { "epoch": 0.15246475926302322, "grad_norm": 6.958185195922852, "learning_rate": 9.61196298894426e-06, "loss": 0.575, "step": 12752 }, { "epoch": 0.15247671540788388, "grad_norm": 1.8391705751419067, "learning_rate": 9.611888200164076e-06, "loss": 0.652, "step": 12753 }, { "epoch": 0.15248867155274454, "grad_norm": 1.6901851892471313, "learning_rate": 9.611813404468352e-06, "loss": 0.5781, "step": 12754 }, { "epoch": 0.1525006276976052, "grad_norm": 3.027540445327759, "learning_rate": 9.611738601857194e-06, "loss": 0.6614, "step": 12755 }, { "epoch": 0.15251258384246583, "grad_norm": 2.8981378078460693, "learning_rate": 9.61166379233072e-06, "loss": 0.6847, "step": 12756 }, { "epoch": 0.15252453998732649, "grad_norm": 4.449352264404297, "learning_rate": 9.611588975889037e-06, "loss": 0.6538, "step": 12757 }, { "epoch": 0.15253649613218714, "grad_norm": 4.639063835144043, "learning_rate": 9.611514152532263e-06, "loss": 0.6206, "step": 12758 }, { "epoch": 0.1525484522770478, "grad_norm": 2.2981536388397217, "learning_rate": 9.611439322260506e-06, "loss": 0.6729, "step": 12759 }, { "epoch": 0.15256040842190843, "grad_norm": 3.048414707183838, "learning_rate": 9.611364485073879e-06, "loss": 0.6295, "step": 12760 }, { "epoch": 0.1525723645667691, "grad_norm": 1.425513744354248, "learning_rate": 9.611289640972495e-06, "loss": 0.6158, "step": 12761 }, { "epoch": 0.15258432071162975, "grad_norm": 4.090348243713379, "learning_rate": 9.611214789956466e-06, "loss": 0.6082, "step": 12762 }, { "epoch": 0.15259627685649038, "grad_norm": 2.5533149242401123, "learning_rate": 9.611139932025904e-06, "loss": 0.5635, "step": 12763 }, { "epoch": 0.15260823300135104, "grad_norm": 3.577808141708374, "learning_rate": 9.61106506718092e-06, "loss": 0.5471, "step": 12764 }, { "epoch": 0.1526201891462117, "grad_norm": 2.8946785926818848, "learning_rate": 9.610990195421629e-06, "loss": 0.6661, "step": 12765 }, { "epoch": 0.15263214529107236, "grad_norm": 2.7039108276367188, "learning_rate": 9.610915316748143e-06, "loss": 0.5925, "step": 12766 }, { "epoch": 0.152644101435933, "grad_norm": 1.832679033279419, "learning_rate": 9.610840431160569e-06, "loss": 0.6329, "step": 12767 }, { "epoch": 0.15265605758079365, "grad_norm": 2.5640339851379395, "learning_rate": 9.610765538659026e-06, "loss": 0.631, "step": 12768 }, { "epoch": 0.1526680137256543, "grad_norm": 8.139036178588867, "learning_rate": 9.610690639243624e-06, "loss": 0.6491, "step": 12769 }, { "epoch": 0.15267996987051496, "grad_norm": 1.8321939706802368, "learning_rate": 9.610615732914475e-06, "loss": 0.6123, "step": 12770 }, { "epoch": 0.1526919260153756, "grad_norm": 2.291067361831665, "learning_rate": 9.61054081967169e-06, "loss": 0.5888, "step": 12771 }, { "epoch": 0.15270388216023625, "grad_norm": 3.8183352947235107, "learning_rate": 9.610465899515385e-06, "loss": 0.5891, "step": 12772 }, { "epoch": 0.1527158383050969, "grad_norm": 2.1571707725524902, "learning_rate": 9.610390972445667e-06, "loss": 0.5899, "step": 12773 }, { "epoch": 0.15272779444995754, "grad_norm": 2.8700180053710938, "learning_rate": 9.610316038462652e-06, "loss": 0.5819, "step": 12774 }, { "epoch": 0.1527397505948182, "grad_norm": 3.955279588699341, "learning_rate": 9.610241097566454e-06, "loss": 0.6071, "step": 12775 }, { "epoch": 0.15275170673967886, "grad_norm": 2.5834403038024902, "learning_rate": 9.61016614975718e-06, "loss": 0.6107, "step": 12776 }, { "epoch": 0.15276366288453952, "grad_norm": 2.5019967555999756, "learning_rate": 9.610091195034949e-06, "loss": 0.6017, "step": 12777 }, { "epoch": 0.15277561902940015, "grad_norm": 2.6102840900421143, "learning_rate": 9.610016233399868e-06, "loss": 0.5905, "step": 12778 }, { "epoch": 0.1527875751742608, "grad_norm": 2.8228070735931396, "learning_rate": 9.60994126485205e-06, "loss": 0.7814, "step": 12779 }, { "epoch": 0.15279953131912147, "grad_norm": 2.4776740074157715, "learning_rate": 9.609866289391611e-06, "loss": 0.6042, "step": 12780 }, { "epoch": 0.15281148746398213, "grad_norm": 2.12326717376709, "learning_rate": 9.609791307018658e-06, "loss": 0.6301, "step": 12781 }, { "epoch": 0.15282344360884276, "grad_norm": 3.739192485809326, "learning_rate": 9.609716317733311e-06, "loss": 0.6961, "step": 12782 }, { "epoch": 0.15283539975370342, "grad_norm": 2.8544299602508545, "learning_rate": 9.609641321535674e-06, "loss": 0.6734, "step": 12783 }, { "epoch": 0.15284735589856407, "grad_norm": 3.521263360977173, "learning_rate": 9.609566318425866e-06, "loss": 0.6493, "step": 12784 }, { "epoch": 0.1528593120434247, "grad_norm": 1.9628586769104004, "learning_rate": 9.609491308403995e-06, "loss": 0.6951, "step": 12785 }, { "epoch": 0.15287126818828536, "grad_norm": 2.3008179664611816, "learning_rate": 9.609416291470177e-06, "loss": 0.6317, "step": 12786 }, { "epoch": 0.15288322433314602, "grad_norm": 17.955162048339844, "learning_rate": 9.609341267624522e-06, "loss": 0.6573, "step": 12787 }, { "epoch": 0.15289518047800668, "grad_norm": 4.151383876800537, "learning_rate": 9.609266236867144e-06, "loss": 0.6218, "step": 12788 }, { "epoch": 0.1529071366228673, "grad_norm": 2.94779372215271, "learning_rate": 9.609191199198154e-06, "loss": 0.6534, "step": 12789 }, { "epoch": 0.15291909276772797, "grad_norm": 10.165889739990234, "learning_rate": 9.609116154617667e-06, "loss": 0.5693, "step": 12790 }, { "epoch": 0.15293104891258863, "grad_norm": 2.70729398727417, "learning_rate": 9.609041103125793e-06, "loss": 0.6701, "step": 12791 }, { "epoch": 0.1529430050574493, "grad_norm": 2.171692371368408, "learning_rate": 9.608966044722644e-06, "loss": 0.6252, "step": 12792 }, { "epoch": 0.15295496120230992, "grad_norm": 1.6718730926513672, "learning_rate": 9.608890979408337e-06, "loss": 0.6534, "step": 12793 }, { "epoch": 0.15296691734717058, "grad_norm": 3.631784677505493, "learning_rate": 9.608815907182979e-06, "loss": 0.5447, "step": 12794 }, { "epoch": 0.15297887349203124, "grad_norm": 3.0886738300323486, "learning_rate": 9.608740828046686e-06, "loss": 0.5825, "step": 12795 }, { "epoch": 0.15299082963689187, "grad_norm": 3.4344358444213867, "learning_rate": 9.60866574199957e-06, "loss": 0.7199, "step": 12796 }, { "epoch": 0.15300278578175253, "grad_norm": 3.07507586479187, "learning_rate": 9.608590649041744e-06, "loss": 0.7099, "step": 12797 }, { "epoch": 0.15301474192661318, "grad_norm": 5.129120826721191, "learning_rate": 9.608515549173319e-06, "loss": 0.6125, "step": 12798 }, { "epoch": 0.15302669807147384, "grad_norm": 2.298915386199951, "learning_rate": 9.60844044239441e-06, "loss": 0.6787, "step": 12799 }, { "epoch": 0.15303865421633447, "grad_norm": 3.259016275405884, "learning_rate": 9.608365328705125e-06, "loss": 0.6799, "step": 12800 }, { "epoch": 0.15305061036119513, "grad_norm": 2.8379106521606445, "learning_rate": 9.608290208105582e-06, "loss": 0.6127, "step": 12801 }, { "epoch": 0.1530625665060558, "grad_norm": 2.232727289199829, "learning_rate": 9.608215080595891e-06, "loss": 0.6238, "step": 12802 }, { "epoch": 0.15307452265091645, "grad_norm": 3.3237245082855225, "learning_rate": 9.608139946176165e-06, "loss": 0.6386, "step": 12803 }, { "epoch": 0.15308647879577708, "grad_norm": 2.705540657043457, "learning_rate": 9.608064804846517e-06, "loss": 0.558, "step": 12804 }, { "epoch": 0.15309843494063774, "grad_norm": 2.345139503479004, "learning_rate": 9.607989656607059e-06, "loss": 0.6812, "step": 12805 }, { "epoch": 0.1531103910854984, "grad_norm": 3.002840518951416, "learning_rate": 9.607914501457906e-06, "loss": 0.6409, "step": 12806 }, { "epoch": 0.15312234723035903, "grad_norm": 10.618795394897461, "learning_rate": 9.607839339399168e-06, "loss": 0.5901, "step": 12807 }, { "epoch": 0.1531343033752197, "grad_norm": 17.625730514526367, "learning_rate": 9.607764170430958e-06, "loss": 0.6679, "step": 12808 }, { "epoch": 0.15314625952008035, "grad_norm": 2.369051694869995, "learning_rate": 9.607688994553388e-06, "loss": 0.6819, "step": 12809 }, { "epoch": 0.153158215664941, "grad_norm": 2.2503201961517334, "learning_rate": 9.607613811766575e-06, "loss": 0.6469, "step": 12810 }, { "epoch": 0.15317017180980164, "grad_norm": 8.66479206085205, "learning_rate": 9.607538622070625e-06, "loss": 0.6018, "step": 12811 }, { "epoch": 0.1531821279546623, "grad_norm": 1.7148051261901855, "learning_rate": 9.607463425465657e-06, "loss": 0.5908, "step": 12812 }, { "epoch": 0.15319408409952295, "grad_norm": 1.7043397426605225, "learning_rate": 9.607388221951781e-06, "loss": 0.6154, "step": 12813 }, { "epoch": 0.1532060402443836, "grad_norm": 14.970388412475586, "learning_rate": 9.607313011529109e-06, "loss": 0.5711, "step": 12814 }, { "epoch": 0.15321799638924424, "grad_norm": 3.958622932434082, "learning_rate": 9.607237794197754e-06, "loss": 0.6417, "step": 12815 }, { "epoch": 0.1532299525341049, "grad_norm": 6.718023777008057, "learning_rate": 9.607162569957833e-06, "loss": 0.6816, "step": 12816 }, { "epoch": 0.15324190867896556, "grad_norm": 5.8764472007751465, "learning_rate": 9.607087338809451e-06, "loss": 0.5634, "step": 12817 }, { "epoch": 0.15325386482382622, "grad_norm": 2.2181854248046875, "learning_rate": 9.607012100752728e-06, "loss": 0.5905, "step": 12818 }, { "epoch": 0.15326582096868685, "grad_norm": 3.262458086013794, "learning_rate": 9.606936855787774e-06, "loss": 0.6419, "step": 12819 }, { "epoch": 0.1532777771135475, "grad_norm": 6.9582085609436035, "learning_rate": 9.6068616039147e-06, "loss": 0.6222, "step": 12820 }, { "epoch": 0.15328973325840817, "grad_norm": 2.2962417602539062, "learning_rate": 9.606786345133623e-06, "loss": 0.6227, "step": 12821 }, { "epoch": 0.1533016894032688, "grad_norm": 2.385570526123047, "learning_rate": 9.606711079444652e-06, "loss": 0.6354, "step": 12822 }, { "epoch": 0.15331364554812946, "grad_norm": 8.197830200195312, "learning_rate": 9.606635806847901e-06, "loss": 0.5959, "step": 12823 }, { "epoch": 0.15332560169299012, "grad_norm": 2.3402655124664307, "learning_rate": 9.606560527343485e-06, "loss": 0.6657, "step": 12824 }, { "epoch": 0.15333755783785077, "grad_norm": 5.346144199371338, "learning_rate": 9.606485240931513e-06, "loss": 0.5132, "step": 12825 }, { "epoch": 0.1533495139827114, "grad_norm": 2.6593692302703857, "learning_rate": 9.606409947612102e-06, "loss": 0.655, "step": 12826 }, { "epoch": 0.15336147012757206, "grad_norm": 2.9160404205322266, "learning_rate": 9.606334647385362e-06, "loss": 0.6483, "step": 12827 }, { "epoch": 0.15337342627243272, "grad_norm": 2.5343210697174072, "learning_rate": 9.606259340251406e-06, "loss": 0.6935, "step": 12828 }, { "epoch": 0.15338538241729338, "grad_norm": 1.6787288188934326, "learning_rate": 9.60618402621035e-06, "loss": 0.5479, "step": 12829 }, { "epoch": 0.153397338562154, "grad_norm": 3.5197689533233643, "learning_rate": 9.606108705262303e-06, "loss": 0.613, "step": 12830 }, { "epoch": 0.15340929470701467, "grad_norm": 1.9213240146636963, "learning_rate": 9.60603337740738e-06, "loss": 0.5723, "step": 12831 }, { "epoch": 0.15342125085187533, "grad_norm": 3.932978868484497, "learning_rate": 9.605958042645694e-06, "loss": 0.6087, "step": 12832 }, { "epoch": 0.15343320699673596, "grad_norm": 2.4272851943969727, "learning_rate": 9.605882700977355e-06, "loss": 0.7338, "step": 12833 }, { "epoch": 0.15344516314159662, "grad_norm": 2.0697689056396484, "learning_rate": 9.60580735240248e-06, "loss": 0.6118, "step": 12834 }, { "epoch": 0.15345711928645728, "grad_norm": 6.168189525604248, "learning_rate": 9.605731996921182e-06, "loss": 0.716, "step": 12835 }, { "epoch": 0.15346907543131794, "grad_norm": 1.975456953048706, "learning_rate": 9.605656634533572e-06, "loss": 0.5563, "step": 12836 }, { "epoch": 0.15348103157617857, "grad_norm": 3.2209503650665283, "learning_rate": 9.605581265239763e-06, "loss": 0.6965, "step": 12837 }, { "epoch": 0.15349298772103923, "grad_norm": 1.7406892776489258, "learning_rate": 9.605505889039868e-06, "loss": 0.652, "step": 12838 }, { "epoch": 0.15350494386589988, "grad_norm": 2.2732324600219727, "learning_rate": 9.605430505934e-06, "loss": 0.5771, "step": 12839 }, { "epoch": 0.15351690001076054, "grad_norm": 2.4898927211761475, "learning_rate": 9.605355115922274e-06, "loss": 0.6408, "step": 12840 }, { "epoch": 0.15352885615562117, "grad_norm": 2.8870956897735596, "learning_rate": 9.605279719004804e-06, "loss": 0.6717, "step": 12841 }, { "epoch": 0.15354081230048183, "grad_norm": 2.979609727859497, "learning_rate": 9.605204315181697e-06, "loss": 0.5938, "step": 12842 }, { "epoch": 0.1535527684453425, "grad_norm": 5.3039960861206055, "learning_rate": 9.605128904453069e-06, "loss": 0.6118, "step": 12843 }, { "epoch": 0.15356472459020312, "grad_norm": 11.535902976989746, "learning_rate": 9.605053486819036e-06, "loss": 0.6496, "step": 12844 }, { "epoch": 0.15357668073506378, "grad_norm": 2.713283061981201, "learning_rate": 9.604978062279709e-06, "loss": 0.5381, "step": 12845 }, { "epoch": 0.15358863687992444, "grad_norm": 4.084646701812744, "learning_rate": 9.6049026308352e-06, "loss": 0.6217, "step": 12846 }, { "epoch": 0.1536005930247851, "grad_norm": 2.1570091247558594, "learning_rate": 9.604827192485624e-06, "loss": 0.6696, "step": 12847 }, { "epoch": 0.15361254916964573, "grad_norm": 3.6680305004119873, "learning_rate": 9.604751747231093e-06, "loss": 0.5873, "step": 12848 }, { "epoch": 0.1536245053145064, "grad_norm": 2.6194212436676025, "learning_rate": 9.60467629507172e-06, "loss": 0.6466, "step": 12849 }, { "epoch": 0.15363646145936705, "grad_norm": 1.739309310913086, "learning_rate": 9.604600836007619e-06, "loss": 0.5768, "step": 12850 }, { "epoch": 0.1536484176042277, "grad_norm": 2.2609152793884277, "learning_rate": 9.604525370038902e-06, "loss": 0.5834, "step": 12851 }, { "epoch": 0.15366037374908834, "grad_norm": 3.3347549438476562, "learning_rate": 9.604449897165685e-06, "loss": 0.6368, "step": 12852 }, { "epoch": 0.153672329893949, "grad_norm": 2.2931461334228516, "learning_rate": 9.604374417388076e-06, "loss": 0.6202, "step": 12853 }, { "epoch": 0.15368428603880965, "grad_norm": 2.2615675926208496, "learning_rate": 9.604298930706192e-06, "loss": 0.5651, "step": 12854 }, { "epoch": 0.15369624218367028, "grad_norm": 17.946624755859375, "learning_rate": 9.604223437120146e-06, "loss": 0.5495, "step": 12855 }, { "epoch": 0.15370819832853094, "grad_norm": 2.5360403060913086, "learning_rate": 9.604147936630051e-06, "loss": 0.6963, "step": 12856 }, { "epoch": 0.1537201544733916, "grad_norm": 1.855007290840149, "learning_rate": 9.60407242923602e-06, "loss": 0.5764, "step": 12857 }, { "epoch": 0.15373211061825226, "grad_norm": 91.07976531982422, "learning_rate": 9.603996914938165e-06, "loss": 0.5466, "step": 12858 }, { "epoch": 0.1537440667631129, "grad_norm": 2.6576032638549805, "learning_rate": 9.6039213937366e-06, "loss": 0.6439, "step": 12859 }, { "epoch": 0.15375602290797355, "grad_norm": 32.117713928222656, "learning_rate": 9.60384586563144e-06, "loss": 0.6554, "step": 12860 }, { "epoch": 0.1537679790528342, "grad_norm": 17.693344116210938, "learning_rate": 9.603770330622796e-06, "loss": 0.5476, "step": 12861 }, { "epoch": 0.15377993519769487, "grad_norm": 2.0432560443878174, "learning_rate": 9.603694788710783e-06, "loss": 0.6468, "step": 12862 }, { "epoch": 0.1537918913425555, "grad_norm": 2.199083089828491, "learning_rate": 9.60361923989551e-06, "loss": 0.5236, "step": 12863 }, { "epoch": 0.15380384748741616, "grad_norm": 2.828017473220825, "learning_rate": 9.603543684177097e-06, "loss": 0.5744, "step": 12864 }, { "epoch": 0.15381580363227682, "grad_norm": 2.301083564758301, "learning_rate": 9.603468121555653e-06, "loss": 0.6756, "step": 12865 }, { "epoch": 0.15382775977713745, "grad_norm": 3.647468328475952, "learning_rate": 9.603392552031292e-06, "loss": 0.573, "step": 12866 }, { "epoch": 0.1538397159219981, "grad_norm": 2.881019353866577, "learning_rate": 9.603316975604126e-06, "loss": 0.573, "step": 12867 }, { "epoch": 0.15385167206685876, "grad_norm": 2.5910732746124268, "learning_rate": 9.603241392274272e-06, "loss": 0.572, "step": 12868 }, { "epoch": 0.15386362821171942, "grad_norm": 1.845504879951477, "learning_rate": 9.603165802041841e-06, "loss": 0.6014, "step": 12869 }, { "epoch": 0.15387558435658005, "grad_norm": 4.536710262298584, "learning_rate": 9.603090204906945e-06, "loss": 0.6612, "step": 12870 }, { "epoch": 0.1538875405014407, "grad_norm": 4.563742637634277, "learning_rate": 9.6030146008697e-06, "loss": 0.7553, "step": 12871 }, { "epoch": 0.15389949664630137, "grad_norm": 16.502666473388672, "learning_rate": 9.602938989930217e-06, "loss": 0.6432, "step": 12872 }, { "epoch": 0.15391145279116203, "grad_norm": 2.3775699138641357, "learning_rate": 9.602863372088614e-06, "loss": 0.5636, "step": 12873 }, { "epoch": 0.15392340893602266, "grad_norm": 4.654168605804443, "learning_rate": 9.602787747344998e-06, "loss": 0.7423, "step": 12874 }, { "epoch": 0.15393536508088332, "grad_norm": 1.9037727117538452, "learning_rate": 9.602712115699484e-06, "loss": 0.571, "step": 12875 }, { "epoch": 0.15394732122574398, "grad_norm": 2.4267168045043945, "learning_rate": 9.60263647715219e-06, "loss": 0.5649, "step": 12876 }, { "epoch": 0.15395927737060464, "grad_norm": 5.535449028015137, "learning_rate": 9.602560831703225e-06, "loss": 0.5202, "step": 12877 }, { "epoch": 0.15397123351546527, "grad_norm": 8.97606086730957, "learning_rate": 9.602485179352703e-06, "loss": 0.5893, "step": 12878 }, { "epoch": 0.15398318966032593, "grad_norm": 1.911427617073059, "learning_rate": 9.60240952010074e-06, "loss": 0.5695, "step": 12879 }, { "epoch": 0.15399514580518658, "grad_norm": 2.1272778511047363, "learning_rate": 9.602333853947446e-06, "loss": 0.6943, "step": 12880 }, { "epoch": 0.15400710195004721, "grad_norm": 3.063690185546875, "learning_rate": 9.602258180892937e-06, "loss": 0.5896, "step": 12881 }, { "epoch": 0.15401905809490787, "grad_norm": 2.663756847381592, "learning_rate": 9.602182500937325e-06, "loss": 0.6493, "step": 12882 }, { "epoch": 0.15403101423976853, "grad_norm": 1.9315874576568604, "learning_rate": 9.602106814080723e-06, "loss": 0.6034, "step": 12883 }, { "epoch": 0.1540429703846292, "grad_norm": 4.513734340667725, "learning_rate": 9.602031120323248e-06, "loss": 0.6901, "step": 12884 }, { "epoch": 0.15405492652948982, "grad_norm": 4.1772942543029785, "learning_rate": 9.601955419665009e-06, "loss": 0.6511, "step": 12885 }, { "epoch": 0.15406688267435048, "grad_norm": 4.281466960906982, "learning_rate": 9.601879712106121e-06, "loss": 0.6546, "step": 12886 }, { "epoch": 0.15407883881921114, "grad_norm": 2.1985907554626465, "learning_rate": 9.6018039976467e-06, "loss": 0.5927, "step": 12887 }, { "epoch": 0.1540907949640718, "grad_norm": 2.1843864917755127, "learning_rate": 9.601728276286857e-06, "loss": 0.629, "step": 12888 }, { "epoch": 0.15410275110893243, "grad_norm": 3.4495961666107178, "learning_rate": 9.601652548026705e-06, "loss": 0.6556, "step": 12889 }, { "epoch": 0.1541147072537931, "grad_norm": 10.840367317199707, "learning_rate": 9.60157681286636e-06, "loss": 0.5812, "step": 12890 }, { "epoch": 0.15412666339865375, "grad_norm": 5.875367641448975, "learning_rate": 9.601501070805933e-06, "loss": 0.6716, "step": 12891 }, { "epoch": 0.15413861954351438, "grad_norm": 2.945389986038208, "learning_rate": 9.60142532184554e-06, "loss": 0.648, "step": 12892 }, { "epoch": 0.15415057568837504, "grad_norm": 2.476172924041748, "learning_rate": 9.601349565985292e-06, "loss": 0.5476, "step": 12893 }, { "epoch": 0.1541625318332357, "grad_norm": 3.6261916160583496, "learning_rate": 9.601273803225305e-06, "loss": 0.6487, "step": 12894 }, { "epoch": 0.15417448797809635, "grad_norm": 3.0824999809265137, "learning_rate": 9.601198033565691e-06, "loss": 0.6048, "step": 12895 }, { "epoch": 0.15418644412295698, "grad_norm": 2.2766976356506348, "learning_rate": 9.601122257006563e-06, "loss": 0.5842, "step": 12896 }, { "epoch": 0.15419840026781764, "grad_norm": 2.7955150604248047, "learning_rate": 9.601046473548037e-06, "loss": 0.6059, "step": 12897 }, { "epoch": 0.1542103564126783, "grad_norm": 3.2627570629119873, "learning_rate": 9.600970683190227e-06, "loss": 0.6089, "step": 12898 }, { "epoch": 0.15422231255753896, "grad_norm": 2.9862401485443115, "learning_rate": 9.600894885933242e-06, "loss": 0.7218, "step": 12899 }, { "epoch": 0.1542342687023996, "grad_norm": 16.02581787109375, "learning_rate": 9.600819081777202e-06, "loss": 0.683, "step": 12900 }, { "epoch": 0.15424622484726025, "grad_norm": 2.199409246444702, "learning_rate": 9.600743270722215e-06, "loss": 0.6476, "step": 12901 }, { "epoch": 0.1542581809921209, "grad_norm": 2.7691683769226074, "learning_rate": 9.600667452768398e-06, "loss": 0.5742, "step": 12902 }, { "epoch": 0.15427013713698154, "grad_norm": 2.7507009506225586, "learning_rate": 9.600591627915866e-06, "loss": 0.687, "step": 12903 }, { "epoch": 0.1542820932818422, "grad_norm": 11.586731910705566, "learning_rate": 9.600515796164726e-06, "loss": 0.6072, "step": 12904 }, { "epoch": 0.15429404942670286, "grad_norm": 1.7212949991226196, "learning_rate": 9.600439957515099e-06, "loss": 0.5802, "step": 12905 }, { "epoch": 0.15430600557156351, "grad_norm": 4.342515468597412, "learning_rate": 9.600364111967095e-06, "loss": 0.5284, "step": 12906 }, { "epoch": 0.15431796171642415, "grad_norm": 2.0182747840881348, "learning_rate": 9.60028825952083e-06, "loss": 0.6959, "step": 12907 }, { "epoch": 0.1543299178612848, "grad_norm": 4.362251281738281, "learning_rate": 9.600212400176416e-06, "loss": 0.6361, "step": 12908 }, { "epoch": 0.15434187400614546, "grad_norm": 2.108035087585449, "learning_rate": 9.600136533933966e-06, "loss": 0.5674, "step": 12909 }, { "epoch": 0.15435383015100612, "grad_norm": 2.6893274784088135, "learning_rate": 9.600060660793596e-06, "loss": 0.6489, "step": 12910 }, { "epoch": 0.15436578629586675, "grad_norm": 3.388500928878784, "learning_rate": 9.599984780755419e-06, "loss": 0.5982, "step": 12911 }, { "epoch": 0.1543777424407274, "grad_norm": 2.6259422302246094, "learning_rate": 9.599908893819547e-06, "loss": 0.7165, "step": 12912 }, { "epoch": 0.15438969858558807, "grad_norm": 1.8276607990264893, "learning_rate": 9.599832999986096e-06, "loss": 0.5479, "step": 12913 }, { "epoch": 0.1544016547304487, "grad_norm": 2.6322481632232666, "learning_rate": 9.59975709925518e-06, "loss": 0.5329, "step": 12914 }, { "epoch": 0.15441361087530936, "grad_norm": 2.554126739501953, "learning_rate": 9.59968119162691e-06, "loss": 0.6679, "step": 12915 }, { "epoch": 0.15442556702017002, "grad_norm": 2.9221553802490234, "learning_rate": 9.599605277101403e-06, "loss": 0.564, "step": 12916 }, { "epoch": 0.15443752316503068, "grad_norm": 1.8601620197296143, "learning_rate": 9.599529355678772e-06, "loss": 0.5763, "step": 12917 }, { "epoch": 0.1544494793098913, "grad_norm": 3.2398929595947266, "learning_rate": 9.599453427359128e-06, "loss": 0.6354, "step": 12918 }, { "epoch": 0.15446143545475197, "grad_norm": 4.1724467277526855, "learning_rate": 9.59937749214259e-06, "loss": 0.5178, "step": 12919 }, { "epoch": 0.15447339159961263, "grad_norm": 8.922088623046875, "learning_rate": 9.599301550029268e-06, "loss": 0.6152, "step": 12920 }, { "epoch": 0.15448534774447328, "grad_norm": 2.0165457725524902, "learning_rate": 9.599225601019276e-06, "loss": 0.5544, "step": 12921 }, { "epoch": 0.15449730388933391, "grad_norm": 3.703427791595459, "learning_rate": 9.59914964511273e-06, "loss": 0.5867, "step": 12922 }, { "epoch": 0.15450926003419457, "grad_norm": 3.4619505405426025, "learning_rate": 9.599073682309742e-06, "loss": 0.6665, "step": 12923 }, { "epoch": 0.15452121617905523, "grad_norm": 2.5564358234405518, "learning_rate": 9.598997712610427e-06, "loss": 0.5988, "step": 12924 }, { "epoch": 0.15453317232391586, "grad_norm": 2.697648048400879, "learning_rate": 9.598921736014898e-06, "loss": 0.5934, "step": 12925 }, { "epoch": 0.15454512846877652, "grad_norm": 3.2414538860321045, "learning_rate": 9.598845752523271e-06, "loss": 0.5325, "step": 12926 }, { "epoch": 0.15455708461363718, "grad_norm": 4.404446601867676, "learning_rate": 9.598769762135657e-06, "loss": 0.6459, "step": 12927 }, { "epoch": 0.15456904075849784, "grad_norm": 2.4129936695098877, "learning_rate": 9.598693764852174e-06, "loss": 0.6851, "step": 12928 }, { "epoch": 0.15458099690335847, "grad_norm": 3.353853464126587, "learning_rate": 9.598617760672932e-06, "loss": 0.7018, "step": 12929 }, { "epoch": 0.15459295304821913, "grad_norm": 4.888699531555176, "learning_rate": 9.598541749598044e-06, "loss": 0.6175, "step": 12930 }, { "epoch": 0.1546049091930798, "grad_norm": 6.746058464050293, "learning_rate": 9.59846573162763e-06, "loss": 0.5723, "step": 12931 }, { "epoch": 0.15461686533794045, "grad_norm": 3.6530025005340576, "learning_rate": 9.598389706761797e-06, "loss": 0.6392, "step": 12932 }, { "epoch": 0.15462882148280108, "grad_norm": 3.0545566082000732, "learning_rate": 9.598313675000664e-06, "loss": 0.6152, "step": 12933 }, { "epoch": 0.15464077762766174, "grad_norm": 5.847268581390381, "learning_rate": 9.598237636344342e-06, "loss": 0.6394, "step": 12934 }, { "epoch": 0.1546527337725224, "grad_norm": 4.99573278427124, "learning_rate": 9.598161590792949e-06, "loss": 0.6161, "step": 12935 }, { "epoch": 0.15466468991738305, "grad_norm": 2.83152174949646, "learning_rate": 9.598085538346594e-06, "loss": 0.6496, "step": 12936 }, { "epoch": 0.15467664606224368, "grad_norm": 2.874114751815796, "learning_rate": 9.598009479005395e-06, "loss": 0.6912, "step": 12937 }, { "epoch": 0.15468860220710434, "grad_norm": 41.84185028076172, "learning_rate": 9.597933412769462e-06, "loss": 0.6204, "step": 12938 }, { "epoch": 0.154700558351965, "grad_norm": 2.649833917617798, "learning_rate": 9.597857339638915e-06, "loss": 0.6211, "step": 12939 }, { "epoch": 0.15471251449682563, "grad_norm": 3.3797433376312256, "learning_rate": 9.597781259613861e-06, "loss": 0.6241, "step": 12940 }, { "epoch": 0.1547244706416863, "grad_norm": 36.026432037353516, "learning_rate": 9.59770517269442e-06, "loss": 0.6282, "step": 12941 }, { "epoch": 0.15473642678654695, "grad_norm": 3.691622257232666, "learning_rate": 9.597629078880704e-06, "loss": 0.6313, "step": 12942 }, { "epoch": 0.1547483829314076, "grad_norm": 2.4372901916503906, "learning_rate": 9.597552978172826e-06, "loss": 0.5433, "step": 12943 }, { "epoch": 0.15476033907626824, "grad_norm": 3.7362589836120605, "learning_rate": 9.5974768705709e-06, "loss": 0.6982, "step": 12944 }, { "epoch": 0.1547722952211289, "grad_norm": 3.992798089981079, "learning_rate": 9.597400756075042e-06, "loss": 0.5563, "step": 12945 }, { "epoch": 0.15478425136598956, "grad_norm": 2.9870495796203613, "learning_rate": 9.597324634685365e-06, "loss": 0.6209, "step": 12946 }, { "epoch": 0.15479620751085021, "grad_norm": 1.8541111946105957, "learning_rate": 9.597248506401983e-06, "loss": 0.5551, "step": 12947 }, { "epoch": 0.15480816365571085, "grad_norm": 2.9325037002563477, "learning_rate": 9.59717237122501e-06, "loss": 0.6294, "step": 12948 }, { "epoch": 0.1548201198005715, "grad_norm": 4.3916239738464355, "learning_rate": 9.597096229154563e-06, "loss": 0.607, "step": 12949 }, { "epoch": 0.15483207594543216, "grad_norm": 2.0571908950805664, "learning_rate": 9.597020080190753e-06, "loss": 0.5338, "step": 12950 }, { "epoch": 0.1548440320902928, "grad_norm": 2.3231022357940674, "learning_rate": 9.596943924333695e-06, "loss": 0.6258, "step": 12951 }, { "epoch": 0.15485598823515345, "grad_norm": 2.3222992420196533, "learning_rate": 9.596867761583502e-06, "loss": 0.6226, "step": 12952 }, { "epoch": 0.1548679443800141, "grad_norm": 2.145765542984009, "learning_rate": 9.596791591940291e-06, "loss": 0.6775, "step": 12953 }, { "epoch": 0.15487990052487477, "grad_norm": 3.3045036792755127, "learning_rate": 9.596715415404173e-06, "loss": 0.5272, "step": 12954 }, { "epoch": 0.1548918566697354, "grad_norm": 2.5289690494537354, "learning_rate": 9.596639231975264e-06, "loss": 0.6937, "step": 12955 }, { "epoch": 0.15490381281459606, "grad_norm": 2.553962230682373, "learning_rate": 9.59656304165368e-06, "loss": 0.7389, "step": 12956 }, { "epoch": 0.15491576895945672, "grad_norm": 1.8347325325012207, "learning_rate": 9.596486844439533e-06, "loss": 0.5755, "step": 12957 }, { "epoch": 0.15492772510431738, "grad_norm": 3.6196842193603516, "learning_rate": 9.596410640332936e-06, "loss": 0.605, "step": 12958 }, { "epoch": 0.154939681249178, "grad_norm": 5.339559078216553, "learning_rate": 9.596334429334007e-06, "loss": 0.7208, "step": 12959 }, { "epoch": 0.15495163739403867, "grad_norm": 1.6631766557693481, "learning_rate": 9.596258211442858e-06, "loss": 0.6052, "step": 12960 }, { "epoch": 0.15496359353889932, "grad_norm": 3.168459177017212, "learning_rate": 9.596181986659601e-06, "loss": 0.6455, "step": 12961 }, { "epoch": 0.15497554968375996, "grad_norm": 2.013488292694092, "learning_rate": 9.596105754984355e-06, "loss": 0.5778, "step": 12962 }, { "epoch": 0.15498750582862061, "grad_norm": 5.552803993225098, "learning_rate": 9.596029516417232e-06, "loss": 0.6184, "step": 12963 }, { "epoch": 0.15499946197348127, "grad_norm": 3.950570821762085, "learning_rate": 9.595953270958344e-06, "loss": 0.6196, "step": 12964 }, { "epoch": 0.15501141811834193, "grad_norm": 14.127669334411621, "learning_rate": 9.59587701860781e-06, "loss": 0.6131, "step": 12965 }, { "epoch": 0.15502337426320256, "grad_norm": 3.557096481323242, "learning_rate": 9.595800759365742e-06, "loss": 0.6361, "step": 12966 }, { "epoch": 0.15503533040806322, "grad_norm": 2.171112537384033, "learning_rate": 9.595724493232255e-06, "loss": 0.5447, "step": 12967 }, { "epoch": 0.15504728655292388, "grad_norm": 1.9849716424942017, "learning_rate": 9.595648220207461e-06, "loss": 0.6475, "step": 12968 }, { "epoch": 0.15505924269778454, "grad_norm": 2.658005714416504, "learning_rate": 9.595571940291477e-06, "loss": 0.6938, "step": 12969 }, { "epoch": 0.15507119884264517, "grad_norm": 15.492386817932129, "learning_rate": 9.595495653484417e-06, "loss": 0.6296, "step": 12970 }, { "epoch": 0.15508315498750583, "grad_norm": 6.017584323883057, "learning_rate": 9.595419359786394e-06, "loss": 0.652, "step": 12971 }, { "epoch": 0.1550951111323665, "grad_norm": 3.619206428527832, "learning_rate": 9.595343059197524e-06, "loss": 0.6156, "step": 12972 }, { "epoch": 0.15510706727722712, "grad_norm": 2.727410078048706, "learning_rate": 9.59526675171792e-06, "loss": 0.6877, "step": 12973 }, { "epoch": 0.15511902342208778, "grad_norm": 2.433333396911621, "learning_rate": 9.595190437347699e-06, "loss": 0.6873, "step": 12974 }, { "epoch": 0.15513097956694843, "grad_norm": 7.175053596496582, "learning_rate": 9.595114116086972e-06, "loss": 0.6792, "step": 12975 }, { "epoch": 0.1551429357118091, "grad_norm": 3.4409141540527344, "learning_rate": 9.595037787935855e-06, "loss": 0.6335, "step": 12976 }, { "epoch": 0.15515489185666972, "grad_norm": 2.9392526149749756, "learning_rate": 9.594961452894463e-06, "loss": 0.6187, "step": 12977 }, { "epoch": 0.15516684800153038, "grad_norm": 3.471494436264038, "learning_rate": 9.594885110962911e-06, "loss": 0.62, "step": 12978 }, { "epoch": 0.15517880414639104, "grad_norm": 5.302237510681152, "learning_rate": 9.59480876214131e-06, "loss": 0.701, "step": 12979 }, { "epoch": 0.1551907602912517, "grad_norm": 2.257018804550171, "learning_rate": 9.594732406429778e-06, "loss": 0.6017, "step": 12980 }, { "epoch": 0.15520271643611233, "grad_norm": 1.6095489263534546, "learning_rate": 9.59465604382843e-06, "loss": 0.5559, "step": 12981 }, { "epoch": 0.155214672580973, "grad_norm": 4.189055442810059, "learning_rate": 9.594579674337377e-06, "loss": 0.6782, "step": 12982 }, { "epoch": 0.15522662872583365, "grad_norm": 2.3091487884521484, "learning_rate": 9.594503297956737e-06, "loss": 0.6054, "step": 12983 }, { "epoch": 0.15523858487069428, "grad_norm": 4.376380443572998, "learning_rate": 9.594426914686621e-06, "loss": 0.6596, "step": 12984 }, { "epoch": 0.15525054101555494, "grad_norm": 2.563992500305176, "learning_rate": 9.594350524527147e-06, "loss": 0.6239, "step": 12985 }, { "epoch": 0.1552624971604156, "grad_norm": 3.1928114891052246, "learning_rate": 9.594274127478428e-06, "loss": 0.6145, "step": 12986 }, { "epoch": 0.15527445330527626, "grad_norm": 1.7116587162017822, "learning_rate": 9.594197723540579e-06, "loss": 0.6518, "step": 12987 }, { "epoch": 0.1552864094501369, "grad_norm": 1.9031516313552856, "learning_rate": 9.594121312713713e-06, "loss": 0.6249, "step": 12988 }, { "epoch": 0.15529836559499755, "grad_norm": 7.252367973327637, "learning_rate": 9.594044894997945e-06, "loss": 0.6012, "step": 12989 }, { "epoch": 0.1553103217398582, "grad_norm": 1.5331484079360962, "learning_rate": 9.593968470393392e-06, "loss": 0.5253, "step": 12990 }, { "epoch": 0.15532227788471886, "grad_norm": 4.103149890899658, "learning_rate": 9.593892038900166e-06, "loss": 0.5447, "step": 12991 }, { "epoch": 0.1553342340295795, "grad_norm": 3.522629737854004, "learning_rate": 9.593815600518384e-06, "loss": 0.6159, "step": 12992 }, { "epoch": 0.15534619017444015, "grad_norm": 5.315404891967773, "learning_rate": 9.593739155248157e-06, "loss": 0.6944, "step": 12993 }, { "epoch": 0.1553581463193008, "grad_norm": 2.4404711723327637, "learning_rate": 9.593662703089603e-06, "loss": 0.6107, "step": 12994 }, { "epoch": 0.15537010246416147, "grad_norm": 2.6709601879119873, "learning_rate": 9.593586244042836e-06, "loss": 0.531, "step": 12995 }, { "epoch": 0.1553820586090221, "grad_norm": 3.4574201107025146, "learning_rate": 9.59350977810797e-06, "loss": 0.5299, "step": 12996 }, { "epoch": 0.15539401475388276, "grad_norm": 3.145177125930786, "learning_rate": 9.593433305285118e-06, "loss": 0.5678, "step": 12997 }, { "epoch": 0.15540597089874342, "grad_norm": 19.081729888916016, "learning_rate": 9.593356825574398e-06, "loss": 0.5936, "step": 12998 }, { "epoch": 0.15541792704360405, "grad_norm": 6.446173667907715, "learning_rate": 9.593280338975923e-06, "loss": 0.5604, "step": 12999 }, { "epoch": 0.1554298831884647, "grad_norm": 2.5452044010162354, "learning_rate": 9.593203845489807e-06, "loss": 0.6473, "step": 13000 }, { "epoch": 0.15544183933332537, "grad_norm": 7.099352836608887, "learning_rate": 9.593127345116168e-06, "loss": 0.6065, "step": 13001 }, { "epoch": 0.15545379547818602, "grad_norm": 3.1949923038482666, "learning_rate": 9.593050837855116e-06, "loss": 0.6852, "step": 13002 }, { "epoch": 0.15546575162304666, "grad_norm": 11.771021842956543, "learning_rate": 9.592974323706768e-06, "loss": 0.6817, "step": 13003 }, { "epoch": 0.1554777077679073, "grad_norm": 2.790745258331299, "learning_rate": 9.59289780267124e-06, "loss": 0.5932, "step": 13004 }, { "epoch": 0.15548966391276797, "grad_norm": 2.2607669830322266, "learning_rate": 9.592821274748643e-06, "loss": 0.653, "step": 13005 }, { "epoch": 0.15550162005762863, "grad_norm": 2.3340468406677246, "learning_rate": 9.592744739939096e-06, "loss": 0.6799, "step": 13006 }, { "epoch": 0.15551357620248926, "grad_norm": 1.9732974767684937, "learning_rate": 9.592668198242711e-06, "loss": 0.6671, "step": 13007 }, { "epoch": 0.15552553234734992, "grad_norm": 2.2506484985351562, "learning_rate": 9.592591649659606e-06, "loss": 0.7663, "step": 13008 }, { "epoch": 0.15553748849221058, "grad_norm": 5.849144458770752, "learning_rate": 9.592515094189892e-06, "loss": 0.6625, "step": 13009 }, { "epoch": 0.1555494446370712, "grad_norm": 2.9627747535705566, "learning_rate": 9.592438531833684e-06, "loss": 0.6715, "step": 13010 }, { "epoch": 0.15556140078193187, "grad_norm": 2.24944806098938, "learning_rate": 9.592361962591099e-06, "loss": 0.6244, "step": 13011 }, { "epoch": 0.15557335692679253, "grad_norm": 2.852543592453003, "learning_rate": 9.592285386462252e-06, "loss": 0.6721, "step": 13012 }, { "epoch": 0.1555853130716532, "grad_norm": 2.2283313274383545, "learning_rate": 9.592208803447256e-06, "loss": 0.6397, "step": 13013 }, { "epoch": 0.15559726921651382, "grad_norm": 2.0260565280914307, "learning_rate": 9.592132213546225e-06, "loss": 0.6273, "step": 13014 }, { "epoch": 0.15560922536137448, "grad_norm": 7.704102039337158, "learning_rate": 9.592055616759278e-06, "loss": 0.6252, "step": 13015 }, { "epoch": 0.15562118150623513, "grad_norm": 2.434912919998169, "learning_rate": 9.591979013086525e-06, "loss": 0.6573, "step": 13016 }, { "epoch": 0.1556331376510958, "grad_norm": 3.2180135250091553, "learning_rate": 9.591902402528084e-06, "loss": 0.6089, "step": 13017 }, { "epoch": 0.15564509379595642, "grad_norm": 2.125397205352783, "learning_rate": 9.59182578508407e-06, "loss": 0.6404, "step": 13018 }, { "epoch": 0.15565704994081708, "grad_norm": 5.702031135559082, "learning_rate": 9.591749160754596e-06, "loss": 0.6632, "step": 13019 }, { "epoch": 0.15566900608567774, "grad_norm": 3.2357749938964844, "learning_rate": 9.591672529539777e-06, "loss": 0.5945, "step": 13020 }, { "epoch": 0.15568096223053837, "grad_norm": 2.2987353801727295, "learning_rate": 9.591595891439731e-06, "loss": 0.6041, "step": 13021 }, { "epoch": 0.15569291837539903, "grad_norm": 2.0886902809143066, "learning_rate": 9.591519246454568e-06, "loss": 0.6874, "step": 13022 }, { "epoch": 0.1557048745202597, "grad_norm": 3.7073419094085693, "learning_rate": 9.591442594584407e-06, "loss": 0.6458, "step": 13023 }, { "epoch": 0.15571683066512035, "grad_norm": 3.0922434329986572, "learning_rate": 9.591365935829361e-06, "loss": 0.5423, "step": 13024 }, { "epoch": 0.15572878680998098, "grad_norm": 1.7422752380371094, "learning_rate": 9.591289270189548e-06, "loss": 0.633, "step": 13025 }, { "epoch": 0.15574074295484164, "grad_norm": 3.058929204940796, "learning_rate": 9.591212597665077e-06, "loss": 0.6107, "step": 13026 }, { "epoch": 0.1557526990997023, "grad_norm": 2.11299467086792, "learning_rate": 9.591135918256068e-06, "loss": 0.5315, "step": 13027 }, { "epoch": 0.15576465524456296, "grad_norm": 2.5819075107574463, "learning_rate": 9.591059231962634e-06, "loss": 0.6432, "step": 13028 }, { "epoch": 0.15577661138942359, "grad_norm": 2.3532092571258545, "learning_rate": 9.59098253878489e-06, "loss": 0.6762, "step": 13029 }, { "epoch": 0.15578856753428424, "grad_norm": 2.250527858734131, "learning_rate": 9.590905838722953e-06, "loss": 0.6562, "step": 13030 }, { "epoch": 0.1558005236791449, "grad_norm": 3.482696056365967, "learning_rate": 9.590829131776935e-06, "loss": 0.68, "step": 13031 }, { "epoch": 0.15581247982400553, "grad_norm": 3.168196439743042, "learning_rate": 9.590752417946951e-06, "loss": 0.6502, "step": 13032 }, { "epoch": 0.1558244359688662, "grad_norm": 2.846815586090088, "learning_rate": 9.59067569723312e-06, "loss": 0.6377, "step": 13033 }, { "epoch": 0.15583639211372685, "grad_norm": 3.2030491828918457, "learning_rate": 9.590598969635554e-06, "loss": 0.6545, "step": 13034 }, { "epoch": 0.1558483482585875, "grad_norm": 1.4844639301300049, "learning_rate": 9.590522235154367e-06, "loss": 0.5766, "step": 13035 }, { "epoch": 0.15586030440344814, "grad_norm": 1.9002031087875366, "learning_rate": 9.590445493789678e-06, "loss": 0.5922, "step": 13036 }, { "epoch": 0.1558722605483088, "grad_norm": 2.4859912395477295, "learning_rate": 9.590368745541598e-06, "loss": 0.5749, "step": 13037 }, { "epoch": 0.15588421669316946, "grad_norm": 2.19252610206604, "learning_rate": 9.590291990410244e-06, "loss": 0.5793, "step": 13038 }, { "epoch": 0.15589617283803012, "grad_norm": 4.158751010894775, "learning_rate": 9.59021522839573e-06, "loss": 0.6795, "step": 13039 }, { "epoch": 0.15590812898289075, "grad_norm": 3.6163370609283447, "learning_rate": 9.590138459498173e-06, "loss": 0.6023, "step": 13040 }, { "epoch": 0.1559200851277514, "grad_norm": 2.3086228370666504, "learning_rate": 9.590061683717687e-06, "loss": 0.5579, "step": 13041 }, { "epoch": 0.15593204127261207, "grad_norm": 2.595127582550049, "learning_rate": 9.589984901054387e-06, "loss": 0.6743, "step": 13042 }, { "epoch": 0.15594399741747272, "grad_norm": 2.0869429111480713, "learning_rate": 9.589908111508387e-06, "loss": 0.548, "step": 13043 }, { "epoch": 0.15595595356233335, "grad_norm": 3.574291706085205, "learning_rate": 9.589831315079806e-06, "loss": 0.5969, "step": 13044 }, { "epoch": 0.155967909707194, "grad_norm": 2.7707602977752686, "learning_rate": 9.589754511768757e-06, "loss": 0.5807, "step": 13045 }, { "epoch": 0.15597986585205467, "grad_norm": 8.59544563293457, "learning_rate": 9.589677701575352e-06, "loss": 0.6301, "step": 13046 }, { "epoch": 0.1559918219969153, "grad_norm": 2.384733200073242, "learning_rate": 9.58960088449971e-06, "loss": 0.7012, "step": 13047 }, { "epoch": 0.15600377814177596, "grad_norm": 2.088529109954834, "learning_rate": 9.589524060541947e-06, "loss": 0.625, "step": 13048 }, { "epoch": 0.15601573428663662, "grad_norm": 3.3860597610473633, "learning_rate": 9.589447229702173e-06, "loss": 0.6145, "step": 13049 }, { "epoch": 0.15602769043149728, "grad_norm": 9.118122100830078, "learning_rate": 9.589370391980508e-06, "loss": 0.7306, "step": 13050 }, { "epoch": 0.1560396465763579, "grad_norm": 2.2892544269561768, "learning_rate": 9.589293547377067e-06, "loss": 0.6414, "step": 13051 }, { "epoch": 0.15605160272121857, "grad_norm": 2.089625835418701, "learning_rate": 9.589216695891963e-06, "loss": 0.6275, "step": 13052 }, { "epoch": 0.15606355886607923, "grad_norm": 22.93037986755371, "learning_rate": 9.589139837525312e-06, "loss": 0.6182, "step": 13053 }, { "epoch": 0.15607551501093989, "grad_norm": 1.9868966341018677, "learning_rate": 9.58906297227723e-06, "loss": 0.5572, "step": 13054 }, { "epoch": 0.15608747115580052, "grad_norm": 2.555893898010254, "learning_rate": 9.588986100147832e-06, "loss": 0.5819, "step": 13055 }, { "epoch": 0.15609942730066118, "grad_norm": 2.5609447956085205, "learning_rate": 9.588909221137233e-06, "loss": 0.678, "step": 13056 }, { "epoch": 0.15611138344552183, "grad_norm": 1.7676750421524048, "learning_rate": 9.588832335245547e-06, "loss": 0.6822, "step": 13057 }, { "epoch": 0.15612333959038247, "grad_norm": 2.0079243183135986, "learning_rate": 9.588755442472892e-06, "loss": 0.6657, "step": 13058 }, { "epoch": 0.15613529573524312, "grad_norm": 1.9881559610366821, "learning_rate": 9.58867854281938e-06, "loss": 0.6596, "step": 13059 }, { "epoch": 0.15614725188010378, "grad_norm": 1.7735753059387207, "learning_rate": 9.58860163628513e-06, "loss": 0.5472, "step": 13060 }, { "epoch": 0.15615920802496444, "grad_norm": 23.152036666870117, "learning_rate": 9.588524722870255e-06, "loss": 0.5772, "step": 13061 }, { "epoch": 0.15617116416982507, "grad_norm": 16.630931854248047, "learning_rate": 9.588447802574871e-06, "loss": 0.6157, "step": 13062 }, { "epoch": 0.15618312031468573, "grad_norm": 1.555032730102539, "learning_rate": 9.588370875399094e-06, "loss": 0.5181, "step": 13063 }, { "epoch": 0.1561950764595464, "grad_norm": 10.273635864257812, "learning_rate": 9.588293941343039e-06, "loss": 0.7124, "step": 13064 }, { "epoch": 0.15620703260440705, "grad_norm": 3.6453347206115723, "learning_rate": 9.588217000406819e-06, "loss": 0.6692, "step": 13065 }, { "epoch": 0.15621898874926768, "grad_norm": 2.1356937885284424, "learning_rate": 9.588140052590553e-06, "loss": 0.6004, "step": 13066 }, { "epoch": 0.15623094489412834, "grad_norm": 4.349652290344238, "learning_rate": 9.588063097894353e-06, "loss": 0.6815, "step": 13067 }, { "epoch": 0.156242901038989, "grad_norm": 2.364058017730713, "learning_rate": 9.587986136318337e-06, "loss": 0.5833, "step": 13068 }, { "epoch": 0.15625485718384963, "grad_norm": 1.8954592943191528, "learning_rate": 9.58790916786262e-06, "loss": 0.6651, "step": 13069 }, { "epoch": 0.15626681332871029, "grad_norm": 2.8962202072143555, "learning_rate": 9.587832192527315e-06, "loss": 0.4929, "step": 13070 }, { "epoch": 0.15627876947357094, "grad_norm": 2.2242534160614014, "learning_rate": 9.587755210312541e-06, "loss": 0.6658, "step": 13071 }, { "epoch": 0.1562907256184316, "grad_norm": 2.0582549571990967, "learning_rate": 9.587678221218412e-06, "loss": 0.6472, "step": 13072 }, { "epoch": 0.15630268176329223, "grad_norm": 10.612227439880371, "learning_rate": 9.587601225245042e-06, "loss": 0.5515, "step": 13073 }, { "epoch": 0.1563146379081529, "grad_norm": 2.094815254211426, "learning_rate": 9.587524222392547e-06, "loss": 0.6059, "step": 13074 }, { "epoch": 0.15632659405301355, "grad_norm": 3.7907791137695312, "learning_rate": 9.587447212661044e-06, "loss": 0.7356, "step": 13075 }, { "epoch": 0.1563385501978742, "grad_norm": 3.9532971382141113, "learning_rate": 9.58737019605065e-06, "loss": 0.64, "step": 13076 }, { "epoch": 0.15635050634273484, "grad_norm": 1.899869441986084, "learning_rate": 9.587293172561475e-06, "loss": 0.6161, "step": 13077 }, { "epoch": 0.1563624624875955, "grad_norm": 7.504451751708984, "learning_rate": 9.587216142193637e-06, "loss": 0.6672, "step": 13078 }, { "epoch": 0.15637441863245616, "grad_norm": 7.54551887512207, "learning_rate": 9.587139104947257e-06, "loss": 0.6278, "step": 13079 }, { "epoch": 0.1563863747773168, "grad_norm": 1.7882436513900757, "learning_rate": 9.58706206082244e-06, "loss": 0.6736, "step": 13080 }, { "epoch": 0.15639833092217745, "grad_norm": 2.1792147159576416, "learning_rate": 9.58698500981931e-06, "loss": 0.5775, "step": 13081 }, { "epoch": 0.1564102870670381, "grad_norm": 3.034424304962158, "learning_rate": 9.58690795193798e-06, "loss": 0.6268, "step": 13082 }, { "epoch": 0.15642224321189876, "grad_norm": 8.68448257446289, "learning_rate": 9.586830887178565e-06, "loss": 0.623, "step": 13083 }, { "epoch": 0.1564341993567594, "grad_norm": 2.4607627391815186, "learning_rate": 9.586753815541178e-06, "loss": 0.5835, "step": 13084 }, { "epoch": 0.15644615550162005, "grad_norm": 2.261965274810791, "learning_rate": 9.58667673702594e-06, "loss": 0.667, "step": 13085 }, { "epoch": 0.1564581116464807, "grad_norm": 3.127131462097168, "learning_rate": 9.586599651632964e-06, "loss": 0.6253, "step": 13086 }, { "epoch": 0.15647006779134137, "grad_norm": 6.040380954742432, "learning_rate": 9.586522559362363e-06, "loss": 0.6423, "step": 13087 }, { "epoch": 0.156482023936202, "grad_norm": 2.783673048019409, "learning_rate": 9.586445460214257e-06, "loss": 0.6505, "step": 13088 }, { "epoch": 0.15649398008106266, "grad_norm": 4.462122917175293, "learning_rate": 9.586368354188761e-06, "loss": 0.6957, "step": 13089 }, { "epoch": 0.15650593622592332, "grad_norm": 6.816770553588867, "learning_rate": 9.586291241285987e-06, "loss": 0.5895, "step": 13090 }, { "epoch": 0.15651789237078395, "grad_norm": 2.249021530151367, "learning_rate": 9.586214121506054e-06, "loss": 0.6312, "step": 13091 }, { "epoch": 0.1565298485156446, "grad_norm": 8.26220989227295, "learning_rate": 9.586136994849078e-06, "loss": 0.6505, "step": 13092 }, { "epoch": 0.15654180466050527, "grad_norm": 2.9874701499938965, "learning_rate": 9.58605986131517e-06, "loss": 0.5706, "step": 13093 }, { "epoch": 0.15655376080536593, "grad_norm": 2.3444623947143555, "learning_rate": 9.585982720904453e-06, "loss": 0.5297, "step": 13094 }, { "epoch": 0.15656571695022656, "grad_norm": 2.868016242980957, "learning_rate": 9.585905573617035e-06, "loss": 0.6885, "step": 13095 }, { "epoch": 0.15657767309508722, "grad_norm": 2.1917831897735596, "learning_rate": 9.585828419453039e-06, "loss": 0.5972, "step": 13096 }, { "epoch": 0.15658962923994788, "grad_norm": 2.701911449432373, "learning_rate": 9.585751258412573e-06, "loss": 0.5823, "step": 13097 }, { "epoch": 0.15660158538480853, "grad_norm": 3.426340103149414, "learning_rate": 9.585674090495758e-06, "loss": 0.703, "step": 13098 }, { "epoch": 0.15661354152966916, "grad_norm": 2.3217697143554688, "learning_rate": 9.585596915702709e-06, "loss": 0.6822, "step": 13099 }, { "epoch": 0.15662549767452982, "grad_norm": 3.170504331588745, "learning_rate": 9.585519734033541e-06, "loss": 0.5659, "step": 13100 }, { "epoch": 0.15663745381939048, "grad_norm": 3.131819248199463, "learning_rate": 9.58544254548837e-06, "loss": 0.5731, "step": 13101 }, { "epoch": 0.15664940996425114, "grad_norm": 2.238642692565918, "learning_rate": 9.585365350067313e-06, "loss": 0.6408, "step": 13102 }, { "epoch": 0.15666136610911177, "grad_norm": 3.5897762775421143, "learning_rate": 9.585288147770481e-06, "loss": 0.6781, "step": 13103 }, { "epoch": 0.15667332225397243, "grad_norm": 4.338995933532715, "learning_rate": 9.585210938597995e-06, "loss": 0.5751, "step": 13104 }, { "epoch": 0.1566852783988331, "grad_norm": 2.919553518295288, "learning_rate": 9.58513372254997e-06, "loss": 0.566, "step": 13105 }, { "epoch": 0.15669723454369372, "grad_norm": 1.9939466714859009, "learning_rate": 9.58505649962652e-06, "loss": 0.6682, "step": 13106 }, { "epoch": 0.15670919068855438, "grad_norm": 2.291443109512329, "learning_rate": 9.58497926982776e-06, "loss": 0.6043, "step": 13107 }, { "epoch": 0.15672114683341504, "grad_norm": 3.9779293537139893, "learning_rate": 9.584902033153809e-06, "loss": 0.5645, "step": 13108 }, { "epoch": 0.1567331029782757, "grad_norm": 3.1529316902160645, "learning_rate": 9.58482478960478e-06, "loss": 0.5713, "step": 13109 }, { "epoch": 0.15674505912313633, "grad_norm": 2.2896647453308105, "learning_rate": 9.58474753918079e-06, "loss": 0.659, "step": 13110 }, { "epoch": 0.15675701526799699, "grad_norm": 3.124124765396118, "learning_rate": 9.584670281881954e-06, "loss": 0.5767, "step": 13111 }, { "epoch": 0.15676897141285764, "grad_norm": 1.725252628326416, "learning_rate": 9.58459301770839e-06, "loss": 0.6119, "step": 13112 }, { "epoch": 0.1567809275577183, "grad_norm": 2.4169631004333496, "learning_rate": 9.584515746660213e-06, "loss": 0.6409, "step": 13113 }, { "epoch": 0.15679288370257893, "grad_norm": 2.2332613468170166, "learning_rate": 9.584438468737536e-06, "loss": 0.5985, "step": 13114 }, { "epoch": 0.1568048398474396, "grad_norm": 4.479870796203613, "learning_rate": 9.58436118394048e-06, "loss": 0.6576, "step": 13115 }, { "epoch": 0.15681679599230025, "grad_norm": 1.5377743244171143, "learning_rate": 9.584283892269158e-06, "loss": 0.6335, "step": 13116 }, { "epoch": 0.15682875213716088, "grad_norm": 3.3466684818267822, "learning_rate": 9.584206593723684e-06, "loss": 0.6214, "step": 13117 }, { "epoch": 0.15684070828202154, "grad_norm": 3.3421883583068848, "learning_rate": 9.584129288304175e-06, "loss": 0.7538, "step": 13118 }, { "epoch": 0.1568526644268822, "grad_norm": 4.689743518829346, "learning_rate": 9.58405197601075e-06, "loss": 0.6804, "step": 13119 }, { "epoch": 0.15686462057174286, "grad_norm": 7.369133949279785, "learning_rate": 9.583974656843522e-06, "loss": 0.5868, "step": 13120 }, { "epoch": 0.1568765767166035, "grad_norm": 2.3710007667541504, "learning_rate": 9.583897330802609e-06, "loss": 0.5145, "step": 13121 }, { "epoch": 0.15688853286146415, "grad_norm": 2.3109397888183594, "learning_rate": 9.583819997888126e-06, "loss": 0.6089, "step": 13122 }, { "epoch": 0.1569004890063248, "grad_norm": 2.9306185245513916, "learning_rate": 9.583742658100186e-06, "loss": 0.544, "step": 13123 }, { "epoch": 0.15691244515118546, "grad_norm": 2.1782450675964355, "learning_rate": 9.58366531143891e-06, "loss": 0.6912, "step": 13124 }, { "epoch": 0.1569244012960461, "grad_norm": 4.411847114562988, "learning_rate": 9.58358795790441e-06, "loss": 0.7097, "step": 13125 }, { "epoch": 0.15693635744090675, "grad_norm": 2.837520122528076, "learning_rate": 9.583510597496803e-06, "loss": 0.6773, "step": 13126 }, { "epoch": 0.1569483135857674, "grad_norm": 2.278587818145752, "learning_rate": 9.583433230216209e-06, "loss": 0.5928, "step": 13127 }, { "epoch": 0.15696026973062804, "grad_norm": 3.3933260440826416, "learning_rate": 9.583355856062738e-06, "loss": 0.5982, "step": 13128 }, { "epoch": 0.1569722258754887, "grad_norm": 3.213801860809326, "learning_rate": 9.583278475036509e-06, "loss": 0.6715, "step": 13129 }, { "epoch": 0.15698418202034936, "grad_norm": 5.265436172485352, "learning_rate": 9.583201087137636e-06, "loss": 0.5957, "step": 13130 }, { "epoch": 0.15699613816521002, "grad_norm": 2.2309327125549316, "learning_rate": 9.583123692366238e-06, "loss": 0.693, "step": 13131 }, { "epoch": 0.15700809431007065, "grad_norm": 2.8264000415802, "learning_rate": 9.583046290722431e-06, "loss": 0.5772, "step": 13132 }, { "epoch": 0.1570200504549313, "grad_norm": 2.3390846252441406, "learning_rate": 9.582968882206328e-06, "loss": 0.5681, "step": 13133 }, { "epoch": 0.15703200659979197, "grad_norm": 3.4615466594696045, "learning_rate": 9.582891466818048e-06, "loss": 0.5828, "step": 13134 }, { "epoch": 0.15704396274465263, "grad_norm": 1.7485851049423218, "learning_rate": 9.582814044557705e-06, "loss": 0.6598, "step": 13135 }, { "epoch": 0.15705591888951326, "grad_norm": 1.6617088317871094, "learning_rate": 9.582736615425416e-06, "loss": 0.5743, "step": 13136 }, { "epoch": 0.15706787503437392, "grad_norm": 2.248060464859009, "learning_rate": 9.582659179421299e-06, "loss": 0.6073, "step": 13137 }, { "epoch": 0.15707983117923457, "grad_norm": 2.400238513946533, "learning_rate": 9.582581736545467e-06, "loss": 0.5777, "step": 13138 }, { "epoch": 0.1570917873240952, "grad_norm": 5.710100173950195, "learning_rate": 9.582504286798037e-06, "loss": 0.5923, "step": 13139 }, { "epoch": 0.15710374346895586, "grad_norm": 5.02943754196167, "learning_rate": 9.582426830179126e-06, "loss": 0.5671, "step": 13140 }, { "epoch": 0.15711569961381652, "grad_norm": 3.5428032875061035, "learning_rate": 9.582349366688849e-06, "loss": 0.6351, "step": 13141 }, { "epoch": 0.15712765575867718, "grad_norm": 2.887615919113159, "learning_rate": 9.582271896327324e-06, "loss": 0.6863, "step": 13142 }, { "epoch": 0.1571396119035378, "grad_norm": 2.235870361328125, "learning_rate": 9.582194419094665e-06, "loss": 0.6746, "step": 13143 }, { "epoch": 0.15715156804839847, "grad_norm": 2.555677652359009, "learning_rate": 9.58211693499099e-06, "loss": 0.6616, "step": 13144 }, { "epoch": 0.15716352419325913, "grad_norm": 5.829361915588379, "learning_rate": 9.582039444016413e-06, "loss": 0.6815, "step": 13145 }, { "epoch": 0.1571754803381198, "grad_norm": 2.670197010040283, "learning_rate": 9.581961946171053e-06, "loss": 0.661, "step": 13146 }, { "epoch": 0.15718743648298042, "grad_norm": 2.554305076599121, "learning_rate": 9.581884441455026e-06, "loss": 0.5465, "step": 13147 }, { "epoch": 0.15719939262784108, "grad_norm": 2.236267328262329, "learning_rate": 9.581806929868443e-06, "loss": 0.6419, "step": 13148 }, { "epoch": 0.15721134877270174, "grad_norm": 3.543706178665161, "learning_rate": 9.581729411411427e-06, "loss": 0.6189, "step": 13149 }, { "epoch": 0.15722330491756237, "grad_norm": 2.6300806999206543, "learning_rate": 9.581651886084091e-06, "loss": 0.7041, "step": 13150 }, { "epoch": 0.15723526106242303, "grad_norm": 2.3050355911254883, "learning_rate": 9.58157435388655e-06, "loss": 0.6867, "step": 13151 }, { "epoch": 0.15724721720728368, "grad_norm": 3.607758045196533, "learning_rate": 9.581496814818924e-06, "loss": 0.6234, "step": 13152 }, { "epoch": 0.15725917335214434, "grad_norm": 2.7240989208221436, "learning_rate": 9.581419268881329e-06, "loss": 0.5946, "step": 13153 }, { "epoch": 0.15727112949700497, "grad_norm": 2.6609833240509033, "learning_rate": 9.581341716073878e-06, "loss": 0.6441, "step": 13154 }, { "epoch": 0.15728308564186563, "grad_norm": 2.97282338142395, "learning_rate": 9.581264156396688e-06, "loss": 0.6985, "step": 13155 }, { "epoch": 0.1572950417867263, "grad_norm": 1.7914729118347168, "learning_rate": 9.581186589849876e-06, "loss": 0.6246, "step": 13156 }, { "epoch": 0.15730699793158695, "grad_norm": 1.4120460748672485, "learning_rate": 9.581109016433559e-06, "loss": 0.5903, "step": 13157 }, { "epoch": 0.15731895407644758, "grad_norm": 1.566929578781128, "learning_rate": 9.581031436147853e-06, "loss": 0.4924, "step": 13158 }, { "epoch": 0.15733091022130824, "grad_norm": 2.8614661693573, "learning_rate": 9.580953848992871e-06, "loss": 0.6258, "step": 13159 }, { "epoch": 0.1573428663661689, "grad_norm": 1.5026535987854004, "learning_rate": 9.580876254968736e-06, "loss": 0.5841, "step": 13160 }, { "epoch": 0.15735482251102956, "grad_norm": 1.8947070837020874, "learning_rate": 9.58079865407556e-06, "loss": 0.5855, "step": 13161 }, { "epoch": 0.1573667786558902, "grad_norm": 1.8199809789657593, "learning_rate": 9.58072104631346e-06, "loss": 0.6069, "step": 13162 }, { "epoch": 0.15737873480075085, "grad_norm": 2.9294369220733643, "learning_rate": 9.580643431682553e-06, "loss": 0.6059, "step": 13163 }, { "epoch": 0.1573906909456115, "grad_norm": 2.109035015106201, "learning_rate": 9.580565810182954e-06, "loss": 0.5724, "step": 13164 }, { "epoch": 0.15740264709047214, "grad_norm": 4.660819053649902, "learning_rate": 9.58048818181478e-06, "loss": 0.512, "step": 13165 }, { "epoch": 0.1574146032353328, "grad_norm": 2.064845323562622, "learning_rate": 9.58041054657815e-06, "loss": 0.6094, "step": 13166 }, { "epoch": 0.15742655938019345, "grad_norm": 4.323362827301025, "learning_rate": 9.580332904473175e-06, "loss": 0.5081, "step": 13167 }, { "epoch": 0.1574385155250541, "grad_norm": 2.660881996154785, "learning_rate": 9.580255255499976e-06, "loss": 0.5967, "step": 13168 }, { "epoch": 0.15745047166991474, "grad_norm": 2.4772002696990967, "learning_rate": 9.580177599658668e-06, "loss": 0.521, "step": 13169 }, { "epoch": 0.1574624278147754, "grad_norm": 5.854859828948975, "learning_rate": 9.580099936949368e-06, "loss": 0.5149, "step": 13170 }, { "epoch": 0.15747438395963606, "grad_norm": 1.9202672243118286, "learning_rate": 9.58002226737219e-06, "loss": 0.6171, "step": 13171 }, { "epoch": 0.15748634010449672, "grad_norm": 3.6688477993011475, "learning_rate": 9.579944590927255e-06, "loss": 0.5693, "step": 13172 }, { "epoch": 0.15749829624935735, "grad_norm": 2.2274062633514404, "learning_rate": 9.579866907614675e-06, "loss": 0.6316, "step": 13173 }, { "epoch": 0.157510252394218, "grad_norm": 5.263210773468018, "learning_rate": 9.579789217434568e-06, "loss": 0.6051, "step": 13174 }, { "epoch": 0.15752220853907867, "grad_norm": 3.1169910430908203, "learning_rate": 9.579711520387052e-06, "loss": 0.5969, "step": 13175 }, { "epoch": 0.1575341646839393, "grad_norm": 12.613615989685059, "learning_rate": 9.579633816472242e-06, "loss": 0.5744, "step": 13176 }, { "epoch": 0.15754612082879996, "grad_norm": 3.8958592414855957, "learning_rate": 9.579556105690252e-06, "loss": 0.6346, "step": 13177 }, { "epoch": 0.15755807697366062, "grad_norm": 2.1509218215942383, "learning_rate": 9.579478388041206e-06, "loss": 0.5522, "step": 13178 }, { "epoch": 0.15757003311852127, "grad_norm": 12.114920616149902, "learning_rate": 9.579400663525214e-06, "loss": 0.592, "step": 13179 }, { "epoch": 0.1575819892633819, "grad_norm": 3.2918708324432373, "learning_rate": 9.579322932142394e-06, "loss": 0.6095, "step": 13180 }, { "epoch": 0.15759394540824256, "grad_norm": 2.344187021255493, "learning_rate": 9.579245193892863e-06, "loss": 0.6179, "step": 13181 }, { "epoch": 0.15760590155310322, "grad_norm": 2.3519318103790283, "learning_rate": 9.579167448776737e-06, "loss": 0.6072, "step": 13182 }, { "epoch": 0.15761785769796388, "grad_norm": 1.969417691230774, "learning_rate": 9.579089696794133e-06, "loss": 0.5888, "step": 13183 }, { "epoch": 0.1576298138428245, "grad_norm": 2.365570306777954, "learning_rate": 9.579011937945169e-06, "loss": 0.6422, "step": 13184 }, { "epoch": 0.15764176998768517, "grad_norm": 1.8282564878463745, "learning_rate": 9.57893417222996e-06, "loss": 0.6758, "step": 13185 }, { "epoch": 0.15765372613254583, "grad_norm": 2.1678926944732666, "learning_rate": 9.578856399648624e-06, "loss": 0.6456, "step": 13186 }, { "epoch": 0.15766568227740646, "grad_norm": 1.8287101984024048, "learning_rate": 9.578778620201275e-06, "loss": 0.6606, "step": 13187 }, { "epoch": 0.15767763842226712, "grad_norm": 3.056138277053833, "learning_rate": 9.578700833888031e-06, "loss": 0.6535, "step": 13188 }, { "epoch": 0.15768959456712778, "grad_norm": 4.113909721374512, "learning_rate": 9.57862304070901e-06, "loss": 0.6252, "step": 13189 }, { "epoch": 0.15770155071198844, "grad_norm": 4.087114334106445, "learning_rate": 9.578545240664327e-06, "loss": 0.6198, "step": 13190 }, { "epoch": 0.15771350685684907, "grad_norm": 2.970133066177368, "learning_rate": 9.578467433754098e-06, "loss": 0.5687, "step": 13191 }, { "epoch": 0.15772546300170973, "grad_norm": 1.9166159629821777, "learning_rate": 9.578389619978442e-06, "loss": 0.5838, "step": 13192 }, { "epoch": 0.15773741914657038, "grad_norm": 2.002525806427002, "learning_rate": 9.578311799337474e-06, "loss": 0.6547, "step": 13193 }, { "epoch": 0.15774937529143104, "grad_norm": 1.8531088829040527, "learning_rate": 9.578233971831312e-06, "loss": 0.6889, "step": 13194 }, { "epoch": 0.15776133143629167, "grad_norm": 2.871544361114502, "learning_rate": 9.57815613746007e-06, "loss": 0.6347, "step": 13195 }, { "epoch": 0.15777328758115233, "grad_norm": 3.479677677154541, "learning_rate": 9.57807829622387e-06, "loss": 0.6799, "step": 13196 }, { "epoch": 0.157785243726013, "grad_norm": 1.8605685234069824, "learning_rate": 9.578000448122824e-06, "loss": 0.6278, "step": 13197 }, { "epoch": 0.15779719987087362, "grad_norm": 2.084010124206543, "learning_rate": 9.577922593157048e-06, "loss": 0.5735, "step": 13198 }, { "epoch": 0.15780915601573428, "grad_norm": 2.2933924198150635, "learning_rate": 9.577844731326661e-06, "loss": 0.6822, "step": 13199 }, { "epoch": 0.15782111216059494, "grad_norm": 1.898510456085205, "learning_rate": 9.577766862631783e-06, "loss": 0.5923, "step": 13200 }, { "epoch": 0.1578330683054556, "grad_norm": 3.006042718887329, "learning_rate": 9.577688987072524e-06, "loss": 0.641, "step": 13201 }, { "epoch": 0.15784502445031623, "grad_norm": 1.685899257659912, "learning_rate": 9.577611104649004e-06, "loss": 0.6948, "step": 13202 }, { "epoch": 0.1578569805951769, "grad_norm": 6.652050018310547, "learning_rate": 9.577533215361342e-06, "loss": 0.5596, "step": 13203 }, { "epoch": 0.15786893674003755, "grad_norm": 2.481166362762451, "learning_rate": 9.577455319209651e-06, "loss": 0.6428, "step": 13204 }, { "epoch": 0.1578808928848982, "grad_norm": 5.567988395690918, "learning_rate": 9.577377416194051e-06, "loss": 0.6048, "step": 13205 }, { "epoch": 0.15789284902975884, "grad_norm": 1.671281337738037, "learning_rate": 9.577299506314657e-06, "loss": 0.668, "step": 13206 }, { "epoch": 0.1579048051746195, "grad_norm": 7.32910680770874, "learning_rate": 9.577221589571585e-06, "loss": 0.5896, "step": 13207 }, { "epoch": 0.15791676131948015, "grad_norm": 2.273042917251587, "learning_rate": 9.577143665964953e-06, "loss": 0.6262, "step": 13208 }, { "epoch": 0.15792871746434078, "grad_norm": 3.0308213233947754, "learning_rate": 9.577065735494879e-06, "loss": 0.6005, "step": 13209 }, { "epoch": 0.15794067360920144, "grad_norm": 2.0580976009368896, "learning_rate": 9.576987798161477e-06, "loss": 0.5917, "step": 13210 }, { "epoch": 0.1579526297540621, "grad_norm": 5.231498718261719, "learning_rate": 9.576909853964866e-06, "loss": 0.6427, "step": 13211 }, { "epoch": 0.15796458589892276, "grad_norm": 2.157580852508545, "learning_rate": 9.576831902905164e-06, "loss": 0.6262, "step": 13212 }, { "epoch": 0.1579765420437834, "grad_norm": 3.096923828125, "learning_rate": 9.576753944982485e-06, "loss": 0.6368, "step": 13213 }, { "epoch": 0.15798849818864405, "grad_norm": 2.946458578109741, "learning_rate": 9.576675980196948e-06, "loss": 0.5862, "step": 13214 }, { "epoch": 0.1580004543335047, "grad_norm": 1.8124642372131348, "learning_rate": 9.576598008548668e-06, "loss": 0.6397, "step": 13215 }, { "epoch": 0.15801241047836537, "grad_norm": 2.415386199951172, "learning_rate": 9.576520030037764e-06, "loss": 0.6092, "step": 13216 }, { "epoch": 0.158024366623226, "grad_norm": 3.8905751705169678, "learning_rate": 9.57644204466435e-06, "loss": 0.732, "step": 13217 }, { "epoch": 0.15803632276808666, "grad_norm": 4.046450614929199, "learning_rate": 9.576364052428548e-06, "loss": 0.7204, "step": 13218 }, { "epoch": 0.15804827891294732, "grad_norm": 1.9293975830078125, "learning_rate": 9.576286053330468e-06, "loss": 0.573, "step": 13219 }, { "epoch": 0.15806023505780797, "grad_norm": 3.0470504760742188, "learning_rate": 9.576208047370235e-06, "loss": 0.6674, "step": 13220 }, { "epoch": 0.1580721912026686, "grad_norm": 2.60625958442688, "learning_rate": 9.576130034547958e-06, "loss": 0.5637, "step": 13221 }, { "epoch": 0.15808414734752926, "grad_norm": 5.446976184844971, "learning_rate": 9.57605201486376e-06, "loss": 0.5476, "step": 13222 }, { "epoch": 0.15809610349238992, "grad_norm": 4.43943452835083, "learning_rate": 9.575973988317755e-06, "loss": 0.5842, "step": 13223 }, { "epoch": 0.15810805963725055, "grad_norm": 1.9058623313903809, "learning_rate": 9.575895954910061e-06, "loss": 0.5752, "step": 13224 }, { "epoch": 0.1581200157821112, "grad_norm": 8.713643074035645, "learning_rate": 9.575817914640795e-06, "loss": 0.6823, "step": 13225 }, { "epoch": 0.15813197192697187, "grad_norm": 2.968496322631836, "learning_rate": 9.575739867510074e-06, "loss": 0.6419, "step": 13226 }, { "epoch": 0.15814392807183253, "grad_norm": 1.725376844406128, "learning_rate": 9.575661813518014e-06, "loss": 0.5976, "step": 13227 }, { "epoch": 0.15815588421669316, "grad_norm": 4.561502456665039, "learning_rate": 9.575583752664733e-06, "loss": 0.5324, "step": 13228 }, { "epoch": 0.15816784036155382, "grad_norm": 3.6026461124420166, "learning_rate": 9.575505684950349e-06, "loss": 0.6007, "step": 13229 }, { "epoch": 0.15817979650641448, "grad_norm": 2.147075891494751, "learning_rate": 9.575427610374976e-06, "loss": 0.5488, "step": 13230 }, { "epoch": 0.15819175265127514, "grad_norm": 2.0394670963287354, "learning_rate": 9.575349528938735e-06, "loss": 0.6034, "step": 13231 }, { "epoch": 0.15820370879613577, "grad_norm": 2.801482677459717, "learning_rate": 9.57527144064174e-06, "loss": 0.688, "step": 13232 }, { "epoch": 0.15821566494099643, "grad_norm": 2.4807372093200684, "learning_rate": 9.575193345484109e-06, "loss": 0.6592, "step": 13233 }, { "epoch": 0.15822762108585708, "grad_norm": 2.7048540115356445, "learning_rate": 9.57511524346596e-06, "loss": 0.5821, "step": 13234 }, { "epoch": 0.15823957723071772, "grad_norm": 2.0657455921173096, "learning_rate": 9.575037134587408e-06, "loss": 0.6064, "step": 13235 }, { "epoch": 0.15825153337557837, "grad_norm": 2.248094081878662, "learning_rate": 9.574959018848574e-06, "loss": 0.5078, "step": 13236 }, { "epoch": 0.15826348952043903, "grad_norm": 6.820540904998779, "learning_rate": 9.57488089624957e-06, "loss": 0.6482, "step": 13237 }, { "epoch": 0.1582754456652997, "grad_norm": 2.9066944122314453, "learning_rate": 9.574802766790517e-06, "loss": 0.6621, "step": 13238 }, { "epoch": 0.15828740181016032, "grad_norm": 2.3223469257354736, "learning_rate": 9.574724630471533e-06, "loss": 0.6732, "step": 13239 }, { "epoch": 0.15829935795502098, "grad_norm": 2.8908584117889404, "learning_rate": 9.57464648729273e-06, "loss": 0.5711, "step": 13240 }, { "epoch": 0.15831131409988164, "grad_norm": 2.1706879138946533, "learning_rate": 9.574568337254229e-06, "loss": 0.6449, "step": 13241 }, { "epoch": 0.1583232702447423, "grad_norm": 2.7391610145568848, "learning_rate": 9.57449018035615e-06, "loss": 0.6368, "step": 13242 }, { "epoch": 0.15833522638960293, "grad_norm": 2.5289158821105957, "learning_rate": 9.574412016598602e-06, "loss": 0.5834, "step": 13243 }, { "epoch": 0.1583471825344636, "grad_norm": 21.001501083374023, "learning_rate": 9.57433384598171e-06, "loss": 0.6388, "step": 13244 }, { "epoch": 0.15835913867932425, "grad_norm": 2.4689178466796875, "learning_rate": 9.574255668505586e-06, "loss": 0.5455, "step": 13245 }, { "epoch": 0.15837109482418488, "grad_norm": 7.762134552001953, "learning_rate": 9.57417748417035e-06, "loss": 0.5913, "step": 13246 }, { "epoch": 0.15838305096904554, "grad_norm": 2.177762031555176, "learning_rate": 9.574099292976119e-06, "loss": 0.5727, "step": 13247 }, { "epoch": 0.1583950071139062, "grad_norm": 1.9645155668258667, "learning_rate": 9.574021094923009e-06, "loss": 0.5792, "step": 13248 }, { "epoch": 0.15840696325876685, "grad_norm": 2.3265559673309326, "learning_rate": 9.573942890011138e-06, "loss": 0.6463, "step": 13249 }, { "epoch": 0.15841891940362748, "grad_norm": 7.33138370513916, "learning_rate": 9.573864678240626e-06, "loss": 0.7058, "step": 13250 }, { "epoch": 0.15843087554848814, "grad_norm": 3.679978132247925, "learning_rate": 9.573786459611585e-06, "loss": 0.5953, "step": 13251 }, { "epoch": 0.1584428316933488, "grad_norm": 3.992042064666748, "learning_rate": 9.573708234124136e-06, "loss": 0.5978, "step": 13252 }, { "epoch": 0.15845478783820946, "grad_norm": 3.9189441204071045, "learning_rate": 9.573630001778396e-06, "loss": 0.5173, "step": 13253 }, { "epoch": 0.1584667439830701, "grad_norm": 6.459835052490234, "learning_rate": 9.573551762574481e-06, "loss": 0.5877, "step": 13254 }, { "epoch": 0.15847870012793075, "grad_norm": 3.1456193923950195, "learning_rate": 9.573473516512507e-06, "loss": 0.6101, "step": 13255 }, { "epoch": 0.1584906562727914, "grad_norm": 2.9591033458709717, "learning_rate": 9.573395263592596e-06, "loss": 0.6672, "step": 13256 }, { "epoch": 0.15850261241765204, "grad_norm": 2.2301948070526123, "learning_rate": 9.573317003814862e-06, "loss": 0.5915, "step": 13257 }, { "epoch": 0.1585145685625127, "grad_norm": 2.66774320602417, "learning_rate": 9.57323873717942e-06, "loss": 0.5962, "step": 13258 }, { "epoch": 0.15852652470737336, "grad_norm": 2.948944568634033, "learning_rate": 9.573160463686393e-06, "loss": 0.66, "step": 13259 }, { "epoch": 0.15853848085223401, "grad_norm": 2.547302484512329, "learning_rate": 9.573082183335896e-06, "loss": 0.6666, "step": 13260 }, { "epoch": 0.15855043699709465, "grad_norm": 3.3163821697235107, "learning_rate": 9.573003896128045e-06, "loss": 0.5852, "step": 13261 }, { "epoch": 0.1585623931419553, "grad_norm": 3.868532419204712, "learning_rate": 9.572925602062958e-06, "loss": 0.6217, "step": 13262 }, { "epoch": 0.15857434928681596, "grad_norm": 3.9184913635253906, "learning_rate": 9.572847301140753e-06, "loss": 0.6018, "step": 13263 }, { "epoch": 0.15858630543167662, "grad_norm": 3.0999579429626465, "learning_rate": 9.572768993361548e-06, "loss": 0.5965, "step": 13264 }, { "epoch": 0.15859826157653725, "grad_norm": 2.9199745655059814, "learning_rate": 9.572690678725458e-06, "loss": 0.6009, "step": 13265 }, { "epoch": 0.1586102177213979, "grad_norm": 4.424691677093506, "learning_rate": 9.572612357232605e-06, "loss": 0.6298, "step": 13266 }, { "epoch": 0.15862217386625857, "grad_norm": 1.9819406270980835, "learning_rate": 9.5725340288831e-06, "loss": 0.5376, "step": 13267 }, { "epoch": 0.1586341300111192, "grad_norm": 6.014241695404053, "learning_rate": 9.572455693677067e-06, "loss": 0.6354, "step": 13268 }, { "epoch": 0.15864608615597986, "grad_norm": 2.171082019805908, "learning_rate": 9.572377351614619e-06, "loss": 0.6195, "step": 13269 }, { "epoch": 0.15865804230084052, "grad_norm": 2.0178253650665283, "learning_rate": 9.572299002695876e-06, "loss": 0.5639, "step": 13270 }, { "epoch": 0.15866999844570118, "grad_norm": 2.4344794750213623, "learning_rate": 9.572220646920952e-06, "loss": 0.557, "step": 13271 }, { "epoch": 0.1586819545905618, "grad_norm": 2.0634021759033203, "learning_rate": 9.572142284289969e-06, "loss": 0.5547, "step": 13272 }, { "epoch": 0.15869391073542247, "grad_norm": 3.8687987327575684, "learning_rate": 9.572063914803042e-06, "loss": 0.704, "step": 13273 }, { "epoch": 0.15870586688028313, "grad_norm": 4.953826904296875, "learning_rate": 9.571985538460288e-06, "loss": 0.6191, "step": 13274 }, { "epoch": 0.15871782302514378, "grad_norm": 1.9126770496368408, "learning_rate": 9.571907155261825e-06, "loss": 0.5437, "step": 13275 }, { "epoch": 0.15872977917000441, "grad_norm": 3.3139023780822754, "learning_rate": 9.571828765207774e-06, "loss": 0.7075, "step": 13276 }, { "epoch": 0.15874173531486507, "grad_norm": 2.771362781524658, "learning_rate": 9.571750368298247e-06, "loss": 0.6832, "step": 13277 }, { "epoch": 0.15875369145972573, "grad_norm": 1.973619818687439, "learning_rate": 9.571671964533364e-06, "loss": 0.6765, "step": 13278 }, { "epoch": 0.1587656476045864, "grad_norm": 1.7661672830581665, "learning_rate": 9.571593553913243e-06, "loss": 0.5462, "step": 13279 }, { "epoch": 0.15877760374944702, "grad_norm": 1.9699207544326782, "learning_rate": 9.571515136438001e-06, "loss": 0.5499, "step": 13280 }, { "epoch": 0.15878955989430768, "grad_norm": 2.5639586448669434, "learning_rate": 9.571436712107756e-06, "loss": 0.6269, "step": 13281 }, { "epoch": 0.15880151603916834, "grad_norm": 2.6741859912872314, "learning_rate": 9.571358280922627e-06, "loss": 0.6654, "step": 13282 }, { "epoch": 0.15881347218402897, "grad_norm": 2.701911687850952, "learning_rate": 9.571279842882728e-06, "loss": 0.7062, "step": 13283 }, { "epoch": 0.15882542832888963, "grad_norm": 3.7468135356903076, "learning_rate": 9.57120139798818e-06, "loss": 0.5964, "step": 13284 }, { "epoch": 0.1588373844737503, "grad_norm": 3.919639825820923, "learning_rate": 9.571122946239097e-06, "loss": 0.6166, "step": 13285 }, { "epoch": 0.15884934061861095, "grad_norm": 4.357755184173584, "learning_rate": 9.571044487635601e-06, "loss": 0.614, "step": 13286 }, { "epoch": 0.15886129676347158, "grad_norm": 2.213022232055664, "learning_rate": 9.570966022177808e-06, "loss": 0.6121, "step": 13287 }, { "epoch": 0.15887325290833224, "grad_norm": 2.022224187850952, "learning_rate": 9.570887549865833e-06, "loss": 0.645, "step": 13288 }, { "epoch": 0.1588852090531929, "grad_norm": 4.225819110870361, "learning_rate": 9.570809070699798e-06, "loss": 0.5713, "step": 13289 }, { "epoch": 0.15889716519805355, "grad_norm": 2.0381882190704346, "learning_rate": 9.570730584679816e-06, "loss": 0.5873, "step": 13290 }, { "epoch": 0.15890912134291418, "grad_norm": 6.716598033905029, "learning_rate": 9.57065209180601e-06, "loss": 0.6319, "step": 13291 }, { "epoch": 0.15892107748777484, "grad_norm": 1.673902153968811, "learning_rate": 9.570573592078494e-06, "loss": 0.5695, "step": 13292 }, { "epoch": 0.1589330336326355, "grad_norm": 5.417562007904053, "learning_rate": 9.570495085497387e-06, "loss": 0.5606, "step": 13293 }, { "epoch": 0.15894498977749613, "grad_norm": 3.385524034500122, "learning_rate": 9.570416572062806e-06, "loss": 0.7319, "step": 13294 }, { "epoch": 0.1589569459223568, "grad_norm": 3.6093170642852783, "learning_rate": 9.57033805177487e-06, "loss": 0.5653, "step": 13295 }, { "epoch": 0.15896890206721745, "grad_norm": 2.587073802947998, "learning_rate": 9.570259524633693e-06, "loss": 0.6208, "step": 13296 }, { "epoch": 0.1589808582120781, "grad_norm": 4.820890426635742, "learning_rate": 9.570180990639397e-06, "loss": 0.6321, "step": 13297 }, { "epoch": 0.15899281435693874, "grad_norm": 4.257768154144287, "learning_rate": 9.5701024497921e-06, "loss": 0.5911, "step": 13298 }, { "epoch": 0.1590047705017994, "grad_norm": 9.691534996032715, "learning_rate": 9.570023902091916e-06, "loss": 0.6362, "step": 13299 }, { "epoch": 0.15901672664666006, "grad_norm": 2.346447229385376, "learning_rate": 9.569945347538968e-06, "loss": 0.584, "step": 13300 }, { "epoch": 0.15902868279152071, "grad_norm": 2.728947401046753, "learning_rate": 9.569866786133365e-06, "loss": 0.564, "step": 13301 }, { "epoch": 0.15904063893638135, "grad_norm": 2.5989468097686768, "learning_rate": 9.569788217875236e-06, "loss": 0.621, "step": 13302 }, { "epoch": 0.159052595081242, "grad_norm": 2.845370054244995, "learning_rate": 9.569709642764691e-06, "loss": 0.6464, "step": 13303 }, { "epoch": 0.15906455122610266, "grad_norm": 2.5735254287719727, "learning_rate": 9.56963106080185e-06, "loss": 0.5814, "step": 13304 }, { "epoch": 0.1590765073709633, "grad_norm": 5.371774673461914, "learning_rate": 9.569552471986832e-06, "loss": 0.5178, "step": 13305 }, { "epoch": 0.15908846351582395, "grad_norm": 2.8589885234832764, "learning_rate": 9.569473876319753e-06, "loss": 0.7392, "step": 13306 }, { "epoch": 0.1591004196606846, "grad_norm": 2.4142236709594727, "learning_rate": 9.569395273800732e-06, "loss": 0.55, "step": 13307 }, { "epoch": 0.15911237580554527, "grad_norm": 9.30242919921875, "learning_rate": 9.569316664429886e-06, "loss": 0.5636, "step": 13308 }, { "epoch": 0.1591243319504059, "grad_norm": 2.654571056365967, "learning_rate": 9.569238048207334e-06, "loss": 0.6699, "step": 13309 }, { "epoch": 0.15913628809526656, "grad_norm": 4.19002628326416, "learning_rate": 9.569159425133195e-06, "loss": 0.6247, "step": 13310 }, { "epoch": 0.15914824424012722, "grad_norm": 2.156116485595703, "learning_rate": 9.569080795207582e-06, "loss": 0.5946, "step": 13311 }, { "epoch": 0.15916020038498788, "grad_norm": 2.475396156311035, "learning_rate": 9.569002158430618e-06, "loss": 0.5871, "step": 13312 }, { "epoch": 0.1591721565298485, "grad_norm": 5.641969203948975, "learning_rate": 9.56892351480242e-06, "loss": 0.5836, "step": 13313 }, { "epoch": 0.15918411267470917, "grad_norm": 2.1626393795013428, "learning_rate": 9.568844864323101e-06, "loss": 0.5777, "step": 13314 }, { "epoch": 0.15919606881956982, "grad_norm": 3.11206316947937, "learning_rate": 9.568766206992786e-06, "loss": 0.6529, "step": 13315 }, { "epoch": 0.15920802496443046, "grad_norm": 2.8773281574249268, "learning_rate": 9.56868754281159e-06, "loss": 0.6451, "step": 13316 }, { "epoch": 0.15921998110929111, "grad_norm": 3.5698328018188477, "learning_rate": 9.56860887177963e-06, "loss": 0.6009, "step": 13317 }, { "epoch": 0.15923193725415177, "grad_norm": 2.3522632122039795, "learning_rate": 9.568530193897025e-06, "loss": 0.6475, "step": 13318 }, { "epoch": 0.15924389339901243, "grad_norm": 4.922862529754639, "learning_rate": 9.568451509163893e-06, "loss": 0.6424, "step": 13319 }, { "epoch": 0.15925584954387306, "grad_norm": 10.455653190612793, "learning_rate": 9.568372817580351e-06, "loss": 0.5551, "step": 13320 }, { "epoch": 0.15926780568873372, "grad_norm": 6.048717975616455, "learning_rate": 9.568294119146517e-06, "loss": 0.7495, "step": 13321 }, { "epoch": 0.15927976183359438, "grad_norm": 2.608510971069336, "learning_rate": 9.56821541386251e-06, "loss": 0.6003, "step": 13322 }, { "epoch": 0.15929171797845504, "grad_norm": 2.1181764602661133, "learning_rate": 9.568136701728449e-06, "loss": 0.5336, "step": 13323 }, { "epoch": 0.15930367412331567, "grad_norm": 2.0571162700653076, "learning_rate": 9.568057982744449e-06, "loss": 0.756, "step": 13324 }, { "epoch": 0.15931563026817633, "grad_norm": 3.1750147342681885, "learning_rate": 9.567979256910634e-06, "loss": 0.67, "step": 13325 }, { "epoch": 0.159327586413037, "grad_norm": 2.144512414932251, "learning_rate": 9.567900524227114e-06, "loss": 0.6446, "step": 13326 }, { "epoch": 0.15933954255789762, "grad_norm": 2.3411781787872314, "learning_rate": 9.567821784694012e-06, "loss": 0.7597, "step": 13327 }, { "epoch": 0.15935149870275828, "grad_norm": 1.9411826133728027, "learning_rate": 9.567743038311443e-06, "loss": 0.5748, "step": 13328 }, { "epoch": 0.15936345484761893, "grad_norm": 4.292835712432861, "learning_rate": 9.56766428507953e-06, "loss": 0.5807, "step": 13329 }, { "epoch": 0.1593754109924796, "grad_norm": 2.4447522163391113, "learning_rate": 9.567585524998387e-06, "loss": 0.571, "step": 13330 }, { "epoch": 0.15938736713734022, "grad_norm": 3.070159435272217, "learning_rate": 9.567506758068134e-06, "loss": 0.6811, "step": 13331 }, { "epoch": 0.15939932328220088, "grad_norm": 3.8616037368774414, "learning_rate": 9.567427984288886e-06, "loss": 0.6479, "step": 13332 }, { "epoch": 0.15941127942706154, "grad_norm": 4.160147666931152, "learning_rate": 9.567349203660766e-06, "loss": 0.614, "step": 13333 }, { "epoch": 0.1594232355719222, "grad_norm": 3.3456947803497314, "learning_rate": 9.56727041618389e-06, "loss": 0.6587, "step": 13334 }, { "epoch": 0.15943519171678283, "grad_norm": 3.051781177520752, "learning_rate": 9.567191621858374e-06, "loss": 0.5967, "step": 13335 }, { "epoch": 0.1594471478616435, "grad_norm": 2.9260013103485107, "learning_rate": 9.567112820684339e-06, "loss": 0.5909, "step": 13336 }, { "epoch": 0.15945910400650415, "grad_norm": 1.9289534091949463, "learning_rate": 9.5670340126619e-06, "loss": 0.5328, "step": 13337 }, { "epoch": 0.1594710601513648, "grad_norm": 5.007779598236084, "learning_rate": 9.56695519779118e-06, "loss": 0.5264, "step": 13338 }, { "epoch": 0.15948301629622544, "grad_norm": 8.465428352355957, "learning_rate": 9.566876376072294e-06, "loss": 0.6649, "step": 13339 }, { "epoch": 0.1594949724410861, "grad_norm": 2.767659902572632, "learning_rate": 9.56679754750536e-06, "loss": 0.669, "step": 13340 }, { "epoch": 0.15950692858594676, "grad_norm": 4.54240083694458, "learning_rate": 9.566718712090497e-06, "loss": 0.6336, "step": 13341 }, { "epoch": 0.1595188847308074, "grad_norm": 3.2156031131744385, "learning_rate": 9.566639869827823e-06, "loss": 0.6218, "step": 13342 }, { "epoch": 0.15953084087566805, "grad_norm": 3.0236799716949463, "learning_rate": 9.566561020717456e-06, "loss": 0.6078, "step": 13343 }, { "epoch": 0.1595427970205287, "grad_norm": 3.050933599472046, "learning_rate": 9.566482164759514e-06, "loss": 0.7158, "step": 13344 }, { "epoch": 0.15955475316538936, "grad_norm": 1.9500784873962402, "learning_rate": 9.566403301954117e-06, "loss": 0.6785, "step": 13345 }, { "epoch": 0.15956670931025, "grad_norm": 3.2565128803253174, "learning_rate": 9.566324432301382e-06, "loss": 0.6096, "step": 13346 }, { "epoch": 0.15957866545511065, "grad_norm": 2.710003137588501, "learning_rate": 9.566245555801425e-06, "loss": 0.6398, "step": 13347 }, { "epoch": 0.1595906215999713, "grad_norm": 3.7859601974487305, "learning_rate": 9.566166672454368e-06, "loss": 0.5487, "step": 13348 }, { "epoch": 0.15960257774483197, "grad_norm": 5.2637457847595215, "learning_rate": 9.566087782260329e-06, "loss": 0.6196, "step": 13349 }, { "epoch": 0.1596145338896926, "grad_norm": 10.85058307647705, "learning_rate": 9.566008885219425e-06, "loss": 0.6104, "step": 13350 }, { "epoch": 0.15962649003455326, "grad_norm": 3.160764694213867, "learning_rate": 9.565929981331775e-06, "loss": 0.669, "step": 13351 }, { "epoch": 0.15963844617941392, "grad_norm": 2.088869333267212, "learning_rate": 9.565851070597494e-06, "loss": 0.6329, "step": 13352 }, { "epoch": 0.15965040232427455, "grad_norm": 3.120288848876953, "learning_rate": 9.565772153016704e-06, "loss": 0.568, "step": 13353 }, { "epoch": 0.1596623584691352, "grad_norm": 2.5459389686584473, "learning_rate": 9.565693228589523e-06, "loss": 0.5932, "step": 13354 }, { "epoch": 0.15967431461399587, "grad_norm": 2.433732509613037, "learning_rate": 9.56561429731607e-06, "loss": 0.5613, "step": 13355 }, { "epoch": 0.15968627075885652, "grad_norm": 5.810960292816162, "learning_rate": 9.56553535919646e-06, "loss": 0.6457, "step": 13356 }, { "epoch": 0.15969822690371716, "grad_norm": 1.922272801399231, "learning_rate": 9.565456414230815e-06, "loss": 0.6395, "step": 13357 }, { "epoch": 0.15971018304857781, "grad_norm": 4.57905387878418, "learning_rate": 9.565377462419252e-06, "loss": 0.5719, "step": 13358 }, { "epoch": 0.15972213919343847, "grad_norm": 2.566937208175659, "learning_rate": 9.565298503761889e-06, "loss": 0.6228, "step": 13359 }, { "epoch": 0.15973409533829913, "grad_norm": 2.679572820663452, "learning_rate": 9.565219538258843e-06, "loss": 0.6811, "step": 13360 }, { "epoch": 0.15974605148315976, "grad_norm": 21.9735164642334, "learning_rate": 9.565140565910236e-06, "loss": 0.6218, "step": 13361 }, { "epoch": 0.15975800762802042, "grad_norm": 1.8219246864318848, "learning_rate": 9.565061586716183e-06, "loss": 0.5669, "step": 13362 }, { "epoch": 0.15976996377288108, "grad_norm": 10.887452125549316, "learning_rate": 9.564982600676804e-06, "loss": 0.6556, "step": 13363 }, { "epoch": 0.1597819199177417, "grad_norm": 3.771346092224121, "learning_rate": 9.56490360779222e-06, "loss": 0.6057, "step": 13364 }, { "epoch": 0.15979387606260237, "grad_norm": 3.840265989303589, "learning_rate": 9.564824608062545e-06, "loss": 0.6548, "step": 13365 }, { "epoch": 0.15980583220746303, "grad_norm": 2.2222156524658203, "learning_rate": 9.5647456014879e-06, "loss": 0.5702, "step": 13366 }, { "epoch": 0.1598177883523237, "grad_norm": 2.365086793899536, "learning_rate": 9.564666588068401e-06, "loss": 0.6129, "step": 13367 }, { "epoch": 0.15982974449718432, "grad_norm": 3.474968671798706, "learning_rate": 9.564587567804168e-06, "loss": 0.7014, "step": 13368 }, { "epoch": 0.15984170064204498, "grad_norm": 3.2930619716644287, "learning_rate": 9.564508540695321e-06, "loss": 0.6245, "step": 13369 }, { "epoch": 0.15985365678690563, "grad_norm": 2.6517794132232666, "learning_rate": 9.564429506741978e-06, "loss": 0.568, "step": 13370 }, { "epoch": 0.1598656129317663, "grad_norm": 2.666656732559204, "learning_rate": 9.564350465944255e-06, "loss": 0.7103, "step": 13371 }, { "epoch": 0.15987756907662692, "grad_norm": 1.9287073612213135, "learning_rate": 9.564271418302273e-06, "loss": 0.6296, "step": 13372 }, { "epoch": 0.15988952522148758, "grad_norm": 1.6836143732070923, "learning_rate": 9.56419236381615e-06, "loss": 0.6592, "step": 13373 }, { "epoch": 0.15990148136634824, "grad_norm": 28.973217010498047, "learning_rate": 9.564113302486005e-06, "loss": 0.6202, "step": 13374 }, { "epoch": 0.15991343751120887, "grad_norm": 2.1774842739105225, "learning_rate": 9.564034234311954e-06, "loss": 0.6345, "step": 13375 }, { "epoch": 0.15992539365606953, "grad_norm": 10.412129402160645, "learning_rate": 9.56395515929412e-06, "loss": 0.6158, "step": 13376 }, { "epoch": 0.1599373498009302, "grad_norm": 6.025394916534424, "learning_rate": 9.563876077432617e-06, "loss": 0.6832, "step": 13377 }, { "epoch": 0.15994930594579085, "grad_norm": 3.5463056564331055, "learning_rate": 9.563796988727567e-06, "loss": 0.6162, "step": 13378 }, { "epoch": 0.15996126209065148, "grad_norm": 14.492583274841309, "learning_rate": 9.563717893179085e-06, "loss": 0.6207, "step": 13379 }, { "epoch": 0.15997321823551214, "grad_norm": 2.3940203189849854, "learning_rate": 9.563638790787293e-06, "loss": 0.5856, "step": 13380 }, { "epoch": 0.1599851743803728, "grad_norm": 1.9424235820770264, "learning_rate": 9.563559681552309e-06, "loss": 0.6104, "step": 13381 }, { "epoch": 0.15999713052523346, "grad_norm": 2.435325860977173, "learning_rate": 9.56348056547425e-06, "loss": 0.6524, "step": 13382 }, { "epoch": 0.1600090866700941, "grad_norm": 2.116844415664673, "learning_rate": 9.563401442553238e-06, "loss": 0.6189, "step": 13383 }, { "epoch": 0.16002104281495474, "grad_norm": 3.2559328079223633, "learning_rate": 9.563322312789386e-06, "loss": 0.5987, "step": 13384 }, { "epoch": 0.1600329989598154, "grad_norm": 2.2425646781921387, "learning_rate": 9.563243176182819e-06, "loss": 0.6821, "step": 13385 }, { "epoch": 0.16004495510467606, "grad_norm": 3.3961126804351807, "learning_rate": 9.56316403273365e-06, "loss": 0.5716, "step": 13386 }, { "epoch": 0.1600569112495367, "grad_norm": 7.217851161956787, "learning_rate": 9.563084882442002e-06, "loss": 0.5275, "step": 13387 }, { "epoch": 0.16006886739439735, "grad_norm": 4.550276279449463, "learning_rate": 9.56300572530799e-06, "loss": 0.6853, "step": 13388 }, { "epoch": 0.160080823539258, "grad_norm": 3.798462390899658, "learning_rate": 9.562926561331738e-06, "loss": 0.5981, "step": 13389 }, { "epoch": 0.16009277968411864, "grad_norm": 3.2673070430755615, "learning_rate": 9.562847390513359e-06, "loss": 0.643, "step": 13390 }, { "epoch": 0.1601047358289793, "grad_norm": 2.5373122692108154, "learning_rate": 9.562768212852975e-06, "loss": 0.7696, "step": 13391 }, { "epoch": 0.16011669197383996, "grad_norm": 4.035669803619385, "learning_rate": 9.562689028350702e-06, "loss": 0.6944, "step": 13392 }, { "epoch": 0.16012864811870062, "grad_norm": 3.8208963871002197, "learning_rate": 9.562609837006664e-06, "loss": 0.6372, "step": 13393 }, { "epoch": 0.16014060426356125, "grad_norm": 3.2279510498046875, "learning_rate": 9.562530638820974e-06, "loss": 0.6802, "step": 13394 }, { "epoch": 0.1601525604084219, "grad_norm": 3.3847577571868896, "learning_rate": 9.562451433793753e-06, "loss": 0.6842, "step": 13395 }, { "epoch": 0.16016451655328257, "grad_norm": 8.992907524108887, "learning_rate": 9.562372221925121e-06, "loss": 0.6563, "step": 13396 }, { "epoch": 0.16017647269814322, "grad_norm": 2.4815073013305664, "learning_rate": 9.562293003215196e-06, "loss": 0.6481, "step": 13397 }, { "epoch": 0.16018842884300385, "grad_norm": 2.067477226257324, "learning_rate": 9.562213777664095e-06, "loss": 0.6554, "step": 13398 }, { "epoch": 0.1602003849878645, "grad_norm": 6.4264373779296875, "learning_rate": 9.562134545271938e-06, "loss": 0.5786, "step": 13399 }, { "epoch": 0.16021234113272517, "grad_norm": 3.2352187633514404, "learning_rate": 9.562055306038845e-06, "loss": 0.5978, "step": 13400 }, { "epoch": 0.1602242972775858, "grad_norm": 5.752099514007568, "learning_rate": 9.561976059964934e-06, "loss": 0.5961, "step": 13401 }, { "epoch": 0.16023625342244646, "grad_norm": 2.611152172088623, "learning_rate": 9.561896807050322e-06, "loss": 0.6802, "step": 13402 }, { "epoch": 0.16024820956730712, "grad_norm": 1.726513147354126, "learning_rate": 9.561817547295132e-06, "loss": 0.5588, "step": 13403 }, { "epoch": 0.16026016571216778, "grad_norm": 2.039163827896118, "learning_rate": 9.56173828069948e-06, "loss": 0.646, "step": 13404 }, { "epoch": 0.1602721218570284, "grad_norm": 2.9384374618530273, "learning_rate": 9.561659007263483e-06, "loss": 0.644, "step": 13405 }, { "epoch": 0.16028407800188907, "grad_norm": 3.4179470539093018, "learning_rate": 9.561579726987263e-06, "loss": 0.6473, "step": 13406 }, { "epoch": 0.16029603414674973, "grad_norm": 1.840331792831421, "learning_rate": 9.561500439870938e-06, "loss": 0.5982, "step": 13407 }, { "epoch": 0.16030799029161039, "grad_norm": 11.615707397460938, "learning_rate": 9.561421145914629e-06, "loss": 0.6953, "step": 13408 }, { "epoch": 0.16031994643647102, "grad_norm": 5.452111721038818, "learning_rate": 9.561341845118451e-06, "loss": 0.585, "step": 13409 }, { "epoch": 0.16033190258133168, "grad_norm": 1.9892436265945435, "learning_rate": 9.561262537482524e-06, "loss": 0.5097, "step": 13410 }, { "epoch": 0.16034385872619233, "grad_norm": 4.371800422668457, "learning_rate": 9.56118322300697e-06, "loss": 0.5564, "step": 13411 }, { "epoch": 0.16035581487105297, "grad_norm": 2.175764322280884, "learning_rate": 9.561103901691903e-06, "loss": 0.5275, "step": 13412 }, { "epoch": 0.16036777101591362, "grad_norm": 1.873080849647522, "learning_rate": 9.561024573537445e-06, "loss": 0.6156, "step": 13413 }, { "epoch": 0.16037972716077428, "grad_norm": 7.053283214569092, "learning_rate": 9.560945238543716e-06, "loss": 0.6893, "step": 13414 }, { "epoch": 0.16039168330563494, "grad_norm": 4.270844459533691, "learning_rate": 9.560865896710831e-06, "loss": 0.5682, "step": 13415 }, { "epoch": 0.16040363945049557, "grad_norm": 1.986082673072815, "learning_rate": 9.560786548038914e-06, "loss": 0.6506, "step": 13416 }, { "epoch": 0.16041559559535623, "grad_norm": 1.9262343645095825, "learning_rate": 9.560707192528079e-06, "loss": 0.667, "step": 13417 }, { "epoch": 0.1604275517402169, "grad_norm": 2.475687026977539, "learning_rate": 9.560627830178449e-06, "loss": 0.629, "step": 13418 }, { "epoch": 0.16043950788507755, "grad_norm": 3.566375732421875, "learning_rate": 9.56054846099014e-06, "loss": 0.6026, "step": 13419 }, { "epoch": 0.16045146402993818, "grad_norm": 5.167773246765137, "learning_rate": 9.560469084963272e-06, "loss": 0.5688, "step": 13420 }, { "epoch": 0.16046342017479884, "grad_norm": 6.596253395080566, "learning_rate": 9.560389702097967e-06, "loss": 0.6318, "step": 13421 }, { "epoch": 0.1604753763196595, "grad_norm": 5.053430557250977, "learning_rate": 9.560310312394341e-06, "loss": 0.6605, "step": 13422 }, { "epoch": 0.16048733246452013, "grad_norm": 5.265506744384766, "learning_rate": 9.560230915852512e-06, "loss": 0.6272, "step": 13423 }, { "epoch": 0.16049928860938079, "grad_norm": 2.1820647716522217, "learning_rate": 9.5601515124726e-06, "loss": 0.5387, "step": 13424 }, { "epoch": 0.16051124475424144, "grad_norm": 2.7031402587890625, "learning_rate": 9.560072102254726e-06, "loss": 0.5741, "step": 13425 }, { "epoch": 0.1605232008991021, "grad_norm": 4.522905349731445, "learning_rate": 9.559992685199008e-06, "loss": 0.6693, "step": 13426 }, { "epoch": 0.16053515704396273, "grad_norm": 2.052598476409912, "learning_rate": 9.559913261305563e-06, "loss": 0.5635, "step": 13427 }, { "epoch": 0.1605471131888234, "grad_norm": 2.5163209438323975, "learning_rate": 9.559833830574512e-06, "loss": 0.5776, "step": 13428 }, { "epoch": 0.16055906933368405, "grad_norm": 7.814783096313477, "learning_rate": 9.559754393005976e-06, "loss": 0.7891, "step": 13429 }, { "epoch": 0.1605710254785447, "grad_norm": 10.547176361083984, "learning_rate": 9.55967494860007e-06, "loss": 0.4954, "step": 13430 }, { "epoch": 0.16058298162340534, "grad_norm": 10.519770622253418, "learning_rate": 9.559595497356917e-06, "loss": 0.633, "step": 13431 }, { "epoch": 0.160594937768266, "grad_norm": 4.7650299072265625, "learning_rate": 9.559516039276632e-06, "loss": 0.6608, "step": 13432 }, { "epoch": 0.16060689391312666, "grad_norm": 1.8864037990570068, "learning_rate": 9.55943657435934e-06, "loss": 0.6355, "step": 13433 }, { "epoch": 0.1606188500579873, "grad_norm": 2.465367078781128, "learning_rate": 9.559357102605154e-06, "loss": 0.6541, "step": 13434 }, { "epoch": 0.16063080620284795, "grad_norm": 2.552720785140991, "learning_rate": 9.559277624014196e-06, "loss": 0.5299, "step": 13435 }, { "epoch": 0.1606427623477086, "grad_norm": 3.2933337688446045, "learning_rate": 9.559198138586587e-06, "loss": 0.6804, "step": 13436 }, { "epoch": 0.16065471849256927, "grad_norm": 4.097784042358398, "learning_rate": 9.559118646322441e-06, "loss": 0.6622, "step": 13437 }, { "epoch": 0.1606666746374299, "grad_norm": 4.9560394287109375, "learning_rate": 9.559039147221883e-06, "loss": 0.6275, "step": 13438 }, { "epoch": 0.16067863078229055, "grad_norm": 11.896025657653809, "learning_rate": 9.558959641285028e-06, "loss": 0.5661, "step": 13439 }, { "epoch": 0.1606905869271512, "grad_norm": 2.764094114303589, "learning_rate": 9.558880128511998e-06, "loss": 0.5623, "step": 13440 }, { "epoch": 0.16070254307201187, "grad_norm": 3.392099618911743, "learning_rate": 9.55880060890291e-06, "loss": 0.6251, "step": 13441 }, { "epoch": 0.1607144992168725, "grad_norm": 1.9480139017105103, "learning_rate": 9.558721082457885e-06, "loss": 0.5924, "step": 13442 }, { "epoch": 0.16072645536173316, "grad_norm": 3.850785493850708, "learning_rate": 9.558641549177041e-06, "loss": 0.6081, "step": 13443 }, { "epoch": 0.16073841150659382, "grad_norm": 3.8578262329101562, "learning_rate": 9.558562009060498e-06, "loss": 0.6716, "step": 13444 }, { "epoch": 0.16075036765145448, "grad_norm": 5.106904983520508, "learning_rate": 9.558482462108375e-06, "loss": 0.621, "step": 13445 }, { "epoch": 0.1607623237963151, "grad_norm": 3.133641242980957, "learning_rate": 9.558402908320792e-06, "loss": 0.6247, "step": 13446 }, { "epoch": 0.16077427994117577, "grad_norm": 1.8812545537948608, "learning_rate": 9.558323347697866e-06, "loss": 0.5954, "step": 13447 }, { "epoch": 0.16078623608603643, "grad_norm": 8.787132263183594, "learning_rate": 9.55824378023972e-06, "loss": 0.5573, "step": 13448 }, { "epoch": 0.16079819223089706, "grad_norm": 3.9333336353302, "learning_rate": 9.55816420594647e-06, "loss": 0.6289, "step": 13449 }, { "epoch": 0.16081014837575772, "grad_norm": 2.803602933883667, "learning_rate": 9.558084624818237e-06, "loss": 0.5997, "step": 13450 }, { "epoch": 0.16082210452061838, "grad_norm": 7.357798099517822, "learning_rate": 9.55800503685514e-06, "loss": 0.6173, "step": 13451 }, { "epoch": 0.16083406066547903, "grad_norm": 3.5029714107513428, "learning_rate": 9.557925442057297e-06, "loss": 0.6113, "step": 13452 }, { "epoch": 0.16084601681033966, "grad_norm": 9.032804489135742, "learning_rate": 9.557845840424832e-06, "loss": 0.6564, "step": 13453 }, { "epoch": 0.16085797295520032, "grad_norm": 2.645158529281616, "learning_rate": 9.557766231957857e-06, "loss": 0.6038, "step": 13454 }, { "epoch": 0.16086992910006098, "grad_norm": 5.053421974182129, "learning_rate": 9.557686616656496e-06, "loss": 0.8364, "step": 13455 }, { "epoch": 0.16088188524492164, "grad_norm": 4.549205303192139, "learning_rate": 9.55760699452087e-06, "loss": 0.6744, "step": 13456 }, { "epoch": 0.16089384138978227, "grad_norm": 2.0276613235473633, "learning_rate": 9.557527365551094e-06, "loss": 0.6214, "step": 13457 }, { "epoch": 0.16090579753464293, "grad_norm": 2.49481463432312, "learning_rate": 9.55744772974729e-06, "loss": 0.6161, "step": 13458 }, { "epoch": 0.1609177536795036, "grad_norm": 8.646782875061035, "learning_rate": 9.557368087109578e-06, "loss": 0.6577, "step": 13459 }, { "epoch": 0.16092970982436422, "grad_norm": 2.349464178085327, "learning_rate": 9.557288437638074e-06, "loss": 0.5644, "step": 13460 }, { "epoch": 0.16094166596922488, "grad_norm": 2.9983935356140137, "learning_rate": 9.557208781332904e-06, "loss": 0.6342, "step": 13461 }, { "epoch": 0.16095362211408554, "grad_norm": 6.826877593994141, "learning_rate": 9.55712911819418e-06, "loss": 0.679, "step": 13462 }, { "epoch": 0.1609655782589462, "grad_norm": 2.065167188644409, "learning_rate": 9.557049448222025e-06, "loss": 0.5653, "step": 13463 }, { "epoch": 0.16097753440380683, "grad_norm": 1.951911449432373, "learning_rate": 9.556969771416558e-06, "loss": 0.5679, "step": 13464 }, { "epoch": 0.16098949054866749, "grad_norm": 1.772096037864685, "learning_rate": 9.556890087777898e-06, "loss": 0.6145, "step": 13465 }, { "epoch": 0.16100144669352814, "grad_norm": 2.8561437129974365, "learning_rate": 9.556810397306166e-06, "loss": 0.6228, "step": 13466 }, { "epoch": 0.1610134028383888, "grad_norm": 2.899311065673828, "learning_rate": 9.556730700001482e-06, "loss": 0.5847, "step": 13467 }, { "epoch": 0.16102535898324943, "grad_norm": 2.443765640258789, "learning_rate": 9.556650995863962e-06, "loss": 0.6086, "step": 13468 }, { "epoch": 0.1610373151281101, "grad_norm": 3.862382173538208, "learning_rate": 9.556571284893728e-06, "loss": 0.6877, "step": 13469 }, { "epoch": 0.16104927127297075, "grad_norm": 2.245473861694336, "learning_rate": 9.5564915670909e-06, "loss": 0.6263, "step": 13470 }, { "epoch": 0.16106122741783138, "grad_norm": 2.662843942642212, "learning_rate": 9.556411842455597e-06, "loss": 0.6482, "step": 13471 }, { "epoch": 0.16107318356269204, "grad_norm": 10.430373191833496, "learning_rate": 9.556332110987938e-06, "loss": 0.6488, "step": 13472 }, { "epoch": 0.1610851397075527, "grad_norm": 3.8070127964019775, "learning_rate": 9.556252372688041e-06, "loss": 0.6098, "step": 13473 }, { "epoch": 0.16109709585241336, "grad_norm": 3.827064275741577, "learning_rate": 9.55617262755603e-06, "loss": 0.6078, "step": 13474 }, { "epoch": 0.161109051997274, "grad_norm": 2.5208873748779297, "learning_rate": 9.55609287559202e-06, "loss": 0.7239, "step": 13475 }, { "epoch": 0.16112100814213465, "grad_norm": 2.156644582748413, "learning_rate": 9.556013116796133e-06, "loss": 0.6649, "step": 13476 }, { "epoch": 0.1611329642869953, "grad_norm": 2.4744515419006348, "learning_rate": 9.555933351168489e-06, "loss": 0.5684, "step": 13477 }, { "epoch": 0.16114492043185596, "grad_norm": 3.318246364593506, "learning_rate": 9.555853578709207e-06, "loss": 0.6113, "step": 13478 }, { "epoch": 0.1611568765767166, "grad_norm": 1.9925576448440552, "learning_rate": 9.555773799418403e-06, "loss": 0.6435, "step": 13479 }, { "epoch": 0.16116883272157725, "grad_norm": 2.608751058578491, "learning_rate": 9.555694013296204e-06, "loss": 0.6108, "step": 13480 }, { "epoch": 0.1611807888664379, "grad_norm": 2.143080949783325, "learning_rate": 9.555614220342723e-06, "loss": 0.63, "step": 13481 }, { "epoch": 0.16119274501129854, "grad_norm": 2.68350887298584, "learning_rate": 9.555534420558084e-06, "loss": 0.6312, "step": 13482 }, { "epoch": 0.1612047011561592, "grad_norm": 2.769859552383423, "learning_rate": 9.555454613942405e-06, "loss": 0.6094, "step": 13483 }, { "epoch": 0.16121665730101986, "grad_norm": 3.623793601989746, "learning_rate": 9.555374800495804e-06, "loss": 0.5845, "step": 13484 }, { "epoch": 0.16122861344588052, "grad_norm": 2.193451166152954, "learning_rate": 9.555294980218404e-06, "loss": 0.6081, "step": 13485 }, { "epoch": 0.16124056959074115, "grad_norm": 3.0787601470947266, "learning_rate": 9.555215153110322e-06, "loss": 0.6447, "step": 13486 }, { "epoch": 0.1612525257356018, "grad_norm": 2.3440418243408203, "learning_rate": 9.55513531917168e-06, "loss": 0.6011, "step": 13487 }, { "epoch": 0.16126448188046247, "grad_norm": 1.755760908126831, "learning_rate": 9.555055478402595e-06, "loss": 0.6644, "step": 13488 }, { "epoch": 0.16127643802532313, "grad_norm": 1.8943105936050415, "learning_rate": 9.554975630803189e-06, "loss": 0.6887, "step": 13489 }, { "epoch": 0.16128839417018376, "grad_norm": 3.4909801483154297, "learning_rate": 9.55489577637358e-06, "loss": 0.584, "step": 13490 }, { "epoch": 0.16130035031504442, "grad_norm": 3.4917731285095215, "learning_rate": 9.55481591511389e-06, "loss": 0.659, "step": 13491 }, { "epoch": 0.16131230645990507, "grad_norm": 2.0457818508148193, "learning_rate": 9.554736047024234e-06, "loss": 0.6168, "step": 13492 }, { "epoch": 0.1613242626047657, "grad_norm": 2.5440168380737305, "learning_rate": 9.554656172104736e-06, "loss": 0.5867, "step": 13493 }, { "epoch": 0.16133621874962636, "grad_norm": 2.8711369037628174, "learning_rate": 9.554576290355518e-06, "loss": 0.5947, "step": 13494 }, { "epoch": 0.16134817489448702, "grad_norm": 2.5682196617126465, "learning_rate": 9.554496401776694e-06, "loss": 0.7072, "step": 13495 }, { "epoch": 0.16136013103934768, "grad_norm": 7.012271404266357, "learning_rate": 9.55441650636839e-06, "loss": 0.5647, "step": 13496 }, { "epoch": 0.1613720871842083, "grad_norm": 3.6973958015441895, "learning_rate": 9.554336604130718e-06, "loss": 0.639, "step": 13497 }, { "epoch": 0.16138404332906897, "grad_norm": 2.092095136642456, "learning_rate": 9.554256695063804e-06, "loss": 0.5503, "step": 13498 }, { "epoch": 0.16139599947392963, "grad_norm": 2.589656114578247, "learning_rate": 9.554176779167765e-06, "loss": 0.5717, "step": 13499 }, { "epoch": 0.1614079556187903, "grad_norm": 17.149980545043945, "learning_rate": 9.554096856442722e-06, "loss": 0.6106, "step": 13500 }, { "epoch": 0.16141991176365092, "grad_norm": 2.989220380783081, "learning_rate": 9.554016926888797e-06, "loss": 0.585, "step": 13501 }, { "epoch": 0.16143186790851158, "grad_norm": 1.7262320518493652, "learning_rate": 9.553936990506104e-06, "loss": 0.5948, "step": 13502 }, { "epoch": 0.16144382405337224, "grad_norm": 1.9897501468658447, "learning_rate": 9.553857047294768e-06, "loss": 0.6641, "step": 13503 }, { "epoch": 0.1614557801982329, "grad_norm": 4.20904016494751, "learning_rate": 9.553777097254905e-06, "loss": 0.7048, "step": 13504 }, { "epoch": 0.16146773634309353, "grad_norm": 6.248229503631592, "learning_rate": 9.553697140386639e-06, "loss": 0.6498, "step": 13505 }, { "epoch": 0.16147969248795419, "grad_norm": 2.1847381591796875, "learning_rate": 9.55361717669009e-06, "loss": 0.5275, "step": 13506 }, { "epoch": 0.16149164863281484, "grad_norm": 2.584052562713623, "learning_rate": 9.553537206165374e-06, "loss": 0.5687, "step": 13507 }, { "epoch": 0.16150360477767547, "grad_norm": 1.5573803186416626, "learning_rate": 9.55345722881261e-06, "loss": 0.5816, "step": 13508 }, { "epoch": 0.16151556092253613, "grad_norm": 3.144941568374634, "learning_rate": 9.553377244631924e-06, "loss": 0.5518, "step": 13509 }, { "epoch": 0.1615275170673968, "grad_norm": 7.285748481750488, "learning_rate": 9.553297253623432e-06, "loss": 0.6363, "step": 13510 }, { "epoch": 0.16153947321225745, "grad_norm": 4.148238658905029, "learning_rate": 9.553217255787255e-06, "loss": 0.6254, "step": 13511 }, { "epoch": 0.16155142935711808, "grad_norm": 2.7345480918884277, "learning_rate": 9.553137251123512e-06, "loss": 0.5512, "step": 13512 }, { "epoch": 0.16156338550197874, "grad_norm": 2.6697380542755127, "learning_rate": 9.553057239632322e-06, "loss": 0.6985, "step": 13513 }, { "epoch": 0.1615753416468394, "grad_norm": 3.743947982788086, "learning_rate": 9.552977221313809e-06, "loss": 0.6013, "step": 13514 }, { "epoch": 0.16158729779170006, "grad_norm": 2.204496383666992, "learning_rate": 9.552897196168089e-06, "loss": 0.5376, "step": 13515 }, { "epoch": 0.1615992539365607, "grad_norm": 3.6415085792541504, "learning_rate": 9.552817164195285e-06, "loss": 0.61, "step": 13516 }, { "epoch": 0.16161121008142135, "grad_norm": 3.1664299964904785, "learning_rate": 9.552737125395512e-06, "loss": 0.5251, "step": 13517 }, { "epoch": 0.161623166226282, "grad_norm": 4.077641010284424, "learning_rate": 9.552657079768897e-06, "loss": 0.5884, "step": 13518 }, { "epoch": 0.16163512237114264, "grad_norm": 3.5006537437438965, "learning_rate": 9.552577027315555e-06, "loss": 0.5626, "step": 13519 }, { "epoch": 0.1616470785160033, "grad_norm": 2.9188687801361084, "learning_rate": 9.552496968035608e-06, "loss": 0.7513, "step": 13520 }, { "epoch": 0.16165903466086395, "grad_norm": 1.4748455286026, "learning_rate": 9.552416901929175e-06, "loss": 0.5684, "step": 13521 }, { "epoch": 0.1616709908057246, "grad_norm": 3.477616310119629, "learning_rate": 9.552336828996379e-06, "loss": 0.5571, "step": 13522 }, { "epoch": 0.16168294695058524, "grad_norm": 3.2891175746917725, "learning_rate": 9.552256749237335e-06, "loss": 0.5978, "step": 13523 }, { "epoch": 0.1616949030954459, "grad_norm": 4.215555191040039, "learning_rate": 9.552176662652166e-06, "loss": 0.6953, "step": 13524 }, { "epoch": 0.16170685924030656, "grad_norm": 3.319042921066284, "learning_rate": 9.552096569240993e-06, "loss": 0.6345, "step": 13525 }, { "epoch": 0.16171881538516722, "grad_norm": 5.014697551727295, "learning_rate": 9.552016469003935e-06, "loss": 0.6731, "step": 13526 }, { "epoch": 0.16173077153002785, "grad_norm": 2.0325472354888916, "learning_rate": 9.551936361941111e-06, "loss": 0.5277, "step": 13527 }, { "epoch": 0.1617427276748885, "grad_norm": 4.025394916534424, "learning_rate": 9.551856248052644e-06, "loss": 0.6478, "step": 13528 }, { "epoch": 0.16175468381974917, "grad_norm": 9.32809066772461, "learning_rate": 9.55177612733865e-06, "loss": 0.7495, "step": 13529 }, { "epoch": 0.1617666399646098, "grad_norm": 3.3855581283569336, "learning_rate": 9.551695999799254e-06, "loss": 0.6681, "step": 13530 }, { "epoch": 0.16177859610947046, "grad_norm": 5.441014289855957, "learning_rate": 9.551615865434573e-06, "loss": 0.5667, "step": 13531 }, { "epoch": 0.16179055225433112, "grad_norm": 3.734600305557251, "learning_rate": 9.551535724244728e-06, "loss": 0.5414, "step": 13532 }, { "epoch": 0.16180250839919177, "grad_norm": 2.0411267280578613, "learning_rate": 9.551455576229838e-06, "loss": 0.6196, "step": 13533 }, { "epoch": 0.1618144645440524, "grad_norm": 2.4299492835998535, "learning_rate": 9.551375421390024e-06, "loss": 0.7103, "step": 13534 }, { "epoch": 0.16182642068891306, "grad_norm": 3.403773784637451, "learning_rate": 9.551295259725407e-06, "loss": 0.6066, "step": 13535 }, { "epoch": 0.16183837683377372, "grad_norm": 2.3239476680755615, "learning_rate": 9.551215091236106e-06, "loss": 0.5218, "step": 13536 }, { "epoch": 0.16185033297863438, "grad_norm": 2.20032000541687, "learning_rate": 9.551134915922244e-06, "loss": 0.6966, "step": 13537 }, { "epoch": 0.161862289123495, "grad_norm": 3.5912511348724365, "learning_rate": 9.551054733783937e-06, "loss": 0.6311, "step": 13538 }, { "epoch": 0.16187424526835567, "grad_norm": 6.966111183166504, "learning_rate": 9.550974544821308e-06, "loss": 0.6139, "step": 13539 }, { "epoch": 0.16188620141321633, "grad_norm": 1.8590593338012695, "learning_rate": 9.550894349034475e-06, "loss": 0.7278, "step": 13540 }, { "epoch": 0.16189815755807696, "grad_norm": 2.8812553882598877, "learning_rate": 9.550814146423562e-06, "loss": 0.6847, "step": 13541 }, { "epoch": 0.16191011370293762, "grad_norm": 2.592072010040283, "learning_rate": 9.550733936988685e-06, "loss": 0.5893, "step": 13542 }, { "epoch": 0.16192206984779828, "grad_norm": 1.5896795988082886, "learning_rate": 9.550653720729968e-06, "loss": 0.6526, "step": 13543 }, { "epoch": 0.16193402599265894, "grad_norm": 32.785892486572266, "learning_rate": 9.550573497647529e-06, "loss": 0.6997, "step": 13544 }, { "epoch": 0.16194598213751957, "grad_norm": 2.538677453994751, "learning_rate": 9.550493267741487e-06, "loss": 0.7256, "step": 13545 }, { "epoch": 0.16195793828238023, "grad_norm": 3.6466994285583496, "learning_rate": 9.550413031011966e-06, "loss": 0.6843, "step": 13546 }, { "epoch": 0.16196989442724088, "grad_norm": 2.461141586303711, "learning_rate": 9.550332787459084e-06, "loss": 0.6334, "step": 13547 }, { "epoch": 0.16198185057210154, "grad_norm": 4.7230963706970215, "learning_rate": 9.550252537082963e-06, "loss": 0.5545, "step": 13548 }, { "epoch": 0.16199380671696217, "grad_norm": 1.9603865146636963, "learning_rate": 9.55017227988372e-06, "loss": 0.6606, "step": 13549 }, { "epoch": 0.16200576286182283, "grad_norm": 4.740930557250977, "learning_rate": 9.55009201586148e-06, "loss": 0.5489, "step": 13550 }, { "epoch": 0.1620177190066835, "grad_norm": 4.0728044509887695, "learning_rate": 9.550011745016359e-06, "loss": 0.6297, "step": 13551 }, { "epoch": 0.16202967515154412, "grad_norm": 9.91648006439209, "learning_rate": 9.54993146734848e-06, "loss": 0.6777, "step": 13552 }, { "epoch": 0.16204163129640478, "grad_norm": 2.643601417541504, "learning_rate": 9.549851182857963e-06, "loss": 0.6272, "step": 13553 }, { "epoch": 0.16205358744126544, "grad_norm": 4.473418235778809, "learning_rate": 9.549770891544928e-06, "loss": 0.5836, "step": 13554 }, { "epoch": 0.1620655435861261, "grad_norm": 2.865089178085327, "learning_rate": 9.549690593409492e-06, "loss": 0.7133, "step": 13555 }, { "epoch": 0.16207749973098673, "grad_norm": 2.0801405906677246, "learning_rate": 9.549610288451783e-06, "loss": 0.5996, "step": 13556 }, { "epoch": 0.1620894558758474, "grad_norm": 2.0250370502471924, "learning_rate": 9.549529976671916e-06, "loss": 0.5625, "step": 13557 }, { "epoch": 0.16210141202070805, "grad_norm": 51.024024963378906, "learning_rate": 9.549449658070013e-06, "loss": 0.708, "step": 13558 }, { "epoch": 0.1621133681655687, "grad_norm": 5.651737213134766, "learning_rate": 9.549369332646192e-06, "loss": 0.6894, "step": 13559 }, { "epoch": 0.16212532431042934, "grad_norm": 2.0111653804779053, "learning_rate": 9.549289000400577e-06, "loss": 0.6135, "step": 13560 }, { "epoch": 0.16213728045529, "grad_norm": 5.135767936706543, "learning_rate": 9.549208661333286e-06, "loss": 0.6285, "step": 13561 }, { "epoch": 0.16214923660015065, "grad_norm": 7.1389055252075195, "learning_rate": 9.549128315444443e-06, "loss": 0.5774, "step": 13562 }, { "epoch": 0.1621611927450113, "grad_norm": 2.4028589725494385, "learning_rate": 9.549047962734163e-06, "loss": 0.6681, "step": 13563 }, { "epoch": 0.16217314888987194, "grad_norm": 2.1747729778289795, "learning_rate": 9.548967603202573e-06, "loss": 0.6256, "step": 13564 }, { "epoch": 0.1621851050347326, "grad_norm": 3.626288652420044, "learning_rate": 9.548887236849786e-06, "loss": 0.7231, "step": 13565 }, { "epoch": 0.16219706117959326, "grad_norm": 1.7700996398925781, "learning_rate": 9.548806863675927e-06, "loss": 0.5384, "step": 13566 }, { "epoch": 0.1622090173244539, "grad_norm": 4.627917766571045, "learning_rate": 9.548726483681119e-06, "loss": 0.5813, "step": 13567 }, { "epoch": 0.16222097346931455, "grad_norm": 1.9132673740386963, "learning_rate": 9.548646096865478e-06, "loss": 0.5637, "step": 13568 }, { "epoch": 0.1622329296141752, "grad_norm": 2.5639798641204834, "learning_rate": 9.548565703229124e-06, "loss": 0.6539, "step": 13569 }, { "epoch": 0.16224488575903587, "grad_norm": 3.2417197227478027, "learning_rate": 9.548485302772182e-06, "loss": 0.661, "step": 13570 }, { "epoch": 0.1622568419038965, "grad_norm": 2.6345608234405518, "learning_rate": 9.548404895494768e-06, "loss": 0.6113, "step": 13571 }, { "epoch": 0.16226879804875716, "grad_norm": 3.4547224044799805, "learning_rate": 9.548324481397007e-06, "loss": 0.5919, "step": 13572 }, { "epoch": 0.16228075419361782, "grad_norm": 1.9168041944503784, "learning_rate": 9.548244060479015e-06, "loss": 0.5918, "step": 13573 }, { "epoch": 0.16229271033847847, "grad_norm": 1.5941250324249268, "learning_rate": 9.548163632740916e-06, "loss": 0.6615, "step": 13574 }, { "epoch": 0.1623046664833391, "grad_norm": 4.055750370025635, "learning_rate": 9.548083198182829e-06, "loss": 0.5009, "step": 13575 }, { "epoch": 0.16231662262819976, "grad_norm": 1.7146141529083252, "learning_rate": 9.548002756804876e-06, "loss": 0.6785, "step": 13576 }, { "epoch": 0.16232857877306042, "grad_norm": 2.057204246520996, "learning_rate": 9.547922308607177e-06, "loss": 0.5835, "step": 13577 }, { "epoch": 0.16234053491792105, "grad_norm": 5.270332336425781, "learning_rate": 9.54784185358985e-06, "loss": 0.7461, "step": 13578 }, { "epoch": 0.1623524910627817, "grad_norm": 6.286032676696777, "learning_rate": 9.54776139175302e-06, "loss": 0.6079, "step": 13579 }, { "epoch": 0.16236444720764237, "grad_norm": 2.2093122005462646, "learning_rate": 9.547680923096804e-06, "loss": 0.5497, "step": 13580 }, { "epoch": 0.16237640335250303, "grad_norm": 11.5594482421875, "learning_rate": 9.547600447621325e-06, "loss": 0.6422, "step": 13581 }, { "epoch": 0.16238835949736366, "grad_norm": 2.3789923191070557, "learning_rate": 9.547519965326704e-06, "loss": 0.6119, "step": 13582 }, { "epoch": 0.16240031564222432, "grad_norm": 2.2799196243286133, "learning_rate": 9.547439476213061e-06, "loss": 0.5576, "step": 13583 }, { "epoch": 0.16241227178708498, "grad_norm": 2.1419107913970947, "learning_rate": 9.547358980280515e-06, "loss": 0.6353, "step": 13584 }, { "epoch": 0.16242422793194564, "grad_norm": 3.4803104400634766, "learning_rate": 9.547278477529189e-06, "loss": 0.6305, "step": 13585 }, { "epoch": 0.16243618407680627, "grad_norm": 5.230049133300781, "learning_rate": 9.547197967959202e-06, "loss": 0.6397, "step": 13586 }, { "epoch": 0.16244814022166693, "grad_norm": 7.734706401824951, "learning_rate": 9.547117451570676e-06, "loss": 0.6333, "step": 13587 }, { "epoch": 0.16246009636652758, "grad_norm": 1.808833122253418, "learning_rate": 9.547036928363732e-06, "loss": 0.5435, "step": 13588 }, { "epoch": 0.16247205251138822, "grad_norm": 2.2874786853790283, "learning_rate": 9.546956398338487e-06, "loss": 0.7146, "step": 13589 }, { "epoch": 0.16248400865624887, "grad_norm": 1.5709584951400757, "learning_rate": 9.546875861495066e-06, "loss": 0.6211, "step": 13590 }, { "epoch": 0.16249596480110953, "grad_norm": 3.409372329711914, "learning_rate": 9.54679531783359e-06, "loss": 0.6452, "step": 13591 }, { "epoch": 0.1625079209459702, "grad_norm": 3.98787522315979, "learning_rate": 9.546714767354177e-06, "loss": 0.5496, "step": 13592 }, { "epoch": 0.16251987709083082, "grad_norm": 2.6608152389526367, "learning_rate": 9.546634210056948e-06, "loss": 0.605, "step": 13593 }, { "epoch": 0.16253183323569148, "grad_norm": 9.012795448303223, "learning_rate": 9.546553645942027e-06, "loss": 0.5485, "step": 13594 }, { "epoch": 0.16254378938055214, "grad_norm": 1.6253254413604736, "learning_rate": 9.546473075009533e-06, "loss": 0.5654, "step": 13595 }, { "epoch": 0.1625557455254128, "grad_norm": 2.22517991065979, "learning_rate": 9.546392497259584e-06, "loss": 0.627, "step": 13596 }, { "epoch": 0.16256770167027343, "grad_norm": 1.6611242294311523, "learning_rate": 9.546311912692306e-06, "loss": 0.5977, "step": 13597 }, { "epoch": 0.1625796578151341, "grad_norm": 5.32161808013916, "learning_rate": 9.546231321307815e-06, "loss": 0.6759, "step": 13598 }, { "epoch": 0.16259161395999475, "grad_norm": 2.2939956188201904, "learning_rate": 9.546150723106235e-06, "loss": 0.6755, "step": 13599 }, { "epoch": 0.16260357010485538, "grad_norm": 2.4959592819213867, "learning_rate": 9.546070118087685e-06, "loss": 0.6064, "step": 13600 }, { "epoch": 0.16261552624971604, "grad_norm": 3.247948169708252, "learning_rate": 9.545989506252287e-06, "loss": 0.5769, "step": 13601 }, { "epoch": 0.1626274823945767, "grad_norm": 2.810450553894043, "learning_rate": 9.545908887600162e-06, "loss": 0.5266, "step": 13602 }, { "epoch": 0.16263943853943735, "grad_norm": 2.224428415298462, "learning_rate": 9.54582826213143e-06, "loss": 0.6933, "step": 13603 }, { "epoch": 0.16265139468429798, "grad_norm": 2.140646457672119, "learning_rate": 9.545747629846215e-06, "loss": 0.6005, "step": 13604 }, { "epoch": 0.16266335082915864, "grad_norm": 6.642253875732422, "learning_rate": 9.54566699074463e-06, "loss": 0.5641, "step": 13605 }, { "epoch": 0.1626753069740193, "grad_norm": 2.3423523902893066, "learning_rate": 9.545586344826805e-06, "loss": 0.5775, "step": 13606 }, { "epoch": 0.16268726311887996, "grad_norm": 2.7860124111175537, "learning_rate": 9.545505692092856e-06, "loss": 0.6252, "step": 13607 }, { "epoch": 0.1626992192637406, "grad_norm": 2.823857545852661, "learning_rate": 9.545425032542906e-06, "loss": 0.6441, "step": 13608 }, { "epoch": 0.16271117540860125, "grad_norm": 3.983757495880127, "learning_rate": 9.545344366177073e-06, "loss": 0.7193, "step": 13609 }, { "epoch": 0.1627231315534619, "grad_norm": 1.5395588874816895, "learning_rate": 9.545263692995481e-06, "loss": 0.6144, "step": 13610 }, { "epoch": 0.16273508769832254, "grad_norm": 3.8546228408813477, "learning_rate": 9.54518301299825e-06, "loss": 0.6276, "step": 13611 }, { "epoch": 0.1627470438431832, "grad_norm": 2.039231061935425, "learning_rate": 9.545102326185503e-06, "loss": 0.5988, "step": 13612 }, { "epoch": 0.16275899998804386, "grad_norm": 10.178388595581055, "learning_rate": 9.545021632557356e-06, "loss": 0.598, "step": 13613 }, { "epoch": 0.16277095613290452, "grad_norm": 2.076453447341919, "learning_rate": 9.544940932113932e-06, "loss": 0.5807, "step": 13614 }, { "epoch": 0.16278291227776515, "grad_norm": 1.4533662796020508, "learning_rate": 9.544860224855356e-06, "loss": 0.5931, "step": 13615 }, { "epoch": 0.1627948684226258, "grad_norm": 1.9858065843582153, "learning_rate": 9.544779510781745e-06, "loss": 0.6646, "step": 13616 }, { "epoch": 0.16280682456748646, "grad_norm": 2.182762384414673, "learning_rate": 9.54469878989322e-06, "loss": 0.5936, "step": 13617 }, { "epoch": 0.16281878071234712, "grad_norm": 3.113072633743286, "learning_rate": 9.544618062189905e-06, "loss": 0.5513, "step": 13618 }, { "epoch": 0.16283073685720775, "grad_norm": 2.7152702808380127, "learning_rate": 9.544537327671918e-06, "loss": 0.6422, "step": 13619 }, { "epoch": 0.1628426930020684, "grad_norm": 14.916300773620605, "learning_rate": 9.54445658633938e-06, "loss": 0.6242, "step": 13620 }, { "epoch": 0.16285464914692907, "grad_norm": 4.427801132202148, "learning_rate": 9.544375838192415e-06, "loss": 0.5505, "step": 13621 }, { "epoch": 0.16286660529178973, "grad_norm": 2.8576269149780273, "learning_rate": 9.54429508323114e-06, "loss": 0.6253, "step": 13622 }, { "epoch": 0.16287856143665036, "grad_norm": 3.1492984294891357, "learning_rate": 9.54421432145568e-06, "loss": 0.5769, "step": 13623 }, { "epoch": 0.16289051758151102, "grad_norm": 1.6344913244247437, "learning_rate": 9.544133552866154e-06, "loss": 0.5516, "step": 13624 }, { "epoch": 0.16290247372637168, "grad_norm": 2.9450182914733887, "learning_rate": 9.544052777462685e-06, "loss": 0.5653, "step": 13625 }, { "epoch": 0.1629144298712323, "grad_norm": 7.6982622146606445, "learning_rate": 9.543971995245391e-06, "loss": 0.6482, "step": 13626 }, { "epoch": 0.16292638601609297, "grad_norm": 5.23628568649292, "learning_rate": 9.543891206214395e-06, "loss": 0.576, "step": 13627 }, { "epoch": 0.16293834216095363, "grad_norm": 8.174816131591797, "learning_rate": 9.54381041036982e-06, "loss": 0.6146, "step": 13628 }, { "epoch": 0.16295029830581428, "grad_norm": 4.9311089515686035, "learning_rate": 9.543729607711783e-06, "loss": 0.5778, "step": 13629 }, { "epoch": 0.16296225445067491, "grad_norm": 2.772946357727051, "learning_rate": 9.543648798240408e-06, "loss": 0.6479, "step": 13630 }, { "epoch": 0.16297421059553557, "grad_norm": 2.900949239730835, "learning_rate": 9.543567981955815e-06, "loss": 0.5683, "step": 13631 }, { "epoch": 0.16298616674039623, "grad_norm": 2.4209132194519043, "learning_rate": 9.543487158858124e-06, "loss": 0.6086, "step": 13632 }, { "epoch": 0.1629981228852569, "grad_norm": 2.1841115951538086, "learning_rate": 9.543406328947461e-06, "loss": 0.6244, "step": 13633 }, { "epoch": 0.16301007903011752, "grad_norm": 4.9515910148620605, "learning_rate": 9.543325492223943e-06, "loss": 0.6495, "step": 13634 }, { "epoch": 0.16302203517497818, "grad_norm": 1.898157000541687, "learning_rate": 9.543244648687693e-06, "loss": 0.5386, "step": 13635 }, { "epoch": 0.16303399131983884, "grad_norm": 2.2333710193634033, "learning_rate": 9.54316379833883e-06, "loss": 0.679, "step": 13636 }, { "epoch": 0.16304594746469947, "grad_norm": 2.524580955505371, "learning_rate": 9.543082941177477e-06, "loss": 0.5831, "step": 13637 }, { "epoch": 0.16305790360956013, "grad_norm": 4.03099250793457, "learning_rate": 9.543002077203755e-06, "loss": 0.6804, "step": 13638 }, { "epoch": 0.1630698597544208, "grad_norm": 2.417677879333496, "learning_rate": 9.542921206417786e-06, "loss": 0.6353, "step": 13639 }, { "epoch": 0.16308181589928145, "grad_norm": 1.9440772533416748, "learning_rate": 9.54284032881969e-06, "loss": 0.5876, "step": 13640 }, { "epoch": 0.16309377204414208, "grad_norm": 2.2792325019836426, "learning_rate": 9.542759444409589e-06, "loss": 0.641, "step": 13641 }, { "epoch": 0.16310572818900274, "grad_norm": 2.7311503887176514, "learning_rate": 9.542678553187603e-06, "loss": 0.6258, "step": 13642 }, { "epoch": 0.1631176843338634, "grad_norm": 4.450184345245361, "learning_rate": 9.542597655153856e-06, "loss": 0.6054, "step": 13643 }, { "epoch": 0.16312964047872405, "grad_norm": 1.56643807888031, "learning_rate": 9.542516750308467e-06, "loss": 0.6101, "step": 13644 }, { "epoch": 0.16314159662358468, "grad_norm": 2.4901082515716553, "learning_rate": 9.542435838651557e-06, "loss": 0.6813, "step": 13645 }, { "epoch": 0.16315355276844534, "grad_norm": 2.7357444763183594, "learning_rate": 9.54235492018325e-06, "loss": 0.6909, "step": 13646 }, { "epoch": 0.163165508913306, "grad_norm": 10.043549537658691, "learning_rate": 9.542273994903666e-06, "loss": 0.6165, "step": 13647 }, { "epoch": 0.16317746505816663, "grad_norm": 2.805068254470825, "learning_rate": 9.542193062812923e-06, "loss": 0.6201, "step": 13648 }, { "epoch": 0.1631894212030273, "grad_norm": 1.962915301322937, "learning_rate": 9.542112123911148e-06, "loss": 0.563, "step": 13649 }, { "epoch": 0.16320137734788795, "grad_norm": 1.9007234573364258, "learning_rate": 9.542031178198459e-06, "loss": 0.566, "step": 13650 }, { "epoch": 0.1632133334927486, "grad_norm": 3.089371919631958, "learning_rate": 9.541950225674979e-06, "loss": 0.5142, "step": 13651 }, { "epoch": 0.16322528963760924, "grad_norm": 4.299890518188477, "learning_rate": 9.541869266340826e-06, "loss": 0.6603, "step": 13652 }, { "epoch": 0.1632372457824699, "grad_norm": 5.644405841827393, "learning_rate": 9.541788300196126e-06, "loss": 0.6322, "step": 13653 }, { "epoch": 0.16324920192733056, "grad_norm": 2.1056058406829834, "learning_rate": 9.541707327240999e-06, "loss": 0.6067, "step": 13654 }, { "epoch": 0.16326115807219121, "grad_norm": 129.18902587890625, "learning_rate": 9.541626347475563e-06, "loss": 0.6859, "step": 13655 }, { "epoch": 0.16327311421705185, "grad_norm": 2.6318917274475098, "learning_rate": 9.541545360899944e-06, "loss": 0.6089, "step": 13656 }, { "epoch": 0.1632850703619125, "grad_norm": 3.779062509536743, "learning_rate": 9.541464367514262e-06, "loss": 0.5965, "step": 13657 }, { "epoch": 0.16329702650677316, "grad_norm": 2.7629923820495605, "learning_rate": 9.541383367318638e-06, "loss": 0.5704, "step": 13658 }, { "epoch": 0.1633089826516338, "grad_norm": 18.716190338134766, "learning_rate": 9.541302360313191e-06, "loss": 0.6692, "step": 13659 }, { "epoch": 0.16332093879649445, "grad_norm": 2.0476176738739014, "learning_rate": 9.541221346498046e-06, "loss": 0.6813, "step": 13660 }, { "epoch": 0.1633328949413551, "grad_norm": 3.5614140033721924, "learning_rate": 9.541140325873326e-06, "loss": 0.5778, "step": 13661 }, { "epoch": 0.16334485108621577, "grad_norm": 1.6537595987319946, "learning_rate": 9.541059298439147e-06, "loss": 0.5917, "step": 13662 }, { "epoch": 0.1633568072310764, "grad_norm": 2.324463367462158, "learning_rate": 9.540978264195635e-06, "loss": 0.6515, "step": 13663 }, { "epoch": 0.16336876337593706, "grad_norm": 2.326003074645996, "learning_rate": 9.540897223142909e-06, "loss": 0.6558, "step": 13664 }, { "epoch": 0.16338071952079772, "grad_norm": 3.041574478149414, "learning_rate": 9.540816175281092e-06, "loss": 0.7632, "step": 13665 }, { "epoch": 0.16339267566565838, "grad_norm": 1.6494019031524658, "learning_rate": 9.540735120610305e-06, "loss": 0.5142, "step": 13666 }, { "epoch": 0.163404631810519, "grad_norm": 2.5418026447296143, "learning_rate": 9.54065405913067e-06, "loss": 0.6282, "step": 13667 }, { "epoch": 0.16341658795537967, "grad_norm": 42.48238754272461, "learning_rate": 9.540572990842308e-06, "loss": 0.5866, "step": 13668 }, { "epoch": 0.16342854410024032, "grad_norm": 2.2599873542785645, "learning_rate": 9.54049191574534e-06, "loss": 0.6924, "step": 13669 }, { "epoch": 0.16344050024510096, "grad_norm": 5.300454139709473, "learning_rate": 9.540410833839887e-06, "loss": 0.6689, "step": 13670 }, { "epoch": 0.16345245638996161, "grad_norm": 2.2805228233337402, "learning_rate": 9.540329745126075e-06, "loss": 0.5853, "step": 13671 }, { "epoch": 0.16346441253482227, "grad_norm": 1.931620717048645, "learning_rate": 9.54024864960402e-06, "loss": 0.5819, "step": 13672 }, { "epoch": 0.16347636867968293, "grad_norm": 1.971680760383606, "learning_rate": 9.540167547273846e-06, "loss": 0.5863, "step": 13673 }, { "epoch": 0.16348832482454356, "grad_norm": 3.308189868927002, "learning_rate": 9.540086438135677e-06, "loss": 0.6841, "step": 13674 }, { "epoch": 0.16350028096940422, "grad_norm": 2.9344630241394043, "learning_rate": 9.54000532218963e-06, "loss": 0.5416, "step": 13675 }, { "epoch": 0.16351223711426488, "grad_norm": 1.8129369020462036, "learning_rate": 9.539924199435829e-06, "loss": 0.5958, "step": 13676 }, { "epoch": 0.16352419325912554, "grad_norm": 2.1693332195281982, "learning_rate": 9.539843069874396e-06, "loss": 0.6885, "step": 13677 }, { "epoch": 0.16353614940398617, "grad_norm": 2.11015248298645, "learning_rate": 9.539761933505453e-06, "loss": 0.658, "step": 13678 }, { "epoch": 0.16354810554884683, "grad_norm": 1.8122243881225586, "learning_rate": 9.53968079032912e-06, "loss": 0.5858, "step": 13679 }, { "epoch": 0.1635600616937075, "grad_norm": 3.1615052223205566, "learning_rate": 9.539599640345519e-06, "loss": 0.7487, "step": 13680 }, { "epoch": 0.16357201783856815, "grad_norm": 3.2230265140533447, "learning_rate": 9.539518483554772e-06, "loss": 0.636, "step": 13681 }, { "epoch": 0.16358397398342878, "grad_norm": 3.533019781112671, "learning_rate": 9.539437319957004e-06, "loss": 0.5792, "step": 13682 }, { "epoch": 0.16359593012828944, "grad_norm": 3.4063990116119385, "learning_rate": 9.53935614955233e-06, "loss": 0.5552, "step": 13683 }, { "epoch": 0.1636078862731501, "grad_norm": 3.386181354522705, "learning_rate": 9.539274972340878e-06, "loss": 0.6465, "step": 13684 }, { "epoch": 0.16361984241801072, "grad_norm": 3.1779673099517822, "learning_rate": 9.539193788322765e-06, "loss": 0.583, "step": 13685 }, { "epoch": 0.16363179856287138, "grad_norm": 2.9105477333068848, "learning_rate": 9.539112597498114e-06, "loss": 0.5714, "step": 13686 }, { "epoch": 0.16364375470773204, "grad_norm": 4.454912185668945, "learning_rate": 9.539031399867049e-06, "loss": 0.5304, "step": 13687 }, { "epoch": 0.1636557108525927, "grad_norm": 3.0388214588165283, "learning_rate": 9.53895019542969e-06, "loss": 0.6946, "step": 13688 }, { "epoch": 0.16366766699745333, "grad_norm": 2.283398389816284, "learning_rate": 9.53886898418616e-06, "loss": 0.5652, "step": 13689 }, { "epoch": 0.163679623142314, "grad_norm": 1.6231831312179565, "learning_rate": 9.53878776613658e-06, "loss": 0.6647, "step": 13690 }, { "epoch": 0.16369157928717465, "grad_norm": 2.7837753295898438, "learning_rate": 9.53870654128107e-06, "loss": 0.5682, "step": 13691 }, { "epoch": 0.1637035354320353, "grad_norm": 2.6157000064849854, "learning_rate": 9.538625309619755e-06, "loss": 0.6002, "step": 13692 }, { "epoch": 0.16371549157689594, "grad_norm": 2.490628719329834, "learning_rate": 9.538544071152754e-06, "loss": 0.5646, "step": 13693 }, { "epoch": 0.1637274477217566, "grad_norm": 4.974236488342285, "learning_rate": 9.53846282588019e-06, "loss": 0.6177, "step": 13694 }, { "epoch": 0.16373940386661726, "grad_norm": 8.960718154907227, "learning_rate": 9.538381573802185e-06, "loss": 0.5448, "step": 13695 }, { "epoch": 0.1637513600114779, "grad_norm": 2.8418848514556885, "learning_rate": 9.53830031491886e-06, "loss": 0.5861, "step": 13696 }, { "epoch": 0.16376331615633855, "grad_norm": 1.9678348302841187, "learning_rate": 9.538219049230339e-06, "loss": 0.6042, "step": 13697 }, { "epoch": 0.1637752723011992, "grad_norm": 5.076722145080566, "learning_rate": 9.538137776736742e-06, "loss": 0.5061, "step": 13698 }, { "epoch": 0.16378722844605986, "grad_norm": 3.1616015434265137, "learning_rate": 9.538056497438192e-06, "loss": 0.7389, "step": 13699 }, { "epoch": 0.1637991845909205, "grad_norm": 5.224472999572754, "learning_rate": 9.53797521133481e-06, "loss": 0.5817, "step": 13700 }, { "epoch": 0.16381114073578115, "grad_norm": 4.582954406738281, "learning_rate": 9.537893918426717e-06, "loss": 0.5817, "step": 13701 }, { "epoch": 0.1638230968806418, "grad_norm": 4.002660274505615, "learning_rate": 9.537812618714035e-06, "loss": 0.605, "step": 13702 }, { "epoch": 0.16383505302550247, "grad_norm": 8.953336715698242, "learning_rate": 9.537731312196888e-06, "loss": 0.5947, "step": 13703 }, { "epoch": 0.1638470091703631, "grad_norm": 1.8301200866699219, "learning_rate": 9.537649998875398e-06, "loss": 0.5841, "step": 13704 }, { "epoch": 0.16385896531522376, "grad_norm": 2.9475340843200684, "learning_rate": 9.537568678749685e-06, "loss": 0.6478, "step": 13705 }, { "epoch": 0.16387092146008442, "grad_norm": 2.7797563076019287, "learning_rate": 9.53748735181987e-06, "loss": 0.5881, "step": 13706 }, { "epoch": 0.16388287760494505, "grad_norm": 2.5308125019073486, "learning_rate": 9.537406018086079e-06, "loss": 0.6232, "step": 13707 }, { "epoch": 0.1638948337498057, "grad_norm": 3.8524720668792725, "learning_rate": 9.537324677548429e-06, "loss": 0.605, "step": 13708 }, { "epoch": 0.16390678989466637, "grad_norm": 3.735649585723877, "learning_rate": 9.537243330207046e-06, "loss": 0.6735, "step": 13709 }, { "epoch": 0.16391874603952702, "grad_norm": 6.756400108337402, "learning_rate": 9.537161976062051e-06, "loss": 0.5496, "step": 13710 }, { "epoch": 0.16393070218438766, "grad_norm": 3.7454047203063965, "learning_rate": 9.537080615113565e-06, "loss": 0.61, "step": 13711 }, { "epoch": 0.16394265832924831, "grad_norm": 1.813970685005188, "learning_rate": 9.536999247361711e-06, "loss": 0.5076, "step": 13712 }, { "epoch": 0.16395461447410897, "grad_norm": 19.60239601135254, "learning_rate": 9.536917872806608e-06, "loss": 0.7068, "step": 13713 }, { "epoch": 0.16396657061896963, "grad_norm": 3.1012790203094482, "learning_rate": 9.536836491448382e-06, "loss": 0.727, "step": 13714 }, { "epoch": 0.16397852676383026, "grad_norm": 4.435484886169434, "learning_rate": 9.536755103287155e-06, "loss": 0.5594, "step": 13715 }, { "epoch": 0.16399048290869092, "grad_norm": 2.594072103500366, "learning_rate": 9.536673708323046e-06, "loss": 0.5869, "step": 13716 }, { "epoch": 0.16400243905355158, "grad_norm": 1.9199676513671875, "learning_rate": 9.53659230655618e-06, "loss": 0.7685, "step": 13717 }, { "epoch": 0.1640143951984122, "grad_norm": 2.793980121612549, "learning_rate": 9.536510897986677e-06, "loss": 0.6201, "step": 13718 }, { "epoch": 0.16402635134327287, "grad_norm": 2.5720465183258057, "learning_rate": 9.53642948261466e-06, "loss": 0.7385, "step": 13719 }, { "epoch": 0.16403830748813353, "grad_norm": 2.8800277709960938, "learning_rate": 9.53634806044025e-06, "loss": 0.6796, "step": 13720 }, { "epoch": 0.1640502636329942, "grad_norm": 3.1916277408599854, "learning_rate": 9.536266631463571e-06, "loss": 0.665, "step": 13721 }, { "epoch": 0.16406221977785482, "grad_norm": 2.1737449169158936, "learning_rate": 9.536185195684744e-06, "loss": 0.5655, "step": 13722 }, { "epoch": 0.16407417592271548, "grad_norm": 2.7187540531158447, "learning_rate": 9.53610375310389e-06, "loss": 0.6264, "step": 13723 }, { "epoch": 0.16408613206757613, "grad_norm": 2.2029221057891846, "learning_rate": 9.536022303721132e-06, "loss": 0.6318, "step": 13724 }, { "epoch": 0.1640980882124368, "grad_norm": 3.642103672027588, "learning_rate": 9.535940847536594e-06, "loss": 0.5698, "step": 13725 }, { "epoch": 0.16411004435729742, "grad_norm": 2.934612274169922, "learning_rate": 9.535859384550397e-06, "loss": 0.6111, "step": 13726 }, { "epoch": 0.16412200050215808, "grad_norm": 2.7707560062408447, "learning_rate": 9.53577791476266e-06, "loss": 0.6081, "step": 13727 }, { "epoch": 0.16413395664701874, "grad_norm": 2.1626181602478027, "learning_rate": 9.535696438173511e-06, "loss": 0.7415, "step": 13728 }, { "epoch": 0.16414591279187937, "grad_norm": 4.973281383514404, "learning_rate": 9.535614954783067e-06, "loss": 0.5952, "step": 13729 }, { "epoch": 0.16415786893674003, "grad_norm": 2.500939130783081, "learning_rate": 9.535533464591453e-06, "loss": 0.6023, "step": 13730 }, { "epoch": 0.1641698250816007, "grad_norm": 1.6701167821884155, "learning_rate": 9.535451967598792e-06, "loss": 0.5636, "step": 13731 }, { "epoch": 0.16418178122646135, "grad_norm": 3.1914968490600586, "learning_rate": 9.535370463805203e-06, "loss": 0.6639, "step": 13732 }, { "epoch": 0.16419373737132198, "grad_norm": 1.7778682708740234, "learning_rate": 9.53528895321081e-06, "loss": 0.5656, "step": 13733 }, { "epoch": 0.16420569351618264, "grad_norm": 10.279765129089355, "learning_rate": 9.535207435815735e-06, "loss": 0.6141, "step": 13734 }, { "epoch": 0.1642176496610433, "grad_norm": 2.5715484619140625, "learning_rate": 9.5351259116201e-06, "loss": 0.7031, "step": 13735 }, { "epoch": 0.16422960580590396, "grad_norm": 2.653449535369873, "learning_rate": 9.535044380624028e-06, "loss": 0.5975, "step": 13736 }, { "epoch": 0.1642415619507646, "grad_norm": 2.9412901401519775, "learning_rate": 9.534962842827639e-06, "loss": 0.6561, "step": 13737 }, { "epoch": 0.16425351809562524, "grad_norm": 1.6751140356063843, "learning_rate": 9.53488129823106e-06, "loss": 0.4882, "step": 13738 }, { "epoch": 0.1642654742404859, "grad_norm": 2.323258876800537, "learning_rate": 9.534799746834409e-06, "loss": 0.5861, "step": 13739 }, { "epoch": 0.16427743038534656, "grad_norm": 3.5058627128601074, "learning_rate": 9.53471818863781e-06, "loss": 0.6087, "step": 13740 }, { "epoch": 0.1642893865302072, "grad_norm": 2.4812488555908203, "learning_rate": 9.534636623641384e-06, "loss": 0.6572, "step": 13741 }, { "epoch": 0.16430134267506785, "grad_norm": 87.31703186035156, "learning_rate": 9.534555051845257e-06, "loss": 0.6796, "step": 13742 }, { "epoch": 0.1643132988199285, "grad_norm": 3.3375613689422607, "learning_rate": 9.534473473249547e-06, "loss": 0.6764, "step": 13743 }, { "epoch": 0.16432525496478914, "grad_norm": 2.773956775665283, "learning_rate": 9.534391887854377e-06, "loss": 0.6656, "step": 13744 }, { "epoch": 0.1643372111096498, "grad_norm": 3.5556368827819824, "learning_rate": 9.534310295659871e-06, "loss": 0.5465, "step": 13745 }, { "epoch": 0.16434916725451046, "grad_norm": 3.711946487426758, "learning_rate": 9.53422869666615e-06, "loss": 0.6744, "step": 13746 }, { "epoch": 0.16436112339937112, "grad_norm": 3.2083489894866943, "learning_rate": 9.534147090873338e-06, "loss": 0.7352, "step": 13747 }, { "epoch": 0.16437307954423175, "grad_norm": 1.8275495767593384, "learning_rate": 9.534065478281555e-06, "loss": 0.5932, "step": 13748 }, { "epoch": 0.1643850356890924, "grad_norm": 2.1865999698638916, "learning_rate": 9.533983858890926e-06, "loss": 0.6268, "step": 13749 }, { "epoch": 0.16439699183395307, "grad_norm": 2.3094193935394287, "learning_rate": 9.533902232701571e-06, "loss": 0.5524, "step": 13750 }, { "epoch": 0.16440894797881372, "grad_norm": 7.092860221862793, "learning_rate": 9.533820599713614e-06, "loss": 0.7274, "step": 13751 }, { "epoch": 0.16442090412367436, "grad_norm": 1.75734281539917, "learning_rate": 9.533738959927176e-06, "loss": 0.6867, "step": 13752 }, { "epoch": 0.164432860268535, "grad_norm": 2.952061414718628, "learning_rate": 9.533657313342382e-06, "loss": 0.5961, "step": 13753 }, { "epoch": 0.16444481641339567, "grad_norm": 2.5736067295074463, "learning_rate": 9.533575659959352e-06, "loss": 0.6323, "step": 13754 }, { "epoch": 0.1644567725582563, "grad_norm": 1.902403712272644, "learning_rate": 9.53349399977821e-06, "loss": 0.6174, "step": 13755 }, { "epoch": 0.16446872870311696, "grad_norm": 2.78637433052063, "learning_rate": 9.533412332799078e-06, "loss": 0.6463, "step": 13756 }, { "epoch": 0.16448068484797762, "grad_norm": 2.4419524669647217, "learning_rate": 9.533330659022075e-06, "loss": 0.5809, "step": 13757 }, { "epoch": 0.16449264099283828, "grad_norm": 8.569662094116211, "learning_rate": 9.53324897844733e-06, "loss": 0.637, "step": 13758 }, { "epoch": 0.1645045971376989, "grad_norm": 2.3229222297668457, "learning_rate": 9.53316729107496e-06, "loss": 0.6634, "step": 13759 }, { "epoch": 0.16451655328255957, "grad_norm": 2.7375593185424805, "learning_rate": 9.53308559690509e-06, "loss": 0.6484, "step": 13760 }, { "epoch": 0.16452850942742023, "grad_norm": 2.049255847930908, "learning_rate": 9.533003895937841e-06, "loss": 0.6826, "step": 13761 }, { "epoch": 0.1645404655722809, "grad_norm": 4.27780818939209, "learning_rate": 9.532922188173338e-06, "loss": 0.6972, "step": 13762 }, { "epoch": 0.16455242171714152, "grad_norm": 2.9151041507720947, "learning_rate": 9.532840473611702e-06, "loss": 0.5788, "step": 13763 }, { "epoch": 0.16456437786200218, "grad_norm": 2.331371784210205, "learning_rate": 9.532758752253055e-06, "loss": 0.7119, "step": 13764 }, { "epoch": 0.16457633400686283, "grad_norm": 4.209106922149658, "learning_rate": 9.532677024097521e-06, "loss": 0.5347, "step": 13765 }, { "epoch": 0.16458829015172347, "grad_norm": 3.493551731109619, "learning_rate": 9.53259528914522e-06, "loss": 0.6555, "step": 13766 }, { "epoch": 0.16460024629658412, "grad_norm": 1.7140109539031982, "learning_rate": 9.532513547396278e-06, "loss": 0.6465, "step": 13767 }, { "epoch": 0.16461220244144478, "grad_norm": 2.1586363315582275, "learning_rate": 9.532431798850815e-06, "loss": 0.6108, "step": 13768 }, { "epoch": 0.16462415858630544, "grad_norm": 1.3961745500564575, "learning_rate": 9.532350043508954e-06, "loss": 0.587, "step": 13769 }, { "epoch": 0.16463611473116607, "grad_norm": 6.670644283294678, "learning_rate": 9.532268281370819e-06, "loss": 0.6249, "step": 13770 }, { "epoch": 0.16464807087602673, "grad_norm": 3.3276915550231934, "learning_rate": 9.532186512436532e-06, "loss": 0.5688, "step": 13771 }, { "epoch": 0.1646600270208874, "grad_norm": 27.624725341796875, "learning_rate": 9.532104736706214e-06, "loss": 0.6174, "step": 13772 }, { "epoch": 0.16467198316574805, "grad_norm": 2.1480472087860107, "learning_rate": 9.532022954179989e-06, "loss": 0.5907, "step": 13773 }, { "epoch": 0.16468393931060868, "grad_norm": 2.199587106704712, "learning_rate": 9.53194116485798e-06, "loss": 0.5255, "step": 13774 }, { "epoch": 0.16469589545546934, "grad_norm": 1.7991259098052979, "learning_rate": 9.531859368740309e-06, "loss": 0.5616, "step": 13775 }, { "epoch": 0.16470785160033, "grad_norm": 1.873365879058838, "learning_rate": 9.531777565827099e-06, "loss": 0.5969, "step": 13776 }, { "epoch": 0.16471980774519063, "grad_norm": 3.426663398742676, "learning_rate": 9.53169575611847e-06, "loss": 0.5285, "step": 13777 }, { "epoch": 0.16473176389005129, "grad_norm": 3.9452717304229736, "learning_rate": 9.53161393961455e-06, "loss": 0.6261, "step": 13778 }, { "epoch": 0.16474372003491194, "grad_norm": 3.4599289894104004, "learning_rate": 9.531532116315458e-06, "loss": 0.6283, "step": 13779 }, { "epoch": 0.1647556761797726, "grad_norm": 1.962886929512024, "learning_rate": 9.531450286221318e-06, "loss": 0.5983, "step": 13780 }, { "epoch": 0.16476763232463323, "grad_norm": 7.888169288635254, "learning_rate": 9.531368449332251e-06, "loss": 0.7191, "step": 13781 }, { "epoch": 0.1647795884694939, "grad_norm": 2.9935801029205322, "learning_rate": 9.531286605648383e-06, "loss": 0.7192, "step": 13782 }, { "epoch": 0.16479154461435455, "grad_norm": 2.9095842838287354, "learning_rate": 9.53120475516983e-06, "loss": 0.5417, "step": 13783 }, { "epoch": 0.1648035007592152, "grad_norm": 2.0105783939361572, "learning_rate": 9.531122897896725e-06, "loss": 0.5673, "step": 13784 }, { "epoch": 0.16481545690407584, "grad_norm": 1.6807528734207153, "learning_rate": 9.531041033829183e-06, "loss": 0.6057, "step": 13785 }, { "epoch": 0.1648274130489365, "grad_norm": 3.448063850402832, "learning_rate": 9.530959162967328e-06, "loss": 0.6448, "step": 13786 }, { "epoch": 0.16483936919379716, "grad_norm": 3.4889159202575684, "learning_rate": 9.530877285311284e-06, "loss": 0.6651, "step": 13787 }, { "epoch": 0.16485132533865782, "grad_norm": 3.6865575313568115, "learning_rate": 9.530795400861173e-06, "loss": 0.6034, "step": 13788 }, { "epoch": 0.16486328148351845, "grad_norm": 4.070921421051025, "learning_rate": 9.530713509617118e-06, "loss": 0.6464, "step": 13789 }, { "epoch": 0.1648752376283791, "grad_norm": 1.989744782447815, "learning_rate": 9.530631611579245e-06, "loss": 0.7967, "step": 13790 }, { "epoch": 0.16488719377323977, "grad_norm": 2.352356433868408, "learning_rate": 9.53054970674767e-06, "loss": 0.5451, "step": 13791 }, { "epoch": 0.1648991499181004, "grad_norm": 3.412520408630371, "learning_rate": 9.53046779512252e-06, "loss": 0.5521, "step": 13792 }, { "epoch": 0.16491110606296105, "grad_norm": 3.847104549407959, "learning_rate": 9.530385876703919e-06, "loss": 0.6825, "step": 13793 }, { "epoch": 0.1649230622078217, "grad_norm": 2.4839606285095215, "learning_rate": 9.530303951491989e-06, "loss": 0.6172, "step": 13794 }, { "epoch": 0.16493501835268237, "grad_norm": 2.8569014072418213, "learning_rate": 9.53022201948685e-06, "loss": 0.6133, "step": 13795 }, { "epoch": 0.164946974497543, "grad_norm": 2.0120081901550293, "learning_rate": 9.530140080688628e-06, "loss": 0.6545, "step": 13796 }, { "epoch": 0.16495893064240366, "grad_norm": 3.1116583347320557, "learning_rate": 9.530058135097444e-06, "loss": 0.5806, "step": 13797 }, { "epoch": 0.16497088678726432, "grad_norm": 1.9363057613372803, "learning_rate": 9.529976182713422e-06, "loss": 0.6101, "step": 13798 }, { "epoch": 0.16498284293212498, "grad_norm": 3.8325228691101074, "learning_rate": 9.529894223536686e-06, "loss": 0.5657, "step": 13799 }, { "epoch": 0.1649947990769856, "grad_norm": 8.402033805847168, "learning_rate": 9.529812257567357e-06, "loss": 0.6341, "step": 13800 }, { "epoch": 0.16500675522184627, "grad_norm": 2.547271490097046, "learning_rate": 9.529730284805559e-06, "loss": 0.631, "step": 13801 }, { "epoch": 0.16501871136670693, "grad_norm": 2.9230430126190186, "learning_rate": 9.529648305251412e-06, "loss": 0.603, "step": 13802 }, { "epoch": 0.16503066751156756, "grad_norm": 2.8890161514282227, "learning_rate": 9.529566318905043e-06, "loss": 0.7084, "step": 13803 }, { "epoch": 0.16504262365642822, "grad_norm": 1.6718214750289917, "learning_rate": 9.529484325766573e-06, "loss": 0.6001, "step": 13804 }, { "epoch": 0.16505457980128888, "grad_norm": 2.9758338928222656, "learning_rate": 9.529402325836125e-06, "loss": 0.5503, "step": 13805 }, { "epoch": 0.16506653594614953, "grad_norm": 2.026320219039917, "learning_rate": 9.529320319113822e-06, "loss": 0.5859, "step": 13806 }, { "epoch": 0.16507849209101016, "grad_norm": 1.8010845184326172, "learning_rate": 9.529238305599786e-06, "loss": 0.531, "step": 13807 }, { "epoch": 0.16509044823587082, "grad_norm": 7.75438928604126, "learning_rate": 9.529156285294144e-06, "loss": 0.7436, "step": 13808 }, { "epoch": 0.16510240438073148, "grad_norm": 1.8759113550186157, "learning_rate": 9.529074258197013e-06, "loss": 0.6357, "step": 13809 }, { "epoch": 0.16511436052559214, "grad_norm": 3.86588454246521, "learning_rate": 9.52899222430852e-06, "loss": 0.6869, "step": 13810 }, { "epoch": 0.16512631667045277, "grad_norm": 2.495565414428711, "learning_rate": 9.528910183628788e-06, "loss": 0.5096, "step": 13811 }, { "epoch": 0.16513827281531343, "grad_norm": 3.1341304779052734, "learning_rate": 9.528828136157938e-06, "loss": 0.6276, "step": 13812 }, { "epoch": 0.1651502289601741, "grad_norm": 3.337303400039673, "learning_rate": 9.528746081896094e-06, "loss": 0.5942, "step": 13813 }, { "epoch": 0.16516218510503472, "grad_norm": 2.5724291801452637, "learning_rate": 9.52866402084338e-06, "loss": 0.6884, "step": 13814 }, { "epoch": 0.16517414124989538, "grad_norm": 3.1854088306427, "learning_rate": 9.528581952999918e-06, "loss": 0.6678, "step": 13815 }, { "epoch": 0.16518609739475604, "grad_norm": 3.544171094894409, "learning_rate": 9.528499878365832e-06, "loss": 0.5847, "step": 13816 }, { "epoch": 0.1651980535396167, "grad_norm": 3.9111783504486084, "learning_rate": 9.528417796941243e-06, "loss": 0.6528, "step": 13817 }, { "epoch": 0.16521000968447733, "grad_norm": 2.6730167865753174, "learning_rate": 9.528335708726276e-06, "loss": 0.5976, "step": 13818 }, { "epoch": 0.16522196582933799, "grad_norm": 1.7899906635284424, "learning_rate": 9.528253613721054e-06, "loss": 0.656, "step": 13819 }, { "epoch": 0.16523392197419864, "grad_norm": 2.3958024978637695, "learning_rate": 9.528171511925699e-06, "loss": 0.5773, "step": 13820 }, { "epoch": 0.1652458781190593, "grad_norm": 52.816917419433594, "learning_rate": 9.528089403340335e-06, "loss": 0.6851, "step": 13821 }, { "epoch": 0.16525783426391993, "grad_norm": 2.4346091747283936, "learning_rate": 9.528007287965085e-06, "loss": 0.6466, "step": 13822 }, { "epoch": 0.1652697904087806, "grad_norm": 2.143667459487915, "learning_rate": 9.527925165800071e-06, "loss": 0.6471, "step": 13823 }, { "epoch": 0.16528174655364125, "grad_norm": 2.086658477783203, "learning_rate": 9.527843036845416e-06, "loss": 0.6379, "step": 13824 }, { "epoch": 0.16529370269850188, "grad_norm": 6.2916483879089355, "learning_rate": 9.527760901101247e-06, "loss": 0.5915, "step": 13825 }, { "epoch": 0.16530565884336254, "grad_norm": 5.661386013031006, "learning_rate": 9.527678758567685e-06, "loss": 0.6873, "step": 13826 }, { "epoch": 0.1653176149882232, "grad_norm": 3.4216816425323486, "learning_rate": 9.52759660924485e-06, "loss": 0.727, "step": 13827 }, { "epoch": 0.16532957113308386, "grad_norm": 10.779571533203125, "learning_rate": 9.527514453132869e-06, "loss": 0.6465, "step": 13828 }, { "epoch": 0.1653415272779445, "grad_norm": 3.038403034210205, "learning_rate": 9.527432290231864e-06, "loss": 0.6608, "step": 13829 }, { "epoch": 0.16535348342280515, "grad_norm": 2.3523874282836914, "learning_rate": 9.527350120541958e-06, "loss": 0.5865, "step": 13830 }, { "epoch": 0.1653654395676658, "grad_norm": 1.9168248176574707, "learning_rate": 9.527267944063274e-06, "loss": 0.5731, "step": 13831 }, { "epoch": 0.16537739571252646, "grad_norm": 3.414983034133911, "learning_rate": 9.527185760795937e-06, "loss": 0.6935, "step": 13832 }, { "epoch": 0.1653893518573871, "grad_norm": 2.0757994651794434, "learning_rate": 9.527103570740067e-06, "loss": 0.6751, "step": 13833 }, { "epoch": 0.16540130800224775, "grad_norm": 1.7958821058273315, "learning_rate": 9.52702137389579e-06, "loss": 0.608, "step": 13834 }, { "epoch": 0.1654132641471084, "grad_norm": 2.4300782680511475, "learning_rate": 9.526939170263228e-06, "loss": 0.6486, "step": 13835 }, { "epoch": 0.16542522029196904, "grad_norm": 5.152628421783447, "learning_rate": 9.526856959842506e-06, "loss": 0.6276, "step": 13836 }, { "epoch": 0.1654371764368297, "grad_norm": 1.9017654657363892, "learning_rate": 9.526774742633745e-06, "loss": 0.6259, "step": 13837 }, { "epoch": 0.16544913258169036, "grad_norm": 2.6895811557769775, "learning_rate": 9.526692518637069e-06, "loss": 0.7061, "step": 13838 }, { "epoch": 0.16546108872655102, "grad_norm": 2.4114198684692383, "learning_rate": 9.5266102878526e-06, "loss": 0.6245, "step": 13839 }, { "epoch": 0.16547304487141165, "grad_norm": 2.8616533279418945, "learning_rate": 9.526528050280466e-06, "loss": 0.6058, "step": 13840 }, { "epoch": 0.1654850010162723, "grad_norm": 2.051532030105591, "learning_rate": 9.526445805920784e-06, "loss": 0.6036, "step": 13841 }, { "epoch": 0.16549695716113297, "grad_norm": 2.0532686710357666, "learning_rate": 9.526363554773683e-06, "loss": 0.6211, "step": 13842 }, { "epoch": 0.16550891330599363, "grad_norm": 3.5112104415893555, "learning_rate": 9.526281296839281e-06, "loss": 0.6026, "step": 13843 }, { "epoch": 0.16552086945085426, "grad_norm": 1.662095546722412, "learning_rate": 9.526199032117706e-06, "loss": 0.6281, "step": 13844 }, { "epoch": 0.16553282559571492, "grad_norm": 4.3232598304748535, "learning_rate": 9.52611676060908e-06, "loss": 0.563, "step": 13845 }, { "epoch": 0.16554478174057558, "grad_norm": 2.2885489463806152, "learning_rate": 9.526034482313524e-06, "loss": 0.54, "step": 13846 }, { "epoch": 0.16555673788543623, "grad_norm": 10.35496711730957, "learning_rate": 9.525952197231165e-06, "loss": 0.5405, "step": 13847 }, { "epoch": 0.16556869403029686, "grad_norm": 1.6743128299713135, "learning_rate": 9.525869905362125e-06, "loss": 0.6161, "step": 13848 }, { "epoch": 0.16558065017515752, "grad_norm": 2.254523277282715, "learning_rate": 9.525787606706524e-06, "loss": 0.5784, "step": 13849 }, { "epoch": 0.16559260632001818, "grad_norm": 2.049145221710205, "learning_rate": 9.525705301264491e-06, "loss": 0.6338, "step": 13850 }, { "epoch": 0.1656045624648788, "grad_norm": 1.7030762434005737, "learning_rate": 9.525622989036145e-06, "loss": 0.5362, "step": 13851 }, { "epoch": 0.16561651860973947, "grad_norm": 1.9089795351028442, "learning_rate": 9.525540670021613e-06, "loss": 0.6799, "step": 13852 }, { "epoch": 0.16562847475460013, "grad_norm": 2.7896971702575684, "learning_rate": 9.525458344221016e-06, "loss": 0.6564, "step": 13853 }, { "epoch": 0.1656404308994608, "grad_norm": 2.180574655532837, "learning_rate": 9.525376011634477e-06, "loss": 0.5901, "step": 13854 }, { "epoch": 0.16565238704432142, "grad_norm": 1.668803334236145, "learning_rate": 9.525293672262123e-06, "loss": 0.5112, "step": 13855 }, { "epoch": 0.16566434318918208, "grad_norm": 2.247955560684204, "learning_rate": 9.525211326104073e-06, "loss": 0.6605, "step": 13856 }, { "epoch": 0.16567629933404274, "grad_norm": 15.451098442077637, "learning_rate": 9.525128973160454e-06, "loss": 0.6385, "step": 13857 }, { "epoch": 0.1656882554789034, "grad_norm": 2.2353768348693848, "learning_rate": 9.525046613431386e-06, "loss": 0.5745, "step": 13858 }, { "epoch": 0.16570021162376403, "grad_norm": 2.1885669231414795, "learning_rate": 9.524964246916997e-06, "loss": 0.5862, "step": 13859 }, { "epoch": 0.16571216776862469, "grad_norm": 3.284404754638672, "learning_rate": 9.524881873617407e-06, "loss": 0.6958, "step": 13860 }, { "epoch": 0.16572412391348534, "grad_norm": 3.0977749824523926, "learning_rate": 9.524799493532738e-06, "loss": 0.6222, "step": 13861 }, { "epoch": 0.16573608005834597, "grad_norm": 3.0842108726501465, "learning_rate": 9.524717106663119e-06, "loss": 0.6268, "step": 13862 }, { "epoch": 0.16574803620320663, "grad_norm": 1.8481546640396118, "learning_rate": 9.524634713008672e-06, "loss": 0.7234, "step": 13863 }, { "epoch": 0.1657599923480673, "grad_norm": 5.351134300231934, "learning_rate": 9.524552312569515e-06, "loss": 0.6753, "step": 13864 }, { "epoch": 0.16577194849292795, "grad_norm": 1.7163453102111816, "learning_rate": 9.524469905345779e-06, "loss": 0.6281, "step": 13865 }, { "epoch": 0.16578390463778858, "grad_norm": 1.499813199043274, "learning_rate": 9.524387491337583e-06, "loss": 0.5897, "step": 13866 }, { "epoch": 0.16579586078264924, "grad_norm": 1.6785411834716797, "learning_rate": 9.524305070545053e-06, "loss": 0.5528, "step": 13867 }, { "epoch": 0.1658078169275099, "grad_norm": 1.8462793827056885, "learning_rate": 9.52422264296831e-06, "loss": 0.645, "step": 13868 }, { "epoch": 0.16581977307237056, "grad_norm": 2.7524826526641846, "learning_rate": 9.52414020860748e-06, "loss": 0.7175, "step": 13869 }, { "epoch": 0.1658317292172312, "grad_norm": 2.2308616638183594, "learning_rate": 9.524057767462684e-06, "loss": 0.6404, "step": 13870 }, { "epoch": 0.16584368536209185, "grad_norm": 6.546504020690918, "learning_rate": 9.523975319534049e-06, "loss": 0.5423, "step": 13871 }, { "epoch": 0.1658556415069525, "grad_norm": 2.42285418510437, "learning_rate": 9.523892864821698e-06, "loss": 0.6275, "step": 13872 }, { "epoch": 0.16586759765181314, "grad_norm": 13.11098861694336, "learning_rate": 9.52381040332575e-06, "loss": 0.5836, "step": 13873 }, { "epoch": 0.1658795537966738, "grad_norm": 2.921297550201416, "learning_rate": 9.523727935046335e-06, "loss": 0.5399, "step": 13874 }, { "epoch": 0.16589150994153445, "grad_norm": 4.82484769821167, "learning_rate": 9.523645459983574e-06, "loss": 0.6427, "step": 13875 }, { "epoch": 0.1659034660863951, "grad_norm": 2.719473123550415, "learning_rate": 9.523562978137589e-06, "loss": 0.6917, "step": 13876 }, { "epoch": 0.16591542223125574, "grad_norm": 2.30842661857605, "learning_rate": 9.523480489508507e-06, "loss": 0.5512, "step": 13877 }, { "epoch": 0.1659273783761164, "grad_norm": 3.5539181232452393, "learning_rate": 9.523397994096449e-06, "loss": 0.6578, "step": 13878 }, { "epoch": 0.16593933452097706, "grad_norm": 1.9905693531036377, "learning_rate": 9.52331549190154e-06, "loss": 0.5999, "step": 13879 }, { "epoch": 0.16595129066583772, "grad_norm": 12.948543548583984, "learning_rate": 9.523232982923903e-06, "loss": 0.6924, "step": 13880 }, { "epoch": 0.16596324681069835, "grad_norm": 2.387356758117676, "learning_rate": 9.52315046716366e-06, "loss": 0.5963, "step": 13881 }, { "epoch": 0.165975202955559, "grad_norm": 3.4912800788879395, "learning_rate": 9.523067944620938e-06, "loss": 0.5081, "step": 13882 }, { "epoch": 0.16598715910041967, "grad_norm": 3.709923267364502, "learning_rate": 9.52298541529586e-06, "loss": 0.6568, "step": 13883 }, { "epoch": 0.1659991152452803, "grad_norm": 3.6104938983917236, "learning_rate": 9.522902879188551e-06, "loss": 0.6269, "step": 13884 }, { "epoch": 0.16601107139014096, "grad_norm": 1.9010798931121826, "learning_rate": 9.522820336299132e-06, "loss": 0.542, "step": 13885 }, { "epoch": 0.16602302753500162, "grad_norm": 3.2880406379699707, "learning_rate": 9.522737786627727e-06, "loss": 0.6259, "step": 13886 }, { "epoch": 0.16603498367986227, "grad_norm": 4.4070587158203125, "learning_rate": 9.52265523017446e-06, "loss": 0.5944, "step": 13887 }, { "epoch": 0.1660469398247229, "grad_norm": 3.2440357208251953, "learning_rate": 9.522572666939458e-06, "loss": 0.6747, "step": 13888 }, { "epoch": 0.16605889596958356, "grad_norm": 1.8834224939346313, "learning_rate": 9.522490096922841e-06, "loss": 0.5649, "step": 13889 }, { "epoch": 0.16607085211444422, "grad_norm": 3.4244561195373535, "learning_rate": 9.522407520124733e-06, "loss": 0.566, "step": 13890 }, { "epoch": 0.16608280825930488, "grad_norm": 3.982743263244629, "learning_rate": 9.522324936545259e-06, "loss": 0.6651, "step": 13891 }, { "epoch": 0.1660947644041655, "grad_norm": 2.0583386421203613, "learning_rate": 9.522242346184544e-06, "loss": 0.6116, "step": 13892 }, { "epoch": 0.16610672054902617, "grad_norm": 2.1635403633117676, "learning_rate": 9.52215974904271e-06, "loss": 0.5722, "step": 13893 }, { "epoch": 0.16611867669388683, "grad_norm": 3.0694620609283447, "learning_rate": 9.522077145119882e-06, "loss": 0.643, "step": 13894 }, { "epoch": 0.16613063283874746, "grad_norm": 2.5709545612335205, "learning_rate": 9.521994534416182e-06, "loss": 0.6225, "step": 13895 }, { "epoch": 0.16614258898360812, "grad_norm": 2.0341782569885254, "learning_rate": 9.521911916931734e-06, "loss": 0.6636, "step": 13896 }, { "epoch": 0.16615454512846878, "grad_norm": 5.701987266540527, "learning_rate": 9.521829292666665e-06, "loss": 0.6839, "step": 13897 }, { "epoch": 0.16616650127332944, "grad_norm": 3.217116117477417, "learning_rate": 9.521746661621096e-06, "loss": 0.5838, "step": 13898 }, { "epoch": 0.16617845741819007, "grad_norm": 1.9570777416229248, "learning_rate": 9.521664023795152e-06, "loss": 0.6176, "step": 13899 }, { "epoch": 0.16619041356305073, "grad_norm": 2.2849607467651367, "learning_rate": 9.521581379188958e-06, "loss": 0.5365, "step": 13900 }, { "epoch": 0.16620236970791138, "grad_norm": 3.432896137237549, "learning_rate": 9.521498727802634e-06, "loss": 0.6207, "step": 13901 }, { "epoch": 0.16621432585277204, "grad_norm": 3.8339788913726807, "learning_rate": 9.521416069636309e-06, "loss": 0.7142, "step": 13902 }, { "epoch": 0.16622628199763267, "grad_norm": 2.5421440601348877, "learning_rate": 9.521333404690104e-06, "loss": 0.6513, "step": 13903 }, { "epoch": 0.16623823814249333, "grad_norm": 2.9398915767669678, "learning_rate": 9.521250732964143e-06, "loss": 0.6423, "step": 13904 }, { "epoch": 0.166250194287354, "grad_norm": 3.2965755462646484, "learning_rate": 9.521168054458552e-06, "loss": 0.7346, "step": 13905 }, { "epoch": 0.16626215043221465, "grad_norm": 2.5432419776916504, "learning_rate": 9.52108536917345e-06, "loss": 0.638, "step": 13906 }, { "epoch": 0.16627410657707528, "grad_norm": 1.9678155183792114, "learning_rate": 9.521002677108968e-06, "loss": 0.6049, "step": 13907 }, { "epoch": 0.16628606272193594, "grad_norm": 2.0237600803375244, "learning_rate": 9.520919978265225e-06, "loss": 0.5738, "step": 13908 }, { "epoch": 0.1662980188667966, "grad_norm": 1.7956678867340088, "learning_rate": 9.520837272642347e-06, "loss": 0.6258, "step": 13909 }, { "epoch": 0.16630997501165723, "grad_norm": 2.3136889934539795, "learning_rate": 9.520754560240456e-06, "loss": 0.5285, "step": 13910 }, { "epoch": 0.1663219311565179, "grad_norm": 3.1014819145202637, "learning_rate": 9.520671841059679e-06, "loss": 0.6449, "step": 13911 }, { "epoch": 0.16633388730137855, "grad_norm": 1.7889137268066406, "learning_rate": 9.520589115100136e-06, "loss": 0.5894, "step": 13912 }, { "epoch": 0.1663458434462392, "grad_norm": 2.228785753250122, "learning_rate": 9.520506382361956e-06, "loss": 0.6358, "step": 13913 }, { "epoch": 0.16635779959109984, "grad_norm": 2.72184419631958, "learning_rate": 9.52042364284526e-06, "loss": 0.6098, "step": 13914 }, { "epoch": 0.1663697557359605, "grad_norm": 3.492356061935425, "learning_rate": 9.520340896550174e-06, "loss": 0.6002, "step": 13915 }, { "epoch": 0.16638171188082115, "grad_norm": 2.163492441177368, "learning_rate": 9.520258143476818e-06, "loss": 0.6093, "step": 13916 }, { "epoch": 0.1663936680256818, "grad_norm": 2.144514799118042, "learning_rate": 9.52017538362532e-06, "loss": 0.5969, "step": 13917 }, { "epoch": 0.16640562417054244, "grad_norm": 3.768064498901367, "learning_rate": 9.520092616995804e-06, "loss": 0.6201, "step": 13918 }, { "epoch": 0.1664175803154031, "grad_norm": 2.8093655109405518, "learning_rate": 9.520009843588391e-06, "loss": 0.5852, "step": 13919 }, { "epoch": 0.16642953646026376, "grad_norm": 17.445661544799805, "learning_rate": 9.519927063403209e-06, "loss": 0.6532, "step": 13920 }, { "epoch": 0.1664414926051244, "grad_norm": 2.1586084365844727, "learning_rate": 9.51984427644038e-06, "loss": 0.7262, "step": 13921 }, { "epoch": 0.16645344874998505, "grad_norm": 3.0573577880859375, "learning_rate": 9.519761482700028e-06, "loss": 0.6096, "step": 13922 }, { "epoch": 0.1664654048948457, "grad_norm": 2.3251168727874756, "learning_rate": 9.519678682182276e-06, "loss": 0.6612, "step": 13923 }, { "epoch": 0.16647736103970637, "grad_norm": 1.438694953918457, "learning_rate": 9.519595874887254e-06, "loss": 0.6606, "step": 13924 }, { "epoch": 0.166489317184567, "grad_norm": 1.7589597702026367, "learning_rate": 9.519513060815078e-06, "loss": 0.5347, "step": 13925 }, { "epoch": 0.16650127332942766, "grad_norm": 3.360288381576538, "learning_rate": 9.519430239965878e-06, "loss": 0.577, "step": 13926 }, { "epoch": 0.16651322947428832, "grad_norm": 2.640477418899536, "learning_rate": 9.519347412339776e-06, "loss": 0.6386, "step": 13927 }, { "epoch": 0.16652518561914897, "grad_norm": 3.12308406829834, "learning_rate": 9.519264577936896e-06, "loss": 0.7093, "step": 13928 }, { "epoch": 0.1665371417640096, "grad_norm": 2.4877748489379883, "learning_rate": 9.519181736757364e-06, "loss": 0.6786, "step": 13929 }, { "epoch": 0.16654909790887026, "grad_norm": 2.2971670627593994, "learning_rate": 9.519098888801302e-06, "loss": 0.6878, "step": 13930 }, { "epoch": 0.16656105405373092, "grad_norm": 2.1615357398986816, "learning_rate": 9.519016034068833e-06, "loss": 0.5556, "step": 13931 }, { "epoch": 0.16657301019859155, "grad_norm": 1.74111807346344, "learning_rate": 9.518933172560086e-06, "loss": 0.5315, "step": 13932 }, { "epoch": 0.1665849663434522, "grad_norm": 2.061915874481201, "learning_rate": 9.518850304275183e-06, "loss": 0.6941, "step": 13933 }, { "epoch": 0.16659692248831287, "grad_norm": 2.7746429443359375, "learning_rate": 9.518767429214247e-06, "loss": 0.681, "step": 13934 }, { "epoch": 0.16660887863317353, "grad_norm": 1.751197099685669, "learning_rate": 9.518684547377402e-06, "loss": 0.6136, "step": 13935 }, { "epoch": 0.16662083477803416, "grad_norm": 2.3236889839172363, "learning_rate": 9.518601658764774e-06, "loss": 0.6265, "step": 13936 }, { "epoch": 0.16663279092289482, "grad_norm": 1.518064260482788, "learning_rate": 9.518518763376488e-06, "loss": 0.5264, "step": 13937 }, { "epoch": 0.16664474706775548, "grad_norm": 3.1805403232574463, "learning_rate": 9.518435861212668e-06, "loss": 0.5718, "step": 13938 }, { "epoch": 0.16665670321261614, "grad_norm": 3.0149405002593994, "learning_rate": 9.518352952273436e-06, "loss": 0.7023, "step": 13939 }, { "epoch": 0.16666865935747677, "grad_norm": 2.923168420791626, "learning_rate": 9.518270036558916e-06, "loss": 0.6312, "step": 13940 }, { "epoch": 0.16668061550233743, "grad_norm": 2.5404398441314697, "learning_rate": 9.518187114069235e-06, "loss": 0.5517, "step": 13941 }, { "epoch": 0.16669257164719808, "grad_norm": 1.6550540924072266, "learning_rate": 9.518104184804517e-06, "loss": 0.5736, "step": 13942 }, { "epoch": 0.16670452779205872, "grad_norm": 1.9274051189422607, "learning_rate": 9.518021248764886e-06, "loss": 0.5705, "step": 13943 }, { "epoch": 0.16671648393691937, "grad_norm": 1.8705744743347168, "learning_rate": 9.517938305950465e-06, "loss": 0.6307, "step": 13944 }, { "epoch": 0.16672844008178003, "grad_norm": 2.1001014709472656, "learning_rate": 9.51785535636138e-06, "loss": 0.6111, "step": 13945 }, { "epoch": 0.1667403962266407, "grad_norm": 1.6689356565475464, "learning_rate": 9.517772399997756e-06, "loss": 0.6028, "step": 13946 }, { "epoch": 0.16675235237150132, "grad_norm": 2.648284912109375, "learning_rate": 9.517689436859714e-06, "loss": 0.6642, "step": 13947 }, { "epoch": 0.16676430851636198, "grad_norm": 2.030930519104004, "learning_rate": 9.517606466947381e-06, "loss": 0.5537, "step": 13948 }, { "epoch": 0.16677626466122264, "grad_norm": 1.737047791481018, "learning_rate": 9.517523490260884e-06, "loss": 0.6746, "step": 13949 }, { "epoch": 0.1667882208060833, "grad_norm": 1.7114676237106323, "learning_rate": 9.517440506800342e-06, "loss": 0.6314, "step": 13950 }, { "epoch": 0.16680017695094393, "grad_norm": 1.7442861795425415, "learning_rate": 9.517357516565881e-06, "loss": 0.5652, "step": 13951 }, { "epoch": 0.1668121330958046, "grad_norm": 2.0438737869262695, "learning_rate": 9.51727451955763e-06, "loss": 0.6969, "step": 13952 }, { "epoch": 0.16682408924066525, "grad_norm": 2.5573155879974365, "learning_rate": 9.517191515775705e-06, "loss": 0.6321, "step": 13953 }, { "epoch": 0.16683604538552588, "grad_norm": 2.2312934398651123, "learning_rate": 9.517108505220238e-06, "loss": 0.6834, "step": 13954 }, { "epoch": 0.16684800153038654, "grad_norm": 3.478064775466919, "learning_rate": 9.517025487891351e-06, "loss": 0.6388, "step": 13955 }, { "epoch": 0.1668599576752472, "grad_norm": 2.7169313430786133, "learning_rate": 9.516942463789167e-06, "loss": 0.642, "step": 13956 }, { "epoch": 0.16687191382010785, "grad_norm": 2.6042520999908447, "learning_rate": 9.516859432913813e-06, "loss": 0.6409, "step": 13957 }, { "epoch": 0.16688386996496848, "grad_norm": 1.73836350440979, "learning_rate": 9.516776395265412e-06, "loss": 0.6378, "step": 13958 }, { "epoch": 0.16689582610982914, "grad_norm": 3.4374818801879883, "learning_rate": 9.516693350844088e-06, "loss": 0.666, "step": 13959 }, { "epoch": 0.1669077822546898, "grad_norm": 2.527831792831421, "learning_rate": 9.516610299649967e-06, "loss": 0.5348, "step": 13960 }, { "epoch": 0.16691973839955046, "grad_norm": 2.0945279598236084, "learning_rate": 9.51652724168317e-06, "loss": 0.6139, "step": 13961 }, { "epoch": 0.1669316945444111, "grad_norm": 7.6745195388793945, "learning_rate": 9.516444176943829e-06, "loss": 0.6319, "step": 13962 }, { "epoch": 0.16694365068927175, "grad_norm": 2.5452253818511963, "learning_rate": 9.516361105432062e-06, "loss": 0.6754, "step": 13963 }, { "epoch": 0.1669556068341324, "grad_norm": 2.1416919231414795, "learning_rate": 9.516278027147994e-06, "loss": 0.6251, "step": 13964 }, { "epoch": 0.16696756297899307, "grad_norm": 2.6967756748199463, "learning_rate": 9.516194942091752e-06, "loss": 0.6625, "step": 13965 }, { "epoch": 0.1669795191238537, "grad_norm": 1.6779711246490479, "learning_rate": 9.51611185026346e-06, "loss": 0.5144, "step": 13966 }, { "epoch": 0.16699147526871436, "grad_norm": 3.5858168601989746, "learning_rate": 9.516028751663243e-06, "loss": 0.6436, "step": 13967 }, { "epoch": 0.16700343141357502, "grad_norm": 2.0534632205963135, "learning_rate": 9.515945646291222e-06, "loss": 0.6085, "step": 13968 }, { "epoch": 0.16701538755843565, "grad_norm": 2.54337739944458, "learning_rate": 9.515862534147526e-06, "loss": 0.5954, "step": 13969 }, { "epoch": 0.1670273437032963, "grad_norm": 3.3721890449523926, "learning_rate": 9.515779415232278e-06, "loss": 0.6483, "step": 13970 }, { "epoch": 0.16703929984815696, "grad_norm": 1.8416104316711426, "learning_rate": 9.515696289545604e-06, "loss": 0.4993, "step": 13971 }, { "epoch": 0.16705125599301762, "grad_norm": 16.86424446105957, "learning_rate": 9.515613157087627e-06, "loss": 0.7009, "step": 13972 }, { "epoch": 0.16706321213787825, "grad_norm": 2.6225807666778564, "learning_rate": 9.515530017858473e-06, "loss": 0.6137, "step": 13973 }, { "epoch": 0.1670751682827389, "grad_norm": 4.3905930519104, "learning_rate": 9.515446871858263e-06, "loss": 0.654, "step": 13974 }, { "epoch": 0.16708712442759957, "grad_norm": 2.5810678005218506, "learning_rate": 9.515363719087126e-06, "loss": 0.6332, "step": 13975 }, { "epoch": 0.16709908057246023, "grad_norm": 5.3625006675720215, "learning_rate": 9.515280559545183e-06, "loss": 0.6576, "step": 13976 }, { "epoch": 0.16711103671732086, "grad_norm": 1.3592371940612793, "learning_rate": 9.515197393232562e-06, "loss": 0.5766, "step": 13977 }, { "epoch": 0.16712299286218152, "grad_norm": 4.148369312286377, "learning_rate": 9.515114220149387e-06, "loss": 0.5598, "step": 13978 }, { "epoch": 0.16713494900704218, "grad_norm": 3.1380629539489746, "learning_rate": 9.515031040295783e-06, "loss": 0.7036, "step": 13979 }, { "epoch": 0.1671469051519028, "grad_norm": 1.7700743675231934, "learning_rate": 9.514947853671872e-06, "loss": 0.602, "step": 13980 }, { "epoch": 0.16715886129676347, "grad_norm": 5.6679792404174805, "learning_rate": 9.514864660277782e-06, "loss": 0.6874, "step": 13981 }, { "epoch": 0.16717081744162413, "grad_norm": 2.6858203411102295, "learning_rate": 9.514781460113635e-06, "loss": 0.7076, "step": 13982 }, { "epoch": 0.16718277358648478, "grad_norm": 2.0650486946105957, "learning_rate": 9.51469825317956e-06, "loss": 0.5985, "step": 13983 }, { "epoch": 0.16719472973134542, "grad_norm": 2.1386446952819824, "learning_rate": 9.514615039475677e-06, "loss": 0.5664, "step": 13984 }, { "epoch": 0.16720668587620607, "grad_norm": 2.6284024715423584, "learning_rate": 9.514531819002111e-06, "loss": 0.6788, "step": 13985 }, { "epoch": 0.16721864202106673, "grad_norm": 1.8456861972808838, "learning_rate": 9.514448591758992e-06, "loss": 0.7055, "step": 13986 }, { "epoch": 0.1672305981659274, "grad_norm": 1.853781819343567, "learning_rate": 9.514365357746438e-06, "loss": 0.6352, "step": 13987 }, { "epoch": 0.16724255431078802, "grad_norm": 2.508615732192993, "learning_rate": 9.514282116964578e-06, "loss": 0.5818, "step": 13988 }, { "epoch": 0.16725451045564868, "grad_norm": 3.890178918838501, "learning_rate": 9.514198869413537e-06, "loss": 0.6469, "step": 13989 }, { "epoch": 0.16726646660050934, "grad_norm": 2.285122871398926, "learning_rate": 9.514115615093438e-06, "loss": 0.636, "step": 13990 }, { "epoch": 0.16727842274536997, "grad_norm": 3.2831876277923584, "learning_rate": 9.514032354004408e-06, "loss": 0.686, "step": 13991 }, { "epoch": 0.16729037889023063, "grad_norm": 1.8478987216949463, "learning_rate": 9.513949086146567e-06, "loss": 0.5722, "step": 13992 }, { "epoch": 0.1673023350350913, "grad_norm": 6.436714172363281, "learning_rate": 9.513865811520047e-06, "loss": 0.7373, "step": 13993 }, { "epoch": 0.16731429117995195, "grad_norm": 2.6602306365966797, "learning_rate": 9.513782530124968e-06, "loss": 0.6888, "step": 13994 }, { "epoch": 0.16732624732481258, "grad_norm": 2.3817925453186035, "learning_rate": 9.513699241961455e-06, "loss": 0.6825, "step": 13995 }, { "epoch": 0.16733820346967324, "grad_norm": 2.477567195892334, "learning_rate": 9.513615947029635e-06, "loss": 0.5702, "step": 13996 }, { "epoch": 0.1673501596145339, "grad_norm": 2.9678573608398438, "learning_rate": 9.51353264532963e-06, "loss": 0.669, "step": 13997 }, { "epoch": 0.16736211575939455, "grad_norm": 3.9383785724639893, "learning_rate": 9.51344933686157e-06, "loss": 0.6117, "step": 13998 }, { "epoch": 0.16737407190425518, "grad_norm": 2.4566309452056885, "learning_rate": 9.513366021625574e-06, "loss": 0.624, "step": 13999 }, { "epoch": 0.16738602804911584, "grad_norm": 12.79472827911377, "learning_rate": 9.51328269962177e-06, "loss": 0.6171, "step": 14000 }, { "epoch": 0.1673979841939765, "grad_norm": 2.325732946395874, "learning_rate": 9.513199370850284e-06, "loss": 0.5447, "step": 14001 }, { "epoch": 0.16740994033883713, "grad_norm": 2.5879592895507812, "learning_rate": 9.513116035311239e-06, "loss": 0.7312, "step": 14002 }, { "epoch": 0.1674218964836978, "grad_norm": 2.1552672386169434, "learning_rate": 9.51303269300476e-06, "loss": 0.755, "step": 14003 }, { "epoch": 0.16743385262855845, "grad_norm": 3.4678001403808594, "learning_rate": 9.512949343930973e-06, "loss": 0.6979, "step": 14004 }, { "epoch": 0.1674458087734191, "grad_norm": 2.2565882205963135, "learning_rate": 9.51286598809e-06, "loss": 0.6019, "step": 14005 }, { "epoch": 0.16745776491827974, "grad_norm": 2.3119633197784424, "learning_rate": 9.512782625481972e-06, "loss": 0.5956, "step": 14006 }, { "epoch": 0.1674697210631404, "grad_norm": 1.8717803955078125, "learning_rate": 9.512699256107008e-06, "loss": 0.5935, "step": 14007 }, { "epoch": 0.16748167720800106, "grad_norm": 1.615787386894226, "learning_rate": 9.512615879965236e-06, "loss": 0.6155, "step": 14008 }, { "epoch": 0.16749363335286171, "grad_norm": 3.1622939109802246, "learning_rate": 9.51253249705678e-06, "loss": 0.6094, "step": 14009 }, { "epoch": 0.16750558949772235, "grad_norm": 1.5267523527145386, "learning_rate": 9.512449107381766e-06, "loss": 0.5901, "step": 14010 }, { "epoch": 0.167517545642583, "grad_norm": 1.9793448448181152, "learning_rate": 9.51236571094032e-06, "loss": 0.677, "step": 14011 }, { "epoch": 0.16752950178744366, "grad_norm": 2.5144622325897217, "learning_rate": 9.512282307732565e-06, "loss": 0.5971, "step": 14012 }, { "epoch": 0.1675414579323043, "grad_norm": 1.369905710220337, "learning_rate": 9.512198897758626e-06, "loss": 0.6104, "step": 14013 }, { "epoch": 0.16755341407716495, "grad_norm": 2.058091878890991, "learning_rate": 9.512115481018628e-06, "loss": 0.6075, "step": 14014 }, { "epoch": 0.1675653702220256, "grad_norm": 3.745636224746704, "learning_rate": 9.512032057512697e-06, "loss": 0.6567, "step": 14015 }, { "epoch": 0.16757732636688627, "grad_norm": 49.20801544189453, "learning_rate": 9.51194862724096e-06, "loss": 0.6508, "step": 14016 }, { "epoch": 0.1675892825117469, "grad_norm": 1.5362683534622192, "learning_rate": 9.511865190203537e-06, "loss": 0.5759, "step": 14017 }, { "epoch": 0.16760123865660756, "grad_norm": 1.901769995689392, "learning_rate": 9.511781746400557e-06, "loss": 0.5836, "step": 14018 }, { "epoch": 0.16761319480146822, "grad_norm": 2.385716199874878, "learning_rate": 9.511698295832144e-06, "loss": 0.6754, "step": 14019 }, { "epoch": 0.16762515094632888, "grad_norm": 2.320498466491699, "learning_rate": 9.511614838498423e-06, "loss": 0.6131, "step": 14020 }, { "epoch": 0.1676371070911895, "grad_norm": 2.8772518634796143, "learning_rate": 9.511531374399523e-06, "loss": 0.6614, "step": 14021 }, { "epoch": 0.16764906323605017, "grad_norm": 1.8239253759384155, "learning_rate": 9.511447903535563e-06, "loss": 0.6402, "step": 14022 }, { "epoch": 0.16766101938091083, "grad_norm": 2.874986171722412, "learning_rate": 9.51136442590667e-06, "loss": 0.6502, "step": 14023 }, { "epoch": 0.16767297552577148, "grad_norm": 2.296414852142334, "learning_rate": 9.511280941512971e-06, "loss": 0.5449, "step": 14024 }, { "epoch": 0.16768493167063211, "grad_norm": 2.1214425563812256, "learning_rate": 9.51119745035459e-06, "loss": 0.4584, "step": 14025 }, { "epoch": 0.16769688781549277, "grad_norm": 1.7174928188323975, "learning_rate": 9.511113952431652e-06, "loss": 0.6415, "step": 14026 }, { "epoch": 0.16770884396035343, "grad_norm": 2.114816904067993, "learning_rate": 9.511030447744284e-06, "loss": 0.5374, "step": 14027 }, { "epoch": 0.16772080010521406, "grad_norm": 10.599104881286621, "learning_rate": 9.510946936292608e-06, "loss": 0.573, "step": 14028 }, { "epoch": 0.16773275625007472, "grad_norm": 2.067936658859253, "learning_rate": 9.510863418076753e-06, "loss": 0.662, "step": 14029 }, { "epoch": 0.16774471239493538, "grad_norm": 4.886773586273193, "learning_rate": 9.510779893096842e-06, "loss": 0.5127, "step": 14030 }, { "epoch": 0.16775666853979604, "grad_norm": 2.4771265983581543, "learning_rate": 9.510696361353e-06, "loss": 0.7538, "step": 14031 }, { "epoch": 0.16776862468465667, "grad_norm": 5.16243839263916, "learning_rate": 9.510612822845352e-06, "loss": 0.6054, "step": 14032 }, { "epoch": 0.16778058082951733, "grad_norm": 2.3071441650390625, "learning_rate": 9.510529277574024e-06, "loss": 0.6588, "step": 14033 }, { "epoch": 0.167792536974378, "grad_norm": 2.397675037384033, "learning_rate": 9.510445725539142e-06, "loss": 0.6387, "step": 14034 }, { "epoch": 0.16780449311923865, "grad_norm": 2.5507090091705322, "learning_rate": 9.51036216674083e-06, "loss": 0.4936, "step": 14035 }, { "epoch": 0.16781644926409928, "grad_norm": 1.8236409425735474, "learning_rate": 9.510278601179214e-06, "loss": 0.596, "step": 14036 }, { "epoch": 0.16782840540895994, "grad_norm": 2.5693325996398926, "learning_rate": 9.51019502885442e-06, "loss": 0.624, "step": 14037 }, { "epoch": 0.1678403615538206, "grad_norm": 2.4606192111968994, "learning_rate": 9.510111449766573e-06, "loss": 0.595, "step": 14038 }, { "epoch": 0.16785231769868122, "grad_norm": 2.437415838241577, "learning_rate": 9.510027863915797e-06, "loss": 0.6852, "step": 14039 }, { "epoch": 0.16786427384354188, "grad_norm": 2.5914015769958496, "learning_rate": 9.509944271302218e-06, "loss": 0.5871, "step": 14040 }, { "epoch": 0.16787622998840254, "grad_norm": 3.368269205093384, "learning_rate": 9.509860671925962e-06, "loss": 0.5871, "step": 14041 }, { "epoch": 0.1678881861332632, "grad_norm": 5.099209308624268, "learning_rate": 9.509777065787154e-06, "loss": 0.617, "step": 14042 }, { "epoch": 0.16790014227812383, "grad_norm": 2.0175509452819824, "learning_rate": 9.509693452885919e-06, "loss": 0.7086, "step": 14043 }, { "epoch": 0.1679120984229845, "grad_norm": 2.1182138919830322, "learning_rate": 9.509609833222383e-06, "loss": 0.7028, "step": 14044 }, { "epoch": 0.16792405456784515, "grad_norm": 2.199650287628174, "learning_rate": 9.50952620679667e-06, "loss": 0.6083, "step": 14045 }, { "epoch": 0.1679360107127058, "grad_norm": 2.0278940200805664, "learning_rate": 9.509442573608907e-06, "loss": 0.6747, "step": 14046 }, { "epoch": 0.16794796685756644, "grad_norm": 1.8719258308410645, "learning_rate": 9.509358933659218e-06, "loss": 0.4918, "step": 14047 }, { "epoch": 0.1679599230024271, "grad_norm": 2.3359408378601074, "learning_rate": 9.509275286947731e-06, "loss": 0.7203, "step": 14048 }, { "epoch": 0.16797187914728776, "grad_norm": 2.2625889778137207, "learning_rate": 9.509191633474568e-06, "loss": 0.6886, "step": 14049 }, { "epoch": 0.1679838352921484, "grad_norm": 1.7183325290679932, "learning_rate": 9.509107973239858e-06, "loss": 0.6375, "step": 14050 }, { "epoch": 0.16799579143700905, "grad_norm": 2.2581982612609863, "learning_rate": 9.509024306243723e-06, "loss": 0.5943, "step": 14051 }, { "epoch": 0.1680077475818697, "grad_norm": 2.084716796875, "learning_rate": 9.50894063248629e-06, "loss": 0.5711, "step": 14052 }, { "epoch": 0.16801970372673036, "grad_norm": 1.6933403015136719, "learning_rate": 9.508856951967684e-06, "loss": 0.668, "step": 14053 }, { "epoch": 0.168031659871591, "grad_norm": 2.4393856525421143, "learning_rate": 9.508773264688032e-06, "loss": 0.6727, "step": 14054 }, { "epoch": 0.16804361601645165, "grad_norm": 2.859192132949829, "learning_rate": 9.50868957064746e-06, "loss": 0.5472, "step": 14055 }, { "epoch": 0.1680555721613123, "grad_norm": 3.6308493614196777, "learning_rate": 9.508605869846088e-06, "loss": 0.6237, "step": 14056 }, { "epoch": 0.16806752830617297, "grad_norm": 1.7261338233947754, "learning_rate": 9.508522162284048e-06, "loss": 0.6293, "step": 14057 }, { "epoch": 0.1680794844510336, "grad_norm": 3.0163965225219727, "learning_rate": 9.508438447961463e-06, "loss": 0.6694, "step": 14058 }, { "epoch": 0.16809144059589426, "grad_norm": 1.6719331741333008, "learning_rate": 9.508354726878458e-06, "loss": 0.4928, "step": 14059 }, { "epoch": 0.16810339674075492, "grad_norm": 3.224929094314575, "learning_rate": 9.50827099903516e-06, "loss": 0.6962, "step": 14060 }, { "epoch": 0.16811535288561555, "grad_norm": 3.9931235313415527, "learning_rate": 9.508187264431695e-06, "loss": 0.6032, "step": 14061 }, { "epoch": 0.1681273090304762, "grad_norm": 2.57920503616333, "learning_rate": 9.508103523068184e-06, "loss": 0.5841, "step": 14062 }, { "epoch": 0.16813926517533687, "grad_norm": 2.1233580112457275, "learning_rate": 9.508019774944756e-06, "loss": 0.6119, "step": 14063 }, { "epoch": 0.16815122132019752, "grad_norm": 2.190854787826538, "learning_rate": 9.507936020061538e-06, "loss": 0.6573, "step": 14064 }, { "epoch": 0.16816317746505816, "grad_norm": 2.452965021133423, "learning_rate": 9.507852258418652e-06, "loss": 0.6532, "step": 14065 }, { "epoch": 0.16817513360991881, "grad_norm": 1.8880414962768555, "learning_rate": 9.507768490016227e-06, "loss": 0.6323, "step": 14066 }, { "epoch": 0.16818708975477947, "grad_norm": 2.6920154094696045, "learning_rate": 9.507684714854388e-06, "loss": 0.588, "step": 14067 }, { "epoch": 0.16819904589964013, "grad_norm": 2.335038185119629, "learning_rate": 9.507600932933259e-06, "loss": 0.6045, "step": 14068 }, { "epoch": 0.16821100204450076, "grad_norm": 1.789050817489624, "learning_rate": 9.507517144252965e-06, "loss": 0.6657, "step": 14069 }, { "epoch": 0.16822295818936142, "grad_norm": 2.1537787914276123, "learning_rate": 9.507433348813635e-06, "loss": 0.509, "step": 14070 }, { "epoch": 0.16823491433422208, "grad_norm": 2.855807304382324, "learning_rate": 9.50734954661539e-06, "loss": 0.6658, "step": 14071 }, { "epoch": 0.1682468704790827, "grad_norm": 1.9645384550094604, "learning_rate": 9.50726573765836e-06, "loss": 0.6378, "step": 14072 }, { "epoch": 0.16825882662394337, "grad_norm": 2.3907716274261475, "learning_rate": 9.50718192194267e-06, "loss": 0.6482, "step": 14073 }, { "epoch": 0.16827078276880403, "grad_norm": 1.7525492906570435, "learning_rate": 9.507098099468443e-06, "loss": 0.6211, "step": 14074 }, { "epoch": 0.1682827389136647, "grad_norm": 3.027052640914917, "learning_rate": 9.507014270235806e-06, "loss": 0.5864, "step": 14075 }, { "epoch": 0.16829469505852532, "grad_norm": 1.9856243133544922, "learning_rate": 9.506930434244887e-06, "loss": 0.5993, "step": 14076 }, { "epoch": 0.16830665120338598, "grad_norm": 2.2680959701538086, "learning_rate": 9.506846591495809e-06, "loss": 0.6389, "step": 14077 }, { "epoch": 0.16831860734824663, "grad_norm": 1.8874393701553345, "learning_rate": 9.506762741988698e-06, "loss": 0.5854, "step": 14078 }, { "epoch": 0.1683305634931073, "grad_norm": 2.2649636268615723, "learning_rate": 9.50667888572368e-06, "loss": 0.5464, "step": 14079 }, { "epoch": 0.16834251963796792, "grad_norm": 3.670583963394165, "learning_rate": 9.506595022700882e-06, "loss": 0.6279, "step": 14080 }, { "epoch": 0.16835447578282858, "grad_norm": 2.8288772106170654, "learning_rate": 9.50651115292043e-06, "loss": 0.6625, "step": 14081 }, { "epoch": 0.16836643192768924, "grad_norm": 1.6678646802902222, "learning_rate": 9.506427276382446e-06, "loss": 0.577, "step": 14082 }, { "epoch": 0.1683783880725499, "grad_norm": 1.4231895208358765, "learning_rate": 9.506343393087059e-06, "loss": 0.6657, "step": 14083 }, { "epoch": 0.16839034421741053, "grad_norm": 2.037174701690674, "learning_rate": 9.506259503034394e-06, "loss": 0.6764, "step": 14084 }, { "epoch": 0.1684023003622712, "grad_norm": 4.144456386566162, "learning_rate": 9.506175606224578e-06, "loss": 0.639, "step": 14085 }, { "epoch": 0.16841425650713185, "grad_norm": 2.77850341796875, "learning_rate": 9.506091702657734e-06, "loss": 0.6821, "step": 14086 }, { "epoch": 0.16842621265199248, "grad_norm": 6.229855537414551, "learning_rate": 9.506007792333991e-06, "loss": 0.6054, "step": 14087 }, { "epoch": 0.16843816879685314, "grad_norm": 19.64421844482422, "learning_rate": 9.50592387525347e-06, "loss": 0.5565, "step": 14088 }, { "epoch": 0.1684501249417138, "grad_norm": 1.82167387008667, "learning_rate": 9.505839951416305e-06, "loss": 0.5976, "step": 14089 }, { "epoch": 0.16846208108657446, "grad_norm": 2.5124199390411377, "learning_rate": 9.505756020822614e-06, "loss": 0.7427, "step": 14090 }, { "epoch": 0.1684740372314351, "grad_norm": 1.629289984703064, "learning_rate": 9.505672083472527e-06, "loss": 0.6652, "step": 14091 }, { "epoch": 0.16848599337629575, "grad_norm": 1.757936954498291, "learning_rate": 9.505588139366168e-06, "loss": 0.6683, "step": 14092 }, { "epoch": 0.1684979495211564, "grad_norm": 3.380011796951294, "learning_rate": 9.505504188503664e-06, "loss": 0.6716, "step": 14093 }, { "epoch": 0.16850990566601706, "grad_norm": 5.412971019744873, "learning_rate": 9.505420230885139e-06, "loss": 0.5765, "step": 14094 }, { "epoch": 0.1685218618108777, "grad_norm": 7.246378421783447, "learning_rate": 9.505336266510723e-06, "loss": 0.6666, "step": 14095 }, { "epoch": 0.16853381795573835, "grad_norm": 2.101975679397583, "learning_rate": 9.505252295380537e-06, "loss": 0.5722, "step": 14096 }, { "epoch": 0.168545774100599, "grad_norm": 2.222259759902954, "learning_rate": 9.505168317494709e-06, "loss": 0.561, "step": 14097 }, { "epoch": 0.16855773024545964, "grad_norm": 7.325465679168701, "learning_rate": 9.505084332853366e-06, "loss": 0.4937, "step": 14098 }, { "epoch": 0.1685696863903203, "grad_norm": 3.3890841007232666, "learning_rate": 9.505000341456635e-06, "loss": 0.5656, "step": 14099 }, { "epoch": 0.16858164253518096, "grad_norm": 1.889986515045166, "learning_rate": 9.504916343304637e-06, "loss": 0.6446, "step": 14100 }, { "epoch": 0.16859359868004162, "grad_norm": 1.8496067523956299, "learning_rate": 9.504832338397503e-06, "loss": 0.6162, "step": 14101 }, { "epoch": 0.16860555482490225, "grad_norm": 1.9107176065444946, "learning_rate": 9.504748326735355e-06, "loss": 0.6616, "step": 14102 }, { "epoch": 0.1686175109697629, "grad_norm": 3.1967945098876953, "learning_rate": 9.504664308318321e-06, "loss": 0.5828, "step": 14103 }, { "epoch": 0.16862946711462357, "grad_norm": 10.007894515991211, "learning_rate": 9.504580283146528e-06, "loss": 0.6016, "step": 14104 }, { "epoch": 0.16864142325948422, "grad_norm": 1.6935553550720215, "learning_rate": 9.504496251220102e-06, "loss": 0.59, "step": 14105 }, { "epoch": 0.16865337940434486, "grad_norm": 2.8987278938293457, "learning_rate": 9.504412212539165e-06, "loss": 0.6014, "step": 14106 }, { "epoch": 0.16866533554920551, "grad_norm": 1.7406456470489502, "learning_rate": 9.504328167103847e-06, "loss": 0.699, "step": 14107 }, { "epoch": 0.16867729169406617, "grad_norm": 3.2265214920043945, "learning_rate": 9.504244114914273e-06, "loss": 0.5912, "step": 14108 }, { "epoch": 0.1686892478389268, "grad_norm": 7.677227020263672, "learning_rate": 9.504160055970569e-06, "loss": 0.5908, "step": 14109 }, { "epoch": 0.16870120398378746, "grad_norm": 2.5209767818450928, "learning_rate": 9.50407599027286e-06, "loss": 0.6432, "step": 14110 }, { "epoch": 0.16871316012864812, "grad_norm": 2.498992919921875, "learning_rate": 9.503991917821276e-06, "loss": 0.6975, "step": 14111 }, { "epoch": 0.16872511627350878, "grad_norm": 2.180734395980835, "learning_rate": 9.503907838615938e-06, "loss": 0.6423, "step": 14112 }, { "epoch": 0.1687370724183694, "grad_norm": 3.470614194869995, "learning_rate": 9.503823752656973e-06, "loss": 0.6061, "step": 14113 }, { "epoch": 0.16874902856323007, "grad_norm": 9.231361389160156, "learning_rate": 9.503739659944511e-06, "loss": 0.6675, "step": 14114 }, { "epoch": 0.16876098470809073, "grad_norm": 2.26650071144104, "learning_rate": 9.503655560478673e-06, "loss": 0.6669, "step": 14115 }, { "epoch": 0.1687729408529514, "grad_norm": 2.0718774795532227, "learning_rate": 9.50357145425959e-06, "loss": 0.6647, "step": 14116 }, { "epoch": 0.16878489699781202, "grad_norm": 1.462768793106079, "learning_rate": 9.503487341287383e-06, "loss": 0.6056, "step": 14117 }, { "epoch": 0.16879685314267268, "grad_norm": 2.4935569763183594, "learning_rate": 9.503403221562182e-06, "loss": 0.6148, "step": 14118 }, { "epoch": 0.16880880928753333, "grad_norm": 1.9837676286697388, "learning_rate": 9.503319095084112e-06, "loss": 0.6222, "step": 14119 }, { "epoch": 0.16882076543239397, "grad_norm": 1.5494184494018555, "learning_rate": 9.503234961853298e-06, "loss": 0.551, "step": 14120 }, { "epoch": 0.16883272157725462, "grad_norm": 4.478752613067627, "learning_rate": 9.503150821869869e-06, "loss": 0.6525, "step": 14121 }, { "epoch": 0.16884467772211528, "grad_norm": 1.688900351524353, "learning_rate": 9.503066675133947e-06, "loss": 0.6405, "step": 14122 }, { "epoch": 0.16885663386697594, "grad_norm": 1.7396280765533447, "learning_rate": 9.502982521645663e-06, "loss": 0.6561, "step": 14123 }, { "epoch": 0.16886859001183657, "grad_norm": 4.9405717849731445, "learning_rate": 9.502898361405138e-06, "loss": 0.6528, "step": 14124 }, { "epoch": 0.16888054615669723, "grad_norm": 3.3906617164611816, "learning_rate": 9.502814194412503e-06, "loss": 0.5657, "step": 14125 }, { "epoch": 0.1688925023015579, "grad_norm": 1.4963126182556152, "learning_rate": 9.50273002066788e-06, "loss": 0.5868, "step": 14126 }, { "epoch": 0.16890445844641855, "grad_norm": 1.876818060874939, "learning_rate": 9.5026458401714e-06, "loss": 0.639, "step": 14127 }, { "epoch": 0.16891641459127918, "grad_norm": 1.4090964794158936, "learning_rate": 9.502561652923185e-06, "loss": 0.6275, "step": 14128 }, { "epoch": 0.16892837073613984, "grad_norm": 2.8260419368743896, "learning_rate": 9.502477458923362e-06, "loss": 0.5241, "step": 14129 }, { "epoch": 0.1689403268810005, "grad_norm": 1.9975794553756714, "learning_rate": 9.502393258172061e-06, "loss": 0.5425, "step": 14130 }, { "epoch": 0.16895228302586116, "grad_norm": 1.530713438987732, "learning_rate": 9.502309050669403e-06, "loss": 0.6119, "step": 14131 }, { "epoch": 0.16896423917072179, "grad_norm": 1.5424247980117798, "learning_rate": 9.502224836415517e-06, "loss": 0.6227, "step": 14132 }, { "epoch": 0.16897619531558244, "grad_norm": 1.6278772354125977, "learning_rate": 9.502140615410529e-06, "loss": 0.6177, "step": 14133 }, { "epoch": 0.1689881514604431, "grad_norm": 1.1255854368209839, "learning_rate": 9.502056387654565e-06, "loss": 0.485, "step": 14134 }, { "epoch": 0.16900010760530373, "grad_norm": 2.2336370944976807, "learning_rate": 9.501972153147751e-06, "loss": 0.7044, "step": 14135 }, { "epoch": 0.1690120637501644, "grad_norm": 2.1459779739379883, "learning_rate": 9.501887911890215e-06, "loss": 0.747, "step": 14136 }, { "epoch": 0.16902401989502505, "grad_norm": 1.7864960432052612, "learning_rate": 9.50180366388208e-06, "loss": 0.6154, "step": 14137 }, { "epoch": 0.1690359760398857, "grad_norm": 2.508791923522949, "learning_rate": 9.501719409123476e-06, "loss": 0.6317, "step": 14138 }, { "epoch": 0.16904793218474634, "grad_norm": 2.7955400943756104, "learning_rate": 9.501635147614527e-06, "loss": 0.4866, "step": 14139 }, { "epoch": 0.169059888329607, "grad_norm": 1.773512601852417, "learning_rate": 9.50155087935536e-06, "loss": 0.691, "step": 14140 }, { "epoch": 0.16907184447446766, "grad_norm": 1.747045636177063, "learning_rate": 9.501466604346103e-06, "loss": 0.6041, "step": 14141 }, { "epoch": 0.16908380061932832, "grad_norm": 4.318785667419434, "learning_rate": 9.501382322586881e-06, "loss": 0.6322, "step": 14142 }, { "epoch": 0.16909575676418895, "grad_norm": 3.02591609954834, "learning_rate": 9.501298034077819e-06, "loss": 0.6124, "step": 14143 }, { "epoch": 0.1691077129090496, "grad_norm": 4.016925811767578, "learning_rate": 9.501213738819044e-06, "loss": 0.5751, "step": 14144 }, { "epoch": 0.16911966905391027, "grad_norm": 2.2396600246429443, "learning_rate": 9.501129436810683e-06, "loss": 0.5936, "step": 14145 }, { "epoch": 0.1691316251987709, "grad_norm": 8.04814338684082, "learning_rate": 9.501045128052863e-06, "loss": 0.5154, "step": 14146 }, { "epoch": 0.16914358134363155, "grad_norm": 1.533772587776184, "learning_rate": 9.50096081254571e-06, "loss": 0.63, "step": 14147 }, { "epoch": 0.1691555374884922, "grad_norm": 2.148526906967163, "learning_rate": 9.50087649028935e-06, "loss": 0.5557, "step": 14148 }, { "epoch": 0.16916749363335287, "grad_norm": 5.684652805328369, "learning_rate": 9.50079216128391e-06, "loss": 0.708, "step": 14149 }, { "epoch": 0.1691794497782135, "grad_norm": 2.2752647399902344, "learning_rate": 9.500707825529516e-06, "loss": 0.5327, "step": 14150 }, { "epoch": 0.16919140592307416, "grad_norm": 1.7840412855148315, "learning_rate": 9.500623483026294e-06, "loss": 0.5352, "step": 14151 }, { "epoch": 0.16920336206793482, "grad_norm": 4.737813472747803, "learning_rate": 9.500539133774373e-06, "loss": 0.7203, "step": 14152 }, { "epoch": 0.16921531821279548, "grad_norm": 1.8959637880325317, "learning_rate": 9.500454777773876e-06, "loss": 0.6539, "step": 14153 }, { "epoch": 0.1692272743576561, "grad_norm": 4.1274285316467285, "learning_rate": 9.50037041502493e-06, "loss": 0.5931, "step": 14154 }, { "epoch": 0.16923923050251677, "grad_norm": 2.228351593017578, "learning_rate": 9.500286045527665e-06, "loss": 0.6287, "step": 14155 }, { "epoch": 0.16925118664737743, "grad_norm": 2.108299493789673, "learning_rate": 9.500201669282204e-06, "loss": 0.5803, "step": 14156 }, { "epoch": 0.16926314279223806, "grad_norm": 6.271249294281006, "learning_rate": 9.500117286288675e-06, "loss": 0.612, "step": 14157 }, { "epoch": 0.16927509893709872, "grad_norm": 1.9416110515594482, "learning_rate": 9.500032896547205e-06, "loss": 0.6619, "step": 14158 }, { "epoch": 0.16928705508195938, "grad_norm": 2.230675458908081, "learning_rate": 9.499948500057916e-06, "loss": 0.6482, "step": 14159 }, { "epoch": 0.16929901122682003, "grad_norm": 2.648787260055542, "learning_rate": 9.499864096820941e-06, "loss": 0.6533, "step": 14160 }, { "epoch": 0.16931096737168067, "grad_norm": 2.3337502479553223, "learning_rate": 9.499779686836404e-06, "loss": 0.6045, "step": 14161 }, { "epoch": 0.16932292351654132, "grad_norm": 2.2385733127593994, "learning_rate": 9.49969527010443e-06, "loss": 0.5457, "step": 14162 }, { "epoch": 0.16933487966140198, "grad_norm": 1.9508439302444458, "learning_rate": 9.499610846625149e-06, "loss": 0.5711, "step": 14163 }, { "epoch": 0.16934683580626264, "grad_norm": 5.312036991119385, "learning_rate": 9.499526416398684e-06, "loss": 0.7122, "step": 14164 }, { "epoch": 0.16935879195112327, "grad_norm": 2.1814301013946533, "learning_rate": 9.499441979425164e-06, "loss": 0.6341, "step": 14165 }, { "epoch": 0.16937074809598393, "grad_norm": 1.756547212600708, "learning_rate": 9.499357535704713e-06, "loss": 0.5762, "step": 14166 }, { "epoch": 0.1693827042408446, "grad_norm": 2.3688833713531494, "learning_rate": 9.499273085237461e-06, "loss": 0.5359, "step": 14167 }, { "epoch": 0.16939466038570522, "grad_norm": 1.790259599685669, "learning_rate": 9.499188628023532e-06, "loss": 0.6306, "step": 14168 }, { "epoch": 0.16940661653056588, "grad_norm": 1.539313793182373, "learning_rate": 9.499104164063056e-06, "loss": 0.677, "step": 14169 }, { "epoch": 0.16941857267542654, "grad_norm": 1.5665165185928345, "learning_rate": 9.499019693356155e-06, "loss": 0.5684, "step": 14170 }, { "epoch": 0.1694305288202872, "grad_norm": 4.009651184082031, "learning_rate": 9.498935215902958e-06, "loss": 0.6053, "step": 14171 }, { "epoch": 0.16944248496514783, "grad_norm": 1.7440776824951172, "learning_rate": 9.498850731703592e-06, "loss": 0.5935, "step": 14172 }, { "epoch": 0.16945444111000849, "grad_norm": 7.958326816558838, "learning_rate": 9.498766240758183e-06, "loss": 0.6557, "step": 14173 }, { "epoch": 0.16946639725486914, "grad_norm": 4.423637390136719, "learning_rate": 9.498681743066859e-06, "loss": 0.5517, "step": 14174 }, { "epoch": 0.1694783533997298, "grad_norm": 3.0403361320495605, "learning_rate": 9.498597238629745e-06, "loss": 0.6204, "step": 14175 }, { "epoch": 0.16949030954459043, "grad_norm": 2.0650205612182617, "learning_rate": 9.498512727446969e-06, "loss": 0.6259, "step": 14176 }, { "epoch": 0.1695022656894511, "grad_norm": 6.87672233581543, "learning_rate": 9.498428209518658e-06, "loss": 0.5859, "step": 14177 }, { "epoch": 0.16951422183431175, "grad_norm": 1.9394018650054932, "learning_rate": 9.498343684844935e-06, "loss": 0.6077, "step": 14178 }, { "epoch": 0.16952617797917238, "grad_norm": 1.7680922746658325, "learning_rate": 9.498259153425932e-06, "loss": 0.6831, "step": 14179 }, { "epoch": 0.16953813412403304, "grad_norm": 1.6073118448257446, "learning_rate": 9.498174615261772e-06, "loss": 0.557, "step": 14180 }, { "epoch": 0.1695500902688937, "grad_norm": 7.007025718688965, "learning_rate": 9.498090070352585e-06, "loss": 0.6919, "step": 14181 }, { "epoch": 0.16956204641375436, "grad_norm": 1.600791573524475, "learning_rate": 9.498005518698494e-06, "loss": 0.6499, "step": 14182 }, { "epoch": 0.169574002558615, "grad_norm": 4.756985664367676, "learning_rate": 9.497920960299629e-06, "loss": 0.6289, "step": 14183 }, { "epoch": 0.16958595870347565, "grad_norm": 2.0797457695007324, "learning_rate": 9.497836395156116e-06, "loss": 0.5674, "step": 14184 }, { "epoch": 0.1695979148483363, "grad_norm": 2.8585448265075684, "learning_rate": 9.49775182326808e-06, "loss": 0.629, "step": 14185 }, { "epoch": 0.16960987099319697, "grad_norm": 2.036956787109375, "learning_rate": 9.49766724463565e-06, "loss": 0.5439, "step": 14186 }, { "epoch": 0.1696218271380576, "grad_norm": 2.0226147174835205, "learning_rate": 9.497582659258953e-06, "loss": 0.535, "step": 14187 }, { "epoch": 0.16963378328291825, "grad_norm": 3.1068434715270996, "learning_rate": 9.497498067138113e-06, "loss": 0.5992, "step": 14188 }, { "epoch": 0.1696457394277789, "grad_norm": 3.2609329223632812, "learning_rate": 9.49741346827326e-06, "loss": 0.6491, "step": 14189 }, { "epoch": 0.16965769557263957, "grad_norm": 1.4854007959365845, "learning_rate": 9.497328862664518e-06, "loss": 0.5069, "step": 14190 }, { "epoch": 0.1696696517175002, "grad_norm": 1.775105357170105, "learning_rate": 9.497244250312015e-06, "loss": 0.6709, "step": 14191 }, { "epoch": 0.16968160786236086, "grad_norm": 3.8401379585266113, "learning_rate": 9.49715963121588e-06, "loss": 0.5619, "step": 14192 }, { "epoch": 0.16969356400722152, "grad_norm": 1.5138391256332397, "learning_rate": 9.497075005376238e-06, "loss": 0.5805, "step": 14193 }, { "epoch": 0.16970552015208215, "grad_norm": 3.756685733795166, "learning_rate": 9.496990372793215e-06, "loss": 0.5608, "step": 14194 }, { "epoch": 0.1697174762969428, "grad_norm": 4.77981424331665, "learning_rate": 9.49690573346694e-06, "loss": 0.5767, "step": 14195 }, { "epoch": 0.16972943244180347, "grad_norm": 2.189357042312622, "learning_rate": 9.496821087397538e-06, "loss": 0.6607, "step": 14196 }, { "epoch": 0.16974138858666413, "grad_norm": 1.6031416654586792, "learning_rate": 9.496736434585136e-06, "loss": 0.6038, "step": 14197 }, { "epoch": 0.16975334473152476, "grad_norm": 1.7795113325119019, "learning_rate": 9.496651775029864e-06, "loss": 0.5524, "step": 14198 }, { "epoch": 0.16976530087638542, "grad_norm": 2.421039342880249, "learning_rate": 9.496567108731845e-06, "loss": 0.6105, "step": 14199 }, { "epoch": 0.16977725702124608, "grad_norm": 5.495951175689697, "learning_rate": 9.496482435691209e-06, "loss": 0.6478, "step": 14200 }, { "epoch": 0.16978921316610673, "grad_norm": 2.3582684993743896, "learning_rate": 9.49639775590808e-06, "loss": 0.641, "step": 14201 }, { "epoch": 0.16980116931096736, "grad_norm": 3.393543004989624, "learning_rate": 9.496313069382588e-06, "loss": 0.543, "step": 14202 }, { "epoch": 0.16981312545582802, "grad_norm": 1.5988787412643433, "learning_rate": 9.496228376114857e-06, "loss": 0.5422, "step": 14203 }, { "epoch": 0.16982508160068868, "grad_norm": 3.0632059574127197, "learning_rate": 9.496143676105017e-06, "loss": 0.6206, "step": 14204 }, { "epoch": 0.1698370377455493, "grad_norm": 1.456361174583435, "learning_rate": 9.496058969353194e-06, "loss": 0.641, "step": 14205 }, { "epoch": 0.16984899389040997, "grad_norm": 3.257067918777466, "learning_rate": 9.495974255859513e-06, "loss": 0.7784, "step": 14206 }, { "epoch": 0.16986095003527063, "grad_norm": 6.2045674324035645, "learning_rate": 9.495889535624103e-06, "loss": 0.5622, "step": 14207 }, { "epoch": 0.1698729061801313, "grad_norm": 1.767151951789856, "learning_rate": 9.49580480864709e-06, "loss": 0.4761, "step": 14208 }, { "epoch": 0.16988486232499192, "grad_norm": 2.673333168029785, "learning_rate": 9.495720074928605e-06, "loss": 0.7699, "step": 14209 }, { "epoch": 0.16989681846985258, "grad_norm": 1.8046150207519531, "learning_rate": 9.495635334468769e-06, "loss": 0.6229, "step": 14210 }, { "epoch": 0.16990877461471324, "grad_norm": 3.0882439613342285, "learning_rate": 9.495550587267713e-06, "loss": 0.6384, "step": 14211 }, { "epoch": 0.1699207307595739, "grad_norm": 1.658076286315918, "learning_rate": 9.49546583332556e-06, "loss": 0.6143, "step": 14212 }, { "epoch": 0.16993268690443453, "grad_norm": 4.731967926025391, "learning_rate": 9.495381072642444e-06, "loss": 0.6666, "step": 14213 }, { "epoch": 0.16994464304929519, "grad_norm": 2.6481003761291504, "learning_rate": 9.495296305218486e-06, "loss": 0.6448, "step": 14214 }, { "epoch": 0.16995659919415584, "grad_norm": 1.8711705207824707, "learning_rate": 9.495211531053817e-06, "loss": 0.4854, "step": 14215 }, { "epoch": 0.16996855533901647, "grad_norm": 2.1742050647735596, "learning_rate": 9.49512675014856e-06, "loss": 0.6582, "step": 14216 }, { "epoch": 0.16998051148387713, "grad_norm": 1.6457868814468384, "learning_rate": 9.495041962502844e-06, "loss": 0.6213, "step": 14217 }, { "epoch": 0.1699924676287378, "grad_norm": 2.010882616043091, "learning_rate": 9.4949571681168e-06, "loss": 0.6396, "step": 14218 }, { "epoch": 0.17000442377359845, "grad_norm": 1.6303192377090454, "learning_rate": 9.494872366990548e-06, "loss": 0.6353, "step": 14219 }, { "epoch": 0.17001637991845908, "grad_norm": 1.5075894594192505, "learning_rate": 9.494787559124221e-06, "loss": 0.5589, "step": 14220 }, { "epoch": 0.17002833606331974, "grad_norm": 4.116957187652588, "learning_rate": 9.494702744517943e-06, "loss": 0.611, "step": 14221 }, { "epoch": 0.1700402922081804, "grad_norm": 2.1016507148742676, "learning_rate": 9.494617923171843e-06, "loss": 0.6112, "step": 14222 }, { "epoch": 0.17005224835304106, "grad_norm": 1.2756989002227783, "learning_rate": 9.494533095086047e-06, "loss": 0.6326, "step": 14223 }, { "epoch": 0.1700642044979017, "grad_norm": 3.212646484375, "learning_rate": 9.494448260260685e-06, "loss": 0.6184, "step": 14224 }, { "epoch": 0.17007616064276235, "grad_norm": 3.389009475708008, "learning_rate": 9.49436341869588e-06, "loss": 0.6899, "step": 14225 }, { "epoch": 0.170088116787623, "grad_norm": 1.373570203781128, "learning_rate": 9.49427857039176e-06, "loss": 0.5986, "step": 14226 }, { "epoch": 0.17010007293248364, "grad_norm": 2.5718250274658203, "learning_rate": 9.494193715348454e-06, "loss": 0.5786, "step": 14227 }, { "epoch": 0.1701120290773443, "grad_norm": 4.63755464553833, "learning_rate": 9.494108853566088e-06, "loss": 0.6508, "step": 14228 }, { "epoch": 0.17012398522220495, "grad_norm": 3.944671392440796, "learning_rate": 9.494023985044791e-06, "loss": 0.6092, "step": 14229 }, { "epoch": 0.1701359413670656, "grad_norm": 1.8035001754760742, "learning_rate": 9.493939109784688e-06, "loss": 0.5727, "step": 14230 }, { "epoch": 0.17014789751192624, "grad_norm": 1.68611478805542, "learning_rate": 9.493854227785911e-06, "loss": 0.5848, "step": 14231 }, { "epoch": 0.1701598536567869, "grad_norm": 2.480071544647217, "learning_rate": 9.49376933904858e-06, "loss": 0.6177, "step": 14232 }, { "epoch": 0.17017180980164756, "grad_norm": 1.6056327819824219, "learning_rate": 9.493684443572826e-06, "loss": 0.5501, "step": 14233 }, { "epoch": 0.17018376594650822, "grad_norm": 2.71897292137146, "learning_rate": 9.493599541358776e-06, "loss": 0.6809, "step": 14234 }, { "epoch": 0.17019572209136885, "grad_norm": 3.689035177230835, "learning_rate": 9.493514632406558e-06, "loss": 0.6972, "step": 14235 }, { "epoch": 0.1702076782362295, "grad_norm": 2.2911412715911865, "learning_rate": 9.4934297167163e-06, "loss": 0.6608, "step": 14236 }, { "epoch": 0.17021963438109017, "grad_norm": 1.8271245956420898, "learning_rate": 9.493344794288127e-06, "loss": 0.6346, "step": 14237 }, { "epoch": 0.1702315905259508, "grad_norm": 2.3513567447662354, "learning_rate": 9.493259865122168e-06, "loss": 0.6339, "step": 14238 }, { "epoch": 0.17024354667081146, "grad_norm": 2.1711387634277344, "learning_rate": 9.49317492921855e-06, "loss": 0.6988, "step": 14239 }, { "epoch": 0.17025550281567212, "grad_norm": 1.5284984111785889, "learning_rate": 9.4930899865774e-06, "loss": 0.5856, "step": 14240 }, { "epoch": 0.17026745896053277, "grad_norm": 3.0707406997680664, "learning_rate": 9.493005037198845e-06, "loss": 0.6755, "step": 14241 }, { "epoch": 0.1702794151053934, "grad_norm": 1.519127607345581, "learning_rate": 9.492920081083013e-06, "loss": 0.6411, "step": 14242 }, { "epoch": 0.17029137125025406, "grad_norm": 3.3506977558135986, "learning_rate": 9.492835118230034e-06, "loss": 0.5466, "step": 14243 }, { "epoch": 0.17030332739511472, "grad_norm": 2.5225913524627686, "learning_rate": 9.492750148640028e-06, "loss": 0.5706, "step": 14244 }, { "epoch": 0.17031528353997538, "grad_norm": 1.832350492477417, "learning_rate": 9.49266517231313e-06, "loss": 0.6466, "step": 14245 }, { "epoch": 0.170327239684836, "grad_norm": 3.882506847381592, "learning_rate": 9.492580189249464e-06, "loss": 0.6549, "step": 14246 }, { "epoch": 0.17033919582969667, "grad_norm": 1.912461757659912, "learning_rate": 9.492495199449158e-06, "loss": 0.6542, "step": 14247 }, { "epoch": 0.17035115197455733, "grad_norm": 1.6603221893310547, "learning_rate": 9.492410202912341e-06, "loss": 0.6251, "step": 14248 }, { "epoch": 0.170363108119418, "grad_norm": 8.093099594116211, "learning_rate": 9.492325199639137e-06, "loss": 0.6636, "step": 14249 }, { "epoch": 0.17037506426427862, "grad_norm": 2.060030460357666, "learning_rate": 9.492240189629677e-06, "loss": 0.7241, "step": 14250 }, { "epoch": 0.17038702040913928, "grad_norm": 1.5032281875610352, "learning_rate": 9.492155172884086e-06, "loss": 0.6318, "step": 14251 }, { "epoch": 0.17039897655399994, "grad_norm": 2.4313511848449707, "learning_rate": 9.492070149402493e-06, "loss": 0.6303, "step": 14252 }, { "epoch": 0.17041093269886057, "grad_norm": 1.9879424571990967, "learning_rate": 9.491985119185024e-06, "loss": 0.6203, "step": 14253 }, { "epoch": 0.17042288884372123, "grad_norm": 1.4920803308486938, "learning_rate": 9.491900082231806e-06, "loss": 0.6101, "step": 14254 }, { "epoch": 0.17043484498858189, "grad_norm": 1.5422228574752808, "learning_rate": 9.491815038542971e-06, "loss": 0.5464, "step": 14255 }, { "epoch": 0.17044680113344254, "grad_norm": 1.6335920095443726, "learning_rate": 9.491729988118641e-06, "loss": 0.6043, "step": 14256 }, { "epoch": 0.17045875727830317, "grad_norm": 1.767652988433838, "learning_rate": 9.491644930958948e-06, "loss": 0.5872, "step": 14257 }, { "epoch": 0.17047071342316383, "grad_norm": 1.8247028589248657, "learning_rate": 9.491559867064015e-06, "loss": 0.6543, "step": 14258 }, { "epoch": 0.1704826695680245, "grad_norm": 3.8785061836242676, "learning_rate": 9.491474796433973e-06, "loss": 0.7423, "step": 14259 }, { "epoch": 0.17049462571288515, "grad_norm": 2.848125457763672, "learning_rate": 9.49138971906895e-06, "loss": 0.5892, "step": 14260 }, { "epoch": 0.17050658185774578, "grad_norm": 1.5908228158950806, "learning_rate": 9.49130463496907e-06, "loss": 0.5734, "step": 14261 }, { "epoch": 0.17051853800260644, "grad_norm": 5.283910751342773, "learning_rate": 9.491219544134465e-06, "loss": 0.67, "step": 14262 }, { "epoch": 0.1705304941474671, "grad_norm": 2.2842674255371094, "learning_rate": 9.491134446565258e-06, "loss": 0.6269, "step": 14263 }, { "epoch": 0.17054245029232773, "grad_norm": 2.3176116943359375, "learning_rate": 9.49104934226158e-06, "loss": 0.5772, "step": 14264 }, { "epoch": 0.1705544064371884, "grad_norm": 2.198902130126953, "learning_rate": 9.490964231223558e-06, "loss": 0.6188, "step": 14265 }, { "epoch": 0.17056636258204905, "grad_norm": 2.3747293949127197, "learning_rate": 9.490879113451319e-06, "loss": 0.6302, "step": 14266 }, { "epoch": 0.1705783187269097, "grad_norm": 1.981709361076355, "learning_rate": 9.49079398894499e-06, "loss": 0.6718, "step": 14267 }, { "epoch": 0.17059027487177034, "grad_norm": 2.3317410945892334, "learning_rate": 9.4907088577047e-06, "loss": 0.6635, "step": 14268 }, { "epoch": 0.170602231016631, "grad_norm": 1.880271553993225, "learning_rate": 9.490623719730576e-06, "loss": 0.6483, "step": 14269 }, { "epoch": 0.17061418716149165, "grad_norm": 2.079155683517456, "learning_rate": 9.490538575022745e-06, "loss": 0.6217, "step": 14270 }, { "epoch": 0.1706261433063523, "grad_norm": 3.039196729660034, "learning_rate": 9.490453423581336e-06, "loss": 0.6134, "step": 14271 }, { "epoch": 0.17063809945121294, "grad_norm": 2.764090061187744, "learning_rate": 9.490368265406477e-06, "loss": 0.518, "step": 14272 }, { "epoch": 0.1706500555960736, "grad_norm": 1.4132184982299805, "learning_rate": 9.490283100498294e-06, "loss": 0.4849, "step": 14273 }, { "epoch": 0.17066201174093426, "grad_norm": 1.6106812953948975, "learning_rate": 9.490197928856917e-06, "loss": 0.6292, "step": 14274 }, { "epoch": 0.1706739678857949, "grad_norm": 4.067685604095459, "learning_rate": 9.490112750482469e-06, "loss": 0.5649, "step": 14275 }, { "epoch": 0.17068592403065555, "grad_norm": 1.893965482711792, "learning_rate": 9.490027565375085e-06, "loss": 0.5958, "step": 14276 }, { "epoch": 0.1706978801755162, "grad_norm": 2.247314453125, "learning_rate": 9.489942373534886e-06, "loss": 0.5946, "step": 14277 }, { "epoch": 0.17070983632037687, "grad_norm": 3.466110944747925, "learning_rate": 9.489857174962004e-06, "loss": 0.6379, "step": 14278 }, { "epoch": 0.1707217924652375, "grad_norm": 1.9769917726516724, "learning_rate": 9.489771969656563e-06, "loss": 0.6244, "step": 14279 }, { "epoch": 0.17073374861009816, "grad_norm": 3.4853243827819824, "learning_rate": 9.489686757618694e-06, "loss": 0.5923, "step": 14280 }, { "epoch": 0.17074570475495882, "grad_norm": 1.7792339324951172, "learning_rate": 9.489601538848525e-06, "loss": 0.6992, "step": 14281 }, { "epoch": 0.17075766089981947, "grad_norm": 2.9532032012939453, "learning_rate": 9.489516313346181e-06, "loss": 0.6547, "step": 14282 }, { "epoch": 0.1707696170446801, "grad_norm": 2.264003276824951, "learning_rate": 9.489431081111791e-06, "loss": 0.6388, "step": 14283 }, { "epoch": 0.17078157318954076, "grad_norm": 1.7090210914611816, "learning_rate": 9.489345842145485e-06, "loss": 0.6949, "step": 14284 }, { "epoch": 0.17079352933440142, "grad_norm": 2.876864433288574, "learning_rate": 9.489260596447389e-06, "loss": 0.649, "step": 14285 }, { "epoch": 0.17080548547926205, "grad_norm": 1.8568288087844849, "learning_rate": 9.48917534401763e-06, "loss": 0.6466, "step": 14286 }, { "epoch": 0.1708174416241227, "grad_norm": 2.2237870693206787, "learning_rate": 9.489090084856335e-06, "loss": 0.6192, "step": 14287 }, { "epoch": 0.17082939776898337, "grad_norm": 2.6453890800476074, "learning_rate": 9.489004818963635e-06, "loss": 0.5746, "step": 14288 }, { "epoch": 0.17084135391384403, "grad_norm": 2.3251914978027344, "learning_rate": 9.488919546339656e-06, "loss": 0.7264, "step": 14289 }, { "epoch": 0.17085331005870466, "grad_norm": 2.381019353866577, "learning_rate": 9.488834266984525e-06, "loss": 0.6026, "step": 14290 }, { "epoch": 0.17086526620356532, "grad_norm": 1.6405997276306152, "learning_rate": 9.488748980898374e-06, "loss": 0.5968, "step": 14291 }, { "epoch": 0.17087722234842598, "grad_norm": 3.326760768890381, "learning_rate": 9.488663688081326e-06, "loss": 0.6365, "step": 14292 }, { "epoch": 0.17088917849328664, "grad_norm": 2.3580074310302734, "learning_rate": 9.488578388533512e-06, "loss": 0.6743, "step": 14293 }, { "epoch": 0.17090113463814727, "grad_norm": 16.419754028320312, "learning_rate": 9.488493082255058e-06, "loss": 0.6416, "step": 14294 }, { "epoch": 0.17091309078300793, "grad_norm": 2.267850160598755, "learning_rate": 9.488407769246091e-06, "loss": 0.6498, "step": 14295 }, { "epoch": 0.17092504692786858, "grad_norm": 2.1142094135284424, "learning_rate": 9.488322449506743e-06, "loss": 0.6931, "step": 14296 }, { "epoch": 0.17093700307272922, "grad_norm": 2.341132640838623, "learning_rate": 9.48823712303714e-06, "loss": 0.6746, "step": 14297 }, { "epoch": 0.17094895921758987, "grad_norm": 9.236424446105957, "learning_rate": 9.488151789837408e-06, "loss": 0.5841, "step": 14298 }, { "epoch": 0.17096091536245053, "grad_norm": 1.533217191696167, "learning_rate": 9.488066449907676e-06, "loss": 0.6667, "step": 14299 }, { "epoch": 0.1709728715073112, "grad_norm": 4.406167984008789, "learning_rate": 9.487981103248072e-06, "loss": 0.7101, "step": 14300 }, { "epoch": 0.17098482765217182, "grad_norm": 2.2291224002838135, "learning_rate": 9.487895749858726e-06, "loss": 0.6831, "step": 14301 }, { "epoch": 0.17099678379703248, "grad_norm": 2.079686403274536, "learning_rate": 9.487810389739764e-06, "loss": 0.6022, "step": 14302 }, { "epoch": 0.17100873994189314, "grad_norm": 1.736064076423645, "learning_rate": 9.487725022891313e-06, "loss": 0.6005, "step": 14303 }, { "epoch": 0.1710206960867538, "grad_norm": 3.0030245780944824, "learning_rate": 9.487639649313504e-06, "loss": 0.6666, "step": 14304 }, { "epoch": 0.17103265223161443, "grad_norm": 2.6497392654418945, "learning_rate": 9.487554269006464e-06, "loss": 0.6732, "step": 14305 }, { "epoch": 0.1710446083764751, "grad_norm": 2.1387157440185547, "learning_rate": 9.487468881970318e-06, "loss": 0.6029, "step": 14306 }, { "epoch": 0.17105656452133575, "grad_norm": 1.5067713260650635, "learning_rate": 9.4873834882052e-06, "loss": 0.6711, "step": 14307 }, { "epoch": 0.1710685206661964, "grad_norm": 3.0637426376342773, "learning_rate": 9.487298087711232e-06, "loss": 0.5884, "step": 14308 }, { "epoch": 0.17108047681105704, "grad_norm": 4.090836048126221, "learning_rate": 9.487212680488544e-06, "loss": 0.5728, "step": 14309 }, { "epoch": 0.1710924329559177, "grad_norm": 2.4878990650177, "learning_rate": 9.487127266537267e-06, "loss": 0.6118, "step": 14310 }, { "epoch": 0.17110438910077835, "grad_norm": 3.390872001647949, "learning_rate": 9.487041845857524e-06, "loss": 0.6099, "step": 14311 }, { "epoch": 0.17111634524563898, "grad_norm": 1.5266499519348145, "learning_rate": 9.486956418449447e-06, "loss": 0.54, "step": 14312 }, { "epoch": 0.17112830139049964, "grad_norm": 2.2382638454437256, "learning_rate": 9.486870984313163e-06, "loss": 0.7333, "step": 14313 }, { "epoch": 0.1711402575353603, "grad_norm": 3.810755729675293, "learning_rate": 9.4867855434488e-06, "loss": 0.5794, "step": 14314 }, { "epoch": 0.17115221368022096, "grad_norm": 1.6544183492660522, "learning_rate": 9.486700095856488e-06, "loss": 0.6519, "step": 14315 }, { "epoch": 0.1711641698250816, "grad_norm": 2.150250196456909, "learning_rate": 9.486614641536349e-06, "loss": 0.5768, "step": 14316 }, { "epoch": 0.17117612596994225, "grad_norm": 2.148643732070923, "learning_rate": 9.486529180488519e-06, "loss": 0.5335, "step": 14317 }, { "epoch": 0.1711880821148029, "grad_norm": 1.940901756286621, "learning_rate": 9.486443712713121e-06, "loss": 0.6716, "step": 14318 }, { "epoch": 0.17120003825966357, "grad_norm": 1.5798755884170532, "learning_rate": 9.486358238210284e-06, "loss": 0.636, "step": 14319 }, { "epoch": 0.1712119944045242, "grad_norm": 1.4276189804077148, "learning_rate": 9.486272756980139e-06, "loss": 0.541, "step": 14320 }, { "epoch": 0.17122395054938486, "grad_norm": 2.131035566329956, "learning_rate": 9.48618726902281e-06, "loss": 0.5912, "step": 14321 }, { "epoch": 0.17123590669424552, "grad_norm": 2.1937594413757324, "learning_rate": 9.48610177433843e-06, "loss": 0.517, "step": 14322 }, { "epoch": 0.17124786283910615, "grad_norm": 1.8820711374282837, "learning_rate": 9.486016272927122e-06, "loss": 0.6156, "step": 14323 }, { "epoch": 0.1712598189839668, "grad_norm": 1.8574877977371216, "learning_rate": 9.485930764789018e-06, "loss": 0.6411, "step": 14324 }, { "epoch": 0.17127177512882746, "grad_norm": 2.120480537414551, "learning_rate": 9.485845249924243e-06, "loss": 0.6524, "step": 14325 }, { "epoch": 0.17128373127368812, "grad_norm": 1.8953945636749268, "learning_rate": 9.48575972833293e-06, "loss": 0.6002, "step": 14326 }, { "epoch": 0.17129568741854875, "grad_norm": 1.8939062356948853, "learning_rate": 9.485674200015201e-06, "loss": 0.6175, "step": 14327 }, { "epoch": 0.1713076435634094, "grad_norm": 2.576514959335327, "learning_rate": 9.48558866497119e-06, "loss": 0.6205, "step": 14328 }, { "epoch": 0.17131959970827007, "grad_norm": 1.9783767461776733, "learning_rate": 9.485503123201021e-06, "loss": 0.5708, "step": 14329 }, { "epoch": 0.17133155585313073, "grad_norm": 1.6105719804763794, "learning_rate": 9.485417574704825e-06, "loss": 0.6521, "step": 14330 }, { "epoch": 0.17134351199799136, "grad_norm": 1.9894064664840698, "learning_rate": 9.48533201948273e-06, "loss": 0.6016, "step": 14331 }, { "epoch": 0.17135546814285202, "grad_norm": 1.5580300092697144, "learning_rate": 9.485246457534863e-06, "loss": 0.691, "step": 14332 }, { "epoch": 0.17136742428771268, "grad_norm": 1.896921157836914, "learning_rate": 9.485160888861353e-06, "loss": 0.7291, "step": 14333 }, { "epoch": 0.1713793804325733, "grad_norm": 2.9818108081817627, "learning_rate": 9.485075313462328e-06, "loss": 0.5849, "step": 14334 }, { "epoch": 0.17139133657743397, "grad_norm": 3.2399439811706543, "learning_rate": 9.48498973133792e-06, "loss": 0.5834, "step": 14335 }, { "epoch": 0.17140329272229463, "grad_norm": 2.7628207206726074, "learning_rate": 9.484904142488249e-06, "loss": 0.5861, "step": 14336 }, { "epoch": 0.17141524886715528, "grad_norm": 2.5637431144714355, "learning_rate": 9.484818546913452e-06, "loss": 0.548, "step": 14337 }, { "epoch": 0.17142720501201592, "grad_norm": 2.3588452339172363, "learning_rate": 9.48473294461365e-06, "loss": 0.6316, "step": 14338 }, { "epoch": 0.17143916115687657, "grad_norm": 2.889021158218384, "learning_rate": 9.48464733558898e-06, "loss": 0.6087, "step": 14339 }, { "epoch": 0.17145111730173723, "grad_norm": 2.859086036682129, "learning_rate": 9.48456171983956e-06, "loss": 0.6708, "step": 14340 }, { "epoch": 0.1714630734465979, "grad_norm": 1.5952506065368652, "learning_rate": 9.484476097365527e-06, "loss": 0.5465, "step": 14341 }, { "epoch": 0.17147502959145852, "grad_norm": 2.309847831726074, "learning_rate": 9.484390468167005e-06, "loss": 0.6589, "step": 14342 }, { "epoch": 0.17148698573631918, "grad_norm": 2.9313626289367676, "learning_rate": 9.484304832244124e-06, "loss": 0.7468, "step": 14343 }, { "epoch": 0.17149894188117984, "grad_norm": 2.181170701980591, "learning_rate": 9.484219189597012e-06, "loss": 0.6134, "step": 14344 }, { "epoch": 0.17151089802604047, "grad_norm": 1.6873067617416382, "learning_rate": 9.484133540225797e-06, "loss": 0.617, "step": 14345 }, { "epoch": 0.17152285417090113, "grad_norm": 2.0924646854400635, "learning_rate": 9.48404788413061e-06, "loss": 0.6132, "step": 14346 }, { "epoch": 0.1715348103157618, "grad_norm": 1.805680274963379, "learning_rate": 9.483962221311573e-06, "loss": 0.5667, "step": 14347 }, { "epoch": 0.17154676646062245, "grad_norm": 1.8023650646209717, "learning_rate": 9.483876551768822e-06, "loss": 0.6533, "step": 14348 }, { "epoch": 0.17155872260548308, "grad_norm": 2.2714807987213135, "learning_rate": 9.483790875502482e-06, "loss": 0.5315, "step": 14349 }, { "epoch": 0.17157067875034374, "grad_norm": 2.5138614177703857, "learning_rate": 9.48370519251268e-06, "loss": 0.613, "step": 14350 }, { "epoch": 0.1715826348952044, "grad_norm": 4.084988594055176, "learning_rate": 9.483619502799547e-06, "loss": 0.5253, "step": 14351 }, { "epoch": 0.17159459104006505, "grad_norm": 7.247678756713867, "learning_rate": 9.483533806363211e-06, "loss": 0.5739, "step": 14352 }, { "epoch": 0.17160654718492568, "grad_norm": 1.9725960493087769, "learning_rate": 9.483448103203801e-06, "loss": 0.6638, "step": 14353 }, { "epoch": 0.17161850332978634, "grad_norm": 2.148897171020508, "learning_rate": 9.483362393321442e-06, "loss": 0.681, "step": 14354 }, { "epoch": 0.171630459474647, "grad_norm": 2.0353505611419678, "learning_rate": 9.483276676716267e-06, "loss": 0.5056, "step": 14355 }, { "epoch": 0.17164241561950763, "grad_norm": 3.2741150856018066, "learning_rate": 9.483190953388402e-06, "loss": 0.5358, "step": 14356 }, { "epoch": 0.1716543717643683, "grad_norm": 2.552490472793579, "learning_rate": 9.483105223337976e-06, "loss": 0.5712, "step": 14357 }, { "epoch": 0.17166632790922895, "grad_norm": 1.724060297012329, "learning_rate": 9.48301948656512e-06, "loss": 0.6505, "step": 14358 }, { "epoch": 0.1716782840540896, "grad_norm": 3.709500789642334, "learning_rate": 9.482933743069957e-06, "loss": 0.6391, "step": 14359 }, { "epoch": 0.17169024019895024, "grad_norm": 1.6626203060150146, "learning_rate": 9.48284799285262e-06, "loss": 0.5552, "step": 14360 }, { "epoch": 0.1717021963438109, "grad_norm": 1.6548622846603394, "learning_rate": 9.482762235913237e-06, "loss": 0.6292, "step": 14361 }, { "epoch": 0.17171415248867156, "grad_norm": 1.486547827720642, "learning_rate": 9.482676472251936e-06, "loss": 0.6241, "step": 14362 }, { "epoch": 0.17172610863353222, "grad_norm": 2.638847589492798, "learning_rate": 9.482590701868846e-06, "loss": 0.6656, "step": 14363 }, { "epoch": 0.17173806477839285, "grad_norm": 2.92471981048584, "learning_rate": 9.482504924764096e-06, "loss": 0.6225, "step": 14364 }, { "epoch": 0.1717500209232535, "grad_norm": 2.7010862827301025, "learning_rate": 9.482419140937813e-06, "loss": 0.6299, "step": 14365 }, { "epoch": 0.17176197706811416, "grad_norm": 2.5498101711273193, "learning_rate": 9.482333350390126e-06, "loss": 0.6367, "step": 14366 }, { "epoch": 0.17177393321297482, "grad_norm": 3.281548261642456, "learning_rate": 9.482247553121165e-06, "loss": 0.5972, "step": 14367 }, { "epoch": 0.17178588935783545, "grad_norm": 2.884376049041748, "learning_rate": 9.482161749131059e-06, "loss": 0.6539, "step": 14368 }, { "epoch": 0.1717978455026961, "grad_norm": 1.8121757507324219, "learning_rate": 9.482075938419934e-06, "loss": 0.6061, "step": 14369 }, { "epoch": 0.17180980164755677, "grad_norm": 2.7241263389587402, "learning_rate": 9.48199012098792e-06, "loss": 0.6441, "step": 14370 }, { "epoch": 0.1718217577924174, "grad_norm": 1.5358233451843262, "learning_rate": 9.481904296835147e-06, "loss": 0.5844, "step": 14371 }, { "epoch": 0.17183371393727806, "grad_norm": 1.288233757019043, "learning_rate": 9.48181846596174e-06, "loss": 0.5523, "step": 14372 }, { "epoch": 0.17184567008213872, "grad_norm": 5.112623691558838, "learning_rate": 9.481732628367833e-06, "loss": 0.5909, "step": 14373 }, { "epoch": 0.17185762622699938, "grad_norm": 4.144014835357666, "learning_rate": 9.48164678405355e-06, "loss": 0.6455, "step": 14374 }, { "epoch": 0.17186958237186, "grad_norm": 1.9718643426895142, "learning_rate": 9.481560933019024e-06, "loss": 0.694, "step": 14375 }, { "epoch": 0.17188153851672067, "grad_norm": 3.887451648712158, "learning_rate": 9.48147507526438e-06, "loss": 0.6062, "step": 14376 }, { "epoch": 0.17189349466158133, "grad_norm": 2.153961181640625, "learning_rate": 9.481389210789748e-06, "loss": 0.5401, "step": 14377 }, { "epoch": 0.17190545080644198, "grad_norm": 2.159991502761841, "learning_rate": 9.481303339595258e-06, "loss": 0.602, "step": 14378 }, { "epoch": 0.17191740695130261, "grad_norm": 2.0145678520202637, "learning_rate": 9.481217461681039e-06, "loss": 0.6155, "step": 14379 }, { "epoch": 0.17192936309616327, "grad_norm": 5.751029014587402, "learning_rate": 9.481131577047216e-06, "loss": 0.5575, "step": 14380 }, { "epoch": 0.17194131924102393, "grad_norm": 1.7128597497940063, "learning_rate": 9.48104568569392e-06, "loss": 0.6474, "step": 14381 }, { "epoch": 0.17195327538588456, "grad_norm": 2.04681134223938, "learning_rate": 9.480959787621281e-06, "loss": 0.6721, "step": 14382 }, { "epoch": 0.17196523153074522, "grad_norm": 2.158768892288208, "learning_rate": 9.480873882829427e-06, "loss": 0.7268, "step": 14383 }, { "epoch": 0.17197718767560588, "grad_norm": 2.18210506439209, "learning_rate": 9.480787971318487e-06, "loss": 0.6361, "step": 14384 }, { "epoch": 0.17198914382046654, "grad_norm": 1.667704701423645, "learning_rate": 9.480702053088589e-06, "loss": 0.5099, "step": 14385 }, { "epoch": 0.17200109996532717, "grad_norm": 1.4832119941711426, "learning_rate": 9.480616128139862e-06, "loss": 0.5234, "step": 14386 }, { "epoch": 0.17201305611018783, "grad_norm": 2.137984275817871, "learning_rate": 9.480530196472435e-06, "loss": 0.5429, "step": 14387 }, { "epoch": 0.1720250122550485, "grad_norm": 5.2287702560424805, "learning_rate": 9.480444258086438e-06, "loss": 0.5185, "step": 14388 }, { "epoch": 0.17203696839990915, "grad_norm": 2.1842286586761475, "learning_rate": 9.480358312982e-06, "loss": 0.6476, "step": 14389 }, { "epoch": 0.17204892454476978, "grad_norm": 3.1536178588867188, "learning_rate": 9.480272361159246e-06, "loss": 0.5642, "step": 14390 }, { "epoch": 0.17206088068963044, "grad_norm": 1.70237135887146, "learning_rate": 9.48018640261831e-06, "loss": 0.6308, "step": 14391 }, { "epoch": 0.1720728368344911, "grad_norm": 2.0036215782165527, "learning_rate": 9.480100437359317e-06, "loss": 0.6554, "step": 14392 }, { "epoch": 0.17208479297935173, "grad_norm": 3.010756015777588, "learning_rate": 9.480014465382397e-06, "loss": 0.739, "step": 14393 }, { "epoch": 0.17209674912421238, "grad_norm": 1.6205193996429443, "learning_rate": 9.479928486687681e-06, "loss": 0.6061, "step": 14394 }, { "epoch": 0.17210870526907304, "grad_norm": 3.427981376647949, "learning_rate": 9.479842501275296e-06, "loss": 0.7228, "step": 14395 }, { "epoch": 0.1721206614139337, "grad_norm": 1.8383817672729492, "learning_rate": 9.47975650914537e-06, "loss": 0.6522, "step": 14396 }, { "epoch": 0.17213261755879433, "grad_norm": 1.7353006601333618, "learning_rate": 9.479670510298036e-06, "loss": 0.6213, "step": 14397 }, { "epoch": 0.172144573703655, "grad_norm": 1.8407588005065918, "learning_rate": 9.479584504733417e-06, "loss": 0.5892, "step": 14398 }, { "epoch": 0.17215652984851565, "grad_norm": 2.0626418590545654, "learning_rate": 9.479498492451646e-06, "loss": 0.7163, "step": 14399 }, { "epoch": 0.1721684859933763, "grad_norm": 1.3925772905349731, "learning_rate": 9.479412473452853e-06, "loss": 0.6812, "step": 14400 }, { "epoch": 0.17218044213823694, "grad_norm": 2.40563702583313, "learning_rate": 9.479326447737163e-06, "loss": 0.5806, "step": 14401 }, { "epoch": 0.1721923982830976, "grad_norm": 2.314107894897461, "learning_rate": 9.479240415304706e-06, "loss": 0.8453, "step": 14402 }, { "epoch": 0.17220435442795826, "grad_norm": 2.4504923820495605, "learning_rate": 9.479154376155616e-06, "loss": 0.5715, "step": 14403 }, { "epoch": 0.1722163105728189, "grad_norm": 2.353830337524414, "learning_rate": 9.479068330290016e-06, "loss": 0.6673, "step": 14404 }, { "epoch": 0.17222826671767955, "grad_norm": 1.5837130546569824, "learning_rate": 9.478982277708035e-06, "loss": 0.6395, "step": 14405 }, { "epoch": 0.1722402228625402, "grad_norm": 2.6556951999664307, "learning_rate": 9.478896218409808e-06, "loss": 0.5398, "step": 14406 }, { "epoch": 0.17225217900740086, "grad_norm": 2.821600914001465, "learning_rate": 9.478810152395456e-06, "loss": 0.6234, "step": 14407 }, { "epoch": 0.1722641351522615, "grad_norm": 1.9914544820785522, "learning_rate": 9.478724079665116e-06, "loss": 0.5877, "step": 14408 }, { "epoch": 0.17227609129712215, "grad_norm": 1.9549946784973145, "learning_rate": 9.478638000218912e-06, "loss": 0.6414, "step": 14409 }, { "epoch": 0.1722880474419828, "grad_norm": 2.311450958251953, "learning_rate": 9.478551914056974e-06, "loss": 0.6326, "step": 14410 }, { "epoch": 0.17230000358684347, "grad_norm": 1.8855087757110596, "learning_rate": 9.478465821179432e-06, "loss": 0.6078, "step": 14411 }, { "epoch": 0.1723119597317041, "grad_norm": 1.8365399837493896, "learning_rate": 9.478379721586416e-06, "loss": 0.6784, "step": 14412 }, { "epoch": 0.17232391587656476, "grad_norm": 2.8377344608306885, "learning_rate": 9.478293615278051e-06, "loss": 0.5039, "step": 14413 }, { "epoch": 0.17233587202142542, "grad_norm": 3.0403876304626465, "learning_rate": 9.478207502254469e-06, "loss": 0.591, "step": 14414 }, { "epoch": 0.17234782816628605, "grad_norm": 1.9888157844543457, "learning_rate": 9.478121382515799e-06, "loss": 0.6258, "step": 14415 }, { "epoch": 0.1723597843111467, "grad_norm": 5.012969970703125, "learning_rate": 9.478035256062172e-06, "loss": 0.6615, "step": 14416 }, { "epoch": 0.17237174045600737, "grad_norm": 7.8267316818237305, "learning_rate": 9.477949122893713e-06, "loss": 0.6549, "step": 14417 }, { "epoch": 0.17238369660086802, "grad_norm": 1.6760059595108032, "learning_rate": 9.477862983010555e-06, "loss": 0.6072, "step": 14418 }, { "epoch": 0.17239565274572866, "grad_norm": 4.871283054351807, "learning_rate": 9.477776836412825e-06, "loss": 0.6338, "step": 14419 }, { "epoch": 0.17240760889058931, "grad_norm": 3.03385853767395, "learning_rate": 9.477690683100654e-06, "loss": 0.6879, "step": 14420 }, { "epoch": 0.17241956503544997, "grad_norm": 8.722546577453613, "learning_rate": 9.477604523074168e-06, "loss": 0.641, "step": 14421 }, { "epoch": 0.17243152118031063, "grad_norm": 2.0035293102264404, "learning_rate": 9.477518356333498e-06, "loss": 0.6798, "step": 14422 }, { "epoch": 0.17244347732517126, "grad_norm": 2.0798113346099854, "learning_rate": 9.477432182878775e-06, "loss": 0.5649, "step": 14423 }, { "epoch": 0.17245543347003192, "grad_norm": 2.6899986267089844, "learning_rate": 9.477346002710125e-06, "loss": 0.6598, "step": 14424 }, { "epoch": 0.17246738961489258, "grad_norm": 5.533033847808838, "learning_rate": 9.47725981582768e-06, "loss": 0.5717, "step": 14425 }, { "epoch": 0.17247934575975324, "grad_norm": 1.5292084217071533, "learning_rate": 9.477173622231566e-06, "loss": 0.6099, "step": 14426 }, { "epoch": 0.17249130190461387, "grad_norm": 1.9151196479797363, "learning_rate": 9.477087421921916e-06, "loss": 0.5433, "step": 14427 }, { "epoch": 0.17250325804947453, "grad_norm": 1.8155237436294556, "learning_rate": 9.477001214898857e-06, "loss": 0.6683, "step": 14428 }, { "epoch": 0.1725152141943352, "grad_norm": 4.249495029449463, "learning_rate": 9.476915001162519e-06, "loss": 0.5944, "step": 14429 }, { "epoch": 0.17252717033919582, "grad_norm": 2.291461706161499, "learning_rate": 9.47682878071303e-06, "loss": 0.6114, "step": 14430 }, { "epoch": 0.17253912648405648, "grad_norm": 4.19927978515625, "learning_rate": 9.476742553550522e-06, "loss": 0.6008, "step": 14431 }, { "epoch": 0.17255108262891714, "grad_norm": 4.78190279006958, "learning_rate": 9.476656319675122e-06, "loss": 0.6036, "step": 14432 }, { "epoch": 0.1725630387737778, "grad_norm": 2.007129192352295, "learning_rate": 9.47657007908696e-06, "loss": 0.6372, "step": 14433 }, { "epoch": 0.17257499491863842, "grad_norm": 5.270719528198242, "learning_rate": 9.476483831786164e-06, "loss": 0.6536, "step": 14434 }, { "epoch": 0.17258695106349908, "grad_norm": 6.330698490142822, "learning_rate": 9.476397577772865e-06, "loss": 0.5367, "step": 14435 }, { "epoch": 0.17259890720835974, "grad_norm": 6.171635627746582, "learning_rate": 9.476311317047193e-06, "loss": 0.598, "step": 14436 }, { "epoch": 0.1726108633532204, "grad_norm": 2.914597272872925, "learning_rate": 9.476225049609276e-06, "loss": 0.7037, "step": 14437 }, { "epoch": 0.17262281949808103, "grad_norm": 2.4979593753814697, "learning_rate": 9.476138775459242e-06, "loss": 0.6492, "step": 14438 }, { "epoch": 0.1726347756429417, "grad_norm": 1.575685739517212, "learning_rate": 9.476052494597225e-06, "loss": 0.6132, "step": 14439 }, { "epoch": 0.17264673178780235, "grad_norm": 3.0281529426574707, "learning_rate": 9.475966207023349e-06, "loss": 0.6039, "step": 14440 }, { "epoch": 0.17265868793266298, "grad_norm": 1.8762586116790771, "learning_rate": 9.475879912737746e-06, "loss": 0.625, "step": 14441 }, { "epoch": 0.17267064407752364, "grad_norm": 1.563280463218689, "learning_rate": 9.475793611740546e-06, "loss": 0.6023, "step": 14442 }, { "epoch": 0.1726826002223843, "grad_norm": 2.7713232040405273, "learning_rate": 9.475707304031879e-06, "loss": 0.6795, "step": 14443 }, { "epoch": 0.17269455636724496, "grad_norm": 3.185317039489746, "learning_rate": 9.47562098961187e-06, "loss": 0.6215, "step": 14444 }, { "epoch": 0.1727065125121056, "grad_norm": 4.73907470703125, "learning_rate": 9.475534668480652e-06, "loss": 0.5954, "step": 14445 }, { "epoch": 0.17271846865696625, "grad_norm": 2.59035587310791, "learning_rate": 9.475448340638356e-06, "loss": 0.5866, "step": 14446 }, { "epoch": 0.1727304248018269, "grad_norm": 2.0236785411834717, "learning_rate": 9.475362006085107e-06, "loss": 0.6138, "step": 14447 }, { "epoch": 0.17274238094668756, "grad_norm": 1.6242576837539673, "learning_rate": 9.475275664821038e-06, "loss": 0.5656, "step": 14448 }, { "epoch": 0.1727543370915482, "grad_norm": 2.204089879989624, "learning_rate": 9.475189316846276e-06, "loss": 0.6553, "step": 14449 }, { "epoch": 0.17276629323640885, "grad_norm": 9.927778244018555, "learning_rate": 9.475102962160953e-06, "loss": 0.6288, "step": 14450 }, { "epoch": 0.1727782493812695, "grad_norm": 4.487074851989746, "learning_rate": 9.475016600765196e-06, "loss": 0.6013, "step": 14451 }, { "epoch": 0.17279020552613014, "grad_norm": 5.622318744659424, "learning_rate": 9.474930232659136e-06, "loss": 0.7008, "step": 14452 }, { "epoch": 0.1728021616709908, "grad_norm": 3.262589454650879, "learning_rate": 9.474843857842904e-06, "loss": 0.5654, "step": 14453 }, { "epoch": 0.17281411781585146, "grad_norm": 9.910917282104492, "learning_rate": 9.474757476316625e-06, "loss": 0.5724, "step": 14454 }, { "epoch": 0.17282607396071212, "grad_norm": 5.075671195983887, "learning_rate": 9.474671088080434e-06, "loss": 0.6858, "step": 14455 }, { "epoch": 0.17283803010557275, "grad_norm": 2.7116899490356445, "learning_rate": 9.474584693134456e-06, "loss": 0.7051, "step": 14456 }, { "epoch": 0.1728499862504334, "grad_norm": 1.9996272325515747, "learning_rate": 9.474498291478821e-06, "loss": 0.576, "step": 14457 }, { "epoch": 0.17286194239529407, "grad_norm": 2.0007612705230713, "learning_rate": 9.474411883113663e-06, "loss": 0.6928, "step": 14458 }, { "epoch": 0.17287389854015472, "grad_norm": 3.0851786136627197, "learning_rate": 9.474325468039107e-06, "loss": 0.5861, "step": 14459 }, { "epoch": 0.17288585468501536, "grad_norm": 1.995270013809204, "learning_rate": 9.474239046255285e-06, "loss": 0.6449, "step": 14460 }, { "epoch": 0.17289781082987601, "grad_norm": 1.8349123001098633, "learning_rate": 9.474152617762325e-06, "loss": 0.6163, "step": 14461 }, { "epoch": 0.17290976697473667, "grad_norm": 2.3407962322235107, "learning_rate": 9.474066182560357e-06, "loss": 0.5306, "step": 14462 }, { "epoch": 0.1729217231195973, "grad_norm": 4.014438629150391, "learning_rate": 9.47397974064951e-06, "loss": 0.5576, "step": 14463 }, { "epoch": 0.17293367926445796, "grad_norm": 2.215998888015747, "learning_rate": 9.473893292029916e-06, "loss": 0.6005, "step": 14464 }, { "epoch": 0.17294563540931862, "grad_norm": 2.754664182662964, "learning_rate": 9.473806836701702e-06, "loss": 0.6924, "step": 14465 }, { "epoch": 0.17295759155417928, "grad_norm": 2.7845895290374756, "learning_rate": 9.473720374664999e-06, "loss": 0.6109, "step": 14466 }, { "epoch": 0.1729695476990399, "grad_norm": 1.4789338111877441, "learning_rate": 9.473633905919937e-06, "loss": 0.5957, "step": 14467 }, { "epoch": 0.17298150384390057, "grad_norm": 1.5915956497192383, "learning_rate": 9.473547430466645e-06, "loss": 0.5316, "step": 14468 }, { "epoch": 0.17299345998876123, "grad_norm": 2.890681743621826, "learning_rate": 9.473460948305252e-06, "loss": 0.502, "step": 14469 }, { "epoch": 0.1730054161336219, "grad_norm": 1.8402841091156006, "learning_rate": 9.473374459435889e-06, "loss": 0.6339, "step": 14470 }, { "epoch": 0.17301737227848252, "grad_norm": 2.5201170444488525, "learning_rate": 9.473287963858685e-06, "loss": 0.5771, "step": 14471 }, { "epoch": 0.17302932842334318, "grad_norm": 1.5357301235198975, "learning_rate": 9.47320146157377e-06, "loss": 0.662, "step": 14472 }, { "epoch": 0.17304128456820383, "grad_norm": 3.6632227897644043, "learning_rate": 9.473114952581273e-06, "loss": 0.58, "step": 14473 }, { "epoch": 0.17305324071306447, "grad_norm": 2.397662878036499, "learning_rate": 9.473028436881324e-06, "loss": 0.6256, "step": 14474 }, { "epoch": 0.17306519685792512, "grad_norm": 1.7724549770355225, "learning_rate": 9.472941914474054e-06, "loss": 0.6561, "step": 14475 }, { "epoch": 0.17307715300278578, "grad_norm": 2.582268476486206, "learning_rate": 9.472855385359592e-06, "loss": 0.5769, "step": 14476 }, { "epoch": 0.17308910914764644, "grad_norm": 2.6853692531585693, "learning_rate": 9.472768849538065e-06, "loss": 0.7009, "step": 14477 }, { "epoch": 0.17310106529250707, "grad_norm": 2.216243267059326, "learning_rate": 9.472682307009608e-06, "loss": 0.6249, "step": 14478 }, { "epoch": 0.17311302143736773, "grad_norm": 2.659724712371826, "learning_rate": 9.472595757774346e-06, "loss": 0.5915, "step": 14479 }, { "epoch": 0.1731249775822284, "grad_norm": 2.2775161266326904, "learning_rate": 9.47250920183241e-06, "loss": 0.7853, "step": 14480 }, { "epoch": 0.17313693372708905, "grad_norm": 2.002922296524048, "learning_rate": 9.472422639183933e-06, "loss": 0.6722, "step": 14481 }, { "epoch": 0.17314888987194968, "grad_norm": 2.023555040359497, "learning_rate": 9.472336069829043e-06, "loss": 0.6554, "step": 14482 }, { "epoch": 0.17316084601681034, "grad_norm": 1.421929955482483, "learning_rate": 9.472249493767866e-06, "loss": 0.6649, "step": 14483 }, { "epoch": 0.173172802161671, "grad_norm": 6.997707366943359, "learning_rate": 9.472162911000538e-06, "loss": 0.6198, "step": 14484 }, { "epoch": 0.17318475830653166, "grad_norm": 2.8227944374084473, "learning_rate": 9.472076321527184e-06, "loss": 0.6082, "step": 14485 }, { "epoch": 0.1731967144513923, "grad_norm": 1.6417750120162964, "learning_rate": 9.471989725347937e-06, "loss": 0.6068, "step": 14486 }, { "epoch": 0.17320867059625294, "grad_norm": 2.145226240158081, "learning_rate": 9.471903122462924e-06, "loss": 0.693, "step": 14487 }, { "epoch": 0.1732206267411136, "grad_norm": 1.7145806550979614, "learning_rate": 9.471816512872277e-06, "loss": 0.6551, "step": 14488 }, { "epoch": 0.17323258288597423, "grad_norm": 1.838150143623352, "learning_rate": 9.471729896576125e-06, "loss": 0.6636, "step": 14489 }, { "epoch": 0.1732445390308349, "grad_norm": 2.016218423843384, "learning_rate": 9.471643273574598e-06, "loss": 0.6117, "step": 14490 }, { "epoch": 0.17325649517569555, "grad_norm": 1.5733387470245361, "learning_rate": 9.471556643867826e-06, "loss": 0.6433, "step": 14491 }, { "epoch": 0.1732684513205562, "grad_norm": 1.50766122341156, "learning_rate": 9.47147000745594e-06, "loss": 0.5557, "step": 14492 }, { "epoch": 0.17328040746541684, "grad_norm": 3.8646163940429688, "learning_rate": 9.471383364339068e-06, "loss": 0.6086, "step": 14493 }, { "epoch": 0.1732923636102775, "grad_norm": 1.660317063331604, "learning_rate": 9.47129671451734e-06, "loss": 0.6384, "step": 14494 }, { "epoch": 0.17330431975513816, "grad_norm": 1.8016419410705566, "learning_rate": 9.471210057990888e-06, "loss": 0.5893, "step": 14495 }, { "epoch": 0.17331627589999882, "grad_norm": 1.1988723278045654, "learning_rate": 9.47112339475984e-06, "loss": 0.518, "step": 14496 }, { "epoch": 0.17332823204485945, "grad_norm": 2.501171350479126, "learning_rate": 9.471036724824328e-06, "loss": 0.5271, "step": 14497 }, { "epoch": 0.1733401881897201, "grad_norm": 4.026611804962158, "learning_rate": 9.470950048184478e-06, "loss": 0.6674, "step": 14498 }, { "epoch": 0.17335214433458077, "grad_norm": 2.0940825939178467, "learning_rate": 9.470863364840424e-06, "loss": 0.6413, "step": 14499 }, { "epoch": 0.1733641004794414, "grad_norm": 5.513952255249023, "learning_rate": 9.470776674792294e-06, "loss": 0.5629, "step": 14500 }, { "epoch": 0.17337605662430206, "grad_norm": 2.100277900695801, "learning_rate": 9.47068997804022e-06, "loss": 0.6983, "step": 14501 }, { "epoch": 0.1733880127691627, "grad_norm": 1.5279209613800049, "learning_rate": 9.47060327458433e-06, "loss": 0.5613, "step": 14502 }, { "epoch": 0.17339996891402337, "grad_norm": 1.348659873008728, "learning_rate": 9.470516564424752e-06, "loss": 0.6091, "step": 14503 }, { "epoch": 0.173411925058884, "grad_norm": 2.753950834274292, "learning_rate": 9.47042984756162e-06, "loss": 0.602, "step": 14504 }, { "epoch": 0.17342388120374466, "grad_norm": 2.173917293548584, "learning_rate": 9.470343123995063e-06, "loss": 0.5866, "step": 14505 }, { "epoch": 0.17343583734860532, "grad_norm": 2.4933865070343018, "learning_rate": 9.47025639372521e-06, "loss": 0.576, "step": 14506 }, { "epoch": 0.17344779349346598, "grad_norm": 2.008683681488037, "learning_rate": 9.470169656752193e-06, "loss": 0.6017, "step": 14507 }, { "epoch": 0.1734597496383266, "grad_norm": 2.951685905456543, "learning_rate": 9.47008291307614e-06, "loss": 0.5937, "step": 14508 }, { "epoch": 0.17347170578318727, "grad_norm": 2.2373437881469727, "learning_rate": 9.469996162697182e-06, "loss": 0.5219, "step": 14509 }, { "epoch": 0.17348366192804793, "grad_norm": 2.5074009895324707, "learning_rate": 9.469909405615447e-06, "loss": 0.6163, "step": 14510 }, { "epoch": 0.17349561807290856, "grad_norm": 2.2650341987609863, "learning_rate": 9.46982264183107e-06, "loss": 0.5727, "step": 14511 }, { "epoch": 0.17350757421776922, "grad_norm": 2.165055513381958, "learning_rate": 9.469735871344176e-06, "loss": 0.6053, "step": 14512 }, { "epoch": 0.17351953036262988, "grad_norm": 2.862694025039673, "learning_rate": 9.469649094154897e-06, "loss": 0.5187, "step": 14513 }, { "epoch": 0.17353148650749053, "grad_norm": 1.3811159133911133, "learning_rate": 9.469562310263365e-06, "loss": 0.6181, "step": 14514 }, { "epoch": 0.17354344265235117, "grad_norm": 2.394441604614258, "learning_rate": 9.469475519669707e-06, "loss": 0.5883, "step": 14515 }, { "epoch": 0.17355539879721182, "grad_norm": 1.6299132108688354, "learning_rate": 9.469388722374055e-06, "loss": 0.5634, "step": 14516 }, { "epoch": 0.17356735494207248, "grad_norm": 1.5721592903137207, "learning_rate": 9.469301918376539e-06, "loss": 0.6439, "step": 14517 }, { "epoch": 0.17357931108693314, "grad_norm": 2.128208637237549, "learning_rate": 9.469215107677288e-06, "loss": 0.5257, "step": 14518 }, { "epoch": 0.17359126723179377, "grad_norm": 1.3083899021148682, "learning_rate": 9.469128290276433e-06, "loss": 0.5942, "step": 14519 }, { "epoch": 0.17360322337665443, "grad_norm": 1.8948607444763184, "learning_rate": 9.469041466174104e-06, "loss": 0.521, "step": 14520 }, { "epoch": 0.1736151795215151, "grad_norm": 37.1797981262207, "learning_rate": 9.468954635370433e-06, "loss": 0.6862, "step": 14521 }, { "epoch": 0.17362713566637572, "grad_norm": 2.4812371730804443, "learning_rate": 9.468867797865549e-06, "loss": 0.6503, "step": 14522 }, { "epoch": 0.17363909181123638, "grad_norm": 2.5872981548309326, "learning_rate": 9.46878095365958e-06, "loss": 0.6422, "step": 14523 }, { "epoch": 0.17365104795609704, "grad_norm": 2.7095165252685547, "learning_rate": 9.468694102752659e-06, "loss": 0.695, "step": 14524 }, { "epoch": 0.1736630041009577, "grad_norm": 1.673147439956665, "learning_rate": 9.468607245144915e-06, "loss": 0.5699, "step": 14525 }, { "epoch": 0.17367496024581833, "grad_norm": 1.3970069885253906, "learning_rate": 9.46852038083648e-06, "loss": 0.5498, "step": 14526 }, { "epoch": 0.17368691639067899, "grad_norm": 1.919800043106079, "learning_rate": 9.46843350982748e-06, "loss": 0.5725, "step": 14527 }, { "epoch": 0.17369887253553964, "grad_norm": 2.9097366333007812, "learning_rate": 9.46834663211805e-06, "loss": 0.6144, "step": 14528 }, { "epoch": 0.1737108286804003, "grad_norm": 1.73665452003479, "learning_rate": 9.468259747708317e-06, "loss": 0.6459, "step": 14529 }, { "epoch": 0.17372278482526093, "grad_norm": 1.88288152217865, "learning_rate": 9.468172856598414e-06, "loss": 0.6804, "step": 14530 }, { "epoch": 0.1737347409701216, "grad_norm": 1.3814324140548706, "learning_rate": 9.46808595878847e-06, "loss": 0.5758, "step": 14531 }, { "epoch": 0.17374669711498225, "grad_norm": 1.9433974027633667, "learning_rate": 9.467999054278614e-06, "loss": 0.6786, "step": 14532 }, { "epoch": 0.1737586532598429, "grad_norm": 2.375183343887329, "learning_rate": 9.467912143068978e-06, "loss": 0.6955, "step": 14533 }, { "epoch": 0.17377060940470354, "grad_norm": 1.73577880859375, "learning_rate": 9.467825225159692e-06, "loss": 0.5497, "step": 14534 }, { "epoch": 0.1737825655495642, "grad_norm": 1.9313143491744995, "learning_rate": 9.467738300550888e-06, "loss": 0.6668, "step": 14535 }, { "epoch": 0.17379452169442486, "grad_norm": 1.5534625053405762, "learning_rate": 9.467651369242692e-06, "loss": 0.6579, "step": 14536 }, { "epoch": 0.1738064778392855, "grad_norm": 2.076902389526367, "learning_rate": 9.467564431235238e-06, "loss": 0.5457, "step": 14537 }, { "epoch": 0.17381843398414615, "grad_norm": 3.437936305999756, "learning_rate": 9.467477486528655e-06, "loss": 0.6197, "step": 14538 }, { "epoch": 0.1738303901290068, "grad_norm": 1.5398038625717163, "learning_rate": 9.467390535123072e-06, "loss": 0.6254, "step": 14539 }, { "epoch": 0.17384234627386747, "grad_norm": 4.7510151863098145, "learning_rate": 9.467303577018623e-06, "loss": 0.7233, "step": 14540 }, { "epoch": 0.1738543024187281, "grad_norm": 1.5710768699645996, "learning_rate": 9.467216612215438e-06, "loss": 0.6503, "step": 14541 }, { "epoch": 0.17386625856358875, "grad_norm": 5.540084362030029, "learning_rate": 9.467129640713643e-06, "loss": 0.587, "step": 14542 }, { "epoch": 0.1738782147084494, "grad_norm": 1.8231340646743774, "learning_rate": 9.467042662513372e-06, "loss": 0.6786, "step": 14543 }, { "epoch": 0.17389017085331007, "grad_norm": 2.289116859436035, "learning_rate": 9.466955677614755e-06, "loss": 0.5491, "step": 14544 }, { "epoch": 0.1739021269981707, "grad_norm": 4.515044212341309, "learning_rate": 9.46686868601792e-06, "loss": 0.6946, "step": 14545 }, { "epoch": 0.17391408314303136, "grad_norm": 1.8499797582626343, "learning_rate": 9.466781687723003e-06, "loss": 0.6234, "step": 14546 }, { "epoch": 0.17392603928789202, "grad_norm": 1.4394630193710327, "learning_rate": 9.466694682730128e-06, "loss": 0.5844, "step": 14547 }, { "epoch": 0.17393799543275265, "grad_norm": 5.251676082611084, "learning_rate": 9.46660767103943e-06, "loss": 0.6556, "step": 14548 }, { "epoch": 0.1739499515776133, "grad_norm": 5.427163600921631, "learning_rate": 9.466520652651037e-06, "loss": 0.6343, "step": 14549 }, { "epoch": 0.17396190772247397, "grad_norm": 5.456142902374268, "learning_rate": 9.466433627565081e-06, "loss": 0.5942, "step": 14550 }, { "epoch": 0.17397386386733463, "grad_norm": 2.340636968612671, "learning_rate": 9.466346595781693e-06, "loss": 0.6409, "step": 14551 }, { "epoch": 0.17398582001219526, "grad_norm": 4.803971290588379, "learning_rate": 9.466259557301e-06, "loss": 0.6653, "step": 14552 }, { "epoch": 0.17399777615705592, "grad_norm": 1.8094055652618408, "learning_rate": 9.466172512123138e-06, "loss": 0.6312, "step": 14553 }, { "epoch": 0.17400973230191658, "grad_norm": 1.740105390548706, "learning_rate": 9.466085460248233e-06, "loss": 0.7334, "step": 14554 }, { "epoch": 0.17402168844677723, "grad_norm": 2.1126818656921387, "learning_rate": 9.465998401676417e-06, "loss": 0.7631, "step": 14555 }, { "epoch": 0.17403364459163786, "grad_norm": 1.3564356565475464, "learning_rate": 9.46591133640782e-06, "loss": 0.6164, "step": 14556 }, { "epoch": 0.17404560073649852, "grad_norm": 1.7364792823791504, "learning_rate": 9.465824264442572e-06, "loss": 0.6062, "step": 14557 }, { "epoch": 0.17405755688135918, "grad_norm": 3.5026588439941406, "learning_rate": 9.465737185780806e-06, "loss": 0.5695, "step": 14558 }, { "epoch": 0.1740695130262198, "grad_norm": 1.58137845993042, "learning_rate": 9.465650100422652e-06, "loss": 0.6964, "step": 14559 }, { "epoch": 0.17408146917108047, "grad_norm": 1.6631197929382324, "learning_rate": 9.46556300836824e-06, "loss": 0.6631, "step": 14560 }, { "epoch": 0.17409342531594113, "grad_norm": 1.7617274522781372, "learning_rate": 9.465475909617699e-06, "loss": 0.6391, "step": 14561 }, { "epoch": 0.1741053814608018, "grad_norm": 2.453876256942749, "learning_rate": 9.46538880417116e-06, "loss": 0.5907, "step": 14562 }, { "epoch": 0.17411733760566242, "grad_norm": 3.836585760116577, "learning_rate": 9.465301692028757e-06, "loss": 0.6024, "step": 14563 }, { "epoch": 0.17412929375052308, "grad_norm": 4.675343990325928, "learning_rate": 9.465214573190616e-06, "loss": 0.6984, "step": 14564 }, { "epoch": 0.17414124989538374, "grad_norm": 1.5326006412506104, "learning_rate": 9.465127447656873e-06, "loss": 0.5916, "step": 14565 }, { "epoch": 0.1741532060402444, "grad_norm": 1.9875153303146362, "learning_rate": 9.465040315427652e-06, "loss": 0.6591, "step": 14566 }, { "epoch": 0.17416516218510503, "grad_norm": 1.5507898330688477, "learning_rate": 9.464953176503089e-06, "loss": 0.6235, "step": 14567 }, { "epoch": 0.17417711832996569, "grad_norm": 4.668177127838135, "learning_rate": 9.464866030883313e-06, "loss": 0.6583, "step": 14568 }, { "epoch": 0.17418907447482634, "grad_norm": 1.4276628494262695, "learning_rate": 9.464778878568454e-06, "loss": 0.5949, "step": 14569 }, { "epoch": 0.17420103061968698, "grad_norm": 3.0217056274414062, "learning_rate": 9.464691719558642e-06, "loss": 0.6142, "step": 14570 }, { "epoch": 0.17421298676454763, "grad_norm": 1.791818380355835, "learning_rate": 9.46460455385401e-06, "loss": 0.6268, "step": 14571 }, { "epoch": 0.1742249429094083, "grad_norm": 5.055515289306641, "learning_rate": 9.46451738145469e-06, "loss": 0.5861, "step": 14572 }, { "epoch": 0.17423689905426895, "grad_norm": 3.025311231613159, "learning_rate": 9.464430202360809e-06, "loss": 0.528, "step": 14573 }, { "epoch": 0.17424885519912958, "grad_norm": 4.635127067565918, "learning_rate": 9.464343016572497e-06, "loss": 0.7116, "step": 14574 }, { "epoch": 0.17426081134399024, "grad_norm": 4.532208442687988, "learning_rate": 9.464255824089888e-06, "loss": 0.5546, "step": 14575 }, { "epoch": 0.1742727674888509, "grad_norm": 2.469162940979004, "learning_rate": 9.46416862491311e-06, "loss": 0.6614, "step": 14576 }, { "epoch": 0.17428472363371156, "grad_norm": 2.621300220489502, "learning_rate": 9.464081419042298e-06, "loss": 0.7163, "step": 14577 }, { "epoch": 0.1742966797785722, "grad_norm": 1.6841459274291992, "learning_rate": 9.463994206477577e-06, "loss": 0.5503, "step": 14578 }, { "epoch": 0.17430863592343285, "grad_norm": 2.35971999168396, "learning_rate": 9.463906987219084e-06, "loss": 0.6505, "step": 14579 }, { "epoch": 0.1743205920682935, "grad_norm": 1.6447803974151611, "learning_rate": 9.463819761266946e-06, "loss": 0.6299, "step": 14580 }, { "epoch": 0.17433254821315414, "grad_norm": 7.601627349853516, "learning_rate": 9.463732528621292e-06, "loss": 0.6347, "step": 14581 }, { "epoch": 0.1743445043580148, "grad_norm": 1.7126826047897339, "learning_rate": 9.463645289282257e-06, "loss": 0.6449, "step": 14582 }, { "epoch": 0.17435646050287545, "grad_norm": 1.671392798423767, "learning_rate": 9.46355804324997e-06, "loss": 0.5978, "step": 14583 }, { "epoch": 0.1743684166477361, "grad_norm": 1.902420997619629, "learning_rate": 9.463470790524562e-06, "loss": 0.6279, "step": 14584 }, { "epoch": 0.17438037279259674, "grad_norm": 1.9013975858688354, "learning_rate": 9.463383531106163e-06, "loss": 0.7614, "step": 14585 }, { "epoch": 0.1743923289374574, "grad_norm": 1.6637136936187744, "learning_rate": 9.463296264994905e-06, "loss": 0.5729, "step": 14586 }, { "epoch": 0.17440428508231806, "grad_norm": 2.102686882019043, "learning_rate": 9.46320899219092e-06, "loss": 0.7282, "step": 14587 }, { "epoch": 0.17441624122717872, "grad_norm": 1.7571420669555664, "learning_rate": 9.463121712694335e-06, "loss": 0.5928, "step": 14588 }, { "epoch": 0.17442819737203935, "grad_norm": 1.8424381017684937, "learning_rate": 9.463034426505284e-06, "loss": 0.6229, "step": 14589 }, { "epoch": 0.1744401535169, "grad_norm": 2.0663368701934814, "learning_rate": 9.462947133623897e-06, "loss": 0.6751, "step": 14590 }, { "epoch": 0.17445210966176067, "grad_norm": 1.6997740268707275, "learning_rate": 9.462859834050304e-06, "loss": 0.6732, "step": 14591 }, { "epoch": 0.17446406580662133, "grad_norm": 1.4493765830993652, "learning_rate": 9.462772527784637e-06, "loss": 0.6027, "step": 14592 }, { "epoch": 0.17447602195148196, "grad_norm": 1.770011305809021, "learning_rate": 9.462685214827028e-06, "loss": 0.5786, "step": 14593 }, { "epoch": 0.17448797809634262, "grad_norm": 2.2107744216918945, "learning_rate": 9.462597895177605e-06, "loss": 0.6542, "step": 14594 }, { "epoch": 0.17449993424120328, "grad_norm": 2.0020272731781006, "learning_rate": 9.4625105688365e-06, "loss": 0.5743, "step": 14595 }, { "epoch": 0.1745118903860639, "grad_norm": 1.3341648578643799, "learning_rate": 9.462423235803848e-06, "loss": 0.6106, "step": 14596 }, { "epoch": 0.17452384653092456, "grad_norm": 1.9888741970062256, "learning_rate": 9.462335896079773e-06, "loss": 0.6203, "step": 14597 }, { "epoch": 0.17453580267578522, "grad_norm": 1.6076207160949707, "learning_rate": 9.462248549664413e-06, "loss": 0.5932, "step": 14598 }, { "epoch": 0.17454775882064588, "grad_norm": 1.6988017559051514, "learning_rate": 9.462161196557892e-06, "loss": 0.6728, "step": 14599 }, { "epoch": 0.1745597149655065, "grad_norm": 3.1226613521575928, "learning_rate": 9.462073836760347e-06, "loss": 0.7215, "step": 14600 }, { "epoch": 0.17457167111036717, "grad_norm": 1.9545351266860962, "learning_rate": 9.461986470271905e-06, "loss": 0.5219, "step": 14601 }, { "epoch": 0.17458362725522783, "grad_norm": 1.4554167985916138, "learning_rate": 9.461899097092699e-06, "loss": 0.6343, "step": 14602 }, { "epoch": 0.1745955834000885, "grad_norm": 1.8522130250930786, "learning_rate": 9.461811717222858e-06, "loss": 0.6587, "step": 14603 }, { "epoch": 0.17460753954494912, "grad_norm": 3.339155912399292, "learning_rate": 9.461724330662516e-06, "loss": 0.5855, "step": 14604 }, { "epoch": 0.17461949568980978, "grad_norm": 7.665992259979248, "learning_rate": 9.461636937411803e-06, "loss": 0.6863, "step": 14605 }, { "epoch": 0.17463145183467044, "grad_norm": 2.4457545280456543, "learning_rate": 9.461549537470847e-06, "loss": 0.6544, "step": 14606 }, { "epoch": 0.17464340797953107, "grad_norm": 2.0225841999053955, "learning_rate": 9.461462130839784e-06, "loss": 0.7132, "step": 14607 }, { "epoch": 0.17465536412439173, "grad_norm": 5.438364505767822, "learning_rate": 9.461374717518741e-06, "loss": 0.6499, "step": 14608 }, { "epoch": 0.17466732026925239, "grad_norm": 1.8149558305740356, "learning_rate": 9.461287297507853e-06, "loss": 0.6759, "step": 14609 }, { "epoch": 0.17467927641411304, "grad_norm": 1.7761021852493286, "learning_rate": 9.461199870807247e-06, "loss": 0.6544, "step": 14610 }, { "epoch": 0.17469123255897367, "grad_norm": 1.7508162260055542, "learning_rate": 9.461112437417056e-06, "loss": 0.5941, "step": 14611 }, { "epoch": 0.17470318870383433, "grad_norm": 2.8228869438171387, "learning_rate": 9.461024997337411e-06, "loss": 0.6216, "step": 14612 }, { "epoch": 0.174715144848695, "grad_norm": 3.761589765548706, "learning_rate": 9.460937550568445e-06, "loss": 0.5673, "step": 14613 }, { "epoch": 0.17472710099355565, "grad_norm": 2.7653045654296875, "learning_rate": 9.460850097110285e-06, "loss": 0.5694, "step": 14614 }, { "epoch": 0.17473905713841628, "grad_norm": 2.3498663902282715, "learning_rate": 9.460762636963066e-06, "loss": 0.626, "step": 14615 }, { "epoch": 0.17475101328327694, "grad_norm": 1.6824082136154175, "learning_rate": 9.460675170126915e-06, "loss": 0.6359, "step": 14616 }, { "epoch": 0.1747629694281376, "grad_norm": 3.7088630199432373, "learning_rate": 9.460587696601968e-06, "loss": 0.618, "step": 14617 }, { "epoch": 0.17477492557299823, "grad_norm": 2.422865629196167, "learning_rate": 9.460500216388355e-06, "loss": 0.6637, "step": 14618 }, { "epoch": 0.1747868817178589, "grad_norm": 1.6052703857421875, "learning_rate": 9.460412729486204e-06, "loss": 0.5893, "step": 14619 }, { "epoch": 0.17479883786271955, "grad_norm": 2.8859074115753174, "learning_rate": 9.460325235895648e-06, "loss": 0.6577, "step": 14620 }, { "epoch": 0.1748107940075802, "grad_norm": 17.426240921020508, "learning_rate": 9.46023773561682e-06, "loss": 0.6257, "step": 14621 }, { "epoch": 0.17482275015244084, "grad_norm": 1.6697468757629395, "learning_rate": 9.460150228649849e-06, "loss": 0.6449, "step": 14622 }, { "epoch": 0.1748347062973015, "grad_norm": 3.3158137798309326, "learning_rate": 9.460062714994867e-06, "loss": 0.637, "step": 14623 }, { "epoch": 0.17484666244216215, "grad_norm": 1.4755576848983765, "learning_rate": 9.459975194652004e-06, "loss": 0.5783, "step": 14624 }, { "epoch": 0.1748586185870228, "grad_norm": 2.228529214859009, "learning_rate": 9.459887667621394e-06, "loss": 0.6478, "step": 14625 }, { "epoch": 0.17487057473188344, "grad_norm": 2.35807204246521, "learning_rate": 9.459800133903167e-06, "loss": 0.6599, "step": 14626 }, { "epoch": 0.1748825308767441, "grad_norm": 2.8300323486328125, "learning_rate": 9.459712593497453e-06, "loss": 0.678, "step": 14627 }, { "epoch": 0.17489448702160476, "grad_norm": 2.278501033782959, "learning_rate": 9.459625046404383e-06, "loss": 0.6124, "step": 14628 }, { "epoch": 0.1749064431664654, "grad_norm": 1.9808191061019897, "learning_rate": 9.45953749262409e-06, "loss": 0.633, "step": 14629 }, { "epoch": 0.17491839931132605, "grad_norm": 1.6196539402008057, "learning_rate": 9.459449932156706e-06, "loss": 0.5452, "step": 14630 }, { "epoch": 0.1749303554561867, "grad_norm": 14.346561431884766, "learning_rate": 9.459362365002357e-06, "loss": 0.6752, "step": 14631 }, { "epoch": 0.17494231160104737, "grad_norm": 1.9696764945983887, "learning_rate": 9.459274791161184e-06, "loss": 0.5802, "step": 14632 }, { "epoch": 0.174954267745908, "grad_norm": 2.4287023544311523, "learning_rate": 9.45918721063331e-06, "loss": 0.5914, "step": 14633 }, { "epoch": 0.17496622389076866, "grad_norm": 3.1514382362365723, "learning_rate": 9.459099623418868e-06, "loss": 0.5596, "step": 14634 }, { "epoch": 0.17497818003562932, "grad_norm": 2.3823366165161133, "learning_rate": 9.45901202951799e-06, "loss": 0.6196, "step": 14635 }, { "epoch": 0.17499013618048997, "grad_norm": 1.8247323036193848, "learning_rate": 9.458924428930809e-06, "loss": 0.6181, "step": 14636 }, { "epoch": 0.1750020923253506, "grad_norm": 2.857537269592285, "learning_rate": 9.458836821657455e-06, "loss": 0.6105, "step": 14637 }, { "epoch": 0.17501404847021126, "grad_norm": 6.204147815704346, "learning_rate": 9.45874920769806e-06, "loss": 0.6988, "step": 14638 }, { "epoch": 0.17502600461507192, "grad_norm": 1.7261401414871216, "learning_rate": 9.458661587052752e-06, "loss": 0.5645, "step": 14639 }, { "epoch": 0.17503796075993255, "grad_norm": 1.7275021076202393, "learning_rate": 9.458573959721666e-06, "loss": 0.6389, "step": 14640 }, { "epoch": 0.1750499169047932, "grad_norm": 2.016383647918701, "learning_rate": 9.458486325704933e-06, "loss": 0.6515, "step": 14641 }, { "epoch": 0.17506187304965387, "grad_norm": 1.8335051536560059, "learning_rate": 9.458398685002685e-06, "loss": 0.722, "step": 14642 }, { "epoch": 0.17507382919451453, "grad_norm": 5.306227207183838, "learning_rate": 9.458311037615052e-06, "loss": 0.4925, "step": 14643 }, { "epoch": 0.17508578533937516, "grad_norm": 1.7585126161575317, "learning_rate": 9.458223383542165e-06, "loss": 0.6452, "step": 14644 }, { "epoch": 0.17509774148423582, "grad_norm": 1.78133225440979, "learning_rate": 9.458135722784157e-06, "loss": 0.5876, "step": 14645 }, { "epoch": 0.17510969762909648, "grad_norm": 2.104111433029175, "learning_rate": 9.458048055341156e-06, "loss": 0.7535, "step": 14646 }, { "epoch": 0.17512165377395714, "grad_norm": 1.8896924257278442, "learning_rate": 9.457960381213299e-06, "loss": 0.5999, "step": 14647 }, { "epoch": 0.17513360991881777, "grad_norm": 2.235043525695801, "learning_rate": 9.457872700400714e-06, "loss": 0.5874, "step": 14648 }, { "epoch": 0.17514556606367843, "grad_norm": 2.1684770584106445, "learning_rate": 9.457785012903532e-06, "loss": 0.704, "step": 14649 }, { "epoch": 0.17515752220853908, "grad_norm": 1.582734227180481, "learning_rate": 9.457697318721887e-06, "loss": 0.6688, "step": 14650 }, { "epoch": 0.17516947835339974, "grad_norm": 2.695950984954834, "learning_rate": 9.457609617855907e-06, "loss": 0.6204, "step": 14651 }, { "epoch": 0.17518143449826037, "grad_norm": 2.541210412979126, "learning_rate": 9.457521910305727e-06, "loss": 0.5695, "step": 14652 }, { "epoch": 0.17519339064312103, "grad_norm": 2.470414876937866, "learning_rate": 9.457434196071478e-06, "loss": 0.6863, "step": 14653 }, { "epoch": 0.1752053467879817, "grad_norm": 2.4905567169189453, "learning_rate": 9.457346475153289e-06, "loss": 0.6183, "step": 14654 }, { "epoch": 0.17521730293284232, "grad_norm": 2.4684996604919434, "learning_rate": 9.457258747551294e-06, "loss": 0.6437, "step": 14655 }, { "epoch": 0.17522925907770298, "grad_norm": 2.505561113357544, "learning_rate": 9.457171013265622e-06, "loss": 0.6794, "step": 14656 }, { "epoch": 0.17524121522256364, "grad_norm": 121.33014678955078, "learning_rate": 9.457083272296408e-06, "loss": 0.6957, "step": 14657 }, { "epoch": 0.1752531713674243, "grad_norm": 1.9320944547653198, "learning_rate": 9.456995524643782e-06, "loss": 0.676, "step": 14658 }, { "epoch": 0.17526512751228493, "grad_norm": 1.9120762348175049, "learning_rate": 9.456907770307875e-06, "loss": 0.6046, "step": 14659 }, { "epoch": 0.1752770836571456, "grad_norm": 1.545889973640442, "learning_rate": 9.456820009288818e-06, "loss": 0.5873, "step": 14660 }, { "epoch": 0.17528903980200625, "grad_norm": 1.6953883171081543, "learning_rate": 9.456732241586744e-06, "loss": 0.5893, "step": 14661 }, { "epoch": 0.1753009959468669, "grad_norm": 2.376352310180664, "learning_rate": 9.456644467201783e-06, "loss": 0.6025, "step": 14662 }, { "epoch": 0.17531295209172754, "grad_norm": 1.5372278690338135, "learning_rate": 9.45655668613407e-06, "loss": 0.5685, "step": 14663 }, { "epoch": 0.1753249082365882, "grad_norm": 1.7404807806015015, "learning_rate": 9.456468898383733e-06, "loss": 0.6103, "step": 14664 }, { "epoch": 0.17533686438144885, "grad_norm": 5.2755656242370605, "learning_rate": 9.456381103950905e-06, "loss": 0.5587, "step": 14665 }, { "epoch": 0.17534882052630948, "grad_norm": 1.9170633554458618, "learning_rate": 9.45629330283572e-06, "loss": 0.6205, "step": 14666 }, { "epoch": 0.17536077667117014, "grad_norm": 8.822287559509277, "learning_rate": 9.456205495038304e-06, "loss": 0.5969, "step": 14667 }, { "epoch": 0.1753727328160308, "grad_norm": 2.379282236099243, "learning_rate": 9.456117680558793e-06, "loss": 0.6641, "step": 14668 }, { "epoch": 0.17538468896089146, "grad_norm": 2.708949565887451, "learning_rate": 9.45602985939732e-06, "loss": 0.6116, "step": 14669 }, { "epoch": 0.1753966451057521, "grad_norm": 2.293259382247925, "learning_rate": 9.455942031554012e-06, "loss": 0.6638, "step": 14670 }, { "epoch": 0.17540860125061275, "grad_norm": 3.283539295196533, "learning_rate": 9.455854197029004e-06, "loss": 0.6343, "step": 14671 }, { "epoch": 0.1754205573954734, "grad_norm": 3.0034849643707275, "learning_rate": 9.455766355822426e-06, "loss": 0.6516, "step": 14672 }, { "epoch": 0.17543251354033407, "grad_norm": 1.9142423868179321, "learning_rate": 9.455678507934411e-06, "loss": 0.708, "step": 14673 }, { "epoch": 0.1754444696851947, "grad_norm": 2.395796060562134, "learning_rate": 9.45559065336509e-06, "loss": 0.6099, "step": 14674 }, { "epoch": 0.17545642583005536, "grad_norm": 1.6511924266815186, "learning_rate": 9.455502792114595e-06, "loss": 0.6804, "step": 14675 }, { "epoch": 0.17546838197491602, "grad_norm": 2.3821897506713867, "learning_rate": 9.455414924183059e-06, "loss": 0.632, "step": 14676 }, { "epoch": 0.17548033811977665, "grad_norm": 2.1759605407714844, "learning_rate": 9.45532704957061e-06, "loss": 0.6154, "step": 14677 }, { "epoch": 0.1754922942646373, "grad_norm": 3.117191791534424, "learning_rate": 9.455239168277387e-06, "loss": 0.6073, "step": 14678 }, { "epoch": 0.17550425040949796, "grad_norm": 3.6523444652557373, "learning_rate": 9.455151280303512e-06, "loss": 0.6143, "step": 14679 }, { "epoch": 0.17551620655435862, "grad_norm": 4.119846820831299, "learning_rate": 9.455063385649123e-06, "loss": 0.6099, "step": 14680 }, { "epoch": 0.17552816269921925, "grad_norm": 1.5277501344680786, "learning_rate": 9.454975484314352e-06, "loss": 0.6915, "step": 14681 }, { "epoch": 0.1755401188440799, "grad_norm": 2.874729633331299, "learning_rate": 9.454887576299328e-06, "loss": 0.5258, "step": 14682 }, { "epoch": 0.17555207498894057, "grad_norm": 3.1642353534698486, "learning_rate": 9.454799661604184e-06, "loss": 0.5558, "step": 14683 }, { "epoch": 0.17556403113380123, "grad_norm": 2.668483018875122, "learning_rate": 9.454711740229055e-06, "loss": 0.6872, "step": 14684 }, { "epoch": 0.17557598727866186, "grad_norm": 1.8426686525344849, "learning_rate": 9.454623812174066e-06, "loss": 0.5431, "step": 14685 }, { "epoch": 0.17558794342352252, "grad_norm": 1.6706651449203491, "learning_rate": 9.454535877439355e-06, "loss": 0.719, "step": 14686 }, { "epoch": 0.17559989956838318, "grad_norm": 2.7727346420288086, "learning_rate": 9.454447936025052e-06, "loss": 0.6299, "step": 14687 }, { "epoch": 0.1756118557132438, "grad_norm": 1.6588587760925293, "learning_rate": 9.454359987931288e-06, "loss": 0.6138, "step": 14688 }, { "epoch": 0.17562381185810447, "grad_norm": 1.9495222568511963, "learning_rate": 9.454272033158194e-06, "loss": 0.5827, "step": 14689 }, { "epoch": 0.17563576800296513, "grad_norm": 1.3490341901779175, "learning_rate": 9.454184071705906e-06, "loss": 0.6647, "step": 14690 }, { "epoch": 0.17564772414782578, "grad_norm": 1.7783946990966797, "learning_rate": 9.45409610357455e-06, "loss": 0.5688, "step": 14691 }, { "epoch": 0.17565968029268642, "grad_norm": 1.6291064023971558, "learning_rate": 9.454008128764263e-06, "loss": 0.528, "step": 14692 }, { "epoch": 0.17567163643754707, "grad_norm": 3.661452531814575, "learning_rate": 9.453920147275174e-06, "loss": 0.6566, "step": 14693 }, { "epoch": 0.17568359258240773, "grad_norm": 3.14229416847229, "learning_rate": 9.453832159107416e-06, "loss": 0.6329, "step": 14694 }, { "epoch": 0.1756955487272684, "grad_norm": 3.7476658821105957, "learning_rate": 9.453744164261121e-06, "loss": 0.696, "step": 14695 }, { "epoch": 0.17570750487212902, "grad_norm": 2.2201318740844727, "learning_rate": 9.453656162736422e-06, "loss": 0.6365, "step": 14696 }, { "epoch": 0.17571946101698968, "grad_norm": 3.125272274017334, "learning_rate": 9.453568154533448e-06, "loss": 0.6182, "step": 14697 }, { "epoch": 0.17573141716185034, "grad_norm": 2.5367279052734375, "learning_rate": 9.453480139652333e-06, "loss": 0.4836, "step": 14698 }, { "epoch": 0.17574337330671097, "grad_norm": 2.1156985759735107, "learning_rate": 9.45339211809321e-06, "loss": 0.6068, "step": 14699 }, { "epoch": 0.17575532945157163, "grad_norm": 4.422614097595215, "learning_rate": 9.45330408985621e-06, "loss": 0.6275, "step": 14700 }, { "epoch": 0.1757672855964323, "grad_norm": 1.718319058418274, "learning_rate": 9.453216054941462e-06, "loss": 0.573, "step": 14701 }, { "epoch": 0.17577924174129295, "grad_norm": 1.7960060834884644, "learning_rate": 9.453128013349103e-06, "loss": 0.5978, "step": 14702 }, { "epoch": 0.17579119788615358, "grad_norm": 2.300635814666748, "learning_rate": 9.453039965079263e-06, "loss": 0.641, "step": 14703 }, { "epoch": 0.17580315403101424, "grad_norm": 4.245604515075684, "learning_rate": 9.452951910132072e-06, "loss": 0.6389, "step": 14704 }, { "epoch": 0.1758151101758749, "grad_norm": 1.7523016929626465, "learning_rate": 9.452863848507665e-06, "loss": 0.6727, "step": 14705 }, { "epoch": 0.17582706632073555, "grad_norm": 2.1985621452331543, "learning_rate": 9.452775780206171e-06, "loss": 0.5966, "step": 14706 }, { "epoch": 0.17583902246559618, "grad_norm": 1.8929073810577393, "learning_rate": 9.452687705227727e-06, "loss": 0.6292, "step": 14707 }, { "epoch": 0.17585097861045684, "grad_norm": 1.887194037437439, "learning_rate": 9.45259962357246e-06, "loss": 0.5186, "step": 14708 }, { "epoch": 0.1758629347553175, "grad_norm": 3.9512364864349365, "learning_rate": 9.452511535240504e-06, "loss": 0.6421, "step": 14709 }, { "epoch": 0.17587489090017816, "grad_norm": 1.825988531112671, "learning_rate": 9.452423440231992e-06, "loss": 0.5786, "step": 14710 }, { "epoch": 0.1758868470450388, "grad_norm": 3.6376960277557373, "learning_rate": 9.452335338547055e-06, "loss": 0.6115, "step": 14711 }, { "epoch": 0.17589880318989945, "grad_norm": 1.7585567235946655, "learning_rate": 9.452247230185824e-06, "loss": 0.6353, "step": 14712 }, { "epoch": 0.1759107593347601, "grad_norm": 1.563964605331421, "learning_rate": 9.452159115148434e-06, "loss": 0.6705, "step": 14713 }, { "epoch": 0.17592271547962074, "grad_norm": 1.6050578355789185, "learning_rate": 9.452070993435015e-06, "loss": 0.5441, "step": 14714 }, { "epoch": 0.1759346716244814, "grad_norm": 1.5116326808929443, "learning_rate": 9.451982865045699e-06, "loss": 0.5996, "step": 14715 }, { "epoch": 0.17594662776934206, "grad_norm": 1.5498323440551758, "learning_rate": 9.45189472998062e-06, "loss": 0.5885, "step": 14716 }, { "epoch": 0.17595858391420272, "grad_norm": 1.4406553506851196, "learning_rate": 9.451806588239907e-06, "loss": 0.5699, "step": 14717 }, { "epoch": 0.17597054005906335, "grad_norm": 3.542893171310425, "learning_rate": 9.451718439823696e-06, "loss": 0.7028, "step": 14718 }, { "epoch": 0.175982496203924, "grad_norm": 2.6928441524505615, "learning_rate": 9.451630284732119e-06, "loss": 0.6121, "step": 14719 }, { "epoch": 0.17599445234878466, "grad_norm": 2.8835043907165527, "learning_rate": 9.451542122965303e-06, "loss": 0.5713, "step": 14720 }, { "epoch": 0.17600640849364532, "grad_norm": 4.6630024909973145, "learning_rate": 9.451453954523386e-06, "loss": 0.628, "step": 14721 }, { "epoch": 0.17601836463850595, "grad_norm": 1.9770081043243408, "learning_rate": 9.451365779406499e-06, "loss": 0.7191, "step": 14722 }, { "epoch": 0.1760303207833666, "grad_norm": 3.9354708194732666, "learning_rate": 9.45127759761477e-06, "loss": 0.6475, "step": 14723 }, { "epoch": 0.17604227692822727, "grad_norm": 2.4903404712677, "learning_rate": 9.451189409148335e-06, "loss": 0.6086, "step": 14724 }, { "epoch": 0.1760542330730879, "grad_norm": 1.7603237628936768, "learning_rate": 9.451101214007327e-06, "loss": 0.6635, "step": 14725 }, { "epoch": 0.17606618921794856, "grad_norm": 3.817390203475952, "learning_rate": 9.45101301219188e-06, "loss": 0.6512, "step": 14726 }, { "epoch": 0.17607814536280922, "grad_norm": 2.1956844329833984, "learning_rate": 9.450924803702117e-06, "loss": 0.6519, "step": 14727 }, { "epoch": 0.17609010150766988, "grad_norm": 2.4001283645629883, "learning_rate": 9.45083658853818e-06, "loss": 0.707, "step": 14728 }, { "epoch": 0.1761020576525305, "grad_norm": 2.386967420578003, "learning_rate": 9.450748366700198e-06, "loss": 0.5731, "step": 14729 }, { "epoch": 0.17611401379739117, "grad_norm": 2.6862974166870117, "learning_rate": 9.4506601381883e-06, "loss": 0.5952, "step": 14730 }, { "epoch": 0.17612596994225183, "grad_norm": 2.8233025074005127, "learning_rate": 9.450571903002624e-06, "loss": 0.6048, "step": 14731 }, { "epoch": 0.17613792608711248, "grad_norm": 1.8901132345199585, "learning_rate": 9.450483661143299e-06, "loss": 0.6737, "step": 14732 }, { "epoch": 0.17614988223197312, "grad_norm": 9.976083755493164, "learning_rate": 9.450395412610458e-06, "loss": 0.6008, "step": 14733 }, { "epoch": 0.17616183837683377, "grad_norm": 3.2783899307250977, "learning_rate": 9.450307157404233e-06, "loss": 0.5997, "step": 14734 }, { "epoch": 0.17617379452169443, "grad_norm": 2.5600149631500244, "learning_rate": 9.450218895524756e-06, "loss": 0.6065, "step": 14735 }, { "epoch": 0.17618575066655506, "grad_norm": 1.859309434890747, "learning_rate": 9.450130626972161e-06, "loss": 0.6704, "step": 14736 }, { "epoch": 0.17619770681141572, "grad_norm": 4.222996234893799, "learning_rate": 9.45004235174658e-06, "loss": 0.5407, "step": 14737 }, { "epoch": 0.17620966295627638, "grad_norm": 1.9355753660202026, "learning_rate": 9.449954069848143e-06, "loss": 0.5407, "step": 14738 }, { "epoch": 0.17622161910113704, "grad_norm": 25.409955978393555, "learning_rate": 9.449865781276985e-06, "loss": 0.6645, "step": 14739 }, { "epoch": 0.17623357524599767, "grad_norm": 2.582545042037964, "learning_rate": 9.449777486033235e-06, "loss": 0.6255, "step": 14740 }, { "epoch": 0.17624553139085833, "grad_norm": 1.8254648447036743, "learning_rate": 9.449689184117031e-06, "loss": 0.6162, "step": 14741 }, { "epoch": 0.176257487535719, "grad_norm": 2.4404919147491455, "learning_rate": 9.449600875528501e-06, "loss": 0.6692, "step": 14742 }, { "epoch": 0.17626944368057965, "grad_norm": 2.571730852127075, "learning_rate": 9.449512560267779e-06, "loss": 0.5954, "step": 14743 }, { "epoch": 0.17628139982544028, "grad_norm": 1.985718846321106, "learning_rate": 9.449424238334997e-06, "loss": 0.6539, "step": 14744 }, { "epoch": 0.17629335597030094, "grad_norm": 1.7634553909301758, "learning_rate": 9.449335909730289e-06, "loss": 0.6249, "step": 14745 }, { "epoch": 0.1763053121151616, "grad_norm": 4.351729869842529, "learning_rate": 9.449247574453784e-06, "loss": 0.8077, "step": 14746 }, { "epoch": 0.17631726826002223, "grad_norm": 2.4628407955169678, "learning_rate": 9.449159232505616e-06, "loss": 0.5928, "step": 14747 }, { "epoch": 0.17632922440488288, "grad_norm": 3.7058136463165283, "learning_rate": 9.44907088388592e-06, "loss": 0.6861, "step": 14748 }, { "epoch": 0.17634118054974354, "grad_norm": 1.4477882385253906, "learning_rate": 9.448982528594824e-06, "loss": 0.678, "step": 14749 }, { "epoch": 0.1763531366946042, "grad_norm": 2.1911821365356445, "learning_rate": 9.448894166632466e-06, "loss": 0.6559, "step": 14750 }, { "epoch": 0.17636509283946483, "grad_norm": 1.8270668983459473, "learning_rate": 9.448805797998973e-06, "loss": 0.5767, "step": 14751 }, { "epoch": 0.1763770489843255, "grad_norm": 1.3906571865081787, "learning_rate": 9.44871742269448e-06, "loss": 0.6295, "step": 14752 }, { "epoch": 0.17638900512918615, "grad_norm": 2.794740915298462, "learning_rate": 9.448629040719119e-06, "loss": 0.6222, "step": 14753 }, { "epoch": 0.1764009612740468, "grad_norm": 5.25739860534668, "learning_rate": 9.448540652073024e-06, "loss": 0.543, "step": 14754 }, { "epoch": 0.17641291741890744, "grad_norm": 15.572368621826172, "learning_rate": 9.448452256756327e-06, "loss": 0.5761, "step": 14755 }, { "epoch": 0.1764248735637681, "grad_norm": 2.00719952583313, "learning_rate": 9.448363854769158e-06, "loss": 0.5422, "step": 14756 }, { "epoch": 0.17643682970862876, "grad_norm": 1.9190682172775269, "learning_rate": 9.448275446111654e-06, "loss": 0.5945, "step": 14757 }, { "epoch": 0.1764487858534894, "grad_norm": 3.324424982070923, "learning_rate": 9.448187030783944e-06, "loss": 0.6754, "step": 14758 }, { "epoch": 0.17646074199835005, "grad_norm": 2.3764266967773438, "learning_rate": 9.448098608786162e-06, "loss": 0.6149, "step": 14759 }, { "epoch": 0.1764726981432107, "grad_norm": 3.1043686866760254, "learning_rate": 9.44801018011844e-06, "loss": 0.6156, "step": 14760 }, { "epoch": 0.17648465428807136, "grad_norm": 1.60678231716156, "learning_rate": 9.447921744780911e-06, "loss": 0.587, "step": 14761 }, { "epoch": 0.176496610432932, "grad_norm": 2.075206756591797, "learning_rate": 9.447833302773707e-06, "loss": 0.5759, "step": 14762 }, { "epoch": 0.17650856657779265, "grad_norm": 3.193864583969116, "learning_rate": 9.447744854096961e-06, "loss": 0.6698, "step": 14763 }, { "epoch": 0.1765205227226533, "grad_norm": 5.966070652008057, "learning_rate": 9.447656398750808e-06, "loss": 0.5879, "step": 14764 }, { "epoch": 0.17653247886751397, "grad_norm": 7.019155502319336, "learning_rate": 9.447567936735375e-06, "loss": 0.6041, "step": 14765 }, { "epoch": 0.1765444350123746, "grad_norm": 1.8199987411499023, "learning_rate": 9.447479468050801e-06, "loss": 0.648, "step": 14766 }, { "epoch": 0.17655639115723526, "grad_norm": 1.506522297859192, "learning_rate": 9.447390992697215e-06, "loss": 0.6043, "step": 14767 }, { "epoch": 0.17656834730209592, "grad_norm": 1.7729713916778564, "learning_rate": 9.44730251067475e-06, "loss": 0.6112, "step": 14768 }, { "epoch": 0.17658030344695658, "grad_norm": 7.718502998352051, "learning_rate": 9.447214021983539e-06, "loss": 0.6645, "step": 14769 }, { "epoch": 0.1765922595918172, "grad_norm": 1.822212815284729, "learning_rate": 9.447125526623715e-06, "loss": 0.7017, "step": 14770 }, { "epoch": 0.17660421573667787, "grad_norm": 5.263144493103027, "learning_rate": 9.447037024595412e-06, "loss": 0.562, "step": 14771 }, { "epoch": 0.17661617188153853, "grad_norm": 1.6265380382537842, "learning_rate": 9.446948515898758e-06, "loss": 0.614, "step": 14772 }, { "epoch": 0.17662812802639916, "grad_norm": 2.878181219100952, "learning_rate": 9.44686000053389e-06, "loss": 0.5898, "step": 14773 }, { "epoch": 0.17664008417125981, "grad_norm": 2.048279285430908, "learning_rate": 9.44677147850094e-06, "loss": 0.701, "step": 14774 }, { "epoch": 0.17665204031612047, "grad_norm": 2.1094579696655273, "learning_rate": 9.44668294980004e-06, "loss": 0.7146, "step": 14775 }, { "epoch": 0.17666399646098113, "grad_norm": 2.7388625144958496, "learning_rate": 9.446594414431324e-06, "loss": 0.53, "step": 14776 }, { "epoch": 0.17667595260584176, "grad_norm": 3.1940114498138428, "learning_rate": 9.446505872394925e-06, "loss": 0.5959, "step": 14777 }, { "epoch": 0.17668790875070242, "grad_norm": 1.7974023818969727, "learning_rate": 9.446417323690972e-06, "loss": 0.6489, "step": 14778 }, { "epoch": 0.17669986489556308, "grad_norm": 2.5297272205352783, "learning_rate": 9.446328768319601e-06, "loss": 0.6206, "step": 14779 }, { "epoch": 0.17671182104042374, "grad_norm": 2.3273465633392334, "learning_rate": 9.446240206280946e-06, "loss": 0.7212, "step": 14780 }, { "epoch": 0.17672377718528437, "grad_norm": 9.251888275146484, "learning_rate": 9.446151637575135e-06, "loss": 0.5738, "step": 14781 }, { "epoch": 0.17673573333014503, "grad_norm": 2.616853952407837, "learning_rate": 9.446063062202306e-06, "loss": 0.6602, "step": 14782 }, { "epoch": 0.1767476894750057, "grad_norm": 5.63323450088501, "learning_rate": 9.44597448016259e-06, "loss": 0.5434, "step": 14783 }, { "epoch": 0.17675964561986632, "grad_norm": 4.4824538230896, "learning_rate": 9.445885891456118e-06, "loss": 0.6235, "step": 14784 }, { "epoch": 0.17677160176472698, "grad_norm": 2.9507596492767334, "learning_rate": 9.445797296083025e-06, "loss": 0.5317, "step": 14785 }, { "epoch": 0.17678355790958764, "grad_norm": 2.220057725906372, "learning_rate": 9.445708694043442e-06, "loss": 0.5343, "step": 14786 }, { "epoch": 0.1767955140544483, "grad_norm": 1.94647216796875, "learning_rate": 9.445620085337505e-06, "loss": 0.5979, "step": 14787 }, { "epoch": 0.17680747019930892, "grad_norm": 2.5714797973632812, "learning_rate": 9.445531469965344e-06, "loss": 0.6935, "step": 14788 }, { "epoch": 0.17681942634416958, "grad_norm": 1.8426144123077393, "learning_rate": 9.445442847927093e-06, "loss": 0.5573, "step": 14789 }, { "epoch": 0.17683138248903024, "grad_norm": 2.6946260929107666, "learning_rate": 9.445354219222884e-06, "loss": 0.6037, "step": 14790 }, { "epoch": 0.1768433386338909, "grad_norm": 5.474420070648193, "learning_rate": 9.445265583852851e-06, "loss": 0.6638, "step": 14791 }, { "epoch": 0.17685529477875153, "grad_norm": 2.241481304168701, "learning_rate": 9.445176941817127e-06, "loss": 0.6294, "step": 14792 }, { "epoch": 0.1768672509236122, "grad_norm": 1.8828142881393433, "learning_rate": 9.445088293115842e-06, "loss": 0.6463, "step": 14793 }, { "epoch": 0.17687920706847285, "grad_norm": 5.762012481689453, "learning_rate": 9.444999637749135e-06, "loss": 0.6687, "step": 14794 }, { "epoch": 0.17689116321333348, "grad_norm": 1.5099990367889404, "learning_rate": 9.444910975717132e-06, "loss": 0.6074, "step": 14795 }, { "epoch": 0.17690311935819414, "grad_norm": 2.0763533115386963, "learning_rate": 9.444822307019972e-06, "loss": 0.6125, "step": 14796 }, { "epoch": 0.1769150755030548, "grad_norm": 2.797858476638794, "learning_rate": 9.444733631657783e-06, "loss": 0.5953, "step": 14797 }, { "epoch": 0.17692703164791546, "grad_norm": 1.8300468921661377, "learning_rate": 9.444644949630702e-06, "loss": 0.5534, "step": 14798 }, { "epoch": 0.1769389877927761, "grad_norm": 2.568315267562866, "learning_rate": 9.444556260938858e-06, "loss": 0.5633, "step": 14799 }, { "epoch": 0.17695094393763675, "grad_norm": 1.8588261604309082, "learning_rate": 9.444467565582387e-06, "loss": 0.6823, "step": 14800 }, { "epoch": 0.1769629000824974, "grad_norm": 1.9231009483337402, "learning_rate": 9.44437886356142e-06, "loss": 0.6517, "step": 14801 }, { "epoch": 0.17697485622735806, "grad_norm": 1.3999789953231812, "learning_rate": 9.444290154876093e-06, "loss": 0.5403, "step": 14802 }, { "epoch": 0.1769868123722187, "grad_norm": 1.7408219575881958, "learning_rate": 9.444201439526538e-06, "loss": 0.7237, "step": 14803 }, { "epoch": 0.17699876851707935, "grad_norm": 1.607816457748413, "learning_rate": 9.444112717512883e-06, "loss": 0.6149, "step": 14804 }, { "epoch": 0.17701072466194, "grad_norm": 1.953822135925293, "learning_rate": 9.444023988835268e-06, "loss": 0.5936, "step": 14805 }, { "epoch": 0.17702268080680064, "grad_norm": 2.217421293258667, "learning_rate": 9.443935253493824e-06, "loss": 0.5659, "step": 14806 }, { "epoch": 0.1770346369516613, "grad_norm": 3.9073972702026367, "learning_rate": 9.443846511488682e-06, "loss": 0.5629, "step": 14807 }, { "epoch": 0.17704659309652196, "grad_norm": 1.6291680335998535, "learning_rate": 9.443757762819976e-06, "loss": 0.6374, "step": 14808 }, { "epoch": 0.17705854924138262, "grad_norm": 3.6566874980926514, "learning_rate": 9.443669007487838e-06, "loss": 0.6284, "step": 14809 }, { "epoch": 0.17707050538624325, "grad_norm": 2.2216291427612305, "learning_rate": 9.443580245492406e-06, "loss": 0.618, "step": 14810 }, { "epoch": 0.1770824615311039, "grad_norm": 2.1855249404907227, "learning_rate": 9.443491476833809e-06, "loss": 0.8075, "step": 14811 }, { "epoch": 0.17709441767596457, "grad_norm": 6.297348499298096, "learning_rate": 9.44340270151218e-06, "loss": 0.6429, "step": 14812 }, { "epoch": 0.17710637382082522, "grad_norm": 1.9139612913131714, "learning_rate": 9.443313919527652e-06, "loss": 0.636, "step": 14813 }, { "epoch": 0.17711832996568586, "grad_norm": 2.300224781036377, "learning_rate": 9.443225130880358e-06, "loss": 0.652, "step": 14814 }, { "epoch": 0.17713028611054651, "grad_norm": 4.11099100112915, "learning_rate": 9.443136335570435e-06, "loss": 0.6574, "step": 14815 }, { "epoch": 0.17714224225540717, "grad_norm": 2.7689545154571533, "learning_rate": 9.443047533598012e-06, "loss": 0.6024, "step": 14816 }, { "epoch": 0.1771541984002678, "grad_norm": 1.5576553344726562, "learning_rate": 9.442958724963222e-06, "loss": 0.6491, "step": 14817 }, { "epoch": 0.17716615454512846, "grad_norm": 3.138313055038452, "learning_rate": 9.442869909666201e-06, "loss": 0.5525, "step": 14818 }, { "epoch": 0.17717811068998912, "grad_norm": 2.3762552738189697, "learning_rate": 9.442781087707082e-06, "loss": 0.4513, "step": 14819 }, { "epoch": 0.17719006683484978, "grad_norm": 2.1784865856170654, "learning_rate": 9.442692259085996e-06, "loss": 0.6139, "step": 14820 }, { "epoch": 0.1772020229797104, "grad_norm": 3.598254919052124, "learning_rate": 9.442603423803076e-06, "loss": 0.6447, "step": 14821 }, { "epoch": 0.17721397912457107, "grad_norm": 2.314732313156128, "learning_rate": 9.442514581858458e-06, "loss": 0.6408, "step": 14822 }, { "epoch": 0.17722593526943173, "grad_norm": 1.3533998727798462, "learning_rate": 9.442425733252273e-06, "loss": 0.5512, "step": 14823 }, { "epoch": 0.1772378914142924, "grad_norm": 2.710512399673462, "learning_rate": 9.442336877984656e-06, "loss": 0.6191, "step": 14824 }, { "epoch": 0.17724984755915302, "grad_norm": 2.6665585041046143, "learning_rate": 9.442248016055737e-06, "loss": 0.6624, "step": 14825 }, { "epoch": 0.17726180370401368, "grad_norm": 1.795966625213623, "learning_rate": 9.442159147465651e-06, "loss": 0.5306, "step": 14826 }, { "epoch": 0.17727375984887433, "grad_norm": 2.3759942054748535, "learning_rate": 9.442070272214534e-06, "loss": 0.6716, "step": 14827 }, { "epoch": 0.177285715993735, "grad_norm": 2.413769245147705, "learning_rate": 9.441981390302514e-06, "loss": 0.5968, "step": 14828 }, { "epoch": 0.17729767213859562, "grad_norm": 1.4818929433822632, "learning_rate": 9.44189250172973e-06, "loss": 0.5797, "step": 14829 }, { "epoch": 0.17730962828345628, "grad_norm": 6.058516979217529, "learning_rate": 9.441803606496312e-06, "loss": 0.6769, "step": 14830 }, { "epoch": 0.17732158442831694, "grad_norm": 2.966707944869995, "learning_rate": 9.441714704602392e-06, "loss": 0.7288, "step": 14831 }, { "epoch": 0.17733354057317757, "grad_norm": 2.011239528656006, "learning_rate": 9.441625796048104e-06, "loss": 0.486, "step": 14832 }, { "epoch": 0.17734549671803823, "grad_norm": 2.488783597946167, "learning_rate": 9.441536880833585e-06, "loss": 0.5353, "step": 14833 }, { "epoch": 0.1773574528628989, "grad_norm": 1.9221885204315186, "learning_rate": 9.441447958958965e-06, "loss": 0.7634, "step": 14834 }, { "epoch": 0.17736940900775955, "grad_norm": 3.7858457565307617, "learning_rate": 9.441359030424377e-06, "loss": 0.5666, "step": 14835 }, { "epoch": 0.17738136515262018, "grad_norm": 1.9800920486450195, "learning_rate": 9.441270095229958e-06, "loss": 0.6928, "step": 14836 }, { "epoch": 0.17739332129748084, "grad_norm": 1.811123251914978, "learning_rate": 9.441181153375835e-06, "loss": 0.6084, "step": 14837 }, { "epoch": 0.1774052774423415, "grad_norm": 2.4886677265167236, "learning_rate": 9.441092204862148e-06, "loss": 0.6487, "step": 14838 }, { "epoch": 0.17741723358720216, "grad_norm": 2.3601651191711426, "learning_rate": 9.441003249689026e-06, "loss": 0.7302, "step": 14839 }, { "epoch": 0.1774291897320628, "grad_norm": 3.3372955322265625, "learning_rate": 9.440914287856605e-06, "loss": 0.571, "step": 14840 }, { "epoch": 0.17744114587692345, "grad_norm": 1.4327472448349, "learning_rate": 9.440825319365014e-06, "loss": 0.5883, "step": 14841 }, { "epoch": 0.1774531020217841, "grad_norm": 3.9112868309020996, "learning_rate": 9.440736344214394e-06, "loss": 0.6231, "step": 14842 }, { "epoch": 0.17746505816664473, "grad_norm": 1.7705034017562866, "learning_rate": 9.440647362404872e-06, "loss": 0.6138, "step": 14843 }, { "epoch": 0.1774770143115054, "grad_norm": 2.0760326385498047, "learning_rate": 9.440558373936583e-06, "loss": 0.6022, "step": 14844 }, { "epoch": 0.17748897045636605, "grad_norm": 6.473458766937256, "learning_rate": 9.440469378809662e-06, "loss": 0.6438, "step": 14845 }, { "epoch": 0.1775009266012267, "grad_norm": 2.0155978202819824, "learning_rate": 9.44038037702424e-06, "loss": 0.6966, "step": 14846 }, { "epoch": 0.17751288274608734, "grad_norm": 1.7869105339050293, "learning_rate": 9.440291368580455e-06, "loss": 0.6415, "step": 14847 }, { "epoch": 0.177524838890948, "grad_norm": 2.246905565261841, "learning_rate": 9.440202353478433e-06, "loss": 0.6994, "step": 14848 }, { "epoch": 0.17753679503580866, "grad_norm": 2.1829214096069336, "learning_rate": 9.440113331718314e-06, "loss": 0.6126, "step": 14849 }, { "epoch": 0.17754875118066932, "grad_norm": 3.035313844680786, "learning_rate": 9.44002430330023e-06, "loss": 0.6251, "step": 14850 }, { "epoch": 0.17756070732552995, "grad_norm": 2.510652542114258, "learning_rate": 9.439935268224313e-06, "loss": 0.6135, "step": 14851 }, { "epoch": 0.1775726634703906, "grad_norm": 1.9488961696624756, "learning_rate": 9.439846226490696e-06, "loss": 0.6555, "step": 14852 }, { "epoch": 0.17758461961525127, "grad_norm": 1.8287410736083984, "learning_rate": 9.439757178099515e-06, "loss": 0.5842, "step": 14853 }, { "epoch": 0.1775965757601119, "grad_norm": 3.83410382270813, "learning_rate": 9.439668123050902e-06, "loss": 0.5522, "step": 14854 }, { "epoch": 0.17760853190497256, "grad_norm": 4.417965888977051, "learning_rate": 9.439579061344992e-06, "loss": 0.5913, "step": 14855 }, { "epoch": 0.17762048804983321, "grad_norm": 2.3115358352661133, "learning_rate": 9.439489992981918e-06, "loss": 0.5287, "step": 14856 }, { "epoch": 0.17763244419469387, "grad_norm": 1.8440966606140137, "learning_rate": 9.43940091796181e-06, "loss": 0.6491, "step": 14857 }, { "epoch": 0.1776444003395545, "grad_norm": 2.0835132598876953, "learning_rate": 9.439311836284807e-06, "loss": 0.5972, "step": 14858 }, { "epoch": 0.17765635648441516, "grad_norm": 1.583900809288025, "learning_rate": 9.439222747951039e-06, "loss": 0.6307, "step": 14859 }, { "epoch": 0.17766831262927582, "grad_norm": 3.017540216445923, "learning_rate": 9.439133652960641e-06, "loss": 0.7025, "step": 14860 }, { "epoch": 0.17768026877413648, "grad_norm": 2.133707046508789, "learning_rate": 9.439044551313746e-06, "loss": 0.6203, "step": 14861 }, { "epoch": 0.1776922249189971, "grad_norm": 2.2768681049346924, "learning_rate": 9.438955443010489e-06, "loss": 0.5896, "step": 14862 }, { "epoch": 0.17770418106385777, "grad_norm": 4.547733783721924, "learning_rate": 9.438866328051003e-06, "loss": 0.6752, "step": 14863 }, { "epoch": 0.17771613720871843, "grad_norm": 1.9559534788131714, "learning_rate": 9.43877720643542e-06, "loss": 0.7011, "step": 14864 }, { "epoch": 0.17772809335357906, "grad_norm": 2.023660898208618, "learning_rate": 9.438688078163874e-06, "loss": 0.6386, "step": 14865 }, { "epoch": 0.17774004949843972, "grad_norm": 2.6762349605560303, "learning_rate": 9.438598943236499e-06, "loss": 0.6541, "step": 14866 }, { "epoch": 0.17775200564330038, "grad_norm": 3.7006402015686035, "learning_rate": 9.43850980165343e-06, "loss": 0.569, "step": 14867 }, { "epoch": 0.17776396178816103, "grad_norm": 5.274611473083496, "learning_rate": 9.438420653414801e-06, "loss": 0.679, "step": 14868 }, { "epoch": 0.17777591793302167, "grad_norm": 2.89639949798584, "learning_rate": 9.438331498520744e-06, "loss": 0.6149, "step": 14869 }, { "epoch": 0.17778787407788232, "grad_norm": 12.6200590133667, "learning_rate": 9.438242336971391e-06, "loss": 0.5706, "step": 14870 }, { "epoch": 0.17779983022274298, "grad_norm": 3.016315460205078, "learning_rate": 9.43815316876688e-06, "loss": 0.6345, "step": 14871 }, { "epoch": 0.17781178636760364, "grad_norm": 1.5214325189590454, "learning_rate": 9.438063993907343e-06, "loss": 0.5404, "step": 14872 }, { "epoch": 0.17782374251246427, "grad_norm": 1.7109050750732422, "learning_rate": 9.437974812392912e-06, "loss": 0.5275, "step": 14873 }, { "epoch": 0.17783569865732493, "grad_norm": 1.981738805770874, "learning_rate": 9.437885624223721e-06, "loss": 0.6043, "step": 14874 }, { "epoch": 0.1778476548021856, "grad_norm": 1.9086226224899292, "learning_rate": 9.437796429399906e-06, "loss": 0.619, "step": 14875 }, { "epoch": 0.17785961094704622, "grad_norm": 1.5789213180541992, "learning_rate": 9.4377072279216e-06, "loss": 0.6339, "step": 14876 }, { "epoch": 0.17787156709190688, "grad_norm": 2.3062212467193604, "learning_rate": 9.437618019788936e-06, "loss": 0.5483, "step": 14877 }, { "epoch": 0.17788352323676754, "grad_norm": 3.1006994247436523, "learning_rate": 9.437528805002048e-06, "loss": 0.7436, "step": 14878 }, { "epoch": 0.1778954793816282, "grad_norm": 1.9101579189300537, "learning_rate": 9.43743958356107e-06, "loss": 0.6942, "step": 14879 }, { "epoch": 0.17790743552648883, "grad_norm": 1.783449411392212, "learning_rate": 9.437350355466133e-06, "loss": 0.5382, "step": 14880 }, { "epoch": 0.17791939167134949, "grad_norm": 1.6828041076660156, "learning_rate": 9.437261120717377e-06, "loss": 0.5603, "step": 14881 }, { "epoch": 0.17793134781621014, "grad_norm": 2.240525960922241, "learning_rate": 9.43717187931493e-06, "loss": 0.601, "step": 14882 }, { "epoch": 0.1779433039610708, "grad_norm": 2.5146377086639404, "learning_rate": 9.43708263125893e-06, "loss": 0.6489, "step": 14883 }, { "epoch": 0.17795526010593143, "grad_norm": 3.9846560955047607, "learning_rate": 9.436993376549505e-06, "loss": 0.6693, "step": 14884 }, { "epoch": 0.1779672162507921, "grad_norm": 2.111523151397705, "learning_rate": 9.436904115186796e-06, "loss": 0.5956, "step": 14885 }, { "epoch": 0.17797917239565275, "grad_norm": 1.9238505363464355, "learning_rate": 9.436814847170933e-06, "loss": 0.5612, "step": 14886 }, { "epoch": 0.1779911285405134, "grad_norm": 2.059748649597168, "learning_rate": 9.43672557250205e-06, "loss": 0.5908, "step": 14887 }, { "epoch": 0.17800308468537404, "grad_norm": 1.7350001335144043, "learning_rate": 9.436636291180282e-06, "loss": 0.5492, "step": 14888 }, { "epoch": 0.1780150408302347, "grad_norm": 1.7509859800338745, "learning_rate": 9.436547003205761e-06, "loss": 0.5973, "step": 14889 }, { "epoch": 0.17802699697509536, "grad_norm": 1.7178250551223755, "learning_rate": 9.436457708578621e-06, "loss": 0.6127, "step": 14890 }, { "epoch": 0.178038953119956, "grad_norm": 5.213240146636963, "learning_rate": 9.436368407298999e-06, "loss": 0.649, "step": 14891 }, { "epoch": 0.17805090926481665, "grad_norm": 6.086976528167725, "learning_rate": 9.436279099367026e-06, "loss": 0.6315, "step": 14892 }, { "epoch": 0.1780628654096773, "grad_norm": 2.0414323806762695, "learning_rate": 9.436189784782837e-06, "loss": 0.5974, "step": 14893 }, { "epoch": 0.17807482155453797, "grad_norm": 1.7800155878067017, "learning_rate": 9.436100463546566e-06, "loss": 0.5634, "step": 14894 }, { "epoch": 0.1780867776993986, "grad_norm": 1.6509802341461182, "learning_rate": 9.436011135658345e-06, "loss": 0.5788, "step": 14895 }, { "epoch": 0.17809873384425925, "grad_norm": 2.4434146881103516, "learning_rate": 9.435921801118311e-06, "loss": 0.6633, "step": 14896 }, { "epoch": 0.1781106899891199, "grad_norm": 2.5975077152252197, "learning_rate": 9.435832459926596e-06, "loss": 0.7286, "step": 14897 }, { "epoch": 0.17812264613398057, "grad_norm": 2.105180263519287, "learning_rate": 9.435743112083334e-06, "loss": 0.6563, "step": 14898 }, { "epoch": 0.1781346022788412, "grad_norm": 1.5170800685882568, "learning_rate": 9.435653757588662e-06, "loss": 0.6082, "step": 14899 }, { "epoch": 0.17814655842370186, "grad_norm": 2.612499475479126, "learning_rate": 9.435564396442708e-06, "loss": 0.6369, "step": 14900 }, { "epoch": 0.17815851456856252, "grad_norm": 1.9567886590957642, "learning_rate": 9.43547502864561e-06, "loss": 0.7108, "step": 14901 }, { "epoch": 0.17817047071342315, "grad_norm": 2.0303988456726074, "learning_rate": 9.435385654197501e-06, "loss": 0.6384, "step": 14902 }, { "epoch": 0.1781824268582838, "grad_norm": 2.283858299255371, "learning_rate": 9.435296273098518e-06, "loss": 0.7342, "step": 14903 }, { "epoch": 0.17819438300314447, "grad_norm": 1.7438312768936157, "learning_rate": 9.435206885348791e-06, "loss": 0.5899, "step": 14904 }, { "epoch": 0.17820633914800513, "grad_norm": 1.6410713195800781, "learning_rate": 9.435117490948455e-06, "loss": 0.638, "step": 14905 }, { "epoch": 0.17821829529286576, "grad_norm": 6.440434455871582, "learning_rate": 9.435028089897647e-06, "loss": 0.6206, "step": 14906 }, { "epoch": 0.17823025143772642, "grad_norm": 2.309174060821533, "learning_rate": 9.434938682196496e-06, "loss": 0.5726, "step": 14907 }, { "epoch": 0.17824220758258708, "grad_norm": 3.4224343299865723, "learning_rate": 9.434849267845139e-06, "loss": 0.6227, "step": 14908 }, { "epoch": 0.17825416372744773, "grad_norm": 3.4821295738220215, "learning_rate": 9.434759846843711e-06, "loss": 0.6719, "step": 14909 }, { "epoch": 0.17826611987230837, "grad_norm": 1.8626244068145752, "learning_rate": 9.434670419192344e-06, "loss": 0.6439, "step": 14910 }, { "epoch": 0.17827807601716902, "grad_norm": 1.6805592775344849, "learning_rate": 9.434580984891173e-06, "loss": 0.601, "step": 14911 }, { "epoch": 0.17829003216202968, "grad_norm": 2.266589641571045, "learning_rate": 9.434491543940331e-06, "loss": 0.6656, "step": 14912 }, { "epoch": 0.1783019883068903, "grad_norm": 1.958195447921753, "learning_rate": 9.434402096339955e-06, "loss": 0.6829, "step": 14913 }, { "epoch": 0.17831394445175097, "grad_norm": 2.464834451675415, "learning_rate": 9.434312642090176e-06, "loss": 0.6048, "step": 14914 }, { "epoch": 0.17832590059661163, "grad_norm": 1.7635334730148315, "learning_rate": 9.434223181191132e-06, "loss": 0.6442, "step": 14915 }, { "epoch": 0.1783378567414723, "grad_norm": 1.433591365814209, "learning_rate": 9.434133713642951e-06, "loss": 0.6038, "step": 14916 }, { "epoch": 0.17834981288633292, "grad_norm": 4.366667747497559, "learning_rate": 9.434044239445772e-06, "loss": 0.61, "step": 14917 }, { "epoch": 0.17836176903119358, "grad_norm": 2.587144374847412, "learning_rate": 9.433954758599728e-06, "loss": 0.6487, "step": 14918 }, { "epoch": 0.17837372517605424, "grad_norm": 1.605968713760376, "learning_rate": 9.433865271104954e-06, "loss": 0.7401, "step": 14919 }, { "epoch": 0.1783856813209149, "grad_norm": 1.3547316789627075, "learning_rate": 9.433775776961582e-06, "loss": 0.5786, "step": 14920 }, { "epoch": 0.17839763746577553, "grad_norm": 1.765216588973999, "learning_rate": 9.433686276169749e-06, "loss": 0.6765, "step": 14921 }, { "epoch": 0.17840959361063619, "grad_norm": 2.0191493034362793, "learning_rate": 9.433596768729586e-06, "loss": 0.6516, "step": 14922 }, { "epoch": 0.17842154975549684, "grad_norm": 1.5488959550857544, "learning_rate": 9.43350725464123e-06, "loss": 0.586, "step": 14923 }, { "epoch": 0.17843350590035748, "grad_norm": 9.344291687011719, "learning_rate": 9.433417733904812e-06, "loss": 0.6323, "step": 14924 }, { "epoch": 0.17844546204521813, "grad_norm": 3.3413236141204834, "learning_rate": 9.433328206520471e-06, "loss": 0.603, "step": 14925 }, { "epoch": 0.1784574181900788, "grad_norm": 2.21236252784729, "learning_rate": 9.433238672488338e-06, "loss": 0.6491, "step": 14926 }, { "epoch": 0.17846937433493945, "grad_norm": 10.556864738464355, "learning_rate": 9.433149131808547e-06, "loss": 0.608, "step": 14927 }, { "epoch": 0.17848133047980008, "grad_norm": 1.236387014389038, "learning_rate": 9.433059584481235e-06, "loss": 0.577, "step": 14928 }, { "epoch": 0.17849328662466074, "grad_norm": 1.868848204612732, "learning_rate": 9.432970030506533e-06, "loss": 0.652, "step": 14929 }, { "epoch": 0.1785052427695214, "grad_norm": 1.8262965679168701, "learning_rate": 9.432880469884576e-06, "loss": 0.6442, "step": 14930 }, { "epoch": 0.17851719891438206, "grad_norm": 3.1369924545288086, "learning_rate": 9.432790902615498e-06, "loss": 0.6001, "step": 14931 }, { "epoch": 0.1785291550592427, "grad_norm": 1.755101203918457, "learning_rate": 9.432701328699436e-06, "loss": 0.5941, "step": 14932 }, { "epoch": 0.17854111120410335, "grad_norm": 5.6531243324279785, "learning_rate": 9.432611748136525e-06, "loss": 0.5529, "step": 14933 }, { "epoch": 0.178553067348964, "grad_norm": 2.976872444152832, "learning_rate": 9.432522160926894e-06, "loss": 0.6577, "step": 14934 }, { "epoch": 0.17856502349382466, "grad_norm": 1.8048489093780518, "learning_rate": 9.43243256707068e-06, "loss": 0.6, "step": 14935 }, { "epoch": 0.1785769796386853, "grad_norm": 3.48807954788208, "learning_rate": 9.432342966568018e-06, "loss": 0.5415, "step": 14936 }, { "epoch": 0.17858893578354595, "grad_norm": 1.7360552549362183, "learning_rate": 9.432253359419042e-06, "loss": 0.6323, "step": 14937 }, { "epoch": 0.1786008919284066, "grad_norm": 1.8762906789779663, "learning_rate": 9.432163745623887e-06, "loss": 0.6462, "step": 14938 }, { "epoch": 0.17861284807326724, "grad_norm": 1.535784363746643, "learning_rate": 9.432074125182687e-06, "loss": 0.6254, "step": 14939 }, { "epoch": 0.1786248042181279, "grad_norm": 1.6495468616485596, "learning_rate": 9.431984498095575e-06, "loss": 0.5978, "step": 14940 }, { "epoch": 0.17863676036298856, "grad_norm": 1.3323476314544678, "learning_rate": 9.431894864362686e-06, "loss": 0.5835, "step": 14941 }, { "epoch": 0.17864871650784922, "grad_norm": 2.0749671459198, "learning_rate": 9.431805223984157e-06, "loss": 0.5885, "step": 14942 }, { "epoch": 0.17866067265270985, "grad_norm": 2.67500901222229, "learning_rate": 9.431715576960118e-06, "loss": 0.6667, "step": 14943 }, { "epoch": 0.1786726287975705, "grad_norm": 3.8330459594726562, "learning_rate": 9.431625923290707e-06, "loss": 0.5586, "step": 14944 }, { "epoch": 0.17868458494243117, "grad_norm": 1.747444748878479, "learning_rate": 9.431536262976057e-06, "loss": 0.6476, "step": 14945 }, { "epoch": 0.17869654108729183, "grad_norm": 1.6345722675323486, "learning_rate": 9.431446596016302e-06, "loss": 0.6241, "step": 14946 }, { "epoch": 0.17870849723215246, "grad_norm": 1.5824891328811646, "learning_rate": 9.431356922411579e-06, "loss": 0.4794, "step": 14947 }, { "epoch": 0.17872045337701312, "grad_norm": 3.054755449295044, "learning_rate": 9.431267242162019e-06, "loss": 0.6386, "step": 14948 }, { "epoch": 0.17873240952187378, "grad_norm": 1.7905687093734741, "learning_rate": 9.431177555267759e-06, "loss": 0.5838, "step": 14949 }, { "epoch": 0.1787443656667344, "grad_norm": 2.7347190380096436, "learning_rate": 9.431087861728931e-06, "loss": 0.6212, "step": 14950 }, { "epoch": 0.17875632181159506, "grad_norm": 1.6483588218688965, "learning_rate": 9.430998161545673e-06, "loss": 0.5939, "step": 14951 }, { "epoch": 0.17876827795645572, "grad_norm": 2.5583114624023438, "learning_rate": 9.430908454718115e-06, "loss": 0.6366, "step": 14952 }, { "epoch": 0.17878023410131638, "grad_norm": 1.9737510681152344, "learning_rate": 9.430818741246395e-06, "loss": 0.611, "step": 14953 }, { "epoch": 0.178792190246177, "grad_norm": 1.9558850526809692, "learning_rate": 9.430729021130647e-06, "loss": 0.5988, "step": 14954 }, { "epoch": 0.17880414639103767, "grad_norm": 4.615749359130859, "learning_rate": 9.430639294371004e-06, "loss": 0.5848, "step": 14955 }, { "epoch": 0.17881610253589833, "grad_norm": 1.6839544773101807, "learning_rate": 9.430549560967603e-06, "loss": 0.6651, "step": 14956 }, { "epoch": 0.178828058680759, "grad_norm": 2.496457815170288, "learning_rate": 9.430459820920578e-06, "loss": 0.5673, "step": 14957 }, { "epoch": 0.17884001482561962, "grad_norm": 2.635110378265381, "learning_rate": 9.430370074230063e-06, "loss": 0.6781, "step": 14958 }, { "epoch": 0.17885197097048028, "grad_norm": 1.4338709115982056, "learning_rate": 9.43028032089619e-06, "loss": 0.5873, "step": 14959 }, { "epoch": 0.17886392711534094, "grad_norm": 2.3297722339630127, "learning_rate": 9.430190560919096e-06, "loss": 0.5964, "step": 14960 }, { "epoch": 0.17887588326020157, "grad_norm": 2.0968244075775146, "learning_rate": 9.430100794298915e-06, "loss": 0.683, "step": 14961 }, { "epoch": 0.17888783940506223, "grad_norm": 1.5282617807388306, "learning_rate": 9.430011021035785e-06, "loss": 0.6148, "step": 14962 }, { "epoch": 0.17889979554992289, "grad_norm": 1.5737005472183228, "learning_rate": 9.429921241129836e-06, "loss": 0.6376, "step": 14963 }, { "epoch": 0.17891175169478354, "grad_norm": 3.403141975402832, "learning_rate": 9.429831454581204e-06, "loss": 0.5921, "step": 14964 }, { "epoch": 0.17892370783964417, "grad_norm": 3.103450298309326, "learning_rate": 9.429741661390025e-06, "loss": 0.5349, "step": 14965 }, { "epoch": 0.17893566398450483, "grad_norm": 1.5978636741638184, "learning_rate": 9.429651861556433e-06, "loss": 0.5574, "step": 14966 }, { "epoch": 0.1789476201293655, "grad_norm": 2.6961543560028076, "learning_rate": 9.42956205508056e-06, "loss": 0.5471, "step": 14967 }, { "epoch": 0.17895957627422615, "grad_norm": 1.8271021842956543, "learning_rate": 9.429472241962546e-06, "loss": 0.5852, "step": 14968 }, { "epoch": 0.17897153241908678, "grad_norm": 1.9011056423187256, "learning_rate": 9.42938242220252e-06, "loss": 0.5975, "step": 14969 }, { "epoch": 0.17898348856394744, "grad_norm": 1.6160304546356201, "learning_rate": 9.42929259580062e-06, "loss": 0.6339, "step": 14970 }, { "epoch": 0.1789954447088081, "grad_norm": 1.5519047975540161, "learning_rate": 9.429202762756979e-06, "loss": 0.5221, "step": 14971 }, { "epoch": 0.17900740085366873, "grad_norm": 3.109813928604126, "learning_rate": 9.429112923071736e-06, "loss": 0.5923, "step": 14972 }, { "epoch": 0.1790193569985294, "grad_norm": 1.2804369926452637, "learning_rate": 9.429023076745019e-06, "loss": 0.5126, "step": 14973 }, { "epoch": 0.17903131314339005, "grad_norm": 6.857333660125732, "learning_rate": 9.428933223776968e-06, "loss": 0.6359, "step": 14974 }, { "epoch": 0.1790432692882507, "grad_norm": 2.4353854656219482, "learning_rate": 9.428843364167714e-06, "loss": 0.6508, "step": 14975 }, { "epoch": 0.17905522543311134, "grad_norm": 1.722771167755127, "learning_rate": 9.428753497917396e-06, "loss": 0.6114, "step": 14976 }, { "epoch": 0.179067181577972, "grad_norm": 1.5044622421264648, "learning_rate": 9.428663625026146e-06, "loss": 0.5319, "step": 14977 }, { "epoch": 0.17907913772283265, "grad_norm": 4.025793075561523, "learning_rate": 9.428573745494098e-06, "loss": 0.6169, "step": 14978 }, { "epoch": 0.1790910938676933, "grad_norm": 1.5999088287353516, "learning_rate": 9.42848385932139e-06, "loss": 0.487, "step": 14979 }, { "epoch": 0.17910305001255394, "grad_norm": 1.6047370433807373, "learning_rate": 9.428393966508153e-06, "loss": 0.6416, "step": 14980 }, { "epoch": 0.1791150061574146, "grad_norm": 2.039149761199951, "learning_rate": 9.428304067054523e-06, "loss": 0.4991, "step": 14981 }, { "epoch": 0.17912696230227526, "grad_norm": 3.5704851150512695, "learning_rate": 9.428214160960636e-06, "loss": 0.6242, "step": 14982 }, { "epoch": 0.1791389184471359, "grad_norm": 1.833101749420166, "learning_rate": 9.428124248226626e-06, "loss": 0.6389, "step": 14983 }, { "epoch": 0.17915087459199655, "grad_norm": 1.529873251914978, "learning_rate": 9.428034328852629e-06, "loss": 0.6136, "step": 14984 }, { "epoch": 0.1791628307368572, "grad_norm": 1.4321372509002686, "learning_rate": 9.427944402838779e-06, "loss": 0.6386, "step": 14985 }, { "epoch": 0.17917478688171787, "grad_norm": 1.5024300813674927, "learning_rate": 9.427854470185208e-06, "loss": 0.5971, "step": 14986 }, { "epoch": 0.1791867430265785, "grad_norm": 1.803397297859192, "learning_rate": 9.427764530892055e-06, "loss": 0.5943, "step": 14987 }, { "epoch": 0.17919869917143916, "grad_norm": 3.5277323722839355, "learning_rate": 9.427674584959454e-06, "loss": 0.6259, "step": 14988 }, { "epoch": 0.17921065531629982, "grad_norm": 3.2562358379364014, "learning_rate": 9.427584632387538e-06, "loss": 0.5567, "step": 14989 }, { "epoch": 0.17922261146116047, "grad_norm": 4.220831871032715, "learning_rate": 9.427494673176444e-06, "loss": 0.6822, "step": 14990 }, { "epoch": 0.1792345676060211, "grad_norm": 1.3813942670822144, "learning_rate": 9.427404707326306e-06, "loss": 0.558, "step": 14991 }, { "epoch": 0.17924652375088176, "grad_norm": 1.7348694801330566, "learning_rate": 9.427314734837259e-06, "loss": 0.5592, "step": 14992 }, { "epoch": 0.17925847989574242, "grad_norm": 2.415163278579712, "learning_rate": 9.427224755709437e-06, "loss": 0.6866, "step": 14993 }, { "epoch": 0.17927043604060308, "grad_norm": 2.4230682849884033, "learning_rate": 9.427134769942976e-06, "loss": 0.6681, "step": 14994 }, { "epoch": 0.1792823921854637, "grad_norm": 2.4707860946655273, "learning_rate": 9.42704477753801e-06, "loss": 0.5772, "step": 14995 }, { "epoch": 0.17929434833032437, "grad_norm": 2.4987857341766357, "learning_rate": 9.426954778494677e-06, "loss": 0.6132, "step": 14996 }, { "epoch": 0.17930630447518503, "grad_norm": 1.403460144996643, "learning_rate": 9.426864772813108e-06, "loss": 0.5954, "step": 14997 }, { "epoch": 0.17931826062004566, "grad_norm": 1.729135513305664, "learning_rate": 9.426774760493439e-06, "loss": 0.5712, "step": 14998 }, { "epoch": 0.17933021676490632, "grad_norm": 2.3616082668304443, "learning_rate": 9.426684741535807e-06, "loss": 0.7438, "step": 14999 }, { "epoch": 0.17934217290976698, "grad_norm": 2.0523736476898193, "learning_rate": 9.426594715940344e-06, "loss": 0.6051, "step": 15000 }, { "epoch": 0.17935412905462764, "grad_norm": 1.953742504119873, "learning_rate": 9.426504683707187e-06, "loss": 0.597, "step": 15001 }, { "epoch": 0.17936608519948827, "grad_norm": 2.102804660797119, "learning_rate": 9.42641464483647e-06, "loss": 0.6848, "step": 15002 }, { "epoch": 0.17937804134434893, "grad_norm": 2.1712329387664795, "learning_rate": 9.426324599328328e-06, "loss": 0.575, "step": 15003 }, { "epoch": 0.17938999748920958, "grad_norm": 2.8224375247955322, "learning_rate": 9.4262345471829e-06, "loss": 0.7323, "step": 15004 }, { "epoch": 0.17940195363407024, "grad_norm": 3.530013084411621, "learning_rate": 9.426144488400315e-06, "loss": 0.6221, "step": 15005 }, { "epoch": 0.17941390977893087, "grad_norm": 2.089430809020996, "learning_rate": 9.42605442298071e-06, "loss": 0.622, "step": 15006 }, { "epoch": 0.17942586592379153, "grad_norm": 3.7489209175109863, "learning_rate": 9.425964350924221e-06, "loss": 0.5928, "step": 15007 }, { "epoch": 0.1794378220686522, "grad_norm": 1.9370701313018799, "learning_rate": 9.425874272230983e-06, "loss": 0.6355, "step": 15008 }, { "epoch": 0.17944977821351282, "grad_norm": 3.5447399616241455, "learning_rate": 9.425784186901132e-06, "loss": 0.6127, "step": 15009 }, { "epoch": 0.17946173435837348, "grad_norm": 1.8595411777496338, "learning_rate": 9.425694094934801e-06, "loss": 0.7298, "step": 15010 }, { "epoch": 0.17947369050323414, "grad_norm": 1.3537991046905518, "learning_rate": 9.425603996332125e-06, "loss": 0.6356, "step": 15011 }, { "epoch": 0.1794856466480948, "grad_norm": 1.8066779375076294, "learning_rate": 9.425513891093242e-06, "loss": 0.6372, "step": 15012 }, { "epoch": 0.17949760279295543, "grad_norm": 2.307576894760132, "learning_rate": 9.425423779218284e-06, "loss": 0.5982, "step": 15013 }, { "epoch": 0.1795095589378161, "grad_norm": 1.5647257566452026, "learning_rate": 9.425333660707388e-06, "loss": 0.6945, "step": 15014 }, { "epoch": 0.17952151508267675, "grad_norm": 2.17358136177063, "learning_rate": 9.425243535560686e-06, "loss": 0.5864, "step": 15015 }, { "epoch": 0.1795334712275374, "grad_norm": 2.2132749557495117, "learning_rate": 9.425153403778318e-06, "loss": 0.7224, "step": 15016 }, { "epoch": 0.17954542737239804, "grad_norm": 12.106553077697754, "learning_rate": 9.425063265360417e-06, "loss": 0.6036, "step": 15017 }, { "epoch": 0.1795573835172587, "grad_norm": 6.758674144744873, "learning_rate": 9.424973120307117e-06, "loss": 0.5737, "step": 15018 }, { "epoch": 0.17956933966211935, "grad_norm": 1.4682300090789795, "learning_rate": 9.424882968618555e-06, "loss": 0.6022, "step": 15019 }, { "epoch": 0.17958129580697998, "grad_norm": 1.6506602764129639, "learning_rate": 9.424792810294863e-06, "loss": 0.6449, "step": 15020 }, { "epoch": 0.17959325195184064, "grad_norm": 1.811928391456604, "learning_rate": 9.42470264533618e-06, "loss": 0.7046, "step": 15021 }, { "epoch": 0.1796052080967013, "grad_norm": 3.7293941974639893, "learning_rate": 9.42461247374264e-06, "loss": 0.6244, "step": 15022 }, { "epoch": 0.17961716424156196, "grad_norm": 3.4565558433532715, "learning_rate": 9.424522295514377e-06, "loss": 0.6216, "step": 15023 }, { "epoch": 0.1796291203864226, "grad_norm": 6.07532262802124, "learning_rate": 9.424432110651527e-06, "loss": 0.6343, "step": 15024 }, { "epoch": 0.17964107653128325, "grad_norm": 14.668479919433594, "learning_rate": 9.424341919154226e-06, "loss": 0.6988, "step": 15025 }, { "epoch": 0.1796530326761439, "grad_norm": 2.059906005859375, "learning_rate": 9.424251721022608e-06, "loss": 0.5803, "step": 15026 }, { "epoch": 0.17966498882100457, "grad_norm": 2.987612247467041, "learning_rate": 9.42416151625681e-06, "loss": 0.6632, "step": 15027 }, { "epoch": 0.1796769449658652, "grad_norm": 1.9871900081634521, "learning_rate": 9.424071304856965e-06, "loss": 0.64, "step": 15028 }, { "epoch": 0.17968890111072586, "grad_norm": 2.458639621734619, "learning_rate": 9.423981086823211e-06, "loss": 0.6043, "step": 15029 }, { "epoch": 0.17970085725558652, "grad_norm": 2.248211145401001, "learning_rate": 9.42389086215568e-06, "loss": 0.618, "step": 15030 }, { "epoch": 0.17971281340044715, "grad_norm": 2.3946869373321533, "learning_rate": 9.423800630854507e-06, "loss": 0.6425, "step": 15031 }, { "epoch": 0.1797247695453078, "grad_norm": 1.8249284029006958, "learning_rate": 9.423710392919833e-06, "loss": 0.6184, "step": 15032 }, { "epoch": 0.17973672569016846, "grad_norm": 2.6776113510131836, "learning_rate": 9.423620148351787e-06, "loss": 0.6435, "step": 15033 }, { "epoch": 0.17974868183502912, "grad_norm": 2.125131845474243, "learning_rate": 9.42352989715051e-06, "loss": 0.6451, "step": 15034 }, { "epoch": 0.17976063797988975, "grad_norm": 5.153415203094482, "learning_rate": 9.42343963931613e-06, "loss": 0.6478, "step": 15035 }, { "epoch": 0.1797725941247504, "grad_norm": 2.788578510284424, "learning_rate": 9.423349374848789e-06, "loss": 0.5788, "step": 15036 }, { "epoch": 0.17978455026961107, "grad_norm": 1.6909080743789673, "learning_rate": 9.423259103748618e-06, "loss": 0.6625, "step": 15037 }, { "epoch": 0.17979650641447173, "grad_norm": 2.1336052417755127, "learning_rate": 9.423168826015757e-06, "loss": 0.6536, "step": 15038 }, { "epoch": 0.17980846255933236, "grad_norm": 6.194189548492432, "learning_rate": 9.423078541650337e-06, "loss": 0.6355, "step": 15039 }, { "epoch": 0.17982041870419302, "grad_norm": 2.696564197540283, "learning_rate": 9.422988250652495e-06, "loss": 0.5255, "step": 15040 }, { "epoch": 0.17983237484905368, "grad_norm": 2.5062286853790283, "learning_rate": 9.422897953022367e-06, "loss": 0.6893, "step": 15041 }, { "epoch": 0.1798443309939143, "grad_norm": 3.5913476943969727, "learning_rate": 9.422807648760088e-06, "loss": 0.5325, "step": 15042 }, { "epoch": 0.17985628713877497, "grad_norm": 11.748037338256836, "learning_rate": 9.422717337865793e-06, "loss": 0.6707, "step": 15043 }, { "epoch": 0.17986824328363563, "grad_norm": 1.9373366832733154, "learning_rate": 9.422627020339618e-06, "loss": 0.5957, "step": 15044 }, { "epoch": 0.17988019942849628, "grad_norm": 7.183599472045898, "learning_rate": 9.422536696181697e-06, "loss": 0.6125, "step": 15045 }, { "epoch": 0.17989215557335692, "grad_norm": 1.8406130075454712, "learning_rate": 9.422446365392165e-06, "loss": 0.6469, "step": 15046 }, { "epoch": 0.17990411171821757, "grad_norm": 2.7108049392700195, "learning_rate": 9.422356027971162e-06, "loss": 0.566, "step": 15047 }, { "epoch": 0.17991606786307823, "grad_norm": 3.4332339763641357, "learning_rate": 9.42226568391882e-06, "loss": 0.6696, "step": 15048 }, { "epoch": 0.1799280240079389, "grad_norm": 1.875079870223999, "learning_rate": 9.422175333235275e-06, "loss": 0.6824, "step": 15049 }, { "epoch": 0.17993998015279952, "grad_norm": 2.5110113620758057, "learning_rate": 9.42208497592066e-06, "loss": 0.5862, "step": 15050 }, { "epoch": 0.17995193629766018, "grad_norm": 1.6518428325653076, "learning_rate": 9.421994611975115e-06, "loss": 0.7121, "step": 15051 }, { "epoch": 0.17996389244252084, "grad_norm": 2.5615360736846924, "learning_rate": 9.421904241398772e-06, "loss": 0.6346, "step": 15052 }, { "epoch": 0.1799758485873815, "grad_norm": 2.219261884689331, "learning_rate": 9.421813864191769e-06, "loss": 0.5894, "step": 15053 }, { "epoch": 0.17998780473224213, "grad_norm": 3.1780455112457275, "learning_rate": 9.42172348035424e-06, "loss": 0.6626, "step": 15054 }, { "epoch": 0.1799997608771028, "grad_norm": 2.073607921600342, "learning_rate": 9.421633089886322e-06, "loss": 0.681, "step": 15055 }, { "epoch": 0.18001171702196345, "grad_norm": 8.08516788482666, "learning_rate": 9.421542692788149e-06, "loss": 0.6563, "step": 15056 }, { "epoch": 0.18002367316682408, "grad_norm": 2.603161334991455, "learning_rate": 9.421452289059856e-06, "loss": 0.7031, "step": 15057 }, { "epoch": 0.18003562931168474, "grad_norm": 1.4416857957839966, "learning_rate": 9.421361878701579e-06, "loss": 0.592, "step": 15058 }, { "epoch": 0.1800475854565454, "grad_norm": 2.114610433578491, "learning_rate": 9.421271461713456e-06, "loss": 0.6627, "step": 15059 }, { "epoch": 0.18005954160140605, "grad_norm": 2.272380828857422, "learning_rate": 9.421181038095619e-06, "loss": 0.563, "step": 15060 }, { "epoch": 0.18007149774626668, "grad_norm": 3.001012086868286, "learning_rate": 9.421090607848207e-06, "loss": 0.5749, "step": 15061 }, { "epoch": 0.18008345389112734, "grad_norm": 2.6313624382019043, "learning_rate": 9.421000170971352e-06, "loss": 0.706, "step": 15062 }, { "epoch": 0.180095410035988, "grad_norm": 1.4548327922821045, "learning_rate": 9.420909727465193e-06, "loss": 0.5856, "step": 15063 }, { "epoch": 0.18010736618084866, "grad_norm": 4.0011467933654785, "learning_rate": 9.420819277329864e-06, "loss": 0.6958, "step": 15064 }, { "epoch": 0.1801193223257093, "grad_norm": 2.8158397674560547, "learning_rate": 9.420728820565501e-06, "loss": 0.5708, "step": 15065 }, { "epoch": 0.18013127847056995, "grad_norm": 1.981348991394043, "learning_rate": 9.42063835717224e-06, "loss": 0.6452, "step": 15066 }, { "epoch": 0.1801432346154306, "grad_norm": 1.6274439096450806, "learning_rate": 9.420547887150214e-06, "loss": 0.5917, "step": 15067 }, { "epoch": 0.18015519076029124, "grad_norm": 2.6996405124664307, "learning_rate": 9.420457410499562e-06, "loss": 0.7226, "step": 15068 }, { "epoch": 0.1801671469051519, "grad_norm": 2.615757465362549, "learning_rate": 9.420366927220419e-06, "loss": 0.6204, "step": 15069 }, { "epoch": 0.18017910305001256, "grad_norm": 7.2425360679626465, "learning_rate": 9.420276437312919e-06, "loss": 0.5694, "step": 15070 }, { "epoch": 0.18019105919487322, "grad_norm": 3.0320606231689453, "learning_rate": 9.420185940777198e-06, "loss": 0.5965, "step": 15071 }, { "epoch": 0.18020301533973385, "grad_norm": 3.1750638484954834, "learning_rate": 9.420095437613395e-06, "loss": 0.6137, "step": 15072 }, { "epoch": 0.1802149714845945, "grad_norm": 2.235595464706421, "learning_rate": 9.42000492782164e-06, "loss": 0.6242, "step": 15073 }, { "epoch": 0.18022692762945516, "grad_norm": 5.0187883377075195, "learning_rate": 9.419914411402074e-06, "loss": 0.6624, "step": 15074 }, { "epoch": 0.18023888377431582, "grad_norm": 2.189978837966919, "learning_rate": 9.419823888354832e-06, "loss": 0.5975, "step": 15075 }, { "epoch": 0.18025083991917645, "grad_norm": 3.5247673988342285, "learning_rate": 9.419733358680046e-06, "loss": 0.6123, "step": 15076 }, { "epoch": 0.1802627960640371, "grad_norm": 1.7509316205978394, "learning_rate": 9.419642822377855e-06, "loss": 0.614, "step": 15077 }, { "epoch": 0.18027475220889777, "grad_norm": 4.64515495300293, "learning_rate": 9.419552279448393e-06, "loss": 0.5936, "step": 15078 }, { "epoch": 0.1802867083537584, "grad_norm": 2.0966663360595703, "learning_rate": 9.419461729891798e-06, "loss": 0.5961, "step": 15079 }, { "epoch": 0.18029866449861906, "grad_norm": 6.477484703063965, "learning_rate": 9.419371173708203e-06, "loss": 0.6764, "step": 15080 }, { "epoch": 0.18031062064347972, "grad_norm": 1.776413917541504, "learning_rate": 9.419280610897746e-06, "loss": 0.5236, "step": 15081 }, { "epoch": 0.18032257678834038, "grad_norm": 4.004288673400879, "learning_rate": 9.419190041460562e-06, "loss": 0.7081, "step": 15082 }, { "epoch": 0.180334532933201, "grad_norm": 2.535382032394409, "learning_rate": 9.419099465396786e-06, "loss": 0.703, "step": 15083 }, { "epoch": 0.18034648907806167, "grad_norm": 3.6915321350097656, "learning_rate": 9.419008882706556e-06, "loss": 0.6613, "step": 15084 }, { "epoch": 0.18035844522292233, "grad_norm": 1.9529237747192383, "learning_rate": 9.418918293390005e-06, "loss": 0.6185, "step": 15085 }, { "epoch": 0.18037040136778298, "grad_norm": 1.6283822059631348, "learning_rate": 9.418827697447272e-06, "loss": 0.5909, "step": 15086 }, { "epoch": 0.18038235751264362, "grad_norm": 1.7114187479019165, "learning_rate": 9.41873709487849e-06, "loss": 0.661, "step": 15087 }, { "epoch": 0.18039431365750427, "grad_norm": 4.011483192443848, "learning_rate": 9.418646485683797e-06, "loss": 0.6298, "step": 15088 }, { "epoch": 0.18040626980236493, "grad_norm": 19.81117820739746, "learning_rate": 9.418555869863326e-06, "loss": 0.6453, "step": 15089 }, { "epoch": 0.18041822594722556, "grad_norm": 2.181419610977173, "learning_rate": 9.418465247417217e-06, "loss": 0.7456, "step": 15090 }, { "epoch": 0.18043018209208622, "grad_norm": 1.5798463821411133, "learning_rate": 9.418374618345603e-06, "loss": 0.6635, "step": 15091 }, { "epoch": 0.18044213823694688, "grad_norm": 5.232314586639404, "learning_rate": 9.418283982648619e-06, "loss": 0.5637, "step": 15092 }, { "epoch": 0.18045409438180754, "grad_norm": 3.2236344814300537, "learning_rate": 9.418193340326405e-06, "loss": 0.5857, "step": 15093 }, { "epoch": 0.18046605052666817, "grad_norm": 9.582741737365723, "learning_rate": 9.418102691379092e-06, "loss": 0.5898, "step": 15094 }, { "epoch": 0.18047800667152883, "grad_norm": 2.4315779209136963, "learning_rate": 9.41801203580682e-06, "loss": 0.6355, "step": 15095 }, { "epoch": 0.1804899628163895, "grad_norm": 2.3215837478637695, "learning_rate": 9.417921373609722e-06, "loss": 0.5689, "step": 15096 }, { "epoch": 0.18050191896125015, "grad_norm": 3.099560499191284, "learning_rate": 9.417830704787937e-06, "loss": 0.5843, "step": 15097 }, { "epoch": 0.18051387510611078, "grad_norm": 1.7712998390197754, "learning_rate": 9.417740029341598e-06, "loss": 0.5843, "step": 15098 }, { "epoch": 0.18052583125097144, "grad_norm": 2.176647186279297, "learning_rate": 9.41764934727084e-06, "loss": 0.6154, "step": 15099 }, { "epoch": 0.1805377873958321, "grad_norm": 13.923084259033203, "learning_rate": 9.417558658575805e-06, "loss": 0.712, "step": 15100 }, { "epoch": 0.18054974354069273, "grad_norm": 2.569730281829834, "learning_rate": 9.417467963256624e-06, "loss": 0.5983, "step": 15101 }, { "epoch": 0.18056169968555338, "grad_norm": 1.8337193727493286, "learning_rate": 9.417377261313431e-06, "loss": 0.6295, "step": 15102 }, { "epoch": 0.18057365583041404, "grad_norm": 2.6200828552246094, "learning_rate": 9.417286552746368e-06, "loss": 0.623, "step": 15103 }, { "epoch": 0.1805856119752747, "grad_norm": 3.6762490272521973, "learning_rate": 9.417195837555568e-06, "loss": 0.6734, "step": 15104 }, { "epoch": 0.18059756812013533, "grad_norm": 2.5917701721191406, "learning_rate": 9.417105115741167e-06, "loss": 0.6883, "step": 15105 }, { "epoch": 0.180609524264996, "grad_norm": 1.7070046663284302, "learning_rate": 9.417014387303301e-06, "loss": 0.6398, "step": 15106 }, { "epoch": 0.18062148040985665, "grad_norm": 2.6014297008514404, "learning_rate": 9.416923652242108e-06, "loss": 0.7043, "step": 15107 }, { "epoch": 0.1806334365547173, "grad_norm": 1.972480297088623, "learning_rate": 9.416832910557718e-06, "loss": 0.6709, "step": 15108 }, { "epoch": 0.18064539269957794, "grad_norm": 1.9867403507232666, "learning_rate": 9.416742162250275e-06, "loss": 0.5941, "step": 15109 }, { "epoch": 0.1806573488444386, "grad_norm": 1.9245002269744873, "learning_rate": 9.41665140731991e-06, "loss": 0.6203, "step": 15110 }, { "epoch": 0.18066930498929926, "grad_norm": 2.151968479156494, "learning_rate": 9.41656064576676e-06, "loss": 0.5542, "step": 15111 }, { "epoch": 0.18068126113415992, "grad_norm": 1.4555341005325317, "learning_rate": 9.416469877590962e-06, "loss": 0.5301, "step": 15112 }, { "epoch": 0.18069321727902055, "grad_norm": 1.8933801651000977, "learning_rate": 9.416379102792652e-06, "loss": 0.534, "step": 15113 }, { "epoch": 0.1807051734238812, "grad_norm": 3.8114330768585205, "learning_rate": 9.416288321371966e-06, "loss": 0.6327, "step": 15114 }, { "epoch": 0.18071712956874186, "grad_norm": 2.2872023582458496, "learning_rate": 9.41619753332904e-06, "loss": 0.6685, "step": 15115 }, { "epoch": 0.1807290857136025, "grad_norm": 1.8986895084381104, "learning_rate": 9.41610673866401e-06, "loss": 0.5288, "step": 15116 }, { "epoch": 0.18074104185846315, "grad_norm": 1.989703893661499, "learning_rate": 9.416015937377012e-06, "loss": 0.6852, "step": 15117 }, { "epoch": 0.1807529980033238, "grad_norm": 1.5600841045379639, "learning_rate": 9.415925129468184e-06, "loss": 0.5548, "step": 15118 }, { "epoch": 0.18076495414818447, "grad_norm": 1.8695074319839478, "learning_rate": 9.415834314937658e-06, "loss": 0.685, "step": 15119 }, { "epoch": 0.1807769102930451, "grad_norm": 2.158622980117798, "learning_rate": 9.415743493785574e-06, "loss": 0.7201, "step": 15120 }, { "epoch": 0.18078886643790576, "grad_norm": 1.5360759496688843, "learning_rate": 9.415652666012068e-06, "loss": 0.6567, "step": 15121 }, { "epoch": 0.18080082258276642, "grad_norm": 2.118879556655884, "learning_rate": 9.415561831617273e-06, "loss": 0.5112, "step": 15122 }, { "epoch": 0.18081277872762708, "grad_norm": 1.890758991241455, "learning_rate": 9.415470990601328e-06, "loss": 0.6089, "step": 15123 }, { "epoch": 0.1808247348724877, "grad_norm": 3.967308282852173, "learning_rate": 9.41538014296437e-06, "loss": 0.5356, "step": 15124 }, { "epoch": 0.18083669101734837, "grad_norm": 4.6140971183776855, "learning_rate": 9.415289288706531e-06, "loss": 0.5693, "step": 15125 }, { "epoch": 0.18084864716220903, "grad_norm": 1.960909366607666, "learning_rate": 9.415198427827951e-06, "loss": 0.5387, "step": 15126 }, { "epoch": 0.18086060330706966, "grad_norm": 2.0247113704681396, "learning_rate": 9.415107560328768e-06, "loss": 0.6353, "step": 15127 }, { "epoch": 0.18087255945193031, "grad_norm": 1.7954877614974976, "learning_rate": 9.415016686209113e-06, "loss": 0.6707, "step": 15128 }, { "epoch": 0.18088451559679097, "grad_norm": 3.611679792404175, "learning_rate": 9.414925805469126e-06, "loss": 0.6197, "step": 15129 }, { "epoch": 0.18089647174165163, "grad_norm": 2.1644084453582764, "learning_rate": 9.41483491810894e-06, "loss": 0.6836, "step": 15130 }, { "epoch": 0.18090842788651226, "grad_norm": 6.202472686767578, "learning_rate": 9.414744024128694e-06, "loss": 0.5863, "step": 15131 }, { "epoch": 0.18092038403137292, "grad_norm": 3.0840797424316406, "learning_rate": 9.414653123528525e-06, "loss": 0.6621, "step": 15132 }, { "epoch": 0.18093234017623358, "grad_norm": 2.4865190982818604, "learning_rate": 9.414562216308566e-06, "loss": 0.6044, "step": 15133 }, { "epoch": 0.18094429632109424, "grad_norm": 1.7345703840255737, "learning_rate": 9.414471302468959e-06, "loss": 0.6152, "step": 15134 }, { "epoch": 0.18095625246595487, "grad_norm": 2.810284376144409, "learning_rate": 9.414380382009833e-06, "loss": 0.6658, "step": 15135 }, { "epoch": 0.18096820861081553, "grad_norm": 3.834944486618042, "learning_rate": 9.414289454931329e-06, "loss": 0.635, "step": 15136 }, { "epoch": 0.1809801647556762, "grad_norm": 1.6423596143722534, "learning_rate": 9.414198521233583e-06, "loss": 0.6059, "step": 15137 }, { "epoch": 0.18099212090053682, "grad_norm": 1.6695553064346313, "learning_rate": 9.41410758091673e-06, "loss": 0.5725, "step": 15138 }, { "epoch": 0.18100407704539748, "grad_norm": 2.452972173690796, "learning_rate": 9.414016633980907e-06, "loss": 0.5965, "step": 15139 }, { "epoch": 0.18101603319025814, "grad_norm": 2.0293896198272705, "learning_rate": 9.41392568042625e-06, "loss": 0.6774, "step": 15140 }, { "epoch": 0.1810279893351188, "grad_norm": 3.2103493213653564, "learning_rate": 9.413834720252895e-06, "loss": 0.5529, "step": 15141 }, { "epoch": 0.18103994547997942, "grad_norm": 4.507295608520508, "learning_rate": 9.41374375346098e-06, "loss": 0.5728, "step": 15142 }, { "epoch": 0.18105190162484008, "grad_norm": 2.0148508548736572, "learning_rate": 9.413652780050642e-06, "loss": 0.6667, "step": 15143 }, { "epoch": 0.18106385776970074, "grad_norm": 6.1225128173828125, "learning_rate": 9.413561800022015e-06, "loss": 0.6299, "step": 15144 }, { "epoch": 0.1810758139145614, "grad_norm": 4.5435333251953125, "learning_rate": 9.413470813375236e-06, "loss": 0.5935, "step": 15145 }, { "epoch": 0.18108777005942203, "grad_norm": 2.1769204139709473, "learning_rate": 9.413379820110442e-06, "loss": 0.6511, "step": 15146 }, { "epoch": 0.1810997262042827, "grad_norm": 1.9157264232635498, "learning_rate": 9.413288820227769e-06, "loss": 0.713, "step": 15147 }, { "epoch": 0.18111168234914335, "grad_norm": 1.904022216796875, "learning_rate": 9.413197813727354e-06, "loss": 0.6013, "step": 15148 }, { "epoch": 0.18112363849400398, "grad_norm": 5.650284290313721, "learning_rate": 9.413106800609332e-06, "loss": 0.6853, "step": 15149 }, { "epoch": 0.18113559463886464, "grad_norm": 2.611830234527588, "learning_rate": 9.413015780873842e-06, "loss": 0.5731, "step": 15150 }, { "epoch": 0.1811475507837253, "grad_norm": 2.4073092937469482, "learning_rate": 9.41292475452102e-06, "loss": 0.7412, "step": 15151 }, { "epoch": 0.18115950692858596, "grad_norm": 2.0508131980895996, "learning_rate": 9.412833721551e-06, "loss": 0.5888, "step": 15152 }, { "epoch": 0.1811714630734466, "grad_norm": 1.8999069929122925, "learning_rate": 9.41274268196392e-06, "loss": 0.5757, "step": 15153 }, { "epoch": 0.18118341921830725, "grad_norm": 2.688239812850952, "learning_rate": 9.412651635759918e-06, "loss": 0.6391, "step": 15154 }, { "epoch": 0.1811953753631679, "grad_norm": 5.862343788146973, "learning_rate": 9.412560582939129e-06, "loss": 0.7175, "step": 15155 }, { "epoch": 0.18120733150802856, "grad_norm": 4.328077793121338, "learning_rate": 9.412469523501688e-06, "loss": 0.6723, "step": 15156 }, { "epoch": 0.1812192876528892, "grad_norm": 2.507002353668213, "learning_rate": 9.412378457447736e-06, "loss": 0.6334, "step": 15157 }, { "epoch": 0.18123124379774985, "grad_norm": 2.529982328414917, "learning_rate": 9.412287384777406e-06, "loss": 0.6132, "step": 15158 }, { "epoch": 0.1812431999426105, "grad_norm": 3.017578363418579, "learning_rate": 9.412196305490833e-06, "loss": 0.5054, "step": 15159 }, { "epoch": 0.18125515608747114, "grad_norm": 5.0966386795043945, "learning_rate": 9.41210521958816e-06, "loss": 0.6394, "step": 15160 }, { "epoch": 0.1812671122323318, "grad_norm": 2.0349652767181396, "learning_rate": 9.412014127069515e-06, "loss": 0.6038, "step": 15161 }, { "epoch": 0.18127906837719246, "grad_norm": 1.7394253015518188, "learning_rate": 9.411923027935042e-06, "loss": 0.5161, "step": 15162 }, { "epoch": 0.18129102452205312, "grad_norm": 4.594753265380859, "learning_rate": 9.411831922184875e-06, "loss": 0.5909, "step": 15163 }, { "epoch": 0.18130298066691375, "grad_norm": 1.71045982837677, "learning_rate": 9.411740809819149e-06, "loss": 0.8185, "step": 15164 }, { "epoch": 0.1813149368117744, "grad_norm": 2.981658458709717, "learning_rate": 9.411649690838002e-06, "loss": 0.5915, "step": 15165 }, { "epoch": 0.18132689295663507, "grad_norm": 3.218590259552002, "learning_rate": 9.411558565241572e-06, "loss": 0.7378, "step": 15166 }, { "epoch": 0.18133884910149572, "grad_norm": 2.8585734367370605, "learning_rate": 9.411467433029993e-06, "loss": 0.5875, "step": 15167 }, { "epoch": 0.18135080524635636, "grad_norm": 2.0127413272857666, "learning_rate": 9.411376294203403e-06, "loss": 0.7003, "step": 15168 }, { "epoch": 0.18136276139121701, "grad_norm": 2.9543066024780273, "learning_rate": 9.411285148761938e-06, "loss": 0.6157, "step": 15169 }, { "epoch": 0.18137471753607767, "grad_norm": 14.2929048538208, "learning_rate": 9.411193996705736e-06, "loss": 0.6583, "step": 15170 }, { "epoch": 0.18138667368093833, "grad_norm": 2.1871585845947266, "learning_rate": 9.411102838034934e-06, "loss": 0.5984, "step": 15171 }, { "epoch": 0.18139862982579896, "grad_norm": 2.547356605529785, "learning_rate": 9.411011672749666e-06, "loss": 0.5646, "step": 15172 }, { "epoch": 0.18141058597065962, "grad_norm": 2.1840860843658447, "learning_rate": 9.410920500850071e-06, "loss": 0.6387, "step": 15173 }, { "epoch": 0.18142254211552028, "grad_norm": 3.4430599212646484, "learning_rate": 9.410829322336285e-06, "loss": 0.6081, "step": 15174 }, { "epoch": 0.1814344982603809, "grad_norm": 3.7293403148651123, "learning_rate": 9.410738137208445e-06, "loss": 0.5937, "step": 15175 }, { "epoch": 0.18144645440524157, "grad_norm": 4.3191633224487305, "learning_rate": 9.410646945466685e-06, "loss": 0.5549, "step": 15176 }, { "epoch": 0.18145841055010223, "grad_norm": 2.2332258224487305, "learning_rate": 9.410555747111148e-06, "loss": 0.5271, "step": 15177 }, { "epoch": 0.1814703666949629, "grad_norm": 2.1692731380462646, "learning_rate": 9.410464542141966e-06, "loss": 0.6274, "step": 15178 }, { "epoch": 0.18148232283982352, "grad_norm": 1.8780158758163452, "learning_rate": 9.410373330559277e-06, "loss": 0.6739, "step": 15179 }, { "epoch": 0.18149427898468418, "grad_norm": 3.527937650680542, "learning_rate": 9.410282112363216e-06, "loss": 0.7457, "step": 15180 }, { "epoch": 0.18150623512954484, "grad_norm": 2.5471508502960205, "learning_rate": 9.41019088755392e-06, "loss": 0.7074, "step": 15181 }, { "epoch": 0.1815181912744055, "grad_norm": 2.330683946609497, "learning_rate": 9.41009965613153e-06, "loss": 0.5428, "step": 15182 }, { "epoch": 0.18153014741926612, "grad_norm": 2.492263078689575, "learning_rate": 9.41000841809618e-06, "loss": 0.5346, "step": 15183 }, { "epoch": 0.18154210356412678, "grad_norm": 2.8039164543151855, "learning_rate": 9.409917173448005e-06, "loss": 0.593, "step": 15184 }, { "epoch": 0.18155405970898744, "grad_norm": 2.792433977127075, "learning_rate": 9.409825922187144e-06, "loss": 0.6487, "step": 15185 }, { "epoch": 0.18156601585384807, "grad_norm": 6.220183372497559, "learning_rate": 9.409734664313733e-06, "loss": 0.5923, "step": 15186 }, { "epoch": 0.18157797199870873, "grad_norm": 4.460604667663574, "learning_rate": 9.40964339982791e-06, "loss": 0.6499, "step": 15187 }, { "epoch": 0.1815899281435694, "grad_norm": 3.3706963062286377, "learning_rate": 9.40955212872981e-06, "loss": 0.6482, "step": 15188 }, { "epoch": 0.18160188428843005, "grad_norm": 3.1852951049804688, "learning_rate": 9.409460851019571e-06, "loss": 0.6026, "step": 15189 }, { "epoch": 0.18161384043329068, "grad_norm": 3.5196943283081055, "learning_rate": 9.409369566697331e-06, "loss": 0.618, "step": 15190 }, { "epoch": 0.18162579657815134, "grad_norm": 1.6633988618850708, "learning_rate": 9.409278275763226e-06, "loss": 0.5803, "step": 15191 }, { "epoch": 0.181637752723012, "grad_norm": 2.3088722229003906, "learning_rate": 9.40918697821739e-06, "loss": 0.601, "step": 15192 }, { "epoch": 0.18164970886787266, "grad_norm": 3.483506202697754, "learning_rate": 9.409095674059965e-06, "loss": 0.583, "step": 15193 }, { "epoch": 0.1816616650127333, "grad_norm": 2.2323572635650635, "learning_rate": 9.409004363291086e-06, "loss": 0.5509, "step": 15194 }, { "epoch": 0.18167362115759395, "grad_norm": 4.378592014312744, "learning_rate": 9.408913045910886e-06, "loss": 0.561, "step": 15195 }, { "epoch": 0.1816855773024546, "grad_norm": 2.286529779434204, "learning_rate": 9.408821721919508e-06, "loss": 0.6662, "step": 15196 }, { "epoch": 0.18169753344731523, "grad_norm": 3.5920467376708984, "learning_rate": 9.408730391317084e-06, "loss": 0.6731, "step": 15197 }, { "epoch": 0.1817094895921759, "grad_norm": 3.0868453979492188, "learning_rate": 9.408639054103754e-06, "loss": 0.553, "step": 15198 }, { "epoch": 0.18172144573703655, "grad_norm": 5.2980804443359375, "learning_rate": 9.408547710279656e-06, "loss": 0.5838, "step": 15199 }, { "epoch": 0.1817334018818972, "grad_norm": 3.1764187812805176, "learning_rate": 9.408456359844922e-06, "loss": 0.645, "step": 15200 }, { "epoch": 0.18174535802675784, "grad_norm": 2.449378490447998, "learning_rate": 9.408365002799693e-06, "loss": 0.5848, "step": 15201 }, { "epoch": 0.1817573141716185, "grad_norm": 3.192964792251587, "learning_rate": 9.408273639144105e-06, "loss": 0.6762, "step": 15202 }, { "epoch": 0.18176927031647916, "grad_norm": 2.351504325866699, "learning_rate": 9.408182268878297e-06, "loss": 0.6809, "step": 15203 }, { "epoch": 0.18178122646133982, "grad_norm": 2.83103609085083, "learning_rate": 9.408090892002402e-06, "loss": 0.632, "step": 15204 }, { "epoch": 0.18179318260620045, "grad_norm": 2.150775671005249, "learning_rate": 9.40799950851656e-06, "loss": 0.5542, "step": 15205 }, { "epoch": 0.1818051387510611, "grad_norm": 2.9542107582092285, "learning_rate": 9.407908118420905e-06, "loss": 0.6064, "step": 15206 }, { "epoch": 0.18181709489592177, "grad_norm": 2.8081305027008057, "learning_rate": 9.407816721715578e-06, "loss": 0.6017, "step": 15207 }, { "epoch": 0.1818290510407824, "grad_norm": 1.860074520111084, "learning_rate": 9.407725318400713e-06, "loss": 0.5635, "step": 15208 }, { "epoch": 0.18184100718564306, "grad_norm": 1.8506139516830444, "learning_rate": 9.40763390847645e-06, "loss": 0.6689, "step": 15209 }, { "epoch": 0.18185296333050371, "grad_norm": 1.7951326370239258, "learning_rate": 9.407542491942923e-06, "loss": 0.728, "step": 15210 }, { "epoch": 0.18186491947536437, "grad_norm": 4.666656970977783, "learning_rate": 9.40745106880027e-06, "loss": 0.6618, "step": 15211 }, { "epoch": 0.181876875620225, "grad_norm": 1.7273757457733154, "learning_rate": 9.40735963904863e-06, "loss": 0.5772, "step": 15212 }, { "epoch": 0.18188883176508566, "grad_norm": 4.3312153816223145, "learning_rate": 9.407268202688138e-06, "loss": 0.7121, "step": 15213 }, { "epoch": 0.18190078790994632, "grad_norm": 2.104060173034668, "learning_rate": 9.40717675971893e-06, "loss": 0.594, "step": 15214 }, { "epoch": 0.18191274405480698, "grad_norm": 3.9135372638702393, "learning_rate": 9.407085310141147e-06, "loss": 0.6514, "step": 15215 }, { "epoch": 0.1819247001996676, "grad_norm": 2.7762973308563232, "learning_rate": 9.406993853954922e-06, "loss": 0.6952, "step": 15216 }, { "epoch": 0.18193665634452827, "grad_norm": 6.07488489151001, "learning_rate": 9.406902391160395e-06, "loss": 0.6123, "step": 15217 }, { "epoch": 0.18194861248938893, "grad_norm": 5.112077713012695, "learning_rate": 9.406810921757703e-06, "loss": 0.6034, "step": 15218 }, { "epoch": 0.18196056863424956, "grad_norm": 1.904690146446228, "learning_rate": 9.406719445746982e-06, "loss": 0.5795, "step": 15219 }, { "epoch": 0.18197252477911022, "grad_norm": 2.2095096111297607, "learning_rate": 9.406627963128368e-06, "loss": 0.6371, "step": 15220 }, { "epoch": 0.18198448092397088, "grad_norm": 8.102752685546875, "learning_rate": 9.406536473902002e-06, "loss": 0.5787, "step": 15221 }, { "epoch": 0.18199643706883153, "grad_norm": 2.084892749786377, "learning_rate": 9.406444978068016e-06, "loss": 0.6814, "step": 15222 }, { "epoch": 0.18200839321369217, "grad_norm": 2.9587817192077637, "learning_rate": 9.406353475626552e-06, "loss": 0.6687, "step": 15223 }, { "epoch": 0.18202034935855282, "grad_norm": 2.550786256790161, "learning_rate": 9.406261966577746e-06, "loss": 0.6925, "step": 15224 }, { "epoch": 0.18203230550341348, "grad_norm": 2.960160493850708, "learning_rate": 9.406170450921734e-06, "loss": 0.6783, "step": 15225 }, { "epoch": 0.18204426164827414, "grad_norm": 1.7196351289749146, "learning_rate": 9.406078928658652e-06, "loss": 0.5679, "step": 15226 }, { "epoch": 0.18205621779313477, "grad_norm": 2.9351112842559814, "learning_rate": 9.40598739978864e-06, "loss": 0.5589, "step": 15227 }, { "epoch": 0.18206817393799543, "grad_norm": 1.9790648221969604, "learning_rate": 9.405895864311835e-06, "loss": 0.6652, "step": 15228 }, { "epoch": 0.1820801300828561, "grad_norm": 2.5279791355133057, "learning_rate": 9.405804322228374e-06, "loss": 0.7123, "step": 15229 }, { "epoch": 0.18209208622771675, "grad_norm": 3.1994881629943848, "learning_rate": 9.405712773538394e-06, "loss": 0.5641, "step": 15230 }, { "epoch": 0.18210404237257738, "grad_norm": 3.221999168395996, "learning_rate": 9.40562121824203e-06, "loss": 0.6401, "step": 15231 }, { "epoch": 0.18211599851743804, "grad_norm": 2.5363576412200928, "learning_rate": 9.405529656339423e-06, "loss": 0.7059, "step": 15232 }, { "epoch": 0.1821279546622987, "grad_norm": 2.476004123687744, "learning_rate": 9.405438087830707e-06, "loss": 0.5546, "step": 15233 }, { "epoch": 0.18213991080715933, "grad_norm": 2.3471922874450684, "learning_rate": 9.405346512716021e-06, "loss": 0.6087, "step": 15234 }, { "epoch": 0.18215186695202, "grad_norm": 4.551801681518555, "learning_rate": 9.405254930995504e-06, "loss": 0.5662, "step": 15235 }, { "epoch": 0.18216382309688064, "grad_norm": 5.7491865158081055, "learning_rate": 9.40516334266929e-06, "loss": 0.6542, "step": 15236 }, { "epoch": 0.1821757792417413, "grad_norm": 4.00051736831665, "learning_rate": 9.40507174773752e-06, "loss": 0.5377, "step": 15237 }, { "epoch": 0.18218773538660193, "grad_norm": 13.335256576538086, "learning_rate": 9.404980146200327e-06, "loss": 0.6746, "step": 15238 }, { "epoch": 0.1821996915314626, "grad_norm": 1.9737077951431274, "learning_rate": 9.40488853805785e-06, "loss": 0.5368, "step": 15239 }, { "epoch": 0.18221164767632325, "grad_norm": 2.1815011501312256, "learning_rate": 9.404796923310228e-06, "loss": 0.6219, "step": 15240 }, { "epoch": 0.1822236038211839, "grad_norm": 7.080399990081787, "learning_rate": 9.404705301957598e-06, "loss": 0.6239, "step": 15241 }, { "epoch": 0.18223555996604454, "grad_norm": 4.453577041625977, "learning_rate": 9.404613674000097e-06, "loss": 0.6887, "step": 15242 }, { "epoch": 0.1822475161109052, "grad_norm": 2.511983633041382, "learning_rate": 9.404522039437862e-06, "loss": 0.6542, "step": 15243 }, { "epoch": 0.18225947225576586, "grad_norm": 2.2279882431030273, "learning_rate": 9.404430398271029e-06, "loss": 0.7027, "step": 15244 }, { "epoch": 0.1822714284006265, "grad_norm": 4.308931827545166, "learning_rate": 9.40433875049974e-06, "loss": 0.7318, "step": 15245 }, { "epoch": 0.18228338454548715, "grad_norm": 2.2811837196350098, "learning_rate": 9.404247096124125e-06, "loss": 0.6453, "step": 15246 }, { "epoch": 0.1822953406903478, "grad_norm": 2.287911891937256, "learning_rate": 9.40415543514433e-06, "loss": 0.6457, "step": 15247 }, { "epoch": 0.18230729683520847, "grad_norm": 2.2609333992004395, "learning_rate": 9.404063767560487e-06, "loss": 0.673, "step": 15248 }, { "epoch": 0.1823192529800691, "grad_norm": 2.013117790222168, "learning_rate": 9.403972093372733e-06, "loss": 0.6502, "step": 15249 }, { "epoch": 0.18233120912492976, "grad_norm": 1.4712928533554077, "learning_rate": 9.403880412581209e-06, "loss": 0.5872, "step": 15250 }, { "epoch": 0.1823431652697904, "grad_norm": 4.108395099639893, "learning_rate": 9.40378872518605e-06, "loss": 0.7269, "step": 15251 }, { "epoch": 0.18235512141465107, "grad_norm": 1.6368581056594849, "learning_rate": 9.403697031187394e-06, "loss": 0.6027, "step": 15252 }, { "epoch": 0.1823670775595117, "grad_norm": 1.948568344116211, "learning_rate": 9.40360533058538e-06, "loss": 0.5545, "step": 15253 }, { "epoch": 0.18237903370437236, "grad_norm": 12.600366592407227, "learning_rate": 9.403513623380143e-06, "loss": 0.6407, "step": 15254 }, { "epoch": 0.18239098984923302, "grad_norm": 2.3002381324768066, "learning_rate": 9.403421909571822e-06, "loss": 0.6924, "step": 15255 }, { "epoch": 0.18240294599409365, "grad_norm": 2.7428500652313232, "learning_rate": 9.403330189160554e-06, "loss": 0.6461, "step": 15256 }, { "epoch": 0.1824149021389543, "grad_norm": 2.1074962615966797, "learning_rate": 9.403238462146476e-06, "loss": 0.6629, "step": 15257 }, { "epoch": 0.18242685828381497, "grad_norm": 2.2833633422851562, "learning_rate": 9.403146728529727e-06, "loss": 0.725, "step": 15258 }, { "epoch": 0.18243881442867563, "grad_norm": 2.3257973194122314, "learning_rate": 9.403054988310442e-06, "loss": 0.6252, "step": 15259 }, { "epoch": 0.18245077057353626, "grad_norm": 2.7227258682250977, "learning_rate": 9.402963241488763e-06, "loss": 0.6113, "step": 15260 }, { "epoch": 0.18246272671839692, "grad_norm": 3.2248318195343018, "learning_rate": 9.402871488064824e-06, "loss": 0.6736, "step": 15261 }, { "epoch": 0.18247468286325758, "grad_norm": 2.4343650341033936, "learning_rate": 9.402779728038764e-06, "loss": 0.6638, "step": 15262 }, { "epoch": 0.18248663900811823, "grad_norm": 1.820450782775879, "learning_rate": 9.40268796141072e-06, "loss": 0.6039, "step": 15263 }, { "epoch": 0.18249859515297887, "grad_norm": 6.868619918823242, "learning_rate": 9.40259618818083e-06, "loss": 0.6729, "step": 15264 }, { "epoch": 0.18251055129783952, "grad_norm": 2.4246408939361572, "learning_rate": 9.402504408349232e-06, "loss": 0.6399, "step": 15265 }, { "epoch": 0.18252250744270018, "grad_norm": 2.084679365158081, "learning_rate": 9.40241262191606e-06, "loss": 0.6274, "step": 15266 }, { "epoch": 0.1825344635875608, "grad_norm": 2.414949655532837, "learning_rate": 9.402320828881458e-06, "loss": 0.6015, "step": 15267 }, { "epoch": 0.18254641973242147, "grad_norm": 3.8942627906799316, "learning_rate": 9.402229029245558e-06, "loss": 0.5827, "step": 15268 }, { "epoch": 0.18255837587728213, "grad_norm": 30.365554809570312, "learning_rate": 9.4021372230085e-06, "loss": 0.6322, "step": 15269 }, { "epoch": 0.1825703320221428, "grad_norm": 1.7730565071105957, "learning_rate": 9.402045410170423e-06, "loss": 0.6695, "step": 15270 }, { "epoch": 0.18258228816700342, "grad_norm": 1.8422045707702637, "learning_rate": 9.401953590731462e-06, "loss": 0.6287, "step": 15271 }, { "epoch": 0.18259424431186408, "grad_norm": 2.1995978355407715, "learning_rate": 9.401861764691757e-06, "loss": 0.7084, "step": 15272 }, { "epoch": 0.18260620045672474, "grad_norm": 2.9650042057037354, "learning_rate": 9.401769932051446e-06, "loss": 0.6369, "step": 15273 }, { "epoch": 0.1826181566015854, "grad_norm": 2.1138036251068115, "learning_rate": 9.401678092810662e-06, "loss": 0.6623, "step": 15274 }, { "epoch": 0.18263011274644603, "grad_norm": 1.7217013835906982, "learning_rate": 9.401586246969547e-06, "loss": 0.4635, "step": 15275 }, { "epoch": 0.18264206889130669, "grad_norm": 3.2665045261383057, "learning_rate": 9.40149439452824e-06, "loss": 0.6952, "step": 15276 }, { "epoch": 0.18265402503616734, "grad_norm": 4.738964080810547, "learning_rate": 9.401402535486874e-06, "loss": 0.6561, "step": 15277 }, { "epoch": 0.182665981181028, "grad_norm": 2.6288399696350098, "learning_rate": 9.401310669845591e-06, "loss": 0.5923, "step": 15278 }, { "epoch": 0.18267793732588863, "grad_norm": 2.5181734561920166, "learning_rate": 9.401218797604526e-06, "loss": 0.6234, "step": 15279 }, { "epoch": 0.1826898934707493, "grad_norm": 2.8469042778015137, "learning_rate": 9.401126918763817e-06, "loss": 0.5456, "step": 15280 }, { "epoch": 0.18270184961560995, "grad_norm": 12.508406639099121, "learning_rate": 9.401035033323604e-06, "loss": 0.5272, "step": 15281 }, { "epoch": 0.18271380576047058, "grad_norm": 1.765745997428894, "learning_rate": 9.400943141284023e-06, "loss": 0.6218, "step": 15282 }, { "epoch": 0.18272576190533124, "grad_norm": 6.729104518890381, "learning_rate": 9.400851242645211e-06, "loss": 0.5458, "step": 15283 }, { "epoch": 0.1827377180501919, "grad_norm": 3.56545352935791, "learning_rate": 9.400759337407309e-06, "loss": 0.6689, "step": 15284 }, { "epoch": 0.18274967419505256, "grad_norm": 5.272531986236572, "learning_rate": 9.400667425570452e-06, "loss": 0.59, "step": 15285 }, { "epoch": 0.1827616303399132, "grad_norm": 3.713793992996216, "learning_rate": 9.400575507134778e-06, "loss": 0.6437, "step": 15286 }, { "epoch": 0.18277358648477385, "grad_norm": 3.562969923019409, "learning_rate": 9.400483582100424e-06, "loss": 0.6813, "step": 15287 }, { "epoch": 0.1827855426296345, "grad_norm": 2.6927573680877686, "learning_rate": 9.400391650467532e-06, "loss": 0.7249, "step": 15288 }, { "epoch": 0.18279749877449517, "grad_norm": 3.027764320373535, "learning_rate": 9.400299712236235e-06, "loss": 0.5689, "step": 15289 }, { "epoch": 0.1828094549193558, "grad_norm": 1.6281064748764038, "learning_rate": 9.400207767406674e-06, "loss": 0.564, "step": 15290 }, { "epoch": 0.18282141106421645, "grad_norm": 1.9369139671325684, "learning_rate": 9.400115815978986e-06, "loss": 0.5879, "step": 15291 }, { "epoch": 0.1828333672090771, "grad_norm": 2.024824857711792, "learning_rate": 9.400023857953307e-06, "loss": 0.5287, "step": 15292 }, { "epoch": 0.18284532335393774, "grad_norm": 1.7591303586959839, "learning_rate": 9.399931893329779e-06, "loss": 0.6316, "step": 15293 }, { "epoch": 0.1828572794987984, "grad_norm": 4.654132843017578, "learning_rate": 9.399839922108536e-06, "loss": 0.658, "step": 15294 }, { "epoch": 0.18286923564365906, "grad_norm": 2.327357530593872, "learning_rate": 9.399747944289717e-06, "loss": 0.552, "step": 15295 }, { "epoch": 0.18288119178851972, "grad_norm": 3.2016539573669434, "learning_rate": 9.399655959873461e-06, "loss": 0.6034, "step": 15296 }, { "epoch": 0.18289314793338035, "grad_norm": 1.9879131317138672, "learning_rate": 9.399563968859906e-06, "loss": 0.6048, "step": 15297 }, { "epoch": 0.182905104078241, "grad_norm": 2.4676060676574707, "learning_rate": 9.399471971249188e-06, "loss": 0.7403, "step": 15298 }, { "epoch": 0.18291706022310167, "grad_norm": 2.919663429260254, "learning_rate": 9.399379967041447e-06, "loss": 0.6108, "step": 15299 }, { "epoch": 0.18292901636796233, "grad_norm": 2.079014778137207, "learning_rate": 9.39928795623682e-06, "loss": 0.6071, "step": 15300 }, { "epoch": 0.18294097251282296, "grad_norm": 2.2519333362579346, "learning_rate": 9.399195938835443e-06, "loss": 0.7035, "step": 15301 }, { "epoch": 0.18295292865768362, "grad_norm": 3.472468852996826, "learning_rate": 9.39910391483746e-06, "loss": 0.4906, "step": 15302 }, { "epoch": 0.18296488480254428, "grad_norm": 1.737709641456604, "learning_rate": 9.399011884243e-06, "loss": 0.5691, "step": 15303 }, { "epoch": 0.1829768409474049, "grad_norm": 3.953620433807373, "learning_rate": 9.39891984705221e-06, "loss": 0.6542, "step": 15304 }, { "epoch": 0.18298879709226556, "grad_norm": 2.1521599292755127, "learning_rate": 9.398827803265223e-06, "loss": 0.683, "step": 15305 }, { "epoch": 0.18300075323712622, "grad_norm": 3.6680567264556885, "learning_rate": 9.398735752882178e-06, "loss": 0.5584, "step": 15306 }, { "epoch": 0.18301270938198688, "grad_norm": 3.482970952987671, "learning_rate": 9.398643695903214e-06, "loss": 0.6155, "step": 15307 }, { "epoch": 0.1830246655268475, "grad_norm": 2.2661800384521484, "learning_rate": 9.398551632328468e-06, "loss": 0.5851, "step": 15308 }, { "epoch": 0.18303662167170817, "grad_norm": 3.0167391300201416, "learning_rate": 9.398459562158077e-06, "loss": 0.6564, "step": 15309 }, { "epoch": 0.18304857781656883, "grad_norm": 3.8860669136047363, "learning_rate": 9.39836748539218e-06, "loss": 0.6589, "step": 15310 }, { "epoch": 0.1830605339614295, "grad_norm": 3.760566234588623, "learning_rate": 9.398275402030916e-06, "loss": 0.5736, "step": 15311 }, { "epoch": 0.18307249010629012, "grad_norm": 8.252382278442383, "learning_rate": 9.398183312074423e-06, "loss": 0.5878, "step": 15312 }, { "epoch": 0.18308444625115078, "grad_norm": 1.8601993322372437, "learning_rate": 9.398091215522837e-06, "loss": 0.62, "step": 15313 }, { "epoch": 0.18309640239601144, "grad_norm": 6.838173866271973, "learning_rate": 9.3979991123763e-06, "loss": 0.598, "step": 15314 }, { "epoch": 0.18310835854087207, "grad_norm": 2.156855583190918, "learning_rate": 9.397907002634944e-06, "loss": 0.6903, "step": 15315 }, { "epoch": 0.18312031468573273, "grad_norm": 1.7244778871536255, "learning_rate": 9.397814886298915e-06, "loss": 0.63, "step": 15316 }, { "epoch": 0.18313227083059339, "grad_norm": 4.9113688468933105, "learning_rate": 9.397722763368344e-06, "loss": 0.6648, "step": 15317 }, { "epoch": 0.18314422697545404, "grad_norm": 5.828550338745117, "learning_rate": 9.397630633843374e-06, "loss": 0.5035, "step": 15318 }, { "epoch": 0.18315618312031468, "grad_norm": 4.80429744720459, "learning_rate": 9.39753849772414e-06, "loss": 0.6025, "step": 15319 }, { "epoch": 0.18316813926517533, "grad_norm": 20.402851104736328, "learning_rate": 9.397446355010782e-06, "loss": 0.6325, "step": 15320 }, { "epoch": 0.183180095410036, "grad_norm": 3.4280049800872803, "learning_rate": 9.397354205703437e-06, "loss": 0.5821, "step": 15321 }, { "epoch": 0.18319205155489665, "grad_norm": 2.4065213203430176, "learning_rate": 9.397262049802244e-06, "loss": 0.6923, "step": 15322 }, { "epoch": 0.18320400769975728, "grad_norm": 5.860293865203857, "learning_rate": 9.39716988730734e-06, "loss": 0.5594, "step": 15323 }, { "epoch": 0.18321596384461794, "grad_norm": 4.367349147796631, "learning_rate": 9.397077718218866e-06, "loss": 0.6134, "step": 15324 }, { "epoch": 0.1832279199894786, "grad_norm": 1.904376745223999, "learning_rate": 9.396985542536958e-06, "loss": 0.6726, "step": 15325 }, { "epoch": 0.18323987613433923, "grad_norm": 3.895552158355713, "learning_rate": 9.396893360261755e-06, "loss": 0.6989, "step": 15326 }, { "epoch": 0.1832518322791999, "grad_norm": 4.30360746383667, "learning_rate": 9.396801171393392e-06, "loss": 0.7801, "step": 15327 }, { "epoch": 0.18326378842406055, "grad_norm": 2.7717435359954834, "learning_rate": 9.396708975932012e-06, "loss": 0.6108, "step": 15328 }, { "epoch": 0.1832757445689212, "grad_norm": 8.617688179016113, "learning_rate": 9.396616773877752e-06, "loss": 0.6738, "step": 15329 }, { "epoch": 0.18328770071378184, "grad_norm": 2.6750872135162354, "learning_rate": 9.396524565230747e-06, "loss": 0.6553, "step": 15330 }, { "epoch": 0.1832996568586425, "grad_norm": 2.6527915000915527, "learning_rate": 9.39643234999114e-06, "loss": 0.6474, "step": 15331 }, { "epoch": 0.18331161300350315, "grad_norm": 3.191286087036133, "learning_rate": 9.396340128159066e-06, "loss": 0.6402, "step": 15332 }, { "epoch": 0.1833235691483638, "grad_norm": 4.30844783782959, "learning_rate": 9.396247899734665e-06, "loss": 0.5689, "step": 15333 }, { "epoch": 0.18333552529322444, "grad_norm": 1.8620026111602783, "learning_rate": 9.396155664718074e-06, "loss": 0.5627, "step": 15334 }, { "epoch": 0.1833474814380851, "grad_norm": 2.5222904682159424, "learning_rate": 9.396063423109432e-06, "loss": 0.5745, "step": 15335 }, { "epoch": 0.18335943758294576, "grad_norm": 3.9069244861602783, "learning_rate": 9.395971174908878e-06, "loss": 0.583, "step": 15336 }, { "epoch": 0.18337139372780642, "grad_norm": 2.1598803997039795, "learning_rate": 9.395878920116548e-06, "loss": 0.625, "step": 15337 }, { "epoch": 0.18338334987266705, "grad_norm": 2.3310799598693848, "learning_rate": 9.395786658732584e-06, "loss": 0.6179, "step": 15338 }, { "epoch": 0.1833953060175277, "grad_norm": 6.278921604156494, "learning_rate": 9.395694390757121e-06, "loss": 0.7077, "step": 15339 }, { "epoch": 0.18340726216238837, "grad_norm": 3.5483901500701904, "learning_rate": 9.395602116190299e-06, "loss": 0.659, "step": 15340 }, { "epoch": 0.183419218307249, "grad_norm": 3.601348638534546, "learning_rate": 9.395509835032257e-06, "loss": 0.67, "step": 15341 }, { "epoch": 0.18343117445210966, "grad_norm": 6.585572242736816, "learning_rate": 9.395417547283132e-06, "loss": 0.5769, "step": 15342 }, { "epoch": 0.18344313059697032, "grad_norm": 2.954251527786255, "learning_rate": 9.395325252943062e-06, "loss": 0.5938, "step": 15343 }, { "epoch": 0.18345508674183097, "grad_norm": 4.876500606536865, "learning_rate": 9.395232952012185e-06, "loss": 0.6119, "step": 15344 }, { "epoch": 0.1834670428866916, "grad_norm": 1.985480785369873, "learning_rate": 9.395140644490643e-06, "loss": 0.5739, "step": 15345 }, { "epoch": 0.18347899903155226, "grad_norm": 3.287355422973633, "learning_rate": 9.39504833037857e-06, "loss": 0.6649, "step": 15346 }, { "epoch": 0.18349095517641292, "grad_norm": 3.647590398788452, "learning_rate": 9.394956009676108e-06, "loss": 0.6707, "step": 15347 }, { "epoch": 0.18350291132127358, "grad_norm": 2.4654157161712646, "learning_rate": 9.394863682383392e-06, "loss": 0.5372, "step": 15348 }, { "epoch": 0.1835148674661342, "grad_norm": 3.3400580883026123, "learning_rate": 9.394771348500564e-06, "loss": 0.5601, "step": 15349 }, { "epoch": 0.18352682361099487, "grad_norm": 2.432190418243408, "learning_rate": 9.39467900802776e-06, "loss": 0.671, "step": 15350 }, { "epoch": 0.18353877975585553, "grad_norm": 3.3639273643493652, "learning_rate": 9.39458666096512e-06, "loss": 0.6777, "step": 15351 }, { "epoch": 0.18355073590071616, "grad_norm": 3.6389408111572266, "learning_rate": 9.39449430731278e-06, "loss": 0.5812, "step": 15352 }, { "epoch": 0.18356269204557682, "grad_norm": 2.5842642784118652, "learning_rate": 9.394401947070881e-06, "loss": 0.621, "step": 15353 }, { "epoch": 0.18357464819043748, "grad_norm": 8.062911987304688, "learning_rate": 9.394309580239562e-06, "loss": 0.593, "step": 15354 }, { "epoch": 0.18358660433529814, "grad_norm": 2.194999933242798, "learning_rate": 9.394217206818958e-06, "loss": 0.6007, "step": 15355 }, { "epoch": 0.18359856048015877, "grad_norm": 1.8511841297149658, "learning_rate": 9.394124826809212e-06, "loss": 0.53, "step": 15356 }, { "epoch": 0.18361051662501943, "grad_norm": 6.672228813171387, "learning_rate": 9.394032440210459e-06, "loss": 0.7068, "step": 15357 }, { "epoch": 0.18362247276988009, "grad_norm": 7.313584327697754, "learning_rate": 9.393940047022839e-06, "loss": 0.6448, "step": 15358 }, { "epoch": 0.18363442891474074, "grad_norm": 2.447389841079712, "learning_rate": 9.39384764724649e-06, "loss": 0.6704, "step": 15359 }, { "epoch": 0.18364638505960137, "grad_norm": 29.378536224365234, "learning_rate": 9.39375524088155e-06, "loss": 0.5834, "step": 15360 }, { "epoch": 0.18365834120446203, "grad_norm": 1.904609203338623, "learning_rate": 9.393662827928159e-06, "loss": 0.646, "step": 15361 }, { "epoch": 0.1836702973493227, "grad_norm": 5.068906307220459, "learning_rate": 9.393570408386455e-06, "loss": 0.563, "step": 15362 }, { "epoch": 0.18368225349418332, "grad_norm": 8.90829849243164, "learning_rate": 9.393477982256576e-06, "loss": 0.6388, "step": 15363 }, { "epoch": 0.18369420963904398, "grad_norm": 2.1276321411132812, "learning_rate": 9.393385549538664e-06, "loss": 0.6879, "step": 15364 }, { "epoch": 0.18370616578390464, "grad_norm": 5.673123359680176, "learning_rate": 9.39329311023285e-06, "loss": 0.5632, "step": 15365 }, { "epoch": 0.1837181219287653, "grad_norm": 7.848302364349365, "learning_rate": 9.39320066433928e-06, "loss": 0.6665, "step": 15366 }, { "epoch": 0.18373007807362593, "grad_norm": 29.497785568237305, "learning_rate": 9.39310821185809e-06, "loss": 0.6091, "step": 15367 }, { "epoch": 0.1837420342184866, "grad_norm": 2.3617444038391113, "learning_rate": 9.393015752789421e-06, "loss": 0.5462, "step": 15368 }, { "epoch": 0.18375399036334725, "grad_norm": 6.321507930755615, "learning_rate": 9.392923287133406e-06, "loss": 0.5628, "step": 15369 }, { "epoch": 0.1837659465082079, "grad_norm": 60.093143463134766, "learning_rate": 9.392830814890188e-06, "loss": 0.7174, "step": 15370 }, { "epoch": 0.18377790265306854, "grad_norm": 5.86238431930542, "learning_rate": 9.392738336059903e-06, "loss": 0.6699, "step": 15371 }, { "epoch": 0.1837898587979292, "grad_norm": 4.108979225158691, "learning_rate": 9.392645850642694e-06, "loss": 0.6268, "step": 15372 }, { "epoch": 0.18380181494278985, "grad_norm": 2.111339807510376, "learning_rate": 9.392553358638695e-06, "loss": 0.5864, "step": 15373 }, { "epoch": 0.18381377108765048, "grad_norm": 2.3660919666290283, "learning_rate": 9.392460860048046e-06, "loss": 0.6694, "step": 15374 }, { "epoch": 0.18382572723251114, "grad_norm": 3.994494915008545, "learning_rate": 9.392368354870888e-06, "loss": 0.7197, "step": 15375 }, { "epoch": 0.1838376833773718, "grad_norm": 4.35881233215332, "learning_rate": 9.392275843107358e-06, "loss": 0.63, "step": 15376 }, { "epoch": 0.18384963952223246, "grad_norm": 2.909281015396118, "learning_rate": 9.392183324757594e-06, "loss": 0.5938, "step": 15377 }, { "epoch": 0.1838615956670931, "grad_norm": 4.166842937469482, "learning_rate": 9.392090799821735e-06, "loss": 0.6399, "step": 15378 }, { "epoch": 0.18387355181195375, "grad_norm": 3.541134834289551, "learning_rate": 9.391998268299923e-06, "loss": 0.6202, "step": 15379 }, { "epoch": 0.1838855079568144, "grad_norm": 2.26633882522583, "learning_rate": 9.391905730192292e-06, "loss": 0.6254, "step": 15380 }, { "epoch": 0.18389746410167507, "grad_norm": 2.080677032470703, "learning_rate": 9.391813185498981e-06, "loss": 0.5387, "step": 15381 }, { "epoch": 0.1839094202465357, "grad_norm": 95.58967590332031, "learning_rate": 9.391720634220132e-06, "loss": 0.6034, "step": 15382 }, { "epoch": 0.18392137639139636, "grad_norm": 2.8779077529907227, "learning_rate": 9.391628076355883e-06, "loss": 0.6171, "step": 15383 }, { "epoch": 0.18393333253625702, "grad_norm": 2.5813493728637695, "learning_rate": 9.391535511906373e-06, "loss": 0.6755, "step": 15384 }, { "epoch": 0.18394528868111765, "grad_norm": 2.2510783672332764, "learning_rate": 9.391442940871737e-06, "loss": 0.5899, "step": 15385 }, { "epoch": 0.1839572448259783, "grad_norm": 3.4543538093566895, "learning_rate": 9.391350363252119e-06, "loss": 0.7131, "step": 15386 }, { "epoch": 0.18396920097083896, "grad_norm": 5.51521110534668, "learning_rate": 9.391257779047654e-06, "loss": 0.6563, "step": 15387 }, { "epoch": 0.18398115711569962, "grad_norm": 9.4759521484375, "learning_rate": 9.391165188258483e-06, "loss": 0.6157, "step": 15388 }, { "epoch": 0.18399311326056025, "grad_norm": 2.952301502227783, "learning_rate": 9.391072590884745e-06, "loss": 0.4694, "step": 15389 }, { "epoch": 0.1840050694054209, "grad_norm": 4.1889543533325195, "learning_rate": 9.390979986926577e-06, "loss": 0.6785, "step": 15390 }, { "epoch": 0.18401702555028157, "grad_norm": 2.7674293518066406, "learning_rate": 9.39088737638412e-06, "loss": 0.6741, "step": 15391 }, { "epoch": 0.18402898169514223, "grad_norm": 2.53016996383667, "learning_rate": 9.390794759257512e-06, "loss": 0.6648, "step": 15392 }, { "epoch": 0.18404093784000286, "grad_norm": 2.7250471115112305, "learning_rate": 9.39070213554689e-06, "loss": 0.6384, "step": 15393 }, { "epoch": 0.18405289398486352, "grad_norm": 2.7859158515930176, "learning_rate": 9.390609505252394e-06, "loss": 0.6024, "step": 15394 }, { "epoch": 0.18406485012972418, "grad_norm": 8.010283470153809, "learning_rate": 9.390516868374164e-06, "loss": 0.6472, "step": 15395 }, { "epoch": 0.18407680627458484, "grad_norm": 6.582740783691406, "learning_rate": 9.39042422491234e-06, "loss": 0.6026, "step": 15396 }, { "epoch": 0.18408876241944547, "grad_norm": 5.060561656951904, "learning_rate": 9.390331574867057e-06, "loss": 0.6278, "step": 15397 }, { "epoch": 0.18410071856430613, "grad_norm": 2.078002691268921, "learning_rate": 9.39023891823846e-06, "loss": 0.6299, "step": 15398 }, { "epoch": 0.18411267470916678, "grad_norm": 2.8957161903381348, "learning_rate": 9.39014625502668e-06, "loss": 0.6583, "step": 15399 }, { "epoch": 0.18412463085402742, "grad_norm": 4.50640869140625, "learning_rate": 9.390053585231861e-06, "loss": 0.7861, "step": 15400 }, { "epoch": 0.18413658699888807, "grad_norm": 4.864711761474609, "learning_rate": 9.389960908854144e-06, "loss": 0.5101, "step": 15401 }, { "epoch": 0.18414854314374873, "grad_norm": 5.0965166091918945, "learning_rate": 9.389868225893662e-06, "loss": 0.5351, "step": 15402 }, { "epoch": 0.1841604992886094, "grad_norm": 5.894058704376221, "learning_rate": 9.389775536350557e-06, "loss": 0.5578, "step": 15403 }, { "epoch": 0.18417245543347002, "grad_norm": 3.6809892654418945, "learning_rate": 9.389682840224969e-06, "loss": 0.5996, "step": 15404 }, { "epoch": 0.18418441157833068, "grad_norm": 3.3378500938415527, "learning_rate": 9.389590137517036e-06, "loss": 0.5756, "step": 15405 }, { "epoch": 0.18419636772319134, "grad_norm": 3.0726916790008545, "learning_rate": 9.389497428226897e-06, "loss": 0.6057, "step": 15406 }, { "epoch": 0.184208323868052, "grad_norm": 4.682403087615967, "learning_rate": 9.389404712354691e-06, "loss": 0.5568, "step": 15407 }, { "epoch": 0.18422028001291263, "grad_norm": 2.1755599975585938, "learning_rate": 9.389311989900556e-06, "loss": 0.5652, "step": 15408 }, { "epoch": 0.1842322361577733, "grad_norm": 3.0024821758270264, "learning_rate": 9.389219260864633e-06, "loss": 0.6699, "step": 15409 }, { "epoch": 0.18424419230263395, "grad_norm": 7.777787685394287, "learning_rate": 9.38912652524706e-06, "loss": 0.5938, "step": 15410 }, { "epoch": 0.18425614844749458, "grad_norm": 1.755460262298584, "learning_rate": 9.389033783047975e-06, "loss": 0.5795, "step": 15411 }, { "epoch": 0.18426810459235524, "grad_norm": 7.680171966552734, "learning_rate": 9.38894103426752e-06, "loss": 0.5795, "step": 15412 }, { "epoch": 0.1842800607372159, "grad_norm": 4.971930027008057, "learning_rate": 9.38884827890583e-06, "loss": 0.6813, "step": 15413 }, { "epoch": 0.18429201688207655, "grad_norm": 2.6360793113708496, "learning_rate": 9.38875551696305e-06, "loss": 0.688, "step": 15414 }, { "epoch": 0.18430397302693718, "grad_norm": 6.245013236999512, "learning_rate": 9.388662748439311e-06, "loss": 0.6471, "step": 15415 }, { "epoch": 0.18431592917179784, "grad_norm": 2.937208414077759, "learning_rate": 9.38856997333476e-06, "loss": 0.6098, "step": 15416 }, { "epoch": 0.1843278853166585, "grad_norm": 2.793168783187866, "learning_rate": 9.388477191649533e-06, "loss": 0.6777, "step": 15417 }, { "epoch": 0.18433984146151916, "grad_norm": 2.879065990447998, "learning_rate": 9.388384403383769e-06, "loss": 0.6201, "step": 15418 }, { "epoch": 0.1843517976063798, "grad_norm": 4.318058013916016, "learning_rate": 9.388291608537604e-06, "loss": 0.7052, "step": 15419 }, { "epoch": 0.18436375375124045, "grad_norm": 2.8978078365325928, "learning_rate": 9.388198807111182e-06, "loss": 0.6445, "step": 15420 }, { "epoch": 0.1843757098961011, "grad_norm": 3.842902421951294, "learning_rate": 9.38810599910464e-06, "loss": 0.5968, "step": 15421 }, { "epoch": 0.18438766604096174, "grad_norm": 2.279572010040283, "learning_rate": 9.388013184518117e-06, "loss": 0.6421, "step": 15422 }, { "epoch": 0.1843996221858224, "grad_norm": 1.8882793188095093, "learning_rate": 9.387920363351754e-06, "loss": 0.5773, "step": 15423 }, { "epoch": 0.18441157833068306, "grad_norm": 2.027773380279541, "learning_rate": 9.387827535605688e-06, "loss": 0.6096, "step": 15424 }, { "epoch": 0.18442353447554372, "grad_norm": 2.583843469619751, "learning_rate": 9.387734701280059e-06, "loss": 0.639, "step": 15425 }, { "epoch": 0.18443549062040435, "grad_norm": 3.933643102645874, "learning_rate": 9.387641860375006e-06, "loss": 0.6189, "step": 15426 }, { "epoch": 0.184447446765265, "grad_norm": 1.8284838199615479, "learning_rate": 9.38754901289067e-06, "loss": 0.6663, "step": 15427 }, { "epoch": 0.18445940291012566, "grad_norm": 5.6891937255859375, "learning_rate": 9.387456158827186e-06, "loss": 0.5611, "step": 15428 }, { "epoch": 0.18447135905498632, "grad_norm": 2.4112045764923096, "learning_rate": 9.387363298184698e-06, "loss": 0.6375, "step": 15429 }, { "epoch": 0.18448331519984695, "grad_norm": 2.366914749145508, "learning_rate": 9.387270430963342e-06, "loss": 0.7085, "step": 15430 }, { "epoch": 0.1844952713447076, "grad_norm": 2.965627908706665, "learning_rate": 9.387177557163259e-06, "loss": 0.5639, "step": 15431 }, { "epoch": 0.18450722748956827, "grad_norm": 9.682790756225586, "learning_rate": 9.387084676784586e-06, "loss": 0.5711, "step": 15432 }, { "epoch": 0.1845191836344289, "grad_norm": 6.253779411315918, "learning_rate": 9.386991789827466e-06, "loss": 0.5276, "step": 15433 }, { "epoch": 0.18453113977928956, "grad_norm": 1.8090111017227173, "learning_rate": 9.386898896292036e-06, "loss": 0.6125, "step": 15434 }, { "epoch": 0.18454309592415022, "grad_norm": 3.1912384033203125, "learning_rate": 9.386805996178436e-06, "loss": 0.6098, "step": 15435 }, { "epoch": 0.18455505206901088, "grad_norm": 2.2810003757476807, "learning_rate": 9.386713089486803e-06, "loss": 0.6344, "step": 15436 }, { "epoch": 0.1845670082138715, "grad_norm": 3.686549425125122, "learning_rate": 9.386620176217279e-06, "loss": 0.6376, "step": 15437 }, { "epoch": 0.18457896435873217, "grad_norm": 4.254916191101074, "learning_rate": 9.386527256370003e-06, "loss": 0.5671, "step": 15438 }, { "epoch": 0.18459092050359283, "grad_norm": 2.2548885345458984, "learning_rate": 9.386434329945113e-06, "loss": 0.6292, "step": 15439 }, { "epoch": 0.18460287664845348, "grad_norm": 2.2637126445770264, "learning_rate": 9.38634139694275e-06, "loss": 0.684, "step": 15440 }, { "epoch": 0.18461483279331412, "grad_norm": 5.001010417938232, "learning_rate": 9.386248457363053e-06, "loss": 0.5864, "step": 15441 }, { "epoch": 0.18462678893817477, "grad_norm": 2.4599506855010986, "learning_rate": 9.38615551120616e-06, "loss": 0.671, "step": 15442 }, { "epoch": 0.18463874508303543, "grad_norm": 2.775883913040161, "learning_rate": 9.386062558472208e-06, "loss": 0.5947, "step": 15443 }, { "epoch": 0.18465070122789606, "grad_norm": 1.7373477220535278, "learning_rate": 9.385969599161344e-06, "loss": 0.604, "step": 15444 }, { "epoch": 0.18466265737275672, "grad_norm": 4.315790176391602, "learning_rate": 9.3858766332737e-06, "loss": 0.5798, "step": 15445 }, { "epoch": 0.18467461351761738, "grad_norm": 2.466952085494995, "learning_rate": 9.385783660809421e-06, "loss": 0.5823, "step": 15446 }, { "epoch": 0.18468656966247804, "grad_norm": 13.928038597106934, "learning_rate": 9.385690681768642e-06, "loss": 0.6155, "step": 15447 }, { "epoch": 0.18469852580733867, "grad_norm": 3.5901801586151123, "learning_rate": 9.385597696151506e-06, "loss": 0.4828, "step": 15448 }, { "epoch": 0.18471048195219933, "grad_norm": 2.925410032272339, "learning_rate": 9.38550470395815e-06, "loss": 0.5467, "step": 15449 }, { "epoch": 0.18472243809706, "grad_norm": 3.1667428016662598, "learning_rate": 9.385411705188713e-06, "loss": 0.5438, "step": 15450 }, { "epoch": 0.18473439424192065, "grad_norm": 11.147555351257324, "learning_rate": 9.385318699843336e-06, "loss": 0.5706, "step": 15451 }, { "epoch": 0.18474635038678128, "grad_norm": 1.9694594144821167, "learning_rate": 9.38522568792216e-06, "loss": 0.6369, "step": 15452 }, { "epoch": 0.18475830653164194, "grad_norm": 1.9023996591567993, "learning_rate": 9.38513266942532e-06, "loss": 0.7049, "step": 15453 }, { "epoch": 0.1847702626765026, "grad_norm": 2.7098729610443115, "learning_rate": 9.38503964435296e-06, "loss": 0.564, "step": 15454 }, { "epoch": 0.18478221882136325, "grad_norm": 2.366837739944458, "learning_rate": 9.384946612705214e-06, "loss": 0.5915, "step": 15455 }, { "epoch": 0.18479417496622388, "grad_norm": 5.523597240447998, "learning_rate": 9.38485357448223e-06, "loss": 0.6759, "step": 15456 }, { "epoch": 0.18480613111108454, "grad_norm": 3.136174440383911, "learning_rate": 9.384760529684138e-06, "loss": 0.704, "step": 15457 }, { "epoch": 0.1848180872559452, "grad_norm": 3.079946756362915, "learning_rate": 9.384667478311084e-06, "loss": 0.5627, "step": 15458 }, { "epoch": 0.18483004340080583, "grad_norm": 3.7064502239227295, "learning_rate": 9.384574420363204e-06, "loss": 0.7108, "step": 15459 }, { "epoch": 0.1848419995456665, "grad_norm": 2.5202715396881104, "learning_rate": 9.384481355840643e-06, "loss": 0.6472, "step": 15460 }, { "epoch": 0.18485395569052715, "grad_norm": 12.930196762084961, "learning_rate": 9.384388284743534e-06, "loss": 0.5905, "step": 15461 }, { "epoch": 0.1848659118353878, "grad_norm": 5.807461738586426, "learning_rate": 9.384295207072019e-06, "loss": 0.6557, "step": 15462 }, { "epoch": 0.18487786798024844, "grad_norm": 2.473647117614746, "learning_rate": 9.384202122826236e-06, "loss": 0.577, "step": 15463 }, { "epoch": 0.1848898241251091, "grad_norm": 2.4382288455963135, "learning_rate": 9.38410903200633e-06, "loss": 0.5775, "step": 15464 }, { "epoch": 0.18490178026996976, "grad_norm": 3.8301613330841064, "learning_rate": 9.384015934612433e-06, "loss": 0.6087, "step": 15465 }, { "epoch": 0.18491373641483042, "grad_norm": 4.4410223960876465, "learning_rate": 9.383922830644691e-06, "loss": 0.609, "step": 15466 }, { "epoch": 0.18492569255969105, "grad_norm": 3.0743582248687744, "learning_rate": 9.383829720103242e-06, "loss": 0.6624, "step": 15467 }, { "epoch": 0.1849376487045517, "grad_norm": 2.9614098072052, "learning_rate": 9.383736602988223e-06, "loss": 0.5921, "step": 15468 }, { "epoch": 0.18494960484941236, "grad_norm": 4.58344841003418, "learning_rate": 9.383643479299775e-06, "loss": 0.6504, "step": 15469 }, { "epoch": 0.184961560994273, "grad_norm": 2.7848587036132812, "learning_rate": 9.38355034903804e-06, "loss": 0.594, "step": 15470 }, { "epoch": 0.18497351713913365, "grad_norm": 3.6870882511138916, "learning_rate": 9.383457212203156e-06, "loss": 0.6253, "step": 15471 }, { "epoch": 0.1849854732839943, "grad_norm": 1.599510908126831, "learning_rate": 9.38336406879526e-06, "loss": 0.6329, "step": 15472 }, { "epoch": 0.18499742942885497, "grad_norm": 2.94574236869812, "learning_rate": 9.383270918814495e-06, "loss": 0.5205, "step": 15473 }, { "epoch": 0.1850093855737156, "grad_norm": 3.1970529556274414, "learning_rate": 9.383177762261e-06, "loss": 0.6195, "step": 15474 }, { "epoch": 0.18502134171857626, "grad_norm": 1.8434383869171143, "learning_rate": 9.383084599134913e-06, "loss": 0.6203, "step": 15475 }, { "epoch": 0.18503329786343692, "grad_norm": 2.779256582260132, "learning_rate": 9.382991429436377e-06, "loss": 0.5817, "step": 15476 }, { "epoch": 0.18504525400829758, "grad_norm": 2.030812978744507, "learning_rate": 9.38289825316553e-06, "loss": 0.6219, "step": 15477 }, { "epoch": 0.1850572101531582, "grad_norm": 2.1713340282440186, "learning_rate": 9.38280507032251e-06, "loss": 0.5461, "step": 15478 }, { "epoch": 0.18506916629801887, "grad_norm": 6.04063081741333, "learning_rate": 9.382711880907458e-06, "loss": 0.5849, "step": 15479 }, { "epoch": 0.18508112244287953, "grad_norm": 2.764148235321045, "learning_rate": 9.382618684920514e-06, "loss": 0.7401, "step": 15480 }, { "epoch": 0.18509307858774016, "grad_norm": 3.1331145763397217, "learning_rate": 9.382525482361818e-06, "loss": 0.5902, "step": 15481 }, { "epoch": 0.18510503473260081, "grad_norm": 3.977163314819336, "learning_rate": 9.38243227323151e-06, "loss": 0.7067, "step": 15482 }, { "epoch": 0.18511699087746147, "grad_norm": 4.33912467956543, "learning_rate": 9.382339057529727e-06, "loss": 0.6316, "step": 15483 }, { "epoch": 0.18512894702232213, "grad_norm": 3.5677237510681152, "learning_rate": 9.382245835256614e-06, "loss": 0.4941, "step": 15484 }, { "epoch": 0.18514090316718276, "grad_norm": 5.398059368133545, "learning_rate": 9.382152606412307e-06, "loss": 0.5543, "step": 15485 }, { "epoch": 0.18515285931204342, "grad_norm": 1.9912288188934326, "learning_rate": 9.382059370996947e-06, "loss": 0.612, "step": 15486 }, { "epoch": 0.18516481545690408, "grad_norm": 3.0764429569244385, "learning_rate": 9.381966129010673e-06, "loss": 0.5489, "step": 15487 }, { "epoch": 0.18517677160176474, "grad_norm": 2.5560731887817383, "learning_rate": 9.381872880453624e-06, "loss": 0.5714, "step": 15488 }, { "epoch": 0.18518872774662537, "grad_norm": 1.9060792922973633, "learning_rate": 9.381779625325942e-06, "loss": 0.6347, "step": 15489 }, { "epoch": 0.18520068389148603, "grad_norm": 25.597673416137695, "learning_rate": 9.381686363627767e-06, "loss": 0.6217, "step": 15490 }, { "epoch": 0.1852126400363467, "grad_norm": 2.1563589572906494, "learning_rate": 9.381593095359236e-06, "loss": 0.5813, "step": 15491 }, { "epoch": 0.18522459618120732, "grad_norm": 2.759032964706421, "learning_rate": 9.381499820520491e-06, "loss": 0.6144, "step": 15492 }, { "epoch": 0.18523655232606798, "grad_norm": 2.3426408767700195, "learning_rate": 9.381406539111671e-06, "loss": 0.6173, "step": 15493 }, { "epoch": 0.18524850847092864, "grad_norm": 1.923499584197998, "learning_rate": 9.381313251132917e-06, "loss": 0.5126, "step": 15494 }, { "epoch": 0.1852604646157893, "grad_norm": 2.894383668899536, "learning_rate": 9.381219956584368e-06, "loss": 0.5983, "step": 15495 }, { "epoch": 0.18527242076064993, "grad_norm": 2.3338308334350586, "learning_rate": 9.381126655466163e-06, "loss": 0.5599, "step": 15496 }, { "epoch": 0.18528437690551058, "grad_norm": 2.59954833984375, "learning_rate": 9.381033347778446e-06, "loss": 0.5404, "step": 15497 }, { "epoch": 0.18529633305037124, "grad_norm": 2.103055000305176, "learning_rate": 9.380940033521352e-06, "loss": 0.6358, "step": 15498 }, { "epoch": 0.1853082891952319, "grad_norm": 4.3238525390625, "learning_rate": 9.380846712695022e-06, "loss": 0.6796, "step": 15499 }, { "epoch": 0.18532024534009253, "grad_norm": 5.707480430603027, "learning_rate": 9.380753385299599e-06, "loss": 0.6675, "step": 15500 }, { "epoch": 0.1853322014849532, "grad_norm": 2.035903215408325, "learning_rate": 9.38066005133522e-06, "loss": 0.6661, "step": 15501 }, { "epoch": 0.18534415762981385, "grad_norm": 2.391859531402588, "learning_rate": 9.380566710802024e-06, "loss": 0.6203, "step": 15502 }, { "epoch": 0.18535611377467448, "grad_norm": 2.6629927158355713, "learning_rate": 9.380473363700154e-06, "loss": 0.612, "step": 15503 }, { "epoch": 0.18536806991953514, "grad_norm": 2.0375001430511475, "learning_rate": 9.380380010029749e-06, "loss": 0.6655, "step": 15504 }, { "epoch": 0.1853800260643958, "grad_norm": 9.807445526123047, "learning_rate": 9.380286649790947e-06, "loss": 0.6024, "step": 15505 }, { "epoch": 0.18539198220925646, "grad_norm": 4.274834156036377, "learning_rate": 9.380193282983892e-06, "loss": 0.6689, "step": 15506 }, { "epoch": 0.1854039383541171, "grad_norm": 2.29852557182312, "learning_rate": 9.38009990960872e-06, "loss": 0.6363, "step": 15507 }, { "epoch": 0.18541589449897775, "grad_norm": 2.847430944442749, "learning_rate": 9.380006529665573e-06, "loss": 0.5627, "step": 15508 }, { "epoch": 0.1854278506438384, "grad_norm": 2.617810010910034, "learning_rate": 9.379913143154589e-06, "loss": 0.5386, "step": 15509 }, { "epoch": 0.18543980678869906, "grad_norm": 2.0492422580718994, "learning_rate": 9.379819750075911e-06, "loss": 0.5733, "step": 15510 }, { "epoch": 0.1854517629335597, "grad_norm": 3.85263991355896, "learning_rate": 9.379726350429679e-06, "loss": 0.5694, "step": 15511 }, { "epoch": 0.18546371907842035, "grad_norm": 12.182677268981934, "learning_rate": 9.37963294421603e-06, "loss": 0.5101, "step": 15512 }, { "epoch": 0.185475675223281, "grad_norm": 5.547000408172607, "learning_rate": 9.379539531435107e-06, "loss": 0.6213, "step": 15513 }, { "epoch": 0.18548763136814167, "grad_norm": 2.449512481689453, "learning_rate": 9.379446112087048e-06, "loss": 0.5887, "step": 15514 }, { "epoch": 0.1854995875130023, "grad_norm": 2.4690253734588623, "learning_rate": 9.379352686171994e-06, "loss": 0.632, "step": 15515 }, { "epoch": 0.18551154365786296, "grad_norm": 2.1008460521698, "learning_rate": 9.379259253690086e-06, "loss": 0.5848, "step": 15516 }, { "epoch": 0.18552349980272362, "grad_norm": 2.0026254653930664, "learning_rate": 9.379165814641463e-06, "loss": 0.6466, "step": 15517 }, { "epoch": 0.18553545594758425, "grad_norm": 10.092519760131836, "learning_rate": 9.379072369026265e-06, "loss": 0.6201, "step": 15518 }, { "epoch": 0.1855474120924449, "grad_norm": 1.9328359365463257, "learning_rate": 9.378978916844633e-06, "loss": 0.7414, "step": 15519 }, { "epoch": 0.18555936823730557, "grad_norm": 2.1218209266662598, "learning_rate": 9.378885458096705e-06, "loss": 0.6203, "step": 15520 }, { "epoch": 0.18557132438216623, "grad_norm": 3.446636915206909, "learning_rate": 9.378791992782623e-06, "loss": 0.6282, "step": 15521 }, { "epoch": 0.18558328052702686, "grad_norm": 1.7959835529327393, "learning_rate": 9.378698520902528e-06, "loss": 0.6919, "step": 15522 }, { "epoch": 0.18559523667188751, "grad_norm": 4.125764846801758, "learning_rate": 9.378605042456558e-06, "loss": 0.6221, "step": 15523 }, { "epoch": 0.18560719281674817, "grad_norm": 5.290159702301025, "learning_rate": 9.378511557444856e-06, "loss": 0.5136, "step": 15524 }, { "epoch": 0.18561914896160883, "grad_norm": 2.3446946144104004, "learning_rate": 9.378418065867558e-06, "loss": 0.6245, "step": 15525 }, { "epoch": 0.18563110510646946, "grad_norm": 1.9648840427398682, "learning_rate": 9.378324567724807e-06, "loss": 0.5701, "step": 15526 }, { "epoch": 0.18564306125133012, "grad_norm": 3.322188377380371, "learning_rate": 9.378231063016743e-06, "loss": 0.6494, "step": 15527 }, { "epoch": 0.18565501739619078, "grad_norm": 2.601443290710449, "learning_rate": 9.378137551743506e-06, "loss": 0.726, "step": 15528 }, { "epoch": 0.1856669735410514, "grad_norm": 1.4266847372055054, "learning_rate": 9.378044033905237e-06, "loss": 0.5993, "step": 15529 }, { "epoch": 0.18567892968591207, "grad_norm": 2.844243049621582, "learning_rate": 9.377950509502077e-06, "loss": 0.5229, "step": 15530 }, { "epoch": 0.18569088583077273, "grad_norm": 3.06676983833313, "learning_rate": 9.377856978534162e-06, "loss": 0.6708, "step": 15531 }, { "epoch": 0.1857028419756334, "grad_norm": 2.590308666229248, "learning_rate": 9.377763441001635e-06, "loss": 0.5636, "step": 15532 }, { "epoch": 0.18571479812049402, "grad_norm": 3.175124168395996, "learning_rate": 9.377669896904635e-06, "loss": 0.6537, "step": 15533 }, { "epoch": 0.18572675426535468, "grad_norm": 3.7590301036834717, "learning_rate": 9.377576346243307e-06, "loss": 0.6539, "step": 15534 }, { "epoch": 0.18573871041021534, "grad_norm": 3.230938673019409, "learning_rate": 9.377482789017784e-06, "loss": 0.643, "step": 15535 }, { "epoch": 0.185750666555076, "grad_norm": 2.330810070037842, "learning_rate": 9.377389225228213e-06, "loss": 0.5493, "step": 15536 }, { "epoch": 0.18576262269993662, "grad_norm": 3.2076196670532227, "learning_rate": 9.37729565487473e-06, "loss": 0.5638, "step": 15537 }, { "epoch": 0.18577457884479728, "grad_norm": 1.9927873611450195, "learning_rate": 9.377202077957476e-06, "loss": 0.6802, "step": 15538 }, { "epoch": 0.18578653498965794, "grad_norm": 2.8614423274993896, "learning_rate": 9.377108494476592e-06, "loss": 0.6358, "step": 15539 }, { "epoch": 0.18579849113451857, "grad_norm": 2.668534278869629, "learning_rate": 9.37701490443222e-06, "loss": 0.6043, "step": 15540 }, { "epoch": 0.18581044727937923, "grad_norm": 2.1153371334075928, "learning_rate": 9.376921307824496e-06, "loss": 0.6449, "step": 15541 }, { "epoch": 0.1858224034242399, "grad_norm": 2.6528732776641846, "learning_rate": 9.376827704653565e-06, "loss": 0.6491, "step": 15542 }, { "epoch": 0.18583435956910055, "grad_norm": 2.466703414916992, "learning_rate": 9.376734094919563e-06, "loss": 0.5188, "step": 15543 }, { "epoch": 0.18584631571396118, "grad_norm": 5.771639823913574, "learning_rate": 9.376640478622637e-06, "loss": 0.6858, "step": 15544 }, { "epoch": 0.18585827185882184, "grad_norm": 2.168893337249756, "learning_rate": 9.37654685576292e-06, "loss": 0.746, "step": 15545 }, { "epoch": 0.1858702280036825, "grad_norm": 1.4602106809616089, "learning_rate": 9.376453226340553e-06, "loss": 0.5714, "step": 15546 }, { "epoch": 0.18588218414854316, "grad_norm": 2.330554485321045, "learning_rate": 9.376359590355684e-06, "loss": 0.676, "step": 15547 }, { "epoch": 0.1858941402934038, "grad_norm": 2.5841622352600098, "learning_rate": 9.376265947808444e-06, "loss": 0.6379, "step": 15548 }, { "epoch": 0.18590609643826445, "grad_norm": 2.2268779277801514, "learning_rate": 9.37617229869898e-06, "loss": 0.7392, "step": 15549 }, { "epoch": 0.1859180525831251, "grad_norm": 1.7435057163238525, "learning_rate": 9.37607864302743e-06, "loss": 0.6184, "step": 15550 }, { "epoch": 0.18593000872798573, "grad_norm": 5.241826057434082, "learning_rate": 9.375984980793935e-06, "loss": 0.5675, "step": 15551 }, { "epoch": 0.1859419648728464, "grad_norm": 2.2272136211395264, "learning_rate": 9.375891311998633e-06, "loss": 0.6717, "step": 15552 }, { "epoch": 0.18595392101770705, "grad_norm": 2.945857286453247, "learning_rate": 9.375797636641667e-06, "loss": 0.5923, "step": 15553 }, { "epoch": 0.1859658771625677, "grad_norm": 2.3878231048583984, "learning_rate": 9.375703954723177e-06, "loss": 0.6519, "step": 15554 }, { "epoch": 0.18597783330742834, "grad_norm": 3.1264171600341797, "learning_rate": 9.375610266243305e-06, "loss": 0.5028, "step": 15555 }, { "epoch": 0.185989789452289, "grad_norm": 2.8623242378234863, "learning_rate": 9.375516571202188e-06, "loss": 0.6525, "step": 15556 }, { "epoch": 0.18600174559714966, "grad_norm": 2.414856195449829, "learning_rate": 9.375422869599969e-06, "loss": 0.6528, "step": 15557 }, { "epoch": 0.18601370174201032, "grad_norm": 3.4467291831970215, "learning_rate": 9.375329161436788e-06, "loss": 0.657, "step": 15558 }, { "epoch": 0.18602565788687095, "grad_norm": 3.1178834438323975, "learning_rate": 9.375235446712785e-06, "loss": 0.6411, "step": 15559 }, { "epoch": 0.1860376140317316, "grad_norm": 4.154308319091797, "learning_rate": 9.375141725428101e-06, "loss": 0.6292, "step": 15560 }, { "epoch": 0.18604957017659227, "grad_norm": 2.777280807495117, "learning_rate": 9.375047997582875e-06, "loss": 0.6943, "step": 15561 }, { "epoch": 0.1860615263214529, "grad_norm": 2.2394027709960938, "learning_rate": 9.374954263177252e-06, "loss": 0.6153, "step": 15562 }, { "epoch": 0.18607348246631356, "grad_norm": 6.257021903991699, "learning_rate": 9.374860522211368e-06, "loss": 0.649, "step": 15563 }, { "epoch": 0.18608543861117421, "grad_norm": 2.125272274017334, "learning_rate": 9.374766774685364e-06, "loss": 0.5746, "step": 15564 }, { "epoch": 0.18609739475603487, "grad_norm": 2.649080753326416, "learning_rate": 9.374673020599384e-06, "loss": 0.6257, "step": 15565 }, { "epoch": 0.1861093509008955, "grad_norm": 3.2275071144104004, "learning_rate": 9.374579259953565e-06, "loss": 0.5374, "step": 15566 }, { "epoch": 0.18612130704575616, "grad_norm": 2.2339463233947754, "learning_rate": 9.37448549274805e-06, "loss": 0.5838, "step": 15567 }, { "epoch": 0.18613326319061682, "grad_norm": 2.8871898651123047, "learning_rate": 9.374391718982975e-06, "loss": 0.6475, "step": 15568 }, { "epoch": 0.18614521933547748, "grad_norm": 1.9469490051269531, "learning_rate": 9.374297938658486e-06, "loss": 0.601, "step": 15569 }, { "epoch": 0.1861571754803381, "grad_norm": 2.183992862701416, "learning_rate": 9.374204151774723e-06, "loss": 0.5483, "step": 15570 }, { "epoch": 0.18616913162519877, "grad_norm": 2.4938302040100098, "learning_rate": 9.374110358331825e-06, "loss": 0.6925, "step": 15571 }, { "epoch": 0.18618108777005943, "grad_norm": 2.4612841606140137, "learning_rate": 9.374016558329932e-06, "loss": 0.6199, "step": 15572 }, { "epoch": 0.1861930439149201, "grad_norm": 2.5155251026153564, "learning_rate": 9.373922751769185e-06, "loss": 0.6097, "step": 15573 }, { "epoch": 0.18620500005978072, "grad_norm": 5.2637858390808105, "learning_rate": 9.373828938649726e-06, "loss": 0.5622, "step": 15574 }, { "epoch": 0.18621695620464138, "grad_norm": 2.2276248931884766, "learning_rate": 9.373735118971697e-06, "loss": 0.6408, "step": 15575 }, { "epoch": 0.18622891234950203, "grad_norm": 1.9908154010772705, "learning_rate": 9.373641292735234e-06, "loss": 0.5721, "step": 15576 }, { "epoch": 0.18624086849436267, "grad_norm": 1.7870374917984009, "learning_rate": 9.373547459940482e-06, "loss": 0.5473, "step": 15577 }, { "epoch": 0.18625282463922332, "grad_norm": 2.26869535446167, "learning_rate": 9.373453620587578e-06, "loss": 0.575, "step": 15578 }, { "epoch": 0.18626478078408398, "grad_norm": 2.7653627395629883, "learning_rate": 9.373359774676667e-06, "loss": 0.7104, "step": 15579 }, { "epoch": 0.18627673692894464, "grad_norm": 2.6508965492248535, "learning_rate": 9.373265922207885e-06, "loss": 0.6719, "step": 15580 }, { "epoch": 0.18628869307380527, "grad_norm": 2.632927179336548, "learning_rate": 9.373172063181377e-06, "loss": 0.5389, "step": 15581 }, { "epoch": 0.18630064921866593, "grad_norm": 2.5845205783843994, "learning_rate": 9.373078197597282e-06, "loss": 0.6324, "step": 15582 }, { "epoch": 0.1863126053635266, "grad_norm": 3.609687089920044, "learning_rate": 9.372984325455742e-06, "loss": 0.6333, "step": 15583 }, { "epoch": 0.18632456150838725, "grad_norm": 2.4083006381988525, "learning_rate": 9.372890446756893e-06, "loss": 0.5894, "step": 15584 }, { "epoch": 0.18633651765324788, "grad_norm": 1.4941470623016357, "learning_rate": 9.37279656150088e-06, "loss": 0.5702, "step": 15585 }, { "epoch": 0.18634847379810854, "grad_norm": 2.1171627044677734, "learning_rate": 9.372702669687846e-06, "loss": 0.6442, "step": 15586 }, { "epoch": 0.1863604299429692, "grad_norm": 2.1011407375335693, "learning_rate": 9.372608771317927e-06, "loss": 0.5968, "step": 15587 }, { "epoch": 0.18637238608782983, "grad_norm": 2.6042022705078125, "learning_rate": 9.372514866391264e-06, "loss": 0.5727, "step": 15588 }, { "epoch": 0.1863843422326905, "grad_norm": 4.316081523895264, "learning_rate": 9.372420954908002e-06, "loss": 0.6846, "step": 15589 }, { "epoch": 0.18639629837755115, "grad_norm": 2.2411866188049316, "learning_rate": 9.372327036868277e-06, "loss": 0.6024, "step": 15590 }, { "epoch": 0.1864082545224118, "grad_norm": 2.071714401245117, "learning_rate": 9.372233112272233e-06, "loss": 0.6592, "step": 15591 }, { "epoch": 0.18642021066727243, "grad_norm": 6.9885454177856445, "learning_rate": 9.37213918112001e-06, "loss": 0.6188, "step": 15592 }, { "epoch": 0.1864321668121331, "grad_norm": 2.3929271697998047, "learning_rate": 9.372045243411749e-06, "loss": 0.5824, "step": 15593 }, { "epoch": 0.18644412295699375, "grad_norm": 13.251224517822266, "learning_rate": 9.37195129914759e-06, "loss": 0.6311, "step": 15594 }, { "epoch": 0.1864560791018544, "grad_norm": 2.4987800121307373, "learning_rate": 9.371857348327675e-06, "loss": 0.5575, "step": 15595 }, { "epoch": 0.18646803524671504, "grad_norm": 4.089285850524902, "learning_rate": 9.371763390952144e-06, "loss": 0.6944, "step": 15596 }, { "epoch": 0.1864799913915757, "grad_norm": 2.287656545639038, "learning_rate": 9.371669427021139e-06, "loss": 0.591, "step": 15597 }, { "epoch": 0.18649194753643636, "grad_norm": 2.867143392562866, "learning_rate": 9.3715754565348e-06, "loss": 0.6484, "step": 15598 }, { "epoch": 0.186503903681297, "grad_norm": 2.546704053878784, "learning_rate": 9.371481479493267e-06, "loss": 0.5975, "step": 15599 }, { "epoch": 0.18651585982615765, "grad_norm": 7.692903518676758, "learning_rate": 9.371387495896683e-06, "loss": 0.5534, "step": 15600 }, { "epoch": 0.1865278159710183, "grad_norm": 4.41759729385376, "learning_rate": 9.371293505745187e-06, "loss": 0.6319, "step": 15601 }, { "epoch": 0.18653977211587897, "grad_norm": 2.6313366889953613, "learning_rate": 9.371199509038924e-06, "loss": 0.7324, "step": 15602 }, { "epoch": 0.1865517282607396, "grad_norm": 1.7152010202407837, "learning_rate": 9.37110550577803e-06, "loss": 0.5811, "step": 15603 }, { "epoch": 0.18656368440560026, "grad_norm": 1.7664971351623535, "learning_rate": 9.371011495962645e-06, "loss": 0.6036, "step": 15604 }, { "epoch": 0.18657564055046091, "grad_norm": 4.323167324066162, "learning_rate": 9.370917479592915e-06, "loss": 0.5513, "step": 15605 }, { "epoch": 0.18658759669532157, "grad_norm": 2.0792558193206787, "learning_rate": 9.370823456668979e-06, "loss": 0.6507, "step": 15606 }, { "epoch": 0.1865995528401822, "grad_norm": 4.382866382598877, "learning_rate": 9.370729427190976e-06, "loss": 0.6554, "step": 15607 }, { "epoch": 0.18661150898504286, "grad_norm": 2.1619222164154053, "learning_rate": 9.370635391159051e-06, "loss": 0.6062, "step": 15608 }, { "epoch": 0.18662346512990352, "grad_norm": 3.39752197265625, "learning_rate": 9.370541348573341e-06, "loss": 0.5689, "step": 15609 }, { "epoch": 0.18663542127476415, "grad_norm": 1.987962007522583, "learning_rate": 9.370447299433988e-06, "loss": 0.5967, "step": 15610 }, { "epoch": 0.1866473774196248, "grad_norm": 5.347665786743164, "learning_rate": 9.370353243741136e-06, "loss": 0.5835, "step": 15611 }, { "epoch": 0.18665933356448547, "grad_norm": 2.371645212173462, "learning_rate": 9.370259181494922e-06, "loss": 0.694, "step": 15612 }, { "epoch": 0.18667128970934613, "grad_norm": 2.4304378032684326, "learning_rate": 9.37016511269549e-06, "loss": 0.5388, "step": 15613 }, { "epoch": 0.18668324585420676, "grad_norm": 1.592458963394165, "learning_rate": 9.37007103734298e-06, "loss": 0.5618, "step": 15614 }, { "epoch": 0.18669520199906742, "grad_norm": 2.3937008380889893, "learning_rate": 9.369976955437533e-06, "loss": 0.6605, "step": 15615 }, { "epoch": 0.18670715814392808, "grad_norm": 5.607359409332275, "learning_rate": 9.369882866979288e-06, "loss": 0.5168, "step": 15616 }, { "epoch": 0.18671911428878873, "grad_norm": 2.0867984294891357, "learning_rate": 9.369788771968391e-06, "loss": 0.6222, "step": 15617 }, { "epoch": 0.18673107043364937, "grad_norm": 2.060262441635132, "learning_rate": 9.369694670404978e-06, "loss": 0.6992, "step": 15618 }, { "epoch": 0.18674302657851002, "grad_norm": 2.7030136585235596, "learning_rate": 9.369600562289194e-06, "loss": 0.654, "step": 15619 }, { "epoch": 0.18675498272337068, "grad_norm": 9.89100456237793, "learning_rate": 9.369506447621177e-06, "loss": 0.595, "step": 15620 }, { "epoch": 0.1867669388682313, "grad_norm": 4.606594562530518, "learning_rate": 9.36941232640107e-06, "loss": 0.6702, "step": 15621 }, { "epoch": 0.18677889501309197, "grad_norm": 4.749296188354492, "learning_rate": 9.369318198629014e-06, "loss": 0.552, "step": 15622 }, { "epoch": 0.18679085115795263, "grad_norm": 3.223215103149414, "learning_rate": 9.36922406430515e-06, "loss": 0.572, "step": 15623 }, { "epoch": 0.1868028073028133, "grad_norm": 10.019559860229492, "learning_rate": 9.369129923429618e-06, "loss": 0.5588, "step": 15624 }, { "epoch": 0.18681476344767392, "grad_norm": 2.3085482120513916, "learning_rate": 9.36903577600256e-06, "loss": 0.5814, "step": 15625 }, { "epoch": 0.18682671959253458, "grad_norm": 2.4707915782928467, "learning_rate": 9.368941622024118e-06, "loss": 0.6825, "step": 15626 }, { "epoch": 0.18683867573739524, "grad_norm": 3.9989936351776123, "learning_rate": 9.368847461494432e-06, "loss": 0.5814, "step": 15627 }, { "epoch": 0.1868506318822559, "grad_norm": 2.1231884956359863, "learning_rate": 9.368753294413645e-06, "loss": 0.548, "step": 15628 }, { "epoch": 0.18686258802711653, "grad_norm": 4.243854999542236, "learning_rate": 9.368659120781896e-06, "loss": 0.7286, "step": 15629 }, { "epoch": 0.18687454417197719, "grad_norm": 2.2330753803253174, "learning_rate": 9.368564940599327e-06, "loss": 0.7599, "step": 15630 }, { "epoch": 0.18688650031683784, "grad_norm": 2.045945405960083, "learning_rate": 9.36847075386608e-06, "loss": 0.6896, "step": 15631 }, { "epoch": 0.1868984564616985, "grad_norm": 3.561499834060669, "learning_rate": 9.368376560582296e-06, "loss": 0.5855, "step": 15632 }, { "epoch": 0.18691041260655913, "grad_norm": 1.304433822631836, "learning_rate": 9.368282360748114e-06, "loss": 0.5543, "step": 15633 }, { "epoch": 0.1869223687514198, "grad_norm": 3.040372133255005, "learning_rate": 9.368188154363677e-06, "loss": 0.6805, "step": 15634 }, { "epoch": 0.18693432489628045, "grad_norm": 2.1985058784484863, "learning_rate": 9.368093941429128e-06, "loss": 0.5959, "step": 15635 }, { "epoch": 0.18694628104114108, "grad_norm": 7.016234397888184, "learning_rate": 9.367999721944607e-06, "loss": 0.6679, "step": 15636 }, { "epoch": 0.18695823718600174, "grad_norm": 5.653444290161133, "learning_rate": 9.367905495910253e-06, "loss": 0.6683, "step": 15637 }, { "epoch": 0.1869701933308624, "grad_norm": 3.4838945865631104, "learning_rate": 9.36781126332621e-06, "loss": 0.5856, "step": 15638 }, { "epoch": 0.18698214947572306, "grad_norm": 3.836914300918579, "learning_rate": 9.36771702419262e-06, "loss": 0.6694, "step": 15639 }, { "epoch": 0.1869941056205837, "grad_norm": 2.1342053413391113, "learning_rate": 9.36762277850962e-06, "loss": 0.6143, "step": 15640 }, { "epoch": 0.18700606176544435, "grad_norm": 2.504183769226074, "learning_rate": 9.367528526277357e-06, "loss": 0.5674, "step": 15641 }, { "epoch": 0.187018017910305, "grad_norm": 2.440119743347168, "learning_rate": 9.367434267495967e-06, "loss": 0.523, "step": 15642 }, { "epoch": 0.18702997405516567, "grad_norm": 1.9892655611038208, "learning_rate": 9.367340002165594e-06, "loss": 0.6219, "step": 15643 }, { "epoch": 0.1870419302000263, "grad_norm": 5.439912796020508, "learning_rate": 9.367245730286381e-06, "loss": 0.6176, "step": 15644 }, { "epoch": 0.18705388634488695, "grad_norm": 2.209763288497925, "learning_rate": 9.367151451858465e-06, "loss": 0.647, "step": 15645 }, { "epoch": 0.1870658424897476, "grad_norm": 2.891019821166992, "learning_rate": 9.367057166881992e-06, "loss": 0.6006, "step": 15646 }, { "epoch": 0.18707779863460824, "grad_norm": 5.739094257354736, "learning_rate": 9.3669628753571e-06, "loss": 0.5595, "step": 15647 }, { "epoch": 0.1870897547794689, "grad_norm": 2.5238139629364014, "learning_rate": 9.366868577283932e-06, "loss": 0.6326, "step": 15648 }, { "epoch": 0.18710171092432956, "grad_norm": 2.5526325702667236, "learning_rate": 9.36677427266263e-06, "loss": 0.5631, "step": 15649 }, { "epoch": 0.18711366706919022, "grad_norm": 1.8120335340499878, "learning_rate": 9.366679961493333e-06, "loss": 0.6162, "step": 15650 }, { "epoch": 0.18712562321405085, "grad_norm": 2.3983352184295654, "learning_rate": 9.366585643776185e-06, "loss": 0.6219, "step": 15651 }, { "epoch": 0.1871375793589115, "grad_norm": 3.372795820236206, "learning_rate": 9.366491319511325e-06, "loss": 0.6374, "step": 15652 }, { "epoch": 0.18714953550377217, "grad_norm": 1.8474252223968506, "learning_rate": 9.366396988698897e-06, "loss": 0.5139, "step": 15653 }, { "epoch": 0.18716149164863283, "grad_norm": 2.6725544929504395, "learning_rate": 9.36630265133904e-06, "loss": 0.5755, "step": 15654 }, { "epoch": 0.18717344779349346, "grad_norm": 1.8351296186447144, "learning_rate": 9.3662083074319e-06, "loss": 0.5569, "step": 15655 }, { "epoch": 0.18718540393835412, "grad_norm": 1.7958892583847046, "learning_rate": 9.366113956977611e-06, "loss": 0.6505, "step": 15656 }, { "epoch": 0.18719736008321478, "grad_norm": 2.6798095703125, "learning_rate": 9.36601959997632e-06, "loss": 0.7058, "step": 15657 }, { "epoch": 0.1872093162280754, "grad_norm": 2.4613595008850098, "learning_rate": 9.365925236428168e-06, "loss": 0.6654, "step": 15658 }, { "epoch": 0.18722127237293607, "grad_norm": 3.7573256492614746, "learning_rate": 9.365830866333294e-06, "loss": 0.5421, "step": 15659 }, { "epoch": 0.18723322851779672, "grad_norm": 3.9724972248077393, "learning_rate": 9.365736489691842e-06, "loss": 0.5923, "step": 15660 }, { "epoch": 0.18724518466265738, "grad_norm": 2.3818178176879883, "learning_rate": 9.365642106503953e-06, "loss": 0.5762, "step": 15661 }, { "epoch": 0.187257140807518, "grad_norm": 3.67657470703125, "learning_rate": 9.365547716769767e-06, "loss": 0.6063, "step": 15662 }, { "epoch": 0.18726909695237867, "grad_norm": 1.7692393064498901, "learning_rate": 9.365453320489429e-06, "loss": 0.5929, "step": 15663 }, { "epoch": 0.18728105309723933, "grad_norm": 2.6068050861358643, "learning_rate": 9.365358917663076e-06, "loss": 0.5158, "step": 15664 }, { "epoch": 0.1872930092421, "grad_norm": 2.2078804969787598, "learning_rate": 9.365264508290853e-06, "loss": 0.6166, "step": 15665 }, { "epoch": 0.18730496538696062, "grad_norm": 2.3032703399658203, "learning_rate": 9.3651700923729e-06, "loss": 0.6128, "step": 15666 }, { "epoch": 0.18731692153182128, "grad_norm": 2.7328383922576904, "learning_rate": 9.365075669909359e-06, "loss": 0.5748, "step": 15667 }, { "epoch": 0.18732887767668194, "grad_norm": 1.8217664957046509, "learning_rate": 9.36498124090037e-06, "loss": 0.6135, "step": 15668 }, { "epoch": 0.18734083382154257, "grad_norm": 2.9564836025238037, "learning_rate": 9.364886805346076e-06, "loss": 0.6814, "step": 15669 }, { "epoch": 0.18735278996640323, "grad_norm": 2.3736305236816406, "learning_rate": 9.36479236324662e-06, "loss": 0.5648, "step": 15670 }, { "epoch": 0.18736474611126389, "grad_norm": 2.37485933303833, "learning_rate": 9.364697914602143e-06, "loss": 0.6509, "step": 15671 }, { "epoch": 0.18737670225612454, "grad_norm": 2.6118111610412598, "learning_rate": 9.364603459412786e-06, "loss": 0.5961, "step": 15672 }, { "epoch": 0.18738865840098518, "grad_norm": 3.8077144622802734, "learning_rate": 9.364508997678689e-06, "loss": 0.5944, "step": 15673 }, { "epoch": 0.18740061454584583, "grad_norm": 6.019574165344238, "learning_rate": 9.364414529399996e-06, "loss": 0.7049, "step": 15674 }, { "epoch": 0.1874125706907065, "grad_norm": 2.4543330669403076, "learning_rate": 9.36432005457685e-06, "loss": 0.6171, "step": 15675 }, { "epoch": 0.18742452683556715, "grad_norm": 1.8369863033294678, "learning_rate": 9.364225573209387e-06, "loss": 0.5571, "step": 15676 }, { "epoch": 0.18743648298042778, "grad_norm": 2.062091827392578, "learning_rate": 9.364131085297755e-06, "loss": 0.5805, "step": 15677 }, { "epoch": 0.18744843912528844, "grad_norm": 2.620932102203369, "learning_rate": 9.364036590842092e-06, "loss": 0.5997, "step": 15678 }, { "epoch": 0.1874603952701491, "grad_norm": 11.743295669555664, "learning_rate": 9.363942089842539e-06, "loss": 0.6927, "step": 15679 }, { "epoch": 0.18747235141500976, "grad_norm": 2.4798836708068848, "learning_rate": 9.36384758229924e-06, "loss": 0.683, "step": 15680 }, { "epoch": 0.1874843075598704, "grad_norm": 2.415332555770874, "learning_rate": 9.363753068212337e-06, "loss": 0.6558, "step": 15681 }, { "epoch": 0.18749626370473105, "grad_norm": 3.736851930618286, "learning_rate": 9.36365854758197e-06, "loss": 0.5594, "step": 15682 }, { "epoch": 0.1875082198495917, "grad_norm": 2.848949909210205, "learning_rate": 9.363564020408284e-06, "loss": 0.6744, "step": 15683 }, { "epoch": 0.18752017599445234, "grad_norm": 2.5103540420532227, "learning_rate": 9.363469486691416e-06, "loss": 0.6074, "step": 15684 }, { "epoch": 0.187532132139313, "grad_norm": 1.7632603645324707, "learning_rate": 9.36337494643151e-06, "loss": 0.6562, "step": 15685 }, { "epoch": 0.18754408828417365, "grad_norm": 5.298261642456055, "learning_rate": 9.363280399628709e-06, "loss": 0.6754, "step": 15686 }, { "epoch": 0.1875560444290343, "grad_norm": 4.150297164916992, "learning_rate": 9.363185846283151e-06, "loss": 0.6568, "step": 15687 }, { "epoch": 0.18756800057389494, "grad_norm": 2.821104049682617, "learning_rate": 9.363091286394983e-06, "loss": 0.6433, "step": 15688 }, { "epoch": 0.1875799567187556, "grad_norm": 1.3475425243377686, "learning_rate": 9.362996719964342e-06, "loss": 0.5516, "step": 15689 }, { "epoch": 0.18759191286361626, "grad_norm": 1.9686193466186523, "learning_rate": 9.362902146991372e-06, "loss": 0.5745, "step": 15690 }, { "epoch": 0.18760386900847692, "grad_norm": 1.9446072578430176, "learning_rate": 9.362807567476217e-06, "loss": 0.6611, "step": 15691 }, { "epoch": 0.18761582515333755, "grad_norm": 3.211469888687134, "learning_rate": 9.362712981419016e-06, "loss": 0.6063, "step": 15692 }, { "epoch": 0.1876277812981982, "grad_norm": 1.5486994981765747, "learning_rate": 9.36261838881991e-06, "loss": 0.5741, "step": 15693 }, { "epoch": 0.18763973744305887, "grad_norm": 1.7876211404800415, "learning_rate": 9.362523789679042e-06, "loss": 0.6719, "step": 15694 }, { "epoch": 0.1876516935879195, "grad_norm": 2.333942174911499, "learning_rate": 9.362429183996555e-06, "loss": 0.5739, "step": 15695 }, { "epoch": 0.18766364973278016, "grad_norm": 2.9152259826660156, "learning_rate": 9.36233457177259e-06, "loss": 0.6895, "step": 15696 }, { "epoch": 0.18767560587764082, "grad_norm": 2.559504985809326, "learning_rate": 9.362239953007289e-06, "loss": 0.6755, "step": 15697 }, { "epoch": 0.18768756202250148, "grad_norm": 2.2438442707061768, "learning_rate": 9.362145327700794e-06, "loss": 0.5661, "step": 15698 }, { "epoch": 0.1876995181673621, "grad_norm": 2.285428762435913, "learning_rate": 9.362050695853244e-06, "loss": 0.6053, "step": 15699 }, { "epoch": 0.18771147431222276, "grad_norm": 2.099759340286255, "learning_rate": 9.361956057464786e-06, "loss": 0.6534, "step": 15700 }, { "epoch": 0.18772343045708342, "grad_norm": 3.445281505584717, "learning_rate": 9.361861412535558e-06, "loss": 0.6666, "step": 15701 }, { "epoch": 0.18773538660194408, "grad_norm": 2.4998412132263184, "learning_rate": 9.361766761065704e-06, "loss": 0.6132, "step": 15702 }, { "epoch": 0.1877473427468047, "grad_norm": 2.9808847904205322, "learning_rate": 9.361672103055365e-06, "loss": 0.6013, "step": 15703 }, { "epoch": 0.18775929889166537, "grad_norm": 2.7256813049316406, "learning_rate": 9.361577438504685e-06, "loss": 0.6371, "step": 15704 }, { "epoch": 0.18777125503652603, "grad_norm": 3.0447709560394287, "learning_rate": 9.361482767413803e-06, "loss": 0.6146, "step": 15705 }, { "epoch": 0.18778321118138666, "grad_norm": 11.124210357666016, "learning_rate": 9.36138808978286e-06, "loss": 0.6401, "step": 15706 }, { "epoch": 0.18779516732624732, "grad_norm": 2.349632740020752, "learning_rate": 9.361293405612002e-06, "loss": 0.6478, "step": 15707 }, { "epoch": 0.18780712347110798, "grad_norm": 2.686713218688965, "learning_rate": 9.361198714901368e-06, "loss": 0.5961, "step": 15708 }, { "epoch": 0.18781907961596864, "grad_norm": 6.845765590667725, "learning_rate": 9.361104017651103e-06, "loss": 0.6172, "step": 15709 }, { "epoch": 0.18783103576082927, "grad_norm": 10.909187316894531, "learning_rate": 9.361009313861345e-06, "loss": 0.6289, "step": 15710 }, { "epoch": 0.18784299190568993, "grad_norm": 1.9115349054336548, "learning_rate": 9.36091460353224e-06, "loss": 0.6086, "step": 15711 }, { "epoch": 0.18785494805055059, "grad_norm": 1.9386664628982544, "learning_rate": 9.360819886663925e-06, "loss": 0.5657, "step": 15712 }, { "epoch": 0.18786690419541124, "grad_norm": 8.948509216308594, "learning_rate": 9.360725163256548e-06, "loss": 0.4966, "step": 15713 }, { "epoch": 0.18787886034027187, "grad_norm": 3.5504252910614014, "learning_rate": 9.360630433310247e-06, "loss": 0.6329, "step": 15714 }, { "epoch": 0.18789081648513253, "grad_norm": 3.6994097232818604, "learning_rate": 9.360535696825165e-06, "loss": 0.6508, "step": 15715 }, { "epoch": 0.1879027726299932, "grad_norm": 1.8172948360443115, "learning_rate": 9.360440953801444e-06, "loss": 0.6631, "step": 15716 }, { "epoch": 0.18791472877485382, "grad_norm": 2.713650941848755, "learning_rate": 9.360346204239227e-06, "loss": 0.6511, "step": 15717 }, { "epoch": 0.18792668491971448, "grad_norm": 3.543549060821533, "learning_rate": 9.360251448138656e-06, "loss": 0.5752, "step": 15718 }, { "epoch": 0.18793864106457514, "grad_norm": 6.34751033782959, "learning_rate": 9.36015668549987e-06, "loss": 0.7077, "step": 15719 }, { "epoch": 0.1879505972094358, "grad_norm": 5.0739521980285645, "learning_rate": 9.360061916323016e-06, "loss": 0.6896, "step": 15720 }, { "epoch": 0.18796255335429643, "grad_norm": 3.743009328842163, "learning_rate": 9.359967140608233e-06, "loss": 0.6744, "step": 15721 }, { "epoch": 0.1879745094991571, "grad_norm": 2.239281177520752, "learning_rate": 9.359872358355664e-06, "loss": 0.6265, "step": 15722 }, { "epoch": 0.18798646564401775, "grad_norm": 2.364288091659546, "learning_rate": 9.359777569565449e-06, "loss": 0.7038, "step": 15723 }, { "epoch": 0.1879984217888784, "grad_norm": 7.045260906219482, "learning_rate": 9.359682774237733e-06, "loss": 0.5555, "step": 15724 }, { "epoch": 0.18801037793373904, "grad_norm": 4.316839694976807, "learning_rate": 9.359587972372658e-06, "loss": 0.6983, "step": 15725 }, { "epoch": 0.1880223340785997, "grad_norm": 3.179654598236084, "learning_rate": 9.359493163970366e-06, "loss": 0.6019, "step": 15726 }, { "epoch": 0.18803429022346035, "grad_norm": 1.7639830112457275, "learning_rate": 9.359398349030997e-06, "loss": 0.602, "step": 15727 }, { "epoch": 0.18804624636832099, "grad_norm": 2.346820116043091, "learning_rate": 9.359303527554695e-06, "loss": 0.6946, "step": 15728 }, { "epoch": 0.18805820251318164, "grad_norm": 3.188866376876831, "learning_rate": 9.359208699541603e-06, "loss": 0.6459, "step": 15729 }, { "epoch": 0.1880701586580423, "grad_norm": 2.7300539016723633, "learning_rate": 9.35911386499186e-06, "loss": 0.5802, "step": 15730 }, { "epoch": 0.18808211480290296, "grad_norm": 4.4059739112854, "learning_rate": 9.359019023905611e-06, "loss": 0.5513, "step": 15731 }, { "epoch": 0.1880940709477636, "grad_norm": 2.349154233932495, "learning_rate": 9.358924176283e-06, "loss": 0.6453, "step": 15732 }, { "epoch": 0.18810602709262425, "grad_norm": 5.537987232208252, "learning_rate": 9.358829322124164e-06, "loss": 0.5789, "step": 15733 }, { "epoch": 0.1881179832374849, "grad_norm": 2.3316447734832764, "learning_rate": 9.358734461429249e-06, "loss": 0.5236, "step": 15734 }, { "epoch": 0.18812993938234557, "grad_norm": 2.6739442348480225, "learning_rate": 9.358639594198394e-06, "loss": 0.6741, "step": 15735 }, { "epoch": 0.1881418955272062, "grad_norm": 4.477518558502197, "learning_rate": 9.358544720431746e-06, "loss": 0.6124, "step": 15736 }, { "epoch": 0.18815385167206686, "grad_norm": 1.824734091758728, "learning_rate": 9.358449840129443e-06, "loss": 0.696, "step": 15737 }, { "epoch": 0.18816580781692752, "grad_norm": 1.9584578275680542, "learning_rate": 9.35835495329163e-06, "loss": 0.5483, "step": 15738 }, { "epoch": 0.18817776396178817, "grad_norm": 1.9438875913619995, "learning_rate": 9.358260059918449e-06, "loss": 0.5676, "step": 15739 }, { "epoch": 0.1881897201066488, "grad_norm": 2.056300401687622, "learning_rate": 9.35816516001004e-06, "loss": 0.5864, "step": 15740 }, { "epoch": 0.18820167625150946, "grad_norm": 1.9820069074630737, "learning_rate": 9.358070253566547e-06, "loss": 0.6199, "step": 15741 }, { "epoch": 0.18821363239637012, "grad_norm": 2.4193944931030273, "learning_rate": 9.357975340588112e-06, "loss": 0.5959, "step": 15742 }, { "epoch": 0.18822558854123075, "grad_norm": 5.5634284019470215, "learning_rate": 9.357880421074878e-06, "loss": 0.5156, "step": 15743 }, { "epoch": 0.1882375446860914, "grad_norm": 5.874279022216797, "learning_rate": 9.357785495026987e-06, "loss": 0.6452, "step": 15744 }, { "epoch": 0.18824950083095207, "grad_norm": 4.8238115310668945, "learning_rate": 9.357690562444581e-06, "loss": 0.5481, "step": 15745 }, { "epoch": 0.18826145697581273, "grad_norm": 4.540525436401367, "learning_rate": 9.357595623327803e-06, "loss": 0.545, "step": 15746 }, { "epoch": 0.18827341312067336, "grad_norm": 3.5396759510040283, "learning_rate": 9.357500677676795e-06, "loss": 0.6229, "step": 15747 }, { "epoch": 0.18828536926553402, "grad_norm": 2.2308061122894287, "learning_rate": 9.3574057254917e-06, "loss": 0.6447, "step": 15748 }, { "epoch": 0.18829732541039468, "grad_norm": 2.2607064247131348, "learning_rate": 9.357310766772655e-06, "loss": 0.6215, "step": 15749 }, { "epoch": 0.18830928155525534, "grad_norm": 2.064429521560669, "learning_rate": 9.357215801519812e-06, "loss": 0.5111, "step": 15750 }, { "epoch": 0.18832123770011597, "grad_norm": 2.685270309448242, "learning_rate": 9.357120829733306e-06, "loss": 0.5257, "step": 15751 }, { "epoch": 0.18833319384497663, "grad_norm": 4.679105758666992, "learning_rate": 9.357025851413284e-06, "loss": 0.6734, "step": 15752 }, { "epoch": 0.18834514998983728, "grad_norm": 2.8127546310424805, "learning_rate": 9.356930866559885e-06, "loss": 0.6856, "step": 15753 }, { "epoch": 0.18835710613469792, "grad_norm": 1.9270089864730835, "learning_rate": 9.356835875173251e-06, "loss": 0.6173, "step": 15754 }, { "epoch": 0.18836906227955857, "grad_norm": 2.5547752380371094, "learning_rate": 9.356740877253528e-06, "loss": 0.6145, "step": 15755 }, { "epoch": 0.18838101842441923, "grad_norm": 2.6362669467926025, "learning_rate": 9.356645872800856e-06, "loss": 0.6094, "step": 15756 }, { "epoch": 0.1883929745692799, "grad_norm": 2.0936074256896973, "learning_rate": 9.35655086181538e-06, "loss": 0.6277, "step": 15757 }, { "epoch": 0.18840493071414052, "grad_norm": 3.4143900871276855, "learning_rate": 9.356455844297237e-06, "loss": 0.6348, "step": 15758 }, { "epoch": 0.18841688685900118, "grad_norm": 7.71060848236084, "learning_rate": 9.356360820246575e-06, "loss": 0.4788, "step": 15759 }, { "epoch": 0.18842884300386184, "grad_norm": 3.084017038345337, "learning_rate": 9.356265789663534e-06, "loss": 0.6939, "step": 15760 }, { "epoch": 0.1884407991487225, "grad_norm": 3.205416202545166, "learning_rate": 9.356170752548257e-06, "loss": 0.6296, "step": 15761 }, { "epoch": 0.18845275529358313, "grad_norm": 3.9070842266082764, "learning_rate": 9.356075708900886e-06, "loss": 0.5812, "step": 15762 }, { "epoch": 0.1884647114384438, "grad_norm": 2.722585439682007, "learning_rate": 9.355980658721567e-06, "loss": 0.6337, "step": 15763 }, { "epoch": 0.18847666758330445, "grad_norm": 2.2257261276245117, "learning_rate": 9.355885602010436e-06, "loss": 0.525, "step": 15764 }, { "epoch": 0.18848862372816508, "grad_norm": 12.553803443908691, "learning_rate": 9.35579053876764e-06, "loss": 0.6199, "step": 15765 }, { "epoch": 0.18850057987302574, "grad_norm": 5.177941799163818, "learning_rate": 9.35569546899332e-06, "loss": 0.5615, "step": 15766 }, { "epoch": 0.1885125360178864, "grad_norm": 1.9744176864624023, "learning_rate": 9.355600392687621e-06, "loss": 0.6351, "step": 15767 }, { "epoch": 0.18852449216274705, "grad_norm": 2.0054471492767334, "learning_rate": 9.355505309850684e-06, "loss": 0.5811, "step": 15768 }, { "epoch": 0.18853644830760768, "grad_norm": 3.3175415992736816, "learning_rate": 9.355410220482648e-06, "loss": 0.7053, "step": 15769 }, { "epoch": 0.18854840445246834, "grad_norm": 4.451731204986572, "learning_rate": 9.355315124583662e-06, "loss": 0.5762, "step": 15770 }, { "epoch": 0.188560360597329, "grad_norm": 1.835098147392273, "learning_rate": 9.355220022153863e-06, "loss": 0.7413, "step": 15771 }, { "epoch": 0.18857231674218966, "grad_norm": 6.246463775634766, "learning_rate": 9.355124913193398e-06, "loss": 0.6106, "step": 15772 }, { "epoch": 0.1885842728870503, "grad_norm": 2.2067060470581055, "learning_rate": 9.355029797702408e-06, "loss": 0.5569, "step": 15773 }, { "epoch": 0.18859622903191095, "grad_norm": 4.270678997039795, "learning_rate": 9.354934675681034e-06, "loss": 0.6208, "step": 15774 }, { "epoch": 0.1886081851767716, "grad_norm": 1.7461920976638794, "learning_rate": 9.35483954712942e-06, "loss": 0.6681, "step": 15775 }, { "epoch": 0.18862014132163224, "grad_norm": 2.196993589401245, "learning_rate": 9.35474441204771e-06, "loss": 0.6349, "step": 15776 }, { "epoch": 0.1886320974664929, "grad_norm": 3.1066782474517822, "learning_rate": 9.354649270436044e-06, "loss": 0.5598, "step": 15777 }, { "epoch": 0.18864405361135356, "grad_norm": 3.6664795875549316, "learning_rate": 9.354554122294565e-06, "loss": 0.579, "step": 15778 }, { "epoch": 0.18865600975621422, "grad_norm": 5.8803391456604, "learning_rate": 9.354458967623419e-06, "loss": 0.6471, "step": 15779 }, { "epoch": 0.18866796590107485, "grad_norm": 1.5437837839126587, "learning_rate": 9.354363806422746e-06, "loss": 0.5192, "step": 15780 }, { "epoch": 0.1886799220459355, "grad_norm": 1.8445018529891968, "learning_rate": 9.354268638692687e-06, "loss": 0.6652, "step": 15781 }, { "epoch": 0.18869187819079616, "grad_norm": 5.167418479919434, "learning_rate": 9.354173464433388e-06, "loss": 0.6455, "step": 15782 }, { "epoch": 0.18870383433565682, "grad_norm": 3.5528221130371094, "learning_rate": 9.35407828364499e-06, "loss": 0.6365, "step": 15783 }, { "epoch": 0.18871579048051745, "grad_norm": 2.006960868835449, "learning_rate": 9.353983096327637e-06, "loss": 0.6367, "step": 15784 }, { "epoch": 0.1887277466253781, "grad_norm": 4.342739582061768, "learning_rate": 9.35388790248147e-06, "loss": 0.5979, "step": 15785 }, { "epoch": 0.18873970277023877, "grad_norm": 1.9857258796691895, "learning_rate": 9.353792702106633e-06, "loss": 0.6111, "step": 15786 }, { "epoch": 0.1887516589150994, "grad_norm": 31.960765838623047, "learning_rate": 9.353697495203269e-06, "loss": 0.4993, "step": 15787 }, { "epoch": 0.18876361505996006, "grad_norm": 3.0278780460357666, "learning_rate": 9.353602281771519e-06, "loss": 0.6095, "step": 15788 }, { "epoch": 0.18877557120482072, "grad_norm": 3.7874207496643066, "learning_rate": 9.353507061811527e-06, "loss": 0.687, "step": 15789 }, { "epoch": 0.18878752734968138, "grad_norm": 23.925622940063477, "learning_rate": 9.353411835323436e-06, "loss": 0.551, "step": 15790 }, { "epoch": 0.188799483494542, "grad_norm": 7.8832268714904785, "learning_rate": 9.353316602307388e-06, "loss": 0.6941, "step": 15791 }, { "epoch": 0.18881143963940267, "grad_norm": 2.275385618209839, "learning_rate": 9.353221362763526e-06, "loss": 0.6839, "step": 15792 }, { "epoch": 0.18882339578426333, "grad_norm": 6.449413776397705, "learning_rate": 9.353126116691995e-06, "loss": 0.6788, "step": 15793 }, { "epoch": 0.18883535192912398, "grad_norm": 2.4553563594818115, "learning_rate": 9.353030864092935e-06, "loss": 0.5092, "step": 15794 }, { "epoch": 0.18884730807398462, "grad_norm": 2.6107003688812256, "learning_rate": 9.35293560496649e-06, "loss": 0.6737, "step": 15795 }, { "epoch": 0.18885926421884527, "grad_norm": 3.516242742538452, "learning_rate": 9.352840339312802e-06, "loss": 0.6427, "step": 15796 }, { "epoch": 0.18887122036370593, "grad_norm": 4.375883102416992, "learning_rate": 9.352745067132014e-06, "loss": 0.652, "step": 15797 }, { "epoch": 0.1888831765085666, "grad_norm": 3.8732755184173584, "learning_rate": 9.352649788424271e-06, "loss": 0.6144, "step": 15798 }, { "epoch": 0.18889513265342722, "grad_norm": 42.4930419921875, "learning_rate": 9.352554503189712e-06, "loss": 0.6289, "step": 15799 }, { "epoch": 0.18890708879828788, "grad_norm": 5.028611660003662, "learning_rate": 9.352459211428484e-06, "loss": 0.68, "step": 15800 }, { "epoch": 0.18891904494314854, "grad_norm": 2.3152694702148438, "learning_rate": 9.352363913140726e-06, "loss": 0.6095, "step": 15801 }, { "epoch": 0.18893100108800917, "grad_norm": 16.668773651123047, "learning_rate": 9.352268608326585e-06, "loss": 0.7289, "step": 15802 }, { "epoch": 0.18894295723286983, "grad_norm": 2.0905659198760986, "learning_rate": 9.3521732969862e-06, "loss": 0.6456, "step": 15803 }, { "epoch": 0.1889549133777305, "grad_norm": 2.061811685562134, "learning_rate": 9.352077979119716e-06, "loss": 0.5696, "step": 15804 }, { "epoch": 0.18896686952259115, "grad_norm": 1.893413782119751, "learning_rate": 9.351982654727276e-06, "loss": 0.4417, "step": 15805 }, { "epoch": 0.18897882566745178, "grad_norm": 1.4716278314590454, "learning_rate": 9.351887323809022e-06, "loss": 0.5795, "step": 15806 }, { "epoch": 0.18899078181231244, "grad_norm": 2.6370809078216553, "learning_rate": 9.351791986365098e-06, "loss": 0.5922, "step": 15807 }, { "epoch": 0.1890027379571731, "grad_norm": 3.9309611320495605, "learning_rate": 9.351696642395647e-06, "loss": 0.6436, "step": 15808 }, { "epoch": 0.18901469410203375, "grad_norm": 3.6663131713867188, "learning_rate": 9.351601291900811e-06, "loss": 0.7021, "step": 15809 }, { "epoch": 0.18902665024689438, "grad_norm": 4.140707492828369, "learning_rate": 9.351505934880734e-06, "loss": 0.6204, "step": 15810 }, { "epoch": 0.18903860639175504, "grad_norm": 1.9370330572128296, "learning_rate": 9.351410571335557e-06, "loss": 0.6427, "step": 15811 }, { "epoch": 0.1890505625366157, "grad_norm": 4.846031188964844, "learning_rate": 9.351315201265426e-06, "loss": 0.5927, "step": 15812 }, { "epoch": 0.18906251868147633, "grad_norm": 2.801140785217285, "learning_rate": 9.35121982467048e-06, "loss": 0.7363, "step": 15813 }, { "epoch": 0.189074474826337, "grad_norm": 5.9412078857421875, "learning_rate": 9.351124441550866e-06, "loss": 0.6095, "step": 15814 }, { "epoch": 0.18908643097119765, "grad_norm": 2.103919267654419, "learning_rate": 9.351029051906723e-06, "loss": 0.6062, "step": 15815 }, { "epoch": 0.1890983871160583, "grad_norm": 3.0730090141296387, "learning_rate": 9.3509336557382e-06, "loss": 0.5593, "step": 15816 }, { "epoch": 0.18911034326091894, "grad_norm": 2.897756338119507, "learning_rate": 9.350838253045436e-06, "loss": 0.6367, "step": 15817 }, { "epoch": 0.1891222994057796, "grad_norm": 7.4351091384887695, "learning_rate": 9.350742843828572e-06, "loss": 0.6035, "step": 15818 }, { "epoch": 0.18913425555064026, "grad_norm": 2.4000232219696045, "learning_rate": 9.350647428087756e-06, "loss": 0.5621, "step": 15819 }, { "epoch": 0.18914621169550092, "grad_norm": 2.820887565612793, "learning_rate": 9.350552005823128e-06, "loss": 0.6959, "step": 15820 }, { "epoch": 0.18915816784036155, "grad_norm": 1.873050332069397, "learning_rate": 9.350456577034832e-06, "loss": 0.6642, "step": 15821 }, { "epoch": 0.1891701239852222, "grad_norm": 2.653008222579956, "learning_rate": 9.350361141723011e-06, "loss": 0.6275, "step": 15822 }, { "epoch": 0.18918208013008286, "grad_norm": 1.8582417964935303, "learning_rate": 9.350265699887809e-06, "loss": 0.604, "step": 15823 }, { "epoch": 0.1891940362749435, "grad_norm": 2.6849536895751953, "learning_rate": 9.350170251529366e-06, "loss": 0.6735, "step": 15824 }, { "epoch": 0.18920599241980415, "grad_norm": 7.326751232147217, "learning_rate": 9.350074796647827e-06, "loss": 0.5925, "step": 15825 }, { "epoch": 0.1892179485646648, "grad_norm": 2.305354595184326, "learning_rate": 9.349979335243338e-06, "loss": 0.5258, "step": 15826 }, { "epoch": 0.18922990470952547, "grad_norm": 3.4220166206359863, "learning_rate": 9.349883867316039e-06, "loss": 0.5959, "step": 15827 }, { "epoch": 0.1892418608543861, "grad_norm": 4.237244129180908, "learning_rate": 9.349788392866072e-06, "loss": 0.7054, "step": 15828 }, { "epoch": 0.18925381699924676, "grad_norm": 3.798689365386963, "learning_rate": 9.349692911893581e-06, "loss": 0.6678, "step": 15829 }, { "epoch": 0.18926577314410742, "grad_norm": 1.744517207145691, "learning_rate": 9.349597424398714e-06, "loss": 0.4963, "step": 15830 }, { "epoch": 0.18927772928896808, "grad_norm": 2.5922327041625977, "learning_rate": 9.349501930381606e-06, "loss": 0.6961, "step": 15831 }, { "epoch": 0.1892896854338287, "grad_norm": 2.590139865875244, "learning_rate": 9.349406429842407e-06, "loss": 0.6413, "step": 15832 }, { "epoch": 0.18930164157868937, "grad_norm": 2.22141695022583, "learning_rate": 9.349310922781257e-06, "loss": 0.5665, "step": 15833 }, { "epoch": 0.18931359772355003, "grad_norm": 4.455008029937744, "learning_rate": 9.3492154091983e-06, "loss": 0.5941, "step": 15834 }, { "epoch": 0.18932555386841066, "grad_norm": 2.5192267894744873, "learning_rate": 9.349119889093678e-06, "loss": 0.5691, "step": 15835 }, { "epoch": 0.18933751001327132, "grad_norm": 4.373858451843262, "learning_rate": 9.349024362467537e-06, "loss": 0.6544, "step": 15836 }, { "epoch": 0.18934946615813197, "grad_norm": 3.215576648712158, "learning_rate": 9.348928829320016e-06, "loss": 0.6323, "step": 15837 }, { "epoch": 0.18936142230299263, "grad_norm": 2.735924005508423, "learning_rate": 9.348833289651263e-06, "loss": 0.6739, "step": 15838 }, { "epoch": 0.18937337844785326, "grad_norm": 4.424706935882568, "learning_rate": 9.348737743461418e-06, "loss": 0.6968, "step": 15839 }, { "epoch": 0.18938533459271392, "grad_norm": 3.1466987133026123, "learning_rate": 9.348642190750626e-06, "loss": 0.6266, "step": 15840 }, { "epoch": 0.18939729073757458, "grad_norm": 2.0004401206970215, "learning_rate": 9.348546631519029e-06, "loss": 0.5525, "step": 15841 }, { "epoch": 0.18940924688243524, "grad_norm": 2.428546190261841, "learning_rate": 9.34845106576677e-06, "loss": 0.581, "step": 15842 }, { "epoch": 0.18942120302729587, "grad_norm": 3.598898410797119, "learning_rate": 9.348355493493994e-06, "loss": 0.6333, "step": 15843 }, { "epoch": 0.18943315917215653, "grad_norm": 2.090036153793335, "learning_rate": 9.348259914700844e-06, "loss": 0.5449, "step": 15844 }, { "epoch": 0.1894451153170172, "grad_norm": 2.179428815841675, "learning_rate": 9.348164329387462e-06, "loss": 0.6068, "step": 15845 }, { "epoch": 0.18945707146187782, "grad_norm": 11.246179580688477, "learning_rate": 9.348068737553992e-06, "loss": 0.6156, "step": 15846 }, { "epoch": 0.18946902760673848, "grad_norm": 4.900315761566162, "learning_rate": 9.347973139200578e-06, "loss": 0.5631, "step": 15847 }, { "epoch": 0.18948098375159914, "grad_norm": 2.9390640258789062, "learning_rate": 9.347877534327365e-06, "loss": 0.5853, "step": 15848 }, { "epoch": 0.1894929398964598, "grad_norm": 1.8359793424606323, "learning_rate": 9.34778192293449e-06, "loss": 0.5612, "step": 15849 }, { "epoch": 0.18950489604132043, "grad_norm": 3.0134646892547607, "learning_rate": 9.347686305022103e-06, "loss": 0.5801, "step": 15850 }, { "epoch": 0.18951685218618108, "grad_norm": 2.6722071170806885, "learning_rate": 9.347590680590346e-06, "loss": 0.6424, "step": 15851 }, { "epoch": 0.18952880833104174, "grad_norm": 1.811859130859375, "learning_rate": 9.347495049639359e-06, "loss": 0.5793, "step": 15852 }, { "epoch": 0.1895407644759024, "grad_norm": 14.917643547058105, "learning_rate": 9.347399412169288e-06, "loss": 0.6852, "step": 15853 }, { "epoch": 0.18955272062076303, "grad_norm": 2.289179563522339, "learning_rate": 9.347303768180278e-06, "loss": 0.6446, "step": 15854 }, { "epoch": 0.1895646767656237, "grad_norm": 4.0088419914245605, "learning_rate": 9.347208117672468e-06, "loss": 0.5815, "step": 15855 }, { "epoch": 0.18957663291048435, "grad_norm": 1.681394338607788, "learning_rate": 9.347112460646005e-06, "loss": 0.5889, "step": 15856 }, { "epoch": 0.189588589055345, "grad_norm": 3.6335549354553223, "learning_rate": 9.347016797101032e-06, "loss": 0.6781, "step": 15857 }, { "epoch": 0.18960054520020564, "grad_norm": 2.5464372634887695, "learning_rate": 9.34692112703769e-06, "loss": 0.5867, "step": 15858 }, { "epoch": 0.1896125013450663, "grad_norm": 1.8484687805175781, "learning_rate": 9.346825450456128e-06, "loss": 0.7345, "step": 15859 }, { "epoch": 0.18962445748992696, "grad_norm": 6.242049217224121, "learning_rate": 9.346729767356482e-06, "loss": 0.6339, "step": 15860 }, { "epoch": 0.1896364136347876, "grad_norm": 6.107038974761963, "learning_rate": 9.346634077738901e-06, "loss": 0.6587, "step": 15861 }, { "epoch": 0.18964836977964825, "grad_norm": 2.972155809402466, "learning_rate": 9.346538381603527e-06, "loss": 0.6824, "step": 15862 }, { "epoch": 0.1896603259245089, "grad_norm": 5.127257823944092, "learning_rate": 9.346442678950503e-06, "loss": 0.6773, "step": 15863 }, { "epoch": 0.18967228206936956, "grad_norm": 3.052946090698242, "learning_rate": 9.346346969779974e-06, "loss": 0.604, "step": 15864 }, { "epoch": 0.1896842382142302, "grad_norm": 3.407198667526245, "learning_rate": 9.34625125409208e-06, "loss": 0.5833, "step": 15865 }, { "epoch": 0.18969619435909085, "grad_norm": 3.177488088607788, "learning_rate": 9.346155531886967e-06, "loss": 0.5985, "step": 15866 }, { "epoch": 0.1897081505039515, "grad_norm": 3.517364501953125, "learning_rate": 9.34605980316478e-06, "loss": 0.5655, "step": 15867 }, { "epoch": 0.18972010664881217, "grad_norm": 3.8532843589782715, "learning_rate": 9.345964067925659e-06, "loss": 0.5154, "step": 15868 }, { "epoch": 0.1897320627936728, "grad_norm": 1.9835078716278076, "learning_rate": 9.34586832616975e-06, "loss": 0.6961, "step": 15869 }, { "epoch": 0.18974401893853346, "grad_norm": 2.0234570503234863, "learning_rate": 9.345772577897196e-06, "loss": 0.6359, "step": 15870 }, { "epoch": 0.18975597508339412, "grad_norm": 3.352722644805908, "learning_rate": 9.345676823108141e-06, "loss": 0.7001, "step": 15871 }, { "epoch": 0.18976793122825475, "grad_norm": 4.0858893394470215, "learning_rate": 9.34558106180273e-06, "loss": 0.6416, "step": 15872 }, { "epoch": 0.1897798873731154, "grad_norm": 3.5350985527038574, "learning_rate": 9.345485293981102e-06, "loss": 0.7415, "step": 15873 }, { "epoch": 0.18979184351797607, "grad_norm": 2.1792263984680176, "learning_rate": 9.345389519643404e-06, "loss": 0.6435, "step": 15874 }, { "epoch": 0.18980379966283673, "grad_norm": 3.026902914047241, "learning_rate": 9.34529373878978e-06, "loss": 0.6474, "step": 15875 }, { "epoch": 0.18981575580769736, "grad_norm": 2.3864221572875977, "learning_rate": 9.345197951420371e-06, "loss": 0.6162, "step": 15876 }, { "epoch": 0.18982771195255801, "grad_norm": 19.53577995300293, "learning_rate": 9.345102157535321e-06, "loss": 0.5704, "step": 15877 }, { "epoch": 0.18983966809741867, "grad_norm": 11.098848342895508, "learning_rate": 9.345006357134779e-06, "loss": 0.622, "step": 15878 }, { "epoch": 0.18985162424227933, "grad_norm": 4.101363182067871, "learning_rate": 9.344910550218882e-06, "loss": 0.6546, "step": 15879 }, { "epoch": 0.18986358038713996, "grad_norm": 3.6302199363708496, "learning_rate": 9.344814736787776e-06, "loss": 0.6057, "step": 15880 }, { "epoch": 0.18987553653200062, "grad_norm": 5.043050289154053, "learning_rate": 9.344718916841606e-06, "loss": 0.675, "step": 15881 }, { "epoch": 0.18988749267686128, "grad_norm": 6.823713302612305, "learning_rate": 9.344623090380514e-06, "loss": 0.6621, "step": 15882 }, { "epoch": 0.1898994488217219, "grad_norm": 2.3934483528137207, "learning_rate": 9.344527257404643e-06, "loss": 0.6057, "step": 15883 }, { "epoch": 0.18991140496658257, "grad_norm": 3.1535911560058594, "learning_rate": 9.34443141791414e-06, "loss": 0.5777, "step": 15884 }, { "epoch": 0.18992336111144323, "grad_norm": 6.251678466796875, "learning_rate": 9.344335571909145e-06, "loss": 0.5762, "step": 15885 }, { "epoch": 0.1899353172563039, "grad_norm": 2.9159600734710693, "learning_rate": 9.344239719389805e-06, "loss": 0.4924, "step": 15886 }, { "epoch": 0.18994727340116452, "grad_norm": 4.109712600708008, "learning_rate": 9.34414386035626e-06, "loss": 0.5816, "step": 15887 }, { "epoch": 0.18995922954602518, "grad_norm": 2.935250997543335, "learning_rate": 9.344047994808657e-06, "loss": 0.5935, "step": 15888 }, { "epoch": 0.18997118569088584, "grad_norm": 3.1865415573120117, "learning_rate": 9.34395212274714e-06, "loss": 0.6042, "step": 15889 }, { "epoch": 0.1899831418357465, "grad_norm": 2.1110761165618896, "learning_rate": 9.34385624417185e-06, "loss": 0.6908, "step": 15890 }, { "epoch": 0.18999509798060712, "grad_norm": 4.391039848327637, "learning_rate": 9.343760359082932e-06, "loss": 0.5186, "step": 15891 }, { "epoch": 0.19000705412546778, "grad_norm": 3.1706221103668213, "learning_rate": 9.34366446748053e-06, "loss": 0.6861, "step": 15892 }, { "epoch": 0.19001901027032844, "grad_norm": 10.944223403930664, "learning_rate": 9.343568569364787e-06, "loss": 0.6619, "step": 15893 }, { "epoch": 0.19003096641518907, "grad_norm": 9.923397064208984, "learning_rate": 9.343472664735848e-06, "loss": 0.5676, "step": 15894 }, { "epoch": 0.19004292256004973, "grad_norm": 3.2017273902893066, "learning_rate": 9.343376753593857e-06, "loss": 0.692, "step": 15895 }, { "epoch": 0.1900548787049104, "grad_norm": 3.3907060623168945, "learning_rate": 9.343280835938956e-06, "loss": 0.5823, "step": 15896 }, { "epoch": 0.19006683484977105, "grad_norm": 2.6056671142578125, "learning_rate": 9.343184911771291e-06, "loss": 0.6135, "step": 15897 }, { "epoch": 0.19007879099463168, "grad_norm": 2.735914945602417, "learning_rate": 9.343088981091004e-06, "loss": 0.6712, "step": 15898 }, { "epoch": 0.19009074713949234, "grad_norm": 3.2103097438812256, "learning_rate": 9.342993043898239e-06, "loss": 0.6642, "step": 15899 }, { "epoch": 0.190102703284353, "grad_norm": 6.619777679443359, "learning_rate": 9.342897100193142e-06, "loss": 0.605, "step": 15900 }, { "epoch": 0.19011465942921366, "grad_norm": 3.162632942199707, "learning_rate": 9.342801149975856e-06, "loss": 0.5863, "step": 15901 }, { "epoch": 0.1901266155740743, "grad_norm": 8.580811500549316, "learning_rate": 9.342705193246521e-06, "loss": 0.567, "step": 15902 }, { "epoch": 0.19013857171893495, "grad_norm": 5.463998794555664, "learning_rate": 9.342609230005285e-06, "loss": 0.6126, "step": 15903 }, { "epoch": 0.1901505278637956, "grad_norm": 6.888698577880859, "learning_rate": 9.342513260252294e-06, "loss": 0.6328, "step": 15904 }, { "epoch": 0.19016248400865624, "grad_norm": 8.28876781463623, "learning_rate": 9.342417283987688e-06, "loss": 0.6598, "step": 15905 }, { "epoch": 0.1901744401535169, "grad_norm": 2.7881410121917725, "learning_rate": 9.34232130121161e-06, "loss": 0.6531, "step": 15906 }, { "epoch": 0.19018639629837755, "grad_norm": 2.9269590377807617, "learning_rate": 9.342225311924206e-06, "loss": 0.5969, "step": 15907 }, { "epoch": 0.1901983524432382, "grad_norm": 5.162019729614258, "learning_rate": 9.34212931612562e-06, "loss": 0.5612, "step": 15908 }, { "epoch": 0.19021030858809884, "grad_norm": 4.094619274139404, "learning_rate": 9.342033313815998e-06, "loss": 0.5988, "step": 15909 }, { "epoch": 0.1902222647329595, "grad_norm": 2.8579156398773193, "learning_rate": 9.341937304995479e-06, "loss": 0.6583, "step": 15910 }, { "epoch": 0.19023422087782016, "grad_norm": 3.1038882732391357, "learning_rate": 9.34184128966421e-06, "loss": 0.5843, "step": 15911 }, { "epoch": 0.19024617702268082, "grad_norm": 19.554134368896484, "learning_rate": 9.341745267822335e-06, "loss": 0.5866, "step": 15912 }, { "epoch": 0.19025813316754145, "grad_norm": 3.7150588035583496, "learning_rate": 9.341649239469998e-06, "loss": 0.6266, "step": 15913 }, { "epoch": 0.1902700893124021, "grad_norm": 2.3848628997802734, "learning_rate": 9.341553204607343e-06, "loss": 0.4945, "step": 15914 }, { "epoch": 0.19028204545726277, "grad_norm": 3.1363985538482666, "learning_rate": 9.34145716323451e-06, "loss": 0.5875, "step": 15915 }, { "epoch": 0.19029400160212342, "grad_norm": 2.3874552249908447, "learning_rate": 9.34136111535165e-06, "loss": 0.7297, "step": 15916 }, { "epoch": 0.19030595774698406, "grad_norm": 3.923776388168335, "learning_rate": 9.341265060958904e-06, "loss": 0.6575, "step": 15917 }, { "epoch": 0.19031791389184471, "grad_norm": 2.143771171569824, "learning_rate": 9.341169000056414e-06, "loss": 0.568, "step": 15918 }, { "epoch": 0.19032987003670537, "grad_norm": 2.111351728439331, "learning_rate": 9.341072932644325e-06, "loss": 0.6199, "step": 15919 }, { "epoch": 0.190341826181566, "grad_norm": 8.509512901306152, "learning_rate": 9.340976858722783e-06, "loss": 0.5177, "step": 15920 }, { "epoch": 0.19035378232642666, "grad_norm": 1.8116464614868164, "learning_rate": 9.34088077829193e-06, "loss": 0.6172, "step": 15921 }, { "epoch": 0.19036573847128732, "grad_norm": 4.231378078460693, "learning_rate": 9.340784691351912e-06, "loss": 0.646, "step": 15922 }, { "epoch": 0.19037769461614798, "grad_norm": 2.0786781311035156, "learning_rate": 9.34068859790287e-06, "loss": 0.5844, "step": 15923 }, { "epoch": 0.1903896507610086, "grad_norm": 2.6803951263427734, "learning_rate": 9.340592497944952e-06, "loss": 0.6156, "step": 15924 }, { "epoch": 0.19040160690586927, "grad_norm": 1.7275797128677368, "learning_rate": 9.340496391478298e-06, "loss": 0.6204, "step": 15925 }, { "epoch": 0.19041356305072993, "grad_norm": 3.262463331222534, "learning_rate": 9.340400278503057e-06, "loss": 0.5732, "step": 15926 }, { "epoch": 0.1904255191955906, "grad_norm": 2.6562695503234863, "learning_rate": 9.340304159019367e-06, "loss": 0.5848, "step": 15927 }, { "epoch": 0.19043747534045122, "grad_norm": 3.3727614879608154, "learning_rate": 9.340208033027378e-06, "loss": 0.6921, "step": 15928 }, { "epoch": 0.19044943148531188, "grad_norm": 3.0542490482330322, "learning_rate": 9.34011190052723e-06, "loss": 0.6611, "step": 15929 }, { "epoch": 0.19046138763017254, "grad_norm": 3.4306206703186035, "learning_rate": 9.34001576151907e-06, "loss": 0.6676, "step": 15930 }, { "epoch": 0.19047334377503317, "grad_norm": 3.0051944255828857, "learning_rate": 9.339919616003041e-06, "loss": 0.6139, "step": 15931 }, { "epoch": 0.19048529991989382, "grad_norm": 3.0477054119110107, "learning_rate": 9.339823463979288e-06, "loss": 0.7236, "step": 15932 }, { "epoch": 0.19049725606475448, "grad_norm": 2.519991874694824, "learning_rate": 9.339727305447952e-06, "loss": 0.6422, "step": 15933 }, { "epoch": 0.19050921220961514, "grad_norm": 4.739306926727295, "learning_rate": 9.339631140409182e-06, "loss": 0.5468, "step": 15934 }, { "epoch": 0.19052116835447577, "grad_norm": 1.970186471939087, "learning_rate": 9.339534968863118e-06, "loss": 0.601, "step": 15935 }, { "epoch": 0.19053312449933643, "grad_norm": 3.0761499404907227, "learning_rate": 9.339438790809905e-06, "loss": 0.5997, "step": 15936 }, { "epoch": 0.1905450806441971, "grad_norm": 3.7937111854553223, "learning_rate": 9.339342606249689e-06, "loss": 0.6133, "step": 15937 }, { "epoch": 0.19055703678905775, "grad_norm": 3.3087618350982666, "learning_rate": 9.339246415182614e-06, "loss": 0.5692, "step": 15938 }, { "epoch": 0.19056899293391838, "grad_norm": 4.516046524047852, "learning_rate": 9.339150217608824e-06, "loss": 0.649, "step": 15939 }, { "epoch": 0.19058094907877904, "grad_norm": 2.4477643966674805, "learning_rate": 9.33905401352846e-06, "loss": 0.6067, "step": 15940 }, { "epoch": 0.1905929052236397, "grad_norm": 6.252349853515625, "learning_rate": 9.338957802941673e-06, "loss": 0.5432, "step": 15941 }, { "epoch": 0.19060486136850033, "grad_norm": 2.668639659881592, "learning_rate": 9.3388615858486e-06, "loss": 0.5789, "step": 15942 }, { "epoch": 0.190616817513361, "grad_norm": 3.3703768253326416, "learning_rate": 9.338765362249392e-06, "loss": 0.6307, "step": 15943 }, { "epoch": 0.19062877365822165, "grad_norm": 102.24114227294922, "learning_rate": 9.338669132144186e-06, "loss": 0.6387, "step": 15944 }, { "epoch": 0.1906407298030823, "grad_norm": 1.6889102458953857, "learning_rate": 9.338572895533133e-06, "loss": 0.5209, "step": 15945 }, { "epoch": 0.19065268594794293, "grad_norm": 2.9594171047210693, "learning_rate": 9.338476652416373e-06, "loss": 0.5717, "step": 15946 }, { "epoch": 0.1906646420928036, "grad_norm": 2.5034728050231934, "learning_rate": 9.338380402794052e-06, "loss": 0.6154, "step": 15947 }, { "epoch": 0.19067659823766425, "grad_norm": 2.9342215061187744, "learning_rate": 9.338284146666316e-06, "loss": 0.6208, "step": 15948 }, { "epoch": 0.1906885543825249, "grad_norm": 5.875106334686279, "learning_rate": 9.338187884033306e-06, "loss": 0.59, "step": 15949 }, { "epoch": 0.19070051052738554, "grad_norm": 2.6608028411865234, "learning_rate": 9.338091614895169e-06, "loss": 0.6499, "step": 15950 }, { "epoch": 0.1907124666722462, "grad_norm": 3.236109972000122, "learning_rate": 9.337995339252046e-06, "loss": 0.5535, "step": 15951 }, { "epoch": 0.19072442281710686, "grad_norm": 2.265964984893799, "learning_rate": 9.337899057104085e-06, "loss": 0.654, "step": 15952 }, { "epoch": 0.1907363789619675, "grad_norm": 2.8221874237060547, "learning_rate": 9.337802768451428e-06, "loss": 0.6551, "step": 15953 }, { "epoch": 0.19074833510682815, "grad_norm": 2.6559877395629883, "learning_rate": 9.337706473294222e-06, "loss": 0.5976, "step": 15954 }, { "epoch": 0.1907602912516888, "grad_norm": 2.1482787132263184, "learning_rate": 9.337610171632608e-06, "loss": 0.6509, "step": 15955 }, { "epoch": 0.19077224739654947, "grad_norm": 2.5243959426879883, "learning_rate": 9.337513863466734e-06, "loss": 0.6126, "step": 15956 }, { "epoch": 0.1907842035414101, "grad_norm": 2.2567429542541504, "learning_rate": 9.33741754879674e-06, "loss": 0.6106, "step": 15957 }, { "epoch": 0.19079615968627076, "grad_norm": 2.651397466659546, "learning_rate": 9.337321227622775e-06, "loss": 0.5697, "step": 15958 }, { "epoch": 0.19080811583113141, "grad_norm": 2.3097259998321533, "learning_rate": 9.337224899944981e-06, "loss": 0.6296, "step": 15959 }, { "epoch": 0.19082007197599207, "grad_norm": 2.6921112537384033, "learning_rate": 9.337128565763503e-06, "loss": 0.6041, "step": 15960 }, { "epoch": 0.1908320281208527, "grad_norm": 2.2714035511016846, "learning_rate": 9.337032225078485e-06, "loss": 0.6064, "step": 15961 }, { "epoch": 0.19084398426571336, "grad_norm": 3.0262906551361084, "learning_rate": 9.336935877890072e-06, "loss": 0.6414, "step": 15962 }, { "epoch": 0.19085594041057402, "grad_norm": 2.4610719680786133, "learning_rate": 9.336839524198406e-06, "loss": 0.5725, "step": 15963 }, { "epoch": 0.19086789655543465, "grad_norm": 9.714831352233887, "learning_rate": 9.336743164003637e-06, "loss": 0.6511, "step": 15964 }, { "epoch": 0.1908798527002953, "grad_norm": 3.459017276763916, "learning_rate": 9.336646797305905e-06, "loss": 0.6528, "step": 15965 }, { "epoch": 0.19089180884515597, "grad_norm": 4.044993877410889, "learning_rate": 9.336550424105354e-06, "loss": 0.6436, "step": 15966 }, { "epoch": 0.19090376499001663, "grad_norm": 3.2533316612243652, "learning_rate": 9.336454044402133e-06, "loss": 0.6271, "step": 15967 }, { "epoch": 0.19091572113487726, "grad_norm": 4.313207149505615, "learning_rate": 9.336357658196382e-06, "loss": 0.5865, "step": 15968 }, { "epoch": 0.19092767727973792, "grad_norm": 3.5397162437438965, "learning_rate": 9.33626126548825e-06, "loss": 0.6819, "step": 15969 }, { "epoch": 0.19093963342459858, "grad_norm": 2.0294055938720703, "learning_rate": 9.336164866277877e-06, "loss": 0.5748, "step": 15970 }, { "epoch": 0.19095158956945923, "grad_norm": 2.0160627365112305, "learning_rate": 9.336068460565408e-06, "loss": 0.6473, "step": 15971 }, { "epoch": 0.19096354571431987, "grad_norm": 2.16810941696167, "learning_rate": 9.33597204835099e-06, "loss": 0.6002, "step": 15972 }, { "epoch": 0.19097550185918052, "grad_norm": 2.7314677238464355, "learning_rate": 9.335875629634767e-06, "loss": 0.5506, "step": 15973 }, { "epoch": 0.19098745800404118, "grad_norm": 2.2219114303588867, "learning_rate": 9.33577920441688e-06, "loss": 0.5607, "step": 15974 }, { "epoch": 0.19099941414890184, "grad_norm": 3.0946033000946045, "learning_rate": 9.33568277269748e-06, "loss": 0.5309, "step": 15975 }, { "epoch": 0.19101137029376247, "grad_norm": 3.6977293491363525, "learning_rate": 9.33558633447671e-06, "loss": 0.5449, "step": 15976 }, { "epoch": 0.19102332643862313, "grad_norm": 6.746190547943115, "learning_rate": 9.33548988975471e-06, "loss": 0.7766, "step": 15977 }, { "epoch": 0.1910352825834838, "grad_norm": 4.305785179138184, "learning_rate": 9.335393438531626e-06, "loss": 0.5771, "step": 15978 }, { "epoch": 0.19104723872834442, "grad_norm": 4.015341281890869, "learning_rate": 9.335296980807606e-06, "loss": 0.6998, "step": 15979 }, { "epoch": 0.19105919487320508, "grad_norm": 2.481283187866211, "learning_rate": 9.335200516582794e-06, "loss": 0.6652, "step": 15980 }, { "epoch": 0.19107115101806574, "grad_norm": 5.981865406036377, "learning_rate": 9.335104045857333e-06, "loss": 0.6906, "step": 15981 }, { "epoch": 0.1910831071629264, "grad_norm": 3.196830987930298, "learning_rate": 9.335007568631366e-06, "loss": 0.6926, "step": 15982 }, { "epoch": 0.19109506330778703, "grad_norm": 1.8887699842453003, "learning_rate": 9.33491108490504e-06, "loss": 0.574, "step": 15983 }, { "epoch": 0.1911070194526477, "grad_norm": 2.6070098876953125, "learning_rate": 9.3348145946785e-06, "loss": 0.5652, "step": 15984 }, { "epoch": 0.19111897559750834, "grad_norm": 2.7366068363189697, "learning_rate": 9.334718097951891e-06, "loss": 0.5803, "step": 15985 }, { "epoch": 0.191130931742369, "grad_norm": 1.9346998929977417, "learning_rate": 9.334621594725357e-06, "loss": 0.6043, "step": 15986 }, { "epoch": 0.19114288788722963, "grad_norm": 3.4683592319488525, "learning_rate": 9.334525084999042e-06, "loss": 0.7426, "step": 15987 }, { "epoch": 0.1911548440320903, "grad_norm": 8.19438362121582, "learning_rate": 9.334428568773091e-06, "loss": 0.623, "step": 15988 }, { "epoch": 0.19116680017695095, "grad_norm": 3.933885335922241, "learning_rate": 9.334332046047649e-06, "loss": 0.6338, "step": 15989 }, { "epoch": 0.19117875632181158, "grad_norm": 3.2105908393859863, "learning_rate": 9.334235516822862e-06, "loss": 0.5767, "step": 15990 }, { "epoch": 0.19119071246667224, "grad_norm": 11.406784057617188, "learning_rate": 9.33413898109887e-06, "loss": 0.7011, "step": 15991 }, { "epoch": 0.1912026686115329, "grad_norm": 2.1838269233703613, "learning_rate": 9.334042438875826e-06, "loss": 0.6098, "step": 15992 }, { "epoch": 0.19121462475639356, "grad_norm": 4.692900657653809, "learning_rate": 9.333945890153865e-06, "loss": 0.5632, "step": 15993 }, { "epoch": 0.1912265809012542, "grad_norm": 7.56599760055542, "learning_rate": 9.33384933493314e-06, "loss": 0.5948, "step": 15994 }, { "epoch": 0.19123853704611485, "grad_norm": 2.2370765209198, "learning_rate": 9.333752773213791e-06, "loss": 0.6175, "step": 15995 }, { "epoch": 0.1912504931909755, "grad_norm": 15.175500869750977, "learning_rate": 9.333656204995966e-06, "loss": 0.647, "step": 15996 }, { "epoch": 0.19126244933583617, "grad_norm": 2.3095805644989014, "learning_rate": 9.333559630279807e-06, "loss": 0.6659, "step": 15997 }, { "epoch": 0.1912744054806968, "grad_norm": 4.988842487335205, "learning_rate": 9.33346304906546e-06, "loss": 0.6625, "step": 15998 }, { "epoch": 0.19128636162555746, "grad_norm": 3.8620352745056152, "learning_rate": 9.33336646135307e-06, "loss": 0.5654, "step": 15999 }, { "epoch": 0.1912983177704181, "grad_norm": 7.045495986938477, "learning_rate": 9.33326986714278e-06, "loss": 0.6043, "step": 16000 }, { "epoch": 0.19131027391527874, "grad_norm": 2.406980514526367, "learning_rate": 9.333173266434739e-06, "loss": 0.5749, "step": 16001 }, { "epoch": 0.1913222300601394, "grad_norm": 2.105715274810791, "learning_rate": 9.333076659229088e-06, "loss": 0.614, "step": 16002 }, { "epoch": 0.19133418620500006, "grad_norm": 2.6254706382751465, "learning_rate": 9.332980045525972e-06, "loss": 0.6321, "step": 16003 }, { "epoch": 0.19134614234986072, "grad_norm": 9.604035377502441, "learning_rate": 9.332883425325538e-06, "loss": 0.5516, "step": 16004 }, { "epoch": 0.19135809849472135, "grad_norm": 2.8369128704071045, "learning_rate": 9.332786798627931e-06, "loss": 0.6085, "step": 16005 }, { "epoch": 0.191370054639582, "grad_norm": 2.746816635131836, "learning_rate": 9.332690165433294e-06, "loss": 0.6786, "step": 16006 }, { "epoch": 0.19138201078444267, "grad_norm": 2.0573246479034424, "learning_rate": 9.332593525741772e-06, "loss": 0.586, "step": 16007 }, { "epoch": 0.19139396692930333, "grad_norm": 6.708432674407959, "learning_rate": 9.332496879553512e-06, "loss": 0.5988, "step": 16008 }, { "epoch": 0.19140592307416396, "grad_norm": 2.9020156860351562, "learning_rate": 9.332400226868656e-06, "loss": 0.592, "step": 16009 }, { "epoch": 0.19141787921902462, "grad_norm": 84.2484359741211, "learning_rate": 9.332303567687351e-06, "loss": 0.5556, "step": 16010 }, { "epoch": 0.19142983536388528, "grad_norm": 8.543933868408203, "learning_rate": 9.332206902009741e-06, "loss": 0.6147, "step": 16011 }, { "epoch": 0.1914417915087459, "grad_norm": 2.2129878997802734, "learning_rate": 9.332110229835974e-06, "loss": 0.6662, "step": 16012 }, { "epoch": 0.19145374765360657, "grad_norm": 2.6118648052215576, "learning_rate": 9.332013551166187e-06, "loss": 0.6459, "step": 16013 }, { "epoch": 0.19146570379846722, "grad_norm": 2.802147388458252, "learning_rate": 9.331916866000534e-06, "loss": 0.6643, "step": 16014 }, { "epoch": 0.19147765994332788, "grad_norm": 6.216515064239502, "learning_rate": 9.331820174339156e-06, "loss": 0.6122, "step": 16015 }, { "epoch": 0.1914896160881885, "grad_norm": 2.17191743850708, "learning_rate": 9.3317234761822e-06, "loss": 0.6404, "step": 16016 }, { "epoch": 0.19150157223304917, "grad_norm": 2.825716257095337, "learning_rate": 9.331626771529808e-06, "loss": 0.7148, "step": 16017 }, { "epoch": 0.19151352837790983, "grad_norm": 3.4877772331237793, "learning_rate": 9.331530060382123e-06, "loss": 0.6442, "step": 16018 }, { "epoch": 0.1915254845227705, "grad_norm": 1.9319971799850464, "learning_rate": 9.331433342739297e-06, "loss": 0.6826, "step": 16019 }, { "epoch": 0.19153744066763112, "grad_norm": 1.62190580368042, "learning_rate": 9.33133661860147e-06, "loss": 0.628, "step": 16020 }, { "epoch": 0.19154939681249178, "grad_norm": 6.319753170013428, "learning_rate": 9.331239887968789e-06, "loss": 0.6601, "step": 16021 }, { "epoch": 0.19156135295735244, "grad_norm": 2.362567663192749, "learning_rate": 9.331143150841398e-06, "loss": 0.5926, "step": 16022 }, { "epoch": 0.1915733091022131, "grad_norm": 8.08924388885498, "learning_rate": 9.331046407219441e-06, "loss": 0.66, "step": 16023 }, { "epoch": 0.19158526524707373, "grad_norm": 4.894903659820557, "learning_rate": 9.330949657103066e-06, "loss": 0.6774, "step": 16024 }, { "epoch": 0.19159722139193439, "grad_norm": 1.928106665611267, "learning_rate": 9.330852900492416e-06, "loss": 0.6334, "step": 16025 }, { "epoch": 0.19160917753679504, "grad_norm": 4.104892253875732, "learning_rate": 9.330756137387638e-06, "loss": 0.5731, "step": 16026 }, { "epoch": 0.19162113368165568, "grad_norm": 3.1031627655029297, "learning_rate": 9.330659367788875e-06, "loss": 0.5347, "step": 16027 }, { "epoch": 0.19163308982651633, "grad_norm": 1.3604495525360107, "learning_rate": 9.330562591696273e-06, "loss": 0.5852, "step": 16028 }, { "epoch": 0.191645045971377, "grad_norm": 2.5149385929107666, "learning_rate": 9.330465809109977e-06, "loss": 0.6275, "step": 16029 }, { "epoch": 0.19165700211623765, "grad_norm": 3.9235126972198486, "learning_rate": 9.33036902003013e-06, "loss": 0.6215, "step": 16030 }, { "epoch": 0.19166895826109828, "grad_norm": 2.116368293762207, "learning_rate": 9.330272224456881e-06, "loss": 0.6493, "step": 16031 }, { "epoch": 0.19168091440595894, "grad_norm": 4.406630039215088, "learning_rate": 9.330175422390375e-06, "loss": 0.7057, "step": 16032 }, { "epoch": 0.1916928705508196, "grad_norm": 1.9895106554031372, "learning_rate": 9.330078613830754e-06, "loss": 0.5452, "step": 16033 }, { "epoch": 0.19170482669568026, "grad_norm": 1.3117249011993408, "learning_rate": 9.329981798778165e-06, "loss": 0.5775, "step": 16034 }, { "epoch": 0.1917167828405409, "grad_norm": 2.2622315883636475, "learning_rate": 9.329884977232753e-06, "loss": 0.6083, "step": 16035 }, { "epoch": 0.19172873898540155, "grad_norm": 2.631049633026123, "learning_rate": 9.329788149194661e-06, "loss": 0.6607, "step": 16036 }, { "epoch": 0.1917406951302622, "grad_norm": 2.2219808101654053, "learning_rate": 9.32969131466404e-06, "loss": 0.6891, "step": 16037 }, { "epoch": 0.19175265127512284, "grad_norm": 3.5600883960723877, "learning_rate": 9.329594473641029e-06, "loss": 0.5908, "step": 16038 }, { "epoch": 0.1917646074199835, "grad_norm": 2.5593838691711426, "learning_rate": 9.329497626125777e-06, "loss": 0.595, "step": 16039 }, { "epoch": 0.19177656356484415, "grad_norm": 1.8101247549057007, "learning_rate": 9.329400772118427e-06, "loss": 0.5161, "step": 16040 }, { "epoch": 0.1917885197097048, "grad_norm": 1.8152079582214355, "learning_rate": 9.329303911619124e-06, "loss": 0.575, "step": 16041 }, { "epoch": 0.19180047585456544, "grad_norm": 5.237745761871338, "learning_rate": 9.329207044628016e-06, "loss": 0.6574, "step": 16042 }, { "epoch": 0.1918124319994261, "grad_norm": 6.099745273590088, "learning_rate": 9.329110171145246e-06, "loss": 0.65, "step": 16043 }, { "epoch": 0.19182438814428676, "grad_norm": 2.284665107727051, "learning_rate": 9.329013291170961e-06, "loss": 0.6593, "step": 16044 }, { "epoch": 0.19183634428914742, "grad_norm": 3.514376640319824, "learning_rate": 9.328916404705304e-06, "loss": 0.7659, "step": 16045 }, { "epoch": 0.19184830043400805, "grad_norm": 2.6101303100585938, "learning_rate": 9.328819511748421e-06, "loss": 0.545, "step": 16046 }, { "epoch": 0.1918602565788687, "grad_norm": 1.837395191192627, "learning_rate": 9.32872261230046e-06, "loss": 0.6459, "step": 16047 }, { "epoch": 0.19187221272372937, "grad_norm": 2.061204671859741, "learning_rate": 9.328625706361563e-06, "loss": 0.5919, "step": 16048 }, { "epoch": 0.19188416886859, "grad_norm": 5.732022762298584, "learning_rate": 9.328528793931876e-06, "loss": 0.6009, "step": 16049 }, { "epoch": 0.19189612501345066, "grad_norm": 2.3472580909729004, "learning_rate": 9.328431875011545e-06, "loss": 0.6003, "step": 16050 }, { "epoch": 0.19190808115831132, "grad_norm": 2.482168197631836, "learning_rate": 9.328334949600715e-06, "loss": 0.4896, "step": 16051 }, { "epoch": 0.19192003730317198, "grad_norm": 2.7766318321228027, "learning_rate": 9.328238017699532e-06, "loss": 0.6146, "step": 16052 }, { "epoch": 0.1919319934480326, "grad_norm": 2.631808280944824, "learning_rate": 9.328141079308139e-06, "loss": 0.5743, "step": 16053 }, { "epoch": 0.19194394959289326, "grad_norm": 2.8888707160949707, "learning_rate": 9.328044134426683e-06, "loss": 0.6104, "step": 16054 }, { "epoch": 0.19195590573775392, "grad_norm": 8.64543628692627, "learning_rate": 9.327947183055312e-06, "loss": 0.5912, "step": 16055 }, { "epoch": 0.19196786188261458, "grad_norm": 1.5324639081954956, "learning_rate": 9.327850225194166e-06, "loss": 0.5727, "step": 16056 }, { "epoch": 0.1919798180274752, "grad_norm": 6.009627819061279, "learning_rate": 9.327753260843395e-06, "loss": 0.6163, "step": 16057 }, { "epoch": 0.19199177417233587, "grad_norm": 3.730431318283081, "learning_rate": 9.327656290003143e-06, "loss": 0.5895, "step": 16058 }, { "epoch": 0.19200373031719653, "grad_norm": 1.6474010944366455, "learning_rate": 9.327559312673554e-06, "loss": 0.597, "step": 16059 }, { "epoch": 0.19201568646205716, "grad_norm": 1.6817189455032349, "learning_rate": 9.327462328854773e-06, "loss": 0.6332, "step": 16060 }, { "epoch": 0.19202764260691782, "grad_norm": 2.1581430435180664, "learning_rate": 9.327365338546949e-06, "loss": 0.6416, "step": 16061 }, { "epoch": 0.19203959875177848, "grad_norm": 10.727694511413574, "learning_rate": 9.327268341750224e-06, "loss": 0.5816, "step": 16062 }, { "epoch": 0.19205155489663914, "grad_norm": 3.0473976135253906, "learning_rate": 9.327171338464745e-06, "loss": 0.5721, "step": 16063 }, { "epoch": 0.19206351104149977, "grad_norm": 3.356368064880371, "learning_rate": 9.327074328690657e-06, "loss": 0.6804, "step": 16064 }, { "epoch": 0.19207546718636043, "grad_norm": 1.7599198818206787, "learning_rate": 9.326977312428107e-06, "loss": 0.5739, "step": 16065 }, { "epoch": 0.19208742333122109, "grad_norm": 1.7629469633102417, "learning_rate": 9.326880289677239e-06, "loss": 0.6814, "step": 16066 }, { "epoch": 0.19209937947608174, "grad_norm": 3.6100847721099854, "learning_rate": 9.326783260438196e-06, "loss": 0.6329, "step": 16067 }, { "epoch": 0.19211133562094238, "grad_norm": 3.1856212615966797, "learning_rate": 9.32668622471113e-06, "loss": 0.5732, "step": 16068 }, { "epoch": 0.19212329176580303, "grad_norm": 2.1713078022003174, "learning_rate": 9.326589182496179e-06, "loss": 0.6198, "step": 16069 }, { "epoch": 0.1921352479106637, "grad_norm": 5.430799961090088, "learning_rate": 9.326492133793495e-06, "loss": 0.7342, "step": 16070 }, { "epoch": 0.19214720405552432, "grad_norm": 2.3560142517089844, "learning_rate": 9.32639507860322e-06, "loss": 0.6116, "step": 16071 }, { "epoch": 0.19215916020038498, "grad_norm": 1.9260767698287964, "learning_rate": 9.326298016925499e-06, "loss": 0.7074, "step": 16072 }, { "epoch": 0.19217111634524564, "grad_norm": 4.1924614906311035, "learning_rate": 9.32620094876048e-06, "loss": 0.5729, "step": 16073 }, { "epoch": 0.1921830724901063, "grad_norm": 2.459470748901367, "learning_rate": 9.326103874108307e-06, "loss": 0.6299, "step": 16074 }, { "epoch": 0.19219502863496693, "grad_norm": 3.925823450088501, "learning_rate": 9.326006792969126e-06, "loss": 0.6064, "step": 16075 }, { "epoch": 0.1922069847798276, "grad_norm": 19.045207977294922, "learning_rate": 9.325909705343081e-06, "loss": 0.6166, "step": 16076 }, { "epoch": 0.19221894092468825, "grad_norm": 4.796035289764404, "learning_rate": 9.32581261123032e-06, "loss": 0.6483, "step": 16077 }, { "epoch": 0.1922308970695489, "grad_norm": 2.81231689453125, "learning_rate": 9.325715510630989e-06, "loss": 0.619, "step": 16078 }, { "epoch": 0.19224285321440954, "grad_norm": 1.9042749404907227, "learning_rate": 9.325618403545231e-06, "loss": 0.5173, "step": 16079 }, { "epoch": 0.1922548093592702, "grad_norm": 4.245659351348877, "learning_rate": 9.325521289973191e-06, "loss": 0.6191, "step": 16080 }, { "epoch": 0.19226676550413085, "grad_norm": 3.6963627338409424, "learning_rate": 9.325424169915019e-06, "loss": 0.6121, "step": 16081 }, { "epoch": 0.1922787216489915, "grad_norm": 3.685584306716919, "learning_rate": 9.325327043370858e-06, "loss": 0.5849, "step": 16082 }, { "epoch": 0.19229067779385214, "grad_norm": 4.861818790435791, "learning_rate": 9.325229910340852e-06, "loss": 0.6141, "step": 16083 }, { "epoch": 0.1923026339387128, "grad_norm": 2.794527292251587, "learning_rate": 9.32513277082515e-06, "loss": 0.5651, "step": 16084 }, { "epoch": 0.19231459008357346, "grad_norm": 3.0681090354919434, "learning_rate": 9.325035624823895e-06, "loss": 0.5374, "step": 16085 }, { "epoch": 0.1923265462284341, "grad_norm": 1.936110019683838, "learning_rate": 9.324938472337235e-06, "loss": 0.739, "step": 16086 }, { "epoch": 0.19233850237329475, "grad_norm": 1.7606070041656494, "learning_rate": 9.324841313365314e-06, "loss": 0.5819, "step": 16087 }, { "epoch": 0.1923504585181554, "grad_norm": 2.025543689727783, "learning_rate": 9.324744147908276e-06, "loss": 0.5627, "step": 16088 }, { "epoch": 0.19236241466301607, "grad_norm": 3.3023648262023926, "learning_rate": 9.324646975966271e-06, "loss": 0.6601, "step": 16089 }, { "epoch": 0.1923743708078767, "grad_norm": 2.143893003463745, "learning_rate": 9.324549797539441e-06, "loss": 0.6936, "step": 16090 }, { "epoch": 0.19238632695273736, "grad_norm": 1.7806094884872437, "learning_rate": 9.324452612627935e-06, "loss": 0.5955, "step": 16091 }, { "epoch": 0.19239828309759802, "grad_norm": 2.2131712436676025, "learning_rate": 9.324355421231897e-06, "loss": 0.6502, "step": 16092 }, { "epoch": 0.19241023924245867, "grad_norm": 2.6708879470825195, "learning_rate": 9.32425822335147e-06, "loss": 0.6239, "step": 16093 }, { "epoch": 0.1924221953873193, "grad_norm": 25.67043685913086, "learning_rate": 9.324161018986806e-06, "loss": 0.6791, "step": 16094 }, { "epoch": 0.19243415153217996, "grad_norm": 5.742928981781006, "learning_rate": 9.324063808138045e-06, "loss": 0.5384, "step": 16095 }, { "epoch": 0.19244610767704062, "grad_norm": 2.325437068939209, "learning_rate": 9.323966590805335e-06, "loss": 0.6902, "step": 16096 }, { "epoch": 0.19245806382190125, "grad_norm": 2.2498321533203125, "learning_rate": 9.323869366988821e-06, "loss": 0.6068, "step": 16097 }, { "epoch": 0.1924700199667619, "grad_norm": 2.4324896335601807, "learning_rate": 9.323772136688651e-06, "loss": 0.6153, "step": 16098 }, { "epoch": 0.19248197611162257, "grad_norm": 5.937267780303955, "learning_rate": 9.323674899904968e-06, "loss": 0.7019, "step": 16099 }, { "epoch": 0.19249393225648323, "grad_norm": 2.0860159397125244, "learning_rate": 9.32357765663792e-06, "loss": 0.6271, "step": 16100 }, { "epoch": 0.19250588840134386, "grad_norm": 3.1396875381469727, "learning_rate": 9.323480406887653e-06, "loss": 0.6534, "step": 16101 }, { "epoch": 0.19251784454620452, "grad_norm": 1.61970055103302, "learning_rate": 9.32338315065431e-06, "loss": 0.6248, "step": 16102 }, { "epoch": 0.19252980069106518, "grad_norm": 3.194441556930542, "learning_rate": 9.32328588793804e-06, "loss": 0.6599, "step": 16103 }, { "epoch": 0.19254175683592584, "grad_norm": 1.6925960779190063, "learning_rate": 9.323188618738985e-06, "loss": 0.7097, "step": 16104 }, { "epoch": 0.19255371298078647, "grad_norm": 1.9309073686599731, "learning_rate": 9.323091343057296e-06, "loss": 0.6619, "step": 16105 }, { "epoch": 0.19256566912564713, "grad_norm": 11.605262756347656, "learning_rate": 9.322994060893114e-06, "loss": 0.6964, "step": 16106 }, { "epoch": 0.19257762527050779, "grad_norm": 3.431234121322632, "learning_rate": 9.32289677224659e-06, "loss": 0.6148, "step": 16107 }, { "epoch": 0.19258958141536842, "grad_norm": 2.3118088245391846, "learning_rate": 9.322799477117864e-06, "loss": 0.6101, "step": 16108 }, { "epoch": 0.19260153756022907, "grad_norm": 7.452328681945801, "learning_rate": 9.322702175507085e-06, "loss": 0.5746, "step": 16109 }, { "epoch": 0.19261349370508973, "grad_norm": 14.568792343139648, "learning_rate": 9.3226048674144e-06, "loss": 0.6077, "step": 16110 }, { "epoch": 0.1926254498499504, "grad_norm": 3.393035411834717, "learning_rate": 9.322507552839955e-06, "loss": 0.6787, "step": 16111 }, { "epoch": 0.19263740599481102, "grad_norm": 2.2933735847473145, "learning_rate": 9.322410231783891e-06, "loss": 0.5796, "step": 16112 }, { "epoch": 0.19264936213967168, "grad_norm": 5.026335716247559, "learning_rate": 9.32231290424636e-06, "loss": 0.5771, "step": 16113 }, { "epoch": 0.19266131828453234, "grad_norm": 7.046972751617432, "learning_rate": 9.322215570227504e-06, "loss": 0.7412, "step": 16114 }, { "epoch": 0.192673274429393, "grad_norm": 3.5412344932556152, "learning_rate": 9.322118229727471e-06, "loss": 0.7102, "step": 16115 }, { "epoch": 0.19268523057425363, "grad_norm": 3.5672991275787354, "learning_rate": 9.322020882746407e-06, "loss": 0.6823, "step": 16116 }, { "epoch": 0.1926971867191143, "grad_norm": 2.2933828830718994, "learning_rate": 9.321923529284457e-06, "loss": 0.659, "step": 16117 }, { "epoch": 0.19270914286397495, "grad_norm": 2.552119016647339, "learning_rate": 9.321826169341768e-06, "loss": 0.636, "step": 16118 }, { "epoch": 0.19272109900883558, "grad_norm": 5.542308807373047, "learning_rate": 9.321728802918485e-06, "loss": 0.6001, "step": 16119 }, { "epoch": 0.19273305515369624, "grad_norm": 10.000961303710938, "learning_rate": 9.321631430014753e-06, "loss": 0.6882, "step": 16120 }, { "epoch": 0.1927450112985569, "grad_norm": 3.2421116828918457, "learning_rate": 9.321534050630719e-06, "loss": 0.6803, "step": 16121 }, { "epoch": 0.19275696744341755, "grad_norm": 2.6503994464874268, "learning_rate": 9.32143666476653e-06, "loss": 0.6006, "step": 16122 }, { "epoch": 0.19276892358827818, "grad_norm": 1.9829133749008179, "learning_rate": 9.321339272422332e-06, "loss": 0.6351, "step": 16123 }, { "epoch": 0.19278087973313884, "grad_norm": 2.0397419929504395, "learning_rate": 9.32124187359827e-06, "loss": 0.6487, "step": 16124 }, { "epoch": 0.1927928358779995, "grad_norm": 2.2600767612457275, "learning_rate": 9.321144468294493e-06, "loss": 0.5502, "step": 16125 }, { "epoch": 0.19280479202286016, "grad_norm": 5.689818382263184, "learning_rate": 9.32104705651114e-06, "loss": 0.6612, "step": 16126 }, { "epoch": 0.1928167481677208, "grad_norm": 10.149529457092285, "learning_rate": 9.320949638248365e-06, "loss": 0.5949, "step": 16127 }, { "epoch": 0.19282870431258145, "grad_norm": 2.7737395763397217, "learning_rate": 9.320852213506309e-06, "loss": 0.5518, "step": 16128 }, { "epoch": 0.1928406604574421, "grad_norm": 1.974457859992981, "learning_rate": 9.32075478228512e-06, "loss": 0.5689, "step": 16129 }, { "epoch": 0.19285261660230274, "grad_norm": 4.577449798583984, "learning_rate": 9.320657344584942e-06, "loss": 0.6157, "step": 16130 }, { "epoch": 0.1928645727471634, "grad_norm": 3.456831216812134, "learning_rate": 9.320559900405926e-06, "loss": 0.5598, "step": 16131 }, { "epoch": 0.19287652889202406, "grad_norm": 1.665373682975769, "learning_rate": 9.320462449748213e-06, "loss": 0.5556, "step": 16132 }, { "epoch": 0.19288848503688472, "grad_norm": 5.2187371253967285, "learning_rate": 9.320364992611952e-06, "loss": 0.6402, "step": 16133 }, { "epoch": 0.19290044118174535, "grad_norm": 2.0629982948303223, "learning_rate": 9.32026752899729e-06, "loss": 0.5733, "step": 16134 }, { "epoch": 0.192912397326606, "grad_norm": 2.5258851051330566, "learning_rate": 9.32017005890437e-06, "loss": 0.5863, "step": 16135 }, { "epoch": 0.19292435347146666, "grad_norm": 3.938565492630005, "learning_rate": 9.32007258233334e-06, "loss": 0.7014, "step": 16136 }, { "epoch": 0.19293630961632732, "grad_norm": 4.55159854888916, "learning_rate": 9.319975099284344e-06, "loss": 0.6226, "step": 16137 }, { "epoch": 0.19294826576118795, "grad_norm": 1.8078243732452393, "learning_rate": 9.319877609757532e-06, "loss": 0.5752, "step": 16138 }, { "epoch": 0.1929602219060486, "grad_norm": 2.828726053237915, "learning_rate": 9.319780113753047e-06, "loss": 0.6393, "step": 16139 }, { "epoch": 0.19297217805090927, "grad_norm": 1.6867365837097168, "learning_rate": 9.319682611271037e-06, "loss": 0.5801, "step": 16140 }, { "epoch": 0.19298413419576993, "grad_norm": 1.857317566871643, "learning_rate": 9.319585102311648e-06, "loss": 0.607, "step": 16141 }, { "epoch": 0.19299609034063056, "grad_norm": 2.2751574516296387, "learning_rate": 9.319487586875025e-06, "loss": 0.6156, "step": 16142 }, { "epoch": 0.19300804648549122, "grad_norm": 2.131950616836548, "learning_rate": 9.319390064961315e-06, "loss": 0.6256, "step": 16143 }, { "epoch": 0.19302000263035188, "grad_norm": 4.015000343322754, "learning_rate": 9.319292536570665e-06, "loss": 0.6514, "step": 16144 }, { "epoch": 0.1930319587752125, "grad_norm": 3.4486966133117676, "learning_rate": 9.319195001703219e-06, "loss": 0.5945, "step": 16145 }, { "epoch": 0.19304391492007317, "grad_norm": 5.4398417472839355, "learning_rate": 9.319097460359126e-06, "loss": 0.5383, "step": 16146 }, { "epoch": 0.19305587106493383, "grad_norm": 2.964972734451294, "learning_rate": 9.31899991253853e-06, "loss": 0.7371, "step": 16147 }, { "epoch": 0.19306782720979448, "grad_norm": 2.4452531337738037, "learning_rate": 9.318902358241579e-06, "loss": 0.5456, "step": 16148 }, { "epoch": 0.19307978335465512, "grad_norm": 3.0048327445983887, "learning_rate": 9.318804797468419e-06, "loss": 0.6035, "step": 16149 }, { "epoch": 0.19309173949951577, "grad_norm": 5.131166458129883, "learning_rate": 9.318707230219196e-06, "loss": 0.55, "step": 16150 }, { "epoch": 0.19310369564437643, "grad_norm": 1.865644097328186, "learning_rate": 9.318609656494055e-06, "loss": 0.5762, "step": 16151 }, { "epoch": 0.1931156517892371, "grad_norm": 2.190305471420288, "learning_rate": 9.318512076293145e-06, "loss": 0.6262, "step": 16152 }, { "epoch": 0.19312760793409772, "grad_norm": 3.0387609004974365, "learning_rate": 9.31841448961661e-06, "loss": 0.5379, "step": 16153 }, { "epoch": 0.19313956407895838, "grad_norm": 3.312114953994751, "learning_rate": 9.318316896464596e-06, "loss": 0.6254, "step": 16154 }, { "epoch": 0.19315152022381904, "grad_norm": 3.3016252517700195, "learning_rate": 9.318219296837252e-06, "loss": 0.6124, "step": 16155 }, { "epoch": 0.19316347636867967, "grad_norm": 4.853137969970703, "learning_rate": 9.318121690734723e-06, "loss": 0.5853, "step": 16156 }, { "epoch": 0.19317543251354033, "grad_norm": 2.484848737716675, "learning_rate": 9.318024078157153e-06, "loss": 0.6156, "step": 16157 }, { "epoch": 0.193187388658401, "grad_norm": 1.9849472045898438, "learning_rate": 9.317926459104694e-06, "loss": 0.6761, "step": 16158 }, { "epoch": 0.19319934480326165, "grad_norm": 4.57658576965332, "learning_rate": 9.317828833577486e-06, "loss": 0.6115, "step": 16159 }, { "epoch": 0.19321130094812228, "grad_norm": 2.6521124839782715, "learning_rate": 9.31773120157568e-06, "loss": 0.679, "step": 16160 }, { "epoch": 0.19322325709298294, "grad_norm": 1.9428099393844604, "learning_rate": 9.31763356309942e-06, "loss": 0.5727, "step": 16161 }, { "epoch": 0.1932352132378436, "grad_norm": 1.918714165687561, "learning_rate": 9.317535918148852e-06, "loss": 0.5053, "step": 16162 }, { "epoch": 0.19324716938270425, "grad_norm": 9.654394149780273, "learning_rate": 9.317438266724125e-06, "loss": 0.4887, "step": 16163 }, { "epoch": 0.19325912552756488, "grad_norm": 4.919467449188232, "learning_rate": 9.317340608825382e-06, "loss": 0.604, "step": 16164 }, { "epoch": 0.19327108167242554, "grad_norm": 4.946406841278076, "learning_rate": 9.317242944452774e-06, "loss": 0.6148, "step": 16165 }, { "epoch": 0.1932830378172862, "grad_norm": 1.832526445388794, "learning_rate": 9.317145273606445e-06, "loss": 0.6317, "step": 16166 }, { "epoch": 0.19329499396214683, "grad_norm": 3.4583046436309814, "learning_rate": 9.317047596286539e-06, "loss": 0.5805, "step": 16167 }, { "epoch": 0.1933069501070075, "grad_norm": 3.8785552978515625, "learning_rate": 9.316949912493206e-06, "loss": 0.5827, "step": 16168 }, { "epoch": 0.19331890625186815, "grad_norm": 1.9273847341537476, "learning_rate": 9.316852222226591e-06, "loss": 0.6553, "step": 16169 }, { "epoch": 0.1933308623967288, "grad_norm": 1.959699034690857, "learning_rate": 9.316754525486841e-06, "loss": 0.565, "step": 16170 }, { "epoch": 0.19334281854158944, "grad_norm": 3.3082468509674072, "learning_rate": 9.316656822274103e-06, "loss": 0.5906, "step": 16171 }, { "epoch": 0.1933547746864501, "grad_norm": 2.78263783454895, "learning_rate": 9.316559112588522e-06, "loss": 0.6917, "step": 16172 }, { "epoch": 0.19336673083131076, "grad_norm": 1.8753529787063599, "learning_rate": 9.316461396430245e-06, "loss": 0.6032, "step": 16173 }, { "epoch": 0.19337868697617142, "grad_norm": 1.981079339981079, "learning_rate": 9.316363673799419e-06, "loss": 0.5861, "step": 16174 }, { "epoch": 0.19339064312103205, "grad_norm": 2.028313398361206, "learning_rate": 9.31626594469619e-06, "loss": 0.544, "step": 16175 }, { "epoch": 0.1934025992658927, "grad_norm": 4.152911186218262, "learning_rate": 9.316168209120706e-06, "loss": 0.5502, "step": 16176 }, { "epoch": 0.19341455541075336, "grad_norm": 2.115370750427246, "learning_rate": 9.31607046707311e-06, "loss": 0.6571, "step": 16177 }, { "epoch": 0.193426511555614, "grad_norm": 2.4439477920532227, "learning_rate": 9.315972718553554e-06, "loss": 0.5766, "step": 16178 }, { "epoch": 0.19343846770047465, "grad_norm": 1.9097120761871338, "learning_rate": 9.315874963562179e-06, "loss": 0.6453, "step": 16179 }, { "epoch": 0.1934504238453353, "grad_norm": 2.437983989715576, "learning_rate": 9.315777202099136e-06, "loss": 0.6012, "step": 16180 }, { "epoch": 0.19346237999019597, "grad_norm": 3.3009653091430664, "learning_rate": 9.31567943416457e-06, "loss": 0.5869, "step": 16181 }, { "epoch": 0.1934743361350566, "grad_norm": 4.282084941864014, "learning_rate": 9.315581659758625e-06, "loss": 0.6559, "step": 16182 }, { "epoch": 0.19348629227991726, "grad_norm": 2.85994553565979, "learning_rate": 9.315483878881453e-06, "loss": 0.7272, "step": 16183 }, { "epoch": 0.19349824842477792, "grad_norm": 4.7138352394104, "learning_rate": 9.315386091533196e-06, "loss": 0.5549, "step": 16184 }, { "epoch": 0.19351020456963858, "grad_norm": 2.8814377784729004, "learning_rate": 9.315288297714003e-06, "loss": 0.6401, "step": 16185 }, { "epoch": 0.1935221607144992, "grad_norm": 2.982362985610962, "learning_rate": 9.315190497424018e-06, "loss": 0.6823, "step": 16186 }, { "epoch": 0.19353411685935987, "grad_norm": 5.429115295410156, "learning_rate": 9.31509269066339e-06, "loss": 0.5847, "step": 16187 }, { "epoch": 0.19354607300422053, "grad_norm": 10.437487602233887, "learning_rate": 9.314994877432267e-06, "loss": 0.6719, "step": 16188 }, { "epoch": 0.19355802914908116, "grad_norm": 1.9946825504302979, "learning_rate": 9.314897057730792e-06, "loss": 0.6868, "step": 16189 }, { "epoch": 0.19356998529394182, "grad_norm": 3.682377338409424, "learning_rate": 9.314799231559114e-06, "loss": 0.5926, "step": 16190 }, { "epoch": 0.19358194143880247, "grad_norm": 2.2190139293670654, "learning_rate": 9.314701398917378e-06, "loss": 0.5286, "step": 16191 }, { "epoch": 0.19359389758366313, "grad_norm": 2.1085643768310547, "learning_rate": 9.314603559805735e-06, "loss": 0.5411, "step": 16192 }, { "epoch": 0.19360585372852376, "grad_norm": 1.8344032764434814, "learning_rate": 9.314505714224326e-06, "loss": 0.5905, "step": 16193 }, { "epoch": 0.19361780987338442, "grad_norm": 2.0822107791900635, "learning_rate": 9.314407862173301e-06, "loss": 0.5366, "step": 16194 }, { "epoch": 0.19362976601824508, "grad_norm": 2.2341322898864746, "learning_rate": 9.314310003652806e-06, "loss": 0.4984, "step": 16195 }, { "epoch": 0.19364172216310574, "grad_norm": 5.811838626861572, "learning_rate": 9.314212138662989e-06, "loss": 0.5578, "step": 16196 }, { "epoch": 0.19365367830796637, "grad_norm": 2.658240795135498, "learning_rate": 9.314114267203995e-06, "loss": 0.573, "step": 16197 }, { "epoch": 0.19366563445282703, "grad_norm": 4.101373672485352, "learning_rate": 9.314016389275971e-06, "loss": 0.5568, "step": 16198 }, { "epoch": 0.1936775905976877, "grad_norm": 3.4911868572235107, "learning_rate": 9.313918504879065e-06, "loss": 0.6402, "step": 16199 }, { "epoch": 0.19368954674254835, "grad_norm": 2.286060094833374, "learning_rate": 9.31382061401342e-06, "loss": 0.5902, "step": 16200 }, { "epoch": 0.19370150288740898, "grad_norm": 2.688308000564575, "learning_rate": 9.313722716679188e-06, "loss": 0.5529, "step": 16201 }, { "epoch": 0.19371345903226964, "grad_norm": 3.325259208679199, "learning_rate": 9.313624812876514e-06, "loss": 0.5633, "step": 16202 }, { "epoch": 0.1937254151771303, "grad_norm": 5.716216087341309, "learning_rate": 9.313526902605543e-06, "loss": 0.6974, "step": 16203 }, { "epoch": 0.19373737132199093, "grad_norm": 1.55547297000885, "learning_rate": 9.313428985866424e-06, "loss": 0.5217, "step": 16204 }, { "epoch": 0.19374932746685158, "grad_norm": 5.4594855308532715, "learning_rate": 9.3133310626593e-06, "loss": 0.5109, "step": 16205 }, { "epoch": 0.19376128361171224, "grad_norm": 16.079774856567383, "learning_rate": 9.313233132984325e-06, "loss": 0.5664, "step": 16206 }, { "epoch": 0.1937732397565729, "grad_norm": 3.324428081512451, "learning_rate": 9.313135196841639e-06, "loss": 0.6648, "step": 16207 }, { "epoch": 0.19378519590143353, "grad_norm": 10.110773086547852, "learning_rate": 9.313037254231391e-06, "loss": 0.5858, "step": 16208 }, { "epoch": 0.1937971520462942, "grad_norm": 2.9803903102874756, "learning_rate": 9.312939305153731e-06, "loss": 0.638, "step": 16209 }, { "epoch": 0.19380910819115485, "grad_norm": 2.250821352005005, "learning_rate": 9.3128413496088e-06, "loss": 0.6871, "step": 16210 }, { "epoch": 0.1938210643360155, "grad_norm": 3.157972574234009, "learning_rate": 9.312743387596751e-06, "loss": 0.5583, "step": 16211 }, { "epoch": 0.19383302048087614, "grad_norm": 4.582165718078613, "learning_rate": 9.312645419117726e-06, "loss": 0.5248, "step": 16212 }, { "epoch": 0.1938449766257368, "grad_norm": 2.1550815105438232, "learning_rate": 9.312547444171875e-06, "loss": 0.6135, "step": 16213 }, { "epoch": 0.19385693277059746, "grad_norm": 2.2319753170013428, "learning_rate": 9.312449462759344e-06, "loss": 0.5654, "step": 16214 }, { "epoch": 0.1938688889154581, "grad_norm": 2.902801990509033, "learning_rate": 9.312351474880279e-06, "loss": 0.6158, "step": 16215 }, { "epoch": 0.19388084506031875, "grad_norm": 2.1267480850219727, "learning_rate": 9.312253480534827e-06, "loss": 0.5976, "step": 16216 }, { "epoch": 0.1938928012051794, "grad_norm": 2.0145175457000732, "learning_rate": 9.312155479723136e-06, "loss": 0.5773, "step": 16217 }, { "epoch": 0.19390475735004006, "grad_norm": 1.740054726600647, "learning_rate": 9.312057472445353e-06, "loss": 0.6008, "step": 16218 }, { "epoch": 0.1939167134949007, "grad_norm": 3.205784320831299, "learning_rate": 9.311959458701624e-06, "loss": 0.5633, "step": 16219 }, { "epoch": 0.19392866963976135, "grad_norm": 1.918774962425232, "learning_rate": 9.311861438492097e-06, "loss": 0.5773, "step": 16220 }, { "epoch": 0.193940625784622, "grad_norm": 3.6859805583953857, "learning_rate": 9.311763411816917e-06, "loss": 0.6221, "step": 16221 }, { "epoch": 0.19395258192948267, "grad_norm": 8.23461627960205, "learning_rate": 9.311665378676234e-06, "loss": 0.5618, "step": 16222 }, { "epoch": 0.1939645380743433, "grad_norm": 2.529219388961792, "learning_rate": 9.311567339070193e-06, "loss": 0.6469, "step": 16223 }, { "epoch": 0.19397649421920396, "grad_norm": 2.9604995250701904, "learning_rate": 9.311469292998941e-06, "loss": 0.6007, "step": 16224 }, { "epoch": 0.19398845036406462, "grad_norm": 1.8735942840576172, "learning_rate": 9.311371240462626e-06, "loss": 0.6275, "step": 16225 }, { "epoch": 0.19400040650892525, "grad_norm": 2.571373462677002, "learning_rate": 9.311273181461396e-06, "loss": 0.6322, "step": 16226 }, { "epoch": 0.1940123626537859, "grad_norm": 1.935221791267395, "learning_rate": 9.311175115995394e-06, "loss": 0.6391, "step": 16227 }, { "epoch": 0.19402431879864657, "grad_norm": 2.5060718059539795, "learning_rate": 9.31107704406477e-06, "loss": 0.6572, "step": 16228 }, { "epoch": 0.19403627494350723, "grad_norm": 3.6680238246917725, "learning_rate": 9.310978965669672e-06, "loss": 0.6679, "step": 16229 }, { "epoch": 0.19404823108836786, "grad_norm": 1.5996484756469727, "learning_rate": 9.310880880810244e-06, "loss": 0.5759, "step": 16230 }, { "epoch": 0.19406018723322851, "grad_norm": 2.295724630355835, "learning_rate": 9.310782789486637e-06, "loss": 0.513, "step": 16231 }, { "epoch": 0.19407214337808917, "grad_norm": 3.0701487064361572, "learning_rate": 9.310684691698994e-06, "loss": 0.5987, "step": 16232 }, { "epoch": 0.19408409952294983, "grad_norm": 2.053769111633301, "learning_rate": 9.310586587447465e-06, "loss": 0.6598, "step": 16233 }, { "epoch": 0.19409605566781046, "grad_norm": 6.017609596252441, "learning_rate": 9.310488476732195e-06, "loss": 0.6412, "step": 16234 }, { "epoch": 0.19410801181267112, "grad_norm": 5.102357387542725, "learning_rate": 9.310390359553332e-06, "loss": 0.6555, "step": 16235 }, { "epoch": 0.19411996795753178, "grad_norm": 2.9705519676208496, "learning_rate": 9.310292235911025e-06, "loss": 0.6349, "step": 16236 }, { "epoch": 0.1941319241023924, "grad_norm": 6.74121618270874, "learning_rate": 9.31019410580542e-06, "loss": 0.5571, "step": 16237 }, { "epoch": 0.19414388024725307, "grad_norm": 1.6597343683242798, "learning_rate": 9.310095969236661e-06, "loss": 0.5894, "step": 16238 }, { "epoch": 0.19415583639211373, "grad_norm": 1.6336677074432373, "learning_rate": 9.309997826204899e-06, "loss": 0.5998, "step": 16239 }, { "epoch": 0.1941677925369744, "grad_norm": 3.2407073974609375, "learning_rate": 9.309899676710281e-06, "loss": 0.5387, "step": 16240 }, { "epoch": 0.19417974868183502, "grad_norm": 2.4344642162323, "learning_rate": 9.30980152075295e-06, "loss": 0.6693, "step": 16241 }, { "epoch": 0.19419170482669568, "grad_norm": 1.4944576025009155, "learning_rate": 9.30970335833306e-06, "loss": 0.5296, "step": 16242 }, { "epoch": 0.19420366097155634, "grad_norm": 3.1709887981414795, "learning_rate": 9.309605189450752e-06, "loss": 0.6866, "step": 16243 }, { "epoch": 0.194215617116417, "grad_norm": 3.208003282546997, "learning_rate": 9.309507014106175e-06, "loss": 0.6503, "step": 16244 }, { "epoch": 0.19422757326127763, "grad_norm": 2.5536422729492188, "learning_rate": 9.30940883229948e-06, "loss": 0.706, "step": 16245 }, { "epoch": 0.19423952940613828, "grad_norm": 3.429194688796997, "learning_rate": 9.309310644030808e-06, "loss": 0.6664, "step": 16246 }, { "epoch": 0.19425148555099894, "grad_norm": 3.8115055561065674, "learning_rate": 9.309212449300312e-06, "loss": 0.5759, "step": 16247 }, { "epoch": 0.19426344169585957, "grad_norm": 3.9364423751831055, "learning_rate": 9.309114248108136e-06, "loss": 0.6587, "step": 16248 }, { "epoch": 0.19427539784072023, "grad_norm": 4.599864482879639, "learning_rate": 9.309016040454428e-06, "loss": 0.5947, "step": 16249 }, { "epoch": 0.1942873539855809, "grad_norm": 3.3791849613189697, "learning_rate": 9.308917826339334e-06, "loss": 0.6752, "step": 16250 }, { "epoch": 0.19429931013044155, "grad_norm": 1.9727180004119873, "learning_rate": 9.308819605763003e-06, "loss": 0.553, "step": 16251 }, { "epoch": 0.19431126627530218, "grad_norm": 2.0228960514068604, "learning_rate": 9.308721378725582e-06, "loss": 0.7284, "step": 16252 }, { "epoch": 0.19432322242016284, "grad_norm": 2.279780626296997, "learning_rate": 9.308623145227218e-06, "loss": 0.5399, "step": 16253 }, { "epoch": 0.1943351785650235, "grad_norm": 2.224921703338623, "learning_rate": 9.308524905268058e-06, "loss": 0.6577, "step": 16254 }, { "epoch": 0.19434713470988416, "grad_norm": 2.199774980545044, "learning_rate": 9.308426658848249e-06, "loss": 0.5028, "step": 16255 }, { "epoch": 0.1943590908547448, "grad_norm": 1.6011807918548584, "learning_rate": 9.30832840596794e-06, "loss": 0.6246, "step": 16256 }, { "epoch": 0.19437104699960545, "grad_norm": 2.1053555011749268, "learning_rate": 9.308230146627277e-06, "loss": 0.5967, "step": 16257 }, { "epoch": 0.1943830031444661, "grad_norm": 2.4847230911254883, "learning_rate": 9.308131880826407e-06, "loss": 0.565, "step": 16258 }, { "epoch": 0.19439495928932676, "grad_norm": 2.0318472385406494, "learning_rate": 9.308033608565478e-06, "loss": 0.5394, "step": 16259 }, { "epoch": 0.1944069154341874, "grad_norm": 2.4088616371154785, "learning_rate": 9.307935329844637e-06, "loss": 0.5688, "step": 16260 }, { "epoch": 0.19441887157904805, "grad_norm": 2.4361441135406494, "learning_rate": 9.307837044664033e-06, "loss": 0.5483, "step": 16261 }, { "epoch": 0.1944308277239087, "grad_norm": 3.63716983795166, "learning_rate": 9.307738753023811e-06, "loss": 0.6456, "step": 16262 }, { "epoch": 0.19444278386876934, "grad_norm": 3.770292043685913, "learning_rate": 9.30764045492412e-06, "loss": 0.5772, "step": 16263 }, { "epoch": 0.19445474001363, "grad_norm": 2.592595338821411, "learning_rate": 9.307542150365106e-06, "loss": 0.5826, "step": 16264 }, { "epoch": 0.19446669615849066, "grad_norm": 2.6212029457092285, "learning_rate": 9.307443839346919e-06, "loss": 0.6587, "step": 16265 }, { "epoch": 0.19447865230335132, "grad_norm": 3.8931331634521484, "learning_rate": 9.307345521869702e-06, "loss": 0.5787, "step": 16266 }, { "epoch": 0.19449060844821195, "grad_norm": 22.452125549316406, "learning_rate": 9.307247197933607e-06, "loss": 0.572, "step": 16267 }, { "epoch": 0.1945025645930726, "grad_norm": 5.580594062805176, "learning_rate": 9.307148867538779e-06, "loss": 0.5595, "step": 16268 }, { "epoch": 0.19451452073793327, "grad_norm": 1.9314360618591309, "learning_rate": 9.307050530685365e-06, "loss": 0.7425, "step": 16269 }, { "epoch": 0.19452647688279393, "grad_norm": 3.3767282962799072, "learning_rate": 9.306952187373515e-06, "loss": 0.6593, "step": 16270 }, { "epoch": 0.19453843302765456, "grad_norm": 2.0321919918060303, "learning_rate": 9.306853837603374e-06, "loss": 0.657, "step": 16271 }, { "epoch": 0.19455038917251521, "grad_norm": 2.0155718326568604, "learning_rate": 9.306755481375091e-06, "loss": 0.6262, "step": 16272 }, { "epoch": 0.19456234531737587, "grad_norm": 2.5487825870513916, "learning_rate": 9.306657118688812e-06, "loss": 0.6223, "step": 16273 }, { "epoch": 0.1945743014622365, "grad_norm": 2.3009655475616455, "learning_rate": 9.306558749544685e-06, "loss": 0.6405, "step": 16274 }, { "epoch": 0.19458625760709716, "grad_norm": 6.150939464569092, "learning_rate": 9.30646037394286e-06, "loss": 0.6309, "step": 16275 }, { "epoch": 0.19459821375195782, "grad_norm": 2.733989953994751, "learning_rate": 9.30636199188348e-06, "loss": 0.6306, "step": 16276 }, { "epoch": 0.19461016989681848, "grad_norm": 12.588835716247559, "learning_rate": 9.306263603366696e-06, "loss": 0.7124, "step": 16277 }, { "epoch": 0.1946221260416791, "grad_norm": 2.875009298324585, "learning_rate": 9.306165208392655e-06, "loss": 0.6133, "step": 16278 }, { "epoch": 0.19463408218653977, "grad_norm": 5.1695756912231445, "learning_rate": 9.306066806961502e-06, "loss": 0.6203, "step": 16279 }, { "epoch": 0.19464603833140043, "grad_norm": 4.317121982574463, "learning_rate": 9.30596839907339e-06, "loss": 0.6414, "step": 16280 }, { "epoch": 0.1946579944762611, "grad_norm": 3.716170310974121, "learning_rate": 9.30586998472846e-06, "loss": 0.6658, "step": 16281 }, { "epoch": 0.19466995062112172, "grad_norm": 1.8284473419189453, "learning_rate": 9.305771563926863e-06, "loss": 0.6012, "step": 16282 }, { "epoch": 0.19468190676598238, "grad_norm": 2.0404138565063477, "learning_rate": 9.305673136668747e-06, "loss": 0.6941, "step": 16283 }, { "epoch": 0.19469386291084304, "grad_norm": 3.1457979679107666, "learning_rate": 9.305574702954259e-06, "loss": 0.644, "step": 16284 }, { "epoch": 0.19470581905570367, "grad_norm": 2.5139780044555664, "learning_rate": 9.305476262783546e-06, "loss": 0.6045, "step": 16285 }, { "epoch": 0.19471777520056432, "grad_norm": 2.047976016998291, "learning_rate": 9.305377816156756e-06, "loss": 0.602, "step": 16286 }, { "epoch": 0.19472973134542498, "grad_norm": 2.7127041816711426, "learning_rate": 9.305279363074035e-06, "loss": 0.7204, "step": 16287 }, { "epoch": 0.19474168749028564, "grad_norm": 2.809640884399414, "learning_rate": 9.305180903535536e-06, "loss": 0.6343, "step": 16288 }, { "epoch": 0.19475364363514627, "grad_norm": 2.625492811203003, "learning_rate": 9.305082437541399e-06, "loss": 0.5903, "step": 16289 }, { "epoch": 0.19476559978000693, "grad_norm": 1.8533920049667358, "learning_rate": 9.304983965091778e-06, "loss": 0.5712, "step": 16290 }, { "epoch": 0.1947775559248676, "grad_norm": 2.5082452297210693, "learning_rate": 9.304885486186819e-06, "loss": 0.5381, "step": 16291 }, { "epoch": 0.19478951206972825, "grad_norm": 2.085047721862793, "learning_rate": 9.304787000826667e-06, "loss": 0.5201, "step": 16292 }, { "epoch": 0.19480146821458888, "grad_norm": 4.185708522796631, "learning_rate": 9.304688509011473e-06, "loss": 0.6777, "step": 16293 }, { "epoch": 0.19481342435944954, "grad_norm": 2.2046918869018555, "learning_rate": 9.30459001074138e-06, "loss": 0.5947, "step": 16294 }, { "epoch": 0.1948253805043102, "grad_norm": 2.1862006187438965, "learning_rate": 9.304491506016543e-06, "loss": 0.6952, "step": 16295 }, { "epoch": 0.19483733664917083, "grad_norm": 2.7275524139404297, "learning_rate": 9.304392994837103e-06, "loss": 0.6153, "step": 16296 }, { "epoch": 0.1948492927940315, "grad_norm": 1.74124014377594, "learning_rate": 9.304294477203213e-06, "loss": 0.6396, "step": 16297 }, { "epoch": 0.19486124893889215, "grad_norm": 3.284273862838745, "learning_rate": 9.304195953115016e-06, "loss": 0.6374, "step": 16298 }, { "epoch": 0.1948732050837528, "grad_norm": 4.653020858764648, "learning_rate": 9.304097422572663e-06, "loss": 0.6319, "step": 16299 }, { "epoch": 0.19488516122861343, "grad_norm": 2.14933180809021, "learning_rate": 9.3039988855763e-06, "loss": 0.7611, "step": 16300 }, { "epoch": 0.1948971173734741, "grad_norm": 5.5283122062683105, "learning_rate": 9.303900342126075e-06, "loss": 0.7156, "step": 16301 }, { "epoch": 0.19490907351833475, "grad_norm": 8.021360397338867, "learning_rate": 9.303801792222138e-06, "loss": 0.6278, "step": 16302 }, { "epoch": 0.1949210296631954, "grad_norm": 1.9908896684646606, "learning_rate": 9.303703235864632e-06, "loss": 0.6278, "step": 16303 }, { "epoch": 0.19493298580805604, "grad_norm": 2.8116581439971924, "learning_rate": 9.30360467305371e-06, "loss": 0.5907, "step": 16304 }, { "epoch": 0.1949449419529167, "grad_norm": 4.752719879150391, "learning_rate": 9.303506103789517e-06, "loss": 0.5589, "step": 16305 }, { "epoch": 0.19495689809777736, "grad_norm": 4.009875774383545, "learning_rate": 9.303407528072202e-06, "loss": 0.5734, "step": 16306 }, { "epoch": 0.194968854242638, "grad_norm": 2.491189479827881, "learning_rate": 9.30330894590191e-06, "loss": 0.5853, "step": 16307 }, { "epoch": 0.19498081038749865, "grad_norm": 3.8449625968933105, "learning_rate": 9.303210357278794e-06, "loss": 0.5929, "step": 16308 }, { "epoch": 0.1949927665323593, "grad_norm": 5.162853717803955, "learning_rate": 9.303111762202997e-06, "loss": 0.6458, "step": 16309 }, { "epoch": 0.19500472267721997, "grad_norm": 4.672640800476074, "learning_rate": 9.303013160674667e-06, "loss": 0.5827, "step": 16310 }, { "epoch": 0.1950166788220806, "grad_norm": 3.2065186500549316, "learning_rate": 9.302914552693955e-06, "loss": 0.6129, "step": 16311 }, { "epoch": 0.19502863496694126, "grad_norm": 4.432564735412598, "learning_rate": 9.302815938261007e-06, "loss": 0.6367, "step": 16312 }, { "epoch": 0.19504059111180191, "grad_norm": 2.7168288230895996, "learning_rate": 9.302717317375972e-06, "loss": 0.5638, "step": 16313 }, { "epoch": 0.19505254725666257, "grad_norm": 3.3013787269592285, "learning_rate": 9.302618690038998e-06, "loss": 0.6061, "step": 16314 }, { "epoch": 0.1950645034015232, "grad_norm": 2.256429672241211, "learning_rate": 9.30252005625023e-06, "loss": 0.5632, "step": 16315 }, { "epoch": 0.19507645954638386, "grad_norm": 3.466740369796753, "learning_rate": 9.302421416009819e-06, "loss": 0.66, "step": 16316 }, { "epoch": 0.19508841569124452, "grad_norm": 3.2300899028778076, "learning_rate": 9.30232276931791e-06, "loss": 0.5798, "step": 16317 }, { "epoch": 0.19510037183610518, "grad_norm": 1.4834877252578735, "learning_rate": 9.302224116174654e-06, "loss": 0.557, "step": 16318 }, { "epoch": 0.1951123279809658, "grad_norm": 6.595160961151123, "learning_rate": 9.302125456580198e-06, "loss": 0.6591, "step": 16319 }, { "epoch": 0.19512428412582647, "grad_norm": 2.100800037384033, "learning_rate": 9.302026790534689e-06, "loss": 0.5173, "step": 16320 }, { "epoch": 0.19513624027068713, "grad_norm": 1.8751599788665771, "learning_rate": 9.301928118038278e-06, "loss": 0.616, "step": 16321 }, { "epoch": 0.19514819641554776, "grad_norm": 4.491210460662842, "learning_rate": 9.301829439091106e-06, "loss": 0.6287, "step": 16322 }, { "epoch": 0.19516015256040842, "grad_norm": 6.519400596618652, "learning_rate": 9.301730753693328e-06, "loss": 0.6022, "step": 16323 }, { "epoch": 0.19517210870526908, "grad_norm": 11.2485990524292, "learning_rate": 9.30163206184509e-06, "loss": 0.6651, "step": 16324 }, { "epoch": 0.19518406485012973, "grad_norm": 3.0098540782928467, "learning_rate": 9.301533363546538e-06, "loss": 0.6146, "step": 16325 }, { "epoch": 0.19519602099499037, "grad_norm": 6.021282196044922, "learning_rate": 9.301434658797822e-06, "loss": 0.5733, "step": 16326 }, { "epoch": 0.19520797713985102, "grad_norm": 2.1978657245635986, "learning_rate": 9.30133594759909e-06, "loss": 0.6692, "step": 16327 }, { "epoch": 0.19521993328471168, "grad_norm": 3.7076542377471924, "learning_rate": 9.301237229950488e-06, "loss": 0.5456, "step": 16328 }, { "epoch": 0.19523188942957234, "grad_norm": 4.381396293640137, "learning_rate": 9.301138505852168e-06, "loss": 0.6243, "step": 16329 }, { "epoch": 0.19524384557443297, "grad_norm": 2.081817388534546, "learning_rate": 9.301039775304272e-06, "loss": 0.6083, "step": 16330 }, { "epoch": 0.19525580171929363, "grad_norm": 3.08333683013916, "learning_rate": 9.300941038306955e-06, "loss": 0.6003, "step": 16331 }, { "epoch": 0.1952677578641543, "grad_norm": 4.102783679962158, "learning_rate": 9.300842294860359e-06, "loss": 0.5148, "step": 16332 }, { "epoch": 0.19527971400901492, "grad_norm": 2.760582208633423, "learning_rate": 9.300743544964637e-06, "loss": 0.5836, "step": 16333 }, { "epoch": 0.19529167015387558, "grad_norm": 2.265747547149658, "learning_rate": 9.300644788619933e-06, "loss": 0.6605, "step": 16334 }, { "epoch": 0.19530362629873624, "grad_norm": 1.8344630002975464, "learning_rate": 9.300546025826398e-06, "loss": 0.586, "step": 16335 }, { "epoch": 0.1953155824435969, "grad_norm": 4.664031982421875, "learning_rate": 9.30044725658418e-06, "loss": 0.5844, "step": 16336 }, { "epoch": 0.19532753858845753, "grad_norm": 4.745049953460693, "learning_rate": 9.300348480893423e-06, "loss": 0.634, "step": 16337 }, { "epoch": 0.1953394947333182, "grad_norm": 2.447495937347412, "learning_rate": 9.30024969875428e-06, "loss": 0.6264, "step": 16338 }, { "epoch": 0.19535145087817885, "grad_norm": 2.48541259765625, "learning_rate": 9.300150910166896e-06, "loss": 0.7046, "step": 16339 }, { "epoch": 0.1953634070230395, "grad_norm": 2.3495357036590576, "learning_rate": 9.300052115131422e-06, "loss": 0.5939, "step": 16340 }, { "epoch": 0.19537536316790013, "grad_norm": 2.5942342281341553, "learning_rate": 9.299953313648005e-06, "loss": 0.5875, "step": 16341 }, { "epoch": 0.1953873193127608, "grad_norm": 2.020158052444458, "learning_rate": 9.29985450571679e-06, "loss": 0.6385, "step": 16342 }, { "epoch": 0.19539927545762145, "grad_norm": 2.667614698410034, "learning_rate": 9.299755691337931e-06, "loss": 0.5397, "step": 16343 }, { "epoch": 0.19541123160248208, "grad_norm": 2.0487098693847656, "learning_rate": 9.29965687051157e-06, "loss": 0.6825, "step": 16344 }, { "epoch": 0.19542318774734274, "grad_norm": 1.8578252792358398, "learning_rate": 9.29955804323786e-06, "loss": 0.5639, "step": 16345 }, { "epoch": 0.1954351438922034, "grad_norm": 6.177614688873291, "learning_rate": 9.299459209516948e-06, "loss": 0.5764, "step": 16346 }, { "epoch": 0.19544710003706406, "grad_norm": 8.207805633544922, "learning_rate": 9.299360369348981e-06, "loss": 0.6079, "step": 16347 }, { "epoch": 0.1954590561819247, "grad_norm": 9.395002365112305, "learning_rate": 9.299261522734107e-06, "loss": 0.6366, "step": 16348 }, { "epoch": 0.19547101232678535, "grad_norm": 6.089201927185059, "learning_rate": 9.299162669672476e-06, "loss": 0.6081, "step": 16349 }, { "epoch": 0.195482968471646, "grad_norm": 2.9010169506073, "learning_rate": 9.299063810164235e-06, "loss": 0.6092, "step": 16350 }, { "epoch": 0.19549492461650667, "grad_norm": 2.7769370079040527, "learning_rate": 9.298964944209532e-06, "loss": 0.5886, "step": 16351 }, { "epoch": 0.1955068807613673, "grad_norm": 6.938047409057617, "learning_rate": 9.298866071808518e-06, "loss": 0.6468, "step": 16352 }, { "epoch": 0.19551883690622796, "grad_norm": 2.840313673019409, "learning_rate": 9.298767192961337e-06, "loss": 0.571, "step": 16353 }, { "epoch": 0.1955307930510886, "grad_norm": 3.4370858669281006, "learning_rate": 9.298668307668139e-06, "loss": 0.6258, "step": 16354 }, { "epoch": 0.19554274919594924, "grad_norm": 3.2026188373565674, "learning_rate": 9.298569415929074e-06, "loss": 0.6665, "step": 16355 }, { "epoch": 0.1955547053408099, "grad_norm": 2.3125762939453125, "learning_rate": 9.298470517744288e-06, "loss": 0.6925, "step": 16356 }, { "epoch": 0.19556666148567056, "grad_norm": 9.160743713378906, "learning_rate": 9.29837161311393e-06, "loss": 0.591, "step": 16357 }, { "epoch": 0.19557861763053122, "grad_norm": 2.6796109676361084, "learning_rate": 9.29827270203815e-06, "loss": 0.6758, "step": 16358 }, { "epoch": 0.19559057377539185, "grad_norm": 2.0699098110198975, "learning_rate": 9.298173784517092e-06, "loss": 0.6247, "step": 16359 }, { "epoch": 0.1956025299202525, "grad_norm": 3.7969651222229004, "learning_rate": 9.29807486055091e-06, "loss": 0.6043, "step": 16360 }, { "epoch": 0.19561448606511317, "grad_norm": 2.505934953689575, "learning_rate": 9.297975930139749e-06, "loss": 0.6127, "step": 16361 }, { "epoch": 0.19562644220997383, "grad_norm": 34.746517181396484, "learning_rate": 9.297876993283757e-06, "loss": 0.6581, "step": 16362 }, { "epoch": 0.19563839835483446, "grad_norm": 2.3370375633239746, "learning_rate": 9.297778049983085e-06, "loss": 0.5417, "step": 16363 }, { "epoch": 0.19565035449969512, "grad_norm": 1.9258177280426025, "learning_rate": 9.297679100237876e-06, "loss": 0.5681, "step": 16364 }, { "epoch": 0.19566231064455578, "grad_norm": 2.043102741241455, "learning_rate": 9.297580144048284e-06, "loss": 0.5936, "step": 16365 }, { "epoch": 0.1956742667894164, "grad_norm": 2.292118787765503, "learning_rate": 9.297481181414455e-06, "loss": 0.6536, "step": 16366 }, { "epoch": 0.19568622293427707, "grad_norm": 1.9936591386795044, "learning_rate": 9.297382212336538e-06, "loss": 0.5145, "step": 16367 }, { "epoch": 0.19569817907913772, "grad_norm": 2.2735557556152344, "learning_rate": 9.297283236814681e-06, "loss": 0.5956, "step": 16368 }, { "epoch": 0.19571013522399838, "grad_norm": 1.6793636083602905, "learning_rate": 9.297184254849031e-06, "loss": 0.5682, "step": 16369 }, { "epoch": 0.195722091368859, "grad_norm": 6.3451433181762695, "learning_rate": 9.29708526643974e-06, "loss": 0.6142, "step": 16370 }, { "epoch": 0.19573404751371967, "grad_norm": 2.330148696899414, "learning_rate": 9.296986271586955e-06, "loss": 0.7525, "step": 16371 }, { "epoch": 0.19574600365858033, "grad_norm": 1.9941850900650024, "learning_rate": 9.29688727029082e-06, "loss": 0.6312, "step": 16372 }, { "epoch": 0.195757959803441, "grad_norm": 2.1148433685302734, "learning_rate": 9.29678826255149e-06, "loss": 0.5992, "step": 16373 }, { "epoch": 0.19576991594830162, "grad_norm": 1.697105050086975, "learning_rate": 9.29668924836911e-06, "loss": 0.6131, "step": 16374 }, { "epoch": 0.19578187209316228, "grad_norm": 3.702918529510498, "learning_rate": 9.296590227743832e-06, "loss": 0.6137, "step": 16375 }, { "epoch": 0.19579382823802294, "grad_norm": 5.168533802032471, "learning_rate": 9.296491200675799e-06, "loss": 0.5803, "step": 16376 }, { "epoch": 0.1958057843828836, "grad_norm": 3.0881271362304688, "learning_rate": 9.296392167165162e-06, "loss": 0.5726, "step": 16377 }, { "epoch": 0.19581774052774423, "grad_norm": 2.813750982284546, "learning_rate": 9.29629312721207e-06, "loss": 0.601, "step": 16378 }, { "epoch": 0.19582969667260489, "grad_norm": 2.2352852821350098, "learning_rate": 9.296194080816672e-06, "loss": 0.541, "step": 16379 }, { "epoch": 0.19584165281746554, "grad_norm": 2.7117583751678467, "learning_rate": 9.296095027979115e-06, "loss": 0.6352, "step": 16380 }, { "epoch": 0.19585360896232618, "grad_norm": 2.5141284465789795, "learning_rate": 9.295995968699547e-06, "loss": 0.6168, "step": 16381 }, { "epoch": 0.19586556510718683, "grad_norm": 4.878518581390381, "learning_rate": 9.295896902978119e-06, "loss": 0.6292, "step": 16382 }, { "epoch": 0.1958775212520475, "grad_norm": 36.327884674072266, "learning_rate": 9.295797830814979e-06, "loss": 0.6243, "step": 16383 }, { "epoch": 0.19588947739690815, "grad_norm": 2.6884405612945557, "learning_rate": 9.295698752210275e-06, "loss": 0.5745, "step": 16384 }, { "epoch": 0.19590143354176878, "grad_norm": 4.680400848388672, "learning_rate": 9.295599667164154e-06, "loss": 0.6685, "step": 16385 }, { "epoch": 0.19591338968662944, "grad_norm": 4.5817790031433105, "learning_rate": 9.295500575676769e-06, "loss": 0.5626, "step": 16386 }, { "epoch": 0.1959253458314901, "grad_norm": 3.920283555984497, "learning_rate": 9.295401477748261e-06, "loss": 0.6257, "step": 16387 }, { "epoch": 0.19593730197635076, "grad_norm": 2.692598342895508, "learning_rate": 9.295302373378787e-06, "loss": 0.6078, "step": 16388 }, { "epoch": 0.1959492581212114, "grad_norm": 1.7128101587295532, "learning_rate": 9.295203262568491e-06, "loss": 0.5988, "step": 16389 }, { "epoch": 0.19596121426607205, "grad_norm": 11.287812232971191, "learning_rate": 9.29510414531752e-06, "loss": 0.5312, "step": 16390 }, { "epoch": 0.1959731704109327, "grad_norm": 1.794384241104126, "learning_rate": 9.295005021626027e-06, "loss": 0.6387, "step": 16391 }, { "epoch": 0.19598512655579334, "grad_norm": 3.123789072036743, "learning_rate": 9.294905891494159e-06, "loss": 0.6159, "step": 16392 }, { "epoch": 0.195997082700654, "grad_norm": 2.3208415508270264, "learning_rate": 9.294806754922064e-06, "loss": 0.6065, "step": 16393 }, { "epoch": 0.19600903884551465, "grad_norm": 1.7298513650894165, "learning_rate": 9.294707611909893e-06, "loss": 0.522, "step": 16394 }, { "epoch": 0.1960209949903753, "grad_norm": 2.7063827514648438, "learning_rate": 9.29460846245779e-06, "loss": 0.6369, "step": 16395 }, { "epoch": 0.19603295113523594, "grad_norm": 1.8497778177261353, "learning_rate": 9.294509306565906e-06, "loss": 0.5901, "step": 16396 }, { "epoch": 0.1960449072800966, "grad_norm": 3.763537645339966, "learning_rate": 9.29441014423439e-06, "loss": 0.6485, "step": 16397 }, { "epoch": 0.19605686342495726, "grad_norm": 4.770641326904297, "learning_rate": 9.294310975463394e-06, "loss": 0.6544, "step": 16398 }, { "epoch": 0.19606881956981792, "grad_norm": 1.8946012258529663, "learning_rate": 9.29421180025306e-06, "loss": 0.6692, "step": 16399 }, { "epoch": 0.19608077571467855, "grad_norm": 1.775235652923584, "learning_rate": 9.294112618603541e-06, "loss": 0.5477, "step": 16400 }, { "epoch": 0.1960927318595392, "grad_norm": 2.9616355895996094, "learning_rate": 9.294013430514985e-06, "loss": 0.6194, "step": 16401 }, { "epoch": 0.19610468800439987, "grad_norm": 3.211759567260742, "learning_rate": 9.293914235987542e-06, "loss": 0.6898, "step": 16402 }, { "epoch": 0.1961166441492605, "grad_norm": 3.0686540603637695, "learning_rate": 9.293815035021358e-06, "loss": 0.603, "step": 16403 }, { "epoch": 0.19612860029412116, "grad_norm": 2.4957053661346436, "learning_rate": 9.293715827616584e-06, "loss": 0.6048, "step": 16404 }, { "epoch": 0.19614055643898182, "grad_norm": 4.969326496124268, "learning_rate": 9.293616613773366e-06, "loss": 0.6083, "step": 16405 }, { "epoch": 0.19615251258384248, "grad_norm": 5.6580634117126465, "learning_rate": 9.293517393491856e-06, "loss": 0.7378, "step": 16406 }, { "epoch": 0.1961644687287031, "grad_norm": 1.898268461227417, "learning_rate": 9.2934181667722e-06, "loss": 0.7059, "step": 16407 }, { "epoch": 0.19617642487356377, "grad_norm": 2.0963337421417236, "learning_rate": 9.29331893361455e-06, "loss": 0.6902, "step": 16408 }, { "epoch": 0.19618838101842442, "grad_norm": 2.7218072414398193, "learning_rate": 9.293219694019054e-06, "loss": 0.6841, "step": 16409 }, { "epoch": 0.19620033716328508, "grad_norm": 1.7088030576705933, "learning_rate": 9.293120447985857e-06, "loss": 0.6651, "step": 16410 }, { "epoch": 0.1962122933081457, "grad_norm": 2.747338056564331, "learning_rate": 9.293021195515111e-06, "loss": 0.6627, "step": 16411 }, { "epoch": 0.19622424945300637, "grad_norm": 2.5156455039978027, "learning_rate": 9.292921936606967e-06, "loss": 0.5913, "step": 16412 }, { "epoch": 0.19623620559786703, "grad_norm": 3.0157206058502197, "learning_rate": 9.292822671261568e-06, "loss": 0.764, "step": 16413 }, { "epoch": 0.19624816174272766, "grad_norm": 6.655264854431152, "learning_rate": 9.292723399479067e-06, "loss": 0.6944, "step": 16414 }, { "epoch": 0.19626011788758832, "grad_norm": 2.0261969566345215, "learning_rate": 9.292624121259614e-06, "loss": 0.6867, "step": 16415 }, { "epoch": 0.19627207403244898, "grad_norm": 2.555213689804077, "learning_rate": 9.292524836603355e-06, "loss": 0.6895, "step": 16416 }, { "epoch": 0.19628403017730964, "grad_norm": 2.129188299179077, "learning_rate": 9.292425545510439e-06, "loss": 0.6797, "step": 16417 }, { "epoch": 0.19629598632217027, "grad_norm": 18.75373077392578, "learning_rate": 9.292326247981017e-06, "loss": 0.7259, "step": 16418 }, { "epoch": 0.19630794246703093, "grad_norm": 2.056215286254883, "learning_rate": 9.292226944015235e-06, "loss": 0.6712, "step": 16419 }, { "epoch": 0.19631989861189159, "grad_norm": 4.136684894561768, "learning_rate": 9.292127633613244e-06, "loss": 0.5859, "step": 16420 }, { "epoch": 0.19633185475675224, "grad_norm": 2.061142921447754, "learning_rate": 9.292028316775191e-06, "loss": 0.6257, "step": 16421 }, { "epoch": 0.19634381090161288, "grad_norm": 3.709064483642578, "learning_rate": 9.291928993501227e-06, "loss": 0.6302, "step": 16422 }, { "epoch": 0.19635576704647353, "grad_norm": 5.782601356506348, "learning_rate": 9.2918296637915e-06, "loss": 0.4671, "step": 16423 }, { "epoch": 0.1963677231913342, "grad_norm": 3.9691524505615234, "learning_rate": 9.29173032764616e-06, "loss": 0.5786, "step": 16424 }, { "epoch": 0.19637967933619485, "grad_norm": 1.5465534925460815, "learning_rate": 9.291630985065356e-06, "loss": 0.5565, "step": 16425 }, { "epoch": 0.19639163548105548, "grad_norm": 2.662005662918091, "learning_rate": 9.291531636049236e-06, "loss": 0.6811, "step": 16426 }, { "epoch": 0.19640359162591614, "grad_norm": 1.9233580827713013, "learning_rate": 9.291432280597948e-06, "loss": 0.6297, "step": 16427 }, { "epoch": 0.1964155477707768, "grad_norm": 2.2721126079559326, "learning_rate": 9.291332918711641e-06, "loss": 0.619, "step": 16428 }, { "epoch": 0.19642750391563743, "grad_norm": 4.221245765686035, "learning_rate": 9.291233550390468e-06, "loss": 0.5357, "step": 16429 }, { "epoch": 0.1964394600604981, "grad_norm": 4.136621952056885, "learning_rate": 9.291134175634573e-06, "loss": 0.5549, "step": 16430 }, { "epoch": 0.19645141620535875, "grad_norm": 2.522453784942627, "learning_rate": 9.291034794444106e-06, "loss": 0.6473, "step": 16431 }, { "epoch": 0.1964633723502194, "grad_norm": 2.6409192085266113, "learning_rate": 9.29093540681922e-06, "loss": 0.5635, "step": 16432 }, { "epoch": 0.19647532849508004, "grad_norm": 10.094231605529785, "learning_rate": 9.290836012760058e-06, "loss": 0.6154, "step": 16433 }, { "epoch": 0.1964872846399407, "grad_norm": 1.996984839439392, "learning_rate": 9.290736612266776e-06, "loss": 0.5686, "step": 16434 }, { "epoch": 0.19649924078480135, "grad_norm": 2.81296968460083, "learning_rate": 9.290637205339516e-06, "loss": 0.5645, "step": 16435 }, { "epoch": 0.196511196929662, "grad_norm": 2.4937081336975098, "learning_rate": 9.290537791978431e-06, "loss": 0.641, "step": 16436 }, { "epoch": 0.19652315307452264, "grad_norm": 2.0804805755615234, "learning_rate": 9.290438372183669e-06, "loss": 0.7067, "step": 16437 }, { "epoch": 0.1965351092193833, "grad_norm": 2.979607343673706, "learning_rate": 9.29033894595538e-06, "loss": 0.6304, "step": 16438 }, { "epoch": 0.19654706536424396, "grad_norm": 2.3648884296417236, "learning_rate": 9.290239513293712e-06, "loss": 0.5778, "step": 16439 }, { "epoch": 0.1965590215091046, "grad_norm": 1.9336501359939575, "learning_rate": 9.290140074198814e-06, "loss": 0.5565, "step": 16440 }, { "epoch": 0.19657097765396525, "grad_norm": 3.8442747592926025, "learning_rate": 9.290040628670837e-06, "loss": 0.5777, "step": 16441 }, { "epoch": 0.1965829337988259, "grad_norm": 2.1696016788482666, "learning_rate": 9.289941176709929e-06, "loss": 0.6207, "step": 16442 }, { "epoch": 0.19659488994368657, "grad_norm": 2.038640022277832, "learning_rate": 9.28984171831624e-06, "loss": 0.5517, "step": 16443 }, { "epoch": 0.1966068460885472, "grad_norm": 3.5915775299072266, "learning_rate": 9.289742253489915e-06, "loss": 0.7913, "step": 16444 }, { "epoch": 0.19661880223340786, "grad_norm": 3.14811372756958, "learning_rate": 9.289642782231108e-06, "loss": 0.6304, "step": 16445 }, { "epoch": 0.19663075837826852, "grad_norm": 16.977569580078125, "learning_rate": 9.289543304539967e-06, "loss": 0.6913, "step": 16446 }, { "epoch": 0.19664271452312918, "grad_norm": 1.5204803943634033, "learning_rate": 9.28944382041664e-06, "loss": 0.6505, "step": 16447 }, { "epoch": 0.1966546706679898, "grad_norm": 8.171526908874512, "learning_rate": 9.289344329861277e-06, "loss": 0.6763, "step": 16448 }, { "epoch": 0.19666662681285046, "grad_norm": 2.406100034713745, "learning_rate": 9.289244832874025e-06, "loss": 0.6501, "step": 16449 }, { "epoch": 0.19667858295771112, "grad_norm": 2.366490125656128, "learning_rate": 9.289145329455036e-06, "loss": 0.6497, "step": 16450 }, { "epoch": 0.19669053910257175, "grad_norm": 1.7341957092285156, "learning_rate": 9.28904581960446e-06, "loss": 0.5599, "step": 16451 }, { "epoch": 0.1967024952474324, "grad_norm": 25.66960334777832, "learning_rate": 9.288946303322443e-06, "loss": 0.6819, "step": 16452 }, { "epoch": 0.19671445139229307, "grad_norm": 2.4276413917541504, "learning_rate": 9.288846780609136e-06, "loss": 0.6669, "step": 16453 }, { "epoch": 0.19672640753715373, "grad_norm": 1.9116672277450562, "learning_rate": 9.28874725146469e-06, "loss": 0.6637, "step": 16454 }, { "epoch": 0.19673836368201436, "grad_norm": 2.9336869716644287, "learning_rate": 9.28864771588925e-06, "loss": 0.6503, "step": 16455 }, { "epoch": 0.19675031982687502, "grad_norm": 2.1655831336975098, "learning_rate": 9.288548173882968e-06, "loss": 0.5943, "step": 16456 }, { "epoch": 0.19676227597173568, "grad_norm": 5.022226810455322, "learning_rate": 9.288448625445991e-06, "loss": 0.617, "step": 16457 }, { "epoch": 0.19677423211659634, "grad_norm": 2.6153452396392822, "learning_rate": 9.288349070578473e-06, "loss": 0.6973, "step": 16458 }, { "epoch": 0.19678618826145697, "grad_norm": 1.945609211921692, "learning_rate": 9.28824950928056e-06, "loss": 0.6312, "step": 16459 }, { "epoch": 0.19679814440631763, "grad_norm": 2.301875352859497, "learning_rate": 9.288149941552401e-06, "loss": 0.6121, "step": 16460 }, { "epoch": 0.19681010055117829, "grad_norm": 3.6974072456359863, "learning_rate": 9.288050367394144e-06, "loss": 0.7197, "step": 16461 }, { "epoch": 0.19682205669603892, "grad_norm": 4.304633140563965, "learning_rate": 9.287950786805942e-06, "loss": 0.6513, "step": 16462 }, { "epoch": 0.19683401284089957, "grad_norm": 3.4828450679779053, "learning_rate": 9.287851199787942e-06, "loss": 0.6079, "step": 16463 }, { "epoch": 0.19684596898576023, "grad_norm": 1.9318246841430664, "learning_rate": 9.287751606340296e-06, "loss": 0.6461, "step": 16464 }, { "epoch": 0.1968579251306209, "grad_norm": 2.0943410396575928, "learning_rate": 9.287652006463149e-06, "loss": 0.5952, "step": 16465 }, { "epoch": 0.19686988127548152, "grad_norm": 1.6433907747268677, "learning_rate": 9.287552400156653e-06, "loss": 0.5481, "step": 16466 }, { "epoch": 0.19688183742034218, "grad_norm": 2.3447680473327637, "learning_rate": 9.287452787420956e-06, "loss": 0.5593, "step": 16467 }, { "epoch": 0.19689379356520284, "grad_norm": 2.196538209915161, "learning_rate": 9.28735316825621e-06, "loss": 0.6093, "step": 16468 }, { "epoch": 0.1969057497100635, "grad_norm": 1.7775377035140991, "learning_rate": 9.287253542662562e-06, "loss": 0.5775, "step": 16469 }, { "epoch": 0.19691770585492413, "grad_norm": 2.326226234436035, "learning_rate": 9.287153910640163e-06, "loss": 0.5916, "step": 16470 }, { "epoch": 0.1969296619997848, "grad_norm": 3.1884605884552, "learning_rate": 9.287054272189159e-06, "loss": 0.5668, "step": 16471 }, { "epoch": 0.19694161814464545, "grad_norm": 2.273202896118164, "learning_rate": 9.286954627309704e-06, "loss": 0.6836, "step": 16472 }, { "epoch": 0.19695357428950608, "grad_norm": 1.5063973665237427, "learning_rate": 9.286854976001945e-06, "loss": 0.5217, "step": 16473 }, { "epoch": 0.19696553043436674, "grad_norm": 1.7076148986816406, "learning_rate": 9.286755318266032e-06, "loss": 0.6823, "step": 16474 }, { "epoch": 0.1969774865792274, "grad_norm": 4.303567409515381, "learning_rate": 9.286655654102114e-06, "loss": 0.6112, "step": 16475 }, { "epoch": 0.19698944272408805, "grad_norm": 1.5342075824737549, "learning_rate": 9.286555983510338e-06, "loss": 0.5638, "step": 16476 }, { "epoch": 0.19700139886894869, "grad_norm": 1.970042109489441, "learning_rate": 9.28645630649086e-06, "loss": 0.6103, "step": 16477 }, { "epoch": 0.19701335501380934, "grad_norm": 3.6519582271575928, "learning_rate": 9.286356623043823e-06, "loss": 0.5865, "step": 16478 }, { "epoch": 0.19702531115867, "grad_norm": 3.74595046043396, "learning_rate": 9.28625693316938e-06, "loss": 0.5929, "step": 16479 }, { "epoch": 0.19703726730353066, "grad_norm": 1.9729987382888794, "learning_rate": 9.286157236867682e-06, "loss": 0.6479, "step": 16480 }, { "epoch": 0.1970492234483913, "grad_norm": 2.474567174911499, "learning_rate": 9.286057534138871e-06, "loss": 0.5667, "step": 16481 }, { "epoch": 0.19706117959325195, "grad_norm": 2.3172430992126465, "learning_rate": 9.285957824983105e-06, "loss": 0.6692, "step": 16482 }, { "epoch": 0.1970731357381126, "grad_norm": 4.330440521240234, "learning_rate": 9.285858109400528e-06, "loss": 0.5111, "step": 16483 }, { "epoch": 0.19708509188297327, "grad_norm": 2.1751532554626465, "learning_rate": 9.285758387391293e-06, "loss": 0.6182, "step": 16484 }, { "epoch": 0.1970970480278339, "grad_norm": 16.85274314880371, "learning_rate": 9.285658658955548e-06, "loss": 0.5727, "step": 16485 }, { "epoch": 0.19710900417269456, "grad_norm": 8.594215393066406, "learning_rate": 9.285558924093441e-06, "loss": 0.5566, "step": 16486 }, { "epoch": 0.19712096031755522, "grad_norm": 2.9825806617736816, "learning_rate": 9.285459182805125e-06, "loss": 0.5806, "step": 16487 }, { "epoch": 0.19713291646241585, "grad_norm": 4.052984714508057, "learning_rate": 9.285359435090746e-06, "loss": 0.6331, "step": 16488 }, { "epoch": 0.1971448726072765, "grad_norm": 18.345489501953125, "learning_rate": 9.285259680950457e-06, "loss": 0.7633, "step": 16489 }, { "epoch": 0.19715682875213716, "grad_norm": 3.1346049308776855, "learning_rate": 9.285159920384406e-06, "loss": 0.5986, "step": 16490 }, { "epoch": 0.19716878489699782, "grad_norm": 3.896044969558716, "learning_rate": 9.285060153392741e-06, "loss": 0.5602, "step": 16491 }, { "epoch": 0.19718074104185845, "grad_norm": 11.880000114440918, "learning_rate": 9.284960379975615e-06, "loss": 0.7221, "step": 16492 }, { "epoch": 0.1971926971867191, "grad_norm": 2.2203614711761475, "learning_rate": 9.284860600133172e-06, "loss": 0.5247, "step": 16493 }, { "epoch": 0.19720465333157977, "grad_norm": 2.4096240997314453, "learning_rate": 9.284760813865568e-06, "loss": 0.6152, "step": 16494 }, { "epoch": 0.19721660947644043, "grad_norm": 2.8203723430633545, "learning_rate": 9.28466102117295e-06, "loss": 0.6313, "step": 16495 }, { "epoch": 0.19722856562130106, "grad_norm": 2.870744466781616, "learning_rate": 9.284561222055466e-06, "loss": 0.7233, "step": 16496 }, { "epoch": 0.19724052176616172, "grad_norm": 5.38578462600708, "learning_rate": 9.284461416513269e-06, "loss": 0.6362, "step": 16497 }, { "epoch": 0.19725247791102238, "grad_norm": 3.9028701782226562, "learning_rate": 9.284361604546505e-06, "loss": 0.7601, "step": 16498 }, { "epoch": 0.197264434055883, "grad_norm": 2.2841150760650635, "learning_rate": 9.28426178615533e-06, "loss": 0.6547, "step": 16499 }, { "epoch": 0.19727639020074367, "grad_norm": 2.762302875518799, "learning_rate": 9.284161961339884e-06, "loss": 0.6363, "step": 16500 }, { "epoch": 0.19728834634560433, "grad_norm": 1.7332756519317627, "learning_rate": 9.284062130100325e-06, "loss": 0.6219, "step": 16501 }, { "epoch": 0.19730030249046498, "grad_norm": 3.3391549587249756, "learning_rate": 9.283962292436798e-06, "loss": 0.6415, "step": 16502 }, { "epoch": 0.19731225863532562, "grad_norm": 2.6821060180664062, "learning_rate": 9.283862448349454e-06, "loss": 0.7252, "step": 16503 }, { "epoch": 0.19732421478018627, "grad_norm": 1.96626615524292, "learning_rate": 9.283762597838445e-06, "loss": 0.6234, "step": 16504 }, { "epoch": 0.19733617092504693, "grad_norm": 2.122901201248169, "learning_rate": 9.283662740903917e-06, "loss": 0.5602, "step": 16505 }, { "epoch": 0.1973481270699076, "grad_norm": 2.5424611568450928, "learning_rate": 9.283562877546021e-06, "loss": 0.7003, "step": 16506 }, { "epoch": 0.19736008321476822, "grad_norm": 1.8749809265136719, "learning_rate": 9.283463007764911e-06, "loss": 0.5958, "step": 16507 }, { "epoch": 0.19737203935962888, "grad_norm": 4.352720737457275, "learning_rate": 9.283363131560729e-06, "loss": 0.6163, "step": 16508 }, { "epoch": 0.19738399550448954, "grad_norm": 6.086474418640137, "learning_rate": 9.283263248933629e-06, "loss": 0.6476, "step": 16509 }, { "epoch": 0.19739595164935017, "grad_norm": 8.692018508911133, "learning_rate": 9.283163359883762e-06, "loss": 0.6592, "step": 16510 }, { "epoch": 0.19740790779421083, "grad_norm": 8.700349807739258, "learning_rate": 9.283063464411277e-06, "loss": 0.6475, "step": 16511 }, { "epoch": 0.1974198639390715, "grad_norm": 2.9211232662200928, "learning_rate": 9.282963562516321e-06, "loss": 0.7061, "step": 16512 }, { "epoch": 0.19743182008393215, "grad_norm": 1.8523521423339844, "learning_rate": 9.282863654199049e-06, "loss": 0.6181, "step": 16513 }, { "epoch": 0.19744377622879278, "grad_norm": 1.8935770988464355, "learning_rate": 9.282763739459604e-06, "loss": 0.5753, "step": 16514 }, { "epoch": 0.19745573237365344, "grad_norm": 3.0126256942749023, "learning_rate": 9.282663818298141e-06, "loss": 0.6835, "step": 16515 }, { "epoch": 0.1974676885185141, "grad_norm": 3.158754348754883, "learning_rate": 9.28256389071481e-06, "loss": 0.6017, "step": 16516 }, { "epoch": 0.19747964466337475, "grad_norm": 3.819751501083374, "learning_rate": 9.282463956709757e-06, "loss": 0.7131, "step": 16517 }, { "epoch": 0.19749160080823538, "grad_norm": 2.5427069664001465, "learning_rate": 9.282364016283135e-06, "loss": 0.6775, "step": 16518 }, { "epoch": 0.19750355695309604, "grad_norm": 14.515524864196777, "learning_rate": 9.282264069435093e-06, "loss": 0.7157, "step": 16519 }, { "epoch": 0.1975155130979567, "grad_norm": 3.5637450218200684, "learning_rate": 9.28216411616578e-06, "loss": 0.6025, "step": 16520 }, { "epoch": 0.19752746924281733, "grad_norm": 2.399090528488159, "learning_rate": 9.282064156475348e-06, "loss": 0.6275, "step": 16521 }, { "epoch": 0.197539425387678, "grad_norm": 13.571422576904297, "learning_rate": 9.281964190363946e-06, "loss": 0.6305, "step": 16522 }, { "epoch": 0.19755138153253865, "grad_norm": 2.6223480701446533, "learning_rate": 9.281864217831722e-06, "loss": 0.5729, "step": 16523 }, { "epoch": 0.1975633376773993, "grad_norm": 3.7014846801757812, "learning_rate": 9.281764238878827e-06, "loss": 0.6613, "step": 16524 }, { "epoch": 0.19757529382225994, "grad_norm": 4.678719997406006, "learning_rate": 9.281664253505412e-06, "loss": 0.6202, "step": 16525 }, { "epoch": 0.1975872499671206, "grad_norm": 2.8817059993743896, "learning_rate": 9.281564261711626e-06, "loss": 0.5159, "step": 16526 }, { "epoch": 0.19759920611198126, "grad_norm": 3.5337412357330322, "learning_rate": 9.28146426349762e-06, "loss": 0.5491, "step": 16527 }, { "epoch": 0.19761116225684192, "grad_norm": 3.203838348388672, "learning_rate": 9.281364258863544e-06, "loss": 0.6329, "step": 16528 }, { "epoch": 0.19762311840170255, "grad_norm": 2.0387184619903564, "learning_rate": 9.281264247809545e-06, "loss": 0.5294, "step": 16529 }, { "epoch": 0.1976350745465632, "grad_norm": 4.859507083892822, "learning_rate": 9.281164230335775e-06, "loss": 0.6249, "step": 16530 }, { "epoch": 0.19764703069142386, "grad_norm": 1.866611123085022, "learning_rate": 9.281064206442385e-06, "loss": 0.6259, "step": 16531 }, { "epoch": 0.1976589868362845, "grad_norm": 5.185442924499512, "learning_rate": 9.280964176129523e-06, "loss": 0.7441, "step": 16532 }, { "epoch": 0.19767094298114515, "grad_norm": 3.110985517501831, "learning_rate": 9.280864139397341e-06, "loss": 0.6014, "step": 16533 }, { "epoch": 0.1976828991260058, "grad_norm": 2.1728579998016357, "learning_rate": 9.280764096245988e-06, "loss": 0.7482, "step": 16534 }, { "epoch": 0.19769485527086647, "grad_norm": 3.8329222202301025, "learning_rate": 9.280664046675614e-06, "loss": 0.549, "step": 16535 }, { "epoch": 0.1977068114157271, "grad_norm": 2.9468910694122314, "learning_rate": 9.280563990686368e-06, "loss": 0.5461, "step": 16536 }, { "epoch": 0.19771876756058776, "grad_norm": 2.022991418838501, "learning_rate": 9.280463928278402e-06, "loss": 0.7152, "step": 16537 }, { "epoch": 0.19773072370544842, "grad_norm": 2.1523101329803467, "learning_rate": 9.280363859451867e-06, "loss": 0.7187, "step": 16538 }, { "epoch": 0.19774267985030908, "grad_norm": 4.401484966278076, "learning_rate": 9.280263784206908e-06, "loss": 0.6553, "step": 16539 }, { "epoch": 0.1977546359951697, "grad_norm": 19.37818145751953, "learning_rate": 9.28016370254368e-06, "loss": 0.6492, "step": 16540 }, { "epoch": 0.19776659214003037, "grad_norm": 3.257115364074707, "learning_rate": 9.280063614462332e-06, "loss": 0.6237, "step": 16541 }, { "epoch": 0.19777854828489103, "grad_norm": 1.806212067604065, "learning_rate": 9.27996351996301e-06, "loss": 0.5655, "step": 16542 }, { "epoch": 0.19779050442975168, "grad_norm": 1.8966712951660156, "learning_rate": 9.27986341904587e-06, "loss": 0.6425, "step": 16543 }, { "epoch": 0.19780246057461232, "grad_norm": 2.926445245742798, "learning_rate": 9.27976331171106e-06, "loss": 0.6147, "step": 16544 }, { "epoch": 0.19781441671947297, "grad_norm": 2.904435396194458, "learning_rate": 9.279663197958727e-06, "loss": 0.5155, "step": 16545 }, { "epoch": 0.19782637286433363, "grad_norm": 2.561068058013916, "learning_rate": 9.279563077789028e-06, "loss": 0.5816, "step": 16546 }, { "epoch": 0.19783832900919426, "grad_norm": 3.6765170097351074, "learning_rate": 9.279462951202107e-06, "loss": 0.531, "step": 16547 }, { "epoch": 0.19785028515405492, "grad_norm": 2.4524338245391846, "learning_rate": 9.279362818198115e-06, "loss": 0.5928, "step": 16548 }, { "epoch": 0.19786224129891558, "grad_norm": 4.744536876678467, "learning_rate": 9.279262678777205e-06, "loss": 0.7872, "step": 16549 }, { "epoch": 0.19787419744377624, "grad_norm": 2.860689878463745, "learning_rate": 9.279162532939522e-06, "loss": 0.5955, "step": 16550 }, { "epoch": 0.19788615358863687, "grad_norm": 2.1705174446105957, "learning_rate": 9.279062380685224e-06, "loss": 0.5358, "step": 16551 }, { "epoch": 0.19789810973349753, "grad_norm": 2.6403045654296875, "learning_rate": 9.278962222014454e-06, "loss": 0.6721, "step": 16552 }, { "epoch": 0.1979100658783582, "grad_norm": 2.6869614124298096, "learning_rate": 9.278862056927366e-06, "loss": 0.5357, "step": 16553 }, { "epoch": 0.19792202202321885, "grad_norm": 5.077018737792969, "learning_rate": 9.278761885424109e-06, "loss": 0.6137, "step": 16554 }, { "epoch": 0.19793397816807948, "grad_norm": 2.4917171001434326, "learning_rate": 9.278661707504833e-06, "loss": 0.5814, "step": 16555 }, { "epoch": 0.19794593431294014, "grad_norm": 2.2926902770996094, "learning_rate": 9.27856152316969e-06, "loss": 0.6568, "step": 16556 }, { "epoch": 0.1979578904578008, "grad_norm": 9.244013786315918, "learning_rate": 9.278461332418827e-06, "loss": 0.5914, "step": 16557 }, { "epoch": 0.19796984660266143, "grad_norm": 2.7103054523468018, "learning_rate": 9.278361135252396e-06, "loss": 0.5852, "step": 16558 }, { "epoch": 0.19798180274752208, "grad_norm": 3.6999218463897705, "learning_rate": 9.278260931670549e-06, "loss": 0.6728, "step": 16559 }, { "epoch": 0.19799375889238274, "grad_norm": 2.349824905395508, "learning_rate": 9.278160721673432e-06, "loss": 0.5444, "step": 16560 }, { "epoch": 0.1980057150372434, "grad_norm": 17.05982780456543, "learning_rate": 9.2780605052612e-06, "loss": 0.6913, "step": 16561 }, { "epoch": 0.19801767118210403, "grad_norm": 2.812751054763794, "learning_rate": 9.277960282433999e-06, "loss": 0.6253, "step": 16562 }, { "epoch": 0.1980296273269647, "grad_norm": 2.521768093109131, "learning_rate": 9.277860053191984e-06, "loss": 0.6371, "step": 16563 }, { "epoch": 0.19804158347182535, "grad_norm": 6.79098653793335, "learning_rate": 9.277759817535302e-06, "loss": 0.5767, "step": 16564 }, { "epoch": 0.198053539616686, "grad_norm": 1.8100948333740234, "learning_rate": 9.277659575464102e-06, "loss": 0.6386, "step": 16565 }, { "epoch": 0.19806549576154664, "grad_norm": 1.664463996887207, "learning_rate": 9.277559326978538e-06, "loss": 0.5373, "step": 16566 }, { "epoch": 0.1980774519064073, "grad_norm": 6.295030117034912, "learning_rate": 9.277459072078758e-06, "loss": 0.5455, "step": 16567 }, { "epoch": 0.19808940805126796, "grad_norm": 5.695476531982422, "learning_rate": 9.277358810764913e-06, "loss": 0.6335, "step": 16568 }, { "epoch": 0.1981013641961286, "grad_norm": 1.836253046989441, "learning_rate": 9.277258543037153e-06, "loss": 0.584, "step": 16569 }, { "epoch": 0.19811332034098925, "grad_norm": 1.9320716857910156, "learning_rate": 9.277158268895629e-06, "loss": 0.5255, "step": 16570 }, { "epoch": 0.1981252764858499, "grad_norm": 2.7075612545013428, "learning_rate": 9.277057988340491e-06, "loss": 0.5571, "step": 16571 }, { "epoch": 0.19813723263071056, "grad_norm": 5.474186897277832, "learning_rate": 9.27695770137189e-06, "loss": 0.508, "step": 16572 }, { "epoch": 0.1981491887755712, "grad_norm": 3.1346609592437744, "learning_rate": 9.276857407989975e-06, "loss": 0.5981, "step": 16573 }, { "epoch": 0.19816114492043185, "grad_norm": 2.609229326248169, "learning_rate": 9.276757108194896e-06, "loss": 0.5108, "step": 16574 }, { "epoch": 0.1981731010652925, "grad_norm": 4.813197135925293, "learning_rate": 9.276656801986805e-06, "loss": 0.5935, "step": 16575 }, { "epoch": 0.19818505721015317, "grad_norm": 2.2732203006744385, "learning_rate": 9.276556489365853e-06, "loss": 0.6282, "step": 16576 }, { "epoch": 0.1981970133550138, "grad_norm": 4.549431324005127, "learning_rate": 9.27645617033219e-06, "loss": 0.6826, "step": 16577 }, { "epoch": 0.19820896949987446, "grad_norm": 4.528674602508545, "learning_rate": 9.276355844885963e-06, "loss": 0.5841, "step": 16578 }, { "epoch": 0.19822092564473512, "grad_norm": 2.4400126934051514, "learning_rate": 9.276255513027326e-06, "loss": 0.6873, "step": 16579 }, { "epoch": 0.19823288178959575, "grad_norm": 2.2424652576446533, "learning_rate": 9.276155174756429e-06, "loss": 0.6249, "step": 16580 }, { "epoch": 0.1982448379344564, "grad_norm": 2.086981773376465, "learning_rate": 9.276054830073423e-06, "loss": 0.6625, "step": 16581 }, { "epoch": 0.19825679407931707, "grad_norm": 3.2362468242645264, "learning_rate": 9.275954478978458e-06, "loss": 0.5925, "step": 16582 }, { "epoch": 0.19826875022417773, "grad_norm": 4.707674026489258, "learning_rate": 9.275854121471682e-06, "loss": 0.6291, "step": 16583 }, { "epoch": 0.19828070636903836, "grad_norm": 2.650773525238037, "learning_rate": 9.275753757553249e-06, "loss": 0.5073, "step": 16584 }, { "epoch": 0.19829266251389902, "grad_norm": 3.347362995147705, "learning_rate": 9.275653387223309e-06, "loss": 0.569, "step": 16585 }, { "epoch": 0.19830461865875967, "grad_norm": 2.153498888015747, "learning_rate": 9.27555301048201e-06, "loss": 0.5935, "step": 16586 }, { "epoch": 0.19831657480362033, "grad_norm": 3.2503247261047363, "learning_rate": 9.275452627329504e-06, "loss": 0.6939, "step": 16587 }, { "epoch": 0.19832853094848096, "grad_norm": 2.3278591632843018, "learning_rate": 9.275352237765943e-06, "loss": 0.6404, "step": 16588 }, { "epoch": 0.19834048709334162, "grad_norm": 1.9610611200332642, "learning_rate": 9.275251841791474e-06, "loss": 0.5992, "step": 16589 }, { "epoch": 0.19835244323820228, "grad_norm": 2.7535510063171387, "learning_rate": 9.27515143940625e-06, "loss": 0.7045, "step": 16590 }, { "epoch": 0.1983643993830629, "grad_norm": 2.727277994155884, "learning_rate": 9.275051030610422e-06, "loss": 0.6961, "step": 16591 }, { "epoch": 0.19837635552792357, "grad_norm": 2.767145872116089, "learning_rate": 9.27495061540414e-06, "loss": 0.5641, "step": 16592 }, { "epoch": 0.19838831167278423, "grad_norm": 2.8811705112457275, "learning_rate": 9.274850193787555e-06, "loss": 0.5392, "step": 16593 }, { "epoch": 0.1984002678176449, "grad_norm": 3.3815739154815674, "learning_rate": 9.274749765760815e-06, "loss": 0.6406, "step": 16594 }, { "epoch": 0.19841222396250552, "grad_norm": 2.358366012573242, "learning_rate": 9.274649331324074e-06, "loss": 0.6434, "step": 16595 }, { "epoch": 0.19842418010736618, "grad_norm": 1.9512001276016235, "learning_rate": 9.274548890477481e-06, "loss": 0.585, "step": 16596 }, { "epoch": 0.19843613625222684, "grad_norm": 5.448583126068115, "learning_rate": 9.274448443221188e-06, "loss": 0.5974, "step": 16597 }, { "epoch": 0.1984480923970875, "grad_norm": 3.5227482318878174, "learning_rate": 9.274347989555342e-06, "loss": 0.6929, "step": 16598 }, { "epoch": 0.19846004854194813, "grad_norm": 2.4848363399505615, "learning_rate": 9.274247529480096e-06, "loss": 0.5923, "step": 16599 }, { "epoch": 0.19847200468680878, "grad_norm": 1.792945146560669, "learning_rate": 9.2741470629956e-06, "loss": 0.6554, "step": 16600 }, { "epoch": 0.19848396083166944, "grad_norm": 2.667611837387085, "learning_rate": 9.274046590102008e-06, "loss": 0.6135, "step": 16601 }, { "epoch": 0.1984959169765301, "grad_norm": 2.7852017879486084, "learning_rate": 9.273946110799467e-06, "loss": 0.5769, "step": 16602 }, { "epoch": 0.19850787312139073, "grad_norm": 2.2140934467315674, "learning_rate": 9.273845625088129e-06, "loss": 0.6167, "step": 16603 }, { "epoch": 0.1985198292662514, "grad_norm": 2.049062490463257, "learning_rate": 9.273745132968142e-06, "loss": 0.5539, "step": 16604 }, { "epoch": 0.19853178541111205, "grad_norm": 3.124877691268921, "learning_rate": 9.273644634439662e-06, "loss": 0.5664, "step": 16605 }, { "epoch": 0.19854374155597268, "grad_norm": 2.454511880874634, "learning_rate": 9.273544129502834e-06, "loss": 0.6333, "step": 16606 }, { "epoch": 0.19855569770083334, "grad_norm": 2.247123956680298, "learning_rate": 9.273443618157814e-06, "loss": 0.5132, "step": 16607 }, { "epoch": 0.198567653845694, "grad_norm": 2.0087461471557617, "learning_rate": 9.273343100404749e-06, "loss": 0.5036, "step": 16608 }, { "epoch": 0.19857960999055466, "grad_norm": 11.015074729919434, "learning_rate": 9.27324257624379e-06, "loss": 0.6573, "step": 16609 }, { "epoch": 0.1985915661354153, "grad_norm": 3.5772883892059326, "learning_rate": 9.273142045675087e-06, "loss": 0.5916, "step": 16610 }, { "epoch": 0.19860352228027595, "grad_norm": 18.38616943359375, "learning_rate": 9.273041508698796e-06, "loss": 0.5526, "step": 16611 }, { "epoch": 0.1986154784251366, "grad_norm": 3.4677064418792725, "learning_rate": 9.272940965315062e-06, "loss": 0.6797, "step": 16612 }, { "epoch": 0.19862743456999726, "grad_norm": 2.9468941688537598, "learning_rate": 9.272840415524038e-06, "loss": 0.5024, "step": 16613 }, { "epoch": 0.1986393907148579, "grad_norm": 5.9357123374938965, "learning_rate": 9.272739859325876e-06, "loss": 0.6558, "step": 16614 }, { "epoch": 0.19865134685971855, "grad_norm": 2.8544607162475586, "learning_rate": 9.272639296720725e-06, "loss": 0.651, "step": 16615 }, { "epoch": 0.1986633030045792, "grad_norm": 16.671300888061523, "learning_rate": 9.272538727708735e-06, "loss": 0.5222, "step": 16616 }, { "epoch": 0.19867525914943984, "grad_norm": 2.193049430847168, "learning_rate": 9.272438152290058e-06, "loss": 0.6597, "step": 16617 }, { "epoch": 0.1986872152943005, "grad_norm": 5.255748271942139, "learning_rate": 9.272337570464846e-06, "loss": 0.6142, "step": 16618 }, { "epoch": 0.19869917143916116, "grad_norm": 4.278520107269287, "learning_rate": 9.272236982233247e-06, "loss": 0.5746, "step": 16619 }, { "epoch": 0.19871112758402182, "grad_norm": 2.4019250869750977, "learning_rate": 9.272136387595415e-06, "loss": 0.547, "step": 16620 }, { "epoch": 0.19872308372888245, "grad_norm": 2.5289440155029297, "learning_rate": 9.2720357865515e-06, "loss": 0.5446, "step": 16621 }, { "epoch": 0.1987350398737431, "grad_norm": 2.287992477416992, "learning_rate": 9.27193517910165e-06, "loss": 0.552, "step": 16622 }, { "epoch": 0.19874699601860377, "grad_norm": 2.026913642883301, "learning_rate": 9.271834565246019e-06, "loss": 0.6169, "step": 16623 }, { "epoch": 0.19875895216346443, "grad_norm": 3.601897954940796, "learning_rate": 9.271733944984755e-06, "loss": 0.7622, "step": 16624 }, { "epoch": 0.19877090830832506, "grad_norm": 2.3561134338378906, "learning_rate": 9.271633318318014e-06, "loss": 0.594, "step": 16625 }, { "epoch": 0.19878286445318571, "grad_norm": 2.336191415786743, "learning_rate": 9.271532685245943e-06, "loss": 0.634, "step": 16626 }, { "epoch": 0.19879482059804637, "grad_norm": 2.744267702102661, "learning_rate": 9.271432045768692e-06, "loss": 0.5225, "step": 16627 }, { "epoch": 0.198806776742907, "grad_norm": 3.809816360473633, "learning_rate": 9.271331399886415e-06, "loss": 0.6625, "step": 16628 }, { "epoch": 0.19881873288776766, "grad_norm": 3.5350420475006104, "learning_rate": 9.27123074759926e-06, "loss": 0.6251, "step": 16629 }, { "epoch": 0.19883068903262832, "grad_norm": 2.2200562953948975, "learning_rate": 9.271130088907379e-06, "loss": 0.6147, "step": 16630 }, { "epoch": 0.19884264517748898, "grad_norm": 4.350754261016846, "learning_rate": 9.271029423810925e-06, "loss": 0.5504, "step": 16631 }, { "epoch": 0.1988546013223496, "grad_norm": 2.221440553665161, "learning_rate": 9.270928752310045e-06, "loss": 0.6785, "step": 16632 }, { "epoch": 0.19886655746721027, "grad_norm": 2.404604196548462, "learning_rate": 9.270828074404895e-06, "loss": 0.7422, "step": 16633 }, { "epoch": 0.19887851361207093, "grad_norm": 2.546412706375122, "learning_rate": 9.270727390095621e-06, "loss": 0.5526, "step": 16634 }, { "epoch": 0.1988904697569316, "grad_norm": 7.591456890106201, "learning_rate": 9.270626699382377e-06, "loss": 0.6058, "step": 16635 }, { "epoch": 0.19890242590179222, "grad_norm": 3.1161856651306152, "learning_rate": 9.270526002265312e-06, "loss": 0.7059, "step": 16636 }, { "epoch": 0.19891438204665288, "grad_norm": 1.7667460441589355, "learning_rate": 9.27042529874458e-06, "loss": 0.5803, "step": 16637 }, { "epoch": 0.19892633819151354, "grad_norm": 3.052340269088745, "learning_rate": 9.27032458882033e-06, "loss": 0.7122, "step": 16638 }, { "epoch": 0.19893829433637417, "grad_norm": 2.0329670906066895, "learning_rate": 9.27022387249271e-06, "loss": 0.6125, "step": 16639 }, { "epoch": 0.19895025048123482, "grad_norm": 2.646162748336792, "learning_rate": 9.270123149761877e-06, "loss": 0.6732, "step": 16640 }, { "epoch": 0.19896220662609548, "grad_norm": 5.365051746368408, "learning_rate": 9.270022420627977e-06, "loss": 0.5925, "step": 16641 }, { "epoch": 0.19897416277095614, "grad_norm": 5.555450439453125, "learning_rate": 9.269921685091166e-06, "loss": 0.5965, "step": 16642 }, { "epoch": 0.19898611891581677, "grad_norm": 2.474276542663574, "learning_rate": 9.26982094315159e-06, "loss": 0.6, "step": 16643 }, { "epoch": 0.19899807506067743, "grad_norm": 2.897786855697632, "learning_rate": 9.269720194809404e-06, "loss": 0.542, "step": 16644 }, { "epoch": 0.1990100312055381, "grad_norm": 3.2772202491760254, "learning_rate": 9.269619440064757e-06, "loss": 0.6194, "step": 16645 }, { "epoch": 0.19902198735039875, "grad_norm": 2.7934062480926514, "learning_rate": 9.2695186789178e-06, "loss": 0.4937, "step": 16646 }, { "epoch": 0.19903394349525938, "grad_norm": 5.481463432312012, "learning_rate": 9.269417911368684e-06, "loss": 0.4632, "step": 16647 }, { "epoch": 0.19904589964012004, "grad_norm": 7.318513870239258, "learning_rate": 9.269317137417561e-06, "loss": 0.6458, "step": 16648 }, { "epoch": 0.1990578557849807, "grad_norm": 2.9448153972625732, "learning_rate": 9.269216357064582e-06, "loss": 0.6825, "step": 16649 }, { "epoch": 0.19906981192984133, "grad_norm": 3.3436646461486816, "learning_rate": 9.269115570309897e-06, "loss": 0.6277, "step": 16650 }, { "epoch": 0.199081768074702, "grad_norm": 3.7859551906585693, "learning_rate": 9.26901477715366e-06, "loss": 0.7114, "step": 16651 }, { "epoch": 0.19909372421956265, "grad_norm": 3.6111297607421875, "learning_rate": 9.26891397759602e-06, "loss": 0.6416, "step": 16652 }, { "epoch": 0.1991056803644233, "grad_norm": 2.655381202697754, "learning_rate": 9.268813171637126e-06, "loss": 0.6456, "step": 16653 }, { "epoch": 0.19911763650928394, "grad_norm": 2.9644927978515625, "learning_rate": 9.268712359277133e-06, "loss": 0.6267, "step": 16654 }, { "epoch": 0.1991295926541446, "grad_norm": 4.032403469085693, "learning_rate": 9.26861154051619e-06, "loss": 0.6375, "step": 16655 }, { "epoch": 0.19914154879900525, "grad_norm": 4.165650844573975, "learning_rate": 9.26851071535445e-06, "loss": 0.4977, "step": 16656 }, { "epoch": 0.1991535049438659, "grad_norm": 1.8073697090148926, "learning_rate": 9.268409883792062e-06, "loss": 0.5002, "step": 16657 }, { "epoch": 0.19916546108872654, "grad_norm": 3.1649768352508545, "learning_rate": 9.268309045829179e-06, "loss": 0.5157, "step": 16658 }, { "epoch": 0.1991774172335872, "grad_norm": 3.0170507431030273, "learning_rate": 9.268208201465952e-06, "loss": 0.6, "step": 16659 }, { "epoch": 0.19918937337844786, "grad_norm": 2.437532424926758, "learning_rate": 9.268107350702529e-06, "loss": 0.6212, "step": 16660 }, { "epoch": 0.19920132952330852, "grad_norm": 2.076508045196533, "learning_rate": 9.268006493539066e-06, "loss": 0.6634, "step": 16661 }, { "epoch": 0.19921328566816915, "grad_norm": 9.95505142211914, "learning_rate": 9.267905629975711e-06, "loss": 0.5711, "step": 16662 }, { "epoch": 0.1992252418130298, "grad_norm": 3.0808022022247314, "learning_rate": 9.267804760012618e-06, "loss": 0.5885, "step": 16663 }, { "epoch": 0.19923719795789047, "grad_norm": 2.229931354522705, "learning_rate": 9.267703883649935e-06, "loss": 0.4998, "step": 16664 }, { "epoch": 0.1992491541027511, "grad_norm": 27.954818725585938, "learning_rate": 9.267603000887817e-06, "loss": 0.662, "step": 16665 }, { "epoch": 0.19926111024761176, "grad_norm": 1.768446683883667, "learning_rate": 9.26750211172641e-06, "loss": 0.6043, "step": 16666 }, { "epoch": 0.19927306639247241, "grad_norm": 2.411533832550049, "learning_rate": 9.267401216165869e-06, "loss": 0.5623, "step": 16667 }, { "epoch": 0.19928502253733307, "grad_norm": 4.439079761505127, "learning_rate": 9.267300314206347e-06, "loss": 0.6272, "step": 16668 }, { "epoch": 0.1992969786821937, "grad_norm": 5.793731689453125, "learning_rate": 9.26719940584799e-06, "loss": 0.6034, "step": 16669 }, { "epoch": 0.19930893482705436, "grad_norm": 2.3391530513763428, "learning_rate": 9.267098491090956e-06, "loss": 0.5716, "step": 16670 }, { "epoch": 0.19932089097191502, "grad_norm": 2.689222812652588, "learning_rate": 9.26699756993539e-06, "loss": 0.6703, "step": 16671 }, { "epoch": 0.19933284711677568, "grad_norm": 2.0240604877471924, "learning_rate": 9.266896642381447e-06, "loss": 0.6628, "step": 16672 }, { "epoch": 0.1993448032616363, "grad_norm": 2.3809688091278076, "learning_rate": 9.266795708429277e-06, "loss": 0.6305, "step": 16673 }, { "epoch": 0.19935675940649697, "grad_norm": 11.5440092086792, "learning_rate": 9.26669476807903e-06, "loss": 0.5987, "step": 16674 }, { "epoch": 0.19936871555135763, "grad_norm": 3.4000632762908936, "learning_rate": 9.266593821330862e-06, "loss": 0.5275, "step": 16675 }, { "epoch": 0.19938067169621826, "grad_norm": 7.663074493408203, "learning_rate": 9.26649286818492e-06, "loss": 0.6283, "step": 16676 }, { "epoch": 0.19939262784107892, "grad_norm": 2.729139804840088, "learning_rate": 9.266391908641357e-06, "loss": 0.6938, "step": 16677 }, { "epoch": 0.19940458398593958, "grad_norm": 1.7151528596878052, "learning_rate": 9.266290942700324e-06, "loss": 0.5104, "step": 16678 }, { "epoch": 0.19941654013080023, "grad_norm": 2.2114062309265137, "learning_rate": 9.266189970361972e-06, "loss": 0.6237, "step": 16679 }, { "epoch": 0.19942849627566087, "grad_norm": 2.487835168838501, "learning_rate": 9.266088991626453e-06, "loss": 0.7104, "step": 16680 }, { "epoch": 0.19944045242052152, "grad_norm": 2.3634352684020996, "learning_rate": 9.265988006493918e-06, "loss": 0.6354, "step": 16681 }, { "epoch": 0.19945240856538218, "grad_norm": 2.950047492980957, "learning_rate": 9.265887014964521e-06, "loss": 0.6055, "step": 16682 }, { "epoch": 0.19946436471024284, "grad_norm": 1.7138653993606567, "learning_rate": 9.26578601703841e-06, "loss": 0.5695, "step": 16683 }, { "epoch": 0.19947632085510347, "grad_norm": 2.189361572265625, "learning_rate": 9.265685012715737e-06, "loss": 0.6789, "step": 16684 }, { "epoch": 0.19948827699996413, "grad_norm": 3.7347052097320557, "learning_rate": 9.265584001996654e-06, "loss": 0.5835, "step": 16685 }, { "epoch": 0.1995002331448248, "grad_norm": 3.00728702545166, "learning_rate": 9.265482984881314e-06, "loss": 0.5945, "step": 16686 }, { "epoch": 0.19951218928968542, "grad_norm": 3.118149757385254, "learning_rate": 9.265381961369868e-06, "loss": 0.5736, "step": 16687 }, { "epoch": 0.19952414543454608, "grad_norm": 1.97517991065979, "learning_rate": 9.265280931462463e-06, "loss": 0.5775, "step": 16688 }, { "epoch": 0.19953610157940674, "grad_norm": 6.354101181030273, "learning_rate": 9.265179895159257e-06, "loss": 0.5593, "step": 16689 }, { "epoch": 0.1995480577242674, "grad_norm": 3.3403120040893555, "learning_rate": 9.2650788524604e-06, "loss": 0.5717, "step": 16690 }, { "epoch": 0.19956001386912803, "grad_norm": 2.106656312942505, "learning_rate": 9.264977803366038e-06, "loss": 0.5692, "step": 16691 }, { "epoch": 0.1995719700139887, "grad_norm": 2.180684804916382, "learning_rate": 9.264876747876329e-06, "loss": 0.5706, "step": 16692 }, { "epoch": 0.19958392615884935, "grad_norm": 2.951509475708008, "learning_rate": 9.264775685991421e-06, "loss": 0.6784, "step": 16693 }, { "epoch": 0.19959588230371, "grad_norm": 3.073396682739258, "learning_rate": 9.264674617711467e-06, "loss": 0.7065, "step": 16694 }, { "epoch": 0.19960783844857063, "grad_norm": 4.699062824249268, "learning_rate": 9.26457354303662e-06, "loss": 0.6163, "step": 16695 }, { "epoch": 0.1996197945934313, "grad_norm": 2.4309263229370117, "learning_rate": 9.264472461967029e-06, "loss": 0.5218, "step": 16696 }, { "epoch": 0.19963175073829195, "grad_norm": 2.066594123840332, "learning_rate": 9.264371374502846e-06, "loss": 0.572, "step": 16697 }, { "epoch": 0.19964370688315258, "grad_norm": 2.6504557132720947, "learning_rate": 9.264270280644221e-06, "loss": 0.5887, "step": 16698 }, { "epoch": 0.19965566302801324, "grad_norm": 2.0546841621398926, "learning_rate": 9.26416918039131e-06, "loss": 0.6313, "step": 16699 }, { "epoch": 0.1996676191728739, "grad_norm": 3.670593023300171, "learning_rate": 9.264068073744261e-06, "loss": 0.6158, "step": 16700 }, { "epoch": 0.19967957531773456, "grad_norm": 1.8245786428451538, "learning_rate": 9.263966960703227e-06, "loss": 0.5056, "step": 16701 }, { "epoch": 0.1996915314625952, "grad_norm": 2.692967653274536, "learning_rate": 9.26386584126836e-06, "loss": 0.687, "step": 16702 }, { "epoch": 0.19970348760745585, "grad_norm": 2.4714722633361816, "learning_rate": 9.263764715439812e-06, "loss": 0.6004, "step": 16703 }, { "epoch": 0.1997154437523165, "grad_norm": 2.2368381023406982, "learning_rate": 9.263663583217732e-06, "loss": 0.5608, "step": 16704 }, { "epoch": 0.19972739989717717, "grad_norm": 3.02671217918396, "learning_rate": 9.263562444602273e-06, "loss": 0.6032, "step": 16705 }, { "epoch": 0.1997393560420378, "grad_norm": 2.5145370960235596, "learning_rate": 9.263461299593589e-06, "loss": 0.6626, "step": 16706 }, { "epoch": 0.19975131218689846, "grad_norm": 1.8456857204437256, "learning_rate": 9.263360148191827e-06, "loss": 0.6058, "step": 16707 }, { "epoch": 0.19976326833175911, "grad_norm": 1.8450714349746704, "learning_rate": 9.263258990397143e-06, "loss": 0.647, "step": 16708 }, { "epoch": 0.19977522447661974, "grad_norm": 3.1743900775909424, "learning_rate": 9.263157826209686e-06, "loss": 0.7179, "step": 16709 }, { "epoch": 0.1997871806214804, "grad_norm": 1.889816164970398, "learning_rate": 9.26305665562961e-06, "loss": 0.6634, "step": 16710 }, { "epoch": 0.19979913676634106, "grad_norm": 3.220264196395874, "learning_rate": 9.262955478657063e-06, "loss": 0.5594, "step": 16711 }, { "epoch": 0.19981109291120172, "grad_norm": 2.224428653717041, "learning_rate": 9.2628542952922e-06, "loss": 0.6491, "step": 16712 }, { "epoch": 0.19982304905606235, "grad_norm": 2.0430736541748047, "learning_rate": 9.262753105535174e-06, "loss": 0.6526, "step": 16713 }, { "epoch": 0.199835005200923, "grad_norm": 1.6020644903182983, "learning_rate": 9.262651909386134e-06, "loss": 0.602, "step": 16714 }, { "epoch": 0.19984696134578367, "grad_norm": 2.310335397720337, "learning_rate": 9.26255070684523e-06, "loss": 0.5306, "step": 16715 }, { "epoch": 0.19985891749064433, "grad_norm": 3.2180733680725098, "learning_rate": 9.262449497912618e-06, "loss": 0.5876, "step": 16716 }, { "epoch": 0.19987087363550496, "grad_norm": 1.543947458267212, "learning_rate": 9.262348282588447e-06, "loss": 0.5241, "step": 16717 }, { "epoch": 0.19988282978036562, "grad_norm": 6.3689351081848145, "learning_rate": 9.26224706087287e-06, "loss": 0.6119, "step": 16718 }, { "epoch": 0.19989478592522628, "grad_norm": 2.7579081058502197, "learning_rate": 9.262145832766038e-06, "loss": 0.6076, "step": 16719 }, { "epoch": 0.19990674207008693, "grad_norm": 2.7389771938323975, "learning_rate": 9.262044598268104e-06, "loss": 0.7011, "step": 16720 }, { "epoch": 0.19991869821494757, "grad_norm": 2.299128293991089, "learning_rate": 9.261943357379217e-06, "loss": 0.6725, "step": 16721 }, { "epoch": 0.19993065435980822, "grad_norm": 1.9987367391586304, "learning_rate": 9.261842110099533e-06, "loss": 0.5604, "step": 16722 }, { "epoch": 0.19994261050466888, "grad_norm": 2.278761148452759, "learning_rate": 9.261740856429202e-06, "loss": 0.6413, "step": 16723 }, { "epoch": 0.1999545666495295, "grad_norm": 1.6699185371398926, "learning_rate": 9.261639596368374e-06, "loss": 0.5098, "step": 16724 }, { "epoch": 0.19996652279439017, "grad_norm": 1.8770451545715332, "learning_rate": 9.261538329917202e-06, "loss": 0.643, "step": 16725 }, { "epoch": 0.19997847893925083, "grad_norm": 4.267460823059082, "learning_rate": 9.26143705707584e-06, "loss": 0.6776, "step": 16726 }, { "epoch": 0.1999904350841115, "grad_norm": 2.198784589767456, "learning_rate": 9.261335777844436e-06, "loss": 0.6159, "step": 16727 }, { "epoch": 0.20000239122897212, "grad_norm": 3.7266664505004883, "learning_rate": 9.261234492223143e-06, "loss": 0.5351, "step": 16728 }, { "epoch": 0.20001434737383278, "grad_norm": 4.398966312408447, "learning_rate": 9.261133200212116e-06, "loss": 0.6355, "step": 16729 }, { "epoch": 0.20002630351869344, "grad_norm": 2.709480047225952, "learning_rate": 9.261031901811503e-06, "loss": 0.689, "step": 16730 }, { "epoch": 0.2000382596635541, "grad_norm": 13.265412330627441, "learning_rate": 9.26093059702146e-06, "loss": 0.5825, "step": 16731 }, { "epoch": 0.20005021580841473, "grad_norm": 2.385230541229248, "learning_rate": 9.260829285842134e-06, "loss": 0.5719, "step": 16732 }, { "epoch": 0.2000621719532754, "grad_norm": 1.965129017829895, "learning_rate": 9.260727968273679e-06, "loss": 0.6224, "step": 16733 }, { "epoch": 0.20007412809813604, "grad_norm": 10.093183517456055, "learning_rate": 9.26062664431625e-06, "loss": 0.5852, "step": 16734 }, { "epoch": 0.20008608424299668, "grad_norm": 3.354079008102417, "learning_rate": 9.260525313969995e-06, "loss": 0.6317, "step": 16735 }, { "epoch": 0.20009804038785733, "grad_norm": 2.4328453540802, "learning_rate": 9.260423977235067e-06, "loss": 0.6162, "step": 16736 }, { "epoch": 0.200109996532718, "grad_norm": 4.498820781707764, "learning_rate": 9.260322634111618e-06, "loss": 0.6387, "step": 16737 }, { "epoch": 0.20012195267757865, "grad_norm": 6.159548759460449, "learning_rate": 9.260221284599798e-06, "loss": 0.6375, "step": 16738 }, { "epoch": 0.20013390882243928, "grad_norm": 1.733111023902893, "learning_rate": 9.260119928699763e-06, "loss": 0.5773, "step": 16739 }, { "epoch": 0.20014586496729994, "grad_norm": 2.4301066398620605, "learning_rate": 9.260018566411664e-06, "loss": 0.5999, "step": 16740 }, { "epoch": 0.2001578211121606, "grad_norm": 1.8181488513946533, "learning_rate": 9.259917197735651e-06, "loss": 0.6339, "step": 16741 }, { "epoch": 0.20016977725702126, "grad_norm": 3.9989795684814453, "learning_rate": 9.259815822671877e-06, "loss": 0.6757, "step": 16742 }, { "epoch": 0.2001817334018819, "grad_norm": 2.543682813644409, "learning_rate": 9.259714441220493e-06, "loss": 0.5432, "step": 16743 }, { "epoch": 0.20019368954674255, "grad_norm": 2.949197292327881, "learning_rate": 9.259613053381654e-06, "loss": 0.6045, "step": 16744 }, { "epoch": 0.2002056456916032, "grad_norm": 4.680530071258545, "learning_rate": 9.259511659155509e-06, "loss": 0.6411, "step": 16745 }, { "epoch": 0.20021760183646384, "grad_norm": 2.3609540462493896, "learning_rate": 9.259410258542212e-06, "loss": 0.5531, "step": 16746 }, { "epoch": 0.2002295579813245, "grad_norm": 2.2128684520721436, "learning_rate": 9.259308851541914e-06, "loss": 0.5735, "step": 16747 }, { "epoch": 0.20024151412618515, "grad_norm": 5.664198398590088, "learning_rate": 9.259207438154765e-06, "loss": 0.601, "step": 16748 }, { "epoch": 0.2002534702710458, "grad_norm": 1.8275145292282104, "learning_rate": 9.25910601838092e-06, "loss": 0.7037, "step": 16749 }, { "epoch": 0.20026542641590644, "grad_norm": 1.6500812768936157, "learning_rate": 9.259004592220532e-06, "loss": 0.5521, "step": 16750 }, { "epoch": 0.2002773825607671, "grad_norm": 1.9435230493545532, "learning_rate": 9.258903159673753e-06, "loss": 0.6615, "step": 16751 }, { "epoch": 0.20028933870562776, "grad_norm": 2.315661668777466, "learning_rate": 9.25880172074073e-06, "loss": 0.7678, "step": 16752 }, { "epoch": 0.20030129485048842, "grad_norm": 1.6540099382400513, "learning_rate": 9.258700275421621e-06, "loss": 0.6029, "step": 16753 }, { "epoch": 0.20031325099534905, "grad_norm": 1.977724313735962, "learning_rate": 9.258598823716574e-06, "loss": 0.6287, "step": 16754 }, { "epoch": 0.2003252071402097, "grad_norm": 2.4064083099365234, "learning_rate": 9.258497365625746e-06, "loss": 0.6298, "step": 16755 }, { "epoch": 0.20033716328507037, "grad_norm": 2.879411458969116, "learning_rate": 9.258395901149282e-06, "loss": 0.6082, "step": 16756 }, { "epoch": 0.200349119429931, "grad_norm": 3.173914670944214, "learning_rate": 9.25829443028734e-06, "loss": 0.5747, "step": 16757 }, { "epoch": 0.20036107557479166, "grad_norm": 2.2319207191467285, "learning_rate": 9.258192953040071e-06, "loss": 0.5897, "step": 16758 }, { "epoch": 0.20037303171965232, "grad_norm": 1.483060598373413, "learning_rate": 9.258091469407627e-06, "loss": 0.6426, "step": 16759 }, { "epoch": 0.20038498786451298, "grad_norm": 3.7666032314300537, "learning_rate": 9.257989979390158e-06, "loss": 0.6664, "step": 16760 }, { "epoch": 0.2003969440093736, "grad_norm": 2.3566384315490723, "learning_rate": 9.257888482987818e-06, "loss": 0.5855, "step": 16761 }, { "epoch": 0.20040890015423427, "grad_norm": 2.128938913345337, "learning_rate": 9.25778698020076e-06, "loss": 0.5882, "step": 16762 }, { "epoch": 0.20042085629909492, "grad_norm": 123.32792663574219, "learning_rate": 9.257685471029135e-06, "loss": 0.6214, "step": 16763 }, { "epoch": 0.20043281244395558, "grad_norm": 1.8404288291931152, "learning_rate": 9.257583955473095e-06, "loss": 0.5795, "step": 16764 }, { "epoch": 0.2004447685888162, "grad_norm": 2.618769407272339, "learning_rate": 9.257482433532794e-06, "loss": 0.5683, "step": 16765 }, { "epoch": 0.20045672473367687, "grad_norm": 5.371701717376709, "learning_rate": 9.257380905208382e-06, "loss": 0.6135, "step": 16766 }, { "epoch": 0.20046868087853753, "grad_norm": 2.8479318618774414, "learning_rate": 9.257279370500013e-06, "loss": 0.6772, "step": 16767 }, { "epoch": 0.2004806370233982, "grad_norm": 7.114465713500977, "learning_rate": 9.257177829407837e-06, "loss": 0.7761, "step": 16768 }, { "epoch": 0.20049259316825882, "grad_norm": 10.45573902130127, "learning_rate": 9.257076281932009e-06, "loss": 0.622, "step": 16769 }, { "epoch": 0.20050454931311948, "grad_norm": 4.172950744628906, "learning_rate": 9.256974728072679e-06, "loss": 0.603, "step": 16770 }, { "epoch": 0.20051650545798014, "grad_norm": 2.5477845668792725, "learning_rate": 9.25687316783e-06, "loss": 0.6461, "step": 16771 }, { "epoch": 0.20052846160284077, "grad_norm": 3.1992151737213135, "learning_rate": 9.256771601204126e-06, "loss": 0.5624, "step": 16772 }, { "epoch": 0.20054041774770143, "grad_norm": 2.948899745941162, "learning_rate": 9.256670028195208e-06, "loss": 0.6434, "step": 16773 }, { "epoch": 0.20055237389256209, "grad_norm": 1.9711413383483887, "learning_rate": 9.256568448803398e-06, "loss": 0.6989, "step": 16774 }, { "epoch": 0.20056433003742274, "grad_norm": 2.0961413383483887, "learning_rate": 9.256466863028848e-06, "loss": 0.5513, "step": 16775 }, { "epoch": 0.20057628618228338, "grad_norm": 2.8576719760894775, "learning_rate": 9.25636527087171e-06, "loss": 0.6257, "step": 16776 }, { "epoch": 0.20058824232714403, "grad_norm": 3.2375800609588623, "learning_rate": 9.256263672332138e-06, "loss": 0.6771, "step": 16777 }, { "epoch": 0.2006001984720047, "grad_norm": 3.0641770362854004, "learning_rate": 9.256162067410284e-06, "loss": 0.5984, "step": 16778 }, { "epoch": 0.20061215461686535, "grad_norm": 2.9128100872039795, "learning_rate": 9.256060456106298e-06, "loss": 0.6534, "step": 16779 }, { "epoch": 0.20062411076172598, "grad_norm": 2.7838149070739746, "learning_rate": 9.255958838420336e-06, "loss": 0.5237, "step": 16780 }, { "epoch": 0.20063606690658664, "grad_norm": 2.6650030612945557, "learning_rate": 9.255857214352548e-06, "loss": 0.7335, "step": 16781 }, { "epoch": 0.2006480230514473, "grad_norm": 3.807595729827881, "learning_rate": 9.255755583903086e-06, "loss": 0.6301, "step": 16782 }, { "epoch": 0.20065997919630793, "grad_norm": 1.9052433967590332, "learning_rate": 9.255653947072105e-06, "loss": 0.5276, "step": 16783 }, { "epoch": 0.2006719353411686, "grad_norm": 2.0417466163635254, "learning_rate": 9.255552303859755e-06, "loss": 0.6456, "step": 16784 }, { "epoch": 0.20068389148602925, "grad_norm": 2.8826355934143066, "learning_rate": 9.25545065426619e-06, "loss": 0.5093, "step": 16785 }, { "epoch": 0.2006958476308899, "grad_norm": 1.5091664791107178, "learning_rate": 9.25534899829156e-06, "loss": 0.5239, "step": 16786 }, { "epoch": 0.20070780377575054, "grad_norm": 19.758617401123047, "learning_rate": 9.255247335936019e-06, "loss": 0.713, "step": 16787 }, { "epoch": 0.2007197599206112, "grad_norm": 3.8421518802642822, "learning_rate": 9.255145667199721e-06, "loss": 0.6214, "step": 16788 }, { "epoch": 0.20073171606547185, "grad_norm": 1.668028473854065, "learning_rate": 9.255043992082818e-06, "loss": 0.5157, "step": 16789 }, { "epoch": 0.2007436722103325, "grad_norm": 1.7526988983154297, "learning_rate": 9.25494231058546e-06, "loss": 0.568, "step": 16790 }, { "epoch": 0.20075562835519314, "grad_norm": 4.847332000732422, "learning_rate": 9.254840622707799e-06, "loss": 0.6619, "step": 16791 }, { "epoch": 0.2007675845000538, "grad_norm": 6.666109085083008, "learning_rate": 9.254738928449992e-06, "loss": 0.664, "step": 16792 }, { "epoch": 0.20077954064491446, "grad_norm": 1.8253251314163208, "learning_rate": 9.254637227812188e-06, "loss": 0.5792, "step": 16793 }, { "epoch": 0.2007914967897751, "grad_norm": 1.7841366529464722, "learning_rate": 9.25453552079454e-06, "loss": 0.6221, "step": 16794 }, { "epoch": 0.20080345293463575, "grad_norm": 6.08174991607666, "learning_rate": 9.2544338073972e-06, "loss": 0.6503, "step": 16795 }, { "epoch": 0.2008154090794964, "grad_norm": 1.9002196788787842, "learning_rate": 9.254332087620323e-06, "loss": 0.5561, "step": 16796 }, { "epoch": 0.20082736522435707, "grad_norm": 2.558142900466919, "learning_rate": 9.25423036146406e-06, "loss": 0.6057, "step": 16797 }, { "epoch": 0.2008393213692177, "grad_norm": 3.2171692848205566, "learning_rate": 9.254128628928563e-06, "loss": 0.6883, "step": 16798 }, { "epoch": 0.20085127751407836, "grad_norm": 3.1737897396087646, "learning_rate": 9.254026890013985e-06, "loss": 0.6173, "step": 16799 }, { "epoch": 0.20086323365893902, "grad_norm": 3.9046525955200195, "learning_rate": 9.253925144720478e-06, "loss": 0.5668, "step": 16800 }, { "epoch": 0.20087518980379968, "grad_norm": 2.7137582302093506, "learning_rate": 9.253823393048196e-06, "loss": 0.6375, "step": 16801 }, { "epoch": 0.2008871459486603, "grad_norm": 1.738102674484253, "learning_rate": 9.25372163499729e-06, "loss": 0.5521, "step": 16802 }, { "epoch": 0.20089910209352096, "grad_norm": 3.3111209869384766, "learning_rate": 9.253619870567915e-06, "loss": 0.6369, "step": 16803 }, { "epoch": 0.20091105823838162, "grad_norm": 2.153337001800537, "learning_rate": 9.25351809976022e-06, "loss": 0.5826, "step": 16804 }, { "epoch": 0.20092301438324225, "grad_norm": 2.1069223880767822, "learning_rate": 9.253416322574359e-06, "loss": 0.7427, "step": 16805 }, { "epoch": 0.2009349705281029, "grad_norm": 2.8019251823425293, "learning_rate": 9.253314539010487e-06, "loss": 0.6453, "step": 16806 }, { "epoch": 0.20094692667296357, "grad_norm": 2.075517416000366, "learning_rate": 9.253212749068753e-06, "loss": 0.631, "step": 16807 }, { "epoch": 0.20095888281782423, "grad_norm": 4.35505485534668, "learning_rate": 9.253110952749312e-06, "loss": 0.5602, "step": 16808 }, { "epoch": 0.20097083896268486, "grad_norm": 1.9644560813903809, "learning_rate": 9.253009150052318e-06, "loss": 0.5669, "step": 16809 }, { "epoch": 0.20098279510754552, "grad_norm": 1.6057778596878052, "learning_rate": 9.252907340977917e-06, "loss": 0.5623, "step": 16810 }, { "epoch": 0.20099475125240618, "grad_norm": 2.030954122543335, "learning_rate": 9.25280552552627e-06, "loss": 0.6493, "step": 16811 }, { "epoch": 0.20100670739726684, "grad_norm": 1.9432834386825562, "learning_rate": 9.252703703697526e-06, "loss": 0.6553, "step": 16812 }, { "epoch": 0.20101866354212747, "grad_norm": 1.9841187000274658, "learning_rate": 9.252601875491836e-06, "loss": 0.5597, "step": 16813 }, { "epoch": 0.20103061968698813, "grad_norm": 7.597117900848389, "learning_rate": 9.252500040909355e-06, "loss": 0.5923, "step": 16814 }, { "epoch": 0.20104257583184879, "grad_norm": 3.180820941925049, "learning_rate": 9.252398199950236e-06, "loss": 0.6109, "step": 16815 }, { "epoch": 0.20105453197670942, "grad_norm": 3.2734010219573975, "learning_rate": 9.252296352614629e-06, "loss": 0.6238, "step": 16816 }, { "epoch": 0.20106648812157007, "grad_norm": 6.034388065338135, "learning_rate": 9.25219449890269e-06, "loss": 0.6796, "step": 16817 }, { "epoch": 0.20107844426643073, "grad_norm": 3.307041883468628, "learning_rate": 9.25209263881457e-06, "loss": 0.685, "step": 16818 }, { "epoch": 0.2010904004112914, "grad_norm": 3.0702145099639893, "learning_rate": 9.251990772350421e-06, "loss": 0.6893, "step": 16819 }, { "epoch": 0.20110235655615202, "grad_norm": 3.770592451095581, "learning_rate": 9.251888899510397e-06, "loss": 0.5941, "step": 16820 }, { "epoch": 0.20111431270101268, "grad_norm": 2.913665294647217, "learning_rate": 9.251787020294651e-06, "loss": 0.5877, "step": 16821 }, { "epoch": 0.20112626884587334, "grad_norm": 2.0711727142333984, "learning_rate": 9.251685134703337e-06, "loss": 0.5899, "step": 16822 }, { "epoch": 0.201138224990734, "grad_norm": 1.9095261096954346, "learning_rate": 9.251583242736603e-06, "loss": 0.6164, "step": 16823 }, { "epoch": 0.20115018113559463, "grad_norm": 6.474010944366455, "learning_rate": 9.251481344394607e-06, "loss": 0.7086, "step": 16824 }, { "epoch": 0.2011621372804553, "grad_norm": 1.902929425239563, "learning_rate": 9.251379439677498e-06, "loss": 0.6464, "step": 16825 }, { "epoch": 0.20117409342531595, "grad_norm": 2.32641339302063, "learning_rate": 9.251277528585432e-06, "loss": 0.5366, "step": 16826 }, { "epoch": 0.2011860495701766, "grad_norm": 1.9158910512924194, "learning_rate": 9.251175611118558e-06, "loss": 0.5732, "step": 16827 }, { "epoch": 0.20119800571503724, "grad_norm": 2.9781572818756104, "learning_rate": 9.251073687277032e-06, "loss": 0.5138, "step": 16828 }, { "epoch": 0.2012099618598979, "grad_norm": 1.7082555294036865, "learning_rate": 9.250971757061007e-06, "loss": 0.5462, "step": 16829 }, { "epoch": 0.20122191800475855, "grad_norm": 2.820521354675293, "learning_rate": 9.250869820470634e-06, "loss": 0.6552, "step": 16830 }, { "epoch": 0.20123387414961919, "grad_norm": 3.3985376358032227, "learning_rate": 9.250767877506066e-06, "loss": 0.6989, "step": 16831 }, { "epoch": 0.20124583029447984, "grad_norm": 1.9865821599960327, "learning_rate": 9.250665928167457e-06, "loss": 0.6212, "step": 16832 }, { "epoch": 0.2012577864393405, "grad_norm": 2.4512059688568115, "learning_rate": 9.25056397245496e-06, "loss": 0.6589, "step": 16833 }, { "epoch": 0.20126974258420116, "grad_norm": 2.321223020553589, "learning_rate": 9.250462010368725e-06, "loss": 0.6674, "step": 16834 }, { "epoch": 0.2012816987290618, "grad_norm": 3.393803358078003, "learning_rate": 9.25036004190891e-06, "loss": 0.6262, "step": 16835 }, { "epoch": 0.20129365487392245, "grad_norm": 2.7774159908294678, "learning_rate": 9.250258067075664e-06, "loss": 0.6612, "step": 16836 }, { "epoch": 0.2013056110187831, "grad_norm": 2.32570219039917, "learning_rate": 9.25015608586914e-06, "loss": 0.6647, "step": 16837 }, { "epoch": 0.20131756716364377, "grad_norm": 2.2968807220458984, "learning_rate": 9.250054098289494e-06, "loss": 0.5938, "step": 16838 }, { "epoch": 0.2013295233085044, "grad_norm": 1.509103775024414, "learning_rate": 9.249952104336875e-06, "loss": 0.6194, "step": 16839 }, { "epoch": 0.20134147945336506, "grad_norm": 1.9140799045562744, "learning_rate": 9.249850104011437e-06, "loss": 0.5302, "step": 16840 }, { "epoch": 0.20135343559822572, "grad_norm": 5.811669826507568, "learning_rate": 9.249748097313335e-06, "loss": 0.564, "step": 16841 }, { "epoch": 0.20136539174308635, "grad_norm": 2.4774951934814453, "learning_rate": 9.24964608424272e-06, "loss": 0.6361, "step": 16842 }, { "epoch": 0.201377347887947, "grad_norm": 1.7989110946655273, "learning_rate": 9.249544064799746e-06, "loss": 0.56, "step": 16843 }, { "epoch": 0.20138930403280766, "grad_norm": 2.007868528366089, "learning_rate": 9.249442038984566e-06, "loss": 0.5845, "step": 16844 }, { "epoch": 0.20140126017766832, "grad_norm": 1.8969024419784546, "learning_rate": 9.249340006797332e-06, "loss": 0.5219, "step": 16845 }, { "epoch": 0.20141321632252895, "grad_norm": 2.134026527404785, "learning_rate": 9.249237968238198e-06, "loss": 0.547, "step": 16846 }, { "epoch": 0.2014251724673896, "grad_norm": 1.7398899793624878, "learning_rate": 9.249135923307315e-06, "loss": 0.5881, "step": 16847 }, { "epoch": 0.20143712861225027, "grad_norm": 11.139448165893555, "learning_rate": 9.249033872004838e-06, "loss": 0.5865, "step": 16848 }, { "epoch": 0.20144908475711093, "grad_norm": 5.1945390701293945, "learning_rate": 9.248931814330921e-06, "loss": 0.5597, "step": 16849 }, { "epoch": 0.20146104090197156, "grad_norm": 2.8481152057647705, "learning_rate": 9.248829750285714e-06, "loss": 0.5761, "step": 16850 }, { "epoch": 0.20147299704683222, "grad_norm": 1.6574933528900146, "learning_rate": 9.248727679869374e-06, "loss": 0.6408, "step": 16851 }, { "epoch": 0.20148495319169288, "grad_norm": 1.8152862787246704, "learning_rate": 9.24862560308205e-06, "loss": 0.5808, "step": 16852 }, { "epoch": 0.2014969093365535, "grad_norm": 1.3943394422531128, "learning_rate": 9.248523519923898e-06, "loss": 0.5644, "step": 16853 }, { "epoch": 0.20150886548141417, "grad_norm": 1.8911675214767456, "learning_rate": 9.248421430395067e-06, "loss": 0.5426, "step": 16854 }, { "epoch": 0.20152082162627483, "grad_norm": 2.034669876098633, "learning_rate": 9.248319334495716e-06, "loss": 0.7004, "step": 16855 }, { "epoch": 0.20153277777113549, "grad_norm": 2.628959894180298, "learning_rate": 9.248217232225994e-06, "loss": 0.6473, "step": 16856 }, { "epoch": 0.20154473391599612, "grad_norm": 2.438168525695801, "learning_rate": 9.248115123586056e-06, "loss": 0.6065, "step": 16857 }, { "epoch": 0.20155669006085677, "grad_norm": 11.404533386230469, "learning_rate": 9.248013008576054e-06, "loss": 0.6469, "step": 16858 }, { "epoch": 0.20156864620571743, "grad_norm": 18.734329223632812, "learning_rate": 9.24791088719614e-06, "loss": 0.6406, "step": 16859 }, { "epoch": 0.2015806023505781, "grad_norm": 2.201521635055542, "learning_rate": 9.247808759446468e-06, "loss": 0.5685, "step": 16860 }, { "epoch": 0.20159255849543872, "grad_norm": 1.9820400476455688, "learning_rate": 9.247706625327194e-06, "loss": 0.5831, "step": 16861 }, { "epoch": 0.20160451464029938, "grad_norm": 1.9281198978424072, "learning_rate": 9.247604484838468e-06, "loss": 0.5761, "step": 16862 }, { "epoch": 0.20161647078516004, "grad_norm": 2.663945198059082, "learning_rate": 9.247502337980443e-06, "loss": 0.6041, "step": 16863 }, { "epoch": 0.20162842693002067, "grad_norm": 2.5340304374694824, "learning_rate": 9.247400184753275e-06, "loss": 0.5638, "step": 16864 }, { "epoch": 0.20164038307488133, "grad_norm": 2.7310545444488525, "learning_rate": 9.247298025157114e-06, "loss": 0.5422, "step": 16865 }, { "epoch": 0.201652339219742, "grad_norm": 2.5604641437530518, "learning_rate": 9.247195859192115e-06, "loss": 0.6031, "step": 16866 }, { "epoch": 0.20166429536460265, "grad_norm": 1.919175624847412, "learning_rate": 9.24709368685843e-06, "loss": 0.6128, "step": 16867 }, { "epoch": 0.20167625150946328, "grad_norm": 1.7692906856536865, "learning_rate": 9.246991508156214e-06, "loss": 0.6725, "step": 16868 }, { "epoch": 0.20168820765432394, "grad_norm": 2.672189950942993, "learning_rate": 9.24688932308562e-06, "loss": 0.6154, "step": 16869 }, { "epoch": 0.2017001637991846, "grad_norm": 4.034116268157959, "learning_rate": 9.246787131646798e-06, "loss": 0.6202, "step": 16870 }, { "epoch": 0.20171211994404525, "grad_norm": 2.3446109294891357, "learning_rate": 9.246684933839905e-06, "loss": 0.5146, "step": 16871 }, { "epoch": 0.20172407608890588, "grad_norm": 2.5004847049713135, "learning_rate": 9.246582729665092e-06, "loss": 0.5718, "step": 16872 }, { "epoch": 0.20173603223376654, "grad_norm": 2.363415241241455, "learning_rate": 9.246480519122514e-06, "loss": 0.5812, "step": 16873 }, { "epoch": 0.2017479883786272, "grad_norm": 2.9685404300689697, "learning_rate": 9.246378302212323e-06, "loss": 0.591, "step": 16874 }, { "epoch": 0.20175994452348783, "grad_norm": 1.8484327793121338, "learning_rate": 9.246276078934671e-06, "loss": 0.5751, "step": 16875 }, { "epoch": 0.2017719006683485, "grad_norm": 2.6040937900543213, "learning_rate": 9.246173849289715e-06, "loss": 0.5214, "step": 16876 }, { "epoch": 0.20178385681320915, "grad_norm": 2.393498659133911, "learning_rate": 9.246071613277605e-06, "loss": 0.5504, "step": 16877 }, { "epoch": 0.2017958129580698, "grad_norm": 1.7992298603057861, "learning_rate": 9.245969370898498e-06, "loss": 0.6018, "step": 16878 }, { "epoch": 0.20180776910293044, "grad_norm": 3.955595016479492, "learning_rate": 9.245867122152543e-06, "loss": 0.6562, "step": 16879 }, { "epoch": 0.2018197252477911, "grad_norm": 7.143590450286865, "learning_rate": 9.245764867039895e-06, "loss": 0.6233, "step": 16880 }, { "epoch": 0.20183168139265176, "grad_norm": 2.042170286178589, "learning_rate": 9.245662605560708e-06, "loss": 0.5534, "step": 16881 }, { "epoch": 0.20184363753751242, "grad_norm": 12.478543281555176, "learning_rate": 9.245560337715133e-06, "loss": 0.5852, "step": 16882 }, { "epoch": 0.20185559368237305, "grad_norm": 2.5432896614074707, "learning_rate": 9.245458063503327e-06, "loss": 0.6418, "step": 16883 }, { "epoch": 0.2018675498272337, "grad_norm": 24.581491470336914, "learning_rate": 9.245355782925441e-06, "loss": 0.6564, "step": 16884 }, { "epoch": 0.20187950597209436, "grad_norm": 3.067747116088867, "learning_rate": 9.24525349598163e-06, "loss": 0.652, "step": 16885 }, { "epoch": 0.20189146211695502, "grad_norm": 1.3960641622543335, "learning_rate": 9.245151202672045e-06, "loss": 0.6482, "step": 16886 }, { "epoch": 0.20190341826181565, "grad_norm": 5.363736629486084, "learning_rate": 9.245048902996841e-06, "loss": 0.5728, "step": 16887 }, { "epoch": 0.2019153744066763, "grad_norm": 1.8048921823501587, "learning_rate": 9.244946596956173e-06, "loss": 0.5107, "step": 16888 }, { "epoch": 0.20192733055153697, "grad_norm": 2.176635265350342, "learning_rate": 9.24484428455019e-06, "loss": 0.6739, "step": 16889 }, { "epoch": 0.2019392866963976, "grad_norm": 1.9526985883712769, "learning_rate": 9.244741965779047e-06, "loss": 0.6001, "step": 16890 }, { "epoch": 0.20195124284125826, "grad_norm": 2.5330698490142822, "learning_rate": 9.2446396406429e-06, "loss": 0.5878, "step": 16891 }, { "epoch": 0.20196319898611892, "grad_norm": 2.13924503326416, "learning_rate": 9.244537309141902e-06, "loss": 0.6511, "step": 16892 }, { "epoch": 0.20197515513097958, "grad_norm": 1.7847061157226562, "learning_rate": 9.244434971276205e-06, "loss": 0.5979, "step": 16893 }, { "epoch": 0.2019871112758402, "grad_norm": 8.55433177947998, "learning_rate": 9.24433262704596e-06, "loss": 0.5397, "step": 16894 }, { "epoch": 0.20199906742070087, "grad_norm": 1.6220393180847168, "learning_rate": 9.244230276451324e-06, "loss": 0.541, "step": 16895 }, { "epoch": 0.20201102356556153, "grad_norm": 2.6647350788116455, "learning_rate": 9.244127919492451e-06, "loss": 0.5769, "step": 16896 }, { "epoch": 0.20202297971042218, "grad_norm": 2.280456304550171, "learning_rate": 9.244025556169492e-06, "loss": 0.5864, "step": 16897 }, { "epoch": 0.20203493585528282, "grad_norm": 2.28924560546875, "learning_rate": 9.243923186482604e-06, "loss": 0.6728, "step": 16898 }, { "epoch": 0.20204689200014347, "grad_norm": 3.3115663528442383, "learning_rate": 9.243820810431935e-06, "loss": 0.6067, "step": 16899 }, { "epoch": 0.20205884814500413, "grad_norm": 2.3398637771606445, "learning_rate": 9.243718428017642e-06, "loss": 0.6373, "step": 16900 }, { "epoch": 0.20207080428986476, "grad_norm": 3.0031259059906006, "learning_rate": 9.24361603923988e-06, "loss": 0.5955, "step": 16901 }, { "epoch": 0.20208276043472542, "grad_norm": 2.6502671241760254, "learning_rate": 9.2435136440988e-06, "loss": 0.6053, "step": 16902 }, { "epoch": 0.20209471657958608, "grad_norm": 2.544766664505005, "learning_rate": 9.243411242594555e-06, "loss": 0.6006, "step": 16903 }, { "epoch": 0.20210667272444674, "grad_norm": 2.484135150909424, "learning_rate": 9.2433088347273e-06, "loss": 0.5972, "step": 16904 }, { "epoch": 0.20211862886930737, "grad_norm": 1.668092966079712, "learning_rate": 9.243206420497189e-06, "loss": 0.6686, "step": 16905 }, { "epoch": 0.20213058501416803, "grad_norm": 1.5649746656417847, "learning_rate": 9.243103999904374e-06, "loss": 0.6112, "step": 16906 }, { "epoch": 0.2021425411590287, "grad_norm": 2.63738751411438, "learning_rate": 9.24300157294901e-06, "loss": 0.596, "step": 16907 }, { "epoch": 0.20215449730388935, "grad_norm": 1.8770146369934082, "learning_rate": 9.242899139631252e-06, "loss": 0.6363, "step": 16908 }, { "epoch": 0.20216645344874998, "grad_norm": 3.14616060256958, "learning_rate": 9.24279669995125e-06, "loss": 0.5817, "step": 16909 }, { "epoch": 0.20217840959361064, "grad_norm": 1.6946367025375366, "learning_rate": 9.242694253909158e-06, "loss": 0.6334, "step": 16910 }, { "epoch": 0.2021903657384713, "grad_norm": 5.103203296661377, "learning_rate": 9.242591801505134e-06, "loss": 0.6418, "step": 16911 }, { "epoch": 0.20220232188333193, "grad_norm": 1.356410264968872, "learning_rate": 9.242489342739324e-06, "loss": 0.6544, "step": 16912 }, { "epoch": 0.20221427802819258, "grad_norm": 3.5291125774383545, "learning_rate": 9.24238687761189e-06, "loss": 0.589, "step": 16913 }, { "epoch": 0.20222623417305324, "grad_norm": 2.3288395404815674, "learning_rate": 9.242284406122982e-06, "loss": 0.5992, "step": 16914 }, { "epoch": 0.2022381903179139, "grad_norm": 1.996638298034668, "learning_rate": 9.24218192827275e-06, "loss": 0.558, "step": 16915 }, { "epoch": 0.20225014646277453, "grad_norm": 1.4209953546524048, "learning_rate": 9.242079444061354e-06, "loss": 0.6526, "step": 16916 }, { "epoch": 0.2022621026076352, "grad_norm": 1.762418270111084, "learning_rate": 9.241976953488944e-06, "loss": 0.651, "step": 16917 }, { "epoch": 0.20227405875249585, "grad_norm": 2.975665807723999, "learning_rate": 9.241874456555674e-06, "loss": 0.6671, "step": 16918 }, { "epoch": 0.2022860148973565, "grad_norm": 1.5898685455322266, "learning_rate": 9.241771953261698e-06, "loss": 0.5386, "step": 16919 }, { "epoch": 0.20229797104221714, "grad_norm": 2.176367998123169, "learning_rate": 9.24166944360717e-06, "loss": 0.6253, "step": 16920 }, { "epoch": 0.2023099271870778, "grad_norm": 2.6582510471343994, "learning_rate": 9.241566927592245e-06, "loss": 0.602, "step": 16921 }, { "epoch": 0.20232188333193846, "grad_norm": 2.4089908599853516, "learning_rate": 9.241464405217075e-06, "loss": 0.5391, "step": 16922 }, { "epoch": 0.2023338394767991, "grad_norm": 1.8237433433532715, "learning_rate": 9.241361876481812e-06, "loss": 0.546, "step": 16923 }, { "epoch": 0.20234579562165975, "grad_norm": 2.0998198986053467, "learning_rate": 9.241259341386614e-06, "loss": 0.6334, "step": 16924 }, { "epoch": 0.2023577517665204, "grad_norm": 2.618896722793579, "learning_rate": 9.241156799931632e-06, "loss": 0.638, "step": 16925 }, { "epoch": 0.20236970791138106, "grad_norm": 1.5267634391784668, "learning_rate": 9.24105425211702e-06, "loss": 0.6022, "step": 16926 }, { "epoch": 0.2023816640562417, "grad_norm": 1.925457239151001, "learning_rate": 9.240951697942932e-06, "loss": 0.5156, "step": 16927 }, { "epoch": 0.20239362020110235, "grad_norm": 2.2590816020965576, "learning_rate": 9.240849137409522e-06, "loss": 0.6641, "step": 16928 }, { "epoch": 0.202405576345963, "grad_norm": 2.1931538581848145, "learning_rate": 9.240746570516943e-06, "loss": 0.7138, "step": 16929 }, { "epoch": 0.20241753249082367, "grad_norm": 2.4052162170410156, "learning_rate": 9.24064399726535e-06, "loss": 0.5876, "step": 16930 }, { "epoch": 0.2024294886356843, "grad_norm": 2.782938241958618, "learning_rate": 9.240541417654897e-06, "loss": 0.5487, "step": 16931 }, { "epoch": 0.20244144478054496, "grad_norm": 1.9165400266647339, "learning_rate": 9.240438831685736e-06, "loss": 0.5992, "step": 16932 }, { "epoch": 0.20245340092540562, "grad_norm": 1.8515743017196655, "learning_rate": 9.240336239358023e-06, "loss": 0.6819, "step": 16933 }, { "epoch": 0.20246535707026625, "grad_norm": 2.1540753841400146, "learning_rate": 9.24023364067191e-06, "loss": 0.6196, "step": 16934 }, { "epoch": 0.2024773132151269, "grad_norm": 1.9538395404815674, "learning_rate": 9.24013103562755e-06, "loss": 0.5573, "step": 16935 }, { "epoch": 0.20248926935998757, "grad_norm": 2.7653098106384277, "learning_rate": 9.240028424225101e-06, "loss": 0.6056, "step": 16936 }, { "epoch": 0.20250122550484823, "grad_norm": 1.8957598209381104, "learning_rate": 9.239925806464714e-06, "loss": 0.6499, "step": 16937 }, { "epoch": 0.20251318164970886, "grad_norm": 2.716754674911499, "learning_rate": 9.239823182346541e-06, "loss": 0.5787, "step": 16938 }, { "epoch": 0.20252513779456952, "grad_norm": 28.588743209838867, "learning_rate": 9.239720551870741e-06, "loss": 0.6061, "step": 16939 }, { "epoch": 0.20253709393943017, "grad_norm": 1.7043181657791138, "learning_rate": 9.239617915037464e-06, "loss": 0.6535, "step": 16940 }, { "epoch": 0.20254905008429083, "grad_norm": 2.269451141357422, "learning_rate": 9.239515271846864e-06, "loss": 0.6571, "step": 16941 }, { "epoch": 0.20256100622915146, "grad_norm": 2.3085227012634277, "learning_rate": 9.239412622299095e-06, "loss": 0.5639, "step": 16942 }, { "epoch": 0.20257296237401212, "grad_norm": 3.743304491043091, "learning_rate": 9.239309966394314e-06, "loss": 0.6211, "step": 16943 }, { "epoch": 0.20258491851887278, "grad_norm": 1.9963535070419312, "learning_rate": 9.23920730413267e-06, "loss": 0.557, "step": 16944 }, { "epoch": 0.20259687466373344, "grad_norm": 3.2605621814727783, "learning_rate": 9.239104635514323e-06, "loss": 0.6834, "step": 16945 }, { "epoch": 0.20260883080859407, "grad_norm": 4.218748092651367, "learning_rate": 9.239001960539421e-06, "loss": 0.555, "step": 16946 }, { "epoch": 0.20262078695345473, "grad_norm": 1.81154465675354, "learning_rate": 9.23889927920812e-06, "loss": 0.6435, "step": 16947 }, { "epoch": 0.2026327430983154, "grad_norm": 4.203983306884766, "learning_rate": 9.238796591520578e-06, "loss": 0.6639, "step": 16948 }, { "epoch": 0.20264469924317602, "grad_norm": 1.6613168716430664, "learning_rate": 9.238693897476942e-06, "loss": 0.6578, "step": 16949 }, { "epoch": 0.20265665538803668, "grad_norm": 2.6072325706481934, "learning_rate": 9.23859119707737e-06, "loss": 0.6559, "step": 16950 }, { "epoch": 0.20266861153289734, "grad_norm": 1.7333998680114746, "learning_rate": 9.238488490322015e-06, "loss": 0.606, "step": 16951 }, { "epoch": 0.202680567677758, "grad_norm": 2.4966259002685547, "learning_rate": 9.238385777211034e-06, "loss": 0.7594, "step": 16952 }, { "epoch": 0.20269252382261863, "grad_norm": 7.019386291503906, "learning_rate": 9.238283057744577e-06, "loss": 0.5801, "step": 16953 }, { "epoch": 0.20270447996747928, "grad_norm": 2.432615041732788, "learning_rate": 9.238180331922799e-06, "loss": 0.5874, "step": 16954 }, { "epoch": 0.20271643611233994, "grad_norm": 1.9849287271499634, "learning_rate": 9.238077599745853e-06, "loss": 0.6595, "step": 16955 }, { "epoch": 0.2027283922572006, "grad_norm": 1.8175266981124878, "learning_rate": 9.237974861213897e-06, "loss": 0.5263, "step": 16956 }, { "epoch": 0.20274034840206123, "grad_norm": 1.7253282070159912, "learning_rate": 9.237872116327082e-06, "loss": 0.5726, "step": 16957 }, { "epoch": 0.2027523045469219, "grad_norm": 2.7950327396392822, "learning_rate": 9.237769365085563e-06, "loss": 0.6386, "step": 16958 }, { "epoch": 0.20276426069178255, "grad_norm": 2.635114908218384, "learning_rate": 9.237666607489495e-06, "loss": 0.6299, "step": 16959 }, { "epoch": 0.20277621683664318, "grad_norm": 2.3776607513427734, "learning_rate": 9.237563843539028e-06, "loss": 0.5518, "step": 16960 }, { "epoch": 0.20278817298150384, "grad_norm": 2.194730043411255, "learning_rate": 9.23746107323432e-06, "loss": 0.6399, "step": 16961 }, { "epoch": 0.2028001291263645, "grad_norm": 2.192770481109619, "learning_rate": 9.237358296575524e-06, "loss": 0.5808, "step": 16962 }, { "epoch": 0.20281208527122516, "grad_norm": 2.3342795372009277, "learning_rate": 9.237255513562795e-06, "loss": 0.7434, "step": 16963 }, { "epoch": 0.2028240414160858, "grad_norm": 1.993503212928772, "learning_rate": 9.237152724196286e-06, "loss": 0.629, "step": 16964 }, { "epoch": 0.20283599756094645, "grad_norm": 6.950597286224365, "learning_rate": 9.237049928476152e-06, "loss": 0.5607, "step": 16965 }, { "epoch": 0.2028479537058071, "grad_norm": 1.9102963209152222, "learning_rate": 9.236947126402545e-06, "loss": 0.742, "step": 16966 }, { "epoch": 0.20285990985066776, "grad_norm": 2.56675386428833, "learning_rate": 9.236844317975621e-06, "loss": 0.6207, "step": 16967 }, { "epoch": 0.2028718659955284, "grad_norm": 2.4224703311920166, "learning_rate": 9.236741503195536e-06, "loss": 0.6201, "step": 16968 }, { "epoch": 0.20288382214038905, "grad_norm": 1.597743272781372, "learning_rate": 9.23663868206244e-06, "loss": 0.6211, "step": 16969 }, { "epoch": 0.2028957782852497, "grad_norm": 1.5542881488800049, "learning_rate": 9.23653585457649e-06, "loss": 0.6362, "step": 16970 }, { "epoch": 0.20290773443011034, "grad_norm": 5.53652286529541, "learning_rate": 9.236433020737839e-06, "loss": 0.5825, "step": 16971 }, { "epoch": 0.202919690574971, "grad_norm": 1.8514339923858643, "learning_rate": 9.236330180546642e-06, "loss": 0.6342, "step": 16972 }, { "epoch": 0.20293164671983166, "grad_norm": 2.8035292625427246, "learning_rate": 9.236227334003054e-06, "loss": 0.6758, "step": 16973 }, { "epoch": 0.20294360286469232, "grad_norm": 1.9204329252243042, "learning_rate": 9.236124481107225e-06, "loss": 0.5747, "step": 16974 }, { "epoch": 0.20295555900955295, "grad_norm": 2.1329798698425293, "learning_rate": 9.236021621859315e-06, "loss": 0.6717, "step": 16975 }, { "epoch": 0.2029675151544136, "grad_norm": 1.8463324308395386, "learning_rate": 9.235918756259474e-06, "loss": 0.5621, "step": 16976 }, { "epoch": 0.20297947129927427, "grad_norm": 1.650514006614685, "learning_rate": 9.235815884307859e-06, "loss": 0.7095, "step": 16977 }, { "epoch": 0.20299142744413493, "grad_norm": 2.3401358127593994, "learning_rate": 9.235713006004621e-06, "loss": 0.651, "step": 16978 }, { "epoch": 0.20300338358899556, "grad_norm": 3.1214613914489746, "learning_rate": 9.235610121349919e-06, "loss": 0.6926, "step": 16979 }, { "epoch": 0.20301533973385621, "grad_norm": 2.685983657836914, "learning_rate": 9.235507230343902e-06, "loss": 0.6333, "step": 16980 }, { "epoch": 0.20302729587871687, "grad_norm": 3.0954737663269043, "learning_rate": 9.235404332986728e-06, "loss": 0.6048, "step": 16981 }, { "epoch": 0.2030392520235775, "grad_norm": 3.3518617153167725, "learning_rate": 9.23530142927855e-06, "loss": 0.6588, "step": 16982 }, { "epoch": 0.20305120816843816, "grad_norm": 2.1359620094299316, "learning_rate": 9.235198519219523e-06, "loss": 0.6075, "step": 16983 }, { "epoch": 0.20306316431329882, "grad_norm": 1.7817350625991821, "learning_rate": 9.2350956028098e-06, "loss": 0.6564, "step": 16984 }, { "epoch": 0.20307512045815948, "grad_norm": 2.496293544769287, "learning_rate": 9.234992680049536e-06, "loss": 0.661, "step": 16985 }, { "epoch": 0.2030870766030201, "grad_norm": 3.4482245445251465, "learning_rate": 9.234889750938887e-06, "loss": 0.6601, "step": 16986 }, { "epoch": 0.20309903274788077, "grad_norm": 1.5196971893310547, "learning_rate": 9.234786815478004e-06, "loss": 0.5996, "step": 16987 }, { "epoch": 0.20311098889274143, "grad_norm": 3.395315170288086, "learning_rate": 9.234683873667044e-06, "loss": 0.5732, "step": 16988 }, { "epoch": 0.2031229450376021, "grad_norm": 2.037147283554077, "learning_rate": 9.23458092550616e-06, "loss": 0.6842, "step": 16989 }, { "epoch": 0.20313490118246272, "grad_norm": 2.1732661724090576, "learning_rate": 9.234477970995507e-06, "loss": 0.5528, "step": 16990 }, { "epoch": 0.20314685732732338, "grad_norm": 1.7630640268325806, "learning_rate": 9.23437501013524e-06, "loss": 0.7028, "step": 16991 }, { "epoch": 0.20315881347218404, "grad_norm": 2.044067621231079, "learning_rate": 9.234272042925511e-06, "loss": 0.5394, "step": 16992 }, { "epoch": 0.20317076961704467, "grad_norm": 1.5289466381072998, "learning_rate": 9.234169069366478e-06, "loss": 0.6417, "step": 16993 }, { "epoch": 0.20318272576190533, "grad_norm": 3.1395952701568604, "learning_rate": 9.234066089458291e-06, "loss": 0.5337, "step": 16994 }, { "epoch": 0.20319468190676598, "grad_norm": 1.673742413520813, "learning_rate": 9.233963103201109e-06, "loss": 0.5369, "step": 16995 }, { "epoch": 0.20320663805162664, "grad_norm": 6.874505519866943, "learning_rate": 9.233860110595083e-06, "loss": 0.5218, "step": 16996 }, { "epoch": 0.20321859419648727, "grad_norm": 4.486498832702637, "learning_rate": 9.233757111640369e-06, "loss": 0.5838, "step": 16997 }, { "epoch": 0.20323055034134793, "grad_norm": 3.0128774642944336, "learning_rate": 9.233654106337123e-06, "loss": 0.6087, "step": 16998 }, { "epoch": 0.2032425064862086, "grad_norm": 3.7016584873199463, "learning_rate": 9.233551094685495e-06, "loss": 0.6909, "step": 16999 }, { "epoch": 0.20325446263106925, "grad_norm": 4.072598457336426, "learning_rate": 9.233448076685643e-06, "loss": 0.6504, "step": 17000 }, { "epoch": 0.20326641877592988, "grad_norm": 1.886199712753296, "learning_rate": 9.23334505233772e-06, "loss": 0.6413, "step": 17001 }, { "epoch": 0.20327837492079054, "grad_norm": 3.0802619457244873, "learning_rate": 9.233242021641883e-06, "loss": 0.6361, "step": 17002 }, { "epoch": 0.2032903310656512, "grad_norm": 1.8639400005340576, "learning_rate": 9.233138984598283e-06, "loss": 0.6237, "step": 17003 }, { "epoch": 0.20330228721051186, "grad_norm": 2.6543102264404297, "learning_rate": 9.233035941207075e-06, "loss": 0.5173, "step": 17004 }, { "epoch": 0.2033142433553725, "grad_norm": 3.0752878189086914, "learning_rate": 9.232932891468418e-06, "loss": 0.6688, "step": 17005 }, { "epoch": 0.20332619950023315, "grad_norm": 4.087076187133789, "learning_rate": 9.23282983538246e-06, "loss": 0.5582, "step": 17006 }, { "epoch": 0.2033381556450938, "grad_norm": 2.3695359230041504, "learning_rate": 9.23272677294936e-06, "loss": 0.6645, "step": 17007 }, { "epoch": 0.20335011178995444, "grad_norm": 1.7658085823059082, "learning_rate": 9.23262370416927e-06, "loss": 0.6379, "step": 17008 }, { "epoch": 0.2033620679348151, "grad_norm": 2.7283427715301514, "learning_rate": 9.232520629042347e-06, "loss": 0.7079, "step": 17009 }, { "epoch": 0.20337402407967575, "grad_norm": 1.6850472688674927, "learning_rate": 9.232417547568745e-06, "loss": 0.5758, "step": 17010 }, { "epoch": 0.2033859802245364, "grad_norm": 1.5493388175964355, "learning_rate": 9.232314459748614e-06, "loss": 0.6594, "step": 17011 }, { "epoch": 0.20339793636939704, "grad_norm": 1.5143460035324097, "learning_rate": 9.232211365582115e-06, "loss": 0.5817, "step": 17012 }, { "epoch": 0.2034098925142577, "grad_norm": 3.1690194606781006, "learning_rate": 9.232108265069402e-06, "loss": 0.5024, "step": 17013 }, { "epoch": 0.20342184865911836, "grad_norm": 2.151071310043335, "learning_rate": 9.232005158210624e-06, "loss": 0.6722, "step": 17014 }, { "epoch": 0.20343380480397902, "grad_norm": 2.1051273345947266, "learning_rate": 9.231902045005941e-06, "loss": 0.6726, "step": 17015 }, { "epoch": 0.20344576094883965, "grad_norm": 2.6006417274475098, "learning_rate": 9.231798925455506e-06, "loss": 0.6724, "step": 17016 }, { "epoch": 0.2034577170937003, "grad_norm": 2.2876973152160645, "learning_rate": 9.231695799559474e-06, "loss": 0.5814, "step": 17017 }, { "epoch": 0.20346967323856097, "grad_norm": 2.17393159866333, "learning_rate": 9.231592667317999e-06, "loss": 0.6511, "step": 17018 }, { "epoch": 0.2034816293834216, "grad_norm": 2.0322086811065674, "learning_rate": 9.231489528731233e-06, "loss": 0.6754, "step": 17019 }, { "epoch": 0.20349358552828226, "grad_norm": 1.9405933618545532, "learning_rate": 9.231386383799337e-06, "loss": 0.6498, "step": 17020 }, { "epoch": 0.20350554167314291, "grad_norm": 3.084282875061035, "learning_rate": 9.23128323252246e-06, "loss": 0.6235, "step": 17021 }, { "epoch": 0.20351749781800357, "grad_norm": 2.393467903137207, "learning_rate": 9.231180074900758e-06, "loss": 0.6014, "step": 17022 }, { "epoch": 0.2035294539628642, "grad_norm": 1.7187579870224, "learning_rate": 9.231076910934388e-06, "loss": 0.6585, "step": 17023 }, { "epoch": 0.20354141010772486, "grad_norm": 1.787834882736206, "learning_rate": 9.230973740623503e-06, "loss": 0.548, "step": 17024 }, { "epoch": 0.20355336625258552, "grad_norm": 9.413414001464844, "learning_rate": 9.230870563968258e-06, "loss": 0.5994, "step": 17025 }, { "epoch": 0.20356532239744618, "grad_norm": 3.723790168762207, "learning_rate": 9.230767380968807e-06, "loss": 0.7399, "step": 17026 }, { "epoch": 0.2035772785423068, "grad_norm": 3.517174243927002, "learning_rate": 9.230664191625307e-06, "loss": 0.6741, "step": 17027 }, { "epoch": 0.20358923468716747, "grad_norm": 2.549072265625, "learning_rate": 9.230560995937909e-06, "loss": 0.597, "step": 17028 }, { "epoch": 0.20360119083202813, "grad_norm": 1.4669983386993408, "learning_rate": 9.230457793906768e-06, "loss": 0.6047, "step": 17029 }, { "epoch": 0.20361314697688876, "grad_norm": 4.551060676574707, "learning_rate": 9.230354585532045e-06, "loss": 0.6316, "step": 17030 }, { "epoch": 0.20362510312174942, "grad_norm": 3.3551838397979736, "learning_rate": 9.230251370813887e-06, "loss": 0.5578, "step": 17031 }, { "epoch": 0.20363705926661008, "grad_norm": 1.8980234861373901, "learning_rate": 9.230148149752453e-06, "loss": 0.617, "step": 17032 }, { "epoch": 0.20364901541147074, "grad_norm": 4.397526741027832, "learning_rate": 9.230044922347897e-06, "loss": 0.7051, "step": 17033 }, { "epoch": 0.20366097155633137, "grad_norm": 2.5449202060699463, "learning_rate": 9.229941688600373e-06, "loss": 0.6121, "step": 17034 }, { "epoch": 0.20367292770119202, "grad_norm": 2.4905905723571777, "learning_rate": 9.229838448510037e-06, "loss": 0.6456, "step": 17035 }, { "epoch": 0.20368488384605268, "grad_norm": 1.7371081113815308, "learning_rate": 9.229735202077043e-06, "loss": 0.591, "step": 17036 }, { "epoch": 0.20369683999091334, "grad_norm": 1.5171607732772827, "learning_rate": 9.229631949301546e-06, "loss": 0.6634, "step": 17037 }, { "epoch": 0.20370879613577397, "grad_norm": 3.573776960372925, "learning_rate": 9.229528690183701e-06, "loss": 0.5889, "step": 17038 }, { "epoch": 0.20372075228063463, "grad_norm": 1.9059818983078003, "learning_rate": 9.229425424723662e-06, "loss": 0.7231, "step": 17039 }, { "epoch": 0.2037327084254953, "grad_norm": 3.8597347736358643, "learning_rate": 9.229322152921585e-06, "loss": 0.5951, "step": 17040 }, { "epoch": 0.20374466457035592, "grad_norm": 2.9597158432006836, "learning_rate": 9.229218874777626e-06, "loss": 0.5977, "step": 17041 }, { "epoch": 0.20375662071521658, "grad_norm": 7.49216365814209, "learning_rate": 9.229115590291935e-06, "loss": 0.6948, "step": 17042 }, { "epoch": 0.20376857686007724, "grad_norm": 3.058196544647217, "learning_rate": 9.229012299464674e-06, "loss": 0.5973, "step": 17043 }, { "epoch": 0.2037805330049379, "grad_norm": 1.9948759078979492, "learning_rate": 9.228909002295992e-06, "loss": 0.695, "step": 17044 }, { "epoch": 0.20379248914979853, "grad_norm": 2.0720436573028564, "learning_rate": 9.228805698786045e-06, "loss": 0.6503, "step": 17045 }, { "epoch": 0.2038044452946592, "grad_norm": 1.6252665519714355, "learning_rate": 9.228702388934989e-06, "loss": 0.6273, "step": 17046 }, { "epoch": 0.20381640143951985, "grad_norm": 1.9418721199035645, "learning_rate": 9.22859907274298e-06, "loss": 0.5608, "step": 17047 }, { "epoch": 0.2038283575843805, "grad_norm": 1.4104418754577637, "learning_rate": 9.22849575021017e-06, "loss": 0.5213, "step": 17048 }, { "epoch": 0.20384031372924113, "grad_norm": 2.3706881999969482, "learning_rate": 9.228392421336718e-06, "loss": 0.5733, "step": 17049 }, { "epoch": 0.2038522698741018, "grad_norm": 4.381110668182373, "learning_rate": 9.228289086122776e-06, "loss": 0.6607, "step": 17050 }, { "epoch": 0.20386422601896245, "grad_norm": 2.6294450759887695, "learning_rate": 9.228185744568498e-06, "loss": 0.7042, "step": 17051 }, { "epoch": 0.20387618216382308, "grad_norm": 1.687294840812683, "learning_rate": 9.22808239667404e-06, "loss": 0.5437, "step": 17052 }, { "epoch": 0.20388813830868374, "grad_norm": 2.8956217765808105, "learning_rate": 9.22797904243956e-06, "loss": 0.6578, "step": 17053 }, { "epoch": 0.2039000944535444, "grad_norm": 2.6449403762817383, "learning_rate": 9.227875681865209e-06, "loss": 0.6273, "step": 17054 }, { "epoch": 0.20391205059840506, "grad_norm": 1.4101266860961914, "learning_rate": 9.227772314951143e-06, "loss": 0.6148, "step": 17055 }, { "epoch": 0.2039240067432657, "grad_norm": 2.4592316150665283, "learning_rate": 9.227668941697518e-06, "loss": 0.6236, "step": 17056 }, { "epoch": 0.20393596288812635, "grad_norm": 2.0700817108154297, "learning_rate": 9.227565562104488e-06, "loss": 0.5434, "step": 17057 }, { "epoch": 0.203947919032987, "grad_norm": 1.7990808486938477, "learning_rate": 9.227462176172208e-06, "loss": 0.5826, "step": 17058 }, { "epoch": 0.20395987517784767, "grad_norm": 1.5931010246276855, "learning_rate": 9.227358783900835e-06, "loss": 0.6413, "step": 17059 }, { "epoch": 0.2039718313227083, "grad_norm": 1.54194176197052, "learning_rate": 9.227255385290521e-06, "loss": 0.651, "step": 17060 }, { "epoch": 0.20398378746756896, "grad_norm": 3.6435749530792236, "learning_rate": 9.227151980341423e-06, "loss": 0.6156, "step": 17061 }, { "epoch": 0.20399574361242961, "grad_norm": 1.8471328020095825, "learning_rate": 9.227048569053696e-06, "loss": 0.6684, "step": 17062 }, { "epoch": 0.20400769975729027, "grad_norm": 1.8418010473251343, "learning_rate": 9.226945151427495e-06, "loss": 0.6525, "step": 17063 }, { "epoch": 0.2040196559021509, "grad_norm": 1.4784328937530518, "learning_rate": 9.226841727462974e-06, "loss": 0.6102, "step": 17064 }, { "epoch": 0.20403161204701156, "grad_norm": 1.9389368295669556, "learning_rate": 9.226738297160288e-06, "loss": 0.5991, "step": 17065 }, { "epoch": 0.20404356819187222, "grad_norm": 2.6168484687805176, "learning_rate": 9.226634860519594e-06, "loss": 0.5468, "step": 17066 }, { "epoch": 0.20405552433673285, "grad_norm": 2.533539295196533, "learning_rate": 9.226531417541044e-06, "loss": 0.7209, "step": 17067 }, { "epoch": 0.2040674804815935, "grad_norm": 3.409485340118408, "learning_rate": 9.226427968224798e-06, "loss": 0.6464, "step": 17068 }, { "epoch": 0.20407943662645417, "grad_norm": 1.464885950088501, "learning_rate": 9.226324512571008e-06, "loss": 0.7522, "step": 17069 }, { "epoch": 0.20409139277131483, "grad_norm": 1.6283726692199707, "learning_rate": 9.226221050579828e-06, "loss": 0.5305, "step": 17070 }, { "epoch": 0.20410334891617546, "grad_norm": 2.0853538513183594, "learning_rate": 9.226117582251415e-06, "loss": 0.5436, "step": 17071 }, { "epoch": 0.20411530506103612, "grad_norm": 3.220395565032959, "learning_rate": 9.226014107585923e-06, "loss": 0.615, "step": 17072 }, { "epoch": 0.20412726120589678, "grad_norm": 2.1011242866516113, "learning_rate": 9.225910626583509e-06, "loss": 0.5844, "step": 17073 }, { "epoch": 0.20413921735075743, "grad_norm": 1.2474769353866577, "learning_rate": 9.225807139244325e-06, "loss": 0.6641, "step": 17074 }, { "epoch": 0.20415117349561807, "grad_norm": 1.8287053108215332, "learning_rate": 9.225703645568531e-06, "loss": 0.5221, "step": 17075 }, { "epoch": 0.20416312964047872, "grad_norm": 1.8389880657196045, "learning_rate": 9.225600145556276e-06, "loss": 0.6592, "step": 17076 }, { "epoch": 0.20417508578533938, "grad_norm": 1.3142207860946655, "learning_rate": 9.225496639207722e-06, "loss": 0.5306, "step": 17077 }, { "epoch": 0.20418704193020001, "grad_norm": 2.4496989250183105, "learning_rate": 9.22539312652302e-06, "loss": 0.6008, "step": 17078 }, { "epoch": 0.20419899807506067, "grad_norm": 1.759675145149231, "learning_rate": 9.225289607502324e-06, "loss": 0.6173, "step": 17079 }, { "epoch": 0.20421095421992133, "grad_norm": 2.236398696899414, "learning_rate": 9.225186082145793e-06, "loss": 0.7322, "step": 17080 }, { "epoch": 0.204222910364782, "grad_norm": 7.744231224060059, "learning_rate": 9.22508255045358e-06, "loss": 0.6987, "step": 17081 }, { "epoch": 0.20423486650964262, "grad_norm": 1.3751577138900757, "learning_rate": 9.224979012425841e-06, "loss": 0.5695, "step": 17082 }, { "epoch": 0.20424682265450328, "grad_norm": 2.142873525619507, "learning_rate": 9.224875468062731e-06, "loss": 0.6071, "step": 17083 }, { "epoch": 0.20425877879936394, "grad_norm": 1.6329383850097656, "learning_rate": 9.224771917364403e-06, "loss": 0.6242, "step": 17084 }, { "epoch": 0.2042707349442246, "grad_norm": 1.887028455734253, "learning_rate": 9.224668360331017e-06, "loss": 0.5581, "step": 17085 }, { "epoch": 0.20428269108908523, "grad_norm": 1.8056256771087646, "learning_rate": 9.224564796962725e-06, "loss": 0.5963, "step": 17086 }, { "epoch": 0.2042946472339459, "grad_norm": 2.192906379699707, "learning_rate": 9.224461227259683e-06, "loss": 0.6503, "step": 17087 }, { "epoch": 0.20430660337880654, "grad_norm": 2.485196828842163, "learning_rate": 9.224357651222047e-06, "loss": 0.6184, "step": 17088 }, { "epoch": 0.20431855952366718, "grad_norm": 2.3216567039489746, "learning_rate": 9.224254068849972e-06, "loss": 0.6968, "step": 17089 }, { "epoch": 0.20433051566852783, "grad_norm": 1.808617353439331, "learning_rate": 9.224150480143612e-06, "loss": 0.5574, "step": 17090 }, { "epoch": 0.2043424718133885, "grad_norm": 2.044017791748047, "learning_rate": 9.224046885103124e-06, "loss": 0.6508, "step": 17091 }, { "epoch": 0.20435442795824915, "grad_norm": 1.7640841007232666, "learning_rate": 9.223943283728662e-06, "loss": 0.649, "step": 17092 }, { "epoch": 0.20436638410310978, "grad_norm": 4.500053882598877, "learning_rate": 9.223839676020381e-06, "loss": 0.6145, "step": 17093 }, { "epoch": 0.20437834024797044, "grad_norm": 2.874316453933716, "learning_rate": 9.223736061978439e-06, "loss": 0.5938, "step": 17094 }, { "epoch": 0.2043902963928311, "grad_norm": 13.223615646362305, "learning_rate": 9.22363244160299e-06, "loss": 0.6745, "step": 17095 }, { "epoch": 0.20440225253769176, "grad_norm": 1.7706893682479858, "learning_rate": 9.223528814894189e-06, "loss": 0.5828, "step": 17096 }, { "epoch": 0.2044142086825524, "grad_norm": 1.9345622062683105, "learning_rate": 9.22342518185219e-06, "loss": 0.5514, "step": 17097 }, { "epoch": 0.20442616482741305, "grad_norm": 2.0632641315460205, "learning_rate": 9.223321542477151e-06, "loss": 0.5417, "step": 17098 }, { "epoch": 0.2044381209722737, "grad_norm": 1.402815580368042, "learning_rate": 9.223217896769226e-06, "loss": 0.5672, "step": 17099 }, { "epoch": 0.20445007711713434, "grad_norm": 16.150936126708984, "learning_rate": 9.223114244728572e-06, "loss": 0.5743, "step": 17100 }, { "epoch": 0.204462033261995, "grad_norm": 5.489880084991455, "learning_rate": 9.223010586355343e-06, "loss": 0.5637, "step": 17101 }, { "epoch": 0.20447398940685566, "grad_norm": 1.456839680671692, "learning_rate": 9.222906921649693e-06, "loss": 0.6792, "step": 17102 }, { "epoch": 0.2044859455517163, "grad_norm": 2.1430721282958984, "learning_rate": 9.22280325061178e-06, "loss": 0.5776, "step": 17103 }, { "epoch": 0.20449790169657694, "grad_norm": 2.2969746589660645, "learning_rate": 9.222699573241757e-06, "loss": 0.6081, "step": 17104 }, { "epoch": 0.2045098578414376, "grad_norm": 2.1115670204162598, "learning_rate": 9.222595889539783e-06, "loss": 0.5741, "step": 17105 }, { "epoch": 0.20452181398629826, "grad_norm": 1.6362370252609253, "learning_rate": 9.222492199506012e-06, "loss": 0.5875, "step": 17106 }, { "epoch": 0.20453377013115892, "grad_norm": 2.2459139823913574, "learning_rate": 9.222388503140598e-06, "loss": 0.6928, "step": 17107 }, { "epoch": 0.20454572627601955, "grad_norm": 1.8245999813079834, "learning_rate": 9.222284800443697e-06, "loss": 0.6192, "step": 17108 }, { "epoch": 0.2045576824208802, "grad_norm": 4.337442398071289, "learning_rate": 9.222181091415466e-06, "loss": 0.5372, "step": 17109 }, { "epoch": 0.20456963856574087, "grad_norm": 3.4812350273132324, "learning_rate": 9.222077376056059e-06, "loss": 0.6041, "step": 17110 }, { "epoch": 0.2045815947106015, "grad_norm": 1.6962405443191528, "learning_rate": 9.22197365436563e-06, "loss": 0.6053, "step": 17111 }, { "epoch": 0.20459355085546216, "grad_norm": 1.440981149673462, "learning_rate": 9.221869926344338e-06, "loss": 0.5355, "step": 17112 }, { "epoch": 0.20460550700032282, "grad_norm": 2.361490488052368, "learning_rate": 9.221766191992337e-06, "loss": 0.7204, "step": 17113 }, { "epoch": 0.20461746314518348, "grad_norm": 2.3121535778045654, "learning_rate": 9.221662451309782e-06, "loss": 0.5673, "step": 17114 }, { "epoch": 0.2046294192900441, "grad_norm": 1.8347108364105225, "learning_rate": 9.221558704296832e-06, "loss": 0.6337, "step": 17115 }, { "epoch": 0.20464137543490477, "grad_norm": 1.6524018049240112, "learning_rate": 9.221454950953636e-06, "loss": 0.6097, "step": 17116 }, { "epoch": 0.20465333157976542, "grad_norm": 2.092395067214966, "learning_rate": 9.221351191280355e-06, "loss": 0.5871, "step": 17117 }, { "epoch": 0.20466528772462608, "grad_norm": 3.7511940002441406, "learning_rate": 9.221247425277143e-06, "loss": 0.6658, "step": 17118 }, { "epoch": 0.2046772438694867, "grad_norm": 1.9076814651489258, "learning_rate": 9.221143652944156e-06, "loss": 0.6393, "step": 17119 }, { "epoch": 0.20468920001434737, "grad_norm": 2.3338658809661865, "learning_rate": 9.221039874281547e-06, "loss": 0.6225, "step": 17120 }, { "epoch": 0.20470115615920803, "grad_norm": 1.5776702165603638, "learning_rate": 9.220936089289477e-06, "loss": 0.6581, "step": 17121 }, { "epoch": 0.2047131123040687, "grad_norm": 3.125795602798462, "learning_rate": 9.220832297968096e-06, "loss": 0.4996, "step": 17122 }, { "epoch": 0.20472506844892932, "grad_norm": 10.341301918029785, "learning_rate": 9.220728500317562e-06, "loss": 0.4954, "step": 17123 }, { "epoch": 0.20473702459378998, "grad_norm": 2.230269432067871, "learning_rate": 9.22062469633803e-06, "loss": 0.6417, "step": 17124 }, { "epoch": 0.20474898073865064, "grad_norm": 2.5061137676239014, "learning_rate": 9.220520886029659e-06, "loss": 0.6084, "step": 17125 }, { "epoch": 0.20476093688351127, "grad_norm": 2.3419530391693115, "learning_rate": 9.220417069392599e-06, "loss": 0.6174, "step": 17126 }, { "epoch": 0.20477289302837193, "grad_norm": 2.9652342796325684, "learning_rate": 9.22031324642701e-06, "loss": 0.644, "step": 17127 }, { "epoch": 0.20478484917323259, "grad_norm": 1.835728406906128, "learning_rate": 9.220209417133045e-06, "loss": 0.6071, "step": 17128 }, { "epoch": 0.20479680531809324, "grad_norm": 24.476884841918945, "learning_rate": 9.220105581510863e-06, "loss": 0.5888, "step": 17129 }, { "epoch": 0.20480876146295388, "grad_norm": 1.8789963722229004, "learning_rate": 9.220001739560615e-06, "loss": 0.5659, "step": 17130 }, { "epoch": 0.20482071760781453, "grad_norm": 4.7691521644592285, "learning_rate": 9.219897891282461e-06, "loss": 0.5881, "step": 17131 }, { "epoch": 0.2048326737526752, "grad_norm": 2.9194412231445312, "learning_rate": 9.219794036676557e-06, "loss": 0.6522, "step": 17132 }, { "epoch": 0.20484462989753585, "grad_norm": 3.6189308166503906, "learning_rate": 9.219690175743055e-06, "loss": 0.5715, "step": 17133 }, { "epoch": 0.20485658604239648, "grad_norm": 1.7854830026626587, "learning_rate": 9.219586308482112e-06, "loss": 0.6326, "step": 17134 }, { "epoch": 0.20486854218725714, "grad_norm": 1.6740655899047852, "learning_rate": 9.219482434893886e-06, "loss": 0.6185, "step": 17135 }, { "epoch": 0.2048804983321178, "grad_norm": 2.208496332168579, "learning_rate": 9.219378554978528e-06, "loss": 0.6065, "step": 17136 }, { "epoch": 0.20489245447697843, "grad_norm": 4.7035980224609375, "learning_rate": 9.2192746687362e-06, "loss": 0.6805, "step": 17137 }, { "epoch": 0.2049044106218391, "grad_norm": 5.072344779968262, "learning_rate": 9.219170776167055e-06, "loss": 0.7043, "step": 17138 }, { "epoch": 0.20491636676669975, "grad_norm": 3.741877555847168, "learning_rate": 9.219066877271246e-06, "loss": 0.6508, "step": 17139 }, { "epoch": 0.2049283229115604, "grad_norm": 9.987333297729492, "learning_rate": 9.218962972048932e-06, "loss": 0.6315, "step": 17140 }, { "epoch": 0.20494027905642104, "grad_norm": 4.210124492645264, "learning_rate": 9.218859060500269e-06, "loss": 0.5748, "step": 17141 }, { "epoch": 0.2049522352012817, "grad_norm": 2.645737648010254, "learning_rate": 9.218755142625411e-06, "loss": 0.6239, "step": 17142 }, { "epoch": 0.20496419134614235, "grad_norm": 1.3153469562530518, "learning_rate": 9.218651218424514e-06, "loss": 0.6431, "step": 17143 }, { "epoch": 0.204976147491003, "grad_norm": 3.015827178955078, "learning_rate": 9.218547287897736e-06, "loss": 0.578, "step": 17144 }, { "epoch": 0.20498810363586364, "grad_norm": 1.3635390996932983, "learning_rate": 9.21844335104523e-06, "loss": 0.636, "step": 17145 }, { "epoch": 0.2050000597807243, "grad_norm": 1.602736234664917, "learning_rate": 9.218339407867153e-06, "loss": 0.6479, "step": 17146 }, { "epoch": 0.20501201592558496, "grad_norm": 2.5310590267181396, "learning_rate": 9.218235458363663e-06, "loss": 0.6138, "step": 17147 }, { "epoch": 0.2050239720704456, "grad_norm": 3.2722225189208984, "learning_rate": 9.218131502534912e-06, "loss": 0.6374, "step": 17148 }, { "epoch": 0.20503592821530625, "grad_norm": 5.440561771392822, "learning_rate": 9.218027540381058e-06, "loss": 0.6827, "step": 17149 }, { "epoch": 0.2050478843601669, "grad_norm": 2.5823874473571777, "learning_rate": 9.217923571902258e-06, "loss": 0.649, "step": 17150 }, { "epoch": 0.20505984050502757, "grad_norm": 1.5647252798080444, "learning_rate": 9.217819597098667e-06, "loss": 0.6868, "step": 17151 }, { "epoch": 0.2050717966498882, "grad_norm": 1.6449494361877441, "learning_rate": 9.217715615970439e-06, "loss": 0.6319, "step": 17152 }, { "epoch": 0.20508375279474886, "grad_norm": 2.5944430828094482, "learning_rate": 9.21761162851773e-06, "loss": 0.5895, "step": 17153 }, { "epoch": 0.20509570893960952, "grad_norm": 13.922765731811523, "learning_rate": 9.2175076347407e-06, "loss": 0.6054, "step": 17154 }, { "epoch": 0.20510766508447018, "grad_norm": 3.976552724838257, "learning_rate": 9.217403634639503e-06, "loss": 0.6148, "step": 17155 }, { "epoch": 0.2051196212293308, "grad_norm": 5.636290550231934, "learning_rate": 9.21729962821429e-06, "loss": 0.5852, "step": 17156 }, { "epoch": 0.20513157737419146, "grad_norm": 2.365061044692993, "learning_rate": 9.217195615465227e-06, "loss": 0.6452, "step": 17157 }, { "epoch": 0.20514353351905212, "grad_norm": 2.2448039054870605, "learning_rate": 9.21709159639246e-06, "loss": 0.671, "step": 17158 }, { "epoch": 0.20515548966391275, "grad_norm": 2.366680145263672, "learning_rate": 9.216987570996151e-06, "loss": 0.6726, "step": 17159 }, { "epoch": 0.2051674458087734, "grad_norm": 4.232933044433594, "learning_rate": 9.216883539276453e-06, "loss": 0.6665, "step": 17160 }, { "epoch": 0.20517940195363407, "grad_norm": 2.324880599975586, "learning_rate": 9.216779501233524e-06, "loss": 0.6723, "step": 17161 }, { "epoch": 0.20519135809849473, "grad_norm": 2.083594560623169, "learning_rate": 9.21667545686752e-06, "loss": 0.5509, "step": 17162 }, { "epoch": 0.20520331424335536, "grad_norm": 2.528379440307617, "learning_rate": 9.216571406178593e-06, "loss": 0.6956, "step": 17163 }, { "epoch": 0.20521527038821602, "grad_norm": 1.37473726272583, "learning_rate": 9.216467349166905e-06, "loss": 0.6091, "step": 17164 }, { "epoch": 0.20522722653307668, "grad_norm": 6.981767177581787, "learning_rate": 9.216363285832607e-06, "loss": 0.6931, "step": 17165 }, { "epoch": 0.20523918267793734, "grad_norm": 2.097261905670166, "learning_rate": 9.21625921617586e-06, "loss": 0.5987, "step": 17166 }, { "epoch": 0.20525113882279797, "grad_norm": 9.582542419433594, "learning_rate": 9.216155140196816e-06, "loss": 0.5847, "step": 17167 }, { "epoch": 0.20526309496765863, "grad_norm": 2.267239809036255, "learning_rate": 9.216051057895633e-06, "loss": 0.5462, "step": 17168 }, { "epoch": 0.20527505111251929, "grad_norm": 2.2075815200805664, "learning_rate": 9.215946969272465e-06, "loss": 0.645, "step": 17169 }, { "epoch": 0.20528700725737994, "grad_norm": 2.1216909885406494, "learning_rate": 9.215842874327472e-06, "loss": 0.7073, "step": 17170 }, { "epoch": 0.20529896340224058, "grad_norm": 2.5281994342803955, "learning_rate": 9.215738773060806e-06, "loss": 0.5597, "step": 17171 }, { "epoch": 0.20531091954710123, "grad_norm": 1.66246497631073, "learning_rate": 9.215634665472624e-06, "loss": 0.6254, "step": 17172 }, { "epoch": 0.2053228756919619, "grad_norm": 2.16805100440979, "learning_rate": 9.215530551563083e-06, "loss": 0.6495, "step": 17173 }, { "epoch": 0.20533483183682252, "grad_norm": 5.931216716766357, "learning_rate": 9.215426431332339e-06, "loss": 0.5888, "step": 17174 }, { "epoch": 0.20534678798168318, "grad_norm": 1.7674888372421265, "learning_rate": 9.21532230478055e-06, "loss": 0.6268, "step": 17175 }, { "epoch": 0.20535874412654384, "grad_norm": 2.084197759628296, "learning_rate": 9.215218171907868e-06, "loss": 0.565, "step": 17176 }, { "epoch": 0.2053707002714045, "grad_norm": 9.635798454284668, "learning_rate": 9.215114032714451e-06, "loss": 0.5689, "step": 17177 }, { "epoch": 0.20538265641626513, "grad_norm": 1.6929938793182373, "learning_rate": 9.215009887200458e-06, "loss": 0.6081, "step": 17178 }, { "epoch": 0.2053946125611258, "grad_norm": 2.4791622161865234, "learning_rate": 9.214905735366042e-06, "loss": 0.6414, "step": 17179 }, { "epoch": 0.20540656870598645, "grad_norm": 2.3812317848205566, "learning_rate": 9.214801577211359e-06, "loss": 0.5581, "step": 17180 }, { "epoch": 0.2054185248508471, "grad_norm": 1.7498154640197754, "learning_rate": 9.214697412736567e-06, "loss": 0.5573, "step": 17181 }, { "epoch": 0.20543048099570774, "grad_norm": 2.780071258544922, "learning_rate": 9.214593241941818e-06, "loss": 0.7753, "step": 17182 }, { "epoch": 0.2054424371405684, "grad_norm": 3.8657968044281006, "learning_rate": 9.214489064827275e-06, "loss": 0.5776, "step": 17183 }, { "epoch": 0.20545439328542905, "grad_norm": 1.457353115081787, "learning_rate": 9.21438488139309e-06, "loss": 0.5566, "step": 17184 }, { "epoch": 0.20546634943028969, "grad_norm": 1.9646707773208618, "learning_rate": 9.21428069163942e-06, "loss": 0.6236, "step": 17185 }, { "epoch": 0.20547830557515034, "grad_norm": 1.7556606531143188, "learning_rate": 9.214176495566421e-06, "loss": 0.6595, "step": 17186 }, { "epoch": 0.205490261720011, "grad_norm": 1.5427547693252563, "learning_rate": 9.214072293174248e-06, "loss": 0.5975, "step": 17187 }, { "epoch": 0.20550221786487166, "grad_norm": 1.8219034671783447, "learning_rate": 9.21396808446306e-06, "loss": 0.5834, "step": 17188 }, { "epoch": 0.2055141740097323, "grad_norm": 1.8882344961166382, "learning_rate": 9.21386386943301e-06, "loss": 0.5954, "step": 17189 }, { "epoch": 0.20552613015459295, "grad_norm": 2.154137134552002, "learning_rate": 9.21375964808426e-06, "loss": 0.6553, "step": 17190 }, { "epoch": 0.2055380862994536, "grad_norm": 2.326164484024048, "learning_rate": 9.21365542041696e-06, "loss": 0.6041, "step": 17191 }, { "epoch": 0.20555004244431427, "grad_norm": 4.024430274963379, "learning_rate": 9.21355118643127e-06, "loss": 0.5949, "step": 17192 }, { "epoch": 0.2055619985891749, "grad_norm": 2.130774974822998, "learning_rate": 9.213446946127342e-06, "loss": 0.5864, "step": 17193 }, { "epoch": 0.20557395473403556, "grad_norm": 2.4087252616882324, "learning_rate": 9.213342699505336e-06, "loss": 0.6603, "step": 17194 }, { "epoch": 0.20558591087889622, "grad_norm": 2.4920239448547363, "learning_rate": 9.21323844656541e-06, "loss": 0.6631, "step": 17195 }, { "epoch": 0.20559786702375685, "grad_norm": 2.2473464012145996, "learning_rate": 9.213134187307717e-06, "loss": 0.6865, "step": 17196 }, { "epoch": 0.2056098231686175, "grad_norm": 3.919229030609131, "learning_rate": 9.213029921732414e-06, "loss": 0.5937, "step": 17197 }, { "epoch": 0.20562177931347816, "grad_norm": 3.642604112625122, "learning_rate": 9.212925649839657e-06, "loss": 0.5746, "step": 17198 }, { "epoch": 0.20563373545833882, "grad_norm": 2.309722661972046, "learning_rate": 9.212821371629604e-06, "loss": 0.6525, "step": 17199 }, { "epoch": 0.20564569160319945, "grad_norm": 2.0851035118103027, "learning_rate": 9.21271708710241e-06, "loss": 0.6238, "step": 17200 }, { "epoch": 0.2056576477480601, "grad_norm": 2.0050158500671387, "learning_rate": 9.212612796258233e-06, "loss": 0.6433, "step": 17201 }, { "epoch": 0.20566960389292077, "grad_norm": 2.8520984649658203, "learning_rate": 9.212508499097226e-06, "loss": 0.5523, "step": 17202 }, { "epoch": 0.20568156003778143, "grad_norm": 1.389358401298523, "learning_rate": 9.212404195619548e-06, "loss": 0.5788, "step": 17203 }, { "epoch": 0.20569351618264206, "grad_norm": 2.9535179138183594, "learning_rate": 9.212299885825356e-06, "loss": 0.6613, "step": 17204 }, { "epoch": 0.20570547232750272, "grad_norm": 1.520040512084961, "learning_rate": 9.212195569714805e-06, "loss": 0.6496, "step": 17205 }, { "epoch": 0.20571742847236338, "grad_norm": 1.4462790489196777, "learning_rate": 9.212091247288052e-06, "loss": 0.5165, "step": 17206 }, { "epoch": 0.205729384617224, "grad_norm": 2.2951836585998535, "learning_rate": 9.211986918545253e-06, "loss": 0.6032, "step": 17207 }, { "epoch": 0.20574134076208467, "grad_norm": 2.0985705852508545, "learning_rate": 9.211882583486564e-06, "loss": 0.577, "step": 17208 }, { "epoch": 0.20575329690694533, "grad_norm": 1.8810410499572754, "learning_rate": 9.211778242112144e-06, "loss": 0.5109, "step": 17209 }, { "epoch": 0.20576525305180599, "grad_norm": 1.5937025547027588, "learning_rate": 9.211673894422145e-06, "loss": 0.556, "step": 17210 }, { "epoch": 0.20577720919666662, "grad_norm": 1.9939919710159302, "learning_rate": 9.211569540416728e-06, "loss": 0.6187, "step": 17211 }, { "epoch": 0.20578916534152727, "grad_norm": 2.8653390407562256, "learning_rate": 9.211465180096046e-06, "loss": 0.5614, "step": 17212 }, { "epoch": 0.20580112148638793, "grad_norm": 2.0004944801330566, "learning_rate": 9.211360813460258e-06, "loss": 0.6406, "step": 17213 }, { "epoch": 0.2058130776312486, "grad_norm": 1.9580402374267578, "learning_rate": 9.211256440509519e-06, "loss": 0.6031, "step": 17214 }, { "epoch": 0.20582503377610922, "grad_norm": 1.805675983428955, "learning_rate": 9.211152061243987e-06, "loss": 0.5261, "step": 17215 }, { "epoch": 0.20583698992096988, "grad_norm": 1.6326158046722412, "learning_rate": 9.211047675663815e-06, "loss": 0.5652, "step": 17216 }, { "epoch": 0.20584894606583054, "grad_norm": 2.890399932861328, "learning_rate": 9.210943283769164e-06, "loss": 0.6613, "step": 17217 }, { "epoch": 0.20586090221069117, "grad_norm": 1.943982720375061, "learning_rate": 9.210838885560189e-06, "loss": 0.6233, "step": 17218 }, { "epoch": 0.20587285835555183, "grad_norm": 2.2184784412384033, "learning_rate": 9.210734481037044e-06, "loss": 0.6715, "step": 17219 }, { "epoch": 0.2058848145004125, "grad_norm": 2.8578271865844727, "learning_rate": 9.21063007019989e-06, "loss": 0.5422, "step": 17220 }, { "epoch": 0.20589677064527315, "grad_norm": 2.9786698818206787, "learning_rate": 9.210525653048881e-06, "loss": 0.6546, "step": 17221 }, { "epoch": 0.20590872679013378, "grad_norm": 1.8625656366348267, "learning_rate": 9.210421229584172e-06, "loss": 0.6454, "step": 17222 }, { "epoch": 0.20592068293499444, "grad_norm": 2.382948875427246, "learning_rate": 9.210316799805923e-06, "loss": 0.6161, "step": 17223 }, { "epoch": 0.2059326390798551, "grad_norm": 1.5857212543487549, "learning_rate": 9.210212363714289e-06, "loss": 0.6253, "step": 17224 }, { "epoch": 0.20594459522471575, "grad_norm": 1.6464390754699707, "learning_rate": 9.210107921309427e-06, "loss": 0.6671, "step": 17225 }, { "epoch": 0.20595655136957638, "grad_norm": 2.7540054321289062, "learning_rate": 9.21000347259149e-06, "loss": 0.6926, "step": 17226 }, { "epoch": 0.20596850751443704, "grad_norm": 1.9214829206466675, "learning_rate": 9.209899017560642e-06, "loss": 0.5884, "step": 17227 }, { "epoch": 0.2059804636592977, "grad_norm": 2.7292184829711914, "learning_rate": 9.209794556217034e-06, "loss": 0.6482, "step": 17228 }, { "epoch": 0.20599241980415836, "grad_norm": 2.08009934425354, "learning_rate": 9.209690088560824e-06, "loss": 0.6606, "step": 17229 }, { "epoch": 0.206004375949019, "grad_norm": 2.860883951187134, "learning_rate": 9.209585614592166e-06, "loss": 0.6414, "step": 17230 }, { "epoch": 0.20601633209387965, "grad_norm": 4.2752861976623535, "learning_rate": 9.209481134311223e-06, "loss": 0.5634, "step": 17231 }, { "epoch": 0.2060282882387403, "grad_norm": 4.799306869506836, "learning_rate": 9.209376647718148e-06, "loss": 0.5969, "step": 17232 }, { "epoch": 0.20604024438360094, "grad_norm": 1.76731538772583, "learning_rate": 9.209272154813096e-06, "loss": 0.6608, "step": 17233 }, { "epoch": 0.2060522005284616, "grad_norm": 1.728252649307251, "learning_rate": 9.209167655596226e-06, "loss": 0.5638, "step": 17234 }, { "epoch": 0.20606415667332226, "grad_norm": 2.0772337913513184, "learning_rate": 9.209063150067694e-06, "loss": 0.6652, "step": 17235 }, { "epoch": 0.20607611281818292, "grad_norm": 2.5510799884796143, "learning_rate": 9.208958638227658e-06, "loss": 0.6716, "step": 17236 }, { "epoch": 0.20608806896304355, "grad_norm": 3.183351993560791, "learning_rate": 9.208854120076272e-06, "loss": 0.668, "step": 17237 }, { "epoch": 0.2061000251079042, "grad_norm": 2.305088520050049, "learning_rate": 9.208749595613695e-06, "loss": 0.653, "step": 17238 }, { "epoch": 0.20611198125276486, "grad_norm": 2.9637043476104736, "learning_rate": 9.208645064840082e-06, "loss": 0.5836, "step": 17239 }, { "epoch": 0.20612393739762552, "grad_norm": 3.8500325679779053, "learning_rate": 9.208540527755593e-06, "loss": 0.5707, "step": 17240 }, { "epoch": 0.20613589354248615, "grad_norm": 1.366530418395996, "learning_rate": 9.208435984360382e-06, "loss": 0.5479, "step": 17241 }, { "epoch": 0.2061478496873468, "grad_norm": 2.540576696395874, "learning_rate": 9.208331434654606e-06, "loss": 0.6432, "step": 17242 }, { "epoch": 0.20615980583220747, "grad_norm": 5.097006797790527, "learning_rate": 9.20822687863842e-06, "loss": 0.706, "step": 17243 }, { "epoch": 0.2061717619770681, "grad_norm": 1.8839999437332153, "learning_rate": 9.208122316311985e-06, "loss": 0.6645, "step": 17244 }, { "epoch": 0.20618371812192876, "grad_norm": 1.8463903665542603, "learning_rate": 9.208017747675454e-06, "loss": 0.6351, "step": 17245 }, { "epoch": 0.20619567426678942, "grad_norm": 3.9255497455596924, "learning_rate": 9.207913172728989e-06, "loss": 0.5934, "step": 17246 }, { "epoch": 0.20620763041165008, "grad_norm": 2.5987985134124756, "learning_rate": 9.20780859147274e-06, "loss": 0.5772, "step": 17247 }, { "epoch": 0.2062195865565107, "grad_norm": 1.9715547561645508, "learning_rate": 9.207704003906868e-06, "loss": 0.525, "step": 17248 }, { "epoch": 0.20623154270137137, "grad_norm": 2.6978235244750977, "learning_rate": 9.207599410031528e-06, "loss": 0.6413, "step": 17249 }, { "epoch": 0.20624349884623203, "grad_norm": 1.6212304830551147, "learning_rate": 9.20749480984688e-06, "loss": 0.6614, "step": 17250 }, { "epoch": 0.20625545499109268, "grad_norm": 3.295959234237671, "learning_rate": 9.207390203353075e-06, "loss": 0.5929, "step": 17251 }, { "epoch": 0.20626741113595332, "grad_norm": 2.8274729251861572, "learning_rate": 9.207285590550276e-06, "loss": 0.6755, "step": 17252 }, { "epoch": 0.20627936728081397, "grad_norm": 2.9960572719573975, "learning_rate": 9.207180971438635e-06, "loss": 0.7253, "step": 17253 }, { "epoch": 0.20629132342567463, "grad_norm": 1.8285452127456665, "learning_rate": 9.207076346018314e-06, "loss": 0.6245, "step": 17254 }, { "epoch": 0.20630327957053526, "grad_norm": 2.3283896446228027, "learning_rate": 9.206971714289467e-06, "loss": 0.5417, "step": 17255 }, { "epoch": 0.20631523571539592, "grad_norm": 6.826381206512451, "learning_rate": 9.20686707625225e-06, "loss": 0.7262, "step": 17256 }, { "epoch": 0.20632719186025658, "grad_norm": 4.355699062347412, "learning_rate": 9.20676243190682e-06, "loss": 0.7193, "step": 17257 }, { "epoch": 0.20633914800511724, "grad_norm": 2.367778778076172, "learning_rate": 9.206657781253336e-06, "loss": 0.6335, "step": 17258 }, { "epoch": 0.20635110414997787, "grad_norm": 3.0335819721221924, "learning_rate": 9.206553124291952e-06, "loss": 0.7089, "step": 17259 }, { "epoch": 0.20636306029483853, "grad_norm": 1.997272253036499, "learning_rate": 9.20644846102283e-06, "loss": 0.5899, "step": 17260 }, { "epoch": 0.2063750164396992, "grad_norm": 2.790966749191284, "learning_rate": 9.20634379144612e-06, "loss": 0.539, "step": 17261 }, { "epoch": 0.20638697258455985, "grad_norm": 1.8521630764007568, "learning_rate": 9.206239115561986e-06, "loss": 0.5881, "step": 17262 }, { "epoch": 0.20639892872942048, "grad_norm": 2.0434248447418213, "learning_rate": 9.206134433370578e-06, "loss": 0.6477, "step": 17263 }, { "epoch": 0.20641088487428114, "grad_norm": 12.734107971191406, "learning_rate": 9.206029744872058e-06, "loss": 0.6467, "step": 17264 }, { "epoch": 0.2064228410191418, "grad_norm": 5.4647626876831055, "learning_rate": 9.205925050066582e-06, "loss": 0.4895, "step": 17265 }, { "epoch": 0.20643479716400243, "grad_norm": 4.397409439086914, "learning_rate": 9.205820348954307e-06, "loss": 0.6246, "step": 17266 }, { "epoch": 0.20644675330886308, "grad_norm": 4.123495101928711, "learning_rate": 9.205715641535388e-06, "loss": 0.6433, "step": 17267 }, { "epoch": 0.20645870945372374, "grad_norm": 2.466846466064453, "learning_rate": 9.205610927809983e-06, "loss": 0.5014, "step": 17268 }, { "epoch": 0.2064706655985844, "grad_norm": 3.7627689838409424, "learning_rate": 9.20550620777825e-06, "loss": 0.5281, "step": 17269 }, { "epoch": 0.20648262174344503, "grad_norm": 2.4378793239593506, "learning_rate": 9.205401481440345e-06, "loss": 0.6246, "step": 17270 }, { "epoch": 0.2064945778883057, "grad_norm": 2.463078737258911, "learning_rate": 9.205296748796428e-06, "loss": 0.621, "step": 17271 }, { "epoch": 0.20650653403316635, "grad_norm": 3.3273849487304688, "learning_rate": 9.20519200984665e-06, "loss": 0.5286, "step": 17272 }, { "epoch": 0.206518490178027, "grad_norm": 1.8957409858703613, "learning_rate": 9.205087264591175e-06, "loss": 0.6683, "step": 17273 }, { "epoch": 0.20653044632288764, "grad_norm": 1.9150042533874512, "learning_rate": 9.204982513030155e-06, "loss": 0.6926, "step": 17274 }, { "epoch": 0.2065424024677483, "grad_norm": 1.5155178308486938, "learning_rate": 9.204877755163748e-06, "loss": 0.65, "step": 17275 }, { "epoch": 0.20655435861260896, "grad_norm": 2.102607250213623, "learning_rate": 9.204772990992114e-06, "loss": 0.6576, "step": 17276 }, { "epoch": 0.2065663147574696, "grad_norm": 3.1952297687530518, "learning_rate": 9.204668220515406e-06, "loss": 0.6532, "step": 17277 }, { "epoch": 0.20657827090233025, "grad_norm": 2.106046438217163, "learning_rate": 9.204563443733785e-06, "loss": 0.6155, "step": 17278 }, { "epoch": 0.2065902270471909, "grad_norm": 1.516650676727295, "learning_rate": 9.204458660647404e-06, "loss": 0.6067, "step": 17279 }, { "epoch": 0.20660218319205156, "grad_norm": 1.9669270515441895, "learning_rate": 9.204353871256424e-06, "loss": 0.7611, "step": 17280 }, { "epoch": 0.2066141393369122, "grad_norm": 1.4602261781692505, "learning_rate": 9.204249075560999e-06, "loss": 0.5629, "step": 17281 }, { "epoch": 0.20662609548177285, "grad_norm": 2.227409601211548, "learning_rate": 9.204144273561289e-06, "loss": 0.5595, "step": 17282 }, { "epoch": 0.2066380516266335, "grad_norm": 3.29445743560791, "learning_rate": 9.204039465257448e-06, "loss": 0.6237, "step": 17283 }, { "epoch": 0.20665000777149417, "grad_norm": 1.486392855644226, "learning_rate": 9.203934650649636e-06, "loss": 0.5838, "step": 17284 }, { "epoch": 0.2066619639163548, "grad_norm": 3.93442702293396, "learning_rate": 9.20382982973801e-06, "loss": 0.4906, "step": 17285 }, { "epoch": 0.20667392006121546, "grad_norm": 2.7070391178131104, "learning_rate": 9.203725002522725e-06, "loss": 0.6098, "step": 17286 }, { "epoch": 0.20668587620607612, "grad_norm": 4.843733310699463, "learning_rate": 9.20362016900394e-06, "loss": 0.6531, "step": 17287 }, { "epoch": 0.20669783235093678, "grad_norm": 1.776117205619812, "learning_rate": 9.203515329181811e-06, "loss": 0.6176, "step": 17288 }, { "epoch": 0.2067097884957974, "grad_norm": 1.5096572637557983, "learning_rate": 9.203410483056496e-06, "loss": 0.522, "step": 17289 }, { "epoch": 0.20672174464065807, "grad_norm": 2.729201078414917, "learning_rate": 9.203305630628153e-06, "loss": 0.7069, "step": 17290 }, { "epoch": 0.20673370078551873, "grad_norm": 2.6046297550201416, "learning_rate": 9.203200771896936e-06, "loss": 0.465, "step": 17291 }, { "epoch": 0.20674565693037936, "grad_norm": 2.4192447662353516, "learning_rate": 9.203095906863007e-06, "loss": 0.5322, "step": 17292 }, { "epoch": 0.20675761307524002, "grad_norm": 5.522736549377441, "learning_rate": 9.202991035526519e-06, "loss": 0.5835, "step": 17293 }, { "epoch": 0.20676956922010067, "grad_norm": 1.3811579942703247, "learning_rate": 9.202886157887632e-06, "loss": 0.5001, "step": 17294 }, { "epoch": 0.20678152536496133, "grad_norm": 2.392291307449341, "learning_rate": 9.2027812739465e-06, "loss": 0.6631, "step": 17295 }, { "epoch": 0.20679348150982196, "grad_norm": 3.0728299617767334, "learning_rate": 9.202676383703286e-06, "loss": 0.6261, "step": 17296 }, { "epoch": 0.20680543765468262, "grad_norm": 2.0035948753356934, "learning_rate": 9.202571487158141e-06, "loss": 0.6414, "step": 17297 }, { "epoch": 0.20681739379954328, "grad_norm": 3.17822003364563, "learning_rate": 9.202466584311227e-06, "loss": 0.6548, "step": 17298 }, { "epoch": 0.20682934994440394, "grad_norm": 1.8894195556640625, "learning_rate": 9.202361675162698e-06, "loss": 0.5691, "step": 17299 }, { "epoch": 0.20684130608926457, "grad_norm": 3.444563150405884, "learning_rate": 9.202256759712713e-06, "loss": 0.6878, "step": 17300 }, { "epoch": 0.20685326223412523, "grad_norm": 1.6647140979766846, "learning_rate": 9.20215183796143e-06, "loss": 0.5453, "step": 17301 }, { "epoch": 0.2068652183789859, "grad_norm": 2.3849329948425293, "learning_rate": 9.202046909909006e-06, "loss": 0.5516, "step": 17302 }, { "epoch": 0.20687717452384652, "grad_norm": 3.6445820331573486, "learning_rate": 9.201941975555595e-06, "loss": 0.6447, "step": 17303 }, { "epoch": 0.20688913066870718, "grad_norm": 1.9446022510528564, "learning_rate": 9.201837034901359e-06, "loss": 0.5149, "step": 17304 }, { "epoch": 0.20690108681356784, "grad_norm": 2.501386880874634, "learning_rate": 9.201732087946453e-06, "loss": 0.6791, "step": 17305 }, { "epoch": 0.2069130429584285, "grad_norm": 1.9626377820968628, "learning_rate": 9.201627134691036e-06, "loss": 0.6576, "step": 17306 }, { "epoch": 0.20692499910328913, "grad_norm": 2.4795284271240234, "learning_rate": 9.201522175135263e-06, "loss": 0.6341, "step": 17307 }, { "epoch": 0.20693695524814978, "grad_norm": 2.165346384048462, "learning_rate": 9.201417209279293e-06, "loss": 0.4966, "step": 17308 }, { "epoch": 0.20694891139301044, "grad_norm": 3.5390732288360596, "learning_rate": 9.201312237123283e-06, "loss": 0.6811, "step": 17309 }, { "epoch": 0.2069608675378711, "grad_norm": 2.8819093704223633, "learning_rate": 9.201207258667389e-06, "loss": 0.5974, "step": 17310 }, { "epoch": 0.20697282368273173, "grad_norm": 2.6689741611480713, "learning_rate": 9.20110227391177e-06, "loss": 0.6389, "step": 17311 }, { "epoch": 0.2069847798275924, "grad_norm": 1.8353055715560913, "learning_rate": 9.200997282856586e-06, "loss": 0.5847, "step": 17312 }, { "epoch": 0.20699673597245305, "grad_norm": 2.258030414581299, "learning_rate": 9.200892285501989e-06, "loss": 0.7339, "step": 17313 }, { "epoch": 0.20700869211731368, "grad_norm": 6.14788293838501, "learning_rate": 9.20078728184814e-06, "loss": 0.8829, "step": 17314 }, { "epoch": 0.20702064826217434, "grad_norm": 2.906266212463379, "learning_rate": 9.200682271895193e-06, "loss": 0.6274, "step": 17315 }, { "epoch": 0.207032604407035, "grad_norm": 2.7627696990966797, "learning_rate": 9.200577255643312e-06, "loss": 0.6513, "step": 17316 }, { "epoch": 0.20704456055189566, "grad_norm": 2.3481225967407227, "learning_rate": 9.200472233092648e-06, "loss": 0.5902, "step": 17317 }, { "epoch": 0.2070565166967563, "grad_norm": 1.92720627784729, "learning_rate": 9.200367204243363e-06, "loss": 0.6691, "step": 17318 }, { "epoch": 0.20706847284161695, "grad_norm": 1.5917472839355469, "learning_rate": 9.20026216909561e-06, "loss": 0.6206, "step": 17319 }, { "epoch": 0.2070804289864776, "grad_norm": 2.8008551597595215, "learning_rate": 9.200157127649552e-06, "loss": 0.6265, "step": 17320 }, { "epoch": 0.20709238513133826, "grad_norm": 1.7082178592681885, "learning_rate": 9.20005207990534e-06, "loss": 0.5769, "step": 17321 }, { "epoch": 0.2071043412761989, "grad_norm": 1.8434224128723145, "learning_rate": 9.199947025863136e-06, "loss": 0.6704, "step": 17322 }, { "epoch": 0.20711629742105955, "grad_norm": 1.7873023748397827, "learning_rate": 9.199841965523097e-06, "loss": 0.6045, "step": 17323 }, { "epoch": 0.2071282535659202, "grad_norm": 2.1629631519317627, "learning_rate": 9.199736898885382e-06, "loss": 0.5869, "step": 17324 }, { "epoch": 0.20714020971078084, "grad_norm": 1.9874807596206665, "learning_rate": 9.199631825950144e-06, "loss": 0.5636, "step": 17325 }, { "epoch": 0.2071521658556415, "grad_norm": 1.6321184635162354, "learning_rate": 9.199526746717543e-06, "loss": 0.5091, "step": 17326 }, { "epoch": 0.20716412200050216, "grad_norm": 6.8568806648254395, "learning_rate": 9.199421661187738e-06, "loss": 0.6854, "step": 17327 }, { "epoch": 0.20717607814536282, "grad_norm": 2.6479129791259766, "learning_rate": 9.199316569360886e-06, "loss": 0.5988, "step": 17328 }, { "epoch": 0.20718803429022345, "grad_norm": 1.9916037321090698, "learning_rate": 9.199211471237141e-06, "loss": 0.6681, "step": 17329 }, { "epoch": 0.2071999904350841, "grad_norm": 3.926276922225952, "learning_rate": 9.199106366816665e-06, "loss": 0.6916, "step": 17330 }, { "epoch": 0.20721194657994477, "grad_norm": 2.443140983581543, "learning_rate": 9.199001256099614e-06, "loss": 0.6157, "step": 17331 }, { "epoch": 0.20722390272480543, "grad_norm": 1.7924022674560547, "learning_rate": 9.198896139086146e-06, "loss": 0.5718, "step": 17332 }, { "epoch": 0.20723585886966606, "grad_norm": 1.610866665840149, "learning_rate": 9.19879101577642e-06, "loss": 0.6034, "step": 17333 }, { "epoch": 0.20724781501452672, "grad_norm": 1.8046716451644897, "learning_rate": 9.198685886170589e-06, "loss": 0.7214, "step": 17334 }, { "epoch": 0.20725977115938737, "grad_norm": 1.782112956047058, "learning_rate": 9.198580750268815e-06, "loss": 0.6886, "step": 17335 }, { "epoch": 0.207271727304248, "grad_norm": 1.97156822681427, "learning_rate": 9.198475608071253e-06, "loss": 0.6217, "step": 17336 }, { "epoch": 0.20728368344910866, "grad_norm": 10.409651756286621, "learning_rate": 9.198370459578064e-06, "loss": 0.6532, "step": 17337 }, { "epoch": 0.20729563959396932, "grad_norm": 4.054176330566406, "learning_rate": 9.198265304789403e-06, "loss": 0.6155, "step": 17338 }, { "epoch": 0.20730759573882998, "grad_norm": 1.792536735534668, "learning_rate": 9.198160143705426e-06, "loss": 0.6284, "step": 17339 }, { "epoch": 0.2073195518836906, "grad_norm": 2.1014766693115234, "learning_rate": 9.198054976326296e-06, "loss": 0.662, "step": 17340 }, { "epoch": 0.20733150802855127, "grad_norm": 2.0904927253723145, "learning_rate": 9.197949802652166e-06, "loss": 0.549, "step": 17341 }, { "epoch": 0.20734346417341193, "grad_norm": 1.7768510580062866, "learning_rate": 9.197844622683197e-06, "loss": 0.5543, "step": 17342 }, { "epoch": 0.2073554203182726, "grad_norm": 1.5433342456817627, "learning_rate": 9.197739436419543e-06, "loss": 0.6116, "step": 17343 }, { "epoch": 0.20736737646313322, "grad_norm": 3.8065052032470703, "learning_rate": 9.197634243861364e-06, "loss": 0.5724, "step": 17344 }, { "epoch": 0.20737933260799388, "grad_norm": 2.195765495300293, "learning_rate": 9.197529045008819e-06, "loss": 0.6329, "step": 17345 }, { "epoch": 0.20739128875285454, "grad_norm": 2.1566519737243652, "learning_rate": 9.197423839862064e-06, "loss": 0.5948, "step": 17346 }, { "epoch": 0.2074032448977152, "grad_norm": 2.3229293823242188, "learning_rate": 9.197318628421255e-06, "loss": 0.6994, "step": 17347 }, { "epoch": 0.20741520104257583, "grad_norm": 2.555298328399658, "learning_rate": 9.197213410686553e-06, "loss": 0.612, "step": 17348 }, { "epoch": 0.20742715718743648, "grad_norm": 1.7601149082183838, "learning_rate": 9.197108186658115e-06, "loss": 0.5729, "step": 17349 }, { "epoch": 0.20743911333229714, "grad_norm": 2.948481321334839, "learning_rate": 9.197002956336099e-06, "loss": 0.5039, "step": 17350 }, { "epoch": 0.20745106947715777, "grad_norm": 2.527710437774658, "learning_rate": 9.196897719720661e-06, "loss": 0.5385, "step": 17351 }, { "epoch": 0.20746302562201843, "grad_norm": 1.5350579023361206, "learning_rate": 9.19679247681196e-06, "loss": 0.6163, "step": 17352 }, { "epoch": 0.2074749817668791, "grad_norm": 3.403532028198242, "learning_rate": 9.196687227610154e-06, "loss": 0.6082, "step": 17353 }, { "epoch": 0.20748693791173975, "grad_norm": 2.4371728897094727, "learning_rate": 9.196581972115401e-06, "loss": 0.6061, "step": 17354 }, { "epoch": 0.20749889405660038, "grad_norm": 1.7572200298309326, "learning_rate": 9.196476710327857e-06, "loss": 0.5282, "step": 17355 }, { "epoch": 0.20751085020146104, "grad_norm": 2.771921396255493, "learning_rate": 9.196371442247682e-06, "loss": 0.6052, "step": 17356 }, { "epoch": 0.2075228063463217, "grad_norm": 2.2495453357696533, "learning_rate": 9.196266167875034e-06, "loss": 0.6466, "step": 17357 }, { "epoch": 0.20753476249118236, "grad_norm": 2.379391670227051, "learning_rate": 9.196160887210069e-06, "loss": 0.6678, "step": 17358 }, { "epoch": 0.207546718636043, "grad_norm": 1.7277700901031494, "learning_rate": 9.196055600252946e-06, "loss": 0.6402, "step": 17359 }, { "epoch": 0.20755867478090365, "grad_norm": 6.3797607421875, "learning_rate": 9.195950307003822e-06, "loss": 0.6015, "step": 17360 }, { "epoch": 0.2075706309257643, "grad_norm": 2.225490093231201, "learning_rate": 9.195845007462857e-06, "loss": 0.6566, "step": 17361 }, { "epoch": 0.20758258707062494, "grad_norm": 2.0504086017608643, "learning_rate": 9.195739701630208e-06, "loss": 0.5997, "step": 17362 }, { "epoch": 0.2075945432154856, "grad_norm": 2.665064811706543, "learning_rate": 9.19563438950603e-06, "loss": 0.5507, "step": 17363 }, { "epoch": 0.20760649936034625, "grad_norm": 1.9936507940292358, "learning_rate": 9.195529071090486e-06, "loss": 0.6506, "step": 17364 }, { "epoch": 0.2076184555052069, "grad_norm": 9.358414649963379, "learning_rate": 9.195423746383728e-06, "loss": 0.7102, "step": 17365 }, { "epoch": 0.20763041165006754, "grad_norm": 2.326690673828125, "learning_rate": 9.19531841538592e-06, "loss": 0.6533, "step": 17366 }, { "epoch": 0.2076423677949282, "grad_norm": 2.3741495609283447, "learning_rate": 9.195213078097217e-06, "loss": 0.6178, "step": 17367 }, { "epoch": 0.20765432393978886, "grad_norm": 3.567643165588379, "learning_rate": 9.195107734517777e-06, "loss": 0.7998, "step": 17368 }, { "epoch": 0.20766628008464952, "grad_norm": 3.7718589305877686, "learning_rate": 9.195002384647756e-06, "loss": 0.5578, "step": 17369 }, { "epoch": 0.20767823622951015, "grad_norm": 2.4716835021972656, "learning_rate": 9.194897028487315e-06, "loss": 0.6734, "step": 17370 }, { "epoch": 0.2076901923743708, "grad_norm": 2.1043636798858643, "learning_rate": 9.194791666036612e-06, "loss": 0.6783, "step": 17371 }, { "epoch": 0.20770214851923147, "grad_norm": 4.235916614532471, "learning_rate": 9.194686297295803e-06, "loss": 0.5762, "step": 17372 }, { "epoch": 0.2077141046640921, "grad_norm": 1.8036298751831055, "learning_rate": 9.194580922265049e-06, "loss": 0.5976, "step": 17373 }, { "epoch": 0.20772606080895276, "grad_norm": 2.31373929977417, "learning_rate": 9.194475540944503e-06, "loss": 0.6195, "step": 17374 }, { "epoch": 0.20773801695381341, "grad_norm": 2.394787549972534, "learning_rate": 9.194370153334326e-06, "loss": 0.6213, "step": 17375 }, { "epoch": 0.20774997309867407, "grad_norm": 2.59728741645813, "learning_rate": 9.19426475943468e-06, "loss": 0.6264, "step": 17376 }, { "epoch": 0.2077619292435347, "grad_norm": 2.204610586166382, "learning_rate": 9.194159359245716e-06, "loss": 0.6498, "step": 17377 }, { "epoch": 0.20777388538839536, "grad_norm": 1.61775803565979, "learning_rate": 9.194053952767596e-06, "loss": 0.6101, "step": 17378 }, { "epoch": 0.20778584153325602, "grad_norm": 2.4989676475524902, "learning_rate": 9.193948540000477e-06, "loss": 0.6462, "step": 17379 }, { "epoch": 0.20779779767811668, "grad_norm": 1.6368653774261475, "learning_rate": 9.193843120944515e-06, "loss": 0.6992, "step": 17380 }, { "epoch": 0.2078097538229773, "grad_norm": 2.045318841934204, "learning_rate": 9.193737695599874e-06, "loss": 0.6252, "step": 17381 }, { "epoch": 0.20782170996783797, "grad_norm": 1.9917887449264526, "learning_rate": 9.193632263966705e-06, "loss": 0.7305, "step": 17382 }, { "epoch": 0.20783366611269863, "grad_norm": 4.658547401428223, "learning_rate": 9.193526826045172e-06, "loss": 0.7552, "step": 17383 }, { "epoch": 0.20784562225755926, "grad_norm": 1.5357452630996704, "learning_rate": 9.19342138183543e-06, "loss": 0.5584, "step": 17384 }, { "epoch": 0.20785757840241992, "grad_norm": 2.374497652053833, "learning_rate": 9.193315931337638e-06, "loss": 0.5666, "step": 17385 }, { "epoch": 0.20786953454728058, "grad_norm": 2.6236889362335205, "learning_rate": 9.193210474551953e-06, "loss": 0.6032, "step": 17386 }, { "epoch": 0.20788149069214124, "grad_norm": 1.6032369136810303, "learning_rate": 9.193105011478535e-06, "loss": 0.5734, "step": 17387 }, { "epoch": 0.20789344683700187, "grad_norm": 1.5667359828948975, "learning_rate": 9.19299954211754e-06, "loss": 0.6757, "step": 17388 }, { "epoch": 0.20790540298186252, "grad_norm": 2.701626777648926, "learning_rate": 9.192894066469128e-06, "loss": 0.6894, "step": 17389 }, { "epoch": 0.20791735912672318, "grad_norm": 2.107191562652588, "learning_rate": 9.192788584533456e-06, "loss": 0.6917, "step": 17390 }, { "epoch": 0.20792931527158384, "grad_norm": 2.279717206954956, "learning_rate": 9.192683096310685e-06, "loss": 0.6407, "step": 17391 }, { "epoch": 0.20794127141644447, "grad_norm": 1.5213359594345093, "learning_rate": 9.192577601800967e-06, "loss": 0.4541, "step": 17392 }, { "epoch": 0.20795322756130513, "grad_norm": 9.807584762573242, "learning_rate": 9.192472101004466e-06, "loss": 0.6582, "step": 17393 }, { "epoch": 0.2079651837061658, "grad_norm": 5.443457126617432, "learning_rate": 9.192366593921338e-06, "loss": 0.5997, "step": 17394 }, { "epoch": 0.20797713985102642, "grad_norm": 2.6445865631103516, "learning_rate": 9.19226108055174e-06, "loss": 0.6894, "step": 17395 }, { "epoch": 0.20798909599588708, "grad_norm": 2.234421491622925, "learning_rate": 9.192155560895834e-06, "loss": 0.6136, "step": 17396 }, { "epoch": 0.20800105214074774, "grad_norm": 1.9703090190887451, "learning_rate": 9.192050034953775e-06, "loss": 0.5904, "step": 17397 }, { "epoch": 0.2080130082856084, "grad_norm": 2.1921017169952393, "learning_rate": 9.191944502725722e-06, "loss": 0.682, "step": 17398 }, { "epoch": 0.20802496443046903, "grad_norm": 6.246174335479736, "learning_rate": 9.191838964211834e-06, "loss": 0.5457, "step": 17399 }, { "epoch": 0.2080369205753297, "grad_norm": 1.9983141422271729, "learning_rate": 9.191733419412268e-06, "loss": 0.6255, "step": 17400 }, { "epoch": 0.20804887672019035, "grad_norm": 1.6281670331954956, "learning_rate": 9.191627868327182e-06, "loss": 0.5978, "step": 17401 }, { "epoch": 0.208060832865051, "grad_norm": 1.9573637247085571, "learning_rate": 9.191522310956735e-06, "loss": 0.6773, "step": 17402 }, { "epoch": 0.20807278900991164, "grad_norm": 1.8327158689498901, "learning_rate": 9.191416747301086e-06, "loss": 0.624, "step": 17403 }, { "epoch": 0.2080847451547723, "grad_norm": 1.7677057981491089, "learning_rate": 9.191311177360393e-06, "loss": 0.5686, "step": 17404 }, { "epoch": 0.20809670129963295, "grad_norm": 4.514114856719971, "learning_rate": 9.191205601134813e-06, "loss": 0.5108, "step": 17405 }, { "epoch": 0.2081086574444936, "grad_norm": 2.0616843700408936, "learning_rate": 9.191100018624507e-06, "loss": 0.6514, "step": 17406 }, { "epoch": 0.20812061358935424, "grad_norm": 1.4954994916915894, "learning_rate": 9.190994429829631e-06, "loss": 0.6384, "step": 17407 }, { "epoch": 0.2081325697342149, "grad_norm": 1.5684856176376343, "learning_rate": 9.190888834750344e-06, "loss": 0.5626, "step": 17408 }, { "epoch": 0.20814452587907556, "grad_norm": 3.8966822624206543, "learning_rate": 9.190783233386803e-06, "loss": 0.5296, "step": 17409 }, { "epoch": 0.2081564820239362, "grad_norm": 17.028858184814453, "learning_rate": 9.190677625739168e-06, "loss": 0.6562, "step": 17410 }, { "epoch": 0.20816843816879685, "grad_norm": 2.7925548553466797, "learning_rate": 9.190572011807598e-06, "loss": 0.6636, "step": 17411 }, { "epoch": 0.2081803943136575, "grad_norm": 2.8579750061035156, "learning_rate": 9.19046639159225e-06, "loss": 0.574, "step": 17412 }, { "epoch": 0.20819235045851817, "grad_norm": 2.1546831130981445, "learning_rate": 9.190360765093282e-06, "loss": 0.6898, "step": 17413 }, { "epoch": 0.2082043066033788, "grad_norm": 1.6770431995391846, "learning_rate": 9.190255132310855e-06, "loss": 0.5899, "step": 17414 }, { "epoch": 0.20821626274823946, "grad_norm": 2.2408578395843506, "learning_rate": 9.190149493245125e-06, "loss": 0.6115, "step": 17415 }, { "epoch": 0.20822821889310011, "grad_norm": 1.8719048500061035, "learning_rate": 9.19004384789625e-06, "loss": 0.6132, "step": 17416 }, { "epoch": 0.20824017503796077, "grad_norm": 1.5203192234039307, "learning_rate": 9.18993819626439e-06, "loss": 0.4782, "step": 17417 }, { "epoch": 0.2082521311828214, "grad_norm": 7.317392826080322, "learning_rate": 9.189832538349702e-06, "loss": 0.6556, "step": 17418 }, { "epoch": 0.20826408732768206, "grad_norm": 1.7692710161209106, "learning_rate": 9.189726874152344e-06, "loss": 0.6921, "step": 17419 }, { "epoch": 0.20827604347254272, "grad_norm": 1.5962581634521484, "learning_rate": 9.189621203672479e-06, "loss": 0.6318, "step": 17420 }, { "epoch": 0.20828799961740335, "grad_norm": 2.3389227390289307, "learning_rate": 9.18951552691026e-06, "loss": 0.6445, "step": 17421 }, { "epoch": 0.208299955762264, "grad_norm": 1.5091532468795776, "learning_rate": 9.189409843865849e-06, "loss": 0.5312, "step": 17422 }, { "epoch": 0.20831191190712467, "grad_norm": 3.459264039993286, "learning_rate": 9.1893041545394e-06, "loss": 0.5465, "step": 17423 }, { "epoch": 0.20832386805198533, "grad_norm": 2.6880602836608887, "learning_rate": 9.189198458931078e-06, "loss": 0.5844, "step": 17424 }, { "epoch": 0.20833582419684596, "grad_norm": 4.0399699211120605, "learning_rate": 9.189092757041036e-06, "loss": 0.6722, "step": 17425 }, { "epoch": 0.20834778034170662, "grad_norm": 5.075092792510986, "learning_rate": 9.188987048869436e-06, "loss": 0.6023, "step": 17426 }, { "epoch": 0.20835973648656728, "grad_norm": 1.929427146911621, "learning_rate": 9.188881334416433e-06, "loss": 0.5543, "step": 17427 }, { "epoch": 0.20837169263142793, "grad_norm": 3.226586103439331, "learning_rate": 9.18877561368219e-06, "loss": 0.6068, "step": 17428 }, { "epoch": 0.20838364877628857, "grad_norm": 2.8983445167541504, "learning_rate": 9.188669886666861e-06, "loss": 0.687, "step": 17429 }, { "epoch": 0.20839560492114922, "grad_norm": 5.933431148529053, "learning_rate": 9.188564153370608e-06, "loss": 0.6874, "step": 17430 }, { "epoch": 0.20840756106600988, "grad_norm": 5.698715686798096, "learning_rate": 9.188458413793588e-06, "loss": 0.6361, "step": 17431 }, { "epoch": 0.20841951721087051, "grad_norm": 2.6289849281311035, "learning_rate": 9.188352667935957e-06, "loss": 0.6185, "step": 17432 }, { "epoch": 0.20843147335573117, "grad_norm": 3.830402135848999, "learning_rate": 9.188246915797881e-06, "loss": 0.5883, "step": 17433 }, { "epoch": 0.20844342950059183, "grad_norm": 2.2343006134033203, "learning_rate": 9.188141157379511e-06, "loss": 0.5722, "step": 17434 }, { "epoch": 0.2084553856454525, "grad_norm": 2.1401724815368652, "learning_rate": 9.18803539268101e-06, "loss": 0.6783, "step": 17435 }, { "epoch": 0.20846734179031312, "grad_norm": 13.54057788848877, "learning_rate": 9.187929621702532e-06, "loss": 0.5682, "step": 17436 }, { "epoch": 0.20847929793517378, "grad_norm": 3.98816180229187, "learning_rate": 9.187823844444242e-06, "loss": 0.6507, "step": 17437 }, { "epoch": 0.20849125408003444, "grad_norm": 3.985262393951416, "learning_rate": 9.187718060906291e-06, "loss": 0.6858, "step": 17438 }, { "epoch": 0.2085032102248951, "grad_norm": 2.2653470039367676, "learning_rate": 9.187612271088845e-06, "loss": 0.6472, "step": 17439 }, { "epoch": 0.20851516636975573, "grad_norm": 2.2818405628204346, "learning_rate": 9.187506474992061e-06, "loss": 0.6422, "step": 17440 }, { "epoch": 0.2085271225146164, "grad_norm": 2.0274558067321777, "learning_rate": 9.187400672616093e-06, "loss": 0.7399, "step": 17441 }, { "epoch": 0.20853907865947705, "grad_norm": 4.876293659210205, "learning_rate": 9.187294863961105e-06, "loss": 0.6442, "step": 17442 }, { "epoch": 0.20855103480433768, "grad_norm": 1.6720421314239502, "learning_rate": 9.187189049027254e-06, "loss": 0.6684, "step": 17443 }, { "epoch": 0.20856299094919833, "grad_norm": 2.6517553329467773, "learning_rate": 9.187083227814696e-06, "loss": 0.6491, "step": 17444 }, { "epoch": 0.208574947094059, "grad_norm": 2.907944679260254, "learning_rate": 9.186977400323594e-06, "loss": 0.6126, "step": 17445 }, { "epoch": 0.20858690323891965, "grad_norm": 1.6605348587036133, "learning_rate": 9.186871566554101e-06, "loss": 0.6245, "step": 17446 }, { "epoch": 0.20859885938378028, "grad_norm": 2.213117837905884, "learning_rate": 9.186765726506382e-06, "loss": 0.7148, "step": 17447 }, { "epoch": 0.20861081552864094, "grad_norm": 3.0357108116149902, "learning_rate": 9.186659880180593e-06, "loss": 0.7186, "step": 17448 }, { "epoch": 0.2086227716735016, "grad_norm": 1.3423142433166504, "learning_rate": 9.186554027576891e-06, "loss": 0.5776, "step": 17449 }, { "epoch": 0.20863472781836226, "grad_norm": 1.609559178352356, "learning_rate": 9.186448168695438e-06, "loss": 0.503, "step": 17450 }, { "epoch": 0.2086466839632229, "grad_norm": 2.258162498474121, "learning_rate": 9.186342303536392e-06, "loss": 0.6181, "step": 17451 }, { "epoch": 0.20865864010808355, "grad_norm": 2.0835797786712646, "learning_rate": 9.186236432099906e-06, "loss": 0.5104, "step": 17452 }, { "epoch": 0.2086705962529442, "grad_norm": 1.7958887815475464, "learning_rate": 9.186130554386148e-06, "loss": 0.5657, "step": 17453 }, { "epoch": 0.20868255239780484, "grad_norm": 1.4060863256454468, "learning_rate": 9.18602467039527e-06, "loss": 0.6594, "step": 17454 }, { "epoch": 0.2086945085426655, "grad_norm": 1.7911784648895264, "learning_rate": 9.185918780127435e-06, "loss": 0.5513, "step": 17455 }, { "epoch": 0.20870646468752616, "grad_norm": 18.22144889831543, "learning_rate": 9.185812883582799e-06, "loss": 0.5706, "step": 17456 }, { "epoch": 0.20871842083238681, "grad_norm": 1.5465632677078247, "learning_rate": 9.185706980761522e-06, "loss": 0.7043, "step": 17457 }, { "epoch": 0.20873037697724744, "grad_norm": 1.8300387859344482, "learning_rate": 9.185601071663763e-06, "loss": 0.6343, "step": 17458 }, { "epoch": 0.2087423331221081, "grad_norm": 2.263923168182373, "learning_rate": 9.18549515628968e-06, "loss": 0.6043, "step": 17459 }, { "epoch": 0.20875428926696876, "grad_norm": 1.836475133895874, "learning_rate": 9.185389234639431e-06, "loss": 0.6495, "step": 17460 }, { "epoch": 0.20876624541182942, "grad_norm": 3.8533313274383545, "learning_rate": 9.185283306713177e-06, "loss": 0.5318, "step": 17461 }, { "epoch": 0.20877820155669005, "grad_norm": 2.306692361831665, "learning_rate": 9.185177372511075e-06, "loss": 0.6354, "step": 17462 }, { "epoch": 0.2087901577015507, "grad_norm": 2.7480905055999756, "learning_rate": 9.185071432033285e-06, "loss": 0.6155, "step": 17463 }, { "epoch": 0.20880211384641137, "grad_norm": 1.570458173751831, "learning_rate": 9.184965485279967e-06, "loss": 0.6308, "step": 17464 }, { "epoch": 0.20881406999127203, "grad_norm": 1.5870226621627808, "learning_rate": 9.184859532251276e-06, "loss": 0.6816, "step": 17465 }, { "epoch": 0.20882602613613266, "grad_norm": 3.2486562728881836, "learning_rate": 9.184753572947375e-06, "loss": 0.6126, "step": 17466 }, { "epoch": 0.20883798228099332, "grad_norm": 2.112445116043091, "learning_rate": 9.18464760736842e-06, "loss": 0.6022, "step": 17467 }, { "epoch": 0.20884993842585398, "grad_norm": 2.16697096824646, "learning_rate": 9.184541635514572e-06, "loss": 0.5439, "step": 17468 }, { "epoch": 0.2088618945707146, "grad_norm": 3.6232733726501465, "learning_rate": 9.184435657385988e-06, "loss": 0.5621, "step": 17469 }, { "epoch": 0.20887385071557527, "grad_norm": 2.036032199859619, "learning_rate": 9.184329672982829e-06, "loss": 0.6063, "step": 17470 }, { "epoch": 0.20888580686043592, "grad_norm": 1.202700138092041, "learning_rate": 9.184223682305251e-06, "loss": 0.4084, "step": 17471 }, { "epoch": 0.20889776300529658, "grad_norm": 2.206186532974243, "learning_rate": 9.184117685353416e-06, "loss": 0.6336, "step": 17472 }, { "epoch": 0.2089097191501572, "grad_norm": 1.5683103799819946, "learning_rate": 9.184011682127483e-06, "loss": 0.5247, "step": 17473 }, { "epoch": 0.20892167529501787, "grad_norm": 3.5340046882629395, "learning_rate": 9.183905672627607e-06, "loss": 0.7595, "step": 17474 }, { "epoch": 0.20893363143987853, "grad_norm": 1.7009706497192383, "learning_rate": 9.183799656853951e-06, "loss": 0.5616, "step": 17475 }, { "epoch": 0.2089455875847392, "grad_norm": 1.642525553703308, "learning_rate": 9.183693634806671e-06, "loss": 0.6252, "step": 17476 }, { "epoch": 0.20895754372959982, "grad_norm": 1.9679938554763794, "learning_rate": 9.18358760648593e-06, "loss": 0.6186, "step": 17477 }, { "epoch": 0.20896949987446048, "grad_norm": 2.0025675296783447, "learning_rate": 9.183481571891883e-06, "loss": 0.5462, "step": 17478 }, { "epoch": 0.20898145601932114, "grad_norm": 1.945386290550232, "learning_rate": 9.183375531024691e-06, "loss": 0.673, "step": 17479 }, { "epoch": 0.20899341216418177, "grad_norm": 1.9816137552261353, "learning_rate": 9.183269483884511e-06, "loss": 0.6461, "step": 17480 }, { "epoch": 0.20900536830904243, "grad_norm": 2.5531809329986572, "learning_rate": 9.183163430471506e-06, "loss": 0.5324, "step": 17481 }, { "epoch": 0.2090173244539031, "grad_norm": 2.720867156982422, "learning_rate": 9.183057370785831e-06, "loss": 0.7094, "step": 17482 }, { "epoch": 0.20902928059876374, "grad_norm": 2.4691927433013916, "learning_rate": 9.182951304827648e-06, "loss": 0.6449, "step": 17483 }, { "epoch": 0.20904123674362438, "grad_norm": 2.7144172191619873, "learning_rate": 9.182845232597113e-06, "loss": 0.7038, "step": 17484 }, { "epoch": 0.20905319288848503, "grad_norm": 2.3539676666259766, "learning_rate": 9.182739154094386e-06, "loss": 0.6809, "step": 17485 }, { "epoch": 0.2090651490333457, "grad_norm": 2.1175825595855713, "learning_rate": 9.182633069319629e-06, "loss": 0.6619, "step": 17486 }, { "epoch": 0.20907710517820635, "grad_norm": 1.5843088626861572, "learning_rate": 9.182526978272998e-06, "loss": 0.5259, "step": 17487 }, { "epoch": 0.20908906132306698, "grad_norm": 3.0673251152038574, "learning_rate": 9.182420880954653e-06, "loss": 0.5337, "step": 17488 }, { "epoch": 0.20910101746792764, "grad_norm": 2.60762882232666, "learning_rate": 9.182314777364753e-06, "loss": 0.6432, "step": 17489 }, { "epoch": 0.2091129736127883, "grad_norm": 4.412845134735107, "learning_rate": 9.182208667503457e-06, "loss": 0.6284, "step": 17490 }, { "epoch": 0.20912492975764893, "grad_norm": 2.7562153339385986, "learning_rate": 9.182102551370924e-06, "loss": 0.6754, "step": 17491 }, { "epoch": 0.2091368859025096, "grad_norm": 3.065126419067383, "learning_rate": 9.181996428967316e-06, "loss": 0.5934, "step": 17492 }, { "epoch": 0.20914884204737025, "grad_norm": 1.9445040225982666, "learning_rate": 9.181890300292786e-06, "loss": 0.5665, "step": 17493 }, { "epoch": 0.2091607981922309, "grad_norm": 5.768403053283691, "learning_rate": 9.181784165347498e-06, "loss": 0.5947, "step": 17494 }, { "epoch": 0.20917275433709154, "grad_norm": 2.1857075691223145, "learning_rate": 9.181678024131608e-06, "loss": 0.614, "step": 17495 }, { "epoch": 0.2091847104819522, "grad_norm": 2.271925210952759, "learning_rate": 9.18157187664528e-06, "loss": 0.6623, "step": 17496 }, { "epoch": 0.20919666662681285, "grad_norm": 1.917616367340088, "learning_rate": 9.181465722888667e-06, "loss": 0.6266, "step": 17497 }, { "epoch": 0.2092086227716735, "grad_norm": 2.246166467666626, "learning_rate": 9.181359562861933e-06, "loss": 0.6442, "step": 17498 }, { "epoch": 0.20922057891653414, "grad_norm": 1.4898632764816284, "learning_rate": 9.181253396565237e-06, "loss": 0.5343, "step": 17499 }, { "epoch": 0.2092325350613948, "grad_norm": 1.8004125356674194, "learning_rate": 9.181147223998735e-06, "loss": 0.6195, "step": 17500 }, { "epoch": 0.20924449120625546, "grad_norm": 2.4219682216644287, "learning_rate": 9.181041045162587e-06, "loss": 0.6194, "step": 17501 }, { "epoch": 0.2092564473511161, "grad_norm": 2.9493296146392822, "learning_rate": 9.180934860056953e-06, "loss": 0.5625, "step": 17502 }, { "epoch": 0.20926840349597675, "grad_norm": 1.511892318725586, "learning_rate": 9.180828668681993e-06, "loss": 0.6315, "step": 17503 }, { "epoch": 0.2092803596408374, "grad_norm": 1.3018202781677246, "learning_rate": 9.180722471037867e-06, "loss": 0.5753, "step": 17504 }, { "epoch": 0.20929231578569807, "grad_norm": 3.3158161640167236, "learning_rate": 9.180616267124732e-06, "loss": 0.7012, "step": 17505 }, { "epoch": 0.2093042719305587, "grad_norm": 2.10697603225708, "learning_rate": 9.180510056942747e-06, "loss": 0.5556, "step": 17506 }, { "epoch": 0.20931622807541936, "grad_norm": 2.2519450187683105, "learning_rate": 9.180403840492073e-06, "loss": 0.5447, "step": 17507 }, { "epoch": 0.20932818422028002, "grad_norm": 2.0650758743286133, "learning_rate": 9.18029761777287e-06, "loss": 0.5846, "step": 17508 }, { "epoch": 0.20934014036514068, "grad_norm": 2.2145168781280518, "learning_rate": 9.180191388785293e-06, "loss": 0.5786, "step": 17509 }, { "epoch": 0.2093520965100013, "grad_norm": 1.8706018924713135, "learning_rate": 9.180085153529505e-06, "loss": 0.6597, "step": 17510 }, { "epoch": 0.20936405265486197, "grad_norm": 3.1130855083465576, "learning_rate": 9.179978912005667e-06, "loss": 0.769, "step": 17511 }, { "epoch": 0.20937600879972262, "grad_norm": 6.398810863494873, "learning_rate": 9.179872664213933e-06, "loss": 0.594, "step": 17512 }, { "epoch": 0.20938796494458325, "grad_norm": 1.8470503091812134, "learning_rate": 9.179766410154466e-06, "loss": 0.6961, "step": 17513 }, { "epoch": 0.2093999210894439, "grad_norm": 3.225684881210327, "learning_rate": 9.179660149827425e-06, "loss": 0.6278, "step": 17514 }, { "epoch": 0.20941187723430457, "grad_norm": 2.055795907974243, "learning_rate": 9.179553883232968e-06, "loss": 0.6606, "step": 17515 }, { "epoch": 0.20942383337916523, "grad_norm": 5.5492024421691895, "learning_rate": 9.179447610371255e-06, "loss": 0.6722, "step": 17516 }, { "epoch": 0.20943578952402586, "grad_norm": 2.090892791748047, "learning_rate": 9.179341331242445e-06, "loss": 0.6673, "step": 17517 }, { "epoch": 0.20944774566888652, "grad_norm": 5.463998317718506, "learning_rate": 9.179235045846698e-06, "loss": 0.6129, "step": 17518 }, { "epoch": 0.20945970181374718, "grad_norm": 2.2047548294067383, "learning_rate": 9.179128754184175e-06, "loss": 0.6996, "step": 17519 }, { "epoch": 0.20947165795860784, "grad_norm": 2.306286334991455, "learning_rate": 9.17902245625503e-06, "loss": 0.6976, "step": 17520 }, { "epoch": 0.20948361410346847, "grad_norm": 2.3742716312408447, "learning_rate": 9.178916152059429e-06, "loss": 0.5695, "step": 17521 }, { "epoch": 0.20949557024832913, "grad_norm": 3.2266392707824707, "learning_rate": 9.178809841597527e-06, "loss": 0.561, "step": 17522 }, { "epoch": 0.20950752639318979, "grad_norm": 2.529362440109253, "learning_rate": 9.178703524869486e-06, "loss": 0.6169, "step": 17523 }, { "epoch": 0.20951948253805044, "grad_norm": 3.7072219848632812, "learning_rate": 9.178597201875464e-06, "loss": 0.6531, "step": 17524 }, { "epoch": 0.20953143868291108, "grad_norm": 1.7865626811981201, "learning_rate": 9.17849087261562e-06, "loss": 0.6912, "step": 17525 }, { "epoch": 0.20954339482777173, "grad_norm": 4.469587326049805, "learning_rate": 9.178384537090115e-06, "loss": 0.6176, "step": 17526 }, { "epoch": 0.2095553509726324, "grad_norm": 2.138331174850464, "learning_rate": 9.178278195299106e-06, "loss": 0.6702, "step": 17527 }, { "epoch": 0.20956730711749302, "grad_norm": 2.1005496978759766, "learning_rate": 9.178171847242754e-06, "loss": 0.6858, "step": 17528 }, { "epoch": 0.20957926326235368, "grad_norm": 3.0642611980438232, "learning_rate": 9.17806549292122e-06, "loss": 0.5284, "step": 17529 }, { "epoch": 0.20959121940721434, "grad_norm": 2.187605381011963, "learning_rate": 9.177959132334661e-06, "loss": 0.7027, "step": 17530 }, { "epoch": 0.209603175552075, "grad_norm": 1.729549527168274, "learning_rate": 9.177852765483239e-06, "loss": 0.5247, "step": 17531 }, { "epoch": 0.20961513169693563, "grad_norm": 2.600855827331543, "learning_rate": 9.17774639236711e-06, "loss": 0.6001, "step": 17532 }, { "epoch": 0.2096270878417963, "grad_norm": 2.838557004928589, "learning_rate": 9.177640012986436e-06, "loss": 0.53, "step": 17533 }, { "epoch": 0.20963904398665695, "grad_norm": 1.9548389911651611, "learning_rate": 9.177533627341375e-06, "loss": 0.5936, "step": 17534 }, { "epoch": 0.2096510001315176, "grad_norm": 2.239305257797241, "learning_rate": 9.17742723543209e-06, "loss": 0.6295, "step": 17535 }, { "epoch": 0.20966295627637824, "grad_norm": 2.0079386234283447, "learning_rate": 9.177320837258737e-06, "loss": 0.6134, "step": 17536 }, { "epoch": 0.2096749124212389, "grad_norm": 2.0602550506591797, "learning_rate": 9.177214432821475e-06, "loss": 0.6591, "step": 17537 }, { "epoch": 0.20968686856609955, "grad_norm": 1.4638183116912842, "learning_rate": 9.177108022120466e-06, "loss": 0.5625, "step": 17538 }, { "epoch": 0.20969882471096019, "grad_norm": 1.718064546585083, "learning_rate": 9.177001605155869e-06, "loss": 0.6182, "step": 17539 }, { "epoch": 0.20971078085582084, "grad_norm": 1.7512391805648804, "learning_rate": 9.176895181927844e-06, "loss": 0.6425, "step": 17540 }, { "epoch": 0.2097227370006815, "grad_norm": 2.1805989742279053, "learning_rate": 9.176788752436547e-06, "loss": 0.6775, "step": 17541 }, { "epoch": 0.20973469314554216, "grad_norm": 2.7711613178253174, "learning_rate": 9.176682316682143e-06, "loss": 0.5813, "step": 17542 }, { "epoch": 0.2097466492904028, "grad_norm": 7.300404071807861, "learning_rate": 9.176575874664788e-06, "loss": 0.6843, "step": 17543 }, { "epoch": 0.20975860543526345, "grad_norm": 1.987551212310791, "learning_rate": 9.176469426384642e-06, "loss": 0.6367, "step": 17544 }, { "epoch": 0.2097705615801241, "grad_norm": 1.9171069860458374, "learning_rate": 9.176362971841868e-06, "loss": 0.6533, "step": 17545 }, { "epoch": 0.20978251772498477, "grad_norm": 2.7274129390716553, "learning_rate": 9.17625651103662e-06, "loss": 0.6839, "step": 17546 }, { "epoch": 0.2097944738698454, "grad_norm": 1.5078771114349365, "learning_rate": 9.176150043969062e-06, "loss": 0.6327, "step": 17547 }, { "epoch": 0.20980643001470606, "grad_norm": 2.053300619125366, "learning_rate": 9.176043570639352e-06, "loss": 0.5763, "step": 17548 }, { "epoch": 0.20981838615956672, "grad_norm": 1.9591357707977295, "learning_rate": 9.175937091047649e-06, "loss": 0.4974, "step": 17549 }, { "epoch": 0.20983034230442735, "grad_norm": 5.397017478942871, "learning_rate": 9.175830605194114e-06, "loss": 0.6081, "step": 17550 }, { "epoch": 0.209842298449288, "grad_norm": 2.4722094535827637, "learning_rate": 9.175724113078907e-06, "loss": 0.6422, "step": 17551 }, { "epoch": 0.20985425459414866, "grad_norm": 1.5370008945465088, "learning_rate": 9.175617614702187e-06, "loss": 0.6512, "step": 17552 }, { "epoch": 0.20986621073900932, "grad_norm": 7.722043514251709, "learning_rate": 9.17551111006411e-06, "loss": 0.6829, "step": 17553 }, { "epoch": 0.20987816688386995, "grad_norm": 1.4620798826217651, "learning_rate": 9.175404599164843e-06, "loss": 0.5572, "step": 17554 }, { "epoch": 0.2098901230287306, "grad_norm": 1.6508344411849976, "learning_rate": 9.17529808200454e-06, "loss": 0.6951, "step": 17555 }, { "epoch": 0.20990207917359127, "grad_norm": 1.5546162128448486, "learning_rate": 9.175191558583365e-06, "loss": 0.5349, "step": 17556 }, { "epoch": 0.20991403531845193, "grad_norm": 2.481458902359009, "learning_rate": 9.175085028901474e-06, "loss": 0.5624, "step": 17557 }, { "epoch": 0.20992599146331256, "grad_norm": 2.287600040435791, "learning_rate": 9.174978492959027e-06, "loss": 0.6484, "step": 17558 }, { "epoch": 0.20993794760817322, "grad_norm": 2.2492117881774902, "learning_rate": 9.174871950756185e-06, "loss": 0.6329, "step": 17559 }, { "epoch": 0.20994990375303388, "grad_norm": 3.9311442375183105, "learning_rate": 9.17476540229311e-06, "loss": 0.6085, "step": 17560 }, { "epoch": 0.2099618598978945, "grad_norm": 1.7826159000396729, "learning_rate": 9.174658847569958e-06, "loss": 0.6665, "step": 17561 }, { "epoch": 0.20997381604275517, "grad_norm": 2.3196966648101807, "learning_rate": 9.174552286586891e-06, "loss": 0.5786, "step": 17562 }, { "epoch": 0.20998577218761583, "grad_norm": 3.0548815727233887, "learning_rate": 9.174445719344067e-06, "loss": 0.5759, "step": 17563 }, { "epoch": 0.20999772833247649, "grad_norm": 3.171147584915161, "learning_rate": 9.174339145841646e-06, "loss": 0.5646, "step": 17564 }, { "epoch": 0.21000968447733712, "grad_norm": 2.006272077560425, "learning_rate": 9.17423256607979e-06, "loss": 0.5777, "step": 17565 }, { "epoch": 0.21002164062219777, "grad_norm": 1.921044945716858, "learning_rate": 9.174125980058658e-06, "loss": 0.6808, "step": 17566 }, { "epoch": 0.21003359676705843, "grad_norm": 1.5102266073226929, "learning_rate": 9.174019387778408e-06, "loss": 0.5704, "step": 17567 }, { "epoch": 0.2100455529119191, "grad_norm": 4.903679847717285, "learning_rate": 9.173912789239201e-06, "loss": 0.6978, "step": 17568 }, { "epoch": 0.21005750905677972, "grad_norm": 1.8895155191421509, "learning_rate": 9.173806184441199e-06, "loss": 0.6472, "step": 17569 }, { "epoch": 0.21006946520164038, "grad_norm": 1.597777247428894, "learning_rate": 9.173699573384558e-06, "loss": 0.6498, "step": 17570 }, { "epoch": 0.21008142134650104, "grad_norm": 2.7078394889831543, "learning_rate": 9.17359295606944e-06, "loss": 0.6294, "step": 17571 }, { "epoch": 0.2100933774913617, "grad_norm": 3.751359462738037, "learning_rate": 9.173486332496005e-06, "loss": 0.5561, "step": 17572 }, { "epoch": 0.21010533363622233, "grad_norm": 3.688961982727051, "learning_rate": 9.173379702664411e-06, "loss": 0.636, "step": 17573 }, { "epoch": 0.210117289781083, "grad_norm": 3.7346136569976807, "learning_rate": 9.17327306657482e-06, "loss": 0.5502, "step": 17574 }, { "epoch": 0.21012924592594365, "grad_norm": 2.1029858589172363, "learning_rate": 9.173166424227391e-06, "loss": 0.5656, "step": 17575 }, { "epoch": 0.21014120207080428, "grad_norm": 2.563206911087036, "learning_rate": 9.173059775622284e-06, "loss": 0.6422, "step": 17576 }, { "epoch": 0.21015315821566494, "grad_norm": 6.703350067138672, "learning_rate": 9.17295312075966e-06, "loss": 0.6352, "step": 17577 }, { "epoch": 0.2101651143605256, "grad_norm": 1.856094479560852, "learning_rate": 9.172846459639676e-06, "loss": 0.7007, "step": 17578 }, { "epoch": 0.21017707050538625, "grad_norm": 3.7573115825653076, "learning_rate": 9.172739792262497e-06, "loss": 0.642, "step": 17579 }, { "epoch": 0.21018902665024689, "grad_norm": 1.8737033605575562, "learning_rate": 9.172633118628278e-06, "loss": 0.6684, "step": 17580 }, { "epoch": 0.21020098279510754, "grad_norm": 14.573920249938965, "learning_rate": 9.172526438737181e-06, "loss": 0.6072, "step": 17581 }, { "epoch": 0.2102129389399682, "grad_norm": 2.012281656265259, "learning_rate": 9.172419752589366e-06, "loss": 0.6782, "step": 17582 }, { "epoch": 0.21022489508482886, "grad_norm": 3.25555419921875, "learning_rate": 9.172313060184992e-06, "loss": 0.5929, "step": 17583 }, { "epoch": 0.2102368512296895, "grad_norm": 1.6790728569030762, "learning_rate": 9.17220636152422e-06, "loss": 0.6138, "step": 17584 }, { "epoch": 0.21024880737455015, "grad_norm": 1.760793924331665, "learning_rate": 9.17209965660721e-06, "loss": 0.5431, "step": 17585 }, { "epoch": 0.2102607635194108, "grad_norm": 2.8211352825164795, "learning_rate": 9.171992945434123e-06, "loss": 0.6109, "step": 17586 }, { "epoch": 0.21027271966427144, "grad_norm": 1.9561203718185425, "learning_rate": 9.171886228005116e-06, "loss": 0.6122, "step": 17587 }, { "epoch": 0.2102846758091321, "grad_norm": 1.682559847831726, "learning_rate": 9.17177950432035e-06, "loss": 0.605, "step": 17588 }, { "epoch": 0.21029663195399276, "grad_norm": 1.9342317581176758, "learning_rate": 9.171672774379988e-06, "loss": 0.5888, "step": 17589 }, { "epoch": 0.21030858809885342, "grad_norm": 1.4794152975082397, "learning_rate": 9.171566038184187e-06, "loss": 0.6303, "step": 17590 }, { "epoch": 0.21032054424371405, "grad_norm": 1.927212119102478, "learning_rate": 9.171459295733107e-06, "loss": 0.6722, "step": 17591 }, { "epoch": 0.2103325003885747, "grad_norm": 1.3807803392410278, "learning_rate": 9.17135254702691e-06, "loss": 0.6108, "step": 17592 }, { "epoch": 0.21034445653343536, "grad_norm": 2.169782876968384, "learning_rate": 9.171245792065755e-06, "loss": 0.6027, "step": 17593 }, { "epoch": 0.21035641267829602, "grad_norm": 3.784658432006836, "learning_rate": 9.171139030849803e-06, "loss": 0.5607, "step": 17594 }, { "epoch": 0.21036836882315665, "grad_norm": 1.4169992208480835, "learning_rate": 9.171032263379211e-06, "loss": 0.5214, "step": 17595 }, { "epoch": 0.2103803249680173, "grad_norm": 1.5269207954406738, "learning_rate": 9.170925489654143e-06, "loss": 0.5993, "step": 17596 }, { "epoch": 0.21039228111287797, "grad_norm": 2.6751790046691895, "learning_rate": 9.170818709674755e-06, "loss": 0.5744, "step": 17597 }, { "epoch": 0.2104042372577386, "grad_norm": 11.880949020385742, "learning_rate": 9.170711923441213e-06, "loss": 0.5838, "step": 17598 }, { "epoch": 0.21041619340259926, "grad_norm": 1.731540560722351, "learning_rate": 9.17060513095367e-06, "loss": 0.5692, "step": 17599 }, { "epoch": 0.21042814954745992, "grad_norm": 2.04612135887146, "learning_rate": 9.170498332212294e-06, "loss": 0.6518, "step": 17600 }, { "epoch": 0.21044010569232058, "grad_norm": 1.5770834684371948, "learning_rate": 9.170391527217238e-06, "loss": 0.5032, "step": 17601 }, { "epoch": 0.2104520618371812, "grad_norm": 2.507282018661499, "learning_rate": 9.170284715968667e-06, "loss": 0.5839, "step": 17602 }, { "epoch": 0.21046401798204187, "grad_norm": 2.445672035217285, "learning_rate": 9.170177898466737e-06, "loss": 0.6484, "step": 17603 }, { "epoch": 0.21047597412690253, "grad_norm": 1.7111581563949585, "learning_rate": 9.170071074711612e-06, "loss": 0.6335, "step": 17604 }, { "epoch": 0.21048793027176319, "grad_norm": 2.126330614089966, "learning_rate": 9.169964244703448e-06, "loss": 0.5259, "step": 17605 }, { "epoch": 0.21049988641662382, "grad_norm": 2.1662137508392334, "learning_rate": 9.16985740844241e-06, "loss": 0.712, "step": 17606 }, { "epoch": 0.21051184256148447, "grad_norm": 2.543591022491455, "learning_rate": 9.169750565928656e-06, "loss": 0.6799, "step": 17607 }, { "epoch": 0.21052379870634513, "grad_norm": 5.527616500854492, "learning_rate": 9.169643717162346e-06, "loss": 0.5937, "step": 17608 }, { "epoch": 0.21053575485120576, "grad_norm": 1.1525923013687134, "learning_rate": 9.169536862143642e-06, "loss": 0.5608, "step": 17609 }, { "epoch": 0.21054771099606642, "grad_norm": 1.6038886308670044, "learning_rate": 9.1694300008727e-06, "loss": 0.587, "step": 17610 }, { "epoch": 0.21055966714092708, "grad_norm": 1.5012109279632568, "learning_rate": 9.169323133349684e-06, "loss": 0.6496, "step": 17611 }, { "epoch": 0.21057162328578774, "grad_norm": 2.314256191253662, "learning_rate": 9.169216259574753e-06, "loss": 0.6048, "step": 17612 }, { "epoch": 0.21058357943064837, "grad_norm": 1.841090440750122, "learning_rate": 9.169109379548069e-06, "loss": 0.6583, "step": 17613 }, { "epoch": 0.21059553557550903, "grad_norm": 2.767420768737793, "learning_rate": 9.169002493269788e-06, "loss": 0.5911, "step": 17614 }, { "epoch": 0.2106074917203697, "grad_norm": 2.198937177658081, "learning_rate": 9.168895600740074e-06, "loss": 0.6087, "step": 17615 }, { "epoch": 0.21061944786523035, "grad_norm": 3.139739751815796, "learning_rate": 9.168788701959087e-06, "loss": 0.5512, "step": 17616 }, { "epoch": 0.21063140401009098, "grad_norm": 2.2274138927459717, "learning_rate": 9.168681796926987e-06, "loss": 0.6312, "step": 17617 }, { "epoch": 0.21064336015495164, "grad_norm": 1.7101678848266602, "learning_rate": 9.16857488564393e-06, "loss": 0.6202, "step": 17618 }, { "epoch": 0.2106553162998123, "grad_norm": 1.5846703052520752, "learning_rate": 9.168467968110085e-06, "loss": 0.6064, "step": 17619 }, { "epoch": 0.21066727244467293, "grad_norm": 1.5347601175308228, "learning_rate": 9.168361044325604e-06, "loss": 0.6379, "step": 17620 }, { "epoch": 0.21067922858953358, "grad_norm": 1.3930530548095703, "learning_rate": 9.168254114290653e-06, "loss": 0.6713, "step": 17621 }, { "epoch": 0.21069118473439424, "grad_norm": 1.5481442213058472, "learning_rate": 9.168147178005388e-06, "loss": 0.5564, "step": 17622 }, { "epoch": 0.2107031408792549, "grad_norm": 1.9396480321884155, "learning_rate": 9.168040235469974e-06, "loss": 0.627, "step": 17623 }, { "epoch": 0.21071509702411553, "grad_norm": 1.531176209449768, "learning_rate": 9.167933286684567e-06, "loss": 0.643, "step": 17624 }, { "epoch": 0.2107270531689762, "grad_norm": 2.7429027557373047, "learning_rate": 9.16782633164933e-06, "loss": 0.5479, "step": 17625 }, { "epoch": 0.21073900931383685, "grad_norm": 1.7713561058044434, "learning_rate": 9.167719370364423e-06, "loss": 0.5894, "step": 17626 }, { "epoch": 0.2107509654586975, "grad_norm": 2.378448486328125, "learning_rate": 9.167612402830006e-06, "loss": 0.752, "step": 17627 }, { "epoch": 0.21076292160355814, "grad_norm": 2.2396304607391357, "learning_rate": 9.167505429046237e-06, "loss": 0.6406, "step": 17628 }, { "epoch": 0.2107748777484188, "grad_norm": 1.396596908569336, "learning_rate": 9.167398449013281e-06, "loss": 0.5744, "step": 17629 }, { "epoch": 0.21078683389327946, "grad_norm": 2.0918586254119873, "learning_rate": 9.167291462731295e-06, "loss": 0.6457, "step": 17630 }, { "epoch": 0.21079879003814012, "grad_norm": 1.8074957132339478, "learning_rate": 9.167184470200443e-06, "loss": 0.6093, "step": 17631 }, { "epoch": 0.21081074618300075, "grad_norm": 4.21311616897583, "learning_rate": 9.16707747142088e-06, "loss": 0.6226, "step": 17632 }, { "epoch": 0.2108227023278614, "grad_norm": 2.103187084197998, "learning_rate": 9.166970466392773e-06, "loss": 0.6745, "step": 17633 }, { "epoch": 0.21083465847272206, "grad_norm": 3.3672704696655273, "learning_rate": 9.166863455116277e-06, "loss": 0.5722, "step": 17634 }, { "epoch": 0.2108466146175827, "grad_norm": 2.135359525680542, "learning_rate": 9.166756437591554e-06, "loss": 0.6997, "step": 17635 }, { "epoch": 0.21085857076244335, "grad_norm": 1.7051715850830078, "learning_rate": 9.166649413818765e-06, "loss": 0.6754, "step": 17636 }, { "epoch": 0.210870526907304, "grad_norm": 1.3551453351974487, "learning_rate": 9.16654238379807e-06, "loss": 0.7103, "step": 17637 }, { "epoch": 0.21088248305216467, "grad_norm": 2.501983165740967, "learning_rate": 9.166435347529632e-06, "loss": 0.5885, "step": 17638 }, { "epoch": 0.2108944391970253, "grad_norm": 1.9010404348373413, "learning_rate": 9.166328305013607e-06, "loss": 0.6856, "step": 17639 }, { "epoch": 0.21090639534188596, "grad_norm": 1.937076210975647, "learning_rate": 9.166221256250158e-06, "loss": 0.6895, "step": 17640 }, { "epoch": 0.21091835148674662, "grad_norm": 3.114682912826538, "learning_rate": 9.166114201239448e-06, "loss": 0.614, "step": 17641 }, { "epoch": 0.21093030763160728, "grad_norm": 1.8238686323165894, "learning_rate": 9.166007139981631e-06, "loss": 0.6709, "step": 17642 }, { "epoch": 0.2109422637764679, "grad_norm": 2.31777024269104, "learning_rate": 9.165900072476875e-06, "loss": 0.6948, "step": 17643 }, { "epoch": 0.21095421992132857, "grad_norm": 1.7492369413375854, "learning_rate": 9.165792998725336e-06, "loss": 0.6857, "step": 17644 }, { "epoch": 0.21096617606618923, "grad_norm": 3.3677940368652344, "learning_rate": 9.165685918727176e-06, "loss": 0.6707, "step": 17645 }, { "epoch": 0.21097813221104986, "grad_norm": 2.2683873176574707, "learning_rate": 9.165578832482553e-06, "loss": 0.5796, "step": 17646 }, { "epoch": 0.21099008835591052, "grad_norm": 1.4730385541915894, "learning_rate": 9.165471739991633e-06, "loss": 0.6169, "step": 17647 }, { "epoch": 0.21100204450077117, "grad_norm": 2.0586166381835938, "learning_rate": 9.16536464125457e-06, "loss": 0.6741, "step": 17648 }, { "epoch": 0.21101400064563183, "grad_norm": 1.5313140153884888, "learning_rate": 9.16525753627153e-06, "loss": 0.5717, "step": 17649 }, { "epoch": 0.21102595679049246, "grad_norm": 1.8395123481750488, "learning_rate": 9.165150425042671e-06, "loss": 0.6044, "step": 17650 }, { "epoch": 0.21103791293535312, "grad_norm": 1.6538386344909668, "learning_rate": 9.165043307568155e-06, "loss": 0.576, "step": 17651 }, { "epoch": 0.21104986908021378, "grad_norm": 1.8216288089752197, "learning_rate": 9.164936183848141e-06, "loss": 0.5281, "step": 17652 }, { "epoch": 0.21106182522507444, "grad_norm": 5.711012840270996, "learning_rate": 9.16482905388279e-06, "loss": 0.5861, "step": 17653 }, { "epoch": 0.21107378136993507, "grad_norm": 1.872275948524475, "learning_rate": 9.164721917672264e-06, "loss": 0.5445, "step": 17654 }, { "epoch": 0.21108573751479573, "grad_norm": 2.082197666168213, "learning_rate": 9.164614775216722e-06, "loss": 0.6416, "step": 17655 }, { "epoch": 0.2110976936596564, "grad_norm": 3.4738831520080566, "learning_rate": 9.164507626516325e-06, "loss": 0.6705, "step": 17656 }, { "epoch": 0.21110964980451702, "grad_norm": 1.792452096939087, "learning_rate": 9.164400471571234e-06, "loss": 0.7001, "step": 17657 }, { "epoch": 0.21112160594937768, "grad_norm": 1.8177286386489868, "learning_rate": 9.16429331038161e-06, "loss": 0.5585, "step": 17658 }, { "epoch": 0.21113356209423834, "grad_norm": 1.8700789213180542, "learning_rate": 9.164186142947612e-06, "loss": 0.6038, "step": 17659 }, { "epoch": 0.211145518239099, "grad_norm": 2.0601375102996826, "learning_rate": 9.164078969269406e-06, "loss": 0.6248, "step": 17660 }, { "epoch": 0.21115747438395963, "grad_norm": 4.28642463684082, "learning_rate": 9.163971789347145e-06, "loss": 0.5883, "step": 17661 }, { "epoch": 0.21116943052882028, "grad_norm": 4.335712432861328, "learning_rate": 9.163864603180995e-06, "loss": 0.6732, "step": 17662 }, { "epoch": 0.21118138667368094, "grad_norm": 1.5668370723724365, "learning_rate": 9.163757410771114e-06, "loss": 0.6601, "step": 17663 }, { "epoch": 0.2111933428185416, "grad_norm": 2.3476943969726562, "learning_rate": 9.163650212117667e-06, "loss": 0.6452, "step": 17664 }, { "epoch": 0.21120529896340223, "grad_norm": 4.57767915725708, "learning_rate": 9.163543007220809e-06, "loss": 0.7844, "step": 17665 }, { "epoch": 0.2112172551082629, "grad_norm": 1.8599718809127808, "learning_rate": 9.163435796080704e-06, "loss": 0.6486, "step": 17666 }, { "epoch": 0.21122921125312355, "grad_norm": 2.576111078262329, "learning_rate": 9.163328578697513e-06, "loss": 0.7058, "step": 17667 }, { "epoch": 0.21124116739798418, "grad_norm": 1.8278509378433228, "learning_rate": 9.163221355071395e-06, "loss": 0.7524, "step": 17668 }, { "epoch": 0.21125312354284484, "grad_norm": 1.8922477960586548, "learning_rate": 9.163114125202512e-06, "loss": 0.6397, "step": 17669 }, { "epoch": 0.2112650796877055, "grad_norm": 2.17753529548645, "learning_rate": 9.163006889091025e-06, "loss": 0.6289, "step": 17670 }, { "epoch": 0.21127703583256616, "grad_norm": 2.164224863052368, "learning_rate": 9.162899646737093e-06, "loss": 0.5461, "step": 17671 }, { "epoch": 0.2112889919774268, "grad_norm": 4.595396518707275, "learning_rate": 9.16279239814088e-06, "loss": 0.6502, "step": 17672 }, { "epoch": 0.21130094812228745, "grad_norm": 1.6629090309143066, "learning_rate": 9.162685143302544e-06, "loss": 0.6443, "step": 17673 }, { "epoch": 0.2113129042671481, "grad_norm": 1.5686792135238647, "learning_rate": 9.162577882222248e-06, "loss": 0.6699, "step": 17674 }, { "epoch": 0.21132486041200876, "grad_norm": 2.2550482749938965, "learning_rate": 9.162470614900152e-06, "loss": 0.6708, "step": 17675 }, { "epoch": 0.2113368165568694, "grad_norm": 3.1857500076293945, "learning_rate": 9.162363341336413e-06, "loss": 0.5914, "step": 17676 }, { "epoch": 0.21134877270173005, "grad_norm": 1.587127923965454, "learning_rate": 9.162256061531199e-06, "loss": 0.6014, "step": 17677 }, { "epoch": 0.2113607288465907, "grad_norm": 2.4080958366394043, "learning_rate": 9.162148775484665e-06, "loss": 0.6246, "step": 17678 }, { "epoch": 0.21137268499145134, "grad_norm": 1.771231770515442, "learning_rate": 9.162041483196975e-06, "loss": 0.5996, "step": 17679 }, { "epoch": 0.211384641136312, "grad_norm": 2.5968706607818604, "learning_rate": 9.16193418466829e-06, "loss": 0.5838, "step": 17680 }, { "epoch": 0.21139659728117266, "grad_norm": 3.033677101135254, "learning_rate": 9.161826879898768e-06, "loss": 0.6377, "step": 17681 }, { "epoch": 0.21140855342603332, "grad_norm": 3.2095189094543457, "learning_rate": 9.161719568888574e-06, "loss": 0.6416, "step": 17682 }, { "epoch": 0.21142050957089395, "grad_norm": 2.475659132003784, "learning_rate": 9.161612251637864e-06, "loss": 0.5952, "step": 17683 }, { "epoch": 0.2114324657157546, "grad_norm": 4.265390872955322, "learning_rate": 9.161504928146804e-06, "loss": 0.7425, "step": 17684 }, { "epoch": 0.21144442186061527, "grad_norm": 2.51516056060791, "learning_rate": 9.161397598415552e-06, "loss": 0.6335, "step": 17685 }, { "epoch": 0.21145637800547593, "grad_norm": 5.960746765136719, "learning_rate": 9.161290262444269e-06, "loss": 0.6005, "step": 17686 }, { "epoch": 0.21146833415033656, "grad_norm": 1.778815746307373, "learning_rate": 9.161182920233117e-06, "loss": 0.613, "step": 17687 }, { "epoch": 0.21148029029519722, "grad_norm": 2.822629451751709, "learning_rate": 9.161075571782255e-06, "loss": 0.5698, "step": 17688 }, { "epoch": 0.21149224644005787, "grad_norm": 2.485081195831299, "learning_rate": 9.160968217091847e-06, "loss": 0.6432, "step": 17689 }, { "epoch": 0.21150420258491853, "grad_norm": 3.1402761936187744, "learning_rate": 9.160860856162051e-06, "loss": 0.5687, "step": 17690 }, { "epoch": 0.21151615872977916, "grad_norm": 1.780867338180542, "learning_rate": 9.16075348899303e-06, "loss": 0.6346, "step": 17691 }, { "epoch": 0.21152811487463982, "grad_norm": 2.924938917160034, "learning_rate": 9.160646115584946e-06, "loss": 0.7022, "step": 17692 }, { "epoch": 0.21154007101950048, "grad_norm": 1.9814406633377075, "learning_rate": 9.160538735937956e-06, "loss": 0.7499, "step": 17693 }, { "epoch": 0.2115520271643611, "grad_norm": 2.2230520248413086, "learning_rate": 9.160431350052223e-06, "loss": 0.5491, "step": 17694 }, { "epoch": 0.21156398330922177, "grad_norm": 1.9045087099075317, "learning_rate": 9.16032395792791e-06, "loss": 0.5203, "step": 17695 }, { "epoch": 0.21157593945408243, "grad_norm": 2.6685054302215576, "learning_rate": 9.160216559565177e-06, "loss": 0.6299, "step": 17696 }, { "epoch": 0.2115878955989431, "grad_norm": 2.3833677768707275, "learning_rate": 9.160109154964183e-06, "loss": 0.5805, "step": 17697 }, { "epoch": 0.21159985174380372, "grad_norm": 1.6996607780456543, "learning_rate": 9.160001744125091e-06, "loss": 0.6123, "step": 17698 }, { "epoch": 0.21161180788866438, "grad_norm": 1.666943073272705, "learning_rate": 9.159894327048063e-06, "loss": 0.6259, "step": 17699 }, { "epoch": 0.21162376403352504, "grad_norm": 1.9090272188186646, "learning_rate": 9.159786903733256e-06, "loss": 0.5989, "step": 17700 }, { "epoch": 0.2116357201783857, "grad_norm": 3.0906007289886475, "learning_rate": 9.159679474180835e-06, "loss": 0.6846, "step": 17701 }, { "epoch": 0.21164767632324633, "grad_norm": 3.955980062484741, "learning_rate": 9.15957203839096e-06, "loss": 0.6305, "step": 17702 }, { "epoch": 0.21165963246810698, "grad_norm": 3.3674049377441406, "learning_rate": 9.159464596363791e-06, "loss": 0.616, "step": 17703 }, { "epoch": 0.21167158861296764, "grad_norm": 3.572744369506836, "learning_rate": 9.159357148099492e-06, "loss": 0.6931, "step": 17704 }, { "epoch": 0.21168354475782827, "grad_norm": 1.879884123802185, "learning_rate": 9.159249693598222e-06, "loss": 0.5797, "step": 17705 }, { "epoch": 0.21169550090268893, "grad_norm": 1.8994630575180054, "learning_rate": 9.159142232860142e-06, "loss": 0.6272, "step": 17706 }, { "epoch": 0.2117074570475496, "grad_norm": 2.351334810256958, "learning_rate": 9.159034765885413e-06, "loss": 0.5443, "step": 17707 }, { "epoch": 0.21171941319241025, "grad_norm": 6.260842323303223, "learning_rate": 9.158927292674195e-06, "loss": 0.6488, "step": 17708 }, { "epoch": 0.21173136933727088, "grad_norm": 2.1401925086975098, "learning_rate": 9.158819813226654e-06, "loss": 0.6243, "step": 17709 }, { "epoch": 0.21174332548213154, "grad_norm": 3.2998764514923096, "learning_rate": 9.158712327542945e-06, "loss": 0.564, "step": 17710 }, { "epoch": 0.2117552816269922, "grad_norm": 4.477040767669678, "learning_rate": 9.158604835623232e-06, "loss": 0.6267, "step": 17711 }, { "epoch": 0.21176723777185286, "grad_norm": 8.69511604309082, "learning_rate": 9.15849733746768e-06, "loss": 0.6807, "step": 17712 }, { "epoch": 0.2117791939167135, "grad_norm": 2.110642194747925, "learning_rate": 9.158389833076441e-06, "loss": 0.6623, "step": 17713 }, { "epoch": 0.21179115006157415, "grad_norm": 2.196424722671509, "learning_rate": 9.158282322449686e-06, "loss": 0.5971, "step": 17714 }, { "epoch": 0.2118031062064348, "grad_norm": 3.4912455081939697, "learning_rate": 9.15817480558757e-06, "loss": 0.6686, "step": 17715 }, { "epoch": 0.21181506235129544, "grad_norm": 3.2308197021484375, "learning_rate": 9.158067282490258e-06, "loss": 0.5269, "step": 17716 }, { "epoch": 0.2118270184961561, "grad_norm": 10.082273483276367, "learning_rate": 9.157959753157908e-06, "loss": 0.6372, "step": 17717 }, { "epoch": 0.21183897464101675, "grad_norm": 2.3552048206329346, "learning_rate": 9.157852217590682e-06, "loss": 0.6162, "step": 17718 }, { "epoch": 0.2118509307858774, "grad_norm": 2.585494041442871, "learning_rate": 9.157744675788743e-06, "loss": 0.6319, "step": 17719 }, { "epoch": 0.21186288693073804, "grad_norm": 2.6272943019866943, "learning_rate": 9.15763712775225e-06, "loss": 0.6041, "step": 17720 }, { "epoch": 0.2118748430755987, "grad_norm": 3.0294365882873535, "learning_rate": 9.157529573481366e-06, "loss": 0.5783, "step": 17721 }, { "epoch": 0.21188679922045936, "grad_norm": 2.03674054145813, "learning_rate": 9.15742201297625e-06, "loss": 0.6797, "step": 17722 }, { "epoch": 0.21189875536532002, "grad_norm": 4.121866226196289, "learning_rate": 9.157314446237066e-06, "loss": 0.6072, "step": 17723 }, { "epoch": 0.21191071151018065, "grad_norm": 1.826974630355835, "learning_rate": 9.157206873263974e-06, "loss": 0.5132, "step": 17724 }, { "epoch": 0.2119226676550413, "grad_norm": 1.9087411165237427, "learning_rate": 9.157099294057137e-06, "loss": 0.6269, "step": 17725 }, { "epoch": 0.21193462379990197, "grad_norm": 3.7101893424987793, "learning_rate": 9.156991708616714e-06, "loss": 0.6695, "step": 17726 }, { "epoch": 0.2119465799447626, "grad_norm": 1.909894347190857, "learning_rate": 9.156884116942866e-06, "loss": 0.6314, "step": 17727 }, { "epoch": 0.21195853608962326, "grad_norm": 3.624821186065674, "learning_rate": 9.156776519035757e-06, "loss": 0.6156, "step": 17728 }, { "epoch": 0.21197049223448391, "grad_norm": 2.38267183303833, "learning_rate": 9.156668914895546e-06, "loss": 0.5994, "step": 17729 }, { "epoch": 0.21198244837934457, "grad_norm": 13.89744758605957, "learning_rate": 9.156561304522395e-06, "loss": 0.5243, "step": 17730 }, { "epoch": 0.2119944045242052, "grad_norm": 2.9890291690826416, "learning_rate": 9.156453687916467e-06, "loss": 0.5335, "step": 17731 }, { "epoch": 0.21200636066906586, "grad_norm": 3.5420663356781006, "learning_rate": 9.15634606507792e-06, "loss": 0.6411, "step": 17732 }, { "epoch": 0.21201831681392652, "grad_norm": 1.8764276504516602, "learning_rate": 9.15623843600692e-06, "loss": 0.6103, "step": 17733 }, { "epoch": 0.21203027295878718, "grad_norm": 2.586648464202881, "learning_rate": 9.156130800703624e-06, "loss": 0.5834, "step": 17734 }, { "epoch": 0.2120422291036478, "grad_norm": 2.641963243484497, "learning_rate": 9.156023159168195e-06, "loss": 0.6518, "step": 17735 }, { "epoch": 0.21205418524850847, "grad_norm": 6.607733726501465, "learning_rate": 9.155915511400793e-06, "loss": 0.6191, "step": 17736 }, { "epoch": 0.21206614139336913, "grad_norm": 2.854276657104492, "learning_rate": 9.155807857401583e-06, "loss": 0.5822, "step": 17737 }, { "epoch": 0.21207809753822976, "grad_norm": 2.9042296409606934, "learning_rate": 9.155700197170725e-06, "loss": 0.5026, "step": 17738 }, { "epoch": 0.21209005368309042, "grad_norm": 2.326547384262085, "learning_rate": 9.155592530708377e-06, "loss": 0.7449, "step": 17739 }, { "epoch": 0.21210200982795108, "grad_norm": 4.090002536773682, "learning_rate": 9.155484858014704e-06, "loss": 0.6853, "step": 17740 }, { "epoch": 0.21211396597281174, "grad_norm": 2.55901837348938, "learning_rate": 9.155377179089868e-06, "loss": 0.6316, "step": 17741 }, { "epoch": 0.21212592211767237, "grad_norm": 2.8403730392456055, "learning_rate": 9.15526949393403e-06, "loss": 0.6576, "step": 17742 }, { "epoch": 0.21213787826253303, "grad_norm": 2.089128017425537, "learning_rate": 9.15516180254735e-06, "loss": 0.5674, "step": 17743 }, { "epoch": 0.21214983440739368, "grad_norm": 1.8877123594284058, "learning_rate": 9.155054104929988e-06, "loss": 0.6211, "step": 17744 }, { "epoch": 0.21216179055225434, "grad_norm": 10.304569244384766, "learning_rate": 9.154946401082108e-06, "loss": 0.6864, "step": 17745 }, { "epoch": 0.21217374669711497, "grad_norm": 1.9133039712905884, "learning_rate": 9.154838691003874e-06, "loss": 0.6464, "step": 17746 }, { "epoch": 0.21218570284197563, "grad_norm": 3.5941760540008545, "learning_rate": 9.154730974695442e-06, "loss": 0.6232, "step": 17747 }, { "epoch": 0.2121976589868363, "grad_norm": 3.4573426246643066, "learning_rate": 9.154623252156977e-06, "loss": 0.7682, "step": 17748 }, { "epoch": 0.21220961513169695, "grad_norm": 1.937019944190979, "learning_rate": 9.154515523388639e-06, "loss": 0.6005, "step": 17749 }, { "epoch": 0.21222157127655758, "grad_norm": 3.194774627685547, "learning_rate": 9.15440778839059e-06, "loss": 0.6375, "step": 17750 }, { "epoch": 0.21223352742141824, "grad_norm": 1.963095784187317, "learning_rate": 9.154300047162992e-06, "loss": 0.5566, "step": 17751 }, { "epoch": 0.2122454835662789, "grad_norm": 1.833156704902649, "learning_rate": 9.15419229970601e-06, "loss": 0.5798, "step": 17752 }, { "epoch": 0.21225743971113953, "grad_norm": 3.185882806777954, "learning_rate": 9.154084546019797e-06, "loss": 0.5905, "step": 17753 }, { "epoch": 0.2122693958560002, "grad_norm": 3.070146083831787, "learning_rate": 9.15397678610452e-06, "loss": 0.6201, "step": 17754 }, { "epoch": 0.21228135200086085, "grad_norm": 3.2046284675598145, "learning_rate": 9.153869019960342e-06, "loss": 0.5741, "step": 17755 }, { "epoch": 0.2122933081457215, "grad_norm": 2.5989840030670166, "learning_rate": 9.15376124758742e-06, "loss": 0.5988, "step": 17756 }, { "epoch": 0.21230526429058214, "grad_norm": 2.710263252258301, "learning_rate": 9.153653468985922e-06, "loss": 0.7415, "step": 17757 }, { "epoch": 0.2123172204354428, "grad_norm": 1.9167122840881348, "learning_rate": 9.153545684156004e-06, "loss": 0.6286, "step": 17758 }, { "epoch": 0.21232917658030345, "grad_norm": 2.290473699569702, "learning_rate": 9.15343789309783e-06, "loss": 0.6444, "step": 17759 }, { "epoch": 0.2123411327251641, "grad_norm": 2.3445236682891846, "learning_rate": 9.153330095811559e-06, "loss": 0.6222, "step": 17760 }, { "epoch": 0.21235308887002474, "grad_norm": 2.990593433380127, "learning_rate": 9.153222292297358e-06, "loss": 0.6003, "step": 17761 }, { "epoch": 0.2123650450148854, "grad_norm": 3.2126688957214355, "learning_rate": 9.153114482555384e-06, "loss": 0.706, "step": 17762 }, { "epoch": 0.21237700115974606, "grad_norm": 2.8186490535736084, "learning_rate": 9.153006666585797e-06, "loss": 0.5087, "step": 17763 }, { "epoch": 0.2123889573046067, "grad_norm": 1.5845675468444824, "learning_rate": 9.152898844388766e-06, "loss": 0.5685, "step": 17764 }, { "epoch": 0.21240091344946735, "grad_norm": 1.9725641012191772, "learning_rate": 9.152791015964447e-06, "loss": 0.5835, "step": 17765 }, { "epoch": 0.212412869594328, "grad_norm": 4.426638603210449, "learning_rate": 9.152683181313003e-06, "loss": 0.6991, "step": 17766 }, { "epoch": 0.21242482573918867, "grad_norm": 1.980547308921814, "learning_rate": 9.152575340434596e-06, "loss": 0.5634, "step": 17767 }, { "epoch": 0.2124367818840493, "grad_norm": 1.665727138519287, "learning_rate": 9.152467493329387e-06, "loss": 0.6918, "step": 17768 }, { "epoch": 0.21244873802890996, "grad_norm": 2.159545660018921, "learning_rate": 9.152359639997538e-06, "loss": 0.6914, "step": 17769 }, { "epoch": 0.21246069417377061, "grad_norm": 6.367655277252197, "learning_rate": 9.152251780439212e-06, "loss": 0.6143, "step": 17770 }, { "epoch": 0.21247265031863127, "grad_norm": 1.4900591373443604, "learning_rate": 9.152143914654568e-06, "loss": 0.5685, "step": 17771 }, { "epoch": 0.2124846064634919, "grad_norm": 2.643704414367676, "learning_rate": 9.152036042643772e-06, "loss": 0.5638, "step": 17772 }, { "epoch": 0.21249656260835256, "grad_norm": 2.6428089141845703, "learning_rate": 9.151928164406982e-06, "loss": 0.5697, "step": 17773 }, { "epoch": 0.21250851875321322, "grad_norm": 3.0040481090545654, "learning_rate": 9.15182027994436e-06, "loss": 0.5886, "step": 17774 }, { "epoch": 0.21252047489807385, "grad_norm": 1.9343647956848145, "learning_rate": 9.15171238925607e-06, "loss": 0.6136, "step": 17775 }, { "epoch": 0.2125324310429345, "grad_norm": 1.5598609447479248, "learning_rate": 9.151604492342271e-06, "loss": 0.6012, "step": 17776 }, { "epoch": 0.21254438718779517, "grad_norm": 2.197324752807617, "learning_rate": 9.151496589203128e-06, "loss": 0.6082, "step": 17777 }, { "epoch": 0.21255634333265583, "grad_norm": 2.5994200706481934, "learning_rate": 9.1513886798388e-06, "loss": 0.5634, "step": 17778 }, { "epoch": 0.21256829947751646, "grad_norm": 2.835829496383667, "learning_rate": 9.151280764249451e-06, "loss": 0.5741, "step": 17779 }, { "epoch": 0.21258025562237712, "grad_norm": 2.456329584121704, "learning_rate": 9.15117284243524e-06, "loss": 0.6348, "step": 17780 }, { "epoch": 0.21259221176723778, "grad_norm": 1.765833854675293, "learning_rate": 9.151064914396333e-06, "loss": 0.5574, "step": 17781 }, { "epoch": 0.21260416791209844, "grad_norm": 18.87277603149414, "learning_rate": 9.150956980132888e-06, "loss": 0.6242, "step": 17782 }, { "epoch": 0.21261612405695907, "grad_norm": 4.343791484832764, "learning_rate": 9.150849039645068e-06, "loss": 0.6211, "step": 17783 }, { "epoch": 0.21262808020181972, "grad_norm": 2.623967170715332, "learning_rate": 9.150741092933035e-06, "loss": 0.6064, "step": 17784 }, { "epoch": 0.21264003634668038, "grad_norm": 6.6564717292785645, "learning_rate": 9.150633139996952e-06, "loss": 0.5533, "step": 17785 }, { "epoch": 0.21265199249154101, "grad_norm": 2.207411289215088, "learning_rate": 9.15052518083698e-06, "loss": 0.5903, "step": 17786 }, { "epoch": 0.21266394863640167, "grad_norm": 1.8832597732543945, "learning_rate": 9.15041721545328e-06, "loss": 0.6718, "step": 17787 }, { "epoch": 0.21267590478126233, "grad_norm": 1.6201032400131226, "learning_rate": 9.150309243846014e-06, "loss": 0.6431, "step": 17788 }, { "epoch": 0.212687860926123, "grad_norm": 1.7448350191116333, "learning_rate": 9.150201266015346e-06, "loss": 0.5944, "step": 17789 }, { "epoch": 0.21269981707098362, "grad_norm": 3.3240580558776855, "learning_rate": 9.150093281961434e-06, "loss": 0.6189, "step": 17790 }, { "epoch": 0.21271177321584428, "grad_norm": 2.1420035362243652, "learning_rate": 9.149985291684445e-06, "loss": 0.6072, "step": 17791 }, { "epoch": 0.21272372936070494, "grad_norm": 2.571512460708618, "learning_rate": 9.149877295184537e-06, "loss": 0.6277, "step": 17792 }, { "epoch": 0.2127356855055656, "grad_norm": 2.0739219188690186, "learning_rate": 9.149769292461874e-06, "loss": 0.5579, "step": 17793 }, { "epoch": 0.21274764165042623, "grad_norm": 1.9670906066894531, "learning_rate": 9.149661283516617e-06, "loss": 0.6443, "step": 17794 }, { "epoch": 0.2127595977952869, "grad_norm": 2.039724111557007, "learning_rate": 9.149553268348928e-06, "loss": 0.5927, "step": 17795 }, { "epoch": 0.21277155394014755, "grad_norm": 2.752561569213867, "learning_rate": 9.149445246958968e-06, "loss": 0.6739, "step": 17796 }, { "epoch": 0.21278351008500818, "grad_norm": 3.558819532394409, "learning_rate": 9.149337219346902e-06, "loss": 0.5891, "step": 17797 }, { "epoch": 0.21279546622986883, "grad_norm": 5.5273590087890625, "learning_rate": 9.149229185512888e-06, "loss": 0.5544, "step": 17798 }, { "epoch": 0.2128074223747295, "grad_norm": 2.550008535385132, "learning_rate": 9.149121145457091e-06, "loss": 0.6279, "step": 17799 }, { "epoch": 0.21281937851959015, "grad_norm": 3.60802960395813, "learning_rate": 9.149013099179673e-06, "loss": 0.5608, "step": 17800 }, { "epoch": 0.21283133466445078, "grad_norm": 2.0378661155700684, "learning_rate": 9.148905046680796e-06, "loss": 0.5639, "step": 17801 }, { "epoch": 0.21284329080931144, "grad_norm": 12.418354988098145, "learning_rate": 9.148796987960619e-06, "loss": 0.5951, "step": 17802 }, { "epoch": 0.2128552469541721, "grad_norm": 2.1398205757141113, "learning_rate": 9.148688923019307e-06, "loss": 0.4747, "step": 17803 }, { "epoch": 0.21286720309903276, "grad_norm": 2.043510913848877, "learning_rate": 9.148580851857021e-06, "loss": 0.6883, "step": 17804 }, { "epoch": 0.2128791592438934, "grad_norm": 2.059947967529297, "learning_rate": 9.148472774473924e-06, "loss": 0.7466, "step": 17805 }, { "epoch": 0.21289111538875405, "grad_norm": 5.188560485839844, "learning_rate": 9.148364690870176e-06, "loss": 0.6085, "step": 17806 }, { "epoch": 0.2129030715336147, "grad_norm": 2.508059024810791, "learning_rate": 9.148256601045941e-06, "loss": 0.5437, "step": 17807 }, { "epoch": 0.21291502767847537, "grad_norm": 1.905380129814148, "learning_rate": 9.14814850500138e-06, "loss": 0.6598, "step": 17808 }, { "epoch": 0.212926983823336, "grad_norm": 2.4098944664001465, "learning_rate": 9.148040402736657e-06, "loss": 0.5727, "step": 17809 }, { "epoch": 0.21293893996819666, "grad_norm": 3.6859917640686035, "learning_rate": 9.147932294251931e-06, "loss": 0.6694, "step": 17810 }, { "epoch": 0.21295089611305731, "grad_norm": 1.9619600772857666, "learning_rate": 9.147824179547366e-06, "loss": 0.6139, "step": 17811 }, { "epoch": 0.21296285225791795, "grad_norm": 2.2859950065612793, "learning_rate": 9.147716058623126e-06, "loss": 0.6179, "step": 17812 }, { "epoch": 0.2129748084027786, "grad_norm": 2.522559404373169, "learning_rate": 9.147607931479369e-06, "loss": 0.4828, "step": 17813 }, { "epoch": 0.21298676454763926, "grad_norm": 1.6053417921066284, "learning_rate": 9.14749979811626e-06, "loss": 0.5972, "step": 17814 }, { "epoch": 0.21299872069249992, "grad_norm": 9.960156440734863, "learning_rate": 9.14739165853396e-06, "loss": 0.7588, "step": 17815 }, { "epoch": 0.21301067683736055, "grad_norm": 6.733628749847412, "learning_rate": 9.147283512732631e-06, "loss": 0.5858, "step": 17816 }, { "epoch": 0.2130226329822212, "grad_norm": 3.25069522857666, "learning_rate": 9.147175360712436e-06, "loss": 0.5749, "step": 17817 }, { "epoch": 0.21303458912708187, "grad_norm": 2.591728925704956, "learning_rate": 9.147067202473537e-06, "loss": 0.5592, "step": 17818 }, { "epoch": 0.21304654527194253, "grad_norm": 2.4892406463623047, "learning_rate": 9.146959038016096e-06, "loss": 0.717, "step": 17819 }, { "epoch": 0.21305850141680316, "grad_norm": 2.276784896850586, "learning_rate": 9.146850867340275e-06, "loss": 0.6158, "step": 17820 }, { "epoch": 0.21307045756166382, "grad_norm": 3.5607986450195312, "learning_rate": 9.146742690446235e-06, "loss": 0.6821, "step": 17821 }, { "epoch": 0.21308241370652448, "grad_norm": 16.359262466430664, "learning_rate": 9.146634507334142e-06, "loss": 0.5874, "step": 17822 }, { "epoch": 0.2130943698513851, "grad_norm": 1.4511592388153076, "learning_rate": 9.146526318004153e-06, "loss": 0.6226, "step": 17823 }, { "epoch": 0.21310632599624577, "grad_norm": 2.764678955078125, "learning_rate": 9.146418122456438e-06, "loss": 0.5866, "step": 17824 }, { "epoch": 0.21311828214110642, "grad_norm": 2.966097593307495, "learning_rate": 9.14630992069115e-06, "loss": 0.6045, "step": 17825 }, { "epoch": 0.21313023828596708, "grad_norm": 1.9631189107894897, "learning_rate": 9.146201712708455e-06, "loss": 0.6348, "step": 17826 }, { "epoch": 0.21314219443082771, "grad_norm": 1.8649787902832031, "learning_rate": 9.14609349850852e-06, "loss": 0.6635, "step": 17827 }, { "epoch": 0.21315415057568837, "grad_norm": 1.63217294216156, "learning_rate": 9.1459852780915e-06, "loss": 0.6681, "step": 17828 }, { "epoch": 0.21316610672054903, "grad_norm": 3.0777535438537598, "learning_rate": 9.145877051457562e-06, "loss": 0.5987, "step": 17829 }, { "epoch": 0.2131780628654097, "grad_norm": 1.8889740705490112, "learning_rate": 9.145768818606864e-06, "loss": 0.6579, "step": 17830 }, { "epoch": 0.21319001901027032, "grad_norm": 1.9360960721969604, "learning_rate": 9.145660579539573e-06, "loss": 0.6503, "step": 17831 }, { "epoch": 0.21320197515513098, "grad_norm": 1.6352883577346802, "learning_rate": 9.145552334255848e-06, "loss": 0.6052, "step": 17832 }, { "epoch": 0.21321393129999164, "grad_norm": 4.9185261726379395, "learning_rate": 9.145444082755853e-06, "loss": 0.6098, "step": 17833 }, { "epoch": 0.21322588744485227, "grad_norm": 2.2817797660827637, "learning_rate": 9.14533582503975e-06, "loss": 0.6631, "step": 17834 }, { "epoch": 0.21323784358971293, "grad_norm": 1.9034430980682373, "learning_rate": 9.145227561107702e-06, "loss": 0.6179, "step": 17835 }, { "epoch": 0.2132497997345736, "grad_norm": 2.0153329372406006, "learning_rate": 9.14511929095987e-06, "loss": 0.6215, "step": 17836 }, { "epoch": 0.21326175587943424, "grad_norm": 2.071658134460449, "learning_rate": 9.145011014596417e-06, "loss": 0.5992, "step": 17837 }, { "epoch": 0.21327371202429488, "grad_norm": 2.094174385070801, "learning_rate": 9.144902732017504e-06, "loss": 0.6263, "step": 17838 }, { "epoch": 0.21328566816915553, "grad_norm": 2.077793836593628, "learning_rate": 9.144794443223294e-06, "loss": 0.5701, "step": 17839 }, { "epoch": 0.2132976243140162, "grad_norm": 1.7269021272659302, "learning_rate": 9.144686148213953e-06, "loss": 0.6531, "step": 17840 }, { "epoch": 0.21330958045887685, "grad_norm": 1.5954718589782715, "learning_rate": 9.14457784698964e-06, "loss": 0.5636, "step": 17841 }, { "epoch": 0.21332153660373748, "grad_norm": 2.3451802730560303, "learning_rate": 9.144469539550516e-06, "loss": 0.6107, "step": 17842 }, { "epoch": 0.21333349274859814, "grad_norm": 1.5897706747055054, "learning_rate": 9.144361225896745e-06, "loss": 0.6232, "step": 17843 }, { "epoch": 0.2133454488934588, "grad_norm": 2.0551016330718994, "learning_rate": 9.14425290602849e-06, "loss": 0.6572, "step": 17844 }, { "epoch": 0.21335740503831943, "grad_norm": 2.7187323570251465, "learning_rate": 9.144144579945915e-06, "loss": 0.6219, "step": 17845 }, { "epoch": 0.2133693611831801, "grad_norm": 3.391434669494629, "learning_rate": 9.144036247649179e-06, "loss": 0.5759, "step": 17846 }, { "epoch": 0.21338131732804075, "grad_norm": 2.0546882152557373, "learning_rate": 9.143927909138446e-06, "loss": 0.6569, "step": 17847 }, { "epoch": 0.2133932734729014, "grad_norm": 1.7781119346618652, "learning_rate": 9.143819564413878e-06, "loss": 0.6566, "step": 17848 }, { "epoch": 0.21340522961776204, "grad_norm": 3.251436948776245, "learning_rate": 9.143711213475639e-06, "loss": 0.6078, "step": 17849 }, { "epoch": 0.2134171857626227, "grad_norm": 2.0054657459259033, "learning_rate": 9.14360285632389e-06, "loss": 0.6053, "step": 17850 }, { "epoch": 0.21342914190748336, "grad_norm": 2.2377495765686035, "learning_rate": 9.143494492958793e-06, "loss": 0.6338, "step": 17851 }, { "epoch": 0.213441098052344, "grad_norm": 13.213162422180176, "learning_rate": 9.14338612338051e-06, "loss": 0.6389, "step": 17852 }, { "epoch": 0.21345305419720464, "grad_norm": 3.9290432929992676, "learning_rate": 9.143277747589207e-06, "loss": 0.7044, "step": 17853 }, { "epoch": 0.2134650103420653, "grad_norm": 3.0321877002716064, "learning_rate": 9.143169365585044e-06, "loss": 0.6845, "step": 17854 }, { "epoch": 0.21347696648692596, "grad_norm": 2.501600503921509, "learning_rate": 9.143060977368183e-06, "loss": 0.5894, "step": 17855 }, { "epoch": 0.2134889226317866, "grad_norm": 2.3526806831359863, "learning_rate": 9.142952582938788e-06, "loss": 0.6792, "step": 17856 }, { "epoch": 0.21350087877664725, "grad_norm": 2.052881956100464, "learning_rate": 9.14284418229702e-06, "loss": 0.6799, "step": 17857 }, { "epoch": 0.2135128349215079, "grad_norm": 3.9533112049102783, "learning_rate": 9.142735775443043e-06, "loss": 0.7123, "step": 17858 }, { "epoch": 0.21352479106636857, "grad_norm": 1.848438024520874, "learning_rate": 9.142627362377019e-06, "loss": 0.6209, "step": 17859 }, { "epoch": 0.2135367472112292, "grad_norm": 2.2857048511505127, "learning_rate": 9.14251894309911e-06, "loss": 0.5718, "step": 17860 }, { "epoch": 0.21354870335608986, "grad_norm": 2.5834572315216064, "learning_rate": 9.14241051760948e-06, "loss": 0.6254, "step": 17861 }, { "epoch": 0.21356065950095052, "grad_norm": 1.7298345565795898, "learning_rate": 9.14230208590829e-06, "loss": 0.5385, "step": 17862 }, { "epoch": 0.21357261564581118, "grad_norm": 1.8480548858642578, "learning_rate": 9.142193647995705e-06, "loss": 0.6926, "step": 17863 }, { "epoch": 0.2135845717906718, "grad_norm": 2.679828643798828, "learning_rate": 9.142085203871885e-06, "loss": 0.609, "step": 17864 }, { "epoch": 0.21359652793553247, "grad_norm": 2.1857519149780273, "learning_rate": 9.141976753536992e-06, "loss": 0.6423, "step": 17865 }, { "epoch": 0.21360848408039312, "grad_norm": 1.936524510383606, "learning_rate": 9.141868296991192e-06, "loss": 0.5414, "step": 17866 }, { "epoch": 0.21362044022525378, "grad_norm": 2.189967393875122, "learning_rate": 9.141759834234644e-06, "loss": 0.5582, "step": 17867 }, { "epoch": 0.2136323963701144, "grad_norm": 2.927196741104126, "learning_rate": 9.141651365267514e-06, "loss": 0.6557, "step": 17868 }, { "epoch": 0.21364435251497507, "grad_norm": 1.7756210565567017, "learning_rate": 9.141542890089963e-06, "loss": 0.6638, "step": 17869 }, { "epoch": 0.21365630865983573, "grad_norm": 1.5035395622253418, "learning_rate": 9.141434408702152e-06, "loss": 0.6163, "step": 17870 }, { "epoch": 0.21366826480469636, "grad_norm": 2.925199270248413, "learning_rate": 9.141325921104247e-06, "loss": 0.6108, "step": 17871 }, { "epoch": 0.21368022094955702, "grad_norm": 1.8934030532836914, "learning_rate": 9.141217427296409e-06, "loss": 0.6363, "step": 17872 }, { "epoch": 0.21369217709441768, "grad_norm": 3.8132200241088867, "learning_rate": 9.1411089272788e-06, "loss": 0.6058, "step": 17873 }, { "epoch": 0.21370413323927834, "grad_norm": 2.13692045211792, "learning_rate": 9.141000421051585e-06, "loss": 0.5507, "step": 17874 }, { "epoch": 0.21371608938413897, "grad_norm": 2.233734369277954, "learning_rate": 9.140891908614922e-06, "loss": 0.5951, "step": 17875 }, { "epoch": 0.21372804552899963, "grad_norm": 1.964353322982788, "learning_rate": 9.140783389968979e-06, "loss": 0.4894, "step": 17876 }, { "epoch": 0.21374000167386029, "grad_norm": 2.8960258960723877, "learning_rate": 9.140674865113917e-06, "loss": 0.5718, "step": 17877 }, { "epoch": 0.21375195781872094, "grad_norm": 2.0316684246063232, "learning_rate": 9.140566334049898e-06, "loss": 0.6563, "step": 17878 }, { "epoch": 0.21376391396358158, "grad_norm": 2.409802198410034, "learning_rate": 9.140457796777084e-06, "loss": 0.5754, "step": 17879 }, { "epoch": 0.21377587010844223, "grad_norm": 2.9228951930999756, "learning_rate": 9.14034925329564e-06, "loss": 0.5347, "step": 17880 }, { "epoch": 0.2137878262533029, "grad_norm": 2.642838954925537, "learning_rate": 9.140240703605728e-06, "loss": 0.7059, "step": 17881 }, { "epoch": 0.21379978239816352, "grad_norm": 1.8182231187820435, "learning_rate": 9.14013214770751e-06, "loss": 0.5602, "step": 17882 }, { "epoch": 0.21381173854302418, "grad_norm": 3.861687183380127, "learning_rate": 9.140023585601148e-06, "loss": 0.6812, "step": 17883 }, { "epoch": 0.21382369468788484, "grad_norm": 4.280677318572998, "learning_rate": 9.139915017286807e-06, "loss": 0.6519, "step": 17884 }, { "epoch": 0.2138356508327455, "grad_norm": 5.794256687164307, "learning_rate": 9.139806442764647e-06, "loss": 0.5877, "step": 17885 }, { "epoch": 0.21384760697760613, "grad_norm": 2.0045409202575684, "learning_rate": 9.139697862034836e-06, "loss": 0.5878, "step": 17886 }, { "epoch": 0.2138595631224668, "grad_norm": 2.194559097290039, "learning_rate": 9.13958927509753e-06, "loss": 0.6032, "step": 17887 }, { "epoch": 0.21387151926732745, "grad_norm": 1.6353625059127808, "learning_rate": 9.139480681952897e-06, "loss": 0.582, "step": 17888 }, { "epoch": 0.2138834754121881, "grad_norm": 2.8462605476379395, "learning_rate": 9.139372082601097e-06, "loss": 0.6441, "step": 17889 }, { "epoch": 0.21389543155704874, "grad_norm": 4.706494331359863, "learning_rate": 9.139263477042293e-06, "loss": 0.6919, "step": 17890 }, { "epoch": 0.2139073877019094, "grad_norm": 2.199787139892578, "learning_rate": 9.139154865276651e-06, "loss": 0.6371, "step": 17891 }, { "epoch": 0.21391934384677005, "grad_norm": 3.417668581008911, "learning_rate": 9.139046247304331e-06, "loss": 0.6093, "step": 17892 }, { "epoch": 0.21393129999163069, "grad_norm": 1.7624380588531494, "learning_rate": 9.138937623125494e-06, "loss": 0.6061, "step": 17893 }, { "epoch": 0.21394325613649134, "grad_norm": 3.677541494369507, "learning_rate": 9.138828992740307e-06, "loss": 0.7016, "step": 17894 }, { "epoch": 0.213955212281352, "grad_norm": 3.2059216499328613, "learning_rate": 9.138720356148932e-06, "loss": 0.6296, "step": 17895 }, { "epoch": 0.21396716842621266, "grad_norm": 2.187804698944092, "learning_rate": 9.13861171335153e-06, "loss": 0.7305, "step": 17896 }, { "epoch": 0.2139791245710733, "grad_norm": 2.575371503829956, "learning_rate": 9.138503064348267e-06, "loss": 0.6939, "step": 17897 }, { "epoch": 0.21399108071593395, "grad_norm": 2.587989568710327, "learning_rate": 9.138394409139301e-06, "loss": 0.6159, "step": 17898 }, { "epoch": 0.2140030368607946, "grad_norm": 9.365513801574707, "learning_rate": 9.138285747724801e-06, "loss": 0.5795, "step": 17899 }, { "epoch": 0.21401499300565527, "grad_norm": 2.324077844619751, "learning_rate": 9.138177080104924e-06, "loss": 0.5688, "step": 17900 }, { "epoch": 0.2140269491505159, "grad_norm": 2.1536777019500732, "learning_rate": 9.138068406279838e-06, "loss": 0.676, "step": 17901 }, { "epoch": 0.21403890529537656, "grad_norm": 2.2642629146575928, "learning_rate": 9.137959726249704e-06, "loss": 0.5744, "step": 17902 }, { "epoch": 0.21405086144023722, "grad_norm": 2.365448236465454, "learning_rate": 9.137851040014682e-06, "loss": 0.5184, "step": 17903 }, { "epoch": 0.21406281758509785, "grad_norm": 10.998905181884766, "learning_rate": 9.137742347574939e-06, "loss": 0.6764, "step": 17904 }, { "epoch": 0.2140747737299585, "grad_norm": 2.2649433612823486, "learning_rate": 9.137633648930637e-06, "loss": 0.7289, "step": 17905 }, { "epoch": 0.21408672987481916, "grad_norm": 2.6447489261627197, "learning_rate": 9.137524944081938e-06, "loss": 0.6016, "step": 17906 }, { "epoch": 0.21409868601967982, "grad_norm": 2.5036752223968506, "learning_rate": 9.137416233029007e-06, "loss": 0.553, "step": 17907 }, { "epoch": 0.21411064216454045, "grad_norm": 3.3226919174194336, "learning_rate": 9.137307515772004e-06, "loss": 0.5719, "step": 17908 }, { "epoch": 0.2141225983094011, "grad_norm": 2.4321229457855225, "learning_rate": 9.137198792311094e-06, "loss": 0.5877, "step": 17909 }, { "epoch": 0.21413455445426177, "grad_norm": 2.195692539215088, "learning_rate": 9.13709006264644e-06, "loss": 0.5968, "step": 17910 }, { "epoch": 0.21414651059912243, "grad_norm": 1.9824285507202148, "learning_rate": 9.136981326778205e-06, "loss": 0.5134, "step": 17911 }, { "epoch": 0.21415846674398306, "grad_norm": 2.178276777267456, "learning_rate": 9.136872584706551e-06, "loss": 0.6835, "step": 17912 }, { "epoch": 0.21417042288884372, "grad_norm": 2.1330864429473877, "learning_rate": 9.136763836431643e-06, "loss": 0.5927, "step": 17913 }, { "epoch": 0.21418237903370438, "grad_norm": 6.2704854011535645, "learning_rate": 9.136655081953641e-06, "loss": 0.6274, "step": 17914 }, { "epoch": 0.21419433517856504, "grad_norm": 2.793684720993042, "learning_rate": 9.136546321272713e-06, "loss": 0.5582, "step": 17915 }, { "epoch": 0.21420629132342567, "grad_norm": 1.95780348777771, "learning_rate": 9.136437554389016e-06, "loss": 0.5995, "step": 17916 }, { "epoch": 0.21421824746828633, "grad_norm": 2.4914987087249756, "learning_rate": 9.136328781302719e-06, "loss": 0.6299, "step": 17917 }, { "epoch": 0.21423020361314699, "grad_norm": 3.2195050716400146, "learning_rate": 9.13622000201398e-06, "loss": 0.6005, "step": 17918 }, { "epoch": 0.21424215975800762, "grad_norm": 3.1451237201690674, "learning_rate": 9.136111216522965e-06, "loss": 0.5873, "step": 17919 }, { "epoch": 0.21425411590286828, "grad_norm": 2.4720873832702637, "learning_rate": 9.136002424829838e-06, "loss": 0.6051, "step": 17920 }, { "epoch": 0.21426607204772893, "grad_norm": 2.0095415115356445, "learning_rate": 9.135893626934757e-06, "loss": 0.5665, "step": 17921 }, { "epoch": 0.2142780281925896, "grad_norm": 2.708012580871582, "learning_rate": 9.135784822837893e-06, "loss": 0.6166, "step": 17922 }, { "epoch": 0.21428998433745022, "grad_norm": 2.0070369243621826, "learning_rate": 9.135676012539404e-06, "loss": 0.5395, "step": 17923 }, { "epoch": 0.21430194048231088, "grad_norm": 5.435370922088623, "learning_rate": 9.135567196039453e-06, "loss": 0.5799, "step": 17924 }, { "epoch": 0.21431389662717154, "grad_norm": 1.8523812294006348, "learning_rate": 9.135458373338204e-06, "loss": 0.5881, "step": 17925 }, { "epoch": 0.2143258527720322, "grad_norm": 2.6665937900543213, "learning_rate": 9.13534954443582e-06, "loss": 0.6553, "step": 17926 }, { "epoch": 0.21433780891689283, "grad_norm": 2.07847261428833, "learning_rate": 9.135240709332468e-06, "loss": 0.5818, "step": 17927 }, { "epoch": 0.2143497650617535, "grad_norm": 4.100644111633301, "learning_rate": 9.135131868028304e-06, "loss": 0.5326, "step": 17928 }, { "epoch": 0.21436172120661415, "grad_norm": 2.8076627254486084, "learning_rate": 9.135023020523498e-06, "loss": 0.6927, "step": 17929 }, { "epoch": 0.21437367735147478, "grad_norm": 2.765479326248169, "learning_rate": 9.134914166818207e-06, "loss": 0.6527, "step": 17930 }, { "epoch": 0.21438563349633544, "grad_norm": 7.169365882873535, "learning_rate": 9.1348053069126e-06, "loss": 0.6632, "step": 17931 }, { "epoch": 0.2143975896411961, "grad_norm": 2.016280174255371, "learning_rate": 9.134696440806837e-06, "loss": 0.5858, "step": 17932 }, { "epoch": 0.21440954578605675, "grad_norm": 2.776057004928589, "learning_rate": 9.134587568501084e-06, "loss": 0.6018, "step": 17933 }, { "epoch": 0.21442150193091739, "grad_norm": 1.9629734754562378, "learning_rate": 9.134478689995499e-06, "loss": 0.5284, "step": 17934 }, { "epoch": 0.21443345807577804, "grad_norm": 3.1462292671203613, "learning_rate": 9.134369805290251e-06, "loss": 0.6063, "step": 17935 }, { "epoch": 0.2144454142206387, "grad_norm": 2.309981346130371, "learning_rate": 9.1342609143855e-06, "loss": 0.5456, "step": 17936 }, { "epoch": 0.21445737036549936, "grad_norm": 1.6129761934280396, "learning_rate": 9.134152017281409e-06, "loss": 0.485, "step": 17937 }, { "epoch": 0.21446932651036, "grad_norm": 2.599083185195923, "learning_rate": 9.134043113978143e-06, "loss": 0.6617, "step": 17938 }, { "epoch": 0.21448128265522065, "grad_norm": 2.073167324066162, "learning_rate": 9.133934204475865e-06, "loss": 0.5657, "step": 17939 }, { "epoch": 0.2144932388000813, "grad_norm": 1.6822502613067627, "learning_rate": 9.133825288774739e-06, "loss": 0.6076, "step": 17940 }, { "epoch": 0.21450519494494194, "grad_norm": 2.2761402130126953, "learning_rate": 9.133716366874926e-06, "loss": 0.5614, "step": 17941 }, { "epoch": 0.2145171510898026, "grad_norm": 2.0662319660186768, "learning_rate": 9.133607438776588e-06, "loss": 0.6263, "step": 17942 }, { "epoch": 0.21452910723466326, "grad_norm": 2.7957589626312256, "learning_rate": 9.133498504479894e-06, "loss": 0.638, "step": 17943 }, { "epoch": 0.21454106337952392, "grad_norm": 2.020505666732788, "learning_rate": 9.133389563985005e-06, "loss": 0.5657, "step": 17944 }, { "epoch": 0.21455301952438455, "grad_norm": 6.251152992248535, "learning_rate": 9.133280617292082e-06, "loss": 0.7252, "step": 17945 }, { "epoch": 0.2145649756692452, "grad_norm": 1.9817445278167725, "learning_rate": 9.133171664401292e-06, "loss": 0.6576, "step": 17946 }, { "epoch": 0.21457693181410586, "grad_norm": 2.032658338546753, "learning_rate": 9.133062705312794e-06, "loss": 0.6094, "step": 17947 }, { "epoch": 0.21458888795896652, "grad_norm": 2.796149492263794, "learning_rate": 9.132953740026753e-06, "loss": 0.5951, "step": 17948 }, { "epoch": 0.21460084410382715, "grad_norm": 1.687581181526184, "learning_rate": 9.132844768543336e-06, "loss": 0.5377, "step": 17949 }, { "epoch": 0.2146128002486878, "grad_norm": 4.610062122344971, "learning_rate": 9.132735790862703e-06, "loss": 0.6361, "step": 17950 }, { "epoch": 0.21462475639354847, "grad_norm": 1.9728220701217651, "learning_rate": 9.132626806985018e-06, "loss": 0.615, "step": 17951 }, { "epoch": 0.2146367125384091, "grad_norm": 6.392716884613037, "learning_rate": 9.132517816910444e-06, "loss": 0.5456, "step": 17952 }, { "epoch": 0.21464866868326976, "grad_norm": 2.0265116691589355, "learning_rate": 9.132408820639144e-06, "loss": 0.5716, "step": 17953 }, { "epoch": 0.21466062482813042, "grad_norm": 2.122701406478882, "learning_rate": 9.132299818171283e-06, "loss": 0.6211, "step": 17954 }, { "epoch": 0.21467258097299108, "grad_norm": 1.967575192451477, "learning_rate": 9.132190809507024e-06, "loss": 0.5735, "step": 17955 }, { "epoch": 0.2146845371178517, "grad_norm": 6.417200088500977, "learning_rate": 9.132081794646531e-06, "loss": 0.5505, "step": 17956 }, { "epoch": 0.21469649326271237, "grad_norm": 16.441463470458984, "learning_rate": 9.131972773589966e-06, "loss": 0.6321, "step": 17957 }, { "epoch": 0.21470844940757303, "grad_norm": 3.13543701171875, "learning_rate": 9.131863746337491e-06, "loss": 0.64, "step": 17958 }, { "epoch": 0.21472040555243369, "grad_norm": 1.7454999685287476, "learning_rate": 9.131754712889273e-06, "loss": 0.565, "step": 17959 }, { "epoch": 0.21473236169729432, "grad_norm": 7.011932849884033, "learning_rate": 9.131645673245477e-06, "loss": 0.6356, "step": 17960 }, { "epoch": 0.21474431784215497, "grad_norm": 6.376438140869141, "learning_rate": 9.13153662740626e-06, "loss": 0.5023, "step": 17961 }, { "epoch": 0.21475627398701563, "grad_norm": 2.5250167846679688, "learning_rate": 9.13142757537179e-06, "loss": 0.656, "step": 17962 }, { "epoch": 0.21476823013187626, "grad_norm": 1.7504397630691528, "learning_rate": 9.13131851714223e-06, "loss": 0.5588, "step": 17963 }, { "epoch": 0.21478018627673692, "grad_norm": 2.6503241062164307, "learning_rate": 9.131209452717743e-06, "loss": 0.5663, "step": 17964 }, { "epoch": 0.21479214242159758, "grad_norm": 2.118486166000366, "learning_rate": 9.131100382098493e-06, "loss": 0.5733, "step": 17965 }, { "epoch": 0.21480409856645824, "grad_norm": 1.9798121452331543, "learning_rate": 9.130991305284645e-06, "loss": 0.721, "step": 17966 }, { "epoch": 0.21481605471131887, "grad_norm": 3.046430826187134, "learning_rate": 9.130882222276358e-06, "loss": 0.6221, "step": 17967 }, { "epoch": 0.21482801085617953, "grad_norm": 5.567352294921875, "learning_rate": 9.1307731330738e-06, "loss": 0.6549, "step": 17968 }, { "epoch": 0.2148399670010402, "grad_norm": 1.591254711151123, "learning_rate": 9.130664037677132e-06, "loss": 0.5631, "step": 17969 }, { "epoch": 0.21485192314590085, "grad_norm": 2.027083158493042, "learning_rate": 9.130554936086518e-06, "loss": 0.589, "step": 17970 }, { "epoch": 0.21486387929076148, "grad_norm": 2.0714948177337646, "learning_rate": 9.130445828302123e-06, "loss": 0.6323, "step": 17971 }, { "epoch": 0.21487583543562214, "grad_norm": 2.3964614868164062, "learning_rate": 9.130336714324111e-06, "loss": 0.5872, "step": 17972 }, { "epoch": 0.2148877915804828, "grad_norm": 3.3154447078704834, "learning_rate": 9.130227594152643e-06, "loss": 0.6361, "step": 17973 }, { "epoch": 0.21489974772534345, "grad_norm": 2.618300437927246, "learning_rate": 9.130118467787886e-06, "loss": 0.5461, "step": 17974 }, { "epoch": 0.21491170387020408, "grad_norm": 6.395853519439697, "learning_rate": 9.13000933523e-06, "loss": 0.6489, "step": 17975 }, { "epoch": 0.21492366001506474, "grad_norm": 2.5779974460601807, "learning_rate": 9.12990019647915e-06, "loss": 0.6445, "step": 17976 }, { "epoch": 0.2149356161599254, "grad_norm": 2.5852227210998535, "learning_rate": 9.129791051535501e-06, "loss": 0.5395, "step": 17977 }, { "epoch": 0.21494757230478603, "grad_norm": 2.3815953731536865, "learning_rate": 9.129681900399214e-06, "loss": 0.6372, "step": 17978 }, { "epoch": 0.2149595284496467, "grad_norm": 2.029796838760376, "learning_rate": 9.129572743070456e-06, "loss": 0.5862, "step": 17979 }, { "epoch": 0.21497148459450735, "grad_norm": 2.2444097995758057, "learning_rate": 9.129463579549389e-06, "loss": 0.6625, "step": 17980 }, { "epoch": 0.214983440739368, "grad_norm": 2.981031894683838, "learning_rate": 9.129354409836175e-06, "loss": 0.5938, "step": 17981 }, { "epoch": 0.21499539688422864, "grad_norm": 1.925229549407959, "learning_rate": 9.129245233930981e-06, "loss": 0.6212, "step": 17982 }, { "epoch": 0.2150073530290893, "grad_norm": 4.299479007720947, "learning_rate": 9.129136051833968e-06, "loss": 0.5668, "step": 17983 }, { "epoch": 0.21501930917394996, "grad_norm": 4.326237678527832, "learning_rate": 9.129026863545302e-06, "loss": 0.5917, "step": 17984 }, { "epoch": 0.21503126531881062, "grad_norm": 3.0553886890411377, "learning_rate": 9.128917669065145e-06, "loss": 0.5708, "step": 17985 }, { "epoch": 0.21504322146367125, "grad_norm": 2.553245782852173, "learning_rate": 9.128808468393661e-06, "loss": 0.6554, "step": 17986 }, { "epoch": 0.2150551776085319, "grad_norm": 2.713890552520752, "learning_rate": 9.128699261531014e-06, "loss": 0.6218, "step": 17987 }, { "epoch": 0.21506713375339256, "grad_norm": 12.329564094543457, "learning_rate": 9.128590048477368e-06, "loss": 0.6592, "step": 17988 }, { "epoch": 0.2150790898982532, "grad_norm": 2.358203887939453, "learning_rate": 9.128480829232888e-06, "loss": 0.6895, "step": 17989 }, { "epoch": 0.21509104604311385, "grad_norm": 2.8581626415252686, "learning_rate": 9.128371603797734e-06, "loss": 0.5673, "step": 17990 }, { "epoch": 0.2151030021879745, "grad_norm": 2.0775396823883057, "learning_rate": 9.128262372172074e-06, "loss": 0.6145, "step": 17991 }, { "epoch": 0.21511495833283517, "grad_norm": 3.4872608184814453, "learning_rate": 9.128153134356068e-06, "loss": 0.5834, "step": 17992 }, { "epoch": 0.2151269144776958, "grad_norm": 1.754343867301941, "learning_rate": 9.128043890349882e-06, "loss": 0.6262, "step": 17993 }, { "epoch": 0.21513887062255646, "grad_norm": 17.703611373901367, "learning_rate": 9.127934640153682e-06, "loss": 0.5829, "step": 17994 }, { "epoch": 0.21515082676741712, "grad_norm": 3.37229323387146, "learning_rate": 9.127825383767627e-06, "loss": 0.6744, "step": 17995 }, { "epoch": 0.21516278291227778, "grad_norm": 1.564017653465271, "learning_rate": 9.127716121191883e-06, "loss": 0.5816, "step": 17996 }, { "epoch": 0.2151747390571384, "grad_norm": 2.999664545059204, "learning_rate": 9.127606852426614e-06, "loss": 0.6332, "step": 17997 }, { "epoch": 0.21518669520199907, "grad_norm": 1.6621580123901367, "learning_rate": 9.127497577471985e-06, "loss": 0.6543, "step": 17998 }, { "epoch": 0.21519865134685973, "grad_norm": 2.073992967605591, "learning_rate": 9.127388296328158e-06, "loss": 0.6492, "step": 17999 }, { "epoch": 0.21521060749172036, "grad_norm": 3.285078525543213, "learning_rate": 9.127279008995298e-06, "loss": 0.6825, "step": 18000 }, { "epoch": 0.21522256363658102, "grad_norm": 2.355356216430664, "learning_rate": 9.12716971547357e-06, "loss": 0.5817, "step": 18001 }, { "epoch": 0.21523451978144167, "grad_norm": 3.5444915294647217, "learning_rate": 9.127060415763134e-06, "loss": 0.6711, "step": 18002 }, { "epoch": 0.21524647592630233, "grad_norm": 1.6935111284255981, "learning_rate": 9.126951109864156e-06, "loss": 0.5527, "step": 18003 }, { "epoch": 0.21525843207116296, "grad_norm": 2.1160757541656494, "learning_rate": 9.126841797776802e-06, "loss": 0.6558, "step": 18004 }, { "epoch": 0.21527038821602362, "grad_norm": 4.626628875732422, "learning_rate": 9.126732479501233e-06, "loss": 0.6701, "step": 18005 }, { "epoch": 0.21528234436088428, "grad_norm": 6.626831531524658, "learning_rate": 9.126623155037614e-06, "loss": 0.6329, "step": 18006 }, { "epoch": 0.21529430050574494, "grad_norm": 2.7904365062713623, "learning_rate": 9.126513824386107e-06, "loss": 0.6169, "step": 18007 }, { "epoch": 0.21530625665060557, "grad_norm": 2.352672576904297, "learning_rate": 9.126404487546881e-06, "loss": 0.6105, "step": 18008 }, { "epoch": 0.21531821279546623, "grad_norm": 2.770298480987549, "learning_rate": 9.126295144520095e-06, "loss": 0.6094, "step": 18009 }, { "epoch": 0.2153301689403269, "grad_norm": 3.827202081680298, "learning_rate": 9.126185795305915e-06, "loss": 0.5814, "step": 18010 }, { "epoch": 0.21534212508518752, "grad_norm": 2.5366032123565674, "learning_rate": 9.126076439904506e-06, "loss": 0.6331, "step": 18011 }, { "epoch": 0.21535408123004818, "grad_norm": 11.21813678741455, "learning_rate": 9.12596707831603e-06, "loss": 0.6673, "step": 18012 }, { "epoch": 0.21536603737490884, "grad_norm": 1.3627642393112183, "learning_rate": 9.125857710540651e-06, "loss": 0.4821, "step": 18013 }, { "epoch": 0.2153779935197695, "grad_norm": 2.2283973693847656, "learning_rate": 9.125748336578534e-06, "loss": 0.5682, "step": 18014 }, { "epoch": 0.21538994966463013, "grad_norm": 1.713681697845459, "learning_rate": 9.125638956429843e-06, "loss": 0.5976, "step": 18015 }, { "epoch": 0.21540190580949078, "grad_norm": 2.5295729637145996, "learning_rate": 9.125529570094743e-06, "loss": 0.6472, "step": 18016 }, { "epoch": 0.21541386195435144, "grad_norm": 25.32636833190918, "learning_rate": 9.125420177573394e-06, "loss": 0.6196, "step": 18017 }, { "epoch": 0.2154258180992121, "grad_norm": 1.8351558446884155, "learning_rate": 9.125310778865963e-06, "loss": 0.5193, "step": 18018 }, { "epoch": 0.21543777424407273, "grad_norm": 2.1710729598999023, "learning_rate": 9.125201373972617e-06, "loss": 0.6494, "step": 18019 }, { "epoch": 0.2154497303889334, "grad_norm": 2.70215106010437, "learning_rate": 9.125091962893514e-06, "loss": 0.5526, "step": 18020 }, { "epoch": 0.21546168653379405, "grad_norm": 2.217366933822632, "learning_rate": 9.124982545628822e-06, "loss": 0.5919, "step": 18021 }, { "epoch": 0.21547364267865468, "grad_norm": 1.8889368772506714, "learning_rate": 9.124873122178703e-06, "loss": 0.6238, "step": 18022 }, { "epoch": 0.21548559882351534, "grad_norm": 1.8759498596191406, "learning_rate": 9.124763692543323e-06, "loss": 0.6126, "step": 18023 }, { "epoch": 0.215497554968376, "grad_norm": 2.615983724594116, "learning_rate": 9.124654256722845e-06, "loss": 0.6545, "step": 18024 }, { "epoch": 0.21550951111323666, "grad_norm": 1.606764793395996, "learning_rate": 9.124544814717433e-06, "loss": 0.6365, "step": 18025 }, { "epoch": 0.2155214672580973, "grad_norm": 2.552980422973633, "learning_rate": 9.124435366527253e-06, "loss": 0.6014, "step": 18026 }, { "epoch": 0.21553342340295795, "grad_norm": 2.4351818561553955, "learning_rate": 9.124325912152466e-06, "loss": 0.6031, "step": 18027 }, { "epoch": 0.2155453795478186, "grad_norm": 1.920862078666687, "learning_rate": 9.124216451593238e-06, "loss": 0.5888, "step": 18028 }, { "epoch": 0.21555733569267926, "grad_norm": 2.880207061767578, "learning_rate": 9.124106984849732e-06, "loss": 0.5902, "step": 18029 }, { "epoch": 0.2155692918375399, "grad_norm": 1.5976040363311768, "learning_rate": 9.123997511922113e-06, "loss": 0.4965, "step": 18030 }, { "epoch": 0.21558124798240055, "grad_norm": 1.9498481750488281, "learning_rate": 9.123888032810546e-06, "loss": 0.6937, "step": 18031 }, { "epoch": 0.2155932041272612, "grad_norm": 2.364006757736206, "learning_rate": 9.123778547515194e-06, "loss": 0.6721, "step": 18032 }, { "epoch": 0.21560516027212187, "grad_norm": 2.320650577545166, "learning_rate": 9.123669056036222e-06, "loss": 0.6353, "step": 18033 }, { "epoch": 0.2156171164169825, "grad_norm": 1.4446415901184082, "learning_rate": 9.123559558373792e-06, "loss": 0.643, "step": 18034 }, { "epoch": 0.21562907256184316, "grad_norm": 1.6978055238723755, "learning_rate": 9.123450054528071e-06, "loss": 0.5135, "step": 18035 }, { "epoch": 0.21564102870670382, "grad_norm": 2.251051425933838, "learning_rate": 9.123340544499222e-06, "loss": 0.6484, "step": 18036 }, { "epoch": 0.21565298485156445, "grad_norm": 2.4056975841522217, "learning_rate": 9.123231028287408e-06, "loss": 0.5302, "step": 18037 }, { "epoch": 0.2156649409964251, "grad_norm": 2.1468400955200195, "learning_rate": 9.123121505892796e-06, "loss": 0.5944, "step": 18038 }, { "epoch": 0.21567689714128577, "grad_norm": 2.1217408180236816, "learning_rate": 9.123011977315548e-06, "loss": 0.6491, "step": 18039 }, { "epoch": 0.21568885328614643, "grad_norm": 2.660830020904541, "learning_rate": 9.122902442555827e-06, "loss": 0.7455, "step": 18040 }, { "epoch": 0.21570080943100706, "grad_norm": 2.1867685317993164, "learning_rate": 9.122792901613802e-06, "loss": 0.5819, "step": 18041 }, { "epoch": 0.21571276557586772, "grad_norm": 1.567723274230957, "learning_rate": 9.122683354489634e-06, "loss": 0.5371, "step": 18042 }, { "epoch": 0.21572472172072837, "grad_norm": 6.332266807556152, "learning_rate": 9.122573801183487e-06, "loss": 0.6501, "step": 18043 }, { "epoch": 0.21573667786558903, "grad_norm": 2.141190528869629, "learning_rate": 9.122464241695526e-06, "loss": 0.5913, "step": 18044 }, { "epoch": 0.21574863401044966, "grad_norm": 1.7746238708496094, "learning_rate": 9.122354676025915e-06, "loss": 0.6607, "step": 18045 }, { "epoch": 0.21576059015531032, "grad_norm": 1.8753854036331177, "learning_rate": 9.122245104174819e-06, "loss": 0.579, "step": 18046 }, { "epoch": 0.21577254630017098, "grad_norm": 2.047973871231079, "learning_rate": 9.1221355261424e-06, "loss": 0.5907, "step": 18047 }, { "epoch": 0.2157845024450316, "grad_norm": 2.248833656311035, "learning_rate": 9.122025941928827e-06, "loss": 0.5581, "step": 18048 }, { "epoch": 0.21579645858989227, "grad_norm": 2.6565029621124268, "learning_rate": 9.121916351534259e-06, "loss": 0.7447, "step": 18049 }, { "epoch": 0.21580841473475293, "grad_norm": 2.3309273719787598, "learning_rate": 9.121806754958864e-06, "loss": 0.6682, "step": 18050 }, { "epoch": 0.2158203708796136, "grad_norm": 2.212324619293213, "learning_rate": 9.121697152202804e-06, "loss": 0.541, "step": 18051 }, { "epoch": 0.21583232702447422, "grad_norm": 5.60066556930542, "learning_rate": 9.121587543266246e-06, "loss": 0.6934, "step": 18052 }, { "epoch": 0.21584428316933488, "grad_norm": 2.103259801864624, "learning_rate": 9.121477928149352e-06, "loss": 0.7168, "step": 18053 }, { "epoch": 0.21585623931419554, "grad_norm": 1.375864863395691, "learning_rate": 9.121368306852288e-06, "loss": 0.6442, "step": 18054 }, { "epoch": 0.2158681954590562, "grad_norm": 2.599442481994629, "learning_rate": 9.121258679375217e-06, "loss": 0.6241, "step": 18055 }, { "epoch": 0.21588015160391683, "grad_norm": 2.5258429050445557, "learning_rate": 9.121149045718304e-06, "loss": 0.5385, "step": 18056 }, { "epoch": 0.21589210774877748, "grad_norm": 1.9229270219802856, "learning_rate": 9.121039405881713e-06, "loss": 0.6248, "step": 18057 }, { "epoch": 0.21590406389363814, "grad_norm": 2.5346009731292725, "learning_rate": 9.120929759865609e-06, "loss": 0.5831, "step": 18058 }, { "epoch": 0.21591602003849877, "grad_norm": 2.9695088863372803, "learning_rate": 9.120820107670157e-06, "loss": 0.5693, "step": 18059 }, { "epoch": 0.21592797618335943, "grad_norm": 2.346749782562256, "learning_rate": 9.12071044929552e-06, "loss": 0.5514, "step": 18060 }, { "epoch": 0.2159399323282201, "grad_norm": 2.452889919281006, "learning_rate": 9.120600784741862e-06, "loss": 0.6529, "step": 18061 }, { "epoch": 0.21595188847308075, "grad_norm": 2.5165212154388428, "learning_rate": 9.12049111400935e-06, "loss": 0.644, "step": 18062 }, { "epoch": 0.21596384461794138, "grad_norm": 2.958423614501953, "learning_rate": 9.120381437098145e-06, "loss": 0.6622, "step": 18063 }, { "epoch": 0.21597580076280204, "grad_norm": 2.7446587085723877, "learning_rate": 9.120271754008415e-06, "loss": 0.6011, "step": 18064 }, { "epoch": 0.2159877569076627, "grad_norm": 2.1969354152679443, "learning_rate": 9.120162064740323e-06, "loss": 0.5401, "step": 18065 }, { "epoch": 0.21599971305252336, "grad_norm": 6.104666233062744, "learning_rate": 9.120052369294032e-06, "loss": 0.6991, "step": 18066 }, { "epoch": 0.216011669197384, "grad_norm": 2.0709521770477295, "learning_rate": 9.11994266766971e-06, "loss": 0.6152, "step": 18067 }, { "epoch": 0.21602362534224465, "grad_norm": 2.5437142848968506, "learning_rate": 9.119832959867517e-06, "loss": 0.6672, "step": 18068 }, { "epoch": 0.2160355814871053, "grad_norm": 1.944000244140625, "learning_rate": 9.119723245887622e-06, "loss": 0.6315, "step": 18069 }, { "epoch": 0.21604753763196594, "grad_norm": 2.7529869079589844, "learning_rate": 9.119613525730184e-06, "loss": 0.6342, "step": 18070 }, { "epoch": 0.2160594937768266, "grad_norm": 2.5702662467956543, "learning_rate": 9.119503799395373e-06, "loss": 0.5816, "step": 18071 }, { "epoch": 0.21607144992168725, "grad_norm": 3.530075788497925, "learning_rate": 9.119394066883353e-06, "loss": 0.6814, "step": 18072 }, { "epoch": 0.2160834060665479, "grad_norm": 2.090477466583252, "learning_rate": 9.119284328194285e-06, "loss": 0.5907, "step": 18073 }, { "epoch": 0.21609536221140854, "grad_norm": 1.8462964296340942, "learning_rate": 9.119174583328336e-06, "loss": 0.6152, "step": 18074 }, { "epoch": 0.2161073183562692, "grad_norm": 1.8783563375473022, "learning_rate": 9.11906483228567e-06, "loss": 0.6763, "step": 18075 }, { "epoch": 0.21611927450112986, "grad_norm": 1.7567893266677856, "learning_rate": 9.11895507506645e-06, "loss": 0.5607, "step": 18076 }, { "epoch": 0.21613123064599052, "grad_norm": 3.280752182006836, "learning_rate": 9.118845311670845e-06, "loss": 0.6044, "step": 18077 }, { "epoch": 0.21614318679085115, "grad_norm": 8.418588638305664, "learning_rate": 9.118735542099015e-06, "loss": 0.6793, "step": 18078 }, { "epoch": 0.2161551429357118, "grad_norm": 1.9026678800582886, "learning_rate": 9.118625766351127e-06, "loss": 0.6267, "step": 18079 }, { "epoch": 0.21616709908057247, "grad_norm": 4.07839298248291, "learning_rate": 9.118515984427346e-06, "loss": 0.5525, "step": 18080 }, { "epoch": 0.2161790552254331, "grad_norm": 1.7070437669754028, "learning_rate": 9.118406196327836e-06, "loss": 0.5856, "step": 18081 }, { "epoch": 0.21619101137029376, "grad_norm": 3.063061475753784, "learning_rate": 9.11829640205276e-06, "loss": 0.657, "step": 18082 }, { "epoch": 0.21620296751515442, "grad_norm": 3.4520463943481445, "learning_rate": 9.118186601602283e-06, "loss": 0.7024, "step": 18083 }, { "epoch": 0.21621492366001507, "grad_norm": 3.7442967891693115, "learning_rate": 9.118076794976572e-06, "loss": 0.6014, "step": 18084 }, { "epoch": 0.2162268798048757, "grad_norm": 2.437209367752075, "learning_rate": 9.11796698217579e-06, "loss": 0.6664, "step": 18085 }, { "epoch": 0.21623883594973636, "grad_norm": 5.086016654968262, "learning_rate": 9.117857163200101e-06, "loss": 0.6634, "step": 18086 }, { "epoch": 0.21625079209459702, "grad_norm": 3.2566959857940674, "learning_rate": 9.117747338049671e-06, "loss": 0.6279, "step": 18087 }, { "epoch": 0.21626274823945768, "grad_norm": 3.4941165447235107, "learning_rate": 9.117637506724664e-06, "loss": 0.5699, "step": 18088 }, { "epoch": 0.2162747043843183, "grad_norm": 5.0517425537109375, "learning_rate": 9.117527669225245e-06, "loss": 0.623, "step": 18089 }, { "epoch": 0.21628666052917897, "grad_norm": 2.7300171852111816, "learning_rate": 9.11741782555158e-06, "loss": 0.6604, "step": 18090 }, { "epoch": 0.21629861667403963, "grad_norm": 1.964455246925354, "learning_rate": 9.11730797570383e-06, "loss": 0.6218, "step": 18091 }, { "epoch": 0.2163105728189003, "grad_norm": 5.3093581199646, "learning_rate": 9.117198119682165e-06, "loss": 0.582, "step": 18092 }, { "epoch": 0.21632252896376092, "grad_norm": 2.4166035652160645, "learning_rate": 9.117088257486744e-06, "loss": 0.638, "step": 18093 }, { "epoch": 0.21633448510862158, "grad_norm": 2.308527708053589, "learning_rate": 9.116978389117736e-06, "loss": 0.6345, "step": 18094 }, { "epoch": 0.21634644125348224, "grad_norm": 2.2011170387268066, "learning_rate": 9.116868514575304e-06, "loss": 0.6784, "step": 18095 }, { "epoch": 0.21635839739834287, "grad_norm": 2.25700044631958, "learning_rate": 9.116758633859612e-06, "loss": 0.6236, "step": 18096 }, { "epoch": 0.21637035354320353, "grad_norm": 2.9859812259674072, "learning_rate": 9.116648746970826e-06, "loss": 0.6517, "step": 18097 }, { "epoch": 0.21638230968806418, "grad_norm": 1.9152250289916992, "learning_rate": 9.116538853909112e-06, "loss": 0.7402, "step": 18098 }, { "epoch": 0.21639426583292484, "grad_norm": 1.6460649967193604, "learning_rate": 9.116428954674632e-06, "loss": 0.5159, "step": 18099 }, { "epoch": 0.21640622197778547, "grad_norm": 1.8708046674728394, "learning_rate": 9.116319049267554e-06, "loss": 0.5994, "step": 18100 }, { "epoch": 0.21641817812264613, "grad_norm": 2.698551654815674, "learning_rate": 9.116209137688039e-06, "loss": 0.5965, "step": 18101 }, { "epoch": 0.2164301342675068, "grad_norm": 9.904047012329102, "learning_rate": 9.116099219936255e-06, "loss": 0.6415, "step": 18102 }, { "epoch": 0.21644209041236745, "grad_norm": 3.7122178077697754, "learning_rate": 9.115989296012364e-06, "loss": 0.6223, "step": 18103 }, { "epoch": 0.21645404655722808, "grad_norm": 4.0809831619262695, "learning_rate": 9.115879365916535e-06, "loss": 0.5753, "step": 18104 }, { "epoch": 0.21646600270208874, "grad_norm": 2.1000709533691406, "learning_rate": 9.115769429648928e-06, "loss": 0.618, "step": 18105 }, { "epoch": 0.2164779588469494, "grad_norm": 1.6633901596069336, "learning_rate": 9.115659487209712e-06, "loss": 0.5727, "step": 18106 }, { "epoch": 0.21648991499181003, "grad_norm": 3.240673542022705, "learning_rate": 9.115549538599047e-06, "loss": 0.7767, "step": 18107 }, { "epoch": 0.2165018711366707, "grad_norm": 3.236935615539551, "learning_rate": 9.115439583817104e-06, "loss": 0.4943, "step": 18108 }, { "epoch": 0.21651382728153135, "grad_norm": 1.9579591751098633, "learning_rate": 9.115329622864043e-06, "loss": 0.6075, "step": 18109 }, { "epoch": 0.216525783426392, "grad_norm": 2.240286350250244, "learning_rate": 9.11521965574003e-06, "loss": 0.5771, "step": 18110 }, { "epoch": 0.21653773957125264, "grad_norm": 2.037916898727417, "learning_rate": 9.115109682445233e-06, "loss": 0.6542, "step": 18111 }, { "epoch": 0.2165496957161133, "grad_norm": 5.753844261169434, "learning_rate": 9.114999702979811e-06, "loss": 0.6136, "step": 18112 }, { "epoch": 0.21656165186097395, "grad_norm": 2.40138840675354, "learning_rate": 9.114889717343936e-06, "loss": 0.5135, "step": 18113 }, { "epoch": 0.2165736080058346, "grad_norm": 2.669060230255127, "learning_rate": 9.114779725537767e-06, "loss": 0.6548, "step": 18114 }, { "epoch": 0.21658556415069524, "grad_norm": 3.2259225845336914, "learning_rate": 9.114669727561472e-06, "loss": 0.6239, "step": 18115 }, { "epoch": 0.2165975202955559, "grad_norm": 4.1665167808532715, "learning_rate": 9.114559723415214e-06, "loss": 0.6495, "step": 18116 }, { "epoch": 0.21660947644041656, "grad_norm": 3.09079647064209, "learning_rate": 9.11444971309916e-06, "loss": 0.6286, "step": 18117 }, { "epoch": 0.2166214325852772, "grad_norm": 4.19186544418335, "learning_rate": 9.114339696613474e-06, "loss": 0.5809, "step": 18118 }, { "epoch": 0.21663338873013785, "grad_norm": 1.9802824258804321, "learning_rate": 9.114229673958322e-06, "loss": 0.5675, "step": 18119 }, { "epoch": 0.2166453448749985, "grad_norm": 2.384613037109375, "learning_rate": 9.114119645133866e-06, "loss": 0.5134, "step": 18120 }, { "epoch": 0.21665730101985917, "grad_norm": 1.712088942527771, "learning_rate": 9.114009610140272e-06, "loss": 0.5972, "step": 18121 }, { "epoch": 0.2166692571647198, "grad_norm": 2.1663153171539307, "learning_rate": 9.11389956897771e-06, "loss": 0.6026, "step": 18122 }, { "epoch": 0.21668121330958046, "grad_norm": 3.610097646713257, "learning_rate": 9.113789521646337e-06, "loss": 0.6782, "step": 18123 }, { "epoch": 0.21669316945444111, "grad_norm": 2.43202543258667, "learning_rate": 9.113679468146324e-06, "loss": 0.6328, "step": 18124 }, { "epoch": 0.21670512559930177, "grad_norm": 2.494358539581299, "learning_rate": 9.113569408477834e-06, "loss": 0.6338, "step": 18125 }, { "epoch": 0.2167170817441624, "grad_norm": 2.804044485092163, "learning_rate": 9.113459342641032e-06, "loss": 0.6153, "step": 18126 }, { "epoch": 0.21672903788902306, "grad_norm": 2.7398300170898438, "learning_rate": 9.113349270636083e-06, "loss": 0.6201, "step": 18127 }, { "epoch": 0.21674099403388372, "grad_norm": 3.0402326583862305, "learning_rate": 9.113239192463153e-06, "loss": 0.5827, "step": 18128 }, { "epoch": 0.21675295017874435, "grad_norm": 1.8356047868728638, "learning_rate": 9.113129108122406e-06, "loss": 0.6707, "step": 18129 }, { "epoch": 0.216764906323605, "grad_norm": 2.1262922286987305, "learning_rate": 9.113019017614004e-06, "loss": 0.6671, "step": 18130 }, { "epoch": 0.21677686246846567, "grad_norm": 2.9737255573272705, "learning_rate": 9.11290892093812e-06, "loss": 0.6559, "step": 18131 }, { "epoch": 0.21678881861332633, "grad_norm": 2.6906685829162598, "learning_rate": 9.112798818094912e-06, "loss": 0.5437, "step": 18132 }, { "epoch": 0.21680077475818696, "grad_norm": 2.6522271633148193, "learning_rate": 9.112688709084547e-06, "loss": 0.5962, "step": 18133 }, { "epoch": 0.21681273090304762, "grad_norm": 1.6098393201828003, "learning_rate": 9.112578593907191e-06, "loss": 0.5632, "step": 18134 }, { "epoch": 0.21682468704790828, "grad_norm": 1.706417202949524, "learning_rate": 9.11246847256301e-06, "loss": 0.567, "step": 18135 }, { "epoch": 0.21683664319276894, "grad_norm": 2.790139675140381, "learning_rate": 9.112358345052167e-06, "loss": 0.6921, "step": 18136 }, { "epoch": 0.21684859933762957, "grad_norm": 1.768017292022705, "learning_rate": 9.11224821137483e-06, "loss": 0.6179, "step": 18137 }, { "epoch": 0.21686055548249022, "grad_norm": 2.426521062850952, "learning_rate": 9.11213807153116e-06, "loss": 0.6279, "step": 18138 }, { "epoch": 0.21687251162735088, "grad_norm": 3.1998462677001953, "learning_rate": 9.112027925521325e-06, "loss": 0.6056, "step": 18139 }, { "epoch": 0.21688446777221151, "grad_norm": 1.8653546571731567, "learning_rate": 9.111917773345489e-06, "loss": 0.5668, "step": 18140 }, { "epoch": 0.21689642391707217, "grad_norm": 6.075760841369629, "learning_rate": 9.111807615003818e-06, "loss": 0.6248, "step": 18141 }, { "epoch": 0.21690838006193283, "grad_norm": 1.957468867301941, "learning_rate": 9.111697450496477e-06, "loss": 0.6381, "step": 18142 }, { "epoch": 0.2169203362067935, "grad_norm": 2.664297342300415, "learning_rate": 9.11158727982363e-06, "loss": 0.6637, "step": 18143 }, { "epoch": 0.21693229235165412, "grad_norm": 2.3441274166107178, "learning_rate": 9.111477102985445e-06, "loss": 0.6274, "step": 18144 }, { "epoch": 0.21694424849651478, "grad_norm": 1.762402892112732, "learning_rate": 9.111366919982084e-06, "loss": 0.5644, "step": 18145 }, { "epoch": 0.21695620464137544, "grad_norm": 2.6375982761383057, "learning_rate": 9.111256730813715e-06, "loss": 0.6973, "step": 18146 }, { "epoch": 0.2169681607862361, "grad_norm": 1.7184828519821167, "learning_rate": 9.111146535480501e-06, "loss": 0.6344, "step": 18147 }, { "epoch": 0.21698011693109673, "grad_norm": 2.480628490447998, "learning_rate": 9.111036333982607e-06, "loss": 0.6663, "step": 18148 }, { "epoch": 0.2169920730759574, "grad_norm": 1.9218108654022217, "learning_rate": 9.110926126320203e-06, "loss": 0.5302, "step": 18149 }, { "epoch": 0.21700402922081805, "grad_norm": 2.7902681827545166, "learning_rate": 9.110815912493448e-06, "loss": 0.6178, "step": 18150 }, { "epoch": 0.2170159853656787, "grad_norm": 2.2937119007110596, "learning_rate": 9.11070569250251e-06, "loss": 0.6063, "step": 18151 }, { "epoch": 0.21702794151053934, "grad_norm": 2.6196279525756836, "learning_rate": 9.110595466347555e-06, "loss": 0.6261, "step": 18152 }, { "epoch": 0.2170398976554, "grad_norm": 2.4685113430023193, "learning_rate": 9.110485234028748e-06, "loss": 0.6498, "step": 18153 }, { "epoch": 0.21705185380026065, "grad_norm": 5.525769233703613, "learning_rate": 9.110374995546252e-06, "loss": 0.6445, "step": 18154 }, { "epoch": 0.21706380994512128, "grad_norm": 1.9901033639907837, "learning_rate": 9.110264750900235e-06, "loss": 0.6367, "step": 18155 }, { "epoch": 0.21707576608998194, "grad_norm": 2.1094164848327637, "learning_rate": 9.110154500090861e-06, "loss": 0.6897, "step": 18156 }, { "epoch": 0.2170877222348426, "grad_norm": 3.2363383769989014, "learning_rate": 9.110044243118296e-06, "loss": 0.5121, "step": 18157 }, { "epoch": 0.21709967837970326, "grad_norm": 4.499545574188232, "learning_rate": 9.109933979982706e-06, "loss": 0.5541, "step": 18158 }, { "epoch": 0.2171116345245639, "grad_norm": 2.269150495529175, "learning_rate": 9.109823710684253e-06, "loss": 0.5411, "step": 18159 }, { "epoch": 0.21712359066942455, "grad_norm": 4.8283538818359375, "learning_rate": 9.109713435223107e-06, "loss": 0.5849, "step": 18160 }, { "epoch": 0.2171355468142852, "grad_norm": 1.5955649614334106, "learning_rate": 9.109603153599432e-06, "loss": 0.5352, "step": 18161 }, { "epoch": 0.21714750295914587, "grad_norm": 2.214942693710327, "learning_rate": 9.10949286581339e-06, "loss": 0.5893, "step": 18162 }, { "epoch": 0.2171594591040065, "grad_norm": 3.609358072280884, "learning_rate": 9.109382571865148e-06, "loss": 0.6599, "step": 18163 }, { "epoch": 0.21717141524886716, "grad_norm": 1.6800181865692139, "learning_rate": 9.109272271754874e-06, "loss": 0.5989, "step": 18164 }, { "epoch": 0.21718337139372781, "grad_norm": 1.819475531578064, "learning_rate": 9.109161965482732e-06, "loss": 0.6271, "step": 18165 }, { "epoch": 0.21719532753858845, "grad_norm": 2.2204010486602783, "learning_rate": 9.109051653048885e-06, "loss": 0.6389, "step": 18166 }, { "epoch": 0.2172072836834491, "grad_norm": 2.5131542682647705, "learning_rate": 9.108941334453504e-06, "loss": 0.4906, "step": 18167 }, { "epoch": 0.21721923982830976, "grad_norm": 2.6495985984802246, "learning_rate": 9.108831009696748e-06, "loss": 0.6309, "step": 18168 }, { "epoch": 0.21723119597317042, "grad_norm": 3.358497381210327, "learning_rate": 9.108720678778786e-06, "loss": 0.663, "step": 18169 }, { "epoch": 0.21724315211803105, "grad_norm": 4.043706893920898, "learning_rate": 9.108610341699784e-06, "loss": 0.5912, "step": 18170 }, { "epoch": 0.2172551082628917, "grad_norm": 6.77446174621582, "learning_rate": 9.108499998459904e-06, "loss": 0.6311, "step": 18171 }, { "epoch": 0.21726706440775237, "grad_norm": 1.8737951517105103, "learning_rate": 9.108389649059315e-06, "loss": 0.5543, "step": 18172 }, { "epoch": 0.21727902055261303, "grad_norm": 2.240032911300659, "learning_rate": 9.108279293498181e-06, "loss": 0.6003, "step": 18173 }, { "epoch": 0.21729097669747366, "grad_norm": 2.002533435821533, "learning_rate": 9.108168931776667e-06, "loss": 0.546, "step": 18174 }, { "epoch": 0.21730293284233432, "grad_norm": 1.7905256748199463, "learning_rate": 9.108058563894941e-06, "loss": 0.699, "step": 18175 }, { "epoch": 0.21731488898719498, "grad_norm": 2.2872045040130615, "learning_rate": 9.107948189853164e-06, "loss": 0.6371, "step": 18176 }, { "epoch": 0.2173268451320556, "grad_norm": 1.3939323425292969, "learning_rate": 9.107837809651505e-06, "loss": 0.6077, "step": 18177 }, { "epoch": 0.21733880127691627, "grad_norm": 4.45689582824707, "learning_rate": 9.10772742329013e-06, "loss": 0.6979, "step": 18178 }, { "epoch": 0.21735075742177692, "grad_norm": 5.196093559265137, "learning_rate": 9.1076170307692e-06, "loss": 0.5584, "step": 18179 }, { "epoch": 0.21736271356663758, "grad_norm": 2.6604487895965576, "learning_rate": 9.107506632088888e-06, "loss": 0.593, "step": 18180 }, { "epoch": 0.21737466971149821, "grad_norm": 2.2236201763153076, "learning_rate": 9.107396227249352e-06, "loss": 0.6435, "step": 18181 }, { "epoch": 0.21738662585635887, "grad_norm": 1.5030001401901245, "learning_rate": 9.107285816250761e-06, "loss": 0.5804, "step": 18182 }, { "epoch": 0.21739858200121953, "grad_norm": 1.7022708654403687, "learning_rate": 9.107175399093281e-06, "loss": 0.5071, "step": 18183 }, { "epoch": 0.2174105381460802, "grad_norm": 3.972128391265869, "learning_rate": 9.107064975777077e-06, "loss": 0.6351, "step": 18184 }, { "epoch": 0.21742249429094082, "grad_norm": 2.366971969604492, "learning_rate": 9.106954546302314e-06, "loss": 0.6136, "step": 18185 }, { "epoch": 0.21743445043580148, "grad_norm": 3.8909053802490234, "learning_rate": 9.106844110669157e-06, "loss": 0.5741, "step": 18186 }, { "epoch": 0.21744640658066214, "grad_norm": 1.6208332777023315, "learning_rate": 9.106733668877775e-06, "loss": 0.5872, "step": 18187 }, { "epoch": 0.21745836272552277, "grad_norm": 2.1223671436309814, "learning_rate": 9.10662322092833e-06, "loss": 0.6351, "step": 18188 }, { "epoch": 0.21747031887038343, "grad_norm": 2.1095030307769775, "learning_rate": 9.10651276682099e-06, "loss": 0.6085, "step": 18189 }, { "epoch": 0.2174822750152441, "grad_norm": 3.2627456188201904, "learning_rate": 9.106402306555918e-06, "loss": 0.5542, "step": 18190 }, { "epoch": 0.21749423116010475, "grad_norm": 1.8566396236419678, "learning_rate": 9.106291840133281e-06, "loss": 0.6546, "step": 18191 }, { "epoch": 0.21750618730496538, "grad_norm": 7.389224529266357, "learning_rate": 9.106181367553246e-06, "loss": 0.6253, "step": 18192 }, { "epoch": 0.21751814344982603, "grad_norm": 3.3052358627319336, "learning_rate": 9.106070888815977e-06, "loss": 0.6209, "step": 18193 }, { "epoch": 0.2175300995946867, "grad_norm": 4.658082962036133, "learning_rate": 9.105960403921641e-06, "loss": 0.6667, "step": 18194 }, { "epoch": 0.21754205573954735, "grad_norm": 3.7538223266601562, "learning_rate": 9.105849912870402e-06, "loss": 0.6924, "step": 18195 }, { "epoch": 0.21755401188440798, "grad_norm": 4.399927616119385, "learning_rate": 9.105739415662425e-06, "loss": 0.5788, "step": 18196 }, { "epoch": 0.21756596802926864, "grad_norm": 4.312838077545166, "learning_rate": 9.105628912297879e-06, "loss": 0.6502, "step": 18197 }, { "epoch": 0.2175779241741293, "grad_norm": 3.0298473834991455, "learning_rate": 9.105518402776927e-06, "loss": 0.6117, "step": 18198 }, { "epoch": 0.21758988031898993, "grad_norm": 1.7652397155761719, "learning_rate": 9.105407887099735e-06, "loss": 0.5814, "step": 18199 }, { "epoch": 0.2176018364638506, "grad_norm": 2.372205972671509, "learning_rate": 9.105297365266473e-06, "loss": 0.5049, "step": 18200 }, { "epoch": 0.21761379260871125, "grad_norm": 2.426497220993042, "learning_rate": 9.105186837277298e-06, "loss": 0.5546, "step": 18201 }, { "epoch": 0.2176257487535719, "grad_norm": 3.810697078704834, "learning_rate": 9.105076303132383e-06, "loss": 0.7182, "step": 18202 }, { "epoch": 0.21763770489843254, "grad_norm": 2.838580369949341, "learning_rate": 9.104965762831892e-06, "loss": 0.5707, "step": 18203 }, { "epoch": 0.2176496610432932, "grad_norm": 2.4495015144348145, "learning_rate": 9.10485521637599e-06, "loss": 0.5675, "step": 18204 }, { "epoch": 0.21766161718815386, "grad_norm": 4.0751566886901855, "learning_rate": 9.104744663764841e-06, "loss": 0.5847, "step": 18205 }, { "epoch": 0.21767357333301451, "grad_norm": 3.2310245037078857, "learning_rate": 9.104634104998615e-06, "loss": 0.5457, "step": 18206 }, { "epoch": 0.21768552947787514, "grad_norm": 4.057163238525391, "learning_rate": 9.104523540077476e-06, "loss": 0.6287, "step": 18207 }, { "epoch": 0.2176974856227358, "grad_norm": 2.5631749629974365, "learning_rate": 9.104412969001589e-06, "loss": 0.6622, "step": 18208 }, { "epoch": 0.21770944176759646, "grad_norm": 2.213158130645752, "learning_rate": 9.104302391771118e-06, "loss": 0.6999, "step": 18209 }, { "epoch": 0.21772139791245712, "grad_norm": 1.7681940793991089, "learning_rate": 9.104191808386234e-06, "loss": 0.6355, "step": 18210 }, { "epoch": 0.21773335405731775, "grad_norm": 2.2303528785705566, "learning_rate": 9.104081218847099e-06, "loss": 0.6223, "step": 18211 }, { "epoch": 0.2177453102021784, "grad_norm": 2.145707130432129, "learning_rate": 9.103970623153877e-06, "loss": 0.5468, "step": 18212 }, { "epoch": 0.21775726634703907, "grad_norm": 2.532104253768921, "learning_rate": 9.10386002130674e-06, "loss": 0.6183, "step": 18213 }, { "epoch": 0.2177692224918997, "grad_norm": 1.8521469831466675, "learning_rate": 9.103749413305847e-06, "loss": 0.5962, "step": 18214 }, { "epoch": 0.21778117863676036, "grad_norm": 1.4753566980361938, "learning_rate": 9.103638799151368e-06, "loss": 0.5145, "step": 18215 }, { "epoch": 0.21779313478162102, "grad_norm": 3.1203155517578125, "learning_rate": 9.10352817884347e-06, "loss": 0.6873, "step": 18216 }, { "epoch": 0.21780509092648168, "grad_norm": 2.4267349243164062, "learning_rate": 9.103417552382316e-06, "loss": 0.6182, "step": 18217 }, { "epoch": 0.2178170470713423, "grad_norm": 2.1961703300476074, "learning_rate": 9.103306919768074e-06, "loss": 0.6355, "step": 18218 }, { "epoch": 0.21782900321620297, "grad_norm": 4.84075927734375, "learning_rate": 9.103196281000905e-06, "loss": 0.6395, "step": 18219 }, { "epoch": 0.21784095936106362, "grad_norm": 2.2634592056274414, "learning_rate": 9.103085636080982e-06, "loss": 0.5919, "step": 18220 }, { "epoch": 0.21785291550592428, "grad_norm": 2.349233388900757, "learning_rate": 9.102974985008466e-06, "loss": 0.555, "step": 18221 }, { "epoch": 0.2178648716507849, "grad_norm": 4.8765082359313965, "learning_rate": 9.102864327783525e-06, "loss": 0.699, "step": 18222 }, { "epoch": 0.21787682779564557, "grad_norm": 3.0503149032592773, "learning_rate": 9.102753664406325e-06, "loss": 0.6537, "step": 18223 }, { "epoch": 0.21788878394050623, "grad_norm": 3.1289608478546143, "learning_rate": 9.102642994877029e-06, "loss": 0.5718, "step": 18224 }, { "epoch": 0.21790074008536686, "grad_norm": 29.067378997802734, "learning_rate": 9.102532319195807e-06, "loss": 0.5804, "step": 18225 }, { "epoch": 0.21791269623022752, "grad_norm": 2.3224759101867676, "learning_rate": 9.102421637362823e-06, "loss": 0.5877, "step": 18226 }, { "epoch": 0.21792465237508818, "grad_norm": 9.541190147399902, "learning_rate": 9.102310949378244e-06, "loss": 0.6225, "step": 18227 }, { "epoch": 0.21793660851994884, "grad_norm": 1.815909743309021, "learning_rate": 9.102200255242234e-06, "loss": 0.6742, "step": 18228 }, { "epoch": 0.21794856466480947, "grad_norm": 2.0439488887786865, "learning_rate": 9.10208955495496e-06, "loss": 0.6024, "step": 18229 }, { "epoch": 0.21796052080967013, "grad_norm": 1.9919450283050537, "learning_rate": 9.101978848516588e-06, "loss": 0.5584, "step": 18230 }, { "epoch": 0.21797247695453079, "grad_norm": 7.510392665863037, "learning_rate": 9.101868135927286e-06, "loss": 0.5901, "step": 18231 }, { "epoch": 0.21798443309939144, "grad_norm": 6.837403774261475, "learning_rate": 9.101757417187215e-06, "loss": 0.6049, "step": 18232 }, { "epoch": 0.21799638924425208, "grad_norm": 1.8504325151443481, "learning_rate": 9.101646692296547e-06, "loss": 0.599, "step": 18233 }, { "epoch": 0.21800834538911273, "grad_norm": 1.9660261869430542, "learning_rate": 9.101535961255445e-06, "loss": 0.6295, "step": 18234 }, { "epoch": 0.2180203015339734, "grad_norm": 2.3354692459106445, "learning_rate": 9.101425224064073e-06, "loss": 0.6224, "step": 18235 }, { "epoch": 0.21803225767883402, "grad_norm": 2.7895545959472656, "learning_rate": 9.101314480722603e-06, "loss": 0.6335, "step": 18236 }, { "epoch": 0.21804421382369468, "grad_norm": 2.6784379482269287, "learning_rate": 9.101203731231194e-06, "loss": 0.6481, "step": 18237 }, { "epoch": 0.21805616996855534, "grad_norm": 1.4043766260147095, "learning_rate": 9.101092975590017e-06, "loss": 0.6348, "step": 18238 }, { "epoch": 0.218068126113416, "grad_norm": 1.6056936979293823, "learning_rate": 9.100982213799236e-06, "loss": 0.6897, "step": 18239 }, { "epoch": 0.21808008225827663, "grad_norm": 1.7279443740844727, "learning_rate": 9.100871445859016e-06, "loss": 0.6024, "step": 18240 }, { "epoch": 0.2180920384031373, "grad_norm": 2.5024771690368652, "learning_rate": 9.100760671769528e-06, "loss": 0.6648, "step": 18241 }, { "epoch": 0.21810399454799795, "grad_norm": 1.9564603567123413, "learning_rate": 9.100649891530933e-06, "loss": 0.6171, "step": 18242 }, { "epoch": 0.2181159506928586, "grad_norm": 2.3471109867095947, "learning_rate": 9.1005391051434e-06, "loss": 0.6099, "step": 18243 }, { "epoch": 0.21812790683771924, "grad_norm": 1.685081958770752, "learning_rate": 9.100428312607093e-06, "loss": 0.6247, "step": 18244 }, { "epoch": 0.2181398629825799, "grad_norm": 2.1762161254882812, "learning_rate": 9.100317513922179e-06, "loss": 0.7394, "step": 18245 }, { "epoch": 0.21815181912744055, "grad_norm": 1.751381278038025, "learning_rate": 9.100206709088823e-06, "loss": 0.6033, "step": 18246 }, { "epoch": 0.21816377527230119, "grad_norm": 2.2477846145629883, "learning_rate": 9.100095898107193e-06, "loss": 0.6676, "step": 18247 }, { "epoch": 0.21817573141716184, "grad_norm": 2.8079614639282227, "learning_rate": 9.099985080977458e-06, "loss": 0.634, "step": 18248 }, { "epoch": 0.2181876875620225, "grad_norm": 34.07841491699219, "learning_rate": 9.099874257699777e-06, "loss": 0.6243, "step": 18249 }, { "epoch": 0.21819964370688316, "grad_norm": 2.4740469455718994, "learning_rate": 9.099763428274321e-06, "loss": 0.5665, "step": 18250 }, { "epoch": 0.2182115998517438, "grad_norm": 2.122572660446167, "learning_rate": 9.099652592701256e-06, "loss": 0.5315, "step": 18251 }, { "epoch": 0.21822355599660445, "grad_norm": 1.7104620933532715, "learning_rate": 9.099541750980746e-06, "loss": 0.5558, "step": 18252 }, { "epoch": 0.2182355121414651, "grad_norm": 2.300276517868042, "learning_rate": 9.09943090311296e-06, "loss": 0.6937, "step": 18253 }, { "epoch": 0.21824746828632577, "grad_norm": 2.9727718830108643, "learning_rate": 9.099320049098064e-06, "loss": 0.6554, "step": 18254 }, { "epoch": 0.2182594244311864, "grad_norm": 11.786859512329102, "learning_rate": 9.09920918893622e-06, "loss": 0.6585, "step": 18255 }, { "epoch": 0.21827138057604706, "grad_norm": 1.7501648664474487, "learning_rate": 9.0990983226276e-06, "loss": 0.551, "step": 18256 }, { "epoch": 0.21828333672090772, "grad_norm": 2.9814324378967285, "learning_rate": 9.098987450172365e-06, "loss": 0.6356, "step": 18257 }, { "epoch": 0.21829529286576835, "grad_norm": 1.576372504234314, "learning_rate": 9.098876571570685e-06, "loss": 0.592, "step": 18258 }, { "epoch": 0.218307249010629, "grad_norm": 1.9075994491577148, "learning_rate": 9.098765686822724e-06, "loss": 0.5755, "step": 18259 }, { "epoch": 0.21831920515548967, "grad_norm": 1.8362301588058472, "learning_rate": 9.098654795928652e-06, "loss": 0.5345, "step": 18260 }, { "epoch": 0.21833116130035032, "grad_norm": 2.485927104949951, "learning_rate": 9.09854389888863e-06, "loss": 0.6333, "step": 18261 }, { "epoch": 0.21834311744521095, "grad_norm": 1.7407877445220947, "learning_rate": 9.098432995702827e-06, "loss": 0.5992, "step": 18262 }, { "epoch": 0.2183550735900716, "grad_norm": 3.528167486190796, "learning_rate": 9.09832208637141e-06, "loss": 0.525, "step": 18263 }, { "epoch": 0.21836702973493227, "grad_norm": 1.8807991743087769, "learning_rate": 9.098211170894544e-06, "loss": 0.6547, "step": 18264 }, { "epoch": 0.21837898587979293, "grad_norm": 1.5879805088043213, "learning_rate": 9.098100249272396e-06, "loss": 0.4977, "step": 18265 }, { "epoch": 0.21839094202465356, "grad_norm": 1.7023537158966064, "learning_rate": 9.097989321505131e-06, "loss": 0.6175, "step": 18266 }, { "epoch": 0.21840289816951422, "grad_norm": 21.74099349975586, "learning_rate": 9.097878387592918e-06, "loss": 0.6173, "step": 18267 }, { "epoch": 0.21841485431437488, "grad_norm": 2.4719128608703613, "learning_rate": 9.097767447535922e-06, "loss": 0.7452, "step": 18268 }, { "epoch": 0.21842681045923554, "grad_norm": 1.8017737865447998, "learning_rate": 9.09765650133431e-06, "loss": 0.6448, "step": 18269 }, { "epoch": 0.21843876660409617, "grad_norm": 4.35551118850708, "learning_rate": 9.097545548988245e-06, "loss": 0.5282, "step": 18270 }, { "epoch": 0.21845072274895683, "grad_norm": 1.8190044164657593, "learning_rate": 9.097434590497896e-06, "loss": 0.6804, "step": 18271 }, { "epoch": 0.21846267889381749, "grad_norm": 6.53505277633667, "learning_rate": 9.09732362586343e-06, "loss": 0.5826, "step": 18272 }, { "epoch": 0.21847463503867812, "grad_norm": 2.4833879470825195, "learning_rate": 9.097212655085013e-06, "loss": 0.6035, "step": 18273 }, { "epoch": 0.21848659118353878, "grad_norm": 2.5680909156799316, "learning_rate": 9.09710167816281e-06, "loss": 0.6914, "step": 18274 }, { "epoch": 0.21849854732839943, "grad_norm": 3.292753219604492, "learning_rate": 9.09699069509699e-06, "loss": 0.6422, "step": 18275 }, { "epoch": 0.2185105034732601, "grad_norm": 4.581436634063721, "learning_rate": 9.096879705887717e-06, "loss": 0.5571, "step": 18276 }, { "epoch": 0.21852245961812072, "grad_norm": 2.332444667816162, "learning_rate": 9.096768710535158e-06, "loss": 0.588, "step": 18277 }, { "epoch": 0.21853441576298138, "grad_norm": 2.135629892349243, "learning_rate": 9.09665770903948e-06, "loss": 0.552, "step": 18278 }, { "epoch": 0.21854637190784204, "grad_norm": 1.637477993965149, "learning_rate": 9.09654670140085e-06, "loss": 0.6214, "step": 18279 }, { "epoch": 0.2185583280527027, "grad_norm": 2.4326658248901367, "learning_rate": 9.096435687619435e-06, "loss": 0.5914, "step": 18280 }, { "epoch": 0.21857028419756333, "grad_norm": 1.9074461460113525, "learning_rate": 9.096324667695397e-06, "loss": 0.6224, "step": 18281 }, { "epoch": 0.218582240342424, "grad_norm": 1.999945878982544, "learning_rate": 9.096213641628906e-06, "loss": 0.6335, "step": 18282 }, { "epoch": 0.21859419648728465, "grad_norm": 27.845531463623047, "learning_rate": 9.096102609420129e-06, "loss": 0.5279, "step": 18283 }, { "epoch": 0.21860615263214528, "grad_norm": 6.615230560302734, "learning_rate": 9.095991571069231e-06, "loss": 0.6562, "step": 18284 }, { "epoch": 0.21861810877700594, "grad_norm": 1.574162244796753, "learning_rate": 9.095880526576379e-06, "loss": 0.6091, "step": 18285 }, { "epoch": 0.2186300649218666, "grad_norm": 6.157992362976074, "learning_rate": 9.095769475941742e-06, "loss": 0.5971, "step": 18286 }, { "epoch": 0.21864202106672725, "grad_norm": 2.9697139263153076, "learning_rate": 9.09565841916548e-06, "loss": 0.6539, "step": 18287 }, { "epoch": 0.21865397721158789, "grad_norm": 4.102477073669434, "learning_rate": 9.095547356247766e-06, "loss": 0.5308, "step": 18288 }, { "epoch": 0.21866593335644854, "grad_norm": 2.0116710662841797, "learning_rate": 9.095436287188767e-06, "loss": 0.6359, "step": 18289 }, { "epoch": 0.2186778895013092, "grad_norm": 5.271248817443848, "learning_rate": 9.09532521198864e-06, "loss": 0.7199, "step": 18290 }, { "epoch": 0.21868984564616986, "grad_norm": 2.27494478225708, "learning_rate": 9.095214130647565e-06, "loss": 0.6265, "step": 18291 }, { "epoch": 0.2187018017910305, "grad_norm": 3.0908355712890625, "learning_rate": 9.095103043165699e-06, "loss": 0.6898, "step": 18292 }, { "epoch": 0.21871375793589115, "grad_norm": 3.985257387161255, "learning_rate": 9.094991949543211e-06, "loss": 0.5613, "step": 18293 }, { "epoch": 0.2187257140807518, "grad_norm": 2.6904420852661133, "learning_rate": 9.094880849780268e-06, "loss": 0.7078, "step": 18294 }, { "epoch": 0.21873767022561244, "grad_norm": 2.577010154724121, "learning_rate": 9.094769743877036e-06, "loss": 0.6528, "step": 18295 }, { "epoch": 0.2187496263704731, "grad_norm": 3.4255876541137695, "learning_rate": 9.094658631833684e-06, "loss": 0.7026, "step": 18296 }, { "epoch": 0.21876158251533376, "grad_norm": 1.608878254890442, "learning_rate": 9.094547513650375e-06, "loss": 0.6783, "step": 18297 }, { "epoch": 0.21877353866019442, "grad_norm": 1.7710949182510376, "learning_rate": 9.09443638932728e-06, "loss": 0.6928, "step": 18298 }, { "epoch": 0.21878549480505505, "grad_norm": 3.558678150177002, "learning_rate": 9.094325258864562e-06, "loss": 0.6349, "step": 18299 }, { "epoch": 0.2187974509499157, "grad_norm": 6.381823539733887, "learning_rate": 9.094214122262387e-06, "loss": 0.5672, "step": 18300 }, { "epoch": 0.21880940709477636, "grad_norm": 2.7944934368133545, "learning_rate": 9.094102979520925e-06, "loss": 0.6275, "step": 18301 }, { "epoch": 0.21882136323963702, "grad_norm": 10.320854187011719, "learning_rate": 9.093991830640342e-06, "loss": 0.6026, "step": 18302 }, { "epoch": 0.21883331938449765, "grad_norm": 3.968123435974121, "learning_rate": 9.0938806756208e-06, "loss": 0.6382, "step": 18303 }, { "epoch": 0.2188452755293583, "grad_norm": 2.9334564208984375, "learning_rate": 9.093769514462473e-06, "loss": 0.5672, "step": 18304 }, { "epoch": 0.21885723167421897, "grad_norm": 1.7162367105484009, "learning_rate": 9.093658347165524e-06, "loss": 0.5339, "step": 18305 }, { "epoch": 0.2188691878190796, "grad_norm": 2.358886480331421, "learning_rate": 9.093547173730117e-06, "loss": 0.7347, "step": 18306 }, { "epoch": 0.21888114396394026, "grad_norm": 3.9160730838775635, "learning_rate": 9.093435994156425e-06, "loss": 0.6464, "step": 18307 }, { "epoch": 0.21889310010880092, "grad_norm": 5.168284893035889, "learning_rate": 9.093324808444608e-06, "loss": 0.6996, "step": 18308 }, { "epoch": 0.21890505625366158, "grad_norm": 2.0131804943084717, "learning_rate": 9.093213616594839e-06, "loss": 0.667, "step": 18309 }, { "epoch": 0.2189170123985222, "grad_norm": 1.6416466236114502, "learning_rate": 9.093102418607277e-06, "loss": 0.6508, "step": 18310 }, { "epoch": 0.21892896854338287, "grad_norm": 1.5828843116760254, "learning_rate": 9.092991214482098e-06, "loss": 0.6085, "step": 18311 }, { "epoch": 0.21894092468824353, "grad_norm": 1.9688220024108887, "learning_rate": 9.092880004219462e-06, "loss": 0.6343, "step": 18312 }, { "epoch": 0.21895288083310419, "grad_norm": 1.746772289276123, "learning_rate": 9.092768787819538e-06, "loss": 0.6282, "step": 18313 }, { "epoch": 0.21896483697796482, "grad_norm": 1.7245768308639526, "learning_rate": 9.092657565282492e-06, "loss": 0.6463, "step": 18314 }, { "epoch": 0.21897679312282547, "grad_norm": 2.5244295597076416, "learning_rate": 9.092546336608492e-06, "loss": 0.684, "step": 18315 }, { "epoch": 0.21898874926768613, "grad_norm": 1.605216145515442, "learning_rate": 9.092435101797706e-06, "loss": 0.6126, "step": 18316 }, { "epoch": 0.2190007054125468, "grad_norm": 1.6635938882827759, "learning_rate": 9.092323860850296e-06, "loss": 0.5849, "step": 18317 }, { "epoch": 0.21901266155740742, "grad_norm": 1.9160760641098022, "learning_rate": 9.092212613766435e-06, "loss": 0.67, "step": 18318 }, { "epoch": 0.21902461770226808, "grad_norm": 2.003072500228882, "learning_rate": 9.092101360546284e-06, "loss": 0.6981, "step": 18319 }, { "epoch": 0.21903657384712874, "grad_norm": 2.7702882289886475, "learning_rate": 9.091990101190013e-06, "loss": 0.5866, "step": 18320 }, { "epoch": 0.21904852999198937, "grad_norm": 3.657274007797241, "learning_rate": 9.09187883569779e-06, "loss": 0.606, "step": 18321 }, { "epoch": 0.21906048613685003, "grad_norm": 1.4623674154281616, "learning_rate": 9.091767564069778e-06, "loss": 0.6411, "step": 18322 }, { "epoch": 0.2190724422817107, "grad_norm": 2.682678699493408, "learning_rate": 9.091656286306148e-06, "loss": 0.6529, "step": 18323 }, { "epoch": 0.21908439842657135, "grad_norm": 2.0177228450775146, "learning_rate": 9.091545002407063e-06, "loss": 0.5756, "step": 18324 }, { "epoch": 0.21909635457143198, "grad_norm": 2.8828043937683105, "learning_rate": 9.091433712372692e-06, "loss": 0.6346, "step": 18325 }, { "epoch": 0.21910831071629264, "grad_norm": 3.1683804988861084, "learning_rate": 9.091322416203202e-06, "loss": 0.6688, "step": 18326 }, { "epoch": 0.2191202668611533, "grad_norm": 2.646806001663208, "learning_rate": 9.091211113898759e-06, "loss": 0.6469, "step": 18327 }, { "epoch": 0.21913222300601395, "grad_norm": 1.674275517463684, "learning_rate": 9.09109980545953e-06, "loss": 0.5999, "step": 18328 }, { "epoch": 0.21914417915087459, "grad_norm": 1.8097467422485352, "learning_rate": 9.090988490885685e-06, "loss": 0.6917, "step": 18329 }, { "epoch": 0.21915613529573524, "grad_norm": 1.7846916913986206, "learning_rate": 9.090877170177386e-06, "loss": 0.521, "step": 18330 }, { "epoch": 0.2191680914405959, "grad_norm": 3.694793701171875, "learning_rate": 9.090765843334803e-06, "loss": 0.6555, "step": 18331 }, { "epoch": 0.21918004758545653, "grad_norm": 1.3326317071914673, "learning_rate": 9.090654510358101e-06, "loss": 0.6161, "step": 18332 }, { "epoch": 0.2191920037303172, "grad_norm": 2.097813367843628, "learning_rate": 9.09054317124745e-06, "loss": 0.5979, "step": 18333 }, { "epoch": 0.21920395987517785, "grad_norm": 1.700290560722351, "learning_rate": 9.090431826003013e-06, "loss": 0.6269, "step": 18334 }, { "epoch": 0.2192159160200385, "grad_norm": 1.4452931880950928, "learning_rate": 9.090320474624959e-06, "loss": 0.6125, "step": 18335 }, { "epoch": 0.21922787216489914, "grad_norm": 2.07983660697937, "learning_rate": 9.090209117113455e-06, "loss": 0.5722, "step": 18336 }, { "epoch": 0.2192398283097598, "grad_norm": 1.6761975288391113, "learning_rate": 9.09009775346867e-06, "loss": 0.5223, "step": 18337 }, { "epoch": 0.21925178445462046, "grad_norm": 3.1444056034088135, "learning_rate": 9.089986383690766e-06, "loss": 0.6433, "step": 18338 }, { "epoch": 0.21926374059948112, "grad_norm": 2.3606295585632324, "learning_rate": 9.089875007779915e-06, "loss": 0.6143, "step": 18339 }, { "epoch": 0.21927569674434175, "grad_norm": 2.5048766136169434, "learning_rate": 9.08976362573628e-06, "loss": 0.6226, "step": 18340 }, { "epoch": 0.2192876528892024, "grad_norm": 2.1847944259643555, "learning_rate": 9.089652237560032e-06, "loss": 0.692, "step": 18341 }, { "epoch": 0.21929960903406306, "grad_norm": 2.022768974304199, "learning_rate": 9.089540843251334e-06, "loss": 0.6615, "step": 18342 }, { "epoch": 0.2193115651789237, "grad_norm": 2.6582467555999756, "learning_rate": 9.089429442810355e-06, "loss": 0.6325, "step": 18343 }, { "epoch": 0.21932352132378435, "grad_norm": 1.2794080972671509, "learning_rate": 9.089318036237264e-06, "loss": 0.56, "step": 18344 }, { "epoch": 0.219335477468645, "grad_norm": 3.325021505355835, "learning_rate": 9.089206623532226e-06, "loss": 0.5673, "step": 18345 }, { "epoch": 0.21934743361350567, "grad_norm": 2.4523634910583496, "learning_rate": 9.089095204695405e-06, "loss": 0.5911, "step": 18346 }, { "epoch": 0.2193593897583663, "grad_norm": 1.964158296585083, "learning_rate": 9.088983779726975e-06, "loss": 0.5821, "step": 18347 }, { "epoch": 0.21937134590322696, "grad_norm": 3.5418102741241455, "learning_rate": 9.088872348627098e-06, "loss": 0.5756, "step": 18348 }, { "epoch": 0.21938330204808762, "grad_norm": 1.6610186100006104, "learning_rate": 9.088760911395942e-06, "loss": 0.5864, "step": 18349 }, { "epoch": 0.21939525819294828, "grad_norm": 2.0321009159088135, "learning_rate": 9.088649468033673e-06, "loss": 0.6336, "step": 18350 }, { "epoch": 0.2194072143378089, "grad_norm": 2.716475248336792, "learning_rate": 9.088538018540462e-06, "loss": 0.5543, "step": 18351 }, { "epoch": 0.21941917048266957, "grad_norm": 1.718613862991333, "learning_rate": 9.088426562916472e-06, "loss": 0.6001, "step": 18352 }, { "epoch": 0.21943112662753023, "grad_norm": 4.020486831665039, "learning_rate": 9.088315101161873e-06, "loss": 0.6802, "step": 18353 }, { "epoch": 0.21944308277239086, "grad_norm": 2.1606595516204834, "learning_rate": 9.088203633276831e-06, "loss": 0.557, "step": 18354 }, { "epoch": 0.21945503891725152, "grad_norm": 2.546233892440796, "learning_rate": 9.088092159261512e-06, "loss": 0.5973, "step": 18355 }, { "epoch": 0.21946699506211217, "grad_norm": 2.3764607906341553, "learning_rate": 9.087980679116084e-06, "loss": 0.6581, "step": 18356 }, { "epoch": 0.21947895120697283, "grad_norm": 3.8314034938812256, "learning_rate": 9.087869192840717e-06, "loss": 0.5448, "step": 18357 }, { "epoch": 0.21949090735183346, "grad_norm": 3.400554895401001, "learning_rate": 9.087757700435573e-06, "loss": 0.5572, "step": 18358 }, { "epoch": 0.21950286349669412, "grad_norm": 2.263712167739868, "learning_rate": 9.087646201900823e-06, "loss": 0.5797, "step": 18359 }, { "epoch": 0.21951481964155478, "grad_norm": 2.0264759063720703, "learning_rate": 9.087534697236633e-06, "loss": 0.6144, "step": 18360 }, { "epoch": 0.21952677578641544, "grad_norm": 2.5364909172058105, "learning_rate": 9.08742318644317e-06, "loss": 0.6543, "step": 18361 }, { "epoch": 0.21953873193127607, "grad_norm": 3.628784418106079, "learning_rate": 9.087311669520602e-06, "loss": 0.6766, "step": 18362 }, { "epoch": 0.21955068807613673, "grad_norm": 3.0407838821411133, "learning_rate": 9.087200146469095e-06, "loss": 0.6289, "step": 18363 }, { "epoch": 0.2195626442209974, "grad_norm": 1.8683899641036987, "learning_rate": 9.087088617288815e-06, "loss": 0.5189, "step": 18364 }, { "epoch": 0.21957460036585802, "grad_norm": 2.0911455154418945, "learning_rate": 9.086977081979933e-06, "loss": 0.5967, "step": 18365 }, { "epoch": 0.21958655651071868, "grad_norm": 2.703106164932251, "learning_rate": 9.086865540542615e-06, "loss": 0.5556, "step": 18366 }, { "epoch": 0.21959851265557934, "grad_norm": 3.5703134536743164, "learning_rate": 9.086753992977026e-06, "loss": 0.6716, "step": 18367 }, { "epoch": 0.21961046880044, "grad_norm": 2.5523247718811035, "learning_rate": 9.086642439283337e-06, "loss": 0.6264, "step": 18368 }, { "epoch": 0.21962242494530063, "grad_norm": 3.412255048751831, "learning_rate": 9.086530879461711e-06, "loss": 0.6503, "step": 18369 }, { "epoch": 0.21963438109016128, "grad_norm": 1.7829368114471436, "learning_rate": 9.086419313512319e-06, "loss": 0.6244, "step": 18370 }, { "epoch": 0.21964633723502194, "grad_norm": 5.443731307983398, "learning_rate": 9.086307741435326e-06, "loss": 0.5603, "step": 18371 }, { "epoch": 0.2196582933798826, "grad_norm": 3.0116093158721924, "learning_rate": 9.0861961632309e-06, "loss": 0.5852, "step": 18372 }, { "epoch": 0.21967024952474323, "grad_norm": 2.1464200019836426, "learning_rate": 9.086084578899208e-06, "loss": 0.6488, "step": 18373 }, { "epoch": 0.2196822056696039, "grad_norm": 3.0963375568389893, "learning_rate": 9.085972988440418e-06, "loss": 0.547, "step": 18374 }, { "epoch": 0.21969416181446455, "grad_norm": 2.5608346462249756, "learning_rate": 9.085861391854698e-06, "loss": 0.5746, "step": 18375 }, { "epoch": 0.2197061179593252, "grad_norm": 2.050769090652466, "learning_rate": 9.085749789142214e-06, "loss": 0.7474, "step": 18376 }, { "epoch": 0.21971807410418584, "grad_norm": 3.496995687484741, "learning_rate": 9.085638180303132e-06, "loss": 0.6139, "step": 18377 }, { "epoch": 0.2197300302490465, "grad_norm": 1.7450182437896729, "learning_rate": 9.085526565337622e-06, "loss": 0.6387, "step": 18378 }, { "epoch": 0.21974198639390716, "grad_norm": 2.003385305404663, "learning_rate": 9.085414944245849e-06, "loss": 0.4785, "step": 18379 }, { "epoch": 0.2197539425387678, "grad_norm": 4.873311519622803, "learning_rate": 9.085303317027984e-06, "loss": 0.5715, "step": 18380 }, { "epoch": 0.21976589868362845, "grad_norm": 1.779990315437317, "learning_rate": 9.085191683684191e-06, "loss": 0.7262, "step": 18381 }, { "epoch": 0.2197778548284891, "grad_norm": 4.390991687774658, "learning_rate": 9.08508004421464e-06, "loss": 0.5038, "step": 18382 }, { "epoch": 0.21978981097334976, "grad_norm": 5.183688640594482, "learning_rate": 9.084968398619493e-06, "loss": 0.6211, "step": 18383 }, { "epoch": 0.2198017671182104, "grad_norm": 3.845287561416626, "learning_rate": 9.084856746898926e-06, "loss": 0.5133, "step": 18384 }, { "epoch": 0.21981372326307105, "grad_norm": 4.940894603729248, "learning_rate": 9.0847450890531e-06, "loss": 0.5784, "step": 18385 }, { "epoch": 0.2198256794079317, "grad_norm": 3.7063186168670654, "learning_rate": 9.084633425082183e-06, "loss": 0.5732, "step": 18386 }, { "epoch": 0.21983763555279237, "grad_norm": 4.049845218658447, "learning_rate": 9.084521754986346e-06, "loss": 0.7005, "step": 18387 }, { "epoch": 0.219849591697653, "grad_norm": 1.8960412740707397, "learning_rate": 9.084410078765753e-06, "loss": 0.5582, "step": 18388 }, { "epoch": 0.21986154784251366, "grad_norm": 4.383116245269775, "learning_rate": 9.084298396420572e-06, "loss": 0.6457, "step": 18389 }, { "epoch": 0.21987350398737432, "grad_norm": 1.8223798274993896, "learning_rate": 9.084186707950971e-06, "loss": 0.6713, "step": 18390 }, { "epoch": 0.21988546013223495, "grad_norm": 2.425851821899414, "learning_rate": 9.084075013357118e-06, "loss": 0.6073, "step": 18391 }, { "epoch": 0.2198974162770956, "grad_norm": 1.9273662567138672, "learning_rate": 9.083963312639182e-06, "loss": 0.611, "step": 18392 }, { "epoch": 0.21990937242195627, "grad_norm": 3.675731897354126, "learning_rate": 9.083851605797324e-06, "loss": 0.6582, "step": 18393 }, { "epoch": 0.21992132856681693, "grad_norm": 4.242977142333984, "learning_rate": 9.08373989283172e-06, "loss": 0.5917, "step": 18394 }, { "epoch": 0.21993328471167756, "grad_norm": 2.532273530960083, "learning_rate": 9.083628173742531e-06, "loss": 0.6875, "step": 18395 }, { "epoch": 0.21994524085653822, "grad_norm": 2.012661933898926, "learning_rate": 9.08351644852993e-06, "loss": 0.572, "step": 18396 }, { "epoch": 0.21995719700139887, "grad_norm": 2.1169049739837646, "learning_rate": 9.083404717194078e-06, "loss": 0.6433, "step": 18397 }, { "epoch": 0.21996915314625953, "grad_norm": 9.197904586791992, "learning_rate": 9.08329297973515e-06, "loss": 0.5885, "step": 18398 }, { "epoch": 0.21998110929112016, "grad_norm": 2.518981695175171, "learning_rate": 9.083181236153307e-06, "loss": 0.6572, "step": 18399 }, { "epoch": 0.21999306543598082, "grad_norm": 3.7019877433776855, "learning_rate": 9.083069486448719e-06, "loss": 0.5357, "step": 18400 }, { "epoch": 0.22000502158084148, "grad_norm": 3.3511805534362793, "learning_rate": 9.082957730621555e-06, "loss": 0.6598, "step": 18401 }, { "epoch": 0.2200169777257021, "grad_norm": 5.156284332275391, "learning_rate": 9.082845968671981e-06, "loss": 0.6741, "step": 18402 }, { "epoch": 0.22002893387056277, "grad_norm": 2.032557725906372, "learning_rate": 9.082734200600163e-06, "loss": 0.6349, "step": 18403 }, { "epoch": 0.22004089001542343, "grad_norm": 3.0266504287719727, "learning_rate": 9.082622426406274e-06, "loss": 0.677, "step": 18404 }, { "epoch": 0.2200528461602841, "grad_norm": 1.9380600452423096, "learning_rate": 9.082510646090475e-06, "loss": 0.5481, "step": 18405 }, { "epoch": 0.22006480230514472, "grad_norm": 3.711125612258911, "learning_rate": 9.08239885965294e-06, "loss": 0.547, "step": 18406 }, { "epoch": 0.22007675845000538, "grad_norm": 3.356412172317505, "learning_rate": 9.082287067093831e-06, "loss": 0.5325, "step": 18407 }, { "epoch": 0.22008871459486604, "grad_norm": 1.8117902278900146, "learning_rate": 9.082175268413319e-06, "loss": 0.5765, "step": 18408 }, { "epoch": 0.2201006707397267, "grad_norm": 2.5099709033966064, "learning_rate": 9.08206346361157e-06, "loss": 0.6898, "step": 18409 }, { "epoch": 0.22011262688458733, "grad_norm": 2.827617645263672, "learning_rate": 9.081951652688752e-06, "loss": 0.6035, "step": 18410 }, { "epoch": 0.22012458302944798, "grad_norm": 2.4329018592834473, "learning_rate": 9.081839835645035e-06, "loss": 0.5831, "step": 18411 }, { "epoch": 0.22013653917430864, "grad_norm": 2.68847393989563, "learning_rate": 9.081728012480583e-06, "loss": 0.6423, "step": 18412 }, { "epoch": 0.22014849531916927, "grad_norm": 3.3066182136535645, "learning_rate": 9.081616183195566e-06, "loss": 0.6251, "step": 18413 }, { "epoch": 0.22016045146402993, "grad_norm": 2.7875750064849854, "learning_rate": 9.081504347790151e-06, "loss": 0.5904, "step": 18414 }, { "epoch": 0.2201724076088906, "grad_norm": 11.221385955810547, "learning_rate": 9.081392506264504e-06, "loss": 0.6488, "step": 18415 }, { "epoch": 0.22018436375375125, "grad_norm": 8.427312850952148, "learning_rate": 9.081280658618797e-06, "loss": 0.6637, "step": 18416 }, { "epoch": 0.22019631989861188, "grad_norm": 2.887402296066284, "learning_rate": 9.081168804853193e-06, "loss": 0.5726, "step": 18417 }, { "epoch": 0.22020827604347254, "grad_norm": 4.469446182250977, "learning_rate": 9.081056944967864e-06, "loss": 0.5589, "step": 18418 }, { "epoch": 0.2202202321883332, "grad_norm": 2.409438371658325, "learning_rate": 9.080945078962974e-06, "loss": 0.7219, "step": 18419 }, { "epoch": 0.22023218833319386, "grad_norm": 10.788995742797852, "learning_rate": 9.080833206838693e-06, "loss": 0.633, "step": 18420 }, { "epoch": 0.2202441444780545, "grad_norm": 2.206552505493164, "learning_rate": 9.080721328595188e-06, "loss": 0.6999, "step": 18421 }, { "epoch": 0.22025610062291515, "grad_norm": 1.9927655458450317, "learning_rate": 9.080609444232626e-06, "loss": 0.6245, "step": 18422 }, { "epoch": 0.2202680567677758, "grad_norm": 1.9285783767700195, "learning_rate": 9.080497553751178e-06, "loss": 0.6407, "step": 18423 }, { "epoch": 0.22028001291263644, "grad_norm": 2.099569320678711, "learning_rate": 9.080385657151008e-06, "loss": 0.6712, "step": 18424 }, { "epoch": 0.2202919690574971, "grad_norm": 12.825947761535645, "learning_rate": 9.080273754432285e-06, "loss": 0.6612, "step": 18425 }, { "epoch": 0.22030392520235775, "grad_norm": 2.36806321144104, "learning_rate": 9.080161845595176e-06, "loss": 0.6916, "step": 18426 }, { "epoch": 0.2203158813472184, "grad_norm": 1.686174988746643, "learning_rate": 9.080049930639851e-06, "loss": 0.5756, "step": 18427 }, { "epoch": 0.22032783749207904, "grad_norm": 2.755457639694214, "learning_rate": 9.079938009566476e-06, "loss": 0.6189, "step": 18428 }, { "epoch": 0.2203397936369397, "grad_norm": 1.5848244428634644, "learning_rate": 9.07982608237522e-06, "loss": 0.5885, "step": 18429 }, { "epoch": 0.22035174978180036, "grad_norm": 2.9777004718780518, "learning_rate": 9.07971414906625e-06, "loss": 0.6068, "step": 18430 }, { "epoch": 0.22036370592666102, "grad_norm": 4.719446182250977, "learning_rate": 9.079602209639735e-06, "loss": 0.7198, "step": 18431 }, { "epoch": 0.22037566207152165, "grad_norm": 4.622063159942627, "learning_rate": 9.079490264095841e-06, "loss": 0.564, "step": 18432 }, { "epoch": 0.2203876182163823, "grad_norm": 2.592510938644409, "learning_rate": 9.079378312434736e-06, "loss": 0.6307, "step": 18433 }, { "epoch": 0.22039957436124297, "grad_norm": 3.1682097911834717, "learning_rate": 9.079266354656591e-06, "loss": 0.6271, "step": 18434 }, { "epoch": 0.22041153050610363, "grad_norm": 1.5941174030303955, "learning_rate": 9.079154390761571e-06, "loss": 0.4925, "step": 18435 }, { "epoch": 0.22042348665096426, "grad_norm": 3.7325007915496826, "learning_rate": 9.079042420749844e-06, "loss": 0.6089, "step": 18436 }, { "epoch": 0.22043544279582492, "grad_norm": 1.508921504020691, "learning_rate": 9.07893044462158e-06, "loss": 0.5896, "step": 18437 }, { "epoch": 0.22044739894068557, "grad_norm": 1.9339687824249268, "learning_rate": 9.078818462376942e-06, "loss": 0.6262, "step": 18438 }, { "epoch": 0.2204593550855462, "grad_norm": 1.4172643423080444, "learning_rate": 9.078706474016103e-06, "loss": 0.6447, "step": 18439 }, { "epoch": 0.22047131123040686, "grad_norm": 3.9232020378112793, "learning_rate": 9.07859447953923e-06, "loss": 0.5691, "step": 18440 }, { "epoch": 0.22048326737526752, "grad_norm": 1.9670166969299316, "learning_rate": 9.078482478946488e-06, "loss": 0.6808, "step": 18441 }, { "epoch": 0.22049522352012818, "grad_norm": 2.923510789871216, "learning_rate": 9.078370472238048e-06, "loss": 0.5806, "step": 18442 }, { "epoch": 0.2205071796649888, "grad_norm": 2.0976462364196777, "learning_rate": 9.078258459414078e-06, "loss": 0.6397, "step": 18443 }, { "epoch": 0.22051913580984947, "grad_norm": 3.0035006999969482, "learning_rate": 9.078146440474744e-06, "loss": 0.7773, "step": 18444 }, { "epoch": 0.22053109195471013, "grad_norm": 2.703662157058716, "learning_rate": 9.078034415420214e-06, "loss": 0.6581, "step": 18445 }, { "epoch": 0.2205430480995708, "grad_norm": 2.0005035400390625, "learning_rate": 9.077922384250659e-06, "loss": 0.6178, "step": 18446 }, { "epoch": 0.22055500424443142, "grad_norm": 2.7704083919525146, "learning_rate": 9.077810346966242e-06, "loss": 0.6629, "step": 18447 }, { "epoch": 0.22056696038929208, "grad_norm": 2.843165636062622, "learning_rate": 9.077698303567137e-06, "loss": 0.6349, "step": 18448 }, { "epoch": 0.22057891653415274, "grad_norm": 3.1833925247192383, "learning_rate": 9.077586254053508e-06, "loss": 0.6115, "step": 18449 }, { "epoch": 0.22059087267901337, "grad_norm": 1.428203821182251, "learning_rate": 9.077474198425523e-06, "loss": 0.7069, "step": 18450 }, { "epoch": 0.22060282882387403, "grad_norm": 10.178908348083496, "learning_rate": 9.077362136683352e-06, "loss": 0.588, "step": 18451 }, { "epoch": 0.22061478496873468, "grad_norm": 1.8636622428894043, "learning_rate": 9.077250068827162e-06, "loss": 0.5964, "step": 18452 }, { "epoch": 0.22062674111359534, "grad_norm": 1.5710378885269165, "learning_rate": 9.07713799485712e-06, "loss": 0.591, "step": 18453 }, { "epoch": 0.22063869725845597, "grad_norm": 1.7778778076171875, "learning_rate": 9.077025914773394e-06, "loss": 0.5038, "step": 18454 }, { "epoch": 0.22065065340331663, "grad_norm": 2.6698598861694336, "learning_rate": 9.076913828576155e-06, "loss": 0.633, "step": 18455 }, { "epoch": 0.2206626095481773, "grad_norm": 3.9243392944335938, "learning_rate": 9.07680173626557e-06, "loss": 0.5632, "step": 18456 }, { "epoch": 0.22067456569303795, "grad_norm": 1.8166197538375854, "learning_rate": 9.076689637841805e-06, "loss": 0.5919, "step": 18457 }, { "epoch": 0.22068652183789858, "grad_norm": 3.4206631183624268, "learning_rate": 9.07657753330503e-06, "loss": 0.5842, "step": 18458 }, { "epoch": 0.22069847798275924, "grad_norm": 1.7676928043365479, "learning_rate": 9.076465422655412e-06, "loss": 0.6117, "step": 18459 }, { "epoch": 0.2207104341276199, "grad_norm": 2.007131814956665, "learning_rate": 9.07635330589312e-06, "loss": 0.5726, "step": 18460 }, { "epoch": 0.22072239027248053, "grad_norm": 3.3897392749786377, "learning_rate": 9.076241183018321e-06, "loss": 0.6052, "step": 18461 }, { "epoch": 0.2207343464173412, "grad_norm": 2.724116563796997, "learning_rate": 9.076129054031183e-06, "loss": 0.6127, "step": 18462 }, { "epoch": 0.22074630256220185, "grad_norm": 1.486763834953308, "learning_rate": 9.076016918931877e-06, "loss": 0.5483, "step": 18463 }, { "epoch": 0.2207582587070625, "grad_norm": 2.8527958393096924, "learning_rate": 9.07590477772057e-06, "loss": 0.6035, "step": 18464 }, { "epoch": 0.22077021485192314, "grad_norm": 3.723464012145996, "learning_rate": 9.075792630397427e-06, "loss": 0.5943, "step": 18465 }, { "epoch": 0.2207821709967838, "grad_norm": 3.457667112350464, "learning_rate": 9.075680476962618e-06, "loss": 0.7294, "step": 18466 }, { "epoch": 0.22079412714164445, "grad_norm": 2.3481392860412598, "learning_rate": 9.075568317416314e-06, "loss": 0.5, "step": 18467 }, { "epoch": 0.2208060832865051, "grad_norm": 1.8889861106872559, "learning_rate": 9.07545615175868e-06, "loss": 0.7028, "step": 18468 }, { "epoch": 0.22081803943136574, "grad_norm": 2.42964243888855, "learning_rate": 9.075343979989883e-06, "loss": 0.7087, "step": 18469 }, { "epoch": 0.2208299955762264, "grad_norm": 3.477492332458496, "learning_rate": 9.075231802110096e-06, "loss": 0.6484, "step": 18470 }, { "epoch": 0.22084195172108706, "grad_norm": 3.2520763874053955, "learning_rate": 9.075119618119484e-06, "loss": 0.6091, "step": 18471 }, { "epoch": 0.2208539078659477, "grad_norm": 1.8490463495254517, "learning_rate": 9.075007428018214e-06, "loss": 0.5687, "step": 18472 }, { "epoch": 0.22086586401080835, "grad_norm": 4.307589530944824, "learning_rate": 9.074895231806458e-06, "loss": 0.6496, "step": 18473 }, { "epoch": 0.220877820155669, "grad_norm": 2.8189585208892822, "learning_rate": 9.07478302948438e-06, "loss": 0.6276, "step": 18474 }, { "epoch": 0.22088977630052967, "grad_norm": 1.5466094017028809, "learning_rate": 9.074670821052152e-06, "loss": 0.5965, "step": 18475 }, { "epoch": 0.2209017324453903, "grad_norm": 2.816654682159424, "learning_rate": 9.07455860650994e-06, "loss": 0.6304, "step": 18476 }, { "epoch": 0.22091368859025096, "grad_norm": 3.6476142406463623, "learning_rate": 9.074446385857912e-06, "loss": 0.6022, "step": 18477 }, { "epoch": 0.22092564473511161, "grad_norm": 1.7761642932891846, "learning_rate": 9.074334159096239e-06, "loss": 0.5815, "step": 18478 }, { "epoch": 0.22093760087997227, "grad_norm": 2.5356595516204834, "learning_rate": 9.074221926225086e-06, "loss": 0.6011, "step": 18479 }, { "epoch": 0.2209495570248329, "grad_norm": 3.2551233768463135, "learning_rate": 9.074109687244623e-06, "loss": 0.5638, "step": 18480 }, { "epoch": 0.22096151316969356, "grad_norm": 1.8844282627105713, "learning_rate": 9.073997442155019e-06, "loss": 0.6225, "step": 18481 }, { "epoch": 0.22097346931455422, "grad_norm": 2.197765588760376, "learning_rate": 9.07388519095644e-06, "loss": 0.7624, "step": 18482 }, { "epoch": 0.22098542545941485, "grad_norm": 1.4217135906219482, "learning_rate": 9.073772933649057e-06, "loss": 0.5756, "step": 18483 }, { "epoch": 0.2209973816042755, "grad_norm": 1.994620442390442, "learning_rate": 9.073660670233036e-06, "loss": 0.5213, "step": 18484 }, { "epoch": 0.22100933774913617, "grad_norm": 1.89870285987854, "learning_rate": 9.073548400708545e-06, "loss": 0.5218, "step": 18485 }, { "epoch": 0.22102129389399683, "grad_norm": 2.4235568046569824, "learning_rate": 9.073436125075757e-06, "loss": 0.6021, "step": 18486 }, { "epoch": 0.22103325003885746, "grad_norm": 2.987318754196167, "learning_rate": 9.073323843334835e-06, "loss": 0.6078, "step": 18487 }, { "epoch": 0.22104520618371812, "grad_norm": 3.8495614528656006, "learning_rate": 9.07321155548595e-06, "loss": 0.6588, "step": 18488 }, { "epoch": 0.22105716232857878, "grad_norm": 3.6286227703094482, "learning_rate": 9.07309926152927e-06, "loss": 0.6521, "step": 18489 }, { "epoch": 0.22106911847343944, "grad_norm": 8.770013809204102, "learning_rate": 9.072986961464962e-06, "loss": 0.6521, "step": 18490 }, { "epoch": 0.22108107461830007, "grad_norm": 3.3990120887756348, "learning_rate": 9.072874655293195e-06, "loss": 0.5987, "step": 18491 }, { "epoch": 0.22109303076316073, "grad_norm": 1.8414462804794312, "learning_rate": 9.07276234301414e-06, "loss": 0.6088, "step": 18492 }, { "epoch": 0.22110498690802138, "grad_norm": 1.8119431734085083, "learning_rate": 9.072650024627963e-06, "loss": 0.5455, "step": 18493 }, { "epoch": 0.22111694305288204, "grad_norm": 1.9967241287231445, "learning_rate": 9.072537700134833e-06, "loss": 0.6283, "step": 18494 }, { "epoch": 0.22112889919774267, "grad_norm": 2.5872130393981934, "learning_rate": 9.072425369534916e-06, "loss": 0.6569, "step": 18495 }, { "epoch": 0.22114085534260333, "grad_norm": 2.395028591156006, "learning_rate": 9.072313032828387e-06, "loss": 0.4955, "step": 18496 }, { "epoch": 0.221152811487464, "grad_norm": 3.8594939708709717, "learning_rate": 9.072200690015407e-06, "loss": 0.5279, "step": 18497 }, { "epoch": 0.22116476763232462, "grad_norm": 2.6528561115264893, "learning_rate": 9.072088341096147e-06, "loss": 0.6813, "step": 18498 }, { "epoch": 0.22117672377718528, "grad_norm": 3.263547658920288, "learning_rate": 9.071975986070779e-06, "loss": 0.6007, "step": 18499 }, { "epoch": 0.22118867992204594, "grad_norm": 1.9970508813858032, "learning_rate": 9.071863624939466e-06, "loss": 0.6685, "step": 18500 }, { "epoch": 0.2212006360669066, "grad_norm": 2.763598680496216, "learning_rate": 9.071751257702379e-06, "loss": 0.5748, "step": 18501 }, { "epoch": 0.22121259221176723, "grad_norm": 2.8249900341033936, "learning_rate": 9.071638884359687e-06, "loss": 0.5535, "step": 18502 }, { "epoch": 0.2212245483566279, "grad_norm": 2.3654417991638184, "learning_rate": 9.071526504911558e-06, "loss": 0.7293, "step": 18503 }, { "epoch": 0.22123650450148855, "grad_norm": 2.3660764694213867, "learning_rate": 9.071414119358161e-06, "loss": 0.6465, "step": 18504 }, { "epoch": 0.2212484606463492, "grad_norm": 2.0958104133605957, "learning_rate": 9.071301727699664e-06, "loss": 0.6532, "step": 18505 }, { "epoch": 0.22126041679120984, "grad_norm": 4.049091815948486, "learning_rate": 9.071189329936237e-06, "loss": 0.6464, "step": 18506 }, { "epoch": 0.2212723729360705, "grad_norm": 1.9353950023651123, "learning_rate": 9.071076926068044e-06, "loss": 0.6297, "step": 18507 }, { "epoch": 0.22128432908093115, "grad_norm": 1.7944871187210083, "learning_rate": 9.070964516095259e-06, "loss": 0.6634, "step": 18508 }, { "epoch": 0.22129628522579178, "grad_norm": 2.1862735748291016, "learning_rate": 9.070852100018047e-06, "loss": 0.672, "step": 18509 }, { "epoch": 0.22130824137065244, "grad_norm": 3.0684056282043457, "learning_rate": 9.070739677836579e-06, "loss": 0.6605, "step": 18510 }, { "epoch": 0.2213201975155131, "grad_norm": 2.129502058029175, "learning_rate": 9.07062724955102e-06, "loss": 0.5581, "step": 18511 }, { "epoch": 0.22133215366037376, "grad_norm": 2.6344540119171143, "learning_rate": 9.070514815161544e-06, "loss": 0.5896, "step": 18512 }, { "epoch": 0.2213441098052344, "grad_norm": 3.7294046878814697, "learning_rate": 9.070402374668315e-06, "loss": 0.5885, "step": 18513 }, { "epoch": 0.22135606595009505, "grad_norm": 2.2169957160949707, "learning_rate": 9.070289928071503e-06, "loss": 0.6391, "step": 18514 }, { "epoch": 0.2213680220949557, "grad_norm": 2.822730302810669, "learning_rate": 9.070177475371277e-06, "loss": 0.6727, "step": 18515 }, { "epoch": 0.22137997823981637, "grad_norm": 5.996322154998779, "learning_rate": 9.070065016567806e-06, "loss": 0.6439, "step": 18516 }, { "epoch": 0.221391934384677, "grad_norm": 1.7493786811828613, "learning_rate": 9.069952551661259e-06, "loss": 0.5696, "step": 18517 }, { "epoch": 0.22140389052953766, "grad_norm": 1.227561354637146, "learning_rate": 9.0698400806518e-06, "loss": 0.598, "step": 18518 }, { "epoch": 0.22141584667439831, "grad_norm": 2.5591061115264893, "learning_rate": 9.069727603539606e-06, "loss": 0.5825, "step": 18519 }, { "epoch": 0.22142780281925895, "grad_norm": 3.342607021331787, "learning_rate": 9.069615120324836e-06, "loss": 0.5942, "step": 18520 }, { "epoch": 0.2214397589641196, "grad_norm": 1.8358708620071411, "learning_rate": 9.069502631007667e-06, "loss": 0.625, "step": 18521 }, { "epoch": 0.22145171510898026, "grad_norm": 2.4356632232666016, "learning_rate": 9.069390135588265e-06, "loss": 0.6674, "step": 18522 }, { "epoch": 0.22146367125384092, "grad_norm": 8.302597999572754, "learning_rate": 9.069277634066796e-06, "loss": 0.6681, "step": 18523 }, { "epoch": 0.22147562739870155, "grad_norm": 2.300137996673584, "learning_rate": 9.06916512644343e-06, "loss": 0.6952, "step": 18524 }, { "epoch": 0.2214875835435622, "grad_norm": 2.803375720977783, "learning_rate": 9.06905261271834e-06, "loss": 0.6928, "step": 18525 }, { "epoch": 0.22149953968842287, "grad_norm": 3.455749988555908, "learning_rate": 9.068940092891688e-06, "loss": 0.6969, "step": 18526 }, { "epoch": 0.22151149583328353, "grad_norm": 1.6649346351623535, "learning_rate": 9.068827566963647e-06, "loss": 0.6693, "step": 18527 }, { "epoch": 0.22152345197814416, "grad_norm": 2.179338216781616, "learning_rate": 9.068715034934386e-06, "loss": 0.5997, "step": 18528 }, { "epoch": 0.22153540812300482, "grad_norm": 6.566163063049316, "learning_rate": 9.06860249680407e-06, "loss": 0.6298, "step": 18529 }, { "epoch": 0.22154736426786548, "grad_norm": 1.8363054990768433, "learning_rate": 9.068489952572871e-06, "loss": 0.5619, "step": 18530 }, { "epoch": 0.2215593204127261, "grad_norm": 2.950267791748047, "learning_rate": 9.068377402240956e-06, "loss": 0.6734, "step": 18531 }, { "epoch": 0.22157127655758677, "grad_norm": 2.676508665084839, "learning_rate": 9.068264845808496e-06, "loss": 0.5558, "step": 18532 }, { "epoch": 0.22158323270244742, "grad_norm": 1.4929577112197876, "learning_rate": 9.068152283275657e-06, "loss": 0.6716, "step": 18533 }, { "epoch": 0.22159518884730808, "grad_norm": 6.373316287994385, "learning_rate": 9.068039714642611e-06, "loss": 0.5375, "step": 18534 }, { "epoch": 0.22160714499216871, "grad_norm": 1.841752052307129, "learning_rate": 9.067927139909523e-06, "loss": 0.5941, "step": 18535 }, { "epoch": 0.22161910113702937, "grad_norm": 1.3771796226501465, "learning_rate": 9.067814559076565e-06, "loss": 0.579, "step": 18536 }, { "epoch": 0.22163105728189003, "grad_norm": 1.8169423341751099, "learning_rate": 9.067701972143904e-06, "loss": 0.5978, "step": 18537 }, { "epoch": 0.2216430134267507, "grad_norm": 2.948185443878174, "learning_rate": 9.06758937911171e-06, "loss": 0.6488, "step": 18538 }, { "epoch": 0.22165496957161132, "grad_norm": 42.231773376464844, "learning_rate": 9.06747677998015e-06, "loss": 0.6459, "step": 18539 }, { "epoch": 0.22166692571647198, "grad_norm": 1.6391489505767822, "learning_rate": 9.067364174749396e-06, "loss": 0.6293, "step": 18540 }, { "epoch": 0.22167888186133264, "grad_norm": 2.294841766357422, "learning_rate": 9.067251563419614e-06, "loss": 0.5978, "step": 18541 }, { "epoch": 0.22169083800619327, "grad_norm": 2.85959529876709, "learning_rate": 9.067138945990974e-06, "loss": 0.5908, "step": 18542 }, { "epoch": 0.22170279415105393, "grad_norm": 1.9128822088241577, "learning_rate": 9.067026322463647e-06, "loss": 0.6828, "step": 18543 }, { "epoch": 0.2217147502959146, "grad_norm": 1.641661286354065, "learning_rate": 9.066913692837796e-06, "loss": 0.5907, "step": 18544 }, { "epoch": 0.22172670644077525, "grad_norm": 1.7564200162887573, "learning_rate": 9.066801057113594e-06, "loss": 0.6676, "step": 18545 }, { "epoch": 0.22173866258563588, "grad_norm": 1.4357192516326904, "learning_rate": 9.06668841529121e-06, "loss": 0.6648, "step": 18546 }, { "epoch": 0.22175061873049653, "grad_norm": 2.4196197986602783, "learning_rate": 9.066575767370814e-06, "loss": 0.5872, "step": 18547 }, { "epoch": 0.2217625748753572, "grad_norm": 2.8307456970214844, "learning_rate": 9.066463113352569e-06, "loss": 0.562, "step": 18548 }, { "epoch": 0.22177453102021785, "grad_norm": 2.0958046913146973, "learning_rate": 9.066350453236652e-06, "loss": 0.61, "step": 18549 }, { "epoch": 0.22178648716507848, "grad_norm": 2.762749433517456, "learning_rate": 9.066237787023227e-06, "loss": 0.6494, "step": 18550 }, { "epoch": 0.22179844330993914, "grad_norm": 1.6504464149475098, "learning_rate": 9.066125114712464e-06, "loss": 0.6017, "step": 18551 }, { "epoch": 0.2218103994547998, "grad_norm": 1.5277341604232788, "learning_rate": 9.06601243630453e-06, "loss": 0.6249, "step": 18552 }, { "epoch": 0.22182235559966046, "grad_norm": 2.2088875770568848, "learning_rate": 9.065899751799598e-06, "loss": 0.7116, "step": 18553 }, { "epoch": 0.2218343117445211, "grad_norm": 1.484777569770813, "learning_rate": 9.065787061197834e-06, "loss": 0.6386, "step": 18554 }, { "epoch": 0.22184626788938175, "grad_norm": 3.3656718730926514, "learning_rate": 9.065674364499408e-06, "loss": 0.6296, "step": 18555 }, { "epoch": 0.2218582240342424, "grad_norm": 2.2730824947357178, "learning_rate": 9.06556166170449e-06, "loss": 0.8034, "step": 18556 }, { "epoch": 0.22187018017910304, "grad_norm": 1.979231834411621, "learning_rate": 9.065448952813247e-06, "loss": 0.6603, "step": 18557 }, { "epoch": 0.2218821363239637, "grad_norm": 1.9031169414520264, "learning_rate": 9.065336237825848e-06, "loss": 0.6354, "step": 18558 }, { "epoch": 0.22189409246882436, "grad_norm": 2.0054194927215576, "learning_rate": 9.065223516742464e-06, "loss": 0.5085, "step": 18559 }, { "epoch": 0.22190604861368501, "grad_norm": 1.3963546752929688, "learning_rate": 9.065110789563262e-06, "loss": 0.6282, "step": 18560 }, { "epoch": 0.22191800475854565, "grad_norm": 1.7689917087554932, "learning_rate": 9.064998056288412e-06, "loss": 0.5132, "step": 18561 }, { "epoch": 0.2219299609034063, "grad_norm": 4.128530502319336, "learning_rate": 9.064885316918085e-06, "loss": 0.6356, "step": 18562 }, { "epoch": 0.22194191704826696, "grad_norm": 2.500962018966675, "learning_rate": 9.064772571452445e-06, "loss": 0.6795, "step": 18563 }, { "epoch": 0.22195387319312762, "grad_norm": 1.6534419059753418, "learning_rate": 9.064659819891665e-06, "loss": 0.5611, "step": 18564 }, { "epoch": 0.22196582933798825, "grad_norm": 3.2004318237304688, "learning_rate": 9.064547062235913e-06, "loss": 0.6151, "step": 18565 }, { "epoch": 0.2219777854828489, "grad_norm": 1.7687444686889648, "learning_rate": 9.06443429848536e-06, "loss": 0.6408, "step": 18566 }, { "epoch": 0.22198974162770957, "grad_norm": 1.770991563796997, "learning_rate": 9.064321528640172e-06, "loss": 0.5968, "step": 18567 }, { "epoch": 0.2220016977725702, "grad_norm": 1.7747905254364014, "learning_rate": 9.06420875270052e-06, "loss": 0.6192, "step": 18568 }, { "epoch": 0.22201365391743086, "grad_norm": 3.737431287765503, "learning_rate": 9.06409597066657e-06, "loss": 0.6309, "step": 18569 }, { "epoch": 0.22202561006229152, "grad_norm": 1.8966091871261597, "learning_rate": 9.063983182538497e-06, "loss": 0.5943, "step": 18570 }, { "epoch": 0.22203756620715218, "grad_norm": 1.453303337097168, "learning_rate": 9.063870388316465e-06, "loss": 0.5611, "step": 18571 }, { "epoch": 0.2220495223520128, "grad_norm": 1.8933156728744507, "learning_rate": 9.063757588000645e-06, "loss": 0.6752, "step": 18572 }, { "epoch": 0.22206147849687347, "grad_norm": 1.4887720346450806, "learning_rate": 9.063644781591208e-06, "loss": 0.578, "step": 18573 }, { "epoch": 0.22207343464173412, "grad_norm": 1.8617568016052246, "learning_rate": 9.06353196908832e-06, "loss": 0.6697, "step": 18574 }, { "epoch": 0.22208539078659478, "grad_norm": 1.8837114572525024, "learning_rate": 9.06341915049215e-06, "loss": 0.723, "step": 18575 }, { "epoch": 0.2220973469314554, "grad_norm": 2.1578927040100098, "learning_rate": 9.06330632580287e-06, "loss": 0.5594, "step": 18576 }, { "epoch": 0.22210930307631607, "grad_norm": 1.7182776927947998, "learning_rate": 9.063193495020646e-06, "loss": 0.6226, "step": 18577 }, { "epoch": 0.22212125922117673, "grad_norm": 1.5388432741165161, "learning_rate": 9.06308065814565e-06, "loss": 0.6111, "step": 18578 }, { "epoch": 0.22213321536603736, "grad_norm": 2.2613766193389893, "learning_rate": 9.062967815178052e-06, "loss": 0.6339, "step": 18579 }, { "epoch": 0.22214517151089802, "grad_norm": 2.6772639751434326, "learning_rate": 9.062854966118018e-06, "loss": 0.6191, "step": 18580 }, { "epoch": 0.22215712765575868, "grad_norm": 1.9151175022125244, "learning_rate": 9.062742110965716e-06, "loss": 0.6251, "step": 18581 }, { "epoch": 0.22216908380061934, "grad_norm": 1.4890632629394531, "learning_rate": 9.062629249721321e-06, "loss": 0.5513, "step": 18582 }, { "epoch": 0.22218103994547997, "grad_norm": 1.5292807817459106, "learning_rate": 9.062516382384998e-06, "loss": 0.594, "step": 18583 }, { "epoch": 0.22219299609034063, "grad_norm": 1.9316086769104004, "learning_rate": 9.062403508956917e-06, "loss": 0.6537, "step": 18584 }, { "epoch": 0.2222049522352013, "grad_norm": 3.6811723709106445, "learning_rate": 9.062290629437248e-06, "loss": 0.683, "step": 18585 }, { "epoch": 0.22221690838006194, "grad_norm": 14.472272872924805, "learning_rate": 9.06217774382616e-06, "loss": 0.6145, "step": 18586 }, { "epoch": 0.22222886452492258, "grad_norm": 1.9127249717712402, "learning_rate": 9.062064852123823e-06, "loss": 0.6183, "step": 18587 }, { "epoch": 0.22224082066978323, "grad_norm": 4.225550174713135, "learning_rate": 9.061951954330402e-06, "loss": 0.6316, "step": 18588 }, { "epoch": 0.2222527768146439, "grad_norm": 2.2646656036376953, "learning_rate": 9.06183905044607e-06, "loss": 0.6736, "step": 18589 }, { "epoch": 0.22226473295950452, "grad_norm": 3.1357369422912598, "learning_rate": 9.061726140470998e-06, "loss": 0.5646, "step": 18590 }, { "epoch": 0.22227668910436518, "grad_norm": 2.4278953075408936, "learning_rate": 9.061613224405352e-06, "loss": 0.5033, "step": 18591 }, { "epoch": 0.22228864524922584, "grad_norm": 1.8630907535552979, "learning_rate": 9.061500302249304e-06, "loss": 0.7175, "step": 18592 }, { "epoch": 0.2223006013940865, "grad_norm": 4.480605125427246, "learning_rate": 9.061387374003021e-06, "loss": 0.5919, "step": 18593 }, { "epoch": 0.22231255753894713, "grad_norm": 1.7157269716262817, "learning_rate": 9.061274439666674e-06, "loss": 0.6051, "step": 18594 }, { "epoch": 0.2223245136838078, "grad_norm": 2.256808042526245, "learning_rate": 9.06116149924043e-06, "loss": 0.6488, "step": 18595 }, { "epoch": 0.22233646982866845, "grad_norm": 3.2340264320373535, "learning_rate": 9.061048552724461e-06, "loss": 0.6158, "step": 18596 }, { "epoch": 0.2223484259735291, "grad_norm": 1.6563884019851685, "learning_rate": 9.060935600118934e-06, "loss": 0.5515, "step": 18597 }, { "epoch": 0.22236038211838974, "grad_norm": 3.1671042442321777, "learning_rate": 9.06082264142402e-06, "loss": 0.6472, "step": 18598 }, { "epoch": 0.2223723382632504, "grad_norm": 1.5290858745574951, "learning_rate": 9.060709676639889e-06, "loss": 0.5188, "step": 18599 }, { "epoch": 0.22238429440811106, "grad_norm": 1.805928349494934, "learning_rate": 9.060596705766708e-06, "loss": 0.6217, "step": 18600 }, { "epoch": 0.22239625055297169, "grad_norm": 2.0332882404327393, "learning_rate": 9.06048372880465e-06, "loss": 0.693, "step": 18601 }, { "epoch": 0.22240820669783234, "grad_norm": 3.654655694961548, "learning_rate": 9.06037074575388e-06, "loss": 0.6858, "step": 18602 }, { "epoch": 0.222420162842693, "grad_norm": 1.3761796951293945, "learning_rate": 9.060257756614571e-06, "loss": 0.687, "step": 18603 }, { "epoch": 0.22243211898755366, "grad_norm": 1.747597575187683, "learning_rate": 9.060144761386892e-06, "loss": 0.6125, "step": 18604 }, { "epoch": 0.2224440751324143, "grad_norm": 3.5588295459747314, "learning_rate": 9.060031760071008e-06, "loss": 0.697, "step": 18605 }, { "epoch": 0.22245603127727495, "grad_norm": 2.55682110786438, "learning_rate": 9.059918752667094e-06, "loss": 0.6587, "step": 18606 }, { "epoch": 0.2224679874221356, "grad_norm": 2.758758783340454, "learning_rate": 9.059805739175319e-06, "loss": 0.5914, "step": 18607 }, { "epoch": 0.22247994356699627, "grad_norm": 3.404554843902588, "learning_rate": 9.059692719595848e-06, "loss": 0.5786, "step": 18608 }, { "epoch": 0.2224918997118569, "grad_norm": 1.7516247034072876, "learning_rate": 9.059579693928855e-06, "loss": 0.65, "step": 18609 }, { "epoch": 0.22250385585671756, "grad_norm": 3.299006223678589, "learning_rate": 9.059466662174507e-06, "loss": 0.6089, "step": 18610 }, { "epoch": 0.22251581200157822, "grad_norm": 4.4502997398376465, "learning_rate": 9.059353624332974e-06, "loss": 0.6739, "step": 18611 }, { "epoch": 0.22252776814643888, "grad_norm": 1.897597312927246, "learning_rate": 9.059240580404427e-06, "loss": 0.5641, "step": 18612 }, { "epoch": 0.2225397242912995, "grad_norm": 1.7391935586929321, "learning_rate": 9.059127530389035e-06, "loss": 0.6196, "step": 18613 }, { "epoch": 0.22255168043616017, "grad_norm": 2.7195911407470703, "learning_rate": 9.059014474286965e-06, "loss": 0.6139, "step": 18614 }, { "epoch": 0.22256363658102082, "grad_norm": 2.2631115913391113, "learning_rate": 9.058901412098388e-06, "loss": 0.6603, "step": 18615 }, { "epoch": 0.22257559272588145, "grad_norm": 1.5742188692092896, "learning_rate": 9.058788343823476e-06, "loss": 0.6523, "step": 18616 }, { "epoch": 0.2225875488707421, "grad_norm": 2.0442955493927, "learning_rate": 9.058675269462395e-06, "loss": 0.7626, "step": 18617 }, { "epoch": 0.22259950501560277, "grad_norm": 2.0714547634124756, "learning_rate": 9.058562189015317e-06, "loss": 0.631, "step": 18618 }, { "epoch": 0.22261146116046343, "grad_norm": 4.066624164581299, "learning_rate": 9.05844910248241e-06, "loss": 0.6058, "step": 18619 }, { "epoch": 0.22262341730532406, "grad_norm": 2.631704330444336, "learning_rate": 9.058336009863844e-06, "loss": 0.6384, "step": 18620 }, { "epoch": 0.22263537345018472, "grad_norm": 2.0670158863067627, "learning_rate": 9.058222911159788e-06, "loss": 0.6649, "step": 18621 }, { "epoch": 0.22264732959504538, "grad_norm": 1.9111733436584473, "learning_rate": 9.058109806370414e-06, "loss": 0.5407, "step": 18622 }, { "epoch": 0.22265928573990604, "grad_norm": 2.840106725692749, "learning_rate": 9.05799669549589e-06, "loss": 0.5358, "step": 18623 }, { "epoch": 0.22267124188476667, "grad_norm": 2.147813081741333, "learning_rate": 9.057883578536383e-06, "loss": 0.7536, "step": 18624 }, { "epoch": 0.22268319802962733, "grad_norm": 1.8109406232833862, "learning_rate": 9.057770455492067e-06, "loss": 0.5613, "step": 18625 }, { "epoch": 0.22269515417448799, "grad_norm": 2.0565640926361084, "learning_rate": 9.057657326363111e-06, "loss": 0.649, "step": 18626 }, { "epoch": 0.22270711031934862, "grad_norm": 1.6385763883590698, "learning_rate": 9.057544191149681e-06, "loss": 0.5377, "step": 18627 }, { "epoch": 0.22271906646420928, "grad_norm": 1.6832282543182373, "learning_rate": 9.05743104985195e-06, "loss": 0.6043, "step": 18628 }, { "epoch": 0.22273102260906993, "grad_norm": 1.9202356338500977, "learning_rate": 9.057317902470087e-06, "loss": 0.6723, "step": 18629 }, { "epoch": 0.2227429787539306, "grad_norm": 2.927739143371582, "learning_rate": 9.057204749004262e-06, "loss": 0.5705, "step": 18630 }, { "epoch": 0.22275493489879122, "grad_norm": 2.160576820373535, "learning_rate": 9.057091589454643e-06, "loss": 0.6249, "step": 18631 }, { "epoch": 0.22276689104365188, "grad_norm": 2.343803644180298, "learning_rate": 9.0569784238214e-06, "loss": 0.6466, "step": 18632 }, { "epoch": 0.22277884718851254, "grad_norm": 1.432661771774292, "learning_rate": 9.056865252104705e-06, "loss": 0.5992, "step": 18633 }, { "epoch": 0.2227908033333732, "grad_norm": 3.1712841987609863, "learning_rate": 9.056752074304727e-06, "loss": 0.5443, "step": 18634 }, { "epoch": 0.22280275947823383, "grad_norm": 3.292114734649658, "learning_rate": 9.056638890421632e-06, "loss": 0.5822, "step": 18635 }, { "epoch": 0.2228147156230945, "grad_norm": 2.3987557888031006, "learning_rate": 9.056525700455594e-06, "loss": 0.5541, "step": 18636 }, { "epoch": 0.22282667176795515, "grad_norm": 2.70395565032959, "learning_rate": 9.056412504406781e-06, "loss": 0.4901, "step": 18637 }, { "epoch": 0.22283862791281578, "grad_norm": 5.4233808517456055, "learning_rate": 9.056299302275365e-06, "loss": 0.6182, "step": 18638 }, { "epoch": 0.22285058405767644, "grad_norm": 1.9724886417388916, "learning_rate": 9.05618609406151e-06, "loss": 0.6697, "step": 18639 }, { "epoch": 0.2228625402025371, "grad_norm": 3.4574427604675293, "learning_rate": 9.056072879765392e-06, "loss": 0.649, "step": 18640 }, { "epoch": 0.22287449634739775, "grad_norm": 2.1444127559661865, "learning_rate": 9.05595965938718e-06, "loss": 0.5988, "step": 18641 }, { "epoch": 0.22288645249225839, "grad_norm": 1.9311970472335815, "learning_rate": 9.055846432927038e-06, "loss": 0.5919, "step": 18642 }, { "epoch": 0.22289840863711904, "grad_norm": 2.0530078411102295, "learning_rate": 9.055733200385141e-06, "loss": 0.5909, "step": 18643 }, { "epoch": 0.2229103647819797, "grad_norm": 2.034512758255005, "learning_rate": 9.05561996176166e-06, "loss": 0.6336, "step": 18644 }, { "epoch": 0.22292232092684036, "grad_norm": 2.8720345497131348, "learning_rate": 9.055506717056761e-06, "loss": 0.6562, "step": 18645 }, { "epoch": 0.222934277071701, "grad_norm": 2.05220365524292, "learning_rate": 9.055393466270615e-06, "loss": 0.6057, "step": 18646 }, { "epoch": 0.22294623321656165, "grad_norm": 2.545628309249878, "learning_rate": 9.055280209403392e-06, "loss": 0.6419, "step": 18647 }, { "epoch": 0.2229581893614223, "grad_norm": 5.5886054039001465, "learning_rate": 9.055166946455264e-06, "loss": 0.6645, "step": 18648 }, { "epoch": 0.22297014550628294, "grad_norm": 2.312976837158203, "learning_rate": 9.055053677426396e-06, "loss": 0.6799, "step": 18649 }, { "epoch": 0.2229821016511436, "grad_norm": 1.745427131652832, "learning_rate": 9.05494040231696e-06, "loss": 0.6212, "step": 18650 }, { "epoch": 0.22299405779600426, "grad_norm": 2.1221513748168945, "learning_rate": 9.054827121127128e-06, "loss": 0.6671, "step": 18651 }, { "epoch": 0.22300601394086492, "grad_norm": 2.167173385620117, "learning_rate": 9.054713833857069e-06, "loss": 0.6485, "step": 18652 }, { "epoch": 0.22301797008572555, "grad_norm": 2.3162765502929688, "learning_rate": 9.054600540506951e-06, "loss": 0.6263, "step": 18653 }, { "epoch": 0.2230299262305862, "grad_norm": 1.7899632453918457, "learning_rate": 9.054487241076946e-06, "loss": 0.6537, "step": 18654 }, { "epoch": 0.22304188237544686, "grad_norm": 3.960144519805908, "learning_rate": 9.054373935567222e-06, "loss": 0.7154, "step": 18655 }, { "epoch": 0.22305383852030752, "grad_norm": 2.187462091445923, "learning_rate": 9.05426062397795e-06, "loss": 0.6464, "step": 18656 }, { "epoch": 0.22306579466516815, "grad_norm": 2.9590954780578613, "learning_rate": 9.054147306309301e-06, "loss": 0.6232, "step": 18657 }, { "epoch": 0.2230777508100288, "grad_norm": 2.1166179180145264, "learning_rate": 9.054033982561443e-06, "loss": 0.5564, "step": 18658 }, { "epoch": 0.22308970695488947, "grad_norm": 2.7914376258850098, "learning_rate": 9.053920652734548e-06, "loss": 0.5014, "step": 18659 }, { "epoch": 0.22310166309975013, "grad_norm": 2.9676666259765625, "learning_rate": 9.053807316828781e-06, "loss": 0.6224, "step": 18660 }, { "epoch": 0.22311361924461076, "grad_norm": 2.6867971420288086, "learning_rate": 9.053693974844317e-06, "loss": 0.6431, "step": 18661 }, { "epoch": 0.22312557538947142, "grad_norm": 2.2990596294403076, "learning_rate": 9.053580626781326e-06, "loss": 0.6147, "step": 18662 }, { "epoch": 0.22313753153433208, "grad_norm": 3.5702333450317383, "learning_rate": 9.053467272639975e-06, "loss": 0.5659, "step": 18663 }, { "epoch": 0.2231494876791927, "grad_norm": 1.6476705074310303, "learning_rate": 9.053353912420437e-06, "loss": 0.634, "step": 18664 }, { "epoch": 0.22316144382405337, "grad_norm": 2.318638801574707, "learning_rate": 9.05324054612288e-06, "loss": 0.548, "step": 18665 }, { "epoch": 0.22317339996891403, "grad_norm": 7.311406135559082, "learning_rate": 9.053127173747473e-06, "loss": 0.6213, "step": 18666 }, { "epoch": 0.22318535611377469, "grad_norm": 2.8524856567382812, "learning_rate": 9.053013795294388e-06, "loss": 0.6699, "step": 18667 }, { "epoch": 0.22319731225863532, "grad_norm": 4.411436557769775, "learning_rate": 9.052900410763794e-06, "loss": 0.6323, "step": 18668 }, { "epoch": 0.22320926840349598, "grad_norm": 3.198502779006958, "learning_rate": 9.052787020155862e-06, "loss": 0.7105, "step": 18669 }, { "epoch": 0.22322122454835663, "grad_norm": 3.197096109390259, "learning_rate": 9.052673623470763e-06, "loss": 0.6137, "step": 18670 }, { "epoch": 0.2232331806932173, "grad_norm": 5.247410297393799, "learning_rate": 9.052560220708662e-06, "loss": 0.5693, "step": 18671 }, { "epoch": 0.22324513683807792, "grad_norm": 2.197723627090454, "learning_rate": 9.052446811869736e-06, "loss": 0.6657, "step": 18672 }, { "epoch": 0.22325709298293858, "grad_norm": 2.141369581222534, "learning_rate": 9.052333396954149e-06, "loss": 0.5792, "step": 18673 }, { "epoch": 0.22326904912779924, "grad_norm": 3.127283811569214, "learning_rate": 9.052219975962075e-06, "loss": 0.564, "step": 18674 }, { "epoch": 0.22328100527265987, "grad_norm": 2.007547616958618, "learning_rate": 9.052106548893681e-06, "loss": 0.6246, "step": 18675 }, { "epoch": 0.22329296141752053, "grad_norm": 1.4811019897460938, "learning_rate": 9.051993115749143e-06, "loss": 0.5512, "step": 18676 }, { "epoch": 0.2233049175623812, "grad_norm": 4.44301176071167, "learning_rate": 9.051879676528623e-06, "loss": 0.7075, "step": 18677 }, { "epoch": 0.22331687370724185, "grad_norm": 3.5506715774536133, "learning_rate": 9.051766231232296e-06, "loss": 0.582, "step": 18678 }, { "epoch": 0.22332882985210248, "grad_norm": 3.409954786300659, "learning_rate": 9.051652779860331e-06, "loss": 0.5694, "step": 18679 }, { "epoch": 0.22334078599696314, "grad_norm": 2.438716411590576, "learning_rate": 9.0515393224129e-06, "loss": 0.5665, "step": 18680 }, { "epoch": 0.2233527421418238, "grad_norm": 1.4491227865219116, "learning_rate": 9.05142585889017e-06, "loss": 0.5997, "step": 18681 }, { "epoch": 0.22336469828668445, "grad_norm": 2.26511812210083, "learning_rate": 9.051312389292311e-06, "loss": 0.6041, "step": 18682 }, { "epoch": 0.22337665443154509, "grad_norm": 3.1749839782714844, "learning_rate": 9.051198913619497e-06, "loss": 0.6769, "step": 18683 }, { "epoch": 0.22338861057640574, "grad_norm": 6.20889949798584, "learning_rate": 9.051085431871896e-06, "loss": 0.5308, "step": 18684 }, { "epoch": 0.2234005667212664, "grad_norm": 2.2929110527038574, "learning_rate": 9.050971944049676e-06, "loss": 0.5835, "step": 18685 }, { "epoch": 0.22341252286612703, "grad_norm": 2.4479215145111084, "learning_rate": 9.05085845015301e-06, "loss": 0.5695, "step": 18686 }, { "epoch": 0.2234244790109877, "grad_norm": 3.726055860519409, "learning_rate": 9.050744950182068e-06, "loss": 0.6188, "step": 18687 }, { "epoch": 0.22343643515584835, "grad_norm": 2.508378028869629, "learning_rate": 9.050631444137019e-06, "loss": 0.6481, "step": 18688 }, { "epoch": 0.223448391300709, "grad_norm": 2.2917988300323486, "learning_rate": 9.050517932018035e-06, "loss": 0.637, "step": 18689 }, { "epoch": 0.22346034744556964, "grad_norm": 4.588029384613037, "learning_rate": 9.050404413825282e-06, "loss": 0.565, "step": 18690 }, { "epoch": 0.2234723035904303, "grad_norm": 2.1501386165618896, "learning_rate": 9.050290889558936e-06, "loss": 0.5514, "step": 18691 }, { "epoch": 0.22348425973529096, "grad_norm": 2.581430196762085, "learning_rate": 9.050177359219164e-06, "loss": 0.6129, "step": 18692 }, { "epoch": 0.22349621588015162, "grad_norm": 2.1260905265808105, "learning_rate": 9.050063822806135e-06, "loss": 0.6712, "step": 18693 }, { "epoch": 0.22350817202501225, "grad_norm": 5.040638446807861, "learning_rate": 9.049950280320022e-06, "loss": 0.6062, "step": 18694 }, { "epoch": 0.2235201281698729, "grad_norm": 4.383869171142578, "learning_rate": 9.049836731760995e-06, "loss": 0.6624, "step": 18695 }, { "epoch": 0.22353208431473356, "grad_norm": 1.862061858177185, "learning_rate": 9.049723177129221e-06, "loss": 0.5176, "step": 18696 }, { "epoch": 0.2235440404595942, "grad_norm": 1.8568637371063232, "learning_rate": 9.049609616424875e-06, "loss": 0.6058, "step": 18697 }, { "epoch": 0.22355599660445485, "grad_norm": 2.877493381500244, "learning_rate": 9.049496049648124e-06, "loss": 0.6057, "step": 18698 }, { "epoch": 0.2235679527493155, "grad_norm": 3.0830771923065186, "learning_rate": 9.04938247679914e-06, "loss": 0.6078, "step": 18699 }, { "epoch": 0.22357990889417617, "grad_norm": 2.1443073749542236, "learning_rate": 9.049268897878091e-06, "loss": 0.5996, "step": 18700 }, { "epoch": 0.2235918650390368, "grad_norm": 1.8855836391448975, "learning_rate": 9.049155312885151e-06, "loss": 0.6404, "step": 18701 }, { "epoch": 0.22360382118389746, "grad_norm": 1.4250553846359253, "learning_rate": 9.049041721820488e-06, "loss": 0.5571, "step": 18702 }, { "epoch": 0.22361577732875812, "grad_norm": 1.5557893514633179, "learning_rate": 9.04892812468427e-06, "loss": 0.6119, "step": 18703 }, { "epoch": 0.22362773347361878, "grad_norm": 2.1767184734344482, "learning_rate": 9.048814521476673e-06, "loss": 0.6373, "step": 18704 }, { "epoch": 0.2236396896184794, "grad_norm": 1.4500741958618164, "learning_rate": 9.048700912197864e-06, "loss": 0.6383, "step": 18705 }, { "epoch": 0.22365164576334007, "grad_norm": 4.0024566650390625, "learning_rate": 9.048587296848012e-06, "loss": 0.6219, "step": 18706 }, { "epoch": 0.22366360190820073, "grad_norm": 2.2995357513427734, "learning_rate": 9.048473675427288e-06, "loss": 0.5929, "step": 18707 }, { "epoch": 0.22367555805306136, "grad_norm": 7.638989448547363, "learning_rate": 9.048360047935866e-06, "loss": 0.6528, "step": 18708 }, { "epoch": 0.22368751419792202, "grad_norm": 1.6353329420089722, "learning_rate": 9.048246414373912e-06, "loss": 0.4851, "step": 18709 }, { "epoch": 0.22369947034278267, "grad_norm": 1.9883793592453003, "learning_rate": 9.048132774741599e-06, "loss": 0.5973, "step": 18710 }, { "epoch": 0.22371142648764333, "grad_norm": 3.2928364276885986, "learning_rate": 9.048019129039096e-06, "loss": 0.5759, "step": 18711 }, { "epoch": 0.22372338263250396, "grad_norm": 1.8828247785568237, "learning_rate": 9.047905477266574e-06, "loss": 0.5491, "step": 18712 }, { "epoch": 0.22373533877736462, "grad_norm": 1.5777837038040161, "learning_rate": 9.047791819424204e-06, "loss": 0.7428, "step": 18713 }, { "epoch": 0.22374729492222528, "grad_norm": 1.9431737661361694, "learning_rate": 9.047678155512154e-06, "loss": 0.5571, "step": 18714 }, { "epoch": 0.22375925106708594, "grad_norm": 1.5665568113327026, "learning_rate": 9.047564485530598e-06, "loss": 0.702, "step": 18715 }, { "epoch": 0.22377120721194657, "grad_norm": 2.8306479454040527, "learning_rate": 9.047450809479704e-06, "loss": 0.6034, "step": 18716 }, { "epoch": 0.22378316335680723, "grad_norm": 2.8458096981048584, "learning_rate": 9.047337127359644e-06, "loss": 0.5262, "step": 18717 }, { "epoch": 0.2237951195016679, "grad_norm": 2.5968310832977295, "learning_rate": 9.047223439170584e-06, "loss": 0.6476, "step": 18718 }, { "epoch": 0.22380707564652855, "grad_norm": 3.591564178466797, "learning_rate": 9.047109744912702e-06, "loss": 0.5564, "step": 18719 }, { "epoch": 0.22381903179138918, "grad_norm": 1.6624345779418945, "learning_rate": 9.046996044586163e-06, "loss": 0.6621, "step": 18720 }, { "epoch": 0.22383098793624984, "grad_norm": 2.1219675540924072, "learning_rate": 9.04688233819114e-06, "loss": 0.5917, "step": 18721 }, { "epoch": 0.2238429440811105, "grad_norm": 2.5126161575317383, "learning_rate": 9.046768625727802e-06, "loss": 0.6863, "step": 18722 }, { "epoch": 0.22385490022597113, "grad_norm": 2.3586671352386475, "learning_rate": 9.046654907196319e-06, "loss": 0.6412, "step": 18723 }, { "epoch": 0.22386685637083178, "grad_norm": 1.5820350646972656, "learning_rate": 9.046541182596865e-06, "loss": 0.6184, "step": 18724 }, { "epoch": 0.22387881251569244, "grad_norm": 1.4231723546981812, "learning_rate": 9.046427451929607e-06, "loss": 0.5233, "step": 18725 }, { "epoch": 0.2238907686605531, "grad_norm": 1.9076029062271118, "learning_rate": 9.046313715194716e-06, "loss": 0.6701, "step": 18726 }, { "epoch": 0.22390272480541373, "grad_norm": 3.8848676681518555, "learning_rate": 9.046199972392364e-06, "loss": 0.6637, "step": 18727 }, { "epoch": 0.2239146809502744, "grad_norm": 7.787550449371338, "learning_rate": 9.046086223522721e-06, "loss": 0.748, "step": 18728 }, { "epoch": 0.22392663709513505, "grad_norm": 2.951140880584717, "learning_rate": 9.045972468585954e-06, "loss": 0.6164, "step": 18729 }, { "epoch": 0.2239385932399957, "grad_norm": 2.0685322284698486, "learning_rate": 9.045858707582241e-06, "loss": 0.6279, "step": 18730 }, { "epoch": 0.22395054938485634, "grad_norm": 2.450870990753174, "learning_rate": 9.045744940511746e-06, "loss": 0.567, "step": 18731 }, { "epoch": 0.223962505529717, "grad_norm": 2.1299984455108643, "learning_rate": 9.045631167374644e-06, "loss": 0.6208, "step": 18732 }, { "epoch": 0.22397446167457766, "grad_norm": 1.6287208795547485, "learning_rate": 9.045517388171103e-06, "loss": 0.6162, "step": 18733 }, { "epoch": 0.2239864178194383, "grad_norm": 2.1796250343322754, "learning_rate": 9.045403602901296e-06, "loss": 0.695, "step": 18734 }, { "epoch": 0.22399837396429895, "grad_norm": 9.530107498168945, "learning_rate": 9.045289811565389e-06, "loss": 0.5073, "step": 18735 }, { "epoch": 0.2240103301091596, "grad_norm": 2.4988632202148438, "learning_rate": 9.045176014163557e-06, "loss": 0.5866, "step": 18736 }, { "epoch": 0.22402228625402026, "grad_norm": 5.636115550994873, "learning_rate": 9.045062210695968e-06, "loss": 0.706, "step": 18737 }, { "epoch": 0.2240342423988809, "grad_norm": 3.5236868858337402, "learning_rate": 9.044948401162795e-06, "loss": 0.681, "step": 18738 }, { "epoch": 0.22404619854374155, "grad_norm": 2.7886316776275635, "learning_rate": 9.044834585564207e-06, "loss": 0.6267, "step": 18739 }, { "epoch": 0.2240581546886022, "grad_norm": 1.7042744159698486, "learning_rate": 9.044720763900375e-06, "loss": 0.5354, "step": 18740 }, { "epoch": 0.22407011083346287, "grad_norm": 2.323763370513916, "learning_rate": 9.044606936171469e-06, "loss": 0.6157, "step": 18741 }, { "epoch": 0.2240820669783235, "grad_norm": 3.170116901397705, "learning_rate": 9.044493102377662e-06, "loss": 0.6729, "step": 18742 }, { "epoch": 0.22409402312318416, "grad_norm": 2.3241026401519775, "learning_rate": 9.044379262519124e-06, "loss": 0.583, "step": 18743 }, { "epoch": 0.22410597926804482, "grad_norm": 1.6182410717010498, "learning_rate": 9.044265416596023e-06, "loss": 0.5432, "step": 18744 }, { "epoch": 0.22411793541290545, "grad_norm": 2.6487467288970947, "learning_rate": 9.044151564608532e-06, "loss": 0.5415, "step": 18745 }, { "epoch": 0.2241298915577661, "grad_norm": 2.3010847568511963, "learning_rate": 9.044037706556824e-06, "loss": 0.5038, "step": 18746 }, { "epoch": 0.22414184770262677, "grad_norm": 2.2327513694763184, "learning_rate": 9.043923842441064e-06, "loss": 0.6098, "step": 18747 }, { "epoch": 0.22415380384748743, "grad_norm": 1.6778273582458496, "learning_rate": 9.043809972261426e-06, "loss": 0.5634, "step": 18748 }, { "epoch": 0.22416575999234806, "grad_norm": 14.3764066696167, "learning_rate": 9.043696096018081e-06, "loss": 0.6236, "step": 18749 }, { "epoch": 0.22417771613720872, "grad_norm": 1.3868573904037476, "learning_rate": 9.0435822137112e-06, "loss": 0.5373, "step": 18750 }, { "epoch": 0.22418967228206937, "grad_norm": 3.8171184062957764, "learning_rate": 9.043468325340952e-06, "loss": 0.5725, "step": 18751 }, { "epoch": 0.22420162842693003, "grad_norm": 1.6512064933776855, "learning_rate": 9.04335443090751e-06, "loss": 0.6217, "step": 18752 }, { "epoch": 0.22421358457179066, "grad_norm": 4.720926284790039, "learning_rate": 9.043240530411043e-06, "loss": 0.5864, "step": 18753 }, { "epoch": 0.22422554071665132, "grad_norm": 3.213655948638916, "learning_rate": 9.043126623851723e-06, "loss": 0.6463, "step": 18754 }, { "epoch": 0.22423749686151198, "grad_norm": 3.123499870300293, "learning_rate": 9.04301271122972e-06, "loss": 0.5918, "step": 18755 }, { "epoch": 0.2242494530063726, "grad_norm": 2.0311689376831055, "learning_rate": 9.042898792545206e-06, "loss": 0.6848, "step": 18756 }, { "epoch": 0.22426140915123327, "grad_norm": 3.275737762451172, "learning_rate": 9.042784867798348e-06, "loss": 0.6165, "step": 18757 }, { "epoch": 0.22427336529609393, "grad_norm": 2.5647213459014893, "learning_rate": 9.042670936989322e-06, "loss": 0.6416, "step": 18758 }, { "epoch": 0.2242853214409546, "grad_norm": 5.004822731018066, "learning_rate": 9.042557000118298e-06, "loss": 0.539, "step": 18759 }, { "epoch": 0.22429727758581522, "grad_norm": 2.3052423000335693, "learning_rate": 9.042443057185442e-06, "loss": 0.706, "step": 18760 }, { "epoch": 0.22430923373067588, "grad_norm": 2.2295544147491455, "learning_rate": 9.04232910819093e-06, "loss": 0.5684, "step": 18761 }, { "epoch": 0.22432118987553654, "grad_norm": 8.316160202026367, "learning_rate": 9.04221515313493e-06, "loss": 0.552, "step": 18762 }, { "epoch": 0.2243331460203972, "grad_norm": 1.4738988876342773, "learning_rate": 9.042101192017616e-06, "loss": 0.6444, "step": 18763 }, { "epoch": 0.22434510216525783, "grad_norm": 2.105729818344116, "learning_rate": 9.041987224839157e-06, "loss": 0.4602, "step": 18764 }, { "epoch": 0.22435705831011848, "grad_norm": 6.9587578773498535, "learning_rate": 9.041873251599722e-06, "loss": 0.6407, "step": 18765 }, { "epoch": 0.22436901445497914, "grad_norm": 1.8912978172302246, "learning_rate": 9.041759272299485e-06, "loss": 0.6379, "step": 18766 }, { "epoch": 0.22438097059983977, "grad_norm": 1.927071213722229, "learning_rate": 9.041645286938614e-06, "loss": 0.6956, "step": 18767 }, { "epoch": 0.22439292674470043, "grad_norm": 2.1962196826934814, "learning_rate": 9.041531295517282e-06, "loss": 0.5725, "step": 18768 }, { "epoch": 0.2244048828895611, "grad_norm": 2.5374755859375, "learning_rate": 9.04141729803566e-06, "loss": 0.7442, "step": 18769 }, { "epoch": 0.22441683903442175, "grad_norm": 2.277027130126953, "learning_rate": 9.041303294493919e-06, "loss": 0.5821, "step": 18770 }, { "epoch": 0.22442879517928238, "grad_norm": 1.7882440090179443, "learning_rate": 9.041189284892228e-06, "loss": 0.5072, "step": 18771 }, { "epoch": 0.22444075132414304, "grad_norm": 1.902467966079712, "learning_rate": 9.04107526923076e-06, "loss": 0.5224, "step": 18772 }, { "epoch": 0.2244527074690037, "grad_norm": 2.679762363433838, "learning_rate": 9.040961247509685e-06, "loss": 0.6502, "step": 18773 }, { "epoch": 0.22446466361386436, "grad_norm": 2.8388357162475586, "learning_rate": 9.040847219729174e-06, "loss": 0.678, "step": 18774 }, { "epoch": 0.224476619758725, "grad_norm": 1.7716872692108154, "learning_rate": 9.0407331858894e-06, "loss": 0.548, "step": 18775 }, { "epoch": 0.22448857590358565, "grad_norm": 2.1939010620117188, "learning_rate": 9.04061914599053e-06, "loss": 0.6137, "step": 18776 }, { "epoch": 0.2245005320484463, "grad_norm": 3.7643518447875977, "learning_rate": 9.040505100032739e-06, "loss": 0.5142, "step": 18777 }, { "epoch": 0.22451248819330696, "grad_norm": 2.790205478668213, "learning_rate": 9.040391048016195e-06, "loss": 0.6186, "step": 18778 }, { "epoch": 0.2245244443381676, "grad_norm": 1.9310029745101929, "learning_rate": 9.04027698994107e-06, "loss": 0.663, "step": 18779 }, { "epoch": 0.22453640048302825, "grad_norm": 2.671107053756714, "learning_rate": 9.040162925807536e-06, "loss": 0.6212, "step": 18780 }, { "epoch": 0.2245483566278889, "grad_norm": 2.2393691539764404, "learning_rate": 9.040048855615764e-06, "loss": 0.6227, "step": 18781 }, { "epoch": 0.22456031277274954, "grad_norm": 6.306204795837402, "learning_rate": 9.039934779365924e-06, "loss": 0.587, "step": 18782 }, { "epoch": 0.2245722689176102, "grad_norm": 3.6410560607910156, "learning_rate": 9.039820697058185e-06, "loss": 0.6724, "step": 18783 }, { "epoch": 0.22458422506247086, "grad_norm": 2.1179540157318115, "learning_rate": 9.039706608692725e-06, "loss": 0.7013, "step": 18784 }, { "epoch": 0.22459618120733152, "grad_norm": 6.90822696685791, "learning_rate": 9.039592514269708e-06, "loss": 0.5997, "step": 18785 }, { "epoch": 0.22460813735219215, "grad_norm": 1.6655246019363403, "learning_rate": 9.039478413789308e-06, "loss": 0.585, "step": 18786 }, { "epoch": 0.2246200934970528, "grad_norm": 2.2977919578552246, "learning_rate": 9.039364307251695e-06, "loss": 0.6531, "step": 18787 }, { "epoch": 0.22463204964191347, "grad_norm": 2.3103301525115967, "learning_rate": 9.039250194657041e-06, "loss": 0.559, "step": 18788 }, { "epoch": 0.22464400578677413, "grad_norm": 2.1557116508483887, "learning_rate": 9.039136076005518e-06, "loss": 0.5593, "step": 18789 }, { "epoch": 0.22465596193163476, "grad_norm": 2.9442451000213623, "learning_rate": 9.039021951297296e-06, "loss": 0.6277, "step": 18790 }, { "epoch": 0.22466791807649542, "grad_norm": 10.548810958862305, "learning_rate": 9.038907820532546e-06, "loss": 0.5474, "step": 18791 }, { "epoch": 0.22467987422135607, "grad_norm": 2.6294002532958984, "learning_rate": 9.038793683711437e-06, "loss": 0.6607, "step": 18792 }, { "epoch": 0.2246918303662167, "grad_norm": 8.291314125061035, "learning_rate": 9.038679540834146e-06, "loss": 0.5462, "step": 18793 }, { "epoch": 0.22470378651107736, "grad_norm": 2.575251817703247, "learning_rate": 9.03856539190084e-06, "loss": 0.6592, "step": 18794 }, { "epoch": 0.22471574265593802, "grad_norm": 3.743621349334717, "learning_rate": 9.03845123691169e-06, "loss": 0.5589, "step": 18795 }, { "epoch": 0.22472769880079868, "grad_norm": 4.755975723266602, "learning_rate": 9.038337075866868e-06, "loss": 0.6359, "step": 18796 }, { "epoch": 0.2247396549456593, "grad_norm": 2.276256561279297, "learning_rate": 9.038222908766546e-06, "loss": 0.546, "step": 18797 }, { "epoch": 0.22475161109051997, "grad_norm": 6.430281162261963, "learning_rate": 9.038108735610894e-06, "loss": 0.6342, "step": 18798 }, { "epoch": 0.22476356723538063, "grad_norm": 1.766185998916626, "learning_rate": 9.037994556400082e-06, "loss": 0.6449, "step": 18799 }, { "epoch": 0.2247755233802413, "grad_norm": 4.475908279418945, "learning_rate": 9.037880371134285e-06, "loss": 0.7416, "step": 18800 }, { "epoch": 0.22478747952510192, "grad_norm": 3.5268568992614746, "learning_rate": 9.037766179813672e-06, "loss": 0.609, "step": 18801 }, { "epoch": 0.22479943566996258, "grad_norm": 2.287588357925415, "learning_rate": 9.037651982438412e-06, "loss": 0.6913, "step": 18802 }, { "epoch": 0.22481139181482324, "grad_norm": 3.494856834411621, "learning_rate": 9.03753777900868e-06, "loss": 0.5474, "step": 18803 }, { "epoch": 0.22482334795968387, "grad_norm": 3.4428749084472656, "learning_rate": 9.037423569524646e-06, "loss": 0.6979, "step": 18804 }, { "epoch": 0.22483530410454453, "grad_norm": 2.0271060466766357, "learning_rate": 9.03730935398648e-06, "loss": 0.7112, "step": 18805 }, { "epoch": 0.22484726024940518, "grad_norm": 1.7470437288284302, "learning_rate": 9.037195132394357e-06, "loss": 0.6719, "step": 18806 }, { "epoch": 0.22485921639426584, "grad_norm": 6.471532344818115, "learning_rate": 9.037080904748443e-06, "loss": 0.6026, "step": 18807 }, { "epoch": 0.22487117253912647, "grad_norm": 1.7380304336547852, "learning_rate": 9.036966671048912e-06, "loss": 0.5718, "step": 18808 }, { "epoch": 0.22488312868398713, "grad_norm": 2.135296106338501, "learning_rate": 9.036852431295934e-06, "loss": 0.6204, "step": 18809 }, { "epoch": 0.2248950848288478, "grad_norm": 1.6141303777694702, "learning_rate": 9.036738185489683e-06, "loss": 0.5815, "step": 18810 }, { "epoch": 0.22490704097370845, "grad_norm": 2.069645643234253, "learning_rate": 9.03662393363033e-06, "loss": 0.6688, "step": 18811 }, { "epoch": 0.22491899711856908, "grad_norm": 2.133166790008545, "learning_rate": 9.036509675718044e-06, "loss": 0.5564, "step": 18812 }, { "epoch": 0.22493095326342974, "grad_norm": 1.6744663715362549, "learning_rate": 9.036395411752997e-06, "loss": 0.5779, "step": 18813 }, { "epoch": 0.2249429094082904, "grad_norm": 2.5337939262390137, "learning_rate": 9.036281141735361e-06, "loss": 0.6082, "step": 18814 }, { "epoch": 0.22495486555315103, "grad_norm": 2.8074560165405273, "learning_rate": 9.036166865665307e-06, "loss": 0.5799, "step": 18815 }, { "epoch": 0.2249668216980117, "grad_norm": 1.4428327083587646, "learning_rate": 9.036052583543007e-06, "loss": 0.5393, "step": 18816 }, { "epoch": 0.22497877784287235, "grad_norm": 2.4917819499969482, "learning_rate": 9.035938295368632e-06, "loss": 0.5539, "step": 18817 }, { "epoch": 0.224990733987733, "grad_norm": 3.0704538822174072, "learning_rate": 9.03582400114235e-06, "loss": 0.586, "step": 18818 }, { "epoch": 0.22500269013259364, "grad_norm": 3.3324713706970215, "learning_rate": 9.03570970086434e-06, "loss": 0.7189, "step": 18819 }, { "epoch": 0.2250146462774543, "grad_norm": 2.455375909805298, "learning_rate": 9.035595394534767e-06, "loss": 0.6056, "step": 18820 }, { "epoch": 0.22502660242231495, "grad_norm": 5.823586940765381, "learning_rate": 9.035481082153803e-06, "loss": 0.6125, "step": 18821 }, { "epoch": 0.2250385585671756, "grad_norm": 2.625411033630371, "learning_rate": 9.035366763721623e-06, "loss": 0.6597, "step": 18822 }, { "epoch": 0.22505051471203624, "grad_norm": 2.8159165382385254, "learning_rate": 9.035252439238396e-06, "loss": 0.5987, "step": 18823 }, { "epoch": 0.2250624708568969, "grad_norm": 3.6305060386657715, "learning_rate": 9.035138108704292e-06, "loss": 0.6551, "step": 18824 }, { "epoch": 0.22507442700175756, "grad_norm": 3.1804287433624268, "learning_rate": 9.035023772119485e-06, "loss": 0.6288, "step": 18825 }, { "epoch": 0.2250863831466182, "grad_norm": 1.724351406097412, "learning_rate": 9.034909429484144e-06, "loss": 0.5641, "step": 18826 }, { "epoch": 0.22509833929147885, "grad_norm": 3.3282058238983154, "learning_rate": 9.034795080798444e-06, "loss": 0.6993, "step": 18827 }, { "epoch": 0.2251102954363395, "grad_norm": 2.955960512161255, "learning_rate": 9.034680726062555e-06, "loss": 0.5702, "step": 18828 }, { "epoch": 0.22512225158120017, "grad_norm": 1.535058856010437, "learning_rate": 9.034566365276647e-06, "loss": 0.6319, "step": 18829 }, { "epoch": 0.2251342077260608, "grad_norm": 2.6327860355377197, "learning_rate": 9.034451998440892e-06, "loss": 0.6032, "step": 18830 }, { "epoch": 0.22514616387092146, "grad_norm": 1.8295557498931885, "learning_rate": 9.03433762555546e-06, "loss": 0.6081, "step": 18831 }, { "epoch": 0.22515812001578211, "grad_norm": 5.37270450592041, "learning_rate": 9.034223246620528e-06, "loss": 0.5511, "step": 18832 }, { "epoch": 0.22517007616064277, "grad_norm": 23.6578426361084, "learning_rate": 9.034108861636262e-06, "loss": 0.5659, "step": 18833 }, { "epoch": 0.2251820323055034, "grad_norm": 2.4622433185577393, "learning_rate": 9.033994470602836e-06, "loss": 0.5648, "step": 18834 }, { "epoch": 0.22519398845036406, "grad_norm": 3.3317666053771973, "learning_rate": 9.03388007352042e-06, "loss": 0.6801, "step": 18835 }, { "epoch": 0.22520594459522472, "grad_norm": 3.4240715503692627, "learning_rate": 9.033765670389187e-06, "loss": 0.5746, "step": 18836 }, { "epoch": 0.22521790074008538, "grad_norm": 1.7341455221176147, "learning_rate": 9.033651261209308e-06, "loss": 0.6171, "step": 18837 }, { "epoch": 0.225229856884946, "grad_norm": 2.2947378158569336, "learning_rate": 9.033536845980955e-06, "loss": 0.721, "step": 18838 }, { "epoch": 0.22524181302980667, "grad_norm": 3.9374189376831055, "learning_rate": 9.033422424704301e-06, "loss": 0.7233, "step": 18839 }, { "epoch": 0.22525376917466733, "grad_norm": 10.327632904052734, "learning_rate": 9.033307997379513e-06, "loss": 0.6159, "step": 18840 }, { "epoch": 0.22526572531952796, "grad_norm": 2.535433769226074, "learning_rate": 9.033193564006765e-06, "loss": 0.6338, "step": 18841 }, { "epoch": 0.22527768146438862, "grad_norm": 2.6965742111206055, "learning_rate": 9.03307912458623e-06, "loss": 0.5763, "step": 18842 }, { "epoch": 0.22528963760924928, "grad_norm": 1.6037347316741943, "learning_rate": 9.032964679118079e-06, "loss": 0.6329, "step": 18843 }, { "epoch": 0.22530159375410994, "grad_norm": 9.141597747802734, "learning_rate": 9.032850227602484e-06, "loss": 0.6954, "step": 18844 }, { "epoch": 0.22531354989897057, "grad_norm": 1.9686808586120605, "learning_rate": 9.032735770039614e-06, "loss": 0.5282, "step": 18845 }, { "epoch": 0.22532550604383123, "grad_norm": 3.314173936843872, "learning_rate": 9.03262130642964e-06, "loss": 0.6876, "step": 18846 }, { "epoch": 0.22533746218869188, "grad_norm": 4.929769515991211, "learning_rate": 9.03250683677274e-06, "loss": 0.5738, "step": 18847 }, { "epoch": 0.22534941833355254, "grad_norm": 2.01064133644104, "learning_rate": 9.032392361069081e-06, "loss": 0.5863, "step": 18848 }, { "epoch": 0.22536137447841317, "grad_norm": 1.2261115312576294, "learning_rate": 9.032277879318835e-06, "loss": 0.4752, "step": 18849 }, { "epoch": 0.22537333062327383, "grad_norm": 3.4153034687042236, "learning_rate": 9.032163391522172e-06, "loss": 0.6489, "step": 18850 }, { "epoch": 0.2253852867681345, "grad_norm": 5.294187545776367, "learning_rate": 9.032048897679269e-06, "loss": 0.5808, "step": 18851 }, { "epoch": 0.22539724291299512, "grad_norm": 3.750014305114746, "learning_rate": 9.031934397790291e-06, "loss": 0.6222, "step": 18852 }, { "epoch": 0.22540919905785578, "grad_norm": 2.3394217491149902, "learning_rate": 9.031819891855416e-06, "loss": 0.6429, "step": 18853 }, { "epoch": 0.22542115520271644, "grad_norm": 1.728298544883728, "learning_rate": 9.031705379874813e-06, "loss": 0.5984, "step": 18854 }, { "epoch": 0.2254331113475771, "grad_norm": 1.8707927465438843, "learning_rate": 9.03159086184865e-06, "loss": 0.5427, "step": 18855 }, { "epoch": 0.22544506749243773, "grad_norm": 1.508461356163025, "learning_rate": 9.031476337777106e-06, "loss": 0.563, "step": 18856 }, { "epoch": 0.2254570236372984, "grad_norm": 1.797439694404602, "learning_rate": 9.031361807660347e-06, "loss": 0.5944, "step": 18857 }, { "epoch": 0.22546897978215905, "grad_norm": 2.854966163635254, "learning_rate": 9.031247271498545e-06, "loss": 0.5986, "step": 18858 }, { "epoch": 0.2254809359270197, "grad_norm": 3.4729297161102295, "learning_rate": 9.031132729291876e-06, "loss": 0.6001, "step": 18859 }, { "epoch": 0.22549289207188034, "grad_norm": 2.1644115447998047, "learning_rate": 9.031018181040507e-06, "loss": 0.6197, "step": 18860 }, { "epoch": 0.225504848216741, "grad_norm": 2.3818180561065674, "learning_rate": 9.030903626744614e-06, "loss": 0.5995, "step": 18861 }, { "epoch": 0.22551680436160165, "grad_norm": 2.1168503761291504, "learning_rate": 9.030789066404366e-06, "loss": 0.5409, "step": 18862 }, { "epoch": 0.22552876050646228, "grad_norm": 2.0196733474731445, "learning_rate": 9.030674500019936e-06, "loss": 0.6235, "step": 18863 }, { "epoch": 0.22554071665132294, "grad_norm": 2.1473960876464844, "learning_rate": 9.030559927591494e-06, "loss": 0.6137, "step": 18864 }, { "epoch": 0.2255526727961836, "grad_norm": 1.9294761419296265, "learning_rate": 9.030445349119214e-06, "loss": 0.6492, "step": 18865 }, { "epoch": 0.22556462894104426, "grad_norm": 1.725127100944519, "learning_rate": 9.030330764603266e-06, "loss": 0.7027, "step": 18866 }, { "epoch": 0.2255765850859049, "grad_norm": 1.7005069255828857, "learning_rate": 9.030216174043823e-06, "loss": 0.6148, "step": 18867 }, { "epoch": 0.22558854123076555, "grad_norm": 2.1449074745178223, "learning_rate": 9.030101577441057e-06, "loss": 0.5874, "step": 18868 }, { "epoch": 0.2256004973756262, "grad_norm": 2.6968894004821777, "learning_rate": 9.029986974795138e-06, "loss": 0.66, "step": 18869 }, { "epoch": 0.22561245352048687, "grad_norm": 2.473567247390747, "learning_rate": 9.029872366106242e-06, "loss": 0.6654, "step": 18870 }, { "epoch": 0.2256244096653475, "grad_norm": 1.9916322231292725, "learning_rate": 9.029757751374535e-06, "loss": 0.5539, "step": 18871 }, { "epoch": 0.22563636581020816, "grad_norm": 1.5586638450622559, "learning_rate": 9.029643130600193e-06, "loss": 0.5907, "step": 18872 }, { "epoch": 0.22564832195506881, "grad_norm": 2.323270082473755, "learning_rate": 9.029528503783386e-06, "loss": 0.6415, "step": 18873 }, { "epoch": 0.22566027809992945, "grad_norm": 2.0698039531707764, "learning_rate": 9.029413870924288e-06, "loss": 0.5693, "step": 18874 }, { "epoch": 0.2256722342447901, "grad_norm": 2.076514720916748, "learning_rate": 9.02929923202307e-06, "loss": 0.5756, "step": 18875 }, { "epoch": 0.22568419038965076, "grad_norm": 2.214362382888794, "learning_rate": 9.029184587079904e-06, "loss": 0.5955, "step": 18876 }, { "epoch": 0.22569614653451142, "grad_norm": 1.8271294832229614, "learning_rate": 9.02906993609496e-06, "loss": 0.5885, "step": 18877 }, { "epoch": 0.22570810267937205, "grad_norm": 1.9669982194900513, "learning_rate": 9.028955279068411e-06, "loss": 0.5622, "step": 18878 }, { "epoch": 0.2257200588242327, "grad_norm": 2.420182466506958, "learning_rate": 9.02884061600043e-06, "loss": 0.6143, "step": 18879 }, { "epoch": 0.22573201496909337, "grad_norm": 3.8787620067596436, "learning_rate": 9.028725946891188e-06, "loss": 0.5891, "step": 18880 }, { "epoch": 0.22574397111395403, "grad_norm": 2.0933637619018555, "learning_rate": 9.028611271740857e-06, "loss": 0.6048, "step": 18881 }, { "epoch": 0.22575592725881466, "grad_norm": 2.7958929538726807, "learning_rate": 9.02849659054961e-06, "loss": 0.5697, "step": 18882 }, { "epoch": 0.22576788340367532, "grad_norm": 1.553810715675354, "learning_rate": 9.028381903317615e-06, "loss": 0.5401, "step": 18883 }, { "epoch": 0.22577983954853598, "grad_norm": 2.5281803607940674, "learning_rate": 9.02826721004505e-06, "loss": 0.6127, "step": 18884 }, { "epoch": 0.2257917956933966, "grad_norm": 2.067383289337158, "learning_rate": 9.028152510732083e-06, "loss": 0.5808, "step": 18885 }, { "epoch": 0.22580375183825727, "grad_norm": 6.052528381347656, "learning_rate": 9.028037805378888e-06, "loss": 0.6207, "step": 18886 }, { "epoch": 0.22581570798311792, "grad_norm": 2.2576420307159424, "learning_rate": 9.027923093985636e-06, "loss": 0.5982, "step": 18887 }, { "epoch": 0.22582766412797858, "grad_norm": 2.6074812412261963, "learning_rate": 9.027808376552497e-06, "loss": 0.5934, "step": 18888 }, { "epoch": 0.22583962027283921, "grad_norm": 1.857485294342041, "learning_rate": 9.027693653079645e-06, "loss": 0.6638, "step": 18889 }, { "epoch": 0.22585157641769987, "grad_norm": 3.2269105911254883, "learning_rate": 9.027578923567254e-06, "loss": 0.6648, "step": 18890 }, { "epoch": 0.22586353256256053, "grad_norm": 4.01479434967041, "learning_rate": 9.027464188015493e-06, "loss": 0.6232, "step": 18891 }, { "epoch": 0.2258754887074212, "grad_norm": 2.656973123550415, "learning_rate": 9.027349446424535e-06, "loss": 0.6291, "step": 18892 }, { "epoch": 0.22588744485228182, "grad_norm": 1.9205151796340942, "learning_rate": 9.027234698794553e-06, "loss": 0.6116, "step": 18893 }, { "epoch": 0.22589940099714248, "grad_norm": 2.0946743488311768, "learning_rate": 9.02711994512572e-06, "loss": 0.698, "step": 18894 }, { "epoch": 0.22591135714200314, "grad_norm": 2.693511962890625, "learning_rate": 9.027005185418203e-06, "loss": 0.7355, "step": 18895 }, { "epoch": 0.2259233132868638, "grad_norm": 5.498383522033691, "learning_rate": 9.026890419672178e-06, "loss": 0.6504, "step": 18896 }, { "epoch": 0.22593526943172443, "grad_norm": 5.973578453063965, "learning_rate": 9.026775647887818e-06, "loss": 0.655, "step": 18897 }, { "epoch": 0.2259472255765851, "grad_norm": 2.833559274673462, "learning_rate": 9.026660870065291e-06, "loss": 0.6055, "step": 18898 }, { "epoch": 0.22595918172144575, "grad_norm": 2.592529296875, "learning_rate": 9.026546086204776e-06, "loss": 0.5835, "step": 18899 }, { "epoch": 0.22597113786630638, "grad_norm": 1.6792888641357422, "learning_rate": 9.026431296306437e-06, "loss": 0.5787, "step": 18900 }, { "epoch": 0.22598309401116703, "grad_norm": 4.203064441680908, "learning_rate": 9.026316500370452e-06, "loss": 0.6401, "step": 18901 }, { "epoch": 0.2259950501560277, "grad_norm": 1.826296329498291, "learning_rate": 9.026201698396991e-06, "loss": 0.6752, "step": 18902 }, { "epoch": 0.22600700630088835, "grad_norm": 3.0597636699676514, "learning_rate": 9.026086890386225e-06, "loss": 0.5905, "step": 18903 }, { "epoch": 0.22601896244574898, "grad_norm": 2.5116984844207764, "learning_rate": 9.025972076338327e-06, "loss": 0.6826, "step": 18904 }, { "epoch": 0.22603091859060964, "grad_norm": 5.146886825561523, "learning_rate": 9.02585725625347e-06, "loss": 0.6402, "step": 18905 }, { "epoch": 0.2260428747354703, "grad_norm": 2.011467456817627, "learning_rate": 9.025742430131827e-06, "loss": 0.6089, "step": 18906 }, { "epoch": 0.22605483088033096, "grad_norm": 2.512603998184204, "learning_rate": 9.025627597973567e-06, "loss": 0.639, "step": 18907 }, { "epoch": 0.2260667870251916, "grad_norm": 2.3530099391937256, "learning_rate": 9.025512759778867e-06, "loss": 0.5552, "step": 18908 }, { "epoch": 0.22607874317005225, "grad_norm": 1.5570019483566284, "learning_rate": 9.025397915547892e-06, "loss": 0.5923, "step": 18909 }, { "epoch": 0.2260906993149129, "grad_norm": 1.511894702911377, "learning_rate": 9.025283065280822e-06, "loss": 0.6685, "step": 18910 }, { "epoch": 0.22610265545977354, "grad_norm": 1.918135643005371, "learning_rate": 9.025168208977826e-06, "loss": 0.6348, "step": 18911 }, { "epoch": 0.2261146116046342, "grad_norm": 2.9329702854156494, "learning_rate": 9.025053346639075e-06, "loss": 0.6786, "step": 18912 }, { "epoch": 0.22612656774949486, "grad_norm": 2.115880012512207, "learning_rate": 9.024938478264742e-06, "loss": 0.5769, "step": 18913 }, { "epoch": 0.22613852389435551, "grad_norm": 1.6951812505722046, "learning_rate": 9.024823603854999e-06, "loss": 0.5276, "step": 18914 }, { "epoch": 0.22615048003921615, "grad_norm": 2.5220391750335693, "learning_rate": 9.024708723410019e-06, "loss": 0.6954, "step": 18915 }, { "epoch": 0.2261624361840768, "grad_norm": 2.116536855697632, "learning_rate": 9.024593836929974e-06, "loss": 0.6049, "step": 18916 }, { "epoch": 0.22617439232893746, "grad_norm": 5.358249664306641, "learning_rate": 9.024478944415036e-06, "loss": 0.6073, "step": 18917 }, { "epoch": 0.22618634847379812, "grad_norm": 2.2497808933258057, "learning_rate": 9.02436404586538e-06, "loss": 0.6447, "step": 18918 }, { "epoch": 0.22619830461865875, "grad_norm": 1.6340268850326538, "learning_rate": 9.024249141281172e-06, "loss": 0.5931, "step": 18919 }, { "epoch": 0.2262102607635194, "grad_norm": 1.8443267345428467, "learning_rate": 9.02413423066259e-06, "loss": 0.6814, "step": 18920 }, { "epoch": 0.22622221690838007, "grad_norm": 3.1319422721862793, "learning_rate": 9.024019314009803e-06, "loss": 0.6298, "step": 18921 }, { "epoch": 0.2262341730532407, "grad_norm": 5.264586925506592, "learning_rate": 9.023904391322987e-06, "loss": 0.6085, "step": 18922 }, { "epoch": 0.22624612919810136, "grad_norm": 3.1523993015289307, "learning_rate": 9.02378946260231e-06, "loss": 0.6332, "step": 18923 }, { "epoch": 0.22625808534296202, "grad_norm": 2.795562267303467, "learning_rate": 9.02367452784795e-06, "loss": 0.6815, "step": 18924 }, { "epoch": 0.22627004148782268, "grad_norm": 5.911788463592529, "learning_rate": 9.023559587060071e-06, "loss": 0.6805, "step": 18925 }, { "epoch": 0.2262819976326833, "grad_norm": 3.0494861602783203, "learning_rate": 9.023444640238853e-06, "loss": 0.6015, "step": 18926 }, { "epoch": 0.22629395377754397, "grad_norm": 1.9225165843963623, "learning_rate": 9.023329687384465e-06, "loss": 0.5686, "step": 18927 }, { "epoch": 0.22630590992240462, "grad_norm": 1.4986802339553833, "learning_rate": 9.02321472849708e-06, "loss": 0.6161, "step": 18928 }, { "epoch": 0.22631786606726528, "grad_norm": 1.6568703651428223, "learning_rate": 9.023099763576869e-06, "loss": 0.6789, "step": 18929 }, { "epoch": 0.22632982221212591, "grad_norm": 1.989066481590271, "learning_rate": 9.022984792624006e-06, "loss": 0.56, "step": 18930 }, { "epoch": 0.22634177835698657, "grad_norm": 3.0484707355499268, "learning_rate": 9.022869815638664e-06, "loss": 0.6861, "step": 18931 }, { "epoch": 0.22635373450184723, "grad_norm": 3.4032363891601562, "learning_rate": 9.022754832621014e-06, "loss": 0.5879, "step": 18932 }, { "epoch": 0.22636569064670786, "grad_norm": 1.7159329652786255, "learning_rate": 9.022639843571228e-06, "loss": 0.5554, "step": 18933 }, { "epoch": 0.22637764679156852, "grad_norm": 2.4214909076690674, "learning_rate": 9.02252484848948e-06, "loss": 0.7198, "step": 18934 }, { "epoch": 0.22638960293642918, "grad_norm": 2.285116195678711, "learning_rate": 9.022409847375941e-06, "loss": 0.589, "step": 18935 }, { "epoch": 0.22640155908128984, "grad_norm": 1.8715646266937256, "learning_rate": 9.022294840230785e-06, "loss": 0.6296, "step": 18936 }, { "epoch": 0.22641351522615047, "grad_norm": 2.2649574279785156, "learning_rate": 9.022179827054183e-06, "loss": 0.5931, "step": 18937 }, { "epoch": 0.22642547137101113, "grad_norm": 2.5017707347869873, "learning_rate": 9.022064807846309e-06, "loss": 0.5707, "step": 18938 }, { "epoch": 0.2264374275158718, "grad_norm": 2.9118869304656982, "learning_rate": 9.021949782607333e-06, "loss": 0.5521, "step": 18939 }, { "epoch": 0.22644938366073245, "grad_norm": 1.335968255996704, "learning_rate": 9.02183475133743e-06, "loss": 0.5898, "step": 18940 }, { "epoch": 0.22646133980559308, "grad_norm": 2.7888448238372803, "learning_rate": 9.021719714036771e-06, "loss": 0.6681, "step": 18941 }, { "epoch": 0.22647329595045373, "grad_norm": 3.190054178237915, "learning_rate": 9.02160467070553e-06, "loss": 0.6819, "step": 18942 }, { "epoch": 0.2264852520953144, "grad_norm": 1.6853896379470825, "learning_rate": 9.021489621343878e-06, "loss": 0.7154, "step": 18943 }, { "epoch": 0.22649720824017502, "grad_norm": 2.3458666801452637, "learning_rate": 9.021374565951988e-06, "loss": 0.4879, "step": 18944 }, { "epoch": 0.22650916438503568, "grad_norm": 2.581634044647217, "learning_rate": 9.021259504530033e-06, "loss": 0.5528, "step": 18945 }, { "epoch": 0.22652112052989634, "grad_norm": 3.655886650085449, "learning_rate": 9.021144437078184e-06, "loss": 0.6555, "step": 18946 }, { "epoch": 0.226533076674757, "grad_norm": 1.7802000045776367, "learning_rate": 9.021029363596614e-06, "loss": 0.6134, "step": 18947 }, { "epoch": 0.22654503281961763, "grad_norm": 2.924672842025757, "learning_rate": 9.020914284085498e-06, "loss": 0.6514, "step": 18948 }, { "epoch": 0.2265569889644783, "grad_norm": 3.678680896759033, "learning_rate": 9.020799198545006e-06, "loss": 0.5389, "step": 18949 }, { "epoch": 0.22656894510933895, "grad_norm": 2.27671480178833, "learning_rate": 9.020684106975312e-06, "loss": 0.6109, "step": 18950 }, { "epoch": 0.2265809012541996, "grad_norm": 3.5839245319366455, "learning_rate": 9.020569009376587e-06, "loss": 0.6442, "step": 18951 }, { "epoch": 0.22659285739906024, "grad_norm": 1.4837605953216553, "learning_rate": 9.020453905749004e-06, "loss": 0.6299, "step": 18952 }, { "epoch": 0.2266048135439209, "grad_norm": 1.3442001342773438, "learning_rate": 9.020338796092738e-06, "loss": 0.5859, "step": 18953 }, { "epoch": 0.22661676968878156, "grad_norm": 1.7576907873153687, "learning_rate": 9.020223680407958e-06, "loss": 0.6247, "step": 18954 }, { "epoch": 0.22662872583364221, "grad_norm": 2.4630444049835205, "learning_rate": 9.020108558694838e-06, "loss": 0.693, "step": 18955 }, { "epoch": 0.22664068197850284, "grad_norm": 3.3315556049346924, "learning_rate": 9.019993430953553e-06, "loss": 0.6948, "step": 18956 }, { "epoch": 0.2266526381233635, "grad_norm": 1.8905282020568848, "learning_rate": 9.01987829718427e-06, "loss": 0.5382, "step": 18957 }, { "epoch": 0.22666459426822416, "grad_norm": 1.7651551961898804, "learning_rate": 9.01976315738717e-06, "loss": 0.5842, "step": 18958 }, { "epoch": 0.2266765504130848, "grad_norm": 3.1261250972747803, "learning_rate": 9.019648011562417e-06, "loss": 0.6627, "step": 18959 }, { "epoch": 0.22668850655794545, "grad_norm": 1.6196261644363403, "learning_rate": 9.01953285971019e-06, "loss": 0.5352, "step": 18960 }, { "epoch": 0.2267004627028061, "grad_norm": 5.959183692932129, "learning_rate": 9.019417701830657e-06, "loss": 0.575, "step": 18961 }, { "epoch": 0.22671241884766677, "grad_norm": 2.8112425804138184, "learning_rate": 9.019302537923995e-06, "loss": 0.5652, "step": 18962 }, { "epoch": 0.2267243749925274, "grad_norm": 3.1253347396850586, "learning_rate": 9.019187367990372e-06, "loss": 0.6511, "step": 18963 }, { "epoch": 0.22673633113738806, "grad_norm": 1.2282319068908691, "learning_rate": 9.019072192029965e-06, "loss": 0.4865, "step": 18964 }, { "epoch": 0.22674828728224872, "grad_norm": 2.4828567504882812, "learning_rate": 9.018957010042945e-06, "loss": 0.598, "step": 18965 }, { "epoch": 0.22676024342710938, "grad_norm": 1.7991907596588135, "learning_rate": 9.018841822029485e-06, "loss": 0.5845, "step": 18966 }, { "epoch": 0.22677219957197, "grad_norm": 1.485626220703125, "learning_rate": 9.018726627989756e-06, "loss": 0.6092, "step": 18967 }, { "epoch": 0.22678415571683067, "grad_norm": 2.6075308322906494, "learning_rate": 9.018611427923934e-06, "loss": 0.6463, "step": 18968 }, { "epoch": 0.22679611186169132, "grad_norm": 1.5741074085235596, "learning_rate": 9.018496221832187e-06, "loss": 0.6415, "step": 18969 }, { "epoch": 0.22680806800655195, "grad_norm": 3.1506924629211426, "learning_rate": 9.018381009714693e-06, "loss": 0.7294, "step": 18970 }, { "epoch": 0.2268200241514126, "grad_norm": 2.7532155513763428, "learning_rate": 9.018265791571623e-06, "loss": 0.609, "step": 18971 }, { "epoch": 0.22683198029627327, "grad_norm": 2.110487937927246, "learning_rate": 9.018150567403147e-06, "loss": 0.585, "step": 18972 }, { "epoch": 0.22684393644113393, "grad_norm": 1.69047212600708, "learning_rate": 9.01803533720944e-06, "loss": 0.5303, "step": 18973 }, { "epoch": 0.22685589258599456, "grad_norm": 1.6597626209259033, "learning_rate": 9.017920100990676e-06, "loss": 0.5732, "step": 18974 }, { "epoch": 0.22686784873085522, "grad_norm": 1.9349581003189087, "learning_rate": 9.017804858747026e-06, "loss": 0.4985, "step": 18975 }, { "epoch": 0.22687980487571588, "grad_norm": 1.765443205833435, "learning_rate": 9.017689610478665e-06, "loss": 0.5965, "step": 18976 }, { "epoch": 0.22689176102057654, "grad_norm": 2.187553882598877, "learning_rate": 9.01757435618576e-06, "loss": 0.5158, "step": 18977 }, { "epoch": 0.22690371716543717, "grad_norm": 2.038177967071533, "learning_rate": 9.017459095868493e-06, "loss": 0.5701, "step": 18978 }, { "epoch": 0.22691567331029783, "grad_norm": 1.6336698532104492, "learning_rate": 9.017343829527028e-06, "loss": 0.6169, "step": 18979 }, { "epoch": 0.22692762945515849, "grad_norm": 2.406365156173706, "learning_rate": 9.017228557161544e-06, "loss": 0.6469, "step": 18980 }, { "epoch": 0.22693958560001912, "grad_norm": 2.1455228328704834, "learning_rate": 9.01711327877221e-06, "loss": 0.5818, "step": 18981 }, { "epoch": 0.22695154174487978, "grad_norm": 2.4991915225982666, "learning_rate": 9.0169979943592e-06, "loss": 0.6498, "step": 18982 }, { "epoch": 0.22696349788974043, "grad_norm": 1.8377342224121094, "learning_rate": 9.01688270392269e-06, "loss": 0.5571, "step": 18983 }, { "epoch": 0.2269754540346011, "grad_norm": 5.977919101715088, "learning_rate": 9.016767407462847e-06, "loss": 0.5887, "step": 18984 }, { "epoch": 0.22698741017946172, "grad_norm": 4.524204730987549, "learning_rate": 9.016652104979849e-06, "loss": 0.5407, "step": 18985 }, { "epoch": 0.22699936632432238, "grad_norm": 1.8823679685592651, "learning_rate": 9.016536796473867e-06, "loss": 0.5109, "step": 18986 }, { "epoch": 0.22701132246918304, "grad_norm": 1.954315185546875, "learning_rate": 9.016421481945072e-06, "loss": 0.6483, "step": 18987 }, { "epoch": 0.2270232786140437, "grad_norm": 2.0090749263763428, "learning_rate": 9.01630616139364e-06, "loss": 0.5928, "step": 18988 }, { "epoch": 0.22703523475890433, "grad_norm": 1.5187638998031616, "learning_rate": 9.016190834819741e-06, "loss": 0.5764, "step": 18989 }, { "epoch": 0.227047190903765, "grad_norm": 2.7045094966888428, "learning_rate": 9.016075502223551e-06, "loss": 0.6599, "step": 18990 }, { "epoch": 0.22705914704862565, "grad_norm": 2.747170925140381, "learning_rate": 9.015960163605242e-06, "loss": 0.6212, "step": 18991 }, { "epoch": 0.22707110319348628, "grad_norm": 1.9585191011428833, "learning_rate": 9.015844818964986e-06, "loss": 0.6238, "step": 18992 }, { "epoch": 0.22708305933834694, "grad_norm": 9.453591346740723, "learning_rate": 9.015729468302956e-06, "loss": 0.6109, "step": 18993 }, { "epoch": 0.2270950154832076, "grad_norm": 2.1558895111083984, "learning_rate": 9.015614111619326e-06, "loss": 0.6431, "step": 18994 }, { "epoch": 0.22710697162806825, "grad_norm": 3.104374408721924, "learning_rate": 9.015498748914268e-06, "loss": 0.6755, "step": 18995 }, { "epoch": 0.22711892777292889, "grad_norm": 4.666251182556152, "learning_rate": 9.015383380187954e-06, "loss": 0.693, "step": 18996 }, { "epoch": 0.22713088391778954, "grad_norm": 3.4938223361968994, "learning_rate": 9.015268005440561e-06, "loss": 0.5676, "step": 18997 }, { "epoch": 0.2271428400626502, "grad_norm": 1.590378999710083, "learning_rate": 9.015152624672257e-06, "loss": 0.5996, "step": 18998 }, { "epoch": 0.22715479620751086, "grad_norm": 2.9195685386657715, "learning_rate": 9.01503723788322e-06, "loss": 0.5775, "step": 18999 }, { "epoch": 0.2271667523523715, "grad_norm": 2.4207704067230225, "learning_rate": 9.014921845073618e-06, "loss": 0.6949, "step": 19000 }, { "epoch": 0.22717870849723215, "grad_norm": 8.153053283691406, "learning_rate": 9.014806446243627e-06, "loss": 0.676, "step": 19001 }, { "epoch": 0.2271906646420928, "grad_norm": 1.6347423791885376, "learning_rate": 9.01469104139342e-06, "loss": 0.6599, "step": 19002 }, { "epoch": 0.22720262078695344, "grad_norm": 2.479678153991699, "learning_rate": 9.014575630523169e-06, "loss": 0.5963, "step": 19003 }, { "epoch": 0.2272145769318141, "grad_norm": 1.401837706565857, "learning_rate": 9.014460213633046e-06, "loss": 0.5876, "step": 19004 }, { "epoch": 0.22722653307667476, "grad_norm": 2.592658519744873, "learning_rate": 9.014344790723228e-06, "loss": 0.5922, "step": 19005 }, { "epoch": 0.22723848922153542, "grad_norm": 2.2689502239227295, "learning_rate": 9.014229361793886e-06, "loss": 0.6224, "step": 19006 }, { "epoch": 0.22725044536639605, "grad_norm": 1.8736207485198975, "learning_rate": 9.014113926845192e-06, "loss": 0.6538, "step": 19007 }, { "epoch": 0.2272624015112567, "grad_norm": 13.455611228942871, "learning_rate": 9.01399848587732e-06, "loss": 0.5943, "step": 19008 }, { "epoch": 0.22727435765611737, "grad_norm": 4.291162490844727, "learning_rate": 9.013883038890442e-06, "loss": 0.5655, "step": 19009 }, { "epoch": 0.22728631380097802, "grad_norm": 2.185487747192383, "learning_rate": 9.013767585884734e-06, "loss": 0.7031, "step": 19010 }, { "epoch": 0.22729826994583865, "grad_norm": 2.0148932933807373, "learning_rate": 9.013652126860365e-06, "loss": 0.6602, "step": 19011 }, { "epoch": 0.2273102260906993, "grad_norm": 1.8938368558883667, "learning_rate": 9.013536661817512e-06, "loss": 0.6304, "step": 19012 }, { "epoch": 0.22732218223555997, "grad_norm": 1.9319701194763184, "learning_rate": 9.013421190756346e-06, "loss": 0.617, "step": 19013 }, { "epoch": 0.22733413838042063, "grad_norm": 5.014009475708008, "learning_rate": 9.013305713677042e-06, "loss": 0.6919, "step": 19014 }, { "epoch": 0.22734609452528126, "grad_norm": 2.9894802570343018, "learning_rate": 9.01319023057977e-06, "loss": 0.6339, "step": 19015 }, { "epoch": 0.22735805067014192, "grad_norm": 1.915376901626587, "learning_rate": 9.013074741464704e-06, "loss": 0.5364, "step": 19016 }, { "epoch": 0.22737000681500258, "grad_norm": 18.625045776367188, "learning_rate": 9.01295924633202e-06, "loss": 0.5869, "step": 19017 }, { "epoch": 0.2273819629598632, "grad_norm": 2.294546127319336, "learning_rate": 9.01284374518189e-06, "loss": 0.6379, "step": 19018 }, { "epoch": 0.22739391910472387, "grad_norm": 2.9640395641326904, "learning_rate": 9.012728238014485e-06, "loss": 0.6848, "step": 19019 }, { "epoch": 0.22740587524958453, "grad_norm": 2.630105495452881, "learning_rate": 9.012612724829981e-06, "loss": 0.6531, "step": 19020 }, { "epoch": 0.22741783139444519, "grad_norm": 2.8939857482910156, "learning_rate": 9.01249720562855e-06, "loss": 0.6187, "step": 19021 }, { "epoch": 0.22742978753930582, "grad_norm": 2.0673587322235107, "learning_rate": 9.012381680410364e-06, "loss": 0.6398, "step": 19022 }, { "epoch": 0.22744174368416648, "grad_norm": 2.563002109527588, "learning_rate": 9.012266149175598e-06, "loss": 0.6532, "step": 19023 }, { "epoch": 0.22745369982902713, "grad_norm": 2.490307092666626, "learning_rate": 9.012150611924424e-06, "loss": 0.6133, "step": 19024 }, { "epoch": 0.2274656559738878, "grad_norm": 1.5647516250610352, "learning_rate": 9.012035068657016e-06, "loss": 0.6923, "step": 19025 }, { "epoch": 0.22747761211874842, "grad_norm": 1.9352152347564697, "learning_rate": 9.011919519373547e-06, "loss": 0.5846, "step": 19026 }, { "epoch": 0.22748956826360908, "grad_norm": 1.869103193283081, "learning_rate": 9.011803964074192e-06, "loss": 0.5689, "step": 19027 }, { "epoch": 0.22750152440846974, "grad_norm": 2.421987533569336, "learning_rate": 9.011688402759122e-06, "loss": 0.6175, "step": 19028 }, { "epoch": 0.22751348055333037, "grad_norm": 2.5434963703155518, "learning_rate": 9.01157283542851e-06, "loss": 0.6847, "step": 19029 }, { "epoch": 0.22752543669819103, "grad_norm": 2.5230185985565186, "learning_rate": 9.011457262082532e-06, "loss": 0.6752, "step": 19030 }, { "epoch": 0.2275373928430517, "grad_norm": 3.0826175212860107, "learning_rate": 9.011341682721358e-06, "loss": 0.6488, "step": 19031 }, { "epoch": 0.22754934898791235, "grad_norm": 2.456435441970825, "learning_rate": 9.011226097345162e-06, "loss": 0.6477, "step": 19032 }, { "epoch": 0.22756130513277298, "grad_norm": 2.0471158027648926, "learning_rate": 9.011110505954121e-06, "loss": 0.6104, "step": 19033 }, { "epoch": 0.22757326127763364, "grad_norm": 1.9608322381973267, "learning_rate": 9.010994908548404e-06, "loss": 0.5483, "step": 19034 }, { "epoch": 0.2275852174224943, "grad_norm": 2.016543388366699, "learning_rate": 9.010879305128186e-06, "loss": 0.6052, "step": 19035 }, { "epoch": 0.22759717356735495, "grad_norm": 2.0148537158966064, "learning_rate": 9.01076369569364e-06, "loss": 0.5423, "step": 19036 }, { "epoch": 0.22760912971221559, "grad_norm": 1.6684974431991577, "learning_rate": 9.01064808024494e-06, "loss": 0.6393, "step": 19037 }, { "epoch": 0.22762108585707624, "grad_norm": 1.7075690031051636, "learning_rate": 9.010532458782259e-06, "loss": 0.59, "step": 19038 }, { "epoch": 0.2276330420019369, "grad_norm": 4.1879658699035645, "learning_rate": 9.010416831305771e-06, "loss": 0.6044, "step": 19039 }, { "epoch": 0.22764499814679753, "grad_norm": 3.953794240951538, "learning_rate": 9.010301197815648e-06, "loss": 0.5287, "step": 19040 }, { "epoch": 0.2276569542916582, "grad_norm": 1.561665654182434, "learning_rate": 9.010185558312063e-06, "loss": 0.5699, "step": 19041 }, { "epoch": 0.22766891043651885, "grad_norm": 1.6118673086166382, "learning_rate": 9.01006991279519e-06, "loss": 0.5027, "step": 19042 }, { "epoch": 0.2276808665813795, "grad_norm": 1.811749815940857, "learning_rate": 9.009954261265204e-06, "loss": 0.5189, "step": 19043 }, { "epoch": 0.22769282272624014, "grad_norm": 3.8454627990722656, "learning_rate": 9.009838603722278e-06, "loss": 0.6328, "step": 19044 }, { "epoch": 0.2277047788711008, "grad_norm": 2.602301597595215, "learning_rate": 9.009722940166586e-06, "loss": 0.5789, "step": 19045 }, { "epoch": 0.22771673501596146, "grad_norm": 2.61592173576355, "learning_rate": 9.009607270598297e-06, "loss": 0.5073, "step": 19046 }, { "epoch": 0.22772869116082212, "grad_norm": 2.571204900741577, "learning_rate": 9.00949159501759e-06, "loss": 0.5788, "step": 19047 }, { "epoch": 0.22774064730568275, "grad_norm": 2.416088819503784, "learning_rate": 9.009375913424637e-06, "loss": 0.5504, "step": 19048 }, { "epoch": 0.2277526034505434, "grad_norm": 1.4908859729766846, "learning_rate": 9.009260225819606e-06, "loss": 0.5626, "step": 19049 }, { "epoch": 0.22776455959540406, "grad_norm": 1.9005521535873413, "learning_rate": 9.00914453220268e-06, "loss": 0.5887, "step": 19050 }, { "epoch": 0.2277765157402647, "grad_norm": 2.5171449184417725, "learning_rate": 9.009028832574025e-06, "loss": 0.606, "step": 19051 }, { "epoch": 0.22778847188512535, "grad_norm": 2.006333827972412, "learning_rate": 9.008913126933816e-06, "loss": 0.7013, "step": 19052 }, { "epoch": 0.227800428029986, "grad_norm": 19.96230125427246, "learning_rate": 9.00879741528223e-06, "loss": 0.6157, "step": 19053 }, { "epoch": 0.22781238417484667, "grad_norm": 1.908086895942688, "learning_rate": 9.008681697619438e-06, "loss": 0.5646, "step": 19054 }, { "epoch": 0.2278243403197073, "grad_norm": 2.051521062850952, "learning_rate": 9.008565973945613e-06, "loss": 0.6357, "step": 19055 }, { "epoch": 0.22783629646456796, "grad_norm": 2.200199604034424, "learning_rate": 9.008450244260927e-06, "loss": 0.6339, "step": 19056 }, { "epoch": 0.22784825260942862, "grad_norm": 1.6843138933181763, "learning_rate": 9.008334508565557e-06, "loss": 0.608, "step": 19057 }, { "epoch": 0.22786020875428928, "grad_norm": 1.5977622270584106, "learning_rate": 9.008218766859675e-06, "loss": 0.6249, "step": 19058 }, { "epoch": 0.2278721648991499, "grad_norm": 2.0950753688812256, "learning_rate": 9.008103019143455e-06, "loss": 0.5948, "step": 19059 }, { "epoch": 0.22788412104401057, "grad_norm": 2.510787010192871, "learning_rate": 9.007987265417071e-06, "loss": 0.5937, "step": 19060 }, { "epoch": 0.22789607718887123, "grad_norm": 2.2346274852752686, "learning_rate": 9.007871505680697e-06, "loss": 0.6663, "step": 19061 }, { "epoch": 0.22790803333373189, "grad_norm": 3.3784849643707275, "learning_rate": 9.007755739934504e-06, "loss": 0.6228, "step": 19062 }, { "epoch": 0.22791998947859252, "grad_norm": 1.931328535079956, "learning_rate": 9.007639968178666e-06, "loss": 0.6454, "step": 19063 }, { "epoch": 0.22793194562345317, "grad_norm": 4.103741645812988, "learning_rate": 9.007524190413358e-06, "loss": 0.6414, "step": 19064 }, { "epoch": 0.22794390176831383, "grad_norm": 3.4973409175872803, "learning_rate": 9.007408406638754e-06, "loss": 0.6728, "step": 19065 }, { "epoch": 0.22795585791317446, "grad_norm": 1.488918662071228, "learning_rate": 9.007292616855026e-06, "loss": 0.5401, "step": 19066 }, { "epoch": 0.22796781405803512, "grad_norm": 1.5463306903839111, "learning_rate": 9.00717682106235e-06, "loss": 0.519, "step": 19067 }, { "epoch": 0.22797977020289578, "grad_norm": 13.00170612335205, "learning_rate": 9.007061019260896e-06, "loss": 0.5422, "step": 19068 }, { "epoch": 0.22799172634775644, "grad_norm": 2.1482181549072266, "learning_rate": 9.00694521145084e-06, "loss": 0.7859, "step": 19069 }, { "epoch": 0.22800368249261707, "grad_norm": 1.5736720561981201, "learning_rate": 9.006829397632356e-06, "loss": 0.6408, "step": 19070 }, { "epoch": 0.22801563863747773, "grad_norm": 3.420837879180908, "learning_rate": 9.006713577805617e-06, "loss": 0.5835, "step": 19071 }, { "epoch": 0.2280275947823384, "grad_norm": 6.223320960998535, "learning_rate": 9.006597751970798e-06, "loss": 0.6164, "step": 19072 }, { "epoch": 0.22803955092719905, "grad_norm": 6.107424259185791, "learning_rate": 9.00648192012807e-06, "loss": 0.6913, "step": 19073 }, { "epoch": 0.22805150707205968, "grad_norm": 114.91378021240234, "learning_rate": 9.006366082277609e-06, "loss": 0.6387, "step": 19074 }, { "epoch": 0.22806346321692034, "grad_norm": 2.4974160194396973, "learning_rate": 9.006250238419587e-06, "loss": 0.5807, "step": 19075 }, { "epoch": 0.228075419361781, "grad_norm": 1.7561498880386353, "learning_rate": 9.00613438855418e-06, "loss": 0.7075, "step": 19076 }, { "epoch": 0.22808737550664163, "grad_norm": 3.0435597896575928, "learning_rate": 9.006018532681557e-06, "loss": 0.5566, "step": 19077 }, { "epoch": 0.22809933165150229, "grad_norm": 2.69429612159729, "learning_rate": 9.005902670801898e-06, "loss": 0.5902, "step": 19078 }, { "epoch": 0.22811128779636294, "grad_norm": 1.9552228450775146, "learning_rate": 9.005786802915373e-06, "loss": 0.6155, "step": 19079 }, { "epoch": 0.2281232439412236, "grad_norm": 3.3537979125976562, "learning_rate": 9.005670929022157e-06, "loss": 0.6158, "step": 19080 }, { "epoch": 0.22813520008608423, "grad_norm": 2.9657578468322754, "learning_rate": 9.005555049122421e-06, "loss": 0.597, "step": 19081 }, { "epoch": 0.2281471562309449, "grad_norm": 2.3829424381256104, "learning_rate": 9.005439163216342e-06, "loss": 0.6927, "step": 19082 }, { "epoch": 0.22815911237580555, "grad_norm": 2.6798787117004395, "learning_rate": 9.005323271304094e-06, "loss": 0.6687, "step": 19083 }, { "epoch": 0.2281710685206662, "grad_norm": 10.892335891723633, "learning_rate": 9.005207373385849e-06, "loss": 0.5721, "step": 19084 }, { "epoch": 0.22818302466552684, "grad_norm": 3.233450412750244, "learning_rate": 9.00509146946178e-06, "loss": 0.6025, "step": 19085 }, { "epoch": 0.2281949808103875, "grad_norm": 1.5472023487091064, "learning_rate": 9.004975559532065e-06, "loss": 0.5647, "step": 19086 }, { "epoch": 0.22820693695524816, "grad_norm": 2.725651741027832, "learning_rate": 9.004859643596872e-06, "loss": 0.5955, "step": 19087 }, { "epoch": 0.2282188931001088, "grad_norm": 3.1460134983062744, "learning_rate": 9.004743721656379e-06, "loss": 0.5341, "step": 19088 }, { "epoch": 0.22823084924496945, "grad_norm": 2.6298956871032715, "learning_rate": 9.004627793710759e-06, "loss": 0.6448, "step": 19089 }, { "epoch": 0.2282428053898301, "grad_norm": 2.930885076522827, "learning_rate": 9.004511859760183e-06, "loss": 0.5877, "step": 19090 }, { "epoch": 0.22825476153469076, "grad_norm": 2.5194976329803467, "learning_rate": 9.004395919804829e-06, "loss": 0.5763, "step": 19091 }, { "epoch": 0.2282667176795514, "grad_norm": 3.8775572776794434, "learning_rate": 9.00427997384487e-06, "loss": 0.6597, "step": 19092 }, { "epoch": 0.22827867382441205, "grad_norm": 3.4933149814605713, "learning_rate": 9.004164021880476e-06, "loss": 0.6339, "step": 19093 }, { "epoch": 0.2282906299692727, "grad_norm": 1.9218913316726685, "learning_rate": 9.004048063911828e-06, "loss": 0.606, "step": 19094 }, { "epoch": 0.22830258611413337, "grad_norm": 2.0856821537017822, "learning_rate": 9.003932099939093e-06, "loss": 0.6601, "step": 19095 }, { "epoch": 0.228314542258994, "grad_norm": 1.9069246053695679, "learning_rate": 9.003816129962448e-06, "loss": 0.6369, "step": 19096 }, { "epoch": 0.22832649840385466, "grad_norm": 9.665898323059082, "learning_rate": 9.003700153982067e-06, "loss": 0.597, "step": 19097 }, { "epoch": 0.22833845454871532, "grad_norm": 2.307574510574341, "learning_rate": 9.003584171998123e-06, "loss": 0.6844, "step": 19098 }, { "epoch": 0.22835041069357595, "grad_norm": 4.812709808349609, "learning_rate": 9.00346818401079e-06, "loss": 0.758, "step": 19099 }, { "epoch": 0.2283623668384366, "grad_norm": 2.492586851119995, "learning_rate": 9.003352190020242e-06, "loss": 0.6379, "step": 19100 }, { "epoch": 0.22837432298329727, "grad_norm": 1.854334831237793, "learning_rate": 9.003236190026653e-06, "loss": 0.6083, "step": 19101 }, { "epoch": 0.22838627912815793, "grad_norm": 2.5609991550445557, "learning_rate": 9.003120184030198e-06, "loss": 0.5827, "step": 19102 }, { "epoch": 0.22839823527301856, "grad_norm": 2.724099636077881, "learning_rate": 9.003004172031051e-06, "loss": 0.5337, "step": 19103 }, { "epoch": 0.22841019141787922, "grad_norm": 2.774724245071411, "learning_rate": 9.002888154029383e-06, "loss": 0.6341, "step": 19104 }, { "epoch": 0.22842214756273987, "grad_norm": 2.0306997299194336, "learning_rate": 9.00277213002537e-06, "loss": 0.5261, "step": 19105 }, { "epoch": 0.22843410370760053, "grad_norm": 1.9844262599945068, "learning_rate": 9.002656100019188e-06, "loss": 0.6438, "step": 19106 }, { "epoch": 0.22844605985246116, "grad_norm": 1.705578327178955, "learning_rate": 9.002540064011007e-06, "loss": 0.5623, "step": 19107 }, { "epoch": 0.22845801599732182, "grad_norm": 2.8234341144561768, "learning_rate": 9.002424022001004e-06, "loss": 0.6744, "step": 19108 }, { "epoch": 0.22846997214218248, "grad_norm": 1.7511051893234253, "learning_rate": 9.002307973989352e-06, "loss": 0.5584, "step": 19109 }, { "epoch": 0.2284819282870431, "grad_norm": 2.21260404586792, "learning_rate": 9.002191919976225e-06, "loss": 0.7476, "step": 19110 }, { "epoch": 0.22849388443190377, "grad_norm": 2.9341495037078857, "learning_rate": 9.002075859961795e-06, "loss": 0.6231, "step": 19111 }, { "epoch": 0.22850584057676443, "grad_norm": 7.013613224029541, "learning_rate": 9.00195979394624e-06, "loss": 0.6295, "step": 19112 }, { "epoch": 0.2285177967216251, "grad_norm": 5.983914375305176, "learning_rate": 9.001843721929732e-06, "loss": 0.5524, "step": 19113 }, { "epoch": 0.22852975286648572, "grad_norm": 2.0343339443206787, "learning_rate": 9.001727643912445e-06, "loss": 0.6417, "step": 19114 }, { "epoch": 0.22854170901134638, "grad_norm": 3.831847906112671, "learning_rate": 9.001611559894552e-06, "loss": 0.5449, "step": 19115 }, { "epoch": 0.22855366515620704, "grad_norm": 2.831773519515991, "learning_rate": 9.00149546987623e-06, "loss": 0.5968, "step": 19116 }, { "epoch": 0.2285656213010677, "grad_norm": 3.85760235786438, "learning_rate": 9.00137937385765e-06, "loss": 0.6229, "step": 19117 }, { "epoch": 0.22857757744592833, "grad_norm": 3.211357355117798, "learning_rate": 9.001263271838987e-06, "loss": 0.59, "step": 19118 }, { "epoch": 0.22858953359078898, "grad_norm": 2.513500213623047, "learning_rate": 9.001147163820417e-06, "loss": 0.7054, "step": 19119 }, { "epoch": 0.22860148973564964, "grad_norm": 1.969473123550415, "learning_rate": 9.00103104980211e-06, "loss": 0.6416, "step": 19120 }, { "epoch": 0.2286134458805103, "grad_norm": 2.17794132232666, "learning_rate": 9.000914929784247e-06, "loss": 0.5517, "step": 19121 }, { "epoch": 0.22862540202537093, "grad_norm": 3.259023427963257, "learning_rate": 9.000798803766995e-06, "loss": 0.5728, "step": 19122 }, { "epoch": 0.2286373581702316, "grad_norm": 2.650606393814087, "learning_rate": 9.000682671750532e-06, "loss": 0.6101, "step": 19123 }, { "epoch": 0.22864931431509225, "grad_norm": 3.0300629138946533, "learning_rate": 9.000566533735032e-06, "loss": 0.6381, "step": 19124 }, { "epoch": 0.22866127045995288, "grad_norm": 2.70959210395813, "learning_rate": 9.000450389720665e-06, "loss": 0.6237, "step": 19125 }, { "epoch": 0.22867322660481354, "grad_norm": 5.594067096710205, "learning_rate": 9.00033423970761e-06, "loss": 0.7032, "step": 19126 }, { "epoch": 0.2286851827496742, "grad_norm": 3.674004316329956, "learning_rate": 9.00021808369604e-06, "loss": 0.6003, "step": 19127 }, { "epoch": 0.22869713889453486, "grad_norm": 3.46044659614563, "learning_rate": 9.00010192168613e-06, "loss": 0.523, "step": 19128 }, { "epoch": 0.2287090950393955, "grad_norm": 3.210265636444092, "learning_rate": 8.99998575367805e-06, "loss": 0.6223, "step": 19129 }, { "epoch": 0.22872105118425615, "grad_norm": 2.1875243186950684, "learning_rate": 8.99986957967198e-06, "loss": 0.6034, "step": 19130 }, { "epoch": 0.2287330073291168, "grad_norm": 2.4605462551116943, "learning_rate": 8.99975339966809e-06, "loss": 0.5893, "step": 19131 }, { "epoch": 0.22874496347397746, "grad_norm": 3.6473782062530518, "learning_rate": 8.999637213666555e-06, "loss": 0.5474, "step": 19132 }, { "epoch": 0.2287569196188381, "grad_norm": 14.54249095916748, "learning_rate": 8.999521021667552e-06, "loss": 0.6955, "step": 19133 }, { "epoch": 0.22876887576369875, "grad_norm": 2.7171900272369385, "learning_rate": 8.99940482367125e-06, "loss": 0.5817, "step": 19134 }, { "epoch": 0.2287808319085594, "grad_norm": 1.6487120389938354, "learning_rate": 8.999288619677828e-06, "loss": 0.6766, "step": 19135 }, { "epoch": 0.22879278805342004, "grad_norm": 2.7272841930389404, "learning_rate": 8.999172409687458e-06, "loss": 0.7033, "step": 19136 }, { "epoch": 0.2288047441982807, "grad_norm": 10.61744213104248, "learning_rate": 8.999056193700316e-06, "loss": 0.6224, "step": 19137 }, { "epoch": 0.22881670034314136, "grad_norm": 1.4743690490722656, "learning_rate": 8.998939971716574e-06, "loss": 0.5628, "step": 19138 }, { "epoch": 0.22882865648800202, "grad_norm": 2.5600903034210205, "learning_rate": 8.998823743736407e-06, "loss": 0.73, "step": 19139 }, { "epoch": 0.22884061263286265, "grad_norm": 3.2860183715820312, "learning_rate": 8.99870750975999e-06, "loss": 0.6491, "step": 19140 }, { "epoch": 0.2288525687777233, "grad_norm": 6.8094987869262695, "learning_rate": 8.998591269787496e-06, "loss": 0.7077, "step": 19141 }, { "epoch": 0.22886452492258397, "grad_norm": 2.4253199100494385, "learning_rate": 8.9984750238191e-06, "loss": 0.6346, "step": 19142 }, { "epoch": 0.22887648106744463, "grad_norm": 4.086449146270752, "learning_rate": 8.998358771854979e-06, "loss": 0.5795, "step": 19143 }, { "epoch": 0.22888843721230526, "grad_norm": 1.9406946897506714, "learning_rate": 8.998242513895304e-06, "loss": 0.6246, "step": 19144 }, { "epoch": 0.22890039335716592, "grad_norm": 2.389674186706543, "learning_rate": 8.998126249940248e-06, "loss": 0.6559, "step": 19145 }, { "epoch": 0.22891234950202657, "grad_norm": 1.9060721397399902, "learning_rate": 8.998009979989988e-06, "loss": 0.5757, "step": 19146 }, { "epoch": 0.2289243056468872, "grad_norm": 1.8948785066604614, "learning_rate": 8.9978937040447e-06, "loss": 0.5755, "step": 19147 }, { "epoch": 0.22893626179174786, "grad_norm": 1.90733003616333, "learning_rate": 8.997777422104554e-06, "loss": 0.6257, "step": 19148 }, { "epoch": 0.22894821793660852, "grad_norm": 2.390629768371582, "learning_rate": 8.997661134169728e-06, "loss": 0.6277, "step": 19149 }, { "epoch": 0.22896017408146918, "grad_norm": 1.762073278427124, "learning_rate": 8.997544840240394e-06, "loss": 0.6066, "step": 19150 }, { "epoch": 0.2289721302263298, "grad_norm": 3.603046178817749, "learning_rate": 8.997428540316728e-06, "loss": 0.545, "step": 19151 }, { "epoch": 0.22898408637119047, "grad_norm": 2.0090079307556152, "learning_rate": 8.997312234398903e-06, "loss": 0.5867, "step": 19152 }, { "epoch": 0.22899604251605113, "grad_norm": 8.019331932067871, "learning_rate": 8.997195922487093e-06, "loss": 0.605, "step": 19153 }, { "epoch": 0.2290079986609118, "grad_norm": 3.077158212661743, "learning_rate": 8.997079604581475e-06, "loss": 0.5513, "step": 19154 }, { "epoch": 0.22901995480577242, "grad_norm": 4.896030426025391, "learning_rate": 8.996963280682223e-06, "loss": 0.6614, "step": 19155 }, { "epoch": 0.22903191095063308, "grad_norm": 8.925926208496094, "learning_rate": 8.99684695078951e-06, "loss": 0.5678, "step": 19156 }, { "epoch": 0.22904386709549374, "grad_norm": 2.1529242992401123, "learning_rate": 8.996730614903509e-06, "loss": 0.5711, "step": 19157 }, { "epoch": 0.22905582324035437, "grad_norm": 2.9311678409576416, "learning_rate": 8.996614273024397e-06, "loss": 0.5606, "step": 19158 }, { "epoch": 0.22906777938521503, "grad_norm": 2.759004592895508, "learning_rate": 8.996497925152348e-06, "loss": 0.5298, "step": 19159 }, { "epoch": 0.22907973553007568, "grad_norm": 3.5660440921783447, "learning_rate": 8.996381571287536e-06, "loss": 0.6294, "step": 19160 }, { "epoch": 0.22909169167493634, "grad_norm": 2.707348346710205, "learning_rate": 8.996265211430137e-06, "loss": 0.6448, "step": 19161 }, { "epoch": 0.22910364781979697, "grad_norm": 3.630927324295044, "learning_rate": 8.996148845580321e-06, "loss": 0.6173, "step": 19162 }, { "epoch": 0.22911560396465763, "grad_norm": 3.7516732215881348, "learning_rate": 8.996032473738267e-06, "loss": 0.5646, "step": 19163 }, { "epoch": 0.2291275601095183, "grad_norm": 3.445448160171509, "learning_rate": 8.995916095904149e-06, "loss": 0.6244, "step": 19164 }, { "epoch": 0.22913951625437895, "grad_norm": 3.2495110034942627, "learning_rate": 8.995799712078142e-06, "loss": 0.6374, "step": 19165 }, { "epoch": 0.22915147239923958, "grad_norm": 4.079583168029785, "learning_rate": 8.995683322260416e-06, "loss": 0.6422, "step": 19166 }, { "epoch": 0.22916342854410024, "grad_norm": 3.193686008453369, "learning_rate": 8.99556692645115e-06, "loss": 0.6777, "step": 19167 }, { "epoch": 0.2291753846889609, "grad_norm": 2.254394054412842, "learning_rate": 8.995450524650517e-06, "loss": 0.6363, "step": 19168 }, { "epoch": 0.22918734083382153, "grad_norm": 2.4805870056152344, "learning_rate": 8.995334116858691e-06, "loss": 0.6256, "step": 19169 }, { "epoch": 0.2291992969786822, "grad_norm": 2.999495267868042, "learning_rate": 8.995217703075849e-06, "loss": 0.7557, "step": 19170 }, { "epoch": 0.22921125312354285, "grad_norm": 1.8897767066955566, "learning_rate": 8.995101283302164e-06, "loss": 0.6763, "step": 19171 }, { "epoch": 0.2292232092684035, "grad_norm": 1.9457653760910034, "learning_rate": 8.994984857537808e-06, "loss": 0.5929, "step": 19172 }, { "epoch": 0.22923516541326414, "grad_norm": 2.276672840118408, "learning_rate": 8.99486842578296e-06, "loss": 0.657, "step": 19173 }, { "epoch": 0.2292471215581248, "grad_norm": 2.904003381729126, "learning_rate": 8.99475198803779e-06, "loss": 0.6054, "step": 19174 }, { "epoch": 0.22925907770298545, "grad_norm": 2.389010429382324, "learning_rate": 8.994635544302478e-06, "loss": 0.5632, "step": 19175 }, { "epoch": 0.2292710338478461, "grad_norm": 6.596508979797363, "learning_rate": 8.994519094577197e-06, "loss": 0.5923, "step": 19176 }, { "epoch": 0.22928298999270674, "grad_norm": 3.270811080932617, "learning_rate": 8.994402638862117e-06, "loss": 0.6548, "step": 19177 }, { "epoch": 0.2292949461375674, "grad_norm": 3.0902278423309326, "learning_rate": 8.994286177157418e-06, "loss": 0.5991, "step": 19178 }, { "epoch": 0.22930690228242806, "grad_norm": 2.254744291305542, "learning_rate": 8.994169709463271e-06, "loss": 0.6574, "step": 19179 }, { "epoch": 0.22931885842728872, "grad_norm": 1.8833186626434326, "learning_rate": 8.994053235779853e-06, "loss": 0.4648, "step": 19180 }, { "epoch": 0.22933081457214935, "grad_norm": 3.11275053024292, "learning_rate": 8.99393675610734e-06, "loss": 0.607, "step": 19181 }, { "epoch": 0.22934277071701, "grad_norm": 2.8986260890960693, "learning_rate": 8.993820270445904e-06, "loss": 0.5562, "step": 19182 }, { "epoch": 0.22935472686187067, "grad_norm": 3.5515542030334473, "learning_rate": 8.993703778795719e-06, "loss": 0.6618, "step": 19183 }, { "epoch": 0.2293666830067313, "grad_norm": 45.29923629760742, "learning_rate": 8.993587281156962e-06, "loss": 0.6317, "step": 19184 }, { "epoch": 0.22937863915159196, "grad_norm": 3.0210232734680176, "learning_rate": 8.993470777529805e-06, "loss": 0.5854, "step": 19185 }, { "epoch": 0.22939059529645262, "grad_norm": 8.698322296142578, "learning_rate": 8.993354267914424e-06, "loss": 0.7302, "step": 19186 }, { "epoch": 0.22940255144131327, "grad_norm": 7.404163360595703, "learning_rate": 8.993237752310998e-06, "loss": 0.5607, "step": 19187 }, { "epoch": 0.2294145075861739, "grad_norm": 3.1235687732696533, "learning_rate": 8.993121230719694e-06, "loss": 0.6539, "step": 19188 }, { "epoch": 0.22942646373103456, "grad_norm": 2.423309803009033, "learning_rate": 8.99300470314069e-06, "loss": 0.6136, "step": 19189 }, { "epoch": 0.22943841987589522, "grad_norm": 3.3613877296447754, "learning_rate": 8.992888169574163e-06, "loss": 0.5603, "step": 19190 }, { "epoch": 0.22945037602075588, "grad_norm": 3.993581771850586, "learning_rate": 8.992771630020288e-06, "loss": 0.6077, "step": 19191 }, { "epoch": 0.2294623321656165, "grad_norm": 3.4244844913482666, "learning_rate": 8.992655084479236e-06, "loss": 0.5898, "step": 19192 }, { "epoch": 0.22947428831047717, "grad_norm": 3.7215843200683594, "learning_rate": 8.992538532951182e-06, "loss": 0.637, "step": 19193 }, { "epoch": 0.22948624445533783, "grad_norm": 2.1268272399902344, "learning_rate": 8.992421975436304e-06, "loss": 0.5955, "step": 19194 }, { "epoch": 0.22949820060019846, "grad_norm": 2.217825412750244, "learning_rate": 8.992305411934776e-06, "loss": 0.6601, "step": 19195 }, { "epoch": 0.22951015674505912, "grad_norm": 3.047752618789673, "learning_rate": 8.99218884244677e-06, "loss": 0.6427, "step": 19196 }, { "epoch": 0.22952211288991978, "grad_norm": 2.096771240234375, "learning_rate": 8.992072266972462e-06, "loss": 0.5619, "step": 19197 }, { "epoch": 0.22953406903478044, "grad_norm": 2.4525246620178223, "learning_rate": 8.99195568551203e-06, "loss": 0.6079, "step": 19198 }, { "epoch": 0.22954602517964107, "grad_norm": 24.472000122070312, "learning_rate": 8.991839098065645e-06, "loss": 0.6214, "step": 19199 }, { "epoch": 0.22955798132450173, "grad_norm": 4.071662902832031, "learning_rate": 8.991722504633483e-06, "loss": 0.5305, "step": 19200 }, { "epoch": 0.22956993746936238, "grad_norm": 2.345127820968628, "learning_rate": 8.991605905215717e-06, "loss": 0.6171, "step": 19201 }, { "epoch": 0.22958189361422304, "grad_norm": 2.3787641525268555, "learning_rate": 8.991489299812526e-06, "loss": 0.6147, "step": 19202 }, { "epoch": 0.22959384975908367, "grad_norm": 1.7452765703201294, "learning_rate": 8.991372688424083e-06, "loss": 0.6567, "step": 19203 }, { "epoch": 0.22960580590394433, "grad_norm": 3.2253270149230957, "learning_rate": 8.991256071050561e-06, "loss": 0.6232, "step": 19204 }, { "epoch": 0.229617762048805, "grad_norm": 1.9303491115570068, "learning_rate": 8.991139447692138e-06, "loss": 0.6064, "step": 19205 }, { "epoch": 0.22962971819366562, "grad_norm": 1.673541784286499, "learning_rate": 8.991022818348986e-06, "loss": 0.5667, "step": 19206 }, { "epoch": 0.22964167433852628, "grad_norm": 3.0408504009246826, "learning_rate": 8.99090618302128e-06, "loss": 0.6138, "step": 19207 }, { "epoch": 0.22965363048338694, "grad_norm": 4.177821636199951, "learning_rate": 8.990789541709198e-06, "loss": 0.6076, "step": 19208 }, { "epoch": 0.2296655866282476, "grad_norm": 1.6846264600753784, "learning_rate": 8.990672894412913e-06, "loss": 0.59, "step": 19209 }, { "epoch": 0.22967754277310823, "grad_norm": 3.2412993907928467, "learning_rate": 8.990556241132599e-06, "loss": 0.6345, "step": 19210 }, { "epoch": 0.2296894989179689, "grad_norm": 2.7423789501190186, "learning_rate": 8.99043958186843e-06, "loss": 0.59, "step": 19211 }, { "epoch": 0.22970145506282955, "grad_norm": 2.051238775253296, "learning_rate": 8.990322916620586e-06, "loss": 0.5282, "step": 19212 }, { "epoch": 0.2297134112076902, "grad_norm": 5.307106971740723, "learning_rate": 8.990206245389237e-06, "loss": 0.5841, "step": 19213 }, { "epoch": 0.22972536735255084, "grad_norm": 5.567546367645264, "learning_rate": 8.99008956817456e-06, "loss": 0.6563, "step": 19214 }, { "epoch": 0.2297373234974115, "grad_norm": 4.214566707611084, "learning_rate": 8.989972884976729e-06, "loss": 0.6529, "step": 19215 }, { "epoch": 0.22974927964227215, "grad_norm": 3.8962433338165283, "learning_rate": 8.98985619579592e-06, "loss": 0.6701, "step": 19216 }, { "epoch": 0.22976123578713278, "grad_norm": 5.03568696975708, "learning_rate": 8.989739500632306e-06, "loss": 0.6793, "step": 19217 }, { "epoch": 0.22977319193199344, "grad_norm": 2.7374937534332275, "learning_rate": 8.989622799486064e-06, "loss": 0.5726, "step": 19218 }, { "epoch": 0.2297851480768541, "grad_norm": 3.234992742538452, "learning_rate": 8.98950609235737e-06, "loss": 0.6437, "step": 19219 }, { "epoch": 0.22979710422171476, "grad_norm": 2.146023750305176, "learning_rate": 8.989389379246396e-06, "loss": 0.7278, "step": 19220 }, { "epoch": 0.2298090603665754, "grad_norm": 2.801054000854492, "learning_rate": 8.989272660153319e-06, "loss": 0.641, "step": 19221 }, { "epoch": 0.22982101651143605, "grad_norm": 2.64072847366333, "learning_rate": 8.989155935078313e-06, "loss": 0.6263, "step": 19222 }, { "epoch": 0.2298329726562967, "grad_norm": 16.098663330078125, "learning_rate": 8.989039204021553e-06, "loss": 0.5969, "step": 19223 }, { "epoch": 0.22984492880115737, "grad_norm": 2.727424383163452, "learning_rate": 8.988922466983216e-06, "loss": 0.659, "step": 19224 }, { "epoch": 0.229856884946018, "grad_norm": 1.8070422410964966, "learning_rate": 8.988805723963474e-06, "loss": 0.6156, "step": 19225 }, { "epoch": 0.22986884109087866, "grad_norm": 3.8063554763793945, "learning_rate": 8.988688974962505e-06, "loss": 0.6643, "step": 19226 }, { "epoch": 0.22988079723573931, "grad_norm": 1.691880226135254, "learning_rate": 8.988572219980483e-06, "loss": 0.6141, "step": 19227 }, { "epoch": 0.22989275338059995, "grad_norm": 1.5155776739120483, "learning_rate": 8.988455459017582e-06, "loss": 0.5919, "step": 19228 }, { "epoch": 0.2299047095254606, "grad_norm": 2.212261199951172, "learning_rate": 8.988338692073977e-06, "loss": 0.6436, "step": 19229 }, { "epoch": 0.22991666567032126, "grad_norm": 11.108538627624512, "learning_rate": 8.988221919149845e-06, "loss": 0.586, "step": 19230 }, { "epoch": 0.22992862181518192, "grad_norm": 4.118155479431152, "learning_rate": 8.988105140245361e-06, "loss": 0.6122, "step": 19231 }, { "epoch": 0.22994057796004255, "grad_norm": 1.9651075601577759, "learning_rate": 8.987988355360696e-06, "loss": 0.681, "step": 19232 }, { "epoch": 0.2299525341049032, "grad_norm": 2.3734142780303955, "learning_rate": 8.98787156449603e-06, "loss": 0.6434, "step": 19233 }, { "epoch": 0.22996449024976387, "grad_norm": 2.434629201889038, "learning_rate": 8.987754767651538e-06, "loss": 0.6559, "step": 19234 }, { "epoch": 0.22997644639462453, "grad_norm": 5.948692798614502, "learning_rate": 8.987637964827391e-06, "loss": 0.5157, "step": 19235 }, { "epoch": 0.22998840253948516, "grad_norm": 2.427704095840454, "learning_rate": 8.987521156023768e-06, "loss": 0.4855, "step": 19236 }, { "epoch": 0.23000035868434582, "grad_norm": 3.0951833724975586, "learning_rate": 8.987404341240844e-06, "loss": 0.6146, "step": 19237 }, { "epoch": 0.23001231482920648, "grad_norm": 2.362354040145874, "learning_rate": 8.987287520478792e-06, "loss": 0.6672, "step": 19238 }, { "epoch": 0.23002427097406714, "grad_norm": 2.696427345275879, "learning_rate": 8.987170693737787e-06, "loss": 0.5796, "step": 19239 }, { "epoch": 0.23003622711892777, "grad_norm": 1.9350167512893677, "learning_rate": 8.987053861018007e-06, "loss": 0.5474, "step": 19240 }, { "epoch": 0.23004818326378842, "grad_norm": 5.2649827003479, "learning_rate": 8.986937022319625e-06, "loss": 0.6152, "step": 19241 }, { "epoch": 0.23006013940864908, "grad_norm": 2.09159779548645, "learning_rate": 8.986820177642816e-06, "loss": 0.6232, "step": 19242 }, { "epoch": 0.23007209555350971, "grad_norm": 3.532813310623169, "learning_rate": 8.986703326987757e-06, "loss": 0.6319, "step": 19243 }, { "epoch": 0.23008405169837037, "grad_norm": 4.8594441413879395, "learning_rate": 8.986586470354622e-06, "loss": 0.6183, "step": 19244 }, { "epoch": 0.23009600784323103, "grad_norm": 1.7035853862762451, "learning_rate": 8.986469607743586e-06, "loss": 0.6238, "step": 19245 }, { "epoch": 0.2301079639880917, "grad_norm": 4.0651679039001465, "learning_rate": 8.986352739154824e-06, "loss": 0.6623, "step": 19246 }, { "epoch": 0.23011992013295232, "grad_norm": 2.4483230113983154, "learning_rate": 8.986235864588513e-06, "loss": 0.7565, "step": 19247 }, { "epoch": 0.23013187627781298, "grad_norm": 2.0970845222473145, "learning_rate": 8.986118984044827e-06, "loss": 0.6435, "step": 19248 }, { "epoch": 0.23014383242267364, "grad_norm": 2.266988515853882, "learning_rate": 8.986002097523942e-06, "loss": 0.5516, "step": 19249 }, { "epoch": 0.2301557885675343, "grad_norm": 2.868941307067871, "learning_rate": 8.985885205026033e-06, "loss": 0.65, "step": 19250 }, { "epoch": 0.23016774471239493, "grad_norm": 2.4826934337615967, "learning_rate": 8.985768306551273e-06, "loss": 0.6373, "step": 19251 }, { "epoch": 0.2301797008572556, "grad_norm": 1.7871688604354858, "learning_rate": 8.98565140209984e-06, "loss": 0.6593, "step": 19252 }, { "epoch": 0.23019165700211625, "grad_norm": 2.225966215133667, "learning_rate": 8.98553449167191e-06, "loss": 0.7076, "step": 19253 }, { "epoch": 0.23020361314697688, "grad_norm": 3.3969831466674805, "learning_rate": 8.985417575267654e-06, "loss": 0.6167, "step": 19254 }, { "epoch": 0.23021556929183754, "grad_norm": 2.3859994411468506, "learning_rate": 8.985300652887251e-06, "loss": 0.5456, "step": 19255 }, { "epoch": 0.2302275254366982, "grad_norm": 2.3396291732788086, "learning_rate": 8.985183724530879e-06, "loss": 0.5569, "step": 19256 }, { "epoch": 0.23023948158155885, "grad_norm": 2.3886446952819824, "learning_rate": 8.985066790198707e-06, "loss": 0.68, "step": 19257 }, { "epoch": 0.23025143772641948, "grad_norm": 2.981156349182129, "learning_rate": 8.984949849890914e-06, "loss": 0.6153, "step": 19258 }, { "epoch": 0.23026339387128014, "grad_norm": 2.8931474685668945, "learning_rate": 8.984832903607674e-06, "loss": 0.6605, "step": 19259 }, { "epoch": 0.2302753500161408, "grad_norm": 2.116159439086914, "learning_rate": 8.984715951349161e-06, "loss": 0.6621, "step": 19260 }, { "epoch": 0.23028730616100146, "grad_norm": 3.882702112197876, "learning_rate": 8.984598993115554e-06, "loss": 0.665, "step": 19261 }, { "epoch": 0.2302992623058621, "grad_norm": 2.2369608879089355, "learning_rate": 8.984482028907026e-06, "loss": 0.5759, "step": 19262 }, { "epoch": 0.23031121845072275, "grad_norm": 2.1785573959350586, "learning_rate": 8.984365058723757e-06, "loss": 0.591, "step": 19263 }, { "epoch": 0.2303231745955834, "grad_norm": 1.8237941265106201, "learning_rate": 8.984248082565915e-06, "loss": 0.5707, "step": 19264 }, { "epoch": 0.23033513074044404, "grad_norm": 3.3331539630889893, "learning_rate": 8.984131100433678e-06, "loss": 0.5184, "step": 19265 }, { "epoch": 0.2303470868853047, "grad_norm": 1.845231056213379, "learning_rate": 8.984014112327223e-06, "loss": 0.6134, "step": 19266 }, { "epoch": 0.23035904303016536, "grad_norm": 2.0562870502471924, "learning_rate": 8.983897118246725e-06, "loss": 0.5884, "step": 19267 }, { "epoch": 0.23037099917502601, "grad_norm": 5.764023780822754, "learning_rate": 8.983780118192359e-06, "loss": 0.5791, "step": 19268 }, { "epoch": 0.23038295531988665, "grad_norm": 4.457653999328613, "learning_rate": 8.9836631121643e-06, "loss": 0.6761, "step": 19269 }, { "epoch": 0.2303949114647473, "grad_norm": 2.0875494480133057, "learning_rate": 8.983546100162726e-06, "loss": 0.6021, "step": 19270 }, { "epoch": 0.23040686760960796, "grad_norm": 3.7241628170013428, "learning_rate": 8.983429082187809e-06, "loss": 0.6187, "step": 19271 }, { "epoch": 0.23041882375446862, "grad_norm": 2.4015190601348877, "learning_rate": 8.983312058239726e-06, "loss": 0.7073, "step": 19272 }, { "epoch": 0.23043077989932925, "grad_norm": 1.7529940605163574, "learning_rate": 8.983195028318651e-06, "loss": 0.4965, "step": 19273 }, { "epoch": 0.2304427360441899, "grad_norm": 1.564739465713501, "learning_rate": 8.983077992424763e-06, "loss": 0.6124, "step": 19274 }, { "epoch": 0.23045469218905057, "grad_norm": 1.8513696193695068, "learning_rate": 8.982960950558235e-06, "loss": 0.6184, "step": 19275 }, { "epoch": 0.2304666483339112, "grad_norm": 1.7343748807907104, "learning_rate": 8.982843902719242e-06, "loss": 0.6291, "step": 19276 }, { "epoch": 0.23047860447877186, "grad_norm": 3.4031808376312256, "learning_rate": 8.982726848907962e-06, "loss": 0.6282, "step": 19277 }, { "epoch": 0.23049056062363252, "grad_norm": 1.6034541130065918, "learning_rate": 8.982609789124568e-06, "loss": 0.5931, "step": 19278 }, { "epoch": 0.23050251676849318, "grad_norm": 2.2550418376922607, "learning_rate": 8.982492723369236e-06, "loss": 0.5915, "step": 19279 }, { "epoch": 0.2305144729133538, "grad_norm": 2.217484951019287, "learning_rate": 8.982375651642144e-06, "loss": 0.6361, "step": 19280 }, { "epoch": 0.23052642905821447, "grad_norm": 2.5126051902770996, "learning_rate": 8.982258573943462e-06, "loss": 0.7713, "step": 19281 }, { "epoch": 0.23053838520307512, "grad_norm": 2.8648197650909424, "learning_rate": 8.982141490273373e-06, "loss": 0.551, "step": 19282 }, { "epoch": 0.23055034134793578, "grad_norm": 2.830834150314331, "learning_rate": 8.982024400632046e-06, "loss": 0.5877, "step": 19283 }, { "epoch": 0.23056229749279641, "grad_norm": 2.0526530742645264, "learning_rate": 8.98190730501966e-06, "loss": 0.613, "step": 19284 }, { "epoch": 0.23057425363765707, "grad_norm": 9.032781600952148, "learning_rate": 8.98179020343639e-06, "loss": 0.5725, "step": 19285 }, { "epoch": 0.23058620978251773, "grad_norm": 1.6287833452224731, "learning_rate": 8.98167309588241e-06, "loss": 0.5477, "step": 19286 }, { "epoch": 0.23059816592737836, "grad_norm": 2.3351449966430664, "learning_rate": 8.9815559823579e-06, "loss": 0.6807, "step": 19287 }, { "epoch": 0.23061012207223902, "grad_norm": 1.7672321796417236, "learning_rate": 8.981438862863032e-06, "loss": 0.6771, "step": 19288 }, { "epoch": 0.23062207821709968, "grad_norm": 4.727976322174072, "learning_rate": 8.98132173739798e-06, "loss": 0.6419, "step": 19289 }, { "epoch": 0.23063403436196034, "grad_norm": 2.0059986114501953, "learning_rate": 8.981204605962922e-06, "loss": 0.587, "step": 19290 }, { "epoch": 0.23064599050682097, "grad_norm": 1.798659324645996, "learning_rate": 8.981087468558036e-06, "loss": 0.4769, "step": 19291 }, { "epoch": 0.23065794665168163, "grad_norm": 2.3647680282592773, "learning_rate": 8.980970325183493e-06, "loss": 0.5468, "step": 19292 }, { "epoch": 0.2306699027965423, "grad_norm": 3.8468878269195557, "learning_rate": 8.980853175839473e-06, "loss": 0.6679, "step": 19293 }, { "epoch": 0.23068185894140295, "grad_norm": 1.431030035018921, "learning_rate": 8.980736020526148e-06, "loss": 0.577, "step": 19294 }, { "epoch": 0.23069381508626358, "grad_norm": 3.049952268600464, "learning_rate": 8.980618859243696e-06, "loss": 0.5837, "step": 19295 }, { "epoch": 0.23070577123112423, "grad_norm": 1.4172003269195557, "learning_rate": 8.980501691992292e-06, "loss": 0.622, "step": 19296 }, { "epoch": 0.2307177273759849, "grad_norm": 5.631045341491699, "learning_rate": 8.980384518772109e-06, "loss": 0.5962, "step": 19297 }, { "epoch": 0.23072968352084555, "grad_norm": 2.1175730228424072, "learning_rate": 8.98026733958333e-06, "loss": 0.5862, "step": 19298 }, { "epoch": 0.23074163966570618, "grad_norm": 3.2757487297058105, "learning_rate": 8.980150154426121e-06, "loss": 0.5958, "step": 19299 }, { "epoch": 0.23075359581056684, "grad_norm": 2.0978870391845703, "learning_rate": 8.980032963300665e-06, "loss": 0.5796, "step": 19300 }, { "epoch": 0.2307655519554275, "grad_norm": 1.6904410123825073, "learning_rate": 8.979915766207135e-06, "loss": 0.5744, "step": 19301 }, { "epoch": 0.23077750810028813, "grad_norm": 4.2839035987854, "learning_rate": 8.979798563145707e-06, "loss": 0.5948, "step": 19302 }, { "epoch": 0.2307894642451488, "grad_norm": 3.217104434967041, "learning_rate": 8.979681354116558e-06, "loss": 0.6682, "step": 19303 }, { "epoch": 0.23080142039000945, "grad_norm": 2.837461233139038, "learning_rate": 8.979564139119864e-06, "loss": 0.6568, "step": 19304 }, { "epoch": 0.2308133765348701, "grad_norm": 2.495558500289917, "learning_rate": 8.979446918155797e-06, "loss": 0.5516, "step": 19305 }, { "epoch": 0.23082533267973074, "grad_norm": 2.015165328979492, "learning_rate": 8.979329691224535e-06, "loss": 0.6998, "step": 19306 }, { "epoch": 0.2308372888245914, "grad_norm": 1.496838092803955, "learning_rate": 8.979212458326255e-06, "loss": 0.5875, "step": 19307 }, { "epoch": 0.23084924496945206, "grad_norm": 2.1301357746124268, "learning_rate": 8.979095219461132e-06, "loss": 0.6006, "step": 19308 }, { "epoch": 0.23086120111431271, "grad_norm": 3.568784236907959, "learning_rate": 8.978977974629339e-06, "loss": 0.6559, "step": 19309 }, { "epoch": 0.23087315725917334, "grad_norm": 5.386054039001465, "learning_rate": 8.978860723831058e-06, "loss": 0.5995, "step": 19310 }, { "epoch": 0.230885113404034, "grad_norm": 2.1822421550750732, "learning_rate": 8.97874346706646e-06, "loss": 0.7422, "step": 19311 }, { "epoch": 0.23089706954889466, "grad_norm": 3.0320045948028564, "learning_rate": 8.978626204335721e-06, "loss": 0.6414, "step": 19312 }, { "epoch": 0.2309090256937553, "grad_norm": 3.752964735031128, "learning_rate": 8.978508935639018e-06, "loss": 0.686, "step": 19313 }, { "epoch": 0.23092098183861595, "grad_norm": 2.4995839595794678, "learning_rate": 8.978391660976527e-06, "loss": 0.6469, "step": 19314 }, { "epoch": 0.2309329379834766, "grad_norm": 1.9304697513580322, "learning_rate": 8.978274380348424e-06, "loss": 0.5394, "step": 19315 }, { "epoch": 0.23094489412833727, "grad_norm": 1.771764874458313, "learning_rate": 8.978157093754884e-06, "loss": 0.6067, "step": 19316 }, { "epoch": 0.2309568502731979, "grad_norm": 3.3276898860931396, "learning_rate": 8.978039801196084e-06, "loss": 0.7039, "step": 19317 }, { "epoch": 0.23096880641805856, "grad_norm": 2.807359457015991, "learning_rate": 8.977922502672198e-06, "loss": 0.6798, "step": 19318 }, { "epoch": 0.23098076256291922, "grad_norm": 1.4581682682037354, "learning_rate": 8.977805198183403e-06, "loss": 0.5953, "step": 19319 }, { "epoch": 0.23099271870777988, "grad_norm": 6.374779224395752, "learning_rate": 8.977687887729877e-06, "loss": 0.5756, "step": 19320 }, { "epoch": 0.2310046748526405, "grad_norm": 2.4589779376983643, "learning_rate": 8.977570571311791e-06, "loss": 0.6675, "step": 19321 }, { "epoch": 0.23101663099750117, "grad_norm": 1.7869025468826294, "learning_rate": 8.977453248929327e-06, "loss": 0.6342, "step": 19322 }, { "epoch": 0.23102858714236182, "grad_norm": 4.445929050445557, "learning_rate": 8.977335920582655e-06, "loss": 0.7152, "step": 19323 }, { "epoch": 0.23104054328722246, "grad_norm": 1.7268964052200317, "learning_rate": 8.977218586271956e-06, "loss": 0.5565, "step": 19324 }, { "epoch": 0.2310524994320831, "grad_norm": 3.9314868450164795, "learning_rate": 8.977101245997401e-06, "loss": 0.5567, "step": 19325 }, { "epoch": 0.23106445557694377, "grad_norm": 2.425532579421997, "learning_rate": 8.97698389975917e-06, "loss": 0.6637, "step": 19326 }, { "epoch": 0.23107641172180443, "grad_norm": 1.947348952293396, "learning_rate": 8.976866547557438e-06, "loss": 0.5881, "step": 19327 }, { "epoch": 0.23108836786666506, "grad_norm": 2.9223544597625732, "learning_rate": 8.97674918939238e-06, "loss": 0.6438, "step": 19328 }, { "epoch": 0.23110032401152572, "grad_norm": 1.620719075202942, "learning_rate": 8.976631825264173e-06, "loss": 0.6094, "step": 19329 }, { "epoch": 0.23111228015638638, "grad_norm": 1.9159984588623047, "learning_rate": 8.976514455172991e-06, "loss": 0.6812, "step": 19330 }, { "epoch": 0.23112423630124704, "grad_norm": 2.8157289028167725, "learning_rate": 8.976397079119013e-06, "loss": 0.6139, "step": 19331 }, { "epoch": 0.23113619244610767, "grad_norm": 2.52490496635437, "learning_rate": 8.976279697102412e-06, "loss": 0.6147, "step": 19332 }, { "epoch": 0.23114814859096833, "grad_norm": 1.939958930015564, "learning_rate": 8.976162309123366e-06, "loss": 0.5856, "step": 19333 }, { "epoch": 0.231160104735829, "grad_norm": 3.0136024951934814, "learning_rate": 8.976044915182049e-06, "loss": 0.4805, "step": 19334 }, { "epoch": 0.23117206088068962, "grad_norm": 1.941467046737671, "learning_rate": 8.975927515278642e-06, "loss": 0.6915, "step": 19335 }, { "epoch": 0.23118401702555028, "grad_norm": 2.2371633052825928, "learning_rate": 8.975810109413315e-06, "loss": 0.6182, "step": 19336 }, { "epoch": 0.23119597317041093, "grad_norm": 1.8462998867034912, "learning_rate": 8.975692697586246e-06, "loss": 0.6125, "step": 19337 }, { "epoch": 0.2312079293152716, "grad_norm": 1.7766332626342773, "learning_rate": 8.975575279797614e-06, "loss": 0.6308, "step": 19338 }, { "epoch": 0.23121988546013222, "grad_norm": 3.747647523880005, "learning_rate": 8.975457856047592e-06, "loss": 0.7364, "step": 19339 }, { "epoch": 0.23123184160499288, "grad_norm": 4.39052677154541, "learning_rate": 8.975340426336356e-06, "loss": 0.6609, "step": 19340 }, { "epoch": 0.23124379774985354, "grad_norm": 1.6449203491210938, "learning_rate": 8.975222990664084e-06, "loss": 0.6412, "step": 19341 }, { "epoch": 0.2312557538947142, "grad_norm": 1.9239073991775513, "learning_rate": 8.975105549030951e-06, "loss": 0.5876, "step": 19342 }, { "epoch": 0.23126771003957483, "grad_norm": 9.932222366333008, "learning_rate": 8.974988101437132e-06, "loss": 0.6503, "step": 19343 }, { "epoch": 0.2312796661844355, "grad_norm": 3.0569071769714355, "learning_rate": 8.974870647882803e-06, "loss": 0.659, "step": 19344 }, { "epoch": 0.23129162232929615, "grad_norm": 3.929511547088623, "learning_rate": 8.974753188368144e-06, "loss": 0.5934, "step": 19345 }, { "epoch": 0.23130357847415678, "grad_norm": 2.5274765491485596, "learning_rate": 8.974635722893328e-06, "loss": 0.5715, "step": 19346 }, { "epoch": 0.23131553461901744, "grad_norm": 1.6635980606079102, "learning_rate": 8.97451825145853e-06, "loss": 0.5832, "step": 19347 }, { "epoch": 0.2313274907638781, "grad_norm": 1.8387174606323242, "learning_rate": 8.974400774063929e-06, "loss": 0.5754, "step": 19348 }, { "epoch": 0.23133944690873876, "grad_norm": 47.00367736816406, "learning_rate": 8.9742832907097e-06, "loss": 0.57, "step": 19349 }, { "epoch": 0.23135140305359939, "grad_norm": 3.850982427597046, "learning_rate": 8.974165801396019e-06, "loss": 0.5981, "step": 19350 }, { "epoch": 0.23136335919846004, "grad_norm": 4.287631511688232, "learning_rate": 8.974048306123063e-06, "loss": 0.6178, "step": 19351 }, { "epoch": 0.2313753153433207, "grad_norm": 2.8266441822052, "learning_rate": 8.973930804891006e-06, "loss": 0.5417, "step": 19352 }, { "epoch": 0.23138727148818136, "grad_norm": 7.515254020690918, "learning_rate": 8.973813297700025e-06, "loss": 0.5996, "step": 19353 }, { "epoch": 0.231399227633042, "grad_norm": 5.708799362182617, "learning_rate": 8.973695784550298e-06, "loss": 0.6821, "step": 19354 }, { "epoch": 0.23141118377790265, "grad_norm": 1.9416613578796387, "learning_rate": 8.973578265442e-06, "loss": 0.6279, "step": 19355 }, { "epoch": 0.2314231399227633, "grad_norm": 2.1264147758483887, "learning_rate": 8.973460740375308e-06, "loss": 0.5656, "step": 19356 }, { "epoch": 0.23143509606762397, "grad_norm": 2.340712785720825, "learning_rate": 8.973343209350397e-06, "loss": 0.5579, "step": 19357 }, { "epoch": 0.2314470522124846, "grad_norm": 11.197348594665527, "learning_rate": 8.973225672367444e-06, "loss": 0.6055, "step": 19358 }, { "epoch": 0.23145900835734526, "grad_norm": 1.9002012014389038, "learning_rate": 8.973108129426625e-06, "loss": 0.5734, "step": 19359 }, { "epoch": 0.23147096450220592, "grad_norm": 2.0625, "learning_rate": 8.972990580528114e-06, "loss": 0.6905, "step": 19360 }, { "epoch": 0.23148292064706655, "grad_norm": 2.4840104579925537, "learning_rate": 8.972873025672093e-06, "loss": 0.5452, "step": 19361 }, { "epoch": 0.2314948767919272, "grad_norm": 2.788085460662842, "learning_rate": 8.97275546485873e-06, "loss": 0.5705, "step": 19362 }, { "epoch": 0.23150683293678787, "grad_norm": 1.6242491006851196, "learning_rate": 8.972637898088211e-06, "loss": 0.5744, "step": 19363 }, { "epoch": 0.23151878908164852, "grad_norm": 2.2896902561187744, "learning_rate": 8.972520325360706e-06, "loss": 0.6347, "step": 19364 }, { "epoch": 0.23153074522650915, "grad_norm": 2.0716707706451416, "learning_rate": 8.972402746676392e-06, "loss": 0.5943, "step": 19365 }, { "epoch": 0.2315427013713698, "grad_norm": 3.189413547515869, "learning_rate": 8.972285162035446e-06, "loss": 0.6225, "step": 19366 }, { "epoch": 0.23155465751623047, "grad_norm": 2.0094175338745117, "learning_rate": 8.972167571438044e-06, "loss": 0.6528, "step": 19367 }, { "epoch": 0.23156661366109113, "grad_norm": 3.2098679542541504, "learning_rate": 8.972049974884362e-06, "loss": 0.6177, "step": 19368 }, { "epoch": 0.23157856980595176, "grad_norm": 2.716045379638672, "learning_rate": 8.971932372374578e-06, "loss": 0.6022, "step": 19369 }, { "epoch": 0.23159052595081242, "grad_norm": 5.744541168212891, "learning_rate": 8.971814763908869e-06, "loss": 0.7, "step": 19370 }, { "epoch": 0.23160248209567308, "grad_norm": 4.389998435974121, "learning_rate": 8.971697149487406e-06, "loss": 0.5758, "step": 19371 }, { "epoch": 0.2316144382405337, "grad_norm": 1.7642340660095215, "learning_rate": 8.971579529110372e-06, "loss": 0.6937, "step": 19372 }, { "epoch": 0.23162639438539437, "grad_norm": 1.8444751501083374, "learning_rate": 8.971461902777938e-06, "loss": 0.6122, "step": 19373 }, { "epoch": 0.23163835053025503, "grad_norm": 2.224632740020752, "learning_rate": 8.971344270490285e-06, "loss": 0.6556, "step": 19374 }, { "epoch": 0.23165030667511569, "grad_norm": 1.6172796487808228, "learning_rate": 8.971226632247587e-06, "loss": 0.5384, "step": 19375 }, { "epoch": 0.23166226281997632, "grad_norm": 2.4279944896698, "learning_rate": 8.97110898805002e-06, "loss": 0.594, "step": 19376 }, { "epoch": 0.23167421896483698, "grad_norm": 11.361741065979004, "learning_rate": 8.970991337897758e-06, "loss": 0.6213, "step": 19377 }, { "epoch": 0.23168617510969763, "grad_norm": 3.361095428466797, "learning_rate": 8.970873681790984e-06, "loss": 0.608, "step": 19378 }, { "epoch": 0.2316981312545583, "grad_norm": 3.5832107067108154, "learning_rate": 8.97075601972987e-06, "loss": 0.6657, "step": 19379 }, { "epoch": 0.23171008739941892, "grad_norm": 1.7639617919921875, "learning_rate": 8.970638351714593e-06, "loss": 0.642, "step": 19380 }, { "epoch": 0.23172204354427958, "grad_norm": 2.7086613178253174, "learning_rate": 8.970520677745328e-06, "loss": 0.5445, "step": 19381 }, { "epoch": 0.23173399968914024, "grad_norm": 2.243976593017578, "learning_rate": 8.970402997822256e-06, "loss": 0.6018, "step": 19382 }, { "epoch": 0.23174595583400087, "grad_norm": 3.4397835731506348, "learning_rate": 8.97028531194555e-06, "loss": 0.6333, "step": 19383 }, { "epoch": 0.23175791197886153, "grad_norm": 3.489182233810425, "learning_rate": 8.970167620115386e-06, "loss": 0.6565, "step": 19384 }, { "epoch": 0.2317698681237222, "grad_norm": 3.8220701217651367, "learning_rate": 8.970049922331943e-06, "loss": 0.6482, "step": 19385 }, { "epoch": 0.23178182426858285, "grad_norm": 1.9798195362091064, "learning_rate": 8.969932218595395e-06, "loss": 0.5927, "step": 19386 }, { "epoch": 0.23179378041344348, "grad_norm": 1.7477658987045288, "learning_rate": 8.96981450890592e-06, "loss": 0.5247, "step": 19387 }, { "epoch": 0.23180573655830414, "grad_norm": 2.5539748668670654, "learning_rate": 8.969696793263693e-06, "loss": 0.6443, "step": 19388 }, { "epoch": 0.2318176927031648, "grad_norm": 1.4656504392623901, "learning_rate": 8.969579071668893e-06, "loss": 0.5436, "step": 19389 }, { "epoch": 0.23182964884802545, "grad_norm": 2.466179847717285, "learning_rate": 8.969461344121693e-06, "loss": 0.5758, "step": 19390 }, { "epoch": 0.23184160499288609, "grad_norm": 2.0637590885162354, "learning_rate": 8.969343610622274e-06, "loss": 0.6431, "step": 19391 }, { "epoch": 0.23185356113774674, "grad_norm": 2.116440534591675, "learning_rate": 8.969225871170808e-06, "loss": 0.585, "step": 19392 }, { "epoch": 0.2318655172826074, "grad_norm": 2.788036346435547, "learning_rate": 8.969108125767474e-06, "loss": 0.5495, "step": 19393 }, { "epoch": 0.23187747342746803, "grad_norm": 1.8556944131851196, "learning_rate": 8.96899037441245e-06, "loss": 0.6582, "step": 19394 }, { "epoch": 0.2318894295723287, "grad_norm": 2.8088908195495605, "learning_rate": 8.96887261710591e-06, "loss": 0.6129, "step": 19395 }, { "epoch": 0.23190138571718935, "grad_norm": 6.190965175628662, "learning_rate": 8.96875485384803e-06, "loss": 0.6151, "step": 19396 }, { "epoch": 0.23191334186205, "grad_norm": 2.991055727005005, "learning_rate": 8.968637084638988e-06, "loss": 0.6276, "step": 19397 }, { "epoch": 0.23192529800691064, "grad_norm": 3.405413866043091, "learning_rate": 8.968519309478962e-06, "loss": 0.6279, "step": 19398 }, { "epoch": 0.2319372541517713, "grad_norm": 3.330209493637085, "learning_rate": 8.968401528368129e-06, "loss": 0.6365, "step": 19399 }, { "epoch": 0.23194921029663196, "grad_norm": 2.3327391147613525, "learning_rate": 8.968283741306661e-06, "loss": 0.6534, "step": 19400 }, { "epoch": 0.23196116644149262, "grad_norm": 1.8871562480926514, "learning_rate": 8.968165948294737e-06, "loss": 0.6389, "step": 19401 }, { "epoch": 0.23197312258635325, "grad_norm": 1.5754791498184204, "learning_rate": 8.968048149332536e-06, "loss": 0.5166, "step": 19402 }, { "epoch": 0.2319850787312139, "grad_norm": 3.056485891342163, "learning_rate": 8.967930344420232e-06, "loss": 0.6514, "step": 19403 }, { "epoch": 0.23199703487607456, "grad_norm": 23.29018211364746, "learning_rate": 8.967812533558002e-06, "loss": 0.6369, "step": 19404 }, { "epoch": 0.23200899102093522, "grad_norm": 1.7604377269744873, "learning_rate": 8.967694716746023e-06, "loss": 0.5534, "step": 19405 }, { "epoch": 0.23202094716579585, "grad_norm": 2.8721039295196533, "learning_rate": 8.967576893984473e-06, "loss": 0.6897, "step": 19406 }, { "epoch": 0.2320329033106565, "grad_norm": 1.8150075674057007, "learning_rate": 8.967459065273526e-06, "loss": 0.6013, "step": 19407 }, { "epoch": 0.23204485945551717, "grad_norm": 1.5920900106430054, "learning_rate": 8.96734123061336e-06, "loss": 0.512, "step": 19408 }, { "epoch": 0.2320568156003778, "grad_norm": 2.305673599243164, "learning_rate": 8.967223390004151e-06, "loss": 0.6017, "step": 19409 }, { "epoch": 0.23206877174523846, "grad_norm": 3.4418020248413086, "learning_rate": 8.96710554344608e-06, "loss": 0.733, "step": 19410 }, { "epoch": 0.23208072789009912, "grad_norm": 4.342772006988525, "learning_rate": 8.966987690939317e-06, "loss": 0.6536, "step": 19411 }, { "epoch": 0.23209268403495978, "grad_norm": 2.6563026905059814, "learning_rate": 8.966869832484042e-06, "loss": 0.6016, "step": 19412 }, { "epoch": 0.2321046401798204, "grad_norm": 5.842294216156006, "learning_rate": 8.966751968080431e-06, "loss": 0.5202, "step": 19413 }, { "epoch": 0.23211659632468107, "grad_norm": 3.389324426651001, "learning_rate": 8.966634097728664e-06, "loss": 0.6203, "step": 19414 }, { "epoch": 0.23212855246954173, "grad_norm": 5.377752304077148, "learning_rate": 8.966516221428912e-06, "loss": 0.6405, "step": 19415 }, { "epoch": 0.23214050861440239, "grad_norm": 2.4281532764434814, "learning_rate": 8.966398339181358e-06, "loss": 0.6531, "step": 19416 }, { "epoch": 0.23215246475926302, "grad_norm": 2.3201944828033447, "learning_rate": 8.966280450986173e-06, "loss": 0.7251, "step": 19417 }, { "epoch": 0.23216442090412368, "grad_norm": 3.6878669261932373, "learning_rate": 8.966162556843535e-06, "loss": 0.6004, "step": 19418 }, { "epoch": 0.23217637704898433, "grad_norm": 2.179828405380249, "learning_rate": 8.966044656753625e-06, "loss": 0.5709, "step": 19419 }, { "epoch": 0.23218833319384496, "grad_norm": 3.879408836364746, "learning_rate": 8.965926750716619e-06, "loss": 0.6217, "step": 19420 }, { "epoch": 0.23220028933870562, "grad_norm": 4.296900749206543, "learning_rate": 8.965808838732688e-06, "loss": 0.5811, "step": 19421 }, { "epoch": 0.23221224548356628, "grad_norm": 5.416468143463135, "learning_rate": 8.965690920802013e-06, "loss": 0.6125, "step": 19422 }, { "epoch": 0.23222420162842694, "grad_norm": 2.3983025550842285, "learning_rate": 8.965572996924771e-06, "loss": 0.5497, "step": 19423 }, { "epoch": 0.23223615777328757, "grad_norm": 2.062148094177246, "learning_rate": 8.965455067101138e-06, "loss": 0.5393, "step": 19424 }, { "epoch": 0.23224811391814823, "grad_norm": 7.0456132888793945, "learning_rate": 8.965337131331291e-06, "loss": 0.572, "step": 19425 }, { "epoch": 0.2322600700630089, "grad_norm": 3.578927516937256, "learning_rate": 8.965219189615407e-06, "loss": 0.6065, "step": 19426 }, { "epoch": 0.23227202620786955, "grad_norm": 2.7062671184539795, "learning_rate": 8.965101241953665e-06, "loss": 0.606, "step": 19427 }, { "epoch": 0.23228398235273018, "grad_norm": 2.9527573585510254, "learning_rate": 8.964983288346236e-06, "loss": 0.513, "step": 19428 }, { "epoch": 0.23229593849759084, "grad_norm": 3.326646566390991, "learning_rate": 8.964865328793301e-06, "loss": 0.5261, "step": 19429 }, { "epoch": 0.2323078946424515, "grad_norm": 2.4939253330230713, "learning_rate": 8.964747363295038e-06, "loss": 0.5741, "step": 19430 }, { "epoch": 0.23231985078731213, "grad_norm": 1.4901233911514282, "learning_rate": 8.96462939185162e-06, "loss": 0.5791, "step": 19431 }, { "epoch": 0.23233180693217279, "grad_norm": 2.4667046070098877, "learning_rate": 8.964511414463228e-06, "loss": 0.6341, "step": 19432 }, { "epoch": 0.23234376307703344, "grad_norm": 1.6552141904830933, "learning_rate": 8.964393431130037e-06, "loss": 0.5847, "step": 19433 }, { "epoch": 0.2323557192218941, "grad_norm": 1.8147794008255005, "learning_rate": 8.964275441852224e-06, "loss": 0.4851, "step": 19434 }, { "epoch": 0.23236767536675473, "grad_norm": 2.399404525756836, "learning_rate": 8.964157446629965e-06, "loss": 0.6402, "step": 19435 }, { "epoch": 0.2323796315116154, "grad_norm": 1.4008562564849854, "learning_rate": 8.964039445463439e-06, "loss": 0.5617, "step": 19436 }, { "epoch": 0.23239158765647605, "grad_norm": 5.722958087921143, "learning_rate": 8.963921438352821e-06, "loss": 0.4778, "step": 19437 }, { "epoch": 0.2324035438013367, "grad_norm": 2.0044007301330566, "learning_rate": 8.963803425298289e-06, "loss": 0.5991, "step": 19438 }, { "epoch": 0.23241549994619734, "grad_norm": 5.369009971618652, "learning_rate": 8.963685406300018e-06, "loss": 0.7493, "step": 19439 }, { "epoch": 0.232427456091058, "grad_norm": 2.0180578231811523, "learning_rate": 8.963567381358188e-06, "loss": 0.5513, "step": 19440 }, { "epoch": 0.23243941223591866, "grad_norm": 3.1928038597106934, "learning_rate": 8.963449350472974e-06, "loss": 0.7038, "step": 19441 }, { "epoch": 0.2324513683807793, "grad_norm": 1.9750381708145142, "learning_rate": 8.963331313644556e-06, "loss": 0.6375, "step": 19442 }, { "epoch": 0.23246332452563995, "grad_norm": 1.75736665725708, "learning_rate": 8.963213270873106e-06, "loss": 0.6104, "step": 19443 }, { "epoch": 0.2324752806705006, "grad_norm": 2.15095591545105, "learning_rate": 8.963095222158806e-06, "loss": 0.6682, "step": 19444 }, { "epoch": 0.23248723681536126, "grad_norm": 3.0593643188476562, "learning_rate": 8.962977167501828e-06, "loss": 0.6513, "step": 19445 }, { "epoch": 0.2324991929602219, "grad_norm": 1.851003646850586, "learning_rate": 8.962859106902353e-06, "loss": 0.6358, "step": 19446 }, { "epoch": 0.23251114910508255, "grad_norm": 5.527955532073975, "learning_rate": 8.962741040360556e-06, "loss": 0.6713, "step": 19447 }, { "epoch": 0.2325231052499432, "grad_norm": 5.870550632476807, "learning_rate": 8.962622967876615e-06, "loss": 0.6382, "step": 19448 }, { "epoch": 0.23253506139480387, "grad_norm": 2.8329708576202393, "learning_rate": 8.962504889450708e-06, "loss": 0.5818, "step": 19449 }, { "epoch": 0.2325470175396645, "grad_norm": 2.2092714309692383, "learning_rate": 8.96238680508301e-06, "loss": 0.6874, "step": 19450 }, { "epoch": 0.23255897368452516, "grad_norm": 1.5067976713180542, "learning_rate": 8.962268714773699e-06, "loss": 0.5624, "step": 19451 }, { "epoch": 0.23257092982938582, "grad_norm": 5.273191452026367, "learning_rate": 8.962150618522952e-06, "loss": 0.6329, "step": 19452 }, { "epoch": 0.23258288597424645, "grad_norm": 1.8101460933685303, "learning_rate": 8.962032516330945e-06, "loss": 0.6275, "step": 19453 }, { "epoch": 0.2325948421191071, "grad_norm": 2.0918121337890625, "learning_rate": 8.961914408197857e-06, "loss": 0.6985, "step": 19454 }, { "epoch": 0.23260679826396777, "grad_norm": 2.1273062229156494, "learning_rate": 8.961796294123863e-06, "loss": 0.6868, "step": 19455 }, { "epoch": 0.23261875440882843, "grad_norm": 3.2735114097595215, "learning_rate": 8.961678174109143e-06, "loss": 0.5793, "step": 19456 }, { "epoch": 0.23263071055368906, "grad_norm": 2.3532135486602783, "learning_rate": 8.961560048153872e-06, "loss": 0.5893, "step": 19457 }, { "epoch": 0.23264266669854972, "grad_norm": 2.94413423538208, "learning_rate": 8.961441916258228e-06, "loss": 0.5813, "step": 19458 }, { "epoch": 0.23265462284341037, "grad_norm": 1.5435068607330322, "learning_rate": 8.961323778422389e-06, "loss": 0.6192, "step": 19459 }, { "epoch": 0.23266657898827103, "grad_norm": 5.406652927398682, "learning_rate": 8.961205634646529e-06, "loss": 0.6338, "step": 19460 }, { "epoch": 0.23267853513313166, "grad_norm": 2.4368197917938232, "learning_rate": 8.961087484930825e-06, "loss": 0.5806, "step": 19461 }, { "epoch": 0.23269049127799232, "grad_norm": 2.4395740032196045, "learning_rate": 8.960969329275459e-06, "loss": 0.5667, "step": 19462 }, { "epoch": 0.23270244742285298, "grad_norm": 2.8763480186462402, "learning_rate": 8.960851167680606e-06, "loss": 0.674, "step": 19463 }, { "epoch": 0.23271440356771364, "grad_norm": 2.022686243057251, "learning_rate": 8.960733000146443e-06, "loss": 0.6113, "step": 19464 }, { "epoch": 0.23272635971257427, "grad_norm": 3.1952977180480957, "learning_rate": 8.960614826673143e-06, "loss": 0.6396, "step": 19465 }, { "epoch": 0.23273831585743493, "grad_norm": 1.8094995021820068, "learning_rate": 8.96049664726089e-06, "loss": 0.5897, "step": 19466 }, { "epoch": 0.2327502720022956, "grad_norm": 3.451672315597534, "learning_rate": 8.960378461909858e-06, "loss": 0.6132, "step": 19467 }, { "epoch": 0.23276222814715622, "grad_norm": 1.774794101715088, "learning_rate": 8.960260270620225e-06, "loss": 0.5552, "step": 19468 }, { "epoch": 0.23277418429201688, "grad_norm": 6.362673282623291, "learning_rate": 8.960142073392167e-06, "loss": 0.6177, "step": 19469 }, { "epoch": 0.23278614043687754, "grad_norm": 2.509434461593628, "learning_rate": 8.96002387022586e-06, "loss": 0.5696, "step": 19470 }, { "epoch": 0.2327980965817382, "grad_norm": 3.038935422897339, "learning_rate": 8.959905661121484e-06, "loss": 0.6784, "step": 19471 }, { "epoch": 0.23281005272659883, "grad_norm": 4.650693416595459, "learning_rate": 8.959787446079218e-06, "loss": 0.6273, "step": 19472 }, { "epoch": 0.23282200887145948, "grad_norm": 2.003566265106201, "learning_rate": 8.959669225099235e-06, "loss": 0.5807, "step": 19473 }, { "epoch": 0.23283396501632014, "grad_norm": 5.056605339050293, "learning_rate": 8.959550998181712e-06, "loss": 0.643, "step": 19474 }, { "epoch": 0.2328459211611808, "grad_norm": 3.4943718910217285, "learning_rate": 8.959432765326831e-06, "loss": 0.6045, "step": 19475 }, { "epoch": 0.23285787730604143, "grad_norm": 2.8597984313964844, "learning_rate": 8.959314526534766e-06, "loss": 0.692, "step": 19476 }, { "epoch": 0.2328698334509021, "grad_norm": 3.3387978076934814, "learning_rate": 8.959196281805693e-06, "loss": 0.6765, "step": 19477 }, { "epoch": 0.23288178959576275, "grad_norm": 3.148949384689331, "learning_rate": 8.959078031139793e-06, "loss": 0.5702, "step": 19478 }, { "epoch": 0.23289374574062338, "grad_norm": 12.858283042907715, "learning_rate": 8.95895977453724e-06, "loss": 0.5337, "step": 19479 }, { "epoch": 0.23290570188548404, "grad_norm": 1.7239148616790771, "learning_rate": 8.958841511998213e-06, "loss": 0.5164, "step": 19480 }, { "epoch": 0.2329176580303447, "grad_norm": 2.2112035751342773, "learning_rate": 8.95872324352289e-06, "loss": 0.6335, "step": 19481 }, { "epoch": 0.23292961417520536, "grad_norm": 1.6283161640167236, "learning_rate": 8.958604969111446e-06, "loss": 0.6463, "step": 19482 }, { "epoch": 0.232941570320066, "grad_norm": 1.6371115446090698, "learning_rate": 8.95848668876406e-06, "loss": 0.6634, "step": 19483 }, { "epoch": 0.23295352646492665, "grad_norm": 1.6063400506973267, "learning_rate": 8.958368402480911e-06, "loss": 0.6592, "step": 19484 }, { "epoch": 0.2329654826097873, "grad_norm": 5.032799243927002, "learning_rate": 8.958250110262174e-06, "loss": 0.5668, "step": 19485 }, { "epoch": 0.23297743875464796, "grad_norm": 2.827086925506592, "learning_rate": 8.958131812108027e-06, "loss": 0.643, "step": 19486 }, { "epoch": 0.2329893948995086, "grad_norm": 3.6239655017852783, "learning_rate": 8.958013508018646e-06, "loss": 0.6174, "step": 19487 }, { "epoch": 0.23300135104436925, "grad_norm": 3.20645809173584, "learning_rate": 8.957895197994211e-06, "loss": 0.5985, "step": 19488 }, { "epoch": 0.2330133071892299, "grad_norm": 1.6494466066360474, "learning_rate": 8.957776882034897e-06, "loss": 0.5672, "step": 19489 }, { "epoch": 0.23302526333409054, "grad_norm": 3.524062156677246, "learning_rate": 8.957658560140881e-06, "loss": 0.6092, "step": 19490 }, { "epoch": 0.2330372194789512, "grad_norm": 4.288513660430908, "learning_rate": 8.957540232312345e-06, "loss": 0.5618, "step": 19491 }, { "epoch": 0.23304917562381186, "grad_norm": 2.421651601791382, "learning_rate": 8.957421898549461e-06, "loss": 0.6366, "step": 19492 }, { "epoch": 0.23306113176867252, "grad_norm": 2.126462459564209, "learning_rate": 8.957303558852411e-06, "loss": 0.6444, "step": 19493 }, { "epoch": 0.23307308791353315, "grad_norm": 1.976438283920288, "learning_rate": 8.95718521322137e-06, "loss": 0.6637, "step": 19494 }, { "epoch": 0.2330850440583938, "grad_norm": 3.8990297317504883, "learning_rate": 8.957066861656513e-06, "loss": 0.6325, "step": 19495 }, { "epoch": 0.23309700020325447, "grad_norm": 2.097841501235962, "learning_rate": 8.956948504158024e-06, "loss": 0.6619, "step": 19496 }, { "epoch": 0.23310895634811513, "grad_norm": 1.6264913082122803, "learning_rate": 8.956830140726073e-06, "loss": 0.5815, "step": 19497 }, { "epoch": 0.23312091249297576, "grad_norm": 5.414178848266602, "learning_rate": 8.956711771360843e-06, "loss": 0.6786, "step": 19498 }, { "epoch": 0.23313286863783642, "grad_norm": 2.07851505279541, "learning_rate": 8.95659339606251e-06, "loss": 0.5947, "step": 19499 }, { "epoch": 0.23314482478269707, "grad_norm": 2.778744697570801, "learning_rate": 8.95647501483125e-06, "loss": 0.6927, "step": 19500 }, { "epoch": 0.2331567809275577, "grad_norm": 1.8081836700439453, "learning_rate": 8.956356627667242e-06, "loss": 0.6555, "step": 19501 }, { "epoch": 0.23316873707241836, "grad_norm": 1.8368202447891235, "learning_rate": 8.956238234570665e-06, "loss": 0.6562, "step": 19502 }, { "epoch": 0.23318069321727902, "grad_norm": 3.567254066467285, "learning_rate": 8.956119835541693e-06, "loss": 0.6272, "step": 19503 }, { "epoch": 0.23319264936213968, "grad_norm": 2.8914296627044678, "learning_rate": 8.956001430580504e-06, "loss": 0.6362, "step": 19504 }, { "epoch": 0.2332046055070003, "grad_norm": 2.6274993419647217, "learning_rate": 8.955883019687279e-06, "loss": 0.6136, "step": 19505 }, { "epoch": 0.23321656165186097, "grad_norm": 2.2651093006134033, "learning_rate": 8.955764602862191e-06, "loss": 0.6031, "step": 19506 }, { "epoch": 0.23322851779672163, "grad_norm": 1.9267897605895996, "learning_rate": 8.955646180105422e-06, "loss": 0.5673, "step": 19507 }, { "epoch": 0.2332404739415823, "grad_norm": 2.1176257133483887, "learning_rate": 8.955527751417147e-06, "loss": 0.5528, "step": 19508 }, { "epoch": 0.23325243008644292, "grad_norm": 1.9473246335983276, "learning_rate": 8.955409316797543e-06, "loss": 0.6751, "step": 19509 }, { "epoch": 0.23326438623130358, "grad_norm": 2.8045103549957275, "learning_rate": 8.95529087624679e-06, "loss": 0.6369, "step": 19510 }, { "epoch": 0.23327634237616424, "grad_norm": 2.2230477333068848, "learning_rate": 8.955172429765065e-06, "loss": 0.6901, "step": 19511 }, { "epoch": 0.23328829852102487, "grad_norm": 2.1007790565490723, "learning_rate": 8.955053977352544e-06, "loss": 0.5908, "step": 19512 }, { "epoch": 0.23330025466588553, "grad_norm": 2.065664052963257, "learning_rate": 8.954935519009405e-06, "loss": 0.6117, "step": 19513 }, { "epoch": 0.23331221081074618, "grad_norm": 2.3584539890289307, "learning_rate": 8.954817054735826e-06, "loss": 0.61, "step": 19514 }, { "epoch": 0.23332416695560684, "grad_norm": 3.1387176513671875, "learning_rate": 8.954698584531984e-06, "loss": 0.5992, "step": 19515 }, { "epoch": 0.23333612310046747, "grad_norm": 2.4490487575531006, "learning_rate": 8.95458010839806e-06, "loss": 0.6797, "step": 19516 }, { "epoch": 0.23334807924532813, "grad_norm": 8.852270126342773, "learning_rate": 8.954461626334227e-06, "loss": 0.5627, "step": 19517 }, { "epoch": 0.2333600353901888, "grad_norm": 2.085581064224243, "learning_rate": 8.954343138340666e-06, "loss": 0.536, "step": 19518 }, { "epoch": 0.23337199153504945, "grad_norm": 1.715774655342102, "learning_rate": 8.954224644417552e-06, "loss": 0.5982, "step": 19519 }, { "epoch": 0.23338394767991008, "grad_norm": 1.7152776718139648, "learning_rate": 8.954106144565066e-06, "loss": 0.5771, "step": 19520 }, { "epoch": 0.23339590382477074, "grad_norm": 4.9518513679504395, "learning_rate": 8.953987638783382e-06, "loss": 0.6076, "step": 19521 }, { "epoch": 0.2334078599696314, "grad_norm": 2.4074087142944336, "learning_rate": 8.95386912707268e-06, "loss": 0.6474, "step": 19522 }, { "epoch": 0.23341981611449206, "grad_norm": 5.73890495300293, "learning_rate": 8.953750609433138e-06, "loss": 0.5828, "step": 19523 }, { "epoch": 0.2334317722593527, "grad_norm": 2.289309024810791, "learning_rate": 8.953632085864932e-06, "loss": 0.5742, "step": 19524 }, { "epoch": 0.23344372840421335, "grad_norm": 3.678901195526123, "learning_rate": 8.95351355636824e-06, "loss": 0.52, "step": 19525 }, { "epoch": 0.233455684549074, "grad_norm": 1.3915225267410278, "learning_rate": 8.953395020943241e-06, "loss": 0.601, "step": 19526 }, { "epoch": 0.23346764069393464, "grad_norm": 1.7001371383666992, "learning_rate": 8.953276479590115e-06, "loss": 0.5815, "step": 19527 }, { "epoch": 0.2334795968387953, "grad_norm": 1.93459951877594, "learning_rate": 8.953157932309033e-06, "loss": 0.5616, "step": 19528 }, { "epoch": 0.23349155298365595, "grad_norm": 1.8571693897247314, "learning_rate": 8.953039379100177e-06, "loss": 0.6202, "step": 19529 }, { "epoch": 0.2335035091285166, "grad_norm": 1.6634790897369385, "learning_rate": 8.952920819963726e-06, "loss": 0.7279, "step": 19530 }, { "epoch": 0.23351546527337724, "grad_norm": 2.3313348293304443, "learning_rate": 8.952802254899855e-06, "loss": 0.5454, "step": 19531 }, { "epoch": 0.2335274214182379, "grad_norm": 1.9134331941604614, "learning_rate": 8.952683683908744e-06, "loss": 0.5434, "step": 19532 }, { "epoch": 0.23353937756309856, "grad_norm": 2.185729503631592, "learning_rate": 8.95256510699057e-06, "loss": 0.5369, "step": 19533 }, { "epoch": 0.23355133370795922, "grad_norm": 1.7700364589691162, "learning_rate": 8.952446524145509e-06, "loss": 0.5577, "step": 19534 }, { "epoch": 0.23356328985281985, "grad_norm": 1.9714621305465698, "learning_rate": 8.952327935373741e-06, "loss": 0.6191, "step": 19535 }, { "epoch": 0.2335752459976805, "grad_norm": 2.110694169998169, "learning_rate": 8.952209340675444e-06, "loss": 0.6086, "step": 19536 }, { "epoch": 0.23358720214254117, "grad_norm": 2.161220073699951, "learning_rate": 8.952090740050793e-06, "loss": 0.5936, "step": 19537 }, { "epoch": 0.2335991582874018, "grad_norm": 1.7248669862747192, "learning_rate": 8.95197213349997e-06, "loss": 0.5814, "step": 19538 }, { "epoch": 0.23361111443226246, "grad_norm": 2.134963035583496, "learning_rate": 8.95185352102315e-06, "loss": 0.5842, "step": 19539 }, { "epoch": 0.23362307057712312, "grad_norm": 1.787798285484314, "learning_rate": 8.951734902620512e-06, "loss": 0.5916, "step": 19540 }, { "epoch": 0.23363502672198377, "grad_norm": 1.500524640083313, "learning_rate": 8.951616278292232e-06, "loss": 0.5871, "step": 19541 }, { "epoch": 0.2336469828668444, "grad_norm": 1.9821809530258179, "learning_rate": 8.951497648038491e-06, "loss": 0.6117, "step": 19542 }, { "epoch": 0.23365893901170506, "grad_norm": 1.9366379976272583, "learning_rate": 8.951379011859464e-06, "loss": 0.6577, "step": 19543 }, { "epoch": 0.23367089515656572, "grad_norm": 1.9178626537322998, "learning_rate": 8.951260369755332e-06, "loss": 0.544, "step": 19544 }, { "epoch": 0.23368285130142638, "grad_norm": 1.5071313381195068, "learning_rate": 8.95114172172627e-06, "loss": 0.6575, "step": 19545 }, { "epoch": 0.233694807446287, "grad_norm": 1.7667264938354492, "learning_rate": 8.951023067772457e-06, "loss": 0.6605, "step": 19546 }, { "epoch": 0.23370676359114767, "grad_norm": 1.8047504425048828, "learning_rate": 8.95090440789407e-06, "loss": 0.5969, "step": 19547 }, { "epoch": 0.23371871973600833, "grad_norm": 1.5176676511764526, "learning_rate": 8.950785742091287e-06, "loss": 0.6421, "step": 19548 }, { "epoch": 0.23373067588086896, "grad_norm": 1.8872196674346924, "learning_rate": 8.95066707036429e-06, "loss": 0.6495, "step": 19549 }, { "epoch": 0.23374263202572962, "grad_norm": 3.311959981918335, "learning_rate": 8.950548392713251e-06, "loss": 0.6647, "step": 19550 }, { "epoch": 0.23375458817059028, "grad_norm": 5.165584564208984, "learning_rate": 8.950429709138353e-06, "loss": 0.6078, "step": 19551 }, { "epoch": 0.23376654431545094, "grad_norm": 4.450503349304199, "learning_rate": 8.95031101963977e-06, "loss": 0.5927, "step": 19552 }, { "epoch": 0.23377850046031157, "grad_norm": 2.122905969619751, "learning_rate": 8.95019232421768e-06, "loss": 0.697, "step": 19553 }, { "epoch": 0.23379045660517223, "grad_norm": 1.7670694589614868, "learning_rate": 8.950073622872265e-06, "loss": 0.6478, "step": 19554 }, { "epoch": 0.23380241275003288, "grad_norm": 2.592221975326538, "learning_rate": 8.9499549156037e-06, "loss": 0.6377, "step": 19555 }, { "epoch": 0.23381436889489354, "grad_norm": 1.9452475309371948, "learning_rate": 8.949836202412164e-06, "loss": 0.5852, "step": 19556 }, { "epoch": 0.23382632503975417, "grad_norm": 1.4948005676269531, "learning_rate": 8.949717483297833e-06, "loss": 0.6709, "step": 19557 }, { "epoch": 0.23383828118461483, "grad_norm": 1.6685711145401, "learning_rate": 8.949598758260888e-06, "loss": 0.6888, "step": 19558 }, { "epoch": 0.2338502373294755, "grad_norm": 1.128739833831787, "learning_rate": 8.949480027301505e-06, "loss": 0.558, "step": 19559 }, { "epoch": 0.23386219347433612, "grad_norm": 3.6989972591400146, "learning_rate": 8.949361290419862e-06, "loss": 0.5144, "step": 19560 }, { "epoch": 0.23387414961919678, "grad_norm": 2.4376933574676514, "learning_rate": 8.949242547616139e-06, "loss": 0.5756, "step": 19561 }, { "epoch": 0.23388610576405744, "grad_norm": 5.3146772384643555, "learning_rate": 8.949123798890513e-06, "loss": 0.6512, "step": 19562 }, { "epoch": 0.2338980619089181, "grad_norm": 1.6772310733795166, "learning_rate": 8.94900504424316e-06, "loss": 0.6618, "step": 19563 }, { "epoch": 0.23391001805377873, "grad_norm": 2.389944314956665, "learning_rate": 8.948886283674261e-06, "loss": 0.6742, "step": 19564 }, { "epoch": 0.2339219741986394, "grad_norm": 3.6912243366241455, "learning_rate": 8.948767517183994e-06, "loss": 0.6049, "step": 19565 }, { "epoch": 0.23393393034350005, "grad_norm": 2.459834337234497, "learning_rate": 8.948648744772534e-06, "loss": 0.6336, "step": 19566 }, { "epoch": 0.2339458864883607, "grad_norm": 2.4821743965148926, "learning_rate": 8.948529966440063e-06, "loss": 0.4929, "step": 19567 }, { "epoch": 0.23395784263322134, "grad_norm": 3.9673643112182617, "learning_rate": 8.948411182186757e-06, "loss": 0.5404, "step": 19568 }, { "epoch": 0.233969798778082, "grad_norm": 2.072410821914673, "learning_rate": 8.948292392012794e-06, "loss": 0.5292, "step": 19569 }, { "epoch": 0.23398175492294265, "grad_norm": 2.3073689937591553, "learning_rate": 8.948173595918352e-06, "loss": 0.7191, "step": 19570 }, { "epoch": 0.23399371106780328, "grad_norm": 1.9917398691177368, "learning_rate": 8.948054793903611e-06, "loss": 0.6157, "step": 19571 }, { "epoch": 0.23400566721266394, "grad_norm": 1.5667028427124023, "learning_rate": 8.947935985968746e-06, "loss": 0.555, "step": 19572 }, { "epoch": 0.2340176233575246, "grad_norm": 1.6467833518981934, "learning_rate": 8.947817172113938e-06, "loss": 0.5888, "step": 19573 }, { "epoch": 0.23402957950238526, "grad_norm": 3.1156723499298096, "learning_rate": 8.947698352339365e-06, "loss": 0.6184, "step": 19574 }, { "epoch": 0.2340415356472459, "grad_norm": 2.010744333267212, "learning_rate": 8.947579526645203e-06, "loss": 0.5712, "step": 19575 }, { "epoch": 0.23405349179210655, "grad_norm": 2.023127555847168, "learning_rate": 8.94746069503163e-06, "loss": 0.6351, "step": 19576 }, { "epoch": 0.2340654479369672, "grad_norm": 2.0889482498168945, "learning_rate": 8.947341857498827e-06, "loss": 0.6293, "step": 19577 }, { "epoch": 0.23407740408182787, "grad_norm": 1.5218946933746338, "learning_rate": 8.947223014046973e-06, "loss": 0.6339, "step": 19578 }, { "epoch": 0.2340893602266885, "grad_norm": 3.4611685276031494, "learning_rate": 8.947104164676241e-06, "loss": 0.5808, "step": 19579 }, { "epoch": 0.23410131637154916, "grad_norm": 2.074542284011841, "learning_rate": 8.946985309386815e-06, "loss": 0.5688, "step": 19580 }, { "epoch": 0.23411327251640981, "grad_norm": 3.3902957439422607, "learning_rate": 8.946866448178869e-06, "loss": 0.7158, "step": 19581 }, { "epoch": 0.23412522866127047, "grad_norm": 2.243574857711792, "learning_rate": 8.946747581052585e-06, "loss": 0.614, "step": 19582 }, { "epoch": 0.2341371848061311, "grad_norm": 1.8314930200576782, "learning_rate": 8.946628708008135e-06, "loss": 0.5999, "step": 19583 }, { "epoch": 0.23414914095099176, "grad_norm": 1.640385389328003, "learning_rate": 8.946509829045702e-06, "loss": 0.5874, "step": 19584 }, { "epoch": 0.23416109709585242, "grad_norm": 4.877458572387695, "learning_rate": 8.946390944165465e-06, "loss": 0.5442, "step": 19585 }, { "epoch": 0.23417305324071305, "grad_norm": 2.848695993423462, "learning_rate": 8.9462720533676e-06, "loss": 0.6119, "step": 19586 }, { "epoch": 0.2341850093855737, "grad_norm": 1.7287613153457642, "learning_rate": 8.946153156652286e-06, "loss": 0.5828, "step": 19587 }, { "epoch": 0.23419696553043437, "grad_norm": 2.1072158813476562, "learning_rate": 8.946034254019702e-06, "loss": 0.622, "step": 19588 }, { "epoch": 0.23420892167529503, "grad_norm": 31.57440757751465, "learning_rate": 8.945915345470026e-06, "loss": 0.6005, "step": 19589 }, { "epoch": 0.23422087782015566, "grad_norm": 3.2375497817993164, "learning_rate": 8.945796431003436e-06, "loss": 0.619, "step": 19590 }, { "epoch": 0.23423283396501632, "grad_norm": 1.7541382312774658, "learning_rate": 8.945677510620107e-06, "loss": 0.6086, "step": 19591 }, { "epoch": 0.23424479010987698, "grad_norm": 7.0809326171875, "learning_rate": 8.945558584320223e-06, "loss": 0.5459, "step": 19592 }, { "epoch": 0.23425674625473764, "grad_norm": 3.195927381515503, "learning_rate": 8.945439652103962e-06, "loss": 0.6641, "step": 19593 }, { "epoch": 0.23426870239959827, "grad_norm": 2.6700003147125244, "learning_rate": 8.945320713971497e-06, "loss": 0.666, "step": 19594 }, { "epoch": 0.23428065854445893, "grad_norm": 1.9836317300796509, "learning_rate": 8.945201769923011e-06, "loss": 0.6811, "step": 19595 }, { "epoch": 0.23429261468931958, "grad_norm": 1.606819748878479, "learning_rate": 8.945082819958678e-06, "loss": 0.6052, "step": 19596 }, { "epoch": 0.23430457083418021, "grad_norm": 2.855226993560791, "learning_rate": 8.944963864078684e-06, "loss": 0.5856, "step": 19597 }, { "epoch": 0.23431652697904087, "grad_norm": 1.8420199155807495, "learning_rate": 8.9448449022832e-06, "loss": 0.5838, "step": 19598 }, { "epoch": 0.23432848312390153, "grad_norm": 1.8723735809326172, "learning_rate": 8.944725934572407e-06, "loss": 0.6174, "step": 19599 }, { "epoch": 0.2343404392687622, "grad_norm": 2.7694778442382812, "learning_rate": 8.944606960946485e-06, "loss": 0.6564, "step": 19600 }, { "epoch": 0.23435239541362282, "grad_norm": 1.981188416481018, "learning_rate": 8.94448798140561e-06, "loss": 0.6699, "step": 19601 }, { "epoch": 0.23436435155848348, "grad_norm": 2.9715452194213867, "learning_rate": 8.94436899594996e-06, "loss": 0.6074, "step": 19602 }, { "epoch": 0.23437630770334414, "grad_norm": 1.6611518859863281, "learning_rate": 8.944250004579716e-06, "loss": 0.6835, "step": 19603 }, { "epoch": 0.2343882638482048, "grad_norm": 3.596989631652832, "learning_rate": 8.944131007295055e-06, "loss": 0.5664, "step": 19604 }, { "epoch": 0.23440021999306543, "grad_norm": 3.6088595390319824, "learning_rate": 8.944012004096155e-06, "loss": 0.6901, "step": 19605 }, { "epoch": 0.2344121761379261, "grad_norm": 2.5076751708984375, "learning_rate": 8.943892994983195e-06, "loss": 0.614, "step": 19606 }, { "epoch": 0.23442413228278675, "grad_norm": 1.9668564796447754, "learning_rate": 8.943773979956354e-06, "loss": 0.6186, "step": 19607 }, { "epoch": 0.23443608842764738, "grad_norm": 1.5807298421859741, "learning_rate": 8.94365495901581e-06, "loss": 0.5553, "step": 19608 }, { "epoch": 0.23444804457250804, "grad_norm": 3.750586986541748, "learning_rate": 8.943535932161741e-06, "loss": 0.709, "step": 19609 }, { "epoch": 0.2344600007173687, "grad_norm": 11.211395263671875, "learning_rate": 8.943416899394325e-06, "loss": 0.5819, "step": 19610 }, { "epoch": 0.23447195686222935, "grad_norm": 1.6978161334991455, "learning_rate": 8.943297860713742e-06, "loss": 0.5581, "step": 19611 }, { "epoch": 0.23448391300708998, "grad_norm": 3.113255739212036, "learning_rate": 8.94317881612017e-06, "loss": 0.6652, "step": 19612 }, { "epoch": 0.23449586915195064, "grad_norm": 2.398167610168457, "learning_rate": 8.94305976561379e-06, "loss": 0.5419, "step": 19613 }, { "epoch": 0.2345078252968113, "grad_norm": 3.78114914894104, "learning_rate": 8.942940709194774e-06, "loss": 0.7116, "step": 19614 }, { "epoch": 0.23451978144167196, "grad_norm": 2.773484230041504, "learning_rate": 8.942821646863305e-06, "loss": 0.7246, "step": 19615 }, { "epoch": 0.2345317375865326, "grad_norm": 2.9901788234710693, "learning_rate": 8.942702578619562e-06, "loss": 0.5986, "step": 19616 }, { "epoch": 0.23454369373139325, "grad_norm": 2.087031841278076, "learning_rate": 8.942583504463723e-06, "loss": 0.609, "step": 19617 }, { "epoch": 0.2345556498762539, "grad_norm": 3.018599510192871, "learning_rate": 8.942464424395964e-06, "loss": 0.6256, "step": 19618 }, { "epoch": 0.23456760602111454, "grad_norm": 2.0010786056518555, "learning_rate": 8.942345338416466e-06, "loss": 0.6505, "step": 19619 }, { "epoch": 0.2345795621659752, "grad_norm": 2.4133670330047607, "learning_rate": 8.942226246525409e-06, "loss": 0.562, "step": 19620 }, { "epoch": 0.23459151831083586, "grad_norm": 2.4417989253997803, "learning_rate": 8.942107148722968e-06, "loss": 0.6065, "step": 19621 }, { "epoch": 0.23460347445569651, "grad_norm": 7.641660690307617, "learning_rate": 8.941988045009324e-06, "loss": 0.5624, "step": 19622 }, { "epoch": 0.23461543060055715, "grad_norm": 3.0027918815612793, "learning_rate": 8.941868935384654e-06, "loss": 0.5369, "step": 19623 }, { "epoch": 0.2346273867454178, "grad_norm": 1.9153376817703247, "learning_rate": 8.94174981984914e-06, "loss": 0.5284, "step": 19624 }, { "epoch": 0.23463934289027846, "grad_norm": 1.4436390399932861, "learning_rate": 8.941630698402956e-06, "loss": 0.5419, "step": 19625 }, { "epoch": 0.23465129903513912, "grad_norm": 16.699142456054688, "learning_rate": 8.941511571046281e-06, "loss": 0.588, "step": 19626 }, { "epoch": 0.23466325517999975, "grad_norm": 4.056909084320068, "learning_rate": 8.9413924377793e-06, "loss": 0.6503, "step": 19627 }, { "epoch": 0.2346752113248604, "grad_norm": 3.8469150066375732, "learning_rate": 8.941273298602185e-06, "loss": 0.6446, "step": 19628 }, { "epoch": 0.23468716746972107, "grad_norm": 2.634770154953003, "learning_rate": 8.941154153515116e-06, "loss": 0.5291, "step": 19629 }, { "epoch": 0.2346991236145817, "grad_norm": 1.8939043283462524, "learning_rate": 8.941035002518272e-06, "loss": 0.66, "step": 19630 }, { "epoch": 0.23471107975944236, "grad_norm": 2.2710015773773193, "learning_rate": 8.940915845611831e-06, "loss": 0.6083, "step": 19631 }, { "epoch": 0.23472303590430302, "grad_norm": 6.0179290771484375, "learning_rate": 8.940796682795976e-06, "loss": 0.6788, "step": 19632 }, { "epoch": 0.23473499204916368, "grad_norm": 4.555802345275879, "learning_rate": 8.94067751407088e-06, "loss": 0.6488, "step": 19633 }, { "epoch": 0.2347469481940243, "grad_norm": 3.718761444091797, "learning_rate": 8.940558339436724e-06, "loss": 0.6377, "step": 19634 }, { "epoch": 0.23475890433888497, "grad_norm": 2.0018794536590576, "learning_rate": 8.940439158893687e-06, "loss": 0.5851, "step": 19635 }, { "epoch": 0.23477086048374562, "grad_norm": 3.0697927474975586, "learning_rate": 8.940319972441949e-06, "loss": 0.5971, "step": 19636 }, { "epoch": 0.23478281662860628, "grad_norm": 3.5687360763549805, "learning_rate": 8.940200780081687e-06, "loss": 0.6276, "step": 19637 }, { "epoch": 0.23479477277346691, "grad_norm": 1.9924674034118652, "learning_rate": 8.940081581813078e-06, "loss": 0.6893, "step": 19638 }, { "epoch": 0.23480672891832757, "grad_norm": 4.051171779632568, "learning_rate": 8.939962377636304e-06, "loss": 0.6302, "step": 19639 }, { "epoch": 0.23481868506318823, "grad_norm": 2.381681203842163, "learning_rate": 8.939843167551543e-06, "loss": 0.6474, "step": 19640 }, { "epoch": 0.2348306412080489, "grad_norm": 5.85474967956543, "learning_rate": 8.939723951558973e-06, "loss": 0.5459, "step": 19641 }, { "epoch": 0.23484259735290952, "grad_norm": 4.114344596862793, "learning_rate": 8.939604729658771e-06, "loss": 0.7097, "step": 19642 }, { "epoch": 0.23485455349777018, "grad_norm": 8.68087100982666, "learning_rate": 8.93948550185112e-06, "loss": 0.5825, "step": 19643 }, { "epoch": 0.23486650964263084, "grad_norm": 1.7020812034606934, "learning_rate": 8.939366268136194e-06, "loss": 0.6305, "step": 19644 }, { "epoch": 0.23487846578749147, "grad_norm": 2.7003495693206787, "learning_rate": 8.939247028514176e-06, "loss": 0.6186, "step": 19645 }, { "epoch": 0.23489042193235213, "grad_norm": 2.1790993213653564, "learning_rate": 8.939127782985243e-06, "loss": 0.6062, "step": 19646 }, { "epoch": 0.2349023780772128, "grad_norm": 3.426723003387451, "learning_rate": 8.939008531549574e-06, "loss": 0.6564, "step": 19647 }, { "epoch": 0.23491433422207345, "grad_norm": 2.7401680946350098, "learning_rate": 8.938889274207347e-06, "loss": 0.5545, "step": 19648 }, { "epoch": 0.23492629036693408, "grad_norm": 1.3845661878585815, "learning_rate": 8.938770010958742e-06, "loss": 0.5591, "step": 19649 }, { "epoch": 0.23493824651179473, "grad_norm": 4.174652099609375, "learning_rate": 8.938650741803938e-06, "loss": 0.6461, "step": 19650 }, { "epoch": 0.2349502026566554, "grad_norm": 2.788770914077759, "learning_rate": 8.938531466743112e-06, "loss": 0.7092, "step": 19651 }, { "epoch": 0.23496215880151605, "grad_norm": 1.8988772630691528, "learning_rate": 8.938412185776443e-06, "loss": 0.5511, "step": 19652 }, { "epoch": 0.23497411494637668, "grad_norm": 2.5871098041534424, "learning_rate": 8.938292898904114e-06, "loss": 0.6876, "step": 19653 }, { "epoch": 0.23498607109123734, "grad_norm": 1.8527055978775024, "learning_rate": 8.938173606126299e-06, "loss": 0.6492, "step": 19654 }, { "epoch": 0.234998027236098, "grad_norm": 2.49080491065979, "learning_rate": 8.938054307443179e-06, "loss": 0.6069, "step": 19655 }, { "epoch": 0.23500998338095863, "grad_norm": 2.9627814292907715, "learning_rate": 8.937935002854933e-06, "loss": 0.6286, "step": 19656 }, { "epoch": 0.2350219395258193, "grad_norm": 3.461292028427124, "learning_rate": 8.937815692361738e-06, "loss": 0.6516, "step": 19657 }, { "epoch": 0.23503389567067995, "grad_norm": 2.4012060165405273, "learning_rate": 8.937696375963776e-06, "loss": 0.5666, "step": 19658 }, { "epoch": 0.2350458518155406, "grad_norm": 4.26945686340332, "learning_rate": 8.937577053661224e-06, "loss": 0.7233, "step": 19659 }, { "epoch": 0.23505780796040124, "grad_norm": 2.6686630249023438, "learning_rate": 8.93745772545426e-06, "loss": 0.5921, "step": 19660 }, { "epoch": 0.2350697641052619, "grad_norm": 2.3740243911743164, "learning_rate": 8.937338391343066e-06, "loss": 0.559, "step": 19661 }, { "epoch": 0.23508172025012256, "grad_norm": 3.584091901779175, "learning_rate": 8.937219051327818e-06, "loss": 0.629, "step": 19662 }, { "epoch": 0.23509367639498321, "grad_norm": 3.3481414318084717, "learning_rate": 8.937099705408696e-06, "loss": 0.7158, "step": 19663 }, { "epoch": 0.23510563253984385, "grad_norm": 1.5838483572006226, "learning_rate": 8.936980353585879e-06, "loss": 0.5585, "step": 19664 }, { "epoch": 0.2351175886847045, "grad_norm": 2.4762027263641357, "learning_rate": 8.936860995859546e-06, "loss": 0.7025, "step": 19665 }, { "epoch": 0.23512954482956516, "grad_norm": 1.8376797437667847, "learning_rate": 8.936741632229877e-06, "loss": 0.6238, "step": 19666 }, { "epoch": 0.2351415009744258, "grad_norm": 2.4334428310394287, "learning_rate": 8.936622262697049e-06, "loss": 0.6661, "step": 19667 }, { "epoch": 0.23515345711928645, "grad_norm": 1.9940876960754395, "learning_rate": 8.936502887261241e-06, "loss": 0.5957, "step": 19668 }, { "epoch": 0.2351654132641471, "grad_norm": 2.155921220779419, "learning_rate": 8.936383505922635e-06, "loss": 0.6122, "step": 19669 }, { "epoch": 0.23517736940900777, "grad_norm": 2.3629584312438965, "learning_rate": 8.936264118681406e-06, "loss": 0.6484, "step": 19670 }, { "epoch": 0.2351893255538684, "grad_norm": 2.3985941410064697, "learning_rate": 8.936144725537736e-06, "loss": 0.6367, "step": 19671 }, { "epoch": 0.23520128169872906, "grad_norm": 2.5348618030548096, "learning_rate": 8.936025326491801e-06, "loss": 0.6753, "step": 19672 }, { "epoch": 0.23521323784358972, "grad_norm": 1.6855932474136353, "learning_rate": 8.935905921543785e-06, "loss": 0.5435, "step": 19673 }, { "epoch": 0.23522519398845038, "grad_norm": 2.0993261337280273, "learning_rate": 8.935786510693862e-06, "loss": 0.5822, "step": 19674 }, { "epoch": 0.235237150133311, "grad_norm": 2.3285086154937744, "learning_rate": 8.935667093942215e-06, "loss": 0.5628, "step": 19675 }, { "epoch": 0.23524910627817167, "grad_norm": 2.771804094314575, "learning_rate": 8.93554767128902e-06, "loss": 0.536, "step": 19676 }, { "epoch": 0.23526106242303232, "grad_norm": 2.2272369861602783, "learning_rate": 8.935428242734458e-06, "loss": 0.7195, "step": 19677 }, { "epoch": 0.23527301856789296, "grad_norm": 1.4781272411346436, "learning_rate": 8.935308808278707e-06, "loss": 0.5977, "step": 19678 }, { "epoch": 0.23528497471275361, "grad_norm": 4.331489562988281, "learning_rate": 8.935189367921946e-06, "loss": 0.6222, "step": 19679 }, { "epoch": 0.23529693085761427, "grad_norm": 1.8962465524673462, "learning_rate": 8.935069921664355e-06, "loss": 0.6148, "step": 19680 }, { "epoch": 0.23530888700247493, "grad_norm": 1.633834719657898, "learning_rate": 8.934950469506112e-06, "loss": 0.573, "step": 19681 }, { "epoch": 0.23532084314733556, "grad_norm": 1.6431416273117065, "learning_rate": 8.934831011447397e-06, "loss": 0.687, "step": 19682 }, { "epoch": 0.23533279929219622, "grad_norm": 2.1839139461517334, "learning_rate": 8.93471154748839e-06, "loss": 0.566, "step": 19683 }, { "epoch": 0.23534475543705688, "grad_norm": 1.8987644910812378, "learning_rate": 8.934592077629267e-06, "loss": 0.6805, "step": 19684 }, { "epoch": 0.23535671158191754, "grad_norm": 2.877640962600708, "learning_rate": 8.934472601870212e-06, "loss": 0.6408, "step": 19685 }, { "epoch": 0.23536866772677817, "grad_norm": 1.6164216995239258, "learning_rate": 8.934353120211399e-06, "loss": 0.5468, "step": 19686 }, { "epoch": 0.23538062387163883, "grad_norm": 4.043858528137207, "learning_rate": 8.934233632653012e-06, "loss": 0.5344, "step": 19687 }, { "epoch": 0.2353925800164995, "grad_norm": 2.430833578109741, "learning_rate": 8.934114139195224e-06, "loss": 0.6032, "step": 19688 }, { "epoch": 0.23540453616136012, "grad_norm": 1.4367460012435913, "learning_rate": 8.933994639838219e-06, "loss": 0.6162, "step": 19689 }, { "epoch": 0.23541649230622078, "grad_norm": 2.683163642883301, "learning_rate": 8.933875134582178e-06, "loss": 0.6536, "step": 19690 }, { "epoch": 0.23542844845108143, "grad_norm": 2.4382457733154297, "learning_rate": 8.933755623427273e-06, "loss": 0.6452, "step": 19691 }, { "epoch": 0.2354404045959421, "grad_norm": 2.330098867416382, "learning_rate": 8.933636106373691e-06, "loss": 0.6265, "step": 19692 }, { "epoch": 0.23545236074080272, "grad_norm": 3.6163008213043213, "learning_rate": 8.933516583421607e-06, "loss": 0.6777, "step": 19693 }, { "epoch": 0.23546431688566338, "grad_norm": 2.1346535682678223, "learning_rate": 8.9333970545712e-06, "loss": 0.6791, "step": 19694 }, { "epoch": 0.23547627303052404, "grad_norm": 1.8808345794677734, "learning_rate": 8.93327751982265e-06, "loss": 0.6251, "step": 19695 }, { "epoch": 0.2354882291753847, "grad_norm": 2.9019036293029785, "learning_rate": 8.933157979176137e-06, "loss": 0.5753, "step": 19696 }, { "epoch": 0.23550018532024533, "grad_norm": 1.9063488245010376, "learning_rate": 8.933038432631839e-06, "loss": 0.6344, "step": 19697 }, { "epoch": 0.235512141465106, "grad_norm": 2.345416307449341, "learning_rate": 8.932918880189936e-06, "loss": 0.577, "step": 19698 }, { "epoch": 0.23552409760996665, "grad_norm": 1.4754509925842285, "learning_rate": 8.932799321850606e-06, "loss": 0.5766, "step": 19699 }, { "epoch": 0.2355360537548273, "grad_norm": 2.943349599838257, "learning_rate": 8.932679757614033e-06, "loss": 0.6245, "step": 19700 }, { "epoch": 0.23554800989968794, "grad_norm": 2.8829829692840576, "learning_rate": 8.932560187480391e-06, "loss": 0.6422, "step": 19701 }, { "epoch": 0.2355599660445486, "grad_norm": 2.8762247562408447, "learning_rate": 8.93244061144986e-06, "loss": 0.5033, "step": 19702 }, { "epoch": 0.23557192218940926, "grad_norm": 1.642783522605896, "learning_rate": 8.93232102952262e-06, "loss": 0.7018, "step": 19703 }, { "epoch": 0.23558387833426989, "grad_norm": 2.130258083343506, "learning_rate": 8.93220144169885e-06, "loss": 0.6203, "step": 19704 }, { "epoch": 0.23559583447913054, "grad_norm": 2.4174184799194336, "learning_rate": 8.932081847978732e-06, "loss": 0.6978, "step": 19705 }, { "epoch": 0.2356077906239912, "grad_norm": 4.820963382720947, "learning_rate": 8.931962248362443e-06, "loss": 0.5796, "step": 19706 }, { "epoch": 0.23561974676885186, "grad_norm": 2.2204999923706055, "learning_rate": 8.931842642850165e-06, "loss": 0.6148, "step": 19707 }, { "epoch": 0.2356317029137125, "grad_norm": 4.646651744842529, "learning_rate": 8.93172303144207e-06, "loss": 0.5873, "step": 19708 }, { "epoch": 0.23564365905857315, "grad_norm": 1.5692148208618164, "learning_rate": 8.931603414138346e-06, "loss": 0.4862, "step": 19709 }, { "epoch": 0.2356556152034338, "grad_norm": 3.7301642894744873, "learning_rate": 8.931483790939167e-06, "loss": 0.5334, "step": 19710 }, { "epoch": 0.23566757134829447, "grad_norm": 2.096057176589966, "learning_rate": 8.931364161844715e-06, "loss": 0.5683, "step": 19711 }, { "epoch": 0.2356795274931551, "grad_norm": 4.2626447677612305, "learning_rate": 8.931244526855168e-06, "loss": 0.6289, "step": 19712 }, { "epoch": 0.23569148363801576, "grad_norm": 1.4105881452560425, "learning_rate": 8.931124885970705e-06, "loss": 0.5377, "step": 19713 }, { "epoch": 0.23570343978287642, "grad_norm": 1.689695119857788, "learning_rate": 8.931005239191508e-06, "loss": 0.5265, "step": 19714 }, { "epoch": 0.23571539592773705, "grad_norm": 7.896805763244629, "learning_rate": 8.930885586517754e-06, "loss": 0.6354, "step": 19715 }, { "epoch": 0.2357273520725977, "grad_norm": 5.285929203033447, "learning_rate": 8.930765927949622e-06, "loss": 0.6412, "step": 19716 }, { "epoch": 0.23573930821745837, "grad_norm": 3.4319491386413574, "learning_rate": 8.930646263487292e-06, "loss": 0.667, "step": 19717 }, { "epoch": 0.23575126436231902, "grad_norm": 2.0719401836395264, "learning_rate": 8.930526593130947e-06, "loss": 0.5949, "step": 19718 }, { "epoch": 0.23576322050717965, "grad_norm": 2.4040606021881104, "learning_rate": 8.930406916880761e-06, "loss": 0.6294, "step": 19719 }, { "epoch": 0.2357751766520403, "grad_norm": 2.7674434185028076, "learning_rate": 8.930287234736917e-06, "loss": 0.6626, "step": 19720 }, { "epoch": 0.23578713279690097, "grad_norm": 1.8878501653671265, "learning_rate": 8.930167546699594e-06, "loss": 0.5728, "step": 19721 }, { "epoch": 0.23579908894176163, "grad_norm": 3.1724326610565186, "learning_rate": 8.930047852768967e-06, "loss": 0.5706, "step": 19722 }, { "epoch": 0.23581104508662226, "grad_norm": 3.3164749145507812, "learning_rate": 8.929928152945225e-06, "loss": 0.6556, "step": 19723 }, { "epoch": 0.23582300123148292, "grad_norm": 1.4688233137130737, "learning_rate": 8.929808447228537e-06, "loss": 0.6705, "step": 19724 }, { "epoch": 0.23583495737634358, "grad_norm": 2.2934587001800537, "learning_rate": 8.92968873561909e-06, "loss": 0.603, "step": 19725 }, { "epoch": 0.2358469135212042, "grad_norm": 4.076776027679443, "learning_rate": 8.92956901811706e-06, "loss": 0.5589, "step": 19726 }, { "epoch": 0.23585886966606487, "grad_norm": 3.8212521076202393, "learning_rate": 8.929449294722626e-06, "loss": 0.6002, "step": 19727 }, { "epoch": 0.23587082581092553, "grad_norm": 2.8695738315582275, "learning_rate": 8.92932956543597e-06, "loss": 0.5956, "step": 19728 }, { "epoch": 0.23588278195578619, "grad_norm": 3.2557361125946045, "learning_rate": 8.92920983025727e-06, "loss": 0.7084, "step": 19729 }, { "epoch": 0.23589473810064682, "grad_norm": 1.6616966724395752, "learning_rate": 8.929090089186708e-06, "loss": 0.556, "step": 19730 }, { "epoch": 0.23590669424550748, "grad_norm": 2.35945200920105, "learning_rate": 8.928970342224458e-06, "loss": 0.5571, "step": 19731 }, { "epoch": 0.23591865039036813, "grad_norm": 1.9935557842254639, "learning_rate": 8.928850589370705e-06, "loss": 0.5995, "step": 19732 }, { "epoch": 0.2359306065352288, "grad_norm": 2.2116799354553223, "learning_rate": 8.928730830625626e-06, "loss": 0.5209, "step": 19733 }, { "epoch": 0.23594256268008942, "grad_norm": 1.995734453201294, "learning_rate": 8.9286110659894e-06, "loss": 0.6088, "step": 19734 }, { "epoch": 0.23595451882495008, "grad_norm": 3.2635698318481445, "learning_rate": 8.92849129546221e-06, "loss": 0.5981, "step": 19735 }, { "epoch": 0.23596647496981074, "grad_norm": 4.018784999847412, "learning_rate": 8.928371519044233e-06, "loss": 0.6749, "step": 19736 }, { "epoch": 0.23597843111467137, "grad_norm": 2.2562875747680664, "learning_rate": 8.928251736735647e-06, "loss": 0.5985, "step": 19737 }, { "epoch": 0.23599038725953203, "grad_norm": 2.0648746490478516, "learning_rate": 8.928131948536636e-06, "loss": 0.6305, "step": 19738 }, { "epoch": 0.2360023434043927, "grad_norm": 2.631248950958252, "learning_rate": 8.928012154447377e-06, "loss": 0.5284, "step": 19739 }, { "epoch": 0.23601429954925335, "grad_norm": 2.04927134513855, "learning_rate": 8.927892354468048e-06, "loss": 0.6404, "step": 19740 }, { "epoch": 0.23602625569411398, "grad_norm": 2.128391981124878, "learning_rate": 8.927772548598831e-06, "loss": 0.5546, "step": 19741 }, { "epoch": 0.23603821183897464, "grad_norm": 1.6316413879394531, "learning_rate": 8.927652736839905e-06, "loss": 0.5787, "step": 19742 }, { "epoch": 0.2360501679838353, "grad_norm": 13.430635452270508, "learning_rate": 8.92753291919145e-06, "loss": 0.5907, "step": 19743 }, { "epoch": 0.23606212412869595, "grad_norm": 1.804815411567688, "learning_rate": 8.927413095653648e-06, "loss": 0.5855, "step": 19744 }, { "epoch": 0.23607408027355659, "grad_norm": 2.6001336574554443, "learning_rate": 8.927293266226673e-06, "loss": 0.5728, "step": 19745 }, { "epoch": 0.23608603641841724, "grad_norm": 2.986130714416504, "learning_rate": 8.927173430910708e-06, "loss": 0.5934, "step": 19746 }, { "epoch": 0.2360979925632779, "grad_norm": 3.5093178749084473, "learning_rate": 8.927053589705933e-06, "loss": 0.5696, "step": 19747 }, { "epoch": 0.23610994870813853, "grad_norm": 2.6426548957824707, "learning_rate": 8.926933742612527e-06, "loss": 0.6794, "step": 19748 }, { "epoch": 0.2361219048529992, "grad_norm": 5.747386932373047, "learning_rate": 8.92681388963067e-06, "loss": 0.6056, "step": 19749 }, { "epoch": 0.23613386099785985, "grad_norm": 4.836272716522217, "learning_rate": 8.926694030760543e-06, "loss": 0.6115, "step": 19750 }, { "epoch": 0.2361458171427205, "grad_norm": 2.0538830757141113, "learning_rate": 8.926574166002322e-06, "loss": 0.6757, "step": 19751 }, { "epoch": 0.23615777328758114, "grad_norm": 3.4483325481414795, "learning_rate": 8.92645429535619e-06, "loss": 0.6623, "step": 19752 }, { "epoch": 0.2361697294324418, "grad_norm": 2.0886409282684326, "learning_rate": 8.926334418822326e-06, "loss": 0.6853, "step": 19753 }, { "epoch": 0.23618168557730246, "grad_norm": 3.310380458831787, "learning_rate": 8.92621453640091e-06, "loss": 0.5697, "step": 19754 }, { "epoch": 0.23619364172216312, "grad_norm": 1.8408565521240234, "learning_rate": 8.92609464809212e-06, "loss": 0.7127, "step": 19755 }, { "epoch": 0.23620559786702375, "grad_norm": 3.553330659866333, "learning_rate": 8.925974753896138e-06, "loss": 0.5777, "step": 19756 }, { "epoch": 0.2362175540118844, "grad_norm": 1.7698179483413696, "learning_rate": 8.925854853813142e-06, "loss": 0.5001, "step": 19757 }, { "epoch": 0.23622951015674507, "grad_norm": 1.7971419095993042, "learning_rate": 8.925734947843315e-06, "loss": 0.6324, "step": 19758 }, { "epoch": 0.23624146630160572, "grad_norm": 2.6310763359069824, "learning_rate": 8.925615035986833e-06, "loss": 0.6037, "step": 19759 }, { "epoch": 0.23625342244646635, "grad_norm": 3.078925848007202, "learning_rate": 8.925495118243878e-06, "loss": 0.6082, "step": 19760 }, { "epoch": 0.236265378591327, "grad_norm": 2.971923351287842, "learning_rate": 8.925375194614628e-06, "loss": 0.6454, "step": 19761 }, { "epoch": 0.23627733473618767, "grad_norm": 2.511920928955078, "learning_rate": 8.925255265099264e-06, "loss": 0.7145, "step": 19762 }, { "epoch": 0.2362892908810483, "grad_norm": 1.921078085899353, "learning_rate": 8.925135329697967e-06, "loss": 0.5892, "step": 19763 }, { "epoch": 0.23630124702590896, "grad_norm": 4.421700954437256, "learning_rate": 8.925015388410915e-06, "loss": 0.6566, "step": 19764 }, { "epoch": 0.23631320317076962, "grad_norm": 2.164189338684082, "learning_rate": 8.924895441238288e-06, "loss": 0.6398, "step": 19765 }, { "epoch": 0.23632515931563028, "grad_norm": 1.9185874462127686, "learning_rate": 8.924775488180266e-06, "loss": 0.6237, "step": 19766 }, { "epoch": 0.2363371154604909, "grad_norm": 4.494936943054199, "learning_rate": 8.92465552923703e-06, "loss": 0.6085, "step": 19767 }, { "epoch": 0.23634907160535157, "grad_norm": 2.604163408279419, "learning_rate": 8.924535564408759e-06, "loss": 0.5747, "step": 19768 }, { "epoch": 0.23636102775021223, "grad_norm": 1.988388180732727, "learning_rate": 8.924415593695633e-06, "loss": 0.5387, "step": 19769 }, { "epoch": 0.23637298389507289, "grad_norm": 3.5000030994415283, "learning_rate": 8.924295617097832e-06, "loss": 0.5882, "step": 19770 }, { "epoch": 0.23638494003993352, "grad_norm": 2.348966121673584, "learning_rate": 8.924175634615536e-06, "loss": 0.6098, "step": 19771 }, { "epoch": 0.23639689618479418, "grad_norm": 3.1117475032806396, "learning_rate": 8.924055646248924e-06, "loss": 0.6072, "step": 19772 }, { "epoch": 0.23640885232965483, "grad_norm": 2.9624180793762207, "learning_rate": 8.923935651998177e-06, "loss": 0.6839, "step": 19773 }, { "epoch": 0.23642080847451546, "grad_norm": 10.798359870910645, "learning_rate": 8.923815651863475e-06, "loss": 0.5774, "step": 19774 }, { "epoch": 0.23643276461937612, "grad_norm": 1.79038405418396, "learning_rate": 8.923695645844996e-06, "loss": 0.553, "step": 19775 }, { "epoch": 0.23644472076423678, "grad_norm": 5.694508075714111, "learning_rate": 8.923575633942923e-06, "loss": 0.5432, "step": 19776 }, { "epoch": 0.23645667690909744, "grad_norm": 1.6329814195632935, "learning_rate": 8.923455616157433e-06, "loss": 0.5686, "step": 19777 }, { "epoch": 0.23646863305395807, "grad_norm": 2.9128971099853516, "learning_rate": 8.923335592488708e-06, "loss": 0.6489, "step": 19778 }, { "epoch": 0.23648058919881873, "grad_norm": 2.0720720291137695, "learning_rate": 8.923215562936928e-06, "loss": 0.6643, "step": 19779 }, { "epoch": 0.2364925453436794, "grad_norm": 1.9769585132598877, "learning_rate": 8.92309552750227e-06, "loss": 0.7019, "step": 19780 }, { "epoch": 0.23650450148854005, "grad_norm": 2.964111328125, "learning_rate": 8.92297548618492e-06, "loss": 0.5557, "step": 19781 }, { "epoch": 0.23651645763340068, "grad_norm": 2.122565269470215, "learning_rate": 8.922855438985051e-06, "loss": 0.5678, "step": 19782 }, { "epoch": 0.23652841377826134, "grad_norm": 2.9595370292663574, "learning_rate": 8.922735385902848e-06, "loss": 0.7475, "step": 19783 }, { "epoch": 0.236540369923122, "grad_norm": 3.1431095600128174, "learning_rate": 8.92261532693849e-06, "loss": 0.5471, "step": 19784 }, { "epoch": 0.23655232606798263, "grad_norm": 1.8984185457229614, "learning_rate": 8.922495262092154e-06, "loss": 0.6128, "step": 19785 }, { "epoch": 0.23656428221284329, "grad_norm": 2.2674686908721924, "learning_rate": 8.922375191364024e-06, "loss": 0.6273, "step": 19786 }, { "epoch": 0.23657623835770394, "grad_norm": 1.6124606132507324, "learning_rate": 8.922255114754278e-06, "loss": 0.546, "step": 19787 }, { "epoch": 0.2365881945025646, "grad_norm": 2.325235366821289, "learning_rate": 8.922135032263097e-06, "loss": 0.5859, "step": 19788 }, { "epoch": 0.23660015064742523, "grad_norm": 2.281320095062256, "learning_rate": 8.92201494389066e-06, "loss": 0.6704, "step": 19789 }, { "epoch": 0.2366121067922859, "grad_norm": 2.2769930362701416, "learning_rate": 8.921894849637149e-06, "loss": 0.6574, "step": 19790 }, { "epoch": 0.23662406293714655, "grad_norm": 3.780280351638794, "learning_rate": 8.92177474950274e-06, "loss": 0.646, "step": 19791 }, { "epoch": 0.2366360190820072, "grad_norm": 3.1641297340393066, "learning_rate": 8.921654643487619e-06, "loss": 0.714, "step": 19792 }, { "epoch": 0.23664797522686784, "grad_norm": 2.2003872394561768, "learning_rate": 8.921534531591963e-06, "loss": 0.5769, "step": 19793 }, { "epoch": 0.2366599313717285, "grad_norm": 1.8862193822860718, "learning_rate": 8.92141441381595e-06, "loss": 0.571, "step": 19794 }, { "epoch": 0.23667188751658916, "grad_norm": 1.60868239402771, "learning_rate": 8.921294290159761e-06, "loss": 0.633, "step": 19795 }, { "epoch": 0.2366838436614498, "grad_norm": 1.80181086063385, "learning_rate": 8.92117416062358e-06, "loss": 0.6597, "step": 19796 }, { "epoch": 0.23669579980631045, "grad_norm": 8.85546875, "learning_rate": 8.921054025207583e-06, "loss": 0.6836, "step": 19797 }, { "epoch": 0.2367077559511711, "grad_norm": 3.1355438232421875, "learning_rate": 8.920933883911952e-06, "loss": 0.5534, "step": 19798 }, { "epoch": 0.23671971209603176, "grad_norm": 3.0438072681427, "learning_rate": 8.920813736736867e-06, "loss": 0.6348, "step": 19799 }, { "epoch": 0.2367316682408924, "grad_norm": 2.3253631591796875, "learning_rate": 8.920693583682506e-06, "loss": 0.6057, "step": 19800 }, { "epoch": 0.23674362438575305, "grad_norm": 2.080538511276245, "learning_rate": 8.920573424749052e-06, "loss": 0.6072, "step": 19801 }, { "epoch": 0.2367555805306137, "grad_norm": 3.748203992843628, "learning_rate": 8.920453259936685e-06, "loss": 0.6121, "step": 19802 }, { "epoch": 0.23676753667547437, "grad_norm": 6.4065680503845215, "learning_rate": 8.920333089245585e-06, "loss": 0.6882, "step": 19803 }, { "epoch": 0.236779492820335, "grad_norm": 2.4050962924957275, "learning_rate": 8.920212912675931e-06, "loss": 0.713, "step": 19804 }, { "epoch": 0.23679144896519566, "grad_norm": 1.809536337852478, "learning_rate": 8.920092730227903e-06, "loss": 0.5757, "step": 19805 }, { "epoch": 0.23680340511005632, "grad_norm": 1.4065511226654053, "learning_rate": 8.919972541901682e-06, "loss": 0.6151, "step": 19806 }, { "epoch": 0.23681536125491698, "grad_norm": 7.8785271644592285, "learning_rate": 8.91985234769745e-06, "loss": 0.6776, "step": 19807 }, { "epoch": 0.2368273173997776, "grad_norm": 2.04313063621521, "learning_rate": 8.919732147615384e-06, "loss": 0.5828, "step": 19808 }, { "epoch": 0.23683927354463827, "grad_norm": 4.788735389709473, "learning_rate": 8.919611941655665e-06, "loss": 0.5591, "step": 19809 }, { "epoch": 0.23685122968949893, "grad_norm": 1.7674260139465332, "learning_rate": 8.919491729818477e-06, "loss": 0.5993, "step": 19810 }, { "epoch": 0.23686318583435956, "grad_norm": 2.0522866249084473, "learning_rate": 8.919371512103994e-06, "loss": 0.595, "step": 19811 }, { "epoch": 0.23687514197922022, "grad_norm": 1.909416913986206, "learning_rate": 8.9192512885124e-06, "loss": 0.6169, "step": 19812 }, { "epoch": 0.23688709812408087, "grad_norm": 2.101311445236206, "learning_rate": 8.919131059043875e-06, "loss": 0.594, "step": 19813 }, { "epoch": 0.23689905426894153, "grad_norm": 3.154338836669922, "learning_rate": 8.9190108236986e-06, "loss": 0.5649, "step": 19814 }, { "epoch": 0.23691101041380216, "grad_norm": 1.778818964958191, "learning_rate": 8.918890582476753e-06, "loss": 0.6243, "step": 19815 }, { "epoch": 0.23692296655866282, "grad_norm": 2.1611387729644775, "learning_rate": 8.918770335378518e-06, "loss": 0.6155, "step": 19816 }, { "epoch": 0.23693492270352348, "grad_norm": 4.3480634689331055, "learning_rate": 8.918650082404071e-06, "loss": 0.613, "step": 19817 }, { "epoch": 0.23694687884838414, "grad_norm": 1.883651852607727, "learning_rate": 8.918529823553594e-06, "loss": 0.5263, "step": 19818 }, { "epoch": 0.23695883499324477, "grad_norm": 2.600515365600586, "learning_rate": 8.918409558827268e-06, "loss": 0.7, "step": 19819 }, { "epoch": 0.23697079113810543, "grad_norm": 1.9749338626861572, "learning_rate": 8.918289288225275e-06, "loss": 0.6931, "step": 19820 }, { "epoch": 0.2369827472829661, "grad_norm": 2.331145763397217, "learning_rate": 8.91816901174779e-06, "loss": 0.6259, "step": 19821 }, { "epoch": 0.23699470342782672, "grad_norm": 3.166869878768921, "learning_rate": 8.918048729394999e-06, "loss": 0.6987, "step": 19822 }, { "epoch": 0.23700665957268738, "grad_norm": 2.648249864578247, "learning_rate": 8.917928441167078e-06, "loss": 0.6744, "step": 19823 }, { "epoch": 0.23701861571754804, "grad_norm": 1.9424211978912354, "learning_rate": 8.917808147064211e-06, "loss": 0.586, "step": 19824 }, { "epoch": 0.2370305718624087, "grad_norm": 2.742330312728882, "learning_rate": 8.917687847086575e-06, "loss": 0.5472, "step": 19825 }, { "epoch": 0.23704252800726933, "grad_norm": 2.755580186843872, "learning_rate": 8.917567541234355e-06, "loss": 0.6126, "step": 19826 }, { "epoch": 0.23705448415212999, "grad_norm": 1.9672346115112305, "learning_rate": 8.917447229507728e-06, "loss": 0.6509, "step": 19827 }, { "epoch": 0.23706644029699064, "grad_norm": 1.6483806371688843, "learning_rate": 8.917326911906872e-06, "loss": 0.6231, "step": 19828 }, { "epoch": 0.2370783964418513, "grad_norm": 1.7362927198410034, "learning_rate": 8.917206588431972e-06, "loss": 0.6791, "step": 19829 }, { "epoch": 0.23709035258671193, "grad_norm": 4.923380374908447, "learning_rate": 8.917086259083207e-06, "loss": 0.5634, "step": 19830 }, { "epoch": 0.2371023087315726, "grad_norm": 3.434474468231201, "learning_rate": 8.916965923860759e-06, "loss": 0.5746, "step": 19831 }, { "epoch": 0.23711426487643325, "grad_norm": 7.126709461212158, "learning_rate": 8.916845582764804e-06, "loss": 0.6001, "step": 19832 }, { "epoch": 0.23712622102129388, "grad_norm": 2.441293716430664, "learning_rate": 8.916725235795526e-06, "loss": 0.6843, "step": 19833 }, { "epoch": 0.23713817716615454, "grad_norm": 3.9882447719573975, "learning_rate": 8.916604882953104e-06, "loss": 0.6371, "step": 19834 }, { "epoch": 0.2371501333110152, "grad_norm": 2.8482937812805176, "learning_rate": 8.91648452423772e-06, "loss": 0.6284, "step": 19835 }, { "epoch": 0.23716208945587586, "grad_norm": 3.6473658084869385, "learning_rate": 8.916364159649554e-06, "loss": 0.6323, "step": 19836 }, { "epoch": 0.2371740456007365, "grad_norm": 1.5263475179672241, "learning_rate": 8.916243789188785e-06, "loss": 0.616, "step": 19837 }, { "epoch": 0.23718600174559715, "grad_norm": 2.0936100482940674, "learning_rate": 8.916123412855595e-06, "loss": 0.6603, "step": 19838 }, { "epoch": 0.2371979578904578, "grad_norm": 2.091949224472046, "learning_rate": 8.916003030650164e-06, "loss": 0.6314, "step": 19839 }, { "epoch": 0.23720991403531846, "grad_norm": 1.983185887336731, "learning_rate": 8.915882642572671e-06, "loss": 0.6163, "step": 19840 }, { "epoch": 0.2372218701801791, "grad_norm": 2.034846067428589, "learning_rate": 8.9157622486233e-06, "loss": 0.5007, "step": 19841 }, { "epoch": 0.23723382632503975, "grad_norm": 1.953177809715271, "learning_rate": 8.91564184880223e-06, "loss": 0.6134, "step": 19842 }, { "epoch": 0.2372457824699004, "grad_norm": 1.6295973062515259, "learning_rate": 8.915521443109639e-06, "loss": 0.5996, "step": 19843 }, { "epoch": 0.23725773861476104, "grad_norm": 3.6035475730895996, "learning_rate": 8.915401031545711e-06, "loss": 0.5373, "step": 19844 }, { "epoch": 0.2372696947596217, "grad_norm": 1.9594876766204834, "learning_rate": 8.915280614110624e-06, "loss": 0.6347, "step": 19845 }, { "epoch": 0.23728165090448236, "grad_norm": 3.1438212394714355, "learning_rate": 8.915160190804562e-06, "loss": 0.5685, "step": 19846 }, { "epoch": 0.23729360704934302, "grad_norm": 2.1100270748138428, "learning_rate": 8.915039761627701e-06, "loss": 0.6751, "step": 19847 }, { "epoch": 0.23730556319420365, "grad_norm": 2.0036184787750244, "learning_rate": 8.914919326580228e-06, "loss": 0.5757, "step": 19848 }, { "epoch": 0.2373175193390643, "grad_norm": 4.118896961212158, "learning_rate": 8.914798885662314e-06, "loss": 0.5588, "step": 19849 }, { "epoch": 0.23732947548392497, "grad_norm": 3.063105821609497, "learning_rate": 8.91467843887415e-06, "loss": 0.6046, "step": 19850 }, { "epoch": 0.23734143162878563, "grad_norm": 2.3135950565338135, "learning_rate": 8.914557986215909e-06, "loss": 0.6071, "step": 19851 }, { "epoch": 0.23735338777364626, "grad_norm": 1.9348695278167725, "learning_rate": 8.914437527687776e-06, "loss": 0.6344, "step": 19852 }, { "epoch": 0.23736534391850692, "grad_norm": 2.0413575172424316, "learning_rate": 8.914317063289928e-06, "loss": 0.6251, "step": 19853 }, { "epoch": 0.23737730006336757, "grad_norm": 3.5567495822906494, "learning_rate": 8.914196593022548e-06, "loss": 0.6807, "step": 19854 }, { "epoch": 0.2373892562082282, "grad_norm": 1.9947882890701294, "learning_rate": 8.914076116885816e-06, "loss": 0.6131, "step": 19855 }, { "epoch": 0.23740121235308886, "grad_norm": 1.9614946842193604, "learning_rate": 8.913955634879917e-06, "loss": 0.6779, "step": 19856 }, { "epoch": 0.23741316849794952, "grad_norm": 1.940457820892334, "learning_rate": 8.913835147005023e-06, "loss": 0.7343, "step": 19857 }, { "epoch": 0.23742512464281018, "grad_norm": 1.6678584814071655, "learning_rate": 8.91371465326132e-06, "loss": 0.6769, "step": 19858 }, { "epoch": 0.2374370807876708, "grad_norm": 3.6234514713287354, "learning_rate": 8.91359415364899e-06, "loss": 0.6761, "step": 19859 }, { "epoch": 0.23744903693253147, "grad_norm": 6.618886947631836, "learning_rate": 8.91347364816821e-06, "loss": 0.5913, "step": 19860 }, { "epoch": 0.23746099307739213, "grad_norm": 3.7747256755828857, "learning_rate": 8.913353136819163e-06, "loss": 0.5922, "step": 19861 }, { "epoch": 0.2374729492222528, "grad_norm": 1.912998914718628, "learning_rate": 8.91323261960203e-06, "loss": 0.5557, "step": 19862 }, { "epoch": 0.23748490536711342, "grad_norm": 1.7008795738220215, "learning_rate": 8.91311209651699e-06, "loss": 0.6087, "step": 19863 }, { "epoch": 0.23749686151197408, "grad_norm": 1.5915719270706177, "learning_rate": 8.912991567564223e-06, "loss": 0.6157, "step": 19864 }, { "epoch": 0.23750881765683474, "grad_norm": 2.4719297885894775, "learning_rate": 8.912871032743913e-06, "loss": 0.5927, "step": 19865 }, { "epoch": 0.2375207738016954, "grad_norm": 2.6953513622283936, "learning_rate": 8.912750492056237e-06, "loss": 0.6219, "step": 19866 }, { "epoch": 0.23753272994655603, "grad_norm": 3.9950766563415527, "learning_rate": 8.91262994550138e-06, "loss": 0.631, "step": 19867 }, { "epoch": 0.23754468609141668, "grad_norm": 2.308131456375122, "learning_rate": 8.91250939307952e-06, "loss": 0.6768, "step": 19868 }, { "epoch": 0.23755664223627734, "grad_norm": 2.327138900756836, "learning_rate": 8.912388834790838e-06, "loss": 0.6326, "step": 19869 }, { "epoch": 0.23756859838113797, "grad_norm": 5.616152286529541, "learning_rate": 8.912268270635514e-06, "loss": 0.6056, "step": 19870 }, { "epoch": 0.23758055452599863, "grad_norm": 2.860243320465088, "learning_rate": 8.912147700613732e-06, "loss": 0.6113, "step": 19871 }, { "epoch": 0.2375925106708593, "grad_norm": 2.1330904960632324, "learning_rate": 8.912027124725668e-06, "loss": 0.5954, "step": 19872 }, { "epoch": 0.23760446681571995, "grad_norm": 1.3497453927993774, "learning_rate": 8.911906542971507e-06, "loss": 0.574, "step": 19873 }, { "epoch": 0.23761642296058058, "grad_norm": 1.836225986480713, "learning_rate": 8.911785955351428e-06, "loss": 0.6346, "step": 19874 }, { "epoch": 0.23762837910544124, "grad_norm": 2.131993055343628, "learning_rate": 8.911665361865612e-06, "loss": 0.5812, "step": 19875 }, { "epoch": 0.2376403352503019, "grad_norm": 1.8874646425247192, "learning_rate": 8.91154476251424e-06, "loss": 0.5756, "step": 19876 }, { "epoch": 0.23765229139516256, "grad_norm": 2.7860522270202637, "learning_rate": 8.911424157297493e-06, "loss": 0.6176, "step": 19877 }, { "epoch": 0.2376642475400232, "grad_norm": 1.6367228031158447, "learning_rate": 8.911303546215551e-06, "loss": 0.6062, "step": 19878 }, { "epoch": 0.23767620368488385, "grad_norm": 2.568521022796631, "learning_rate": 8.911182929268596e-06, "loss": 0.6383, "step": 19879 }, { "epoch": 0.2376881598297445, "grad_norm": 1.9593923091888428, "learning_rate": 8.911062306456808e-06, "loss": 0.5272, "step": 19880 }, { "epoch": 0.23770011597460514, "grad_norm": 1.9576611518859863, "learning_rate": 8.910941677780367e-06, "loss": 0.6078, "step": 19881 }, { "epoch": 0.2377120721194658, "grad_norm": 1.9148104190826416, "learning_rate": 8.910821043239457e-06, "loss": 0.6622, "step": 19882 }, { "epoch": 0.23772402826432645, "grad_norm": 4.071619510650635, "learning_rate": 8.910700402834256e-06, "loss": 0.6849, "step": 19883 }, { "epoch": 0.2377359844091871, "grad_norm": 2.3714029788970947, "learning_rate": 8.910579756564946e-06, "loss": 0.6108, "step": 19884 }, { "epoch": 0.23774794055404774, "grad_norm": 2.3361189365386963, "learning_rate": 8.910459104431709e-06, "loss": 0.5534, "step": 19885 }, { "epoch": 0.2377598966989084, "grad_norm": 3.685310125350952, "learning_rate": 8.910338446434721e-06, "loss": 0.5593, "step": 19886 }, { "epoch": 0.23777185284376906, "grad_norm": 6.331531524658203, "learning_rate": 8.910217782574168e-06, "loss": 0.6307, "step": 19887 }, { "epoch": 0.23778380898862972, "grad_norm": 1.8404393196105957, "learning_rate": 8.910097112850231e-06, "loss": 0.6262, "step": 19888 }, { "epoch": 0.23779576513349035, "grad_norm": 3.3052523136138916, "learning_rate": 8.90997643726309e-06, "loss": 0.6686, "step": 19889 }, { "epoch": 0.237807721278351, "grad_norm": 1.7833292484283447, "learning_rate": 8.909855755812924e-06, "loss": 0.6372, "step": 19890 }, { "epoch": 0.23781967742321167, "grad_norm": 2.0795481204986572, "learning_rate": 8.909735068499916e-06, "loss": 0.5427, "step": 19891 }, { "epoch": 0.2378316335680723, "grad_norm": 2.4274559020996094, "learning_rate": 8.909614375324246e-06, "loss": 0.6997, "step": 19892 }, { "epoch": 0.23784358971293296, "grad_norm": 3.4715335369110107, "learning_rate": 8.909493676286097e-06, "loss": 0.4705, "step": 19893 }, { "epoch": 0.23785554585779362, "grad_norm": 2.504521369934082, "learning_rate": 8.909372971385646e-06, "loss": 0.5869, "step": 19894 }, { "epoch": 0.23786750200265427, "grad_norm": 2.7936925888061523, "learning_rate": 8.909252260623078e-06, "loss": 0.5552, "step": 19895 }, { "epoch": 0.2378794581475149, "grad_norm": 1.4342803955078125, "learning_rate": 8.909131543998571e-06, "loss": 0.5599, "step": 19896 }, { "epoch": 0.23789141429237556, "grad_norm": 2.528442621231079, "learning_rate": 8.909010821512307e-06, "loss": 0.6265, "step": 19897 }, { "epoch": 0.23790337043723622, "grad_norm": 3.4023895263671875, "learning_rate": 8.908890093164469e-06, "loss": 0.5458, "step": 19898 }, { "epoch": 0.23791532658209688, "grad_norm": 1.9617691040039062, "learning_rate": 8.908769358955236e-06, "loss": 0.6384, "step": 19899 }, { "epoch": 0.2379272827269575, "grad_norm": 2.4595673084259033, "learning_rate": 8.90864861888479e-06, "loss": 0.6254, "step": 19900 }, { "epoch": 0.23793923887181817, "grad_norm": 2.3918988704681396, "learning_rate": 8.908527872953309e-06, "loss": 0.5822, "step": 19901 }, { "epoch": 0.23795119501667883, "grad_norm": 3.221237897872925, "learning_rate": 8.908407121160979e-06, "loss": 0.5117, "step": 19902 }, { "epoch": 0.23796315116153946, "grad_norm": 2.039628028869629, "learning_rate": 8.908286363507978e-06, "loss": 0.5757, "step": 19903 }, { "epoch": 0.23797510730640012, "grad_norm": 3.6942780017852783, "learning_rate": 8.908165599994487e-06, "loss": 0.571, "step": 19904 }, { "epoch": 0.23798706345126078, "grad_norm": 2.4218099117279053, "learning_rate": 8.908044830620688e-06, "loss": 0.6603, "step": 19905 }, { "epoch": 0.23799901959612144, "grad_norm": 1.8239073753356934, "learning_rate": 8.907924055386763e-06, "loss": 0.6294, "step": 19906 }, { "epoch": 0.23801097574098207, "grad_norm": 1.947538137435913, "learning_rate": 8.907803274292892e-06, "loss": 0.508, "step": 19907 }, { "epoch": 0.23802293188584273, "grad_norm": 1.816881537437439, "learning_rate": 8.907682487339254e-06, "loss": 0.6034, "step": 19908 }, { "epoch": 0.23803488803070338, "grad_norm": 6.46724796295166, "learning_rate": 8.907561694526035e-06, "loss": 0.6067, "step": 19909 }, { "epoch": 0.23804684417556404, "grad_norm": 2.3721604347229004, "learning_rate": 8.90744089585341e-06, "loss": 0.6722, "step": 19910 }, { "epoch": 0.23805880032042467, "grad_norm": 1.8812148571014404, "learning_rate": 8.907320091321566e-06, "loss": 0.5909, "step": 19911 }, { "epoch": 0.23807075646528533, "grad_norm": 2.5341546535491943, "learning_rate": 8.90719928093068e-06, "loss": 0.684, "step": 19912 }, { "epoch": 0.238082712610146, "grad_norm": 2.432816743850708, "learning_rate": 8.907078464680938e-06, "loss": 0.6313, "step": 19913 }, { "epoch": 0.23809466875500662, "grad_norm": 2.196938991546631, "learning_rate": 8.906957642572515e-06, "loss": 0.6546, "step": 19914 }, { "epoch": 0.23810662489986728, "grad_norm": 1.7620232105255127, "learning_rate": 8.906836814605595e-06, "loss": 0.6686, "step": 19915 }, { "epoch": 0.23811858104472794, "grad_norm": 4.113590717315674, "learning_rate": 8.906715980780361e-06, "loss": 0.6369, "step": 19916 }, { "epoch": 0.2381305371895886, "grad_norm": 1.8737900257110596, "learning_rate": 8.906595141096992e-06, "loss": 0.6534, "step": 19917 }, { "epoch": 0.23814249333444923, "grad_norm": 2.5594780445098877, "learning_rate": 8.90647429555567e-06, "loss": 0.5938, "step": 19918 }, { "epoch": 0.2381544494793099, "grad_norm": 2.3687853813171387, "learning_rate": 8.906353444156574e-06, "loss": 0.6209, "step": 19919 }, { "epoch": 0.23816640562417055, "grad_norm": 2.278895378112793, "learning_rate": 8.906232586899888e-06, "loss": 0.6824, "step": 19920 }, { "epoch": 0.2381783617690312, "grad_norm": 2.0896341800689697, "learning_rate": 8.906111723785794e-06, "loss": 0.6082, "step": 19921 }, { "epoch": 0.23819031791389184, "grad_norm": 3.8286478519439697, "learning_rate": 8.90599085481447e-06, "loss": 0.6429, "step": 19922 }, { "epoch": 0.2382022740587525, "grad_norm": 2.5455198287963867, "learning_rate": 8.905869979986099e-06, "loss": 0.4776, "step": 19923 }, { "epoch": 0.23821423020361315, "grad_norm": 1.841105341911316, "learning_rate": 8.905749099300861e-06, "loss": 0.5837, "step": 19924 }, { "epoch": 0.2382261863484738, "grad_norm": 1.8406137228012085, "learning_rate": 8.905628212758942e-06, "loss": 0.595, "step": 19925 }, { "epoch": 0.23823814249333444, "grad_norm": 1.9195715188980103, "learning_rate": 8.905507320360516e-06, "loss": 0.6421, "step": 19926 }, { "epoch": 0.2382500986381951, "grad_norm": 1.5234425067901611, "learning_rate": 8.90538642210577e-06, "loss": 0.6641, "step": 19927 }, { "epoch": 0.23826205478305576, "grad_norm": 1.7714831829071045, "learning_rate": 8.905265517994881e-06, "loss": 0.598, "step": 19928 }, { "epoch": 0.2382740109279164, "grad_norm": 4.2970452308654785, "learning_rate": 8.905144608028033e-06, "loss": 0.5935, "step": 19929 }, { "epoch": 0.23828596707277705, "grad_norm": 2.286266326904297, "learning_rate": 8.905023692205409e-06, "loss": 0.5788, "step": 19930 }, { "epoch": 0.2382979232176377, "grad_norm": 1.4951342344284058, "learning_rate": 8.904902770527185e-06, "loss": 0.6088, "step": 19931 }, { "epoch": 0.23830987936249837, "grad_norm": 2.574737548828125, "learning_rate": 8.904781842993548e-06, "loss": 0.6568, "step": 19932 }, { "epoch": 0.238321835507359, "grad_norm": 2.306591510772705, "learning_rate": 8.904660909604677e-06, "loss": 0.5969, "step": 19933 }, { "epoch": 0.23833379165221966, "grad_norm": 2.800396203994751, "learning_rate": 8.90453997036075e-06, "loss": 0.6238, "step": 19934 }, { "epoch": 0.23834574779708032, "grad_norm": 3.0788052082061768, "learning_rate": 8.904419025261954e-06, "loss": 0.559, "step": 19935 }, { "epoch": 0.23835770394194097, "grad_norm": 4.344311714172363, "learning_rate": 8.904298074308469e-06, "loss": 0.6787, "step": 19936 }, { "epoch": 0.2383696600868016, "grad_norm": 2.0684680938720703, "learning_rate": 8.904177117500472e-06, "loss": 0.6005, "step": 19937 }, { "epoch": 0.23838161623166226, "grad_norm": 1.5272248983383179, "learning_rate": 8.90405615483815e-06, "loss": 0.6299, "step": 19938 }, { "epoch": 0.23839357237652292, "grad_norm": 1.3847793340682983, "learning_rate": 8.90393518632168e-06, "loss": 0.5751, "step": 19939 }, { "epoch": 0.23840552852138355, "grad_norm": 6.214121341705322, "learning_rate": 8.903814211951244e-06, "loss": 0.6663, "step": 19940 }, { "epoch": 0.2384174846662442, "grad_norm": 4.036149501800537, "learning_rate": 8.903693231727028e-06, "loss": 0.6749, "step": 19941 }, { "epoch": 0.23842944081110487, "grad_norm": 1.9333797693252563, "learning_rate": 8.90357224564921e-06, "loss": 0.6057, "step": 19942 }, { "epoch": 0.23844139695596553, "grad_norm": 10.14570426940918, "learning_rate": 8.90345125371797e-06, "loss": 0.6461, "step": 19943 }, { "epoch": 0.23845335310082616, "grad_norm": 3.404855728149414, "learning_rate": 8.90333025593349e-06, "loss": 0.5781, "step": 19944 }, { "epoch": 0.23846530924568682, "grad_norm": 1.8635704517364502, "learning_rate": 8.903209252295954e-06, "loss": 0.5568, "step": 19945 }, { "epoch": 0.23847726539054748, "grad_norm": 2.6860885620117188, "learning_rate": 8.903088242805542e-06, "loss": 0.6055, "step": 19946 }, { "epoch": 0.23848922153540814, "grad_norm": 2.569131374359131, "learning_rate": 8.902967227462435e-06, "loss": 0.61, "step": 19947 }, { "epoch": 0.23850117768026877, "grad_norm": 2.1249256134033203, "learning_rate": 8.902846206266815e-06, "loss": 0.5847, "step": 19948 }, { "epoch": 0.23851313382512943, "grad_norm": 2.4462890625, "learning_rate": 8.902725179218863e-06, "loss": 0.6256, "step": 19949 }, { "epoch": 0.23852508996999008, "grad_norm": 3.143960475921631, "learning_rate": 8.902604146318761e-06, "loss": 0.56, "step": 19950 }, { "epoch": 0.23853704611485071, "grad_norm": 1.8958910703659058, "learning_rate": 8.90248310756669e-06, "loss": 0.6845, "step": 19951 }, { "epoch": 0.23854900225971137, "grad_norm": 1.6801230907440186, "learning_rate": 8.902362062962831e-06, "loss": 0.6256, "step": 19952 }, { "epoch": 0.23856095840457203, "grad_norm": 1.927707314491272, "learning_rate": 8.902241012507366e-06, "loss": 0.5092, "step": 19953 }, { "epoch": 0.2385729145494327, "grad_norm": 2.92374587059021, "learning_rate": 8.902119956200479e-06, "loss": 0.598, "step": 19954 }, { "epoch": 0.23858487069429332, "grad_norm": 2.2686851024627686, "learning_rate": 8.901998894042347e-06, "loss": 0.6018, "step": 19955 }, { "epoch": 0.23859682683915398, "grad_norm": 4.128911972045898, "learning_rate": 8.901877826033155e-06, "loss": 0.7062, "step": 19956 }, { "epoch": 0.23860878298401464, "grad_norm": 1.8956354856491089, "learning_rate": 8.901756752173083e-06, "loss": 0.7113, "step": 19957 }, { "epoch": 0.2386207391288753, "grad_norm": 1.4653624296188354, "learning_rate": 8.901635672462313e-06, "loss": 0.6591, "step": 19958 }, { "epoch": 0.23863269527373593, "grad_norm": 1.9980149269104004, "learning_rate": 8.901514586901026e-06, "loss": 0.572, "step": 19959 }, { "epoch": 0.2386446514185966, "grad_norm": 1.5328261852264404, "learning_rate": 8.901393495489404e-06, "loss": 0.6621, "step": 19960 }, { "epoch": 0.23865660756345725, "grad_norm": 1.8641504049301147, "learning_rate": 8.90127239822763e-06, "loss": 0.5646, "step": 19961 }, { "epoch": 0.23866856370831788, "grad_norm": 2.382577419281006, "learning_rate": 8.901151295115882e-06, "loss": 0.5574, "step": 19962 }, { "epoch": 0.23868051985317854, "grad_norm": 1.4775546789169312, "learning_rate": 8.901030186154346e-06, "loss": 0.5843, "step": 19963 }, { "epoch": 0.2386924759980392, "grad_norm": 5.955203533172607, "learning_rate": 8.9009090713432e-06, "loss": 0.6372, "step": 19964 }, { "epoch": 0.23870443214289985, "grad_norm": 2.06606125831604, "learning_rate": 8.900787950682625e-06, "loss": 0.66, "step": 19965 }, { "epoch": 0.23871638828776048, "grad_norm": 3.6941347122192383, "learning_rate": 8.900666824172808e-06, "loss": 0.6823, "step": 19966 }, { "epoch": 0.23872834443262114, "grad_norm": 2.481672525405884, "learning_rate": 8.900545691813926e-06, "loss": 0.7974, "step": 19967 }, { "epoch": 0.2387403005774818, "grad_norm": 2.1403653621673584, "learning_rate": 8.900424553606161e-06, "loss": 0.6375, "step": 19968 }, { "epoch": 0.23875225672234246, "grad_norm": 2.969675064086914, "learning_rate": 8.900303409549698e-06, "loss": 0.6193, "step": 19969 }, { "epoch": 0.2387642128672031, "grad_norm": 1.4018285274505615, "learning_rate": 8.900182259644713e-06, "loss": 0.5763, "step": 19970 }, { "epoch": 0.23877616901206375, "grad_norm": 2.250244140625, "learning_rate": 8.900061103891393e-06, "loss": 0.6042, "step": 19971 }, { "epoch": 0.2387881251569244, "grad_norm": 3.1649911403656006, "learning_rate": 8.899939942289916e-06, "loss": 0.6403, "step": 19972 }, { "epoch": 0.23880008130178504, "grad_norm": 3.432342052459717, "learning_rate": 8.899818774840468e-06, "loss": 0.6638, "step": 19973 }, { "epoch": 0.2388120374466457, "grad_norm": 5.586348533630371, "learning_rate": 8.899697601543225e-06, "loss": 0.6518, "step": 19974 }, { "epoch": 0.23882399359150636, "grad_norm": 1.8719534873962402, "learning_rate": 8.899576422398373e-06, "loss": 0.6021, "step": 19975 }, { "epoch": 0.23883594973636701, "grad_norm": 1.9198945760726929, "learning_rate": 8.899455237406092e-06, "loss": 0.6572, "step": 19976 }, { "epoch": 0.23884790588122765, "grad_norm": 3.504068613052368, "learning_rate": 8.899334046566564e-06, "loss": 0.5735, "step": 19977 }, { "epoch": 0.2388598620260883, "grad_norm": 3.261685609817505, "learning_rate": 8.89921284987997e-06, "loss": 0.5133, "step": 19978 }, { "epoch": 0.23887181817094896, "grad_norm": 2.6257667541503906, "learning_rate": 8.899091647346493e-06, "loss": 0.6534, "step": 19979 }, { "epoch": 0.23888377431580962, "grad_norm": 1.9333432912826538, "learning_rate": 8.898970438966314e-06, "loss": 0.5776, "step": 19980 }, { "epoch": 0.23889573046067025, "grad_norm": 2.252063751220703, "learning_rate": 8.898849224739616e-06, "loss": 0.517, "step": 19981 }, { "epoch": 0.2389076866055309, "grad_norm": 3.872567653656006, "learning_rate": 8.898728004666577e-06, "loss": 0.6803, "step": 19982 }, { "epoch": 0.23891964275039157, "grad_norm": 1.3326683044433594, "learning_rate": 8.898606778747385e-06, "loss": 0.6617, "step": 19983 }, { "epoch": 0.23893159889525223, "grad_norm": 2.3214545249938965, "learning_rate": 8.898485546982215e-06, "loss": 0.5735, "step": 19984 }, { "epoch": 0.23894355504011286, "grad_norm": 2.5848169326782227, "learning_rate": 8.898364309371255e-06, "loss": 0.6862, "step": 19985 }, { "epoch": 0.23895551118497352, "grad_norm": 2.240138053894043, "learning_rate": 8.898243065914684e-06, "loss": 0.616, "step": 19986 }, { "epoch": 0.23896746732983418, "grad_norm": 2.3245606422424316, "learning_rate": 8.898121816612682e-06, "loss": 0.6165, "step": 19987 }, { "epoch": 0.2389794234746948, "grad_norm": 2.8700010776519775, "learning_rate": 8.898000561465432e-06, "loss": 0.6579, "step": 19988 }, { "epoch": 0.23899137961955547, "grad_norm": 2.0004074573516846, "learning_rate": 8.897879300473118e-06, "loss": 0.5764, "step": 19989 }, { "epoch": 0.23900333576441612, "grad_norm": 1.7479852437973022, "learning_rate": 8.897758033635919e-06, "loss": 0.6688, "step": 19990 }, { "epoch": 0.23901529190927678, "grad_norm": 1.6425373554229736, "learning_rate": 8.897636760954018e-06, "loss": 0.5525, "step": 19991 }, { "epoch": 0.23902724805413741, "grad_norm": 2.9235923290252686, "learning_rate": 8.897515482427597e-06, "loss": 0.5967, "step": 19992 }, { "epoch": 0.23903920419899807, "grad_norm": 3.0399489402770996, "learning_rate": 8.897394198056836e-06, "loss": 0.6833, "step": 19993 }, { "epoch": 0.23905116034385873, "grad_norm": 2.0458574295043945, "learning_rate": 8.897272907841922e-06, "loss": 0.5401, "step": 19994 }, { "epoch": 0.2390631164887194, "grad_norm": 1.6803101301193237, "learning_rate": 8.897151611783032e-06, "loss": 0.6401, "step": 19995 }, { "epoch": 0.23907507263358002, "grad_norm": 1.707640290260315, "learning_rate": 8.897030309880348e-06, "loss": 0.6381, "step": 19996 }, { "epoch": 0.23908702877844068, "grad_norm": 1.4645755290985107, "learning_rate": 8.896909002134055e-06, "loss": 0.5885, "step": 19997 }, { "epoch": 0.23909898492330134, "grad_norm": 2.3647916316986084, "learning_rate": 8.896787688544333e-06, "loss": 0.6399, "step": 19998 }, { "epoch": 0.23911094106816197, "grad_norm": 3.3389158248901367, "learning_rate": 8.896666369111362e-06, "loss": 0.6122, "step": 19999 }, { "epoch": 0.23912289721302263, "grad_norm": 3.0936126708984375, "learning_rate": 8.896545043835327e-06, "loss": 0.6732, "step": 20000 }, { "epoch": 0.2391348533578833, "grad_norm": 2.2996692657470703, "learning_rate": 8.89642371271641e-06, "loss": 0.592, "step": 20001 }, { "epoch": 0.23914680950274395, "grad_norm": 4.057950019836426, "learning_rate": 8.89630237575479e-06, "loss": 0.6611, "step": 20002 }, { "epoch": 0.23915876564760458, "grad_norm": 3.967822551727295, "learning_rate": 8.896181032950653e-06, "loss": 0.5628, "step": 20003 }, { "epoch": 0.23917072179246524, "grad_norm": 1.8830269575119019, "learning_rate": 8.896059684304176e-06, "loss": 0.6245, "step": 20004 }, { "epoch": 0.2391826779373259, "grad_norm": 3.0942845344543457, "learning_rate": 8.895938329815546e-06, "loss": 0.6265, "step": 20005 }, { "epoch": 0.23919463408218655, "grad_norm": 5.984041690826416, "learning_rate": 8.895816969484942e-06, "loss": 0.5506, "step": 20006 }, { "epoch": 0.23920659022704718, "grad_norm": 1.6645569801330566, "learning_rate": 8.895695603312547e-06, "loss": 0.5551, "step": 20007 }, { "epoch": 0.23921854637190784, "grad_norm": 2.4049453735351562, "learning_rate": 8.89557423129854e-06, "loss": 0.7085, "step": 20008 }, { "epoch": 0.2392305025167685, "grad_norm": 1.7860169410705566, "learning_rate": 8.895452853443108e-06, "loss": 0.6713, "step": 20009 }, { "epoch": 0.23924245866162913, "grad_norm": 3.783632278442383, "learning_rate": 8.895331469746428e-06, "loss": 0.5228, "step": 20010 }, { "epoch": 0.2392544148064898, "grad_norm": 1.7179920673370361, "learning_rate": 8.895210080208689e-06, "loss": 0.6535, "step": 20011 }, { "epoch": 0.23926637095135045, "grad_norm": 5.7504377365112305, "learning_rate": 8.895088684830066e-06, "loss": 0.591, "step": 20012 }, { "epoch": 0.2392783270962111, "grad_norm": 1.9894801378250122, "learning_rate": 8.894967283610742e-06, "loss": 0.5489, "step": 20013 }, { "epoch": 0.23929028324107174, "grad_norm": 2.349402904510498, "learning_rate": 8.894845876550903e-06, "loss": 0.6563, "step": 20014 }, { "epoch": 0.2393022393859324, "grad_norm": 2.1395387649536133, "learning_rate": 8.89472446365073e-06, "loss": 0.6365, "step": 20015 }, { "epoch": 0.23931419553079306, "grad_norm": 9.478388786315918, "learning_rate": 8.894603044910401e-06, "loss": 0.6458, "step": 20016 }, { "epoch": 0.23932615167565371, "grad_norm": 2.1659462451934814, "learning_rate": 8.894481620330102e-06, "loss": 0.6611, "step": 20017 }, { "epoch": 0.23933810782051435, "grad_norm": 2.4447853565216064, "learning_rate": 8.894360189910014e-06, "loss": 0.5481, "step": 20018 }, { "epoch": 0.239350063965375, "grad_norm": 2.5900559425354004, "learning_rate": 8.894238753650319e-06, "loss": 0.5442, "step": 20019 }, { "epoch": 0.23936202011023566, "grad_norm": 5.830379962921143, "learning_rate": 8.8941173115512e-06, "loss": 0.6108, "step": 20020 }, { "epoch": 0.2393739762550963, "grad_norm": 1.5672881603240967, "learning_rate": 8.893995863612837e-06, "loss": 0.5906, "step": 20021 }, { "epoch": 0.23938593239995695, "grad_norm": 1.9848604202270508, "learning_rate": 8.893874409835415e-06, "loss": 0.5874, "step": 20022 }, { "epoch": 0.2393978885448176, "grad_norm": 2.272907257080078, "learning_rate": 8.893752950219114e-06, "loss": 0.6354, "step": 20023 }, { "epoch": 0.23940984468967827, "grad_norm": 2.7013754844665527, "learning_rate": 8.893631484764115e-06, "loss": 0.5887, "step": 20024 }, { "epoch": 0.2394218008345389, "grad_norm": 2.4855401515960693, "learning_rate": 8.893510013470603e-06, "loss": 0.5489, "step": 20025 }, { "epoch": 0.23943375697939956, "grad_norm": 1.493974208831787, "learning_rate": 8.89338853633876e-06, "loss": 0.55, "step": 20026 }, { "epoch": 0.23944571312426022, "grad_norm": 2.0978357791900635, "learning_rate": 8.893267053368764e-06, "loss": 0.6757, "step": 20027 }, { "epoch": 0.23945766926912088, "grad_norm": 3.634227991104126, "learning_rate": 8.893145564560803e-06, "loss": 0.6449, "step": 20028 }, { "epoch": 0.2394696254139815, "grad_norm": 2.004741907119751, "learning_rate": 8.893024069915055e-06, "loss": 0.6503, "step": 20029 }, { "epoch": 0.23948158155884217, "grad_norm": 3.418060541152954, "learning_rate": 8.892902569431703e-06, "loss": 0.6972, "step": 20030 }, { "epoch": 0.23949353770370282, "grad_norm": 4.465798377990723, "learning_rate": 8.892781063110931e-06, "loss": 0.5879, "step": 20031 }, { "epoch": 0.23950549384856346, "grad_norm": 1.9005634784698486, "learning_rate": 8.89265955095292e-06, "loss": 0.6037, "step": 20032 }, { "epoch": 0.23951744999342411, "grad_norm": 5.004436016082764, "learning_rate": 8.892538032957851e-06, "loss": 0.6163, "step": 20033 }, { "epoch": 0.23952940613828477, "grad_norm": 2.3932032585144043, "learning_rate": 8.89241650912591e-06, "loss": 0.646, "step": 20034 }, { "epoch": 0.23954136228314543, "grad_norm": 2.1476213932037354, "learning_rate": 8.892294979457272e-06, "loss": 0.6191, "step": 20035 }, { "epoch": 0.23955331842800606, "grad_norm": 3.553870916366577, "learning_rate": 8.892173443952128e-06, "loss": 0.6962, "step": 20036 }, { "epoch": 0.23956527457286672, "grad_norm": 1.7547169923782349, "learning_rate": 8.892051902610655e-06, "loss": 0.6627, "step": 20037 }, { "epoch": 0.23957723071772738, "grad_norm": 1.691766381263733, "learning_rate": 8.891930355433037e-06, "loss": 0.6322, "step": 20038 }, { "epoch": 0.23958918686258804, "grad_norm": 4.533242702484131, "learning_rate": 8.891808802419453e-06, "loss": 0.4375, "step": 20039 }, { "epoch": 0.23960114300744867, "grad_norm": 1.9406262636184692, "learning_rate": 8.891687243570088e-06, "loss": 0.6023, "step": 20040 }, { "epoch": 0.23961309915230933, "grad_norm": 1.4152313470840454, "learning_rate": 8.891565678885127e-06, "loss": 0.6184, "step": 20041 }, { "epoch": 0.23962505529717, "grad_norm": 2.477071523666382, "learning_rate": 8.891444108364748e-06, "loss": 0.6887, "step": 20042 }, { "epoch": 0.23963701144203065, "grad_norm": 1.948388934135437, "learning_rate": 8.891322532009135e-06, "loss": 0.612, "step": 20043 }, { "epoch": 0.23964896758689128, "grad_norm": 2.693631172180176, "learning_rate": 8.89120094981847e-06, "loss": 0.6184, "step": 20044 }, { "epoch": 0.23966092373175193, "grad_norm": 2.012587547302246, "learning_rate": 8.891079361792933e-06, "loss": 0.546, "step": 20045 }, { "epoch": 0.2396728798766126, "grad_norm": 2.7919063568115234, "learning_rate": 8.890957767932712e-06, "loss": 0.6373, "step": 20046 }, { "epoch": 0.23968483602147322, "grad_norm": 2.8435893058776855, "learning_rate": 8.890836168237985e-06, "loss": 0.661, "step": 20047 }, { "epoch": 0.23969679216633388, "grad_norm": 3.059988498687744, "learning_rate": 8.890714562708935e-06, "loss": 0.601, "step": 20048 }, { "epoch": 0.23970874831119454, "grad_norm": 2.698559522628784, "learning_rate": 8.890592951345745e-06, "loss": 0.6318, "step": 20049 }, { "epoch": 0.2397207044560552, "grad_norm": 1.771806240081787, "learning_rate": 8.890471334148597e-06, "loss": 0.6984, "step": 20050 }, { "epoch": 0.23973266060091583, "grad_norm": 2.7942986488342285, "learning_rate": 8.890349711117672e-06, "loss": 0.638, "step": 20051 }, { "epoch": 0.2397446167457765, "grad_norm": 3.0094211101531982, "learning_rate": 8.890228082253155e-06, "loss": 0.5756, "step": 20052 }, { "epoch": 0.23975657289063715, "grad_norm": 5.211330413818359, "learning_rate": 8.89010644755523e-06, "loss": 0.6982, "step": 20053 }, { "epoch": 0.2397685290354978, "grad_norm": 2.0978167057037354, "learning_rate": 8.889984807024072e-06, "loss": 0.6398, "step": 20054 }, { "epoch": 0.23978048518035844, "grad_norm": 2.2422127723693848, "learning_rate": 8.889863160659871e-06, "loss": 0.4699, "step": 20055 }, { "epoch": 0.2397924413252191, "grad_norm": 1.8975578546524048, "learning_rate": 8.889741508462804e-06, "loss": 0.6862, "step": 20056 }, { "epoch": 0.23980439747007976, "grad_norm": 2.1200084686279297, "learning_rate": 8.889619850433057e-06, "loss": 0.5876, "step": 20057 }, { "epoch": 0.2398163536149404, "grad_norm": 1.7378267049789429, "learning_rate": 8.88949818657081e-06, "loss": 0.6335, "step": 20058 }, { "epoch": 0.23982830975980104, "grad_norm": 1.5041154623031616, "learning_rate": 8.88937651687625e-06, "loss": 0.6414, "step": 20059 }, { "epoch": 0.2398402659046617, "grad_norm": 1.575319528579712, "learning_rate": 8.889254841349553e-06, "loss": 0.592, "step": 20060 }, { "epoch": 0.23985222204952236, "grad_norm": 1.8774440288543701, "learning_rate": 8.889133159990905e-06, "loss": 0.5731, "step": 20061 }, { "epoch": 0.239864178194383, "grad_norm": 2.042426109313965, "learning_rate": 8.889011472800489e-06, "loss": 0.6295, "step": 20062 }, { "epoch": 0.23987613433924365, "grad_norm": 5.852721214294434, "learning_rate": 8.888889779778484e-06, "loss": 0.6198, "step": 20063 }, { "epoch": 0.2398880904841043, "grad_norm": 1.8401933908462524, "learning_rate": 8.888768080925078e-06, "loss": 0.6221, "step": 20064 }, { "epoch": 0.23990004662896497, "grad_norm": 1.9017291069030762, "learning_rate": 8.888646376240449e-06, "loss": 0.6427, "step": 20065 }, { "epoch": 0.2399120027738256, "grad_norm": 2.251077651977539, "learning_rate": 8.888524665724781e-06, "loss": 0.4566, "step": 20066 }, { "epoch": 0.23992395891868626, "grad_norm": 2.5902371406555176, "learning_rate": 8.888402949378256e-06, "loss": 0.5978, "step": 20067 }, { "epoch": 0.23993591506354692, "grad_norm": 1.8950161933898926, "learning_rate": 8.88828122720106e-06, "loss": 0.6436, "step": 20068 }, { "epoch": 0.23994787120840755, "grad_norm": 13.140605926513672, "learning_rate": 8.888159499193367e-06, "loss": 0.6748, "step": 20069 }, { "epoch": 0.2399598273532682, "grad_norm": 2.233808755874634, "learning_rate": 8.88803776535537e-06, "loss": 0.6038, "step": 20070 }, { "epoch": 0.23997178349812887, "grad_norm": 1.840332269668579, "learning_rate": 8.887916025687244e-06, "loss": 0.5819, "step": 20071 }, { "epoch": 0.23998373964298952, "grad_norm": 2.665769577026367, "learning_rate": 8.887794280189175e-06, "loss": 0.6622, "step": 20072 }, { "epoch": 0.23999569578785016, "grad_norm": 2.5428905487060547, "learning_rate": 8.887672528861344e-06, "loss": 0.5746, "step": 20073 }, { "epoch": 0.2400076519327108, "grad_norm": 5.601933479309082, "learning_rate": 8.887550771703935e-06, "loss": 0.5916, "step": 20074 }, { "epoch": 0.24001960807757147, "grad_norm": 1.8619312047958374, "learning_rate": 8.88742900871713e-06, "loss": 0.6317, "step": 20075 }, { "epoch": 0.24003156422243213, "grad_norm": 3.4393460750579834, "learning_rate": 8.88730723990111e-06, "loss": 0.6376, "step": 20076 }, { "epoch": 0.24004352036729276, "grad_norm": 3.1497538089752197, "learning_rate": 8.88718546525606e-06, "loss": 0.6034, "step": 20077 }, { "epoch": 0.24005547651215342, "grad_norm": 2.402744770050049, "learning_rate": 8.88706368478216e-06, "loss": 0.6485, "step": 20078 }, { "epoch": 0.24006743265701408, "grad_norm": 2.7423694133758545, "learning_rate": 8.886941898479596e-06, "loss": 0.541, "step": 20079 }, { "epoch": 0.2400793888018747, "grad_norm": 3.8811042308807373, "learning_rate": 8.886820106348548e-06, "loss": 0.5854, "step": 20080 }, { "epoch": 0.24009134494673537, "grad_norm": 4.647634029388428, "learning_rate": 8.8866983083892e-06, "loss": 0.6303, "step": 20081 }, { "epoch": 0.24010330109159603, "grad_norm": 1.4848796129226685, "learning_rate": 8.886576504601733e-06, "loss": 0.5655, "step": 20082 }, { "epoch": 0.2401152572364567, "grad_norm": 3.725419759750366, "learning_rate": 8.886454694986333e-06, "loss": 0.558, "step": 20083 }, { "epoch": 0.24012721338131732, "grad_norm": 1.8047391176223755, "learning_rate": 8.886332879543179e-06, "loss": 0.6379, "step": 20084 }, { "epoch": 0.24013916952617798, "grad_norm": 1.6929136514663696, "learning_rate": 8.886211058272453e-06, "loss": 0.6391, "step": 20085 }, { "epoch": 0.24015112567103863, "grad_norm": 2.2312612533569336, "learning_rate": 8.886089231174342e-06, "loss": 0.6635, "step": 20086 }, { "epoch": 0.2401630818158993, "grad_norm": 2.629911184310913, "learning_rate": 8.885967398249028e-06, "loss": 0.6873, "step": 20087 }, { "epoch": 0.24017503796075992, "grad_norm": 2.047884225845337, "learning_rate": 8.885845559496689e-06, "loss": 0.6122, "step": 20088 }, { "epoch": 0.24018699410562058, "grad_norm": 9.74936580657959, "learning_rate": 8.885723714917512e-06, "loss": 0.5598, "step": 20089 }, { "epoch": 0.24019895025048124, "grad_norm": 2.3733572959899902, "learning_rate": 8.885601864511678e-06, "loss": 0.6422, "step": 20090 }, { "epoch": 0.24021090639534187, "grad_norm": 4.005295753479004, "learning_rate": 8.88548000827937e-06, "loss": 0.5306, "step": 20091 }, { "epoch": 0.24022286254020253, "grad_norm": 2.148665428161621, "learning_rate": 8.885358146220773e-06, "loss": 0.5943, "step": 20092 }, { "epoch": 0.2402348186850632, "grad_norm": 1.439712643623352, "learning_rate": 8.885236278336064e-06, "loss": 0.6056, "step": 20093 }, { "epoch": 0.24024677482992385, "grad_norm": 2.132826089859009, "learning_rate": 8.885114404625432e-06, "loss": 0.6197, "step": 20094 }, { "epoch": 0.24025873097478448, "grad_norm": 2.106163740158081, "learning_rate": 8.884992525089058e-06, "loss": 0.5267, "step": 20095 }, { "epoch": 0.24027068711964514, "grad_norm": 1.994194746017456, "learning_rate": 8.88487063972712e-06, "loss": 0.5997, "step": 20096 }, { "epoch": 0.2402826432645058, "grad_norm": 3.368997573852539, "learning_rate": 8.884748748539808e-06, "loss": 0.5624, "step": 20097 }, { "epoch": 0.24029459940936646, "grad_norm": 3.1888558864593506, "learning_rate": 8.884626851527301e-06, "loss": 0.5778, "step": 20098 }, { "epoch": 0.24030655555422709, "grad_norm": 2.268752336502075, "learning_rate": 8.884504948689783e-06, "loss": 0.6865, "step": 20099 }, { "epoch": 0.24031851169908774, "grad_norm": 2.200944423675537, "learning_rate": 8.884383040027434e-06, "loss": 0.6413, "step": 20100 }, { "epoch": 0.2403304678439484, "grad_norm": 5.563958168029785, "learning_rate": 8.88426112554044e-06, "loss": 0.632, "step": 20101 }, { "epoch": 0.24034242398880906, "grad_norm": 2.2881555557250977, "learning_rate": 8.884139205228984e-06, "loss": 0.5421, "step": 20102 }, { "epoch": 0.2403543801336697, "grad_norm": 5.128942966461182, "learning_rate": 8.884017279093244e-06, "loss": 0.5105, "step": 20103 }, { "epoch": 0.24036633627853035, "grad_norm": 1.8286824226379395, "learning_rate": 8.883895347133408e-06, "loss": 0.6866, "step": 20104 }, { "epoch": 0.240378292423391, "grad_norm": 3.676825523376465, "learning_rate": 8.883773409349657e-06, "loss": 0.6053, "step": 20105 }, { "epoch": 0.24039024856825164, "grad_norm": 2.22715425491333, "learning_rate": 8.883651465742174e-06, "loss": 0.6675, "step": 20106 }, { "epoch": 0.2404022047131123, "grad_norm": 2.0464138984680176, "learning_rate": 8.883529516311142e-06, "loss": 0.6046, "step": 20107 }, { "epoch": 0.24041416085797296, "grad_norm": 1.7700083255767822, "learning_rate": 8.883407561056745e-06, "loss": 0.5826, "step": 20108 }, { "epoch": 0.24042611700283362, "grad_norm": 3.434438467025757, "learning_rate": 8.883285599979162e-06, "loss": 0.6361, "step": 20109 }, { "epoch": 0.24043807314769425, "grad_norm": 3.3532497882843018, "learning_rate": 8.88316363307858e-06, "loss": 0.5852, "step": 20110 }, { "epoch": 0.2404500292925549, "grad_norm": 2.2473576068878174, "learning_rate": 8.88304166035518e-06, "loss": 0.5193, "step": 20111 }, { "epoch": 0.24046198543741557, "grad_norm": 1.3800493478775024, "learning_rate": 8.882919681809145e-06, "loss": 0.5602, "step": 20112 }, { "epoch": 0.24047394158227622, "grad_norm": 2.623453378677368, "learning_rate": 8.88279769744066e-06, "loss": 0.6465, "step": 20113 }, { "epoch": 0.24048589772713685, "grad_norm": 4.098741054534912, "learning_rate": 8.882675707249904e-06, "loss": 0.616, "step": 20114 }, { "epoch": 0.2404978538719975, "grad_norm": 1.807838797569275, "learning_rate": 8.88255371123706e-06, "loss": 0.6093, "step": 20115 }, { "epoch": 0.24050981001685817, "grad_norm": 21.755157470703125, "learning_rate": 8.882431709402317e-06, "loss": 0.5383, "step": 20116 }, { "epoch": 0.2405217661617188, "grad_norm": 1.963768720626831, "learning_rate": 8.882309701745852e-06, "loss": 0.5769, "step": 20117 }, { "epoch": 0.24053372230657946, "grad_norm": 6.527209758758545, "learning_rate": 8.88218768826785e-06, "loss": 0.7282, "step": 20118 }, { "epoch": 0.24054567845144012, "grad_norm": 9.85690975189209, "learning_rate": 8.882065668968494e-06, "loss": 0.6206, "step": 20119 }, { "epoch": 0.24055763459630078, "grad_norm": 2.2247989177703857, "learning_rate": 8.881943643847966e-06, "loss": 0.585, "step": 20120 }, { "epoch": 0.2405695907411614, "grad_norm": 1.63459312915802, "learning_rate": 8.88182161290645e-06, "loss": 0.5584, "step": 20121 }, { "epoch": 0.24058154688602207, "grad_norm": 2.598767042160034, "learning_rate": 8.881699576144129e-06, "loss": 0.7103, "step": 20122 }, { "epoch": 0.24059350303088273, "grad_norm": 2.3402457237243652, "learning_rate": 8.881577533561185e-06, "loss": 0.6745, "step": 20123 }, { "epoch": 0.24060545917574339, "grad_norm": 1.7522331476211548, "learning_rate": 8.881455485157804e-06, "loss": 0.6314, "step": 20124 }, { "epoch": 0.24061741532060402, "grad_norm": 2.6134824752807617, "learning_rate": 8.881333430934164e-06, "loss": 0.694, "step": 20125 }, { "epoch": 0.24062937146546468, "grad_norm": 2.480339765548706, "learning_rate": 8.881211370890451e-06, "loss": 0.7069, "step": 20126 }, { "epoch": 0.24064132761032533, "grad_norm": 2.2124576568603516, "learning_rate": 8.88108930502685e-06, "loss": 0.648, "step": 20127 }, { "epoch": 0.24065328375518596, "grad_norm": 2.0976204872131348, "learning_rate": 8.88096723334354e-06, "loss": 0.5967, "step": 20128 }, { "epoch": 0.24066523990004662, "grad_norm": 2.565915107727051, "learning_rate": 8.880845155840706e-06, "loss": 0.5525, "step": 20129 }, { "epoch": 0.24067719604490728, "grad_norm": 6.518589019775391, "learning_rate": 8.88072307251853e-06, "loss": 0.6569, "step": 20130 }, { "epoch": 0.24068915218976794, "grad_norm": 2.6127657890319824, "learning_rate": 8.880600983377197e-06, "loss": 0.5918, "step": 20131 }, { "epoch": 0.24070110833462857, "grad_norm": 3.475191831588745, "learning_rate": 8.880478888416889e-06, "loss": 0.5896, "step": 20132 }, { "epoch": 0.24071306447948923, "grad_norm": 2.1766116619110107, "learning_rate": 8.88035678763779e-06, "loss": 0.573, "step": 20133 }, { "epoch": 0.2407250206243499, "grad_norm": 2.458056688308716, "learning_rate": 8.880234681040082e-06, "loss": 0.6126, "step": 20134 }, { "epoch": 0.24073697676921055, "grad_norm": 2.0250630378723145, "learning_rate": 8.880112568623947e-06, "loss": 0.6492, "step": 20135 }, { "epoch": 0.24074893291407118, "grad_norm": 2.1019287109375, "learning_rate": 8.879990450389569e-06, "loss": 0.5542, "step": 20136 }, { "epoch": 0.24076088905893184, "grad_norm": 3.028085231781006, "learning_rate": 8.879868326337133e-06, "loss": 0.5791, "step": 20137 }, { "epoch": 0.2407728452037925, "grad_norm": 9.882952690124512, "learning_rate": 8.879746196466819e-06, "loss": 0.5979, "step": 20138 }, { "epoch": 0.24078480134865313, "grad_norm": 1.1402671337127686, "learning_rate": 8.879624060778813e-06, "loss": 0.5485, "step": 20139 }, { "epoch": 0.24079675749351379, "grad_norm": 2.3002405166625977, "learning_rate": 8.879501919273297e-06, "loss": 0.6396, "step": 20140 }, { "epoch": 0.24080871363837444, "grad_norm": 1.9036868810653687, "learning_rate": 8.879379771950454e-06, "loss": 0.6474, "step": 20141 }, { "epoch": 0.2408206697832351, "grad_norm": 2.011179208755493, "learning_rate": 8.879257618810467e-06, "loss": 0.5442, "step": 20142 }, { "epoch": 0.24083262592809573, "grad_norm": 3.3513312339782715, "learning_rate": 8.879135459853521e-06, "loss": 0.6472, "step": 20143 }, { "epoch": 0.2408445820729564, "grad_norm": 2.34824275970459, "learning_rate": 8.879013295079795e-06, "loss": 0.7074, "step": 20144 }, { "epoch": 0.24085653821781705, "grad_norm": 3.0183963775634766, "learning_rate": 8.878891124489477e-06, "loss": 0.5458, "step": 20145 }, { "epoch": 0.2408684943626777, "grad_norm": 1.634045958518982, "learning_rate": 8.878768948082746e-06, "loss": 0.5584, "step": 20146 }, { "epoch": 0.24088045050753834, "grad_norm": 3.350801467895508, "learning_rate": 8.878646765859789e-06, "loss": 0.6279, "step": 20147 }, { "epoch": 0.240892406652399, "grad_norm": 1.9724678993225098, "learning_rate": 8.878524577820786e-06, "loss": 0.6117, "step": 20148 }, { "epoch": 0.24090436279725966, "grad_norm": 1.6609177589416504, "learning_rate": 8.878402383965922e-06, "loss": 0.6989, "step": 20149 }, { "epoch": 0.2409163189421203, "grad_norm": 1.8095355033874512, "learning_rate": 8.87828018429538e-06, "loss": 0.5767, "step": 20150 }, { "epoch": 0.24092827508698095, "grad_norm": 2.618330240249634, "learning_rate": 8.878157978809342e-06, "loss": 0.66, "step": 20151 }, { "epoch": 0.2409402312318416, "grad_norm": 2.4578113555908203, "learning_rate": 8.878035767507995e-06, "loss": 0.591, "step": 20152 }, { "epoch": 0.24095218737670226, "grad_norm": 2.8757874965667725, "learning_rate": 8.877913550391517e-06, "loss": 0.6441, "step": 20153 }, { "epoch": 0.2409641435215629, "grad_norm": 2.213547945022583, "learning_rate": 8.877791327460095e-06, "loss": 0.5947, "step": 20154 }, { "epoch": 0.24097609966642355, "grad_norm": 6.316405773162842, "learning_rate": 8.87766909871391e-06, "loss": 0.6943, "step": 20155 }, { "epoch": 0.2409880558112842, "grad_norm": 1.57541024684906, "learning_rate": 8.877546864153149e-06, "loss": 0.5171, "step": 20156 }, { "epoch": 0.24100001195614487, "grad_norm": 1.5127391815185547, "learning_rate": 8.87742462377799e-06, "loss": 0.6095, "step": 20157 }, { "epoch": 0.2410119681010055, "grad_norm": 1.8755959272384644, "learning_rate": 8.877302377588619e-06, "loss": 0.5502, "step": 20158 }, { "epoch": 0.24102392424586616, "grad_norm": 6.546773910522461, "learning_rate": 8.87718012558522e-06, "loss": 0.593, "step": 20159 }, { "epoch": 0.24103588039072682, "grad_norm": 2.242851734161377, "learning_rate": 8.877057867767978e-06, "loss": 0.7058, "step": 20160 }, { "epoch": 0.24104783653558748, "grad_norm": 7.258402347564697, "learning_rate": 8.876935604137072e-06, "loss": 0.7136, "step": 20161 }, { "epoch": 0.2410597926804481, "grad_norm": 3.106130838394165, "learning_rate": 8.876813334692687e-06, "loss": 0.5597, "step": 20162 }, { "epoch": 0.24107174882530877, "grad_norm": 1.8929195404052734, "learning_rate": 8.876691059435006e-06, "loss": 0.5689, "step": 20163 }, { "epoch": 0.24108370497016943, "grad_norm": 2.977215051651001, "learning_rate": 8.876568778364215e-06, "loss": 0.5444, "step": 20164 }, { "epoch": 0.24109566111503006, "grad_norm": 2.236069917678833, "learning_rate": 8.876446491480493e-06, "loss": 0.5985, "step": 20165 }, { "epoch": 0.24110761725989072, "grad_norm": 2.082108497619629, "learning_rate": 8.876324198784027e-06, "loss": 0.5934, "step": 20166 }, { "epoch": 0.24111957340475138, "grad_norm": 2.933501720428467, "learning_rate": 8.876201900274998e-06, "loss": 0.6128, "step": 20167 }, { "epoch": 0.24113152954961203, "grad_norm": 2.319392681121826, "learning_rate": 8.876079595953592e-06, "loss": 0.5421, "step": 20168 }, { "epoch": 0.24114348569447266, "grad_norm": 3.531043767929077, "learning_rate": 8.875957285819989e-06, "loss": 0.6871, "step": 20169 }, { "epoch": 0.24115544183933332, "grad_norm": 2.029207706451416, "learning_rate": 8.875834969874377e-06, "loss": 0.6272, "step": 20170 }, { "epoch": 0.24116739798419398, "grad_norm": 2.814473867416382, "learning_rate": 8.875712648116934e-06, "loss": 0.6038, "step": 20171 }, { "epoch": 0.24117935412905464, "grad_norm": 3.0486066341400146, "learning_rate": 8.875590320547848e-06, "loss": 0.5826, "step": 20172 }, { "epoch": 0.24119131027391527, "grad_norm": 2.4656190872192383, "learning_rate": 8.8754679871673e-06, "loss": 0.6213, "step": 20173 }, { "epoch": 0.24120326641877593, "grad_norm": 1.7814611196517944, "learning_rate": 8.875345647975472e-06, "loss": 0.6335, "step": 20174 }, { "epoch": 0.2412152225636366, "grad_norm": 2.1863882541656494, "learning_rate": 8.875223302972552e-06, "loss": 0.614, "step": 20175 }, { "epoch": 0.24122717870849722, "grad_norm": 2.1204450130462646, "learning_rate": 8.87510095215872e-06, "loss": 0.5331, "step": 20176 }, { "epoch": 0.24123913485335788, "grad_norm": 2.0379061698913574, "learning_rate": 8.874978595534161e-06, "loss": 0.6874, "step": 20177 }, { "epoch": 0.24125109099821854, "grad_norm": 3.6792171001434326, "learning_rate": 8.874856233099057e-06, "loss": 0.5609, "step": 20178 }, { "epoch": 0.2412630471430792, "grad_norm": 2.0346198081970215, "learning_rate": 8.874733864853592e-06, "loss": 0.5739, "step": 20179 }, { "epoch": 0.24127500328793983, "grad_norm": 8.083189964294434, "learning_rate": 8.87461149079795e-06, "loss": 0.6238, "step": 20180 }, { "epoch": 0.24128695943280049, "grad_norm": 2.0702338218688965, "learning_rate": 8.874489110932316e-06, "loss": 0.6541, "step": 20181 }, { "epoch": 0.24129891557766114, "grad_norm": 2.273054361343384, "learning_rate": 8.874366725256872e-06, "loss": 0.7252, "step": 20182 }, { "epoch": 0.2413108717225218, "grad_norm": 2.2140166759490967, "learning_rate": 8.874244333771799e-06, "loss": 0.625, "step": 20183 }, { "epoch": 0.24132282786738243, "grad_norm": 1.9289456605911255, "learning_rate": 8.874121936477283e-06, "loss": 0.5624, "step": 20184 }, { "epoch": 0.2413347840122431, "grad_norm": 1.7272766828536987, "learning_rate": 8.873999533373507e-06, "loss": 0.698, "step": 20185 }, { "epoch": 0.24134674015710375, "grad_norm": 3.527921676635742, "learning_rate": 8.873877124460657e-06, "loss": 0.6757, "step": 20186 }, { "epoch": 0.24135869630196438, "grad_norm": 1.491229772567749, "learning_rate": 8.873754709738914e-06, "loss": 0.6536, "step": 20187 }, { "epoch": 0.24137065244682504, "grad_norm": 2.164944648742676, "learning_rate": 8.87363228920846e-06, "loss": 0.5807, "step": 20188 }, { "epoch": 0.2413826085916857, "grad_norm": 2.5834226608276367, "learning_rate": 8.873509862869482e-06, "loss": 0.6762, "step": 20189 }, { "epoch": 0.24139456473654636, "grad_norm": 2.40031361579895, "learning_rate": 8.873387430722164e-06, "loss": 0.6666, "step": 20190 }, { "epoch": 0.241406520881407, "grad_norm": 2.27133846282959, "learning_rate": 8.873264992766685e-06, "loss": 0.6186, "step": 20191 }, { "epoch": 0.24141847702626765, "grad_norm": 1.4924770593643188, "learning_rate": 8.873142549003234e-06, "loss": 0.6446, "step": 20192 }, { "epoch": 0.2414304331711283, "grad_norm": 1.8621013164520264, "learning_rate": 8.87302009943199e-06, "loss": 0.6092, "step": 20193 }, { "epoch": 0.24144238931598896, "grad_norm": 1.9304696321487427, "learning_rate": 8.872897644053137e-06, "loss": 0.6399, "step": 20194 }, { "epoch": 0.2414543454608496, "grad_norm": 5.680962562561035, "learning_rate": 8.872775182866864e-06, "loss": 0.68, "step": 20195 }, { "epoch": 0.24146630160571025, "grad_norm": 1.7122223377227783, "learning_rate": 8.872652715873349e-06, "loss": 0.6886, "step": 20196 }, { "epoch": 0.2414782577505709, "grad_norm": 2.5716168880462646, "learning_rate": 8.872530243072776e-06, "loss": 0.59, "step": 20197 }, { "epoch": 0.24149021389543154, "grad_norm": 2.2806713581085205, "learning_rate": 8.872407764465333e-06, "loss": 0.6758, "step": 20198 }, { "epoch": 0.2415021700402922, "grad_norm": 1.9822862148284912, "learning_rate": 8.872285280051199e-06, "loss": 0.5598, "step": 20199 }, { "epoch": 0.24151412618515286, "grad_norm": 2.649127960205078, "learning_rate": 8.872162789830562e-06, "loss": 0.6572, "step": 20200 }, { "epoch": 0.24152608233001352, "grad_norm": 1.7604639530181885, "learning_rate": 8.872040293803598e-06, "loss": 0.5227, "step": 20201 }, { "epoch": 0.24153803847487415, "grad_norm": 3.4310131072998047, "learning_rate": 8.871917791970502e-06, "loss": 0.5284, "step": 20202 }, { "epoch": 0.2415499946197348, "grad_norm": 1.9863063097000122, "learning_rate": 8.871795284331447e-06, "loss": 0.6348, "step": 20203 }, { "epoch": 0.24156195076459547, "grad_norm": 2.5669562816619873, "learning_rate": 8.871672770886622e-06, "loss": 0.5774, "step": 20204 }, { "epoch": 0.24157390690945613, "grad_norm": 3.0622313022613525, "learning_rate": 8.871550251636212e-06, "loss": 0.7166, "step": 20205 }, { "epoch": 0.24158586305431676, "grad_norm": 12.503388404846191, "learning_rate": 8.871427726580397e-06, "loss": 0.6206, "step": 20206 }, { "epoch": 0.24159781919917742, "grad_norm": 1.6031160354614258, "learning_rate": 8.871305195719363e-06, "loss": 0.6635, "step": 20207 }, { "epoch": 0.24160977534403807, "grad_norm": 1.4886629581451416, "learning_rate": 8.871182659053292e-06, "loss": 0.6317, "step": 20208 }, { "epoch": 0.24162173148889873, "grad_norm": 1.5212088823318481, "learning_rate": 8.871060116582368e-06, "loss": 0.4871, "step": 20209 }, { "epoch": 0.24163368763375936, "grad_norm": 1.96527099609375, "learning_rate": 8.870937568306778e-06, "loss": 0.6606, "step": 20210 }, { "epoch": 0.24164564377862002, "grad_norm": 1.9615520238876343, "learning_rate": 8.870815014226702e-06, "loss": 0.5943, "step": 20211 }, { "epoch": 0.24165759992348068, "grad_norm": 2.177797794342041, "learning_rate": 8.870692454342326e-06, "loss": 0.5616, "step": 20212 }, { "epoch": 0.2416695560683413, "grad_norm": 2.033846855163574, "learning_rate": 8.870569888653833e-06, "loss": 0.5643, "step": 20213 }, { "epoch": 0.24168151221320197, "grad_norm": 4.724534034729004, "learning_rate": 8.870447317161407e-06, "loss": 0.5712, "step": 20214 }, { "epoch": 0.24169346835806263, "grad_norm": 1.6387194395065308, "learning_rate": 8.870324739865229e-06, "loss": 0.5226, "step": 20215 }, { "epoch": 0.2417054245029233, "grad_norm": 1.9292449951171875, "learning_rate": 8.870202156765487e-06, "loss": 0.5313, "step": 20216 }, { "epoch": 0.24171738064778392, "grad_norm": 1.9790513515472412, "learning_rate": 8.870079567862363e-06, "loss": 0.5509, "step": 20217 }, { "epoch": 0.24172933679264458, "grad_norm": 4.375827789306641, "learning_rate": 8.869956973156042e-06, "loss": 0.6369, "step": 20218 }, { "epoch": 0.24174129293750524, "grad_norm": 1.693895697593689, "learning_rate": 8.869834372646705e-06, "loss": 0.5125, "step": 20219 }, { "epoch": 0.2417532490823659, "grad_norm": 2.481765031814575, "learning_rate": 8.869711766334539e-06, "loss": 0.5497, "step": 20220 }, { "epoch": 0.24176520522722653, "grad_norm": 2.2619762420654297, "learning_rate": 8.869589154219726e-06, "loss": 0.5797, "step": 20221 }, { "epoch": 0.24177716137208718, "grad_norm": 2.3983428478240967, "learning_rate": 8.869466536302448e-06, "loss": 0.5949, "step": 20222 }, { "epoch": 0.24178911751694784, "grad_norm": 2.2783563137054443, "learning_rate": 8.869343912582895e-06, "loss": 0.5674, "step": 20223 }, { "epoch": 0.24180107366180847, "grad_norm": 2.9698891639709473, "learning_rate": 8.869221283061244e-06, "loss": 0.562, "step": 20224 }, { "epoch": 0.24181302980666913, "grad_norm": 1.7770975828170776, "learning_rate": 8.869098647737685e-06, "loss": 0.6775, "step": 20225 }, { "epoch": 0.2418249859515298, "grad_norm": 2.557663679122925, "learning_rate": 8.868976006612397e-06, "loss": 0.658, "step": 20226 }, { "epoch": 0.24183694209639045, "grad_norm": 1.5129197835922241, "learning_rate": 8.868853359685565e-06, "loss": 0.654, "step": 20227 }, { "epoch": 0.24184889824125108, "grad_norm": 2.0602481365203857, "learning_rate": 8.868730706957375e-06, "loss": 0.5885, "step": 20228 }, { "epoch": 0.24186085438611174, "grad_norm": 4.4735517501831055, "learning_rate": 8.86860804842801e-06, "loss": 0.6052, "step": 20229 }, { "epoch": 0.2418728105309724, "grad_norm": 1.8981199264526367, "learning_rate": 8.868485384097651e-06, "loss": 0.6184, "step": 20230 }, { "epoch": 0.24188476667583306, "grad_norm": 1.784814476966858, "learning_rate": 8.868362713966486e-06, "loss": 0.5563, "step": 20231 }, { "epoch": 0.2418967228206937, "grad_norm": 1.6531535387039185, "learning_rate": 8.868240038034697e-06, "loss": 0.5634, "step": 20232 }, { "epoch": 0.24190867896555435, "grad_norm": 1.7325940132141113, "learning_rate": 8.868117356302468e-06, "loss": 0.5996, "step": 20233 }, { "epoch": 0.241920635110415, "grad_norm": 2.2398173809051514, "learning_rate": 8.867994668769985e-06, "loss": 0.5677, "step": 20234 }, { "epoch": 0.24193259125527564, "grad_norm": 2.15620756149292, "learning_rate": 8.86787197543743e-06, "loss": 0.6019, "step": 20235 }, { "epoch": 0.2419445474001363, "grad_norm": 3.1563236713409424, "learning_rate": 8.867749276304985e-06, "loss": 0.6533, "step": 20236 }, { "epoch": 0.24195650354499695, "grad_norm": 1.960897445678711, "learning_rate": 8.867626571372838e-06, "loss": 0.6058, "step": 20237 }, { "epoch": 0.2419684596898576, "grad_norm": 7.1351423263549805, "learning_rate": 8.867503860641171e-06, "loss": 0.6256, "step": 20238 }, { "epoch": 0.24198041583471824, "grad_norm": 1.9377192258834839, "learning_rate": 8.867381144110168e-06, "loss": 0.4894, "step": 20239 }, { "epoch": 0.2419923719795789, "grad_norm": 2.0466725826263428, "learning_rate": 8.867258421780013e-06, "loss": 0.6357, "step": 20240 }, { "epoch": 0.24200432812443956, "grad_norm": 2.5224170684814453, "learning_rate": 8.86713569365089e-06, "loss": 0.6207, "step": 20241 }, { "epoch": 0.24201628426930022, "grad_norm": 1.8357040882110596, "learning_rate": 8.867012959722984e-06, "loss": 0.6132, "step": 20242 }, { "epoch": 0.24202824041416085, "grad_norm": 2.2844698429107666, "learning_rate": 8.866890219996478e-06, "loss": 0.6434, "step": 20243 }, { "epoch": 0.2420401965590215, "grad_norm": 4.38107967376709, "learning_rate": 8.866767474471556e-06, "loss": 0.6838, "step": 20244 }, { "epoch": 0.24205215270388217, "grad_norm": 3.670081853866577, "learning_rate": 8.866644723148403e-06, "loss": 0.6297, "step": 20245 }, { "epoch": 0.2420641088487428, "grad_norm": 4.843076705932617, "learning_rate": 8.866521966027203e-06, "loss": 0.6487, "step": 20246 }, { "epoch": 0.24207606499360346, "grad_norm": 3.3122174739837646, "learning_rate": 8.866399203108137e-06, "loss": 0.6184, "step": 20247 }, { "epoch": 0.24208802113846412, "grad_norm": 2.8061318397521973, "learning_rate": 8.866276434391394e-06, "loss": 0.5961, "step": 20248 }, { "epoch": 0.24209997728332477, "grad_norm": 1.8225558996200562, "learning_rate": 8.866153659877156e-06, "loss": 0.6667, "step": 20249 }, { "epoch": 0.2421119334281854, "grad_norm": 8.254830360412598, "learning_rate": 8.866030879565606e-06, "loss": 0.6775, "step": 20250 }, { "epoch": 0.24212388957304606, "grad_norm": 1.8328344821929932, "learning_rate": 8.865908093456929e-06, "loss": 0.5455, "step": 20251 }, { "epoch": 0.24213584571790672, "grad_norm": 2.3238120079040527, "learning_rate": 8.86578530155131e-06, "loss": 0.7175, "step": 20252 }, { "epoch": 0.24214780186276738, "grad_norm": 9.173019409179688, "learning_rate": 8.865662503848931e-06, "loss": 0.5668, "step": 20253 }, { "epoch": 0.242159758007628, "grad_norm": 4.485747814178467, "learning_rate": 8.865539700349978e-06, "loss": 0.5622, "step": 20254 }, { "epoch": 0.24217171415248867, "grad_norm": 2.495431423187256, "learning_rate": 8.865416891054634e-06, "loss": 0.661, "step": 20255 }, { "epoch": 0.24218367029734933, "grad_norm": 11.065165519714355, "learning_rate": 8.865294075963085e-06, "loss": 0.6327, "step": 20256 }, { "epoch": 0.24219562644220996, "grad_norm": 1.914278507232666, "learning_rate": 8.86517125507551e-06, "loss": 0.5874, "step": 20257 }, { "epoch": 0.24220758258707062, "grad_norm": 2.2096309661865234, "learning_rate": 8.8650484283921e-06, "loss": 0.6384, "step": 20258 }, { "epoch": 0.24221953873193128, "grad_norm": 1.8916966915130615, "learning_rate": 8.864925595913036e-06, "loss": 0.5757, "step": 20259 }, { "epoch": 0.24223149487679194, "grad_norm": 1.7314530611038208, "learning_rate": 8.864802757638502e-06, "loss": 0.552, "step": 20260 }, { "epoch": 0.24224345102165257, "grad_norm": 2.065372943878174, "learning_rate": 8.864679913568684e-06, "loss": 0.6087, "step": 20261 }, { "epoch": 0.24225540716651323, "grad_norm": 9.157496452331543, "learning_rate": 8.864557063703763e-06, "loss": 0.6027, "step": 20262 }, { "epoch": 0.24226736331137388, "grad_norm": 1.7930972576141357, "learning_rate": 8.864434208043924e-06, "loss": 0.6007, "step": 20263 }, { "epoch": 0.24227931945623454, "grad_norm": 1.8867757320404053, "learning_rate": 8.864311346589354e-06, "loss": 0.582, "step": 20264 }, { "epoch": 0.24229127560109517, "grad_norm": 2.022962808609009, "learning_rate": 8.864188479340234e-06, "loss": 0.624, "step": 20265 }, { "epoch": 0.24230323174595583, "grad_norm": 2.774996519088745, "learning_rate": 8.864065606296752e-06, "loss": 0.5884, "step": 20266 }, { "epoch": 0.2423151878908165, "grad_norm": 2.001715660095215, "learning_rate": 8.863942727459086e-06, "loss": 0.5277, "step": 20267 }, { "epoch": 0.24232714403567715, "grad_norm": 1.7737150192260742, "learning_rate": 8.863819842827429e-06, "loss": 0.6397, "step": 20268 }, { "epoch": 0.24233910018053778, "grad_norm": 1.9189478158950806, "learning_rate": 8.863696952401957e-06, "loss": 0.6877, "step": 20269 }, { "epoch": 0.24235105632539844, "grad_norm": 3.5331404209136963, "learning_rate": 8.863574056182858e-06, "loss": 0.6426, "step": 20270 }, { "epoch": 0.2423630124702591, "grad_norm": 3.111025333404541, "learning_rate": 8.863451154170317e-06, "loss": 0.5119, "step": 20271 }, { "epoch": 0.24237496861511973, "grad_norm": 2.6406092643737793, "learning_rate": 8.863328246364517e-06, "loss": 0.653, "step": 20272 }, { "epoch": 0.2423869247599804, "grad_norm": 1.7377427816390991, "learning_rate": 8.863205332765644e-06, "loss": 0.5821, "step": 20273 }, { "epoch": 0.24239888090484105, "grad_norm": 1.9136979579925537, "learning_rate": 8.863082413373878e-06, "loss": 0.4927, "step": 20274 }, { "epoch": 0.2424108370497017, "grad_norm": 2.1740124225616455, "learning_rate": 8.862959488189407e-06, "loss": 0.5838, "step": 20275 }, { "epoch": 0.24242279319456234, "grad_norm": 2.1708261966705322, "learning_rate": 8.862836557212416e-06, "loss": 0.6173, "step": 20276 }, { "epoch": 0.242434749339423, "grad_norm": 2.235686779022217, "learning_rate": 8.862713620443087e-06, "loss": 0.5722, "step": 20277 }, { "epoch": 0.24244670548428365, "grad_norm": 2.012413501739502, "learning_rate": 8.862590677881604e-06, "loss": 0.6381, "step": 20278 }, { "epoch": 0.2424586616291443, "grad_norm": 2.36661958694458, "learning_rate": 8.862467729528154e-06, "loss": 0.6428, "step": 20279 }, { "epoch": 0.24247061777400494, "grad_norm": 2.731424331665039, "learning_rate": 8.862344775382918e-06, "loss": 0.678, "step": 20280 }, { "epoch": 0.2424825739188656, "grad_norm": 2.5251762866973877, "learning_rate": 8.862221815446084e-06, "loss": 0.6129, "step": 20281 }, { "epoch": 0.24249453006372626, "grad_norm": 1.553940773010254, "learning_rate": 8.862098849717836e-06, "loss": 0.559, "step": 20282 }, { "epoch": 0.2425064862085869, "grad_norm": 2.0111899375915527, "learning_rate": 8.861975878198355e-06, "loss": 0.639, "step": 20283 }, { "epoch": 0.24251844235344755, "grad_norm": 4.172872066497803, "learning_rate": 8.861852900887828e-06, "loss": 0.5622, "step": 20284 }, { "epoch": 0.2425303984983082, "grad_norm": 1.5515329837799072, "learning_rate": 8.861729917786439e-06, "loss": 0.6181, "step": 20285 }, { "epoch": 0.24254235464316887, "grad_norm": 1.858109474182129, "learning_rate": 8.86160692889437e-06, "loss": 0.4788, "step": 20286 }, { "epoch": 0.2425543107880295, "grad_norm": 2.428441286087036, "learning_rate": 8.861483934211813e-06, "loss": 0.6103, "step": 20287 }, { "epoch": 0.24256626693289016, "grad_norm": 1.791166067123413, "learning_rate": 8.861360933738941e-06, "loss": 0.5841, "step": 20288 }, { "epoch": 0.24257822307775082, "grad_norm": 1.7802103757858276, "learning_rate": 8.861237927475948e-06, "loss": 0.6529, "step": 20289 }, { "epoch": 0.24259017922261147, "grad_norm": 2.0234720706939697, "learning_rate": 8.861114915423015e-06, "loss": 0.6914, "step": 20290 }, { "epoch": 0.2426021353674721, "grad_norm": 2.288165330886841, "learning_rate": 8.860991897580325e-06, "loss": 0.6421, "step": 20291 }, { "epoch": 0.24261409151233276, "grad_norm": 2.1563587188720703, "learning_rate": 8.860868873948066e-06, "loss": 0.5343, "step": 20292 }, { "epoch": 0.24262604765719342, "grad_norm": 1.841835379600525, "learning_rate": 8.860745844526419e-06, "loss": 0.5418, "step": 20293 }, { "epoch": 0.24263800380205405, "grad_norm": 2.905033588409424, "learning_rate": 8.86062280931557e-06, "loss": 0.6015, "step": 20294 }, { "epoch": 0.2426499599469147, "grad_norm": 1.9988797903060913, "learning_rate": 8.860499768315702e-06, "loss": 0.595, "step": 20295 }, { "epoch": 0.24266191609177537, "grad_norm": 2.1401264667510986, "learning_rate": 8.860376721527003e-06, "loss": 0.7349, "step": 20296 }, { "epoch": 0.24267387223663603, "grad_norm": 2.540693759918213, "learning_rate": 8.860253668949654e-06, "loss": 0.5864, "step": 20297 }, { "epoch": 0.24268582838149666, "grad_norm": 2.0241591930389404, "learning_rate": 8.860130610583843e-06, "loss": 0.6518, "step": 20298 }, { "epoch": 0.24269778452635732, "grad_norm": 3.301905632019043, "learning_rate": 8.860007546429749e-06, "loss": 0.6717, "step": 20299 }, { "epoch": 0.24270974067121798, "grad_norm": 5.983955383300781, "learning_rate": 8.859884476487564e-06, "loss": 0.6111, "step": 20300 }, { "epoch": 0.24272169681607864, "grad_norm": 4.590939998626709, "learning_rate": 8.859761400757465e-06, "loss": 0.6877, "step": 20301 }, { "epoch": 0.24273365296093927, "grad_norm": 3.564478635787964, "learning_rate": 8.859638319239643e-06, "loss": 0.6226, "step": 20302 }, { "epoch": 0.24274560910579993, "grad_norm": 2.083941698074341, "learning_rate": 8.859515231934277e-06, "loss": 0.6115, "step": 20303 }, { "epoch": 0.24275756525066058, "grad_norm": 1.8999851942062378, "learning_rate": 8.859392138841556e-06, "loss": 0.5873, "step": 20304 }, { "epoch": 0.24276952139552122, "grad_norm": 5.282862663269043, "learning_rate": 8.859269039961663e-06, "loss": 0.5674, "step": 20305 }, { "epoch": 0.24278147754038187, "grad_norm": 2.073483467102051, "learning_rate": 8.85914593529478e-06, "loss": 0.5625, "step": 20306 }, { "epoch": 0.24279343368524253, "grad_norm": 2.3601739406585693, "learning_rate": 8.859022824841095e-06, "loss": 0.5854, "step": 20307 }, { "epoch": 0.2428053898301032, "grad_norm": 3.922435998916626, "learning_rate": 8.858899708600792e-06, "loss": 0.5768, "step": 20308 }, { "epoch": 0.24281734597496382, "grad_norm": 1.8374035358428955, "learning_rate": 8.858776586574056e-06, "loss": 0.5906, "step": 20309 }, { "epoch": 0.24282930211982448, "grad_norm": 6.441739082336426, "learning_rate": 8.858653458761071e-06, "loss": 0.6975, "step": 20310 }, { "epoch": 0.24284125826468514, "grad_norm": 3.118823289871216, "learning_rate": 8.858530325162019e-06, "loss": 0.6077, "step": 20311 }, { "epoch": 0.2428532144095458, "grad_norm": 3.853670597076416, "learning_rate": 8.858407185777089e-06, "loss": 0.5395, "step": 20312 }, { "epoch": 0.24286517055440643, "grad_norm": 3.6823832988739014, "learning_rate": 8.858284040606462e-06, "loss": 0.6556, "step": 20313 }, { "epoch": 0.2428771266992671, "grad_norm": 2.086421251296997, "learning_rate": 8.858160889650327e-06, "loss": 0.5337, "step": 20314 }, { "epoch": 0.24288908284412775, "grad_norm": 2.0335466861724854, "learning_rate": 8.858037732908864e-06, "loss": 0.6426, "step": 20315 }, { "epoch": 0.24290103898898838, "grad_norm": 2.644788980484009, "learning_rate": 8.85791457038226e-06, "loss": 0.6931, "step": 20316 }, { "epoch": 0.24291299513384904, "grad_norm": 2.8547329902648926, "learning_rate": 8.8577914020707e-06, "loss": 0.6024, "step": 20317 }, { "epoch": 0.2429249512787097, "grad_norm": 4.469514846801758, "learning_rate": 8.857668227974368e-06, "loss": 0.5296, "step": 20318 }, { "epoch": 0.24293690742357035, "grad_norm": 1.8564724922180176, "learning_rate": 8.857545048093447e-06, "loss": 0.6264, "step": 20319 }, { "epoch": 0.24294886356843098, "grad_norm": 2.2634615898132324, "learning_rate": 8.857421862428127e-06, "loss": 0.6272, "step": 20320 }, { "epoch": 0.24296081971329164, "grad_norm": 2.9137299060821533, "learning_rate": 8.857298670978586e-06, "loss": 0.6851, "step": 20321 }, { "epoch": 0.2429727758581523, "grad_norm": 6.456547737121582, "learning_rate": 8.857175473745013e-06, "loss": 0.5727, "step": 20322 }, { "epoch": 0.24298473200301296, "grad_norm": 1.5410990715026855, "learning_rate": 8.857052270727592e-06, "loss": 0.544, "step": 20323 }, { "epoch": 0.2429966881478736, "grad_norm": 2.334597587585449, "learning_rate": 8.856929061926507e-06, "loss": 0.6505, "step": 20324 }, { "epoch": 0.24300864429273425, "grad_norm": 2.492906093597412, "learning_rate": 8.856805847341943e-06, "loss": 0.5988, "step": 20325 }, { "epoch": 0.2430206004375949, "grad_norm": 2.170212745666504, "learning_rate": 8.856682626974086e-06, "loss": 0.5569, "step": 20326 }, { "epoch": 0.24303255658245557, "grad_norm": 2.4526560306549072, "learning_rate": 8.85655940082312e-06, "loss": 0.6124, "step": 20327 }, { "epoch": 0.2430445127273162, "grad_norm": 3.0143074989318848, "learning_rate": 8.856436168889226e-06, "loss": 0.6522, "step": 20328 }, { "epoch": 0.24305646887217686, "grad_norm": 2.5879967212677, "learning_rate": 8.856312931172595e-06, "loss": 0.7536, "step": 20329 }, { "epoch": 0.24306842501703751, "grad_norm": 4.611795425415039, "learning_rate": 8.85618968767341e-06, "loss": 0.5168, "step": 20330 }, { "epoch": 0.24308038116189815, "grad_norm": 3.2960448265075684, "learning_rate": 8.856066438391853e-06, "loss": 0.6538, "step": 20331 }, { "epoch": 0.2430923373067588, "grad_norm": 2.0402910709381104, "learning_rate": 8.855943183328112e-06, "loss": 0.6948, "step": 20332 }, { "epoch": 0.24310429345161946, "grad_norm": 1.9739848375320435, "learning_rate": 8.85581992248237e-06, "loss": 0.6194, "step": 20333 }, { "epoch": 0.24311624959648012, "grad_norm": 3.3132526874542236, "learning_rate": 8.85569665585481e-06, "loss": 0.6217, "step": 20334 }, { "epoch": 0.24312820574134075, "grad_norm": 3.911745548248291, "learning_rate": 8.855573383445623e-06, "loss": 0.6881, "step": 20335 }, { "epoch": 0.2431401618862014, "grad_norm": 1.5497840642929077, "learning_rate": 8.855450105254987e-06, "loss": 0.5639, "step": 20336 }, { "epoch": 0.24315211803106207, "grad_norm": 2.212273597717285, "learning_rate": 8.855326821283092e-06, "loss": 0.6511, "step": 20337 }, { "epoch": 0.24316407417592273, "grad_norm": 1.8987840414047241, "learning_rate": 8.85520353153012e-06, "loss": 0.607, "step": 20338 }, { "epoch": 0.24317603032078336, "grad_norm": 1.7513248920440674, "learning_rate": 8.855080235996257e-06, "loss": 0.6237, "step": 20339 }, { "epoch": 0.24318798646564402, "grad_norm": 1.9216872453689575, "learning_rate": 8.854956934681686e-06, "loss": 0.6138, "step": 20340 }, { "epoch": 0.24319994261050468, "grad_norm": 13.523260116577148, "learning_rate": 8.854833627586594e-06, "loss": 0.5226, "step": 20341 }, { "epoch": 0.2432118987553653, "grad_norm": 2.300784111022949, "learning_rate": 8.854710314711166e-06, "loss": 0.5612, "step": 20342 }, { "epoch": 0.24322385490022597, "grad_norm": 5.509636878967285, "learning_rate": 8.854586996055585e-06, "loss": 0.6352, "step": 20343 }, { "epoch": 0.24323581104508663, "grad_norm": 2.971200466156006, "learning_rate": 8.854463671620039e-06, "loss": 0.61, "step": 20344 }, { "epoch": 0.24324776718994728, "grad_norm": 1.7546802759170532, "learning_rate": 8.85434034140471e-06, "loss": 0.5291, "step": 20345 }, { "epoch": 0.24325972333480791, "grad_norm": 2.6528453826904297, "learning_rate": 8.854217005409785e-06, "loss": 0.6424, "step": 20346 }, { "epoch": 0.24327167947966857, "grad_norm": 2.9659335613250732, "learning_rate": 8.854093663635445e-06, "loss": 0.6382, "step": 20347 }, { "epoch": 0.24328363562452923, "grad_norm": 3.551884412765503, "learning_rate": 8.853970316081879e-06, "loss": 0.6151, "step": 20348 }, { "epoch": 0.2432955917693899, "grad_norm": 1.9969837665557861, "learning_rate": 8.853846962749272e-06, "loss": 0.6037, "step": 20349 }, { "epoch": 0.24330754791425052, "grad_norm": 3.2583487033843994, "learning_rate": 8.853723603637807e-06, "loss": 0.5025, "step": 20350 }, { "epoch": 0.24331950405911118, "grad_norm": 2.1723718643188477, "learning_rate": 8.85360023874767e-06, "loss": 0.5576, "step": 20351 }, { "epoch": 0.24333146020397184, "grad_norm": 2.2958524227142334, "learning_rate": 8.853476868079045e-06, "loss": 0.6099, "step": 20352 }, { "epoch": 0.24334341634883247, "grad_norm": 2.596259117126465, "learning_rate": 8.85335349163212e-06, "loss": 0.6463, "step": 20353 }, { "epoch": 0.24335537249369313, "grad_norm": 2.715038537979126, "learning_rate": 8.853230109407076e-06, "loss": 0.6252, "step": 20354 }, { "epoch": 0.2433673286385538, "grad_norm": 4.574159622192383, "learning_rate": 8.853106721404098e-06, "loss": 0.5578, "step": 20355 }, { "epoch": 0.24337928478341445, "grad_norm": 1.994061827659607, "learning_rate": 8.852983327623377e-06, "loss": 0.6442, "step": 20356 }, { "epoch": 0.24339124092827508, "grad_norm": 2.2066798210144043, "learning_rate": 8.85285992806509e-06, "loss": 0.6309, "step": 20357 }, { "epoch": 0.24340319707313574, "grad_norm": 3.1092569828033447, "learning_rate": 8.852736522729428e-06, "loss": 0.6491, "step": 20358 }, { "epoch": 0.2434151532179964, "grad_norm": 2.800990104675293, "learning_rate": 8.852613111616572e-06, "loss": 0.5944, "step": 20359 }, { "epoch": 0.24342710936285705, "grad_norm": 2.8301961421966553, "learning_rate": 8.852489694726712e-06, "loss": 0.6243, "step": 20360 }, { "epoch": 0.24343906550771768, "grad_norm": 3.993149995803833, "learning_rate": 8.852366272060027e-06, "loss": 0.674, "step": 20361 }, { "epoch": 0.24345102165257834, "grad_norm": 2.278853178024292, "learning_rate": 8.852242843616707e-06, "loss": 0.5877, "step": 20362 }, { "epoch": 0.243462977797439, "grad_norm": 1.9000325202941895, "learning_rate": 8.852119409396934e-06, "loss": 0.5577, "step": 20363 }, { "epoch": 0.24347493394229963, "grad_norm": 2.085869073867798, "learning_rate": 8.851995969400896e-06, "loss": 0.5319, "step": 20364 }, { "epoch": 0.2434868900871603, "grad_norm": 2.690744161605835, "learning_rate": 8.851872523628774e-06, "loss": 0.5823, "step": 20365 }, { "epoch": 0.24349884623202095, "grad_norm": 3.3283183574676514, "learning_rate": 8.851749072080757e-06, "loss": 0.6051, "step": 20366 }, { "epoch": 0.2435108023768816, "grad_norm": 2.0245440006256104, "learning_rate": 8.851625614757026e-06, "loss": 0.6338, "step": 20367 }, { "epoch": 0.24352275852174224, "grad_norm": 2.2390365600585938, "learning_rate": 8.851502151657772e-06, "loss": 0.5081, "step": 20368 }, { "epoch": 0.2435347146666029, "grad_norm": 2.059654951095581, "learning_rate": 8.851378682783177e-06, "loss": 0.5032, "step": 20369 }, { "epoch": 0.24354667081146356, "grad_norm": 1.8436049222946167, "learning_rate": 8.851255208133422e-06, "loss": 0.5392, "step": 20370 }, { "epoch": 0.24355862695632421, "grad_norm": 3.517543315887451, "learning_rate": 8.8511317277087e-06, "loss": 0.6557, "step": 20371 }, { "epoch": 0.24357058310118485, "grad_norm": 1.932098627090454, "learning_rate": 8.85100824150919e-06, "loss": 0.5855, "step": 20372 }, { "epoch": 0.2435825392460455, "grad_norm": 2.636986494064331, "learning_rate": 8.85088474953508e-06, "loss": 0.5909, "step": 20373 }, { "epoch": 0.24359449539090616, "grad_norm": 5.984766483306885, "learning_rate": 8.850761251786558e-06, "loss": 0.6322, "step": 20374 }, { "epoch": 0.2436064515357668, "grad_norm": 1.8522224426269531, "learning_rate": 8.850637748263802e-06, "loss": 0.6278, "step": 20375 }, { "epoch": 0.24361840768062745, "grad_norm": 2.990265369415283, "learning_rate": 8.850514238967001e-06, "loss": 0.6132, "step": 20376 }, { "epoch": 0.2436303638254881, "grad_norm": 4.308039665222168, "learning_rate": 8.85039072389634e-06, "loss": 0.5815, "step": 20377 }, { "epoch": 0.24364231997034877, "grad_norm": 3.3601458072662354, "learning_rate": 8.850267203052007e-06, "loss": 0.6168, "step": 20378 }, { "epoch": 0.2436542761152094, "grad_norm": 1.8961173295974731, "learning_rate": 8.850143676434184e-06, "loss": 0.585, "step": 20379 }, { "epoch": 0.24366623226007006, "grad_norm": 1.5280216932296753, "learning_rate": 8.850020144043055e-06, "loss": 0.5969, "step": 20380 }, { "epoch": 0.24367818840493072, "grad_norm": 2.083226442337036, "learning_rate": 8.849896605878809e-06, "loss": 0.5648, "step": 20381 }, { "epoch": 0.24369014454979138, "grad_norm": 2.566185712814331, "learning_rate": 8.849773061941628e-06, "loss": 0.7378, "step": 20382 }, { "epoch": 0.243702100694652, "grad_norm": 2.0360193252563477, "learning_rate": 8.849649512231699e-06, "loss": 0.4761, "step": 20383 }, { "epoch": 0.24371405683951267, "grad_norm": 6.566355228424072, "learning_rate": 8.849525956749207e-06, "loss": 0.6859, "step": 20384 }, { "epoch": 0.24372601298437332, "grad_norm": 2.5619492530822754, "learning_rate": 8.849402395494338e-06, "loss": 0.6016, "step": 20385 }, { "epoch": 0.24373796912923398, "grad_norm": 1.9265291690826416, "learning_rate": 8.849278828467274e-06, "loss": 0.5785, "step": 20386 }, { "epoch": 0.24374992527409461, "grad_norm": 1.9531846046447754, "learning_rate": 8.849155255668205e-06, "loss": 0.6112, "step": 20387 }, { "epoch": 0.24376188141895527, "grad_norm": 8.125650405883789, "learning_rate": 8.849031677097312e-06, "loss": 0.6383, "step": 20388 }, { "epoch": 0.24377383756381593, "grad_norm": 1.9215213060379028, "learning_rate": 8.848908092754785e-06, "loss": 0.5319, "step": 20389 }, { "epoch": 0.24378579370867656, "grad_norm": 2.1734726428985596, "learning_rate": 8.848784502640806e-06, "loss": 0.6664, "step": 20390 }, { "epoch": 0.24379774985353722, "grad_norm": 1.6764863729476929, "learning_rate": 8.84866090675556e-06, "loss": 0.5678, "step": 20391 }, { "epoch": 0.24380970599839788, "grad_norm": 4.634640216827393, "learning_rate": 8.848537305099233e-06, "loss": 0.6341, "step": 20392 }, { "epoch": 0.24382166214325854, "grad_norm": 2.2948029041290283, "learning_rate": 8.848413697672011e-06, "loss": 0.6069, "step": 20393 }, { "epoch": 0.24383361828811917, "grad_norm": 2.3096706867218018, "learning_rate": 8.848290084474078e-06, "loss": 0.6617, "step": 20394 }, { "epoch": 0.24384557443297983, "grad_norm": 2.4206979274749756, "learning_rate": 8.848166465505623e-06, "loss": 0.6135, "step": 20395 }, { "epoch": 0.2438575305778405, "grad_norm": 1.823976993560791, "learning_rate": 8.848042840766827e-06, "loss": 0.5531, "step": 20396 }, { "epoch": 0.24386948672270115, "grad_norm": 3.8982415199279785, "learning_rate": 8.847919210257876e-06, "loss": 0.6617, "step": 20397 }, { "epoch": 0.24388144286756178, "grad_norm": 2.821589708328247, "learning_rate": 8.84779557397896e-06, "loss": 0.702, "step": 20398 }, { "epoch": 0.24389339901242243, "grad_norm": 2.5935187339782715, "learning_rate": 8.847671931930257e-06, "loss": 0.6889, "step": 20399 }, { "epoch": 0.2439053551572831, "grad_norm": 7.4359564781188965, "learning_rate": 8.847548284111958e-06, "loss": 0.6563, "step": 20400 }, { "epoch": 0.24391731130214372, "grad_norm": 2.535703420639038, "learning_rate": 8.847424630524246e-06, "loss": 0.5923, "step": 20401 }, { "epoch": 0.24392926744700438, "grad_norm": 3.779242992401123, "learning_rate": 8.847300971167307e-06, "loss": 0.658, "step": 20402 }, { "epoch": 0.24394122359186504, "grad_norm": 2.1306984424591064, "learning_rate": 8.847177306041328e-06, "loss": 0.5859, "step": 20403 }, { "epoch": 0.2439531797367257, "grad_norm": 3.0620005130767822, "learning_rate": 8.847053635146492e-06, "loss": 0.6354, "step": 20404 }, { "epoch": 0.24396513588158633, "grad_norm": 5.5111918449401855, "learning_rate": 8.846929958482985e-06, "loss": 0.5729, "step": 20405 }, { "epoch": 0.243977092026447, "grad_norm": 2.7611606121063232, "learning_rate": 8.846806276050995e-06, "loss": 0.6696, "step": 20406 }, { "epoch": 0.24398904817130765, "grad_norm": 2.7217135429382324, "learning_rate": 8.846682587850701e-06, "loss": 0.6146, "step": 20407 }, { "epoch": 0.2440010043161683, "grad_norm": 5.2867865562438965, "learning_rate": 8.846558893882297e-06, "loss": 0.5757, "step": 20408 }, { "epoch": 0.24401296046102894, "grad_norm": 4.151219367980957, "learning_rate": 8.846435194145963e-06, "loss": 0.6247, "step": 20409 }, { "epoch": 0.2440249166058896, "grad_norm": 1.8750494718551636, "learning_rate": 8.846311488641886e-06, "loss": 0.6599, "step": 20410 }, { "epoch": 0.24403687275075026, "grad_norm": 4.53175163269043, "learning_rate": 8.84618777737025e-06, "loss": 0.6434, "step": 20411 }, { "epoch": 0.2440488288956109, "grad_norm": 4.675606727600098, "learning_rate": 8.846064060331243e-06, "loss": 0.5682, "step": 20412 }, { "epoch": 0.24406078504047155, "grad_norm": 2.2474277019500732, "learning_rate": 8.845940337525047e-06, "loss": 0.4859, "step": 20413 }, { "epoch": 0.2440727411853322, "grad_norm": 2.3516438007354736, "learning_rate": 8.845816608951852e-06, "loss": 0.6747, "step": 20414 }, { "epoch": 0.24408469733019286, "grad_norm": 4.8702073097229, "learning_rate": 8.84569287461184e-06, "loss": 0.562, "step": 20415 }, { "epoch": 0.2440966534750535, "grad_norm": 1.6575103998184204, "learning_rate": 8.845569134505199e-06, "loss": 0.7522, "step": 20416 }, { "epoch": 0.24410860961991415, "grad_norm": 2.375109910964966, "learning_rate": 8.845445388632113e-06, "loss": 0.5827, "step": 20417 }, { "epoch": 0.2441205657647748, "grad_norm": 3.9496593475341797, "learning_rate": 8.845321636992768e-06, "loss": 0.5779, "step": 20418 }, { "epoch": 0.24413252190963547, "grad_norm": 3.9415159225463867, "learning_rate": 8.84519787958735e-06, "loss": 0.6139, "step": 20419 }, { "epoch": 0.2441444780544961, "grad_norm": 2.065424919128418, "learning_rate": 8.845074116416044e-06, "loss": 0.624, "step": 20420 }, { "epoch": 0.24415643419935676, "grad_norm": 4.144406795501709, "learning_rate": 8.844950347479035e-06, "loss": 0.5615, "step": 20421 }, { "epoch": 0.24416839034421742, "grad_norm": 2.149106979370117, "learning_rate": 8.84482657277651e-06, "loss": 0.6749, "step": 20422 }, { "epoch": 0.24418034648907805, "grad_norm": 2.831582546234131, "learning_rate": 8.844702792308653e-06, "loss": 0.4876, "step": 20423 }, { "epoch": 0.2441923026339387, "grad_norm": 2.3646295070648193, "learning_rate": 8.844579006075651e-06, "loss": 0.6397, "step": 20424 }, { "epoch": 0.24420425877879937, "grad_norm": 1.476921558380127, "learning_rate": 8.84445521407769e-06, "loss": 0.5851, "step": 20425 }, { "epoch": 0.24421621492366002, "grad_norm": 2.574747085571289, "learning_rate": 8.844331416314955e-06, "loss": 0.7067, "step": 20426 }, { "epoch": 0.24422817106852066, "grad_norm": 2.420426845550537, "learning_rate": 8.84420761278763e-06, "loss": 0.535, "step": 20427 }, { "epoch": 0.24424012721338131, "grad_norm": 2.8188705444335938, "learning_rate": 8.844083803495902e-06, "loss": 0.584, "step": 20428 }, { "epoch": 0.24425208335824197, "grad_norm": 3.1551105976104736, "learning_rate": 8.843959988439956e-06, "loss": 0.6223, "step": 20429 }, { "epoch": 0.24426403950310263, "grad_norm": 2.8471105098724365, "learning_rate": 8.843836167619979e-06, "loss": 0.5629, "step": 20430 }, { "epoch": 0.24427599564796326, "grad_norm": 2.4494190216064453, "learning_rate": 8.843712341036157e-06, "loss": 0.72, "step": 20431 }, { "epoch": 0.24428795179282392, "grad_norm": 7.52224063873291, "learning_rate": 8.843588508688674e-06, "loss": 0.6803, "step": 20432 }, { "epoch": 0.24429990793768458, "grad_norm": 2.426906108856201, "learning_rate": 8.843464670577716e-06, "loss": 0.6003, "step": 20433 }, { "epoch": 0.2443118640825452, "grad_norm": 2.224405527114868, "learning_rate": 8.84334082670347e-06, "loss": 0.5642, "step": 20434 }, { "epoch": 0.24432382022740587, "grad_norm": 5.998167514801025, "learning_rate": 8.84321697706612e-06, "loss": 0.5943, "step": 20435 }, { "epoch": 0.24433577637226653, "grad_norm": 3.012800931930542, "learning_rate": 8.843093121665853e-06, "loss": 0.578, "step": 20436 }, { "epoch": 0.2443477325171272, "grad_norm": 6.236869812011719, "learning_rate": 8.842969260502854e-06, "loss": 0.5791, "step": 20437 }, { "epoch": 0.24435968866198782, "grad_norm": 2.0310139656066895, "learning_rate": 8.842845393577309e-06, "loss": 0.6869, "step": 20438 }, { "epoch": 0.24437164480684848, "grad_norm": 5.5277204513549805, "learning_rate": 8.842721520889404e-06, "loss": 0.4819, "step": 20439 }, { "epoch": 0.24438360095170913, "grad_norm": 3.176771640777588, "learning_rate": 8.842597642439324e-06, "loss": 0.5665, "step": 20440 }, { "epoch": 0.2443955570965698, "grad_norm": 11.480912208557129, "learning_rate": 8.842473758227255e-06, "loss": 0.6665, "step": 20441 }, { "epoch": 0.24440751324143042, "grad_norm": 2.2364587783813477, "learning_rate": 8.842349868253383e-06, "loss": 0.7377, "step": 20442 }, { "epoch": 0.24441946938629108, "grad_norm": 2.540750741958618, "learning_rate": 8.842225972517893e-06, "loss": 0.6467, "step": 20443 }, { "epoch": 0.24443142553115174, "grad_norm": 3.6039795875549316, "learning_rate": 8.842102071020973e-06, "loss": 0.5395, "step": 20444 }, { "epoch": 0.2444433816760124, "grad_norm": 2.237321376800537, "learning_rate": 8.841978163762805e-06, "loss": 0.5462, "step": 20445 }, { "epoch": 0.24445533782087303, "grad_norm": 2.6642887592315674, "learning_rate": 8.84185425074358e-06, "loss": 0.5753, "step": 20446 }, { "epoch": 0.2444672939657337, "grad_norm": 2.082700490951538, "learning_rate": 8.84173033196348e-06, "loss": 0.6047, "step": 20447 }, { "epoch": 0.24447925011059435, "grad_norm": 3.017683982849121, "learning_rate": 8.84160640742269e-06, "loss": 0.6172, "step": 20448 }, { "epoch": 0.24449120625545498, "grad_norm": 3.4288082122802734, "learning_rate": 8.8414824771214e-06, "loss": 0.6471, "step": 20449 }, { "epoch": 0.24450316240031564, "grad_norm": 2.9065239429473877, "learning_rate": 8.841358541059791e-06, "loss": 0.5866, "step": 20450 }, { "epoch": 0.2445151185451763, "grad_norm": 2.2640490531921387, "learning_rate": 8.841234599238054e-06, "loss": 0.6371, "step": 20451 }, { "epoch": 0.24452707469003696, "grad_norm": 2.201817035675049, "learning_rate": 8.84111065165637e-06, "loss": 0.6129, "step": 20452 }, { "epoch": 0.24453903083489759, "grad_norm": 3.525212049484253, "learning_rate": 8.840986698314928e-06, "loss": 0.589, "step": 20453 }, { "epoch": 0.24455098697975824, "grad_norm": 1.9378808736801147, "learning_rate": 8.840862739213911e-06, "loss": 0.5907, "step": 20454 }, { "epoch": 0.2445629431246189, "grad_norm": 3.0238888263702393, "learning_rate": 8.840738774353508e-06, "loss": 0.51, "step": 20455 }, { "epoch": 0.24457489926947956, "grad_norm": 3.428025007247925, "learning_rate": 8.840614803733902e-06, "loss": 0.6365, "step": 20456 }, { "epoch": 0.2445868554143402, "grad_norm": 3.3083529472351074, "learning_rate": 8.840490827355283e-06, "loss": 0.6262, "step": 20457 }, { "epoch": 0.24459881155920085, "grad_norm": 2.344677686691284, "learning_rate": 8.840366845217833e-06, "loss": 0.6647, "step": 20458 }, { "epoch": 0.2446107677040615, "grad_norm": 2.0709264278411865, "learning_rate": 8.84024285732174e-06, "loss": 0.6558, "step": 20459 }, { "epoch": 0.24462272384892214, "grad_norm": 3.3411123752593994, "learning_rate": 8.840118863667188e-06, "loss": 0.5771, "step": 20460 }, { "epoch": 0.2446346799937828, "grad_norm": 5.20996618270874, "learning_rate": 8.839994864254364e-06, "loss": 0.6529, "step": 20461 }, { "epoch": 0.24464663613864346, "grad_norm": 11.01443862915039, "learning_rate": 8.839870859083456e-06, "loss": 0.6815, "step": 20462 }, { "epoch": 0.24465859228350412, "grad_norm": 12.034637451171875, "learning_rate": 8.839746848154646e-06, "loss": 0.591, "step": 20463 }, { "epoch": 0.24467054842836475, "grad_norm": 5.422429084777832, "learning_rate": 8.839622831468123e-06, "loss": 0.5446, "step": 20464 }, { "epoch": 0.2446825045732254, "grad_norm": 2.1730105876922607, "learning_rate": 8.839498809024072e-06, "loss": 0.6392, "step": 20465 }, { "epoch": 0.24469446071808607, "grad_norm": 2.1843113899230957, "learning_rate": 8.839374780822678e-06, "loss": 0.6164, "step": 20466 }, { "epoch": 0.24470641686294672, "grad_norm": 5.115034580230713, "learning_rate": 8.83925074686413e-06, "loss": 0.5767, "step": 20467 }, { "epoch": 0.24471837300780735, "grad_norm": 1.8227556943893433, "learning_rate": 8.839126707148608e-06, "loss": 0.6362, "step": 20468 }, { "epoch": 0.244730329152668, "grad_norm": 2.05741286277771, "learning_rate": 8.839002661676306e-06, "loss": 0.68, "step": 20469 }, { "epoch": 0.24474228529752867, "grad_norm": 1.9604551792144775, "learning_rate": 8.838878610447403e-06, "loss": 0.5122, "step": 20470 }, { "epoch": 0.2447542414423893, "grad_norm": 2.7809410095214844, "learning_rate": 8.838754553462088e-06, "loss": 0.6775, "step": 20471 }, { "epoch": 0.24476619758724996, "grad_norm": 1.995633602142334, "learning_rate": 8.83863049072055e-06, "loss": 0.6171, "step": 20472 }, { "epoch": 0.24477815373211062, "grad_norm": 4.846149921417236, "learning_rate": 8.83850642222297e-06, "loss": 0.5441, "step": 20473 }, { "epoch": 0.24479010987697128, "grad_norm": 1.9368187189102173, "learning_rate": 8.838382347969535e-06, "loss": 0.5458, "step": 20474 }, { "epoch": 0.2448020660218319, "grad_norm": 3.7019660472869873, "learning_rate": 8.838258267960433e-06, "loss": 0.6429, "step": 20475 }, { "epoch": 0.24481402216669257, "grad_norm": 1.9335579872131348, "learning_rate": 8.838134182195847e-06, "loss": 0.6484, "step": 20476 }, { "epoch": 0.24482597831155323, "grad_norm": 5.421595096588135, "learning_rate": 8.838010090675968e-06, "loss": 0.5848, "step": 20477 }, { "epoch": 0.24483793445641389, "grad_norm": 40.23942947387695, "learning_rate": 8.837885993400979e-06, "loss": 0.5295, "step": 20478 }, { "epoch": 0.24484989060127452, "grad_norm": 1.904418706893921, "learning_rate": 8.837761890371065e-06, "loss": 0.5805, "step": 20479 }, { "epoch": 0.24486184674613518, "grad_norm": 3.03364896774292, "learning_rate": 8.837637781586414e-06, "loss": 0.6721, "step": 20480 }, { "epoch": 0.24487380289099583, "grad_norm": 1.608662486076355, "learning_rate": 8.837513667047211e-06, "loss": 0.5527, "step": 20481 }, { "epoch": 0.24488575903585647, "grad_norm": 2.147158622741699, "learning_rate": 8.837389546753643e-06, "loss": 0.6388, "step": 20482 }, { "epoch": 0.24489771518071712, "grad_norm": 2.433722734451294, "learning_rate": 8.837265420705897e-06, "loss": 0.6595, "step": 20483 }, { "epoch": 0.24490967132557778, "grad_norm": 2.3649322986602783, "learning_rate": 8.837141288904155e-06, "loss": 0.6747, "step": 20484 }, { "epoch": 0.24492162747043844, "grad_norm": 1.872081995010376, "learning_rate": 8.837017151348608e-06, "loss": 0.5974, "step": 20485 }, { "epoch": 0.24493358361529907, "grad_norm": 4.533174514770508, "learning_rate": 8.83689300803944e-06, "loss": 0.6415, "step": 20486 }, { "epoch": 0.24494553976015973, "grad_norm": 4.230942249298096, "learning_rate": 8.836768858976836e-06, "loss": 0.5691, "step": 20487 }, { "epoch": 0.2449574959050204, "grad_norm": 2.326183319091797, "learning_rate": 8.836644704160986e-06, "loss": 0.694, "step": 20488 }, { "epoch": 0.24496945204988105, "grad_norm": 1.8616281747817993, "learning_rate": 8.836520543592071e-06, "loss": 0.5433, "step": 20489 }, { "epoch": 0.24498140819474168, "grad_norm": 3.928701639175415, "learning_rate": 8.836396377270282e-06, "loss": 0.6505, "step": 20490 }, { "epoch": 0.24499336433960234, "grad_norm": 3.167419195175171, "learning_rate": 8.836272205195802e-06, "loss": 0.5752, "step": 20491 }, { "epoch": 0.245005320484463, "grad_norm": 1.8605197668075562, "learning_rate": 8.836148027368817e-06, "loss": 0.5742, "step": 20492 }, { "epoch": 0.24501727662932363, "grad_norm": 2.3384768962860107, "learning_rate": 8.836023843789517e-06, "loss": 0.5915, "step": 20493 }, { "epoch": 0.24502923277418429, "grad_norm": 2.99820876121521, "learning_rate": 8.835899654458084e-06, "loss": 0.5774, "step": 20494 }, { "epoch": 0.24504118891904494, "grad_norm": 1.6994189023971558, "learning_rate": 8.835775459374705e-06, "loss": 0.6198, "step": 20495 }, { "epoch": 0.2450531450639056, "grad_norm": 3.7935802936553955, "learning_rate": 8.835651258539568e-06, "loss": 0.6221, "step": 20496 }, { "epoch": 0.24506510120876623, "grad_norm": 1.7955939769744873, "learning_rate": 8.83552705195286e-06, "loss": 0.5709, "step": 20497 }, { "epoch": 0.2450770573536269, "grad_norm": 3.1300244331359863, "learning_rate": 8.835402839614762e-06, "loss": 0.5557, "step": 20498 }, { "epoch": 0.24508901349848755, "grad_norm": 3.0396738052368164, "learning_rate": 8.835278621525466e-06, "loss": 0.6499, "step": 20499 }, { "epoch": 0.2451009696433482, "grad_norm": 5.574234485626221, "learning_rate": 8.835154397685156e-06, "loss": 0.6198, "step": 20500 }, { "epoch": 0.24511292578820884, "grad_norm": 2.2496469020843506, "learning_rate": 8.835030168094018e-06, "loss": 0.6438, "step": 20501 }, { "epoch": 0.2451248819330695, "grad_norm": 2.152050733566284, "learning_rate": 8.834905932752239e-06, "loss": 0.6869, "step": 20502 }, { "epoch": 0.24513683807793016, "grad_norm": 4.568933010101318, "learning_rate": 8.834781691660003e-06, "loss": 0.7159, "step": 20503 }, { "epoch": 0.24514879422279082, "grad_norm": 1.9371287822723389, "learning_rate": 8.8346574448175e-06, "loss": 0.6609, "step": 20504 }, { "epoch": 0.24516075036765145, "grad_norm": 1.822430968284607, "learning_rate": 8.834533192224915e-06, "loss": 0.5828, "step": 20505 }, { "epoch": 0.2451727065125121, "grad_norm": 1.783712387084961, "learning_rate": 8.834408933882435e-06, "loss": 0.6183, "step": 20506 }, { "epoch": 0.24518466265737276, "grad_norm": 2.254077672958374, "learning_rate": 8.834284669790243e-06, "loss": 0.5949, "step": 20507 }, { "epoch": 0.2451966188022334, "grad_norm": 4.418447017669678, "learning_rate": 8.834160399948529e-06, "loss": 0.6564, "step": 20508 }, { "epoch": 0.24520857494709405, "grad_norm": 9.12903118133545, "learning_rate": 8.834036124357475e-06, "loss": 0.5815, "step": 20509 }, { "epoch": 0.2452205310919547, "grad_norm": 2.3738296031951904, "learning_rate": 8.833911843017273e-06, "loss": 0.5857, "step": 20510 }, { "epoch": 0.24523248723681537, "grad_norm": 4.594127655029297, "learning_rate": 8.833787555928106e-06, "loss": 0.6515, "step": 20511 }, { "epoch": 0.245244443381676, "grad_norm": 2.0604031085968018, "learning_rate": 8.83366326309016e-06, "loss": 0.5939, "step": 20512 }, { "epoch": 0.24525639952653666, "grad_norm": 2.902404308319092, "learning_rate": 8.833538964503622e-06, "loss": 0.644, "step": 20513 }, { "epoch": 0.24526835567139732, "grad_norm": 2.478604555130005, "learning_rate": 8.83341466016868e-06, "loss": 0.6775, "step": 20514 }, { "epoch": 0.24528031181625798, "grad_norm": 6.859928131103516, "learning_rate": 8.833290350085519e-06, "loss": 0.6961, "step": 20515 }, { "epoch": 0.2452922679611186, "grad_norm": 2.4306955337524414, "learning_rate": 8.833166034254325e-06, "loss": 0.6349, "step": 20516 }, { "epoch": 0.24530422410597927, "grad_norm": 4.414379596710205, "learning_rate": 8.833041712675286e-06, "loss": 0.5062, "step": 20517 }, { "epoch": 0.24531618025083993, "grad_norm": 1.608432412147522, "learning_rate": 8.832917385348586e-06, "loss": 0.56, "step": 20518 }, { "epoch": 0.24532813639570056, "grad_norm": 21.068042755126953, "learning_rate": 8.832793052274412e-06, "loss": 0.5522, "step": 20519 }, { "epoch": 0.24534009254056122, "grad_norm": 2.835151433944702, "learning_rate": 8.832668713452951e-06, "loss": 0.6267, "step": 20520 }, { "epoch": 0.24535204868542188, "grad_norm": 2.580897569656372, "learning_rate": 8.832544368884393e-06, "loss": 0.5989, "step": 20521 }, { "epoch": 0.24536400483028253, "grad_norm": 7.102370262145996, "learning_rate": 8.832420018568919e-06, "loss": 0.5655, "step": 20522 }, { "epoch": 0.24537596097514316, "grad_norm": 4.762060165405273, "learning_rate": 8.832295662506716e-06, "loss": 0.5539, "step": 20523 }, { "epoch": 0.24538791712000382, "grad_norm": 1.6375850439071655, "learning_rate": 8.832171300697974e-06, "loss": 0.6608, "step": 20524 }, { "epoch": 0.24539987326486448, "grad_norm": 2.1764116287231445, "learning_rate": 8.832046933142878e-06, "loss": 0.6562, "step": 20525 }, { "epoch": 0.24541182940972514, "grad_norm": 1.5408962965011597, "learning_rate": 8.831922559841613e-06, "loss": 0.5342, "step": 20526 }, { "epoch": 0.24542378555458577, "grad_norm": 2.3199992179870605, "learning_rate": 8.831798180794367e-06, "loss": 0.5798, "step": 20527 }, { "epoch": 0.24543574169944643, "grad_norm": 1.7223775386810303, "learning_rate": 8.831673796001325e-06, "loss": 0.5172, "step": 20528 }, { "epoch": 0.2454476978443071, "grad_norm": 1.9869405031204224, "learning_rate": 8.831549405462675e-06, "loss": 0.7265, "step": 20529 }, { "epoch": 0.24545965398916772, "grad_norm": 2.645975112915039, "learning_rate": 8.831425009178603e-06, "loss": 0.5639, "step": 20530 }, { "epoch": 0.24547161013402838, "grad_norm": 1.7810896635055542, "learning_rate": 8.831300607149296e-06, "loss": 0.6879, "step": 20531 }, { "epoch": 0.24548356627888904, "grad_norm": 3.6296966075897217, "learning_rate": 8.83117619937494e-06, "loss": 0.6585, "step": 20532 }, { "epoch": 0.2454955224237497, "grad_norm": 1.9447740316390991, "learning_rate": 8.831051785855722e-06, "loss": 0.541, "step": 20533 }, { "epoch": 0.24550747856861033, "grad_norm": 2.5843048095703125, "learning_rate": 8.830927366591827e-06, "loss": 0.6316, "step": 20534 }, { "epoch": 0.24551943471347099, "grad_norm": 2.9347469806671143, "learning_rate": 8.830802941583445e-06, "loss": 0.5814, "step": 20535 }, { "epoch": 0.24553139085833164, "grad_norm": 6.405481338500977, "learning_rate": 8.830678510830757e-06, "loss": 0.611, "step": 20536 }, { "epoch": 0.2455433470031923, "grad_norm": 5.697918891906738, "learning_rate": 8.830554074333955e-06, "loss": 0.5443, "step": 20537 }, { "epoch": 0.24555530314805293, "grad_norm": 2.3116962909698486, "learning_rate": 8.830429632093225e-06, "loss": 0.5531, "step": 20538 }, { "epoch": 0.2455672592929136, "grad_norm": 3.48889422416687, "learning_rate": 8.83030518410875e-06, "loss": 0.5257, "step": 20539 }, { "epoch": 0.24557921543777425, "grad_norm": 2.109165668487549, "learning_rate": 8.83018073038072e-06, "loss": 0.6682, "step": 20540 }, { "epoch": 0.24559117158263488, "grad_norm": 2.1489834785461426, "learning_rate": 8.830056270909319e-06, "loss": 0.556, "step": 20541 }, { "epoch": 0.24560312772749554, "grad_norm": 1.8367213010787964, "learning_rate": 8.829931805694736e-06, "loss": 0.6389, "step": 20542 }, { "epoch": 0.2456150838723562, "grad_norm": 2.884740114212036, "learning_rate": 8.829807334737157e-06, "loss": 0.5696, "step": 20543 }, { "epoch": 0.24562704001721686, "grad_norm": 2.233344793319702, "learning_rate": 8.829682858036768e-06, "loss": 0.6994, "step": 20544 }, { "epoch": 0.2456389961620775, "grad_norm": 4.731326103210449, "learning_rate": 8.829558375593754e-06, "loss": 0.5251, "step": 20545 }, { "epoch": 0.24565095230693815, "grad_norm": 4.117171764373779, "learning_rate": 8.829433887408305e-06, "loss": 0.5919, "step": 20546 }, { "epoch": 0.2456629084517988, "grad_norm": 2.2912228107452393, "learning_rate": 8.829309393480607e-06, "loss": 0.5571, "step": 20547 }, { "epoch": 0.24567486459665946, "grad_norm": 2.1681556701660156, "learning_rate": 8.829184893810846e-06, "loss": 0.5902, "step": 20548 }, { "epoch": 0.2456868207415201, "grad_norm": 2.1241111755371094, "learning_rate": 8.829060388399208e-06, "loss": 0.5904, "step": 20549 }, { "epoch": 0.24569877688638075, "grad_norm": 2.4189493656158447, "learning_rate": 8.82893587724588e-06, "loss": 0.601, "step": 20550 }, { "epoch": 0.2457107330312414, "grad_norm": 10.604104042053223, "learning_rate": 8.828811360351051e-06, "loss": 0.5909, "step": 20551 }, { "epoch": 0.24572268917610207, "grad_norm": 3.4715194702148438, "learning_rate": 8.828686837714905e-06, "loss": 0.6605, "step": 20552 }, { "epoch": 0.2457346453209627, "grad_norm": 3.082122802734375, "learning_rate": 8.828562309337627e-06, "loss": 0.6452, "step": 20553 }, { "epoch": 0.24574660146582336, "grad_norm": 1.8437142372131348, "learning_rate": 8.828437775219408e-06, "loss": 0.6209, "step": 20554 }, { "epoch": 0.24575855761068402, "grad_norm": 1.825392723083496, "learning_rate": 8.828313235360433e-06, "loss": 0.5654, "step": 20555 }, { "epoch": 0.24577051375554465, "grad_norm": 3.1369941234588623, "learning_rate": 8.828188689760888e-06, "loss": 0.5101, "step": 20556 }, { "epoch": 0.2457824699004053, "grad_norm": 1.9454468488693237, "learning_rate": 8.828064138420961e-06, "loss": 0.6064, "step": 20557 }, { "epoch": 0.24579442604526597, "grad_norm": 5.495707988739014, "learning_rate": 8.827939581340839e-06, "loss": 0.7131, "step": 20558 }, { "epoch": 0.24580638219012663, "grad_norm": 2.681706428527832, "learning_rate": 8.827815018520707e-06, "loss": 0.615, "step": 20559 }, { "epoch": 0.24581833833498726, "grad_norm": 4.083062171936035, "learning_rate": 8.827690449960754e-06, "loss": 0.6791, "step": 20560 }, { "epoch": 0.24583029447984792, "grad_norm": 2.0351510047912598, "learning_rate": 8.827565875661164e-06, "loss": 0.5878, "step": 20561 }, { "epoch": 0.24584225062470857, "grad_norm": 3.9339845180511475, "learning_rate": 8.827441295622125e-06, "loss": 0.6378, "step": 20562 }, { "epoch": 0.24585420676956923, "grad_norm": 1.8404494524002075, "learning_rate": 8.827316709843827e-06, "loss": 0.6702, "step": 20563 }, { "epoch": 0.24586616291442986, "grad_norm": 2.6410202980041504, "learning_rate": 8.827192118326451e-06, "loss": 0.5371, "step": 20564 }, { "epoch": 0.24587811905929052, "grad_norm": 4.245324611663818, "learning_rate": 8.827067521070187e-06, "loss": 0.4583, "step": 20565 }, { "epoch": 0.24589007520415118, "grad_norm": 2.2118983268737793, "learning_rate": 8.826942918075223e-06, "loss": 0.5341, "step": 20566 }, { "epoch": 0.2459020313490118, "grad_norm": 2.0817477703094482, "learning_rate": 8.826818309341744e-06, "loss": 0.4738, "step": 20567 }, { "epoch": 0.24591398749387247, "grad_norm": 2.8316876888275146, "learning_rate": 8.826693694869938e-06, "loss": 0.7165, "step": 20568 }, { "epoch": 0.24592594363873313, "grad_norm": 3.1382648944854736, "learning_rate": 8.826569074659991e-06, "loss": 0.5868, "step": 20569 }, { "epoch": 0.2459378997835938, "grad_norm": 5.142947196960449, "learning_rate": 8.82644444871209e-06, "loss": 0.5518, "step": 20570 }, { "epoch": 0.24594985592845442, "grad_norm": 2.7479841709136963, "learning_rate": 8.826319817026421e-06, "loss": 0.6051, "step": 20571 }, { "epoch": 0.24596181207331508, "grad_norm": 5.38829231262207, "learning_rate": 8.826195179603172e-06, "loss": 0.6308, "step": 20572 }, { "epoch": 0.24597376821817574, "grad_norm": 2.2570018768310547, "learning_rate": 8.82607053644253e-06, "loss": 0.5881, "step": 20573 }, { "epoch": 0.2459857243630364, "grad_norm": 2.5544631481170654, "learning_rate": 8.825945887544683e-06, "loss": 0.5894, "step": 20574 }, { "epoch": 0.24599768050789703, "grad_norm": 2.223194122314453, "learning_rate": 8.825821232909815e-06, "loss": 0.5616, "step": 20575 }, { "epoch": 0.24600963665275768, "grad_norm": 2.6561501026153564, "learning_rate": 8.825696572538116e-06, "loss": 0.6027, "step": 20576 }, { "epoch": 0.24602159279761834, "grad_norm": 3.8943326473236084, "learning_rate": 8.825571906429769e-06, "loss": 0.6124, "step": 20577 }, { "epoch": 0.24603354894247897, "grad_norm": 1.8201383352279663, "learning_rate": 8.825447234584965e-06, "loss": 0.5998, "step": 20578 }, { "epoch": 0.24604550508733963, "grad_norm": 3.359301805496216, "learning_rate": 8.825322557003887e-06, "loss": 0.5534, "step": 20579 }, { "epoch": 0.2460574612322003, "grad_norm": 3.0169453620910645, "learning_rate": 8.825197873686728e-06, "loss": 0.655, "step": 20580 }, { "epoch": 0.24606941737706095, "grad_norm": 2.2475457191467285, "learning_rate": 8.825073184633668e-06, "loss": 0.6165, "step": 20581 }, { "epoch": 0.24608137352192158, "grad_norm": 3.6075544357299805, "learning_rate": 8.824948489844898e-06, "loss": 0.595, "step": 20582 }, { "epoch": 0.24609332966678224, "grad_norm": 3.0519142150878906, "learning_rate": 8.824823789320605e-06, "loss": 0.6523, "step": 20583 }, { "epoch": 0.2461052858116429, "grad_norm": 4.201488018035889, "learning_rate": 8.824699083060975e-06, "loss": 0.6547, "step": 20584 }, { "epoch": 0.24611724195650356, "grad_norm": 14.766741752624512, "learning_rate": 8.824574371066195e-06, "loss": 0.5724, "step": 20585 }, { "epoch": 0.2461291981013642, "grad_norm": 1.588358998298645, "learning_rate": 8.824449653336454e-06, "loss": 0.5636, "step": 20586 }, { "epoch": 0.24614115424622485, "grad_norm": 2.8248887062072754, "learning_rate": 8.824324929871933e-06, "loss": 0.6071, "step": 20587 }, { "epoch": 0.2461531103910855, "grad_norm": 4.560940265655518, "learning_rate": 8.824200200672826e-06, "loss": 0.5405, "step": 20588 }, { "epoch": 0.24616506653594614, "grad_norm": 5.511033058166504, "learning_rate": 8.824075465739317e-06, "loss": 0.6637, "step": 20589 }, { "epoch": 0.2461770226808068, "grad_norm": 8.689445495605469, "learning_rate": 8.823950725071593e-06, "loss": 0.5622, "step": 20590 }, { "epoch": 0.24618897882566745, "grad_norm": 2.2374494075775146, "learning_rate": 8.82382597866984e-06, "loss": 0.6364, "step": 20591 }, { "epoch": 0.2462009349705281, "grad_norm": 2.720456838607788, "learning_rate": 8.823701226534246e-06, "loss": 0.5932, "step": 20592 }, { "epoch": 0.24621289111538874, "grad_norm": 2.5394175052642822, "learning_rate": 8.823576468665e-06, "loss": 0.5731, "step": 20593 }, { "epoch": 0.2462248472602494, "grad_norm": 5.338416576385498, "learning_rate": 8.823451705062287e-06, "loss": 0.5875, "step": 20594 }, { "epoch": 0.24623680340511006, "grad_norm": 9.580463409423828, "learning_rate": 8.823326935726296e-06, "loss": 0.5552, "step": 20595 }, { "epoch": 0.24624875954997072, "grad_norm": 2.6249923706054688, "learning_rate": 8.823202160657211e-06, "loss": 0.6496, "step": 20596 }, { "epoch": 0.24626071569483135, "grad_norm": 15.312285423278809, "learning_rate": 8.823077379855223e-06, "loss": 0.5833, "step": 20597 }, { "epoch": 0.246272671839692, "grad_norm": 2.8973960876464844, "learning_rate": 8.822952593320513e-06, "loss": 0.6233, "step": 20598 }, { "epoch": 0.24628462798455267, "grad_norm": 4.095590114593506, "learning_rate": 8.822827801053275e-06, "loss": 0.6315, "step": 20599 }, { "epoch": 0.2462965841294133, "grad_norm": 2.2021493911743164, "learning_rate": 8.822703003053694e-06, "loss": 0.6087, "step": 20600 }, { "epoch": 0.24630854027427396, "grad_norm": 1.9411430358886719, "learning_rate": 8.822578199321954e-06, "loss": 0.5882, "step": 20601 }, { "epoch": 0.24632049641913462, "grad_norm": 3.043663263320923, "learning_rate": 8.822453389858244e-06, "loss": 0.7391, "step": 20602 }, { "epoch": 0.24633245256399527, "grad_norm": 2.381559371948242, "learning_rate": 8.822328574662752e-06, "loss": 0.5857, "step": 20603 }, { "epoch": 0.2463444087088559, "grad_norm": 3.266913652420044, "learning_rate": 8.822203753735666e-06, "loss": 0.665, "step": 20604 }, { "epoch": 0.24635636485371656, "grad_norm": 4.196216106414795, "learning_rate": 8.82207892707717e-06, "loss": 0.6841, "step": 20605 }, { "epoch": 0.24636832099857722, "grad_norm": 1.9776926040649414, "learning_rate": 8.821954094687455e-06, "loss": 0.5794, "step": 20606 }, { "epoch": 0.24638027714343788, "grad_norm": 4.2186713218688965, "learning_rate": 8.821829256566705e-06, "loss": 0.6482, "step": 20607 }, { "epoch": 0.2463922332882985, "grad_norm": 4.115396499633789, "learning_rate": 8.82170441271511e-06, "loss": 0.5713, "step": 20608 }, { "epoch": 0.24640418943315917, "grad_norm": 2.7784714698791504, "learning_rate": 8.821579563132855e-06, "loss": 0.5984, "step": 20609 }, { "epoch": 0.24641614557801983, "grad_norm": 3.1130545139312744, "learning_rate": 8.821454707820127e-06, "loss": 0.6218, "step": 20610 }, { "epoch": 0.2464281017228805, "grad_norm": 6.672640323638916, "learning_rate": 8.821329846777114e-06, "loss": 0.6674, "step": 20611 }, { "epoch": 0.24644005786774112, "grad_norm": 5.479443550109863, "learning_rate": 8.821204980004003e-06, "loss": 0.6471, "step": 20612 }, { "epoch": 0.24645201401260178, "grad_norm": 5.90296745300293, "learning_rate": 8.821080107500983e-06, "loss": 0.5872, "step": 20613 }, { "epoch": 0.24646397015746244, "grad_norm": 2.2290964126586914, "learning_rate": 8.82095522926824e-06, "loss": 0.6246, "step": 20614 }, { "epoch": 0.24647592630232307, "grad_norm": 1.7882664203643799, "learning_rate": 8.82083034530596e-06, "loss": 0.6522, "step": 20615 }, { "epoch": 0.24648788244718373, "grad_norm": 4.5670270919799805, "learning_rate": 8.820705455614332e-06, "loss": 0.6575, "step": 20616 }, { "epoch": 0.24649983859204438, "grad_norm": 2.723456859588623, "learning_rate": 8.82058056019354e-06, "loss": 0.5915, "step": 20617 }, { "epoch": 0.24651179473690504, "grad_norm": 2.1165175437927246, "learning_rate": 8.820455659043778e-06, "loss": 0.6195, "step": 20618 }, { "epoch": 0.24652375088176567, "grad_norm": 1.6001139879226685, "learning_rate": 8.820330752165226e-06, "loss": 0.5999, "step": 20619 }, { "epoch": 0.24653570702662633, "grad_norm": 2.2228033542633057, "learning_rate": 8.820205839558076e-06, "loss": 0.5715, "step": 20620 }, { "epoch": 0.246547663171487, "grad_norm": 3.7120375633239746, "learning_rate": 8.820080921222514e-06, "loss": 0.5732, "step": 20621 }, { "epoch": 0.24655961931634765, "grad_norm": 3.893573045730591, "learning_rate": 8.819955997158726e-06, "loss": 0.5946, "step": 20622 }, { "epoch": 0.24657157546120828, "grad_norm": 1.8775070905685425, "learning_rate": 8.819831067366902e-06, "loss": 0.6701, "step": 20623 }, { "epoch": 0.24658353160606894, "grad_norm": 11.389738082885742, "learning_rate": 8.819706131847225e-06, "loss": 0.5971, "step": 20624 }, { "epoch": 0.2465954877509296, "grad_norm": 4.016618728637695, "learning_rate": 8.819581190599886e-06, "loss": 0.6208, "step": 20625 }, { "epoch": 0.24660744389579023, "grad_norm": 5.599995136260986, "learning_rate": 8.819456243625074e-06, "loss": 0.5974, "step": 20626 }, { "epoch": 0.2466194000406509, "grad_norm": 2.7704293727874756, "learning_rate": 8.81933129092297e-06, "loss": 0.6579, "step": 20627 }, { "epoch": 0.24663135618551155, "grad_norm": 1.7931262254714966, "learning_rate": 8.819206332493768e-06, "loss": 0.5169, "step": 20628 }, { "epoch": 0.2466433123303722, "grad_norm": 1.9281333684921265, "learning_rate": 8.819081368337652e-06, "loss": 0.5793, "step": 20629 }, { "epoch": 0.24665526847523284, "grad_norm": 14.409501075744629, "learning_rate": 8.81895639845481e-06, "loss": 0.6379, "step": 20630 }, { "epoch": 0.2466672246200935, "grad_norm": 7.546848297119141, "learning_rate": 8.818831422845428e-06, "loss": 0.5396, "step": 20631 }, { "epoch": 0.24667918076495415, "grad_norm": 5.909548759460449, "learning_rate": 8.818706441509695e-06, "loss": 0.6187, "step": 20632 }, { "epoch": 0.2466911369098148, "grad_norm": 3.4690816402435303, "learning_rate": 8.818581454447798e-06, "loss": 0.5916, "step": 20633 }, { "epoch": 0.24670309305467544, "grad_norm": 2.601611614227295, "learning_rate": 8.818456461659926e-06, "loss": 0.691, "step": 20634 }, { "epoch": 0.2467150491995361, "grad_norm": 1.7203933000564575, "learning_rate": 8.818331463146263e-06, "loss": 0.6715, "step": 20635 }, { "epoch": 0.24672700534439676, "grad_norm": 2.7148618698120117, "learning_rate": 8.818206458907e-06, "loss": 0.5821, "step": 20636 }, { "epoch": 0.2467389614892574, "grad_norm": 3.8733668327331543, "learning_rate": 8.818081448942323e-06, "loss": 0.6889, "step": 20637 }, { "epoch": 0.24675091763411805, "grad_norm": 2.1197123527526855, "learning_rate": 8.817956433252418e-06, "loss": 0.6312, "step": 20638 }, { "epoch": 0.2467628737789787, "grad_norm": 2.3334693908691406, "learning_rate": 8.817831411837474e-06, "loss": 0.5745, "step": 20639 }, { "epoch": 0.24677482992383937, "grad_norm": 2.0184497833251953, "learning_rate": 8.817706384697679e-06, "loss": 0.5113, "step": 20640 }, { "epoch": 0.2467867860687, "grad_norm": 2.5385055541992188, "learning_rate": 8.81758135183322e-06, "loss": 0.6753, "step": 20641 }, { "epoch": 0.24679874221356066, "grad_norm": 1.9808564186096191, "learning_rate": 8.817456313244284e-06, "loss": 0.6933, "step": 20642 }, { "epoch": 0.24681069835842132, "grad_norm": 4.076135635375977, "learning_rate": 8.817331268931058e-06, "loss": 0.6395, "step": 20643 }, { "epoch": 0.24682265450328197, "grad_norm": 7.017560005187988, "learning_rate": 8.81720621889373e-06, "loss": 0.6003, "step": 20644 }, { "epoch": 0.2468346106481426, "grad_norm": 2.0053870677948, "learning_rate": 8.817081163132488e-06, "loss": 0.7178, "step": 20645 }, { "epoch": 0.24684656679300326, "grad_norm": 14.541288375854492, "learning_rate": 8.816956101647518e-06, "loss": 0.6144, "step": 20646 }, { "epoch": 0.24685852293786392, "grad_norm": 3.0318310260772705, "learning_rate": 8.816831034439011e-06, "loss": 0.655, "step": 20647 }, { "epoch": 0.24687047908272455, "grad_norm": 2.339249610900879, "learning_rate": 8.816705961507151e-06, "loss": 0.5893, "step": 20648 }, { "epoch": 0.2468824352275852, "grad_norm": 1.7714346647262573, "learning_rate": 8.816580882852129e-06, "loss": 0.5715, "step": 20649 }, { "epoch": 0.24689439137244587, "grad_norm": 3.895491600036621, "learning_rate": 8.816455798474127e-06, "loss": 0.6186, "step": 20650 }, { "epoch": 0.24690634751730653, "grad_norm": 1.6730858087539673, "learning_rate": 8.816330708373338e-06, "loss": 0.5712, "step": 20651 }, { "epoch": 0.24691830366216716, "grad_norm": 2.9297263622283936, "learning_rate": 8.816205612549945e-06, "loss": 0.6432, "step": 20652 }, { "epoch": 0.24693025980702782, "grad_norm": 2.066697120666504, "learning_rate": 8.81608051100414e-06, "loss": 0.6315, "step": 20653 }, { "epoch": 0.24694221595188848, "grad_norm": 10.277877807617188, "learning_rate": 8.815955403736112e-06, "loss": 0.6518, "step": 20654 }, { "epoch": 0.24695417209674914, "grad_norm": 2.550126075744629, "learning_rate": 8.815830290746041e-06, "loss": 0.6009, "step": 20655 }, { "epoch": 0.24696612824160977, "grad_norm": 3.171480655670166, "learning_rate": 8.815705172034121e-06, "loss": 0.6195, "step": 20656 }, { "epoch": 0.24697808438647043, "grad_norm": 1.889304518699646, "learning_rate": 8.815580047600535e-06, "loss": 0.5115, "step": 20657 }, { "epoch": 0.24699004053133108, "grad_norm": 2.0668528079986572, "learning_rate": 8.815454917445476e-06, "loss": 0.493, "step": 20658 }, { "epoch": 0.24700199667619172, "grad_norm": 2.437739610671997, "learning_rate": 8.815329781569128e-06, "loss": 0.7558, "step": 20659 }, { "epoch": 0.24701395282105237, "grad_norm": 2.522000312805176, "learning_rate": 8.81520463997168e-06, "loss": 0.557, "step": 20660 }, { "epoch": 0.24702590896591303, "grad_norm": 2.049320936203003, "learning_rate": 8.815079492653318e-06, "loss": 0.5734, "step": 20661 }, { "epoch": 0.2470378651107737, "grad_norm": 2.8304171562194824, "learning_rate": 8.81495433961423e-06, "loss": 0.6057, "step": 20662 }, { "epoch": 0.24704982125563432, "grad_norm": 6.126027584075928, "learning_rate": 8.814829180854606e-06, "loss": 0.6824, "step": 20663 }, { "epoch": 0.24706177740049498, "grad_norm": 4.022127628326416, "learning_rate": 8.81470401637463e-06, "loss": 0.6709, "step": 20664 }, { "epoch": 0.24707373354535564, "grad_norm": 1.885959506034851, "learning_rate": 8.814578846174495e-06, "loss": 0.6112, "step": 20665 }, { "epoch": 0.2470856896902163, "grad_norm": 2.403214931488037, "learning_rate": 8.814453670254383e-06, "loss": 0.6123, "step": 20666 }, { "epoch": 0.24709764583507693, "grad_norm": 1.9333726167678833, "learning_rate": 8.814328488614484e-06, "loss": 0.6165, "step": 20667 }, { "epoch": 0.2471096019799376, "grad_norm": 6.675673484802246, "learning_rate": 8.814203301254987e-06, "loss": 0.4614, "step": 20668 }, { "epoch": 0.24712155812479825, "grad_norm": 2.6142187118530273, "learning_rate": 8.814078108176078e-06, "loss": 0.6257, "step": 20669 }, { "epoch": 0.2471335142696589, "grad_norm": 3.1657392978668213, "learning_rate": 8.813952909377944e-06, "loss": 0.6709, "step": 20670 }, { "epoch": 0.24714547041451954, "grad_norm": 2.4611222743988037, "learning_rate": 8.813827704860779e-06, "loss": 0.6355, "step": 20671 }, { "epoch": 0.2471574265593802, "grad_norm": 1.9472498893737793, "learning_rate": 8.813702494624761e-06, "loss": 0.5046, "step": 20672 }, { "epoch": 0.24716938270424085, "grad_norm": 2.8871235847473145, "learning_rate": 8.813577278670084e-06, "loss": 0.6756, "step": 20673 }, { "epoch": 0.24718133884910148, "grad_norm": 2.410517930984497, "learning_rate": 8.813452056996933e-06, "loss": 0.6176, "step": 20674 }, { "epoch": 0.24719329499396214, "grad_norm": 1.6632018089294434, "learning_rate": 8.8133268296055e-06, "loss": 0.6383, "step": 20675 }, { "epoch": 0.2472052511388228, "grad_norm": 1.8074768781661987, "learning_rate": 8.813201596495968e-06, "loss": 0.6333, "step": 20676 }, { "epoch": 0.24721720728368346, "grad_norm": 5.030950546264648, "learning_rate": 8.813076357668527e-06, "loss": 0.642, "step": 20677 }, { "epoch": 0.2472291634285441, "grad_norm": 1.670569896697998, "learning_rate": 8.812951113123364e-06, "loss": 0.6364, "step": 20678 }, { "epoch": 0.24724111957340475, "grad_norm": 2.0774075984954834, "learning_rate": 8.812825862860667e-06, "loss": 0.5619, "step": 20679 }, { "epoch": 0.2472530757182654, "grad_norm": 5.603381156921387, "learning_rate": 8.812700606880626e-06, "loss": 0.6846, "step": 20680 }, { "epoch": 0.24726503186312607, "grad_norm": 12.177350997924805, "learning_rate": 8.812575345183425e-06, "loss": 0.4789, "step": 20681 }, { "epoch": 0.2472769880079867, "grad_norm": 1.844200849533081, "learning_rate": 8.812450077769255e-06, "loss": 0.6762, "step": 20682 }, { "epoch": 0.24728894415284736, "grad_norm": 2.990563154220581, "learning_rate": 8.812324804638302e-06, "loss": 0.7368, "step": 20683 }, { "epoch": 0.24730090029770802, "grad_norm": 2.150291681289673, "learning_rate": 8.812199525790753e-06, "loss": 0.6058, "step": 20684 }, { "epoch": 0.24731285644256865, "grad_norm": 4.302565574645996, "learning_rate": 8.812074241226798e-06, "loss": 0.6017, "step": 20685 }, { "epoch": 0.2473248125874293, "grad_norm": 4.104959011077881, "learning_rate": 8.811948950946625e-06, "loss": 0.6631, "step": 20686 }, { "epoch": 0.24733676873228996, "grad_norm": 3.858311653137207, "learning_rate": 8.81182365495042e-06, "loss": 0.5199, "step": 20687 }, { "epoch": 0.24734872487715062, "grad_norm": 6.88450813293457, "learning_rate": 8.811698353238374e-06, "loss": 0.6272, "step": 20688 }, { "epoch": 0.24736068102201125, "grad_norm": 1.7337220907211304, "learning_rate": 8.81157304581067e-06, "loss": 0.5641, "step": 20689 }, { "epoch": 0.2473726371668719, "grad_norm": 2.3699536323547363, "learning_rate": 8.8114477326675e-06, "loss": 0.5729, "step": 20690 }, { "epoch": 0.24738459331173257, "grad_norm": 2.279387950897217, "learning_rate": 8.81132241380905e-06, "loss": 0.6884, "step": 20691 }, { "epoch": 0.24739654945659323, "grad_norm": 2.8987767696380615, "learning_rate": 8.81119708923551e-06, "loss": 0.5808, "step": 20692 }, { "epoch": 0.24740850560145386, "grad_norm": 2.1172258853912354, "learning_rate": 8.811071758947065e-06, "loss": 0.6211, "step": 20693 }, { "epoch": 0.24742046174631452, "grad_norm": 1.6744871139526367, "learning_rate": 8.810946422943905e-06, "loss": 0.5662, "step": 20694 }, { "epoch": 0.24743241789117518, "grad_norm": 4.167840003967285, "learning_rate": 8.810821081226217e-06, "loss": 0.4589, "step": 20695 }, { "epoch": 0.2474443740360358, "grad_norm": 1.6928972005844116, "learning_rate": 8.81069573379419e-06, "loss": 0.5722, "step": 20696 }, { "epoch": 0.24745633018089647, "grad_norm": 1.808996319770813, "learning_rate": 8.81057038064801e-06, "loss": 0.6321, "step": 20697 }, { "epoch": 0.24746828632575713, "grad_norm": 4.045746326446533, "learning_rate": 8.810445021787867e-06, "loss": 0.6203, "step": 20698 }, { "epoch": 0.24748024247061778, "grad_norm": 2.780804395675659, "learning_rate": 8.810319657213948e-06, "loss": 0.6161, "step": 20699 }, { "epoch": 0.24749219861547841, "grad_norm": 2.0569775104522705, "learning_rate": 8.81019428692644e-06, "loss": 0.6216, "step": 20700 }, { "epoch": 0.24750415476033907, "grad_norm": 2.733328104019165, "learning_rate": 8.810068910925533e-06, "loss": 0.7031, "step": 20701 }, { "epoch": 0.24751611090519973, "grad_norm": 1.5412695407867432, "learning_rate": 8.809943529211416e-06, "loss": 0.6472, "step": 20702 }, { "epoch": 0.2475280670500604, "grad_norm": 2.283101797103882, "learning_rate": 8.809818141784272e-06, "loss": 0.6759, "step": 20703 }, { "epoch": 0.24754002319492102, "grad_norm": 10.385899543762207, "learning_rate": 8.809692748644295e-06, "loss": 0.5842, "step": 20704 }, { "epoch": 0.24755197933978168, "grad_norm": 3.2368481159210205, "learning_rate": 8.809567349791667e-06, "loss": 0.6105, "step": 20705 }, { "epoch": 0.24756393548464234, "grad_norm": 1.4265691041946411, "learning_rate": 8.809441945226581e-06, "loss": 0.5602, "step": 20706 }, { "epoch": 0.24757589162950297, "grad_norm": 3.6317849159240723, "learning_rate": 8.809316534949224e-06, "loss": 0.6005, "step": 20707 }, { "epoch": 0.24758784777436363, "grad_norm": 1.810910701751709, "learning_rate": 8.809191118959781e-06, "loss": 0.6153, "step": 20708 }, { "epoch": 0.2475998039192243, "grad_norm": 2.7163872718811035, "learning_rate": 8.809065697258445e-06, "loss": 0.7486, "step": 20709 }, { "epoch": 0.24761176006408495, "grad_norm": 5.790111064910889, "learning_rate": 8.808940269845399e-06, "loss": 0.5737, "step": 20710 }, { "epoch": 0.24762371620894558, "grad_norm": 2.6806752681732178, "learning_rate": 8.808814836720835e-06, "loss": 0.5435, "step": 20711 }, { "epoch": 0.24763567235380624, "grad_norm": 4.181455135345459, "learning_rate": 8.808689397884939e-06, "loss": 0.6217, "step": 20712 }, { "epoch": 0.2476476284986669, "grad_norm": 2.8130695819854736, "learning_rate": 8.808563953337902e-06, "loss": 0.6348, "step": 20713 }, { "epoch": 0.24765958464352755, "grad_norm": 1.6813836097717285, "learning_rate": 8.808438503079907e-06, "loss": 0.6295, "step": 20714 }, { "epoch": 0.24767154078838818, "grad_norm": 7.770697116851807, "learning_rate": 8.808313047111145e-06, "loss": 0.626, "step": 20715 }, { "epoch": 0.24768349693324884, "grad_norm": 3.5254271030426025, "learning_rate": 8.808187585431806e-06, "loss": 0.6129, "step": 20716 }, { "epoch": 0.2476954530781095, "grad_norm": 2.0123093128204346, "learning_rate": 8.808062118042075e-06, "loss": 0.6268, "step": 20717 }, { "epoch": 0.24770740922297013, "grad_norm": 5.617987155914307, "learning_rate": 8.807936644942142e-06, "loss": 0.6122, "step": 20718 }, { "epoch": 0.2477193653678308, "grad_norm": 8.73895263671875, "learning_rate": 8.807811166132195e-06, "loss": 0.5673, "step": 20719 }, { "epoch": 0.24773132151269145, "grad_norm": 3.7831366062164307, "learning_rate": 8.80768568161242e-06, "loss": 0.6668, "step": 20720 }, { "epoch": 0.2477432776575521, "grad_norm": 2.1000800132751465, "learning_rate": 8.807560191383007e-06, "loss": 0.5186, "step": 20721 }, { "epoch": 0.24775523380241274, "grad_norm": 3.205883264541626, "learning_rate": 8.807434695444144e-06, "loss": 0.6198, "step": 20722 }, { "epoch": 0.2477671899472734, "grad_norm": 2.2751307487487793, "learning_rate": 8.807309193796018e-06, "loss": 0.4952, "step": 20723 }, { "epoch": 0.24777914609213406, "grad_norm": 2.7171666622161865, "learning_rate": 8.80718368643882e-06, "loss": 0.5251, "step": 20724 }, { "epoch": 0.24779110223699471, "grad_norm": 3.6443328857421875, "learning_rate": 8.807058173372737e-06, "loss": 0.6432, "step": 20725 }, { "epoch": 0.24780305838185535, "grad_norm": 1.7027041912078857, "learning_rate": 8.806932654597956e-06, "loss": 0.533, "step": 20726 }, { "epoch": 0.247815014526716, "grad_norm": 7.838318824768066, "learning_rate": 8.806807130114667e-06, "loss": 0.57, "step": 20727 }, { "epoch": 0.24782697067157666, "grad_norm": 7.559429168701172, "learning_rate": 8.806681599923055e-06, "loss": 0.6895, "step": 20728 }, { "epoch": 0.24783892681643732, "grad_norm": 3.0118448734283447, "learning_rate": 8.806556064023313e-06, "loss": 0.5731, "step": 20729 }, { "epoch": 0.24785088296129795, "grad_norm": 3.425950288772583, "learning_rate": 8.806430522415623e-06, "loss": 0.5973, "step": 20730 }, { "epoch": 0.2478628391061586, "grad_norm": 2.1574058532714844, "learning_rate": 8.80630497510018e-06, "loss": 0.6532, "step": 20731 }, { "epoch": 0.24787479525101927, "grad_norm": 3.5132408142089844, "learning_rate": 8.806179422077168e-06, "loss": 0.5575, "step": 20732 }, { "epoch": 0.2478867513958799, "grad_norm": 3.168039560317993, "learning_rate": 8.806053863346778e-06, "loss": 0.6435, "step": 20733 }, { "epoch": 0.24789870754074056, "grad_norm": 5.743423938751221, "learning_rate": 8.805928298909194e-06, "loss": 0.6477, "step": 20734 }, { "epoch": 0.24791066368560122, "grad_norm": 2.232628107070923, "learning_rate": 8.805802728764608e-06, "loss": 0.6289, "step": 20735 }, { "epoch": 0.24792261983046188, "grad_norm": 2.7198383808135986, "learning_rate": 8.805677152913208e-06, "loss": 0.6369, "step": 20736 }, { "epoch": 0.2479345759753225, "grad_norm": 2.0165982246398926, "learning_rate": 8.805551571355182e-06, "loss": 0.611, "step": 20737 }, { "epoch": 0.24794653212018317, "grad_norm": 1.3040186166763306, "learning_rate": 8.805425984090717e-06, "loss": 0.5749, "step": 20738 }, { "epoch": 0.24795848826504382, "grad_norm": 2.1049129962921143, "learning_rate": 8.805300391120001e-06, "loss": 0.6177, "step": 20739 }, { "epoch": 0.24797044440990448, "grad_norm": 4.551478385925293, "learning_rate": 8.805174792443226e-06, "loss": 0.6051, "step": 20740 }, { "epoch": 0.24798240055476511, "grad_norm": 2.2790303230285645, "learning_rate": 8.805049188060578e-06, "loss": 0.5918, "step": 20741 }, { "epoch": 0.24799435669962577, "grad_norm": 2.9436893463134766, "learning_rate": 8.804923577972243e-06, "loss": 0.5791, "step": 20742 }, { "epoch": 0.24800631284448643, "grad_norm": 2.786669969558716, "learning_rate": 8.804797962178412e-06, "loss": 0.5408, "step": 20743 }, { "epoch": 0.24801826898934706, "grad_norm": 3.387519598007202, "learning_rate": 8.804672340679274e-06, "loss": 0.601, "step": 20744 }, { "epoch": 0.24803022513420772, "grad_norm": 2.218071937561035, "learning_rate": 8.804546713475016e-06, "loss": 0.567, "step": 20745 }, { "epoch": 0.24804218127906838, "grad_norm": 1.9816839694976807, "learning_rate": 8.804421080565826e-06, "loss": 0.6345, "step": 20746 }, { "epoch": 0.24805413742392904, "grad_norm": 2.4956839084625244, "learning_rate": 8.804295441951893e-06, "loss": 0.6303, "step": 20747 }, { "epoch": 0.24806609356878967, "grad_norm": 3.8709523677825928, "learning_rate": 8.804169797633406e-06, "loss": 0.5726, "step": 20748 }, { "epoch": 0.24807804971365033, "grad_norm": 3.5731191635131836, "learning_rate": 8.804044147610552e-06, "loss": 0.6045, "step": 20749 }, { "epoch": 0.248090005858511, "grad_norm": 2.033592462539673, "learning_rate": 8.803918491883523e-06, "loss": 0.5838, "step": 20750 }, { "epoch": 0.24810196200337165, "grad_norm": 2.0856571197509766, "learning_rate": 8.803792830452503e-06, "loss": 0.6057, "step": 20751 }, { "epoch": 0.24811391814823228, "grad_norm": 2.0072293281555176, "learning_rate": 8.803667163317681e-06, "loss": 0.7131, "step": 20752 }, { "epoch": 0.24812587429309294, "grad_norm": 2.1102921962738037, "learning_rate": 8.803541490479248e-06, "loss": 0.6493, "step": 20753 }, { "epoch": 0.2481378304379536, "grad_norm": 2.579921007156372, "learning_rate": 8.80341581193739e-06, "loss": 0.6829, "step": 20754 }, { "epoch": 0.24814978658281422, "grad_norm": 2.1484153270721436, "learning_rate": 8.803290127692297e-06, "loss": 0.6199, "step": 20755 }, { "epoch": 0.24816174272767488, "grad_norm": 1.7444239854812622, "learning_rate": 8.803164437744158e-06, "loss": 0.5227, "step": 20756 }, { "epoch": 0.24817369887253554, "grad_norm": 2.57993745803833, "learning_rate": 8.803038742093159e-06, "loss": 0.5352, "step": 20757 }, { "epoch": 0.2481856550173962, "grad_norm": 1.4524314403533936, "learning_rate": 8.802913040739488e-06, "loss": 0.4826, "step": 20758 }, { "epoch": 0.24819761116225683, "grad_norm": 2.868006467819214, "learning_rate": 8.80278733368334e-06, "loss": 0.5727, "step": 20759 }, { "epoch": 0.2482095673071175, "grad_norm": 1.816254734992981, "learning_rate": 8.802661620924896e-06, "loss": 0.6337, "step": 20760 }, { "epoch": 0.24822152345197815, "grad_norm": 2.9685375690460205, "learning_rate": 8.802535902464349e-06, "loss": 0.5352, "step": 20761 }, { "epoch": 0.2482334795968388, "grad_norm": 2.4633607864379883, "learning_rate": 8.802410178301883e-06, "loss": 0.5718, "step": 20762 }, { "epoch": 0.24824543574169944, "grad_norm": 1.9357324838638306, "learning_rate": 8.80228444843769e-06, "loss": 0.5809, "step": 20763 }, { "epoch": 0.2482573918865601, "grad_norm": 2.4525485038757324, "learning_rate": 8.802158712871961e-06, "loss": 0.6557, "step": 20764 }, { "epoch": 0.24826934803142076, "grad_norm": 3.101722002029419, "learning_rate": 8.802032971604879e-06, "loss": 0.5761, "step": 20765 }, { "epoch": 0.2482813041762814, "grad_norm": 4.300206661224365, "learning_rate": 8.801907224636636e-06, "loss": 0.6212, "step": 20766 }, { "epoch": 0.24829326032114205, "grad_norm": 2.031283140182495, "learning_rate": 8.80178147196742e-06, "loss": 0.6044, "step": 20767 }, { "epoch": 0.2483052164660027, "grad_norm": 3.7409331798553467, "learning_rate": 8.801655713597417e-06, "loss": 0.6171, "step": 20768 }, { "epoch": 0.24831717261086336, "grad_norm": 3.3505821228027344, "learning_rate": 8.80152994952682e-06, "loss": 0.6242, "step": 20769 }, { "epoch": 0.248329128755724, "grad_norm": 5.873533248901367, "learning_rate": 8.801404179755815e-06, "loss": 0.6769, "step": 20770 }, { "epoch": 0.24834108490058465, "grad_norm": 1.9481757879257202, "learning_rate": 8.80127840428459e-06, "loss": 0.5829, "step": 20771 }, { "epoch": 0.2483530410454453, "grad_norm": 2.490431308746338, "learning_rate": 8.801152623113335e-06, "loss": 0.6859, "step": 20772 }, { "epoch": 0.24836499719030597, "grad_norm": 8.837197303771973, "learning_rate": 8.801026836242237e-06, "loss": 0.6075, "step": 20773 }, { "epoch": 0.2483769533351666, "grad_norm": 4.541754722595215, "learning_rate": 8.800901043671487e-06, "loss": 0.6044, "step": 20774 }, { "epoch": 0.24838890948002726, "grad_norm": 2.4763429164886475, "learning_rate": 8.800775245401275e-06, "loss": 0.5863, "step": 20775 }, { "epoch": 0.24840086562488792, "grad_norm": 2.412891387939453, "learning_rate": 8.800649441431782e-06, "loss": 0.5836, "step": 20776 }, { "epoch": 0.24841282176974855, "grad_norm": 1.8211889266967773, "learning_rate": 8.800523631763204e-06, "loss": 0.5868, "step": 20777 }, { "epoch": 0.2484247779146092, "grad_norm": 3.911641836166382, "learning_rate": 8.800397816395726e-06, "loss": 0.674, "step": 20778 }, { "epoch": 0.24843673405946987, "grad_norm": 1.929273009300232, "learning_rate": 8.800271995329541e-06, "loss": 0.5431, "step": 20779 }, { "epoch": 0.24844869020433052, "grad_norm": 1.7475945949554443, "learning_rate": 8.800146168564833e-06, "loss": 0.6699, "step": 20780 }, { "epoch": 0.24846064634919116, "grad_norm": 2.2284116744995117, "learning_rate": 8.800020336101791e-06, "loss": 0.5954, "step": 20781 }, { "epoch": 0.24847260249405181, "grad_norm": 1.6748065948486328, "learning_rate": 8.799894497940607e-06, "loss": 0.6204, "step": 20782 }, { "epoch": 0.24848455863891247, "grad_norm": 7.525763034820557, "learning_rate": 8.799768654081467e-06, "loss": 0.676, "step": 20783 }, { "epoch": 0.24849651478377313, "grad_norm": 3.807008743286133, "learning_rate": 8.79964280452456e-06, "loss": 0.5631, "step": 20784 }, { "epoch": 0.24850847092863376, "grad_norm": 2.8668129444122314, "learning_rate": 8.799516949270075e-06, "loss": 0.5939, "step": 20785 }, { "epoch": 0.24852042707349442, "grad_norm": 3.15350079536438, "learning_rate": 8.7993910883182e-06, "loss": 0.7763, "step": 20786 }, { "epoch": 0.24853238321835508, "grad_norm": 1.8594977855682373, "learning_rate": 8.799265221669127e-06, "loss": 0.5279, "step": 20787 }, { "epoch": 0.24854433936321574, "grad_norm": 8.782175064086914, "learning_rate": 8.79913934932304e-06, "loss": 0.6138, "step": 20788 }, { "epoch": 0.24855629550807637, "grad_norm": 1.8996459245681763, "learning_rate": 8.799013471280132e-06, "loss": 0.6134, "step": 20789 }, { "epoch": 0.24856825165293703, "grad_norm": 2.268238067626953, "learning_rate": 8.798887587540588e-06, "loss": 0.5565, "step": 20790 }, { "epoch": 0.2485802077977977, "grad_norm": 5.438268661499023, "learning_rate": 8.7987616981046e-06, "loss": 0.5756, "step": 20791 }, { "epoch": 0.24859216394265832, "grad_norm": 2.181535243988037, "learning_rate": 8.798635802972355e-06, "loss": 0.7071, "step": 20792 }, { "epoch": 0.24860412008751898, "grad_norm": 1.6323163509368896, "learning_rate": 8.798509902144042e-06, "loss": 0.6205, "step": 20793 }, { "epoch": 0.24861607623237963, "grad_norm": 3.332857370376587, "learning_rate": 8.798383995619849e-06, "loss": 0.5931, "step": 20794 }, { "epoch": 0.2486280323772403, "grad_norm": 3.698460578918457, "learning_rate": 8.798258083399967e-06, "loss": 0.6023, "step": 20795 }, { "epoch": 0.24863998852210092, "grad_norm": 2.423213005065918, "learning_rate": 8.798132165484584e-06, "loss": 0.6188, "step": 20796 }, { "epoch": 0.24865194466696158, "grad_norm": 4.6517815589904785, "learning_rate": 8.798006241873886e-06, "loss": 0.6152, "step": 20797 }, { "epoch": 0.24866390081182224, "grad_norm": 4.277427673339844, "learning_rate": 8.797880312568066e-06, "loss": 0.6218, "step": 20798 }, { "epoch": 0.2486758569566829, "grad_norm": 3.4920620918273926, "learning_rate": 8.79775437756731e-06, "loss": 0.6301, "step": 20799 }, { "epoch": 0.24868781310154353, "grad_norm": 1.6670576333999634, "learning_rate": 8.797628436871808e-06, "loss": 0.7079, "step": 20800 }, { "epoch": 0.2486997692464042, "grad_norm": 2.554262399673462, "learning_rate": 8.797502490481749e-06, "loss": 0.638, "step": 20801 }, { "epoch": 0.24871172539126485, "grad_norm": 3.0270020961761475, "learning_rate": 8.79737653839732e-06, "loss": 0.5489, "step": 20802 }, { "epoch": 0.24872368153612548, "grad_norm": 2.3633081912994385, "learning_rate": 8.797250580618713e-06, "loss": 0.5827, "step": 20803 }, { "epoch": 0.24873563768098614, "grad_norm": 4.0897440910339355, "learning_rate": 8.797124617146115e-06, "loss": 0.5802, "step": 20804 }, { "epoch": 0.2487475938258468, "grad_norm": 1.465681791305542, "learning_rate": 8.796998647979715e-06, "loss": 0.6187, "step": 20805 }, { "epoch": 0.24875954997070746, "grad_norm": 3.282339334487915, "learning_rate": 8.796872673119703e-06, "loss": 0.7405, "step": 20806 }, { "epoch": 0.2487715061155681, "grad_norm": 2.295497179031372, "learning_rate": 8.796746692566266e-06, "loss": 0.6319, "step": 20807 }, { "epoch": 0.24878346226042874, "grad_norm": 7.232915878295898, "learning_rate": 8.796620706319593e-06, "loss": 0.7387, "step": 20808 }, { "epoch": 0.2487954184052894, "grad_norm": 1.6133524179458618, "learning_rate": 8.796494714379874e-06, "loss": 0.6395, "step": 20809 }, { "epoch": 0.24880737455015006, "grad_norm": 1.6663830280303955, "learning_rate": 8.7963687167473e-06, "loss": 0.6387, "step": 20810 }, { "epoch": 0.2488193306950107, "grad_norm": 1.8912783861160278, "learning_rate": 8.796242713422057e-06, "loss": 0.6149, "step": 20811 }, { "epoch": 0.24883128683987135, "grad_norm": 1.9124476909637451, "learning_rate": 8.796116704404331e-06, "loss": 0.6125, "step": 20812 }, { "epoch": 0.248843242984732, "grad_norm": 3.6320855617523193, "learning_rate": 8.795990689694319e-06, "loss": 0.5692, "step": 20813 }, { "epoch": 0.24885519912959264, "grad_norm": 3.7471766471862793, "learning_rate": 8.795864669292204e-06, "loss": 0.6364, "step": 20814 }, { "epoch": 0.2488671552744533, "grad_norm": 2.3094735145568848, "learning_rate": 8.795738643198175e-06, "loss": 0.5474, "step": 20815 }, { "epoch": 0.24887911141931396, "grad_norm": 2.3878486156463623, "learning_rate": 8.795612611412422e-06, "loss": 0.63, "step": 20816 }, { "epoch": 0.24889106756417462, "grad_norm": 2.1308865547180176, "learning_rate": 8.795486573935136e-06, "loss": 0.6787, "step": 20817 }, { "epoch": 0.24890302370903525, "grad_norm": 3.2645108699798584, "learning_rate": 8.795360530766506e-06, "loss": 0.543, "step": 20818 }, { "epoch": 0.2489149798538959, "grad_norm": 3.5814950466156006, "learning_rate": 8.795234481906717e-06, "loss": 0.5593, "step": 20819 }, { "epoch": 0.24892693599875657, "grad_norm": 2.0695104598999023, "learning_rate": 8.79510842735596e-06, "loss": 0.5408, "step": 20820 }, { "epoch": 0.24893889214361722, "grad_norm": 2.0569489002227783, "learning_rate": 8.794982367114427e-06, "loss": 0.6356, "step": 20821 }, { "epoch": 0.24895084828847786, "grad_norm": 5.566231727600098, "learning_rate": 8.794856301182301e-06, "loss": 0.6169, "step": 20822 }, { "epoch": 0.2489628044333385, "grad_norm": 18.651004791259766, "learning_rate": 8.794730229559777e-06, "loss": 0.5034, "step": 20823 }, { "epoch": 0.24897476057819917, "grad_norm": 1.8908405303955078, "learning_rate": 8.794604152247042e-06, "loss": 0.6346, "step": 20824 }, { "epoch": 0.2489867167230598, "grad_norm": 4.11629056930542, "learning_rate": 8.794478069244283e-06, "loss": 0.7119, "step": 20825 }, { "epoch": 0.24899867286792046, "grad_norm": 2.9987382888793945, "learning_rate": 8.79435198055169e-06, "loss": 0.6047, "step": 20826 }, { "epoch": 0.24901062901278112, "grad_norm": 2.160734176635742, "learning_rate": 8.794225886169454e-06, "loss": 0.6483, "step": 20827 }, { "epoch": 0.24902258515764178, "grad_norm": 2.121736764907837, "learning_rate": 8.794099786097763e-06, "loss": 0.6532, "step": 20828 }, { "epoch": 0.2490345413025024, "grad_norm": 6.011086940765381, "learning_rate": 8.793973680336804e-06, "loss": 0.5992, "step": 20829 }, { "epoch": 0.24904649744736307, "grad_norm": 4.1307148933410645, "learning_rate": 8.79384756888677e-06, "loss": 0.6131, "step": 20830 }, { "epoch": 0.24905845359222373, "grad_norm": 2.0029494762420654, "learning_rate": 8.793721451747847e-06, "loss": 0.6556, "step": 20831 }, { "epoch": 0.2490704097370844, "grad_norm": 3.6014623641967773, "learning_rate": 8.793595328920227e-06, "loss": 0.6648, "step": 20832 }, { "epoch": 0.24908236588194502, "grad_norm": 4.01762580871582, "learning_rate": 8.793469200404097e-06, "loss": 0.6974, "step": 20833 }, { "epoch": 0.24909432202680568, "grad_norm": 2.7273027896881104, "learning_rate": 8.793343066199645e-06, "loss": 0.6604, "step": 20834 }, { "epoch": 0.24910627817166633, "grad_norm": 4.673349380493164, "learning_rate": 8.793216926307062e-06, "loss": 0.6277, "step": 20835 }, { "epoch": 0.24911823431652697, "grad_norm": 4.486557960510254, "learning_rate": 8.793090780726537e-06, "loss": 0.6245, "step": 20836 }, { "epoch": 0.24913019046138762, "grad_norm": 1.5436099767684937, "learning_rate": 8.792964629458259e-06, "loss": 0.5527, "step": 20837 }, { "epoch": 0.24914214660624828, "grad_norm": 4.2613959312438965, "learning_rate": 8.792838472502416e-06, "loss": 0.593, "step": 20838 }, { "epoch": 0.24915410275110894, "grad_norm": 2.881737232208252, "learning_rate": 8.7927123098592e-06, "loss": 0.5923, "step": 20839 }, { "epoch": 0.24916605889596957, "grad_norm": 3.986006021499634, "learning_rate": 8.792586141528797e-06, "loss": 0.6321, "step": 20840 }, { "epoch": 0.24917801504083023, "grad_norm": 3.2643818855285645, "learning_rate": 8.792459967511397e-06, "loss": 0.5751, "step": 20841 }, { "epoch": 0.2491899711856909, "grad_norm": 2.8726162910461426, "learning_rate": 8.79233378780719e-06, "loss": 0.6254, "step": 20842 }, { "epoch": 0.24920192733055155, "grad_norm": 2.3776369094848633, "learning_rate": 8.792207602416367e-06, "loss": 0.5884, "step": 20843 }, { "epoch": 0.24921388347541218, "grad_norm": 5.241275310516357, "learning_rate": 8.792081411339114e-06, "loss": 0.706, "step": 20844 }, { "epoch": 0.24922583962027284, "grad_norm": 3.3358521461486816, "learning_rate": 8.791955214575621e-06, "loss": 0.6351, "step": 20845 }, { "epoch": 0.2492377957651335, "grad_norm": 2.0891690254211426, "learning_rate": 8.791829012126078e-06, "loss": 0.5629, "step": 20846 }, { "epoch": 0.24924975190999415, "grad_norm": 4.4283766746521, "learning_rate": 8.791702803990675e-06, "loss": 0.5718, "step": 20847 }, { "epoch": 0.24926170805485479, "grad_norm": 2.1981735229492188, "learning_rate": 8.7915765901696e-06, "loss": 0.555, "step": 20848 }, { "epoch": 0.24927366419971544, "grad_norm": 2.561138153076172, "learning_rate": 8.791450370663041e-06, "loss": 0.6135, "step": 20849 }, { "epoch": 0.2492856203445761, "grad_norm": 2.2366650104522705, "learning_rate": 8.791324145471189e-06, "loss": 0.6568, "step": 20850 }, { "epoch": 0.24929757648943673, "grad_norm": 2.2121188640594482, "learning_rate": 8.791197914594233e-06, "loss": 0.6401, "step": 20851 }, { "epoch": 0.2493095326342974, "grad_norm": 2.1517279148101807, "learning_rate": 8.791071678032363e-06, "loss": 0.5629, "step": 20852 }, { "epoch": 0.24932148877915805, "grad_norm": 3.8563144207000732, "learning_rate": 8.790945435785767e-06, "loss": 0.5678, "step": 20853 }, { "epoch": 0.2493334449240187, "grad_norm": 1.5463963747024536, "learning_rate": 8.790819187854634e-06, "loss": 0.5787, "step": 20854 }, { "epoch": 0.24934540106887934, "grad_norm": 2.5197596549987793, "learning_rate": 8.790692934239155e-06, "loss": 0.6215, "step": 20855 }, { "epoch": 0.24935735721374, "grad_norm": 6.944044589996338, "learning_rate": 8.79056667493952e-06, "loss": 0.5218, "step": 20856 }, { "epoch": 0.24936931335860066, "grad_norm": 6.448892593383789, "learning_rate": 8.790440409955916e-06, "loss": 0.5735, "step": 20857 }, { "epoch": 0.24938126950346132, "grad_norm": 3.30722975730896, "learning_rate": 8.790314139288532e-06, "loss": 0.635, "step": 20858 }, { "epoch": 0.24939322564832195, "grad_norm": 1.6428608894348145, "learning_rate": 8.790187862937558e-06, "loss": 0.5177, "step": 20859 }, { "epoch": 0.2494051817931826, "grad_norm": 5.058126926422119, "learning_rate": 8.790061580903185e-06, "loss": 0.5735, "step": 20860 }, { "epoch": 0.24941713793804327, "grad_norm": 2.7158050537109375, "learning_rate": 8.7899352931856e-06, "loss": 0.6296, "step": 20861 }, { "epoch": 0.2494290940829039, "grad_norm": 4.465529441833496, "learning_rate": 8.789808999784997e-06, "loss": 0.5718, "step": 20862 }, { "epoch": 0.24944105022776455, "grad_norm": 1.8412061929702759, "learning_rate": 8.789682700701559e-06, "loss": 0.5318, "step": 20863 }, { "epoch": 0.2494530063726252, "grad_norm": 2.176800489425659, "learning_rate": 8.78955639593548e-06, "loss": 0.6564, "step": 20864 }, { "epoch": 0.24946496251748587, "grad_norm": 6.7814717292785645, "learning_rate": 8.789430085486946e-06, "loss": 0.5647, "step": 20865 }, { "epoch": 0.2494769186623465, "grad_norm": 2.202685594558716, "learning_rate": 8.789303769356147e-06, "loss": 0.6216, "step": 20866 }, { "epoch": 0.24948887480720716, "grad_norm": 2.022141933441162, "learning_rate": 8.789177447543276e-06, "loss": 0.6071, "step": 20867 }, { "epoch": 0.24950083095206782, "grad_norm": 1.547413945198059, "learning_rate": 8.789051120048519e-06, "loss": 0.5584, "step": 20868 }, { "epoch": 0.24951278709692848, "grad_norm": 3.1728639602661133, "learning_rate": 8.788924786872067e-06, "loss": 0.608, "step": 20869 }, { "epoch": 0.2495247432417891, "grad_norm": 2.331728219985962, "learning_rate": 8.788798448014107e-06, "loss": 0.5719, "step": 20870 }, { "epoch": 0.24953669938664977, "grad_norm": 2.7881460189819336, "learning_rate": 8.788672103474832e-06, "loss": 0.6395, "step": 20871 }, { "epoch": 0.24954865553151043, "grad_norm": 3.2558019161224365, "learning_rate": 8.78854575325443e-06, "loss": 0.5823, "step": 20872 }, { "epoch": 0.24956061167637106, "grad_norm": 2.4991776943206787, "learning_rate": 8.78841939735309e-06, "loss": 0.5249, "step": 20873 }, { "epoch": 0.24957256782123172, "grad_norm": 1.6109626293182373, "learning_rate": 8.788293035771e-06, "loss": 0.587, "step": 20874 }, { "epoch": 0.24958452396609238, "grad_norm": 3.540590524673462, "learning_rate": 8.788166668508352e-06, "loss": 0.6611, "step": 20875 }, { "epoch": 0.24959648011095303, "grad_norm": 1.7126643657684326, "learning_rate": 8.788040295565334e-06, "loss": 0.6411, "step": 20876 }, { "epoch": 0.24960843625581366, "grad_norm": 1.7592320442199707, "learning_rate": 8.787913916942137e-06, "loss": 0.6296, "step": 20877 }, { "epoch": 0.24962039240067432, "grad_norm": 4.921842575073242, "learning_rate": 8.787787532638951e-06, "loss": 0.6764, "step": 20878 }, { "epoch": 0.24963234854553498, "grad_norm": 1.9496264457702637, "learning_rate": 8.787661142655963e-06, "loss": 0.532, "step": 20879 }, { "epoch": 0.24964430469039564, "grad_norm": 1.9759618043899536, "learning_rate": 8.787534746993362e-06, "loss": 0.5509, "step": 20880 }, { "epoch": 0.24965626083525627, "grad_norm": 3.8558743000030518, "learning_rate": 8.787408345651342e-06, "loss": 0.6205, "step": 20881 }, { "epoch": 0.24966821698011693, "grad_norm": 2.1306207180023193, "learning_rate": 8.787281938630088e-06, "loss": 0.58, "step": 20882 }, { "epoch": 0.2496801731249776, "grad_norm": 3.1750223636627197, "learning_rate": 8.787155525929792e-06, "loss": 0.6974, "step": 20883 }, { "epoch": 0.24969212926983822, "grad_norm": 3.4150607585906982, "learning_rate": 8.787029107550641e-06, "loss": 0.633, "step": 20884 }, { "epoch": 0.24970408541469888, "grad_norm": 4.97568416595459, "learning_rate": 8.786902683492828e-06, "loss": 0.6858, "step": 20885 }, { "epoch": 0.24971604155955954, "grad_norm": 1.8536723852157593, "learning_rate": 8.786776253756542e-06, "loss": 0.6085, "step": 20886 }, { "epoch": 0.2497279977044202, "grad_norm": 2.3071603775024414, "learning_rate": 8.78664981834197e-06, "loss": 0.6277, "step": 20887 }, { "epoch": 0.24973995384928083, "grad_norm": 4.700714111328125, "learning_rate": 8.786523377249304e-06, "loss": 0.6678, "step": 20888 }, { "epoch": 0.24975190999414149, "grad_norm": 1.9936455488204956, "learning_rate": 8.786396930478732e-06, "loss": 0.6506, "step": 20889 }, { "epoch": 0.24976386613900214, "grad_norm": 3.4253244400024414, "learning_rate": 8.786270478030442e-06, "loss": 0.5897, "step": 20890 }, { "epoch": 0.2497758222838628, "grad_norm": 1.714545726776123, "learning_rate": 8.78614401990463e-06, "loss": 0.5801, "step": 20891 }, { "epoch": 0.24978777842872343, "grad_norm": 2.5263633728027344, "learning_rate": 8.78601755610148e-06, "loss": 0.6503, "step": 20892 }, { "epoch": 0.2497997345735841, "grad_norm": 5.520733833312988, "learning_rate": 8.785891086621183e-06, "loss": 0.6453, "step": 20893 }, { "epoch": 0.24981169071844475, "grad_norm": 3.294264078140259, "learning_rate": 8.78576461146393e-06, "loss": 0.7203, "step": 20894 }, { "epoch": 0.24982364686330538, "grad_norm": 1.5854309797286987, "learning_rate": 8.785638130629907e-06, "loss": 0.6524, "step": 20895 }, { "epoch": 0.24983560300816604, "grad_norm": 2.712146043777466, "learning_rate": 8.785511644119309e-06, "loss": 0.6327, "step": 20896 }, { "epoch": 0.2498475591530267, "grad_norm": 3.0870590209960938, "learning_rate": 8.785385151932321e-06, "loss": 0.5169, "step": 20897 }, { "epoch": 0.24985951529788736, "grad_norm": 3.4344351291656494, "learning_rate": 8.785258654069135e-06, "loss": 0.5499, "step": 20898 }, { "epoch": 0.249871471442748, "grad_norm": 3.883836030960083, "learning_rate": 8.785132150529941e-06, "loss": 0.5592, "step": 20899 }, { "epoch": 0.24988342758760865, "grad_norm": 4.390721797943115, "learning_rate": 8.785005641314928e-06, "loss": 0.6839, "step": 20900 }, { "epoch": 0.2498953837324693, "grad_norm": 3.292252540588379, "learning_rate": 8.784879126424284e-06, "loss": 0.5942, "step": 20901 }, { "epoch": 0.24990733987732996, "grad_norm": 4.870948314666748, "learning_rate": 8.7847526058582e-06, "loss": 0.5104, "step": 20902 }, { "epoch": 0.2499192960221906, "grad_norm": 2.3710529804229736, "learning_rate": 8.784626079616869e-06, "loss": 0.6416, "step": 20903 }, { "epoch": 0.24993125216705125, "grad_norm": 3.5561275482177734, "learning_rate": 8.784499547700476e-06, "loss": 0.4969, "step": 20904 }, { "epoch": 0.2499432083119119, "grad_norm": 4.433429718017578, "learning_rate": 8.78437301010921e-06, "loss": 0.5404, "step": 20905 }, { "epoch": 0.24995516445677257, "grad_norm": 2.0198726654052734, "learning_rate": 8.784246466843267e-06, "loss": 0.7411, "step": 20906 }, { "epoch": 0.2499671206016332, "grad_norm": 2.1494529247283936, "learning_rate": 8.784119917902832e-06, "loss": 0.5977, "step": 20907 }, { "epoch": 0.24997907674649386, "grad_norm": 2.5517616271972656, "learning_rate": 8.783993363288094e-06, "loss": 0.5544, "step": 20908 }, { "epoch": 0.24999103289135452, "grad_norm": 2.2184629440307617, "learning_rate": 8.783866802999246e-06, "loss": 0.5065, "step": 20909 }, { "epoch": 0.25000298903621515, "grad_norm": 2.654766798019409, "learning_rate": 8.783740237036476e-06, "loss": 0.5207, "step": 20910 }, { "epoch": 0.2500149451810758, "grad_norm": 2.917722225189209, "learning_rate": 8.783613665399975e-06, "loss": 0.574, "step": 20911 }, { "epoch": 0.25002690132593647, "grad_norm": 11.039008140563965, "learning_rate": 8.78348708808993e-06, "loss": 0.6237, "step": 20912 }, { "epoch": 0.2500388574707971, "grad_norm": 6.294854164123535, "learning_rate": 8.783360505106535e-06, "loss": 0.6536, "step": 20913 }, { "epoch": 0.2500508136156578, "grad_norm": 4.657723426818848, "learning_rate": 8.783233916449975e-06, "loss": 0.6989, "step": 20914 }, { "epoch": 0.25006276976051844, "grad_norm": 3.7140204906463623, "learning_rate": 8.783107322120443e-06, "loss": 0.6427, "step": 20915 }, { "epoch": 0.25007472590537905, "grad_norm": 6.247495651245117, "learning_rate": 8.782980722118128e-06, "loss": 0.5926, "step": 20916 }, { "epoch": 0.2500866820502397, "grad_norm": 2.9790923595428467, "learning_rate": 8.782854116443221e-06, "loss": 0.5914, "step": 20917 }, { "epoch": 0.25009863819510036, "grad_norm": 3.2583186626434326, "learning_rate": 8.782727505095911e-06, "loss": 0.6456, "step": 20918 }, { "epoch": 0.250110594339961, "grad_norm": 3.1085939407348633, "learning_rate": 8.782600888076387e-06, "loss": 0.7102, "step": 20919 }, { "epoch": 0.2501225504848217, "grad_norm": 2.389200210571289, "learning_rate": 8.78247426538484e-06, "loss": 0.6715, "step": 20920 }, { "epoch": 0.25013450662968234, "grad_norm": 4.960359573364258, "learning_rate": 8.782347637021458e-06, "loss": 0.6007, "step": 20921 }, { "epoch": 0.250146462774543, "grad_norm": 2.3146328926086426, "learning_rate": 8.782221002986433e-06, "loss": 0.5006, "step": 20922 }, { "epoch": 0.2501584189194036, "grad_norm": 6.074888229370117, "learning_rate": 8.782094363279955e-06, "loss": 0.6978, "step": 20923 }, { "epoch": 0.25017037506426426, "grad_norm": 3.178251028060913, "learning_rate": 8.781967717902212e-06, "loss": 0.6703, "step": 20924 }, { "epoch": 0.2501823312091249, "grad_norm": 2.550196647644043, "learning_rate": 8.781841066853395e-06, "loss": 0.5988, "step": 20925 }, { "epoch": 0.2501942873539856, "grad_norm": 2.2916157245635986, "learning_rate": 8.781714410133695e-06, "loss": 0.5913, "step": 20926 }, { "epoch": 0.25020624349884624, "grad_norm": 2.877277374267578, "learning_rate": 8.781587747743298e-06, "loss": 0.5631, "step": 20927 }, { "epoch": 0.2502181996437069, "grad_norm": 1.7869127988815308, "learning_rate": 8.7814610796824e-06, "loss": 0.5669, "step": 20928 }, { "epoch": 0.25023015578856755, "grad_norm": 2.297499179840088, "learning_rate": 8.781334405951187e-06, "loss": 0.5976, "step": 20929 }, { "epoch": 0.2502421119334282, "grad_norm": 4.3208327293396, "learning_rate": 8.781207726549848e-06, "loss": 0.5908, "step": 20930 }, { "epoch": 0.2502540680782888, "grad_norm": 3.174509286880493, "learning_rate": 8.781081041478576e-06, "loss": 0.6529, "step": 20931 }, { "epoch": 0.2502660242231495, "grad_norm": 3.1929240226745605, "learning_rate": 8.78095435073756e-06, "loss": 0.6259, "step": 20932 }, { "epoch": 0.25027798036801013, "grad_norm": 3.7879526615142822, "learning_rate": 8.780827654326988e-06, "loss": 0.582, "step": 20933 }, { "epoch": 0.2502899365128708, "grad_norm": 4.841834545135498, "learning_rate": 8.780700952247052e-06, "loss": 0.6015, "step": 20934 }, { "epoch": 0.25030189265773145, "grad_norm": 2.333254098892212, "learning_rate": 8.780574244497941e-06, "loss": 0.6524, "step": 20935 }, { "epoch": 0.2503138488025921, "grad_norm": 2.2828598022460938, "learning_rate": 8.780447531079846e-06, "loss": 0.6432, "step": 20936 }, { "epoch": 0.25032580494745277, "grad_norm": 1.7238998413085938, "learning_rate": 8.780320811992957e-06, "loss": 0.5778, "step": 20937 }, { "epoch": 0.25033776109231337, "grad_norm": 2.764374017715454, "learning_rate": 8.780194087237463e-06, "loss": 0.5545, "step": 20938 }, { "epoch": 0.25034971723717403, "grad_norm": 5.6580328941345215, "learning_rate": 8.780067356813555e-06, "loss": 0.6087, "step": 20939 }, { "epoch": 0.2503616733820347, "grad_norm": 2.653636932373047, "learning_rate": 8.779940620721422e-06, "loss": 0.5933, "step": 20940 }, { "epoch": 0.25037362952689535, "grad_norm": 2.889681339263916, "learning_rate": 8.779813878961254e-06, "loss": 0.6385, "step": 20941 }, { "epoch": 0.250385585671756, "grad_norm": 2.0888240337371826, "learning_rate": 8.779687131533244e-06, "loss": 0.6085, "step": 20942 }, { "epoch": 0.25039754181661666, "grad_norm": 2.7236664295196533, "learning_rate": 8.779560378437577e-06, "loss": 0.673, "step": 20943 }, { "epoch": 0.2504094979614773, "grad_norm": 1.7023956775665283, "learning_rate": 8.779433619674447e-06, "loss": 0.5699, "step": 20944 }, { "epoch": 0.2504214541063379, "grad_norm": 1.585667371749878, "learning_rate": 8.779306855244043e-06, "loss": 0.6157, "step": 20945 }, { "epoch": 0.2504334102511986, "grad_norm": 2.749541759490967, "learning_rate": 8.779180085146555e-06, "loss": 0.6427, "step": 20946 }, { "epoch": 0.25044536639605924, "grad_norm": 2.0076282024383545, "learning_rate": 8.779053309382172e-06, "loss": 0.6002, "step": 20947 }, { "epoch": 0.2504573225409199, "grad_norm": 2.989086627960205, "learning_rate": 8.778926527951087e-06, "loss": 0.6, "step": 20948 }, { "epoch": 0.25046927868578056, "grad_norm": 3.6010324954986572, "learning_rate": 8.778799740853487e-06, "loss": 0.6443, "step": 20949 }, { "epoch": 0.2504812348306412, "grad_norm": 2.7184128761291504, "learning_rate": 8.778672948089564e-06, "loss": 0.6013, "step": 20950 }, { "epoch": 0.2504931909755019, "grad_norm": 1.7054811716079712, "learning_rate": 8.778546149659505e-06, "loss": 0.6044, "step": 20951 }, { "epoch": 0.25050514712036254, "grad_norm": 3.0332963466644287, "learning_rate": 8.778419345563505e-06, "loss": 0.6391, "step": 20952 }, { "epoch": 0.25051710326522314, "grad_norm": 3.3599812984466553, "learning_rate": 8.778292535801751e-06, "loss": 0.6717, "step": 20953 }, { "epoch": 0.2505290594100838, "grad_norm": 3.510043144226074, "learning_rate": 8.778165720374435e-06, "loss": 0.6262, "step": 20954 }, { "epoch": 0.25054101555494446, "grad_norm": 3.2092769145965576, "learning_rate": 8.778038899281746e-06, "loss": 0.7519, "step": 20955 }, { "epoch": 0.2505529716998051, "grad_norm": 3.091472864151001, "learning_rate": 8.777912072523873e-06, "loss": 0.5587, "step": 20956 }, { "epoch": 0.2505649278446658, "grad_norm": 2.80328106880188, "learning_rate": 8.77778524010101e-06, "loss": 0.5755, "step": 20957 }, { "epoch": 0.25057688398952643, "grad_norm": 1.8359789848327637, "learning_rate": 8.77765840201334e-06, "loss": 0.5796, "step": 20958 }, { "epoch": 0.2505888401343871, "grad_norm": 1.8076375722885132, "learning_rate": 8.77753155826106e-06, "loss": 0.6626, "step": 20959 }, { "epoch": 0.2506007962792477, "grad_norm": 2.8981456756591797, "learning_rate": 8.77740470884436e-06, "loss": 0.5661, "step": 20960 }, { "epoch": 0.25061275242410835, "grad_norm": 2.04589581489563, "learning_rate": 8.777277853763426e-06, "loss": 0.5893, "step": 20961 }, { "epoch": 0.250624708568969, "grad_norm": 9.792579650878906, "learning_rate": 8.77715099301845e-06, "loss": 0.5411, "step": 20962 }, { "epoch": 0.25063666471382967, "grad_norm": 2.4086368083953857, "learning_rate": 8.777024126609626e-06, "loss": 0.5252, "step": 20963 }, { "epoch": 0.25064862085869033, "grad_norm": 2.2378547191619873, "learning_rate": 8.776897254537139e-06, "loss": 0.6698, "step": 20964 }, { "epoch": 0.250660577003551, "grad_norm": 2.2341554164886475, "learning_rate": 8.776770376801178e-06, "loss": 0.5874, "step": 20965 }, { "epoch": 0.25067253314841165, "grad_norm": 1.9299856424331665, "learning_rate": 8.77664349340194e-06, "loss": 0.5291, "step": 20966 }, { "epoch": 0.25068448929327225, "grad_norm": 3.6521494388580322, "learning_rate": 8.77651660433961e-06, "loss": 0.6069, "step": 20967 }, { "epoch": 0.2506964454381329, "grad_norm": 2.5733861923217773, "learning_rate": 8.776389709614382e-06, "loss": 0.6556, "step": 20968 }, { "epoch": 0.25070840158299357, "grad_norm": 1.6668809652328491, "learning_rate": 8.776262809226442e-06, "loss": 0.5141, "step": 20969 }, { "epoch": 0.2507203577278542, "grad_norm": 1.3121671676635742, "learning_rate": 8.776135903175985e-06, "loss": 0.5133, "step": 20970 }, { "epoch": 0.2507323138727149, "grad_norm": 2.220249652862549, "learning_rate": 8.776008991463197e-06, "loss": 0.5852, "step": 20971 }, { "epoch": 0.25074427001757554, "grad_norm": 4.439333438873291, "learning_rate": 8.775882074088269e-06, "loss": 0.6878, "step": 20972 }, { "epoch": 0.2507562261624362, "grad_norm": 2.074795722961426, "learning_rate": 8.775755151051396e-06, "loss": 0.661, "step": 20973 }, { "epoch": 0.25076818230729686, "grad_norm": 14.488365173339844, "learning_rate": 8.775628222352762e-06, "loss": 0.6233, "step": 20974 }, { "epoch": 0.25078013845215746, "grad_norm": 2.426863431930542, "learning_rate": 8.775501287992558e-06, "loss": 0.5883, "step": 20975 }, { "epoch": 0.2507920945970181, "grad_norm": 2.6060266494750977, "learning_rate": 8.77537434797098e-06, "loss": 0.6222, "step": 20976 }, { "epoch": 0.2508040507418788, "grad_norm": 1.7905454635620117, "learning_rate": 8.775247402288214e-06, "loss": 0.5699, "step": 20977 }, { "epoch": 0.25081600688673944, "grad_norm": 1.6152827739715576, "learning_rate": 8.77512045094445e-06, "loss": 0.6264, "step": 20978 }, { "epoch": 0.2508279630316001, "grad_norm": 3.376638412475586, "learning_rate": 8.774993493939881e-06, "loss": 0.6277, "step": 20979 }, { "epoch": 0.25083991917646076, "grad_norm": 2.5090644359588623, "learning_rate": 8.774866531274696e-06, "loss": 0.6442, "step": 20980 }, { "epoch": 0.2508518753213214, "grad_norm": 1.9466480016708374, "learning_rate": 8.774739562949082e-06, "loss": 0.6019, "step": 20981 }, { "epoch": 0.250863831466182, "grad_norm": 7.992786884307861, "learning_rate": 8.774612588963237e-06, "loss": 0.5946, "step": 20982 }, { "epoch": 0.2508757876110427, "grad_norm": 2.538243293762207, "learning_rate": 8.774485609317343e-06, "loss": 0.6719, "step": 20983 }, { "epoch": 0.25088774375590334, "grad_norm": 2.110161542892456, "learning_rate": 8.774358624011597e-06, "loss": 0.5239, "step": 20984 }, { "epoch": 0.250899699900764, "grad_norm": 1.882685661315918, "learning_rate": 8.774231633046187e-06, "loss": 0.5892, "step": 20985 }, { "epoch": 0.25091165604562465, "grad_norm": 5.250094413757324, "learning_rate": 8.7741046364213e-06, "loss": 0.58, "step": 20986 }, { "epoch": 0.2509236121904853, "grad_norm": 2.0744388103485107, "learning_rate": 8.773977634137135e-06, "loss": 0.6729, "step": 20987 }, { "epoch": 0.25093556833534597, "grad_norm": 2.4879884719848633, "learning_rate": 8.773850626193873e-06, "loss": 0.7042, "step": 20988 }, { "epoch": 0.25094752448020663, "grad_norm": 5.753368854522705, "learning_rate": 8.773723612591708e-06, "loss": 0.6575, "step": 20989 }, { "epoch": 0.25095948062506723, "grad_norm": 2.413527727127075, "learning_rate": 8.773596593330834e-06, "loss": 0.6268, "step": 20990 }, { "epoch": 0.2509714367699279, "grad_norm": 3.0655055046081543, "learning_rate": 8.773469568411437e-06, "loss": 0.571, "step": 20991 }, { "epoch": 0.25098339291478855, "grad_norm": 2.4527313709259033, "learning_rate": 8.773342537833708e-06, "loss": 0.6598, "step": 20992 }, { "epoch": 0.2509953490596492, "grad_norm": 1.5820022821426392, "learning_rate": 8.77321550159784e-06, "loss": 0.6117, "step": 20993 }, { "epoch": 0.25100730520450987, "grad_norm": 2.03656005859375, "learning_rate": 8.773088459704023e-06, "loss": 0.7065, "step": 20994 }, { "epoch": 0.2510192613493705, "grad_norm": 6.195692539215088, "learning_rate": 8.772961412152445e-06, "loss": 0.5795, "step": 20995 }, { "epoch": 0.2510312174942312, "grad_norm": 3.894376039505005, "learning_rate": 8.772834358943298e-06, "loss": 0.6383, "step": 20996 }, { "epoch": 0.2510431736390918, "grad_norm": 3.944838523864746, "learning_rate": 8.772707300076772e-06, "loss": 0.6508, "step": 20997 }, { "epoch": 0.25105512978395245, "grad_norm": 2.7860257625579834, "learning_rate": 8.77258023555306e-06, "loss": 0.6225, "step": 20998 }, { "epoch": 0.2510670859288131, "grad_norm": 3.4020705223083496, "learning_rate": 8.772453165372347e-06, "loss": 0.6872, "step": 20999 }, { "epoch": 0.25107904207367376, "grad_norm": 1.982153058052063, "learning_rate": 8.77232608953483e-06, "loss": 0.6368, "step": 21000 }, { "epoch": 0.2510909982185344, "grad_norm": 1.8278498649597168, "learning_rate": 8.772199008040697e-06, "loss": 0.6064, "step": 21001 }, { "epoch": 0.2511029543633951, "grad_norm": 2.4397430419921875, "learning_rate": 8.772071920890135e-06, "loss": 0.5613, "step": 21002 }, { "epoch": 0.25111491050825574, "grad_norm": 1.632431149482727, "learning_rate": 8.771944828083341e-06, "loss": 0.647, "step": 21003 }, { "epoch": 0.25112686665311634, "grad_norm": 2.3012006282806396, "learning_rate": 8.7718177296205e-06, "loss": 0.6438, "step": 21004 }, { "epoch": 0.251138822797977, "grad_norm": 2.1921823024749756, "learning_rate": 8.771690625501806e-06, "loss": 0.552, "step": 21005 }, { "epoch": 0.25115077894283766, "grad_norm": 4.269461154937744, "learning_rate": 8.771563515727448e-06, "loss": 0.5856, "step": 21006 }, { "epoch": 0.2511627350876983, "grad_norm": 3.834703207015991, "learning_rate": 8.771436400297617e-06, "loss": 0.564, "step": 21007 }, { "epoch": 0.251174691232559, "grad_norm": 2.0171055793762207, "learning_rate": 8.771309279212505e-06, "loss": 0.5815, "step": 21008 }, { "epoch": 0.25118664737741964, "grad_norm": 1.8487699031829834, "learning_rate": 8.771182152472301e-06, "loss": 0.5527, "step": 21009 }, { "epoch": 0.2511986035222803, "grad_norm": 1.9757663011550903, "learning_rate": 8.771055020077195e-06, "loss": 0.7001, "step": 21010 }, { "epoch": 0.25121055966714095, "grad_norm": 2.1430857181549072, "learning_rate": 8.77092788202738e-06, "loss": 0.484, "step": 21011 }, { "epoch": 0.25122251581200156, "grad_norm": 2.460176944732666, "learning_rate": 8.770800738323044e-06, "loss": 0.5274, "step": 21012 }, { "epoch": 0.2512344719568622, "grad_norm": 8.38953971862793, "learning_rate": 8.770673588964378e-06, "loss": 0.5501, "step": 21013 }, { "epoch": 0.2512464281017229, "grad_norm": 1.83204984664917, "learning_rate": 8.770546433951575e-06, "loss": 0.5777, "step": 21014 }, { "epoch": 0.25125838424658353, "grad_norm": 1.8938405513763428, "learning_rate": 8.770419273284825e-06, "loss": 0.6138, "step": 21015 }, { "epoch": 0.2512703403914442, "grad_norm": 2.020630359649658, "learning_rate": 8.770292106964316e-06, "loss": 0.6068, "step": 21016 }, { "epoch": 0.25128229653630485, "grad_norm": 1.8224080801010132, "learning_rate": 8.770164934990242e-06, "loss": 0.6218, "step": 21017 }, { "epoch": 0.2512942526811655, "grad_norm": 1.6808558702468872, "learning_rate": 8.77003775736279e-06, "loss": 0.5554, "step": 21018 }, { "epoch": 0.2513062088260261, "grad_norm": 1.861619472503662, "learning_rate": 8.769910574082156e-06, "loss": 0.5353, "step": 21019 }, { "epoch": 0.25131816497088677, "grad_norm": 1.9023702144622803, "learning_rate": 8.769783385148528e-06, "loss": 0.6904, "step": 21020 }, { "epoch": 0.25133012111574743, "grad_norm": 2.388429880142212, "learning_rate": 8.769656190562093e-06, "loss": 0.6423, "step": 21021 }, { "epoch": 0.2513420772606081, "grad_norm": 3.3926892280578613, "learning_rate": 8.769528990323047e-06, "loss": 0.7415, "step": 21022 }, { "epoch": 0.25135403340546875, "grad_norm": 2.1857542991638184, "learning_rate": 8.769401784431578e-06, "loss": 0.6314, "step": 21023 }, { "epoch": 0.2513659895503294, "grad_norm": 7.815442085266113, "learning_rate": 8.769274572887879e-06, "loss": 0.5887, "step": 21024 }, { "epoch": 0.25137794569519006, "grad_norm": 4.586945056915283, "learning_rate": 8.769147355692139e-06, "loss": 0.6047, "step": 21025 }, { "epoch": 0.25138990184005067, "grad_norm": 2.2063112258911133, "learning_rate": 8.76902013284455e-06, "loss": 0.6246, "step": 21026 }, { "epoch": 0.2514018579849113, "grad_norm": 2.2862279415130615, "learning_rate": 8.7688929043453e-06, "loss": 0.5803, "step": 21027 }, { "epoch": 0.251413814129772, "grad_norm": 3.5825722217559814, "learning_rate": 8.768765670194583e-06, "loss": 0.5872, "step": 21028 }, { "epoch": 0.25142577027463264, "grad_norm": 4.465357303619385, "learning_rate": 8.768638430392587e-06, "loss": 0.5837, "step": 21029 }, { "epoch": 0.2514377264194933, "grad_norm": 2.1363532543182373, "learning_rate": 8.768511184939506e-06, "loss": 0.6621, "step": 21030 }, { "epoch": 0.25144968256435396, "grad_norm": 2.5324254035949707, "learning_rate": 8.768383933835528e-06, "loss": 0.6057, "step": 21031 }, { "epoch": 0.2514616387092146, "grad_norm": 3.032952070236206, "learning_rate": 8.768256677080847e-06, "loss": 0.627, "step": 21032 }, { "epoch": 0.2514735948540753, "grad_norm": 3.409337282180786, "learning_rate": 8.768129414675648e-06, "loss": 0.6062, "step": 21033 }, { "epoch": 0.2514855509989359, "grad_norm": 2.6131370067596436, "learning_rate": 8.768002146620128e-06, "loss": 0.5722, "step": 21034 }, { "epoch": 0.25149750714379654, "grad_norm": 2.5640854835510254, "learning_rate": 8.767874872914474e-06, "loss": 0.6178, "step": 21035 }, { "epoch": 0.2515094632886572, "grad_norm": 3.546954393386841, "learning_rate": 8.76774759355888e-06, "loss": 0.6582, "step": 21036 }, { "epoch": 0.25152141943351786, "grad_norm": 1.9923605918884277, "learning_rate": 8.767620308553535e-06, "loss": 0.5358, "step": 21037 }, { "epoch": 0.2515333755783785, "grad_norm": 2.0496599674224854, "learning_rate": 8.767493017898629e-06, "loss": 0.5201, "step": 21038 }, { "epoch": 0.2515453317232392, "grad_norm": 2.7987987995147705, "learning_rate": 8.767365721594353e-06, "loss": 0.6497, "step": 21039 }, { "epoch": 0.25155728786809983, "grad_norm": 2.192023277282715, "learning_rate": 8.7672384196409e-06, "loss": 0.6666, "step": 21040 }, { "epoch": 0.25156924401296044, "grad_norm": 2.2971158027648926, "learning_rate": 8.76711111203846e-06, "loss": 0.6203, "step": 21041 }, { "epoch": 0.2515812001578211, "grad_norm": 2.9715235233306885, "learning_rate": 8.766983798787222e-06, "loss": 0.6426, "step": 21042 }, { "epoch": 0.25159315630268175, "grad_norm": 3.1871252059936523, "learning_rate": 8.76685647988738e-06, "loss": 0.5109, "step": 21043 }, { "epoch": 0.2516051124475424, "grad_norm": 2.2505249977111816, "learning_rate": 8.766729155339123e-06, "loss": 0.6458, "step": 21044 }, { "epoch": 0.25161706859240307, "grad_norm": 2.479321241378784, "learning_rate": 8.766601825142642e-06, "loss": 0.6332, "step": 21045 }, { "epoch": 0.25162902473726373, "grad_norm": 13.602059364318848, "learning_rate": 8.766474489298128e-06, "loss": 0.7201, "step": 21046 }, { "epoch": 0.2516409808821244, "grad_norm": 2.8424763679504395, "learning_rate": 8.766347147805772e-06, "loss": 0.6282, "step": 21047 }, { "epoch": 0.25165293702698505, "grad_norm": 2.366473436355591, "learning_rate": 8.766219800665765e-06, "loss": 0.6416, "step": 21048 }, { "epoch": 0.25166489317184565, "grad_norm": 1.8220000267028809, "learning_rate": 8.766092447878299e-06, "loss": 0.5899, "step": 21049 }, { "epoch": 0.2516768493167063, "grad_norm": 1.5121654272079468, "learning_rate": 8.765965089443562e-06, "loss": 0.5357, "step": 21050 }, { "epoch": 0.25168880546156697, "grad_norm": 3.869744300842285, "learning_rate": 8.76583772536175e-06, "loss": 0.5981, "step": 21051 }, { "epoch": 0.2517007616064276, "grad_norm": 2.3621580600738525, "learning_rate": 8.765710355633048e-06, "loss": 0.6456, "step": 21052 }, { "epoch": 0.2517127177512883, "grad_norm": 5.383135795593262, "learning_rate": 8.765582980257652e-06, "loss": 0.6683, "step": 21053 }, { "epoch": 0.25172467389614894, "grad_norm": 2.300976037979126, "learning_rate": 8.765455599235748e-06, "loss": 0.6355, "step": 21054 }, { "epoch": 0.2517366300410096, "grad_norm": 3.678699254989624, "learning_rate": 8.765328212567532e-06, "loss": 0.5815, "step": 21055 }, { "epoch": 0.2517485861858702, "grad_norm": 1.6886882781982422, "learning_rate": 8.765200820253193e-06, "loss": 0.629, "step": 21056 }, { "epoch": 0.25176054233073086, "grad_norm": 1.6726258993148804, "learning_rate": 8.765073422292922e-06, "loss": 0.5655, "step": 21057 }, { "epoch": 0.2517724984755915, "grad_norm": 13.171707153320312, "learning_rate": 8.764946018686911e-06, "loss": 0.6234, "step": 21058 }, { "epoch": 0.2517844546204522, "grad_norm": 1.9846397638320923, "learning_rate": 8.764818609435349e-06, "loss": 0.6822, "step": 21059 }, { "epoch": 0.25179641076531284, "grad_norm": 3.2208635807037354, "learning_rate": 8.764691194538428e-06, "loss": 0.624, "step": 21060 }, { "epoch": 0.2518083669101735, "grad_norm": 1.6183793544769287, "learning_rate": 8.764563773996338e-06, "loss": 0.5469, "step": 21061 }, { "epoch": 0.25182032305503416, "grad_norm": 2.135258436203003, "learning_rate": 8.764436347809272e-06, "loss": 0.5671, "step": 21062 }, { "epoch": 0.25183227919989476, "grad_norm": 3.2678370475769043, "learning_rate": 8.764308915977421e-06, "loss": 0.5957, "step": 21063 }, { "epoch": 0.2518442353447554, "grad_norm": 3.269808530807495, "learning_rate": 8.764181478500977e-06, "loss": 0.5867, "step": 21064 }, { "epoch": 0.2518561914896161, "grad_norm": 2.428302049636841, "learning_rate": 8.764054035380128e-06, "loss": 0.6246, "step": 21065 }, { "epoch": 0.25186814763447674, "grad_norm": 3.739201545715332, "learning_rate": 8.763926586615065e-06, "loss": 0.6126, "step": 21066 }, { "epoch": 0.2518801037793374, "grad_norm": 11.033820152282715, "learning_rate": 8.763799132205983e-06, "loss": 0.6964, "step": 21067 }, { "epoch": 0.25189205992419805, "grad_norm": 1.9932208061218262, "learning_rate": 8.76367167215307e-06, "loss": 0.5669, "step": 21068 }, { "epoch": 0.2519040160690587, "grad_norm": 2.2644460201263428, "learning_rate": 8.763544206456517e-06, "loss": 0.6306, "step": 21069 }, { "epoch": 0.25191597221391937, "grad_norm": 1.5422090291976929, "learning_rate": 8.763416735116517e-06, "loss": 0.5488, "step": 21070 }, { "epoch": 0.25192792835878, "grad_norm": 1.5112793445587158, "learning_rate": 8.76328925813326e-06, "loss": 0.5506, "step": 21071 }, { "epoch": 0.25193988450364063, "grad_norm": 3.056018352508545, "learning_rate": 8.763161775506937e-06, "loss": 0.5604, "step": 21072 }, { "epoch": 0.2519518406485013, "grad_norm": 1.5096415281295776, "learning_rate": 8.76303428723774e-06, "loss": 0.5733, "step": 21073 }, { "epoch": 0.25196379679336195, "grad_norm": 3.7343881130218506, "learning_rate": 8.762906793325861e-06, "loss": 0.6818, "step": 21074 }, { "epoch": 0.2519757529382226, "grad_norm": 2.5445990562438965, "learning_rate": 8.762779293771488e-06, "loss": 0.7094, "step": 21075 }, { "epoch": 0.25198770908308327, "grad_norm": 2.546144485473633, "learning_rate": 8.762651788574813e-06, "loss": 0.7222, "step": 21076 }, { "epoch": 0.2519996652279439, "grad_norm": 1.3897899389266968, "learning_rate": 8.76252427773603e-06, "loss": 0.5467, "step": 21077 }, { "epoch": 0.25201162137280453, "grad_norm": 2.23742413520813, "learning_rate": 8.76239676125533e-06, "loss": 0.6199, "step": 21078 }, { "epoch": 0.2520235775176652, "grad_norm": 5.502904415130615, "learning_rate": 8.762269239132901e-06, "loss": 0.5731, "step": 21079 }, { "epoch": 0.25203553366252585, "grad_norm": 1.5879188776016235, "learning_rate": 8.762141711368936e-06, "loss": 0.5729, "step": 21080 }, { "epoch": 0.2520474898073865, "grad_norm": 2.3616721630096436, "learning_rate": 8.762014177963626e-06, "loss": 0.6218, "step": 21081 }, { "epoch": 0.25205944595224716, "grad_norm": 1.9057422876358032, "learning_rate": 8.761886638917162e-06, "loss": 0.6291, "step": 21082 }, { "epoch": 0.2520714020971078, "grad_norm": 2.8764684200286865, "learning_rate": 8.761759094229736e-06, "loss": 0.6495, "step": 21083 }, { "epoch": 0.2520833582419685, "grad_norm": 1.5765528678894043, "learning_rate": 8.761631543901539e-06, "loss": 0.5508, "step": 21084 }, { "epoch": 0.2520953143868291, "grad_norm": 2.3839972019195557, "learning_rate": 8.761503987932762e-06, "loss": 0.4958, "step": 21085 }, { "epoch": 0.25210727053168974, "grad_norm": 9.754135131835938, "learning_rate": 8.761376426323597e-06, "loss": 0.6094, "step": 21086 }, { "epoch": 0.2521192266765504, "grad_norm": 2.367661714553833, "learning_rate": 8.761248859074234e-06, "loss": 0.7232, "step": 21087 }, { "epoch": 0.25213118282141106, "grad_norm": 2.501248836517334, "learning_rate": 8.761121286184866e-06, "loss": 0.551, "step": 21088 }, { "epoch": 0.2521431389662717, "grad_norm": 1.659048318862915, "learning_rate": 8.760993707655682e-06, "loss": 0.5489, "step": 21089 }, { "epoch": 0.2521550951111324, "grad_norm": 2.3179149627685547, "learning_rate": 8.760866123486875e-06, "loss": 0.6496, "step": 21090 }, { "epoch": 0.25216705125599304, "grad_norm": 6.851738929748535, "learning_rate": 8.760738533678636e-06, "loss": 0.5689, "step": 21091 }, { "epoch": 0.2521790074008537, "grad_norm": 3.4434425830841064, "learning_rate": 8.760610938231156e-06, "loss": 0.5752, "step": 21092 }, { "epoch": 0.2521909635457143, "grad_norm": 5.617177963256836, "learning_rate": 8.760483337144626e-06, "loss": 0.5987, "step": 21093 }, { "epoch": 0.25220291969057496, "grad_norm": 3.0744712352752686, "learning_rate": 8.760355730419238e-06, "loss": 0.6073, "step": 21094 }, { "epoch": 0.2522148758354356, "grad_norm": 2.095395088195801, "learning_rate": 8.760228118055184e-06, "loss": 0.5874, "step": 21095 }, { "epoch": 0.2522268319802963, "grad_norm": 3.0256805419921875, "learning_rate": 8.760100500052656e-06, "loss": 0.519, "step": 21096 }, { "epoch": 0.25223878812515693, "grad_norm": 2.246065378189087, "learning_rate": 8.75997287641184e-06, "loss": 0.5728, "step": 21097 }, { "epoch": 0.2522507442700176, "grad_norm": 3.7637786865234375, "learning_rate": 8.759845247132935e-06, "loss": 0.6015, "step": 21098 }, { "epoch": 0.25226270041487825, "grad_norm": 1.9740458726882935, "learning_rate": 8.759717612216126e-06, "loss": 0.6568, "step": 21099 }, { "epoch": 0.25227465655973885, "grad_norm": 1.9509848356246948, "learning_rate": 8.759589971661609e-06, "loss": 0.5764, "step": 21100 }, { "epoch": 0.2522866127045995, "grad_norm": 1.696395754814148, "learning_rate": 8.759462325469572e-06, "loss": 0.7491, "step": 21101 }, { "epoch": 0.25229856884946017, "grad_norm": 2.2539124488830566, "learning_rate": 8.75933467364021e-06, "loss": 0.5628, "step": 21102 }, { "epoch": 0.25231052499432083, "grad_norm": 3.943240165710449, "learning_rate": 8.75920701617371e-06, "loss": 0.6932, "step": 21103 }, { "epoch": 0.2523224811391815, "grad_norm": 2.2620365619659424, "learning_rate": 8.759079353070266e-06, "loss": 0.6266, "step": 21104 }, { "epoch": 0.25233443728404215, "grad_norm": 2.345705986022949, "learning_rate": 8.758951684330071e-06, "loss": 0.605, "step": 21105 }, { "epoch": 0.2523463934289028, "grad_norm": 65.89309692382812, "learning_rate": 8.758824009953314e-06, "loss": 0.6654, "step": 21106 }, { "epoch": 0.25235834957376346, "grad_norm": 2.4731719493865967, "learning_rate": 8.758696329940186e-06, "loss": 0.5328, "step": 21107 }, { "epoch": 0.25237030571862407, "grad_norm": 1.820892333984375, "learning_rate": 8.758568644290878e-06, "loss": 0.5924, "step": 21108 }, { "epoch": 0.2523822618634847, "grad_norm": 2.565133571624756, "learning_rate": 8.758440953005585e-06, "loss": 0.5927, "step": 21109 }, { "epoch": 0.2523942180083454, "grad_norm": 1.2916274070739746, "learning_rate": 8.758313256084497e-06, "loss": 0.5422, "step": 21110 }, { "epoch": 0.25240617415320604, "grad_norm": 3.6651341915130615, "learning_rate": 8.758185553527806e-06, "loss": 0.6332, "step": 21111 }, { "epoch": 0.2524181302980667, "grad_norm": 4.5104594230651855, "learning_rate": 8.758057845335699e-06, "loss": 0.5865, "step": 21112 }, { "epoch": 0.25243008644292736, "grad_norm": 1.937310814857483, "learning_rate": 8.757930131508372e-06, "loss": 0.6359, "step": 21113 }, { "epoch": 0.252442042587788, "grad_norm": 4.362624168395996, "learning_rate": 8.757802412046018e-06, "loss": 0.5834, "step": 21114 }, { "epoch": 0.2524539987326486, "grad_norm": 2.4131152629852295, "learning_rate": 8.757674686948822e-06, "loss": 0.6073, "step": 21115 }, { "epoch": 0.2524659548775093, "grad_norm": 2.8625879287719727, "learning_rate": 8.757546956216983e-06, "loss": 0.5867, "step": 21116 }, { "epoch": 0.25247791102236994, "grad_norm": 24.842140197753906, "learning_rate": 8.757419219850687e-06, "loss": 0.6635, "step": 21117 }, { "epoch": 0.2524898671672306, "grad_norm": 2.40787672996521, "learning_rate": 8.757291477850127e-06, "loss": 0.6305, "step": 21118 }, { "epoch": 0.25250182331209126, "grad_norm": 1.7827129364013672, "learning_rate": 8.757163730215496e-06, "loss": 0.5158, "step": 21119 }, { "epoch": 0.2525137794569519, "grad_norm": 2.164940595626831, "learning_rate": 8.757035976946983e-06, "loss": 0.5706, "step": 21120 }, { "epoch": 0.2525257356018126, "grad_norm": 2.1168103218078613, "learning_rate": 8.756908218044784e-06, "loss": 0.5801, "step": 21121 }, { "epoch": 0.2525376917466732, "grad_norm": 2.128183126449585, "learning_rate": 8.756780453509086e-06, "loss": 0.5602, "step": 21122 }, { "epoch": 0.25254964789153383, "grad_norm": 5.403163909912109, "learning_rate": 8.756652683340084e-06, "loss": 0.5883, "step": 21123 }, { "epoch": 0.2525616040363945, "grad_norm": 1.7071514129638672, "learning_rate": 8.756524907537966e-06, "loss": 0.6483, "step": 21124 }, { "epoch": 0.25257356018125515, "grad_norm": 4.505743980407715, "learning_rate": 8.756397126102926e-06, "loss": 0.6616, "step": 21125 }, { "epoch": 0.2525855163261158, "grad_norm": 1.8373324871063232, "learning_rate": 8.756269339035155e-06, "loss": 0.6186, "step": 21126 }, { "epoch": 0.25259747247097647, "grad_norm": 2.4157509803771973, "learning_rate": 8.756141546334846e-06, "loss": 0.7051, "step": 21127 }, { "epoch": 0.25260942861583713, "grad_norm": 1.874427318572998, "learning_rate": 8.756013748002188e-06, "loss": 0.6085, "step": 21128 }, { "epoch": 0.2526213847606978, "grad_norm": 3.452693223953247, "learning_rate": 8.755885944037374e-06, "loss": 0.553, "step": 21129 }, { "epoch": 0.2526333409055584, "grad_norm": 2.023223638534546, "learning_rate": 8.755758134440597e-06, "loss": 0.598, "step": 21130 }, { "epoch": 0.25264529705041905, "grad_norm": 3.30972957611084, "learning_rate": 8.755630319212047e-06, "loss": 0.6015, "step": 21131 }, { "epoch": 0.2526572531952797, "grad_norm": 1.9099349975585938, "learning_rate": 8.755502498351916e-06, "loss": 0.5565, "step": 21132 }, { "epoch": 0.25266920934014037, "grad_norm": 2.806462287902832, "learning_rate": 8.755374671860394e-06, "loss": 0.6945, "step": 21133 }, { "epoch": 0.252681165485001, "grad_norm": 2.619985580444336, "learning_rate": 8.755246839737677e-06, "loss": 0.6888, "step": 21134 }, { "epoch": 0.2526931216298617, "grad_norm": 2.6368627548217773, "learning_rate": 8.755119001983953e-06, "loss": 0.6732, "step": 21135 }, { "epoch": 0.25270507777472234, "grad_norm": 1.927220344543457, "learning_rate": 8.754991158599413e-06, "loss": 0.6064, "step": 21136 }, { "epoch": 0.25271703391958295, "grad_norm": 2.207209825515747, "learning_rate": 8.754863309584252e-06, "loss": 0.6544, "step": 21137 }, { "epoch": 0.2527289900644436, "grad_norm": 1.9885979890823364, "learning_rate": 8.75473545493866e-06, "loss": 0.541, "step": 21138 }, { "epoch": 0.25274094620930426, "grad_norm": 3.0653674602508545, "learning_rate": 8.75460759466283e-06, "loss": 0.5348, "step": 21139 }, { "epoch": 0.2527529023541649, "grad_norm": 3.477774143218994, "learning_rate": 8.754479728756951e-06, "loss": 0.6561, "step": 21140 }, { "epoch": 0.2527648584990256, "grad_norm": 2.1254022121429443, "learning_rate": 8.754351857221217e-06, "loss": 0.6831, "step": 21141 }, { "epoch": 0.25277681464388624, "grad_norm": 2.7460155487060547, "learning_rate": 8.754223980055819e-06, "loss": 0.5302, "step": 21142 }, { "epoch": 0.2527887707887469, "grad_norm": 2.726552724838257, "learning_rate": 8.754096097260947e-06, "loss": 0.6325, "step": 21143 }, { "epoch": 0.2528007269336075, "grad_norm": 1.8636873960494995, "learning_rate": 8.753968208836797e-06, "loss": 0.5786, "step": 21144 }, { "epoch": 0.25281268307846816, "grad_norm": 2.810152292251587, "learning_rate": 8.753840314783557e-06, "loss": 0.5754, "step": 21145 }, { "epoch": 0.2528246392233288, "grad_norm": 2.4021260738372803, "learning_rate": 8.753712415101422e-06, "loss": 0.6223, "step": 21146 }, { "epoch": 0.2528365953681895, "grad_norm": 2.1018471717834473, "learning_rate": 8.753584509790577e-06, "loss": 0.5353, "step": 21147 }, { "epoch": 0.25284855151305013, "grad_norm": 1.7661175727844238, "learning_rate": 8.753456598851225e-06, "loss": 0.6824, "step": 21148 }, { "epoch": 0.2528605076579108, "grad_norm": 1.975385308265686, "learning_rate": 8.753328682283548e-06, "loss": 0.6859, "step": 21149 }, { "epoch": 0.25287246380277145, "grad_norm": 2.8134350776672363, "learning_rate": 8.75320076008774e-06, "loss": 0.6504, "step": 21150 }, { "epoch": 0.2528844199476321, "grad_norm": 2.492490291595459, "learning_rate": 8.753072832263997e-06, "loss": 0.6134, "step": 21151 }, { "epoch": 0.2528963760924927, "grad_norm": 2.0542073249816895, "learning_rate": 8.752944898812508e-06, "loss": 0.6243, "step": 21152 }, { "epoch": 0.2529083322373534, "grad_norm": 2.4127373695373535, "learning_rate": 8.752816959733462e-06, "loss": 0.5671, "step": 21153 }, { "epoch": 0.25292028838221403, "grad_norm": 2.343503475189209, "learning_rate": 8.752689015027056e-06, "loss": 0.6282, "step": 21154 }, { "epoch": 0.2529322445270747, "grad_norm": 2.011850595474243, "learning_rate": 8.75256106469348e-06, "loss": 0.738, "step": 21155 }, { "epoch": 0.25294420067193535, "grad_norm": 2.584359645843506, "learning_rate": 8.752433108732923e-06, "loss": 0.5555, "step": 21156 }, { "epoch": 0.252956156816796, "grad_norm": 2.0445239543914795, "learning_rate": 8.75230514714558e-06, "loss": 0.604, "step": 21157 }, { "epoch": 0.25296811296165667, "grad_norm": 1.4855883121490479, "learning_rate": 8.752177179931642e-06, "loss": 0.66, "step": 21158 }, { "epoch": 0.25298006910651727, "grad_norm": 1.4071112871170044, "learning_rate": 8.752049207091301e-06, "loss": 0.5637, "step": 21159 }, { "epoch": 0.2529920252513779, "grad_norm": 1.8577722311019897, "learning_rate": 8.75192122862475e-06, "loss": 0.5836, "step": 21160 }, { "epoch": 0.2530039813962386, "grad_norm": 1.4891879558563232, "learning_rate": 8.751793244532178e-06, "loss": 0.565, "step": 21161 }, { "epoch": 0.25301593754109925, "grad_norm": 2.0680861473083496, "learning_rate": 8.751665254813781e-06, "loss": 0.625, "step": 21162 }, { "epoch": 0.2530278936859599, "grad_norm": 4.576295852661133, "learning_rate": 8.751537259469746e-06, "loss": 0.6846, "step": 21163 }, { "epoch": 0.25303984983082056, "grad_norm": 2.5533406734466553, "learning_rate": 8.75140925850027e-06, "loss": 0.5338, "step": 21164 }, { "epoch": 0.2530518059756812, "grad_norm": 2.079843282699585, "learning_rate": 8.751281251905542e-06, "loss": 0.7137, "step": 21165 }, { "epoch": 0.2530637621205419, "grad_norm": 2.873425245285034, "learning_rate": 8.751153239685753e-06, "loss": 0.5706, "step": 21166 }, { "epoch": 0.2530757182654025, "grad_norm": 1.9938411712646484, "learning_rate": 8.751025221841098e-06, "loss": 0.5589, "step": 21167 }, { "epoch": 0.25308767441026314, "grad_norm": 5.38493537902832, "learning_rate": 8.750897198371767e-06, "loss": 0.6077, "step": 21168 }, { "epoch": 0.2530996305551238, "grad_norm": 12.596331596374512, "learning_rate": 8.75076916927795e-06, "loss": 0.6227, "step": 21169 }, { "epoch": 0.25311158669998446, "grad_norm": 2.1381912231445312, "learning_rate": 8.750641134559845e-06, "loss": 0.6701, "step": 21170 }, { "epoch": 0.2531235428448451, "grad_norm": 3.616769790649414, "learning_rate": 8.750513094217639e-06, "loss": 0.6122, "step": 21171 }, { "epoch": 0.2531354989897058, "grad_norm": 3.024087429046631, "learning_rate": 8.750385048251524e-06, "loss": 0.5645, "step": 21172 }, { "epoch": 0.25314745513456643, "grad_norm": 1.4759641885757446, "learning_rate": 8.750256996661693e-06, "loss": 0.588, "step": 21173 }, { "epoch": 0.25315941127942704, "grad_norm": 1.3567343950271606, "learning_rate": 8.75012893944834e-06, "loss": 0.6259, "step": 21174 }, { "epoch": 0.2531713674242877, "grad_norm": 13.325828552246094, "learning_rate": 8.750000876611655e-06, "loss": 0.6226, "step": 21175 }, { "epoch": 0.25318332356914836, "grad_norm": 2.0654456615448, "learning_rate": 8.749872808151828e-06, "loss": 0.6259, "step": 21176 }, { "epoch": 0.253195279714009, "grad_norm": 2.332660436630249, "learning_rate": 8.749744734069056e-06, "loss": 0.6414, "step": 21177 }, { "epoch": 0.2532072358588697, "grad_norm": 27.560131072998047, "learning_rate": 8.749616654363528e-06, "loss": 0.7691, "step": 21178 }, { "epoch": 0.25321919200373033, "grad_norm": 2.4308290481567383, "learning_rate": 8.749488569035438e-06, "loss": 0.6286, "step": 21179 }, { "epoch": 0.253231148148591, "grad_norm": 2.932183265686035, "learning_rate": 8.749360478084974e-06, "loss": 0.6216, "step": 21180 }, { "epoch": 0.2532431042934516, "grad_norm": 9.348209381103516, "learning_rate": 8.749232381512332e-06, "loss": 0.6035, "step": 21181 }, { "epoch": 0.25325506043831225, "grad_norm": 6.205034255981445, "learning_rate": 8.749104279317701e-06, "loss": 0.4937, "step": 21182 }, { "epoch": 0.2532670165831729, "grad_norm": 2.5549347400665283, "learning_rate": 8.748976171501278e-06, "loss": 0.5235, "step": 21183 }, { "epoch": 0.25327897272803357, "grad_norm": 3.2909560203552246, "learning_rate": 8.74884805806325e-06, "loss": 0.6594, "step": 21184 }, { "epoch": 0.2532909288728942, "grad_norm": 1.4762225151062012, "learning_rate": 8.74871993900381e-06, "loss": 0.5252, "step": 21185 }, { "epoch": 0.2533028850177549, "grad_norm": 1.7838891744613647, "learning_rate": 8.748591814323154e-06, "loss": 0.5424, "step": 21186 }, { "epoch": 0.25331484116261554, "grad_norm": 2.0400681495666504, "learning_rate": 8.748463684021468e-06, "loss": 0.5815, "step": 21187 }, { "epoch": 0.2533267973074762, "grad_norm": 1.5727070569992065, "learning_rate": 8.748335548098948e-06, "loss": 0.5486, "step": 21188 }, { "epoch": 0.2533387534523368, "grad_norm": 1.7037760019302368, "learning_rate": 8.748207406555787e-06, "loss": 0.5314, "step": 21189 }, { "epoch": 0.25335070959719747, "grad_norm": 2.6540911197662354, "learning_rate": 8.748079259392174e-06, "loss": 0.6131, "step": 21190 }, { "epoch": 0.2533626657420581, "grad_norm": 3.1249895095825195, "learning_rate": 8.747951106608305e-06, "loss": 0.5781, "step": 21191 }, { "epoch": 0.2533746218869188, "grad_norm": 2.5228991508483887, "learning_rate": 8.747822948204368e-06, "loss": 0.6749, "step": 21192 }, { "epoch": 0.25338657803177944, "grad_norm": 3.0624313354492188, "learning_rate": 8.747694784180558e-06, "loss": 0.6444, "step": 21193 }, { "epoch": 0.2533985341766401, "grad_norm": 1.833656668663025, "learning_rate": 8.747566614537066e-06, "loss": 0.594, "step": 21194 }, { "epoch": 0.25341049032150076, "grad_norm": 4.31046724319458, "learning_rate": 8.747438439274086e-06, "loss": 0.6425, "step": 21195 }, { "epoch": 0.25342244646636136, "grad_norm": 1.6351187229156494, "learning_rate": 8.747310258391808e-06, "loss": 0.5982, "step": 21196 }, { "epoch": 0.253434402611222, "grad_norm": 3.1286206245422363, "learning_rate": 8.747182071890423e-06, "loss": 0.6852, "step": 21197 }, { "epoch": 0.2534463587560827, "grad_norm": 2.466240882873535, "learning_rate": 8.747053879770126e-06, "loss": 0.6513, "step": 21198 }, { "epoch": 0.25345831490094334, "grad_norm": 2.587362051010132, "learning_rate": 8.746925682031111e-06, "loss": 0.6157, "step": 21199 }, { "epoch": 0.253470271045804, "grad_norm": 2.6275079250335693, "learning_rate": 8.746797478673566e-06, "loss": 0.6076, "step": 21200 }, { "epoch": 0.25348222719066466, "grad_norm": 1.6181899309158325, "learning_rate": 8.746669269697683e-06, "loss": 0.6444, "step": 21201 }, { "epoch": 0.2534941833355253, "grad_norm": 2.0933666229248047, "learning_rate": 8.74654105510366e-06, "loss": 0.5808, "step": 21202 }, { "epoch": 0.2535061394803859, "grad_norm": 21.754697799682617, "learning_rate": 8.746412834891681e-06, "loss": 0.6274, "step": 21203 }, { "epoch": 0.2535180956252466, "grad_norm": 3.6106250286102295, "learning_rate": 8.746284609061945e-06, "loss": 0.5683, "step": 21204 }, { "epoch": 0.25353005177010723, "grad_norm": 2.0274205207824707, "learning_rate": 8.74615637761464e-06, "loss": 0.5756, "step": 21205 }, { "epoch": 0.2535420079149679, "grad_norm": 1.9683094024658203, "learning_rate": 8.746028140549963e-06, "loss": 0.6634, "step": 21206 }, { "epoch": 0.25355396405982855, "grad_norm": 2.116799831390381, "learning_rate": 8.745899897868102e-06, "loss": 0.6489, "step": 21207 }, { "epoch": 0.2535659202046892, "grad_norm": 4.512132167816162, "learning_rate": 8.745771649569253e-06, "loss": 0.5568, "step": 21208 }, { "epoch": 0.25357787634954987, "grad_norm": 2.474229574203491, "learning_rate": 8.745643395653602e-06, "loss": 0.6942, "step": 21209 }, { "epoch": 0.2535898324944105, "grad_norm": 2.9802520275115967, "learning_rate": 8.745515136121348e-06, "loss": 0.5479, "step": 21210 }, { "epoch": 0.25360178863927113, "grad_norm": 1.871071219444275, "learning_rate": 8.745386870972681e-06, "loss": 0.5601, "step": 21211 }, { "epoch": 0.2536137447841318, "grad_norm": 2.292358875274658, "learning_rate": 8.745258600207793e-06, "loss": 0.6578, "step": 21212 }, { "epoch": 0.25362570092899245, "grad_norm": 3.3542091846466064, "learning_rate": 8.745130323826875e-06, "loss": 0.6026, "step": 21213 }, { "epoch": 0.2536376570738531, "grad_norm": 2.548466205596924, "learning_rate": 8.745002041830121e-06, "loss": 0.6457, "step": 21214 }, { "epoch": 0.25364961321871377, "grad_norm": 2.025725841522217, "learning_rate": 8.744873754217724e-06, "loss": 0.5955, "step": 21215 }, { "epoch": 0.2536615693635744, "grad_norm": 2.5208182334899902, "learning_rate": 8.744745460989876e-06, "loss": 0.5846, "step": 21216 }, { "epoch": 0.2536735255084351, "grad_norm": 2.0208070278167725, "learning_rate": 8.744617162146767e-06, "loss": 0.6199, "step": 21217 }, { "epoch": 0.2536854816532957, "grad_norm": 2.762784242630005, "learning_rate": 8.744488857688594e-06, "loss": 0.5688, "step": 21218 }, { "epoch": 0.25369743779815634, "grad_norm": 1.8335578441619873, "learning_rate": 8.744360547615543e-06, "loss": 0.5346, "step": 21219 }, { "epoch": 0.253709393943017, "grad_norm": 5.961941719055176, "learning_rate": 8.744232231927813e-06, "loss": 0.6696, "step": 21220 }, { "epoch": 0.25372135008787766, "grad_norm": 2.4183945655822754, "learning_rate": 8.744103910625592e-06, "loss": 0.6591, "step": 21221 }, { "epoch": 0.2537333062327383, "grad_norm": 2.6629016399383545, "learning_rate": 8.743975583709074e-06, "loss": 0.5387, "step": 21222 }, { "epoch": 0.253745262377599, "grad_norm": 1.7435623407363892, "learning_rate": 8.743847251178452e-06, "loss": 0.5526, "step": 21223 }, { "epoch": 0.25375721852245964, "grad_norm": 1.6378570795059204, "learning_rate": 8.743718913033919e-06, "loss": 0.5953, "step": 21224 }, { "epoch": 0.2537691746673203, "grad_norm": 2.485377788543701, "learning_rate": 8.743590569275665e-06, "loss": 0.5286, "step": 21225 }, { "epoch": 0.2537811308121809, "grad_norm": 1.6153547763824463, "learning_rate": 8.743462219903883e-06, "loss": 0.6513, "step": 21226 }, { "epoch": 0.25379308695704156, "grad_norm": 3.0101892948150635, "learning_rate": 8.743333864918765e-06, "loss": 0.5429, "step": 21227 }, { "epoch": 0.2538050431019022, "grad_norm": 2.730990171432495, "learning_rate": 8.743205504320506e-06, "loss": 0.6271, "step": 21228 }, { "epoch": 0.2538169992467629, "grad_norm": 2.089555263519287, "learning_rate": 8.743077138109298e-06, "loss": 0.6366, "step": 21229 }, { "epoch": 0.25382895539162353, "grad_norm": 3.193169355392456, "learning_rate": 8.742948766285329e-06, "loss": 0.5299, "step": 21230 }, { "epoch": 0.2538409115364842, "grad_norm": 1.3892964124679565, "learning_rate": 8.742820388848798e-06, "loss": 0.5717, "step": 21231 }, { "epoch": 0.25385286768134485, "grad_norm": 2.1295666694641113, "learning_rate": 8.742692005799894e-06, "loss": 0.5797, "step": 21232 }, { "epoch": 0.25386482382620545, "grad_norm": 1.704524278640747, "learning_rate": 8.74256361713881e-06, "loss": 0.5868, "step": 21233 }, { "epoch": 0.2538767799710661, "grad_norm": 2.612741708755493, "learning_rate": 8.74243522286574e-06, "loss": 0.5762, "step": 21234 }, { "epoch": 0.25388873611592677, "grad_norm": 3.477351427078247, "learning_rate": 8.742306822980873e-06, "loss": 0.6195, "step": 21235 }, { "epoch": 0.25390069226078743, "grad_norm": 17.562217712402344, "learning_rate": 8.742178417484404e-06, "loss": 0.7008, "step": 21236 }, { "epoch": 0.2539126484056481, "grad_norm": 1.8537344932556152, "learning_rate": 8.742050006376525e-06, "loss": 0.6366, "step": 21237 }, { "epoch": 0.25392460455050875, "grad_norm": 2.5933117866516113, "learning_rate": 8.74192158965743e-06, "loss": 0.6694, "step": 21238 }, { "epoch": 0.2539365606953694, "grad_norm": 3.9982962608337402, "learning_rate": 8.741793167327309e-06, "loss": 0.7189, "step": 21239 }, { "epoch": 0.25394851684023, "grad_norm": 2.7704901695251465, "learning_rate": 8.741664739386357e-06, "loss": 0.5563, "step": 21240 }, { "epoch": 0.25396047298509067, "grad_norm": 2.60678768157959, "learning_rate": 8.741536305834764e-06, "loss": 0.6146, "step": 21241 }, { "epoch": 0.2539724291299513, "grad_norm": 3.2048795223236084, "learning_rate": 8.741407866672724e-06, "loss": 0.6363, "step": 21242 }, { "epoch": 0.253984385274812, "grad_norm": 2.3389925956726074, "learning_rate": 8.741279421900431e-06, "loss": 0.6997, "step": 21243 }, { "epoch": 0.25399634141967264, "grad_norm": 2.069315195083618, "learning_rate": 8.741150971518075e-06, "loss": 0.5744, "step": 21244 }, { "epoch": 0.2540082975645333, "grad_norm": 1.6104182004928589, "learning_rate": 8.74102251552585e-06, "loss": 0.5827, "step": 21245 }, { "epoch": 0.25402025370939396, "grad_norm": 1.560692310333252, "learning_rate": 8.74089405392395e-06, "loss": 0.6176, "step": 21246 }, { "epoch": 0.2540322098542546, "grad_norm": 5.474822521209717, "learning_rate": 8.740765586712565e-06, "loss": 0.6059, "step": 21247 }, { "epoch": 0.2540441659991152, "grad_norm": 3.257054090499878, "learning_rate": 8.740637113891888e-06, "loss": 0.5796, "step": 21248 }, { "epoch": 0.2540561221439759, "grad_norm": 2.2775096893310547, "learning_rate": 8.740508635462112e-06, "loss": 0.5575, "step": 21249 }, { "epoch": 0.25406807828883654, "grad_norm": 3.2707693576812744, "learning_rate": 8.740380151423432e-06, "loss": 0.593, "step": 21250 }, { "epoch": 0.2540800344336972, "grad_norm": 2.5566799640655518, "learning_rate": 8.740251661776036e-06, "loss": 0.5977, "step": 21251 }, { "epoch": 0.25409199057855786, "grad_norm": 2.0366523265838623, "learning_rate": 8.740123166520122e-06, "loss": 0.6659, "step": 21252 }, { "epoch": 0.2541039467234185, "grad_norm": 2.7281336784362793, "learning_rate": 8.73999466565588e-06, "loss": 0.5465, "step": 21253 }, { "epoch": 0.2541159028682792, "grad_norm": 4.830211639404297, "learning_rate": 8.739866159183501e-06, "loss": 0.5114, "step": 21254 }, { "epoch": 0.2541278590131398, "grad_norm": 2.2397825717926025, "learning_rate": 8.73973764710318e-06, "loss": 0.6556, "step": 21255 }, { "epoch": 0.25413981515800044, "grad_norm": 2.4097933769226074, "learning_rate": 8.73960912941511e-06, "loss": 0.5989, "step": 21256 }, { "epoch": 0.2541517713028611, "grad_norm": 9.596612930297852, "learning_rate": 8.739480606119481e-06, "loss": 0.6426, "step": 21257 }, { "epoch": 0.25416372744772175, "grad_norm": 8.135554313659668, "learning_rate": 8.739352077216487e-06, "loss": 0.6977, "step": 21258 }, { "epoch": 0.2541756835925824, "grad_norm": 2.6674859523773193, "learning_rate": 8.739223542706324e-06, "loss": 0.6453, "step": 21259 }, { "epoch": 0.25418763973744307, "grad_norm": 6.870941162109375, "learning_rate": 8.73909500258918e-06, "loss": 0.6045, "step": 21260 }, { "epoch": 0.25419959588230373, "grad_norm": 2.5095255374908447, "learning_rate": 8.738966456865253e-06, "loss": 0.6408, "step": 21261 }, { "epoch": 0.25421155202716433, "grad_norm": 6.597192287445068, "learning_rate": 8.738837905534728e-06, "loss": 0.5426, "step": 21262 }, { "epoch": 0.254223508172025, "grad_norm": 2.180842876434326, "learning_rate": 8.738709348597805e-06, "loss": 0.6392, "step": 21263 }, { "epoch": 0.25423546431688565, "grad_norm": 3.0263278484344482, "learning_rate": 8.738580786054673e-06, "loss": 0.6552, "step": 21264 }, { "epoch": 0.2542474204617463, "grad_norm": 2.933797597885132, "learning_rate": 8.738452217905526e-06, "loss": 0.5082, "step": 21265 }, { "epoch": 0.25425937660660697, "grad_norm": 1.8307116031646729, "learning_rate": 8.738323644150557e-06, "loss": 0.6197, "step": 21266 }, { "epoch": 0.2542713327514676, "grad_norm": 3.1106622219085693, "learning_rate": 8.738195064789959e-06, "loss": 0.6019, "step": 21267 }, { "epoch": 0.2542832888963283, "grad_norm": 2.833256721496582, "learning_rate": 8.738066479823923e-06, "loss": 0.6419, "step": 21268 }, { "epoch": 0.25429524504118894, "grad_norm": 3.2903976440429688, "learning_rate": 8.737937889252643e-06, "loss": 0.5605, "step": 21269 }, { "epoch": 0.25430720118604955, "grad_norm": 2.964268445968628, "learning_rate": 8.737809293076313e-06, "loss": 0.5828, "step": 21270 }, { "epoch": 0.2543191573309102, "grad_norm": 4.556121826171875, "learning_rate": 8.737680691295123e-06, "loss": 0.5815, "step": 21271 }, { "epoch": 0.25433111347577086, "grad_norm": 2.8957087993621826, "learning_rate": 8.737552083909269e-06, "loss": 0.6543, "step": 21272 }, { "epoch": 0.2543430696206315, "grad_norm": 3.5595703125, "learning_rate": 8.737423470918943e-06, "loss": 0.5739, "step": 21273 }, { "epoch": 0.2543550257654922, "grad_norm": 2.0194337368011475, "learning_rate": 8.737294852324334e-06, "loss": 0.5822, "step": 21274 }, { "epoch": 0.25436698191035284, "grad_norm": 3.453166961669922, "learning_rate": 8.737166228125641e-06, "loss": 0.4665, "step": 21275 }, { "epoch": 0.2543789380552135, "grad_norm": 4.848689079284668, "learning_rate": 8.737037598323053e-06, "loss": 0.7239, "step": 21276 }, { "epoch": 0.2543908942000741, "grad_norm": 6.820971965789795, "learning_rate": 8.736908962916764e-06, "loss": 0.6126, "step": 21277 }, { "epoch": 0.25440285034493476, "grad_norm": 2.421541213989258, "learning_rate": 8.736780321906966e-06, "loss": 0.694, "step": 21278 }, { "epoch": 0.2544148064897954, "grad_norm": 2.0989208221435547, "learning_rate": 8.736651675293854e-06, "loss": 0.5686, "step": 21279 }, { "epoch": 0.2544267626346561, "grad_norm": 1.6951589584350586, "learning_rate": 8.736523023077619e-06, "loss": 0.5783, "step": 21280 }, { "epoch": 0.25443871877951674, "grad_norm": 2.13175630569458, "learning_rate": 8.736394365258454e-06, "loss": 0.6281, "step": 21281 }, { "epoch": 0.2544506749243774, "grad_norm": 2.0799057483673096, "learning_rate": 8.736265701836552e-06, "loss": 0.5853, "step": 21282 }, { "epoch": 0.25446263106923805, "grad_norm": 3.9062883853912354, "learning_rate": 8.736137032812107e-06, "loss": 0.5913, "step": 21283 }, { "epoch": 0.2544745872140987, "grad_norm": 2.0093331336975098, "learning_rate": 8.736008358185311e-06, "loss": 0.5735, "step": 21284 }, { "epoch": 0.2544865433589593, "grad_norm": 5.372435569763184, "learning_rate": 8.735879677956358e-06, "loss": 0.5703, "step": 21285 }, { "epoch": 0.25449849950382, "grad_norm": 3.0898385047912598, "learning_rate": 8.735750992125441e-06, "loss": 0.6496, "step": 21286 }, { "epoch": 0.25451045564868063, "grad_norm": 1.641296148300171, "learning_rate": 8.735622300692749e-06, "loss": 0.5454, "step": 21287 }, { "epoch": 0.2545224117935413, "grad_norm": 1.9053844213485718, "learning_rate": 8.735493603658481e-06, "loss": 0.6248, "step": 21288 }, { "epoch": 0.25453436793840195, "grad_norm": 1.937704086303711, "learning_rate": 8.735364901022825e-06, "loss": 0.6146, "step": 21289 }, { "epoch": 0.2545463240832626, "grad_norm": 1.5330668687820435, "learning_rate": 8.735236192785977e-06, "loss": 0.5792, "step": 21290 }, { "epoch": 0.25455828022812327, "grad_norm": 2.107602596282959, "learning_rate": 8.735107478948129e-06, "loss": 0.5703, "step": 21291 }, { "epoch": 0.25457023637298387, "grad_norm": 2.2071738243103027, "learning_rate": 8.734978759509473e-06, "loss": 0.669, "step": 21292 }, { "epoch": 0.25458219251784453, "grad_norm": 2.73203444480896, "learning_rate": 8.734850034470204e-06, "loss": 0.5294, "step": 21293 }, { "epoch": 0.2545941486627052, "grad_norm": 2.066592216491699, "learning_rate": 8.734721303830514e-06, "loss": 0.6662, "step": 21294 }, { "epoch": 0.25460610480756585, "grad_norm": 2.161214828491211, "learning_rate": 8.734592567590595e-06, "loss": 0.5078, "step": 21295 }, { "epoch": 0.2546180609524265, "grad_norm": 1.5891233682632446, "learning_rate": 8.734463825750643e-06, "loss": 0.5933, "step": 21296 }, { "epoch": 0.25463001709728716, "grad_norm": 1.685235857963562, "learning_rate": 8.73433507831085e-06, "loss": 0.7145, "step": 21297 }, { "epoch": 0.2546419732421478, "grad_norm": 4.107487201690674, "learning_rate": 8.734206325271406e-06, "loss": 0.6204, "step": 21298 }, { "epoch": 0.2546539293870084, "grad_norm": 2.098128080368042, "learning_rate": 8.734077566632508e-06, "loss": 0.6077, "step": 21299 }, { "epoch": 0.2546658855318691, "grad_norm": 2.7105207443237305, "learning_rate": 8.733948802394346e-06, "loss": 0.5162, "step": 21300 }, { "epoch": 0.25467784167672974, "grad_norm": 2.3652961254119873, "learning_rate": 8.733820032557115e-06, "loss": 0.6399, "step": 21301 }, { "epoch": 0.2546897978215904, "grad_norm": 5.163214206695557, "learning_rate": 8.733691257121008e-06, "loss": 0.5266, "step": 21302 }, { "epoch": 0.25470175396645106, "grad_norm": 2.713984966278076, "learning_rate": 8.733562476086216e-06, "loss": 0.73, "step": 21303 }, { "epoch": 0.2547137101113117, "grad_norm": 3.826900005340576, "learning_rate": 8.733433689452937e-06, "loss": 0.6352, "step": 21304 }, { "epoch": 0.2547256662561724, "grad_norm": 2.2701029777526855, "learning_rate": 8.733304897221358e-06, "loss": 0.6337, "step": 21305 }, { "epoch": 0.25473762240103304, "grad_norm": 2.195535659790039, "learning_rate": 8.733176099391677e-06, "loss": 0.5909, "step": 21306 }, { "epoch": 0.25474957854589364, "grad_norm": 2.0984888076782227, "learning_rate": 8.733047295964084e-06, "loss": 0.7261, "step": 21307 }, { "epoch": 0.2547615346907543, "grad_norm": 2.6550934314727783, "learning_rate": 8.732918486938774e-06, "loss": 0.6127, "step": 21308 }, { "epoch": 0.25477349083561496, "grad_norm": 12.046574592590332, "learning_rate": 8.73278967231594e-06, "loss": 0.6278, "step": 21309 }, { "epoch": 0.2547854469804756, "grad_norm": 1.4190669059753418, "learning_rate": 8.732660852095772e-06, "loss": 0.58, "step": 21310 }, { "epoch": 0.2547974031253363, "grad_norm": 2.1906800270080566, "learning_rate": 8.732532026278468e-06, "loss": 0.6362, "step": 21311 }, { "epoch": 0.25480935927019693, "grad_norm": 1.9298937320709229, "learning_rate": 8.732403194864218e-06, "loss": 0.5448, "step": 21312 }, { "epoch": 0.2548213154150576, "grad_norm": 2.8771066665649414, "learning_rate": 8.732274357853217e-06, "loss": 0.6712, "step": 21313 }, { "epoch": 0.2548332715599182, "grad_norm": 2.1656415462493896, "learning_rate": 8.732145515245658e-06, "loss": 0.5749, "step": 21314 }, { "epoch": 0.25484522770477885, "grad_norm": 1.7019479274749756, "learning_rate": 8.732016667041732e-06, "loss": 0.569, "step": 21315 }, { "epoch": 0.2548571838496395, "grad_norm": 2.112330198287964, "learning_rate": 8.731887813241635e-06, "loss": 0.5938, "step": 21316 }, { "epoch": 0.25486913999450017, "grad_norm": 3.556011199951172, "learning_rate": 8.731758953845557e-06, "loss": 0.6316, "step": 21317 }, { "epoch": 0.25488109613936083, "grad_norm": 4.73148775100708, "learning_rate": 8.731630088853695e-06, "loss": 0.5975, "step": 21318 }, { "epoch": 0.2548930522842215, "grad_norm": 3.3526175022125244, "learning_rate": 8.731501218266239e-06, "loss": 0.6236, "step": 21319 }, { "epoch": 0.25490500842908215, "grad_norm": 3.0608479976654053, "learning_rate": 8.731372342083384e-06, "loss": 0.63, "step": 21320 }, { "epoch": 0.25491696457394275, "grad_norm": 2.5368685722351074, "learning_rate": 8.731243460305323e-06, "loss": 0.6547, "step": 21321 }, { "epoch": 0.2549289207188034, "grad_norm": 2.2651960849761963, "learning_rate": 8.73111457293225e-06, "loss": 0.559, "step": 21322 }, { "epoch": 0.25494087686366407, "grad_norm": 2.265429735183716, "learning_rate": 8.730985679964357e-06, "loss": 0.6121, "step": 21323 }, { "epoch": 0.2549528330085247, "grad_norm": 2.9970695972442627, "learning_rate": 8.730856781401837e-06, "loss": 0.6593, "step": 21324 }, { "epoch": 0.2549647891533854, "grad_norm": 2.666855573654175, "learning_rate": 8.730727877244886e-06, "loss": 0.7083, "step": 21325 }, { "epoch": 0.25497674529824604, "grad_norm": 3.1791417598724365, "learning_rate": 8.730598967493692e-06, "loss": 0.6078, "step": 21326 }, { "epoch": 0.2549887014431067, "grad_norm": 2.020904064178467, "learning_rate": 8.730470052148456e-06, "loss": 0.5028, "step": 21327 }, { "epoch": 0.25500065758796736, "grad_norm": 2.587697744369507, "learning_rate": 8.730341131209363e-06, "loss": 0.6182, "step": 21328 }, { "epoch": 0.25501261373282796, "grad_norm": 2.893136978149414, "learning_rate": 8.730212204676612e-06, "loss": 0.4827, "step": 21329 }, { "epoch": 0.2550245698776886, "grad_norm": 3.808072805404663, "learning_rate": 8.730083272550394e-06, "loss": 0.5701, "step": 21330 }, { "epoch": 0.2550365260225493, "grad_norm": 2.9143667221069336, "learning_rate": 8.729954334830903e-06, "loss": 0.6358, "step": 21331 }, { "epoch": 0.25504848216740994, "grad_norm": 2.286848545074463, "learning_rate": 8.729825391518333e-06, "loss": 0.7007, "step": 21332 }, { "epoch": 0.2550604383122706, "grad_norm": 11.442540168762207, "learning_rate": 8.729696442612877e-06, "loss": 0.6222, "step": 21333 }, { "epoch": 0.25507239445713126, "grad_norm": 1.5390524864196777, "learning_rate": 8.729567488114725e-06, "loss": 0.5414, "step": 21334 }, { "epoch": 0.2550843506019919, "grad_norm": 1.8770920038223267, "learning_rate": 8.729438528024075e-06, "loss": 0.6108, "step": 21335 }, { "epoch": 0.2550963067468525, "grad_norm": 2.9756906032562256, "learning_rate": 8.729309562341119e-06, "loss": 0.5887, "step": 21336 }, { "epoch": 0.2551082628917132, "grad_norm": 1.7376967668533325, "learning_rate": 8.72918059106605e-06, "loss": 0.4907, "step": 21337 }, { "epoch": 0.25512021903657384, "grad_norm": 2.02388072013855, "learning_rate": 8.72905161419906e-06, "loss": 0.6189, "step": 21338 }, { "epoch": 0.2551321751814345, "grad_norm": 2.688835859298706, "learning_rate": 8.728922631740346e-06, "loss": 0.6266, "step": 21339 }, { "epoch": 0.25514413132629515, "grad_norm": 3.160858392715454, "learning_rate": 8.728793643690099e-06, "loss": 0.6556, "step": 21340 }, { "epoch": 0.2551560874711558, "grad_norm": 1.9530848264694214, "learning_rate": 8.728664650048512e-06, "loss": 0.5378, "step": 21341 }, { "epoch": 0.25516804361601647, "grad_norm": 2.9481141567230225, "learning_rate": 8.72853565081578e-06, "loss": 0.6209, "step": 21342 }, { "epoch": 0.25517999976087713, "grad_norm": 2.3656015396118164, "learning_rate": 8.728406645992094e-06, "loss": 0.6434, "step": 21343 }, { "epoch": 0.25519195590573773, "grad_norm": 1.5271031856536865, "learning_rate": 8.72827763557765e-06, "loss": 0.664, "step": 21344 }, { "epoch": 0.2552039120505984, "grad_norm": 3.0612528324127197, "learning_rate": 8.728148619572639e-06, "loss": 0.6628, "step": 21345 }, { "epoch": 0.25521586819545905, "grad_norm": 2.4456772804260254, "learning_rate": 8.728019597977258e-06, "loss": 0.5854, "step": 21346 }, { "epoch": 0.2552278243403197, "grad_norm": 2.5967698097229004, "learning_rate": 8.727890570791696e-06, "loss": 0.6004, "step": 21347 }, { "epoch": 0.25523978048518037, "grad_norm": 1.8160263299942017, "learning_rate": 8.727761538016151e-06, "loss": 0.5979, "step": 21348 }, { "epoch": 0.255251736630041, "grad_norm": 3.2899227142333984, "learning_rate": 8.727632499650814e-06, "loss": 0.4889, "step": 21349 }, { "epoch": 0.2552636927749017, "grad_norm": 2.858785390853882, "learning_rate": 8.727503455695878e-06, "loss": 0.6742, "step": 21350 }, { "epoch": 0.2552756489197623, "grad_norm": 3.7898595333099365, "learning_rate": 8.727374406151537e-06, "loss": 0.6752, "step": 21351 }, { "epoch": 0.25528760506462295, "grad_norm": 1.9131598472595215, "learning_rate": 8.727245351017985e-06, "loss": 0.4884, "step": 21352 }, { "epoch": 0.2552995612094836, "grad_norm": 3.1094563007354736, "learning_rate": 8.727116290295417e-06, "loss": 0.6416, "step": 21353 }, { "epoch": 0.25531151735434426, "grad_norm": 1.5306992530822754, "learning_rate": 8.726987223984023e-06, "loss": 0.585, "step": 21354 }, { "epoch": 0.2553234734992049, "grad_norm": 2.58044171333313, "learning_rate": 8.726858152084e-06, "loss": 0.5399, "step": 21355 }, { "epoch": 0.2553354296440656, "grad_norm": 3.7028703689575195, "learning_rate": 8.726729074595539e-06, "loss": 0.7297, "step": 21356 }, { "epoch": 0.25534738578892624, "grad_norm": 17.340932846069336, "learning_rate": 8.726599991518834e-06, "loss": 0.6605, "step": 21357 }, { "epoch": 0.25535934193378684, "grad_norm": 1.8352978229522705, "learning_rate": 8.726470902854078e-06, "loss": 0.6376, "step": 21358 }, { "epoch": 0.2553712980786475, "grad_norm": 1.4498950242996216, "learning_rate": 8.726341808601469e-06, "loss": 0.5208, "step": 21359 }, { "epoch": 0.25538325422350816, "grad_norm": 4.365004539489746, "learning_rate": 8.726212708761194e-06, "loss": 0.6525, "step": 21360 }, { "epoch": 0.2553952103683688, "grad_norm": 2.173308849334717, "learning_rate": 8.726083603333453e-06, "loss": 0.6256, "step": 21361 }, { "epoch": 0.2554071665132295, "grad_norm": 3.9485104084014893, "learning_rate": 8.725954492318434e-06, "loss": 0.5653, "step": 21362 }, { "epoch": 0.25541912265809014, "grad_norm": 2.5986711978912354, "learning_rate": 8.725825375716333e-06, "loss": 0.6095, "step": 21363 }, { "epoch": 0.2554310788029508, "grad_norm": 2.0925025939941406, "learning_rate": 8.725696253527345e-06, "loss": 0.6957, "step": 21364 }, { "epoch": 0.25544303494781145, "grad_norm": 6.470468044281006, "learning_rate": 8.725567125751661e-06, "loss": 0.6085, "step": 21365 }, { "epoch": 0.25545499109267206, "grad_norm": 3.3878769874572754, "learning_rate": 8.725437992389477e-06, "loss": 0.6189, "step": 21366 }, { "epoch": 0.2554669472375327, "grad_norm": 1.7580822706222534, "learning_rate": 8.725308853440985e-06, "loss": 0.5807, "step": 21367 }, { "epoch": 0.2554789033823934, "grad_norm": 2.097402572631836, "learning_rate": 8.725179708906378e-06, "loss": 0.6628, "step": 21368 }, { "epoch": 0.25549085952725403, "grad_norm": 1.7858150005340576, "learning_rate": 8.725050558785852e-06, "loss": 0.64, "step": 21369 }, { "epoch": 0.2555028156721147, "grad_norm": 2.661250114440918, "learning_rate": 8.724921403079599e-06, "loss": 0.6216, "step": 21370 }, { "epoch": 0.25551477181697535, "grad_norm": 2.2996065616607666, "learning_rate": 8.724792241787814e-06, "loss": 0.6022, "step": 21371 }, { "epoch": 0.255526727961836, "grad_norm": 1.7168101072311401, "learning_rate": 8.72466307491069e-06, "loss": 0.6198, "step": 21372 }, { "epoch": 0.2555386841066966, "grad_norm": 2.3768327236175537, "learning_rate": 8.724533902448417e-06, "loss": 0.5396, "step": 21373 }, { "epoch": 0.25555064025155727, "grad_norm": 3.6291308403015137, "learning_rate": 8.724404724401196e-06, "loss": 0.5324, "step": 21374 }, { "epoch": 0.25556259639641793, "grad_norm": 2.038548469543457, "learning_rate": 8.724275540769215e-06, "loss": 0.5762, "step": 21375 }, { "epoch": 0.2555745525412786, "grad_norm": 2.5700483322143555, "learning_rate": 8.724146351552672e-06, "loss": 0.5618, "step": 21376 }, { "epoch": 0.25558650868613925, "grad_norm": 2.203089714050293, "learning_rate": 8.724017156751756e-06, "loss": 0.7058, "step": 21377 }, { "epoch": 0.2555984648309999, "grad_norm": 2.839857339859009, "learning_rate": 8.723887956366663e-06, "loss": 0.5071, "step": 21378 }, { "epoch": 0.25561042097586056, "grad_norm": 1.9543004035949707, "learning_rate": 8.723758750397587e-06, "loss": 0.6189, "step": 21379 }, { "epoch": 0.25562237712072117, "grad_norm": 1.908969521522522, "learning_rate": 8.723629538844722e-06, "loss": 0.6086, "step": 21380 }, { "epoch": 0.2556343332655818, "grad_norm": 1.96369469165802, "learning_rate": 8.72350032170826e-06, "loss": 0.6231, "step": 21381 }, { "epoch": 0.2556462894104425, "grad_norm": 2.4474451541900635, "learning_rate": 8.723371098988398e-06, "loss": 0.6306, "step": 21382 }, { "epoch": 0.25565824555530314, "grad_norm": 2.0973589420318604, "learning_rate": 8.723241870685328e-06, "loss": 0.5961, "step": 21383 }, { "epoch": 0.2556702017001638, "grad_norm": 17.389705657958984, "learning_rate": 8.723112636799242e-06, "loss": 0.4998, "step": 21384 }, { "epoch": 0.25568215784502446, "grad_norm": 2.9040474891662598, "learning_rate": 8.722983397330336e-06, "loss": 0.603, "step": 21385 }, { "epoch": 0.2556941139898851, "grad_norm": 2.5623295307159424, "learning_rate": 8.722854152278804e-06, "loss": 0.6275, "step": 21386 }, { "epoch": 0.2557060701347458, "grad_norm": 1.9027283191680908, "learning_rate": 8.722724901644837e-06, "loss": 0.5942, "step": 21387 }, { "epoch": 0.2557180262796064, "grad_norm": 3.5682647228240967, "learning_rate": 8.722595645428631e-06, "loss": 0.5868, "step": 21388 }, { "epoch": 0.25572998242446704, "grad_norm": 1.7077224254608154, "learning_rate": 8.722466383630382e-06, "loss": 0.601, "step": 21389 }, { "epoch": 0.2557419385693277, "grad_norm": 3.829253911972046, "learning_rate": 8.722337116250279e-06, "loss": 0.641, "step": 21390 }, { "epoch": 0.25575389471418836, "grad_norm": 8.042061805725098, "learning_rate": 8.72220784328852e-06, "loss": 0.7425, "step": 21391 }, { "epoch": 0.255765850859049, "grad_norm": 2.6498465538024902, "learning_rate": 8.722078564745296e-06, "loss": 0.6338, "step": 21392 }, { "epoch": 0.2557778070039097, "grad_norm": 4.139626502990723, "learning_rate": 8.721949280620802e-06, "loss": 0.6151, "step": 21393 }, { "epoch": 0.25578976314877033, "grad_norm": 2.006355047225952, "learning_rate": 8.721819990915231e-06, "loss": 0.6238, "step": 21394 }, { "epoch": 0.25580171929363094, "grad_norm": 4.772996425628662, "learning_rate": 8.72169069562878e-06, "loss": 0.6893, "step": 21395 }, { "epoch": 0.2558136754384916, "grad_norm": 2.2661826610565186, "learning_rate": 8.721561394761638e-06, "loss": 0.6579, "step": 21396 }, { "epoch": 0.25582563158335225, "grad_norm": 1.9294512271881104, "learning_rate": 8.721432088314003e-06, "loss": 0.6422, "step": 21397 }, { "epoch": 0.2558375877282129, "grad_norm": 1.800855278968811, "learning_rate": 8.721302776286066e-06, "loss": 0.5685, "step": 21398 }, { "epoch": 0.25584954387307357, "grad_norm": 1.84215247631073, "learning_rate": 8.721173458678026e-06, "loss": 0.6562, "step": 21399 }, { "epoch": 0.25586150001793423, "grad_norm": 3.171755790710449, "learning_rate": 8.72104413549007e-06, "loss": 0.6261, "step": 21400 }, { "epoch": 0.2558734561627949, "grad_norm": 1.6995160579681396, "learning_rate": 8.720914806722393e-06, "loss": 0.6645, "step": 21401 }, { "epoch": 0.25588541230765555, "grad_norm": 2.0336952209472656, "learning_rate": 8.720785472375194e-06, "loss": 0.6087, "step": 21402 }, { "epoch": 0.25589736845251615, "grad_norm": 5.367598533630371, "learning_rate": 8.720656132448664e-06, "loss": 0.5412, "step": 21403 }, { "epoch": 0.2559093245973768, "grad_norm": 1.3690203428268433, "learning_rate": 8.720526786942997e-06, "loss": 0.5679, "step": 21404 }, { "epoch": 0.25592128074223747, "grad_norm": 1.7110741138458252, "learning_rate": 8.720397435858385e-06, "loss": 0.6418, "step": 21405 }, { "epoch": 0.2559332368870981, "grad_norm": 3.7484781742095947, "learning_rate": 8.720268079195027e-06, "loss": 0.6316, "step": 21406 }, { "epoch": 0.2559451930319588, "grad_norm": 2.4784092903137207, "learning_rate": 8.720138716953112e-06, "loss": 0.547, "step": 21407 }, { "epoch": 0.25595714917681944, "grad_norm": 6.012615203857422, "learning_rate": 8.720009349132835e-06, "loss": 0.6957, "step": 21408 }, { "epoch": 0.2559691053216801, "grad_norm": 1.9459290504455566, "learning_rate": 8.719879975734391e-06, "loss": 0.585, "step": 21409 }, { "epoch": 0.2559810614665407, "grad_norm": 2.2145371437072754, "learning_rate": 8.719750596757975e-06, "loss": 0.6216, "step": 21410 }, { "epoch": 0.25599301761140136, "grad_norm": 1.6388360261917114, "learning_rate": 8.719621212203779e-06, "loss": 0.4585, "step": 21411 }, { "epoch": 0.256004973756262, "grad_norm": 3.635622501373291, "learning_rate": 8.719491822071996e-06, "loss": 0.5826, "step": 21412 }, { "epoch": 0.2560169299011227, "grad_norm": 2.3995296955108643, "learning_rate": 8.719362426362825e-06, "loss": 0.5591, "step": 21413 }, { "epoch": 0.25602888604598334, "grad_norm": 2.0617799758911133, "learning_rate": 8.719233025076456e-06, "loss": 0.5404, "step": 21414 }, { "epoch": 0.256040842190844, "grad_norm": 2.5243821144104004, "learning_rate": 8.719103618213082e-06, "loss": 0.696, "step": 21415 }, { "epoch": 0.25605279833570466, "grad_norm": 1.643008828163147, "learning_rate": 8.718974205772902e-06, "loss": 0.5835, "step": 21416 }, { "epoch": 0.25606475448056526, "grad_norm": 2.3297271728515625, "learning_rate": 8.718844787756105e-06, "loss": 0.6681, "step": 21417 }, { "epoch": 0.2560767106254259, "grad_norm": 3.548765182495117, "learning_rate": 8.718715364162886e-06, "loss": 0.6079, "step": 21418 }, { "epoch": 0.2560886667702866, "grad_norm": 3.3993186950683594, "learning_rate": 8.718585934993442e-06, "loss": 0.6482, "step": 21419 }, { "epoch": 0.25610062291514724, "grad_norm": 1.9955651760101318, "learning_rate": 8.718456500247964e-06, "loss": 0.5189, "step": 21420 }, { "epoch": 0.2561125790600079, "grad_norm": 1.9910616874694824, "learning_rate": 8.718327059926647e-06, "loss": 0.548, "step": 21421 }, { "epoch": 0.25612453520486855, "grad_norm": 1.9168370962142944, "learning_rate": 8.718197614029687e-06, "loss": 0.5784, "step": 21422 }, { "epoch": 0.2561364913497292, "grad_norm": 5.269309997558594, "learning_rate": 8.718068162557276e-06, "loss": 0.8129, "step": 21423 }, { "epoch": 0.25614844749458987, "grad_norm": 2.5434305667877197, "learning_rate": 8.717938705509607e-06, "loss": 0.5926, "step": 21424 }, { "epoch": 0.2561604036394505, "grad_norm": 1.756169080734253, "learning_rate": 8.717809242886877e-06, "loss": 0.5175, "step": 21425 }, { "epoch": 0.25617235978431113, "grad_norm": 4.350037097930908, "learning_rate": 8.717679774689278e-06, "loss": 0.5671, "step": 21426 }, { "epoch": 0.2561843159291718, "grad_norm": 1.7623021602630615, "learning_rate": 8.717550300917005e-06, "loss": 0.5899, "step": 21427 }, { "epoch": 0.25619627207403245, "grad_norm": 1.8025786876678467, "learning_rate": 8.717420821570253e-06, "loss": 0.5518, "step": 21428 }, { "epoch": 0.2562082282188931, "grad_norm": 1.9027044773101807, "learning_rate": 8.717291336649215e-06, "loss": 0.7298, "step": 21429 }, { "epoch": 0.25622018436375377, "grad_norm": 2.3402369022369385, "learning_rate": 8.717161846154085e-06, "loss": 0.6554, "step": 21430 }, { "epoch": 0.2562321405086144, "grad_norm": 6.635422229766846, "learning_rate": 8.717032350085059e-06, "loss": 0.5856, "step": 21431 }, { "epoch": 0.25624409665347503, "grad_norm": 2.6247096061706543, "learning_rate": 8.716902848442329e-06, "loss": 0.6491, "step": 21432 }, { "epoch": 0.2562560527983357, "grad_norm": 2.6982240676879883, "learning_rate": 8.716773341226088e-06, "loss": 0.6419, "step": 21433 }, { "epoch": 0.25626800894319635, "grad_norm": 4.659365177154541, "learning_rate": 8.716643828436535e-06, "loss": 0.5948, "step": 21434 }, { "epoch": 0.256279965088057, "grad_norm": 1.3365263938903809, "learning_rate": 8.71651431007386e-06, "loss": 0.6019, "step": 21435 }, { "epoch": 0.25629192123291766, "grad_norm": 2.402738094329834, "learning_rate": 8.71638478613826e-06, "loss": 0.6967, "step": 21436 }, { "epoch": 0.2563038773777783, "grad_norm": 1.6668634414672852, "learning_rate": 8.716255256629924e-06, "loss": 0.64, "step": 21437 }, { "epoch": 0.256315833522639, "grad_norm": 2.3296897411346436, "learning_rate": 8.716125721549054e-06, "loss": 0.7118, "step": 21438 }, { "epoch": 0.2563277896674996, "grad_norm": 2.39908766746521, "learning_rate": 8.715996180895837e-06, "loss": 0.5921, "step": 21439 }, { "epoch": 0.25633974581236024, "grad_norm": 7.01820182800293, "learning_rate": 8.715866634670474e-06, "loss": 0.5193, "step": 21440 }, { "epoch": 0.2563517019572209, "grad_norm": 2.399653434753418, "learning_rate": 8.715737082873154e-06, "loss": 0.5922, "step": 21441 }, { "epoch": 0.25636365810208156, "grad_norm": 2.372420310974121, "learning_rate": 8.715607525504073e-06, "loss": 0.6277, "step": 21442 }, { "epoch": 0.2563756142469422, "grad_norm": 2.1772537231445312, "learning_rate": 8.715477962563425e-06, "loss": 0.5889, "step": 21443 }, { "epoch": 0.2563875703918029, "grad_norm": 1.8169691562652588, "learning_rate": 8.715348394051407e-06, "loss": 0.5648, "step": 21444 }, { "epoch": 0.25639952653666354, "grad_norm": 2.313807487487793, "learning_rate": 8.715218819968207e-06, "loss": 0.6047, "step": 21445 }, { "epoch": 0.2564114826815242, "grad_norm": 3.5661065578460693, "learning_rate": 8.715089240314027e-06, "loss": 0.5803, "step": 21446 }, { "epoch": 0.2564234388263848, "grad_norm": 2.8021059036254883, "learning_rate": 8.714959655089054e-06, "loss": 0.746, "step": 21447 }, { "epoch": 0.25643539497124546, "grad_norm": 4.1637797355651855, "learning_rate": 8.714830064293487e-06, "loss": 0.5754, "step": 21448 }, { "epoch": 0.2564473511161061, "grad_norm": 1.607763409614563, "learning_rate": 8.71470046792752e-06, "loss": 0.6176, "step": 21449 }, { "epoch": 0.2564593072609668, "grad_norm": 6.4173583984375, "learning_rate": 8.714570865991348e-06, "loss": 0.6305, "step": 21450 }, { "epoch": 0.25647126340582743, "grad_norm": 2.9931178092956543, "learning_rate": 8.71444125848516e-06, "loss": 0.6533, "step": 21451 }, { "epoch": 0.2564832195506881, "grad_norm": 2.629349946975708, "learning_rate": 8.714311645409157e-06, "loss": 0.6186, "step": 21452 }, { "epoch": 0.25649517569554875, "grad_norm": 2.555304527282715, "learning_rate": 8.714182026763528e-06, "loss": 0.5724, "step": 21453 }, { "epoch": 0.25650713184040935, "grad_norm": 1.8906563520431519, "learning_rate": 8.714052402548472e-06, "loss": 0.5406, "step": 21454 }, { "epoch": 0.25651908798527, "grad_norm": 2.140713930130005, "learning_rate": 8.71392277276418e-06, "loss": 0.639, "step": 21455 }, { "epoch": 0.25653104413013067, "grad_norm": 2.178297758102417, "learning_rate": 8.713793137410849e-06, "loss": 0.6999, "step": 21456 }, { "epoch": 0.25654300027499133, "grad_norm": 18.276655197143555, "learning_rate": 8.713663496488672e-06, "loss": 0.5373, "step": 21457 }, { "epoch": 0.256554956419852, "grad_norm": 4.647797107696533, "learning_rate": 8.713533849997841e-06, "loss": 0.6682, "step": 21458 }, { "epoch": 0.25656691256471265, "grad_norm": 5.892979621887207, "learning_rate": 8.713404197938555e-06, "loss": 0.5597, "step": 21459 }, { "epoch": 0.2565788687095733, "grad_norm": 5.493052005767822, "learning_rate": 8.713274540311005e-06, "loss": 0.6005, "step": 21460 }, { "epoch": 0.25659082485443396, "grad_norm": 3.5162060260772705, "learning_rate": 8.713144877115387e-06, "loss": 0.6097, "step": 21461 }, { "epoch": 0.25660278099929457, "grad_norm": 2.607862710952759, "learning_rate": 8.713015208351894e-06, "loss": 0.563, "step": 21462 }, { "epoch": 0.2566147371441552, "grad_norm": 2.0651018619537354, "learning_rate": 8.712885534020724e-06, "loss": 0.6428, "step": 21463 }, { "epoch": 0.2566266932890159, "grad_norm": 7.055947780609131, "learning_rate": 8.712755854122067e-06, "loss": 0.7078, "step": 21464 }, { "epoch": 0.25663864943387654, "grad_norm": 3.2298269271850586, "learning_rate": 8.71262616865612e-06, "loss": 0.5934, "step": 21465 }, { "epoch": 0.2566506055787372, "grad_norm": 2.665238618850708, "learning_rate": 8.712496477623077e-06, "loss": 0.6259, "step": 21466 }, { "epoch": 0.25666256172359786, "grad_norm": 2.175975799560547, "learning_rate": 8.712366781023131e-06, "loss": 0.6372, "step": 21467 }, { "epoch": 0.2566745178684585, "grad_norm": 4.500019550323486, "learning_rate": 8.712237078856481e-06, "loss": 0.6457, "step": 21468 }, { "epoch": 0.2566864740133191, "grad_norm": 1.9515029191970825, "learning_rate": 8.712107371123315e-06, "loss": 0.5185, "step": 21469 }, { "epoch": 0.2566984301581798, "grad_norm": 9.162092208862305, "learning_rate": 8.711977657823831e-06, "loss": 0.6227, "step": 21470 }, { "epoch": 0.25671038630304044, "grad_norm": 5.801311492919922, "learning_rate": 8.711847938958225e-06, "loss": 0.6133, "step": 21471 }, { "epoch": 0.2567223424479011, "grad_norm": 2.8610305786132812, "learning_rate": 8.71171821452669e-06, "loss": 0.5864, "step": 21472 }, { "epoch": 0.25673429859276176, "grad_norm": 2.575901985168457, "learning_rate": 8.71158848452942e-06, "loss": 0.564, "step": 21473 }, { "epoch": 0.2567462547376224, "grad_norm": 6.239470481872559, "learning_rate": 8.711458748966608e-06, "loss": 0.5761, "step": 21474 }, { "epoch": 0.2567582108824831, "grad_norm": 4.076982498168945, "learning_rate": 8.711329007838453e-06, "loss": 0.5508, "step": 21475 }, { "epoch": 0.2567701670273437, "grad_norm": 2.322568893432617, "learning_rate": 8.711199261145144e-06, "loss": 0.6187, "step": 21476 }, { "epoch": 0.25678212317220434, "grad_norm": 2.650278091430664, "learning_rate": 8.711069508886882e-06, "loss": 0.5645, "step": 21477 }, { "epoch": 0.256794079317065, "grad_norm": 1.916011929512024, "learning_rate": 8.710939751063857e-06, "loss": 0.5315, "step": 21478 }, { "epoch": 0.25680603546192565, "grad_norm": 2.6368515491485596, "learning_rate": 8.710809987676262e-06, "loss": 0.6316, "step": 21479 }, { "epoch": 0.2568179916067863, "grad_norm": 2.4198362827301025, "learning_rate": 8.710680218724297e-06, "loss": 0.6394, "step": 21480 }, { "epoch": 0.25682994775164697, "grad_norm": 2.792219638824463, "learning_rate": 8.710550444208153e-06, "loss": 0.6097, "step": 21481 }, { "epoch": 0.25684190389650763, "grad_norm": 2.71940016746521, "learning_rate": 8.710420664128026e-06, "loss": 0.654, "step": 21482 }, { "epoch": 0.2568538600413683, "grad_norm": 2.142653703689575, "learning_rate": 8.710290878484109e-06, "loss": 0.6724, "step": 21483 }, { "epoch": 0.2568658161862289, "grad_norm": 3.468559980392456, "learning_rate": 8.710161087276598e-06, "loss": 0.6807, "step": 21484 }, { "epoch": 0.25687777233108955, "grad_norm": 4.619884967803955, "learning_rate": 8.710031290505687e-06, "loss": 0.5875, "step": 21485 }, { "epoch": 0.2568897284759502, "grad_norm": 2.697359800338745, "learning_rate": 8.709901488171573e-06, "loss": 0.5641, "step": 21486 }, { "epoch": 0.25690168462081087, "grad_norm": 1.9672943353652954, "learning_rate": 8.709771680274445e-06, "loss": 0.5784, "step": 21487 }, { "epoch": 0.2569136407656715, "grad_norm": 3.0650978088378906, "learning_rate": 8.709641866814505e-06, "loss": 0.5633, "step": 21488 }, { "epoch": 0.2569255969105322, "grad_norm": 2.365513563156128, "learning_rate": 8.709512047791941e-06, "loss": 0.6154, "step": 21489 }, { "epoch": 0.25693755305539284, "grad_norm": 5.938733100891113, "learning_rate": 8.709382223206952e-06, "loss": 0.5646, "step": 21490 }, { "epoch": 0.25694950920025345, "grad_norm": 4.796708106994629, "learning_rate": 8.70925239305973e-06, "loss": 0.6405, "step": 21491 }, { "epoch": 0.2569614653451141, "grad_norm": 1.9769867658615112, "learning_rate": 8.70912255735047e-06, "loss": 0.6211, "step": 21492 }, { "epoch": 0.25697342148997476, "grad_norm": 2.1626455783843994, "learning_rate": 8.708992716079369e-06, "loss": 0.6342, "step": 21493 }, { "epoch": 0.2569853776348354, "grad_norm": 2.530939817428589, "learning_rate": 8.708862869246622e-06, "loss": 0.6222, "step": 21494 }, { "epoch": 0.2569973337796961, "grad_norm": 2.119096517562866, "learning_rate": 8.70873301685242e-06, "loss": 0.6911, "step": 21495 }, { "epoch": 0.25700928992455674, "grad_norm": 1.826009750366211, "learning_rate": 8.70860315889696e-06, "loss": 0.5351, "step": 21496 }, { "epoch": 0.2570212460694174, "grad_norm": 2.6425647735595703, "learning_rate": 8.708473295380435e-06, "loss": 0.6154, "step": 21497 }, { "epoch": 0.25703320221427806, "grad_norm": 3.1178057193756104, "learning_rate": 8.708343426303043e-06, "loss": 0.5617, "step": 21498 }, { "epoch": 0.25704515835913866, "grad_norm": 2.4280779361724854, "learning_rate": 8.708213551664977e-06, "loss": 0.5488, "step": 21499 }, { "epoch": 0.2570571145039993, "grad_norm": 2.312664031982422, "learning_rate": 8.70808367146643e-06, "loss": 0.7041, "step": 21500 }, { "epoch": 0.25706907064886, "grad_norm": 2.5059235095977783, "learning_rate": 8.7079537857076e-06, "loss": 0.5815, "step": 21501 }, { "epoch": 0.25708102679372064, "grad_norm": 3.405684471130371, "learning_rate": 8.707823894388678e-06, "loss": 0.6907, "step": 21502 }, { "epoch": 0.2570929829385813, "grad_norm": 2.412994623184204, "learning_rate": 8.707693997509864e-06, "loss": 0.4934, "step": 21503 }, { "epoch": 0.25710493908344195, "grad_norm": 2.3406405448913574, "learning_rate": 8.707564095071347e-06, "loss": 0.6371, "step": 21504 }, { "epoch": 0.2571168952283026, "grad_norm": 2.35371994972229, "learning_rate": 8.707434187073325e-06, "loss": 0.5528, "step": 21505 }, { "epoch": 0.2571288513731632, "grad_norm": 1.6149948835372925, "learning_rate": 8.707304273515991e-06, "loss": 0.5969, "step": 21506 }, { "epoch": 0.2571408075180239, "grad_norm": 3.9482038021087646, "learning_rate": 8.707174354399543e-06, "loss": 0.554, "step": 21507 }, { "epoch": 0.25715276366288453, "grad_norm": 4.069968223571777, "learning_rate": 8.707044429724173e-06, "loss": 0.5859, "step": 21508 }, { "epoch": 0.2571647198077452, "grad_norm": 2.3926444053649902, "learning_rate": 8.706914499490079e-06, "loss": 0.662, "step": 21509 }, { "epoch": 0.25717667595260585, "grad_norm": 6.149299621582031, "learning_rate": 8.70678456369745e-06, "loss": 0.6241, "step": 21510 }, { "epoch": 0.2571886320974665, "grad_norm": 10.164670944213867, "learning_rate": 8.706654622346487e-06, "loss": 0.6661, "step": 21511 }, { "epoch": 0.25720058824232717, "grad_norm": 2.6530282497406006, "learning_rate": 8.706524675437382e-06, "loss": 0.6285, "step": 21512 }, { "epoch": 0.25721254438718777, "grad_norm": 4.324448108673096, "learning_rate": 8.706394722970329e-06, "loss": 0.6502, "step": 21513 }, { "epoch": 0.25722450053204843, "grad_norm": 2.4964261054992676, "learning_rate": 8.706264764945524e-06, "loss": 0.6348, "step": 21514 }, { "epoch": 0.2572364566769091, "grad_norm": 3.8199450969696045, "learning_rate": 8.706134801363163e-06, "loss": 0.5634, "step": 21515 }, { "epoch": 0.25724841282176975, "grad_norm": 5.451338291168213, "learning_rate": 8.706004832223439e-06, "loss": 0.5948, "step": 21516 }, { "epoch": 0.2572603689666304, "grad_norm": 1.9601192474365234, "learning_rate": 8.705874857526547e-06, "loss": 0.5896, "step": 21517 }, { "epoch": 0.25727232511149106, "grad_norm": 3.2778444290161133, "learning_rate": 8.705744877272681e-06, "loss": 0.5973, "step": 21518 }, { "epoch": 0.2572842812563517, "grad_norm": 1.7072397470474243, "learning_rate": 8.70561489146204e-06, "loss": 0.5649, "step": 21519 }, { "epoch": 0.2572962374012124, "grad_norm": 2.412891387939453, "learning_rate": 8.705484900094816e-06, "loss": 0.6023, "step": 21520 }, { "epoch": 0.257308193546073, "grad_norm": 1.9027087688446045, "learning_rate": 8.705354903171203e-06, "loss": 0.6491, "step": 21521 }, { "epoch": 0.25732014969093364, "grad_norm": 2.1704416275024414, "learning_rate": 8.705224900691398e-06, "loss": 0.5993, "step": 21522 }, { "epoch": 0.2573321058357943, "grad_norm": 3.7937376499176025, "learning_rate": 8.705094892655595e-06, "loss": 0.5538, "step": 21523 }, { "epoch": 0.25734406198065496, "grad_norm": 5.184283256530762, "learning_rate": 8.704964879063988e-06, "loss": 0.5446, "step": 21524 }, { "epoch": 0.2573560181255156, "grad_norm": 3.4159092903137207, "learning_rate": 8.704834859916776e-06, "loss": 0.5333, "step": 21525 }, { "epoch": 0.2573679742703763, "grad_norm": 7.3729119300842285, "learning_rate": 8.704704835214148e-06, "loss": 0.5948, "step": 21526 }, { "epoch": 0.25737993041523693, "grad_norm": 2.99043869972229, "learning_rate": 8.704574804956303e-06, "loss": 0.5385, "step": 21527 }, { "epoch": 0.25739188656009754, "grad_norm": 2.471442461013794, "learning_rate": 8.704444769143434e-06, "loss": 0.6437, "step": 21528 }, { "epoch": 0.2574038427049582, "grad_norm": 3.0861728191375732, "learning_rate": 8.704314727775739e-06, "loss": 0.6038, "step": 21529 }, { "epoch": 0.25741579884981886, "grad_norm": 2.2957537174224854, "learning_rate": 8.70418468085341e-06, "loss": 0.5469, "step": 21530 }, { "epoch": 0.2574277549946795, "grad_norm": 2.1727709770202637, "learning_rate": 8.704054628376641e-06, "loss": 0.5977, "step": 21531 }, { "epoch": 0.2574397111395402, "grad_norm": 2.1020421981811523, "learning_rate": 8.70392457034563e-06, "loss": 0.6794, "step": 21532 }, { "epoch": 0.25745166728440083, "grad_norm": 2.672360420227051, "learning_rate": 8.703794506760573e-06, "loss": 0.6431, "step": 21533 }, { "epoch": 0.2574636234292615, "grad_norm": 1.7125625610351562, "learning_rate": 8.70366443762166e-06, "loss": 0.6271, "step": 21534 }, { "epoch": 0.2574755795741221, "grad_norm": 2.192131519317627, "learning_rate": 8.703534362929091e-06, "loss": 0.5996, "step": 21535 }, { "epoch": 0.25748753571898275, "grad_norm": 2.152113437652588, "learning_rate": 8.70340428268306e-06, "loss": 0.6319, "step": 21536 }, { "epoch": 0.2574994918638434, "grad_norm": 3.134577512741089, "learning_rate": 8.703274196883758e-06, "loss": 0.6231, "step": 21537 }, { "epoch": 0.25751144800870407, "grad_norm": 5.5637946128845215, "learning_rate": 8.703144105531384e-06, "loss": 0.6663, "step": 21538 }, { "epoch": 0.25752340415356473, "grad_norm": 7.364226818084717, "learning_rate": 8.703014008626134e-06, "loss": 0.6218, "step": 21539 }, { "epoch": 0.2575353602984254, "grad_norm": 2.730903387069702, "learning_rate": 8.7028839061682e-06, "loss": 0.7063, "step": 21540 }, { "epoch": 0.25754731644328605, "grad_norm": 2.261104106903076, "learning_rate": 8.70275379815778e-06, "loss": 0.6591, "step": 21541 }, { "epoch": 0.2575592725881467, "grad_norm": 2.8128716945648193, "learning_rate": 8.702623684595064e-06, "loss": 0.6, "step": 21542 }, { "epoch": 0.2575712287330073, "grad_norm": 5.028329849243164, "learning_rate": 8.702493565480254e-06, "loss": 0.6475, "step": 21543 }, { "epoch": 0.25758318487786797, "grad_norm": 1.9927133321762085, "learning_rate": 8.702363440813539e-06, "loss": 0.5332, "step": 21544 }, { "epoch": 0.2575951410227286, "grad_norm": 3.2533063888549805, "learning_rate": 8.70223331059512e-06, "loss": 0.5892, "step": 21545 }, { "epoch": 0.2576070971675893, "grad_norm": 4.5652265548706055, "learning_rate": 8.702103174825187e-06, "loss": 0.5818, "step": 21546 }, { "epoch": 0.25761905331244994, "grad_norm": 3.0373730659484863, "learning_rate": 8.701973033503938e-06, "loss": 0.5891, "step": 21547 }, { "epoch": 0.2576310094573106, "grad_norm": 85.21595764160156, "learning_rate": 8.701842886631567e-06, "loss": 0.5779, "step": 21548 }, { "epoch": 0.25764296560217126, "grad_norm": 3.653595447540283, "learning_rate": 8.701712734208268e-06, "loss": 0.6288, "step": 21549 }, { "epoch": 0.25765492174703186, "grad_norm": 2.916826009750366, "learning_rate": 8.70158257623424e-06, "loss": 0.5169, "step": 21550 }, { "epoch": 0.2576668778918925, "grad_norm": 4.722535610198975, "learning_rate": 8.701452412709675e-06, "loss": 0.6728, "step": 21551 }, { "epoch": 0.2576788340367532, "grad_norm": 3.1243999004364014, "learning_rate": 8.701322243634768e-06, "loss": 0.6255, "step": 21552 }, { "epoch": 0.25769079018161384, "grad_norm": 3.18986177444458, "learning_rate": 8.701192069009714e-06, "loss": 0.6478, "step": 21553 }, { "epoch": 0.2577027463264745, "grad_norm": 2.3657453060150146, "learning_rate": 8.701061888834711e-06, "loss": 0.5951, "step": 21554 }, { "epoch": 0.25771470247133516, "grad_norm": 1.9519546031951904, "learning_rate": 8.700931703109954e-06, "loss": 0.5466, "step": 21555 }, { "epoch": 0.2577266586161958, "grad_norm": 3.2394773960113525, "learning_rate": 8.700801511835634e-06, "loss": 0.6001, "step": 21556 }, { "epoch": 0.2577386147610565, "grad_norm": 2.1966893672943115, "learning_rate": 8.700671315011953e-06, "loss": 0.6676, "step": 21557 }, { "epoch": 0.2577505709059171, "grad_norm": 2.4196503162384033, "learning_rate": 8.700541112639098e-06, "loss": 0.5624, "step": 21558 }, { "epoch": 0.25776252705077773, "grad_norm": 2.3719284534454346, "learning_rate": 8.70041090471727e-06, "loss": 0.5494, "step": 21559 }, { "epoch": 0.2577744831956384, "grad_norm": 9.969426155090332, "learning_rate": 8.700280691246664e-06, "loss": 0.545, "step": 21560 }, { "epoch": 0.25778643934049905, "grad_norm": 1.8783893585205078, "learning_rate": 8.700150472227472e-06, "loss": 0.5782, "step": 21561 }, { "epoch": 0.2577983954853597, "grad_norm": 2.5652127265930176, "learning_rate": 8.700020247659892e-06, "loss": 0.5955, "step": 21562 }, { "epoch": 0.25781035163022037, "grad_norm": 6.775190830230713, "learning_rate": 8.699890017544118e-06, "loss": 0.6318, "step": 21563 }, { "epoch": 0.257822307775081, "grad_norm": 3.13703989982605, "learning_rate": 8.699759781880346e-06, "loss": 0.604, "step": 21564 }, { "epoch": 0.25783426391994163, "grad_norm": 4.0823974609375, "learning_rate": 8.699629540668772e-06, "loss": 0.5736, "step": 21565 }, { "epoch": 0.2578462200648023, "grad_norm": 3.518002986907959, "learning_rate": 8.699499293909588e-06, "loss": 0.5971, "step": 21566 }, { "epoch": 0.25785817620966295, "grad_norm": 2.938352108001709, "learning_rate": 8.699369041602994e-06, "loss": 0.6546, "step": 21567 }, { "epoch": 0.2578701323545236, "grad_norm": 4.344358921051025, "learning_rate": 8.699238783749181e-06, "loss": 0.5953, "step": 21568 }, { "epoch": 0.25788208849938427, "grad_norm": 140.44361877441406, "learning_rate": 8.699108520348348e-06, "loss": 0.6374, "step": 21569 }, { "epoch": 0.2578940446442449, "grad_norm": 7.095614910125732, "learning_rate": 8.698978251400689e-06, "loss": 0.5907, "step": 21570 }, { "epoch": 0.2579060007891056, "grad_norm": 5.129184246063232, "learning_rate": 8.698847976906397e-06, "loss": 0.6252, "step": 21571 }, { "epoch": 0.2579179569339662, "grad_norm": 1.8084862232208252, "learning_rate": 8.698717696865671e-06, "loss": 0.555, "step": 21572 }, { "epoch": 0.25792991307882684, "grad_norm": 2.082273244857788, "learning_rate": 8.698587411278705e-06, "loss": 0.6648, "step": 21573 }, { "epoch": 0.2579418692236875, "grad_norm": 2.310120105743408, "learning_rate": 8.698457120145693e-06, "loss": 0.6438, "step": 21574 }, { "epoch": 0.25795382536854816, "grad_norm": 2.0929548740386963, "learning_rate": 8.698326823466831e-06, "loss": 0.6727, "step": 21575 }, { "epoch": 0.2579657815134088, "grad_norm": 2.4579474925994873, "learning_rate": 8.698196521242315e-06, "loss": 0.5607, "step": 21576 }, { "epoch": 0.2579777376582695, "grad_norm": 3.862497568130493, "learning_rate": 8.698066213472341e-06, "loss": 0.5765, "step": 21577 }, { "epoch": 0.25798969380313014, "grad_norm": 1.85841703414917, "learning_rate": 8.697935900157101e-06, "loss": 0.5802, "step": 21578 }, { "epoch": 0.2580016499479908, "grad_norm": 2.6000752449035645, "learning_rate": 8.697805581296796e-06, "loss": 0.6832, "step": 21579 }, { "epoch": 0.2580136060928514, "grad_norm": 2.5555508136749268, "learning_rate": 8.697675256891618e-06, "loss": 0.6362, "step": 21580 }, { "epoch": 0.25802556223771206, "grad_norm": 3.04300856590271, "learning_rate": 8.69754492694176e-06, "loss": 0.6623, "step": 21581 }, { "epoch": 0.2580375183825727, "grad_norm": 3.2993950843811035, "learning_rate": 8.697414591447424e-06, "loss": 0.611, "step": 21582 }, { "epoch": 0.2580494745274334, "grad_norm": 3.8176913261413574, "learning_rate": 8.6972842504088e-06, "loss": 0.6413, "step": 21583 }, { "epoch": 0.25806143067229403, "grad_norm": 3.5577914714813232, "learning_rate": 8.697153903826085e-06, "loss": 0.5264, "step": 21584 }, { "epoch": 0.2580733868171547, "grad_norm": 2.9091200828552246, "learning_rate": 8.697023551699475e-06, "loss": 0.6615, "step": 21585 }, { "epoch": 0.25808534296201535, "grad_norm": 4.095006465911865, "learning_rate": 8.696893194029165e-06, "loss": 0.5367, "step": 21586 }, { "epoch": 0.25809729910687595, "grad_norm": 3.287139892578125, "learning_rate": 8.69676283081535e-06, "loss": 0.5303, "step": 21587 }, { "epoch": 0.2581092552517366, "grad_norm": 6.200963497161865, "learning_rate": 8.696632462058226e-06, "loss": 0.7382, "step": 21588 }, { "epoch": 0.25812121139659727, "grad_norm": 2.5111708641052246, "learning_rate": 8.69650208775799e-06, "loss": 0.5702, "step": 21589 }, { "epoch": 0.25813316754145793, "grad_norm": 2.1956450939178467, "learning_rate": 8.696371707914834e-06, "loss": 0.6705, "step": 21590 }, { "epoch": 0.2581451236863186, "grad_norm": 2.49186110496521, "learning_rate": 8.696241322528956e-06, "loss": 0.6037, "step": 21591 }, { "epoch": 0.25815707983117925, "grad_norm": 1.4320735931396484, "learning_rate": 8.696110931600553e-06, "loss": 0.5737, "step": 21592 }, { "epoch": 0.2581690359760399, "grad_norm": 3.15592360496521, "learning_rate": 8.695980535129815e-06, "loss": 0.6151, "step": 21593 }, { "epoch": 0.2581809921209005, "grad_norm": 2.079090118408203, "learning_rate": 8.695850133116943e-06, "loss": 0.6423, "step": 21594 }, { "epoch": 0.25819294826576117, "grad_norm": 1.5446125268936157, "learning_rate": 8.69571972556213e-06, "loss": 0.6032, "step": 21595 }, { "epoch": 0.2582049044106218, "grad_norm": 4.937659740447998, "learning_rate": 8.695589312465574e-06, "loss": 0.5625, "step": 21596 }, { "epoch": 0.2582168605554825, "grad_norm": 2.322392225265503, "learning_rate": 8.695458893827467e-06, "loss": 0.5087, "step": 21597 }, { "epoch": 0.25822881670034314, "grad_norm": 3.211195468902588, "learning_rate": 8.695328469648005e-06, "loss": 0.6188, "step": 21598 }, { "epoch": 0.2582407728452038, "grad_norm": 1.8917492628097534, "learning_rate": 8.695198039927387e-06, "loss": 0.7049, "step": 21599 }, { "epoch": 0.25825272899006446, "grad_norm": 1.845464825630188, "learning_rate": 8.695067604665805e-06, "loss": 0.5834, "step": 21600 }, { "epoch": 0.2582646851349251, "grad_norm": 1.5479933023452759, "learning_rate": 8.694937163863457e-06, "loss": 0.5426, "step": 21601 }, { "epoch": 0.2582766412797857, "grad_norm": 2.4288203716278076, "learning_rate": 8.694806717520537e-06, "loss": 0.6592, "step": 21602 }, { "epoch": 0.2582885974246464, "grad_norm": 2.8516669273376465, "learning_rate": 8.694676265637243e-06, "loss": 0.6415, "step": 21603 }, { "epoch": 0.25830055356950704, "grad_norm": 2.1105926036834717, "learning_rate": 8.694545808213767e-06, "loss": 0.6638, "step": 21604 }, { "epoch": 0.2583125097143677, "grad_norm": 3.4080028533935547, "learning_rate": 8.694415345250306e-06, "loss": 0.5474, "step": 21605 }, { "epoch": 0.25832446585922836, "grad_norm": 1.782031536102295, "learning_rate": 8.694284876747056e-06, "loss": 0.6078, "step": 21606 }, { "epoch": 0.258336422004089, "grad_norm": 3.0291366577148438, "learning_rate": 8.694154402704215e-06, "loss": 0.7138, "step": 21607 }, { "epoch": 0.2583483781489497, "grad_norm": 2.3669991493225098, "learning_rate": 8.694023923121974e-06, "loss": 0.6685, "step": 21608 }, { "epoch": 0.2583603342938103, "grad_norm": 1.3467248678207397, "learning_rate": 8.693893438000533e-06, "loss": 0.612, "step": 21609 }, { "epoch": 0.25837229043867094, "grad_norm": 4.7629804611206055, "learning_rate": 8.693762947340084e-06, "loss": 0.6047, "step": 21610 }, { "epoch": 0.2583842465835316, "grad_norm": 2.2786622047424316, "learning_rate": 8.693632451140825e-06, "loss": 0.695, "step": 21611 }, { "epoch": 0.25839620272839225, "grad_norm": 2.3047449588775635, "learning_rate": 8.69350194940295e-06, "loss": 0.5901, "step": 21612 }, { "epoch": 0.2584081588732529, "grad_norm": 2.3339664936065674, "learning_rate": 8.693371442126656e-06, "loss": 0.6224, "step": 21613 }, { "epoch": 0.25842011501811357, "grad_norm": 2.3365719318389893, "learning_rate": 8.69324092931214e-06, "loss": 0.6139, "step": 21614 }, { "epoch": 0.25843207116297423, "grad_norm": 3.461535930633545, "learning_rate": 8.693110410959595e-06, "loss": 0.5864, "step": 21615 }, { "epoch": 0.2584440273078349, "grad_norm": 2.452225923538208, "learning_rate": 8.692979887069217e-06, "loss": 0.5669, "step": 21616 }, { "epoch": 0.2584559834526955, "grad_norm": 5.509669780731201, "learning_rate": 8.692849357641205e-06, "loss": 0.7048, "step": 21617 }, { "epoch": 0.25846793959755615, "grad_norm": 2.709310293197632, "learning_rate": 8.692718822675749e-06, "loss": 0.7064, "step": 21618 }, { "epoch": 0.2584798957424168, "grad_norm": 2.2521615028381348, "learning_rate": 8.69258828217305e-06, "loss": 0.5706, "step": 21619 }, { "epoch": 0.25849185188727747, "grad_norm": 1.9591727256774902, "learning_rate": 8.692457736133301e-06, "loss": 0.593, "step": 21620 }, { "epoch": 0.2585038080321381, "grad_norm": 1.831398844718933, "learning_rate": 8.6923271845567e-06, "loss": 0.5133, "step": 21621 }, { "epoch": 0.2585157641769988, "grad_norm": 2.9273953437805176, "learning_rate": 8.69219662744344e-06, "loss": 0.665, "step": 21622 }, { "epoch": 0.25852772032185944, "grad_norm": 2.0387320518493652, "learning_rate": 8.692066064793719e-06, "loss": 0.6177, "step": 21623 }, { "epoch": 0.25853967646672005, "grad_norm": 7.12673807144165, "learning_rate": 8.69193549660773e-06, "loss": 0.6359, "step": 21624 }, { "epoch": 0.2585516326115807, "grad_norm": 5.622238636016846, "learning_rate": 8.691804922885672e-06, "loss": 0.6304, "step": 21625 }, { "epoch": 0.25856358875644136, "grad_norm": 2.8259434700012207, "learning_rate": 8.691674343627739e-06, "loss": 0.5321, "step": 21626 }, { "epoch": 0.258575544901302, "grad_norm": 4.4654035568237305, "learning_rate": 8.691543758834126e-06, "loss": 0.6346, "step": 21627 }, { "epoch": 0.2585875010461627, "grad_norm": 1.5177501440048218, "learning_rate": 8.691413168505031e-06, "loss": 0.5387, "step": 21628 }, { "epoch": 0.25859945719102334, "grad_norm": 1.836199164390564, "learning_rate": 8.691282572640649e-06, "loss": 0.6499, "step": 21629 }, { "epoch": 0.258611413335884, "grad_norm": 2.269758462905884, "learning_rate": 8.691151971241176e-06, "loss": 0.5774, "step": 21630 }, { "epoch": 0.2586233694807446, "grad_norm": 3.073650598526001, "learning_rate": 8.691021364306808e-06, "loss": 0.5879, "step": 21631 }, { "epoch": 0.25863532562560526, "grad_norm": 6.9759602546691895, "learning_rate": 8.69089075183774e-06, "loss": 0.5838, "step": 21632 }, { "epoch": 0.2586472817704659, "grad_norm": 4.60490083694458, "learning_rate": 8.690760133834166e-06, "loss": 0.6316, "step": 21633 }, { "epoch": 0.2586592379153266, "grad_norm": 1.8057222366333008, "learning_rate": 8.690629510296287e-06, "loss": 0.5799, "step": 21634 }, { "epoch": 0.25867119406018724, "grad_norm": 1.8012080192565918, "learning_rate": 8.690498881224295e-06, "loss": 0.5496, "step": 21635 }, { "epoch": 0.2586831502050479, "grad_norm": 3.411590099334717, "learning_rate": 8.690368246618386e-06, "loss": 0.6332, "step": 21636 }, { "epoch": 0.25869510634990855, "grad_norm": 7.598806858062744, "learning_rate": 8.690237606478758e-06, "loss": 0.5749, "step": 21637 }, { "epoch": 0.2587070624947692, "grad_norm": 11.33539867401123, "learning_rate": 8.690106960805604e-06, "loss": 0.6777, "step": 21638 }, { "epoch": 0.2587190186396298, "grad_norm": 1.8429715633392334, "learning_rate": 8.689976309599122e-06, "loss": 0.5661, "step": 21639 }, { "epoch": 0.2587309747844905, "grad_norm": 3.0673723220825195, "learning_rate": 8.689845652859508e-06, "loss": 0.6058, "step": 21640 }, { "epoch": 0.25874293092935113, "grad_norm": 3.1670727729797363, "learning_rate": 8.689714990586957e-06, "loss": 0.6088, "step": 21641 }, { "epoch": 0.2587548870742118, "grad_norm": 2.6257164478302, "learning_rate": 8.689584322781666e-06, "loss": 0.5944, "step": 21642 }, { "epoch": 0.25876684321907245, "grad_norm": 2.011385202407837, "learning_rate": 8.689453649443829e-06, "loss": 0.6086, "step": 21643 }, { "epoch": 0.2587787993639331, "grad_norm": 1.6514666080474854, "learning_rate": 8.689322970573643e-06, "loss": 0.5587, "step": 21644 }, { "epoch": 0.25879075550879377, "grad_norm": 1.9725711345672607, "learning_rate": 8.689192286171306e-06, "loss": 0.515, "step": 21645 }, { "epoch": 0.25880271165365437, "grad_norm": 2.0367586612701416, "learning_rate": 8.68906159623701e-06, "loss": 0.557, "step": 21646 }, { "epoch": 0.25881466779851503, "grad_norm": 1.861948847770691, "learning_rate": 8.688930900770953e-06, "loss": 0.6555, "step": 21647 }, { "epoch": 0.2588266239433757, "grad_norm": 2.822096824645996, "learning_rate": 8.688800199773333e-06, "loss": 0.6161, "step": 21648 }, { "epoch": 0.25883858008823635, "grad_norm": 2.8316009044647217, "learning_rate": 8.688669493244342e-06, "loss": 0.573, "step": 21649 }, { "epoch": 0.258850536233097, "grad_norm": 2.340808391571045, "learning_rate": 8.68853878118418e-06, "loss": 0.5556, "step": 21650 }, { "epoch": 0.25886249237795766, "grad_norm": 26.921703338623047, "learning_rate": 8.68840806359304e-06, "loss": 0.6084, "step": 21651 }, { "epoch": 0.2588744485228183, "grad_norm": 2.414186954498291, "learning_rate": 8.688277340471119e-06, "loss": 0.5668, "step": 21652 }, { "epoch": 0.2588864046676789, "grad_norm": 2.5681984424591064, "learning_rate": 8.688146611818611e-06, "loss": 0.6243, "step": 21653 }, { "epoch": 0.2588983608125396, "grad_norm": 2.7464070320129395, "learning_rate": 8.688015877635716e-06, "loss": 0.5707, "step": 21654 }, { "epoch": 0.25891031695740024, "grad_norm": 2.4576661586761475, "learning_rate": 8.68788513792263e-06, "loss": 0.5496, "step": 21655 }, { "epoch": 0.2589222731022609, "grad_norm": 1.6712950468063354, "learning_rate": 8.687754392679544e-06, "loss": 0.6289, "step": 21656 }, { "epoch": 0.25893422924712156, "grad_norm": 3.7750418186187744, "learning_rate": 8.687623641906657e-06, "loss": 0.5733, "step": 21657 }, { "epoch": 0.2589461853919822, "grad_norm": 3.4816970825195312, "learning_rate": 8.687492885604167e-06, "loss": 0.6057, "step": 21658 }, { "epoch": 0.2589581415368429, "grad_norm": 2.5647058486938477, "learning_rate": 8.687362123772268e-06, "loss": 0.5977, "step": 21659 }, { "epoch": 0.25897009768170354, "grad_norm": 1.8553866147994995, "learning_rate": 8.687231356411156e-06, "loss": 0.627, "step": 21660 }, { "epoch": 0.25898205382656414, "grad_norm": 2.270838975906372, "learning_rate": 8.687100583521028e-06, "loss": 0.5244, "step": 21661 }, { "epoch": 0.2589940099714248, "grad_norm": 2.3736371994018555, "learning_rate": 8.686969805102079e-06, "loss": 0.5823, "step": 21662 }, { "epoch": 0.25900596611628546, "grad_norm": 4.1355133056640625, "learning_rate": 8.686839021154505e-06, "loss": 0.6634, "step": 21663 }, { "epoch": 0.2590179222611461, "grad_norm": 2.3019375801086426, "learning_rate": 8.686708231678505e-06, "loss": 0.6314, "step": 21664 }, { "epoch": 0.2590298784060068, "grad_norm": 1.4268406629562378, "learning_rate": 8.686577436674271e-06, "loss": 0.5683, "step": 21665 }, { "epoch": 0.25904183455086743, "grad_norm": 1.577326774597168, "learning_rate": 8.686446636142e-06, "loss": 0.5826, "step": 21666 }, { "epoch": 0.2590537906957281, "grad_norm": 2.2156405448913574, "learning_rate": 8.686315830081893e-06, "loss": 0.5959, "step": 21667 }, { "epoch": 0.2590657468405887, "grad_norm": 1.988427996635437, "learning_rate": 8.68618501849414e-06, "loss": 0.5289, "step": 21668 }, { "epoch": 0.25907770298544935, "grad_norm": 3.3526175022125244, "learning_rate": 8.68605420137894e-06, "loss": 0.5885, "step": 21669 }, { "epoch": 0.25908965913031, "grad_norm": 3.7452304363250732, "learning_rate": 8.685923378736487e-06, "loss": 0.5073, "step": 21670 }, { "epoch": 0.25910161527517067, "grad_norm": 1.7777196168899536, "learning_rate": 8.685792550566981e-06, "loss": 0.6551, "step": 21671 }, { "epoch": 0.25911357142003133, "grad_norm": 1.9103238582611084, "learning_rate": 8.685661716870615e-06, "loss": 0.5496, "step": 21672 }, { "epoch": 0.259125527564892, "grad_norm": 1.934640884399414, "learning_rate": 8.685530877647585e-06, "loss": 0.6343, "step": 21673 }, { "epoch": 0.25913748370975265, "grad_norm": 3.0887491703033447, "learning_rate": 8.685400032898089e-06, "loss": 0.5464, "step": 21674 }, { "epoch": 0.2591494398546133, "grad_norm": 3.7929275035858154, "learning_rate": 8.685269182622323e-06, "loss": 0.6247, "step": 21675 }, { "epoch": 0.2591613959994739, "grad_norm": 2.236156463623047, "learning_rate": 8.685138326820483e-06, "loss": 0.558, "step": 21676 }, { "epoch": 0.25917335214433457, "grad_norm": 3.054267406463623, "learning_rate": 8.685007465492765e-06, "loss": 0.6361, "step": 21677 }, { "epoch": 0.2591853082891952, "grad_norm": 3.117030382156372, "learning_rate": 8.684876598639365e-06, "loss": 0.7318, "step": 21678 }, { "epoch": 0.2591972644340559, "grad_norm": 1.7440524101257324, "learning_rate": 8.684745726260479e-06, "loss": 0.5592, "step": 21679 }, { "epoch": 0.25920922057891654, "grad_norm": 3.1183249950408936, "learning_rate": 8.684614848356304e-06, "loss": 0.5819, "step": 21680 }, { "epoch": 0.2592211767237772, "grad_norm": 1.6566530466079712, "learning_rate": 8.684483964927035e-06, "loss": 0.5374, "step": 21681 }, { "epoch": 0.25923313286863786, "grad_norm": 1.8569153547286987, "learning_rate": 8.684353075972871e-06, "loss": 0.5961, "step": 21682 }, { "epoch": 0.25924508901349846, "grad_norm": 5.010685920715332, "learning_rate": 8.684222181494006e-06, "loss": 0.5899, "step": 21683 }, { "epoch": 0.2592570451583591, "grad_norm": 2.6783344745635986, "learning_rate": 8.684091281490637e-06, "loss": 0.7519, "step": 21684 }, { "epoch": 0.2592690013032198, "grad_norm": 1.7455302476882935, "learning_rate": 8.683960375962959e-06, "loss": 0.5478, "step": 21685 }, { "epoch": 0.25928095744808044, "grad_norm": 2.4303696155548096, "learning_rate": 8.68382946491117e-06, "loss": 0.5692, "step": 21686 }, { "epoch": 0.2592929135929411, "grad_norm": 1.6851229667663574, "learning_rate": 8.683698548335465e-06, "loss": 0.5611, "step": 21687 }, { "epoch": 0.25930486973780176, "grad_norm": 1.698272705078125, "learning_rate": 8.68356762623604e-06, "loss": 0.658, "step": 21688 }, { "epoch": 0.2593168258826624, "grad_norm": 4.144759654998779, "learning_rate": 8.683436698613094e-06, "loss": 0.6303, "step": 21689 }, { "epoch": 0.259328782027523, "grad_norm": 2.615577459335327, "learning_rate": 8.683305765466821e-06, "loss": 0.5322, "step": 21690 }, { "epoch": 0.2593407381723837, "grad_norm": 3.7434122562408447, "learning_rate": 8.683174826797417e-06, "loss": 0.5205, "step": 21691 }, { "epoch": 0.25935269431724434, "grad_norm": 2.4607536792755127, "learning_rate": 8.683043882605083e-06, "loss": 0.5528, "step": 21692 }, { "epoch": 0.259364650462105, "grad_norm": 2.2197093963623047, "learning_rate": 8.682912932890008e-06, "loss": 0.5848, "step": 21693 }, { "epoch": 0.25937660660696565, "grad_norm": 1.9401404857635498, "learning_rate": 8.682781977652393e-06, "loss": 0.6942, "step": 21694 }, { "epoch": 0.2593885627518263, "grad_norm": 2.9635956287384033, "learning_rate": 8.682651016892432e-06, "loss": 0.6555, "step": 21695 }, { "epoch": 0.25940051889668697, "grad_norm": 1.8656418323516846, "learning_rate": 8.682520050610325e-06, "loss": 0.5703, "step": 21696 }, { "epoch": 0.25941247504154763, "grad_norm": 2.806976318359375, "learning_rate": 8.682389078806263e-06, "loss": 0.7186, "step": 21697 }, { "epoch": 0.25942443118640823, "grad_norm": 3.3048489093780518, "learning_rate": 8.68225810148045e-06, "loss": 0.6962, "step": 21698 }, { "epoch": 0.2594363873312689, "grad_norm": 2.347708225250244, "learning_rate": 8.682127118633075e-06, "loss": 0.6805, "step": 21699 }, { "epoch": 0.25944834347612955, "grad_norm": 38.01825714111328, "learning_rate": 8.681996130264336e-06, "loss": 0.6164, "step": 21700 }, { "epoch": 0.2594602996209902, "grad_norm": 22.399608612060547, "learning_rate": 8.681865136374434e-06, "loss": 0.5264, "step": 21701 }, { "epoch": 0.25947225576585087, "grad_norm": 3.204977035522461, "learning_rate": 8.68173413696356e-06, "loss": 0.6007, "step": 21702 }, { "epoch": 0.2594842119107115, "grad_norm": 2.3140175342559814, "learning_rate": 8.681603132031913e-06, "loss": 0.595, "step": 21703 }, { "epoch": 0.2594961680555722, "grad_norm": 2.0169572830200195, "learning_rate": 8.681472121579689e-06, "loss": 0.605, "step": 21704 }, { "epoch": 0.2595081242004328, "grad_norm": 4.452978610992432, "learning_rate": 8.681341105607083e-06, "loss": 0.6585, "step": 21705 }, { "epoch": 0.25952008034529345, "grad_norm": 9.001766204833984, "learning_rate": 8.681210084114297e-06, "loss": 0.513, "step": 21706 }, { "epoch": 0.2595320364901541, "grad_norm": 4.576259136199951, "learning_rate": 8.68107905710152e-06, "loss": 0.5757, "step": 21707 }, { "epoch": 0.25954399263501476, "grad_norm": 2.338484764099121, "learning_rate": 8.680948024568953e-06, "loss": 0.6478, "step": 21708 }, { "epoch": 0.2595559487798754, "grad_norm": 1.6634273529052734, "learning_rate": 8.680816986516792e-06, "loss": 0.5757, "step": 21709 }, { "epoch": 0.2595679049247361, "grad_norm": 2.2935147285461426, "learning_rate": 8.680685942945231e-06, "loss": 0.6356, "step": 21710 }, { "epoch": 0.25957986106959674, "grad_norm": 3.148695945739746, "learning_rate": 8.68055489385447e-06, "loss": 0.6648, "step": 21711 }, { "epoch": 0.25959181721445734, "grad_norm": 1.967879056930542, "learning_rate": 8.680423839244703e-06, "loss": 0.6608, "step": 21712 }, { "epoch": 0.259603773359318, "grad_norm": 5.203068256378174, "learning_rate": 8.680292779116126e-06, "loss": 0.5443, "step": 21713 }, { "epoch": 0.25961572950417866, "grad_norm": 1.938014268875122, "learning_rate": 8.68016171346894e-06, "loss": 0.5271, "step": 21714 }, { "epoch": 0.2596276856490393, "grad_norm": 9.82693862915039, "learning_rate": 8.680030642303336e-06, "loss": 0.6279, "step": 21715 }, { "epoch": 0.2596396417939, "grad_norm": 3.1962320804595947, "learning_rate": 8.679899565619514e-06, "loss": 0.6017, "step": 21716 }, { "epoch": 0.25965159793876064, "grad_norm": 1.7708475589752197, "learning_rate": 8.67976848341767e-06, "loss": 0.5611, "step": 21717 }, { "epoch": 0.2596635540836213, "grad_norm": 2.911964178085327, "learning_rate": 8.679637395698e-06, "loss": 0.6495, "step": 21718 }, { "epoch": 0.25967551022848195, "grad_norm": 1.970872163772583, "learning_rate": 8.679506302460699e-06, "loss": 0.5456, "step": 21719 }, { "epoch": 0.25968746637334256, "grad_norm": 1.6804273128509521, "learning_rate": 8.679375203705966e-06, "loss": 0.559, "step": 21720 }, { "epoch": 0.2596994225182032, "grad_norm": 2.0332741737365723, "learning_rate": 8.679244099433996e-06, "loss": 0.5656, "step": 21721 }, { "epoch": 0.2597113786630639, "grad_norm": 1.9999313354492188, "learning_rate": 8.679112989644989e-06, "loss": 0.5774, "step": 21722 }, { "epoch": 0.25972333480792453, "grad_norm": 4.716170787811279, "learning_rate": 8.678981874339137e-06, "loss": 0.5795, "step": 21723 }, { "epoch": 0.2597352909527852, "grad_norm": 1.4489065408706665, "learning_rate": 8.67885075351664e-06, "loss": 0.5892, "step": 21724 }, { "epoch": 0.25974724709764585, "grad_norm": 1.6174736022949219, "learning_rate": 8.678719627177691e-06, "loss": 0.5818, "step": 21725 }, { "epoch": 0.2597592032425065, "grad_norm": 7.280523300170898, "learning_rate": 8.678588495322491e-06, "loss": 0.5511, "step": 21726 }, { "epoch": 0.2597711593873671, "grad_norm": 12.778850555419922, "learning_rate": 8.678457357951233e-06, "loss": 0.6074, "step": 21727 }, { "epoch": 0.25978311553222777, "grad_norm": 1.9702829122543335, "learning_rate": 8.678326215064117e-06, "loss": 0.5989, "step": 21728 }, { "epoch": 0.25979507167708843, "grad_norm": 2.5098936557769775, "learning_rate": 8.678195066661336e-06, "loss": 0.556, "step": 21729 }, { "epoch": 0.2598070278219491, "grad_norm": 2.490238904953003, "learning_rate": 8.678063912743088e-06, "loss": 0.6381, "step": 21730 }, { "epoch": 0.25981898396680975, "grad_norm": 2.024507522583008, "learning_rate": 8.677932753309573e-06, "loss": 0.5409, "step": 21731 }, { "epoch": 0.2598309401116704, "grad_norm": 2.4598820209503174, "learning_rate": 8.67780158836098e-06, "loss": 0.5849, "step": 21732 }, { "epoch": 0.25984289625653106, "grad_norm": 5.820984840393066, "learning_rate": 8.677670417897514e-06, "loss": 0.6085, "step": 21733 }, { "epoch": 0.2598548524013917, "grad_norm": 3.6000847816467285, "learning_rate": 8.677539241919369e-06, "loss": 0.5969, "step": 21734 }, { "epoch": 0.2598668085462523, "grad_norm": 2.2723329067230225, "learning_rate": 8.677408060426739e-06, "loss": 0.6439, "step": 21735 }, { "epoch": 0.259878764691113, "grad_norm": 1.6130211353302002, "learning_rate": 8.677276873419823e-06, "loss": 0.6181, "step": 21736 }, { "epoch": 0.25989072083597364, "grad_norm": 2.648838520050049, "learning_rate": 8.677145680898815e-06, "loss": 0.5708, "step": 21737 }, { "epoch": 0.2599026769808343, "grad_norm": 3.3587119579315186, "learning_rate": 8.677014482863917e-06, "loss": 0.6771, "step": 21738 }, { "epoch": 0.25991463312569496, "grad_norm": 2.569160223007202, "learning_rate": 8.676883279315323e-06, "loss": 0.6015, "step": 21739 }, { "epoch": 0.2599265892705556, "grad_norm": 1.762485384941101, "learning_rate": 8.676752070253228e-06, "loss": 0.6575, "step": 21740 }, { "epoch": 0.2599385454154163, "grad_norm": 2.2759346961975098, "learning_rate": 8.676620855677829e-06, "loss": 0.6719, "step": 21741 }, { "epoch": 0.2599505015602769, "grad_norm": 5.278906345367432, "learning_rate": 8.676489635589326e-06, "loss": 0.5534, "step": 21742 }, { "epoch": 0.25996245770513754, "grad_norm": 2.4966869354248047, "learning_rate": 8.676358409987913e-06, "loss": 0.6468, "step": 21743 }, { "epoch": 0.2599744138499982, "grad_norm": 1.779896855354309, "learning_rate": 8.676227178873787e-06, "loss": 0.6925, "step": 21744 }, { "epoch": 0.25998636999485886, "grad_norm": 5.313531398773193, "learning_rate": 8.676095942247146e-06, "loss": 0.5591, "step": 21745 }, { "epoch": 0.2599983261397195, "grad_norm": 2.130214214324951, "learning_rate": 8.675964700108185e-06, "loss": 0.5867, "step": 21746 }, { "epoch": 0.2600102822845802, "grad_norm": 1.8334693908691406, "learning_rate": 8.675833452457104e-06, "loss": 0.5603, "step": 21747 }, { "epoch": 0.26002223842944083, "grad_norm": 9.120327949523926, "learning_rate": 8.675702199294096e-06, "loss": 0.6184, "step": 21748 }, { "epoch": 0.26003419457430144, "grad_norm": 2.111614465713501, "learning_rate": 8.67557094061936e-06, "loss": 0.5588, "step": 21749 }, { "epoch": 0.2600461507191621, "grad_norm": 7.423687934875488, "learning_rate": 8.675439676433092e-06, "loss": 0.4615, "step": 21750 }, { "epoch": 0.26005810686402275, "grad_norm": 1.9811804294586182, "learning_rate": 8.67530840673549e-06, "loss": 0.6472, "step": 21751 }, { "epoch": 0.2600700630088834, "grad_norm": 1.7637075185775757, "learning_rate": 8.67517713152675e-06, "loss": 0.5845, "step": 21752 }, { "epoch": 0.26008201915374407, "grad_norm": 1.7160273790359497, "learning_rate": 8.675045850807067e-06, "loss": 0.5746, "step": 21753 }, { "epoch": 0.26009397529860473, "grad_norm": 2.118220329284668, "learning_rate": 8.67491456457664e-06, "loss": 0.6808, "step": 21754 }, { "epoch": 0.2601059314434654, "grad_norm": 1.488344430923462, "learning_rate": 8.674783272835667e-06, "loss": 0.542, "step": 21755 }, { "epoch": 0.26011788758832605, "grad_norm": 1.670997977256775, "learning_rate": 8.674651975584344e-06, "loss": 0.6132, "step": 21756 }, { "epoch": 0.26012984373318665, "grad_norm": 3.2256417274475098, "learning_rate": 8.674520672822866e-06, "loss": 0.5693, "step": 21757 }, { "epoch": 0.2601417998780473, "grad_norm": 20.540075302124023, "learning_rate": 8.67438936455143e-06, "loss": 0.6523, "step": 21758 }, { "epoch": 0.26015375602290797, "grad_norm": 4.302548408508301, "learning_rate": 8.674258050770236e-06, "loss": 0.6412, "step": 21759 }, { "epoch": 0.2601657121677686, "grad_norm": 2.098085403442383, "learning_rate": 8.674126731479478e-06, "loss": 0.6106, "step": 21760 }, { "epoch": 0.2601776683126293, "grad_norm": 1.6309093236923218, "learning_rate": 8.673995406679354e-06, "loss": 0.5593, "step": 21761 }, { "epoch": 0.26018962445748994, "grad_norm": 1.95303475856781, "learning_rate": 8.673864076370063e-06, "loss": 0.5822, "step": 21762 }, { "epoch": 0.2602015806023506, "grad_norm": 6.951023578643799, "learning_rate": 8.673732740551797e-06, "loss": 0.6102, "step": 21763 }, { "epoch": 0.2602135367472112, "grad_norm": 3.703190803527832, "learning_rate": 8.673601399224755e-06, "loss": 0.6629, "step": 21764 }, { "epoch": 0.26022549289207186, "grad_norm": 1.5998690128326416, "learning_rate": 8.673470052389137e-06, "loss": 0.6616, "step": 21765 }, { "epoch": 0.2602374490369325, "grad_norm": 3.9551820755004883, "learning_rate": 8.673338700045137e-06, "loss": 0.5434, "step": 21766 }, { "epoch": 0.2602494051817932, "grad_norm": 3.73498272895813, "learning_rate": 8.673207342192953e-06, "loss": 0.6118, "step": 21767 }, { "epoch": 0.26026136132665384, "grad_norm": 1.9374949932098389, "learning_rate": 8.67307597883278e-06, "loss": 0.5069, "step": 21768 }, { "epoch": 0.2602733174715145, "grad_norm": 2.6668193340301514, "learning_rate": 8.672944609964817e-06, "loss": 0.6125, "step": 21769 }, { "epoch": 0.26028527361637516, "grad_norm": 2.4332940578460693, "learning_rate": 8.672813235589261e-06, "loss": 0.5939, "step": 21770 }, { "epoch": 0.26029722976123576, "grad_norm": 3.0401952266693115, "learning_rate": 8.67268185570631e-06, "loss": 0.5937, "step": 21771 }, { "epoch": 0.2603091859060964, "grad_norm": 4.527225971221924, "learning_rate": 8.672550470316157e-06, "loss": 0.6215, "step": 21772 }, { "epoch": 0.2603211420509571, "grad_norm": 1.9560822248458862, "learning_rate": 8.672419079419002e-06, "loss": 0.5311, "step": 21773 }, { "epoch": 0.26033309819581774, "grad_norm": 1.5925782918930054, "learning_rate": 8.672287683015042e-06, "loss": 0.6243, "step": 21774 }, { "epoch": 0.2603450543406784, "grad_norm": 1.8708502054214478, "learning_rate": 8.672156281104473e-06, "loss": 0.5781, "step": 21775 }, { "epoch": 0.26035701048553905, "grad_norm": 2.6104977130889893, "learning_rate": 8.672024873687495e-06, "loss": 0.6043, "step": 21776 }, { "epoch": 0.2603689666303997, "grad_norm": 2.786090850830078, "learning_rate": 8.6718934607643e-06, "loss": 0.6545, "step": 21777 }, { "epoch": 0.26038092277526037, "grad_norm": 1.4012194871902466, "learning_rate": 8.671762042335088e-06, "loss": 0.6264, "step": 21778 }, { "epoch": 0.260392878920121, "grad_norm": 2.0475711822509766, "learning_rate": 8.671630618400058e-06, "loss": 0.5765, "step": 21779 }, { "epoch": 0.26040483506498163, "grad_norm": 3.527296304702759, "learning_rate": 8.671499188959402e-06, "loss": 0.5687, "step": 21780 }, { "epoch": 0.2604167912098423, "grad_norm": 2.3737146854400635, "learning_rate": 8.671367754013322e-06, "loss": 0.6506, "step": 21781 }, { "epoch": 0.26042874735470295, "grad_norm": 3.9585928916931152, "learning_rate": 8.67123631356201e-06, "loss": 0.5584, "step": 21782 }, { "epoch": 0.2604407034995636, "grad_norm": 2.724790334701538, "learning_rate": 8.671104867605668e-06, "loss": 0.8096, "step": 21783 }, { "epoch": 0.26045265964442427, "grad_norm": 1.5792769193649292, "learning_rate": 8.670973416144492e-06, "loss": 0.5504, "step": 21784 }, { "epoch": 0.2604646157892849, "grad_norm": 5.52922248840332, "learning_rate": 8.670841959178677e-06, "loss": 0.498, "step": 21785 }, { "epoch": 0.26047657193414553, "grad_norm": 2.9089980125427246, "learning_rate": 8.670710496708423e-06, "loss": 0.7155, "step": 21786 }, { "epoch": 0.2604885280790062, "grad_norm": 1.5620951652526855, "learning_rate": 8.670579028733925e-06, "loss": 0.5629, "step": 21787 }, { "epoch": 0.26050048422386685, "grad_norm": 2.226365566253662, "learning_rate": 8.67044755525538e-06, "loss": 0.6314, "step": 21788 }, { "epoch": 0.2605124403687275, "grad_norm": 3.7255709171295166, "learning_rate": 8.670316076272987e-06, "loss": 0.657, "step": 21789 }, { "epoch": 0.26052439651358816, "grad_norm": 1.794079065322876, "learning_rate": 8.670184591786941e-06, "loss": 0.543, "step": 21790 }, { "epoch": 0.2605363526584488, "grad_norm": 2.4502577781677246, "learning_rate": 8.67005310179744e-06, "loss": 0.629, "step": 21791 }, { "epoch": 0.2605483088033095, "grad_norm": 2.552236795425415, "learning_rate": 8.669921606304682e-06, "loss": 0.6558, "step": 21792 }, { "epoch": 0.26056026494817014, "grad_norm": 24.775426864624023, "learning_rate": 8.669790105308863e-06, "loss": 0.6154, "step": 21793 }, { "epoch": 0.26057222109303074, "grad_norm": 2.498879909515381, "learning_rate": 8.66965859881018e-06, "loss": 0.6432, "step": 21794 }, { "epoch": 0.2605841772378914, "grad_norm": 3.4044415950775146, "learning_rate": 8.669527086808832e-06, "loss": 0.6325, "step": 21795 }, { "epoch": 0.26059613338275206, "grad_norm": 2.5086584091186523, "learning_rate": 8.669395569305015e-06, "loss": 0.5267, "step": 21796 }, { "epoch": 0.2606080895276127, "grad_norm": 6.96594762802124, "learning_rate": 8.669264046298926e-06, "loss": 0.6102, "step": 21797 }, { "epoch": 0.2606200456724734, "grad_norm": 2.452526807785034, "learning_rate": 8.669132517790762e-06, "loss": 0.5947, "step": 21798 }, { "epoch": 0.26063200181733404, "grad_norm": 1.6945278644561768, "learning_rate": 8.669000983780722e-06, "loss": 0.6198, "step": 21799 }, { "epoch": 0.2606439579621947, "grad_norm": 3.0029921531677246, "learning_rate": 8.668869444269e-06, "loss": 0.6215, "step": 21800 }, { "epoch": 0.2606559141070553, "grad_norm": 3.9747133255004883, "learning_rate": 8.6687378992558e-06, "loss": 0.5214, "step": 21801 }, { "epoch": 0.26066787025191596, "grad_norm": 1.895635724067688, "learning_rate": 8.668606348741308e-06, "loss": 0.6154, "step": 21802 }, { "epoch": 0.2606798263967766, "grad_norm": 3.554715633392334, "learning_rate": 8.668474792725732e-06, "loss": 0.6287, "step": 21803 }, { "epoch": 0.2606917825416373, "grad_norm": 3.5479743480682373, "learning_rate": 8.668343231209265e-06, "loss": 0.6489, "step": 21804 }, { "epoch": 0.26070373868649793, "grad_norm": 1.8589317798614502, "learning_rate": 8.668211664192101e-06, "loss": 0.6092, "step": 21805 }, { "epoch": 0.2607156948313586, "grad_norm": 2.1357192993164062, "learning_rate": 8.668080091674445e-06, "loss": 0.6427, "step": 21806 }, { "epoch": 0.26072765097621925, "grad_norm": 3.4539196491241455, "learning_rate": 8.667948513656485e-06, "loss": 0.547, "step": 21807 }, { "epoch": 0.26073960712107985, "grad_norm": 2.7991693019866943, "learning_rate": 8.667816930138427e-06, "loss": 0.6418, "step": 21808 }, { "epoch": 0.2607515632659405, "grad_norm": 2.0421369075775146, "learning_rate": 8.667685341120463e-06, "loss": 0.6246, "step": 21809 }, { "epoch": 0.26076351941080117, "grad_norm": 1.4209784269332886, "learning_rate": 8.667553746602793e-06, "loss": 0.5751, "step": 21810 }, { "epoch": 0.26077547555566183, "grad_norm": 2.203889846801758, "learning_rate": 8.66742214658561e-06, "loss": 0.7583, "step": 21811 }, { "epoch": 0.2607874317005225, "grad_norm": 6.31353235244751, "learning_rate": 8.667290541069117e-06, "loss": 0.5914, "step": 21812 }, { "epoch": 0.26079938784538315, "grad_norm": 2.5932674407958984, "learning_rate": 8.66715893005351e-06, "loss": 0.6191, "step": 21813 }, { "epoch": 0.2608113439902438, "grad_norm": 2.453892230987549, "learning_rate": 8.667027313538982e-06, "loss": 0.5331, "step": 21814 }, { "epoch": 0.26082330013510446, "grad_norm": 4.154977798461914, "learning_rate": 8.666895691525736e-06, "loss": 0.58, "step": 21815 }, { "epoch": 0.26083525627996507, "grad_norm": 4.823586940765381, "learning_rate": 8.666764064013966e-06, "loss": 0.6616, "step": 21816 }, { "epoch": 0.2608472124248257, "grad_norm": 5.57746696472168, "learning_rate": 8.666632431003869e-06, "loss": 0.5639, "step": 21817 }, { "epoch": 0.2608591685696864, "grad_norm": 2.839888095855713, "learning_rate": 8.666500792495646e-06, "loss": 0.6224, "step": 21818 }, { "epoch": 0.26087112471454704, "grad_norm": 2.577887535095215, "learning_rate": 8.666369148489491e-06, "loss": 0.6465, "step": 21819 }, { "epoch": 0.2608830808594077, "grad_norm": 3.088527202606201, "learning_rate": 8.666237498985601e-06, "loss": 0.7406, "step": 21820 }, { "epoch": 0.26089503700426836, "grad_norm": 3.8684732913970947, "learning_rate": 8.666105843984176e-06, "loss": 0.6393, "step": 21821 }, { "epoch": 0.260906993149129, "grad_norm": 10.041948318481445, "learning_rate": 8.665974183485412e-06, "loss": 0.5818, "step": 21822 }, { "epoch": 0.2609189492939896, "grad_norm": 1.553855299949646, "learning_rate": 8.66584251748951e-06, "loss": 0.5042, "step": 21823 }, { "epoch": 0.2609309054388503, "grad_norm": 2.7406387329101562, "learning_rate": 8.66571084599666e-06, "loss": 0.6723, "step": 21824 }, { "epoch": 0.26094286158371094, "grad_norm": 1.8592685461044312, "learning_rate": 8.665579169007064e-06, "loss": 0.5271, "step": 21825 }, { "epoch": 0.2609548177285716, "grad_norm": 3.0175681114196777, "learning_rate": 8.665447486520919e-06, "loss": 0.732, "step": 21826 }, { "epoch": 0.26096677387343226, "grad_norm": 3.7292990684509277, "learning_rate": 8.665315798538425e-06, "loss": 0.5451, "step": 21827 }, { "epoch": 0.2609787300182929, "grad_norm": 2.637521505355835, "learning_rate": 8.665184105059774e-06, "loss": 0.5751, "step": 21828 }, { "epoch": 0.2609906861631536, "grad_norm": 1.9951306581497192, "learning_rate": 8.665052406085169e-06, "loss": 0.6479, "step": 21829 }, { "epoch": 0.2610026423080142, "grad_norm": 4.297728061676025, "learning_rate": 8.664920701614802e-06, "loss": 0.6847, "step": 21830 }, { "epoch": 0.26101459845287484, "grad_norm": 3.9184350967407227, "learning_rate": 8.664788991648876e-06, "loss": 0.6084, "step": 21831 }, { "epoch": 0.2610265545977355, "grad_norm": 1.5669499635696411, "learning_rate": 8.664657276187584e-06, "loss": 0.6597, "step": 21832 }, { "epoch": 0.26103851074259615, "grad_norm": 2.0134172439575195, "learning_rate": 8.664525555231125e-06, "loss": 0.5355, "step": 21833 }, { "epoch": 0.2610504668874568, "grad_norm": 1.920129418373108, "learning_rate": 8.664393828779698e-06, "loss": 0.6095, "step": 21834 }, { "epoch": 0.26106242303231747, "grad_norm": 3.119945526123047, "learning_rate": 8.6642620968335e-06, "loss": 0.5667, "step": 21835 }, { "epoch": 0.26107437917717813, "grad_norm": 3.6072561740875244, "learning_rate": 8.664130359392728e-06, "loss": 0.5965, "step": 21836 }, { "epoch": 0.2610863353220388, "grad_norm": 3.8672637939453125, "learning_rate": 8.663998616457579e-06, "loss": 0.6426, "step": 21837 }, { "epoch": 0.2610982914668994, "grad_norm": 1.9988293647766113, "learning_rate": 8.66386686802825e-06, "loss": 0.6561, "step": 21838 }, { "epoch": 0.26111024761176005, "grad_norm": 1.992345929145813, "learning_rate": 8.66373511410494e-06, "loss": 0.5298, "step": 21839 }, { "epoch": 0.2611222037566207, "grad_norm": 2.8907246589660645, "learning_rate": 8.663603354687848e-06, "loss": 0.5794, "step": 21840 }, { "epoch": 0.26113415990148137, "grad_norm": 2.268155813217163, "learning_rate": 8.663471589777167e-06, "loss": 0.643, "step": 21841 }, { "epoch": 0.261146116046342, "grad_norm": 4.113855361938477, "learning_rate": 8.663339819373099e-06, "loss": 0.6232, "step": 21842 }, { "epoch": 0.2611580721912027, "grad_norm": 2.08017635345459, "learning_rate": 8.66320804347584e-06, "loss": 0.6365, "step": 21843 }, { "epoch": 0.26117002833606334, "grad_norm": 12.719132423400879, "learning_rate": 8.663076262085588e-06, "loss": 0.5269, "step": 21844 }, { "epoch": 0.26118198448092395, "grad_norm": 2.980126142501831, "learning_rate": 8.66294447520254e-06, "loss": 0.665, "step": 21845 }, { "epoch": 0.2611939406257846, "grad_norm": 3.3624982833862305, "learning_rate": 8.662812682826892e-06, "loss": 0.6262, "step": 21846 }, { "epoch": 0.26120589677064526, "grad_norm": 3.7133724689483643, "learning_rate": 8.662680884958844e-06, "loss": 0.6553, "step": 21847 }, { "epoch": 0.2612178529155059, "grad_norm": 6.688416957855225, "learning_rate": 8.662549081598593e-06, "loss": 0.6093, "step": 21848 }, { "epoch": 0.2612298090603666, "grad_norm": 2.0906693935394287, "learning_rate": 8.662417272746337e-06, "loss": 0.6619, "step": 21849 }, { "epoch": 0.26124176520522724, "grad_norm": 2.729513645172119, "learning_rate": 8.662285458402275e-06, "loss": 0.6462, "step": 21850 }, { "epoch": 0.2612537213500879, "grad_norm": 4.146589279174805, "learning_rate": 8.6621536385666e-06, "loss": 0.7101, "step": 21851 }, { "epoch": 0.26126567749494856, "grad_norm": 3.6432063579559326, "learning_rate": 8.662021813239514e-06, "loss": 0.7618, "step": 21852 }, { "epoch": 0.26127763363980916, "grad_norm": 4.556940078735352, "learning_rate": 8.661889982421212e-06, "loss": 0.5821, "step": 21853 }, { "epoch": 0.2612895897846698, "grad_norm": 3.04436993598938, "learning_rate": 8.661758146111897e-06, "loss": 0.6378, "step": 21854 }, { "epoch": 0.2613015459295305, "grad_norm": 2.645533323287964, "learning_rate": 8.661626304311758e-06, "loss": 0.6524, "step": 21855 }, { "epoch": 0.26131350207439114, "grad_norm": 2.821227550506592, "learning_rate": 8.661494457021e-06, "loss": 0.5601, "step": 21856 }, { "epoch": 0.2613254582192518, "grad_norm": 2.8965554237365723, "learning_rate": 8.661362604239815e-06, "loss": 0.6105, "step": 21857 }, { "epoch": 0.26133741436411245, "grad_norm": 2.9816582202911377, "learning_rate": 8.661230745968408e-06, "loss": 0.4834, "step": 21858 }, { "epoch": 0.2613493705089731, "grad_norm": 2.7785699367523193, "learning_rate": 8.661098882206968e-06, "loss": 0.5289, "step": 21859 }, { "epoch": 0.2613613266538337, "grad_norm": 5.240564346313477, "learning_rate": 8.660967012955701e-06, "loss": 0.5516, "step": 21860 }, { "epoch": 0.2613732827986944, "grad_norm": 3.1005008220672607, "learning_rate": 8.660835138214799e-06, "loss": 0.6576, "step": 21861 }, { "epoch": 0.26138523894355503, "grad_norm": 2.288498878479004, "learning_rate": 8.660703257984462e-06, "loss": 0.5816, "step": 21862 }, { "epoch": 0.2613971950884157, "grad_norm": 2.021454095840454, "learning_rate": 8.660571372264887e-06, "loss": 0.5299, "step": 21863 }, { "epoch": 0.26140915123327635, "grad_norm": 4.523050785064697, "learning_rate": 8.660439481056275e-06, "loss": 0.73, "step": 21864 }, { "epoch": 0.261421107378137, "grad_norm": 1.9658961296081543, "learning_rate": 8.660307584358818e-06, "loss": 0.6738, "step": 21865 }, { "epoch": 0.26143306352299767, "grad_norm": 1.8388820886611938, "learning_rate": 8.660175682172718e-06, "loss": 0.6384, "step": 21866 }, { "epoch": 0.26144501966785827, "grad_norm": 1.6921923160552979, "learning_rate": 8.660043774498171e-06, "loss": 0.6206, "step": 21867 }, { "epoch": 0.26145697581271893, "grad_norm": 1.7023138999938965, "learning_rate": 8.659911861335375e-06, "loss": 0.6608, "step": 21868 }, { "epoch": 0.2614689319575796, "grad_norm": 4.047979831695557, "learning_rate": 8.65977994268453e-06, "loss": 0.6324, "step": 21869 }, { "epoch": 0.26148088810244025, "grad_norm": 3.3699920177459717, "learning_rate": 8.65964801854583e-06, "loss": 0.6098, "step": 21870 }, { "epoch": 0.2614928442473009, "grad_norm": 2.184260129928589, "learning_rate": 8.659516088919475e-06, "loss": 0.549, "step": 21871 }, { "epoch": 0.26150480039216156, "grad_norm": 2.068880081176758, "learning_rate": 8.659384153805664e-06, "loss": 0.6709, "step": 21872 }, { "epoch": 0.2615167565370222, "grad_norm": 2.172602891921997, "learning_rate": 8.659252213204592e-06, "loss": 0.552, "step": 21873 }, { "epoch": 0.2615287126818829, "grad_norm": 2.6998684406280518, "learning_rate": 8.65912026711646e-06, "loss": 0.5726, "step": 21874 }, { "epoch": 0.2615406688267435, "grad_norm": 3.561758518218994, "learning_rate": 8.658988315541462e-06, "loss": 0.5756, "step": 21875 }, { "epoch": 0.26155262497160414, "grad_norm": 1.7167493104934692, "learning_rate": 8.658856358479799e-06, "loss": 0.6699, "step": 21876 }, { "epoch": 0.2615645811164648, "grad_norm": 4.253298282623291, "learning_rate": 8.658724395931668e-06, "loss": 0.5853, "step": 21877 }, { "epoch": 0.26157653726132546, "grad_norm": 3.3619179725646973, "learning_rate": 8.658592427897266e-06, "loss": 0.6091, "step": 21878 }, { "epoch": 0.2615884934061861, "grad_norm": 5.002792835235596, "learning_rate": 8.658460454376793e-06, "loss": 0.5676, "step": 21879 }, { "epoch": 0.2616004495510468, "grad_norm": 2.1054117679595947, "learning_rate": 8.658328475370445e-06, "loss": 0.5979, "step": 21880 }, { "epoch": 0.26161240569590744, "grad_norm": 10.503273963928223, "learning_rate": 8.65819649087842e-06, "loss": 0.6078, "step": 21881 }, { "epoch": 0.26162436184076804, "grad_norm": 2.7418103218078613, "learning_rate": 8.658064500900917e-06, "loss": 0.5785, "step": 21882 }, { "epoch": 0.2616363179856287, "grad_norm": 1.9000194072723389, "learning_rate": 8.65793250543813e-06, "loss": 0.5696, "step": 21883 }, { "epoch": 0.26164827413048936, "grad_norm": 2.046260118484497, "learning_rate": 8.657800504490266e-06, "loss": 0.5551, "step": 21884 }, { "epoch": 0.26166023027535, "grad_norm": 1.9177355766296387, "learning_rate": 8.657668498057514e-06, "loss": 0.5676, "step": 21885 }, { "epoch": 0.2616721864202107, "grad_norm": 2.607135534286499, "learning_rate": 8.657536486140076e-06, "loss": 0.7072, "step": 21886 }, { "epoch": 0.26168414256507133, "grad_norm": 2.522831916809082, "learning_rate": 8.657404468738147e-06, "loss": 0.6748, "step": 21887 }, { "epoch": 0.261696098709932, "grad_norm": 3.0133419036865234, "learning_rate": 8.65727244585193e-06, "loss": 0.6203, "step": 21888 }, { "epoch": 0.2617080548547926, "grad_norm": 2.433295965194702, "learning_rate": 8.657140417481618e-06, "loss": 0.6157, "step": 21889 }, { "epoch": 0.26172001099965325, "grad_norm": 2.4310684204101562, "learning_rate": 8.65700838362741e-06, "loss": 0.6029, "step": 21890 }, { "epoch": 0.2617319671445139, "grad_norm": 9.565752983093262, "learning_rate": 8.656876344289507e-06, "loss": 0.6591, "step": 21891 }, { "epoch": 0.26174392328937457, "grad_norm": 1.9202858209609985, "learning_rate": 8.656744299468104e-06, "loss": 0.6242, "step": 21892 }, { "epoch": 0.26175587943423523, "grad_norm": 2.1318914890289307, "learning_rate": 8.6566122491634e-06, "loss": 0.5589, "step": 21893 }, { "epoch": 0.2617678355790959, "grad_norm": 3.182443618774414, "learning_rate": 8.656480193375594e-06, "loss": 0.5322, "step": 21894 }, { "epoch": 0.26177979172395655, "grad_norm": 3.5882625579833984, "learning_rate": 8.656348132104881e-06, "loss": 0.6333, "step": 21895 }, { "epoch": 0.2617917478688172, "grad_norm": 2.5216429233551025, "learning_rate": 8.656216065351462e-06, "loss": 0.6584, "step": 21896 }, { "epoch": 0.2618037040136778, "grad_norm": 2.5312042236328125, "learning_rate": 8.656083993115534e-06, "loss": 0.6229, "step": 21897 }, { "epoch": 0.26181566015853847, "grad_norm": 2.715773582458496, "learning_rate": 8.655951915397296e-06, "loss": 0.6186, "step": 21898 }, { "epoch": 0.2618276163033991, "grad_norm": 2.0741019248962402, "learning_rate": 8.655819832196943e-06, "loss": 0.6544, "step": 21899 }, { "epoch": 0.2618395724482598, "grad_norm": 2.921052932739258, "learning_rate": 8.655687743514678e-06, "loss": 0.5218, "step": 21900 }, { "epoch": 0.26185152859312044, "grad_norm": 78.31507873535156, "learning_rate": 8.655555649350693e-06, "loss": 0.6708, "step": 21901 }, { "epoch": 0.2618634847379811, "grad_norm": 1.6414943933486938, "learning_rate": 8.65542354970519e-06, "loss": 0.6825, "step": 21902 }, { "epoch": 0.26187544088284176, "grad_norm": 2.0164380073547363, "learning_rate": 8.655291444578368e-06, "loss": 0.5956, "step": 21903 }, { "epoch": 0.26188739702770236, "grad_norm": 2.4904706478118896, "learning_rate": 8.655159333970423e-06, "loss": 0.4808, "step": 21904 }, { "epoch": 0.261899353172563, "grad_norm": 2.427826166152954, "learning_rate": 8.655027217881553e-06, "loss": 0.6182, "step": 21905 }, { "epoch": 0.2619113093174237, "grad_norm": 2.2474825382232666, "learning_rate": 8.654895096311958e-06, "loss": 0.6622, "step": 21906 }, { "epoch": 0.26192326546228434, "grad_norm": 1.6181464195251465, "learning_rate": 8.654762969261834e-06, "loss": 0.5951, "step": 21907 }, { "epoch": 0.261935221607145, "grad_norm": 5.267941951751709, "learning_rate": 8.654630836731379e-06, "loss": 0.6523, "step": 21908 }, { "epoch": 0.26194717775200566, "grad_norm": 3.32413387298584, "learning_rate": 8.654498698720793e-06, "loss": 0.5323, "step": 21909 }, { "epoch": 0.2619591338968663, "grad_norm": 1.8337986469268799, "learning_rate": 8.654366555230273e-06, "loss": 0.596, "step": 21910 }, { "epoch": 0.261971090041727, "grad_norm": 2.329315662384033, "learning_rate": 8.654234406260018e-06, "loss": 0.6111, "step": 21911 }, { "epoch": 0.2619830461865876, "grad_norm": 11.254980087280273, "learning_rate": 8.654102251810223e-06, "loss": 0.4875, "step": 21912 }, { "epoch": 0.26199500233144823, "grad_norm": 2.2044835090637207, "learning_rate": 8.653970091881093e-06, "loss": 0.5763, "step": 21913 }, { "epoch": 0.2620069584763089, "grad_norm": 2.13059401512146, "learning_rate": 8.65383792647282e-06, "loss": 0.6419, "step": 21914 }, { "epoch": 0.26201891462116955, "grad_norm": 3.367189407348633, "learning_rate": 8.653705755585602e-06, "loss": 0.6031, "step": 21915 }, { "epoch": 0.2620308707660302, "grad_norm": 4.883423328399658, "learning_rate": 8.653573579219641e-06, "loss": 0.6178, "step": 21916 }, { "epoch": 0.26204282691089087, "grad_norm": 3.2398645877838135, "learning_rate": 8.653441397375132e-06, "loss": 0.5965, "step": 21917 }, { "epoch": 0.26205478305575153, "grad_norm": 2.3912441730499268, "learning_rate": 8.653309210052277e-06, "loss": 0.604, "step": 21918 }, { "epoch": 0.26206673920061213, "grad_norm": 2.335869789123535, "learning_rate": 8.65317701725127e-06, "loss": 0.5693, "step": 21919 }, { "epoch": 0.2620786953454728, "grad_norm": 1.8027926683425903, "learning_rate": 8.653044818972311e-06, "loss": 0.5512, "step": 21920 }, { "epoch": 0.26209065149033345, "grad_norm": 2.392817258834839, "learning_rate": 8.652912615215599e-06, "loss": 0.6666, "step": 21921 }, { "epoch": 0.2621026076351941, "grad_norm": 8.055377006530762, "learning_rate": 8.652780405981333e-06, "loss": 0.6111, "step": 21922 }, { "epoch": 0.26211456378005477, "grad_norm": 3.6729447841644287, "learning_rate": 8.652648191269708e-06, "loss": 0.6025, "step": 21923 }, { "epoch": 0.2621265199249154, "grad_norm": 2.0765957832336426, "learning_rate": 8.652515971080924e-06, "loss": 0.646, "step": 21924 }, { "epoch": 0.2621384760697761, "grad_norm": 3.6050972938537598, "learning_rate": 8.652383745415181e-06, "loss": 0.6514, "step": 21925 }, { "epoch": 0.2621504322146367, "grad_norm": 2.811892032623291, "learning_rate": 8.652251514272674e-06, "loss": 0.6022, "step": 21926 }, { "epoch": 0.26216238835949734, "grad_norm": 4.251468658447266, "learning_rate": 8.652119277653604e-06, "loss": 0.6284, "step": 21927 }, { "epoch": 0.262174344504358, "grad_norm": 2.2160227298736572, "learning_rate": 8.651987035558166e-06, "loss": 0.621, "step": 21928 }, { "epoch": 0.26218630064921866, "grad_norm": 2.453328847885132, "learning_rate": 8.651854787986562e-06, "loss": 0.6112, "step": 21929 }, { "epoch": 0.2621982567940793, "grad_norm": 3.6189780235290527, "learning_rate": 8.651722534938988e-06, "loss": 0.614, "step": 21930 }, { "epoch": 0.26221021293894, "grad_norm": 4.2223615646362305, "learning_rate": 8.651590276415644e-06, "loss": 0.6517, "step": 21931 }, { "epoch": 0.26222216908380064, "grad_norm": 2.0769641399383545, "learning_rate": 8.651458012416727e-06, "loss": 0.625, "step": 21932 }, { "epoch": 0.2622341252286613, "grad_norm": 4.907998561859131, "learning_rate": 8.651325742942435e-06, "loss": 0.5981, "step": 21933 }, { "epoch": 0.2622460813735219, "grad_norm": 4.636595726013184, "learning_rate": 8.65119346799297e-06, "loss": 0.6538, "step": 21934 }, { "epoch": 0.26225803751838256, "grad_norm": 11.447127342224121, "learning_rate": 8.651061187568523e-06, "loss": 0.5335, "step": 21935 }, { "epoch": 0.2622699936632432, "grad_norm": 2.9408085346221924, "learning_rate": 8.6509289016693e-06, "loss": 0.5898, "step": 21936 }, { "epoch": 0.2622819498081039, "grad_norm": 1.9694472551345825, "learning_rate": 8.650796610295495e-06, "loss": 0.617, "step": 21937 }, { "epoch": 0.26229390595296453, "grad_norm": 2.08327054977417, "learning_rate": 8.650664313447308e-06, "loss": 0.6626, "step": 21938 }, { "epoch": 0.2623058620978252, "grad_norm": 2.29411244392395, "learning_rate": 8.650532011124936e-06, "loss": 0.6359, "step": 21939 }, { "epoch": 0.26231781824268585, "grad_norm": 2.2215096950531006, "learning_rate": 8.65039970332858e-06, "loss": 0.579, "step": 21940 }, { "epoch": 0.26232977438754645, "grad_norm": 2.7523326873779297, "learning_rate": 8.650267390058436e-06, "loss": 0.7196, "step": 21941 }, { "epoch": 0.2623417305324071, "grad_norm": 3.1373584270477295, "learning_rate": 8.650135071314702e-06, "loss": 0.6208, "step": 21942 }, { "epoch": 0.2623536866772678, "grad_norm": 3.035046100616455, "learning_rate": 8.650002747097578e-06, "loss": 0.6961, "step": 21943 }, { "epoch": 0.26236564282212843, "grad_norm": 2.1739284992218018, "learning_rate": 8.649870417407262e-06, "loss": 0.5681, "step": 21944 }, { "epoch": 0.2623775989669891, "grad_norm": 2.620358943939209, "learning_rate": 8.649738082243952e-06, "loss": 0.6121, "step": 21945 }, { "epoch": 0.26238955511184975, "grad_norm": 2.0451605319976807, "learning_rate": 8.649605741607847e-06, "loss": 0.6233, "step": 21946 }, { "epoch": 0.2624015112567104, "grad_norm": 4.31215238571167, "learning_rate": 8.649473395499146e-06, "loss": 0.5147, "step": 21947 }, { "epoch": 0.262413467401571, "grad_norm": 6.113190174102783, "learning_rate": 8.649341043918046e-06, "loss": 0.5746, "step": 21948 }, { "epoch": 0.26242542354643167, "grad_norm": 2.2502081394195557, "learning_rate": 8.649208686864745e-06, "loss": 0.6294, "step": 21949 }, { "epoch": 0.2624373796912923, "grad_norm": 1.4361168146133423, "learning_rate": 8.649076324339445e-06, "loss": 0.6216, "step": 21950 }, { "epoch": 0.262449335836153, "grad_norm": 19.804441452026367, "learning_rate": 8.648943956342341e-06, "loss": 0.614, "step": 21951 }, { "epoch": 0.26246129198101364, "grad_norm": 2.348942995071411, "learning_rate": 8.648811582873631e-06, "loss": 0.5747, "step": 21952 }, { "epoch": 0.2624732481258743, "grad_norm": 1.8762186765670776, "learning_rate": 8.648679203933516e-06, "loss": 0.5441, "step": 21953 }, { "epoch": 0.26248520427073496, "grad_norm": 4.022724151611328, "learning_rate": 8.648546819522196e-06, "loss": 0.5427, "step": 21954 }, { "epoch": 0.2624971604155956, "grad_norm": 2.73787522315979, "learning_rate": 8.648414429639865e-06, "loss": 0.6805, "step": 21955 }, { "epoch": 0.2625091165604562, "grad_norm": 3.089385509490967, "learning_rate": 8.648282034286724e-06, "loss": 0.7287, "step": 21956 }, { "epoch": 0.2625210727053169, "grad_norm": 22.19700813293457, "learning_rate": 8.64814963346297e-06, "loss": 0.6427, "step": 21957 }, { "epoch": 0.26253302885017754, "grad_norm": 4.448354244232178, "learning_rate": 8.648017227168803e-06, "loss": 0.5461, "step": 21958 }, { "epoch": 0.2625449849950382, "grad_norm": 4.363247871398926, "learning_rate": 8.647884815404422e-06, "loss": 0.6072, "step": 21959 }, { "epoch": 0.26255694113989886, "grad_norm": 2.0129919052124023, "learning_rate": 8.647752398170023e-06, "loss": 0.65, "step": 21960 }, { "epoch": 0.2625688972847595, "grad_norm": 1.614194393157959, "learning_rate": 8.647619975465809e-06, "loss": 0.6018, "step": 21961 }, { "epoch": 0.2625808534296202, "grad_norm": 2.5498580932617188, "learning_rate": 8.647487547291974e-06, "loss": 0.6007, "step": 21962 }, { "epoch": 0.2625928095744808, "grad_norm": 2.5938782691955566, "learning_rate": 8.64735511364872e-06, "loss": 0.6015, "step": 21963 }, { "epoch": 0.26260476571934144, "grad_norm": 4.103032112121582, "learning_rate": 8.647222674536244e-06, "loss": 0.5642, "step": 21964 }, { "epoch": 0.2626167218642021, "grad_norm": 1.6595613956451416, "learning_rate": 8.647090229954742e-06, "loss": 0.6073, "step": 21965 }, { "epoch": 0.26262867800906275, "grad_norm": 6.617701530456543, "learning_rate": 8.646957779904418e-06, "loss": 0.567, "step": 21966 }, { "epoch": 0.2626406341539234, "grad_norm": 1.776708722114563, "learning_rate": 8.646825324385467e-06, "loss": 0.678, "step": 21967 }, { "epoch": 0.26265259029878407, "grad_norm": 3.138796329498291, "learning_rate": 8.646692863398087e-06, "loss": 0.5593, "step": 21968 }, { "epoch": 0.26266454644364473, "grad_norm": 1.9356268644332886, "learning_rate": 8.64656039694248e-06, "loss": 0.5222, "step": 21969 }, { "epoch": 0.2626765025885054, "grad_norm": 2.646148920059204, "learning_rate": 8.646427925018843e-06, "loss": 0.6812, "step": 21970 }, { "epoch": 0.262688458733366, "grad_norm": 2.1120941638946533, "learning_rate": 8.646295447627373e-06, "loss": 0.6241, "step": 21971 }, { "epoch": 0.26270041487822665, "grad_norm": 2.162769317626953, "learning_rate": 8.64616296476827e-06, "loss": 0.5337, "step": 21972 }, { "epoch": 0.2627123710230873, "grad_norm": 1.5831360816955566, "learning_rate": 8.646030476441734e-06, "loss": 0.6128, "step": 21973 }, { "epoch": 0.26272432716794797, "grad_norm": 2.121638298034668, "learning_rate": 8.645897982647962e-06, "loss": 0.6869, "step": 21974 }, { "epoch": 0.2627362833128086, "grad_norm": 1.9619379043579102, "learning_rate": 8.645765483387151e-06, "loss": 0.6369, "step": 21975 }, { "epoch": 0.2627482394576693, "grad_norm": 2.4260168075561523, "learning_rate": 8.645632978659504e-06, "loss": 0.5813, "step": 21976 }, { "epoch": 0.26276019560252994, "grad_norm": 2.412620782852173, "learning_rate": 8.645500468465216e-06, "loss": 0.7264, "step": 21977 }, { "epoch": 0.26277215174739055, "grad_norm": 2.8435823917388916, "learning_rate": 8.645367952804489e-06, "loss": 0.5106, "step": 21978 }, { "epoch": 0.2627841078922512, "grad_norm": 2.3391730785369873, "learning_rate": 8.645235431677518e-06, "loss": 0.6654, "step": 21979 }, { "epoch": 0.26279606403711187, "grad_norm": 1.9063527584075928, "learning_rate": 8.645102905084503e-06, "loss": 0.5524, "step": 21980 }, { "epoch": 0.2628080201819725, "grad_norm": 2.380743980407715, "learning_rate": 8.644970373025645e-06, "loss": 0.5843, "step": 21981 }, { "epoch": 0.2628199763268332, "grad_norm": 2.0110301971435547, "learning_rate": 8.644837835501141e-06, "loss": 0.4889, "step": 21982 }, { "epoch": 0.26283193247169384, "grad_norm": 2.2398481369018555, "learning_rate": 8.64470529251119e-06, "loss": 0.5724, "step": 21983 }, { "epoch": 0.2628438886165545, "grad_norm": 1.9944370985031128, "learning_rate": 8.644572744055988e-06, "loss": 0.5649, "step": 21984 }, { "epoch": 0.2628558447614151, "grad_norm": 1.9226568937301636, "learning_rate": 8.644440190135736e-06, "loss": 0.5523, "step": 21985 }, { "epoch": 0.26286780090627576, "grad_norm": 2.1063945293426514, "learning_rate": 8.644307630750635e-06, "loss": 0.6056, "step": 21986 }, { "epoch": 0.2628797570511364, "grad_norm": 3.344277858734131, "learning_rate": 8.644175065900882e-06, "loss": 0.6357, "step": 21987 }, { "epoch": 0.2628917131959971, "grad_norm": 2.57673716545105, "learning_rate": 8.644042495586676e-06, "loss": 0.5793, "step": 21988 }, { "epoch": 0.26290366934085774, "grad_norm": 2.4999427795410156, "learning_rate": 8.643909919808214e-06, "loss": 0.5021, "step": 21989 }, { "epoch": 0.2629156254857184, "grad_norm": 7.804208755493164, "learning_rate": 8.643777338565695e-06, "loss": 0.6009, "step": 21990 }, { "epoch": 0.26292758163057905, "grad_norm": 6.313608646392822, "learning_rate": 8.64364475185932e-06, "loss": 0.6448, "step": 21991 }, { "epoch": 0.2629395377754397, "grad_norm": 6.286041736602783, "learning_rate": 8.643512159689288e-06, "loss": 0.5617, "step": 21992 }, { "epoch": 0.2629514939203003, "grad_norm": 1.84547758102417, "learning_rate": 8.643379562055795e-06, "loss": 0.6329, "step": 21993 }, { "epoch": 0.262963450065161, "grad_norm": 4.261591911315918, "learning_rate": 8.643246958959042e-06, "loss": 0.6896, "step": 21994 }, { "epoch": 0.26297540621002163, "grad_norm": 1.6600767374038696, "learning_rate": 8.643114350399228e-06, "loss": 0.5672, "step": 21995 }, { "epoch": 0.2629873623548823, "grad_norm": 4.001858711242676, "learning_rate": 8.64298173637655e-06, "loss": 0.5914, "step": 21996 }, { "epoch": 0.26299931849974295, "grad_norm": 2.313835382461548, "learning_rate": 8.642849116891208e-06, "loss": 0.5253, "step": 21997 }, { "epoch": 0.2630112746446036, "grad_norm": 4.515830993652344, "learning_rate": 8.642716491943402e-06, "loss": 0.5461, "step": 21998 }, { "epoch": 0.26302323078946427, "grad_norm": 2.2060441970825195, "learning_rate": 8.642583861533327e-06, "loss": 0.5893, "step": 21999 }, { "epoch": 0.26303518693432487, "grad_norm": 1.4899888038635254, "learning_rate": 8.642451225661187e-06, "loss": 0.5274, "step": 22000 }, { "epoch": 0.26304714307918553, "grad_norm": 2.502847194671631, "learning_rate": 8.642318584327177e-06, "loss": 0.6429, "step": 22001 }, { "epoch": 0.2630590992240462, "grad_norm": 2.0032498836517334, "learning_rate": 8.642185937531498e-06, "loss": 0.6729, "step": 22002 }, { "epoch": 0.26307105536890685, "grad_norm": 1.8636324405670166, "learning_rate": 8.642053285274349e-06, "loss": 0.5226, "step": 22003 }, { "epoch": 0.2630830115137675, "grad_norm": 5.985151767730713, "learning_rate": 8.641920627555927e-06, "loss": 0.5298, "step": 22004 }, { "epoch": 0.26309496765862816, "grad_norm": 3.874973773956299, "learning_rate": 8.641787964376433e-06, "loss": 0.5535, "step": 22005 }, { "epoch": 0.2631069238034888, "grad_norm": 3.6046395301818848, "learning_rate": 8.641655295736064e-06, "loss": 0.642, "step": 22006 }, { "epoch": 0.2631188799483494, "grad_norm": 2.732152223587036, "learning_rate": 8.64152262163502e-06, "loss": 0.5951, "step": 22007 }, { "epoch": 0.2631308360932101, "grad_norm": 1.9341847896575928, "learning_rate": 8.641389942073501e-06, "loss": 0.5881, "step": 22008 }, { "epoch": 0.26314279223807074, "grad_norm": 2.125277519226074, "learning_rate": 8.641257257051705e-06, "loss": 0.6347, "step": 22009 }, { "epoch": 0.2631547483829314, "grad_norm": 2.0300512313842773, "learning_rate": 8.64112456656983e-06, "loss": 0.5709, "step": 22010 }, { "epoch": 0.26316670452779206, "grad_norm": 2.729240894317627, "learning_rate": 8.640991870628074e-06, "loss": 0.6378, "step": 22011 }, { "epoch": 0.2631786606726527, "grad_norm": 2.652440309524536, "learning_rate": 8.64085916922664e-06, "loss": 0.5838, "step": 22012 }, { "epoch": 0.2631906168175134, "grad_norm": 2.443972587585449, "learning_rate": 8.640726462365726e-06, "loss": 0.639, "step": 22013 }, { "epoch": 0.26320257296237404, "grad_norm": 2.6199982166290283, "learning_rate": 8.640593750045526e-06, "loss": 0.6191, "step": 22014 }, { "epoch": 0.26321452910723464, "grad_norm": 1.7890305519104004, "learning_rate": 8.640461032266246e-06, "loss": 0.5173, "step": 22015 }, { "epoch": 0.2632264852520953, "grad_norm": 2.791940450668335, "learning_rate": 8.64032830902808e-06, "loss": 0.6637, "step": 22016 }, { "epoch": 0.26323844139695596, "grad_norm": 5.990108013153076, "learning_rate": 8.640195580331228e-06, "loss": 0.4759, "step": 22017 }, { "epoch": 0.2632503975418166, "grad_norm": 1.750577688217163, "learning_rate": 8.640062846175892e-06, "loss": 0.5908, "step": 22018 }, { "epoch": 0.2632623536866773, "grad_norm": 2.489978551864624, "learning_rate": 8.639930106562266e-06, "loss": 0.7241, "step": 22019 }, { "epoch": 0.26327430983153793, "grad_norm": 2.5229709148406982, "learning_rate": 8.639797361490553e-06, "loss": 0.5768, "step": 22020 }, { "epoch": 0.2632862659763986, "grad_norm": 3.9041898250579834, "learning_rate": 8.639664610960954e-06, "loss": 0.74, "step": 22021 }, { "epoch": 0.2632982221212592, "grad_norm": 3.1132125854492188, "learning_rate": 8.639531854973662e-06, "loss": 0.7039, "step": 22022 }, { "epoch": 0.26331017826611985, "grad_norm": 1.898027777671814, "learning_rate": 8.63939909352888e-06, "loss": 0.5955, "step": 22023 }, { "epoch": 0.2633221344109805, "grad_norm": 2.048957109451294, "learning_rate": 8.639266326626805e-06, "loss": 0.5697, "step": 22024 }, { "epoch": 0.26333409055584117, "grad_norm": 2.2135117053985596, "learning_rate": 8.639133554267637e-06, "loss": 0.6007, "step": 22025 }, { "epoch": 0.26334604670070183, "grad_norm": 1.7126741409301758, "learning_rate": 8.639000776451577e-06, "loss": 0.5931, "step": 22026 }, { "epoch": 0.2633580028455625, "grad_norm": 9.20417308807373, "learning_rate": 8.63886799317882e-06, "loss": 0.5204, "step": 22027 }, { "epoch": 0.26336995899042315, "grad_norm": 2.4241905212402344, "learning_rate": 8.63873520444957e-06, "loss": 0.5724, "step": 22028 }, { "epoch": 0.2633819151352838, "grad_norm": 2.040797472000122, "learning_rate": 8.638602410264023e-06, "loss": 0.5876, "step": 22029 }, { "epoch": 0.2633938712801444, "grad_norm": 2.239548921585083, "learning_rate": 8.63846961062238e-06, "loss": 0.5805, "step": 22030 }, { "epoch": 0.26340582742500507, "grad_norm": 21.328195571899414, "learning_rate": 8.638336805524836e-06, "loss": 0.628, "step": 22031 }, { "epoch": 0.2634177835698657, "grad_norm": 3.0832459926605225, "learning_rate": 8.638203994971595e-06, "loss": 0.5784, "step": 22032 }, { "epoch": 0.2634297397147264, "grad_norm": 5.940131187438965, "learning_rate": 8.638071178962854e-06, "loss": 0.6829, "step": 22033 }, { "epoch": 0.26344169585958704, "grad_norm": 22.339468002319336, "learning_rate": 8.637938357498812e-06, "loss": 0.6546, "step": 22034 }, { "epoch": 0.2634536520044477, "grad_norm": 1.6420403718948364, "learning_rate": 8.637805530579669e-06, "loss": 0.6056, "step": 22035 }, { "epoch": 0.26346560814930836, "grad_norm": 2.463870048522949, "learning_rate": 8.637672698205622e-06, "loss": 0.6486, "step": 22036 }, { "epoch": 0.26347756429416896, "grad_norm": 1.760838508605957, "learning_rate": 8.637539860376874e-06, "loss": 0.6042, "step": 22037 }, { "epoch": 0.2634895204390296, "grad_norm": 2.9524953365325928, "learning_rate": 8.63740701709362e-06, "loss": 0.6378, "step": 22038 }, { "epoch": 0.2635014765838903, "grad_norm": 2.805716037750244, "learning_rate": 8.637274168356062e-06, "loss": 0.5952, "step": 22039 }, { "epoch": 0.26351343272875094, "grad_norm": 1.9118069410324097, "learning_rate": 8.6371413141644e-06, "loss": 0.5188, "step": 22040 }, { "epoch": 0.2635253888736116, "grad_norm": 1.9268518686294556, "learning_rate": 8.637008454518829e-06, "loss": 0.6224, "step": 22041 }, { "epoch": 0.26353734501847226, "grad_norm": 2.5723278522491455, "learning_rate": 8.636875589419552e-06, "loss": 0.54, "step": 22042 }, { "epoch": 0.2635493011633329, "grad_norm": 2.237212896347046, "learning_rate": 8.636742718866769e-06, "loss": 0.5938, "step": 22043 }, { "epoch": 0.2635612573081935, "grad_norm": 2.9345743656158447, "learning_rate": 8.636609842860676e-06, "loss": 0.6361, "step": 22044 }, { "epoch": 0.2635732134530542, "grad_norm": 5.501091480255127, "learning_rate": 8.636476961401472e-06, "loss": 0.6258, "step": 22045 }, { "epoch": 0.26358516959791484, "grad_norm": 3.463963508605957, "learning_rate": 8.63634407448936e-06, "loss": 0.611, "step": 22046 }, { "epoch": 0.2635971257427755, "grad_norm": 1.6451231241226196, "learning_rate": 8.636211182124537e-06, "loss": 0.6568, "step": 22047 }, { "epoch": 0.26360908188763615, "grad_norm": 2.9065096378326416, "learning_rate": 8.636078284307202e-06, "loss": 0.6612, "step": 22048 }, { "epoch": 0.2636210380324968, "grad_norm": 2.3866076469421387, "learning_rate": 8.635945381037554e-06, "loss": 0.6333, "step": 22049 }, { "epoch": 0.26363299417735747, "grad_norm": 4.717637538909912, "learning_rate": 8.635812472315792e-06, "loss": 0.5291, "step": 22050 }, { "epoch": 0.26364495032221813, "grad_norm": 2.074430465698242, "learning_rate": 8.635679558142119e-06, "loss": 0.5825, "step": 22051 }, { "epoch": 0.26365690646707873, "grad_norm": 2.972450017929077, "learning_rate": 8.63554663851673e-06, "loss": 0.6152, "step": 22052 }, { "epoch": 0.2636688626119394, "grad_norm": 1.520957350730896, "learning_rate": 8.635413713439827e-06, "loss": 0.6756, "step": 22053 }, { "epoch": 0.26368081875680005, "grad_norm": 7.147656440734863, "learning_rate": 8.635280782911607e-06, "loss": 0.626, "step": 22054 }, { "epoch": 0.2636927749016607, "grad_norm": 7.795617580413818, "learning_rate": 8.63514784693227e-06, "loss": 0.4917, "step": 22055 }, { "epoch": 0.26370473104652137, "grad_norm": 1.9518682956695557, "learning_rate": 8.635014905502017e-06, "loss": 0.5883, "step": 22056 }, { "epoch": 0.263716687191382, "grad_norm": 1.7380702495574951, "learning_rate": 8.634881958621046e-06, "loss": 0.6012, "step": 22057 }, { "epoch": 0.2637286433362427, "grad_norm": 2.7615976333618164, "learning_rate": 8.634749006289556e-06, "loss": 0.6125, "step": 22058 }, { "epoch": 0.2637405994811033, "grad_norm": 2.483247756958008, "learning_rate": 8.63461604850775e-06, "loss": 0.5592, "step": 22059 }, { "epoch": 0.26375255562596395, "grad_norm": 2.192643165588379, "learning_rate": 8.63448308527582e-06, "loss": 0.5335, "step": 22060 }, { "epoch": 0.2637645117708246, "grad_norm": 2.748263359069824, "learning_rate": 8.634350116593973e-06, "loss": 0.6482, "step": 22061 }, { "epoch": 0.26377646791568526, "grad_norm": 2.221853733062744, "learning_rate": 8.634217142462403e-06, "loss": 0.5909, "step": 22062 }, { "epoch": 0.2637884240605459, "grad_norm": 2.205530881881714, "learning_rate": 8.634084162881312e-06, "loss": 0.5628, "step": 22063 }, { "epoch": 0.2638003802054066, "grad_norm": 4.6532158851623535, "learning_rate": 8.633951177850898e-06, "loss": 0.5892, "step": 22064 }, { "epoch": 0.26381233635026724, "grad_norm": 2.8122785091400146, "learning_rate": 8.633818187371363e-06, "loss": 0.6074, "step": 22065 }, { "epoch": 0.26382429249512784, "grad_norm": 2.7482712268829346, "learning_rate": 8.633685191442904e-06, "loss": 0.5643, "step": 22066 }, { "epoch": 0.2638362486399885, "grad_norm": 2.2669479846954346, "learning_rate": 8.633552190065722e-06, "loss": 0.5757, "step": 22067 }, { "epoch": 0.26384820478484916, "grad_norm": 2.430055856704712, "learning_rate": 8.633419183240014e-06, "loss": 0.5767, "step": 22068 }, { "epoch": 0.2638601609297098, "grad_norm": 3.3371455669403076, "learning_rate": 8.633286170965981e-06, "loss": 0.5474, "step": 22069 }, { "epoch": 0.2638721170745705, "grad_norm": 4.853588581085205, "learning_rate": 8.633153153243824e-06, "loss": 0.6184, "step": 22070 }, { "epoch": 0.26388407321943114, "grad_norm": 4.377864360809326, "learning_rate": 8.63302013007374e-06, "loss": 0.6128, "step": 22071 }, { "epoch": 0.2638960293642918, "grad_norm": 17.78346061706543, "learning_rate": 8.63288710145593e-06, "loss": 0.7157, "step": 22072 }, { "epoch": 0.26390798550915245, "grad_norm": 2.1286723613739014, "learning_rate": 8.632754067390592e-06, "loss": 0.5735, "step": 22073 }, { "epoch": 0.26391994165401306, "grad_norm": 4.794113636016846, "learning_rate": 8.632621027877928e-06, "loss": 0.5409, "step": 22074 }, { "epoch": 0.2639318977988737, "grad_norm": 12.2431058883667, "learning_rate": 8.632487982918133e-06, "loss": 0.6661, "step": 22075 }, { "epoch": 0.2639438539437344, "grad_norm": 7.230341911315918, "learning_rate": 8.632354932511412e-06, "loss": 0.5395, "step": 22076 }, { "epoch": 0.26395581008859503, "grad_norm": 2.239922523498535, "learning_rate": 8.632221876657961e-06, "loss": 0.5511, "step": 22077 }, { "epoch": 0.2639677662334557, "grad_norm": 7.22446870803833, "learning_rate": 8.632088815357981e-06, "loss": 0.5756, "step": 22078 }, { "epoch": 0.26397972237831635, "grad_norm": 3.377915143966675, "learning_rate": 8.63195574861167e-06, "loss": 0.6313, "step": 22079 }, { "epoch": 0.263991678523177, "grad_norm": 3.38162899017334, "learning_rate": 8.631822676419229e-06, "loss": 0.5974, "step": 22080 }, { "epoch": 0.2640036346680376, "grad_norm": 1.9079322814941406, "learning_rate": 8.631689598780856e-06, "loss": 0.5478, "step": 22081 }, { "epoch": 0.26401559081289827, "grad_norm": 1.5890899896621704, "learning_rate": 8.631556515696754e-06, "loss": 0.6625, "step": 22082 }, { "epoch": 0.26402754695775893, "grad_norm": 2.8630659580230713, "learning_rate": 8.631423427167116e-06, "loss": 0.5204, "step": 22083 }, { "epoch": 0.2640395031026196, "grad_norm": 2.9265310764312744, "learning_rate": 8.631290333192149e-06, "loss": 0.5069, "step": 22084 }, { "epoch": 0.26405145924748025, "grad_norm": 2.3544654846191406, "learning_rate": 8.631157233772047e-06, "loss": 0.6117, "step": 22085 }, { "epoch": 0.2640634153923409, "grad_norm": 2.8412325382232666, "learning_rate": 8.631024128907013e-06, "loss": 0.6577, "step": 22086 }, { "epoch": 0.26407537153720156, "grad_norm": 2.1811635494232178, "learning_rate": 8.630891018597246e-06, "loss": 0.6382, "step": 22087 }, { "epoch": 0.2640873276820622, "grad_norm": 2.71492338180542, "learning_rate": 8.630757902842945e-06, "loss": 0.6701, "step": 22088 }, { "epoch": 0.2640992838269228, "grad_norm": 2.3834869861602783, "learning_rate": 8.630624781644308e-06, "loss": 0.5421, "step": 22089 }, { "epoch": 0.2641112399717835, "grad_norm": 2.6390185356140137, "learning_rate": 8.630491655001539e-06, "loss": 0.57, "step": 22090 }, { "epoch": 0.26412319611664414, "grad_norm": 3.025970935821533, "learning_rate": 8.630358522914833e-06, "loss": 0.6491, "step": 22091 }, { "epoch": 0.2641351522615048, "grad_norm": 4.798320293426514, "learning_rate": 8.63022538538439e-06, "loss": 0.537, "step": 22092 }, { "epoch": 0.26414710840636546, "grad_norm": 3.5174872875213623, "learning_rate": 8.630092242410413e-06, "loss": 0.6222, "step": 22093 }, { "epoch": 0.2641590645512261, "grad_norm": 1.5811764001846313, "learning_rate": 8.6299590939931e-06, "loss": 0.6171, "step": 22094 }, { "epoch": 0.2641710206960868, "grad_norm": 2.5720484256744385, "learning_rate": 8.629825940132651e-06, "loss": 0.6578, "step": 22095 }, { "epoch": 0.2641829768409474, "grad_norm": 3.6771743297576904, "learning_rate": 8.629692780829263e-06, "loss": 0.6003, "step": 22096 }, { "epoch": 0.26419493298580804, "grad_norm": 8.386305809020996, "learning_rate": 8.629559616083138e-06, "loss": 0.5058, "step": 22097 }, { "epoch": 0.2642068891306687, "grad_norm": 1.8443408012390137, "learning_rate": 8.629426445894478e-06, "loss": 0.5434, "step": 22098 }, { "epoch": 0.26421884527552936, "grad_norm": 3.2607359886169434, "learning_rate": 8.629293270263476e-06, "loss": 0.6471, "step": 22099 }, { "epoch": 0.26423080142039, "grad_norm": 11.699018478393555, "learning_rate": 8.629160089190341e-06, "loss": 0.6038, "step": 22100 }, { "epoch": 0.2642427575652507, "grad_norm": 1.5891870260238647, "learning_rate": 8.629026902675264e-06, "loss": 0.6367, "step": 22101 }, { "epoch": 0.26425471371011133, "grad_norm": 1.4093531370162964, "learning_rate": 8.628893710718448e-06, "loss": 0.5375, "step": 22102 }, { "epoch": 0.26426666985497194, "grad_norm": 3.099825382232666, "learning_rate": 8.628760513320094e-06, "loss": 0.6719, "step": 22103 }, { "epoch": 0.2642786259998326, "grad_norm": 1.5919039249420166, "learning_rate": 8.628627310480402e-06, "loss": 0.5487, "step": 22104 }, { "epoch": 0.26429058214469325, "grad_norm": 2.7728445529937744, "learning_rate": 8.628494102199569e-06, "loss": 0.6412, "step": 22105 }, { "epoch": 0.2643025382895539, "grad_norm": 2.5456223487854004, "learning_rate": 8.628360888477796e-06, "loss": 0.5131, "step": 22106 }, { "epoch": 0.26431449443441457, "grad_norm": 3.9203219413757324, "learning_rate": 8.628227669315283e-06, "loss": 0.562, "step": 22107 }, { "epoch": 0.26432645057927523, "grad_norm": 2.7825231552124023, "learning_rate": 8.628094444712231e-06, "loss": 0.6517, "step": 22108 }, { "epoch": 0.2643384067241359, "grad_norm": 2.547468423843384, "learning_rate": 8.627961214668838e-06, "loss": 0.6586, "step": 22109 }, { "epoch": 0.26435036286899655, "grad_norm": 3.110170364379883, "learning_rate": 8.627827979185304e-06, "loss": 0.6294, "step": 22110 }, { "epoch": 0.26436231901385715, "grad_norm": 5.106881141662598, "learning_rate": 8.627694738261829e-06, "loss": 0.4993, "step": 22111 }, { "epoch": 0.2643742751587178, "grad_norm": 1.8997211456298828, "learning_rate": 8.627561491898611e-06, "loss": 0.5744, "step": 22112 }, { "epoch": 0.26438623130357847, "grad_norm": 1.5552231073379517, "learning_rate": 8.627428240095854e-06, "loss": 0.6548, "step": 22113 }, { "epoch": 0.2643981874484391, "grad_norm": 2.446786403656006, "learning_rate": 8.627294982853755e-06, "loss": 0.5262, "step": 22114 }, { "epoch": 0.2644101435932998, "grad_norm": 2.98453688621521, "learning_rate": 8.627161720172514e-06, "loss": 0.6359, "step": 22115 }, { "epoch": 0.26442209973816044, "grad_norm": 1.6829192638397217, "learning_rate": 8.627028452052331e-06, "loss": 0.5765, "step": 22116 }, { "epoch": 0.2644340558830211, "grad_norm": 5.7591681480407715, "learning_rate": 8.626895178493406e-06, "loss": 0.6572, "step": 22117 }, { "epoch": 0.2644460120278817, "grad_norm": 2.8008246421813965, "learning_rate": 8.626761899495939e-06, "loss": 0.5901, "step": 22118 }, { "epoch": 0.26445796817274236, "grad_norm": 1.7889679670333862, "learning_rate": 8.626628615060132e-06, "loss": 0.5791, "step": 22119 }, { "epoch": 0.264469924317603, "grad_norm": 1.677269458770752, "learning_rate": 8.626495325186178e-06, "loss": 0.5267, "step": 22120 }, { "epoch": 0.2644818804624637, "grad_norm": 2.7455475330352783, "learning_rate": 8.626362029874285e-06, "loss": 0.5595, "step": 22121 }, { "epoch": 0.26449383660732434, "grad_norm": 3.486515522003174, "learning_rate": 8.626228729124647e-06, "loss": 0.7204, "step": 22122 }, { "epoch": 0.264505792752185, "grad_norm": 2.1810896396636963, "learning_rate": 8.626095422937466e-06, "loss": 0.5955, "step": 22123 }, { "epoch": 0.26451774889704566, "grad_norm": 3.8957629203796387, "learning_rate": 8.625962111312943e-06, "loss": 0.5431, "step": 22124 }, { "epoch": 0.26452970504190626, "grad_norm": 6.3954176902771, "learning_rate": 8.625828794251278e-06, "loss": 0.6128, "step": 22125 }, { "epoch": 0.2645416611867669, "grad_norm": 4.791439533233643, "learning_rate": 8.625695471752668e-06, "loss": 0.6992, "step": 22126 }, { "epoch": 0.2645536173316276, "grad_norm": 1.9106415510177612, "learning_rate": 8.625562143817316e-06, "loss": 0.6154, "step": 22127 }, { "epoch": 0.26456557347648824, "grad_norm": 2.4428904056549072, "learning_rate": 8.62542881044542e-06, "loss": 0.6, "step": 22128 }, { "epoch": 0.2645775296213489, "grad_norm": 5.322116374969482, "learning_rate": 8.62529547163718e-06, "loss": 0.6352, "step": 22129 }, { "epoch": 0.26458948576620955, "grad_norm": 1.7075313329696655, "learning_rate": 8.625162127392796e-06, "loss": 0.6232, "step": 22130 }, { "epoch": 0.2646014419110702, "grad_norm": 1.6878063678741455, "learning_rate": 8.62502877771247e-06, "loss": 0.5448, "step": 22131 }, { "epoch": 0.26461339805593087, "grad_norm": 6.306392192840576, "learning_rate": 8.624895422596402e-06, "loss": 0.554, "step": 22132 }, { "epoch": 0.2646253542007915, "grad_norm": 7.083683490753174, "learning_rate": 8.624762062044788e-06, "loss": 0.6224, "step": 22133 }, { "epoch": 0.26463731034565213, "grad_norm": 3.54038667678833, "learning_rate": 8.62462869605783e-06, "loss": 0.6577, "step": 22134 }, { "epoch": 0.2646492664905128, "grad_norm": 2.0561492443084717, "learning_rate": 8.62449532463573e-06, "loss": 0.6605, "step": 22135 }, { "epoch": 0.26466122263537345, "grad_norm": 4.836825847625732, "learning_rate": 8.624361947778685e-06, "loss": 0.6319, "step": 22136 }, { "epoch": 0.2646731787802341, "grad_norm": 2.4971702098846436, "learning_rate": 8.624228565486898e-06, "loss": 0.6399, "step": 22137 }, { "epoch": 0.26468513492509477, "grad_norm": 2.9935193061828613, "learning_rate": 8.624095177760566e-06, "loss": 0.6059, "step": 22138 }, { "epoch": 0.2646970910699554, "grad_norm": 2.402811288833618, "learning_rate": 8.623961784599893e-06, "loss": 0.7283, "step": 22139 }, { "epoch": 0.26470904721481603, "grad_norm": 3.1443610191345215, "learning_rate": 8.623828386005073e-06, "loss": 0.6036, "step": 22140 }, { "epoch": 0.2647210033596767, "grad_norm": 2.228483200073242, "learning_rate": 8.623694981976311e-06, "loss": 0.6071, "step": 22141 }, { "epoch": 0.26473295950453735, "grad_norm": 2.919933557510376, "learning_rate": 8.623561572513806e-06, "loss": 0.5436, "step": 22142 }, { "epoch": 0.264744915649398, "grad_norm": 2.2396304607391357, "learning_rate": 8.623428157617755e-06, "loss": 0.625, "step": 22143 }, { "epoch": 0.26475687179425866, "grad_norm": 1.8221921920776367, "learning_rate": 8.623294737288363e-06, "loss": 0.547, "step": 22144 }, { "epoch": 0.2647688279391193, "grad_norm": 2.4182674884796143, "learning_rate": 8.623161311525827e-06, "loss": 0.7822, "step": 22145 }, { "epoch": 0.26478078408398, "grad_norm": 2.2027671337127686, "learning_rate": 8.623027880330349e-06, "loss": 0.6458, "step": 22146 }, { "epoch": 0.26479274022884064, "grad_norm": 2.8118700981140137, "learning_rate": 8.622894443702126e-06, "loss": 0.6247, "step": 22147 }, { "epoch": 0.26480469637370124, "grad_norm": 7.316366195678711, "learning_rate": 8.622761001641362e-06, "loss": 0.6132, "step": 22148 }, { "epoch": 0.2648166525185619, "grad_norm": 2.735898971557617, "learning_rate": 8.622627554148253e-06, "loss": 0.5109, "step": 22149 }, { "epoch": 0.26482860866342256, "grad_norm": 2.0662341117858887, "learning_rate": 8.622494101223001e-06, "loss": 0.6638, "step": 22150 }, { "epoch": 0.2648405648082832, "grad_norm": 3.3059723377227783, "learning_rate": 8.622360642865807e-06, "loss": 0.5745, "step": 22151 }, { "epoch": 0.2648525209531439, "grad_norm": 2.9239304065704346, "learning_rate": 8.622227179076869e-06, "loss": 0.6172, "step": 22152 }, { "epoch": 0.26486447709800454, "grad_norm": 2.437433958053589, "learning_rate": 8.622093709856391e-06, "loss": 0.6081, "step": 22153 }, { "epoch": 0.2648764332428652, "grad_norm": 2.8984739780426025, "learning_rate": 8.62196023520457e-06, "loss": 0.582, "step": 22154 }, { "epoch": 0.2648883893877258, "grad_norm": 4.769566059112549, "learning_rate": 8.621826755121604e-06, "loss": 0.5986, "step": 22155 }, { "epoch": 0.26490034553258646, "grad_norm": 2.421790361404419, "learning_rate": 8.621693269607699e-06, "loss": 0.5727, "step": 22156 }, { "epoch": 0.2649123016774471, "grad_norm": 3.6728286743164062, "learning_rate": 8.621559778663052e-06, "loss": 0.6102, "step": 22157 }, { "epoch": 0.2649242578223078, "grad_norm": 2.7819619178771973, "learning_rate": 8.621426282287861e-06, "loss": 0.6691, "step": 22158 }, { "epoch": 0.26493621396716843, "grad_norm": 6.046766757965088, "learning_rate": 8.62129278048233e-06, "loss": 0.6324, "step": 22159 }, { "epoch": 0.2649481701120291, "grad_norm": 3.7186007499694824, "learning_rate": 8.621159273246658e-06, "loss": 0.5855, "step": 22160 }, { "epoch": 0.26496012625688975, "grad_norm": 2.2523341178894043, "learning_rate": 8.621025760581044e-06, "loss": 0.64, "step": 22161 }, { "epoch": 0.26497208240175035, "grad_norm": 7.0424394607543945, "learning_rate": 8.620892242485689e-06, "loss": 0.6163, "step": 22162 }, { "epoch": 0.264984038546611, "grad_norm": 2.0009090900421143, "learning_rate": 8.620758718960794e-06, "loss": 0.6258, "step": 22163 }, { "epoch": 0.26499599469147167, "grad_norm": 2.0390625, "learning_rate": 8.620625190006558e-06, "loss": 0.5808, "step": 22164 }, { "epoch": 0.26500795083633233, "grad_norm": 2.660752534866333, "learning_rate": 8.62049165562318e-06, "loss": 0.6506, "step": 22165 }, { "epoch": 0.265019906981193, "grad_norm": 1.3190250396728516, "learning_rate": 8.620358115810864e-06, "loss": 0.5205, "step": 22166 }, { "epoch": 0.26503186312605365, "grad_norm": 3.572281837463379, "learning_rate": 8.620224570569808e-06, "loss": 0.6057, "step": 22167 }, { "epoch": 0.2650438192709143, "grad_norm": 3.0778884887695312, "learning_rate": 8.62009101990021e-06, "loss": 0.5552, "step": 22168 }, { "epoch": 0.26505577541577496, "grad_norm": 3.312373399734497, "learning_rate": 8.619957463802275e-06, "loss": 0.5549, "step": 22169 }, { "epoch": 0.26506773156063557, "grad_norm": 2.179337739944458, "learning_rate": 8.6198239022762e-06, "loss": 0.5802, "step": 22170 }, { "epoch": 0.2650796877054962, "grad_norm": 3.5370819568634033, "learning_rate": 8.619690335322188e-06, "loss": 0.5681, "step": 22171 }, { "epoch": 0.2650916438503569, "grad_norm": 2.9707229137420654, "learning_rate": 8.619556762940435e-06, "loss": 0.7114, "step": 22172 }, { "epoch": 0.26510359999521754, "grad_norm": 2.712376594543457, "learning_rate": 8.619423185131147e-06, "loss": 0.6168, "step": 22173 }, { "epoch": 0.2651155561400782, "grad_norm": 2.5435428619384766, "learning_rate": 8.619289601894518e-06, "loss": 0.672, "step": 22174 }, { "epoch": 0.26512751228493886, "grad_norm": 1.866608738899231, "learning_rate": 8.619156013230751e-06, "loss": 0.602, "step": 22175 }, { "epoch": 0.2651394684297995, "grad_norm": 1.7262660264968872, "learning_rate": 8.619022419140048e-06, "loss": 0.5376, "step": 22176 }, { "epoch": 0.2651514245746601, "grad_norm": 3.27133846282959, "learning_rate": 8.618888819622609e-06, "loss": 0.6332, "step": 22177 }, { "epoch": 0.2651633807195208, "grad_norm": 2.3904221057891846, "learning_rate": 8.618755214678632e-06, "loss": 0.606, "step": 22178 }, { "epoch": 0.26517533686438144, "grad_norm": 6.1064066886901855, "learning_rate": 8.618621604308319e-06, "loss": 0.6126, "step": 22179 }, { "epoch": 0.2651872930092421, "grad_norm": 1.6016539335250854, "learning_rate": 8.61848798851187e-06, "loss": 0.5917, "step": 22180 }, { "epoch": 0.26519924915410276, "grad_norm": 2.4695229530334473, "learning_rate": 8.618354367289486e-06, "loss": 0.5432, "step": 22181 }, { "epoch": 0.2652112052989634, "grad_norm": 2.742048740386963, "learning_rate": 8.618220740641365e-06, "loss": 0.5689, "step": 22182 }, { "epoch": 0.2652231614438241, "grad_norm": 4.219211101531982, "learning_rate": 8.618087108567712e-06, "loss": 0.7126, "step": 22183 }, { "epoch": 0.2652351175886847, "grad_norm": 5.905008316040039, "learning_rate": 8.617953471068721e-06, "loss": 0.5591, "step": 22184 }, { "epoch": 0.26524707373354534, "grad_norm": 1.6198780536651611, "learning_rate": 8.617819828144598e-06, "loss": 0.5931, "step": 22185 }, { "epoch": 0.265259029878406, "grad_norm": 1.6663987636566162, "learning_rate": 8.61768617979554e-06, "loss": 0.5864, "step": 22186 }, { "epoch": 0.26527098602326665, "grad_norm": 2.7491445541381836, "learning_rate": 8.617552526021752e-06, "loss": 0.7511, "step": 22187 }, { "epoch": 0.2652829421681273, "grad_norm": 2.1404242515563965, "learning_rate": 8.617418866823429e-06, "loss": 0.555, "step": 22188 }, { "epoch": 0.26529489831298797, "grad_norm": 1.6355470418930054, "learning_rate": 8.617285202200773e-06, "loss": 0.5594, "step": 22189 }, { "epoch": 0.26530685445784863, "grad_norm": 1.789131760597229, "learning_rate": 8.617151532153987e-06, "loss": 0.6255, "step": 22190 }, { "epoch": 0.2653188106027093, "grad_norm": 2.11588454246521, "learning_rate": 8.617017856683267e-06, "loss": 0.7141, "step": 22191 }, { "epoch": 0.2653307667475699, "grad_norm": 2.506399631500244, "learning_rate": 8.616884175788817e-06, "loss": 0.5776, "step": 22192 }, { "epoch": 0.26534272289243055, "grad_norm": 5.323897838592529, "learning_rate": 8.616750489470835e-06, "loss": 0.6248, "step": 22193 }, { "epoch": 0.2653546790372912, "grad_norm": 5.716737270355225, "learning_rate": 8.616616797729525e-06, "loss": 0.6129, "step": 22194 }, { "epoch": 0.26536663518215187, "grad_norm": 2.0790131092071533, "learning_rate": 8.616483100565082e-06, "loss": 0.5685, "step": 22195 }, { "epoch": 0.2653785913270125, "grad_norm": 1.9810962677001953, "learning_rate": 8.616349397977714e-06, "loss": 0.5566, "step": 22196 }, { "epoch": 0.2653905474718732, "grad_norm": 3.973076820373535, "learning_rate": 8.616215689967616e-06, "loss": 0.6268, "step": 22197 }, { "epoch": 0.26540250361673384, "grad_norm": 1.7612978219985962, "learning_rate": 8.616081976534986e-06, "loss": 0.5405, "step": 22198 }, { "epoch": 0.26541445976159445, "grad_norm": 2.4224507808685303, "learning_rate": 8.615948257680033e-06, "loss": 0.6057, "step": 22199 }, { "epoch": 0.2654264159064551, "grad_norm": 5.374908447265625, "learning_rate": 8.61581453340295e-06, "loss": 0.631, "step": 22200 }, { "epoch": 0.26543837205131576, "grad_norm": 6.822108268737793, "learning_rate": 8.61568080370394e-06, "loss": 0.6236, "step": 22201 }, { "epoch": 0.2654503281961764, "grad_norm": 1.9545965194702148, "learning_rate": 8.615547068583205e-06, "loss": 0.6394, "step": 22202 }, { "epoch": 0.2654622843410371, "grad_norm": 9.833747863769531, "learning_rate": 8.615413328040945e-06, "loss": 0.66, "step": 22203 }, { "epoch": 0.26547424048589774, "grad_norm": 2.1347484588623047, "learning_rate": 8.61527958207736e-06, "loss": 0.6025, "step": 22204 }, { "epoch": 0.2654861966307584, "grad_norm": 2.3036179542541504, "learning_rate": 8.615145830692648e-06, "loss": 0.6191, "step": 22205 }, { "epoch": 0.26549815277561906, "grad_norm": 3.900245189666748, "learning_rate": 8.615012073887015e-06, "loss": 0.6362, "step": 22206 }, { "epoch": 0.26551010892047966, "grad_norm": 2.696413993835449, "learning_rate": 8.614878311660654e-06, "loss": 0.6387, "step": 22207 }, { "epoch": 0.2655220650653403, "grad_norm": 2.708178758621216, "learning_rate": 8.614744544013776e-06, "loss": 0.6004, "step": 22208 }, { "epoch": 0.265534021210201, "grad_norm": 2.0790278911590576, "learning_rate": 8.614610770946572e-06, "loss": 0.7624, "step": 22209 }, { "epoch": 0.26554597735506164, "grad_norm": 1.9501616954803467, "learning_rate": 8.614476992459246e-06, "loss": 0.6095, "step": 22210 }, { "epoch": 0.2655579334999223, "grad_norm": 6.566558361053467, "learning_rate": 8.614343208552001e-06, "loss": 0.7166, "step": 22211 }, { "epoch": 0.26556988964478295, "grad_norm": 3.171004056930542, "learning_rate": 8.614209419225035e-06, "loss": 0.549, "step": 22212 }, { "epoch": 0.2655818457896436, "grad_norm": 2.9531261920928955, "learning_rate": 8.614075624478548e-06, "loss": 0.5625, "step": 22213 }, { "epoch": 0.2655938019345042, "grad_norm": 2.781977653503418, "learning_rate": 8.613941824312743e-06, "loss": 0.5096, "step": 22214 }, { "epoch": 0.2656057580793649, "grad_norm": 1.5436205863952637, "learning_rate": 8.613808018727818e-06, "loss": 0.5068, "step": 22215 }, { "epoch": 0.26561771422422553, "grad_norm": 4.630314350128174, "learning_rate": 8.613674207723977e-06, "loss": 0.6275, "step": 22216 }, { "epoch": 0.2656296703690862, "grad_norm": 3.135035276412964, "learning_rate": 8.613540391301416e-06, "loss": 0.6061, "step": 22217 }, { "epoch": 0.26564162651394685, "grad_norm": 2.594247341156006, "learning_rate": 8.61340656946034e-06, "loss": 0.5724, "step": 22218 }, { "epoch": 0.2656535826588075, "grad_norm": 3.556605100631714, "learning_rate": 8.613272742200949e-06, "loss": 0.6109, "step": 22219 }, { "epoch": 0.26566553880366817, "grad_norm": 1.9958271980285645, "learning_rate": 8.61313890952344e-06, "loss": 0.5895, "step": 22220 }, { "epoch": 0.26567749494852877, "grad_norm": 2.8453683853149414, "learning_rate": 8.613005071428017e-06, "loss": 0.5985, "step": 22221 }, { "epoch": 0.26568945109338943, "grad_norm": 2.274681568145752, "learning_rate": 8.612871227914883e-06, "loss": 0.6015, "step": 22222 }, { "epoch": 0.2657014072382501, "grad_norm": 1.880895733833313, "learning_rate": 8.612737378984232e-06, "loss": 0.6412, "step": 22223 }, { "epoch": 0.26571336338311075, "grad_norm": 14.592266082763672, "learning_rate": 8.61260352463627e-06, "loss": 0.6126, "step": 22224 }, { "epoch": 0.2657253195279714, "grad_norm": 2.159298896789551, "learning_rate": 8.612469664871196e-06, "loss": 0.5988, "step": 22225 }, { "epoch": 0.26573727567283206, "grad_norm": 3.0020627975463867, "learning_rate": 8.612335799689213e-06, "loss": 0.55, "step": 22226 }, { "epoch": 0.2657492318176927, "grad_norm": 2.303374767303467, "learning_rate": 8.612201929090517e-06, "loss": 0.6086, "step": 22227 }, { "epoch": 0.2657611879625534, "grad_norm": 2.4563379287719727, "learning_rate": 8.612068053075312e-06, "loss": 0.5938, "step": 22228 }, { "epoch": 0.265773144107414, "grad_norm": 3.707627296447754, "learning_rate": 8.6119341716438e-06, "loss": 0.5971, "step": 22229 }, { "epoch": 0.26578510025227464, "grad_norm": 2.064340114593506, "learning_rate": 8.611800284796176e-06, "loss": 0.6319, "step": 22230 }, { "epoch": 0.2657970563971353, "grad_norm": 2.9731335639953613, "learning_rate": 8.611666392532647e-06, "loss": 0.6938, "step": 22231 }, { "epoch": 0.26580901254199596, "grad_norm": 12.088715553283691, "learning_rate": 8.611532494853412e-06, "loss": 0.5762, "step": 22232 }, { "epoch": 0.2658209686868566, "grad_norm": 1.7962709665298462, "learning_rate": 8.61139859175867e-06, "loss": 0.52, "step": 22233 }, { "epoch": 0.2658329248317173, "grad_norm": 3.3688607215881348, "learning_rate": 8.611264683248624e-06, "loss": 0.5543, "step": 22234 }, { "epoch": 0.26584488097657794, "grad_norm": 2.354546308517456, "learning_rate": 8.611130769323473e-06, "loss": 0.539, "step": 22235 }, { "epoch": 0.26585683712143854, "grad_norm": 1.8725467920303345, "learning_rate": 8.610996849983419e-06, "loss": 0.5977, "step": 22236 }, { "epoch": 0.2658687932662992, "grad_norm": 1.887393832206726, "learning_rate": 8.610862925228663e-06, "loss": 0.5526, "step": 22237 }, { "epoch": 0.26588074941115986, "grad_norm": 2.2712855339050293, "learning_rate": 8.610728995059402e-06, "loss": 0.6662, "step": 22238 }, { "epoch": 0.2658927055560205, "grad_norm": 1.631778597831726, "learning_rate": 8.610595059475843e-06, "loss": 0.5087, "step": 22239 }, { "epoch": 0.2659046617008812, "grad_norm": 2.9018213748931885, "learning_rate": 8.610461118478185e-06, "loss": 0.5711, "step": 22240 }, { "epoch": 0.26591661784574183, "grad_norm": 2.3636062145233154, "learning_rate": 8.610327172066624e-06, "loss": 0.7359, "step": 22241 }, { "epoch": 0.2659285739906025, "grad_norm": 2.0168564319610596, "learning_rate": 8.610193220241367e-06, "loss": 0.564, "step": 22242 }, { "epoch": 0.26594053013546315, "grad_norm": 2.144469976425171, "learning_rate": 8.610059263002612e-06, "loss": 0.6126, "step": 22243 }, { "epoch": 0.26595248628032375, "grad_norm": 2.3944201469421387, "learning_rate": 8.60992530035056e-06, "loss": 0.6189, "step": 22244 }, { "epoch": 0.2659644424251844, "grad_norm": 3.1283586025238037, "learning_rate": 8.609791332285413e-06, "loss": 0.599, "step": 22245 }, { "epoch": 0.26597639857004507, "grad_norm": 1.7602604627609253, "learning_rate": 8.60965735880737e-06, "loss": 0.5646, "step": 22246 }, { "epoch": 0.26598835471490573, "grad_norm": 4.416581153869629, "learning_rate": 8.609523379916634e-06, "loss": 0.6142, "step": 22247 }, { "epoch": 0.2660003108597664, "grad_norm": 1.796004295349121, "learning_rate": 8.609389395613403e-06, "loss": 0.5424, "step": 22248 }, { "epoch": 0.26601226700462705, "grad_norm": 5.296689510345459, "learning_rate": 8.609255405897881e-06, "loss": 0.6056, "step": 22249 }, { "epoch": 0.2660242231494877, "grad_norm": 3.833725690841675, "learning_rate": 8.609121410770269e-06, "loss": 0.6673, "step": 22250 }, { "epoch": 0.2660361792943483, "grad_norm": 3.6612720489501953, "learning_rate": 8.608987410230765e-06, "loss": 0.6065, "step": 22251 }, { "epoch": 0.26604813543920897, "grad_norm": 2.8785459995269775, "learning_rate": 8.60885340427957e-06, "loss": 0.6121, "step": 22252 }, { "epoch": 0.2660600915840696, "grad_norm": 1.9615592956542969, "learning_rate": 8.608719392916891e-06, "loss": 0.64, "step": 22253 }, { "epoch": 0.2660720477289303, "grad_norm": 2.1029155254364014, "learning_rate": 8.608585376142922e-06, "loss": 0.7214, "step": 22254 }, { "epoch": 0.26608400387379094, "grad_norm": 11.484476089477539, "learning_rate": 8.608451353957865e-06, "loss": 0.6551, "step": 22255 }, { "epoch": 0.2660959600186516, "grad_norm": 2.1900858879089355, "learning_rate": 8.608317326361923e-06, "loss": 0.5471, "step": 22256 }, { "epoch": 0.26610791616351226, "grad_norm": 2.8031303882598877, "learning_rate": 8.6081832933553e-06, "loss": 0.6276, "step": 22257 }, { "epoch": 0.26611987230837286, "grad_norm": 2.1985974311828613, "learning_rate": 8.60804925493819e-06, "loss": 0.5953, "step": 22258 }, { "epoch": 0.2661318284532335, "grad_norm": 2.167598009109497, "learning_rate": 8.607915211110796e-06, "loss": 0.6378, "step": 22259 }, { "epoch": 0.2661437845980942, "grad_norm": 3.1722679138183594, "learning_rate": 8.607781161873321e-06, "loss": 0.6334, "step": 22260 }, { "epoch": 0.26615574074295484, "grad_norm": 4.518165588378906, "learning_rate": 8.607647107225967e-06, "loss": 0.5959, "step": 22261 }, { "epoch": 0.2661676968878155, "grad_norm": 1.922926902770996, "learning_rate": 8.607513047168935e-06, "loss": 0.5898, "step": 22262 }, { "epoch": 0.26617965303267616, "grad_norm": 12.538970947265625, "learning_rate": 8.60737898170242e-06, "loss": 0.5663, "step": 22263 }, { "epoch": 0.2661916091775368, "grad_norm": 1.8477182388305664, "learning_rate": 8.607244910826629e-06, "loss": 0.6815, "step": 22264 }, { "epoch": 0.2662035653223975, "grad_norm": 2.7927470207214355, "learning_rate": 8.607110834541763e-06, "loss": 0.6411, "step": 22265 }, { "epoch": 0.2662155214672581, "grad_norm": 3.6307904720306396, "learning_rate": 8.60697675284802e-06, "loss": 0.5573, "step": 22266 }, { "epoch": 0.26622747761211873, "grad_norm": 3.680101156234741, "learning_rate": 8.606842665745603e-06, "loss": 0.6059, "step": 22267 }, { "epoch": 0.2662394337569794, "grad_norm": 5.863171100616455, "learning_rate": 8.606708573234713e-06, "loss": 0.6315, "step": 22268 }, { "epoch": 0.26625138990184005, "grad_norm": 4.397050380706787, "learning_rate": 8.606574475315551e-06, "loss": 0.7481, "step": 22269 }, { "epoch": 0.2662633460467007, "grad_norm": 51.73747253417969, "learning_rate": 8.606440371988314e-06, "loss": 0.6056, "step": 22270 }, { "epoch": 0.26627530219156137, "grad_norm": 2.4430336952209473, "learning_rate": 8.60630626325321e-06, "loss": 0.5869, "step": 22271 }, { "epoch": 0.26628725833642203, "grad_norm": 4.0691375732421875, "learning_rate": 8.606172149110437e-06, "loss": 0.6097, "step": 22272 }, { "epoch": 0.26629921448128263, "grad_norm": 2.18831467628479, "learning_rate": 8.606038029560195e-06, "loss": 0.6043, "step": 22273 }, { "epoch": 0.2663111706261433, "grad_norm": 4.208797931671143, "learning_rate": 8.605903904602688e-06, "loss": 0.7156, "step": 22274 }, { "epoch": 0.26632312677100395, "grad_norm": 3.0848193168640137, "learning_rate": 8.605769774238114e-06, "loss": 0.6165, "step": 22275 }, { "epoch": 0.2663350829158646, "grad_norm": 2.0346364974975586, "learning_rate": 8.605635638466675e-06, "loss": 0.5981, "step": 22276 }, { "epoch": 0.26634703906072527, "grad_norm": 5.284122943878174, "learning_rate": 8.605501497288573e-06, "loss": 0.5606, "step": 22277 }, { "epoch": 0.2663589952055859, "grad_norm": 1.965562343597412, "learning_rate": 8.605367350704009e-06, "loss": 0.6224, "step": 22278 }, { "epoch": 0.2663709513504466, "grad_norm": 2.957526683807373, "learning_rate": 8.605233198713183e-06, "loss": 0.7219, "step": 22279 }, { "epoch": 0.2663829074953072, "grad_norm": 3.0429892539978027, "learning_rate": 8.605099041316298e-06, "loss": 0.609, "step": 22280 }, { "epoch": 0.26639486364016784, "grad_norm": 3.323582887649536, "learning_rate": 8.604964878513555e-06, "loss": 0.6423, "step": 22281 }, { "epoch": 0.2664068197850285, "grad_norm": 3.8336284160614014, "learning_rate": 8.604830710305151e-06, "loss": 0.7014, "step": 22282 }, { "epoch": 0.26641877592988916, "grad_norm": 3.7233963012695312, "learning_rate": 8.604696536691293e-06, "loss": 0.5366, "step": 22283 }, { "epoch": 0.2664307320747498, "grad_norm": 1.9797354936599731, "learning_rate": 8.604562357672181e-06, "loss": 0.6113, "step": 22284 }, { "epoch": 0.2664426882196105, "grad_norm": 4.424306869506836, "learning_rate": 8.604428173248013e-06, "loss": 0.5934, "step": 22285 }, { "epoch": 0.26645464436447114, "grad_norm": 2.0488646030426025, "learning_rate": 8.604293983418991e-06, "loss": 0.7944, "step": 22286 }, { "epoch": 0.2664666005093318, "grad_norm": 3.0530574321746826, "learning_rate": 8.60415978818532e-06, "loss": 0.6055, "step": 22287 }, { "epoch": 0.2664785566541924, "grad_norm": 1.900175929069519, "learning_rate": 8.604025587547198e-06, "loss": 0.6328, "step": 22288 }, { "epoch": 0.26649051279905306, "grad_norm": 2.1713004112243652, "learning_rate": 8.603891381504825e-06, "loss": 0.64, "step": 22289 }, { "epoch": 0.2665024689439137, "grad_norm": 1.822397232055664, "learning_rate": 8.603757170058406e-06, "loss": 0.6012, "step": 22290 }, { "epoch": 0.2665144250887744, "grad_norm": 1.6574866771697998, "learning_rate": 8.60362295320814e-06, "loss": 0.5221, "step": 22291 }, { "epoch": 0.26652638123363503, "grad_norm": 2.3236236572265625, "learning_rate": 8.603488730954228e-06, "loss": 0.6153, "step": 22292 }, { "epoch": 0.2665383373784957, "grad_norm": 5.6583170890808105, "learning_rate": 8.603354503296873e-06, "loss": 0.6366, "step": 22293 }, { "epoch": 0.26655029352335635, "grad_norm": 6.0637006759643555, "learning_rate": 8.603220270236274e-06, "loss": 0.5332, "step": 22294 }, { "epoch": 0.26656224966821696, "grad_norm": 2.6848127841949463, "learning_rate": 8.603086031772633e-06, "loss": 0.5413, "step": 22295 }, { "epoch": 0.2665742058130776, "grad_norm": 3.0708847045898438, "learning_rate": 8.602951787906153e-06, "loss": 0.6096, "step": 22296 }, { "epoch": 0.2665861619579383, "grad_norm": 3.707230567932129, "learning_rate": 8.602817538637031e-06, "loss": 0.5784, "step": 22297 }, { "epoch": 0.26659811810279893, "grad_norm": 2.469527006149292, "learning_rate": 8.602683283965475e-06, "loss": 0.5994, "step": 22298 }, { "epoch": 0.2666100742476596, "grad_norm": 2.4718430042266846, "learning_rate": 8.602549023891679e-06, "loss": 0.5119, "step": 22299 }, { "epoch": 0.26662203039252025, "grad_norm": 6.245321273803711, "learning_rate": 8.602414758415851e-06, "loss": 0.6043, "step": 22300 }, { "epoch": 0.2666339865373809, "grad_norm": 4.20758056640625, "learning_rate": 8.60228048753819e-06, "loss": 0.5912, "step": 22301 }, { "epoch": 0.26664594268224157, "grad_norm": 3.330986738204956, "learning_rate": 8.602146211258893e-06, "loss": 0.6504, "step": 22302 }, { "epoch": 0.26665789882710217, "grad_norm": 2.697265386581421, "learning_rate": 8.602011929578168e-06, "loss": 0.5765, "step": 22303 }, { "epoch": 0.2666698549719628, "grad_norm": 5.132709503173828, "learning_rate": 8.601877642496212e-06, "loss": 0.553, "step": 22304 }, { "epoch": 0.2666818111168235, "grad_norm": 7.188483238220215, "learning_rate": 8.601743350013228e-06, "loss": 0.6443, "step": 22305 }, { "epoch": 0.26669376726168414, "grad_norm": 2.4079160690307617, "learning_rate": 8.601609052129417e-06, "loss": 0.5347, "step": 22306 }, { "epoch": 0.2667057234065448, "grad_norm": 2.5372796058654785, "learning_rate": 8.60147474884498e-06, "loss": 0.6025, "step": 22307 }, { "epoch": 0.26671767955140546, "grad_norm": 2.360165596008301, "learning_rate": 8.60134044016012e-06, "loss": 0.5609, "step": 22308 }, { "epoch": 0.2667296356962661, "grad_norm": 3.424879312515259, "learning_rate": 8.601206126075035e-06, "loss": 0.6345, "step": 22309 }, { "epoch": 0.2667415918411267, "grad_norm": 3.8258087635040283, "learning_rate": 8.601071806589931e-06, "loss": 0.5537, "step": 22310 }, { "epoch": 0.2667535479859874, "grad_norm": 3.467146158218384, "learning_rate": 8.600937481705005e-06, "loss": 0.646, "step": 22311 }, { "epoch": 0.26676550413084804, "grad_norm": 5.925421237945557, "learning_rate": 8.600803151420462e-06, "loss": 0.585, "step": 22312 }, { "epoch": 0.2667774602757087, "grad_norm": 2.5039994716644287, "learning_rate": 8.6006688157365e-06, "loss": 0.5978, "step": 22313 }, { "epoch": 0.26678941642056936, "grad_norm": 1.8335281610488892, "learning_rate": 8.600534474653325e-06, "loss": 0.5935, "step": 22314 }, { "epoch": 0.26680137256543, "grad_norm": 2.5571818351745605, "learning_rate": 8.600400128171133e-06, "loss": 0.5637, "step": 22315 }, { "epoch": 0.2668133287102907, "grad_norm": 2.508962631225586, "learning_rate": 8.600265776290132e-06, "loss": 0.6546, "step": 22316 }, { "epoch": 0.2668252848551513, "grad_norm": 2.982069730758667, "learning_rate": 8.600131419010515e-06, "loss": 0.6661, "step": 22317 }, { "epoch": 0.26683724100001194, "grad_norm": 6.692822456359863, "learning_rate": 8.599997056332493e-06, "loss": 0.692, "step": 22318 }, { "epoch": 0.2668491971448726, "grad_norm": 2.142864942550659, "learning_rate": 8.599862688256259e-06, "loss": 0.5101, "step": 22319 }, { "epoch": 0.26686115328973326, "grad_norm": 4.406092643737793, "learning_rate": 8.599728314782019e-06, "loss": 0.6188, "step": 22320 }, { "epoch": 0.2668731094345939, "grad_norm": 1.7560114860534668, "learning_rate": 8.599593935909974e-06, "loss": 0.5596, "step": 22321 }, { "epoch": 0.2668850655794546, "grad_norm": 2.9635069370269775, "learning_rate": 8.599459551640323e-06, "loss": 0.602, "step": 22322 }, { "epoch": 0.26689702172431523, "grad_norm": 3.090285062789917, "learning_rate": 8.599325161973273e-06, "loss": 0.5298, "step": 22323 }, { "epoch": 0.2669089778691759, "grad_norm": 3.8435308933258057, "learning_rate": 8.599190766909021e-06, "loss": 0.6385, "step": 22324 }, { "epoch": 0.2669209340140365, "grad_norm": 2.826681613922119, "learning_rate": 8.599056366447768e-06, "loss": 0.6439, "step": 22325 }, { "epoch": 0.26693289015889715, "grad_norm": 3.9254343509674072, "learning_rate": 8.598921960589719e-06, "loss": 0.6197, "step": 22326 }, { "epoch": 0.2669448463037578, "grad_norm": 3.9534780979156494, "learning_rate": 8.598787549335072e-06, "loss": 0.7482, "step": 22327 }, { "epoch": 0.26695680244861847, "grad_norm": 5.4665093421936035, "learning_rate": 8.598653132684031e-06, "loss": 0.6199, "step": 22328 }, { "epoch": 0.2669687585934791, "grad_norm": 2.819348096847534, "learning_rate": 8.598518710636799e-06, "loss": 0.6868, "step": 22329 }, { "epoch": 0.2669807147383398, "grad_norm": 2.112185478210449, "learning_rate": 8.598384283193572e-06, "loss": 0.5601, "step": 22330 }, { "epoch": 0.26699267088320044, "grad_norm": 2.572916269302368, "learning_rate": 8.598249850354557e-06, "loss": 0.691, "step": 22331 }, { "epoch": 0.26700462702806105, "grad_norm": 7.374268054962158, "learning_rate": 8.598115412119955e-06, "loss": 0.5694, "step": 22332 }, { "epoch": 0.2670165831729217, "grad_norm": 2.0392301082611084, "learning_rate": 8.597980968489962e-06, "loss": 0.5912, "step": 22333 }, { "epoch": 0.26702853931778237, "grad_norm": 1.8148581981658936, "learning_rate": 8.597846519464788e-06, "loss": 0.5989, "step": 22334 }, { "epoch": 0.267040495462643, "grad_norm": 2.1303892135620117, "learning_rate": 8.597712065044628e-06, "loss": 0.6789, "step": 22335 }, { "epoch": 0.2670524516075037, "grad_norm": 2.4269092082977295, "learning_rate": 8.597577605229685e-06, "loss": 0.5147, "step": 22336 }, { "epoch": 0.26706440775236434, "grad_norm": 2.9495127201080322, "learning_rate": 8.597443140020163e-06, "loss": 0.5763, "step": 22337 }, { "epoch": 0.267076363897225, "grad_norm": 2.7626419067382812, "learning_rate": 8.597308669416264e-06, "loss": 0.6676, "step": 22338 }, { "epoch": 0.2670883200420856, "grad_norm": 5.069882392883301, "learning_rate": 8.597174193418185e-06, "loss": 0.547, "step": 22339 }, { "epoch": 0.26710027618694626, "grad_norm": 2.6193370819091797, "learning_rate": 8.597039712026132e-06, "loss": 0.5368, "step": 22340 }, { "epoch": 0.2671122323318069, "grad_norm": 2.3607747554779053, "learning_rate": 8.596905225240304e-06, "loss": 0.5889, "step": 22341 }, { "epoch": 0.2671241884766676, "grad_norm": 3.1360232830047607, "learning_rate": 8.596770733060905e-06, "loss": 0.6306, "step": 22342 }, { "epoch": 0.26713614462152824, "grad_norm": 13.405261039733887, "learning_rate": 8.596636235488134e-06, "loss": 0.6647, "step": 22343 }, { "epoch": 0.2671481007663889, "grad_norm": 2.48781418800354, "learning_rate": 8.596501732522196e-06, "loss": 0.5647, "step": 22344 }, { "epoch": 0.26716005691124955, "grad_norm": 3.5805554389953613, "learning_rate": 8.59636722416329e-06, "loss": 0.5951, "step": 22345 }, { "epoch": 0.2671720130561102, "grad_norm": 2.2219362258911133, "learning_rate": 8.596232710411618e-06, "loss": 0.6137, "step": 22346 }, { "epoch": 0.2671839692009708, "grad_norm": 2.2538645267486572, "learning_rate": 8.596098191267383e-06, "loss": 0.5701, "step": 22347 }, { "epoch": 0.2671959253458315, "grad_norm": 5.277360916137695, "learning_rate": 8.595963666730786e-06, "loss": 0.6724, "step": 22348 }, { "epoch": 0.26720788149069213, "grad_norm": 2.9318323135375977, "learning_rate": 8.595829136802029e-06, "loss": 0.6125, "step": 22349 }, { "epoch": 0.2672198376355528, "grad_norm": 3.6499221324920654, "learning_rate": 8.595694601481313e-06, "loss": 0.602, "step": 22350 }, { "epoch": 0.26723179378041345, "grad_norm": 1.8566011190414429, "learning_rate": 8.595560060768842e-06, "loss": 0.531, "step": 22351 }, { "epoch": 0.2672437499252741, "grad_norm": 1.9555490016937256, "learning_rate": 8.595425514664812e-06, "loss": 0.5922, "step": 22352 }, { "epoch": 0.26725570607013477, "grad_norm": 2.556006908416748, "learning_rate": 8.595290963169431e-06, "loss": 0.6085, "step": 22353 }, { "epoch": 0.26726766221499537, "grad_norm": 3.6349172592163086, "learning_rate": 8.595156406282899e-06, "loss": 0.5035, "step": 22354 }, { "epoch": 0.26727961835985603, "grad_norm": 6.208028793334961, "learning_rate": 8.595021844005417e-06, "loss": 0.6118, "step": 22355 }, { "epoch": 0.2672915745047167, "grad_norm": 3.215131998062134, "learning_rate": 8.594887276337187e-06, "loss": 0.616, "step": 22356 }, { "epoch": 0.26730353064957735, "grad_norm": 2.357177734375, "learning_rate": 8.594752703278408e-06, "loss": 0.5874, "step": 22357 }, { "epoch": 0.267315486794438, "grad_norm": 5.8061323165893555, "learning_rate": 8.594618124829288e-06, "loss": 0.5834, "step": 22358 }, { "epoch": 0.26732744293929867, "grad_norm": 28.53171157836914, "learning_rate": 8.594483540990024e-06, "loss": 0.6067, "step": 22359 }, { "epoch": 0.2673393990841593, "grad_norm": 4.1258769035339355, "learning_rate": 8.594348951760819e-06, "loss": 0.5558, "step": 22360 }, { "epoch": 0.26735135522902, "grad_norm": 3.6636290550231934, "learning_rate": 8.594214357141874e-06, "loss": 0.5841, "step": 22361 }, { "epoch": 0.2673633113738806, "grad_norm": 1.9836747646331787, "learning_rate": 8.594079757133394e-06, "loss": 0.737, "step": 22362 }, { "epoch": 0.26737526751874124, "grad_norm": 2.498171806335449, "learning_rate": 8.593945151735577e-06, "loss": 0.7373, "step": 22363 }, { "epoch": 0.2673872236636019, "grad_norm": 4.605936527252197, "learning_rate": 8.593810540948626e-06, "loss": 0.6568, "step": 22364 }, { "epoch": 0.26739917980846256, "grad_norm": 2.444977283477783, "learning_rate": 8.593675924772747e-06, "loss": 0.6551, "step": 22365 }, { "epoch": 0.2674111359533232, "grad_norm": 2.5917792320251465, "learning_rate": 8.593541303208134e-06, "loss": 0.4801, "step": 22366 }, { "epoch": 0.2674230920981839, "grad_norm": 3.1053555011749268, "learning_rate": 8.593406676254996e-06, "loss": 0.599, "step": 22367 }, { "epoch": 0.26743504824304454, "grad_norm": 3.757211446762085, "learning_rate": 8.59327204391353e-06, "loss": 0.5488, "step": 22368 }, { "epoch": 0.26744700438790514, "grad_norm": 1.897323727607727, "learning_rate": 8.59313740618394e-06, "loss": 0.5301, "step": 22369 }, { "epoch": 0.2674589605327658, "grad_norm": 4.016818046569824, "learning_rate": 8.593002763066427e-06, "loss": 0.7058, "step": 22370 }, { "epoch": 0.26747091667762646, "grad_norm": 2.2709667682647705, "learning_rate": 8.592868114561195e-06, "loss": 0.6352, "step": 22371 }, { "epoch": 0.2674828728224871, "grad_norm": 7.3448991775512695, "learning_rate": 8.592733460668445e-06, "loss": 0.5666, "step": 22372 }, { "epoch": 0.2674948289673478, "grad_norm": 2.7147903442382812, "learning_rate": 8.592598801388377e-06, "loss": 0.6624, "step": 22373 }, { "epoch": 0.26750678511220843, "grad_norm": 2.224085569381714, "learning_rate": 8.592464136721195e-06, "loss": 0.5969, "step": 22374 }, { "epoch": 0.2675187412570691, "grad_norm": 3.942030429840088, "learning_rate": 8.5923294666671e-06, "loss": 0.5935, "step": 22375 }, { "epoch": 0.2675306974019297, "grad_norm": 2.7733397483825684, "learning_rate": 8.592194791226294e-06, "loss": 0.6262, "step": 22376 }, { "epoch": 0.26754265354679035, "grad_norm": 9.493896484375, "learning_rate": 8.592060110398979e-06, "loss": 0.5836, "step": 22377 }, { "epoch": 0.267554609691651, "grad_norm": 4.539403915405273, "learning_rate": 8.591925424185357e-06, "loss": 0.6252, "step": 22378 }, { "epoch": 0.26756656583651167, "grad_norm": 2.141951084136963, "learning_rate": 8.59179073258563e-06, "loss": 0.607, "step": 22379 }, { "epoch": 0.26757852198137233, "grad_norm": 3.8141562938690186, "learning_rate": 8.591656035600003e-06, "loss": 0.633, "step": 22380 }, { "epoch": 0.267590478126233, "grad_norm": 7.460138320922852, "learning_rate": 8.591521333228672e-06, "loss": 0.5405, "step": 22381 }, { "epoch": 0.26760243427109365, "grad_norm": 2.2752768993377686, "learning_rate": 8.591386625471841e-06, "loss": 0.5557, "step": 22382 }, { "epoch": 0.2676143904159543, "grad_norm": 1.7645072937011719, "learning_rate": 8.591251912329715e-06, "loss": 0.5312, "step": 22383 }, { "epoch": 0.2676263465608149, "grad_norm": 2.8805389404296875, "learning_rate": 8.591117193802493e-06, "loss": 0.5898, "step": 22384 }, { "epoch": 0.26763830270567557, "grad_norm": 3.3758597373962402, "learning_rate": 8.59098246989038e-06, "loss": 0.7801, "step": 22385 }, { "epoch": 0.2676502588505362, "grad_norm": 6.255879878997803, "learning_rate": 8.590847740593574e-06, "loss": 0.6103, "step": 22386 }, { "epoch": 0.2676622149953969, "grad_norm": 6.049798011779785, "learning_rate": 8.59071300591228e-06, "loss": 0.6164, "step": 22387 }, { "epoch": 0.26767417114025754, "grad_norm": 3.3779988288879395, "learning_rate": 8.590578265846698e-06, "loss": 0.5202, "step": 22388 }, { "epoch": 0.2676861272851182, "grad_norm": 1.9551877975463867, "learning_rate": 8.590443520397032e-06, "loss": 0.5088, "step": 22389 }, { "epoch": 0.26769808342997886, "grad_norm": 1.936943769454956, "learning_rate": 8.590308769563483e-06, "loss": 0.654, "step": 22390 }, { "epoch": 0.26771003957483946, "grad_norm": 3.147739887237549, "learning_rate": 8.590174013346253e-06, "loss": 0.5881, "step": 22391 }, { "epoch": 0.2677219957197001, "grad_norm": 1.834869146347046, "learning_rate": 8.590039251745545e-06, "loss": 0.7157, "step": 22392 }, { "epoch": 0.2677339518645608, "grad_norm": 2.560680627822876, "learning_rate": 8.58990448476156e-06, "loss": 0.5605, "step": 22393 }, { "epoch": 0.26774590800942144, "grad_norm": 2.5674493312835693, "learning_rate": 8.5897697123945e-06, "loss": 0.5834, "step": 22394 }, { "epoch": 0.2677578641542821, "grad_norm": 2.1315152645111084, "learning_rate": 8.589634934644568e-06, "loss": 0.6416, "step": 22395 }, { "epoch": 0.26776982029914276, "grad_norm": 2.8116135597229004, "learning_rate": 8.589500151511966e-06, "loss": 0.5828, "step": 22396 }, { "epoch": 0.2677817764440034, "grad_norm": 8.291850090026855, "learning_rate": 8.589365362996895e-06, "loss": 0.5503, "step": 22397 }, { "epoch": 0.267793732588864, "grad_norm": 3.3650400638580322, "learning_rate": 8.589230569099558e-06, "loss": 0.6043, "step": 22398 }, { "epoch": 0.2678056887337247, "grad_norm": 4.888371467590332, "learning_rate": 8.589095769820156e-06, "loss": 0.5783, "step": 22399 }, { "epoch": 0.26781764487858534, "grad_norm": 2.4778802394866943, "learning_rate": 8.588960965158894e-06, "loss": 0.5817, "step": 22400 }, { "epoch": 0.267829601023446, "grad_norm": 2.1598381996154785, "learning_rate": 8.588826155115972e-06, "loss": 0.516, "step": 22401 }, { "epoch": 0.26784155716830665, "grad_norm": 3.1821672916412354, "learning_rate": 8.588691339691593e-06, "loss": 0.6686, "step": 22402 }, { "epoch": 0.2678535133131673, "grad_norm": 8.330595970153809, "learning_rate": 8.588556518885957e-06, "loss": 0.648, "step": 22403 }, { "epoch": 0.26786546945802797, "grad_norm": 3.496596336364746, "learning_rate": 8.58842169269927e-06, "loss": 0.6993, "step": 22404 }, { "epoch": 0.26787742560288863, "grad_norm": 1.8539066314697266, "learning_rate": 8.58828686113173e-06, "loss": 0.5518, "step": 22405 }, { "epoch": 0.26788938174774923, "grad_norm": 1.7752859592437744, "learning_rate": 8.58815202418354e-06, "loss": 0.5903, "step": 22406 }, { "epoch": 0.2679013378926099, "grad_norm": 4.5652384757995605, "learning_rate": 8.588017181854905e-06, "loss": 0.5677, "step": 22407 }, { "epoch": 0.26791329403747055, "grad_norm": 4.090716361999512, "learning_rate": 8.587882334146024e-06, "loss": 0.6763, "step": 22408 }, { "epoch": 0.2679252501823312, "grad_norm": 2.6596555709838867, "learning_rate": 8.587747481057103e-06, "loss": 0.5948, "step": 22409 }, { "epoch": 0.26793720632719187, "grad_norm": 2.664912462234497, "learning_rate": 8.587612622588342e-06, "loss": 0.5754, "step": 22410 }, { "epoch": 0.2679491624720525, "grad_norm": 2.810847282409668, "learning_rate": 8.587477758739941e-06, "loss": 0.68, "step": 22411 }, { "epoch": 0.2679611186169132, "grad_norm": 3.193082571029663, "learning_rate": 8.587342889512105e-06, "loss": 0.5653, "step": 22412 }, { "epoch": 0.2679730747617738, "grad_norm": 2.916455030441284, "learning_rate": 8.587208014905037e-06, "loss": 0.6213, "step": 22413 }, { "epoch": 0.26798503090663445, "grad_norm": 7.205226421356201, "learning_rate": 8.587073134918936e-06, "loss": 0.65, "step": 22414 }, { "epoch": 0.2679969870514951, "grad_norm": 3.040064573287964, "learning_rate": 8.586938249554007e-06, "loss": 0.6029, "step": 22415 }, { "epoch": 0.26800894319635576, "grad_norm": 4.3199639320373535, "learning_rate": 8.58680335881045e-06, "loss": 0.6415, "step": 22416 }, { "epoch": 0.2680208993412164, "grad_norm": 3.975248098373413, "learning_rate": 8.58666846268847e-06, "loss": 0.5287, "step": 22417 }, { "epoch": 0.2680328554860771, "grad_norm": 2.6380202770233154, "learning_rate": 8.586533561188269e-06, "loss": 0.6359, "step": 22418 }, { "epoch": 0.26804481163093774, "grad_norm": 2.4340076446533203, "learning_rate": 8.586398654310047e-06, "loss": 0.5396, "step": 22419 }, { "epoch": 0.2680567677757984, "grad_norm": 7.158082008361816, "learning_rate": 8.586263742054006e-06, "loss": 0.6201, "step": 22420 }, { "epoch": 0.268068723920659, "grad_norm": 3.8759448528289795, "learning_rate": 8.586128824420351e-06, "loss": 0.566, "step": 22421 }, { "epoch": 0.26808068006551966, "grad_norm": 2.0034894943237305, "learning_rate": 8.585993901409284e-06, "loss": 0.5683, "step": 22422 }, { "epoch": 0.2680926362103803, "grad_norm": 1.8822650909423828, "learning_rate": 8.585858973021004e-06, "loss": 0.5638, "step": 22423 }, { "epoch": 0.268104592355241, "grad_norm": 3.9822864532470703, "learning_rate": 8.585724039255719e-06, "loss": 0.5337, "step": 22424 }, { "epoch": 0.26811654850010164, "grad_norm": 2.9123566150665283, "learning_rate": 8.585589100113626e-06, "loss": 0.5769, "step": 22425 }, { "epoch": 0.2681285046449623, "grad_norm": 1.5826987028121948, "learning_rate": 8.58545415559493e-06, "loss": 0.6455, "step": 22426 }, { "epoch": 0.26814046078982295, "grad_norm": 3.9555177688598633, "learning_rate": 8.585319205699831e-06, "loss": 0.6504, "step": 22427 }, { "epoch": 0.26815241693468356, "grad_norm": 2.3537967205047607, "learning_rate": 8.585184250428535e-06, "loss": 0.6778, "step": 22428 }, { "epoch": 0.2681643730795442, "grad_norm": 3.3166146278381348, "learning_rate": 8.585049289781243e-06, "loss": 0.6572, "step": 22429 }, { "epoch": 0.2681763292244049, "grad_norm": 3.059927463531494, "learning_rate": 8.584914323758154e-06, "loss": 0.6506, "step": 22430 }, { "epoch": 0.26818828536926553, "grad_norm": 4.444097995758057, "learning_rate": 8.584779352359476e-06, "loss": 0.5292, "step": 22431 }, { "epoch": 0.2682002415141262, "grad_norm": 3.246931314468384, "learning_rate": 8.584644375585406e-06, "loss": 0.5138, "step": 22432 }, { "epoch": 0.26821219765898685, "grad_norm": 2.8827195167541504, "learning_rate": 8.584509393436152e-06, "loss": 0.6358, "step": 22433 }, { "epoch": 0.2682241538038475, "grad_norm": 5.4717512130737305, "learning_rate": 8.584374405911913e-06, "loss": 0.5833, "step": 22434 }, { "epoch": 0.2682361099487081, "grad_norm": 2.8132216930389404, "learning_rate": 8.58423941301289e-06, "loss": 0.6526, "step": 22435 }, { "epoch": 0.26824806609356877, "grad_norm": 3.160134792327881, "learning_rate": 8.58410441473929e-06, "loss": 0.6267, "step": 22436 }, { "epoch": 0.26826002223842943, "grad_norm": 2.4695136547088623, "learning_rate": 8.583969411091308e-06, "loss": 0.5811, "step": 22437 }, { "epoch": 0.2682719783832901, "grad_norm": 3.0941741466522217, "learning_rate": 8.583834402069156e-06, "loss": 0.5354, "step": 22438 }, { "epoch": 0.26828393452815075, "grad_norm": 4.1552734375, "learning_rate": 8.583699387673029e-06, "loss": 0.5405, "step": 22439 }, { "epoch": 0.2682958906730114, "grad_norm": 2.5512921810150146, "learning_rate": 8.583564367903131e-06, "loss": 0.6, "step": 22440 }, { "epoch": 0.26830784681787206, "grad_norm": 2.0907974243164062, "learning_rate": 8.583429342759669e-06, "loss": 0.6401, "step": 22441 }, { "epoch": 0.2683198029627327, "grad_norm": 3.536269426345825, "learning_rate": 8.58329431224284e-06, "loss": 0.536, "step": 22442 }, { "epoch": 0.2683317591075933, "grad_norm": 2.608585834503174, "learning_rate": 8.583159276352848e-06, "loss": 0.5335, "step": 22443 }, { "epoch": 0.268343715252454, "grad_norm": 3.2564237117767334, "learning_rate": 8.583024235089898e-06, "loss": 0.612, "step": 22444 }, { "epoch": 0.26835567139731464, "grad_norm": 4.472459316253662, "learning_rate": 8.582889188454189e-06, "loss": 0.5292, "step": 22445 }, { "epoch": 0.2683676275421753, "grad_norm": 5.8148908615112305, "learning_rate": 8.582754136445923e-06, "loss": 0.5801, "step": 22446 }, { "epoch": 0.26837958368703596, "grad_norm": 1.876975655555725, "learning_rate": 8.582619079065308e-06, "loss": 0.5913, "step": 22447 }, { "epoch": 0.2683915398318966, "grad_norm": 11.570573806762695, "learning_rate": 8.58248401631254e-06, "loss": 0.576, "step": 22448 }, { "epoch": 0.2684034959767573, "grad_norm": 2.8127336502075195, "learning_rate": 8.582348948187826e-06, "loss": 0.6187, "step": 22449 }, { "epoch": 0.2684154521216179, "grad_norm": 1.8671503067016602, "learning_rate": 8.582213874691368e-06, "loss": 0.6884, "step": 22450 }, { "epoch": 0.26842740826647854, "grad_norm": 3.9010016918182373, "learning_rate": 8.582078795823365e-06, "loss": 0.5659, "step": 22451 }, { "epoch": 0.2684393644113392, "grad_norm": 2.5744123458862305, "learning_rate": 8.581943711584025e-06, "loss": 0.6114, "step": 22452 }, { "epoch": 0.26845132055619986, "grad_norm": 3.040074110031128, "learning_rate": 8.581808621973544e-06, "loss": 0.5894, "step": 22453 }, { "epoch": 0.2684632767010605, "grad_norm": 1.8587075471878052, "learning_rate": 8.581673526992132e-06, "loss": 0.5495, "step": 22454 }, { "epoch": 0.2684752328459212, "grad_norm": 2.153515577316284, "learning_rate": 8.581538426639985e-06, "loss": 0.5789, "step": 22455 }, { "epoch": 0.26848718899078183, "grad_norm": 2.325024127960205, "learning_rate": 8.58140332091731e-06, "loss": 0.6193, "step": 22456 }, { "epoch": 0.26849914513564244, "grad_norm": 11.959413528442383, "learning_rate": 8.581268209824308e-06, "loss": 0.6221, "step": 22457 }, { "epoch": 0.2685111012805031, "grad_norm": 3.8823506832122803, "learning_rate": 8.58113309336118e-06, "loss": 0.5509, "step": 22458 }, { "epoch": 0.26852305742536375, "grad_norm": 2.4765408039093018, "learning_rate": 8.580997971528132e-06, "loss": 0.5355, "step": 22459 }, { "epoch": 0.2685350135702244, "grad_norm": 6.187368869781494, "learning_rate": 8.580862844325365e-06, "loss": 0.5686, "step": 22460 }, { "epoch": 0.26854696971508507, "grad_norm": 3.436213254928589, "learning_rate": 8.58072771175308e-06, "loss": 0.596, "step": 22461 }, { "epoch": 0.26855892585994573, "grad_norm": 5.103250503540039, "learning_rate": 8.580592573811481e-06, "loss": 0.4863, "step": 22462 }, { "epoch": 0.2685708820048064, "grad_norm": 9.087924003601074, "learning_rate": 8.580457430500771e-06, "loss": 0.6346, "step": 22463 }, { "epoch": 0.26858283814966705, "grad_norm": 48.37788772583008, "learning_rate": 8.580322281821153e-06, "loss": 0.7096, "step": 22464 }, { "epoch": 0.26859479429452765, "grad_norm": 1.8037972450256348, "learning_rate": 8.580187127772829e-06, "loss": 0.5573, "step": 22465 }, { "epoch": 0.2686067504393883, "grad_norm": 3.078051805496216, "learning_rate": 8.580051968356e-06, "loss": 0.6113, "step": 22466 }, { "epoch": 0.26861870658424897, "grad_norm": 2.1409285068511963, "learning_rate": 8.579916803570873e-06, "loss": 0.5507, "step": 22467 }, { "epoch": 0.2686306627291096, "grad_norm": 1.7688477039337158, "learning_rate": 8.579781633417647e-06, "loss": 0.5729, "step": 22468 }, { "epoch": 0.2686426188739703, "grad_norm": 20.25889778137207, "learning_rate": 8.579646457896524e-06, "loss": 0.6362, "step": 22469 }, { "epoch": 0.26865457501883094, "grad_norm": 6.165035247802734, "learning_rate": 8.57951127700771e-06, "loss": 0.5176, "step": 22470 }, { "epoch": 0.2686665311636916, "grad_norm": 2.623918056488037, "learning_rate": 8.579376090751407e-06, "loss": 0.5971, "step": 22471 }, { "epoch": 0.2686784873085522, "grad_norm": 3.2794947624206543, "learning_rate": 8.579240899127817e-06, "loss": 0.5583, "step": 22472 }, { "epoch": 0.26869044345341286, "grad_norm": 3.3399107456207275, "learning_rate": 8.579105702137141e-06, "loss": 0.5623, "step": 22473 }, { "epoch": 0.2687023995982735, "grad_norm": 1.8649754524230957, "learning_rate": 8.578970499779583e-06, "loss": 0.6558, "step": 22474 }, { "epoch": 0.2687143557431342, "grad_norm": 1.6780799627304077, "learning_rate": 8.578835292055346e-06, "loss": 0.6116, "step": 22475 }, { "epoch": 0.26872631188799484, "grad_norm": 6.841350078582764, "learning_rate": 8.578700078964634e-06, "loss": 0.581, "step": 22476 }, { "epoch": 0.2687382680328555, "grad_norm": 1.447072982788086, "learning_rate": 8.578564860507648e-06, "loss": 0.6209, "step": 22477 }, { "epoch": 0.26875022417771616, "grad_norm": 3.8889102935791016, "learning_rate": 8.578429636684592e-06, "loss": 0.6178, "step": 22478 }, { "epoch": 0.2687621803225768, "grad_norm": 5.188684940338135, "learning_rate": 8.578294407495666e-06, "loss": 0.565, "step": 22479 }, { "epoch": 0.2687741364674374, "grad_norm": 2.406223773956299, "learning_rate": 8.578159172941076e-06, "loss": 0.5319, "step": 22480 }, { "epoch": 0.2687860926122981, "grad_norm": 2.642763137817383, "learning_rate": 8.578023933021024e-06, "loss": 0.6194, "step": 22481 }, { "epoch": 0.26879804875715874, "grad_norm": 2.9443278312683105, "learning_rate": 8.577888687735711e-06, "loss": 0.5422, "step": 22482 }, { "epoch": 0.2688100049020194, "grad_norm": 2.290189504623413, "learning_rate": 8.577753437085343e-06, "loss": 0.5992, "step": 22483 }, { "epoch": 0.26882196104688005, "grad_norm": 7.323580741882324, "learning_rate": 8.577618181070121e-06, "loss": 0.5993, "step": 22484 }, { "epoch": 0.2688339171917407, "grad_norm": 1.8321624994277954, "learning_rate": 8.577482919690247e-06, "loss": 0.6048, "step": 22485 }, { "epoch": 0.26884587333660137, "grad_norm": 2.621877908706665, "learning_rate": 8.577347652945924e-06, "loss": 0.6281, "step": 22486 }, { "epoch": 0.268857829481462, "grad_norm": 3.9340667724609375, "learning_rate": 8.577212380837356e-06, "loss": 0.6973, "step": 22487 }, { "epoch": 0.26886978562632263, "grad_norm": 2.600990056991577, "learning_rate": 8.577077103364745e-06, "loss": 0.4834, "step": 22488 }, { "epoch": 0.2688817417711833, "grad_norm": 2.182380199432373, "learning_rate": 8.576941820528295e-06, "loss": 0.6915, "step": 22489 }, { "epoch": 0.26889369791604395, "grad_norm": 4.144122123718262, "learning_rate": 8.576806532328209e-06, "loss": 0.6115, "step": 22490 }, { "epoch": 0.2689056540609046, "grad_norm": 2.7620253562927246, "learning_rate": 8.576671238764687e-06, "loss": 0.5793, "step": 22491 }, { "epoch": 0.26891761020576527, "grad_norm": 3.701395034790039, "learning_rate": 8.576535939837935e-06, "loss": 0.5515, "step": 22492 }, { "epoch": 0.2689295663506259, "grad_norm": 5.79127311706543, "learning_rate": 8.576400635548154e-06, "loss": 0.5752, "step": 22493 }, { "epoch": 0.26894152249548653, "grad_norm": 4.8840227127075195, "learning_rate": 8.576265325895547e-06, "loss": 0.6309, "step": 22494 }, { "epoch": 0.2689534786403472, "grad_norm": 2.3255889415740967, "learning_rate": 8.576130010880317e-06, "loss": 0.6331, "step": 22495 }, { "epoch": 0.26896543478520785, "grad_norm": 2.4645369052886963, "learning_rate": 8.575994690502669e-06, "loss": 0.5602, "step": 22496 }, { "epoch": 0.2689773909300685, "grad_norm": 4.017687797546387, "learning_rate": 8.575859364762804e-06, "loss": 0.6629, "step": 22497 }, { "epoch": 0.26898934707492916, "grad_norm": 11.101999282836914, "learning_rate": 8.575724033660926e-06, "loss": 0.6487, "step": 22498 }, { "epoch": 0.2690013032197898, "grad_norm": 3.3201024532318115, "learning_rate": 8.575588697197234e-06, "loss": 0.5905, "step": 22499 }, { "epoch": 0.2690132593646505, "grad_norm": 8.476625442504883, "learning_rate": 8.575453355371937e-06, "loss": 0.6505, "step": 22500 }, { "epoch": 0.26902521550951114, "grad_norm": 6.730556964874268, "learning_rate": 8.575318008185233e-06, "loss": 0.5607, "step": 22501 }, { "epoch": 0.26903717165437174, "grad_norm": 6.186646461486816, "learning_rate": 8.575182655637328e-06, "loss": 0.611, "step": 22502 }, { "epoch": 0.2690491277992324, "grad_norm": 2.526021957397461, "learning_rate": 8.575047297728424e-06, "loss": 0.6411, "step": 22503 }, { "epoch": 0.26906108394409306, "grad_norm": 2.0052859783172607, "learning_rate": 8.574911934458723e-06, "loss": 0.5812, "step": 22504 }, { "epoch": 0.2690730400889537, "grad_norm": 7.909202575683594, "learning_rate": 8.57477656582843e-06, "loss": 0.6082, "step": 22505 }, { "epoch": 0.2690849962338144, "grad_norm": 4.6512041091918945, "learning_rate": 8.574641191837748e-06, "loss": 0.5834, "step": 22506 }, { "epoch": 0.26909695237867504, "grad_norm": 3.655322790145874, "learning_rate": 8.574505812486876e-06, "loss": 0.6957, "step": 22507 }, { "epoch": 0.2691089085235357, "grad_norm": 2.4520952701568604, "learning_rate": 8.57437042777602e-06, "loss": 0.7092, "step": 22508 }, { "epoch": 0.2691208646683963, "grad_norm": 3.4156575202941895, "learning_rate": 8.574235037705384e-06, "loss": 0.6225, "step": 22509 }, { "epoch": 0.26913282081325696, "grad_norm": 5.0669050216674805, "learning_rate": 8.574099642275169e-06, "loss": 0.5907, "step": 22510 }, { "epoch": 0.2691447769581176, "grad_norm": 2.809187889099121, "learning_rate": 8.57396424148558e-06, "loss": 0.6309, "step": 22511 }, { "epoch": 0.2691567331029783, "grad_norm": 2.5322391986846924, "learning_rate": 8.573828835336819e-06, "loss": 0.6556, "step": 22512 }, { "epoch": 0.26916868924783893, "grad_norm": 2.139206886291504, "learning_rate": 8.573693423829088e-06, "loss": 0.6504, "step": 22513 }, { "epoch": 0.2691806453926996, "grad_norm": 6.136883735656738, "learning_rate": 8.573558006962591e-06, "loss": 0.6276, "step": 22514 }, { "epoch": 0.26919260153756025, "grad_norm": 2.31581711769104, "learning_rate": 8.573422584737532e-06, "loss": 0.6972, "step": 22515 }, { "epoch": 0.26920455768242085, "grad_norm": 2.1048150062561035, "learning_rate": 8.573287157154111e-06, "loss": 0.5668, "step": 22516 }, { "epoch": 0.2692165138272815, "grad_norm": 11.816017150878906, "learning_rate": 8.573151724212537e-06, "loss": 0.5405, "step": 22517 }, { "epoch": 0.26922846997214217, "grad_norm": 2.239842653274536, "learning_rate": 8.573016285913006e-06, "loss": 0.6752, "step": 22518 }, { "epoch": 0.26924042611700283, "grad_norm": 1.8779059648513794, "learning_rate": 8.572880842255726e-06, "loss": 0.5126, "step": 22519 }, { "epoch": 0.2692523822618635, "grad_norm": 5.179654121398926, "learning_rate": 8.572745393240897e-06, "loss": 0.682, "step": 22520 }, { "epoch": 0.26926433840672415, "grad_norm": 2.098329782485962, "learning_rate": 8.572609938868726e-06, "loss": 0.6062, "step": 22521 }, { "epoch": 0.2692762945515848, "grad_norm": 2.44177508354187, "learning_rate": 8.572474479139411e-06, "loss": 0.6883, "step": 22522 }, { "epoch": 0.26928825069644546, "grad_norm": 3.1303670406341553, "learning_rate": 8.572339014053158e-06, "loss": 0.7166, "step": 22523 }, { "epoch": 0.26930020684130607, "grad_norm": 2.6554183959960938, "learning_rate": 8.572203543610173e-06, "loss": 0.6676, "step": 22524 }, { "epoch": 0.2693121629861667, "grad_norm": 3.774369955062866, "learning_rate": 8.572068067810652e-06, "loss": 0.5621, "step": 22525 }, { "epoch": 0.2693241191310274, "grad_norm": 3.3614039421081543, "learning_rate": 8.571932586654805e-06, "loss": 0.558, "step": 22526 }, { "epoch": 0.26933607527588804, "grad_norm": 3.823746681213379, "learning_rate": 8.571797100142832e-06, "loss": 0.6021, "step": 22527 }, { "epoch": 0.2693480314207487, "grad_norm": 2.7195498943328857, "learning_rate": 8.571661608274937e-06, "loss": 0.491, "step": 22528 }, { "epoch": 0.26935998756560936, "grad_norm": 2.158743143081665, "learning_rate": 8.571526111051322e-06, "loss": 0.4954, "step": 22529 }, { "epoch": 0.26937194371047, "grad_norm": 11.80801010131836, "learning_rate": 8.571390608472189e-06, "loss": 0.6591, "step": 22530 }, { "epoch": 0.2693838998553306, "grad_norm": 1.5491962432861328, "learning_rate": 8.571255100537746e-06, "loss": 0.5897, "step": 22531 }, { "epoch": 0.2693958560001913, "grad_norm": 3.5602176189422607, "learning_rate": 8.571119587248193e-06, "loss": 0.5917, "step": 22532 }, { "epoch": 0.26940781214505194, "grad_norm": 3.1385788917541504, "learning_rate": 8.570984068603732e-06, "loss": 0.6197, "step": 22533 }, { "epoch": 0.2694197682899126, "grad_norm": 1.8675346374511719, "learning_rate": 8.570848544604569e-06, "loss": 0.5584, "step": 22534 }, { "epoch": 0.26943172443477326, "grad_norm": 2.869072437286377, "learning_rate": 8.570713015250904e-06, "loss": 0.6474, "step": 22535 }, { "epoch": 0.2694436805796339, "grad_norm": 2.858703136444092, "learning_rate": 8.570577480542946e-06, "loss": 0.6391, "step": 22536 }, { "epoch": 0.2694556367244946, "grad_norm": 3.1518514156341553, "learning_rate": 8.57044194048089e-06, "loss": 0.6825, "step": 22537 }, { "epoch": 0.26946759286935523, "grad_norm": 2.3093814849853516, "learning_rate": 8.570306395064947e-06, "loss": 0.5607, "step": 22538 }, { "epoch": 0.26947954901421584, "grad_norm": 7.827247142791748, "learning_rate": 8.570170844295314e-06, "loss": 0.6521, "step": 22539 }, { "epoch": 0.2694915051590765, "grad_norm": 7.725436687469482, "learning_rate": 8.570035288172198e-06, "loss": 0.5347, "step": 22540 }, { "epoch": 0.26950346130393715, "grad_norm": 3.5950326919555664, "learning_rate": 8.569899726695803e-06, "loss": 0.6431, "step": 22541 }, { "epoch": 0.2695154174487978, "grad_norm": 3.5098884105682373, "learning_rate": 8.569764159866329e-06, "loss": 0.5824, "step": 22542 }, { "epoch": 0.26952737359365847, "grad_norm": 1.6836601495742798, "learning_rate": 8.569628587683983e-06, "loss": 0.5701, "step": 22543 }, { "epoch": 0.26953932973851913, "grad_norm": 2.497931480407715, "learning_rate": 8.569493010148963e-06, "loss": 0.6226, "step": 22544 }, { "epoch": 0.2695512858833798, "grad_norm": 5.57146692276001, "learning_rate": 8.569357427261478e-06, "loss": 0.57, "step": 22545 }, { "epoch": 0.2695632420282404, "grad_norm": 2.1526660919189453, "learning_rate": 8.569221839021728e-06, "loss": 0.5679, "step": 22546 }, { "epoch": 0.26957519817310105, "grad_norm": 4.09801721572876, "learning_rate": 8.569086245429916e-06, "loss": 0.5511, "step": 22547 }, { "epoch": 0.2695871543179617, "grad_norm": 4.744704723358154, "learning_rate": 8.568950646486249e-06, "loss": 0.5238, "step": 22548 }, { "epoch": 0.26959911046282237, "grad_norm": 2.2004952430725098, "learning_rate": 8.568815042190926e-06, "loss": 0.6043, "step": 22549 }, { "epoch": 0.269611066607683, "grad_norm": 11.982404708862305, "learning_rate": 8.568679432544153e-06, "loss": 0.6482, "step": 22550 }, { "epoch": 0.2696230227525437, "grad_norm": 2.4433696269989014, "learning_rate": 8.568543817546133e-06, "loss": 0.5734, "step": 22551 }, { "epoch": 0.26963497889740434, "grad_norm": 2.344254970550537, "learning_rate": 8.568408197197067e-06, "loss": 0.5814, "step": 22552 }, { "epoch": 0.26964693504226495, "grad_norm": 3.3242290019989014, "learning_rate": 8.568272571497162e-06, "loss": 0.7651, "step": 22553 }, { "epoch": 0.2696588911871256, "grad_norm": 3.776811361312866, "learning_rate": 8.56813694044662e-06, "loss": 0.6273, "step": 22554 }, { "epoch": 0.26967084733198626, "grad_norm": 3.617990016937256, "learning_rate": 8.56800130404564e-06, "loss": 0.6192, "step": 22555 }, { "epoch": 0.2696828034768469, "grad_norm": 2.0166401863098145, "learning_rate": 8.567865662294435e-06, "loss": 0.6348, "step": 22556 }, { "epoch": 0.2696947596217076, "grad_norm": 1.9611074924468994, "learning_rate": 8.5677300151932e-06, "loss": 0.6594, "step": 22557 }, { "epoch": 0.26970671576656824, "grad_norm": 5.990206241607666, "learning_rate": 8.56759436274214e-06, "loss": 0.6172, "step": 22558 }, { "epoch": 0.2697186719114289, "grad_norm": 5.938731670379639, "learning_rate": 8.56745870494146e-06, "loss": 0.6472, "step": 22559 }, { "epoch": 0.26973062805628956, "grad_norm": 6.631576061248779, "learning_rate": 8.567323041791365e-06, "loss": 0.5175, "step": 22560 }, { "epoch": 0.26974258420115016, "grad_norm": 2.466547966003418, "learning_rate": 8.567187373292056e-06, "loss": 0.5931, "step": 22561 }, { "epoch": 0.2697545403460108, "grad_norm": 2.327923059463501, "learning_rate": 8.567051699443735e-06, "loss": 0.5863, "step": 22562 }, { "epoch": 0.2697664964908715, "grad_norm": 7.1589579582214355, "learning_rate": 8.566916020246608e-06, "loss": 0.6195, "step": 22563 }, { "epoch": 0.26977845263573214, "grad_norm": 3.9049272537231445, "learning_rate": 8.566780335700878e-06, "loss": 0.6127, "step": 22564 }, { "epoch": 0.2697904087805928, "grad_norm": 4.453236103057861, "learning_rate": 8.56664464580675e-06, "loss": 0.6424, "step": 22565 }, { "epoch": 0.26980236492545345, "grad_norm": 2.493675947189331, "learning_rate": 8.566508950564423e-06, "loss": 0.6168, "step": 22566 }, { "epoch": 0.2698143210703141, "grad_norm": 5.933764457702637, "learning_rate": 8.566373249974105e-06, "loss": 0.5918, "step": 22567 }, { "epoch": 0.2698262772151747, "grad_norm": 2.913186550140381, "learning_rate": 8.566237544035996e-06, "loss": 0.5937, "step": 22568 }, { "epoch": 0.2698382333600354, "grad_norm": 1.9167792797088623, "learning_rate": 8.566101832750302e-06, "loss": 0.5763, "step": 22569 }, { "epoch": 0.26985018950489603, "grad_norm": 4.14937686920166, "learning_rate": 8.565966116117225e-06, "loss": 0.619, "step": 22570 }, { "epoch": 0.2698621456497567, "grad_norm": 2.789886951446533, "learning_rate": 8.565830394136969e-06, "loss": 0.5926, "step": 22571 }, { "epoch": 0.26987410179461735, "grad_norm": 1.628197431564331, "learning_rate": 8.565694666809739e-06, "loss": 0.6129, "step": 22572 }, { "epoch": 0.269886057939478, "grad_norm": 18.26226806640625, "learning_rate": 8.565558934135735e-06, "loss": 0.5209, "step": 22573 }, { "epoch": 0.26989801408433867, "grad_norm": 3.9076433181762695, "learning_rate": 8.565423196115165e-06, "loss": 0.5882, "step": 22574 }, { "epoch": 0.26990997022919927, "grad_norm": 5.4364776611328125, "learning_rate": 8.56528745274823e-06, "loss": 0.6051, "step": 22575 }, { "epoch": 0.26992192637405993, "grad_norm": 3.6614301204681396, "learning_rate": 8.565151704035132e-06, "loss": 0.6281, "step": 22576 }, { "epoch": 0.2699338825189206, "grad_norm": 2.607764720916748, "learning_rate": 8.56501594997608e-06, "loss": 0.6823, "step": 22577 }, { "epoch": 0.26994583866378125, "grad_norm": 4.031237602233887, "learning_rate": 8.56488019057127e-06, "loss": 0.5354, "step": 22578 }, { "epoch": 0.2699577948086419, "grad_norm": 3.3967268466949463, "learning_rate": 8.564744425820911e-06, "loss": 0.6751, "step": 22579 }, { "epoch": 0.26996975095350256, "grad_norm": 2.824542760848999, "learning_rate": 8.564608655725206e-06, "loss": 0.6115, "step": 22580 }, { "epoch": 0.2699817070983632, "grad_norm": 3.016864776611328, "learning_rate": 8.564472880284356e-06, "loss": 0.5931, "step": 22581 }, { "epoch": 0.2699936632432239, "grad_norm": 1.8535327911376953, "learning_rate": 8.564337099498568e-06, "loss": 0.7039, "step": 22582 }, { "epoch": 0.2700056193880845, "grad_norm": 2.0729241371154785, "learning_rate": 8.564201313368041e-06, "loss": 0.5778, "step": 22583 }, { "epoch": 0.27001757553294514, "grad_norm": 2.3315927982330322, "learning_rate": 8.564065521892983e-06, "loss": 0.5944, "step": 22584 }, { "epoch": 0.2700295316778058, "grad_norm": 16.318029403686523, "learning_rate": 8.563929725073597e-06, "loss": 0.5673, "step": 22585 }, { "epoch": 0.27004148782266646, "grad_norm": 2.3406569957733154, "learning_rate": 8.563793922910086e-06, "loss": 0.6424, "step": 22586 }, { "epoch": 0.2700534439675271, "grad_norm": 3.338650703430176, "learning_rate": 8.563658115402651e-06, "loss": 0.5755, "step": 22587 }, { "epoch": 0.2700654001123878, "grad_norm": 2.234541654586792, "learning_rate": 8.5635223025515e-06, "loss": 0.5633, "step": 22588 }, { "epoch": 0.27007735625724844, "grad_norm": 4.90207052230835, "learning_rate": 8.563386484356834e-06, "loss": 0.5894, "step": 22589 }, { "epoch": 0.27008931240210904, "grad_norm": 11.78229808807373, "learning_rate": 8.563250660818856e-06, "loss": 0.5388, "step": 22590 }, { "epoch": 0.2701012685469697, "grad_norm": 2.536059856414795, "learning_rate": 8.563114831937773e-06, "loss": 0.6039, "step": 22591 }, { "epoch": 0.27011322469183036, "grad_norm": 2.7295942306518555, "learning_rate": 8.562978997713787e-06, "loss": 0.6732, "step": 22592 }, { "epoch": 0.270125180836691, "grad_norm": 4.1275506019592285, "learning_rate": 8.562843158147099e-06, "loss": 0.6137, "step": 22593 }, { "epoch": 0.2701371369815517, "grad_norm": 2.16853404045105, "learning_rate": 8.562707313237916e-06, "loss": 0.6642, "step": 22594 }, { "epoch": 0.27014909312641233, "grad_norm": 1.8400278091430664, "learning_rate": 8.562571462986443e-06, "loss": 0.6213, "step": 22595 }, { "epoch": 0.270161049271273, "grad_norm": 3.0776054859161377, "learning_rate": 8.562435607392878e-06, "loss": 0.7025, "step": 22596 }, { "epoch": 0.27017300541613365, "grad_norm": 3.36858868598938, "learning_rate": 8.562299746457429e-06, "loss": 0.5849, "step": 22597 }, { "epoch": 0.27018496156099425, "grad_norm": 9.000177383422852, "learning_rate": 8.5621638801803e-06, "loss": 0.5152, "step": 22598 }, { "epoch": 0.2701969177058549, "grad_norm": 2.4834587574005127, "learning_rate": 8.562028008561694e-06, "loss": 0.6245, "step": 22599 }, { "epoch": 0.27020887385071557, "grad_norm": 2.723515748977661, "learning_rate": 8.561892131601811e-06, "loss": 0.5645, "step": 22600 }, { "epoch": 0.27022082999557623, "grad_norm": 24.228059768676758, "learning_rate": 8.561756249300861e-06, "loss": 0.622, "step": 22601 }, { "epoch": 0.2702327861404369, "grad_norm": 3.372180223464966, "learning_rate": 8.561620361659044e-06, "loss": 0.5637, "step": 22602 }, { "epoch": 0.27024474228529755, "grad_norm": 2.751338005065918, "learning_rate": 8.561484468676566e-06, "loss": 0.709, "step": 22603 }, { "epoch": 0.2702566984301582, "grad_norm": 2.7973461151123047, "learning_rate": 8.561348570353628e-06, "loss": 0.7778, "step": 22604 }, { "epoch": 0.2702686545750188, "grad_norm": 2.8440282344818115, "learning_rate": 8.561212666690435e-06, "loss": 0.7024, "step": 22605 }, { "epoch": 0.27028061071987947, "grad_norm": 2.793276786804199, "learning_rate": 8.561076757687193e-06, "loss": 0.573, "step": 22606 }, { "epoch": 0.2702925668647401, "grad_norm": 8.10882568359375, "learning_rate": 8.560940843344101e-06, "loss": 0.5637, "step": 22607 }, { "epoch": 0.2703045230096008, "grad_norm": 3.0045406818389893, "learning_rate": 8.560804923661367e-06, "loss": 0.5813, "step": 22608 }, { "epoch": 0.27031647915446144, "grad_norm": 2.6603658199310303, "learning_rate": 8.560668998639192e-06, "loss": 0.4852, "step": 22609 }, { "epoch": 0.2703284352993221, "grad_norm": 2.7814173698425293, "learning_rate": 8.560533068277783e-06, "loss": 0.6, "step": 22610 }, { "epoch": 0.27034039144418276, "grad_norm": 4.863980770111084, "learning_rate": 8.56039713257734e-06, "loss": 0.6322, "step": 22611 }, { "epoch": 0.27035234758904336, "grad_norm": 3.5036537647247314, "learning_rate": 8.560261191538071e-06, "loss": 0.662, "step": 22612 }, { "epoch": 0.270364303733904, "grad_norm": 1.5436373949050903, "learning_rate": 8.560125245160176e-06, "loss": 0.575, "step": 22613 }, { "epoch": 0.2703762598787647, "grad_norm": 8.781298637390137, "learning_rate": 8.559989293443862e-06, "loss": 0.6574, "step": 22614 }, { "epoch": 0.27038821602362534, "grad_norm": 2.693188190460205, "learning_rate": 8.55985333638933e-06, "loss": 0.6063, "step": 22615 }, { "epoch": 0.270400172168486, "grad_norm": 5.628568172454834, "learning_rate": 8.559717373996786e-06, "loss": 0.5965, "step": 22616 }, { "epoch": 0.27041212831334666, "grad_norm": 3.642920732498169, "learning_rate": 8.559581406266432e-06, "loss": 0.6085, "step": 22617 }, { "epoch": 0.2704240844582073, "grad_norm": 2.003582239151001, "learning_rate": 8.559445433198476e-06, "loss": 0.5987, "step": 22618 }, { "epoch": 0.270436040603068, "grad_norm": 4.039196968078613, "learning_rate": 8.559309454793117e-06, "loss": 0.6424, "step": 22619 }, { "epoch": 0.2704479967479286, "grad_norm": 3.507138967514038, "learning_rate": 8.559173471050562e-06, "loss": 0.5388, "step": 22620 }, { "epoch": 0.27045995289278923, "grad_norm": 3.9900500774383545, "learning_rate": 8.55903748197101e-06, "loss": 0.608, "step": 22621 }, { "epoch": 0.2704719090376499, "grad_norm": 8.301724433898926, "learning_rate": 8.558901487554671e-06, "loss": 0.5747, "step": 22622 }, { "epoch": 0.27048386518251055, "grad_norm": 2.6710281372070312, "learning_rate": 8.558765487801747e-06, "loss": 0.6169, "step": 22623 }, { "epoch": 0.2704958213273712, "grad_norm": 4.713459014892578, "learning_rate": 8.558629482712442e-06, "loss": 0.5746, "step": 22624 }, { "epoch": 0.27050777747223187, "grad_norm": 8.403581619262695, "learning_rate": 8.558493472286959e-06, "loss": 0.5665, "step": 22625 }, { "epoch": 0.27051973361709253, "grad_norm": 3.0557217597961426, "learning_rate": 8.558357456525503e-06, "loss": 0.571, "step": 22626 }, { "epoch": 0.27053168976195313, "grad_norm": 1.77603018283844, "learning_rate": 8.558221435428276e-06, "loss": 0.551, "step": 22627 }, { "epoch": 0.2705436459068138, "grad_norm": 3.279169797897339, "learning_rate": 8.558085408995484e-06, "loss": 0.595, "step": 22628 }, { "epoch": 0.27055560205167445, "grad_norm": 6.532366752624512, "learning_rate": 8.55794937722733e-06, "loss": 0.6138, "step": 22629 }, { "epoch": 0.2705675581965351, "grad_norm": 1.9451477527618408, "learning_rate": 8.557813340124018e-06, "loss": 0.5846, "step": 22630 }, { "epoch": 0.27057951434139577, "grad_norm": 10.07235336303711, "learning_rate": 8.557677297685753e-06, "loss": 0.6822, "step": 22631 }, { "epoch": 0.2705914704862564, "grad_norm": 5.254629611968994, "learning_rate": 8.557541249912737e-06, "loss": 0.6855, "step": 22632 }, { "epoch": 0.2706034266311171, "grad_norm": 2.947410821914673, "learning_rate": 8.557405196805177e-06, "loss": 0.6068, "step": 22633 }, { "epoch": 0.2706153827759777, "grad_norm": 4.007492542266846, "learning_rate": 8.557269138363275e-06, "loss": 0.6703, "step": 22634 }, { "epoch": 0.27062733892083835, "grad_norm": 2.254993200302124, "learning_rate": 8.557133074587234e-06, "loss": 0.6121, "step": 22635 }, { "epoch": 0.270639295065699, "grad_norm": 8.918603897094727, "learning_rate": 8.55699700547726e-06, "loss": 0.5502, "step": 22636 }, { "epoch": 0.27065125121055966, "grad_norm": 6.8658881187438965, "learning_rate": 8.556860931033555e-06, "loss": 0.6511, "step": 22637 }, { "epoch": 0.2706632073554203, "grad_norm": 4.054788589477539, "learning_rate": 8.556724851256326e-06, "loss": 0.6176, "step": 22638 }, { "epoch": 0.270675163500281, "grad_norm": 2.0745041370391846, "learning_rate": 8.556588766145776e-06, "loss": 0.6089, "step": 22639 }, { "epoch": 0.27068711964514164, "grad_norm": 2.7902517318725586, "learning_rate": 8.556452675702108e-06, "loss": 0.7082, "step": 22640 }, { "epoch": 0.2706990757900023, "grad_norm": 1.7835744619369507, "learning_rate": 8.556316579925526e-06, "loss": 0.6157, "step": 22641 }, { "epoch": 0.2707110319348629, "grad_norm": 2.145453453063965, "learning_rate": 8.556180478816234e-06, "loss": 0.7028, "step": 22642 }, { "epoch": 0.27072298807972356, "grad_norm": 1.7845138311386108, "learning_rate": 8.556044372374438e-06, "loss": 0.5728, "step": 22643 }, { "epoch": 0.2707349442245842, "grad_norm": 5.642791271209717, "learning_rate": 8.555908260600341e-06, "loss": 0.6599, "step": 22644 }, { "epoch": 0.2707469003694449, "grad_norm": 7.814757347106934, "learning_rate": 8.555772143494146e-06, "loss": 0.6248, "step": 22645 }, { "epoch": 0.27075885651430553, "grad_norm": 5.053218841552734, "learning_rate": 8.555636021056057e-06, "loss": 0.5966, "step": 22646 }, { "epoch": 0.2707708126591662, "grad_norm": 9.121907234191895, "learning_rate": 8.555499893286281e-06, "loss": 0.5678, "step": 22647 }, { "epoch": 0.27078276880402685, "grad_norm": 4.443605422973633, "learning_rate": 8.555363760185021e-06, "loss": 0.7202, "step": 22648 }, { "epoch": 0.27079472494888746, "grad_norm": 2.1773879528045654, "learning_rate": 8.555227621752478e-06, "loss": 0.5018, "step": 22649 }, { "epoch": 0.2708066810937481, "grad_norm": 2.1440701484680176, "learning_rate": 8.55509147798886e-06, "loss": 0.6296, "step": 22650 }, { "epoch": 0.2708186372386088, "grad_norm": 5.9887800216674805, "learning_rate": 8.55495532889437e-06, "loss": 0.6242, "step": 22651 }, { "epoch": 0.27083059338346943, "grad_norm": 2.9235970973968506, "learning_rate": 8.55481917446921e-06, "loss": 0.496, "step": 22652 }, { "epoch": 0.2708425495283301, "grad_norm": 2.0227439403533936, "learning_rate": 8.554683014713587e-06, "loss": 0.5685, "step": 22653 }, { "epoch": 0.27085450567319075, "grad_norm": 2.04689359664917, "learning_rate": 8.554546849627706e-06, "loss": 0.5706, "step": 22654 }, { "epoch": 0.2708664618180514, "grad_norm": 2.364009380340576, "learning_rate": 8.554410679211768e-06, "loss": 0.6614, "step": 22655 }, { "epoch": 0.27087841796291207, "grad_norm": 4.024877548217773, "learning_rate": 8.554274503465978e-06, "loss": 0.5934, "step": 22656 }, { "epoch": 0.27089037410777267, "grad_norm": 2.479797601699829, "learning_rate": 8.554138322390542e-06, "loss": 0.5799, "step": 22657 }, { "epoch": 0.2709023302526333, "grad_norm": 3.592262029647827, "learning_rate": 8.554002135985661e-06, "loss": 0.6405, "step": 22658 }, { "epoch": 0.270914286397494, "grad_norm": 1.8197976350784302, "learning_rate": 8.553865944251542e-06, "loss": 0.4628, "step": 22659 }, { "epoch": 0.27092624254235464, "grad_norm": 2.5735108852386475, "learning_rate": 8.553729747188388e-06, "loss": 0.5548, "step": 22660 }, { "epoch": 0.2709381986872153, "grad_norm": 5.589890956878662, "learning_rate": 8.553593544796406e-06, "loss": 0.5412, "step": 22661 }, { "epoch": 0.27095015483207596, "grad_norm": 3.4038500785827637, "learning_rate": 8.553457337075795e-06, "loss": 0.5178, "step": 22662 }, { "epoch": 0.2709621109769366, "grad_norm": 1.7930070161819458, "learning_rate": 8.553321124026763e-06, "loss": 0.5688, "step": 22663 }, { "epoch": 0.2709740671217972, "grad_norm": 1.8352817296981812, "learning_rate": 8.553184905649514e-06, "loss": 0.6317, "step": 22664 }, { "epoch": 0.2709860232666579, "grad_norm": 3.1604597568511963, "learning_rate": 8.553048681944251e-06, "loss": 0.6397, "step": 22665 }, { "epoch": 0.27099797941151854, "grad_norm": 2.4807488918304443, "learning_rate": 8.55291245291118e-06, "loss": 0.5634, "step": 22666 }, { "epoch": 0.2710099355563792, "grad_norm": 5.48319149017334, "learning_rate": 8.552776218550502e-06, "loss": 0.5538, "step": 22667 }, { "epoch": 0.27102189170123986, "grad_norm": 4.321256160736084, "learning_rate": 8.552639978862424e-06, "loss": 0.522, "step": 22668 }, { "epoch": 0.2710338478461005, "grad_norm": 2.663424253463745, "learning_rate": 8.552503733847151e-06, "loss": 0.6262, "step": 22669 }, { "epoch": 0.2710458039909612, "grad_norm": 3.087292432785034, "learning_rate": 8.552367483504885e-06, "loss": 0.6322, "step": 22670 }, { "epoch": 0.2710577601358218, "grad_norm": 5.03840446472168, "learning_rate": 8.552231227835833e-06, "loss": 0.6941, "step": 22671 }, { "epoch": 0.27106971628068244, "grad_norm": 3.573758363723755, "learning_rate": 8.552094966840195e-06, "loss": 0.6281, "step": 22672 }, { "epoch": 0.2710816724255431, "grad_norm": 2.634289264678955, "learning_rate": 8.55195870051818e-06, "loss": 0.5772, "step": 22673 }, { "epoch": 0.27109362857040376, "grad_norm": 6.6521077156066895, "learning_rate": 8.55182242886999e-06, "loss": 0.6157, "step": 22674 }, { "epoch": 0.2711055847152644, "grad_norm": 13.480046272277832, "learning_rate": 8.551686151895828e-06, "loss": 0.6218, "step": 22675 }, { "epoch": 0.2711175408601251, "grad_norm": 5.038514137268066, "learning_rate": 8.551549869595901e-06, "loss": 0.706, "step": 22676 }, { "epoch": 0.27112949700498573, "grad_norm": 3.0253477096557617, "learning_rate": 8.551413581970413e-06, "loss": 0.5434, "step": 22677 }, { "epoch": 0.2711414531498464, "grad_norm": 2.0470967292785645, "learning_rate": 8.551277289019567e-06, "loss": 0.643, "step": 22678 }, { "epoch": 0.271153409294707, "grad_norm": 1.9036282300949097, "learning_rate": 8.551140990743569e-06, "loss": 0.6487, "step": 22679 }, { "epoch": 0.27116536543956765, "grad_norm": 2.946824073791504, "learning_rate": 8.551004687142621e-06, "loss": 0.5557, "step": 22680 }, { "epoch": 0.2711773215844283, "grad_norm": 8.108216285705566, "learning_rate": 8.55086837821693e-06, "loss": 0.6831, "step": 22681 }, { "epoch": 0.27118927772928897, "grad_norm": 2.8282277584075928, "learning_rate": 8.550732063966698e-06, "loss": 0.5752, "step": 22682 }, { "epoch": 0.2712012338741496, "grad_norm": 1.9981439113616943, "learning_rate": 8.550595744392133e-06, "loss": 0.5661, "step": 22683 }, { "epoch": 0.2712131900190103, "grad_norm": 3.1186413764953613, "learning_rate": 8.550459419493435e-06, "loss": 0.5794, "step": 22684 }, { "epoch": 0.27122514616387094, "grad_norm": 2.0424957275390625, "learning_rate": 8.550323089270811e-06, "loss": 0.6825, "step": 22685 }, { "epoch": 0.27123710230873155, "grad_norm": 3.4093456268310547, "learning_rate": 8.550186753724465e-06, "loss": 0.6084, "step": 22686 }, { "epoch": 0.2712490584535922, "grad_norm": 2.155902862548828, "learning_rate": 8.550050412854602e-06, "loss": 0.5676, "step": 22687 }, { "epoch": 0.27126101459845287, "grad_norm": 1.9288009405136108, "learning_rate": 8.549914066661424e-06, "loss": 0.7622, "step": 22688 }, { "epoch": 0.2712729707433135, "grad_norm": 2.8801801204681396, "learning_rate": 8.54977771514514e-06, "loss": 0.5952, "step": 22689 }, { "epoch": 0.2712849268881742, "grad_norm": 2.199258327484131, "learning_rate": 8.549641358305949e-06, "loss": 0.6235, "step": 22690 }, { "epoch": 0.27129688303303484, "grad_norm": 5.454029083251953, "learning_rate": 8.54950499614406e-06, "loss": 0.6603, "step": 22691 }, { "epoch": 0.2713088391778955, "grad_norm": 2.2830896377563477, "learning_rate": 8.549368628659675e-06, "loss": 0.5468, "step": 22692 }, { "epoch": 0.2713207953227561, "grad_norm": 2.81396484375, "learning_rate": 8.549232255852999e-06, "loss": 0.6753, "step": 22693 }, { "epoch": 0.27133275146761676, "grad_norm": 1.946619987487793, "learning_rate": 8.549095877724236e-06, "loss": 0.502, "step": 22694 }, { "epoch": 0.2713447076124774, "grad_norm": 3.0988121032714844, "learning_rate": 8.548959494273591e-06, "loss": 0.6711, "step": 22695 }, { "epoch": 0.2713566637573381, "grad_norm": 2.098487615585327, "learning_rate": 8.548823105501269e-06, "loss": 0.527, "step": 22696 }, { "epoch": 0.27136861990219874, "grad_norm": 4.349335193634033, "learning_rate": 8.548686711407475e-06, "loss": 0.6245, "step": 22697 }, { "epoch": 0.2713805760470594, "grad_norm": 4.4571309089660645, "learning_rate": 8.548550311992412e-06, "loss": 0.7153, "step": 22698 }, { "epoch": 0.27139253219192006, "grad_norm": 3.106130838394165, "learning_rate": 8.548413907256285e-06, "loss": 0.6164, "step": 22699 }, { "epoch": 0.2714044883367807, "grad_norm": 2.152653455734253, "learning_rate": 8.5482774971993e-06, "loss": 0.5318, "step": 22700 }, { "epoch": 0.2714164444816413, "grad_norm": 2.6537060737609863, "learning_rate": 8.548141081821659e-06, "loss": 0.6224, "step": 22701 }, { "epoch": 0.271428400626502, "grad_norm": 4.074338912963867, "learning_rate": 8.548004661123569e-06, "loss": 0.597, "step": 22702 }, { "epoch": 0.27144035677136263, "grad_norm": 2.2359507083892822, "learning_rate": 8.547868235105232e-06, "loss": 0.6743, "step": 22703 }, { "epoch": 0.2714523129162233, "grad_norm": 1.583713412284851, "learning_rate": 8.547731803766854e-06, "loss": 0.6005, "step": 22704 }, { "epoch": 0.27146426906108395, "grad_norm": 2.784242630004883, "learning_rate": 8.54759536710864e-06, "loss": 0.4771, "step": 22705 }, { "epoch": 0.2714762252059446, "grad_norm": 6.470542907714844, "learning_rate": 8.547458925130795e-06, "loss": 0.5573, "step": 22706 }, { "epoch": 0.27148818135080527, "grad_norm": 2.8141794204711914, "learning_rate": 8.547322477833522e-06, "loss": 0.5733, "step": 22707 }, { "epoch": 0.27150013749566587, "grad_norm": 4.235090255737305, "learning_rate": 8.547186025217026e-06, "loss": 0.5751, "step": 22708 }, { "epoch": 0.27151209364052653, "grad_norm": 7.385677814483643, "learning_rate": 8.547049567281513e-06, "loss": 0.5249, "step": 22709 }, { "epoch": 0.2715240497853872, "grad_norm": 2.415487766265869, "learning_rate": 8.546913104027186e-06, "loss": 0.5959, "step": 22710 }, { "epoch": 0.27153600593024785, "grad_norm": 4.060417652130127, "learning_rate": 8.54677663545425e-06, "loss": 0.6823, "step": 22711 }, { "epoch": 0.2715479620751085, "grad_norm": 5.413696765899658, "learning_rate": 8.546640161562908e-06, "loss": 0.5547, "step": 22712 }, { "epoch": 0.27155991821996917, "grad_norm": 2.9070944786071777, "learning_rate": 8.54650368235337e-06, "loss": 0.5985, "step": 22713 }, { "epoch": 0.2715718743648298, "grad_norm": 5.311195373535156, "learning_rate": 8.546367197825835e-06, "loss": 0.5692, "step": 22714 }, { "epoch": 0.2715838305096905, "grad_norm": 3.6361846923828125, "learning_rate": 8.54623070798051e-06, "loss": 0.6403, "step": 22715 }, { "epoch": 0.2715957866545511, "grad_norm": 5.867760181427002, "learning_rate": 8.5460942128176e-06, "loss": 0.6419, "step": 22716 }, { "epoch": 0.27160774279941174, "grad_norm": 12.51755142211914, "learning_rate": 8.545957712337309e-06, "loss": 0.7125, "step": 22717 }, { "epoch": 0.2716196989442724, "grad_norm": 3.1907906532287598, "learning_rate": 8.54582120653984e-06, "loss": 0.6391, "step": 22718 }, { "epoch": 0.27163165508913306, "grad_norm": 3.432461977005005, "learning_rate": 8.545684695425403e-06, "loss": 0.5757, "step": 22719 }, { "epoch": 0.2716436112339937, "grad_norm": 3.946603298187256, "learning_rate": 8.545548178994197e-06, "loss": 0.5571, "step": 22720 }, { "epoch": 0.2716555673788544, "grad_norm": 2.9582836627960205, "learning_rate": 8.54541165724643e-06, "loss": 0.6604, "step": 22721 }, { "epoch": 0.27166752352371504, "grad_norm": 3.472217321395874, "learning_rate": 8.545275130182304e-06, "loss": 0.6128, "step": 22722 }, { "epoch": 0.27167947966857564, "grad_norm": 3.9265739917755127, "learning_rate": 8.545138597802027e-06, "loss": 0.7037, "step": 22723 }, { "epoch": 0.2716914358134363, "grad_norm": 4.526739597320557, "learning_rate": 8.545002060105801e-06, "loss": 0.6317, "step": 22724 }, { "epoch": 0.27170339195829696, "grad_norm": 2.879208564758301, "learning_rate": 8.544865517093832e-06, "loss": 0.6715, "step": 22725 }, { "epoch": 0.2717153481031576, "grad_norm": 2.6598005294799805, "learning_rate": 8.544728968766325e-06, "loss": 0.5979, "step": 22726 }, { "epoch": 0.2717273042480183, "grad_norm": 4.356904983520508, "learning_rate": 8.544592415123483e-06, "loss": 0.5979, "step": 22727 }, { "epoch": 0.27173926039287893, "grad_norm": 1.977486252784729, "learning_rate": 8.544455856165512e-06, "loss": 0.5753, "step": 22728 }, { "epoch": 0.2717512165377396, "grad_norm": 4.477833271026611, "learning_rate": 8.544319291892618e-06, "loss": 0.6175, "step": 22729 }, { "epoch": 0.2717631726826002, "grad_norm": 3.5414814949035645, "learning_rate": 8.544182722305006e-06, "loss": 0.6537, "step": 22730 }, { "epoch": 0.27177512882746085, "grad_norm": 3.6524322032928467, "learning_rate": 8.544046147402876e-06, "loss": 0.5745, "step": 22731 }, { "epoch": 0.2717870849723215, "grad_norm": 3.127873659133911, "learning_rate": 8.543909567186439e-06, "loss": 0.5411, "step": 22732 }, { "epoch": 0.27179904111718217, "grad_norm": 3.913498640060425, "learning_rate": 8.543772981655896e-06, "loss": 0.6184, "step": 22733 }, { "epoch": 0.27181099726204283, "grad_norm": 2.4281792640686035, "learning_rate": 8.543636390811453e-06, "loss": 0.6332, "step": 22734 }, { "epoch": 0.2718229534069035, "grad_norm": 89.02975463867188, "learning_rate": 8.543499794653313e-06, "loss": 0.5813, "step": 22735 }, { "epoch": 0.27183490955176415, "grad_norm": 2.4539566040039062, "learning_rate": 8.543363193181687e-06, "loss": 0.6333, "step": 22736 }, { "epoch": 0.2718468656966248, "grad_norm": 4.794932842254639, "learning_rate": 8.543226586396772e-06, "loss": 0.5506, "step": 22737 }, { "epoch": 0.2718588218414854, "grad_norm": 2.470691204071045, "learning_rate": 8.543089974298775e-06, "loss": 0.5636, "step": 22738 }, { "epoch": 0.27187077798634607, "grad_norm": 2.6145284175872803, "learning_rate": 8.542953356887904e-06, "loss": 0.6285, "step": 22739 }, { "epoch": 0.2718827341312067, "grad_norm": 2.18221378326416, "learning_rate": 8.542816734164361e-06, "loss": 0.5691, "step": 22740 }, { "epoch": 0.2718946902760674, "grad_norm": 1.8094406127929688, "learning_rate": 8.542680106128352e-06, "loss": 0.6331, "step": 22741 }, { "epoch": 0.27190664642092804, "grad_norm": 2.951361894607544, "learning_rate": 8.542543472780082e-06, "loss": 0.5169, "step": 22742 }, { "epoch": 0.2719186025657887, "grad_norm": 3.8062093257904053, "learning_rate": 8.542406834119755e-06, "loss": 0.5622, "step": 22743 }, { "epoch": 0.27193055871064936, "grad_norm": 5.438041687011719, "learning_rate": 8.542270190147576e-06, "loss": 0.6317, "step": 22744 }, { "epoch": 0.27194251485550996, "grad_norm": 2.791393995285034, "learning_rate": 8.54213354086375e-06, "loss": 0.5718, "step": 22745 }, { "epoch": 0.2719544710003706, "grad_norm": 7.304370403289795, "learning_rate": 8.541996886268484e-06, "loss": 0.6238, "step": 22746 }, { "epoch": 0.2719664271452313, "grad_norm": 7.222146034240723, "learning_rate": 8.54186022636198e-06, "loss": 0.5292, "step": 22747 }, { "epoch": 0.27197838329009194, "grad_norm": 5.908478260040283, "learning_rate": 8.541723561144443e-06, "loss": 0.5977, "step": 22748 }, { "epoch": 0.2719903394349526, "grad_norm": 3.098252773284912, "learning_rate": 8.54158689061608e-06, "loss": 0.5927, "step": 22749 }, { "epoch": 0.27200229557981326, "grad_norm": 6.668893337249756, "learning_rate": 8.541450214777095e-06, "loss": 0.5569, "step": 22750 }, { "epoch": 0.2720142517246739, "grad_norm": 6.55320405960083, "learning_rate": 8.541313533627692e-06, "loss": 0.7156, "step": 22751 }, { "epoch": 0.2720262078695345, "grad_norm": 3.9867372512817383, "learning_rate": 8.541176847168078e-06, "loss": 0.6138, "step": 22752 }, { "epoch": 0.2720381640143952, "grad_norm": 4.91623592376709, "learning_rate": 8.541040155398456e-06, "loss": 0.5031, "step": 22753 }, { "epoch": 0.27205012015925584, "grad_norm": 2.904646158218384, "learning_rate": 8.540903458319031e-06, "loss": 0.5777, "step": 22754 }, { "epoch": 0.2720620763041165, "grad_norm": 2.0186896324157715, "learning_rate": 8.54076675593001e-06, "loss": 0.6325, "step": 22755 }, { "epoch": 0.27207403244897715, "grad_norm": 3.771988868713379, "learning_rate": 8.540630048231595e-06, "loss": 0.5844, "step": 22756 }, { "epoch": 0.2720859885938378, "grad_norm": 3.9633243083953857, "learning_rate": 8.540493335223993e-06, "loss": 0.5462, "step": 22757 }, { "epoch": 0.27209794473869847, "grad_norm": 2.267630100250244, "learning_rate": 8.540356616907411e-06, "loss": 0.56, "step": 22758 }, { "epoch": 0.27210990088355913, "grad_norm": 3.6603240966796875, "learning_rate": 8.54021989328205e-06, "loss": 0.5714, "step": 22759 }, { "epoch": 0.27212185702841973, "grad_norm": 2.4099483489990234, "learning_rate": 8.540083164348117e-06, "loss": 0.5753, "step": 22760 }, { "epoch": 0.2721338131732804, "grad_norm": 2.9870619773864746, "learning_rate": 8.539946430105816e-06, "loss": 0.6965, "step": 22761 }, { "epoch": 0.27214576931814105, "grad_norm": 6.1140360832214355, "learning_rate": 8.539809690555354e-06, "loss": 0.583, "step": 22762 }, { "epoch": 0.2721577254630017, "grad_norm": 2.9497058391571045, "learning_rate": 8.539672945696933e-06, "loss": 0.6757, "step": 22763 }, { "epoch": 0.27216968160786237, "grad_norm": 4.248139381408691, "learning_rate": 8.539536195530761e-06, "loss": 0.6161, "step": 22764 }, { "epoch": 0.272181637752723, "grad_norm": 11.60340690612793, "learning_rate": 8.53939944005704e-06, "loss": 0.5968, "step": 22765 }, { "epoch": 0.2721935938975837, "grad_norm": 6.5151567459106445, "learning_rate": 8.53926267927598e-06, "loss": 0.5734, "step": 22766 }, { "epoch": 0.2722055500424443, "grad_norm": 2.8441367149353027, "learning_rate": 8.539125913187782e-06, "loss": 0.6119, "step": 22767 }, { "epoch": 0.27221750618730495, "grad_norm": 2.4033541679382324, "learning_rate": 8.538989141792652e-06, "loss": 0.5707, "step": 22768 }, { "epoch": 0.2722294623321656, "grad_norm": 5.188885688781738, "learning_rate": 8.538852365090795e-06, "loss": 0.5928, "step": 22769 }, { "epoch": 0.27224141847702626, "grad_norm": 3.263740062713623, "learning_rate": 8.538715583082415e-06, "loss": 0.5369, "step": 22770 }, { "epoch": 0.2722533746218869, "grad_norm": 5.7427778244018555, "learning_rate": 8.53857879576772e-06, "loss": 0.6182, "step": 22771 }, { "epoch": 0.2722653307667476, "grad_norm": 3.471698522567749, "learning_rate": 8.538442003146915e-06, "loss": 0.5744, "step": 22772 }, { "epoch": 0.27227728691160824, "grad_norm": 6.195023536682129, "learning_rate": 8.5383052052202e-06, "loss": 0.5668, "step": 22773 }, { "epoch": 0.2722892430564689, "grad_norm": 1.9190746545791626, "learning_rate": 8.538168401987786e-06, "loss": 0.7232, "step": 22774 }, { "epoch": 0.2723011992013295, "grad_norm": 2.4949240684509277, "learning_rate": 8.538031593449875e-06, "loss": 0.6678, "step": 22775 }, { "epoch": 0.27231315534619016, "grad_norm": 2.9119975566864014, "learning_rate": 8.537894779606675e-06, "loss": 0.5587, "step": 22776 }, { "epoch": 0.2723251114910508, "grad_norm": 3.772714138031006, "learning_rate": 8.537757960458388e-06, "loss": 0.66, "step": 22777 }, { "epoch": 0.2723370676359115, "grad_norm": 2.088777780532837, "learning_rate": 8.53762113600522e-06, "loss": 0.6364, "step": 22778 }, { "epoch": 0.27234902378077214, "grad_norm": 2.1289479732513428, "learning_rate": 8.537484306247376e-06, "loss": 0.6209, "step": 22779 }, { "epoch": 0.2723609799256328, "grad_norm": 3.692629337310791, "learning_rate": 8.537347471185063e-06, "loss": 0.6516, "step": 22780 }, { "epoch": 0.27237293607049345, "grad_norm": 3.830953359603882, "learning_rate": 8.537210630818485e-06, "loss": 0.6462, "step": 22781 }, { "epoch": 0.27238489221535406, "grad_norm": 2.0414419174194336, "learning_rate": 8.537073785147845e-06, "loss": 0.5348, "step": 22782 }, { "epoch": 0.2723968483602147, "grad_norm": 3.213872194290161, "learning_rate": 8.536936934173352e-06, "loss": 0.6294, "step": 22783 }, { "epoch": 0.2724088045050754, "grad_norm": 2.2716281414031982, "learning_rate": 8.536800077895209e-06, "loss": 0.6321, "step": 22784 }, { "epoch": 0.27242076064993603, "grad_norm": 1.9843721389770508, "learning_rate": 8.536663216313622e-06, "loss": 0.5353, "step": 22785 }, { "epoch": 0.2724327167947967, "grad_norm": 3.9066948890686035, "learning_rate": 8.536526349428795e-06, "loss": 0.6017, "step": 22786 }, { "epoch": 0.27244467293965735, "grad_norm": 2.8268768787384033, "learning_rate": 8.536389477240936e-06, "loss": 0.602, "step": 22787 }, { "epoch": 0.272456629084518, "grad_norm": 2.9246466159820557, "learning_rate": 8.536252599750247e-06, "loss": 0.6626, "step": 22788 }, { "epoch": 0.2724685852293786, "grad_norm": 2.619354486465454, "learning_rate": 8.536115716956936e-06, "loss": 0.7219, "step": 22789 }, { "epoch": 0.27248054137423927, "grad_norm": 2.36503267288208, "learning_rate": 8.535978828861205e-06, "loss": 0.4993, "step": 22790 }, { "epoch": 0.27249249751909993, "grad_norm": 3.05730938911438, "learning_rate": 8.535841935463262e-06, "loss": 0.6303, "step": 22791 }, { "epoch": 0.2725044536639606, "grad_norm": 2.4243593215942383, "learning_rate": 8.535705036763311e-06, "loss": 0.5766, "step": 22792 }, { "epoch": 0.27251640980882125, "grad_norm": 3.0304908752441406, "learning_rate": 8.53556813276156e-06, "loss": 0.6737, "step": 22793 }, { "epoch": 0.2725283659536819, "grad_norm": 2.165003538131714, "learning_rate": 8.535431223458211e-06, "loss": 0.4722, "step": 22794 }, { "epoch": 0.27254032209854256, "grad_norm": 2.969386577606201, "learning_rate": 8.535294308853467e-06, "loss": 0.6126, "step": 22795 }, { "epoch": 0.2725522782434032, "grad_norm": 3.1665475368499756, "learning_rate": 8.535157388947541e-06, "loss": 0.6251, "step": 22796 }, { "epoch": 0.2725642343882638, "grad_norm": 2.7976901531219482, "learning_rate": 8.535020463740632e-06, "loss": 0.5505, "step": 22797 }, { "epoch": 0.2725761905331245, "grad_norm": 4.7787580490112305, "learning_rate": 8.534883533232946e-06, "loss": 0.5811, "step": 22798 }, { "epoch": 0.27258814667798514, "grad_norm": 11.606215476989746, "learning_rate": 8.534746597424693e-06, "loss": 0.6209, "step": 22799 }, { "epoch": 0.2726001028228458, "grad_norm": 1.5908644199371338, "learning_rate": 8.534609656316072e-06, "loss": 0.5465, "step": 22800 }, { "epoch": 0.27261205896770646, "grad_norm": 86.16333770751953, "learning_rate": 8.534472709907293e-06, "loss": 0.543, "step": 22801 }, { "epoch": 0.2726240151125671, "grad_norm": 3.8804855346679688, "learning_rate": 8.534335758198557e-06, "loss": 0.6757, "step": 22802 }, { "epoch": 0.2726359712574278, "grad_norm": 2.4483208656311035, "learning_rate": 8.534198801190074e-06, "loss": 0.603, "step": 22803 }, { "epoch": 0.2726479274022884, "grad_norm": 2.9032950401306152, "learning_rate": 8.534061838882048e-06, "loss": 0.6408, "step": 22804 }, { "epoch": 0.27265988354714904, "grad_norm": 1.9072126150131226, "learning_rate": 8.533924871274683e-06, "loss": 0.5744, "step": 22805 }, { "epoch": 0.2726718396920097, "grad_norm": 1.663915753364563, "learning_rate": 8.533787898368184e-06, "loss": 0.6375, "step": 22806 }, { "epoch": 0.27268379583687036, "grad_norm": 3.0222439765930176, "learning_rate": 8.533650920162759e-06, "loss": 0.6323, "step": 22807 }, { "epoch": 0.272695751981731, "grad_norm": 6.8167572021484375, "learning_rate": 8.53351393665861e-06, "loss": 0.5725, "step": 22808 }, { "epoch": 0.2727077081265917, "grad_norm": 2.746659517288208, "learning_rate": 8.533376947855948e-06, "loss": 0.622, "step": 22809 }, { "epoch": 0.27271966427145233, "grad_norm": 3.9523510932922363, "learning_rate": 8.53323995375497e-06, "loss": 0.6718, "step": 22810 }, { "epoch": 0.27273162041631294, "grad_norm": 4.065062522888184, "learning_rate": 8.53310295435589e-06, "loss": 0.6181, "step": 22811 }, { "epoch": 0.2727435765611736, "grad_norm": 6.782173156738281, "learning_rate": 8.532965949658907e-06, "loss": 0.7032, "step": 22812 }, { "epoch": 0.27275553270603425, "grad_norm": 3.2963528633117676, "learning_rate": 8.53282893966423e-06, "loss": 0.6562, "step": 22813 }, { "epoch": 0.2727674888508949, "grad_norm": 4.030464172363281, "learning_rate": 8.532691924372062e-06, "loss": 0.6718, "step": 22814 }, { "epoch": 0.27277944499575557, "grad_norm": 6.232667446136475, "learning_rate": 8.532554903782613e-06, "loss": 0.5165, "step": 22815 }, { "epoch": 0.27279140114061623, "grad_norm": 1.9796134233474731, "learning_rate": 8.532417877896085e-06, "loss": 0.5894, "step": 22816 }, { "epoch": 0.2728033572854769, "grad_norm": 2.9149653911590576, "learning_rate": 8.532280846712682e-06, "loss": 0.5632, "step": 22817 }, { "epoch": 0.27281531343033755, "grad_norm": 2.080597400665283, "learning_rate": 8.53214381023261e-06, "loss": 0.6671, "step": 22818 }, { "epoch": 0.27282726957519815, "grad_norm": 2.054867744445801, "learning_rate": 8.532006768456078e-06, "loss": 0.6019, "step": 22819 }, { "epoch": 0.2728392257200588, "grad_norm": 6.17327356338501, "learning_rate": 8.53186972138329e-06, "loss": 0.5806, "step": 22820 }, { "epoch": 0.27285118186491947, "grad_norm": 3.484185218811035, "learning_rate": 8.53173266901445e-06, "loss": 0.6217, "step": 22821 }, { "epoch": 0.2728631380097801, "grad_norm": 2.5055179595947266, "learning_rate": 8.531595611349763e-06, "loss": 0.6478, "step": 22822 }, { "epoch": 0.2728750941546408, "grad_norm": 5.175747394561768, "learning_rate": 8.531458548389438e-06, "loss": 0.6905, "step": 22823 }, { "epoch": 0.27288705029950144, "grad_norm": 4.145076751708984, "learning_rate": 8.531321480133677e-06, "loss": 0.6196, "step": 22824 }, { "epoch": 0.2728990064443621, "grad_norm": 5.778473854064941, "learning_rate": 8.531184406582686e-06, "loss": 0.5808, "step": 22825 }, { "epoch": 0.2729109625892227, "grad_norm": 4.003582954406738, "learning_rate": 8.531047327736672e-06, "loss": 0.5297, "step": 22826 }, { "epoch": 0.27292291873408336, "grad_norm": 6.017975807189941, "learning_rate": 8.530910243595841e-06, "loss": 0.5809, "step": 22827 }, { "epoch": 0.272934874878944, "grad_norm": 2.0823044776916504, "learning_rate": 8.530773154160397e-06, "loss": 0.5347, "step": 22828 }, { "epoch": 0.2729468310238047, "grad_norm": 3.052812337875366, "learning_rate": 8.530636059430547e-06, "loss": 0.6778, "step": 22829 }, { "epoch": 0.27295878716866534, "grad_norm": 2.6485307216644287, "learning_rate": 8.530498959406495e-06, "loss": 0.6854, "step": 22830 }, { "epoch": 0.272970743313526, "grad_norm": 2.781583070755005, "learning_rate": 8.530361854088448e-06, "loss": 0.6473, "step": 22831 }, { "epoch": 0.27298269945838666, "grad_norm": 1.665104866027832, "learning_rate": 8.530224743476609e-06, "loss": 0.6047, "step": 22832 }, { "epoch": 0.2729946556032473, "grad_norm": 3.0456457138061523, "learning_rate": 8.530087627571186e-06, "loss": 0.594, "step": 22833 }, { "epoch": 0.2730066117481079, "grad_norm": 3.924201488494873, "learning_rate": 8.529950506372384e-06, "loss": 0.609, "step": 22834 }, { "epoch": 0.2730185678929686, "grad_norm": 5.10449743270874, "learning_rate": 8.529813379880409e-06, "loss": 0.5995, "step": 22835 }, { "epoch": 0.27303052403782924, "grad_norm": 2.546785593032837, "learning_rate": 8.529676248095466e-06, "loss": 0.6384, "step": 22836 }, { "epoch": 0.2730424801826899, "grad_norm": 9.522642135620117, "learning_rate": 8.529539111017761e-06, "loss": 0.5724, "step": 22837 }, { "epoch": 0.27305443632755055, "grad_norm": 1.8847016096115112, "learning_rate": 8.529401968647501e-06, "loss": 0.63, "step": 22838 }, { "epoch": 0.2730663924724112, "grad_norm": 2.3687829971313477, "learning_rate": 8.52926482098489e-06, "loss": 0.6169, "step": 22839 }, { "epoch": 0.27307834861727187, "grad_norm": 3.179216146469116, "learning_rate": 8.529127668030133e-06, "loss": 0.5753, "step": 22840 }, { "epoch": 0.2730903047621325, "grad_norm": 4.5173773765563965, "learning_rate": 8.528990509783435e-06, "loss": 0.5976, "step": 22841 }, { "epoch": 0.27310226090699313, "grad_norm": 2.286275625228882, "learning_rate": 8.528853346245006e-06, "loss": 0.5484, "step": 22842 }, { "epoch": 0.2731142170518538, "grad_norm": 3.2656497955322266, "learning_rate": 8.528716177415046e-06, "loss": 0.6467, "step": 22843 }, { "epoch": 0.27312617319671445, "grad_norm": 2.6486623287200928, "learning_rate": 8.528579003293765e-06, "loss": 0.5714, "step": 22844 }, { "epoch": 0.2731381293415751, "grad_norm": 4.852511405944824, "learning_rate": 8.528441823881368e-06, "loss": 0.5986, "step": 22845 }, { "epoch": 0.27315008548643577, "grad_norm": 3.0204060077667236, "learning_rate": 8.528304639178059e-06, "loss": 0.6236, "step": 22846 }, { "epoch": 0.2731620416312964, "grad_norm": 3.748821258544922, "learning_rate": 8.528167449184046e-06, "loss": 0.6294, "step": 22847 }, { "epoch": 0.27317399777615703, "grad_norm": 4.330060005187988, "learning_rate": 8.528030253899531e-06, "loss": 0.6952, "step": 22848 }, { "epoch": 0.2731859539210177, "grad_norm": 2.761600971221924, "learning_rate": 8.527893053324723e-06, "loss": 0.6078, "step": 22849 }, { "epoch": 0.27319791006587835, "grad_norm": 2.6347765922546387, "learning_rate": 8.527755847459828e-06, "loss": 0.5932, "step": 22850 }, { "epoch": 0.273209866210739, "grad_norm": 3.009791135787964, "learning_rate": 8.52761863630505e-06, "loss": 0.492, "step": 22851 }, { "epoch": 0.27322182235559966, "grad_norm": 3.3128952980041504, "learning_rate": 8.527481419860595e-06, "loss": 0.6921, "step": 22852 }, { "epoch": 0.2732337785004603, "grad_norm": 2.102642059326172, "learning_rate": 8.527344198126669e-06, "loss": 0.526, "step": 22853 }, { "epoch": 0.273245734645321, "grad_norm": 4.874023914337158, "learning_rate": 8.527206971103479e-06, "loss": 0.6289, "step": 22854 }, { "epoch": 0.27325769079018164, "grad_norm": 2.722100019454956, "learning_rate": 8.527069738791227e-06, "loss": 0.5535, "step": 22855 }, { "epoch": 0.27326964693504224, "grad_norm": 5.678736209869385, "learning_rate": 8.526932501190124e-06, "loss": 0.6022, "step": 22856 }, { "epoch": 0.2732816030799029, "grad_norm": 3.46028470993042, "learning_rate": 8.52679525830037e-06, "loss": 0.6849, "step": 22857 }, { "epoch": 0.27329355922476356, "grad_norm": 3.6025328636169434, "learning_rate": 8.526658010122177e-06, "loss": 0.6047, "step": 22858 }, { "epoch": 0.2733055153696242, "grad_norm": 3.289222478866577, "learning_rate": 8.526520756655747e-06, "loss": 0.5614, "step": 22859 }, { "epoch": 0.2733174715144849, "grad_norm": 3.953735828399658, "learning_rate": 8.526383497901288e-06, "loss": 0.7252, "step": 22860 }, { "epoch": 0.27332942765934554, "grad_norm": 1.9304202795028687, "learning_rate": 8.526246233859002e-06, "loss": 0.5809, "step": 22861 }, { "epoch": 0.2733413838042062, "grad_norm": 5.8284759521484375, "learning_rate": 8.526108964529099e-06, "loss": 0.5896, "step": 22862 }, { "epoch": 0.2733533399490668, "grad_norm": 4.069942951202393, "learning_rate": 8.525971689911781e-06, "loss": 0.5952, "step": 22863 }, { "epoch": 0.27336529609392746, "grad_norm": 6.9682111740112305, "learning_rate": 8.525834410007257e-06, "loss": 0.6671, "step": 22864 }, { "epoch": 0.2733772522387881, "grad_norm": 5.163772106170654, "learning_rate": 8.525697124815733e-06, "loss": 0.5799, "step": 22865 }, { "epoch": 0.2733892083836488, "grad_norm": 3.68082857131958, "learning_rate": 8.525559834337412e-06, "loss": 0.6071, "step": 22866 }, { "epoch": 0.27340116452850943, "grad_norm": 2.390338659286499, "learning_rate": 8.525422538572503e-06, "loss": 0.4823, "step": 22867 }, { "epoch": 0.2734131206733701, "grad_norm": 4.039303779602051, "learning_rate": 8.525285237521208e-06, "loss": 0.6434, "step": 22868 }, { "epoch": 0.27342507681823075, "grad_norm": 2.1372013092041016, "learning_rate": 8.525147931183737e-06, "loss": 0.5335, "step": 22869 }, { "epoch": 0.27343703296309135, "grad_norm": 4.131368637084961, "learning_rate": 8.525010619560294e-06, "loss": 0.5862, "step": 22870 }, { "epoch": 0.273448989107952, "grad_norm": 1.9641199111938477, "learning_rate": 8.524873302651084e-06, "loss": 0.606, "step": 22871 }, { "epoch": 0.27346094525281267, "grad_norm": 11.584867477416992, "learning_rate": 8.524735980456316e-06, "loss": 0.6043, "step": 22872 }, { "epoch": 0.27347290139767333, "grad_norm": 3.6219232082366943, "learning_rate": 8.524598652976192e-06, "loss": 0.6719, "step": 22873 }, { "epoch": 0.273484857542534, "grad_norm": 4.564840793609619, "learning_rate": 8.52446132021092e-06, "loss": 0.5153, "step": 22874 }, { "epoch": 0.27349681368739465, "grad_norm": 12.29720401763916, "learning_rate": 8.524323982160707e-06, "loss": 0.5932, "step": 22875 }, { "epoch": 0.2735087698322553, "grad_norm": 2.280341863632202, "learning_rate": 8.524186638825756e-06, "loss": 0.5955, "step": 22876 }, { "epoch": 0.27352072597711596, "grad_norm": 8.8938570022583, "learning_rate": 8.524049290206275e-06, "loss": 0.6493, "step": 22877 }, { "epoch": 0.27353268212197657, "grad_norm": 7.788684844970703, "learning_rate": 8.523911936302471e-06, "loss": 0.7206, "step": 22878 }, { "epoch": 0.2735446382668372, "grad_norm": 6.507806777954102, "learning_rate": 8.523774577114547e-06, "loss": 0.5517, "step": 22879 }, { "epoch": 0.2735565944116979, "grad_norm": 3.082580804824829, "learning_rate": 8.523637212642712e-06, "loss": 0.5802, "step": 22880 }, { "epoch": 0.27356855055655854, "grad_norm": 6.178034782409668, "learning_rate": 8.523499842887168e-06, "loss": 0.6925, "step": 22881 }, { "epoch": 0.2735805067014192, "grad_norm": 2.800830841064453, "learning_rate": 8.523362467848126e-06, "loss": 0.7212, "step": 22882 }, { "epoch": 0.27359246284627986, "grad_norm": 2.042449712753296, "learning_rate": 8.523225087525788e-06, "loss": 0.6328, "step": 22883 }, { "epoch": 0.2736044189911405, "grad_norm": 2.3185479640960693, "learning_rate": 8.523087701920363e-06, "loss": 0.5739, "step": 22884 }, { "epoch": 0.2736163751360011, "grad_norm": 8.37667465209961, "learning_rate": 8.522950311032055e-06, "loss": 0.6392, "step": 22885 }, { "epoch": 0.2736283312808618, "grad_norm": 4.104849338531494, "learning_rate": 8.522812914861069e-06, "loss": 0.7211, "step": 22886 }, { "epoch": 0.27364028742572244, "grad_norm": 4.702139377593994, "learning_rate": 8.522675513407612e-06, "loss": 0.6465, "step": 22887 }, { "epoch": 0.2736522435705831, "grad_norm": 4.367249488830566, "learning_rate": 8.522538106671895e-06, "loss": 0.6092, "step": 22888 }, { "epoch": 0.27366419971544376, "grad_norm": 2.345945119857788, "learning_rate": 8.522400694654115e-06, "loss": 0.672, "step": 22889 }, { "epoch": 0.2736761558603044, "grad_norm": 4.185103416442871, "learning_rate": 8.522263277354483e-06, "loss": 0.6221, "step": 22890 }, { "epoch": 0.2736881120051651, "grad_norm": 9.90237808227539, "learning_rate": 8.522125854773206e-06, "loss": 0.6134, "step": 22891 }, { "epoch": 0.27370006815002573, "grad_norm": 2.5654666423797607, "learning_rate": 8.521988426910488e-06, "loss": 0.5783, "step": 22892 }, { "epoch": 0.27371202429488634, "grad_norm": 5.742456912994385, "learning_rate": 8.521850993766539e-06, "loss": 0.5402, "step": 22893 }, { "epoch": 0.273723980439747, "grad_norm": 2.9659814834594727, "learning_rate": 8.521713555341557e-06, "loss": 0.6001, "step": 22894 }, { "epoch": 0.27373593658460765, "grad_norm": 4.296636581420898, "learning_rate": 8.521576111635757e-06, "loss": 0.6077, "step": 22895 }, { "epoch": 0.2737478927294683, "grad_norm": 3.731184959411621, "learning_rate": 8.521438662649338e-06, "loss": 0.7012, "step": 22896 }, { "epoch": 0.27375984887432897, "grad_norm": 4.0443644523620605, "learning_rate": 8.521301208382511e-06, "loss": 0.5795, "step": 22897 }, { "epoch": 0.27377180501918963, "grad_norm": 4.004940032958984, "learning_rate": 8.52116374883548e-06, "loss": 0.6136, "step": 22898 }, { "epoch": 0.2737837611640503, "grad_norm": 2.578786849975586, "learning_rate": 8.521026284008451e-06, "loss": 0.6347, "step": 22899 }, { "epoch": 0.2737957173089109, "grad_norm": 4.2811808586120605, "learning_rate": 8.520888813901632e-06, "loss": 0.629, "step": 22900 }, { "epoch": 0.27380767345377155, "grad_norm": 2.6804676055908203, "learning_rate": 8.520751338515226e-06, "loss": 0.6507, "step": 22901 }, { "epoch": 0.2738196295986322, "grad_norm": 4.85445499420166, "learning_rate": 8.520613857849443e-06, "loss": 0.5864, "step": 22902 }, { "epoch": 0.27383158574349287, "grad_norm": 4.94807243347168, "learning_rate": 8.520476371904485e-06, "loss": 0.6439, "step": 22903 }, { "epoch": 0.2738435418883535, "grad_norm": 6.702431678771973, "learning_rate": 8.520338880680561e-06, "loss": 0.633, "step": 22904 }, { "epoch": 0.2738554980332142, "grad_norm": 5.023779392242432, "learning_rate": 8.520201384177874e-06, "loss": 0.6193, "step": 22905 }, { "epoch": 0.27386745417807484, "grad_norm": 3.1535866260528564, "learning_rate": 8.520063882396636e-06, "loss": 0.5909, "step": 22906 }, { "epoch": 0.27387941032293545, "grad_norm": 3.288264751434326, "learning_rate": 8.519926375337049e-06, "loss": 0.605, "step": 22907 }, { "epoch": 0.2738913664677961, "grad_norm": 8.571951866149902, "learning_rate": 8.519788862999318e-06, "loss": 0.6164, "step": 22908 }, { "epoch": 0.27390332261265676, "grad_norm": 3.8722076416015625, "learning_rate": 8.519651345383652e-06, "loss": 0.6276, "step": 22909 }, { "epoch": 0.2739152787575174, "grad_norm": 5.499910354614258, "learning_rate": 8.519513822490257e-06, "loss": 0.5988, "step": 22910 }, { "epoch": 0.2739272349023781, "grad_norm": 2.9750819206237793, "learning_rate": 8.519376294319339e-06, "loss": 0.6654, "step": 22911 }, { "epoch": 0.27393919104723874, "grad_norm": 3.686333179473877, "learning_rate": 8.519238760871103e-06, "loss": 0.666, "step": 22912 }, { "epoch": 0.2739511471920994, "grad_norm": 3.191516399383545, "learning_rate": 8.519101222145755e-06, "loss": 0.5595, "step": 22913 }, { "epoch": 0.27396310333696006, "grad_norm": 2.31528902053833, "learning_rate": 8.518963678143503e-06, "loss": 0.5744, "step": 22914 }, { "epoch": 0.27397505948182066, "grad_norm": 4.723104953765869, "learning_rate": 8.518826128864552e-06, "loss": 0.5592, "step": 22915 }, { "epoch": 0.2739870156266813, "grad_norm": 8.270048141479492, "learning_rate": 8.51868857430911e-06, "loss": 0.5573, "step": 22916 }, { "epoch": 0.273998971771542, "grad_norm": 2.7627573013305664, "learning_rate": 8.518551014477381e-06, "loss": 0.6647, "step": 22917 }, { "epoch": 0.27401092791640264, "grad_norm": 3.8559486865997314, "learning_rate": 8.518413449369571e-06, "loss": 0.6338, "step": 22918 }, { "epoch": 0.2740228840612633, "grad_norm": 2.772965669631958, "learning_rate": 8.518275878985889e-06, "loss": 0.5939, "step": 22919 }, { "epoch": 0.27403484020612395, "grad_norm": 4.850397109985352, "learning_rate": 8.51813830332654e-06, "loss": 0.6464, "step": 22920 }, { "epoch": 0.2740467963509846, "grad_norm": 11.02669620513916, "learning_rate": 8.518000722391731e-06, "loss": 0.7387, "step": 22921 }, { "epoch": 0.2740587524958452, "grad_norm": 13.046719551086426, "learning_rate": 8.517863136181665e-06, "loss": 0.5178, "step": 22922 }, { "epoch": 0.2740707086407059, "grad_norm": 6.521946430206299, "learning_rate": 8.517725544696553e-06, "loss": 0.5985, "step": 22923 }, { "epoch": 0.27408266478556653, "grad_norm": 2.6644704341888428, "learning_rate": 8.517587947936597e-06, "loss": 0.5122, "step": 22924 }, { "epoch": 0.2740946209304272, "grad_norm": 2.255504846572876, "learning_rate": 8.517450345902007e-06, "loss": 0.6, "step": 22925 }, { "epoch": 0.27410657707528785, "grad_norm": 2.2184417247772217, "learning_rate": 8.517312738592986e-06, "loss": 0.5574, "step": 22926 }, { "epoch": 0.2741185332201485, "grad_norm": 5.075031280517578, "learning_rate": 8.517175126009743e-06, "loss": 0.6511, "step": 22927 }, { "epoch": 0.27413048936500917, "grad_norm": 4.620270252227783, "learning_rate": 8.517037508152483e-06, "loss": 0.6278, "step": 22928 }, { "epoch": 0.27414244550986977, "grad_norm": 4.481427192687988, "learning_rate": 8.516899885021415e-06, "loss": 0.6207, "step": 22929 }, { "epoch": 0.27415440165473043, "grad_norm": 2.9049763679504395, "learning_rate": 8.516762256616741e-06, "loss": 0.6205, "step": 22930 }, { "epoch": 0.2741663577995911, "grad_norm": 9.561430931091309, "learning_rate": 8.51662462293867e-06, "loss": 0.5618, "step": 22931 }, { "epoch": 0.27417831394445175, "grad_norm": 3.118529796600342, "learning_rate": 8.516486983987408e-06, "loss": 0.6341, "step": 22932 }, { "epoch": 0.2741902700893124, "grad_norm": 4.317927360534668, "learning_rate": 8.516349339763161e-06, "loss": 0.6356, "step": 22933 }, { "epoch": 0.27420222623417306, "grad_norm": 6.407649993896484, "learning_rate": 8.516211690266136e-06, "loss": 0.658, "step": 22934 }, { "epoch": 0.2742141823790337, "grad_norm": 3.235219955444336, "learning_rate": 8.516074035496537e-06, "loss": 0.6282, "step": 22935 }, { "epoch": 0.2742261385238944, "grad_norm": 3.4795970916748047, "learning_rate": 8.515936375454575e-06, "loss": 0.6689, "step": 22936 }, { "epoch": 0.274238094668755, "grad_norm": 5.207983493804932, "learning_rate": 8.515798710140453e-06, "loss": 0.6592, "step": 22937 }, { "epoch": 0.27425005081361564, "grad_norm": 22.426158905029297, "learning_rate": 8.515661039554379e-06, "loss": 0.6847, "step": 22938 }, { "epoch": 0.2742620069584763, "grad_norm": 2.841050624847412, "learning_rate": 8.515523363696559e-06, "loss": 0.5349, "step": 22939 }, { "epoch": 0.27427396310333696, "grad_norm": 4.639043807983398, "learning_rate": 8.515385682567198e-06, "loss": 0.6075, "step": 22940 }, { "epoch": 0.2742859192481976, "grad_norm": 2.0185964107513428, "learning_rate": 8.515247996166504e-06, "loss": 0.5971, "step": 22941 }, { "epoch": 0.2742978753930583, "grad_norm": 2.8661115169525146, "learning_rate": 8.515110304494682e-06, "loss": 0.6228, "step": 22942 }, { "epoch": 0.27430983153791894, "grad_norm": 2.862581968307495, "learning_rate": 8.514972607551943e-06, "loss": 0.5724, "step": 22943 }, { "epoch": 0.27432178768277954, "grad_norm": 2.3463430404663086, "learning_rate": 8.514834905338487e-06, "loss": 0.5921, "step": 22944 }, { "epoch": 0.2743337438276402, "grad_norm": 6.377634525299072, "learning_rate": 8.514697197854525e-06, "loss": 0.4733, "step": 22945 }, { "epoch": 0.27434569997250086, "grad_norm": 2.5211260318756104, "learning_rate": 8.51455948510026e-06, "loss": 0.5855, "step": 22946 }, { "epoch": 0.2743576561173615, "grad_norm": 3.9211719036102295, "learning_rate": 8.514421767075902e-06, "loss": 0.6197, "step": 22947 }, { "epoch": 0.2743696122622222, "grad_norm": 2.9329686164855957, "learning_rate": 8.514284043781658e-06, "loss": 0.5524, "step": 22948 }, { "epoch": 0.27438156840708283, "grad_norm": 2.4976000785827637, "learning_rate": 8.51414631521773e-06, "loss": 0.5944, "step": 22949 }, { "epoch": 0.2743935245519435, "grad_norm": 6.900609016418457, "learning_rate": 8.514008581384328e-06, "loss": 0.6389, "step": 22950 }, { "epoch": 0.27440548069680415, "grad_norm": 2.8355138301849365, "learning_rate": 8.513870842281658e-06, "loss": 0.6489, "step": 22951 }, { "epoch": 0.27441743684166475, "grad_norm": 2.0388529300689697, "learning_rate": 8.513733097909926e-06, "loss": 0.6434, "step": 22952 }, { "epoch": 0.2744293929865254, "grad_norm": 6.619661331176758, "learning_rate": 8.513595348269338e-06, "loss": 0.599, "step": 22953 }, { "epoch": 0.27444134913138607, "grad_norm": 7.033531665802002, "learning_rate": 8.513457593360101e-06, "loss": 0.6047, "step": 22954 }, { "epoch": 0.27445330527624673, "grad_norm": 4.893008708953857, "learning_rate": 8.513319833182423e-06, "loss": 0.5643, "step": 22955 }, { "epoch": 0.2744652614211074, "grad_norm": 1.9572570323944092, "learning_rate": 8.513182067736509e-06, "loss": 0.6054, "step": 22956 }, { "epoch": 0.27447721756596805, "grad_norm": 2.0391018390655518, "learning_rate": 8.513044297022566e-06, "loss": 0.5823, "step": 22957 }, { "epoch": 0.2744891737108287, "grad_norm": 2.968043327331543, "learning_rate": 8.512906521040802e-06, "loss": 0.5499, "step": 22958 }, { "epoch": 0.2745011298556893, "grad_norm": 2.9113197326660156, "learning_rate": 8.512768739791419e-06, "loss": 0.5613, "step": 22959 }, { "epoch": 0.27451308600054997, "grad_norm": 6.245728492736816, "learning_rate": 8.51263095327463e-06, "loss": 0.6546, "step": 22960 }, { "epoch": 0.2745250421454106, "grad_norm": 2.6816060543060303, "learning_rate": 8.512493161490636e-06, "loss": 0.717, "step": 22961 }, { "epoch": 0.2745369982902713, "grad_norm": 3.8755080699920654, "learning_rate": 8.512355364439648e-06, "loss": 0.6188, "step": 22962 }, { "epoch": 0.27454895443513194, "grad_norm": 3.900449514389038, "learning_rate": 8.51221756212187e-06, "loss": 0.6673, "step": 22963 }, { "epoch": 0.2745609105799926, "grad_norm": 2.396543264389038, "learning_rate": 8.512079754537507e-06, "loss": 0.5524, "step": 22964 }, { "epoch": 0.27457286672485326, "grad_norm": 2.597769021987915, "learning_rate": 8.51194194168677e-06, "loss": 0.5813, "step": 22965 }, { "epoch": 0.27458482286971386, "grad_norm": 2.3198258876800537, "learning_rate": 8.511804123569865e-06, "loss": 0.5276, "step": 22966 }, { "epoch": 0.2745967790145745, "grad_norm": 3.543091058731079, "learning_rate": 8.511666300186995e-06, "loss": 0.5859, "step": 22967 }, { "epoch": 0.2746087351594352, "grad_norm": 2.293233633041382, "learning_rate": 8.51152847153837e-06, "loss": 0.597, "step": 22968 }, { "epoch": 0.27462069130429584, "grad_norm": 4.802877902984619, "learning_rate": 8.511390637624196e-06, "loss": 0.7309, "step": 22969 }, { "epoch": 0.2746326474491565, "grad_norm": 5.026474475860596, "learning_rate": 8.511252798444677e-06, "loss": 0.5925, "step": 22970 }, { "epoch": 0.27464460359401716, "grad_norm": 2.1774754524230957, "learning_rate": 8.511114954000024e-06, "loss": 0.5673, "step": 22971 }, { "epoch": 0.2746565597388778, "grad_norm": 1.9820387363433838, "learning_rate": 8.510977104290441e-06, "loss": 0.5694, "step": 22972 }, { "epoch": 0.2746685158837385, "grad_norm": 1.9386948347091675, "learning_rate": 8.510839249316133e-06, "loss": 0.5901, "step": 22973 }, { "epoch": 0.2746804720285991, "grad_norm": 4.934162616729736, "learning_rate": 8.510701389077312e-06, "loss": 0.5316, "step": 22974 }, { "epoch": 0.27469242817345974, "grad_norm": 2.973742723464966, "learning_rate": 8.510563523574182e-06, "loss": 0.5999, "step": 22975 }, { "epoch": 0.2747043843183204, "grad_norm": 2.8186731338500977, "learning_rate": 8.51042565280695e-06, "loss": 0.6396, "step": 22976 }, { "epoch": 0.27471634046318105, "grad_norm": 6.808391094207764, "learning_rate": 8.51028777677582e-06, "loss": 0.62, "step": 22977 }, { "epoch": 0.2747282966080417, "grad_norm": 1.7689012289047241, "learning_rate": 8.510149895481002e-06, "loss": 0.6436, "step": 22978 }, { "epoch": 0.27474025275290237, "grad_norm": 2.8784427642822266, "learning_rate": 8.510012008922702e-06, "loss": 0.6294, "step": 22979 }, { "epoch": 0.27475220889776303, "grad_norm": 4.627851963043213, "learning_rate": 8.509874117101126e-06, "loss": 0.5958, "step": 22980 }, { "epoch": 0.27476416504262363, "grad_norm": 9.913178443908691, "learning_rate": 8.509736220016482e-06, "loss": 0.5683, "step": 22981 }, { "epoch": 0.2747761211874843, "grad_norm": 4.081493377685547, "learning_rate": 8.509598317668975e-06, "loss": 0.6222, "step": 22982 }, { "epoch": 0.27478807733234495, "grad_norm": 3.5025293827056885, "learning_rate": 8.509460410058813e-06, "loss": 0.5542, "step": 22983 }, { "epoch": 0.2748000334772056, "grad_norm": 3.9350202083587646, "learning_rate": 8.509322497186204e-06, "loss": 0.6036, "step": 22984 }, { "epoch": 0.27481198962206627, "grad_norm": 2.79129958152771, "learning_rate": 8.509184579051353e-06, "loss": 0.6128, "step": 22985 }, { "epoch": 0.2748239457669269, "grad_norm": 3.6827986240386963, "learning_rate": 8.509046655654466e-06, "loss": 0.5289, "step": 22986 }, { "epoch": 0.2748359019117876, "grad_norm": 2.3796894550323486, "learning_rate": 8.508908726995752e-06, "loss": 0.6628, "step": 22987 }, { "epoch": 0.27484785805664824, "grad_norm": 2.1701347827911377, "learning_rate": 8.508770793075416e-06, "loss": 0.5297, "step": 22988 }, { "epoch": 0.27485981420150885, "grad_norm": 1.7412526607513428, "learning_rate": 8.508632853893668e-06, "loss": 0.6267, "step": 22989 }, { "epoch": 0.2748717703463695, "grad_norm": 3.0055441856384277, "learning_rate": 8.50849490945071e-06, "loss": 0.5444, "step": 22990 }, { "epoch": 0.27488372649123016, "grad_norm": 1.5269968509674072, "learning_rate": 8.508356959746754e-06, "loss": 0.5386, "step": 22991 }, { "epoch": 0.2748956826360908, "grad_norm": 2.6729300022125244, "learning_rate": 8.508219004782002e-06, "loss": 0.6047, "step": 22992 }, { "epoch": 0.2749076387809515, "grad_norm": 2.333761215209961, "learning_rate": 8.508081044556663e-06, "loss": 0.6586, "step": 22993 }, { "epoch": 0.27491959492581214, "grad_norm": 1.9247469902038574, "learning_rate": 8.507943079070945e-06, "loss": 0.4901, "step": 22994 }, { "epoch": 0.2749315510706728, "grad_norm": 3.344447374343872, "learning_rate": 8.507805108325054e-06, "loss": 0.6262, "step": 22995 }, { "epoch": 0.2749435072155334, "grad_norm": 2.354844808578491, "learning_rate": 8.507667132319196e-06, "loss": 0.7132, "step": 22996 }, { "epoch": 0.27495546336039406, "grad_norm": 2.4952900409698486, "learning_rate": 8.50752915105358e-06, "loss": 0.621, "step": 22997 }, { "epoch": 0.2749674195052547, "grad_norm": 2.978105306625366, "learning_rate": 8.50739116452841e-06, "loss": 0.6845, "step": 22998 }, { "epoch": 0.2749793756501154, "grad_norm": 7.488860607147217, "learning_rate": 8.507253172743895e-06, "loss": 0.5011, "step": 22999 }, { "epoch": 0.27499133179497603, "grad_norm": 2.762871742248535, "learning_rate": 8.507115175700242e-06, "loss": 0.5842, "step": 23000 }, { "epoch": 0.2750032879398367, "grad_norm": 2.218815803527832, "learning_rate": 8.506977173397657e-06, "loss": 0.607, "step": 23001 }, { "epoch": 0.27501524408469735, "grad_norm": 2.6465251445770264, "learning_rate": 8.506839165836347e-06, "loss": 0.6179, "step": 23002 }, { "epoch": 0.27502720022955796, "grad_norm": 3.824982166290283, "learning_rate": 8.50670115301652e-06, "loss": 0.6959, "step": 23003 }, { "epoch": 0.2750391563744186, "grad_norm": 2.688654899597168, "learning_rate": 8.50656313493838e-06, "loss": 0.5137, "step": 23004 }, { "epoch": 0.2750511125192793, "grad_norm": 3.303603172302246, "learning_rate": 8.50642511160214e-06, "loss": 0.6499, "step": 23005 }, { "epoch": 0.27506306866413993, "grad_norm": 4.446824550628662, "learning_rate": 8.506287083007999e-06, "loss": 0.6518, "step": 23006 }, { "epoch": 0.2750750248090006, "grad_norm": 4.996046543121338, "learning_rate": 8.506149049156172e-06, "loss": 0.465, "step": 23007 }, { "epoch": 0.27508698095386125, "grad_norm": 2.4470582008361816, "learning_rate": 8.506011010046857e-06, "loss": 0.6753, "step": 23008 }, { "epoch": 0.2750989370987219, "grad_norm": 3.058948040008545, "learning_rate": 8.50587296568027e-06, "loss": 0.547, "step": 23009 }, { "epoch": 0.27511089324358257, "grad_norm": 2.9931349754333496, "learning_rate": 8.505734916056612e-06, "loss": 0.7207, "step": 23010 }, { "epoch": 0.27512284938844317, "grad_norm": 1.8322960138320923, "learning_rate": 8.505596861176093e-06, "loss": 0.5593, "step": 23011 }, { "epoch": 0.27513480553330383, "grad_norm": 2.1563870906829834, "learning_rate": 8.505458801038921e-06, "loss": 0.5321, "step": 23012 }, { "epoch": 0.2751467616781645, "grad_norm": 3.0499958992004395, "learning_rate": 8.505320735645299e-06, "loss": 0.579, "step": 23013 }, { "epoch": 0.27515871782302515, "grad_norm": 2.54525089263916, "learning_rate": 8.505182664995435e-06, "loss": 0.5494, "step": 23014 }, { "epoch": 0.2751706739678858, "grad_norm": 2.3727097511291504, "learning_rate": 8.505044589089539e-06, "loss": 0.575, "step": 23015 }, { "epoch": 0.27518263011274646, "grad_norm": 1.6377277374267578, "learning_rate": 8.504906507927815e-06, "loss": 0.5185, "step": 23016 }, { "epoch": 0.2751945862576071, "grad_norm": 3.0775327682495117, "learning_rate": 8.504768421510472e-06, "loss": 0.5886, "step": 23017 }, { "epoch": 0.2752065424024677, "grad_norm": 2.604767084121704, "learning_rate": 8.504630329837715e-06, "loss": 0.6023, "step": 23018 }, { "epoch": 0.2752184985473284, "grad_norm": 2.513704776763916, "learning_rate": 8.504492232909753e-06, "loss": 0.5194, "step": 23019 }, { "epoch": 0.27523045469218904, "grad_norm": 6.781410217285156, "learning_rate": 8.504354130726793e-06, "loss": 0.6031, "step": 23020 }, { "epoch": 0.2752424108370497, "grad_norm": 2.1259310245513916, "learning_rate": 8.504216023289043e-06, "loss": 0.6591, "step": 23021 }, { "epoch": 0.27525436698191036, "grad_norm": 2.818030595779419, "learning_rate": 8.504077910596706e-06, "loss": 0.6512, "step": 23022 }, { "epoch": 0.275266323126771, "grad_norm": 2.535064935684204, "learning_rate": 8.503939792649993e-06, "loss": 0.627, "step": 23023 }, { "epoch": 0.2752782792716317, "grad_norm": 4.081157207489014, "learning_rate": 8.50380166944911e-06, "loss": 0.7038, "step": 23024 }, { "epoch": 0.2752902354164923, "grad_norm": 4.824132919311523, "learning_rate": 8.503663540994263e-06, "loss": 0.5694, "step": 23025 }, { "epoch": 0.27530219156135294, "grad_norm": 3.0053417682647705, "learning_rate": 8.50352540728566e-06, "loss": 0.638, "step": 23026 }, { "epoch": 0.2753141477062136, "grad_norm": 3.802318811416626, "learning_rate": 8.503387268323509e-06, "loss": 0.6679, "step": 23027 }, { "epoch": 0.27532610385107426, "grad_norm": 10.433219909667969, "learning_rate": 8.503249124108015e-06, "loss": 0.6715, "step": 23028 }, { "epoch": 0.2753380599959349, "grad_norm": 21.03022575378418, "learning_rate": 8.503110974639388e-06, "loss": 0.5587, "step": 23029 }, { "epoch": 0.2753500161407956, "grad_norm": 2.0641582012176514, "learning_rate": 8.502972819917833e-06, "loss": 0.5705, "step": 23030 }, { "epoch": 0.27536197228565623, "grad_norm": 2.4098708629608154, "learning_rate": 8.502834659943558e-06, "loss": 0.5279, "step": 23031 }, { "epoch": 0.2753739284305169, "grad_norm": 2.6252410411834717, "learning_rate": 8.50269649471677e-06, "loss": 0.5255, "step": 23032 }, { "epoch": 0.2753858845753775, "grad_norm": 2.4031074047088623, "learning_rate": 8.502558324237677e-06, "loss": 0.617, "step": 23033 }, { "epoch": 0.27539784072023815, "grad_norm": 4.487651824951172, "learning_rate": 8.502420148506483e-06, "loss": 0.576, "step": 23034 }, { "epoch": 0.2754097968650988, "grad_norm": 2.4107277393341064, "learning_rate": 8.5022819675234e-06, "loss": 0.584, "step": 23035 }, { "epoch": 0.27542175300995947, "grad_norm": 2.8287243843078613, "learning_rate": 8.502143781288632e-06, "loss": 0.5401, "step": 23036 }, { "epoch": 0.2754337091548201, "grad_norm": 3.3078722953796387, "learning_rate": 8.502005589802387e-06, "loss": 0.6093, "step": 23037 }, { "epoch": 0.2754456652996808, "grad_norm": 1.6633131504058838, "learning_rate": 8.50186739306487e-06, "loss": 0.5383, "step": 23038 }, { "epoch": 0.27545762144454145, "grad_norm": 2.265664577484131, "learning_rate": 8.501729191076294e-06, "loss": 0.5711, "step": 23039 }, { "epoch": 0.27546957758940205, "grad_norm": 4.893076419830322, "learning_rate": 8.50159098383686e-06, "loss": 0.5712, "step": 23040 }, { "epoch": 0.2754815337342627, "grad_norm": 2.8508126735687256, "learning_rate": 8.50145277134678e-06, "loss": 0.6324, "step": 23041 }, { "epoch": 0.27549348987912337, "grad_norm": 3.061552047729492, "learning_rate": 8.501314553606256e-06, "loss": 0.6062, "step": 23042 }, { "epoch": 0.275505446023984, "grad_norm": 2.8914332389831543, "learning_rate": 8.501176330615501e-06, "loss": 0.6579, "step": 23043 }, { "epoch": 0.2755174021688447, "grad_norm": 6.423476696014404, "learning_rate": 8.501038102374719e-06, "loss": 0.6315, "step": 23044 }, { "epoch": 0.27552935831370534, "grad_norm": 7.0832366943359375, "learning_rate": 8.500899868884118e-06, "loss": 0.6156, "step": 23045 }, { "epoch": 0.275541314458566, "grad_norm": 3.0429561138153076, "learning_rate": 8.500761630143905e-06, "loss": 0.6203, "step": 23046 }, { "epoch": 0.27555327060342666, "grad_norm": 1.693051815032959, "learning_rate": 8.500623386154288e-06, "loss": 0.5748, "step": 23047 }, { "epoch": 0.27556522674828726, "grad_norm": 5.2382707595825195, "learning_rate": 8.500485136915473e-06, "loss": 0.6064, "step": 23048 }, { "epoch": 0.2755771828931479, "grad_norm": 3.0556440353393555, "learning_rate": 8.500346882427669e-06, "loss": 0.6275, "step": 23049 }, { "epoch": 0.2755891390380086, "grad_norm": 1.8094462156295776, "learning_rate": 8.500208622691081e-06, "loss": 0.4865, "step": 23050 }, { "epoch": 0.27560109518286924, "grad_norm": 2.470881938934326, "learning_rate": 8.500070357705918e-06, "loss": 0.6831, "step": 23051 }, { "epoch": 0.2756130513277299, "grad_norm": 2.3493101596832275, "learning_rate": 8.499932087472388e-06, "loss": 0.6326, "step": 23052 }, { "epoch": 0.27562500747259056, "grad_norm": 3.0382254123687744, "learning_rate": 8.499793811990698e-06, "loss": 0.5848, "step": 23053 }, { "epoch": 0.2756369636174512, "grad_norm": 11.317169189453125, "learning_rate": 8.499655531261053e-06, "loss": 0.6999, "step": 23054 }, { "epoch": 0.2756489197623118, "grad_norm": 5.4660749435424805, "learning_rate": 8.499517245283662e-06, "loss": 0.5416, "step": 23055 }, { "epoch": 0.2756608759071725, "grad_norm": 2.588515281677246, "learning_rate": 8.499378954058735e-06, "loss": 0.6669, "step": 23056 }, { "epoch": 0.27567283205203313, "grad_norm": 3.0009167194366455, "learning_rate": 8.499240657586474e-06, "loss": 0.6762, "step": 23057 }, { "epoch": 0.2756847881968938, "grad_norm": 1.9060245752334595, "learning_rate": 8.49910235586709e-06, "loss": 0.5182, "step": 23058 }, { "epoch": 0.27569674434175445, "grad_norm": 3.6547787189483643, "learning_rate": 8.49896404890079e-06, "loss": 0.5301, "step": 23059 }, { "epoch": 0.2757087004866151, "grad_norm": 2.8443076610565186, "learning_rate": 8.49882573668778e-06, "loss": 0.5544, "step": 23060 }, { "epoch": 0.27572065663147577, "grad_norm": 2.5824530124664307, "learning_rate": 8.49868741922827e-06, "loss": 0.5659, "step": 23061 }, { "epoch": 0.27573261277633637, "grad_norm": 6.867028713226318, "learning_rate": 8.498549096522466e-06, "loss": 0.5811, "step": 23062 }, { "epoch": 0.27574456892119703, "grad_norm": 2.9081871509552, "learning_rate": 8.498410768570573e-06, "loss": 0.5841, "step": 23063 }, { "epoch": 0.2757565250660577, "grad_norm": 3.0463016033172607, "learning_rate": 8.498272435372802e-06, "loss": 0.6334, "step": 23064 }, { "epoch": 0.27576848121091835, "grad_norm": 3.2259552478790283, "learning_rate": 8.498134096929359e-06, "loss": 0.5887, "step": 23065 }, { "epoch": 0.275780437355779, "grad_norm": 3.781519651412964, "learning_rate": 8.49799575324045e-06, "loss": 0.5217, "step": 23066 }, { "epoch": 0.27579239350063967, "grad_norm": 1.6306968927383423, "learning_rate": 8.497857404306286e-06, "loss": 0.6361, "step": 23067 }, { "epoch": 0.2758043496455003, "grad_norm": 2.4341790676116943, "learning_rate": 8.497719050127071e-06, "loss": 0.6715, "step": 23068 }, { "epoch": 0.275816305790361, "grad_norm": 3.0195882320404053, "learning_rate": 8.497580690703015e-06, "loss": 0.6747, "step": 23069 }, { "epoch": 0.2758282619352216, "grad_norm": 2.058401107788086, "learning_rate": 8.497442326034324e-06, "loss": 0.5882, "step": 23070 }, { "epoch": 0.27584021808008224, "grad_norm": 2.7849514484405518, "learning_rate": 8.497303956121206e-06, "loss": 0.545, "step": 23071 }, { "epoch": 0.2758521742249429, "grad_norm": 1.5021144151687622, "learning_rate": 8.497165580963869e-06, "loss": 0.5294, "step": 23072 }, { "epoch": 0.27586413036980356, "grad_norm": 3.6840782165527344, "learning_rate": 8.497027200562517e-06, "loss": 0.5949, "step": 23073 }, { "epoch": 0.2758760865146642, "grad_norm": 6.60377311706543, "learning_rate": 8.496888814917361e-06, "loss": 0.684, "step": 23074 }, { "epoch": 0.2758880426595249, "grad_norm": 4.0066118240356445, "learning_rate": 8.49675042402861e-06, "loss": 0.6127, "step": 23075 }, { "epoch": 0.27589999880438554, "grad_norm": 3.7877588272094727, "learning_rate": 8.49661202789647e-06, "loss": 0.6214, "step": 23076 }, { "epoch": 0.27591195494924614, "grad_norm": 2.4512922763824463, "learning_rate": 8.496473626521145e-06, "loss": 0.6041, "step": 23077 }, { "epoch": 0.2759239110941068, "grad_norm": 7.1189866065979, "learning_rate": 8.496335219902844e-06, "loss": 0.6526, "step": 23078 }, { "epoch": 0.27593586723896746, "grad_norm": 2.7521071434020996, "learning_rate": 8.496196808041779e-06, "loss": 0.6084, "step": 23079 }, { "epoch": 0.2759478233838281, "grad_norm": 3.0807502269744873, "learning_rate": 8.496058390938155e-06, "loss": 0.5162, "step": 23080 }, { "epoch": 0.2759597795286888, "grad_norm": 2.810171365737915, "learning_rate": 8.495919968592177e-06, "loss": 0.5703, "step": 23081 }, { "epoch": 0.27597173567354943, "grad_norm": 3.1732351779937744, "learning_rate": 8.495781541004055e-06, "loss": 0.559, "step": 23082 }, { "epoch": 0.2759836918184101, "grad_norm": 2.741819381713867, "learning_rate": 8.495643108173996e-06, "loss": 0.5525, "step": 23083 }, { "epoch": 0.2759956479632707, "grad_norm": 2.165968894958496, "learning_rate": 8.495504670102208e-06, "loss": 0.6036, "step": 23084 }, { "epoch": 0.27600760410813135, "grad_norm": 8.421060562133789, "learning_rate": 8.4953662267889e-06, "loss": 0.5287, "step": 23085 }, { "epoch": 0.276019560252992, "grad_norm": 2.118412733078003, "learning_rate": 8.495227778234277e-06, "loss": 0.5555, "step": 23086 }, { "epoch": 0.27603151639785267, "grad_norm": 2.2385401725769043, "learning_rate": 8.495089324438546e-06, "loss": 0.5961, "step": 23087 }, { "epoch": 0.27604347254271333, "grad_norm": 4.317897796630859, "learning_rate": 8.494950865401916e-06, "loss": 0.6273, "step": 23088 }, { "epoch": 0.276055428687574, "grad_norm": 2.3963699340820312, "learning_rate": 8.494812401124597e-06, "loss": 0.5991, "step": 23089 }, { "epoch": 0.27606738483243465, "grad_norm": 3.672175645828247, "learning_rate": 8.494673931606793e-06, "loss": 0.5879, "step": 23090 }, { "epoch": 0.2760793409772953, "grad_norm": 6.821648597717285, "learning_rate": 8.494535456848713e-06, "loss": 0.6037, "step": 23091 }, { "epoch": 0.2760912971221559, "grad_norm": 2.2293739318847656, "learning_rate": 8.494396976850565e-06, "loss": 0.5382, "step": 23092 }, { "epoch": 0.27610325326701657, "grad_norm": 15.059133529663086, "learning_rate": 8.494258491612555e-06, "loss": 0.4817, "step": 23093 }, { "epoch": 0.2761152094118772, "grad_norm": 2.413959503173828, "learning_rate": 8.494120001134894e-06, "loss": 0.5031, "step": 23094 }, { "epoch": 0.2761271655567379, "grad_norm": 2.263131856918335, "learning_rate": 8.493981505417787e-06, "loss": 0.5404, "step": 23095 }, { "epoch": 0.27613912170159854, "grad_norm": 6.935654163360596, "learning_rate": 8.493843004461442e-06, "loss": 0.528, "step": 23096 }, { "epoch": 0.2761510778464592, "grad_norm": 2.965367555618286, "learning_rate": 8.493704498266066e-06, "loss": 0.6775, "step": 23097 }, { "epoch": 0.27616303399131986, "grad_norm": 2.316509962081909, "learning_rate": 8.49356598683187e-06, "loss": 0.6065, "step": 23098 }, { "epoch": 0.27617499013618046, "grad_norm": 2.52687668800354, "learning_rate": 8.493427470159057e-06, "loss": 0.5768, "step": 23099 }, { "epoch": 0.2761869462810411, "grad_norm": 16.39228057861328, "learning_rate": 8.493288948247839e-06, "loss": 0.5855, "step": 23100 }, { "epoch": 0.2761989024259018, "grad_norm": 3.222996473312378, "learning_rate": 8.493150421098422e-06, "loss": 0.5352, "step": 23101 }, { "epoch": 0.27621085857076244, "grad_norm": 4.5467634201049805, "learning_rate": 8.493011888711012e-06, "loss": 0.54, "step": 23102 }, { "epoch": 0.2762228147156231, "grad_norm": 5.320845127105713, "learning_rate": 8.49287335108582e-06, "loss": 0.622, "step": 23103 }, { "epoch": 0.27623477086048376, "grad_norm": 3.628253936767578, "learning_rate": 8.492734808223048e-06, "loss": 0.6296, "step": 23104 }, { "epoch": 0.2762467270053444, "grad_norm": 4.064345836639404, "learning_rate": 8.492596260122911e-06, "loss": 0.652, "step": 23105 }, { "epoch": 0.2762586831502051, "grad_norm": 2.9300169944763184, "learning_rate": 8.492457706785612e-06, "loss": 0.6109, "step": 23106 }, { "epoch": 0.2762706392950657, "grad_norm": 3.0619640350341797, "learning_rate": 8.492319148211364e-06, "loss": 0.5898, "step": 23107 }, { "epoch": 0.27628259543992634, "grad_norm": 2.10640549659729, "learning_rate": 8.492180584400368e-06, "loss": 0.5487, "step": 23108 }, { "epoch": 0.276294551584787, "grad_norm": 1.770862102508545, "learning_rate": 8.492042015352836e-06, "loss": 0.5216, "step": 23109 }, { "epoch": 0.27630650772964765, "grad_norm": 7.3139872550964355, "learning_rate": 8.491903441068972e-06, "loss": 0.6175, "step": 23110 }, { "epoch": 0.2763184638745083, "grad_norm": 2.5332870483398438, "learning_rate": 8.491764861548988e-06, "loss": 0.6105, "step": 23111 }, { "epoch": 0.27633042001936897, "grad_norm": 14.014123916625977, "learning_rate": 8.49162627679309e-06, "loss": 0.5426, "step": 23112 }, { "epoch": 0.27634237616422963, "grad_norm": 2.2254817485809326, "learning_rate": 8.491487686801488e-06, "loss": 0.5459, "step": 23113 }, { "epoch": 0.27635433230909023, "grad_norm": 2.039705753326416, "learning_rate": 8.491349091574386e-06, "loss": 0.475, "step": 23114 }, { "epoch": 0.2763662884539509, "grad_norm": 1.7996174097061157, "learning_rate": 8.491210491111993e-06, "loss": 0.6206, "step": 23115 }, { "epoch": 0.27637824459881155, "grad_norm": 2.5973474979400635, "learning_rate": 8.491071885414517e-06, "loss": 0.6369, "step": 23116 }, { "epoch": 0.2763902007436722, "grad_norm": 8.881152153015137, "learning_rate": 8.490933274482169e-06, "loss": 0.5807, "step": 23117 }, { "epoch": 0.27640215688853287, "grad_norm": 2.102853775024414, "learning_rate": 8.490794658315152e-06, "loss": 0.5489, "step": 23118 }, { "epoch": 0.2764141130333935, "grad_norm": 2.434083938598633, "learning_rate": 8.490656036913677e-06, "loss": 0.6299, "step": 23119 }, { "epoch": 0.2764260691782542, "grad_norm": 1.6060388088226318, "learning_rate": 8.49051741027795e-06, "loss": 0.5946, "step": 23120 }, { "epoch": 0.2764380253231148, "grad_norm": 2.0701777935028076, "learning_rate": 8.49037877840818e-06, "loss": 0.5768, "step": 23121 }, { "epoch": 0.27644998146797545, "grad_norm": 3.557340383529663, "learning_rate": 8.490240141304576e-06, "loss": 0.5738, "step": 23122 }, { "epoch": 0.2764619376128361, "grad_norm": 3.2272047996520996, "learning_rate": 8.490101498967343e-06, "loss": 0.5938, "step": 23123 }, { "epoch": 0.27647389375769676, "grad_norm": 2.565131187438965, "learning_rate": 8.489962851396691e-06, "loss": 0.5773, "step": 23124 }, { "epoch": 0.2764858499025574, "grad_norm": 2.175934076309204, "learning_rate": 8.489824198592826e-06, "loss": 0.5847, "step": 23125 }, { "epoch": 0.2764978060474181, "grad_norm": 2.988029718399048, "learning_rate": 8.48968554055596e-06, "loss": 0.6001, "step": 23126 }, { "epoch": 0.27650976219227874, "grad_norm": 3.849553346633911, "learning_rate": 8.489546877286294e-06, "loss": 0.6415, "step": 23127 }, { "epoch": 0.2765217183371394, "grad_norm": 1.9310060739517212, "learning_rate": 8.489408208784042e-06, "loss": 0.6007, "step": 23128 }, { "epoch": 0.276533674482, "grad_norm": 6.830490589141846, "learning_rate": 8.489269535049412e-06, "loss": 0.6821, "step": 23129 }, { "epoch": 0.27654563062686066, "grad_norm": 8.080142974853516, "learning_rate": 8.489130856082608e-06, "loss": 0.5796, "step": 23130 }, { "epoch": 0.2765575867717213, "grad_norm": 3.0767323970794678, "learning_rate": 8.488992171883842e-06, "loss": 0.7221, "step": 23131 }, { "epoch": 0.276569542916582, "grad_norm": 3.580832004547119, "learning_rate": 8.488853482453317e-06, "loss": 0.631, "step": 23132 }, { "epoch": 0.27658149906144264, "grad_norm": 2.3840651512145996, "learning_rate": 8.488714787791243e-06, "loss": 0.6724, "step": 23133 }, { "epoch": 0.2765934552063033, "grad_norm": 2.2063331604003906, "learning_rate": 8.48857608789783e-06, "loss": 0.584, "step": 23134 }, { "epoch": 0.27660541135116395, "grad_norm": 4.508152484893799, "learning_rate": 8.488437382773287e-06, "loss": 0.67, "step": 23135 }, { "epoch": 0.27661736749602456, "grad_norm": 6.494954586029053, "learning_rate": 8.48829867241782e-06, "loss": 0.6558, "step": 23136 }, { "epoch": 0.2766293236408852, "grad_norm": 3.2722933292388916, "learning_rate": 8.488159956831632e-06, "loss": 0.6478, "step": 23137 }, { "epoch": 0.2766412797857459, "grad_norm": 2.8186635971069336, "learning_rate": 8.488021236014938e-06, "loss": 0.6347, "step": 23138 }, { "epoch": 0.27665323593060653, "grad_norm": 3.2485523223876953, "learning_rate": 8.487882509967946e-06, "loss": 0.6328, "step": 23139 }, { "epoch": 0.2766651920754672, "grad_norm": 2.6837494373321533, "learning_rate": 8.48774377869086e-06, "loss": 0.5811, "step": 23140 }, { "epoch": 0.27667714822032785, "grad_norm": 4.3801445960998535, "learning_rate": 8.487605042183891e-06, "loss": 0.6694, "step": 23141 }, { "epoch": 0.2766891043651885, "grad_norm": 2.2443699836730957, "learning_rate": 8.487466300447243e-06, "loss": 0.6449, "step": 23142 }, { "epoch": 0.2767010605100491, "grad_norm": 4.423727035522461, "learning_rate": 8.487327553481128e-06, "loss": 0.5892, "step": 23143 }, { "epoch": 0.27671301665490977, "grad_norm": 4.547608852386475, "learning_rate": 8.487188801285754e-06, "loss": 0.5742, "step": 23144 }, { "epoch": 0.27672497279977043, "grad_norm": 3.0738048553466797, "learning_rate": 8.48705004386133e-06, "loss": 0.6278, "step": 23145 }, { "epoch": 0.2767369289446311, "grad_norm": 4.1917948722839355, "learning_rate": 8.486911281208059e-06, "loss": 0.5636, "step": 23146 }, { "epoch": 0.27674888508949175, "grad_norm": 2.8207736015319824, "learning_rate": 8.486772513326152e-06, "loss": 0.5786, "step": 23147 }, { "epoch": 0.2767608412343524, "grad_norm": 3.576021432876587, "learning_rate": 8.486633740215819e-06, "loss": 0.5288, "step": 23148 }, { "epoch": 0.27677279737921306, "grad_norm": 1.8634660243988037, "learning_rate": 8.486494961877265e-06, "loss": 0.578, "step": 23149 }, { "epoch": 0.2767847535240737, "grad_norm": 3.5277507305145264, "learning_rate": 8.4863561783107e-06, "loss": 0.5832, "step": 23150 }, { "epoch": 0.2767967096689343, "grad_norm": 2.6112544536590576, "learning_rate": 8.48621738951633e-06, "loss": 0.6014, "step": 23151 }, { "epoch": 0.276808665813795, "grad_norm": 2.9360191822052, "learning_rate": 8.486078595494367e-06, "loss": 0.5582, "step": 23152 }, { "epoch": 0.27682062195865564, "grad_norm": 16.178251266479492, "learning_rate": 8.485939796245016e-06, "loss": 0.6571, "step": 23153 }, { "epoch": 0.2768325781035163, "grad_norm": 2.418165683746338, "learning_rate": 8.485800991768485e-06, "loss": 0.6083, "step": 23154 }, { "epoch": 0.27684453424837696, "grad_norm": 6.038896560668945, "learning_rate": 8.485662182064984e-06, "loss": 0.5833, "step": 23155 }, { "epoch": 0.2768564903932376, "grad_norm": 3.2742016315460205, "learning_rate": 8.485523367134718e-06, "loss": 0.5425, "step": 23156 }, { "epoch": 0.2768684465380983, "grad_norm": 7.072093963623047, "learning_rate": 8.4853845469779e-06, "loss": 0.5455, "step": 23157 }, { "epoch": 0.2768804026829589, "grad_norm": 2.4801814556121826, "learning_rate": 8.485245721594735e-06, "loss": 0.632, "step": 23158 }, { "epoch": 0.27689235882781954, "grad_norm": 2.386662721633911, "learning_rate": 8.48510689098543e-06, "loss": 0.6216, "step": 23159 }, { "epoch": 0.2769043149726802, "grad_norm": 3.2516231536865234, "learning_rate": 8.484968055150197e-06, "loss": 0.6795, "step": 23160 }, { "epoch": 0.27691627111754086, "grad_norm": 1.7284458875656128, "learning_rate": 8.48482921408924e-06, "loss": 0.5664, "step": 23161 }, { "epoch": 0.2769282272624015, "grad_norm": 2.670448064804077, "learning_rate": 8.484690367802771e-06, "loss": 0.498, "step": 23162 }, { "epoch": 0.2769401834072622, "grad_norm": 4.783660888671875, "learning_rate": 8.484551516290996e-06, "loss": 0.6447, "step": 23163 }, { "epoch": 0.27695213955212283, "grad_norm": 2.105708360671997, "learning_rate": 8.484412659554123e-06, "loss": 0.6954, "step": 23164 }, { "epoch": 0.2769640956969835, "grad_norm": 2.4345054626464844, "learning_rate": 8.484273797592361e-06, "loss": 0.7393, "step": 23165 }, { "epoch": 0.2769760518418441, "grad_norm": 2.8995494842529297, "learning_rate": 8.484134930405918e-06, "loss": 0.6026, "step": 23166 }, { "epoch": 0.27698800798670475, "grad_norm": 2.592346668243408, "learning_rate": 8.483996057995004e-06, "loss": 0.4881, "step": 23167 }, { "epoch": 0.2769999641315654, "grad_norm": 2.1149678230285645, "learning_rate": 8.483857180359822e-06, "loss": 0.5588, "step": 23168 }, { "epoch": 0.27701192027642607, "grad_norm": 6.124787330627441, "learning_rate": 8.483718297500586e-06, "loss": 0.6618, "step": 23169 }, { "epoch": 0.27702387642128673, "grad_norm": 2.3533356189727783, "learning_rate": 8.483579409417502e-06, "loss": 0.629, "step": 23170 }, { "epoch": 0.2770358325661474, "grad_norm": 9.123823165893555, "learning_rate": 8.483440516110778e-06, "loss": 0.5738, "step": 23171 }, { "epoch": 0.27704778871100805, "grad_norm": 2.737539291381836, "learning_rate": 8.483301617580624e-06, "loss": 0.7266, "step": 23172 }, { "epoch": 0.27705974485586865, "grad_norm": 3.5436112880706787, "learning_rate": 8.483162713827245e-06, "loss": 0.5579, "step": 23173 }, { "epoch": 0.2770717010007293, "grad_norm": 2.712024688720703, "learning_rate": 8.48302380485085e-06, "loss": 0.6212, "step": 23174 }, { "epoch": 0.27708365714558997, "grad_norm": 2.534628391265869, "learning_rate": 8.48288489065165e-06, "loss": 0.5704, "step": 23175 }, { "epoch": 0.2770956132904506, "grad_norm": 4.001523017883301, "learning_rate": 8.482745971229854e-06, "loss": 0.5735, "step": 23176 }, { "epoch": 0.2771075694353113, "grad_norm": 2.024156332015991, "learning_rate": 8.482607046585665e-06, "loss": 0.5742, "step": 23177 }, { "epoch": 0.27711952558017194, "grad_norm": 1.9458065032958984, "learning_rate": 8.482468116719295e-06, "loss": 0.6094, "step": 23178 }, { "epoch": 0.2771314817250326, "grad_norm": 5.608463764190674, "learning_rate": 8.48232918163095e-06, "loss": 0.6477, "step": 23179 }, { "epoch": 0.2771434378698932, "grad_norm": 2.53009295463562, "learning_rate": 8.482190241320843e-06, "loss": 0.6502, "step": 23180 }, { "epoch": 0.27715539401475386, "grad_norm": 4.339591979980469, "learning_rate": 8.48205129578918e-06, "loss": 0.5927, "step": 23181 }, { "epoch": 0.2771673501596145, "grad_norm": 3.290841579437256, "learning_rate": 8.481912345036164e-06, "loss": 0.6238, "step": 23182 }, { "epoch": 0.2771793063044752, "grad_norm": 2.45341157913208, "learning_rate": 8.481773389062013e-06, "loss": 0.491, "step": 23183 }, { "epoch": 0.27719126244933584, "grad_norm": 3.078150987625122, "learning_rate": 8.481634427866928e-06, "loss": 0.5434, "step": 23184 }, { "epoch": 0.2772032185941965, "grad_norm": 3.631910562515259, "learning_rate": 8.48149546145112e-06, "loss": 0.6098, "step": 23185 }, { "epoch": 0.27721517473905716, "grad_norm": 4.9711408615112305, "learning_rate": 8.481356489814797e-06, "loss": 0.5197, "step": 23186 }, { "epoch": 0.2772271308839178, "grad_norm": 2.841569423675537, "learning_rate": 8.481217512958168e-06, "loss": 0.5599, "step": 23187 }, { "epoch": 0.2772390870287784, "grad_norm": 2.3946352005004883, "learning_rate": 8.481078530881441e-06, "loss": 0.6412, "step": 23188 }, { "epoch": 0.2772510431736391, "grad_norm": 4.56093168258667, "learning_rate": 8.480939543584824e-06, "loss": 0.659, "step": 23189 }, { "epoch": 0.27726299931849974, "grad_norm": 2.3810348510742188, "learning_rate": 8.480800551068526e-06, "loss": 0.5836, "step": 23190 }, { "epoch": 0.2772749554633604, "grad_norm": 2.8043322563171387, "learning_rate": 8.480661553332757e-06, "loss": 0.5576, "step": 23191 }, { "epoch": 0.27728691160822105, "grad_norm": 3.2468955516815186, "learning_rate": 8.480522550377721e-06, "loss": 0.6421, "step": 23192 }, { "epoch": 0.2772988677530817, "grad_norm": 3.1431713104248047, "learning_rate": 8.48038354220363e-06, "loss": 0.6734, "step": 23193 }, { "epoch": 0.27731082389794237, "grad_norm": 6.001555919647217, "learning_rate": 8.480244528810691e-06, "loss": 0.6501, "step": 23194 }, { "epoch": 0.277322780042803, "grad_norm": 3.3531248569488525, "learning_rate": 8.480105510199112e-06, "loss": 0.6486, "step": 23195 }, { "epoch": 0.27733473618766363, "grad_norm": 2.600614070892334, "learning_rate": 8.479966486369104e-06, "loss": 0.5322, "step": 23196 }, { "epoch": 0.2773466923325243, "grad_norm": 2.801593542098999, "learning_rate": 8.479827457320874e-06, "loss": 0.7419, "step": 23197 }, { "epoch": 0.27735864847738495, "grad_norm": 4.969696044921875, "learning_rate": 8.47968842305463e-06, "loss": 0.4436, "step": 23198 }, { "epoch": 0.2773706046222456, "grad_norm": 2.4943065643310547, "learning_rate": 8.479549383570583e-06, "loss": 0.6332, "step": 23199 }, { "epoch": 0.27738256076710627, "grad_norm": 3.7708206176757812, "learning_rate": 8.479410338868937e-06, "loss": 0.5703, "step": 23200 }, { "epoch": 0.2773945169119669, "grad_norm": 3.7558605670928955, "learning_rate": 8.479271288949902e-06, "loss": 0.6372, "step": 23201 }, { "epoch": 0.27740647305682753, "grad_norm": 4.20701265335083, "learning_rate": 8.479132233813688e-06, "loss": 0.4999, "step": 23202 }, { "epoch": 0.2774184292016882, "grad_norm": 2.5740487575531006, "learning_rate": 8.478993173460503e-06, "loss": 0.5619, "step": 23203 }, { "epoch": 0.27743038534654885, "grad_norm": 2.1409053802490234, "learning_rate": 8.478854107890558e-06, "loss": 0.5737, "step": 23204 }, { "epoch": 0.2774423414914095, "grad_norm": 2.6997132301330566, "learning_rate": 8.478715037104055e-06, "loss": 0.6317, "step": 23205 }, { "epoch": 0.27745429763627016, "grad_norm": 1.9810558557510376, "learning_rate": 8.478575961101208e-06, "loss": 0.6598, "step": 23206 }, { "epoch": 0.2774662537811308, "grad_norm": 1.6249667406082153, "learning_rate": 8.478436879882223e-06, "loss": 0.5814, "step": 23207 }, { "epoch": 0.2774782099259915, "grad_norm": 3.27842378616333, "learning_rate": 8.478297793447312e-06, "loss": 0.5274, "step": 23208 }, { "epoch": 0.27749016607085214, "grad_norm": 2.953721761703491, "learning_rate": 8.47815870179668e-06, "loss": 0.5903, "step": 23209 }, { "epoch": 0.27750212221571274, "grad_norm": 3.760796546936035, "learning_rate": 8.478019604930536e-06, "loss": 0.5154, "step": 23210 }, { "epoch": 0.2775140783605734, "grad_norm": 2.8210153579711914, "learning_rate": 8.477880502849089e-06, "loss": 0.5974, "step": 23211 }, { "epoch": 0.27752603450543406, "grad_norm": 2.8118293285369873, "learning_rate": 8.477741395552547e-06, "loss": 0.5868, "step": 23212 }, { "epoch": 0.2775379906502947, "grad_norm": 1.9033852815628052, "learning_rate": 8.477602283041122e-06, "loss": 0.5945, "step": 23213 }, { "epoch": 0.2775499467951554, "grad_norm": 2.8095922470092773, "learning_rate": 8.477463165315019e-06, "loss": 0.627, "step": 23214 }, { "epoch": 0.27756190294001604, "grad_norm": 4.885743618011475, "learning_rate": 8.477324042374446e-06, "loss": 0.6534, "step": 23215 }, { "epoch": 0.2775738590848767, "grad_norm": 4.375583171844482, "learning_rate": 8.477184914219616e-06, "loss": 0.5097, "step": 23216 }, { "epoch": 0.2775858152297373, "grad_norm": 2.477302074432373, "learning_rate": 8.477045780850733e-06, "loss": 0.5829, "step": 23217 }, { "epoch": 0.27759777137459796, "grad_norm": 2.5739047527313232, "learning_rate": 8.476906642268007e-06, "loss": 0.6881, "step": 23218 }, { "epoch": 0.2776097275194586, "grad_norm": 2.2478792667388916, "learning_rate": 8.476767498471649e-06, "loss": 0.5554, "step": 23219 }, { "epoch": 0.2776216836643193, "grad_norm": 2.649251937866211, "learning_rate": 8.476628349461865e-06, "loss": 0.6377, "step": 23220 }, { "epoch": 0.27763363980917993, "grad_norm": 5.328951835632324, "learning_rate": 8.476489195238864e-06, "loss": 0.6019, "step": 23221 }, { "epoch": 0.2776455959540406, "grad_norm": 5.420260429382324, "learning_rate": 8.476350035802857e-06, "loss": 0.6376, "step": 23222 }, { "epoch": 0.27765755209890125, "grad_norm": 4.197351455688477, "learning_rate": 8.476210871154048e-06, "loss": 0.6235, "step": 23223 }, { "epoch": 0.2776695082437619, "grad_norm": 2.3559439182281494, "learning_rate": 8.47607170129265e-06, "loss": 0.6226, "step": 23224 }, { "epoch": 0.2776814643886225, "grad_norm": 2.4927384853363037, "learning_rate": 8.475932526218868e-06, "loss": 0.6485, "step": 23225 }, { "epoch": 0.27769342053348317, "grad_norm": 1.7445040941238403, "learning_rate": 8.475793345932916e-06, "loss": 0.5729, "step": 23226 }, { "epoch": 0.27770537667834383, "grad_norm": 3.0895678997039795, "learning_rate": 8.475654160434998e-06, "loss": 0.6086, "step": 23227 }, { "epoch": 0.2777173328232045, "grad_norm": 3.795445203781128, "learning_rate": 8.475514969725324e-06, "loss": 0.5626, "step": 23228 }, { "epoch": 0.27772928896806515, "grad_norm": 7.876209735870361, "learning_rate": 8.475375773804101e-06, "loss": 0.6271, "step": 23229 }, { "epoch": 0.2777412451129258, "grad_norm": 1.5363177061080933, "learning_rate": 8.475236572671544e-06, "loss": 0.6595, "step": 23230 }, { "epoch": 0.27775320125778646, "grad_norm": 3.583296537399292, "learning_rate": 8.475097366327856e-06, "loss": 0.598, "step": 23231 }, { "epoch": 0.27776515740264707, "grad_norm": 3.1788220405578613, "learning_rate": 8.474958154773244e-06, "loss": 0.6079, "step": 23232 }, { "epoch": 0.2777771135475077, "grad_norm": 4.1609110832214355, "learning_rate": 8.474818938007924e-06, "loss": 0.6109, "step": 23233 }, { "epoch": 0.2777890696923684, "grad_norm": 6.745570659637451, "learning_rate": 8.4746797160321e-06, "loss": 0.6696, "step": 23234 }, { "epoch": 0.27780102583722904, "grad_norm": 5.688290596008301, "learning_rate": 8.47454048884598e-06, "loss": 0.6097, "step": 23235 }, { "epoch": 0.2778129819820897, "grad_norm": 2.288332939147949, "learning_rate": 8.474401256449775e-06, "loss": 0.5984, "step": 23236 }, { "epoch": 0.27782493812695036, "grad_norm": 4.547853469848633, "learning_rate": 8.47426201884369e-06, "loss": 0.573, "step": 23237 }, { "epoch": 0.277836894271811, "grad_norm": 3.1544547080993652, "learning_rate": 8.474122776027941e-06, "loss": 0.6101, "step": 23238 }, { "epoch": 0.2778488504166716, "grad_norm": 1.8020870685577393, "learning_rate": 8.47398352800273e-06, "loss": 0.5024, "step": 23239 }, { "epoch": 0.2778608065615323, "grad_norm": 2.8610680103302, "learning_rate": 8.473844274768271e-06, "loss": 0.5466, "step": 23240 }, { "epoch": 0.27787276270639294, "grad_norm": 4.800365924835205, "learning_rate": 8.473705016324766e-06, "loss": 0.5228, "step": 23241 }, { "epoch": 0.2778847188512536, "grad_norm": 2.0998997688293457, "learning_rate": 8.473565752672432e-06, "loss": 0.5626, "step": 23242 }, { "epoch": 0.27789667499611426, "grad_norm": 10.9515380859375, "learning_rate": 8.47342648381147e-06, "loss": 0.6135, "step": 23243 }, { "epoch": 0.2779086311409749, "grad_norm": 2.662119150161743, "learning_rate": 8.473287209742097e-06, "loss": 0.6349, "step": 23244 }, { "epoch": 0.2779205872858356, "grad_norm": 2.117035150527954, "learning_rate": 8.473147930464516e-06, "loss": 0.6374, "step": 23245 }, { "epoch": 0.27793254343069623, "grad_norm": 2.6583540439605713, "learning_rate": 8.473008645978937e-06, "loss": 0.5819, "step": 23246 }, { "epoch": 0.27794449957555684, "grad_norm": 4.054573059082031, "learning_rate": 8.472869356285568e-06, "loss": 0.5839, "step": 23247 }, { "epoch": 0.2779564557204175, "grad_norm": 2.3939008712768555, "learning_rate": 8.47273006138462e-06, "loss": 0.6083, "step": 23248 }, { "epoch": 0.27796841186527815, "grad_norm": 1.8485263586044312, "learning_rate": 8.472590761276301e-06, "loss": 0.5916, "step": 23249 }, { "epoch": 0.2779803680101388, "grad_norm": 5.752583026885986, "learning_rate": 8.47245145596082e-06, "loss": 0.5891, "step": 23250 }, { "epoch": 0.27799232415499947, "grad_norm": 3.4360101222991943, "learning_rate": 8.472312145438384e-06, "loss": 0.5897, "step": 23251 }, { "epoch": 0.27800428029986013, "grad_norm": 3.946624994277954, "learning_rate": 8.472172829709206e-06, "loss": 0.6244, "step": 23252 }, { "epoch": 0.2780162364447208, "grad_norm": 2.1154677867889404, "learning_rate": 8.472033508773492e-06, "loss": 0.5115, "step": 23253 }, { "epoch": 0.2780281925895814, "grad_norm": 2.675544261932373, "learning_rate": 8.47189418263145e-06, "loss": 0.5845, "step": 23254 }, { "epoch": 0.27804014873444205, "grad_norm": 2.517796277999878, "learning_rate": 8.47175485128329e-06, "loss": 0.6025, "step": 23255 }, { "epoch": 0.2780521048793027, "grad_norm": 24.165042877197266, "learning_rate": 8.471615514729223e-06, "loss": 0.5923, "step": 23256 }, { "epoch": 0.27806406102416337, "grad_norm": 3.6039998531341553, "learning_rate": 8.471476172969455e-06, "loss": 0.571, "step": 23257 }, { "epoch": 0.278076017169024, "grad_norm": 3.306262969970703, "learning_rate": 8.471336826004197e-06, "loss": 0.5926, "step": 23258 }, { "epoch": 0.2780879733138847, "grad_norm": 1.9402459859848022, "learning_rate": 8.471197473833657e-06, "loss": 0.508, "step": 23259 }, { "epoch": 0.27809992945874534, "grad_norm": 3.983478546142578, "learning_rate": 8.471058116458044e-06, "loss": 0.5836, "step": 23260 }, { "epoch": 0.27811188560360595, "grad_norm": 9.888154029846191, "learning_rate": 8.470918753877568e-06, "loss": 0.5659, "step": 23261 }, { "epoch": 0.2781238417484666, "grad_norm": 5.539229869842529, "learning_rate": 8.470779386092435e-06, "loss": 0.6922, "step": 23262 }, { "epoch": 0.27813579789332726, "grad_norm": 3.369483470916748, "learning_rate": 8.470640013102857e-06, "loss": 0.6888, "step": 23263 }, { "epoch": 0.2781477540381879, "grad_norm": 4.27647590637207, "learning_rate": 8.470500634909043e-06, "loss": 0.6264, "step": 23264 }, { "epoch": 0.2781597101830486, "grad_norm": 2.577814817428589, "learning_rate": 8.470361251511198e-06, "loss": 0.6444, "step": 23265 }, { "epoch": 0.27817166632790924, "grad_norm": 49.07487106323242, "learning_rate": 8.470221862909536e-06, "loss": 0.5357, "step": 23266 }, { "epoch": 0.2781836224727699, "grad_norm": 3.2358710765838623, "learning_rate": 8.470082469104264e-06, "loss": 0.643, "step": 23267 }, { "epoch": 0.27819557861763056, "grad_norm": 2.2955899238586426, "learning_rate": 8.469943070095592e-06, "loss": 0.6407, "step": 23268 }, { "epoch": 0.27820753476249116, "grad_norm": 3.367846727371216, "learning_rate": 8.469803665883726e-06, "loss": 0.6118, "step": 23269 }, { "epoch": 0.2782194909073518, "grad_norm": 2.9782726764678955, "learning_rate": 8.469664256468877e-06, "loss": 0.6278, "step": 23270 }, { "epoch": 0.2782314470522125, "grad_norm": 3.2121338844299316, "learning_rate": 8.469524841851255e-06, "loss": 0.5296, "step": 23271 }, { "epoch": 0.27824340319707314, "grad_norm": 2.8420300483703613, "learning_rate": 8.469385422031069e-06, "loss": 0.5689, "step": 23272 }, { "epoch": 0.2782553593419338, "grad_norm": 4.543376922607422, "learning_rate": 8.469245997008526e-06, "loss": 0.6109, "step": 23273 }, { "epoch": 0.27826731548679445, "grad_norm": 2.350358486175537, "learning_rate": 8.469106566783836e-06, "loss": 0.6095, "step": 23274 }, { "epoch": 0.2782792716316551, "grad_norm": 2.466294527053833, "learning_rate": 8.468967131357208e-06, "loss": 0.6002, "step": 23275 }, { "epoch": 0.2782912277765157, "grad_norm": 5.315056800842285, "learning_rate": 8.468827690728854e-06, "loss": 0.6886, "step": 23276 }, { "epoch": 0.2783031839213764, "grad_norm": 2.6257898807525635, "learning_rate": 8.46868824489898e-06, "loss": 0.5903, "step": 23277 }, { "epoch": 0.27831514006623703, "grad_norm": 2.8893141746520996, "learning_rate": 8.468548793867794e-06, "loss": 0.6413, "step": 23278 }, { "epoch": 0.2783270962110977, "grad_norm": 2.112813711166382, "learning_rate": 8.468409337635508e-06, "loss": 0.6279, "step": 23279 }, { "epoch": 0.27833905235595835, "grad_norm": 3.152195453643799, "learning_rate": 8.46826987620233e-06, "loss": 0.6864, "step": 23280 }, { "epoch": 0.278351008500819, "grad_norm": 5.353691577911377, "learning_rate": 8.468130409568467e-06, "loss": 0.5963, "step": 23281 }, { "epoch": 0.27836296464567967, "grad_norm": 3.412304162979126, "learning_rate": 8.467990937734132e-06, "loss": 0.5969, "step": 23282 }, { "epoch": 0.2783749207905403, "grad_norm": 2.216529607772827, "learning_rate": 8.467851460699531e-06, "loss": 0.6979, "step": 23283 }, { "epoch": 0.27838687693540093, "grad_norm": 2.498060464859009, "learning_rate": 8.467711978464875e-06, "loss": 0.6594, "step": 23284 }, { "epoch": 0.2783988330802616, "grad_norm": 1.8153693675994873, "learning_rate": 8.467572491030373e-06, "loss": 0.5809, "step": 23285 }, { "epoch": 0.27841078922512225, "grad_norm": 1.9571678638458252, "learning_rate": 8.467432998396233e-06, "loss": 0.7067, "step": 23286 }, { "epoch": 0.2784227453699829, "grad_norm": 12.87704086303711, "learning_rate": 8.467293500562665e-06, "loss": 0.6393, "step": 23287 }, { "epoch": 0.27843470151484356, "grad_norm": 4.257476329803467, "learning_rate": 8.467153997529879e-06, "loss": 0.534, "step": 23288 }, { "epoch": 0.2784466576597042, "grad_norm": 3.0480213165283203, "learning_rate": 8.467014489298083e-06, "loss": 0.5903, "step": 23289 }, { "epoch": 0.2784586138045649, "grad_norm": 19.507139205932617, "learning_rate": 8.466874975867484e-06, "loss": 0.5172, "step": 23290 }, { "epoch": 0.2784705699494255, "grad_norm": 2.0944387912750244, "learning_rate": 8.466735457238297e-06, "loss": 0.7042, "step": 23291 }, { "epoch": 0.27848252609428614, "grad_norm": 4.682381629943848, "learning_rate": 8.466595933410725e-06, "loss": 0.6231, "step": 23292 }, { "epoch": 0.2784944822391468, "grad_norm": 5.579942226409912, "learning_rate": 8.466456404384983e-06, "loss": 0.5702, "step": 23293 }, { "epoch": 0.27850643838400746, "grad_norm": 3.005762815475464, "learning_rate": 8.466316870161278e-06, "loss": 0.5656, "step": 23294 }, { "epoch": 0.2785183945288681, "grad_norm": 3.0082058906555176, "learning_rate": 8.466177330739816e-06, "loss": 0.6801, "step": 23295 }, { "epoch": 0.2785303506737288, "grad_norm": 3.125930070877075, "learning_rate": 8.466037786120808e-06, "loss": 0.6572, "step": 23296 }, { "epoch": 0.27854230681858944, "grad_norm": 2.9794528484344482, "learning_rate": 8.465898236304465e-06, "loss": 0.5633, "step": 23297 }, { "epoch": 0.27855426296345004, "grad_norm": 4.591870307922363, "learning_rate": 8.465758681290996e-06, "loss": 0.5984, "step": 23298 }, { "epoch": 0.2785662191083107, "grad_norm": 6.426731586456299, "learning_rate": 8.46561912108061e-06, "loss": 0.5739, "step": 23299 }, { "epoch": 0.27857817525317136, "grad_norm": 1.7359733581542969, "learning_rate": 8.465479555673515e-06, "loss": 0.6926, "step": 23300 }, { "epoch": 0.278590131398032, "grad_norm": 1.8973275423049927, "learning_rate": 8.46533998506992e-06, "loss": 0.6151, "step": 23301 }, { "epoch": 0.2786020875428927, "grad_norm": 3.876720666885376, "learning_rate": 8.465200409270035e-06, "loss": 0.6266, "step": 23302 }, { "epoch": 0.27861404368775333, "grad_norm": 3.0843939781188965, "learning_rate": 8.465060828274071e-06, "loss": 0.5544, "step": 23303 }, { "epoch": 0.278625999832614, "grad_norm": 2.5116541385650635, "learning_rate": 8.464921242082238e-06, "loss": 0.5765, "step": 23304 }, { "epoch": 0.27863795597747465, "grad_norm": 9.813802719116211, "learning_rate": 8.46478165069474e-06, "loss": 0.574, "step": 23305 }, { "epoch": 0.27864991212233525, "grad_norm": 4.98707389831543, "learning_rate": 8.464642054111792e-06, "loss": 0.6625, "step": 23306 }, { "epoch": 0.2786618682671959, "grad_norm": 2.3991827964782715, "learning_rate": 8.464502452333599e-06, "loss": 0.5808, "step": 23307 }, { "epoch": 0.27867382441205657, "grad_norm": 2.722297430038452, "learning_rate": 8.464362845360375e-06, "loss": 0.5822, "step": 23308 }, { "epoch": 0.27868578055691723, "grad_norm": 7.540282249450684, "learning_rate": 8.464223233192323e-06, "loss": 0.6928, "step": 23309 }, { "epoch": 0.2786977367017779, "grad_norm": 3.237614393234253, "learning_rate": 8.464083615829657e-06, "loss": 0.5688, "step": 23310 }, { "epoch": 0.27870969284663855, "grad_norm": 3.8936686515808105, "learning_rate": 8.463943993272586e-06, "loss": 0.5011, "step": 23311 }, { "epoch": 0.2787216489914992, "grad_norm": 3.2909183502197266, "learning_rate": 8.46380436552132e-06, "loss": 0.5796, "step": 23312 }, { "epoch": 0.2787336051363598, "grad_norm": 3.4176595211029053, "learning_rate": 8.463664732576065e-06, "loss": 0.5363, "step": 23313 }, { "epoch": 0.27874556128122047, "grad_norm": 4.095933437347412, "learning_rate": 8.463525094437033e-06, "loss": 0.6105, "step": 23314 }, { "epoch": 0.2787575174260811, "grad_norm": 7.467504978179932, "learning_rate": 8.463385451104433e-06, "loss": 0.6541, "step": 23315 }, { "epoch": 0.2787694735709418, "grad_norm": 3.6589229106903076, "learning_rate": 8.463245802578473e-06, "loss": 0.598, "step": 23316 }, { "epoch": 0.27878142971580244, "grad_norm": 3.4060769081115723, "learning_rate": 8.463106148859366e-06, "loss": 0.6422, "step": 23317 }, { "epoch": 0.2787933858606631, "grad_norm": 2.031778573989868, "learning_rate": 8.462966489947317e-06, "loss": 0.5917, "step": 23318 }, { "epoch": 0.27880534200552376, "grad_norm": 2.9595537185668945, "learning_rate": 8.462826825842538e-06, "loss": 0.5968, "step": 23319 }, { "epoch": 0.27881729815038436, "grad_norm": 5.240101337432861, "learning_rate": 8.462687156545237e-06, "loss": 0.5783, "step": 23320 }, { "epoch": 0.278829254295245, "grad_norm": 3.7590675354003906, "learning_rate": 8.462547482055626e-06, "loss": 0.5802, "step": 23321 }, { "epoch": 0.2788412104401057, "grad_norm": 3.421689033508301, "learning_rate": 8.462407802373912e-06, "loss": 0.6299, "step": 23322 }, { "epoch": 0.27885316658496634, "grad_norm": 3.185089111328125, "learning_rate": 8.462268117500305e-06, "loss": 0.5193, "step": 23323 }, { "epoch": 0.278865122729827, "grad_norm": 2.6472997665405273, "learning_rate": 8.462128427435014e-06, "loss": 0.5397, "step": 23324 }, { "epoch": 0.27887707887468766, "grad_norm": 2.560565948486328, "learning_rate": 8.461988732178251e-06, "loss": 0.5432, "step": 23325 }, { "epoch": 0.2788890350195483, "grad_norm": 2.000948190689087, "learning_rate": 8.461849031730223e-06, "loss": 0.5719, "step": 23326 }, { "epoch": 0.278900991164409, "grad_norm": 2.288994073867798, "learning_rate": 8.461709326091139e-06, "loss": 0.6511, "step": 23327 }, { "epoch": 0.2789129473092696, "grad_norm": 3.464613676071167, "learning_rate": 8.461569615261211e-06, "loss": 0.6421, "step": 23328 }, { "epoch": 0.27892490345413024, "grad_norm": 3.023242950439453, "learning_rate": 8.461429899240646e-06, "loss": 0.6049, "step": 23329 }, { "epoch": 0.2789368595989909, "grad_norm": 3.9703781604766846, "learning_rate": 8.461290178029655e-06, "loss": 0.7164, "step": 23330 }, { "epoch": 0.27894881574385155, "grad_norm": 3.428290605545044, "learning_rate": 8.461150451628448e-06, "loss": 0.5669, "step": 23331 }, { "epoch": 0.2789607718887122, "grad_norm": 2.0174357891082764, "learning_rate": 8.461010720037231e-06, "loss": 0.5502, "step": 23332 }, { "epoch": 0.27897272803357287, "grad_norm": 6.690379619598389, "learning_rate": 8.460870983256218e-06, "loss": 0.5648, "step": 23333 }, { "epoch": 0.27898468417843353, "grad_norm": 5.282708644866943, "learning_rate": 8.460731241285616e-06, "loss": 0.661, "step": 23334 }, { "epoch": 0.27899664032329413, "grad_norm": 4.45827579498291, "learning_rate": 8.460591494125637e-06, "loss": 0.6724, "step": 23335 }, { "epoch": 0.2790085964681548, "grad_norm": 3.8326292037963867, "learning_rate": 8.460451741776487e-06, "loss": 0.5666, "step": 23336 }, { "epoch": 0.27902055261301545, "grad_norm": 3.230458974838257, "learning_rate": 8.460311984238376e-06, "loss": 0.639, "step": 23337 }, { "epoch": 0.2790325087578761, "grad_norm": 6.338900566101074, "learning_rate": 8.460172221511517e-06, "loss": 0.5964, "step": 23338 }, { "epoch": 0.27904446490273677, "grad_norm": 7.446452617645264, "learning_rate": 8.460032453596116e-06, "loss": 0.6306, "step": 23339 }, { "epoch": 0.2790564210475974, "grad_norm": 2.4297163486480713, "learning_rate": 8.459892680492387e-06, "loss": 0.7158, "step": 23340 }, { "epoch": 0.2790683771924581, "grad_norm": 5.23828125, "learning_rate": 8.459752902200534e-06, "loss": 0.6512, "step": 23341 }, { "epoch": 0.27908033333731874, "grad_norm": 5.893354892730713, "learning_rate": 8.459613118720768e-06, "loss": 0.6676, "step": 23342 }, { "epoch": 0.27909228948217935, "grad_norm": 3.7518351078033447, "learning_rate": 8.459473330053303e-06, "loss": 0.645, "step": 23343 }, { "epoch": 0.27910424562704, "grad_norm": 2.974682092666626, "learning_rate": 8.459333536198345e-06, "loss": 0.6435, "step": 23344 }, { "epoch": 0.27911620177190066, "grad_norm": 4.591875076293945, "learning_rate": 8.459193737156102e-06, "loss": 0.6474, "step": 23345 }, { "epoch": 0.2791281579167613, "grad_norm": 2.115865468978882, "learning_rate": 8.459053932926786e-06, "loss": 0.6224, "step": 23346 }, { "epoch": 0.279140114061622, "grad_norm": 2.1504580974578857, "learning_rate": 8.458914123510609e-06, "loss": 0.5585, "step": 23347 }, { "epoch": 0.27915207020648264, "grad_norm": 1.7925376892089844, "learning_rate": 8.458774308907776e-06, "loss": 0.6318, "step": 23348 }, { "epoch": 0.2791640263513433, "grad_norm": 2.3918113708496094, "learning_rate": 8.458634489118498e-06, "loss": 0.6517, "step": 23349 }, { "epoch": 0.2791759824962039, "grad_norm": 7.079801082611084, "learning_rate": 8.458494664142986e-06, "loss": 0.5568, "step": 23350 }, { "epoch": 0.27918793864106456, "grad_norm": 8.464107513427734, "learning_rate": 8.45835483398145e-06, "loss": 0.6697, "step": 23351 }, { "epoch": 0.2791998947859252, "grad_norm": 2.5628507137298584, "learning_rate": 8.458214998634097e-06, "loss": 0.6984, "step": 23352 }, { "epoch": 0.2792118509307859, "grad_norm": 3.3826169967651367, "learning_rate": 8.458075158101139e-06, "loss": 0.6673, "step": 23353 }, { "epoch": 0.27922380707564654, "grad_norm": 1.995269775390625, "learning_rate": 8.457935312382784e-06, "loss": 0.6345, "step": 23354 }, { "epoch": 0.2792357632205072, "grad_norm": 7.046656131744385, "learning_rate": 8.457795461479244e-06, "loss": 0.6281, "step": 23355 }, { "epoch": 0.27924771936536785, "grad_norm": 2.003178119659424, "learning_rate": 8.457655605390726e-06, "loss": 0.5399, "step": 23356 }, { "epoch": 0.27925967551022846, "grad_norm": 4.757135391235352, "learning_rate": 8.457515744117444e-06, "loss": 0.5913, "step": 23357 }, { "epoch": 0.2792716316550891, "grad_norm": 2.736562728881836, "learning_rate": 8.457375877659603e-06, "loss": 0.5788, "step": 23358 }, { "epoch": 0.2792835877999498, "grad_norm": 5.173031330108643, "learning_rate": 8.457236006017415e-06, "loss": 0.6605, "step": 23359 }, { "epoch": 0.27929554394481043, "grad_norm": 12.12258529663086, "learning_rate": 8.457096129191088e-06, "loss": 0.5654, "step": 23360 }, { "epoch": 0.2793075000896711, "grad_norm": 3.37395977973938, "learning_rate": 8.456956247180835e-06, "loss": 0.6364, "step": 23361 }, { "epoch": 0.27931945623453175, "grad_norm": 2.9726505279541016, "learning_rate": 8.456816359986862e-06, "loss": 0.5871, "step": 23362 }, { "epoch": 0.2793314123793924, "grad_norm": 2.8983116149902344, "learning_rate": 8.456676467609381e-06, "loss": 0.5891, "step": 23363 }, { "epoch": 0.27934336852425307, "grad_norm": 2.2165334224700928, "learning_rate": 8.456536570048602e-06, "loss": 0.5374, "step": 23364 }, { "epoch": 0.27935532466911367, "grad_norm": 2.7550711631774902, "learning_rate": 8.456396667304734e-06, "loss": 0.5765, "step": 23365 }, { "epoch": 0.27936728081397433, "grad_norm": 4.099065780639648, "learning_rate": 8.456256759377987e-06, "loss": 0.5614, "step": 23366 }, { "epoch": 0.279379236958835, "grad_norm": 3.1918787956237793, "learning_rate": 8.456116846268572e-06, "loss": 0.5576, "step": 23367 }, { "epoch": 0.27939119310369565, "grad_norm": 1.6222134828567505, "learning_rate": 8.455976927976694e-06, "loss": 0.562, "step": 23368 }, { "epoch": 0.2794031492485563, "grad_norm": 3.5455574989318848, "learning_rate": 8.455837004502569e-06, "loss": 0.6467, "step": 23369 }, { "epoch": 0.27941510539341696, "grad_norm": 2.71187686920166, "learning_rate": 8.455697075846403e-06, "loss": 0.6482, "step": 23370 }, { "epoch": 0.2794270615382776, "grad_norm": 2.4509222507476807, "learning_rate": 8.455557142008407e-06, "loss": 0.5961, "step": 23371 }, { "epoch": 0.2794390176831382, "grad_norm": 2.613433599472046, "learning_rate": 8.455417202988791e-06, "loss": 0.5201, "step": 23372 }, { "epoch": 0.2794509738279989, "grad_norm": 5.857083320617676, "learning_rate": 8.455277258787767e-06, "loss": 0.5276, "step": 23373 }, { "epoch": 0.27946292997285954, "grad_norm": 2.465834140777588, "learning_rate": 8.45513730940554e-06, "loss": 0.5756, "step": 23374 }, { "epoch": 0.2794748861177202, "grad_norm": 2.1350479125976562, "learning_rate": 8.454997354842323e-06, "loss": 0.5636, "step": 23375 }, { "epoch": 0.27948684226258086, "grad_norm": 3.4951303005218506, "learning_rate": 8.454857395098323e-06, "loss": 0.5806, "step": 23376 }, { "epoch": 0.2794987984074415, "grad_norm": 4.173470973968506, "learning_rate": 8.454717430173755e-06, "loss": 0.655, "step": 23377 }, { "epoch": 0.2795107545523022, "grad_norm": 2.240750789642334, "learning_rate": 8.454577460068824e-06, "loss": 0.583, "step": 23378 }, { "epoch": 0.2795227106971628, "grad_norm": 2.8100194931030273, "learning_rate": 8.454437484783743e-06, "loss": 0.5905, "step": 23379 }, { "epoch": 0.27953466684202344, "grad_norm": 4.976795196533203, "learning_rate": 8.45429750431872e-06, "loss": 0.7025, "step": 23380 }, { "epoch": 0.2795466229868841, "grad_norm": 2.6715753078460693, "learning_rate": 8.454157518673966e-06, "loss": 0.6291, "step": 23381 }, { "epoch": 0.27955857913174476, "grad_norm": 2.7083287239074707, "learning_rate": 8.454017527849691e-06, "loss": 0.5888, "step": 23382 }, { "epoch": 0.2795705352766054, "grad_norm": 2.7480275630950928, "learning_rate": 8.453877531846103e-06, "loss": 0.6537, "step": 23383 }, { "epoch": 0.2795824914214661, "grad_norm": 2.466397285461426, "learning_rate": 8.453737530663415e-06, "loss": 0.6595, "step": 23384 }, { "epoch": 0.27959444756632673, "grad_norm": 1.9819926023483276, "learning_rate": 8.453597524301834e-06, "loss": 0.6784, "step": 23385 }, { "epoch": 0.2796064037111874, "grad_norm": 9.520577430725098, "learning_rate": 8.453457512761571e-06, "loss": 0.6022, "step": 23386 }, { "epoch": 0.279618359856048, "grad_norm": 2.4385857582092285, "learning_rate": 8.453317496042839e-06, "loss": 0.5648, "step": 23387 }, { "epoch": 0.27963031600090865, "grad_norm": 2.635378122329712, "learning_rate": 8.453177474145841e-06, "loss": 0.5844, "step": 23388 }, { "epoch": 0.2796422721457693, "grad_norm": 2.8157482147216797, "learning_rate": 8.453037447070794e-06, "loss": 0.7104, "step": 23389 }, { "epoch": 0.27965422829062997, "grad_norm": 6.71814489364624, "learning_rate": 8.452897414817904e-06, "loss": 0.6276, "step": 23390 }, { "epoch": 0.27966618443549063, "grad_norm": 4.774880409240723, "learning_rate": 8.452757377387383e-06, "loss": 0.6007, "step": 23391 }, { "epoch": 0.2796781405803513, "grad_norm": 2.1877036094665527, "learning_rate": 8.45261733477944e-06, "loss": 0.6161, "step": 23392 }, { "epoch": 0.27969009672521195, "grad_norm": 9.71313762664795, "learning_rate": 8.452477286994283e-06, "loss": 0.6351, "step": 23393 }, { "epoch": 0.27970205287007255, "grad_norm": 2.9355924129486084, "learning_rate": 8.452337234032127e-06, "loss": 0.6302, "step": 23394 }, { "epoch": 0.2797140090149332, "grad_norm": 2.1099960803985596, "learning_rate": 8.452197175893178e-06, "loss": 0.6014, "step": 23395 }, { "epoch": 0.27972596515979387, "grad_norm": 5.195544719696045, "learning_rate": 8.452057112577645e-06, "loss": 0.7223, "step": 23396 }, { "epoch": 0.2797379213046545, "grad_norm": 2.6268374919891357, "learning_rate": 8.451917044085742e-06, "loss": 0.5641, "step": 23397 }, { "epoch": 0.2797498774495152, "grad_norm": 1.548622488975525, "learning_rate": 8.451776970417677e-06, "loss": 0.5805, "step": 23398 }, { "epoch": 0.27976183359437584, "grad_norm": 3.585238456726074, "learning_rate": 8.45163689157366e-06, "loss": 0.6105, "step": 23399 }, { "epoch": 0.2797737897392365, "grad_norm": 1.6689201593399048, "learning_rate": 8.451496807553901e-06, "loss": 0.631, "step": 23400 }, { "epoch": 0.27978574588409716, "grad_norm": 2.4117207527160645, "learning_rate": 8.45135671835861e-06, "loss": 0.5538, "step": 23401 }, { "epoch": 0.27979770202895776, "grad_norm": 4.017586708068848, "learning_rate": 8.451216623987998e-06, "loss": 0.6324, "step": 23402 }, { "epoch": 0.2798096581738184, "grad_norm": 1.9535189867019653, "learning_rate": 8.451076524442274e-06, "loss": 0.6194, "step": 23403 }, { "epoch": 0.2798216143186791, "grad_norm": 2.123502254486084, "learning_rate": 8.45093641972165e-06, "loss": 0.5908, "step": 23404 }, { "epoch": 0.27983357046353974, "grad_norm": 2.296696901321411, "learning_rate": 8.450796309826332e-06, "loss": 0.6188, "step": 23405 }, { "epoch": 0.2798455266084004, "grad_norm": 141.75335693359375, "learning_rate": 8.450656194756535e-06, "loss": 0.533, "step": 23406 }, { "epoch": 0.27985748275326106, "grad_norm": 4.499929428100586, "learning_rate": 8.450516074512465e-06, "loss": 0.7698, "step": 23407 }, { "epoch": 0.2798694388981217, "grad_norm": 2.2030527591705322, "learning_rate": 8.450375949094334e-06, "loss": 0.5835, "step": 23408 }, { "epoch": 0.2798813950429823, "grad_norm": 2.4230356216430664, "learning_rate": 8.450235818502353e-06, "loss": 0.4881, "step": 23409 }, { "epoch": 0.279893351187843, "grad_norm": 33.9222297668457, "learning_rate": 8.45009568273673e-06, "loss": 0.664, "step": 23410 }, { "epoch": 0.27990530733270363, "grad_norm": 6.729655742645264, "learning_rate": 8.449955541797678e-06, "loss": 0.6013, "step": 23411 }, { "epoch": 0.2799172634775643, "grad_norm": 3.314301013946533, "learning_rate": 8.449815395685403e-06, "loss": 0.5793, "step": 23412 }, { "epoch": 0.27992921962242495, "grad_norm": 2.0587823390960693, "learning_rate": 8.44967524440012e-06, "loss": 0.6119, "step": 23413 }, { "epoch": 0.2799411757672856, "grad_norm": 2.1645095348358154, "learning_rate": 8.449535087942035e-06, "loss": 0.5961, "step": 23414 }, { "epoch": 0.27995313191214627, "grad_norm": 2.2227628231048584, "learning_rate": 8.449394926311361e-06, "loss": 0.5956, "step": 23415 }, { "epoch": 0.2799650880570069, "grad_norm": 6.547240734100342, "learning_rate": 8.449254759508306e-06, "loss": 0.6896, "step": 23416 }, { "epoch": 0.27997704420186753, "grad_norm": 3.025845766067505, "learning_rate": 8.44911458753308e-06, "loss": 0.6599, "step": 23417 }, { "epoch": 0.2799890003467282, "grad_norm": 4.226705551147461, "learning_rate": 8.448974410385898e-06, "loss": 0.6532, "step": 23418 }, { "epoch": 0.28000095649158885, "grad_norm": 2.3523612022399902, "learning_rate": 8.448834228066964e-06, "loss": 0.5611, "step": 23419 }, { "epoch": 0.2800129126364495, "grad_norm": 3.554401159286499, "learning_rate": 8.448694040576493e-06, "loss": 0.5917, "step": 23420 }, { "epoch": 0.28002486878131017, "grad_norm": 2.672524929046631, "learning_rate": 8.44855384791469e-06, "loss": 0.6407, "step": 23421 }, { "epoch": 0.2800368249261708, "grad_norm": 4.774472713470459, "learning_rate": 8.44841365008177e-06, "loss": 0.6286, "step": 23422 }, { "epoch": 0.2800487810710315, "grad_norm": 1.8098067045211792, "learning_rate": 8.448273447077941e-06, "loss": 0.5397, "step": 23423 }, { "epoch": 0.2800607372158921, "grad_norm": 1.8014419078826904, "learning_rate": 8.448133238903415e-06, "loss": 0.5557, "step": 23424 }, { "epoch": 0.28007269336075274, "grad_norm": 5.679074764251709, "learning_rate": 8.447993025558401e-06, "loss": 0.6733, "step": 23425 }, { "epoch": 0.2800846495056134, "grad_norm": 2.377699136734009, "learning_rate": 8.447852807043107e-06, "loss": 0.5352, "step": 23426 }, { "epoch": 0.28009660565047406, "grad_norm": 3.36924147605896, "learning_rate": 8.447712583357749e-06, "loss": 0.608, "step": 23427 }, { "epoch": 0.2801085617953347, "grad_norm": 2.407031774520874, "learning_rate": 8.447572354502528e-06, "loss": 0.7484, "step": 23428 }, { "epoch": 0.2801205179401954, "grad_norm": 2.94326114654541, "learning_rate": 8.447432120477664e-06, "loss": 0.6075, "step": 23429 }, { "epoch": 0.28013247408505604, "grad_norm": 8.152557373046875, "learning_rate": 8.447291881283363e-06, "loss": 0.6479, "step": 23430 }, { "epoch": 0.28014443022991664, "grad_norm": 7.545764446258545, "learning_rate": 8.447151636919835e-06, "loss": 0.6074, "step": 23431 }, { "epoch": 0.2801563863747773, "grad_norm": 2.635319709777832, "learning_rate": 8.447011387387292e-06, "loss": 0.7194, "step": 23432 }, { "epoch": 0.28016834251963796, "grad_norm": 1.9010599851608276, "learning_rate": 8.446871132685942e-06, "loss": 0.6307, "step": 23433 }, { "epoch": 0.2801802986644986, "grad_norm": 2.1920409202575684, "learning_rate": 8.446730872815995e-06, "loss": 0.6934, "step": 23434 }, { "epoch": 0.2801922548093593, "grad_norm": 3.1908047199249268, "learning_rate": 8.446590607777665e-06, "loss": 0.517, "step": 23435 }, { "epoch": 0.28020421095421993, "grad_norm": 3.281064748764038, "learning_rate": 8.446450337571162e-06, "loss": 0.5895, "step": 23436 }, { "epoch": 0.2802161670990806, "grad_norm": 2.490535259246826, "learning_rate": 8.446310062196692e-06, "loss": 0.6384, "step": 23437 }, { "epoch": 0.2802281232439412, "grad_norm": 2.7269558906555176, "learning_rate": 8.446169781654468e-06, "loss": 0.6389, "step": 23438 }, { "epoch": 0.28024007938880185, "grad_norm": 2.204845428466797, "learning_rate": 8.446029495944698e-06, "loss": 0.6308, "step": 23439 }, { "epoch": 0.2802520355336625, "grad_norm": 2.124985456466675, "learning_rate": 8.4458892050676e-06, "loss": 0.6767, "step": 23440 }, { "epoch": 0.28026399167852317, "grad_norm": 3.504110336303711, "learning_rate": 8.445748909023373e-06, "loss": 0.6809, "step": 23441 }, { "epoch": 0.28027594782338383, "grad_norm": 4.084357738494873, "learning_rate": 8.445608607812236e-06, "loss": 0.5582, "step": 23442 }, { "epoch": 0.2802879039682445, "grad_norm": 2.010028123855591, "learning_rate": 8.445468301434396e-06, "loss": 0.5673, "step": 23443 }, { "epoch": 0.28029986011310515, "grad_norm": 2.249392032623291, "learning_rate": 8.445327989890066e-06, "loss": 0.6708, "step": 23444 }, { "epoch": 0.2803118162579658, "grad_norm": 1.7074077129364014, "learning_rate": 8.445187673179453e-06, "loss": 0.632, "step": 23445 }, { "epoch": 0.2803237724028264, "grad_norm": 1.95919668674469, "learning_rate": 8.44504735130277e-06, "loss": 0.5803, "step": 23446 }, { "epoch": 0.28033572854768707, "grad_norm": 4.668668746948242, "learning_rate": 8.444907024260223e-06, "loss": 0.6696, "step": 23447 }, { "epoch": 0.2803476846925477, "grad_norm": 4.4320173263549805, "learning_rate": 8.444766692052028e-06, "loss": 0.6227, "step": 23448 }, { "epoch": 0.2803596408374084, "grad_norm": 2.1963138580322266, "learning_rate": 8.444626354678392e-06, "loss": 0.5992, "step": 23449 }, { "epoch": 0.28037159698226904, "grad_norm": 4.421561241149902, "learning_rate": 8.444486012139527e-06, "loss": 0.6179, "step": 23450 }, { "epoch": 0.2803835531271297, "grad_norm": 2.9350016117095947, "learning_rate": 8.444345664435645e-06, "loss": 0.5524, "step": 23451 }, { "epoch": 0.28039550927199036, "grad_norm": 2.1415932178497314, "learning_rate": 8.444205311566951e-06, "loss": 0.6022, "step": 23452 }, { "epoch": 0.28040746541685097, "grad_norm": 1.6134669780731201, "learning_rate": 8.444064953533662e-06, "loss": 0.6013, "step": 23453 }, { "epoch": 0.2804194215617116, "grad_norm": 13.648109436035156, "learning_rate": 8.443924590335983e-06, "loss": 0.6359, "step": 23454 }, { "epoch": 0.2804313777065723, "grad_norm": 2.2212512493133545, "learning_rate": 8.443784221974128e-06, "loss": 0.6221, "step": 23455 }, { "epoch": 0.28044333385143294, "grad_norm": 2.5279617309570312, "learning_rate": 8.443643848448306e-06, "loss": 0.6081, "step": 23456 }, { "epoch": 0.2804552899962936, "grad_norm": 6.207760810852051, "learning_rate": 8.443503469758728e-06, "loss": 0.5965, "step": 23457 }, { "epoch": 0.28046724614115426, "grad_norm": 3.101921796798706, "learning_rate": 8.443363085905605e-06, "loss": 0.5789, "step": 23458 }, { "epoch": 0.2804792022860149, "grad_norm": 1.9396449327468872, "learning_rate": 8.443222696889147e-06, "loss": 0.5836, "step": 23459 }, { "epoch": 0.2804911584308756, "grad_norm": 2.581130266189575, "learning_rate": 8.443082302709561e-06, "loss": 0.5748, "step": 23460 }, { "epoch": 0.2805031145757362, "grad_norm": 9.864806175231934, "learning_rate": 8.442941903367063e-06, "loss": 0.6143, "step": 23461 }, { "epoch": 0.28051507072059684, "grad_norm": 2.0779194831848145, "learning_rate": 8.442801498861863e-06, "loss": 0.5469, "step": 23462 }, { "epoch": 0.2805270268654575, "grad_norm": 2.0893380641937256, "learning_rate": 8.442661089194168e-06, "loss": 0.5833, "step": 23463 }, { "epoch": 0.28053898301031815, "grad_norm": 2.1053571701049805, "learning_rate": 8.442520674364192e-06, "loss": 0.6176, "step": 23464 }, { "epoch": 0.2805509391551788, "grad_norm": 5.115795135498047, "learning_rate": 8.442380254372142e-06, "loss": 0.5683, "step": 23465 }, { "epoch": 0.28056289530003947, "grad_norm": 3.249800443649292, "learning_rate": 8.442239829218232e-06, "loss": 0.5713, "step": 23466 }, { "epoch": 0.28057485144490013, "grad_norm": 3.6484737396240234, "learning_rate": 8.44209939890267e-06, "loss": 0.6032, "step": 23467 }, { "epoch": 0.28058680758976073, "grad_norm": 2.661825180053711, "learning_rate": 8.441958963425669e-06, "loss": 0.5863, "step": 23468 }, { "epoch": 0.2805987637346214, "grad_norm": 2.7525079250335693, "learning_rate": 8.441818522787438e-06, "loss": 0.6073, "step": 23469 }, { "epoch": 0.28061071987948205, "grad_norm": 2.4568355083465576, "learning_rate": 8.441678076988187e-06, "loss": 0.6384, "step": 23470 }, { "epoch": 0.2806226760243427, "grad_norm": 2.3312604427337646, "learning_rate": 8.44153762602813e-06, "loss": 0.5993, "step": 23471 }, { "epoch": 0.28063463216920337, "grad_norm": 5.80059289932251, "learning_rate": 8.441397169907473e-06, "loss": 0.5471, "step": 23472 }, { "epoch": 0.280646588314064, "grad_norm": 3.507716655731201, "learning_rate": 8.441256708626428e-06, "loss": 0.6322, "step": 23473 }, { "epoch": 0.2806585444589247, "grad_norm": 3.0112857818603516, "learning_rate": 8.441116242185207e-06, "loss": 0.6505, "step": 23474 }, { "epoch": 0.2806705006037853, "grad_norm": 7.1690592765808105, "learning_rate": 8.440975770584022e-06, "loss": 0.6901, "step": 23475 }, { "epoch": 0.28068245674864595, "grad_norm": 2.1514148712158203, "learning_rate": 8.440835293823078e-06, "loss": 0.4833, "step": 23476 }, { "epoch": 0.2806944128935066, "grad_norm": 2.1729519367218018, "learning_rate": 8.440694811902592e-06, "loss": 0.4792, "step": 23477 }, { "epoch": 0.28070636903836726, "grad_norm": 3.1001429557800293, "learning_rate": 8.44055432482277e-06, "loss": 0.6321, "step": 23478 }, { "epoch": 0.2807183251832279, "grad_norm": 2.1710116863250732, "learning_rate": 8.440413832583826e-06, "loss": 0.6767, "step": 23479 }, { "epoch": 0.2807302813280886, "grad_norm": 3.560781240463257, "learning_rate": 8.44027333518597e-06, "loss": 0.6154, "step": 23480 }, { "epoch": 0.28074223747294924, "grad_norm": 2.536196708679199, "learning_rate": 8.44013283262941e-06, "loss": 0.668, "step": 23481 }, { "epoch": 0.2807541936178099, "grad_norm": 3.537323474884033, "learning_rate": 8.43999232491436e-06, "loss": 0.5376, "step": 23482 }, { "epoch": 0.2807661497626705, "grad_norm": 5.648717880249023, "learning_rate": 8.439851812041026e-06, "loss": 0.6409, "step": 23483 }, { "epoch": 0.28077810590753116, "grad_norm": 1.9787108898162842, "learning_rate": 8.439711294009625e-06, "loss": 0.5204, "step": 23484 }, { "epoch": 0.2807900620523918, "grad_norm": 1.7807402610778809, "learning_rate": 8.439570770820364e-06, "loss": 0.5742, "step": 23485 }, { "epoch": 0.2808020181972525, "grad_norm": 4.859498500823975, "learning_rate": 8.439430242473455e-06, "loss": 0.5744, "step": 23486 }, { "epoch": 0.28081397434211314, "grad_norm": 1.7697442770004272, "learning_rate": 8.439289708969107e-06, "loss": 0.5727, "step": 23487 }, { "epoch": 0.2808259304869738, "grad_norm": 2.7612781524658203, "learning_rate": 8.439149170307534e-06, "loss": 0.4838, "step": 23488 }, { "epoch": 0.28083788663183445, "grad_norm": 2.828104257583618, "learning_rate": 8.439008626488943e-06, "loss": 0.6596, "step": 23489 }, { "epoch": 0.28084984277669506, "grad_norm": 2.438225746154785, "learning_rate": 8.438868077513548e-06, "loss": 0.6439, "step": 23490 }, { "epoch": 0.2808617989215557, "grad_norm": 2.0662436485290527, "learning_rate": 8.438727523381556e-06, "loss": 0.6513, "step": 23491 }, { "epoch": 0.2808737550664164, "grad_norm": 2.031534194946289, "learning_rate": 8.43858696409318e-06, "loss": 0.5966, "step": 23492 }, { "epoch": 0.28088571121127703, "grad_norm": 1.9959787130355835, "learning_rate": 8.43844639964863e-06, "loss": 0.6415, "step": 23493 }, { "epoch": 0.2808976673561377, "grad_norm": 3.729731559753418, "learning_rate": 8.43830583004812e-06, "loss": 0.6051, "step": 23494 }, { "epoch": 0.28090962350099835, "grad_norm": 3.299868583679199, "learning_rate": 8.438165255291857e-06, "loss": 0.5427, "step": 23495 }, { "epoch": 0.280921579645859, "grad_norm": 2.066892623901367, "learning_rate": 8.438024675380052e-06, "loss": 0.5976, "step": 23496 }, { "epoch": 0.2809335357907196, "grad_norm": 1.9848660230636597, "learning_rate": 8.437884090312917e-06, "loss": 0.5909, "step": 23497 }, { "epoch": 0.28094549193558027, "grad_norm": 3.117786407470703, "learning_rate": 8.437743500090663e-06, "loss": 0.62, "step": 23498 }, { "epoch": 0.28095744808044093, "grad_norm": 2.0248372554779053, "learning_rate": 8.4376029047135e-06, "loss": 0.653, "step": 23499 }, { "epoch": 0.2809694042253016, "grad_norm": 2.825815439224243, "learning_rate": 8.43746230418164e-06, "loss": 0.4683, "step": 23500 }, { "epoch": 0.28098136037016225, "grad_norm": 2.916341781616211, "learning_rate": 8.437321698495293e-06, "loss": 0.6932, "step": 23501 }, { "epoch": 0.2809933165150229, "grad_norm": 1.9705424308776855, "learning_rate": 8.437181087654668e-06, "loss": 0.576, "step": 23502 }, { "epoch": 0.28100527265988356, "grad_norm": 4.662336349487305, "learning_rate": 8.43704047165998e-06, "loss": 0.6354, "step": 23503 }, { "epoch": 0.2810172288047442, "grad_norm": 7.031429767608643, "learning_rate": 8.436899850511438e-06, "loss": 0.6112, "step": 23504 }, { "epoch": 0.2810291849496048, "grad_norm": 6.452847003936768, "learning_rate": 8.436759224209252e-06, "loss": 0.6614, "step": 23505 }, { "epoch": 0.2810411410944655, "grad_norm": 4.22351598739624, "learning_rate": 8.436618592753632e-06, "loss": 0.6643, "step": 23506 }, { "epoch": 0.28105309723932614, "grad_norm": 2.13120698928833, "learning_rate": 8.43647795614479e-06, "loss": 0.5735, "step": 23507 }, { "epoch": 0.2810650533841868, "grad_norm": 1.366829752922058, "learning_rate": 8.436337314382938e-06, "loss": 0.6838, "step": 23508 }, { "epoch": 0.28107700952904746, "grad_norm": 3.815959930419922, "learning_rate": 8.436196667468288e-06, "loss": 0.6696, "step": 23509 }, { "epoch": 0.2810889656739081, "grad_norm": 2.0734806060791016, "learning_rate": 8.436056015401046e-06, "loss": 0.6874, "step": 23510 }, { "epoch": 0.2811009218187688, "grad_norm": 4.71267557144165, "learning_rate": 8.435915358181425e-06, "loss": 0.6365, "step": 23511 }, { "epoch": 0.2811128779636294, "grad_norm": 3.4118146896362305, "learning_rate": 8.43577469580964e-06, "loss": 0.5701, "step": 23512 }, { "epoch": 0.28112483410849004, "grad_norm": 2.8785016536712646, "learning_rate": 8.435634028285897e-06, "loss": 0.6966, "step": 23513 }, { "epoch": 0.2811367902533507, "grad_norm": 3.27587890625, "learning_rate": 8.43549335561041e-06, "loss": 0.628, "step": 23514 }, { "epoch": 0.28114874639821136, "grad_norm": 4.024534702301025, "learning_rate": 8.435352677783387e-06, "loss": 0.6466, "step": 23515 }, { "epoch": 0.281160702543072, "grad_norm": 2.81465744972229, "learning_rate": 8.43521199480504e-06, "loss": 0.5516, "step": 23516 }, { "epoch": 0.2811726586879327, "grad_norm": 2.4276773929595947, "learning_rate": 8.435071306675579e-06, "loss": 0.5969, "step": 23517 }, { "epoch": 0.28118461483279333, "grad_norm": 1.9912030696868896, "learning_rate": 8.434930613395219e-06, "loss": 0.5954, "step": 23518 }, { "epoch": 0.281196570977654, "grad_norm": 5.361783027648926, "learning_rate": 8.434789914964167e-06, "loss": 0.6765, "step": 23519 }, { "epoch": 0.2812085271225146, "grad_norm": 2.8144876956939697, "learning_rate": 8.434649211382637e-06, "loss": 0.5622, "step": 23520 }, { "epoch": 0.28122048326737525, "grad_norm": 3.0939137935638428, "learning_rate": 8.434508502650837e-06, "loss": 0.5897, "step": 23521 }, { "epoch": 0.2812324394122359, "grad_norm": 1.7893140316009521, "learning_rate": 8.43436778876898e-06, "loss": 0.5537, "step": 23522 }, { "epoch": 0.28124439555709657, "grad_norm": 3.6150436401367188, "learning_rate": 8.434227069737275e-06, "loss": 0.6689, "step": 23523 }, { "epoch": 0.28125635170195723, "grad_norm": 2.779731035232544, "learning_rate": 8.434086345555935e-06, "loss": 0.6084, "step": 23524 }, { "epoch": 0.2812683078468179, "grad_norm": 2.50361967086792, "learning_rate": 8.43394561622517e-06, "loss": 0.4992, "step": 23525 }, { "epoch": 0.28128026399167855, "grad_norm": 2.7104828357696533, "learning_rate": 8.433804881745192e-06, "loss": 0.5912, "step": 23526 }, { "epoch": 0.28129222013653915, "grad_norm": 4.196561336517334, "learning_rate": 8.433664142116213e-06, "loss": 0.536, "step": 23527 }, { "epoch": 0.2813041762813998, "grad_norm": 6.375138759613037, "learning_rate": 8.43352339733844e-06, "loss": 0.7442, "step": 23528 }, { "epoch": 0.28131613242626047, "grad_norm": 4.879256725311279, "learning_rate": 8.433382647412086e-06, "loss": 0.6715, "step": 23529 }, { "epoch": 0.2813280885711211, "grad_norm": 3.6903305053710938, "learning_rate": 8.433241892337363e-06, "loss": 0.6978, "step": 23530 }, { "epoch": 0.2813400447159818, "grad_norm": 3.0165562629699707, "learning_rate": 8.433101132114483e-06, "loss": 0.5861, "step": 23531 }, { "epoch": 0.28135200086084244, "grad_norm": 3.238128662109375, "learning_rate": 8.432960366743656e-06, "loss": 0.5391, "step": 23532 }, { "epoch": 0.2813639570057031, "grad_norm": 6.824046611785889, "learning_rate": 8.432819596225091e-06, "loss": 0.6686, "step": 23533 }, { "epoch": 0.2813759131505637, "grad_norm": 5.171238899230957, "learning_rate": 8.432678820559002e-06, "loss": 0.6439, "step": 23534 }, { "epoch": 0.28138786929542436, "grad_norm": 2.615161895751953, "learning_rate": 8.432538039745596e-06, "loss": 0.6166, "step": 23535 }, { "epoch": 0.281399825440285, "grad_norm": 4.535269260406494, "learning_rate": 8.43239725378509e-06, "loss": 0.6294, "step": 23536 }, { "epoch": 0.2814117815851457, "grad_norm": 33.86236572265625, "learning_rate": 8.432256462677692e-06, "loss": 0.5829, "step": 23537 }, { "epoch": 0.28142373773000634, "grad_norm": 2.889129161834717, "learning_rate": 8.432115666423613e-06, "loss": 0.5226, "step": 23538 }, { "epoch": 0.281435693874867, "grad_norm": 2.955322265625, "learning_rate": 8.431974865023062e-06, "loss": 0.5831, "step": 23539 }, { "epoch": 0.28144765001972766, "grad_norm": 8.841838836669922, "learning_rate": 8.431834058476255e-06, "loss": 0.6257, "step": 23540 }, { "epoch": 0.2814596061645883, "grad_norm": 5.782421112060547, "learning_rate": 8.431693246783402e-06, "loss": 0.5865, "step": 23541 }, { "epoch": 0.2814715623094489, "grad_norm": 1.8805075883865356, "learning_rate": 8.43155242994471e-06, "loss": 0.5262, "step": 23542 }, { "epoch": 0.2814835184543096, "grad_norm": 3.281363010406494, "learning_rate": 8.431411607960394e-06, "loss": 0.6038, "step": 23543 }, { "epoch": 0.28149547459917024, "grad_norm": 2.7927513122558594, "learning_rate": 8.431270780830663e-06, "loss": 0.5681, "step": 23544 }, { "epoch": 0.2815074307440309, "grad_norm": 4.76555061340332, "learning_rate": 8.43112994855573e-06, "loss": 0.638, "step": 23545 }, { "epoch": 0.28151938688889155, "grad_norm": 1.9356215000152588, "learning_rate": 8.430989111135806e-06, "loss": 0.6478, "step": 23546 }, { "epoch": 0.2815313430337522, "grad_norm": 2.5730910301208496, "learning_rate": 8.430848268571102e-06, "loss": 0.6139, "step": 23547 }, { "epoch": 0.28154329917861287, "grad_norm": 2.80743145942688, "learning_rate": 8.430707420861828e-06, "loss": 0.6512, "step": 23548 }, { "epoch": 0.2815552553234735, "grad_norm": 3.8162238597869873, "learning_rate": 8.430566568008196e-06, "loss": 0.5846, "step": 23549 }, { "epoch": 0.28156721146833413, "grad_norm": 2.1728713512420654, "learning_rate": 8.430425710010418e-06, "loss": 0.5766, "step": 23550 }, { "epoch": 0.2815791676131948, "grad_norm": 3.933332920074463, "learning_rate": 8.430284846868703e-06, "loss": 0.5716, "step": 23551 }, { "epoch": 0.28159112375805545, "grad_norm": 6.710850715637207, "learning_rate": 8.430143978583264e-06, "loss": 0.6006, "step": 23552 }, { "epoch": 0.2816030799029161, "grad_norm": 2.5396430492401123, "learning_rate": 8.430003105154311e-06, "loss": 0.7491, "step": 23553 }, { "epoch": 0.28161503604777677, "grad_norm": 2.6880993843078613, "learning_rate": 8.42986222658206e-06, "loss": 0.8338, "step": 23554 }, { "epoch": 0.2816269921926374, "grad_norm": 1.4839316606521606, "learning_rate": 8.429721342866715e-06, "loss": 0.6422, "step": 23555 }, { "epoch": 0.28163894833749803, "grad_norm": 4.443660259246826, "learning_rate": 8.42958045400849e-06, "loss": 0.5153, "step": 23556 }, { "epoch": 0.2816509044823587, "grad_norm": 4.157006740570068, "learning_rate": 8.4294395600076e-06, "loss": 0.5776, "step": 23557 }, { "epoch": 0.28166286062721935, "grad_norm": 2.027818202972412, "learning_rate": 8.429298660864252e-06, "loss": 0.5713, "step": 23558 }, { "epoch": 0.28167481677208, "grad_norm": 2.1298305988311768, "learning_rate": 8.42915775657866e-06, "loss": 0.691, "step": 23559 }, { "epoch": 0.28168677291694066, "grad_norm": 2.3689486980438232, "learning_rate": 8.429016847151031e-06, "loss": 0.6508, "step": 23560 }, { "epoch": 0.2816987290618013, "grad_norm": 1.9168020486831665, "learning_rate": 8.428875932581579e-06, "loss": 0.6187, "step": 23561 }, { "epoch": 0.281710685206662, "grad_norm": 2.1247520446777344, "learning_rate": 8.428735012870515e-06, "loss": 0.5315, "step": 23562 }, { "epoch": 0.28172264135152264, "grad_norm": 2.5163822174072266, "learning_rate": 8.428594088018053e-06, "loss": 0.6181, "step": 23563 }, { "epoch": 0.28173459749638324, "grad_norm": 2.189436435699463, "learning_rate": 8.428453158024402e-06, "loss": 0.5837, "step": 23564 }, { "epoch": 0.2817465536412439, "grad_norm": 2.691865921020508, "learning_rate": 8.428312222889772e-06, "loss": 0.6143, "step": 23565 }, { "epoch": 0.28175850978610456, "grad_norm": 2.19207501411438, "learning_rate": 8.428171282614375e-06, "loss": 0.5931, "step": 23566 }, { "epoch": 0.2817704659309652, "grad_norm": 3.8096835613250732, "learning_rate": 8.428030337198422e-06, "loss": 0.547, "step": 23567 }, { "epoch": 0.2817824220758259, "grad_norm": 2.449326515197754, "learning_rate": 8.42788938664213e-06, "loss": 0.6793, "step": 23568 }, { "epoch": 0.28179437822068654, "grad_norm": 4.843338489532471, "learning_rate": 8.427748430945703e-06, "loss": 0.4815, "step": 23569 }, { "epoch": 0.2818063343655472, "grad_norm": 2.3392152786254883, "learning_rate": 8.427607470109353e-06, "loss": 0.5978, "step": 23570 }, { "epoch": 0.2818182905104078, "grad_norm": 2.580002546310425, "learning_rate": 8.427466504133297e-06, "loss": 0.5719, "step": 23571 }, { "epoch": 0.28183024665526846, "grad_norm": 1.6920955181121826, "learning_rate": 8.42732553301774e-06, "loss": 0.5792, "step": 23572 }, { "epoch": 0.2818422028001291, "grad_norm": 2.654773235321045, "learning_rate": 8.427184556762897e-06, "loss": 0.6032, "step": 23573 }, { "epoch": 0.2818541589449898, "grad_norm": 7.250102519989014, "learning_rate": 8.42704357536898e-06, "loss": 0.5525, "step": 23574 }, { "epoch": 0.28186611508985043, "grad_norm": 2.2255048751831055, "learning_rate": 8.426902588836196e-06, "loss": 0.635, "step": 23575 }, { "epoch": 0.2818780712347111, "grad_norm": 2.9354441165924072, "learning_rate": 8.426761597164764e-06, "loss": 0.658, "step": 23576 }, { "epoch": 0.28189002737957175, "grad_norm": 2.9899351596832275, "learning_rate": 8.426620600354887e-06, "loss": 0.5892, "step": 23577 }, { "epoch": 0.2819019835244324, "grad_norm": 2.488067865371704, "learning_rate": 8.42647959840678e-06, "loss": 0.6594, "step": 23578 }, { "epoch": 0.281913939669293, "grad_norm": 3.2460784912109375, "learning_rate": 8.426338591320658e-06, "loss": 0.5926, "step": 23579 }, { "epoch": 0.28192589581415367, "grad_norm": 2.073254108428955, "learning_rate": 8.426197579096727e-06, "loss": 0.5923, "step": 23580 }, { "epoch": 0.28193785195901433, "grad_norm": 4.852719306945801, "learning_rate": 8.426056561735198e-06, "loss": 0.6128, "step": 23581 }, { "epoch": 0.281949808103875, "grad_norm": 3.016442060470581, "learning_rate": 8.42591553923629e-06, "loss": 0.6574, "step": 23582 }, { "epoch": 0.28196176424873565, "grad_norm": 3.984950065612793, "learning_rate": 8.425774511600206e-06, "loss": 0.6509, "step": 23583 }, { "epoch": 0.2819737203935963, "grad_norm": 2.034583568572998, "learning_rate": 8.425633478827162e-06, "loss": 0.5696, "step": 23584 }, { "epoch": 0.28198567653845696, "grad_norm": 3.461493492126465, "learning_rate": 8.425492440917369e-06, "loss": 0.6134, "step": 23585 }, { "epoch": 0.28199763268331757, "grad_norm": 2.0298643112182617, "learning_rate": 8.425351397871038e-06, "loss": 0.6068, "step": 23586 }, { "epoch": 0.2820095888281782, "grad_norm": 2.028259515762329, "learning_rate": 8.425210349688378e-06, "loss": 0.6159, "step": 23587 }, { "epoch": 0.2820215449730389, "grad_norm": 3.0615272521972656, "learning_rate": 8.425069296369605e-06, "loss": 0.6228, "step": 23588 }, { "epoch": 0.28203350111789954, "grad_norm": 2.3157877922058105, "learning_rate": 8.424928237914929e-06, "loss": 0.5751, "step": 23589 }, { "epoch": 0.2820454572627602, "grad_norm": 3.1274726390838623, "learning_rate": 8.42478717432456e-06, "loss": 0.6438, "step": 23590 }, { "epoch": 0.28205741340762086, "grad_norm": 2.7825927734375, "learning_rate": 8.424646105598711e-06, "loss": 0.6228, "step": 23591 }, { "epoch": 0.2820693695524815, "grad_norm": 1.5749690532684326, "learning_rate": 8.424505031737592e-06, "loss": 0.5974, "step": 23592 }, { "epoch": 0.2820813256973421, "grad_norm": 8.167349815368652, "learning_rate": 8.424363952741415e-06, "loss": 0.4511, "step": 23593 }, { "epoch": 0.2820932818422028, "grad_norm": 5.871894359588623, "learning_rate": 8.424222868610393e-06, "loss": 0.6354, "step": 23594 }, { "epoch": 0.28210523798706344, "grad_norm": 5.947964668273926, "learning_rate": 8.424081779344738e-06, "loss": 0.5042, "step": 23595 }, { "epoch": 0.2821171941319241, "grad_norm": 2.301652193069458, "learning_rate": 8.423940684944658e-06, "loss": 0.6375, "step": 23596 }, { "epoch": 0.28212915027678476, "grad_norm": 3.308361530303955, "learning_rate": 8.423799585410368e-06, "loss": 0.653, "step": 23597 }, { "epoch": 0.2821411064216454, "grad_norm": 2.1941161155700684, "learning_rate": 8.42365848074208e-06, "loss": 0.5925, "step": 23598 }, { "epoch": 0.2821530625665061, "grad_norm": 2.1174657344818115, "learning_rate": 8.42351737094e-06, "loss": 0.6307, "step": 23599 }, { "epoch": 0.28216501871136673, "grad_norm": 2.764514923095703, "learning_rate": 8.423376256004345e-06, "loss": 0.6768, "step": 23600 }, { "epoch": 0.28217697485622734, "grad_norm": 3.299870252609253, "learning_rate": 8.423235135935325e-06, "loss": 0.5464, "step": 23601 }, { "epoch": 0.282188931001088, "grad_norm": 3.6278042793273926, "learning_rate": 8.423094010733152e-06, "loss": 0.6158, "step": 23602 }, { "epoch": 0.28220088714594865, "grad_norm": 2.718970775604248, "learning_rate": 8.422952880398038e-06, "loss": 0.5459, "step": 23603 }, { "epoch": 0.2822128432908093, "grad_norm": 4.050073623657227, "learning_rate": 8.422811744930195e-06, "loss": 0.577, "step": 23604 }, { "epoch": 0.28222479943566997, "grad_norm": 2.5320935249328613, "learning_rate": 8.422670604329832e-06, "loss": 0.6667, "step": 23605 }, { "epoch": 0.28223675558053063, "grad_norm": 4.819515228271484, "learning_rate": 8.422529458597163e-06, "loss": 0.643, "step": 23606 }, { "epoch": 0.2822487117253913, "grad_norm": 2.230731248855591, "learning_rate": 8.422388307732399e-06, "loss": 0.5978, "step": 23607 }, { "epoch": 0.2822606678702519, "grad_norm": 4.834364891052246, "learning_rate": 8.42224715173575e-06, "loss": 0.6309, "step": 23608 }, { "epoch": 0.28227262401511255, "grad_norm": 7.275041580200195, "learning_rate": 8.422105990607431e-06, "loss": 0.6755, "step": 23609 }, { "epoch": 0.2822845801599732, "grad_norm": 5.035735607147217, "learning_rate": 8.42196482434765e-06, "loss": 0.6329, "step": 23610 }, { "epoch": 0.28229653630483387, "grad_norm": 2.609266996383667, "learning_rate": 8.421823652956622e-06, "loss": 0.5573, "step": 23611 }, { "epoch": 0.2823084924496945, "grad_norm": 3.093985080718994, "learning_rate": 8.421682476434559e-06, "loss": 0.6685, "step": 23612 }, { "epoch": 0.2823204485945552, "grad_norm": 2.93615984916687, "learning_rate": 8.421541294781668e-06, "loss": 0.5756, "step": 23613 }, { "epoch": 0.28233240473941584, "grad_norm": 2.965083122253418, "learning_rate": 8.421400107998165e-06, "loss": 0.6319, "step": 23614 }, { "epoch": 0.28234436088427645, "grad_norm": 3.369626522064209, "learning_rate": 8.42125891608426e-06, "loss": 0.558, "step": 23615 }, { "epoch": 0.2823563170291371, "grad_norm": 1.935981035232544, "learning_rate": 8.421117719040165e-06, "loss": 0.65, "step": 23616 }, { "epoch": 0.28236827317399776, "grad_norm": 3.1573503017425537, "learning_rate": 8.420976516866091e-06, "loss": 0.6646, "step": 23617 }, { "epoch": 0.2823802293188584, "grad_norm": 3.611809253692627, "learning_rate": 8.420835309562253e-06, "loss": 0.5859, "step": 23618 }, { "epoch": 0.2823921854637191, "grad_norm": 3.432096481323242, "learning_rate": 8.420694097128859e-06, "loss": 0.6948, "step": 23619 }, { "epoch": 0.28240414160857974, "grad_norm": 6.660858631134033, "learning_rate": 8.420552879566121e-06, "loss": 0.7126, "step": 23620 }, { "epoch": 0.2824160977534404, "grad_norm": 4.031493186950684, "learning_rate": 8.420411656874252e-06, "loss": 0.5357, "step": 23621 }, { "epoch": 0.28242805389830106, "grad_norm": 4.383328437805176, "learning_rate": 8.420270429053465e-06, "loss": 0.6483, "step": 23622 }, { "epoch": 0.28244001004316166, "grad_norm": 2.224003553390503, "learning_rate": 8.42012919610397e-06, "loss": 0.6065, "step": 23623 }, { "epoch": 0.2824519661880223, "grad_norm": 2.8352365493774414, "learning_rate": 8.41998795802598e-06, "loss": 0.6059, "step": 23624 }, { "epoch": 0.282463922332883, "grad_norm": 3.1172642707824707, "learning_rate": 8.419846714819703e-06, "loss": 0.53, "step": 23625 }, { "epoch": 0.28247587847774364, "grad_norm": 4.13407564163208, "learning_rate": 8.419705466485354e-06, "loss": 0.6505, "step": 23626 }, { "epoch": 0.2824878346226043, "grad_norm": 2.632253408432007, "learning_rate": 8.419564213023148e-06, "loss": 0.6222, "step": 23627 }, { "epoch": 0.28249979076746495, "grad_norm": 2.4504759311676025, "learning_rate": 8.41942295443329e-06, "loss": 0.5785, "step": 23628 }, { "epoch": 0.2825117469123256, "grad_norm": 3.4498395919799805, "learning_rate": 8.419281690715997e-06, "loss": 0.612, "step": 23629 }, { "epoch": 0.2825237030571862, "grad_norm": 2.797642707824707, "learning_rate": 8.419140421871478e-06, "loss": 0.616, "step": 23630 }, { "epoch": 0.2825356592020469, "grad_norm": 8.457188606262207, "learning_rate": 8.418999147899944e-06, "loss": 0.5716, "step": 23631 }, { "epoch": 0.28254761534690753, "grad_norm": 3.649242877960205, "learning_rate": 8.418857868801613e-06, "loss": 0.6146, "step": 23632 }, { "epoch": 0.2825595714917682, "grad_norm": 4.054201602935791, "learning_rate": 8.41871658457669e-06, "loss": 0.6007, "step": 23633 }, { "epoch": 0.28257152763662885, "grad_norm": 4.5504231452941895, "learning_rate": 8.418575295225388e-06, "loss": 0.5755, "step": 23634 }, { "epoch": 0.2825834837814895, "grad_norm": 6.576040744781494, "learning_rate": 8.418434000747922e-06, "loss": 0.6874, "step": 23635 }, { "epoch": 0.28259543992635017, "grad_norm": 3.6003310680389404, "learning_rate": 8.418292701144502e-06, "loss": 0.6609, "step": 23636 }, { "epoch": 0.2826073960712108, "grad_norm": 5.198257923126221, "learning_rate": 8.418151396415339e-06, "loss": 0.5487, "step": 23637 }, { "epoch": 0.28261935221607143, "grad_norm": 2.7642765045166016, "learning_rate": 8.418010086560647e-06, "loss": 0.627, "step": 23638 }, { "epoch": 0.2826313083609321, "grad_norm": 2.1532182693481445, "learning_rate": 8.417868771580635e-06, "loss": 0.5723, "step": 23639 }, { "epoch": 0.28264326450579275, "grad_norm": 6.479841232299805, "learning_rate": 8.41772745147552e-06, "loss": 0.5554, "step": 23640 }, { "epoch": 0.2826552206506534, "grad_norm": 3.7829833030700684, "learning_rate": 8.417586126245508e-06, "loss": 0.694, "step": 23641 }, { "epoch": 0.28266717679551406, "grad_norm": 3.1955273151397705, "learning_rate": 8.417444795890813e-06, "loss": 0.6063, "step": 23642 }, { "epoch": 0.2826791329403747, "grad_norm": 6.418928623199463, "learning_rate": 8.417303460411648e-06, "loss": 0.6367, "step": 23643 }, { "epoch": 0.2826910890852354, "grad_norm": 2.7874977588653564, "learning_rate": 8.417162119808224e-06, "loss": 0.5973, "step": 23644 }, { "epoch": 0.282703045230096, "grad_norm": 6.677100658416748, "learning_rate": 8.417020774080755e-06, "loss": 0.5676, "step": 23645 }, { "epoch": 0.28271500137495664, "grad_norm": 1.7042862176895142, "learning_rate": 8.41687942322945e-06, "loss": 0.5801, "step": 23646 }, { "epoch": 0.2827269575198173, "grad_norm": 2.228569984436035, "learning_rate": 8.416738067254522e-06, "loss": 0.5647, "step": 23647 }, { "epoch": 0.28273891366467796, "grad_norm": 3.0413148403167725, "learning_rate": 8.416596706156182e-06, "loss": 0.686, "step": 23648 }, { "epoch": 0.2827508698095386, "grad_norm": 3.1748406887054443, "learning_rate": 8.416455339934644e-06, "loss": 0.521, "step": 23649 }, { "epoch": 0.2827628259543993, "grad_norm": 3.8979616165161133, "learning_rate": 8.41631396859012e-06, "loss": 0.5722, "step": 23650 }, { "epoch": 0.28277478209925994, "grad_norm": 3.900988817214966, "learning_rate": 8.41617259212282e-06, "loss": 0.65, "step": 23651 }, { "epoch": 0.28278673824412054, "grad_norm": 2.697972297668457, "learning_rate": 8.416031210532958e-06, "loss": 0.5433, "step": 23652 }, { "epoch": 0.2827986943889812, "grad_norm": 3.159654378890991, "learning_rate": 8.415889823820745e-06, "loss": 0.5257, "step": 23653 }, { "epoch": 0.28281065053384186, "grad_norm": 5.24042272567749, "learning_rate": 8.415748431986393e-06, "loss": 0.5784, "step": 23654 }, { "epoch": 0.2828226066787025, "grad_norm": 2.2930755615234375, "learning_rate": 8.415607035030113e-06, "loss": 0.5699, "step": 23655 }, { "epoch": 0.2828345628235632, "grad_norm": 2.9636642932891846, "learning_rate": 8.415465632952118e-06, "loss": 0.6448, "step": 23656 }, { "epoch": 0.28284651896842383, "grad_norm": 2.23929762840271, "learning_rate": 8.415324225752622e-06, "loss": 0.5613, "step": 23657 }, { "epoch": 0.2828584751132845, "grad_norm": 2.1935718059539795, "learning_rate": 8.415182813431834e-06, "loss": 0.647, "step": 23658 }, { "epoch": 0.28287043125814515, "grad_norm": 5.311241149902344, "learning_rate": 8.415041395989967e-06, "loss": 0.6388, "step": 23659 }, { "epoch": 0.28288238740300575, "grad_norm": 2.801875352859497, "learning_rate": 8.414899973427234e-06, "loss": 0.6801, "step": 23660 }, { "epoch": 0.2828943435478664, "grad_norm": 2.9541263580322266, "learning_rate": 8.414758545743846e-06, "loss": 0.606, "step": 23661 }, { "epoch": 0.28290629969272707, "grad_norm": 4.351266860961914, "learning_rate": 8.414617112940016e-06, "loss": 0.6244, "step": 23662 }, { "epoch": 0.28291825583758773, "grad_norm": 2.448245048522949, "learning_rate": 8.414475675015953e-06, "loss": 0.5306, "step": 23663 }, { "epoch": 0.2829302119824484, "grad_norm": 1.9300471544265747, "learning_rate": 8.414334231971875e-06, "loss": 0.6647, "step": 23664 }, { "epoch": 0.28294216812730905, "grad_norm": 2.22025990486145, "learning_rate": 8.414192783807989e-06, "loss": 0.5762, "step": 23665 }, { "epoch": 0.2829541242721697, "grad_norm": 3.010904550552368, "learning_rate": 8.414051330524508e-06, "loss": 0.5803, "step": 23666 }, { "epoch": 0.2829660804170303, "grad_norm": 2.2700538635253906, "learning_rate": 8.413909872121647e-06, "loss": 0.7181, "step": 23667 }, { "epoch": 0.28297803656189097, "grad_norm": 2.190929412841797, "learning_rate": 8.413768408599613e-06, "loss": 0.6672, "step": 23668 }, { "epoch": 0.2829899927067516, "grad_norm": 2.1447036266326904, "learning_rate": 8.413626939958626e-06, "loss": 0.7172, "step": 23669 }, { "epoch": 0.2830019488516123, "grad_norm": 2.0437867641448975, "learning_rate": 8.41348546619889e-06, "loss": 0.5893, "step": 23670 }, { "epoch": 0.28301390499647294, "grad_norm": 5.3374247550964355, "learning_rate": 8.41334398732062e-06, "loss": 0.5613, "step": 23671 }, { "epoch": 0.2830258611413336, "grad_norm": 3.319669008255005, "learning_rate": 8.41320250332403e-06, "loss": 0.5331, "step": 23672 }, { "epoch": 0.28303781728619426, "grad_norm": 2.9578607082366943, "learning_rate": 8.41306101420933e-06, "loss": 0.6699, "step": 23673 }, { "epoch": 0.28304977343105486, "grad_norm": 2.0674684047698975, "learning_rate": 8.412919519976732e-06, "loss": 0.6104, "step": 23674 }, { "epoch": 0.2830617295759155, "grad_norm": 4.533059597015381, "learning_rate": 8.41277802062645e-06, "loss": 0.6632, "step": 23675 }, { "epoch": 0.2830736857207762, "grad_norm": 2.5503005981445312, "learning_rate": 8.412636516158695e-06, "loss": 0.6538, "step": 23676 }, { "epoch": 0.28308564186563684, "grad_norm": 4.731359481811523, "learning_rate": 8.41249500657368e-06, "loss": 0.6488, "step": 23677 }, { "epoch": 0.2830975980104975, "grad_norm": 2.7113664150238037, "learning_rate": 8.412353491871615e-06, "loss": 0.5977, "step": 23678 }, { "epoch": 0.28310955415535816, "grad_norm": 2.1892359256744385, "learning_rate": 8.412211972052715e-06, "loss": 0.5641, "step": 23679 }, { "epoch": 0.2831215103002188, "grad_norm": 2.134049415588379, "learning_rate": 8.412070447117192e-06, "loss": 0.6328, "step": 23680 }, { "epoch": 0.2831334664450795, "grad_norm": 3.735921859741211, "learning_rate": 8.411928917065255e-06, "loss": 0.6187, "step": 23681 }, { "epoch": 0.2831454225899401, "grad_norm": 2.13775634765625, "learning_rate": 8.411787381897121e-06, "loss": 0.6821, "step": 23682 }, { "epoch": 0.28315737873480074, "grad_norm": 14.578287124633789, "learning_rate": 8.411645841612998e-06, "loss": 0.6239, "step": 23683 }, { "epoch": 0.2831693348796614, "grad_norm": 4.019008159637451, "learning_rate": 8.4115042962131e-06, "loss": 0.6102, "step": 23684 }, { "epoch": 0.28318129102452205, "grad_norm": 3.4200186729431152, "learning_rate": 8.41136274569764e-06, "loss": 0.6407, "step": 23685 }, { "epoch": 0.2831932471693827, "grad_norm": 2.129155158996582, "learning_rate": 8.411221190066828e-06, "loss": 0.6349, "step": 23686 }, { "epoch": 0.28320520331424337, "grad_norm": 2.2831311225891113, "learning_rate": 8.41107962932088e-06, "loss": 0.6227, "step": 23687 }, { "epoch": 0.28321715945910403, "grad_norm": 3.194283962249756, "learning_rate": 8.410938063460003e-06, "loss": 0.6047, "step": 23688 }, { "epoch": 0.28322911560396463, "grad_norm": 7.333206653594971, "learning_rate": 8.410796492484415e-06, "loss": 0.6714, "step": 23689 }, { "epoch": 0.2832410717488253, "grad_norm": 2.007398843765259, "learning_rate": 8.410654916394324e-06, "loss": 0.5532, "step": 23690 }, { "epoch": 0.28325302789368595, "grad_norm": 3.1938154697418213, "learning_rate": 8.410513335189945e-06, "loss": 0.6452, "step": 23691 }, { "epoch": 0.2832649840385466, "grad_norm": 1.8359249830245972, "learning_rate": 8.410371748871489e-06, "loss": 0.5934, "step": 23692 }, { "epoch": 0.28327694018340727, "grad_norm": 2.90700364112854, "learning_rate": 8.410230157439166e-06, "loss": 0.6152, "step": 23693 }, { "epoch": 0.2832888963282679, "grad_norm": 2.8860228061676025, "learning_rate": 8.410088560893193e-06, "loss": 0.6619, "step": 23694 }, { "epoch": 0.2833008524731286, "grad_norm": 1.8570038080215454, "learning_rate": 8.409946959233779e-06, "loss": 0.5593, "step": 23695 }, { "epoch": 0.28331280861798924, "grad_norm": 2.633216619491577, "learning_rate": 8.409805352461139e-06, "loss": 0.5438, "step": 23696 }, { "epoch": 0.28332476476284985, "grad_norm": 3.1074037551879883, "learning_rate": 8.409663740575482e-06, "loss": 0.7186, "step": 23697 }, { "epoch": 0.2833367209077105, "grad_norm": 2.232309103012085, "learning_rate": 8.409522123577023e-06, "loss": 0.5338, "step": 23698 }, { "epoch": 0.28334867705257116, "grad_norm": 1.813549518585205, "learning_rate": 8.409380501465973e-06, "loss": 0.6195, "step": 23699 }, { "epoch": 0.2833606331974318, "grad_norm": 1.740525722503662, "learning_rate": 8.409238874242547e-06, "loss": 0.6083, "step": 23700 }, { "epoch": 0.2833725893422925, "grad_norm": 2.148984909057617, "learning_rate": 8.409097241906952e-06, "loss": 0.6708, "step": 23701 }, { "epoch": 0.28338454548715314, "grad_norm": 2.272007703781128, "learning_rate": 8.408955604459405e-06, "loss": 0.6321, "step": 23702 }, { "epoch": 0.2833965016320138, "grad_norm": 1.9052984714508057, "learning_rate": 8.408813961900117e-06, "loss": 0.6205, "step": 23703 }, { "epoch": 0.2834084577768744, "grad_norm": 2.4016871452331543, "learning_rate": 8.408672314229301e-06, "loss": 0.6555, "step": 23704 }, { "epoch": 0.28342041392173506, "grad_norm": 3.8489277362823486, "learning_rate": 8.408530661447168e-06, "loss": 0.559, "step": 23705 }, { "epoch": 0.2834323700665957, "grad_norm": 4.000458240509033, "learning_rate": 8.40838900355393e-06, "loss": 0.6164, "step": 23706 }, { "epoch": 0.2834443262114564, "grad_norm": 2.02250599861145, "learning_rate": 8.408247340549802e-06, "loss": 0.5346, "step": 23707 }, { "epoch": 0.28345628235631704, "grad_norm": 1.843162178993225, "learning_rate": 8.408105672434995e-06, "loss": 0.5812, "step": 23708 }, { "epoch": 0.2834682385011777, "grad_norm": 4.744382381439209, "learning_rate": 8.407963999209721e-06, "loss": 0.583, "step": 23709 }, { "epoch": 0.28348019464603835, "grad_norm": 3.390856981277466, "learning_rate": 8.407822320874193e-06, "loss": 0.6882, "step": 23710 }, { "epoch": 0.28349215079089896, "grad_norm": 4.171115875244141, "learning_rate": 8.407680637428623e-06, "loss": 0.6148, "step": 23711 }, { "epoch": 0.2835041069357596, "grad_norm": 3.8401694297790527, "learning_rate": 8.407538948873224e-06, "loss": 0.5642, "step": 23712 }, { "epoch": 0.2835160630806203, "grad_norm": 1.4744793176651, "learning_rate": 8.407397255208208e-06, "loss": 0.5642, "step": 23713 }, { "epoch": 0.28352801922548093, "grad_norm": 3.1663143634796143, "learning_rate": 8.407255556433789e-06, "loss": 0.5479, "step": 23714 }, { "epoch": 0.2835399753703416, "grad_norm": 3.6586923599243164, "learning_rate": 8.407113852550178e-06, "loss": 0.6787, "step": 23715 }, { "epoch": 0.28355193151520225, "grad_norm": 5.6393256187438965, "learning_rate": 8.406972143557586e-06, "loss": 0.5314, "step": 23716 }, { "epoch": 0.2835638876600629, "grad_norm": 2.6377005577087402, "learning_rate": 8.406830429456227e-06, "loss": 0.5563, "step": 23717 }, { "epoch": 0.28357584380492357, "grad_norm": 2.1212544441223145, "learning_rate": 8.406688710246316e-06, "loss": 0.5461, "step": 23718 }, { "epoch": 0.28358779994978417, "grad_norm": 6.757644176483154, "learning_rate": 8.40654698592806e-06, "loss": 0.5928, "step": 23719 }, { "epoch": 0.28359975609464483, "grad_norm": 3.492596387863159, "learning_rate": 8.406405256501677e-06, "loss": 0.5709, "step": 23720 }, { "epoch": 0.2836117122395055, "grad_norm": 2.165688991546631, "learning_rate": 8.406263521967377e-06, "loss": 0.6143, "step": 23721 }, { "epoch": 0.28362366838436615, "grad_norm": 1.6296952962875366, "learning_rate": 8.406121782325373e-06, "loss": 0.5401, "step": 23722 }, { "epoch": 0.2836356245292268, "grad_norm": 1.7367310523986816, "learning_rate": 8.405980037575876e-06, "loss": 0.6147, "step": 23723 }, { "epoch": 0.28364758067408746, "grad_norm": 1.6561452150344849, "learning_rate": 8.405838287719102e-06, "loss": 0.5734, "step": 23724 }, { "epoch": 0.2836595368189481, "grad_norm": 1.7981189489364624, "learning_rate": 8.405696532755259e-06, "loss": 0.5858, "step": 23725 }, { "epoch": 0.2836714929638087, "grad_norm": 2.1950795650482178, "learning_rate": 8.405554772684563e-06, "loss": 0.5243, "step": 23726 }, { "epoch": 0.2836834491086694, "grad_norm": 4.3288140296936035, "learning_rate": 8.405413007507226e-06, "loss": 0.5284, "step": 23727 }, { "epoch": 0.28369540525353004, "grad_norm": 1.6638598442077637, "learning_rate": 8.40527123722346e-06, "loss": 0.5315, "step": 23728 }, { "epoch": 0.2837073613983907, "grad_norm": 3.3224825859069824, "learning_rate": 8.405129461833477e-06, "loss": 0.5832, "step": 23729 }, { "epoch": 0.28371931754325136, "grad_norm": 4.831843376159668, "learning_rate": 8.404987681337491e-06, "loss": 0.5131, "step": 23730 }, { "epoch": 0.283731273688112, "grad_norm": 3.1305272579193115, "learning_rate": 8.404845895735713e-06, "loss": 0.6227, "step": 23731 }, { "epoch": 0.2837432298329727, "grad_norm": 1.7073019742965698, "learning_rate": 8.404704105028355e-06, "loss": 0.5656, "step": 23732 }, { "epoch": 0.2837551859778333, "grad_norm": 3.197680711746216, "learning_rate": 8.404562309215635e-06, "loss": 0.559, "step": 23733 }, { "epoch": 0.28376714212269394, "grad_norm": 4.217437744140625, "learning_rate": 8.40442050829776e-06, "loss": 0.5852, "step": 23734 }, { "epoch": 0.2837790982675546, "grad_norm": 3.188847541809082, "learning_rate": 8.404278702274942e-06, "loss": 0.6306, "step": 23735 }, { "epoch": 0.28379105441241526, "grad_norm": 3.102513074874878, "learning_rate": 8.404136891147399e-06, "loss": 0.5911, "step": 23736 }, { "epoch": 0.2838030105572759, "grad_norm": 3.3365097045898438, "learning_rate": 8.40399507491534e-06, "loss": 0.7264, "step": 23737 }, { "epoch": 0.2838149667021366, "grad_norm": 1.879497766494751, "learning_rate": 8.403853253578978e-06, "loss": 0.5558, "step": 23738 }, { "epoch": 0.28382692284699723, "grad_norm": 2.0675745010375977, "learning_rate": 8.403711427138526e-06, "loss": 0.6625, "step": 23739 }, { "epoch": 0.2838388789918579, "grad_norm": 2.0852103233337402, "learning_rate": 8.403569595594196e-06, "loss": 0.5542, "step": 23740 }, { "epoch": 0.2838508351367185, "grad_norm": 3.0457427501678467, "learning_rate": 8.4034277589462e-06, "loss": 0.6623, "step": 23741 }, { "epoch": 0.28386279128157915, "grad_norm": 2.5520849227905273, "learning_rate": 8.403285917194755e-06, "loss": 0.612, "step": 23742 }, { "epoch": 0.2838747474264398, "grad_norm": 6.122073173522949, "learning_rate": 8.40314407034007e-06, "loss": 0.6069, "step": 23743 }, { "epoch": 0.28388670357130047, "grad_norm": 4.636293411254883, "learning_rate": 8.403002218382358e-06, "loss": 0.6184, "step": 23744 }, { "epoch": 0.28389865971616113, "grad_norm": 2.1647355556488037, "learning_rate": 8.402860361321832e-06, "loss": 0.4797, "step": 23745 }, { "epoch": 0.2839106158610218, "grad_norm": 1.6430214643478394, "learning_rate": 8.402718499158704e-06, "loss": 0.6954, "step": 23746 }, { "epoch": 0.28392257200588245, "grad_norm": 2.6516852378845215, "learning_rate": 8.402576631893188e-06, "loss": 0.5981, "step": 23747 }, { "epoch": 0.28393452815074305, "grad_norm": 2.197223663330078, "learning_rate": 8.402434759525496e-06, "loss": 0.5572, "step": 23748 }, { "epoch": 0.2839464842956037, "grad_norm": 3.6249639987945557, "learning_rate": 8.402292882055841e-06, "loss": 0.6976, "step": 23749 }, { "epoch": 0.28395844044046437, "grad_norm": 2.553917169570923, "learning_rate": 8.402150999484436e-06, "loss": 0.6038, "step": 23750 }, { "epoch": 0.283970396585325, "grad_norm": 4.813284873962402, "learning_rate": 8.402009111811495e-06, "loss": 0.5815, "step": 23751 }, { "epoch": 0.2839823527301857, "grad_norm": 2.0771641731262207, "learning_rate": 8.401867219037228e-06, "loss": 0.5212, "step": 23752 }, { "epoch": 0.28399430887504634, "grad_norm": 3.3715641498565674, "learning_rate": 8.401725321161849e-06, "loss": 0.6539, "step": 23753 }, { "epoch": 0.284006265019907, "grad_norm": 2.395188570022583, "learning_rate": 8.40158341818557e-06, "loss": 0.6512, "step": 23754 }, { "epoch": 0.28401822116476766, "grad_norm": 2.5838441848754883, "learning_rate": 8.401441510108605e-06, "loss": 0.6008, "step": 23755 }, { "epoch": 0.28403017730962826, "grad_norm": 3.7650859355926514, "learning_rate": 8.401299596931168e-06, "loss": 0.6796, "step": 23756 }, { "epoch": 0.2840421334544889, "grad_norm": 2.440633535385132, "learning_rate": 8.401157678653467e-06, "loss": 0.6724, "step": 23757 }, { "epoch": 0.2840540895993496, "grad_norm": 4.260440349578857, "learning_rate": 8.40101575527572e-06, "loss": 0.5969, "step": 23758 }, { "epoch": 0.28406604574421024, "grad_norm": 3.058271646499634, "learning_rate": 8.400873826798137e-06, "loss": 0.6104, "step": 23759 }, { "epoch": 0.2840780018890709, "grad_norm": 1.9912587404251099, "learning_rate": 8.400731893220933e-06, "loss": 0.6342, "step": 23760 }, { "epoch": 0.28408995803393156, "grad_norm": 14.6886568069458, "learning_rate": 8.400589954544319e-06, "loss": 0.5992, "step": 23761 }, { "epoch": 0.2841019141787922, "grad_norm": 1.7716490030288696, "learning_rate": 8.400448010768507e-06, "loss": 0.6063, "step": 23762 }, { "epoch": 0.2841138703236528, "grad_norm": 2.2106082439422607, "learning_rate": 8.400306061893713e-06, "loss": 0.5471, "step": 23763 }, { "epoch": 0.2841258264685135, "grad_norm": 6.242458343505859, "learning_rate": 8.400164107920145e-06, "loss": 0.5193, "step": 23764 }, { "epoch": 0.28413778261337413, "grad_norm": 2.2892680168151855, "learning_rate": 8.400022148848022e-06, "loss": 0.5884, "step": 23765 }, { "epoch": 0.2841497387582348, "grad_norm": 1.7320904731750488, "learning_rate": 8.399880184677553e-06, "loss": 0.6868, "step": 23766 }, { "epoch": 0.28416169490309545, "grad_norm": 2.305367946624756, "learning_rate": 8.39973821540895e-06, "loss": 0.5953, "step": 23767 }, { "epoch": 0.2841736510479561, "grad_norm": 1.9528690576553345, "learning_rate": 8.39959624104243e-06, "loss": 0.5721, "step": 23768 }, { "epoch": 0.28418560719281677, "grad_norm": 2.458122491836548, "learning_rate": 8.399454261578201e-06, "loss": 0.5725, "step": 23769 }, { "epoch": 0.2841975633376774, "grad_norm": 2.463984251022339, "learning_rate": 8.399312277016478e-06, "loss": 0.6093, "step": 23770 }, { "epoch": 0.28420951948253803, "grad_norm": 2.750401735305786, "learning_rate": 8.399170287357476e-06, "loss": 0.5707, "step": 23771 }, { "epoch": 0.2842214756273987, "grad_norm": 2.676668643951416, "learning_rate": 8.399028292601406e-06, "loss": 0.6084, "step": 23772 }, { "epoch": 0.28423343177225935, "grad_norm": 3.8814802169799805, "learning_rate": 8.39888629274848e-06, "loss": 0.5899, "step": 23773 }, { "epoch": 0.28424538791712, "grad_norm": 4.83243989944458, "learning_rate": 8.398744287798912e-06, "loss": 0.6189, "step": 23774 }, { "epoch": 0.28425734406198067, "grad_norm": 3.0990848541259766, "learning_rate": 8.398602277752915e-06, "loss": 0.5536, "step": 23775 }, { "epoch": 0.2842693002068413, "grad_norm": 2.6569278240203857, "learning_rate": 8.398460262610702e-06, "loss": 0.6092, "step": 23776 }, { "epoch": 0.284281256351702, "grad_norm": 3.637282609939575, "learning_rate": 8.398318242372486e-06, "loss": 0.5854, "step": 23777 }, { "epoch": 0.2842932124965626, "grad_norm": 2.978835105895996, "learning_rate": 8.398176217038479e-06, "loss": 0.6349, "step": 23778 }, { "epoch": 0.28430516864142324, "grad_norm": 2.2928693294525146, "learning_rate": 8.398034186608895e-06, "loss": 0.6703, "step": 23779 }, { "epoch": 0.2843171247862839, "grad_norm": 1.7787643671035767, "learning_rate": 8.397892151083947e-06, "loss": 0.6291, "step": 23780 }, { "epoch": 0.28432908093114456, "grad_norm": 2.806901216506958, "learning_rate": 8.397750110463846e-06, "loss": 0.7298, "step": 23781 }, { "epoch": 0.2843410370760052, "grad_norm": 3.5564992427825928, "learning_rate": 8.397608064748809e-06, "loss": 0.674, "step": 23782 }, { "epoch": 0.2843529932208659, "grad_norm": 2.9961001873016357, "learning_rate": 8.397466013939046e-06, "loss": 0.6217, "step": 23783 }, { "epoch": 0.28436494936572654, "grad_norm": 3.36361026763916, "learning_rate": 8.39732395803477e-06, "loss": 0.6962, "step": 23784 }, { "epoch": 0.28437690551058714, "grad_norm": 1.721198558807373, "learning_rate": 8.397181897036195e-06, "loss": 0.5919, "step": 23785 }, { "epoch": 0.2843888616554478, "grad_norm": 2.6671998500823975, "learning_rate": 8.397039830943534e-06, "loss": 0.6387, "step": 23786 }, { "epoch": 0.28440081780030846, "grad_norm": 2.3652477264404297, "learning_rate": 8.396897759757e-06, "loss": 0.5585, "step": 23787 }, { "epoch": 0.2844127739451691, "grad_norm": 2.981757640838623, "learning_rate": 8.396755683476803e-06, "loss": 0.556, "step": 23788 }, { "epoch": 0.2844247300900298, "grad_norm": 88.37516021728516, "learning_rate": 8.396613602103161e-06, "loss": 0.581, "step": 23789 }, { "epoch": 0.28443668623489043, "grad_norm": 2.7078652381896973, "learning_rate": 8.396471515636284e-06, "loss": 0.6054, "step": 23790 }, { "epoch": 0.2844486423797511, "grad_norm": 3.8563995361328125, "learning_rate": 8.396329424076387e-06, "loss": 0.5925, "step": 23791 }, { "epoch": 0.28446059852461175, "grad_norm": 1.9613085985183716, "learning_rate": 8.396187327423681e-06, "loss": 0.5187, "step": 23792 }, { "epoch": 0.28447255466947236, "grad_norm": 2.6582353115081787, "learning_rate": 8.396045225678382e-06, "loss": 0.5699, "step": 23793 }, { "epoch": 0.284484510814333, "grad_norm": 2.3183059692382812, "learning_rate": 8.3959031188407e-06, "loss": 0.7057, "step": 23794 }, { "epoch": 0.2844964669591937, "grad_norm": 2.322976589202881, "learning_rate": 8.395761006910847e-06, "loss": 0.6711, "step": 23795 }, { "epoch": 0.28450842310405433, "grad_norm": 1.9382508993148804, "learning_rate": 8.39561888988904e-06, "loss": 0.65, "step": 23796 }, { "epoch": 0.284520379248915, "grad_norm": 1.809401512145996, "learning_rate": 8.395476767775492e-06, "loss": 0.5811, "step": 23797 }, { "epoch": 0.28453233539377565, "grad_norm": 6.320191383361816, "learning_rate": 8.395334640570413e-06, "loss": 0.6387, "step": 23798 }, { "epoch": 0.2845442915386363, "grad_norm": 2.4222428798675537, "learning_rate": 8.395192508274018e-06, "loss": 0.6495, "step": 23799 }, { "epoch": 0.2845562476834969, "grad_norm": 4.424661636352539, "learning_rate": 8.395050370886519e-06, "loss": 0.6108, "step": 23800 }, { "epoch": 0.28456820382835757, "grad_norm": 1.9145268201828003, "learning_rate": 8.39490822840813e-06, "loss": 0.5817, "step": 23801 }, { "epoch": 0.2845801599732182, "grad_norm": 2.698976755142212, "learning_rate": 8.394766080839066e-06, "loss": 0.6318, "step": 23802 }, { "epoch": 0.2845921161180789, "grad_norm": 4.741962432861328, "learning_rate": 8.394623928179534e-06, "loss": 0.512, "step": 23803 }, { "epoch": 0.28460407226293954, "grad_norm": 4.277493000030518, "learning_rate": 8.394481770429754e-06, "loss": 0.6293, "step": 23804 }, { "epoch": 0.2846160284078002, "grad_norm": 2.1025547981262207, "learning_rate": 8.394339607589937e-06, "loss": 0.6646, "step": 23805 }, { "epoch": 0.28462798455266086, "grad_norm": 2.126643180847168, "learning_rate": 8.394197439660295e-06, "loss": 0.5659, "step": 23806 }, { "epoch": 0.28463994069752147, "grad_norm": 2.534376621246338, "learning_rate": 8.394055266641042e-06, "loss": 0.5392, "step": 23807 }, { "epoch": 0.2846518968423821, "grad_norm": 2.0018563270568848, "learning_rate": 8.393913088532392e-06, "loss": 0.5495, "step": 23808 }, { "epoch": 0.2846638529872428, "grad_norm": 5.708410263061523, "learning_rate": 8.393770905334556e-06, "loss": 0.6514, "step": 23809 }, { "epoch": 0.28467580913210344, "grad_norm": 1.9674590826034546, "learning_rate": 8.393628717047748e-06, "loss": 0.6065, "step": 23810 }, { "epoch": 0.2846877652769641, "grad_norm": 5.819747447967529, "learning_rate": 8.393486523672182e-06, "loss": 0.6062, "step": 23811 }, { "epoch": 0.28469972142182476, "grad_norm": 17.391660690307617, "learning_rate": 8.393344325208073e-06, "loss": 0.5274, "step": 23812 }, { "epoch": 0.2847116775666854, "grad_norm": 3.1057965755462646, "learning_rate": 8.39320212165563e-06, "loss": 0.6564, "step": 23813 }, { "epoch": 0.2847236337115461, "grad_norm": 3.368757963180542, "learning_rate": 8.39305991301507e-06, "loss": 0.7057, "step": 23814 }, { "epoch": 0.2847355898564067, "grad_norm": 3.510267734527588, "learning_rate": 8.392917699286602e-06, "loss": 0.6354, "step": 23815 }, { "epoch": 0.28474754600126734, "grad_norm": 2.1554157733917236, "learning_rate": 8.392775480470442e-06, "loss": 0.5728, "step": 23816 }, { "epoch": 0.284759502146128, "grad_norm": 1.722313642501831, "learning_rate": 8.392633256566807e-06, "loss": 0.5905, "step": 23817 }, { "epoch": 0.28477145829098865, "grad_norm": 2.229139804840088, "learning_rate": 8.392491027575901e-06, "loss": 0.5019, "step": 23818 }, { "epoch": 0.2847834144358493, "grad_norm": 2.7013497352600098, "learning_rate": 8.392348793497948e-06, "loss": 0.5345, "step": 23819 }, { "epoch": 0.28479537058070997, "grad_norm": 2.0348832607269287, "learning_rate": 8.392206554333154e-06, "loss": 0.6125, "step": 23820 }, { "epoch": 0.28480732672557063, "grad_norm": 3.121067523956299, "learning_rate": 8.392064310081732e-06, "loss": 0.6441, "step": 23821 }, { "epoch": 0.28481928287043123, "grad_norm": 1.9604548215866089, "learning_rate": 8.391922060743899e-06, "loss": 0.6073, "step": 23822 }, { "epoch": 0.2848312390152919, "grad_norm": 2.149212121963501, "learning_rate": 8.391779806319866e-06, "loss": 0.6173, "step": 23823 }, { "epoch": 0.28484319516015255, "grad_norm": 6.329075336456299, "learning_rate": 8.39163754680985e-06, "loss": 0.5912, "step": 23824 }, { "epoch": 0.2848551513050132, "grad_norm": 4.158055305480957, "learning_rate": 8.39149528221406e-06, "loss": 0.5894, "step": 23825 }, { "epoch": 0.28486710744987387, "grad_norm": 2.502758741378784, "learning_rate": 8.391353012532708e-06, "loss": 0.585, "step": 23826 }, { "epoch": 0.2848790635947345, "grad_norm": 3.724358081817627, "learning_rate": 8.391210737766013e-06, "loss": 0.5979, "step": 23827 }, { "epoch": 0.2848910197395952, "grad_norm": 1.7203913927078247, "learning_rate": 8.391068457914186e-06, "loss": 0.5777, "step": 23828 }, { "epoch": 0.2849029758844558, "grad_norm": 2.19836163520813, "learning_rate": 8.390926172977439e-06, "loss": 0.6359, "step": 23829 }, { "epoch": 0.28491493202931645, "grad_norm": 1.8309297561645508, "learning_rate": 8.390783882955983e-06, "loss": 0.5055, "step": 23830 }, { "epoch": 0.2849268881741771, "grad_norm": 2.397174119949341, "learning_rate": 8.39064158785004e-06, "loss": 0.5889, "step": 23831 }, { "epoch": 0.28493884431903777, "grad_norm": 3.160831928253174, "learning_rate": 8.390499287659815e-06, "loss": 0.6096, "step": 23832 }, { "epoch": 0.2849508004638984, "grad_norm": 6.375048637390137, "learning_rate": 8.390356982385524e-06, "loss": 0.5893, "step": 23833 }, { "epoch": 0.2849627566087591, "grad_norm": 12.787052154541016, "learning_rate": 8.39021467202738e-06, "loss": 0.5274, "step": 23834 }, { "epoch": 0.28497471275361974, "grad_norm": 1.6295169591903687, "learning_rate": 8.390072356585598e-06, "loss": 0.6026, "step": 23835 }, { "epoch": 0.2849866688984804, "grad_norm": 2.6398301124572754, "learning_rate": 8.389930036060391e-06, "loss": 0.6693, "step": 23836 }, { "epoch": 0.284998625043341, "grad_norm": 3.04689884185791, "learning_rate": 8.389787710451971e-06, "loss": 0.5346, "step": 23837 }, { "epoch": 0.28501058118820166, "grad_norm": 2.4431262016296387, "learning_rate": 8.389645379760554e-06, "loss": 0.6188, "step": 23838 }, { "epoch": 0.2850225373330623, "grad_norm": 3.1103930473327637, "learning_rate": 8.38950304398635e-06, "loss": 0.5951, "step": 23839 }, { "epoch": 0.285034493477923, "grad_norm": 2.057884693145752, "learning_rate": 8.389360703129575e-06, "loss": 0.5068, "step": 23840 }, { "epoch": 0.28504644962278364, "grad_norm": 4.041038513183594, "learning_rate": 8.389218357190441e-06, "loss": 0.6409, "step": 23841 }, { "epoch": 0.2850584057676443, "grad_norm": 2.7423932552337646, "learning_rate": 8.389076006169161e-06, "loss": 0.5648, "step": 23842 }, { "epoch": 0.28507036191250495, "grad_norm": 2.2397243976593018, "learning_rate": 8.388933650065951e-06, "loss": 0.5973, "step": 23843 }, { "epoch": 0.28508231805736556, "grad_norm": 5.322158336639404, "learning_rate": 8.388791288881022e-06, "loss": 0.6662, "step": 23844 }, { "epoch": 0.2850942742022262, "grad_norm": 1.919235348701477, "learning_rate": 8.388648922614589e-06, "loss": 0.5702, "step": 23845 }, { "epoch": 0.2851062303470869, "grad_norm": 2.0186781883239746, "learning_rate": 8.388506551266866e-06, "loss": 0.6342, "step": 23846 }, { "epoch": 0.28511818649194753, "grad_norm": 2.721409797668457, "learning_rate": 8.388364174838064e-06, "loss": 0.5832, "step": 23847 }, { "epoch": 0.2851301426368082, "grad_norm": 2.269258737564087, "learning_rate": 8.388221793328397e-06, "loss": 0.6484, "step": 23848 }, { "epoch": 0.28514209878166885, "grad_norm": 2.104050397872925, "learning_rate": 8.388079406738082e-06, "loss": 0.6492, "step": 23849 }, { "epoch": 0.2851540549265295, "grad_norm": 1.6214526891708374, "learning_rate": 8.387937015067326e-06, "loss": 0.6546, "step": 23850 }, { "epoch": 0.28516601107139017, "grad_norm": 2.920109272003174, "learning_rate": 8.38779461831635e-06, "loss": 0.6206, "step": 23851 }, { "epoch": 0.28517796721625077, "grad_norm": 2.624845027923584, "learning_rate": 8.387652216485363e-06, "loss": 0.6007, "step": 23852 }, { "epoch": 0.28518992336111143, "grad_norm": 1.967323660850525, "learning_rate": 8.387509809574579e-06, "loss": 0.5531, "step": 23853 }, { "epoch": 0.2852018795059721, "grad_norm": 2.7498862743377686, "learning_rate": 8.387367397584211e-06, "loss": 0.6106, "step": 23854 }, { "epoch": 0.28521383565083275, "grad_norm": 2.018052577972412, "learning_rate": 8.387224980514476e-06, "loss": 0.5749, "step": 23855 }, { "epoch": 0.2852257917956934, "grad_norm": 2.189568042755127, "learning_rate": 8.387082558365585e-06, "loss": 0.5897, "step": 23856 }, { "epoch": 0.28523774794055407, "grad_norm": 4.633103370666504, "learning_rate": 8.38694013113775e-06, "loss": 0.5984, "step": 23857 }, { "epoch": 0.2852497040854147, "grad_norm": 1.5822112560272217, "learning_rate": 8.386797698831186e-06, "loss": 0.6175, "step": 23858 }, { "epoch": 0.2852616602302753, "grad_norm": 1.5690042972564697, "learning_rate": 8.386655261446107e-06, "loss": 0.5544, "step": 23859 }, { "epoch": 0.285273616375136, "grad_norm": 4.35628080368042, "learning_rate": 8.386512818982729e-06, "loss": 0.5528, "step": 23860 }, { "epoch": 0.28528557251999664, "grad_norm": 2.1493923664093018, "learning_rate": 8.386370371441258e-06, "loss": 0.5329, "step": 23861 }, { "epoch": 0.2852975286648573, "grad_norm": 2.490856409072876, "learning_rate": 8.386227918821917e-06, "loss": 0.602, "step": 23862 }, { "epoch": 0.28530948480971796, "grad_norm": 5.106073379516602, "learning_rate": 8.386085461124913e-06, "loss": 0.5711, "step": 23863 }, { "epoch": 0.2853214409545786, "grad_norm": 1.8579233884811401, "learning_rate": 8.385942998350462e-06, "loss": 0.7515, "step": 23864 }, { "epoch": 0.2853333970994393, "grad_norm": 1.7727171182632446, "learning_rate": 8.38580053049878e-06, "loss": 0.6554, "step": 23865 }, { "epoch": 0.2853453532442999, "grad_norm": 1.9567780494689941, "learning_rate": 8.385658057570075e-06, "loss": 0.6046, "step": 23866 }, { "epoch": 0.28535730938916054, "grad_norm": 2.530137538909912, "learning_rate": 8.385515579564565e-06, "loss": 0.6788, "step": 23867 }, { "epoch": 0.2853692655340212, "grad_norm": 1.679579496383667, "learning_rate": 8.385373096482462e-06, "loss": 0.6383, "step": 23868 }, { "epoch": 0.28538122167888186, "grad_norm": 1.800787329673767, "learning_rate": 8.38523060832398e-06, "loss": 0.575, "step": 23869 }, { "epoch": 0.2853931778237425, "grad_norm": 4.845431327819824, "learning_rate": 8.385088115089332e-06, "loss": 0.596, "step": 23870 }, { "epoch": 0.2854051339686032, "grad_norm": 1.756731390953064, "learning_rate": 8.384945616778734e-06, "loss": 0.5531, "step": 23871 }, { "epoch": 0.28541709011346383, "grad_norm": 1.903801679611206, "learning_rate": 8.384803113392397e-06, "loss": 0.6748, "step": 23872 }, { "epoch": 0.2854290462583245, "grad_norm": 2.904266357421875, "learning_rate": 8.384660604930536e-06, "loss": 0.7486, "step": 23873 }, { "epoch": 0.2854410024031851, "grad_norm": 1.5575475692749023, "learning_rate": 8.384518091393364e-06, "loss": 0.5894, "step": 23874 }, { "epoch": 0.28545295854804575, "grad_norm": 3.2510814666748047, "learning_rate": 8.384375572781096e-06, "loss": 0.6308, "step": 23875 }, { "epoch": 0.2854649146929064, "grad_norm": 8.49859619140625, "learning_rate": 8.384233049093944e-06, "loss": 0.5456, "step": 23876 }, { "epoch": 0.28547687083776707, "grad_norm": 8.715691566467285, "learning_rate": 8.384090520332122e-06, "loss": 0.6736, "step": 23877 }, { "epoch": 0.28548882698262773, "grad_norm": 2.1862645149230957, "learning_rate": 8.383947986495846e-06, "loss": 0.641, "step": 23878 }, { "epoch": 0.2855007831274884, "grad_norm": 4.43370246887207, "learning_rate": 8.383805447585328e-06, "loss": 0.5243, "step": 23879 }, { "epoch": 0.28551273927234905, "grad_norm": 2.9253900051116943, "learning_rate": 8.383662903600779e-06, "loss": 0.6938, "step": 23880 }, { "epoch": 0.28552469541720965, "grad_norm": 4.166447162628174, "learning_rate": 8.383520354542416e-06, "loss": 0.6395, "step": 23881 }, { "epoch": 0.2855366515620703, "grad_norm": 2.494150400161743, "learning_rate": 8.383377800410454e-06, "loss": 0.5615, "step": 23882 }, { "epoch": 0.28554860770693097, "grad_norm": 2.2006242275238037, "learning_rate": 8.383235241205104e-06, "loss": 0.6176, "step": 23883 }, { "epoch": 0.2855605638517916, "grad_norm": 2.9449660778045654, "learning_rate": 8.38309267692658e-06, "loss": 0.6666, "step": 23884 }, { "epoch": 0.2855725199966523, "grad_norm": 2.144986391067505, "learning_rate": 8.3829501075751e-06, "loss": 0.6898, "step": 23885 }, { "epoch": 0.28558447614151294, "grad_norm": 1.7152416706085205, "learning_rate": 8.38280753315087e-06, "loss": 0.6443, "step": 23886 }, { "epoch": 0.2855964322863736, "grad_norm": 2.678712844848633, "learning_rate": 8.382664953654109e-06, "loss": 0.6423, "step": 23887 }, { "epoch": 0.2856083884312342, "grad_norm": 3.4089982509613037, "learning_rate": 8.382522369085032e-06, "loss": 0.6878, "step": 23888 }, { "epoch": 0.28562034457609486, "grad_norm": 3.949443817138672, "learning_rate": 8.382379779443849e-06, "loss": 0.6048, "step": 23889 }, { "epoch": 0.2856323007209555, "grad_norm": 2.6231203079223633, "learning_rate": 8.382237184730776e-06, "loss": 0.529, "step": 23890 }, { "epoch": 0.2856442568658162, "grad_norm": 4.342251300811768, "learning_rate": 8.382094584946026e-06, "loss": 0.5465, "step": 23891 }, { "epoch": 0.28565621301067684, "grad_norm": 43.64408874511719, "learning_rate": 8.381951980089814e-06, "loss": 0.595, "step": 23892 }, { "epoch": 0.2856681691555375, "grad_norm": 2.246687173843384, "learning_rate": 8.381809370162352e-06, "loss": 0.6651, "step": 23893 }, { "epoch": 0.28568012530039816, "grad_norm": 2.026149272918701, "learning_rate": 8.381666755163855e-06, "loss": 0.5435, "step": 23894 }, { "epoch": 0.2856920814452588, "grad_norm": 3.464049816131592, "learning_rate": 8.381524135094537e-06, "loss": 0.6731, "step": 23895 }, { "epoch": 0.2857040375901194, "grad_norm": 5.514615535736084, "learning_rate": 8.381381509954611e-06, "loss": 0.6768, "step": 23896 }, { "epoch": 0.2857159937349801, "grad_norm": 1.7015806436538696, "learning_rate": 8.381238879744292e-06, "loss": 0.6426, "step": 23897 }, { "epoch": 0.28572794987984074, "grad_norm": 4.107646942138672, "learning_rate": 8.381096244463793e-06, "loss": 0.6234, "step": 23898 }, { "epoch": 0.2857399060247014, "grad_norm": 4.753880023956299, "learning_rate": 8.380953604113327e-06, "loss": 0.557, "step": 23899 }, { "epoch": 0.28575186216956205, "grad_norm": 2.483883857727051, "learning_rate": 8.380810958693111e-06, "loss": 0.536, "step": 23900 }, { "epoch": 0.2857638183144227, "grad_norm": 2.257467746734619, "learning_rate": 8.380668308203356e-06, "loss": 0.629, "step": 23901 }, { "epoch": 0.28577577445928337, "grad_norm": 4.040227890014648, "learning_rate": 8.380525652644277e-06, "loss": 0.6228, "step": 23902 }, { "epoch": 0.285787730604144, "grad_norm": 2.5153861045837402, "learning_rate": 8.380382992016088e-06, "loss": 0.5416, "step": 23903 }, { "epoch": 0.28579968674900463, "grad_norm": 1.6784189939498901, "learning_rate": 8.380240326319002e-06, "loss": 0.7246, "step": 23904 }, { "epoch": 0.2858116428938653, "grad_norm": 1.797956109046936, "learning_rate": 8.380097655553235e-06, "loss": 0.6585, "step": 23905 }, { "epoch": 0.28582359903872595, "grad_norm": 1.9641178846359253, "learning_rate": 8.379954979718998e-06, "loss": 0.577, "step": 23906 }, { "epoch": 0.2858355551835866, "grad_norm": 3.998668670654297, "learning_rate": 8.379812298816507e-06, "loss": 0.5508, "step": 23907 }, { "epoch": 0.28584751132844727, "grad_norm": 4.07505464553833, "learning_rate": 8.379669612845975e-06, "loss": 0.5948, "step": 23908 }, { "epoch": 0.2858594674733079, "grad_norm": 1.8266334533691406, "learning_rate": 8.379526921807618e-06, "loss": 0.5327, "step": 23909 }, { "epoch": 0.2858714236181686, "grad_norm": 3.0190036296844482, "learning_rate": 8.379384225701647e-06, "loss": 0.5402, "step": 23910 }, { "epoch": 0.2858833797630292, "grad_norm": 2.353768825531006, "learning_rate": 8.379241524528278e-06, "loss": 0.6713, "step": 23911 }, { "epoch": 0.28589533590788985, "grad_norm": 2.4333174228668213, "learning_rate": 8.379098818287722e-06, "loss": 0.6428, "step": 23912 }, { "epoch": 0.2859072920527505, "grad_norm": 2.519909381866455, "learning_rate": 8.378956106980199e-06, "loss": 0.602, "step": 23913 }, { "epoch": 0.28591924819761116, "grad_norm": 1.8657763004302979, "learning_rate": 8.378813390605917e-06, "loss": 0.6292, "step": 23914 }, { "epoch": 0.2859312043424718, "grad_norm": 2.363293170928955, "learning_rate": 8.378670669165094e-06, "loss": 0.6085, "step": 23915 }, { "epoch": 0.2859431604873325, "grad_norm": 2.7238929271698, "learning_rate": 8.37852794265794e-06, "loss": 0.6638, "step": 23916 }, { "epoch": 0.28595511663219314, "grad_norm": 1.8902250528335571, "learning_rate": 8.378385211084673e-06, "loss": 0.6495, "step": 23917 }, { "epoch": 0.28596707277705374, "grad_norm": 1.6264891624450684, "learning_rate": 8.378242474445506e-06, "loss": 0.5704, "step": 23918 }, { "epoch": 0.2859790289219144, "grad_norm": 3.3034768104553223, "learning_rate": 8.378099732740651e-06, "loss": 0.6384, "step": 23919 }, { "epoch": 0.28599098506677506, "grad_norm": 2.3169655799865723, "learning_rate": 8.377956985970323e-06, "loss": 0.5807, "step": 23920 }, { "epoch": 0.2860029412116357, "grad_norm": 2.8017048835754395, "learning_rate": 8.377814234134738e-06, "loss": 0.6677, "step": 23921 }, { "epoch": 0.2860148973564964, "grad_norm": 2.3040239810943604, "learning_rate": 8.377671477234109e-06, "loss": 0.5671, "step": 23922 }, { "epoch": 0.28602685350135704, "grad_norm": 3.912862539291382, "learning_rate": 8.377528715268647e-06, "loss": 0.6851, "step": 23923 }, { "epoch": 0.2860388096462177, "grad_norm": 2.4392101764678955, "learning_rate": 8.37738594823857e-06, "loss": 0.6532, "step": 23924 }, { "epoch": 0.2860507657910783, "grad_norm": 7.006452560424805, "learning_rate": 8.377243176144092e-06, "loss": 0.5725, "step": 23925 }, { "epoch": 0.28606272193593896, "grad_norm": 6.609785556793213, "learning_rate": 8.377100398985425e-06, "loss": 0.6417, "step": 23926 }, { "epoch": 0.2860746780807996, "grad_norm": 1.7843778133392334, "learning_rate": 8.376957616762784e-06, "loss": 0.5668, "step": 23927 }, { "epoch": 0.2860866342256603, "grad_norm": 1.724486231803894, "learning_rate": 8.376814829476381e-06, "loss": 0.6009, "step": 23928 }, { "epoch": 0.28609859037052093, "grad_norm": 6.741063117980957, "learning_rate": 8.376672037126434e-06, "loss": 0.6771, "step": 23929 }, { "epoch": 0.2861105465153816, "grad_norm": 3.5829877853393555, "learning_rate": 8.376529239713156e-06, "loss": 0.6153, "step": 23930 }, { "epoch": 0.28612250266024225, "grad_norm": 2.0735833644866943, "learning_rate": 8.376386437236759e-06, "loss": 0.5543, "step": 23931 }, { "epoch": 0.2861344588051029, "grad_norm": 1.6529550552368164, "learning_rate": 8.37624362969746e-06, "loss": 0.6159, "step": 23932 }, { "epoch": 0.2861464149499635, "grad_norm": 2.1145269870758057, "learning_rate": 8.37610081709547e-06, "loss": 0.6683, "step": 23933 }, { "epoch": 0.28615837109482417, "grad_norm": 3.352409601211548, "learning_rate": 8.375957999431007e-06, "loss": 0.7048, "step": 23934 }, { "epoch": 0.28617032723968483, "grad_norm": 2.1238656044006348, "learning_rate": 8.375815176704281e-06, "loss": 0.558, "step": 23935 }, { "epoch": 0.2861822833845455, "grad_norm": 3.2612199783325195, "learning_rate": 8.375672348915509e-06, "loss": 0.5783, "step": 23936 }, { "epoch": 0.28619423952940615, "grad_norm": 1.4561386108398438, "learning_rate": 8.375529516064904e-06, "loss": 0.5954, "step": 23937 }, { "epoch": 0.2862061956742668, "grad_norm": 4.130690574645996, "learning_rate": 8.37538667815268e-06, "loss": 0.6814, "step": 23938 }, { "epoch": 0.28621815181912746, "grad_norm": 1.8586030006408691, "learning_rate": 8.375243835179052e-06, "loss": 0.4685, "step": 23939 }, { "epoch": 0.28623010796398807, "grad_norm": 3.6141891479492188, "learning_rate": 8.375100987144235e-06, "loss": 0.5971, "step": 23940 }, { "epoch": 0.2862420641088487, "grad_norm": 6.223785400390625, "learning_rate": 8.374958134048441e-06, "loss": 0.5843, "step": 23941 }, { "epoch": 0.2862540202537094, "grad_norm": 2.9572949409484863, "learning_rate": 8.374815275891886e-06, "loss": 0.5592, "step": 23942 }, { "epoch": 0.28626597639857004, "grad_norm": 4.614498615264893, "learning_rate": 8.374672412674782e-06, "loss": 0.5915, "step": 23943 }, { "epoch": 0.2862779325434307, "grad_norm": 3.324310302734375, "learning_rate": 8.374529544397345e-06, "loss": 0.7302, "step": 23944 }, { "epoch": 0.28628988868829136, "grad_norm": 1.5522794723510742, "learning_rate": 8.374386671059791e-06, "loss": 0.6189, "step": 23945 }, { "epoch": 0.286301844833152, "grad_norm": 2.6485185623168945, "learning_rate": 8.37424379266233e-06, "loss": 0.6064, "step": 23946 }, { "epoch": 0.2863138009780126, "grad_norm": 1.7105424404144287, "learning_rate": 8.37410090920518e-06, "loss": 0.5887, "step": 23947 }, { "epoch": 0.2863257571228733, "grad_norm": 2.0290040969848633, "learning_rate": 8.373958020688553e-06, "loss": 0.6785, "step": 23948 }, { "epoch": 0.28633771326773394, "grad_norm": 1.9281500577926636, "learning_rate": 8.373815127112663e-06, "loss": 0.6829, "step": 23949 }, { "epoch": 0.2863496694125946, "grad_norm": 2.264207363128662, "learning_rate": 8.373672228477728e-06, "loss": 0.568, "step": 23950 }, { "epoch": 0.28636162555745526, "grad_norm": 2.2378034591674805, "learning_rate": 8.373529324783958e-06, "loss": 0.6721, "step": 23951 }, { "epoch": 0.2863735817023159, "grad_norm": 3.2613558769226074, "learning_rate": 8.373386416031569e-06, "loss": 0.72, "step": 23952 }, { "epoch": 0.2863855378471766, "grad_norm": 2.0309386253356934, "learning_rate": 8.373243502220772e-06, "loss": 0.6127, "step": 23953 }, { "epoch": 0.28639749399203723, "grad_norm": 2.9759228229522705, "learning_rate": 8.373100583351787e-06, "loss": 0.6173, "step": 23954 }, { "epoch": 0.28640945013689784, "grad_norm": 2.3641040325164795, "learning_rate": 8.372957659424826e-06, "loss": 0.679, "step": 23955 }, { "epoch": 0.2864214062817585, "grad_norm": 9.895705223083496, "learning_rate": 8.372814730440103e-06, "loss": 0.639, "step": 23956 }, { "epoch": 0.28643336242661915, "grad_norm": 1.6023714542388916, "learning_rate": 8.37267179639783e-06, "loss": 0.6461, "step": 23957 }, { "epoch": 0.2864453185714798, "grad_norm": 2.5029776096343994, "learning_rate": 8.372528857298227e-06, "loss": 0.6137, "step": 23958 }, { "epoch": 0.28645727471634047, "grad_norm": 2.0577328205108643, "learning_rate": 8.372385913141502e-06, "loss": 0.5671, "step": 23959 }, { "epoch": 0.28646923086120113, "grad_norm": 2.328882932662964, "learning_rate": 8.372242963927876e-06, "loss": 0.5322, "step": 23960 }, { "epoch": 0.2864811870060618, "grad_norm": 2.309969425201416, "learning_rate": 8.372100009657557e-06, "loss": 0.5175, "step": 23961 }, { "epoch": 0.2864931431509224, "grad_norm": 2.34513521194458, "learning_rate": 8.371957050330763e-06, "loss": 0.6014, "step": 23962 }, { "epoch": 0.28650509929578305, "grad_norm": 2.116180896759033, "learning_rate": 8.371814085947706e-06, "loss": 0.5874, "step": 23963 }, { "epoch": 0.2865170554406437, "grad_norm": 2.467712163925171, "learning_rate": 8.371671116508605e-06, "loss": 0.5946, "step": 23964 }, { "epoch": 0.28652901158550437, "grad_norm": 8.955517768859863, "learning_rate": 8.371528142013667e-06, "loss": 0.7616, "step": 23965 }, { "epoch": 0.286540967730365, "grad_norm": 2.2500710487365723, "learning_rate": 8.371385162463113e-06, "loss": 0.6173, "step": 23966 }, { "epoch": 0.2865529238752257, "grad_norm": 3.1497201919555664, "learning_rate": 8.371242177857155e-06, "loss": 0.617, "step": 23967 }, { "epoch": 0.28656488002008634, "grad_norm": 3.1561689376831055, "learning_rate": 8.371099188196007e-06, "loss": 0.6304, "step": 23968 }, { "epoch": 0.286576836164947, "grad_norm": 2.8925633430480957, "learning_rate": 8.370956193479884e-06, "loss": 0.4832, "step": 23969 }, { "epoch": 0.2865887923098076, "grad_norm": 2.2446236610412598, "learning_rate": 8.370813193709e-06, "loss": 0.5397, "step": 23970 }, { "epoch": 0.28660074845466826, "grad_norm": 2.740541696548462, "learning_rate": 8.370670188883569e-06, "loss": 0.5905, "step": 23971 }, { "epoch": 0.2866127045995289, "grad_norm": 6.124820232391357, "learning_rate": 8.370527179003806e-06, "loss": 0.6439, "step": 23972 }, { "epoch": 0.2866246607443896, "grad_norm": 2.0303449630737305, "learning_rate": 8.370384164069926e-06, "loss": 0.6298, "step": 23973 }, { "epoch": 0.28663661688925024, "grad_norm": 1.2889204025268555, "learning_rate": 8.370241144082145e-06, "loss": 0.5679, "step": 23974 }, { "epoch": 0.2866485730341109, "grad_norm": 3.43070387840271, "learning_rate": 8.370098119040673e-06, "loss": 0.6451, "step": 23975 }, { "epoch": 0.28666052917897156, "grad_norm": 2.9668893814086914, "learning_rate": 8.369955088945727e-06, "loss": 0.5907, "step": 23976 }, { "epoch": 0.28667248532383216, "grad_norm": 2.6906375885009766, "learning_rate": 8.36981205379752e-06, "loss": 0.6096, "step": 23977 }, { "epoch": 0.2866844414686928, "grad_norm": 3.3986496925354004, "learning_rate": 8.36966901359627e-06, "loss": 0.6147, "step": 23978 }, { "epoch": 0.2866963976135535, "grad_norm": 5.26533317565918, "learning_rate": 8.369525968342192e-06, "loss": 0.6358, "step": 23979 }, { "epoch": 0.28670835375841414, "grad_norm": 3.5267221927642822, "learning_rate": 8.369382918035493e-06, "loss": 0.5434, "step": 23980 }, { "epoch": 0.2867203099032748, "grad_norm": 1.31923508644104, "learning_rate": 8.369239862676395e-06, "loss": 0.5858, "step": 23981 }, { "epoch": 0.28673226604813545, "grad_norm": 5.908867359161377, "learning_rate": 8.369096802265108e-06, "loss": 0.6678, "step": 23982 }, { "epoch": 0.2867442221929961, "grad_norm": 1.7449204921722412, "learning_rate": 8.36895373680185e-06, "loss": 0.6399, "step": 23983 }, { "epoch": 0.2867561783378567, "grad_norm": 3.00447678565979, "learning_rate": 8.368810666286832e-06, "loss": 0.6564, "step": 23984 }, { "epoch": 0.2867681344827174, "grad_norm": 2.13033127784729, "learning_rate": 8.368667590720272e-06, "loss": 0.5753, "step": 23985 }, { "epoch": 0.28678009062757803, "grad_norm": 1.818433403968811, "learning_rate": 8.368524510102382e-06, "loss": 0.5896, "step": 23986 }, { "epoch": 0.2867920467724387, "grad_norm": 2.33286452293396, "learning_rate": 8.368381424433379e-06, "loss": 0.5697, "step": 23987 }, { "epoch": 0.28680400291729935, "grad_norm": 3.9712166786193848, "learning_rate": 8.368238333713476e-06, "loss": 0.6049, "step": 23988 }, { "epoch": 0.28681595906216, "grad_norm": 4.381786823272705, "learning_rate": 8.368095237942887e-06, "loss": 0.5406, "step": 23989 }, { "epoch": 0.28682791520702067, "grad_norm": 3.0227887630462646, "learning_rate": 8.367952137121826e-06, "loss": 0.6145, "step": 23990 }, { "epoch": 0.2868398713518813, "grad_norm": 2.3342909812927246, "learning_rate": 8.36780903125051e-06, "loss": 0.5045, "step": 23991 }, { "epoch": 0.28685182749674193, "grad_norm": 1.7819689512252808, "learning_rate": 8.367665920329153e-06, "loss": 0.5604, "step": 23992 }, { "epoch": 0.2868637836416026, "grad_norm": 1.8699392080307007, "learning_rate": 8.367522804357968e-06, "loss": 0.6826, "step": 23993 }, { "epoch": 0.28687573978646325, "grad_norm": 2.007977247238159, "learning_rate": 8.36737968333717e-06, "loss": 0.5962, "step": 23994 }, { "epoch": 0.2868876959313239, "grad_norm": 2.104569911956787, "learning_rate": 8.367236557266976e-06, "loss": 0.5709, "step": 23995 }, { "epoch": 0.28689965207618456, "grad_norm": 2.7653908729553223, "learning_rate": 8.367093426147598e-06, "loss": 0.5908, "step": 23996 }, { "epoch": 0.2869116082210452, "grad_norm": 18.51898956298828, "learning_rate": 8.366950289979252e-06, "loss": 0.6165, "step": 23997 }, { "epoch": 0.2869235643659059, "grad_norm": 3.9575400352478027, "learning_rate": 8.36680714876215e-06, "loss": 0.6174, "step": 23998 }, { "epoch": 0.2869355205107665, "grad_norm": 2.6392059326171875, "learning_rate": 8.366664002496512e-06, "loss": 0.6202, "step": 23999 }, { "epoch": 0.28694747665562714, "grad_norm": 1.6290476322174072, "learning_rate": 8.366520851182546e-06, "loss": 0.4873, "step": 24000 }, { "epoch": 0.2869594328004878, "grad_norm": 3.859499216079712, "learning_rate": 8.366377694820472e-06, "loss": 0.5444, "step": 24001 }, { "epoch": 0.28697138894534846, "grad_norm": 3.5404372215270996, "learning_rate": 8.366234533410502e-06, "loss": 0.6781, "step": 24002 }, { "epoch": 0.2869833450902091, "grad_norm": 2.6466569900512695, "learning_rate": 8.366091366952852e-06, "loss": 0.6184, "step": 24003 }, { "epoch": 0.2869953012350698, "grad_norm": 2.2044668197631836, "learning_rate": 8.365948195447735e-06, "loss": 0.6202, "step": 24004 }, { "epoch": 0.28700725737993044, "grad_norm": 2.7549619674682617, "learning_rate": 8.365805018895368e-06, "loss": 0.5376, "step": 24005 }, { "epoch": 0.28701921352479104, "grad_norm": 5.569498538970947, "learning_rate": 8.365661837295963e-06, "loss": 0.6723, "step": 24006 }, { "epoch": 0.2870311696696517, "grad_norm": 6.551187038421631, "learning_rate": 8.365518650649738e-06, "loss": 0.6099, "step": 24007 }, { "epoch": 0.28704312581451236, "grad_norm": 1.447468638420105, "learning_rate": 8.365375458956904e-06, "loss": 0.5647, "step": 24008 }, { "epoch": 0.287055081959373, "grad_norm": 3.69590425491333, "learning_rate": 8.365232262217677e-06, "loss": 0.5562, "step": 24009 }, { "epoch": 0.2870670381042337, "grad_norm": 3.003910779953003, "learning_rate": 8.365089060432274e-06, "loss": 0.6987, "step": 24010 }, { "epoch": 0.28707899424909433, "grad_norm": 1.9769136905670166, "learning_rate": 8.364945853600908e-06, "loss": 0.6426, "step": 24011 }, { "epoch": 0.287090950393955, "grad_norm": 2.42663311958313, "learning_rate": 8.364802641723793e-06, "loss": 0.5477, "step": 24012 }, { "epoch": 0.28710290653881565, "grad_norm": 2.7263481616973877, "learning_rate": 8.364659424801144e-06, "loss": 0.644, "step": 24013 }, { "epoch": 0.28711486268367625, "grad_norm": 1.5639623403549194, "learning_rate": 8.364516202833177e-06, "loss": 0.5389, "step": 24014 }, { "epoch": 0.2871268188285369, "grad_norm": 2.588132619857788, "learning_rate": 8.364372975820106e-06, "loss": 0.5412, "step": 24015 }, { "epoch": 0.28713877497339757, "grad_norm": 2.272568464279175, "learning_rate": 8.364229743762146e-06, "loss": 0.6469, "step": 24016 }, { "epoch": 0.28715073111825823, "grad_norm": 5.634814739227295, "learning_rate": 8.364086506659512e-06, "loss": 0.6583, "step": 24017 }, { "epoch": 0.2871626872631189, "grad_norm": 73.77857208251953, "learning_rate": 8.363943264512415e-06, "loss": 0.7258, "step": 24018 }, { "epoch": 0.28717464340797955, "grad_norm": 1.6654459238052368, "learning_rate": 8.363800017321076e-06, "loss": 0.4832, "step": 24019 }, { "epoch": 0.2871865995528402, "grad_norm": 3.8396034240722656, "learning_rate": 8.363656765085709e-06, "loss": 0.6037, "step": 24020 }, { "epoch": 0.2871985556977008, "grad_norm": 1.8462064266204834, "learning_rate": 8.363513507806522e-06, "loss": 0.6085, "step": 24021 }, { "epoch": 0.28721051184256147, "grad_norm": 5.227768421173096, "learning_rate": 8.363370245483738e-06, "loss": 0.6337, "step": 24022 }, { "epoch": 0.2872224679874221, "grad_norm": 4.401490688323975, "learning_rate": 8.363226978117567e-06, "loss": 0.6508, "step": 24023 }, { "epoch": 0.2872344241322828, "grad_norm": 2.134636878967285, "learning_rate": 8.363083705708226e-06, "loss": 0.585, "step": 24024 }, { "epoch": 0.28724638027714344, "grad_norm": 4.882652759552002, "learning_rate": 8.362940428255928e-06, "loss": 0.5894, "step": 24025 }, { "epoch": 0.2872583364220041, "grad_norm": 2.6670210361480713, "learning_rate": 8.362797145760888e-06, "loss": 0.6549, "step": 24026 }, { "epoch": 0.28727029256686476, "grad_norm": 3.8898122310638428, "learning_rate": 8.362653858223323e-06, "loss": 0.6127, "step": 24027 }, { "epoch": 0.2872822487117254, "grad_norm": 2.340129852294922, "learning_rate": 8.362510565643446e-06, "loss": 0.5944, "step": 24028 }, { "epoch": 0.287294204856586, "grad_norm": 2.489654302597046, "learning_rate": 8.362367268021475e-06, "loss": 0.6116, "step": 24029 }, { "epoch": 0.2873061610014467, "grad_norm": 1.8805748224258423, "learning_rate": 8.362223965357618e-06, "loss": 0.5592, "step": 24030 }, { "epoch": 0.28731811714630734, "grad_norm": 4.341660022735596, "learning_rate": 8.362080657652099e-06, "loss": 0.6311, "step": 24031 }, { "epoch": 0.287330073291168, "grad_norm": 1.9054640531539917, "learning_rate": 8.361937344905124e-06, "loss": 0.5506, "step": 24032 }, { "epoch": 0.28734202943602866, "grad_norm": 2.649844169616699, "learning_rate": 8.361794027116913e-06, "loss": 0.5955, "step": 24033 }, { "epoch": 0.2873539855808893, "grad_norm": 3.2369544506073, "learning_rate": 8.361650704287682e-06, "loss": 0.6731, "step": 24034 }, { "epoch": 0.28736594172575, "grad_norm": 1.685025930404663, "learning_rate": 8.36150737641764e-06, "loss": 0.598, "step": 24035 }, { "epoch": 0.2873778978706106, "grad_norm": 2.228532075881958, "learning_rate": 8.36136404350701e-06, "loss": 0.6109, "step": 24036 }, { "epoch": 0.28738985401547124, "grad_norm": 3.79422926902771, "learning_rate": 8.361220705556e-06, "loss": 0.5816, "step": 24037 }, { "epoch": 0.2874018101603319, "grad_norm": 11.774763107299805, "learning_rate": 8.361077362564829e-06, "loss": 0.5374, "step": 24038 }, { "epoch": 0.28741376630519255, "grad_norm": 2.6270463466644287, "learning_rate": 8.360934014533711e-06, "loss": 0.616, "step": 24039 }, { "epoch": 0.2874257224500532, "grad_norm": 2.193037509918213, "learning_rate": 8.360790661462859e-06, "loss": 0.692, "step": 24040 }, { "epoch": 0.28743767859491387, "grad_norm": 2.953963279724121, "learning_rate": 8.36064730335249e-06, "loss": 0.7154, "step": 24041 }, { "epoch": 0.28744963473977453, "grad_norm": 2.258352756500244, "learning_rate": 8.360503940202818e-06, "loss": 0.5786, "step": 24042 }, { "epoch": 0.28746159088463513, "grad_norm": 1.5116467475891113, "learning_rate": 8.36036057201406e-06, "loss": 0.641, "step": 24043 }, { "epoch": 0.2874735470294958, "grad_norm": 2.7295236587524414, "learning_rate": 8.360217198786427e-06, "loss": 0.6075, "step": 24044 }, { "epoch": 0.28748550317435645, "grad_norm": 2.9361965656280518, "learning_rate": 8.360073820520138e-06, "loss": 0.6219, "step": 24045 }, { "epoch": 0.2874974593192171, "grad_norm": 3.711091995239258, "learning_rate": 8.359930437215405e-06, "loss": 0.6125, "step": 24046 }, { "epoch": 0.28750941546407777, "grad_norm": 2.922316789627075, "learning_rate": 8.359787048872446e-06, "loss": 0.5542, "step": 24047 }, { "epoch": 0.2875213716089384, "grad_norm": 1.7334325313568115, "learning_rate": 8.359643655491472e-06, "loss": 0.5142, "step": 24048 }, { "epoch": 0.2875333277537991, "grad_norm": 3.519726514816284, "learning_rate": 8.359500257072704e-06, "loss": 0.5355, "step": 24049 }, { "epoch": 0.28754528389865974, "grad_norm": 2.0420923233032227, "learning_rate": 8.35935685361635e-06, "loss": 0.5937, "step": 24050 }, { "epoch": 0.28755724004352035, "grad_norm": 2.8701884746551514, "learning_rate": 8.359213445122629e-06, "loss": 0.7767, "step": 24051 }, { "epoch": 0.287569196188381, "grad_norm": 1.8667840957641602, "learning_rate": 8.359070031591758e-06, "loss": 0.6577, "step": 24052 }, { "epoch": 0.28758115233324166, "grad_norm": 2.017798662185669, "learning_rate": 8.358926613023948e-06, "loss": 0.6073, "step": 24053 }, { "epoch": 0.2875931084781023, "grad_norm": 1.9971257448196411, "learning_rate": 8.358783189419415e-06, "loss": 0.5133, "step": 24054 }, { "epoch": 0.287605064622963, "grad_norm": 2.9289307594299316, "learning_rate": 8.358639760778376e-06, "loss": 0.6107, "step": 24055 }, { "epoch": 0.28761702076782364, "grad_norm": 9.012429237365723, "learning_rate": 8.358496327101043e-06, "loss": 0.4895, "step": 24056 }, { "epoch": 0.2876289769126843, "grad_norm": 7.322540283203125, "learning_rate": 8.358352888387633e-06, "loss": 0.662, "step": 24057 }, { "epoch": 0.2876409330575449, "grad_norm": 3.271867513656616, "learning_rate": 8.358209444638362e-06, "loss": 0.5836, "step": 24058 }, { "epoch": 0.28765288920240556, "grad_norm": 2.6143248081207275, "learning_rate": 8.358065995853444e-06, "loss": 0.6031, "step": 24059 }, { "epoch": 0.2876648453472662, "grad_norm": 2.553131580352783, "learning_rate": 8.357922542033093e-06, "loss": 0.5434, "step": 24060 }, { "epoch": 0.2876768014921269, "grad_norm": 3.2211129665374756, "learning_rate": 8.357779083177527e-06, "loss": 0.6832, "step": 24061 }, { "epoch": 0.28768875763698754, "grad_norm": 2.3777685165405273, "learning_rate": 8.357635619286957e-06, "loss": 0.6347, "step": 24062 }, { "epoch": 0.2877007137818482, "grad_norm": 2.4977807998657227, "learning_rate": 8.357492150361603e-06, "loss": 0.6235, "step": 24063 }, { "epoch": 0.28771266992670885, "grad_norm": 3.847519874572754, "learning_rate": 8.357348676401675e-06, "loss": 0.5832, "step": 24064 }, { "epoch": 0.28772462607156946, "grad_norm": 2.7308318614959717, "learning_rate": 8.357205197407392e-06, "loss": 0.6745, "step": 24065 }, { "epoch": 0.2877365822164301, "grad_norm": 2.3775527477264404, "learning_rate": 8.35706171337897e-06, "loss": 0.6487, "step": 24066 }, { "epoch": 0.2877485383612908, "grad_norm": 4.448132514953613, "learning_rate": 8.356918224316619e-06, "loss": 0.5652, "step": 24067 }, { "epoch": 0.28776049450615143, "grad_norm": 2.1222589015960693, "learning_rate": 8.356774730220558e-06, "loss": 0.6829, "step": 24068 }, { "epoch": 0.2877724506510121, "grad_norm": 2.081146001815796, "learning_rate": 8.356631231091002e-06, "loss": 0.6867, "step": 24069 }, { "epoch": 0.28778440679587275, "grad_norm": 3.0250489711761475, "learning_rate": 8.356487726928164e-06, "loss": 0.6641, "step": 24070 }, { "epoch": 0.2877963629407334, "grad_norm": 3.526963949203491, "learning_rate": 8.356344217732262e-06, "loss": 0.5743, "step": 24071 }, { "epoch": 0.28780831908559407, "grad_norm": 2.0615971088409424, "learning_rate": 8.35620070350351e-06, "loss": 0.6166, "step": 24072 }, { "epoch": 0.28782027523045467, "grad_norm": 7.633542060852051, "learning_rate": 8.356057184242121e-06, "loss": 0.6129, "step": 24073 }, { "epoch": 0.28783223137531533, "grad_norm": 1.748317003250122, "learning_rate": 8.355913659948316e-06, "loss": 0.595, "step": 24074 }, { "epoch": 0.287844187520176, "grad_norm": 2.572815179824829, "learning_rate": 8.355770130622304e-06, "loss": 0.597, "step": 24075 }, { "epoch": 0.28785614366503665, "grad_norm": 7.459159851074219, "learning_rate": 8.355626596264303e-06, "loss": 0.6787, "step": 24076 }, { "epoch": 0.2878680998098973, "grad_norm": 1.983678936958313, "learning_rate": 8.355483056874529e-06, "loss": 0.6358, "step": 24077 }, { "epoch": 0.28788005595475796, "grad_norm": 3.1205036640167236, "learning_rate": 8.355339512453196e-06, "loss": 0.6051, "step": 24078 }, { "epoch": 0.2878920120996186, "grad_norm": 3.0021870136260986, "learning_rate": 8.355195963000518e-06, "loss": 0.5779, "step": 24079 }, { "epoch": 0.2879039682444792, "grad_norm": 2.398908853530884, "learning_rate": 8.355052408516712e-06, "loss": 0.5719, "step": 24080 }, { "epoch": 0.2879159243893399, "grad_norm": 1.64639413356781, "learning_rate": 8.354908849001994e-06, "loss": 0.5334, "step": 24081 }, { "epoch": 0.28792788053420054, "grad_norm": 2.916747570037842, "learning_rate": 8.354765284456579e-06, "loss": 0.6696, "step": 24082 }, { "epoch": 0.2879398366790612, "grad_norm": 3.21997332572937, "learning_rate": 8.35462171488068e-06, "loss": 0.6368, "step": 24083 }, { "epoch": 0.28795179282392186, "grad_norm": 2.212442398071289, "learning_rate": 8.354478140274515e-06, "loss": 0.6231, "step": 24084 }, { "epoch": 0.2879637489687825, "grad_norm": 2.7259442806243896, "learning_rate": 8.354334560638298e-06, "loss": 0.6463, "step": 24085 }, { "epoch": 0.2879757051136432, "grad_norm": 1.3965258598327637, "learning_rate": 8.354190975972243e-06, "loss": 0.5323, "step": 24086 }, { "epoch": 0.28798766125850384, "grad_norm": 4.918988227844238, "learning_rate": 8.354047386276569e-06, "loss": 0.6455, "step": 24087 }, { "epoch": 0.28799961740336444, "grad_norm": 3.4183852672576904, "learning_rate": 8.353903791551488e-06, "loss": 0.5347, "step": 24088 }, { "epoch": 0.2880115735482251, "grad_norm": 1.9774832725524902, "learning_rate": 8.353760191797216e-06, "loss": 0.5651, "step": 24089 }, { "epoch": 0.28802352969308576, "grad_norm": 2.060995578765869, "learning_rate": 8.353616587013968e-06, "loss": 0.6543, "step": 24090 }, { "epoch": 0.2880354858379464, "grad_norm": 2.8033041954040527, "learning_rate": 8.35347297720196e-06, "loss": 0.5054, "step": 24091 }, { "epoch": 0.2880474419828071, "grad_norm": 3.28814959526062, "learning_rate": 8.35332936236141e-06, "loss": 0.6503, "step": 24092 }, { "epoch": 0.28805939812766773, "grad_norm": 10.443758964538574, "learning_rate": 8.353185742492526e-06, "loss": 0.5858, "step": 24093 }, { "epoch": 0.2880713542725284, "grad_norm": 2.8623812198638916, "learning_rate": 8.353042117595532e-06, "loss": 0.5759, "step": 24094 }, { "epoch": 0.288083310417389, "grad_norm": 3.7715907096862793, "learning_rate": 8.352898487670638e-06, "loss": 0.6516, "step": 24095 }, { "epoch": 0.28809526656224965, "grad_norm": 2.347198247909546, "learning_rate": 8.352754852718061e-06, "loss": 0.6289, "step": 24096 }, { "epoch": 0.2881072227071103, "grad_norm": 1.6125695705413818, "learning_rate": 8.352611212738017e-06, "loss": 0.5936, "step": 24097 }, { "epoch": 0.28811917885197097, "grad_norm": 2.8777947425842285, "learning_rate": 8.352467567730719e-06, "loss": 0.6816, "step": 24098 }, { "epoch": 0.28813113499683163, "grad_norm": 5.464585781097412, "learning_rate": 8.352323917696384e-06, "loss": 0.5924, "step": 24099 }, { "epoch": 0.2881430911416923, "grad_norm": 2.375537633895874, "learning_rate": 8.352180262635228e-06, "loss": 0.5611, "step": 24100 }, { "epoch": 0.28815504728655295, "grad_norm": 1.6815307140350342, "learning_rate": 8.352036602547466e-06, "loss": 0.6193, "step": 24101 }, { "epoch": 0.28816700343141355, "grad_norm": 9.694989204406738, "learning_rate": 8.351892937433313e-06, "loss": 0.5625, "step": 24102 }, { "epoch": 0.2881789595762742, "grad_norm": 6.624824047088623, "learning_rate": 8.351749267292985e-06, "loss": 0.5573, "step": 24103 }, { "epoch": 0.28819091572113487, "grad_norm": 2.7818186283111572, "learning_rate": 8.351605592126696e-06, "loss": 0.5442, "step": 24104 }, { "epoch": 0.2882028718659955, "grad_norm": 2.338714838027954, "learning_rate": 8.351461911934664e-06, "loss": 0.5562, "step": 24105 }, { "epoch": 0.2882148280108562, "grad_norm": 10.89769458770752, "learning_rate": 8.351318226717103e-06, "loss": 0.577, "step": 24106 }, { "epoch": 0.28822678415571684, "grad_norm": 2.9632761478424072, "learning_rate": 8.351174536474227e-06, "loss": 0.6131, "step": 24107 }, { "epoch": 0.2882387403005775, "grad_norm": 2.662263870239258, "learning_rate": 8.351030841206254e-06, "loss": 0.5562, "step": 24108 }, { "epoch": 0.28825069644543816, "grad_norm": 3.076432943344116, "learning_rate": 8.350887140913397e-06, "loss": 0.5873, "step": 24109 }, { "epoch": 0.28826265259029876, "grad_norm": 3.7333126068115234, "learning_rate": 8.350743435595874e-06, "loss": 0.5724, "step": 24110 }, { "epoch": 0.2882746087351594, "grad_norm": 2.9359829425811768, "learning_rate": 8.350599725253899e-06, "loss": 0.5237, "step": 24111 }, { "epoch": 0.2882865648800201, "grad_norm": 4.746336460113525, "learning_rate": 8.350456009887689e-06, "loss": 0.5517, "step": 24112 }, { "epoch": 0.28829852102488074, "grad_norm": 6.270701885223389, "learning_rate": 8.350312289497455e-06, "loss": 0.6575, "step": 24113 }, { "epoch": 0.2883104771697414, "grad_norm": 6.622447490692139, "learning_rate": 8.350168564083418e-06, "loss": 0.5282, "step": 24114 }, { "epoch": 0.28832243331460206, "grad_norm": 2.1248292922973633, "learning_rate": 8.350024833645792e-06, "loss": 0.5781, "step": 24115 }, { "epoch": 0.2883343894594627, "grad_norm": 2.159926176071167, "learning_rate": 8.34988109818479e-06, "loss": 0.5774, "step": 24116 }, { "epoch": 0.2883463456043233, "grad_norm": 7.174739837646484, "learning_rate": 8.349737357700632e-06, "loss": 0.6507, "step": 24117 }, { "epoch": 0.288358301749184, "grad_norm": 2.1482372283935547, "learning_rate": 8.34959361219353e-06, "loss": 0.6732, "step": 24118 }, { "epoch": 0.28837025789404463, "grad_norm": 2.0397772789001465, "learning_rate": 8.349449861663699e-06, "loss": 0.7145, "step": 24119 }, { "epoch": 0.2883822140389053, "grad_norm": 4.07138729095459, "learning_rate": 8.349306106111357e-06, "loss": 0.5864, "step": 24120 }, { "epoch": 0.28839417018376595, "grad_norm": 1.2649598121643066, "learning_rate": 8.349162345536719e-06, "loss": 0.5517, "step": 24121 }, { "epoch": 0.2884061263286266, "grad_norm": 2.5555641651153564, "learning_rate": 8.349018579939998e-06, "loss": 0.5755, "step": 24122 }, { "epoch": 0.28841808247348727, "grad_norm": 3.762688159942627, "learning_rate": 8.348874809321414e-06, "loss": 0.5972, "step": 24123 }, { "epoch": 0.2884300386183479, "grad_norm": 6.345340251922607, "learning_rate": 8.34873103368118e-06, "loss": 0.6265, "step": 24124 }, { "epoch": 0.28844199476320853, "grad_norm": 1.922037124633789, "learning_rate": 8.348587253019512e-06, "loss": 0.6511, "step": 24125 }, { "epoch": 0.2884539509080692, "grad_norm": 5.174302577972412, "learning_rate": 8.348443467336624e-06, "loss": 0.7047, "step": 24126 }, { "epoch": 0.28846590705292985, "grad_norm": 13.731285095214844, "learning_rate": 8.348299676632734e-06, "loss": 0.645, "step": 24127 }, { "epoch": 0.2884778631977905, "grad_norm": 2.139888048171997, "learning_rate": 8.348155880908057e-06, "loss": 0.5794, "step": 24128 }, { "epoch": 0.28848981934265117, "grad_norm": 4.970696926116943, "learning_rate": 8.348012080162808e-06, "loss": 0.7339, "step": 24129 }, { "epoch": 0.2885017754875118, "grad_norm": 2.296410322189331, "learning_rate": 8.347868274397203e-06, "loss": 0.4888, "step": 24130 }, { "epoch": 0.2885137316323725, "grad_norm": 3.2450954914093018, "learning_rate": 8.34772446361146e-06, "loss": 0.5828, "step": 24131 }, { "epoch": 0.2885256877772331, "grad_norm": 1.7923972606658936, "learning_rate": 8.347580647805788e-06, "loss": 0.6559, "step": 24132 }, { "epoch": 0.28853764392209375, "grad_norm": 2.4855527877807617, "learning_rate": 8.34743682698041e-06, "loss": 0.6014, "step": 24133 }, { "epoch": 0.2885496000669544, "grad_norm": 2.5591132640838623, "learning_rate": 8.347293001135537e-06, "loss": 0.6094, "step": 24134 }, { "epoch": 0.28856155621181506, "grad_norm": 8.22000789642334, "learning_rate": 8.347149170271389e-06, "loss": 0.578, "step": 24135 }, { "epoch": 0.2885735123566757, "grad_norm": 3.163551092147827, "learning_rate": 8.347005334388176e-06, "loss": 0.5746, "step": 24136 }, { "epoch": 0.2885854685015364, "grad_norm": 2.3541016578674316, "learning_rate": 8.346861493486118e-06, "loss": 0.5927, "step": 24137 }, { "epoch": 0.28859742464639704, "grad_norm": 3.0365288257598877, "learning_rate": 8.346717647565429e-06, "loss": 0.5577, "step": 24138 }, { "epoch": 0.28860938079125764, "grad_norm": 2.076977014541626, "learning_rate": 8.346573796626323e-06, "loss": 0.5711, "step": 24139 }, { "epoch": 0.2886213369361183, "grad_norm": 1.9725652933120728, "learning_rate": 8.34642994066902e-06, "loss": 0.6344, "step": 24140 }, { "epoch": 0.28863329308097896, "grad_norm": 3.9826550483703613, "learning_rate": 8.346286079693734e-06, "loss": 0.6404, "step": 24141 }, { "epoch": 0.2886452492258396, "grad_norm": 1.7514774799346924, "learning_rate": 8.346142213700678e-06, "loss": 0.4953, "step": 24142 }, { "epoch": 0.2886572053707003, "grad_norm": 7.957141399383545, "learning_rate": 8.345998342690071e-06, "loss": 0.541, "step": 24143 }, { "epoch": 0.28866916151556093, "grad_norm": 1.5263677835464478, "learning_rate": 8.345854466662128e-06, "loss": 0.5768, "step": 24144 }, { "epoch": 0.2886811176604216, "grad_norm": 8.222723007202148, "learning_rate": 8.345710585617063e-06, "loss": 0.5349, "step": 24145 }, { "epoch": 0.28869307380528225, "grad_norm": 2.311457872390747, "learning_rate": 8.345566699555094e-06, "loss": 0.5403, "step": 24146 }, { "epoch": 0.28870502995014286, "grad_norm": 2.040760040283203, "learning_rate": 8.345422808476436e-06, "loss": 0.654, "step": 24147 }, { "epoch": 0.2887169860950035, "grad_norm": 1.9874165058135986, "learning_rate": 8.345278912381305e-06, "loss": 0.4765, "step": 24148 }, { "epoch": 0.2887289422398642, "grad_norm": 1.7841582298278809, "learning_rate": 8.345135011269916e-06, "loss": 0.5928, "step": 24149 }, { "epoch": 0.28874089838472483, "grad_norm": 2.9486823081970215, "learning_rate": 8.344991105142484e-06, "loss": 0.6202, "step": 24150 }, { "epoch": 0.2887528545295855, "grad_norm": 1.3934073448181152, "learning_rate": 8.344847193999226e-06, "loss": 0.5774, "step": 24151 }, { "epoch": 0.28876481067444615, "grad_norm": 2.205902099609375, "learning_rate": 8.344703277840359e-06, "loss": 0.6024, "step": 24152 }, { "epoch": 0.2887767668193068, "grad_norm": 2.8466124534606934, "learning_rate": 8.344559356666098e-06, "loss": 0.5483, "step": 24153 }, { "epoch": 0.2887887229641674, "grad_norm": 2.0291004180908203, "learning_rate": 8.344415430476655e-06, "loss": 0.6336, "step": 24154 }, { "epoch": 0.28880067910902807, "grad_norm": 1.5318377017974854, "learning_rate": 8.344271499272252e-06, "loss": 0.558, "step": 24155 }, { "epoch": 0.2888126352538887, "grad_norm": 2.50048565864563, "learning_rate": 8.344127563053101e-06, "loss": 0.6914, "step": 24156 }, { "epoch": 0.2888245913987494, "grad_norm": 1.8940489292144775, "learning_rate": 8.34398362181942e-06, "loss": 0.5685, "step": 24157 }, { "epoch": 0.28883654754361004, "grad_norm": 1.5749869346618652, "learning_rate": 8.343839675571423e-06, "loss": 0.5789, "step": 24158 }, { "epoch": 0.2888485036884707, "grad_norm": 1.8697896003723145, "learning_rate": 8.343695724309328e-06, "loss": 0.6154, "step": 24159 }, { "epoch": 0.28886045983333136, "grad_norm": 1.7666290998458862, "learning_rate": 8.343551768033347e-06, "loss": 0.6973, "step": 24160 }, { "epoch": 0.28887241597819197, "grad_norm": 4.040115833282471, "learning_rate": 8.3434078067437e-06, "loss": 0.5973, "step": 24161 }, { "epoch": 0.2888843721230526, "grad_norm": 12.63537311553955, "learning_rate": 8.3432638404406e-06, "loss": 0.6537, "step": 24162 }, { "epoch": 0.2888963282679133, "grad_norm": 5.331809997558594, "learning_rate": 8.343119869124265e-06, "loss": 0.5767, "step": 24163 }, { "epoch": 0.28890828441277394, "grad_norm": 2.479217529296875, "learning_rate": 8.34297589279491e-06, "loss": 0.7067, "step": 24164 }, { "epoch": 0.2889202405576346, "grad_norm": 1.5825871229171753, "learning_rate": 8.34283191145275e-06, "loss": 0.4777, "step": 24165 }, { "epoch": 0.28893219670249526, "grad_norm": 2.165637493133545, "learning_rate": 8.342687925098e-06, "loss": 0.5912, "step": 24166 }, { "epoch": 0.2889441528473559, "grad_norm": 3.301431655883789, "learning_rate": 8.34254393373088e-06, "loss": 0.756, "step": 24167 }, { "epoch": 0.2889561089922166, "grad_norm": 2.176077127456665, "learning_rate": 8.342399937351602e-06, "loss": 0.556, "step": 24168 }, { "epoch": 0.2889680651370772, "grad_norm": 3.334472894668579, "learning_rate": 8.342255935960383e-06, "loss": 0.5899, "step": 24169 }, { "epoch": 0.28898002128193784, "grad_norm": 2.9483847618103027, "learning_rate": 8.342111929557442e-06, "loss": 0.6521, "step": 24170 }, { "epoch": 0.2889919774267985, "grad_norm": 8.4705228805542, "learning_rate": 8.34196791814299e-06, "loss": 0.5758, "step": 24171 }, { "epoch": 0.28900393357165916, "grad_norm": 1.7921198606491089, "learning_rate": 8.341823901717247e-06, "loss": 0.6982, "step": 24172 }, { "epoch": 0.2890158897165198, "grad_norm": 5.829188346862793, "learning_rate": 8.341679880280426e-06, "loss": 0.5971, "step": 24173 }, { "epoch": 0.2890278458613805, "grad_norm": 1.977048397064209, "learning_rate": 8.341535853832744e-06, "loss": 0.5395, "step": 24174 }, { "epoch": 0.28903980200624113, "grad_norm": 5.096386432647705, "learning_rate": 8.341391822374419e-06, "loss": 0.6499, "step": 24175 }, { "epoch": 0.28905175815110173, "grad_norm": 3.932302236557007, "learning_rate": 8.341247785905662e-06, "loss": 0.6533, "step": 24176 }, { "epoch": 0.2890637142959624, "grad_norm": 1.8492850065231323, "learning_rate": 8.341103744426693e-06, "loss": 0.5037, "step": 24177 }, { "epoch": 0.28907567044082305, "grad_norm": 1.5899003744125366, "learning_rate": 8.340959697937727e-06, "loss": 0.5905, "step": 24178 }, { "epoch": 0.2890876265856837, "grad_norm": 2.697766065597534, "learning_rate": 8.340815646438982e-06, "loss": 0.6273, "step": 24179 }, { "epoch": 0.28909958273054437, "grad_norm": 5.029721260070801, "learning_rate": 8.34067158993067e-06, "loss": 0.5336, "step": 24180 }, { "epoch": 0.289111538875405, "grad_norm": 2.5470848083496094, "learning_rate": 8.34052752841301e-06, "loss": 0.6458, "step": 24181 }, { "epoch": 0.2891234950202657, "grad_norm": 2.514258623123169, "learning_rate": 8.340383461886217e-06, "loss": 0.5543, "step": 24182 }, { "epoch": 0.2891354511651263, "grad_norm": 3.7873857021331787, "learning_rate": 8.340239390350506e-06, "loss": 0.5987, "step": 24183 }, { "epoch": 0.28914740730998695, "grad_norm": 6.5945563316345215, "learning_rate": 8.340095313806096e-06, "loss": 0.6443, "step": 24184 }, { "epoch": 0.2891593634548476, "grad_norm": 1.6964755058288574, "learning_rate": 8.339951232253201e-06, "loss": 0.6214, "step": 24185 }, { "epoch": 0.28917131959970827, "grad_norm": 4.7242302894592285, "learning_rate": 8.339807145692035e-06, "loss": 0.6019, "step": 24186 }, { "epoch": 0.2891832757445689, "grad_norm": 3.2351410388946533, "learning_rate": 8.33966305412282e-06, "loss": 0.6774, "step": 24187 }, { "epoch": 0.2891952318894296, "grad_norm": 3.8248047828674316, "learning_rate": 8.339518957545764e-06, "loss": 0.7036, "step": 24188 }, { "epoch": 0.28920718803429024, "grad_norm": 3.632866382598877, "learning_rate": 8.33937485596109e-06, "loss": 0.6684, "step": 24189 }, { "epoch": 0.2892191441791509, "grad_norm": 4.2450103759765625, "learning_rate": 8.339230749369013e-06, "loss": 0.6832, "step": 24190 }, { "epoch": 0.2892311003240115, "grad_norm": 16.68244171142578, "learning_rate": 8.339086637769745e-06, "loss": 0.5661, "step": 24191 }, { "epoch": 0.28924305646887216, "grad_norm": 2.7615323066711426, "learning_rate": 8.338942521163506e-06, "loss": 0.6358, "step": 24192 }, { "epoch": 0.2892550126137328, "grad_norm": 1.3700519800186157, "learning_rate": 8.338798399550512e-06, "loss": 0.5198, "step": 24193 }, { "epoch": 0.2892669687585935, "grad_norm": 1.8548492193222046, "learning_rate": 8.338654272930974e-06, "loss": 0.5849, "step": 24194 }, { "epoch": 0.28927892490345414, "grad_norm": 2.135140895843506, "learning_rate": 8.338510141305117e-06, "loss": 0.6009, "step": 24195 }, { "epoch": 0.2892908810483148, "grad_norm": 2.046130418777466, "learning_rate": 8.338366004673149e-06, "loss": 0.6718, "step": 24196 }, { "epoch": 0.28930283719317545, "grad_norm": 1.96662175655365, "learning_rate": 8.33822186303529e-06, "loss": 0.6422, "step": 24197 }, { "epoch": 0.28931479333803606, "grad_norm": 1.9180768728256226, "learning_rate": 8.338077716391755e-06, "loss": 0.6413, "step": 24198 }, { "epoch": 0.2893267494828967, "grad_norm": 1.876516342163086, "learning_rate": 8.33793356474276e-06, "loss": 0.5797, "step": 24199 }, { "epoch": 0.2893387056277574, "grad_norm": 7.016129970550537, "learning_rate": 8.337789408088524e-06, "loss": 0.5234, "step": 24200 }, { "epoch": 0.28935066177261803, "grad_norm": 1.6715497970581055, "learning_rate": 8.33764524642926e-06, "loss": 0.6027, "step": 24201 }, { "epoch": 0.2893626179174787, "grad_norm": 1.515697717666626, "learning_rate": 8.337501079765185e-06, "loss": 0.6025, "step": 24202 }, { "epoch": 0.28937457406233935, "grad_norm": 2.4188990592956543, "learning_rate": 8.337356908096515e-06, "loss": 0.5834, "step": 24203 }, { "epoch": 0.2893865302072, "grad_norm": 2.803776264190674, "learning_rate": 8.337212731423465e-06, "loss": 0.6425, "step": 24204 }, { "epoch": 0.28939848635206067, "grad_norm": 1.95917546749115, "learning_rate": 8.337068549746254e-06, "loss": 0.6513, "step": 24205 }, { "epoch": 0.28941044249692127, "grad_norm": 1.860058069229126, "learning_rate": 8.336924363065098e-06, "loss": 0.6034, "step": 24206 }, { "epoch": 0.28942239864178193, "grad_norm": 2.9508578777313232, "learning_rate": 8.336780171380212e-06, "loss": 0.6024, "step": 24207 }, { "epoch": 0.2894343547866426, "grad_norm": 1.9773555994033813, "learning_rate": 8.33663597469181e-06, "loss": 0.6353, "step": 24208 }, { "epoch": 0.28944631093150325, "grad_norm": 2.871018886566162, "learning_rate": 8.336491773000112e-06, "loss": 0.5278, "step": 24209 }, { "epoch": 0.2894582670763639, "grad_norm": 2.2165982723236084, "learning_rate": 8.336347566305331e-06, "loss": 0.5564, "step": 24210 }, { "epoch": 0.28947022322122457, "grad_norm": 1.6114747524261475, "learning_rate": 8.336203354607689e-06, "loss": 0.5691, "step": 24211 }, { "epoch": 0.2894821793660852, "grad_norm": 1.542560338973999, "learning_rate": 8.336059137907395e-06, "loss": 0.6086, "step": 24212 }, { "epoch": 0.2894941355109458, "grad_norm": 2.4773802757263184, "learning_rate": 8.335914916204669e-06, "loss": 0.5998, "step": 24213 }, { "epoch": 0.2895060916558065, "grad_norm": 2.8156542778015137, "learning_rate": 8.335770689499727e-06, "loss": 0.6224, "step": 24214 }, { "epoch": 0.28951804780066714, "grad_norm": 2.6077425479888916, "learning_rate": 8.335626457792785e-06, "loss": 0.5865, "step": 24215 }, { "epoch": 0.2895300039455278, "grad_norm": 6.429141521453857, "learning_rate": 8.335482221084059e-06, "loss": 0.6318, "step": 24216 }, { "epoch": 0.28954196009038846, "grad_norm": 2.2468996047973633, "learning_rate": 8.335337979373765e-06, "loss": 0.5668, "step": 24217 }, { "epoch": 0.2895539162352491, "grad_norm": 1.5286285877227783, "learning_rate": 8.33519373266212e-06, "loss": 0.6111, "step": 24218 }, { "epoch": 0.2895658723801098, "grad_norm": 1.9462602138519287, "learning_rate": 8.335049480949342e-06, "loss": 0.6749, "step": 24219 }, { "epoch": 0.2895778285249704, "grad_norm": 1.8331968784332275, "learning_rate": 8.334905224235643e-06, "loss": 0.6139, "step": 24220 }, { "epoch": 0.28958978466983104, "grad_norm": 1.9208428859710693, "learning_rate": 8.334760962521244e-06, "loss": 0.6118, "step": 24221 }, { "epoch": 0.2896017408146917, "grad_norm": 2.0165114402770996, "learning_rate": 8.334616695806358e-06, "loss": 0.5858, "step": 24222 }, { "epoch": 0.28961369695955236, "grad_norm": 3.092008113861084, "learning_rate": 8.334472424091202e-06, "loss": 0.6049, "step": 24223 }, { "epoch": 0.289625653104413, "grad_norm": 2.5557494163513184, "learning_rate": 8.334328147375993e-06, "loss": 0.6767, "step": 24224 }, { "epoch": 0.2896376092492737, "grad_norm": 3.0329060554504395, "learning_rate": 8.334183865660947e-06, "loss": 0.682, "step": 24225 }, { "epoch": 0.28964956539413433, "grad_norm": 2.0277090072631836, "learning_rate": 8.334039578946281e-06, "loss": 0.6339, "step": 24226 }, { "epoch": 0.289661521538995, "grad_norm": 1.886839747428894, "learning_rate": 8.333895287232211e-06, "loss": 0.6856, "step": 24227 }, { "epoch": 0.2896734776838556, "grad_norm": 3.207472324371338, "learning_rate": 8.333750990518953e-06, "loss": 0.5282, "step": 24228 }, { "epoch": 0.28968543382871625, "grad_norm": 3.068071126937866, "learning_rate": 8.333606688806722e-06, "loss": 0.6238, "step": 24229 }, { "epoch": 0.2896973899735769, "grad_norm": 2.0528173446655273, "learning_rate": 8.333462382095738e-06, "loss": 0.6078, "step": 24230 }, { "epoch": 0.28970934611843757, "grad_norm": 2.248868465423584, "learning_rate": 8.333318070386213e-06, "loss": 0.6071, "step": 24231 }, { "epoch": 0.28972130226329823, "grad_norm": 14.728782653808594, "learning_rate": 8.33317375367837e-06, "loss": 0.609, "step": 24232 }, { "epoch": 0.2897332584081589, "grad_norm": 1.7771602869033813, "learning_rate": 8.333029431972418e-06, "loss": 0.5547, "step": 24233 }, { "epoch": 0.28974521455301955, "grad_norm": 2.131427049636841, "learning_rate": 8.332885105268575e-06, "loss": 0.6391, "step": 24234 }, { "epoch": 0.28975717069788015, "grad_norm": 1.9126417636871338, "learning_rate": 8.332740773567061e-06, "loss": 0.5982, "step": 24235 }, { "epoch": 0.2897691268427408, "grad_norm": 1.9792044162750244, "learning_rate": 8.332596436868091e-06, "loss": 0.6077, "step": 24236 }, { "epoch": 0.28978108298760147, "grad_norm": 1.6085858345031738, "learning_rate": 8.332452095171879e-06, "loss": 0.6436, "step": 24237 }, { "epoch": 0.2897930391324621, "grad_norm": 2.412243127822876, "learning_rate": 8.332307748478643e-06, "loss": 0.6468, "step": 24238 }, { "epoch": 0.2898049952773228, "grad_norm": 2.087968111038208, "learning_rate": 8.332163396788601e-06, "loss": 0.5694, "step": 24239 }, { "epoch": 0.28981695142218344, "grad_norm": 2.586510181427002, "learning_rate": 8.332019040101968e-06, "loss": 0.6232, "step": 24240 }, { "epoch": 0.2898289075670441, "grad_norm": 3.9465816020965576, "learning_rate": 8.331874678418962e-06, "loss": 0.5856, "step": 24241 }, { "epoch": 0.2898408637119047, "grad_norm": 2.268867254257202, "learning_rate": 8.331730311739796e-06, "loss": 0.612, "step": 24242 }, { "epoch": 0.28985281985676536, "grad_norm": 2.5605454444885254, "learning_rate": 8.33158594006469e-06, "loss": 0.7039, "step": 24243 }, { "epoch": 0.289864776001626, "grad_norm": 2.433443069458008, "learning_rate": 8.331441563393858e-06, "loss": 0.6226, "step": 24244 }, { "epoch": 0.2898767321464867, "grad_norm": 2.183457136154175, "learning_rate": 8.331297181727518e-06, "loss": 0.5565, "step": 24245 }, { "epoch": 0.28988868829134734, "grad_norm": 2.7635128498077393, "learning_rate": 8.331152795065885e-06, "loss": 0.6925, "step": 24246 }, { "epoch": 0.289900644436208, "grad_norm": 1.749970555305481, "learning_rate": 8.331008403409178e-06, "loss": 0.6032, "step": 24247 }, { "epoch": 0.28991260058106866, "grad_norm": 3.266968011856079, "learning_rate": 8.33086400675761e-06, "loss": 0.6816, "step": 24248 }, { "epoch": 0.2899245567259293, "grad_norm": 2.173732042312622, "learning_rate": 8.330719605111401e-06, "loss": 0.6028, "step": 24249 }, { "epoch": 0.2899365128707899, "grad_norm": 2.0109922885894775, "learning_rate": 8.330575198470768e-06, "loss": 0.5624, "step": 24250 }, { "epoch": 0.2899484690156506, "grad_norm": 4.43190336227417, "learning_rate": 8.330430786835923e-06, "loss": 0.6247, "step": 24251 }, { "epoch": 0.28996042516051124, "grad_norm": 2.5713086128234863, "learning_rate": 8.330286370207085e-06, "loss": 0.7209, "step": 24252 }, { "epoch": 0.2899723813053719, "grad_norm": 2.0570549964904785, "learning_rate": 8.330141948584473e-06, "loss": 0.6611, "step": 24253 }, { "epoch": 0.28998433745023255, "grad_norm": 4.334863185882568, "learning_rate": 8.329997521968299e-06, "loss": 0.6107, "step": 24254 }, { "epoch": 0.2899962935950932, "grad_norm": 2.0101890563964844, "learning_rate": 8.329853090358784e-06, "loss": 0.5963, "step": 24255 }, { "epoch": 0.29000824973995387, "grad_norm": 2.7473556995391846, "learning_rate": 8.32970865375614e-06, "loss": 0.6371, "step": 24256 }, { "epoch": 0.2900202058848145, "grad_norm": 1.6005854606628418, "learning_rate": 8.329564212160588e-06, "loss": 0.7432, "step": 24257 }, { "epoch": 0.29003216202967513, "grad_norm": 3.1647205352783203, "learning_rate": 8.329419765572343e-06, "loss": 0.6608, "step": 24258 }, { "epoch": 0.2900441181745358, "grad_norm": 2.424429416656494, "learning_rate": 8.329275313991619e-06, "loss": 0.6945, "step": 24259 }, { "epoch": 0.29005607431939645, "grad_norm": 1.5420708656311035, "learning_rate": 8.329130857418635e-06, "loss": 0.5583, "step": 24260 }, { "epoch": 0.2900680304642571, "grad_norm": 3.2794547080993652, "learning_rate": 8.328986395853609e-06, "loss": 0.8049, "step": 24261 }, { "epoch": 0.29007998660911777, "grad_norm": 1.6534347534179688, "learning_rate": 8.328841929296756e-06, "loss": 0.6194, "step": 24262 }, { "epoch": 0.2900919427539784, "grad_norm": 1.5673760175704956, "learning_rate": 8.328697457748291e-06, "loss": 0.6006, "step": 24263 }, { "epoch": 0.2901038988988391, "grad_norm": 1.6177066564559937, "learning_rate": 8.328552981208435e-06, "loss": 0.6427, "step": 24264 }, { "epoch": 0.2901158550436997, "grad_norm": 1.6967393159866333, "learning_rate": 8.3284084996774e-06, "loss": 0.6706, "step": 24265 }, { "epoch": 0.29012781118856035, "grad_norm": 1.6154471635818481, "learning_rate": 8.328264013155405e-06, "loss": 0.5607, "step": 24266 }, { "epoch": 0.290139767333421, "grad_norm": 1.339492917060852, "learning_rate": 8.328119521642667e-06, "loss": 0.6019, "step": 24267 }, { "epoch": 0.29015172347828166, "grad_norm": 1.531947374343872, "learning_rate": 8.327975025139401e-06, "loss": 0.5005, "step": 24268 }, { "epoch": 0.2901636796231423, "grad_norm": 1.5837985277175903, "learning_rate": 8.327830523645826e-06, "loss": 0.6229, "step": 24269 }, { "epoch": 0.290175635768003, "grad_norm": 1.6965142488479614, "learning_rate": 8.327686017162155e-06, "loss": 0.6146, "step": 24270 }, { "epoch": 0.29018759191286364, "grad_norm": 2.150040864944458, "learning_rate": 8.32754150568861e-06, "loss": 0.5896, "step": 24271 }, { "epoch": 0.29019954805772424, "grad_norm": 4.430164337158203, "learning_rate": 8.327396989225403e-06, "loss": 0.6842, "step": 24272 }, { "epoch": 0.2902115042025849, "grad_norm": 1.425369381904602, "learning_rate": 8.32725246777275e-06, "loss": 0.6011, "step": 24273 }, { "epoch": 0.29022346034744556, "grad_norm": 1.8326513767242432, "learning_rate": 8.327107941330875e-06, "loss": 0.6837, "step": 24274 }, { "epoch": 0.2902354164923062, "grad_norm": 2.321845293045044, "learning_rate": 8.326963409899986e-06, "loss": 0.6327, "step": 24275 }, { "epoch": 0.2902473726371669, "grad_norm": 1.5140202045440674, "learning_rate": 8.326818873480305e-06, "loss": 0.6126, "step": 24276 }, { "epoch": 0.29025932878202754, "grad_norm": 2.373380661010742, "learning_rate": 8.326674332072047e-06, "loss": 0.6045, "step": 24277 }, { "epoch": 0.2902712849268882, "grad_norm": 1.4010456800460815, "learning_rate": 8.32652978567543e-06, "loss": 0.564, "step": 24278 }, { "epoch": 0.2902832410717488, "grad_norm": 2.9498159885406494, "learning_rate": 8.326385234290668e-06, "loss": 0.639, "step": 24279 }, { "epoch": 0.29029519721660946, "grad_norm": 1.7599420547485352, "learning_rate": 8.32624067791798e-06, "loss": 0.6244, "step": 24280 }, { "epoch": 0.2903071533614701, "grad_norm": 5.148091793060303, "learning_rate": 8.326096116557584e-06, "loss": 0.5889, "step": 24281 }, { "epoch": 0.2903191095063308, "grad_norm": 6.558106899261475, "learning_rate": 8.325951550209693e-06, "loss": 0.6102, "step": 24282 }, { "epoch": 0.29033106565119143, "grad_norm": 1.7460542917251587, "learning_rate": 8.325806978874527e-06, "loss": 0.4988, "step": 24283 }, { "epoch": 0.2903430217960521, "grad_norm": 1.611818790435791, "learning_rate": 8.325662402552301e-06, "loss": 0.5846, "step": 24284 }, { "epoch": 0.29035497794091275, "grad_norm": 2.769876480102539, "learning_rate": 8.325517821243232e-06, "loss": 0.6926, "step": 24285 }, { "epoch": 0.2903669340857734, "grad_norm": 1.78895103931427, "learning_rate": 8.325373234947539e-06, "loss": 0.6077, "step": 24286 }, { "epoch": 0.290378890230634, "grad_norm": 2.6529250144958496, "learning_rate": 8.325228643665435e-06, "loss": 0.5545, "step": 24287 }, { "epoch": 0.29039084637549467, "grad_norm": 2.8506276607513428, "learning_rate": 8.32508404739714e-06, "loss": 0.6071, "step": 24288 }, { "epoch": 0.29040280252035533, "grad_norm": 2.0504837036132812, "learning_rate": 8.324939446142869e-06, "loss": 0.6293, "step": 24289 }, { "epoch": 0.290414758665216, "grad_norm": 2.234788656234741, "learning_rate": 8.32479483990284e-06, "loss": 0.655, "step": 24290 }, { "epoch": 0.29042671481007665, "grad_norm": 2.418449640274048, "learning_rate": 8.32465022867727e-06, "loss": 0.6081, "step": 24291 }, { "epoch": 0.2904386709549373, "grad_norm": 2.04783296585083, "learning_rate": 8.324505612466373e-06, "loss": 0.602, "step": 24292 }, { "epoch": 0.29045062709979796, "grad_norm": 2.0754618644714355, "learning_rate": 8.32436099127037e-06, "loss": 0.6095, "step": 24293 }, { "epoch": 0.29046258324465857, "grad_norm": 6.557660102844238, "learning_rate": 8.324216365089475e-06, "loss": 0.6894, "step": 24294 }, { "epoch": 0.2904745393895192, "grad_norm": 1.7511099576950073, "learning_rate": 8.324071733923906e-06, "loss": 0.7118, "step": 24295 }, { "epoch": 0.2904864955343799, "grad_norm": 1.978641152381897, "learning_rate": 8.323927097773878e-06, "loss": 0.5956, "step": 24296 }, { "epoch": 0.29049845167924054, "grad_norm": 1.699006199836731, "learning_rate": 8.323782456639611e-06, "loss": 0.5962, "step": 24297 }, { "epoch": 0.2905104078241012, "grad_norm": 1.8982185125350952, "learning_rate": 8.323637810521322e-06, "loss": 0.563, "step": 24298 }, { "epoch": 0.29052236396896186, "grad_norm": 1.8062089681625366, "learning_rate": 8.323493159419224e-06, "loss": 0.6792, "step": 24299 }, { "epoch": 0.2905343201138225, "grad_norm": 2.2959976196289062, "learning_rate": 8.323348503333537e-06, "loss": 0.6196, "step": 24300 }, { "epoch": 0.2905462762586831, "grad_norm": 1.719008207321167, "learning_rate": 8.323203842264477e-06, "loss": 0.5203, "step": 24301 }, { "epoch": 0.2905582324035438, "grad_norm": 2.741567850112915, "learning_rate": 8.32305917621226e-06, "loss": 0.6774, "step": 24302 }, { "epoch": 0.29057018854840444, "grad_norm": 2.584069013595581, "learning_rate": 8.322914505177107e-06, "loss": 0.6009, "step": 24303 }, { "epoch": 0.2905821446932651, "grad_norm": 3.729705333709717, "learning_rate": 8.322769829159229e-06, "loss": 0.6063, "step": 24304 }, { "epoch": 0.29059410083812576, "grad_norm": 2.1632096767425537, "learning_rate": 8.322625148158847e-06, "loss": 0.7576, "step": 24305 }, { "epoch": 0.2906060569829864, "grad_norm": 1.5279308557510376, "learning_rate": 8.322480462176176e-06, "loss": 0.5451, "step": 24306 }, { "epoch": 0.2906180131278471, "grad_norm": 2.0933241844177246, "learning_rate": 8.322335771211434e-06, "loss": 0.6777, "step": 24307 }, { "epoch": 0.29062996927270773, "grad_norm": 1.7035330533981323, "learning_rate": 8.322191075264839e-06, "loss": 0.6341, "step": 24308 }, { "epoch": 0.29064192541756834, "grad_norm": 1.275250792503357, "learning_rate": 8.322046374336606e-06, "loss": 0.5698, "step": 24309 }, { "epoch": 0.290653881562429, "grad_norm": 2.0033059120178223, "learning_rate": 8.32190166842695e-06, "loss": 0.5201, "step": 24310 }, { "epoch": 0.29066583770728965, "grad_norm": 1.9120327234268188, "learning_rate": 8.321756957536093e-06, "loss": 0.5757, "step": 24311 }, { "epoch": 0.2906777938521503, "grad_norm": 2.8227007389068604, "learning_rate": 8.32161224166425e-06, "loss": 0.6736, "step": 24312 }, { "epoch": 0.29068974999701097, "grad_norm": 1.9837015867233276, "learning_rate": 8.321467520811636e-06, "loss": 0.5567, "step": 24313 }, { "epoch": 0.29070170614187163, "grad_norm": 1.6644786596298218, "learning_rate": 8.321322794978473e-06, "loss": 0.6911, "step": 24314 }, { "epoch": 0.2907136622867323, "grad_norm": 2.3337905406951904, "learning_rate": 8.321178064164972e-06, "loss": 0.5942, "step": 24315 }, { "epoch": 0.2907256184315929, "grad_norm": 3.2022013664245605, "learning_rate": 8.321033328371352e-06, "loss": 0.5261, "step": 24316 }, { "epoch": 0.29073757457645355, "grad_norm": 2.0603973865509033, "learning_rate": 8.320888587597832e-06, "loss": 0.6444, "step": 24317 }, { "epoch": 0.2907495307213142, "grad_norm": 1.6505485773086548, "learning_rate": 8.320743841844629e-06, "loss": 0.5962, "step": 24318 }, { "epoch": 0.29076148686617487, "grad_norm": 4.343466281890869, "learning_rate": 8.320599091111956e-06, "loss": 0.6307, "step": 24319 }, { "epoch": 0.2907734430110355, "grad_norm": 2.0544090270996094, "learning_rate": 8.320454335400034e-06, "loss": 0.6032, "step": 24320 }, { "epoch": 0.2907853991558962, "grad_norm": 2.6241674423217773, "learning_rate": 8.32030957470908e-06, "loss": 0.5786, "step": 24321 }, { "epoch": 0.29079735530075684, "grad_norm": 1.9048233032226562, "learning_rate": 8.32016480903931e-06, "loss": 0.621, "step": 24322 }, { "epoch": 0.2908093114456175, "grad_norm": 2.4035532474517822, "learning_rate": 8.32002003839094e-06, "loss": 0.6188, "step": 24323 }, { "epoch": 0.2908212675904781, "grad_norm": 1.7954431772232056, "learning_rate": 8.319875262764187e-06, "loss": 0.5194, "step": 24324 }, { "epoch": 0.29083322373533876, "grad_norm": 2.0137548446655273, "learning_rate": 8.319730482159271e-06, "loss": 0.5779, "step": 24325 }, { "epoch": 0.2908451798801994, "grad_norm": 4.5390238761901855, "learning_rate": 8.319585696576409e-06, "loss": 0.6574, "step": 24326 }, { "epoch": 0.2908571360250601, "grad_norm": 3.948655605316162, "learning_rate": 8.319440906015814e-06, "loss": 0.5725, "step": 24327 }, { "epoch": 0.29086909216992074, "grad_norm": 3.027906894683838, "learning_rate": 8.319296110477705e-06, "loss": 0.6379, "step": 24328 }, { "epoch": 0.2908810483147814, "grad_norm": 2.404588460922241, "learning_rate": 8.3191513099623e-06, "loss": 0.6474, "step": 24329 }, { "epoch": 0.29089300445964206, "grad_norm": 1.7485979795455933, "learning_rate": 8.319006504469817e-06, "loss": 0.5593, "step": 24330 }, { "epoch": 0.29090496060450266, "grad_norm": 1.7604113817214966, "learning_rate": 8.318861694000473e-06, "loss": 0.6012, "step": 24331 }, { "epoch": 0.2909169167493633, "grad_norm": 3.105201482772827, "learning_rate": 8.318716878554481e-06, "loss": 0.5729, "step": 24332 }, { "epoch": 0.290928872894224, "grad_norm": 2.850520610809326, "learning_rate": 8.318572058132065e-06, "loss": 0.5356, "step": 24333 }, { "epoch": 0.29094082903908464, "grad_norm": 4.166544437408447, "learning_rate": 8.318427232733434e-06, "loss": 0.5752, "step": 24334 }, { "epoch": 0.2909527851839453, "grad_norm": 2.544538974761963, "learning_rate": 8.318282402358812e-06, "loss": 0.5264, "step": 24335 }, { "epoch": 0.29096474132880595, "grad_norm": 1.722962737083435, "learning_rate": 8.318137567008414e-06, "loss": 0.5419, "step": 24336 }, { "epoch": 0.2909766974736666, "grad_norm": 4.900580406188965, "learning_rate": 8.317992726682456e-06, "loss": 0.6103, "step": 24337 }, { "epoch": 0.2909886536185272, "grad_norm": 1.6260992288589478, "learning_rate": 8.317847881381155e-06, "loss": 0.569, "step": 24338 }, { "epoch": 0.2910006097633879, "grad_norm": 1.7462494373321533, "learning_rate": 8.317703031104734e-06, "loss": 0.5754, "step": 24339 }, { "epoch": 0.29101256590824853, "grad_norm": 1.965707778930664, "learning_rate": 8.317558175853401e-06, "loss": 0.5321, "step": 24340 }, { "epoch": 0.2910245220531092, "grad_norm": 2.6532411575317383, "learning_rate": 8.31741331562738e-06, "loss": 0.6808, "step": 24341 }, { "epoch": 0.29103647819796985, "grad_norm": 1.6515367031097412, "learning_rate": 8.317268450426885e-06, "loss": 0.6283, "step": 24342 }, { "epoch": 0.2910484343428305, "grad_norm": 2.7519779205322266, "learning_rate": 8.317123580252133e-06, "loss": 0.5475, "step": 24343 }, { "epoch": 0.29106039048769117, "grad_norm": 1.5643057823181152, "learning_rate": 8.316978705103344e-06, "loss": 0.7094, "step": 24344 }, { "epoch": 0.2910723466325518, "grad_norm": 2.2148303985595703, "learning_rate": 8.316833824980734e-06, "loss": 0.6285, "step": 24345 }, { "epoch": 0.29108430277741243, "grad_norm": 1.918097734451294, "learning_rate": 8.316688939884518e-06, "loss": 0.5152, "step": 24346 }, { "epoch": 0.2910962589222731, "grad_norm": 2.388601303100586, "learning_rate": 8.316544049814917e-06, "loss": 0.7305, "step": 24347 }, { "epoch": 0.29110821506713375, "grad_norm": 2.6273488998413086, "learning_rate": 8.316399154772145e-06, "loss": 0.6894, "step": 24348 }, { "epoch": 0.2911201712119944, "grad_norm": 7.0695013999938965, "learning_rate": 8.316254254756422e-06, "loss": 0.6104, "step": 24349 }, { "epoch": 0.29113212735685506, "grad_norm": 2.0247597694396973, "learning_rate": 8.316109349767964e-06, "loss": 0.5326, "step": 24350 }, { "epoch": 0.2911440835017157, "grad_norm": 2.076709508895874, "learning_rate": 8.315964439806987e-06, "loss": 0.6326, "step": 24351 }, { "epoch": 0.2911560396465764, "grad_norm": 2.0784342288970947, "learning_rate": 8.31581952487371e-06, "loss": 0.6083, "step": 24352 }, { "epoch": 0.291167995791437, "grad_norm": 2.330894708633423, "learning_rate": 8.31567460496835e-06, "loss": 0.6441, "step": 24353 }, { "epoch": 0.29117995193629764, "grad_norm": 1.5869321823120117, "learning_rate": 8.315529680091123e-06, "loss": 0.6439, "step": 24354 }, { "epoch": 0.2911919080811583, "grad_norm": 1.6811158657073975, "learning_rate": 8.31538475024225e-06, "loss": 0.6718, "step": 24355 }, { "epoch": 0.29120386422601896, "grad_norm": 2.478055238723755, "learning_rate": 8.315239815421943e-06, "loss": 0.6167, "step": 24356 }, { "epoch": 0.2912158203708796, "grad_norm": 2.9142234325408936, "learning_rate": 8.315094875630426e-06, "loss": 0.6399, "step": 24357 }, { "epoch": 0.2912277765157403, "grad_norm": 2.035512685775757, "learning_rate": 8.314949930867908e-06, "loss": 0.6082, "step": 24358 }, { "epoch": 0.29123973266060094, "grad_norm": 1.7660232782363892, "learning_rate": 8.314804981134612e-06, "loss": 0.6356, "step": 24359 }, { "epoch": 0.29125168880546154, "grad_norm": 2.590310573577881, "learning_rate": 8.314660026430756e-06, "loss": 0.537, "step": 24360 }, { "epoch": 0.2912636449503222, "grad_norm": 2.968127965927124, "learning_rate": 8.314515066756555e-06, "loss": 0.6527, "step": 24361 }, { "epoch": 0.29127560109518286, "grad_norm": 4.040018081665039, "learning_rate": 8.314370102112227e-06, "loss": 0.5711, "step": 24362 }, { "epoch": 0.2912875572400435, "grad_norm": 2.170759439468384, "learning_rate": 8.314225132497987e-06, "loss": 0.6045, "step": 24363 }, { "epoch": 0.2912995133849042, "grad_norm": 1.747999906539917, "learning_rate": 8.314080157914056e-06, "loss": 0.6046, "step": 24364 }, { "epoch": 0.29131146952976483, "grad_norm": 2.850584030151367, "learning_rate": 8.31393517836065e-06, "loss": 0.6385, "step": 24365 }, { "epoch": 0.2913234256746255, "grad_norm": 2.115165948867798, "learning_rate": 8.313790193837987e-06, "loss": 0.6178, "step": 24366 }, { "epoch": 0.29133538181948615, "grad_norm": 1.448461890220642, "learning_rate": 8.313645204346283e-06, "loss": 0.6623, "step": 24367 }, { "epoch": 0.29134733796434675, "grad_norm": 2.7402713298797607, "learning_rate": 8.313500209885756e-06, "loss": 0.5716, "step": 24368 }, { "epoch": 0.2913592941092074, "grad_norm": 1.426157832145691, "learning_rate": 8.313355210456624e-06, "loss": 0.5565, "step": 24369 }, { "epoch": 0.29137125025406807, "grad_norm": 1.5907515287399292, "learning_rate": 8.313210206059105e-06, "loss": 0.6086, "step": 24370 }, { "epoch": 0.29138320639892873, "grad_norm": 3.009023427963257, "learning_rate": 8.313065196693415e-06, "loss": 0.5512, "step": 24371 }, { "epoch": 0.2913951625437894, "grad_norm": 1.6734020709991455, "learning_rate": 8.312920182359772e-06, "loss": 0.6332, "step": 24372 }, { "epoch": 0.29140711868865005, "grad_norm": 2.984149694442749, "learning_rate": 8.312775163058394e-06, "loss": 0.701, "step": 24373 }, { "epoch": 0.2914190748335107, "grad_norm": 1.6868542432785034, "learning_rate": 8.312630138789495e-06, "loss": 0.6139, "step": 24374 }, { "epoch": 0.2914310309783713, "grad_norm": 1.5965638160705566, "learning_rate": 8.3124851095533e-06, "loss": 0.546, "step": 24375 }, { "epoch": 0.29144298712323197, "grad_norm": 1.703027606010437, "learning_rate": 8.312340075350018e-06, "loss": 0.5355, "step": 24376 }, { "epoch": 0.2914549432680926, "grad_norm": 3.9335057735443115, "learning_rate": 8.312195036179872e-06, "loss": 0.7019, "step": 24377 }, { "epoch": 0.2914668994129533, "grad_norm": 2.3483879566192627, "learning_rate": 8.312049992043077e-06, "loss": 0.6273, "step": 24378 }, { "epoch": 0.29147885555781394, "grad_norm": 1.7575725317001343, "learning_rate": 8.311904942939852e-06, "loss": 0.7029, "step": 24379 }, { "epoch": 0.2914908117026746, "grad_norm": 1.8207504749298096, "learning_rate": 8.311759888870412e-06, "loss": 0.5876, "step": 24380 }, { "epoch": 0.29150276784753526, "grad_norm": 1.6651253700256348, "learning_rate": 8.311614829834978e-06, "loss": 0.5509, "step": 24381 }, { "epoch": 0.2915147239923959, "grad_norm": 2.6086761951446533, "learning_rate": 8.311469765833767e-06, "loss": 0.6543, "step": 24382 }, { "epoch": 0.2915266801372565, "grad_norm": 1.9599446058273315, "learning_rate": 8.311324696866993e-06, "loss": 0.6367, "step": 24383 }, { "epoch": 0.2915386362821172, "grad_norm": 3.1938817501068115, "learning_rate": 8.311179622934875e-06, "loss": 0.5718, "step": 24384 }, { "epoch": 0.29155059242697784, "grad_norm": 5.299185276031494, "learning_rate": 8.311034544037634e-06, "loss": 0.6401, "step": 24385 }, { "epoch": 0.2915625485718385, "grad_norm": 3.281310558319092, "learning_rate": 8.310889460175484e-06, "loss": 0.5413, "step": 24386 }, { "epoch": 0.29157450471669916, "grad_norm": 2.6246652603149414, "learning_rate": 8.310744371348643e-06, "loss": 0.5944, "step": 24387 }, { "epoch": 0.2915864608615598, "grad_norm": 2.567014217376709, "learning_rate": 8.31059927755733e-06, "loss": 0.6515, "step": 24388 }, { "epoch": 0.2915984170064205, "grad_norm": 2.0681324005126953, "learning_rate": 8.31045417880176e-06, "loss": 0.638, "step": 24389 }, { "epoch": 0.2916103731512811, "grad_norm": 3.5060131549835205, "learning_rate": 8.310309075082153e-06, "loss": 0.6303, "step": 24390 }, { "epoch": 0.29162232929614174, "grad_norm": 1.8874849081039429, "learning_rate": 8.310163966398727e-06, "loss": 0.5575, "step": 24391 }, { "epoch": 0.2916342854410024, "grad_norm": 1.612259030342102, "learning_rate": 8.310018852751696e-06, "loss": 0.6164, "step": 24392 }, { "epoch": 0.29164624158586305, "grad_norm": 1.7670434713363647, "learning_rate": 8.309873734141283e-06, "loss": 0.5161, "step": 24393 }, { "epoch": 0.2916581977307237, "grad_norm": 1.8059499263763428, "learning_rate": 8.3097286105677e-06, "loss": 0.5939, "step": 24394 }, { "epoch": 0.29167015387558437, "grad_norm": 2.6433372497558594, "learning_rate": 8.309583482031167e-06, "loss": 0.5861, "step": 24395 }, { "epoch": 0.29168211002044503, "grad_norm": 1.5323207378387451, "learning_rate": 8.309438348531904e-06, "loss": 0.6112, "step": 24396 }, { "epoch": 0.29169406616530563, "grad_norm": 1.517402172088623, "learning_rate": 8.309293210070125e-06, "loss": 0.6441, "step": 24397 }, { "epoch": 0.2917060223101663, "grad_norm": 1.8390084505081177, "learning_rate": 8.309148066646051e-06, "loss": 0.5436, "step": 24398 }, { "epoch": 0.29171797845502695, "grad_norm": 2.4554524421691895, "learning_rate": 8.309002918259895e-06, "loss": 0.5665, "step": 24399 }, { "epoch": 0.2917299345998876, "grad_norm": 2.7584404945373535, "learning_rate": 8.308857764911878e-06, "loss": 0.5939, "step": 24400 }, { "epoch": 0.29174189074474827, "grad_norm": 5.297068119049072, "learning_rate": 8.308712606602218e-06, "loss": 0.6387, "step": 24401 }, { "epoch": 0.2917538468896089, "grad_norm": 2.3305704593658447, "learning_rate": 8.30856744333113e-06, "loss": 0.658, "step": 24402 }, { "epoch": 0.2917658030344696, "grad_norm": 2.5555412769317627, "learning_rate": 8.308422275098836e-06, "loss": 0.6612, "step": 24403 }, { "epoch": 0.29177775917933024, "grad_norm": 20.09398078918457, "learning_rate": 8.308277101905548e-06, "loss": 0.6378, "step": 24404 }, { "epoch": 0.29178971532419085, "grad_norm": 8.10053539276123, "learning_rate": 8.30813192375149e-06, "loss": 0.6009, "step": 24405 }, { "epoch": 0.2918016714690515, "grad_norm": 2.9149906635284424, "learning_rate": 8.307986740636874e-06, "loss": 0.657, "step": 24406 }, { "epoch": 0.29181362761391216, "grad_norm": 1.7539517879486084, "learning_rate": 8.30784155256192e-06, "loss": 0.5646, "step": 24407 }, { "epoch": 0.2918255837587728, "grad_norm": 1.5818455219268799, "learning_rate": 8.307696359526847e-06, "loss": 0.6521, "step": 24408 }, { "epoch": 0.2918375399036335, "grad_norm": 1.8447165489196777, "learning_rate": 8.307551161531872e-06, "loss": 0.5963, "step": 24409 }, { "epoch": 0.29184949604849414, "grad_norm": 1.737353801727295, "learning_rate": 8.307405958577212e-06, "loss": 0.6018, "step": 24410 }, { "epoch": 0.2918614521933548, "grad_norm": 1.9728175401687622, "learning_rate": 8.307260750663085e-06, "loss": 0.529, "step": 24411 }, { "epoch": 0.2918734083382154, "grad_norm": 10.227632522583008, "learning_rate": 8.307115537789707e-06, "loss": 0.6187, "step": 24412 }, { "epoch": 0.29188536448307606, "grad_norm": 1.9039733409881592, "learning_rate": 8.306970319957299e-06, "loss": 0.6727, "step": 24413 }, { "epoch": 0.2918973206279367, "grad_norm": 1.671979546546936, "learning_rate": 8.306825097166077e-06, "loss": 0.6872, "step": 24414 }, { "epoch": 0.2919092767727974, "grad_norm": 2.349963903427124, "learning_rate": 8.30667986941626e-06, "loss": 0.5218, "step": 24415 }, { "epoch": 0.29192123291765804, "grad_norm": 5.3592305183410645, "learning_rate": 8.306534636708062e-06, "loss": 0.7095, "step": 24416 }, { "epoch": 0.2919331890625187, "grad_norm": 1.9668010473251343, "learning_rate": 8.306389399041706e-06, "loss": 0.6633, "step": 24417 }, { "epoch": 0.29194514520737935, "grad_norm": 2.766617774963379, "learning_rate": 8.306244156417408e-06, "loss": 0.6274, "step": 24418 }, { "epoch": 0.29195710135223996, "grad_norm": 2.23702335357666, "learning_rate": 8.306098908835383e-06, "loss": 0.5948, "step": 24419 }, { "epoch": 0.2919690574971006, "grad_norm": 2.3423564434051514, "learning_rate": 8.30595365629585e-06, "loss": 0.621, "step": 24420 }, { "epoch": 0.2919810136419613, "grad_norm": 2.0434815883636475, "learning_rate": 8.305808398799032e-06, "loss": 0.5995, "step": 24421 }, { "epoch": 0.29199296978682193, "grad_norm": 2.683467149734497, "learning_rate": 8.30566313634514e-06, "loss": 0.5588, "step": 24422 }, { "epoch": 0.2920049259316826, "grad_norm": 3.206157684326172, "learning_rate": 8.305517868934394e-06, "loss": 0.6809, "step": 24423 }, { "epoch": 0.29201688207654325, "grad_norm": 2.212430715560913, "learning_rate": 8.305372596567013e-06, "loss": 0.7426, "step": 24424 }, { "epoch": 0.2920288382214039, "grad_norm": 4.015080451965332, "learning_rate": 8.305227319243211e-06, "loss": 0.6521, "step": 24425 }, { "epoch": 0.29204079436626457, "grad_norm": 2.3123619556427, "learning_rate": 8.305082036963214e-06, "loss": 0.5744, "step": 24426 }, { "epoch": 0.29205275051112517, "grad_norm": 1.6568392515182495, "learning_rate": 8.304936749727231e-06, "loss": 0.6302, "step": 24427 }, { "epoch": 0.29206470665598583, "grad_norm": 2.4214255809783936, "learning_rate": 8.304791457535486e-06, "loss": 0.6162, "step": 24428 }, { "epoch": 0.2920766628008465, "grad_norm": 5.016497611999512, "learning_rate": 8.304646160388192e-06, "loss": 0.6337, "step": 24429 }, { "epoch": 0.29208861894570715, "grad_norm": 2.757291078567505, "learning_rate": 8.304500858285571e-06, "loss": 0.6337, "step": 24430 }, { "epoch": 0.2921005750905678, "grad_norm": 3.3966829776763916, "learning_rate": 8.304355551227839e-06, "loss": 0.5979, "step": 24431 }, { "epoch": 0.29211253123542846, "grad_norm": 1.682530164718628, "learning_rate": 8.304210239215213e-06, "loss": 0.6318, "step": 24432 }, { "epoch": 0.2921244873802891, "grad_norm": 1.799911379814148, "learning_rate": 8.304064922247915e-06, "loss": 0.7102, "step": 24433 }, { "epoch": 0.2921364435251497, "grad_norm": 1.8986523151397705, "learning_rate": 8.303919600326157e-06, "loss": 0.5115, "step": 24434 }, { "epoch": 0.2921483996700104, "grad_norm": 2.0792715549468994, "learning_rate": 8.303774273450161e-06, "loss": 0.5988, "step": 24435 }, { "epoch": 0.29216035581487104, "grad_norm": 4.104678630828857, "learning_rate": 8.303628941620144e-06, "loss": 0.6524, "step": 24436 }, { "epoch": 0.2921723119597317, "grad_norm": 2.030324935913086, "learning_rate": 8.303483604836322e-06, "loss": 0.7082, "step": 24437 }, { "epoch": 0.29218426810459236, "grad_norm": 1.771479606628418, "learning_rate": 8.303338263098918e-06, "loss": 0.6031, "step": 24438 }, { "epoch": 0.292196224249453, "grad_norm": 3.820864677429199, "learning_rate": 8.303192916408143e-06, "loss": 0.6323, "step": 24439 }, { "epoch": 0.2922081803943137, "grad_norm": 1.9709731340408325, "learning_rate": 8.303047564764219e-06, "loss": 0.6015, "step": 24440 }, { "epoch": 0.29222013653917434, "grad_norm": 2.1695895195007324, "learning_rate": 8.302902208167365e-06, "loss": 0.6883, "step": 24441 }, { "epoch": 0.29223209268403494, "grad_norm": 1.673974633216858, "learning_rate": 8.302756846617797e-06, "loss": 0.6165, "step": 24442 }, { "epoch": 0.2922440488288956, "grad_norm": 2.6352908611297607, "learning_rate": 8.302611480115733e-06, "loss": 0.5739, "step": 24443 }, { "epoch": 0.29225600497375626, "grad_norm": 3.963999032974243, "learning_rate": 8.30246610866139e-06, "loss": 0.5589, "step": 24444 }, { "epoch": 0.2922679611186169, "grad_norm": 3.6140294075012207, "learning_rate": 8.302320732254989e-06, "loss": 0.5854, "step": 24445 }, { "epoch": 0.2922799172634776, "grad_norm": 2.2928149700164795, "learning_rate": 8.302175350896746e-06, "loss": 0.6769, "step": 24446 }, { "epoch": 0.29229187340833823, "grad_norm": 1.6470849514007568, "learning_rate": 8.302029964586879e-06, "loss": 0.5971, "step": 24447 }, { "epoch": 0.2923038295531989, "grad_norm": 1.6081221103668213, "learning_rate": 8.301884573325608e-06, "loss": 0.6786, "step": 24448 }, { "epoch": 0.2923157856980595, "grad_norm": 7.970398426055908, "learning_rate": 8.301739177113146e-06, "loss": 0.621, "step": 24449 }, { "epoch": 0.29232774184292015, "grad_norm": 4.321261405944824, "learning_rate": 8.301593775949716e-06, "loss": 0.5503, "step": 24450 }, { "epoch": 0.2923396979877808, "grad_norm": 1.8075957298278809, "learning_rate": 8.301448369835536e-06, "loss": 0.6469, "step": 24451 }, { "epoch": 0.29235165413264147, "grad_norm": 4.591891288757324, "learning_rate": 8.30130295877082e-06, "loss": 0.5848, "step": 24452 }, { "epoch": 0.29236361027750213, "grad_norm": 1.6758166551589966, "learning_rate": 8.30115754275579e-06, "loss": 0.6304, "step": 24453 }, { "epoch": 0.2923755664223628, "grad_norm": 2.2943363189697266, "learning_rate": 8.301012121790663e-06, "loss": 0.5207, "step": 24454 }, { "epoch": 0.29238752256722345, "grad_norm": 2.428053140640259, "learning_rate": 8.300866695875656e-06, "loss": 0.5509, "step": 24455 }, { "epoch": 0.29239947871208405, "grad_norm": 1.8209172487258911, "learning_rate": 8.300721265010987e-06, "loss": 0.5697, "step": 24456 }, { "epoch": 0.2924114348569447, "grad_norm": 2.737703323364258, "learning_rate": 8.300575829196876e-06, "loss": 0.6008, "step": 24457 }, { "epoch": 0.29242339100180537, "grad_norm": 2.7610085010528564, "learning_rate": 8.300430388433538e-06, "loss": 0.5621, "step": 24458 }, { "epoch": 0.292435347146666, "grad_norm": 2.267991304397583, "learning_rate": 8.300284942721194e-06, "loss": 0.6358, "step": 24459 }, { "epoch": 0.2924473032915267, "grad_norm": 1.7480745315551758, "learning_rate": 8.30013949206006e-06, "loss": 0.5553, "step": 24460 }, { "epoch": 0.29245925943638734, "grad_norm": 3.370096206665039, "learning_rate": 8.299994036450356e-06, "loss": 0.6288, "step": 24461 }, { "epoch": 0.292471215581248, "grad_norm": 3.0422871112823486, "learning_rate": 8.2998485758923e-06, "loss": 0.5964, "step": 24462 }, { "epoch": 0.29248317172610866, "grad_norm": 3.289841413497925, "learning_rate": 8.299703110386109e-06, "loss": 0.6364, "step": 24463 }, { "epoch": 0.29249512787096926, "grad_norm": 2.341885805130005, "learning_rate": 8.299557639931999e-06, "loss": 0.5784, "step": 24464 }, { "epoch": 0.2925070840158299, "grad_norm": 2.0744662284851074, "learning_rate": 8.299412164530194e-06, "loss": 0.6032, "step": 24465 }, { "epoch": 0.2925190401606906, "grad_norm": 1.6645886898040771, "learning_rate": 8.299266684180906e-06, "loss": 0.5633, "step": 24466 }, { "epoch": 0.29253099630555124, "grad_norm": 2.178104877471924, "learning_rate": 8.299121198884358e-06, "loss": 0.6652, "step": 24467 }, { "epoch": 0.2925429524504119, "grad_norm": 2.486919641494751, "learning_rate": 8.298975708640767e-06, "loss": 0.6388, "step": 24468 }, { "epoch": 0.29255490859527256, "grad_norm": 2.15653657913208, "learning_rate": 8.298830213450346e-06, "loss": 0.6304, "step": 24469 }, { "epoch": 0.2925668647401332, "grad_norm": 1.5562416315078735, "learning_rate": 8.298684713313321e-06, "loss": 0.682, "step": 24470 }, { "epoch": 0.2925788208849938, "grad_norm": 1.693107008934021, "learning_rate": 8.298539208229906e-06, "loss": 0.6187, "step": 24471 }, { "epoch": 0.2925907770298545, "grad_norm": 1.6731979846954346, "learning_rate": 8.298393698200318e-06, "loss": 0.6099, "step": 24472 }, { "epoch": 0.29260273317471513, "grad_norm": 2.6996355056762695, "learning_rate": 8.298248183224778e-06, "loss": 0.6768, "step": 24473 }, { "epoch": 0.2926146893195758, "grad_norm": 2.3519222736358643, "learning_rate": 8.298102663303501e-06, "loss": 0.6743, "step": 24474 }, { "epoch": 0.29262664546443645, "grad_norm": 1.9577715396881104, "learning_rate": 8.297957138436712e-06, "loss": 0.6517, "step": 24475 }, { "epoch": 0.2926386016092971, "grad_norm": 2.1577227115631104, "learning_rate": 8.297811608624622e-06, "loss": 0.5664, "step": 24476 }, { "epoch": 0.29265055775415777, "grad_norm": 3.6312694549560547, "learning_rate": 8.297666073867452e-06, "loss": 0.6422, "step": 24477 }, { "epoch": 0.2926625138990184, "grad_norm": 1.8452868461608887, "learning_rate": 8.297520534165419e-06, "loss": 0.5969, "step": 24478 }, { "epoch": 0.29267447004387903, "grad_norm": 5.122835159301758, "learning_rate": 8.297374989518743e-06, "loss": 0.6491, "step": 24479 }, { "epoch": 0.2926864261887397, "grad_norm": 2.4194016456604004, "learning_rate": 8.29722943992764e-06, "loss": 0.5943, "step": 24480 }, { "epoch": 0.29269838233360035, "grad_norm": 2.1152865886688232, "learning_rate": 8.297083885392333e-06, "loss": 0.6304, "step": 24481 }, { "epoch": 0.292710338478461, "grad_norm": 1.7540273666381836, "learning_rate": 8.296938325913036e-06, "loss": 0.5849, "step": 24482 }, { "epoch": 0.29272229462332167, "grad_norm": 1.7079790830612183, "learning_rate": 8.296792761489966e-06, "loss": 0.6518, "step": 24483 }, { "epoch": 0.2927342507681823, "grad_norm": 3.307147264480591, "learning_rate": 8.296647192123345e-06, "loss": 0.6306, "step": 24484 }, { "epoch": 0.292746206913043, "grad_norm": 2.3510255813598633, "learning_rate": 8.296501617813391e-06, "loss": 0.5882, "step": 24485 }, { "epoch": 0.2927581630579036, "grad_norm": 2.0404953956604004, "learning_rate": 8.296356038560318e-06, "loss": 0.5098, "step": 24486 }, { "epoch": 0.29277011920276425, "grad_norm": 1.6204438209533691, "learning_rate": 8.29621045436435e-06, "loss": 0.6558, "step": 24487 }, { "epoch": 0.2927820753476249, "grad_norm": 1.646216869354248, "learning_rate": 8.296064865225702e-06, "loss": 0.5532, "step": 24488 }, { "epoch": 0.29279403149248556, "grad_norm": 1.5528291463851929, "learning_rate": 8.295919271144593e-06, "loss": 0.586, "step": 24489 }, { "epoch": 0.2928059876373462, "grad_norm": 1.9108326435089111, "learning_rate": 8.29577367212124e-06, "loss": 0.6629, "step": 24490 }, { "epoch": 0.2928179437822069, "grad_norm": 2.93856143951416, "learning_rate": 8.295628068155865e-06, "loss": 0.5914, "step": 24491 }, { "epoch": 0.29282989992706754, "grad_norm": 2.112379550933838, "learning_rate": 8.295482459248683e-06, "loss": 0.5495, "step": 24492 }, { "epoch": 0.29284185607192814, "grad_norm": 3.418398857116699, "learning_rate": 8.295336845399914e-06, "loss": 0.5618, "step": 24493 }, { "epoch": 0.2928538122167888, "grad_norm": 1.428444743156433, "learning_rate": 8.295191226609776e-06, "loss": 0.6559, "step": 24494 }, { "epoch": 0.29286576836164946, "grad_norm": 2.3597311973571777, "learning_rate": 8.295045602878484e-06, "loss": 0.6786, "step": 24495 }, { "epoch": 0.2928777245065101, "grad_norm": 1.7010505199432373, "learning_rate": 8.294899974206264e-06, "loss": 0.6596, "step": 24496 }, { "epoch": 0.2928896806513708, "grad_norm": 1.8026334047317505, "learning_rate": 8.294754340593325e-06, "loss": 0.6007, "step": 24497 }, { "epoch": 0.29290163679623143, "grad_norm": 2.2177023887634277, "learning_rate": 8.294608702039892e-06, "loss": 0.4655, "step": 24498 }, { "epoch": 0.2929135929410921, "grad_norm": 2.1455113887786865, "learning_rate": 8.294463058546184e-06, "loss": 0.5683, "step": 24499 }, { "epoch": 0.29292554908595275, "grad_norm": 1.812393069267273, "learning_rate": 8.294317410112415e-06, "loss": 0.6722, "step": 24500 }, { "epoch": 0.29293750523081336, "grad_norm": 1.578900694847107, "learning_rate": 8.294171756738805e-06, "loss": 0.6805, "step": 24501 }, { "epoch": 0.292949461375674, "grad_norm": 1.3190350532531738, "learning_rate": 8.294026098425572e-06, "loss": 0.6096, "step": 24502 }, { "epoch": 0.2929614175205347, "grad_norm": 2.4041659832000732, "learning_rate": 8.293880435172937e-06, "loss": 0.7195, "step": 24503 }, { "epoch": 0.29297337366539533, "grad_norm": 1.9223238229751587, "learning_rate": 8.293734766981117e-06, "loss": 0.5534, "step": 24504 }, { "epoch": 0.292985329810256, "grad_norm": 1.4919527769088745, "learning_rate": 8.293589093850327e-06, "loss": 0.5908, "step": 24505 }, { "epoch": 0.29299728595511665, "grad_norm": 3.0051345825195312, "learning_rate": 8.293443415780791e-06, "loss": 0.6618, "step": 24506 }, { "epoch": 0.2930092420999773, "grad_norm": 2.526925563812256, "learning_rate": 8.293297732772723e-06, "loss": 0.7149, "step": 24507 }, { "epoch": 0.2930211982448379, "grad_norm": 6.395779609680176, "learning_rate": 8.293152044826345e-06, "loss": 0.5874, "step": 24508 }, { "epoch": 0.29303315438969857, "grad_norm": 1.8022434711456299, "learning_rate": 8.293006351941873e-06, "loss": 0.5329, "step": 24509 }, { "epoch": 0.2930451105345592, "grad_norm": 1.5698636770248413, "learning_rate": 8.292860654119527e-06, "loss": 0.5925, "step": 24510 }, { "epoch": 0.2930570666794199, "grad_norm": 4.71060037612915, "learning_rate": 8.292714951359525e-06, "loss": 0.6618, "step": 24511 }, { "epoch": 0.29306902282428055, "grad_norm": 2.6002564430236816, "learning_rate": 8.292569243662083e-06, "loss": 0.6362, "step": 24512 }, { "epoch": 0.2930809789691412, "grad_norm": 1.8135201930999756, "learning_rate": 8.292423531027423e-06, "loss": 0.6424, "step": 24513 }, { "epoch": 0.29309293511400186, "grad_norm": 3.2035281658172607, "learning_rate": 8.292277813455762e-06, "loss": 0.5763, "step": 24514 }, { "epoch": 0.29310489125886247, "grad_norm": 2.5941500663757324, "learning_rate": 8.29213209094732e-06, "loss": 0.64, "step": 24515 }, { "epoch": 0.2931168474037231, "grad_norm": 3.9099948406219482, "learning_rate": 8.291986363502314e-06, "loss": 0.7196, "step": 24516 }, { "epoch": 0.2931288035485838, "grad_norm": 4.319674491882324, "learning_rate": 8.291840631120963e-06, "loss": 0.6379, "step": 24517 }, { "epoch": 0.29314075969344444, "grad_norm": 1.4701125621795654, "learning_rate": 8.291694893803482e-06, "loss": 0.691, "step": 24518 }, { "epoch": 0.2931527158383051, "grad_norm": 4.46894645690918, "learning_rate": 8.291549151550096e-06, "loss": 0.6547, "step": 24519 }, { "epoch": 0.29316467198316576, "grad_norm": 3.645176410675049, "learning_rate": 8.29140340436102e-06, "loss": 0.6053, "step": 24520 }, { "epoch": 0.2931766281280264, "grad_norm": 2.634220600128174, "learning_rate": 8.291257652236471e-06, "loss": 0.6472, "step": 24521 }, { "epoch": 0.2931885842728871, "grad_norm": 1.5069231986999512, "learning_rate": 8.291111895176671e-06, "loss": 0.5474, "step": 24522 }, { "epoch": 0.2932005404177477, "grad_norm": 2.0720906257629395, "learning_rate": 8.290966133181837e-06, "loss": 0.5937, "step": 24523 }, { "epoch": 0.29321249656260834, "grad_norm": 1.51283860206604, "learning_rate": 8.290820366252188e-06, "loss": 0.666, "step": 24524 }, { "epoch": 0.293224452707469, "grad_norm": 1.6215310096740723, "learning_rate": 8.290674594387941e-06, "loss": 0.6037, "step": 24525 }, { "epoch": 0.29323640885232966, "grad_norm": 1.7735629081726074, "learning_rate": 8.290528817589318e-06, "loss": 0.4773, "step": 24526 }, { "epoch": 0.2932483649971903, "grad_norm": 1.8406697511672974, "learning_rate": 8.290383035856532e-06, "loss": 0.6353, "step": 24527 }, { "epoch": 0.293260321142051, "grad_norm": 6.367076873779297, "learning_rate": 8.290237249189807e-06, "loss": 0.6394, "step": 24528 }, { "epoch": 0.29327227728691163, "grad_norm": 1.8732558488845825, "learning_rate": 8.290091457589359e-06, "loss": 0.6225, "step": 24529 }, { "epoch": 0.29328423343177223, "grad_norm": 1.5768115520477295, "learning_rate": 8.289945661055408e-06, "loss": 0.5181, "step": 24530 }, { "epoch": 0.2932961895766329, "grad_norm": 1.8004997968673706, "learning_rate": 8.289799859588171e-06, "loss": 0.6306, "step": 24531 }, { "epoch": 0.29330814572149355, "grad_norm": 2.2645163536071777, "learning_rate": 8.289654053187867e-06, "loss": 0.6241, "step": 24532 }, { "epoch": 0.2933201018663542, "grad_norm": 2.5541183948516846, "learning_rate": 8.289508241854716e-06, "loss": 0.5671, "step": 24533 }, { "epoch": 0.29333205801121487, "grad_norm": 1.8088343143463135, "learning_rate": 8.289362425588935e-06, "loss": 0.5615, "step": 24534 }, { "epoch": 0.2933440141560755, "grad_norm": 1.934938907623291, "learning_rate": 8.289216604390745e-06, "loss": 0.6011, "step": 24535 }, { "epoch": 0.2933559703009362, "grad_norm": 2.4110376834869385, "learning_rate": 8.289070778260361e-06, "loss": 0.6514, "step": 24536 }, { "epoch": 0.29336792644579684, "grad_norm": 1.6654962301254272, "learning_rate": 8.288924947198005e-06, "loss": 0.6061, "step": 24537 }, { "epoch": 0.29337988259065745, "grad_norm": 1.9746088981628418, "learning_rate": 8.288779111203893e-06, "loss": 0.6202, "step": 24538 }, { "epoch": 0.2933918387355181, "grad_norm": 3.8996851444244385, "learning_rate": 8.288633270278246e-06, "loss": 0.5825, "step": 24539 }, { "epoch": 0.29340379488037877, "grad_norm": 1.8707869052886963, "learning_rate": 8.28848742442128e-06, "loss": 0.5939, "step": 24540 }, { "epoch": 0.2934157510252394, "grad_norm": 2.3346126079559326, "learning_rate": 8.288341573633218e-06, "loss": 0.5113, "step": 24541 }, { "epoch": 0.2934277071701001, "grad_norm": 1.4688670635223389, "learning_rate": 8.288195717914275e-06, "loss": 0.6628, "step": 24542 }, { "epoch": 0.29343966331496074, "grad_norm": 1.7286428213119507, "learning_rate": 8.28804985726467e-06, "loss": 0.5651, "step": 24543 }, { "epoch": 0.2934516194598214, "grad_norm": 2.4389238357543945, "learning_rate": 8.287903991684625e-06, "loss": 0.6281, "step": 24544 }, { "epoch": 0.293463575604682, "grad_norm": 3.305453062057495, "learning_rate": 8.287758121174354e-06, "loss": 0.5286, "step": 24545 }, { "epoch": 0.29347553174954266, "grad_norm": 2.105757474899292, "learning_rate": 8.287612245734078e-06, "loss": 0.6326, "step": 24546 }, { "epoch": 0.2934874878944033, "grad_norm": 1.4891294240951538, "learning_rate": 8.287466365364016e-06, "loss": 0.5928, "step": 24547 }, { "epoch": 0.293499444039264, "grad_norm": 1.9669915437698364, "learning_rate": 8.287320480064387e-06, "loss": 0.5595, "step": 24548 }, { "epoch": 0.29351140018412464, "grad_norm": 2.2352821826934814, "learning_rate": 8.28717458983541e-06, "loss": 0.623, "step": 24549 }, { "epoch": 0.2935233563289853, "grad_norm": 1.8658610582351685, "learning_rate": 8.2870286946773e-06, "loss": 0.5321, "step": 24550 }, { "epoch": 0.29353531247384596, "grad_norm": 2.245241165161133, "learning_rate": 8.286882794590282e-06, "loss": 0.5805, "step": 24551 }, { "epoch": 0.29354726861870656, "grad_norm": 7.109738826751709, "learning_rate": 8.286736889574572e-06, "loss": 0.6493, "step": 24552 }, { "epoch": 0.2935592247635672, "grad_norm": 1.9773218631744385, "learning_rate": 8.286590979630387e-06, "loss": 0.6076, "step": 24553 }, { "epoch": 0.2935711809084279, "grad_norm": 1.9565670490264893, "learning_rate": 8.286445064757946e-06, "loss": 0.6072, "step": 24554 }, { "epoch": 0.29358313705328853, "grad_norm": 2.373608350753784, "learning_rate": 8.28629914495747e-06, "loss": 0.6317, "step": 24555 }, { "epoch": 0.2935950931981492, "grad_norm": 1.8271170854568481, "learning_rate": 8.286153220229178e-06, "loss": 0.7308, "step": 24556 }, { "epoch": 0.29360704934300985, "grad_norm": 3.856825113296509, "learning_rate": 8.286007290573286e-06, "loss": 0.5973, "step": 24557 }, { "epoch": 0.2936190054878705, "grad_norm": 5.723826885223389, "learning_rate": 8.285861355990014e-06, "loss": 0.5831, "step": 24558 }, { "epoch": 0.29363096163273117, "grad_norm": 2.585327386856079, "learning_rate": 8.285715416479583e-06, "loss": 0.5803, "step": 24559 }, { "epoch": 0.29364291777759177, "grad_norm": 6.965539455413818, "learning_rate": 8.285569472042208e-06, "loss": 0.5801, "step": 24560 }, { "epoch": 0.29365487392245243, "grad_norm": 4.669718265533447, "learning_rate": 8.28542352267811e-06, "loss": 0.5565, "step": 24561 }, { "epoch": 0.2936668300673131, "grad_norm": 1.6287554502487183, "learning_rate": 8.28527756838751e-06, "loss": 0.6171, "step": 24562 }, { "epoch": 0.29367878621217375, "grad_norm": 1.5052671432495117, "learning_rate": 8.285131609170623e-06, "loss": 0.5612, "step": 24563 }, { "epoch": 0.2936907423570344, "grad_norm": 2.7557568550109863, "learning_rate": 8.28498564502767e-06, "loss": 0.5653, "step": 24564 }, { "epoch": 0.29370269850189507, "grad_norm": 3.3549649715423584, "learning_rate": 8.284839675958869e-06, "loss": 0.5208, "step": 24565 }, { "epoch": 0.2937146546467557, "grad_norm": 1.3720301389694214, "learning_rate": 8.28469370196444e-06, "loss": 0.5575, "step": 24566 }, { "epoch": 0.2937266107916163, "grad_norm": 2.4418506622314453, "learning_rate": 8.2845477230446e-06, "loss": 0.6634, "step": 24567 }, { "epoch": 0.293738566936477, "grad_norm": 2.3340933322906494, "learning_rate": 8.284401739199569e-06, "loss": 0.5519, "step": 24568 }, { "epoch": 0.29375052308133764, "grad_norm": 2.340101718902588, "learning_rate": 8.284255750429566e-06, "loss": 0.6142, "step": 24569 }, { "epoch": 0.2937624792261983, "grad_norm": 1.8667004108428955, "learning_rate": 8.28410975673481e-06, "loss": 0.62, "step": 24570 }, { "epoch": 0.29377443537105896, "grad_norm": 1.637912392616272, "learning_rate": 8.283963758115519e-06, "loss": 0.6027, "step": 24571 }, { "epoch": 0.2937863915159196, "grad_norm": 1.5796892642974854, "learning_rate": 8.283817754571915e-06, "loss": 0.5356, "step": 24572 }, { "epoch": 0.2937983476607803, "grad_norm": 1.2645756006240845, "learning_rate": 8.283671746104212e-06, "loss": 0.5456, "step": 24573 }, { "epoch": 0.2938103038056409, "grad_norm": 1.7111308574676514, "learning_rate": 8.283525732712634e-06, "loss": 0.5903, "step": 24574 }, { "epoch": 0.29382225995050154, "grad_norm": 1.4295525550842285, "learning_rate": 8.283379714397395e-06, "loss": 0.5872, "step": 24575 }, { "epoch": 0.2938342160953622, "grad_norm": 1.4985798597335815, "learning_rate": 8.283233691158718e-06, "loss": 0.5674, "step": 24576 }, { "epoch": 0.29384617224022286, "grad_norm": 1.7384642362594604, "learning_rate": 8.283087662996821e-06, "loss": 0.5535, "step": 24577 }, { "epoch": 0.2938581283850835, "grad_norm": 2.610996961593628, "learning_rate": 8.28294162991192e-06, "loss": 0.6274, "step": 24578 }, { "epoch": 0.2938700845299442, "grad_norm": 2.0251564979553223, "learning_rate": 8.282795591904238e-06, "loss": 0.501, "step": 24579 }, { "epoch": 0.29388204067480483, "grad_norm": 2.0277724266052246, "learning_rate": 8.282649548973993e-06, "loss": 0.6895, "step": 24580 }, { "epoch": 0.2938939968196655, "grad_norm": 1.9108610153198242, "learning_rate": 8.282503501121402e-06, "loss": 0.6026, "step": 24581 }, { "epoch": 0.2939059529645261, "grad_norm": 1.4366261959075928, "learning_rate": 8.282357448346687e-06, "loss": 0.5283, "step": 24582 }, { "epoch": 0.29391790910938675, "grad_norm": 2.0049989223480225, "learning_rate": 8.282211390650064e-06, "loss": 0.625, "step": 24583 }, { "epoch": 0.2939298652542474, "grad_norm": 2.179046392440796, "learning_rate": 8.282065328031751e-06, "loss": 0.7331, "step": 24584 }, { "epoch": 0.29394182139910807, "grad_norm": 1.5120728015899658, "learning_rate": 8.281919260491972e-06, "loss": 0.6429, "step": 24585 }, { "epoch": 0.29395377754396873, "grad_norm": 1.7696701288223267, "learning_rate": 8.281773188030945e-06, "loss": 0.6382, "step": 24586 }, { "epoch": 0.2939657336888294, "grad_norm": 2.1806845664978027, "learning_rate": 8.281627110648886e-06, "loss": 0.5664, "step": 24587 }, { "epoch": 0.29397768983369005, "grad_norm": 2.434994697570801, "learning_rate": 8.281481028346015e-06, "loss": 0.5341, "step": 24588 }, { "epoch": 0.29398964597855065, "grad_norm": 3.5213656425476074, "learning_rate": 8.281334941122552e-06, "loss": 0.6523, "step": 24589 }, { "epoch": 0.2940016021234113, "grad_norm": 2.708237648010254, "learning_rate": 8.281188848978716e-06, "loss": 0.5631, "step": 24590 }, { "epoch": 0.29401355826827197, "grad_norm": 2.003445863723755, "learning_rate": 8.281042751914722e-06, "loss": 0.6041, "step": 24591 }, { "epoch": 0.2940255144131326, "grad_norm": 1.6453444957733154, "learning_rate": 8.280896649930798e-06, "loss": 0.5501, "step": 24592 }, { "epoch": 0.2940374705579933, "grad_norm": 3.098008632659912, "learning_rate": 8.280750543027154e-06, "loss": 0.6803, "step": 24593 }, { "epoch": 0.29404942670285394, "grad_norm": 2.073699951171875, "learning_rate": 8.280604431204015e-06, "loss": 0.6416, "step": 24594 }, { "epoch": 0.2940613828477146, "grad_norm": 1.3779468536376953, "learning_rate": 8.2804583144616e-06, "loss": 0.6295, "step": 24595 }, { "epoch": 0.29407333899257526, "grad_norm": 3.0735793113708496, "learning_rate": 8.280312192800122e-06, "loss": 0.5865, "step": 24596 }, { "epoch": 0.29408529513743586, "grad_norm": 6.034856796264648, "learning_rate": 8.280166066219805e-06, "loss": 0.724, "step": 24597 }, { "epoch": 0.2940972512822965, "grad_norm": 1.5659371614456177, "learning_rate": 8.280019934720868e-06, "loss": 0.5924, "step": 24598 }, { "epoch": 0.2941092074271572, "grad_norm": 1.3281923532485962, "learning_rate": 8.279873798303531e-06, "loss": 0.5843, "step": 24599 }, { "epoch": 0.29412116357201784, "grad_norm": 1.6965336799621582, "learning_rate": 8.279727656968009e-06, "loss": 0.7158, "step": 24600 }, { "epoch": 0.2941331197168785, "grad_norm": 1.6775318384170532, "learning_rate": 8.279581510714525e-06, "loss": 0.6697, "step": 24601 }, { "epoch": 0.29414507586173916, "grad_norm": 1.8680198192596436, "learning_rate": 8.279435359543297e-06, "loss": 0.6667, "step": 24602 }, { "epoch": 0.2941570320065998, "grad_norm": 1.3531540632247925, "learning_rate": 8.279289203454543e-06, "loss": 0.6328, "step": 24603 }, { "epoch": 0.2941689881514604, "grad_norm": 9.139494895935059, "learning_rate": 8.279143042448485e-06, "loss": 0.6206, "step": 24604 }, { "epoch": 0.2941809442963211, "grad_norm": 2.9039735794067383, "learning_rate": 8.278996876525337e-06, "loss": 0.6711, "step": 24605 }, { "epoch": 0.29419290044118174, "grad_norm": 3.6360888481140137, "learning_rate": 8.278850705685325e-06, "loss": 0.5417, "step": 24606 }, { "epoch": 0.2942048565860424, "grad_norm": 1.8875327110290527, "learning_rate": 8.278704529928662e-06, "loss": 0.5683, "step": 24607 }, { "epoch": 0.29421681273090305, "grad_norm": 1.7461960315704346, "learning_rate": 8.278558349255573e-06, "loss": 0.6118, "step": 24608 }, { "epoch": 0.2942287688757637, "grad_norm": 2.8450520038604736, "learning_rate": 8.278412163666272e-06, "loss": 0.5603, "step": 24609 }, { "epoch": 0.29424072502062437, "grad_norm": 1.567766785621643, "learning_rate": 8.278265973160982e-06, "loss": 0.6349, "step": 24610 }, { "epoch": 0.294252681165485, "grad_norm": 2.7090327739715576, "learning_rate": 8.278119777739918e-06, "loss": 0.6478, "step": 24611 }, { "epoch": 0.29426463731034563, "grad_norm": 2.603581190109253, "learning_rate": 8.277973577403302e-06, "loss": 0.5628, "step": 24612 }, { "epoch": 0.2942765934552063, "grad_norm": 2.560633659362793, "learning_rate": 8.277827372151355e-06, "loss": 0.7548, "step": 24613 }, { "epoch": 0.29428854960006695, "grad_norm": 1.8445641994476318, "learning_rate": 8.277681161984294e-06, "loss": 0.7342, "step": 24614 }, { "epoch": 0.2943005057449276, "grad_norm": 1.5664749145507812, "learning_rate": 8.277534946902338e-06, "loss": 0.5539, "step": 24615 }, { "epoch": 0.29431246188978827, "grad_norm": 2.0101423263549805, "learning_rate": 8.277388726905705e-06, "loss": 0.6307, "step": 24616 }, { "epoch": 0.2943244180346489, "grad_norm": 2.395207405090332, "learning_rate": 8.277242501994617e-06, "loss": 0.668, "step": 24617 }, { "epoch": 0.2943363741795096, "grad_norm": 1.374258041381836, "learning_rate": 8.277096272169294e-06, "loss": 0.623, "step": 24618 }, { "epoch": 0.2943483303243702, "grad_norm": 3.3548295497894287, "learning_rate": 8.276950037429951e-06, "loss": 0.6825, "step": 24619 }, { "epoch": 0.29436028646923085, "grad_norm": 1.605634093284607, "learning_rate": 8.27680379777681e-06, "loss": 0.6063, "step": 24620 }, { "epoch": 0.2943722426140915, "grad_norm": 1.1319173574447632, "learning_rate": 8.276657553210092e-06, "loss": 0.5339, "step": 24621 }, { "epoch": 0.29438419875895216, "grad_norm": 1.4099923372268677, "learning_rate": 8.276511303730012e-06, "loss": 0.5516, "step": 24622 }, { "epoch": 0.2943961549038128, "grad_norm": 3.958500385284424, "learning_rate": 8.276365049336795e-06, "loss": 0.5636, "step": 24623 }, { "epoch": 0.2944081110486735, "grad_norm": 1.557965636253357, "learning_rate": 8.276218790030654e-06, "loss": 0.5657, "step": 24624 }, { "epoch": 0.29442006719353414, "grad_norm": 1.4593275785446167, "learning_rate": 8.276072525811811e-06, "loss": 0.5994, "step": 24625 }, { "epoch": 0.29443202333839474, "grad_norm": 17.761281967163086, "learning_rate": 8.275926256680486e-06, "loss": 0.6616, "step": 24626 }, { "epoch": 0.2944439794832554, "grad_norm": 1.87116277217865, "learning_rate": 8.2757799826369e-06, "loss": 0.515, "step": 24627 }, { "epoch": 0.29445593562811606, "grad_norm": 2.709130048751831, "learning_rate": 8.27563370368127e-06, "loss": 0.5978, "step": 24628 }, { "epoch": 0.2944678917729767, "grad_norm": 7.468008995056152, "learning_rate": 8.275487419813813e-06, "loss": 0.6154, "step": 24629 }, { "epoch": 0.2944798479178374, "grad_norm": 1.8425724506378174, "learning_rate": 8.275341131034753e-06, "loss": 0.6029, "step": 24630 }, { "epoch": 0.29449180406269804, "grad_norm": 1.5370941162109375, "learning_rate": 8.275194837344307e-06, "loss": 0.559, "step": 24631 }, { "epoch": 0.2945037602075587, "grad_norm": 1.9748382568359375, "learning_rate": 8.275048538742695e-06, "loss": 0.6861, "step": 24632 }, { "epoch": 0.2945157163524193, "grad_norm": 1.8383588790893555, "learning_rate": 8.274902235230136e-06, "loss": 0.6473, "step": 24633 }, { "epoch": 0.29452767249727996, "grad_norm": 1.5685133934020996, "learning_rate": 8.274755926806848e-06, "loss": 0.7012, "step": 24634 }, { "epoch": 0.2945396286421406, "grad_norm": 1.6330435276031494, "learning_rate": 8.274609613473053e-06, "loss": 0.638, "step": 24635 }, { "epoch": 0.2945515847870013, "grad_norm": 2.0921788215637207, "learning_rate": 8.274463295228969e-06, "loss": 0.5674, "step": 24636 }, { "epoch": 0.29456354093186193, "grad_norm": 1.3437824249267578, "learning_rate": 8.274316972074816e-06, "loss": 0.6042, "step": 24637 }, { "epoch": 0.2945754970767226, "grad_norm": 1.8829889297485352, "learning_rate": 8.274170644010813e-06, "loss": 0.6652, "step": 24638 }, { "epoch": 0.29458745322158325, "grad_norm": 2.788256883621216, "learning_rate": 8.27402431103718e-06, "loss": 0.5619, "step": 24639 }, { "epoch": 0.2945994093664439, "grad_norm": 1.7303985357284546, "learning_rate": 8.273877973154135e-06, "loss": 0.6057, "step": 24640 }, { "epoch": 0.2946113655113045, "grad_norm": 1.8177716732025146, "learning_rate": 8.273731630361899e-06, "loss": 0.6181, "step": 24641 }, { "epoch": 0.29462332165616517, "grad_norm": 3.3895747661590576, "learning_rate": 8.27358528266069e-06, "loss": 0.597, "step": 24642 }, { "epoch": 0.29463527780102583, "grad_norm": 2.2475926876068115, "learning_rate": 8.273438930050728e-06, "loss": 0.6813, "step": 24643 }, { "epoch": 0.2946472339458865, "grad_norm": 2.327901601791382, "learning_rate": 8.273292572532233e-06, "loss": 0.7003, "step": 24644 }, { "epoch": 0.29465919009074715, "grad_norm": 3.050825834274292, "learning_rate": 8.273146210105424e-06, "loss": 0.6096, "step": 24645 }, { "epoch": 0.2946711462356078, "grad_norm": 1.6820999383926392, "learning_rate": 8.27299984277052e-06, "loss": 0.6585, "step": 24646 }, { "epoch": 0.29468310238046846, "grad_norm": 2.337810754776001, "learning_rate": 8.272853470527744e-06, "loss": 0.6831, "step": 24647 }, { "epoch": 0.29469505852532907, "grad_norm": 1.2344194650650024, "learning_rate": 8.27270709337731e-06, "loss": 0.5578, "step": 24648 }, { "epoch": 0.2947070146701897, "grad_norm": 2.060835838317871, "learning_rate": 8.272560711319441e-06, "loss": 0.6454, "step": 24649 }, { "epoch": 0.2947189708150504, "grad_norm": 1.65347421169281, "learning_rate": 8.272414324354356e-06, "loss": 0.6289, "step": 24650 }, { "epoch": 0.29473092695991104, "grad_norm": 1.693137764930725, "learning_rate": 8.272267932482275e-06, "loss": 0.6055, "step": 24651 }, { "epoch": 0.2947428831047717, "grad_norm": 1.9251751899719238, "learning_rate": 8.272121535703413e-06, "loss": 0.6751, "step": 24652 }, { "epoch": 0.29475483924963236, "grad_norm": 1.3637957572937012, "learning_rate": 8.271975134017995e-06, "loss": 0.6151, "step": 24653 }, { "epoch": 0.294766795394493, "grad_norm": 1.6381334066390991, "learning_rate": 8.27182872742624e-06, "loss": 0.6509, "step": 24654 }, { "epoch": 0.2947787515393537, "grad_norm": 1.5517587661743164, "learning_rate": 8.271682315928364e-06, "loss": 0.622, "step": 24655 }, { "epoch": 0.2947907076842143, "grad_norm": 2.3822696208953857, "learning_rate": 8.271535899524591e-06, "loss": 0.6839, "step": 24656 }, { "epoch": 0.29480266382907494, "grad_norm": 1.4716681241989136, "learning_rate": 8.271389478215137e-06, "loss": 0.6616, "step": 24657 }, { "epoch": 0.2948146199739356, "grad_norm": 1.7999221086502075, "learning_rate": 8.271243052000222e-06, "loss": 0.6252, "step": 24658 }, { "epoch": 0.29482657611879626, "grad_norm": 2.50942325592041, "learning_rate": 8.271096620880068e-06, "loss": 0.6664, "step": 24659 }, { "epoch": 0.2948385322636569, "grad_norm": 1.5327017307281494, "learning_rate": 8.270950184854892e-06, "loss": 0.7193, "step": 24660 }, { "epoch": 0.2948504884085176, "grad_norm": 1.771644949913025, "learning_rate": 8.270803743924916e-06, "loss": 0.5327, "step": 24661 }, { "epoch": 0.29486244455337823, "grad_norm": 1.415107011795044, "learning_rate": 8.270657298090357e-06, "loss": 0.6881, "step": 24662 }, { "epoch": 0.29487440069823884, "grad_norm": 1.830987572669983, "learning_rate": 8.270510847351436e-06, "loss": 0.6179, "step": 24663 }, { "epoch": 0.2948863568430995, "grad_norm": 2.4796037673950195, "learning_rate": 8.270364391708372e-06, "loss": 0.5288, "step": 24664 }, { "epoch": 0.29489831298796015, "grad_norm": 6.742864608764648, "learning_rate": 8.270217931161385e-06, "loss": 0.6838, "step": 24665 }, { "epoch": 0.2949102691328208, "grad_norm": 2.1581547260284424, "learning_rate": 8.270071465710694e-06, "loss": 0.4991, "step": 24666 }, { "epoch": 0.29492222527768147, "grad_norm": 1.862665057182312, "learning_rate": 8.26992499535652e-06, "loss": 0.5922, "step": 24667 }, { "epoch": 0.29493418142254213, "grad_norm": 1.376969337463379, "learning_rate": 8.269778520099083e-06, "loss": 0.5346, "step": 24668 }, { "epoch": 0.2949461375674028, "grad_norm": 1.4021579027175903, "learning_rate": 8.2696320399386e-06, "loss": 0.66, "step": 24669 }, { "epoch": 0.2949580937122634, "grad_norm": 1.6788432598114014, "learning_rate": 8.269485554875292e-06, "loss": 0.6884, "step": 24670 }, { "epoch": 0.29497004985712405, "grad_norm": 1.8625445365905762, "learning_rate": 8.269339064909379e-06, "loss": 0.5918, "step": 24671 }, { "epoch": 0.2949820060019847, "grad_norm": 1.5790753364562988, "learning_rate": 8.269192570041083e-06, "loss": 0.5527, "step": 24672 }, { "epoch": 0.29499396214684537, "grad_norm": 1.5906767845153809, "learning_rate": 8.269046070270616e-06, "loss": 0.6304, "step": 24673 }, { "epoch": 0.295005918291706, "grad_norm": 1.8120994567871094, "learning_rate": 8.268899565598207e-06, "loss": 0.5633, "step": 24674 }, { "epoch": 0.2950178744365667, "grad_norm": 1.7035248279571533, "learning_rate": 8.26875305602407e-06, "loss": 0.5979, "step": 24675 }, { "epoch": 0.29502983058142734, "grad_norm": 2.0420355796813965, "learning_rate": 8.268606541548427e-06, "loss": 0.6059, "step": 24676 }, { "epoch": 0.295041786726288, "grad_norm": 1.2338354587554932, "learning_rate": 8.268460022171497e-06, "loss": 0.621, "step": 24677 }, { "epoch": 0.2950537428711486, "grad_norm": 2.586270570755005, "learning_rate": 8.268313497893497e-06, "loss": 0.6022, "step": 24678 }, { "epoch": 0.29506569901600926, "grad_norm": 2.962380886077881, "learning_rate": 8.26816696871465e-06, "loss": 0.5897, "step": 24679 }, { "epoch": 0.2950776551608699, "grad_norm": 2.4803457260131836, "learning_rate": 8.268020434635176e-06, "loss": 0.579, "step": 24680 }, { "epoch": 0.2950896113057306, "grad_norm": 2.634136915206909, "learning_rate": 8.267873895655295e-06, "loss": 0.7117, "step": 24681 }, { "epoch": 0.29510156745059124, "grad_norm": 1.6856852769851685, "learning_rate": 8.267727351775225e-06, "loss": 0.5086, "step": 24682 }, { "epoch": 0.2951135235954519, "grad_norm": 2.2224817276000977, "learning_rate": 8.267580802995185e-06, "loss": 0.6505, "step": 24683 }, { "epoch": 0.29512547974031256, "grad_norm": 2.5608572959899902, "learning_rate": 8.267434249315396e-06, "loss": 0.5691, "step": 24684 }, { "epoch": 0.29513743588517316, "grad_norm": 2.391754150390625, "learning_rate": 8.267287690736077e-06, "loss": 0.6666, "step": 24685 }, { "epoch": 0.2951493920300338, "grad_norm": 1.7722110748291016, "learning_rate": 8.26714112725745e-06, "loss": 0.5775, "step": 24686 }, { "epoch": 0.2951613481748945, "grad_norm": 1.352690577507019, "learning_rate": 8.266994558879734e-06, "loss": 0.5848, "step": 24687 }, { "epoch": 0.29517330431975514, "grad_norm": 2.2964627742767334, "learning_rate": 8.266847985603145e-06, "loss": 0.6433, "step": 24688 }, { "epoch": 0.2951852604646158, "grad_norm": 1.5009334087371826, "learning_rate": 8.266701407427909e-06, "loss": 0.5952, "step": 24689 }, { "epoch": 0.29519721660947645, "grad_norm": 1.4248046875, "learning_rate": 8.266554824354242e-06, "loss": 0.5462, "step": 24690 }, { "epoch": 0.2952091727543371, "grad_norm": 1.5475815534591675, "learning_rate": 8.266408236382363e-06, "loss": 0.5581, "step": 24691 }, { "epoch": 0.2952211288991977, "grad_norm": 1.9697978496551514, "learning_rate": 8.266261643512495e-06, "loss": 0.6312, "step": 24692 }, { "epoch": 0.2952330850440584, "grad_norm": 1.9919713735580444, "learning_rate": 8.266115045744854e-06, "loss": 0.5517, "step": 24693 }, { "epoch": 0.29524504118891903, "grad_norm": 2.4693222045898438, "learning_rate": 8.265968443079664e-06, "loss": 0.6584, "step": 24694 }, { "epoch": 0.2952569973337797, "grad_norm": 1.4957845211029053, "learning_rate": 8.265821835517144e-06, "loss": 0.607, "step": 24695 }, { "epoch": 0.29526895347864035, "grad_norm": 2.048929214477539, "learning_rate": 8.265675223057508e-06, "loss": 0.6191, "step": 24696 }, { "epoch": 0.295280909623501, "grad_norm": 2.157477617263794, "learning_rate": 8.265528605700983e-06, "loss": 0.6457, "step": 24697 }, { "epoch": 0.29529286576836167, "grad_norm": 1.6217565536499023, "learning_rate": 8.265381983447785e-06, "loss": 0.5718, "step": 24698 }, { "epoch": 0.2953048219132223, "grad_norm": 1.9683271646499634, "learning_rate": 8.265235356298135e-06, "loss": 0.5653, "step": 24699 }, { "epoch": 0.29531677805808293, "grad_norm": 1.692787528038025, "learning_rate": 8.265088724252257e-06, "loss": 0.5731, "step": 24700 }, { "epoch": 0.2953287342029436, "grad_norm": 2.719939708709717, "learning_rate": 8.264942087310363e-06, "loss": 0.6752, "step": 24701 }, { "epoch": 0.29534069034780425, "grad_norm": 8.402965545654297, "learning_rate": 8.264795445472678e-06, "loss": 0.6682, "step": 24702 }, { "epoch": 0.2953526464926649, "grad_norm": 2.0757627487182617, "learning_rate": 8.26464879873942e-06, "loss": 0.6132, "step": 24703 }, { "epoch": 0.29536460263752556, "grad_norm": 1.2837942838668823, "learning_rate": 8.26450214711081e-06, "loss": 0.5461, "step": 24704 }, { "epoch": 0.2953765587823862, "grad_norm": 3.051340341567993, "learning_rate": 8.264355490587067e-06, "loss": 0.7152, "step": 24705 }, { "epoch": 0.2953885149272469, "grad_norm": 2.1568520069122314, "learning_rate": 8.264208829168412e-06, "loss": 0.5719, "step": 24706 }, { "epoch": 0.2954004710721075, "grad_norm": 2.638500452041626, "learning_rate": 8.264062162855063e-06, "loss": 0.625, "step": 24707 }, { "epoch": 0.29541242721696814, "grad_norm": 3.1497788429260254, "learning_rate": 8.263915491647242e-06, "loss": 0.5819, "step": 24708 }, { "epoch": 0.2954243833618288, "grad_norm": 1.3634412288665771, "learning_rate": 8.263768815545168e-06, "loss": 0.5052, "step": 24709 }, { "epoch": 0.29543633950668946, "grad_norm": 3.4545536041259766, "learning_rate": 8.263622134549061e-06, "loss": 0.6432, "step": 24710 }, { "epoch": 0.2954482956515501, "grad_norm": 1.722256064414978, "learning_rate": 8.263475448659142e-06, "loss": 0.5601, "step": 24711 }, { "epoch": 0.2954602517964108, "grad_norm": 2.430619239807129, "learning_rate": 8.263328757875629e-06, "loss": 0.6, "step": 24712 }, { "epoch": 0.29547220794127144, "grad_norm": 1.9537572860717773, "learning_rate": 8.263182062198742e-06, "loss": 0.5155, "step": 24713 }, { "epoch": 0.2954841640861321, "grad_norm": 1.5415852069854736, "learning_rate": 8.263035361628704e-06, "loss": 0.5748, "step": 24714 }, { "epoch": 0.2954961202309927, "grad_norm": 2.810046911239624, "learning_rate": 8.26288865616573e-06, "loss": 0.6868, "step": 24715 }, { "epoch": 0.29550807637585336, "grad_norm": 2.104196786880493, "learning_rate": 8.262741945810047e-06, "loss": 0.5365, "step": 24716 }, { "epoch": 0.295520032520714, "grad_norm": 1.7785207033157349, "learning_rate": 8.262595230561868e-06, "loss": 0.5928, "step": 24717 }, { "epoch": 0.2955319886655747, "grad_norm": 1.8662419319152832, "learning_rate": 8.262448510421416e-06, "loss": 0.592, "step": 24718 }, { "epoch": 0.29554394481043533, "grad_norm": 2.284515857696533, "learning_rate": 8.262301785388911e-06, "loss": 0.6384, "step": 24719 }, { "epoch": 0.295555900955296, "grad_norm": 2.706239700317383, "learning_rate": 8.262155055464573e-06, "loss": 0.5938, "step": 24720 }, { "epoch": 0.29556785710015665, "grad_norm": 2.721811532974243, "learning_rate": 8.262008320648622e-06, "loss": 0.7125, "step": 24721 }, { "epoch": 0.29557981324501725, "grad_norm": 1.496621012687683, "learning_rate": 8.26186158094128e-06, "loss": 0.6707, "step": 24722 }, { "epoch": 0.2955917693898779, "grad_norm": 2.081634283065796, "learning_rate": 8.261714836342762e-06, "loss": 0.5632, "step": 24723 }, { "epoch": 0.29560372553473857, "grad_norm": 2.2395119667053223, "learning_rate": 8.261568086853291e-06, "loss": 0.663, "step": 24724 }, { "epoch": 0.29561568167959923, "grad_norm": 2.7106821537017822, "learning_rate": 8.261421332473087e-06, "loss": 0.5483, "step": 24725 }, { "epoch": 0.2956276378244599, "grad_norm": 2.781804084777832, "learning_rate": 8.261274573202371e-06, "loss": 0.5671, "step": 24726 }, { "epoch": 0.29563959396932055, "grad_norm": 2.5866036415100098, "learning_rate": 8.261127809041365e-06, "loss": 0.5812, "step": 24727 }, { "epoch": 0.2956515501141812, "grad_norm": 2.723649263381958, "learning_rate": 8.260981039990283e-06, "loss": 0.5297, "step": 24728 }, { "epoch": 0.2956635062590418, "grad_norm": 1.583255410194397, "learning_rate": 8.26083426604935e-06, "loss": 0.5439, "step": 24729 }, { "epoch": 0.29567546240390247, "grad_norm": 2.5954430103302, "learning_rate": 8.260687487218782e-06, "loss": 0.6495, "step": 24730 }, { "epoch": 0.2956874185487631, "grad_norm": 1.800613284111023, "learning_rate": 8.260540703498802e-06, "loss": 0.5389, "step": 24731 }, { "epoch": 0.2956993746936238, "grad_norm": 2.2547988891601562, "learning_rate": 8.260393914889631e-06, "loss": 0.5818, "step": 24732 }, { "epoch": 0.29571133083848444, "grad_norm": 1.5334022045135498, "learning_rate": 8.260247121391486e-06, "loss": 0.5866, "step": 24733 }, { "epoch": 0.2957232869833451, "grad_norm": 3.791062593460083, "learning_rate": 8.260100323004592e-06, "loss": 0.6835, "step": 24734 }, { "epoch": 0.29573524312820576, "grad_norm": 1.787721037864685, "learning_rate": 8.259953519729163e-06, "loss": 0.5411, "step": 24735 }, { "epoch": 0.2957471992730664, "grad_norm": 1.4482102394104004, "learning_rate": 8.259806711565423e-06, "loss": 0.5692, "step": 24736 }, { "epoch": 0.295759155417927, "grad_norm": 3.322046995162964, "learning_rate": 8.259659898513592e-06, "loss": 0.6838, "step": 24737 }, { "epoch": 0.2957711115627877, "grad_norm": 1.340488076210022, "learning_rate": 8.259513080573889e-06, "loss": 0.5578, "step": 24738 }, { "epoch": 0.29578306770764834, "grad_norm": 2.5898139476776123, "learning_rate": 8.259366257746533e-06, "loss": 0.5502, "step": 24739 }, { "epoch": 0.295795023852509, "grad_norm": 2.161943197250366, "learning_rate": 8.259219430031749e-06, "loss": 0.6293, "step": 24740 }, { "epoch": 0.29580697999736966, "grad_norm": 4.287644386291504, "learning_rate": 8.259072597429751e-06, "loss": 0.6338, "step": 24741 }, { "epoch": 0.2958189361422303, "grad_norm": 2.1364734172821045, "learning_rate": 8.258925759940764e-06, "loss": 0.6706, "step": 24742 }, { "epoch": 0.295830892287091, "grad_norm": 1.6191526651382446, "learning_rate": 8.258778917565004e-06, "loss": 0.6221, "step": 24743 }, { "epoch": 0.2958428484319516, "grad_norm": 1.7429426908493042, "learning_rate": 8.258632070302695e-06, "loss": 0.5224, "step": 24744 }, { "epoch": 0.29585480457681224, "grad_norm": 3.088265895843506, "learning_rate": 8.258485218154055e-06, "loss": 0.6711, "step": 24745 }, { "epoch": 0.2958667607216729, "grad_norm": 2.207552433013916, "learning_rate": 8.258338361119304e-06, "loss": 0.6244, "step": 24746 }, { "epoch": 0.29587871686653355, "grad_norm": 2.7390429973602295, "learning_rate": 8.258191499198665e-06, "loss": 0.6666, "step": 24747 }, { "epoch": 0.2958906730113942, "grad_norm": 2.0893406867980957, "learning_rate": 8.258044632392355e-06, "loss": 0.6157, "step": 24748 }, { "epoch": 0.29590262915625487, "grad_norm": 9.174555778503418, "learning_rate": 8.257897760700598e-06, "loss": 0.6771, "step": 24749 }, { "epoch": 0.29591458530111553, "grad_norm": 3.876108169555664, "learning_rate": 8.257750884123608e-06, "loss": 0.6232, "step": 24750 }, { "epoch": 0.29592654144597613, "grad_norm": 1.9419214725494385, "learning_rate": 8.257604002661611e-06, "loss": 0.5546, "step": 24751 }, { "epoch": 0.2959384975908368, "grad_norm": 2.339353561401367, "learning_rate": 8.257457116314824e-06, "loss": 0.6187, "step": 24752 }, { "epoch": 0.29595045373569745, "grad_norm": 1.9158849716186523, "learning_rate": 8.25731022508347e-06, "loss": 0.6067, "step": 24753 }, { "epoch": 0.2959624098805581, "grad_norm": 2.384598970413208, "learning_rate": 8.257163328967768e-06, "loss": 0.5764, "step": 24754 }, { "epoch": 0.29597436602541877, "grad_norm": 2.17611026763916, "learning_rate": 8.257016427967936e-06, "loss": 0.5637, "step": 24755 }, { "epoch": 0.2959863221702794, "grad_norm": 2.0008304119110107, "learning_rate": 8.256869522084197e-06, "loss": 0.5413, "step": 24756 }, { "epoch": 0.2959982783151401, "grad_norm": 1.9104318618774414, "learning_rate": 8.256722611316772e-06, "loss": 0.5994, "step": 24757 }, { "epoch": 0.29601023446000074, "grad_norm": 1.8749310970306396, "learning_rate": 8.256575695665877e-06, "loss": 0.6956, "step": 24758 }, { "epoch": 0.29602219060486135, "grad_norm": 8.765824317932129, "learning_rate": 8.256428775131738e-06, "loss": 0.5564, "step": 24759 }, { "epoch": 0.296034146749722, "grad_norm": 1.7908704280853271, "learning_rate": 8.256281849714571e-06, "loss": 0.5976, "step": 24760 }, { "epoch": 0.29604610289458266, "grad_norm": 1.892391324043274, "learning_rate": 8.2561349194146e-06, "loss": 0.6538, "step": 24761 }, { "epoch": 0.2960580590394433, "grad_norm": 2.5388638973236084, "learning_rate": 8.25598798423204e-06, "loss": 0.5641, "step": 24762 }, { "epoch": 0.296070015184304, "grad_norm": 2.0101869106292725, "learning_rate": 8.255841044167117e-06, "loss": 0.5794, "step": 24763 }, { "epoch": 0.29608197132916464, "grad_norm": 1.9314241409301758, "learning_rate": 8.255694099220047e-06, "loss": 0.6794, "step": 24764 }, { "epoch": 0.2960939274740253, "grad_norm": 2.229130506515503, "learning_rate": 8.255547149391053e-06, "loss": 0.6772, "step": 24765 }, { "epoch": 0.2961058836188859, "grad_norm": 3.219881772994995, "learning_rate": 8.255400194680354e-06, "loss": 0.5584, "step": 24766 }, { "epoch": 0.29611783976374656, "grad_norm": 1.7648162841796875, "learning_rate": 8.25525323508817e-06, "loss": 0.5919, "step": 24767 }, { "epoch": 0.2961297959086072, "grad_norm": 2.259121894836426, "learning_rate": 8.255106270614724e-06, "loss": 0.6272, "step": 24768 }, { "epoch": 0.2961417520534679, "grad_norm": 2.4817850589752197, "learning_rate": 8.254959301260233e-06, "loss": 0.6631, "step": 24769 }, { "epoch": 0.29615370819832854, "grad_norm": 1.8769428730010986, "learning_rate": 8.25481232702492e-06, "loss": 0.6175, "step": 24770 }, { "epoch": 0.2961656643431892, "grad_norm": 8.743062973022461, "learning_rate": 8.254665347909005e-06, "loss": 0.591, "step": 24771 }, { "epoch": 0.29617762048804985, "grad_norm": 2.3282952308654785, "learning_rate": 8.254518363912706e-06, "loss": 0.5171, "step": 24772 }, { "epoch": 0.2961895766329105, "grad_norm": 2.1244285106658936, "learning_rate": 8.254371375036247e-06, "loss": 0.6963, "step": 24773 }, { "epoch": 0.2962015327777711, "grad_norm": 2.4097378253936768, "learning_rate": 8.254224381279845e-06, "loss": 0.5219, "step": 24774 }, { "epoch": 0.2962134889226318, "grad_norm": 2.624027967453003, "learning_rate": 8.254077382643722e-06, "loss": 0.6061, "step": 24775 }, { "epoch": 0.29622544506749243, "grad_norm": 1.478777527809143, "learning_rate": 8.2539303791281e-06, "loss": 0.5743, "step": 24776 }, { "epoch": 0.2962374012123531, "grad_norm": 1.7471885681152344, "learning_rate": 8.253783370733196e-06, "loss": 0.5933, "step": 24777 }, { "epoch": 0.29624935735721375, "grad_norm": 2.758678913116455, "learning_rate": 8.253636357459234e-06, "loss": 0.5614, "step": 24778 }, { "epoch": 0.2962613135020744, "grad_norm": 1.7875686883926392, "learning_rate": 8.253489339306431e-06, "loss": 0.5725, "step": 24779 }, { "epoch": 0.29627326964693507, "grad_norm": 2.056901216506958, "learning_rate": 8.253342316275009e-06, "loss": 0.5717, "step": 24780 }, { "epoch": 0.29628522579179567, "grad_norm": 4.907699108123779, "learning_rate": 8.25319528836519e-06, "loss": 0.559, "step": 24781 }, { "epoch": 0.29629718193665633, "grad_norm": 2.141103506088257, "learning_rate": 8.253048255577193e-06, "loss": 0.6431, "step": 24782 }, { "epoch": 0.296309138081517, "grad_norm": 1.709661841392517, "learning_rate": 8.252901217911238e-06, "loss": 0.483, "step": 24783 }, { "epoch": 0.29632109422637765, "grad_norm": 4.378586769104004, "learning_rate": 8.252754175367546e-06, "loss": 0.624, "step": 24784 }, { "epoch": 0.2963330503712383, "grad_norm": 1.7902686595916748, "learning_rate": 8.252607127946338e-06, "loss": 0.5701, "step": 24785 }, { "epoch": 0.29634500651609896, "grad_norm": 1.697171688079834, "learning_rate": 8.252460075647835e-06, "loss": 0.6054, "step": 24786 }, { "epoch": 0.2963569626609596, "grad_norm": 2.872755527496338, "learning_rate": 8.252313018472255e-06, "loss": 0.637, "step": 24787 }, { "epoch": 0.2963689188058202, "grad_norm": 2.232362985610962, "learning_rate": 8.25216595641982e-06, "loss": 0.622, "step": 24788 }, { "epoch": 0.2963808749506809, "grad_norm": 1.9236499071121216, "learning_rate": 8.252018889490752e-06, "loss": 0.5852, "step": 24789 }, { "epoch": 0.29639283109554154, "grad_norm": 1.5850274562835693, "learning_rate": 8.251871817685269e-06, "loss": 0.5712, "step": 24790 }, { "epoch": 0.2964047872404022, "grad_norm": 1.8184471130371094, "learning_rate": 8.251724741003592e-06, "loss": 0.6206, "step": 24791 }, { "epoch": 0.29641674338526286, "grad_norm": 2.1455178260803223, "learning_rate": 8.251577659445944e-06, "loss": 0.5979, "step": 24792 }, { "epoch": 0.2964286995301235, "grad_norm": 2.348341464996338, "learning_rate": 8.251430573012541e-06, "loss": 0.5876, "step": 24793 }, { "epoch": 0.2964406556749842, "grad_norm": 6.098733425140381, "learning_rate": 8.251283481703609e-06, "loss": 0.5109, "step": 24794 }, { "epoch": 0.29645261181984484, "grad_norm": 2.0734400749206543, "learning_rate": 8.251136385519365e-06, "loss": 0.6267, "step": 24795 }, { "epoch": 0.29646456796470544, "grad_norm": 5.980037212371826, "learning_rate": 8.250989284460031e-06, "loss": 0.5959, "step": 24796 }, { "epoch": 0.2964765241095661, "grad_norm": 1.757036805152893, "learning_rate": 8.250842178525826e-06, "loss": 0.5631, "step": 24797 }, { "epoch": 0.29648848025442676, "grad_norm": 1.319217562675476, "learning_rate": 8.250695067716972e-06, "loss": 0.5208, "step": 24798 }, { "epoch": 0.2965004363992874, "grad_norm": 2.3400378227233887, "learning_rate": 8.25054795203369e-06, "loss": 0.6153, "step": 24799 }, { "epoch": 0.2965123925441481, "grad_norm": 5.2000813484191895, "learning_rate": 8.250400831476198e-06, "loss": 0.6655, "step": 24800 }, { "epoch": 0.29652434868900873, "grad_norm": 1.8352611064910889, "learning_rate": 8.250253706044719e-06, "loss": 0.6132, "step": 24801 }, { "epoch": 0.2965363048338694, "grad_norm": 1.4246326684951782, "learning_rate": 8.250106575739474e-06, "loss": 0.5739, "step": 24802 }, { "epoch": 0.29654826097873, "grad_norm": 1.9320068359375, "learning_rate": 8.249959440560682e-06, "loss": 0.552, "step": 24803 }, { "epoch": 0.29656021712359065, "grad_norm": 1.9309775829315186, "learning_rate": 8.249812300508563e-06, "loss": 0.5847, "step": 24804 }, { "epoch": 0.2965721732684513, "grad_norm": 5.826612949371338, "learning_rate": 8.249665155583341e-06, "loss": 0.6907, "step": 24805 }, { "epoch": 0.29658412941331197, "grad_norm": 2.9317381381988525, "learning_rate": 8.249518005785234e-06, "loss": 0.6317, "step": 24806 }, { "epoch": 0.29659608555817263, "grad_norm": 2.1457080841064453, "learning_rate": 8.249370851114463e-06, "loss": 0.5597, "step": 24807 }, { "epoch": 0.2966080417030333, "grad_norm": 9.491700172424316, "learning_rate": 8.24922369157125e-06, "loss": 0.6565, "step": 24808 }, { "epoch": 0.29661999784789395, "grad_norm": 1.786572813987732, "learning_rate": 8.249076527155813e-06, "loss": 0.6769, "step": 24809 }, { "epoch": 0.29663195399275455, "grad_norm": 2.738992929458618, "learning_rate": 8.248929357868374e-06, "loss": 0.6573, "step": 24810 }, { "epoch": 0.2966439101376152, "grad_norm": 2.444880723953247, "learning_rate": 8.248782183709155e-06, "loss": 0.5307, "step": 24811 }, { "epoch": 0.29665586628247587, "grad_norm": 3.0606658458709717, "learning_rate": 8.248635004678373e-06, "loss": 0.5887, "step": 24812 }, { "epoch": 0.2966678224273365, "grad_norm": 2.451788902282715, "learning_rate": 8.248487820776254e-06, "loss": 0.5297, "step": 24813 }, { "epoch": 0.2966797785721972, "grad_norm": 1.5042753219604492, "learning_rate": 8.248340632003015e-06, "loss": 0.5431, "step": 24814 }, { "epoch": 0.29669173471705784, "grad_norm": 1.9319806098937988, "learning_rate": 8.248193438358879e-06, "loss": 0.4852, "step": 24815 }, { "epoch": 0.2967036908619185, "grad_norm": 2.027107000350952, "learning_rate": 8.248046239844063e-06, "loss": 0.5982, "step": 24816 }, { "epoch": 0.29671564700677916, "grad_norm": 1.91325843334198, "learning_rate": 8.247899036458792e-06, "loss": 0.5928, "step": 24817 }, { "epoch": 0.29672760315163976, "grad_norm": 1.729780912399292, "learning_rate": 8.247751828203285e-06, "loss": 0.517, "step": 24818 }, { "epoch": 0.2967395592965004, "grad_norm": 2.1553144454956055, "learning_rate": 8.247604615077762e-06, "loss": 0.4936, "step": 24819 }, { "epoch": 0.2967515154413611, "grad_norm": 1.6467127799987793, "learning_rate": 8.247457397082443e-06, "loss": 0.5965, "step": 24820 }, { "epoch": 0.29676347158622174, "grad_norm": 2.8743252754211426, "learning_rate": 8.24731017421755e-06, "loss": 0.6109, "step": 24821 }, { "epoch": 0.2967754277310824, "grad_norm": 2.2892379760742188, "learning_rate": 8.247162946483306e-06, "loss": 0.5854, "step": 24822 }, { "epoch": 0.29678738387594306, "grad_norm": 1.9099584817886353, "learning_rate": 8.247015713879929e-06, "loss": 0.6981, "step": 24823 }, { "epoch": 0.2967993400208037, "grad_norm": 1.7850173711776733, "learning_rate": 8.246868476407642e-06, "loss": 0.6319, "step": 24824 }, { "epoch": 0.2968112961656643, "grad_norm": 2.2691516876220703, "learning_rate": 8.24672123406666e-06, "loss": 0.6041, "step": 24825 }, { "epoch": 0.296823252310525, "grad_norm": 1.8274016380310059, "learning_rate": 8.246573986857212e-06, "loss": 0.5233, "step": 24826 }, { "epoch": 0.29683520845538564, "grad_norm": 9.07558822631836, "learning_rate": 8.246426734779512e-06, "loss": 0.5684, "step": 24827 }, { "epoch": 0.2968471646002463, "grad_norm": 2.3162262439727783, "learning_rate": 8.246279477833785e-06, "loss": 0.599, "step": 24828 }, { "epoch": 0.29685912074510695, "grad_norm": 2.8784399032592773, "learning_rate": 8.24613221602025e-06, "loss": 0.6614, "step": 24829 }, { "epoch": 0.2968710768899676, "grad_norm": 8.848552703857422, "learning_rate": 8.245984949339128e-06, "loss": 0.5002, "step": 24830 }, { "epoch": 0.29688303303482827, "grad_norm": 2.0737314224243164, "learning_rate": 8.245837677790641e-06, "loss": 0.6795, "step": 24831 }, { "epoch": 0.29689498917968893, "grad_norm": 2.1365554332733154, "learning_rate": 8.245690401375008e-06, "loss": 0.5677, "step": 24832 }, { "epoch": 0.29690694532454953, "grad_norm": 1.8667376041412354, "learning_rate": 8.24554312009245e-06, "loss": 0.616, "step": 24833 }, { "epoch": 0.2969189014694102, "grad_norm": 2.6821341514587402, "learning_rate": 8.24539583394319e-06, "loss": 0.4742, "step": 24834 }, { "epoch": 0.29693085761427085, "grad_norm": 2.5798025131225586, "learning_rate": 8.245248542927446e-06, "loss": 0.6665, "step": 24835 }, { "epoch": 0.2969428137591315, "grad_norm": 3.065007448196411, "learning_rate": 8.245101247045442e-06, "loss": 0.6043, "step": 24836 }, { "epoch": 0.29695476990399217, "grad_norm": 2.5817818641662598, "learning_rate": 8.244953946297395e-06, "loss": 0.5616, "step": 24837 }, { "epoch": 0.2969667260488528, "grad_norm": 1.8932230472564697, "learning_rate": 8.24480664068353e-06, "loss": 0.6298, "step": 24838 }, { "epoch": 0.2969786821937135, "grad_norm": 1.5175381898880005, "learning_rate": 8.244659330204065e-06, "loss": 0.741, "step": 24839 }, { "epoch": 0.2969906383385741, "grad_norm": 2.0227746963500977, "learning_rate": 8.244512014859221e-06, "loss": 0.6075, "step": 24840 }, { "epoch": 0.29700259448343475, "grad_norm": 6.065584182739258, "learning_rate": 8.244364694649221e-06, "loss": 0.6387, "step": 24841 }, { "epoch": 0.2970145506282954, "grad_norm": 2.3400113582611084, "learning_rate": 8.244217369574285e-06, "loss": 0.6866, "step": 24842 }, { "epoch": 0.29702650677315606, "grad_norm": 2.0337634086608887, "learning_rate": 8.244070039634632e-06, "loss": 0.5966, "step": 24843 }, { "epoch": 0.2970384629180167, "grad_norm": 2.2854747772216797, "learning_rate": 8.243922704830485e-06, "loss": 0.581, "step": 24844 }, { "epoch": 0.2970504190628774, "grad_norm": 3.8597872257232666, "learning_rate": 8.243775365162064e-06, "loss": 0.6663, "step": 24845 }, { "epoch": 0.29706237520773804, "grad_norm": 2.2599904537200928, "learning_rate": 8.24362802062959e-06, "loss": 0.6404, "step": 24846 }, { "epoch": 0.29707433135259864, "grad_norm": 2.877537488937378, "learning_rate": 8.243480671233287e-06, "loss": 0.6704, "step": 24847 }, { "epoch": 0.2970862874974593, "grad_norm": 6.2641801834106445, "learning_rate": 8.243333316973371e-06, "loss": 0.6394, "step": 24848 }, { "epoch": 0.29709824364231996, "grad_norm": 5.722592830657959, "learning_rate": 8.243185957850066e-06, "loss": 0.6175, "step": 24849 }, { "epoch": 0.2971101997871806, "grad_norm": 1.4346321821212769, "learning_rate": 8.243038593863593e-06, "loss": 0.6059, "step": 24850 }, { "epoch": 0.2971221559320413, "grad_norm": 1.424948811531067, "learning_rate": 8.24289122501417e-06, "loss": 0.6713, "step": 24851 }, { "epoch": 0.29713411207690194, "grad_norm": 2.718322277069092, "learning_rate": 8.24274385130202e-06, "loss": 0.5161, "step": 24852 }, { "epoch": 0.2971460682217626, "grad_norm": 5.5728607177734375, "learning_rate": 8.242596472727364e-06, "loss": 0.5259, "step": 24853 }, { "epoch": 0.29715802436662325, "grad_norm": 35.890350341796875, "learning_rate": 8.242449089290424e-06, "loss": 0.6403, "step": 24854 }, { "epoch": 0.29716998051148386, "grad_norm": 1.7902356386184692, "learning_rate": 8.242301700991421e-06, "loss": 0.5528, "step": 24855 }, { "epoch": 0.2971819366563445, "grad_norm": 2.065046787261963, "learning_rate": 8.242154307830575e-06, "loss": 0.6686, "step": 24856 }, { "epoch": 0.2971938928012052, "grad_norm": 1.9620977640151978, "learning_rate": 8.242006909808104e-06, "loss": 0.5893, "step": 24857 }, { "epoch": 0.29720584894606583, "grad_norm": 2.271423101425171, "learning_rate": 8.241859506924234e-06, "loss": 0.6845, "step": 24858 }, { "epoch": 0.2972178050909265, "grad_norm": 1.9544330835342407, "learning_rate": 8.241712099179187e-06, "loss": 0.5644, "step": 24859 }, { "epoch": 0.29722976123578715, "grad_norm": 17.93254280090332, "learning_rate": 8.241564686573178e-06, "loss": 0.5352, "step": 24860 }, { "epoch": 0.2972417173806478, "grad_norm": 1.68105149269104, "learning_rate": 8.241417269106432e-06, "loss": 0.7091, "step": 24861 }, { "epoch": 0.2972536735255084, "grad_norm": 2.2361786365509033, "learning_rate": 8.241269846779168e-06, "loss": 0.5141, "step": 24862 }, { "epoch": 0.29726562967036907, "grad_norm": 3.4821720123291016, "learning_rate": 8.24112241959161e-06, "loss": 0.5675, "step": 24863 }, { "epoch": 0.29727758581522973, "grad_norm": 2.239240884780884, "learning_rate": 8.240974987543977e-06, "loss": 0.5459, "step": 24864 }, { "epoch": 0.2972895419600904, "grad_norm": 2.0381431579589844, "learning_rate": 8.24082755063649e-06, "loss": 0.6195, "step": 24865 }, { "epoch": 0.29730149810495105, "grad_norm": 3.4300878047943115, "learning_rate": 8.240680108869371e-06, "loss": 0.672, "step": 24866 }, { "epoch": 0.2973134542498117, "grad_norm": 3.4834280014038086, "learning_rate": 8.240532662242841e-06, "loss": 0.6233, "step": 24867 }, { "epoch": 0.29732541039467236, "grad_norm": 9.130109786987305, "learning_rate": 8.24038521075712e-06, "loss": 0.6753, "step": 24868 }, { "epoch": 0.29733736653953297, "grad_norm": 2.1636600494384766, "learning_rate": 8.24023775441243e-06, "loss": 0.5881, "step": 24869 }, { "epoch": 0.2973493226843936, "grad_norm": 3.2815072536468506, "learning_rate": 8.240090293208991e-06, "loss": 0.624, "step": 24870 }, { "epoch": 0.2973612788292543, "grad_norm": 1.6304336786270142, "learning_rate": 8.239942827147026e-06, "loss": 0.5857, "step": 24871 }, { "epoch": 0.29737323497411494, "grad_norm": 1.9755009412765503, "learning_rate": 8.239795356226757e-06, "loss": 0.5717, "step": 24872 }, { "epoch": 0.2973851911189756, "grad_norm": 6.4588117599487305, "learning_rate": 8.239647880448402e-06, "loss": 0.6538, "step": 24873 }, { "epoch": 0.29739714726383626, "grad_norm": 4.228722095489502, "learning_rate": 8.239500399812183e-06, "loss": 0.533, "step": 24874 }, { "epoch": 0.2974091034086969, "grad_norm": 3.9900753498077393, "learning_rate": 8.239352914318322e-06, "loss": 0.6383, "step": 24875 }, { "epoch": 0.2974210595535576, "grad_norm": 1.7438682317733765, "learning_rate": 8.23920542396704e-06, "loss": 0.5166, "step": 24876 }, { "epoch": 0.2974330156984182, "grad_norm": 2.334459066390991, "learning_rate": 8.239057928758556e-06, "loss": 0.58, "step": 24877 }, { "epoch": 0.29744497184327884, "grad_norm": 2.0926623344421387, "learning_rate": 8.238910428693096e-06, "loss": 0.6255, "step": 24878 }, { "epoch": 0.2974569279881395, "grad_norm": 1.7258970737457275, "learning_rate": 8.238762923770877e-06, "loss": 0.7001, "step": 24879 }, { "epoch": 0.29746888413300016, "grad_norm": 1.7249053716659546, "learning_rate": 8.238615413992122e-06, "loss": 0.5973, "step": 24880 }, { "epoch": 0.2974808402778608, "grad_norm": 1.8877654075622559, "learning_rate": 8.238467899357051e-06, "loss": 0.5755, "step": 24881 }, { "epoch": 0.2974927964227215, "grad_norm": 9.094133377075195, "learning_rate": 8.238320379865888e-06, "loss": 0.732, "step": 24882 }, { "epoch": 0.29750475256758213, "grad_norm": 3.8322277069091797, "learning_rate": 8.238172855518849e-06, "loss": 0.5987, "step": 24883 }, { "epoch": 0.29751670871244273, "grad_norm": 3.316786527633667, "learning_rate": 8.23802532631616e-06, "loss": 0.6034, "step": 24884 }, { "epoch": 0.2975286648573034, "grad_norm": 2.339690685272217, "learning_rate": 8.237877792258039e-06, "loss": 0.5577, "step": 24885 }, { "epoch": 0.29754062100216405, "grad_norm": 3.845712184906006, "learning_rate": 8.23773025334471e-06, "loss": 0.7196, "step": 24886 }, { "epoch": 0.2975525771470247, "grad_norm": 5.070055961608887, "learning_rate": 8.237582709576393e-06, "loss": 0.6666, "step": 24887 }, { "epoch": 0.29756453329188537, "grad_norm": 1.7998307943344116, "learning_rate": 8.237435160953308e-06, "loss": 0.5916, "step": 24888 }, { "epoch": 0.29757648943674603, "grad_norm": 2.3518292903900146, "learning_rate": 8.23728760747568e-06, "loss": 0.5467, "step": 24889 }, { "epoch": 0.2975884455816067, "grad_norm": 1.9925490617752075, "learning_rate": 8.237140049143726e-06, "loss": 0.5574, "step": 24890 }, { "epoch": 0.29760040172646735, "grad_norm": 34.93111801147461, "learning_rate": 8.23699248595767e-06, "loss": 0.6794, "step": 24891 }, { "epoch": 0.29761235787132795, "grad_norm": 1.9009348154067993, "learning_rate": 8.23684491791773e-06, "loss": 0.6012, "step": 24892 }, { "epoch": 0.2976243140161886, "grad_norm": 1.7509138584136963, "learning_rate": 8.236697345024131e-06, "loss": 0.5878, "step": 24893 }, { "epoch": 0.29763627016104927, "grad_norm": 2.454695224761963, "learning_rate": 8.236549767277093e-06, "loss": 0.5832, "step": 24894 }, { "epoch": 0.2976482263059099, "grad_norm": 3.3527936935424805, "learning_rate": 8.236402184676837e-06, "loss": 0.5635, "step": 24895 }, { "epoch": 0.2976601824507706, "grad_norm": 2.4306793212890625, "learning_rate": 8.236254597223583e-06, "loss": 0.54, "step": 24896 }, { "epoch": 0.29767213859563124, "grad_norm": 1.6011056900024414, "learning_rate": 8.236107004917556e-06, "loss": 0.564, "step": 24897 }, { "epoch": 0.2976840947404919, "grad_norm": 2.6149160861968994, "learning_rate": 8.235959407758974e-06, "loss": 0.6325, "step": 24898 }, { "epoch": 0.2976960508853525, "grad_norm": 1.6434882879257202, "learning_rate": 8.23581180574806e-06, "loss": 0.6157, "step": 24899 }, { "epoch": 0.29770800703021316, "grad_norm": 1.6149507761001587, "learning_rate": 8.235664198885033e-06, "loss": 0.4871, "step": 24900 }, { "epoch": 0.2977199631750738, "grad_norm": 1.5433694124221802, "learning_rate": 8.235516587170118e-06, "loss": 0.5608, "step": 24901 }, { "epoch": 0.2977319193199345, "grad_norm": 2.4556972980499268, "learning_rate": 8.235368970603533e-06, "loss": 0.6019, "step": 24902 }, { "epoch": 0.29774387546479514, "grad_norm": 1.9128283262252808, "learning_rate": 8.235221349185502e-06, "loss": 0.6692, "step": 24903 }, { "epoch": 0.2977558316096558, "grad_norm": 1.59526789188385, "learning_rate": 8.235073722916245e-06, "loss": 0.6188, "step": 24904 }, { "epoch": 0.29776778775451646, "grad_norm": 1.9743807315826416, "learning_rate": 8.234926091795984e-06, "loss": 0.7017, "step": 24905 }, { "epoch": 0.29777974389937706, "grad_norm": 1.6611384153366089, "learning_rate": 8.234778455824937e-06, "loss": 0.5776, "step": 24906 }, { "epoch": 0.2977917000442377, "grad_norm": 2.425039052963257, "learning_rate": 8.23463081500333e-06, "loss": 0.5885, "step": 24907 }, { "epoch": 0.2978036561890984, "grad_norm": 3.4390573501586914, "learning_rate": 8.234483169331382e-06, "loss": 0.5592, "step": 24908 }, { "epoch": 0.29781561233395903, "grad_norm": 3.4681496620178223, "learning_rate": 8.234335518809316e-06, "loss": 0.5588, "step": 24909 }, { "epoch": 0.2978275684788197, "grad_norm": 7.4666972160339355, "learning_rate": 8.23418786343735e-06, "loss": 0.6581, "step": 24910 }, { "epoch": 0.29783952462368035, "grad_norm": 4.408160209655762, "learning_rate": 8.23404020321571e-06, "loss": 0.6114, "step": 24911 }, { "epoch": 0.297851480768541, "grad_norm": 1.3462445735931396, "learning_rate": 8.233892538144616e-06, "loss": 0.6336, "step": 24912 }, { "epoch": 0.29786343691340167, "grad_norm": 1.9018574953079224, "learning_rate": 8.233744868224288e-06, "loss": 0.6311, "step": 24913 }, { "epoch": 0.29787539305826227, "grad_norm": 1.7427337169647217, "learning_rate": 8.233597193454948e-06, "loss": 0.5762, "step": 24914 }, { "epoch": 0.29788734920312293, "grad_norm": 1.6566706895828247, "learning_rate": 8.233449513836815e-06, "loss": 0.6397, "step": 24915 }, { "epoch": 0.2978993053479836, "grad_norm": 2.5998361110687256, "learning_rate": 8.233301829370115e-06, "loss": 0.5835, "step": 24916 }, { "epoch": 0.29791126149284425, "grad_norm": 1.9292302131652832, "learning_rate": 8.233154140055068e-06, "loss": 0.6233, "step": 24917 }, { "epoch": 0.2979232176377049, "grad_norm": 2.0811901092529297, "learning_rate": 8.233006445891895e-06, "loss": 0.6001, "step": 24918 }, { "epoch": 0.29793517378256557, "grad_norm": 5.323301792144775, "learning_rate": 8.232858746880816e-06, "loss": 0.631, "step": 24919 }, { "epoch": 0.2979471299274262, "grad_norm": 2.572584629058838, "learning_rate": 8.232711043022055e-06, "loss": 0.5204, "step": 24920 }, { "epoch": 0.2979590860722868, "grad_norm": 1.8489124774932861, "learning_rate": 8.232563334315832e-06, "loss": 0.7039, "step": 24921 }, { "epoch": 0.2979710422171475, "grad_norm": 40.77748107910156, "learning_rate": 8.232415620762368e-06, "loss": 0.5486, "step": 24922 }, { "epoch": 0.29798299836200814, "grad_norm": 2.8432068824768066, "learning_rate": 8.232267902361887e-06, "loss": 0.5737, "step": 24923 }, { "epoch": 0.2979949545068688, "grad_norm": 2.6298606395721436, "learning_rate": 8.232120179114607e-06, "loss": 0.5979, "step": 24924 }, { "epoch": 0.29800691065172946, "grad_norm": 3.13322377204895, "learning_rate": 8.231972451020752e-06, "loss": 0.7026, "step": 24925 }, { "epoch": 0.2980188667965901, "grad_norm": 2.3136675357818604, "learning_rate": 8.231824718080543e-06, "loss": 0.5578, "step": 24926 }, { "epoch": 0.2980308229414508, "grad_norm": 2.4664876461029053, "learning_rate": 8.2316769802942e-06, "loss": 0.678, "step": 24927 }, { "epoch": 0.2980427790863114, "grad_norm": 2.0297529697418213, "learning_rate": 8.231529237661948e-06, "loss": 0.5725, "step": 24928 }, { "epoch": 0.29805473523117204, "grad_norm": 1.8722954988479614, "learning_rate": 8.231381490184004e-06, "loss": 0.6829, "step": 24929 }, { "epoch": 0.2980666913760327, "grad_norm": 3.1184921264648438, "learning_rate": 8.231233737860596e-06, "loss": 0.602, "step": 24930 }, { "epoch": 0.29807864752089336, "grad_norm": 2.5064144134521484, "learning_rate": 8.231085980691938e-06, "loss": 0.703, "step": 24931 }, { "epoch": 0.298090603665754, "grad_norm": 1.981520652770996, "learning_rate": 8.230938218678255e-06, "loss": 0.6106, "step": 24932 }, { "epoch": 0.2981025598106147, "grad_norm": 2.7556354999542236, "learning_rate": 8.23079045181977e-06, "loss": 0.5854, "step": 24933 }, { "epoch": 0.29811451595547533, "grad_norm": 2.071049213409424, "learning_rate": 8.230642680116703e-06, "loss": 0.6116, "step": 24934 }, { "epoch": 0.298126472100336, "grad_norm": 2.2811203002929688, "learning_rate": 8.230494903569276e-06, "loss": 0.5865, "step": 24935 }, { "epoch": 0.2981384282451966, "grad_norm": 2.7650537490844727, "learning_rate": 8.230347122177711e-06, "loss": 0.6133, "step": 24936 }, { "epoch": 0.29815038439005725, "grad_norm": 2.4993362426757812, "learning_rate": 8.230199335942229e-06, "loss": 0.6577, "step": 24937 }, { "epoch": 0.2981623405349179, "grad_norm": 3.144970178604126, "learning_rate": 8.23005154486305e-06, "loss": 0.6485, "step": 24938 }, { "epoch": 0.29817429667977857, "grad_norm": 1.7238560914993286, "learning_rate": 8.229903748940398e-06, "loss": 0.5006, "step": 24939 }, { "epoch": 0.29818625282463923, "grad_norm": 1.843572735786438, "learning_rate": 8.229755948174494e-06, "loss": 0.5414, "step": 24940 }, { "epoch": 0.2981982089694999, "grad_norm": 1.6985092163085938, "learning_rate": 8.229608142565559e-06, "loss": 0.5929, "step": 24941 }, { "epoch": 0.29821016511436055, "grad_norm": 4.057965278625488, "learning_rate": 8.229460332113814e-06, "loss": 0.628, "step": 24942 }, { "epoch": 0.29822212125922115, "grad_norm": 1.5694082975387573, "learning_rate": 8.229312516819483e-06, "loss": 0.6331, "step": 24943 }, { "epoch": 0.2982340774040818, "grad_norm": 1.7283633947372437, "learning_rate": 8.229164696682787e-06, "loss": 0.6203, "step": 24944 }, { "epoch": 0.29824603354894247, "grad_norm": 1.670101284980774, "learning_rate": 8.229016871703948e-06, "loss": 0.6929, "step": 24945 }, { "epoch": 0.2982579896938031, "grad_norm": 1.6497786045074463, "learning_rate": 8.228869041883186e-06, "loss": 0.5395, "step": 24946 }, { "epoch": 0.2982699458386638, "grad_norm": 1.825966715812683, "learning_rate": 8.22872120722072e-06, "loss": 0.5674, "step": 24947 }, { "epoch": 0.29828190198352444, "grad_norm": 1.6621015071868896, "learning_rate": 8.22857336771678e-06, "loss": 0.6495, "step": 24948 }, { "epoch": 0.2982938581283851, "grad_norm": 1.5726587772369385, "learning_rate": 8.22842552337158e-06, "loss": 0.5536, "step": 24949 }, { "epoch": 0.29830581427324576, "grad_norm": 2.1494646072387695, "learning_rate": 8.228277674185345e-06, "loss": 0.6699, "step": 24950 }, { "epoch": 0.29831777041810636, "grad_norm": 64.03427124023438, "learning_rate": 8.228129820158297e-06, "loss": 0.5653, "step": 24951 }, { "epoch": 0.298329726562967, "grad_norm": 1.9566177129745483, "learning_rate": 8.227981961290654e-06, "loss": 0.6112, "step": 24952 }, { "epoch": 0.2983416827078277, "grad_norm": 18.992815017700195, "learning_rate": 8.227834097582643e-06, "loss": 0.6244, "step": 24953 }, { "epoch": 0.29835363885268834, "grad_norm": 3.0783350467681885, "learning_rate": 8.227686229034484e-06, "loss": 0.6953, "step": 24954 }, { "epoch": 0.298365594997549, "grad_norm": 3.365748643875122, "learning_rate": 8.227538355646398e-06, "loss": 0.5849, "step": 24955 }, { "epoch": 0.29837755114240966, "grad_norm": 3.421250343322754, "learning_rate": 8.227390477418604e-06, "loss": 0.6484, "step": 24956 }, { "epoch": 0.2983895072872703, "grad_norm": 1.9850807189941406, "learning_rate": 8.22724259435133e-06, "loss": 0.5557, "step": 24957 }, { "epoch": 0.2984014634321309, "grad_norm": 2.1887643337249756, "learning_rate": 8.227094706444792e-06, "loss": 0.6029, "step": 24958 }, { "epoch": 0.2984134195769916, "grad_norm": 3.6089932918548584, "learning_rate": 8.226946813699212e-06, "loss": 0.6403, "step": 24959 }, { "epoch": 0.29842537572185224, "grad_norm": 1.7945148944854736, "learning_rate": 8.226798916114818e-06, "loss": 0.5957, "step": 24960 }, { "epoch": 0.2984373318667129, "grad_norm": 1.5782763957977295, "learning_rate": 8.226651013691826e-06, "loss": 0.6016, "step": 24961 }, { "epoch": 0.29844928801157355, "grad_norm": 1.60214102268219, "learning_rate": 8.226503106430459e-06, "loss": 0.5767, "step": 24962 }, { "epoch": 0.2984612441564342, "grad_norm": 2.614490270614624, "learning_rate": 8.226355194330939e-06, "loss": 0.5625, "step": 24963 }, { "epoch": 0.29847320030129487, "grad_norm": 9.739962577819824, "learning_rate": 8.226207277393488e-06, "loss": 0.6141, "step": 24964 }, { "epoch": 0.2984851564461555, "grad_norm": 2.1235766410827637, "learning_rate": 8.226059355618328e-06, "loss": 0.5897, "step": 24965 }, { "epoch": 0.29849711259101613, "grad_norm": 2.6242713928222656, "learning_rate": 8.225911429005681e-06, "loss": 0.6782, "step": 24966 }, { "epoch": 0.2985090687358768, "grad_norm": 4.534177303314209, "learning_rate": 8.225763497555768e-06, "loss": 0.5652, "step": 24967 }, { "epoch": 0.29852102488073745, "grad_norm": 2.5518360137939453, "learning_rate": 8.22561556126881e-06, "loss": 0.6199, "step": 24968 }, { "epoch": 0.2985329810255981, "grad_norm": 5.312769412994385, "learning_rate": 8.22546762014503e-06, "loss": 0.6621, "step": 24969 }, { "epoch": 0.29854493717045877, "grad_norm": 11.975263595581055, "learning_rate": 8.225319674184654e-06, "loss": 0.5998, "step": 24970 }, { "epoch": 0.2985568933153194, "grad_norm": 3.4934215545654297, "learning_rate": 8.225171723387895e-06, "loss": 0.5882, "step": 24971 }, { "epoch": 0.2985688494601801, "grad_norm": 1.7283046245574951, "learning_rate": 8.225023767754981e-06, "loss": 0.6019, "step": 24972 }, { "epoch": 0.2985808056050407, "grad_norm": 4.178959846496582, "learning_rate": 8.224875807286133e-06, "loss": 0.6221, "step": 24973 }, { "epoch": 0.29859276174990135, "grad_norm": 2.5085606575012207, "learning_rate": 8.224727841981571e-06, "loss": 0.5731, "step": 24974 }, { "epoch": 0.298604717894762, "grad_norm": 3.7252745628356934, "learning_rate": 8.224579871841519e-06, "loss": 0.6816, "step": 24975 }, { "epoch": 0.29861667403962266, "grad_norm": 18.67259979248047, "learning_rate": 8.224431896866198e-06, "loss": 0.6708, "step": 24976 }, { "epoch": 0.2986286301844833, "grad_norm": 1.8380955457687378, "learning_rate": 8.224283917055828e-06, "loss": 0.5518, "step": 24977 }, { "epoch": 0.298640586329344, "grad_norm": 3.154642105102539, "learning_rate": 8.224135932410637e-06, "loss": 0.6064, "step": 24978 }, { "epoch": 0.29865254247420464, "grad_norm": 3.8139569759368896, "learning_rate": 8.22398794293084e-06, "loss": 0.6191, "step": 24979 }, { "epoch": 0.29866449861906524, "grad_norm": 2.032310962677002, "learning_rate": 8.22383994861666e-06, "loss": 0.5857, "step": 24980 }, { "epoch": 0.2986764547639259, "grad_norm": 1.654450535774231, "learning_rate": 8.223691949468324e-06, "loss": 0.6465, "step": 24981 }, { "epoch": 0.29868841090878656, "grad_norm": 1.3965909481048584, "learning_rate": 8.22354394548605e-06, "loss": 0.606, "step": 24982 }, { "epoch": 0.2987003670536472, "grad_norm": 1.9059114456176758, "learning_rate": 8.223395936670058e-06, "loss": 0.5693, "step": 24983 }, { "epoch": 0.2987123231985079, "grad_norm": 3.9617161750793457, "learning_rate": 8.223247923020574e-06, "loss": 0.5847, "step": 24984 }, { "epoch": 0.29872427934336854, "grad_norm": 3.9886293411254883, "learning_rate": 8.223099904537817e-06, "loss": 0.6722, "step": 24985 }, { "epoch": 0.2987362354882292, "grad_norm": 1.755619764328003, "learning_rate": 8.222951881222013e-06, "loss": 0.6311, "step": 24986 }, { "epoch": 0.2987481916330898, "grad_norm": 1.7403552532196045, "learning_rate": 8.222803853073378e-06, "loss": 0.6228, "step": 24987 }, { "epoch": 0.29876014777795046, "grad_norm": 2.067564010620117, "learning_rate": 8.22265582009214e-06, "loss": 0.6644, "step": 24988 }, { "epoch": 0.2987721039228111, "grad_norm": 2.201460599899292, "learning_rate": 8.222507782278516e-06, "loss": 0.6012, "step": 24989 }, { "epoch": 0.2987840600676718, "grad_norm": 1.4716473817825317, "learning_rate": 8.222359739632731e-06, "loss": 0.615, "step": 24990 }, { "epoch": 0.29879601621253243, "grad_norm": 1.9747644662857056, "learning_rate": 8.222211692155005e-06, "loss": 0.5908, "step": 24991 }, { "epoch": 0.2988079723573931, "grad_norm": 1.4884763956069946, "learning_rate": 8.222063639845562e-06, "loss": 0.6024, "step": 24992 }, { "epoch": 0.29881992850225375, "grad_norm": 1.8339935541152954, "learning_rate": 8.221915582704623e-06, "loss": 0.6347, "step": 24993 }, { "epoch": 0.2988318846471144, "grad_norm": 1.5034093856811523, "learning_rate": 8.221767520732411e-06, "loss": 0.5211, "step": 24994 }, { "epoch": 0.298843840791975, "grad_norm": 2.527660369873047, "learning_rate": 8.221619453929146e-06, "loss": 0.5673, "step": 24995 }, { "epoch": 0.29885579693683567, "grad_norm": 6.48853063583374, "learning_rate": 8.221471382295053e-06, "loss": 0.6172, "step": 24996 }, { "epoch": 0.29886775308169633, "grad_norm": 3.89968204498291, "learning_rate": 8.22132330583035e-06, "loss": 0.695, "step": 24997 }, { "epoch": 0.298879709226557, "grad_norm": 1.6188747882843018, "learning_rate": 8.221175224535261e-06, "loss": 0.5758, "step": 24998 }, { "epoch": 0.29889166537141765, "grad_norm": 1.8595484495162964, "learning_rate": 8.221027138410009e-06, "loss": 0.6288, "step": 24999 }, { "epoch": 0.2989036215162783, "grad_norm": 1.789007544517517, "learning_rate": 8.220879047454816e-06, "loss": 0.6341, "step": 25000 }, { "epoch": 0.29891557766113896, "grad_norm": 3.178844928741455, "learning_rate": 8.220730951669905e-06, "loss": 0.6835, "step": 25001 }, { "epoch": 0.29892753380599957, "grad_norm": 1.5584838390350342, "learning_rate": 8.220582851055494e-06, "loss": 0.5358, "step": 25002 }, { "epoch": 0.2989394899508602, "grad_norm": 3.149195671081543, "learning_rate": 8.220434745611807e-06, "loss": 0.7142, "step": 25003 }, { "epoch": 0.2989514460957209, "grad_norm": 2.1718740463256836, "learning_rate": 8.220286635339068e-06, "loss": 0.5579, "step": 25004 }, { "epoch": 0.29896340224058154, "grad_norm": 3.5512988567352295, "learning_rate": 8.220138520237499e-06, "loss": 0.64, "step": 25005 }, { "epoch": 0.2989753583854422, "grad_norm": 1.822256088256836, "learning_rate": 8.219990400307319e-06, "loss": 0.5721, "step": 25006 }, { "epoch": 0.29898731453030286, "grad_norm": 1.8944638967514038, "learning_rate": 8.219842275548751e-06, "loss": 0.5602, "step": 25007 }, { "epoch": 0.2989992706751635, "grad_norm": 15.636730194091797, "learning_rate": 8.21969414596202e-06, "loss": 0.5455, "step": 25008 }, { "epoch": 0.2990112268200242, "grad_norm": 1.4004746675491333, "learning_rate": 8.219546011547345e-06, "loss": 0.5146, "step": 25009 }, { "epoch": 0.2990231829648848, "grad_norm": 2.5447440147399902, "learning_rate": 8.219397872304951e-06, "loss": 0.6277, "step": 25010 }, { "epoch": 0.29903513910974544, "grad_norm": 3.161618232727051, "learning_rate": 8.219249728235057e-06, "loss": 0.6174, "step": 25011 }, { "epoch": 0.2990470952546061, "grad_norm": 2.675859212875366, "learning_rate": 8.219101579337886e-06, "loss": 0.654, "step": 25012 }, { "epoch": 0.29905905139946676, "grad_norm": 3.8459527492523193, "learning_rate": 8.218953425613663e-06, "loss": 0.6674, "step": 25013 }, { "epoch": 0.2990710075443274, "grad_norm": 1.603344440460205, "learning_rate": 8.218805267062607e-06, "loss": 0.6025, "step": 25014 }, { "epoch": 0.2990829636891881, "grad_norm": 4.660642147064209, "learning_rate": 8.218657103684941e-06, "loss": 0.5695, "step": 25015 }, { "epoch": 0.29909491983404873, "grad_norm": 1.8146647214889526, "learning_rate": 8.218508935480886e-06, "loss": 0.7441, "step": 25016 }, { "epoch": 0.29910687597890934, "grad_norm": 1.7681831121444702, "learning_rate": 8.218360762450666e-06, "loss": 0.4944, "step": 25017 }, { "epoch": 0.29911883212377, "grad_norm": 1.8490573167800903, "learning_rate": 8.218212584594503e-06, "loss": 0.5887, "step": 25018 }, { "epoch": 0.29913078826863065, "grad_norm": 1.9253053665161133, "learning_rate": 8.21806440191262e-06, "loss": 0.6786, "step": 25019 }, { "epoch": 0.2991427444134913, "grad_norm": 3.1993868350982666, "learning_rate": 8.217916214405235e-06, "loss": 0.6929, "step": 25020 }, { "epoch": 0.29915470055835197, "grad_norm": 1.708939552307129, "learning_rate": 8.217768022072576e-06, "loss": 0.6308, "step": 25021 }, { "epoch": 0.29916665670321263, "grad_norm": 1.754491925239563, "learning_rate": 8.217619824914861e-06, "loss": 0.6803, "step": 25022 }, { "epoch": 0.2991786128480733, "grad_norm": 22.71828269958496, "learning_rate": 8.217471622932313e-06, "loss": 0.6012, "step": 25023 }, { "epoch": 0.2991905689929339, "grad_norm": 2.1113381385803223, "learning_rate": 8.217323416125157e-06, "loss": 0.6383, "step": 25024 }, { "epoch": 0.29920252513779455, "grad_norm": 1.400194525718689, "learning_rate": 8.217175204493613e-06, "loss": 0.5052, "step": 25025 }, { "epoch": 0.2992144812826552, "grad_norm": 1.5299417972564697, "learning_rate": 8.2170269880379e-06, "loss": 0.6192, "step": 25026 }, { "epoch": 0.29922643742751587, "grad_norm": 3.3724467754364014, "learning_rate": 8.216878766758246e-06, "loss": 0.5572, "step": 25027 }, { "epoch": 0.2992383935723765, "grad_norm": 1.483829379081726, "learning_rate": 8.216730540654872e-06, "loss": 0.6068, "step": 25028 }, { "epoch": 0.2992503497172372, "grad_norm": 1.749128818511963, "learning_rate": 8.216582309727998e-06, "loss": 0.5969, "step": 25029 }, { "epoch": 0.29926230586209784, "grad_norm": 2.7342369556427, "learning_rate": 8.216434073977846e-06, "loss": 0.5305, "step": 25030 }, { "epoch": 0.2992742620069585, "grad_norm": 3.07466459274292, "learning_rate": 8.216285833404641e-06, "loss": 0.5849, "step": 25031 }, { "epoch": 0.2992862181518191, "grad_norm": 2.5764665603637695, "learning_rate": 8.216137588008604e-06, "loss": 0.589, "step": 25032 }, { "epoch": 0.29929817429667976, "grad_norm": 1.6189039945602417, "learning_rate": 8.215989337789959e-06, "loss": 0.537, "step": 25033 }, { "epoch": 0.2993101304415404, "grad_norm": 1.9901955127716064, "learning_rate": 8.215841082748923e-06, "loss": 0.6287, "step": 25034 }, { "epoch": 0.2993220865864011, "grad_norm": 1.7843188047409058, "learning_rate": 8.215692822885723e-06, "loss": 0.5804, "step": 25035 }, { "epoch": 0.29933404273126174, "grad_norm": 2.1212141513824463, "learning_rate": 8.21554455820058e-06, "loss": 0.6219, "step": 25036 }, { "epoch": 0.2993459988761224, "grad_norm": 1.8260571956634521, "learning_rate": 8.21539628869372e-06, "loss": 0.6773, "step": 25037 }, { "epoch": 0.29935795502098306, "grad_norm": 1.4468634128570557, "learning_rate": 8.215248014365357e-06, "loss": 0.5755, "step": 25038 }, { "epoch": 0.29936991116584366, "grad_norm": 1.9234968423843384, "learning_rate": 8.21509973521572e-06, "loss": 0.6938, "step": 25039 }, { "epoch": 0.2993818673107043, "grad_norm": 1.8619436025619507, "learning_rate": 8.21495145124503e-06, "loss": 0.5782, "step": 25040 }, { "epoch": 0.299393823455565, "grad_norm": 1.7423242330551147, "learning_rate": 8.214803162453509e-06, "loss": 0.6286, "step": 25041 }, { "epoch": 0.29940577960042564, "grad_norm": 2.745246171951294, "learning_rate": 8.214654868841378e-06, "loss": 0.6784, "step": 25042 }, { "epoch": 0.2994177357452863, "grad_norm": 3.8728713989257812, "learning_rate": 8.214506570408862e-06, "loss": 0.6025, "step": 25043 }, { "epoch": 0.29942969189014695, "grad_norm": 2.739229917526245, "learning_rate": 8.214358267156181e-06, "loss": 0.5259, "step": 25044 }, { "epoch": 0.2994416480350076, "grad_norm": 4.495220184326172, "learning_rate": 8.21420995908356e-06, "loss": 0.5929, "step": 25045 }, { "epoch": 0.2994536041798682, "grad_norm": 2.852895498275757, "learning_rate": 8.214061646191218e-06, "loss": 0.5565, "step": 25046 }, { "epoch": 0.2994655603247289, "grad_norm": 6.09945011138916, "learning_rate": 8.21391332847938e-06, "loss": 0.5533, "step": 25047 }, { "epoch": 0.29947751646958953, "grad_norm": 2.1729447841644287, "learning_rate": 8.213765005948267e-06, "loss": 0.5864, "step": 25048 }, { "epoch": 0.2994894726144502, "grad_norm": 2.9984121322631836, "learning_rate": 8.213616678598102e-06, "loss": 0.6052, "step": 25049 }, { "epoch": 0.29950142875931085, "grad_norm": 1.8098169565200806, "learning_rate": 8.213468346429107e-06, "loss": 0.6169, "step": 25050 }, { "epoch": 0.2995133849041715, "grad_norm": 2.3953921794891357, "learning_rate": 8.213320009441505e-06, "loss": 0.7024, "step": 25051 }, { "epoch": 0.29952534104903217, "grad_norm": 1.8711214065551758, "learning_rate": 8.21317166763552e-06, "loss": 0.6281, "step": 25052 }, { "epoch": 0.2995372971938928, "grad_norm": 5.111044406890869, "learning_rate": 8.21302332101137e-06, "loss": 0.6742, "step": 25053 }, { "epoch": 0.29954925333875343, "grad_norm": 2.9926722049713135, "learning_rate": 8.212874969569281e-06, "loss": 0.5633, "step": 25054 }, { "epoch": 0.2995612094836141, "grad_norm": 2.1082091331481934, "learning_rate": 8.212726613309477e-06, "loss": 0.532, "step": 25055 }, { "epoch": 0.29957316562847475, "grad_norm": 1.4187556505203247, "learning_rate": 8.212578252232175e-06, "loss": 0.5521, "step": 25056 }, { "epoch": 0.2995851217733354, "grad_norm": 2.0514678955078125, "learning_rate": 8.2124298863376e-06, "loss": 0.6979, "step": 25057 }, { "epoch": 0.29959707791819606, "grad_norm": 1.7546665668487549, "learning_rate": 8.212281515625978e-06, "loss": 0.6178, "step": 25058 }, { "epoch": 0.2996090340630567, "grad_norm": 4.594765663146973, "learning_rate": 8.212133140097527e-06, "loss": 0.5443, "step": 25059 }, { "epoch": 0.2996209902079174, "grad_norm": 1.9242316484451294, "learning_rate": 8.21198475975247e-06, "loss": 0.6322, "step": 25060 }, { "epoch": 0.299632946352778, "grad_norm": 2.0151357650756836, "learning_rate": 8.21183637459103e-06, "loss": 0.6741, "step": 25061 }, { "epoch": 0.29964490249763864, "grad_norm": 1.9918397665023804, "learning_rate": 8.211687984613432e-06, "loss": 0.603, "step": 25062 }, { "epoch": 0.2996568586424993, "grad_norm": 2.857438325881958, "learning_rate": 8.211539589819896e-06, "loss": 0.5255, "step": 25063 }, { "epoch": 0.29966881478735996, "grad_norm": 1.936690330505371, "learning_rate": 8.211391190210645e-06, "loss": 0.578, "step": 25064 }, { "epoch": 0.2996807709322206, "grad_norm": 3.1089868545532227, "learning_rate": 8.2112427857859e-06, "loss": 0.5418, "step": 25065 }, { "epoch": 0.2996927270770813, "grad_norm": 6.928170680999756, "learning_rate": 8.211094376545886e-06, "loss": 0.6457, "step": 25066 }, { "epoch": 0.29970468322194194, "grad_norm": 6.5810933113098145, "learning_rate": 8.210945962490825e-06, "loss": 0.6567, "step": 25067 }, { "epoch": 0.2997166393668026, "grad_norm": 1.4803225994110107, "learning_rate": 8.210797543620939e-06, "loss": 0.5623, "step": 25068 }, { "epoch": 0.2997285955116632, "grad_norm": 3.9693779945373535, "learning_rate": 8.21064911993645e-06, "loss": 0.5605, "step": 25069 }, { "epoch": 0.29974055165652386, "grad_norm": 2.928971529006958, "learning_rate": 8.210500691437581e-06, "loss": 0.6973, "step": 25070 }, { "epoch": 0.2997525078013845, "grad_norm": 1.2319087982177734, "learning_rate": 8.210352258124557e-06, "loss": 0.5663, "step": 25071 }, { "epoch": 0.2997644639462452, "grad_norm": 1.7752474546432495, "learning_rate": 8.210203819997595e-06, "loss": 0.5925, "step": 25072 }, { "epoch": 0.29977642009110583, "grad_norm": 3.8108103275299072, "learning_rate": 8.210055377056923e-06, "loss": 0.5654, "step": 25073 }, { "epoch": 0.2997883762359665, "grad_norm": 1.78573477268219, "learning_rate": 8.209906929302763e-06, "loss": 0.6774, "step": 25074 }, { "epoch": 0.29980033238082715, "grad_norm": 1.7044281959533691, "learning_rate": 8.209758476735334e-06, "loss": 0.5585, "step": 25075 }, { "epoch": 0.29981228852568775, "grad_norm": 2.9517710208892822, "learning_rate": 8.209610019354861e-06, "loss": 0.5763, "step": 25076 }, { "epoch": 0.2998242446705484, "grad_norm": 3.1236531734466553, "learning_rate": 8.209461557161566e-06, "loss": 0.5764, "step": 25077 }, { "epoch": 0.29983620081540907, "grad_norm": 1.5988763570785522, "learning_rate": 8.209313090155673e-06, "loss": 0.5929, "step": 25078 }, { "epoch": 0.29984815696026973, "grad_norm": 2.491116762161255, "learning_rate": 8.209164618337405e-06, "loss": 0.7136, "step": 25079 }, { "epoch": 0.2998601131051304, "grad_norm": 1.6741297245025635, "learning_rate": 8.20901614170698e-06, "loss": 0.6217, "step": 25080 }, { "epoch": 0.29987206924999105, "grad_norm": 1.896405816078186, "learning_rate": 8.208867660264625e-06, "loss": 0.6309, "step": 25081 }, { "epoch": 0.2998840253948517, "grad_norm": 1.6131622791290283, "learning_rate": 8.208719174010563e-06, "loss": 0.6033, "step": 25082 }, { "epoch": 0.2998959815397123, "grad_norm": 3.1709883213043213, "learning_rate": 8.208570682945014e-06, "loss": 0.574, "step": 25083 }, { "epoch": 0.29990793768457297, "grad_norm": 3.1826891899108887, "learning_rate": 8.208422187068202e-06, "loss": 0.6468, "step": 25084 }, { "epoch": 0.2999198938294336, "grad_norm": 3.252401113510132, "learning_rate": 8.208273686380349e-06, "loss": 0.6063, "step": 25085 }, { "epoch": 0.2999318499742943, "grad_norm": 2.2031466960906982, "learning_rate": 8.208125180881677e-06, "loss": 0.6764, "step": 25086 }, { "epoch": 0.29994380611915494, "grad_norm": 4.727572917938232, "learning_rate": 8.207976670572412e-06, "loss": 0.6141, "step": 25087 }, { "epoch": 0.2999557622640156, "grad_norm": 3.9701220989227295, "learning_rate": 8.207828155452775e-06, "loss": 0.6726, "step": 25088 }, { "epoch": 0.29996771840887626, "grad_norm": 1.663486123085022, "learning_rate": 8.207679635522988e-06, "loss": 0.6442, "step": 25089 }, { "epoch": 0.2999796745537369, "grad_norm": 3.8781216144561768, "learning_rate": 8.207531110783272e-06, "loss": 0.5749, "step": 25090 }, { "epoch": 0.2999916306985975, "grad_norm": 1.9648991823196411, "learning_rate": 8.207382581233854e-06, "loss": 0.6528, "step": 25091 }, { "epoch": 0.3000035868434582, "grad_norm": 1.5689257383346558, "learning_rate": 8.207234046874954e-06, "loss": 0.6017, "step": 25092 }, { "epoch": 0.30001554298831884, "grad_norm": 3.304257869720459, "learning_rate": 8.207085507706794e-06, "loss": 0.5733, "step": 25093 }, { "epoch": 0.3000274991331795, "grad_norm": 2.8003387451171875, "learning_rate": 8.206936963729597e-06, "loss": 0.5413, "step": 25094 }, { "epoch": 0.30003945527804016, "grad_norm": 2.35561466217041, "learning_rate": 8.20678841494359e-06, "loss": 0.5853, "step": 25095 }, { "epoch": 0.3000514114229008, "grad_norm": 12.733757972717285, "learning_rate": 8.20663986134899e-06, "loss": 0.6123, "step": 25096 }, { "epoch": 0.3000633675677615, "grad_norm": 1.3698006868362427, "learning_rate": 8.206491302946023e-06, "loss": 0.5574, "step": 25097 }, { "epoch": 0.3000753237126221, "grad_norm": 2.1315760612487793, "learning_rate": 8.20634273973491e-06, "loss": 0.6794, "step": 25098 }, { "epoch": 0.30008727985748274, "grad_norm": 1.3617931604385376, "learning_rate": 8.206194171715875e-06, "loss": 0.5626, "step": 25099 }, { "epoch": 0.3000992360023434, "grad_norm": 1.253740668296814, "learning_rate": 8.20604559888914e-06, "loss": 0.5694, "step": 25100 }, { "epoch": 0.30011119214720405, "grad_norm": 1.860268235206604, "learning_rate": 8.205897021254929e-06, "loss": 0.5155, "step": 25101 }, { "epoch": 0.3001231482920647, "grad_norm": 2.631646156311035, "learning_rate": 8.205748438813465e-06, "loss": 0.5968, "step": 25102 }, { "epoch": 0.30013510443692537, "grad_norm": 2.341911554336548, "learning_rate": 8.205599851564969e-06, "loss": 0.5244, "step": 25103 }, { "epoch": 0.30014706058178603, "grad_norm": 4.824429512023926, "learning_rate": 8.205451259509664e-06, "loss": 0.5306, "step": 25104 }, { "epoch": 0.30015901672664663, "grad_norm": 1.765604019165039, "learning_rate": 8.205302662647774e-06, "loss": 0.668, "step": 25105 }, { "epoch": 0.3001709728715073, "grad_norm": 2.625675916671753, "learning_rate": 8.205154060979521e-06, "loss": 0.6377, "step": 25106 }, { "epoch": 0.30018292901636795, "grad_norm": 3.650725841522217, "learning_rate": 8.205005454505129e-06, "loss": 0.6476, "step": 25107 }, { "epoch": 0.3001948851612286, "grad_norm": 2.581116199493408, "learning_rate": 8.204856843224817e-06, "loss": 0.673, "step": 25108 }, { "epoch": 0.30020684130608927, "grad_norm": 2.3073434829711914, "learning_rate": 8.204708227138813e-06, "loss": 0.5418, "step": 25109 }, { "epoch": 0.3002187974509499, "grad_norm": 2.3211212158203125, "learning_rate": 8.204559606247339e-06, "loss": 0.6019, "step": 25110 }, { "epoch": 0.3002307535958106, "grad_norm": 1.7130799293518066, "learning_rate": 8.204410980550615e-06, "loss": 0.5732, "step": 25111 }, { "epoch": 0.30024270974067124, "grad_norm": 2.5610568523406982, "learning_rate": 8.204262350048864e-06, "loss": 0.6006, "step": 25112 }, { "epoch": 0.30025466588553185, "grad_norm": 2.2250399589538574, "learning_rate": 8.204113714742312e-06, "loss": 0.5935, "step": 25113 }, { "epoch": 0.3002666220303925, "grad_norm": 1.408679485321045, "learning_rate": 8.20396507463118e-06, "loss": 0.5343, "step": 25114 }, { "epoch": 0.30027857817525316, "grad_norm": 2.4497852325439453, "learning_rate": 8.20381642971569e-06, "loss": 0.6621, "step": 25115 }, { "epoch": 0.3002905343201138, "grad_norm": 1.5922991037368774, "learning_rate": 8.203667779996067e-06, "loss": 0.6331, "step": 25116 }, { "epoch": 0.3003024904649745, "grad_norm": 1.6163198947906494, "learning_rate": 8.203519125472533e-06, "loss": 0.5418, "step": 25117 }, { "epoch": 0.30031444660983514, "grad_norm": 2.888319253921509, "learning_rate": 8.203370466145309e-06, "loss": 0.7255, "step": 25118 }, { "epoch": 0.3003264027546958, "grad_norm": 2.4275333881378174, "learning_rate": 8.20322180201462e-06, "loss": 0.6539, "step": 25119 }, { "epoch": 0.3003383588995564, "grad_norm": 1.6786643266677856, "learning_rate": 8.20307313308069e-06, "loss": 0.5381, "step": 25120 }, { "epoch": 0.30035031504441706, "grad_norm": 3.0508172512054443, "learning_rate": 8.20292445934374e-06, "loss": 0.5119, "step": 25121 }, { "epoch": 0.3003622711892777, "grad_norm": 2.370521068572998, "learning_rate": 8.202775780803992e-06, "loss": 0.5563, "step": 25122 }, { "epoch": 0.3003742273341384, "grad_norm": 1.977545142173767, "learning_rate": 8.202627097461671e-06, "loss": 0.652, "step": 25123 }, { "epoch": 0.30038618347899904, "grad_norm": 2.4282004833221436, "learning_rate": 8.202478409317e-06, "loss": 0.6255, "step": 25124 }, { "epoch": 0.3003981396238597, "grad_norm": 2.004666805267334, "learning_rate": 8.202329716370201e-06, "loss": 0.618, "step": 25125 }, { "epoch": 0.30041009576872035, "grad_norm": 2.5413365364074707, "learning_rate": 8.202181018621496e-06, "loss": 0.6498, "step": 25126 }, { "epoch": 0.300422051913581, "grad_norm": 2.2208359241485596, "learning_rate": 8.20203231607111e-06, "loss": 0.594, "step": 25127 }, { "epoch": 0.3004340080584416, "grad_norm": 3.875378131866455, "learning_rate": 8.201883608719266e-06, "loss": 0.5208, "step": 25128 }, { "epoch": 0.3004459642033023, "grad_norm": 2.6264119148254395, "learning_rate": 8.201734896566185e-06, "loss": 0.5353, "step": 25129 }, { "epoch": 0.30045792034816293, "grad_norm": 2.2375354766845703, "learning_rate": 8.201586179612091e-06, "loss": 0.6307, "step": 25130 }, { "epoch": 0.3004698764930236, "grad_norm": 1.312644362449646, "learning_rate": 8.201437457857209e-06, "loss": 0.6087, "step": 25131 }, { "epoch": 0.30048183263788425, "grad_norm": 3.519249439239502, "learning_rate": 8.201288731301758e-06, "loss": 0.6107, "step": 25132 }, { "epoch": 0.3004937887827449, "grad_norm": 1.467281460762024, "learning_rate": 8.201139999945964e-06, "loss": 0.6273, "step": 25133 }, { "epoch": 0.30050574492760557, "grad_norm": 1.806762456893921, "learning_rate": 8.20099126379005e-06, "loss": 0.6592, "step": 25134 }, { "epoch": 0.30051770107246617, "grad_norm": 2.113006353378296, "learning_rate": 8.200842522834237e-06, "loss": 0.6738, "step": 25135 }, { "epoch": 0.30052965721732683, "grad_norm": 2.0333189964294434, "learning_rate": 8.20069377707875e-06, "loss": 0.6871, "step": 25136 }, { "epoch": 0.3005416133621875, "grad_norm": 1.56195867061615, "learning_rate": 8.20054502652381e-06, "loss": 0.6463, "step": 25137 }, { "epoch": 0.30055356950704815, "grad_norm": 2.084625244140625, "learning_rate": 8.200396271169644e-06, "loss": 0.5688, "step": 25138 }, { "epoch": 0.3005655256519088, "grad_norm": 4.016580104827881, "learning_rate": 8.20024751101647e-06, "loss": 0.5864, "step": 25139 }, { "epoch": 0.30057748179676946, "grad_norm": 3.7332048416137695, "learning_rate": 8.200098746064515e-06, "loss": 0.5714, "step": 25140 }, { "epoch": 0.3005894379416301, "grad_norm": 1.506320595741272, "learning_rate": 8.199949976313999e-06, "loss": 0.5807, "step": 25141 }, { "epoch": 0.3006013940864907, "grad_norm": 1.6628668308258057, "learning_rate": 8.199801201765147e-06, "loss": 0.6124, "step": 25142 }, { "epoch": 0.3006133502313514, "grad_norm": 1.5014028549194336, "learning_rate": 8.199652422418183e-06, "loss": 0.5865, "step": 25143 }, { "epoch": 0.30062530637621204, "grad_norm": 2.0535266399383545, "learning_rate": 8.199503638273327e-06, "loss": 0.7069, "step": 25144 }, { "epoch": 0.3006372625210727, "grad_norm": 2.3084864616394043, "learning_rate": 8.199354849330804e-06, "loss": 0.5713, "step": 25145 }, { "epoch": 0.30064921866593336, "grad_norm": 2.022081136703491, "learning_rate": 8.19920605559084e-06, "loss": 0.5809, "step": 25146 }, { "epoch": 0.300661174810794, "grad_norm": 1.3367063999176025, "learning_rate": 8.199057257053654e-06, "loss": 0.5878, "step": 25147 }, { "epoch": 0.3006731309556547, "grad_norm": 2.306776523590088, "learning_rate": 8.198908453719469e-06, "loss": 0.5862, "step": 25148 }, { "epoch": 0.30068508710051534, "grad_norm": 1.6252084970474243, "learning_rate": 8.198759645588509e-06, "loss": 0.593, "step": 25149 }, { "epoch": 0.30069704324537594, "grad_norm": 3.0927894115448, "learning_rate": 8.198610832660997e-06, "loss": 0.5673, "step": 25150 }, { "epoch": 0.3007089993902366, "grad_norm": 1.7685916423797607, "learning_rate": 8.198462014937159e-06, "loss": 0.65, "step": 25151 }, { "epoch": 0.30072095553509726, "grad_norm": 1.9704904556274414, "learning_rate": 8.198313192417215e-06, "loss": 0.6526, "step": 25152 }, { "epoch": 0.3007329116799579, "grad_norm": 2.754415273666382, "learning_rate": 8.198164365101389e-06, "loss": 0.5646, "step": 25153 }, { "epoch": 0.3007448678248186, "grad_norm": 5.206949234008789, "learning_rate": 8.198015532989905e-06, "loss": 0.6963, "step": 25154 }, { "epoch": 0.30075682396967923, "grad_norm": 1.6359612941741943, "learning_rate": 8.197866696082984e-06, "loss": 0.6265, "step": 25155 }, { "epoch": 0.3007687801145399, "grad_norm": 1.7360111474990845, "learning_rate": 8.19771785438085e-06, "loss": 0.6072, "step": 25156 }, { "epoch": 0.3007807362594005, "grad_norm": 1.6693867444992065, "learning_rate": 8.197569007883728e-06, "loss": 0.6317, "step": 25157 }, { "epoch": 0.30079269240426115, "grad_norm": 2.453639030456543, "learning_rate": 8.19742015659184e-06, "loss": 0.6686, "step": 25158 }, { "epoch": 0.3008046485491218, "grad_norm": 1.4891207218170166, "learning_rate": 8.197271300505408e-06, "loss": 0.615, "step": 25159 }, { "epoch": 0.30081660469398247, "grad_norm": 1.3337715864181519, "learning_rate": 8.197122439624657e-06, "loss": 0.5692, "step": 25160 }, { "epoch": 0.30082856083884313, "grad_norm": 10.25890827178955, "learning_rate": 8.196973573949809e-06, "loss": 0.5645, "step": 25161 }, { "epoch": 0.3008405169837038, "grad_norm": 1.9603358507156372, "learning_rate": 8.196824703481088e-06, "loss": 0.526, "step": 25162 }, { "epoch": 0.30085247312856445, "grad_norm": 1.8943426609039307, "learning_rate": 8.196675828218718e-06, "loss": 0.5532, "step": 25163 }, { "epoch": 0.30086442927342505, "grad_norm": 2.634188175201416, "learning_rate": 8.19652694816292e-06, "loss": 0.6595, "step": 25164 }, { "epoch": 0.3008763854182857, "grad_norm": 2.549421548843384, "learning_rate": 8.196378063313919e-06, "loss": 0.5955, "step": 25165 }, { "epoch": 0.30088834156314637, "grad_norm": 2.8605008125305176, "learning_rate": 8.196229173671938e-06, "loss": 0.6285, "step": 25166 }, { "epoch": 0.300900297708007, "grad_norm": 1.7234388589859009, "learning_rate": 8.1960802792372e-06, "loss": 0.5932, "step": 25167 }, { "epoch": 0.3009122538528677, "grad_norm": 2.2263944149017334, "learning_rate": 8.195931380009928e-06, "loss": 0.6088, "step": 25168 }, { "epoch": 0.30092420999772834, "grad_norm": 1.6176294088363647, "learning_rate": 8.195782475990345e-06, "loss": 0.6152, "step": 25169 }, { "epoch": 0.300936166142589, "grad_norm": 1.4039758443832397, "learning_rate": 8.195633567178676e-06, "loss": 0.5102, "step": 25170 }, { "epoch": 0.30094812228744966, "grad_norm": 2.223768949508667, "learning_rate": 8.195484653575143e-06, "loss": 0.5972, "step": 25171 }, { "epoch": 0.30096007843231026, "grad_norm": 1.6894536018371582, "learning_rate": 8.195335735179968e-06, "loss": 0.6039, "step": 25172 }, { "epoch": 0.3009720345771709, "grad_norm": 2.09096097946167, "learning_rate": 8.195186811993377e-06, "loss": 0.7026, "step": 25173 }, { "epoch": 0.3009839907220316, "grad_norm": 2.606229305267334, "learning_rate": 8.195037884015591e-06, "loss": 0.5768, "step": 25174 }, { "epoch": 0.30099594686689224, "grad_norm": 3.026242733001709, "learning_rate": 8.194888951246835e-06, "loss": 0.6116, "step": 25175 }, { "epoch": 0.3010079030117529, "grad_norm": 1.659914255142212, "learning_rate": 8.19474001368733e-06, "loss": 0.6242, "step": 25176 }, { "epoch": 0.30101985915661356, "grad_norm": 1.5967954397201538, "learning_rate": 8.194591071337303e-06, "loss": 0.5944, "step": 25177 }, { "epoch": 0.3010318153014742, "grad_norm": 2.166590452194214, "learning_rate": 8.194442124196976e-06, "loss": 0.6519, "step": 25178 }, { "epoch": 0.3010437714463348, "grad_norm": 1.4612582921981812, "learning_rate": 8.19429317226657e-06, "loss": 0.6435, "step": 25179 }, { "epoch": 0.3010557275911955, "grad_norm": 4.061615467071533, "learning_rate": 8.194144215546311e-06, "loss": 0.5523, "step": 25180 }, { "epoch": 0.30106768373605614, "grad_norm": 1.7057533264160156, "learning_rate": 8.19399525403642e-06, "loss": 0.6228, "step": 25181 }, { "epoch": 0.3010796398809168, "grad_norm": 1.8783167600631714, "learning_rate": 8.193846287737123e-06, "loss": 0.608, "step": 25182 }, { "epoch": 0.30109159602577745, "grad_norm": 2.5122830867767334, "learning_rate": 8.193697316648642e-06, "loss": 0.6053, "step": 25183 }, { "epoch": 0.3011035521706381, "grad_norm": 3.651705265045166, "learning_rate": 8.1935483407712e-06, "loss": 0.6171, "step": 25184 }, { "epoch": 0.30111550831549877, "grad_norm": 2.949796199798584, "learning_rate": 8.193399360105021e-06, "loss": 0.6333, "step": 25185 }, { "epoch": 0.30112746446035943, "grad_norm": 1.991458773612976, "learning_rate": 8.19325037465033e-06, "loss": 0.6059, "step": 25186 }, { "epoch": 0.30113942060522003, "grad_norm": 2.1651649475097656, "learning_rate": 8.193101384407345e-06, "loss": 0.6609, "step": 25187 }, { "epoch": 0.3011513767500807, "grad_norm": 2.2056620121002197, "learning_rate": 8.192952389376297e-06, "loss": 0.6664, "step": 25188 }, { "epoch": 0.30116333289494135, "grad_norm": 1.7622941732406616, "learning_rate": 8.192803389557404e-06, "loss": 0.5751, "step": 25189 }, { "epoch": 0.301175289039802, "grad_norm": 2.298823356628418, "learning_rate": 8.19265438495089e-06, "loss": 0.6457, "step": 25190 }, { "epoch": 0.30118724518466267, "grad_norm": 2.366790294647217, "learning_rate": 8.19250537555698e-06, "loss": 0.6248, "step": 25191 }, { "epoch": 0.3011992013295233, "grad_norm": 4.67565393447876, "learning_rate": 8.1923563613759e-06, "loss": 0.5418, "step": 25192 }, { "epoch": 0.301211157474384, "grad_norm": 2.5876049995422363, "learning_rate": 8.192207342407865e-06, "loss": 0.6132, "step": 25193 }, { "epoch": 0.3012231136192446, "grad_norm": 1.8683868646621704, "learning_rate": 8.192058318653106e-06, "loss": 0.5658, "step": 25194 }, { "epoch": 0.30123506976410525, "grad_norm": 1.5883064270019531, "learning_rate": 8.191909290111846e-06, "loss": 0.5755, "step": 25195 }, { "epoch": 0.3012470259089659, "grad_norm": 1.6549493074417114, "learning_rate": 8.191760256784305e-06, "loss": 0.589, "step": 25196 }, { "epoch": 0.30125898205382656, "grad_norm": 6.507879257202148, "learning_rate": 8.191611218670708e-06, "loss": 0.5968, "step": 25197 }, { "epoch": 0.3012709381986872, "grad_norm": 1.4807077646255493, "learning_rate": 8.191462175771279e-06, "loss": 0.5577, "step": 25198 }, { "epoch": 0.3012828943435479, "grad_norm": 3.8657824993133545, "learning_rate": 8.19131312808624e-06, "loss": 0.6267, "step": 25199 }, { "epoch": 0.30129485048840854, "grad_norm": 2.6136975288391113, "learning_rate": 8.191164075615817e-06, "loss": 0.6024, "step": 25200 }, { "epoch": 0.30130680663326914, "grad_norm": 2.4353315830230713, "learning_rate": 8.191015018360231e-06, "loss": 0.6261, "step": 25201 }, { "epoch": 0.3013187627781298, "grad_norm": 1.8091646432876587, "learning_rate": 8.190865956319709e-06, "loss": 0.5576, "step": 25202 }, { "epoch": 0.30133071892299046, "grad_norm": 1.4378021955490112, "learning_rate": 8.19071688949447e-06, "loss": 0.6048, "step": 25203 }, { "epoch": 0.3013426750678511, "grad_norm": 1.8454383611679077, "learning_rate": 8.19056781788474e-06, "loss": 0.658, "step": 25204 }, { "epoch": 0.3013546312127118, "grad_norm": 1.7363280057907104, "learning_rate": 8.190418741490743e-06, "loss": 0.6163, "step": 25205 }, { "epoch": 0.30136658735757244, "grad_norm": 1.8946220874786377, "learning_rate": 8.190269660312701e-06, "loss": 0.551, "step": 25206 }, { "epoch": 0.3013785435024331, "grad_norm": 2.211764335632324, "learning_rate": 8.190120574350839e-06, "loss": 0.5559, "step": 25207 }, { "epoch": 0.30139049964729375, "grad_norm": 1.6191354990005493, "learning_rate": 8.189971483605379e-06, "loss": 0.5084, "step": 25208 }, { "epoch": 0.30140245579215436, "grad_norm": 2.7167744636535645, "learning_rate": 8.189822388076545e-06, "loss": 0.5899, "step": 25209 }, { "epoch": 0.301414411937015, "grad_norm": 5.571324825286865, "learning_rate": 8.189673287764561e-06, "loss": 0.4854, "step": 25210 }, { "epoch": 0.3014263680818757, "grad_norm": 2.2579424381256104, "learning_rate": 8.189524182669652e-06, "loss": 0.7094, "step": 25211 }, { "epoch": 0.30143832422673633, "grad_norm": 2.1891276836395264, "learning_rate": 8.189375072792041e-06, "loss": 0.671, "step": 25212 }, { "epoch": 0.301450280371597, "grad_norm": 1.767013669013977, "learning_rate": 8.189225958131948e-06, "loss": 0.5494, "step": 25213 }, { "epoch": 0.30146223651645765, "grad_norm": 2.0971946716308594, "learning_rate": 8.1890768386896e-06, "loss": 0.5291, "step": 25214 }, { "epoch": 0.3014741926613183, "grad_norm": 1.4536967277526855, "learning_rate": 8.188927714465223e-06, "loss": 0.574, "step": 25215 }, { "epoch": 0.3014861488061789, "grad_norm": 1.5946824550628662, "learning_rate": 8.188778585459034e-06, "loss": 0.5771, "step": 25216 }, { "epoch": 0.30149810495103957, "grad_norm": 2.608853340148926, "learning_rate": 8.188629451671262e-06, "loss": 0.6104, "step": 25217 }, { "epoch": 0.30151006109590023, "grad_norm": 1.6056638956069946, "learning_rate": 8.188480313102127e-06, "loss": 0.6326, "step": 25218 }, { "epoch": 0.3015220172407609, "grad_norm": 1.6755847930908203, "learning_rate": 8.188331169751856e-06, "loss": 0.5382, "step": 25219 }, { "epoch": 0.30153397338562155, "grad_norm": 1.9734017848968506, "learning_rate": 8.18818202162067e-06, "loss": 0.5656, "step": 25220 }, { "epoch": 0.3015459295304822, "grad_norm": 3.570714235305786, "learning_rate": 8.188032868708796e-06, "loss": 0.5555, "step": 25221 }, { "epoch": 0.30155788567534286, "grad_norm": 1.4380488395690918, "learning_rate": 8.187883711016451e-06, "loss": 0.5733, "step": 25222 }, { "epoch": 0.30156984182020347, "grad_norm": 1.612324833869934, "learning_rate": 8.187734548543867e-06, "loss": 0.5341, "step": 25223 }, { "epoch": 0.3015817979650641, "grad_norm": 1.7966371774673462, "learning_rate": 8.187585381291263e-06, "loss": 0.5555, "step": 25224 }, { "epoch": 0.3015937541099248, "grad_norm": 2.5070626735687256, "learning_rate": 8.187436209258862e-06, "loss": 0.6, "step": 25225 }, { "epoch": 0.30160571025478544, "grad_norm": 1.4009764194488525, "learning_rate": 8.18728703244689e-06, "loss": 0.5842, "step": 25226 }, { "epoch": 0.3016176663996461, "grad_norm": 3.363086700439453, "learning_rate": 8.187137850855569e-06, "loss": 0.6283, "step": 25227 }, { "epoch": 0.30162962254450676, "grad_norm": 1.5866377353668213, "learning_rate": 8.186988664485125e-06, "loss": 0.6115, "step": 25228 }, { "epoch": 0.3016415786893674, "grad_norm": 3.3394479751586914, "learning_rate": 8.18683947333578e-06, "loss": 0.5417, "step": 25229 }, { "epoch": 0.3016535348342281, "grad_norm": 2.286558151245117, "learning_rate": 8.186690277407756e-06, "loss": 0.6, "step": 25230 }, { "epoch": 0.3016654909790887, "grad_norm": 2.116894245147705, "learning_rate": 8.18654107670128e-06, "loss": 0.6423, "step": 25231 }, { "epoch": 0.30167744712394934, "grad_norm": 1.4283093214035034, "learning_rate": 8.186391871216574e-06, "loss": 0.6439, "step": 25232 }, { "epoch": 0.30168940326881, "grad_norm": 1.9306279420852661, "learning_rate": 8.186242660953862e-06, "loss": 0.5494, "step": 25233 }, { "epoch": 0.30170135941367066, "grad_norm": 2.105504274368286, "learning_rate": 8.186093445913368e-06, "loss": 0.5703, "step": 25234 }, { "epoch": 0.3017133155585313, "grad_norm": 1.6589938402175903, "learning_rate": 8.185944226095314e-06, "loss": 0.5168, "step": 25235 }, { "epoch": 0.301725271703392, "grad_norm": 3.2794201374053955, "learning_rate": 8.185795001499928e-06, "loss": 0.6442, "step": 25236 }, { "epoch": 0.30173722784825263, "grad_norm": 3.097942352294922, "learning_rate": 8.185645772127431e-06, "loss": 0.5965, "step": 25237 }, { "epoch": 0.30174918399311323, "grad_norm": 1.4067528247833252, "learning_rate": 8.185496537978045e-06, "loss": 0.497, "step": 25238 }, { "epoch": 0.3017611401379739, "grad_norm": 3.0373215675354004, "learning_rate": 8.185347299051995e-06, "loss": 0.5606, "step": 25239 }, { "epoch": 0.30177309628283455, "grad_norm": 1.7642229795455933, "learning_rate": 8.185198055349507e-06, "loss": 0.6778, "step": 25240 }, { "epoch": 0.3017850524276952, "grad_norm": 1.5333982706069946, "learning_rate": 8.185048806870804e-06, "loss": 0.5985, "step": 25241 }, { "epoch": 0.30179700857255587, "grad_norm": 1.2488603591918945, "learning_rate": 8.184899553616109e-06, "loss": 0.5493, "step": 25242 }, { "epoch": 0.30180896471741653, "grad_norm": 1.387372612953186, "learning_rate": 8.184750295585643e-06, "loss": 0.5999, "step": 25243 }, { "epoch": 0.3018209208622772, "grad_norm": 1.8530341386795044, "learning_rate": 8.184601032779635e-06, "loss": 0.5497, "step": 25244 }, { "epoch": 0.30183287700713785, "grad_norm": 1.7321481704711914, "learning_rate": 8.184451765198306e-06, "loss": 0.6139, "step": 25245 }, { "epoch": 0.30184483315199845, "grad_norm": 2.1611900329589844, "learning_rate": 8.18430249284188e-06, "loss": 0.4862, "step": 25246 }, { "epoch": 0.3018567892968591, "grad_norm": 1.4219958782196045, "learning_rate": 8.184153215710582e-06, "loss": 0.6329, "step": 25247 }, { "epoch": 0.30186874544171977, "grad_norm": 1.9955741167068481, "learning_rate": 8.184003933804634e-06, "loss": 0.684, "step": 25248 }, { "epoch": 0.3018807015865804, "grad_norm": 2.165198802947998, "learning_rate": 8.183854647124261e-06, "loss": 0.6094, "step": 25249 }, { "epoch": 0.3018926577314411, "grad_norm": 2.923112630844116, "learning_rate": 8.183705355669685e-06, "loss": 0.5872, "step": 25250 }, { "epoch": 0.30190461387630174, "grad_norm": 1.5442781448364258, "learning_rate": 8.183556059441134e-06, "loss": 0.6683, "step": 25251 }, { "epoch": 0.3019165700211624, "grad_norm": 1.9571620225906372, "learning_rate": 8.183406758438827e-06, "loss": 0.7273, "step": 25252 }, { "epoch": 0.301928526166023, "grad_norm": 2.7744555473327637, "learning_rate": 8.183257452662993e-06, "loss": 0.5632, "step": 25253 }, { "epoch": 0.30194048231088366, "grad_norm": 1.4167606830596924, "learning_rate": 8.183108142113851e-06, "loss": 0.6165, "step": 25254 }, { "epoch": 0.3019524384557443, "grad_norm": 1.6454992294311523, "learning_rate": 8.182958826791627e-06, "loss": 0.5439, "step": 25255 }, { "epoch": 0.301964394600605, "grad_norm": 1.688928246498108, "learning_rate": 8.182809506696548e-06, "loss": 0.6023, "step": 25256 }, { "epoch": 0.30197635074546564, "grad_norm": 1.3676882982254028, "learning_rate": 8.182660181828832e-06, "loss": 0.6686, "step": 25257 }, { "epoch": 0.3019883068903263, "grad_norm": 1.450378179550171, "learning_rate": 8.182510852188705e-06, "loss": 0.6362, "step": 25258 }, { "epoch": 0.30200026303518696, "grad_norm": 1.535310983657837, "learning_rate": 8.182361517776394e-06, "loss": 0.5505, "step": 25259 }, { "epoch": 0.30201221918004756, "grad_norm": 2.0785536766052246, "learning_rate": 8.18221217859212e-06, "loss": 0.4655, "step": 25260 }, { "epoch": 0.3020241753249082, "grad_norm": 1.4858464002609253, "learning_rate": 8.182062834636107e-06, "loss": 0.5263, "step": 25261 }, { "epoch": 0.3020361314697689, "grad_norm": 1.9456959962844849, "learning_rate": 8.18191348590858e-06, "loss": 0.625, "step": 25262 }, { "epoch": 0.30204808761462953, "grad_norm": 1.5268715620040894, "learning_rate": 8.18176413240976e-06, "loss": 0.6396, "step": 25263 }, { "epoch": 0.3020600437594902, "grad_norm": 1.9283897876739502, "learning_rate": 8.181614774139877e-06, "loss": 0.7101, "step": 25264 }, { "epoch": 0.30207199990435085, "grad_norm": 1.9194022417068481, "learning_rate": 8.18146541109915e-06, "loss": 0.5604, "step": 25265 }, { "epoch": 0.3020839560492115, "grad_norm": 1.425345540046692, "learning_rate": 8.181316043287803e-06, "loss": 0.549, "step": 25266 }, { "epoch": 0.30209591219407217, "grad_norm": 1.1914676427841187, "learning_rate": 8.181166670706062e-06, "loss": 0.5228, "step": 25267 }, { "epoch": 0.3021078683389328, "grad_norm": 6.3543524742126465, "learning_rate": 8.18101729335415e-06, "loss": 0.6647, "step": 25268 }, { "epoch": 0.30211982448379343, "grad_norm": 3.1253395080566406, "learning_rate": 8.180867911232293e-06, "loss": 0.6246, "step": 25269 }, { "epoch": 0.3021317806286541, "grad_norm": 1.6724951267242432, "learning_rate": 8.180718524340712e-06, "loss": 0.6692, "step": 25270 }, { "epoch": 0.30214373677351475, "grad_norm": 2.735635280609131, "learning_rate": 8.180569132679632e-06, "loss": 0.6589, "step": 25271 }, { "epoch": 0.3021556929183754, "grad_norm": 1.663983702659607, "learning_rate": 8.180419736249278e-06, "loss": 0.6598, "step": 25272 }, { "epoch": 0.30216764906323607, "grad_norm": 1.470733880996704, "learning_rate": 8.180270335049872e-06, "loss": 0.6885, "step": 25273 }, { "epoch": 0.3021796052080967, "grad_norm": 1.7204129695892334, "learning_rate": 8.18012092908164e-06, "loss": 0.5959, "step": 25274 }, { "epoch": 0.3021915613529573, "grad_norm": 1.5016605854034424, "learning_rate": 8.179971518344807e-06, "loss": 0.6254, "step": 25275 }, { "epoch": 0.302203517497818, "grad_norm": 1.953586220741272, "learning_rate": 8.179822102839593e-06, "loss": 0.6183, "step": 25276 }, { "epoch": 0.30221547364267864, "grad_norm": 1.4287195205688477, "learning_rate": 8.179672682566226e-06, "loss": 0.5776, "step": 25277 }, { "epoch": 0.3022274297875393, "grad_norm": 6.586002349853516, "learning_rate": 8.179523257524928e-06, "loss": 0.5748, "step": 25278 }, { "epoch": 0.30223938593239996, "grad_norm": 1.9402798414230347, "learning_rate": 8.179373827715923e-06, "loss": 0.5364, "step": 25279 }, { "epoch": 0.3022513420772606, "grad_norm": 2.022529363632202, "learning_rate": 8.179224393139436e-06, "loss": 0.574, "step": 25280 }, { "epoch": 0.3022632982221213, "grad_norm": 15.181520462036133, "learning_rate": 8.179074953795692e-06, "loss": 0.6385, "step": 25281 }, { "epoch": 0.30227525436698194, "grad_norm": 4.329978942871094, "learning_rate": 8.178925509684912e-06, "loss": 0.6085, "step": 25282 }, { "epoch": 0.30228721051184254, "grad_norm": 1.8097532987594604, "learning_rate": 8.178776060807323e-06, "loss": 0.6844, "step": 25283 }, { "epoch": 0.3022991666567032, "grad_norm": 2.4485316276550293, "learning_rate": 8.178626607163147e-06, "loss": 0.6287, "step": 25284 }, { "epoch": 0.30231112280156386, "grad_norm": 3.4337167739868164, "learning_rate": 8.178477148752611e-06, "loss": 0.5132, "step": 25285 }, { "epoch": 0.3023230789464245, "grad_norm": 2.855468273162842, "learning_rate": 8.178327685575935e-06, "loss": 0.5751, "step": 25286 }, { "epoch": 0.3023350350912852, "grad_norm": 2.324044942855835, "learning_rate": 8.178178217633346e-06, "loss": 0.5833, "step": 25287 }, { "epoch": 0.30234699123614583, "grad_norm": 1.8939489126205444, "learning_rate": 8.178028744925069e-06, "loss": 0.7227, "step": 25288 }, { "epoch": 0.3023589473810065, "grad_norm": 1.560637354850769, "learning_rate": 8.177879267451325e-06, "loss": 0.6905, "step": 25289 }, { "epoch": 0.3023709035258671, "grad_norm": 3.2608249187469482, "learning_rate": 8.17772978521234e-06, "loss": 0.6555, "step": 25290 }, { "epoch": 0.30238285967072775, "grad_norm": 1.607178807258606, "learning_rate": 8.177580298208337e-06, "loss": 0.5993, "step": 25291 }, { "epoch": 0.3023948158155884, "grad_norm": 2.5176563262939453, "learning_rate": 8.177430806439543e-06, "loss": 0.6557, "step": 25292 }, { "epoch": 0.3024067719604491, "grad_norm": 1.5951863527297974, "learning_rate": 8.177281309906179e-06, "loss": 0.6554, "step": 25293 }, { "epoch": 0.30241872810530973, "grad_norm": 3.182896137237549, "learning_rate": 8.17713180860847e-06, "loss": 0.5915, "step": 25294 }, { "epoch": 0.3024306842501704, "grad_norm": 3.4460484981536865, "learning_rate": 8.17698230254664e-06, "loss": 0.5118, "step": 25295 }, { "epoch": 0.30244264039503105, "grad_norm": 3.038217067718506, "learning_rate": 8.176832791720917e-06, "loss": 0.6404, "step": 25296 }, { "epoch": 0.30245459653989165, "grad_norm": 2.206332206726074, "learning_rate": 8.17668327613152e-06, "loss": 0.6059, "step": 25297 }, { "epoch": 0.3024665526847523, "grad_norm": 3.563739061355591, "learning_rate": 8.176533755778675e-06, "loss": 0.6327, "step": 25298 }, { "epoch": 0.30247850882961297, "grad_norm": 1.5552068948745728, "learning_rate": 8.176384230662606e-06, "loss": 0.5831, "step": 25299 }, { "epoch": 0.3024904649744736, "grad_norm": 3.587956190109253, "learning_rate": 8.176234700783537e-06, "loss": 0.5801, "step": 25300 }, { "epoch": 0.3025024211193343, "grad_norm": 1.7253503799438477, "learning_rate": 8.176085166141694e-06, "loss": 0.58, "step": 25301 }, { "epoch": 0.30251437726419494, "grad_norm": 2.503328323364258, "learning_rate": 8.1759356267373e-06, "loss": 0.6536, "step": 25302 }, { "epoch": 0.3025263334090556, "grad_norm": 2.1478512287139893, "learning_rate": 8.175786082570578e-06, "loss": 0.664, "step": 25303 }, { "epoch": 0.30253828955391626, "grad_norm": 1.7051681280136108, "learning_rate": 8.175636533641755e-06, "loss": 0.5505, "step": 25304 }, { "epoch": 0.30255024569877687, "grad_norm": 3.1269845962524414, "learning_rate": 8.175486979951053e-06, "loss": 0.6698, "step": 25305 }, { "epoch": 0.3025622018436375, "grad_norm": 1.7884879112243652, "learning_rate": 8.175337421498696e-06, "loss": 0.5417, "step": 25306 }, { "epoch": 0.3025741579884982, "grad_norm": 2.0698764324188232, "learning_rate": 8.175187858284911e-06, "loss": 0.6267, "step": 25307 }, { "epoch": 0.30258611413335884, "grad_norm": 2.1429390907287598, "learning_rate": 8.17503829030992e-06, "loss": 0.5816, "step": 25308 }, { "epoch": 0.3025980702782195, "grad_norm": 2.106752872467041, "learning_rate": 8.174888717573949e-06, "loss": 0.6445, "step": 25309 }, { "epoch": 0.30261002642308016, "grad_norm": 2.0413572788238525, "learning_rate": 8.17473914007722e-06, "loss": 0.6306, "step": 25310 }, { "epoch": 0.3026219825679408, "grad_norm": 12.693368911743164, "learning_rate": 8.174589557819957e-06, "loss": 0.5364, "step": 25311 }, { "epoch": 0.3026339387128014, "grad_norm": 1.6546275615692139, "learning_rate": 8.174439970802387e-06, "loss": 0.5447, "step": 25312 }, { "epoch": 0.3026458948576621, "grad_norm": 3.144256353378296, "learning_rate": 8.174290379024733e-06, "loss": 0.6379, "step": 25313 }, { "epoch": 0.30265785100252274, "grad_norm": 1.5412565469741821, "learning_rate": 8.174140782487219e-06, "loss": 0.5631, "step": 25314 }, { "epoch": 0.3026698071473834, "grad_norm": 3.0207371711730957, "learning_rate": 8.17399118119007e-06, "loss": 0.5592, "step": 25315 }, { "epoch": 0.30268176329224405, "grad_norm": 2.691725969314575, "learning_rate": 8.17384157513351e-06, "loss": 0.5572, "step": 25316 }, { "epoch": 0.3026937194371047, "grad_norm": 3.5133001804351807, "learning_rate": 8.173691964317762e-06, "loss": 0.6068, "step": 25317 }, { "epoch": 0.30270567558196537, "grad_norm": 1.6668907403945923, "learning_rate": 8.173542348743053e-06, "loss": 0.664, "step": 25318 }, { "epoch": 0.302717631726826, "grad_norm": 2.0960307121276855, "learning_rate": 8.173392728409605e-06, "loss": 0.7448, "step": 25319 }, { "epoch": 0.30272958787168663, "grad_norm": 1.7111774682998657, "learning_rate": 8.173243103317644e-06, "loss": 0.6336, "step": 25320 }, { "epoch": 0.3027415440165473, "grad_norm": 2.541320562362671, "learning_rate": 8.173093473467393e-06, "loss": 0.6581, "step": 25321 }, { "epoch": 0.30275350016140795, "grad_norm": 1.9453984498977661, "learning_rate": 8.172943838859078e-06, "loss": 0.6384, "step": 25322 }, { "epoch": 0.3027654563062686, "grad_norm": 2.3852434158325195, "learning_rate": 8.172794199492921e-06, "loss": 0.5577, "step": 25323 }, { "epoch": 0.30277741245112927, "grad_norm": 4.979051113128662, "learning_rate": 8.17264455536915e-06, "loss": 0.6916, "step": 25324 }, { "epoch": 0.3027893685959899, "grad_norm": 1.5954904556274414, "learning_rate": 8.172494906487986e-06, "loss": 0.5258, "step": 25325 }, { "epoch": 0.3028013247408506, "grad_norm": 1.880417823791504, "learning_rate": 8.172345252849654e-06, "loss": 0.5829, "step": 25326 }, { "epoch": 0.3028132808857112, "grad_norm": 2.1222925186157227, "learning_rate": 8.17219559445438e-06, "loss": 0.549, "step": 25327 }, { "epoch": 0.30282523703057185, "grad_norm": 2.050997734069824, "learning_rate": 8.172045931302387e-06, "loss": 0.6227, "step": 25328 }, { "epoch": 0.3028371931754325, "grad_norm": 2.399890899658203, "learning_rate": 8.1718962633939e-06, "loss": 0.6576, "step": 25329 }, { "epoch": 0.30284914932029317, "grad_norm": 1.757154941558838, "learning_rate": 8.171746590729144e-06, "loss": 0.6425, "step": 25330 }, { "epoch": 0.3028611054651538, "grad_norm": 2.188066244125366, "learning_rate": 8.171596913308341e-06, "loss": 0.5892, "step": 25331 }, { "epoch": 0.3028730616100145, "grad_norm": 1.8890600204467773, "learning_rate": 8.171447231131718e-06, "loss": 0.6856, "step": 25332 }, { "epoch": 0.30288501775487514, "grad_norm": 1.9094527959823608, "learning_rate": 8.1712975441995e-06, "loss": 0.5824, "step": 25333 }, { "epoch": 0.30289697389973574, "grad_norm": 1.424346923828125, "learning_rate": 8.171147852511908e-06, "loss": 0.5394, "step": 25334 }, { "epoch": 0.3029089300445964, "grad_norm": 1.828567624092102, "learning_rate": 8.17099815606917e-06, "loss": 0.5338, "step": 25335 }, { "epoch": 0.30292088618945706, "grad_norm": 3.7311301231384277, "learning_rate": 8.170848454871507e-06, "loss": 0.6663, "step": 25336 }, { "epoch": 0.3029328423343177, "grad_norm": 1.6896148920059204, "learning_rate": 8.170698748919146e-06, "loss": 0.4488, "step": 25337 }, { "epoch": 0.3029447984791784, "grad_norm": 3.775705575942993, "learning_rate": 8.170549038212312e-06, "loss": 0.6315, "step": 25338 }, { "epoch": 0.30295675462403904, "grad_norm": 1.9245359897613525, "learning_rate": 8.17039932275123e-06, "loss": 0.5244, "step": 25339 }, { "epoch": 0.3029687107688997, "grad_norm": 1.8539936542510986, "learning_rate": 8.17024960253612e-06, "loss": 0.6085, "step": 25340 }, { "epoch": 0.30298066691376035, "grad_norm": 2.6025948524475098, "learning_rate": 8.17009987756721e-06, "loss": 0.5607, "step": 25341 }, { "epoch": 0.30299262305862096, "grad_norm": 2.625552177429199, "learning_rate": 8.169950147844725e-06, "loss": 0.6574, "step": 25342 }, { "epoch": 0.3030045792034816, "grad_norm": 1.8868435621261597, "learning_rate": 8.169800413368888e-06, "loss": 0.7504, "step": 25343 }, { "epoch": 0.3030165353483423, "grad_norm": 1.7362117767333984, "learning_rate": 8.169650674139923e-06, "loss": 0.6662, "step": 25344 }, { "epoch": 0.30302849149320293, "grad_norm": 10.765466690063477, "learning_rate": 8.169500930158057e-06, "loss": 0.6052, "step": 25345 }, { "epoch": 0.3030404476380636, "grad_norm": 1.4270986318588257, "learning_rate": 8.169351181423513e-06, "loss": 0.5801, "step": 25346 }, { "epoch": 0.30305240378292425, "grad_norm": 1.2558512687683105, "learning_rate": 8.169201427936516e-06, "loss": 0.6393, "step": 25347 }, { "epoch": 0.3030643599277849, "grad_norm": 1.9208277463912964, "learning_rate": 8.16905166969729e-06, "loss": 0.5992, "step": 25348 }, { "epoch": 0.3030763160726455, "grad_norm": 3.5170183181762695, "learning_rate": 8.168901906706058e-06, "loss": 0.5593, "step": 25349 }, { "epoch": 0.30308827221750617, "grad_norm": 1.5947145223617554, "learning_rate": 8.168752138963047e-06, "loss": 0.5556, "step": 25350 }, { "epoch": 0.30310022836236683, "grad_norm": 1.989200472831726, "learning_rate": 8.168602366468482e-06, "loss": 0.5593, "step": 25351 }, { "epoch": 0.3031121845072275, "grad_norm": 1.5283735990524292, "learning_rate": 8.168452589222586e-06, "loss": 0.5892, "step": 25352 }, { "epoch": 0.30312414065208815, "grad_norm": 2.446164131164551, "learning_rate": 8.168302807225585e-06, "loss": 0.5769, "step": 25353 }, { "epoch": 0.3031360967969488, "grad_norm": 2.6498963832855225, "learning_rate": 8.1681530204777e-06, "loss": 0.6371, "step": 25354 }, { "epoch": 0.30314805294180946, "grad_norm": 1.5890036821365356, "learning_rate": 8.16800322897916e-06, "loss": 0.6542, "step": 25355 }, { "epoch": 0.30316000908667007, "grad_norm": 1.5253984928131104, "learning_rate": 8.167853432730188e-06, "loss": 0.6051, "step": 25356 }, { "epoch": 0.3031719652315307, "grad_norm": 1.7061371803283691, "learning_rate": 8.16770363173101e-06, "loss": 0.7054, "step": 25357 }, { "epoch": 0.3031839213763914, "grad_norm": 2.9298009872436523, "learning_rate": 8.167553825981847e-06, "loss": 0.6339, "step": 25358 }, { "epoch": 0.30319587752125204, "grad_norm": 3.033596992492676, "learning_rate": 8.167404015482925e-06, "loss": 0.5321, "step": 25359 }, { "epoch": 0.3032078336661127, "grad_norm": 4.363256931304932, "learning_rate": 8.167254200234472e-06, "loss": 0.6098, "step": 25360 }, { "epoch": 0.30321978981097336, "grad_norm": 4.111640930175781, "learning_rate": 8.167104380236708e-06, "loss": 0.5283, "step": 25361 }, { "epoch": 0.303231745955834, "grad_norm": 1.7646726369857788, "learning_rate": 8.16695455548986e-06, "loss": 0.4952, "step": 25362 }, { "epoch": 0.3032437021006947, "grad_norm": 2.367976188659668, "learning_rate": 8.166804725994154e-06, "loss": 0.6109, "step": 25363 }, { "epoch": 0.3032556582455553, "grad_norm": 1.7837086915969849, "learning_rate": 8.166654891749812e-06, "loss": 0.6049, "step": 25364 }, { "epoch": 0.30326761439041594, "grad_norm": 2.0868868827819824, "learning_rate": 8.166505052757058e-06, "loss": 0.5841, "step": 25365 }, { "epoch": 0.3032795705352766, "grad_norm": 1.9818596839904785, "learning_rate": 8.16635520901612e-06, "loss": 0.725, "step": 25366 }, { "epoch": 0.30329152668013726, "grad_norm": 2.3891777992248535, "learning_rate": 8.166205360527223e-06, "loss": 0.6501, "step": 25367 }, { "epoch": 0.3033034828249979, "grad_norm": 1.5620393753051758, "learning_rate": 8.166055507290588e-06, "loss": 0.5983, "step": 25368 }, { "epoch": 0.3033154389698586, "grad_norm": 3.948674440383911, "learning_rate": 8.165905649306441e-06, "loss": 0.5494, "step": 25369 }, { "epoch": 0.30332739511471923, "grad_norm": 2.866997003555298, "learning_rate": 8.16575578657501e-06, "loss": 0.5169, "step": 25370 }, { "epoch": 0.30333935125957984, "grad_norm": 3.3170082569122314, "learning_rate": 8.165605919096514e-06, "loss": 0.7033, "step": 25371 }, { "epoch": 0.3033513074044405, "grad_norm": 1.768829107284546, "learning_rate": 8.165456046871181e-06, "loss": 0.5863, "step": 25372 }, { "epoch": 0.30336326354930115, "grad_norm": 1.730433702468872, "learning_rate": 8.165306169899235e-06, "loss": 0.6386, "step": 25373 }, { "epoch": 0.3033752196941618, "grad_norm": 3.137580633163452, "learning_rate": 8.165156288180903e-06, "loss": 0.5301, "step": 25374 }, { "epoch": 0.30338717583902247, "grad_norm": 2.4185824394226074, "learning_rate": 8.165006401716407e-06, "loss": 0.5822, "step": 25375 }, { "epoch": 0.30339913198388313, "grad_norm": 1.4404075145721436, "learning_rate": 8.164856510505971e-06, "loss": 0.6254, "step": 25376 }, { "epoch": 0.3034110881287438, "grad_norm": 2.454836130142212, "learning_rate": 8.164706614549825e-06, "loss": 0.5867, "step": 25377 }, { "epoch": 0.3034230442736044, "grad_norm": 2.3230960369110107, "learning_rate": 8.164556713848186e-06, "loss": 0.5979, "step": 25378 }, { "epoch": 0.30343500041846505, "grad_norm": 2.5173373222351074, "learning_rate": 8.164406808401286e-06, "loss": 0.5445, "step": 25379 }, { "epoch": 0.3034469565633257, "grad_norm": 1.6444631814956665, "learning_rate": 8.164256898209347e-06, "loss": 0.737, "step": 25380 }, { "epoch": 0.30345891270818637, "grad_norm": 4.451457500457764, "learning_rate": 8.164106983272591e-06, "loss": 0.6456, "step": 25381 }, { "epoch": 0.303470868853047, "grad_norm": 1.7649078369140625, "learning_rate": 8.163957063591247e-06, "loss": 0.55, "step": 25382 }, { "epoch": 0.3034828249979077, "grad_norm": 2.8159162998199463, "learning_rate": 8.163807139165537e-06, "loss": 0.6293, "step": 25383 }, { "epoch": 0.30349478114276834, "grad_norm": 2.1743273735046387, "learning_rate": 8.163657209995688e-06, "loss": 0.5407, "step": 25384 }, { "epoch": 0.303506737287629, "grad_norm": 2.293811798095703, "learning_rate": 8.163507276081924e-06, "loss": 0.6207, "step": 25385 }, { "epoch": 0.3035186934324896, "grad_norm": 2.8641114234924316, "learning_rate": 8.16335733742447e-06, "loss": 0.6122, "step": 25386 }, { "epoch": 0.30353064957735026, "grad_norm": 1.961365818977356, "learning_rate": 8.16320739402355e-06, "loss": 0.6511, "step": 25387 }, { "epoch": 0.3035426057222109, "grad_norm": 1.7698372602462769, "learning_rate": 8.163057445879387e-06, "loss": 0.6345, "step": 25388 }, { "epoch": 0.3035545618670716, "grad_norm": 2.7592477798461914, "learning_rate": 8.16290749299221e-06, "loss": 0.5593, "step": 25389 }, { "epoch": 0.30356651801193224, "grad_norm": 2.7472519874572754, "learning_rate": 8.162757535362243e-06, "loss": 0.523, "step": 25390 }, { "epoch": 0.3035784741567929, "grad_norm": 1.7288320064544678, "learning_rate": 8.162607572989708e-06, "loss": 0.5513, "step": 25391 }, { "epoch": 0.30359043030165356, "grad_norm": 3.6597297191619873, "learning_rate": 8.162457605874831e-06, "loss": 0.6894, "step": 25392 }, { "epoch": 0.30360238644651416, "grad_norm": 2.3057024478912354, "learning_rate": 8.162307634017841e-06, "loss": 0.5311, "step": 25393 }, { "epoch": 0.3036143425913748, "grad_norm": 1.9790703058242798, "learning_rate": 8.162157657418957e-06, "loss": 0.5259, "step": 25394 }, { "epoch": 0.3036262987362355, "grad_norm": 4.7488908767700195, "learning_rate": 8.162007676078407e-06, "loss": 0.5869, "step": 25395 }, { "epoch": 0.30363825488109614, "grad_norm": 2.1458520889282227, "learning_rate": 8.161857689996415e-06, "loss": 0.5518, "step": 25396 }, { "epoch": 0.3036502110259568, "grad_norm": 1.5092394351959229, "learning_rate": 8.161707699173207e-06, "loss": 0.6539, "step": 25397 }, { "epoch": 0.30366216717081745, "grad_norm": 3.2077150344848633, "learning_rate": 8.161557703609006e-06, "loss": 0.6395, "step": 25398 }, { "epoch": 0.3036741233156781, "grad_norm": 1.8271968364715576, "learning_rate": 8.161407703304037e-06, "loss": 0.6071, "step": 25399 }, { "epoch": 0.30368607946053877, "grad_norm": 1.8123810291290283, "learning_rate": 8.161257698258527e-06, "loss": 0.5419, "step": 25400 }, { "epoch": 0.3036980356053994, "grad_norm": 3.1212656497955322, "learning_rate": 8.1611076884727e-06, "loss": 0.6011, "step": 25401 }, { "epoch": 0.30370999175026003, "grad_norm": 2.0322227478027344, "learning_rate": 8.160957673946782e-06, "loss": 0.5202, "step": 25402 }, { "epoch": 0.3037219478951207, "grad_norm": 2.862748384475708, "learning_rate": 8.160807654680995e-06, "loss": 0.6709, "step": 25403 }, { "epoch": 0.30373390403998135, "grad_norm": 2.4529881477355957, "learning_rate": 8.160657630675566e-06, "loss": 0.5983, "step": 25404 }, { "epoch": 0.303745860184842, "grad_norm": 2.047699213027954, "learning_rate": 8.16050760193072e-06, "loss": 0.6601, "step": 25405 }, { "epoch": 0.30375781632970267, "grad_norm": 1.660054326057434, "learning_rate": 8.160357568446682e-06, "loss": 0.5813, "step": 25406 }, { "epoch": 0.3037697724745633, "grad_norm": 1.6157536506652832, "learning_rate": 8.160207530223676e-06, "loss": 0.5734, "step": 25407 }, { "epoch": 0.30378172861942393, "grad_norm": 3.030585765838623, "learning_rate": 8.160057487261928e-06, "loss": 0.5886, "step": 25408 }, { "epoch": 0.3037936847642846, "grad_norm": 1.7136257886886597, "learning_rate": 8.159907439561662e-06, "loss": 0.5664, "step": 25409 }, { "epoch": 0.30380564090914525, "grad_norm": 4.125977039337158, "learning_rate": 8.159757387123103e-06, "loss": 0.5547, "step": 25410 }, { "epoch": 0.3038175970540059, "grad_norm": 2.5520846843719482, "learning_rate": 8.159607329946478e-06, "loss": 0.6006, "step": 25411 }, { "epoch": 0.30382955319886656, "grad_norm": 1.6320416927337646, "learning_rate": 8.15945726803201e-06, "loss": 0.6553, "step": 25412 }, { "epoch": 0.3038415093437272, "grad_norm": 1.852281928062439, "learning_rate": 8.159307201379924e-06, "loss": 0.6652, "step": 25413 }, { "epoch": 0.3038534654885879, "grad_norm": 5.987697124481201, "learning_rate": 8.159157129990447e-06, "loss": 0.5344, "step": 25414 }, { "epoch": 0.3038654216334485, "grad_norm": 4.02517032623291, "learning_rate": 8.159007053863801e-06, "loss": 0.5657, "step": 25415 }, { "epoch": 0.30387737777830914, "grad_norm": 2.8258769512176514, "learning_rate": 8.158856973000215e-06, "loss": 0.6123, "step": 25416 }, { "epoch": 0.3038893339231698, "grad_norm": 1.9520363807678223, "learning_rate": 8.15870688739991e-06, "loss": 0.6435, "step": 25417 }, { "epoch": 0.30390129006803046, "grad_norm": 2.0527443885803223, "learning_rate": 8.158556797063112e-06, "loss": 0.5672, "step": 25418 }, { "epoch": 0.3039132462128911, "grad_norm": 2.8393168449401855, "learning_rate": 8.158406701990049e-06, "loss": 0.6393, "step": 25419 }, { "epoch": 0.3039252023577518, "grad_norm": 2.5999417304992676, "learning_rate": 8.158256602180943e-06, "loss": 0.5741, "step": 25420 }, { "epoch": 0.30393715850261244, "grad_norm": 2.3641157150268555, "learning_rate": 8.15810649763602e-06, "loss": 0.6053, "step": 25421 }, { "epoch": 0.3039491146474731, "grad_norm": 2.1718978881835938, "learning_rate": 8.157956388355504e-06, "loss": 0.5499, "step": 25422 }, { "epoch": 0.3039610707923337, "grad_norm": 6.072206020355225, "learning_rate": 8.157806274339621e-06, "loss": 0.6304, "step": 25423 }, { "epoch": 0.30397302693719436, "grad_norm": 2.3342626094818115, "learning_rate": 8.157656155588598e-06, "loss": 0.5974, "step": 25424 }, { "epoch": 0.303984983082055, "grad_norm": 3.1167526245117188, "learning_rate": 8.157506032102658e-06, "loss": 0.6459, "step": 25425 }, { "epoch": 0.3039969392269157, "grad_norm": 6.053066253662109, "learning_rate": 8.157355903882025e-06, "loss": 0.6513, "step": 25426 }, { "epoch": 0.30400889537177633, "grad_norm": 1.3960872888565063, "learning_rate": 8.157205770926928e-06, "loss": 0.5426, "step": 25427 }, { "epoch": 0.304020851516637, "grad_norm": 3.1137983798980713, "learning_rate": 8.157055633237588e-06, "loss": 0.6586, "step": 25428 }, { "epoch": 0.30403280766149765, "grad_norm": 1.5932363271713257, "learning_rate": 8.156905490814232e-06, "loss": 0.6843, "step": 25429 }, { "epoch": 0.30404476380635825, "grad_norm": 1.427990198135376, "learning_rate": 8.156755343657086e-06, "loss": 0.5457, "step": 25430 }, { "epoch": 0.3040567199512189, "grad_norm": 2.23496675491333, "learning_rate": 8.156605191766373e-06, "loss": 0.597, "step": 25431 }, { "epoch": 0.30406867609607957, "grad_norm": 1.3864187002182007, "learning_rate": 8.15645503514232e-06, "loss": 0.6321, "step": 25432 }, { "epoch": 0.30408063224094023, "grad_norm": 1.5927592515945435, "learning_rate": 8.156304873785152e-06, "loss": 0.6107, "step": 25433 }, { "epoch": 0.3040925883858009, "grad_norm": 1.8382885456085205, "learning_rate": 8.156154707695093e-06, "loss": 0.5502, "step": 25434 }, { "epoch": 0.30410454453066155, "grad_norm": 1.9760619401931763, "learning_rate": 8.156004536872368e-06, "loss": 0.6251, "step": 25435 }, { "epoch": 0.3041165006755222, "grad_norm": 6.186678886413574, "learning_rate": 8.155854361317203e-06, "loss": 0.5997, "step": 25436 }, { "epoch": 0.3041284568203828, "grad_norm": 2.4180989265441895, "learning_rate": 8.155704181029824e-06, "loss": 0.6297, "step": 25437 }, { "epoch": 0.30414041296524347, "grad_norm": 2.6961848735809326, "learning_rate": 8.155553996010456e-06, "loss": 0.6076, "step": 25438 }, { "epoch": 0.3041523691101041, "grad_norm": 4.104405879974365, "learning_rate": 8.155403806259324e-06, "loss": 0.5538, "step": 25439 }, { "epoch": 0.3041643252549648, "grad_norm": 2.006119966506958, "learning_rate": 8.15525361177665e-06, "loss": 0.608, "step": 25440 }, { "epoch": 0.30417628139982544, "grad_norm": 2.5376291275024414, "learning_rate": 8.155103412562663e-06, "loss": 0.524, "step": 25441 }, { "epoch": 0.3041882375446861, "grad_norm": 1.578057885169983, "learning_rate": 8.154953208617588e-06, "loss": 0.5743, "step": 25442 }, { "epoch": 0.30420019368954676, "grad_norm": 1.8507540225982666, "learning_rate": 8.154802999941651e-06, "loss": 0.5251, "step": 25443 }, { "epoch": 0.3042121498344074, "grad_norm": 2.0363235473632812, "learning_rate": 8.154652786535073e-06, "loss": 0.6235, "step": 25444 }, { "epoch": 0.304224105979268, "grad_norm": 1.5123090744018555, "learning_rate": 8.154502568398083e-06, "loss": 0.6172, "step": 25445 }, { "epoch": 0.3042360621241287, "grad_norm": 1.6924031972885132, "learning_rate": 8.154352345530905e-06, "loss": 0.4934, "step": 25446 }, { "epoch": 0.30424801826898934, "grad_norm": 1.5793427228927612, "learning_rate": 8.154202117933765e-06, "loss": 0.6176, "step": 25447 }, { "epoch": 0.30425997441385, "grad_norm": 2.30475115776062, "learning_rate": 8.154051885606887e-06, "loss": 0.5658, "step": 25448 }, { "epoch": 0.30427193055871066, "grad_norm": 1.7411463260650635, "learning_rate": 8.153901648550498e-06, "loss": 0.7643, "step": 25449 }, { "epoch": 0.3042838867035713, "grad_norm": 1.692389726638794, "learning_rate": 8.15375140676482e-06, "loss": 0.5678, "step": 25450 }, { "epoch": 0.304295842848432, "grad_norm": 1.726163625717163, "learning_rate": 8.153601160250082e-06, "loss": 0.6423, "step": 25451 }, { "epoch": 0.3043077989932926, "grad_norm": 2.2098119258880615, "learning_rate": 8.153450909006508e-06, "loss": 0.5644, "step": 25452 }, { "epoch": 0.30431975513815324, "grad_norm": 2.219528913497925, "learning_rate": 8.153300653034324e-06, "loss": 0.5151, "step": 25453 }, { "epoch": 0.3043317112830139, "grad_norm": 2.5849955081939697, "learning_rate": 8.153150392333754e-06, "loss": 0.6555, "step": 25454 }, { "epoch": 0.30434366742787455, "grad_norm": 4.547835826873779, "learning_rate": 8.153000126905022e-06, "loss": 0.6057, "step": 25455 }, { "epoch": 0.3043556235727352, "grad_norm": 1.9382444620132446, "learning_rate": 8.152849856748354e-06, "loss": 0.5337, "step": 25456 }, { "epoch": 0.30436757971759587, "grad_norm": 1.8024117946624756, "learning_rate": 8.15269958186398e-06, "loss": 0.5677, "step": 25457 }, { "epoch": 0.30437953586245653, "grad_norm": 1.4595667123794556, "learning_rate": 8.152549302252123e-06, "loss": 0.572, "step": 25458 }, { "epoch": 0.3043914920073172, "grad_norm": 1.4723305702209473, "learning_rate": 8.152399017913003e-06, "loss": 0.596, "step": 25459 }, { "epoch": 0.3044034481521778, "grad_norm": 2.988675594329834, "learning_rate": 8.15224872884685e-06, "loss": 0.6725, "step": 25460 }, { "epoch": 0.30441540429703845, "grad_norm": 2.141455888748169, "learning_rate": 8.152098435053891e-06, "loss": 0.6434, "step": 25461 }, { "epoch": 0.3044273604418991, "grad_norm": 3.039388656616211, "learning_rate": 8.151948136534347e-06, "loss": 0.5943, "step": 25462 }, { "epoch": 0.30443931658675977, "grad_norm": 2.738114833831787, "learning_rate": 8.151797833288447e-06, "loss": 0.61, "step": 25463 }, { "epoch": 0.3044512727316204, "grad_norm": 1.7941911220550537, "learning_rate": 8.151647525316415e-06, "loss": 0.507, "step": 25464 }, { "epoch": 0.3044632288764811, "grad_norm": 7.284714698791504, "learning_rate": 8.151497212618475e-06, "loss": 0.6332, "step": 25465 }, { "epoch": 0.30447518502134174, "grad_norm": 2.03361439704895, "learning_rate": 8.151346895194856e-06, "loss": 0.581, "step": 25466 }, { "epoch": 0.30448714116620235, "grad_norm": 3.8140504360198975, "learning_rate": 8.15119657304578e-06, "loss": 0.5776, "step": 25467 }, { "epoch": 0.304499097311063, "grad_norm": 4.870624542236328, "learning_rate": 8.151046246171472e-06, "loss": 0.6444, "step": 25468 }, { "epoch": 0.30451105345592366, "grad_norm": 2.7842512130737305, "learning_rate": 8.150895914572161e-06, "loss": 0.6408, "step": 25469 }, { "epoch": 0.3045230096007843, "grad_norm": 2.3188178539276123, "learning_rate": 8.15074557824807e-06, "loss": 0.5594, "step": 25470 }, { "epoch": 0.304534965745645, "grad_norm": 28.06902503967285, "learning_rate": 8.150595237199426e-06, "loss": 0.581, "step": 25471 }, { "epoch": 0.30454692189050564, "grad_norm": 1.9437023401260376, "learning_rate": 8.15044489142645e-06, "loss": 0.6432, "step": 25472 }, { "epoch": 0.3045588780353663, "grad_norm": 7.270541667938232, "learning_rate": 8.150294540929373e-06, "loss": 0.6085, "step": 25473 }, { "epoch": 0.3045708341802269, "grad_norm": 1.883239984512329, "learning_rate": 8.150144185708417e-06, "loss": 0.5873, "step": 25474 }, { "epoch": 0.30458279032508756, "grad_norm": 2.8080122470855713, "learning_rate": 8.14999382576381e-06, "loss": 0.6709, "step": 25475 }, { "epoch": 0.3045947464699482, "grad_norm": 2.0124080181121826, "learning_rate": 8.149843461095775e-06, "loss": 0.5621, "step": 25476 }, { "epoch": 0.3046067026148089, "grad_norm": 6.247300624847412, "learning_rate": 8.149693091704539e-06, "loss": 0.5963, "step": 25477 }, { "epoch": 0.30461865875966954, "grad_norm": 1.8488130569458008, "learning_rate": 8.149542717590326e-06, "loss": 0.5874, "step": 25478 }, { "epoch": 0.3046306149045302, "grad_norm": 2.929046869277954, "learning_rate": 8.149392338753363e-06, "loss": 0.5707, "step": 25479 }, { "epoch": 0.30464257104939085, "grad_norm": 2.602104902267456, "learning_rate": 8.149241955193876e-06, "loss": 0.6812, "step": 25480 }, { "epoch": 0.3046545271942515, "grad_norm": 4.476101875305176, "learning_rate": 8.149091566912089e-06, "loss": 0.6059, "step": 25481 }, { "epoch": 0.3046664833391121, "grad_norm": 6.070493698120117, "learning_rate": 8.148941173908227e-06, "loss": 0.5988, "step": 25482 }, { "epoch": 0.3046784394839728, "grad_norm": 2.316950798034668, "learning_rate": 8.148790776182517e-06, "loss": 0.6666, "step": 25483 }, { "epoch": 0.30469039562883343, "grad_norm": 1.9436562061309814, "learning_rate": 8.148640373735187e-06, "loss": 0.5845, "step": 25484 }, { "epoch": 0.3047023517736941, "grad_norm": 4.538208961486816, "learning_rate": 8.148489966566455e-06, "loss": 0.5729, "step": 25485 }, { "epoch": 0.30471430791855475, "grad_norm": 5.762479305267334, "learning_rate": 8.148339554676554e-06, "loss": 0.6014, "step": 25486 }, { "epoch": 0.3047262640634154, "grad_norm": 2.8712680339813232, "learning_rate": 8.148189138065706e-06, "loss": 0.6203, "step": 25487 }, { "epoch": 0.30473822020827607, "grad_norm": 3.085920572280884, "learning_rate": 8.148038716734134e-06, "loss": 0.5703, "step": 25488 }, { "epoch": 0.30475017635313667, "grad_norm": 1.6427379846572876, "learning_rate": 8.14788829068207e-06, "loss": 0.5783, "step": 25489 }, { "epoch": 0.30476213249799733, "grad_norm": 4.185636043548584, "learning_rate": 8.147737859909736e-06, "loss": 0.5237, "step": 25490 }, { "epoch": 0.304774088642858, "grad_norm": 1.6648166179656982, "learning_rate": 8.147587424417358e-06, "loss": 0.5506, "step": 25491 }, { "epoch": 0.30478604478771865, "grad_norm": 2.304032802581787, "learning_rate": 8.147436984205162e-06, "loss": 0.6776, "step": 25492 }, { "epoch": 0.3047980009325793, "grad_norm": 1.3146806955337524, "learning_rate": 8.147286539273373e-06, "loss": 0.5144, "step": 25493 }, { "epoch": 0.30480995707743996, "grad_norm": 3.043755531311035, "learning_rate": 8.147136089622215e-06, "loss": 0.7089, "step": 25494 }, { "epoch": 0.3048219132223006, "grad_norm": 2.971501350402832, "learning_rate": 8.146985635251916e-06, "loss": 0.5948, "step": 25495 }, { "epoch": 0.3048338693671612, "grad_norm": 3.1945955753326416, "learning_rate": 8.146835176162703e-06, "loss": 0.5547, "step": 25496 }, { "epoch": 0.3048458255120219, "grad_norm": 1.973055362701416, "learning_rate": 8.146684712354796e-06, "loss": 0.5722, "step": 25497 }, { "epoch": 0.30485778165688254, "grad_norm": 2.8326618671417236, "learning_rate": 8.146534243828426e-06, "loss": 0.528, "step": 25498 }, { "epoch": 0.3048697378017432, "grad_norm": 2.3847742080688477, "learning_rate": 8.146383770583816e-06, "loss": 0.6559, "step": 25499 }, { "epoch": 0.30488169394660386, "grad_norm": 2.7197556495666504, "learning_rate": 8.146233292621193e-06, "loss": 0.5945, "step": 25500 }, { "epoch": 0.3048936500914645, "grad_norm": 5.0525617599487305, "learning_rate": 8.14608280994078e-06, "loss": 0.6587, "step": 25501 }, { "epoch": 0.3049056062363252, "grad_norm": 2.2400996685028076, "learning_rate": 8.145932322542806e-06, "loss": 0.5329, "step": 25502 }, { "epoch": 0.30491756238118584, "grad_norm": 3.105785369873047, "learning_rate": 8.145781830427497e-06, "loss": 0.6903, "step": 25503 }, { "epoch": 0.30492951852604644, "grad_norm": 1.6252799034118652, "learning_rate": 8.145631333595077e-06, "loss": 0.6899, "step": 25504 }, { "epoch": 0.3049414746709071, "grad_norm": 2.9702799320220947, "learning_rate": 8.145480832045769e-06, "loss": 0.5872, "step": 25505 }, { "epoch": 0.30495343081576776, "grad_norm": 2.252821207046509, "learning_rate": 8.145330325779802e-06, "loss": 0.6423, "step": 25506 }, { "epoch": 0.3049653869606284, "grad_norm": 3.1633379459381104, "learning_rate": 8.145179814797403e-06, "loss": 0.6346, "step": 25507 }, { "epoch": 0.3049773431054891, "grad_norm": 2.3078625202178955, "learning_rate": 8.145029299098794e-06, "loss": 0.6141, "step": 25508 }, { "epoch": 0.30498929925034973, "grad_norm": 2.4737930297851562, "learning_rate": 8.144878778684202e-06, "loss": 0.6856, "step": 25509 }, { "epoch": 0.3050012553952104, "grad_norm": 3.6457161903381348, "learning_rate": 8.144728253553855e-06, "loss": 0.5483, "step": 25510 }, { "epoch": 0.305013211540071, "grad_norm": 2.5162835121154785, "learning_rate": 8.144577723707974e-06, "loss": 0.6008, "step": 25511 }, { "epoch": 0.30502516768493165, "grad_norm": 2.0399386882781982, "learning_rate": 8.144427189146789e-06, "loss": 0.6176, "step": 25512 }, { "epoch": 0.3050371238297923, "grad_norm": 2.0613205432891846, "learning_rate": 8.144276649870525e-06, "loss": 0.7007, "step": 25513 }, { "epoch": 0.30504907997465297, "grad_norm": 2.245993137359619, "learning_rate": 8.144126105879406e-06, "loss": 0.6594, "step": 25514 }, { "epoch": 0.30506103611951363, "grad_norm": 2.8363466262817383, "learning_rate": 8.143975557173658e-06, "loss": 0.6313, "step": 25515 }, { "epoch": 0.3050729922643743, "grad_norm": 2.4282422065734863, "learning_rate": 8.14382500375351e-06, "loss": 0.6348, "step": 25516 }, { "epoch": 0.30508494840923495, "grad_norm": 1.9864556789398193, "learning_rate": 8.143674445619184e-06, "loss": 0.5711, "step": 25517 }, { "epoch": 0.3050969045540956, "grad_norm": 4.552668571472168, "learning_rate": 8.143523882770906e-06, "loss": 0.6234, "step": 25518 }, { "epoch": 0.3051088606989562, "grad_norm": 1.616438865661621, "learning_rate": 8.143373315208903e-06, "loss": 0.5981, "step": 25519 }, { "epoch": 0.30512081684381687, "grad_norm": 2.015018939971924, "learning_rate": 8.143222742933401e-06, "loss": 0.5605, "step": 25520 }, { "epoch": 0.3051327729886775, "grad_norm": 1.8367592096328735, "learning_rate": 8.143072165944625e-06, "loss": 0.6763, "step": 25521 }, { "epoch": 0.3051447291335382, "grad_norm": 7.611279010772705, "learning_rate": 8.142921584242802e-06, "loss": 0.5717, "step": 25522 }, { "epoch": 0.30515668527839884, "grad_norm": 1.5837595462799072, "learning_rate": 8.142770997828157e-06, "loss": 0.5884, "step": 25523 }, { "epoch": 0.3051686414232595, "grad_norm": 2.6393089294433594, "learning_rate": 8.142620406700913e-06, "loss": 0.6363, "step": 25524 }, { "epoch": 0.30518059756812016, "grad_norm": 13.409727096557617, "learning_rate": 8.142469810861302e-06, "loss": 0.5323, "step": 25525 }, { "epoch": 0.30519255371298076, "grad_norm": 2.036708354949951, "learning_rate": 8.142319210309546e-06, "loss": 0.5677, "step": 25526 }, { "epoch": 0.3052045098578414, "grad_norm": 1.9894787073135376, "learning_rate": 8.142168605045869e-06, "loss": 0.5841, "step": 25527 }, { "epoch": 0.3052164660027021, "grad_norm": 3.1402747631073, "learning_rate": 8.1420179950705e-06, "loss": 0.6024, "step": 25528 }, { "epoch": 0.30522842214756274, "grad_norm": 1.9685052633285522, "learning_rate": 8.141867380383664e-06, "loss": 0.5911, "step": 25529 }, { "epoch": 0.3052403782924234, "grad_norm": 4.61005163192749, "learning_rate": 8.141716760985588e-06, "loss": 0.6894, "step": 25530 }, { "epoch": 0.30525233443728406, "grad_norm": 1.5436242818832397, "learning_rate": 8.141566136876495e-06, "loss": 0.5108, "step": 25531 }, { "epoch": 0.3052642905821447, "grad_norm": 2.8503425121307373, "learning_rate": 8.141415508056612e-06, "loss": 0.6431, "step": 25532 }, { "epoch": 0.3052762467270053, "grad_norm": 3.0391030311584473, "learning_rate": 8.141264874526166e-06, "loss": 0.6361, "step": 25533 }, { "epoch": 0.305288202871866, "grad_norm": 1.7512052059173584, "learning_rate": 8.141114236285383e-06, "loss": 0.7083, "step": 25534 }, { "epoch": 0.30530015901672664, "grad_norm": 1.7878398895263672, "learning_rate": 8.140963593334487e-06, "loss": 0.5952, "step": 25535 }, { "epoch": 0.3053121151615873, "grad_norm": 3.837397813796997, "learning_rate": 8.140812945673704e-06, "loss": 0.5791, "step": 25536 }, { "epoch": 0.30532407130644795, "grad_norm": 10.099471092224121, "learning_rate": 8.140662293303262e-06, "loss": 0.6479, "step": 25537 }, { "epoch": 0.3053360274513086, "grad_norm": 3.1591432094573975, "learning_rate": 8.140511636223385e-06, "loss": 0.6908, "step": 25538 }, { "epoch": 0.30534798359616927, "grad_norm": 4.098202228546143, "learning_rate": 8.140360974434302e-06, "loss": 0.5809, "step": 25539 }, { "epoch": 0.30535993974102993, "grad_norm": 4.458811283111572, "learning_rate": 8.140210307936234e-06, "loss": 0.579, "step": 25540 }, { "epoch": 0.30537189588589053, "grad_norm": 7.2463579177856445, "learning_rate": 8.14005963672941e-06, "loss": 0.6612, "step": 25541 }, { "epoch": 0.3053838520307512, "grad_norm": 1.9452664852142334, "learning_rate": 8.139908960814056e-06, "loss": 0.6385, "step": 25542 }, { "epoch": 0.30539580817561185, "grad_norm": 2.1485531330108643, "learning_rate": 8.139758280190396e-06, "loss": 0.5845, "step": 25543 }, { "epoch": 0.3054077643204725, "grad_norm": 1.7030556201934814, "learning_rate": 8.139607594858659e-06, "loss": 0.6095, "step": 25544 }, { "epoch": 0.30541972046533317, "grad_norm": 2.64569091796875, "learning_rate": 8.13945690481907e-06, "loss": 0.7518, "step": 25545 }, { "epoch": 0.3054316766101938, "grad_norm": 1.9077476263046265, "learning_rate": 8.139306210071852e-06, "loss": 0.6674, "step": 25546 }, { "epoch": 0.3054436327550545, "grad_norm": 2.0134644508361816, "learning_rate": 8.139155510617234e-06, "loss": 0.683, "step": 25547 }, { "epoch": 0.3054555888999151, "grad_norm": 1.9345829486846924, "learning_rate": 8.139004806455441e-06, "loss": 0.6275, "step": 25548 }, { "epoch": 0.30546754504477575, "grad_norm": 2.7511675357818604, "learning_rate": 8.1388540975867e-06, "loss": 0.5329, "step": 25549 }, { "epoch": 0.3054795011896364, "grad_norm": 3.0378267765045166, "learning_rate": 8.138703384011235e-06, "loss": 0.561, "step": 25550 }, { "epoch": 0.30549145733449706, "grad_norm": 2.083474636077881, "learning_rate": 8.138552665729274e-06, "loss": 0.6069, "step": 25551 }, { "epoch": 0.3055034134793577, "grad_norm": 5.351327419281006, "learning_rate": 8.13840194274104e-06, "loss": 0.6058, "step": 25552 }, { "epoch": 0.3055153696242184, "grad_norm": 1.290464997291565, "learning_rate": 8.138251215046763e-06, "loss": 0.5087, "step": 25553 }, { "epoch": 0.30552732576907904, "grad_norm": 3.260875940322876, "learning_rate": 8.138100482646667e-06, "loss": 0.5098, "step": 25554 }, { "epoch": 0.30553928191393964, "grad_norm": 1.5751842260360718, "learning_rate": 8.137949745540978e-06, "loss": 0.5833, "step": 25555 }, { "epoch": 0.3055512380588003, "grad_norm": 2.4329609870910645, "learning_rate": 8.137799003729922e-06, "loss": 0.6043, "step": 25556 }, { "epoch": 0.30556319420366096, "grad_norm": 2.6630399227142334, "learning_rate": 8.137648257213726e-06, "loss": 0.6129, "step": 25557 }, { "epoch": 0.3055751503485216, "grad_norm": 3.06662917137146, "learning_rate": 8.137497505992615e-06, "loss": 0.6263, "step": 25558 }, { "epoch": 0.3055871064933823, "grad_norm": 19.277793884277344, "learning_rate": 8.137346750066814e-06, "loss": 0.5923, "step": 25559 }, { "epoch": 0.30559906263824294, "grad_norm": 3.2091562747955322, "learning_rate": 8.137195989436552e-06, "loss": 0.6177, "step": 25560 }, { "epoch": 0.3056110187831036, "grad_norm": 3.1147079467773438, "learning_rate": 8.137045224102053e-06, "loss": 0.5701, "step": 25561 }, { "epoch": 0.30562297492796425, "grad_norm": 2.1136586666107178, "learning_rate": 8.136894454063545e-06, "loss": 0.719, "step": 25562 }, { "epoch": 0.30563493107282486, "grad_norm": 3.3728418350219727, "learning_rate": 8.136743679321251e-06, "loss": 0.6598, "step": 25563 }, { "epoch": 0.3056468872176855, "grad_norm": 5.918145656585693, "learning_rate": 8.136592899875397e-06, "loss": 0.6379, "step": 25564 }, { "epoch": 0.3056588433625462, "grad_norm": 2.7910797595977783, "learning_rate": 8.136442115726212e-06, "loss": 0.4579, "step": 25565 }, { "epoch": 0.30567079950740683, "grad_norm": 2.4820351600646973, "learning_rate": 8.136291326873923e-06, "loss": 0.5771, "step": 25566 }, { "epoch": 0.3056827556522675, "grad_norm": 1.8851890563964844, "learning_rate": 8.136140533318753e-06, "loss": 0.6122, "step": 25567 }, { "epoch": 0.30569471179712815, "grad_norm": 2.34745454788208, "learning_rate": 8.135989735060929e-06, "loss": 0.6882, "step": 25568 }, { "epoch": 0.3057066679419888, "grad_norm": 1.8810282945632935, "learning_rate": 8.135838932100678e-06, "loss": 0.5906, "step": 25569 }, { "epoch": 0.3057186240868494, "grad_norm": 2.3297412395477295, "learning_rate": 8.135688124438223e-06, "loss": 0.5911, "step": 25570 }, { "epoch": 0.30573058023171007, "grad_norm": 3.3253467082977295, "learning_rate": 8.135537312073794e-06, "loss": 0.6855, "step": 25571 }, { "epoch": 0.30574253637657073, "grad_norm": 3.864858388900757, "learning_rate": 8.135386495007616e-06, "loss": 0.6546, "step": 25572 }, { "epoch": 0.3057544925214314, "grad_norm": 3.650747776031494, "learning_rate": 8.135235673239913e-06, "loss": 0.6244, "step": 25573 }, { "epoch": 0.30576644866629205, "grad_norm": 4.977535724639893, "learning_rate": 8.135084846770915e-06, "loss": 0.6111, "step": 25574 }, { "epoch": 0.3057784048111527, "grad_norm": 1.906297206878662, "learning_rate": 8.134934015600845e-06, "loss": 0.5715, "step": 25575 }, { "epoch": 0.30579036095601336, "grad_norm": 2.7639997005462646, "learning_rate": 8.134783179729931e-06, "loss": 0.6583, "step": 25576 }, { "epoch": 0.305802317100874, "grad_norm": 1.746888518333435, "learning_rate": 8.134632339158398e-06, "loss": 0.5706, "step": 25577 }, { "epoch": 0.3058142732457346, "grad_norm": 5.041322708129883, "learning_rate": 8.134481493886473e-06, "loss": 0.6953, "step": 25578 }, { "epoch": 0.3058262293905953, "grad_norm": 3.0154600143432617, "learning_rate": 8.13433064391438e-06, "loss": 0.5887, "step": 25579 }, { "epoch": 0.30583818553545594, "grad_norm": 2.4225246906280518, "learning_rate": 8.13417978924235e-06, "loss": 0.5552, "step": 25580 }, { "epoch": 0.3058501416803166, "grad_norm": 2.385308265686035, "learning_rate": 8.134028929870605e-06, "loss": 0.6438, "step": 25581 }, { "epoch": 0.30586209782517726, "grad_norm": 2.2195467948913574, "learning_rate": 8.133878065799373e-06, "loss": 0.649, "step": 25582 }, { "epoch": 0.3058740539700379, "grad_norm": 2.498755931854248, "learning_rate": 8.13372719702888e-06, "loss": 0.6104, "step": 25583 }, { "epoch": 0.3058860101148986, "grad_norm": 3.1747915744781494, "learning_rate": 8.13357632355935e-06, "loss": 0.5682, "step": 25584 }, { "epoch": 0.3058979662597592, "grad_norm": 1.4670095443725586, "learning_rate": 8.133425445391012e-06, "loss": 0.6341, "step": 25585 }, { "epoch": 0.30590992240461984, "grad_norm": 2.5929853916168213, "learning_rate": 8.133274562524092e-06, "loss": 0.5553, "step": 25586 }, { "epoch": 0.3059218785494805, "grad_norm": 2.602316379547119, "learning_rate": 8.133123674958815e-06, "loss": 0.635, "step": 25587 }, { "epoch": 0.30593383469434116, "grad_norm": 3.2560105323791504, "learning_rate": 8.132972782695408e-06, "loss": 0.6945, "step": 25588 }, { "epoch": 0.3059457908392018, "grad_norm": 1.735974907875061, "learning_rate": 8.1328218857341e-06, "loss": 0.5869, "step": 25589 }, { "epoch": 0.3059577469840625, "grad_norm": 5.7548747062683105, "learning_rate": 8.132670984075111e-06, "loss": 0.6868, "step": 25590 }, { "epoch": 0.30596970312892313, "grad_norm": 1.5765315294265747, "learning_rate": 8.132520077718671e-06, "loss": 0.5581, "step": 25591 }, { "epoch": 0.30598165927378373, "grad_norm": 3.505389451980591, "learning_rate": 8.132369166665007e-06, "loss": 0.6161, "step": 25592 }, { "epoch": 0.3059936154186444, "grad_norm": 2.0892014503479004, "learning_rate": 8.132218250914343e-06, "loss": 0.5096, "step": 25593 }, { "epoch": 0.30600557156350505, "grad_norm": 1.7944785356521606, "learning_rate": 8.132067330466909e-06, "loss": 0.5886, "step": 25594 }, { "epoch": 0.3060175277083657, "grad_norm": 7.849276065826416, "learning_rate": 8.131916405322926e-06, "loss": 0.6055, "step": 25595 }, { "epoch": 0.30602948385322637, "grad_norm": 1.5329022407531738, "learning_rate": 8.131765475482624e-06, "loss": 0.5935, "step": 25596 }, { "epoch": 0.30604143999808703, "grad_norm": 2.082993268966675, "learning_rate": 8.131614540946228e-06, "loss": 0.503, "step": 25597 }, { "epoch": 0.3060533961429477, "grad_norm": 8.466001510620117, "learning_rate": 8.13146360171397e-06, "loss": 0.5837, "step": 25598 }, { "epoch": 0.30606535228780835, "grad_norm": 2.3747754096984863, "learning_rate": 8.131312657786065e-06, "loss": 0.6278, "step": 25599 }, { "epoch": 0.30607730843266895, "grad_norm": 10.198386192321777, "learning_rate": 8.131161709162747e-06, "loss": 0.5643, "step": 25600 }, { "epoch": 0.3060892645775296, "grad_norm": 2.463094472885132, "learning_rate": 8.131010755844243e-06, "loss": 0.6168, "step": 25601 }, { "epoch": 0.30610122072239027, "grad_norm": 8.483623504638672, "learning_rate": 8.130859797830775e-06, "loss": 0.6158, "step": 25602 }, { "epoch": 0.3061131768672509, "grad_norm": 1.8895903825759888, "learning_rate": 8.130708835122573e-06, "loss": 0.5425, "step": 25603 }, { "epoch": 0.3061251330121116, "grad_norm": 2.5646491050720215, "learning_rate": 8.130557867719862e-06, "loss": 0.6671, "step": 25604 }, { "epoch": 0.30613708915697224, "grad_norm": 4.870854377746582, "learning_rate": 8.130406895622868e-06, "loss": 0.5523, "step": 25605 }, { "epoch": 0.3061490453018329, "grad_norm": 10.648587226867676, "learning_rate": 8.130255918831818e-06, "loss": 0.5267, "step": 25606 }, { "epoch": 0.3061610014466935, "grad_norm": 1.6076911687850952, "learning_rate": 8.130104937346937e-06, "loss": 0.6661, "step": 25607 }, { "epoch": 0.30617295759155416, "grad_norm": 2.7233901023864746, "learning_rate": 8.129953951168452e-06, "loss": 0.6038, "step": 25608 }, { "epoch": 0.3061849137364148, "grad_norm": 3.2355034351348877, "learning_rate": 8.129802960296592e-06, "loss": 0.5786, "step": 25609 }, { "epoch": 0.3061968698812755, "grad_norm": 6.2895684242248535, "learning_rate": 8.129651964731579e-06, "loss": 0.6307, "step": 25610 }, { "epoch": 0.30620882602613614, "grad_norm": 2.435241937637329, "learning_rate": 8.129500964473644e-06, "loss": 0.5718, "step": 25611 }, { "epoch": 0.3062207821709968, "grad_norm": 1.8375779390335083, "learning_rate": 8.129349959523011e-06, "loss": 0.6159, "step": 25612 }, { "epoch": 0.30623273831585746, "grad_norm": 2.0320627689361572, "learning_rate": 8.129198949879906e-06, "loss": 0.6079, "step": 25613 }, { "epoch": 0.30624469446071806, "grad_norm": 2.7198524475097656, "learning_rate": 8.129047935544556e-06, "loss": 0.5714, "step": 25614 }, { "epoch": 0.3062566506055787, "grad_norm": 4.4287309646606445, "learning_rate": 8.128896916517188e-06, "loss": 0.5929, "step": 25615 }, { "epoch": 0.3062686067504394, "grad_norm": 2.6372790336608887, "learning_rate": 8.128745892798026e-06, "loss": 0.5923, "step": 25616 }, { "epoch": 0.30628056289530003, "grad_norm": 2.072234869003296, "learning_rate": 8.128594864387301e-06, "loss": 0.5997, "step": 25617 }, { "epoch": 0.3062925190401607, "grad_norm": 5.8816423416137695, "learning_rate": 8.128443831285235e-06, "loss": 0.6833, "step": 25618 }, { "epoch": 0.30630447518502135, "grad_norm": 3.7222816944122314, "learning_rate": 8.128292793492059e-06, "loss": 0.5802, "step": 25619 }, { "epoch": 0.306316431329882, "grad_norm": 4.120509624481201, "learning_rate": 8.128141751007995e-06, "loss": 0.5499, "step": 25620 }, { "epoch": 0.30632838747474267, "grad_norm": 1.979430913925171, "learning_rate": 8.127990703833271e-06, "loss": 0.6205, "step": 25621 }, { "epoch": 0.3063403436196033, "grad_norm": 4.30220365524292, "learning_rate": 8.127839651968114e-06, "loss": 0.6307, "step": 25622 }, { "epoch": 0.30635229976446393, "grad_norm": 2.0472896099090576, "learning_rate": 8.127688595412751e-06, "loss": 0.6316, "step": 25623 }, { "epoch": 0.3063642559093246, "grad_norm": 2.686119318008423, "learning_rate": 8.127537534167407e-06, "loss": 0.6575, "step": 25624 }, { "epoch": 0.30637621205418525, "grad_norm": 2.0236001014709473, "learning_rate": 8.127386468232308e-06, "loss": 0.5739, "step": 25625 }, { "epoch": 0.3063881681990459, "grad_norm": 3.1461689472198486, "learning_rate": 8.127235397607684e-06, "loss": 0.6149, "step": 25626 }, { "epoch": 0.30640012434390657, "grad_norm": 2.0905609130859375, "learning_rate": 8.12708432229376e-06, "loss": 0.6043, "step": 25627 }, { "epoch": 0.3064120804887672, "grad_norm": 4.342745780944824, "learning_rate": 8.12693324229076e-06, "loss": 0.6095, "step": 25628 }, { "epoch": 0.3064240366336278, "grad_norm": 3.159379005432129, "learning_rate": 8.126782157598914e-06, "loss": 0.6019, "step": 25629 }, { "epoch": 0.3064359927784885, "grad_norm": 9.088698387145996, "learning_rate": 8.126631068218447e-06, "loss": 0.5557, "step": 25630 }, { "epoch": 0.30644794892334914, "grad_norm": 4.228440284729004, "learning_rate": 8.126479974149584e-06, "loss": 0.6181, "step": 25631 }, { "epoch": 0.3064599050682098, "grad_norm": 4.624890327453613, "learning_rate": 8.126328875392554e-06, "loss": 0.6964, "step": 25632 }, { "epoch": 0.30647186121307046, "grad_norm": 8.464534759521484, "learning_rate": 8.126177771947584e-06, "loss": 0.6334, "step": 25633 }, { "epoch": 0.3064838173579311, "grad_norm": 2.0777831077575684, "learning_rate": 8.126026663814897e-06, "loss": 0.6481, "step": 25634 }, { "epoch": 0.3064957735027918, "grad_norm": 2.3790504932403564, "learning_rate": 8.125875550994723e-06, "loss": 0.6452, "step": 25635 }, { "epoch": 0.30650772964765244, "grad_norm": 3.6469404697418213, "learning_rate": 8.125724433487288e-06, "loss": 0.5817, "step": 25636 }, { "epoch": 0.30651968579251304, "grad_norm": 3.3531363010406494, "learning_rate": 8.125573311292818e-06, "loss": 0.6764, "step": 25637 }, { "epoch": 0.3065316419373737, "grad_norm": 4.596226215362549, "learning_rate": 8.12542218441154e-06, "loss": 0.5358, "step": 25638 }, { "epoch": 0.30654359808223436, "grad_norm": 4.6975998878479, "learning_rate": 8.125271052843679e-06, "loss": 0.7297, "step": 25639 }, { "epoch": 0.306555554227095, "grad_norm": 5.274006366729736, "learning_rate": 8.125119916589463e-06, "loss": 0.5643, "step": 25640 }, { "epoch": 0.3065675103719557, "grad_norm": 2.9855518341064453, "learning_rate": 8.12496877564912e-06, "loss": 0.6285, "step": 25641 }, { "epoch": 0.30657946651681633, "grad_norm": 6.788904190063477, "learning_rate": 8.124817630022874e-06, "loss": 0.6152, "step": 25642 }, { "epoch": 0.306591422661677, "grad_norm": 3.5726406574249268, "learning_rate": 8.124666479710954e-06, "loss": 0.627, "step": 25643 }, { "epoch": 0.3066033788065376, "grad_norm": 3.146124839782715, "learning_rate": 8.124515324713585e-06, "loss": 0.6723, "step": 25644 }, { "epoch": 0.30661533495139826, "grad_norm": 3.5074546337127686, "learning_rate": 8.124364165030993e-06, "loss": 0.6587, "step": 25645 }, { "epoch": 0.3066272910962589, "grad_norm": 2.3050286769866943, "learning_rate": 8.124213000663408e-06, "loss": 0.5825, "step": 25646 }, { "epoch": 0.3066392472411196, "grad_norm": 6.031650066375732, "learning_rate": 8.124061831611053e-06, "loss": 0.6302, "step": 25647 }, { "epoch": 0.30665120338598023, "grad_norm": 2.6602413654327393, "learning_rate": 8.123910657874157e-06, "loss": 0.6479, "step": 25648 }, { "epoch": 0.3066631595308409, "grad_norm": 3.113240957260132, "learning_rate": 8.123759479452945e-06, "loss": 0.549, "step": 25649 }, { "epoch": 0.30667511567570155, "grad_norm": 4.027711391448975, "learning_rate": 8.123608296347644e-06, "loss": 0.7233, "step": 25650 }, { "epoch": 0.30668707182056215, "grad_norm": 7.164718151092529, "learning_rate": 8.123457108558484e-06, "loss": 0.5884, "step": 25651 }, { "epoch": 0.3066990279654228, "grad_norm": 2.999770402908325, "learning_rate": 8.123305916085686e-06, "loss": 0.5576, "step": 25652 }, { "epoch": 0.30671098411028347, "grad_norm": 5.534820556640625, "learning_rate": 8.123154718929481e-06, "loss": 0.6406, "step": 25653 }, { "epoch": 0.3067229402551441, "grad_norm": 7.075750350952148, "learning_rate": 8.123003517090094e-06, "loss": 0.6208, "step": 25654 }, { "epoch": 0.3067348964000048, "grad_norm": 2.463325262069702, "learning_rate": 8.122852310567752e-06, "loss": 0.562, "step": 25655 }, { "epoch": 0.30674685254486544, "grad_norm": 2.1751792430877686, "learning_rate": 8.122701099362683e-06, "loss": 0.6411, "step": 25656 }, { "epoch": 0.3067588086897261, "grad_norm": 2.418020248413086, "learning_rate": 8.122549883475111e-06, "loss": 0.637, "step": 25657 }, { "epoch": 0.30677076483458676, "grad_norm": 3.9638121128082275, "learning_rate": 8.122398662905267e-06, "loss": 0.5121, "step": 25658 }, { "epoch": 0.30678272097944737, "grad_norm": 1.832165241241455, "learning_rate": 8.122247437653372e-06, "loss": 0.5631, "step": 25659 }, { "epoch": 0.306794677124308, "grad_norm": 4.317789554595947, "learning_rate": 8.122096207719657e-06, "loss": 0.5958, "step": 25660 }, { "epoch": 0.3068066332691687, "grad_norm": 3.0421864986419678, "learning_rate": 8.121944973104348e-06, "loss": 0.6169, "step": 25661 }, { "epoch": 0.30681858941402934, "grad_norm": 6.260060787200928, "learning_rate": 8.121793733807673e-06, "loss": 0.5754, "step": 25662 }, { "epoch": 0.30683054555889, "grad_norm": 3.6793980598449707, "learning_rate": 8.121642489829855e-06, "loss": 0.5378, "step": 25663 }, { "epoch": 0.30684250170375066, "grad_norm": 5.783364772796631, "learning_rate": 8.121491241171123e-06, "loss": 0.5222, "step": 25664 }, { "epoch": 0.3068544578486113, "grad_norm": 4.410067558288574, "learning_rate": 8.121339987831703e-06, "loss": 0.6042, "step": 25665 }, { "epoch": 0.3068664139934719, "grad_norm": 2.732976198196411, "learning_rate": 8.121188729811824e-06, "loss": 0.6112, "step": 25666 }, { "epoch": 0.3068783701383326, "grad_norm": 2.9910855293273926, "learning_rate": 8.121037467111711e-06, "loss": 0.5598, "step": 25667 }, { "epoch": 0.30689032628319324, "grad_norm": 2.603518486022949, "learning_rate": 8.120886199731593e-06, "loss": 0.5893, "step": 25668 }, { "epoch": 0.3069022824280539, "grad_norm": 2.326932668685913, "learning_rate": 8.120734927671693e-06, "loss": 0.6457, "step": 25669 }, { "epoch": 0.30691423857291456, "grad_norm": 2.4238314628601074, "learning_rate": 8.120583650932238e-06, "loss": 0.5316, "step": 25670 }, { "epoch": 0.3069261947177752, "grad_norm": 5.234060287475586, "learning_rate": 8.120432369513461e-06, "loss": 0.5787, "step": 25671 }, { "epoch": 0.3069381508626359, "grad_norm": 2.2813522815704346, "learning_rate": 8.120281083415583e-06, "loss": 0.5344, "step": 25672 }, { "epoch": 0.3069501070074965, "grad_norm": 1.644656777381897, "learning_rate": 8.12012979263883e-06, "loss": 0.5631, "step": 25673 }, { "epoch": 0.30696206315235713, "grad_norm": 2.158799171447754, "learning_rate": 8.119978497183436e-06, "loss": 0.7145, "step": 25674 }, { "epoch": 0.3069740192972178, "grad_norm": 4.283261299133301, "learning_rate": 8.119827197049618e-06, "loss": 0.5061, "step": 25675 }, { "epoch": 0.30698597544207845, "grad_norm": 3.2769832611083984, "learning_rate": 8.119675892237613e-06, "loss": 0.5562, "step": 25676 }, { "epoch": 0.3069979315869391, "grad_norm": 1.6593903303146362, "learning_rate": 8.119524582747639e-06, "loss": 0.5756, "step": 25677 }, { "epoch": 0.30700988773179977, "grad_norm": 1.7536413669586182, "learning_rate": 8.11937326857993e-06, "loss": 0.5699, "step": 25678 }, { "epoch": 0.3070218438766604, "grad_norm": 3.3311827182769775, "learning_rate": 8.119221949734707e-06, "loss": 0.5662, "step": 25679 }, { "epoch": 0.3070338000215211, "grad_norm": 3.5183873176574707, "learning_rate": 8.119070626212201e-06, "loss": 0.605, "step": 25680 }, { "epoch": 0.3070457561663817, "grad_norm": 2.5405757427215576, "learning_rate": 8.118919298012637e-06, "loss": 0.6152, "step": 25681 }, { "epoch": 0.30705771231124235, "grad_norm": 2.7919700145721436, "learning_rate": 8.118767965136243e-06, "loss": 0.5866, "step": 25682 }, { "epoch": 0.307069668456103, "grad_norm": 2.652547597885132, "learning_rate": 8.118616627583244e-06, "loss": 0.6367, "step": 25683 }, { "epoch": 0.30708162460096367, "grad_norm": 5.427772521972656, "learning_rate": 8.11846528535387e-06, "loss": 0.6039, "step": 25684 }, { "epoch": 0.3070935807458243, "grad_norm": 2.187967538833618, "learning_rate": 8.118313938448345e-06, "loss": 0.6867, "step": 25685 }, { "epoch": 0.307105536890685, "grad_norm": 2.4756736755371094, "learning_rate": 8.118162586866897e-06, "loss": 0.6164, "step": 25686 }, { "epoch": 0.30711749303554564, "grad_norm": 3.2958381175994873, "learning_rate": 8.118011230609754e-06, "loss": 0.5553, "step": 25687 }, { "epoch": 0.30712944918040624, "grad_norm": 4.470081329345703, "learning_rate": 8.11785986967714e-06, "loss": 0.5562, "step": 25688 }, { "epoch": 0.3071414053252669, "grad_norm": 1.6484636068344116, "learning_rate": 8.117708504069285e-06, "loss": 0.608, "step": 25689 }, { "epoch": 0.30715336147012756, "grad_norm": 4.625491142272949, "learning_rate": 8.117557133786419e-06, "loss": 0.708, "step": 25690 }, { "epoch": 0.3071653176149882, "grad_norm": 3.592231035232544, "learning_rate": 8.11740575882876e-06, "loss": 0.658, "step": 25691 }, { "epoch": 0.3071772737598489, "grad_norm": 4.987026691436768, "learning_rate": 8.117254379196542e-06, "loss": 0.5908, "step": 25692 }, { "epoch": 0.30718922990470954, "grad_norm": 1.71010422706604, "learning_rate": 8.11710299488999e-06, "loss": 0.571, "step": 25693 }, { "epoch": 0.3072011860495702, "grad_norm": 3.5857274532318115, "learning_rate": 8.116951605909331e-06, "loss": 0.5419, "step": 25694 }, { "epoch": 0.30721314219443085, "grad_norm": 3.540311098098755, "learning_rate": 8.116800212254791e-06, "loss": 0.5697, "step": 25695 }, { "epoch": 0.30722509833929146, "grad_norm": 2.4994561672210693, "learning_rate": 8.1166488139266e-06, "loss": 0.7483, "step": 25696 }, { "epoch": 0.3072370544841521, "grad_norm": 1.8829245567321777, "learning_rate": 8.116497410924981e-06, "loss": 0.6211, "step": 25697 }, { "epoch": 0.3072490106290128, "grad_norm": 1.9939196109771729, "learning_rate": 8.116346003250163e-06, "loss": 0.5756, "step": 25698 }, { "epoch": 0.30726096677387343, "grad_norm": 3.49554181098938, "learning_rate": 8.116194590902375e-06, "loss": 0.5934, "step": 25699 }, { "epoch": 0.3072729229187341, "grad_norm": 2.201310396194458, "learning_rate": 8.11604317388184e-06, "loss": 0.6058, "step": 25700 }, { "epoch": 0.30728487906359475, "grad_norm": 2.3216075897216797, "learning_rate": 8.11589175218879e-06, "loss": 0.6635, "step": 25701 }, { "epoch": 0.3072968352084554, "grad_norm": 1.9242514371871948, "learning_rate": 8.115740325823444e-06, "loss": 0.5635, "step": 25702 }, { "epoch": 0.307308791353316, "grad_norm": 2.2909014225006104, "learning_rate": 8.11558889478604e-06, "loss": 0.6227, "step": 25703 }, { "epoch": 0.30732074749817667, "grad_norm": 5.219910621643066, "learning_rate": 8.115437459076797e-06, "loss": 0.6224, "step": 25704 }, { "epoch": 0.30733270364303733, "grad_norm": 2.441683530807495, "learning_rate": 8.115286018695945e-06, "loss": 0.5503, "step": 25705 }, { "epoch": 0.307344659787898, "grad_norm": 2.0656323432922363, "learning_rate": 8.11513457364371e-06, "loss": 0.472, "step": 25706 }, { "epoch": 0.30735661593275865, "grad_norm": 1.987342119216919, "learning_rate": 8.114983123920318e-06, "loss": 0.5273, "step": 25707 }, { "epoch": 0.3073685720776193, "grad_norm": 81.0356216430664, "learning_rate": 8.114831669526001e-06, "loss": 0.6179, "step": 25708 }, { "epoch": 0.30738052822247997, "grad_norm": 2.3363935947418213, "learning_rate": 8.114680210460981e-06, "loss": 0.5991, "step": 25709 }, { "epoch": 0.30739248436734057, "grad_norm": 2.648911952972412, "learning_rate": 8.114528746725488e-06, "loss": 0.646, "step": 25710 }, { "epoch": 0.3074044405122012, "grad_norm": 3.5403194427490234, "learning_rate": 8.114377278319746e-06, "loss": 0.6091, "step": 25711 }, { "epoch": 0.3074163966570619, "grad_norm": 2.9957141876220703, "learning_rate": 8.114225805243987e-06, "loss": 0.5405, "step": 25712 }, { "epoch": 0.30742835280192254, "grad_norm": 4.105926513671875, "learning_rate": 8.114074327498433e-06, "loss": 0.5044, "step": 25713 }, { "epoch": 0.3074403089467832, "grad_norm": 2.6741561889648438, "learning_rate": 8.113922845083313e-06, "loss": 0.5362, "step": 25714 }, { "epoch": 0.30745226509164386, "grad_norm": 2.189060688018799, "learning_rate": 8.113771357998857e-06, "loss": 0.5717, "step": 25715 }, { "epoch": 0.3074642212365045, "grad_norm": 2.047447443008423, "learning_rate": 8.113619866245288e-06, "loss": 0.5244, "step": 25716 }, { "epoch": 0.3074761773813652, "grad_norm": 2.8542892932891846, "learning_rate": 8.113468369822837e-06, "loss": 0.5486, "step": 25717 }, { "epoch": 0.3074881335262258, "grad_norm": 1.8934226036071777, "learning_rate": 8.113316868731727e-06, "loss": 0.5851, "step": 25718 }, { "epoch": 0.30750008967108644, "grad_norm": 1.7069679498672485, "learning_rate": 8.113165362972188e-06, "loss": 0.5136, "step": 25719 }, { "epoch": 0.3075120458159471, "grad_norm": 3.1831562519073486, "learning_rate": 8.113013852544447e-06, "loss": 0.6142, "step": 25720 }, { "epoch": 0.30752400196080776, "grad_norm": 2.160123109817505, "learning_rate": 8.11286233744873e-06, "loss": 0.5867, "step": 25721 }, { "epoch": 0.3075359581056684, "grad_norm": 3.4304819107055664, "learning_rate": 8.112710817685265e-06, "loss": 0.58, "step": 25722 }, { "epoch": 0.3075479142505291, "grad_norm": 2.6032233238220215, "learning_rate": 8.112559293254279e-06, "loss": 0.5373, "step": 25723 }, { "epoch": 0.30755987039538973, "grad_norm": 2.8118298053741455, "learning_rate": 8.112407764156e-06, "loss": 0.6207, "step": 25724 }, { "epoch": 0.30757182654025034, "grad_norm": 4.710904598236084, "learning_rate": 8.112256230390654e-06, "loss": 0.7237, "step": 25725 }, { "epoch": 0.307583782685111, "grad_norm": 5.077353477478027, "learning_rate": 8.11210469195847e-06, "loss": 0.5321, "step": 25726 }, { "epoch": 0.30759573882997165, "grad_norm": 7.874357223510742, "learning_rate": 8.111953148859672e-06, "loss": 0.5432, "step": 25727 }, { "epoch": 0.3076076949748323, "grad_norm": 2.6928646564483643, "learning_rate": 8.111801601094488e-06, "loss": 0.5411, "step": 25728 }, { "epoch": 0.30761965111969297, "grad_norm": 2.1918745040893555, "learning_rate": 8.11165004866315e-06, "loss": 0.6425, "step": 25729 }, { "epoch": 0.30763160726455363, "grad_norm": 3.768575429916382, "learning_rate": 8.111498491565879e-06, "loss": 0.5507, "step": 25730 }, { "epoch": 0.3076435634094143, "grad_norm": 2.0340235233306885, "learning_rate": 8.111346929802905e-06, "loss": 0.5944, "step": 25731 }, { "epoch": 0.3076555195542749, "grad_norm": 2.839756965637207, "learning_rate": 8.111195363374459e-06, "loss": 0.6063, "step": 25732 }, { "epoch": 0.30766747569913555, "grad_norm": 3.4103474617004395, "learning_rate": 8.11104379228076e-06, "loss": 0.5933, "step": 25733 }, { "epoch": 0.3076794318439962, "grad_norm": 2.4354422092437744, "learning_rate": 8.11089221652204e-06, "loss": 0.521, "step": 25734 }, { "epoch": 0.30769138798885687, "grad_norm": 2.792412757873535, "learning_rate": 8.110740636098529e-06, "loss": 0.6248, "step": 25735 }, { "epoch": 0.3077033441337175, "grad_norm": 1.553418517112732, "learning_rate": 8.110589051010448e-06, "loss": 0.5575, "step": 25736 }, { "epoch": 0.3077153002785782, "grad_norm": 3.1069130897521973, "learning_rate": 8.110437461258031e-06, "loss": 0.5341, "step": 25737 }, { "epoch": 0.30772725642343884, "grad_norm": 2.463381052017212, "learning_rate": 8.1102858668415e-06, "loss": 0.5332, "step": 25738 }, { "epoch": 0.3077392125682995, "grad_norm": 1.8801757097244263, "learning_rate": 8.110134267761086e-06, "loss": 0.5998, "step": 25739 }, { "epoch": 0.3077511687131601, "grad_norm": 2.5073399543762207, "learning_rate": 8.109982664017013e-06, "loss": 0.5665, "step": 25740 }, { "epoch": 0.30776312485802076, "grad_norm": 1.4725539684295654, "learning_rate": 8.10983105560951e-06, "loss": 0.5518, "step": 25741 }, { "epoch": 0.3077750810028814, "grad_norm": 1.7687054872512817, "learning_rate": 8.109679442538803e-06, "loss": 0.643, "step": 25742 }, { "epoch": 0.3077870371477421, "grad_norm": 2.135303020477295, "learning_rate": 8.109527824805124e-06, "loss": 0.6121, "step": 25743 }, { "epoch": 0.30779899329260274, "grad_norm": 2.725458860397339, "learning_rate": 8.109376202408694e-06, "loss": 0.6371, "step": 25744 }, { "epoch": 0.3078109494374634, "grad_norm": 1.9659215211868286, "learning_rate": 8.109224575349744e-06, "loss": 0.4929, "step": 25745 }, { "epoch": 0.30782290558232406, "grad_norm": 1.9069112539291382, "learning_rate": 8.109072943628502e-06, "loss": 0.6142, "step": 25746 }, { "epoch": 0.30783486172718466, "grad_norm": 1.6984984874725342, "learning_rate": 8.108921307245193e-06, "loss": 0.5225, "step": 25747 }, { "epoch": 0.3078468178720453, "grad_norm": 11.403671264648438, "learning_rate": 8.108769666200046e-06, "loss": 0.5802, "step": 25748 }, { "epoch": 0.307858774016906, "grad_norm": 2.5420610904693604, "learning_rate": 8.108618020493288e-06, "loss": 0.7133, "step": 25749 }, { "epoch": 0.30787073016176664, "grad_norm": 7.000823497772217, "learning_rate": 8.108466370125145e-06, "loss": 0.6352, "step": 25750 }, { "epoch": 0.3078826863066273, "grad_norm": 1.9200257062911987, "learning_rate": 8.108314715095846e-06, "loss": 0.5771, "step": 25751 }, { "epoch": 0.30789464245148795, "grad_norm": 2.4323606491088867, "learning_rate": 8.108163055405618e-06, "loss": 0.6336, "step": 25752 }, { "epoch": 0.3079065985963486, "grad_norm": 1.5463217496871948, "learning_rate": 8.10801139105469e-06, "loss": 0.527, "step": 25753 }, { "epoch": 0.30791855474120927, "grad_norm": 1.8404244184494019, "learning_rate": 8.107859722043287e-06, "loss": 0.5554, "step": 25754 }, { "epoch": 0.3079305108860699, "grad_norm": 1.938107967376709, "learning_rate": 8.107708048371636e-06, "loss": 0.6608, "step": 25755 }, { "epoch": 0.30794246703093053, "grad_norm": 5.4258856773376465, "learning_rate": 8.107556370039966e-06, "loss": 0.6893, "step": 25756 }, { "epoch": 0.3079544231757912, "grad_norm": 1.9214774370193481, "learning_rate": 8.107404687048506e-06, "loss": 0.5452, "step": 25757 }, { "epoch": 0.30796637932065185, "grad_norm": 1.7455803155899048, "learning_rate": 8.107252999397481e-06, "loss": 0.6328, "step": 25758 }, { "epoch": 0.3079783354655125, "grad_norm": 4.93098783493042, "learning_rate": 8.107101307087118e-06, "loss": 0.5514, "step": 25759 }, { "epoch": 0.30799029161037317, "grad_norm": 1.4989680051803589, "learning_rate": 8.106949610117647e-06, "loss": 0.5959, "step": 25760 }, { "epoch": 0.3080022477552338, "grad_norm": 2.322356939315796, "learning_rate": 8.106797908489292e-06, "loss": 0.5636, "step": 25761 }, { "epoch": 0.30801420390009443, "grad_norm": 1.6432603597640991, "learning_rate": 8.106646202202284e-06, "loss": 0.6017, "step": 25762 }, { "epoch": 0.3080261600449551, "grad_norm": 1.705017328262329, "learning_rate": 8.106494491256849e-06, "loss": 0.5358, "step": 25763 }, { "epoch": 0.30803811618981575, "grad_norm": 2.676032304763794, "learning_rate": 8.106342775653214e-06, "loss": 0.5698, "step": 25764 }, { "epoch": 0.3080500723346764, "grad_norm": 5.213281631469727, "learning_rate": 8.106191055391609e-06, "loss": 0.588, "step": 25765 }, { "epoch": 0.30806202847953706, "grad_norm": 1.4471772909164429, "learning_rate": 8.106039330472256e-06, "loss": 0.5753, "step": 25766 }, { "epoch": 0.3080739846243977, "grad_norm": 3.1009140014648438, "learning_rate": 8.105887600895387e-06, "loss": 0.5748, "step": 25767 }, { "epoch": 0.3080859407692584, "grad_norm": 2.072899580001831, "learning_rate": 8.10573586666123e-06, "loss": 0.5525, "step": 25768 }, { "epoch": 0.308097896914119, "grad_norm": 3.1714677810668945, "learning_rate": 8.10558412777001e-06, "loss": 0.605, "step": 25769 }, { "epoch": 0.30810985305897964, "grad_norm": 1.8948619365692139, "learning_rate": 8.105432384221956e-06, "loss": 0.6468, "step": 25770 }, { "epoch": 0.3081218092038403, "grad_norm": 2.6217470169067383, "learning_rate": 8.105280636017294e-06, "loss": 0.6308, "step": 25771 }, { "epoch": 0.30813376534870096, "grad_norm": 2.6028521060943604, "learning_rate": 8.105128883156254e-06, "loss": 0.7024, "step": 25772 }, { "epoch": 0.3081457214935616, "grad_norm": 2.478372812271118, "learning_rate": 8.104977125639062e-06, "loss": 0.5391, "step": 25773 }, { "epoch": 0.3081576776384223, "grad_norm": 5.024886131286621, "learning_rate": 8.104825363465945e-06, "loss": 0.7625, "step": 25774 }, { "epoch": 0.30816963378328294, "grad_norm": 2.2551867961883545, "learning_rate": 8.104673596637131e-06, "loss": 0.5836, "step": 25775 }, { "epoch": 0.3081815899281436, "grad_norm": 2.2140190601348877, "learning_rate": 8.104521825152849e-06, "loss": 0.6481, "step": 25776 }, { "epoch": 0.3081935460730042, "grad_norm": 3.5408663749694824, "learning_rate": 8.104370049013325e-06, "loss": 0.5794, "step": 25777 }, { "epoch": 0.30820550221786486, "grad_norm": 2.4120452404022217, "learning_rate": 8.104218268218787e-06, "loss": 0.6195, "step": 25778 }, { "epoch": 0.3082174583627255, "grad_norm": 1.7746778726577759, "learning_rate": 8.104066482769462e-06, "loss": 0.6537, "step": 25779 }, { "epoch": 0.3082294145075862, "grad_norm": 1.8419111967086792, "learning_rate": 8.10391469266558e-06, "loss": 0.6369, "step": 25780 }, { "epoch": 0.30824137065244683, "grad_norm": 2.048729658126831, "learning_rate": 8.103762897907365e-06, "loss": 0.5659, "step": 25781 }, { "epoch": 0.3082533267973075, "grad_norm": 2.389094352722168, "learning_rate": 8.103611098495048e-06, "loss": 0.5844, "step": 25782 }, { "epoch": 0.30826528294216815, "grad_norm": 1.4643118381500244, "learning_rate": 8.103459294428855e-06, "loss": 0.6864, "step": 25783 }, { "epoch": 0.30827723908702875, "grad_norm": 1.9768859148025513, "learning_rate": 8.103307485709014e-06, "loss": 0.5612, "step": 25784 }, { "epoch": 0.3082891952318894, "grad_norm": 1.276037335395813, "learning_rate": 8.103155672335752e-06, "loss": 0.5445, "step": 25785 }, { "epoch": 0.30830115137675007, "grad_norm": 2.057097911834717, "learning_rate": 8.103003854309296e-06, "loss": 0.5041, "step": 25786 }, { "epoch": 0.30831310752161073, "grad_norm": 1.6294450759887695, "learning_rate": 8.102852031629876e-06, "loss": 0.586, "step": 25787 }, { "epoch": 0.3083250636664714, "grad_norm": 3.531838893890381, "learning_rate": 8.102700204297718e-06, "loss": 0.5568, "step": 25788 }, { "epoch": 0.30833701981133205, "grad_norm": 4.976775169372559, "learning_rate": 8.10254837231305e-06, "loss": 0.6062, "step": 25789 }, { "epoch": 0.3083489759561927, "grad_norm": 17.6435489654541, "learning_rate": 8.1023965356761e-06, "loss": 0.6834, "step": 25790 }, { "epoch": 0.3083609321010533, "grad_norm": 2.2649199962615967, "learning_rate": 8.102244694387094e-06, "loss": 0.595, "step": 25791 }, { "epoch": 0.30837288824591397, "grad_norm": 1.9001251459121704, "learning_rate": 8.102092848446262e-06, "loss": 0.5887, "step": 25792 }, { "epoch": 0.3083848443907746, "grad_norm": 11.973470687866211, "learning_rate": 8.10194099785383e-06, "loss": 0.5788, "step": 25793 }, { "epoch": 0.3083968005356353, "grad_norm": 9.099292755126953, "learning_rate": 8.101789142610029e-06, "loss": 0.5931, "step": 25794 }, { "epoch": 0.30840875668049594, "grad_norm": 1.8432408571243286, "learning_rate": 8.101637282715081e-06, "loss": 0.579, "step": 25795 }, { "epoch": 0.3084207128253566, "grad_norm": 2.815030097961426, "learning_rate": 8.10148541816922e-06, "loss": 0.5348, "step": 25796 }, { "epoch": 0.30843266897021726, "grad_norm": 2.7767343521118164, "learning_rate": 8.101333548972668e-06, "loss": 0.502, "step": 25797 }, { "epoch": 0.3084446251150779, "grad_norm": 6.437219142913818, "learning_rate": 8.101181675125655e-06, "loss": 0.6554, "step": 25798 }, { "epoch": 0.3084565812599385, "grad_norm": 2.11014723777771, "learning_rate": 8.10102979662841e-06, "loss": 0.6189, "step": 25799 }, { "epoch": 0.3084685374047992, "grad_norm": 6.37424898147583, "learning_rate": 8.100877913481162e-06, "loss": 0.5548, "step": 25800 }, { "epoch": 0.30848049354965984, "grad_norm": 2.122699499130249, "learning_rate": 8.100726025684134e-06, "loss": 0.6079, "step": 25801 }, { "epoch": 0.3084924496945205, "grad_norm": 1.5386404991149902, "learning_rate": 8.100574133237555e-06, "loss": 0.6118, "step": 25802 }, { "epoch": 0.30850440583938116, "grad_norm": 1.6498081684112549, "learning_rate": 8.100422236141655e-06, "loss": 0.6853, "step": 25803 }, { "epoch": 0.3085163619842418, "grad_norm": 3.0328009128570557, "learning_rate": 8.100270334396663e-06, "loss": 0.6379, "step": 25804 }, { "epoch": 0.3085283181291025, "grad_norm": 2.1243722438812256, "learning_rate": 8.100118428002804e-06, "loss": 0.5435, "step": 25805 }, { "epoch": 0.3085402742739631, "grad_norm": 1.4853897094726562, "learning_rate": 8.099966516960305e-06, "loss": 0.5127, "step": 25806 }, { "epoch": 0.30855223041882374, "grad_norm": 3.9614908695220947, "learning_rate": 8.099814601269395e-06, "loss": 0.6187, "step": 25807 }, { "epoch": 0.3085641865636844, "grad_norm": 4.446424961090088, "learning_rate": 8.099662680930305e-06, "loss": 0.6601, "step": 25808 }, { "epoch": 0.30857614270854505, "grad_norm": 1.7703771591186523, "learning_rate": 8.099510755943258e-06, "loss": 0.683, "step": 25809 }, { "epoch": 0.3085880988534057, "grad_norm": 2.1955604553222656, "learning_rate": 8.099358826308483e-06, "loss": 0.6207, "step": 25810 }, { "epoch": 0.30860005499826637, "grad_norm": 2.4906930923461914, "learning_rate": 8.099206892026208e-06, "loss": 0.573, "step": 25811 }, { "epoch": 0.30861201114312703, "grad_norm": 2.0434906482696533, "learning_rate": 8.099054953096662e-06, "loss": 0.6053, "step": 25812 }, { "epoch": 0.3086239672879877, "grad_norm": 2.0399351119995117, "learning_rate": 8.098903009520072e-06, "loss": 0.6885, "step": 25813 }, { "epoch": 0.3086359234328483, "grad_norm": 1.962119221687317, "learning_rate": 8.098751061296666e-06, "loss": 0.5229, "step": 25814 }, { "epoch": 0.30864787957770895, "grad_norm": 1.61753511428833, "learning_rate": 8.098599108426673e-06, "loss": 0.5616, "step": 25815 }, { "epoch": 0.3086598357225696, "grad_norm": 1.940692663192749, "learning_rate": 8.098447150910318e-06, "loss": 0.5392, "step": 25816 }, { "epoch": 0.30867179186743027, "grad_norm": 2.0862245559692383, "learning_rate": 8.098295188747831e-06, "loss": 0.5925, "step": 25817 }, { "epoch": 0.3086837480122909, "grad_norm": 3.0692949295043945, "learning_rate": 8.098143221939438e-06, "loss": 0.571, "step": 25818 }, { "epoch": 0.3086957041571516, "grad_norm": 1.5767292976379395, "learning_rate": 8.097991250485369e-06, "loss": 0.5792, "step": 25819 }, { "epoch": 0.30870766030201224, "grad_norm": 1.8184317350387573, "learning_rate": 8.097839274385853e-06, "loss": 0.613, "step": 25820 }, { "epoch": 0.30871961644687285, "grad_norm": 1.7304444313049316, "learning_rate": 8.097687293641115e-06, "loss": 0.5539, "step": 25821 }, { "epoch": 0.3087315725917335, "grad_norm": 2.7026026248931885, "learning_rate": 8.097535308251382e-06, "loss": 0.6031, "step": 25822 }, { "epoch": 0.30874352873659416, "grad_norm": 3.1077208518981934, "learning_rate": 8.097383318216885e-06, "loss": 0.6765, "step": 25823 }, { "epoch": 0.3087554848814548, "grad_norm": 1.8316783905029297, "learning_rate": 8.09723132353785e-06, "loss": 0.6859, "step": 25824 }, { "epoch": 0.3087674410263155, "grad_norm": 1.7094955444335938, "learning_rate": 8.097079324214507e-06, "loss": 0.6673, "step": 25825 }, { "epoch": 0.30877939717117614, "grad_norm": 3.6158158779144287, "learning_rate": 8.096927320247083e-06, "loss": 0.6596, "step": 25826 }, { "epoch": 0.3087913533160368, "grad_norm": 1.6990671157836914, "learning_rate": 8.096775311635805e-06, "loss": 0.5551, "step": 25827 }, { "epoch": 0.3088033094608974, "grad_norm": 2.1807825565338135, "learning_rate": 8.0966232983809e-06, "loss": 0.5377, "step": 25828 }, { "epoch": 0.30881526560575806, "grad_norm": 1.5911617279052734, "learning_rate": 8.096471280482599e-06, "loss": 0.5839, "step": 25829 }, { "epoch": 0.3088272217506187, "grad_norm": 2.2995965480804443, "learning_rate": 8.096319257941127e-06, "loss": 0.5848, "step": 25830 }, { "epoch": 0.3088391778954794, "grad_norm": 2.1256933212280273, "learning_rate": 8.096167230756715e-06, "loss": 0.4998, "step": 25831 }, { "epoch": 0.30885113404034004, "grad_norm": 5.376012325286865, "learning_rate": 8.096015198929587e-06, "loss": 0.5801, "step": 25832 }, { "epoch": 0.3088630901852007, "grad_norm": 1.9205645322799683, "learning_rate": 8.095863162459976e-06, "loss": 0.5188, "step": 25833 }, { "epoch": 0.30887504633006135, "grad_norm": 1.8809436559677124, "learning_rate": 8.095711121348105e-06, "loss": 0.5739, "step": 25834 }, { "epoch": 0.308887002474922, "grad_norm": 1.542240023612976, "learning_rate": 8.095559075594205e-06, "loss": 0.586, "step": 25835 }, { "epoch": 0.3088989586197826, "grad_norm": 2.198909282684326, "learning_rate": 8.095407025198503e-06, "loss": 0.5286, "step": 25836 }, { "epoch": 0.3089109147646433, "grad_norm": 1.5646040439605713, "learning_rate": 8.095254970161225e-06, "loss": 0.6581, "step": 25837 }, { "epoch": 0.30892287090950393, "grad_norm": 2.3092827796936035, "learning_rate": 8.095102910482603e-06, "loss": 0.6736, "step": 25838 }, { "epoch": 0.3089348270543646, "grad_norm": 9.640352249145508, "learning_rate": 8.094950846162866e-06, "loss": 0.5587, "step": 25839 }, { "epoch": 0.30894678319922525, "grad_norm": 2.1245803833007812, "learning_rate": 8.094798777202235e-06, "loss": 0.6254, "step": 25840 }, { "epoch": 0.3089587393440859, "grad_norm": 1.8080273866653442, "learning_rate": 8.094646703600945e-06, "loss": 0.669, "step": 25841 }, { "epoch": 0.30897069548894657, "grad_norm": 7.306482791900635, "learning_rate": 8.09449462535922e-06, "loss": 0.6881, "step": 25842 }, { "epoch": 0.30898265163380717, "grad_norm": 2.09181547164917, "learning_rate": 8.094342542477289e-06, "loss": 0.59, "step": 25843 }, { "epoch": 0.30899460777866783, "grad_norm": 2.0656371116638184, "learning_rate": 8.094190454955381e-06, "loss": 0.6692, "step": 25844 }, { "epoch": 0.3090065639235285, "grad_norm": 2.2815937995910645, "learning_rate": 8.094038362793723e-06, "loss": 0.6251, "step": 25845 }, { "epoch": 0.30901852006838915, "grad_norm": 3.0613772869110107, "learning_rate": 8.093886265992544e-06, "loss": 0.6686, "step": 25846 }, { "epoch": 0.3090304762132498, "grad_norm": 2.201368808746338, "learning_rate": 8.09373416455207e-06, "loss": 0.5923, "step": 25847 }, { "epoch": 0.30904243235811046, "grad_norm": 2.0856103897094727, "learning_rate": 8.093582058472532e-06, "loss": 0.6009, "step": 25848 }, { "epoch": 0.3090543885029711, "grad_norm": 1.993569254875183, "learning_rate": 8.093429947754157e-06, "loss": 0.5961, "step": 25849 }, { "epoch": 0.3090663446478317, "grad_norm": 1.4971661567687988, "learning_rate": 8.093277832397173e-06, "loss": 0.6329, "step": 25850 }, { "epoch": 0.3090783007926924, "grad_norm": 1.528417944908142, "learning_rate": 8.093125712401806e-06, "loss": 0.5444, "step": 25851 }, { "epoch": 0.30909025693755304, "grad_norm": 1.8823517560958862, "learning_rate": 8.092973587768288e-06, "loss": 0.5862, "step": 25852 }, { "epoch": 0.3091022130824137, "grad_norm": 1.8493224382400513, "learning_rate": 8.092821458496844e-06, "loss": 0.6985, "step": 25853 }, { "epoch": 0.30911416922727436, "grad_norm": 3.2455432415008545, "learning_rate": 8.092669324587704e-06, "loss": 0.5437, "step": 25854 }, { "epoch": 0.309126125372135, "grad_norm": 2.3417952060699463, "learning_rate": 8.092517186041094e-06, "loss": 0.5388, "step": 25855 }, { "epoch": 0.3091380815169957, "grad_norm": 10.460553169250488, "learning_rate": 8.092365042857245e-06, "loss": 0.5202, "step": 25856 }, { "epoch": 0.30915003766185634, "grad_norm": 2.9791500568389893, "learning_rate": 8.092212895036383e-06, "loss": 0.4751, "step": 25857 }, { "epoch": 0.30916199380671694, "grad_norm": 2.249983310699463, "learning_rate": 8.092060742578737e-06, "loss": 0.6284, "step": 25858 }, { "epoch": 0.3091739499515776, "grad_norm": 1.6451984643936157, "learning_rate": 8.091908585484536e-06, "loss": 0.6569, "step": 25859 }, { "epoch": 0.30918590609643826, "grad_norm": 1.671245813369751, "learning_rate": 8.091756423754004e-06, "loss": 0.5015, "step": 25860 }, { "epoch": 0.3091978622412989, "grad_norm": 3.0891945362091064, "learning_rate": 8.091604257387375e-06, "loss": 0.6497, "step": 25861 }, { "epoch": 0.3092098183861596, "grad_norm": 1.8610587120056152, "learning_rate": 8.091452086384873e-06, "loss": 0.5322, "step": 25862 }, { "epoch": 0.30922177453102023, "grad_norm": 1.9132698774337769, "learning_rate": 8.091299910746729e-06, "loss": 0.4928, "step": 25863 }, { "epoch": 0.3092337306758809, "grad_norm": 2.0410971641540527, "learning_rate": 8.091147730473168e-06, "loss": 0.6289, "step": 25864 }, { "epoch": 0.3092456868207415, "grad_norm": 2.6494104862213135, "learning_rate": 8.090995545564422e-06, "loss": 0.6043, "step": 25865 }, { "epoch": 0.30925764296560215, "grad_norm": 3.449561357498169, "learning_rate": 8.090843356020714e-06, "loss": 0.5819, "step": 25866 }, { "epoch": 0.3092695991104628, "grad_norm": 1.3605728149414062, "learning_rate": 8.090691161842278e-06, "loss": 0.5076, "step": 25867 }, { "epoch": 0.30928155525532347, "grad_norm": 2.7810404300689697, "learning_rate": 8.09053896302934e-06, "loss": 0.6221, "step": 25868 }, { "epoch": 0.30929351140018413, "grad_norm": 2.4305531978607178, "learning_rate": 8.090386759582128e-06, "loss": 0.6632, "step": 25869 }, { "epoch": 0.3093054675450448, "grad_norm": 1.6057225465774536, "learning_rate": 8.090234551500869e-06, "loss": 0.6259, "step": 25870 }, { "epoch": 0.30931742368990545, "grad_norm": 1.4985947608947754, "learning_rate": 8.09008233878579e-06, "loss": 0.7341, "step": 25871 }, { "epoch": 0.3093293798347661, "grad_norm": 1.4768867492675781, "learning_rate": 8.089930121437125e-06, "loss": 0.5077, "step": 25872 }, { "epoch": 0.3093413359796267, "grad_norm": 1.4636187553405762, "learning_rate": 8.089777899455099e-06, "loss": 0.5242, "step": 25873 }, { "epoch": 0.30935329212448737, "grad_norm": 2.2355751991271973, "learning_rate": 8.089625672839939e-06, "loss": 0.6171, "step": 25874 }, { "epoch": 0.309365248269348, "grad_norm": 3.129519462585449, "learning_rate": 8.089473441591873e-06, "loss": 0.6521, "step": 25875 }, { "epoch": 0.3093772044142087, "grad_norm": 3.9331815242767334, "learning_rate": 8.089321205711131e-06, "loss": 0.6197, "step": 25876 }, { "epoch": 0.30938916055906934, "grad_norm": 1.2289808988571167, "learning_rate": 8.089168965197943e-06, "loss": 0.6016, "step": 25877 }, { "epoch": 0.30940111670393, "grad_norm": 4.101467132568359, "learning_rate": 8.089016720052531e-06, "loss": 0.6753, "step": 25878 }, { "epoch": 0.30941307284879066, "grad_norm": 1.4000484943389893, "learning_rate": 8.088864470275131e-06, "loss": 0.5525, "step": 25879 }, { "epoch": 0.30942502899365126, "grad_norm": 1.7323919534683228, "learning_rate": 8.088712215865968e-06, "loss": 0.512, "step": 25880 }, { "epoch": 0.3094369851385119, "grad_norm": 3.630645513534546, "learning_rate": 8.088559956825268e-06, "loss": 0.6882, "step": 25881 }, { "epoch": 0.3094489412833726, "grad_norm": 2.427119016647339, "learning_rate": 8.088407693153262e-06, "loss": 0.6279, "step": 25882 }, { "epoch": 0.30946089742823324, "grad_norm": 1.6692936420440674, "learning_rate": 8.088255424850178e-06, "loss": 0.5359, "step": 25883 }, { "epoch": 0.3094728535730939, "grad_norm": 2.441687822341919, "learning_rate": 8.088103151916245e-06, "loss": 0.6551, "step": 25884 }, { "epoch": 0.30948480971795456, "grad_norm": 1.9160264730453491, "learning_rate": 8.087950874351689e-06, "loss": 0.7234, "step": 25885 }, { "epoch": 0.3094967658628152, "grad_norm": 2.4171242713928223, "learning_rate": 8.087798592156739e-06, "loss": 0.5529, "step": 25886 }, { "epoch": 0.3095087220076758, "grad_norm": 2.1320858001708984, "learning_rate": 8.087646305331626e-06, "loss": 0.6406, "step": 25887 }, { "epoch": 0.3095206781525365, "grad_norm": 2.337372064590454, "learning_rate": 8.087494013876575e-06, "loss": 0.6495, "step": 25888 }, { "epoch": 0.30953263429739714, "grad_norm": 1.3406152725219727, "learning_rate": 8.087341717791817e-06, "loss": 0.5, "step": 25889 }, { "epoch": 0.3095445904422578, "grad_norm": 2.156724452972412, "learning_rate": 8.08718941707758e-06, "loss": 0.6517, "step": 25890 }, { "epoch": 0.30955654658711845, "grad_norm": 4.8019938468933105, "learning_rate": 8.087037111734088e-06, "loss": 0.5714, "step": 25891 }, { "epoch": 0.3095685027319791, "grad_norm": 1.6525601148605347, "learning_rate": 8.086884801761574e-06, "loss": 0.5797, "step": 25892 }, { "epoch": 0.30958045887683977, "grad_norm": 27.3851318359375, "learning_rate": 8.086732487160265e-06, "loss": 0.6053, "step": 25893 }, { "epoch": 0.30959241502170043, "grad_norm": 3.8229987621307373, "learning_rate": 8.08658016793039e-06, "loss": 0.6492, "step": 25894 }, { "epoch": 0.30960437116656103, "grad_norm": 2.1775641441345215, "learning_rate": 8.08642784407218e-06, "loss": 0.6183, "step": 25895 }, { "epoch": 0.3096163273114217, "grad_norm": 4.45285701751709, "learning_rate": 8.086275515585857e-06, "loss": 0.6175, "step": 25896 }, { "epoch": 0.30962828345628235, "grad_norm": 1.8816933631896973, "learning_rate": 8.086123182471654e-06, "loss": 0.664, "step": 25897 }, { "epoch": 0.309640239601143, "grad_norm": 3.9151270389556885, "learning_rate": 8.085970844729798e-06, "loss": 0.6204, "step": 25898 }, { "epoch": 0.30965219574600367, "grad_norm": 2.9610488414764404, "learning_rate": 8.08581850236052e-06, "loss": 0.6564, "step": 25899 }, { "epoch": 0.3096641518908643, "grad_norm": 2.5008115768432617, "learning_rate": 8.085666155364043e-06, "loss": 0.569, "step": 25900 }, { "epoch": 0.309676108035725, "grad_norm": 2.1350317001342773, "learning_rate": 8.085513803740602e-06, "loss": 0.6485, "step": 25901 }, { "epoch": 0.3096880641805856, "grad_norm": 7.590294361114502, "learning_rate": 8.085361447490419e-06, "loss": 0.6143, "step": 25902 }, { "epoch": 0.30970002032544625, "grad_norm": 2.0355849266052246, "learning_rate": 8.085209086613726e-06, "loss": 0.6366, "step": 25903 }, { "epoch": 0.3097119764703069, "grad_norm": 2.080740213394165, "learning_rate": 8.08505672111075e-06, "loss": 0.5723, "step": 25904 }, { "epoch": 0.30972393261516756, "grad_norm": 3.5804126262664795, "learning_rate": 8.084904350981724e-06, "loss": 0.6408, "step": 25905 }, { "epoch": 0.3097358887600282, "grad_norm": 1.803619384765625, "learning_rate": 8.08475197622687e-06, "loss": 0.6866, "step": 25906 }, { "epoch": 0.3097478449048889, "grad_norm": 2.16668963432312, "learning_rate": 8.08459959684642e-06, "loss": 0.7289, "step": 25907 }, { "epoch": 0.30975980104974954, "grad_norm": 8.834358215332031, "learning_rate": 8.084447212840603e-06, "loss": 0.5087, "step": 25908 }, { "epoch": 0.30977175719461014, "grad_norm": 2.2603607177734375, "learning_rate": 8.084294824209646e-06, "loss": 0.558, "step": 25909 }, { "epoch": 0.3097837133394708, "grad_norm": 3.267094135284424, "learning_rate": 8.084142430953778e-06, "loss": 0.632, "step": 25910 }, { "epoch": 0.30979566948433146, "grad_norm": 2.481872320175171, "learning_rate": 8.083990033073228e-06, "loss": 0.6334, "step": 25911 }, { "epoch": 0.3098076256291921, "grad_norm": 2.0095632076263428, "learning_rate": 8.083837630568223e-06, "loss": 0.5994, "step": 25912 }, { "epoch": 0.3098195817740528, "grad_norm": 2.2797937393188477, "learning_rate": 8.083685223438993e-06, "loss": 0.5733, "step": 25913 }, { "epoch": 0.30983153791891344, "grad_norm": 1.581436276435852, "learning_rate": 8.083532811685767e-06, "loss": 0.6289, "step": 25914 }, { "epoch": 0.3098434940637741, "grad_norm": 4.208446979522705, "learning_rate": 8.083380395308769e-06, "loss": 0.5647, "step": 25915 }, { "epoch": 0.30985545020863475, "grad_norm": 2.3286986351013184, "learning_rate": 8.083227974308234e-06, "loss": 0.6449, "step": 25916 }, { "epoch": 0.30986740635349536, "grad_norm": 2.032909631729126, "learning_rate": 8.083075548684388e-06, "loss": 0.6311, "step": 25917 }, { "epoch": 0.309879362498356, "grad_norm": 6.302534580230713, "learning_rate": 8.082923118437457e-06, "loss": 0.6696, "step": 25918 }, { "epoch": 0.3098913186432167, "grad_norm": 4.064305305480957, "learning_rate": 8.082770683567673e-06, "loss": 0.599, "step": 25919 }, { "epoch": 0.30990327478807733, "grad_norm": 2.3472273349761963, "learning_rate": 8.082618244075265e-06, "loss": 0.5318, "step": 25920 }, { "epoch": 0.309915230932938, "grad_norm": 6.127068996429443, "learning_rate": 8.082465799960457e-06, "loss": 0.6705, "step": 25921 }, { "epoch": 0.30992718707779865, "grad_norm": 1.6348168849945068, "learning_rate": 8.082313351223485e-06, "loss": 0.6304, "step": 25922 }, { "epoch": 0.3099391432226593, "grad_norm": 1.404542326927185, "learning_rate": 8.08216089786457e-06, "loss": 0.5133, "step": 25923 }, { "epoch": 0.3099510993675199, "grad_norm": 10.812952995300293, "learning_rate": 8.082008439883944e-06, "loss": 0.6143, "step": 25924 }, { "epoch": 0.30996305551238057, "grad_norm": 3.328535318374634, "learning_rate": 8.081855977281834e-06, "loss": 0.6108, "step": 25925 }, { "epoch": 0.30997501165724123, "grad_norm": 2.111624240875244, "learning_rate": 8.081703510058471e-06, "loss": 0.5749, "step": 25926 }, { "epoch": 0.3099869678021019, "grad_norm": 1.9415404796600342, "learning_rate": 8.081551038214085e-06, "loss": 0.5299, "step": 25927 }, { "epoch": 0.30999892394696255, "grad_norm": 3.9745428562164307, "learning_rate": 8.081398561748901e-06, "loss": 0.5598, "step": 25928 }, { "epoch": 0.3100108800918232, "grad_norm": 1.6066747903823853, "learning_rate": 8.081246080663148e-06, "loss": 0.5586, "step": 25929 }, { "epoch": 0.31002283623668386, "grad_norm": 1.8003028631210327, "learning_rate": 8.081093594957056e-06, "loss": 0.5386, "step": 25930 }, { "epoch": 0.3100347923815445, "grad_norm": 3.2453453540802, "learning_rate": 8.080941104630852e-06, "loss": 0.6065, "step": 25931 }, { "epoch": 0.3100467485264051, "grad_norm": 2.089813232421875, "learning_rate": 8.080788609684769e-06, "loss": 0.5739, "step": 25932 }, { "epoch": 0.3100587046712658, "grad_norm": 1.698327660560608, "learning_rate": 8.080636110119029e-06, "loss": 0.5571, "step": 25933 }, { "epoch": 0.31007066081612644, "grad_norm": 3.9180808067321777, "learning_rate": 8.080483605933867e-06, "loss": 0.6382, "step": 25934 }, { "epoch": 0.3100826169609871, "grad_norm": 4.5483222007751465, "learning_rate": 8.080331097129506e-06, "loss": 0.5489, "step": 25935 }, { "epoch": 0.31009457310584776, "grad_norm": 1.6901228427886963, "learning_rate": 8.08017858370618e-06, "loss": 0.6292, "step": 25936 }, { "epoch": 0.3101065292507084, "grad_norm": 2.749138593673706, "learning_rate": 8.080026065664115e-06, "loss": 0.6894, "step": 25937 }, { "epoch": 0.3101184853955691, "grad_norm": 4.399777889251709, "learning_rate": 8.079873543003539e-06, "loss": 0.6021, "step": 25938 }, { "epoch": 0.3101304415404297, "grad_norm": 1.698339581489563, "learning_rate": 8.079721015724681e-06, "loss": 0.579, "step": 25939 }, { "epoch": 0.31014239768529034, "grad_norm": 1.8450487852096558, "learning_rate": 8.079568483827773e-06, "loss": 0.5953, "step": 25940 }, { "epoch": 0.310154353830151, "grad_norm": 5.993383407592773, "learning_rate": 8.07941594731304e-06, "loss": 0.6346, "step": 25941 }, { "epoch": 0.31016630997501166, "grad_norm": 1.6856107711791992, "learning_rate": 8.07926340618071e-06, "loss": 0.6264, "step": 25942 }, { "epoch": 0.3101782661198723, "grad_norm": 3.569662094116211, "learning_rate": 8.079110860431016e-06, "loss": 0.6317, "step": 25943 }, { "epoch": 0.310190222264733, "grad_norm": 4.188401699066162, "learning_rate": 8.078958310064183e-06, "loss": 0.6082, "step": 25944 }, { "epoch": 0.31020217840959363, "grad_norm": 2.1128976345062256, "learning_rate": 8.078805755080441e-06, "loss": 0.5883, "step": 25945 }, { "epoch": 0.31021413455445424, "grad_norm": 1.743747353553772, "learning_rate": 8.07865319548002e-06, "loss": 0.5867, "step": 25946 }, { "epoch": 0.3102260906993149, "grad_norm": 2.530531167984009, "learning_rate": 8.078500631263146e-06, "loss": 0.7902, "step": 25947 }, { "epoch": 0.31023804684417555, "grad_norm": 3.5243849754333496, "learning_rate": 8.07834806243005e-06, "loss": 0.6676, "step": 25948 }, { "epoch": 0.3102500029890362, "grad_norm": 3.2617592811584473, "learning_rate": 8.07819548898096e-06, "loss": 0.5964, "step": 25949 }, { "epoch": 0.31026195913389687, "grad_norm": 1.848484992980957, "learning_rate": 8.078042910916106e-06, "loss": 0.6296, "step": 25950 }, { "epoch": 0.31027391527875753, "grad_norm": 1.7196214199066162, "learning_rate": 8.077890328235716e-06, "loss": 0.5932, "step": 25951 }, { "epoch": 0.3102858714236182, "grad_norm": 1.7491449117660522, "learning_rate": 8.077737740940018e-06, "loss": 0.6856, "step": 25952 }, { "epoch": 0.31029782756847885, "grad_norm": 7.654494285583496, "learning_rate": 8.07758514902924e-06, "loss": 0.6347, "step": 25953 }, { "epoch": 0.31030978371333945, "grad_norm": 1.9756966829299927, "learning_rate": 8.077432552503612e-06, "loss": 0.6595, "step": 25954 }, { "epoch": 0.3103217398582001, "grad_norm": 1.8957664966583252, "learning_rate": 8.077279951363365e-06, "loss": 0.5782, "step": 25955 }, { "epoch": 0.31033369600306077, "grad_norm": 1.6212990283966064, "learning_rate": 8.077127345608724e-06, "loss": 0.6973, "step": 25956 }, { "epoch": 0.3103456521479214, "grad_norm": 12.309733390808105, "learning_rate": 8.076974735239919e-06, "loss": 0.6349, "step": 25957 }, { "epoch": 0.3103576082927821, "grad_norm": 2.25528621673584, "learning_rate": 8.076822120257181e-06, "loss": 0.6605, "step": 25958 }, { "epoch": 0.31036956443764274, "grad_norm": 3.259556293487549, "learning_rate": 8.076669500660738e-06, "loss": 0.6419, "step": 25959 }, { "epoch": 0.3103815205825034, "grad_norm": 2.4562010765075684, "learning_rate": 8.076516876450817e-06, "loss": 0.6298, "step": 25960 }, { "epoch": 0.310393476727364, "grad_norm": 4.863750457763672, "learning_rate": 8.076364247627649e-06, "loss": 0.563, "step": 25961 }, { "epoch": 0.31040543287222466, "grad_norm": 10.136723518371582, "learning_rate": 8.07621161419146e-06, "loss": 0.5419, "step": 25962 }, { "epoch": 0.3104173890170853, "grad_norm": 2.779221773147583, "learning_rate": 8.076058976142482e-06, "loss": 0.591, "step": 25963 }, { "epoch": 0.310429345161946, "grad_norm": 2.355593204498291, "learning_rate": 8.075906333480944e-06, "loss": 0.6603, "step": 25964 }, { "epoch": 0.31044130130680664, "grad_norm": 2.275078773498535, "learning_rate": 8.07575368620707e-06, "loss": 0.6285, "step": 25965 }, { "epoch": 0.3104532574516673, "grad_norm": 2.3127450942993164, "learning_rate": 8.075601034321094e-06, "loss": 0.6556, "step": 25966 }, { "epoch": 0.31046521359652796, "grad_norm": 3.0373036861419678, "learning_rate": 8.075448377823244e-06, "loss": 0.5645, "step": 25967 }, { "epoch": 0.31047716974138856, "grad_norm": 1.738566279411316, "learning_rate": 8.075295716713749e-06, "loss": 0.586, "step": 25968 }, { "epoch": 0.3104891258862492, "grad_norm": 2.169996976852417, "learning_rate": 8.075143050992835e-06, "loss": 0.619, "step": 25969 }, { "epoch": 0.3105010820311099, "grad_norm": 2.5762860774993896, "learning_rate": 8.074990380660734e-06, "loss": 0.5731, "step": 25970 }, { "epoch": 0.31051303817597053, "grad_norm": 2.2906572818756104, "learning_rate": 8.074837705717676e-06, "loss": 0.5333, "step": 25971 }, { "epoch": 0.3105249943208312, "grad_norm": 7.154650688171387, "learning_rate": 8.074685026163884e-06, "loss": 0.6017, "step": 25972 }, { "epoch": 0.31053695046569185, "grad_norm": 1.976718783378601, "learning_rate": 8.074532341999593e-06, "loss": 0.5194, "step": 25973 }, { "epoch": 0.3105489066105525, "grad_norm": 2.0537643432617188, "learning_rate": 8.07437965322503e-06, "loss": 0.6156, "step": 25974 }, { "epoch": 0.31056086275541317, "grad_norm": 4.809193134307861, "learning_rate": 8.074226959840423e-06, "loss": 0.658, "step": 25975 }, { "epoch": 0.3105728189002738, "grad_norm": 2.351764678955078, "learning_rate": 8.074074261846004e-06, "loss": 0.6707, "step": 25976 }, { "epoch": 0.31058477504513443, "grad_norm": 2.011963367462158, "learning_rate": 8.073921559241997e-06, "loss": 0.6508, "step": 25977 }, { "epoch": 0.3105967311899951, "grad_norm": 3.8486485481262207, "learning_rate": 8.073768852028634e-06, "loss": 0.6147, "step": 25978 }, { "epoch": 0.31060868733485575, "grad_norm": 2.538034677505493, "learning_rate": 8.073616140206144e-06, "loss": 0.5318, "step": 25979 }, { "epoch": 0.3106206434797164, "grad_norm": 2.834637403488159, "learning_rate": 8.073463423774756e-06, "loss": 0.6207, "step": 25980 }, { "epoch": 0.31063259962457707, "grad_norm": 3.8516156673431396, "learning_rate": 8.0733107027347e-06, "loss": 0.6743, "step": 25981 }, { "epoch": 0.3106445557694377, "grad_norm": 2.0214781761169434, "learning_rate": 8.073157977086203e-06, "loss": 0.5522, "step": 25982 }, { "epoch": 0.31065651191429833, "grad_norm": 2.141957998275757, "learning_rate": 8.073005246829493e-06, "loss": 0.7152, "step": 25983 }, { "epoch": 0.310668468059159, "grad_norm": 2.076051712036133, "learning_rate": 8.072852511964801e-06, "loss": 0.6159, "step": 25984 }, { "epoch": 0.31068042420401965, "grad_norm": 1.8495993614196777, "learning_rate": 8.072699772492358e-06, "loss": 0.534, "step": 25985 }, { "epoch": 0.3106923803488803, "grad_norm": 1.6798515319824219, "learning_rate": 8.07254702841239e-06, "loss": 0.675, "step": 25986 }, { "epoch": 0.31070433649374096, "grad_norm": 1.908613681793213, "learning_rate": 8.072394279725126e-06, "loss": 0.6106, "step": 25987 }, { "epoch": 0.3107162926386016, "grad_norm": 2.033472776412964, "learning_rate": 8.072241526430795e-06, "loss": 0.5285, "step": 25988 }, { "epoch": 0.3107282487834623, "grad_norm": 2.3998687267303467, "learning_rate": 8.072088768529626e-06, "loss": 0.5845, "step": 25989 }, { "epoch": 0.31074020492832294, "grad_norm": 2.8053219318389893, "learning_rate": 8.071936006021852e-06, "loss": 0.5229, "step": 25990 }, { "epoch": 0.31075216107318354, "grad_norm": 2.9920036792755127, "learning_rate": 8.071783238907697e-06, "loss": 0.5923, "step": 25991 }, { "epoch": 0.3107641172180442, "grad_norm": 2.095205545425415, "learning_rate": 8.071630467187395e-06, "loss": 0.624, "step": 25992 }, { "epoch": 0.31077607336290486, "grad_norm": 3.140166759490967, "learning_rate": 8.071477690861169e-06, "loss": 0.5323, "step": 25993 }, { "epoch": 0.3107880295077655, "grad_norm": 2.008244752883911, "learning_rate": 8.071324909929252e-06, "loss": 0.6192, "step": 25994 }, { "epoch": 0.3107999856526262, "grad_norm": 3.1107451915740967, "learning_rate": 8.071172124391874e-06, "loss": 0.6561, "step": 25995 }, { "epoch": 0.31081194179748683, "grad_norm": 1.8287832736968994, "learning_rate": 8.071019334249262e-06, "loss": 0.6771, "step": 25996 }, { "epoch": 0.3108238979423475, "grad_norm": 7.025443077087402, "learning_rate": 8.070866539501643e-06, "loss": 0.6084, "step": 25997 }, { "epoch": 0.3108358540872081, "grad_norm": 2.1069588661193848, "learning_rate": 8.070713740149252e-06, "loss": 0.5092, "step": 25998 }, { "epoch": 0.31084781023206876, "grad_norm": 2.076056480407715, "learning_rate": 8.070560936192314e-06, "loss": 0.5948, "step": 25999 }, { "epoch": 0.3108597663769294, "grad_norm": 2.137115478515625, "learning_rate": 8.070408127631057e-06, "loss": 0.578, "step": 26000 }, { "epoch": 0.3108717225217901, "grad_norm": 2.2066290378570557, "learning_rate": 8.070255314465713e-06, "loss": 0.6145, "step": 26001 }, { "epoch": 0.31088367866665073, "grad_norm": 1.7560632228851318, "learning_rate": 8.070102496696513e-06, "loss": 0.6765, "step": 26002 }, { "epoch": 0.3108956348115114, "grad_norm": 2.664984941482544, "learning_rate": 8.069949674323681e-06, "loss": 0.6367, "step": 26003 }, { "epoch": 0.31090759095637205, "grad_norm": 2.5494914054870605, "learning_rate": 8.069796847347448e-06, "loss": 0.5889, "step": 26004 }, { "epoch": 0.31091954710123265, "grad_norm": 2.5329320430755615, "learning_rate": 8.069644015768044e-06, "loss": 0.594, "step": 26005 }, { "epoch": 0.3109315032460933, "grad_norm": 6.22170877456665, "learning_rate": 8.069491179585698e-06, "loss": 0.6246, "step": 26006 }, { "epoch": 0.31094345939095397, "grad_norm": 2.395944118499756, "learning_rate": 8.06933833880064e-06, "loss": 0.542, "step": 26007 }, { "epoch": 0.3109554155358146, "grad_norm": 2.105018138885498, "learning_rate": 8.069185493413097e-06, "loss": 0.6676, "step": 26008 }, { "epoch": 0.3109673716806753, "grad_norm": 4.509771347045898, "learning_rate": 8.0690326434233e-06, "loss": 0.5615, "step": 26009 }, { "epoch": 0.31097932782553594, "grad_norm": 6.6625447273254395, "learning_rate": 8.068879788831478e-06, "loss": 0.6767, "step": 26010 }, { "epoch": 0.3109912839703966, "grad_norm": 4.287215709686279, "learning_rate": 8.06872692963786e-06, "loss": 0.5963, "step": 26011 }, { "epoch": 0.31100324011525726, "grad_norm": 2.8008267879486084, "learning_rate": 8.068574065842675e-06, "loss": 0.5984, "step": 26012 }, { "epoch": 0.31101519626011787, "grad_norm": 1.893347144126892, "learning_rate": 8.068421197446153e-06, "loss": 0.6847, "step": 26013 }, { "epoch": 0.3110271524049785, "grad_norm": 3.834563732147217, "learning_rate": 8.06826832444852e-06, "loss": 0.6565, "step": 26014 }, { "epoch": 0.3110391085498392, "grad_norm": 2.1026906967163086, "learning_rate": 8.06811544685001e-06, "loss": 0.6362, "step": 26015 }, { "epoch": 0.31105106469469984, "grad_norm": 6.699082851409912, "learning_rate": 8.067962564650847e-06, "loss": 0.5816, "step": 26016 }, { "epoch": 0.3110630208395605, "grad_norm": 1.938186764717102, "learning_rate": 8.067809677851266e-06, "loss": 0.5871, "step": 26017 }, { "epoch": 0.31107497698442116, "grad_norm": 1.9465457201004028, "learning_rate": 8.067656786451493e-06, "loss": 0.6229, "step": 26018 }, { "epoch": 0.3110869331292818, "grad_norm": 2.2938671112060547, "learning_rate": 8.067503890451758e-06, "loss": 0.6202, "step": 26019 }, { "epoch": 0.3110988892741424, "grad_norm": 2.3304202556610107, "learning_rate": 8.067350989852289e-06, "loss": 0.5378, "step": 26020 }, { "epoch": 0.3111108454190031, "grad_norm": 2.2210962772369385, "learning_rate": 8.067198084653316e-06, "loss": 0.697, "step": 26021 }, { "epoch": 0.31112280156386374, "grad_norm": 2.014678955078125, "learning_rate": 8.06704517485507e-06, "loss": 0.5748, "step": 26022 }, { "epoch": 0.3111347577087244, "grad_norm": 4.270129203796387, "learning_rate": 8.06689226045778e-06, "loss": 0.571, "step": 26023 }, { "epoch": 0.31114671385358506, "grad_norm": 2.5837366580963135, "learning_rate": 8.066739341461672e-06, "loss": 0.5336, "step": 26024 }, { "epoch": 0.3111586699984457, "grad_norm": 2.1538424491882324, "learning_rate": 8.066586417866977e-06, "loss": 0.6653, "step": 26025 }, { "epoch": 0.3111706261433064, "grad_norm": 2.1638026237487793, "learning_rate": 8.066433489673926e-06, "loss": 0.5769, "step": 26026 }, { "epoch": 0.31118258228816703, "grad_norm": 1.753995656967163, "learning_rate": 8.066280556882746e-06, "loss": 0.6095, "step": 26027 }, { "epoch": 0.31119453843302763, "grad_norm": 5.563403606414795, "learning_rate": 8.06612761949367e-06, "loss": 0.536, "step": 26028 }, { "epoch": 0.3112064945778883, "grad_norm": 3.5210556983947754, "learning_rate": 8.065974677506922e-06, "loss": 0.6726, "step": 26029 }, { "epoch": 0.31121845072274895, "grad_norm": 2.628736972808838, "learning_rate": 8.065821730922735e-06, "loss": 0.5903, "step": 26030 }, { "epoch": 0.3112304068676096, "grad_norm": 1.8292827606201172, "learning_rate": 8.065668779741337e-06, "loss": 0.6122, "step": 26031 }, { "epoch": 0.31124236301247027, "grad_norm": 2.3549413681030273, "learning_rate": 8.065515823962958e-06, "loss": 0.5613, "step": 26032 }, { "epoch": 0.3112543191573309, "grad_norm": 2.028338670730591, "learning_rate": 8.065362863587829e-06, "loss": 0.5742, "step": 26033 }, { "epoch": 0.3112662753021916, "grad_norm": 1.5228508710861206, "learning_rate": 8.065209898616175e-06, "loss": 0.5585, "step": 26034 }, { "epoch": 0.3112782314470522, "grad_norm": 3.1005449295043945, "learning_rate": 8.065056929048227e-06, "loss": 0.589, "step": 26035 }, { "epoch": 0.31129018759191285, "grad_norm": 2.400801181793213, "learning_rate": 8.064903954884217e-06, "loss": 0.5782, "step": 26036 }, { "epoch": 0.3113021437367735, "grad_norm": 2.2031686305999756, "learning_rate": 8.064750976124374e-06, "loss": 0.5977, "step": 26037 }, { "epoch": 0.31131409988163417, "grad_norm": 2.6232492923736572, "learning_rate": 8.064597992768924e-06, "loss": 0.5564, "step": 26038 }, { "epoch": 0.3113260560264948, "grad_norm": 2.730069875717163, "learning_rate": 8.0644450048181e-06, "loss": 0.5265, "step": 26039 }, { "epoch": 0.3113380121713555, "grad_norm": 1.8659188747406006, "learning_rate": 8.064292012272128e-06, "loss": 0.5086, "step": 26040 }, { "epoch": 0.31134996831621614, "grad_norm": 1.831379771232605, "learning_rate": 8.06413901513124e-06, "loss": 0.6518, "step": 26041 }, { "epoch": 0.31136192446107674, "grad_norm": 2.017279863357544, "learning_rate": 8.063986013395665e-06, "loss": 0.6116, "step": 26042 }, { "epoch": 0.3113738806059374, "grad_norm": 2.6494832038879395, "learning_rate": 8.063833007065631e-06, "loss": 0.5609, "step": 26043 }, { "epoch": 0.31138583675079806, "grad_norm": 2.3024003505706787, "learning_rate": 8.06367999614137e-06, "loss": 0.6249, "step": 26044 }, { "epoch": 0.3113977928956587, "grad_norm": 4.448216915130615, "learning_rate": 8.063526980623109e-06, "loss": 0.5314, "step": 26045 }, { "epoch": 0.3114097490405194, "grad_norm": 2.462278366088867, "learning_rate": 8.063373960511079e-06, "loss": 0.5092, "step": 26046 }, { "epoch": 0.31142170518538004, "grad_norm": 1.7679824829101562, "learning_rate": 8.063220935805508e-06, "loss": 0.5144, "step": 26047 }, { "epoch": 0.3114336613302407, "grad_norm": 2.0289037227630615, "learning_rate": 8.063067906506626e-06, "loss": 0.5909, "step": 26048 }, { "epoch": 0.31144561747510136, "grad_norm": 5.988314151763916, "learning_rate": 8.062914872614664e-06, "loss": 0.5349, "step": 26049 }, { "epoch": 0.31145757361996196, "grad_norm": 3.115586519241333, "learning_rate": 8.06276183412985e-06, "loss": 0.6419, "step": 26050 }, { "epoch": 0.3114695297648226, "grad_norm": 1.7994166612625122, "learning_rate": 8.062608791052413e-06, "loss": 0.6213, "step": 26051 }, { "epoch": 0.3114814859096833, "grad_norm": 3.13116455078125, "learning_rate": 8.062455743382584e-06, "loss": 0.6148, "step": 26052 }, { "epoch": 0.31149344205454393, "grad_norm": 2.1539905071258545, "learning_rate": 8.062302691120592e-06, "loss": 0.607, "step": 26053 }, { "epoch": 0.3115053981994046, "grad_norm": 3.0092825889587402, "learning_rate": 8.062149634266664e-06, "loss": 0.6504, "step": 26054 }, { "epoch": 0.31151735434426525, "grad_norm": 2.282121181488037, "learning_rate": 8.061996572821036e-06, "loss": 0.6839, "step": 26055 }, { "epoch": 0.3115293104891259, "grad_norm": 2.1915054321289062, "learning_rate": 8.061843506783931e-06, "loss": 0.5533, "step": 26056 }, { "epoch": 0.3115412666339865, "grad_norm": 3.9255878925323486, "learning_rate": 8.06169043615558e-06, "loss": 0.6578, "step": 26057 }, { "epoch": 0.31155322277884717, "grad_norm": 4.784755706787109, "learning_rate": 8.061537360936214e-06, "loss": 0.6481, "step": 26058 }, { "epoch": 0.31156517892370783, "grad_norm": 2.0550873279571533, "learning_rate": 8.061384281126061e-06, "loss": 0.6091, "step": 26059 }, { "epoch": 0.3115771350685685, "grad_norm": 2.9551408290863037, "learning_rate": 8.061231196725352e-06, "loss": 0.5767, "step": 26060 }, { "epoch": 0.31158909121342915, "grad_norm": 2.026299238204956, "learning_rate": 8.061078107734317e-06, "loss": 0.5217, "step": 26061 }, { "epoch": 0.3116010473582898, "grad_norm": 4.029451370239258, "learning_rate": 8.060925014153183e-06, "loss": 0.5166, "step": 26062 }, { "epoch": 0.31161300350315047, "grad_norm": 1.6643776893615723, "learning_rate": 8.060771915982181e-06, "loss": 0.6911, "step": 26063 }, { "epoch": 0.31162495964801107, "grad_norm": 3.0135860443115234, "learning_rate": 8.060618813221541e-06, "loss": 0.664, "step": 26064 }, { "epoch": 0.3116369157928717, "grad_norm": 1.3367336988449097, "learning_rate": 8.060465705871492e-06, "loss": 0.4975, "step": 26065 }, { "epoch": 0.3116488719377324, "grad_norm": 1.5957980155944824, "learning_rate": 8.060312593932265e-06, "loss": 0.6053, "step": 26066 }, { "epoch": 0.31166082808259304, "grad_norm": 1.8005561828613281, "learning_rate": 8.060159477404089e-06, "loss": 0.6286, "step": 26067 }, { "epoch": 0.3116727842274537, "grad_norm": 7.905538082122803, "learning_rate": 8.060006356287191e-06, "loss": 0.5938, "step": 26068 }, { "epoch": 0.31168474037231436, "grad_norm": 1.8692169189453125, "learning_rate": 8.059853230581803e-06, "loss": 0.5893, "step": 26069 }, { "epoch": 0.311696696517175, "grad_norm": 1.6562553644180298, "learning_rate": 8.059700100288154e-06, "loss": 0.561, "step": 26070 }, { "epoch": 0.3117086526620357, "grad_norm": 2.950275182723999, "learning_rate": 8.059546965406475e-06, "loss": 0.4621, "step": 26071 }, { "epoch": 0.3117206088068963, "grad_norm": 3.006594181060791, "learning_rate": 8.059393825936992e-06, "loss": 0.5953, "step": 26072 }, { "epoch": 0.31173256495175694, "grad_norm": 1.9045681953430176, "learning_rate": 8.059240681879939e-06, "loss": 0.5287, "step": 26073 }, { "epoch": 0.3117445210966176, "grad_norm": 3.371391534805298, "learning_rate": 8.059087533235543e-06, "loss": 0.6087, "step": 26074 }, { "epoch": 0.31175647724147826, "grad_norm": 2.173017978668213, "learning_rate": 8.058934380004035e-06, "loss": 0.6508, "step": 26075 }, { "epoch": 0.3117684333863389, "grad_norm": 2.0508880615234375, "learning_rate": 8.058781222185644e-06, "loss": 0.6853, "step": 26076 }, { "epoch": 0.3117803895311996, "grad_norm": 3.156583309173584, "learning_rate": 8.0586280597806e-06, "loss": 0.6338, "step": 26077 }, { "epoch": 0.31179234567606023, "grad_norm": 2.3812570571899414, "learning_rate": 8.058474892789131e-06, "loss": 0.6451, "step": 26078 }, { "epoch": 0.31180430182092084, "grad_norm": 3.1072447299957275, "learning_rate": 8.05832172121147e-06, "loss": 0.5174, "step": 26079 }, { "epoch": 0.3118162579657815, "grad_norm": 1.9014885425567627, "learning_rate": 8.058168545047844e-06, "loss": 0.6175, "step": 26080 }, { "epoch": 0.31182821411064215, "grad_norm": 1.5768744945526123, "learning_rate": 8.058015364298482e-06, "loss": 0.5957, "step": 26081 }, { "epoch": 0.3118401702555028, "grad_norm": 1.7339353561401367, "learning_rate": 8.057862178963617e-06, "loss": 0.6168, "step": 26082 }, { "epoch": 0.31185212640036347, "grad_norm": 3.6303656101226807, "learning_rate": 8.057708989043475e-06, "loss": 0.6709, "step": 26083 }, { "epoch": 0.31186408254522413, "grad_norm": 3.932481527328491, "learning_rate": 8.05755579453829e-06, "loss": 0.5408, "step": 26084 }, { "epoch": 0.3118760386900848, "grad_norm": 2.3926751613616943, "learning_rate": 8.057402595448287e-06, "loss": 0.5013, "step": 26085 }, { "epoch": 0.31188799483494545, "grad_norm": 9.197356224060059, "learning_rate": 8.057249391773698e-06, "loss": 0.6616, "step": 26086 }, { "epoch": 0.31189995097980605, "grad_norm": 3.3667972087860107, "learning_rate": 8.057096183514753e-06, "loss": 0.5339, "step": 26087 }, { "epoch": 0.3119119071246667, "grad_norm": 2.1723315715789795, "learning_rate": 8.056942970671682e-06, "loss": 0.6401, "step": 26088 }, { "epoch": 0.31192386326952737, "grad_norm": 2.1647517681121826, "learning_rate": 8.056789753244713e-06, "loss": 0.6525, "step": 26089 }, { "epoch": 0.311935819414388, "grad_norm": 1.6557512283325195, "learning_rate": 8.056636531234079e-06, "loss": 0.5947, "step": 26090 }, { "epoch": 0.3119477755592487, "grad_norm": 2.867084264755249, "learning_rate": 8.056483304640006e-06, "loss": 0.7056, "step": 26091 }, { "epoch": 0.31195973170410934, "grad_norm": 1.8485841751098633, "learning_rate": 8.056330073462725e-06, "loss": 0.6035, "step": 26092 }, { "epoch": 0.31197168784897, "grad_norm": 4.216673851013184, "learning_rate": 8.056176837702468e-06, "loss": 0.5839, "step": 26093 }, { "epoch": 0.3119836439938306, "grad_norm": 2.3934638500213623, "learning_rate": 8.056023597359461e-06, "loss": 0.593, "step": 26094 }, { "epoch": 0.31199560013869126, "grad_norm": 2.574033498764038, "learning_rate": 8.055870352433936e-06, "loss": 0.749, "step": 26095 }, { "epoch": 0.3120075562835519, "grad_norm": 3.3241586685180664, "learning_rate": 8.055717102926126e-06, "loss": 0.5383, "step": 26096 }, { "epoch": 0.3120195124284126, "grad_norm": 3.2978694438934326, "learning_rate": 8.055563848836253e-06, "loss": 0.5889, "step": 26097 }, { "epoch": 0.31203146857327324, "grad_norm": 9.265822410583496, "learning_rate": 8.055410590164553e-06, "loss": 0.673, "step": 26098 }, { "epoch": 0.3120434247181339, "grad_norm": 2.04475736618042, "learning_rate": 8.055257326911253e-06, "loss": 0.5607, "step": 26099 }, { "epoch": 0.31205538086299456, "grad_norm": 2.6398229598999023, "learning_rate": 8.055104059076585e-06, "loss": 0.7206, "step": 26100 }, { "epoch": 0.31206733700785516, "grad_norm": 2.710000991821289, "learning_rate": 8.054950786660777e-06, "loss": 0.5083, "step": 26101 }, { "epoch": 0.3120792931527158, "grad_norm": 1.5785484313964844, "learning_rate": 8.054797509664059e-06, "loss": 0.5804, "step": 26102 }, { "epoch": 0.3120912492975765, "grad_norm": 9.643776893615723, "learning_rate": 8.054644228086661e-06, "loss": 0.717, "step": 26103 }, { "epoch": 0.31210320544243714, "grad_norm": 5.740251541137695, "learning_rate": 8.054490941928815e-06, "loss": 0.6028, "step": 26104 }, { "epoch": 0.3121151615872978, "grad_norm": 6.692023754119873, "learning_rate": 8.054337651190749e-06, "loss": 0.698, "step": 26105 }, { "epoch": 0.31212711773215845, "grad_norm": 2.4518611431121826, "learning_rate": 8.054184355872691e-06, "loss": 0.7197, "step": 26106 }, { "epoch": 0.3121390738770191, "grad_norm": 4.137126445770264, "learning_rate": 8.054031055974874e-06, "loss": 0.5794, "step": 26107 }, { "epoch": 0.31215103002187977, "grad_norm": 2.0941240787506104, "learning_rate": 8.053877751497526e-06, "loss": 0.6468, "step": 26108 }, { "epoch": 0.3121629861667404, "grad_norm": 2.24253249168396, "learning_rate": 8.053724442440876e-06, "loss": 0.6097, "step": 26109 }, { "epoch": 0.31217494231160103, "grad_norm": 7.101386547088623, "learning_rate": 8.053571128805157e-06, "loss": 0.4848, "step": 26110 }, { "epoch": 0.3121868984564617, "grad_norm": 2.592177391052246, "learning_rate": 8.053417810590598e-06, "loss": 0.5649, "step": 26111 }, { "epoch": 0.31219885460132235, "grad_norm": 2.4793314933776855, "learning_rate": 8.053264487797426e-06, "loss": 0.6123, "step": 26112 }, { "epoch": 0.312210810746183, "grad_norm": 1.7155711650848389, "learning_rate": 8.053111160425874e-06, "loss": 0.5936, "step": 26113 }, { "epoch": 0.31222276689104367, "grad_norm": 3.242636203765869, "learning_rate": 8.052957828476173e-06, "loss": 0.6554, "step": 26114 }, { "epoch": 0.3122347230359043, "grad_norm": 3.02982759475708, "learning_rate": 8.052804491948548e-06, "loss": 0.6925, "step": 26115 }, { "epoch": 0.31224667918076493, "grad_norm": 2.2315237522125244, "learning_rate": 8.052651150843233e-06, "loss": 0.5913, "step": 26116 }, { "epoch": 0.3122586353256256, "grad_norm": 3.376955270767212, "learning_rate": 8.052497805160456e-06, "loss": 0.6086, "step": 26117 }, { "epoch": 0.31227059147048625, "grad_norm": 2.3076729774475098, "learning_rate": 8.05234445490045e-06, "loss": 0.5248, "step": 26118 }, { "epoch": 0.3122825476153469, "grad_norm": 2.5673253536224365, "learning_rate": 8.05219110006344e-06, "loss": 0.6925, "step": 26119 }, { "epoch": 0.31229450376020756, "grad_norm": 2.356971502304077, "learning_rate": 8.05203774064966e-06, "loss": 0.577, "step": 26120 }, { "epoch": 0.3123064599050682, "grad_norm": 2.4534332752227783, "learning_rate": 8.051884376659337e-06, "loss": 0.6438, "step": 26121 }, { "epoch": 0.3123184160499289, "grad_norm": 2.542389392852783, "learning_rate": 8.051731008092703e-06, "loss": 0.6658, "step": 26122 }, { "epoch": 0.3123303721947895, "grad_norm": 4.696577072143555, "learning_rate": 8.05157763494999e-06, "loss": 0.638, "step": 26123 }, { "epoch": 0.31234232833965014, "grad_norm": 1.8675103187561035, "learning_rate": 8.051424257231424e-06, "loss": 0.6304, "step": 26124 }, { "epoch": 0.3123542844845108, "grad_norm": 2.997051954269409, "learning_rate": 8.051270874937236e-06, "loss": 0.5637, "step": 26125 }, { "epoch": 0.31236624062937146, "grad_norm": 2.5048325061798096, "learning_rate": 8.051117488067655e-06, "loss": 0.5645, "step": 26126 }, { "epoch": 0.3123781967742321, "grad_norm": 3.5667612552642822, "learning_rate": 8.050964096622915e-06, "loss": 0.6251, "step": 26127 }, { "epoch": 0.3123901529190928, "grad_norm": 2.9957549571990967, "learning_rate": 8.050810700603243e-06, "loss": 0.6319, "step": 26128 }, { "epoch": 0.31240210906395344, "grad_norm": 2.178737163543701, "learning_rate": 8.050657300008868e-06, "loss": 0.6306, "step": 26129 }, { "epoch": 0.3124140652088141, "grad_norm": 7.46230936050415, "learning_rate": 8.050503894840022e-06, "loss": 0.6291, "step": 26130 }, { "epoch": 0.3124260213536747, "grad_norm": 3.4536831378936768, "learning_rate": 8.050350485096936e-06, "loss": 0.7026, "step": 26131 }, { "epoch": 0.31243797749853536, "grad_norm": 3.10304856300354, "learning_rate": 8.050197070779838e-06, "loss": 0.5413, "step": 26132 }, { "epoch": 0.312449933643396, "grad_norm": 4.645123481750488, "learning_rate": 8.050043651888959e-06, "loss": 0.628, "step": 26133 }, { "epoch": 0.3124618897882567, "grad_norm": 5.207320690155029, "learning_rate": 8.049890228424528e-06, "loss": 0.5883, "step": 26134 }, { "epoch": 0.31247384593311733, "grad_norm": 2.293966770172119, "learning_rate": 8.049736800386775e-06, "loss": 0.5424, "step": 26135 }, { "epoch": 0.312485802077978, "grad_norm": 2.48736572265625, "learning_rate": 8.049583367775932e-06, "loss": 0.6397, "step": 26136 }, { "epoch": 0.31249775822283865, "grad_norm": 15.957707405090332, "learning_rate": 8.049429930592228e-06, "loss": 0.6318, "step": 26137 }, { "epoch": 0.31250971436769925, "grad_norm": 6.739706039428711, "learning_rate": 8.049276488835892e-06, "loss": 0.6754, "step": 26138 }, { "epoch": 0.3125216705125599, "grad_norm": 2.8974738121032715, "learning_rate": 8.049123042507156e-06, "loss": 0.5596, "step": 26139 }, { "epoch": 0.31253362665742057, "grad_norm": 4.967837810516357, "learning_rate": 8.048969591606248e-06, "loss": 0.5005, "step": 26140 }, { "epoch": 0.31254558280228123, "grad_norm": 58.34925842285156, "learning_rate": 8.048816136133401e-06, "loss": 0.4905, "step": 26141 }, { "epoch": 0.3125575389471419, "grad_norm": 3.779999017715454, "learning_rate": 8.048662676088843e-06, "loss": 0.6031, "step": 26142 }, { "epoch": 0.31256949509200255, "grad_norm": 4.676631450653076, "learning_rate": 8.048509211472804e-06, "loss": 0.6094, "step": 26143 }, { "epoch": 0.3125814512368632, "grad_norm": 74.88501739501953, "learning_rate": 8.048355742285513e-06, "loss": 0.6156, "step": 26144 }, { "epoch": 0.31259340738172386, "grad_norm": 3.7470808029174805, "learning_rate": 8.048202268527204e-06, "loss": 0.5094, "step": 26145 }, { "epoch": 0.31260536352658447, "grad_norm": 2.0519986152648926, "learning_rate": 8.048048790198105e-06, "loss": 0.5043, "step": 26146 }, { "epoch": 0.3126173196714451, "grad_norm": 2.357100248336792, "learning_rate": 8.047895307298445e-06, "loss": 0.5544, "step": 26147 }, { "epoch": 0.3126292758163058, "grad_norm": 3.7045719623565674, "learning_rate": 8.047741819828454e-06, "loss": 0.6275, "step": 26148 }, { "epoch": 0.31264123196116644, "grad_norm": 4.824325084686279, "learning_rate": 8.047588327788365e-06, "loss": 0.6654, "step": 26149 }, { "epoch": 0.3126531881060271, "grad_norm": 4.402836799621582, "learning_rate": 8.047434831178406e-06, "loss": 0.5965, "step": 26150 }, { "epoch": 0.31266514425088776, "grad_norm": 2.598273754119873, "learning_rate": 8.047281329998808e-06, "loss": 0.5306, "step": 26151 }, { "epoch": 0.3126771003957484, "grad_norm": 3.776280403137207, "learning_rate": 8.0471278242498e-06, "loss": 0.6281, "step": 26152 }, { "epoch": 0.312689056540609, "grad_norm": 3.108241081237793, "learning_rate": 8.046974313931613e-06, "loss": 0.5777, "step": 26153 }, { "epoch": 0.3127010126854697, "grad_norm": 4.644714832305908, "learning_rate": 8.046820799044479e-06, "loss": 0.6398, "step": 26154 }, { "epoch": 0.31271296883033034, "grad_norm": 3.8919970989227295, "learning_rate": 8.046667279588625e-06, "loss": 0.6575, "step": 26155 }, { "epoch": 0.312724924975191, "grad_norm": 2.2542314529418945, "learning_rate": 8.046513755564282e-06, "loss": 0.5761, "step": 26156 }, { "epoch": 0.31273688112005166, "grad_norm": 3.157989740371704, "learning_rate": 8.046360226971682e-06, "loss": 0.5386, "step": 26157 }, { "epoch": 0.3127488372649123, "grad_norm": 3.303434133529663, "learning_rate": 8.046206693811054e-06, "loss": 0.5789, "step": 26158 }, { "epoch": 0.312760793409773, "grad_norm": 2.9815633296966553, "learning_rate": 8.046053156082628e-06, "loss": 0.6939, "step": 26159 }, { "epoch": 0.3127727495546336, "grad_norm": 5.682551860809326, "learning_rate": 8.045899613786634e-06, "loss": 0.6314, "step": 26160 }, { "epoch": 0.31278470569949424, "grad_norm": 6.879532814025879, "learning_rate": 8.045746066923303e-06, "loss": 0.6102, "step": 26161 }, { "epoch": 0.3127966618443549, "grad_norm": 4.521731376647949, "learning_rate": 8.045592515492864e-06, "loss": 0.6176, "step": 26162 }, { "epoch": 0.31280861798921555, "grad_norm": 3.881157398223877, "learning_rate": 8.045438959495548e-06, "loss": 0.6557, "step": 26163 }, { "epoch": 0.3128205741340762, "grad_norm": 4.029031276702881, "learning_rate": 8.045285398931588e-06, "loss": 0.617, "step": 26164 }, { "epoch": 0.31283253027893687, "grad_norm": 2.4376184940338135, "learning_rate": 8.04513183380121e-06, "loss": 0.7146, "step": 26165 }, { "epoch": 0.31284448642379753, "grad_norm": 4.0461506843566895, "learning_rate": 8.044978264104646e-06, "loss": 0.6519, "step": 26166 }, { "epoch": 0.3128564425686582, "grad_norm": 8.929909706115723, "learning_rate": 8.044824689842127e-06, "loss": 0.6643, "step": 26167 }, { "epoch": 0.3128683987135188, "grad_norm": 4.6860671043396, "learning_rate": 8.044671111013881e-06, "loss": 0.6385, "step": 26168 }, { "epoch": 0.31288035485837945, "grad_norm": 6.815706253051758, "learning_rate": 8.044517527620142e-06, "loss": 0.5807, "step": 26169 }, { "epoch": 0.3128923110032401, "grad_norm": 3.758790969848633, "learning_rate": 8.044363939661136e-06, "loss": 0.6483, "step": 26170 }, { "epoch": 0.31290426714810077, "grad_norm": 3.7595653533935547, "learning_rate": 8.044210347137098e-06, "loss": 0.5632, "step": 26171 }, { "epoch": 0.3129162232929614, "grad_norm": 4.269272804260254, "learning_rate": 8.044056750048252e-06, "loss": 0.6397, "step": 26172 }, { "epoch": 0.3129281794378221, "grad_norm": 8.302521705627441, "learning_rate": 8.043903148394835e-06, "loss": 0.6707, "step": 26173 }, { "epoch": 0.31294013558268274, "grad_norm": 6.42866325378418, "learning_rate": 8.043749542177073e-06, "loss": 0.6351, "step": 26174 }, { "epoch": 0.31295209172754335, "grad_norm": 16.271451950073242, "learning_rate": 8.0435959313952e-06, "loss": 0.6344, "step": 26175 }, { "epoch": 0.312964047872404, "grad_norm": 3.052446126937866, "learning_rate": 8.043442316049441e-06, "loss": 0.6217, "step": 26176 }, { "epoch": 0.31297600401726466, "grad_norm": 4.961187362670898, "learning_rate": 8.043288696140032e-06, "loss": 0.5929, "step": 26177 }, { "epoch": 0.3129879601621253, "grad_norm": 3.7241029739379883, "learning_rate": 8.0431350716672e-06, "loss": 0.6042, "step": 26178 }, { "epoch": 0.312999916306986, "grad_norm": 2.3966877460479736, "learning_rate": 8.042981442631175e-06, "loss": 0.6127, "step": 26179 }, { "epoch": 0.31301187245184664, "grad_norm": 3.402841329574585, "learning_rate": 8.04282780903219e-06, "loss": 0.6869, "step": 26180 }, { "epoch": 0.3130238285967073, "grad_norm": 4.230349063873291, "learning_rate": 8.042674170870473e-06, "loss": 0.617, "step": 26181 }, { "epoch": 0.3130357847415679, "grad_norm": 2.7445731163024902, "learning_rate": 8.042520528146257e-06, "loss": 0.5927, "step": 26182 }, { "epoch": 0.31304774088642856, "grad_norm": 2.3318190574645996, "learning_rate": 8.042366880859769e-06, "loss": 0.5631, "step": 26183 }, { "epoch": 0.3130596970312892, "grad_norm": 3.972050905227661, "learning_rate": 8.04221322901124e-06, "loss": 0.5247, "step": 26184 }, { "epoch": 0.3130716531761499, "grad_norm": 6.0517144203186035, "learning_rate": 8.042059572600903e-06, "loss": 0.5016, "step": 26185 }, { "epoch": 0.31308360932101054, "grad_norm": 3.889368772506714, "learning_rate": 8.041905911628986e-06, "loss": 0.4838, "step": 26186 }, { "epoch": 0.3130955654658712, "grad_norm": 2.052950382232666, "learning_rate": 8.041752246095723e-06, "loss": 0.7119, "step": 26187 }, { "epoch": 0.31310752161073185, "grad_norm": 8.546213150024414, "learning_rate": 8.041598576001338e-06, "loss": 0.5894, "step": 26188 }, { "epoch": 0.3131194777555925, "grad_norm": 7.369202136993408, "learning_rate": 8.041444901346068e-06, "loss": 0.6918, "step": 26189 }, { "epoch": 0.3131314339004531, "grad_norm": 3.3987607955932617, "learning_rate": 8.041291222130139e-06, "loss": 0.6925, "step": 26190 }, { "epoch": 0.3131433900453138, "grad_norm": 3.0239062309265137, "learning_rate": 8.041137538353783e-06, "loss": 0.6986, "step": 26191 }, { "epoch": 0.31315534619017443, "grad_norm": 3.9858412742614746, "learning_rate": 8.04098385001723e-06, "loss": 0.6473, "step": 26192 }, { "epoch": 0.3131673023350351, "grad_norm": 2.4950692653656006, "learning_rate": 8.040830157120712e-06, "loss": 0.6852, "step": 26193 }, { "epoch": 0.31317925847989575, "grad_norm": 2.031209707260132, "learning_rate": 8.040676459664457e-06, "loss": 0.5991, "step": 26194 }, { "epoch": 0.3131912146247564, "grad_norm": 2.9111263751983643, "learning_rate": 8.040522757648698e-06, "loss": 0.5973, "step": 26195 }, { "epoch": 0.31320317076961707, "grad_norm": 10.78427505493164, "learning_rate": 8.040369051073665e-06, "loss": 0.5263, "step": 26196 }, { "epoch": 0.31321512691447767, "grad_norm": 2.2153773307800293, "learning_rate": 8.040215339939586e-06, "loss": 0.623, "step": 26197 }, { "epoch": 0.31322708305933833, "grad_norm": 6.237493515014648, "learning_rate": 8.040061624246695e-06, "loss": 0.562, "step": 26198 }, { "epoch": 0.313239039204199, "grad_norm": 2.2822072505950928, "learning_rate": 8.039907903995218e-06, "loss": 0.5485, "step": 26199 }, { "epoch": 0.31325099534905965, "grad_norm": 3.338107109069824, "learning_rate": 8.039754179185391e-06, "loss": 0.5668, "step": 26200 }, { "epoch": 0.3132629514939203, "grad_norm": 3.59373140335083, "learning_rate": 8.039600449817441e-06, "loss": 0.5625, "step": 26201 }, { "epoch": 0.31327490763878096, "grad_norm": 6.115091323852539, "learning_rate": 8.0394467158916e-06, "loss": 0.5948, "step": 26202 }, { "epoch": 0.3132868637836416, "grad_norm": 2.885683298110962, "learning_rate": 8.039292977408097e-06, "loss": 0.5702, "step": 26203 }, { "epoch": 0.3132988199285023, "grad_norm": 2.7714812755584717, "learning_rate": 8.039139234367163e-06, "loss": 0.5797, "step": 26204 }, { "epoch": 0.3133107760733629, "grad_norm": 5.651506423950195, "learning_rate": 8.038985486769031e-06, "loss": 0.5145, "step": 26205 }, { "epoch": 0.31332273221822354, "grad_norm": 2.383780002593994, "learning_rate": 8.038831734613928e-06, "loss": 0.5926, "step": 26206 }, { "epoch": 0.3133346883630842, "grad_norm": 9.358104705810547, "learning_rate": 8.038677977902086e-06, "loss": 0.6446, "step": 26207 }, { "epoch": 0.31334664450794486, "grad_norm": 1.8169384002685547, "learning_rate": 8.038524216633735e-06, "loss": 0.6279, "step": 26208 }, { "epoch": 0.3133586006528055, "grad_norm": 2.322228193283081, "learning_rate": 8.038370450809107e-06, "loss": 0.653, "step": 26209 }, { "epoch": 0.3133705567976662, "grad_norm": 4.860852241516113, "learning_rate": 8.038216680428432e-06, "loss": 0.5481, "step": 26210 }, { "epoch": 0.31338251294252684, "grad_norm": 14.833768844604492, "learning_rate": 8.038062905491939e-06, "loss": 0.6277, "step": 26211 }, { "epoch": 0.31339446908738744, "grad_norm": 3.596618890762329, "learning_rate": 8.03790912599986e-06, "loss": 0.6443, "step": 26212 }, { "epoch": 0.3134064252322481, "grad_norm": 4.05769681930542, "learning_rate": 8.037755341952427e-06, "loss": 0.5425, "step": 26213 }, { "epoch": 0.31341838137710876, "grad_norm": 6.087942123413086, "learning_rate": 8.037601553349867e-06, "loss": 0.5534, "step": 26214 }, { "epoch": 0.3134303375219694, "grad_norm": 2.361300468444824, "learning_rate": 8.037447760192414e-06, "loss": 0.69, "step": 26215 }, { "epoch": 0.3134422936668301, "grad_norm": 3.447817325592041, "learning_rate": 8.037293962480296e-06, "loss": 0.676, "step": 26216 }, { "epoch": 0.31345424981169073, "grad_norm": 3.114957571029663, "learning_rate": 8.037140160213748e-06, "loss": 0.6607, "step": 26217 }, { "epoch": 0.3134662059565514, "grad_norm": 55.01979064941406, "learning_rate": 8.036986353392996e-06, "loss": 0.5656, "step": 26218 }, { "epoch": 0.313478162101412, "grad_norm": 2.1704249382019043, "learning_rate": 8.036832542018272e-06, "loss": 0.5454, "step": 26219 }, { "epoch": 0.31349011824627265, "grad_norm": 2.8860278129577637, "learning_rate": 8.036678726089806e-06, "loss": 0.5377, "step": 26220 }, { "epoch": 0.3135020743911333, "grad_norm": 2.8472795486450195, "learning_rate": 8.03652490560783e-06, "loss": 0.6213, "step": 26221 }, { "epoch": 0.31351403053599397, "grad_norm": 3.6174614429473877, "learning_rate": 8.036371080572573e-06, "loss": 0.5565, "step": 26222 }, { "epoch": 0.31352598668085463, "grad_norm": 5.250739574432373, "learning_rate": 8.036217250984269e-06, "loss": 0.5884, "step": 26223 }, { "epoch": 0.3135379428257153, "grad_norm": 1.8111610412597656, "learning_rate": 8.036063416843145e-06, "loss": 0.606, "step": 26224 }, { "epoch": 0.31354989897057595, "grad_norm": 2.8836162090301514, "learning_rate": 8.035909578149433e-06, "loss": 0.63, "step": 26225 }, { "epoch": 0.3135618551154366, "grad_norm": 2.1137545108795166, "learning_rate": 8.035755734903363e-06, "loss": 0.6612, "step": 26226 }, { "epoch": 0.3135738112602972, "grad_norm": 2.9154534339904785, "learning_rate": 8.035601887105169e-06, "loss": 0.6228, "step": 26227 }, { "epoch": 0.31358576740515787, "grad_norm": 2.2276463508605957, "learning_rate": 8.035448034755077e-06, "loss": 0.6632, "step": 26228 }, { "epoch": 0.3135977235500185, "grad_norm": 3.4475901126861572, "learning_rate": 8.035294177853322e-06, "loss": 0.5776, "step": 26229 }, { "epoch": 0.3136096796948792, "grad_norm": 2.392237663269043, "learning_rate": 8.03514031640013e-06, "loss": 0.5688, "step": 26230 }, { "epoch": 0.31362163583973984, "grad_norm": 4.064979076385498, "learning_rate": 8.034986450395736e-06, "loss": 0.6802, "step": 26231 }, { "epoch": 0.3136335919846005, "grad_norm": 3.426421642303467, "learning_rate": 8.034832579840369e-06, "loss": 0.5713, "step": 26232 }, { "epoch": 0.31364554812946116, "grad_norm": 3.4024429321289062, "learning_rate": 8.03467870473426e-06, "loss": 0.562, "step": 26233 }, { "epoch": 0.31365750427432176, "grad_norm": 2.550179958343506, "learning_rate": 8.034524825077638e-06, "loss": 0.5749, "step": 26234 }, { "epoch": 0.3136694604191824, "grad_norm": 1.7738425731658936, "learning_rate": 8.034370940870735e-06, "loss": 0.6126, "step": 26235 }, { "epoch": 0.3136814165640431, "grad_norm": 2.7048897743225098, "learning_rate": 8.034217052113783e-06, "loss": 0.5994, "step": 26236 }, { "epoch": 0.31369337270890374, "grad_norm": 2.0963358879089355, "learning_rate": 8.034063158807013e-06, "loss": 0.614, "step": 26237 }, { "epoch": 0.3137053288537644, "grad_norm": 2.626366376876831, "learning_rate": 8.033909260950653e-06, "loss": 0.5361, "step": 26238 }, { "epoch": 0.31371728499862506, "grad_norm": 1.955527901649475, "learning_rate": 8.033755358544936e-06, "loss": 0.5397, "step": 26239 }, { "epoch": 0.3137292411434857, "grad_norm": 2.204962730407715, "learning_rate": 8.033601451590091e-06, "loss": 0.7086, "step": 26240 }, { "epoch": 0.3137411972883463, "grad_norm": 2.7630035877227783, "learning_rate": 8.033447540086353e-06, "loss": 0.657, "step": 26241 }, { "epoch": 0.313753153433207, "grad_norm": 1.7632372379302979, "learning_rate": 8.033293624033947e-06, "loss": 0.6276, "step": 26242 }, { "epoch": 0.31376510957806764, "grad_norm": 2.2005059719085693, "learning_rate": 8.033139703433107e-06, "loss": 0.5954, "step": 26243 }, { "epoch": 0.3137770657229283, "grad_norm": 3.4991025924682617, "learning_rate": 8.032985778284062e-06, "loss": 0.6046, "step": 26244 }, { "epoch": 0.31378902186778895, "grad_norm": 3.3316681385040283, "learning_rate": 8.032831848587045e-06, "loss": 0.5687, "step": 26245 }, { "epoch": 0.3138009780126496, "grad_norm": 1.9194799661636353, "learning_rate": 8.032677914342285e-06, "loss": 0.5978, "step": 26246 }, { "epoch": 0.31381293415751027, "grad_norm": 4.365418434143066, "learning_rate": 8.032523975550015e-06, "loss": 0.592, "step": 26247 }, { "epoch": 0.31382489030237093, "grad_norm": 4.354944229125977, "learning_rate": 8.032370032210463e-06, "loss": 0.6246, "step": 26248 }, { "epoch": 0.31383684644723153, "grad_norm": 3.3382866382598877, "learning_rate": 8.032216084323862e-06, "loss": 0.6792, "step": 26249 }, { "epoch": 0.3138488025920922, "grad_norm": 1.8490209579467773, "learning_rate": 8.032062131890443e-06, "loss": 0.6062, "step": 26250 }, { "epoch": 0.31386075873695285, "grad_norm": 9.987878799438477, "learning_rate": 8.031908174910437e-06, "loss": 0.555, "step": 26251 }, { "epoch": 0.3138727148818135, "grad_norm": 3.5012869834899902, "learning_rate": 8.031754213384072e-06, "loss": 0.5876, "step": 26252 }, { "epoch": 0.31388467102667417, "grad_norm": 8.178387641906738, "learning_rate": 8.03160024731158e-06, "loss": 0.5562, "step": 26253 }, { "epoch": 0.3138966271715348, "grad_norm": 3.3592770099639893, "learning_rate": 8.031446276693194e-06, "loss": 0.5819, "step": 26254 }, { "epoch": 0.3139085833163955, "grad_norm": 3.7105612754821777, "learning_rate": 8.031292301529144e-06, "loss": 0.4378, "step": 26255 }, { "epoch": 0.3139205394612561, "grad_norm": 1.6223602294921875, "learning_rate": 8.03113832181966e-06, "loss": 0.6329, "step": 26256 }, { "epoch": 0.31393249560611675, "grad_norm": 2.4835283756256104, "learning_rate": 8.030984337564974e-06, "loss": 0.6618, "step": 26257 }, { "epoch": 0.3139444517509774, "grad_norm": 2.493818521499634, "learning_rate": 8.030830348765314e-06, "loss": 0.5275, "step": 26258 }, { "epoch": 0.31395640789583806, "grad_norm": 1.6622233390808105, "learning_rate": 8.030676355420915e-06, "loss": 0.6175, "step": 26259 }, { "epoch": 0.3139683640406987, "grad_norm": 2.769658088684082, "learning_rate": 8.030522357532006e-06, "loss": 0.5525, "step": 26260 }, { "epoch": 0.3139803201855594, "grad_norm": 16.831254959106445, "learning_rate": 8.030368355098818e-06, "loss": 0.6674, "step": 26261 }, { "epoch": 0.31399227633042004, "grad_norm": 3.0683600902557373, "learning_rate": 8.030214348121581e-06, "loss": 0.7277, "step": 26262 }, { "epoch": 0.3140042324752807, "grad_norm": 2.581824779510498, "learning_rate": 8.030060336600527e-06, "loss": 0.5524, "step": 26263 }, { "epoch": 0.3140161886201413, "grad_norm": 2.357637882232666, "learning_rate": 8.029906320535887e-06, "loss": 0.5762, "step": 26264 }, { "epoch": 0.31402814476500196, "grad_norm": 2.058743715286255, "learning_rate": 8.029752299927891e-06, "loss": 0.5468, "step": 26265 }, { "epoch": 0.3140401009098626, "grad_norm": 2.146121025085449, "learning_rate": 8.029598274776771e-06, "loss": 0.6338, "step": 26266 }, { "epoch": 0.3140520570547233, "grad_norm": 2.7418859004974365, "learning_rate": 8.029444245082758e-06, "loss": 0.6076, "step": 26267 }, { "epoch": 0.31406401319958394, "grad_norm": 2.0159943103790283, "learning_rate": 8.029290210846083e-06, "loss": 0.5676, "step": 26268 }, { "epoch": 0.3140759693444446, "grad_norm": 3.4112675189971924, "learning_rate": 8.029136172066975e-06, "loss": 0.5525, "step": 26269 }, { "epoch": 0.31408792548930525, "grad_norm": 2.053410053253174, "learning_rate": 8.028982128745669e-06, "loss": 0.5696, "step": 26270 }, { "epoch": 0.31409988163416586, "grad_norm": 3.5447142124176025, "learning_rate": 8.028828080882392e-06, "loss": 0.5711, "step": 26271 }, { "epoch": 0.3141118377790265, "grad_norm": 2.363360643386841, "learning_rate": 8.028674028477377e-06, "loss": 0.6325, "step": 26272 }, { "epoch": 0.3141237939238872, "grad_norm": 3.0077338218688965, "learning_rate": 8.028519971530854e-06, "loss": 0.5977, "step": 26273 }, { "epoch": 0.31413575006874783, "grad_norm": 2.9838786125183105, "learning_rate": 8.028365910043054e-06, "loss": 0.5992, "step": 26274 }, { "epoch": 0.3141477062136085, "grad_norm": 2.9311344623565674, "learning_rate": 8.02821184401421e-06, "loss": 0.6161, "step": 26275 }, { "epoch": 0.31415966235846915, "grad_norm": 1.8002671003341675, "learning_rate": 8.02805777344455e-06, "loss": 0.6244, "step": 26276 }, { "epoch": 0.3141716185033298, "grad_norm": 5.378389358520508, "learning_rate": 8.027903698334309e-06, "loss": 0.6263, "step": 26277 }, { "epoch": 0.3141835746481904, "grad_norm": 4.543228626251221, "learning_rate": 8.027749618683715e-06, "loss": 0.6522, "step": 26278 }, { "epoch": 0.31419553079305107, "grad_norm": 2.6176774501800537, "learning_rate": 8.027595534492998e-06, "loss": 0.5935, "step": 26279 }, { "epoch": 0.31420748693791173, "grad_norm": 6.748737335205078, "learning_rate": 8.027441445762392e-06, "loss": 0.6183, "step": 26280 }, { "epoch": 0.3142194430827724, "grad_norm": 2.515246868133545, "learning_rate": 8.027287352492125e-06, "loss": 0.5597, "step": 26281 }, { "epoch": 0.31423139922763305, "grad_norm": 2.621534585952759, "learning_rate": 8.027133254682431e-06, "loss": 0.5939, "step": 26282 }, { "epoch": 0.3142433553724937, "grad_norm": 3.1962642669677734, "learning_rate": 8.02697915233354e-06, "loss": 0.5837, "step": 26283 }, { "epoch": 0.31425531151735436, "grad_norm": 3.2430667877197266, "learning_rate": 8.026825045445683e-06, "loss": 0.687, "step": 26284 }, { "epoch": 0.314267267662215, "grad_norm": 3.7269153594970703, "learning_rate": 8.026670934019093e-06, "loss": 0.5399, "step": 26285 }, { "epoch": 0.3142792238070756, "grad_norm": 2.4394867420196533, "learning_rate": 8.026516818053996e-06, "loss": 0.6805, "step": 26286 }, { "epoch": 0.3142911799519363, "grad_norm": 5.314305305480957, "learning_rate": 8.026362697550629e-06, "loss": 0.579, "step": 26287 }, { "epoch": 0.31430313609679694, "grad_norm": 1.4308335781097412, "learning_rate": 8.026208572509218e-06, "loss": 0.5331, "step": 26288 }, { "epoch": 0.3143150922416576, "grad_norm": 2.1348626613616943, "learning_rate": 8.026054442929998e-06, "loss": 0.5877, "step": 26289 }, { "epoch": 0.31432704838651826, "grad_norm": 2.3740808963775635, "learning_rate": 8.025900308813199e-06, "loss": 0.6475, "step": 26290 }, { "epoch": 0.3143390045313789, "grad_norm": 2.331024169921875, "learning_rate": 8.02574617015905e-06, "loss": 0.6289, "step": 26291 }, { "epoch": 0.3143509606762396, "grad_norm": 3.5714523792266846, "learning_rate": 8.025592026967785e-06, "loss": 0.6078, "step": 26292 }, { "epoch": 0.3143629168211002, "grad_norm": 2.2950940132141113, "learning_rate": 8.025437879239634e-06, "loss": 0.6274, "step": 26293 }, { "epoch": 0.31437487296596084, "grad_norm": 3.074686050415039, "learning_rate": 8.025283726974827e-06, "loss": 0.6366, "step": 26294 }, { "epoch": 0.3143868291108215, "grad_norm": 1.5805718898773193, "learning_rate": 8.025129570173598e-06, "loss": 0.5399, "step": 26295 }, { "epoch": 0.31439878525568216, "grad_norm": 3.3669838905334473, "learning_rate": 8.024975408836175e-06, "loss": 0.6338, "step": 26296 }, { "epoch": 0.3144107414005428, "grad_norm": 2.3639302253723145, "learning_rate": 8.02482124296279e-06, "loss": 0.6027, "step": 26297 }, { "epoch": 0.3144226975454035, "grad_norm": 2.264113426208496, "learning_rate": 8.024667072553679e-06, "loss": 0.5697, "step": 26298 }, { "epoch": 0.31443465369026413, "grad_norm": 2.450235366821289, "learning_rate": 8.024512897609065e-06, "loss": 0.6291, "step": 26299 }, { "epoch": 0.31444660983512474, "grad_norm": 3.1239256858825684, "learning_rate": 8.024358718129184e-06, "loss": 0.6266, "step": 26300 }, { "epoch": 0.3144585659799854, "grad_norm": 3.8752381801605225, "learning_rate": 8.024204534114266e-06, "loss": 0.6907, "step": 26301 }, { "epoch": 0.31447052212484605, "grad_norm": 2.033583402633667, "learning_rate": 8.024050345564543e-06, "loss": 0.5493, "step": 26302 }, { "epoch": 0.3144824782697067, "grad_norm": 3.0896999835968018, "learning_rate": 8.023896152480246e-06, "loss": 0.5273, "step": 26303 }, { "epoch": 0.31449443441456737, "grad_norm": 3.9854788780212402, "learning_rate": 8.023741954861606e-06, "loss": 0.7864, "step": 26304 }, { "epoch": 0.31450639055942803, "grad_norm": 3.1278226375579834, "learning_rate": 8.023587752708854e-06, "loss": 0.63, "step": 26305 }, { "epoch": 0.3145183467042887, "grad_norm": 3.4495742321014404, "learning_rate": 8.02343354602222e-06, "loss": 0.6496, "step": 26306 }, { "epoch": 0.31453030284914935, "grad_norm": 1.7668980360031128, "learning_rate": 8.023279334801938e-06, "loss": 0.5427, "step": 26307 }, { "epoch": 0.31454225899400995, "grad_norm": 2.1275713443756104, "learning_rate": 8.023125119048239e-06, "loss": 0.5976, "step": 26308 }, { "epoch": 0.3145542151388706, "grad_norm": 12.787280082702637, "learning_rate": 8.022970898761353e-06, "loss": 0.5931, "step": 26309 }, { "epoch": 0.31456617128373127, "grad_norm": 1.8666096925735474, "learning_rate": 8.02281667394151e-06, "loss": 0.5523, "step": 26310 }, { "epoch": 0.3145781274285919, "grad_norm": 2.7090260982513428, "learning_rate": 8.022662444588944e-06, "loss": 0.6065, "step": 26311 }, { "epoch": 0.3145900835734526, "grad_norm": 4.139443874359131, "learning_rate": 8.022508210703882e-06, "loss": 0.6369, "step": 26312 }, { "epoch": 0.31460203971831324, "grad_norm": 1.5130243301391602, "learning_rate": 8.022353972286561e-06, "loss": 0.5821, "step": 26313 }, { "epoch": 0.3146139958631739, "grad_norm": 1.6892809867858887, "learning_rate": 8.022199729337209e-06, "loss": 0.5698, "step": 26314 }, { "epoch": 0.3146259520080345, "grad_norm": 1.9138107299804688, "learning_rate": 8.022045481856058e-06, "loss": 0.6189, "step": 26315 }, { "epoch": 0.31463790815289516, "grad_norm": 4.638916969299316, "learning_rate": 8.02189122984334e-06, "loss": 0.6256, "step": 26316 }, { "epoch": 0.3146498642977558, "grad_norm": 3.249863624572754, "learning_rate": 8.021736973299283e-06, "loss": 0.5922, "step": 26317 }, { "epoch": 0.3146618204426165, "grad_norm": 1.984392762184143, "learning_rate": 8.02158271222412e-06, "loss": 0.6591, "step": 26318 }, { "epoch": 0.31467377658747714, "grad_norm": 2.033599376678467, "learning_rate": 8.021428446618085e-06, "loss": 0.6013, "step": 26319 }, { "epoch": 0.3146857327323378, "grad_norm": 2.413484573364258, "learning_rate": 8.021274176481408e-06, "loss": 0.5508, "step": 26320 }, { "epoch": 0.31469768887719846, "grad_norm": 3.5979323387145996, "learning_rate": 8.021119901814317e-06, "loss": 0.6899, "step": 26321 }, { "epoch": 0.3147096450220591, "grad_norm": 2.5653839111328125, "learning_rate": 8.020965622617049e-06, "loss": 0.6615, "step": 26322 }, { "epoch": 0.3147216011669197, "grad_norm": 2.4665167331695557, "learning_rate": 8.02081133888983e-06, "loss": 0.5892, "step": 26323 }, { "epoch": 0.3147335573117804, "grad_norm": 2.0294501781463623, "learning_rate": 8.020657050632894e-06, "loss": 0.6209, "step": 26324 }, { "epoch": 0.31474551345664104, "grad_norm": 2.9256701469421387, "learning_rate": 8.020502757846474e-06, "loss": 0.6312, "step": 26325 }, { "epoch": 0.3147574696015017, "grad_norm": 1.8723076581954956, "learning_rate": 8.020348460530797e-06, "loss": 0.5357, "step": 26326 }, { "epoch": 0.31476942574636235, "grad_norm": 2.3275110721588135, "learning_rate": 8.020194158686097e-06, "loss": 0.6062, "step": 26327 }, { "epoch": 0.314781381891223, "grad_norm": 1.5089595317840576, "learning_rate": 8.020039852312606e-06, "loss": 0.6084, "step": 26328 }, { "epoch": 0.31479333803608367, "grad_norm": 2.556386947631836, "learning_rate": 8.019885541410554e-06, "loss": 0.5975, "step": 26329 }, { "epoch": 0.3148052941809443, "grad_norm": 2.182077169418335, "learning_rate": 8.019731225980174e-06, "loss": 0.617, "step": 26330 }, { "epoch": 0.31481725032580493, "grad_norm": 2.884110689163208, "learning_rate": 8.019576906021695e-06, "loss": 0.6654, "step": 26331 }, { "epoch": 0.3148292064706656, "grad_norm": 2.041663408279419, "learning_rate": 8.01942258153535e-06, "loss": 0.5515, "step": 26332 }, { "epoch": 0.31484116261552625, "grad_norm": 1.931876301765442, "learning_rate": 8.019268252521371e-06, "loss": 0.5906, "step": 26333 }, { "epoch": 0.3148531187603869, "grad_norm": 1.931187391281128, "learning_rate": 8.019113918979987e-06, "loss": 0.6066, "step": 26334 }, { "epoch": 0.31486507490524757, "grad_norm": 2.0840203762054443, "learning_rate": 8.018959580911431e-06, "loss": 0.5807, "step": 26335 }, { "epoch": 0.3148770310501082, "grad_norm": 3.1748733520507812, "learning_rate": 8.018805238315936e-06, "loss": 0.5884, "step": 26336 }, { "epoch": 0.31488898719496883, "grad_norm": 2.719653606414795, "learning_rate": 8.018650891193732e-06, "loss": 0.6825, "step": 26337 }, { "epoch": 0.3149009433398295, "grad_norm": 2.537442207336426, "learning_rate": 8.018496539545048e-06, "loss": 0.6888, "step": 26338 }, { "epoch": 0.31491289948469015, "grad_norm": 4.243539810180664, "learning_rate": 8.018342183370118e-06, "loss": 0.5247, "step": 26339 }, { "epoch": 0.3149248556295508, "grad_norm": 4.66345739364624, "learning_rate": 8.018187822669174e-06, "loss": 0.6287, "step": 26340 }, { "epoch": 0.31493681177441146, "grad_norm": 2.337801694869995, "learning_rate": 8.018033457442448e-06, "loss": 0.5585, "step": 26341 }, { "epoch": 0.3149487679192721, "grad_norm": 2.1634223461151123, "learning_rate": 8.017879087690168e-06, "loss": 0.6124, "step": 26342 }, { "epoch": 0.3149607240641328, "grad_norm": 1.9548120498657227, "learning_rate": 8.017724713412568e-06, "loss": 0.5337, "step": 26343 }, { "epoch": 0.31497268020899344, "grad_norm": 1.9712990522384644, "learning_rate": 8.017570334609878e-06, "loss": 0.6221, "step": 26344 }, { "epoch": 0.31498463635385404, "grad_norm": 5.684313774108887, "learning_rate": 8.017415951282333e-06, "loss": 0.6509, "step": 26345 }, { "epoch": 0.3149965924987147, "grad_norm": 2.23870587348938, "learning_rate": 8.01726156343016e-06, "loss": 0.6178, "step": 26346 }, { "epoch": 0.31500854864357536, "grad_norm": 1.7019613981246948, "learning_rate": 8.017107171053595e-06, "loss": 0.4843, "step": 26347 }, { "epoch": 0.315020504788436, "grad_norm": 4.5621514320373535, "learning_rate": 8.016952774152867e-06, "loss": 0.5887, "step": 26348 }, { "epoch": 0.3150324609332967, "grad_norm": 2.4628820419311523, "learning_rate": 8.016798372728206e-06, "loss": 0.5973, "step": 26349 }, { "epoch": 0.31504441707815733, "grad_norm": 2.753875494003296, "learning_rate": 8.016643966779845e-06, "loss": 0.6419, "step": 26350 }, { "epoch": 0.315056373223018, "grad_norm": 4.2710862159729, "learning_rate": 8.016489556308016e-06, "loss": 0.5915, "step": 26351 }, { "epoch": 0.3150683293678786, "grad_norm": 3.39909291267395, "learning_rate": 8.01633514131295e-06, "loss": 0.6166, "step": 26352 }, { "epoch": 0.31508028551273926, "grad_norm": 3.486534595489502, "learning_rate": 8.016180721794879e-06, "loss": 0.5244, "step": 26353 }, { "epoch": 0.3150922416575999, "grad_norm": 3.2965519428253174, "learning_rate": 8.016026297754033e-06, "loss": 0.6036, "step": 26354 }, { "epoch": 0.3151041978024606, "grad_norm": 2.107982873916626, "learning_rate": 8.015871869190646e-06, "loss": 0.6169, "step": 26355 }, { "epoch": 0.31511615394732123, "grad_norm": 2.599412441253662, "learning_rate": 8.01571743610495e-06, "loss": 0.5557, "step": 26356 }, { "epoch": 0.3151281100921819, "grad_norm": 3.6637449264526367, "learning_rate": 8.015562998497173e-06, "loss": 0.535, "step": 26357 }, { "epoch": 0.31514006623704255, "grad_norm": 2.215325117111206, "learning_rate": 8.015408556367548e-06, "loss": 0.567, "step": 26358 }, { "epoch": 0.31515202238190315, "grad_norm": 4.199827194213867, "learning_rate": 8.015254109716308e-06, "loss": 0.702, "step": 26359 }, { "epoch": 0.3151639785267638, "grad_norm": 3.673797130584717, "learning_rate": 8.015099658543684e-06, "loss": 0.622, "step": 26360 }, { "epoch": 0.31517593467162447, "grad_norm": 3.133408546447754, "learning_rate": 8.014945202849908e-06, "loss": 0.5569, "step": 26361 }, { "epoch": 0.31518789081648513, "grad_norm": 2.2428274154663086, "learning_rate": 8.01479074263521e-06, "loss": 0.5552, "step": 26362 }, { "epoch": 0.3151998469613458, "grad_norm": 2.0704691410064697, "learning_rate": 8.014636277899824e-06, "loss": 0.6137, "step": 26363 }, { "epoch": 0.31521180310620645, "grad_norm": 4.016349792480469, "learning_rate": 8.014481808643976e-06, "loss": 0.7065, "step": 26364 }, { "epoch": 0.3152237592510671, "grad_norm": 2.5906789302825928, "learning_rate": 8.014327334867905e-06, "loss": 0.5628, "step": 26365 }, { "epoch": 0.31523571539592776, "grad_norm": 2.1671884059906006, "learning_rate": 8.01417285657184e-06, "loss": 0.6313, "step": 26366 }, { "epoch": 0.31524767154078837, "grad_norm": 2.2995147705078125, "learning_rate": 8.014018373756011e-06, "loss": 0.5658, "step": 26367 }, { "epoch": 0.315259627685649, "grad_norm": 2.4223756790161133, "learning_rate": 8.013863886420652e-06, "loss": 0.6024, "step": 26368 }, { "epoch": 0.3152715838305097, "grad_norm": 7.506769180297852, "learning_rate": 8.013709394565994e-06, "loss": 0.5598, "step": 26369 }, { "epoch": 0.31528353997537034, "grad_norm": 1.6909103393554688, "learning_rate": 8.013554898192265e-06, "loss": 0.5725, "step": 26370 }, { "epoch": 0.315295496120231, "grad_norm": 1.7127209901809692, "learning_rate": 8.013400397299701e-06, "loss": 0.69, "step": 26371 }, { "epoch": 0.31530745226509166, "grad_norm": 2.5846054553985596, "learning_rate": 8.013245891888532e-06, "loss": 0.5878, "step": 26372 }, { "epoch": 0.3153194084099523, "grad_norm": 2.2865118980407715, "learning_rate": 8.01309138195899e-06, "loss": 0.5647, "step": 26373 }, { "epoch": 0.3153313645548129, "grad_norm": 3.4181084632873535, "learning_rate": 8.01293686751131e-06, "loss": 0.5554, "step": 26374 }, { "epoch": 0.3153433206996736, "grad_norm": 1.8415393829345703, "learning_rate": 8.012782348545718e-06, "loss": 0.6209, "step": 26375 }, { "epoch": 0.31535527684453424, "grad_norm": 1.8277839422225952, "learning_rate": 8.012627825062446e-06, "loss": 0.5067, "step": 26376 }, { "epoch": 0.3153672329893949, "grad_norm": 2.090043306350708, "learning_rate": 8.01247329706173e-06, "loss": 0.5811, "step": 26377 }, { "epoch": 0.31537918913425556, "grad_norm": 2.424267530441284, "learning_rate": 8.0123187645438e-06, "loss": 0.5966, "step": 26378 }, { "epoch": 0.3153911452791162, "grad_norm": 3.6639392375946045, "learning_rate": 8.012164227508888e-06, "loss": 0.6137, "step": 26379 }, { "epoch": 0.3154031014239769, "grad_norm": 5.067405700683594, "learning_rate": 8.012009685957223e-06, "loss": 0.6282, "step": 26380 }, { "epoch": 0.31541505756883753, "grad_norm": 1.8674542903900146, "learning_rate": 8.01185513988904e-06, "loss": 0.6083, "step": 26381 }, { "epoch": 0.31542701371369813, "grad_norm": 1.5820385217666626, "learning_rate": 8.011700589304567e-06, "loss": 0.6026, "step": 26382 }, { "epoch": 0.3154389698585588, "grad_norm": 2.8462655544281006, "learning_rate": 8.011546034204043e-06, "loss": 0.4992, "step": 26383 }, { "epoch": 0.31545092600341945, "grad_norm": 2.4862303733825684, "learning_rate": 8.011391474587691e-06, "loss": 0.6691, "step": 26384 }, { "epoch": 0.3154628821482801, "grad_norm": 1.8180580139160156, "learning_rate": 8.01123691045575e-06, "loss": 0.5096, "step": 26385 }, { "epoch": 0.31547483829314077, "grad_norm": 2.3018627166748047, "learning_rate": 8.011082341808444e-06, "loss": 0.4649, "step": 26386 }, { "epoch": 0.3154867944380014, "grad_norm": 1.6930197477340698, "learning_rate": 8.010927768646013e-06, "loss": 0.5626, "step": 26387 }, { "epoch": 0.3154987505828621, "grad_norm": 21.548641204833984, "learning_rate": 8.010773190968685e-06, "loss": 0.5741, "step": 26388 }, { "epoch": 0.3155107067277227, "grad_norm": 1.7169710397720337, "learning_rate": 8.01061860877669e-06, "loss": 0.6448, "step": 26389 }, { "epoch": 0.31552266287258335, "grad_norm": 6.330544471740723, "learning_rate": 8.010464022070264e-06, "loss": 0.6352, "step": 26390 }, { "epoch": 0.315534619017444, "grad_norm": 1.9876149892807007, "learning_rate": 8.010309430849634e-06, "loss": 0.6604, "step": 26391 }, { "epoch": 0.31554657516230467, "grad_norm": 2.780447483062744, "learning_rate": 8.010154835115035e-06, "loss": 0.6399, "step": 26392 }, { "epoch": 0.3155585313071653, "grad_norm": 3.5285847187042236, "learning_rate": 8.0100002348667e-06, "loss": 0.6098, "step": 26393 }, { "epoch": 0.315570487452026, "grad_norm": 1.845458745956421, "learning_rate": 8.009845630104857e-06, "loss": 0.5489, "step": 26394 }, { "epoch": 0.31558244359688664, "grad_norm": 4.98915958404541, "learning_rate": 8.00969102082974e-06, "loss": 0.6062, "step": 26395 }, { "epoch": 0.31559439974174724, "grad_norm": 2.819683313369751, "learning_rate": 8.009536407041581e-06, "loss": 0.7116, "step": 26396 }, { "epoch": 0.3156063558866079, "grad_norm": 2.9241411685943604, "learning_rate": 8.009381788740612e-06, "loss": 0.6226, "step": 26397 }, { "epoch": 0.31561831203146856, "grad_norm": 2.6164207458496094, "learning_rate": 8.009227165927065e-06, "loss": 0.5878, "step": 26398 }, { "epoch": 0.3156302681763292, "grad_norm": 1.2827486991882324, "learning_rate": 8.00907253860117e-06, "loss": 0.5149, "step": 26399 }, { "epoch": 0.3156422243211899, "grad_norm": 4.254106521606445, "learning_rate": 8.008917906763158e-06, "loss": 0.5985, "step": 26400 }, { "epoch": 0.31565418046605054, "grad_norm": 4.761552333831787, "learning_rate": 8.008763270413267e-06, "loss": 0.6264, "step": 26401 }, { "epoch": 0.3156661366109112, "grad_norm": 1.6272571086883545, "learning_rate": 8.008608629551722e-06, "loss": 0.5995, "step": 26402 }, { "epoch": 0.31567809275577186, "grad_norm": 2.1541218757629395, "learning_rate": 8.008453984178758e-06, "loss": 0.6119, "step": 26403 }, { "epoch": 0.31569004890063246, "grad_norm": 2.1111648082733154, "learning_rate": 8.008299334294607e-06, "loss": 0.5532, "step": 26404 }, { "epoch": 0.3157020050454931, "grad_norm": 3.0218520164489746, "learning_rate": 8.0081446798995e-06, "loss": 0.5646, "step": 26405 }, { "epoch": 0.3157139611903538, "grad_norm": 2.0577049255371094, "learning_rate": 8.00799002099367e-06, "loss": 0.5362, "step": 26406 }, { "epoch": 0.31572591733521443, "grad_norm": 2.319868326187134, "learning_rate": 8.007835357577348e-06, "loss": 0.6234, "step": 26407 }, { "epoch": 0.3157378734800751, "grad_norm": 1.9135088920593262, "learning_rate": 8.007680689650765e-06, "loss": 0.5913, "step": 26408 }, { "epoch": 0.31574982962493575, "grad_norm": 6.37738561630249, "learning_rate": 8.007526017214156e-06, "loss": 0.6075, "step": 26409 }, { "epoch": 0.3157617857697964, "grad_norm": 1.621212363243103, "learning_rate": 8.007371340267751e-06, "loss": 0.6343, "step": 26410 }, { "epoch": 0.315773741914657, "grad_norm": 5.052705764770508, "learning_rate": 8.007216658811781e-06, "loss": 0.5821, "step": 26411 }, { "epoch": 0.31578569805951767, "grad_norm": 2.5241217613220215, "learning_rate": 8.00706197284648e-06, "loss": 0.5918, "step": 26412 }, { "epoch": 0.31579765420437833, "grad_norm": 1.881723403930664, "learning_rate": 8.006907282372077e-06, "loss": 0.58, "step": 26413 }, { "epoch": 0.315809610349239, "grad_norm": 1.9975510835647583, "learning_rate": 8.006752587388807e-06, "loss": 0.6192, "step": 26414 }, { "epoch": 0.31582156649409965, "grad_norm": 2.2525742053985596, "learning_rate": 8.0065978878969e-06, "loss": 0.6458, "step": 26415 }, { "epoch": 0.3158335226389603, "grad_norm": 2.1876766681671143, "learning_rate": 8.00644318389659e-06, "loss": 0.5793, "step": 26416 }, { "epoch": 0.31584547878382097, "grad_norm": 1.7169959545135498, "learning_rate": 8.00628847538811e-06, "loss": 0.6109, "step": 26417 }, { "epoch": 0.31585743492868157, "grad_norm": 2.5593693256378174, "learning_rate": 8.006133762371685e-06, "loss": 0.5529, "step": 26418 }, { "epoch": 0.3158693910735422, "grad_norm": 3.3165688514709473, "learning_rate": 8.005979044847554e-06, "loss": 0.5882, "step": 26419 }, { "epoch": 0.3158813472184029, "grad_norm": 2.872535228729248, "learning_rate": 8.005824322815946e-06, "loss": 0.5711, "step": 26420 }, { "epoch": 0.31589330336326354, "grad_norm": 1.9699212312698364, "learning_rate": 8.005669596277095e-06, "loss": 0.5844, "step": 26421 }, { "epoch": 0.3159052595081242, "grad_norm": 2.3234918117523193, "learning_rate": 8.005514865231231e-06, "loss": 0.547, "step": 26422 }, { "epoch": 0.31591721565298486, "grad_norm": 7.62861967086792, "learning_rate": 8.005360129678586e-06, "loss": 0.6478, "step": 26423 }, { "epoch": 0.3159291717978455, "grad_norm": 1.7423911094665527, "learning_rate": 8.005205389619395e-06, "loss": 0.5225, "step": 26424 }, { "epoch": 0.3159411279427062, "grad_norm": 1.8458400964736938, "learning_rate": 8.005050645053886e-06, "loss": 0.6961, "step": 26425 }, { "epoch": 0.3159530840875668, "grad_norm": 2.5077457427978516, "learning_rate": 8.004895895982293e-06, "loss": 0.5782, "step": 26426 }, { "epoch": 0.31596504023242744, "grad_norm": 1.7057445049285889, "learning_rate": 8.004741142404848e-06, "loss": 0.5532, "step": 26427 }, { "epoch": 0.3159769963772881, "grad_norm": 1.9385935068130493, "learning_rate": 8.004586384321784e-06, "loss": 0.6889, "step": 26428 }, { "epoch": 0.31598895252214876, "grad_norm": 2.215041399002075, "learning_rate": 8.004431621733331e-06, "loss": 0.6883, "step": 26429 }, { "epoch": 0.3160009086670094, "grad_norm": 6.320705413818359, "learning_rate": 8.004276854639721e-06, "loss": 0.591, "step": 26430 }, { "epoch": 0.3160128648118701, "grad_norm": 2.900148868560791, "learning_rate": 8.004122083041191e-06, "loss": 0.6523, "step": 26431 }, { "epoch": 0.31602482095673073, "grad_norm": 1.6639347076416016, "learning_rate": 8.003967306937965e-06, "loss": 0.7074, "step": 26432 }, { "epoch": 0.31603677710159134, "grad_norm": 1.8673839569091797, "learning_rate": 8.003812526330282e-06, "loss": 0.6178, "step": 26433 }, { "epoch": 0.316048733246452, "grad_norm": 2.0137650966644287, "learning_rate": 8.003657741218371e-06, "loss": 0.5514, "step": 26434 }, { "epoch": 0.31606068939131265, "grad_norm": 1.4834696054458618, "learning_rate": 8.003502951602464e-06, "loss": 0.6101, "step": 26435 }, { "epoch": 0.3160726455361733, "grad_norm": 19.681381225585938, "learning_rate": 8.003348157482793e-06, "loss": 0.5966, "step": 26436 }, { "epoch": 0.31608460168103397, "grad_norm": 1.7632298469543457, "learning_rate": 8.003193358859592e-06, "loss": 0.58, "step": 26437 }, { "epoch": 0.31609655782589463, "grad_norm": 3.0246083736419678, "learning_rate": 8.00303855573309e-06, "loss": 0.6753, "step": 26438 }, { "epoch": 0.3161085139707553, "grad_norm": 1.6712557077407837, "learning_rate": 8.002883748103524e-06, "loss": 0.628, "step": 26439 }, { "epoch": 0.31612047011561595, "grad_norm": 3.2872474193573, "learning_rate": 8.00272893597112e-06, "loss": 0.6543, "step": 26440 }, { "epoch": 0.31613242626047655, "grad_norm": 1.934985876083374, "learning_rate": 8.002574119336115e-06, "loss": 0.673, "step": 26441 }, { "epoch": 0.3161443824053372, "grad_norm": 1.9097148180007935, "learning_rate": 8.00241929819874e-06, "loss": 0.6091, "step": 26442 }, { "epoch": 0.31615633855019787, "grad_norm": 3.730604887008667, "learning_rate": 8.002264472559227e-06, "loss": 0.5817, "step": 26443 }, { "epoch": 0.3161682946950585, "grad_norm": 1.9478421211242676, "learning_rate": 8.002109642417806e-06, "loss": 0.7796, "step": 26444 }, { "epoch": 0.3161802508399192, "grad_norm": 2.2121422290802, "learning_rate": 8.001954807774711e-06, "loss": 0.6553, "step": 26445 }, { "epoch": 0.31619220698477984, "grad_norm": 5.3344221115112305, "learning_rate": 8.001799968630176e-06, "loss": 0.6244, "step": 26446 }, { "epoch": 0.3162041631296405, "grad_norm": 1.4370298385620117, "learning_rate": 8.001645124984429e-06, "loss": 0.6223, "step": 26447 }, { "epoch": 0.3162161192745011, "grad_norm": 1.9186997413635254, "learning_rate": 8.001490276837707e-06, "loss": 0.6593, "step": 26448 }, { "epoch": 0.31622807541936176, "grad_norm": 3.6262130737304688, "learning_rate": 8.001335424190237e-06, "loss": 0.5322, "step": 26449 }, { "epoch": 0.3162400315642224, "grad_norm": 19.432605743408203, "learning_rate": 8.001180567042255e-06, "loss": 0.5931, "step": 26450 }, { "epoch": 0.3162519877090831, "grad_norm": 1.7685930728912354, "learning_rate": 8.001025705393992e-06, "loss": 0.5634, "step": 26451 }, { "epoch": 0.31626394385394374, "grad_norm": 2.3218190670013428, "learning_rate": 8.00087083924568e-06, "loss": 0.6444, "step": 26452 }, { "epoch": 0.3162758999988044, "grad_norm": 1.5747627019882202, "learning_rate": 8.000715968597553e-06, "loss": 0.5671, "step": 26453 }, { "epoch": 0.31628785614366506, "grad_norm": 3.0057761669158936, "learning_rate": 8.00056109344984e-06, "loss": 0.6285, "step": 26454 }, { "epoch": 0.31629981228852566, "grad_norm": 2.085139036178589, "learning_rate": 8.000406213802775e-06, "loss": 0.5988, "step": 26455 }, { "epoch": 0.3163117684333863, "grad_norm": 2.4632909297943115, "learning_rate": 8.000251329656591e-06, "loss": 0.5529, "step": 26456 }, { "epoch": 0.316323724578247, "grad_norm": 2.148641347885132, "learning_rate": 8.00009644101152e-06, "loss": 0.6369, "step": 26457 }, { "epoch": 0.31633568072310764, "grad_norm": 2.4062423706054688, "learning_rate": 7.999941547867791e-06, "loss": 0.5618, "step": 26458 }, { "epoch": 0.3163476368679683, "grad_norm": 1.5537059307098389, "learning_rate": 7.99978665022564e-06, "loss": 0.4789, "step": 26459 }, { "epoch": 0.31635959301282895, "grad_norm": 2.0529379844665527, "learning_rate": 7.999631748085301e-06, "loss": 0.6397, "step": 26460 }, { "epoch": 0.3163715491576896, "grad_norm": 1.8065332174301147, "learning_rate": 7.999476841447002e-06, "loss": 0.5828, "step": 26461 }, { "epoch": 0.31638350530255027, "grad_norm": 4.989997386932373, "learning_rate": 7.999321930310976e-06, "loss": 0.6077, "step": 26462 }, { "epoch": 0.3163954614474109, "grad_norm": 1.8391159772872925, "learning_rate": 7.999167014677456e-06, "loss": 0.704, "step": 26463 }, { "epoch": 0.31640741759227153, "grad_norm": 1.681053638458252, "learning_rate": 7.999012094546675e-06, "loss": 0.6321, "step": 26464 }, { "epoch": 0.3164193737371322, "grad_norm": 1.9747318029403687, "learning_rate": 7.998857169918866e-06, "loss": 0.6858, "step": 26465 }, { "epoch": 0.31643132988199285, "grad_norm": 1.3084295988082886, "learning_rate": 7.998702240794258e-06, "loss": 0.5464, "step": 26466 }, { "epoch": 0.3164432860268535, "grad_norm": 2.0526785850524902, "learning_rate": 7.998547307173086e-06, "loss": 0.6398, "step": 26467 }, { "epoch": 0.31645524217171417, "grad_norm": 2.4636998176574707, "learning_rate": 7.998392369055582e-06, "loss": 0.6321, "step": 26468 }, { "epoch": 0.3164671983165748, "grad_norm": 2.1785926818847656, "learning_rate": 7.998237426441977e-06, "loss": 0.5553, "step": 26469 }, { "epoch": 0.31647915446143543, "grad_norm": 1.8210344314575195, "learning_rate": 7.998082479332506e-06, "loss": 0.5096, "step": 26470 }, { "epoch": 0.3164911106062961, "grad_norm": 1.9829521179199219, "learning_rate": 7.997927527727399e-06, "loss": 0.6846, "step": 26471 }, { "epoch": 0.31650306675115675, "grad_norm": 1.8506325483322144, "learning_rate": 7.997772571626888e-06, "loss": 0.5666, "step": 26472 }, { "epoch": 0.3165150228960174, "grad_norm": 3.2126755714416504, "learning_rate": 7.997617611031207e-06, "loss": 0.5345, "step": 26473 }, { "epoch": 0.31652697904087806, "grad_norm": 2.322924852371216, "learning_rate": 7.997462645940589e-06, "loss": 0.5695, "step": 26474 }, { "epoch": 0.3165389351857387, "grad_norm": 2.9899275302886963, "learning_rate": 7.997307676355264e-06, "loss": 0.5479, "step": 26475 }, { "epoch": 0.3165508913305994, "grad_norm": 2.6190733909606934, "learning_rate": 7.997152702275464e-06, "loss": 0.6113, "step": 26476 }, { "epoch": 0.31656284747546, "grad_norm": 3.7070679664611816, "learning_rate": 7.996997723701425e-06, "loss": 0.6211, "step": 26477 }, { "epoch": 0.31657480362032064, "grad_norm": 2.8449172973632812, "learning_rate": 7.996842740633376e-06, "loss": 0.6046, "step": 26478 }, { "epoch": 0.3165867597651813, "grad_norm": 2.5029947757720947, "learning_rate": 7.996687753071552e-06, "loss": 0.6148, "step": 26479 }, { "epoch": 0.31659871591004196, "grad_norm": 3.6271581649780273, "learning_rate": 7.996532761016182e-06, "loss": 0.5466, "step": 26480 }, { "epoch": 0.3166106720549026, "grad_norm": 1.410038948059082, "learning_rate": 7.996377764467502e-06, "loss": 0.576, "step": 26481 }, { "epoch": 0.3166226281997633, "grad_norm": 1.5187883377075195, "learning_rate": 7.996222763425744e-06, "loss": 0.6706, "step": 26482 }, { "epoch": 0.31663458434462394, "grad_norm": 2.942957878112793, "learning_rate": 7.996067757891136e-06, "loss": 0.5841, "step": 26483 }, { "epoch": 0.3166465404894846, "grad_norm": 9.338868141174316, "learning_rate": 7.995912747863916e-06, "loss": 0.6752, "step": 26484 }, { "epoch": 0.3166584966343452, "grad_norm": 2.614226818084717, "learning_rate": 7.995757733344315e-06, "loss": 0.683, "step": 26485 }, { "epoch": 0.31667045277920586, "grad_norm": 1.9210952520370483, "learning_rate": 7.995602714332563e-06, "loss": 0.6145, "step": 26486 }, { "epoch": 0.3166824089240665, "grad_norm": 2.814073085784912, "learning_rate": 7.995447690828894e-06, "loss": 0.5788, "step": 26487 }, { "epoch": 0.3166943650689272, "grad_norm": 2.6868836879730225, "learning_rate": 7.995292662833543e-06, "loss": 0.584, "step": 26488 }, { "epoch": 0.31670632121378783, "grad_norm": 1.8798081874847412, "learning_rate": 7.995137630346736e-06, "loss": 0.5996, "step": 26489 }, { "epoch": 0.3167182773586485, "grad_norm": 3.3091139793395996, "learning_rate": 7.994982593368712e-06, "loss": 0.6756, "step": 26490 }, { "epoch": 0.31673023350350915, "grad_norm": 2.2905142307281494, "learning_rate": 7.994827551899701e-06, "loss": 0.5336, "step": 26491 }, { "epoch": 0.31674218964836975, "grad_norm": 2.163848876953125, "learning_rate": 7.994672505939935e-06, "loss": 0.5979, "step": 26492 }, { "epoch": 0.3167541457932304, "grad_norm": 1.7057989835739136, "learning_rate": 7.994517455489647e-06, "loss": 0.6406, "step": 26493 }, { "epoch": 0.31676610193809107, "grad_norm": 3.303504467010498, "learning_rate": 7.994362400549069e-06, "loss": 0.6938, "step": 26494 }, { "epoch": 0.31677805808295173, "grad_norm": 2.4384238719940186, "learning_rate": 7.994207341118435e-06, "loss": 0.5615, "step": 26495 }, { "epoch": 0.3167900142278124, "grad_norm": 5.08245325088501, "learning_rate": 7.994052277197975e-06, "loss": 0.6114, "step": 26496 }, { "epoch": 0.31680197037267305, "grad_norm": 2.040513515472412, "learning_rate": 7.993897208787924e-06, "loss": 0.5282, "step": 26497 }, { "epoch": 0.3168139265175337, "grad_norm": 2.344419240951538, "learning_rate": 7.993742135888514e-06, "loss": 0.6609, "step": 26498 }, { "epoch": 0.31682588266239436, "grad_norm": 2.7297422885894775, "learning_rate": 7.993587058499975e-06, "loss": 0.5448, "step": 26499 }, { "epoch": 0.31683783880725497, "grad_norm": 2.0263190269470215, "learning_rate": 7.993431976622542e-06, "loss": 0.5799, "step": 26500 }, { "epoch": 0.3168497949521156, "grad_norm": 1.2256567478179932, "learning_rate": 7.99327689025645e-06, "loss": 0.5699, "step": 26501 }, { "epoch": 0.3168617510969763, "grad_norm": 1.5278112888336182, "learning_rate": 7.993121799401925e-06, "loss": 0.5687, "step": 26502 }, { "epoch": 0.31687370724183694, "grad_norm": 2.03389310836792, "learning_rate": 7.992966704059207e-06, "loss": 0.5425, "step": 26503 }, { "epoch": 0.3168856633866976, "grad_norm": 2.023362159729004, "learning_rate": 7.992811604228521e-06, "loss": 0.5757, "step": 26504 }, { "epoch": 0.31689761953155826, "grad_norm": 1.9816205501556396, "learning_rate": 7.992656499910106e-06, "loss": 0.6726, "step": 26505 }, { "epoch": 0.3169095756764189, "grad_norm": 2.3854336738586426, "learning_rate": 7.99250139110419e-06, "loss": 0.6219, "step": 26506 }, { "epoch": 0.3169215318212795, "grad_norm": 3.105792760848999, "learning_rate": 7.99234627781101e-06, "loss": 0.5672, "step": 26507 }, { "epoch": 0.3169334879661402, "grad_norm": 2.313297748565674, "learning_rate": 7.992191160030793e-06, "loss": 0.6153, "step": 26508 }, { "epoch": 0.31694544411100084, "grad_norm": 1.9301977157592773, "learning_rate": 7.992036037763776e-06, "loss": 0.4755, "step": 26509 }, { "epoch": 0.3169574002558615, "grad_norm": 2.467353343963623, "learning_rate": 7.99188091101019e-06, "loss": 0.562, "step": 26510 }, { "epoch": 0.31696935640072216, "grad_norm": 1.5014878511428833, "learning_rate": 7.99172577977027e-06, "loss": 0.5651, "step": 26511 }, { "epoch": 0.3169813125455828, "grad_norm": 1.9875870943069458, "learning_rate": 7.991570644044244e-06, "loss": 0.6188, "step": 26512 }, { "epoch": 0.3169932686904435, "grad_norm": 1.9797989130020142, "learning_rate": 7.99141550383235e-06, "loss": 0.5696, "step": 26513 }, { "epoch": 0.3170052248353041, "grad_norm": 1.9675053358078003, "learning_rate": 7.991260359134815e-06, "loss": 0.502, "step": 26514 }, { "epoch": 0.31701718098016474, "grad_norm": 1.703059434890747, "learning_rate": 7.991105209951875e-06, "loss": 0.6424, "step": 26515 }, { "epoch": 0.3170291371250254, "grad_norm": 3.575411558151245, "learning_rate": 7.990950056283764e-06, "loss": 0.5699, "step": 26516 }, { "epoch": 0.31704109326988605, "grad_norm": 2.802899122238159, "learning_rate": 7.990794898130711e-06, "loss": 0.6376, "step": 26517 }, { "epoch": 0.3170530494147467, "grad_norm": 1.9860782623291016, "learning_rate": 7.990639735492951e-06, "loss": 0.6639, "step": 26518 }, { "epoch": 0.31706500555960737, "grad_norm": 1.7660783529281616, "learning_rate": 7.990484568370716e-06, "loss": 0.632, "step": 26519 }, { "epoch": 0.31707696170446803, "grad_norm": 2.2940518856048584, "learning_rate": 7.990329396764239e-06, "loss": 0.6167, "step": 26520 }, { "epoch": 0.3170889178493287, "grad_norm": 4.245205879211426, "learning_rate": 7.990174220673752e-06, "loss": 0.6838, "step": 26521 }, { "epoch": 0.3171008739941893, "grad_norm": 3.152221441268921, "learning_rate": 7.990019040099488e-06, "loss": 0.5802, "step": 26522 }, { "epoch": 0.31711283013904995, "grad_norm": 1.6118881702423096, "learning_rate": 7.98986385504168e-06, "loss": 0.5973, "step": 26523 }, { "epoch": 0.3171247862839106, "grad_norm": 2.008420467376709, "learning_rate": 7.98970866550056e-06, "loss": 0.5999, "step": 26524 }, { "epoch": 0.31713674242877127, "grad_norm": 4.496870040893555, "learning_rate": 7.989553471476362e-06, "loss": 0.6732, "step": 26525 }, { "epoch": 0.3171486985736319, "grad_norm": 2.6292238235473633, "learning_rate": 7.989398272969318e-06, "loss": 0.4762, "step": 26526 }, { "epoch": 0.3171606547184926, "grad_norm": 1.4666035175323486, "learning_rate": 7.98924306997966e-06, "loss": 0.5064, "step": 26527 }, { "epoch": 0.31717261086335324, "grad_norm": 2.473667621612549, "learning_rate": 7.989087862507621e-06, "loss": 0.5757, "step": 26528 }, { "epoch": 0.31718456700821385, "grad_norm": 1.6833128929138184, "learning_rate": 7.988932650553436e-06, "loss": 0.7184, "step": 26529 }, { "epoch": 0.3171965231530745, "grad_norm": 2.6561660766601562, "learning_rate": 7.988777434117334e-06, "loss": 0.607, "step": 26530 }, { "epoch": 0.31720847929793516, "grad_norm": 2.2630069255828857, "learning_rate": 7.98862221319955e-06, "loss": 0.5133, "step": 26531 }, { "epoch": 0.3172204354427958, "grad_norm": 2.4668643474578857, "learning_rate": 7.988466987800318e-06, "loss": 0.5641, "step": 26532 }, { "epoch": 0.3172323915876565, "grad_norm": 1.921903133392334, "learning_rate": 7.988311757919868e-06, "loss": 0.6957, "step": 26533 }, { "epoch": 0.31724434773251714, "grad_norm": 3.156062126159668, "learning_rate": 7.988156523558434e-06, "loss": 0.6139, "step": 26534 }, { "epoch": 0.3172563038773778, "grad_norm": 1.7676976919174194, "learning_rate": 7.988001284716247e-06, "loss": 0.5718, "step": 26535 }, { "epoch": 0.3172682600222384, "grad_norm": 4.687809467315674, "learning_rate": 7.987846041393544e-06, "loss": 0.5803, "step": 26536 }, { "epoch": 0.31728021616709906, "grad_norm": 4.468903064727783, "learning_rate": 7.987690793590554e-06, "loss": 0.6786, "step": 26537 }, { "epoch": 0.3172921723119597, "grad_norm": 2.3114190101623535, "learning_rate": 7.987535541307511e-06, "loss": 0.5316, "step": 26538 }, { "epoch": 0.3173041284568204, "grad_norm": 6.1016764640808105, "learning_rate": 7.98738028454465e-06, "loss": 0.6055, "step": 26539 }, { "epoch": 0.31731608460168104, "grad_norm": 2.661118268966675, "learning_rate": 7.987225023302199e-06, "loss": 0.6165, "step": 26540 }, { "epoch": 0.3173280407465417, "grad_norm": 4.555022716522217, "learning_rate": 7.987069757580392e-06, "loss": 0.6237, "step": 26541 }, { "epoch": 0.31733999689140235, "grad_norm": 2.365968704223633, "learning_rate": 7.986914487379468e-06, "loss": 0.6461, "step": 26542 }, { "epoch": 0.317351953036263, "grad_norm": 1.9250097274780273, "learning_rate": 7.986759212699652e-06, "loss": 0.6055, "step": 26543 }, { "epoch": 0.3173639091811236, "grad_norm": 1.804139256477356, "learning_rate": 7.98660393354118e-06, "loss": 0.5572, "step": 26544 }, { "epoch": 0.3173758653259843, "grad_norm": 1.939989447593689, "learning_rate": 7.986448649904286e-06, "loss": 0.605, "step": 26545 }, { "epoch": 0.31738782147084493, "grad_norm": 1.4552233219146729, "learning_rate": 7.986293361789201e-06, "loss": 0.6024, "step": 26546 }, { "epoch": 0.3173997776157056, "grad_norm": 1.5024282932281494, "learning_rate": 7.986138069196158e-06, "loss": 0.5511, "step": 26547 }, { "epoch": 0.31741173376056625, "grad_norm": 2.8980212211608887, "learning_rate": 7.98598277212539e-06, "loss": 0.5416, "step": 26548 }, { "epoch": 0.3174236899054269, "grad_norm": 7.59077787399292, "learning_rate": 7.985827470577132e-06, "loss": 0.5497, "step": 26549 }, { "epoch": 0.31743564605028757, "grad_norm": 2.624596118927002, "learning_rate": 7.985672164551614e-06, "loss": 0.6151, "step": 26550 }, { "epoch": 0.31744760219514817, "grad_norm": 2.3644487857818604, "learning_rate": 7.985516854049069e-06, "loss": 0.523, "step": 26551 }, { "epoch": 0.31745955834000883, "grad_norm": 4.259082317352295, "learning_rate": 7.985361539069733e-06, "loss": 0.5909, "step": 26552 }, { "epoch": 0.3174715144848695, "grad_norm": 2.1649465560913086, "learning_rate": 7.985206219613835e-06, "loss": 0.5528, "step": 26553 }, { "epoch": 0.31748347062973015, "grad_norm": 2.5450146198272705, "learning_rate": 7.98505089568161e-06, "loss": 0.793, "step": 26554 }, { "epoch": 0.3174954267745908, "grad_norm": 1.8968693017959595, "learning_rate": 7.984895567273291e-06, "loss": 0.5259, "step": 26555 }, { "epoch": 0.31750738291945146, "grad_norm": 2.5107619762420654, "learning_rate": 7.98474023438911e-06, "loss": 0.5447, "step": 26556 }, { "epoch": 0.3175193390643121, "grad_norm": 2.7671749591827393, "learning_rate": 7.984584897029302e-06, "loss": 0.6109, "step": 26557 }, { "epoch": 0.3175312952091728, "grad_norm": 3.4559507369995117, "learning_rate": 7.984429555194097e-06, "loss": 0.5831, "step": 26558 }, { "epoch": 0.3175432513540334, "grad_norm": 2.461374282836914, "learning_rate": 7.984274208883729e-06, "loss": 0.5913, "step": 26559 }, { "epoch": 0.31755520749889404, "grad_norm": 2.0325284004211426, "learning_rate": 7.984118858098431e-06, "loss": 0.542, "step": 26560 }, { "epoch": 0.3175671636437547, "grad_norm": 2.9544918537139893, "learning_rate": 7.983963502838438e-06, "loss": 0.7071, "step": 26561 }, { "epoch": 0.31757911978861536, "grad_norm": 2.686990261077881, "learning_rate": 7.983808143103978e-06, "loss": 0.5937, "step": 26562 }, { "epoch": 0.317591075933476, "grad_norm": 2.100955009460449, "learning_rate": 7.98365277889529e-06, "loss": 0.678, "step": 26563 }, { "epoch": 0.3176030320783367, "grad_norm": 3.8456900119781494, "learning_rate": 7.983497410212604e-06, "loss": 0.5589, "step": 26564 }, { "epoch": 0.31761498822319734, "grad_norm": 2.243056297302246, "learning_rate": 7.983342037056152e-06, "loss": 0.5964, "step": 26565 }, { "epoch": 0.31762694436805794, "grad_norm": 1.9540023803710938, "learning_rate": 7.983186659426168e-06, "loss": 0.6063, "step": 26566 }, { "epoch": 0.3176389005129186, "grad_norm": 1.572657585144043, "learning_rate": 7.983031277322884e-06, "loss": 0.5993, "step": 26567 }, { "epoch": 0.31765085665777926, "grad_norm": 2.098217725753784, "learning_rate": 7.982875890746535e-06, "loss": 0.5821, "step": 26568 }, { "epoch": 0.3176628128026399, "grad_norm": 2.2202632427215576, "learning_rate": 7.982720499697354e-06, "loss": 0.6858, "step": 26569 }, { "epoch": 0.3176747689475006, "grad_norm": 2.2906131744384766, "learning_rate": 7.982565104175572e-06, "loss": 0.6849, "step": 26570 }, { "epoch": 0.31768672509236123, "grad_norm": 1.5738648176193237, "learning_rate": 7.982409704181425e-06, "loss": 0.4984, "step": 26571 }, { "epoch": 0.3176986812372219, "grad_norm": 2.5492637157440186, "learning_rate": 7.98225429971514e-06, "loss": 0.5575, "step": 26572 }, { "epoch": 0.3177106373820825, "grad_norm": 3.003378391265869, "learning_rate": 7.982098890776956e-06, "loss": 0.53, "step": 26573 }, { "epoch": 0.31772259352694315, "grad_norm": 12.58610725402832, "learning_rate": 7.981943477367105e-06, "loss": 0.6056, "step": 26574 }, { "epoch": 0.3177345496718038, "grad_norm": 2.0758845806121826, "learning_rate": 7.981788059485819e-06, "loss": 0.7694, "step": 26575 }, { "epoch": 0.31774650581666447, "grad_norm": 2.0162224769592285, "learning_rate": 7.98163263713333e-06, "loss": 0.5872, "step": 26576 }, { "epoch": 0.31775846196152513, "grad_norm": 2.84512996673584, "learning_rate": 7.981477210309873e-06, "loss": 0.6134, "step": 26577 }, { "epoch": 0.3177704181063858, "grad_norm": 3.1389901638031006, "learning_rate": 7.98132177901568e-06, "loss": 0.601, "step": 26578 }, { "epoch": 0.31778237425124645, "grad_norm": 2.138479709625244, "learning_rate": 7.981166343250986e-06, "loss": 0.6591, "step": 26579 }, { "epoch": 0.3177943303961071, "grad_norm": 1.5352039337158203, "learning_rate": 7.98101090301602e-06, "loss": 0.5562, "step": 26580 }, { "epoch": 0.3178062865409677, "grad_norm": 2.263678550720215, "learning_rate": 7.980855458311019e-06, "loss": 0.5732, "step": 26581 }, { "epoch": 0.31781824268582837, "grad_norm": 4.709409236907959, "learning_rate": 7.980700009136214e-06, "loss": 0.5529, "step": 26582 }, { "epoch": 0.317830198830689, "grad_norm": 1.9806863069534302, "learning_rate": 7.98054455549184e-06, "loss": 0.5821, "step": 26583 }, { "epoch": 0.3178421549755497, "grad_norm": 4.828098297119141, "learning_rate": 7.980389097378127e-06, "loss": 0.6865, "step": 26584 }, { "epoch": 0.31785411112041034, "grad_norm": 1.5706124305725098, "learning_rate": 7.98023363479531e-06, "loss": 0.633, "step": 26585 }, { "epoch": 0.317866067265271, "grad_norm": 5.574050426483154, "learning_rate": 7.980078167743623e-06, "loss": 0.6778, "step": 26586 }, { "epoch": 0.31787802341013166, "grad_norm": 1.7355045080184937, "learning_rate": 7.9799226962233e-06, "loss": 0.5645, "step": 26587 }, { "epoch": 0.31788997955499226, "grad_norm": 1.6520169973373413, "learning_rate": 7.97976722023457e-06, "loss": 0.5113, "step": 26588 }, { "epoch": 0.3179019356998529, "grad_norm": 3.193359613418579, "learning_rate": 7.979611739777669e-06, "loss": 0.5879, "step": 26589 }, { "epoch": 0.3179138918447136, "grad_norm": 1.9945412874221802, "learning_rate": 7.979456254852829e-06, "loss": 0.586, "step": 26590 }, { "epoch": 0.31792584798957424, "grad_norm": 2.565885543823242, "learning_rate": 7.979300765460285e-06, "loss": 0.6668, "step": 26591 }, { "epoch": 0.3179378041344349, "grad_norm": 2.490748405456543, "learning_rate": 7.979145271600269e-06, "loss": 0.6037, "step": 26592 }, { "epoch": 0.31794976027929556, "grad_norm": 2.43264102935791, "learning_rate": 7.978989773273013e-06, "loss": 0.6216, "step": 26593 }, { "epoch": 0.3179617164241562, "grad_norm": 4.186165809631348, "learning_rate": 7.97883427047875e-06, "loss": 0.5922, "step": 26594 }, { "epoch": 0.3179736725690168, "grad_norm": 2.830218553543091, "learning_rate": 7.978678763217716e-06, "loss": 0.5947, "step": 26595 }, { "epoch": 0.3179856287138775, "grad_norm": 1.6310019493103027, "learning_rate": 7.978523251490143e-06, "loss": 0.5888, "step": 26596 }, { "epoch": 0.31799758485873814, "grad_norm": 1.9984227418899536, "learning_rate": 7.978367735296263e-06, "loss": 0.5866, "step": 26597 }, { "epoch": 0.3180095410035988, "grad_norm": 2.031698703765869, "learning_rate": 7.978212214636311e-06, "loss": 0.561, "step": 26598 }, { "epoch": 0.31802149714845945, "grad_norm": 1.58157479763031, "learning_rate": 7.978056689510518e-06, "loss": 0.5466, "step": 26599 }, { "epoch": 0.3180334532933201, "grad_norm": 2.24576735496521, "learning_rate": 7.977901159919119e-06, "loss": 0.67, "step": 26600 }, { "epoch": 0.31804540943818077, "grad_norm": 3.0470075607299805, "learning_rate": 7.977745625862348e-06, "loss": 0.6092, "step": 26601 }, { "epoch": 0.31805736558304143, "grad_norm": 2.6407833099365234, "learning_rate": 7.977590087340436e-06, "loss": 0.5622, "step": 26602 }, { "epoch": 0.31806932172790203, "grad_norm": 2.1308960914611816, "learning_rate": 7.977434544353615e-06, "loss": 0.6728, "step": 26603 }, { "epoch": 0.3180812778727627, "grad_norm": 4.703514099121094, "learning_rate": 7.977278996902123e-06, "loss": 0.5561, "step": 26604 }, { "epoch": 0.31809323401762335, "grad_norm": 3.032219886779785, "learning_rate": 7.977123444986187e-06, "loss": 0.6721, "step": 26605 }, { "epoch": 0.318105190162484, "grad_norm": 1.9999489784240723, "learning_rate": 7.976967888606047e-06, "loss": 0.5583, "step": 26606 }, { "epoch": 0.31811714630734467, "grad_norm": 1.9292727708816528, "learning_rate": 7.976812327761934e-06, "loss": 0.5544, "step": 26607 }, { "epoch": 0.3181291024522053, "grad_norm": 2.2497670650482178, "learning_rate": 7.976656762454077e-06, "loss": 0.6673, "step": 26608 }, { "epoch": 0.318141058597066, "grad_norm": 1.6439144611358643, "learning_rate": 7.976501192682714e-06, "loss": 0.6265, "step": 26609 }, { "epoch": 0.3181530147419266, "grad_norm": 5.065791130065918, "learning_rate": 7.976345618448077e-06, "loss": 0.7314, "step": 26610 }, { "epoch": 0.31816497088678725, "grad_norm": 1.7517391443252563, "learning_rate": 7.976190039750402e-06, "loss": 0.6871, "step": 26611 }, { "epoch": 0.3181769270316479, "grad_norm": 1.8666390180587769, "learning_rate": 7.976034456589916e-06, "loss": 0.6564, "step": 26612 }, { "epoch": 0.31818888317650856, "grad_norm": 1.9922754764556885, "learning_rate": 7.975878868966856e-06, "loss": 0.6278, "step": 26613 }, { "epoch": 0.3182008393213692, "grad_norm": 3.2860052585601807, "learning_rate": 7.975723276881455e-06, "loss": 0.6197, "step": 26614 }, { "epoch": 0.3182127954662299, "grad_norm": 4.52720832824707, "learning_rate": 7.975567680333946e-06, "loss": 0.4963, "step": 26615 }, { "epoch": 0.31822475161109054, "grad_norm": 2.541262626647949, "learning_rate": 7.975412079324565e-06, "loss": 0.5628, "step": 26616 }, { "epoch": 0.3182367077559512, "grad_norm": 2.408973455429077, "learning_rate": 7.97525647385354e-06, "loss": 0.5132, "step": 26617 }, { "epoch": 0.3182486639008118, "grad_norm": 4.70901346206665, "learning_rate": 7.97510086392111e-06, "loss": 0.627, "step": 26618 }, { "epoch": 0.31826062004567246, "grad_norm": 1.716377854347229, "learning_rate": 7.974945249527506e-06, "loss": 0.6134, "step": 26619 }, { "epoch": 0.3182725761905331, "grad_norm": 2.497295618057251, "learning_rate": 7.974789630672959e-06, "loss": 0.7229, "step": 26620 }, { "epoch": 0.3182845323353938, "grad_norm": 1.575697898864746, "learning_rate": 7.974634007357704e-06, "loss": 0.6274, "step": 26621 }, { "epoch": 0.31829648848025444, "grad_norm": 2.665140390396118, "learning_rate": 7.974478379581975e-06, "loss": 0.6267, "step": 26622 }, { "epoch": 0.3183084446251151, "grad_norm": 1.5324711799621582, "learning_rate": 7.974322747346007e-06, "loss": 0.6212, "step": 26623 }, { "epoch": 0.31832040076997575, "grad_norm": 1.4605860710144043, "learning_rate": 7.97416711065003e-06, "loss": 0.629, "step": 26624 }, { "epoch": 0.31833235691483636, "grad_norm": 1.9730995893478394, "learning_rate": 7.974011469494281e-06, "loss": 0.5925, "step": 26625 }, { "epoch": 0.318344313059697, "grad_norm": 2.042520761489868, "learning_rate": 7.973855823878988e-06, "loss": 0.6295, "step": 26626 }, { "epoch": 0.3183562692045577, "grad_norm": 2.354010820388794, "learning_rate": 7.97370017380439e-06, "loss": 0.6147, "step": 26627 }, { "epoch": 0.31836822534941833, "grad_norm": 1.8383673429489136, "learning_rate": 7.973544519270715e-06, "loss": 0.6917, "step": 26628 }, { "epoch": 0.318380181494279, "grad_norm": 1.8414541482925415, "learning_rate": 7.973388860278204e-06, "loss": 0.6114, "step": 26629 }, { "epoch": 0.31839213763913965, "grad_norm": 2.5504047870635986, "learning_rate": 7.973233196827081e-06, "loss": 0.5985, "step": 26630 }, { "epoch": 0.3184040937840003, "grad_norm": 2.8680379390716553, "learning_rate": 7.973077528917588e-06, "loss": 0.5892, "step": 26631 }, { "epoch": 0.3184160499288609, "grad_norm": 1.8688942193984985, "learning_rate": 7.972921856549954e-06, "loss": 0.6275, "step": 26632 }, { "epoch": 0.31842800607372157, "grad_norm": 2.1427998542785645, "learning_rate": 7.972766179724411e-06, "loss": 0.4907, "step": 26633 }, { "epoch": 0.31843996221858223, "grad_norm": 2.333617687225342, "learning_rate": 7.972610498441196e-06, "loss": 0.5389, "step": 26634 }, { "epoch": 0.3184519183634429, "grad_norm": 2.629274368286133, "learning_rate": 7.972454812700541e-06, "loss": 0.5121, "step": 26635 }, { "epoch": 0.31846387450830355, "grad_norm": 1.765987753868103, "learning_rate": 7.97229912250268e-06, "loss": 0.5315, "step": 26636 }, { "epoch": 0.3184758306531642, "grad_norm": 2.386817455291748, "learning_rate": 7.972143427847845e-06, "loss": 0.606, "step": 26637 }, { "epoch": 0.31848778679802486, "grad_norm": 2.9324417114257812, "learning_rate": 7.97198772873627e-06, "loss": 0.6451, "step": 26638 }, { "epoch": 0.3184997429428855, "grad_norm": 1.4846502542495728, "learning_rate": 7.97183202516819e-06, "loss": 0.4781, "step": 26639 }, { "epoch": 0.3185116990877461, "grad_norm": 1.6990933418273926, "learning_rate": 7.971676317143838e-06, "loss": 0.6137, "step": 26640 }, { "epoch": 0.3185236552326068, "grad_norm": 2.167839527130127, "learning_rate": 7.971520604663444e-06, "loss": 0.5712, "step": 26641 }, { "epoch": 0.31853561137746744, "grad_norm": 2.59773588180542, "learning_rate": 7.971364887727245e-06, "loss": 0.6614, "step": 26642 }, { "epoch": 0.3185475675223281, "grad_norm": 6.989361763000488, "learning_rate": 7.971209166335475e-06, "loss": 0.5332, "step": 26643 }, { "epoch": 0.31855952366718876, "grad_norm": 2.4213082790374756, "learning_rate": 7.971053440488366e-06, "loss": 0.5472, "step": 26644 }, { "epoch": 0.3185714798120494, "grad_norm": 1.7343590259552002, "learning_rate": 7.970897710186152e-06, "loss": 0.6331, "step": 26645 }, { "epoch": 0.3185834359569101, "grad_norm": 6.335399150848389, "learning_rate": 7.970741975429066e-06, "loss": 0.5564, "step": 26646 }, { "epoch": 0.3185953921017707, "grad_norm": 1.936806559562683, "learning_rate": 7.970586236217341e-06, "loss": 0.628, "step": 26647 }, { "epoch": 0.31860734824663134, "grad_norm": 3.204859972000122, "learning_rate": 7.970430492551213e-06, "loss": 0.5867, "step": 26648 }, { "epoch": 0.318619304391492, "grad_norm": 1.8695199489593506, "learning_rate": 7.970274744430914e-06, "loss": 0.6702, "step": 26649 }, { "epoch": 0.31863126053635266, "grad_norm": 3.9260072708129883, "learning_rate": 7.970118991856677e-06, "loss": 0.5948, "step": 26650 }, { "epoch": 0.3186432166812133, "grad_norm": 1.936852216720581, "learning_rate": 7.969963234828735e-06, "loss": 0.6165, "step": 26651 }, { "epoch": 0.318655172826074, "grad_norm": 3.684863567352295, "learning_rate": 7.969807473347324e-06, "loss": 0.6288, "step": 26652 }, { "epoch": 0.31866712897093463, "grad_norm": 1.4729760885238647, "learning_rate": 7.969651707412675e-06, "loss": 0.5204, "step": 26653 }, { "epoch": 0.31867908511579524, "grad_norm": 5.445056915283203, "learning_rate": 7.969495937025023e-06, "loss": 0.537, "step": 26654 }, { "epoch": 0.3186910412606559, "grad_norm": 2.210984945297241, "learning_rate": 7.969340162184601e-06, "loss": 0.582, "step": 26655 }, { "epoch": 0.31870299740551655, "grad_norm": 2.9261295795440674, "learning_rate": 7.969184382891643e-06, "loss": 0.519, "step": 26656 }, { "epoch": 0.3187149535503772, "grad_norm": 2.039374589920044, "learning_rate": 7.969028599146383e-06, "loss": 0.5964, "step": 26657 }, { "epoch": 0.31872690969523787, "grad_norm": 3.130183458328247, "learning_rate": 7.968872810949053e-06, "loss": 0.662, "step": 26658 }, { "epoch": 0.31873886584009853, "grad_norm": 1.8708295822143555, "learning_rate": 7.96871701829989e-06, "loss": 0.6058, "step": 26659 }, { "epoch": 0.3187508219849592, "grad_norm": 1.5556236505508423, "learning_rate": 7.968561221199122e-06, "loss": 0.5864, "step": 26660 }, { "epoch": 0.31876277812981985, "grad_norm": 2.573230504989624, "learning_rate": 7.96840541964699e-06, "loss": 0.5761, "step": 26661 }, { "epoch": 0.31877473427468045, "grad_norm": 1.5347352027893066, "learning_rate": 7.96824961364372e-06, "loss": 0.6971, "step": 26662 }, { "epoch": 0.3187866904195411, "grad_norm": 11.596508979797363, "learning_rate": 7.96809380318955e-06, "loss": 0.4996, "step": 26663 }, { "epoch": 0.31879864656440177, "grad_norm": 2.080552101135254, "learning_rate": 7.967937988284714e-06, "loss": 0.5584, "step": 26664 }, { "epoch": 0.3188106027092624, "grad_norm": 1.4133446216583252, "learning_rate": 7.967782168929442e-06, "loss": 0.5417, "step": 26665 }, { "epoch": 0.3188225588541231, "grad_norm": 2.048112154006958, "learning_rate": 7.967626345123973e-06, "loss": 0.5904, "step": 26666 }, { "epoch": 0.31883451499898374, "grad_norm": 2.029909133911133, "learning_rate": 7.967470516868536e-06, "loss": 0.7158, "step": 26667 }, { "epoch": 0.3188464711438444, "grad_norm": 10.966562271118164, "learning_rate": 7.967314684163366e-06, "loss": 0.5715, "step": 26668 }, { "epoch": 0.318858427288705, "grad_norm": 6.480106353759766, "learning_rate": 7.967158847008697e-06, "loss": 0.6443, "step": 26669 }, { "epoch": 0.31887038343356566, "grad_norm": 2.515289306640625, "learning_rate": 7.967003005404765e-06, "loss": 0.642, "step": 26670 }, { "epoch": 0.3188823395784263, "grad_norm": 2.9475557804107666, "learning_rate": 7.9668471593518e-06, "loss": 0.6055, "step": 26671 }, { "epoch": 0.318894295723287, "grad_norm": 6.397141933441162, "learning_rate": 7.966691308850037e-06, "loss": 0.6377, "step": 26672 }, { "epoch": 0.31890625186814764, "grad_norm": 2.305818557739258, "learning_rate": 7.966535453899711e-06, "loss": 0.5756, "step": 26673 }, { "epoch": 0.3189182080130083, "grad_norm": 2.750145673751831, "learning_rate": 7.966379594501053e-06, "loss": 0.5599, "step": 26674 }, { "epoch": 0.31893016415786896, "grad_norm": 2.036790609359741, "learning_rate": 7.9662237306543e-06, "loss": 0.5433, "step": 26675 }, { "epoch": 0.3189421203027296, "grad_norm": 1.7488895654678345, "learning_rate": 7.966067862359681e-06, "loss": 0.7091, "step": 26676 }, { "epoch": 0.3189540764475902, "grad_norm": 2.02854585647583, "learning_rate": 7.965911989617435e-06, "loss": 0.4938, "step": 26677 }, { "epoch": 0.3189660325924509, "grad_norm": 5.047337055206299, "learning_rate": 7.965756112427793e-06, "loss": 0.6228, "step": 26678 }, { "epoch": 0.31897798873731154, "grad_norm": 2.1670217514038086, "learning_rate": 7.965600230790988e-06, "loss": 0.5708, "step": 26679 }, { "epoch": 0.3189899448821722, "grad_norm": 1.8933894634246826, "learning_rate": 7.965444344707256e-06, "loss": 0.5905, "step": 26680 }, { "epoch": 0.31900190102703285, "grad_norm": 3.8108222484588623, "learning_rate": 7.965288454176832e-06, "loss": 0.6064, "step": 26681 }, { "epoch": 0.3190138571718935, "grad_norm": 5.158873558044434, "learning_rate": 7.965132559199944e-06, "loss": 0.4686, "step": 26682 }, { "epoch": 0.31902581331675417, "grad_norm": 3.044048309326172, "learning_rate": 7.964976659776831e-06, "loss": 0.5424, "step": 26683 }, { "epoch": 0.3190377694616148, "grad_norm": 2.422729730606079, "learning_rate": 7.964820755907723e-06, "loss": 0.5886, "step": 26684 }, { "epoch": 0.31904972560647543, "grad_norm": 2.4325830936431885, "learning_rate": 7.964664847592858e-06, "loss": 0.5948, "step": 26685 }, { "epoch": 0.3190616817513361, "grad_norm": 2.1344144344329834, "learning_rate": 7.964508934832466e-06, "loss": 0.4854, "step": 26686 }, { "epoch": 0.31907363789619675, "grad_norm": 3.4832592010498047, "learning_rate": 7.964353017626782e-06, "loss": 0.6234, "step": 26687 }, { "epoch": 0.3190855940410574, "grad_norm": 2.4153025150299072, "learning_rate": 7.964197095976042e-06, "loss": 0.5623, "step": 26688 }, { "epoch": 0.31909755018591807, "grad_norm": 3.4045090675354004, "learning_rate": 7.964041169880476e-06, "loss": 0.7099, "step": 26689 }, { "epoch": 0.3191095063307787, "grad_norm": 3.2664575576782227, "learning_rate": 7.96388523934032e-06, "loss": 0.4825, "step": 26690 }, { "epoch": 0.31912146247563933, "grad_norm": 2.1278347969055176, "learning_rate": 7.96372930435581e-06, "loss": 0.4765, "step": 26691 }, { "epoch": 0.3191334186205, "grad_norm": 2.003941059112549, "learning_rate": 7.963573364927175e-06, "loss": 0.6091, "step": 26692 }, { "epoch": 0.31914537476536065, "grad_norm": 51.4421272277832, "learning_rate": 7.963417421054653e-06, "loss": 0.6094, "step": 26693 }, { "epoch": 0.3191573309102213, "grad_norm": 1.9876725673675537, "learning_rate": 7.963261472738473e-06, "loss": 0.6036, "step": 26694 }, { "epoch": 0.31916928705508196, "grad_norm": 1.6956864595413208, "learning_rate": 7.963105519978873e-06, "loss": 0.5768, "step": 26695 }, { "epoch": 0.3191812431999426, "grad_norm": 2.4375972747802734, "learning_rate": 7.962949562776088e-06, "loss": 0.6333, "step": 26696 }, { "epoch": 0.3191931993448033, "grad_norm": 9.443520545959473, "learning_rate": 7.962793601130347e-06, "loss": 0.5831, "step": 26697 }, { "epoch": 0.31920515548966394, "grad_norm": 2.6350409984588623, "learning_rate": 7.962637635041888e-06, "loss": 0.6067, "step": 26698 }, { "epoch": 0.31921711163452454, "grad_norm": 3.058253765106201, "learning_rate": 7.962481664510942e-06, "loss": 0.6067, "step": 26699 }, { "epoch": 0.3192290677793852, "grad_norm": 8.07880973815918, "learning_rate": 7.962325689537745e-06, "loss": 0.5664, "step": 26700 }, { "epoch": 0.31924102392424586, "grad_norm": 1.9115689992904663, "learning_rate": 7.962169710122531e-06, "loss": 0.5683, "step": 26701 }, { "epoch": 0.3192529800691065, "grad_norm": 1.9425112009048462, "learning_rate": 7.962013726265532e-06, "loss": 0.6098, "step": 26702 }, { "epoch": 0.3192649362139672, "grad_norm": 2.311591863632202, "learning_rate": 7.961857737966982e-06, "loss": 0.5668, "step": 26703 }, { "epoch": 0.31927689235882784, "grad_norm": 1.8517042398452759, "learning_rate": 7.961701745227117e-06, "loss": 0.6169, "step": 26704 }, { "epoch": 0.3192888485036885, "grad_norm": 2.1285886764526367, "learning_rate": 7.96154574804617e-06, "loss": 0.5611, "step": 26705 }, { "epoch": 0.3193008046485491, "grad_norm": 5.291508197784424, "learning_rate": 7.961389746424372e-06, "loss": 0.5263, "step": 26706 }, { "epoch": 0.31931276079340976, "grad_norm": 3.849580764770508, "learning_rate": 7.961233740361964e-06, "loss": 0.7193, "step": 26707 }, { "epoch": 0.3193247169382704, "grad_norm": 4.342982292175293, "learning_rate": 7.961077729859173e-06, "loss": 0.6201, "step": 26708 }, { "epoch": 0.3193366730831311, "grad_norm": 2.0128395557403564, "learning_rate": 7.960921714916236e-06, "loss": 0.6151, "step": 26709 }, { "epoch": 0.31934862922799173, "grad_norm": 1.8533916473388672, "learning_rate": 7.960765695533386e-06, "loss": 0.5846, "step": 26710 }, { "epoch": 0.3193605853728524, "grad_norm": 2.257176637649536, "learning_rate": 7.960609671710857e-06, "loss": 0.6952, "step": 26711 }, { "epoch": 0.31937254151771305, "grad_norm": 2.441797971725464, "learning_rate": 7.960453643448882e-06, "loss": 0.5326, "step": 26712 }, { "epoch": 0.31938449766257365, "grad_norm": 3.8618571758270264, "learning_rate": 7.960297610747698e-06, "loss": 0.6582, "step": 26713 }, { "epoch": 0.3193964538074343, "grad_norm": 4.807223320007324, "learning_rate": 7.960141573607538e-06, "loss": 0.6412, "step": 26714 }, { "epoch": 0.31940840995229497, "grad_norm": 3.341008424758911, "learning_rate": 7.959985532028634e-06, "loss": 0.6872, "step": 26715 }, { "epoch": 0.31942036609715563, "grad_norm": 1.7348397970199585, "learning_rate": 7.95982948601122e-06, "loss": 0.511, "step": 26716 }, { "epoch": 0.3194323222420163, "grad_norm": 2.132343292236328, "learning_rate": 7.959673435555533e-06, "loss": 0.6395, "step": 26717 }, { "epoch": 0.31944427838687695, "grad_norm": 3.114973306655884, "learning_rate": 7.959517380661806e-06, "loss": 0.5241, "step": 26718 }, { "epoch": 0.3194562345317376, "grad_norm": 1.9517407417297363, "learning_rate": 7.95936132133027e-06, "loss": 0.7032, "step": 26719 }, { "epoch": 0.31946819067659826, "grad_norm": 2.0243136882781982, "learning_rate": 7.959205257561163e-06, "loss": 0.6299, "step": 26720 }, { "epoch": 0.31948014682145887, "grad_norm": 3.7793102264404297, "learning_rate": 7.959049189354715e-06, "loss": 0.6341, "step": 26721 }, { "epoch": 0.3194921029663195, "grad_norm": 1.6970226764678955, "learning_rate": 7.958893116711164e-06, "loss": 0.5542, "step": 26722 }, { "epoch": 0.3195040591111802, "grad_norm": 2.6813478469848633, "learning_rate": 7.958737039630742e-06, "loss": 0.4922, "step": 26723 }, { "epoch": 0.31951601525604084, "grad_norm": 2.1430301666259766, "learning_rate": 7.958580958113682e-06, "loss": 0.5799, "step": 26724 }, { "epoch": 0.3195279714009015, "grad_norm": 2.0439751148223877, "learning_rate": 7.958424872160221e-06, "loss": 0.5313, "step": 26725 }, { "epoch": 0.31953992754576216, "grad_norm": 1.8401647806167603, "learning_rate": 7.95826878177059e-06, "loss": 0.5151, "step": 26726 }, { "epoch": 0.3195518836906228, "grad_norm": 2.917593240737915, "learning_rate": 7.958112686945025e-06, "loss": 0.5567, "step": 26727 }, { "epoch": 0.3195638398354834, "grad_norm": 1.5855555534362793, "learning_rate": 7.95795658768376e-06, "loss": 0.6484, "step": 26728 }, { "epoch": 0.3195757959803441, "grad_norm": 9.309370040893555, "learning_rate": 7.957800483987028e-06, "loss": 0.6652, "step": 26729 }, { "epoch": 0.31958775212520474, "grad_norm": 1.7208887338638306, "learning_rate": 7.957644375855065e-06, "loss": 0.5471, "step": 26730 }, { "epoch": 0.3195997082700654, "grad_norm": 4.135568141937256, "learning_rate": 7.957488263288103e-06, "loss": 0.5914, "step": 26731 }, { "epoch": 0.31961166441492606, "grad_norm": 2.4677915573120117, "learning_rate": 7.957332146286375e-06, "loss": 0.5757, "step": 26732 }, { "epoch": 0.3196236205597867, "grad_norm": 3.852325677871704, "learning_rate": 7.957176024850119e-06, "loss": 0.5979, "step": 26733 }, { "epoch": 0.3196355767046474, "grad_norm": 2.1703059673309326, "learning_rate": 7.957019898979566e-06, "loss": 0.6335, "step": 26734 }, { "epoch": 0.31964753284950803, "grad_norm": 2.010423183441162, "learning_rate": 7.956863768674951e-06, "loss": 0.5822, "step": 26735 }, { "epoch": 0.31965948899436863, "grad_norm": 3.705871820449829, "learning_rate": 7.956707633936509e-06, "loss": 0.6915, "step": 26736 }, { "epoch": 0.3196714451392293, "grad_norm": 3.522749423980713, "learning_rate": 7.956551494764474e-06, "loss": 0.648, "step": 26737 }, { "epoch": 0.31968340128408995, "grad_norm": 3.55129075050354, "learning_rate": 7.956395351159079e-06, "loss": 0.599, "step": 26738 }, { "epoch": 0.3196953574289506, "grad_norm": 3.6972408294677734, "learning_rate": 7.956239203120558e-06, "loss": 0.5349, "step": 26739 }, { "epoch": 0.31970731357381127, "grad_norm": 35.13603591918945, "learning_rate": 7.956083050649147e-06, "loss": 0.6711, "step": 26740 }, { "epoch": 0.31971926971867193, "grad_norm": 3.3098185062408447, "learning_rate": 7.955926893745077e-06, "loss": 0.6595, "step": 26741 }, { "epoch": 0.3197312258635326, "grad_norm": 1.8130764961242676, "learning_rate": 7.955770732408585e-06, "loss": 0.6119, "step": 26742 }, { "epoch": 0.3197431820083932, "grad_norm": 1.683698058128357, "learning_rate": 7.955614566639906e-06, "loss": 0.4973, "step": 26743 }, { "epoch": 0.31975513815325385, "grad_norm": 1.5049835443496704, "learning_rate": 7.95545839643927e-06, "loss": 0.5871, "step": 26744 }, { "epoch": 0.3197670942981145, "grad_norm": 3.485639810562134, "learning_rate": 7.955302221806916e-06, "loss": 0.7015, "step": 26745 }, { "epoch": 0.31977905044297517, "grad_norm": 1.896059274673462, "learning_rate": 7.955146042743073e-06, "loss": 0.6059, "step": 26746 }, { "epoch": 0.3197910065878358, "grad_norm": 6.316697120666504, "learning_rate": 7.95498985924798e-06, "loss": 0.6404, "step": 26747 }, { "epoch": 0.3198029627326965, "grad_norm": 4.160735607147217, "learning_rate": 7.954833671321869e-06, "loss": 0.52, "step": 26748 }, { "epoch": 0.31981491887755714, "grad_norm": 3.0026915073394775, "learning_rate": 7.954677478964974e-06, "loss": 0.6385, "step": 26749 }, { "epoch": 0.31982687502241774, "grad_norm": 1.77782142162323, "learning_rate": 7.95452128217753e-06, "loss": 0.5525, "step": 26750 }, { "epoch": 0.3198388311672784, "grad_norm": 2.690730094909668, "learning_rate": 7.954365080959769e-06, "loss": 0.564, "step": 26751 }, { "epoch": 0.31985078731213906, "grad_norm": 1.9735387563705444, "learning_rate": 7.95420887531193e-06, "loss": 0.6137, "step": 26752 }, { "epoch": 0.3198627434569997, "grad_norm": 2.533020257949829, "learning_rate": 7.954052665234241e-06, "loss": 0.5534, "step": 26753 }, { "epoch": 0.3198746996018604, "grad_norm": 1.7683792114257812, "learning_rate": 7.953896450726944e-06, "loss": 0.5644, "step": 26754 }, { "epoch": 0.31988665574672104, "grad_norm": 3.569507360458374, "learning_rate": 7.953740231790265e-06, "loss": 0.5773, "step": 26755 }, { "epoch": 0.3198986118915817, "grad_norm": 1.4771320819854736, "learning_rate": 7.953584008424443e-06, "loss": 0.5934, "step": 26756 }, { "epoch": 0.31991056803644236, "grad_norm": 1.7269912958145142, "learning_rate": 7.953427780629713e-06, "loss": 0.4602, "step": 26757 }, { "epoch": 0.31992252418130296, "grad_norm": 4.658987045288086, "learning_rate": 7.953271548406304e-06, "loss": 0.713, "step": 26758 }, { "epoch": 0.3199344803261636, "grad_norm": 3.2820310592651367, "learning_rate": 7.953115311754457e-06, "loss": 0.5904, "step": 26759 }, { "epoch": 0.3199464364710243, "grad_norm": 2.1237313747406006, "learning_rate": 7.952959070674402e-06, "loss": 0.672, "step": 26760 }, { "epoch": 0.31995839261588493, "grad_norm": 1.8453608751296997, "learning_rate": 7.952802825166375e-06, "loss": 0.6192, "step": 26761 }, { "epoch": 0.3199703487607456, "grad_norm": 1.7838724851608276, "learning_rate": 7.95264657523061e-06, "loss": 0.5413, "step": 26762 }, { "epoch": 0.31998230490560625, "grad_norm": 2.558119773864746, "learning_rate": 7.952490320867339e-06, "loss": 0.5563, "step": 26763 }, { "epoch": 0.3199942610504669, "grad_norm": 2.056638717651367, "learning_rate": 7.952334062076798e-06, "loss": 0.5546, "step": 26764 }, { "epoch": 0.3200062171953275, "grad_norm": 1.6876790523529053, "learning_rate": 7.952177798859223e-06, "loss": 0.5297, "step": 26765 }, { "epoch": 0.3200181733401882, "grad_norm": 2.0067577362060547, "learning_rate": 7.952021531214848e-06, "loss": 0.5194, "step": 26766 }, { "epoch": 0.32003012948504883, "grad_norm": 1.912222981452942, "learning_rate": 7.951865259143904e-06, "loss": 0.5985, "step": 26767 }, { "epoch": 0.3200420856299095, "grad_norm": 3.634641170501709, "learning_rate": 7.95170898264663e-06, "loss": 0.6703, "step": 26768 }, { "epoch": 0.32005404177477015, "grad_norm": 2.0704872608184814, "learning_rate": 7.951552701723255e-06, "loss": 0.7038, "step": 26769 }, { "epoch": 0.3200659979196308, "grad_norm": 3.341601610183716, "learning_rate": 7.951396416374018e-06, "loss": 0.7355, "step": 26770 }, { "epoch": 0.32007795406449147, "grad_norm": 4.265626430511475, "learning_rate": 7.95124012659915e-06, "loss": 0.702, "step": 26771 }, { "epoch": 0.3200899102093521, "grad_norm": 2.1228206157684326, "learning_rate": 7.951083832398889e-06, "loss": 0.6358, "step": 26772 }, { "epoch": 0.3201018663542127, "grad_norm": 1.677303433418274, "learning_rate": 7.950927533773468e-06, "loss": 0.5734, "step": 26773 }, { "epoch": 0.3201138224990734, "grad_norm": 1.4332704544067383, "learning_rate": 7.950771230723117e-06, "loss": 0.5372, "step": 26774 }, { "epoch": 0.32012577864393404, "grad_norm": 2.0995121002197266, "learning_rate": 7.950614923248077e-06, "loss": 0.5398, "step": 26775 }, { "epoch": 0.3201377347887947, "grad_norm": 3.22784423828125, "learning_rate": 7.950458611348579e-06, "loss": 0.677, "step": 26776 }, { "epoch": 0.32014969093365536, "grad_norm": 2.3462204933166504, "learning_rate": 7.950302295024856e-06, "loss": 0.529, "step": 26777 }, { "epoch": 0.320161647078516, "grad_norm": 4.117376327514648, "learning_rate": 7.950145974277145e-06, "loss": 0.6137, "step": 26778 }, { "epoch": 0.3201736032233767, "grad_norm": 1.930267333984375, "learning_rate": 7.949989649105681e-06, "loss": 0.5485, "step": 26779 }, { "epoch": 0.3201855593682373, "grad_norm": 1.6376349925994873, "learning_rate": 7.949833319510697e-06, "loss": 0.5793, "step": 26780 }, { "epoch": 0.32019751551309794, "grad_norm": 4.421901702880859, "learning_rate": 7.949676985492428e-06, "loss": 0.5998, "step": 26781 }, { "epoch": 0.3202094716579586, "grad_norm": 3.191730499267578, "learning_rate": 7.949520647051105e-06, "loss": 0.572, "step": 26782 }, { "epoch": 0.32022142780281926, "grad_norm": 3.935866355895996, "learning_rate": 7.949364304186967e-06, "loss": 0.6179, "step": 26783 }, { "epoch": 0.3202333839476799, "grad_norm": 1.5929512977600098, "learning_rate": 7.949207956900247e-06, "loss": 0.5347, "step": 26784 }, { "epoch": 0.3202453400925406, "grad_norm": 1.9448496103286743, "learning_rate": 7.949051605191177e-06, "loss": 0.6689, "step": 26785 }, { "epoch": 0.32025729623740123, "grad_norm": 4.536477088928223, "learning_rate": 7.948895249059998e-06, "loss": 0.6126, "step": 26786 }, { "epoch": 0.32026925238226184, "grad_norm": 1.4849460124969482, "learning_rate": 7.948738888506938e-06, "loss": 0.5365, "step": 26787 }, { "epoch": 0.3202812085271225, "grad_norm": 4.552818298339844, "learning_rate": 7.948582523532231e-06, "loss": 0.672, "step": 26788 }, { "epoch": 0.32029316467198315, "grad_norm": 2.1503143310546875, "learning_rate": 7.948426154136118e-06, "loss": 0.6155, "step": 26789 }, { "epoch": 0.3203051208168438, "grad_norm": 1.8106552362442017, "learning_rate": 7.948269780318826e-06, "loss": 0.5914, "step": 26790 }, { "epoch": 0.32031707696170447, "grad_norm": 3.2933261394500732, "learning_rate": 7.948113402080595e-06, "loss": 0.4892, "step": 26791 }, { "epoch": 0.32032903310656513, "grad_norm": 11.323558807373047, "learning_rate": 7.947957019421657e-06, "loss": 0.5531, "step": 26792 }, { "epoch": 0.3203409892514258, "grad_norm": 1.8755064010620117, "learning_rate": 7.947800632342247e-06, "loss": 0.5666, "step": 26793 }, { "epoch": 0.32035294539628645, "grad_norm": 2.5599019527435303, "learning_rate": 7.9476442408426e-06, "loss": 0.6216, "step": 26794 }, { "epoch": 0.32036490154114705, "grad_norm": 4.2822794914245605, "learning_rate": 7.94748784492295e-06, "loss": 0.5179, "step": 26795 }, { "epoch": 0.3203768576860077, "grad_norm": 1.7793232202529907, "learning_rate": 7.94733144458353e-06, "loss": 0.5999, "step": 26796 }, { "epoch": 0.32038881383086837, "grad_norm": 2.614511251449585, "learning_rate": 7.947175039824578e-06, "loss": 0.6137, "step": 26797 }, { "epoch": 0.320400769975729, "grad_norm": 3.553664445877075, "learning_rate": 7.947018630646325e-06, "loss": 0.6024, "step": 26798 }, { "epoch": 0.3204127261205897, "grad_norm": 2.9326908588409424, "learning_rate": 7.946862217049009e-06, "loss": 0.5967, "step": 26799 }, { "epoch": 0.32042468226545034, "grad_norm": 2.211329936981201, "learning_rate": 7.94670579903286e-06, "loss": 0.5738, "step": 26800 }, { "epoch": 0.320436638410311, "grad_norm": 2.3152124881744385, "learning_rate": 7.946549376598118e-06, "loss": 0.5511, "step": 26801 }, { "epoch": 0.3204485945551716, "grad_norm": 4.411102771759033, "learning_rate": 7.946392949745012e-06, "loss": 0.597, "step": 26802 }, { "epoch": 0.32046055070003227, "grad_norm": 2.1796352863311768, "learning_rate": 7.946236518473782e-06, "loss": 0.5707, "step": 26803 }, { "epoch": 0.3204725068448929, "grad_norm": 2.138162136077881, "learning_rate": 7.946080082784657e-06, "loss": 0.5732, "step": 26804 }, { "epoch": 0.3204844629897536, "grad_norm": 1.8652235269546509, "learning_rate": 7.945923642677878e-06, "loss": 0.5482, "step": 26805 }, { "epoch": 0.32049641913461424, "grad_norm": 3.8560354709625244, "learning_rate": 7.945767198153674e-06, "loss": 0.601, "step": 26806 }, { "epoch": 0.3205083752794749, "grad_norm": 1.6163944005966187, "learning_rate": 7.94561074921228e-06, "loss": 0.6387, "step": 26807 }, { "epoch": 0.32052033142433556, "grad_norm": 3.2729179859161377, "learning_rate": 7.945454295853934e-06, "loss": 0.6345, "step": 26808 }, { "epoch": 0.32053228756919616, "grad_norm": 4.109349727630615, "learning_rate": 7.945297838078869e-06, "loss": 0.6369, "step": 26809 }, { "epoch": 0.3205442437140568, "grad_norm": 2.1710150241851807, "learning_rate": 7.945141375887318e-06, "loss": 0.6718, "step": 26810 }, { "epoch": 0.3205561998589175, "grad_norm": 3.2005491256713867, "learning_rate": 7.944984909279519e-06, "loss": 0.6198, "step": 26811 }, { "epoch": 0.32056815600377814, "grad_norm": 2.2943220138549805, "learning_rate": 7.944828438255703e-06, "loss": 0.6337, "step": 26812 }, { "epoch": 0.3205801121486388, "grad_norm": 3.4723081588745117, "learning_rate": 7.944671962816108e-06, "loss": 0.5521, "step": 26813 }, { "epoch": 0.32059206829349945, "grad_norm": 2.2704620361328125, "learning_rate": 7.944515482960966e-06, "loss": 0.5814, "step": 26814 }, { "epoch": 0.3206040244383601, "grad_norm": 2.8754220008850098, "learning_rate": 7.944358998690512e-06, "loss": 0.5762, "step": 26815 }, { "epoch": 0.32061598058322077, "grad_norm": 4.5974202156066895, "learning_rate": 7.944202510004982e-06, "loss": 0.5741, "step": 26816 }, { "epoch": 0.3206279367280814, "grad_norm": 1.735607624053955, "learning_rate": 7.944046016904609e-06, "loss": 0.6863, "step": 26817 }, { "epoch": 0.32063989287294203, "grad_norm": 8.571578025817871, "learning_rate": 7.94388951938963e-06, "loss": 0.7064, "step": 26818 }, { "epoch": 0.3206518490178027, "grad_norm": 3.500072956085205, "learning_rate": 7.943733017460275e-06, "loss": 0.5366, "step": 26819 }, { "epoch": 0.32066380516266335, "grad_norm": 2.682506561279297, "learning_rate": 7.943576511116784e-06, "loss": 0.6348, "step": 26820 }, { "epoch": 0.320675761307524, "grad_norm": 2.1646358966827393, "learning_rate": 7.943420000359392e-06, "loss": 0.6264, "step": 26821 }, { "epoch": 0.32068771745238467, "grad_norm": 2.352651357650757, "learning_rate": 7.943263485188329e-06, "loss": 0.6718, "step": 26822 }, { "epoch": 0.3206996735972453, "grad_norm": 4.418330192565918, "learning_rate": 7.943106965603831e-06, "loss": 0.5669, "step": 26823 }, { "epoch": 0.32071162974210593, "grad_norm": 4.911539077758789, "learning_rate": 7.942950441606134e-06, "loss": 0.6597, "step": 26824 }, { "epoch": 0.3207235858869666, "grad_norm": 2.6801958084106445, "learning_rate": 7.942793913195472e-06, "loss": 0.5641, "step": 26825 }, { "epoch": 0.32073554203182725, "grad_norm": 2.287473678588867, "learning_rate": 7.942637380372082e-06, "loss": 0.5898, "step": 26826 }, { "epoch": 0.3207474981766879, "grad_norm": 2.0574634075164795, "learning_rate": 7.942480843136195e-06, "loss": 0.5655, "step": 26827 }, { "epoch": 0.32075945432154856, "grad_norm": 4.3792805671691895, "learning_rate": 7.94232430148805e-06, "loss": 0.6103, "step": 26828 }, { "epoch": 0.3207714104664092, "grad_norm": 2.507716417312622, "learning_rate": 7.942167755427877e-06, "loss": 0.6465, "step": 26829 }, { "epoch": 0.3207833666112699, "grad_norm": 2.056269407272339, "learning_rate": 7.942011204955913e-06, "loss": 0.6083, "step": 26830 }, { "epoch": 0.32079532275613054, "grad_norm": 14.032737731933594, "learning_rate": 7.941854650072393e-06, "loss": 0.5672, "step": 26831 }, { "epoch": 0.32080727890099114, "grad_norm": 2.6053097248077393, "learning_rate": 7.941698090777553e-06, "loss": 0.5773, "step": 26832 }, { "epoch": 0.3208192350458518, "grad_norm": 2.5013530254364014, "learning_rate": 7.941541527071624e-06, "loss": 0.6161, "step": 26833 }, { "epoch": 0.32083119119071246, "grad_norm": 2.276860237121582, "learning_rate": 7.941384958954844e-06, "loss": 0.6358, "step": 26834 }, { "epoch": 0.3208431473355731, "grad_norm": 2.462895154953003, "learning_rate": 7.941228386427448e-06, "loss": 0.5989, "step": 26835 }, { "epoch": 0.3208551034804338, "grad_norm": 3.245755672454834, "learning_rate": 7.941071809489667e-06, "loss": 0.7101, "step": 26836 }, { "epoch": 0.32086705962529444, "grad_norm": 2.2644290924072266, "learning_rate": 7.94091522814174e-06, "loss": 0.5668, "step": 26837 }, { "epoch": 0.3208790157701551, "grad_norm": 3.990959882736206, "learning_rate": 7.9407586423839e-06, "loss": 0.5814, "step": 26838 }, { "epoch": 0.3208909719150157, "grad_norm": 2.3247828483581543, "learning_rate": 7.940602052216381e-06, "loss": 0.5291, "step": 26839 }, { "epoch": 0.32090292805987636, "grad_norm": 1.950760006904602, "learning_rate": 7.94044545763942e-06, "loss": 0.6002, "step": 26840 }, { "epoch": 0.320914884204737, "grad_norm": 2.9567415714263916, "learning_rate": 7.940288858653251e-06, "loss": 0.5698, "step": 26841 }, { "epoch": 0.3209268403495977, "grad_norm": 2.035266876220703, "learning_rate": 7.940132255258108e-06, "loss": 0.6189, "step": 26842 }, { "epoch": 0.32093879649445833, "grad_norm": 1.869055151939392, "learning_rate": 7.939975647454226e-06, "loss": 0.5825, "step": 26843 }, { "epoch": 0.320950752639319, "grad_norm": 4.798406600952148, "learning_rate": 7.939819035241841e-06, "loss": 0.5957, "step": 26844 }, { "epoch": 0.32096270878417965, "grad_norm": 1.4966387748718262, "learning_rate": 7.939662418621185e-06, "loss": 0.5922, "step": 26845 }, { "epoch": 0.32097466492904025, "grad_norm": 2.179777145385742, "learning_rate": 7.939505797592494e-06, "loss": 0.575, "step": 26846 }, { "epoch": 0.3209866210739009, "grad_norm": 2.2226216793060303, "learning_rate": 7.939349172156005e-06, "loss": 0.7209, "step": 26847 }, { "epoch": 0.32099857721876157, "grad_norm": 2.402587890625, "learning_rate": 7.939192542311953e-06, "loss": 0.6194, "step": 26848 }, { "epoch": 0.32101053336362223, "grad_norm": 2.238996982574463, "learning_rate": 7.93903590806057e-06, "loss": 0.6287, "step": 26849 }, { "epoch": 0.3210224895084829, "grad_norm": 1.757723093032837, "learning_rate": 7.938879269402092e-06, "loss": 0.543, "step": 26850 }, { "epoch": 0.32103444565334355, "grad_norm": 3.9147706031799316, "learning_rate": 7.938722626336755e-06, "loss": 0.6275, "step": 26851 }, { "epoch": 0.3210464017982042, "grad_norm": 2.986752986907959, "learning_rate": 7.938565978864792e-06, "loss": 0.61, "step": 26852 }, { "epoch": 0.32105835794306486, "grad_norm": 2.593780040740967, "learning_rate": 7.938409326986439e-06, "loss": 0.5589, "step": 26853 }, { "epoch": 0.32107031408792547, "grad_norm": 2.358794689178467, "learning_rate": 7.938252670701931e-06, "loss": 0.6048, "step": 26854 }, { "epoch": 0.3210822702327861, "grad_norm": 2.840158462524414, "learning_rate": 7.938096010011503e-06, "loss": 0.611, "step": 26855 }, { "epoch": 0.3210942263776468, "grad_norm": 2.6679327487945557, "learning_rate": 7.937939344915388e-06, "loss": 0.5724, "step": 26856 }, { "epoch": 0.32110618252250744, "grad_norm": 1.6997008323669434, "learning_rate": 7.937782675413826e-06, "loss": 0.5808, "step": 26857 }, { "epoch": 0.3211181386673681, "grad_norm": 1.6274466514587402, "learning_rate": 7.937626001507046e-06, "loss": 0.553, "step": 26858 }, { "epoch": 0.32113009481222876, "grad_norm": 7.159151077270508, "learning_rate": 7.937469323195284e-06, "loss": 0.5016, "step": 26859 }, { "epoch": 0.3211420509570894, "grad_norm": 2.4982547760009766, "learning_rate": 7.937312640478778e-06, "loss": 0.643, "step": 26860 }, { "epoch": 0.32115400710195, "grad_norm": 2.284580945968628, "learning_rate": 7.93715595335776e-06, "loss": 0.7337, "step": 26861 }, { "epoch": 0.3211659632468107, "grad_norm": 1.508726954460144, "learning_rate": 7.93699926183247e-06, "loss": 0.5564, "step": 26862 }, { "epoch": 0.32117791939167134, "grad_norm": 1.8056608438491821, "learning_rate": 7.936842565903135e-06, "loss": 0.5471, "step": 26863 }, { "epoch": 0.321189875536532, "grad_norm": 2.465710401535034, "learning_rate": 7.936685865569996e-06, "loss": 0.5286, "step": 26864 }, { "epoch": 0.32120183168139266, "grad_norm": 2.1263248920440674, "learning_rate": 7.936529160833283e-06, "loss": 0.5723, "step": 26865 }, { "epoch": 0.3212137878262533, "grad_norm": 2.3098835945129395, "learning_rate": 7.936372451693239e-06, "loss": 0.5147, "step": 26866 }, { "epoch": 0.321225743971114, "grad_norm": 13.07076644897461, "learning_rate": 7.936215738150092e-06, "loss": 0.5777, "step": 26867 }, { "epoch": 0.3212377001159746, "grad_norm": 11.859936714172363, "learning_rate": 7.936059020204078e-06, "loss": 0.5917, "step": 26868 }, { "epoch": 0.32124965626083524, "grad_norm": 3.8421144485473633, "learning_rate": 7.935902297855435e-06, "loss": 0.6034, "step": 26869 }, { "epoch": 0.3212616124056959, "grad_norm": 2.0871613025665283, "learning_rate": 7.935745571104395e-06, "loss": 0.6092, "step": 26870 }, { "epoch": 0.32127356855055655, "grad_norm": 4.4057698249816895, "learning_rate": 7.935588839951194e-06, "loss": 0.5338, "step": 26871 }, { "epoch": 0.3212855246954172, "grad_norm": 1.7485259771347046, "learning_rate": 7.935432104396068e-06, "loss": 0.6483, "step": 26872 }, { "epoch": 0.32129748084027787, "grad_norm": 3.013596773147583, "learning_rate": 7.93527536443925e-06, "loss": 0.6172, "step": 26873 }, { "epoch": 0.32130943698513853, "grad_norm": 2.7725608348846436, "learning_rate": 7.935118620080976e-06, "loss": 0.5486, "step": 26874 }, { "epoch": 0.3213213931299992, "grad_norm": 1.613845705986023, "learning_rate": 7.934961871321481e-06, "loss": 0.4952, "step": 26875 }, { "epoch": 0.3213333492748598, "grad_norm": 2.108529806137085, "learning_rate": 7.934805118161002e-06, "loss": 0.5413, "step": 26876 }, { "epoch": 0.32134530541972045, "grad_norm": 3.656026601791382, "learning_rate": 7.93464836059977e-06, "loss": 0.5049, "step": 26877 }, { "epoch": 0.3213572615645811, "grad_norm": 2.5071234703063965, "learning_rate": 7.934491598638025e-06, "loss": 0.5744, "step": 26878 }, { "epoch": 0.32136921770944177, "grad_norm": 2.2543065547943115, "learning_rate": 7.934334832275998e-06, "loss": 0.6051, "step": 26879 }, { "epoch": 0.3213811738543024, "grad_norm": 2.5283241271972656, "learning_rate": 7.934178061513926e-06, "loss": 0.6321, "step": 26880 }, { "epoch": 0.3213931299991631, "grad_norm": 2.34891414642334, "learning_rate": 7.934021286352042e-06, "loss": 0.5602, "step": 26881 }, { "epoch": 0.32140508614402374, "grad_norm": 2.139667272567749, "learning_rate": 7.933864506790583e-06, "loss": 0.643, "step": 26882 }, { "epoch": 0.32141704228888435, "grad_norm": 3.0730574131011963, "learning_rate": 7.933707722829785e-06, "loss": 0.6697, "step": 26883 }, { "epoch": 0.321428998433745, "grad_norm": 4.84186315536499, "learning_rate": 7.933550934469881e-06, "loss": 0.6537, "step": 26884 }, { "epoch": 0.32144095457860566, "grad_norm": 7.366784572601318, "learning_rate": 7.933394141711109e-06, "loss": 0.575, "step": 26885 }, { "epoch": 0.3214529107234663, "grad_norm": 4.402588367462158, "learning_rate": 7.933237344553699e-06, "loss": 0.6218, "step": 26886 }, { "epoch": 0.321464866868327, "grad_norm": 1.5777525901794434, "learning_rate": 7.933080542997891e-06, "loss": 0.5066, "step": 26887 }, { "epoch": 0.32147682301318764, "grad_norm": 1.662984013557434, "learning_rate": 7.932923737043918e-06, "loss": 0.5774, "step": 26888 }, { "epoch": 0.3214887791580483, "grad_norm": 2.8640050888061523, "learning_rate": 7.932766926692015e-06, "loss": 0.6169, "step": 26889 }, { "epoch": 0.32150073530290896, "grad_norm": 1.8670777082443237, "learning_rate": 7.932610111942417e-06, "loss": 0.5775, "step": 26890 }, { "epoch": 0.32151269144776956, "grad_norm": 3.050509452819824, "learning_rate": 7.932453292795362e-06, "loss": 0.5249, "step": 26891 }, { "epoch": 0.3215246475926302, "grad_norm": 2.7523934841156006, "learning_rate": 7.93229646925108e-06, "loss": 0.6962, "step": 26892 }, { "epoch": 0.3215366037374909, "grad_norm": 3.5392000675201416, "learning_rate": 7.93213964130981e-06, "loss": 0.5762, "step": 26893 }, { "epoch": 0.32154855988235154, "grad_norm": 2.194740056991577, "learning_rate": 7.931982808971788e-06, "loss": 0.6156, "step": 26894 }, { "epoch": 0.3215605160272122, "grad_norm": 1.6991503238677979, "learning_rate": 7.931825972237245e-06, "loss": 0.5935, "step": 26895 }, { "epoch": 0.32157247217207285, "grad_norm": 9.701720237731934, "learning_rate": 7.93166913110642e-06, "loss": 0.6389, "step": 26896 }, { "epoch": 0.3215844283169335, "grad_norm": 6.277223110198975, "learning_rate": 7.931512285579546e-06, "loss": 0.635, "step": 26897 }, { "epoch": 0.3215963844617941, "grad_norm": 3.683323621749878, "learning_rate": 7.931355435656858e-06, "loss": 0.6628, "step": 26898 }, { "epoch": 0.3216083406066548, "grad_norm": 2.2371997833251953, "learning_rate": 7.931198581338596e-06, "loss": 0.5995, "step": 26899 }, { "epoch": 0.32162029675151543, "grad_norm": 2.371797800064087, "learning_rate": 7.931041722624986e-06, "loss": 0.5686, "step": 26900 }, { "epoch": 0.3216322528963761, "grad_norm": 2.488576650619507, "learning_rate": 7.930884859516272e-06, "loss": 0.5877, "step": 26901 }, { "epoch": 0.32164420904123675, "grad_norm": 4.046450138092041, "learning_rate": 7.930727992012684e-06, "loss": 0.5982, "step": 26902 }, { "epoch": 0.3216561651860974, "grad_norm": 2.585165500640869, "learning_rate": 7.930571120114461e-06, "loss": 0.7161, "step": 26903 }, { "epoch": 0.32166812133095807, "grad_norm": 2.9241254329681396, "learning_rate": 7.930414243821833e-06, "loss": 0.6206, "step": 26904 }, { "epoch": 0.32168007747581867, "grad_norm": 2.1597509384155273, "learning_rate": 7.93025736313504e-06, "loss": 0.6545, "step": 26905 }, { "epoch": 0.32169203362067933, "grad_norm": 3.4233875274658203, "learning_rate": 7.930100478054317e-06, "loss": 0.6402, "step": 26906 }, { "epoch": 0.32170398976554, "grad_norm": 1.5013537406921387, "learning_rate": 7.929943588579898e-06, "loss": 0.5028, "step": 26907 }, { "epoch": 0.32171594591040065, "grad_norm": 2.051588773727417, "learning_rate": 7.929786694712017e-06, "loss": 0.6129, "step": 26908 }, { "epoch": 0.3217279020552613, "grad_norm": 1.530596375465393, "learning_rate": 7.92962979645091e-06, "loss": 0.5795, "step": 26909 }, { "epoch": 0.32173985820012196, "grad_norm": 3.370241165161133, "learning_rate": 7.929472893796812e-06, "loss": 0.6953, "step": 26910 }, { "epoch": 0.3217518143449826, "grad_norm": 1.8240690231323242, "learning_rate": 7.929315986749961e-06, "loss": 0.6012, "step": 26911 }, { "epoch": 0.3217637704898433, "grad_norm": 1.774393081665039, "learning_rate": 7.92915907531059e-06, "loss": 0.6294, "step": 26912 }, { "epoch": 0.3217757266347039, "grad_norm": 2.79276442527771, "learning_rate": 7.929002159478933e-06, "loss": 0.655, "step": 26913 }, { "epoch": 0.32178768277956454, "grad_norm": 2.524373769760132, "learning_rate": 7.928845239255228e-06, "loss": 0.6236, "step": 26914 }, { "epoch": 0.3217996389244252, "grad_norm": 2.5504274368286133, "learning_rate": 7.92868831463971e-06, "loss": 0.6223, "step": 26915 }, { "epoch": 0.32181159506928586, "grad_norm": 2.879535675048828, "learning_rate": 7.928531385632612e-06, "loss": 0.6356, "step": 26916 }, { "epoch": 0.3218235512141465, "grad_norm": 4.422842502593994, "learning_rate": 7.928374452234171e-06, "loss": 0.606, "step": 26917 }, { "epoch": 0.3218355073590072, "grad_norm": 2.0880885124206543, "learning_rate": 7.928217514444623e-06, "loss": 0.6207, "step": 26918 }, { "epoch": 0.32184746350386784, "grad_norm": 6.137839317321777, "learning_rate": 7.928060572264202e-06, "loss": 0.582, "step": 26919 }, { "epoch": 0.32185941964872844, "grad_norm": 1.9131431579589844, "learning_rate": 7.927903625693142e-06, "loss": 0.5732, "step": 26920 }, { "epoch": 0.3218713757935891, "grad_norm": 5.838708877563477, "learning_rate": 7.927746674731683e-06, "loss": 0.5484, "step": 26921 }, { "epoch": 0.32188333193844976, "grad_norm": 2.4372408390045166, "learning_rate": 7.927589719380056e-06, "loss": 0.6814, "step": 26922 }, { "epoch": 0.3218952880833104, "grad_norm": 4.81032133102417, "learning_rate": 7.927432759638497e-06, "loss": 0.5889, "step": 26923 }, { "epoch": 0.3219072442281711, "grad_norm": 2.678196668624878, "learning_rate": 7.927275795507243e-06, "loss": 0.582, "step": 26924 }, { "epoch": 0.32191920037303173, "grad_norm": 2.1118056774139404, "learning_rate": 7.92711882698653e-06, "loss": 0.5909, "step": 26925 }, { "epoch": 0.3219311565178924, "grad_norm": 2.0738911628723145, "learning_rate": 7.92696185407659e-06, "loss": 0.4743, "step": 26926 }, { "epoch": 0.321943112662753, "grad_norm": 2.0543019771575928, "learning_rate": 7.926804876777662e-06, "loss": 0.5784, "step": 26927 }, { "epoch": 0.32195506880761365, "grad_norm": 2.4306249618530273, "learning_rate": 7.926647895089978e-06, "loss": 0.676, "step": 26928 }, { "epoch": 0.3219670249524743, "grad_norm": 2.933881998062134, "learning_rate": 7.926490909013776e-06, "loss": 0.6282, "step": 26929 }, { "epoch": 0.32197898109733497, "grad_norm": 2.1562387943267822, "learning_rate": 7.92633391854929e-06, "loss": 0.6059, "step": 26930 }, { "epoch": 0.32199093724219563, "grad_norm": 3.396822929382324, "learning_rate": 7.926176923696756e-06, "loss": 0.6052, "step": 26931 }, { "epoch": 0.3220028933870563, "grad_norm": 1.533632516860962, "learning_rate": 7.92601992445641e-06, "loss": 0.5558, "step": 26932 }, { "epoch": 0.32201484953191695, "grad_norm": 1.7438805103302002, "learning_rate": 7.925862920828485e-06, "loss": 0.626, "step": 26933 }, { "epoch": 0.3220268056767776, "grad_norm": 7.627529144287109, "learning_rate": 7.925705912813219e-06, "loss": 0.6088, "step": 26934 }, { "epoch": 0.3220387618216382, "grad_norm": 2.3182852268218994, "learning_rate": 7.925548900410846e-06, "loss": 0.6417, "step": 26935 }, { "epoch": 0.32205071796649887, "grad_norm": 2.5881431102752686, "learning_rate": 7.925391883621603e-06, "loss": 0.6484, "step": 26936 }, { "epoch": 0.3220626741113595, "grad_norm": 2.5244834423065186, "learning_rate": 7.925234862445726e-06, "loss": 0.4601, "step": 26937 }, { "epoch": 0.3220746302562202, "grad_norm": 1.9897063970565796, "learning_rate": 7.925077836883446e-06, "loss": 0.5553, "step": 26938 }, { "epoch": 0.32208658640108084, "grad_norm": 2.4860589504241943, "learning_rate": 7.924920806935001e-06, "loss": 0.5089, "step": 26939 }, { "epoch": 0.3220985425459415, "grad_norm": 1.4888180494308472, "learning_rate": 7.92476377260063e-06, "loss": 0.5502, "step": 26940 }, { "epoch": 0.32211049869080216, "grad_norm": 1.5429526567459106, "learning_rate": 7.924606733880564e-06, "loss": 0.6977, "step": 26941 }, { "epoch": 0.32212245483566276, "grad_norm": 5.397665500640869, "learning_rate": 7.924449690775039e-06, "loss": 0.5872, "step": 26942 }, { "epoch": 0.3221344109805234, "grad_norm": 3.2101683616638184, "learning_rate": 7.924292643284292e-06, "loss": 0.5268, "step": 26943 }, { "epoch": 0.3221463671253841, "grad_norm": 3.765265464782715, "learning_rate": 7.924135591408558e-06, "loss": 0.6725, "step": 26944 }, { "epoch": 0.32215832327024474, "grad_norm": 1.9490886926651, "learning_rate": 7.923978535148071e-06, "loss": 0.531, "step": 26945 }, { "epoch": 0.3221702794151054, "grad_norm": 2.329157829284668, "learning_rate": 7.923821474503069e-06, "loss": 0.652, "step": 26946 }, { "epoch": 0.32218223555996606, "grad_norm": 1.5775781869888306, "learning_rate": 7.923664409473785e-06, "loss": 0.6865, "step": 26947 }, { "epoch": 0.3221941917048267, "grad_norm": 1.968026876449585, "learning_rate": 7.923507340060457e-06, "loss": 0.534, "step": 26948 }, { "epoch": 0.3222061478496874, "grad_norm": 1.729583978652954, "learning_rate": 7.92335026626332e-06, "loss": 0.5863, "step": 26949 }, { "epoch": 0.322218103994548, "grad_norm": 2.0666003227233887, "learning_rate": 7.923193188082609e-06, "loss": 0.5211, "step": 26950 }, { "epoch": 0.32223006013940864, "grad_norm": 3.002638339996338, "learning_rate": 7.923036105518557e-06, "loss": 0.5431, "step": 26951 }, { "epoch": 0.3222420162842693, "grad_norm": 2.138897180557251, "learning_rate": 7.922879018571403e-06, "loss": 0.5331, "step": 26952 }, { "epoch": 0.32225397242912995, "grad_norm": 5.055375576019287, "learning_rate": 7.922721927241384e-06, "loss": 0.6364, "step": 26953 }, { "epoch": 0.3222659285739906, "grad_norm": 15.712846755981445, "learning_rate": 7.92256483152873e-06, "loss": 0.5839, "step": 26954 }, { "epoch": 0.32227788471885127, "grad_norm": 2.46297025680542, "learning_rate": 7.922407731433682e-06, "loss": 0.6375, "step": 26955 }, { "epoch": 0.32228984086371193, "grad_norm": 2.3693504333496094, "learning_rate": 7.922250626956471e-06, "loss": 0.5262, "step": 26956 }, { "epoch": 0.32230179700857253, "grad_norm": 1.9345717430114746, "learning_rate": 7.922093518097335e-06, "loss": 0.6105, "step": 26957 }, { "epoch": 0.3223137531534332, "grad_norm": 1.7017085552215576, "learning_rate": 7.921936404856512e-06, "loss": 0.5527, "step": 26958 }, { "epoch": 0.32232570929829385, "grad_norm": 4.300602436065674, "learning_rate": 7.921779287234233e-06, "loss": 0.5975, "step": 26959 }, { "epoch": 0.3223376654431545, "grad_norm": 2.820913791656494, "learning_rate": 7.921622165230735e-06, "loss": 0.5728, "step": 26960 }, { "epoch": 0.32234962158801517, "grad_norm": 2.050601005554199, "learning_rate": 7.921465038846254e-06, "loss": 0.462, "step": 26961 }, { "epoch": 0.3223615777328758, "grad_norm": 2.17867374420166, "learning_rate": 7.921307908081027e-06, "loss": 0.5295, "step": 26962 }, { "epoch": 0.3223735338777365, "grad_norm": 2.0282111167907715, "learning_rate": 7.921150772935289e-06, "loss": 0.6294, "step": 26963 }, { "epoch": 0.3223854900225971, "grad_norm": 4.608590602874756, "learning_rate": 7.920993633409273e-06, "loss": 0.5805, "step": 26964 }, { "epoch": 0.32239744616745775, "grad_norm": 18.113014221191406, "learning_rate": 7.920836489503219e-06, "loss": 0.6633, "step": 26965 }, { "epoch": 0.3224094023123184, "grad_norm": 6.268910884857178, "learning_rate": 7.920679341217358e-06, "loss": 0.5339, "step": 26966 }, { "epoch": 0.32242135845717906, "grad_norm": 9.127695083618164, "learning_rate": 7.92052218855193e-06, "loss": 0.5715, "step": 26967 }, { "epoch": 0.3224333146020397, "grad_norm": 3.5776469707489014, "learning_rate": 7.920365031507166e-06, "loss": 0.6074, "step": 26968 }, { "epoch": 0.3224452707469004, "grad_norm": 7.259677410125732, "learning_rate": 7.920207870083306e-06, "loss": 0.5785, "step": 26969 }, { "epoch": 0.32245722689176104, "grad_norm": 1.535256028175354, "learning_rate": 7.920050704280584e-06, "loss": 0.6007, "step": 26970 }, { "epoch": 0.3224691830366217, "grad_norm": 2.0388243198394775, "learning_rate": 7.919893534099235e-06, "loss": 0.6851, "step": 26971 }, { "epoch": 0.3224811391814823, "grad_norm": 2.773578405380249, "learning_rate": 7.919736359539496e-06, "loss": 0.7052, "step": 26972 }, { "epoch": 0.32249309532634296, "grad_norm": 1.921924114227295, "learning_rate": 7.919579180601602e-06, "loss": 0.7095, "step": 26973 }, { "epoch": 0.3225050514712036, "grad_norm": 2.312222957611084, "learning_rate": 7.919421997285787e-06, "loss": 0.6882, "step": 26974 }, { "epoch": 0.3225170076160643, "grad_norm": 2.1957931518554688, "learning_rate": 7.91926480959229e-06, "loss": 0.5663, "step": 26975 }, { "epoch": 0.32252896376092494, "grad_norm": 1.7942653894424438, "learning_rate": 7.919107617521345e-06, "loss": 0.5643, "step": 26976 }, { "epoch": 0.3225409199057856, "grad_norm": 2.2638373374938965, "learning_rate": 7.918950421073187e-06, "loss": 0.5578, "step": 26977 }, { "epoch": 0.32255287605064625, "grad_norm": 7.6030354499816895, "learning_rate": 7.918793220248053e-06, "loss": 0.643, "step": 26978 }, { "epoch": 0.32256483219550686, "grad_norm": 7.044876575469971, "learning_rate": 7.918636015046177e-06, "loss": 0.5966, "step": 26979 }, { "epoch": 0.3225767883403675, "grad_norm": 2.2338809967041016, "learning_rate": 7.918478805467797e-06, "loss": 0.5736, "step": 26980 }, { "epoch": 0.3225887444852282, "grad_norm": 4.044449329376221, "learning_rate": 7.918321591513149e-06, "loss": 0.5952, "step": 26981 }, { "epoch": 0.32260070063008883, "grad_norm": 2.795323371887207, "learning_rate": 7.918164373182465e-06, "loss": 0.6317, "step": 26982 }, { "epoch": 0.3226126567749495, "grad_norm": 1.4737919569015503, "learning_rate": 7.918007150475984e-06, "loss": 0.5038, "step": 26983 }, { "epoch": 0.32262461291981015, "grad_norm": 4.0565409660339355, "learning_rate": 7.917849923393941e-06, "loss": 0.5689, "step": 26984 }, { "epoch": 0.3226365690646708, "grad_norm": 3.0319504737854004, "learning_rate": 7.917692691936573e-06, "loss": 0.542, "step": 26985 }, { "epoch": 0.3226485252095314, "grad_norm": 1.9800375699996948, "learning_rate": 7.917535456104112e-06, "loss": 0.5902, "step": 26986 }, { "epoch": 0.32266048135439207, "grad_norm": 8.899724960327148, "learning_rate": 7.917378215896799e-06, "loss": 0.576, "step": 26987 }, { "epoch": 0.32267243749925273, "grad_norm": 6.2257771492004395, "learning_rate": 7.917220971314863e-06, "loss": 0.625, "step": 26988 }, { "epoch": 0.3226843936441134, "grad_norm": 1.6842650175094604, "learning_rate": 7.917063722358547e-06, "loss": 0.5952, "step": 26989 }, { "epoch": 0.32269634978897405, "grad_norm": 6.943859577178955, "learning_rate": 7.916906469028083e-06, "loss": 0.4863, "step": 26990 }, { "epoch": 0.3227083059338347, "grad_norm": 2.3944525718688965, "learning_rate": 7.916749211323708e-06, "loss": 0.5934, "step": 26991 }, { "epoch": 0.32272026207869536, "grad_norm": 3.0000290870666504, "learning_rate": 7.916591949245655e-06, "loss": 0.6462, "step": 26992 }, { "epoch": 0.322732218223556, "grad_norm": 3.5746805667877197, "learning_rate": 7.916434682794162e-06, "loss": 0.5399, "step": 26993 }, { "epoch": 0.3227441743684166, "grad_norm": 2.432072401046753, "learning_rate": 7.91627741196947e-06, "loss": 0.623, "step": 26994 }, { "epoch": 0.3227561305132773, "grad_norm": 1.796452522277832, "learning_rate": 7.916120136771803e-06, "loss": 0.5201, "step": 26995 }, { "epoch": 0.32276808665813794, "grad_norm": 1.9413776397705078, "learning_rate": 7.915962857201406e-06, "loss": 0.5087, "step": 26996 }, { "epoch": 0.3227800428029986, "grad_norm": 6.385105609893799, "learning_rate": 7.915805573258512e-06, "loss": 0.6442, "step": 26997 }, { "epoch": 0.32279199894785926, "grad_norm": 1.9577213525772095, "learning_rate": 7.915648284943358e-06, "loss": 0.5801, "step": 26998 }, { "epoch": 0.3228039550927199, "grad_norm": 2.069641351699829, "learning_rate": 7.91549099225618e-06, "loss": 0.5415, "step": 26999 }, { "epoch": 0.3228159112375806, "grad_norm": 2.4541938304901123, "learning_rate": 7.915333695197211e-06, "loss": 0.6452, "step": 27000 } ], "logging_steps": 1.0, "max_steps": 83639, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.712331440243186e+19, "train_batch_size": 3, "trial_name": null, "trial_params": null }