{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999330102940848, "eval_steps": 50000, "global_step": 11195, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017863921577384274, "grad_norm": 18.786898101687864, "learning_rate": 3.571428571428571e-09, "loss": 1.4782, "step": 2 }, { "epoch": 0.0003572784315476855, "grad_norm": 141.45917172677423, "learning_rate": 7.142857142857142e-09, "loss": -0.1862, "step": 4 }, { "epoch": 0.0005359176473215282, "grad_norm": 24.466191846240218, "learning_rate": 1.0714285714285715e-08, "loss": 1.279, "step": 6 }, { "epoch": 0.000714556863095371, "grad_norm": 55.97975882796835, "learning_rate": 1.4285714285714284e-08, "loss": 1.3407, "step": 8 }, { "epoch": 0.0008931960788692137, "grad_norm": 7.152679218818933, "learning_rate": 1.7857142857142856e-08, "loss": 1.0668, "step": 10 }, { "epoch": 0.0010718352946430564, "grad_norm": 74.68711100840125, "learning_rate": 2.142857142857143e-08, "loss": 0.3619, "step": 12 }, { "epoch": 0.0012504745104168992, "grad_norm": 160.77378201801125, "learning_rate": 2.5e-08, "loss": -0.6484, "step": 14 }, { "epoch": 0.001429113726190742, "grad_norm": 33.2448781593214, "learning_rate": 2.857142857142857e-08, "loss": -0.1408, "step": 16 }, { "epoch": 0.0016077529419645847, "grad_norm": 22.88015572234827, "learning_rate": 3.214285714285714e-08, "loss": 0.3728, "step": 18 }, { "epoch": 0.0017863921577384275, "grad_norm": 65.36930469336315, "learning_rate": 3.571428571428571e-08, "loss": 0.3539, "step": 20 }, { "epoch": 0.0019650313735122705, "grad_norm": 51.63678124878017, "learning_rate": 3.9285714285714285e-08, "loss": 0.9107, "step": 22 }, { "epoch": 0.002143670589286113, "grad_norm": 91.5382807132486, "learning_rate": 4.285714285714286e-08, "loss": -0.4873, "step": 24 }, { "epoch": 0.0023223098050599556, "grad_norm": 16.84983022360917, "learning_rate": 4.642857142857143e-08, "loss": 0.3308, "step": 26 }, { "epoch": 0.0025009490208337984, "grad_norm": 27.78721199384035, "learning_rate": 5e-08, "loss": -0.4459, "step": 28 }, { "epoch": 0.002679588236607641, "grad_norm": 98.38693145560542, "learning_rate": 5.3571428571428564e-08, "loss": -1.7249, "step": 30 }, { "epoch": 0.002858227452381484, "grad_norm": 13.949863031186622, "learning_rate": 5.714285714285714e-08, "loss": 0.7553, "step": 32 }, { "epoch": 0.0030368666681553267, "grad_norm": 127.30515135885115, "learning_rate": 6.071428571428572e-08, "loss": -0.6736, "step": 34 }, { "epoch": 0.0032155058839291695, "grad_norm": 9.435278624770483, "learning_rate": 6.428571428571428e-08, "loss": -0.1768, "step": 36 }, { "epoch": 0.0033941450997030122, "grad_norm": 16.583029177127234, "learning_rate": 6.785714285714285e-08, "loss": 0.2611, "step": 38 }, { "epoch": 0.003572784315476855, "grad_norm": 12.633346367043353, "learning_rate": 7.142857142857142e-08, "loss": -0.18, "step": 40 }, { "epoch": 0.0037514235312506978, "grad_norm": 31.037346770161427, "learning_rate": 7.5e-08, "loss": -0.4858, "step": 42 }, { "epoch": 0.003930062747024541, "grad_norm": 44.498895867183464, "learning_rate": 7.857142857142857e-08, "loss": 0.7181, "step": 44 }, { "epoch": 0.004108701962798384, "grad_norm": 13.30141621557057, "learning_rate": 8.214285714285714e-08, "loss": -1.0592, "step": 46 }, { "epoch": 0.004287341178572226, "grad_norm": 347.6827644383598, "learning_rate": 8.571428571428572e-08, "loss": -0.1683, "step": 48 }, { "epoch": 0.004465980394346068, "grad_norm": 14.410268492040505, "learning_rate": 8.928571428571429e-08, "loss": 0.445, "step": 50 }, { "epoch": 0.004644619610119911, "grad_norm": 8.837192436164814, "learning_rate": 9.285714285714286e-08, "loss": 0.6057, "step": 52 }, { "epoch": 0.004823258825893754, "grad_norm": 309.30357761429144, "learning_rate": 9.642857142857142e-08, "loss": -1.2918, "step": 54 }, { "epoch": 0.005001898041667597, "grad_norm": 15.209636865779116, "learning_rate": 1e-07, "loss": -1.0245, "step": 56 }, { "epoch": 0.0051805372574414395, "grad_norm": 45.87125325299059, "learning_rate": 1.0357142857142857e-07, "loss": -0.652, "step": 58 }, { "epoch": 0.005359176473215282, "grad_norm": 67.88878198016224, "learning_rate": 1.0714285714285713e-07, "loss": 0.8178, "step": 60 }, { "epoch": 0.005537815688989125, "grad_norm": 9.507536057377022, "learning_rate": 1.107142857142857e-07, "loss": 0.4417, "step": 62 }, { "epoch": 0.005716454904762968, "grad_norm": 7.611265225795403, "learning_rate": 1.1428571428571427e-07, "loss": 0.2199, "step": 64 }, { "epoch": 0.005895094120536811, "grad_norm": 60.57253085467558, "learning_rate": 1.1785714285714285e-07, "loss": -1.1649, "step": 66 }, { "epoch": 0.006073733336310653, "grad_norm": 53.31721659968289, "learning_rate": 1.2142857142857143e-07, "loss": 0.7922, "step": 68 }, { "epoch": 0.006252372552084496, "grad_norm": 27.347438367311593, "learning_rate": 1.25e-07, "loss": 1.3836, "step": 70 }, { "epoch": 0.006431011767858339, "grad_norm": 15.442302497390033, "learning_rate": 1.2857142857142855e-07, "loss": 0.4335, "step": 72 }, { "epoch": 0.006609650983632182, "grad_norm": 110.89138125886971, "learning_rate": 1.3214285714285714e-07, "loss": 0.2548, "step": 74 }, { "epoch": 0.0067882901994060245, "grad_norm": 7.4413406329128, "learning_rate": 1.357142857142857e-07, "loss": -0.2982, "step": 76 }, { "epoch": 0.006966929415179867, "grad_norm": 20.468612833509873, "learning_rate": 1.392857142857143e-07, "loss": -0.2663, "step": 78 }, { "epoch": 0.00714556863095371, "grad_norm": 11.960250505866753, "learning_rate": 1.4285714285714285e-07, "loss": 0.3624, "step": 80 }, { "epoch": 0.007324207846727553, "grad_norm": 10.670062557705627, "learning_rate": 1.4642857142857143e-07, "loss": 0.0248, "step": 82 }, { "epoch": 0.0075028470625013955, "grad_norm": 11.890099492535073, "learning_rate": 1.5e-07, "loss": -0.2556, "step": 84 }, { "epoch": 0.007681486278275238, "grad_norm": 19.44156518357278, "learning_rate": 1.5357142857142858e-07, "loss": 0.0133, "step": 86 }, { "epoch": 0.007860125494049082, "grad_norm": 24.17483543805482, "learning_rate": 1.5714285714285714e-07, "loss": -0.2753, "step": 88 }, { "epoch": 0.008038764709822925, "grad_norm": 72.21453768546192, "learning_rate": 1.6071428571428573e-07, "loss": 0.2617, "step": 90 }, { "epoch": 0.008217403925596767, "grad_norm": 19.234185398385165, "learning_rate": 1.6428571428571429e-07, "loss": -0.3758, "step": 92 }, { "epoch": 0.008396043141370609, "grad_norm": 13.961217295741308, "learning_rate": 1.6785714285714285e-07, "loss": -0.1137, "step": 94 }, { "epoch": 0.008574682357144451, "grad_norm": 77.63058240293415, "learning_rate": 1.7142857142857143e-07, "loss": -0.7708, "step": 96 }, { "epoch": 0.008753321572918294, "grad_norm": 57.325246077889844, "learning_rate": 1.75e-07, "loss": 0.9129, "step": 98 }, { "epoch": 0.008931960788692137, "grad_norm": 7.775116613562281, "learning_rate": 1.7857142857142858e-07, "loss": 0.9652, "step": 100 }, { "epoch": 0.00911060000446598, "grad_norm": 30.191947783101043, "learning_rate": 1.8214285714285714e-07, "loss": -0.2648, "step": 102 }, { "epoch": 0.009289239220239822, "grad_norm": 35.297402400331634, "learning_rate": 1.8571428571428572e-07, "loss": -1.2386, "step": 104 }, { "epoch": 0.009467878436013665, "grad_norm": 13.463143794995528, "learning_rate": 1.8928571428571426e-07, "loss": -0.3006, "step": 106 }, { "epoch": 0.009646517651787508, "grad_norm": 14.010196787114443, "learning_rate": 1.9285714285714284e-07, "loss": 0.5769, "step": 108 }, { "epoch": 0.00982515686756135, "grad_norm": 101.61530358107976, "learning_rate": 1.964285714285714e-07, "loss": -0.6349, "step": 110 }, { "epoch": 0.010003796083335193, "grad_norm": 33.87296753589062, "learning_rate": 2e-07, "loss": 0.4282, "step": 112 }, { "epoch": 0.010182435299109036, "grad_norm": 33.4046220516572, "learning_rate": 2.0357142857142855e-07, "loss": -1.0798, "step": 114 }, { "epoch": 0.010361074514882879, "grad_norm": 131.36281609772607, "learning_rate": 2.0714285714285714e-07, "loss": -1.2895, "step": 116 }, { "epoch": 0.010539713730656722, "grad_norm": 16.788463555219156, "learning_rate": 2.107142857142857e-07, "loss": -0.3415, "step": 118 }, { "epoch": 0.010718352946430565, "grad_norm": 52.6586168792974, "learning_rate": 2.1428571428571426e-07, "loss": -0.6542, "step": 120 }, { "epoch": 0.010896992162204407, "grad_norm": 25.08602995545093, "learning_rate": 2.1785714285714284e-07, "loss": 0.0057, "step": 122 }, { "epoch": 0.01107563137797825, "grad_norm": 14.979408371858296, "learning_rate": 2.214285714285714e-07, "loss": -0.5648, "step": 124 }, { "epoch": 0.011254270593752093, "grad_norm": 55.70051492833123, "learning_rate": 2.25e-07, "loss": -1.0923, "step": 126 }, { "epoch": 0.011432909809525936, "grad_norm": 17.799335527340446, "learning_rate": 2.2857142857142855e-07, "loss": -0.619, "step": 128 }, { "epoch": 0.011611549025299778, "grad_norm": 50.199008254023305, "learning_rate": 2.3214285714285714e-07, "loss": 0.0713, "step": 130 }, { "epoch": 0.011790188241073621, "grad_norm": 50.413836708195745, "learning_rate": 2.357142857142857e-07, "loss": 1.2218, "step": 132 }, { "epoch": 0.011968827456847464, "grad_norm": 31.825126615333854, "learning_rate": 2.392857142857143e-07, "loss": 0.0376, "step": 134 }, { "epoch": 0.012147466672621307, "grad_norm": 13.622462045018468, "learning_rate": 2.4285714285714287e-07, "loss": -0.3619, "step": 136 }, { "epoch": 0.01232610588839515, "grad_norm": 22.113950912290004, "learning_rate": 2.4642857142857143e-07, "loss": -0.3998, "step": 138 }, { "epoch": 0.012504745104168992, "grad_norm": 17.3896527973983, "learning_rate": 2.5e-07, "loss": -0.3682, "step": 140 }, { "epoch": 0.012683384319942835, "grad_norm": 27.401409073956057, "learning_rate": 2.5357142857142855e-07, "loss": -0.0518, "step": 142 }, { "epoch": 0.012862023535716678, "grad_norm": 10.17186112410248, "learning_rate": 2.571428571428571e-07, "loss": -1.0686, "step": 144 }, { "epoch": 0.01304066275149052, "grad_norm": 8.996973164517435, "learning_rate": 2.607142857142857e-07, "loss": 1.0144, "step": 146 }, { "epoch": 0.013219301967264363, "grad_norm": 24.471208120063643, "learning_rate": 2.642857142857143e-07, "loss": -0.9954, "step": 148 }, { "epoch": 0.013397941183038206, "grad_norm": 20.838540771708097, "learning_rate": 2.6785714285714284e-07, "loss": -0.8454, "step": 150 }, { "epoch": 0.013576580398812049, "grad_norm": 15.961518264489039, "learning_rate": 2.714285714285714e-07, "loss": -0.6966, "step": 152 }, { "epoch": 0.013755219614585892, "grad_norm": 38.96435013001869, "learning_rate": 2.75e-07, "loss": 0.6919, "step": 154 }, { "epoch": 0.013933858830359734, "grad_norm": 29.621944479162106, "learning_rate": 2.785714285714286e-07, "loss": -0.5975, "step": 156 }, { "epoch": 0.014112498046133577, "grad_norm": 32.17520244448743, "learning_rate": 2.8214285714285713e-07, "loss": 0.0857, "step": 158 }, { "epoch": 0.01429113726190742, "grad_norm": 35.327788103767375, "learning_rate": 2.857142857142857e-07, "loss": -0.2045, "step": 160 }, { "epoch": 0.014469776477681263, "grad_norm": 31.717958156680044, "learning_rate": 2.892857142857143e-07, "loss": -1.0658, "step": 162 }, { "epoch": 0.014648415693455106, "grad_norm": 21.04899331628252, "learning_rate": 2.9285714285714287e-07, "loss": 0.5059, "step": 164 }, { "epoch": 0.014827054909228948, "grad_norm": 17.621903817142602, "learning_rate": 2.9642857142857143e-07, "loss": 0.4982, "step": 166 }, { "epoch": 0.015005694125002791, "grad_norm": 46.923469732307204, "learning_rate": 3e-07, "loss": 1.4866, "step": 168 }, { "epoch": 0.015184333340776634, "grad_norm": 34.88436184269214, "learning_rate": 3.0357142857142855e-07, "loss": 0.2659, "step": 170 }, { "epoch": 0.015362972556550477, "grad_norm": 12.715980771933697, "learning_rate": 3.0714285714285716e-07, "loss": 0.1582, "step": 172 }, { "epoch": 0.01554161177232432, "grad_norm": 98.98228218979395, "learning_rate": 3.107142857142857e-07, "loss": 0.4628, "step": 174 }, { "epoch": 0.015720250988098164, "grad_norm": 23.130580249439944, "learning_rate": 3.142857142857143e-07, "loss": -1.0461, "step": 176 }, { "epoch": 0.015898890203872005, "grad_norm": 15.814198760806349, "learning_rate": 3.1785714285714284e-07, "loss": 1.1899, "step": 178 }, { "epoch": 0.01607752941964585, "grad_norm": 20.57208677782911, "learning_rate": 3.2142857142857145e-07, "loss": -0.656, "step": 180 }, { "epoch": 0.01625616863541969, "grad_norm": 25.53571167601916, "learning_rate": 3.25e-07, "loss": 0.8772, "step": 182 }, { "epoch": 0.016434807851193535, "grad_norm": 43.803895374726665, "learning_rate": 3.2857142857142857e-07, "loss": 0.1954, "step": 184 }, { "epoch": 0.016613447066967376, "grad_norm": 57.54575073569181, "learning_rate": 3.3214285714285713e-07, "loss": -0.9364, "step": 186 }, { "epoch": 0.016792086282741217, "grad_norm": 25.42859300640042, "learning_rate": 3.357142857142857e-07, "loss": -0.6152, "step": 188 }, { "epoch": 0.01697072549851506, "grad_norm": 8.501999115069156, "learning_rate": 3.392857142857143e-07, "loss": 1.1694, "step": 190 }, { "epoch": 0.017149364714288903, "grad_norm": 11.141677528514908, "learning_rate": 3.4285714285714286e-07, "loss": 0.5064, "step": 192 }, { "epoch": 0.017328003930062747, "grad_norm": 6.46652227548591, "learning_rate": 3.464285714285714e-07, "loss": 0.439, "step": 194 }, { "epoch": 0.017506643145836588, "grad_norm": 21.601321625013263, "learning_rate": 3.5e-07, "loss": -0.3558, "step": 196 }, { "epoch": 0.017685282361610433, "grad_norm": 10.643173651958431, "learning_rate": 3.535714285714286e-07, "loss": -0.3033, "step": 198 }, { "epoch": 0.017863921577384274, "grad_norm": 14.30335193109796, "learning_rate": 3.5714285714285716e-07, "loss": -0.0935, "step": 200 }, { "epoch": 0.018042560793158118, "grad_norm": 16.01725887245097, "learning_rate": 3.607142857142857e-07, "loss": -0.946, "step": 202 }, { "epoch": 0.01822120000893196, "grad_norm": 7.207767471711995, "learning_rate": 3.642857142857143e-07, "loss": 0.1079, "step": 204 }, { "epoch": 0.018399839224705804, "grad_norm": 16.252610217044754, "learning_rate": 3.678571428571429e-07, "loss": -0.139, "step": 206 }, { "epoch": 0.018578478440479645, "grad_norm": 7.102073860852195, "learning_rate": 3.7142857142857145e-07, "loss": 0.0614, "step": 208 }, { "epoch": 0.01875711765625349, "grad_norm": 14.59925127591764, "learning_rate": 3.75e-07, "loss": -0.0862, "step": 210 }, { "epoch": 0.01893575687202733, "grad_norm": 10.259456459207716, "learning_rate": 3.785714285714285e-07, "loss": -0.0677, "step": 212 }, { "epoch": 0.019114396087801175, "grad_norm": 18.712524397737937, "learning_rate": 3.821428571428571e-07, "loss": 0.795, "step": 214 }, { "epoch": 0.019293035303575016, "grad_norm": 9.580295733703363, "learning_rate": 3.857142857142857e-07, "loss": -0.5644, "step": 216 }, { "epoch": 0.01947167451934886, "grad_norm": 45.63689110881815, "learning_rate": 3.8928571428571425e-07, "loss": -0.0978, "step": 218 }, { "epoch": 0.0196503137351227, "grad_norm": 237.65776832190863, "learning_rate": 3.928571428571428e-07, "loss": 0.3863, "step": 220 }, { "epoch": 0.019828952950896546, "grad_norm": 31.072561679968853, "learning_rate": 3.9642857142857137e-07, "loss": 0.2116, "step": 222 }, { "epoch": 0.020007592166670387, "grad_norm": 98.49513689613056, "learning_rate": 4e-07, "loss": -0.3138, "step": 224 }, { "epoch": 0.02018623138244423, "grad_norm": 53.250930706361835, "learning_rate": 4.0357142857142854e-07, "loss": -0.2908, "step": 226 }, { "epoch": 0.020364870598218072, "grad_norm": 377.633564947556, "learning_rate": 4.071428571428571e-07, "loss": -0.665, "step": 228 }, { "epoch": 0.020543509813991917, "grad_norm": 10.48379756059339, "learning_rate": 4.1071428571428566e-07, "loss": 0.4489, "step": 230 }, { "epoch": 0.020722149029765758, "grad_norm": 20.901422099962033, "learning_rate": 4.142857142857143e-07, "loss": -0.4179, "step": 232 }, { "epoch": 0.020900788245539603, "grad_norm": 169.23377991885957, "learning_rate": 4.1785714285714283e-07, "loss": 0.3354, "step": 234 }, { "epoch": 0.021079427461313444, "grad_norm": 497.4325331137882, "learning_rate": 4.214285714285714e-07, "loss": -1.5918, "step": 236 }, { "epoch": 0.021258066677087288, "grad_norm": 15.34467830297135, "learning_rate": 4.2499999999999995e-07, "loss": -0.4828, "step": 238 }, { "epoch": 0.02143670589286113, "grad_norm": 253.7486068311509, "learning_rate": 4.285714285714285e-07, "loss": 0.1765, "step": 240 }, { "epoch": 0.021615345108634974, "grad_norm": 13.065429359035145, "learning_rate": 4.3214285714285713e-07, "loss": 1.1736, "step": 242 }, { "epoch": 0.021793984324408815, "grad_norm": 18.647905640195226, "learning_rate": 4.357142857142857e-07, "loss": -0.1571, "step": 244 }, { "epoch": 0.02197262354018266, "grad_norm": 10.559381690563692, "learning_rate": 4.3928571428571425e-07, "loss": -0.1131, "step": 246 }, { "epoch": 0.0221512627559565, "grad_norm": 8.547237409478244, "learning_rate": 4.428571428571428e-07, "loss": -0.415, "step": 248 }, { "epoch": 0.022329901971730345, "grad_norm": 28.4424188740383, "learning_rate": 4.464285714285714e-07, "loss": -0.8173, "step": 250 }, { "epoch": 0.022508541187504186, "grad_norm": 36.53475676870502, "learning_rate": 4.5e-07, "loss": -1.3847, "step": 252 }, { "epoch": 0.02268718040327803, "grad_norm": 13.60485246145826, "learning_rate": 4.5357142857142854e-07, "loss": 0.6507, "step": 254 }, { "epoch": 0.02286581961905187, "grad_norm": 14.571882246205039, "learning_rate": 4.571428571428571e-07, "loss": -0.1055, "step": 256 }, { "epoch": 0.023044458834825716, "grad_norm": 34.21510040469157, "learning_rate": 4.6071428571428566e-07, "loss": -0.8227, "step": 258 }, { "epoch": 0.023223098050599557, "grad_norm": 35.80881682357974, "learning_rate": 4.6428571428571427e-07, "loss": 0.5357, "step": 260 }, { "epoch": 0.0234017372663734, "grad_norm": 13.417275045750547, "learning_rate": 4.6785714285714283e-07, "loss": 0.2815, "step": 262 }, { "epoch": 0.023580376482147242, "grad_norm": 78.35879081287722, "learning_rate": 4.714285714285714e-07, "loss": 0.9288, "step": 264 }, { "epoch": 0.023759015697921087, "grad_norm": 79.11485112144716, "learning_rate": 4.7499999999999995e-07, "loss": 0.0788, "step": 266 }, { "epoch": 0.023937654913694928, "grad_norm": 17.317996257788188, "learning_rate": 4.785714285714286e-07, "loss": 0.4557, "step": 268 }, { "epoch": 0.024116294129468772, "grad_norm": 23.225389150063233, "learning_rate": 4.821428571428571e-07, "loss": 0.2645, "step": 270 }, { "epoch": 0.024294933345242613, "grad_norm": 23.30582804994815, "learning_rate": 4.857142857142857e-07, "loss": 0.855, "step": 272 }, { "epoch": 0.024473572561016458, "grad_norm": 12.215515801387403, "learning_rate": 4.892857142857142e-07, "loss": 0.4007, "step": 274 }, { "epoch": 0.0246522117767903, "grad_norm": 5.473740573063913, "learning_rate": 4.928571428571429e-07, "loss": 0.4459, "step": 276 }, { "epoch": 0.024830850992564144, "grad_norm": 145.44763206004765, "learning_rate": 4.964285714285715e-07, "loss": 0.1412, "step": 278 }, { "epoch": 0.025009490208337985, "grad_norm": 9.412886094157415, "learning_rate": 5e-07, "loss": 0.5834, "step": 280 }, { "epoch": 0.02518812942411183, "grad_norm": 17.12988835197626, "learning_rate": 5.035714285714285e-07, "loss": -0.7642, "step": 282 }, { "epoch": 0.02536676863988567, "grad_norm": 6.010372732476812, "learning_rate": 5.071428571428571e-07, "loss": 0.4461, "step": 284 }, { "epoch": 0.025545407855659515, "grad_norm": 20.395038135939004, "learning_rate": 5.107142857142856e-07, "loss": -0.213, "step": 286 }, { "epoch": 0.025724047071433356, "grad_norm": 23.185234843453575, "learning_rate": 5.142857142857142e-07, "loss": -0.3535, "step": 288 }, { "epoch": 0.0259026862872072, "grad_norm": 9.251757068259934, "learning_rate": 5.178571428571428e-07, "loss": -0.0089, "step": 290 }, { "epoch": 0.02608132550298104, "grad_norm": 80.37441071906842, "learning_rate": 5.214285714285714e-07, "loss": -0.5338, "step": 292 }, { "epoch": 0.026259964718754886, "grad_norm": 49.30886839051496, "learning_rate": 5.25e-07, "loss": 1.3021, "step": 294 }, { "epoch": 0.026438603934528727, "grad_norm": 28.368978659714074, "learning_rate": 5.285714285714286e-07, "loss": 1.3604, "step": 296 }, { "epoch": 0.02661724315030257, "grad_norm": 27.59997916842794, "learning_rate": 5.321428571428571e-07, "loss": 0.3308, "step": 298 }, { "epoch": 0.026795882366076412, "grad_norm": 14.52101296414554, "learning_rate": 5.357142857142857e-07, "loss": -0.9965, "step": 300 }, { "epoch": 0.026974521581850257, "grad_norm": 13.31895039773276, "learning_rate": 5.392857142857142e-07, "loss": 0.9059, "step": 302 }, { "epoch": 0.027153160797624098, "grad_norm": 11.682333390729287, "learning_rate": 5.428571428571428e-07, "loss": 0.8933, "step": 304 }, { "epoch": 0.027331800013397942, "grad_norm": 57.051751452080765, "learning_rate": 5.464285714285713e-07, "loss": -0.8037, "step": 306 }, { "epoch": 0.027510439229171783, "grad_norm": 12.944853383718819, "learning_rate": 5.5e-07, "loss": -0.0176, "step": 308 }, { "epoch": 0.027689078444945628, "grad_norm": 56.28454885182189, "learning_rate": 5.535714285714285e-07, "loss": 1.217, "step": 310 }, { "epoch": 0.02786771766071947, "grad_norm": 9.601463571881528, "learning_rate": 5.571428571428571e-07, "loss": -1.6717, "step": 312 }, { "epoch": 0.028046356876493313, "grad_norm": 16.737296226121394, "learning_rate": 5.607142857142857e-07, "loss": 0.5795, "step": 314 }, { "epoch": 0.028224996092267154, "grad_norm": 10.346421187854492, "learning_rate": 5.642857142857143e-07, "loss": 0.3788, "step": 316 }, { "epoch": 0.028403635308041, "grad_norm": 42.23449350242347, "learning_rate": 5.678571428571428e-07, "loss": -0.6476, "step": 318 }, { "epoch": 0.02858227452381484, "grad_norm": 29.47988464415715, "learning_rate": 5.714285714285714e-07, "loss": -0.2644, "step": 320 }, { "epoch": 0.028760913739588684, "grad_norm": 38.80033588067409, "learning_rate": 5.749999999999999e-07, "loss": -0.5994, "step": 322 }, { "epoch": 0.028939552955362526, "grad_norm": 9.37933086814724, "learning_rate": 5.785714285714286e-07, "loss": 1.176, "step": 324 }, { "epoch": 0.02911819217113637, "grad_norm": 12.98871474840485, "learning_rate": 5.821428571428571e-07, "loss": -0.2034, "step": 326 }, { "epoch": 0.02929683138691021, "grad_norm": 71.90373067077257, "learning_rate": 5.857142857142857e-07, "loss": -0.136, "step": 328 }, { "epoch": 0.029475470602684056, "grad_norm": 29.751854590302433, "learning_rate": 5.892857142857142e-07, "loss": 0.4423, "step": 330 }, { "epoch": 0.029654109818457897, "grad_norm": 105.39328781847306, "learning_rate": 5.928571428571429e-07, "loss": -0.1587, "step": 332 }, { "epoch": 0.02983274903423174, "grad_norm": 8.117417303803103, "learning_rate": 5.964285714285714e-07, "loss": -0.2874, "step": 334 }, { "epoch": 0.030011388250005582, "grad_norm": 44.32377655354686, "learning_rate": 6e-07, "loss": -0.3831, "step": 336 }, { "epoch": 0.030190027465779427, "grad_norm": 16.114091362211134, "learning_rate": 6.035714285714285e-07, "loss": 0.7262, "step": 338 }, { "epoch": 0.030368666681553268, "grad_norm": 10.074575819452377, "learning_rate": 6.071428571428571e-07, "loss": -0.1516, "step": 340 }, { "epoch": 0.030547305897327112, "grad_norm": 25.955014559073526, "learning_rate": 6.107142857142857e-07, "loss": 0.5815, "step": 342 }, { "epoch": 0.030725945113100953, "grad_norm": 8.86871072062853, "learning_rate": 6.142857142857143e-07, "loss": -0.4638, "step": 344 }, { "epoch": 0.030904584328874798, "grad_norm": 15.684693590794478, "learning_rate": 6.178571428571428e-07, "loss": 0.6791, "step": 346 }, { "epoch": 0.03108322354464864, "grad_norm": 25.790572414901558, "learning_rate": 6.214285714285714e-07, "loss": 0.6459, "step": 348 }, { "epoch": 0.03126186276042248, "grad_norm": 41.088208098267835, "learning_rate": 6.249999999999999e-07, "loss": -1.4909, "step": 350 }, { "epoch": 0.03144050197619633, "grad_norm": 15.38064049943472, "learning_rate": 6.285714285714286e-07, "loss": 0.5668, "step": 352 }, { "epoch": 0.031619141191970165, "grad_norm": 98.15068334262885, "learning_rate": 6.321428571428571e-07, "loss": 0.5718, "step": 354 }, { "epoch": 0.03179778040774401, "grad_norm": 16.81376421971975, "learning_rate": 6.357142857142857e-07, "loss": 0.7078, "step": 356 }, { "epoch": 0.031976419623517854, "grad_norm": 48.63858713206136, "learning_rate": 6.392857142857142e-07, "loss": -0.3251, "step": 358 }, { "epoch": 0.0321550588392917, "grad_norm": 23.86488071912955, "learning_rate": 6.428571428571429e-07, "loss": -0.3271, "step": 360 }, { "epoch": 0.032333698055065536, "grad_norm": 58.123025840130694, "learning_rate": 6.464285714285714e-07, "loss": -0.1834, "step": 362 }, { "epoch": 0.03251233727083938, "grad_norm": 35.13632421436362, "learning_rate": 6.5e-07, "loss": 0.4707, "step": 364 }, { "epoch": 0.032690976486613225, "grad_norm": 35.804725855041646, "learning_rate": 6.535714285714285e-07, "loss": -0.505, "step": 366 }, { "epoch": 0.03286961570238707, "grad_norm": 4.257012808359532, "learning_rate": 6.571428571428571e-07, "loss": 0.2541, "step": 368 }, { "epoch": 0.03304825491816091, "grad_norm": 48.00970869424486, "learning_rate": 6.607142857142857e-07, "loss": 0.4749, "step": 370 }, { "epoch": 0.03322689413393475, "grad_norm": 74.14780490787471, "learning_rate": 6.642857142857143e-07, "loss": -2.7692, "step": 372 }, { "epoch": 0.0334055333497086, "grad_norm": 10.686241031133832, "learning_rate": 6.678571428571428e-07, "loss": 0.4286, "step": 374 }, { "epoch": 0.033584172565482434, "grad_norm": 16.157233143744882, "learning_rate": 6.714285714285714e-07, "loss": -0.1312, "step": 376 }, { "epoch": 0.03376281178125628, "grad_norm": 37.50860252891162, "learning_rate": 6.75e-07, "loss": 0.0704, "step": 378 }, { "epoch": 0.03394145099703012, "grad_norm": 6.03560627367461, "learning_rate": 6.785714285714286e-07, "loss": -0.4255, "step": 380 }, { "epoch": 0.03412009021280397, "grad_norm": 14.19019174458354, "learning_rate": 6.821428571428571e-07, "loss": -0.877, "step": 382 }, { "epoch": 0.034298729428577805, "grad_norm": 10.609559811764768, "learning_rate": 6.857142857142857e-07, "loss": 0.998, "step": 384 }, { "epoch": 0.03447736864435165, "grad_norm": 81.9759456429903, "learning_rate": 6.892857142857142e-07, "loss": -0.5104, "step": 386 }, { "epoch": 0.034656007860125494, "grad_norm": 18.259985295401563, "learning_rate": 6.928571428571428e-07, "loss": 0.0397, "step": 388 }, { "epoch": 0.03483464707589934, "grad_norm": 11.06827510803682, "learning_rate": 6.964285714285714e-07, "loss": 0.5514, "step": 390 }, { "epoch": 0.035013286291673176, "grad_norm": 29.893815376326693, "learning_rate": 7e-07, "loss": -0.9798, "step": 392 }, { "epoch": 0.03519192550744702, "grad_norm": 7.664994955938363, "learning_rate": 7.035714285714286e-07, "loss": 0.0833, "step": 394 }, { "epoch": 0.035370564723220865, "grad_norm": 8.769356380494807, "learning_rate": 7.071428571428572e-07, "loss": 0.9603, "step": 396 }, { "epoch": 0.03554920393899471, "grad_norm": 15.1626622429554, "learning_rate": 7.107142857142857e-07, "loss": 0.412, "step": 398 }, { "epoch": 0.03572784315476855, "grad_norm": 7.944225195621045, "learning_rate": 7.142857142857143e-07, "loss": -0.3729, "step": 400 }, { "epoch": 0.03590648237054239, "grad_norm": 11.03600245649332, "learning_rate": 7.178571428571428e-07, "loss": 0.0548, "step": 402 }, { "epoch": 0.036085121586316236, "grad_norm": 15.38672668348361, "learning_rate": 7.214285714285714e-07, "loss": -0.5513, "step": 404 }, { "epoch": 0.03626376080209008, "grad_norm": 23.354798098855003, "learning_rate": 7.249999999999999e-07, "loss": -0.5169, "step": 406 }, { "epoch": 0.03644240001786392, "grad_norm": 22.602617689462775, "learning_rate": 7.285714285714286e-07, "loss": -0.5757, "step": 408 }, { "epoch": 0.03662103923363776, "grad_norm": 17.796804694220217, "learning_rate": 7.321428571428571e-07, "loss": -0.0482, "step": 410 }, { "epoch": 0.03679967844941161, "grad_norm": 11.647863075834916, "learning_rate": 7.357142857142858e-07, "loss": 0.1823, "step": 412 }, { "epoch": 0.03697831766518545, "grad_norm": 13.893615881254936, "learning_rate": 7.392857142857143e-07, "loss": 0.9917, "step": 414 }, { "epoch": 0.03715695688095929, "grad_norm": 46.2794115730405, "learning_rate": 7.428571428571429e-07, "loss": -0.1787, "step": 416 }, { "epoch": 0.037335596096733134, "grad_norm": 24.65157096762547, "learning_rate": 7.464285714285714e-07, "loss": -0.929, "step": 418 }, { "epoch": 0.03751423531250698, "grad_norm": 57.633051311861195, "learning_rate": 7.5e-07, "loss": -1.7342, "step": 420 }, { "epoch": 0.03769287452828082, "grad_norm": 12.913357677945527, "learning_rate": 7.535714285714285e-07, "loss": -0.3654, "step": 422 }, { "epoch": 0.03787151374405466, "grad_norm": 22.367222291930677, "learning_rate": 7.57142857142857e-07, "loss": -0.9228, "step": 424 }, { "epoch": 0.038050152959828505, "grad_norm": 28.15506314264641, "learning_rate": 7.607142857142856e-07, "loss": -0.5825, "step": 426 }, { "epoch": 0.03822879217560235, "grad_norm": 51.39587186274059, "learning_rate": 7.642857142857142e-07, "loss": -0.969, "step": 428 }, { "epoch": 0.038407431391376194, "grad_norm": 63.08422066162326, "learning_rate": 7.678571428571429e-07, "loss": -0.1692, "step": 430 }, { "epoch": 0.03858607060715003, "grad_norm": 12.881602547979098, "learning_rate": 7.714285714285714e-07, "loss": -0.6435, "step": 432 }, { "epoch": 0.038764709822923876, "grad_norm": 23.736380757907785, "learning_rate": 7.75e-07, "loss": -0.4726, "step": 434 }, { "epoch": 0.03894334903869772, "grad_norm": 18.22656082565359, "learning_rate": 7.785714285714285e-07, "loss": 0.4469, "step": 436 }, { "epoch": 0.039121988254471565, "grad_norm": 25.89924937452942, "learning_rate": 7.821428571428571e-07, "loss": -0.1204, "step": 438 }, { "epoch": 0.0393006274702454, "grad_norm": 14.261702701305678, "learning_rate": 7.857142857142856e-07, "loss": 0.0273, "step": 440 }, { "epoch": 0.03947926668601925, "grad_norm": 16.842009672094417, "learning_rate": 7.892857142857142e-07, "loss": 0.0327, "step": 442 }, { "epoch": 0.03965790590179309, "grad_norm": 10.807883980693756, "learning_rate": 7.928571428571427e-07, "loss": -0.3812, "step": 444 }, { "epoch": 0.039836545117566936, "grad_norm": 32.95647357334557, "learning_rate": 7.964285714285714e-07, "loss": 0.1896, "step": 446 }, { "epoch": 0.040015184333340774, "grad_norm": 46.42548265180584, "learning_rate": 8e-07, "loss": -0.7144, "step": 448 }, { "epoch": 0.04019382354911462, "grad_norm": 34.959255371543776, "learning_rate": 8.035714285714286e-07, "loss": -0.1981, "step": 450 }, { "epoch": 0.04037246276488846, "grad_norm": 20.416810742343475, "learning_rate": 8.071428571428571e-07, "loss": -0.7458, "step": 452 }, { "epoch": 0.04055110198066231, "grad_norm": 17.18388456292334, "learning_rate": 8.107142857142857e-07, "loss": -0.3197, "step": 454 }, { "epoch": 0.040729741196436145, "grad_norm": 32.83541238724778, "learning_rate": 8.142857142857142e-07, "loss": -0.3276, "step": 456 }, { "epoch": 0.04090838041220999, "grad_norm": 5.318567917034655, "learning_rate": 8.178571428571428e-07, "loss": 0.1512, "step": 458 }, { "epoch": 0.041087019627983834, "grad_norm": 25.581065853005054, "learning_rate": 8.214285714285713e-07, "loss": -0.9867, "step": 460 }, { "epoch": 0.04126565884375768, "grad_norm": 18.31063677061266, "learning_rate": 8.249999999999999e-07, "loss": 0.1462, "step": 462 }, { "epoch": 0.041444298059531516, "grad_norm": 22.485660985418505, "learning_rate": 8.285714285714285e-07, "loss": -1.3378, "step": 464 }, { "epoch": 0.04162293727530536, "grad_norm": 10.479476125305112, "learning_rate": 8.321428571428572e-07, "loss": 0.7789, "step": 466 }, { "epoch": 0.041801576491079205, "grad_norm": 10.278777129471372, "learning_rate": 8.357142857142857e-07, "loss": -0.1207, "step": 468 }, { "epoch": 0.04198021570685305, "grad_norm": 19.69059685464581, "learning_rate": 8.392857142857143e-07, "loss": -1.603, "step": 470 }, { "epoch": 0.04215885492262689, "grad_norm": 4.69346544238761, "learning_rate": 8.428571428571428e-07, "loss": 1.0629, "step": 472 }, { "epoch": 0.04233749413840073, "grad_norm": 8.445859483760879, "learning_rate": 8.464285714285714e-07, "loss": -0.2013, "step": 474 }, { "epoch": 0.042516133354174576, "grad_norm": 9.610740899494015, "learning_rate": 8.499999999999999e-07, "loss": -0.1099, "step": 476 }, { "epoch": 0.04269477256994842, "grad_norm": 75.22497182929848, "learning_rate": 8.535714285714285e-07, "loss": 0.465, "step": 478 }, { "epoch": 0.04287341178572226, "grad_norm": 13.720244793279019, "learning_rate": 8.57142857142857e-07, "loss": -0.3701, "step": 480 }, { "epoch": 0.0430520510014961, "grad_norm": 16.483124397169917, "learning_rate": 8.607142857142857e-07, "loss": 1.162, "step": 482 }, { "epoch": 0.04323069021726995, "grad_norm": 26.090858747006585, "learning_rate": 8.642857142857143e-07, "loss": 0.4719, "step": 484 }, { "epoch": 0.04340932943304379, "grad_norm": 25.604845113019504, "learning_rate": 8.678571428571429e-07, "loss": -0.1329, "step": 486 }, { "epoch": 0.04358796864881763, "grad_norm": 23.47139346172605, "learning_rate": 8.714285714285714e-07, "loss": 0.2902, "step": 488 }, { "epoch": 0.043766607864591474, "grad_norm": 16.137086109198993, "learning_rate": 8.75e-07, "loss": 0.0302, "step": 490 }, { "epoch": 0.04394524708036532, "grad_norm": 16.767553440757894, "learning_rate": 8.785714285714285e-07, "loss": 0.1213, "step": 492 }, { "epoch": 0.04412388629613916, "grad_norm": 10.532076568313597, "learning_rate": 8.821428571428571e-07, "loss": -0.2142, "step": 494 }, { "epoch": 0.044302525511913, "grad_norm": 32.67530469803419, "learning_rate": 8.857142857142856e-07, "loss": -0.0568, "step": 496 }, { "epoch": 0.044481164727686845, "grad_norm": 12.453093142211447, "learning_rate": 8.892857142857142e-07, "loss": -0.2068, "step": 498 }, { "epoch": 0.04465980394346069, "grad_norm": 18.737631825701637, "learning_rate": 8.928571428571428e-07, "loss": 1.1226, "step": 500 }, { "epoch": 0.044838443159234534, "grad_norm": 93.07172343480592, "learning_rate": 8.964285714285715e-07, "loss": -0.2146, "step": 502 }, { "epoch": 0.04501708237500837, "grad_norm": 21.68795169063128, "learning_rate": 9e-07, "loss": -0.7323, "step": 504 }, { "epoch": 0.045195721590782216, "grad_norm": 22.081828439166827, "learning_rate": 9.035714285714286e-07, "loss": -0.3523, "step": 506 }, { "epoch": 0.04537436080655606, "grad_norm": 54.92092148887799, "learning_rate": 9.071428571428571e-07, "loss": -0.8824, "step": 508 }, { "epoch": 0.045553000022329905, "grad_norm": 15.338369012539884, "learning_rate": 9.107142857142857e-07, "loss": -0.7932, "step": 510 }, { "epoch": 0.04573163923810374, "grad_norm": 24.714747127695592, "learning_rate": 9.142857142857142e-07, "loss": -0.0448, "step": 512 }, { "epoch": 0.04591027845387759, "grad_norm": 12.04873805105941, "learning_rate": 9.178571428571428e-07, "loss": 0.3112, "step": 514 }, { "epoch": 0.04608891766965143, "grad_norm": 181.78615742473352, "learning_rate": 9.214285714285713e-07, "loss": -0.8394, "step": 516 }, { "epoch": 0.046267556885425276, "grad_norm": 17.28217179390549, "learning_rate": 9.25e-07, "loss": 0.0523, "step": 518 }, { "epoch": 0.046446196101199114, "grad_norm": 35.968539584527896, "learning_rate": 9.285714285714285e-07, "loss": -0.3607, "step": 520 }, { "epoch": 0.04662483531697296, "grad_norm": 71.67694519618041, "learning_rate": 9.321428571428572e-07, "loss": -0.8583, "step": 522 }, { "epoch": 0.0468034745327468, "grad_norm": 21.150806803003256, "learning_rate": 9.357142857142857e-07, "loss": -0.7567, "step": 524 }, { "epoch": 0.04698211374852065, "grad_norm": 42.88180123261061, "learning_rate": 9.392857142857143e-07, "loss": 0.4549, "step": 526 }, { "epoch": 0.047160752964294485, "grad_norm": 12.550528368523038, "learning_rate": 9.428571428571428e-07, "loss": -0.4668, "step": 528 }, { "epoch": 0.04733939218006833, "grad_norm": 19.082570600745118, "learning_rate": 9.464285714285714e-07, "loss": -0.4774, "step": 530 }, { "epoch": 0.047518031395842174, "grad_norm": 16.497704548545343, "learning_rate": 9.499999999999999e-07, "loss": -0.7104, "step": 532 }, { "epoch": 0.04769667061161602, "grad_norm": 45.45874567774335, "learning_rate": 9.535714285714286e-07, "loss": -1.323, "step": 534 }, { "epoch": 0.047875309827389856, "grad_norm": 16.347033198252387, "learning_rate": 9.571428571428572e-07, "loss": 0.1279, "step": 536 }, { "epoch": 0.0480539490431637, "grad_norm": 22.230971525569977, "learning_rate": 9.607142857142857e-07, "loss": 0.1163, "step": 538 }, { "epoch": 0.048232588258937545, "grad_norm": 26.32676183414654, "learning_rate": 9.642857142857142e-07, "loss": -0.7552, "step": 540 }, { "epoch": 0.04841122747471139, "grad_norm": 12.703048123998625, "learning_rate": 9.678571428571428e-07, "loss": 0.4976, "step": 542 }, { "epoch": 0.04858986669048523, "grad_norm": 63.310992238956814, "learning_rate": 9.714285714285715e-07, "loss": -0.9511, "step": 544 }, { "epoch": 0.04876850590625907, "grad_norm": 12.911545523774448, "learning_rate": 9.75e-07, "loss": -0.8078, "step": 546 }, { "epoch": 0.048947145122032916, "grad_norm": 30.025932801913083, "learning_rate": 9.785714285714285e-07, "loss": -0.4778, "step": 548 }, { "epoch": 0.04912578433780676, "grad_norm": 203.54285866249398, "learning_rate": 9.82142857142857e-07, "loss": 0.1356, "step": 550 }, { "epoch": 0.0493044235535806, "grad_norm": 28.625176974753142, "learning_rate": 9.857142857142857e-07, "loss": -1.3454, "step": 552 }, { "epoch": 0.04948306276935444, "grad_norm": 15.440906720196093, "learning_rate": 9.892857142857142e-07, "loss": 0.2358, "step": 554 }, { "epoch": 0.04966170198512829, "grad_norm": 49.41530692603001, "learning_rate": 9.92857142857143e-07, "loss": -1.1567, "step": 556 }, { "epoch": 0.049840341200902125, "grad_norm": 14.82807354772422, "learning_rate": 9.964285714285714e-07, "loss": 1.0641, "step": 558 }, { "epoch": 0.05001898041667597, "grad_norm": 14.133147194074468, "learning_rate": 1e-06, "loss": -0.8806, "step": 560 }, { "epoch": 0.050197619632449814, "grad_norm": 25.151746491297576, "learning_rate": 1.0035714285714285e-06, "loss": -1.0499, "step": 562 }, { "epoch": 0.05037625884822366, "grad_norm": 35.729226645965205, "learning_rate": 1.007142857142857e-06, "loss": -0.7048, "step": 564 }, { "epoch": 0.050554898063997496, "grad_norm": 13.14311145749665, "learning_rate": 1.0107142857142857e-06, "loss": 0.2104, "step": 566 }, { "epoch": 0.05073353727977134, "grad_norm": 4.494330543507203, "learning_rate": 1.0142857142857142e-06, "loss": 0.3244, "step": 568 }, { "epoch": 0.050912176495545185, "grad_norm": 6.039112717591176, "learning_rate": 1.0178571428571427e-06, "loss": 0.1549, "step": 570 }, { "epoch": 0.05109081571131903, "grad_norm": 42.77718947585413, "learning_rate": 1.0214285714285712e-06, "loss": -0.961, "step": 572 }, { "epoch": 0.05126945492709287, "grad_norm": 17.395586072188596, "learning_rate": 1.025e-06, "loss": 0.8792, "step": 574 }, { "epoch": 0.05144809414286671, "grad_norm": 18.599206460411995, "learning_rate": 1.0285714285714284e-06, "loss": -0.072, "step": 576 }, { "epoch": 0.051626733358640556, "grad_norm": 22.859272203716237, "learning_rate": 1.0321428571428572e-06, "loss": 1.6208, "step": 578 }, { "epoch": 0.0518053725744144, "grad_norm": 20.07612617725731, "learning_rate": 1.0357142857142857e-06, "loss": 1.3748, "step": 580 }, { "epoch": 0.05198401179018824, "grad_norm": 5.337992103636566, "learning_rate": 1.0392857142857144e-06, "loss": 1.3796, "step": 582 }, { "epoch": 0.05216265100596208, "grad_norm": 9.697085652597309, "learning_rate": 1.0428571428571429e-06, "loss": 0.4489, "step": 584 }, { "epoch": 0.05234129022173593, "grad_norm": 6.100031818244373, "learning_rate": 1.0464285714285714e-06, "loss": -0.1982, "step": 586 }, { "epoch": 0.05251992943750977, "grad_norm": 15.210614483601574, "learning_rate": 1.05e-06, "loss": 0.7042, "step": 588 }, { "epoch": 0.05269856865328361, "grad_norm": 26.30230906978214, "learning_rate": 1.0535714285714286e-06, "loss": -1.2676, "step": 590 }, { "epoch": 0.05287720786905745, "grad_norm": 19.8513757185723, "learning_rate": 1.0571428571428571e-06, "loss": -0.2689, "step": 592 }, { "epoch": 0.0530558470848313, "grad_norm": 81.98984967188774, "learning_rate": 1.0607142857142856e-06, "loss": -0.8201, "step": 594 }, { "epoch": 0.05323448630060514, "grad_norm": 3.726132030136058, "learning_rate": 1.0642857142857141e-06, "loss": 0.9346, "step": 596 }, { "epoch": 0.05341312551637898, "grad_norm": 192.88068081394258, "learning_rate": 1.0678571428571429e-06, "loss": 1.3043, "step": 598 }, { "epoch": 0.053591764732152825, "grad_norm": 7.598763525916748, "learning_rate": 1.0714285714285714e-06, "loss": 0.7603, "step": 600 }, { "epoch": 0.05377040394792667, "grad_norm": 48.59232564230822, "learning_rate": 1.0749999999999999e-06, "loss": -0.4389, "step": 602 }, { "epoch": 0.053949043163700514, "grad_norm": 28.483818948563258, "learning_rate": 1.0785714285714284e-06, "loss": -0.8228, "step": 604 }, { "epoch": 0.05412768237947435, "grad_norm": 11.974633426670104, "learning_rate": 1.082142857142857e-06, "loss": 0.4897, "step": 606 }, { "epoch": 0.054306321595248196, "grad_norm": 10.068366384272537, "learning_rate": 1.0857142857142856e-06, "loss": 1.0376, "step": 608 }, { "epoch": 0.05448496081102204, "grad_norm": 79.30543519066805, "learning_rate": 1.0892857142857141e-06, "loss": -1.6442, "step": 610 }, { "epoch": 0.054663600026795885, "grad_norm": 25.069470128415784, "learning_rate": 1.0928571428571426e-06, "loss": 0.509, "step": 612 }, { "epoch": 0.05484223924256972, "grad_norm": 14.772585371709757, "learning_rate": 1.0964285714285715e-06, "loss": 0.1952, "step": 614 }, { "epoch": 0.05502087845834357, "grad_norm": 9.709461937875181, "learning_rate": 1.1e-06, "loss": 0.2733, "step": 616 }, { "epoch": 0.05519951767411741, "grad_norm": 12.710500530288677, "learning_rate": 1.1035714285714286e-06, "loss": 0.6297, "step": 618 }, { "epoch": 0.055378156889891256, "grad_norm": 26.78331129939232, "learning_rate": 1.107142857142857e-06, "loss": 0.5085, "step": 620 }, { "epoch": 0.05555679610566509, "grad_norm": 13.550413028291967, "learning_rate": 1.1107142857142858e-06, "loss": -0.5542, "step": 622 }, { "epoch": 0.05573543532143894, "grad_norm": 27.54613273602916, "learning_rate": 1.1142857142857143e-06, "loss": -0.9058, "step": 624 }, { "epoch": 0.05591407453721278, "grad_norm": 19.39525544657483, "learning_rate": 1.1178571428571428e-06, "loss": -0.9789, "step": 626 }, { "epoch": 0.05609271375298663, "grad_norm": 13.882024962714542, "learning_rate": 1.1214285714285713e-06, "loss": 0.7137, "step": 628 }, { "epoch": 0.056271352968760464, "grad_norm": 160.4291790304214, "learning_rate": 1.125e-06, "loss": -0.0324, "step": 630 }, { "epoch": 0.05644999218453431, "grad_norm": 28.356495205023133, "learning_rate": 1.1285714285714285e-06, "loss": -0.7846, "step": 632 }, { "epoch": 0.05662863140030815, "grad_norm": 30.345350072063656, "learning_rate": 1.132142857142857e-06, "loss": -1.0468, "step": 634 }, { "epoch": 0.056807270616082, "grad_norm": 18.406115701441294, "learning_rate": 1.1357142857142855e-06, "loss": 0.1442, "step": 636 }, { "epoch": 0.056985909831855835, "grad_norm": 3.846132981903999, "learning_rate": 1.1392857142857143e-06, "loss": -0.5766, "step": 638 }, { "epoch": 0.05716454904762968, "grad_norm": 10.605748586317322, "learning_rate": 1.1428571428571428e-06, "loss": 0.9585, "step": 640 }, { "epoch": 0.057343188263403524, "grad_norm": 20.320166822334407, "learning_rate": 1.1464285714285713e-06, "loss": -0.3259, "step": 642 }, { "epoch": 0.05752182747917737, "grad_norm": 27.930112540472113, "learning_rate": 1.1499999999999998e-06, "loss": 1.1216, "step": 644 }, { "epoch": 0.05770046669495121, "grad_norm": 17.680148372804315, "learning_rate": 1.1535714285714285e-06, "loss": 0.2852, "step": 646 }, { "epoch": 0.05787910591072505, "grad_norm": 26.28619156872102, "learning_rate": 1.1571428571428572e-06, "loss": 0.4683, "step": 648 }, { "epoch": 0.058057745126498896, "grad_norm": 36.93556665308351, "learning_rate": 1.1607142857142857e-06, "loss": -0.8555, "step": 650 }, { "epoch": 0.05823638434227274, "grad_norm": 26.42977525480554, "learning_rate": 1.1642857142857142e-06, "loss": 0.009, "step": 652 }, { "epoch": 0.05841502355804658, "grad_norm": 16.665730767993644, "learning_rate": 1.167857142857143e-06, "loss": 0.0003, "step": 654 }, { "epoch": 0.05859366277382042, "grad_norm": 22.31030331010904, "learning_rate": 1.1714285714285715e-06, "loss": -0.1901, "step": 656 }, { "epoch": 0.05877230198959427, "grad_norm": 25.212416674921183, "learning_rate": 1.175e-06, "loss": -0.2648, "step": 658 }, { "epoch": 0.05895094120536811, "grad_norm": 4.287534458922773, "learning_rate": 1.1785714285714285e-06, "loss": 0.4502, "step": 660 }, { "epoch": 0.05912958042114195, "grad_norm": 28.398564305081404, "learning_rate": 1.1821428571428572e-06, "loss": -0.1649, "step": 662 }, { "epoch": 0.05930821963691579, "grad_norm": 20.84927262974546, "learning_rate": 1.1857142857142857e-06, "loss": -1.02, "step": 664 }, { "epoch": 0.05948685885268964, "grad_norm": 19.207704524120977, "learning_rate": 1.1892857142857142e-06, "loss": -1.0815, "step": 666 }, { "epoch": 0.05966549806846348, "grad_norm": 7.172676257749706, "learning_rate": 1.1928571428571427e-06, "loss": 0.149, "step": 668 }, { "epoch": 0.05984413728423732, "grad_norm": 185.9813708748809, "learning_rate": 1.1964285714285714e-06, "loss": -0.419, "step": 670 }, { "epoch": 0.060022776500011164, "grad_norm": 28.82018543276407, "learning_rate": 1.2e-06, "loss": 0.4264, "step": 672 }, { "epoch": 0.06020141571578501, "grad_norm": 6.572067697614008, "learning_rate": 1.2035714285714285e-06, "loss": 0.4796, "step": 674 }, { "epoch": 0.06038005493155885, "grad_norm": 43.55639080189611, "learning_rate": 1.207142857142857e-06, "loss": 0.1217, "step": 676 }, { "epoch": 0.06055869414733269, "grad_norm": 16.887239665658523, "learning_rate": 1.2107142857142857e-06, "loss": -0.4538, "step": 678 }, { "epoch": 0.060737333363106535, "grad_norm": 5.874705530456831, "learning_rate": 1.2142857142857142e-06, "loss": 0.9072, "step": 680 }, { "epoch": 0.06091597257888038, "grad_norm": 33.32882822746136, "learning_rate": 1.2178571428571427e-06, "loss": 1.1749, "step": 682 }, { "epoch": 0.061094611794654224, "grad_norm": 6.321857126901793, "learning_rate": 1.2214285714285714e-06, "loss": 1.038, "step": 684 }, { "epoch": 0.06127325101042806, "grad_norm": 12.893650213198452, "learning_rate": 1.2250000000000001e-06, "loss": 0.0435, "step": 686 }, { "epoch": 0.061451890226201906, "grad_norm": 13.523471349611016, "learning_rate": 1.2285714285714286e-06, "loss": -0.0736, "step": 688 }, { "epoch": 0.06163052944197575, "grad_norm": 77.62786201446939, "learning_rate": 1.2321428571428571e-06, "loss": 0.5405, "step": 690 }, { "epoch": 0.061809168657749596, "grad_norm": 16.892636725422985, "learning_rate": 1.2357142857142857e-06, "loss": -0.0992, "step": 692 }, { "epoch": 0.06198780787352343, "grad_norm": 9.868850918157808, "learning_rate": 1.2392857142857144e-06, "loss": -0.0542, "step": 694 }, { "epoch": 0.06216644708929728, "grad_norm": 28.32769300380758, "learning_rate": 1.2428571428571429e-06, "loss": -0.0393, "step": 696 }, { "epoch": 0.06234508630507112, "grad_norm": 23.520892007741622, "learning_rate": 1.2464285714285714e-06, "loss": 0.1555, "step": 698 }, { "epoch": 0.06252372552084497, "grad_norm": 16.573767172878387, "learning_rate": 1.2499999999999999e-06, "loss": 0.264, "step": 700 }, { "epoch": 0.0627023647366188, "grad_norm": 28.373090906661126, "learning_rate": 1.2535714285714284e-06, "loss": 0.398, "step": 702 }, { "epoch": 0.06288100395239266, "grad_norm": 26.67872321646854, "learning_rate": 1.2571428571428571e-06, "loss": -0.7336, "step": 704 }, { "epoch": 0.0630596431681665, "grad_norm": 16.033978622256686, "learning_rate": 1.2607142857142856e-06, "loss": -0.0118, "step": 706 }, { "epoch": 0.06323828238394033, "grad_norm": 8.13487183866734, "learning_rate": 1.2642857142857141e-06, "loss": -0.794, "step": 708 }, { "epoch": 0.06341692159971418, "grad_norm": 10.308351734322788, "learning_rate": 1.2678571428571426e-06, "loss": -0.8124, "step": 710 }, { "epoch": 0.06359556081548802, "grad_norm": 12.790953443361225, "learning_rate": 1.2714285714285714e-06, "loss": -0.3245, "step": 712 }, { "epoch": 0.06377420003126186, "grad_norm": 25.966363129251587, "learning_rate": 1.2749999999999999e-06, "loss": -0.054, "step": 714 }, { "epoch": 0.06395283924703571, "grad_norm": 19.12167189817984, "learning_rate": 1.2785714285714284e-06, "loss": -1.1811, "step": 716 }, { "epoch": 0.06413147846280955, "grad_norm": 11.027398752851669, "learning_rate": 1.282142857142857e-06, "loss": -0.5512, "step": 718 }, { "epoch": 0.0643101176785834, "grad_norm": 34.04145708957315, "learning_rate": 1.2857142857142858e-06, "loss": -0.3744, "step": 720 }, { "epoch": 0.06448875689435724, "grad_norm": 22.928328590482234, "learning_rate": 1.2892857142857143e-06, "loss": -0.279, "step": 722 }, { "epoch": 0.06466739611013107, "grad_norm": 5.564873521160975, "learning_rate": 1.2928571428571428e-06, "loss": -0.4375, "step": 724 }, { "epoch": 0.06484603532590492, "grad_norm": 58.50927660823324, "learning_rate": 1.2964285714285713e-06, "loss": -0.8552, "step": 726 }, { "epoch": 0.06502467454167876, "grad_norm": 25.34670961834223, "learning_rate": 1.3e-06, "loss": 0.8063, "step": 728 }, { "epoch": 0.0652033137574526, "grad_norm": 20.102581924937606, "learning_rate": 1.3035714285714286e-06, "loss": 0.2343, "step": 730 }, { "epoch": 0.06538195297322645, "grad_norm": 40.47630238408024, "learning_rate": 1.307142857142857e-06, "loss": -0.2963, "step": 732 }, { "epoch": 0.06556059218900029, "grad_norm": 79.1622703000773, "learning_rate": 1.3107142857142856e-06, "loss": -0.2152, "step": 734 }, { "epoch": 0.06573923140477414, "grad_norm": 6.866541427408445, "learning_rate": 1.3142857142857143e-06, "loss": -0.5232, "step": 736 }, { "epoch": 0.06591787062054798, "grad_norm": 34.65128846078003, "learning_rate": 1.3178571428571428e-06, "loss": -1.5085, "step": 738 }, { "epoch": 0.06609650983632182, "grad_norm": 26.395919530698183, "learning_rate": 1.3214285714285713e-06, "loss": -0.6487, "step": 740 }, { "epoch": 0.06627514905209567, "grad_norm": 13.365762818584514, "learning_rate": 1.3249999999999998e-06, "loss": -0.1043, "step": 742 }, { "epoch": 0.0664537882678695, "grad_norm": 29.89133967389217, "learning_rate": 1.3285714285714285e-06, "loss": 0.6051, "step": 744 }, { "epoch": 0.06663242748364334, "grad_norm": 47.81888999375012, "learning_rate": 1.332142857142857e-06, "loss": -1.0832, "step": 746 }, { "epoch": 0.0668110666994172, "grad_norm": 23.99589121933495, "learning_rate": 1.3357142857142855e-06, "loss": -0.9564, "step": 748 }, { "epoch": 0.06698970591519103, "grad_norm": 13.343145066425377, "learning_rate": 1.339285714285714e-06, "loss": -0.8897, "step": 750 }, { "epoch": 0.06716834513096487, "grad_norm": 18.202310201721303, "learning_rate": 1.3428571428571428e-06, "loss": 0.0583, "step": 752 }, { "epoch": 0.06734698434673872, "grad_norm": 33.125698326955124, "learning_rate": 1.3464285714285715e-06, "loss": -0.3985, "step": 754 }, { "epoch": 0.06752562356251256, "grad_norm": 38.01427312125671, "learning_rate": 1.35e-06, "loss": 0.17, "step": 756 }, { "epoch": 0.06770426277828641, "grad_norm": 7.765097156906743, "learning_rate": 1.3535714285714285e-06, "loss": 0.0211, "step": 758 }, { "epoch": 0.06788290199406025, "grad_norm": 28.161670971624936, "learning_rate": 1.3571428571428572e-06, "loss": -1.6169, "step": 760 }, { "epoch": 0.06806154120983408, "grad_norm": 11.727811600695428, "learning_rate": 1.3607142857142857e-06, "loss": 0.8844, "step": 762 }, { "epoch": 0.06824018042560794, "grad_norm": 80.37293446023979, "learning_rate": 1.3642857142857142e-06, "loss": 0.0074, "step": 764 }, { "epoch": 0.06841881964138177, "grad_norm": 17.13454377367636, "learning_rate": 1.3678571428571427e-06, "loss": 0.2405, "step": 766 }, { "epoch": 0.06859745885715561, "grad_norm": 20.92993173033128, "learning_rate": 1.3714285714285715e-06, "loss": 0.0457, "step": 768 }, { "epoch": 0.06877609807292946, "grad_norm": 26.541891132006423, "learning_rate": 1.375e-06, "loss": 1.0034, "step": 770 }, { "epoch": 0.0689547372887033, "grad_norm": 23.669871511751232, "learning_rate": 1.3785714285714285e-06, "loss": 0.2469, "step": 772 }, { "epoch": 0.06913337650447715, "grad_norm": 16.400537509948702, "learning_rate": 1.382142857142857e-06, "loss": -1.1068, "step": 774 }, { "epoch": 0.06931201572025099, "grad_norm": 11.882264816232887, "learning_rate": 1.3857142857142857e-06, "loss": 0.7452, "step": 776 }, { "epoch": 0.06949065493602483, "grad_norm": 16.64124026884856, "learning_rate": 1.3892857142857142e-06, "loss": 0.0753, "step": 778 }, { "epoch": 0.06966929415179868, "grad_norm": 14.206432465213492, "learning_rate": 1.3928571428571427e-06, "loss": -0.4745, "step": 780 }, { "epoch": 0.06984793336757252, "grad_norm": 14.483116580817635, "learning_rate": 1.3964285714285712e-06, "loss": -0.1286, "step": 782 }, { "epoch": 0.07002657258334635, "grad_norm": 17.426937500168112, "learning_rate": 1.4e-06, "loss": -0.6039, "step": 784 }, { "epoch": 0.0702052117991202, "grad_norm": 10.04071340085676, "learning_rate": 1.4035714285714284e-06, "loss": -0.1785, "step": 786 }, { "epoch": 0.07038385101489404, "grad_norm": 41.73004148457904, "learning_rate": 1.4071428571428572e-06, "loss": 0.3893, "step": 788 }, { "epoch": 0.07056249023066789, "grad_norm": 12.710154185106477, "learning_rate": 1.4107142857142857e-06, "loss": 0.7565, "step": 790 }, { "epoch": 0.07074112944644173, "grad_norm": 67.90792537049707, "learning_rate": 1.4142857142857144e-06, "loss": -0.2369, "step": 792 }, { "epoch": 0.07091976866221557, "grad_norm": 24.989952815065873, "learning_rate": 1.417857142857143e-06, "loss": -0.5019, "step": 794 }, { "epoch": 0.07109840787798942, "grad_norm": 28.911163011537386, "learning_rate": 1.4214285714285714e-06, "loss": -0.0186, "step": 796 }, { "epoch": 0.07127704709376326, "grad_norm": 18.219430900923214, "learning_rate": 1.425e-06, "loss": 0.3836, "step": 798 }, { "epoch": 0.0714556863095371, "grad_norm": 40.73357036748451, "learning_rate": 1.4285714285714286e-06, "loss": 0.1142, "step": 800 }, { "epoch": 0.07163432552531095, "grad_norm": 5.1431010582014665, "learning_rate": 1.4321428571428571e-06, "loss": 0.7393, "step": 802 }, { "epoch": 0.07181296474108478, "grad_norm": 6.230430177192511, "learning_rate": 1.4357142857142856e-06, "loss": 0.5837, "step": 804 }, { "epoch": 0.07199160395685864, "grad_norm": 11.663561453153337, "learning_rate": 1.4392857142857141e-06, "loss": -1.6362, "step": 806 }, { "epoch": 0.07217024317263247, "grad_norm": 13.72119452523062, "learning_rate": 1.4428571428571429e-06, "loss": 0.5231, "step": 808 }, { "epoch": 0.07234888238840631, "grad_norm": 28.777438927238425, "learning_rate": 1.4464285714285714e-06, "loss": 0.3919, "step": 810 }, { "epoch": 0.07252752160418016, "grad_norm": 18.901253536575457, "learning_rate": 1.4499999999999999e-06, "loss": 0.2988, "step": 812 }, { "epoch": 0.072706160819954, "grad_norm": 10.740076852306832, "learning_rate": 1.4535714285714284e-06, "loss": -0.5567, "step": 814 }, { "epoch": 0.07288480003572784, "grad_norm": 11.195897877011069, "learning_rate": 1.4571428571428571e-06, "loss": 0.1955, "step": 816 }, { "epoch": 0.07306343925150169, "grad_norm": 41.13951082093633, "learning_rate": 1.4607142857142856e-06, "loss": -1.1244, "step": 818 }, { "epoch": 0.07324207846727553, "grad_norm": 32.554905919293674, "learning_rate": 1.4642857142857141e-06, "loss": -0.1244, "step": 820 }, { "epoch": 0.07342071768304938, "grad_norm": 37.68471758489275, "learning_rate": 1.4678571428571426e-06, "loss": 0.2861, "step": 822 }, { "epoch": 0.07359935689882321, "grad_norm": 23.065097404123808, "learning_rate": 1.4714285714285716e-06, "loss": -0.6053, "step": 824 }, { "epoch": 0.07377799611459705, "grad_norm": 11.660423206743037, "learning_rate": 1.475e-06, "loss": -0.0726, "step": 826 }, { "epoch": 0.0739566353303709, "grad_norm": 10.78627281944274, "learning_rate": 1.4785714285714286e-06, "loss": 0.1708, "step": 828 }, { "epoch": 0.07413527454614474, "grad_norm": 11.536722347548027, "learning_rate": 1.482142857142857e-06, "loss": -0.5214, "step": 830 }, { "epoch": 0.07431391376191858, "grad_norm": 9.01127850300883, "learning_rate": 1.4857142857142858e-06, "loss": 0.1532, "step": 832 }, { "epoch": 0.07449255297769243, "grad_norm": 4.663696070195425, "learning_rate": 1.4892857142857143e-06, "loss": 0.1069, "step": 834 }, { "epoch": 0.07467119219346627, "grad_norm": 43.96274445926488, "learning_rate": 1.4928571428571428e-06, "loss": -0.5585, "step": 836 }, { "epoch": 0.07484983140924012, "grad_norm": 16.186288677831907, "learning_rate": 1.4964285714285713e-06, "loss": -1.3232, "step": 838 }, { "epoch": 0.07502847062501396, "grad_norm": 20.89875948934193, "learning_rate": 1.5e-06, "loss": -1.3659, "step": 840 }, { "epoch": 0.0752071098407878, "grad_norm": 23.164967572959174, "learning_rate": 1.5035714285714285e-06, "loss": -0.7054, "step": 842 }, { "epoch": 0.07538574905656165, "grad_norm": 6.033203441141947, "learning_rate": 1.507142857142857e-06, "loss": 0.7126, "step": 844 }, { "epoch": 0.07556438827233548, "grad_norm": 9.406802747333767, "learning_rate": 1.5107142857142856e-06, "loss": -0.5697, "step": 846 }, { "epoch": 0.07574302748810932, "grad_norm": 3.4623882619958497, "learning_rate": 1.514285714285714e-06, "loss": 1.0136, "step": 848 }, { "epoch": 0.07592166670388317, "grad_norm": 26.55448868158875, "learning_rate": 1.5178571428571428e-06, "loss": -0.964, "step": 850 }, { "epoch": 0.07610030591965701, "grad_norm": 19.885417007704728, "learning_rate": 1.5214285714285713e-06, "loss": -0.4758, "step": 852 }, { "epoch": 0.07627894513543086, "grad_norm": 23.785059700185876, "learning_rate": 1.5249999999999998e-06, "loss": -1.2146, "step": 854 }, { "epoch": 0.0764575843512047, "grad_norm": 30.209650541504843, "learning_rate": 1.5285714285714283e-06, "loss": -0.6953, "step": 856 }, { "epoch": 0.07663622356697854, "grad_norm": 189.65459970035, "learning_rate": 1.5321428571428572e-06, "loss": -1.1653, "step": 858 }, { "epoch": 0.07681486278275239, "grad_norm": 18.363568337603773, "learning_rate": 1.5357142857142857e-06, "loss": -0.223, "step": 860 }, { "epoch": 0.07699350199852623, "grad_norm": 5.948879219730996, "learning_rate": 1.5392857142857143e-06, "loss": 0.4675, "step": 862 }, { "epoch": 0.07717214121430006, "grad_norm": 6.7237674874175655, "learning_rate": 1.5428571428571428e-06, "loss": 0.538, "step": 864 }, { "epoch": 0.07735078043007391, "grad_norm": 43.45942848022883, "learning_rate": 1.5464285714285715e-06, "loss": 0.378, "step": 866 }, { "epoch": 0.07752941964584775, "grad_norm": 14.333275787474568, "learning_rate": 1.55e-06, "loss": -0.244, "step": 868 }, { "epoch": 0.0777080588616216, "grad_norm": 11.770683103069704, "learning_rate": 1.5535714285714285e-06, "loss": -0.0272, "step": 870 }, { "epoch": 0.07788669807739544, "grad_norm": 39.045913666137174, "learning_rate": 1.557142857142857e-06, "loss": -0.3321, "step": 872 }, { "epoch": 0.07806533729316928, "grad_norm": 33.26933288694241, "learning_rate": 1.5607142857142857e-06, "loss": -0.1164, "step": 874 }, { "epoch": 0.07824397650894313, "grad_norm": 13.62895551187715, "learning_rate": 1.5642857142857142e-06, "loss": 0.0691, "step": 876 }, { "epoch": 0.07842261572471697, "grad_norm": 96.52520011796142, "learning_rate": 1.5678571428571427e-06, "loss": 0.0992, "step": 878 }, { "epoch": 0.0786012549404908, "grad_norm": 40.72059665804886, "learning_rate": 1.5714285714285712e-06, "loss": 0.5654, "step": 880 }, { "epoch": 0.07877989415626466, "grad_norm": 17.74968020728648, "learning_rate": 1.575e-06, "loss": 0.0231, "step": 882 }, { "epoch": 0.0789585333720385, "grad_norm": 43.022951609976445, "learning_rate": 1.5785714285714285e-06, "loss": 0.1888, "step": 884 }, { "epoch": 0.07913717258781235, "grad_norm": 18.06170673674392, "learning_rate": 1.582142857142857e-06, "loss": -1.1252, "step": 886 }, { "epoch": 0.07931581180358618, "grad_norm": 14.735344799241687, "learning_rate": 1.5857142857142855e-06, "loss": -0.1549, "step": 888 }, { "epoch": 0.07949445101936002, "grad_norm": 27.927421236727064, "learning_rate": 1.5892857142857142e-06, "loss": -1.4199, "step": 890 }, { "epoch": 0.07967309023513387, "grad_norm": 15.516495325854535, "learning_rate": 1.5928571428571427e-06, "loss": 0.1189, "step": 892 }, { "epoch": 0.07985172945090771, "grad_norm": 11.319281300082203, "learning_rate": 1.5964285714285714e-06, "loss": 0.2553, "step": 894 }, { "epoch": 0.08003036866668155, "grad_norm": 87.62757719551676, "learning_rate": 1.6e-06, "loss": 0.2503, "step": 896 }, { "epoch": 0.0802090078824554, "grad_norm": 29.828387939189426, "learning_rate": 1.6035714285714286e-06, "loss": -0.5792, "step": 898 }, { "epoch": 0.08038764709822924, "grad_norm": 19.15112275898433, "learning_rate": 1.6071428571428572e-06, "loss": -0.7156, "step": 900 }, { "epoch": 0.08056628631400309, "grad_norm": 24.730058747687245, "learning_rate": 1.6107142857142857e-06, "loss": -1.3794, "step": 902 }, { "epoch": 0.08074492552977693, "grad_norm": 29.87491215010619, "learning_rate": 1.6142857142857142e-06, "loss": -0.4198, "step": 904 }, { "epoch": 0.08092356474555076, "grad_norm": 20.454714743173746, "learning_rate": 1.6178571428571429e-06, "loss": 0.3896, "step": 906 }, { "epoch": 0.08110220396132461, "grad_norm": 13.362975232172813, "learning_rate": 1.6214285714285714e-06, "loss": -0.6634, "step": 908 }, { "epoch": 0.08128084317709845, "grad_norm": 54.5929383346933, "learning_rate": 1.625e-06, "loss": 0.4989, "step": 910 }, { "epoch": 0.08145948239287229, "grad_norm": 35.6447433640949, "learning_rate": 1.6285714285714284e-06, "loss": -0.543, "step": 912 }, { "epoch": 0.08163812160864614, "grad_norm": 20.51734525465778, "learning_rate": 1.6321428571428571e-06, "loss": -0.6763, "step": 914 }, { "epoch": 0.08181676082441998, "grad_norm": 11.445264505018823, "learning_rate": 1.6357142857142856e-06, "loss": 0.359, "step": 916 }, { "epoch": 0.08199540004019383, "grad_norm": 13.531330064356085, "learning_rate": 1.6392857142857141e-06, "loss": 0.0963, "step": 918 }, { "epoch": 0.08217403925596767, "grad_norm": 12.885918885263019, "learning_rate": 1.6428571428571426e-06, "loss": 0.343, "step": 920 }, { "epoch": 0.0823526784717415, "grad_norm": 16.100549420761723, "learning_rate": 1.6464285714285714e-06, "loss": -0.3855, "step": 922 }, { "epoch": 0.08253131768751536, "grad_norm": 30.432542589101885, "learning_rate": 1.6499999999999999e-06, "loss": -0.427, "step": 924 }, { "epoch": 0.0827099569032892, "grad_norm": 18.98041287743886, "learning_rate": 1.6535714285714284e-06, "loss": -1.2609, "step": 926 }, { "epoch": 0.08288859611906303, "grad_norm": 6.888863197743628, "learning_rate": 1.657142857142857e-06, "loss": 0.6509, "step": 928 }, { "epoch": 0.08306723533483688, "grad_norm": 17.884259579939457, "learning_rate": 1.6607142857142858e-06, "loss": -0.3371, "step": 930 }, { "epoch": 0.08324587455061072, "grad_norm": 25.36372252667491, "learning_rate": 1.6642857142857143e-06, "loss": -0.2142, "step": 932 }, { "epoch": 0.08342451376638456, "grad_norm": 8.796159461074387, "learning_rate": 1.6678571428571428e-06, "loss": 0.2954, "step": 934 }, { "epoch": 0.08360315298215841, "grad_norm": 14.836856952156342, "learning_rate": 1.6714285714285713e-06, "loss": -0.9315, "step": 936 }, { "epoch": 0.08378179219793225, "grad_norm": 4.9505094246797245, "learning_rate": 1.675e-06, "loss": 0.0992, "step": 938 }, { "epoch": 0.0839604314137061, "grad_norm": 9.526204559173745, "learning_rate": 1.6785714285714286e-06, "loss": 0.0102, "step": 940 }, { "epoch": 0.08413907062947994, "grad_norm": 37.4383493877977, "learning_rate": 1.682142857142857e-06, "loss": -2.1607, "step": 942 }, { "epoch": 0.08431770984525377, "grad_norm": 10.555464890297015, "learning_rate": 1.6857142857142856e-06, "loss": 0.5415, "step": 944 }, { "epoch": 0.08449634906102763, "grad_norm": 5.648277101046396, "learning_rate": 1.6892857142857143e-06, "loss": 0.6472, "step": 946 }, { "epoch": 0.08467498827680146, "grad_norm": 9.215845924334577, "learning_rate": 1.6928571428571428e-06, "loss": 0.7277, "step": 948 }, { "epoch": 0.0848536274925753, "grad_norm": 5.321502059619279, "learning_rate": 1.6964285714285713e-06, "loss": 0.5052, "step": 950 }, { "epoch": 0.08503226670834915, "grad_norm": 8.79325247348784, "learning_rate": 1.6999999999999998e-06, "loss": -0.0165, "step": 952 }, { "epoch": 0.08521090592412299, "grad_norm": 15.437520756881288, "learning_rate": 1.7035714285714285e-06, "loss": -0.8723, "step": 954 }, { "epoch": 0.08538954513989684, "grad_norm": 23.68072493397188, "learning_rate": 1.707142857142857e-06, "loss": -1.023, "step": 956 }, { "epoch": 0.08556818435567068, "grad_norm": 17.021667136318033, "learning_rate": 1.7107142857142855e-06, "loss": -0.0342, "step": 958 }, { "epoch": 0.08574682357144452, "grad_norm": 7.075506875580485, "learning_rate": 1.714285714285714e-06, "loss": 0.0347, "step": 960 }, { "epoch": 0.08592546278721837, "grad_norm": 27.50197746796469, "learning_rate": 1.7178571428571428e-06, "loss": -2.452, "step": 962 }, { "epoch": 0.0861041020029922, "grad_norm": 8.106721476988632, "learning_rate": 1.7214285714285715e-06, "loss": 1.1155, "step": 964 }, { "epoch": 0.08628274121876604, "grad_norm": 15.197129952364772, "learning_rate": 1.725e-06, "loss": 0.0584, "step": 966 }, { "epoch": 0.0864613804345399, "grad_norm": 12.266752660145851, "learning_rate": 1.7285714285714285e-06, "loss": -0.8534, "step": 968 }, { "epoch": 0.08664001965031373, "grad_norm": 15.948744692777254, "learning_rate": 1.7321428571428572e-06, "loss": -1.5197, "step": 970 }, { "epoch": 0.08681865886608758, "grad_norm": 40.623288725976664, "learning_rate": 1.7357142857142857e-06, "loss": -1.1423, "step": 972 }, { "epoch": 0.08699729808186142, "grad_norm": 10.847348893962236, "learning_rate": 1.7392857142857142e-06, "loss": 0.2272, "step": 974 }, { "epoch": 0.08717593729763526, "grad_norm": 19.118563497680565, "learning_rate": 1.7428571428571427e-06, "loss": -0.1794, "step": 976 }, { "epoch": 0.08735457651340911, "grad_norm": 6.873106817193855, "learning_rate": 1.7464285714285715e-06, "loss": -0.3481, "step": 978 }, { "epoch": 0.08753321572918295, "grad_norm": 13.483159051310256, "learning_rate": 1.75e-06, "loss": -0.7515, "step": 980 }, { "epoch": 0.08771185494495679, "grad_norm": 28.414390077504084, "learning_rate": 1.7535714285714285e-06, "loss": 0.6794, "step": 982 }, { "epoch": 0.08789049416073064, "grad_norm": 24.452428604300994, "learning_rate": 1.757142857142857e-06, "loss": -0.2964, "step": 984 }, { "epoch": 0.08806913337650447, "grad_norm": 18.39364238866967, "learning_rate": 1.7607142857142855e-06, "loss": -0.0698, "step": 986 }, { "epoch": 0.08824777259227833, "grad_norm": 9.492709524611628, "learning_rate": 1.7642857142857142e-06, "loss": 0.5731, "step": 988 }, { "epoch": 0.08842641180805216, "grad_norm": 12.814697672611308, "learning_rate": 1.7678571428571427e-06, "loss": -1.3821, "step": 990 }, { "epoch": 0.088605051023826, "grad_norm": 17.226504039016447, "learning_rate": 1.7714285714285712e-06, "loss": 0.3136, "step": 992 }, { "epoch": 0.08878369023959985, "grad_norm": 21.58233420319042, "learning_rate": 1.7749999999999997e-06, "loss": -0.3956, "step": 994 }, { "epoch": 0.08896232945537369, "grad_norm": 12.208182445717695, "learning_rate": 1.7785714285714285e-06, "loss": -0.9194, "step": 996 }, { "epoch": 0.08914096867114753, "grad_norm": 13.99372505161912, "learning_rate": 1.7821428571428572e-06, "loss": -0.0336, "step": 998 }, { "epoch": 0.08931960788692138, "grad_norm": 7.697213339948659, "learning_rate": 1.7857142857142857e-06, "loss": 0.969, "step": 1000 }, { "epoch": 0.08949824710269522, "grad_norm": 12.908174606801982, "learning_rate": 1.7892857142857142e-06, "loss": 0.401, "step": 1002 }, { "epoch": 0.08967688631846907, "grad_norm": 11.880782155192504, "learning_rate": 1.792857142857143e-06, "loss": -0.2337, "step": 1004 }, { "epoch": 0.0898555255342429, "grad_norm": 11.00961646303294, "learning_rate": 1.7964285714285714e-06, "loss": 0.4857, "step": 1006 }, { "epoch": 0.09003416475001674, "grad_norm": 14.199910252247195, "learning_rate": 1.8e-06, "loss": 0.424, "step": 1008 }, { "epoch": 0.0902128039657906, "grad_norm": 24.7734375, "learning_rate": 1.8035714285714284e-06, "loss": -0.6131, "step": 1010 }, { "epoch": 0.09039144318156443, "grad_norm": 12.820834988972175, "learning_rate": 1.8071428571428571e-06, "loss": 0.4289, "step": 1012 }, { "epoch": 0.09057008239733827, "grad_norm": 10.783304831355622, "learning_rate": 1.8107142857142857e-06, "loss": 0.3756, "step": 1014 }, { "epoch": 0.09074872161311212, "grad_norm": 23.80765129646153, "learning_rate": 1.8142857142857142e-06, "loss": -0.2521, "step": 1016 }, { "epoch": 0.09092736082888596, "grad_norm": 15.784848367604969, "learning_rate": 1.8178571428571427e-06, "loss": -0.4667, "step": 1018 }, { "epoch": 0.09110600004465981, "grad_norm": 260.16093276566335, "learning_rate": 1.8214285714285714e-06, "loss": -1.4882, "step": 1020 }, { "epoch": 0.09128463926043365, "grad_norm": 35.93558928344487, "learning_rate": 1.8249999999999999e-06, "loss": 0.2476, "step": 1022 }, { "epoch": 0.09146327847620749, "grad_norm": 25.611398352802556, "learning_rate": 1.8285714285714284e-06, "loss": -0.1487, "step": 1024 }, { "epoch": 0.09164191769198134, "grad_norm": 11.158503798953065, "learning_rate": 1.832142857142857e-06, "loss": -0.9396, "step": 1026 }, { "epoch": 0.09182055690775517, "grad_norm": 33.63742618616483, "learning_rate": 1.8357142857142856e-06, "loss": 0.2621, "step": 1028 }, { "epoch": 0.09199919612352901, "grad_norm": 20.352049838215006, "learning_rate": 1.8392857142857141e-06, "loss": -0.1247, "step": 1030 }, { "epoch": 0.09217783533930286, "grad_norm": 30.398186519138665, "learning_rate": 1.8428571428571426e-06, "loss": 0.0106, "step": 1032 }, { "epoch": 0.0923564745550767, "grad_norm": 29.040936220488828, "learning_rate": 1.8464285714285714e-06, "loss": -0.3131, "step": 1034 }, { "epoch": 0.09253511377085055, "grad_norm": 14.78302752591739, "learning_rate": 1.85e-06, "loss": -0.0904, "step": 1036 }, { "epoch": 0.09271375298662439, "grad_norm": 19.59763094539427, "learning_rate": 1.8535714285714286e-06, "loss": 0.3336, "step": 1038 }, { "epoch": 0.09289239220239823, "grad_norm": 56.21695209743232, "learning_rate": 1.857142857142857e-06, "loss": -1.603, "step": 1040 }, { "epoch": 0.09307103141817208, "grad_norm": 5.214631974281336, "learning_rate": 1.8607142857142856e-06, "loss": 0.1683, "step": 1042 }, { "epoch": 0.09324967063394592, "grad_norm": 7.483284952507149, "learning_rate": 1.8642857142857143e-06, "loss": -1.0009, "step": 1044 }, { "epoch": 0.09342830984971975, "grad_norm": 12.84734388872335, "learning_rate": 1.8678571428571428e-06, "loss": -0.7672, "step": 1046 }, { "epoch": 0.0936069490654936, "grad_norm": 27.372242967257794, "learning_rate": 1.8714285714285713e-06, "loss": 0.0122, "step": 1048 }, { "epoch": 0.09378558828126744, "grad_norm": 6.403496276010846, "learning_rate": 1.8749999999999998e-06, "loss": -0.119, "step": 1050 }, { "epoch": 0.0939642274970413, "grad_norm": 16.344131844547544, "learning_rate": 1.8785714285714286e-06, "loss": -0.8833, "step": 1052 }, { "epoch": 0.09414286671281513, "grad_norm": 11.167201888129986, "learning_rate": 1.882142857142857e-06, "loss": -1.5198, "step": 1054 }, { "epoch": 0.09432150592858897, "grad_norm": 30.783907919827303, "learning_rate": 1.8857142857142856e-06, "loss": -0.8854, "step": 1056 }, { "epoch": 0.09450014514436282, "grad_norm": 29.30791979308078, "learning_rate": 1.889285714285714e-06, "loss": 0.6242, "step": 1058 }, { "epoch": 0.09467878436013666, "grad_norm": 12.541376222476979, "learning_rate": 1.8928571428571428e-06, "loss": 0.0073, "step": 1060 }, { "epoch": 0.0948574235759105, "grad_norm": 11.862900522784322, "learning_rate": 1.8964285714285713e-06, "loss": -0.7177, "step": 1062 }, { "epoch": 0.09503606279168435, "grad_norm": 27.368503304794203, "learning_rate": 1.8999999999999998e-06, "loss": 0.0308, "step": 1064 }, { "epoch": 0.09521470200745819, "grad_norm": 5.380947637093724, "learning_rate": 1.9035714285714283e-06, "loss": 0.6898, "step": 1066 }, { "epoch": 0.09539334122323204, "grad_norm": 50.14668668563134, "learning_rate": 1.9071428571428572e-06, "loss": -0.9894, "step": 1068 }, { "epoch": 0.09557198043900587, "grad_norm": 15.909888190566523, "learning_rate": 1.9107142857142858e-06, "loss": -0.1899, "step": 1070 }, { "epoch": 0.09575061965477971, "grad_norm": 7.71205126478639, "learning_rate": 1.9142857142857145e-06, "loss": 0.655, "step": 1072 }, { "epoch": 0.09592925887055356, "grad_norm": 6.38669426122981, "learning_rate": 1.9178571428571428e-06, "loss": 0.5175, "step": 1074 }, { "epoch": 0.0961078980863274, "grad_norm": 7.382254443194335, "learning_rate": 1.9214285714285715e-06, "loss": -0.3562, "step": 1076 }, { "epoch": 0.09628653730210124, "grad_norm": 5.738793772848538, "learning_rate": 1.9249999999999998e-06, "loss": -0.8792, "step": 1078 }, { "epoch": 0.09646517651787509, "grad_norm": 7.367584897404236, "learning_rate": 1.9285714285714285e-06, "loss": 0.7871, "step": 1080 }, { "epoch": 0.09664381573364893, "grad_norm": 14.724617017548422, "learning_rate": 1.9321428571428572e-06, "loss": -1.2754, "step": 1082 }, { "epoch": 0.09682245494942278, "grad_norm": 94.31744933998162, "learning_rate": 1.9357142857142855e-06, "loss": -0.6855, "step": 1084 }, { "epoch": 0.09700109416519662, "grad_norm": 8.765541878644441, "learning_rate": 1.9392857142857142e-06, "loss": -0.5939, "step": 1086 }, { "epoch": 0.09717973338097045, "grad_norm": 11.274915105326702, "learning_rate": 1.942857142857143e-06, "loss": -0.342, "step": 1088 }, { "epoch": 0.0973583725967443, "grad_norm": 3.26541677294795, "learning_rate": 1.9464285714285712e-06, "loss": 1.0291, "step": 1090 }, { "epoch": 0.09753701181251814, "grad_norm": 9.578464321579197, "learning_rate": 1.95e-06, "loss": -0.6121, "step": 1092 }, { "epoch": 0.09771565102829198, "grad_norm": 26.125808338272044, "learning_rate": 1.9535714285714283e-06, "loss": -0.601, "step": 1094 }, { "epoch": 0.09789429024406583, "grad_norm": 14.447349986445936, "learning_rate": 1.957142857142857e-06, "loss": 1.0724, "step": 1096 }, { "epoch": 0.09807292945983967, "grad_norm": 9.843678695178253, "learning_rate": 1.9607142857142857e-06, "loss": 0.9116, "step": 1098 }, { "epoch": 0.09825156867561352, "grad_norm": 10.083489366905685, "learning_rate": 1.964285714285714e-06, "loss": -0.2897, "step": 1100 }, { "epoch": 0.09843020789138736, "grad_norm": 14.202614685895538, "learning_rate": 1.9678571428571427e-06, "loss": 1.0178, "step": 1102 }, { "epoch": 0.0986088471071612, "grad_norm": 10.011898972358583, "learning_rate": 1.9714285714285714e-06, "loss": 0.2322, "step": 1104 }, { "epoch": 0.09878748632293505, "grad_norm": 9.06206979717046, "learning_rate": 1.975e-06, "loss": 0.5657, "step": 1106 }, { "epoch": 0.09896612553870889, "grad_norm": 11.25351812665936, "learning_rate": 1.9785714285714284e-06, "loss": -0.0885, "step": 1108 }, { "epoch": 0.09914476475448272, "grad_norm": 11.536947853374581, "learning_rate": 1.982142857142857e-06, "loss": 0.8923, "step": 1110 }, { "epoch": 0.09932340397025657, "grad_norm": 9.791622816488138, "learning_rate": 1.985714285714286e-06, "loss": -0.6163, "step": 1112 }, { "epoch": 0.09950204318603041, "grad_norm": 13.803048745226032, "learning_rate": 1.989285714285714e-06, "loss": -0.1024, "step": 1114 }, { "epoch": 0.09968068240180425, "grad_norm": 5.313789659925086, "learning_rate": 1.992857142857143e-06, "loss": 0.0987, "step": 1116 }, { "epoch": 0.0998593216175781, "grad_norm": 33.220308053771866, "learning_rate": 1.996428571428571e-06, "loss": 0.1438, "step": 1118 }, { "epoch": 0.10003796083335194, "grad_norm": 31.40494788967377, "learning_rate": 2e-06, "loss": -0.917, "step": 1120 }, { "epoch": 0.10021660004912579, "grad_norm": 13.45157726923774, "learning_rate": 1.9999998055358194e-06, "loss": -0.5248, "step": 1122 }, { "epoch": 0.10039523926489963, "grad_norm": 19.803876955800064, "learning_rate": 1.9999992221433542e-06, "loss": 0.0668, "step": 1124 }, { "epoch": 0.10057387848067346, "grad_norm": 23.143021113707935, "learning_rate": 1.999998249822831e-06, "loss": -0.319, "step": 1126 }, { "epoch": 0.10075251769644732, "grad_norm": 58.44690557663746, "learning_rate": 1.9999968885746276e-06, "loss": -1.3864, "step": 1128 }, { "epoch": 0.10093115691222115, "grad_norm": 9.779143767837308, "learning_rate": 1.999995138399274e-06, "loss": -0.2301, "step": 1130 }, { "epoch": 0.10110979612799499, "grad_norm": 32.483439290275285, "learning_rate": 1.99999299929745e-06, "loss": -0.8539, "step": 1132 }, { "epoch": 0.10128843534376884, "grad_norm": 44.00923475977088, "learning_rate": 1.9999904712699885e-06, "loss": -0.448, "step": 1134 }, { "epoch": 0.10146707455954268, "grad_norm": 100.62266107530152, "learning_rate": 1.9999875543178727e-06, "loss": -0.4286, "step": 1136 }, { "epoch": 0.10164571377531653, "grad_norm": 11.748129837872069, "learning_rate": 1.9999842484422363e-06, "loss": -0.4838, "step": 1138 }, { "epoch": 0.10182435299109037, "grad_norm": 12.451268535164022, "learning_rate": 1.9999805536443655e-06, "loss": 0.2594, "step": 1140 }, { "epoch": 0.10200299220686421, "grad_norm": 38.42274392825898, "learning_rate": 1.9999764699256977e-06, "loss": -0.2807, "step": 1142 }, { "epoch": 0.10218163142263806, "grad_norm": 6.220837420057457, "learning_rate": 1.999971997287821e-06, "loss": 0.6787, "step": 1144 }, { "epoch": 0.1023602706384119, "grad_norm": 11.514973056511574, "learning_rate": 1.999967135732474e-06, "loss": -0.9254, "step": 1146 }, { "epoch": 0.10253890985418573, "grad_norm": 12.437261377853835, "learning_rate": 1.9999618852615486e-06, "loss": -0.5988, "step": 1148 }, { "epoch": 0.10271754906995959, "grad_norm": 13.01404106696218, "learning_rate": 1.9999562458770864e-06, "loss": -0.0311, "step": 1150 }, { "epoch": 0.10289618828573342, "grad_norm": 7.02982136093793, "learning_rate": 1.999950217581281e-06, "loss": -0.3905, "step": 1152 }, { "epoch": 0.10307482750150727, "grad_norm": 8.46023773652926, "learning_rate": 1.9999438003764767e-06, "loss": 0.1836, "step": 1154 }, { "epoch": 0.10325346671728111, "grad_norm": 12.81388497659314, "learning_rate": 1.999936994265169e-06, "loss": 0.0312, "step": 1156 }, { "epoch": 0.10343210593305495, "grad_norm": 11.092210775701034, "learning_rate": 1.999929799250006e-06, "loss": -0.0754, "step": 1158 }, { "epoch": 0.1036107451488288, "grad_norm": 12.639734715442323, "learning_rate": 1.999922215333785e-06, "loss": 0.2023, "step": 1160 }, { "epoch": 0.10378938436460264, "grad_norm": 14.024513037620338, "learning_rate": 1.9999142425194564e-06, "loss": -0.6638, "step": 1162 }, { "epoch": 0.10396802358037648, "grad_norm": 38.13899269346105, "learning_rate": 1.9999058808101204e-06, "loss": -0.353, "step": 1164 }, { "epoch": 0.10414666279615033, "grad_norm": 33.716029272184656, "learning_rate": 1.9998971302090296e-06, "loss": -0.283, "step": 1166 }, { "epoch": 0.10432530201192416, "grad_norm": 8.430555410313557, "learning_rate": 1.999887990719587e-06, "loss": 0.2286, "step": 1168 }, { "epoch": 0.10450394122769802, "grad_norm": 6.96197919832501, "learning_rate": 1.9998784623453473e-06, "loss": -1.1275, "step": 1170 }, { "epoch": 0.10468258044347185, "grad_norm": 58.67664552842766, "learning_rate": 1.999868545090017e-06, "loss": -0.8126, "step": 1172 }, { "epoch": 0.10486121965924569, "grad_norm": 37.43940090645649, "learning_rate": 1.9998582389574518e-06, "loss": 0.2366, "step": 1174 }, { "epoch": 0.10503985887501954, "grad_norm": 3.197523177766046, "learning_rate": 1.9998475439516615e-06, "loss": 0.2369, "step": 1176 }, { "epoch": 0.10521849809079338, "grad_norm": 6.548974913542797, "learning_rate": 1.9998364600768047e-06, "loss": 0.1387, "step": 1178 }, { "epoch": 0.10539713730656722, "grad_norm": 11.546493751106262, "learning_rate": 1.999824987337193e-06, "loss": -0.2145, "step": 1180 }, { "epoch": 0.10557577652234107, "grad_norm": 40.48209993366281, "learning_rate": 1.9998131257372875e-06, "loss": -0.7884, "step": 1182 }, { "epoch": 0.1057544157381149, "grad_norm": 18.813875572632558, "learning_rate": 1.999800875281702e-06, "loss": -0.1518, "step": 1184 }, { "epoch": 0.10593305495388876, "grad_norm": 13.86355861408928, "learning_rate": 1.999788235975202e-06, "loss": -1.2622, "step": 1186 }, { "epoch": 0.1061116941696626, "grad_norm": 4.042324969501786, "learning_rate": 1.9997752078227018e-06, "loss": 0.8105, "step": 1188 }, { "epoch": 0.10629033338543643, "grad_norm": 12.514432585987219, "learning_rate": 1.999761790829269e-06, "loss": 0.3559, "step": 1190 }, { "epoch": 0.10646897260121028, "grad_norm": 20.598025536586253, "learning_rate": 1.999747985000122e-06, "loss": -1.2657, "step": 1192 }, { "epoch": 0.10664761181698412, "grad_norm": 14.895492158552795, "learning_rate": 1.99973379034063e-06, "loss": 0.6906, "step": 1194 }, { "epoch": 0.10682625103275796, "grad_norm": 30.681772354040376, "learning_rate": 1.9997192068563144e-06, "loss": 0.1548, "step": 1196 }, { "epoch": 0.10700489024853181, "grad_norm": 14.15569110634455, "learning_rate": 1.999704234552846e-06, "loss": 0.3046, "step": 1198 }, { "epoch": 0.10718352946430565, "grad_norm": 3.4977638730789713, "learning_rate": 1.999688873436049e-06, "loss": 0.2238, "step": 1200 }, { "epoch": 0.1073621686800795, "grad_norm": 16.455105828055316, "learning_rate": 1.9996731235118973e-06, "loss": -1.1321, "step": 1202 }, { "epoch": 0.10754080789585334, "grad_norm": 8.661953696106675, "learning_rate": 1.999656984786516e-06, "loss": -0.3517, "step": 1204 }, { "epoch": 0.10771944711162718, "grad_norm": 45.509987206621744, "learning_rate": 1.999640457266183e-06, "loss": -0.7836, "step": 1206 }, { "epoch": 0.10789808632740103, "grad_norm": 7.610849727155039, "learning_rate": 1.9996235409573257e-06, "loss": -0.1465, "step": 1208 }, { "epoch": 0.10807672554317486, "grad_norm": 9.217960440234833, "learning_rate": 1.999606235866523e-06, "loss": -0.1669, "step": 1210 }, { "epoch": 0.1082553647589487, "grad_norm": 20.62544536687708, "learning_rate": 1.9995885420005057e-06, "loss": 0.2496, "step": 1212 }, { "epoch": 0.10843400397472255, "grad_norm": 18.090206873244895, "learning_rate": 1.999570459366156e-06, "loss": 0.0459, "step": 1214 }, { "epoch": 0.10861264319049639, "grad_norm": 7.507903448818275, "learning_rate": 1.9995519879705057e-06, "loss": -0.2795, "step": 1216 }, { "epoch": 0.10879128240627024, "grad_norm": 54.139193253074524, "learning_rate": 1.99953312782074e-06, "loss": -0.2465, "step": 1218 }, { "epoch": 0.10896992162204408, "grad_norm": 14.204920085613713, "learning_rate": 1.999513878924193e-06, "loss": 0.3016, "step": 1220 }, { "epoch": 0.10914856083781792, "grad_norm": 17.813087641074294, "learning_rate": 1.9994942412883515e-06, "loss": -0.4989, "step": 1222 }, { "epoch": 0.10932720005359177, "grad_norm": 12.155892884174467, "learning_rate": 1.9994742149208535e-06, "loss": 0.4296, "step": 1224 }, { "epoch": 0.1095058392693656, "grad_norm": 9.178398346978645, "learning_rate": 1.999453799829488e-06, "loss": -0.28, "step": 1226 }, { "epoch": 0.10968447848513944, "grad_norm": 1.9589220609480973, "learning_rate": 1.999432996022194e-06, "loss": 0.1311, "step": 1228 }, { "epoch": 0.1098631177009133, "grad_norm": 22.919210766690217, "learning_rate": 1.9994118035070636e-06, "loss": -0.7654, "step": 1230 }, { "epoch": 0.11004175691668713, "grad_norm": 22.078116015346723, "learning_rate": 1.9993902222923388e-06, "loss": -0.6029, "step": 1232 }, { "epoch": 0.11022039613246098, "grad_norm": 34.919798595988134, "learning_rate": 1.9993682523864134e-06, "loss": -0.1708, "step": 1234 }, { "epoch": 0.11039903534823482, "grad_norm": 7.681277850653842, "learning_rate": 1.9993458937978318e-06, "loss": 0.1505, "step": 1236 }, { "epoch": 0.11057767456400866, "grad_norm": 11.729440436321706, "learning_rate": 1.99932314653529e-06, "loss": -0.8912, "step": 1238 }, { "epoch": 0.11075631377978251, "grad_norm": 18.599549384452505, "learning_rate": 1.999300010607635e-06, "loss": -0.0215, "step": 1240 }, { "epoch": 0.11093495299555635, "grad_norm": 13.352429321079144, "learning_rate": 1.9992764860238653e-06, "loss": -0.2771, "step": 1242 }, { "epoch": 0.11111359221133019, "grad_norm": 10.760071051267287, "learning_rate": 1.99925257279313e-06, "loss": -0.829, "step": 1244 }, { "epoch": 0.11129223142710404, "grad_norm": 18.621970935260098, "learning_rate": 1.9992282709247297e-06, "loss": 0.7023, "step": 1246 }, { "epoch": 0.11147087064287788, "grad_norm": 17.654202556896244, "learning_rate": 1.9992035804281164e-06, "loss": 0.1515, "step": 1248 }, { "epoch": 0.11164950985865173, "grad_norm": 5.346532978152721, "learning_rate": 1.999178501312892e-06, "loss": 0.9915, "step": 1250 }, { "epoch": 0.11182814907442556, "grad_norm": 21.680582047986007, "learning_rate": 1.9991530335888116e-06, "loss": 0.5667, "step": 1252 }, { "epoch": 0.1120067882901994, "grad_norm": 16.351966557657043, "learning_rate": 1.9991271772657797e-06, "loss": -0.4948, "step": 1254 }, { "epoch": 0.11218542750597325, "grad_norm": 23.102595512920054, "learning_rate": 1.999100932353853e-06, "loss": -0.5431, "step": 1256 }, { "epoch": 0.11236406672174709, "grad_norm": 24.16580654674467, "learning_rate": 1.999074298863238e-06, "loss": -0.6827, "step": 1258 }, { "epoch": 0.11254270593752093, "grad_norm": 22.35898873923877, "learning_rate": 1.9990472768042944e-06, "loss": -1.532, "step": 1260 }, { "epoch": 0.11272134515329478, "grad_norm": 9.701212179992275, "learning_rate": 1.999019866187531e-06, "loss": 0.7678, "step": 1262 }, { "epoch": 0.11289998436906862, "grad_norm": 30.35586862149764, "learning_rate": 1.998992067023609e-06, "loss": -0.3691, "step": 1264 }, { "epoch": 0.11307862358484247, "grad_norm": 8.657260877064678, "learning_rate": 1.9989638793233398e-06, "loss": 0.6545, "step": 1266 }, { "epoch": 0.1132572628006163, "grad_norm": 8.504142200734446, "learning_rate": 1.998935303097687e-06, "loss": 1.2293, "step": 1268 }, { "epoch": 0.11343590201639014, "grad_norm": 6.847315029558204, "learning_rate": 1.9989063383577642e-06, "loss": 0.6113, "step": 1270 }, { "epoch": 0.113614541232164, "grad_norm": 10.803591547815534, "learning_rate": 1.998876985114837e-06, "loss": -0.2638, "step": 1272 }, { "epoch": 0.11379318044793783, "grad_norm": 203.7380172856799, "learning_rate": 1.9988472433803214e-06, "loss": -0.8638, "step": 1274 }, { "epoch": 0.11397181966371167, "grad_norm": 18.91082960418076, "learning_rate": 1.9988171131657846e-06, "loss": 0.2452, "step": 1276 }, { "epoch": 0.11415045887948552, "grad_norm": 19.044085303804795, "learning_rate": 1.9987865944829457e-06, "loss": -0.2499, "step": 1278 }, { "epoch": 0.11432909809525936, "grad_norm": 18.762516167600047, "learning_rate": 1.998755687343674e-06, "loss": 0.2757, "step": 1280 }, { "epoch": 0.11450773731103321, "grad_norm": 13.13704049189703, "learning_rate": 1.99872439175999e-06, "loss": -0.3807, "step": 1282 }, { "epoch": 0.11468637652680705, "grad_norm": 11.10768305560478, "learning_rate": 1.9986927077440654e-06, "loss": -0.1085, "step": 1284 }, { "epoch": 0.11486501574258089, "grad_norm": 8.35878630374528, "learning_rate": 1.998660635308224e-06, "loss": 0.1523, "step": 1286 }, { "epoch": 0.11504365495835474, "grad_norm": 10.881349661084204, "learning_rate": 1.9986281744649387e-06, "loss": -0.1369, "step": 1288 }, { "epoch": 0.11522229417412858, "grad_norm": 13.745994955598373, "learning_rate": 1.9985953252268343e-06, "loss": 0.1196, "step": 1290 }, { "epoch": 0.11540093338990241, "grad_norm": 5.8422963991547165, "learning_rate": 1.998562087606687e-06, "loss": 0.6035, "step": 1292 }, { "epoch": 0.11557957260567626, "grad_norm": 8.926749212421331, "learning_rate": 1.998528461617424e-06, "loss": 0.2548, "step": 1294 }, { "epoch": 0.1157582118214501, "grad_norm": 20.471807679537456, "learning_rate": 1.998494447272124e-06, "loss": -0.3135, "step": 1296 }, { "epoch": 0.11593685103722394, "grad_norm": 8.839795849544327, "learning_rate": 1.998460044584015e-06, "loss": -0.2888, "step": 1298 }, { "epoch": 0.11611549025299779, "grad_norm": 21.45533762146781, "learning_rate": 1.998425253566478e-06, "loss": -0.8135, "step": 1300 }, { "epoch": 0.11629412946877163, "grad_norm": 12.480897879946315, "learning_rate": 1.9983900742330437e-06, "loss": -1.2431, "step": 1302 }, { "epoch": 0.11647276868454548, "grad_norm": 6.918678389456926, "learning_rate": 1.9983545065973947e-06, "loss": 0.3079, "step": 1304 }, { "epoch": 0.11665140790031932, "grad_norm": 6.949666120893473, "learning_rate": 1.998318550673364e-06, "loss": -0.7741, "step": 1306 }, { "epoch": 0.11683004711609316, "grad_norm": 5.956659824486239, "learning_rate": 1.9982822064749365e-06, "loss": 0.7133, "step": 1308 }, { "epoch": 0.117008686331867, "grad_norm": 7.714098312483332, "learning_rate": 1.9982454740162463e-06, "loss": 0.4109, "step": 1310 }, { "epoch": 0.11718732554764084, "grad_norm": 9.854421686228955, "learning_rate": 1.998208353311581e-06, "loss": 0.5385, "step": 1312 }, { "epoch": 0.11736596476341468, "grad_norm": 15.738726259531326, "learning_rate": 1.9981708443753765e-06, "loss": 0.4006, "step": 1314 }, { "epoch": 0.11754460397918853, "grad_norm": 17.00690006021683, "learning_rate": 1.9981329472222226e-06, "loss": -0.5801, "step": 1316 }, { "epoch": 0.11772324319496237, "grad_norm": 8.110718838105583, "learning_rate": 1.9980946618668577e-06, "loss": -0.4139, "step": 1318 }, { "epoch": 0.11790188241073622, "grad_norm": 16.58688321609032, "learning_rate": 1.998055988324172e-06, "loss": -1.0092, "step": 1320 }, { "epoch": 0.11808052162651006, "grad_norm": 8.528048295304782, "learning_rate": 1.9980169266092072e-06, "loss": -0.1167, "step": 1322 }, { "epoch": 0.1182591608422839, "grad_norm": 4.2600340480966095, "learning_rate": 1.997977476737155e-06, "loss": -0.0703, "step": 1324 }, { "epoch": 0.11843780005805775, "grad_norm": 17.707802917914112, "learning_rate": 1.9979376387233593e-06, "loss": -0.6152, "step": 1326 }, { "epoch": 0.11861643927383159, "grad_norm": 18.32912900862569, "learning_rate": 1.9978974125833134e-06, "loss": 1.1127, "step": 1328 }, { "epoch": 0.11879507848960542, "grad_norm": 19.612073395017735, "learning_rate": 1.997856798332663e-06, "loss": -0.3214, "step": 1330 }, { "epoch": 0.11897371770537928, "grad_norm": 7.469228816944427, "learning_rate": 1.9978157959872037e-06, "loss": -0.3134, "step": 1332 }, { "epoch": 0.11915235692115311, "grad_norm": 35.951790636907084, "learning_rate": 1.9977744055628826e-06, "loss": -0.2685, "step": 1334 }, { "epoch": 0.11933099613692696, "grad_norm": 9.25556118789304, "learning_rate": 1.997732627075798e-06, "loss": -0.2308, "step": 1336 }, { "epoch": 0.1195096353527008, "grad_norm": 6.268176085043038, "learning_rate": 1.997690460542198e-06, "loss": -0.4492, "step": 1338 }, { "epoch": 0.11968827456847464, "grad_norm": 10.847529651313833, "learning_rate": 1.997647905978483e-06, "loss": 1.6072, "step": 1340 }, { "epoch": 0.11986691378424849, "grad_norm": 6.669412270063695, "learning_rate": 1.9976049634012036e-06, "loss": -0.0199, "step": 1342 }, { "epoch": 0.12004555300002233, "grad_norm": 13.397207487272535, "learning_rate": 1.997561632827061e-06, "loss": -0.3173, "step": 1344 }, { "epoch": 0.12022419221579617, "grad_norm": 13.13820253012779, "learning_rate": 1.997517914272908e-06, "loss": -0.6572, "step": 1346 }, { "epoch": 0.12040283143157002, "grad_norm": 9.717004355510989, "learning_rate": 1.9974738077557477e-06, "loss": 0.1894, "step": 1348 }, { "epoch": 0.12058147064734386, "grad_norm": 17.325582580899788, "learning_rate": 1.997429313292735e-06, "loss": -0.3639, "step": 1350 }, { "epoch": 0.1207601098631177, "grad_norm": 6.603878396017356, "learning_rate": 1.9973844309011746e-06, "loss": -0.7871, "step": 1352 }, { "epoch": 0.12093874907889154, "grad_norm": 10.007817074062187, "learning_rate": 1.997339160598522e-06, "loss": 0.4267, "step": 1354 }, { "epoch": 0.12111738829466538, "grad_norm": 6.4086445124772435, "learning_rate": 1.9972935024023854e-06, "loss": -0.0116, "step": 1356 }, { "epoch": 0.12129602751043923, "grad_norm": 17.993971768887803, "learning_rate": 1.9972474563305215e-06, "loss": 0.2222, "step": 1358 }, { "epoch": 0.12147466672621307, "grad_norm": 18.40307465447775, "learning_rate": 1.9972010224008395e-06, "loss": 0.2922, "step": 1360 }, { "epoch": 0.12165330594198691, "grad_norm": 7.619274804156873, "learning_rate": 1.9971542006313983e-06, "loss": 0.2716, "step": 1362 }, { "epoch": 0.12183194515776076, "grad_norm": 7.6787416762428435, "learning_rate": 1.997106991040409e-06, "loss": -1.106, "step": 1364 }, { "epoch": 0.1220105843735346, "grad_norm": 7.390289234494305, "learning_rate": 1.9970593936462322e-06, "loss": 0.0975, "step": 1366 }, { "epoch": 0.12218922358930845, "grad_norm": 6.804843905170086, "learning_rate": 1.9970114084673796e-06, "loss": 0.955, "step": 1368 }, { "epoch": 0.12236786280508229, "grad_norm": 9.15883186054885, "learning_rate": 1.9969630355225147e-06, "loss": -0.0001, "step": 1370 }, { "epoch": 0.12254650202085612, "grad_norm": 22.389102519330553, "learning_rate": 1.996914274830451e-06, "loss": 0.9772, "step": 1372 }, { "epoch": 0.12272514123662998, "grad_norm": 40.515819332835136, "learning_rate": 1.9968651264101522e-06, "loss": 0.2415, "step": 1374 }, { "epoch": 0.12290378045240381, "grad_norm": 10.705711865192681, "learning_rate": 1.9968155902807344e-06, "loss": -0.5327, "step": 1376 }, { "epoch": 0.12308241966817765, "grad_norm": 7.279401945763696, "learning_rate": 1.996765666461463e-06, "loss": 0.5443, "step": 1378 }, { "epoch": 0.1232610588839515, "grad_norm": 9.09638189134469, "learning_rate": 1.996715354971755e-06, "loss": 0.0894, "step": 1380 }, { "epoch": 0.12343969809972534, "grad_norm": 9.792814103660278, "learning_rate": 1.9966646558311783e-06, "loss": 1.3555, "step": 1382 }, { "epoch": 0.12361833731549919, "grad_norm": 19.4993286017229, "learning_rate": 1.9966135690594508e-06, "loss": -0.8551, "step": 1384 }, { "epoch": 0.12379697653127303, "grad_norm": 6.589611616947317, "learning_rate": 1.9965620946764418e-06, "loss": -0.8384, "step": 1386 }, { "epoch": 0.12397561574704687, "grad_norm": 24.662145801752025, "learning_rate": 1.996510232702171e-06, "loss": -0.426, "step": 1388 }, { "epoch": 0.12415425496282072, "grad_norm": 21.135481000651634, "learning_rate": 1.9964579831568087e-06, "loss": -0.2528, "step": 1390 }, { "epoch": 0.12433289417859456, "grad_norm": 18.798594255850457, "learning_rate": 1.996405346060677e-06, "loss": 0.2351, "step": 1392 }, { "epoch": 0.12451153339436839, "grad_norm": 7.528992373868596, "learning_rate": 1.9963523214342473e-06, "loss": -0.6953, "step": 1394 }, { "epoch": 0.12469017261014224, "grad_norm": 10.375156263530481, "learning_rate": 1.996298909298143e-06, "loss": 0.7596, "step": 1396 }, { "epoch": 0.12486881182591608, "grad_norm": 4.555724665493208, "learning_rate": 1.9962451096731368e-06, "loss": 0.2119, "step": 1398 }, { "epoch": 0.12504745104168993, "grad_norm": 16.903952846188723, "learning_rate": 1.9961909225801536e-06, "loss": 0.6113, "step": 1400 }, { "epoch": 0.12522609025746376, "grad_norm": 16.430652425005164, "learning_rate": 1.996136348040268e-06, "loss": 0.5119, "step": 1402 }, { "epoch": 0.1254047294732376, "grad_norm": 3.9908196481717133, "learning_rate": 1.996081386074705e-06, "loss": 0.7715, "step": 1404 }, { "epoch": 0.12558336868901146, "grad_norm": 11.721846596081622, "learning_rate": 1.9960260367048424e-06, "loss": -0.4924, "step": 1406 }, { "epoch": 0.1257620079047853, "grad_norm": 11.532824566448097, "learning_rate": 1.9959702999522056e-06, "loss": 0.2054, "step": 1408 }, { "epoch": 0.12594064712055913, "grad_norm": 12.252353986995978, "learning_rate": 1.995914175838473e-06, "loss": -0.2281, "step": 1410 }, { "epoch": 0.126119286336333, "grad_norm": 21.34695468597885, "learning_rate": 1.995857664385473e-06, "loss": -0.1524, "step": 1412 }, { "epoch": 0.12629792555210684, "grad_norm": 5.547846491454952, "learning_rate": 1.9958007656151837e-06, "loss": 1.0655, "step": 1414 }, { "epoch": 0.12647656476788066, "grad_norm": 5.941247018740705, "learning_rate": 1.995743479549735e-06, "loss": 0.377, "step": 1416 }, { "epoch": 0.1266552039836545, "grad_norm": 8.588876014865418, "learning_rate": 1.9956858062114078e-06, "loss": 0.4573, "step": 1418 }, { "epoch": 0.12683384319942836, "grad_norm": 9.266256944485754, "learning_rate": 1.9956277456226316e-06, "loss": -0.9967, "step": 1420 }, { "epoch": 0.1270124824152022, "grad_norm": 33.25848876896412, "learning_rate": 1.9955692978059894e-06, "loss": 0.0903, "step": 1422 }, { "epoch": 0.12719112163097604, "grad_norm": 6.407916489608223, "learning_rate": 1.9955104627842117e-06, "loss": -0.4375, "step": 1424 }, { "epoch": 0.1273697608467499, "grad_norm": 20.408342847072905, "learning_rate": 1.9954512405801815e-06, "loss": -0.1774, "step": 1426 }, { "epoch": 0.12754840006252371, "grad_norm": 22.630633416845765, "learning_rate": 1.9953916312169326e-06, "loss": -0.8882, "step": 1428 }, { "epoch": 0.12772703927829757, "grad_norm": 67.53468799092249, "learning_rate": 1.9953316347176486e-06, "loss": -0.0854, "step": 1430 }, { "epoch": 0.12790567849407142, "grad_norm": 6.8073013488498315, "learning_rate": 1.9952712511056633e-06, "loss": -0.4645, "step": 1432 }, { "epoch": 0.12808431770984524, "grad_norm": 11.705322385289296, "learning_rate": 1.995210480404462e-06, "loss": -0.0257, "step": 1434 }, { "epoch": 0.1282629569256191, "grad_norm": 6.7019596820871925, "learning_rate": 1.99514932263768e-06, "loss": -0.0478, "step": 1436 }, { "epoch": 0.12844159614139294, "grad_norm": 10.754559281956704, "learning_rate": 1.9950877778291036e-06, "loss": -0.6606, "step": 1438 }, { "epoch": 0.1286202353571668, "grad_norm": 49.5954458791644, "learning_rate": 1.9950258460026693e-06, "loss": 0.1794, "step": 1440 }, { "epoch": 0.12879887457294062, "grad_norm": 10.290371544361072, "learning_rate": 1.9949635271824635e-06, "loss": 0.1234, "step": 1442 }, { "epoch": 0.12897751378871447, "grad_norm": 46.645587047604245, "learning_rate": 1.9949008213927244e-06, "loss": 0.0426, "step": 1444 }, { "epoch": 0.12915615300448832, "grad_norm": 7.851385404952403, "learning_rate": 1.9948377286578397e-06, "loss": 0.0915, "step": 1446 }, { "epoch": 0.12933479222026215, "grad_norm": 14.858412269187204, "learning_rate": 1.9947742490023484e-06, "loss": -1.2215, "step": 1448 }, { "epoch": 0.129513431436036, "grad_norm": 21.01240245971189, "learning_rate": 1.9947103824509386e-06, "loss": -0.735, "step": 1450 }, { "epoch": 0.12969207065180985, "grad_norm": 9.706045598851892, "learning_rate": 1.9946461290284513e-06, "loss": 0.1041, "step": 1452 }, { "epoch": 0.12987070986758367, "grad_norm": 9.833247900985032, "learning_rate": 1.9945814887598755e-06, "loss": -1.3881, "step": 1454 }, { "epoch": 0.13004934908335752, "grad_norm": 11.884282710248414, "learning_rate": 1.9945164616703512e-06, "loss": -0.2745, "step": 1456 }, { "epoch": 0.13022798829913138, "grad_norm": 8.090304432953749, "learning_rate": 1.99445104778517e-06, "loss": -0.0014, "step": 1458 }, { "epoch": 0.1304066275149052, "grad_norm": 5.2046788733487315, "learning_rate": 1.9943852471297734e-06, "loss": -0.2446, "step": 1460 }, { "epoch": 0.13058526673067905, "grad_norm": 4.484768202287278, "learning_rate": 1.9943190597297527e-06, "loss": 0.367, "step": 1462 }, { "epoch": 0.1307639059464529, "grad_norm": 3.5964174405479192, "learning_rate": 1.99425248561085e-06, "loss": -0.0024, "step": 1464 }, { "epoch": 0.13094254516222673, "grad_norm": 9.661853337331786, "learning_rate": 1.9941855247989582e-06, "loss": -0.5338, "step": 1466 }, { "epoch": 0.13112118437800058, "grad_norm": 7.101759098970105, "learning_rate": 1.9941181773201203e-06, "loss": -0.057, "step": 1468 }, { "epoch": 0.13129982359377443, "grad_norm": 12.749573943555427, "learning_rate": 1.994050443200529e-06, "loss": -0.9857, "step": 1470 }, { "epoch": 0.13147846280954828, "grad_norm": 19.55145049116801, "learning_rate": 1.9939823224665287e-06, "loss": -0.0704, "step": 1472 }, { "epoch": 0.1316571020253221, "grad_norm": 4.982507816352589, "learning_rate": 1.993913815144613e-06, "loss": 0.0759, "step": 1474 }, { "epoch": 0.13183574124109596, "grad_norm": 81.5413855572356, "learning_rate": 1.993844921261427e-06, "loss": 0.3713, "step": 1476 }, { "epoch": 0.1320143804568698, "grad_norm": 3.6898120162345567, "learning_rate": 1.993775640843765e-06, "loss": 0.6008, "step": 1478 }, { "epoch": 0.13219301967264363, "grad_norm": 3.296475788552332, "learning_rate": 1.9937059739185717e-06, "loss": 0.0723, "step": 1480 }, { "epoch": 0.13237165888841748, "grad_norm": 15.776364497494741, "learning_rate": 1.9936359205129434e-06, "loss": 0.0128, "step": 1482 }, { "epoch": 0.13255029810419133, "grad_norm": 13.072034942356131, "learning_rate": 1.9935654806541254e-06, "loss": 0.0344, "step": 1484 }, { "epoch": 0.13272893731996516, "grad_norm": 5.687913481385641, "learning_rate": 1.993494654369514e-06, "loss": -0.7973, "step": 1486 }, { "epoch": 0.132907576535739, "grad_norm": 5.647392798930763, "learning_rate": 1.993423441686655e-06, "loss": -0.3838, "step": 1488 }, { "epoch": 0.13308621575151286, "grad_norm": 21.614987210709625, "learning_rate": 1.993351842633246e-06, "loss": 1.0226, "step": 1490 }, { "epoch": 0.13326485496728668, "grad_norm": 5.847934142946069, "learning_rate": 1.993279857237133e-06, "loss": 0.872, "step": 1492 }, { "epoch": 0.13344349418306053, "grad_norm": 5.290627184719259, "learning_rate": 1.9932074855263134e-06, "loss": -1.3804, "step": 1494 }, { "epoch": 0.1336221333988344, "grad_norm": 7.069759452644886, "learning_rate": 1.9931347275289347e-06, "loss": -0.6339, "step": 1496 }, { "epoch": 0.1338007726146082, "grad_norm": 8.94923324284073, "learning_rate": 1.993061583273295e-06, "loss": -0.6523, "step": 1498 }, { "epoch": 0.13397941183038206, "grad_norm": 13.318391884080063, "learning_rate": 1.992988052787841e-06, "loss": -0.1266, "step": 1500 }, { "epoch": 0.1341580510461559, "grad_norm": 10.073363700869734, "learning_rate": 1.9929141361011718e-06, "loss": 0.2308, "step": 1502 }, { "epoch": 0.13433669026192974, "grad_norm": 10.271427738304112, "learning_rate": 1.9928398332420356e-06, "loss": -0.4901, "step": 1504 }, { "epoch": 0.1345153294777036, "grad_norm": 16.640011542389608, "learning_rate": 1.9927651442393304e-06, "loss": -0.2997, "step": 1506 }, { "epoch": 0.13469396869347744, "grad_norm": 14.229016297170942, "learning_rate": 1.9926900691221056e-06, "loss": -0.5398, "step": 1508 }, { "epoch": 0.1348726079092513, "grad_norm": 18.385933250828096, "learning_rate": 1.9926146079195592e-06, "loss": 0.3307, "step": 1510 }, { "epoch": 0.13505124712502511, "grad_norm": 8.255500578094697, "learning_rate": 1.992538760661041e-06, "loss": 0.8855, "step": 1512 }, { "epoch": 0.13522988634079897, "grad_norm": 19.64251510520491, "learning_rate": 1.9924625273760497e-06, "loss": -1.2658, "step": 1514 }, { "epoch": 0.13540852555657282, "grad_norm": 2.7989225085490137, "learning_rate": 1.9923859080942344e-06, "loss": 0.3466, "step": 1516 }, { "epoch": 0.13558716477234664, "grad_norm": 78.34315790562057, "learning_rate": 1.9923089028453946e-06, "loss": -0.2569, "step": 1518 }, { "epoch": 0.1357658039881205, "grad_norm": 16.016121371881148, "learning_rate": 1.9922315116594806e-06, "loss": 0.4597, "step": 1520 }, { "epoch": 0.13594444320389434, "grad_norm": 12.671289169372908, "learning_rate": 1.992153734566591e-06, "loss": -0.6349, "step": 1522 }, { "epoch": 0.13612308241966817, "grad_norm": 11.912757356791788, "learning_rate": 1.992075571596976e-06, "loss": 0.9239, "step": 1524 }, { "epoch": 0.13630172163544202, "grad_norm": 13.954079111590957, "learning_rate": 1.9919970227810353e-06, "loss": -1.2441, "step": 1526 }, { "epoch": 0.13648036085121587, "grad_norm": 18.58664763907102, "learning_rate": 1.991918088149319e-06, "loss": -0.7985, "step": 1528 }, { "epoch": 0.1366590000669897, "grad_norm": 21.126035766490393, "learning_rate": 1.9918387677325266e-06, "loss": -0.9611, "step": 1530 }, { "epoch": 0.13683763928276355, "grad_norm": 8.422812103621341, "learning_rate": 1.9917590615615085e-06, "loss": -0.6094, "step": 1532 }, { "epoch": 0.1370162784985374, "grad_norm": 8.95744222304555, "learning_rate": 1.991678969667264e-06, "loss": 0.7338, "step": 1534 }, { "epoch": 0.13719491771431122, "grad_norm": 4.87825998709673, "learning_rate": 1.991598492080944e-06, "loss": 0.3618, "step": 1536 }, { "epoch": 0.13737355693008507, "grad_norm": 15.669483066934944, "learning_rate": 1.991517628833848e-06, "loss": -0.305, "step": 1538 }, { "epoch": 0.13755219614585892, "grad_norm": 10.597415436238341, "learning_rate": 1.991436379957426e-06, "loss": 0.7377, "step": 1540 }, { "epoch": 0.13773083536163278, "grad_norm": 6.022292685164929, "learning_rate": 1.9913547454832778e-06, "loss": 1.2021, "step": 1542 }, { "epoch": 0.1379094745774066, "grad_norm": 11.064765310302352, "learning_rate": 1.991272725443154e-06, "loss": -0.7089, "step": 1544 }, { "epoch": 0.13808811379318045, "grad_norm": 8.283400461499275, "learning_rate": 1.991190319868954e-06, "loss": -1.2374, "step": 1546 }, { "epoch": 0.1382667530089543, "grad_norm": 10.219470824036822, "learning_rate": 1.991107528792728e-06, "loss": -0.7759, "step": 1548 }, { "epoch": 0.13844539222472813, "grad_norm": 10.477777852930055, "learning_rate": 1.991024352246676e-06, "loss": 0.5313, "step": 1550 }, { "epoch": 0.13862403144050198, "grad_norm": 4.9139072411880145, "learning_rate": 1.9909407902631467e-06, "loss": 0.4166, "step": 1552 }, { "epoch": 0.13880267065627583, "grad_norm": 6.461837244636534, "learning_rate": 1.9908568428746405e-06, "loss": -0.3556, "step": 1554 }, { "epoch": 0.13898130987204965, "grad_norm": 5.273495731738675, "learning_rate": 1.990772510113807e-06, "loss": 0.5853, "step": 1556 }, { "epoch": 0.1391599490878235, "grad_norm": 3.416686003715565, "learning_rate": 1.9906877920134454e-06, "loss": 0.8684, "step": 1558 }, { "epoch": 0.13933858830359735, "grad_norm": 8.345573997435988, "learning_rate": 1.9906026886065044e-06, "loss": -0.2105, "step": 1560 }, { "epoch": 0.13951722751937118, "grad_norm": 8.28204088573041, "learning_rate": 1.9905171999260845e-06, "loss": 0.38, "step": 1562 }, { "epoch": 0.13969586673514503, "grad_norm": 13.428502960958118, "learning_rate": 1.9904313260054332e-06, "loss": -0.3032, "step": 1564 }, { "epoch": 0.13987450595091888, "grad_norm": 8.5655534409349, "learning_rate": 1.9903450668779505e-06, "loss": 0.6114, "step": 1566 }, { "epoch": 0.1400531451666927, "grad_norm": 12.417912840979834, "learning_rate": 1.9902584225771843e-06, "loss": 0.3167, "step": 1568 }, { "epoch": 0.14023178438246656, "grad_norm": 7.532172562059281, "learning_rate": 1.990171393136833e-06, "loss": -0.4411, "step": 1570 }, { "epoch": 0.1404104235982404, "grad_norm": 9.89149430298064, "learning_rate": 1.9900839785907457e-06, "loss": -0.1857, "step": 1572 }, { "epoch": 0.14058906281401426, "grad_norm": 22.950619970520403, "learning_rate": 1.989996178972919e-06, "loss": -1.1512, "step": 1574 }, { "epoch": 0.14076770202978808, "grad_norm": 10.06111088043649, "learning_rate": 1.9899079943175016e-06, "loss": -0.0838, "step": 1576 }, { "epoch": 0.14094634124556193, "grad_norm": 14.905536498581554, "learning_rate": 1.989819424658791e-06, "loss": -0.0247, "step": 1578 }, { "epoch": 0.14112498046133579, "grad_norm": 17.267633685473616, "learning_rate": 1.989730470031234e-06, "loss": 0.0957, "step": 1580 }, { "epoch": 0.1413036196771096, "grad_norm": 9.285182543610226, "learning_rate": 1.9896411304694277e-06, "loss": 0.1092, "step": 1582 }, { "epoch": 0.14148225889288346, "grad_norm": 24.457083345755887, "learning_rate": 1.9895514060081198e-06, "loss": -0.0945, "step": 1584 }, { "epoch": 0.1416608981086573, "grad_norm": 43.925478162860536, "learning_rate": 1.989461296682205e-06, "loss": 0.0488, "step": 1586 }, { "epoch": 0.14183953732443114, "grad_norm": 19.80362192029805, "learning_rate": 1.98937080252673e-06, "loss": -1.6795, "step": 1588 }, { "epoch": 0.142018176540205, "grad_norm": 15.604907980381425, "learning_rate": 1.989279923576891e-06, "loss": -0.2201, "step": 1590 }, { "epoch": 0.14219681575597884, "grad_norm": 4.969637233765609, "learning_rate": 1.9891886598680334e-06, "loss": 0.1323, "step": 1592 }, { "epoch": 0.14237545497175266, "grad_norm": 4.4549218551341125, "learning_rate": 1.989097011435652e-06, "loss": 0.8857, "step": 1594 }, { "epoch": 0.14255409418752651, "grad_norm": 36.36344385269532, "learning_rate": 1.9890049783153914e-06, "loss": -0.3939, "step": 1596 }, { "epoch": 0.14273273340330037, "grad_norm": 12.59047518196264, "learning_rate": 1.9889125605430455e-06, "loss": -0.4319, "step": 1598 }, { "epoch": 0.1429113726190742, "grad_norm": 8.932382252106876, "learning_rate": 1.9888197581545587e-06, "loss": -0.8252, "step": 1600 }, { "epoch": 0.14309001183484804, "grad_norm": 16.770092201047692, "learning_rate": 1.988726571186025e-06, "loss": -0.626, "step": 1602 }, { "epoch": 0.1432686510506219, "grad_norm": 14.438438484108142, "learning_rate": 1.9886329996736867e-06, "loss": -1.5514, "step": 1604 }, { "epoch": 0.14344729026639574, "grad_norm": 22.6764766931471, "learning_rate": 1.988539043653936e-06, "loss": -0.3946, "step": 1606 }, { "epoch": 0.14362592948216957, "grad_norm": 14.304299216467012, "learning_rate": 1.9884447031633164e-06, "loss": -0.1435, "step": 1608 }, { "epoch": 0.14380456869794342, "grad_norm": 12.497963090875986, "learning_rate": 1.9883499782385185e-06, "loss": 0.7644, "step": 1610 }, { "epoch": 0.14398320791371727, "grad_norm": 6.5257229576215465, "learning_rate": 1.9882548689163836e-06, "loss": 0.7907, "step": 1612 }, { "epoch": 0.1441618471294911, "grad_norm": 6.75794966894748, "learning_rate": 1.988159375233903e-06, "loss": -0.1845, "step": 1614 }, { "epoch": 0.14434048634526495, "grad_norm": 13.278526260227627, "learning_rate": 1.9880634972282166e-06, "loss": -0.5324, "step": 1616 }, { "epoch": 0.1445191255610388, "grad_norm": 9.626426690123003, "learning_rate": 1.987967234936614e-06, "loss": -0.4118, "step": 1618 }, { "epoch": 0.14469776477681262, "grad_norm": 4.766439999790876, "learning_rate": 1.987870588396534e-06, "loss": -1.079, "step": 1620 }, { "epoch": 0.14487640399258647, "grad_norm": 4.7601829599432195, "learning_rate": 1.9877735576455657e-06, "loss": 0.7802, "step": 1622 }, { "epoch": 0.14505504320836032, "grad_norm": 18.762843908198924, "learning_rate": 1.9876761427214473e-06, "loss": -0.4262, "step": 1624 }, { "epoch": 0.14523368242413415, "grad_norm": 17.18456739896158, "learning_rate": 1.9875783436620657e-06, "loss": 0.1505, "step": 1626 }, { "epoch": 0.145412321639908, "grad_norm": 14.889572570691564, "learning_rate": 1.9874801605054577e-06, "loss": -0.3189, "step": 1628 }, { "epoch": 0.14559096085568185, "grad_norm": 14.763705852462886, "learning_rate": 1.98738159328981e-06, "loss": -1.0413, "step": 1630 }, { "epoch": 0.14576960007145567, "grad_norm": 10.344182215762183, "learning_rate": 1.987282642053458e-06, "loss": -0.7738, "step": 1632 }, { "epoch": 0.14594823928722953, "grad_norm": 11.37846439130944, "learning_rate": 1.987183306834886e-06, "loss": 0.0455, "step": 1634 }, { "epoch": 0.14612687850300338, "grad_norm": 5.188978053562806, "learning_rate": 1.987083587672729e-06, "loss": -0.0245, "step": 1636 }, { "epoch": 0.14630551771877723, "grad_norm": 17.66741298802553, "learning_rate": 1.986983484605771e-06, "loss": -2.1333, "step": 1638 }, { "epoch": 0.14648415693455105, "grad_norm": 49.581850139756284, "learning_rate": 1.9868829976729444e-06, "loss": -0.2726, "step": 1640 }, { "epoch": 0.1466627961503249, "grad_norm": 17.103313887877565, "learning_rate": 1.9867821269133306e-06, "loss": -1.2452, "step": 1642 }, { "epoch": 0.14684143536609875, "grad_norm": 7.81796927697485, "learning_rate": 1.9866808723661626e-06, "loss": 0.1003, "step": 1644 }, { "epoch": 0.14702007458187258, "grad_norm": 5.904743628173295, "learning_rate": 1.98657923407082e-06, "loss": -0.1915, "step": 1646 }, { "epoch": 0.14719871379764643, "grad_norm": 7.060661414181599, "learning_rate": 1.9864772120668337e-06, "loss": 0.7746, "step": 1648 }, { "epoch": 0.14737735301342028, "grad_norm": 15.150659407060473, "learning_rate": 1.9863748063938822e-06, "loss": 0.4196, "step": 1650 }, { "epoch": 0.1475559922291941, "grad_norm": 13.407198643410457, "learning_rate": 1.986272017091794e-06, "loss": -0.5096, "step": 1652 }, { "epoch": 0.14773463144496796, "grad_norm": 5.275325906095977, "learning_rate": 1.986168844200548e-06, "loss": -0.1861, "step": 1654 }, { "epoch": 0.1479132706607418, "grad_norm": 24.485175182361623, "learning_rate": 1.986065287760269e-06, "loss": 0.6937, "step": 1656 }, { "epoch": 0.14809190987651563, "grad_norm": 6.730351142055021, "learning_rate": 1.985961347811235e-06, "loss": 0.1684, "step": 1658 }, { "epoch": 0.14827054909228948, "grad_norm": 13.255419504498315, "learning_rate": 1.98585702439387e-06, "loss": -0.1495, "step": 1660 }, { "epoch": 0.14844918830806333, "grad_norm": 5.0937428854676154, "learning_rate": 1.9857523175487493e-06, "loss": -0.297, "step": 1662 }, { "epoch": 0.14862782752383716, "grad_norm": 15.519738212029283, "learning_rate": 1.985647227316595e-06, "loss": -0.9355, "step": 1664 }, { "epoch": 0.148806466739611, "grad_norm": 10.371215129939422, "learning_rate": 1.9855417537382807e-06, "loss": -0.5514, "step": 1666 }, { "epoch": 0.14898510595538486, "grad_norm": 8.87814146508285, "learning_rate": 1.985435896854828e-06, "loss": 0.4095, "step": 1668 }, { "epoch": 0.14916374517115868, "grad_norm": 8.181966761000988, "learning_rate": 1.985329656707407e-06, "loss": -0.0476, "step": 1670 }, { "epoch": 0.14934238438693254, "grad_norm": 15.130145750282933, "learning_rate": 1.985223033337338e-06, "loss": 0.2225, "step": 1672 }, { "epoch": 0.1495210236027064, "grad_norm": 13.716346199924084, "learning_rate": 1.9851160267860904e-06, "loss": -1.5425, "step": 1674 }, { "epoch": 0.14969966281848024, "grad_norm": 11.391024861165711, "learning_rate": 1.985008637095281e-06, "loss": 0.3905, "step": 1676 }, { "epoch": 0.14987830203425406, "grad_norm": 20.074653153136918, "learning_rate": 1.984900864306677e-06, "loss": -0.3628, "step": 1678 }, { "epoch": 0.15005694125002791, "grad_norm": 4.037650771027541, "learning_rate": 1.984792708462195e-06, "loss": 0.784, "step": 1680 }, { "epoch": 0.15023558046580177, "grad_norm": 3.2125280297388707, "learning_rate": 1.984684169603899e-06, "loss": 0.2697, "step": 1682 }, { "epoch": 0.1504142196815756, "grad_norm": 51.64026333080613, "learning_rate": 1.984575247774003e-06, "loss": 0.5362, "step": 1684 }, { "epoch": 0.15059285889734944, "grad_norm": 9.67598152800394, "learning_rate": 1.98446594301487e-06, "loss": -0.3219, "step": 1686 }, { "epoch": 0.1507714981131233, "grad_norm": 19.878162060520005, "learning_rate": 1.9843562553690123e-06, "loss": 0.5016, "step": 1688 }, { "epoch": 0.15095013732889712, "grad_norm": 28.72251467265869, "learning_rate": 1.9842461848790896e-06, "loss": -0.229, "step": 1690 }, { "epoch": 0.15112877654467097, "grad_norm": 7.700179397672995, "learning_rate": 1.9841357315879115e-06, "loss": -0.4349, "step": 1692 }, { "epoch": 0.15130741576044482, "grad_norm": 5.203790845451804, "learning_rate": 1.9840248955384374e-06, "loss": -0.6368, "step": 1694 }, { "epoch": 0.15148605497621864, "grad_norm": 22.886802421868726, "learning_rate": 1.9839136767737733e-06, "loss": -0.2853, "step": 1696 }, { "epoch": 0.1516646941919925, "grad_norm": 12.258878643286273, "learning_rate": 1.983802075337176e-06, "loss": -0.1842, "step": 1698 }, { "epoch": 0.15184333340776635, "grad_norm": 5.731021576201294, "learning_rate": 1.9836900912720504e-06, "loss": 0.0958, "step": 1700 }, { "epoch": 0.15202197262354017, "grad_norm": 11.740981516579279, "learning_rate": 1.98357772462195e-06, "loss": -0.0326, "step": 1702 }, { "epoch": 0.15220061183931402, "grad_norm": 12.496388638478969, "learning_rate": 1.983464975430578e-06, "loss": 0.107, "step": 1704 }, { "epoch": 0.15237925105508787, "grad_norm": 13.309234648495417, "learning_rate": 1.983351843741785e-06, "loss": 0.5216, "step": 1706 }, { "epoch": 0.15255789027086172, "grad_norm": 11.873501010448317, "learning_rate": 1.9832383295995718e-06, "loss": 0.9809, "step": 1708 }, { "epoch": 0.15273652948663555, "grad_norm": 7.509882963972765, "learning_rate": 1.9831244330480866e-06, "loss": 0.1368, "step": 1710 }, { "epoch": 0.1529151687024094, "grad_norm": 6.137930475432272, "learning_rate": 1.983010154131628e-06, "loss": 0.6354, "step": 1712 }, { "epoch": 0.15309380791818325, "grad_norm": 22.139367217934367, "learning_rate": 1.982895492894641e-06, "loss": -0.9146, "step": 1714 }, { "epoch": 0.15327244713395707, "grad_norm": 44.343012238401215, "learning_rate": 1.9827804493817218e-06, "loss": -0.835, "step": 1716 }, { "epoch": 0.15345108634973093, "grad_norm": 18.27915569601026, "learning_rate": 1.9826650236376133e-06, "loss": -0.8882, "step": 1718 }, { "epoch": 0.15362972556550478, "grad_norm": 8.45741058978913, "learning_rate": 1.9825492157072085e-06, "loss": 0.3828, "step": 1720 }, { "epoch": 0.1538083647812786, "grad_norm": 11.852329820249214, "learning_rate": 1.9824330256355476e-06, "loss": -0.8234, "step": 1722 }, { "epoch": 0.15398700399705245, "grad_norm": 5.554424295748584, "learning_rate": 1.982316453467821e-06, "loss": -0.0446, "step": 1724 }, { "epoch": 0.1541656432128263, "grad_norm": 14.726656270424273, "learning_rate": 1.9821994992493663e-06, "loss": -0.2207, "step": 1726 }, { "epoch": 0.15434428242860013, "grad_norm": 32.167201958453546, "learning_rate": 1.9820821630256704e-06, "loss": -0.3188, "step": 1728 }, { "epoch": 0.15452292164437398, "grad_norm": 15.901937863279972, "learning_rate": 1.9819644448423688e-06, "loss": 0.1285, "step": 1730 }, { "epoch": 0.15470156086014783, "grad_norm": 6.721246016197734, "learning_rate": 1.9818463447452456e-06, "loss": -0.3678, "step": 1732 }, { "epoch": 0.15488020007592165, "grad_norm": 5.187794872313962, "learning_rate": 1.9817278627802335e-06, "loss": 0.348, "step": 1734 }, { "epoch": 0.1550588392916955, "grad_norm": 30.34849264558909, "learning_rate": 1.981608998993413e-06, "loss": 0.0755, "step": 1736 }, { "epoch": 0.15523747850746936, "grad_norm": 24.553607447213842, "learning_rate": 1.981489753431013e-06, "loss": -0.0887, "step": 1738 }, { "epoch": 0.1554161177232432, "grad_norm": 23.547528192665467, "learning_rate": 1.981370126139413e-06, "loss": -0.6398, "step": 1740 }, { "epoch": 0.15559475693901703, "grad_norm": 16.072087970103635, "learning_rate": 1.981250117165139e-06, "loss": -0.5794, "step": 1742 }, { "epoch": 0.15577339615479088, "grad_norm": 28.22924906776104, "learning_rate": 1.981129726554865e-06, "loss": -0.1706, "step": 1744 }, { "epoch": 0.15595203537056473, "grad_norm": 8.369111681323929, "learning_rate": 1.9810089543554153e-06, "loss": -0.2971, "step": 1746 }, { "epoch": 0.15613067458633856, "grad_norm": 5.638790265469978, "learning_rate": 1.9808878006137607e-06, "loss": 0.3384, "step": 1748 }, { "epoch": 0.1563093138021124, "grad_norm": 6.493813431607154, "learning_rate": 1.980766265377022e-06, "loss": 0.4253, "step": 1750 }, { "epoch": 0.15648795301788626, "grad_norm": 6.337173134464739, "learning_rate": 1.9806443486924677e-06, "loss": -0.67, "step": 1752 }, { "epoch": 0.15666659223366008, "grad_norm": 15.065116939907435, "learning_rate": 1.9805220506075146e-06, "loss": 0.1094, "step": 1754 }, { "epoch": 0.15684523144943394, "grad_norm": 7.629472515226443, "learning_rate": 1.9803993711697277e-06, "loss": -0.6432, "step": 1756 }, { "epoch": 0.1570238706652078, "grad_norm": 7.1531261305768545, "learning_rate": 1.9802763104268205e-06, "loss": -0.1251, "step": 1758 }, { "epoch": 0.1572025098809816, "grad_norm": 8.808265913383797, "learning_rate": 1.980152868426655e-06, "loss": -0.6406, "step": 1760 }, { "epoch": 0.15738114909675546, "grad_norm": 10.8264419546751, "learning_rate": 1.9800290452172414e-06, "loss": -0.7097, "step": 1762 }, { "epoch": 0.15755978831252931, "grad_norm": 6.755993478088664, "learning_rate": 1.979904840846738e-06, "loss": 0.1326, "step": 1764 }, { "epoch": 0.15773842752830314, "grad_norm": 14.76470524582505, "learning_rate": 1.9797802553634505e-06, "loss": 0.0543, "step": 1766 }, { "epoch": 0.157917066744077, "grad_norm": 9.062217971096484, "learning_rate": 1.9796552888158353e-06, "loss": 0.3829, "step": 1768 }, { "epoch": 0.15809570595985084, "grad_norm": 44.652388686717394, "learning_rate": 1.9795299412524945e-06, "loss": -1.2856, "step": 1770 }, { "epoch": 0.1582743451756247, "grad_norm": 14.122346611952455, "learning_rate": 1.979404212722179e-06, "loss": -0.4054, "step": 1772 }, { "epoch": 0.15845298439139852, "grad_norm": 11.148580185338659, "learning_rate": 1.979278103273789e-06, "loss": -0.1662, "step": 1774 }, { "epoch": 0.15863162360717237, "grad_norm": 6.595715848929095, "learning_rate": 1.979151612956372e-06, "loss": 0.2162, "step": 1776 }, { "epoch": 0.15881026282294622, "grad_norm": 12.893862046213961, "learning_rate": 1.979024741819123e-06, "loss": -0.2634, "step": 1778 }, { "epoch": 0.15898890203872004, "grad_norm": 5.595321690206429, "learning_rate": 1.978897489911386e-06, "loss": 0.5868, "step": 1780 }, { "epoch": 0.1591675412544939, "grad_norm": 33.47449216906154, "learning_rate": 1.978769857282654e-06, "loss": 0.1319, "step": 1782 }, { "epoch": 0.15934618047026775, "grad_norm": 14.450139049666182, "learning_rate": 1.9786418439825653e-06, "loss": 0.6933, "step": 1784 }, { "epoch": 0.15952481968604157, "grad_norm": 9.743374260490073, "learning_rate": 1.9785134500609085e-06, "loss": -0.1536, "step": 1786 }, { "epoch": 0.15970345890181542, "grad_norm": 27.549169244719863, "learning_rate": 1.9783846755676196e-06, "loss": -0.598, "step": 1788 }, { "epoch": 0.15988209811758927, "grad_norm": 19.61348702528481, "learning_rate": 1.978255520552783e-06, "loss": -1.1347, "step": 1790 }, { "epoch": 0.1600607373333631, "grad_norm": 10.107737955504605, "learning_rate": 1.9781259850666305e-06, "loss": 0.1216, "step": 1792 }, { "epoch": 0.16023937654913695, "grad_norm": 18.502580617361275, "learning_rate": 1.977996069159542e-06, "loss": 0.0338, "step": 1794 }, { "epoch": 0.1604180157649108, "grad_norm": 5.407230883571282, "learning_rate": 1.977865772882046e-06, "loss": -0.1691, "step": 1796 }, { "epoch": 0.16059665498068462, "grad_norm": 13.201219698879568, "learning_rate": 1.9777350962848178e-06, "loss": -0.6237, "step": 1798 }, { "epoch": 0.16077529419645847, "grad_norm": 4.539059558545734, "learning_rate": 1.9776040394186815e-06, "loss": 0.1623, "step": 1800 }, { "epoch": 0.16095393341223233, "grad_norm": 18.141302945450025, "learning_rate": 1.977472602334609e-06, "loss": -1.3376, "step": 1802 }, { "epoch": 0.16113257262800618, "grad_norm": 4.257859312027643, "learning_rate": 1.977340785083719e-06, "loss": 0.7349, "step": 1804 }, { "epoch": 0.16131121184378, "grad_norm": 6.726271282985429, "learning_rate": 1.9772085877172804e-06, "loss": -0.0884, "step": 1806 }, { "epoch": 0.16148985105955385, "grad_norm": 5.944620139753004, "learning_rate": 1.977076010286708e-06, "loss": -0.1473, "step": 1808 }, { "epoch": 0.1616684902753277, "grad_norm": 5.4334106853260264, "learning_rate": 1.9769430528435643e-06, "loss": 0.3521, "step": 1810 }, { "epoch": 0.16184712949110153, "grad_norm": 8.228802816982629, "learning_rate": 1.9768097154395606e-06, "loss": -0.1295, "step": 1812 }, { "epoch": 0.16202576870687538, "grad_norm": 25.577310091248968, "learning_rate": 1.9766759981265556e-06, "loss": 0.7588, "step": 1814 }, { "epoch": 0.16220440792264923, "grad_norm": 9.63466208862624, "learning_rate": 1.976541900956556e-06, "loss": -1.1718, "step": 1816 }, { "epoch": 0.16238304713842305, "grad_norm": 7.496732635863643, "learning_rate": 1.976407423981716e-06, "loss": 0.106, "step": 1818 }, { "epoch": 0.1625616863541969, "grad_norm": 11.122017449894678, "learning_rate": 1.976272567254337e-06, "loss": -0.3325, "step": 1820 }, { "epoch": 0.16274032556997076, "grad_norm": 4.843495220743915, "learning_rate": 1.9761373308268686e-06, "loss": 0.8805, "step": 1822 }, { "epoch": 0.16291896478574458, "grad_norm": 4.535812501305259, "learning_rate": 1.976001714751909e-06, "loss": -0.6803, "step": 1824 }, { "epoch": 0.16309760400151843, "grad_norm": 12.847098032491436, "learning_rate": 1.9758657190822023e-06, "loss": 0.5635, "step": 1826 }, { "epoch": 0.16327624321729228, "grad_norm": 5.362103626745298, "learning_rate": 1.9757293438706413e-06, "loss": 0.9732, "step": 1828 }, { "epoch": 0.1634548824330661, "grad_norm": 6.382736597812163, "learning_rate": 1.975592589170266e-06, "loss": 0.1077, "step": 1830 }, { "epoch": 0.16363352164883996, "grad_norm": 9.795081356191691, "learning_rate": 1.975455455034264e-06, "loss": -0.9321, "step": 1832 }, { "epoch": 0.1638121608646138, "grad_norm": 12.252024580584312, "learning_rate": 1.975317941515972e-06, "loss": -0.5981, "step": 1834 }, { "epoch": 0.16399080008038766, "grad_norm": 19.47214024203474, "learning_rate": 1.975180048668871e-06, "loss": -0.1088, "step": 1836 }, { "epoch": 0.16416943929616148, "grad_norm": 5.254095206000346, "learning_rate": 1.975041776546593e-06, "loss": 0.6897, "step": 1838 }, { "epoch": 0.16434807851193534, "grad_norm": 16.13717688392871, "learning_rate": 1.974903125202915e-06, "loss": -0.7068, "step": 1840 }, { "epoch": 0.1645267177277092, "grad_norm": 6.838468281530594, "learning_rate": 1.9747640946917626e-06, "loss": 0.1632, "step": 1842 }, { "epoch": 0.164705356943483, "grad_norm": 10.560648558380912, "learning_rate": 1.9746246850672093e-06, "loss": -0.9205, "step": 1844 }, { "epoch": 0.16488399615925686, "grad_norm": 13.660224388591853, "learning_rate": 1.974484896383475e-06, "loss": -0.1393, "step": 1846 }, { "epoch": 0.16506263537503071, "grad_norm": 10.251639630434962, "learning_rate": 1.974344728694927e-06, "loss": 0.0997, "step": 1848 }, { "epoch": 0.16524127459080454, "grad_norm": 6.237652904035356, "learning_rate": 1.9742041820560814e-06, "loss": 0.2139, "step": 1850 }, { "epoch": 0.1654199138065784, "grad_norm": 10.44466022061146, "learning_rate": 1.9740632565216003e-06, "loss": 0.2741, "step": 1852 }, { "epoch": 0.16559855302235224, "grad_norm": 4.363320810656043, "learning_rate": 1.9739219521462935e-06, "loss": -0.0787, "step": 1854 }, { "epoch": 0.16577719223812606, "grad_norm": 15.473441227563168, "learning_rate": 1.9737802689851184e-06, "loss": -1.0951, "step": 1856 }, { "epoch": 0.16595583145389992, "grad_norm": 9.344189687736982, "learning_rate": 1.9736382070931802e-06, "loss": -0.9264, "step": 1858 }, { "epoch": 0.16613447066967377, "grad_norm": 5.560606559216043, "learning_rate": 1.97349576652573e-06, "loss": -1.0227, "step": 1860 }, { "epoch": 0.1663131098854476, "grad_norm": 25.308127998203215, "learning_rate": 1.973352947338167e-06, "loss": 0.8917, "step": 1862 }, { "epoch": 0.16649174910122144, "grad_norm": 10.47877136657859, "learning_rate": 1.9732097495860385e-06, "loss": -0.1654, "step": 1864 }, { "epoch": 0.1666703883169953, "grad_norm": 36.206390457499865, "learning_rate": 1.9730661733250373e-06, "loss": -0.4059, "step": 1866 }, { "epoch": 0.16684902753276912, "grad_norm": 4.6851886391295405, "learning_rate": 1.9729222186110044e-06, "loss": 0.2723, "step": 1868 }, { "epoch": 0.16702766674854297, "grad_norm": 10.550534304426801, "learning_rate": 1.972777885499928e-06, "loss": 0.5469, "step": 1870 }, { "epoch": 0.16720630596431682, "grad_norm": 12.054276741827138, "learning_rate": 1.972633174047943e-06, "loss": -0.5769, "step": 1872 }, { "epoch": 0.16738494518009067, "grad_norm": 11.753272067678642, "learning_rate": 1.9724880843113326e-06, "loss": -0.1583, "step": 1874 }, { "epoch": 0.1675635843958645, "grad_norm": 9.966904615881017, "learning_rate": 1.972342616346526e-06, "loss": -0.2653, "step": 1876 }, { "epoch": 0.16774222361163835, "grad_norm": 13.805962443778537, "learning_rate": 1.9721967702100996e-06, "loss": -0.028, "step": 1878 }, { "epoch": 0.1679208628274122, "grad_norm": 25.929957016903007, "learning_rate": 1.9720505459587767e-06, "loss": -1.2396, "step": 1880 }, { "epoch": 0.16809950204318602, "grad_norm": 20.462950186184436, "learning_rate": 1.9719039436494288e-06, "loss": 0.0371, "step": 1882 }, { "epoch": 0.16827814125895987, "grad_norm": 11.746651030178972, "learning_rate": 1.9717569633390734e-06, "loss": -0.1319, "step": 1884 }, { "epoch": 0.16845678047473372, "grad_norm": 16.70569181926197, "learning_rate": 1.9716096050848756e-06, "loss": -0.2528, "step": 1886 }, { "epoch": 0.16863541969050755, "grad_norm": 20.950408509125037, "learning_rate": 1.971461868944147e-06, "loss": -0.2226, "step": 1888 }, { "epoch": 0.1688140589062814, "grad_norm": 11.536580825511287, "learning_rate": 1.9713137549743456e-06, "loss": 0.0658, "step": 1890 }, { "epoch": 0.16899269812205525, "grad_norm": 5.110152159215225, "learning_rate": 1.9711652632330782e-06, "loss": 0.37, "step": 1892 }, { "epoch": 0.16917133733782908, "grad_norm": 13.578654850673205, "learning_rate": 1.971016393778097e-06, "loss": -0.7437, "step": 1894 }, { "epoch": 0.16934997655360293, "grad_norm": 3.6588400771479535, "learning_rate": 1.970867146667302e-06, "loss": 0.4077, "step": 1896 }, { "epoch": 0.16952861576937678, "grad_norm": 23.41727512786367, "learning_rate": 1.970717521958739e-06, "loss": -0.0369, "step": 1898 }, { "epoch": 0.1697072549851506, "grad_norm": 0.9580397294353584, "learning_rate": 1.9705675197106016e-06, "loss": 0.6887, "step": 1900 }, { "epoch": 0.16988589420092445, "grad_norm": 7.028366921935384, "learning_rate": 1.97041713998123e-06, "loss": 0.5212, "step": 1902 }, { "epoch": 0.1700645334166983, "grad_norm": 9.083659968204929, "learning_rate": 1.970266382829111e-06, "loss": -0.134, "step": 1904 }, { "epoch": 0.17024317263247216, "grad_norm": 7.879226277121011, "learning_rate": 1.970115248312878e-06, "loss": -0.06, "step": 1906 }, { "epoch": 0.17042181184824598, "grad_norm": 5.652710370852478, "learning_rate": 1.969963736491312e-06, "loss": 0.8589, "step": 1908 }, { "epoch": 0.17060045106401983, "grad_norm": 41.302594311677325, "learning_rate": 1.9698118474233404e-06, "loss": -0.5259, "step": 1910 }, { "epoch": 0.17077909027979368, "grad_norm": 14.698215942075516, "learning_rate": 1.9696595811680367e-06, "loss": 0.6292, "step": 1912 }, { "epoch": 0.1709577294955675, "grad_norm": 25.111774056984952, "learning_rate": 1.969506937784622e-06, "loss": 0.1555, "step": 1914 }, { "epoch": 0.17113636871134136, "grad_norm": 10.131246264843544, "learning_rate": 1.9693539173324627e-06, "loss": 0.8472, "step": 1916 }, { "epoch": 0.1713150079271152, "grad_norm": 7.5793322673180255, "learning_rate": 1.969200519871074e-06, "loss": -0.7358, "step": 1918 }, { "epoch": 0.17149364714288903, "grad_norm": 5.202414707582706, "learning_rate": 1.9690467454601157e-06, "loss": -0.7448, "step": 1920 }, { "epoch": 0.17167228635866288, "grad_norm": 43.45548469860422, "learning_rate": 1.968892594159396e-06, "loss": -0.9018, "step": 1922 }, { "epoch": 0.17185092557443674, "grad_norm": 13.159430982352424, "learning_rate": 1.9687380660288674e-06, "loss": -0.8854, "step": 1924 }, { "epoch": 0.17202956479021056, "grad_norm": 7.7632384902622595, "learning_rate": 1.968583161128631e-06, "loss": 0.3657, "step": 1926 }, { "epoch": 0.1722082040059844, "grad_norm": 21.109674255373264, "learning_rate": 1.9684278795189336e-06, "loss": 0.4894, "step": 1928 }, { "epoch": 0.17238684322175826, "grad_norm": 10.811397077283551, "learning_rate": 1.968272221260169e-06, "loss": -1.339, "step": 1930 }, { "epoch": 0.1725654824375321, "grad_norm": 14.547681611968386, "learning_rate": 1.9681161864128764e-06, "loss": -0.5821, "step": 1932 }, { "epoch": 0.17274412165330594, "grad_norm": 9.437845590086978, "learning_rate": 1.9679597750377424e-06, "loss": 0.3894, "step": 1934 }, { "epoch": 0.1729227608690798, "grad_norm": 31.842480155924864, "learning_rate": 1.9678029871956e-06, "loss": 0.3764, "step": 1936 }, { "epoch": 0.17310140008485364, "grad_norm": 8.968767013267836, "learning_rate": 1.967645822947429e-06, "loss": -0.4415, "step": 1938 }, { "epoch": 0.17328003930062746, "grad_norm": 14.965877631670413, "learning_rate": 1.967488282354354e-06, "loss": 0.3682, "step": 1940 }, { "epoch": 0.17345867851640132, "grad_norm": 7.480334885796364, "learning_rate": 1.967330365477647e-06, "loss": 0.033, "step": 1942 }, { "epoch": 0.17363731773217517, "grad_norm": 8.132042752207497, "learning_rate": 1.9671720723787274e-06, "loss": -0.2987, "step": 1944 }, { "epoch": 0.173815956947949, "grad_norm": 13.324171224446172, "learning_rate": 1.967013403119159e-06, "loss": 0.0978, "step": 1946 }, { "epoch": 0.17399459616372284, "grad_norm": 8.415943328695626, "learning_rate": 1.966854357760653e-06, "loss": -0.2519, "step": 1948 }, { "epoch": 0.1741732353794967, "grad_norm": 13.09231188030188, "learning_rate": 1.9666949363650665e-06, "loss": 0.0213, "step": 1950 }, { "epoch": 0.17435187459527052, "grad_norm": 27.197187570018155, "learning_rate": 1.966535138994404e-06, "loss": -0.2317, "step": 1952 }, { "epoch": 0.17453051381104437, "grad_norm": 8.749418838819516, "learning_rate": 1.9663749657108136e-06, "loss": 0.5051, "step": 1954 }, { "epoch": 0.17470915302681822, "grad_norm": 17.287633561882373, "learning_rate": 1.966214416576592e-06, "loss": -0.8446, "step": 1956 }, { "epoch": 0.17488779224259204, "grad_norm": 7.325464998139356, "learning_rate": 1.966053491654182e-06, "loss": -0.0586, "step": 1958 }, { "epoch": 0.1750664314583659, "grad_norm": 18.04884915473967, "learning_rate": 1.9658921910061705e-06, "loss": -0.775, "step": 1960 }, { "epoch": 0.17524507067413975, "grad_norm": 11.402769294474382, "learning_rate": 1.9657305146952934e-06, "loss": 0.9192, "step": 1962 }, { "epoch": 0.17542370988991357, "grad_norm": 25.326447739036066, "learning_rate": 1.9655684627844296e-06, "loss": -0.5734, "step": 1964 }, { "epoch": 0.17560234910568742, "grad_norm": 16.913940711166614, "learning_rate": 1.965406035336607e-06, "loss": 0.4026, "step": 1966 }, { "epoch": 0.17578098832146127, "grad_norm": 7.568589966245261, "learning_rate": 1.965243232414998e-06, "loss": 0.5638, "step": 1968 }, { "epoch": 0.17595962753723512, "grad_norm": 11.364944205191215, "learning_rate": 1.96508005408292e-06, "loss": 0.3506, "step": 1970 }, { "epoch": 0.17613826675300895, "grad_norm": 13.870978795109414, "learning_rate": 1.9649165004038396e-06, "loss": -0.8679, "step": 1972 }, { "epoch": 0.1763169059687828, "grad_norm": 14.240354251527044, "learning_rate": 1.9647525714413662e-06, "loss": 0.4197, "step": 1974 }, { "epoch": 0.17649554518455665, "grad_norm": 12.464905569221509, "learning_rate": 1.964588267259257e-06, "loss": 0.378, "step": 1976 }, { "epoch": 0.17667418440033048, "grad_norm": 31.694125293577674, "learning_rate": 1.964423587921414e-06, "loss": -0.7766, "step": 1978 }, { "epoch": 0.17685282361610433, "grad_norm": 13.316605628824217, "learning_rate": 1.9642585334918867e-06, "loss": -0.4876, "step": 1980 }, { "epoch": 0.17703146283187818, "grad_norm": 22.66037677933114, "learning_rate": 1.9640931040348685e-06, "loss": -1.1886, "step": 1982 }, { "epoch": 0.177210102047652, "grad_norm": 12.812585151203326, "learning_rate": 1.963927299614699e-06, "loss": -0.7275, "step": 1984 }, { "epoch": 0.17738874126342585, "grad_norm": 7.465098745481211, "learning_rate": 1.963761120295866e-06, "loss": -0.647, "step": 1986 }, { "epoch": 0.1775673804791997, "grad_norm": 7.271940771837414, "learning_rate": 1.9635945661430005e-06, "loss": -1.1782, "step": 1988 }, { "epoch": 0.17774601969497353, "grad_norm": 9.236134214699005, "learning_rate": 1.96342763722088e-06, "loss": -0.5372, "step": 1990 }, { "epoch": 0.17792465891074738, "grad_norm": 11.908163242357766, "learning_rate": 1.9632603335944277e-06, "loss": -0.3728, "step": 1992 }, { "epoch": 0.17810329812652123, "grad_norm": 16.037760046666527, "learning_rate": 1.9630926553287128e-06, "loss": -0.875, "step": 1994 }, { "epoch": 0.17828193734229505, "grad_norm": 8.663412608064915, "learning_rate": 1.9629246024889506e-06, "loss": 0.7201, "step": 1996 }, { "epoch": 0.1784605765580689, "grad_norm": 5.758836010836481, "learning_rate": 1.9627561751405016e-06, "loss": 0.4285, "step": 1998 }, { "epoch": 0.17863921577384276, "grad_norm": 15.139408531175453, "learning_rate": 1.962587373348871e-06, "loss": -0.9175, "step": 2000 }, { "epoch": 0.1788178549896166, "grad_norm": 10.012031946183864, "learning_rate": 1.9624181971797123e-06, "loss": -0.7408, "step": 2002 }, { "epoch": 0.17899649420539043, "grad_norm": 9.897470524862772, "learning_rate": 1.9622486466988213e-06, "loss": -0.3061, "step": 2004 }, { "epoch": 0.17917513342116428, "grad_norm": 5.127751797910714, "learning_rate": 1.962078721972142e-06, "loss": 0.1192, "step": 2006 }, { "epoch": 0.17935377263693814, "grad_norm": 8.014090526856823, "learning_rate": 1.9619084230657623e-06, "loss": 0.1646, "step": 2008 }, { "epoch": 0.17953241185271196, "grad_norm": 14.641093992621633, "learning_rate": 1.9617377500459165e-06, "loss": 1.0753, "step": 2010 }, { "epoch": 0.1797110510684858, "grad_norm": 9.270751358412744, "learning_rate": 1.9615667029789847e-06, "loss": -0.2895, "step": 2012 }, { "epoch": 0.17988969028425966, "grad_norm": 12.278914600827472, "learning_rate": 1.961395281931491e-06, "loss": 1.2567, "step": 2014 }, { "epoch": 0.18006832950003349, "grad_norm": 12.773129296740482, "learning_rate": 1.9612234869701067e-06, "loss": -0.1078, "step": 2016 }, { "epoch": 0.18024696871580734, "grad_norm": 4.691847349293481, "learning_rate": 1.9610513181616473e-06, "loss": 0.6975, "step": 2018 }, { "epoch": 0.1804256079315812, "grad_norm": 8.472044807063442, "learning_rate": 1.960878775573074e-06, "loss": 0.6006, "step": 2020 }, { "epoch": 0.180604247147355, "grad_norm": 12.65589614185105, "learning_rate": 1.9607058592714943e-06, "loss": -1.0186, "step": 2022 }, { "epoch": 0.18078288636312886, "grad_norm": 5.370938563099003, "learning_rate": 1.9605325693241595e-06, "loss": -0.2047, "step": 2024 }, { "epoch": 0.18096152557890272, "grad_norm": 22.259583884408784, "learning_rate": 1.9603589057984673e-06, "loss": -0.5062, "step": 2026 }, { "epoch": 0.18114016479467654, "grad_norm": 13.204932383689531, "learning_rate": 1.96018486876196e-06, "loss": -0.2301, "step": 2028 }, { "epoch": 0.1813188040104504, "grad_norm": 30.197216547341434, "learning_rate": 1.9600104582823257e-06, "loss": 0.4433, "step": 2030 }, { "epoch": 0.18149744322622424, "grad_norm": 12.369130601853273, "learning_rate": 1.959835674427398e-06, "loss": -0.0888, "step": 2032 }, { "epoch": 0.18167608244199807, "grad_norm": 12.741570173005869, "learning_rate": 1.9596605172651546e-06, "loss": -0.4973, "step": 2034 }, { "epoch": 0.18185472165777192, "grad_norm": 4.78006904328732, "learning_rate": 1.95948498686372e-06, "loss": 0.5623, "step": 2036 }, { "epoch": 0.18203336087354577, "grad_norm": 8.902019365936242, "learning_rate": 1.959309083291362e-06, "loss": 0.1303, "step": 2038 }, { "epoch": 0.18221200008931962, "grad_norm": 16.322808644079917, "learning_rate": 1.959132806616495e-06, "loss": -1.6378, "step": 2040 }, { "epoch": 0.18239063930509344, "grad_norm": 13.537523467236412, "learning_rate": 1.958956156907678e-06, "loss": -1.5085, "step": 2042 }, { "epoch": 0.1825692785208673, "grad_norm": 11.946614725249772, "learning_rate": 1.9587791342336148e-06, "loss": -0.6513, "step": 2044 }, { "epoch": 0.18274791773664115, "grad_norm": 9.168378589183032, "learning_rate": 1.9586017386631547e-06, "loss": 1.1734, "step": 2046 }, { "epoch": 0.18292655695241497, "grad_norm": 9.641667981973315, "learning_rate": 1.958423970265292e-06, "loss": -0.296, "step": 2048 }, { "epoch": 0.18310519616818882, "grad_norm": 17.65744344003091, "learning_rate": 1.9582458291091663e-06, "loss": -1.3639, "step": 2050 }, { "epoch": 0.18328383538396267, "grad_norm": 6.465492792192907, "learning_rate": 1.9580673152640605e-06, "loss": 0.0228, "step": 2052 }, { "epoch": 0.1834624745997365, "grad_norm": 4.579009195292284, "learning_rate": 1.957888428799405e-06, "loss": 0.1959, "step": 2054 }, { "epoch": 0.18364111381551035, "grad_norm": 11.350766032579791, "learning_rate": 1.9577091697847727e-06, "loss": -0.0265, "step": 2056 }, { "epoch": 0.1838197530312842, "grad_norm": 13.18731660398988, "learning_rate": 1.957529538289883e-06, "loss": -0.2332, "step": 2058 }, { "epoch": 0.18399839224705802, "grad_norm": 7.469396331784738, "learning_rate": 1.9573495343846e-06, "loss": 0.3269, "step": 2060 }, { "epoch": 0.18417703146283187, "grad_norm": 6.841869635906994, "learning_rate": 1.957169158138932e-06, "loss": -0.1237, "step": 2062 }, { "epoch": 0.18435567067860573, "grad_norm": 18.31017676709287, "learning_rate": 1.956988409623033e-06, "loss": -0.4786, "step": 2064 }, { "epoch": 0.18453430989437955, "grad_norm": 6.757384323050177, "learning_rate": 1.9568072889071996e-06, "loss": 0.8328, "step": 2066 }, { "epoch": 0.1847129491101534, "grad_norm": 7.3225453817589905, "learning_rate": 1.956625796061877e-06, "loss": -0.0506, "step": 2068 }, { "epoch": 0.18489158832592725, "grad_norm": 10.563026550531177, "learning_rate": 1.956443931157651e-06, "loss": -0.892, "step": 2070 }, { "epoch": 0.1850702275417011, "grad_norm": 9.057022438778816, "learning_rate": 1.956261694265255e-06, "loss": -0.04, "step": 2072 }, { "epoch": 0.18524886675747493, "grad_norm": 8.395574093989987, "learning_rate": 1.9560790854555666e-06, "loss": 0.0928, "step": 2074 }, { "epoch": 0.18542750597324878, "grad_norm": 9.835399561466296, "learning_rate": 1.955896104799606e-06, "loss": 1.0844, "step": 2076 }, { "epoch": 0.18560614518902263, "grad_norm": 12.086453230148633, "learning_rate": 1.955712752368541e-06, "loss": 0.3963, "step": 2078 }, { "epoch": 0.18578478440479645, "grad_norm": 19.201281441205296, "learning_rate": 1.955529028233682e-06, "loss": -0.64, "step": 2080 }, { "epoch": 0.1859634236205703, "grad_norm": 9.14999436341826, "learning_rate": 1.955344932466484e-06, "loss": -0.0435, "step": 2082 }, { "epoch": 0.18614206283634416, "grad_norm": 3.288300269595966, "learning_rate": 1.955160465138548e-06, "loss": 0.1043, "step": 2084 }, { "epoch": 0.18632070205211798, "grad_norm": 6.1382875143152456, "learning_rate": 1.9549756263216182e-06, "loss": -0.123, "step": 2086 }, { "epoch": 0.18649934126789183, "grad_norm": 16.546016451109633, "learning_rate": 1.9547904160875835e-06, "loss": 0.5481, "step": 2088 }, { "epoch": 0.18667798048366568, "grad_norm": 10.226722896708148, "learning_rate": 1.9546048345084775e-06, "loss": 0.1618, "step": 2090 }, { "epoch": 0.1868566196994395, "grad_norm": 17.171254751650903, "learning_rate": 1.954418881656478e-06, "loss": -0.6188, "step": 2092 }, { "epoch": 0.18703525891521336, "grad_norm": 5.617309568986139, "learning_rate": 1.954232557603908e-06, "loss": 0.9418, "step": 2094 }, { "epoch": 0.1872138981309872, "grad_norm": 9.669044289921768, "learning_rate": 1.954045862423233e-06, "loss": 1.2598, "step": 2096 }, { "epoch": 0.18739253734676103, "grad_norm": 2.5290690315753324, "learning_rate": 1.953858796187065e-06, "loss": -0.0519, "step": 2098 }, { "epoch": 0.18757117656253489, "grad_norm": 14.91439449686134, "learning_rate": 1.95367135896816e-06, "loss": -0.1064, "step": 2100 }, { "epoch": 0.18774981577830874, "grad_norm": 9.626242817488555, "learning_rate": 1.9534835508394163e-06, "loss": 0.3294, "step": 2102 }, { "epoch": 0.1879284549940826, "grad_norm": 8.453432765018913, "learning_rate": 1.953295371873878e-06, "loss": -0.1643, "step": 2104 }, { "epoch": 0.1881070942098564, "grad_norm": 5.406723619122685, "learning_rate": 1.9531068221447337e-06, "loss": -0.0782, "step": 2106 }, { "epoch": 0.18828573342563026, "grad_norm": 10.041146503657346, "learning_rate": 1.9529179017253156e-06, "loss": -0.8217, "step": 2108 }, { "epoch": 0.18846437264140412, "grad_norm": 11.07704060443249, "learning_rate": 1.9527286106891e-06, "loss": -0.1499, "step": 2110 }, { "epoch": 0.18864301185717794, "grad_norm": 21.593617859807342, "learning_rate": 1.952538949109708e-06, "loss": 0.103, "step": 2112 }, { "epoch": 0.1888216510729518, "grad_norm": 23.947253438873215, "learning_rate": 1.9523489170609036e-06, "loss": -1.9872, "step": 2114 }, { "epoch": 0.18900029028872564, "grad_norm": 12.156673629949815, "learning_rate": 1.9521585146165966e-06, "loss": -1.0088, "step": 2116 }, { "epoch": 0.18917892950449947, "grad_norm": 12.387337131757588, "learning_rate": 1.9519677418508394e-06, "loss": -0.2456, "step": 2118 }, { "epoch": 0.18935756872027332, "grad_norm": 10.4237913460214, "learning_rate": 1.9517765988378288e-06, "loss": -0.6399, "step": 2120 }, { "epoch": 0.18953620793604717, "grad_norm": 6.878231606877003, "learning_rate": 1.951585085651906e-06, "loss": 0.4228, "step": 2122 }, { "epoch": 0.189714847151821, "grad_norm": 9.42963652766878, "learning_rate": 1.951393202367556e-06, "loss": -0.9697, "step": 2124 }, { "epoch": 0.18989348636759484, "grad_norm": 9.281538525344866, "learning_rate": 1.9512009490594075e-06, "loss": -0.0193, "step": 2126 }, { "epoch": 0.1900721255833687, "grad_norm": 15.352897822809126, "learning_rate": 1.9510083258022334e-06, "loss": -0.6272, "step": 2128 }, { "epoch": 0.19025076479914252, "grad_norm": 8.967022559986598, "learning_rate": 1.9508153326709498e-06, "loss": -0.834, "step": 2130 }, { "epoch": 0.19042940401491637, "grad_norm": 20.691746578402796, "learning_rate": 1.950621969740618e-06, "loss": -0.1451, "step": 2132 }, { "epoch": 0.19060804323069022, "grad_norm": 3.1485978835836317, "learning_rate": 1.9504282370864417e-06, "loss": 0.81, "step": 2134 }, { "epoch": 0.19078668244646407, "grad_norm": 13.755575263509634, "learning_rate": 1.9502341347837692e-06, "loss": -0.522, "step": 2136 }, { "epoch": 0.1909653216622379, "grad_norm": 7.4100393910461415, "learning_rate": 1.9500396629080926e-06, "loss": -1.002, "step": 2138 }, { "epoch": 0.19114396087801175, "grad_norm": 4.323076850343918, "learning_rate": 1.9498448215350476e-06, "loss": -0.721, "step": 2140 }, { "epoch": 0.1913226000937856, "grad_norm": 13.636570488487932, "learning_rate": 1.949649610740413e-06, "loss": 0.1687, "step": 2142 }, { "epoch": 0.19150123930955942, "grad_norm": 5.586895541750677, "learning_rate": 1.9494540306001123e-06, "loss": 0.4185, "step": 2144 }, { "epoch": 0.19167987852533327, "grad_norm": 5.327690934653626, "learning_rate": 1.949258081190212e-06, "loss": 0.4357, "step": 2146 }, { "epoch": 0.19185851774110713, "grad_norm": 5.013888715119974, "learning_rate": 1.9490617625869224e-06, "loss": -1.5296, "step": 2148 }, { "epoch": 0.19203715695688095, "grad_norm": 7.250114440014761, "learning_rate": 1.948865074866597e-06, "loss": 0.3692, "step": 2150 }, { "epoch": 0.1922157961726548, "grad_norm": 13.270390206229278, "learning_rate": 1.9486680181057337e-06, "loss": 0.8274, "step": 2152 }, { "epoch": 0.19239443538842865, "grad_norm": 11.427888427488817, "learning_rate": 1.9484705923809735e-06, "loss": 0.2969, "step": 2154 }, { "epoch": 0.19257307460420248, "grad_norm": 9.309267443099293, "learning_rate": 1.9482727977691008e-06, "loss": 0.1124, "step": 2156 }, { "epoch": 0.19275171381997633, "grad_norm": 1.9221560885176543, "learning_rate": 1.9480746343470435e-06, "loss": 0.5837, "step": 2158 }, { "epoch": 0.19293035303575018, "grad_norm": 6.834547012599419, "learning_rate": 1.947876102191873e-06, "loss": 0.9747, "step": 2160 }, { "epoch": 0.193108992251524, "grad_norm": 27.327728565234914, "learning_rate": 1.9476772013808033e-06, "loss": 0.3906, "step": 2162 }, { "epoch": 0.19328763146729785, "grad_norm": 7.40306492817742, "learning_rate": 1.947477931991194e-06, "loss": -0.6661, "step": 2164 }, { "epoch": 0.1934662706830717, "grad_norm": 12.446919770271604, "learning_rate": 1.9472782941005456e-06, "loss": -0.1196, "step": 2166 }, { "epoch": 0.19364490989884556, "grad_norm": 15.436764101102469, "learning_rate": 1.947078287786503e-06, "loss": -0.4677, "step": 2168 }, { "epoch": 0.19382354911461938, "grad_norm": 3.783545183700664, "learning_rate": 1.946877913126855e-06, "loss": 0.2979, "step": 2170 }, { "epoch": 0.19400218833039323, "grad_norm": 8.494867458196403, "learning_rate": 1.9466771701995325e-06, "loss": 0.5559, "step": 2172 }, { "epoch": 0.19418082754616708, "grad_norm": 16.702373148712375, "learning_rate": 1.94647605908261e-06, "loss": 0.0615, "step": 2174 }, { "epoch": 0.1943594667619409, "grad_norm": 10.782307537014232, "learning_rate": 1.9462745798543053e-06, "loss": -0.3113, "step": 2176 }, { "epoch": 0.19453810597771476, "grad_norm": 7.770718843845636, "learning_rate": 1.9460727325929795e-06, "loss": 0.3729, "step": 2178 }, { "epoch": 0.1947167451934886, "grad_norm": 11.81007965965441, "learning_rate": 1.945870517377137e-06, "loss": -0.4186, "step": 2180 }, { "epoch": 0.19489538440926243, "grad_norm": 12.61587164191029, "learning_rate": 1.945667934285425e-06, "loss": 0.4765, "step": 2182 }, { "epoch": 0.19507402362503629, "grad_norm": 5.965241204730554, "learning_rate": 1.9454649833966333e-06, "loss": -0.0998, "step": 2184 }, { "epoch": 0.19525266284081014, "grad_norm": 8.495875984996568, "learning_rate": 1.945261664789696e-06, "loss": -0.1742, "step": 2186 }, { "epoch": 0.19543130205658396, "grad_norm": 11.997646736826338, "learning_rate": 1.945057978543689e-06, "loss": -0.5804, "step": 2188 }, { "epoch": 0.1956099412723578, "grad_norm": 12.220000610351548, "learning_rate": 1.9448539247378313e-06, "loss": 0.1687, "step": 2190 }, { "epoch": 0.19578858048813166, "grad_norm": 9.846071387367488, "learning_rate": 1.944649503451486e-06, "loss": -0.2462, "step": 2192 }, { "epoch": 0.1959672197039055, "grad_norm": 8.778603586015839, "learning_rate": 1.9444447147641577e-06, "loss": -0.2243, "step": 2194 }, { "epoch": 0.19614585891967934, "grad_norm": 18.505014925489203, "learning_rate": 1.9442395587554946e-06, "loss": -0.4093, "step": 2196 }, { "epoch": 0.1963244981354532, "grad_norm": 11.233851392744066, "learning_rate": 1.9440340355052884e-06, "loss": -0.3197, "step": 2198 }, { "epoch": 0.19650313735122704, "grad_norm": 9.29221218087251, "learning_rate": 1.943828145093472e-06, "loss": 0.1825, "step": 2200 }, { "epoch": 0.19668177656700087, "grad_norm": 14.489476561638568, "learning_rate": 1.943621887600123e-06, "loss": -0.4806, "step": 2202 }, { "epoch": 0.19686041578277472, "grad_norm": 9.428745276429572, "learning_rate": 1.9434152631054597e-06, "loss": -0.4404, "step": 2204 }, { "epoch": 0.19703905499854857, "grad_norm": 12.628566729119973, "learning_rate": 1.943208271689845e-06, "loss": 0.5629, "step": 2206 }, { "epoch": 0.1972176942143224, "grad_norm": 14.942647517884842, "learning_rate": 1.943000913433783e-06, "loss": 0.0634, "step": 2208 }, { "epoch": 0.19739633343009624, "grad_norm": 6.893033875995751, "learning_rate": 1.942793188417922e-06, "loss": 0.5565, "step": 2210 }, { "epoch": 0.1975749726458701, "grad_norm": 14.65787612580797, "learning_rate": 1.942585096723052e-06, "loss": -0.3917, "step": 2212 }, { "epoch": 0.19775361186164392, "grad_norm": 10.682785689847702, "learning_rate": 1.9423766384301055e-06, "loss": 0.0978, "step": 2214 }, { "epoch": 0.19793225107741777, "grad_norm": 7.104684342869597, "learning_rate": 1.942167813620158e-06, "loss": 0.1773, "step": 2216 }, { "epoch": 0.19811089029319162, "grad_norm": 8.638358565635956, "learning_rate": 1.941958622374427e-06, "loss": 0.2775, "step": 2218 }, { "epoch": 0.19828952950896545, "grad_norm": 15.63170315201185, "learning_rate": 1.9417490647742737e-06, "loss": -1.3329, "step": 2220 }, { "epoch": 0.1984681687247393, "grad_norm": 19.912300088744615, "learning_rate": 1.9415391409012e-06, "loss": 0.1954, "step": 2222 }, { "epoch": 0.19864680794051315, "grad_norm": 8.36601435738831, "learning_rate": 1.941328850836852e-06, "loss": 0.3241, "step": 2224 }, { "epoch": 0.19882544715628697, "grad_norm": 11.648884181995582, "learning_rate": 1.9411181946630172e-06, "loss": -0.3406, "step": 2226 }, { "epoch": 0.19900408637206082, "grad_norm": 15.422385534862546, "learning_rate": 1.940907172461626e-06, "loss": -0.103, "step": 2228 }, { "epoch": 0.19918272558783467, "grad_norm": 11.216909613583274, "learning_rate": 1.9406957843147505e-06, "loss": -0.5823, "step": 2230 }, { "epoch": 0.1993613648036085, "grad_norm": 8.644654367486348, "learning_rate": 1.9404840303046055e-06, "loss": -0.2731, "step": 2232 }, { "epoch": 0.19954000401938235, "grad_norm": 9.567396319096442, "learning_rate": 1.940271910513549e-06, "loss": -0.7198, "step": 2234 }, { "epoch": 0.1997186432351562, "grad_norm": 18.04830469850934, "learning_rate": 1.9400594250240794e-06, "loss": -1.8288, "step": 2236 }, { "epoch": 0.19989728245093005, "grad_norm": 13.807128966687259, "learning_rate": 1.939846573918839e-06, "loss": -0.7117, "step": 2238 }, { "epoch": 0.20007592166670388, "grad_norm": 18.80576911227767, "learning_rate": 1.9396333572806112e-06, "loss": -0.6041, "step": 2240 }, { "epoch": 0.20025456088247773, "grad_norm": 15.643839228608394, "learning_rate": 1.939419775192322e-06, "loss": 0.2694, "step": 2242 }, { "epoch": 0.20043320009825158, "grad_norm": 7.535421522028376, "learning_rate": 1.9392058277370403e-06, "loss": -0.9879, "step": 2244 }, { "epoch": 0.2006118393140254, "grad_norm": 11.747885655845161, "learning_rate": 1.9389915149979754e-06, "loss": -0.2689, "step": 2246 }, { "epoch": 0.20079047852979925, "grad_norm": 9.883763927152236, "learning_rate": 1.9387768370584797e-06, "loss": -0.5326, "step": 2248 }, { "epoch": 0.2009691177455731, "grad_norm": 16.01548494603397, "learning_rate": 1.938561794002048e-06, "loss": -0.7701, "step": 2250 }, { "epoch": 0.20114775696134693, "grad_norm": 7.518595721024701, "learning_rate": 1.9383463859123165e-06, "loss": 0.6039, "step": 2252 }, { "epoch": 0.20132639617712078, "grad_norm": 8.158367948942486, "learning_rate": 1.9381306128730633e-06, "loss": -0.1914, "step": 2254 }, { "epoch": 0.20150503539289463, "grad_norm": 12.447995458803108, "learning_rate": 1.937914474968209e-06, "loss": -0.911, "step": 2256 }, { "epoch": 0.20168367460866846, "grad_norm": 11.243745888014292, "learning_rate": 1.937697972281815e-06, "loss": 0.3363, "step": 2258 }, { "epoch": 0.2018623138244423, "grad_norm": 9.865548934860787, "learning_rate": 1.9374811048980863e-06, "loss": -0.0551, "step": 2260 }, { "epoch": 0.20204095304021616, "grad_norm": 16.139042380663227, "learning_rate": 1.9372638729013683e-06, "loss": 0.2317, "step": 2262 }, { "epoch": 0.20221959225598998, "grad_norm": 20.12085448981704, "learning_rate": 1.937046276376149e-06, "loss": 0.9402, "step": 2264 }, { "epoch": 0.20239823147176383, "grad_norm": 7.5235949348305695, "learning_rate": 1.936828315407057e-06, "loss": -0.5622, "step": 2266 }, { "epoch": 0.20257687068753769, "grad_norm": 8.795380541629243, "learning_rate": 1.936609990078865e-06, "loss": -0.8208, "step": 2268 }, { "epoch": 0.20275550990331154, "grad_norm": 4.215696006499267, "learning_rate": 1.9363913004764844e-06, "loss": -0.3547, "step": 2270 }, { "epoch": 0.20293414911908536, "grad_norm": 27.33208121556459, "learning_rate": 1.9361722466849702e-06, "loss": 0.1836, "step": 2272 }, { "epoch": 0.2031127883348592, "grad_norm": 3.280751072326545, "learning_rate": 1.935952828789519e-06, "loss": 0.4729, "step": 2274 }, { "epoch": 0.20329142755063306, "grad_norm": 5.159759067262942, "learning_rate": 1.935733046875469e-06, "loss": 0.2195, "step": 2276 }, { "epoch": 0.2034700667664069, "grad_norm": 13.013395010796808, "learning_rate": 1.9355129010282986e-06, "loss": 0.2101, "step": 2278 }, { "epoch": 0.20364870598218074, "grad_norm": 32.262316340648304, "learning_rate": 1.9352923913336293e-06, "loss": -0.3407, "step": 2280 }, { "epoch": 0.2038273451979546, "grad_norm": 14.641802143012217, "learning_rate": 1.9350715178772236e-06, "loss": -0.0207, "step": 2282 }, { "epoch": 0.20400598441372841, "grad_norm": 12.642088551900812, "learning_rate": 1.9348502807449858e-06, "loss": -0.5094, "step": 2284 }, { "epoch": 0.20418462362950227, "grad_norm": 9.987521587630182, "learning_rate": 1.93462868002296e-06, "loss": -0.1735, "step": 2286 }, { "epoch": 0.20436326284527612, "grad_norm": 16.413784542208628, "learning_rate": 1.9344067157973345e-06, "loss": 0.5074, "step": 2288 }, { "epoch": 0.20454190206104994, "grad_norm": 14.405445531531264, "learning_rate": 1.934184388154437e-06, "loss": 0.6059, "step": 2290 }, { "epoch": 0.2047205412768238, "grad_norm": 16.595314081477657, "learning_rate": 1.933961697180737e-06, "loss": -0.1032, "step": 2292 }, { "epoch": 0.20489918049259764, "grad_norm": 8.558745737610986, "learning_rate": 1.933738642962845e-06, "loss": -0.3379, "step": 2294 }, { "epoch": 0.20507781970837147, "grad_norm": 15.337453758462281, "learning_rate": 1.9335152255875125e-06, "loss": 0.0014, "step": 2296 }, { "epoch": 0.20525645892414532, "grad_norm": 3.1666969164022567, "learning_rate": 1.9332914451416345e-06, "loss": 0.3072, "step": 2298 }, { "epoch": 0.20543509813991917, "grad_norm": 14.10512853110195, "learning_rate": 1.9330673017122446e-06, "loss": 0.08, "step": 2300 }, { "epoch": 0.20561373735569302, "grad_norm": 26.868160921229098, "learning_rate": 1.932842795386519e-06, "loss": 1.0571, "step": 2302 }, { "epoch": 0.20579237657146685, "grad_norm": 14.809903331701255, "learning_rate": 1.932617926251774e-06, "loss": 0.6897, "step": 2304 }, { "epoch": 0.2059710157872407, "grad_norm": 11.717067587564838, "learning_rate": 1.932392694395468e-06, "loss": 0.1145, "step": 2306 }, { "epoch": 0.20614965500301455, "grad_norm": 7.54681308002225, "learning_rate": 1.9321670999052e-06, "loss": -0.2811, "step": 2308 }, { "epoch": 0.20632829421878837, "grad_norm": 5.957912012245086, "learning_rate": 1.93194114286871e-06, "loss": -0.3226, "step": 2310 }, { "epoch": 0.20650693343456222, "grad_norm": 4.126844051741304, "learning_rate": 1.931714823373879e-06, "loss": 0.9939, "step": 2312 }, { "epoch": 0.20668557265033607, "grad_norm": 9.466001609042877, "learning_rate": 1.931488141508729e-06, "loss": -0.6783, "step": 2314 }, { "epoch": 0.2068642118661099, "grad_norm": 17.38966508188574, "learning_rate": 1.9312610973614233e-06, "loss": 0.675, "step": 2316 }, { "epoch": 0.20704285108188375, "grad_norm": 13.62388284285432, "learning_rate": 1.931033691020266e-06, "loss": -0.3676, "step": 2318 }, { "epoch": 0.2072214902976576, "grad_norm": 5.212422263757311, "learning_rate": 1.9308059225737012e-06, "loss": -0.2147, "step": 2320 }, { "epoch": 0.20740012951343142, "grad_norm": 5.123085036373785, "learning_rate": 1.930577792110315e-06, "loss": -1.2387, "step": 2322 }, { "epoch": 0.20757876872920528, "grad_norm": 8.25358596086446, "learning_rate": 1.930349299718834e-06, "loss": 0.0723, "step": 2324 }, { "epoch": 0.20775740794497913, "grad_norm": 7.042223966617087, "learning_rate": 1.9301204454881242e-06, "loss": 0.0891, "step": 2326 }, { "epoch": 0.20793604716075295, "grad_norm": 18.50419940196853, "learning_rate": 1.929891229507195e-06, "loss": -1.2236, "step": 2328 }, { "epoch": 0.2081146863765268, "grad_norm": 15.909274370200801, "learning_rate": 1.9296616518651944e-06, "loss": -0.5551, "step": 2330 }, { "epoch": 0.20829332559230065, "grad_norm": 6.959299838923947, "learning_rate": 1.929431712651411e-06, "loss": -0.2459, "step": 2332 }, { "epoch": 0.2084719648080745, "grad_norm": 10.541683835303356, "learning_rate": 1.929201411955276e-06, "loss": -0.9033, "step": 2334 }, { "epoch": 0.20865060402384833, "grad_norm": 13.163948905296238, "learning_rate": 1.928970749866359e-06, "loss": -0.5748, "step": 2336 }, { "epoch": 0.20882924323962218, "grad_norm": 10.274216863855237, "learning_rate": 1.928739726474371e-06, "loss": -0.9291, "step": 2338 }, { "epoch": 0.20900788245539603, "grad_norm": 12.952353386622569, "learning_rate": 1.9285083418691637e-06, "loss": -1.4101, "step": 2340 }, { "epoch": 0.20918652167116986, "grad_norm": 8.167195776582519, "learning_rate": 1.9282765961407295e-06, "loss": -0.6313, "step": 2342 }, { "epoch": 0.2093651608869437, "grad_norm": 17.02958617876318, "learning_rate": 1.9280444893792005e-06, "loss": -1.0058, "step": 2344 }, { "epoch": 0.20954380010271756, "grad_norm": 18.176099002568506, "learning_rate": 1.9278120216748497e-06, "loss": -0.0167, "step": 2346 }, { "epoch": 0.20972243931849138, "grad_norm": 3.673761410794444, "learning_rate": 1.92757919311809e-06, "loss": 0.875, "step": 2348 }, { "epoch": 0.20990107853426523, "grad_norm": 10.8220405603402, "learning_rate": 1.9273460037994757e-06, "loss": -0.3943, "step": 2350 }, { "epoch": 0.21007971775003909, "grad_norm": 13.675555866109073, "learning_rate": 1.9271124538097003e-06, "loss": -0.5174, "step": 2352 }, { "epoch": 0.2102583569658129, "grad_norm": 8.868409624618133, "learning_rate": 1.926878543239598e-06, "loss": -0.7058, "step": 2354 }, { "epoch": 0.21043699618158676, "grad_norm": 4.08853422840519, "learning_rate": 1.9266442721801436e-06, "loss": 0.212, "step": 2356 }, { "epoch": 0.2106156353973606, "grad_norm": 10.017328507259972, "learning_rate": 1.9264096407224515e-06, "loss": 0.7344, "step": 2358 }, { "epoch": 0.21079427461313444, "grad_norm": 11.100683490226865, "learning_rate": 1.9261746489577764e-06, "loss": 0.0863, "step": 2360 }, { "epoch": 0.2109729138289083, "grad_norm": 6.88064925701042, "learning_rate": 1.9259392969775136e-06, "loss": 0.7878, "step": 2362 }, { "epoch": 0.21115155304468214, "grad_norm": 10.221487069531403, "learning_rate": 1.925703584873198e-06, "loss": 0.5254, "step": 2364 }, { "epoch": 0.211330192260456, "grad_norm": 7.7613238391137696, "learning_rate": 1.9254675127365044e-06, "loss": -0.1229, "step": 2366 }, { "epoch": 0.2115088314762298, "grad_norm": 7.16990777588167, "learning_rate": 1.9252310806592486e-06, "loss": -0.1505, "step": 2368 }, { "epoch": 0.21168747069200367, "grad_norm": 6.926461725859381, "learning_rate": 1.9249942887333855e-06, "loss": 0.6656, "step": 2370 }, { "epoch": 0.21186610990777752, "grad_norm": 23.953489376713208, "learning_rate": 1.92475713705101e-06, "loss": -1.4039, "step": 2372 }, { "epoch": 0.21204474912355134, "grad_norm": 7.250675564564872, "learning_rate": 1.9245196257043573e-06, "loss": -0.1698, "step": 2374 }, { "epoch": 0.2122233883393252, "grad_norm": 22.82064949682702, "learning_rate": 1.924281754785802e-06, "loss": 0.196, "step": 2376 }, { "epoch": 0.21240202755509904, "grad_norm": 5.620183281132866, "learning_rate": 1.924043524387859e-06, "loss": -0.7774, "step": 2378 }, { "epoch": 0.21258066677087287, "grad_norm": 12.389193939269969, "learning_rate": 1.923804934603183e-06, "loss": -0.7173, "step": 2380 }, { "epoch": 0.21275930598664672, "grad_norm": 18.89691950807145, "learning_rate": 1.923565985524568e-06, "loss": -0.2265, "step": 2382 }, { "epoch": 0.21293794520242057, "grad_norm": 9.52188280416381, "learning_rate": 1.9233266772449488e-06, "loss": 0.0398, "step": 2384 }, { "epoch": 0.2131165844181944, "grad_norm": 22.47504787758214, "learning_rate": 1.9230870098573986e-06, "loss": -0.2815, "step": 2386 }, { "epoch": 0.21329522363396825, "grad_norm": 11.272585090456493, "learning_rate": 1.922846983455131e-06, "loss": -0.9487, "step": 2388 }, { "epoch": 0.2134738628497421, "grad_norm": 19.4980099958644, "learning_rate": 1.922606598131499e-06, "loss": -0.5516, "step": 2390 }, { "epoch": 0.21365250206551592, "grad_norm": 5.631889024182995, "learning_rate": 1.9223658539799952e-06, "loss": -0.9456, "step": 2392 }, { "epoch": 0.21383114128128977, "grad_norm": 29.022532161182543, "learning_rate": 1.9221247510942518e-06, "loss": 0.1351, "step": 2394 }, { "epoch": 0.21400978049706362, "grad_norm": 11.484540416538294, "learning_rate": 1.9218832895680406e-06, "loss": -0.6728, "step": 2396 }, { "epoch": 0.21418841971283747, "grad_norm": 10.97513544437885, "learning_rate": 1.9216414694952732e-06, "loss": 0.3418, "step": 2398 }, { "epoch": 0.2143670589286113, "grad_norm": 11.386096279083649, "learning_rate": 1.9213992909700002e-06, "loss": -0.5595, "step": 2400 }, { "epoch": 0.21454569814438515, "grad_norm": 11.13942683171626, "learning_rate": 1.921156754086411e-06, "loss": 0.0164, "step": 2402 }, { "epoch": 0.214724337360159, "grad_norm": 6.706828768055991, "learning_rate": 1.9209138589388354e-06, "loss": 0.5699, "step": 2404 }, { "epoch": 0.21490297657593282, "grad_norm": 3.6937244704860146, "learning_rate": 1.920670605621743e-06, "loss": 0.2305, "step": 2406 }, { "epoch": 0.21508161579170668, "grad_norm": 25.85573874547426, "learning_rate": 1.920426994229741e-06, "loss": -1.4667, "step": 2408 }, { "epoch": 0.21526025500748053, "grad_norm": 16.760574874305103, "learning_rate": 1.920183024857577e-06, "loss": -0.5839, "step": 2410 }, { "epoch": 0.21543889422325435, "grad_norm": 5.234612940604057, "learning_rate": 1.9199386976001378e-06, "loss": 0.1716, "step": 2412 }, { "epoch": 0.2156175334390282, "grad_norm": 14.515129877521872, "learning_rate": 1.919694012552449e-06, "loss": -0.182, "step": 2414 }, { "epoch": 0.21579617265480205, "grad_norm": 8.201953041282684, "learning_rate": 1.9194489698096757e-06, "loss": -0.03, "step": 2416 }, { "epoch": 0.21597481187057588, "grad_norm": 6.867928769808129, "learning_rate": 1.9192035694671216e-06, "loss": 0.7976, "step": 2418 }, { "epoch": 0.21615345108634973, "grad_norm": 9.927133203136366, "learning_rate": 1.9189578116202307e-06, "loss": -0.6097, "step": 2420 }, { "epoch": 0.21633209030212358, "grad_norm": 10.566513833786873, "learning_rate": 1.918711696364584e-06, "loss": -0.3898, "step": 2422 }, { "epoch": 0.2165107295178974, "grad_norm": 13.406032035851961, "learning_rate": 1.918465223795904e-06, "loss": 0.0269, "step": 2424 }, { "epoch": 0.21668936873367126, "grad_norm": 23.669845725726383, "learning_rate": 1.9182183940100498e-06, "loss": 0.2452, "step": 2426 }, { "epoch": 0.2168680079494451, "grad_norm": 11.547433305387948, "learning_rate": 1.9179712071030208e-06, "loss": 0.1669, "step": 2428 }, { "epoch": 0.21704664716521893, "grad_norm": 7.655756222119376, "learning_rate": 1.9177236631709556e-06, "loss": 1.0142, "step": 2430 }, { "epoch": 0.21722528638099278, "grad_norm": 11.33514599235355, "learning_rate": 1.9174757623101303e-06, "loss": -0.5333, "step": 2432 }, { "epoch": 0.21740392559676663, "grad_norm": 9.668175897402026, "learning_rate": 1.917227504616961e-06, "loss": 0.3627, "step": 2434 }, { "epoch": 0.21758256481254049, "grad_norm": 5.2141196901947655, "learning_rate": 1.916978890188002e-06, "loss": -1.3147, "step": 2436 }, { "epoch": 0.2177612040283143, "grad_norm": 6.615831184327596, "learning_rate": 1.9167299191199462e-06, "loss": -0.1527, "step": 2438 }, { "epoch": 0.21793984324408816, "grad_norm": 33.00273907015617, "learning_rate": 1.916480591509626e-06, "loss": -0.313, "step": 2440 }, { "epoch": 0.218118482459862, "grad_norm": 15.665188063519368, "learning_rate": 1.916230907454012e-06, "loss": -0.1094, "step": 2442 }, { "epoch": 0.21829712167563584, "grad_norm": 13.460547549488142, "learning_rate": 1.915980867050213e-06, "loss": 0.0445, "step": 2444 }, { "epoch": 0.2184757608914097, "grad_norm": 30.781034858067944, "learning_rate": 1.915730470395477e-06, "loss": -0.6278, "step": 2446 }, { "epoch": 0.21865440010718354, "grad_norm": 13.181865048698889, "learning_rate": 1.9154797175871905e-06, "loss": -0.4005, "step": 2448 }, { "epoch": 0.21883303932295736, "grad_norm": 15.562353907133053, "learning_rate": 1.9152286087228786e-06, "loss": 0.4829, "step": 2450 }, { "epoch": 0.2190116785387312, "grad_norm": 7.962148048371711, "learning_rate": 1.9149771439002034e-06, "loss": -1.3248, "step": 2452 }, { "epoch": 0.21919031775450507, "grad_norm": 21.77858145305662, "learning_rate": 1.9147253232169685e-06, "loss": -0.6419, "step": 2454 }, { "epoch": 0.2193689569702789, "grad_norm": 12.889688057805706, "learning_rate": 1.9144731467711132e-06, "loss": -0.7391, "step": 2456 }, { "epoch": 0.21954759618605274, "grad_norm": 12.866455798488298, "learning_rate": 1.914220614660715e-06, "loss": 0.4028, "step": 2458 }, { "epoch": 0.2197262354018266, "grad_norm": 18.886273181791733, "learning_rate": 1.9139677269839933e-06, "loss": 0.2803, "step": 2460 }, { "epoch": 0.21990487461760042, "grad_norm": 8.472486058975326, "learning_rate": 1.913714483839301e-06, "loss": -0.1742, "step": 2462 }, { "epoch": 0.22008351383337427, "grad_norm": 7.341555003581017, "learning_rate": 1.913460885325133e-06, "loss": -0.8056, "step": 2464 }, { "epoch": 0.22026215304914812, "grad_norm": 7.163757650530698, "learning_rate": 1.91320693154012e-06, "loss": -0.8086, "step": 2466 }, { "epoch": 0.22044079226492197, "grad_norm": 8.41101211608297, "learning_rate": 1.9129526225830323e-06, "loss": 1.2553, "step": 2468 }, { "epoch": 0.2206194314806958, "grad_norm": 11.443859896087904, "learning_rate": 1.9126979585527774e-06, "loss": 0.0674, "step": 2470 }, { "epoch": 0.22079807069646964, "grad_norm": 9.721993938369604, "learning_rate": 1.9124429395484027e-06, "loss": -0.8809, "step": 2472 }, { "epoch": 0.2209767099122435, "grad_norm": 22.365347543732764, "learning_rate": 1.9121875656690906e-06, "loss": 0.4032, "step": 2474 }, { "epoch": 0.22115534912801732, "grad_norm": 5.897796879015337, "learning_rate": 1.911931837014164e-06, "loss": -0.548, "step": 2476 }, { "epoch": 0.22133398834379117, "grad_norm": 8.118965137852728, "learning_rate": 1.9116757536830832e-06, "loss": 0.4134, "step": 2478 }, { "epoch": 0.22151262755956502, "grad_norm": 3.163351247460062, "learning_rate": 1.9114193157754463e-06, "loss": -0.2392, "step": 2480 }, { "epoch": 0.22169126677533885, "grad_norm": 16.06253039880392, "learning_rate": 1.911162523390989e-06, "loss": -0.946, "step": 2482 }, { "epoch": 0.2218699059911127, "grad_norm": 8.367154674305358, "learning_rate": 1.910905376629585e-06, "loss": 0.4112, "step": 2484 }, { "epoch": 0.22204854520688655, "grad_norm": 29.06162213055863, "learning_rate": 1.9106478755912465e-06, "loss": 0.8716, "step": 2486 }, { "epoch": 0.22222718442266037, "grad_norm": 4.522990519258767, "learning_rate": 1.910390020376122e-06, "loss": 0.5312, "step": 2488 }, { "epoch": 0.22240582363843422, "grad_norm": 4.308555678945582, "learning_rate": 1.9101318110845e-06, "loss": -0.0531, "step": 2490 }, { "epoch": 0.22258446285420808, "grad_norm": 15.16378678587575, "learning_rate": 1.9098732478168046e-06, "loss": -0.0029, "step": 2492 }, { "epoch": 0.2227631020699819, "grad_norm": 12.906066154297427, "learning_rate": 1.9096143306735987e-06, "loss": 0.1965, "step": 2494 }, { "epoch": 0.22294174128575575, "grad_norm": 8.423136827143974, "learning_rate": 1.909355059755582e-06, "loss": 0.6873, "step": 2496 }, { "epoch": 0.2231203805015296, "grad_norm": 10.365534129533671, "learning_rate": 1.9090954351635927e-06, "loss": -0.5151, "step": 2498 }, { "epoch": 0.22329901971730345, "grad_norm": 10.089504990925674, "learning_rate": 1.9088354569986067e-06, "loss": -0.2943, "step": 2500 }, { "epoch": 0.22347765893307728, "grad_norm": 37.73166398128865, "learning_rate": 1.908575125361736e-06, "loss": -0.9665, "step": 2502 }, { "epoch": 0.22365629814885113, "grad_norm": 3.0850586231170465, "learning_rate": 1.908314440354231e-06, "loss": 1.2091, "step": 2504 }, { "epoch": 0.22383493736462498, "grad_norm": 12.265491888210828, "learning_rate": 1.9080534020774804e-06, "loss": 0.3393, "step": 2506 }, { "epoch": 0.2240135765803988, "grad_norm": 6.623651043292554, "learning_rate": 1.9077920106330083e-06, "loss": -0.1585, "step": 2508 }, { "epoch": 0.22419221579617266, "grad_norm": 7.229923468853492, "learning_rate": 1.907530266122478e-06, "loss": 0.3614, "step": 2510 }, { "epoch": 0.2243708550119465, "grad_norm": 10.019152039607615, "learning_rate": 1.907268168647689e-06, "loss": -0.3783, "step": 2512 }, { "epoch": 0.22454949422772033, "grad_norm": 10.32327133375237, "learning_rate": 1.9070057183105783e-06, "loss": -1.3064, "step": 2514 }, { "epoch": 0.22472813344349418, "grad_norm": 4.795949641633854, "learning_rate": 1.9067429152132207e-06, "loss": 0.1792, "step": 2516 }, { "epoch": 0.22490677265926803, "grad_norm": 3.829818911342514, "learning_rate": 1.9064797594578277e-06, "loss": -0.1671, "step": 2518 }, { "epoch": 0.22508541187504186, "grad_norm": 5.684364880033254, "learning_rate": 1.9062162511467476e-06, "loss": 0.0599, "step": 2520 }, { "epoch": 0.2252640510908157, "grad_norm": 8.246780229597091, "learning_rate": 1.9059523903824669e-06, "loss": -0.9178, "step": 2522 }, { "epoch": 0.22544269030658956, "grad_norm": 7.3235002251720935, "learning_rate": 1.905688177267608e-06, "loss": -0.8057, "step": 2524 }, { "epoch": 0.22562132952236338, "grad_norm": 8.581968575527858, "learning_rate": 1.9054236119049312e-06, "loss": 0.0791, "step": 2526 }, { "epoch": 0.22579996873813724, "grad_norm": 19.296829137091247, "learning_rate": 1.9051586943973333e-06, "loss": -0.3052, "step": 2528 }, { "epoch": 0.2259786079539111, "grad_norm": 13.508586837081106, "learning_rate": 1.9048934248478483e-06, "loss": -0.2362, "step": 2530 }, { "epoch": 0.22615724716968494, "grad_norm": 6.017188248080809, "learning_rate": 1.9046278033596467e-06, "loss": -0.772, "step": 2532 }, { "epoch": 0.22633588638545876, "grad_norm": 26.018129337045327, "learning_rate": 1.9043618300360374e-06, "loss": -0.9909, "step": 2534 }, { "epoch": 0.2265145256012326, "grad_norm": 11.40212494972497, "learning_rate": 1.9040955049804634e-06, "loss": 0.9178, "step": 2536 }, { "epoch": 0.22669316481700647, "grad_norm": 13.94028051720759, "learning_rate": 1.9038288282965071e-06, "loss": -0.8194, "step": 2538 }, { "epoch": 0.2268718040327803, "grad_norm": 3.037128529380917, "learning_rate": 1.9035618000878864e-06, "loss": 0.0787, "step": 2540 }, { "epoch": 0.22705044324855414, "grad_norm": 11.245665329739102, "learning_rate": 1.9032944204584559e-06, "loss": -0.7121, "step": 2542 }, { "epoch": 0.227229082464328, "grad_norm": 8.710698392177989, "learning_rate": 1.9030266895122074e-06, "loss": -0.0047, "step": 2544 }, { "epoch": 0.22740772168010182, "grad_norm": 15.944680425217204, "learning_rate": 1.902758607353269e-06, "loss": -1.1256, "step": 2546 }, { "epoch": 0.22758636089587567, "grad_norm": 11.499431430195731, "learning_rate": 1.902490174085905e-06, "loss": -0.4231, "step": 2548 }, { "epoch": 0.22776500011164952, "grad_norm": 16.507906782149124, "learning_rate": 1.9022213898145174e-06, "loss": -0.858, "step": 2550 }, { "epoch": 0.22794363932742334, "grad_norm": 10.937386997184102, "learning_rate": 1.901952254643644e-06, "loss": 0.5012, "step": 2552 }, { "epoch": 0.2281222785431972, "grad_norm": 10.887334383943251, "learning_rate": 1.9016827686779587e-06, "loss": 0.2953, "step": 2554 }, { "epoch": 0.22830091775897104, "grad_norm": 10.365049807276577, "learning_rate": 1.9014129320222723e-06, "loss": -0.569, "step": 2556 }, { "epoch": 0.22847955697474487, "grad_norm": 9.198484743805974, "learning_rate": 1.9011427447815318e-06, "loss": -0.4233, "step": 2558 }, { "epoch": 0.22865819619051872, "grad_norm": 11.38401420496941, "learning_rate": 1.900872207060821e-06, "loss": -1.4364, "step": 2560 }, { "epoch": 0.22883683540629257, "grad_norm": 42.067714781684124, "learning_rate": 1.9006013189653598e-06, "loss": -0.7101, "step": 2562 }, { "epoch": 0.22901547462206642, "grad_norm": 7.0506903560623275, "learning_rate": 1.900330080600504e-06, "loss": -0.1361, "step": 2564 }, { "epoch": 0.22919411383784025, "grad_norm": 5.427845067353647, "learning_rate": 1.9000584920717458e-06, "loss": -0.2715, "step": 2566 }, { "epoch": 0.2293727530536141, "grad_norm": 11.896663781533853, "learning_rate": 1.8997865534847138e-06, "loss": -0.3023, "step": 2568 }, { "epoch": 0.22955139226938795, "grad_norm": 8.526688358775392, "learning_rate": 1.8995142649451728e-06, "loss": -0.3119, "step": 2570 }, { "epoch": 0.22973003148516177, "grad_norm": 3.595968606525398, "learning_rate": 1.8992416265590233e-06, "loss": 0.5785, "step": 2572 }, { "epoch": 0.22990867070093562, "grad_norm": 6.193950027022809, "learning_rate": 1.898968638432302e-06, "loss": -0.2478, "step": 2574 }, { "epoch": 0.23008730991670948, "grad_norm": 23.966773875513574, "learning_rate": 1.8986953006711822e-06, "loss": -0.9396, "step": 2576 }, { "epoch": 0.2302659491324833, "grad_norm": 27.642709432427658, "learning_rate": 1.8984216133819723e-06, "loss": -1.2774, "step": 2578 }, { "epoch": 0.23044458834825715, "grad_norm": 4.2794062967383075, "learning_rate": 1.8981475766711172e-06, "loss": 1.0877, "step": 2580 }, { "epoch": 0.230623227564031, "grad_norm": 8.732420782892461, "learning_rate": 1.8978731906451975e-06, "loss": -0.3469, "step": 2582 }, { "epoch": 0.23080186677980483, "grad_norm": 11.586233823334672, "learning_rate": 1.8975984554109296e-06, "loss": 0.2643, "step": 2584 }, { "epoch": 0.23098050599557868, "grad_norm": 11.309295706301016, "learning_rate": 1.8973233710751661e-06, "loss": -0.8751, "step": 2586 }, { "epoch": 0.23115914521135253, "grad_norm": 12.977752428886474, "learning_rate": 1.897047937744895e-06, "loss": -0.5765, "step": 2588 }, { "epoch": 0.23133778442712635, "grad_norm": 8.25582033089005, "learning_rate": 1.8967721555272398e-06, "loss": 0.3445, "step": 2590 }, { "epoch": 0.2315164236429002, "grad_norm": 10.052649183933276, "learning_rate": 1.8964960245294605e-06, "loss": -0.9813, "step": 2592 }, { "epoch": 0.23169506285867406, "grad_norm": 7.453513721109761, "learning_rate": 1.8962195448589519e-06, "loss": 0.289, "step": 2594 }, { "epoch": 0.23187370207444788, "grad_norm": 6.740337378776847, "learning_rate": 1.8959427166232454e-06, "loss": -0.4429, "step": 2596 }, { "epoch": 0.23205234129022173, "grad_norm": 23.566392976645524, "learning_rate": 1.8956655399300065e-06, "loss": -0.8349, "step": 2598 }, { "epoch": 0.23223098050599558, "grad_norm": 15.524873034561598, "learning_rate": 1.8953880148870375e-06, "loss": -0.7155, "step": 2600 }, { "epoch": 0.23240961972176943, "grad_norm": 7.430178332235918, "learning_rate": 1.895110141602276e-06, "loss": 0.1102, "step": 2602 }, { "epoch": 0.23258825893754326, "grad_norm": 17.372072700518768, "learning_rate": 1.8948319201837943e-06, "loss": -0.7177, "step": 2604 }, { "epoch": 0.2327668981533171, "grad_norm": 5.182853732147753, "learning_rate": 1.8945533507398011e-06, "loss": -0.889, "step": 2606 }, { "epoch": 0.23294553736909096, "grad_norm": 4.754332724682026, "learning_rate": 1.8942744333786395e-06, "loss": -0.3922, "step": 2608 }, { "epoch": 0.23312417658486478, "grad_norm": 16.05034726916952, "learning_rate": 1.893995168208789e-06, "loss": 0.1113, "step": 2610 }, { "epoch": 0.23330281580063864, "grad_norm": 4.03190051603958, "learning_rate": 1.8937155553388627e-06, "loss": 0.2277, "step": 2612 }, { "epoch": 0.2334814550164125, "grad_norm": 8.31427074874857, "learning_rate": 1.893435594877611e-06, "loss": 0.5003, "step": 2614 }, { "epoch": 0.2336600942321863, "grad_norm": 2.275046077722761, "learning_rate": 1.893155286933918e-06, "loss": 0.0485, "step": 2616 }, { "epoch": 0.23383873344796016, "grad_norm": 12.177969781455646, "learning_rate": 1.8928746316168031e-06, "loss": 0.0098, "step": 2618 }, { "epoch": 0.234017372663734, "grad_norm": 8.981973609098992, "learning_rate": 1.892593629035422e-06, "loss": -0.0523, "step": 2620 }, { "epoch": 0.23419601187950784, "grad_norm": 3.8510192116511, "learning_rate": 1.8923122792990639e-06, "loss": 0.7916, "step": 2622 }, { "epoch": 0.2343746510952817, "grad_norm": 11.470920450914605, "learning_rate": 1.8920305825171539e-06, "loss": 0.7657, "step": 2624 }, { "epoch": 0.23455329031105554, "grad_norm": 15.47009542760039, "learning_rate": 1.8917485387992515e-06, "loss": -0.73, "step": 2626 }, { "epoch": 0.23473192952682936, "grad_norm": 9.197791323702392, "learning_rate": 1.8914661482550522e-06, "loss": 0.9209, "step": 2628 }, { "epoch": 0.23491056874260322, "grad_norm": 11.763916864303079, "learning_rate": 1.8911834109943853e-06, "loss": -1.2778, "step": 2630 }, { "epoch": 0.23508920795837707, "grad_norm": 1.5418976361577295, "learning_rate": 1.8909003271272152e-06, "loss": 0.09, "step": 2632 }, { "epoch": 0.23526784717415092, "grad_norm": 9.28420428899457, "learning_rate": 1.8906168967636415e-06, "loss": 0.6814, "step": 2634 }, { "epoch": 0.23544648638992474, "grad_norm": 4.3069435480198806, "learning_rate": 1.8903331200138981e-06, "loss": 0.3261, "step": 2636 }, { "epoch": 0.2356251256056986, "grad_norm": 6.133764419056729, "learning_rate": 1.890048996988354e-06, "loss": 0.4001, "step": 2638 }, { "epoch": 0.23580376482147244, "grad_norm": 16.832511252293692, "learning_rate": 1.8897645277975125e-06, "loss": -0.1041, "step": 2640 }, { "epoch": 0.23598240403724627, "grad_norm": 4.7678263598448964, "learning_rate": 1.8894797125520122e-06, "loss": 0.4345, "step": 2642 }, { "epoch": 0.23616104325302012, "grad_norm": 11.326575190320867, "learning_rate": 1.889194551362625e-06, "loss": -0.1131, "step": 2644 }, { "epoch": 0.23633968246879397, "grad_norm": 11.02760111947269, "learning_rate": 1.8889090443402589e-06, "loss": -0.727, "step": 2646 }, { "epoch": 0.2365183216845678, "grad_norm": 2.5339562348799287, "learning_rate": 1.8886231915959554e-06, "loss": 0.2175, "step": 2648 }, { "epoch": 0.23669696090034165, "grad_norm": 20.024374033160733, "learning_rate": 1.8883369932408911e-06, "loss": 0.5644, "step": 2650 }, { "epoch": 0.2368756001161155, "grad_norm": 5.615705143309413, "learning_rate": 1.888050449386376e-06, "loss": 0.4166, "step": 2652 }, { "epoch": 0.23705423933188932, "grad_norm": 14.530322761075048, "learning_rate": 1.8877635601438557e-06, "loss": -0.7267, "step": 2654 }, { "epoch": 0.23723287854766317, "grad_norm": 3.2481069186516436, "learning_rate": 1.887476325624909e-06, "loss": -0.297, "step": 2656 }, { "epoch": 0.23741151776343702, "grad_norm": 16.947734598105214, "learning_rate": 1.8871887459412502e-06, "loss": -0.8132, "step": 2658 }, { "epoch": 0.23759015697921085, "grad_norm": 6.46436990805024, "learning_rate": 1.8869008212047266e-06, "loss": 0.0988, "step": 2660 }, { "epoch": 0.2377687961949847, "grad_norm": 7.837615878329966, "learning_rate": 1.886612551527321e-06, "loss": -0.608, "step": 2662 }, { "epoch": 0.23794743541075855, "grad_norm": 31.72316875778894, "learning_rate": 1.886323937021149e-06, "loss": -0.4813, "step": 2664 }, { "epoch": 0.2381260746265324, "grad_norm": 9.69543565153957, "learning_rate": 1.8860349777984613e-06, "loss": -0.5474, "step": 2666 }, { "epoch": 0.23830471384230623, "grad_norm": 8.260375925150422, "learning_rate": 1.885745673971642e-06, "loss": -0.3414, "step": 2668 }, { "epoch": 0.23848335305808008, "grad_norm": 9.420862753978367, "learning_rate": 1.8854560256532098e-06, "loss": -0.6548, "step": 2670 }, { "epoch": 0.23866199227385393, "grad_norm": 16.92253775104511, "learning_rate": 1.8851660329558172e-06, "loss": -0.0205, "step": 2672 }, { "epoch": 0.23884063148962775, "grad_norm": 17.857865237202294, "learning_rate": 1.8848756959922508e-06, "loss": -0.0766, "step": 2674 }, { "epoch": 0.2390192707054016, "grad_norm": 7.250550084785458, "learning_rate": 1.8845850148754303e-06, "loss": 0.17, "step": 2676 }, { "epoch": 0.23919790992117546, "grad_norm": 7.7308600559439595, "learning_rate": 1.8842939897184102e-06, "loss": -0.57, "step": 2678 }, { "epoch": 0.23937654913694928, "grad_norm": 27.352903601797355, "learning_rate": 1.8840026206343782e-06, "loss": -0.232, "step": 2680 }, { "epoch": 0.23955518835272313, "grad_norm": 4.252210154582023, "learning_rate": 1.8837109077366566e-06, "loss": 0.3998, "step": 2682 }, { "epoch": 0.23973382756849698, "grad_norm": 4.449822951663073, "learning_rate": 1.8834188511387e-06, "loss": -0.6044, "step": 2684 }, { "epoch": 0.2399124667842708, "grad_norm": 17.505481842304427, "learning_rate": 1.8831264509540984e-06, "loss": -0.1202, "step": 2686 }, { "epoch": 0.24009110600004466, "grad_norm": 12.272345889552351, "learning_rate": 1.8828337072965735e-06, "loss": -1.056, "step": 2688 }, { "epoch": 0.2402697452158185, "grad_norm": 19.97183074826892, "learning_rate": 1.882540620279982e-06, "loss": -0.8874, "step": 2690 }, { "epoch": 0.24044838443159233, "grad_norm": 6.365655370685712, "learning_rate": 1.8822471900183144e-06, "loss": -0.5957, "step": 2692 }, { "epoch": 0.24062702364736618, "grad_norm": 11.553094781202027, "learning_rate": 1.8819534166256932e-06, "loss": 0.1924, "step": 2694 }, { "epoch": 0.24080566286314004, "grad_norm": 16.969320101495853, "learning_rate": 1.8816593002163757e-06, "loss": -0.1366, "step": 2696 }, { "epoch": 0.2409843020789139, "grad_norm": 12.85163670666266, "learning_rate": 1.8813648409047517e-06, "loss": -0.3645, "step": 2698 }, { "epoch": 0.2411629412946877, "grad_norm": 7.195534105232934, "learning_rate": 1.8810700388053454e-06, "loss": 0.0387, "step": 2700 }, { "epoch": 0.24134158051046156, "grad_norm": 11.474988856756912, "learning_rate": 1.880774894032813e-06, "loss": -0.2026, "step": 2702 }, { "epoch": 0.2415202197262354, "grad_norm": 15.656821112961635, "learning_rate": 1.8804794067019452e-06, "loss": -0.0677, "step": 2704 }, { "epoch": 0.24169885894200924, "grad_norm": 12.241040748779543, "learning_rate": 1.880183576927665e-06, "loss": 0.387, "step": 2706 }, { "epoch": 0.2418774981577831, "grad_norm": 23.1813041861223, "learning_rate": 1.879887404825029e-06, "loss": 0.8722, "step": 2708 }, { "epoch": 0.24205613737355694, "grad_norm": 2.235691136423569, "learning_rate": 1.8795908905092275e-06, "loss": -0.3964, "step": 2710 }, { "epoch": 0.24223477658933076, "grad_norm": 11.155005634055088, "learning_rate": 1.879294034095583e-06, "loss": 1.0924, "step": 2712 }, { "epoch": 0.24241341580510462, "grad_norm": 17.548629768112416, "learning_rate": 1.8789968356995512e-06, "loss": -0.8108, "step": 2714 }, { "epoch": 0.24259205502087847, "grad_norm": 7.484223875937069, "learning_rate": 1.8786992954367209e-06, "loss": 0.2715, "step": 2716 }, { "epoch": 0.2427706942366523, "grad_norm": 7.992901036543644, "learning_rate": 1.8784014134228143e-06, "loss": -0.1725, "step": 2718 }, { "epoch": 0.24294933345242614, "grad_norm": 5.805379135801588, "learning_rate": 1.878103189773686e-06, "loss": -0.2132, "step": 2720 }, { "epoch": 0.2431279726682, "grad_norm": 13.855283182609892, "learning_rate": 1.8778046246053237e-06, "loss": 0.3194, "step": 2722 }, { "epoch": 0.24330661188397382, "grad_norm": 21.674270156487538, "learning_rate": 1.8775057180338477e-06, "loss": -0.7065, "step": 2724 }, { "epoch": 0.24348525109974767, "grad_norm": 9.862903767529067, "learning_rate": 1.8772064701755113e-06, "loss": -0.688, "step": 2726 }, { "epoch": 0.24366389031552152, "grad_norm": 6.501928190193251, "learning_rate": 1.8769068811467009e-06, "loss": -0.5409, "step": 2728 }, { "epoch": 0.24384252953129537, "grad_norm": 9.597312837262786, "learning_rate": 1.8766069510639343e-06, "loss": -0.2451, "step": 2730 }, { "epoch": 0.2440211687470692, "grad_norm": 11.171034305125424, "learning_rate": 1.8763066800438634e-06, "loss": -0.5241, "step": 2732 }, { "epoch": 0.24419980796284305, "grad_norm": 7.469586313581558, "learning_rate": 1.8760060682032723e-06, "loss": -0.6767, "step": 2734 }, { "epoch": 0.2443784471786169, "grad_norm": 5.920396121922799, "learning_rate": 1.875705115659077e-06, "loss": -0.8962, "step": 2736 }, { "epoch": 0.24455708639439072, "grad_norm": 9.428038445803393, "learning_rate": 1.8754038225283268e-06, "loss": -0.4366, "step": 2738 }, { "epoch": 0.24473572561016457, "grad_norm": 14.483649670855959, "learning_rate": 1.875102188928203e-06, "loss": 0.4278, "step": 2740 }, { "epoch": 0.24491436482593842, "grad_norm": 8.199577450913868, "learning_rate": 1.8748002149760195e-06, "loss": -0.4542, "step": 2742 }, { "epoch": 0.24509300404171225, "grad_norm": 12.108134387054058, "learning_rate": 1.8744979007892225e-06, "loss": 0.1795, "step": 2744 }, { "epoch": 0.2452716432574861, "grad_norm": 11.876245694325803, "learning_rate": 1.8741952464853904e-06, "loss": -0.8023, "step": 2746 }, { "epoch": 0.24545028247325995, "grad_norm": 5.197771864143767, "learning_rate": 1.8738922521822347e-06, "loss": 0.285, "step": 2748 }, { "epoch": 0.24562892168903377, "grad_norm": 3.9733604024652003, "learning_rate": 1.8735889179975976e-06, "loss": -0.3059, "step": 2750 }, { "epoch": 0.24580756090480763, "grad_norm": 9.218165285349755, "learning_rate": 1.8732852440494548e-06, "loss": -0.5126, "step": 2752 }, { "epoch": 0.24598620012058148, "grad_norm": 10.144653460589437, "learning_rate": 1.8729812304559136e-06, "loss": -0.1876, "step": 2754 }, { "epoch": 0.2461648393363553, "grad_norm": 17.8274214502941, "learning_rate": 1.8726768773352139e-06, "loss": -0.0822, "step": 2756 }, { "epoch": 0.24634347855212915, "grad_norm": 17.658047474876717, "learning_rate": 1.8723721848057267e-06, "loss": -0.0626, "step": 2758 }, { "epoch": 0.246522117767903, "grad_norm": 10.039945167300584, "learning_rate": 1.872067152985956e-06, "loss": -0.3712, "step": 2760 }, { "epoch": 0.24670075698367686, "grad_norm": 13.722398277179506, "learning_rate": 1.8717617819945367e-06, "loss": 0.3037, "step": 2762 }, { "epoch": 0.24687939619945068, "grad_norm": 14.05428850965766, "learning_rate": 1.8714560719502376e-06, "loss": 0.0633, "step": 2764 }, { "epoch": 0.24705803541522453, "grad_norm": 26.83103348568885, "learning_rate": 1.8711500229719563e-06, "loss": -1.091, "step": 2766 }, { "epoch": 0.24723667463099838, "grad_norm": 4.122191426681096, "learning_rate": 1.870843635178725e-06, "loss": 1.7941, "step": 2768 }, { "epoch": 0.2474153138467722, "grad_norm": 6.810998593691069, "learning_rate": 1.870536908689706e-06, "loss": 0.2309, "step": 2770 }, { "epoch": 0.24759395306254606, "grad_norm": 5.794567708043086, "learning_rate": 1.8702298436241945e-06, "loss": 1.0029, "step": 2772 }, { "epoch": 0.2477725922783199, "grad_norm": 10.134065690262265, "learning_rate": 1.8699224401016168e-06, "loss": 0.5782, "step": 2774 }, { "epoch": 0.24795123149409373, "grad_norm": 7.586604017368867, "learning_rate": 1.8696146982415302e-06, "loss": -0.4478, "step": 2776 }, { "epoch": 0.24812987070986758, "grad_norm": 3.747964942597182, "learning_rate": 1.8693066181636247e-06, "loss": 0.5894, "step": 2778 }, { "epoch": 0.24830850992564144, "grad_norm": 7.200087165304895, "learning_rate": 1.8689981999877213e-06, "loss": -0.6078, "step": 2780 }, { "epoch": 0.24848714914141526, "grad_norm": 7.9994206218728445, "learning_rate": 1.8686894438337728e-06, "loss": -0.1913, "step": 2782 }, { "epoch": 0.2486657883571891, "grad_norm": 9.022891707654836, "learning_rate": 1.8683803498218629e-06, "loss": 0.1495, "step": 2784 }, { "epoch": 0.24884442757296296, "grad_norm": 8.870674046104195, "learning_rate": 1.8680709180722071e-06, "loss": 0.1911, "step": 2786 }, { "epoch": 0.24902306678873679, "grad_norm": 2.1246028977898717, "learning_rate": 1.8677611487051524e-06, "loss": 0.8726, "step": 2788 }, { "epoch": 0.24920170600451064, "grad_norm": 19.317751074899917, "learning_rate": 1.8674510418411766e-06, "loss": -0.5464, "step": 2790 }, { "epoch": 0.2493803452202845, "grad_norm": 11.79536543528657, "learning_rate": 1.867140597600889e-06, "loss": -0.9924, "step": 2792 }, { "epoch": 0.2495589844360583, "grad_norm": 6.073782578290864, "learning_rate": 1.8668298161050306e-06, "loss": 1.3238, "step": 2794 }, { "epoch": 0.24973762365183216, "grad_norm": 7.848602617620312, "learning_rate": 1.866518697474473e-06, "loss": -0.1187, "step": 2796 }, { "epoch": 0.24991626286760601, "grad_norm": 9.491661427680343, "learning_rate": 1.8662072418302186e-06, "loss": 0.2255, "step": 2798 }, { "epoch": 0.25009490208337987, "grad_norm": 9.752736416757406, "learning_rate": 1.865895449293402e-06, "loss": -1.1065, "step": 2800 }, { "epoch": 0.2502735412991537, "grad_norm": 8.369302662156581, "learning_rate": 1.865583319985288e-06, "loss": -0.1786, "step": 2802 }, { "epoch": 0.2504521805149275, "grad_norm": 6.017846903482986, "learning_rate": 1.8652708540272718e-06, "loss": -0.3443, "step": 2804 }, { "epoch": 0.2506308197307014, "grad_norm": 6.977612253049481, "learning_rate": 1.864958051540881e-06, "loss": 0.6596, "step": 2806 }, { "epoch": 0.2508094589464752, "grad_norm": 11.403230952123215, "learning_rate": 1.8646449126477735e-06, "loss": 0.4822, "step": 2808 }, { "epoch": 0.25098809816224904, "grad_norm": 9.03244823703879, "learning_rate": 1.8643314374697377e-06, "loss": 0.3581, "step": 2810 }, { "epoch": 0.2511667373780229, "grad_norm": 7.543308086456965, "learning_rate": 1.8640176261286923e-06, "loss": -0.7934, "step": 2812 }, { "epoch": 0.25134537659379674, "grad_norm": 8.478703916577135, "learning_rate": 1.8637034787466885e-06, "loss": 0.8853, "step": 2814 }, { "epoch": 0.2515240158095706, "grad_norm": 7.572498867957962, "learning_rate": 1.8633889954459065e-06, "loss": 0.8703, "step": 2816 }, { "epoch": 0.25170265502534445, "grad_norm": 11.921395028428012, "learning_rate": 1.863074176348658e-06, "loss": 0.0539, "step": 2818 }, { "epoch": 0.25188129424111827, "grad_norm": 10.570043274241403, "learning_rate": 1.862759021577385e-06, "loss": 0.1313, "step": 2820 }, { "epoch": 0.25205993345689215, "grad_norm": 8.21093935834898, "learning_rate": 1.86244353125466e-06, "loss": 0.0404, "step": 2822 }, { "epoch": 0.252238572672666, "grad_norm": 7.437263549123501, "learning_rate": 1.8621277055031866e-06, "loss": -0.9347, "step": 2824 }, { "epoch": 0.2524172118884398, "grad_norm": 6.884066897167527, "learning_rate": 1.8618115444457975e-06, "loss": -1.3943, "step": 2826 }, { "epoch": 0.2525958511042137, "grad_norm": 9.495600434533074, "learning_rate": 1.8614950482054575e-06, "loss": 0.0589, "step": 2828 }, { "epoch": 0.2527744903199875, "grad_norm": 8.7852461332385, "learning_rate": 1.861178216905261e-06, "loss": -0.1331, "step": 2830 }, { "epoch": 0.2529531295357613, "grad_norm": 5.2646441805480375, "learning_rate": 1.8608610506684318e-06, "loss": 0.7826, "step": 2832 }, { "epoch": 0.2531317687515352, "grad_norm": 8.167417167381323, "learning_rate": 1.8605435496183261e-06, "loss": 0.6664, "step": 2834 }, { "epoch": 0.253310407967309, "grad_norm": 10.019340123664426, "learning_rate": 1.860225713878428e-06, "loss": -0.619, "step": 2836 }, { "epoch": 0.25348904718308285, "grad_norm": 7.069721681979039, "learning_rate": 1.8599075435723534e-06, "loss": 1.2887, "step": 2838 }, { "epoch": 0.25366768639885673, "grad_norm": 3.7265411312362313, "learning_rate": 1.8595890388238477e-06, "loss": 0.3493, "step": 2840 }, { "epoch": 0.25384632561463055, "grad_norm": 20.098212379939184, "learning_rate": 1.8592701997567862e-06, "loss": 0.0858, "step": 2842 }, { "epoch": 0.2540249648304044, "grad_norm": 6.311902914985553, "learning_rate": 1.8589510264951745e-06, "loss": 1.0483, "step": 2844 }, { "epoch": 0.25420360404617826, "grad_norm": 7.757653425877273, "learning_rate": 1.8586315191631483e-06, "loss": 0.2644, "step": 2846 }, { "epoch": 0.2543822432619521, "grad_norm": 25.470508949996955, "learning_rate": 1.858311677884973e-06, "loss": -0.4861, "step": 2848 }, { "epoch": 0.2545608824777259, "grad_norm": 10.027804819941199, "learning_rate": 1.8579915027850436e-06, "loss": -0.6178, "step": 2850 }, { "epoch": 0.2547395216934998, "grad_norm": 6.214896230038478, "learning_rate": 1.8576709939878858e-06, "loss": -0.6494, "step": 2852 }, { "epoch": 0.2549181609092736, "grad_norm": 20.450919595575343, "learning_rate": 1.8573501516181546e-06, "loss": -1.1018, "step": 2854 }, { "epoch": 0.25509680012504743, "grad_norm": 9.02510595708555, "learning_rate": 1.8570289758006343e-06, "loss": -0.3778, "step": 2856 }, { "epoch": 0.2552754393408213, "grad_norm": 11.689924182499137, "learning_rate": 1.8567074666602394e-06, "loss": -0.8899, "step": 2858 }, { "epoch": 0.25545407855659513, "grad_norm": 5.504765093622244, "learning_rate": 1.856385624322014e-06, "loss": 0.7756, "step": 2860 }, { "epoch": 0.25563271777236896, "grad_norm": 4.48411341312936, "learning_rate": 1.8560634489111316e-06, "loss": 0.73, "step": 2862 }, { "epoch": 0.25581135698814284, "grad_norm": 4.009056091707867, "learning_rate": 1.8557409405528954e-06, "loss": 0.0695, "step": 2864 }, { "epoch": 0.25598999620391666, "grad_norm": 4.909366468280102, "learning_rate": 1.8554180993727383e-06, "loss": 0.2944, "step": 2866 }, { "epoch": 0.2561686354196905, "grad_norm": 7.937003292852533, "learning_rate": 1.8550949254962221e-06, "loss": -0.1763, "step": 2868 }, { "epoch": 0.25634727463546436, "grad_norm": 12.998313794451319, "learning_rate": 1.8547714190490381e-06, "loss": -0.9211, "step": 2870 }, { "epoch": 0.2565259138512382, "grad_norm": 16.50578807688571, "learning_rate": 1.8544475801570077e-06, "loss": -0.4076, "step": 2872 }, { "epoch": 0.256704553067012, "grad_norm": 11.457331573160644, "learning_rate": 1.8541234089460805e-06, "loss": -1.4501, "step": 2874 }, { "epoch": 0.2568831922827859, "grad_norm": 5.158101991920828, "learning_rate": 1.8537989055423362e-06, "loss": -0.3303, "step": 2876 }, { "epoch": 0.2570618314985597, "grad_norm": 13.566459675427485, "learning_rate": 1.8534740700719833e-06, "loss": -1.1951, "step": 2878 }, { "epoch": 0.2572404707143336, "grad_norm": 7.464039102940486, "learning_rate": 1.8531489026613593e-06, "loss": -0.5147, "step": 2880 }, { "epoch": 0.2574191099301074, "grad_norm": 6.2198233900111415, "learning_rate": 1.8528234034369315e-06, "loss": -0.0845, "step": 2882 }, { "epoch": 0.25759774914588124, "grad_norm": 7.08186803883224, "learning_rate": 1.8524975725252955e-06, "loss": -0.0287, "step": 2884 }, { "epoch": 0.2577763883616551, "grad_norm": 6.877440140913311, "learning_rate": 1.852171410053176e-06, "loss": -0.1852, "step": 2886 }, { "epoch": 0.25795502757742894, "grad_norm": 5.230574682984866, "learning_rate": 1.851844916147427e-06, "loss": 0.7117, "step": 2888 }, { "epoch": 0.25813366679320277, "grad_norm": 10.666711052166463, "learning_rate": 1.8515180909350314e-06, "loss": -0.1885, "step": 2890 }, { "epoch": 0.25831230600897664, "grad_norm": 7.178537104347661, "learning_rate": 1.8511909345431005e-06, "loss": -0.2104, "step": 2892 }, { "epoch": 0.25849094522475047, "grad_norm": 13.581962157395443, "learning_rate": 1.8508634470988748e-06, "loss": -0.4284, "step": 2894 }, { "epoch": 0.2586695844405243, "grad_norm": 6.3305062793917175, "learning_rate": 1.8505356287297237e-06, "loss": 0.1955, "step": 2896 }, { "epoch": 0.25884822365629817, "grad_norm": 4.677115776555535, "learning_rate": 1.8502074795631448e-06, "loss": 0.62, "step": 2898 }, { "epoch": 0.259026862872072, "grad_norm": 7.540783431739987, "learning_rate": 1.8498789997267647e-06, "loss": 0.7945, "step": 2900 }, { "epoch": 0.2592055020878458, "grad_norm": 6.30058115140651, "learning_rate": 1.8495501893483386e-06, "loss": -0.8048, "step": 2902 }, { "epoch": 0.2593841413036197, "grad_norm": 5.896621202143012, "learning_rate": 1.84922104855575e-06, "loss": -0.693, "step": 2904 }, { "epoch": 0.2595627805193935, "grad_norm": 29.379529238591452, "learning_rate": 1.8488915774770113e-06, "loss": 0.1847, "step": 2906 }, { "epoch": 0.25974141973516734, "grad_norm": 13.72406778696075, "learning_rate": 1.848561776240263e-06, "loss": -0.2075, "step": 2908 }, { "epoch": 0.2599200589509412, "grad_norm": 7.061346179255146, "learning_rate": 1.848231644973774e-06, "loss": -0.3, "step": 2910 }, { "epoch": 0.26009869816671505, "grad_norm": 7.268574827515595, "learning_rate": 1.8479011838059422e-06, "loss": -0.1533, "step": 2912 }, { "epoch": 0.26027733738248887, "grad_norm": 21.13953003793153, "learning_rate": 1.8475703928652927e-06, "loss": -1.1064, "step": 2914 }, { "epoch": 0.26045597659826275, "grad_norm": 7.642353509828595, "learning_rate": 1.84723927228048e-06, "loss": 0.2786, "step": 2916 }, { "epoch": 0.2606346158140366, "grad_norm": 6.182713053696225, "learning_rate": 1.846907822180286e-06, "loss": -0.0285, "step": 2918 }, { "epoch": 0.2608132550298104, "grad_norm": 5.265706138098906, "learning_rate": 1.846576042693621e-06, "loss": -0.4149, "step": 2920 }, { "epoch": 0.2609918942455843, "grad_norm": 2.2255499428034855, "learning_rate": 1.8462439339495237e-06, "loss": 0.1782, "step": 2922 }, { "epoch": 0.2611705334613581, "grad_norm": 5.645480868703002, "learning_rate": 1.8459114960771604e-06, "loss": 0.1469, "step": 2924 }, { "epoch": 0.2613491726771319, "grad_norm": 9.013791854902998, "learning_rate": 1.8455787292058258e-06, "loss": 0.4562, "step": 2926 }, { "epoch": 0.2615278118929058, "grad_norm": 11.044115555503769, "learning_rate": 1.8452456334649425e-06, "loss": 0.8548, "step": 2928 }, { "epoch": 0.2617064511086796, "grad_norm": 2.5368610874348083, "learning_rate": 1.8449122089840604e-06, "loss": -0.0184, "step": 2930 }, { "epoch": 0.26188509032445345, "grad_norm": 9.302205995190233, "learning_rate": 1.8445784558928578e-06, "loss": 0.3179, "step": 2932 }, { "epoch": 0.26206372954022733, "grad_norm": 12.455007528075367, "learning_rate": 1.8442443743211414e-06, "loss": -0.8974, "step": 2934 }, { "epoch": 0.26224236875600115, "grad_norm": 7.555211109213637, "learning_rate": 1.843909964398844e-06, "loss": 0.4902, "step": 2936 }, { "epoch": 0.262421007971775, "grad_norm": 16.06976462817916, "learning_rate": 1.843575226256028e-06, "loss": -0.9418, "step": 2938 }, { "epoch": 0.26259964718754886, "grad_norm": 4.737795560368829, "learning_rate": 1.8432401600228821e-06, "loss": 0.7203, "step": 2940 }, { "epoch": 0.2627782864033227, "grad_norm": 8.65718729060819, "learning_rate": 1.8429047658297232e-06, "loss": 0.6181, "step": 2942 }, { "epoch": 0.26295692561909656, "grad_norm": 6.819301185044091, "learning_rate": 1.8425690438069954e-06, "loss": 0.3627, "step": 2944 }, { "epoch": 0.2631355648348704, "grad_norm": 7.4102311520166735, "learning_rate": 1.842232994085271e-06, "loss": -0.4658, "step": 2946 }, { "epoch": 0.2633142040506442, "grad_norm": 9.93090294242941, "learning_rate": 1.8418966167952488e-06, "loss": -0.4197, "step": 2948 }, { "epoch": 0.2634928432664181, "grad_norm": 13.52107345924259, "learning_rate": 1.8415599120677556e-06, "loss": -1.1606, "step": 2950 }, { "epoch": 0.2636714824821919, "grad_norm": 13.757247956546056, "learning_rate": 1.8412228800337454e-06, "loss": -0.7398, "step": 2952 }, { "epoch": 0.26385012169796573, "grad_norm": 9.835418178431912, "learning_rate": 1.8408855208242994e-06, "loss": 0.0346, "step": 2954 }, { "epoch": 0.2640287609137396, "grad_norm": 5.163920852172783, "learning_rate": 1.8405478345706267e-06, "loss": -1.2497, "step": 2956 }, { "epoch": 0.26420740012951344, "grad_norm": 11.093636869471581, "learning_rate": 1.8402098214040622e-06, "loss": -0.2204, "step": 2958 }, { "epoch": 0.26438603934528726, "grad_norm": 19.235253172225452, "learning_rate": 1.8398714814560696e-06, "loss": -1.0922, "step": 2960 }, { "epoch": 0.26456467856106114, "grad_norm": 6.082001143952462, "learning_rate": 1.8395328148582387e-06, "loss": 0.3074, "step": 2962 }, { "epoch": 0.26474331777683496, "grad_norm": 6.7968926791257145, "learning_rate": 1.8391938217422861e-06, "loss": -0.3066, "step": 2964 }, { "epoch": 0.2649219569926088, "grad_norm": 12.047766666093139, "learning_rate": 1.8388545022400566e-06, "loss": -0.7328, "step": 2966 }, { "epoch": 0.26510059620838267, "grad_norm": 4.182719220278071, "learning_rate": 1.8385148564835204e-06, "loss": -1.3801, "step": 2968 }, { "epoch": 0.2652792354241565, "grad_norm": 6.433953947396328, "learning_rate": 1.8381748846047758e-06, "loss": -0.2634, "step": 2970 }, { "epoch": 0.2654578746399303, "grad_norm": 6.096270387234915, "learning_rate": 1.8378345867360474e-06, "loss": 0.3723, "step": 2972 }, { "epoch": 0.2656365138557042, "grad_norm": 6.23510860247582, "learning_rate": 1.8374939630096869e-06, "loss": 0.5116, "step": 2974 }, { "epoch": 0.265815153071478, "grad_norm": 2.4514670631208086, "learning_rate": 1.8371530135581721e-06, "loss": 0.2313, "step": 2976 }, { "epoch": 0.26599379228725184, "grad_norm": 12.087257397020878, "learning_rate": 1.836811738514108e-06, "loss": -0.1172, "step": 2978 }, { "epoch": 0.2661724315030257, "grad_norm": 7.047748114043514, "learning_rate": 1.8364701380102264e-06, "loss": 0.4405, "step": 2980 }, { "epoch": 0.26635107071879954, "grad_norm": 5.82636960672677, "learning_rate": 1.8361282121793856e-06, "loss": 1.3353, "step": 2982 }, { "epoch": 0.26652970993457337, "grad_norm": 4.569531809323675, "learning_rate": 1.83578596115457e-06, "loss": 0.552, "step": 2984 }, { "epoch": 0.26670834915034725, "grad_norm": 12.257395477689556, "learning_rate": 1.8354433850688904e-06, "loss": -0.4448, "step": 2986 }, { "epoch": 0.26688698836612107, "grad_norm": 10.484464869561737, "learning_rate": 1.8351004840555845e-06, "loss": -0.7842, "step": 2988 }, { "epoch": 0.2670656275818949, "grad_norm": 10.036144167871482, "learning_rate": 1.834757258248017e-06, "loss": -0.0816, "step": 2990 }, { "epoch": 0.2672442667976688, "grad_norm": 8.223545546476926, "learning_rate": 1.8344137077796771e-06, "loss": 0.0518, "step": 2992 }, { "epoch": 0.2674229060134426, "grad_norm": 8.916177991744487, "learning_rate": 1.834069832784182e-06, "loss": -0.0695, "step": 2994 }, { "epoch": 0.2676015452292164, "grad_norm": 38.79211263813416, "learning_rate": 1.8337256333952737e-06, "loss": 0.5162, "step": 2996 }, { "epoch": 0.2677801844449903, "grad_norm": 2.553572945303757, "learning_rate": 1.833381109746822e-06, "loss": 0.7526, "step": 2998 }, { "epoch": 0.2679588236607641, "grad_norm": 1.951079495279322, "learning_rate": 1.8330362619728214e-06, "loss": -0.6759, "step": 3000 }, { "epoch": 0.26813746287653795, "grad_norm": 7.458615629795995, "learning_rate": 1.8326910902073932e-06, "loss": -0.2881, "step": 3002 }, { "epoch": 0.2683161020923118, "grad_norm": 9.556556999110406, "learning_rate": 1.8323455945847846e-06, "loss": -0.9081, "step": 3004 }, { "epoch": 0.26849474130808565, "grad_norm": 21.3549853755657, "learning_rate": 1.8319997752393678e-06, "loss": -0.5948, "step": 3006 }, { "epoch": 0.2686733805238595, "grad_norm": 9.090158913611303, "learning_rate": 1.8316536323056429e-06, "loss": -0.3864, "step": 3008 }, { "epoch": 0.26885201973963335, "grad_norm": 5.095033823187421, "learning_rate": 1.8313071659182339e-06, "loss": 0.416, "step": 3010 }, { "epoch": 0.2690306589554072, "grad_norm": 9.891718751234066, "learning_rate": 1.8309603762118918e-06, "loss": -0.5177, "step": 3012 }, { "epoch": 0.26920929817118106, "grad_norm": 4.062342714052572, "learning_rate": 1.830613263321493e-06, "loss": -0.0509, "step": 3014 }, { "epoch": 0.2693879373869549, "grad_norm": 54.36804238891745, "learning_rate": 1.830265827382039e-06, "loss": -0.7312, "step": 3016 }, { "epoch": 0.2695665766027287, "grad_norm": 5.991384678740996, "learning_rate": 1.8299180685286584e-06, "loss": 0.5975, "step": 3018 }, { "epoch": 0.2697452158185026, "grad_norm": 5.825956130245316, "learning_rate": 1.8295699868966036e-06, "loss": -1.2551, "step": 3020 }, { "epoch": 0.2699238550342764, "grad_norm": 6.961689609778677, "learning_rate": 1.8292215826212537e-06, "loss": 0.322, "step": 3022 }, { "epoch": 0.27010249425005023, "grad_norm": 10.381673114566594, "learning_rate": 1.8288728558381131e-06, "loss": -1.5843, "step": 3024 }, { "epoch": 0.2702811334658241, "grad_norm": 3.2952234551891784, "learning_rate": 1.828523806682812e-06, "loss": 0.1154, "step": 3026 }, { "epoch": 0.27045977268159793, "grad_norm": 6.452258922473332, "learning_rate": 1.8281744352911045e-06, "loss": -0.2153, "step": 3028 }, { "epoch": 0.27063841189737176, "grad_norm": 6.18869654327833, "learning_rate": 1.8278247417988717e-06, "loss": -1.1816, "step": 3030 }, { "epoch": 0.27081705111314563, "grad_norm": 28.081356111042005, "learning_rate": 1.8274747263421192e-06, "loss": -1.1857, "step": 3032 }, { "epoch": 0.27099569032891946, "grad_norm": 9.533813932171203, "learning_rate": 1.8271243890569781e-06, "loss": 0.81, "step": 3034 }, { "epoch": 0.2711743295446933, "grad_norm": 7.143798123094966, "learning_rate": 1.8267737300797043e-06, "loss": 0.1189, "step": 3036 }, { "epoch": 0.27135296876046716, "grad_norm": 12.69078705399333, "learning_rate": 1.826422749546679e-06, "loss": -0.567, "step": 3038 }, { "epoch": 0.271531607976241, "grad_norm": 5.165263303265907, "learning_rate": 1.8260714475944084e-06, "loss": -0.1434, "step": 3040 }, { "epoch": 0.2717102471920148, "grad_norm": 8.367389010470294, "learning_rate": 1.825719824359524e-06, "loss": -0.6504, "step": 3042 }, { "epoch": 0.2718888864077887, "grad_norm": 5.1039248454620685, "learning_rate": 1.8253678799787821e-06, "loss": -0.1534, "step": 3044 }, { "epoch": 0.2720675256235625, "grad_norm": 8.464155576693097, "learning_rate": 1.8250156145890635e-06, "loss": -0.0655, "step": 3046 }, { "epoch": 0.27224616483933634, "grad_norm": 7.397844010811581, "learning_rate": 1.8246630283273746e-06, "loss": -0.2341, "step": 3048 }, { "epoch": 0.2724248040551102, "grad_norm": 6.100533646551347, "learning_rate": 1.8243101213308462e-06, "loss": 0.6468, "step": 3050 }, { "epoch": 0.27260344327088404, "grad_norm": 9.253044246075138, "learning_rate": 1.8239568937367336e-06, "loss": -1.1184, "step": 3052 }, { "epoch": 0.27278208248665786, "grad_norm": 9.200507473425294, "learning_rate": 1.823603345682417e-06, "loss": 0.0263, "step": 3054 }, { "epoch": 0.27296072170243174, "grad_norm": 8.6974333912491, "learning_rate": 1.8232494773054013e-06, "loss": 0.8259, "step": 3056 }, { "epoch": 0.27313936091820556, "grad_norm": 6.669778892135084, "learning_rate": 1.8228952887433163e-06, "loss": 0.0249, "step": 3058 }, { "epoch": 0.2733180001339794, "grad_norm": 3.744301981028544, "learning_rate": 1.8225407801339153e-06, "loss": 0.1353, "step": 3060 }, { "epoch": 0.27349663934975327, "grad_norm": 6.403322025244738, "learning_rate": 1.822185951615077e-06, "loss": -0.2469, "step": 3062 }, { "epoch": 0.2736752785655271, "grad_norm": 15.082995893943576, "learning_rate": 1.8218308033248048e-06, "loss": -0.7397, "step": 3064 }, { "epoch": 0.2738539177813009, "grad_norm": 3.7279449393827337, "learning_rate": 1.8214753354012256e-06, "loss": 0.5909, "step": 3066 }, { "epoch": 0.2740325569970748, "grad_norm": 12.034425311502208, "learning_rate": 1.8211195479825907e-06, "loss": -0.4164, "step": 3068 }, { "epoch": 0.2742111962128486, "grad_norm": 14.539281319719532, "learning_rate": 1.8207634412072764e-06, "loss": 0.4659, "step": 3070 }, { "epoch": 0.27438983542862244, "grad_norm": 16.962959776953173, "learning_rate": 1.8204070152137821e-06, "loss": -1.1304, "step": 3072 }, { "epoch": 0.2745684746443963, "grad_norm": 4.552095647427149, "learning_rate": 1.8200502701407326e-06, "loss": 0.9494, "step": 3074 }, { "epoch": 0.27474711386017014, "grad_norm": 3.4593400217715615, "learning_rate": 1.8196932061268756e-06, "loss": 0.0542, "step": 3076 }, { "epoch": 0.274925753075944, "grad_norm": 6.413772260485634, "learning_rate": 1.819335823311084e-06, "loss": -0.6877, "step": 3078 }, { "epoch": 0.27510439229171785, "grad_norm": 9.134955005587141, "learning_rate": 1.8189781218323539e-06, "loss": -0.753, "step": 3080 }, { "epoch": 0.27528303150749167, "grad_norm": 16.834161920581593, "learning_rate": 1.8186201018298054e-06, "loss": -0.2181, "step": 3082 }, { "epoch": 0.27546167072326555, "grad_norm": 6.031378077594919, "learning_rate": 1.8182617634426824e-06, "loss": -0.1082, "step": 3084 }, { "epoch": 0.2756403099390394, "grad_norm": 3.3434883086575007, "learning_rate": 1.8179031068103537e-06, "loss": -0.6215, "step": 3086 }, { "epoch": 0.2758189491548132, "grad_norm": 10.847146329615626, "learning_rate": 1.8175441320723101e-06, "loss": 0.1408, "step": 3088 }, { "epoch": 0.2759975883705871, "grad_norm": 8.618179499366486, "learning_rate": 1.8171848393681678e-06, "loss": -0.2375, "step": 3090 }, { "epoch": 0.2761762275863609, "grad_norm": 14.452644100952222, "learning_rate": 1.8168252288376658e-06, "loss": -0.2264, "step": 3092 }, { "epoch": 0.2763548668021347, "grad_norm": 7.845438133438772, "learning_rate": 1.8164653006206662e-06, "loss": -0.2582, "step": 3094 }, { "epoch": 0.2765335060179086, "grad_norm": 8.27456752419136, "learning_rate": 1.816105054857156e-06, "loss": 0.3378, "step": 3096 }, { "epoch": 0.2767121452336824, "grad_norm": 8.233252895852909, "learning_rate": 1.8157444916872442e-06, "loss": -0.9994, "step": 3098 }, { "epoch": 0.27689078444945625, "grad_norm": 6.213577342883574, "learning_rate": 1.8153836112511651e-06, "loss": -0.8304, "step": 3100 }, { "epoch": 0.27706942366523013, "grad_norm": 5.141937366616026, "learning_rate": 1.8150224136892746e-06, "loss": 0.4457, "step": 3102 }, { "epoch": 0.27724806288100395, "grad_norm": 5.204661099616884, "learning_rate": 1.814660899142053e-06, "loss": 0.1801, "step": 3104 }, { "epoch": 0.2774267020967778, "grad_norm": 8.90200436770817, "learning_rate": 1.8142990677501034e-06, "loss": -0.3516, "step": 3106 }, { "epoch": 0.27760534131255166, "grad_norm": 5.112648571439178, "learning_rate": 1.8139369196541522e-06, "loss": 0.0487, "step": 3108 }, { "epoch": 0.2777839805283255, "grad_norm": 17.081845896404626, "learning_rate": 1.8135744549950494e-06, "loss": 0.3966, "step": 3110 }, { "epoch": 0.2779626197440993, "grad_norm": 7.905246595557426, "learning_rate": 1.8132116739137672e-06, "loss": 0.1677, "step": 3112 }, { "epoch": 0.2781412589598732, "grad_norm": 8.72749966991012, "learning_rate": 1.8128485765514024e-06, "loss": 0.065, "step": 3114 }, { "epoch": 0.278319898175647, "grad_norm": 22.828592889626652, "learning_rate": 1.8124851630491729e-06, "loss": 0.0257, "step": 3116 }, { "epoch": 0.27849853739142083, "grad_norm": 6.677226794561918, "learning_rate": 1.812121433548421e-06, "loss": 0.3204, "step": 3118 }, { "epoch": 0.2786771766071947, "grad_norm": 24.25073366923347, "learning_rate": 1.8117573881906112e-06, "loss": 0.4891, "step": 3120 }, { "epoch": 0.27885581582296853, "grad_norm": 12.029702619648896, "learning_rate": 1.8113930271173312e-06, "loss": -0.2392, "step": 3122 }, { "epoch": 0.27903445503874236, "grad_norm": 8.25051138477801, "learning_rate": 1.8110283504702916e-06, "loss": -0.866, "step": 3124 }, { "epoch": 0.27921309425451624, "grad_norm": 5.458271210683379, "learning_rate": 1.810663358391325e-06, "loss": 0.7606, "step": 3126 }, { "epoch": 0.27939173347029006, "grad_norm": 6.634498840124937, "learning_rate": 1.8102980510223876e-06, "loss": 0.7834, "step": 3128 }, { "epoch": 0.2795703726860639, "grad_norm": 3.071427392800951, "learning_rate": 1.8099324285055575e-06, "loss": 0.1401, "step": 3130 }, { "epoch": 0.27974901190183776, "grad_norm": 3.2068282886731976, "learning_rate": 1.8095664909830357e-06, "loss": -0.2986, "step": 3132 }, { "epoch": 0.2799276511176116, "grad_norm": 19.418941948429744, "learning_rate": 1.8092002385971457e-06, "loss": -1.1773, "step": 3134 }, { "epoch": 0.2801062903333854, "grad_norm": 21.54317025914369, "learning_rate": 1.8088336714903338e-06, "loss": -1.0554, "step": 3136 }, { "epoch": 0.2802849295491593, "grad_norm": 8.616575687993118, "learning_rate": 1.8084667898051678e-06, "loss": -0.1039, "step": 3138 }, { "epoch": 0.2804635687649331, "grad_norm": 5.402649137811823, "learning_rate": 1.8080995936843387e-06, "loss": -0.3496, "step": 3140 }, { "epoch": 0.28064220798070694, "grad_norm": 15.12254830659027, "learning_rate": 1.8077320832706593e-06, "loss": -0.745, "step": 3142 }, { "epoch": 0.2808208471964808, "grad_norm": 12.956579026250585, "learning_rate": 1.807364258707065e-06, "loss": 0.301, "step": 3144 }, { "epoch": 0.28099948641225464, "grad_norm": 11.429468555661009, "learning_rate": 1.8069961201366132e-06, "loss": -0.2556, "step": 3146 }, { "epoch": 0.2811781256280285, "grad_norm": 7.211645287920716, "learning_rate": 1.8066276677024833e-06, "loss": -0.7249, "step": 3148 }, { "epoch": 0.28135676484380234, "grad_norm": 4.4549509688396824, "learning_rate": 1.8062589015479768e-06, "loss": 0.4607, "step": 3150 }, { "epoch": 0.28153540405957617, "grad_norm": 6.575093710343959, "learning_rate": 1.8058898218165177e-06, "loss": 0.2202, "step": 3152 }, { "epoch": 0.28171404327535005, "grad_norm": 14.184682961310436, "learning_rate": 1.8055204286516514e-06, "loss": -0.7459, "step": 3154 }, { "epoch": 0.28189268249112387, "grad_norm": 3.5651386594624688, "learning_rate": 1.8051507221970456e-06, "loss": 0.0352, "step": 3156 }, { "epoch": 0.2820713217068977, "grad_norm": 13.100074569839347, "learning_rate": 1.8047807025964888e-06, "loss": -1.9003, "step": 3158 }, { "epoch": 0.28224996092267157, "grad_norm": 6.515269025048006, "learning_rate": 1.8044103699938929e-06, "loss": -1.4813, "step": 3160 }, { "epoch": 0.2824286001384454, "grad_norm": 11.353358211374344, "learning_rate": 1.8040397245332906e-06, "loss": -0.6491, "step": 3162 }, { "epoch": 0.2826072393542192, "grad_norm": 7.405676195796856, "learning_rate": 1.8036687663588364e-06, "loss": 0.5113, "step": 3164 }, { "epoch": 0.2827858785699931, "grad_norm": 2.926982800984786, "learning_rate": 1.8032974956148062e-06, "loss": 0.5766, "step": 3166 }, { "epoch": 0.2829645177857669, "grad_norm": 12.511285798784519, "learning_rate": 1.8029259124455983e-06, "loss": -0.5236, "step": 3168 }, { "epoch": 0.28314315700154075, "grad_norm": 11.184940322355253, "learning_rate": 1.8025540169957312e-06, "loss": -0.2954, "step": 3170 }, { "epoch": 0.2833217962173146, "grad_norm": 6.042001894716019, "learning_rate": 1.802181809409846e-06, "loss": 0.0084, "step": 3172 }, { "epoch": 0.28350043543308845, "grad_norm": 5.819259814618958, "learning_rate": 1.801809289832705e-06, "loss": -0.9441, "step": 3174 }, { "epoch": 0.2836790746488623, "grad_norm": 4.08084798938897, "learning_rate": 1.8014364584091912e-06, "loss": -0.9821, "step": 3176 }, { "epoch": 0.28385771386463615, "grad_norm": 6.246642469741277, "learning_rate": 1.8010633152843094e-06, "loss": 0.1971, "step": 3178 }, { "epoch": 0.28403635308041, "grad_norm": 3.185344191417601, "learning_rate": 1.8006898606031857e-06, "loss": -1.063, "step": 3180 }, { "epoch": 0.2842149922961838, "grad_norm": 9.37160664498881, "learning_rate": 1.8003160945110674e-06, "loss": -0.3859, "step": 3182 }, { "epoch": 0.2843936315119577, "grad_norm": 9.467859207402242, "learning_rate": 1.7999420171533222e-06, "loss": -0.0023, "step": 3184 }, { "epoch": 0.2845722707277315, "grad_norm": 9.890645827140395, "learning_rate": 1.7995676286754397e-06, "loss": 0.2973, "step": 3186 }, { "epoch": 0.2847509099435053, "grad_norm": 4.333517633089076, "learning_rate": 1.7991929292230303e-06, "loss": -0.2183, "step": 3188 }, { "epoch": 0.2849295491592792, "grad_norm": 27.740701303122503, "learning_rate": 1.798817918941825e-06, "loss": -1.6334, "step": 3190 }, { "epoch": 0.28510818837505303, "grad_norm": 15.789040755644354, "learning_rate": 1.7984425979776762e-06, "loss": 1.0569, "step": 3192 }, { "epoch": 0.28528682759082685, "grad_norm": 16.639167886615734, "learning_rate": 1.7980669664765566e-06, "loss": -0.8139, "step": 3194 }, { "epoch": 0.28546546680660073, "grad_norm": 5.804294167254904, "learning_rate": 1.7976910245845603e-06, "loss": 0.2002, "step": 3196 }, { "epoch": 0.28564410602237456, "grad_norm": 2.604647894582805, "learning_rate": 1.7973147724479012e-06, "loss": -0.8085, "step": 3198 }, { "epoch": 0.2858227452381484, "grad_norm": 10.855670807650812, "learning_rate": 1.7969382102129149e-06, "loss": -0.8262, "step": 3200 }, { "epoch": 0.28600138445392226, "grad_norm": 6.685088391286715, "learning_rate": 1.7965613380260569e-06, "loss": 0.1567, "step": 3202 }, { "epoch": 0.2861800236696961, "grad_norm": 3.73610797079932, "learning_rate": 1.7961841560339037e-06, "loss": 0.4189, "step": 3204 }, { "epoch": 0.2863586628854699, "grad_norm": 19.309790560091773, "learning_rate": 1.795806664383152e-06, "loss": -1.1765, "step": 3206 }, { "epoch": 0.2865373021012438, "grad_norm": 5.814713579571325, "learning_rate": 1.7954288632206186e-06, "loss": 0.406, "step": 3208 }, { "epoch": 0.2867159413170176, "grad_norm": 7.938950774005961, "learning_rate": 1.7950507526932419e-06, "loss": 0.7646, "step": 3210 }, { "epoch": 0.2868945805327915, "grad_norm": 7.068202054670235, "learning_rate": 1.794672332948079e-06, "loss": 1.6283, "step": 3212 }, { "epoch": 0.2870732197485653, "grad_norm": 6.61847149170219, "learning_rate": 1.7942936041323087e-06, "loss": 0.2099, "step": 3214 }, { "epoch": 0.28725185896433914, "grad_norm": 10.29658899213943, "learning_rate": 1.793914566393229e-06, "loss": 0.3897, "step": 3216 }, { "epoch": 0.287430498180113, "grad_norm": 5.142526755998186, "learning_rate": 1.7935352198782584e-06, "loss": 0.9492, "step": 3218 }, { "epoch": 0.28760913739588684, "grad_norm": 6.895689748631025, "learning_rate": 1.7931555647349358e-06, "loss": -1.4771, "step": 3220 }, { "epoch": 0.28778777661166066, "grad_norm": 7.756640634835952, "learning_rate": 1.7927756011109195e-06, "loss": -0.573, "step": 3222 }, { "epoch": 0.28796641582743454, "grad_norm": 22.243087552038638, "learning_rate": 1.7923953291539883e-06, "loss": -0.8846, "step": 3224 }, { "epoch": 0.28814505504320836, "grad_norm": 6.998494667591359, "learning_rate": 1.792014749012041e-06, "loss": -0.5158, "step": 3226 }, { "epoch": 0.2883236942589822, "grad_norm": 8.410421591407582, "learning_rate": 1.7916338608330956e-06, "loss": 1.2704, "step": 3228 }, { "epoch": 0.28850233347475607, "grad_norm": 8.547806879042499, "learning_rate": 1.7912526647652908e-06, "loss": -0.659, "step": 3230 }, { "epoch": 0.2886809726905299, "grad_norm": 13.766516735624396, "learning_rate": 1.7908711609568839e-06, "loss": -1.0008, "step": 3232 }, { "epoch": 0.2888596119063037, "grad_norm": 6.495497978449158, "learning_rate": 1.790489349556253e-06, "loss": -0.305, "step": 3234 }, { "epoch": 0.2890382511220776, "grad_norm": 12.826166967649815, "learning_rate": 1.7901072307118952e-06, "loss": -0.2239, "step": 3236 }, { "epoch": 0.2892168903378514, "grad_norm": 9.497433767547173, "learning_rate": 1.7897248045724278e-06, "loss": -0.3249, "step": 3238 }, { "epoch": 0.28939552955362524, "grad_norm": 9.393125382073816, "learning_rate": 1.7893420712865864e-06, "loss": -0.9311, "step": 3240 }, { "epoch": 0.2895741687693991, "grad_norm": 30.472709077716676, "learning_rate": 1.7889590310032275e-06, "loss": -1.1763, "step": 3242 }, { "epoch": 0.28975280798517294, "grad_norm": 19.1999757448679, "learning_rate": 1.788575683871326e-06, "loss": 0.305, "step": 3244 }, { "epoch": 0.28993144720094677, "grad_norm": 21.308357798304094, "learning_rate": 1.7881920300399765e-06, "loss": -0.8716, "step": 3246 }, { "epoch": 0.29011008641672065, "grad_norm": 11.992167777874663, "learning_rate": 1.7878080696583928e-06, "loss": 0.8963, "step": 3248 }, { "epoch": 0.29028872563249447, "grad_norm": 20.68657873585968, "learning_rate": 1.7874238028759083e-06, "loss": -0.28, "step": 3250 }, { "epoch": 0.2904673648482683, "grad_norm": 12.828941551546947, "learning_rate": 1.7870392298419752e-06, "loss": 0.5094, "step": 3252 }, { "epoch": 0.2906460040640422, "grad_norm": 6.15782153214337, "learning_rate": 1.7866543507061644e-06, "loss": -1.1879, "step": 3254 }, { "epoch": 0.290824643279816, "grad_norm": 11.050307284264097, "learning_rate": 1.7862691656181666e-06, "loss": -0.5184, "step": 3256 }, { "epoch": 0.2910032824955898, "grad_norm": 23.637828754819836, "learning_rate": 1.7858836747277914e-06, "loss": -0.8156, "step": 3258 }, { "epoch": 0.2911819217113637, "grad_norm": 17.376182021956268, "learning_rate": 1.7854978781849668e-06, "loss": 0.4088, "step": 3260 }, { "epoch": 0.2913605609271375, "grad_norm": 5.759100636629149, "learning_rate": 1.7851117761397403e-06, "loss": -0.2793, "step": 3262 }, { "epoch": 0.29153920014291135, "grad_norm": 16.911882798350877, "learning_rate": 1.7847253687422776e-06, "loss": 0.0031, "step": 3264 }, { "epoch": 0.2917178393586852, "grad_norm": 7.315398334705374, "learning_rate": 1.784338656142864e-06, "loss": -0.8139, "step": 3266 }, { "epoch": 0.29189647857445905, "grad_norm": 9.34902203183093, "learning_rate": 1.7839516384919024e-06, "loss": -0.5541, "step": 3268 }, { "epoch": 0.2920751177902329, "grad_norm": 10.825457443897182, "learning_rate": 1.7835643159399154e-06, "loss": -0.1754, "step": 3270 }, { "epoch": 0.29225375700600675, "grad_norm": 6.63729006332898, "learning_rate": 1.7831766886375433e-06, "loss": 1.3472, "step": 3272 }, { "epoch": 0.2924323962217806, "grad_norm": 10.406507334233755, "learning_rate": 1.782788756735546e-06, "loss": 0.1401, "step": 3274 }, { "epoch": 0.29261103543755446, "grad_norm": 9.536510379190197, "learning_rate": 1.7824005203848006e-06, "loss": -0.0756, "step": 3276 }, { "epoch": 0.2927896746533283, "grad_norm": 10.904081200009758, "learning_rate": 1.7820119797363035e-06, "loss": -0.1946, "step": 3278 }, { "epoch": 0.2929683138691021, "grad_norm": 13.899568817257032, "learning_rate": 1.7816231349411693e-06, "loss": 0.0182, "step": 3280 }, { "epoch": 0.293146953084876, "grad_norm": 8.482123931634197, "learning_rate": 1.7812339861506302e-06, "loss": -1.9375, "step": 3282 }, { "epoch": 0.2933255923006498, "grad_norm": 7.73664714691886, "learning_rate": 1.7808445335160381e-06, "loss": 0.1744, "step": 3284 }, { "epoch": 0.29350423151642363, "grad_norm": 16.939059365198954, "learning_rate": 1.7804547771888614e-06, "loss": -0.9626, "step": 3286 }, { "epoch": 0.2936828707321975, "grad_norm": 8.66865350494915, "learning_rate": 1.7800647173206877e-06, "loss": 1.1781, "step": 3288 }, { "epoch": 0.29386150994797133, "grad_norm": 6.944155413228896, "learning_rate": 1.7796743540632221e-06, "loss": 0.188, "step": 3290 }, { "epoch": 0.29404014916374516, "grad_norm": 8.797935191580688, "learning_rate": 1.7792836875682887e-06, "loss": -0.2106, "step": 3292 }, { "epoch": 0.29421878837951904, "grad_norm": 27.48881053428547, "learning_rate": 1.7788927179878278e-06, "loss": -0.141, "step": 3294 }, { "epoch": 0.29439742759529286, "grad_norm": 8.132965877966535, "learning_rate": 1.7785014454738993e-06, "loss": -0.201, "step": 3296 }, { "epoch": 0.2945760668110667, "grad_norm": 16.97751881222728, "learning_rate": 1.7781098701786795e-06, "loss": 0.0827, "step": 3298 }, { "epoch": 0.29475470602684056, "grad_norm": 10.629936597317421, "learning_rate": 1.777717992254464e-06, "loss": -1.4502, "step": 3300 }, { "epoch": 0.2949333452426144, "grad_norm": 7.386901046302914, "learning_rate": 1.7773258118536642e-06, "loss": -0.1112, "step": 3302 }, { "epoch": 0.2951119844583882, "grad_norm": 6.610559235662526, "learning_rate": 1.7769333291288108e-06, "loss": -0.4039, "step": 3304 }, { "epoch": 0.2952906236741621, "grad_norm": 11.169061394498735, "learning_rate": 1.7765405442325518e-06, "loss": -0.6856, "step": 3306 }, { "epoch": 0.2954692628899359, "grad_norm": 20.305607304099063, "learning_rate": 1.7761474573176517e-06, "loss": 0.6946, "step": 3308 }, { "epoch": 0.29564790210570974, "grad_norm": 7.240199864616297, "learning_rate": 1.7757540685369933e-06, "loss": 0.6473, "step": 3310 }, { "epoch": 0.2958265413214836, "grad_norm": 2.6605540690914977, "learning_rate": 1.7753603780435773e-06, "loss": 0.4734, "step": 3312 }, { "epoch": 0.29600518053725744, "grad_norm": 11.753801745339675, "learning_rate": 1.7749663859905202e-06, "loss": -0.3365, "step": 3314 }, { "epoch": 0.29618381975303126, "grad_norm": 6.377285117697978, "learning_rate": 1.774572092531057e-06, "loss": -1.724, "step": 3316 }, { "epoch": 0.29636245896880514, "grad_norm": 1.0111289053935946, "learning_rate": 1.77417749781854e-06, "loss": 0.3676, "step": 3318 }, { "epoch": 0.29654109818457897, "grad_norm": 15.789580973210477, "learning_rate": 1.7737826020064375e-06, "loss": -0.2539, "step": 3320 }, { "epoch": 0.2967197374003528, "grad_norm": 8.798380910874002, "learning_rate": 1.7733874052483363e-06, "loss": -0.6066, "step": 3322 }, { "epoch": 0.29689837661612667, "grad_norm": 5.544617443320453, "learning_rate": 1.7729919076979397e-06, "loss": 0.0945, "step": 3324 }, { "epoch": 0.2970770158319005, "grad_norm": 5.973137805554182, "learning_rate": 1.7725961095090674e-06, "loss": -0.1102, "step": 3326 }, { "epoch": 0.2972556550476743, "grad_norm": 10.835889622280673, "learning_rate": 1.7722000108356569e-06, "loss": -0.8645, "step": 3328 }, { "epoch": 0.2974342942634482, "grad_norm": 9.047965766363031, "learning_rate": 1.7718036118317618e-06, "loss": -0.7049, "step": 3330 }, { "epoch": 0.297612933479222, "grad_norm": 4.477230960807809, "learning_rate": 1.7714069126515536e-06, "loss": -0.455, "step": 3332 }, { "epoch": 0.29779157269499584, "grad_norm": 9.851661627177734, "learning_rate": 1.7710099134493196e-06, "loss": -0.1513, "step": 3334 }, { "epoch": 0.2979702119107697, "grad_norm": 6.625409383540894, "learning_rate": 1.7706126143794636e-06, "loss": 0.0578, "step": 3336 }, { "epoch": 0.29814885112654355, "grad_norm": 9.474503283200598, "learning_rate": 1.770215015596507e-06, "loss": 0.311, "step": 3338 }, { "epoch": 0.29832749034231737, "grad_norm": 5.151043476943986, "learning_rate": 1.769817117255087e-06, "loss": -0.1151, "step": 3340 }, { "epoch": 0.29850612955809125, "grad_norm": 3.6807139396577426, "learning_rate": 1.7694189195099577e-06, "loss": 0.7887, "step": 3342 }, { "epoch": 0.2986847687738651, "grad_norm": 5.585622669231009, "learning_rate": 1.7690204225159893e-06, "loss": -1.1815, "step": 3344 }, { "epoch": 0.29886340798963895, "grad_norm": 4.502016039758322, "learning_rate": 1.7686216264281688e-06, "loss": -0.394, "step": 3346 }, { "epoch": 0.2990420472054128, "grad_norm": 21.492059705181674, "learning_rate": 1.7682225314015992e-06, "loss": 0.3688, "step": 3348 }, { "epoch": 0.2992206864211866, "grad_norm": 16.193352661205036, "learning_rate": 1.7678231375914998e-06, "loss": -0.5955, "step": 3350 }, { "epoch": 0.2993993256369605, "grad_norm": 9.409992069318067, "learning_rate": 1.7674234451532063e-06, "loss": -0.9496, "step": 3352 }, { "epoch": 0.2995779648527343, "grad_norm": 10.886112542549911, "learning_rate": 1.76702345424217e-06, "loss": -0.7131, "step": 3354 }, { "epoch": 0.2997566040685081, "grad_norm": 14.975689324767428, "learning_rate": 1.7666231650139598e-06, "loss": -0.1426, "step": 3356 }, { "epoch": 0.299935243284282, "grad_norm": 6.4624325776410725, "learning_rate": 1.7662225776242584e-06, "loss": -0.2367, "step": 3358 }, { "epoch": 0.30011388250005583, "grad_norm": 6.4943517940291295, "learning_rate": 1.765821692228866e-06, "loss": 0.6671, "step": 3360 }, { "epoch": 0.30029252171582965, "grad_norm": 3.307305059130529, "learning_rate": 1.7654205089836983e-06, "loss": -0.6076, "step": 3362 }, { "epoch": 0.30047116093160353, "grad_norm": 18.547368622327536, "learning_rate": 1.765019028044787e-06, "loss": -1.2054, "step": 3364 }, { "epoch": 0.30064980014737736, "grad_norm": 5.707521515453554, "learning_rate": 1.764617249568279e-06, "loss": -0.2782, "step": 3366 }, { "epoch": 0.3008284393631512, "grad_norm": 9.673692276769248, "learning_rate": 1.764215173710438e-06, "loss": -0.2189, "step": 3368 }, { "epoch": 0.30100707857892506, "grad_norm": 9.95205141322848, "learning_rate": 1.763812800627642e-06, "loss": -0.3515, "step": 3370 }, { "epoch": 0.3011857177946989, "grad_norm": 8.895563078325551, "learning_rate": 1.763410130476386e-06, "loss": 0.0982, "step": 3372 }, { "epoch": 0.3013643570104727, "grad_norm": 11.953933030286649, "learning_rate": 1.7630071634132792e-06, "loss": 1.7192, "step": 3374 }, { "epoch": 0.3015429962262466, "grad_norm": 6.869753135898943, "learning_rate": 1.7626038995950471e-06, "loss": 0.8069, "step": 3376 }, { "epoch": 0.3017216354420204, "grad_norm": 14.305674695421077, "learning_rate": 1.7622003391785307e-06, "loss": -0.0134, "step": 3378 }, { "epoch": 0.30190027465779423, "grad_norm": 3.462921882570081, "learning_rate": 1.761796482320686e-06, "loss": 0.3388, "step": 3380 }, { "epoch": 0.3020789138735681, "grad_norm": 5.897037163037561, "learning_rate": 1.7613923291785842e-06, "loss": 0.3783, "step": 3382 }, { "epoch": 0.30225755308934193, "grad_norm": 6.8832907883974155, "learning_rate": 1.760987879909412e-06, "loss": 0.3982, "step": 3384 }, { "epoch": 0.30243619230511576, "grad_norm": 4.929925637757961, "learning_rate": 1.7605831346704712e-06, "loss": -0.1965, "step": 3386 }, { "epoch": 0.30261483152088964, "grad_norm": 12.954896002837534, "learning_rate": 1.7601780936191787e-06, "loss": -0.6319, "step": 3388 }, { "epoch": 0.30279347073666346, "grad_norm": 13.18827289648601, "learning_rate": 1.7597727569130667e-06, "loss": -0.7741, "step": 3390 }, { "epoch": 0.3029721099524373, "grad_norm": 13.585362444701513, "learning_rate": 1.7593671247097816e-06, "loss": -0.2458, "step": 3392 }, { "epoch": 0.30315074916821116, "grad_norm": 3.8742884320998274, "learning_rate": 1.758961197167086e-06, "loss": 0.1819, "step": 3394 }, { "epoch": 0.303329388383985, "grad_norm": 2.1130940747529685, "learning_rate": 1.7585549744428558e-06, "loss": 0.4761, "step": 3396 }, { "epoch": 0.3035080275997588, "grad_norm": 6.727040272757316, "learning_rate": 1.7581484566950832e-06, "loss": -0.4836, "step": 3398 }, { "epoch": 0.3036866668155327, "grad_norm": 14.602929310025223, "learning_rate": 1.757741644081874e-06, "loss": -0.6633, "step": 3400 }, { "epoch": 0.3038653060313065, "grad_norm": 14.853292060488803, "learning_rate": 1.7573345367614496e-06, "loss": -0.1428, "step": 3402 }, { "epoch": 0.30404394524708034, "grad_norm": 34.28684185535411, "learning_rate": 1.7569271348921453e-06, "loss": -0.7517, "step": 3404 }, { "epoch": 0.3042225844628542, "grad_norm": 9.313308578309037, "learning_rate": 1.7565194386324111e-06, "loss": 0.7588, "step": 3406 }, { "epoch": 0.30440122367862804, "grad_norm": 9.66419078646158, "learning_rate": 1.756111448140812e-06, "loss": -0.3245, "step": 3408 }, { "epoch": 0.3045798628944019, "grad_norm": 11.715999026063075, "learning_rate": 1.7557031635760274e-06, "loss": -0.0083, "step": 3410 }, { "epoch": 0.30475850211017574, "grad_norm": 17.74461347488434, "learning_rate": 1.7552945850968496e-06, "loss": -1.193, "step": 3412 }, { "epoch": 0.30493714132594957, "grad_norm": 8.490103401839448, "learning_rate": 1.7548857128621874e-06, "loss": -0.0002, "step": 3414 }, { "epoch": 0.30511578054172345, "grad_norm": 7.561999343819578, "learning_rate": 1.7544765470310624e-06, "loss": 1.1966, "step": 3416 }, { "epoch": 0.30529441975749727, "grad_norm": 14.29260950713471, "learning_rate": 1.7540670877626109e-06, "loss": -0.6143, "step": 3418 }, { "epoch": 0.3054730589732711, "grad_norm": 9.170835575818128, "learning_rate": 1.753657335216083e-06, "loss": -1.9108, "step": 3420 }, { "epoch": 0.305651698189045, "grad_norm": 4.473656155288642, "learning_rate": 1.7532472895508432e-06, "loss": 1.1086, "step": 3422 }, { "epoch": 0.3058303374048188, "grad_norm": 24.397461856093376, "learning_rate": 1.75283695092637e-06, "loss": -1.1868, "step": 3424 }, { "epoch": 0.3060089766205926, "grad_norm": 3.0206187906483613, "learning_rate": 1.7524263195022558e-06, "loss": 0.0868, "step": 3426 }, { "epoch": 0.3061876158363665, "grad_norm": 17.65543589107633, "learning_rate": 1.7520153954382067e-06, "loss": 0.3071, "step": 3428 }, { "epoch": 0.3063662550521403, "grad_norm": 17.176875267210637, "learning_rate": 1.7516041788940426e-06, "loss": 1.1541, "step": 3430 }, { "epoch": 0.30654489426791415, "grad_norm": 14.351544956950356, "learning_rate": 1.7511926700296971e-06, "loss": 0.4163, "step": 3432 }, { "epoch": 0.306723533483688, "grad_norm": 25.65572186575784, "learning_rate": 1.7507808690052181e-06, "loss": -0.1617, "step": 3434 }, { "epoch": 0.30690217269946185, "grad_norm": 10.494568646177695, "learning_rate": 1.7503687759807667e-06, "loss": 0.3788, "step": 3436 }, { "epoch": 0.3070808119152357, "grad_norm": 17.401406838555825, "learning_rate": 1.7499563911166172e-06, "loss": -0.6907, "step": 3438 }, { "epoch": 0.30725945113100955, "grad_norm": 6.04808832657112, "learning_rate": 1.7495437145731579e-06, "loss": 0.3903, "step": 3440 }, { "epoch": 0.3074380903467834, "grad_norm": 9.437629345140124, "learning_rate": 1.749130746510891e-06, "loss": -0.1169, "step": 3442 }, { "epoch": 0.3076167295625572, "grad_norm": 7.881736417276082, "learning_rate": 1.7487174870904304e-06, "loss": 0.7702, "step": 3444 }, { "epoch": 0.3077953687783311, "grad_norm": 4.918420360333426, "learning_rate": 1.7483039364725052e-06, "loss": 0.5999, "step": 3446 }, { "epoch": 0.3079740079941049, "grad_norm": 9.079592666563993, "learning_rate": 1.747890094817957e-06, "loss": -0.8228, "step": 3448 }, { "epoch": 0.3081526472098787, "grad_norm": 10.675829845754746, "learning_rate": 1.7474759622877397e-06, "loss": -0.1097, "step": 3450 }, { "epoch": 0.3083312864256526, "grad_norm": 1.7886010120331397, "learning_rate": 1.7470615390429222e-06, "loss": 0.0551, "step": 3452 }, { "epoch": 0.30850992564142643, "grad_norm": 12.165800995708729, "learning_rate": 1.746646825244685e-06, "loss": -1.3853, "step": 3454 }, { "epoch": 0.30868856485720025, "grad_norm": 7.079368768343907, "learning_rate": 1.7462318210543222e-06, "loss": -0.7161, "step": 3456 }, { "epoch": 0.30886720407297413, "grad_norm": 9.097667571241345, "learning_rate": 1.7458165266332402e-06, "loss": -0.2322, "step": 3458 }, { "epoch": 0.30904584328874796, "grad_norm": 8.560819363462128, "learning_rate": 1.7454009421429595e-06, "loss": 0.7356, "step": 3460 }, { "epoch": 0.3092244825045218, "grad_norm": 12.326468816984601, "learning_rate": 1.7449850677451124e-06, "loss": 0.0427, "step": 3462 }, { "epoch": 0.30940312172029566, "grad_norm": 19.033914915019125, "learning_rate": 1.744568903601444e-06, "loss": -0.0273, "step": 3464 }, { "epoch": 0.3095817609360695, "grad_norm": 6.449517449359281, "learning_rate": 1.7441524498738127e-06, "loss": -0.1835, "step": 3466 }, { "epoch": 0.3097604001518433, "grad_norm": 12.696327483464769, "learning_rate": 1.7437357067241889e-06, "loss": 0.8975, "step": 3468 }, { "epoch": 0.3099390393676172, "grad_norm": 5.1784012752447115, "learning_rate": 1.7433186743146559e-06, "loss": 0.8874, "step": 3470 }, { "epoch": 0.310117678583391, "grad_norm": 8.557396474619376, "learning_rate": 1.7429013528074097e-06, "loss": -0.31, "step": 3472 }, { "epoch": 0.3102963177991649, "grad_norm": 23.602608539795185, "learning_rate": 1.7424837423647582e-06, "loss": 0.0559, "step": 3474 }, { "epoch": 0.3104749570149387, "grad_norm": 4.215586740872998, "learning_rate": 1.7420658431491222e-06, "loss": 0.4191, "step": 3476 }, { "epoch": 0.31065359623071254, "grad_norm": 9.052684861987034, "learning_rate": 1.741647655323034e-06, "loss": -0.7555, "step": 3478 }, { "epoch": 0.3108322354464864, "grad_norm": 8.083489590212764, "learning_rate": 1.7412291790491392e-06, "loss": 1.2331, "step": 3480 }, { "epoch": 0.31101087466226024, "grad_norm": 9.601134201442116, "learning_rate": 1.740810414490195e-06, "loss": 0.3662, "step": 3482 }, { "epoch": 0.31118951387803406, "grad_norm": 11.235244908297018, "learning_rate": 1.7403913618090706e-06, "loss": 0.1373, "step": 3484 }, { "epoch": 0.31136815309380794, "grad_norm": 8.941639615101229, "learning_rate": 1.739972021168748e-06, "loss": 0.2485, "step": 3486 }, { "epoch": 0.31154679230958177, "grad_norm": 6.463051465114807, "learning_rate": 1.7395523927323196e-06, "loss": 0.0649, "step": 3488 }, { "epoch": 0.3117254315253556, "grad_norm": 3.4988196971366343, "learning_rate": 1.7391324766629922e-06, "loss": -0.0706, "step": 3490 }, { "epoch": 0.31190407074112947, "grad_norm": 1.4614043280406244, "learning_rate": 1.738712273124082e-06, "loss": 0.3974, "step": 3492 }, { "epoch": 0.3120827099569033, "grad_norm": 13.671098192217405, "learning_rate": 1.7382917822790186e-06, "loss": -0.471, "step": 3494 }, { "epoch": 0.3122613491726771, "grad_norm": 10.911092912598777, "learning_rate": 1.7378710042913425e-06, "loss": -1.071, "step": 3496 }, { "epoch": 0.312439988388451, "grad_norm": 3.7089751184718613, "learning_rate": 1.7374499393247066e-06, "loss": 0.6248, "step": 3498 }, { "epoch": 0.3126186276042248, "grad_norm": 10.130192684890442, "learning_rate": 1.7370285875428745e-06, "loss": 0.3889, "step": 3500 }, { "epoch": 0.31279726681999864, "grad_norm": 7.904330416363848, "learning_rate": 1.7366069491097222e-06, "loss": 0.3415, "step": 3502 }, { "epoch": 0.3129759060357725, "grad_norm": 3.6719336971704726, "learning_rate": 1.7361850241892366e-06, "loss": 0.5311, "step": 3504 }, { "epoch": 0.31315454525154635, "grad_norm": 5.63370755474385, "learning_rate": 1.7357628129455168e-06, "loss": -0.2066, "step": 3506 }, { "epoch": 0.31333318446732017, "grad_norm": 10.450875306773801, "learning_rate": 1.735340315542772e-06, "loss": -0.5826, "step": 3508 }, { "epoch": 0.31351182368309405, "grad_norm": 9.860355769735781, "learning_rate": 1.734917532145324e-06, "loss": 0.144, "step": 3510 }, { "epoch": 0.3136904628988679, "grad_norm": 4.717804466655618, "learning_rate": 1.7344944629176053e-06, "loss": -1.0164, "step": 3512 }, { "epoch": 0.3138691021146417, "grad_norm": 13.465542369278266, "learning_rate": 1.7340711080241587e-06, "loss": -1.086, "step": 3514 }, { "epoch": 0.3140477413304156, "grad_norm": 8.203858319156955, "learning_rate": 1.7336474676296398e-06, "loss": -0.4492, "step": 3516 }, { "epoch": 0.3142263805461894, "grad_norm": 11.545914255108952, "learning_rate": 1.733223541898814e-06, "loss": 0.4116, "step": 3518 }, { "epoch": 0.3144050197619632, "grad_norm": 20.007463969777707, "learning_rate": 1.732799330996558e-06, "loss": -1.1414, "step": 3520 }, { "epoch": 0.3145836589777371, "grad_norm": 16.783307565608062, "learning_rate": 1.7323748350878598e-06, "loss": -1.175, "step": 3522 }, { "epoch": 0.3147622981935109, "grad_norm": 12.59294485542742, "learning_rate": 1.7319500543378173e-06, "loss": 0.0297, "step": 3524 }, { "epoch": 0.31494093740928475, "grad_norm": 12.242682938084322, "learning_rate": 1.7315249889116403e-06, "loss": -0.8423, "step": 3526 }, { "epoch": 0.31511957662505863, "grad_norm": 12.305916977266147, "learning_rate": 1.7310996389746486e-06, "loss": 0.8472, "step": 3528 }, { "epoch": 0.31529821584083245, "grad_norm": 19.162634030027032, "learning_rate": 1.7306740046922728e-06, "loss": -0.6108, "step": 3530 }, { "epoch": 0.3154768550566063, "grad_norm": 11.698575081416834, "learning_rate": 1.730248086230054e-06, "loss": -1.3496, "step": 3532 }, { "epoch": 0.31565549427238015, "grad_norm": 15.18441879350934, "learning_rate": 1.7298218837536442e-06, "loss": -0.0431, "step": 3534 }, { "epoch": 0.315834133488154, "grad_norm": 9.43151746690402, "learning_rate": 1.729395397428806e-06, "loss": -0.0643, "step": 3536 }, { "epoch": 0.3160127727039278, "grad_norm": 8.72811463297697, "learning_rate": 1.7289686274214115e-06, "loss": -0.0179, "step": 3538 }, { "epoch": 0.3161914119197017, "grad_norm": 8.965496895460928, "learning_rate": 1.7285415738974437e-06, "loss": 0.365, "step": 3540 }, { "epoch": 0.3163700511354755, "grad_norm": 7.809853311448166, "learning_rate": 1.728114237022996e-06, "loss": 0.0767, "step": 3542 }, { "epoch": 0.3165486903512494, "grad_norm": 6.753200478685811, "learning_rate": 1.7276866169642716e-06, "loss": 0.0531, "step": 3544 }, { "epoch": 0.3167273295670232, "grad_norm": 7.583767581565325, "learning_rate": 1.7272587138875845e-06, "loss": -0.6755, "step": 3546 }, { "epoch": 0.31690596878279703, "grad_norm": 11.881665096025117, "learning_rate": 1.7268305279593578e-06, "loss": -0.2497, "step": 3548 }, { "epoch": 0.3170846079985709, "grad_norm": 31.24522912800799, "learning_rate": 1.7264020593461255e-06, "loss": -0.1065, "step": 3550 }, { "epoch": 0.31726324721434473, "grad_norm": 4.0496192376381055, "learning_rate": 1.7259733082145308e-06, "loss": 0.7583, "step": 3552 }, { "epoch": 0.31744188643011856, "grad_norm": 8.827252532247302, "learning_rate": 1.725544274731328e-06, "loss": -0.0343, "step": 3554 }, { "epoch": 0.31762052564589244, "grad_norm": 18.130520104987163, "learning_rate": 1.7251149590633798e-06, "loss": -1.1785, "step": 3556 }, { "epoch": 0.31779916486166626, "grad_norm": 2.0650056588601076, "learning_rate": 1.724685361377659e-06, "loss": 0.0507, "step": 3558 }, { "epoch": 0.3179778040774401, "grad_norm": 35.82339391735256, "learning_rate": 1.724255481841249e-06, "loss": -0.9117, "step": 3560 }, { "epoch": 0.31815644329321396, "grad_norm": 4.159583339732558, "learning_rate": 1.7238253206213415e-06, "loss": 0.5074, "step": 3562 }, { "epoch": 0.3183350825089878, "grad_norm": 13.290405277361689, "learning_rate": 1.7233948778852386e-06, "loss": -0.1906, "step": 3564 }, { "epoch": 0.3185137217247616, "grad_norm": 6.3818092631663275, "learning_rate": 1.722964153800352e-06, "loss": -0.4985, "step": 3566 }, { "epoch": 0.3186923609405355, "grad_norm": 7.383568134693293, "learning_rate": 1.722533148534202e-06, "loss": -1.1811, "step": 3568 }, { "epoch": 0.3188710001563093, "grad_norm": 20.593409904484993, "learning_rate": 1.7221018622544194e-06, "loss": -0.0686, "step": 3570 }, { "epoch": 0.31904963937208314, "grad_norm": 7.372476436956347, "learning_rate": 1.7216702951287432e-06, "loss": -1.427, "step": 3572 }, { "epoch": 0.319228278587857, "grad_norm": 5.069485401470033, "learning_rate": 1.7212384473250224e-06, "loss": -0.0924, "step": 3574 }, { "epoch": 0.31940691780363084, "grad_norm": 12.03801681660867, "learning_rate": 1.7208063190112143e-06, "loss": -0.7302, "step": 3576 }, { "epoch": 0.31958555701940466, "grad_norm": 5.504263872341804, "learning_rate": 1.7203739103553863e-06, "loss": 0.2869, "step": 3578 }, { "epoch": 0.31976419623517854, "grad_norm": 12.51863540086557, "learning_rate": 1.7199412215257146e-06, "loss": -0.8401, "step": 3580 }, { "epoch": 0.31994283545095237, "grad_norm": 13.831187315401051, "learning_rate": 1.7195082526904833e-06, "loss": 0.1711, "step": 3582 }, { "epoch": 0.3201214746667262, "grad_norm": 9.355291376841528, "learning_rate": 1.7190750040180873e-06, "loss": -0.3061, "step": 3584 }, { "epoch": 0.32030011388250007, "grad_norm": 14.48973351443483, "learning_rate": 1.7186414756770287e-06, "loss": -0.0103, "step": 3586 }, { "epoch": 0.3204787530982739, "grad_norm": 7.6060931826672995, "learning_rate": 1.718207667835919e-06, "loss": 0.5606, "step": 3588 }, { "epoch": 0.3206573923140477, "grad_norm": 5.398933708031228, "learning_rate": 1.7177735806634788e-06, "loss": -0.1508, "step": 3590 }, { "epoch": 0.3208360315298216, "grad_norm": 8.164635334269471, "learning_rate": 1.7173392143285361e-06, "loss": -0.0265, "step": 3592 }, { "epoch": 0.3210146707455954, "grad_norm": 19.457006546736977, "learning_rate": 1.716904569000029e-06, "loss": -0.1162, "step": 3594 }, { "epoch": 0.32119330996136924, "grad_norm": 2.903665685657097, "learning_rate": 1.7164696448470033e-06, "loss": 0.3729, "step": 3596 }, { "epoch": 0.3213719491771431, "grad_norm": 9.506296179544515, "learning_rate": 1.7160344420386128e-06, "loss": -0.5628, "step": 3598 }, { "epoch": 0.32155058839291695, "grad_norm": 24.340753686750592, "learning_rate": 1.715598960744121e-06, "loss": -0.485, "step": 3600 }, { "epoch": 0.32172922760869077, "grad_norm": 9.261686211588772, "learning_rate": 1.7151632011328983e-06, "loss": 0.3952, "step": 3602 }, { "epoch": 0.32190786682446465, "grad_norm": 6.613985514220924, "learning_rate": 1.7147271633744244e-06, "loss": 0.9234, "step": 3604 }, { "epoch": 0.3220865060402385, "grad_norm": 11.919180509316565, "learning_rate": 1.7142908476382865e-06, "loss": 0.0702, "step": 3606 }, { "epoch": 0.32226514525601235, "grad_norm": 1.4493524947234822, "learning_rate": 1.7138542540941802e-06, "loss": 0.5521, "step": 3608 }, { "epoch": 0.3224437844717862, "grad_norm": 19.934272337435516, "learning_rate": 1.713417382911909e-06, "loss": -0.426, "step": 3610 }, { "epoch": 0.32262242368756, "grad_norm": 4.79588919086998, "learning_rate": 1.7129802342613849e-06, "loss": 0.3067, "step": 3612 }, { "epoch": 0.3228010629033339, "grad_norm": 5.298114462802961, "learning_rate": 1.712542808312627e-06, "loss": 0.4108, "step": 3614 }, { "epoch": 0.3229797021191077, "grad_norm": 18.72465816740931, "learning_rate": 1.7121051052357626e-06, "loss": -0.8905, "step": 3616 }, { "epoch": 0.3231583413348815, "grad_norm": 13.602756086342621, "learning_rate": 1.7116671252010275e-06, "loss": -1.0169, "step": 3618 }, { "epoch": 0.3233369805506554, "grad_norm": 4.082323507405582, "learning_rate": 1.7112288683787636e-06, "loss": -0.2369, "step": 3620 }, { "epoch": 0.32351561976642923, "grad_norm": 4.002075133877082, "learning_rate": 1.710790334939422e-06, "loss": 0.0434, "step": 3622 }, { "epoch": 0.32369425898220305, "grad_norm": 8.739489100557854, "learning_rate": 1.710351525053561e-06, "loss": -0.2337, "step": 3624 }, { "epoch": 0.32387289819797693, "grad_norm": 6.493732364986732, "learning_rate": 1.7099124388918456e-06, "loss": 0.4917, "step": 3626 }, { "epoch": 0.32405153741375076, "grad_norm": 12.72816302665876, "learning_rate": 1.7094730766250494e-06, "loss": -1.4109, "step": 3628 }, { "epoch": 0.3242301766295246, "grad_norm": 8.287589241571503, "learning_rate": 1.7090334384240523e-06, "loss": 1.393, "step": 3630 }, { "epoch": 0.32440881584529846, "grad_norm": 12.4696573869004, "learning_rate": 1.7085935244598426e-06, "loss": -0.1731, "step": 3632 }, { "epoch": 0.3245874550610723, "grad_norm": 10.603575164745807, "learning_rate": 1.708153334903515e-06, "loss": -0.0568, "step": 3634 }, { "epoch": 0.3247660942768461, "grad_norm": 6.0021342614222375, "learning_rate": 1.7077128699262718e-06, "loss": -1.1866, "step": 3636 }, { "epoch": 0.32494473349262, "grad_norm": 7.368794303417893, "learning_rate": 1.7072721296994222e-06, "loss": 0.3638, "step": 3638 }, { "epoch": 0.3251233727083938, "grad_norm": 13.798865031134298, "learning_rate": 1.7068311143943827e-06, "loss": -0.4983, "step": 3640 }, { "epoch": 0.32530201192416763, "grad_norm": 6.810900019074147, "learning_rate": 1.7063898241826769e-06, "loss": 0.5145, "step": 3642 }, { "epoch": 0.3254806511399415, "grad_norm": 10.446393090491771, "learning_rate": 1.7059482592359348e-06, "loss": -0.222, "step": 3644 }, { "epoch": 0.32565929035571534, "grad_norm": 6.206647127628131, "learning_rate": 1.7055064197258932e-06, "loss": 0.1688, "step": 3646 }, { "epoch": 0.32583792957148916, "grad_norm": 6.110790976413927, "learning_rate": 1.7050643058243966e-06, "loss": -0.0829, "step": 3648 }, { "epoch": 0.32601656878726304, "grad_norm": 2.830396888319756, "learning_rate": 1.7046219177033954e-06, "loss": 0.268, "step": 3650 }, { "epoch": 0.32619520800303686, "grad_norm": 20.208676973023245, "learning_rate": 1.7041792555349468e-06, "loss": -0.2434, "step": 3652 }, { "epoch": 0.3263738472188107, "grad_norm": 13.217588116601197, "learning_rate": 1.703736319491215e-06, "loss": -0.923, "step": 3654 }, { "epoch": 0.32655248643458457, "grad_norm": 8.544199299805962, "learning_rate": 1.70329310974447e-06, "loss": -1.0441, "step": 3656 }, { "epoch": 0.3267311256503584, "grad_norm": 4.822538730432194, "learning_rate": 1.7028496264670885e-06, "loss": 0.0815, "step": 3658 }, { "epoch": 0.3269097648661322, "grad_norm": 10.984470155393321, "learning_rate": 1.7024058698315545e-06, "loss": -0.1201, "step": 3660 }, { "epoch": 0.3270884040819061, "grad_norm": 4.850038445703775, "learning_rate": 1.7019618400104569e-06, "loss": -0.1317, "step": 3662 }, { "epoch": 0.3272670432976799, "grad_norm": 13.37647731144758, "learning_rate": 1.7015175371764915e-06, "loss": 0.2065, "step": 3664 }, { "epoch": 0.32744568251345374, "grad_norm": 10.28449010375993, "learning_rate": 1.7010729615024605e-06, "loss": 0.0771, "step": 3666 }, { "epoch": 0.3276243217292276, "grad_norm": 13.104866207273803, "learning_rate": 1.7006281131612718e-06, "loss": -0.0617, "step": 3668 }, { "epoch": 0.32780296094500144, "grad_norm": 11.382665713063508, "learning_rate": 1.70018299232594e-06, "loss": 0.4456, "step": 3670 }, { "epoch": 0.3279816001607753, "grad_norm": 4.224477087763534, "learning_rate": 1.6997375991695848e-06, "loss": 0.1662, "step": 3672 }, { "epoch": 0.32816023937654915, "grad_norm": 3.265721661902995, "learning_rate": 1.6992919338654318e-06, "loss": -0.526, "step": 3674 }, { "epoch": 0.32833887859232297, "grad_norm": 5.897181739773994, "learning_rate": 1.6988459965868138e-06, "loss": 0.2969, "step": 3676 }, { "epoch": 0.32851751780809685, "grad_norm": 12.204109025226273, "learning_rate": 1.698399787507168e-06, "loss": -0.7744, "step": 3678 }, { "epoch": 0.32869615702387067, "grad_norm": 6.914671307500559, "learning_rate": 1.6979533068000378e-06, "loss": 0.6361, "step": 3680 }, { "epoch": 0.3288747962396445, "grad_norm": 9.080763520391953, "learning_rate": 1.697506554639072e-06, "loss": -0.6053, "step": 3682 }, { "epoch": 0.3290534354554184, "grad_norm": 4.034400123145657, "learning_rate": 1.6970595311980254e-06, "loss": 0.832, "step": 3684 }, { "epoch": 0.3292320746711922, "grad_norm": 10.787660500756864, "learning_rate": 1.6966122366507582e-06, "loss": -1.0715, "step": 3686 }, { "epoch": 0.329410713886966, "grad_norm": 36.822261014907816, "learning_rate": 1.6961646711712358e-06, "loss": -1.7461, "step": 3688 }, { "epoch": 0.3295893531027399, "grad_norm": 7.622422580044321, "learning_rate": 1.6957168349335292e-06, "loss": 1.0125, "step": 3690 }, { "epoch": 0.3297679923185137, "grad_norm": 6.312015023816774, "learning_rate": 1.6952687281118142e-06, "loss": -0.247, "step": 3692 }, { "epoch": 0.32994663153428755, "grad_norm": 15.10291689558288, "learning_rate": 1.6948203508803732e-06, "loss": -0.2247, "step": 3694 }, { "epoch": 0.33012527075006143, "grad_norm": 7.170139476744284, "learning_rate": 1.6943717034135916e-06, "loss": 0.2146, "step": 3696 }, { "epoch": 0.33030390996583525, "grad_norm": 16.174922342718332, "learning_rate": 1.693922785885962e-06, "loss": -0.8482, "step": 3698 }, { "epoch": 0.3304825491816091, "grad_norm": 10.011666168658254, "learning_rate": 1.693473598472081e-06, "loss": 0.7757, "step": 3700 }, { "epoch": 0.33066118839738295, "grad_norm": 4.37470549545876, "learning_rate": 1.6930241413466499e-06, "loss": -0.0301, "step": 3702 }, { "epoch": 0.3308398276131568, "grad_norm": 5.312299657297749, "learning_rate": 1.6925744146844757e-06, "loss": -0.8526, "step": 3704 }, { "epoch": 0.3310184668289306, "grad_norm": 17.96578015538977, "learning_rate": 1.69212441866047e-06, "loss": -1.2144, "step": 3706 }, { "epoch": 0.3311971060447045, "grad_norm": 7.813310260716939, "learning_rate": 1.691674153449649e-06, "loss": 0.2064, "step": 3708 }, { "epoch": 0.3313757452604783, "grad_norm": 10.812316187360098, "learning_rate": 1.691223619227133e-06, "loss": 0.0346, "step": 3710 }, { "epoch": 0.33155438447625213, "grad_norm": 6.709545270858467, "learning_rate": 1.6907728161681477e-06, "loss": -0.6891, "step": 3712 }, { "epoch": 0.331733023692026, "grad_norm": 18.71207998208268, "learning_rate": 1.690321744448024e-06, "loss": -0.0866, "step": 3714 }, { "epoch": 0.33191166290779983, "grad_norm": 5.38513523254322, "learning_rate": 1.6898704042421958e-06, "loss": -0.1802, "step": 3716 }, { "epoch": 0.33209030212357366, "grad_norm": 5.306700828640369, "learning_rate": 1.689418795726202e-06, "loss": -0.0952, "step": 3718 }, { "epoch": 0.33226894133934753, "grad_norm": 10.52784017266466, "learning_rate": 1.6889669190756866e-06, "loss": 0.2603, "step": 3720 }, { "epoch": 0.33244758055512136, "grad_norm": 5.813551612804029, "learning_rate": 1.6885147744663965e-06, "loss": 0.5416, "step": 3722 }, { "epoch": 0.3326262197708952, "grad_norm": 10.853677476242824, "learning_rate": 1.688062362074184e-06, "loss": -0.7132, "step": 3724 }, { "epoch": 0.33280485898666906, "grad_norm": 6.4810882695324175, "learning_rate": 1.687609682075005e-06, "loss": 0.0931, "step": 3726 }, { "epoch": 0.3329834982024429, "grad_norm": 12.80292849419006, "learning_rate": 1.6871567346449198e-06, "loss": -0.7887, "step": 3728 }, { "epoch": 0.3331621374182167, "grad_norm": 9.019153136022009, "learning_rate": 1.686703519960092e-06, "loss": 1.3736, "step": 3730 }, { "epoch": 0.3333407766339906, "grad_norm": 15.062745658169208, "learning_rate": 1.6862500381967904e-06, "loss": -1.116, "step": 3732 }, { "epoch": 0.3335194158497644, "grad_norm": 7.732658180274037, "learning_rate": 1.685796289531386e-06, "loss": -0.3871, "step": 3734 }, { "epoch": 0.33369805506553823, "grad_norm": 6.46315800104218, "learning_rate": 1.685342274140355e-06, "loss": 0.0413, "step": 3736 }, { "epoch": 0.3338766942813121, "grad_norm": 3.3331066690309616, "learning_rate": 1.6848879922002771e-06, "loss": -0.5303, "step": 3738 }, { "epoch": 0.33405533349708594, "grad_norm": 15.42851261349725, "learning_rate": 1.6844334438878352e-06, "loss": -0.3367, "step": 3740 }, { "epoch": 0.3342339727128598, "grad_norm": 9.780523401152326, "learning_rate": 1.683978629379816e-06, "loss": 0.8608, "step": 3742 }, { "epoch": 0.33441261192863364, "grad_norm": 11.166773174024678, "learning_rate": 1.6835235488531097e-06, "loss": 0.5347, "step": 3744 }, { "epoch": 0.33459125114440746, "grad_norm": 5.61259142964748, "learning_rate": 1.6830682024847102e-06, "loss": -0.1517, "step": 3746 }, { "epoch": 0.33476989036018134, "grad_norm": 3.314169696834932, "learning_rate": 1.6826125904517143e-06, "loss": 0.5116, "step": 3748 }, { "epoch": 0.33494852957595517, "grad_norm": 12.647840329093121, "learning_rate": 1.6821567129313227e-06, "loss": -0.7164, "step": 3750 }, { "epoch": 0.335127168791729, "grad_norm": 13.413612602567506, "learning_rate": 1.6817005701008391e-06, "loss": -0.3492, "step": 3752 }, { "epoch": 0.33530580800750287, "grad_norm": 15.792940247779129, "learning_rate": 1.6812441621376706e-06, "loss": -0.201, "step": 3754 }, { "epoch": 0.3354844472232767, "grad_norm": 7.568433215365596, "learning_rate": 1.6807874892193268e-06, "loss": 0.5427, "step": 3756 }, { "epoch": 0.3356630864390505, "grad_norm": 6.669404548477322, "learning_rate": 1.6803305515234208e-06, "loss": 1.0975, "step": 3758 }, { "epoch": 0.3358417256548244, "grad_norm": 6.976923097046406, "learning_rate": 1.6798733492276687e-06, "loss": -0.353, "step": 3760 }, { "epoch": 0.3360203648705982, "grad_norm": 9.387119444995326, "learning_rate": 1.6794158825098892e-06, "loss": -0.0988, "step": 3762 }, { "epoch": 0.33619900408637204, "grad_norm": 3.948771978777815, "learning_rate": 1.678958151548005e-06, "loss": -0.1317, "step": 3764 }, { "epoch": 0.3363776433021459, "grad_norm": 14.32438759018983, "learning_rate": 1.6785001565200393e-06, "loss": 0.0762, "step": 3766 }, { "epoch": 0.33655628251791975, "grad_norm": 10.262149960011197, "learning_rate": 1.6780418976041203e-06, "loss": -0.6743, "step": 3768 }, { "epoch": 0.33673492173369357, "grad_norm": 5.7661516316059425, "learning_rate": 1.6775833749784777e-06, "loss": 0.1989, "step": 3770 }, { "epoch": 0.33691356094946745, "grad_norm": 11.263903355357241, "learning_rate": 1.6771245888214441e-06, "loss": 0.0012, "step": 3772 }, { "epoch": 0.3370922001652413, "grad_norm": 6.201673515635215, "learning_rate": 1.676665539311454e-06, "loss": -1.4239, "step": 3774 }, { "epoch": 0.3372708393810151, "grad_norm": 6.376350110271531, "learning_rate": 1.6762062266270448e-06, "loss": 0.4756, "step": 3776 }, { "epoch": 0.337449478596789, "grad_norm": 6.911793964882654, "learning_rate": 1.6757466509468568e-06, "loss": -0.3034, "step": 3778 }, { "epoch": 0.3376281178125628, "grad_norm": 4.680012534035095, "learning_rate": 1.6752868124496312e-06, "loss": 1.2835, "step": 3780 }, { "epoch": 0.3378067570283366, "grad_norm": 8.275424045173843, "learning_rate": 1.674826711314213e-06, "loss": -0.1462, "step": 3782 }, { "epoch": 0.3379853962441105, "grad_norm": 5.195473299908161, "learning_rate": 1.6743663477195486e-06, "loss": 0.6543, "step": 3784 }, { "epoch": 0.3381640354598843, "grad_norm": 5.535522362682567, "learning_rate": 1.6739057218446857e-06, "loss": -0.0253, "step": 3786 }, { "epoch": 0.33834267467565815, "grad_norm": 4.721960327266144, "learning_rate": 1.6734448338687753e-06, "loss": -1.2184, "step": 3788 }, { "epoch": 0.33852131389143203, "grad_norm": 8.428936264602692, "learning_rate": 1.6729836839710696e-06, "loss": -0.2689, "step": 3790 }, { "epoch": 0.33869995310720585, "grad_norm": 23.864686346032943, "learning_rate": 1.6725222723309228e-06, "loss": -0.5435, "step": 3792 }, { "epoch": 0.3388785923229797, "grad_norm": 10.858238544962825, "learning_rate": 1.6720605991277916e-06, "loss": 0.2111, "step": 3794 }, { "epoch": 0.33905723153875356, "grad_norm": 3.6475457810999066, "learning_rate": 1.671598664541233e-06, "loss": 0.8729, "step": 3796 }, { "epoch": 0.3392358707545274, "grad_norm": 9.543238969772249, "learning_rate": 1.6711364687509071e-06, "loss": 0.1962, "step": 3798 }, { "epoch": 0.3394145099703012, "grad_norm": 3.3840205830338097, "learning_rate": 1.6706740119365745e-06, "loss": 0.6139, "step": 3800 }, { "epoch": 0.3395931491860751, "grad_norm": 4.5114456424374625, "learning_rate": 1.6702112942780979e-06, "loss": 0.7203, "step": 3802 }, { "epoch": 0.3397717884018489, "grad_norm": 11.055931080670993, "learning_rate": 1.6697483159554414e-06, "loss": 0.1757, "step": 3804 }, { "epoch": 0.3399504276176228, "grad_norm": 9.514935448127394, "learning_rate": 1.6692850771486704e-06, "loss": 0.2669, "step": 3806 }, { "epoch": 0.3401290668333966, "grad_norm": 11.35300876884488, "learning_rate": 1.6688215780379515e-06, "loss": -1.3042, "step": 3808 }, { "epoch": 0.34030770604917043, "grad_norm": 13.153391910454124, "learning_rate": 1.6683578188035527e-06, "loss": -0.8903, "step": 3810 }, { "epoch": 0.3404863452649443, "grad_norm": 5.925984348344684, "learning_rate": 1.667893799625843e-06, "loss": 0.0517, "step": 3812 }, { "epoch": 0.34066498448071814, "grad_norm": 57.608800766733985, "learning_rate": 1.667429520685293e-06, "loss": -0.3751, "step": 3814 }, { "epoch": 0.34084362369649196, "grad_norm": 13.252662247285448, "learning_rate": 1.6669649821624731e-06, "loss": 0.0362, "step": 3816 }, { "epoch": 0.34102226291226584, "grad_norm": 9.76300238221505, "learning_rate": 1.6665001842380568e-06, "loss": 0.2302, "step": 3818 }, { "epoch": 0.34120090212803966, "grad_norm": 20.533813873877186, "learning_rate": 1.666035127092816e-06, "loss": -2.3714, "step": 3820 }, { "epoch": 0.3413795413438135, "grad_norm": 18.73761716758939, "learning_rate": 1.6655698109076254e-06, "loss": -0.5778, "step": 3822 }, { "epoch": 0.34155818055958737, "grad_norm": 14.034407160638668, "learning_rate": 1.6651042358634594e-06, "loss": -1.106, "step": 3824 }, { "epoch": 0.3417368197753612, "grad_norm": 14.177965632369697, "learning_rate": 1.6646384021413932e-06, "loss": -1.5253, "step": 3826 }, { "epoch": 0.341915458991135, "grad_norm": 11.778446258511707, "learning_rate": 1.6641723099226028e-06, "loss": 0.0507, "step": 3828 }, { "epoch": 0.3420940982069089, "grad_norm": 7.01903860393886, "learning_rate": 1.6637059593883648e-06, "loss": 0.3894, "step": 3830 }, { "epoch": 0.3422727374226827, "grad_norm": 24.81454651509876, "learning_rate": 1.6632393507200562e-06, "loss": -1.3836, "step": 3832 }, { "epoch": 0.34245137663845654, "grad_norm": 2.0331746300011524, "learning_rate": 1.6627724840991542e-06, "loss": -0.7325, "step": 3834 }, { "epoch": 0.3426300158542304, "grad_norm": 17.396601798925865, "learning_rate": 1.6623053597072363e-06, "loss": -1.0247, "step": 3836 }, { "epoch": 0.34280865507000424, "grad_norm": 8.661970431171437, "learning_rate": 1.6618379777259809e-06, "loss": 0.3398, "step": 3838 }, { "epoch": 0.34298729428577807, "grad_norm": 9.71818359294531, "learning_rate": 1.6613703383371653e-06, "loss": 1.0519, "step": 3840 }, { "epoch": 0.34316593350155195, "grad_norm": 17.372458292622444, "learning_rate": 1.6609024417226687e-06, "loss": -0.7845, "step": 3842 }, { "epoch": 0.34334457271732577, "grad_norm": 12.272949519858386, "learning_rate": 1.660434288064469e-06, "loss": -0.294, "step": 3844 }, { "epoch": 0.3435232119330996, "grad_norm": 10.93002081056727, "learning_rate": 1.659965877544644e-06, "loss": 0.2197, "step": 3846 }, { "epoch": 0.34370185114887347, "grad_norm": 6.3913048566780235, "learning_rate": 1.6594972103453724e-06, "loss": -0.489, "step": 3848 }, { "epoch": 0.3438804903646473, "grad_norm": 7.239822610610392, "learning_rate": 1.6590282866489319e-06, "loss": -0.0941, "step": 3850 }, { "epoch": 0.3440591295804211, "grad_norm": 10.094347484372326, "learning_rate": 1.6585591066377002e-06, "loss": 0.1319, "step": 3852 }, { "epoch": 0.344237768796195, "grad_norm": 17.670224869079288, "learning_rate": 1.6580896704941549e-06, "loss": -0.5675, "step": 3854 }, { "epoch": 0.3444164080119688, "grad_norm": 10.228070129750849, "learning_rate": 1.6576199784008726e-06, "loss": -0.4597, "step": 3856 }, { "epoch": 0.34459504722774265, "grad_norm": 7.475480753770957, "learning_rate": 1.6571500305405307e-06, "loss": 0.4059, "step": 3858 }, { "epoch": 0.3447736864435165, "grad_norm": 14.29286199229056, "learning_rate": 1.6566798270959044e-06, "loss": -0.0564, "step": 3860 }, { "epoch": 0.34495232565929035, "grad_norm": 10.499029568831922, "learning_rate": 1.6562093682498697e-06, "loss": -0.4313, "step": 3862 }, { "epoch": 0.3451309648750642, "grad_norm": 7.468917525579343, "learning_rate": 1.655738654185401e-06, "loss": -0.7336, "step": 3864 }, { "epoch": 0.34530960409083805, "grad_norm": 12.125200092984935, "learning_rate": 1.6552676850855724e-06, "loss": -0.3428, "step": 3866 }, { "epoch": 0.3454882433066119, "grad_norm": 17.906444288867416, "learning_rate": 1.6547964611335576e-06, "loss": 0.2501, "step": 3868 }, { "epoch": 0.34566688252238575, "grad_norm": 6.3641624332320825, "learning_rate": 1.6543249825126283e-06, "loss": 0.6611, "step": 3870 }, { "epoch": 0.3458455217381596, "grad_norm": 2.4969625140192306, "learning_rate": 1.6538532494061568e-06, "loss": 0.001, "step": 3872 }, { "epoch": 0.3460241609539334, "grad_norm": 12.406982400256455, "learning_rate": 1.6533812619976123e-06, "loss": -0.4185, "step": 3874 }, { "epoch": 0.3462028001697073, "grad_norm": 8.118339743170822, "learning_rate": 1.6529090204705651e-06, "loss": -0.53, "step": 3876 }, { "epoch": 0.3463814393854811, "grad_norm": 10.99925055985261, "learning_rate": 1.6524365250086828e-06, "loss": -0.6997, "step": 3878 }, { "epoch": 0.34656007860125493, "grad_norm": 9.350627090722103, "learning_rate": 1.6519637757957324e-06, "loss": 0.1818, "step": 3880 }, { "epoch": 0.3467387178170288, "grad_norm": 14.993487661021732, "learning_rate": 1.6514907730155796e-06, "loss": -0.2012, "step": 3882 }, { "epoch": 0.34691735703280263, "grad_norm": 8.92454945685285, "learning_rate": 1.6510175168521886e-06, "loss": 0.3385, "step": 3884 }, { "epoch": 0.34709599624857645, "grad_norm": 4.91871740386128, "learning_rate": 1.6505440074896217e-06, "loss": -1.0066, "step": 3886 }, { "epoch": 0.34727463546435033, "grad_norm": 3.3440477470384087, "learning_rate": 1.6500702451120407e-06, "loss": 0.1917, "step": 3888 }, { "epoch": 0.34745327468012416, "grad_norm": 10.9195126228422, "learning_rate": 1.6495962299037048e-06, "loss": -0.5558, "step": 3890 }, { "epoch": 0.347631913895898, "grad_norm": 16.10431426631638, "learning_rate": 1.6491219620489723e-06, "loss": -1.3315, "step": 3892 }, { "epoch": 0.34781055311167186, "grad_norm": 18.7220698550722, "learning_rate": 1.6486474417322992e-06, "loss": 0.5769, "step": 3894 }, { "epoch": 0.3479891923274457, "grad_norm": 6.576792529925926, "learning_rate": 1.64817266913824e-06, "loss": 0.2332, "step": 3896 }, { "epoch": 0.3481678315432195, "grad_norm": 7.041920072371176, "learning_rate": 1.6476976444514472e-06, "loss": -1.373, "step": 3898 }, { "epoch": 0.3483464707589934, "grad_norm": 7.681639631801359, "learning_rate": 1.647222367856671e-06, "loss": -1.6695, "step": 3900 }, { "epoch": 0.3485251099747672, "grad_norm": 6.910158458168104, "learning_rate": 1.6467468395387608e-06, "loss": 0.2205, "step": 3902 }, { "epoch": 0.34870374919054103, "grad_norm": 16.21792865062604, "learning_rate": 1.6462710596826624e-06, "loss": 0.1475, "step": 3904 }, { "epoch": 0.3488823884063149, "grad_norm": 10.88103098640696, "learning_rate": 1.6457950284734204e-06, "loss": -1.3603, "step": 3906 }, { "epoch": 0.34906102762208874, "grad_norm": 4.8207481348174275, "learning_rate": 1.6453187460961763e-06, "loss": 0.2414, "step": 3908 }, { "epoch": 0.34923966683786256, "grad_norm": 7.9582783429174935, "learning_rate": 1.6448422127361705e-06, "loss": -0.9681, "step": 3910 }, { "epoch": 0.34941830605363644, "grad_norm": 5.458294448519901, "learning_rate": 1.64436542857874e-06, "loss": 0.0257, "step": 3912 }, { "epoch": 0.34959694526941026, "grad_norm": 10.184326426462487, "learning_rate": 1.6438883938093198e-06, "loss": -0.1704, "step": 3914 }, { "epoch": 0.3497755844851841, "grad_norm": 4.761750994879032, "learning_rate": 1.643411108613442e-06, "loss": -0.052, "step": 3916 }, { "epoch": 0.34995422370095797, "grad_norm": 4.552154517180959, "learning_rate": 1.6429335731767367e-06, "loss": -0.9301, "step": 3918 }, { "epoch": 0.3501328629167318, "grad_norm": 9.858664606439637, "learning_rate": 1.6424557876849305e-06, "loss": -0.7753, "step": 3920 }, { "epoch": 0.3503115021325056, "grad_norm": 18.917851392754923, "learning_rate": 1.6419777523238483e-06, "loss": -1.3184, "step": 3922 }, { "epoch": 0.3504901413482795, "grad_norm": 6.990673391407443, "learning_rate": 1.6414994672794111e-06, "loss": 0.2952, "step": 3924 }, { "epoch": 0.3506687805640533, "grad_norm": 12.919964834820867, "learning_rate": 1.6410209327376382e-06, "loss": -0.398, "step": 3926 }, { "epoch": 0.35084741977982714, "grad_norm": 10.759799837427456, "learning_rate": 1.6405421488846444e-06, "loss": -0.3161, "step": 3928 }, { "epoch": 0.351026058995601, "grad_norm": 8.428852991014889, "learning_rate": 1.640063115906643e-06, "loss": -0.1361, "step": 3930 }, { "epoch": 0.35120469821137484, "grad_norm": 16.517190012770598, "learning_rate": 1.639583833989943e-06, "loss": 0.2248, "step": 3932 }, { "epoch": 0.35138333742714867, "grad_norm": 6.338857483551538, "learning_rate": 1.6391043033209509e-06, "loss": 0.6891, "step": 3934 }, { "epoch": 0.35156197664292255, "grad_norm": 16.991680128919437, "learning_rate": 1.63862452408617e-06, "loss": -0.8239, "step": 3936 }, { "epoch": 0.35174061585869637, "grad_norm": 3.3245684465558774, "learning_rate": 1.6381444964722e-06, "loss": 0.762, "step": 3938 }, { "epoch": 0.35191925507447025, "grad_norm": 5.5167351056758465, "learning_rate": 1.637664220665737e-06, "loss": -1.1562, "step": 3940 }, { "epoch": 0.3520978942902441, "grad_norm": 8.354457885029667, "learning_rate": 1.637183696853574e-06, "loss": -0.3792, "step": 3942 }, { "epoch": 0.3522765335060179, "grad_norm": 15.16576447091622, "learning_rate": 1.6367029252226003e-06, "loss": 0.4635, "step": 3944 }, { "epoch": 0.3524551727217918, "grad_norm": 7.8210622026071865, "learning_rate": 1.6362219059598019e-06, "loss": -1.5769, "step": 3946 }, { "epoch": 0.3526338119375656, "grad_norm": 3.8190648824729445, "learning_rate": 1.6357406392522606e-06, "loss": 0.8644, "step": 3948 }, { "epoch": 0.3528124511533394, "grad_norm": 2.3671831691186145, "learning_rate": 1.6352591252871549e-06, "loss": -0.0839, "step": 3950 }, { "epoch": 0.3529910903691133, "grad_norm": 6.071261652086467, "learning_rate": 1.6347773642517586e-06, "loss": -0.3674, "step": 3952 }, { "epoch": 0.3531697295848871, "grad_norm": 6.292431344895597, "learning_rate": 1.634295356333443e-06, "loss": -0.3491, "step": 3954 }, { "epoch": 0.35334836880066095, "grad_norm": 11.006936400551302, "learning_rate": 1.6338131017196738e-06, "loss": -0.7029, "step": 3956 }, { "epoch": 0.35352700801643483, "grad_norm": 10.324388344591723, "learning_rate": 1.6333306005980147e-06, "loss": -0.1758, "step": 3958 }, { "epoch": 0.35370564723220865, "grad_norm": 21.0580086376467, "learning_rate": 1.632847853156123e-06, "loss": -1.8801, "step": 3960 }, { "epoch": 0.3538842864479825, "grad_norm": 3.6077356620262058, "learning_rate": 1.6323648595817532e-06, "loss": 0.239, "step": 3962 }, { "epoch": 0.35406292566375636, "grad_norm": 5.212115793682344, "learning_rate": 1.6318816200627552e-06, "loss": -0.4404, "step": 3964 }, { "epoch": 0.3542415648795302, "grad_norm": 11.826436213241053, "learning_rate": 1.6313981347870748e-06, "loss": -0.5478, "step": 3966 }, { "epoch": 0.354420204095304, "grad_norm": 18.30223238310229, "learning_rate": 1.6309144039427531e-06, "loss": -0.589, "step": 3968 }, { "epoch": 0.3545988433110779, "grad_norm": 10.154092694559521, "learning_rate": 1.6304304277179263e-06, "loss": 0.3244, "step": 3970 }, { "epoch": 0.3547774825268517, "grad_norm": 9.1228037242137, "learning_rate": 1.6299462063008269e-06, "loss": -0.26, "step": 3972 }, { "epoch": 0.35495612174262553, "grad_norm": 10.814941836133254, "learning_rate": 1.629461739879782e-06, "loss": 0.6553, "step": 3974 }, { "epoch": 0.3551347609583994, "grad_norm": 7.993600670058724, "learning_rate": 1.6289770286432147e-06, "loss": -0.7247, "step": 3976 }, { "epoch": 0.35531340017417323, "grad_norm": 5.316510470655985, "learning_rate": 1.6284920727796428e-06, "loss": -0.1229, "step": 3978 }, { "epoch": 0.35549203938994706, "grad_norm": 5.080619316797108, "learning_rate": 1.6280068724776794e-06, "loss": 0.1854, "step": 3980 }, { "epoch": 0.35567067860572094, "grad_norm": 8.581058188320812, "learning_rate": 1.6275214279260325e-06, "loss": -0.1791, "step": 3982 }, { "epoch": 0.35584931782149476, "grad_norm": 5.876451029046132, "learning_rate": 1.6270357393135055e-06, "loss": 0.0874, "step": 3984 }, { "epoch": 0.3560279570372686, "grad_norm": 9.004107915312096, "learning_rate": 1.6265498068289963e-06, "loss": 0.364, "step": 3986 }, { "epoch": 0.35620659625304246, "grad_norm": 5.412605915398807, "learning_rate": 1.626063630661498e-06, "loss": -0.8335, "step": 3988 }, { "epoch": 0.3563852354688163, "grad_norm": 3.8319954126764606, "learning_rate": 1.625577211000098e-06, "loss": 0.9662, "step": 3990 }, { "epoch": 0.3565638746845901, "grad_norm": 6.459063515352773, "learning_rate": 1.6250905480339789e-06, "loss": -0.6114, "step": 3992 }, { "epoch": 0.356742513900364, "grad_norm": 23.912166087824808, "learning_rate": 1.6246036419524179e-06, "loss": -1.2182, "step": 3994 }, { "epoch": 0.3569211531161378, "grad_norm": 6.266668142494407, "learning_rate": 1.6241164929447862e-06, "loss": -0.5175, "step": 3996 }, { "epoch": 0.35709979233191164, "grad_norm": 9.891871851330492, "learning_rate": 1.6236291012005503e-06, "loss": 0.2359, "step": 3998 }, { "epoch": 0.3572784315476855, "grad_norm": 5.336516900392411, "learning_rate": 1.6231414669092702e-06, "loss": -0.133, "step": 4000 }, { "epoch": 0.35745707076345934, "grad_norm": 9.891768113740618, "learning_rate": 1.6226535902606008e-06, "loss": 0.4194, "step": 4002 }, { "epoch": 0.3576357099792332, "grad_norm": 6.471738852740906, "learning_rate": 1.6221654714442916e-06, "loss": -0.0538, "step": 4004 }, { "epoch": 0.35781434919500704, "grad_norm": 16.067130404753655, "learning_rate": 1.6216771106501855e-06, "loss": -0.9131, "step": 4006 }, { "epoch": 0.35799298841078087, "grad_norm": 9.704296489768238, "learning_rate": 1.6211885080682199e-06, "loss": 0.0849, "step": 4008 }, { "epoch": 0.35817162762655474, "grad_norm": 13.230125636789484, "learning_rate": 1.6206996638884263e-06, "loss": 0.7272, "step": 4010 }, { "epoch": 0.35835026684232857, "grad_norm": 3.5046708411451353, "learning_rate": 1.6202105783009298e-06, "loss": 0.2712, "step": 4012 }, { "epoch": 0.3585289060581024, "grad_norm": 7.501166952264958, "learning_rate": 1.61972125149595e-06, "loss": -0.9251, "step": 4014 }, { "epoch": 0.35870754527387627, "grad_norm": 8.637018206307879, "learning_rate": 1.6192316836637998e-06, "loss": 1.1676, "step": 4016 }, { "epoch": 0.3588861844896501, "grad_norm": 7.434341144524767, "learning_rate": 1.618741874994886e-06, "loss": 0.6245, "step": 4018 }, { "epoch": 0.3590648237054239, "grad_norm": 8.553808994560827, "learning_rate": 1.6182518256797093e-06, "loss": 0.9467, "step": 4020 }, { "epoch": 0.3592434629211978, "grad_norm": 10.920503674862955, "learning_rate": 1.6177615359088633e-06, "loss": 0.4729, "step": 4022 }, { "epoch": 0.3594221021369716, "grad_norm": 12.754384801511952, "learning_rate": 1.617271005873036e-06, "loss": 0.1703, "step": 4024 }, { "epoch": 0.35960074135274545, "grad_norm": 4.129827968533631, "learning_rate": 1.6167802357630082e-06, "loss": -0.2621, "step": 4026 }, { "epoch": 0.3597793805685193, "grad_norm": 32.32235478953576, "learning_rate": 1.6162892257696544e-06, "loss": -0.0934, "step": 4028 }, { "epoch": 0.35995801978429315, "grad_norm": 4.894623215915731, "learning_rate": 1.6157979760839426e-06, "loss": 0.6233, "step": 4030 }, { "epoch": 0.36013665900006697, "grad_norm": 11.376926803885862, "learning_rate": 1.6153064868969334e-06, "loss": 0.0765, "step": 4032 }, { "epoch": 0.36031529821584085, "grad_norm": 23.258482995457808, "learning_rate": 1.614814758399781e-06, "loss": -0.2218, "step": 4034 }, { "epoch": 0.3604939374316147, "grad_norm": 7.2888581307597, "learning_rate": 1.6143227907837327e-06, "loss": -0.1699, "step": 4036 }, { "epoch": 0.3606725766473885, "grad_norm": 13.02504664905124, "learning_rate": 1.6138305842401282e-06, "loss": -0.0423, "step": 4038 }, { "epoch": 0.3608512158631624, "grad_norm": 5.701427133824012, "learning_rate": 1.613338138960401e-06, "loss": 0.2234, "step": 4040 }, { "epoch": 0.3610298550789362, "grad_norm": 11.159276044327253, "learning_rate": 1.6128454551360768e-06, "loss": -0.0653, "step": 4042 }, { "epoch": 0.36120849429471, "grad_norm": 11.57264058596118, "learning_rate": 1.6123525329587746e-06, "loss": -0.0278, "step": 4044 }, { "epoch": 0.3613871335104839, "grad_norm": 4.873514976264765, "learning_rate": 1.6118593726202058e-06, "loss": 0.0046, "step": 4046 }, { "epoch": 0.36156577272625773, "grad_norm": 11.73244967456545, "learning_rate": 1.6113659743121737e-06, "loss": 0.5419, "step": 4048 }, { "epoch": 0.36174441194203155, "grad_norm": 4.983425516319972, "learning_rate": 1.6108723382265757e-06, "loss": 0.1541, "step": 4050 }, { "epoch": 0.36192305115780543, "grad_norm": 16.667976531372684, "learning_rate": 1.6103784645554009e-06, "loss": -0.2854, "step": 4052 }, { "epoch": 0.36210169037357925, "grad_norm": 6.079459781739901, "learning_rate": 1.6098843534907304e-06, "loss": 0.7308, "step": 4054 }, { "epoch": 0.3622803295893531, "grad_norm": 8.095247697156667, "learning_rate": 1.609390005224738e-06, "loss": -0.0367, "step": 4056 }, { "epoch": 0.36245896880512696, "grad_norm": 19.701005257455694, "learning_rate": 1.6088954199496901e-06, "loss": 0.6875, "step": 4058 }, { "epoch": 0.3626376080209008, "grad_norm": 16.901933439814105, "learning_rate": 1.6084005978579445e-06, "loss": -0.4183, "step": 4060 }, { "epoch": 0.3628162472366746, "grad_norm": 9.267524820381476, "learning_rate": 1.6079055391419519e-06, "loss": 0.8423, "step": 4062 }, { "epoch": 0.3629948864524485, "grad_norm": 3.908700525295812, "learning_rate": 1.6074102439942545e-06, "loss": -0.1029, "step": 4064 }, { "epoch": 0.3631735256682223, "grad_norm": 9.42193087281319, "learning_rate": 1.6069147126074868e-06, "loss": 0.2914, "step": 4066 }, { "epoch": 0.36335216488399613, "grad_norm": 6.386134278971335, "learning_rate": 1.6064189451743749e-06, "loss": 0.8655, "step": 4068 }, { "epoch": 0.36353080409977, "grad_norm": 20.595982339412476, "learning_rate": 1.6059229418877367e-06, "loss": -0.939, "step": 4070 }, { "epoch": 0.36370944331554383, "grad_norm": 9.349789915937576, "learning_rate": 1.6054267029404819e-06, "loss": -0.4378, "step": 4072 }, { "epoch": 0.3638880825313177, "grad_norm": 14.839634503493192, "learning_rate": 1.6049302285256122e-06, "loss": 0.4809, "step": 4074 }, { "epoch": 0.36406672174709154, "grad_norm": 12.36911086394059, "learning_rate": 1.6044335188362204e-06, "loss": -1.2069, "step": 4076 }, { "epoch": 0.36424536096286536, "grad_norm": 14.84526359470552, "learning_rate": 1.603936574065491e-06, "loss": 0.7467, "step": 4078 }, { "epoch": 0.36442400017863924, "grad_norm": 18.03095635186583, "learning_rate": 1.6034393944067e-06, "loss": 0.7319, "step": 4080 }, { "epoch": 0.36460263939441306, "grad_norm": 10.584643025097728, "learning_rate": 1.6029419800532146e-06, "loss": -0.3813, "step": 4082 }, { "epoch": 0.3647812786101869, "grad_norm": 4.666983707194377, "learning_rate": 1.6024443311984934e-06, "loss": -0.6174, "step": 4084 }, { "epoch": 0.36495991782596077, "grad_norm": 9.555729083277125, "learning_rate": 1.6019464480360861e-06, "loss": -0.0287, "step": 4086 }, { "epoch": 0.3651385570417346, "grad_norm": 9.72239101687239, "learning_rate": 1.6014483307596333e-06, "loss": -0.2147, "step": 4088 }, { "epoch": 0.3653171962575084, "grad_norm": 5.400478038114879, "learning_rate": 1.6009499795628675e-06, "loss": -0.2907, "step": 4090 }, { "epoch": 0.3654958354732823, "grad_norm": 4.926639055648745, "learning_rate": 1.6004513946396111e-06, "loss": 0.3526, "step": 4092 }, { "epoch": 0.3656744746890561, "grad_norm": 12.124581555883626, "learning_rate": 1.5999525761837787e-06, "loss": 0.1759, "step": 4094 }, { "epoch": 0.36585311390482994, "grad_norm": 10.131373153964041, "learning_rate": 1.599453524389374e-06, "loss": -0.4592, "step": 4096 }, { "epoch": 0.3660317531206038, "grad_norm": 7.168228984547183, "learning_rate": 1.5989542394504928e-06, "loss": -0.7373, "step": 4098 }, { "epoch": 0.36621039233637764, "grad_norm": 10.139523862234954, "learning_rate": 1.5984547215613211e-06, "loss": -0.0944, "step": 4100 }, { "epoch": 0.36638903155215147, "grad_norm": 6.916707540490894, "learning_rate": 1.5979549709161357e-06, "loss": 0.0843, "step": 4102 }, { "epoch": 0.36656767076792535, "grad_norm": 6.5952644236135685, "learning_rate": 1.5974549877093037e-06, "loss": -0.0557, "step": 4104 }, { "epoch": 0.36674630998369917, "grad_norm": 6.601705226822497, "learning_rate": 1.596954772135283e-06, "loss": 0.0215, "step": 4106 }, { "epoch": 0.366924949199473, "grad_norm": 6.029983624741318, "learning_rate": 1.5964543243886213e-06, "loss": -0.1371, "step": 4108 }, { "epoch": 0.3671035884152469, "grad_norm": 17.22934005385599, "learning_rate": 1.595953644663957e-06, "loss": 0.0569, "step": 4110 }, { "epoch": 0.3672822276310207, "grad_norm": 9.793800370428263, "learning_rate": 1.5954527331560186e-06, "loss": -0.0096, "step": 4112 }, { "epoch": 0.3674608668467945, "grad_norm": 15.85592779268978, "learning_rate": 1.5949515900596251e-06, "loss": -0.7715, "step": 4114 }, { "epoch": 0.3676395060625684, "grad_norm": 2.595012219791637, "learning_rate": 1.5944502155696846e-06, "loss": 0.2472, "step": 4116 }, { "epoch": 0.3678181452783422, "grad_norm": 8.210385087120372, "learning_rate": 1.5939486098811967e-06, "loss": -1.3828, "step": 4118 }, { "epoch": 0.36799678449411605, "grad_norm": 27.35149332324871, "learning_rate": 1.5934467731892496e-06, "loss": -0.5651, "step": 4120 }, { "epoch": 0.3681754237098899, "grad_norm": 18.270073707068583, "learning_rate": 1.5929447056890216e-06, "loss": -0.6351, "step": 4122 }, { "epoch": 0.36835406292566375, "grad_norm": 5.048792046836763, "learning_rate": 1.5924424075757816e-06, "loss": -0.2166, "step": 4124 }, { "epoch": 0.3685327021414376, "grad_norm": 13.835794681822184, "learning_rate": 1.5919398790448871e-06, "loss": -0.3101, "step": 4126 }, { "epoch": 0.36871134135721145, "grad_norm": 6.212664976942073, "learning_rate": 1.591437120291786e-06, "loss": 0.0302, "step": 4128 }, { "epoch": 0.3688899805729853, "grad_norm": 6.003492928091653, "learning_rate": 1.5909341315120152e-06, "loss": 0.3355, "step": 4130 }, { "epoch": 0.3690686197887591, "grad_norm": 7.130603928850397, "learning_rate": 1.5904309129012013e-06, "loss": 1.2239, "step": 4132 }, { "epoch": 0.369247259004533, "grad_norm": 13.043446639755178, "learning_rate": 1.5899274646550605e-06, "loss": 0.4449, "step": 4134 }, { "epoch": 0.3694258982203068, "grad_norm": 7.431175042906105, "learning_rate": 1.5894237869693982e-06, "loss": -0.215, "step": 4136 }, { "epoch": 0.3696045374360807, "grad_norm": 9.455585455881437, "learning_rate": 1.5889198800401083e-06, "loss": -0.8049, "step": 4138 }, { "epoch": 0.3697831766518545, "grad_norm": 5.434120267894105, "learning_rate": 1.5884157440631752e-06, "loss": -0.1254, "step": 4140 }, { "epoch": 0.36996181586762833, "grad_norm": 8.348745158125983, "learning_rate": 1.5879113792346713e-06, "loss": 0.5046, "step": 4142 }, { "epoch": 0.3701404550834022, "grad_norm": 23.434310980960028, "learning_rate": 1.5874067857507585e-06, "loss": -0.4428, "step": 4144 }, { "epoch": 0.37031909429917603, "grad_norm": 37.69973662509544, "learning_rate": 1.5869019638076874e-06, "loss": 0.2463, "step": 4146 }, { "epoch": 0.37049773351494986, "grad_norm": 13.541644819193113, "learning_rate": 1.5863969136017977e-06, "loss": -0.7267, "step": 4148 }, { "epoch": 0.37067637273072374, "grad_norm": 18.070398629820676, "learning_rate": 1.585891635329518e-06, "loss": -1.296, "step": 4150 }, { "epoch": 0.37085501194649756, "grad_norm": 11.412283025644863, "learning_rate": 1.5853861291873648e-06, "loss": -0.1651, "step": 4152 }, { "epoch": 0.3710336511622714, "grad_norm": 9.13032313766301, "learning_rate": 1.584880395371944e-06, "loss": -1.1376, "step": 4154 }, { "epoch": 0.37121229037804526, "grad_norm": 10.072084778469051, "learning_rate": 1.58437443407995e-06, "loss": -0.4357, "step": 4156 }, { "epoch": 0.3713909295938191, "grad_norm": 8.70873842407526, "learning_rate": 1.5838682455081657e-06, "loss": 0.2143, "step": 4158 }, { "epoch": 0.3715695688095929, "grad_norm": 8.35216677757162, "learning_rate": 1.583361829853461e-06, "loss": 0.4123, "step": 4160 }, { "epoch": 0.3717482080253668, "grad_norm": 12.341451155596596, "learning_rate": 1.5828551873127969e-06, "loss": -0.533, "step": 4162 }, { "epoch": 0.3719268472411406, "grad_norm": 3.484852026886076, "learning_rate": 1.5823483180832202e-06, "loss": -0.0014, "step": 4164 }, { "epoch": 0.37210548645691444, "grad_norm": 5.492380239216753, "learning_rate": 1.5818412223618665e-06, "loss": -0.5251, "step": 4166 }, { "epoch": 0.3722841256726883, "grad_norm": 13.951429855736338, "learning_rate": 1.5813339003459604e-06, "loss": 0.8398, "step": 4168 }, { "epoch": 0.37246276488846214, "grad_norm": 16.657898898214174, "learning_rate": 1.5808263522328134e-06, "loss": 0.5515, "step": 4170 }, { "epoch": 0.37264140410423596, "grad_norm": 6.700191255942694, "learning_rate": 1.5803185782198254e-06, "loss": 0.2774, "step": 4172 }, { "epoch": 0.37282004332000984, "grad_norm": 15.770845212474198, "learning_rate": 1.5798105785044839e-06, "loss": -0.7709, "step": 4174 }, { "epoch": 0.37299868253578367, "grad_norm": 9.93808653438094, "learning_rate": 1.579302353284365e-06, "loss": -0.3511, "step": 4176 }, { "epoch": 0.3731773217515575, "grad_norm": 20.904563938456867, "learning_rate": 1.5787939027571313e-06, "loss": -0.1251, "step": 4178 }, { "epoch": 0.37335596096733137, "grad_norm": 6.65988990944723, "learning_rate": 1.578285227120534e-06, "loss": 0.1704, "step": 4180 }, { "epoch": 0.3735346001831052, "grad_norm": 6.47340144746553, "learning_rate": 1.5777763265724114e-06, "loss": 1.119, "step": 4182 }, { "epoch": 0.373713239398879, "grad_norm": 4.5375392018876255, "learning_rate": 1.5772672013106892e-06, "loss": 0.9762, "step": 4184 }, { "epoch": 0.3738918786146529, "grad_norm": 4.707913923009821, "learning_rate": 1.5767578515333806e-06, "loss": -0.659, "step": 4186 }, { "epoch": 0.3740705178304267, "grad_norm": 8.957360881690754, "learning_rate": 1.5762482774385866e-06, "loss": -0.4524, "step": 4188 }, { "epoch": 0.37424915704620054, "grad_norm": 20.51286247346939, "learning_rate": 1.5757384792244946e-06, "loss": -1.573, "step": 4190 }, { "epoch": 0.3744277962619744, "grad_norm": 7.529119293058224, "learning_rate": 1.5752284570893797e-06, "loss": -0.4985, "step": 4192 }, { "epoch": 0.37460643547774825, "grad_norm": 17.32107628982364, "learning_rate": 1.574718211231604e-06, "loss": -0.6263, "step": 4194 }, { "epoch": 0.37478507469352207, "grad_norm": 10.535872805975375, "learning_rate": 1.5742077418496167e-06, "loss": 0.4303, "step": 4196 }, { "epoch": 0.37496371390929595, "grad_norm": 6.114488885811863, "learning_rate": 1.5736970491419537e-06, "loss": 0.5526, "step": 4198 }, { "epoch": 0.37514235312506977, "grad_norm": 13.14199629624018, "learning_rate": 1.5731861333072375e-06, "loss": -0.947, "step": 4200 }, { "epoch": 0.37532099234084365, "grad_norm": 4.597715366083944, "learning_rate": 1.5726749945441786e-06, "loss": 0.6871, "step": 4202 }, { "epoch": 0.3754996315566175, "grad_norm": 9.5880547433881, "learning_rate": 1.5721636330515732e-06, "loss": 0.0959, "step": 4204 }, { "epoch": 0.3756782707723913, "grad_norm": 8.32910700120367, "learning_rate": 1.5716520490283031e-06, "loss": -0.1503, "step": 4206 }, { "epoch": 0.3758569099881652, "grad_norm": 7.047088010829228, "learning_rate": 1.5711402426733393e-06, "loss": 0.2659, "step": 4208 }, { "epoch": 0.376035549203939, "grad_norm": 28.15586414136272, "learning_rate": 1.5706282141857373e-06, "loss": 0.4177, "step": 4210 }, { "epoch": 0.3762141884197128, "grad_norm": 8.890420331426045, "learning_rate": 1.570115963764639e-06, "loss": 0.0918, "step": 4212 }, { "epoch": 0.3763928276354867, "grad_norm": 12.753000542113547, "learning_rate": 1.569603491609274e-06, "loss": -0.638, "step": 4214 }, { "epoch": 0.37657146685126053, "grad_norm": 4.0996747422879425, "learning_rate": 1.5690907979189567e-06, "loss": -1.1913, "step": 4216 }, { "epoch": 0.37675010606703435, "grad_norm": 5.676544705191624, "learning_rate": 1.5685778828930882e-06, "loss": 0.0886, "step": 4218 }, { "epoch": 0.37692874528280823, "grad_norm": 10.251482600473578, "learning_rate": 1.5680647467311555e-06, "loss": -0.6029, "step": 4220 }, { "epoch": 0.37710738449858205, "grad_norm": 9.17991397659454, "learning_rate": 1.5675513896327325e-06, "loss": -0.664, "step": 4222 }, { "epoch": 0.3772860237143559, "grad_norm": 10.971484590569034, "learning_rate": 1.5670378117974782e-06, "loss": 0.6859, "step": 4224 }, { "epoch": 0.37746466293012976, "grad_norm": 16.998099725766682, "learning_rate": 1.5665240134251372e-06, "loss": -0.3265, "step": 4226 }, { "epoch": 0.3776433021459036, "grad_norm": 5.98405403890026, "learning_rate": 1.5660099947155404e-06, "loss": 0.5102, "step": 4228 }, { "epoch": 0.3778219413616774, "grad_norm": 9.483952671558795, "learning_rate": 1.565495755868604e-06, "loss": 0.3907, "step": 4230 }, { "epoch": 0.3780005805774513, "grad_norm": 13.485909349263864, "learning_rate": 1.5649812970843307e-06, "loss": -0.2886, "step": 4232 }, { "epoch": 0.3781792197932251, "grad_norm": 6.910012717559638, "learning_rate": 1.5644666185628073e-06, "loss": 0.7437, "step": 4234 }, { "epoch": 0.37835785900899893, "grad_norm": 3.3390985382520144, "learning_rate": 1.5639517205042075e-06, "loss": -0.4768, "step": 4236 }, { "epoch": 0.3785364982247728, "grad_norm": 12.599951946832972, "learning_rate": 1.5634366031087898e-06, "loss": 0.2824, "step": 4238 }, { "epoch": 0.37871513744054663, "grad_norm": 6.5066157198823955, "learning_rate": 1.5629212665768976e-06, "loss": 0.1982, "step": 4240 }, { "epoch": 0.37889377665632046, "grad_norm": 6.018686758879687, "learning_rate": 1.5624057111089601e-06, "loss": -0.0253, "step": 4242 }, { "epoch": 0.37907241587209434, "grad_norm": 6.263238847286146, "learning_rate": 1.5618899369054913e-06, "loss": -0.5244, "step": 4244 }, { "epoch": 0.37925105508786816, "grad_norm": 15.19448855012253, "learning_rate": 1.5613739441670903e-06, "loss": -0.765, "step": 4246 }, { "epoch": 0.379429694303642, "grad_norm": 10.34202206288118, "learning_rate": 1.560857733094442e-06, "loss": -0.8372, "step": 4248 }, { "epoch": 0.37960833351941586, "grad_norm": 6.35333039664287, "learning_rate": 1.5603413038883145e-06, "loss": -0.5702, "step": 4250 }, { "epoch": 0.3797869727351897, "grad_norm": 5.246915910480677, "learning_rate": 1.5598246567495625e-06, "loss": -0.4079, "step": 4252 }, { "epoch": 0.3799656119509635, "grad_norm": 9.118199152581216, "learning_rate": 1.5593077918791245e-06, "loss": 0.6974, "step": 4254 }, { "epoch": 0.3801442511667374, "grad_norm": 9.289570068660169, "learning_rate": 1.558790709478024e-06, "loss": 0.7019, "step": 4256 }, { "epoch": 0.3803228903825112, "grad_norm": 24.473682203654374, "learning_rate": 1.558273409747369e-06, "loss": -0.1696, "step": 4258 }, { "epoch": 0.38050152959828504, "grad_norm": 6.298180078097369, "learning_rate": 1.5577558928883517e-06, "loss": 0.2202, "step": 4260 }, { "epoch": 0.3806801688140589, "grad_norm": 8.731379723865194, "learning_rate": 1.5572381591022495e-06, "loss": -1.0715, "step": 4262 }, { "epoch": 0.38085880802983274, "grad_norm": 4.708112030679879, "learning_rate": 1.5567202085904234e-06, "loss": 0.7891, "step": 4264 }, { "epoch": 0.38103744724560656, "grad_norm": 8.667367124372449, "learning_rate": 1.5562020415543199e-06, "loss": -0.2721, "step": 4266 }, { "epoch": 0.38121608646138044, "grad_norm": 7.2375742221643495, "learning_rate": 1.5556836581954675e-06, "loss": -0.3794, "step": 4268 }, { "epoch": 0.38139472567715427, "grad_norm": 23.187605289518363, "learning_rate": 1.5551650587154811e-06, "loss": 0.2793, "step": 4270 }, { "epoch": 0.38157336489292815, "grad_norm": 11.756902087884443, "learning_rate": 1.5546462433160587e-06, "loss": -0.4911, "step": 4272 }, { "epoch": 0.38175200410870197, "grad_norm": 6.411678042909495, "learning_rate": 1.5541272121989821e-06, "loss": -0.9292, "step": 4274 }, { "epoch": 0.3819306433244758, "grad_norm": 6.779666838885895, "learning_rate": 1.5536079655661173e-06, "loss": 0.8482, "step": 4276 }, { "epoch": 0.3821092825402497, "grad_norm": 11.763405802081857, "learning_rate": 1.553088503619414e-06, "loss": -0.8126, "step": 4278 }, { "epoch": 0.3822879217560235, "grad_norm": 10.193952705056546, "learning_rate": 1.5525688265609054e-06, "loss": -1.3166, "step": 4280 }, { "epoch": 0.3824665609717973, "grad_norm": 5.905177104726292, "learning_rate": 1.5520489345927094e-06, "loss": -0.4153, "step": 4282 }, { "epoch": 0.3826452001875712, "grad_norm": 6.983595563506588, "learning_rate": 1.5515288279170262e-06, "loss": 0.3074, "step": 4284 }, { "epoch": 0.382823839403345, "grad_norm": 11.310821240034848, "learning_rate": 1.55100850673614e-06, "loss": 0.1285, "step": 4286 }, { "epoch": 0.38300247861911885, "grad_norm": 4.518592783196605, "learning_rate": 1.5504879712524185e-06, "loss": -0.0638, "step": 4288 }, { "epoch": 0.3831811178348927, "grad_norm": 9.070844925426341, "learning_rate": 1.549967221668313e-06, "loss": 0.5895, "step": 4290 }, { "epoch": 0.38335975705066655, "grad_norm": 19.204722872648656, "learning_rate": 1.5494462581863577e-06, "loss": 0.5233, "step": 4292 }, { "epoch": 0.3835383962664404, "grad_norm": 26.909808943632665, "learning_rate": 1.5489250810091697e-06, "loss": -1.034, "step": 4294 }, { "epoch": 0.38371703548221425, "grad_norm": 10.605579894099932, "learning_rate": 1.54840369033945e-06, "loss": -0.0046, "step": 4296 }, { "epoch": 0.3838956746979881, "grad_norm": 4.028634812533744, "learning_rate": 1.5478820863799818e-06, "loss": 1.0703, "step": 4298 }, { "epoch": 0.3840743139137619, "grad_norm": 7.866775335218661, "learning_rate": 1.547360269333632e-06, "loss": -0.2274, "step": 4300 }, { "epoch": 0.3842529531295358, "grad_norm": 5.570028118562522, "learning_rate": 1.5468382394033504e-06, "loss": 0.2735, "step": 4302 }, { "epoch": 0.3844315923453096, "grad_norm": 11.604778463146591, "learning_rate": 1.5463159967921684e-06, "loss": -0.9368, "step": 4304 }, { "epoch": 0.3846102315610834, "grad_norm": 17.230640992536287, "learning_rate": 1.5457935417032015e-06, "loss": -0.6115, "step": 4306 }, { "epoch": 0.3847888707768573, "grad_norm": 6.011720970901961, "learning_rate": 1.5452708743396471e-06, "loss": 0.0594, "step": 4308 }, { "epoch": 0.38496750999263113, "grad_norm": 13.31722780906577, "learning_rate": 1.5447479949047856e-06, "loss": -0.727, "step": 4310 }, { "epoch": 0.38514614920840495, "grad_norm": 10.519872841882405, "learning_rate": 1.5442249036019797e-06, "loss": 0.2713, "step": 4312 }, { "epoch": 0.38532478842417883, "grad_norm": 6.991700838510925, "learning_rate": 1.543701600634674e-06, "loss": 1.0026, "step": 4314 }, { "epoch": 0.38550342763995266, "grad_norm": 6.398491455148731, "learning_rate": 1.543178086206396e-06, "loss": 0.7893, "step": 4316 }, { "epoch": 0.3856820668557265, "grad_norm": 6.076648366080617, "learning_rate": 1.5426543605207551e-06, "loss": 0.0904, "step": 4318 }, { "epoch": 0.38586070607150036, "grad_norm": 15.04642981071683, "learning_rate": 1.542130423781444e-06, "loss": 0.2795, "step": 4320 }, { "epoch": 0.3860393452872742, "grad_norm": 12.964245852814832, "learning_rate": 1.5416062761922357e-06, "loss": 0.0336, "step": 4322 }, { "epoch": 0.386217984503048, "grad_norm": 11.031409762254933, "learning_rate": 1.5410819179569862e-06, "loss": -0.9104, "step": 4324 }, { "epoch": 0.3863966237188219, "grad_norm": 6.327454632702335, "learning_rate": 1.5405573492796336e-06, "loss": 0.2437, "step": 4326 }, { "epoch": 0.3865752629345957, "grad_norm": 5.2133391878009, "learning_rate": 1.5400325703641971e-06, "loss": -0.3902, "step": 4328 }, { "epoch": 0.38675390215036953, "grad_norm": 6.31634344047312, "learning_rate": 1.5395075814147784e-06, "loss": -0.1264, "step": 4330 }, { "epoch": 0.3869325413661434, "grad_norm": 9.768751843160015, "learning_rate": 1.5389823826355608e-06, "loss": -0.8742, "step": 4332 }, { "epoch": 0.38711118058191724, "grad_norm": 11.922852162403233, "learning_rate": 1.5384569742308082e-06, "loss": -0.3044, "step": 4334 }, { "epoch": 0.3872898197976911, "grad_norm": 4.977948581134202, "learning_rate": 1.5379313564048677e-06, "loss": 0.0895, "step": 4336 }, { "epoch": 0.38746845901346494, "grad_norm": 12.45854086474478, "learning_rate": 1.5374055293621667e-06, "loss": 0.4463, "step": 4338 }, { "epoch": 0.38764709822923876, "grad_norm": 4.933347670216391, "learning_rate": 1.536879493307214e-06, "loss": -0.0876, "step": 4340 }, { "epoch": 0.38782573744501264, "grad_norm": 11.925280963138968, "learning_rate": 1.5363532484446002e-06, "loss": -0.0799, "step": 4342 }, { "epoch": 0.38800437666078647, "grad_norm": 3.2976313442075065, "learning_rate": 1.5358267949789964e-06, "loss": 0.5096, "step": 4344 }, { "epoch": 0.3881830158765603, "grad_norm": 6.8887045469880155, "learning_rate": 1.5353001331151563e-06, "loss": 0.2695, "step": 4346 }, { "epoch": 0.38836165509233417, "grad_norm": 16.407693388887726, "learning_rate": 1.5347732630579127e-06, "loss": -0.2921, "step": 4348 }, { "epoch": 0.388540294308108, "grad_norm": 11.698048772884025, "learning_rate": 1.5342461850121803e-06, "loss": -0.6659, "step": 4350 }, { "epoch": 0.3887189335238818, "grad_norm": 15.154952223755009, "learning_rate": 1.5337188991829552e-06, "loss": 0.7095, "step": 4352 }, { "epoch": 0.3888975727396557, "grad_norm": 19.783867106940395, "learning_rate": 1.5331914057753137e-06, "loss": -0.3407, "step": 4354 }, { "epoch": 0.3890762119554295, "grad_norm": 5.2377938563684925, "learning_rate": 1.5326637049944129e-06, "loss": -0.4285, "step": 4356 }, { "epoch": 0.38925485117120334, "grad_norm": 11.207944804173442, "learning_rate": 1.5321357970454904e-06, "loss": 0.7768, "step": 4358 }, { "epoch": 0.3894334903869772, "grad_norm": 6.691785063434501, "learning_rate": 1.5316076821338649e-06, "loss": 0.5784, "step": 4360 }, { "epoch": 0.38961212960275104, "grad_norm": 4.202957320147964, "learning_rate": 1.5310793604649348e-06, "loss": 0.5068, "step": 4362 }, { "epoch": 0.38979076881852487, "grad_norm": 7.529162612291887, "learning_rate": 1.53055083224418e-06, "loss": 0.0836, "step": 4364 }, { "epoch": 0.38996940803429875, "grad_norm": 14.216561687718418, "learning_rate": 1.5300220976771598e-06, "loss": -0.4894, "step": 4366 }, { "epoch": 0.39014804725007257, "grad_norm": 10.365154696615273, "learning_rate": 1.5294931569695138e-06, "loss": -0.9479, "step": 4368 }, { "epoch": 0.3903266864658464, "grad_norm": 8.635349863957408, "learning_rate": 1.5289640103269623e-06, "loss": 0.6183, "step": 4370 }, { "epoch": 0.3905053256816203, "grad_norm": 8.693424098627291, "learning_rate": 1.5284346579553057e-06, "loss": -0.103, "step": 4372 }, { "epoch": 0.3906839648973941, "grad_norm": 6.992306977854919, "learning_rate": 1.5279051000604238e-06, "loss": -0.6414, "step": 4374 }, { "epoch": 0.3908626041131679, "grad_norm": 42.0033844083761, "learning_rate": 1.5273753368482767e-06, "loss": -0.4972, "step": 4376 }, { "epoch": 0.3910412433289418, "grad_norm": 20.660399604470534, "learning_rate": 1.5268453685249044e-06, "loss": -1.3496, "step": 4378 }, { "epoch": 0.3912198825447156, "grad_norm": 10.941159055914342, "learning_rate": 1.5263151952964269e-06, "loss": 0.0859, "step": 4380 }, { "epoch": 0.39139852176048945, "grad_norm": 7.49813565288352, "learning_rate": 1.5257848173690427e-06, "loss": -0.1444, "step": 4382 }, { "epoch": 0.3915771609762633, "grad_norm": 13.079268939851973, "learning_rate": 1.525254234949032e-06, "loss": -0.9675, "step": 4384 }, { "epoch": 0.39175580019203715, "grad_norm": 6.943965339240135, "learning_rate": 1.5247234482427524e-06, "loss": 0.2849, "step": 4386 }, { "epoch": 0.391934439407811, "grad_norm": 6.046730532633098, "learning_rate": 1.5241924574566425e-06, "loss": 0.0026, "step": 4388 }, { "epoch": 0.39211307862358485, "grad_norm": 3.693633587288869, "learning_rate": 1.5236612627972196e-06, "loss": -0.1292, "step": 4390 }, { "epoch": 0.3922917178393587, "grad_norm": 7.917579223506263, "learning_rate": 1.5231298644710801e-06, "loss": 0.6153, "step": 4392 }, { "epoch": 0.3924703570551325, "grad_norm": 11.652290714973173, "learning_rate": 1.5225982626849e-06, "loss": 0.9974, "step": 4394 }, { "epoch": 0.3926489962709064, "grad_norm": 7.331167016693874, "learning_rate": 1.5220664576454344e-06, "loss": -0.0501, "step": 4396 }, { "epoch": 0.3928276354866802, "grad_norm": 6.29381038026911, "learning_rate": 1.521534449559517e-06, "loss": 0.1774, "step": 4398 }, { "epoch": 0.3930062747024541, "grad_norm": 16.91699779668736, "learning_rate": 1.5210022386340618e-06, "loss": -0.474, "step": 4400 }, { "epoch": 0.3931849139182279, "grad_norm": 13.474978960406522, "learning_rate": 1.5204698250760595e-06, "loss": -0.3824, "step": 4402 }, { "epoch": 0.39336355313400173, "grad_norm": 7.456860899540614, "learning_rate": 1.5199372090925816e-06, "loss": -0.9022, "step": 4404 }, { "epoch": 0.3935421923497756, "grad_norm": 9.491117641611764, "learning_rate": 1.5194043908907772e-06, "loss": -0.7751, "step": 4406 }, { "epoch": 0.39372083156554943, "grad_norm": 4.9339190668056245, "learning_rate": 1.5188713706778746e-06, "loss": -0.4286, "step": 4408 }, { "epoch": 0.39389947078132326, "grad_norm": 5.731934737606634, "learning_rate": 1.5183381486611806e-06, "loss": -0.6387, "step": 4410 }, { "epoch": 0.39407810999709714, "grad_norm": 7.343706625952363, "learning_rate": 1.51780472504808e-06, "loss": 0.667, "step": 4412 }, { "epoch": 0.39425674921287096, "grad_norm": 7.848970780661235, "learning_rate": 1.5172711000460363e-06, "loss": 0.6119, "step": 4414 }, { "epoch": 0.3944353884286448, "grad_norm": 7.602635840043149, "learning_rate": 1.5167372738625921e-06, "loss": 0.4347, "step": 4416 }, { "epoch": 0.39461402764441866, "grad_norm": 10.66991561841982, "learning_rate": 1.5162032467053672e-06, "loss": 0.1092, "step": 4418 }, { "epoch": 0.3947926668601925, "grad_norm": 5.86969923766329, "learning_rate": 1.5156690187820594e-06, "loss": -0.9024, "step": 4420 }, { "epoch": 0.3949713060759663, "grad_norm": 4.14351222303983, "learning_rate": 1.5151345903004456e-06, "loss": -0.0509, "step": 4422 }, { "epoch": 0.3951499452917402, "grad_norm": 16.91559786622616, "learning_rate": 1.5145999614683805e-06, "loss": -0.1253, "step": 4424 }, { "epoch": 0.395328584507514, "grad_norm": 4.711229407026871, "learning_rate": 1.5140651324937954e-06, "loss": 0.5852, "step": 4426 }, { "epoch": 0.39550722372328784, "grad_norm": 7.146419786535133, "learning_rate": 1.5135301035847014e-06, "loss": -0.1327, "step": 4428 }, { "epoch": 0.3956858629390617, "grad_norm": 11.285463513145704, "learning_rate": 1.5129948749491862e-06, "loss": -0.4455, "step": 4430 }, { "epoch": 0.39586450215483554, "grad_norm": 6.329200333017494, "learning_rate": 1.512459446795415e-06, "loss": -0.4883, "step": 4432 }, { "epoch": 0.39604314137060936, "grad_norm": 22.79985201603565, "learning_rate": 1.5119238193316316e-06, "loss": -1.0994, "step": 4434 }, { "epoch": 0.39622178058638324, "grad_norm": 7.717748661959632, "learning_rate": 1.511387992766156e-06, "loss": -0.5527, "step": 4436 }, { "epoch": 0.39640041980215707, "grad_norm": 15.891305794475029, "learning_rate": 1.510851967307387e-06, "loss": -0.623, "step": 4438 }, { "epoch": 0.3965790590179309, "grad_norm": 8.696569744294747, "learning_rate": 1.5103157431637999e-06, "loss": -0.3648, "step": 4440 }, { "epoch": 0.39675769823370477, "grad_norm": 9.13769108563344, "learning_rate": 1.5097793205439469e-06, "loss": 0.2808, "step": 4442 }, { "epoch": 0.3969363374494786, "grad_norm": 4.523502435036943, "learning_rate": 1.509242699656459e-06, "loss": -0.0648, "step": 4444 }, { "epoch": 0.3971149766652524, "grad_norm": 8.835156474516975, "learning_rate": 1.5087058807100425e-06, "loss": -0.2272, "step": 4446 }, { "epoch": 0.3972936158810263, "grad_norm": 13.130252622685783, "learning_rate": 1.5081688639134819e-06, "loss": -1.9158, "step": 4448 }, { "epoch": 0.3974722550968001, "grad_norm": 8.393841402262025, "learning_rate": 1.5076316494756385e-06, "loss": 0.4275, "step": 4450 }, { "epoch": 0.39765089431257394, "grad_norm": 5.759439432629272, "learning_rate": 1.5070942376054493e-06, "loss": -1.0195, "step": 4452 }, { "epoch": 0.3978295335283478, "grad_norm": 6.250851382441523, "learning_rate": 1.5065566285119297e-06, "loss": 0.6455, "step": 4454 }, { "epoch": 0.39800817274412165, "grad_norm": 3.948983416540121, "learning_rate": 1.5060188224041712e-06, "loss": 0.3186, "step": 4456 }, { "epoch": 0.39818681195989547, "grad_norm": 12.603733999008922, "learning_rate": 1.5054808194913416e-06, "loss": -0.2959, "step": 4458 }, { "epoch": 0.39836545117566935, "grad_norm": 6.699035677618814, "learning_rate": 1.5049426199826858e-06, "loss": 0.2436, "step": 4460 }, { "epoch": 0.3985440903914432, "grad_norm": 10.36871956699497, "learning_rate": 1.5044042240875241e-06, "loss": -0.3723, "step": 4462 }, { "epoch": 0.398722729607217, "grad_norm": 9.463299180099417, "learning_rate": 1.5038656320152546e-06, "loss": -0.0795, "step": 4464 }, { "epoch": 0.3989013688229909, "grad_norm": 6.924385390567843, "learning_rate": 1.5033268439753507e-06, "loss": -0.6332, "step": 4466 }, { "epoch": 0.3990800080387647, "grad_norm": 11.199584231152672, "learning_rate": 1.5027878601773632e-06, "loss": -1.5979, "step": 4468 }, { "epoch": 0.3992586472545386, "grad_norm": 18.94688137833886, "learning_rate": 1.5022486808309168e-06, "loss": 0.1153, "step": 4470 }, { "epoch": 0.3994372864703124, "grad_norm": 8.838827859489486, "learning_rate": 1.5017093061457147e-06, "loss": 0.3648, "step": 4472 }, { "epoch": 0.3996159256860862, "grad_norm": 18.373055076171497, "learning_rate": 1.5011697363315344e-06, "loss": -0.5421, "step": 4474 }, { "epoch": 0.3997945649018601, "grad_norm": 4.921283020265321, "learning_rate": 1.5006299715982302e-06, "loss": -0.5543, "step": 4476 }, { "epoch": 0.39997320411763393, "grad_norm": 3.778500400568936, "learning_rate": 1.5000900121557323e-06, "loss": 0.2948, "step": 4478 }, { "epoch": 0.40015184333340775, "grad_norm": 6.268571346395725, "learning_rate": 1.499549858214045e-06, "loss": -0.822, "step": 4480 }, { "epoch": 0.40033048254918163, "grad_norm": 9.282880748991662, "learning_rate": 1.4990095099832511e-06, "loss": -0.7473, "step": 4482 }, { "epoch": 0.40050912176495546, "grad_norm": 4.9032721745981505, "learning_rate": 1.4984689676735065e-06, "loss": -0.7373, "step": 4484 }, { "epoch": 0.4006877609807293, "grad_norm": 7.682694336419073, "learning_rate": 1.497928231495043e-06, "loss": -0.068, "step": 4486 }, { "epoch": 0.40086640019650316, "grad_norm": 18.16364834877318, "learning_rate": 1.497387301658169e-06, "loss": -1.3622, "step": 4488 }, { "epoch": 0.401045039412277, "grad_norm": 4.112121360531598, "learning_rate": 1.4968461783732671e-06, "loss": -0.5171, "step": 4490 }, { "epoch": 0.4012236786280508, "grad_norm": 8.027751472309783, "learning_rate": 1.4963048618507954e-06, "loss": -0.03, "step": 4492 }, { "epoch": 0.4014023178438247, "grad_norm": 8.650478601169814, "learning_rate": 1.495763352301288e-06, "loss": -0.9229, "step": 4494 }, { "epoch": 0.4015809570595985, "grad_norm": 10.291552544294406, "learning_rate": 1.495221649935352e-06, "loss": 0.0936, "step": 4496 }, { "epoch": 0.40175959627537233, "grad_norm": 4.40598572283426, "learning_rate": 1.494679754963672e-06, "loss": 0.4768, "step": 4498 }, { "epoch": 0.4019382354911462, "grad_norm": 7.210088380226944, "learning_rate": 1.4941376675970057e-06, "loss": -0.1529, "step": 4500 }, { "epoch": 0.40211687470692004, "grad_norm": 18.49157605532221, "learning_rate": 1.4935953880461864e-06, "loss": -1.7958, "step": 4502 }, { "epoch": 0.40229551392269386, "grad_norm": 16.97413462706982, "learning_rate": 1.4930529165221222e-06, "loss": -0.7223, "step": 4504 }, { "epoch": 0.40247415313846774, "grad_norm": 8.886137086046542, "learning_rate": 1.4925102532357955e-06, "loss": -0.6969, "step": 4506 }, { "epoch": 0.40265279235424156, "grad_norm": 12.042637417393122, "learning_rate": 1.491967398398263e-06, "loss": -0.6833, "step": 4508 }, { "epoch": 0.4028314315700154, "grad_norm": 7.066248886186589, "learning_rate": 1.491424352220657e-06, "loss": 0.5876, "step": 4510 }, { "epoch": 0.40301007078578927, "grad_norm": 9.852468160071101, "learning_rate": 1.4908811149141833e-06, "loss": -0.7248, "step": 4512 }, { "epoch": 0.4031887100015631, "grad_norm": 11.203928217697765, "learning_rate": 1.4903376866901227e-06, "loss": -0.5636, "step": 4514 }, { "epoch": 0.4033673492173369, "grad_norm": 9.265466492504869, "learning_rate": 1.489794067759829e-06, "loss": 0.4989, "step": 4516 }, { "epoch": 0.4035459884331108, "grad_norm": 16.60042643171686, "learning_rate": 1.4892502583347316e-06, "loss": -0.659, "step": 4518 }, { "epoch": 0.4037246276488846, "grad_norm": 5.678658120589503, "learning_rate": 1.4887062586263333e-06, "loss": -2.1993, "step": 4520 }, { "epoch": 0.40390326686465844, "grad_norm": 16.761098344816467, "learning_rate": 1.4881620688462114e-06, "loss": -0.2349, "step": 4522 }, { "epoch": 0.4040819060804323, "grad_norm": 8.077140349655151, "learning_rate": 1.4876176892060156e-06, "loss": -0.4753, "step": 4524 }, { "epoch": 0.40426054529620614, "grad_norm": 7.899810218945853, "learning_rate": 1.4870731199174717e-06, "loss": 0.5998, "step": 4526 }, { "epoch": 0.40443918451197997, "grad_norm": 7.852706355189278, "learning_rate": 1.486528361192378e-06, "loss": 0.2094, "step": 4528 }, { "epoch": 0.40461782372775384, "grad_norm": 10.217103224653387, "learning_rate": 1.4859834132426058e-06, "loss": -0.3531, "step": 4530 }, { "epoch": 0.40479646294352767, "grad_norm": 3.4351454560468353, "learning_rate": 1.485438276280102e-06, "loss": 1.1248, "step": 4532 }, { "epoch": 0.40497510215930155, "grad_norm": 5.500320078465995, "learning_rate": 1.484892950516885e-06, "loss": -0.2551, "step": 4534 }, { "epoch": 0.40515374137507537, "grad_norm": 6.035952933358635, "learning_rate": 1.4843474361650476e-06, "loss": 0.8941, "step": 4536 }, { "epoch": 0.4053323805908492, "grad_norm": 6.057709049485841, "learning_rate": 1.4838017334367562e-06, "loss": 0.0281, "step": 4538 }, { "epoch": 0.4055110198066231, "grad_norm": 8.283295461566809, "learning_rate": 1.4832558425442491e-06, "loss": 0.0314, "step": 4540 }, { "epoch": 0.4056896590223969, "grad_norm": 13.49573647320151, "learning_rate": 1.48270976369984e-06, "loss": -1.0105, "step": 4542 }, { "epoch": 0.4058682982381707, "grad_norm": 13.637727444969753, "learning_rate": 1.4821634971159135e-06, "loss": -0.7946, "step": 4544 }, { "epoch": 0.4060469374539446, "grad_norm": 7.244163531660332, "learning_rate": 1.481617043004929e-06, "loss": -0.2622, "step": 4546 }, { "epoch": 0.4062255766697184, "grad_norm": 2.9325552370766794, "learning_rate": 1.4810704015794171e-06, "loss": -0.2307, "step": 4548 }, { "epoch": 0.40640421588549225, "grad_norm": 8.513407735277916, "learning_rate": 1.4805235730519827e-06, "loss": -1.6916, "step": 4550 }, { "epoch": 0.4065828551012661, "grad_norm": 6.089374555971258, "learning_rate": 1.4799765576353027e-06, "loss": 0.4691, "step": 4552 }, { "epoch": 0.40676149431703995, "grad_norm": 14.404259666265501, "learning_rate": 1.479429355542127e-06, "loss": 0.1801, "step": 4554 }, { "epoch": 0.4069401335328138, "grad_norm": 9.100275693636721, "learning_rate": 1.478881966985278e-06, "loss": -0.259, "step": 4556 }, { "epoch": 0.40711877274858765, "grad_norm": 6.8458931410861155, "learning_rate": 1.4783343921776509e-06, "loss": 0.1578, "step": 4558 }, { "epoch": 0.4072974119643615, "grad_norm": 10.829870130160048, "learning_rate": 1.4777866313322126e-06, "loss": 0.008, "step": 4560 }, { "epoch": 0.4074760511801353, "grad_norm": 12.319343949722425, "learning_rate": 1.477238684662003e-06, "loss": 0.1659, "step": 4562 }, { "epoch": 0.4076546903959092, "grad_norm": 3.914149239620218, "learning_rate": 1.4766905523801342e-06, "loss": 0.5473, "step": 4564 }, { "epoch": 0.407833329611683, "grad_norm": 5.400992775539121, "learning_rate": 1.4761422346997905e-06, "loss": -0.1564, "step": 4566 }, { "epoch": 0.40801196882745683, "grad_norm": 8.32070678349821, "learning_rate": 1.475593731834228e-06, "loss": 0.4944, "step": 4568 }, { "epoch": 0.4081906080432307, "grad_norm": 4.275466465179331, "learning_rate": 1.475045043996775e-06, "loss": -0.6638, "step": 4570 }, { "epoch": 0.40836924725900453, "grad_norm": 11.316846486733699, "learning_rate": 1.4744961714008317e-06, "loss": -0.35, "step": 4572 }, { "epoch": 0.40854788647477835, "grad_norm": 11.647272902463182, "learning_rate": 1.4739471142598703e-06, "loss": -1.1454, "step": 4574 }, { "epoch": 0.40872652569055223, "grad_norm": 4.271781084302208, "learning_rate": 1.473397872787435e-06, "loss": -0.6284, "step": 4576 }, { "epoch": 0.40890516490632606, "grad_norm": 7.593623407509628, "learning_rate": 1.472848447197141e-06, "loss": 0.3062, "step": 4578 }, { "epoch": 0.4090838041220999, "grad_norm": 9.965977585190307, "learning_rate": 1.4722988377026754e-06, "loss": -0.3313, "step": 4580 }, { "epoch": 0.40926244333787376, "grad_norm": 7.536243097324598, "learning_rate": 1.4717490445177977e-06, "loss": 0.1637, "step": 4582 }, { "epoch": 0.4094410825536476, "grad_norm": 13.337067907568505, "learning_rate": 1.4711990678563369e-06, "loss": -0.6244, "step": 4584 }, { "epoch": 0.4096197217694214, "grad_norm": 6.804214058138869, "learning_rate": 1.4706489079321953e-06, "loss": 0.8176, "step": 4586 }, { "epoch": 0.4097983609851953, "grad_norm": 1.8555739202307602, "learning_rate": 1.4700985649593455e-06, "loss": 0.1396, "step": 4588 }, { "epoch": 0.4099770002009691, "grad_norm": 4.446115002324119, "learning_rate": 1.4695480391518317e-06, "loss": -0.1473, "step": 4590 }, { "epoch": 0.41015563941674293, "grad_norm": 8.849510446575165, "learning_rate": 1.4689973307237686e-06, "loss": -0.4417, "step": 4592 }, { "epoch": 0.4103342786325168, "grad_norm": 10.069209355792601, "learning_rate": 1.4684464398893427e-06, "loss": -1.2397, "step": 4594 }, { "epoch": 0.41051291784829064, "grad_norm": 7.730756926791374, "learning_rate": 1.4678953668628107e-06, "loss": -0.4983, "step": 4596 }, { "epoch": 0.4106915570640645, "grad_norm": 5.771592440295399, "learning_rate": 1.4673441118585009e-06, "loss": 0.5847, "step": 4598 }, { "epoch": 0.41087019627983834, "grad_norm": 5.378995607917894, "learning_rate": 1.4667926750908116e-06, "loss": -0.2129, "step": 4600 }, { "epoch": 0.41104883549561216, "grad_norm": 10.343809744210015, "learning_rate": 1.4662410567742126e-06, "loss": -0.1293, "step": 4602 }, { "epoch": 0.41122747471138604, "grad_norm": 7.005551452931309, "learning_rate": 1.4656892571232438e-06, "loss": -0.2976, "step": 4604 }, { "epoch": 0.41140611392715987, "grad_norm": 8.057154576409966, "learning_rate": 1.4651372763525153e-06, "loss": 1.2835, "step": 4606 }, { "epoch": 0.4115847531429337, "grad_norm": 8.497535348242389, "learning_rate": 1.4645851146767087e-06, "loss": 0.4225, "step": 4608 }, { "epoch": 0.41176339235870757, "grad_norm": 3.187209022554779, "learning_rate": 1.4640327723105752e-06, "loss": 0.4614, "step": 4610 }, { "epoch": 0.4119420315744814, "grad_norm": 16.655018716215842, "learning_rate": 1.4634802494689362e-06, "loss": -0.2987, "step": 4612 }, { "epoch": 0.4121206707902552, "grad_norm": 6.32570633996417, "learning_rate": 1.4629275463666835e-06, "loss": -0.2832, "step": 4614 }, { "epoch": 0.4122993100060291, "grad_norm": 5.2069537764421225, "learning_rate": 1.462374663218779e-06, "loss": 0.58, "step": 4616 }, { "epoch": 0.4124779492218029, "grad_norm": 6.604597813665374, "learning_rate": 1.461821600240255e-06, "loss": -0.3209, "step": 4618 }, { "epoch": 0.41265658843757674, "grad_norm": 6.836645889635039, "learning_rate": 1.4612683576462133e-06, "loss": -0.5799, "step": 4620 }, { "epoch": 0.4128352276533506, "grad_norm": 15.288248094047802, "learning_rate": 1.4607149356518252e-06, "loss": 0.3192, "step": 4622 }, { "epoch": 0.41301386686912445, "grad_norm": 6.959036724911852, "learning_rate": 1.4601613344723322e-06, "loss": -0.1222, "step": 4624 }, { "epoch": 0.41319250608489827, "grad_norm": 8.901569836643457, "learning_rate": 1.4596075543230461e-06, "loss": -0.212, "step": 4626 }, { "epoch": 0.41337114530067215, "grad_norm": 8.509561882530567, "learning_rate": 1.4590535954193472e-06, "loss": -0.1314, "step": 4628 }, { "epoch": 0.413549784516446, "grad_norm": 5.853006956230916, "learning_rate": 1.4584994579766863e-06, "loss": -1.0224, "step": 4630 }, { "epoch": 0.4137284237322198, "grad_norm": 10.10622634082825, "learning_rate": 1.4579451422105825e-06, "loss": -0.1097, "step": 4632 }, { "epoch": 0.4139070629479937, "grad_norm": 12.679834314461191, "learning_rate": 1.4573906483366255e-06, "loss": -1.1179, "step": 4634 }, { "epoch": 0.4140857021637675, "grad_norm": 5.945205667465744, "learning_rate": 1.4568359765704733e-06, "loss": 0.1301, "step": 4636 }, { "epoch": 0.4142643413795413, "grad_norm": 8.349140383392866, "learning_rate": 1.4562811271278537e-06, "loss": 0.7666, "step": 4638 }, { "epoch": 0.4144429805953152, "grad_norm": 55.049254401456245, "learning_rate": 1.4557261002245636e-06, "loss": -0.6001, "step": 4640 }, { "epoch": 0.414621619811089, "grad_norm": 8.56620407735666, "learning_rate": 1.4551708960764684e-06, "loss": 0.5621, "step": 4642 }, { "epoch": 0.41480025902686285, "grad_norm": 19.40573969613879, "learning_rate": 1.4546155148995026e-06, "loss": -1.0248, "step": 4644 }, { "epoch": 0.41497889824263673, "grad_norm": 5.2221144378757645, "learning_rate": 1.45405995690967e-06, "loss": 0.6362, "step": 4646 }, { "epoch": 0.41515753745841055, "grad_norm": 6.8767218080948815, "learning_rate": 1.4535042223230427e-06, "loss": 0.4767, "step": 4648 }, { "epoch": 0.4153361766741844, "grad_norm": 5.002712467682083, "learning_rate": 1.4529483113557614e-06, "loss": -0.0344, "step": 4650 }, { "epoch": 0.41551481588995826, "grad_norm": 49.872133865798496, "learning_rate": 1.4523922242240362e-06, "loss": -0.3933, "step": 4652 }, { "epoch": 0.4156934551057321, "grad_norm": 15.207269054780749, "learning_rate": 1.451835961144145e-06, "loss": -0.3941, "step": 4654 }, { "epoch": 0.4158720943215059, "grad_norm": 6.87278933962658, "learning_rate": 1.4512795223324342e-06, "loss": -0.0487, "step": 4656 }, { "epoch": 0.4160507335372798, "grad_norm": 3.5050939547887974, "learning_rate": 1.4507229080053183e-06, "loss": 1.2313, "step": 4658 }, { "epoch": 0.4162293727530536, "grad_norm": 14.631654448135674, "learning_rate": 1.4501661183792806e-06, "loss": -1.5284, "step": 4660 }, { "epoch": 0.41640801196882743, "grad_norm": 5.394725356867149, "learning_rate": 1.4496091536708729e-06, "loss": -0.1437, "step": 4662 }, { "epoch": 0.4165866511846013, "grad_norm": 3.1109270219228833, "learning_rate": 1.449052014096714e-06, "loss": -0.4093, "step": 4664 }, { "epoch": 0.41676529040037513, "grad_norm": 6.06993857941826, "learning_rate": 1.4484946998734912e-06, "loss": -0.9576, "step": 4666 }, { "epoch": 0.416943929616149, "grad_norm": 4.897296876146599, "learning_rate": 1.44793721121796e-06, "loss": -0.8542, "step": 4668 }, { "epoch": 0.41712256883192284, "grad_norm": 7.499434640556481, "learning_rate": 1.4473795483469439e-06, "loss": -0.9727, "step": 4670 }, { "epoch": 0.41730120804769666, "grad_norm": 13.239796866619063, "learning_rate": 1.446821711477333e-06, "loss": -0.5723, "step": 4672 }, { "epoch": 0.41747984726347054, "grad_norm": 9.659127902199799, "learning_rate": 1.446263700826087e-06, "loss": -0.3008, "step": 4674 }, { "epoch": 0.41765848647924436, "grad_norm": 8.792496724891432, "learning_rate": 1.4457055166102312e-06, "loss": 0.5686, "step": 4676 }, { "epoch": 0.4178371256950182, "grad_norm": 9.121949822647268, "learning_rate": 1.4451471590468594e-06, "loss": -0.8123, "step": 4678 }, { "epoch": 0.41801576491079206, "grad_norm": 7.478962958350111, "learning_rate": 1.444588628353133e-06, "loss": 0.2538, "step": 4680 }, { "epoch": 0.4181944041265659, "grad_norm": 32.99155942304126, "learning_rate": 1.4440299247462798e-06, "loss": -1.5306, "step": 4682 }, { "epoch": 0.4183730433423397, "grad_norm": 11.212014389154058, "learning_rate": 1.4434710484435963e-06, "loss": 0.2573, "step": 4684 }, { "epoch": 0.4185516825581136, "grad_norm": 8.401518784321128, "learning_rate": 1.442911999662445e-06, "loss": 0.4994, "step": 4686 }, { "epoch": 0.4187303217738874, "grad_norm": 9.553439366811912, "learning_rate": 1.4423527786202553e-06, "loss": -0.5746, "step": 4688 }, { "epoch": 0.41890896098966124, "grad_norm": 4.751574205248037, "learning_rate": 1.4417933855345252e-06, "loss": 0.3669, "step": 4690 }, { "epoch": 0.4190876002054351, "grad_norm": 6.3754709107246414, "learning_rate": 1.4412338206228175e-06, "loss": 0.043, "step": 4692 }, { "epoch": 0.41926623942120894, "grad_norm": 3.6513354536515505, "learning_rate": 1.4406740841027633e-06, "loss": 0.6946, "step": 4694 }, { "epoch": 0.41944487863698277, "grad_norm": 2.9459486863690834, "learning_rate": 1.4401141761920602e-06, "loss": 0.7625, "step": 4696 }, { "epoch": 0.41962351785275664, "grad_norm": 3.2453010040946486, "learning_rate": 1.4395540971084719e-06, "loss": 0.1101, "step": 4698 }, { "epoch": 0.41980215706853047, "grad_norm": 8.222260048352544, "learning_rate": 1.4389938470698295e-06, "loss": 0.1157, "step": 4700 }, { "epoch": 0.4199807962843043, "grad_norm": 5.0919028835579665, "learning_rate": 1.4384334262940297e-06, "loss": -1.034, "step": 4702 }, { "epoch": 0.42015943550007817, "grad_norm": 11.143135290901215, "learning_rate": 1.437872834999036e-06, "loss": 0.5111, "step": 4704 }, { "epoch": 0.420338074715852, "grad_norm": 9.271705112115848, "learning_rate": 1.4373120734028788e-06, "loss": 0.2572, "step": 4706 }, { "epoch": 0.4205167139316258, "grad_norm": 3.360448905157947, "learning_rate": 1.4367511417236533e-06, "loss": -0.27, "step": 4708 }, { "epoch": 0.4206953531473997, "grad_norm": 8.333768095755252, "learning_rate": 1.4361900401795227e-06, "loss": -1.5039, "step": 4710 }, { "epoch": 0.4208739923631735, "grad_norm": 6.676218945935079, "learning_rate": 1.4356287689887143e-06, "loss": 0.8279, "step": 4712 }, { "epoch": 0.42105263157894735, "grad_norm": 7.698508467442964, "learning_rate": 1.4350673283695233e-06, "loss": -0.0879, "step": 4714 }, { "epoch": 0.4212312707947212, "grad_norm": 11.218402485260059, "learning_rate": 1.4345057185403098e-06, "loss": 0.4581, "step": 4716 }, { "epoch": 0.42140991001049505, "grad_norm": 4.952808069143745, "learning_rate": 1.433943939719499e-06, "loss": 0.2947, "step": 4718 }, { "epoch": 0.42158854922626887, "grad_norm": 36.6643396737339, "learning_rate": 1.4333819921255834e-06, "loss": -1.2176, "step": 4720 }, { "epoch": 0.42176718844204275, "grad_norm": 5.069086758378164, "learning_rate": 1.43281987597712e-06, "loss": 0.0677, "step": 4722 }, { "epoch": 0.4219458276578166, "grad_norm": 3.589223648326345, "learning_rate": 1.432257591492732e-06, "loss": -0.0346, "step": 4724 }, { "epoch": 0.4221244668735904, "grad_norm": 11.40705998301292, "learning_rate": 1.4316951388911074e-06, "loss": -0.8976, "step": 4726 }, { "epoch": 0.4223031060893643, "grad_norm": 4.78411839031843, "learning_rate": 1.4311325183909999e-06, "loss": -0.1259, "step": 4728 }, { "epoch": 0.4224817453051381, "grad_norm": 15.2524357085662, "learning_rate": 1.430569730211229e-06, "loss": -2.6574, "step": 4730 }, { "epoch": 0.422660384520912, "grad_norm": 2.9590570522737205, "learning_rate": 1.4300067745706788e-06, "loss": 0.0295, "step": 4732 }, { "epoch": 0.4228390237366858, "grad_norm": 12.198599588033852, "learning_rate": 1.429443651688299e-06, "loss": -1.06, "step": 4734 }, { "epoch": 0.4230176629524596, "grad_norm": 13.673375475697801, "learning_rate": 1.4288803617831036e-06, "loss": -0.3184, "step": 4736 }, { "epoch": 0.4231963021682335, "grad_norm": 5.082775814980586, "learning_rate": 1.4283169050741719e-06, "loss": 0.0714, "step": 4738 }, { "epoch": 0.42337494138400733, "grad_norm": 13.801511510363751, "learning_rate": 1.427753281780649e-06, "loss": 0.1316, "step": 4740 }, { "epoch": 0.42355358059978115, "grad_norm": 10.718628996674871, "learning_rate": 1.4271894921217428e-06, "loss": -1.451, "step": 4742 }, { "epoch": 0.42373221981555503, "grad_norm": 2.8390091164104287, "learning_rate": 1.4266255363167284e-06, "loss": 0.5032, "step": 4744 }, { "epoch": 0.42391085903132886, "grad_norm": 12.479574376208834, "learning_rate": 1.4260614145849432e-06, "loss": -0.5393, "step": 4746 }, { "epoch": 0.4240894982471027, "grad_norm": 7.639181737969873, "learning_rate": 1.4254971271457905e-06, "loss": -0.7149, "step": 4748 }, { "epoch": 0.42426813746287656, "grad_norm": 5.3905636106672015, "learning_rate": 1.424932674218738e-06, "loss": 0.1178, "step": 4750 }, { "epoch": 0.4244467766786504, "grad_norm": 5.484780494936577, "learning_rate": 1.424368056023317e-06, "loss": 0.4142, "step": 4752 }, { "epoch": 0.4246254158944242, "grad_norm": 9.082924183254532, "learning_rate": 1.4238032727791234e-06, "loss": 0.1343, "step": 4754 }, { "epoch": 0.4248040551101981, "grad_norm": 5.86749469953508, "learning_rate": 1.4232383247058178e-06, "loss": 0.679, "step": 4756 }, { "epoch": 0.4249826943259719, "grad_norm": 6.283547356128462, "learning_rate": 1.4226732120231244e-06, "loss": 0.8085, "step": 4758 }, { "epoch": 0.42516133354174573, "grad_norm": 7.421921772056571, "learning_rate": 1.4221079349508318e-06, "loss": -0.0617, "step": 4760 }, { "epoch": 0.4253399727575196, "grad_norm": 15.460828937845232, "learning_rate": 1.421542493708792e-06, "loss": -1.4733, "step": 4762 }, { "epoch": 0.42551861197329344, "grad_norm": 7.2810951838590885, "learning_rate": 1.4209768885169212e-06, "loss": 0.2841, "step": 4764 }, { "epoch": 0.42569725118906726, "grad_norm": 3.1340843910291842, "learning_rate": 1.420411119595199e-06, "loss": 0.5969, "step": 4766 }, { "epoch": 0.42587589040484114, "grad_norm": 6.973308355360841, "learning_rate": 1.41984518716367e-06, "loss": 0.248, "step": 4768 }, { "epoch": 0.42605452962061496, "grad_norm": 3.9554900682459913, "learning_rate": 1.4192790914424398e-06, "loss": 0.6917, "step": 4770 }, { "epoch": 0.4262331688363888, "grad_norm": 6.46215838298605, "learning_rate": 1.41871283265168e-06, "loss": 0.023, "step": 4772 }, { "epoch": 0.42641180805216267, "grad_norm": 8.289576997168925, "learning_rate": 1.418146411011625e-06, "loss": -0.0457, "step": 4774 }, { "epoch": 0.4265904472679365, "grad_norm": 15.716727822522907, "learning_rate": 1.4175798267425718e-06, "loss": -0.8526, "step": 4776 }, { "epoch": 0.4267690864837103, "grad_norm": 6.466165662149503, "learning_rate": 1.4170130800648812e-06, "loss": -0.7182, "step": 4778 }, { "epoch": 0.4269477256994842, "grad_norm": 16.24496898531925, "learning_rate": 1.4164461711989767e-06, "loss": -0.3819, "step": 4780 }, { "epoch": 0.427126364915258, "grad_norm": 14.271996904073474, "learning_rate": 1.415879100365346e-06, "loss": -0.4986, "step": 4782 }, { "epoch": 0.42730500413103184, "grad_norm": 4.719168739181423, "learning_rate": 1.4153118677845384e-06, "loss": 0.7023, "step": 4784 }, { "epoch": 0.4274836433468057, "grad_norm": 7.9290907160745885, "learning_rate": 1.4147444736771665e-06, "loss": 0.5738, "step": 4786 }, { "epoch": 0.42766228256257954, "grad_norm": 17.603527651894538, "learning_rate": 1.414176918263907e-06, "loss": -0.7211, "step": 4788 }, { "epoch": 0.42784092177835337, "grad_norm": 7.910586408056987, "learning_rate": 1.4136092017654975e-06, "loss": -0.0817, "step": 4790 }, { "epoch": 0.42801956099412725, "grad_norm": 5.816983042568484, "learning_rate": 1.413041324402739e-06, "loss": -0.7452, "step": 4792 }, { "epoch": 0.42819820020990107, "grad_norm": 5.591790564690379, "learning_rate": 1.4124732863964956e-06, "loss": 0.2434, "step": 4794 }, { "epoch": 0.42837683942567495, "grad_norm": 7.925133871879202, "learning_rate": 1.4119050879676931e-06, "loss": -0.2412, "step": 4796 }, { "epoch": 0.4285554786414488, "grad_norm": 5.811168230958049, "learning_rate": 1.4113367293373197e-06, "loss": 0.3119, "step": 4798 }, { "epoch": 0.4287341178572226, "grad_norm": 4.969934094489026, "learning_rate": 1.4107682107264267e-06, "loss": -0.6094, "step": 4800 }, { "epoch": 0.4289127570729965, "grad_norm": 10.951348306218787, "learning_rate": 1.4101995323561268e-06, "loss": 0.0393, "step": 4802 }, { "epoch": 0.4290913962887703, "grad_norm": 4.371146766811523, "learning_rate": 1.4096306944475955e-06, "loss": 0.1738, "step": 4804 }, { "epoch": 0.4292700355045441, "grad_norm": 7.560540820360531, "learning_rate": 1.4090616972220697e-06, "loss": -0.7181, "step": 4806 }, { "epoch": 0.429448674720318, "grad_norm": 15.385777611481355, "learning_rate": 1.4084925409008483e-06, "loss": -1.3457, "step": 4808 }, { "epoch": 0.4296273139360918, "grad_norm": 10.260763332235875, "learning_rate": 1.407923225705293e-06, "loss": 0.4682, "step": 4810 }, { "epoch": 0.42980595315186565, "grad_norm": 11.347293184811535, "learning_rate": 1.407353751856826e-06, "loss": -0.5666, "step": 4812 }, { "epoch": 0.42998459236763953, "grad_norm": 15.09301756035381, "learning_rate": 1.4067841195769323e-06, "loss": -0.1682, "step": 4814 }, { "epoch": 0.43016323158341335, "grad_norm": 8.42258021528499, "learning_rate": 1.4062143290871578e-06, "loss": -0.3745, "step": 4816 }, { "epoch": 0.4303418707991872, "grad_norm": 72.69265552976277, "learning_rate": 1.40564438060911e-06, "loss": -1.5662, "step": 4818 }, { "epoch": 0.43052051001496106, "grad_norm": 8.163131208096894, "learning_rate": 1.4050742743644585e-06, "loss": 0.929, "step": 4820 }, { "epoch": 0.4306991492307349, "grad_norm": 7.502162875793981, "learning_rate": 1.4045040105749338e-06, "loss": 0.0321, "step": 4822 }, { "epoch": 0.4308777884465087, "grad_norm": 6.622482109232865, "learning_rate": 1.4039335894623269e-06, "loss": -0.4903, "step": 4824 }, { "epoch": 0.4310564276622826, "grad_norm": 9.191941621414072, "learning_rate": 1.4033630112484915e-06, "loss": 0.9641, "step": 4826 }, { "epoch": 0.4312350668780564, "grad_norm": 15.044119482570569, "learning_rate": 1.4027922761553415e-06, "loss": 0.6454, "step": 4828 }, { "epoch": 0.43141370609383023, "grad_norm": 12.127919268417399, "learning_rate": 1.4022213844048514e-06, "loss": -0.9043, "step": 4830 }, { "epoch": 0.4315923453096041, "grad_norm": 10.823546307153027, "learning_rate": 1.401650336219058e-06, "loss": -0.0937, "step": 4832 }, { "epoch": 0.43177098452537793, "grad_norm": 16.295221407598493, "learning_rate": 1.401079131820058e-06, "loss": -0.0906, "step": 4834 }, { "epoch": 0.43194962374115176, "grad_norm": 5.314177483893652, "learning_rate": 1.4005077714300085e-06, "loss": -0.0055, "step": 4836 }, { "epoch": 0.43212826295692564, "grad_norm": 6.061631917251669, "learning_rate": 1.399936255271128e-06, "loss": -0.6081, "step": 4838 }, { "epoch": 0.43230690217269946, "grad_norm": 30.522160718433337, "learning_rate": 1.3993645835656952e-06, "loss": -2.1256, "step": 4840 }, { "epoch": 0.4324855413884733, "grad_norm": 8.982605549260931, "learning_rate": 1.3987927565360501e-06, "loss": -0.4696, "step": 4842 }, { "epoch": 0.43266418060424716, "grad_norm": 9.020607138625097, "learning_rate": 1.3982207744045923e-06, "loss": -0.7765, "step": 4844 }, { "epoch": 0.432842819820021, "grad_norm": 20.662376614829352, "learning_rate": 1.3976486373937806e-06, "loss": -0.293, "step": 4846 }, { "epoch": 0.4330214590357948, "grad_norm": 5.491699023385399, "learning_rate": 1.3970763457261368e-06, "loss": -0.233, "step": 4848 }, { "epoch": 0.4332000982515687, "grad_norm": 3.0901249156010757, "learning_rate": 1.3965038996242406e-06, "loss": -0.3453, "step": 4850 }, { "epoch": 0.4333787374673425, "grad_norm": 6.965315857877516, "learning_rate": 1.3959312993107327e-06, "loss": -1.3186, "step": 4852 }, { "epoch": 0.43355737668311634, "grad_norm": 1.984277016931478, "learning_rate": 1.3953585450083141e-06, "loss": 0.0822, "step": 4854 }, { "epoch": 0.4337360158988902, "grad_norm": 13.397097577861489, "learning_rate": 1.3947856369397443e-06, "loss": -0.988, "step": 4856 }, { "epoch": 0.43391465511466404, "grad_norm": 5.222350380499579, "learning_rate": 1.394212575327844e-06, "loss": -0.3492, "step": 4858 }, { "epoch": 0.43409329433043786, "grad_norm": 11.271417194880806, "learning_rate": 1.393639360395493e-06, "loss": -1.6758, "step": 4860 }, { "epoch": 0.43427193354621174, "grad_norm": 5.955376627275984, "learning_rate": 1.393065992365631e-06, "loss": 0.1727, "step": 4862 }, { "epoch": 0.43445057276198557, "grad_norm": 11.144774275522307, "learning_rate": 1.392492471461257e-06, "loss": 0.472, "step": 4864 }, { "epoch": 0.43462921197775944, "grad_norm": 5.035083139863908, "learning_rate": 1.3919187979054294e-06, "loss": 0.0958, "step": 4866 }, { "epoch": 0.43480785119353327, "grad_norm": 11.087157275763682, "learning_rate": 1.3913449719212662e-06, "loss": -0.2181, "step": 4868 }, { "epoch": 0.4349864904093071, "grad_norm": 12.479169044218436, "learning_rate": 1.390770993731945e-06, "loss": -1.0691, "step": 4870 }, { "epoch": 0.43516512962508097, "grad_norm": 10.394258572264299, "learning_rate": 1.3901968635607015e-06, "loss": 0.1939, "step": 4872 }, { "epoch": 0.4353437688408548, "grad_norm": 4.7846153233802, "learning_rate": 1.3896225816308312e-06, "loss": 0.2193, "step": 4874 }, { "epoch": 0.4355224080566286, "grad_norm": 2.949048493378226, "learning_rate": 1.3890481481656897e-06, "loss": 0.2691, "step": 4876 }, { "epoch": 0.4357010472724025, "grad_norm": 5.070406167879985, "learning_rate": 1.3884735633886893e-06, "loss": -1.076, "step": 4878 }, { "epoch": 0.4358796864881763, "grad_norm": 11.27201621757949, "learning_rate": 1.3878988275233028e-06, "loss": -1.0438, "step": 4880 }, { "epoch": 0.43605832570395014, "grad_norm": 6.873557199457843, "learning_rate": 1.3873239407930616e-06, "loss": 0.1252, "step": 4882 }, { "epoch": 0.436236964919724, "grad_norm": 7.4211577058809794, "learning_rate": 1.3867489034215544e-06, "loss": 0.2971, "step": 4884 }, { "epoch": 0.43641560413549785, "grad_norm": 3.494101867972588, "learning_rate": 1.3861737156324306e-06, "loss": -1.1985, "step": 4886 }, { "epoch": 0.43659424335127167, "grad_norm": 15.779138646664592, "learning_rate": 1.385598377649397e-06, "loss": -0.6269, "step": 4888 }, { "epoch": 0.43677288256704555, "grad_norm": 1.7606915441794786, "learning_rate": 1.3850228896962177e-06, "loss": 0.0871, "step": 4890 }, { "epoch": 0.4369515217828194, "grad_norm": 7.5747541028893, "learning_rate": 1.3844472519967179e-06, "loss": -0.1483, "step": 4892 }, { "epoch": 0.4371301609985932, "grad_norm": 15.886860419364645, "learning_rate": 1.3838714647747783e-06, "loss": -0.5396, "step": 4894 }, { "epoch": 0.4373088002143671, "grad_norm": 7.833586289670578, "learning_rate": 1.383295528254339e-06, "loss": -0.4497, "step": 4896 }, { "epoch": 0.4374874394301409, "grad_norm": 6.715405008539019, "learning_rate": 1.3827194426593988e-06, "loss": -0.4847, "step": 4898 }, { "epoch": 0.4376660786459147, "grad_norm": 7.56712263080677, "learning_rate": 1.3821432082140125e-06, "loss": 0.3095, "step": 4900 }, { "epoch": 0.4378447178616886, "grad_norm": 6.778938426853305, "learning_rate": 1.3815668251422953e-06, "loss": 1.6904, "step": 4902 }, { "epoch": 0.4380233570774624, "grad_norm": 3.2949764335380003, "learning_rate": 1.380990293668418e-06, "loss": 0.7876, "step": 4904 }, { "epoch": 0.43820199629323625, "grad_norm": 2.1056025927861026, "learning_rate": 1.3804136140166105e-06, "loss": 0.3889, "step": 4906 }, { "epoch": 0.43838063550901013, "grad_norm": 7.988333062561187, "learning_rate": 1.3798367864111597e-06, "loss": -0.7213, "step": 4908 }, { "epoch": 0.43855927472478395, "grad_norm": 5.825866425218844, "learning_rate": 1.3792598110764105e-06, "loss": 0.0313, "step": 4910 }, { "epoch": 0.4387379139405578, "grad_norm": 9.918710278994583, "learning_rate": 1.3786826882367645e-06, "loss": -0.5051, "step": 4912 }, { "epoch": 0.43891655315633166, "grad_norm": 7.983113108911082, "learning_rate": 1.3781054181166813e-06, "loss": -0.5717, "step": 4914 }, { "epoch": 0.4390951923721055, "grad_norm": 15.313524858082204, "learning_rate": 1.3775280009406776e-06, "loss": 0.072, "step": 4916 }, { "epoch": 0.4392738315878793, "grad_norm": 6.028057933728953, "learning_rate": 1.3769504369333277e-06, "loss": -0.2534, "step": 4918 }, { "epoch": 0.4394524708036532, "grad_norm": 11.736891556672317, "learning_rate": 1.3763727263192624e-06, "loss": -0.916, "step": 4920 }, { "epoch": 0.439631110019427, "grad_norm": 3.8845392758655675, "learning_rate": 1.3757948693231693e-06, "loss": -0.4118, "step": 4922 }, { "epoch": 0.43980974923520083, "grad_norm": 4.809376879834854, "learning_rate": 1.3752168661697939e-06, "loss": -0.9664, "step": 4924 }, { "epoch": 0.4399883884509747, "grad_norm": 10.540161346995744, "learning_rate": 1.3746387170839379e-06, "loss": -0.5593, "step": 4926 }, { "epoch": 0.44016702766674853, "grad_norm": 6.406677529560344, "learning_rate": 1.3740604222904598e-06, "loss": 0.3633, "step": 4928 }, { "epoch": 0.4403456668825224, "grad_norm": 6.831677523093505, "learning_rate": 1.3734819820142752e-06, "loss": -0.4563, "step": 4930 }, { "epoch": 0.44052430609829624, "grad_norm": 8.140627342999903, "learning_rate": 1.3729033964803552e-06, "loss": -0.9753, "step": 4932 }, { "epoch": 0.44070294531407006, "grad_norm": 28.442231105416226, "learning_rate": 1.3723246659137285e-06, "loss": -0.6028, "step": 4934 }, { "epoch": 0.44088158452984394, "grad_norm": 4.138703443794773, "learning_rate": 1.3717457905394802e-06, "loss": -0.8259, "step": 4936 }, { "epoch": 0.44106022374561776, "grad_norm": 13.208309586721228, "learning_rate": 1.3711667705827507e-06, "loss": 0.369, "step": 4938 }, { "epoch": 0.4412388629613916, "grad_norm": 8.258582996812127, "learning_rate": 1.370587606268738e-06, "loss": -1.126, "step": 4940 }, { "epoch": 0.44141750217716547, "grad_norm": 11.483603024649293, "learning_rate": 1.3700082978226949e-06, "loss": 0.2135, "step": 4942 }, { "epoch": 0.4415961413929393, "grad_norm": 4.1147073590221, "learning_rate": 1.3694288454699308e-06, "loss": 0.1548, "step": 4944 }, { "epoch": 0.4417747806087131, "grad_norm": 5.912381657712489, "learning_rate": 1.368849249435812e-06, "loss": -0.8135, "step": 4946 }, { "epoch": 0.441953419824487, "grad_norm": 15.516977671870768, "learning_rate": 1.368269509945759e-06, "loss": -0.9119, "step": 4948 }, { "epoch": 0.4421320590402608, "grad_norm": 3.075816416714999, "learning_rate": 1.3676896272252492e-06, "loss": 0.2646, "step": 4950 }, { "epoch": 0.44231069825603464, "grad_norm": 8.316610825630836, "learning_rate": 1.3671096014998157e-06, "loss": 0.3245, "step": 4952 }, { "epoch": 0.4424893374718085, "grad_norm": 13.039425268320878, "learning_rate": 1.3665294329950464e-06, "loss": -0.2301, "step": 4954 }, { "epoch": 0.44266797668758234, "grad_norm": 4.739210825383961, "learning_rate": 1.365949121936586e-06, "loss": -0.8721, "step": 4956 }, { "epoch": 0.44284661590335617, "grad_norm": 5.934283539768815, "learning_rate": 1.365368668550133e-06, "loss": -0.3806, "step": 4958 }, { "epoch": 0.44302525511913005, "grad_norm": 7.345405034997991, "learning_rate": 1.3647880730614432e-06, "loss": 0.1236, "step": 4960 }, { "epoch": 0.44320389433490387, "grad_norm": 9.153507798493965, "learning_rate": 1.3642073356963261e-06, "loss": -0.7064, "step": 4962 }, { "epoch": 0.4433825335506777, "grad_norm": 5.2930817201389715, "learning_rate": 1.363626456680647e-06, "loss": 0.1384, "step": 4964 }, { "epoch": 0.4435611727664516, "grad_norm": 7.4606429915695776, "learning_rate": 1.363045436240326e-06, "loss": 0.4721, "step": 4966 }, { "epoch": 0.4437398119822254, "grad_norm": 6.5542934876734495, "learning_rate": 1.3624642746013388e-06, "loss": 0.1508, "step": 4968 }, { "epoch": 0.4439184511979992, "grad_norm": 8.528691505523383, "learning_rate": 1.3618829719897156e-06, "loss": 0.5221, "step": 4970 }, { "epoch": 0.4440970904137731, "grad_norm": 11.529237152828971, "learning_rate": 1.3613015286315412e-06, "loss": 0.0514, "step": 4972 }, { "epoch": 0.4442757296295469, "grad_norm": 6.980083469886448, "learning_rate": 1.3607199447529556e-06, "loss": 0.2589, "step": 4974 }, { "epoch": 0.44445436884532075, "grad_norm": 9.400792969067323, "learning_rate": 1.3601382205801533e-06, "loss": -0.0563, "step": 4976 }, { "epoch": 0.4446330080610946, "grad_norm": 11.20620170280742, "learning_rate": 1.3595563563393832e-06, "loss": 0.5521, "step": 4978 }, { "epoch": 0.44481164727686845, "grad_norm": 13.927156861405257, "learning_rate": 1.3589743522569493e-06, "loss": -0.8596, "step": 4980 }, { "epoch": 0.4449902864926423, "grad_norm": 13.086596881708244, "learning_rate": 1.3583922085592085e-06, "loss": -0.1921, "step": 4982 }, { "epoch": 0.44516892570841615, "grad_norm": 22.61924915042288, "learning_rate": 1.3578099254725737e-06, "loss": 0.1815, "step": 4984 }, { "epoch": 0.44534756492419, "grad_norm": 2.2564438628287236, "learning_rate": 1.3572275032235115e-06, "loss": 0.0752, "step": 4986 }, { "epoch": 0.4455262041399638, "grad_norm": 6.9554263988903555, "learning_rate": 1.3566449420385415e-06, "loss": -0.4246, "step": 4988 }, { "epoch": 0.4457048433557377, "grad_norm": 22.35165056453291, "learning_rate": 1.3560622421442394e-06, "loss": -0.4389, "step": 4990 }, { "epoch": 0.4458834825715115, "grad_norm": 4.712709919025761, "learning_rate": 1.355479403767233e-06, "loss": -0.7307, "step": 4992 }, { "epoch": 0.4460621217872853, "grad_norm": 2.379903449656624, "learning_rate": 1.354896427134205e-06, "loss": -0.2149, "step": 4994 }, { "epoch": 0.4462407610030592, "grad_norm": 6.23740980430936, "learning_rate": 1.3543133124718913e-06, "loss": 0.5572, "step": 4996 }, { "epoch": 0.44641940021883303, "grad_norm": 17.832114680116987, "learning_rate": 1.3537300600070819e-06, "loss": -1.0035, "step": 4998 }, { "epoch": 0.4465980394346069, "grad_norm": 8.82938407089017, "learning_rate": 1.3531466699666198e-06, "loss": -0.2616, "step": 5000 }, { "epoch": 0.44677667865038073, "grad_norm": 10.284569108882058, "learning_rate": 1.3525631425774028e-06, "loss": 0.2122, "step": 5002 }, { "epoch": 0.44695531786615456, "grad_norm": 2.98146162581518, "learning_rate": 1.3519794780663803e-06, "loss": 0.0944, "step": 5004 }, { "epoch": 0.44713395708192843, "grad_norm": 37.8828986967982, "learning_rate": 1.3513956766605568e-06, "loss": 0.484, "step": 5006 }, { "epoch": 0.44731259629770226, "grad_norm": 12.019894956107535, "learning_rate": 1.3508117385869886e-06, "loss": 0.1764, "step": 5008 }, { "epoch": 0.4474912355134761, "grad_norm": 4.227167392958412, "learning_rate": 1.350227664072786e-06, "loss": -0.042, "step": 5010 }, { "epoch": 0.44766987472924996, "grad_norm": 17.085656183383676, "learning_rate": 1.3496434533451123e-06, "loss": -0.6447, "step": 5012 }, { "epoch": 0.4478485139450238, "grad_norm": 10.267386228768407, "learning_rate": 1.3490591066311836e-06, "loss": 0.142, "step": 5014 }, { "epoch": 0.4480271531607976, "grad_norm": 8.228031849578354, "learning_rate": 1.3484746241582685e-06, "loss": -0.1781, "step": 5016 }, { "epoch": 0.4482057923765715, "grad_norm": 7.85512177371134, "learning_rate": 1.3478900061536893e-06, "loss": 0.6687, "step": 5018 }, { "epoch": 0.4483844315923453, "grad_norm": 9.615642407996308, "learning_rate": 1.34730525284482e-06, "loss": 0.4511, "step": 5020 }, { "epoch": 0.44856307080811914, "grad_norm": 8.02623737813649, "learning_rate": 1.3467203644590886e-06, "loss": -0.7219, "step": 5022 }, { "epoch": 0.448741710023893, "grad_norm": 4.8648140939004945, "learning_rate": 1.3461353412239742e-06, "loss": 0.2924, "step": 5024 }, { "epoch": 0.44892034923966684, "grad_norm": 6.9368651503408705, "learning_rate": 1.3455501833670087e-06, "loss": -1.2288, "step": 5026 }, { "epoch": 0.44909898845544066, "grad_norm": 5.334494265724252, "learning_rate": 1.344964891115777e-06, "loss": 0.9898, "step": 5028 }, { "epoch": 0.44927762767121454, "grad_norm": 13.333092750921951, "learning_rate": 1.3443794646979158e-06, "loss": 0.567, "step": 5030 }, { "epoch": 0.44945626688698836, "grad_norm": 9.482574945662051, "learning_rate": 1.3437939043411138e-06, "loss": 0.1615, "step": 5032 }, { "epoch": 0.4496349061027622, "grad_norm": 9.319870457874535, "learning_rate": 1.3432082102731125e-06, "loss": -0.4033, "step": 5034 }, { "epoch": 0.44981354531853607, "grad_norm": 27.976350878842183, "learning_rate": 1.342622382721704e-06, "loss": 0.8307, "step": 5036 }, { "epoch": 0.4499921845343099, "grad_norm": 3.5179530594704778, "learning_rate": 1.3420364219147345e-06, "loss": 0.048, "step": 5038 }, { "epoch": 0.4501708237500837, "grad_norm": 8.87199630412514, "learning_rate": 1.3414503280800999e-06, "loss": 0.4381, "step": 5040 }, { "epoch": 0.4503494629658576, "grad_norm": 8.897493468086866, "learning_rate": 1.3408641014457488e-06, "loss": -0.0348, "step": 5042 }, { "epoch": 0.4505281021816314, "grad_norm": 19.447675880885324, "learning_rate": 1.340277742239682e-06, "loss": -0.9579, "step": 5044 }, { "epoch": 0.45070674139740524, "grad_norm": 7.438416753095017, "learning_rate": 1.3396912506899508e-06, "loss": 0.995, "step": 5046 }, { "epoch": 0.4508853806131791, "grad_norm": 7.8974058117009225, "learning_rate": 1.3391046270246575e-06, "loss": -1.2417, "step": 5048 }, { "epoch": 0.45106401982895294, "grad_norm": 9.51587034012539, "learning_rate": 1.3385178714719582e-06, "loss": -0.3868, "step": 5050 }, { "epoch": 0.45124265904472677, "grad_norm": 13.704893165570008, "learning_rate": 1.3379309842600578e-06, "loss": -1.0782, "step": 5052 }, { "epoch": 0.45142129826050065, "grad_norm": 9.945101152138651, "learning_rate": 1.337343965617214e-06, "loss": -0.2369, "step": 5054 }, { "epoch": 0.45159993747627447, "grad_norm": 9.522504552718416, "learning_rate": 1.3367568157717347e-06, "loss": -0.355, "step": 5056 }, { "epoch": 0.4517785766920483, "grad_norm": 7.350004608775659, "learning_rate": 1.3361695349519789e-06, "loss": -0.3951, "step": 5058 }, { "epoch": 0.4519572159078222, "grad_norm": 13.351963061757587, "learning_rate": 1.3355821233863572e-06, "loss": -0.243, "step": 5060 }, { "epoch": 0.452135855123596, "grad_norm": 5.864083394705314, "learning_rate": 1.3349945813033302e-06, "loss": -0.2498, "step": 5062 }, { "epoch": 0.4523144943393699, "grad_norm": 7.819242451846083, "learning_rate": 1.3344069089314099e-06, "loss": -0.3054, "step": 5064 }, { "epoch": 0.4524931335551437, "grad_norm": 9.058437936618128, "learning_rate": 1.3338191064991585e-06, "loss": 0.7923, "step": 5066 }, { "epoch": 0.4526717727709175, "grad_norm": 12.094106427443092, "learning_rate": 1.33323117423519e-06, "loss": 0.7898, "step": 5068 }, { "epoch": 0.4528504119866914, "grad_norm": 9.460792345087963, "learning_rate": 1.3326431123681665e-06, "loss": 0.3204, "step": 5070 }, { "epoch": 0.4530290512024652, "grad_norm": 7.234299859136632, "learning_rate": 1.332054921126803e-06, "loss": -0.4045, "step": 5072 }, { "epoch": 0.45320769041823905, "grad_norm": 12.285820108255772, "learning_rate": 1.331466600739863e-06, "loss": 0.2204, "step": 5074 }, { "epoch": 0.45338632963401293, "grad_norm": 12.026019814880176, "learning_rate": 1.3308781514361615e-06, "loss": -2.2562, "step": 5076 }, { "epoch": 0.45356496884978675, "grad_norm": 15.239409224305888, "learning_rate": 1.3302895734445633e-06, "loss": -0.7683, "step": 5078 }, { "epoch": 0.4537436080655606, "grad_norm": 7.965578173518087, "learning_rate": 1.3297008669939827e-06, "loss": -0.4944, "step": 5080 }, { "epoch": 0.45392224728133446, "grad_norm": 4.389743485657567, "learning_rate": 1.3291120323133841e-06, "loss": -0.6621, "step": 5082 }, { "epoch": 0.4541008864971083, "grad_norm": 5.707876405272046, "learning_rate": 1.3285230696317826e-06, "loss": 0.7733, "step": 5084 }, { "epoch": 0.4542795257128821, "grad_norm": 8.66302731713131, "learning_rate": 1.327933979178242e-06, "loss": 0.6019, "step": 5086 }, { "epoch": 0.454458164928656, "grad_norm": 3.5119122376566363, "learning_rate": 1.3273447611818766e-06, "loss": 0.3964, "step": 5088 }, { "epoch": 0.4546368041444298, "grad_norm": 13.276301927535911, "learning_rate": 1.32675541587185e-06, "loss": -1.495, "step": 5090 }, { "epoch": 0.45481544336020363, "grad_norm": 9.95861377613798, "learning_rate": 1.3261659434773748e-06, "loss": -0.4445, "step": 5092 }, { "epoch": 0.4549940825759775, "grad_norm": 13.859143793871928, "learning_rate": 1.3255763442277147e-06, "loss": -0.9644, "step": 5094 }, { "epoch": 0.45517272179175133, "grad_norm": 9.379178756193156, "learning_rate": 1.32498661835218e-06, "loss": -0.8036, "step": 5096 }, { "epoch": 0.45535136100752516, "grad_norm": 15.30721219684557, "learning_rate": 1.3243967660801328e-06, "loss": -0.6255, "step": 5098 }, { "epoch": 0.45553000022329904, "grad_norm": 16.55529766749529, "learning_rate": 1.3238067876409834e-06, "loss": -0.8717, "step": 5100 }, { "epoch": 0.45570863943907286, "grad_norm": 7.293225047384516, "learning_rate": 1.3232166832641904e-06, "loss": 0.9758, "step": 5102 }, { "epoch": 0.4558872786548467, "grad_norm": 10.026706129902765, "learning_rate": 1.3226264531792633e-06, "loss": 0.4426, "step": 5104 }, { "epoch": 0.45606591787062056, "grad_norm": 4.835458660915816, "learning_rate": 1.3220360976157582e-06, "loss": 0.38, "step": 5106 }, { "epoch": 0.4562445570863944, "grad_norm": 9.90206368255324, "learning_rate": 1.3214456168032818e-06, "loss": 0.3236, "step": 5108 }, { "epoch": 0.4564231963021682, "grad_norm": 20.85812189002876, "learning_rate": 1.320855010971489e-06, "loss": -0.4333, "step": 5110 }, { "epoch": 0.4566018355179421, "grad_norm": 9.59190405566517, "learning_rate": 1.320264280350082e-06, "loss": -0.3062, "step": 5112 }, { "epoch": 0.4567804747337159, "grad_norm": 12.876041703534975, "learning_rate": 1.319673425168814e-06, "loss": -0.3386, "step": 5114 }, { "epoch": 0.45695911394948974, "grad_norm": 9.855631702601524, "learning_rate": 1.3190824456574847e-06, "loss": 0.8922, "step": 5116 }, { "epoch": 0.4571377531652636, "grad_norm": 4.309909872011061, "learning_rate": 1.318491342045943e-06, "loss": 0.2437, "step": 5118 }, { "epoch": 0.45731639238103744, "grad_norm": 6.191540256337643, "learning_rate": 1.3179001145640853e-06, "loss": -0.1055, "step": 5120 }, { "epoch": 0.45749503159681126, "grad_norm": 4.393612100056923, "learning_rate": 1.317308763441858e-06, "loss": 0.1222, "step": 5122 }, { "epoch": 0.45767367081258514, "grad_norm": 3.4730637679194944, "learning_rate": 1.3167172889092529e-06, "loss": -0.4428, "step": 5124 }, { "epoch": 0.45785231002835897, "grad_norm": 9.626647411901322, "learning_rate": 1.316125691196312e-06, "loss": -0.8808, "step": 5126 }, { "epoch": 0.45803094924413285, "grad_norm": 5.758891321613075, "learning_rate": 1.3155339705331244e-06, "loss": -0.4663, "step": 5128 }, { "epoch": 0.45820958845990667, "grad_norm": 13.857113968786766, "learning_rate": 1.3149421271498267e-06, "loss": 0.404, "step": 5130 }, { "epoch": 0.4583882276756805, "grad_norm": 7.386738632393258, "learning_rate": 1.3143501612766041e-06, "loss": 0.0433, "step": 5132 }, { "epoch": 0.45856686689145437, "grad_norm": 12.155086981439794, "learning_rate": 1.3137580731436884e-06, "loss": 0.2709, "step": 5134 }, { "epoch": 0.4587455061072282, "grad_norm": 8.540408852860818, "learning_rate": 1.3131658629813595e-06, "loss": -0.2306, "step": 5136 }, { "epoch": 0.458924145323002, "grad_norm": 6.792185882504984, "learning_rate": 1.3125735310199452e-06, "loss": -0.2062, "step": 5138 }, { "epoch": 0.4591027845387759, "grad_norm": 6.432493466649331, "learning_rate": 1.3119810774898198e-06, "loss": -0.8859, "step": 5140 }, { "epoch": 0.4592814237545497, "grad_norm": 5.8270730713294885, "learning_rate": 1.3113885026214055e-06, "loss": 0.4217, "step": 5142 }, { "epoch": 0.45946006297032355, "grad_norm": 8.523930690073664, "learning_rate": 1.3107958066451717e-06, "loss": -0.9011, "step": 5144 }, { "epoch": 0.4596387021860974, "grad_norm": 5.139829440719556, "learning_rate": 1.3102029897916333e-06, "loss": 0.1097, "step": 5146 }, { "epoch": 0.45981734140187125, "grad_norm": 7.360063848633178, "learning_rate": 1.3096100522913556e-06, "loss": 0.1441, "step": 5148 }, { "epoch": 0.4599959806176451, "grad_norm": 5.609061994825159, "learning_rate": 1.3090169943749473e-06, "loss": -0.5798, "step": 5150 }, { "epoch": 0.46017461983341895, "grad_norm": 10.323583207358665, "learning_rate": 1.3084238162730663e-06, "loss": -0.4126, "step": 5152 }, { "epoch": 0.4603532590491928, "grad_norm": 18.640178051536115, "learning_rate": 1.307830518216416e-06, "loss": -1.0865, "step": 5154 }, { "epoch": 0.4605318982649666, "grad_norm": 8.167150003052674, "learning_rate": 1.3072371004357468e-06, "loss": -0.6889, "step": 5156 }, { "epoch": 0.4607105374807405, "grad_norm": 11.798898445049925, "learning_rate": 1.3066435631618558e-06, "loss": -0.3467, "step": 5158 }, { "epoch": 0.4608891766965143, "grad_norm": 3.5055888377075908, "learning_rate": 1.3060499066255864e-06, "loss": 0.5657, "step": 5160 }, { "epoch": 0.4610678159122881, "grad_norm": 5.140650044759946, "learning_rate": 1.3054561310578285e-06, "loss": -0.6931, "step": 5162 }, { "epoch": 0.461246455128062, "grad_norm": 7.243398357344679, "learning_rate": 1.3048622366895187e-06, "loss": -0.0525, "step": 5164 }, { "epoch": 0.46142509434383583, "grad_norm": 4.3834343399381295, "learning_rate": 1.3042682237516386e-06, "loss": 0.2224, "step": 5166 }, { "epoch": 0.46160373355960965, "grad_norm": 11.155935758551399, "learning_rate": 1.3036740924752172e-06, "loss": -1.392, "step": 5168 }, { "epoch": 0.46178237277538353, "grad_norm": 11.18995579878708, "learning_rate": 1.3030798430913287e-06, "loss": 0.3922, "step": 5170 }, { "epoch": 0.46196101199115736, "grad_norm": 11.288851303712182, "learning_rate": 1.302485475831094e-06, "loss": -0.2833, "step": 5172 }, { "epoch": 0.4621396512069312, "grad_norm": 17.259924645817456, "learning_rate": 1.301890990925679e-06, "loss": -0.118, "step": 5174 }, { "epoch": 0.46231829042270506, "grad_norm": 12.461155464394643, "learning_rate": 1.3012963886062956e-06, "loss": -1.282, "step": 5176 }, { "epoch": 0.4624969296384789, "grad_norm": 19.91546005786475, "learning_rate": 1.300701669104202e-06, "loss": -0.4369, "step": 5178 }, { "epoch": 0.4626755688542527, "grad_norm": 7.205510933818838, "learning_rate": 1.3001068326507007e-06, "loss": -0.9316, "step": 5180 }, { "epoch": 0.4628542080700266, "grad_norm": 14.834956276793504, "learning_rate": 1.2995118794771415e-06, "loss": -0.9122, "step": 5182 }, { "epoch": 0.4630328472858004, "grad_norm": 4.493450060346083, "learning_rate": 1.2989168098149175e-06, "loss": -0.2245, "step": 5184 }, { "epoch": 0.46321148650157423, "grad_norm": 10.845659686962806, "learning_rate": 1.2983216238954694e-06, "loss": 0.4933, "step": 5186 }, { "epoch": 0.4633901257173481, "grad_norm": 19.808974665411682, "learning_rate": 1.2977263219502807e-06, "loss": -0.1444, "step": 5188 }, { "epoch": 0.46356876493312194, "grad_norm": 4.070536953234362, "learning_rate": 1.2971309042108815e-06, "loss": 0.0439, "step": 5190 }, { "epoch": 0.46374740414889576, "grad_norm": 8.688291225491081, "learning_rate": 1.2965353709088473e-06, "loss": -1.102, "step": 5192 }, { "epoch": 0.46392604336466964, "grad_norm": 9.637871001112332, "learning_rate": 1.2959397222757974e-06, "loss": -0.3254, "step": 5194 }, { "epoch": 0.46410468258044346, "grad_norm": 14.374327536071307, "learning_rate": 1.2953439585433963e-06, "loss": -0.4473, "step": 5196 }, { "epoch": 0.46428332179621734, "grad_norm": 9.493772774644547, "learning_rate": 1.2947480799433536e-06, "loss": -0.9195, "step": 5198 }, { "epoch": 0.46446196101199116, "grad_norm": 5.016719710533242, "learning_rate": 1.2941520867074232e-06, "loss": 0.3561, "step": 5200 }, { "epoch": 0.464640600227765, "grad_norm": 9.489574936249944, "learning_rate": 1.2935559790674042e-06, "loss": -1.1077, "step": 5202 }, { "epoch": 0.46481923944353887, "grad_norm": 9.572132320600565, "learning_rate": 1.2929597572551396e-06, "loss": 0.2462, "step": 5204 }, { "epoch": 0.4649978786593127, "grad_norm": 12.38877826027355, "learning_rate": 1.2923634215025166e-06, "loss": -2.0243, "step": 5206 }, { "epoch": 0.4651765178750865, "grad_norm": 6.684917200755521, "learning_rate": 1.2917669720414677e-06, "loss": 0.1034, "step": 5208 }, { "epoch": 0.4653551570908604, "grad_norm": 6.068607046563132, "learning_rate": 1.2911704091039684e-06, "loss": 0.6802, "step": 5210 }, { "epoch": 0.4655337963066342, "grad_norm": 13.42346707761408, "learning_rate": 1.2905737329220392e-06, "loss": -0.9123, "step": 5212 }, { "epoch": 0.46571243552240804, "grad_norm": 8.861095012569121, "learning_rate": 1.2899769437277445e-06, "loss": -0.79, "step": 5214 }, { "epoch": 0.4658910747381819, "grad_norm": 10.32996013918015, "learning_rate": 1.2893800417531925e-06, "loss": -0.0221, "step": 5216 }, { "epoch": 0.46606971395395574, "grad_norm": 9.248445534899998, "learning_rate": 1.2887830272305353e-06, "loss": -0.4362, "step": 5218 }, { "epoch": 0.46624835316972957, "grad_norm": 7.76456043460773, "learning_rate": 1.2881859003919686e-06, "loss": -0.0785, "step": 5220 }, { "epoch": 0.46642699238550345, "grad_norm": 7.070207019672436, "learning_rate": 1.2875886614697323e-06, "loss": -0.0974, "step": 5222 }, { "epoch": 0.46660563160127727, "grad_norm": 7.055565324244241, "learning_rate": 1.286991310696109e-06, "loss": 0.9823, "step": 5224 }, { "epoch": 0.4667842708170511, "grad_norm": 2.8175359780181615, "learning_rate": 1.2863938483034263e-06, "loss": 0.4396, "step": 5226 }, { "epoch": 0.466962910032825, "grad_norm": 31.021393315931487, "learning_rate": 1.2857962745240532e-06, "loss": -0.3022, "step": 5228 }, { "epoch": 0.4671415492485988, "grad_norm": 12.075825500703358, "learning_rate": 1.2851985895904038e-06, "loss": -0.4465, "step": 5230 }, { "epoch": 0.4673201884643726, "grad_norm": 4.3794408875086575, "learning_rate": 1.2846007937349347e-06, "loss": 0.7641, "step": 5232 }, { "epoch": 0.4674988276801465, "grad_norm": 10.085666691271424, "learning_rate": 1.2840028871901448e-06, "loss": 0.4173, "step": 5234 }, { "epoch": 0.4676774668959203, "grad_norm": 7.627908620926133, "learning_rate": 1.283404870188578e-06, "loss": 1.2199, "step": 5236 }, { "epoch": 0.46785610611169415, "grad_norm": 8.306443381372633, "learning_rate": 1.2828067429628197e-06, "loss": -0.7306, "step": 5238 }, { "epoch": 0.468034745327468, "grad_norm": 9.415363547933032, "learning_rate": 1.2822085057454983e-06, "loss": -0.2939, "step": 5240 }, { "epoch": 0.46821338454324185, "grad_norm": 18.21141341613224, "learning_rate": 1.2816101587692858e-06, "loss": -0.8642, "step": 5242 }, { "epoch": 0.4683920237590157, "grad_norm": 9.21336367736913, "learning_rate": 1.2810117022668954e-06, "loss": 0.2114, "step": 5244 }, { "epoch": 0.46857066297478955, "grad_norm": 9.319226184859465, "learning_rate": 1.2804131364710846e-06, "loss": 0.3403, "step": 5246 }, { "epoch": 0.4687493021905634, "grad_norm": 22.403382100500263, "learning_rate": 1.2798144616146523e-06, "loss": 1.0393, "step": 5248 }, { "epoch": 0.4689279414063372, "grad_norm": 13.040080566157366, "learning_rate": 1.2792156779304398e-06, "loss": 0.0337, "step": 5250 }, { "epoch": 0.4691065806221111, "grad_norm": 12.795139963652337, "learning_rate": 1.2786167856513323e-06, "loss": -1.1532, "step": 5252 }, { "epoch": 0.4692852198378849, "grad_norm": 1.969701809365878, "learning_rate": 1.2780177850102545e-06, "loss": 0.15, "step": 5254 }, { "epoch": 0.4694638590536587, "grad_norm": 16.646263349355426, "learning_rate": 1.2774186762401754e-06, "loss": 0.2132, "step": 5256 }, { "epoch": 0.4696424982694326, "grad_norm": 12.081304410524785, "learning_rate": 1.2768194595741053e-06, "loss": -0.8695, "step": 5258 }, { "epoch": 0.46982113748520643, "grad_norm": 5.042830887142979, "learning_rate": 1.2762201352450966e-06, "loss": 1.4969, "step": 5260 }, { "epoch": 0.4699997767009803, "grad_norm": 4.674225385005424, "learning_rate": 1.2756207034862439e-06, "loss": 0.692, "step": 5262 }, { "epoch": 0.47017841591675413, "grad_norm": 14.680144330804183, "learning_rate": 1.2750211645306826e-06, "loss": 1.1737, "step": 5264 }, { "epoch": 0.47035705513252796, "grad_norm": 15.98804981279475, "learning_rate": 1.2744215186115903e-06, "loss": 0.3036, "step": 5266 }, { "epoch": 0.47053569434830184, "grad_norm": 15.149217118282879, "learning_rate": 1.2738217659621871e-06, "loss": -0.7596, "step": 5268 }, { "epoch": 0.47071433356407566, "grad_norm": 10.505967034155109, "learning_rate": 1.2732219068157332e-06, "loss": 1.1795, "step": 5270 }, { "epoch": 0.4708929727798495, "grad_norm": 25.850688920718945, "learning_rate": 1.272621941405531e-06, "loss": -1.7326, "step": 5272 }, { "epoch": 0.47107161199562336, "grad_norm": 7.293082515767055, "learning_rate": 1.2720218699649241e-06, "loss": -0.7428, "step": 5274 }, { "epoch": 0.4712502512113972, "grad_norm": 7.817034816680511, "learning_rate": 1.2714216927272971e-06, "loss": 0.1456, "step": 5276 }, { "epoch": 0.471428890427171, "grad_norm": 4.6443910056228415, "learning_rate": 1.2708214099260764e-06, "loss": -0.0041, "step": 5278 }, { "epoch": 0.4716075296429449, "grad_norm": 10.728006975528688, "learning_rate": 1.2702210217947287e-06, "loss": 0.5783, "step": 5280 }, { "epoch": 0.4717861688587187, "grad_norm": 4.844976848157242, "learning_rate": 1.2696205285667618e-06, "loss": -0.1554, "step": 5282 }, { "epoch": 0.47196480807449254, "grad_norm": 3.5573358416665237, "learning_rate": 1.2690199304757248e-06, "loss": 0.2226, "step": 5284 }, { "epoch": 0.4721434472902664, "grad_norm": 7.455112919914871, "learning_rate": 1.2684192277552073e-06, "loss": 1.4827, "step": 5286 }, { "epoch": 0.47232208650604024, "grad_norm": 22.836143302185874, "learning_rate": 1.2678184206388393e-06, "loss": -1.2909, "step": 5288 }, { "epoch": 0.47250072572181406, "grad_norm": 7.738069518888441, "learning_rate": 1.2672175093602927e-06, "loss": 0.143, "step": 5290 }, { "epoch": 0.47267936493758794, "grad_norm": 4.0689573173618685, "learning_rate": 1.2666164941532776e-06, "loss": -0.181, "step": 5292 }, { "epoch": 0.47285800415336177, "grad_norm": 5.731398638978685, "learning_rate": 1.2660153752515466e-06, "loss": 0.2389, "step": 5294 }, { "epoch": 0.4730366433691356, "grad_norm": 4.219400087354596, "learning_rate": 1.2654141528888921e-06, "loss": 0.0818, "step": 5296 }, { "epoch": 0.47321528258490947, "grad_norm": 7.134293501693811, "learning_rate": 1.264812827299146e-06, "loss": 0.2632, "step": 5298 }, { "epoch": 0.4733939218006833, "grad_norm": 5.579485046251818, "learning_rate": 1.2642113987161813e-06, "loss": 0.8183, "step": 5300 }, { "epoch": 0.4735725610164571, "grad_norm": 3.4779002155557586, "learning_rate": 1.2636098673739104e-06, "loss": 0.0596, "step": 5302 }, { "epoch": 0.473751200232231, "grad_norm": 5.443798582646391, "learning_rate": 1.2630082335062855e-06, "loss": 0.0376, "step": 5304 }, { "epoch": 0.4739298394480048, "grad_norm": 6.588246729183692, "learning_rate": 1.2624064973473002e-06, "loss": -0.8207, "step": 5306 }, { "epoch": 0.47410847866377864, "grad_norm": 9.179150582702185, "learning_rate": 1.2618046591309856e-06, "loss": 0.7323, "step": 5308 }, { "epoch": 0.4742871178795525, "grad_norm": 10.40553404948446, "learning_rate": 1.2612027190914142e-06, "loss": 0.0574, "step": 5310 }, { "epoch": 0.47446575709532635, "grad_norm": 1.6510501063118121, "learning_rate": 1.2606006774626975e-06, "loss": 0.1272, "step": 5312 }, { "epoch": 0.47464439631110017, "grad_norm": 6.69579848458654, "learning_rate": 1.2599985344789867e-06, "loss": -1.2869, "step": 5314 }, { "epoch": 0.47482303552687405, "grad_norm": 7.283452695425701, "learning_rate": 1.259396290374472e-06, "loss": -0.6389, "step": 5316 }, { "epoch": 0.4750016747426479, "grad_norm": 5.041561388329989, "learning_rate": 1.2587939453833833e-06, "loss": 0.3912, "step": 5318 }, { "epoch": 0.4751803139584217, "grad_norm": 24.378745705788475, "learning_rate": 1.2581914997399896e-06, "loss": -1.4433, "step": 5320 }, { "epoch": 0.4753589531741956, "grad_norm": 8.485145154530771, "learning_rate": 1.2575889536785993e-06, "loss": -1.0535, "step": 5322 }, { "epoch": 0.4755375923899694, "grad_norm": 7.267405433315093, "learning_rate": 1.2569863074335597e-06, "loss": -0.0547, "step": 5324 }, { "epoch": 0.4757162316057433, "grad_norm": 8.328315546600646, "learning_rate": 1.2563835612392567e-06, "loss": 0.0363, "step": 5326 }, { "epoch": 0.4758948708215171, "grad_norm": 15.077227345365198, "learning_rate": 1.2557807153301154e-06, "loss": -0.2978, "step": 5328 }, { "epoch": 0.4760735100372909, "grad_norm": 11.84367656433368, "learning_rate": 1.2551777699406e-06, "loss": -0.5552, "step": 5330 }, { "epoch": 0.4762521492530648, "grad_norm": 32.7865783026664, "learning_rate": 1.2545747253052126e-06, "loss": -0.6607, "step": 5332 }, { "epoch": 0.47643078846883863, "grad_norm": 8.51919005076737, "learning_rate": 1.253971581658495e-06, "loss": -1.008, "step": 5334 }, { "epoch": 0.47660942768461245, "grad_norm": 6.632499624540603, "learning_rate": 1.2533683392350262e-06, "loss": -0.8388, "step": 5336 }, { "epoch": 0.47678806690038633, "grad_norm": 6.104305261420777, "learning_rate": 1.2527649982694248e-06, "loss": -0.8817, "step": 5338 }, { "epoch": 0.47696670611616016, "grad_norm": 4.337378130253322, "learning_rate": 1.252161558996347e-06, "loss": -0.7395, "step": 5340 }, { "epoch": 0.477145345331934, "grad_norm": 12.33005693235348, "learning_rate": 1.2515580216504873e-06, "loss": -0.8209, "step": 5342 }, { "epoch": 0.47732398454770786, "grad_norm": 18.218682159768633, "learning_rate": 1.2509543864665789e-06, "loss": -1.0448, "step": 5344 }, { "epoch": 0.4775026237634817, "grad_norm": 6.466521390613633, "learning_rate": 1.2503506536793922e-06, "loss": -0.5921, "step": 5346 }, { "epoch": 0.4776812629792555, "grad_norm": 6.785512120956851, "learning_rate": 1.249746823523736e-06, "loss": -0.1243, "step": 5348 }, { "epoch": 0.4778599021950294, "grad_norm": 9.69471245813903, "learning_rate": 1.2491428962344574e-06, "loss": -0.4616, "step": 5350 }, { "epoch": 0.4780385414108032, "grad_norm": 14.168174992467714, "learning_rate": 1.2485388720464405e-06, "loss": -1.6097, "step": 5352 }, { "epoch": 0.47821718062657703, "grad_norm": 14.603383317618183, "learning_rate": 1.2479347511946076e-06, "loss": -0.4293, "step": 5354 }, { "epoch": 0.4783958198423509, "grad_norm": 5.02061314185732, "learning_rate": 1.2473305339139185e-06, "loss": 0.2084, "step": 5356 }, { "epoch": 0.47857445905812473, "grad_norm": 4.426898596405991, "learning_rate": 1.24672622043937e-06, "loss": 0.6123, "step": 5358 }, { "epoch": 0.47875309827389856, "grad_norm": 7.620994391250699, "learning_rate": 1.2461218110059973e-06, "loss": -0.6177, "step": 5360 }, { "epoch": 0.47893173748967244, "grad_norm": 4.846742948188045, "learning_rate": 1.245517305848872e-06, "loss": 0.0818, "step": 5362 }, { "epoch": 0.47911037670544626, "grad_norm": 9.132805816935107, "learning_rate": 1.2449127052031034e-06, "loss": 0.3205, "step": 5364 }, { "epoch": 0.4792890159212201, "grad_norm": 2.4551054134384884, "learning_rate": 1.2443080093038384e-06, "loss": -0.1635, "step": 5366 }, { "epoch": 0.47946765513699396, "grad_norm": 6.255950841315418, "learning_rate": 1.2437032183862592e-06, "loss": -0.0662, "step": 5368 }, { "epoch": 0.4796462943527678, "grad_norm": 5.04170836544395, "learning_rate": 1.243098332685587e-06, "loss": 0.7909, "step": 5370 }, { "epoch": 0.4798249335685416, "grad_norm": 4.553280438013058, "learning_rate": 1.242493352437079e-06, "loss": -0.1705, "step": 5372 }, { "epoch": 0.4800035727843155, "grad_norm": 4.3593765313477375, "learning_rate": 1.2418882778760286e-06, "loss": -0.9559, "step": 5374 }, { "epoch": 0.4801822120000893, "grad_norm": 10.743834745861774, "learning_rate": 1.241283109237767e-06, "loss": -1.3546, "step": 5376 }, { "epoch": 0.48036085121586314, "grad_norm": 12.329694331356617, "learning_rate": 1.2406778467576614e-06, "loss": 0.6092, "step": 5378 }, { "epoch": 0.480539490431637, "grad_norm": 8.204159161002577, "learning_rate": 1.2400724906711154e-06, "loss": -0.5278, "step": 5380 }, { "epoch": 0.48071812964741084, "grad_norm": 6.987503887032125, "learning_rate": 1.239467041213569e-06, "loss": -0.3454, "step": 5382 }, { "epoch": 0.48089676886318466, "grad_norm": 4.619936465202187, "learning_rate": 1.238861498620499e-06, "loss": 0.116, "step": 5384 }, { "epoch": 0.48107540807895854, "grad_norm": 4.620438259194819, "learning_rate": 1.2382558631274176e-06, "loss": 0.6812, "step": 5386 }, { "epoch": 0.48125404729473237, "grad_norm": 9.07165906375153, "learning_rate": 1.2376501349698745e-06, "loss": -0.969, "step": 5388 }, { "epoch": 0.4814326865105062, "grad_norm": 5.907649767474921, "learning_rate": 1.237044314383454e-06, "loss": 0.8727, "step": 5390 }, { "epoch": 0.48161132572628007, "grad_norm": 20.24709723118508, "learning_rate": 1.236438401603776e-06, "loss": -0.4015, "step": 5392 }, { "epoch": 0.4817899649420539, "grad_norm": 10.714541484867985, "learning_rate": 1.235832396866499e-06, "loss": -0.4967, "step": 5394 }, { "epoch": 0.4819686041578278, "grad_norm": 11.673803780351477, "learning_rate": 1.2352263004073142e-06, "loss": 0.0105, "step": 5396 }, { "epoch": 0.4821472433736016, "grad_norm": 14.957496952628492, "learning_rate": 1.23462011246195e-06, "loss": -0.1379, "step": 5398 }, { "epoch": 0.4823258825893754, "grad_norm": 3.437617213244861, "learning_rate": 1.2340138332661702e-06, "loss": -0.0458, "step": 5400 }, { "epoch": 0.4825045218051493, "grad_norm": 2.187814744376884, "learning_rate": 1.2334074630557735e-06, "loss": 0.0578, "step": 5402 }, { "epoch": 0.4826831610209231, "grad_norm": 11.793913250931931, "learning_rate": 1.2328010020665955e-06, "loss": -0.9961, "step": 5404 }, { "epoch": 0.48286180023669695, "grad_norm": 6.868493625344387, "learning_rate": 1.232194450534505e-06, "loss": 0.2454, "step": 5406 }, { "epoch": 0.4830404394524708, "grad_norm": 7.377123510823532, "learning_rate": 1.2315878086954075e-06, "loss": -0.1405, "step": 5408 }, { "epoch": 0.48321907866824465, "grad_norm": 7.734653660541961, "learning_rate": 1.2309810767852433e-06, "loss": 1.0518, "step": 5410 }, { "epoch": 0.4833977178840185, "grad_norm": 8.773858377037579, "learning_rate": 1.2303742550399876e-06, "loss": -0.1751, "step": 5412 }, { "epoch": 0.48357635709979235, "grad_norm": 9.88156411952289, "learning_rate": 1.2297673436956505e-06, "loss": -0.0871, "step": 5414 }, { "epoch": 0.4837549963155662, "grad_norm": 7.582984357356675, "learning_rate": 1.2291603429882771e-06, "loss": 0.0423, "step": 5416 }, { "epoch": 0.48393363553134, "grad_norm": 8.019203978344052, "learning_rate": 1.228553253153947e-06, "loss": 0.9609, "step": 5418 }, { "epoch": 0.4841122747471139, "grad_norm": 9.361562228125935, "learning_rate": 1.2279460744287753e-06, "loss": 0.0314, "step": 5420 }, { "epoch": 0.4842909139628877, "grad_norm": 8.205173200398342, "learning_rate": 1.2273388070489102e-06, "loss": -0.4299, "step": 5422 }, { "epoch": 0.4844695531786615, "grad_norm": 12.11526192590169, "learning_rate": 1.2267314512505355e-06, "loss": -0.4304, "step": 5424 }, { "epoch": 0.4846481923944354, "grad_norm": 30.311922111115166, "learning_rate": 1.2261240072698693e-06, "loss": -0.6694, "step": 5426 }, { "epoch": 0.48482683161020923, "grad_norm": 5.994069983699091, "learning_rate": 1.2255164753431639e-06, "loss": -0.1642, "step": 5428 }, { "epoch": 0.48500547082598305, "grad_norm": 6.752151146485131, "learning_rate": 1.2249088557067052e-06, "loss": 0.6578, "step": 5430 }, { "epoch": 0.48518411004175693, "grad_norm": 4.964405007413951, "learning_rate": 1.224301148596814e-06, "loss": -0.4644, "step": 5432 }, { "epoch": 0.48536274925753076, "grad_norm": 11.493262929796634, "learning_rate": 1.223693354249845e-06, "loss": -0.3326, "step": 5434 }, { "epoch": 0.4855413884733046, "grad_norm": 15.52486762882628, "learning_rate": 1.2230854729021858e-06, "loss": -1.4554, "step": 5436 }, { "epoch": 0.48572002768907846, "grad_norm": 5.543980490657064, "learning_rate": 1.2224775047902598e-06, "loss": 0.6611, "step": 5438 }, { "epoch": 0.4858986669048523, "grad_norm": 6.5386097886897945, "learning_rate": 1.2218694501505227e-06, "loss": 0.2563, "step": 5440 }, { "epoch": 0.4860773061206261, "grad_norm": 12.241864675465983, "learning_rate": 1.221261309219464e-06, "loss": 0.8055, "step": 5442 }, { "epoch": 0.4862559453364, "grad_norm": 5.95505602498769, "learning_rate": 1.2206530822336069e-06, "loss": -0.9362, "step": 5444 }, { "epoch": 0.4864345845521738, "grad_norm": 4.894073927719661, "learning_rate": 1.2200447694295082e-06, "loss": -0.1559, "step": 5446 }, { "epoch": 0.48661322376794763, "grad_norm": 7.8751428984740794, "learning_rate": 1.2194363710437584e-06, "loss": 0.196, "step": 5448 }, { "epoch": 0.4867918629837215, "grad_norm": 18.788489953594677, "learning_rate": 1.2188278873129803e-06, "loss": -0.3198, "step": 5450 }, { "epoch": 0.48697050219949534, "grad_norm": 16.0658188296759, "learning_rate": 1.2182193184738307e-06, "loss": 0.0166, "step": 5452 }, { "epoch": 0.48714914141526916, "grad_norm": 10.719728653163799, "learning_rate": 1.2176106647629996e-06, "loss": -0.4236, "step": 5454 }, { "epoch": 0.48732778063104304, "grad_norm": 8.709985846979881, "learning_rate": 1.2170019264172092e-06, "loss": -0.6045, "step": 5456 }, { "epoch": 0.48750641984681686, "grad_norm": 7.070175456199465, "learning_rate": 1.2163931036732152e-06, "loss": -0.8156, "step": 5458 }, { "epoch": 0.48768505906259074, "grad_norm": 24.736165196981197, "learning_rate": 1.2157841967678063e-06, "loss": -0.3516, "step": 5460 }, { "epoch": 0.48786369827836457, "grad_norm": 15.11944124834345, "learning_rate": 1.215175205937803e-06, "loss": -0.1594, "step": 5462 }, { "epoch": 0.4880423374941384, "grad_norm": 5.894925032924408, "learning_rate": 1.2145661314200604e-06, "loss": -0.0132, "step": 5464 }, { "epoch": 0.48822097670991227, "grad_norm": 10.606875410281686, "learning_rate": 1.2139569734514637e-06, "loss": 0.5706, "step": 5466 }, { "epoch": 0.4883996159256861, "grad_norm": 3.9132835846640943, "learning_rate": 1.2133477322689321e-06, "loss": 0.1114, "step": 5468 }, { "epoch": 0.4885782551414599, "grad_norm": 7.435801336104251, "learning_rate": 1.2127384081094166e-06, "loss": 0.3889, "step": 5470 }, { "epoch": 0.4887568943572338, "grad_norm": 10.245646598968609, "learning_rate": 1.212129001209901e-06, "loss": -0.8693, "step": 5472 }, { "epoch": 0.4889355335730076, "grad_norm": 9.439309054158672, "learning_rate": 1.2115195118074006e-06, "loss": -1.0068, "step": 5474 }, { "epoch": 0.48911417278878144, "grad_norm": 12.019807362972534, "learning_rate": 1.2109099401389634e-06, "loss": -0.9961, "step": 5476 }, { "epoch": 0.4892928120045553, "grad_norm": 11.46149533194254, "learning_rate": 1.2103002864416687e-06, "loss": 0.1665, "step": 5478 }, { "epoch": 0.48947145122032915, "grad_norm": 5.169799388188223, "learning_rate": 1.2096905509526286e-06, "loss": 1.0412, "step": 5480 }, { "epoch": 0.48965009043610297, "grad_norm": 7.884904234213301, "learning_rate": 1.209080733908986e-06, "loss": 0.2086, "step": 5482 }, { "epoch": 0.48982872965187685, "grad_norm": 10.447587489491818, "learning_rate": 1.2084708355479165e-06, "loss": -0.6786, "step": 5484 }, { "epoch": 0.49000736886765067, "grad_norm": 7.683501653696283, "learning_rate": 1.2078608561066268e-06, "loss": -0.4677, "step": 5486 }, { "epoch": 0.4901860080834245, "grad_norm": 8.784092344705428, "learning_rate": 1.207250795822355e-06, "loss": -0.6446, "step": 5488 }, { "epoch": 0.4903646472991984, "grad_norm": 8.046944212384386, "learning_rate": 1.2066406549323706e-06, "loss": 0.304, "step": 5490 }, { "epoch": 0.4905432865149722, "grad_norm": 17.841053560220928, "learning_rate": 1.2060304336739758e-06, "loss": 0.553, "step": 5492 }, { "epoch": 0.490721925730746, "grad_norm": 6.597774488745861, "learning_rate": 1.2054201322845017e-06, "loss": 0.403, "step": 5494 }, { "epoch": 0.4909005649465199, "grad_norm": 17.572357560786788, "learning_rate": 1.2048097510013123e-06, "loss": -1.4377, "step": 5496 }, { "epoch": 0.4910792041622937, "grad_norm": 16.933417584867964, "learning_rate": 1.2041992900618023e-06, "loss": -0.7687, "step": 5498 }, { "epoch": 0.49125784337806755, "grad_norm": 9.999983215317945, "learning_rate": 1.2035887497033973e-06, "loss": -0.3928, "step": 5500 }, { "epoch": 0.49143648259384143, "grad_norm": 2.744010297728142, "learning_rate": 1.2029781301635534e-06, "loss": 0.5272, "step": 5502 }, { "epoch": 0.49161512180961525, "grad_norm": 13.169229443136624, "learning_rate": 1.2023674316797582e-06, "loss": -1.0813, "step": 5504 }, { "epoch": 0.4917937610253891, "grad_norm": 2.734340645710528, "learning_rate": 1.2017566544895293e-06, "loss": 0.1476, "step": 5506 }, { "epoch": 0.49197240024116295, "grad_norm": 7.030542770421677, "learning_rate": 1.2011457988304158e-06, "loss": 0.4155, "step": 5508 }, { "epoch": 0.4921510394569368, "grad_norm": 7.284552746031618, "learning_rate": 1.2005348649399965e-06, "loss": 0.7304, "step": 5510 }, { "epoch": 0.4923296786727106, "grad_norm": 8.175629141860238, "learning_rate": 1.1999238530558808e-06, "loss": -0.2691, "step": 5512 }, { "epoch": 0.4925083178884845, "grad_norm": 5.621915352948474, "learning_rate": 1.1993127634157087e-06, "loss": -0.3643, "step": 5514 }, { "epoch": 0.4926869571042583, "grad_norm": 6.735012511062613, "learning_rate": 1.1987015962571507e-06, "loss": 0.3358, "step": 5516 }, { "epoch": 0.49286559632003213, "grad_norm": 10.374877193597356, "learning_rate": 1.198090351817906e-06, "loss": 0.366, "step": 5518 }, { "epoch": 0.493044235535806, "grad_norm": 5.6762291875246405, "learning_rate": 1.1974790303357058e-06, "loss": 0.2495, "step": 5520 }, { "epoch": 0.49322287475157983, "grad_norm": 4.6886188443379, "learning_rate": 1.1968676320483101e-06, "loss": -0.4992, "step": 5522 }, { "epoch": 0.4934015139673537, "grad_norm": 8.0930712794572, "learning_rate": 1.196256157193509e-06, "loss": -0.714, "step": 5524 }, { "epoch": 0.49358015318312753, "grad_norm": 4.5167330006212865, "learning_rate": 1.1956446060091226e-06, "loss": -1.6811, "step": 5526 }, { "epoch": 0.49375879239890136, "grad_norm": 5.721108205720506, "learning_rate": 1.195032978733e-06, "loss": -0.5588, "step": 5528 }, { "epoch": 0.49393743161467524, "grad_norm": 6.578412047441082, "learning_rate": 1.1944212756030207e-06, "loss": -0.1443, "step": 5530 }, { "epoch": 0.49411607083044906, "grad_norm": 7.947216904725938, "learning_rate": 1.1938094968570938e-06, "loss": -0.4744, "step": 5532 }, { "epoch": 0.4942947100462229, "grad_norm": 4.650090181850328, "learning_rate": 1.1931976427331566e-06, "loss": 0.2584, "step": 5534 }, { "epoch": 0.49447334926199676, "grad_norm": 10.362991185160341, "learning_rate": 1.1925857134691772e-06, "loss": 0.1772, "step": 5536 }, { "epoch": 0.4946519884777706, "grad_norm": 15.225306295185872, "learning_rate": 1.1919737093031516e-06, "loss": 0.0421, "step": 5538 }, { "epoch": 0.4948306276935444, "grad_norm": 10.234268024846454, "learning_rate": 1.1913616304731063e-06, "loss": -1.1824, "step": 5540 }, { "epoch": 0.4950092669093183, "grad_norm": 5.664246696897174, "learning_rate": 1.190749477217096e-06, "loss": -0.5548, "step": 5542 }, { "epoch": 0.4951879061250921, "grad_norm": 6.829787691315687, "learning_rate": 1.1901372497732036e-06, "loss": 0.0594, "step": 5544 }, { "epoch": 0.49536654534086594, "grad_norm": 5.321182874382085, "learning_rate": 1.1895249483795428e-06, "loss": -0.2498, "step": 5546 }, { "epoch": 0.4955451845566398, "grad_norm": 5.644124390613547, "learning_rate": 1.1889125732742546e-06, "loss": -0.0534, "step": 5548 }, { "epoch": 0.49572382377241364, "grad_norm": 1.9512768966316625, "learning_rate": 1.1883001246955086e-06, "loss": 0.7929, "step": 5550 }, { "epoch": 0.49590246298818746, "grad_norm": 9.30181189493792, "learning_rate": 1.1876876028815042e-06, "loss": 0.4264, "step": 5552 }, { "epoch": 0.49608110220396134, "grad_norm": 3.0220280005964804, "learning_rate": 1.1870750080704679e-06, "loss": 0.6916, "step": 5554 }, { "epoch": 0.49625974141973517, "grad_norm": 12.539983659700153, "learning_rate": 1.1864623405006553e-06, "loss": -0.3957, "step": 5556 }, { "epoch": 0.496438380635509, "grad_norm": 7.1402293957529555, "learning_rate": 1.1858496004103507e-06, "loss": -0.7162, "step": 5558 }, { "epoch": 0.49661701985128287, "grad_norm": 6.138090819222969, "learning_rate": 1.1852367880378653e-06, "loss": 0.6183, "step": 5560 }, { "epoch": 0.4967956590670567, "grad_norm": 6.649644323029822, "learning_rate": 1.1846239036215395e-06, "loss": -0.663, "step": 5562 }, { "epoch": 0.4969742982828305, "grad_norm": 5.742843486658492, "learning_rate": 1.1840109473997418e-06, "loss": -0.1874, "step": 5564 }, { "epoch": 0.4971529374986044, "grad_norm": 8.283796962399418, "learning_rate": 1.1833979196108678e-06, "loss": 0.6628, "step": 5566 }, { "epoch": 0.4973315767143782, "grad_norm": 10.867171703686399, "learning_rate": 1.1827848204933418e-06, "loss": -0.1479, "step": 5568 }, { "epoch": 0.49751021593015204, "grad_norm": 3.6840356620363046, "learning_rate": 1.1821716502856152e-06, "loss": -1.1402, "step": 5570 }, { "epoch": 0.4976888551459259, "grad_norm": 6.451390067836004, "learning_rate": 1.181558409226167e-06, "loss": -0.5757, "step": 5572 }, { "epoch": 0.49786749436169975, "grad_norm": 4.262955161932446, "learning_rate": 1.1809450975535047e-06, "loss": -0.1431, "step": 5574 }, { "epoch": 0.49804613357747357, "grad_norm": 9.69614423431785, "learning_rate": 1.1803317155061622e-06, "loss": 0.0945, "step": 5576 }, { "epoch": 0.49822477279324745, "grad_norm": 7.6952359113415865, "learning_rate": 1.1797182633227007e-06, "loss": 0.2113, "step": 5578 }, { "epoch": 0.4984034120090213, "grad_norm": 9.288970099808521, "learning_rate": 1.1791047412417104e-06, "loss": 0.1913, "step": 5580 }, { "epoch": 0.4985820512247951, "grad_norm": 12.167336084074135, "learning_rate": 1.1784911495018065e-06, "loss": -0.1438, "step": 5582 }, { "epoch": 0.498760690440569, "grad_norm": 5.711622121455584, "learning_rate": 1.1778774883416322e-06, "loss": 0.5956, "step": 5584 }, { "epoch": 0.4989393296563428, "grad_norm": 9.931443390064638, "learning_rate": 1.1772637579998583e-06, "loss": -0.6094, "step": 5586 }, { "epoch": 0.4991179688721166, "grad_norm": 7.187238605555917, "learning_rate": 1.1766499587151813e-06, "loss": -0.9557, "step": 5588 }, { "epoch": 0.4992966080878905, "grad_norm": 12.479914282479943, "learning_rate": 1.1760360907263256e-06, "loss": -1.5736, "step": 5590 }, { "epoch": 0.4994752473036643, "grad_norm": 19.07173268344615, "learning_rate": 1.1754221542720419e-06, "loss": -2.5332, "step": 5592 }, { "epoch": 0.4996538865194382, "grad_norm": 13.479147736635825, "learning_rate": 1.1748081495911068e-06, "loss": -1.2793, "step": 5594 }, { "epoch": 0.49983252573521203, "grad_norm": 8.021349552601453, "learning_rate": 1.1741940769223253e-06, "loss": -0.3481, "step": 5596 }, { "epoch": 0.5000111649509859, "grad_norm": 7.513591657844475, "learning_rate": 1.1735799365045264e-06, "loss": -1.591, "step": 5598 }, { "epoch": 0.5001898041667597, "grad_norm": 3.3882207124916786, "learning_rate": 1.1729657285765677e-06, "loss": 0.8201, "step": 5600 }, { "epoch": 0.5003684433825335, "grad_norm": 2.4605759127959934, "learning_rate": 1.172351453377332e-06, "loss": -0.7679, "step": 5602 }, { "epoch": 0.5005470825983074, "grad_norm": 5.945238070329112, "learning_rate": 1.1717371111457276e-06, "loss": -0.143, "step": 5604 }, { "epoch": 0.5007257218140813, "grad_norm": 6.1890451062657945, "learning_rate": 1.1711227021206904e-06, "loss": -0.6198, "step": 5606 }, { "epoch": 0.500904361029855, "grad_norm": 8.76761314978246, "learning_rate": 1.170508226541181e-06, "loss": -0.6795, "step": 5608 }, { "epoch": 0.5010830002456289, "grad_norm": 12.615673888506029, "learning_rate": 1.1698936846461866e-06, "loss": -1.3061, "step": 5610 }, { "epoch": 0.5012616394614028, "grad_norm": 9.352317938889701, "learning_rate": 1.1692790766747203e-06, "loss": -0.3704, "step": 5612 }, { "epoch": 0.5014402786771766, "grad_norm": 8.474138298226016, "learning_rate": 1.1686644028658199e-06, "loss": 0.1583, "step": 5614 }, { "epoch": 0.5016189178929504, "grad_norm": 8.426551327378789, "learning_rate": 1.1680496634585497e-06, "loss": -0.4084, "step": 5616 }, { "epoch": 0.5017975571087243, "grad_norm": 18.18620877745732, "learning_rate": 1.1674348586919995e-06, "loss": -0.8414, "step": 5618 }, { "epoch": 0.5019761963244981, "grad_norm": 6.7981472183990945, "learning_rate": 1.1668199888052843e-06, "loss": 0.4086, "step": 5620 }, { "epoch": 0.502154835540272, "grad_norm": 7.8818876633207, "learning_rate": 1.1662050540375446e-06, "loss": -0.759, "step": 5622 }, { "epoch": 0.5023334747560458, "grad_norm": 6.649446117308734, "learning_rate": 1.1655900546279452e-06, "loss": -0.1688, "step": 5624 }, { "epoch": 0.5025121139718197, "grad_norm": 9.53104307856844, "learning_rate": 1.1649749908156775e-06, "loss": -0.3048, "step": 5626 }, { "epoch": 0.5026907531875935, "grad_norm": 8.73128098500788, "learning_rate": 1.164359862839957e-06, "loss": -0.3941, "step": 5628 }, { "epoch": 0.5028693924033674, "grad_norm": 6.041596861688155, "learning_rate": 1.1637446709400247e-06, "loss": -0.493, "step": 5630 }, { "epoch": 0.5030480316191412, "grad_norm": 16.548456420829993, "learning_rate": 1.1631294153551457e-06, "loss": -0.1602, "step": 5632 }, { "epoch": 0.503226670834915, "grad_norm": 9.861307816192694, "learning_rate": 1.162514096324611e-06, "loss": -0.7075, "step": 5634 }, { "epoch": 0.5034053100506889, "grad_norm": 5.286647769434003, "learning_rate": 1.1618987140877349e-06, "loss": 0.2225, "step": 5636 }, { "epoch": 0.5035839492664628, "grad_norm": 18.062533791058357, "learning_rate": 1.1612832688838574e-06, "loss": -0.6212, "step": 5638 }, { "epoch": 0.5037625884822365, "grad_norm": 5.898678402054916, "learning_rate": 1.1606677609523428e-06, "loss": 0.8205, "step": 5640 }, { "epoch": 0.5039412276980104, "grad_norm": 10.482239370674622, "learning_rate": 1.160052190532579e-06, "loss": -1.0542, "step": 5642 }, { "epoch": 0.5041198669137843, "grad_norm": 4.593523292396912, "learning_rate": 1.1594365578639791e-06, "loss": -0.0624, "step": 5644 }, { "epoch": 0.5042985061295581, "grad_norm": 11.915108176447468, "learning_rate": 1.1588208631859807e-06, "loss": 0.0587, "step": 5646 }, { "epoch": 0.504477145345332, "grad_norm": 30.922146403242994, "learning_rate": 1.1582051067380434e-06, "loss": 0.1451, "step": 5648 }, { "epoch": 0.5046557845611058, "grad_norm": 4.917330530036188, "learning_rate": 1.1575892887596541e-06, "loss": -0.5437, "step": 5650 }, { "epoch": 0.5048344237768796, "grad_norm": 18.379117543285965, "learning_rate": 1.1569734094903208e-06, "loss": -0.7647, "step": 5652 }, { "epoch": 0.5050130629926535, "grad_norm": 6.2052662418368545, "learning_rate": 1.1563574691695765e-06, "loss": -0.5959, "step": 5654 }, { "epoch": 0.5051917022084274, "grad_norm": 15.955452893941345, "learning_rate": 1.1557414680369784e-06, "loss": -0.8889, "step": 5656 }, { "epoch": 0.5053703414242011, "grad_norm": 8.268109415143298, "learning_rate": 1.1551254063321064e-06, "loss": 0.3398, "step": 5658 }, { "epoch": 0.505548980639975, "grad_norm": 4.611469581693114, "learning_rate": 1.154509284294564e-06, "loss": -0.4933, "step": 5660 }, { "epoch": 0.5057276198557489, "grad_norm": 11.135554075805807, "learning_rate": 1.1538931021639793e-06, "loss": -0.9929, "step": 5662 }, { "epoch": 0.5059062590715226, "grad_norm": 10.897635919222559, "learning_rate": 1.1532768601800025e-06, "loss": -0.0912, "step": 5664 }, { "epoch": 0.5060848982872965, "grad_norm": 8.414664555990557, "learning_rate": 1.1526605585823081e-06, "loss": 0.135, "step": 5666 }, { "epoch": 0.5062635375030704, "grad_norm": 9.993314033813705, "learning_rate": 1.1520441976105927e-06, "loss": 0.0661, "step": 5668 }, { "epoch": 0.5064421767188442, "grad_norm": 3.3360924427936998, "learning_rate": 1.1514277775045766e-06, "loss": -0.2382, "step": 5670 }, { "epoch": 0.506620815934618, "grad_norm": 11.543743364448892, "learning_rate": 1.1508112985040034e-06, "loss": -1.5625, "step": 5672 }, { "epoch": 0.5067994551503919, "grad_norm": 7.809801047473812, "learning_rate": 1.1501947608486394e-06, "loss": -0.0876, "step": 5674 }, { "epoch": 0.5069780943661657, "grad_norm": 17.517164259696028, "learning_rate": 1.1495781647782729e-06, "loss": -0.6098, "step": 5676 }, { "epoch": 0.5071567335819396, "grad_norm": 11.260408122117962, "learning_rate": 1.148961510532716e-06, "loss": 0.6069, "step": 5678 }, { "epoch": 0.5073353727977135, "grad_norm": 7.067458311159711, "learning_rate": 1.148344798351803e-06, "loss": -0.9082, "step": 5680 }, { "epoch": 0.5075140120134872, "grad_norm": 6.845342581447115, "learning_rate": 1.147728028475391e-06, "loss": 0.0222, "step": 5682 }, { "epoch": 0.5076926512292611, "grad_norm": 4.0958793867800605, "learning_rate": 1.147111201143359e-06, "loss": -0.2905, "step": 5684 }, { "epoch": 0.507871290445035, "grad_norm": 14.50407405050388, "learning_rate": 1.1464943165956087e-06, "loss": -0.764, "step": 5686 }, { "epoch": 0.5080499296608088, "grad_norm": 5.249785645968156, "learning_rate": 1.1458773750720638e-06, "loss": 0.5924, "step": 5688 }, { "epoch": 0.5082285688765826, "grad_norm": 7.121579871898241, "learning_rate": 1.145260376812671e-06, "loss": -0.7333, "step": 5690 }, { "epoch": 0.5084072080923565, "grad_norm": 10.73961048634621, "learning_rate": 1.1446433220573975e-06, "loss": 0.5993, "step": 5692 }, { "epoch": 0.5085858473081303, "grad_norm": 4.026184211105822, "learning_rate": 1.144026211046234e-06, "loss": -0.348, "step": 5694 }, { "epoch": 0.5087644865239042, "grad_norm": 8.533021847682528, "learning_rate": 1.1434090440191926e-06, "loss": -0.799, "step": 5696 }, { "epoch": 0.508943125739678, "grad_norm": 9.92613174558267, "learning_rate": 1.1427918212163066e-06, "loss": -0.5749, "step": 5698 }, { "epoch": 0.5091217649554518, "grad_norm": 8.475107882102813, "learning_rate": 1.1421745428776318e-06, "loss": -0.7956, "step": 5700 }, { "epoch": 0.5093004041712257, "grad_norm": 8.478157252058079, "learning_rate": 1.141557209243245e-06, "loss": -0.6589, "step": 5702 }, { "epoch": 0.5094790433869996, "grad_norm": 6.235054763027284, "learning_rate": 1.140939820553245e-06, "loss": 0.7095, "step": 5704 }, { "epoch": 0.5096576826027733, "grad_norm": 7.424810788604787, "learning_rate": 1.1403223770477516e-06, "loss": -0.2639, "step": 5706 }, { "epoch": 0.5098363218185472, "grad_norm": 17.433323732644546, "learning_rate": 1.1397048789669059e-06, "loss": -0.6789, "step": 5708 }, { "epoch": 0.5100149610343211, "grad_norm": 14.271041593865114, "learning_rate": 1.139087326550871e-06, "loss": -0.5695, "step": 5710 }, { "epoch": 0.5101936002500949, "grad_norm": 7.2293599420778, "learning_rate": 1.13846972003983e-06, "loss": 0.0085, "step": 5712 }, { "epoch": 0.5103722394658687, "grad_norm": 4.239577584727496, "learning_rate": 1.1378520596739877e-06, "loss": -1.3724, "step": 5714 }, { "epoch": 0.5105508786816426, "grad_norm": 13.016661310785016, "learning_rate": 1.1372343456935701e-06, "loss": -0.0117, "step": 5716 }, { "epoch": 0.5107295178974164, "grad_norm": 4.996491918139877, "learning_rate": 1.1366165783388234e-06, "loss": 0.1514, "step": 5718 }, { "epoch": 0.5109081571131903, "grad_norm": 3.7438013343921557, "learning_rate": 1.1359987578500148e-06, "loss": 0.1941, "step": 5720 }, { "epoch": 0.5110867963289641, "grad_norm": 4.780862231518266, "learning_rate": 1.135380884467432e-06, "loss": -0.4425, "step": 5722 }, { "epoch": 0.5112654355447379, "grad_norm": 5.098666012898274, "learning_rate": 1.1347629584313837e-06, "loss": 0.6009, "step": 5724 }, { "epoch": 0.5114440747605118, "grad_norm": 4.805160275901274, "learning_rate": 1.1341449799821994e-06, "loss": -0.0764, "step": 5726 }, { "epoch": 0.5116227139762857, "grad_norm": 12.774462998781067, "learning_rate": 1.1335269493602277e-06, "loss": -1.2318, "step": 5728 }, { "epoch": 0.5118013531920594, "grad_norm": 16.82414846093397, "learning_rate": 1.1329088668058385e-06, "loss": -0.62, "step": 5730 }, { "epoch": 0.5119799924078333, "grad_norm": 6.079714530467089, "learning_rate": 1.1322907325594216e-06, "loss": -0.4139, "step": 5732 }, { "epoch": 0.5121586316236072, "grad_norm": 6.256583751575497, "learning_rate": 1.131672546861387e-06, "loss": 0.2825, "step": 5734 }, { "epoch": 0.512337270839381, "grad_norm": 7.811270655230657, "learning_rate": 1.1310543099521644e-06, "loss": -0.587, "step": 5736 }, { "epoch": 0.5125159100551548, "grad_norm": 8.445367154801177, "learning_rate": 1.1304360220722043e-06, "loss": 0.0306, "step": 5738 }, { "epoch": 0.5126945492709287, "grad_norm": 6.536858737632451, "learning_rate": 1.1298176834619754e-06, "loss": -0.0726, "step": 5740 }, { "epoch": 0.5128731884867025, "grad_norm": 11.141847348133144, "learning_rate": 1.1291992943619682e-06, "loss": -0.5325, "step": 5742 }, { "epoch": 0.5130518277024764, "grad_norm": 3.970021922629913, "learning_rate": 1.128580855012691e-06, "loss": -0.2046, "step": 5744 }, { "epoch": 0.5132304669182502, "grad_norm": 6.033610934339603, "learning_rate": 1.1279623656546726e-06, "loss": -1.3875, "step": 5746 }, { "epoch": 0.513409106134024, "grad_norm": 6.393619348826261, "learning_rate": 1.1273438265284615e-06, "loss": -0.8687, "step": 5748 }, { "epoch": 0.5135877453497979, "grad_norm": 7.444330236719744, "learning_rate": 1.1267252378746243e-06, "loss": 0.1954, "step": 5750 }, { "epoch": 0.5137663845655718, "grad_norm": 3.2744466823792733, "learning_rate": 1.126106599933748e-06, "loss": 0.8458, "step": 5752 }, { "epoch": 0.5139450237813455, "grad_norm": 6.171901577276637, "learning_rate": 1.1254879129464389e-06, "loss": 0.2144, "step": 5754 }, { "epoch": 0.5141236629971194, "grad_norm": 6.534686630896211, "learning_rate": 1.1248691771533213e-06, "loss": -2.5382, "step": 5756 }, { "epoch": 0.5143023022128933, "grad_norm": 8.200359932047904, "learning_rate": 1.1242503927950395e-06, "loss": 0.1226, "step": 5758 }, { "epoch": 0.5144809414286672, "grad_norm": 5.808665661857311, "learning_rate": 1.1236315601122559e-06, "loss": 0.25, "step": 5760 }, { "epoch": 0.514659580644441, "grad_norm": 4.832829525877805, "learning_rate": 1.1230126793456526e-06, "loss": 0.6086, "step": 5762 }, { "epoch": 0.5148382198602148, "grad_norm": 7.8717615644119405, "learning_rate": 1.1223937507359296e-06, "loss": -0.4819, "step": 5764 }, { "epoch": 0.5150168590759887, "grad_norm": 6.630150861908057, "learning_rate": 1.1217747745238056e-06, "loss": -0.0071, "step": 5766 }, { "epoch": 0.5151954982917625, "grad_norm": 13.10536105115956, "learning_rate": 1.121155750950018e-06, "loss": 0.3575, "step": 5768 }, { "epoch": 0.5153741375075364, "grad_norm": 6.489577814368222, "learning_rate": 1.1205366802553228e-06, "loss": 0.2158, "step": 5770 }, { "epoch": 0.5155527767233102, "grad_norm": 11.207716762974629, "learning_rate": 1.1199175626804947e-06, "loss": 0.6363, "step": 5772 }, { "epoch": 0.515731415939084, "grad_norm": 6.38695138863382, "learning_rate": 1.119298398466325e-06, "loss": 1.1409, "step": 5774 }, { "epoch": 0.5159100551548579, "grad_norm": 7.580094732331717, "learning_rate": 1.118679187853625e-06, "loss": -0.9819, "step": 5776 }, { "epoch": 0.5160886943706318, "grad_norm": 9.338387982532353, "learning_rate": 1.1180599310832228e-06, "loss": -0.2646, "step": 5778 }, { "epoch": 0.5162673335864055, "grad_norm": 6.290676350580588, "learning_rate": 1.1174406283959653e-06, "loss": -0.0812, "step": 5780 }, { "epoch": 0.5164459728021794, "grad_norm": 7.443697102676632, "learning_rate": 1.116821280032717e-06, "loss": 0.5672, "step": 5782 }, { "epoch": 0.5166246120179533, "grad_norm": 14.30826902528744, "learning_rate": 1.1162018862343595e-06, "loss": -1.0527, "step": 5784 }, { "epoch": 0.5168032512337271, "grad_norm": 7.819107557334409, "learning_rate": 1.1155824472417929e-06, "loss": -1.0621, "step": 5786 }, { "epoch": 0.5169818904495009, "grad_norm": 11.452887851398824, "learning_rate": 1.1149629632959348e-06, "loss": 0.5573, "step": 5788 }, { "epoch": 0.5171605296652748, "grad_norm": 6.889224232267443, "learning_rate": 1.1143434346377191e-06, "loss": 0.7465, "step": 5790 }, { "epoch": 0.5173391688810486, "grad_norm": 9.74350933294025, "learning_rate": 1.1137238615080998e-06, "loss": 0.2427, "step": 5792 }, { "epoch": 0.5175178080968225, "grad_norm": 22.37822474252489, "learning_rate": 1.1131042441480452e-06, "loss": -1.1507, "step": 5794 }, { "epoch": 0.5176964473125963, "grad_norm": 5.22446626556338, "learning_rate": 1.112484582798542e-06, "loss": -0.8419, "step": 5796 }, { "epoch": 0.5178750865283701, "grad_norm": 5.195193732488791, "learning_rate": 1.1118648777005946e-06, "loss": -0.5398, "step": 5798 }, { "epoch": 0.518053725744144, "grad_norm": 7.17673781347142, "learning_rate": 1.1112451290952236e-06, "loss": 0.0957, "step": 5800 }, { "epoch": 0.5182323649599179, "grad_norm": 10.068172775004596, "learning_rate": 1.110625337223467e-06, "loss": -0.2678, "step": 5802 }, { "epoch": 0.5184110041756916, "grad_norm": 19.54084295666467, "learning_rate": 1.1100055023263796e-06, "loss": -2.2106, "step": 5804 }, { "epoch": 0.5185896433914655, "grad_norm": 4.759948551637233, "learning_rate": 1.109385624645032e-06, "loss": 0.5245, "step": 5806 }, { "epoch": 0.5187682826072394, "grad_norm": 5.383203854888475, "learning_rate": 1.1087657044205133e-06, "loss": -0.0336, "step": 5808 }, { "epoch": 0.5189469218230132, "grad_norm": 3.236197727410363, "learning_rate": 1.108145741893927e-06, "loss": 0.4418, "step": 5810 }, { "epoch": 0.519125561038787, "grad_norm": 6.334640501882165, "learning_rate": 1.107525737306395e-06, "loss": -0.8603, "step": 5812 }, { "epoch": 0.5193042002545609, "grad_norm": 7.098289079434889, "learning_rate": 1.1069056908990542e-06, "loss": 0.4913, "step": 5814 }, { "epoch": 0.5194828394703347, "grad_norm": 18.03917923034991, "learning_rate": 1.106285602913058e-06, "loss": -0.0068, "step": 5816 }, { "epoch": 0.5196614786861086, "grad_norm": 7.982516973986979, "learning_rate": 1.1056654735895765e-06, "loss": -0.6284, "step": 5818 }, { "epoch": 0.5198401179018824, "grad_norm": 9.565899811778523, "learning_rate": 1.1050453031697957e-06, "loss": -0.2005, "step": 5820 }, { "epoch": 0.5200187571176562, "grad_norm": 5.6373437018201376, "learning_rate": 1.1044250918949174e-06, "loss": 0.9147, "step": 5822 }, { "epoch": 0.5201973963334301, "grad_norm": 30.697849618359367, "learning_rate": 1.1038048400061593e-06, "loss": -0.5813, "step": 5824 }, { "epoch": 0.520376035549204, "grad_norm": 5.877947777972037, "learning_rate": 1.1031845477447552e-06, "loss": -1.0366, "step": 5826 }, { "epoch": 0.5205546747649777, "grad_norm": 21.624546994442305, "learning_rate": 1.1025642153519536e-06, "loss": 0.3273, "step": 5828 }, { "epoch": 0.5207333139807516, "grad_norm": 17.081562725823712, "learning_rate": 1.1019438430690202e-06, "loss": -2.0188, "step": 5830 }, { "epoch": 0.5209119531965255, "grad_norm": 5.746846536770188, "learning_rate": 1.1013234311372353e-06, "loss": -0.2306, "step": 5832 }, { "epoch": 0.5210905924122993, "grad_norm": 15.724070685601145, "learning_rate": 1.1007029797978938e-06, "loss": -1.4768, "step": 5834 }, { "epoch": 0.5212692316280731, "grad_norm": 8.689416029154915, "learning_rate": 1.1000824892923076e-06, "loss": 0.6638, "step": 5836 }, { "epoch": 0.521447870843847, "grad_norm": 15.779734321552725, "learning_rate": 1.0994619598618033e-06, "loss": -1.0282, "step": 5838 }, { "epoch": 0.5216265100596208, "grad_norm": 15.947959668952612, "learning_rate": 1.0988413917477218e-06, "loss": 0.1946, "step": 5840 }, { "epoch": 0.5218051492753947, "grad_norm": 4.79724477529363, "learning_rate": 1.0982207851914197e-06, "loss": -0.0069, "step": 5842 }, { "epoch": 0.5219837884911686, "grad_norm": 16.253898270395783, "learning_rate": 1.0976001404342689e-06, "loss": -1.1628, "step": 5844 }, { "epoch": 0.5221624277069423, "grad_norm": 10.134098062604313, "learning_rate": 1.0969794577176552e-06, "loss": 0.3788, "step": 5846 }, { "epoch": 0.5223410669227162, "grad_norm": 4.300256792429373, "learning_rate": 1.0963587372829803e-06, "loss": -0.9688, "step": 5848 }, { "epoch": 0.5225197061384901, "grad_norm": 7.104182835296697, "learning_rate": 1.0957379793716594e-06, "loss": -1.3756, "step": 5850 }, { "epoch": 0.5226983453542638, "grad_norm": 12.016634537807763, "learning_rate": 1.0951171842251235e-06, "loss": 0.0719, "step": 5852 }, { "epoch": 0.5228769845700377, "grad_norm": 11.369848405268606, "learning_rate": 1.0944963520848168e-06, "loss": -0.4047, "step": 5854 }, { "epoch": 0.5230556237858116, "grad_norm": 5.240735735601924, "learning_rate": 1.0938754831921987e-06, "loss": 0.3001, "step": 5856 }, { "epoch": 0.5232342630015854, "grad_norm": 4.229659604564343, "learning_rate": 1.093254577788743e-06, "loss": -0.0709, "step": 5858 }, { "epoch": 0.5234129022173593, "grad_norm": 5.563473734119213, "learning_rate": 1.0926336361159372e-06, "loss": 0.4008, "step": 5860 }, { "epoch": 0.5235915414331331, "grad_norm": 15.486942821267816, "learning_rate": 1.0920126584152832e-06, "loss": -0.2284, "step": 5862 }, { "epoch": 0.5237701806489069, "grad_norm": 6.773314724593745, "learning_rate": 1.0913916449282967e-06, "loss": 0.0912, "step": 5864 }, { "epoch": 0.5239488198646808, "grad_norm": 4.782359474458642, "learning_rate": 1.0907705958965075e-06, "loss": -0.0111, "step": 5866 }, { "epoch": 0.5241274590804547, "grad_norm": 5.048148641111788, "learning_rate": 1.0901495115614597e-06, "loss": 0.3636, "step": 5868 }, { "epoch": 0.5243060982962284, "grad_norm": 4.382261952247404, "learning_rate": 1.0895283921647096e-06, "loss": 0.3686, "step": 5870 }, { "epoch": 0.5244847375120023, "grad_norm": 8.562865757613519, "learning_rate": 1.0889072379478288e-06, "loss": 0.4472, "step": 5872 }, { "epoch": 0.5246633767277762, "grad_norm": 9.59775084533155, "learning_rate": 1.088286049152402e-06, "loss": -0.3687, "step": 5874 }, { "epoch": 0.52484201594355, "grad_norm": 9.281273016596039, "learning_rate": 1.0876648260200267e-06, "loss": 0.3467, "step": 5876 }, { "epoch": 0.5250206551593238, "grad_norm": 5.732214580472545, "learning_rate": 1.0870435687923142e-06, "loss": -0.7673, "step": 5878 }, { "epoch": 0.5251992943750977, "grad_norm": 4.455853399361514, "learning_rate": 1.0864222777108892e-06, "loss": -0.7769, "step": 5880 }, { "epoch": 0.5253779335908715, "grad_norm": 6.938862434841062, "learning_rate": 1.0858009530173896e-06, "loss": -0.2505, "step": 5882 }, { "epoch": 0.5255565728066454, "grad_norm": 6.409896185119062, "learning_rate": 1.085179594953466e-06, "loss": 0.8227, "step": 5884 }, { "epoch": 0.5257352120224192, "grad_norm": 11.328591719911408, "learning_rate": 1.0845582037607822e-06, "loss": -0.3397, "step": 5886 }, { "epoch": 0.5259138512381931, "grad_norm": 10.6672896759562, "learning_rate": 1.0839367796810143e-06, "loss": -1.5096, "step": 5888 }, { "epoch": 0.5260924904539669, "grad_norm": 7.879158254289767, "learning_rate": 1.083315322955853e-06, "loss": 0.5556, "step": 5890 }, { "epoch": 0.5262711296697408, "grad_norm": 10.49742894256955, "learning_rate": 1.0826938338269996e-06, "loss": -0.213, "step": 5892 }, { "epoch": 0.5264497688855146, "grad_norm": 7.373271416282859, "learning_rate": 1.0820723125361684e-06, "loss": 0.5867, "step": 5894 }, { "epoch": 0.5266284081012884, "grad_norm": 8.236290377773948, "learning_rate": 1.0814507593250878e-06, "loss": 0.1529, "step": 5896 }, { "epoch": 0.5268070473170623, "grad_norm": 2.3963705593897946, "learning_rate": 1.0808291744354967e-06, "loss": 0.6835, "step": 5898 }, { "epoch": 0.5269856865328362, "grad_norm": 17.780157555138448, "learning_rate": 1.0802075581091471e-06, "loss": -0.609, "step": 5900 }, { "epoch": 0.5271643257486099, "grad_norm": 10.480113861979678, "learning_rate": 1.079585910587804e-06, "loss": 0.1963, "step": 5902 }, { "epoch": 0.5273429649643838, "grad_norm": 8.526583223071299, "learning_rate": 1.0789642321132426e-06, "loss": 0.9555, "step": 5904 }, { "epoch": 0.5275216041801577, "grad_norm": 16.928109243750743, "learning_rate": 1.0783425229272523e-06, "loss": 0.5285, "step": 5906 }, { "epoch": 0.5277002433959315, "grad_norm": 7.737782846929117, "learning_rate": 1.0777207832716326e-06, "loss": -0.9262, "step": 5908 }, { "epoch": 0.5278788826117053, "grad_norm": 10.894191601240141, "learning_rate": 1.0770990133881964e-06, "loss": -0.0932, "step": 5910 }, { "epoch": 0.5280575218274792, "grad_norm": 12.294774652861614, "learning_rate": 1.0764772135187673e-06, "loss": -0.0644, "step": 5912 }, { "epoch": 0.528236161043253, "grad_norm": 5.7094036500375855, "learning_rate": 1.0758553839051805e-06, "loss": 1.0548, "step": 5914 }, { "epoch": 0.5284148002590269, "grad_norm": 9.694293390485988, "learning_rate": 1.0752335247892839e-06, "loss": -0.3869, "step": 5916 }, { "epoch": 0.5285934394748008, "grad_norm": 9.647390900346167, "learning_rate": 1.0746116364129359e-06, "loss": -1.4421, "step": 5918 }, { "epoch": 0.5287720786905745, "grad_norm": 5.370297703793353, "learning_rate": 1.0739897190180065e-06, "loss": 0.1992, "step": 5920 }, { "epoch": 0.5289507179063484, "grad_norm": 13.997209543333986, "learning_rate": 1.0733677728463769e-06, "loss": -0.3503, "step": 5922 }, { "epoch": 0.5291293571221223, "grad_norm": 9.540628390619279, "learning_rate": 1.0727457981399394e-06, "loss": 0.5026, "step": 5924 }, { "epoch": 0.529307996337896, "grad_norm": 3.884290939489484, "learning_rate": 1.072123795140598e-06, "loss": 0.5623, "step": 5926 }, { "epoch": 0.5294866355536699, "grad_norm": 7.52382309540484, "learning_rate": 1.0715017640902675e-06, "loss": -0.5125, "step": 5928 }, { "epoch": 0.5296652747694438, "grad_norm": 11.27932274753526, "learning_rate": 1.070879705230873e-06, "loss": -0.8539, "step": 5930 }, { "epoch": 0.5298439139852176, "grad_norm": 7.91220797079706, "learning_rate": 1.070257618804351e-06, "loss": 0.1745, "step": 5932 }, { "epoch": 0.5300225532009915, "grad_norm": 7.1853761935253235, "learning_rate": 1.0696355050526482e-06, "loss": 0.4203, "step": 5934 }, { "epoch": 0.5302011924167653, "grad_norm": 8.449153284391551, "learning_rate": 1.0690133642177229e-06, "loss": -0.4483, "step": 5936 }, { "epoch": 0.5303798316325391, "grad_norm": 10.775843937299536, "learning_rate": 1.0683911965415427e-06, "loss": 0.5864, "step": 5938 }, { "epoch": 0.530558470848313, "grad_norm": 7.09472730181087, "learning_rate": 1.067769002266087e-06, "loss": -0.5734, "step": 5940 }, { "epoch": 0.5307371100640869, "grad_norm": 15.207382437185576, "learning_rate": 1.0671467816333438e-06, "loss": -0.6167, "step": 5942 }, { "epoch": 0.5309157492798606, "grad_norm": 8.612930920454856, "learning_rate": 1.066524534885313e-06, "loss": -0.492, "step": 5944 }, { "epoch": 0.5310943884956345, "grad_norm": 5.0584721495399085, "learning_rate": 1.065902262264004e-06, "loss": 0.6354, "step": 5946 }, { "epoch": 0.5312730277114084, "grad_norm": 7.290578735571034, "learning_rate": 1.0652799640114364e-06, "loss": 0.4115, "step": 5948 }, { "epoch": 0.5314516669271822, "grad_norm": 9.130466443751313, "learning_rate": 1.0646576403696393e-06, "loss": 0.1943, "step": 5950 }, { "epoch": 0.531630306142956, "grad_norm": 15.04206595414823, "learning_rate": 1.0640352915806518e-06, "loss": -0.9369, "step": 5952 }, { "epoch": 0.5318089453587299, "grad_norm": 4.433452210708792, "learning_rate": 1.0634129178865238e-06, "loss": -0.5579, "step": 5954 }, { "epoch": 0.5319875845745037, "grad_norm": 4.672656347966476, "learning_rate": 1.0627905195293135e-06, "loss": -0.6543, "step": 5956 }, { "epoch": 0.5321662237902776, "grad_norm": 8.488760304877967, "learning_rate": 1.0621680967510892e-06, "loss": 0.8304, "step": 5958 }, { "epoch": 0.5323448630060514, "grad_norm": 15.179840291305151, "learning_rate": 1.061545649793929e-06, "loss": -1.2912, "step": 5960 }, { "epoch": 0.5325235022218252, "grad_norm": 2.997909135170587, "learning_rate": 1.06092317889992e-06, "loss": 0.7477, "step": 5962 }, { "epoch": 0.5327021414375991, "grad_norm": 7.083030095808909, "learning_rate": 1.060300684311159e-06, "loss": -1.0971, "step": 5964 }, { "epoch": 0.532880780653373, "grad_norm": 9.916586311933722, "learning_rate": 1.059678166269752e-06, "loss": -0.0519, "step": 5966 }, { "epoch": 0.5330594198691467, "grad_norm": 7.345084860397823, "learning_rate": 1.0590556250178133e-06, "loss": -0.4436, "step": 5968 }, { "epoch": 0.5332380590849206, "grad_norm": 15.083479457308997, "learning_rate": 1.0584330607974673e-06, "loss": -1.1464, "step": 5970 }, { "epoch": 0.5334166983006945, "grad_norm": 4.597712047306387, "learning_rate": 1.0578104738508468e-06, "loss": -0.8785, "step": 5972 }, { "epoch": 0.5335953375164683, "grad_norm": 11.382787030256337, "learning_rate": 1.0571878644200932e-06, "loss": -0.7502, "step": 5974 }, { "epoch": 0.5337739767322421, "grad_norm": 16.0813153570621, "learning_rate": 1.0565652327473576e-06, "loss": -0.6547, "step": 5976 }, { "epoch": 0.533952615948016, "grad_norm": 12.675489331191764, "learning_rate": 1.0559425790747986e-06, "loss": -0.5919, "step": 5978 }, { "epoch": 0.5341312551637898, "grad_norm": 14.47633588091646, "learning_rate": 1.055319903644584e-06, "loss": -0.6066, "step": 5980 }, { "epoch": 0.5343098943795637, "grad_norm": 10.466201517577566, "learning_rate": 1.05469720669889e-06, "loss": 0.1448, "step": 5982 }, { "epoch": 0.5344885335953375, "grad_norm": 15.396821206712909, "learning_rate": 1.0540744884799011e-06, "loss": 0.5253, "step": 5984 }, { "epoch": 0.5346671728111113, "grad_norm": 5.908610523144082, "learning_rate": 1.0534517492298101e-06, "loss": -0.8483, "step": 5986 }, { "epoch": 0.5348458120268852, "grad_norm": 5.523895201543186, "learning_rate": 1.0528289891908178e-06, "loss": 0.0607, "step": 5988 }, { "epoch": 0.5350244512426591, "grad_norm": 9.300777148518625, "learning_rate": 1.0522062086051336e-06, "loss": 0.011, "step": 5990 }, { "epoch": 0.5352030904584328, "grad_norm": 18.199528547615422, "learning_rate": 1.0515834077149736e-06, "loss": -1.367, "step": 5992 }, { "epoch": 0.5353817296742067, "grad_norm": 10.814598276804917, "learning_rate": 1.050960586762564e-06, "loss": 0.2869, "step": 5994 }, { "epoch": 0.5355603688899806, "grad_norm": 12.06877518527279, "learning_rate": 1.0503377459901368e-06, "loss": -0.5727, "step": 5996 }, { "epoch": 0.5357390081057544, "grad_norm": 7.44087027474318, "learning_rate": 1.0497148856399325e-06, "loss": -1.2101, "step": 5998 }, { "epoch": 0.5359176473215282, "grad_norm": 5.993808094494919, "learning_rate": 1.0490920059541995e-06, "loss": -0.3713, "step": 6000 }, { "epoch": 0.5360962865373021, "grad_norm": 3.1565998752164957, "learning_rate": 1.0484691071751927e-06, "loss": -0.1136, "step": 6002 }, { "epoch": 0.5362749257530759, "grad_norm": 4.512554880585163, "learning_rate": 1.0478461895451755e-06, "loss": -0.0415, "step": 6004 }, { "epoch": 0.5364535649688498, "grad_norm": 10.00232516913255, "learning_rate": 1.0472232533064187e-06, "loss": -0.7912, "step": 6006 }, { "epoch": 0.5366322041846237, "grad_norm": 6.233564510580472, "learning_rate": 1.0466002987011987e-06, "loss": 0.8452, "step": 6008 }, { "epoch": 0.5368108434003974, "grad_norm": 5.3016449372942445, "learning_rate": 1.0459773259718016e-06, "loss": -0.2217, "step": 6010 }, { "epoch": 0.5369894826161713, "grad_norm": 7.110107920668622, "learning_rate": 1.0453543353605182e-06, "loss": 0.1117, "step": 6012 }, { "epoch": 0.5371681218319452, "grad_norm": 11.450834578703342, "learning_rate": 1.0447313271096475e-06, "loss": -1.5427, "step": 6014 }, { "epoch": 0.537346761047719, "grad_norm": 4.873207936654465, "learning_rate": 1.0441083014614951e-06, "loss": -0.5261, "step": 6016 }, { "epoch": 0.5375254002634928, "grad_norm": 7.878346292307395, "learning_rate": 1.0434852586583737e-06, "loss": -0.6363, "step": 6018 }, { "epoch": 0.5377040394792667, "grad_norm": 7.510376427939912, "learning_rate": 1.0428621989426014e-06, "loss": 0.7952, "step": 6020 }, { "epoch": 0.5378826786950406, "grad_norm": 3.5223904087841444, "learning_rate": 1.0422391225565044e-06, "loss": -0.0018, "step": 6022 }, { "epoch": 0.5380613179108144, "grad_norm": 8.9341091312586, "learning_rate": 1.0416160297424148e-06, "loss": -0.9371, "step": 6024 }, { "epoch": 0.5382399571265882, "grad_norm": 9.845815677979457, "learning_rate": 1.040992920742671e-06, "loss": -0.254, "step": 6026 }, { "epoch": 0.5384185963423621, "grad_norm": 6.350234452372421, "learning_rate": 1.0403697957996178e-06, "loss": 0.8469, "step": 6028 }, { "epoch": 0.5385972355581359, "grad_norm": 5.802408625891183, "learning_rate": 1.039746655155606e-06, "loss": -0.4715, "step": 6030 }, { "epoch": 0.5387758747739098, "grad_norm": 7.568706393254078, "learning_rate": 1.0391234990529929e-06, "loss": -0.5923, "step": 6032 }, { "epoch": 0.5389545139896836, "grad_norm": 5.438998662567956, "learning_rate": 1.0385003277341416e-06, "loss": -2.1175, "step": 6034 }, { "epoch": 0.5391331532054574, "grad_norm": 9.415830681976, "learning_rate": 1.0378771414414202e-06, "loss": -0.6018, "step": 6036 }, { "epoch": 0.5393117924212313, "grad_norm": 5.173771352085644, "learning_rate": 1.0372539404172052e-06, "loss": 0.2073, "step": 6038 }, { "epoch": 0.5394904316370052, "grad_norm": 9.456222858881654, "learning_rate": 1.0366307249038757e-06, "loss": -0.5784, "step": 6040 }, { "epoch": 0.5396690708527789, "grad_norm": 5.5711196806799945, "learning_rate": 1.0360074951438185e-06, "loss": 0.3279, "step": 6042 }, { "epoch": 0.5398477100685528, "grad_norm": 7.051071237111445, "learning_rate": 1.0353842513794252e-06, "loss": 0.4647, "step": 6044 }, { "epoch": 0.5400263492843267, "grad_norm": 1.7740433116128456, "learning_rate": 1.0347609938530931e-06, "loss": 0.9898, "step": 6046 }, { "epoch": 0.5402049885001005, "grad_norm": 6.8348154771143, "learning_rate": 1.0341377228072245e-06, "loss": -0.4608, "step": 6048 }, { "epoch": 0.5403836277158743, "grad_norm": 12.634105458145386, "learning_rate": 1.0335144384842278e-06, "loss": -0.2582, "step": 6050 }, { "epoch": 0.5405622669316482, "grad_norm": 10.159297920059025, "learning_rate": 1.0328911411265148e-06, "loss": -0.3232, "step": 6052 }, { "epoch": 0.540740906147422, "grad_norm": 5.61987206581545, "learning_rate": 1.0322678309765047e-06, "loss": 0.5174, "step": 6054 }, { "epoch": 0.5409195453631959, "grad_norm": 14.341913774760195, "learning_rate": 1.0316445082766199e-06, "loss": -0.5047, "step": 6056 }, { "epoch": 0.5410981845789697, "grad_norm": 7.133028559843769, "learning_rate": 1.0310211732692883e-06, "loss": 0.5568, "step": 6058 }, { "epoch": 0.5412768237947435, "grad_norm": 8.490299299196785, "learning_rate": 1.0303978261969429e-06, "loss": -1.1398, "step": 6060 }, { "epoch": 0.5414554630105174, "grad_norm": 14.405313126227018, "learning_rate": 1.0297744673020203e-06, "loss": 0.9455, "step": 6062 }, { "epoch": 0.5416341022262913, "grad_norm": 6.5152011065648905, "learning_rate": 1.0291510968269632e-06, "loss": -0.9259, "step": 6064 }, { "epoch": 0.541812741442065, "grad_norm": 5.498522473323942, "learning_rate": 1.0285277150142177e-06, "loss": 0.4923, "step": 6066 }, { "epoch": 0.5419913806578389, "grad_norm": 14.93946321107811, "learning_rate": 1.0279043221062348e-06, "loss": 1.1606, "step": 6068 }, { "epoch": 0.5421700198736128, "grad_norm": 21.513130614077518, "learning_rate": 1.0272809183454701e-06, "loss": -1.85, "step": 6070 }, { "epoch": 0.5423486590893866, "grad_norm": 9.660932967030515, "learning_rate": 1.0266575039743822e-06, "loss": -0.2224, "step": 6072 }, { "epoch": 0.5425272983051604, "grad_norm": 8.253757430331351, "learning_rate": 1.0260340792354348e-06, "loss": 0.1958, "step": 6074 }, { "epoch": 0.5427059375209343, "grad_norm": 18.049023309632037, "learning_rate": 1.0254106443710958e-06, "loss": -1.024, "step": 6076 }, { "epoch": 0.5428845767367081, "grad_norm": 14.713538152193532, "learning_rate": 1.0247871996238365e-06, "loss": -0.3426, "step": 6078 }, { "epoch": 0.543063215952482, "grad_norm": 11.4813465926292, "learning_rate": 1.0241637452361322e-06, "loss": 0.0104, "step": 6080 }, { "epoch": 0.5432418551682558, "grad_norm": 4.081514433491664, "learning_rate": 1.0235402814504624e-06, "loss": 0.7703, "step": 6082 }, { "epoch": 0.5434204943840296, "grad_norm": 25.11536978259341, "learning_rate": 1.0229168085093092e-06, "loss": -1.2391, "step": 6084 }, { "epoch": 0.5435991335998035, "grad_norm": 4.035642374895446, "learning_rate": 1.0222933266551593e-06, "loss": 1.1135, "step": 6086 }, { "epoch": 0.5437777728155774, "grad_norm": 3.4408565432935267, "learning_rate": 1.0216698361305027e-06, "loss": -0.6639, "step": 6088 }, { "epoch": 0.5439564120313511, "grad_norm": 4.4532256533306755, "learning_rate": 1.0210463371778317e-06, "loss": 0.6723, "step": 6090 }, { "epoch": 0.544135051247125, "grad_norm": 3.813154820974452, "learning_rate": 1.0204228300396437e-06, "loss": -0.1363, "step": 6092 }, { "epoch": 0.5443136904628989, "grad_norm": 9.561260068202145, "learning_rate": 1.0197993149584381e-06, "loss": -0.0347, "step": 6094 }, { "epoch": 0.5444923296786727, "grad_norm": 6.422646580586627, "learning_rate": 1.0191757921767169e-06, "loss": -0.513, "step": 6096 }, { "epoch": 0.5446709688944465, "grad_norm": 2.7518587766329836, "learning_rate": 1.0185522619369866e-06, "loss": 0.5292, "step": 6098 }, { "epoch": 0.5448496081102204, "grad_norm": 7.316621059313236, "learning_rate": 1.0179287244817553e-06, "loss": -0.1022, "step": 6100 }, { "epoch": 0.5450282473259942, "grad_norm": 8.152899627670342, "learning_rate": 1.0173051800535345e-06, "loss": -0.8202, "step": 6102 }, { "epoch": 0.5452068865417681, "grad_norm": 4.9430194377636605, "learning_rate": 1.0166816288948388e-06, "loss": 0.5409, "step": 6104 }, { "epoch": 0.545385525757542, "grad_norm": 6.925151109940332, "learning_rate": 1.0160580712481838e-06, "loss": -0.1556, "step": 6106 }, { "epoch": 0.5455641649733157, "grad_norm": 9.003498245229766, "learning_rate": 1.0154345073560903e-06, "loss": -0.1766, "step": 6108 }, { "epoch": 0.5457428041890896, "grad_norm": 11.050055621943578, "learning_rate": 1.0148109374610788e-06, "loss": -0.4965, "step": 6110 }, { "epoch": 0.5459214434048635, "grad_norm": 7.0510606874162525, "learning_rate": 1.0141873618056736e-06, "loss": -0.242, "step": 6112 }, { "epoch": 0.5461000826206373, "grad_norm": 2.282227920577527, "learning_rate": 1.0135637806324012e-06, "loss": 0.0301, "step": 6114 }, { "epoch": 0.5462787218364111, "grad_norm": 6.118981654893233, "learning_rate": 1.0129401941837896e-06, "loss": -0.1095, "step": 6116 }, { "epoch": 0.546457361052185, "grad_norm": 10.120560414474454, "learning_rate": 1.0123166027023698e-06, "loss": 0.8145, "step": 6118 }, { "epoch": 0.5466360002679588, "grad_norm": 5.430131852883249, "learning_rate": 1.0116930064306735e-06, "loss": 0.222, "step": 6120 }, { "epoch": 0.5468146394837327, "grad_norm": 8.924447725877164, "learning_rate": 1.0110694056112357e-06, "loss": 0.6644, "step": 6122 }, { "epoch": 0.5469932786995065, "grad_norm": 16.778122708197365, "learning_rate": 1.010445800486592e-06, "loss": -1.9404, "step": 6124 }, { "epoch": 0.5471719179152803, "grad_norm": 7.668476167622772, "learning_rate": 1.0098221912992804e-06, "loss": -0.0635, "step": 6126 }, { "epoch": 0.5473505571310542, "grad_norm": 7.94420214671069, "learning_rate": 1.0091985782918398e-06, "loss": -1.006, "step": 6128 }, { "epoch": 0.5475291963468281, "grad_norm": 32.01356600260466, "learning_rate": 1.0085749617068112e-06, "loss": 0.1172, "step": 6130 }, { "epoch": 0.5477078355626018, "grad_norm": 6.981287058131696, "learning_rate": 1.007951341786737e-06, "loss": 0.161, "step": 6132 }, { "epoch": 0.5478864747783757, "grad_norm": 4.63784696612568, "learning_rate": 1.0073277187741604e-06, "loss": -0.2938, "step": 6134 }, { "epoch": 0.5480651139941496, "grad_norm": 8.521890613526217, "learning_rate": 1.0067040929116263e-06, "loss": 1.218, "step": 6136 }, { "epoch": 0.5482437532099234, "grad_norm": 4.374649905774827, "learning_rate": 1.0060804644416803e-06, "loss": 0.0888, "step": 6138 }, { "epoch": 0.5484223924256972, "grad_norm": 8.035979425809355, "learning_rate": 1.0054568336068688e-06, "loss": -1.3811, "step": 6140 }, { "epoch": 0.5486010316414711, "grad_norm": 8.405084578833442, "learning_rate": 1.0048332006497404e-06, "loss": -1.3307, "step": 6142 }, { "epoch": 0.5487796708572449, "grad_norm": 12.266831648449662, "learning_rate": 1.004209565812843e-06, "loss": -0.2697, "step": 6144 }, { "epoch": 0.5489583100730188, "grad_norm": 8.721778480381158, "learning_rate": 1.003585929338726e-06, "loss": 1.5048, "step": 6146 }, { "epoch": 0.5491369492887926, "grad_norm": 4.865538585681935, "learning_rate": 1.0029622914699399e-06, "loss": -0.3014, "step": 6148 }, { "epoch": 0.5493155885045664, "grad_norm": 12.868913415479847, "learning_rate": 1.002338652449034e-06, "loss": -0.1346, "step": 6150 }, { "epoch": 0.5494942277203403, "grad_norm": 14.826723890765884, "learning_rate": 1.00171501251856e-06, "loss": -0.7663, "step": 6152 }, { "epoch": 0.5496728669361142, "grad_norm": 5.119464210549829, "learning_rate": 1.001091371921069e-06, "loss": -0.7573, "step": 6154 }, { "epoch": 0.549851506151888, "grad_norm": 10.366476579336004, "learning_rate": 1.0004677308991128e-06, "loss": -0.3286, "step": 6156 }, { "epoch": 0.5500301453676618, "grad_norm": 5.368954119282538, "learning_rate": 9.998440896952426e-07, "loss": 0.0968, "step": 6158 }, { "epoch": 0.5502087845834357, "grad_norm": 11.812588484498288, "learning_rate": 9.992204485520105e-07, "loss": -0.1098, "step": 6160 }, { "epoch": 0.5503874237992096, "grad_norm": 5.740181210412189, "learning_rate": 9.985968077119674e-07, "loss": 0.5945, "step": 6162 }, { "epoch": 0.5505660630149833, "grad_norm": 7.579593980126142, "learning_rate": 9.979731674176666e-07, "loss": 0.9206, "step": 6164 }, { "epoch": 0.5507447022307572, "grad_norm": 4.3005053910735125, "learning_rate": 9.973495279116577e-07, "loss": -0.5645, "step": 6166 }, { "epoch": 0.5509233414465311, "grad_norm": 3.3919039656521868, "learning_rate": 9.967258894364925e-07, "loss": 0.301, "step": 6168 }, { "epoch": 0.5511019806623049, "grad_norm": 4.962872561827616, "learning_rate": 9.961022522347226e-07, "loss": -0.5229, "step": 6170 }, { "epoch": 0.5512806198780787, "grad_norm": 3.4904315126457095, "learning_rate": 9.954786165488966e-07, "loss": -1.209, "step": 6172 }, { "epoch": 0.5514592590938526, "grad_norm": 6.686897892451497, "learning_rate": 9.948549826215648e-07, "loss": 0.2273, "step": 6174 }, { "epoch": 0.5516378983096264, "grad_norm": 10.505450332771838, "learning_rate": 9.942313506952766e-07, "loss": -0.7237, "step": 6176 }, { "epoch": 0.5518165375254003, "grad_norm": 8.397345495866722, "learning_rate": 9.936077210125796e-07, "loss": 0.0252, "step": 6178 }, { "epoch": 0.5519951767411742, "grad_norm": 5.248537859080672, "learning_rate": 9.92984093816021e-07, "loss": -0.0693, "step": 6180 }, { "epoch": 0.5521738159569479, "grad_norm": 4.1452523340373135, "learning_rate": 9.923604693481474e-07, "loss": 0.1256, "step": 6182 }, { "epoch": 0.5523524551727218, "grad_norm": 13.426262561382728, "learning_rate": 9.917368478515043e-07, "loss": -0.1649, "step": 6184 }, { "epoch": 0.5525310943884957, "grad_norm": 5.617540117637488, "learning_rate": 9.91113229568635e-07, "loss": -0.5238, "step": 6186 }, { "epoch": 0.5527097336042694, "grad_norm": 3.792801669651349, "learning_rate": 9.904896147420827e-07, "loss": 0.657, "step": 6188 }, { "epoch": 0.5528883728200433, "grad_norm": 3.9344746911081665, "learning_rate": 9.898660036143893e-07, "loss": 0.4969, "step": 6190 }, { "epoch": 0.5530670120358172, "grad_norm": 5.586144418151863, "learning_rate": 9.89242396428094e-07, "loss": 0.5934, "step": 6192 }, { "epoch": 0.553245651251591, "grad_norm": 7.8252169447980355, "learning_rate": 9.88618793425736e-07, "loss": 1.1123, "step": 6194 }, { "epoch": 0.5534242904673649, "grad_norm": 8.63961703334761, "learning_rate": 9.879951948498521e-07, "loss": -1.1401, "step": 6196 }, { "epoch": 0.5536029296831387, "grad_norm": 16.884220571198274, "learning_rate": 9.873716009429776e-07, "loss": -1.7701, "step": 6198 }, { "epoch": 0.5537815688989125, "grad_norm": 5.08196144144022, "learning_rate": 9.867480119476454e-07, "loss": -0.8116, "step": 6200 }, { "epoch": 0.5539602081146864, "grad_norm": 13.178294652616168, "learning_rate": 9.86124428106387e-07, "loss": 0.1614, "step": 6202 }, { "epoch": 0.5541388473304603, "grad_norm": 10.541458027299099, "learning_rate": 9.855008496617326e-07, "loss": -0.2833, "step": 6204 }, { "epoch": 0.554317486546234, "grad_norm": 9.32942781024654, "learning_rate": 9.848772768562087e-07, "loss": -0.8443, "step": 6206 }, { "epoch": 0.5544961257620079, "grad_norm": 9.197023192082487, "learning_rate": 9.842537099323405e-07, "loss": 0.0444, "step": 6208 }, { "epoch": 0.5546747649777818, "grad_norm": 13.660780654874229, "learning_rate": 9.836301491326513e-07, "loss": -1.0775, "step": 6210 }, { "epoch": 0.5548534041935556, "grad_norm": 14.504518002349087, "learning_rate": 9.830065946996612e-07, "loss": 0.464, "step": 6212 }, { "epoch": 0.5550320434093294, "grad_norm": 4.910601004743024, "learning_rate": 9.823830468758884e-07, "loss": 1.3619, "step": 6214 }, { "epoch": 0.5552106826251033, "grad_norm": 3.7656645080266666, "learning_rate": 9.817595059038481e-07, "loss": -0.2202, "step": 6216 }, { "epoch": 0.5553893218408771, "grad_norm": 6.424551375939029, "learning_rate": 9.81135972026054e-07, "loss": -0.3808, "step": 6218 }, { "epoch": 0.555567961056651, "grad_norm": 2.3990933891395794, "learning_rate": 9.805124454850148e-07, "loss": 0.3465, "step": 6220 }, { "epoch": 0.5557466002724248, "grad_norm": 4.281613076957268, "learning_rate": 9.79888926523238e-07, "loss": -0.7713, "step": 6222 }, { "epoch": 0.5559252394881986, "grad_norm": 9.36878578225009, "learning_rate": 9.792654153832287e-07, "loss": 0.1294, "step": 6224 }, { "epoch": 0.5561038787039725, "grad_norm": 4.5561184083122255, "learning_rate": 9.78641912307487e-07, "loss": -0.2315, "step": 6226 }, { "epoch": 0.5562825179197464, "grad_norm": 7.234060721440232, "learning_rate": 9.780184175385114e-07, "loss": 0.3771, "step": 6228 }, { "epoch": 0.5564611571355201, "grad_norm": 6.50272341674754, "learning_rate": 9.773949313187968e-07, "loss": 0.1049, "step": 6230 }, { "epoch": 0.556639796351294, "grad_norm": 10.680975098975814, "learning_rate": 9.76771453890834e-07, "loss": -0.5417, "step": 6232 }, { "epoch": 0.5568184355670679, "grad_norm": 4.970689694849244, "learning_rate": 9.761479854971122e-07, "loss": 0.381, "step": 6234 }, { "epoch": 0.5569970747828417, "grad_norm": 15.552107807335203, "learning_rate": 9.755245263801148e-07, "loss": 0.0723, "step": 6236 }, { "epoch": 0.5571757139986155, "grad_norm": 3.9796620937655245, "learning_rate": 9.749010767823235e-07, "loss": -1.2315, "step": 6238 }, { "epoch": 0.5573543532143894, "grad_norm": 6.113426328649923, "learning_rate": 9.742776369462153e-07, "loss": 0.3411, "step": 6240 }, { "epoch": 0.5575329924301632, "grad_norm": 10.150284831743754, "learning_rate": 9.736542071142634e-07, "loss": 0.2732, "step": 6242 }, { "epoch": 0.5577116316459371, "grad_norm": 14.888725039464637, "learning_rate": 9.730307875289378e-07, "loss": 0.842, "step": 6244 }, { "epoch": 0.557890270861711, "grad_norm": 11.171356658648339, "learning_rate": 9.724073784327035e-07, "loss": 0.2531, "step": 6246 }, { "epoch": 0.5580689100774847, "grad_norm": 18.727689362998124, "learning_rate": 9.717839800680221e-07, "loss": -1.0081, "step": 6248 }, { "epoch": 0.5582475492932586, "grad_norm": 5.385587157078535, "learning_rate": 9.711605926773514e-07, "loss": 0.4679, "step": 6250 }, { "epoch": 0.5584261885090325, "grad_norm": 7.422490208877268, "learning_rate": 9.705372165031443e-07, "loss": -0.1184, "step": 6252 }, { "epoch": 0.5586048277248062, "grad_norm": 9.591949393322636, "learning_rate": 9.699138517878488e-07, "loss": -0.7586, "step": 6254 }, { "epoch": 0.5587834669405801, "grad_norm": 5.434087800766604, "learning_rate": 9.692904987739097e-07, "loss": 0.6654, "step": 6256 }, { "epoch": 0.558962106156354, "grad_norm": 15.637729677850908, "learning_rate": 9.68667157703767e-07, "loss": -1.8745, "step": 6258 }, { "epoch": 0.5591407453721278, "grad_norm": 12.683089668917614, "learning_rate": 9.680438288198549e-07, "loss": -0.342, "step": 6260 }, { "epoch": 0.5593193845879016, "grad_norm": 12.787180226672886, "learning_rate": 9.674205123646042e-07, "loss": -1.2193, "step": 6262 }, { "epoch": 0.5594980238036755, "grad_norm": 11.278378107872632, "learning_rate": 9.667972085804404e-07, "loss": -0.7298, "step": 6264 }, { "epoch": 0.5596766630194493, "grad_norm": 5.812300811964206, "learning_rate": 9.661739177097834e-07, "loss": 0.6614, "step": 6266 }, { "epoch": 0.5598553022352232, "grad_norm": 16.86958734809248, "learning_rate": 9.655506399950495e-07, "loss": 0.4407, "step": 6268 }, { "epoch": 0.560033941450997, "grad_norm": 4.605723237309889, "learning_rate": 9.649273756786485e-07, "loss": -0.0823, "step": 6270 }, { "epoch": 0.5602125806667708, "grad_norm": 7.903068052956282, "learning_rate": 9.64304125002986e-07, "loss": 0.793, "step": 6272 }, { "epoch": 0.5603912198825447, "grad_norm": 8.555592221708569, "learning_rate": 9.636808882104615e-07, "loss": -0.2598, "step": 6274 }, { "epoch": 0.5605698590983186, "grad_norm": 12.512546803694619, "learning_rate": 9.630576655434693e-07, "loss": -0.3819, "step": 6276 }, { "epoch": 0.5607484983140923, "grad_norm": 8.804348686083078, "learning_rate": 9.624344572443993e-07, "loss": -0.5494, "step": 6278 }, { "epoch": 0.5609271375298662, "grad_norm": 23.831927206890576, "learning_rate": 9.618112635556339e-07, "loss": -0.4328, "step": 6280 }, { "epoch": 0.5611057767456401, "grad_norm": 5.646320373623576, "learning_rate": 9.611880847195509e-07, "loss": 0.2706, "step": 6282 }, { "epoch": 0.5612844159614139, "grad_norm": 8.016654322159344, "learning_rate": 9.605649209785231e-07, "loss": 0.3634, "step": 6284 }, { "epoch": 0.5614630551771878, "grad_norm": 4.955208998813656, "learning_rate": 9.599417725749154e-07, "loss": -0.2831, "step": 6286 }, { "epoch": 0.5616416943929616, "grad_norm": 11.746255480886903, "learning_rate": 9.593186397510887e-07, "loss": 0.1926, "step": 6288 }, { "epoch": 0.5618203336087355, "grad_norm": 4.02686562276101, "learning_rate": 9.586955227493966e-07, "loss": 0.5869, "step": 6290 }, { "epoch": 0.5619989728245093, "grad_norm": 9.582691292748054, "learning_rate": 9.580724218121873e-07, "loss": -0.9434, "step": 6292 }, { "epoch": 0.5621776120402832, "grad_norm": 5.973444026591222, "learning_rate": 9.574493371818021e-07, "loss": -0.0558, "step": 6294 }, { "epoch": 0.562356251256057, "grad_norm": 3.9849033379408256, "learning_rate": 9.568262691005765e-07, "loss": -0.443, "step": 6296 }, { "epoch": 0.5625348904718308, "grad_norm": 8.735563739824642, "learning_rate": 9.562032178108394e-07, "loss": -0.3444, "step": 6298 }, { "epoch": 0.5627135296876047, "grad_norm": 12.7701585766678, "learning_rate": 9.555801835549127e-07, "loss": -1.1993, "step": 6300 }, { "epoch": 0.5628921689033786, "grad_norm": 4.2073730783251015, "learning_rate": 9.549571665751125e-07, "loss": 0.2076, "step": 6302 }, { "epoch": 0.5630708081191523, "grad_norm": 8.792804325011971, "learning_rate": 9.543341671137475e-07, "loss": 0.2654, "step": 6304 }, { "epoch": 0.5632494473349262, "grad_norm": 2.5503510700979017, "learning_rate": 9.537111854131207e-07, "loss": 0.2732, "step": 6306 }, { "epoch": 0.5634280865507001, "grad_norm": 7.250001315412731, "learning_rate": 9.53088221715526e-07, "loss": 0.406, "step": 6308 }, { "epoch": 0.5636067257664739, "grad_norm": 8.633507554740072, "learning_rate": 9.524652762632524e-07, "loss": 0.5064, "step": 6310 }, { "epoch": 0.5637853649822477, "grad_norm": 4.208256535096874, "learning_rate": 9.518423492985816e-07, "loss": 0.3399, "step": 6312 }, { "epoch": 0.5639640041980216, "grad_norm": 6.222941610590474, "learning_rate": 9.51219441063786e-07, "loss": 0.6213, "step": 6314 }, { "epoch": 0.5641426434137954, "grad_norm": 7.975294828794806, "learning_rate": 9.505965518011338e-07, "loss": -0.6703, "step": 6316 }, { "epoch": 0.5643212826295693, "grad_norm": 11.148221928758485, "learning_rate": 9.499736817528836e-07, "loss": 0.0729, "step": 6318 }, { "epoch": 0.5644999218453431, "grad_norm": 9.043140015926783, "learning_rate": 9.493508311612874e-07, "loss": 0.7969, "step": 6320 }, { "epoch": 0.5646785610611169, "grad_norm": 10.7847929951729, "learning_rate": 9.487280002685892e-07, "loss": -0.0447, "step": 6322 }, { "epoch": 0.5648572002768908, "grad_norm": 9.333597542792399, "learning_rate": 9.481051893170259e-07, "loss": 0.0787, "step": 6324 }, { "epoch": 0.5650358394926647, "grad_norm": 9.981168181216002, "learning_rate": 9.474823985488263e-07, "loss": -0.1805, "step": 6326 }, { "epoch": 0.5652144787084384, "grad_norm": 7.63335198923466, "learning_rate": 9.468596282062113e-07, "loss": 0.169, "step": 6328 }, { "epoch": 0.5653931179242123, "grad_norm": 11.83742964501103, "learning_rate": 9.46236878531394e-07, "loss": -1.6587, "step": 6330 }, { "epoch": 0.5655717571399862, "grad_norm": 13.972951099419113, "learning_rate": 9.456141497665796e-07, "loss": -0.0543, "step": 6332 }, { "epoch": 0.56575039635576, "grad_norm": 6.56623051287368, "learning_rate": 9.449914421539645e-07, "loss": -0.2918, "step": 6334 }, { "epoch": 0.5659290355715338, "grad_norm": 7.157683782746278, "learning_rate": 9.443687559357374e-07, "loss": -0.0224, "step": 6336 }, { "epoch": 0.5661076747873077, "grad_norm": 9.399724071592225, "learning_rate": 9.437460913540795e-07, "loss": -0.3888, "step": 6338 }, { "epoch": 0.5662863140030815, "grad_norm": 4.7769512723338465, "learning_rate": 9.431234486511614e-07, "loss": -0.4302, "step": 6340 }, { "epoch": 0.5664649532188554, "grad_norm": 2.3221115176670946, "learning_rate": 9.425008280691474e-07, "loss": 0.4022, "step": 6342 }, { "epoch": 0.5666435924346293, "grad_norm": 15.193256307699324, "learning_rate": 9.418782298501921e-07, "loss": 0.165, "step": 6344 }, { "epoch": 0.566822231650403, "grad_norm": 9.389301835207808, "learning_rate": 9.412556542364418e-07, "loss": -0.0228, "step": 6346 }, { "epoch": 0.5670008708661769, "grad_norm": 10.638387705322174, "learning_rate": 9.406331014700335e-07, "loss": -0.4349, "step": 6348 }, { "epoch": 0.5671795100819508, "grad_norm": 7.382806041250482, "learning_rate": 9.400105717930955e-07, "loss": 0.0978, "step": 6350 }, { "epoch": 0.5673581492977245, "grad_norm": 5.84053808058819, "learning_rate": 9.393880654477478e-07, "loss": -0.7438, "step": 6352 }, { "epoch": 0.5675367885134984, "grad_norm": 7.303945379247256, "learning_rate": 9.387655826761003e-07, "loss": -0.4792, "step": 6354 }, { "epoch": 0.5677154277292723, "grad_norm": 24.646449008538102, "learning_rate": 9.381431237202541e-07, "loss": -0.777, "step": 6356 }, { "epoch": 0.5678940669450461, "grad_norm": 8.512306728773732, "learning_rate": 9.375206888223018e-07, "loss": -0.1446, "step": 6358 }, { "epoch": 0.56807270616082, "grad_norm": 14.267914871325475, "learning_rate": 9.368982782243253e-07, "loss": -0.0763, "step": 6360 }, { "epoch": 0.5682513453765938, "grad_norm": 8.408542249621624, "learning_rate": 9.362758921683979e-07, "loss": -0.1733, "step": 6362 }, { "epoch": 0.5684299845923676, "grad_norm": 9.059822055172315, "learning_rate": 9.356535308965833e-07, "loss": -0.5415, "step": 6364 }, { "epoch": 0.5686086238081415, "grad_norm": 8.650907665105192, "learning_rate": 9.350311946509359e-07, "loss": 0.1804, "step": 6366 }, { "epoch": 0.5687872630239154, "grad_norm": 4.537227502632476, "learning_rate": 9.344088836734989e-07, "loss": 1.0261, "step": 6368 }, { "epoch": 0.5689659022396891, "grad_norm": 14.945895465303028, "learning_rate": 9.337865982063074e-07, "loss": -1.0652, "step": 6370 }, { "epoch": 0.569144541455463, "grad_norm": 11.443468548133009, "learning_rate": 9.331643384913863e-07, "loss": -0.7363, "step": 6372 }, { "epoch": 0.5693231806712369, "grad_norm": 17.633329979668474, "learning_rate": 9.325421047707487e-07, "loss": 0.0265, "step": 6374 }, { "epoch": 0.5695018198870107, "grad_norm": 7.38536687994686, "learning_rate": 9.319198972863998e-07, "loss": -0.0867, "step": 6376 }, { "epoch": 0.5696804591027845, "grad_norm": 14.958534402301709, "learning_rate": 9.312977162803339e-07, "loss": 0.2595, "step": 6378 }, { "epoch": 0.5698590983185584, "grad_norm": 9.201577126039064, "learning_rate": 9.306755619945345e-07, "loss": 0.1351, "step": 6380 }, { "epoch": 0.5700377375343322, "grad_norm": 2.2628891836721845, "learning_rate": 9.300534346709751e-07, "loss": 0.9794, "step": 6382 }, { "epoch": 0.5702163767501061, "grad_norm": 8.440089753744893, "learning_rate": 9.294313345516186e-07, "loss": -1.0983, "step": 6384 }, { "epoch": 0.5703950159658799, "grad_norm": 6.53420063152842, "learning_rate": 9.288092618784178e-07, "loss": -0.7514, "step": 6386 }, { "epoch": 0.5705736551816537, "grad_norm": 3.8235474914019765, "learning_rate": 9.281872168933137e-07, "loss": -0.4367, "step": 6388 }, { "epoch": 0.5707522943974276, "grad_norm": 3.6815675763641686, "learning_rate": 9.275651998382377e-07, "loss": 0.8561, "step": 6390 }, { "epoch": 0.5709309336132015, "grad_norm": 18.45687623942844, "learning_rate": 9.269432109551099e-07, "loss": -1.8692, "step": 6392 }, { "epoch": 0.5711095728289752, "grad_norm": 10.977944544333289, "learning_rate": 9.263212504858392e-07, "loss": -1.0698, "step": 6394 }, { "epoch": 0.5712882120447491, "grad_norm": 6.427924580803971, "learning_rate": 9.256993186723235e-07, "loss": -1.3166, "step": 6396 }, { "epoch": 0.571466851260523, "grad_norm": 17.334746572151065, "learning_rate": 9.250774157564501e-07, "loss": -1.2893, "step": 6398 }, { "epoch": 0.5716454904762968, "grad_norm": 3.4096347895175203, "learning_rate": 9.244555419800949e-07, "loss": -0.5893, "step": 6400 }, { "epoch": 0.5718241296920706, "grad_norm": 12.86405049609242, "learning_rate": 9.238336975851218e-07, "loss": 0.5576, "step": 6402 }, { "epoch": 0.5720027689078445, "grad_norm": 9.168722673517134, "learning_rate": 9.232118828133836e-07, "loss": -0.9878, "step": 6404 }, { "epoch": 0.5721814081236183, "grad_norm": 8.894142675141953, "learning_rate": 9.225900979067222e-07, "loss": 0.2876, "step": 6406 }, { "epoch": 0.5723600473393922, "grad_norm": 5.952262435459809, "learning_rate": 9.219683431069669e-07, "loss": 0.2887, "step": 6408 }, { "epoch": 0.572538686555166, "grad_norm": 12.675603090042268, "learning_rate": 9.213466186559361e-07, "loss": 0.0992, "step": 6410 }, { "epoch": 0.5727173257709398, "grad_norm": 5.701004261089526, "learning_rate": 9.207249247954362e-07, "loss": 1.0637, "step": 6412 }, { "epoch": 0.5728959649867137, "grad_norm": 11.980105120622769, "learning_rate": 9.201032617672609e-07, "loss": -0.9042, "step": 6414 }, { "epoch": 0.5730746042024876, "grad_norm": 7.642585612058689, "learning_rate": 9.194816298131932e-07, "loss": -0.0611, "step": 6416 }, { "epoch": 0.5732532434182614, "grad_norm": 2.215946521754117, "learning_rate": 9.188600291750029e-07, "loss": 0.161, "step": 6418 }, { "epoch": 0.5734318826340352, "grad_norm": 9.941684441711788, "learning_rate": 9.182384600944493e-07, "loss": 0.1391, "step": 6420 }, { "epoch": 0.5736105218498091, "grad_norm": 10.138293922653327, "learning_rate": 9.176169228132768e-07, "loss": -0.681, "step": 6422 }, { "epoch": 0.573789161065583, "grad_norm": 6.605735841085175, "learning_rate": 9.16995417573219e-07, "loss": 0.3211, "step": 6424 }, { "epoch": 0.5739678002813567, "grad_norm": 7.18805592916273, "learning_rate": 9.163739446159984e-07, "loss": -0.5904, "step": 6426 }, { "epoch": 0.5741464394971306, "grad_norm": 17.183955600446744, "learning_rate": 9.157525041833216e-07, "loss": -0.3003, "step": 6428 }, { "epoch": 0.5743250787129045, "grad_norm": 13.452773954660627, "learning_rate": 9.151310965168855e-07, "loss": -0.5871, "step": 6430 }, { "epoch": 0.5745037179286783, "grad_norm": 7.018651908263794, "learning_rate": 9.145097218583728e-07, "loss": -0.9103, "step": 6432 }, { "epoch": 0.5746823571444521, "grad_norm": 4.136288311603061, "learning_rate": 9.138883804494541e-07, "loss": -0.3888, "step": 6434 }, { "epoch": 0.574860996360226, "grad_norm": 6.3300169577228305, "learning_rate": 9.132670725317862e-07, "loss": 1.0666, "step": 6436 }, { "epoch": 0.5750396355759998, "grad_norm": 5.25854595901066, "learning_rate": 9.126457983470136e-07, "loss": -0.0075, "step": 6438 }, { "epoch": 0.5752182747917737, "grad_norm": 10.169529704544901, "learning_rate": 9.120245581367677e-07, "loss": -0.4713, "step": 6440 }, { "epoch": 0.5753969140075476, "grad_norm": 8.930342362388615, "learning_rate": 9.114033521426659e-07, "loss": 0.1286, "step": 6442 }, { "epoch": 0.5755755532233213, "grad_norm": 9.224013245281096, "learning_rate": 9.107821806063131e-07, "loss": -0.2689, "step": 6444 }, { "epoch": 0.5757541924390952, "grad_norm": 10.99115258654269, "learning_rate": 9.101610437693009e-07, "loss": -0.0271, "step": 6446 }, { "epoch": 0.5759328316548691, "grad_norm": 7.161983408781135, "learning_rate": 9.095399418732062e-07, "loss": 0.0175, "step": 6448 }, { "epoch": 0.5761114708706428, "grad_norm": 11.65899347740833, "learning_rate": 9.089188751595936e-07, "loss": 0.412, "step": 6450 }, { "epoch": 0.5762901100864167, "grad_norm": 11.247197459082585, "learning_rate": 9.082978438700138e-07, "loss": -0.4433, "step": 6452 }, { "epoch": 0.5764687493021906, "grad_norm": 4.750642632631256, "learning_rate": 9.076768482460034e-07, "loss": -0.1358, "step": 6454 }, { "epoch": 0.5766473885179644, "grad_norm": 6.356240889731211, "learning_rate": 9.070558885290847e-07, "loss": 0.639, "step": 6456 }, { "epoch": 0.5768260277337383, "grad_norm": 10.910972643886728, "learning_rate": 9.06434964960767e-07, "loss": -0.3793, "step": 6458 }, { "epoch": 0.5770046669495121, "grad_norm": 6.491006645108731, "learning_rate": 9.058140777825453e-07, "loss": 0.1314, "step": 6460 }, { "epoch": 0.5771833061652859, "grad_norm": 6.156750963615348, "learning_rate": 9.051932272358996e-07, "loss": -0.1564, "step": 6462 }, { "epoch": 0.5773619453810598, "grad_norm": 4.675888939862632, "learning_rate": 9.045724135622965e-07, "loss": -0.535, "step": 6464 }, { "epoch": 0.5775405845968337, "grad_norm": 10.32015762386155, "learning_rate": 9.039516370031885e-07, "loss": -0.1347, "step": 6466 }, { "epoch": 0.5777192238126074, "grad_norm": 11.860583096816294, "learning_rate": 9.033308978000127e-07, "loss": -0.0244, "step": 6468 }, { "epoch": 0.5778978630283813, "grad_norm": 4.333391311453272, "learning_rate": 9.027101961941923e-07, "loss": 0.2527, "step": 6470 }, { "epoch": 0.5780765022441552, "grad_norm": 10.197624558241952, "learning_rate": 9.020895324271358e-07, "loss": 0.2011, "step": 6472 }, { "epoch": 0.578255141459929, "grad_norm": 10.1951097022455, "learning_rate": 9.014689067402372e-07, "loss": -0.3716, "step": 6474 }, { "epoch": 0.5784337806757028, "grad_norm": 9.86887797586136, "learning_rate": 9.008483193748748e-07, "loss": -1.1739, "step": 6476 }, { "epoch": 0.5786124198914767, "grad_norm": 5.147161438504084, "learning_rate": 9.002277705724129e-07, "loss": -0.0781, "step": 6478 }, { "epoch": 0.5787910591072505, "grad_norm": 8.036416614660698, "learning_rate": 8.996072605742013e-07, "loss": -0.7882, "step": 6480 }, { "epoch": 0.5789696983230244, "grad_norm": 11.222459100252312, "learning_rate": 8.989867896215728e-07, "loss": -1.0331, "step": 6482 }, { "epoch": 0.5791483375387982, "grad_norm": 6.850183258946606, "learning_rate": 8.983663579558464e-07, "loss": 0.5471, "step": 6484 }, { "epoch": 0.579326976754572, "grad_norm": 13.3773043064667, "learning_rate": 8.977459658183264e-07, "loss": -1.0991, "step": 6486 }, { "epoch": 0.5795056159703459, "grad_norm": 8.840879372058476, "learning_rate": 8.971256134502997e-07, "loss": -1.0663, "step": 6488 }, { "epoch": 0.5796842551861198, "grad_norm": 10.263523763500785, "learning_rate": 8.965053010930397e-07, "loss": -0.2316, "step": 6490 }, { "epoch": 0.5798628944018935, "grad_norm": 7.650248265445326, "learning_rate": 8.958850289878031e-07, "loss": -1.4023, "step": 6492 }, { "epoch": 0.5800415336176674, "grad_norm": 3.2708867224610323, "learning_rate": 8.952647973758319e-07, "loss": 0.477, "step": 6494 }, { "epoch": 0.5802201728334413, "grad_norm": 4.760882509951235, "learning_rate": 8.946446064983512e-07, "loss": 0.0127, "step": 6496 }, { "epoch": 0.5803988120492151, "grad_norm": 8.135192112544596, "learning_rate": 8.940244565965707e-07, "loss": -0.0584, "step": 6498 }, { "epoch": 0.5805774512649889, "grad_norm": 17.250077841762774, "learning_rate": 8.93404347911685e-07, "loss": -0.8794, "step": 6500 }, { "epoch": 0.5807560904807628, "grad_norm": 5.905194223489916, "learning_rate": 8.927842806848713e-07, "loss": 0.2934, "step": 6502 }, { "epoch": 0.5809347296965366, "grad_norm": 15.466091351387174, "learning_rate": 8.921642551572915e-07, "loss": -0.9527, "step": 6504 }, { "epoch": 0.5811133689123105, "grad_norm": 17.724478412911495, "learning_rate": 8.915442715700909e-07, "loss": -0.069, "step": 6506 }, { "epoch": 0.5812920081280843, "grad_norm": 4.28897022363453, "learning_rate": 8.909243301643997e-07, "loss": -0.3054, "step": 6508 }, { "epoch": 0.5814706473438581, "grad_norm": 5.0721375835181854, "learning_rate": 8.903044311813293e-07, "loss": -0.6099, "step": 6510 }, { "epoch": 0.581649286559632, "grad_norm": 8.187582466933206, "learning_rate": 8.896845748619768e-07, "loss": -1.1539, "step": 6512 }, { "epoch": 0.5818279257754059, "grad_norm": 4.6290479936103965, "learning_rate": 8.890647614474222e-07, "loss": -0.0854, "step": 6514 }, { "epoch": 0.5820065649911796, "grad_norm": 8.731606906376424, "learning_rate": 8.884449911787274e-07, "loss": 0.1862, "step": 6516 }, { "epoch": 0.5821852042069535, "grad_norm": 5.359301864080518, "learning_rate": 8.878252642969396e-07, "loss": -0.1071, "step": 6518 }, { "epoch": 0.5823638434227274, "grad_norm": 3.7432370555676546, "learning_rate": 8.87205581043088e-07, "loss": 0.2743, "step": 6520 }, { "epoch": 0.5825424826385012, "grad_norm": 13.569167787787318, "learning_rate": 8.865859416581847e-07, "loss": 0.1657, "step": 6522 }, { "epoch": 0.582721121854275, "grad_norm": 3.774909764910326, "learning_rate": 8.85966346383225e-07, "loss": -0.3642, "step": 6524 }, { "epoch": 0.5828997610700489, "grad_norm": 7.879520390433161, "learning_rate": 8.853467954591874e-07, "loss": 0.2417, "step": 6526 }, { "epoch": 0.5830784002858227, "grad_norm": 3.150810313073017, "learning_rate": 8.847272891270329e-07, "loss": 0.4209, "step": 6528 }, { "epoch": 0.5832570395015966, "grad_norm": 9.373080044286487, "learning_rate": 8.841078276277046e-07, "loss": 0.214, "step": 6530 }, { "epoch": 0.5834356787173705, "grad_norm": 20.576828807886372, "learning_rate": 8.83488411202129e-07, "loss": -0.3969, "step": 6532 }, { "epoch": 0.5836143179331442, "grad_norm": 11.682638370960586, "learning_rate": 8.828690400912147e-07, "loss": -1.5727, "step": 6534 }, { "epoch": 0.5837929571489181, "grad_norm": 9.1870756505424, "learning_rate": 8.822497145358525e-07, "loss": 0.4011, "step": 6536 }, { "epoch": 0.583971596364692, "grad_norm": 10.55019985208552, "learning_rate": 8.816304347769156e-07, "loss": -1.6511, "step": 6538 }, { "epoch": 0.5841502355804657, "grad_norm": 11.419431996558473, "learning_rate": 8.810112010552603e-07, "loss": -0.4594, "step": 6540 }, { "epoch": 0.5843288747962396, "grad_norm": 8.556543659217187, "learning_rate": 8.803920136117228e-07, "loss": -0.6404, "step": 6542 }, { "epoch": 0.5845075140120135, "grad_norm": 3.212342931566515, "learning_rate": 8.797728726871235e-07, "loss": 1.2105, "step": 6544 }, { "epoch": 0.5846861532277873, "grad_norm": 9.240575164464449, "learning_rate": 8.791537785222638e-07, "loss": 0.7065, "step": 6546 }, { "epoch": 0.5848647924435612, "grad_norm": 13.875208947610258, "learning_rate": 8.785347313579272e-07, "loss": -1.3557, "step": 6548 }, { "epoch": 0.585043431659335, "grad_norm": 11.930154987361268, "learning_rate": 8.77915731434878e-07, "loss": -1.2346, "step": 6550 }, { "epoch": 0.5852220708751089, "grad_norm": 7.387131363244601, "learning_rate": 8.772967789938635e-07, "loss": -0.2215, "step": 6552 }, { "epoch": 0.5854007100908827, "grad_norm": 15.86166807237139, "learning_rate": 8.766778742756116e-07, "loss": -0.447, "step": 6554 }, { "epoch": 0.5855793493066566, "grad_norm": 5.390536896358827, "learning_rate": 8.760590175208318e-07, "loss": -0.5422, "step": 6556 }, { "epoch": 0.5857579885224304, "grad_norm": 6.8336276789593, "learning_rate": 8.75440208970215e-07, "loss": 0.4791, "step": 6558 }, { "epoch": 0.5859366277382042, "grad_norm": 5.770961865624498, "learning_rate": 8.748214488644333e-07, "loss": 0.2803, "step": 6560 }, { "epoch": 0.5861152669539781, "grad_norm": 10.746069611023415, "learning_rate": 8.742027374441411e-07, "loss": -0.7845, "step": 6562 }, { "epoch": 0.586293906169752, "grad_norm": 5.156898966321964, "learning_rate": 8.735840749499714e-07, "loss": 0.5538, "step": 6564 }, { "epoch": 0.5864725453855257, "grad_norm": 4.913996127458163, "learning_rate": 8.7296546162254e-07, "loss": 0.3683, "step": 6566 }, { "epoch": 0.5866511846012996, "grad_norm": 8.002971574116284, "learning_rate": 8.723468977024441e-07, "loss": -0.6478, "step": 6568 }, { "epoch": 0.5868298238170735, "grad_norm": 8.514112089659905, "learning_rate": 8.717283834302593e-07, "loss": -0.0003, "step": 6570 }, { "epoch": 0.5870084630328473, "grad_norm": 7.586787544498747, "learning_rate": 8.711099190465441e-07, "loss": 0.3582, "step": 6572 }, { "epoch": 0.5871871022486211, "grad_norm": 10.252728727031533, "learning_rate": 8.704915047918374e-07, "loss": -0.5289, "step": 6574 }, { "epoch": 0.587365741464395, "grad_norm": 9.932567211877151, "learning_rate": 8.698731409066568e-07, "loss": 0.1028, "step": 6576 }, { "epoch": 0.5875443806801688, "grad_norm": 8.559824281831949, "learning_rate": 8.692548276315024e-07, "loss": -0.6383, "step": 6578 }, { "epoch": 0.5877230198959427, "grad_norm": 5.837860240072315, "learning_rate": 8.686365652068535e-07, "loss": -0.2478, "step": 6580 }, { "epoch": 0.5879016591117165, "grad_norm": 8.351798187156128, "learning_rate": 8.6801835387317e-07, "loss": 0.1446, "step": 6582 }, { "epoch": 0.5880802983274903, "grad_norm": 4.434354284989494, "learning_rate": 8.674001938708917e-07, "loss": 1.0367, "step": 6584 }, { "epoch": 0.5882589375432642, "grad_norm": 49.53616645552394, "learning_rate": 8.667820854404386e-07, "loss": -0.7646, "step": 6586 }, { "epoch": 0.5884375767590381, "grad_norm": 1.8016056316999154, "learning_rate": 8.661640288222109e-07, "loss": -0.2461, "step": 6588 }, { "epoch": 0.5886162159748118, "grad_norm": 2.9497485360766644, "learning_rate": 8.655460242565878e-07, "loss": 0.4196, "step": 6590 }, { "epoch": 0.5887948551905857, "grad_norm": 10.929912267613789, "learning_rate": 8.649280719839288e-07, "loss": -0.8016, "step": 6592 }, { "epoch": 0.5889734944063596, "grad_norm": 2.5409106292260875, "learning_rate": 8.643101722445741e-07, "loss": 0.4342, "step": 6594 }, { "epoch": 0.5891521336221334, "grad_norm": 13.806337664884083, "learning_rate": 8.63692325278841e-07, "loss": 0.7733, "step": 6596 }, { "epoch": 0.5893307728379072, "grad_norm": 14.20773633372029, "learning_rate": 8.630745313270282e-07, "loss": -0.2752, "step": 6598 }, { "epoch": 0.5895094120536811, "grad_norm": 4.149526334641085, "learning_rate": 8.624567906294138e-07, "loss": -0.0767, "step": 6600 }, { "epoch": 0.5896880512694549, "grad_norm": 4.8324526061108575, "learning_rate": 8.618391034262545e-07, "loss": 0.4086, "step": 6602 }, { "epoch": 0.5898666904852288, "grad_norm": 18.644830374852738, "learning_rate": 8.61221469957786e-07, "loss": -1.4722, "step": 6604 }, { "epoch": 0.5900453297010027, "grad_norm": 11.377349275560013, "learning_rate": 8.606038904642238e-07, "loss": -0.42, "step": 6606 }, { "epoch": 0.5902239689167764, "grad_norm": 4.302175131396449, "learning_rate": 8.59986365185762e-07, "loss": 1.8412, "step": 6608 }, { "epoch": 0.5904026081325503, "grad_norm": 25.327549056530074, "learning_rate": 8.593688943625734e-07, "loss": -0.9775, "step": 6610 }, { "epoch": 0.5905812473483242, "grad_norm": 9.556992484389621, "learning_rate": 8.587514782348103e-07, "loss": -0.7205, "step": 6612 }, { "epoch": 0.590759886564098, "grad_norm": 8.819386057689634, "learning_rate": 8.581341170426034e-07, "loss": -1.0494, "step": 6614 }, { "epoch": 0.5909385257798718, "grad_norm": 13.690153517938597, "learning_rate": 8.575168110260616e-07, "loss": -2.3121, "step": 6616 }, { "epoch": 0.5911171649956457, "grad_norm": 3.6504844135055654, "learning_rate": 8.568995604252729e-07, "loss": 0.4779, "step": 6618 }, { "epoch": 0.5912958042114195, "grad_norm": 8.493504903708269, "learning_rate": 8.562823654803033e-07, "loss": -1.4141, "step": 6620 }, { "epoch": 0.5914744434271934, "grad_norm": 4.801305060990844, "learning_rate": 8.556652264311983e-07, "loss": -0.1051, "step": 6622 }, { "epoch": 0.5916530826429672, "grad_norm": 7.323857022317517, "learning_rate": 8.550481435179797e-07, "loss": -0.1317, "step": 6624 }, { "epoch": 0.591831721858741, "grad_norm": 10.387415431811098, "learning_rate": 8.544311169806489e-07, "loss": -0.1234, "step": 6626 }, { "epoch": 0.5920103610745149, "grad_norm": 10.001405235738282, "learning_rate": 8.538141470591857e-07, "loss": -1.3852, "step": 6628 }, { "epoch": 0.5921890002902888, "grad_norm": 13.914203966504314, "learning_rate": 8.531972339935461e-07, "loss": 0.5906, "step": 6630 }, { "epoch": 0.5923676395060625, "grad_norm": 2.740129617005948, "learning_rate": 8.525803780236658e-07, "loss": -0.4009, "step": 6632 }, { "epoch": 0.5925462787218364, "grad_norm": 14.488232803572277, "learning_rate": 8.519635793894573e-07, "loss": 0.0865, "step": 6634 }, { "epoch": 0.5927249179376103, "grad_norm": 4.356875342168098, "learning_rate": 8.513468383308116e-07, "loss": -0.0143, "step": 6636 }, { "epoch": 0.592903557153384, "grad_norm": 6.457693482563731, "learning_rate": 8.507301550875959e-07, "loss": -0.0681, "step": 6638 }, { "epoch": 0.5930821963691579, "grad_norm": 12.370462221408264, "learning_rate": 8.501135298996563e-07, "loss": -0.4272, "step": 6640 }, { "epoch": 0.5932608355849318, "grad_norm": 5.905541755960793, "learning_rate": 8.494969630068162e-07, "loss": -0.6542, "step": 6642 }, { "epoch": 0.5934394748007056, "grad_norm": 8.741686986482009, "learning_rate": 8.488804546488753e-07, "loss": 1.5665, "step": 6644 }, { "epoch": 0.5936181140164795, "grad_norm": 14.24908069523012, "learning_rate": 8.482640050656112e-07, "loss": 0.4391, "step": 6646 }, { "epoch": 0.5937967532322533, "grad_norm": 6.853123764267587, "learning_rate": 8.476476144967793e-07, "loss": -0.1663, "step": 6648 }, { "epoch": 0.5939753924480271, "grad_norm": 18.804014808024128, "learning_rate": 8.470312831821106e-07, "loss": -0.0948, "step": 6650 }, { "epoch": 0.594154031663801, "grad_norm": 18.818592795808435, "learning_rate": 8.46415011361314e-07, "loss": -0.2451, "step": 6652 }, { "epoch": 0.5943326708795749, "grad_norm": 8.952366129351773, "learning_rate": 8.457987992740754e-07, "loss": -1.1997, "step": 6654 }, { "epoch": 0.5945113100953486, "grad_norm": 14.421143353089295, "learning_rate": 8.451826471600575e-07, "loss": -0.4516, "step": 6656 }, { "epoch": 0.5946899493111225, "grad_norm": 5.61629337839966, "learning_rate": 8.445665552588982e-07, "loss": -0.1967, "step": 6658 }, { "epoch": 0.5948685885268964, "grad_norm": 9.593271249760656, "learning_rate": 8.439505238102139e-07, "loss": 0.0465, "step": 6660 }, { "epoch": 0.5950472277426702, "grad_norm": 5.411339982188518, "learning_rate": 8.433345530535968e-07, "loss": -0.116, "step": 6662 }, { "epoch": 0.595225866958444, "grad_norm": 5.903781597077935, "learning_rate": 8.42718643228615e-07, "loss": -0.4113, "step": 6664 }, { "epoch": 0.5954045061742179, "grad_norm": 10.65591635978797, "learning_rate": 8.421027945748134e-07, "loss": -0.4802, "step": 6666 }, { "epoch": 0.5955831453899917, "grad_norm": 8.923191380968287, "learning_rate": 8.414870073317131e-07, "loss": -0.2098, "step": 6668 }, { "epoch": 0.5957617846057656, "grad_norm": 6.819942223983761, "learning_rate": 8.408712817388111e-07, "loss": -0.0323, "step": 6670 }, { "epoch": 0.5959404238215394, "grad_norm": 13.873112180658975, "learning_rate": 8.402556180355806e-07, "loss": -0.0326, "step": 6672 }, { "epoch": 0.5961190630373132, "grad_norm": 7.705641762892131, "learning_rate": 8.396400164614707e-07, "loss": -0.3697, "step": 6674 }, { "epoch": 0.5962977022530871, "grad_norm": 8.665308870422578, "learning_rate": 8.390244772559066e-07, "loss": 0.3044, "step": 6676 }, { "epoch": 0.596476341468861, "grad_norm": 12.525699898967098, "learning_rate": 8.384090006582884e-07, "loss": -0.829, "step": 6678 }, { "epoch": 0.5966549806846347, "grad_norm": 12.953938383878484, "learning_rate": 8.377935869079924e-07, "loss": -0.1553, "step": 6680 }, { "epoch": 0.5968336199004086, "grad_norm": 9.841277281196309, "learning_rate": 8.371782362443715e-07, "loss": -0.6762, "step": 6682 }, { "epoch": 0.5970122591161825, "grad_norm": 7.092410057662021, "learning_rate": 8.365629489067515e-07, "loss": 0.5176, "step": 6684 }, { "epoch": 0.5971908983319564, "grad_norm": 12.79755583315558, "learning_rate": 8.35947725134436e-07, "loss": 0.5159, "step": 6686 }, { "epoch": 0.5973695375477301, "grad_norm": 9.157611003664307, "learning_rate": 8.353325651667029e-07, "loss": -1.5305, "step": 6688 }, { "epoch": 0.597548176763504, "grad_norm": 34.31554695317142, "learning_rate": 8.347174692428055e-07, "loss": -0.4099, "step": 6690 }, { "epoch": 0.5977268159792779, "grad_norm": 14.850613647523385, "learning_rate": 8.341024376019716e-07, "loss": -0.0306, "step": 6692 }, { "epoch": 0.5979054551950517, "grad_norm": 18.180578009184604, "learning_rate": 8.334874704834047e-07, "loss": -0.451, "step": 6694 }, { "epoch": 0.5980840944108256, "grad_norm": 9.777849380153928, "learning_rate": 8.328725681262831e-07, "loss": -0.3089, "step": 6696 }, { "epoch": 0.5982627336265994, "grad_norm": 8.729815936163654, "learning_rate": 8.322577307697593e-07, "loss": 0.0612, "step": 6698 }, { "epoch": 0.5984413728423732, "grad_norm": 5.298170443312174, "learning_rate": 8.316429586529614e-07, "loss": 0.6521, "step": 6700 }, { "epoch": 0.5986200120581471, "grad_norm": 10.562246827471586, "learning_rate": 8.310282520149918e-07, "loss": -0.6327, "step": 6702 }, { "epoch": 0.598798651273921, "grad_norm": 8.986638641464651, "learning_rate": 8.304136110949269e-07, "loss": -0.8918, "step": 6704 }, { "epoch": 0.5989772904896947, "grad_norm": 7.220858179377275, "learning_rate": 8.297990361318182e-07, "loss": 0.0818, "step": 6706 }, { "epoch": 0.5991559297054686, "grad_norm": 12.479536472417806, "learning_rate": 8.291845273646911e-07, "loss": 0.0786, "step": 6708 }, { "epoch": 0.5993345689212425, "grad_norm": 8.315159881210803, "learning_rate": 8.285700850325465e-07, "loss": 0.3239, "step": 6710 }, { "epoch": 0.5995132081370163, "grad_norm": 30.212572888565365, "learning_rate": 8.27955709374357e-07, "loss": 0.5051, "step": 6712 }, { "epoch": 0.5996918473527901, "grad_norm": 7.759613719253136, "learning_rate": 8.273414006290715e-07, "loss": 0.3593, "step": 6714 }, { "epoch": 0.599870486568564, "grad_norm": 32.631356391083024, "learning_rate": 8.267271590356127e-07, "loss": 0.3794, "step": 6716 }, { "epoch": 0.6000491257843378, "grad_norm": 3.957754327020478, "learning_rate": 8.261129848328749e-07, "loss": 0.3155, "step": 6718 }, { "epoch": 0.6002277650001117, "grad_norm": 3.4636096154146996, "learning_rate": 8.254988782597293e-07, "loss": -1.1651, "step": 6720 }, { "epoch": 0.6004064042158855, "grad_norm": 4.7368731564625035, "learning_rate": 8.24884839555019e-07, "loss": 0.0516, "step": 6722 }, { "epoch": 0.6005850434316593, "grad_norm": 12.731177105177606, "learning_rate": 8.242708689575608e-07, "loss": 0.2724, "step": 6724 }, { "epoch": 0.6007636826474332, "grad_norm": 12.382426798015127, "learning_rate": 8.236569667061454e-07, "loss": -0.9093, "step": 6726 }, { "epoch": 0.6009423218632071, "grad_norm": 17.671173000811503, "learning_rate": 8.23043133039537e-07, "loss": -0.3908, "step": 6728 }, { "epoch": 0.6011209610789808, "grad_norm": 9.170667110625391, "learning_rate": 8.224293681964728e-07, "loss": 0.0807, "step": 6730 }, { "epoch": 0.6012996002947547, "grad_norm": 4.093993900400751, "learning_rate": 8.218156724156632e-07, "loss": -0.4948, "step": 6732 }, { "epoch": 0.6014782395105286, "grad_norm": 8.482751286992485, "learning_rate": 8.21202045935792e-07, "loss": 0.1192, "step": 6734 }, { "epoch": 0.6016568787263024, "grad_norm": 9.386540748156206, "learning_rate": 8.205884889955161e-07, "loss": -0.1144, "step": 6736 }, { "epoch": 0.6018355179420762, "grad_norm": 5.408715716668362, "learning_rate": 8.19975001833465e-07, "loss": 0.1111, "step": 6738 }, { "epoch": 0.6020141571578501, "grad_norm": 9.021323903571167, "learning_rate": 8.193615846882409e-07, "loss": -0.0193, "step": 6740 }, { "epoch": 0.6021927963736239, "grad_norm": 9.391728239582525, "learning_rate": 8.187482377984203e-07, "loss": 0.4406, "step": 6742 }, { "epoch": 0.6023714355893978, "grad_norm": 9.854996520592914, "learning_rate": 8.181349614025498e-07, "loss": -0.2232, "step": 6744 }, { "epoch": 0.6025500748051716, "grad_norm": 3.9387794716766593, "learning_rate": 8.175217557391507e-07, "loss": 1.2653, "step": 6746 }, { "epoch": 0.6027287140209454, "grad_norm": 11.590049132406712, "learning_rate": 8.169086210467162e-07, "loss": -0.1865, "step": 6748 }, { "epoch": 0.6029073532367193, "grad_norm": 9.929873436988478, "learning_rate": 8.162955575637116e-07, "loss": -1.4693, "step": 6750 }, { "epoch": 0.6030859924524932, "grad_norm": 23.307184590070612, "learning_rate": 8.156825655285745e-07, "loss": -0.7727, "step": 6752 }, { "epoch": 0.6032646316682669, "grad_norm": 6.5868132202482785, "learning_rate": 8.15069645179715e-07, "loss": 0.7575, "step": 6754 }, { "epoch": 0.6034432708840408, "grad_norm": 2.531375834786956, "learning_rate": 8.144567967555154e-07, "loss": -0.3076, "step": 6756 }, { "epoch": 0.6036219100998147, "grad_norm": 14.831278683666426, "learning_rate": 8.138440204943296e-07, "loss": -0.0923, "step": 6758 }, { "epoch": 0.6038005493155885, "grad_norm": 7.160531042001479, "learning_rate": 8.132313166344836e-07, "loss": -0.6547, "step": 6760 }, { "epoch": 0.6039791885313623, "grad_norm": 14.652662932240657, "learning_rate": 8.126186854142751e-07, "loss": 0.3381, "step": 6762 }, { "epoch": 0.6041578277471362, "grad_norm": 5.810188736336839, "learning_rate": 8.120061270719749e-07, "loss": 0.2953, "step": 6764 }, { "epoch": 0.60433646696291, "grad_norm": 9.74077463191134, "learning_rate": 8.113936418458231e-07, "loss": 0.0052, "step": 6766 }, { "epoch": 0.6045151061786839, "grad_norm": 10.18634251296823, "learning_rate": 8.107812299740326e-07, "loss": -0.312, "step": 6768 }, { "epoch": 0.6046937453944577, "grad_norm": 6.575745213271518, "learning_rate": 8.10168891694789e-07, "loss": 0.2889, "step": 6770 }, { "epoch": 0.6048723846102315, "grad_norm": 9.10693117210925, "learning_rate": 8.095566272462463e-07, "loss": -0.0561, "step": 6772 }, { "epoch": 0.6050510238260054, "grad_norm": 7.334494874459765, "learning_rate": 8.089444368665326e-07, "loss": -1.0583, "step": 6774 }, { "epoch": 0.6052296630417793, "grad_norm": 10.966747307601697, "learning_rate": 8.083323207937462e-07, "loss": -1.0663, "step": 6776 }, { "epoch": 0.605408302257553, "grad_norm": 6.49527040219699, "learning_rate": 8.077202792659557e-07, "loss": -0.3815, "step": 6778 }, { "epoch": 0.6055869414733269, "grad_norm": 6.752768054939293, "learning_rate": 8.07108312521202e-07, "loss": -0.5754, "step": 6780 }, { "epoch": 0.6057655806891008, "grad_norm": 11.442379770378638, "learning_rate": 8.064964207974958e-07, "loss": -0.0298, "step": 6782 }, { "epoch": 0.6059442199048746, "grad_norm": 16.172136358093375, "learning_rate": 8.058846043328198e-07, "loss": 0.0177, "step": 6784 }, { "epoch": 0.6061228591206484, "grad_norm": 10.62396558326294, "learning_rate": 8.052728633651262e-07, "loss": -0.1852, "step": 6786 }, { "epoch": 0.6063014983364223, "grad_norm": 11.534831496232806, "learning_rate": 8.046611981323386e-07, "loss": -1.2326, "step": 6788 }, { "epoch": 0.6064801375521961, "grad_norm": 8.598547134102528, "learning_rate": 8.040496088723512e-07, "loss": -0.0948, "step": 6790 }, { "epoch": 0.60665877676797, "grad_norm": 5.250106810436916, "learning_rate": 8.034380958230278e-07, "loss": -0.3433, "step": 6792 }, { "epoch": 0.6068374159837439, "grad_norm": 10.807296294552222, "learning_rate": 8.028266592222035e-07, "loss": -0.1314, "step": 6794 }, { "epoch": 0.6070160551995176, "grad_norm": 12.928527374312953, "learning_rate": 8.02215299307684e-07, "loss": -1.0108, "step": 6796 }, { "epoch": 0.6071946944152915, "grad_norm": 6.019004288765196, "learning_rate": 8.016040163172435e-07, "loss": 0.415, "step": 6798 }, { "epoch": 0.6073733336310654, "grad_norm": 6.332197823707378, "learning_rate": 8.009928104886275e-07, "loss": -0.1089, "step": 6800 }, { "epoch": 0.6075519728468392, "grad_norm": 5.819276858364523, "learning_rate": 8.003816820595515e-07, "loss": -0.2826, "step": 6802 }, { "epoch": 0.607730612062613, "grad_norm": 7.8581881830989575, "learning_rate": 7.997706312677011e-07, "loss": -1.0451, "step": 6804 }, { "epoch": 0.6079092512783869, "grad_norm": 7.414400790408524, "learning_rate": 7.991596583507305e-07, "loss": -0.2472, "step": 6806 }, { "epoch": 0.6080878904941607, "grad_norm": 13.773316190794487, "learning_rate": 7.985487635462647e-07, "loss": -1.4069, "step": 6808 }, { "epoch": 0.6082665297099346, "grad_norm": 5.807936979514138, "learning_rate": 7.979379470918983e-07, "loss": -0.565, "step": 6810 }, { "epoch": 0.6084451689257084, "grad_norm": 11.509978899103704, "learning_rate": 7.973272092251946e-07, "loss": -0.4129, "step": 6812 }, { "epoch": 0.6086238081414823, "grad_norm": 6.546063284069516, "learning_rate": 7.967165501836872e-07, "loss": 0.886, "step": 6814 }, { "epoch": 0.6088024473572561, "grad_norm": 12.895990833970213, "learning_rate": 7.961059702048787e-07, "loss": -0.8611, "step": 6816 }, { "epoch": 0.60898108657303, "grad_norm": 9.78310271927664, "learning_rate": 7.954954695262413e-07, "loss": -0.6814, "step": 6818 }, { "epoch": 0.6091597257888038, "grad_norm": 23.63355758122997, "learning_rate": 7.948850483852152e-07, "loss": 0.1288, "step": 6820 }, { "epoch": 0.6093383650045776, "grad_norm": 6.120182244573042, "learning_rate": 7.942747070192108e-07, "loss": 0.2486, "step": 6822 }, { "epoch": 0.6095170042203515, "grad_norm": 9.19300397157993, "learning_rate": 7.936644456656081e-07, "loss": -0.2921, "step": 6824 }, { "epoch": 0.6096956434361254, "grad_norm": 8.489520176065561, "learning_rate": 7.930542645617538e-07, "loss": -0.4633, "step": 6826 }, { "epoch": 0.6098742826518991, "grad_norm": 7.298446322717318, "learning_rate": 7.924441639449649e-07, "loss": -0.7259, "step": 6828 }, { "epoch": 0.610052921867673, "grad_norm": 4.972312945361678, "learning_rate": 7.918341440525277e-07, "loss": -1.0784, "step": 6830 }, { "epoch": 0.6102315610834469, "grad_norm": 8.28203259494966, "learning_rate": 7.91224205121695e-07, "loss": -1.1286, "step": 6832 }, { "epoch": 0.6104102002992207, "grad_norm": 8.267600503085117, "learning_rate": 7.906143473896901e-07, "loss": -0.68, "step": 6834 }, { "epoch": 0.6105888395149945, "grad_norm": 7.450301237224615, "learning_rate": 7.900045710937038e-07, "loss": -0.1043, "step": 6836 }, { "epoch": 0.6107674787307684, "grad_norm": 14.1226680484821, "learning_rate": 7.893948764708955e-07, "loss": 0.9556, "step": 6838 }, { "epoch": 0.6109461179465422, "grad_norm": 5.0596767135296865, "learning_rate": 7.887852637583926e-07, "loss": 0.3931, "step": 6840 }, { "epoch": 0.6111247571623161, "grad_norm": 7.490632628754068, "learning_rate": 7.881757331932908e-07, "loss": -0.6705, "step": 6842 }, { "epoch": 0.61130339637809, "grad_norm": 9.228564608004925, "learning_rate": 7.87566285012654e-07, "loss": -0.3139, "step": 6844 }, { "epoch": 0.6114820355938637, "grad_norm": 24.593083645277055, "learning_rate": 7.869569194535135e-07, "loss": -0.8555, "step": 6846 }, { "epoch": 0.6116606748096376, "grad_norm": 11.528454945806919, "learning_rate": 7.863476367528689e-07, "loss": 0.36, "step": 6848 }, { "epoch": 0.6118393140254115, "grad_norm": 7.810146129769876, "learning_rate": 7.857384371476882e-07, "loss": -0.1869, "step": 6850 }, { "epoch": 0.6120179532411852, "grad_norm": 3.9518577745922827, "learning_rate": 7.851293208749055e-07, "loss": 0.3176, "step": 6852 }, { "epoch": 0.6121965924569591, "grad_norm": 5.484966365742382, "learning_rate": 7.845202881714236e-07, "loss": 0.0589, "step": 6854 }, { "epoch": 0.612375231672733, "grad_norm": 12.909820579978119, "learning_rate": 7.839113392741129e-07, "loss": -1.5152, "step": 6856 }, { "epoch": 0.6125538708885068, "grad_norm": 9.285105305918302, "learning_rate": 7.833024744198112e-07, "loss": -0.6474, "step": 6858 }, { "epoch": 0.6127325101042806, "grad_norm": 10.841116901584824, "learning_rate": 7.826936938453224e-07, "loss": 0.4286, "step": 6860 }, { "epoch": 0.6129111493200545, "grad_norm": 9.589615418615761, "learning_rate": 7.82084997787419e-07, "loss": -0.2209, "step": 6862 }, { "epoch": 0.6130897885358283, "grad_norm": 10.589343029244809, "learning_rate": 7.814763864828405e-07, "loss": 0.1427, "step": 6864 }, { "epoch": 0.6132684277516022, "grad_norm": 5.682618110526863, "learning_rate": 7.808678601682925e-07, "loss": -0.1882, "step": 6866 }, { "epoch": 0.613447066967376, "grad_norm": 10.598472240401746, "learning_rate": 7.802594190804485e-07, "loss": 0.0618, "step": 6868 }, { "epoch": 0.6136257061831498, "grad_norm": 6.834787570691177, "learning_rate": 7.796510634559486e-07, "loss": -0.0537, "step": 6870 }, { "epoch": 0.6138043453989237, "grad_norm": 10.778121007673587, "learning_rate": 7.79042793531399e-07, "loss": 0.6809, "step": 6872 }, { "epoch": 0.6139829846146976, "grad_norm": 13.31269026785979, "learning_rate": 7.784346095433735e-07, "loss": -0.2978, "step": 6874 }, { "epoch": 0.6141616238304713, "grad_norm": 6.018761547865837, "learning_rate": 7.778265117284121e-07, "loss": 0.1849, "step": 6876 }, { "epoch": 0.6143402630462452, "grad_norm": 7.298336822004836, "learning_rate": 7.772185003230217e-07, "loss": 0.1384, "step": 6878 }, { "epoch": 0.6145189022620191, "grad_norm": 6.29827607803721, "learning_rate": 7.766105755636743e-07, "loss": 0.0224, "step": 6880 }, { "epoch": 0.6146975414777929, "grad_norm": 8.774384444945145, "learning_rate": 7.760027376868094e-07, "loss": -0.3536, "step": 6882 }, { "epoch": 0.6148761806935668, "grad_norm": 5.706593417710657, "learning_rate": 7.75394986928833e-07, "loss": 0.2921, "step": 6884 }, { "epoch": 0.6150548199093406, "grad_norm": 12.096999724880133, "learning_rate": 7.747873235261156e-07, "loss": 0.8224, "step": 6886 }, { "epoch": 0.6152334591251144, "grad_norm": 2.7054086200973, "learning_rate": 7.741797477149955e-07, "loss": 0.0366, "step": 6888 }, { "epoch": 0.6154120983408883, "grad_norm": 5.331299135578725, "learning_rate": 7.735722597317759e-07, "loss": -1.295, "step": 6890 }, { "epoch": 0.6155907375566622, "grad_norm": 7.463400996271897, "learning_rate": 7.729648598127263e-07, "loss": -0.3416, "step": 6892 }, { "epoch": 0.6157693767724359, "grad_norm": 1.7824900811174873, "learning_rate": 7.723575481940814e-07, "loss": 1.1979, "step": 6894 }, { "epoch": 0.6159480159882098, "grad_norm": 7.014453407793613, "learning_rate": 7.717503251120421e-07, "loss": -0.4571, "step": 6896 }, { "epoch": 0.6161266552039837, "grad_norm": 11.536531226255793, "learning_rate": 7.711431908027748e-07, "loss": -0.3053, "step": 6898 }, { "epoch": 0.6163052944197575, "grad_norm": 4.590052978941235, "learning_rate": 7.705361455024109e-07, "loss": 0.3818, "step": 6900 }, { "epoch": 0.6164839336355313, "grad_norm": 11.81766393100315, "learning_rate": 7.699291894470478e-07, "loss": 0.2052, "step": 6902 }, { "epoch": 0.6166625728513052, "grad_norm": 15.859331704066516, "learning_rate": 7.693223228727481e-07, "loss": -0.1365, "step": 6904 }, { "epoch": 0.616841212067079, "grad_norm": 3.623282584776952, "learning_rate": 7.68715546015539e-07, "loss": 0.8205, "step": 6906 }, { "epoch": 0.6170198512828529, "grad_norm": 19.22527836274419, "learning_rate": 7.681088591114134e-07, "loss": 0.8173, "step": 6908 }, { "epoch": 0.6171984904986267, "grad_norm": 4.035089128495471, "learning_rate": 7.675022623963288e-07, "loss": 0.3532, "step": 6910 }, { "epoch": 0.6173771297144005, "grad_norm": 8.79100803119812, "learning_rate": 7.668957561062088e-07, "loss": 0.0709, "step": 6912 }, { "epoch": 0.6175557689301744, "grad_norm": 3.9200568981323123, "learning_rate": 7.662893404769397e-07, "loss": -0.6944, "step": 6914 }, { "epoch": 0.6177344081459483, "grad_norm": 7.378996978250452, "learning_rate": 7.656830157443743e-07, "loss": -0.4761, "step": 6916 }, { "epoch": 0.617913047361722, "grad_norm": 10.12859742105359, "learning_rate": 7.6507678214433e-07, "loss": -0.1319, "step": 6918 }, { "epoch": 0.6180916865774959, "grad_norm": 10.52102771743127, "learning_rate": 7.644706399125871e-07, "loss": -0.0332, "step": 6920 }, { "epoch": 0.6182703257932698, "grad_norm": 6.107028988802031, "learning_rate": 7.638645892848922e-07, "loss": 0.5958, "step": 6922 }, { "epoch": 0.6184489650090436, "grad_norm": 8.258979765390924, "learning_rate": 7.632586304969557e-07, "loss": -0.0383, "step": 6924 }, { "epoch": 0.6186276042248174, "grad_norm": 9.894750617510017, "learning_rate": 7.626527637844518e-07, "loss": 0.1021, "step": 6926 }, { "epoch": 0.6188062434405913, "grad_norm": 3.8399794778672836, "learning_rate": 7.620469893830193e-07, "loss": 0.3836, "step": 6928 }, { "epoch": 0.6189848826563651, "grad_norm": 3.2912364308345192, "learning_rate": 7.61441307528261e-07, "loss": 0.3087, "step": 6930 }, { "epoch": 0.619163521872139, "grad_norm": 3.2689082966215484, "learning_rate": 7.608357184557443e-07, "loss": -0.3114, "step": 6932 }, { "epoch": 0.6193421610879128, "grad_norm": 12.172069308060745, "learning_rate": 7.602302224009994e-07, "loss": 0.5448, "step": 6934 }, { "epoch": 0.6195208003036866, "grad_norm": 10.20814080089219, "learning_rate": 7.596248195995206e-07, "loss": -1.4234, "step": 6936 }, { "epoch": 0.6196994395194605, "grad_norm": 12.678101915842, "learning_rate": 7.590195102867673e-07, "loss": -0.9356, "step": 6938 }, { "epoch": 0.6198780787352344, "grad_norm": 4.1047720422304526, "learning_rate": 7.584142946981604e-07, "loss": -0.3585, "step": 6940 }, { "epoch": 0.6200567179510081, "grad_norm": 9.192894837357962, "learning_rate": 7.578091730690857e-07, "loss": -0.195, "step": 6942 }, { "epoch": 0.620235357166782, "grad_norm": 4.991900177568908, "learning_rate": 7.572041456348924e-07, "loss": -0.1669, "step": 6944 }, { "epoch": 0.6204139963825559, "grad_norm": 2.7353637651459337, "learning_rate": 7.56599212630893e-07, "loss": -0.1867, "step": 6946 }, { "epoch": 0.6205926355983298, "grad_norm": 9.568762230943154, "learning_rate": 7.559943742923625e-07, "loss": -0.4471, "step": 6948 }, { "epoch": 0.6207712748141035, "grad_norm": 8.017204857708748, "learning_rate": 7.5538963085454e-07, "loss": -0.4842, "step": 6950 }, { "epoch": 0.6209499140298774, "grad_norm": 14.536752360463913, "learning_rate": 7.547849825526276e-07, "loss": -0.711, "step": 6952 }, { "epoch": 0.6211285532456513, "grad_norm": 4.699581801294203, "learning_rate": 7.541804296217897e-07, "loss": -0.366, "step": 6954 }, { "epoch": 0.6213071924614251, "grad_norm": 10.485346059841273, "learning_rate": 7.535759722971545e-07, "loss": 0.8745, "step": 6956 }, { "epoch": 0.621485831677199, "grad_norm": 7.5012513706210475, "learning_rate": 7.529716108138123e-07, "loss": 0.7395, "step": 6958 }, { "epoch": 0.6216644708929728, "grad_norm": 20.264656696701188, "learning_rate": 7.523673454068165e-07, "loss": -0.2688, "step": 6960 }, { "epoch": 0.6218431101087466, "grad_norm": 14.353208793197876, "learning_rate": 7.517631763111832e-07, "loss": -0.3808, "step": 6962 }, { "epoch": 0.6220217493245205, "grad_norm": 5.501869404148704, "learning_rate": 7.511591037618902e-07, "loss": -0.5493, "step": 6964 }, { "epoch": 0.6222003885402944, "grad_norm": 18.466699270134708, "learning_rate": 7.5055512799388e-07, "loss": -0.5463, "step": 6966 }, { "epoch": 0.6223790277560681, "grad_norm": 5.825047557456579, "learning_rate": 7.499512492420545e-07, "loss": 0.4541, "step": 6968 }, { "epoch": 0.622557666971842, "grad_norm": 14.755243436226483, "learning_rate": 7.493474677412793e-07, "loss": -0.5556, "step": 6970 }, { "epoch": 0.6227363061876159, "grad_norm": 8.299568845282153, "learning_rate": 7.487437837263833e-07, "loss": 0.1039, "step": 6972 }, { "epoch": 0.6229149454033897, "grad_norm": 16.948092931179787, "learning_rate": 7.481401974321549e-07, "loss": 0.7526, "step": 6974 }, { "epoch": 0.6230935846191635, "grad_norm": 13.600866940859643, "learning_rate": 7.475367090933469e-07, "loss": -0.9984, "step": 6976 }, { "epoch": 0.6232722238349374, "grad_norm": 6.436948585967475, "learning_rate": 7.469333189446729e-07, "loss": -0.0063, "step": 6978 }, { "epoch": 0.6234508630507112, "grad_norm": 8.243318568719191, "learning_rate": 7.463300272208082e-07, "loss": -0.0238, "step": 6980 }, { "epoch": 0.6236295022664851, "grad_norm": 6.243234558518969, "learning_rate": 7.457268341563901e-07, "loss": 0.4286, "step": 6982 }, { "epoch": 0.6238081414822589, "grad_norm": 8.60788432818552, "learning_rate": 7.451237399860175e-07, "loss": -0.319, "step": 6984 }, { "epoch": 0.6239867806980327, "grad_norm": 7.003671228366508, "learning_rate": 7.445207449442512e-07, "loss": -1.1172, "step": 6986 }, { "epoch": 0.6241654199138066, "grad_norm": 5.426408175342582, "learning_rate": 7.439178492656123e-07, "loss": 0.8504, "step": 6988 }, { "epoch": 0.6243440591295805, "grad_norm": 4.896738150022696, "learning_rate": 7.433150531845847e-07, "loss": 1.2874, "step": 6990 }, { "epoch": 0.6245226983453542, "grad_norm": 9.127674703647445, "learning_rate": 7.427123569356126e-07, "loss": 0.2792, "step": 6992 }, { "epoch": 0.6247013375611281, "grad_norm": 9.170686245099365, "learning_rate": 7.421097607531018e-07, "loss": -0.1411, "step": 6994 }, { "epoch": 0.624879976776902, "grad_norm": 6.006350018068443, "learning_rate": 7.415072648714186e-07, "loss": -0.1984, "step": 6996 }, { "epoch": 0.6250586159926758, "grad_norm": 14.991739160017165, "learning_rate": 7.409048695248918e-07, "loss": -0.1789, "step": 6998 }, { "epoch": 0.6252372552084496, "grad_norm": 6.193621757095819, "learning_rate": 7.403025749478088e-07, "loss": -0.588, "step": 7000 }, { "epoch": 0.6254158944242235, "grad_norm": 5.510022654685856, "learning_rate": 7.397003813744195e-07, "loss": 0.8201, "step": 7002 }, { "epoch": 0.6255945336399973, "grad_norm": 16.778347339744908, "learning_rate": 7.390982890389344e-07, "loss": 0.0289, "step": 7004 }, { "epoch": 0.6257731728557712, "grad_norm": 5.379700136017508, "learning_rate": 7.384962981755241e-07, "loss": 0.9652, "step": 7006 }, { "epoch": 0.625951812071545, "grad_norm": 7.3818772142358675, "learning_rate": 7.378944090183196e-07, "loss": -0.3026, "step": 7008 }, { "epoch": 0.6261304512873188, "grad_norm": 7.6108146417922145, "learning_rate": 7.372926218014131e-07, "loss": -0.5368, "step": 7010 }, { "epoch": 0.6263090905030927, "grad_norm": 10.987837917092023, "learning_rate": 7.366909367588565e-07, "loss": 0.4067, "step": 7012 }, { "epoch": 0.6264877297188666, "grad_norm": 7.368784467440196, "learning_rate": 7.360893541246622e-07, "loss": 0.1144, "step": 7014 }, { "epoch": 0.6266663689346403, "grad_norm": 12.30861854369131, "learning_rate": 7.354878741328026e-07, "loss": 0.2275, "step": 7016 }, { "epoch": 0.6268450081504142, "grad_norm": 12.85745851643913, "learning_rate": 7.348864970172106e-07, "loss": 0.1659, "step": 7018 }, { "epoch": 0.6270236473661881, "grad_norm": 5.838029478839652, "learning_rate": 7.342852230117788e-07, "loss": 1.585, "step": 7020 }, { "epoch": 0.6272022865819619, "grad_norm": 20.110203581414524, "learning_rate": 7.336840523503595e-07, "loss": 0.4064, "step": 7022 }, { "epoch": 0.6273809257977357, "grad_norm": 8.22361466356283, "learning_rate": 7.330829852667648e-07, "loss": 0.7614, "step": 7024 }, { "epoch": 0.6275595650135096, "grad_norm": 12.673626309506203, "learning_rate": 7.324820219947679e-07, "loss": 0.1335, "step": 7026 }, { "epoch": 0.6277382042292834, "grad_norm": 5.620692320295056, "learning_rate": 7.318811627680988e-07, "loss": 1.4457, "step": 7028 }, { "epoch": 0.6279168434450573, "grad_norm": 8.303003838473112, "learning_rate": 7.312804078204497e-07, "loss": 0.8258, "step": 7030 }, { "epoch": 0.6280954826608312, "grad_norm": 2.407636664008439, "learning_rate": 7.306797573854716e-07, "loss": 0.3221, "step": 7032 }, { "epoch": 0.6282741218766049, "grad_norm": 16.79493084678938, "learning_rate": 7.300792116967733e-07, "loss": -1.5812, "step": 7034 }, { "epoch": 0.6284527610923788, "grad_norm": 17.715939791754717, "learning_rate": 7.294787709879248e-07, "loss": -1.7552, "step": 7036 }, { "epoch": 0.6286314003081527, "grad_norm": 10.03143937832336, "learning_rate": 7.288784354924545e-07, "loss": -0.3496, "step": 7038 }, { "epoch": 0.6288100395239264, "grad_norm": 12.989895341435409, "learning_rate": 7.2827820544385e-07, "loss": -1.0858, "step": 7040 }, { "epoch": 0.6289886787397003, "grad_norm": 4.581108709344926, "learning_rate": 7.276780810755574e-07, "loss": 1.1851, "step": 7042 }, { "epoch": 0.6291673179554742, "grad_norm": 9.075818607620885, "learning_rate": 7.270780626209822e-07, "loss": -0.2987, "step": 7044 }, { "epoch": 0.629345957171248, "grad_norm": 7.396092914449488, "learning_rate": 7.26478150313489e-07, "loss": 0.08, "step": 7046 }, { "epoch": 0.6295245963870219, "grad_norm": 4.241513699294594, "learning_rate": 7.258783443864001e-07, "loss": 0.0783, "step": 7048 }, { "epoch": 0.6297032356027957, "grad_norm": 1.6798146754180951, "learning_rate": 7.252786450729975e-07, "loss": 0.1004, "step": 7050 }, { "epoch": 0.6298818748185695, "grad_norm": 13.519623234481934, "learning_rate": 7.246790526065212e-07, "loss": -1.2662, "step": 7052 }, { "epoch": 0.6300605140343434, "grad_norm": 14.104366389353235, "learning_rate": 7.240795672201694e-07, "loss": 0.1979, "step": 7054 }, { "epoch": 0.6302391532501173, "grad_norm": 10.778928645017324, "learning_rate": 7.23480189147099e-07, "loss": -0.043, "step": 7056 }, { "epoch": 0.630417792465891, "grad_norm": 7.491657958345653, "learning_rate": 7.228809186204254e-07, "loss": -1.0219, "step": 7058 }, { "epoch": 0.6305964316816649, "grad_norm": 3.807204273784665, "learning_rate": 7.222817558732224e-07, "loss": 0.9187, "step": 7060 }, { "epoch": 0.6307750708974388, "grad_norm": 6.776648019044773, "learning_rate": 7.2168270113852e-07, "loss": -0.6149, "step": 7062 }, { "epoch": 0.6309537101132126, "grad_norm": 7.735118976607625, "learning_rate": 7.210837546493087e-07, "loss": 0.0579, "step": 7064 }, { "epoch": 0.6311323493289864, "grad_norm": 26.284499975552965, "learning_rate": 7.204849166385356e-07, "loss": 0.1608, "step": 7066 }, { "epoch": 0.6313109885447603, "grad_norm": 6.9049228411430255, "learning_rate": 7.198861873391056e-07, "loss": -0.5016, "step": 7068 }, { "epoch": 0.6314896277605341, "grad_norm": 7.131502898242872, "learning_rate": 7.192875669838813e-07, "loss": 0.1259, "step": 7070 }, { "epoch": 0.631668266976308, "grad_norm": 7.348552026653124, "learning_rate": 7.186890558056836e-07, "loss": 0.9898, "step": 7072 }, { "epoch": 0.6318469061920818, "grad_norm": 15.226197725235567, "learning_rate": 7.180906540372903e-07, "loss": -0.6186, "step": 7074 }, { "epoch": 0.6320255454078556, "grad_norm": 7.126773278938005, "learning_rate": 7.174923619114366e-07, "loss": 0.5772, "step": 7076 }, { "epoch": 0.6322041846236295, "grad_norm": 9.234483230911852, "learning_rate": 7.168941796608154e-07, "loss": -0.3912, "step": 7078 }, { "epoch": 0.6323828238394034, "grad_norm": 5.616856253543759, "learning_rate": 7.162961075180771e-07, "loss": 0.3458, "step": 7080 }, { "epoch": 0.6325614630551772, "grad_norm": 14.922482910555479, "learning_rate": 7.156981457158284e-07, "loss": -0.1516, "step": 7082 }, { "epoch": 0.632740102270951, "grad_norm": 22.45917112619656, "learning_rate": 7.151002944866336e-07, "loss": -0.484, "step": 7084 }, { "epoch": 0.6329187414867249, "grad_norm": 5.923938562789382, "learning_rate": 7.145025540630148e-07, "loss": 0.0142, "step": 7086 }, { "epoch": 0.6330973807024988, "grad_norm": 4.633113651876679, "learning_rate": 7.139049246774489e-07, "loss": 0.6396, "step": 7088 }, { "epoch": 0.6332760199182725, "grad_norm": 4.279790030703514, "learning_rate": 7.133074065623718e-07, "loss": 0.0658, "step": 7090 }, { "epoch": 0.6334546591340464, "grad_norm": 5.071653967440415, "learning_rate": 7.127099999501753e-07, "loss": -0.9141, "step": 7092 }, { "epoch": 0.6336332983498203, "grad_norm": 13.830708511309343, "learning_rate": 7.121127050732076e-07, "loss": -0.9509, "step": 7094 }, { "epoch": 0.6338119375655941, "grad_norm": 8.012999463396376, "learning_rate": 7.115155221637735e-07, "loss": -0.4622, "step": 7096 }, { "epoch": 0.6339905767813679, "grad_norm": 14.50740230639819, "learning_rate": 7.109184514541342e-07, "loss": -0.8298, "step": 7098 }, { "epoch": 0.6341692159971418, "grad_norm": 10.06365353624411, "learning_rate": 7.103214931765079e-07, "loss": 0.3943, "step": 7100 }, { "epoch": 0.6343478552129156, "grad_norm": 13.806221065603168, "learning_rate": 7.097246475630682e-07, "loss": 0.0166, "step": 7102 }, { "epoch": 0.6345264944286895, "grad_norm": 3.866485100352774, "learning_rate": 7.091279148459455e-07, "loss": 0.0182, "step": 7104 }, { "epoch": 0.6347051336444633, "grad_norm": 7.935705748086609, "learning_rate": 7.085312952572262e-07, "loss": -0.6371, "step": 7106 }, { "epoch": 0.6348837728602371, "grad_norm": 10.412190159541701, "learning_rate": 7.079347890289522e-07, "loss": -0.0682, "step": 7108 }, { "epoch": 0.635062412076011, "grad_norm": 13.039588510772624, "learning_rate": 7.073383963931219e-07, "loss": -0.1893, "step": 7110 }, { "epoch": 0.6352410512917849, "grad_norm": 5.073340218765325, "learning_rate": 7.067421175816892e-07, "loss": -0.4261, "step": 7112 }, { "epoch": 0.6354196905075586, "grad_norm": 11.261660488056021, "learning_rate": 7.061459528265644e-07, "loss": -0.3455, "step": 7114 }, { "epoch": 0.6355983297233325, "grad_norm": 3.075158563262874, "learning_rate": 7.055499023596119e-07, "loss": 0.2384, "step": 7116 }, { "epoch": 0.6357769689391064, "grad_norm": 10.666615923125049, "learning_rate": 7.049539664126534e-07, "loss": -0.9853, "step": 7118 }, { "epoch": 0.6359556081548802, "grad_norm": 6.722340462785816, "learning_rate": 7.043581452174652e-07, "loss": 0.1809, "step": 7120 }, { "epoch": 0.636134247370654, "grad_norm": 10.339654117310937, "learning_rate": 7.037624390057787e-07, "loss": -0.5528, "step": 7122 }, { "epoch": 0.6363128865864279, "grad_norm": 18.763871008526184, "learning_rate": 7.031668480092812e-07, "loss": -0.3263, "step": 7124 }, { "epoch": 0.6364915258022017, "grad_norm": 11.76199507729204, "learning_rate": 7.02571372459615e-07, "loss": -0.7656, "step": 7126 }, { "epoch": 0.6366701650179756, "grad_norm": 6.337372679625899, "learning_rate": 7.019760125883772e-07, "loss": 0.9002, "step": 7128 }, { "epoch": 0.6368488042337495, "grad_norm": 7.0525236983038, "learning_rate": 7.013807686271202e-07, "loss": 0.1738, "step": 7130 }, { "epoch": 0.6370274434495232, "grad_norm": 6.322399271158482, "learning_rate": 7.007856408073513e-07, "loss": 0.1986, "step": 7132 }, { "epoch": 0.6372060826652971, "grad_norm": 8.815927258393922, "learning_rate": 7.001906293605329e-07, "loss": -0.2934, "step": 7134 }, { "epoch": 0.637384721881071, "grad_norm": 13.631669678102188, "learning_rate": 6.995957345180812e-07, "loss": -1.1274, "step": 7136 }, { "epoch": 0.6375633610968447, "grad_norm": 9.8612691326345, "learning_rate": 6.990009565113677e-07, "loss": -0.5939, "step": 7138 }, { "epoch": 0.6377420003126186, "grad_norm": 9.022843087835193, "learning_rate": 6.984062955717195e-07, "loss": 0.8339, "step": 7140 }, { "epoch": 0.6379206395283925, "grad_norm": 7.496764438794996, "learning_rate": 6.978117519304159e-07, "loss": -0.2991, "step": 7142 }, { "epoch": 0.6380992787441663, "grad_norm": 11.915319477749472, "learning_rate": 6.97217325818692e-07, "loss": 0.1431, "step": 7144 }, { "epoch": 0.6382779179599402, "grad_norm": 8.311290681681767, "learning_rate": 6.966230174677374e-07, "loss": -0.4376, "step": 7146 }, { "epoch": 0.638456557175714, "grad_norm": 19.39381725460664, "learning_rate": 6.960288271086954e-07, "loss": -1.5328, "step": 7148 }, { "epoch": 0.6386351963914878, "grad_norm": 9.581997012525461, "learning_rate": 6.954347549726633e-07, "loss": 0.4375, "step": 7150 }, { "epoch": 0.6388138356072617, "grad_norm": 10.443378917189964, "learning_rate": 6.948408012906926e-07, "loss": -0.8281, "step": 7152 }, { "epoch": 0.6389924748230356, "grad_norm": 3.852003586835766, "learning_rate": 6.942469662937889e-07, "loss": 0.0072, "step": 7154 }, { "epoch": 0.6391711140388093, "grad_norm": 8.093081177874108, "learning_rate": 6.936532502129112e-07, "loss": -1.2135, "step": 7156 }, { "epoch": 0.6393497532545832, "grad_norm": 7.731551331157811, "learning_rate": 6.930596532789728e-07, "loss": -0.7066, "step": 7158 }, { "epoch": 0.6395283924703571, "grad_norm": 4.63496500736595, "learning_rate": 6.924661757228403e-07, "loss": 0.16, "step": 7160 }, { "epoch": 0.6397070316861309, "grad_norm": 12.360803957967487, "learning_rate": 6.918728177753337e-07, "loss": -0.4804, "step": 7162 }, { "epoch": 0.6398856709019047, "grad_norm": 13.343628786773811, "learning_rate": 6.91279579667227e-07, "loss": -0.1182, "step": 7164 }, { "epoch": 0.6400643101176786, "grad_norm": 11.848106368180476, "learning_rate": 6.90686461629247e-07, "loss": 0.3662, "step": 7166 }, { "epoch": 0.6402429493334524, "grad_norm": 6.745283951830369, "learning_rate": 6.900934638920752e-07, "loss": 0.2759, "step": 7168 }, { "epoch": 0.6404215885492263, "grad_norm": 9.20717667669938, "learning_rate": 6.895005866863438e-07, "loss": -0.6222, "step": 7170 }, { "epoch": 0.6406002277650001, "grad_norm": 7.50854437810542, "learning_rate": 6.889078302426401e-07, "loss": -0.1292, "step": 7172 }, { "epoch": 0.6407788669807739, "grad_norm": 4.227990323534731, "learning_rate": 6.883151947915045e-07, "loss": -1.102, "step": 7174 }, { "epoch": 0.6409575061965478, "grad_norm": 11.885080014586565, "learning_rate": 6.877226805634287e-07, "loss": -0.6696, "step": 7176 }, { "epoch": 0.6411361454123217, "grad_norm": 9.9713020525919, "learning_rate": 6.87130287788859e-07, "loss": 0.5492, "step": 7178 }, { "epoch": 0.6413147846280954, "grad_norm": 18.68253242529678, "learning_rate": 6.865380166981938e-07, "loss": -1.1801, "step": 7180 }, { "epoch": 0.6414934238438693, "grad_norm": 4.221942964877858, "learning_rate": 6.859458675217836e-07, "loss": -0.5097, "step": 7182 }, { "epoch": 0.6416720630596432, "grad_norm": 5.183405535686717, "learning_rate": 6.853538404899324e-07, "loss": 0.2509, "step": 7184 }, { "epoch": 0.641850702275417, "grad_norm": 8.315541105481538, "learning_rate": 6.84761935832896e-07, "loss": 0.418, "step": 7186 }, { "epoch": 0.6420293414911908, "grad_norm": 3.401660732380614, "learning_rate": 6.841701537808835e-07, "loss": 0.5973, "step": 7188 }, { "epoch": 0.6422079807069647, "grad_norm": 6.503295649828788, "learning_rate": 6.83578494564055e-07, "loss": 0.4337, "step": 7190 }, { "epoch": 0.6423866199227385, "grad_norm": 7.226650917955719, "learning_rate": 6.82986958412524e-07, "loss": -0.1415, "step": 7192 }, { "epoch": 0.6425652591385124, "grad_norm": 3.250618802361575, "learning_rate": 6.823955455563557e-07, "loss": 0.4056, "step": 7194 }, { "epoch": 0.6427438983542862, "grad_norm": 15.190193345607986, "learning_rate": 6.81804256225567e-07, "loss": -1.9187, "step": 7196 }, { "epoch": 0.64292253757006, "grad_norm": 22.401807357898395, "learning_rate": 6.81213090650127e-07, "loss": -0.8009, "step": 7198 }, { "epoch": 0.6431011767858339, "grad_norm": 12.851684198701898, "learning_rate": 6.806220490599573e-07, "loss": -0.2826, "step": 7200 }, { "epoch": 0.6432798160016078, "grad_norm": 11.192133279722603, "learning_rate": 6.800311316849308e-07, "loss": -0.6956, "step": 7202 }, { "epoch": 0.6434584552173815, "grad_norm": 4.392141494320047, "learning_rate": 6.794403387548713e-07, "loss": -0.5832, "step": 7204 }, { "epoch": 0.6436370944331554, "grad_norm": 5.403475187127298, "learning_rate": 6.788496704995552e-07, "loss": 0.825, "step": 7206 }, { "epoch": 0.6438157336489293, "grad_norm": 12.88392403956289, "learning_rate": 6.782591271487108e-07, "loss": -0.1964, "step": 7208 }, { "epoch": 0.6439943728647031, "grad_norm": 18.686177184577843, "learning_rate": 6.776687089320164e-07, "loss": -0.5215, "step": 7210 }, { "epoch": 0.644173012080477, "grad_norm": 9.11150961937402, "learning_rate": 6.770784160791023e-07, "loss": -0.7776, "step": 7212 }, { "epoch": 0.6443516512962508, "grad_norm": 3.315487072634818, "learning_rate": 6.764882488195509e-07, "loss": 0.1748, "step": 7214 }, { "epoch": 0.6445302905120247, "grad_norm": 15.273617825821681, "learning_rate": 6.758982073828943e-07, "loss": -0.8322, "step": 7216 }, { "epoch": 0.6447089297277985, "grad_norm": 9.007174705033327, "learning_rate": 6.753082919986165e-07, "loss": -0.0739, "step": 7218 }, { "epoch": 0.6448875689435724, "grad_norm": 8.234541773690644, "learning_rate": 6.747185028961523e-07, "loss": -0.5608, "step": 7220 }, { "epoch": 0.6450662081593462, "grad_norm": 8.303778411113758, "learning_rate": 6.741288403048881e-07, "loss": 0.53, "step": 7222 }, { "epoch": 0.64524484737512, "grad_norm": 13.282856060980212, "learning_rate": 6.735393044541593e-07, "loss": -0.5118, "step": 7224 }, { "epoch": 0.6454234865908939, "grad_norm": 4.676749757738679, "learning_rate": 6.729498955732536e-07, "loss": -0.1659, "step": 7226 }, { "epoch": 0.6456021258066678, "grad_norm": 7.849580702547783, "learning_rate": 6.723606138914093e-07, "loss": -1.0259, "step": 7228 }, { "epoch": 0.6457807650224415, "grad_norm": 11.577032410914265, "learning_rate": 6.717714596378137e-07, "loss": -0.0627, "step": 7230 }, { "epoch": 0.6459594042382154, "grad_norm": 13.992331448170772, "learning_rate": 6.711824330416065e-07, "loss": -0.1328, "step": 7232 }, { "epoch": 0.6461380434539893, "grad_norm": 29.826032658084006, "learning_rate": 6.70593534331877e-07, "loss": 0.0318, "step": 7234 }, { "epoch": 0.646316682669763, "grad_norm": 5.1578126331272705, "learning_rate": 6.700047637376634e-07, "loss": 0.7183, "step": 7236 }, { "epoch": 0.6464953218855369, "grad_norm": 5.855433570191184, "learning_rate": 6.694161214879563e-07, "loss": -0.1595, "step": 7238 }, { "epoch": 0.6466739611013108, "grad_norm": 3.312101088202055, "learning_rate": 6.688276078116952e-07, "loss": -0.5741, "step": 7240 }, { "epoch": 0.6468526003170846, "grad_norm": 5.161400644215426, "learning_rate": 6.682392229377698e-07, "loss": -0.2894, "step": 7242 }, { "epoch": 0.6470312395328585, "grad_norm": 8.024299434137001, "learning_rate": 6.676509670950194e-07, "loss": -1.2121, "step": 7244 }, { "epoch": 0.6472098787486323, "grad_norm": 6.687511978851713, "learning_rate": 6.670628405122336e-07, "loss": -0.0311, "step": 7246 }, { "epoch": 0.6473885179644061, "grad_norm": 9.672717031576454, "learning_rate": 6.664748434181515e-07, "loss": 0.1904, "step": 7248 }, { "epoch": 0.64756715718018, "grad_norm": 16.72861767062425, "learning_rate": 6.658869760414616e-07, "loss": -0.6517, "step": 7250 }, { "epoch": 0.6477457963959539, "grad_norm": 9.139589117318044, "learning_rate": 6.652992386108024e-07, "loss": 0.2076, "step": 7252 }, { "epoch": 0.6479244356117276, "grad_norm": 9.311908472719052, "learning_rate": 6.647116313547619e-07, "loss": -0.4938, "step": 7254 }, { "epoch": 0.6481030748275015, "grad_norm": 3.9103342905582914, "learning_rate": 6.641241545018765e-07, "loss": -0.5029, "step": 7256 }, { "epoch": 0.6482817140432754, "grad_norm": 8.13252637434962, "learning_rate": 6.63536808280633e-07, "loss": -0.3271, "step": 7258 }, { "epoch": 0.6484603532590492, "grad_norm": 3.3051308769594234, "learning_rate": 6.629495929194673e-07, "loss": -0.08, "step": 7260 }, { "epoch": 0.648638992474823, "grad_norm": 9.280497408065651, "learning_rate": 6.623625086467641e-07, "loss": -0.2864, "step": 7262 }, { "epoch": 0.6488176316905969, "grad_norm": 7.690998800866611, "learning_rate": 6.617755556908566e-07, "loss": -1.0418, "step": 7264 }, { "epoch": 0.6489962709063707, "grad_norm": 7.675912699653709, "learning_rate": 6.611887342800277e-07, "loss": -1.6185, "step": 7266 }, { "epoch": 0.6491749101221446, "grad_norm": 8.410668328726311, "learning_rate": 6.606020446425093e-07, "loss": -1.3575, "step": 7268 }, { "epoch": 0.6493535493379184, "grad_norm": 7.881370268833531, "learning_rate": 6.600154870064812e-07, "loss": -1.1659, "step": 7270 }, { "epoch": 0.6495321885536922, "grad_norm": 16.702040605877794, "learning_rate": 6.594290616000721e-07, "loss": -1.2375, "step": 7272 }, { "epoch": 0.6497108277694661, "grad_norm": 3.4095168939953138, "learning_rate": 6.588427686513598e-07, "loss": 0.6225, "step": 7274 }, { "epoch": 0.64988946698524, "grad_norm": 9.006358549587645, "learning_rate": 6.582566083883704e-07, "loss": -0.6238, "step": 7276 }, { "epoch": 0.6500681062010137, "grad_norm": 10.224662540155474, "learning_rate": 6.576705810390779e-07, "loss": -0.1974, "step": 7278 }, { "epoch": 0.6502467454167876, "grad_norm": 8.864915733216602, "learning_rate": 6.570846868314048e-07, "loss": -0.7611, "step": 7280 }, { "epoch": 0.6504253846325615, "grad_norm": 10.155834201224136, "learning_rate": 6.56498925993223e-07, "loss": 0.8636, "step": 7282 }, { "epoch": 0.6506040238483353, "grad_norm": 14.041180671107108, "learning_rate": 6.559132987523503e-07, "loss": -1.1629, "step": 7284 }, { "epoch": 0.6507826630641091, "grad_norm": 7.276811417476584, "learning_rate": 6.553278053365542e-07, "loss": 0.8059, "step": 7286 }, { "epoch": 0.650961302279883, "grad_norm": 7.642858136002562, "learning_rate": 6.547424459735502e-07, "loss": -0.0028, "step": 7288 }, { "epoch": 0.6511399414956568, "grad_norm": 25.626647003092188, "learning_rate": 6.54157220891e-07, "loss": -2.4604, "step": 7290 }, { "epoch": 0.6513185807114307, "grad_norm": 8.101921757795097, "learning_rate": 6.535721303165152e-07, "loss": 0.0317, "step": 7292 }, { "epoch": 0.6514972199272046, "grad_norm": 1.4056546540480128, "learning_rate": 6.529871744776538e-07, "loss": 0.3459, "step": 7294 }, { "epoch": 0.6516758591429783, "grad_norm": 13.053412237607525, "learning_rate": 6.524023536019221e-07, "loss": -0.956, "step": 7296 }, { "epoch": 0.6518544983587522, "grad_norm": 19.842579628379816, "learning_rate": 6.518176679167727e-07, "loss": -0.5758, "step": 7298 }, { "epoch": 0.6520331375745261, "grad_norm": 17.6877898606991, "learning_rate": 6.512331176496071e-07, "loss": -0.7608, "step": 7300 }, { "epoch": 0.6522117767902998, "grad_norm": 6.125353744568043, "learning_rate": 6.506487030277734e-07, "loss": 0.5326, "step": 7302 }, { "epoch": 0.6523904160060737, "grad_norm": 6.380756546653854, "learning_rate": 6.500644242785667e-07, "loss": -0.0573, "step": 7304 }, { "epoch": 0.6525690552218476, "grad_norm": 13.237700313677092, "learning_rate": 6.4948028162923e-07, "loss": -1.4156, "step": 7306 }, { "epoch": 0.6527476944376214, "grad_norm": 3.1810951959683615, "learning_rate": 6.488962753069527e-07, "loss": 1.4778, "step": 7308 }, { "epoch": 0.6529263336533953, "grad_norm": 8.964389285866309, "learning_rate": 6.483124055388714e-07, "loss": 0.3209, "step": 7310 }, { "epoch": 0.6531049728691691, "grad_norm": 14.75084619196154, "learning_rate": 6.477286725520694e-07, "loss": 0.6619, "step": 7312 }, { "epoch": 0.6532836120849429, "grad_norm": 10.016877237435015, "learning_rate": 6.471450765735773e-07, "loss": 0.7248, "step": 7314 }, { "epoch": 0.6534622513007168, "grad_norm": 19.49504691576932, "learning_rate": 6.465616178303727e-07, "loss": -1.3654, "step": 7316 }, { "epoch": 0.6536408905164907, "grad_norm": 5.68469034738948, "learning_rate": 6.45978296549378e-07, "loss": 0.3335, "step": 7318 }, { "epoch": 0.6538195297322644, "grad_norm": 9.530060440624347, "learning_rate": 6.453951129574643e-07, "loss": -0.0841, "step": 7320 }, { "epoch": 0.6539981689480383, "grad_norm": 6.475138729774383, "learning_rate": 6.448120672814481e-07, "loss": 1.066, "step": 7322 }, { "epoch": 0.6541768081638122, "grad_norm": 6.498304292598125, "learning_rate": 6.442291597480922e-07, "loss": -0.8003, "step": 7324 }, { "epoch": 0.654355447379586, "grad_norm": 8.317129122529018, "learning_rate": 6.436463905841059e-07, "loss": -0.3144, "step": 7326 }, { "epoch": 0.6545340865953598, "grad_norm": 10.94672079236321, "learning_rate": 6.430637600161446e-07, "loss": -0.6268, "step": 7328 }, { "epoch": 0.6547127258111337, "grad_norm": 11.448984852395894, "learning_rate": 6.424812682708103e-07, "loss": -0.6682, "step": 7330 }, { "epoch": 0.6548913650269075, "grad_norm": 12.635186948310261, "learning_rate": 6.418989155746501e-07, "loss": -1.4099, "step": 7332 }, { "epoch": 0.6550700042426814, "grad_norm": 10.274587402969987, "learning_rate": 6.413167021541575e-07, "loss": 1.3514, "step": 7334 }, { "epoch": 0.6552486434584552, "grad_norm": 12.405042767341445, "learning_rate": 6.407346282357719e-07, "loss": -0.7431, "step": 7336 }, { "epoch": 0.655427282674229, "grad_norm": 9.369340739797945, "learning_rate": 6.401526940458784e-07, "loss": -0.8232, "step": 7338 }, { "epoch": 0.6556059218900029, "grad_norm": 34.28369977573161, "learning_rate": 6.395708998108076e-07, "loss": -0.8744, "step": 7340 }, { "epoch": 0.6557845611057768, "grad_norm": 10.872490955172715, "learning_rate": 6.389892457568362e-07, "loss": -0.8103, "step": 7342 }, { "epoch": 0.6559632003215506, "grad_norm": 10.401911750558464, "learning_rate": 6.384077321101853e-07, "loss": 0.8029, "step": 7344 }, { "epoch": 0.6561418395373244, "grad_norm": 3.7725047978698676, "learning_rate": 6.378263590970222e-07, "loss": 0.4891, "step": 7346 }, { "epoch": 0.6563204787530983, "grad_norm": 6.9163675205236945, "learning_rate": 6.372451269434597e-07, "loss": 0.0702, "step": 7348 }, { "epoch": 0.6564991179688722, "grad_norm": 18.535009301972813, "learning_rate": 6.366640358755555e-07, "loss": -0.7943, "step": 7350 }, { "epoch": 0.6566777571846459, "grad_norm": 5.224045493811394, "learning_rate": 6.36083086119312e-07, "loss": -0.2826, "step": 7352 }, { "epoch": 0.6568563964004198, "grad_norm": 7.640573325889296, "learning_rate": 6.355022779006771e-07, "loss": -0.5718, "step": 7354 }, { "epoch": 0.6570350356161937, "grad_norm": 10.049444412892312, "learning_rate": 6.34921611445544e-07, "loss": -0.4784, "step": 7356 }, { "epoch": 0.6572136748319675, "grad_norm": 5.997002488650222, "learning_rate": 6.343410869797499e-07, "loss": 0.0075, "step": 7358 }, { "epoch": 0.6573923140477413, "grad_norm": 11.200702740557139, "learning_rate": 6.337607047290773e-07, "loss": 0.517, "step": 7360 }, { "epoch": 0.6575709532635152, "grad_norm": 8.275621336691328, "learning_rate": 6.331804649192536e-07, "loss": -0.1827, "step": 7362 }, { "epoch": 0.657749592479289, "grad_norm": 4.558146450714601, "learning_rate": 6.326003677759502e-07, "loss": -0.0617, "step": 7364 }, { "epoch": 0.6579282316950629, "grad_norm": 6.3239741522534025, "learning_rate": 6.320204135247833e-07, "loss": -0.4678, "step": 7366 }, { "epoch": 0.6581068709108367, "grad_norm": 6.099598430485698, "learning_rate": 6.314406023913137e-07, "loss": 0.1732, "step": 7368 }, { "epoch": 0.6582855101266105, "grad_norm": 14.615403376887707, "learning_rate": 6.30860934601047e-07, "loss": -0.1099, "step": 7370 }, { "epoch": 0.6584641493423844, "grad_norm": 2.9791773726975648, "learning_rate": 6.30281410379431e-07, "loss": -0.2249, "step": 7372 }, { "epoch": 0.6586427885581583, "grad_norm": 8.9795014805043, "learning_rate": 6.297020299518603e-07, "loss": -0.0318, "step": 7374 }, { "epoch": 0.658821427773932, "grad_norm": 8.878029319733676, "learning_rate": 6.291227935436725e-07, "loss": 0.092, "step": 7376 }, { "epoch": 0.6590000669897059, "grad_norm": 7.103290074952311, "learning_rate": 6.285437013801479e-07, "loss": 0.265, "step": 7378 }, { "epoch": 0.6591787062054798, "grad_norm": 12.101302122561995, "learning_rate": 6.279647536865128e-07, "loss": -0.176, "step": 7380 }, { "epoch": 0.6593573454212536, "grad_norm": 10.850507142480643, "learning_rate": 6.273859506879364e-07, "loss": -1.3659, "step": 7382 }, { "epoch": 0.6595359846370275, "grad_norm": 7.913545039922181, "learning_rate": 6.268072926095311e-07, "loss": -1.1955, "step": 7384 }, { "epoch": 0.6597146238528013, "grad_norm": 8.47317760757623, "learning_rate": 6.262287796763537e-07, "loss": -0.2765, "step": 7386 }, { "epoch": 0.6598932630685751, "grad_norm": 10.49459590808173, "learning_rate": 6.256504121134042e-07, "loss": 0.3417, "step": 7388 }, { "epoch": 0.660071902284349, "grad_norm": 7.0582721850175885, "learning_rate": 6.250721901456264e-07, "loss": -0.6387, "step": 7390 }, { "epoch": 0.6602505415001229, "grad_norm": 15.438606500546316, "learning_rate": 6.244941139979069e-07, "loss": -0.127, "step": 7392 }, { "epoch": 0.6604291807158966, "grad_norm": 6.404299257585711, "learning_rate": 6.23916183895076e-07, "loss": -0.1753, "step": 7394 }, { "epoch": 0.6606078199316705, "grad_norm": 11.463149363720293, "learning_rate": 6.233384000619074e-07, "loss": 1.1148, "step": 7396 }, { "epoch": 0.6607864591474444, "grad_norm": 15.432501721205153, "learning_rate": 6.227607627231172e-07, "loss": -0.8826, "step": 7398 }, { "epoch": 0.6609650983632182, "grad_norm": 10.991739465926965, "learning_rate": 6.221832721033648e-07, "loss": -0.9557, "step": 7400 }, { "epoch": 0.661143737578992, "grad_norm": 3.58262531175822, "learning_rate": 6.216059284272534e-07, "loss": 1.2474, "step": 7402 }, { "epoch": 0.6613223767947659, "grad_norm": 9.443555493374586, "learning_rate": 6.210287319193282e-07, "loss": -0.068, "step": 7404 }, { "epoch": 0.6615010160105397, "grad_norm": 6.962835838669455, "learning_rate": 6.204516828040763e-07, "loss": -0.1125, "step": 7406 }, { "epoch": 0.6616796552263136, "grad_norm": 3.2436170253068335, "learning_rate": 6.198747813059295e-07, "loss": -0.114, "step": 7408 }, { "epoch": 0.6618582944420874, "grad_norm": 5.991760318255878, "learning_rate": 6.19298027649261e-07, "loss": -0.3573, "step": 7410 }, { "epoch": 0.6620369336578612, "grad_norm": 20.298209366020366, "learning_rate": 6.187214220583864e-07, "loss": -1.2469, "step": 7412 }, { "epoch": 0.6622155728736351, "grad_norm": 7.40838180314447, "learning_rate": 6.181449647575639e-07, "loss": -0.6748, "step": 7414 }, { "epoch": 0.662394212089409, "grad_norm": 3.854419198225831, "learning_rate": 6.175686559709945e-07, "loss": 0.369, "step": 7416 }, { "epoch": 0.6625728513051827, "grad_norm": 13.77772085800172, "learning_rate": 6.169924959228208e-07, "loss": -0.4722, "step": 7418 }, { "epoch": 0.6627514905209566, "grad_norm": 10.595921476724804, "learning_rate": 6.164164848371274e-07, "loss": -0.0035, "step": 7420 }, { "epoch": 0.6629301297367305, "grad_norm": 10.91990632963703, "learning_rate": 6.15840622937942e-07, "loss": -0.5603, "step": 7422 }, { "epoch": 0.6631087689525043, "grad_norm": 7.336498300000362, "learning_rate": 6.152649104492334e-07, "loss": -0.4969, "step": 7424 }, { "epoch": 0.6632874081682781, "grad_norm": 4.443639886406579, "learning_rate": 6.146893475949122e-07, "loss": 0.5278, "step": 7426 }, { "epoch": 0.663466047384052, "grad_norm": 3.8879975175339423, "learning_rate": 6.141139345988313e-07, "loss": 0.2943, "step": 7428 }, { "epoch": 0.6636446865998258, "grad_norm": 7.762541436961666, "learning_rate": 6.135386716847857e-07, "loss": 0.696, "step": 7430 }, { "epoch": 0.6638233258155997, "grad_norm": 5.653861537375449, "learning_rate": 6.129635590765105e-07, "loss": 0.3809, "step": 7432 }, { "epoch": 0.6640019650313735, "grad_norm": 10.05022216854966, "learning_rate": 6.123885969976839e-07, "loss": -1.7844, "step": 7434 }, { "epoch": 0.6641806042471473, "grad_norm": 6.647983629667873, "learning_rate": 6.118137856719252e-07, "loss": -0.2477, "step": 7436 }, { "epoch": 0.6643592434629212, "grad_norm": 8.11066945352694, "learning_rate": 6.112391253227938e-07, "loss": -1.2569, "step": 7438 }, { "epoch": 0.6645378826786951, "grad_norm": 7.19523058916121, "learning_rate": 6.106646161737923e-07, "loss": 0.2371, "step": 7440 }, { "epoch": 0.6647165218944688, "grad_norm": 6.602146967008127, "learning_rate": 6.100902584483634e-07, "loss": 0.6887, "step": 7442 }, { "epoch": 0.6648951611102427, "grad_norm": 18.655813240804626, "learning_rate": 6.095160523698912e-07, "loss": -0.891, "step": 7444 }, { "epoch": 0.6650738003260166, "grad_norm": 10.632596161580384, "learning_rate": 6.089419981617006e-07, "loss": -0.4851, "step": 7446 }, { "epoch": 0.6652524395417904, "grad_norm": 2.405790607146551, "learning_rate": 6.083680960470574e-07, "loss": -0.1949, "step": 7448 }, { "epoch": 0.6654310787575642, "grad_norm": 4.435955154797665, "learning_rate": 6.077943462491688e-07, "loss": -0.9017, "step": 7450 }, { "epoch": 0.6656097179733381, "grad_norm": 7.7356557691137375, "learning_rate": 6.072207489911818e-07, "loss": 0.3642, "step": 7452 }, { "epoch": 0.6657883571891119, "grad_norm": 30.977815627238087, "learning_rate": 6.066473044961851e-07, "loss": 0.1522, "step": 7454 }, { "epoch": 0.6659669964048858, "grad_norm": 28.190777032000533, "learning_rate": 6.060740129872072e-07, "loss": -1.6169, "step": 7456 }, { "epoch": 0.6661456356206596, "grad_norm": 8.76725767437608, "learning_rate": 6.055008746872183e-07, "loss": -0.7232, "step": 7458 }, { "epoch": 0.6663242748364334, "grad_norm": 3.960871046964745, "learning_rate": 6.049278898191268e-07, "loss": -0.0215, "step": 7460 }, { "epoch": 0.6665029140522073, "grad_norm": 15.500430685643211, "learning_rate": 6.043550586057837e-07, "loss": -0.3071, "step": 7462 }, { "epoch": 0.6666815532679812, "grad_norm": 4.5668471019953065, "learning_rate": 6.037823812699791e-07, "loss": 1.0245, "step": 7464 }, { "epoch": 0.6668601924837549, "grad_norm": 4.3095321394256, "learning_rate": 6.032098580344434e-07, "loss": -0.0996, "step": 7466 }, { "epoch": 0.6670388316995288, "grad_norm": 8.71642816726652, "learning_rate": 6.026374891218471e-07, "loss": -0.1367, "step": 7468 }, { "epoch": 0.6672174709153027, "grad_norm": 6.731895751581144, "learning_rate": 6.020652747548007e-07, "loss": -0.4008, "step": 7470 }, { "epoch": 0.6673961101310765, "grad_norm": 8.470772251922803, "learning_rate": 6.014932151558547e-07, "loss": 0.1461, "step": 7472 }, { "epoch": 0.6675747493468503, "grad_norm": 14.640221133203609, "learning_rate": 6.009213105474991e-07, "loss": -0.1386, "step": 7474 }, { "epoch": 0.6677533885626242, "grad_norm": 3.7367241146584296, "learning_rate": 6.00349561152164e-07, "loss": -0.0444, "step": 7476 }, { "epoch": 0.6679320277783981, "grad_norm": 10.900755451866019, "learning_rate": 5.99777967192219e-07, "loss": -0.0762, "step": 7478 }, { "epoch": 0.6681106669941719, "grad_norm": 5.784360966183678, "learning_rate": 5.992065288899729e-07, "loss": -0.3426, "step": 7480 }, { "epoch": 0.6682893062099458, "grad_norm": 6.63366938111991, "learning_rate": 5.986352464676743e-07, "loss": 0.0206, "step": 7482 }, { "epoch": 0.6684679454257196, "grad_norm": 11.82778184360211, "learning_rate": 5.980641201475121e-07, "loss": -1.4276, "step": 7484 }, { "epoch": 0.6686465846414934, "grad_norm": 13.153703964226793, "learning_rate": 5.97493150151612e-07, "loss": -0.1629, "step": 7486 }, { "epoch": 0.6688252238572673, "grad_norm": 16.600783989099263, "learning_rate": 5.969223367020411e-07, "loss": 0.0095, "step": 7488 }, { "epoch": 0.6690038630730412, "grad_norm": 5.785268345892382, "learning_rate": 5.963516800208056e-07, "loss": -0.3054, "step": 7490 }, { "epoch": 0.6691825022888149, "grad_norm": 5.52579207460413, "learning_rate": 5.957811803298489e-07, "loss": 0.2452, "step": 7492 }, { "epoch": 0.6693611415045888, "grad_norm": 5.925810540365152, "learning_rate": 5.952108378510551e-07, "loss": 0.2066, "step": 7494 }, { "epoch": 0.6695397807203627, "grad_norm": 7.706303124658483, "learning_rate": 5.946406528062467e-07, "loss": -0.875, "step": 7496 }, { "epoch": 0.6697184199361365, "grad_norm": 6.761098708753757, "learning_rate": 5.940706254171848e-07, "loss": -0.5155, "step": 7498 }, { "epoch": 0.6698970591519103, "grad_norm": 7.7835829581094265, "learning_rate": 5.93500755905569e-07, "loss": 0.2442, "step": 7500 }, { "epoch": 0.6700756983676842, "grad_norm": 4.432189557714414, "learning_rate": 5.929310444930376e-07, "loss": -0.014, "step": 7502 }, { "epoch": 0.670254337583458, "grad_norm": 3.7549883407228903, "learning_rate": 5.923614914011679e-07, "loss": -0.5474, "step": 7504 }, { "epoch": 0.6704329767992319, "grad_norm": 9.870790767510806, "learning_rate": 5.917920968514751e-07, "loss": 0.333, "step": 7506 }, { "epoch": 0.6706116160150057, "grad_norm": 4.130856604609, "learning_rate": 5.912228610654126e-07, "loss": -0.2058, "step": 7508 }, { "epoch": 0.6707902552307795, "grad_norm": 7.204045367244734, "learning_rate": 5.90653784264373e-07, "loss": 0.9529, "step": 7510 }, { "epoch": 0.6709688944465534, "grad_norm": 22.49803526035489, "learning_rate": 5.900848666696859e-07, "loss": -2.554, "step": 7512 }, { "epoch": 0.6711475336623273, "grad_norm": 6.131584520770431, "learning_rate": 5.895161085026192e-07, "loss": -0.3289, "step": 7514 }, { "epoch": 0.671326172878101, "grad_norm": 7.9110891135882655, "learning_rate": 5.889475099843797e-07, "loss": -0.6649, "step": 7516 }, { "epoch": 0.6715048120938749, "grad_norm": 10.110986501557004, "learning_rate": 5.883790713361117e-07, "loss": -0.5568, "step": 7518 }, { "epoch": 0.6716834513096488, "grad_norm": 7.489301553305732, "learning_rate": 5.878107927788961e-07, "loss": -1.4836, "step": 7520 }, { "epoch": 0.6718620905254226, "grad_norm": 9.575117974015182, "learning_rate": 5.872426745337533e-07, "loss": 0.6874, "step": 7522 }, { "epoch": 0.6720407297411964, "grad_norm": 8.547011574834304, "learning_rate": 5.866747168216407e-07, "loss": -0.6487, "step": 7524 }, { "epoch": 0.6722193689569703, "grad_norm": 12.382223467851503, "learning_rate": 5.861069198634526e-07, "loss": -0.8911, "step": 7526 }, { "epoch": 0.6723980081727441, "grad_norm": 10.115598858742674, "learning_rate": 5.855392838800217e-07, "loss": -0.7174, "step": 7528 }, { "epoch": 0.672576647388518, "grad_norm": 8.751455567409336, "learning_rate": 5.849718090921176e-07, "loss": 0.3546, "step": 7530 }, { "epoch": 0.6727552866042918, "grad_norm": 5.663412880311735, "learning_rate": 5.844044957204477e-07, "loss": 0.18, "step": 7532 }, { "epoch": 0.6729339258200656, "grad_norm": 4.1505820268366005, "learning_rate": 5.838373439856555e-07, "loss": -0.5904, "step": 7534 }, { "epoch": 0.6731125650358395, "grad_norm": 3.794729623523844, "learning_rate": 5.83270354108323e-07, "loss": -0.1847, "step": 7536 }, { "epoch": 0.6732912042516134, "grad_norm": 11.050322818787425, "learning_rate": 5.827035263089691e-07, "loss": 1.2207, "step": 7538 }, { "epoch": 0.6734698434673871, "grad_norm": 10.534949131260301, "learning_rate": 5.821368608080479e-07, "loss": -1.0427, "step": 7540 }, { "epoch": 0.673648482683161, "grad_norm": 5.4210665317175915, "learning_rate": 5.815703578259525e-07, "loss": -0.1512, "step": 7542 }, { "epoch": 0.6738271218989349, "grad_norm": 7.098230501415173, "learning_rate": 5.81004017583012e-07, "loss": -0.3887, "step": 7544 }, { "epoch": 0.6740057611147087, "grad_norm": 12.803496979814124, "learning_rate": 5.804378402994918e-07, "loss": 0.0498, "step": 7546 }, { "epoch": 0.6741844003304825, "grad_norm": 2.6249636692984377, "learning_rate": 5.798718261955949e-07, "loss": -0.0935, "step": 7548 }, { "epoch": 0.6743630395462564, "grad_norm": 12.192418754014405, "learning_rate": 5.793059754914595e-07, "loss": -2.0059, "step": 7550 }, { "epoch": 0.6745416787620302, "grad_norm": 6.810317223508348, "learning_rate": 5.78740288407162e-07, "loss": 0.5471, "step": 7552 }, { "epoch": 0.6747203179778041, "grad_norm": 3.877668876782641, "learning_rate": 5.781747651627131e-07, "loss": 0.1455, "step": 7554 }, { "epoch": 0.674898957193578, "grad_norm": 8.258285061425388, "learning_rate": 5.776094059780608e-07, "loss": -0.7576, "step": 7556 }, { "epoch": 0.6750775964093517, "grad_norm": 8.723740328821522, "learning_rate": 5.770442110730907e-07, "loss": 0.2769, "step": 7558 }, { "epoch": 0.6752562356251256, "grad_norm": 8.234759037682366, "learning_rate": 5.764791806676217e-07, "loss": -0.7084, "step": 7560 }, { "epoch": 0.6754348748408995, "grad_norm": 7.011686651214829, "learning_rate": 5.759143149814106e-07, "loss": -0.2112, "step": 7562 }, { "epoch": 0.6756135140566732, "grad_norm": 6.675226374870123, "learning_rate": 5.753496142341503e-07, "loss": -0.2721, "step": 7564 }, { "epoch": 0.6757921532724471, "grad_norm": 5.111268045715882, "learning_rate": 5.747850786454673e-07, "loss": -0.1312, "step": 7566 }, { "epoch": 0.675970792488221, "grad_norm": 6.9552558291325015, "learning_rate": 5.742207084349273e-07, "loss": 0.0042, "step": 7568 }, { "epoch": 0.6761494317039948, "grad_norm": 5.8107471078945, "learning_rate": 5.736565038220289e-07, "loss": 0.7353, "step": 7570 }, { "epoch": 0.6763280709197687, "grad_norm": 10.145003162757678, "learning_rate": 5.730924650262081e-07, "loss": 0.371, "step": 7572 }, { "epoch": 0.6765067101355425, "grad_norm": 6.198398432453452, "learning_rate": 5.725285922668345e-07, "loss": 0.2417, "step": 7574 }, { "epoch": 0.6766853493513163, "grad_norm": 14.70124388122561, "learning_rate": 5.719648857632148e-07, "loss": 0.5433, "step": 7576 }, { "epoch": 0.6768639885670902, "grad_norm": 10.464543377081501, "learning_rate": 5.714013457345903e-07, "loss": 0.848, "step": 7578 }, { "epoch": 0.6770426277828641, "grad_norm": 12.819247223032809, "learning_rate": 5.708379724001377e-07, "loss": -0.0835, "step": 7580 }, { "epoch": 0.6772212669986378, "grad_norm": 11.387258107937875, "learning_rate": 5.70274765978969e-07, "loss": -0.737, "step": 7582 }, { "epoch": 0.6773999062144117, "grad_norm": 7.5038630391454975, "learning_rate": 5.697117266901312e-07, "loss": 0.2718, "step": 7584 }, { "epoch": 0.6775785454301856, "grad_norm": 6.821968987342131, "learning_rate": 5.691488547526064e-07, "loss": 0.5899, "step": 7586 }, { "epoch": 0.6777571846459594, "grad_norm": 24.14018633298853, "learning_rate": 5.685861503853104e-07, "loss": -1.3318, "step": 7588 }, { "epoch": 0.6779358238617332, "grad_norm": 6.716536472216703, "learning_rate": 5.680236138070962e-07, "loss": -0.333, "step": 7590 }, { "epoch": 0.6781144630775071, "grad_norm": 14.016274937898643, "learning_rate": 5.674612452367499e-07, "loss": -0.0021, "step": 7592 }, { "epoch": 0.6782931022932809, "grad_norm": 9.695046912281944, "learning_rate": 5.668990448929923e-07, "loss": -1.2787, "step": 7594 }, { "epoch": 0.6784717415090548, "grad_norm": 11.616124200036088, "learning_rate": 5.663370129944789e-07, "loss": -0.4742, "step": 7596 }, { "epoch": 0.6786503807248286, "grad_norm": 12.106309465804843, "learning_rate": 5.657751497598002e-07, "loss": 0.7106, "step": 7598 }, { "epoch": 0.6788290199406024, "grad_norm": 5.410128751058273, "learning_rate": 5.652134554074806e-07, "loss": -1.0935, "step": 7600 }, { "epoch": 0.6790076591563763, "grad_norm": 7.828307250560601, "learning_rate": 5.646519301559791e-07, "loss": -0.5289, "step": 7602 }, { "epoch": 0.6791862983721502, "grad_norm": 8.899170106063831, "learning_rate": 5.640905742236885e-07, "loss": -0.3718, "step": 7604 }, { "epoch": 0.6793649375879239, "grad_norm": 4.01865352912313, "learning_rate": 5.635293878289369e-07, "loss": -1.1842, "step": 7606 }, { "epoch": 0.6795435768036978, "grad_norm": 8.247889855760315, "learning_rate": 5.629683711899844e-07, "loss": -0.1351, "step": 7608 }, { "epoch": 0.6797222160194717, "grad_norm": 11.115445609759226, "learning_rate": 5.624075245250268e-07, "loss": 0.1194, "step": 7610 }, { "epoch": 0.6799008552352456, "grad_norm": 14.067470909618713, "learning_rate": 5.618468480521934e-07, "loss": -1.081, "step": 7612 }, { "epoch": 0.6800794944510193, "grad_norm": 10.448264778298903, "learning_rate": 5.61286341989547e-07, "loss": -0.5904, "step": 7614 }, { "epoch": 0.6802581336667932, "grad_norm": 10.366287065856236, "learning_rate": 5.607260065550845e-07, "loss": -0.5993, "step": 7616 }, { "epoch": 0.6804367728825671, "grad_norm": 4.948063427271102, "learning_rate": 5.601658419667363e-07, "loss": -0.5266, "step": 7618 }, { "epoch": 0.6806154120983409, "grad_norm": 5.490039823349453, "learning_rate": 5.596058484423655e-07, "loss": -0.2221, "step": 7620 }, { "epoch": 0.6807940513141147, "grad_norm": 14.041482775169815, "learning_rate": 5.590460261997706e-07, "loss": 0.4209, "step": 7622 }, { "epoch": 0.6809726905298886, "grad_norm": 7.166057146752151, "learning_rate": 5.584863754566818e-07, "loss": -0.1107, "step": 7624 }, { "epoch": 0.6811513297456624, "grad_norm": 18.537616324493843, "learning_rate": 5.579268964307634e-07, "loss": -0.3333, "step": 7626 }, { "epoch": 0.6813299689614363, "grad_norm": 17.696868033174507, "learning_rate": 5.573675893396122e-07, "loss": -0.956, "step": 7628 }, { "epoch": 0.6815086081772102, "grad_norm": 5.900236428906234, "learning_rate": 5.568084544007588e-07, "loss": -0.8728, "step": 7630 }, { "epoch": 0.6816872473929839, "grad_norm": 7.629553763963331, "learning_rate": 5.562494918316667e-07, "loss": -0.7014, "step": 7632 }, { "epoch": 0.6818658866087578, "grad_norm": 6.132233999387351, "learning_rate": 5.556907018497321e-07, "loss": 0.7095, "step": 7634 }, { "epoch": 0.6820445258245317, "grad_norm": 2.738887442235283, "learning_rate": 5.551320846722847e-07, "loss": 0.4001, "step": 7636 }, { "epoch": 0.6822231650403054, "grad_norm": 8.265977588769937, "learning_rate": 5.545736405165864e-07, "loss": -0.1098, "step": 7638 }, { "epoch": 0.6824018042560793, "grad_norm": 7.3172216577117535, "learning_rate": 5.540153695998317e-07, "loss": -0.588, "step": 7640 }, { "epoch": 0.6825804434718532, "grad_norm": 7.851322728381828, "learning_rate": 5.53457272139148e-07, "loss": 0.5301, "step": 7642 }, { "epoch": 0.682759082687627, "grad_norm": 13.32897502876282, "learning_rate": 5.528993483515951e-07, "loss": 0.0441, "step": 7644 }, { "epoch": 0.6829377219034009, "grad_norm": 12.326429823447778, "learning_rate": 5.523415984541666e-07, "loss": -0.4557, "step": 7646 }, { "epoch": 0.6831163611191747, "grad_norm": 6.074056720105806, "learning_rate": 5.517840226637857e-07, "loss": 0.0936, "step": 7648 }, { "epoch": 0.6832950003349485, "grad_norm": 5.190727424874169, "learning_rate": 5.512266211973102e-07, "loss": -0.4328, "step": 7650 }, { "epoch": 0.6834736395507224, "grad_norm": 5.336577839220339, "learning_rate": 5.506693942715291e-07, "loss": 0.1835, "step": 7652 }, { "epoch": 0.6836522787664963, "grad_norm": 10.229693138315614, "learning_rate": 5.50112342103164e-07, "loss": -0.2878, "step": 7654 }, { "epoch": 0.68383091798227, "grad_norm": 12.178709018986398, "learning_rate": 5.495554649088681e-07, "loss": -0.954, "step": 7656 }, { "epoch": 0.6840095571980439, "grad_norm": 19.425989011431714, "learning_rate": 5.489987629052268e-07, "loss": -0.1371, "step": 7658 }, { "epoch": 0.6841881964138178, "grad_norm": 9.80155694232566, "learning_rate": 5.484422363087575e-07, "loss": -0.9601, "step": 7660 }, { "epoch": 0.6843668356295916, "grad_norm": 4.053285683510764, "learning_rate": 5.478858853359087e-07, "loss": 0.239, "step": 7662 }, { "epoch": 0.6845454748453654, "grad_norm": 9.457204294807767, "learning_rate": 5.473297102030614e-07, "loss": 0.5124, "step": 7664 }, { "epoch": 0.6847241140611393, "grad_norm": 9.736041910600063, "learning_rate": 5.467737111265278e-07, "loss": -0.3219, "step": 7666 }, { "epoch": 0.6849027532769131, "grad_norm": 6.139054658165804, "learning_rate": 5.462178883225517e-07, "loss": 0.4405, "step": 7668 }, { "epoch": 0.685081392492687, "grad_norm": 3.186140181757937, "learning_rate": 5.456622420073083e-07, "loss": -0.3324, "step": 7670 }, { "epoch": 0.6852600317084608, "grad_norm": 5.678068284384987, "learning_rate": 5.451067723969047e-07, "loss": 0.7193, "step": 7672 }, { "epoch": 0.6854386709242346, "grad_norm": 12.558509914183626, "learning_rate": 5.44551479707378e-07, "loss": -0.4422, "step": 7674 }, { "epoch": 0.6856173101400085, "grad_norm": 5.6392134065167525, "learning_rate": 5.439963641546972e-07, "loss": 0.0507, "step": 7676 }, { "epoch": 0.6857959493557824, "grad_norm": 7.990669292734471, "learning_rate": 5.434414259547632e-07, "loss": -0.1059, "step": 7678 }, { "epoch": 0.6859745885715561, "grad_norm": 14.368721030174145, "learning_rate": 5.428866653234077e-07, "loss": -1.3172, "step": 7680 }, { "epoch": 0.68615322778733, "grad_norm": 8.726715496364722, "learning_rate": 5.423320824763914e-07, "loss": 1.0263, "step": 7682 }, { "epoch": 0.6863318670031039, "grad_norm": 10.98638628456828, "learning_rate": 5.417776776294081e-07, "loss": 0.7359, "step": 7684 }, { "epoch": 0.6865105062188777, "grad_norm": 8.702864637880646, "learning_rate": 5.412234509980813e-07, "loss": -0.4252, "step": 7686 }, { "epoch": 0.6866891454346515, "grad_norm": 12.604650432456033, "learning_rate": 5.406694027979657e-07, "loss": -0.6778, "step": 7688 }, { "epoch": 0.6868677846504254, "grad_norm": 9.15318523060266, "learning_rate": 5.401155332445462e-07, "loss": -0.7526, "step": 7690 }, { "epoch": 0.6870464238661992, "grad_norm": 15.592336953763137, "learning_rate": 5.395618425532389e-07, "loss": -0.0365, "step": 7692 }, { "epoch": 0.6872250630819731, "grad_norm": 4.497807922509374, "learning_rate": 5.390083309393889e-07, "loss": 0.4501, "step": 7694 }, { "epoch": 0.6874037022977469, "grad_norm": 6.180506906306026, "learning_rate": 5.384549986182729e-07, "loss": 0.4642, "step": 7696 }, { "epoch": 0.6875823415135207, "grad_norm": 4.705034140970732, "learning_rate": 5.379018458050974e-07, "loss": -0.3943, "step": 7698 }, { "epoch": 0.6877609807292946, "grad_norm": 17.370611940276817, "learning_rate": 5.373488727150002e-07, "loss": -0.2916, "step": 7700 }, { "epoch": 0.6879396199450685, "grad_norm": 9.403478473930164, "learning_rate": 5.36796079563047e-07, "loss": -0.4805, "step": 7702 }, { "epoch": 0.6881182591608422, "grad_norm": 11.098237497427977, "learning_rate": 5.362434665642353e-07, "loss": -0.3592, "step": 7704 }, { "epoch": 0.6882968983766161, "grad_norm": 24.88854795555453, "learning_rate": 5.356910339334921e-07, "loss": -0.224, "step": 7706 }, { "epoch": 0.68847553759239, "grad_norm": 9.634516383546412, "learning_rate": 5.351387818856733e-07, "loss": -0.2905, "step": 7708 }, { "epoch": 0.6886541768081638, "grad_norm": 30.978324941980603, "learning_rate": 5.345867106355664e-07, "loss": -0.7945, "step": 7710 }, { "epoch": 0.6888328160239376, "grad_norm": 7.07897674623117, "learning_rate": 5.34034820397887e-07, "loss": 0.286, "step": 7712 }, { "epoch": 0.6890114552397115, "grad_norm": 13.717386860131482, "learning_rate": 5.334831113872815e-07, "loss": -0.2946, "step": 7714 }, { "epoch": 0.6891900944554853, "grad_norm": 3.5901955981085796, "learning_rate": 5.329315838183242e-07, "loss": -0.0721, "step": 7716 }, { "epoch": 0.6893687336712592, "grad_norm": 3.6001484840289106, "learning_rate": 5.323802379055203e-07, "loss": -0.0562, "step": 7718 }, { "epoch": 0.689547372887033, "grad_norm": 4.5711583585578905, "learning_rate": 5.31829073863304e-07, "loss": 0.3321, "step": 7720 }, { "epoch": 0.6897260121028068, "grad_norm": 6.288037329099473, "learning_rate": 5.312780919060385e-07, "loss": -0.3568, "step": 7722 }, { "epoch": 0.6899046513185807, "grad_norm": 3.993727415017994, "learning_rate": 5.307272922480162e-07, "loss": 0.1574, "step": 7724 }, { "epoch": 0.6900832905343546, "grad_norm": 3.724513356211867, "learning_rate": 5.30176675103459e-07, "loss": 0.5457, "step": 7726 }, { "epoch": 0.6902619297501283, "grad_norm": 7.052065813126393, "learning_rate": 5.296262406865173e-07, "loss": 0.0501, "step": 7728 }, { "epoch": 0.6904405689659022, "grad_norm": 8.738427493048707, "learning_rate": 5.290759892112702e-07, "loss": -0.9374, "step": 7730 }, { "epoch": 0.6906192081816761, "grad_norm": 6.256286816102807, "learning_rate": 5.285259208917268e-07, "loss": -0.6247, "step": 7732 }, { "epoch": 0.6907978473974499, "grad_norm": 8.933063819957058, "learning_rate": 5.279760359418246e-07, "loss": 1.211, "step": 7734 }, { "epoch": 0.6909764866132238, "grad_norm": 11.425120840956946, "learning_rate": 5.274263345754285e-07, "loss": -0.0582, "step": 7736 }, { "epoch": 0.6911551258289976, "grad_norm": 17.094528375788112, "learning_rate": 5.268768170063333e-07, "loss": -0.011, "step": 7738 }, { "epoch": 0.6913337650447715, "grad_norm": 3.8265755144975064, "learning_rate": 5.263274834482622e-07, "loss": 0.9955, "step": 7740 }, { "epoch": 0.6915124042605453, "grad_norm": 10.058601714195877, "learning_rate": 5.257783341148664e-07, "loss": -1.3971, "step": 7742 }, { "epoch": 0.6916910434763192, "grad_norm": 8.841927384377541, "learning_rate": 5.252293692197256e-07, "loss": 0.4143, "step": 7744 }, { "epoch": 0.691869682692093, "grad_norm": 11.637734278237296, "learning_rate": 5.246805889763483e-07, "loss": -0.4447, "step": 7746 }, { "epoch": 0.6920483219078668, "grad_norm": 4.675260102079319, "learning_rate": 5.241319935981698e-07, "loss": -0.2362, "step": 7748 }, { "epoch": 0.6922269611236407, "grad_norm": 5.00083820946461, "learning_rate": 5.235835832985551e-07, "loss": 0.7968, "step": 7750 }, { "epoch": 0.6924056003394146, "grad_norm": 11.948147324472723, "learning_rate": 5.230353582907963e-07, "loss": -0.3476, "step": 7752 }, { "epoch": 0.6925842395551883, "grad_norm": 5.047316587988105, "learning_rate": 5.224873187881136e-07, "loss": 0.0445, "step": 7754 }, { "epoch": 0.6927628787709622, "grad_norm": 21.54821126997661, "learning_rate": 5.219394650036553e-07, "loss": 1.1471, "step": 7756 }, { "epoch": 0.6929415179867361, "grad_norm": 3.6315293537830167, "learning_rate": 5.213917971504969e-07, "loss": -0.9873, "step": 7758 }, { "epoch": 0.6931201572025099, "grad_norm": 16.923874896249565, "learning_rate": 5.208443154416429e-07, "loss": -0.6068, "step": 7760 }, { "epoch": 0.6932987964182837, "grad_norm": 2.518235930808818, "learning_rate": 5.202970200900228e-07, "loss": 0.655, "step": 7762 }, { "epoch": 0.6934774356340576, "grad_norm": 7.237787418225668, "learning_rate": 5.197499113084967e-07, "loss": -0.1201, "step": 7764 }, { "epoch": 0.6936560748498314, "grad_norm": 10.446899202246307, "learning_rate": 5.192029893098507e-07, "loss": -1.6385, "step": 7766 }, { "epoch": 0.6938347140656053, "grad_norm": 15.581200329593914, "learning_rate": 5.186562543067974e-07, "loss": -0.5219, "step": 7768 }, { "epoch": 0.6940133532813791, "grad_norm": 20.413837511915553, "learning_rate": 5.18109706511978e-07, "loss": -1.0609, "step": 7770 }, { "epoch": 0.6941919924971529, "grad_norm": 6.942039310608884, "learning_rate": 5.175633461379604e-07, "loss": -0.5028, "step": 7772 }, { "epoch": 0.6943706317129268, "grad_norm": 13.13606769220007, "learning_rate": 5.170171733972398e-07, "loss": -0.9086, "step": 7774 }, { "epoch": 0.6945492709287007, "grad_norm": 13.696163135840441, "learning_rate": 5.164711885022382e-07, "loss": -0.383, "step": 7776 }, { "epoch": 0.6947279101444744, "grad_norm": 7.7899569779311095, "learning_rate": 5.159253916653045e-07, "loss": 0.7089, "step": 7778 }, { "epoch": 0.6949065493602483, "grad_norm": 12.952060632725168, "learning_rate": 5.15379783098715e-07, "loss": -1.602, "step": 7780 }, { "epoch": 0.6950851885760222, "grad_norm": 9.329690313075275, "learning_rate": 5.148343630146716e-07, "loss": -1.0562, "step": 7782 }, { "epoch": 0.695263827791796, "grad_norm": 13.105415773924225, "learning_rate": 5.14289131625304e-07, "loss": -0.485, "step": 7784 }, { "epoch": 0.6954424670075698, "grad_norm": 7.375617049961026, "learning_rate": 5.137440891426679e-07, "loss": -0.8822, "step": 7786 }, { "epoch": 0.6956211062233437, "grad_norm": 9.294909767997474, "learning_rate": 5.131992357787467e-07, "loss": -0.029, "step": 7788 }, { "epoch": 0.6957997454391175, "grad_norm": 6.527545078900649, "learning_rate": 5.126545717454484e-07, "loss": -0.2834, "step": 7790 }, { "epoch": 0.6959783846548914, "grad_norm": 11.156562864901362, "learning_rate": 5.121100972546084e-07, "loss": 1.7915, "step": 7792 }, { "epoch": 0.6961570238706652, "grad_norm": 7.054577189752187, "learning_rate": 5.11565812517989e-07, "loss": -0.0742, "step": 7794 }, { "epoch": 0.696335663086439, "grad_norm": 13.024127578073008, "learning_rate": 5.110217177472767e-07, "loss": -0.4142, "step": 7796 }, { "epoch": 0.6965143023022129, "grad_norm": 13.49262120665837, "learning_rate": 5.104778131540862e-07, "loss": -0.7827, "step": 7798 }, { "epoch": 0.6966929415179868, "grad_norm": 7.368708885278773, "learning_rate": 5.099340989499578e-07, "loss": -0.7904, "step": 7800 }, { "epoch": 0.6968715807337605, "grad_norm": 10.643829176961637, "learning_rate": 5.093905753463567e-07, "loss": -0.0086, "step": 7802 }, { "epoch": 0.6970502199495344, "grad_norm": 8.651050534704089, "learning_rate": 5.088472425546746e-07, "loss": -0.5261, "step": 7804 }, { "epoch": 0.6972288591653083, "grad_norm": 4.031031876214891, "learning_rate": 5.083041007862294e-07, "loss": 0.288, "step": 7806 }, { "epoch": 0.6974074983810821, "grad_norm": 12.033959338811314, "learning_rate": 5.07761150252264e-07, "loss": -0.5922, "step": 7808 }, { "epoch": 0.697586137596856, "grad_norm": 21.967927616186284, "learning_rate": 5.072183911639478e-07, "loss": -1.3064, "step": 7810 }, { "epoch": 0.6977647768126298, "grad_norm": 4.54035340433483, "learning_rate": 5.066758237323747e-07, "loss": -0.5298, "step": 7812 }, { "epoch": 0.6979434160284036, "grad_norm": 8.486504556206844, "learning_rate": 5.061334481685651e-07, "loss": 0.6445, "step": 7814 }, { "epoch": 0.6981220552441775, "grad_norm": 6.560529140438221, "learning_rate": 5.055912646834635e-07, "loss": 0.2495, "step": 7816 }, { "epoch": 0.6983006944599514, "grad_norm": 3.7291963174731593, "learning_rate": 5.050492734879404e-07, "loss": 0.6664, "step": 7818 }, { "epoch": 0.6984793336757251, "grad_norm": 8.622950655960912, "learning_rate": 5.045074747927927e-07, "loss": -0.432, "step": 7820 }, { "epoch": 0.698657972891499, "grad_norm": 7.435317544729801, "learning_rate": 5.039658688087401e-07, "loss": -1.1086, "step": 7822 }, { "epoch": 0.6988366121072729, "grad_norm": 8.22078502684017, "learning_rate": 5.034244557464292e-07, "loss": 0.3597, "step": 7824 }, { "epoch": 0.6990152513230466, "grad_norm": 9.727668815722444, "learning_rate": 5.028832358164303e-07, "loss": -1.2207, "step": 7826 }, { "epoch": 0.6991938905388205, "grad_norm": 18.88678459463703, "learning_rate": 5.023422092292398e-07, "loss": -1.0883, "step": 7828 }, { "epoch": 0.6993725297545944, "grad_norm": 3.8614200008037747, "learning_rate": 5.01801376195278e-07, "loss": 0.6781, "step": 7830 }, { "epoch": 0.6995511689703682, "grad_norm": 1.0110910596769003, "learning_rate": 5.0126073692489e-07, "loss": 0.2601, "step": 7832 }, { "epoch": 0.699729808186142, "grad_norm": 19.09686084135664, "learning_rate": 5.007202916283465e-07, "loss": -0.6517, "step": 7834 }, { "epoch": 0.6999084474019159, "grad_norm": 16.77711137426384, "learning_rate": 5.001800405158411e-07, "loss": -0.664, "step": 7836 }, { "epoch": 0.7000870866176897, "grad_norm": 6.324755263588237, "learning_rate": 4.996399837974931e-07, "loss": 0.5204, "step": 7838 }, { "epoch": 0.7002657258334636, "grad_norm": 10.346525897349856, "learning_rate": 4.991001216833456e-07, "loss": -1.1964, "step": 7840 }, { "epoch": 0.7004443650492375, "grad_norm": 18.601508360379004, "learning_rate": 4.985604543833672e-07, "loss": 0.5278, "step": 7842 }, { "epoch": 0.7006230042650112, "grad_norm": 8.318986011484654, "learning_rate": 4.98020982107449e-07, "loss": 0.7301, "step": 7844 }, { "epoch": 0.7008016434807851, "grad_norm": 8.366565157615225, "learning_rate": 4.974817050654073e-07, "loss": -0.5393, "step": 7846 }, { "epoch": 0.700980282696559, "grad_norm": 11.577798156655263, "learning_rate": 4.969426234669826e-07, "loss": -0.4384, "step": 7848 }, { "epoch": 0.7011589219123328, "grad_norm": 2.237830098273006, "learning_rate": 4.964037375218379e-07, "loss": -1.0395, "step": 7850 }, { "epoch": 0.7013375611281066, "grad_norm": 6.156690552724874, "learning_rate": 4.958650474395623e-07, "loss": -0.598, "step": 7852 }, { "epoch": 0.7015162003438805, "grad_norm": 10.796270814544092, "learning_rate": 4.953265534296676e-07, "loss": -0.8324, "step": 7854 }, { "epoch": 0.7016948395596543, "grad_norm": 29.71105870340779, "learning_rate": 4.94788255701589e-07, "loss": -0.5891, "step": 7856 }, { "epoch": 0.7018734787754282, "grad_norm": 7.697753271314, "learning_rate": 4.942501544646858e-07, "loss": -1.2665, "step": 7858 }, { "epoch": 0.702052117991202, "grad_norm": 4.446696677690507, "learning_rate": 4.937122499282407e-07, "loss": -0.3548, "step": 7860 }, { "epoch": 0.7022307572069758, "grad_norm": 4.946660874938316, "learning_rate": 4.931745423014605e-07, "loss": 0.5584, "step": 7862 }, { "epoch": 0.7024093964227497, "grad_norm": 10.050906877737024, "learning_rate": 4.926370317934746e-07, "loss": -0.3045, "step": 7864 }, { "epoch": 0.7025880356385236, "grad_norm": 19.659156960372897, "learning_rate": 4.920997186133361e-07, "loss": 0.1317, "step": 7866 }, { "epoch": 0.7027666748542973, "grad_norm": 11.448956864328295, "learning_rate": 4.915626029700219e-07, "loss": 0.0594, "step": 7868 }, { "epoch": 0.7029453140700712, "grad_norm": 6.451362572397194, "learning_rate": 4.910256850724305e-07, "loss": 0.1535, "step": 7870 }, { "epoch": 0.7031239532858451, "grad_norm": 12.126097953872634, "learning_rate": 4.904889651293846e-07, "loss": -0.3159, "step": 7872 }, { "epoch": 0.703302592501619, "grad_norm": 4.758648277500143, "learning_rate": 4.89952443349631e-07, "loss": 0.5244, "step": 7874 }, { "epoch": 0.7034812317173927, "grad_norm": 8.064157714754685, "learning_rate": 4.894161199418372e-07, "loss": 0.5006, "step": 7876 }, { "epoch": 0.7036598709331666, "grad_norm": 4.918638491305729, "learning_rate": 4.888799951145947e-07, "loss": 0.3268, "step": 7878 }, { "epoch": 0.7038385101489405, "grad_norm": 14.840223162430616, "learning_rate": 4.883440690764178e-07, "loss": 0.1204, "step": 7880 }, { "epoch": 0.7040171493647143, "grad_norm": 4.963826937515105, "learning_rate": 4.878083420357434e-07, "loss": -0.2046, "step": 7882 }, { "epoch": 0.7041957885804881, "grad_norm": 18.84442856106199, "learning_rate": 4.872728142009307e-07, "loss": -0.7684, "step": 7884 }, { "epoch": 0.704374427796262, "grad_norm": 15.384463987339123, "learning_rate": 4.867374857802619e-07, "loss": -0.313, "step": 7886 }, { "epoch": 0.7045530670120358, "grad_norm": 8.151071358688599, "learning_rate": 4.862023569819417e-07, "loss": 0.3081, "step": 7888 }, { "epoch": 0.7047317062278097, "grad_norm": 6.8764827169771, "learning_rate": 4.856674280140961e-07, "loss": -0.8312, "step": 7890 }, { "epoch": 0.7049103454435836, "grad_norm": 5.585114702568312, "learning_rate": 4.851326990847746e-07, "loss": 0.5409, "step": 7892 }, { "epoch": 0.7050889846593573, "grad_norm": 7.172528937285731, "learning_rate": 4.845981704019483e-07, "loss": 0.0686, "step": 7894 }, { "epoch": 0.7052676238751312, "grad_norm": 9.660808980803534, "learning_rate": 4.840638421735108e-07, "loss": -0.49, "step": 7896 }, { "epoch": 0.7054462630909051, "grad_norm": 6.705203856598613, "learning_rate": 4.835297146072771e-07, "loss": -0.3104, "step": 7898 }, { "epoch": 0.7056249023066788, "grad_norm": 7.471725318140137, "learning_rate": 4.829957879109849e-07, "loss": -0.9002, "step": 7900 }, { "epoch": 0.7058035415224527, "grad_norm": 7.080712525457029, "learning_rate": 4.824620622922938e-07, "loss": 0.1567, "step": 7902 }, { "epoch": 0.7059821807382266, "grad_norm": 8.423878164339554, "learning_rate": 4.819285379587836e-07, "loss": -0.1987, "step": 7904 }, { "epoch": 0.7061608199540004, "grad_norm": 5.929554652559124, "learning_rate": 4.813952151179581e-07, "loss": -0.1046, "step": 7906 }, { "epoch": 0.7063394591697743, "grad_norm": 7.080233026322739, "learning_rate": 4.808620939772418e-07, "loss": 0.3369, "step": 7908 }, { "epoch": 0.7065180983855481, "grad_norm": 11.025231986513722, "learning_rate": 4.803291747439798e-07, "loss": -0.0477, "step": 7910 }, { "epoch": 0.7066967376013219, "grad_norm": 4.4615270756138585, "learning_rate": 4.7979645762544e-07, "loss": 0.2824, "step": 7912 }, { "epoch": 0.7068753768170958, "grad_norm": 14.816394921561711, "learning_rate": 4.792639428288112e-07, "loss": 0.3061, "step": 7914 }, { "epoch": 0.7070540160328697, "grad_norm": 9.95399420998535, "learning_rate": 4.787316305612034e-07, "loss": -0.0181, "step": 7916 }, { "epoch": 0.7072326552486434, "grad_norm": 4.387139399563918, "learning_rate": 4.781995210296478e-07, "loss": -0.4121, "step": 7918 }, { "epoch": 0.7074112944644173, "grad_norm": 10.311487136576055, "learning_rate": 4.776676144410972e-07, "loss": 0.6132, "step": 7920 }, { "epoch": 0.7075899336801912, "grad_norm": 13.076700331227842, "learning_rate": 4.771359110024255e-07, "loss": -0.6857, "step": 7922 }, { "epoch": 0.707768572895965, "grad_norm": 11.689587411770438, "learning_rate": 4.7660441092042616e-07, "loss": -0.7111, "step": 7924 }, { "epoch": 0.7079472121117388, "grad_norm": 13.922399440353585, "learning_rate": 4.760731144018153e-07, "loss": -0.3539, "step": 7926 }, { "epoch": 0.7081258513275127, "grad_norm": 9.760241875700006, "learning_rate": 4.755420216532292e-07, "loss": -0.5305, "step": 7928 }, { "epoch": 0.7083044905432865, "grad_norm": 3.3581938330466423, "learning_rate": 4.7501113288122475e-07, "loss": 0.7312, "step": 7930 }, { "epoch": 0.7084831297590604, "grad_norm": 6.082657484719653, "learning_rate": 4.7448044829227987e-07, "loss": 0.9441, "step": 7932 }, { "epoch": 0.7086617689748342, "grad_norm": 6.3045988584808015, "learning_rate": 4.7394996809279253e-07, "loss": -0.4966, "step": 7934 }, { "epoch": 0.708840408190608, "grad_norm": 12.708986025866267, "learning_rate": 4.734196924890822e-07, "loss": -0.0083, "step": 7936 }, { "epoch": 0.7090190474063819, "grad_norm": 9.906166822403167, "learning_rate": 4.7288962168738677e-07, "loss": -0.2849, "step": 7938 }, { "epoch": 0.7091976866221558, "grad_norm": 11.041825625636942, "learning_rate": 4.7235975589386713e-07, "loss": 0.0459, "step": 7940 }, { "epoch": 0.7093763258379295, "grad_norm": 6.7694833514967305, "learning_rate": 4.7183009531460295e-07, "loss": -0.3631, "step": 7942 }, { "epoch": 0.7095549650537034, "grad_norm": 2.953600648388528, "learning_rate": 4.713006401555936e-07, "loss": 0.3594, "step": 7944 }, { "epoch": 0.7097336042694773, "grad_norm": 9.2117918490283, "learning_rate": 4.707713906227594e-07, "loss": 1.0347, "step": 7946 }, { "epoch": 0.7099122434852511, "grad_norm": 8.844295633151463, "learning_rate": 4.702423469219408e-07, "loss": -0.6498, "step": 7948 }, { "epoch": 0.7100908827010249, "grad_norm": 8.814675786227681, "learning_rate": 4.6971350925889754e-07, "loss": -0.3835, "step": 7950 }, { "epoch": 0.7102695219167988, "grad_norm": 6.658906648260219, "learning_rate": 4.6918487783930995e-07, "loss": -0.0782, "step": 7952 }, { "epoch": 0.7104481611325726, "grad_norm": 3.37221454540582, "learning_rate": 4.686564528687775e-07, "loss": 0.7256, "step": 7954 }, { "epoch": 0.7106268003483465, "grad_norm": 7.135269260044086, "learning_rate": 4.6812823455282023e-07, "loss": -0.3581, "step": 7956 }, { "epoch": 0.7108054395641203, "grad_norm": 7.51133811966286, "learning_rate": 4.676002230968763e-07, "loss": -1.0424, "step": 7958 }, { "epoch": 0.7109840787798941, "grad_norm": 10.298056735820584, "learning_rate": 4.6707241870630456e-07, "loss": -0.8916, "step": 7960 }, { "epoch": 0.711162717995668, "grad_norm": 2.134819120834461, "learning_rate": 4.66544821586384e-07, "loss": 1.0318, "step": 7962 }, { "epoch": 0.7113413572114419, "grad_norm": 6.896853860287749, "learning_rate": 4.66017431942311e-07, "loss": 0.7351, "step": 7964 }, { "epoch": 0.7115199964272156, "grad_norm": 9.011922780254258, "learning_rate": 4.6549024997920283e-07, "loss": 0.7683, "step": 7966 }, { "epoch": 0.7116986356429895, "grad_norm": 14.026757963437015, "learning_rate": 4.649632759020955e-07, "loss": -1.2064, "step": 7968 }, { "epoch": 0.7118772748587634, "grad_norm": 6.170727123279757, "learning_rate": 4.644365099159442e-07, "loss": -0.1362, "step": 7970 }, { "epoch": 0.7120559140745372, "grad_norm": 10.265993248127796, "learning_rate": 4.63909952225623e-07, "loss": -1.2719, "step": 7972 }, { "epoch": 0.712234553290311, "grad_norm": 4.8398516318539535, "learning_rate": 4.6338360303592527e-07, "loss": -0.7521, "step": 7974 }, { "epoch": 0.7124131925060849, "grad_norm": 3.173422299775497, "learning_rate": 4.6285746255156346e-07, "loss": 0.4052, "step": 7976 }, { "epoch": 0.7125918317218587, "grad_norm": 12.594551458485121, "learning_rate": 4.6233153097716784e-07, "loss": -0.2764, "step": 7978 }, { "epoch": 0.7127704709376326, "grad_norm": 5.052998609973347, "learning_rate": 4.6180580851728833e-07, "loss": -0.0929, "step": 7980 }, { "epoch": 0.7129491101534065, "grad_norm": 6.616920294653021, "learning_rate": 4.6128029537639346e-07, "loss": -1.3316, "step": 7982 }, { "epoch": 0.7131277493691802, "grad_norm": 8.467734638451217, "learning_rate": 4.607549917588701e-07, "loss": -0.533, "step": 7984 }, { "epoch": 0.7133063885849541, "grad_norm": 7.552149222952509, "learning_rate": 4.6022989786902386e-07, "loss": 0.7694, "step": 7986 }, { "epoch": 0.713485027800728, "grad_norm": 7.158926783850579, "learning_rate": 4.5970501391107853e-07, "loss": -0.3075, "step": 7988 }, { "epoch": 0.7136636670165017, "grad_norm": 12.124916941564743, "learning_rate": 4.591803400891767e-07, "loss": -0.9546, "step": 7990 }, { "epoch": 0.7138423062322756, "grad_norm": 14.742414690055178, "learning_rate": 4.586558766073779e-07, "loss": 0.5986, "step": 7992 }, { "epoch": 0.7140209454480495, "grad_norm": 13.187640004070218, "learning_rate": 4.5813162366966197e-07, "loss": -0.6302, "step": 7994 }, { "epoch": 0.7141995846638233, "grad_norm": 9.831976543724444, "learning_rate": 4.576075814799255e-07, "loss": -0.4123, "step": 7996 }, { "epoch": 0.7143782238795972, "grad_norm": 10.722289098905504, "learning_rate": 4.57083750241983e-07, "loss": -1.5989, "step": 7998 }, { "epoch": 0.714556863095371, "grad_norm": 11.131886611717388, "learning_rate": 4.565601301595674e-07, "loss": 0.8949, "step": 8000 }, { "epoch": 0.7147355023111448, "grad_norm": 12.677040336157738, "learning_rate": 4.5603672143632945e-07, "loss": -0.9524, "step": 8002 }, { "epoch": 0.7149141415269187, "grad_norm": 17.720040841152407, "learning_rate": 4.5551352427583766e-07, "loss": -0.4911, "step": 8004 }, { "epoch": 0.7150927807426926, "grad_norm": 7.818327904398941, "learning_rate": 4.5499053888157836e-07, "loss": 0.213, "step": 8006 }, { "epoch": 0.7152714199584664, "grad_norm": 6.319168647458266, "learning_rate": 4.5446776545695507e-07, "loss": -0.1025, "step": 8008 }, { "epoch": 0.7154500591742402, "grad_norm": 14.376488649991009, "learning_rate": 4.5394520420529e-07, "loss": -0.7224, "step": 8010 }, { "epoch": 0.7156286983900141, "grad_norm": 15.086250542939288, "learning_rate": 4.5342285532982105e-07, "loss": 0.18, "step": 8012 }, { "epoch": 0.715807337605788, "grad_norm": 12.109591731931449, "learning_rate": 4.529007190337045e-07, "loss": 0.5958, "step": 8014 }, { "epoch": 0.7159859768215617, "grad_norm": 14.829120566387642, "learning_rate": 4.5237879552001523e-07, "loss": -0.1994, "step": 8016 }, { "epoch": 0.7161646160373356, "grad_norm": 5.112025702004451, "learning_rate": 4.518570849917428e-07, "loss": -0.1829, "step": 8018 }, { "epoch": 0.7163432552531095, "grad_norm": 8.886489523142526, "learning_rate": 4.513355876517957e-07, "loss": -0.1284, "step": 8020 }, { "epoch": 0.7165218944688833, "grad_norm": 4.63075805192822, "learning_rate": 4.5081430370299943e-07, "loss": 0.4531, "step": 8022 }, { "epoch": 0.7167005336846571, "grad_norm": 12.261645328706592, "learning_rate": 4.50293233348095e-07, "loss": -0.5729, "step": 8024 }, { "epoch": 0.716879172900431, "grad_norm": 11.826022688210534, "learning_rate": 4.4977237678974247e-07, "loss": -0.6661, "step": 8026 }, { "epoch": 0.7170578121162048, "grad_norm": 5.861396461197775, "learning_rate": 4.4925173423051734e-07, "loss": -0.2223, "step": 8028 }, { "epoch": 0.7172364513319787, "grad_norm": 11.118469035740958, "learning_rate": 4.4873130587291276e-07, "loss": -0.1177, "step": 8030 }, { "epoch": 0.7174150905477525, "grad_norm": 8.348923811401907, "learning_rate": 4.4821109191933727e-07, "loss": -0.7862, "step": 8032 }, { "epoch": 0.7175937297635263, "grad_norm": 8.144404787687257, "learning_rate": 4.4769109257211723e-07, "loss": 0.3287, "step": 8034 }, { "epoch": 0.7177723689793002, "grad_norm": 3.9434432936681083, "learning_rate": 4.47171308033495e-07, "loss": 1.2515, "step": 8036 }, { "epoch": 0.7179510081950741, "grad_norm": 9.400503234379014, "learning_rate": 4.4665173850562975e-07, "loss": 0.1591, "step": 8038 }, { "epoch": 0.7181296474108478, "grad_norm": 10.062424558747672, "learning_rate": 4.461323841905966e-07, "loss": 0.1897, "step": 8040 }, { "epoch": 0.7183082866266217, "grad_norm": 12.186709960370893, "learning_rate": 4.4561324529038745e-07, "loss": -1.6887, "step": 8042 }, { "epoch": 0.7184869258423956, "grad_norm": 9.920057427640012, "learning_rate": 4.4509432200691023e-07, "loss": -0.4598, "step": 8044 }, { "epoch": 0.7186655650581694, "grad_norm": 6.394060251533773, "learning_rate": 4.445756145419882e-07, "loss": 0.0725, "step": 8046 }, { "epoch": 0.7188442042739432, "grad_norm": 18.061629858474326, "learning_rate": 4.4405712309736153e-07, "loss": 0.2736, "step": 8048 }, { "epoch": 0.7190228434897171, "grad_norm": 3.803790290210465, "learning_rate": 4.435388478746873e-07, "loss": 0.7036, "step": 8050 }, { "epoch": 0.7192014827054909, "grad_norm": 9.818545671907227, "learning_rate": 4.430207890755364e-07, "loss": 0.2573, "step": 8052 }, { "epoch": 0.7193801219212648, "grad_norm": 15.08420376171709, "learning_rate": 4.425029469013969e-07, "loss": -0.7451, "step": 8054 }, { "epoch": 0.7195587611370386, "grad_norm": 16.389253658969555, "learning_rate": 4.419853215536723e-07, "loss": -0.744, "step": 8056 }, { "epoch": 0.7197374003528124, "grad_norm": 11.42463669480997, "learning_rate": 4.414679132336817e-07, "loss": 0.897, "step": 8058 }, { "epoch": 0.7199160395685863, "grad_norm": 7.400212527135781, "learning_rate": 4.4095072214266005e-07, "loss": -0.4632, "step": 8060 }, { "epoch": 0.7200946787843602, "grad_norm": 12.214272852024836, "learning_rate": 4.4043374848175764e-07, "loss": -0.1048, "step": 8062 }, { "epoch": 0.7202733180001339, "grad_norm": 3.80422450700947, "learning_rate": 4.399169924520403e-07, "loss": 0.8035, "step": 8064 }, { "epoch": 0.7204519572159078, "grad_norm": 8.718322962212257, "learning_rate": 4.394004542544886e-07, "loss": 0.1255, "step": 8066 }, { "epoch": 0.7206305964316817, "grad_norm": 5.5267639934466, "learning_rate": 4.3888413408999914e-07, "loss": -0.6338, "step": 8068 }, { "epoch": 0.7208092356474555, "grad_norm": 6.396386568908731, "learning_rate": 4.383680321593836e-07, "loss": -0.2088, "step": 8070 }, { "epoch": 0.7209878748632293, "grad_norm": 5.963222640609238, "learning_rate": 4.378521486633685e-07, "loss": -1.6587, "step": 8072 }, { "epoch": 0.7211665140790032, "grad_norm": 14.295725491698013, "learning_rate": 4.3733648380259566e-07, "loss": -1.1774, "step": 8074 }, { "epoch": 0.721345153294777, "grad_norm": 8.335978228139727, "learning_rate": 4.368210377776221e-07, "loss": -1.4897, "step": 8076 }, { "epoch": 0.7215237925105509, "grad_norm": 7.872801458668775, "learning_rate": 4.363058107889187e-07, "loss": 0.397, "step": 8078 }, { "epoch": 0.7217024317263248, "grad_norm": 9.960381418301553, "learning_rate": 4.357908030368719e-07, "loss": -0.5889, "step": 8080 }, { "epoch": 0.7218810709420985, "grad_norm": 13.245574788012929, "learning_rate": 4.352760147217834e-07, "loss": -0.2872, "step": 8082 }, { "epoch": 0.7220597101578724, "grad_norm": 3.9014100875692006, "learning_rate": 4.3476144604386923e-07, "loss": 0.933, "step": 8084 }, { "epoch": 0.7222383493736463, "grad_norm": 10.672955169300508, "learning_rate": 4.342470972032587e-07, "loss": 0.0139, "step": 8086 }, { "epoch": 0.72241698858942, "grad_norm": 9.71176946732795, "learning_rate": 4.337329683999972e-07, "loss": 0.6018, "step": 8088 }, { "epoch": 0.7225956278051939, "grad_norm": 8.7790415963303, "learning_rate": 4.3321905983404395e-07, "loss": -1.1624, "step": 8090 }, { "epoch": 0.7227742670209678, "grad_norm": 16.88839619458848, "learning_rate": 4.3270537170527257e-07, "loss": -0.5658, "step": 8092 }, { "epoch": 0.7229529062367416, "grad_norm": 2.7672813326383183, "learning_rate": 4.3219190421347097e-07, "loss": 0.2688, "step": 8094 }, { "epoch": 0.7231315454525155, "grad_norm": 9.112172765311401, "learning_rate": 4.316786575583412e-07, "loss": -0.5588, "step": 8096 }, { "epoch": 0.7233101846682893, "grad_norm": 11.058954910099475, "learning_rate": 4.3116563193949993e-07, "loss": 0.1296, "step": 8098 }, { "epoch": 0.7234888238840631, "grad_norm": 14.547486518765773, "learning_rate": 4.306528275564765e-07, "loss": -1.5898, "step": 8100 }, { "epoch": 0.723667463099837, "grad_norm": 7.5705833425976365, "learning_rate": 4.30140244608715e-07, "loss": -0.8156, "step": 8102 }, { "epoch": 0.7238461023156109, "grad_norm": 4.616074894368583, "learning_rate": 4.2962788329557475e-07, "loss": 0.1453, "step": 8104 }, { "epoch": 0.7240247415313846, "grad_norm": 4.362193300675252, "learning_rate": 4.2911574381632645e-07, "loss": 0.3747, "step": 8106 }, { "epoch": 0.7242033807471585, "grad_norm": 13.808230745759031, "learning_rate": 4.286038263701559e-07, "loss": -0.0651, "step": 8108 }, { "epoch": 0.7243820199629324, "grad_norm": 10.878176871868929, "learning_rate": 4.280921311561628e-07, "loss": 0.1744, "step": 8110 }, { "epoch": 0.7245606591787062, "grad_norm": 9.383295890051315, "learning_rate": 4.2758065837335886e-07, "loss": -0.4102, "step": 8112 }, { "epoch": 0.72473929839448, "grad_norm": 6.439250819065821, "learning_rate": 4.270694082206715e-07, "loss": -0.582, "step": 8114 }, { "epoch": 0.7249179376102539, "grad_norm": 8.503348644586245, "learning_rate": 4.2655838089693984e-07, "loss": -0.9613, "step": 8116 }, { "epoch": 0.7250965768260277, "grad_norm": 14.464673912046408, "learning_rate": 4.260475766009175e-07, "loss": -0.9417, "step": 8118 }, { "epoch": 0.7252752160418016, "grad_norm": 2.13644134003868, "learning_rate": 4.2553699553126975e-07, "loss": -0.1692, "step": 8120 }, { "epoch": 0.7254538552575754, "grad_norm": 5.0536183755110295, "learning_rate": 4.2502663788657665e-07, "loss": 0.123, "step": 8122 }, { "epoch": 0.7256324944733492, "grad_norm": 11.902677705972955, "learning_rate": 4.2451650386533067e-07, "loss": -0.2944, "step": 8124 }, { "epoch": 0.7258111336891231, "grad_norm": 7.728791535383271, "learning_rate": 4.240065936659374e-07, "loss": -0.1098, "step": 8126 }, { "epoch": 0.725989772904897, "grad_norm": 5.4078426771360775, "learning_rate": 4.234969074867154e-07, "loss": -0.4154, "step": 8128 }, { "epoch": 0.7261684121206707, "grad_norm": 6.788945626151659, "learning_rate": 4.229874455258964e-07, "loss": -0.0539, "step": 8130 }, { "epoch": 0.7263470513364446, "grad_norm": 6.876574804099564, "learning_rate": 4.224782079816239e-07, "loss": 1.0542, "step": 8132 }, { "epoch": 0.7265256905522185, "grad_norm": 5.466654523200217, "learning_rate": 4.2196919505195483e-07, "loss": -0.376, "step": 8134 }, { "epoch": 0.7267043297679923, "grad_norm": 13.719940064413318, "learning_rate": 4.214604069348595e-07, "loss": -1.5072, "step": 8136 }, { "epoch": 0.7268829689837661, "grad_norm": 13.610142998206912, "learning_rate": 4.209518438282199e-07, "loss": 0.2884, "step": 8138 }, { "epoch": 0.72706160819954, "grad_norm": 30.888669837278623, "learning_rate": 4.2044350592983024e-07, "loss": -1.4059, "step": 8140 }, { "epoch": 0.7272402474153139, "grad_norm": 6.541875783246556, "learning_rate": 4.199353934373976e-07, "loss": -0.9704, "step": 8142 }, { "epoch": 0.7274188866310877, "grad_norm": 8.603669257637943, "learning_rate": 4.1942750654854145e-07, "loss": 0.0611, "step": 8144 }, { "epoch": 0.7275975258468615, "grad_norm": 9.832433583966152, "learning_rate": 4.189198454607934e-07, "loss": -0.1839, "step": 8146 }, { "epoch": 0.7277761650626354, "grad_norm": 14.206829330775712, "learning_rate": 4.184124103715971e-07, "loss": -0.7911, "step": 8148 }, { "epoch": 0.7279548042784092, "grad_norm": 3.403149514858758, "learning_rate": 4.179052014783092e-07, "loss": -0.1997, "step": 8150 }, { "epoch": 0.7281334434941831, "grad_norm": 9.109299203024063, "learning_rate": 4.173982189781965e-07, "loss": 0.0901, "step": 8152 }, { "epoch": 0.728312082709957, "grad_norm": 9.800091521166635, "learning_rate": 4.1689146306843945e-07, "loss": -0.7227, "step": 8154 }, { "epoch": 0.7284907219257307, "grad_norm": 13.148640006053443, "learning_rate": 4.1638493394612927e-07, "loss": -1.0163, "step": 8156 }, { "epoch": 0.7286693611415046, "grad_norm": 6.684394757213526, "learning_rate": 4.1587863180827087e-07, "loss": -0.1235, "step": 8158 }, { "epoch": 0.7288480003572785, "grad_norm": 3.6882563962868655, "learning_rate": 4.153725568517781e-07, "loss": 0.1429, "step": 8160 }, { "epoch": 0.7290266395730522, "grad_norm": 4.980802110473402, "learning_rate": 4.1486670927347844e-07, "loss": -1.0593, "step": 8162 }, { "epoch": 0.7292052787888261, "grad_norm": 22.39797833720155, "learning_rate": 4.143610892701106e-07, "loss": -0.261, "step": 8164 }, { "epoch": 0.7293839180046, "grad_norm": 9.035612220984687, "learning_rate": 4.1385569703832346e-07, "loss": -1.8952, "step": 8166 }, { "epoch": 0.7295625572203738, "grad_norm": 11.403224930622011, "learning_rate": 4.1335053277467955e-07, "loss": 0.955, "step": 8168 }, { "epoch": 0.7297411964361477, "grad_norm": 6.636296843196183, "learning_rate": 4.1284559667565124e-07, "loss": -0.9275, "step": 8170 }, { "epoch": 0.7299198356519215, "grad_norm": 9.780278069223192, "learning_rate": 4.1234088893762274e-07, "loss": 0.2607, "step": 8172 }, { "epoch": 0.7300984748676953, "grad_norm": 3.819597984849864, "learning_rate": 4.118364097568886e-07, "loss": 0.5093, "step": 8174 }, { "epoch": 0.7302771140834692, "grad_norm": 8.767198064481917, "learning_rate": 4.113321593296554e-07, "loss": 0.9375, "step": 8176 }, { "epoch": 0.7304557532992431, "grad_norm": 9.127727362236254, "learning_rate": 4.108281378520403e-07, "loss": -0.3873, "step": 8178 }, { "epoch": 0.7306343925150168, "grad_norm": 9.685745178764964, "learning_rate": 4.103243455200718e-07, "loss": -0.38, "step": 8180 }, { "epoch": 0.7308130317307907, "grad_norm": 9.09923186361423, "learning_rate": 4.0982078252968885e-07, "loss": -0.942, "step": 8182 }, { "epoch": 0.7309916709465646, "grad_norm": 8.256289772093833, "learning_rate": 4.093174490767418e-07, "loss": 0.1185, "step": 8184 }, { "epoch": 0.7311703101623384, "grad_norm": 8.19255410785053, "learning_rate": 4.0881434535699056e-07, "loss": -0.0566, "step": 8186 }, { "epoch": 0.7313489493781122, "grad_norm": 8.651489270730359, "learning_rate": 4.0831147156610676e-07, "loss": -0.2573, "step": 8188 }, { "epoch": 0.7315275885938861, "grad_norm": 15.086425520491975, "learning_rate": 4.0780882789967195e-07, "loss": -0.8118, "step": 8190 }, { "epoch": 0.7317062278096599, "grad_norm": 9.083493245901591, "learning_rate": 4.073064145531797e-07, "loss": -1.1322, "step": 8192 }, { "epoch": 0.7318848670254338, "grad_norm": 3.9240395701179622, "learning_rate": 4.068042317220315e-07, "loss": -0.752, "step": 8194 }, { "epoch": 0.7320635062412076, "grad_norm": 6.396042744773424, "learning_rate": 4.063022796015411e-07, "loss": 0.6288, "step": 8196 }, { "epoch": 0.7322421454569814, "grad_norm": 8.415019513094531, "learning_rate": 4.058005583869317e-07, "loss": 0.497, "step": 8198 }, { "epoch": 0.7324207846727553, "grad_norm": 4.899378149316503, "learning_rate": 4.05299068273337e-07, "loss": 0.1347, "step": 8200 }, { "epoch": 0.7325994238885292, "grad_norm": 5.969950769882798, "learning_rate": 4.047978094558009e-07, "loss": 0.413, "step": 8202 }, { "epoch": 0.7327780631043029, "grad_norm": 11.863947493227974, "learning_rate": 4.042967821292772e-07, "loss": -0.7248, "step": 8204 }, { "epoch": 0.7329567023200768, "grad_norm": 5.814668640509602, "learning_rate": 4.037959864886291e-07, "loss": -0.7638, "step": 8206 }, { "epoch": 0.7331353415358507, "grad_norm": 9.067410947132576, "learning_rate": 4.032954227286306e-07, "loss": 0.9224, "step": 8208 }, { "epoch": 0.7333139807516245, "grad_norm": 3.0302975131475915, "learning_rate": 4.0279509104396515e-07, "loss": -0.3407, "step": 8210 }, { "epoch": 0.7334926199673983, "grad_norm": 26.87154887906829, "learning_rate": 4.0229499162922587e-07, "loss": -0.0644, "step": 8212 }, { "epoch": 0.7336712591831722, "grad_norm": 2.5735631174396714, "learning_rate": 4.0179512467891565e-07, "loss": 0.2471, "step": 8214 }, { "epoch": 0.733849898398946, "grad_norm": 6.634642295825351, "learning_rate": 4.012954903874468e-07, "loss": -0.8372, "step": 8216 }, { "epoch": 0.7340285376147199, "grad_norm": 4.475024107116893, "learning_rate": 4.0079608894914186e-07, "loss": 0.0923, "step": 8218 }, { "epoch": 0.7342071768304937, "grad_norm": 6.002666198895398, "learning_rate": 4.002969205582313e-07, "loss": 0.156, "step": 8220 }, { "epoch": 0.7343858160462675, "grad_norm": 22.251051974341433, "learning_rate": 3.997979854088559e-07, "loss": -0.3916, "step": 8222 }, { "epoch": 0.7345644552620414, "grad_norm": 4.063127439903345, "learning_rate": 3.992992836950665e-07, "loss": -0.0672, "step": 8224 }, { "epoch": 0.7347430944778153, "grad_norm": 21.771873261895294, "learning_rate": 3.9880081561082224e-07, "loss": -0.7843, "step": 8226 }, { "epoch": 0.734921733693589, "grad_norm": 3.813862416458838, "learning_rate": 3.9830258134999074e-07, "loss": -0.6716, "step": 8228 }, { "epoch": 0.7351003729093629, "grad_norm": 10.795864118627609, "learning_rate": 3.9780458110634993e-07, "loss": -1.0645, "step": 8230 }, { "epoch": 0.7352790121251368, "grad_norm": 18.93667812263578, "learning_rate": 3.973068150735862e-07, "loss": -0.1972, "step": 8232 }, { "epoch": 0.7354576513409106, "grad_norm": 5.419179988639201, "learning_rate": 3.9680928344529476e-07, "loss": 0.5683, "step": 8234 }, { "epoch": 0.7356362905566844, "grad_norm": 6.281414883855009, "learning_rate": 3.9631198641497985e-07, "loss": -0.0825, "step": 8236 }, { "epoch": 0.7358149297724583, "grad_norm": 6.8164289151664725, "learning_rate": 3.9581492417605486e-07, "loss": 0.1073, "step": 8238 }, { "epoch": 0.7359935689882321, "grad_norm": 7.951592138395958, "learning_rate": 3.9531809692184046e-07, "loss": -1.1849, "step": 8240 }, { "epoch": 0.736172208204006, "grad_norm": 7.2414597821579685, "learning_rate": 3.948215048455674e-07, "loss": -0.3634, "step": 8242 }, { "epoch": 0.7363508474197799, "grad_norm": 5.890068371931296, "learning_rate": 3.943251481403739e-07, "loss": -0.5042, "step": 8244 }, { "epoch": 0.7365294866355536, "grad_norm": 14.006642536891201, "learning_rate": 3.9382902699930834e-07, "loss": -1.0771, "step": 8246 }, { "epoch": 0.7367081258513275, "grad_norm": 10.8374008367769, "learning_rate": 3.933331416153253e-07, "loss": -1.395, "step": 8248 }, { "epoch": 0.7368867650671014, "grad_norm": 11.390865454209282, "learning_rate": 3.928374921812888e-07, "loss": 0.0179, "step": 8250 }, { "epoch": 0.7370654042828751, "grad_norm": 6.0354605885036525, "learning_rate": 3.923420788899715e-07, "loss": 0.0719, "step": 8252 }, { "epoch": 0.737244043498649, "grad_norm": 8.550743329489922, "learning_rate": 3.9184690193405255e-07, "loss": 0.1418, "step": 8254 }, { "epoch": 0.7374226827144229, "grad_norm": 13.25663274619834, "learning_rate": 3.913519615061214e-07, "loss": -0.6927, "step": 8256 }, { "epoch": 0.7376013219301967, "grad_norm": 15.041470992206015, "learning_rate": 3.908572577986744e-07, "loss": 0.5824, "step": 8258 }, { "epoch": 0.7377799611459706, "grad_norm": 11.020436463063783, "learning_rate": 3.9036279100411517e-07, "loss": 0.2028, "step": 8260 }, { "epoch": 0.7379586003617444, "grad_norm": 2.9364797565445095, "learning_rate": 3.8986856131475623e-07, "loss": -0.2249, "step": 8262 }, { "epoch": 0.7381372395775182, "grad_norm": 18.207828657312835, "learning_rate": 3.893745689228175e-07, "loss": -1.1763, "step": 8264 }, { "epoch": 0.7383158787932921, "grad_norm": 11.641828272541378, "learning_rate": 3.888808140204266e-07, "loss": -0.6757, "step": 8266 }, { "epoch": 0.738494518009066, "grad_norm": 9.037583608491197, "learning_rate": 3.883872967996189e-07, "loss": -1.3404, "step": 8268 }, { "epoch": 0.7386731572248398, "grad_norm": 10.336770460224184, "learning_rate": 3.8789401745233706e-07, "loss": -0.5655, "step": 8270 }, { "epoch": 0.7388517964406136, "grad_norm": 9.127266380310616, "learning_rate": 3.87400976170432e-07, "loss": 0.2654, "step": 8272 }, { "epoch": 0.7390304356563875, "grad_norm": 3.0232099107352233, "learning_rate": 3.869081731456606e-07, "loss": -0.673, "step": 8274 }, { "epoch": 0.7392090748721614, "grad_norm": 8.080744954883734, "learning_rate": 3.8641560856968804e-07, "loss": -0.1282, "step": 8276 }, { "epoch": 0.7393877140879351, "grad_norm": 10.68015683976927, "learning_rate": 3.859232826340876e-07, "loss": -0.4117, "step": 8278 }, { "epoch": 0.739566353303709, "grad_norm": 8.953048732359491, "learning_rate": 3.854311955303378e-07, "loss": -0.0155, "step": 8280 }, { "epoch": 0.7397449925194829, "grad_norm": 11.967974844350234, "learning_rate": 3.849393474498256e-07, "loss": 0.4789, "step": 8282 }, { "epoch": 0.7399236317352567, "grad_norm": 9.592405541864347, "learning_rate": 3.844477385838446e-07, "loss": 0.6488, "step": 8284 }, { "epoch": 0.7401022709510305, "grad_norm": 3.522025017644615, "learning_rate": 3.839563691235956e-07, "loss": 0.2246, "step": 8286 }, { "epoch": 0.7402809101668044, "grad_norm": 5.680102221683981, "learning_rate": 3.8346523926018604e-07, "loss": 0.6978, "step": 8288 }, { "epoch": 0.7404595493825782, "grad_norm": 5.336953464373077, "learning_rate": 3.829743491846301e-07, "loss": -0.3671, "step": 8290 }, { "epoch": 0.7406381885983521, "grad_norm": 7.630166226597414, "learning_rate": 3.824836990878495e-07, "loss": -0.494, "step": 8292 }, { "epoch": 0.7408168278141259, "grad_norm": 9.185331958135881, "learning_rate": 3.819932891606711e-07, "loss": -1.133, "step": 8294 }, { "epoch": 0.7409954670298997, "grad_norm": 22.490318419431546, "learning_rate": 3.815031195938295e-07, "loss": -0.9971, "step": 8296 }, { "epoch": 0.7411741062456736, "grad_norm": 9.097440305106732, "learning_rate": 3.8101319057796576e-07, "loss": 0.2321, "step": 8298 }, { "epoch": 0.7413527454614475, "grad_norm": 6.6941897008325375, "learning_rate": 3.80523502303627e-07, "loss": 0.6102, "step": 8300 }, { "epoch": 0.7415313846772212, "grad_norm": 12.85786379049346, "learning_rate": 3.8003405496126697e-07, "loss": -0.3257, "step": 8302 }, { "epoch": 0.7417100238929951, "grad_norm": 4.857955676275889, "learning_rate": 3.795448487412455e-07, "loss": 0.449, "step": 8304 }, { "epoch": 0.741888663108769, "grad_norm": 18.465914281030738, "learning_rate": 3.7905588383382924e-07, "loss": -0.9481, "step": 8306 }, { "epoch": 0.7420673023245428, "grad_norm": 7.886894326147423, "learning_rate": 3.7856716042918937e-07, "loss": -1.2396, "step": 8308 }, { "epoch": 0.7422459415403166, "grad_norm": 7.638676119837097, "learning_rate": 3.780786787174055e-07, "loss": -0.3519, "step": 8310 }, { "epoch": 0.7424245807560905, "grad_norm": 9.082256381618244, "learning_rate": 3.7759043888846173e-07, "loss": 0.4824, "step": 8312 }, { "epoch": 0.7426032199718643, "grad_norm": 10.57470900403503, "learning_rate": 3.7710244113224796e-07, "loss": 0.0229, "step": 8314 }, { "epoch": 0.7427818591876382, "grad_norm": 7.448863092502749, "learning_rate": 3.766146856385606e-07, "loss": -0.0255, "step": 8316 }, { "epoch": 0.742960498403412, "grad_norm": 8.035252623844748, "learning_rate": 3.7612717259710156e-07, "loss": -1.1302, "step": 8318 }, { "epoch": 0.7431391376191858, "grad_norm": 1.5550727558594277, "learning_rate": 3.7563990219747854e-07, "loss": 0.1821, "step": 8320 }, { "epoch": 0.7433177768349597, "grad_norm": 5.048475076812432, "learning_rate": 3.7515287462920473e-07, "loss": 1.217, "step": 8322 }, { "epoch": 0.7434964160507336, "grad_norm": 2.9633482929734654, "learning_rate": 3.7466609008169914e-07, "loss": -1.6996, "step": 8324 }, { "epoch": 0.7436750552665073, "grad_norm": 7.117209475302243, "learning_rate": 3.7417954874428624e-07, "loss": -0.8339, "step": 8326 }, { "epoch": 0.7438536944822812, "grad_norm": 3.8887174901581263, "learning_rate": 3.736932508061952e-07, "loss": 0.4845, "step": 8328 }, { "epoch": 0.7440323336980551, "grad_norm": 8.953801580565688, "learning_rate": 3.732071964565613e-07, "loss": -0.7893, "step": 8330 }, { "epoch": 0.7442109729138289, "grad_norm": 11.582664269168196, "learning_rate": 3.727213858844249e-07, "loss": -1.2951, "step": 8332 }, { "epoch": 0.7443896121296028, "grad_norm": 4.751046517181541, "learning_rate": 3.722358192787316e-07, "loss": -1.4971, "step": 8334 }, { "epoch": 0.7445682513453766, "grad_norm": 5.060025022241294, "learning_rate": 3.717504968283319e-07, "loss": -0.446, "step": 8336 }, { "epoch": 0.7447468905611504, "grad_norm": 6.25845649824927, "learning_rate": 3.712654187219817e-07, "loss": 0.277, "step": 8338 }, { "epoch": 0.7449255297769243, "grad_norm": 6.613007253073236, "learning_rate": 3.707805851483413e-07, "loss": 0.8702, "step": 8340 }, { "epoch": 0.7451041689926982, "grad_norm": 9.350686244974383, "learning_rate": 3.702959962959764e-07, "loss": 0.6294, "step": 8342 }, { "epoch": 0.7452828082084719, "grad_norm": 9.626580046724595, "learning_rate": 3.6981165235335743e-07, "loss": -0.9044, "step": 8344 }, { "epoch": 0.7454614474242458, "grad_norm": 8.631701533435033, "learning_rate": 3.693275535088597e-07, "loss": -0.4047, "step": 8346 }, { "epoch": 0.7456400866400197, "grad_norm": 8.220182228760104, "learning_rate": 3.688436999507623e-07, "loss": -0.7149, "step": 8348 }, { "epoch": 0.7458187258557935, "grad_norm": 10.973249680797794, "learning_rate": 3.6836009186724994e-07, "loss": -0.2333, "step": 8350 }, { "epoch": 0.7459973650715673, "grad_norm": 13.522321543715806, "learning_rate": 3.678767294464116e-07, "loss": -0.1083, "step": 8352 }, { "epoch": 0.7461760042873412, "grad_norm": 9.71031092419954, "learning_rate": 3.673936128762405e-07, "loss": -0.3772, "step": 8354 }, { "epoch": 0.746354643503115, "grad_norm": 3.003159131224327, "learning_rate": 3.669107423446346e-07, "loss": -1.078, "step": 8356 }, { "epoch": 0.7465332827188889, "grad_norm": 4.271013259199028, "learning_rate": 3.6642811803939564e-07, "loss": 0.2851, "step": 8358 }, { "epoch": 0.7467119219346627, "grad_norm": 4.463803414586295, "learning_rate": 3.659457401482305e-07, "loss": 0.7241, "step": 8360 }, { "epoch": 0.7468905611504365, "grad_norm": 9.047726711311418, "learning_rate": 3.6546360885874895e-07, "loss": 0.9807, "step": 8362 }, { "epoch": 0.7470692003662104, "grad_norm": 13.907677463604074, "learning_rate": 3.649817243584653e-07, "loss": 1.1813, "step": 8364 }, { "epoch": 0.7472478395819843, "grad_norm": 14.214610811501696, "learning_rate": 3.6450008683479926e-07, "loss": -0.3554, "step": 8366 }, { "epoch": 0.747426478797758, "grad_norm": 8.170152763516679, "learning_rate": 3.6401869647507223e-07, "loss": -1.35, "step": 8368 }, { "epoch": 0.7476051180135319, "grad_norm": 7.684034605425903, "learning_rate": 3.635375534665111e-07, "loss": -0.1015, "step": 8370 }, { "epoch": 0.7477837572293058, "grad_norm": 11.563361718311661, "learning_rate": 3.6305665799624576e-07, "loss": -0.3416, "step": 8372 }, { "epoch": 0.7479623964450796, "grad_norm": 2.0221053639324134, "learning_rate": 3.625760102513102e-07, "loss": 0.4723, "step": 8374 }, { "epoch": 0.7481410356608534, "grad_norm": 9.381989175341788, "learning_rate": 3.620956104186421e-07, "loss": 0.2797, "step": 8376 }, { "epoch": 0.7483196748766273, "grad_norm": 4.061744267522387, "learning_rate": 3.616154586850825e-07, "loss": 0.5273, "step": 8378 }, { "epoch": 0.7484983140924011, "grad_norm": 13.926530156054666, "learning_rate": 3.6113555523737626e-07, "loss": -0.5584, "step": 8380 }, { "epoch": 0.748676953308175, "grad_norm": 12.461240567435178, "learning_rate": 3.6065590026217084e-07, "loss": 0.1723, "step": 8382 }, { "epoch": 0.7488555925239488, "grad_norm": 5.128362343530153, "learning_rate": 3.6017649394601804e-07, "loss": 0.0518, "step": 8384 }, { "epoch": 0.7490342317397226, "grad_norm": 9.51147891766427, "learning_rate": 3.5969733647537246e-07, "loss": -0.609, "step": 8386 }, { "epoch": 0.7492128709554965, "grad_norm": 34.7434725771439, "learning_rate": 3.5921842803659217e-07, "loss": -0.6965, "step": 8388 }, { "epoch": 0.7493915101712704, "grad_norm": 5.75875486532262, "learning_rate": 3.5873976881593826e-07, "loss": -0.6244, "step": 8390 }, { "epoch": 0.7495701493870441, "grad_norm": 7.932523113304822, "learning_rate": 3.5826135899957467e-07, "loss": -0.2568, "step": 8392 }, { "epoch": 0.749748788602818, "grad_norm": 3.693828518673574, "learning_rate": 3.577831987735691e-07, "loss": 0.8487, "step": 8394 }, { "epoch": 0.7499274278185919, "grad_norm": 15.676905485477226, "learning_rate": 3.573052883238904e-07, "loss": -0.8921, "step": 8396 }, { "epoch": 0.7501060670343657, "grad_norm": 12.04513454110336, "learning_rate": 3.5682762783641276e-07, "loss": -1.397, "step": 8398 }, { "epoch": 0.7502847062501395, "grad_norm": 2.8852325454156182, "learning_rate": 3.5635021749691164e-07, "loss": 0.2065, "step": 8400 }, { "epoch": 0.7504633454659134, "grad_norm": 4.117550915308897, "learning_rate": 3.55873057491065e-07, "loss": 0.0301, "step": 8402 }, { "epoch": 0.7506419846816873, "grad_norm": 4.706569690155561, "learning_rate": 3.5539614800445404e-07, "loss": -0.1998, "step": 8404 }, { "epoch": 0.7508206238974611, "grad_norm": 5.680503146492787, "learning_rate": 3.549194892225625e-07, "loss": 0.2251, "step": 8406 }, { "epoch": 0.750999263113235, "grad_norm": 10.136134677296324, "learning_rate": 3.5444308133077663e-07, "loss": 0.8303, "step": 8408 }, { "epoch": 0.7511779023290088, "grad_norm": 8.147095201191966, "learning_rate": 3.539669245143846e-07, "loss": -0.1579, "step": 8410 }, { "epoch": 0.7513565415447826, "grad_norm": 18.842127994298192, "learning_rate": 3.5349101895857767e-07, "loss": 0.3224, "step": 8412 }, { "epoch": 0.7515351807605565, "grad_norm": 9.631444185689432, "learning_rate": 3.5301536484844917e-07, "loss": -0.0705, "step": 8414 }, { "epoch": 0.7517138199763304, "grad_norm": 13.38047150971391, "learning_rate": 3.5253996236899384e-07, "loss": -1.1755, "step": 8416 }, { "epoch": 0.7518924591921041, "grad_norm": 9.91627779500036, "learning_rate": 3.5206481170510916e-07, "loss": 0.0166, "step": 8418 }, { "epoch": 0.752071098407878, "grad_norm": 11.792086931878698, "learning_rate": 3.515899130415957e-07, "loss": 1.1247, "step": 8420 }, { "epoch": 0.7522497376236519, "grad_norm": 5.8213564762127845, "learning_rate": 3.5111526656315405e-07, "loss": 0.3411, "step": 8422 }, { "epoch": 0.7524283768394256, "grad_norm": 7.033921476002302, "learning_rate": 3.5064087245438803e-07, "loss": -0.0898, "step": 8424 }, { "epoch": 0.7526070160551995, "grad_norm": 9.244388837321045, "learning_rate": 3.5016673089980296e-07, "loss": -0.3724, "step": 8426 }, { "epoch": 0.7527856552709734, "grad_norm": 4.547057472578893, "learning_rate": 3.496928420838059e-07, "loss": 0.8762, "step": 8428 }, { "epoch": 0.7529642944867472, "grad_norm": 12.13918897857749, "learning_rate": 3.492192061907057e-07, "loss": -0.3155, "step": 8430 }, { "epoch": 0.7531429337025211, "grad_norm": 2.733610384166309, "learning_rate": 3.487458234047126e-07, "loss": 0.2912, "step": 8432 }, { "epoch": 0.7533215729182949, "grad_norm": 11.563099118768669, "learning_rate": 3.482726939099392e-07, "loss": 0.4033, "step": 8434 }, { "epoch": 0.7535002121340687, "grad_norm": 9.526157707284847, "learning_rate": 3.477998178903981e-07, "loss": -1.9718, "step": 8436 }, { "epoch": 0.7536788513498426, "grad_norm": 6.645589826023134, "learning_rate": 3.473271955300048e-07, "loss": 0.4225, "step": 8438 }, { "epoch": 0.7538574905656165, "grad_norm": 6.926478798857714, "learning_rate": 3.468548270125753e-07, "loss": -0.4823, "step": 8440 }, { "epoch": 0.7540361297813902, "grad_norm": 5.632788458361488, "learning_rate": 3.4638271252182714e-07, "loss": 0.6195, "step": 8442 }, { "epoch": 0.7542147689971641, "grad_norm": 5.703779125586523, "learning_rate": 3.4591085224137906e-07, "loss": -0.3272, "step": 8444 }, { "epoch": 0.754393408212938, "grad_norm": 11.895080934548243, "learning_rate": 3.4543924635475097e-07, "loss": -0.3807, "step": 8446 }, { "epoch": 0.7545720474287118, "grad_norm": 12.289996810676776, "learning_rate": 3.4496789504536395e-07, "loss": 0.386, "step": 8448 }, { "epoch": 0.7547506866444856, "grad_norm": 8.347175038042671, "learning_rate": 3.444967984965391e-07, "loss": -0.2217, "step": 8450 }, { "epoch": 0.7549293258602595, "grad_norm": 9.636611078137847, "learning_rate": 3.4402595689150004e-07, "loss": 0.2976, "step": 8452 }, { "epoch": 0.7551079650760333, "grad_norm": 5.9751696191437995, "learning_rate": 3.435553704133706e-07, "loss": -1.1626, "step": 8454 }, { "epoch": 0.7552866042918072, "grad_norm": 39.12732745673093, "learning_rate": 3.430850392451744e-07, "loss": -0.8825, "step": 8456 }, { "epoch": 0.755465243507581, "grad_norm": 4.342357384816536, "learning_rate": 3.42614963569837e-07, "loss": -0.2395, "step": 8458 }, { "epoch": 0.7556438827233548, "grad_norm": 6.957314726994117, "learning_rate": 3.42145143570184e-07, "loss": -0.9562, "step": 8460 }, { "epoch": 0.7558225219391287, "grad_norm": 14.25869485711509, "learning_rate": 3.4167557942894196e-07, "loss": -1.5419, "step": 8462 }, { "epoch": 0.7560011611549026, "grad_norm": 5.654521867824983, "learning_rate": 3.4120627132873745e-07, "loss": -0.0357, "step": 8464 }, { "epoch": 0.7561798003706763, "grad_norm": 4.1445441358187995, "learning_rate": 3.407372194520979e-07, "loss": -0.8063, "step": 8466 }, { "epoch": 0.7563584395864502, "grad_norm": 15.396933688777176, "learning_rate": 3.4026842398145116e-07, "loss": -1.3953, "step": 8468 }, { "epoch": 0.7565370788022241, "grad_norm": 9.773767308708814, "learning_rate": 3.3979988509912437e-07, "loss": -1.5997, "step": 8470 }, { "epoch": 0.7567157180179979, "grad_norm": 3.0358358086376867, "learning_rate": 3.3933160298734586e-07, "loss": -0.3585, "step": 8472 }, { "epoch": 0.7568943572337717, "grad_norm": 8.546474189356447, "learning_rate": 3.38863577828244e-07, "loss": 0.0329, "step": 8474 }, { "epoch": 0.7570729964495456, "grad_norm": 37.57793496573571, "learning_rate": 3.38395809803847e-07, "loss": -0.424, "step": 8476 }, { "epoch": 0.7572516356653194, "grad_norm": 8.33824633413471, "learning_rate": 3.37928299096083e-07, "loss": -0.215, "step": 8478 }, { "epoch": 0.7574302748810933, "grad_norm": 6.3250836634474155, "learning_rate": 3.3746104588678027e-07, "loss": -0.8996, "step": 8480 }, { "epoch": 0.7576089140968671, "grad_norm": 5.009298642648026, "learning_rate": 3.3699405035766716e-07, "loss": -1.4788, "step": 8482 }, { "epoch": 0.7577875533126409, "grad_norm": 9.535975951170839, "learning_rate": 3.365273126903704e-07, "loss": 0.4734, "step": 8484 }, { "epoch": 0.7579661925284148, "grad_norm": 9.790709579983968, "learning_rate": 3.360608330664186e-07, "loss": -0.9817, "step": 8486 }, { "epoch": 0.7581448317441887, "grad_norm": 8.109316199058252, "learning_rate": 3.3559461166723893e-07, "loss": 0.2312, "step": 8488 }, { "epoch": 0.7583234709599624, "grad_norm": 2.7233559189711625, "learning_rate": 3.3512864867415736e-07, "loss": 0.111, "step": 8490 }, { "epoch": 0.7585021101757363, "grad_norm": 6.325657492980936, "learning_rate": 3.346629442684005e-07, "loss": 0.5766, "step": 8492 }, { "epoch": 0.7586807493915102, "grad_norm": 6.159122629477466, "learning_rate": 3.341974986310939e-07, "loss": 0.0499, "step": 8494 }, { "epoch": 0.758859388607284, "grad_norm": 4.679097082628976, "learning_rate": 3.337323119432627e-07, "loss": 0.0839, "step": 8496 }, { "epoch": 0.7590380278230578, "grad_norm": 3.5556223551780075, "learning_rate": 3.3326738438583114e-07, "loss": 0.2424, "step": 8498 }, { "epoch": 0.7592166670388317, "grad_norm": 9.20337079534352, "learning_rate": 3.3280271613962276e-07, "loss": 0.1763, "step": 8500 }, { "epoch": 0.7593953062546055, "grad_norm": 6.929110566457105, "learning_rate": 3.323383073853605e-07, "loss": -0.2563, "step": 8502 }, { "epoch": 0.7595739454703794, "grad_norm": 4.460832103546408, "learning_rate": 3.318741583036655e-07, "loss": -0.382, "step": 8504 }, { "epoch": 0.7597525846861533, "grad_norm": 5.434152734827626, "learning_rate": 3.3141026907505855e-07, "loss": 0.2161, "step": 8506 }, { "epoch": 0.759931223901927, "grad_norm": 7.0733613074370005, "learning_rate": 3.309466398799601e-07, "loss": 0.9703, "step": 8508 }, { "epoch": 0.7601098631177009, "grad_norm": 15.357666665709315, "learning_rate": 3.3048327089868786e-07, "loss": -1.3422, "step": 8510 }, { "epoch": 0.7602885023334748, "grad_norm": 8.746060165606577, "learning_rate": 3.3002016231145957e-07, "loss": 0.2195, "step": 8512 }, { "epoch": 0.7604671415492485, "grad_norm": 4.393374196635775, "learning_rate": 3.295573142983914e-07, "loss": 0.4392, "step": 8514 }, { "epoch": 0.7606457807650224, "grad_norm": 5.2218399494120264, "learning_rate": 3.2909472703949734e-07, "loss": -0.423, "step": 8516 }, { "epoch": 0.7608244199807963, "grad_norm": 11.051014945942713, "learning_rate": 3.2863240071469143e-07, "loss": 0.7388, "step": 8518 }, { "epoch": 0.7610030591965701, "grad_norm": 5.9279155269889925, "learning_rate": 3.2817033550378534e-07, "loss": 0.8759, "step": 8520 }, { "epoch": 0.761181698412344, "grad_norm": 9.108797084476736, "learning_rate": 3.277085315864896e-07, "loss": -0.7931, "step": 8522 }, { "epoch": 0.7613603376281178, "grad_norm": 8.924912772300022, "learning_rate": 3.2724698914241224e-07, "loss": 0.4761, "step": 8524 }, { "epoch": 0.7615389768438916, "grad_norm": 9.605392896330223, "learning_rate": 3.2678570835106045e-07, "loss": -0.2129, "step": 8526 }, { "epoch": 0.7617176160596655, "grad_norm": 10.235492767562498, "learning_rate": 3.2632468939183944e-07, "loss": -2.2211, "step": 8528 }, { "epoch": 0.7618962552754394, "grad_norm": 12.022927950744322, "learning_rate": 3.2586393244405263e-07, "loss": 0.2691, "step": 8530 }, { "epoch": 0.7620748944912131, "grad_norm": 2.1721418477522576, "learning_rate": 3.254034376869014e-07, "loss": 0.0451, "step": 8532 }, { "epoch": 0.762253533706987, "grad_norm": 1.823655035253875, "learning_rate": 3.2494320529948547e-07, "loss": 0.454, "step": 8534 }, { "epoch": 0.7624321729227609, "grad_norm": 3.7527708624992466, "learning_rate": 3.244832354608018e-07, "loss": -0.205, "step": 8536 }, { "epoch": 0.7626108121385348, "grad_norm": 11.844857921464802, "learning_rate": 3.2402352834974557e-07, "loss": -0.9217, "step": 8538 }, { "epoch": 0.7627894513543085, "grad_norm": 10.904102190463831, "learning_rate": 3.235640841451106e-07, "loss": -0.2491, "step": 8540 }, { "epoch": 0.7629680905700824, "grad_norm": 4.17843671961175, "learning_rate": 3.231049030255878e-07, "loss": 0.6057, "step": 8542 }, { "epoch": 0.7631467297858563, "grad_norm": 16.601292277212565, "learning_rate": 3.2264598516976503e-07, "loss": -0.6926, "step": 8544 }, { "epoch": 0.7633253690016301, "grad_norm": 9.010493201369922, "learning_rate": 3.221873307561288e-07, "loss": 0.053, "step": 8546 }, { "epoch": 0.7635040082174039, "grad_norm": 7.927577266936109, "learning_rate": 3.2172893996306283e-07, "loss": 0.79, "step": 8548 }, { "epoch": 0.7636826474331778, "grad_norm": 7.867783646029701, "learning_rate": 3.212708129688483e-07, "loss": -0.14, "step": 8550 }, { "epoch": 0.7638612866489516, "grad_norm": 6.326625919565898, "learning_rate": 3.2081294995166373e-07, "loss": -0.202, "step": 8552 }, { "epoch": 0.7640399258647255, "grad_norm": 9.866064737190685, "learning_rate": 3.2035535108958524e-07, "loss": 0.6213, "step": 8554 }, { "epoch": 0.7642185650804993, "grad_norm": 15.913331859148007, "learning_rate": 3.19898016560586e-07, "loss": -0.7849, "step": 8556 }, { "epoch": 0.7643972042962731, "grad_norm": 5.545349771047085, "learning_rate": 3.1944094654253604e-07, "loss": 0.6657, "step": 8558 }, { "epoch": 0.764575843512047, "grad_norm": 21.455139910157964, "learning_rate": 3.189841412132027e-07, "loss": -0.936, "step": 8560 }, { "epoch": 0.7647544827278209, "grad_norm": 6.162009396474992, "learning_rate": 3.1852760075025145e-07, "loss": 0.165, "step": 8562 }, { "epoch": 0.7649331219435946, "grad_norm": 7.457541254986406, "learning_rate": 3.180713253312429e-07, "loss": -0.3639, "step": 8564 }, { "epoch": 0.7651117611593685, "grad_norm": 8.579111289212614, "learning_rate": 3.176153151336359e-07, "loss": -0.9765, "step": 8566 }, { "epoch": 0.7652904003751424, "grad_norm": 8.206405594012368, "learning_rate": 3.171595703347861e-07, "loss": -0.05, "step": 8568 }, { "epoch": 0.7654690395909162, "grad_norm": 4.951335981008174, "learning_rate": 3.167040911119445e-07, "loss": -0.1851, "step": 8570 }, { "epoch": 0.76564767880669, "grad_norm": 9.967967418219404, "learning_rate": 3.162488776422608e-07, "loss": -0.1332, "step": 8572 }, { "epoch": 0.7658263180224639, "grad_norm": 8.185964658793704, "learning_rate": 3.157939301027802e-07, "loss": -0.0949, "step": 8574 }, { "epoch": 0.7660049572382377, "grad_norm": 8.233533204570303, "learning_rate": 3.1533924867044515e-07, "loss": -0.4757, "step": 8576 }, { "epoch": 0.7661835964540116, "grad_norm": 10.343096847887733, "learning_rate": 3.1488483352209325e-07, "loss": 0.4549, "step": 8578 }, { "epoch": 0.7663622356697855, "grad_norm": 15.623462815010994, "learning_rate": 3.144306848344599e-07, "loss": -1.945, "step": 8580 }, { "epoch": 0.7665408748855592, "grad_norm": 8.127833884546678, "learning_rate": 3.139768027841764e-07, "loss": 0.9325, "step": 8582 }, { "epoch": 0.7667195141013331, "grad_norm": 3.073239084344636, "learning_rate": 3.135231875477703e-07, "loss": 0.7041, "step": 8584 }, { "epoch": 0.766898153317107, "grad_norm": 10.35120171835909, "learning_rate": 3.130698393016655e-07, "loss": -0.4792, "step": 8586 }, { "epoch": 0.7670767925328807, "grad_norm": 2.135403690453778, "learning_rate": 3.1261675822218224e-07, "loss": 0.1193, "step": 8588 }, { "epoch": 0.7672554317486546, "grad_norm": 23.021649330372163, "learning_rate": 3.121639444855361e-07, "loss": 0.1532, "step": 8590 }, { "epoch": 0.7674340709644285, "grad_norm": 6.185372651576232, "learning_rate": 3.11711398267839e-07, "loss": 1.053, "step": 8592 }, { "epoch": 0.7676127101802023, "grad_norm": 7.478011896638674, "learning_rate": 3.1125911974509976e-07, "loss": -0.4927, "step": 8594 }, { "epoch": 0.7677913493959762, "grad_norm": 11.576517711691265, "learning_rate": 3.108071090932223e-07, "loss": 0.5704, "step": 8596 }, { "epoch": 0.76796998861175, "grad_norm": 9.133968596629767, "learning_rate": 3.1035536648800565e-07, "loss": -0.749, "step": 8598 }, { "epoch": 0.7681486278275238, "grad_norm": 9.605786852322202, "learning_rate": 3.0990389210514577e-07, "loss": 0.6984, "step": 8600 }, { "epoch": 0.7683272670432977, "grad_norm": 11.489078145176686, "learning_rate": 3.094526861202339e-07, "loss": 0.1394, "step": 8602 }, { "epoch": 0.7685059062590716, "grad_norm": 9.10262198769021, "learning_rate": 3.0900174870875663e-07, "loss": -1.0826, "step": 8604 }, { "epoch": 0.7686845454748453, "grad_norm": 5.084113806338404, "learning_rate": 3.0855108004609643e-07, "loss": 0.6872, "step": 8606 }, { "epoch": 0.7688631846906192, "grad_norm": 8.494569726816657, "learning_rate": 3.0810068030753113e-07, "loss": -0.2488, "step": 8608 }, { "epoch": 0.7690418239063931, "grad_norm": 8.829669648467842, "learning_rate": 3.0765054966823436e-07, "loss": 0.3309, "step": 8610 }, { "epoch": 0.7692204631221669, "grad_norm": 8.287516055084373, "learning_rate": 3.0720068830327385e-07, "loss": -0.364, "step": 8612 }, { "epoch": 0.7693991023379407, "grad_norm": 8.79430182378744, "learning_rate": 3.067510963876139e-07, "loss": 0.4539, "step": 8614 }, { "epoch": 0.7695777415537146, "grad_norm": 6.511146085440509, "learning_rate": 3.063017740961136e-07, "loss": -0.4383, "step": 8616 }, { "epoch": 0.7697563807694884, "grad_norm": 15.653623196865738, "learning_rate": 3.0585272160352694e-07, "loss": 0.7111, "step": 8618 }, { "epoch": 0.7699350199852623, "grad_norm": 2.8254198656925222, "learning_rate": 3.0540393908450346e-07, "loss": -1.8704, "step": 8620 }, { "epoch": 0.7701136592010361, "grad_norm": 4.799095012864913, "learning_rate": 3.0495542671358744e-07, "loss": 0.1254, "step": 8622 }, { "epoch": 0.7702922984168099, "grad_norm": 5.29150874986081, "learning_rate": 3.045071846652175e-07, "loss": 0.6474, "step": 8624 }, { "epoch": 0.7704709376325838, "grad_norm": 4.173419566456929, "learning_rate": 3.040592131137278e-07, "loss": 0.6019, "step": 8626 }, { "epoch": 0.7706495768483577, "grad_norm": 7.736738857122589, "learning_rate": 3.036115122333478e-07, "loss": -0.3057, "step": 8628 }, { "epoch": 0.7708282160641314, "grad_norm": 7.995464708326815, "learning_rate": 3.03164082198201e-07, "loss": -0.5424, "step": 8630 }, { "epoch": 0.7710068552799053, "grad_norm": 8.729642456004024, "learning_rate": 3.02716923182305e-07, "loss": -0.7502, "step": 8632 }, { "epoch": 0.7711854944956792, "grad_norm": 9.011060911457651, "learning_rate": 3.0227003535957294e-07, "loss": -1.164, "step": 8634 }, { "epoch": 0.771364133711453, "grad_norm": 14.279880716803373, "learning_rate": 3.0182341890381226e-07, "loss": -1.1736, "step": 8636 }, { "epoch": 0.7715427729272268, "grad_norm": 2.20530635980457, "learning_rate": 3.013770739887246e-07, "loss": -0.7734, "step": 8638 }, { "epoch": 0.7717214121430007, "grad_norm": 6.907996599789629, "learning_rate": 3.0093100078790625e-07, "loss": 0.0367, "step": 8640 }, { "epoch": 0.7719000513587745, "grad_norm": 9.808852763157212, "learning_rate": 3.0048519947484795e-07, "loss": 0.3156, "step": 8642 }, { "epoch": 0.7720786905745484, "grad_norm": 15.046914549208148, "learning_rate": 3.000396702229341e-07, "loss": -0.0867, "step": 8644 }, { "epoch": 0.7722573297903222, "grad_norm": 6.946283502146129, "learning_rate": 2.995944132054438e-07, "loss": -0.2379, "step": 8646 }, { "epoch": 0.772435969006096, "grad_norm": 19.64752809152902, "learning_rate": 2.991494285955497e-07, "loss": -1.1761, "step": 8648 }, { "epoch": 0.7726146082218699, "grad_norm": 7.615833964277027, "learning_rate": 2.9870471656632004e-07, "loss": 0.0107, "step": 8650 }, { "epoch": 0.7727932474376438, "grad_norm": 8.77471006952362, "learning_rate": 2.98260277290715e-07, "loss": -0.036, "step": 8652 }, { "epoch": 0.7729718866534175, "grad_norm": 5.040392417768447, "learning_rate": 2.9781611094158986e-07, "loss": 0.0772, "step": 8654 }, { "epoch": 0.7731505258691914, "grad_norm": 9.901344408399801, "learning_rate": 2.9737221769169395e-07, "loss": 0.2355, "step": 8656 }, { "epoch": 0.7733291650849653, "grad_norm": 19.46345813036774, "learning_rate": 2.9692859771366887e-07, "loss": -0.5285, "step": 8658 }, { "epoch": 0.7735078043007391, "grad_norm": 11.491059518275026, "learning_rate": 2.9648525118005184e-07, "loss": -0.3526, "step": 8660 }, { "epoch": 0.773686443516513, "grad_norm": 3.3907058635860667, "learning_rate": 2.9604217826327325e-07, "loss": 0.049, "step": 8662 }, { "epoch": 0.7738650827322868, "grad_norm": 5.535962871823915, "learning_rate": 2.955993791356558e-07, "loss": 0.473, "step": 8664 }, { "epoch": 0.7740437219480607, "grad_norm": 8.159453599625014, "learning_rate": 2.9515685396941716e-07, "loss": 0.8734, "step": 8666 }, { "epoch": 0.7742223611638345, "grad_norm": 6.1715826424031395, "learning_rate": 2.947146029366676e-07, "loss": 0.6503, "step": 8668 }, { "epoch": 0.7744010003796084, "grad_norm": 6.52006602622459, "learning_rate": 2.942726262094114e-07, "loss": -0.726, "step": 8670 }, { "epoch": 0.7745796395953822, "grad_norm": 5.613970815715215, "learning_rate": 2.938309239595458e-07, "loss": 0.3103, "step": 8672 }, { "epoch": 0.774758278811156, "grad_norm": 10.56230678043039, "learning_rate": 2.933894963588611e-07, "loss": 0.7171, "step": 8674 }, { "epoch": 0.7749369180269299, "grad_norm": 5.993945563887911, "learning_rate": 2.9294834357904163e-07, "loss": 1.6443, "step": 8676 }, { "epoch": 0.7751155572427038, "grad_norm": 7.832253347848422, "learning_rate": 2.925074657916634e-07, "loss": -0.6225, "step": 8678 }, { "epoch": 0.7752941964584775, "grad_norm": 12.189138136102017, "learning_rate": 2.920668631681964e-07, "loss": -0.1651, "step": 8680 }, { "epoch": 0.7754728356742514, "grad_norm": 8.212389671478755, "learning_rate": 2.9162653588000397e-07, "loss": 0.3264, "step": 8682 }, { "epoch": 0.7756514748900253, "grad_norm": 13.717618230828478, "learning_rate": 2.91186484098342e-07, "loss": 0.9165, "step": 8684 }, { "epoch": 0.775830114105799, "grad_norm": 14.546893356426388, "learning_rate": 2.907467079943584e-07, "loss": -0.4787, "step": 8686 }, { "epoch": 0.7760087533215729, "grad_norm": 7.2177982115704244, "learning_rate": 2.903072077390949e-07, "loss": -0.7432, "step": 8688 }, { "epoch": 0.7761873925373468, "grad_norm": 8.099759378509098, "learning_rate": 2.8986798350348573e-07, "loss": 0.5709, "step": 8690 }, { "epoch": 0.7763660317531206, "grad_norm": 8.419081187371386, "learning_rate": 2.894290354583575e-07, "loss": 0.0212, "step": 8692 }, { "epoch": 0.7765446709688945, "grad_norm": 14.08093897462902, "learning_rate": 2.8899036377442967e-07, "loss": -0.8848, "step": 8694 }, { "epoch": 0.7767233101846683, "grad_norm": 2.3576873402274523, "learning_rate": 2.885519686223142e-07, "loss": 0.609, "step": 8696 }, { "epoch": 0.7769019494004421, "grad_norm": 19.70744819615916, "learning_rate": 2.881138501725151e-07, "loss": -1.423, "step": 8698 }, { "epoch": 0.777080588616216, "grad_norm": 2.7708728376359377, "learning_rate": 2.876760085954292e-07, "loss": 0.1597, "step": 8700 }, { "epoch": 0.7772592278319899, "grad_norm": 5.5385022223029825, "learning_rate": 2.8723844406134503e-07, "loss": -0.1155, "step": 8702 }, { "epoch": 0.7774378670477636, "grad_norm": 12.579498178149338, "learning_rate": 2.8680115674044503e-07, "loss": -1.4051, "step": 8704 }, { "epoch": 0.7776165062635375, "grad_norm": 4.122723500316095, "learning_rate": 2.863641468028016e-07, "loss": 0.7626, "step": 8706 }, { "epoch": 0.7777951454793114, "grad_norm": 14.225747870176116, "learning_rate": 2.8592741441838076e-07, "loss": -0.3697, "step": 8708 }, { "epoch": 0.7779737846950852, "grad_norm": 6.7679904388288055, "learning_rate": 2.854909597570402e-07, "loss": -0.2649, "step": 8710 }, { "epoch": 0.778152423910859, "grad_norm": 4.6548009064052875, "learning_rate": 2.850547829885287e-07, "loss": 0.0112, "step": 8712 }, { "epoch": 0.7783310631266329, "grad_norm": 9.213322687359119, "learning_rate": 2.846188842824887e-07, "loss": -0.7639, "step": 8714 }, { "epoch": 0.7785097023424067, "grad_norm": 15.445220622829918, "learning_rate": 2.841832638084536e-07, "loss": -0.0071, "step": 8716 }, { "epoch": 0.7786883415581806, "grad_norm": 3.4681789168653983, "learning_rate": 2.837479217358478e-07, "loss": -0.7047, "step": 8718 }, { "epoch": 0.7788669807739544, "grad_norm": 3.967604223948385, "learning_rate": 2.833128582339887e-07, "loss": 0.1652, "step": 8720 }, { "epoch": 0.7790456199897282, "grad_norm": 9.174077823871837, "learning_rate": 2.828780734720846e-07, "loss": 0.054, "step": 8722 }, { "epoch": 0.7792242592055021, "grad_norm": 7.602566847753497, "learning_rate": 2.824435676192358e-07, "loss": 0.3069, "step": 8724 }, { "epoch": 0.779402898421276, "grad_norm": 6.340086231307123, "learning_rate": 2.8200934084443394e-07, "loss": 0.2783, "step": 8726 }, { "epoch": 0.7795815376370497, "grad_norm": 11.414775014123006, "learning_rate": 2.8157539331656213e-07, "loss": -1.5363, "step": 8728 }, { "epoch": 0.7797601768528236, "grad_norm": 7.753084614843429, "learning_rate": 2.8114172520439503e-07, "loss": -0.615, "step": 8730 }, { "epoch": 0.7799388160685975, "grad_norm": 5.8512166510570625, "learning_rate": 2.80708336676598e-07, "loss": 0.1761, "step": 8732 }, { "epoch": 0.7801174552843713, "grad_norm": 8.545014508426464, "learning_rate": 2.802752279017284e-07, "loss": -0.8531, "step": 8734 }, { "epoch": 0.7802960945001451, "grad_norm": 7.255295759570239, "learning_rate": 2.7984239904823415e-07, "loss": -0.6136, "step": 8736 }, { "epoch": 0.780474733715919, "grad_norm": 7.633880196962241, "learning_rate": 2.7940985028445576e-07, "loss": -0.0436, "step": 8738 }, { "epoch": 0.7806533729316928, "grad_norm": 2.29949479981606, "learning_rate": 2.7897758177862253e-07, "loss": 0.1414, "step": 8740 }, { "epoch": 0.7808320121474667, "grad_norm": 9.227896597955494, "learning_rate": 2.7854559369885646e-07, "loss": 0.345, "step": 8742 }, { "epoch": 0.7810106513632405, "grad_norm": 2.431522277648395, "learning_rate": 2.781138862131699e-07, "loss": 0.2968, "step": 8744 }, { "epoch": 0.7811892905790143, "grad_norm": 7.663280513272214, "learning_rate": 2.776824594894661e-07, "loss": 0.8306, "step": 8746 }, { "epoch": 0.7813679297947882, "grad_norm": 6.1372604665221395, "learning_rate": 2.772513136955391e-07, "loss": 0.1855, "step": 8748 }, { "epoch": 0.7815465690105621, "grad_norm": 5.9339524764011635, "learning_rate": 2.768204489990743e-07, "loss": -0.1208, "step": 8750 }, { "epoch": 0.7817252082263358, "grad_norm": 6.7456328781846775, "learning_rate": 2.763898655676462e-07, "loss": 0.1873, "step": 8752 }, { "epoch": 0.7819038474421097, "grad_norm": 6.880918140360754, "learning_rate": 2.759595635687215e-07, "loss": -0.8912, "step": 8754 }, { "epoch": 0.7820824866578836, "grad_norm": 6.97117702490396, "learning_rate": 2.755295431696568e-07, "loss": 0.1453, "step": 8756 }, { "epoch": 0.7822611258736574, "grad_norm": 7.270811793196141, "learning_rate": 2.750998045376992e-07, "loss": -1.351, "step": 8758 }, { "epoch": 0.7824397650894312, "grad_norm": 11.119395012075536, "learning_rate": 2.746703478399862e-07, "loss": -0.6877, "step": 8760 }, { "epoch": 0.7826184043052051, "grad_norm": 10.246114971503948, "learning_rate": 2.7424117324354566e-07, "loss": 1.1075, "step": 8762 }, { "epoch": 0.7827970435209789, "grad_norm": 17.000621111125984, "learning_rate": 2.738122809152962e-07, "loss": 1.2813, "step": 8764 }, { "epoch": 0.7829756827367528, "grad_norm": 3.454400426841098, "learning_rate": 2.733836710220455e-07, "loss": 0.2664, "step": 8766 }, { "epoch": 0.7831543219525267, "grad_norm": 5.741291583488062, "learning_rate": 2.729553437304921e-07, "loss": -0.6329, "step": 8768 }, { "epoch": 0.7833329611683004, "grad_norm": 9.676675766710538, "learning_rate": 2.725272992072256e-07, "loss": 0.1927, "step": 8770 }, { "epoch": 0.7835116003840743, "grad_norm": 4.222869444201826, "learning_rate": 2.720995376187237e-07, "loss": 1.0082, "step": 8772 }, { "epoch": 0.7836902395998482, "grad_norm": 2.2546197300782898, "learning_rate": 2.716720591313554e-07, "loss": -0.1555, "step": 8774 }, { "epoch": 0.783868878815622, "grad_norm": 22.837805688207958, "learning_rate": 2.71244863911379e-07, "loss": -1.2201, "step": 8776 }, { "epoch": 0.7840475180313958, "grad_norm": 10.103828052208417, "learning_rate": 2.708179521249431e-07, "loss": -0.1199, "step": 8778 }, { "epoch": 0.7842261572471697, "grad_norm": 20.149729444329704, "learning_rate": 2.703913239380855e-07, "loss": -0.5905, "step": 8780 }, { "epoch": 0.7844047964629435, "grad_norm": 7.9401961425845675, "learning_rate": 2.699649795167344e-07, "loss": 0.3059, "step": 8782 }, { "epoch": 0.7845834356787174, "grad_norm": 12.619303107267292, "learning_rate": 2.695389190267071e-07, "loss": 0.2259, "step": 8784 }, { "epoch": 0.7847620748944912, "grad_norm": 4.956470021851706, "learning_rate": 2.691131426337103e-07, "loss": -0.4987, "step": 8786 }, { "epoch": 0.784940714110265, "grad_norm": 11.611478427591125, "learning_rate": 2.6868765050334076e-07, "loss": -1.4215, "step": 8788 }, { "epoch": 0.7851193533260389, "grad_norm": 3.5508927095920795, "learning_rate": 2.6826244280108433e-07, "loss": -0.2405, "step": 8790 }, { "epoch": 0.7852979925418128, "grad_norm": 5.622633542196139, "learning_rate": 2.678375196923165e-07, "loss": 0.3667, "step": 8792 }, { "epoch": 0.7854766317575865, "grad_norm": 9.796759353758222, "learning_rate": 2.674128813423019e-07, "loss": -0.5396, "step": 8794 }, { "epoch": 0.7856552709733604, "grad_norm": 6.318271830923655, "learning_rate": 2.669885279161943e-07, "loss": -0.605, "step": 8796 }, { "epoch": 0.7858339101891343, "grad_norm": 5.138279156411121, "learning_rate": 2.665644595790372e-07, "loss": -0.8207, "step": 8798 }, { "epoch": 0.7860125494049082, "grad_norm": 5.394439320813378, "learning_rate": 2.6614067649576176e-07, "loss": 0.1375, "step": 8800 }, { "epoch": 0.7861911886206819, "grad_norm": 11.200530748053234, "learning_rate": 2.6571717883119026e-07, "loss": -1.6231, "step": 8802 }, { "epoch": 0.7863698278364558, "grad_norm": 11.65647450248781, "learning_rate": 2.652939667500329e-07, "loss": -0.0562, "step": 8804 }, { "epoch": 0.7865484670522297, "grad_norm": 3.9473425750555515, "learning_rate": 2.6487104041688836e-07, "loss": 0.412, "step": 8806 }, { "epoch": 0.7867271062680035, "grad_norm": 8.511761774214346, "learning_rate": 2.644483999962449e-07, "loss": -0.5886, "step": 8808 }, { "epoch": 0.7869057454837773, "grad_norm": 8.643868415335845, "learning_rate": 2.6402604565247945e-07, "loss": 0.5116, "step": 8810 }, { "epoch": 0.7870843846995512, "grad_norm": 10.19686403124889, "learning_rate": 2.636039775498574e-07, "loss": -0.6039, "step": 8812 }, { "epoch": 0.787263023915325, "grad_norm": 4.582151549598325, "learning_rate": 2.631821958525331e-07, "loss": 0.9243, "step": 8814 }, { "epoch": 0.7874416631310989, "grad_norm": 3.307224174872167, "learning_rate": 2.627607007245496e-07, "loss": -0.0233, "step": 8816 }, { "epoch": 0.7876203023468727, "grad_norm": 4.190820999739248, "learning_rate": 2.623394923298383e-07, "loss": 0.6349, "step": 8818 }, { "epoch": 0.7877989415626465, "grad_norm": 7.718275294977029, "learning_rate": 2.6191857083221866e-07, "loss": -0.2198, "step": 8820 }, { "epoch": 0.7879775807784204, "grad_norm": 6.924705460547187, "learning_rate": 2.614979363953991e-07, "loss": -1.5522, "step": 8822 }, { "epoch": 0.7881562199941943, "grad_norm": 4.4501167710660186, "learning_rate": 2.6107758918297695e-07, "loss": 0.0628, "step": 8824 }, { "epoch": 0.788334859209968, "grad_norm": 7.068915230112579, "learning_rate": 2.6065752935843643e-07, "loss": 0.4689, "step": 8826 }, { "epoch": 0.7885134984257419, "grad_norm": 8.061411229628693, "learning_rate": 2.602377570851508e-07, "loss": -0.9643, "step": 8828 }, { "epoch": 0.7886921376415158, "grad_norm": 10.436119125300408, "learning_rate": 2.598182725263818e-07, "loss": -0.4221, "step": 8830 }, { "epoch": 0.7888707768572896, "grad_norm": 7.323591118941677, "learning_rate": 2.593990758452784e-07, "loss": 0.3511, "step": 8832 }, { "epoch": 0.7890494160730634, "grad_norm": 3.01814947058734, "learning_rate": 2.5898016720487835e-07, "loss": -0.8203, "step": 8834 }, { "epoch": 0.7892280552888373, "grad_norm": 8.287539530065745, "learning_rate": 2.5856154676810716e-07, "loss": -0.2979, "step": 8836 }, { "epoch": 0.7894066945046111, "grad_norm": 8.491356887133408, "learning_rate": 2.5814321469777835e-07, "loss": -1.6349, "step": 8838 }, { "epoch": 0.789585333720385, "grad_norm": 4.489930861520626, "learning_rate": 2.5772517115659244e-07, "loss": 0.2011, "step": 8840 }, { "epoch": 0.7897639729361589, "grad_norm": 7.489162498680384, "learning_rate": 2.5730741630713893e-07, "loss": 0.464, "step": 8842 }, { "epoch": 0.7899426121519326, "grad_norm": 10.082812167063762, "learning_rate": 2.568899503118945e-07, "loss": -0.6047, "step": 8844 }, { "epoch": 0.7901212513677065, "grad_norm": 5.21435160511287, "learning_rate": 2.5647277333322336e-07, "loss": 0.1872, "step": 8846 }, { "epoch": 0.7902998905834804, "grad_norm": 2.8354399273488418, "learning_rate": 2.5605588553337763e-07, "loss": 0.3211, "step": 8848 }, { "epoch": 0.7904785297992541, "grad_norm": 11.415491493795695, "learning_rate": 2.556392870744967e-07, "loss": -1.3766, "step": 8850 }, { "epoch": 0.790657169015028, "grad_norm": 6.697539877094681, "learning_rate": 2.5522297811860783e-07, "loss": 0.9085, "step": 8852 }, { "epoch": 0.7908358082308019, "grad_norm": 23.855670553940104, "learning_rate": 2.548069588276246e-07, "loss": -0.547, "step": 8854 }, { "epoch": 0.7910144474465757, "grad_norm": 11.424868419732594, "learning_rate": 2.5439122936334945e-07, "loss": -0.7611, "step": 8856 }, { "epoch": 0.7911930866623496, "grad_norm": 14.144797165927633, "learning_rate": 2.539757898874714e-07, "loss": -1.6897, "step": 8858 }, { "epoch": 0.7913717258781234, "grad_norm": 7.819126584187698, "learning_rate": 2.535606405615661e-07, "loss": 0.1063, "step": 8860 }, { "epoch": 0.7915503650938972, "grad_norm": 7.6139363554570885, "learning_rate": 2.531457815470971e-07, "loss": -1.1104, "step": 8862 }, { "epoch": 0.7917290043096711, "grad_norm": 7.031161566284152, "learning_rate": 2.52731213005415e-07, "loss": 0.9465, "step": 8864 }, { "epoch": 0.791907643525445, "grad_norm": 2.451202513970612, "learning_rate": 2.523169350977572e-07, "loss": 0.3547, "step": 8866 }, { "epoch": 0.7920862827412187, "grad_norm": 9.60269513445131, "learning_rate": 2.5190294798524805e-07, "loss": -0.2773, "step": 8868 }, { "epoch": 0.7922649219569926, "grad_norm": 7.222646743165728, "learning_rate": 2.514892518288988e-07, "loss": -0.0283, "step": 8870 }, { "epoch": 0.7924435611727665, "grad_norm": 10.941016106151137, "learning_rate": 2.5107584678960815e-07, "loss": -0.5631, "step": 8872 }, { "epoch": 0.7926222003885403, "grad_norm": 27.872677830334023, "learning_rate": 2.506627330281603e-07, "loss": -0.9861, "step": 8874 }, { "epoch": 0.7928008396043141, "grad_norm": 9.402222439625602, "learning_rate": 2.502499107052274e-07, "loss": -0.6764, "step": 8876 }, { "epoch": 0.792979478820088, "grad_norm": 7.080155980175386, "learning_rate": 2.4983737998136744e-07, "loss": 0.4184, "step": 8878 }, { "epoch": 0.7931581180358618, "grad_norm": 11.212995578186312, "learning_rate": 2.494251410170255e-07, "loss": 0.6977, "step": 8880 }, { "epoch": 0.7933367572516357, "grad_norm": 10.140609952800855, "learning_rate": 2.490131939725331e-07, "loss": -0.6121, "step": 8882 }, { "epoch": 0.7935153964674095, "grad_norm": 13.486077935993961, "learning_rate": 2.4860153900810787e-07, "loss": 0.8879, "step": 8884 }, { "epoch": 0.7936940356831833, "grad_norm": 26.111703217558883, "learning_rate": 2.481901762838543e-07, "loss": -1.2285, "step": 8886 }, { "epoch": 0.7938726748989572, "grad_norm": 8.198411964535229, "learning_rate": 2.477791059597629e-07, "loss": 0.5178, "step": 8888 }, { "epoch": 0.7940513141147311, "grad_norm": 20.874294611725432, "learning_rate": 2.473683281957107e-07, "loss": -0.7324, "step": 8890 }, { "epoch": 0.7942299533305048, "grad_norm": 4.973885431471094, "learning_rate": 2.469578431514611e-07, "loss": 0.048, "step": 8892 }, { "epoch": 0.7944085925462787, "grad_norm": 2.5417076065851965, "learning_rate": 2.465476509866627e-07, "loss": 0.1975, "step": 8894 }, { "epoch": 0.7945872317620526, "grad_norm": 10.463900681307221, "learning_rate": 2.461377518608513e-07, "loss": -0.3718, "step": 8896 }, { "epoch": 0.7947658709778264, "grad_norm": 7.085431166015912, "learning_rate": 2.4572814593344805e-07, "loss": -0.0263, "step": 8898 }, { "epoch": 0.7949445101936002, "grad_norm": 10.736852368329224, "learning_rate": 2.453188333637606e-07, "loss": -0.3119, "step": 8900 }, { "epoch": 0.7951231494093741, "grad_norm": 9.24123854291885, "learning_rate": 2.449098143109819e-07, "loss": -0.3292, "step": 8902 }, { "epoch": 0.7953017886251479, "grad_norm": 7.727528140492882, "learning_rate": 2.445010889341914e-07, "loss": -0.4477, "step": 8904 }, { "epoch": 0.7954804278409218, "grad_norm": 7.495119414189409, "learning_rate": 2.440926573923541e-07, "loss": -0.3507, "step": 8906 }, { "epoch": 0.7956590670566956, "grad_norm": 9.650819702121243, "learning_rate": 2.4368451984432e-07, "loss": 0.0896, "step": 8908 }, { "epoch": 0.7958377062724694, "grad_norm": 14.70306325123192, "learning_rate": 2.4327667644882545e-07, "loss": -0.1144, "step": 8910 }, { "epoch": 0.7960163454882433, "grad_norm": 4.669776879153245, "learning_rate": 2.4286912736449317e-07, "loss": -1.5461, "step": 8912 }, { "epoch": 0.7961949847040172, "grad_norm": 9.009437275544608, "learning_rate": 2.4246187274982954e-07, "loss": -0.6358, "step": 8914 }, { "epoch": 0.7963736239197909, "grad_norm": 9.22055604991526, "learning_rate": 2.42054912763228e-07, "loss": 0.2008, "step": 8916 }, { "epoch": 0.7965522631355648, "grad_norm": 4.60902574153506, "learning_rate": 2.416482475629666e-07, "loss": -1.3536, "step": 8918 }, { "epoch": 0.7967309023513387, "grad_norm": 7.085868591175414, "learning_rate": 2.4124187730720915e-07, "loss": -0.3669, "step": 8920 }, { "epoch": 0.7969095415671125, "grad_norm": 4.504245662555214, "learning_rate": 2.4083580215400435e-07, "loss": 0.3835, "step": 8922 }, { "epoch": 0.7970881807828863, "grad_norm": 7.98864536341623, "learning_rate": 2.404300222612865e-07, "loss": 0.3288, "step": 8924 }, { "epoch": 0.7972668199986602, "grad_norm": 12.111043861816848, "learning_rate": 2.4002453778687513e-07, "loss": -0.5367, "step": 8926 }, { "epoch": 0.797445459214434, "grad_norm": 4.771655660108707, "learning_rate": 2.3961934888847413e-07, "loss": -0.2786, "step": 8928 }, { "epoch": 0.7976240984302079, "grad_norm": 10.109987785716843, "learning_rate": 2.392144557236732e-07, "loss": 0.2822, "step": 8930 }, { "epoch": 0.7978027376459818, "grad_norm": 9.04770183573159, "learning_rate": 2.3880985844994673e-07, "loss": -1.8171, "step": 8932 }, { "epoch": 0.7979813768617556, "grad_norm": 13.72906898537108, "learning_rate": 2.3840555722465427e-07, "loss": -1.3478, "step": 8934 }, { "epoch": 0.7981600160775294, "grad_norm": 8.958385047467619, "learning_rate": 2.3800155220503982e-07, "loss": 0.3053, "step": 8936 }, { "epoch": 0.7983386552933033, "grad_norm": 12.545099914323185, "learning_rate": 2.3759784354823243e-07, "loss": -1.2192, "step": 8938 }, { "epoch": 0.7985172945090772, "grad_norm": 5.138770235541905, "learning_rate": 2.3719443141124617e-07, "loss": -0.2793, "step": 8940 }, { "epoch": 0.7986959337248509, "grad_norm": 8.642990588674616, "learning_rate": 2.3679131595097868e-07, "loss": 0.4787, "step": 8942 }, { "epoch": 0.7988745729406248, "grad_norm": 8.340838092069944, "learning_rate": 2.3638849732421373e-07, "loss": -0.6604, "step": 8944 }, { "epoch": 0.7990532121563987, "grad_norm": 6.45985250345974, "learning_rate": 2.359859756876189e-07, "loss": 0.8806, "step": 8946 }, { "epoch": 0.7992318513721725, "grad_norm": 12.645653477771832, "learning_rate": 2.3558375119774577e-07, "loss": -0.2524, "step": 8948 }, { "epoch": 0.7994104905879463, "grad_norm": 5.69937738063163, "learning_rate": 2.3518182401103114e-07, "loss": 0.2265, "step": 8950 }, { "epoch": 0.7995891298037202, "grad_norm": 4.459677924257575, "learning_rate": 2.347801942837959e-07, "loss": 0.3094, "step": 8952 }, { "epoch": 0.799767769019494, "grad_norm": 2.926951684853426, "learning_rate": 2.3437886217224522e-07, "loss": 0.4781, "step": 8954 }, { "epoch": 0.7999464082352679, "grad_norm": 13.470550062790332, "learning_rate": 2.3397782783246844e-07, "loss": -0.2598, "step": 8956 }, { "epoch": 0.8001250474510417, "grad_norm": 3.8671072286886834, "learning_rate": 2.3357709142043935e-07, "loss": -0.786, "step": 8958 }, { "epoch": 0.8003036866668155, "grad_norm": 4.236890657888095, "learning_rate": 2.3317665309201595e-07, "loss": -0.7519, "step": 8960 }, { "epoch": 0.8004823258825894, "grad_norm": 14.224492315742767, "learning_rate": 2.327765130029393e-07, "loss": -0.5218, "step": 8962 }, { "epoch": 0.8006609650983633, "grad_norm": 5.335454737437947, "learning_rate": 2.3237667130883553e-07, "loss": -0.192, "step": 8964 }, { "epoch": 0.800839604314137, "grad_norm": 6.085636312649154, "learning_rate": 2.319771281652152e-07, "loss": 0.6591, "step": 8966 }, { "epoch": 0.8010182435299109, "grad_norm": 7.3119744005986345, "learning_rate": 2.315778837274709e-07, "loss": 0.7147, "step": 8968 }, { "epoch": 0.8011968827456848, "grad_norm": 14.288258909026222, "learning_rate": 2.3117893815088062e-07, "loss": -1.3517, "step": 8970 }, { "epoch": 0.8013755219614586, "grad_norm": 17.25447886827142, "learning_rate": 2.3078029159060574e-07, "loss": -1.2851, "step": 8972 }, { "epoch": 0.8015541611772324, "grad_norm": 3.0152165427819146, "learning_rate": 2.3038194420169055e-07, "loss": -1.1703, "step": 8974 }, { "epoch": 0.8017328003930063, "grad_norm": 4.033739843979313, "learning_rate": 2.2998389613906433e-07, "loss": -0.1073, "step": 8976 }, { "epoch": 0.8019114396087801, "grad_norm": 7.222153691023722, "learning_rate": 2.2958614755753904e-07, "loss": 0.0445, "step": 8978 }, { "epoch": 0.802090078824554, "grad_norm": 11.14038325364464, "learning_rate": 2.2918869861181067e-07, "loss": -0.6097, "step": 8980 }, { "epoch": 0.8022687180403278, "grad_norm": 2.7296013716422567, "learning_rate": 2.2879154945645772e-07, "loss": 1.0459, "step": 8982 }, { "epoch": 0.8024473572561016, "grad_norm": 36.88093172629998, "learning_rate": 2.2839470024594321e-07, "loss": 0.1461, "step": 8984 }, { "epoch": 0.8026259964718755, "grad_norm": 3.8103393936178, "learning_rate": 2.2799815113461306e-07, "loss": -0.1652, "step": 8986 }, { "epoch": 0.8028046356876494, "grad_norm": 5.038199985231484, "learning_rate": 2.276019022766963e-07, "loss": -0.5307, "step": 8988 }, { "epoch": 0.8029832749034231, "grad_norm": 13.375511088365933, "learning_rate": 2.2720595382630535e-07, "loss": -0.6907, "step": 8990 }, { "epoch": 0.803161914119197, "grad_norm": 9.845065946266086, "learning_rate": 2.26810305937436e-07, "loss": 0.3322, "step": 8992 }, { "epoch": 0.8033405533349709, "grad_norm": 4.110798520298374, "learning_rate": 2.264149587639671e-07, "loss": 1.3272, "step": 8994 }, { "epoch": 0.8035191925507447, "grad_norm": 12.50273895773692, "learning_rate": 2.2601991245965946e-07, "loss": -1.4402, "step": 8996 }, { "epoch": 0.8036978317665185, "grad_norm": 5.023949106128732, "learning_rate": 2.2562516717815872e-07, "loss": -0.1526, "step": 8998 }, { "epoch": 0.8038764709822924, "grad_norm": 7.328270754147512, "learning_rate": 2.2523072307299252e-07, "loss": 0.141, "step": 9000 }, { "epoch": 0.8040551101980662, "grad_norm": 5.395543781695716, "learning_rate": 2.248365802975707e-07, "loss": -0.0923, "step": 9002 }, { "epoch": 0.8042337494138401, "grad_norm": 10.681677944657332, "learning_rate": 2.2444273900518696e-07, "loss": -0.9412, "step": 9004 }, { "epoch": 0.804412388629614, "grad_norm": 18.85377450889932, "learning_rate": 2.2404919934901734e-07, "loss": -0.6801, "step": 9006 }, { "epoch": 0.8045910278453877, "grad_norm": 8.833839497969096, "learning_rate": 2.236559614821205e-07, "loss": 0.3696, "step": 9008 }, { "epoch": 0.8047696670611616, "grad_norm": 7.2011777020589385, "learning_rate": 2.232630255574378e-07, "loss": -0.1228, "step": 9010 }, { "epoch": 0.8049483062769355, "grad_norm": 8.262580758073799, "learning_rate": 2.228703917277932e-07, "loss": 0.2506, "step": 9012 }, { "epoch": 0.8051269454927092, "grad_norm": 8.474230129866978, "learning_rate": 2.2247806014589353e-07, "loss": -0.1573, "step": 9014 }, { "epoch": 0.8053055847084831, "grad_norm": 27.767768188950882, "learning_rate": 2.2208603096432686e-07, "loss": -1.2175, "step": 9016 }, { "epoch": 0.805484223924257, "grad_norm": 7.354266229951555, "learning_rate": 2.21694304335565e-07, "loss": -0.5388, "step": 9018 }, { "epoch": 0.8056628631400308, "grad_norm": 4.431748006838629, "learning_rate": 2.2130288041196132e-07, "loss": -0.5162, "step": 9020 }, { "epoch": 0.8058415023558047, "grad_norm": 4.874863451488082, "learning_rate": 2.209117593457518e-07, "loss": -1.151, "step": 9022 }, { "epoch": 0.8060201415715785, "grad_norm": 8.205931439575622, "learning_rate": 2.205209412890545e-07, "loss": 0.5055, "step": 9024 }, { "epoch": 0.8061987807873523, "grad_norm": 5.369256255291356, "learning_rate": 2.201304263938699e-07, "loss": -1.2729, "step": 9026 }, { "epoch": 0.8063774200031262, "grad_norm": 3.468991657808972, "learning_rate": 2.197402148120795e-07, "loss": 0.9247, "step": 9028 }, { "epoch": 0.8065560592189001, "grad_norm": 6.918341498992348, "learning_rate": 2.1935030669544853e-07, "loss": 0.4897, "step": 9030 }, { "epoch": 0.8067346984346738, "grad_norm": 4.790608341713128, "learning_rate": 2.18960702195623e-07, "loss": 0.2993, "step": 9032 }, { "epoch": 0.8069133376504477, "grad_norm": 20.849178219341763, "learning_rate": 2.1857140146413134e-07, "loss": -0.6981, "step": 9034 }, { "epoch": 0.8070919768662216, "grad_norm": 7.067995617021585, "learning_rate": 2.1818240465238324e-07, "loss": -0.6372, "step": 9036 }, { "epoch": 0.8072706160819954, "grad_norm": 6.384760845249052, "learning_rate": 2.1779371191167083e-07, "loss": -0.4133, "step": 9038 }, { "epoch": 0.8074492552977692, "grad_norm": 3.1024284018862653, "learning_rate": 2.1740532339316764e-07, "loss": -0.5167, "step": 9040 }, { "epoch": 0.8076278945135431, "grad_norm": 7.284226754544445, "learning_rate": 2.1701723924792915e-07, "loss": -0.7873, "step": 9042 }, { "epoch": 0.8078065337293169, "grad_norm": 10.691354870629192, "learning_rate": 2.1662945962689205e-07, "loss": 0.4963, "step": 9044 }, { "epoch": 0.8079851729450908, "grad_norm": 9.65701913085256, "learning_rate": 2.162419846808754e-07, "loss": 0.2625, "step": 9046 }, { "epoch": 0.8081638121608646, "grad_norm": 5.983652096105401, "learning_rate": 2.158548145605784e-07, "loss": -0.451, "step": 9048 }, { "epoch": 0.8083424513766384, "grad_norm": 5.718202658553395, "learning_rate": 2.1546794941658285e-07, "loss": 0.1843, "step": 9050 }, { "epoch": 0.8085210905924123, "grad_norm": 25.297076464222478, "learning_rate": 2.150813893993514e-07, "loss": -1.3389, "step": 9052 }, { "epoch": 0.8086997298081862, "grad_norm": 6.259666596818102, "learning_rate": 2.1469513465922884e-07, "loss": -0.1457, "step": 9054 }, { "epoch": 0.8088783690239599, "grad_norm": 7.378266467847081, "learning_rate": 2.1430918534643994e-07, "loss": 0.1787, "step": 9056 }, { "epoch": 0.8090570082397338, "grad_norm": 8.08511186153145, "learning_rate": 2.139235416110915e-07, "loss": -0.7663, "step": 9058 }, { "epoch": 0.8092356474555077, "grad_norm": 20.435189866093275, "learning_rate": 2.1353820360317154e-07, "loss": -0.4871, "step": 9060 }, { "epoch": 0.8094142866712815, "grad_norm": 9.314441452930934, "learning_rate": 2.131531714725483e-07, "loss": 0.1805, "step": 9062 }, { "epoch": 0.8095929258870553, "grad_norm": 10.97227590899203, "learning_rate": 2.1276844536897244e-07, "loss": 0.0443, "step": 9064 }, { "epoch": 0.8097715651028292, "grad_norm": 8.26806650717626, "learning_rate": 2.1238402544207445e-07, "loss": -0.3364, "step": 9066 }, { "epoch": 0.8099502043186031, "grad_norm": 5.43420380404404, "learning_rate": 2.1199991184136658e-07, "loss": -0.1599, "step": 9068 }, { "epoch": 0.8101288435343769, "grad_norm": 13.13897310050303, "learning_rate": 2.116161047162408e-07, "loss": -0.6639, "step": 9070 }, { "epoch": 0.8103074827501507, "grad_norm": 6.125949396784378, "learning_rate": 2.112326042159711e-07, "loss": 0.3305, "step": 9072 }, { "epoch": 0.8104861219659246, "grad_norm": 12.45059940473186, "learning_rate": 2.108494104897115e-07, "loss": -1.241, "step": 9074 }, { "epoch": 0.8106647611816984, "grad_norm": 13.610266882909514, "learning_rate": 2.1046652368649688e-07, "loss": -0.7589, "step": 9076 }, { "epoch": 0.8108434003974723, "grad_norm": 6.85963609758983, "learning_rate": 2.1008394395524286e-07, "loss": -0.6115, "step": 9078 }, { "epoch": 0.8110220396132461, "grad_norm": 6.983025815227437, "learning_rate": 2.0970167144474593e-07, "loss": -0.0696, "step": 9080 }, { "epoch": 0.8112006788290199, "grad_norm": 7.323068659703155, "learning_rate": 2.09319706303682e-07, "loss": -0.2184, "step": 9082 }, { "epoch": 0.8113793180447938, "grad_norm": 15.745157617608035, "learning_rate": 2.0893804868060816e-07, "loss": -0.3169, "step": 9084 }, { "epoch": 0.8115579572605677, "grad_norm": 7.595977330965965, "learning_rate": 2.0855669872396252e-07, "loss": 0.2508, "step": 9086 }, { "epoch": 0.8117365964763414, "grad_norm": 12.63909004992207, "learning_rate": 2.0817565658206282e-07, "loss": -0.4641, "step": 9088 }, { "epoch": 0.8119152356921153, "grad_norm": 1.9776503995471444, "learning_rate": 2.0779492240310658e-07, "loss": 0.1364, "step": 9090 }, { "epoch": 0.8120938749078892, "grad_norm": 2.490586295598929, "learning_rate": 2.074144963351725e-07, "loss": 0.5455, "step": 9092 }, { "epoch": 0.812272514123663, "grad_norm": 9.743767727192822, "learning_rate": 2.0703437852621908e-07, "loss": -0.5995, "step": 9094 }, { "epoch": 0.8124511533394368, "grad_norm": 5.640753662670846, "learning_rate": 2.0665456912408473e-07, "loss": -0.7922, "step": 9096 }, { "epoch": 0.8126297925552107, "grad_norm": 2.635999431685298, "learning_rate": 2.0627506827648822e-07, "loss": 0.3967, "step": 9098 }, { "epoch": 0.8128084317709845, "grad_norm": 8.395502303218066, "learning_rate": 2.0589587613102854e-07, "loss": 0.6982, "step": 9100 }, { "epoch": 0.8129870709867584, "grad_norm": 7.592789954262582, "learning_rate": 2.0551699283518354e-07, "loss": -1.0479, "step": 9102 }, { "epoch": 0.8131657102025323, "grad_norm": 11.04076426915151, "learning_rate": 2.051384185363121e-07, "loss": 0.526, "step": 9104 }, { "epoch": 0.813344349418306, "grad_norm": 7.423844568023095, "learning_rate": 2.047601533816522e-07, "loss": 0.5297, "step": 9106 }, { "epoch": 0.8135229886340799, "grad_norm": 7.724979990714124, "learning_rate": 2.0438219751832276e-07, "loss": 0.4773, "step": 9108 }, { "epoch": 0.8137016278498538, "grad_norm": 9.296161015125579, "learning_rate": 2.040045510933205e-07, "loss": 0.2355, "step": 9110 }, { "epoch": 0.8138802670656275, "grad_norm": 13.560098321158744, "learning_rate": 2.0362721425352335e-07, "loss": 0.0973, "step": 9112 }, { "epoch": 0.8140589062814014, "grad_norm": 5.483688441936017, "learning_rate": 2.0325018714568844e-07, "loss": 0.5283, "step": 9114 }, { "epoch": 0.8142375454971753, "grad_norm": 5.065386667357562, "learning_rate": 2.0287346991645148e-07, "loss": 0.2482, "step": 9116 }, { "epoch": 0.8144161847129491, "grad_norm": 6.507023464696988, "learning_rate": 2.0249706271232946e-07, "loss": -0.5473, "step": 9118 }, { "epoch": 0.814594823928723, "grad_norm": 13.939239530248436, "learning_rate": 2.0212096567971736e-07, "loss": -0.2756, "step": 9120 }, { "epoch": 0.8147734631444968, "grad_norm": 28.264453532592732, "learning_rate": 2.017451789648903e-07, "loss": 0.6544, "step": 9122 }, { "epoch": 0.8149521023602706, "grad_norm": 10.50028700663366, "learning_rate": 2.0136970271400188e-07, "loss": -0.2286, "step": 9124 }, { "epoch": 0.8151307415760445, "grad_norm": 4.169853492193977, "learning_rate": 2.0099453707308568e-07, "loss": -1.0682, "step": 9126 }, { "epoch": 0.8153093807918184, "grad_norm": 22.884674194497094, "learning_rate": 2.0061968218805435e-07, "loss": -1.1807, "step": 9128 }, { "epoch": 0.8154880200075921, "grad_norm": 6.696279022000093, "learning_rate": 2.0024513820469957e-07, "loss": -0.8239, "step": 9130 }, { "epoch": 0.815666659223366, "grad_norm": 8.284962408594042, "learning_rate": 1.9987090526869212e-07, "loss": 0.5925, "step": 9132 }, { "epoch": 0.8158452984391399, "grad_norm": 17.245540318838138, "learning_rate": 1.9949698352558187e-07, "loss": 0.2503, "step": 9134 }, { "epoch": 0.8160239376549137, "grad_norm": 8.785299541683788, "learning_rate": 1.991233731207974e-07, "loss": -0.1419, "step": 9136 }, { "epoch": 0.8162025768706875, "grad_norm": 8.861191874434661, "learning_rate": 1.9875007419964607e-07, "loss": -0.5899, "step": 9138 }, { "epoch": 0.8163812160864614, "grad_norm": 12.159092744812547, "learning_rate": 1.9837708690731514e-07, "loss": -1.3029, "step": 9140 }, { "epoch": 0.8165598553022352, "grad_norm": 8.320073628466606, "learning_rate": 1.9800441138886993e-07, "loss": -0.2021, "step": 9142 }, { "epoch": 0.8167384945180091, "grad_norm": 8.222135245518981, "learning_rate": 1.9763204778925402e-07, "loss": -0.469, "step": 9144 }, { "epoch": 0.8169171337337829, "grad_norm": 9.787691475467813, "learning_rate": 1.9725999625329027e-07, "loss": -0.1799, "step": 9146 }, { "epoch": 0.8170957729495567, "grad_norm": 6.539833962202239, "learning_rate": 1.9688825692568023e-07, "loss": -0.0106, "step": 9148 }, { "epoch": 0.8172744121653306, "grad_norm": 7.567075748000449, "learning_rate": 1.9651682995100373e-07, "loss": 0.5112, "step": 9150 }, { "epoch": 0.8174530513811045, "grad_norm": 17.304325969699743, "learning_rate": 1.961457154737194e-07, "loss": -0.2697, "step": 9152 }, { "epoch": 0.8176316905968782, "grad_norm": 9.287143672715322, "learning_rate": 1.9577491363816446e-07, "loss": 0.2906, "step": 9154 }, { "epoch": 0.8178103298126521, "grad_norm": 26.179699156965473, "learning_rate": 1.9540442458855356e-07, "loss": -1.9114, "step": 9156 }, { "epoch": 0.817988969028426, "grad_norm": 7.324757532266453, "learning_rate": 1.9503424846898086e-07, "loss": 0.0517, "step": 9158 }, { "epoch": 0.8181676082441998, "grad_norm": 9.552809648484354, "learning_rate": 1.9466438542341824e-07, "loss": -0.6273, "step": 9160 }, { "epoch": 0.8183462474599736, "grad_norm": 2.845635260804979, "learning_rate": 1.942948355957158e-07, "loss": 0.6855, "step": 9162 }, { "epoch": 0.8185248866757475, "grad_norm": 8.73121632355822, "learning_rate": 1.9392559912960227e-07, "loss": -0.8082, "step": 9164 }, { "epoch": 0.8187035258915213, "grad_norm": 7.678975907631839, "learning_rate": 1.93556676168684e-07, "loss": 0.3158, "step": 9166 }, { "epoch": 0.8188821651072952, "grad_norm": 6.511089548560594, "learning_rate": 1.9318806685644573e-07, "loss": 0.6002, "step": 9168 }, { "epoch": 0.819060804323069, "grad_norm": 20.16719285683085, "learning_rate": 1.928197713362495e-07, "loss": -0.3696, "step": 9170 }, { "epoch": 0.8192394435388428, "grad_norm": 8.947111070551715, "learning_rate": 1.9245178975133658e-07, "loss": 0.363, "step": 9172 }, { "epoch": 0.8194180827546167, "grad_norm": 2.214661928817547, "learning_rate": 1.920841222448255e-07, "loss": -0.5412, "step": 9174 }, { "epoch": 0.8195967219703906, "grad_norm": 9.632007376506428, "learning_rate": 1.917167689597119e-07, "loss": -1.0229, "step": 9176 }, { "epoch": 0.8197753611861643, "grad_norm": 11.31315450724444, "learning_rate": 1.9134973003887034e-07, "loss": 0.2523, "step": 9178 }, { "epoch": 0.8199540004019382, "grad_norm": 4.140339161345563, "learning_rate": 1.9098300562505264e-07, "loss": -0.4692, "step": 9180 }, { "epoch": 0.8201326396177121, "grad_norm": 5.470248643708897, "learning_rate": 1.906165958608882e-07, "loss": 0.1308, "step": 9182 }, { "epoch": 0.8203112788334859, "grad_norm": 14.1291751598684, "learning_rate": 1.9025050088888428e-07, "loss": 0.2301, "step": 9184 }, { "epoch": 0.8204899180492597, "grad_norm": 8.72630108857565, "learning_rate": 1.8988472085142547e-07, "loss": -0.7891, "step": 9186 }, { "epoch": 0.8206685572650336, "grad_norm": 7.052019833645596, "learning_rate": 1.8951925589077443e-07, "loss": 0.0593, "step": 9188 }, { "epoch": 0.8208471964808074, "grad_norm": 2.0927746906712885, "learning_rate": 1.8915410614907035e-07, "loss": -0.2252, "step": 9190 }, { "epoch": 0.8210258356965813, "grad_norm": 6.494119478349029, "learning_rate": 1.8878927176833037e-07, "loss": -0.717, "step": 9192 }, { "epoch": 0.8212044749123552, "grad_norm": 15.00004628492208, "learning_rate": 1.8842475289044878e-07, "loss": -1.4818, "step": 9194 }, { "epoch": 0.821383114128129, "grad_norm": 2.8460318664139295, "learning_rate": 1.8806054965719808e-07, "loss": 0.8969, "step": 9196 }, { "epoch": 0.8215617533439028, "grad_norm": 6.895140122282375, "learning_rate": 1.8769666221022652e-07, "loss": 0.0991, "step": 9198 }, { "epoch": 0.8217403925596767, "grad_norm": 9.89825868031058, "learning_rate": 1.8733309069106042e-07, "loss": 0.5322, "step": 9200 }, { "epoch": 0.8219190317754506, "grad_norm": 6.7758750885278305, "learning_rate": 1.869698352411032e-07, "loss": 0.6659, "step": 9202 }, { "epoch": 0.8220976709912243, "grad_norm": 7.145277114737168, "learning_rate": 1.8660689600163482e-07, "loss": -0.5574, "step": 9204 }, { "epoch": 0.8222763102069982, "grad_norm": 12.021031070098823, "learning_rate": 1.8624427311381298e-07, "loss": 0.2469, "step": 9206 }, { "epoch": 0.8224549494227721, "grad_norm": 4.273457807809952, "learning_rate": 1.8588196671867228e-07, "loss": -0.6983, "step": 9208 }, { "epoch": 0.8226335886385459, "grad_norm": 8.820846609022421, "learning_rate": 1.855199769571234e-07, "loss": -1.2157, "step": 9210 }, { "epoch": 0.8228122278543197, "grad_norm": 4.791073350523623, "learning_rate": 1.8515830396995456e-07, "loss": -0.0541, "step": 9212 }, { "epoch": 0.8229908670700936, "grad_norm": 10.98723346964131, "learning_rate": 1.8479694789783084e-07, "loss": -1.6526, "step": 9214 }, { "epoch": 0.8231695062858674, "grad_norm": 8.51601248043323, "learning_rate": 1.8443590888129356e-07, "loss": 0.3717, "step": 9216 }, { "epoch": 0.8233481455016413, "grad_norm": 10.375147806976129, "learning_rate": 1.840751870607614e-07, "loss": 0.5861, "step": 9218 }, { "epoch": 0.8235267847174151, "grad_norm": 5.295321526495792, "learning_rate": 1.8371478257652906e-07, "loss": 0.2404, "step": 9220 }, { "epoch": 0.8237054239331889, "grad_norm": 9.56724360871796, "learning_rate": 1.8335469556876838e-07, "loss": -1.1532, "step": 9222 }, { "epoch": 0.8238840631489628, "grad_norm": 6.295769606275529, "learning_rate": 1.8299492617752688e-07, "loss": 0.1931, "step": 9224 }, { "epoch": 0.8240627023647367, "grad_norm": 13.287744276182423, "learning_rate": 1.826354745427291e-07, "loss": -0.4926, "step": 9226 }, { "epoch": 0.8242413415805104, "grad_norm": 13.10548155737079, "learning_rate": 1.8227634080417676e-07, "loss": 0.686, "step": 9228 }, { "epoch": 0.8244199807962843, "grad_norm": 9.69398173406323, "learning_rate": 1.8191752510154633e-07, "loss": -0.2163, "step": 9230 }, { "epoch": 0.8245986200120582, "grad_norm": 8.115789739194843, "learning_rate": 1.815590275743918e-07, "loss": -0.3273, "step": 9232 }, { "epoch": 0.824777259227832, "grad_norm": 6.867722977558455, "learning_rate": 1.812008483621429e-07, "loss": 0.0115, "step": 9234 }, { "epoch": 0.8249558984436058, "grad_norm": 8.533523424336012, "learning_rate": 1.8084298760410576e-07, "loss": -0.3642, "step": 9236 }, { "epoch": 0.8251345376593797, "grad_norm": 7.385741606443353, "learning_rate": 1.8048544543946255e-07, "loss": -0.2998, "step": 9238 }, { "epoch": 0.8253131768751535, "grad_norm": 12.279962137269537, "learning_rate": 1.801282220072715e-07, "loss": 0.0403, "step": 9240 }, { "epoch": 0.8254918160909274, "grad_norm": 3.3672769981580397, "learning_rate": 1.7977131744646724e-07, "loss": 0.8058, "step": 9242 }, { "epoch": 0.8256704553067012, "grad_norm": 6.141411417915394, "learning_rate": 1.7941473189585955e-07, "loss": -0.8235, "step": 9244 }, { "epoch": 0.825849094522475, "grad_norm": 9.098193783765716, "learning_rate": 1.7905846549413494e-07, "loss": -0.1562, "step": 9246 }, { "epoch": 0.8260277337382489, "grad_norm": 10.12820986506504, "learning_rate": 1.787025183798553e-07, "loss": -0.7313, "step": 9248 }, { "epoch": 0.8262063729540228, "grad_norm": 12.542772888536188, "learning_rate": 1.7834689069145915e-07, "loss": -0.766, "step": 9250 }, { "epoch": 0.8263850121697965, "grad_norm": 11.753362945444326, "learning_rate": 1.7799158256725965e-07, "loss": -0.2828, "step": 9252 }, { "epoch": 0.8265636513855704, "grad_norm": 3.643100797160837, "learning_rate": 1.776365941454464e-07, "loss": 0.3888, "step": 9254 }, { "epoch": 0.8267422906013443, "grad_norm": 8.476043150852712, "learning_rate": 1.7728192556408461e-07, "loss": -0.4019, "step": 9256 }, { "epoch": 0.8269209298171181, "grad_norm": 8.110550458973705, "learning_rate": 1.7692757696111426e-07, "loss": -1.2069, "step": 9258 }, { "epoch": 0.827099569032892, "grad_norm": 4.537280890268671, "learning_rate": 1.7657354847435225e-07, "loss": -0.5618, "step": 9260 }, { "epoch": 0.8272782082486658, "grad_norm": 4.989872403090192, "learning_rate": 1.762198402414905e-07, "loss": 0.2747, "step": 9262 }, { "epoch": 0.8274568474644396, "grad_norm": 4.637273020986256, "learning_rate": 1.7586645240009545e-07, "loss": -0.4138, "step": 9264 }, { "epoch": 0.8276354866802135, "grad_norm": 6.900010548459158, "learning_rate": 1.7551338508761003e-07, "loss": -0.0114, "step": 9266 }, { "epoch": 0.8278141258959874, "grad_norm": 8.259564952708333, "learning_rate": 1.751606384413522e-07, "loss": -0.924, "step": 9268 }, { "epoch": 0.8279927651117611, "grad_norm": 7.891567906611347, "learning_rate": 1.7480821259851487e-07, "loss": -0.4983, "step": 9270 }, { "epoch": 0.828171404327535, "grad_norm": 13.485003583806474, "learning_rate": 1.744561076961667e-07, "loss": -2.0221, "step": 9272 }, { "epoch": 0.8283500435433089, "grad_norm": 12.756070356211477, "learning_rate": 1.7410432387125117e-07, "loss": 0.933, "step": 9274 }, { "epoch": 0.8285286827590826, "grad_norm": 1.5440307825162645, "learning_rate": 1.737528612605873e-07, "loss": 0.9616, "step": 9276 }, { "epoch": 0.8287073219748565, "grad_norm": 11.500359571060024, "learning_rate": 1.7340172000086828e-07, "loss": 0.3248, "step": 9278 }, { "epoch": 0.8288859611906304, "grad_norm": 10.628919888129802, "learning_rate": 1.7305090022866286e-07, "loss": -0.3208, "step": 9280 }, { "epoch": 0.8290646004064042, "grad_norm": 5.773090274677859, "learning_rate": 1.727004020804157e-07, "loss": -0.0749, "step": 9282 }, { "epoch": 0.829243239622178, "grad_norm": 4.266607932838858, "learning_rate": 1.7235022569244472e-07, "loss": 0.0204, "step": 9284 }, { "epoch": 0.8294218788379519, "grad_norm": 5.0401517415288914, "learning_rate": 1.7200037120094345e-07, "loss": -0.7959, "step": 9286 }, { "epoch": 0.8296005180537257, "grad_norm": 12.581611633409501, "learning_rate": 1.7165083874198061e-07, "loss": -0.409, "step": 9288 }, { "epoch": 0.8297791572694996, "grad_norm": 8.141615560235207, "learning_rate": 1.7130162845149898e-07, "loss": 0.5791, "step": 9290 }, { "epoch": 0.8299577964852735, "grad_norm": 18.481487187246152, "learning_rate": 1.7095274046531638e-07, "loss": -0.4834, "step": 9292 }, { "epoch": 0.8301364357010472, "grad_norm": 4.89877994684915, "learning_rate": 1.7060417491912538e-07, "loss": 0.4387, "step": 9294 }, { "epoch": 0.8303150749168211, "grad_norm": 4.596828959926223, "learning_rate": 1.702559319484932e-07, "loss": -0.244, "step": 9296 }, { "epoch": 0.830493714132595, "grad_norm": 13.582539041773046, "learning_rate": 1.6990801168886082e-07, "loss": -0.4886, "step": 9298 }, { "epoch": 0.8306723533483688, "grad_norm": 12.934771116374955, "learning_rate": 1.6956041427554467e-07, "loss": 0.0482, "step": 9300 }, { "epoch": 0.8308509925641426, "grad_norm": 6.409464704089473, "learning_rate": 1.692131398437352e-07, "loss": -0.0728, "step": 9302 }, { "epoch": 0.8310296317799165, "grad_norm": 5.457049250317038, "learning_rate": 1.688661885284972e-07, "loss": 0.2003, "step": 9304 }, { "epoch": 0.8312082709956903, "grad_norm": 10.511130836401996, "learning_rate": 1.6851956046477002e-07, "loss": 0.0021, "step": 9306 }, { "epoch": 0.8313869102114642, "grad_norm": 3.354413151816367, "learning_rate": 1.6817325578736708e-07, "loss": -0.1742, "step": 9308 }, { "epoch": 0.831565549427238, "grad_norm": 20.86949953262386, "learning_rate": 1.6782727463097623e-07, "loss": -0.61, "step": 9310 }, { "epoch": 0.8317441886430118, "grad_norm": 8.858439724110738, "learning_rate": 1.6748161713015906e-07, "loss": -0.306, "step": 9312 }, { "epoch": 0.8319228278587857, "grad_norm": 4.528567868644343, "learning_rate": 1.671362834193515e-07, "loss": -0.3188, "step": 9314 }, { "epoch": 0.8321014670745596, "grad_norm": 11.126436290820678, "learning_rate": 1.6679127363286428e-07, "loss": 0.4395, "step": 9316 }, { "epoch": 0.8322801062903333, "grad_norm": 7.6623926550361, "learning_rate": 1.6644658790488076e-07, "loss": -0.8273, "step": 9318 }, { "epoch": 0.8324587455061072, "grad_norm": 7.3836014583403315, "learning_rate": 1.6610222636945937e-07, "loss": -0.5254, "step": 9320 }, { "epoch": 0.8326373847218811, "grad_norm": 8.857970326528761, "learning_rate": 1.6575818916053198e-07, "loss": 0.2318, "step": 9322 }, { "epoch": 0.8328160239376549, "grad_norm": 8.72405472583563, "learning_rate": 1.6541447641190453e-07, "loss": -1.1387, "step": 9324 }, { "epoch": 0.8329946631534287, "grad_norm": 10.170091979624543, "learning_rate": 1.6507108825725646e-07, "loss": -1.4115, "step": 9326 }, { "epoch": 0.8331733023692026, "grad_norm": 7.2484097545410995, "learning_rate": 1.6472802483014126e-07, "loss": -0.27, "step": 9328 }, { "epoch": 0.8333519415849765, "grad_norm": 9.862516600929668, "learning_rate": 1.643852862639864e-07, "loss": 0.1371, "step": 9330 }, { "epoch": 0.8335305808007503, "grad_norm": 11.552314850001046, "learning_rate": 1.6404287269209182e-07, "loss": -0.2806, "step": 9332 }, { "epoch": 0.8337092200165241, "grad_norm": 7.963605828501813, "learning_rate": 1.6370078424763244e-07, "loss": 0.1185, "step": 9334 }, { "epoch": 0.833887859232298, "grad_norm": 10.884786127010567, "learning_rate": 1.6335902106365606e-07, "loss": 0.0411, "step": 9336 }, { "epoch": 0.8340664984480718, "grad_norm": 4.202259527675492, "learning_rate": 1.63017583273084e-07, "loss": 0.503, "step": 9338 }, { "epoch": 0.8342451376638457, "grad_norm": 9.275702157394626, "learning_rate": 1.6267647100871119e-07, "loss": -0.5705, "step": 9340 }, { "epoch": 0.8344237768796195, "grad_norm": 8.820693515491755, "learning_rate": 1.6233568440320579e-07, "loss": -0.4019, "step": 9342 }, { "epoch": 0.8346024160953933, "grad_norm": 4.554511405458993, "learning_rate": 1.6199522358910957e-07, "loss": 0.3815, "step": 9344 }, { "epoch": 0.8347810553111672, "grad_norm": 2.387210177261299, "learning_rate": 1.616550886988368e-07, "loss": 0.2681, "step": 9346 }, { "epoch": 0.8349596945269411, "grad_norm": 5.508092475430629, "learning_rate": 1.613152798646763e-07, "loss": -0.8086, "step": 9348 }, { "epoch": 0.8351383337427148, "grad_norm": 14.749532401221657, "learning_rate": 1.6097579721878917e-07, "loss": -0.8053, "step": 9350 }, { "epoch": 0.8353169729584887, "grad_norm": 3.813134937883053, "learning_rate": 1.6063664089320972e-07, "loss": 0.5978, "step": 9352 }, { "epoch": 0.8354956121742626, "grad_norm": 10.792584904276437, "learning_rate": 1.602978110198453e-07, "loss": 0.4696, "step": 9354 }, { "epoch": 0.8356742513900364, "grad_norm": 10.873063539876307, "learning_rate": 1.5995930773047683e-07, "loss": 0.0954, "step": 9356 }, { "epoch": 0.8358528906058103, "grad_norm": 5.065872952548485, "learning_rate": 1.5962113115675747e-07, "loss": -0.0638, "step": 9358 }, { "epoch": 0.8360315298215841, "grad_norm": 6.085099403741731, "learning_rate": 1.5928328143021396e-07, "loss": 0.0352, "step": 9360 }, { "epoch": 0.8362101690373579, "grad_norm": 3.6466832650717547, "learning_rate": 1.5894575868224557e-07, "loss": 0.9084, "step": 9362 }, { "epoch": 0.8363888082531318, "grad_norm": 5.371874588128074, "learning_rate": 1.5860856304412474e-07, "loss": -0.3413, "step": 9364 }, { "epoch": 0.8365674474689057, "grad_norm": 13.154673719778387, "learning_rate": 1.5827169464699575e-07, "loss": 0.3868, "step": 9366 }, { "epoch": 0.8367460866846794, "grad_norm": 6.76474254042115, "learning_rate": 1.579351536218765e-07, "loss": -0.7669, "step": 9368 }, { "epoch": 0.8369247259004533, "grad_norm": 6.3316935708656805, "learning_rate": 1.575989400996579e-07, "loss": -1.245, "step": 9370 }, { "epoch": 0.8371033651162272, "grad_norm": 6.511193247954405, "learning_rate": 1.572630542111023e-07, "loss": -0.7074, "step": 9372 }, { "epoch": 0.837282004332001, "grad_norm": 7.43413024986928, "learning_rate": 1.5692749608684553e-07, "loss": 0.7145, "step": 9374 }, { "epoch": 0.8374606435477748, "grad_norm": 10.90894500750452, "learning_rate": 1.5659226585739548e-07, "loss": 0.2034, "step": 9376 }, { "epoch": 0.8376392827635487, "grad_norm": 14.617170797692221, "learning_rate": 1.562573636531328e-07, "loss": -0.4884, "step": 9378 }, { "epoch": 0.8378179219793225, "grad_norm": 6.282427169796625, "learning_rate": 1.5592278960431048e-07, "loss": -0.2547, "step": 9380 }, { "epoch": 0.8379965611950964, "grad_norm": 1.5322082012722351, "learning_rate": 1.5558854384105391e-07, "loss": 0.9128, "step": 9382 }, { "epoch": 0.8381752004108702, "grad_norm": 14.592073854828367, "learning_rate": 1.5525462649336084e-07, "loss": 0.8289, "step": 9384 }, { "epoch": 0.838353839626644, "grad_norm": 16.5460016958521, "learning_rate": 1.5492103769110087e-07, "loss": -0.0656, "step": 9386 }, { "epoch": 0.8385324788424179, "grad_norm": 1.6401799642864139, "learning_rate": 1.545877775640162e-07, "loss": 0.647, "step": 9388 }, { "epoch": 0.8387111180581918, "grad_norm": 6.822659258075743, "learning_rate": 1.5425484624172126e-07, "loss": -0.0775, "step": 9390 }, { "epoch": 0.8388897572739655, "grad_norm": 16.043866106660126, "learning_rate": 1.5392224385370257e-07, "loss": -0.0983, "step": 9392 }, { "epoch": 0.8390683964897394, "grad_norm": 3.984729107847514, "learning_rate": 1.5358997052931844e-07, "loss": 0.51, "step": 9394 }, { "epoch": 0.8392470357055133, "grad_norm": 9.160964533235527, "learning_rate": 1.532580263977995e-07, "loss": 0.0985, "step": 9396 }, { "epoch": 0.8394256749212871, "grad_norm": 11.062669655743562, "learning_rate": 1.5292641158824848e-07, "loss": -1.0098, "step": 9398 }, { "epoch": 0.8396043141370609, "grad_norm": 8.457087182263077, "learning_rate": 1.52595126229639e-07, "loss": -0.5338, "step": 9400 }, { "epoch": 0.8397829533528348, "grad_norm": 7.987055080481327, "learning_rate": 1.5226417045081817e-07, "loss": -1.1001, "step": 9402 }, { "epoch": 0.8399615925686086, "grad_norm": 3.17950858847665, "learning_rate": 1.5193354438050398e-07, "loss": 0.2528, "step": 9404 }, { "epoch": 0.8401402317843825, "grad_norm": 5.5298984406511495, "learning_rate": 1.5160324814728586e-07, "loss": 0.1333, "step": 9406 }, { "epoch": 0.8403188710001563, "grad_norm": 9.823253400632954, "learning_rate": 1.5127328187962553e-07, "loss": -1.7033, "step": 9408 }, { "epoch": 0.8404975102159301, "grad_norm": 4.381813846716936, "learning_rate": 1.5094364570585626e-07, "loss": 0.0823, "step": 9410 }, { "epoch": 0.840676149431704, "grad_norm": 8.659414435977958, "learning_rate": 1.5061433975418304e-07, "loss": 0.061, "step": 9412 }, { "epoch": 0.8408547886474779, "grad_norm": 11.88241506259256, "learning_rate": 1.502853641526821e-07, "loss": -0.4977, "step": 9414 }, { "epoch": 0.8410334278632516, "grad_norm": 8.302406090972307, "learning_rate": 1.4995671902930153e-07, "loss": -0.7654, "step": 9416 }, { "epoch": 0.8412120670790255, "grad_norm": 12.09817211121652, "learning_rate": 1.4962840451186088e-07, "loss": -0.934, "step": 9418 }, { "epoch": 0.8413907062947994, "grad_norm": 4.472211870664591, "learning_rate": 1.493004207280506e-07, "loss": 0.4924, "step": 9420 }, { "epoch": 0.8415693455105732, "grad_norm": 2.8006432577745626, "learning_rate": 1.4897276780543277e-07, "loss": 0.6812, "step": 9422 }, { "epoch": 0.841747984726347, "grad_norm": 6.23731653711983, "learning_rate": 1.4864544587144168e-07, "loss": -0.3342, "step": 9424 }, { "epoch": 0.8419266239421209, "grad_norm": 4.310298191739057, "learning_rate": 1.4831845505338147e-07, "loss": 0.6234, "step": 9426 }, { "epoch": 0.8421052631578947, "grad_norm": 6.356007427136816, "learning_rate": 1.479917954784282e-07, "loss": -0.4033, "step": 9428 }, { "epoch": 0.8422839023736686, "grad_norm": 10.951747486600205, "learning_rate": 1.4766546727362949e-07, "loss": -0.4068, "step": 9430 }, { "epoch": 0.8424625415894424, "grad_norm": 2.511450674603584, "learning_rate": 1.473394705659028e-07, "loss": 0.1507, "step": 9432 }, { "epoch": 0.8426411808052162, "grad_norm": 6.0868942429336, "learning_rate": 1.4701380548203812e-07, "loss": -0.5487, "step": 9434 }, { "epoch": 0.8428198200209901, "grad_norm": 6.8469200282529235, "learning_rate": 1.4668847214869583e-07, "loss": -0.5987, "step": 9436 }, { "epoch": 0.842998459236764, "grad_norm": 6.871466543592917, "learning_rate": 1.4636347069240728e-07, "loss": 0.3322, "step": 9438 }, { "epoch": 0.8431770984525377, "grad_norm": 3.5834530802982103, "learning_rate": 1.4603880123957445e-07, "loss": -0.003, "step": 9440 }, { "epoch": 0.8433557376683116, "grad_norm": 3.0089007898255296, "learning_rate": 1.457144639164708e-07, "loss": 0.3026, "step": 9442 }, { "epoch": 0.8435343768840855, "grad_norm": 4.218213986854105, "learning_rate": 1.4539045884924005e-07, "loss": -0.04, "step": 9444 }, { "epoch": 0.8437130160998593, "grad_norm": 5.673992171681825, "learning_rate": 1.4506678616389712e-07, "loss": 0.463, "step": 9446 }, { "epoch": 0.8438916553156331, "grad_norm": 3.0326453074890174, "learning_rate": 1.4474344598632758e-07, "loss": -0.2755, "step": 9448 }, { "epoch": 0.844070294531407, "grad_norm": 17.18129060420584, "learning_rate": 1.4442043844228746e-07, "loss": -1.08, "step": 9450 }, { "epoch": 0.8442489337471808, "grad_norm": 15.258782531248603, "learning_rate": 1.4409776365740378e-07, "loss": -0.1184, "step": 9452 }, { "epoch": 0.8444275729629547, "grad_norm": 5.096565825664575, "learning_rate": 1.437754217571735e-07, "loss": 0.6916, "step": 9454 }, { "epoch": 0.8446062121787286, "grad_norm": 5.173775038657437, "learning_rate": 1.4345341286696466e-07, "loss": 0.2258, "step": 9456 }, { "epoch": 0.8447848513945023, "grad_norm": 5.012702637189028, "learning_rate": 1.431317371120161e-07, "loss": -0.4971, "step": 9458 }, { "epoch": 0.8449634906102762, "grad_norm": 10.849634865800388, "learning_rate": 1.4281039461743606e-07, "loss": -0.7398, "step": 9460 }, { "epoch": 0.8451421298260501, "grad_norm": 4.656936082921895, "learning_rate": 1.4248938550820399e-07, "loss": 0.052, "step": 9462 }, { "epoch": 0.845320769041824, "grad_norm": 12.90624940886046, "learning_rate": 1.4216870990916942e-07, "loss": -0.1126, "step": 9464 }, { "epoch": 0.8454994082575977, "grad_norm": 5.374544523590773, "learning_rate": 1.4184836794505218e-07, "loss": 0.0766, "step": 9466 }, { "epoch": 0.8456780474733716, "grad_norm": 10.613416753022555, "learning_rate": 1.4152835974044243e-07, "loss": -0.4778, "step": 9468 }, { "epoch": 0.8458566866891455, "grad_norm": 15.75269645739138, "learning_rate": 1.4120868541980025e-07, "loss": -1.8844, "step": 9470 }, { "epoch": 0.8460353259049193, "grad_norm": 8.217249280019468, "learning_rate": 1.4088934510745642e-07, "loss": -0.2692, "step": 9472 }, { "epoch": 0.8462139651206931, "grad_norm": 16.416712623699684, "learning_rate": 1.4057033892761106e-07, "loss": -0.9449, "step": 9474 }, { "epoch": 0.846392604336467, "grad_norm": 7.2040691956750535, "learning_rate": 1.4025166700433478e-07, "loss": -0.1415, "step": 9476 }, { "epoch": 0.8465712435522408, "grad_norm": 8.026578620774156, "learning_rate": 1.3993332946156823e-07, "loss": 0.2924, "step": 9478 }, { "epoch": 0.8467498827680147, "grad_norm": 7.376557783704275, "learning_rate": 1.396153264231218e-07, "loss": -0.0019, "step": 9480 }, { "epoch": 0.8469285219837885, "grad_norm": 24.971151568250338, "learning_rate": 1.3929765801267602e-07, "loss": -0.614, "step": 9482 }, { "epoch": 0.8471071611995623, "grad_norm": 6.692166420535766, "learning_rate": 1.3898032435378126e-07, "loss": 0.4754, "step": 9484 }, { "epoch": 0.8472858004153362, "grad_norm": 11.990414923964396, "learning_rate": 1.3866332556985726e-07, "loss": 0.4728, "step": 9486 }, { "epoch": 0.8474644396311101, "grad_norm": 2.900075280100333, "learning_rate": 1.3834666178419375e-07, "loss": 0.6308, "step": 9488 }, { "epoch": 0.8476430788468838, "grad_norm": 2.9640861417785733, "learning_rate": 1.380303331199507e-07, "loss": 0.6596, "step": 9490 }, { "epoch": 0.8478217180626577, "grad_norm": 7.1487117860629485, "learning_rate": 1.3771433970015733e-07, "loss": -0.0703, "step": 9492 }, { "epoch": 0.8480003572784316, "grad_norm": 10.898339493286924, "learning_rate": 1.3739868164771197e-07, "loss": 0.3344, "step": 9494 }, { "epoch": 0.8481789964942054, "grad_norm": 6.353557052969761, "learning_rate": 1.3708335908538316e-07, "loss": 0.2969, "step": 9496 }, { "epoch": 0.8483576357099792, "grad_norm": 11.929270519071462, "learning_rate": 1.3676837213580895e-07, "loss": 0.0434, "step": 9498 }, { "epoch": 0.8485362749257531, "grad_norm": 3.553163574868248, "learning_rate": 1.3645372092149654e-07, "loss": 0.5155, "step": 9500 }, { "epoch": 0.8487149141415269, "grad_norm": 16.93606662667584, "learning_rate": 1.3613940556482284e-07, "loss": -0.0527, "step": 9502 }, { "epoch": 0.8488935533573008, "grad_norm": 11.92605185678081, "learning_rate": 1.3582542618803382e-07, "loss": -1.1694, "step": 9504 }, { "epoch": 0.8490721925730746, "grad_norm": 6.480052858772853, "learning_rate": 1.3551178291324539e-07, "loss": 0.0194, "step": 9506 }, { "epoch": 0.8492508317888484, "grad_norm": 8.218465473776254, "learning_rate": 1.3519847586244182e-07, "loss": -0.8117, "step": 9508 }, { "epoch": 0.8494294710046223, "grad_norm": 44.98528133855978, "learning_rate": 1.34885505157477e-07, "loss": -2.6929, "step": 9510 }, { "epoch": 0.8496081102203962, "grad_norm": 18.513237418582328, "learning_rate": 1.3457287092007485e-07, "loss": -2.0396, "step": 9512 }, { "epoch": 0.8497867494361699, "grad_norm": 5.911012051803185, "learning_rate": 1.3426057327182706e-07, "loss": -0.0252, "step": 9514 }, { "epoch": 0.8499653886519438, "grad_norm": 11.07928572628172, "learning_rate": 1.3394861233419518e-07, "loss": -0.6031, "step": 9516 }, { "epoch": 0.8501440278677177, "grad_norm": 21.933410437234862, "learning_rate": 1.3363698822850998e-07, "loss": -1.2694, "step": 9518 }, { "epoch": 0.8503226670834915, "grad_norm": 21.06620018036149, "learning_rate": 1.3332570107597019e-07, "loss": -0.3303, "step": 9520 }, { "epoch": 0.8505013062992653, "grad_norm": 6.224647371040293, "learning_rate": 1.3301475099764493e-07, "loss": 0.6632, "step": 9522 }, { "epoch": 0.8506799455150392, "grad_norm": 9.47806586836051, "learning_rate": 1.3270413811447123e-07, "loss": 0.1371, "step": 9524 }, { "epoch": 0.850858584730813, "grad_norm": 5.8967748458596985, "learning_rate": 1.3239386254725548e-07, "loss": 0.282, "step": 9526 }, { "epoch": 0.8510372239465869, "grad_norm": 7.08097515869093, "learning_rate": 1.3208392441667215e-07, "loss": 0.2193, "step": 9528 }, { "epoch": 0.8512158631623608, "grad_norm": 13.485453928265343, "learning_rate": 1.3177432384326538e-07, "loss": 0.252, "step": 9530 }, { "epoch": 0.8513945023781345, "grad_norm": 9.42666671141427, "learning_rate": 1.3146506094744735e-07, "loss": 0.2458, "step": 9532 }, { "epoch": 0.8515731415939084, "grad_norm": 5.492754585432615, "learning_rate": 1.3115613584949935e-07, "loss": -0.2667, "step": 9534 }, { "epoch": 0.8517517808096823, "grad_norm": 5.350975282849018, "learning_rate": 1.3084754866957116e-07, "loss": -0.9994, "step": 9536 }, { "epoch": 0.851930420025456, "grad_norm": 11.599760487155542, "learning_rate": 1.3053929952768117e-07, "loss": -1.356, "step": 9538 }, { "epoch": 0.8521090592412299, "grad_norm": 3.5902455367594555, "learning_rate": 1.3023138854371574e-07, "loss": 0.2108, "step": 9540 }, { "epoch": 0.8522876984570038, "grad_norm": 9.101426702313113, "learning_rate": 1.2992381583743028e-07, "loss": -0.6476, "step": 9542 }, { "epoch": 0.8524663376727776, "grad_norm": 7.118808666386061, "learning_rate": 1.29616581528449e-07, "loss": 0.0648, "step": 9544 }, { "epoch": 0.8526449768885515, "grad_norm": 5.289139492088714, "learning_rate": 1.29309685736264e-07, "loss": -0.6709, "step": 9546 }, { "epoch": 0.8528236161043253, "grad_norm": 11.721232809904613, "learning_rate": 1.2900312858023533e-07, "loss": -0.7979, "step": 9548 }, { "epoch": 0.8530022553200991, "grad_norm": 12.645184084938906, "learning_rate": 1.2869691017959194e-07, "loss": 0.039, "step": 9550 }, { "epoch": 0.853180894535873, "grad_norm": 10.948581161198897, "learning_rate": 1.283910306534308e-07, "loss": -0.9252, "step": 9552 }, { "epoch": 0.8533595337516469, "grad_norm": 4.645159320377971, "learning_rate": 1.2808549012071734e-07, "loss": 0.4127, "step": 9554 }, { "epoch": 0.8535381729674206, "grad_norm": 4.437126896176561, "learning_rate": 1.2778028870028468e-07, "loss": -0.1795, "step": 9556 }, { "epoch": 0.8537168121831945, "grad_norm": 9.439002719507352, "learning_rate": 1.2747542651083475e-07, "loss": 0.8379, "step": 9558 }, { "epoch": 0.8538954513989684, "grad_norm": 7.1961779410481865, "learning_rate": 1.2717090367093653e-07, "loss": 1.2536, "step": 9560 }, { "epoch": 0.8540740906147422, "grad_norm": 17.473925538655266, "learning_rate": 1.268667202990279e-07, "loss": 0.236, "step": 9562 }, { "epoch": 0.854252729830516, "grad_norm": 16.492737067465864, "learning_rate": 1.2656287651341424e-07, "loss": 0.0129, "step": 9564 }, { "epoch": 0.8544313690462899, "grad_norm": 5.323398137821804, "learning_rate": 1.2625937243226915e-07, "loss": -0.7674, "step": 9566 }, { "epoch": 0.8546100082620637, "grad_norm": 4.642715959183927, "learning_rate": 1.2595620817363395e-07, "loss": -0.4851, "step": 9568 }, { "epoch": 0.8547886474778376, "grad_norm": 9.994451891100162, "learning_rate": 1.256533838554179e-07, "loss": -1.7394, "step": 9570 }, { "epoch": 0.8549672866936114, "grad_norm": 3.490437386985451, "learning_rate": 1.2535089959539801e-07, "loss": -0.6887, "step": 9572 }, { "epoch": 0.8551459259093852, "grad_norm": 4.159275645868572, "learning_rate": 1.250487555112185e-07, "loss": 0.2226, "step": 9574 }, { "epoch": 0.8553245651251591, "grad_norm": 4.5521308436704135, "learning_rate": 1.247469517203923e-07, "loss": 0.007, "step": 9576 }, { "epoch": 0.855503204340933, "grad_norm": 10.766393891492072, "learning_rate": 1.2444548834029922e-07, "loss": -0.3864, "step": 9578 }, { "epoch": 0.8556818435567067, "grad_norm": 9.5609436763289, "learning_rate": 1.2414436548818728e-07, "loss": 1.7319, "step": 9580 }, { "epoch": 0.8558604827724806, "grad_norm": 4.6179783074491905, "learning_rate": 1.2384358328117116e-07, "loss": -0.4642, "step": 9582 }, { "epoch": 0.8560391219882545, "grad_norm": 3.2700630949506917, "learning_rate": 1.2354314183623393e-07, "loss": -0.1961, "step": 9584 }, { "epoch": 0.8562177612040283, "grad_norm": 5.08896622957616, "learning_rate": 1.2324304127022567e-07, "loss": 0.3086, "step": 9586 }, { "epoch": 0.8563964004198021, "grad_norm": 6.029133321498077, "learning_rate": 1.2294328169986402e-07, "loss": -0.2939, "step": 9588 }, { "epoch": 0.856575039635576, "grad_norm": 5.298002860022781, "learning_rate": 1.2264386324173391e-07, "loss": 0.2938, "step": 9590 }, { "epoch": 0.8567536788513499, "grad_norm": 3.340015357890197, "learning_rate": 1.2234478601228803e-07, "loss": -0.0362, "step": 9592 }, { "epoch": 0.8569323180671237, "grad_norm": 8.128849936734305, "learning_rate": 1.2204605012784552e-07, "loss": 0.0097, "step": 9594 }, { "epoch": 0.8571109572828975, "grad_norm": 10.097415978766161, "learning_rate": 1.2174765570459332e-07, "loss": -1.2176, "step": 9596 }, { "epoch": 0.8572895964986714, "grad_norm": 17.40586953971544, "learning_rate": 1.2144960285858553e-07, "loss": -1.0979, "step": 9598 }, { "epoch": 0.8574682357144452, "grad_norm": 7.569455817755536, "learning_rate": 1.211518917057438e-07, "loss": -1.4831, "step": 9600 }, { "epoch": 0.8576468749302191, "grad_norm": 8.501187409881615, "learning_rate": 1.2085452236185578e-07, "loss": 0.883, "step": 9602 }, { "epoch": 0.857825514145993, "grad_norm": 19.525655448745503, "learning_rate": 1.2055749494257716e-07, "loss": -0.9699, "step": 9604 }, { "epoch": 0.8580041533617667, "grad_norm": 9.079980867340499, "learning_rate": 1.2026080956343032e-07, "loss": -0.057, "step": 9606 }, { "epoch": 0.8581827925775406, "grad_norm": 8.098623333020045, "learning_rate": 1.199644663398046e-07, "loss": 0.0085, "step": 9608 }, { "epoch": 0.8583614317933145, "grad_norm": 5.911611232936818, "learning_rate": 1.1966846538695628e-07, "loss": -0.266, "step": 9610 }, { "epoch": 0.8585400710090882, "grad_norm": 10.481454365693589, "learning_rate": 1.193728068200087e-07, "loss": 0.3301, "step": 9612 }, { "epoch": 0.8587187102248621, "grad_norm": 11.912979587055522, "learning_rate": 1.1907749075395146e-07, "loss": -0.0946, "step": 9614 }, { "epoch": 0.858897349440636, "grad_norm": 6.467145195523329, "learning_rate": 1.1878251730364153e-07, "loss": -0.8435, "step": 9616 }, { "epoch": 0.8590759886564098, "grad_norm": 6.277207017606997, "learning_rate": 1.184878865838026e-07, "loss": -0.7333, "step": 9618 }, { "epoch": 0.8592546278721837, "grad_norm": 3.449815916595904, "learning_rate": 1.1819359870902468e-07, "loss": -0.2074, "step": 9620 }, { "epoch": 0.8594332670879575, "grad_norm": 9.095124550459929, "learning_rate": 1.1789965379376487e-07, "loss": 1.0192, "step": 9622 }, { "epoch": 0.8596119063037313, "grad_norm": 6.533583812712908, "learning_rate": 1.1760605195234663e-07, "loss": -1.2171, "step": 9624 }, { "epoch": 0.8597905455195052, "grad_norm": 3.15660576656559, "learning_rate": 1.1731279329896027e-07, "loss": 0.8358, "step": 9626 }, { "epoch": 0.8599691847352791, "grad_norm": 6.95640585999794, "learning_rate": 1.1701987794766188e-07, "loss": -0.1342, "step": 9628 }, { "epoch": 0.8601478239510528, "grad_norm": 13.797824433817286, "learning_rate": 1.1672730601237458e-07, "loss": -1.3284, "step": 9630 }, { "epoch": 0.8603264631668267, "grad_norm": 13.141754791280372, "learning_rate": 1.1643507760688842e-07, "loss": -0.888, "step": 9632 }, { "epoch": 0.8605051023826006, "grad_norm": 3.4741209221824447, "learning_rate": 1.1614319284485919e-07, "loss": -0.0898, "step": 9634 }, { "epoch": 0.8606837415983744, "grad_norm": 8.38482181184587, "learning_rate": 1.1585165183980871e-07, "loss": -0.4846, "step": 9636 }, { "epoch": 0.8608623808141482, "grad_norm": 4.927937192368002, "learning_rate": 1.1556045470512588e-07, "loss": -0.3679, "step": 9638 }, { "epoch": 0.8610410200299221, "grad_norm": 9.099646056282046, "learning_rate": 1.1526960155406551e-07, "loss": -0.4562, "step": 9640 }, { "epoch": 0.8612196592456959, "grad_norm": 4.511428097036618, "learning_rate": 1.1497909249974858e-07, "loss": -0.7158, "step": 9642 }, { "epoch": 0.8613982984614698, "grad_norm": 26.79315754097723, "learning_rate": 1.1468892765516225e-07, "loss": -0.4306, "step": 9644 }, { "epoch": 0.8615769376772436, "grad_norm": 2.5700966416827815, "learning_rate": 1.1439910713316026e-07, "loss": -0.5114, "step": 9646 }, { "epoch": 0.8617555768930174, "grad_norm": 16.82189361327458, "learning_rate": 1.1410963104646143e-07, "loss": -0.3247, "step": 9648 }, { "epoch": 0.8619342161087913, "grad_norm": 7.991171018036215, "learning_rate": 1.1382049950765138e-07, "loss": -0.4745, "step": 9650 }, { "epoch": 0.8621128553245652, "grad_norm": 14.13263975156999, "learning_rate": 1.135317126291816e-07, "loss": 0.8144, "step": 9652 }, { "epoch": 0.8622914945403389, "grad_norm": 17.47428356006334, "learning_rate": 1.1324327052336991e-07, "loss": -0.935, "step": 9654 }, { "epoch": 0.8624701337561128, "grad_norm": 3.609425598054098, "learning_rate": 1.1295517330239913e-07, "loss": -1.1334, "step": 9656 }, { "epoch": 0.8626487729718867, "grad_norm": 13.519307775890665, "learning_rate": 1.1266742107831862e-07, "loss": 0.3081, "step": 9658 }, { "epoch": 0.8628274121876605, "grad_norm": 10.209913440303795, "learning_rate": 1.1238001396304341e-07, "loss": -0.706, "step": 9660 }, { "epoch": 0.8630060514034343, "grad_norm": 13.251506791719066, "learning_rate": 1.12092952068354e-07, "loss": 0.4951, "step": 9662 }, { "epoch": 0.8631846906192082, "grad_norm": 18.47432019295424, "learning_rate": 1.1180623550589719e-07, "loss": -1.2118, "step": 9664 }, { "epoch": 0.863363329834982, "grad_norm": 7.616575746668336, "learning_rate": 1.1151986438718541e-07, "loss": -0.1647, "step": 9666 }, { "epoch": 0.8635419690507559, "grad_norm": 3.7629334260321428, "learning_rate": 1.1123383882359594e-07, "loss": -0.0347, "step": 9668 }, { "epoch": 0.8637206082665297, "grad_norm": 16.233419087292482, "learning_rate": 1.1094815892637255e-07, "loss": -0.7476, "step": 9670 }, { "epoch": 0.8638992474823035, "grad_norm": 8.112232212061137, "learning_rate": 1.1066282480662414e-07, "loss": -1.2822, "step": 9672 }, { "epoch": 0.8640778866980774, "grad_norm": 12.645310183182785, "learning_rate": 1.103778365753254e-07, "loss": -1.2336, "step": 9674 }, { "epoch": 0.8642565259138513, "grad_norm": 13.922742480514065, "learning_rate": 1.1009319434331621e-07, "loss": 0.2609, "step": 9676 }, { "epoch": 0.864435165129625, "grad_norm": 5.852381894607702, "learning_rate": 1.0980889822130213e-07, "loss": -0.8766, "step": 9678 }, { "epoch": 0.8646138043453989, "grad_norm": 15.356840496952103, "learning_rate": 1.0952494831985403e-07, "loss": -0.0525, "step": 9680 }, { "epoch": 0.8647924435611728, "grad_norm": 8.816635135625502, "learning_rate": 1.0924134474940771e-07, "loss": -0.0835, "step": 9682 }, { "epoch": 0.8649710827769466, "grad_norm": 3.502561721639121, "learning_rate": 1.089580876202647e-07, "loss": -0.2808, "step": 9684 }, { "epoch": 0.8651497219927204, "grad_norm": 11.272307595078587, "learning_rate": 1.0867517704259244e-07, "loss": -0.9372, "step": 9686 }, { "epoch": 0.8653283612084943, "grad_norm": 17.524497188478314, "learning_rate": 1.08392613126422e-07, "loss": -1.2526, "step": 9688 }, { "epoch": 0.8655070004242681, "grad_norm": 9.185712783226869, "learning_rate": 1.0811039598165095e-07, "loss": 0.206, "step": 9690 }, { "epoch": 0.865685639640042, "grad_norm": 10.303442474185395, "learning_rate": 1.0782852571804135e-07, "loss": -0.4562, "step": 9692 }, { "epoch": 0.8658642788558158, "grad_norm": 3.769305571288443, "learning_rate": 1.0754700244522075e-07, "loss": 0.1454, "step": 9694 }, { "epoch": 0.8660429180715896, "grad_norm": 6.223738773149645, "learning_rate": 1.0726582627268133e-07, "loss": -0.0192, "step": 9696 }, { "epoch": 0.8662215572873635, "grad_norm": 3.394418308499436, "learning_rate": 1.0698499730978061e-07, "loss": -0.7044, "step": 9698 }, { "epoch": 0.8664001965031374, "grad_norm": 4.938516825986499, "learning_rate": 1.0670451566574102e-07, "loss": 0.1277, "step": 9700 }, { "epoch": 0.8665788357189111, "grad_norm": 15.93907103929533, "learning_rate": 1.064243814496496e-07, "loss": -0.4163, "step": 9702 }, { "epoch": 0.866757474934685, "grad_norm": 8.942233026159665, "learning_rate": 1.0614459477045856e-07, "loss": -1.036, "step": 9704 }, { "epoch": 0.8669361141504589, "grad_norm": 3.9924009381773855, "learning_rate": 1.0586515573698485e-07, "loss": 0.3306, "step": 9706 }, { "epoch": 0.8671147533662327, "grad_norm": 4.681114628748207, "learning_rate": 1.055860644579104e-07, "loss": 0.1542, "step": 9708 }, { "epoch": 0.8672933925820066, "grad_norm": 4.437474425336184, "learning_rate": 1.0530732104178153e-07, "loss": 0.2161, "step": 9710 }, { "epoch": 0.8674720317977804, "grad_norm": 7.926818388739987, "learning_rate": 1.050289255970096e-07, "loss": -0.1614, "step": 9712 }, { "epoch": 0.8676506710135542, "grad_norm": 10.74509513205782, "learning_rate": 1.0475087823187056e-07, "loss": -1.2445, "step": 9714 }, { "epoch": 0.8678293102293281, "grad_norm": 21.500867249496444, "learning_rate": 1.0447317905450448e-07, "loss": -1.4809, "step": 9716 }, { "epoch": 0.868007949445102, "grad_norm": 1.0828504525451377, "learning_rate": 1.0419582817291695e-07, "loss": 0.2568, "step": 9718 }, { "epoch": 0.8681865886608757, "grad_norm": 3.5059075227369303, "learning_rate": 1.0391882569497757e-07, "loss": -0.2021, "step": 9720 }, { "epoch": 0.8683652278766496, "grad_norm": 6.289284497246462, "learning_rate": 1.0364217172842005e-07, "loss": 0.214, "step": 9722 }, { "epoch": 0.8685438670924235, "grad_norm": 6.962918839792063, "learning_rate": 1.0336586638084321e-07, "loss": 0.4318, "step": 9724 }, { "epoch": 0.8687225063081974, "grad_norm": 17.328867312492886, "learning_rate": 1.030899097597101e-07, "loss": 1.1088, "step": 9726 }, { "epoch": 0.8689011455239711, "grad_norm": 5.138678184978072, "learning_rate": 1.0281430197234797e-07, "loss": 0.4716, "step": 9728 }, { "epoch": 0.869079784739745, "grad_norm": 8.54145041091209, "learning_rate": 1.0253904312594864e-07, "loss": -1.8865, "step": 9730 }, { "epoch": 0.8692584239555189, "grad_norm": 14.052783644154422, "learning_rate": 1.0226413332756789e-07, "loss": -1.3993, "step": 9732 }, { "epoch": 0.8694370631712927, "grad_norm": 4.786412661904941, "learning_rate": 1.0198957268412622e-07, "loss": -0.0127, "step": 9734 }, { "epoch": 0.8696157023870665, "grad_norm": 6.122694866277194, "learning_rate": 1.0171536130240776e-07, "loss": -0.6428, "step": 9736 }, { "epoch": 0.8697943416028404, "grad_norm": 10.834275737755558, "learning_rate": 1.014414992890611e-07, "loss": -0.8671, "step": 9738 }, { "epoch": 0.8699729808186142, "grad_norm": 8.421527579365844, "learning_rate": 1.0116798675059912e-07, "loss": -0.0839, "step": 9740 }, { "epoch": 0.8701516200343881, "grad_norm": 2.9267918632799828, "learning_rate": 1.0089482379339842e-07, "loss": -0.5057, "step": 9742 }, { "epoch": 0.8703302592501619, "grad_norm": 13.578491346467535, "learning_rate": 1.006220105237e-07, "loss": -1.0013, "step": 9744 }, { "epoch": 0.8705088984659357, "grad_norm": 6.2014120893434805, "learning_rate": 1.0034954704760857e-07, "loss": -0.3588, "step": 9746 }, { "epoch": 0.8706875376817096, "grad_norm": 14.676292786200781, "learning_rate": 1.0007743347109321e-07, "loss": -0.2796, "step": 9748 }, { "epoch": 0.8708661768974835, "grad_norm": 18.363491061208325, "learning_rate": 9.980566989998585e-08, "loss": 0.4567, "step": 9750 }, { "epoch": 0.8710448161132572, "grad_norm": 5.41833873274851, "learning_rate": 9.953425643998381e-08, "loss": -1.3665, "step": 9752 }, { "epoch": 0.8712234553290311, "grad_norm": 14.123036475976326, "learning_rate": 9.926319319664733e-08, "loss": 0.0352, "step": 9754 }, { "epoch": 0.871402094544805, "grad_norm": 4.3857562129243, "learning_rate": 9.899248027540031e-08, "loss": 0.6051, "step": 9756 }, { "epoch": 0.8715807337605788, "grad_norm": 2.3001768292992, "learning_rate": 9.872211778153083e-08, "loss": 0.563, "step": 9758 }, { "epoch": 0.8717593729763526, "grad_norm": 10.10527434160216, "learning_rate": 9.845210582019048e-08, "loss": -0.2185, "step": 9760 }, { "epoch": 0.8719380121921265, "grad_norm": 14.89931366639055, "learning_rate": 9.818244449639479e-08, "loss": -1.0154, "step": 9762 }, { "epoch": 0.8721166514079003, "grad_norm": 13.413787216889942, "learning_rate": 9.791313391502243e-08, "loss": -1.142, "step": 9764 }, { "epoch": 0.8722952906236742, "grad_norm": 19.82867368355302, "learning_rate": 9.764417418081605e-08, "loss": -0.3367, "step": 9766 }, { "epoch": 0.872473929839448, "grad_norm": 7.763970857057507, "learning_rate": 9.737556539838188e-08, "loss": -0.8115, "step": 9768 }, { "epoch": 0.8726525690552218, "grad_norm": 5.856835387131273, "learning_rate": 9.710730767218911e-08, "loss": -0.1074, "step": 9770 }, { "epoch": 0.8728312082709957, "grad_norm": 9.94944112923835, "learning_rate": 9.683940110657085e-08, "loss": -0.4231, "step": 9772 }, { "epoch": 0.8730098474867696, "grad_norm": 6.982631935214385, "learning_rate": 9.657184580572408e-08, "loss": -0.4517, "step": 9774 }, { "epoch": 0.8731884867025433, "grad_norm": 13.85455465514438, "learning_rate": 9.630464187370801e-08, "loss": -1.8464, "step": 9776 }, { "epoch": 0.8733671259183172, "grad_norm": 15.349530220919782, "learning_rate": 9.603778941444607e-08, "loss": -0.7016, "step": 9778 }, { "epoch": 0.8735457651340911, "grad_norm": 2.0496542894965035, "learning_rate": 9.577128853172478e-08, "loss": -0.6447, "step": 9780 }, { "epoch": 0.8737244043498649, "grad_norm": 5.881579022826933, "learning_rate": 9.550513932919391e-08, "loss": -0.1869, "step": 9782 }, { "epoch": 0.8739030435656387, "grad_norm": 18.208367505539776, "learning_rate": 9.52393419103663e-08, "loss": 0.5999, "step": 9784 }, { "epoch": 0.8740816827814126, "grad_norm": 13.743013028608107, "learning_rate": 9.497389637861819e-08, "loss": -1.6537, "step": 9786 }, { "epoch": 0.8742603219971864, "grad_norm": 3.86996920945489, "learning_rate": 9.470880283718918e-08, "loss": -0.0212, "step": 9788 }, { "epoch": 0.8744389612129603, "grad_norm": 15.559250825630482, "learning_rate": 9.444406138918104e-08, "loss": -1.1456, "step": 9790 }, { "epoch": 0.8746176004287342, "grad_norm": 7.133903428348326, "learning_rate": 9.417967213755962e-08, "loss": -1.5716, "step": 9792 }, { "epoch": 0.8747962396445079, "grad_norm": 9.729147497862094, "learning_rate": 9.391563518515333e-08, "loss": 0.3039, "step": 9794 }, { "epoch": 0.8749748788602818, "grad_norm": 13.265932688385444, "learning_rate": 9.36519506346537e-08, "loss": 0.2107, "step": 9796 }, { "epoch": 0.8751535180760557, "grad_norm": 8.420661003643435, "learning_rate": 9.338861858861502e-08, "loss": 0.9911, "step": 9798 }, { "epoch": 0.8753321572918294, "grad_norm": 4.297178389502355, "learning_rate": 9.312563914945459e-08, "loss": 0.7554, "step": 9800 }, { "epoch": 0.8755107965076033, "grad_norm": 4.963018026146635, "learning_rate": 9.286301241945283e-08, "loss": -0.1291, "step": 9802 }, { "epoch": 0.8756894357233772, "grad_norm": 5.883202528467535, "learning_rate": 9.260073850075211e-08, "loss": 0.4796, "step": 9804 }, { "epoch": 0.875868074939151, "grad_norm": 8.78172306402021, "learning_rate": 9.233881749535877e-08, "loss": -0.1679, "step": 9806 }, { "epoch": 0.8760467141549249, "grad_norm": 6.130276645311113, "learning_rate": 9.207724950514128e-08, "loss": 0.0431, "step": 9808 }, { "epoch": 0.8762253533706987, "grad_norm": 14.846969737154332, "learning_rate": 9.181603463183063e-08, "loss": -0.8901, "step": 9810 }, { "epoch": 0.8764039925864725, "grad_norm": 8.87672858798807, "learning_rate": 9.155517297702076e-08, "loss": -1.3661, "step": 9812 }, { "epoch": 0.8765826318022464, "grad_norm": 14.27377370721305, "learning_rate": 9.129466464216806e-08, "loss": -1.4703, "step": 9814 }, { "epoch": 0.8767612710180203, "grad_norm": 6.804166964429517, "learning_rate": 9.103450972859184e-08, "loss": -0.0248, "step": 9816 }, { "epoch": 0.876939910233794, "grad_norm": 8.274854270366385, "learning_rate": 9.077470833747359e-08, "loss": -0.7349, "step": 9818 }, { "epoch": 0.8771185494495679, "grad_norm": 5.842710677965002, "learning_rate": 9.051526056985737e-08, "loss": -0.8912, "step": 9820 }, { "epoch": 0.8772971886653418, "grad_norm": 8.425161424669458, "learning_rate": 9.025616652665014e-08, "loss": 0.3683, "step": 9822 }, { "epoch": 0.8774758278811156, "grad_norm": 6.234155330755585, "learning_rate": 8.999742630862051e-08, "loss": -0.4236, "step": 9824 }, { "epoch": 0.8776544670968894, "grad_norm": 6.191078461354567, "learning_rate": 8.973904001639976e-08, "loss": -0.7802, "step": 9826 }, { "epoch": 0.8778331063126633, "grad_norm": 1.5676224474863707, "learning_rate": 8.948100775048229e-08, "loss": 1.23, "step": 9828 }, { "epoch": 0.8780117455284371, "grad_norm": 8.412963455380185, "learning_rate": 8.922332961122359e-08, "loss": 0.5152, "step": 9830 }, { "epoch": 0.878190384744211, "grad_norm": 7.7456372194325365, "learning_rate": 8.896600569884228e-08, "loss": 0.707, "step": 9832 }, { "epoch": 0.8783690239599848, "grad_norm": 7.012018512916825, "learning_rate": 8.870903611341873e-08, "loss": 0.2016, "step": 9834 }, { "epoch": 0.8785476631757586, "grad_norm": 9.339558659649354, "learning_rate": 8.84524209548958e-08, "loss": -0.8334, "step": 9836 }, { "epoch": 0.8787263023915325, "grad_norm": 6.253319431010222, "learning_rate": 8.819616032307853e-08, "loss": 0.0725, "step": 9838 }, { "epoch": 0.8789049416073064, "grad_norm": 16.910965182632335, "learning_rate": 8.794025431763374e-08, "loss": 0.1787, "step": 9840 }, { "epoch": 0.8790835808230801, "grad_norm": 10.1347782342979, "learning_rate": 8.768470303809084e-08, "loss": 0.1869, "step": 9842 }, { "epoch": 0.879262220038854, "grad_norm": 7.35013358066127, "learning_rate": 8.742950658384062e-08, "loss": -0.6806, "step": 9844 }, { "epoch": 0.8794408592546279, "grad_norm": 4.234813934886674, "learning_rate": 8.717466505413628e-08, "loss": 0.2978, "step": 9846 }, { "epoch": 0.8796194984704017, "grad_norm": 6.527071697745457, "learning_rate": 8.692017854809297e-08, "loss": -0.5857, "step": 9848 }, { "epoch": 0.8797981376861755, "grad_norm": 9.34045597049825, "learning_rate": 8.666604716468784e-08, "loss": 0.0739, "step": 9850 }, { "epoch": 0.8799767769019494, "grad_norm": 6.861865727586611, "learning_rate": 8.64122710027596e-08, "loss": -0.309, "step": 9852 }, { "epoch": 0.8801554161177232, "grad_norm": 6.083798464755849, "learning_rate": 8.61588501610091e-08, "loss": 0.3672, "step": 9854 }, { "epoch": 0.8803340553334971, "grad_norm": 8.216740003442952, "learning_rate": 8.590578473799903e-08, "loss": 0.6073, "step": 9856 }, { "epoch": 0.880512694549271, "grad_norm": 14.84380139793733, "learning_rate": 8.565307483215322e-08, "loss": -0.4271, "step": 9858 }, { "epoch": 0.8806913337650448, "grad_norm": 16.725410289892274, "learning_rate": 8.540072054175829e-08, "loss": -0.2148, "step": 9860 }, { "epoch": 0.8808699729808186, "grad_norm": 10.775490635014862, "learning_rate": 8.514872196496181e-08, "loss": 0.0287, "step": 9862 }, { "epoch": 0.8810486121965925, "grad_norm": 7.0289536180603545, "learning_rate": 8.489707919977296e-08, "loss": -0.1633, "step": 9864 }, { "epoch": 0.8812272514123664, "grad_norm": 12.156934704166474, "learning_rate": 8.4645792344063e-08, "loss": 0.772, "step": 9866 }, { "epoch": 0.8814058906281401, "grad_norm": 21.0690516574463, "learning_rate": 8.43948614955643e-08, "loss": 0.2566, "step": 9868 }, { "epoch": 0.881584529843914, "grad_norm": 13.833438796288654, "learning_rate": 8.414428675187113e-08, "loss": 0.2632, "step": 9870 }, { "epoch": 0.8817631690596879, "grad_norm": 10.906698398062039, "learning_rate": 8.389406821043899e-08, "loss": -0.334, "step": 9872 }, { "epoch": 0.8819418082754616, "grad_norm": 17.035595596690605, "learning_rate": 8.364420596858512e-08, "loss": -0.0318, "step": 9874 }, { "epoch": 0.8821204474912355, "grad_norm": 4.783895321750141, "learning_rate": 8.339470012348815e-08, "loss": -0.1809, "step": 9876 }, { "epoch": 0.8822990867070094, "grad_norm": 17.970373567955537, "learning_rate": 8.314555077218765e-08, "loss": -0.821, "step": 9878 }, { "epoch": 0.8824777259227832, "grad_norm": 11.389093743526221, "learning_rate": 8.289675801158502e-08, "loss": -1.2339, "step": 9880 }, { "epoch": 0.882656365138557, "grad_norm": 22.18829086398598, "learning_rate": 8.264832193844273e-08, "loss": -0.3393, "step": 9882 }, { "epoch": 0.8828350043543309, "grad_norm": 7.87122799590853, "learning_rate": 8.240024264938472e-08, "loss": -0.2713, "step": 9884 }, { "epoch": 0.8830136435701047, "grad_norm": 8.886838512242461, "learning_rate": 8.215252024089614e-08, "loss": -0.1838, "step": 9886 }, { "epoch": 0.8831922827858786, "grad_norm": 6.724773598869682, "learning_rate": 8.190515480932303e-08, "loss": 1.1678, "step": 9888 }, { "epoch": 0.8833709220016525, "grad_norm": 14.221348921136798, "learning_rate": 8.16581464508732e-08, "loss": -1.0453, "step": 9890 }, { "epoch": 0.8835495612174262, "grad_norm": 11.449179434212605, "learning_rate": 8.141149526161462e-08, "loss": -1.1131, "step": 9892 }, { "epoch": 0.8837282004332001, "grad_norm": 14.443843620389742, "learning_rate": 8.11652013374774e-08, "loss": -0.2214, "step": 9894 }, { "epoch": 0.883906839648974, "grad_norm": 7.079391669308225, "learning_rate": 8.091926477425237e-08, "loss": -0.28, "step": 9896 }, { "epoch": 0.8840854788647478, "grad_norm": 5.753361590077721, "learning_rate": 8.067368566759069e-08, "loss": 0.2581, "step": 9898 }, { "epoch": 0.8842641180805216, "grad_norm": 13.550314496321432, "learning_rate": 8.04284641130053e-08, "loss": -1.263, "step": 9900 }, { "epoch": 0.8844427572962955, "grad_norm": 8.811354975340901, "learning_rate": 8.018360020586989e-08, "loss": -1.3678, "step": 9902 }, { "epoch": 0.8846213965120693, "grad_norm": 10.845167259801316, "learning_rate": 7.993909404141897e-08, "loss": 0.2544, "step": 9904 }, { "epoch": 0.8848000357278432, "grad_norm": 9.90723267503816, "learning_rate": 7.969494571474777e-08, "loss": -1.3289, "step": 9906 }, { "epoch": 0.884978674943617, "grad_norm": 11.539695231679174, "learning_rate": 7.94511553208127e-08, "loss": -0.0776, "step": 9908 }, { "epoch": 0.8851573141593908, "grad_norm": 10.509083043539794, "learning_rate": 7.920772295443078e-08, "loss": -0.2785, "step": 9910 }, { "epoch": 0.8853359533751647, "grad_norm": 15.484627268378953, "learning_rate": 7.896464871027941e-08, "loss": -0.195, "step": 9912 }, { "epoch": 0.8855145925909386, "grad_norm": 4.526504241284029, "learning_rate": 7.872193268289718e-08, "loss": 0.5738, "step": 9914 }, { "epoch": 0.8856932318067123, "grad_norm": 12.205107973489266, "learning_rate": 7.847957496668367e-08, "loss": -0.8508, "step": 9916 }, { "epoch": 0.8858718710224862, "grad_norm": 4.275140119788268, "learning_rate": 7.823757565589828e-08, "loss": -0.2452, "step": 9918 }, { "epoch": 0.8860505102382601, "grad_norm": 6.6399201142434805, "learning_rate": 7.799593484466138e-08, "loss": 0.1782, "step": 9920 }, { "epoch": 0.8862291494540339, "grad_norm": 10.33554074080235, "learning_rate": 7.775465262695413e-08, "loss": -1.2871, "step": 9922 }, { "epoch": 0.8864077886698077, "grad_norm": 10.549967356265219, "learning_rate": 7.751372909661768e-08, "loss": -0.0, "step": 9924 }, { "epoch": 0.8865864278855816, "grad_norm": 10.334433056137955, "learning_rate": 7.727316434735443e-08, "loss": -1.2069, "step": 9926 }, { "epoch": 0.8867650671013554, "grad_norm": 7.290217431780531, "learning_rate": 7.703295847272662e-08, "loss": 0.2518, "step": 9928 }, { "epoch": 0.8869437063171293, "grad_norm": 6.3915988021623145, "learning_rate": 7.679311156615741e-08, "loss": 1.2653, "step": 9930 }, { "epoch": 0.8871223455329031, "grad_norm": 10.645864667056282, "learning_rate": 7.655362372092955e-08, "loss": -0.1435, "step": 9932 }, { "epoch": 0.8873009847486769, "grad_norm": 5.075416194593291, "learning_rate": 7.631449503018705e-08, "loss": -1.7812, "step": 9934 }, { "epoch": 0.8874796239644508, "grad_norm": 2.946705455898713, "learning_rate": 7.607572558693365e-08, "loss": -0.0441, "step": 9936 }, { "epoch": 0.8876582631802247, "grad_norm": 28.819187935500867, "learning_rate": 7.583731548403372e-08, "loss": 0.1102, "step": 9938 }, { "epoch": 0.8878369023959984, "grad_norm": 3.8669358296817475, "learning_rate": 7.559926481421152e-08, "loss": 0.0474, "step": 9940 }, { "epoch": 0.8880155416117723, "grad_norm": 8.190452451027298, "learning_rate": 7.536157367005203e-08, "loss": -0.0488, "step": 9942 }, { "epoch": 0.8881941808275462, "grad_norm": 4.265214725647912, "learning_rate": 7.51242421439997e-08, "loss": 0.1218, "step": 9944 }, { "epoch": 0.88837282004332, "grad_norm": 3.057536092125632, "learning_rate": 7.488727032835951e-08, "loss": 0.0551, "step": 9946 }, { "epoch": 0.8885514592590938, "grad_norm": 10.092510702388779, "learning_rate": 7.465065831529682e-08, "loss": -0.5056, "step": 9948 }, { "epoch": 0.8887300984748677, "grad_norm": 8.570505228718332, "learning_rate": 7.44144061968367e-08, "loss": -0.4154, "step": 9950 }, { "epoch": 0.8889087376906415, "grad_norm": 15.272311539379341, "learning_rate": 7.417851406486408e-08, "loss": -1.2813, "step": 9952 }, { "epoch": 0.8890873769064154, "grad_norm": 26.67413810331437, "learning_rate": 7.394298201112414e-08, "loss": -1.3805, "step": 9954 }, { "epoch": 0.8892660161221893, "grad_norm": 8.485753855353828, "learning_rate": 7.370781012722205e-08, "loss": 0.1447, "step": 9956 }, { "epoch": 0.889444655337963, "grad_norm": 14.303665566308956, "learning_rate": 7.347299850462285e-08, "loss": -0.4111, "step": 9958 }, { "epoch": 0.8896232945537369, "grad_norm": 5.26765362067006, "learning_rate": 7.323854723465139e-08, "loss": -0.665, "step": 9960 }, { "epoch": 0.8898019337695108, "grad_norm": 15.429783919793675, "learning_rate": 7.300445640849251e-08, "loss": -0.7512, "step": 9962 }, { "epoch": 0.8899805729852845, "grad_norm": 4.388825940567327, "learning_rate": 7.277072611719082e-08, "loss": -0.2282, "step": 9964 }, { "epoch": 0.8901592122010584, "grad_norm": 10.805417955356535, "learning_rate": 7.253735645165037e-08, "loss": 0.6768, "step": 9966 }, { "epoch": 0.8903378514168323, "grad_norm": 13.729315718833982, "learning_rate": 7.230434750263525e-08, "loss": -0.0687, "step": 9968 }, { "epoch": 0.8905164906326061, "grad_norm": 6.431542463853867, "learning_rate": 7.207169936076973e-08, "loss": -0.6684, "step": 9970 }, { "epoch": 0.89069512984838, "grad_norm": 5.04829674881409, "learning_rate": 7.183941211653676e-08, "loss": 0.961, "step": 9972 }, { "epoch": 0.8908737690641538, "grad_norm": 11.636779067903426, "learning_rate": 7.160748586027976e-08, "loss": -0.3899, "step": 9974 }, { "epoch": 0.8910524082799276, "grad_norm": 15.926110360377939, "learning_rate": 7.137592068220133e-08, "loss": -1.2142, "step": 9976 }, { "epoch": 0.8912310474957015, "grad_norm": 13.827654917857322, "learning_rate": 7.114471667236343e-08, "loss": 0.0335, "step": 9978 }, { "epoch": 0.8914096867114754, "grad_norm": 8.939016360247336, "learning_rate": 7.091387392068826e-08, "loss": -0.7845, "step": 9980 }, { "epoch": 0.8915883259272491, "grad_norm": 4.45810225889138, "learning_rate": 7.068339251695698e-08, "loss": 0.9571, "step": 9982 }, { "epoch": 0.891766965143023, "grad_norm": 12.699484290518146, "learning_rate": 7.045327255081046e-08, "loss": -0.771, "step": 9984 }, { "epoch": 0.8919456043587969, "grad_norm": 6.890291443793936, "learning_rate": 7.022351411174865e-08, "loss": -0.2241, "step": 9986 }, { "epoch": 0.8921242435745707, "grad_norm": 3.6782151048330323, "learning_rate": 6.99941172891313e-08, "loss": -0.4122, "step": 9988 }, { "epoch": 0.8923028827903445, "grad_norm": 8.07956657606885, "learning_rate": 6.976508217217714e-08, "loss": -1.1418, "step": 9990 }, { "epoch": 0.8924815220061184, "grad_norm": 7.34924708032769, "learning_rate": 6.95364088499647e-08, "loss": -1.3823, "step": 9992 }, { "epoch": 0.8926601612218923, "grad_norm": 5.919699558676118, "learning_rate": 6.93080974114313e-08, "loss": -0.6852, "step": 9994 }, { "epoch": 0.8928388004376661, "grad_norm": 9.671694752333957, "learning_rate": 6.908014794537397e-08, "loss": -0.6208, "step": 9996 }, { "epoch": 0.8930174396534399, "grad_norm": 6.898879593262516, "learning_rate": 6.885256054044841e-08, "loss": -0.2748, "step": 9998 }, { "epoch": 0.8931960788692138, "grad_norm": 19.844119091055667, "learning_rate": 6.862533528517e-08, "loss": -0.8042, "step": 10000 }, { "epoch": 0.8933747180849876, "grad_norm": 4.517590580060111, "learning_rate": 6.839847226791295e-08, "loss": 0.3871, "step": 10002 }, { "epoch": 0.8935533573007615, "grad_norm": 12.73257452283285, "learning_rate": 6.817197157691112e-08, "loss": 0.2462, "step": 10004 }, { "epoch": 0.8937319965165353, "grad_norm": 8.768986620058982, "learning_rate": 6.79458333002565e-08, "loss": -0.8276, "step": 10006 }, { "epoch": 0.8939106357323091, "grad_norm": 5.936108235320959, "learning_rate": 6.772005752590105e-08, "loss": -0.6034, "step": 10008 }, { "epoch": 0.894089274948083, "grad_norm": 7.4953822861302, "learning_rate": 6.749464434165531e-08, "loss": -0.2419, "step": 10010 }, { "epoch": 0.8942679141638569, "grad_norm": 8.274441205199345, "learning_rate": 6.726959383518871e-08, "loss": -0.0446, "step": 10012 }, { "epoch": 0.8944465533796306, "grad_norm": 3.849242217555822, "learning_rate": 6.70449060940299e-08, "loss": -1.2253, "step": 10014 }, { "epoch": 0.8946251925954045, "grad_norm": 6.662104507932113, "learning_rate": 6.682058120556632e-08, "loss": 0.7921, "step": 10016 }, { "epoch": 0.8948038318111784, "grad_norm": 14.41197478105772, "learning_rate": 6.659661925704441e-08, "loss": 0.1939, "step": 10018 }, { "epoch": 0.8949824710269522, "grad_norm": 30.46751900558148, "learning_rate": 6.637302033556891e-08, "loss": -2.134, "step": 10020 }, { "epoch": 0.895161110242726, "grad_norm": 11.561725322443905, "learning_rate": 6.614978452810416e-08, "loss": -0.3992, "step": 10022 }, { "epoch": 0.8953397494584999, "grad_norm": 6.522041217054895, "learning_rate": 6.59269119214727e-08, "loss": -0.3001, "step": 10024 }, { "epoch": 0.8955183886742737, "grad_norm": 5.571433961607173, "learning_rate": 6.570440260235621e-08, "loss": -0.6429, "step": 10026 }, { "epoch": 0.8956970278900476, "grad_norm": 3.9807594799601347, "learning_rate": 6.548225665729467e-08, "loss": -0.4173, "step": 10028 }, { "epoch": 0.8958756671058214, "grad_norm": 11.368494877566247, "learning_rate": 6.52604741726871e-08, "loss": -0.8708, "step": 10030 }, { "epoch": 0.8960543063215952, "grad_norm": 3.171530418282599, "learning_rate": 6.503905523479092e-08, "loss": 0.8357, "step": 10032 }, { "epoch": 0.8962329455373691, "grad_norm": 5.408660703997055, "learning_rate": 6.481799992972192e-08, "loss": -0.0689, "step": 10034 }, { "epoch": 0.896411584753143, "grad_norm": 6.653047606169008, "learning_rate": 6.459730834345534e-08, "loss": -1.4445, "step": 10036 }, { "epoch": 0.8965902239689167, "grad_norm": 5.72237693964278, "learning_rate": 6.437698056182428e-08, "loss": -0.2824, "step": 10038 }, { "epoch": 0.8967688631846906, "grad_norm": 6.790494322276482, "learning_rate": 6.41570166705201e-08, "loss": -0.753, "step": 10040 }, { "epoch": 0.8969475024004645, "grad_norm": 8.821881866722665, "learning_rate": 6.393741675509334e-08, "loss": -0.4105, "step": 10042 }, { "epoch": 0.8971261416162383, "grad_norm": 6.818829037551783, "learning_rate": 6.371818090095249e-08, "loss": -0.0029, "step": 10044 }, { "epoch": 0.8973047808320121, "grad_norm": 8.150907089211183, "learning_rate": 6.349930919336455e-08, "loss": -0.5791, "step": 10046 }, { "epoch": 0.897483420047786, "grad_norm": 4.729431391405392, "learning_rate": 6.328080171745509e-08, "loss": -0.2799, "step": 10048 }, { "epoch": 0.8976620592635598, "grad_norm": 9.186838969629665, "learning_rate": 6.30626585582078e-08, "loss": -0.4165, "step": 10050 }, { "epoch": 0.8978406984793337, "grad_norm": 5.287648495151353, "learning_rate": 6.28448798004647e-08, "loss": -0.4621, "step": 10052 }, { "epoch": 0.8980193376951076, "grad_norm": 6.111805308052209, "learning_rate": 6.262746552892606e-08, "loss": -0.8844, "step": 10054 }, { "epoch": 0.8981979769108813, "grad_norm": 12.685344644510058, "learning_rate": 6.241041582815044e-08, "loss": -0.2469, "step": 10056 }, { "epoch": 0.8983766161266552, "grad_norm": 5.169897340976449, "learning_rate": 6.219373078255507e-08, "loss": -0.5677, "step": 10058 }, { "epoch": 0.8985552553424291, "grad_norm": 11.984406193937565, "learning_rate": 6.197741047641436e-08, "loss": -0.9523, "step": 10060 }, { "epoch": 0.8987338945582029, "grad_norm": 4.5919945924574215, "learning_rate": 6.176145499386154e-08, "loss": -0.4534, "step": 10062 }, { "epoch": 0.8989125337739767, "grad_norm": 12.687623873702549, "learning_rate": 6.15458644188882e-08, "loss": -1.7433, "step": 10064 }, { "epoch": 0.8990911729897506, "grad_norm": 4.8062884385019125, "learning_rate": 6.133063883534295e-08, "loss": 0.0259, "step": 10066 }, { "epoch": 0.8992698122055244, "grad_norm": 12.136466670418846, "learning_rate": 6.11157783269336e-08, "loss": 0.0357, "step": 10068 }, { "epoch": 0.8994484514212983, "grad_norm": 8.216651329454367, "learning_rate": 6.090128297722563e-08, "loss": 0.9215, "step": 10070 }, { "epoch": 0.8996270906370721, "grad_norm": 6.048782717984237, "learning_rate": 6.068715286964187e-08, "loss": 0.2132, "step": 10072 }, { "epoch": 0.8998057298528459, "grad_norm": 7.283373870853188, "learning_rate": 6.047338808746394e-08, "loss": 1.3297, "step": 10074 }, { "epoch": 0.8999843690686198, "grad_norm": 6.264122837582003, "learning_rate": 6.025998871383098e-08, "loss": 0.3385, "step": 10076 }, { "epoch": 0.9001630082843937, "grad_norm": 11.778277844383371, "learning_rate": 6.004695483173994e-08, "loss": -0.1814, "step": 10078 }, { "epoch": 0.9003416475001674, "grad_norm": 6.13153163873566, "learning_rate": 5.983428652404587e-08, "loss": 0.2861, "step": 10080 }, { "epoch": 0.9005202867159413, "grad_norm": 14.052316733934669, "learning_rate": 5.96219838734614e-08, "loss": -0.0001, "step": 10082 }, { "epoch": 0.9006989259317152, "grad_norm": 12.267622125428574, "learning_rate": 5.941004696255736e-08, "loss": 0.1213, "step": 10084 }, { "epoch": 0.900877565147489, "grad_norm": 3.439818276773016, "learning_rate": 5.9198475873761477e-08, "loss": -0.7474, "step": 10086 }, { "epoch": 0.9010562043632628, "grad_norm": 7.881462957059803, "learning_rate": 5.898727068935994e-08, "loss": 0.5797, "step": 10088 }, { "epoch": 0.9012348435790367, "grad_norm": 6.019646902786618, "learning_rate": 5.877643149149669e-08, "loss": 0.6032, "step": 10090 }, { "epoch": 0.9014134827948105, "grad_norm": 30.60370452995897, "learning_rate": 5.856595836217315e-08, "loss": -1.0412, "step": 10092 }, { "epoch": 0.9015921220105844, "grad_norm": 11.980305723132718, "learning_rate": 5.835585138324783e-08, "loss": -0.0248, "step": 10094 }, { "epoch": 0.9017707612263582, "grad_norm": 9.10753391826051, "learning_rate": 5.814611063643749e-08, "loss": -0.1919, "step": 10096 }, { "epoch": 0.901949400442132, "grad_norm": 11.247197798251273, "learning_rate": 5.7936736203316296e-08, "loss": -0.2582, "step": 10098 }, { "epoch": 0.9021280396579059, "grad_norm": 12.58875109185714, "learning_rate": 5.77277281653159e-08, "loss": -0.6896, "step": 10100 }, { "epoch": 0.9023066788736798, "grad_norm": 11.983599581742828, "learning_rate": 5.7519086603725395e-08, "loss": -0.9654, "step": 10102 }, { "epoch": 0.9024853180894535, "grad_norm": 8.521251366643025, "learning_rate": 5.7310811599691755e-08, "loss": -0.4926, "step": 10104 }, { "epoch": 0.9026639573052274, "grad_norm": 3.7849647623132667, "learning_rate": 5.7102903234218405e-08, "loss": -0.2967, "step": 10106 }, { "epoch": 0.9028425965210013, "grad_norm": 11.477684594566265, "learning_rate": 5.689536158816721e-08, "loss": -0.263, "step": 10108 }, { "epoch": 0.9030212357367751, "grad_norm": 7.245812094424948, "learning_rate": 5.6688186742256835e-08, "loss": -0.607, "step": 10110 }, { "epoch": 0.9031998749525489, "grad_norm": 9.88815050071079, "learning_rate": 5.6481378777063714e-08, "loss": -0.3363, "step": 10112 }, { "epoch": 0.9033785141683228, "grad_norm": 19.473289780942977, "learning_rate": 5.627493777302106e-08, "loss": -0.5831, "step": 10114 }, { "epoch": 0.9035571533840966, "grad_norm": 7.8016553149364505, "learning_rate": 5.606886381041975e-08, "loss": 0.1, "step": 10116 }, { "epoch": 0.9037357925998705, "grad_norm": 4.371715075367482, "learning_rate": 5.586315696940802e-08, "loss": -0.549, "step": 10118 }, { "epoch": 0.9039144318156443, "grad_norm": 8.581734764593008, "learning_rate": 5.5657817329990417e-08, "loss": -0.3029, "step": 10120 }, { "epoch": 0.9040930710314182, "grad_norm": 14.174675207436964, "learning_rate": 5.5452844972029954e-08, "loss": -0.8664, "step": 10122 }, { "epoch": 0.904271710247192, "grad_norm": 11.826292352039097, "learning_rate": 5.524823997524619e-08, "loss": -0.7394, "step": 10124 }, { "epoch": 0.9044503494629659, "grad_norm": 10.457509853167798, "learning_rate": 5.504400241921547e-08, "loss": 0.1285, "step": 10126 }, { "epoch": 0.9046289886787398, "grad_norm": 8.487637674060599, "learning_rate": 5.484013238337182e-08, "loss": 0.3877, "step": 10128 }, { "epoch": 0.9048076278945135, "grad_norm": 5.377578826425869, "learning_rate": 5.463662994700591e-08, "loss": 0.3116, "step": 10130 }, { "epoch": 0.9049862671102874, "grad_norm": 4.2676199136254915, "learning_rate": 5.4433495189265764e-08, "loss": -0.7911, "step": 10132 }, { "epoch": 0.9051649063260613, "grad_norm": 6.080904994167867, "learning_rate": 5.423072818915619e-08, "loss": 0.1397, "step": 10134 }, { "epoch": 0.905343545541835, "grad_norm": 16.37313529310046, "learning_rate": 5.4028329025539e-08, "loss": -1.1826, "step": 10136 }, { "epoch": 0.9055221847576089, "grad_norm": 20.71726521829076, "learning_rate": 5.38262977771331e-08, "loss": -1.1295, "step": 10138 }, { "epoch": 0.9057008239733828, "grad_norm": 28.014492099235444, "learning_rate": 5.362463452251398e-08, "loss": -0.6478, "step": 10140 }, { "epoch": 0.9058794631891566, "grad_norm": 5.413599388055103, "learning_rate": 5.3423339340114334e-08, "loss": 0.3325, "step": 10142 }, { "epoch": 0.9060581024049305, "grad_norm": 10.270497737660861, "learning_rate": 5.322241230822333e-08, "loss": -1.501, "step": 10144 }, { "epoch": 0.9062367416207043, "grad_norm": 32.823597453732695, "learning_rate": 5.302185350498767e-08, "loss": -0.9731, "step": 10146 }, { "epoch": 0.9064153808364781, "grad_norm": 14.041533849674774, "learning_rate": 5.282166300840984e-08, "loss": -0.4436, "step": 10148 }, { "epoch": 0.906594020052252, "grad_norm": 7.564141717947861, "learning_rate": 5.2621840896349914e-08, "loss": -0.6768, "step": 10150 }, { "epoch": 0.9067726592680259, "grad_norm": 16.9590778359238, "learning_rate": 5.242238724652426e-08, "loss": -1.0282, "step": 10152 }, { "epoch": 0.9069512984837996, "grad_norm": 15.754491740762875, "learning_rate": 5.222330213650616e-08, "loss": 0.0142, "step": 10154 }, { "epoch": 0.9071299376995735, "grad_norm": 10.504784356890227, "learning_rate": 5.202458564372525e-08, "loss": 0.3375, "step": 10156 }, { "epoch": 0.9073085769153474, "grad_norm": 16.562727555924987, "learning_rate": 5.182623784546847e-08, "loss": -1.6484, "step": 10158 }, { "epoch": 0.9074872161311212, "grad_norm": 10.581556836910789, "learning_rate": 5.1628258818878333e-08, "loss": -0.0833, "step": 10160 }, { "epoch": 0.907665855346895, "grad_norm": 7.301487643059996, "learning_rate": 5.143064864095481e-08, "loss": 0.4883, "step": 10162 }, { "epoch": 0.9078444945626689, "grad_norm": 7.835609862760315, "learning_rate": 5.123340738855397e-08, "loss": -0.6455, "step": 10164 }, { "epoch": 0.9080231337784427, "grad_norm": 5.786181846443077, "learning_rate": 5.103653513838868e-08, "loss": 0.5228, "step": 10166 }, { "epoch": 0.9082017729942166, "grad_norm": 25.34761260639254, "learning_rate": 5.084003196702802e-08, "loss": -0.4433, "step": 10168 }, { "epoch": 0.9083804122099904, "grad_norm": 8.114777222758491, "learning_rate": 5.0643897950897626e-08, "loss": -1.8342, "step": 10170 }, { "epoch": 0.9085590514257642, "grad_norm": 5.48910501074353, "learning_rate": 5.0448133166279935e-08, "loss": -1.2559, "step": 10172 }, { "epoch": 0.9087376906415381, "grad_norm": 11.795685603327136, "learning_rate": 5.025273768931293e-08, "loss": -0.2682, "step": 10174 }, { "epoch": 0.908916329857312, "grad_norm": 19.690995163458442, "learning_rate": 5.005771159599148e-08, "loss": -1.6792, "step": 10176 }, { "epoch": 0.9090949690730857, "grad_norm": 8.621637103402893, "learning_rate": 4.986305496216725e-08, "loss": 0.0873, "step": 10178 }, { "epoch": 0.9092736082888596, "grad_norm": 3.2093032427736006, "learning_rate": 4.966876786354723e-08, "loss": -0.2373, "step": 10180 }, { "epoch": 0.9094522475046335, "grad_norm": 8.516162092437412, "learning_rate": 4.947485037569543e-08, "loss": 0.3644, "step": 10182 }, { "epoch": 0.9096308867204073, "grad_norm": 6.116662091800043, "learning_rate": 4.9281302574031846e-08, "loss": -1.0913, "step": 10184 }, { "epoch": 0.9098095259361811, "grad_norm": 6.762838234613396, "learning_rate": 4.90881245338326e-08, "loss": 0.368, "step": 10186 }, { "epoch": 0.909988165151955, "grad_norm": 8.772388697885933, "learning_rate": 4.889531633023014e-08, "loss": -0.3483, "step": 10188 }, { "epoch": 0.9101668043677288, "grad_norm": 7.2725267014155905, "learning_rate": 4.870287803821316e-08, "loss": -0.1104, "step": 10190 }, { "epoch": 0.9103454435835027, "grad_norm": 4.83618518398428, "learning_rate": 4.8510809732626337e-08, "loss": 0.2803, "step": 10192 }, { "epoch": 0.9105240827992765, "grad_norm": 9.099190778698315, "learning_rate": 4.831911148817036e-08, "loss": 0.1726, "step": 10194 }, { "epoch": 0.9107027220150503, "grad_norm": 1.9934655970237696, "learning_rate": 4.812778337940216e-08, "loss": 0.4121, "step": 10196 }, { "epoch": 0.9108813612308242, "grad_norm": 3.5216521415164173, "learning_rate": 4.7936825480734636e-08, "loss": -0.1703, "step": 10198 }, { "epoch": 0.9110600004465981, "grad_norm": 4.998060231640027, "learning_rate": 4.774623786643683e-08, "loss": -1.4581, "step": 10200 }, { "epoch": 0.9112386396623718, "grad_norm": 18.788875713728522, "learning_rate": 4.755602061063369e-08, "loss": -1.0334, "step": 10202 }, { "epoch": 0.9114172788781457, "grad_norm": 8.15659281861559, "learning_rate": 4.736617378730601e-08, "loss": 0.8177, "step": 10204 }, { "epoch": 0.9115959180939196, "grad_norm": 6.0066888400793, "learning_rate": 4.7176697470290624e-08, "loss": -0.5034, "step": 10206 }, { "epoch": 0.9117745573096934, "grad_norm": 7.86633623474465, "learning_rate": 4.698759173328015e-08, "loss": -1.1512, "step": 10208 }, { "epoch": 0.9119531965254672, "grad_norm": 12.125025169110474, "learning_rate": 4.6798856649823303e-08, "loss": -1.0165, "step": 10210 }, { "epoch": 0.9121318357412411, "grad_norm": 4.7315365179225415, "learning_rate": 4.66104922933247e-08, "loss": -0.5046, "step": 10212 }, { "epoch": 0.9123104749570149, "grad_norm": 6.283768941146141, "learning_rate": 4.642249873704418e-08, "loss": -1.6894, "step": 10214 }, { "epoch": 0.9124891141727888, "grad_norm": 12.508482839430846, "learning_rate": 4.6234876054097927e-08, "loss": -0.8995, "step": 10216 }, { "epoch": 0.9126677533885627, "grad_norm": 27.398630194289595, "learning_rate": 4.6047624317457787e-08, "loss": -0.7141, "step": 10218 }, { "epoch": 0.9128463926043364, "grad_norm": 4.822349476539102, "learning_rate": 4.586074359995118e-08, "loss": 0.1634, "step": 10220 }, { "epoch": 0.9130250318201103, "grad_norm": 5.473417023651215, "learning_rate": 4.56742339742614e-08, "loss": 0.1436, "step": 10222 }, { "epoch": 0.9132036710358842, "grad_norm": 7.527877369026775, "learning_rate": 4.5488095512927317e-08, "loss": 0.5236, "step": 10224 }, { "epoch": 0.913382310251658, "grad_norm": 2.3724236819952904, "learning_rate": 4.530232828834346e-08, "loss": 0.5675, "step": 10226 }, { "epoch": 0.9135609494674318, "grad_norm": 9.51867847277993, "learning_rate": 4.511693237275993e-08, "loss": -0.1666, "step": 10228 }, { "epoch": 0.9137395886832057, "grad_norm": 13.167819266431266, "learning_rate": 4.4931907838282265e-08, "loss": -0.3483, "step": 10230 }, { "epoch": 0.9139182278989795, "grad_norm": 4.153302287500565, "learning_rate": 4.474725475687236e-08, "loss": 0.2894, "step": 10232 }, { "epoch": 0.9140968671147534, "grad_norm": 9.634019861100663, "learning_rate": 4.456297320034641e-08, "loss": -1.1123, "step": 10234 }, { "epoch": 0.9142755063305272, "grad_norm": 4.786067555124135, "learning_rate": 4.437906324037699e-08, "loss": 0.3652, "step": 10236 }, { "epoch": 0.914454145546301, "grad_norm": 8.679919111868164, "learning_rate": 4.419552494849188e-08, "loss": 0.7352, "step": 10238 }, { "epoch": 0.9146327847620749, "grad_norm": 13.868446245687771, "learning_rate": 4.401235839607431e-08, "loss": 0.3927, "step": 10240 }, { "epoch": 0.9148114239778488, "grad_norm": 16.69902092353583, "learning_rate": 4.382956365436297e-08, "loss": -0.6, "step": 10242 }, { "epoch": 0.9149900631936225, "grad_norm": 7.767998499594169, "learning_rate": 4.364714079445187e-08, "loss": 0.9535, "step": 10244 }, { "epoch": 0.9151687024093964, "grad_norm": 6.708461531209869, "learning_rate": 4.346508988729059e-08, "loss": -0.7692, "step": 10246 }, { "epoch": 0.9153473416251703, "grad_norm": 3.9915299144619265, "learning_rate": 4.328341100368371e-08, "loss": 0.762, "step": 10248 }, { "epoch": 0.915525980840944, "grad_norm": 4.85605991536465, "learning_rate": 4.310210421429139e-08, "loss": 0.6885, "step": 10250 }, { "epoch": 0.9157046200567179, "grad_norm": 7.947108663048148, "learning_rate": 4.292116958962888e-08, "loss": 0.4998, "step": 10252 }, { "epoch": 0.9158832592724918, "grad_norm": 4.025621136428218, "learning_rate": 4.274060720006689e-08, "loss": -1.0718, "step": 10254 }, { "epoch": 0.9160618984882657, "grad_norm": 5.753960282226786, "learning_rate": 4.256041711583125e-08, "loss": -0.1201, "step": 10256 }, { "epoch": 0.9162405377040395, "grad_norm": 10.64817884750978, "learning_rate": 4.2380599407003006e-08, "loss": -0.8137, "step": 10258 }, { "epoch": 0.9164191769198133, "grad_norm": 8.557155751228398, "learning_rate": 4.220115414351832e-08, "loss": -1.0679, "step": 10260 }, { "epoch": 0.9165978161355872, "grad_norm": 9.680667953775194, "learning_rate": 4.2022081395168385e-08, "loss": -1.3814, "step": 10262 }, { "epoch": 0.916776455351361, "grad_norm": 11.155764785601216, "learning_rate": 4.184338123160003e-08, "loss": -1.0695, "step": 10264 }, { "epoch": 0.9169550945671349, "grad_norm": 15.667869413484649, "learning_rate": 4.166505372231466e-08, "loss": -1.0284, "step": 10266 }, { "epoch": 0.9171337337829087, "grad_norm": 5.445762407716108, "learning_rate": 4.14870989366688e-08, "loss": 0.1357, "step": 10268 }, { "epoch": 0.9173123729986825, "grad_norm": 6.6361930868008, "learning_rate": 4.130951694387419e-08, "loss": 1.2987, "step": 10270 }, { "epoch": 0.9174910122144564, "grad_norm": 12.89432830327265, "learning_rate": 4.113230781299748e-08, "loss": -0.3668, "step": 10272 }, { "epoch": 0.9176696514302303, "grad_norm": 7.066253744813017, "learning_rate": 4.0955471612960315e-08, "loss": -0.518, "step": 10274 }, { "epoch": 0.917848290646004, "grad_norm": 13.428995537365337, "learning_rate": 4.077900841253934e-08, "loss": -0.9696, "step": 10276 }, { "epoch": 0.9180269298617779, "grad_norm": 7.964067107871558, "learning_rate": 4.06029182803661e-08, "loss": -0.59, "step": 10278 }, { "epoch": 0.9182055690775518, "grad_norm": 25.228996379182057, "learning_rate": 4.0427201284927156e-08, "loss": -0.5814, "step": 10280 }, { "epoch": 0.9183842082933256, "grad_norm": 6.4371317045170695, "learning_rate": 4.025185749456361e-08, "loss": -0.2446, "step": 10282 }, { "epoch": 0.9185628475090994, "grad_norm": 13.986409266321326, "learning_rate": 4.0076886977471805e-08, "loss": -0.4945, "step": 10284 }, { "epoch": 0.9187414867248733, "grad_norm": 14.642962518505584, "learning_rate": 3.9902289801702515e-08, "loss": 0.0457, "step": 10286 }, { "epoch": 0.9189201259406471, "grad_norm": 11.898891467866346, "learning_rate": 3.9728066035161765e-08, "loss": -0.9791, "step": 10288 }, { "epoch": 0.919098765156421, "grad_norm": 5.84320881831495, "learning_rate": 3.955421574561002e-08, "loss": 1.004, "step": 10290 }, { "epoch": 0.9192774043721949, "grad_norm": 8.808194887726756, "learning_rate": 3.938073900066263e-08, "loss": -0.7412, "step": 10292 }, { "epoch": 0.9194560435879686, "grad_norm": 13.164932394292386, "learning_rate": 3.920763586778964e-08, "loss": -0.2645, "step": 10294 }, { "epoch": 0.9196346828037425, "grad_norm": 9.421370105278983, "learning_rate": 3.9034906414315725e-08, "loss": -0.2292, "step": 10296 }, { "epoch": 0.9198133220195164, "grad_norm": 30.18108190701793, "learning_rate": 3.886255070742017e-08, "loss": 0.1765, "step": 10298 }, { "epoch": 0.9199919612352901, "grad_norm": 8.96035299082658, "learning_rate": 3.869056881413735e-08, "loss": -1.0054, "step": 10300 }, { "epoch": 0.920170600451064, "grad_norm": 7.4927699525943, "learning_rate": 3.8518960801355415e-08, "loss": -0.228, "step": 10302 }, { "epoch": 0.9203492396668379, "grad_norm": 4.082264169864987, "learning_rate": 3.834772673581788e-08, "loss": -0.1311, "step": 10304 }, { "epoch": 0.9205278788826117, "grad_norm": 9.707701262716748, "learning_rate": 3.8176866684122454e-08, "loss": -0.8637, "step": 10306 }, { "epoch": 0.9207065180983856, "grad_norm": 7.485381056795284, "learning_rate": 3.800638071272144e-08, "loss": 0.05, "step": 10308 }, { "epoch": 0.9208851573141594, "grad_norm": 11.078267213583828, "learning_rate": 3.7836268887921684e-08, "loss": -1.5745, "step": 10310 }, { "epoch": 0.9210637965299332, "grad_norm": 4.709239951198999, "learning_rate": 3.7666531275884595e-08, "loss": 0.2666, "step": 10312 }, { "epoch": 0.9212424357457071, "grad_norm": 5.0096654930295275, "learning_rate": 3.749716794262592e-08, "loss": -0.1001, "step": 10314 }, { "epoch": 0.921421074961481, "grad_norm": 10.91418168611103, "learning_rate": 3.732817895401574e-08, "loss": -0.6386, "step": 10316 }, { "epoch": 0.9215997141772547, "grad_norm": 5.917188093106321, "learning_rate": 3.7159564375778606e-08, "loss": 0.4964, "step": 10318 }, { "epoch": 0.9217783533930286, "grad_norm": 9.22544819226386, "learning_rate": 3.699132427349383e-08, "loss": 0.2012, "step": 10320 }, { "epoch": 0.9219569926088025, "grad_norm": 7.522220754818919, "learning_rate": 3.682345871259451e-08, "loss": 0.0878, "step": 10322 }, { "epoch": 0.9221356318245763, "grad_norm": 12.273585444286569, "learning_rate": 3.6655967758368324e-08, "loss": -0.1498, "step": 10324 }, { "epoch": 0.9223142710403501, "grad_norm": 8.457411942932715, "learning_rate": 3.648885147595748e-08, "loss": -0.8437, "step": 10326 }, { "epoch": 0.922492910256124, "grad_norm": 9.973195965002777, "learning_rate": 3.632210993035778e-08, "loss": 1.4999, "step": 10328 }, { "epoch": 0.9226715494718978, "grad_norm": 7.768028455326594, "learning_rate": 3.615574318642023e-08, "loss": -0.5411, "step": 10330 }, { "epoch": 0.9228501886876717, "grad_norm": 4.390858274079982, "learning_rate": 3.598975130884929e-08, "loss": -0.8244, "step": 10332 }, { "epoch": 0.9230288279034455, "grad_norm": 4.753866128117461, "learning_rate": 3.582413436220422e-08, "loss": 0.3373, "step": 10334 }, { "epoch": 0.9232074671192193, "grad_norm": 17.49236283548793, "learning_rate": 3.56588924108977e-08, "loss": -0.9399, "step": 10336 }, { "epoch": 0.9233861063349932, "grad_norm": 5.115826654392043, "learning_rate": 3.549402551919722e-08, "loss": 0.2295, "step": 10338 }, { "epoch": 0.9235647455507671, "grad_norm": 4.505241308661233, "learning_rate": 3.532953375122427e-08, "loss": -0.1107, "step": 10340 }, { "epoch": 0.9237433847665408, "grad_norm": 4.409246197068971, "learning_rate": 3.5165417170954226e-08, "loss": 0.2213, "step": 10342 }, { "epoch": 0.9239220239823147, "grad_norm": 8.738662786429161, "learning_rate": 3.500167584221681e-08, "loss": -0.2695, "step": 10344 }, { "epoch": 0.9241006631980886, "grad_norm": 8.199828206565519, "learning_rate": 3.4838309828695624e-08, "loss": 0.5047, "step": 10346 }, { "epoch": 0.9242793024138624, "grad_norm": 3.6937588092937728, "learning_rate": 3.467531919392841e-08, "loss": -0.2064, "step": 10348 }, { "epoch": 0.9244579416296362, "grad_norm": 5.982309009709084, "learning_rate": 3.451270400130646e-08, "loss": 0.8314, "step": 10350 }, { "epoch": 0.9246365808454101, "grad_norm": 12.358168744470067, "learning_rate": 3.4350464314075954e-08, "loss": -0.9916, "step": 10352 }, { "epoch": 0.9248152200611839, "grad_norm": 10.525010249841914, "learning_rate": 3.4188600195336426e-08, "loss": 0.5521, "step": 10354 }, { "epoch": 0.9249938592769578, "grad_norm": 11.073935250449695, "learning_rate": 3.4027111708041064e-08, "loss": -0.3335, "step": 10356 }, { "epoch": 0.9251724984927316, "grad_norm": 10.991800893832114, "learning_rate": 3.386599891499764e-08, "loss": -1.0741, "step": 10358 }, { "epoch": 0.9253511377085054, "grad_norm": 9.887201810601152, "learning_rate": 3.3705261878867354e-08, "loss": -0.2285, "step": 10360 }, { "epoch": 0.9255297769242793, "grad_norm": 20.46592970193275, "learning_rate": 3.354490066216553e-08, "loss": -1.3948, "step": 10362 }, { "epoch": 0.9257084161400532, "grad_norm": 4.461329775262916, "learning_rate": 3.3384915327260953e-08, "loss": 0.5872, "step": 10364 }, { "epoch": 0.9258870553558269, "grad_norm": 8.815208073073604, "learning_rate": 3.3225305936376734e-08, "loss": -0.5076, "step": 10366 }, { "epoch": 0.9260656945716008, "grad_norm": 10.643602310383528, "learning_rate": 3.306607255158944e-08, "loss": -1.1639, "step": 10368 }, { "epoch": 0.9262443337873747, "grad_norm": 11.995032235881808, "learning_rate": 3.29072152348292e-08, "loss": -0.3128, "step": 10370 }, { "epoch": 0.9264229730031485, "grad_norm": 13.81872694409542, "learning_rate": 3.274873404788026e-08, "loss": -0.6662, "step": 10372 }, { "epoch": 0.9266016122189223, "grad_norm": 16.47485470049262, "learning_rate": 3.2590629052380744e-08, "loss": -1.9429, "step": 10374 }, { "epoch": 0.9267802514346962, "grad_norm": 4.022437821423928, "learning_rate": 3.243290030982171e-08, "loss": 0.3063, "step": 10376 }, { "epoch": 0.92695889065047, "grad_norm": 19.54799048201031, "learning_rate": 3.227554788154863e-08, "loss": 0.0431, "step": 10378 }, { "epoch": 0.9271375298662439, "grad_norm": 5.1429786743457715, "learning_rate": 3.211857182876032e-08, "loss": -0.9191, "step": 10380 }, { "epoch": 0.9273161690820177, "grad_norm": 11.81106413334184, "learning_rate": 3.1961972212508956e-08, "loss": 0.4717, "step": 10382 }, { "epoch": 0.9274948082977915, "grad_norm": 8.505511572610809, "learning_rate": 3.180574909370082e-08, "loss": -0.1266, "step": 10384 }, { "epoch": 0.9276734475135654, "grad_norm": 5.5208026045417915, "learning_rate": 3.1649902533095515e-08, "loss": 1.0359, "step": 10386 }, { "epoch": 0.9278520867293393, "grad_norm": 8.329205469750654, "learning_rate": 3.149443259130613e-08, "loss": -1.2662, "step": 10388 }, { "epoch": 0.9280307259451132, "grad_norm": 13.193923615108893, "learning_rate": 3.133933932879928e-08, "loss": -0.5091, "step": 10390 }, { "epoch": 0.9282093651608869, "grad_norm": 11.669123436336635, "learning_rate": 3.118462280589518e-08, "loss": -1.3315, "step": 10392 }, { "epoch": 0.9283880043766608, "grad_norm": 6.63620228410913, "learning_rate": 3.103028308276745e-08, "loss": -0.8621, "step": 10394 }, { "epoch": 0.9285666435924347, "grad_norm": 4.951833273747824, "learning_rate": 3.0876320219443195e-08, "loss": 0.6546, "step": 10396 }, { "epoch": 0.9287452828082084, "grad_norm": 15.320287102382059, "learning_rate": 3.072273427580308e-08, "loss": -0.8433, "step": 10398 }, { "epoch": 0.9289239220239823, "grad_norm": 19.844686555162724, "learning_rate": 3.0569525311580745e-08, "loss": 0.8608, "step": 10400 }, { "epoch": 0.9291025612397562, "grad_norm": 9.20859353458789, "learning_rate": 3.041669338636388e-08, "loss": -0.2942, "step": 10402 }, { "epoch": 0.92928120045553, "grad_norm": 21.182994802407617, "learning_rate": 3.02642385595927e-08, "loss": -0.083, "step": 10404 }, { "epoch": 0.9294598396713039, "grad_norm": 5.37072726022379, "learning_rate": 3.0112160890561563e-08, "loss": -1.2846, "step": 10406 }, { "epoch": 0.9296384788870777, "grad_norm": 5.9101310774827445, "learning_rate": 2.9960460438417715e-08, "loss": -0.1775, "step": 10408 }, { "epoch": 0.9298171181028515, "grad_norm": 14.098409013741586, "learning_rate": 2.980913726216161e-08, "loss": -0.2247, "step": 10410 }, { "epoch": 0.9299957573186254, "grad_norm": 4.912671206065622, "learning_rate": 2.9658191420647293e-08, "loss": -0.1344, "step": 10412 }, { "epoch": 0.9301743965343993, "grad_norm": 3.2284501952402755, "learning_rate": 2.9507622972581915e-08, "loss": -0.2773, "step": 10414 }, { "epoch": 0.930353035750173, "grad_norm": 8.191288894009123, "learning_rate": 2.9357431976525625e-08, "loss": -1.3089, "step": 10416 }, { "epoch": 0.9305316749659469, "grad_norm": 9.702046160837993, "learning_rate": 2.920761849089204e-08, "loss": -1.689, "step": 10418 }, { "epoch": 0.9307103141817208, "grad_norm": 5.767486189158902, "learning_rate": 2.9058182573947986e-08, "loss": 1.0824, "step": 10420 }, { "epoch": 0.9308889533974946, "grad_norm": 5.217459330599049, "learning_rate": 2.8909124283813203e-08, "loss": 1.1732, "step": 10422 }, { "epoch": 0.9310675926132684, "grad_norm": 6.931446905753184, "learning_rate": 2.8760443678460754e-08, "loss": 0.3537, "step": 10424 }, { "epoch": 0.9312462318290423, "grad_norm": 6.0585742313112405, "learning_rate": 2.8612140815716723e-08, "loss": 0.7947, "step": 10426 }, { "epoch": 0.9314248710448161, "grad_norm": 4.686733539225394, "learning_rate": 2.846421575326019e-08, "loss": 0.5409, "step": 10428 }, { "epoch": 0.93160351026059, "grad_norm": 11.155783592754002, "learning_rate": 2.831666854862358e-08, "loss": -0.0517, "step": 10430 }, { "epoch": 0.9317821494763638, "grad_norm": 9.790713476225068, "learning_rate": 2.8169499259192098e-08, "loss": 0.7708, "step": 10432 }, { "epoch": 0.9319607886921376, "grad_norm": 9.751515979775073, "learning_rate": 2.8022707942204072e-08, "loss": -0.886, "step": 10434 }, { "epoch": 0.9321394279079115, "grad_norm": 5.016475617244462, "learning_rate": 2.787629465475072e-08, "loss": -0.3136, "step": 10436 }, { "epoch": 0.9323180671236854, "grad_norm": 9.015718086066238, "learning_rate": 2.7730259453776495e-08, "loss": -1.0385, "step": 10438 }, { "epoch": 0.9324967063394591, "grad_norm": 11.28205340369856, "learning_rate": 2.7584602396078516e-08, "loss": 0.3088, "step": 10440 }, { "epoch": 0.932675345555233, "grad_norm": 4.546255797077518, "learning_rate": 2.7439323538307023e-08, "loss": -0.2179, "step": 10442 }, { "epoch": 0.9328539847710069, "grad_norm": 3.264721321024978, "learning_rate": 2.7294422936965044e-08, "loss": 1.0634, "step": 10444 }, { "epoch": 0.9330326239867807, "grad_norm": 6.208730322244272, "learning_rate": 2.7149900648408386e-08, "loss": 0.1271, "step": 10446 }, { "epoch": 0.9332112632025545, "grad_norm": 23.8592151169332, "learning_rate": 2.7005756728846085e-08, "loss": -0.8016, "step": 10448 }, { "epoch": 0.9333899024183284, "grad_norm": 13.184504019500862, "learning_rate": 2.6861991234339632e-08, "loss": -1.9003, "step": 10450 }, { "epoch": 0.9335685416341022, "grad_norm": 14.495359006911757, "learning_rate": 2.671860422080363e-08, "loss": 0.2169, "step": 10452 }, { "epoch": 0.9337471808498761, "grad_norm": 11.367364689340654, "learning_rate": 2.6575595744005474e-08, "loss": 0.7249, "step": 10454 }, { "epoch": 0.93392582006565, "grad_norm": 5.362793657847595, "learning_rate": 2.6432965859564894e-08, "loss": 0.3272, "step": 10456 }, { "epoch": 0.9341044592814237, "grad_norm": 8.697206631915916, "learning_rate": 2.6290714622954846e-08, "loss": -0.8107, "step": 10458 }, { "epoch": 0.9342830984971976, "grad_norm": 16.865315689041093, "learning_rate": 2.6148842089500744e-08, "loss": -0.6626, "step": 10460 }, { "epoch": 0.9344617377129715, "grad_norm": 16.30414656805133, "learning_rate": 2.600734831438123e-08, "loss": 0.4729, "step": 10462 }, { "epoch": 0.9346403769287452, "grad_norm": 8.316356710749586, "learning_rate": 2.5866233352626943e-08, "loss": -0.3397, "step": 10464 }, { "epoch": 0.9348190161445191, "grad_norm": 1.9790687804526783, "learning_rate": 2.572549725912143e-08, "loss": 0.7971, "step": 10466 }, { "epoch": 0.934997655360293, "grad_norm": 8.737818494675798, "learning_rate": 2.5585140088601243e-08, "loss": -0.1628, "step": 10468 }, { "epoch": 0.9351762945760668, "grad_norm": 7.327266293055392, "learning_rate": 2.544516189565482e-08, "loss": -0.5703, "step": 10470 }, { "epoch": 0.9353549337918406, "grad_norm": 11.54313861444389, "learning_rate": 2.5305562734724062e-08, "loss": -1.6861, "step": 10472 }, { "epoch": 0.9355335730076145, "grad_norm": 5.737104800374359, "learning_rate": 2.516634266010287e-08, "loss": -1.1325, "step": 10474 }, { "epoch": 0.9357122122233883, "grad_norm": 9.930912929646043, "learning_rate": 2.5027501725938037e-08, "loss": -1.0128, "step": 10476 }, { "epoch": 0.9358908514391622, "grad_norm": 6.510048947948664, "learning_rate": 2.488903998622849e-08, "loss": -1.0646, "step": 10478 }, { "epoch": 0.936069490654936, "grad_norm": 5.080174428693581, "learning_rate": 2.475095749482603e-08, "loss": -0.2711, "step": 10480 }, { "epoch": 0.9362481298707098, "grad_norm": 12.209799061214495, "learning_rate": 2.4613254305434815e-08, "loss": -0.5939, "step": 10482 }, { "epoch": 0.9364267690864837, "grad_norm": 7.588517760843723, "learning_rate": 2.447593047161167e-08, "loss": 0.8732, "step": 10484 }, { "epoch": 0.9366054083022576, "grad_norm": 10.84247047284842, "learning_rate": 2.4338986046765542e-08, "loss": -1.0311, "step": 10486 }, { "epoch": 0.9367840475180313, "grad_norm": 10.626303559165951, "learning_rate": 2.4202421084158265e-08, "loss": 0.8288, "step": 10488 }, { "epoch": 0.9369626867338052, "grad_norm": 8.180020942194837, "learning_rate": 2.4066235636903466e-08, "loss": -1.2259, "step": 10490 }, { "epoch": 0.9371413259495791, "grad_norm": 12.851954899974423, "learning_rate": 2.3930429757967662e-08, "loss": 0.1333, "step": 10492 }, { "epoch": 0.9373199651653529, "grad_norm": 4.374709201412572, "learning_rate": 2.379500350016983e-08, "loss": 0.3615, "step": 10494 }, { "epoch": 0.9374986043811268, "grad_norm": 5.865931553011776, "learning_rate": 2.3659956916180946e-08, "loss": 0.2886, "step": 10496 }, { "epoch": 0.9376772435969006, "grad_norm": 7.8644905809065575, "learning_rate": 2.352529005852433e-08, "loss": -0.4588, "step": 10498 }, { "epoch": 0.9378558828126744, "grad_norm": 4.728715693051644, "learning_rate": 2.3391002979575748e-08, "loss": -0.4251, "step": 10500 }, { "epoch": 0.9380345220284483, "grad_norm": 16.816469312454345, "learning_rate": 2.3257095731563535e-08, "loss": 0.3349, "step": 10502 }, { "epoch": 0.9382131612442222, "grad_norm": 24.355334364605195, "learning_rate": 2.3123568366567703e-08, "loss": -0.7314, "step": 10504 }, { "epoch": 0.9383918004599959, "grad_norm": 6.789449349324363, "learning_rate": 2.2990420936520927e-08, "loss": 0.6245, "step": 10506 }, { "epoch": 0.9385704396757698, "grad_norm": 12.559853649866096, "learning_rate": 2.2857653493208227e-08, "loss": 0.0706, "step": 10508 }, { "epoch": 0.9387490788915437, "grad_norm": 6.865735556919508, "learning_rate": 2.2725266088266305e-08, "loss": -0.0995, "step": 10510 }, { "epoch": 0.9389277181073175, "grad_norm": 8.986706983360765, "learning_rate": 2.259325877318452e-08, "loss": 1.178, "step": 10512 }, { "epoch": 0.9391063573230913, "grad_norm": 16.491844242342086, "learning_rate": 2.2461631599304144e-08, "loss": 0.0903, "step": 10514 }, { "epoch": 0.9392849965388652, "grad_norm": 3.6512537018085585, "learning_rate": 2.2330384617818998e-08, "loss": 0.5357, "step": 10516 }, { "epoch": 0.9394636357546391, "grad_norm": 5.85477390963639, "learning_rate": 2.2199517879774587e-08, "loss": 0.761, "step": 10518 }, { "epoch": 0.9396422749704129, "grad_norm": 16.389006004705276, "learning_rate": 2.2069031436068642e-08, "loss": -0.3284, "step": 10520 }, { "epoch": 0.9398209141861867, "grad_norm": 5.195731927677007, "learning_rate": 2.193892533745123e-08, "loss": -0.2058, "step": 10522 }, { "epoch": 0.9399995534019606, "grad_norm": 3.61568042181824, "learning_rate": 2.180919963452388e-08, "loss": 0.3138, "step": 10524 }, { "epoch": 0.9401781926177344, "grad_norm": 5.310901895141666, "learning_rate": 2.1679854377741115e-08, "loss": -0.2553, "step": 10526 }, { "epoch": 0.9403568318335083, "grad_norm": 6.052293818671504, "learning_rate": 2.1550889617408698e-08, "loss": -0.9219, "step": 10528 }, { "epoch": 0.9405354710492821, "grad_norm": 7.537877123051078, "learning_rate": 2.1422305403684838e-08, "loss": 1.0616, "step": 10530 }, { "epoch": 0.9407141102650559, "grad_norm": 8.146144643104904, "learning_rate": 2.129410178657931e-08, "loss": -0.7872, "step": 10532 }, { "epoch": 0.9408927494808298, "grad_norm": 15.43067985337343, "learning_rate": 2.116627881595434e-08, "loss": -0.5187, "step": 10534 }, { "epoch": 0.9410713886966037, "grad_norm": 14.582212801073137, "learning_rate": 2.103883654152394e-08, "loss": -0.0173, "step": 10536 }, { "epoch": 0.9412500279123774, "grad_norm": 13.617079366144091, "learning_rate": 2.0911775012853904e-08, "loss": -0.7769, "step": 10538 }, { "epoch": 0.9414286671281513, "grad_norm": 9.763086779515763, "learning_rate": 2.0785094279362038e-08, "loss": -0.4962, "step": 10540 }, { "epoch": 0.9416073063439252, "grad_norm": 11.239171899448445, "learning_rate": 2.0658794390318368e-08, "loss": -0.5206, "step": 10542 }, { "epoch": 0.941785945559699, "grad_norm": 9.847592720937485, "learning_rate": 2.053287539484405e-08, "loss": 0.8647, "step": 10544 }, { "epoch": 0.9419645847754728, "grad_norm": 9.775071212626232, "learning_rate": 2.04073373419128e-08, "loss": -0.1617, "step": 10546 }, { "epoch": 0.9421432239912467, "grad_norm": 7.574831406130358, "learning_rate": 2.0282180280350002e-08, "loss": -0.2599, "step": 10548 }, { "epoch": 0.9423218632070205, "grad_norm": 8.772522196661457, "learning_rate": 2.015740425883272e-08, "loss": 0.3717, "step": 10550 }, { "epoch": 0.9425005024227944, "grad_norm": 4.379110966166981, "learning_rate": 2.0033009325889806e-08, "loss": -0.6668, "step": 10552 }, { "epoch": 0.9426791416385683, "grad_norm": 5.907605212514105, "learning_rate": 1.9908995529902106e-08, "loss": -1.2208, "step": 10554 }, { "epoch": 0.942857780854342, "grad_norm": 8.753983816869644, "learning_rate": 1.9785362919101933e-08, "loss": 0.5667, "step": 10556 }, { "epoch": 0.9430364200701159, "grad_norm": 11.437810174457088, "learning_rate": 1.9662111541573712e-08, "loss": 0.4756, "step": 10558 }, { "epoch": 0.9432150592858898, "grad_norm": 5.829050435490517, "learning_rate": 1.9539241445253206e-08, "loss": -0.6209, "step": 10560 }, { "epoch": 0.9433936985016635, "grad_norm": 7.106825822165727, "learning_rate": 1.9416752677928307e-08, "loss": 0.552, "step": 10562 }, { "epoch": 0.9435723377174374, "grad_norm": 15.413719486648443, "learning_rate": 1.929464528723812e-08, "loss": -1.8178, "step": 10564 }, { "epoch": 0.9437509769332113, "grad_norm": 2.6064298535685717, "learning_rate": 1.9172919320673776e-08, "loss": 1.0083, "step": 10566 }, { "epoch": 0.9439296161489851, "grad_norm": 7.723299855420498, "learning_rate": 1.9051574825578067e-08, "loss": -1.3989, "step": 10568 }, { "epoch": 0.944108255364759, "grad_norm": 4.934723278508048, "learning_rate": 1.8930611849145127e-08, "loss": 0.1512, "step": 10570 }, { "epoch": 0.9442868945805328, "grad_norm": 13.544036251012843, "learning_rate": 1.8810030438421e-08, "loss": -0.1989, "step": 10572 }, { "epoch": 0.9444655337963066, "grad_norm": 10.56173071232402, "learning_rate": 1.868983064030316e-08, "loss": -0.1398, "step": 10574 }, { "epoch": 0.9446441730120805, "grad_norm": 5.31570642981932, "learning_rate": 1.8570012501540888e-08, "loss": 0.7927, "step": 10576 }, { "epoch": 0.9448228122278544, "grad_norm": 7.536541484663163, "learning_rate": 1.8450576068734568e-08, "loss": -0.4042, "step": 10578 }, { "epoch": 0.9450014514436281, "grad_norm": 10.702552590761131, "learning_rate": 1.8331521388336602e-08, "loss": 1.0176, "step": 10580 }, { "epoch": 0.945180090659402, "grad_norm": 6.023638888899897, "learning_rate": 1.8212848506650723e-08, "loss": -0.6308, "step": 10582 }, { "epoch": 0.9453587298751759, "grad_norm": 6.05614775737677, "learning_rate": 1.8094557469832128e-08, "loss": 0.2828, "step": 10584 }, { "epoch": 0.9455373690909497, "grad_norm": 3.6114150057998335, "learning_rate": 1.7976648323887568e-08, "loss": 0.6474, "step": 10586 }, { "epoch": 0.9457160083067235, "grad_norm": 12.877364154529428, "learning_rate": 1.7859121114675245e-08, "loss": 0.1353, "step": 10588 }, { "epoch": 0.9458946475224974, "grad_norm": 7.683506370245395, "learning_rate": 1.7741975887904937e-08, "loss": -0.0873, "step": 10590 }, { "epoch": 0.9460732867382712, "grad_norm": 9.94855503344169, "learning_rate": 1.7625212689137524e-08, "loss": -1.1387, "step": 10592 }, { "epoch": 0.9462519259540451, "grad_norm": 3.9977449974957686, "learning_rate": 1.75088315637858e-08, "loss": 0.902, "step": 10594 }, { "epoch": 0.9464305651698189, "grad_norm": 5.209202055643317, "learning_rate": 1.7392832557113433e-08, "loss": -0.59, "step": 10596 }, { "epoch": 0.9466092043855927, "grad_norm": 12.605349516208634, "learning_rate": 1.7277215714235905e-08, "loss": 0.0077, "step": 10598 }, { "epoch": 0.9467878436013666, "grad_norm": 3.613701675287216, "learning_rate": 1.7161981080119793e-08, "loss": 0.0071, "step": 10600 }, { "epoch": 0.9469664828171405, "grad_norm": 7.621464394257803, "learning_rate": 1.7047128699583025e-08, "loss": 0.0345, "step": 10602 }, { "epoch": 0.9471451220329142, "grad_norm": 10.715843407159225, "learning_rate": 1.6932658617295202e-08, "loss": 0.4603, "step": 10604 }, { "epoch": 0.9473237612486881, "grad_norm": 12.408972760888338, "learning_rate": 1.6818570877776718e-08, "loss": 0.111, "step": 10606 }, { "epoch": 0.947502400464462, "grad_norm": 6.600936083445618, "learning_rate": 1.6704865525399737e-08, "loss": 1.1279, "step": 10608 }, { "epoch": 0.9476810396802358, "grad_norm": 3.778622305350094, "learning_rate": 1.6591542604387442e-08, "loss": -0.3807, "step": 10610 }, { "epoch": 0.9478596788960096, "grad_norm": 11.177544453054079, "learning_rate": 1.6478602158814135e-08, "loss": -0.7156, "step": 10612 }, { "epoch": 0.9480383181117835, "grad_norm": 5.514671524240013, "learning_rate": 1.6366044232605792e-08, "loss": -0.6144, "step": 10614 }, { "epoch": 0.9482169573275573, "grad_norm": 12.43465279812673, "learning_rate": 1.62538688695395e-08, "loss": 0.5402, "step": 10616 }, { "epoch": 0.9483955965433312, "grad_norm": 9.20573722410951, "learning_rate": 1.6142076113243032e-08, "loss": 0.3353, "step": 10618 }, { "epoch": 0.948574235759105, "grad_norm": 8.113141605532704, "learning_rate": 1.6030666007196047e-08, "loss": -0.7578, "step": 10620 }, { "epoch": 0.9487528749748788, "grad_norm": 5.9985731335603365, "learning_rate": 1.5919638594729113e-08, "loss": -0.0326, "step": 10622 }, { "epoch": 0.9489315141906527, "grad_norm": 9.540422872132114, "learning_rate": 1.5808993919023793e-08, "loss": -0.7248, "step": 10624 }, { "epoch": 0.9491101534064266, "grad_norm": 10.292924647082934, "learning_rate": 1.5698732023113002e-08, "loss": -0.0596, "step": 10626 }, { "epoch": 0.9492887926222003, "grad_norm": 6.091280549315452, "learning_rate": 1.558885294988066e-08, "loss": 0.6323, "step": 10628 }, { "epoch": 0.9494674318379742, "grad_norm": 9.876285131343945, "learning_rate": 1.547935674206202e-08, "loss": 0.0604, "step": 10630 }, { "epoch": 0.9496460710537481, "grad_norm": 4.620427732607769, "learning_rate": 1.5370243442243137e-08, "loss": -0.5329, "step": 10632 }, { "epoch": 0.9498247102695219, "grad_norm": 7.0527860290444355, "learning_rate": 1.5261513092861167e-08, "loss": -0.1322, "step": 10634 }, { "epoch": 0.9500033494852957, "grad_norm": 7.8059298545115485, "learning_rate": 1.515316573620473e-08, "loss": -0.4499, "step": 10636 }, { "epoch": 0.9501819887010696, "grad_norm": 8.492953801959985, "learning_rate": 1.50452014144129e-08, "loss": -0.0463, "step": 10638 }, { "epoch": 0.9503606279168434, "grad_norm": 11.486806848179471, "learning_rate": 1.4937620169476194e-08, "loss": 0.1005, "step": 10640 }, { "epoch": 0.9505392671326173, "grad_norm": 3.257886121910556, "learning_rate": 1.4830422043235923e-08, "loss": 0.6906, "step": 10642 }, { "epoch": 0.9507179063483912, "grad_norm": 3.664921576479252, "learning_rate": 1.4723607077384626e-08, "loss": -0.9515, "step": 10644 }, { "epoch": 0.9508965455641649, "grad_norm": 6.572913055499536, "learning_rate": 1.4617175313465513e-08, "loss": -0.5003, "step": 10646 }, { "epoch": 0.9510751847799388, "grad_norm": 7.526863055081524, "learning_rate": 1.4511126792873029e-08, "loss": 0.792, "step": 10648 }, { "epoch": 0.9512538239957127, "grad_norm": 9.251003726422828, "learning_rate": 1.4405461556852405e-08, "loss": 1.0024, "step": 10650 }, { "epoch": 0.9514324632114866, "grad_norm": 8.589595942025772, "learning_rate": 1.4300179646499877e-08, "loss": -0.2697, "step": 10652 }, { "epoch": 0.9516111024272603, "grad_norm": 6.947488004242382, "learning_rate": 1.4195281102762579e-08, "loss": 1.0357, "step": 10654 }, { "epoch": 0.9517897416430342, "grad_norm": 9.05302323062621, "learning_rate": 1.4090765966438323e-08, "loss": 0.4635, "step": 10656 }, { "epoch": 0.9519683808588081, "grad_norm": 6.700915988538374, "learning_rate": 1.3986634278176367e-08, "loss": -0.0468, "step": 10658 }, { "epoch": 0.9521470200745819, "grad_norm": 12.904491240069833, "learning_rate": 1.3882886078476096e-08, "loss": -0.1851, "step": 10660 }, { "epoch": 0.9523256592903557, "grad_norm": 12.135693426073379, "learning_rate": 1.3779521407688455e-08, "loss": 0.6987, "step": 10662 }, { "epoch": 0.9525042985061296, "grad_norm": 17.04515183058615, "learning_rate": 1.3676540306014839e-08, "loss": -1.5843, "step": 10664 }, { "epoch": 0.9526829377219034, "grad_norm": 5.298163963283502, "learning_rate": 1.357394281350721e-08, "loss": -1.5494, "step": 10666 }, { "epoch": 0.9528615769376773, "grad_norm": 13.221051532056036, "learning_rate": 1.3471728970068985e-08, "loss": -0.3167, "step": 10668 }, { "epoch": 0.9530402161534511, "grad_norm": 9.870319271160808, "learning_rate": 1.3369898815454028e-08, "loss": -1.1798, "step": 10670 }, { "epoch": 0.9532188553692249, "grad_norm": 6.461421039757999, "learning_rate": 1.3268452389266771e-08, "loss": 0.1585, "step": 10672 }, { "epoch": 0.9533974945849988, "grad_norm": 11.025830199109265, "learning_rate": 1.3167389730962652e-08, "loss": 0.0796, "step": 10674 }, { "epoch": 0.9535761338007727, "grad_norm": 10.629957411385574, "learning_rate": 1.3066710879847898e-08, "loss": -1.2342, "step": 10676 }, { "epoch": 0.9537547730165464, "grad_norm": 5.7976991627951335, "learning_rate": 1.2966415875079295e-08, "loss": 0.7963, "step": 10678 }, { "epoch": 0.9539334122323203, "grad_norm": 11.979989215419291, "learning_rate": 1.2866504755664532e-08, "loss": -0.6824, "step": 10680 }, { "epoch": 0.9541120514480942, "grad_norm": 11.353432802521324, "learning_rate": 1.2766977560461634e-08, "loss": -0.8993, "step": 10682 }, { "epoch": 0.954290690663868, "grad_norm": 5.632499104113281, "learning_rate": 1.2667834328179861e-08, "loss": -0.2496, "step": 10684 }, { "epoch": 0.9544693298796418, "grad_norm": 8.515888399022698, "learning_rate": 1.256907509737859e-08, "loss": -0.0294, "step": 10686 }, { "epoch": 0.9546479690954157, "grad_norm": 8.321482458823068, "learning_rate": 1.2470699906468096e-08, "loss": 0.7081, "step": 10688 }, { "epoch": 0.9548266083111895, "grad_norm": 3.8000857745076915, "learning_rate": 1.2372708793709552e-08, "loss": -0.1538, "step": 10690 }, { "epoch": 0.9550052475269634, "grad_norm": 8.86664456625266, "learning_rate": 1.2275101797214139e-08, "loss": -0.7377, "step": 10692 }, { "epoch": 0.9551838867427372, "grad_norm": 5.094879212449803, "learning_rate": 1.2177878954944044e-08, "loss": 0.5318, "step": 10694 }, { "epoch": 0.955362525958511, "grad_norm": 15.491317778474233, "learning_rate": 1.2081040304712242e-08, "loss": -0.8841, "step": 10696 }, { "epoch": 0.9555411651742849, "grad_norm": 13.843051178044844, "learning_rate": 1.1984585884181719e-08, "loss": 0.756, "step": 10698 }, { "epoch": 0.9557198043900588, "grad_norm": 10.579490584745319, "learning_rate": 1.1888515730866578e-08, "loss": 0.5358, "step": 10700 }, { "epoch": 0.9558984436058325, "grad_norm": 3.1212563024784936, "learning_rate": 1.179282988213104e-08, "loss": -0.0774, "step": 10702 }, { "epoch": 0.9560770828216064, "grad_norm": 6.088514690703803, "learning_rate": 1.1697528375190225e-08, "loss": 0.2335, "step": 10704 }, { "epoch": 0.9562557220373803, "grad_norm": 12.297675333310586, "learning_rate": 1.1602611247109373e-08, "loss": -0.0751, "step": 10706 }, { "epoch": 0.9564343612531541, "grad_norm": 10.14416272915542, "learning_rate": 1.1508078534804622e-08, "loss": -0.5465, "step": 10708 }, { "epoch": 0.9566130004689279, "grad_norm": 5.176750719935955, "learning_rate": 1.1413930275042449e-08, "loss": -0.0352, "step": 10710 }, { "epoch": 0.9567916396847018, "grad_norm": 26.528481203065628, "learning_rate": 1.1320166504439566e-08, "loss": 0.006, "step": 10712 }, { "epoch": 0.9569702789004756, "grad_norm": 11.2398530764497, "learning_rate": 1.1226787259463578e-08, "loss": 0.428, "step": 10714 }, { "epoch": 0.9571489181162495, "grad_norm": 12.523365003884052, "learning_rate": 1.113379257643221e-08, "loss": 0.093, "step": 10716 }, { "epoch": 0.9573275573320233, "grad_norm": 8.166733358720695, "learning_rate": 1.1041182491513868e-08, "loss": 0.0466, "step": 10718 }, { "epoch": 0.9575061965477971, "grad_norm": 6.966232871584367, "learning_rate": 1.094895704072707e-08, "loss": -1.3445, "step": 10720 }, { "epoch": 0.957684835763571, "grad_norm": 6.158342499612156, "learning_rate": 1.0857116259940902e-08, "loss": 0.1543, "step": 10722 }, { "epoch": 0.9578634749793449, "grad_norm": 11.188566189047604, "learning_rate": 1.0765660184875124e-08, "loss": 0.7712, "step": 10724 }, { "epoch": 0.9580421141951186, "grad_norm": 6.726459001726265, "learning_rate": 1.0674588851099176e-08, "loss": -0.2607, "step": 10726 }, { "epoch": 0.9582207534108925, "grad_norm": 8.787475551175092, "learning_rate": 1.0583902294033608e-08, "loss": 0.1827, "step": 10728 }, { "epoch": 0.9583993926266664, "grad_norm": 5.213993851915956, "learning_rate": 1.0493600548948877e-08, "loss": -0.8258, "step": 10730 }, { "epoch": 0.9585780318424402, "grad_norm": 6.9211687196985165, "learning_rate": 1.0403683650965888e-08, "loss": 0.7741, "step": 10732 }, { "epoch": 0.958756671058214, "grad_norm": 5.171748318229945, "learning_rate": 1.0314151635056001e-08, "loss": -0.393, "step": 10734 }, { "epoch": 0.9589353102739879, "grad_norm": 11.780930087251308, "learning_rate": 1.0225004536040471e-08, "loss": -0.1355, "step": 10736 }, { "epoch": 0.9591139494897617, "grad_norm": 15.173802859299663, "learning_rate": 1.0136242388591454e-08, "loss": -0.7626, "step": 10738 }, { "epoch": 0.9592925887055356, "grad_norm": 3.7338863037077217, "learning_rate": 1.004786522723089e-08, "loss": -0.2588, "step": 10740 }, { "epoch": 0.9594712279213095, "grad_norm": 5.374533699568081, "learning_rate": 9.959873086331172e-09, "loss": -0.9071, "step": 10742 }, { "epoch": 0.9596498671370832, "grad_norm": 7.610133953745184, "learning_rate": 9.872266000114926e-09, "loss": -0.7793, "step": 10744 }, { "epoch": 0.9598285063528571, "grad_norm": 12.325951368897961, "learning_rate": 9.785044002655118e-09, "loss": -0.5145, "step": 10746 }, { "epoch": 0.960007145568631, "grad_norm": 4.217604644146506, "learning_rate": 9.698207127874835e-09, "loss": 0.0407, "step": 10748 }, { "epoch": 0.9601857847844047, "grad_norm": 5.383041044789885, "learning_rate": 9.611755409547284e-09, "loss": 0.7733, "step": 10750 }, { "epoch": 0.9603644240001786, "grad_norm": 4.8822607110096605, "learning_rate": 9.525688881296123e-09, "loss": 1.005, "step": 10752 }, { "epoch": 0.9605430632159525, "grad_norm": 3.4163362451826846, "learning_rate": 9.440007576595022e-09, "loss": -0.8663, "step": 10754 }, { "epoch": 0.9607217024317263, "grad_norm": 6.976857759009648, "learning_rate": 9.354711528767767e-09, "loss": -1.5166, "step": 10756 }, { "epoch": 0.9609003416475002, "grad_norm": 7.15336690737811, "learning_rate": 9.269800770988712e-09, "loss": 0.2181, "step": 10758 }, { "epoch": 0.961078980863274, "grad_norm": 10.985131705687856, "learning_rate": 9.185275336281772e-09, "loss": -1.6642, "step": 10760 }, { "epoch": 0.9612576200790478, "grad_norm": 2.9362400680640346, "learning_rate": 9.10113525752132e-09, "loss": -0.2892, "step": 10762 }, { "epoch": 0.9614362592948217, "grad_norm": 8.711967427903053, "learning_rate": 9.017380567431953e-09, "loss": -0.0881, "step": 10764 }, { "epoch": 0.9616148985105956, "grad_norm": 11.633266996032393, "learning_rate": 8.934011298588062e-09, "loss": -0.5356, "step": 10766 }, { "epoch": 0.9617935377263693, "grad_norm": 6.656792703631464, "learning_rate": 8.851027483414486e-09, "loss": 0.473, "step": 10768 }, { "epoch": 0.9619721769421432, "grad_norm": 4.478438538504695, "learning_rate": 8.768429154185853e-09, "loss": 0.2716, "step": 10770 }, { "epoch": 0.9621508161579171, "grad_norm": 9.068884974605048, "learning_rate": 8.686216343027242e-09, "loss": -0.0352, "step": 10772 }, { "epoch": 0.9623294553736909, "grad_norm": 14.569163095581848, "learning_rate": 8.604389081913188e-09, "loss": 0.1745, "step": 10774 }, { "epoch": 0.9625080945894647, "grad_norm": 7.53499399408963, "learning_rate": 8.52294740266879e-09, "loss": -0.5236, "step": 10776 }, { "epoch": 0.9626867338052386, "grad_norm": 9.323271595824703, "learning_rate": 8.441891336969265e-09, "loss": -0.839, "step": 10778 }, { "epoch": 0.9628653730210124, "grad_norm": 5.585324383146052, "learning_rate": 8.361220916339173e-09, "loss": -1.2163, "step": 10780 }, { "epoch": 0.9630440122367863, "grad_norm": 3.427462401291671, "learning_rate": 8.280936172153862e-09, "loss": 0.0299, "step": 10782 }, { "epoch": 0.9632226514525601, "grad_norm": 9.443274745936714, "learning_rate": 8.201037135638245e-09, "loss": -0.171, "step": 10784 }, { "epoch": 0.963401290668334, "grad_norm": 4.61465513749011, "learning_rate": 8.121523837867238e-09, "loss": -0.5962, "step": 10786 }, { "epoch": 0.9635799298841078, "grad_norm": 7.238975034630383, "learning_rate": 8.042396309765998e-09, "loss": 0.806, "step": 10788 }, { "epoch": 0.9637585690998817, "grad_norm": 5.26837412442558, "learning_rate": 7.963654582109347e-09, "loss": -0.6238, "step": 10790 }, { "epoch": 0.9639372083156555, "grad_norm": 14.401726598012694, "learning_rate": 7.885298685522235e-09, "loss": -0.3869, "step": 10792 }, { "epoch": 0.9641158475314293, "grad_norm": 5.098948628380969, "learning_rate": 7.807328650479394e-09, "loss": -0.2621, "step": 10794 }, { "epoch": 0.9642944867472032, "grad_norm": 9.14325174263507, "learning_rate": 7.729744507305568e-09, "loss": 0.2508, "step": 10796 }, { "epoch": 0.9644731259629771, "grad_norm": 10.151829341406936, "learning_rate": 7.65254628617562e-09, "loss": -2.0322, "step": 10798 }, { "epoch": 0.9646517651787508, "grad_norm": 7.468756895181833, "learning_rate": 7.575734017113866e-09, "loss": 0.7425, "step": 10800 }, { "epoch": 0.9648304043945247, "grad_norm": 4.364508556129022, "learning_rate": 7.499307729995075e-09, "loss": 0.0956, "step": 10802 }, { "epoch": 0.9650090436102986, "grad_norm": 7.770274560660445, "learning_rate": 7.423267454543358e-09, "loss": -0.4164, "step": 10804 }, { "epoch": 0.9651876828260724, "grad_norm": 5.591806596273859, "learning_rate": 7.34761322033306e-09, "loss": 0.7689, "step": 10806 }, { "epoch": 0.9653663220418462, "grad_norm": 7.779795560528763, "learning_rate": 7.272345056788199e-09, "loss": -0.2913, "step": 10808 }, { "epoch": 0.9655449612576201, "grad_norm": 10.647263840767819, "learning_rate": 7.19746299318269e-09, "loss": 0.2496, "step": 10810 }, { "epoch": 0.9657236004733939, "grad_norm": 7.353094381194883, "learning_rate": 7.12296705864035e-09, "loss": -0.3785, "step": 10812 }, { "epoch": 0.9659022396891678, "grad_norm": 1.874373331250462, "learning_rate": 7.048857282134668e-09, "loss": 0.2639, "step": 10814 }, { "epoch": 0.9660808789049417, "grad_norm": 3.449760489511925, "learning_rate": 6.975133692489144e-09, "loss": 0.3758, "step": 10816 }, { "epoch": 0.9662595181207154, "grad_norm": 10.09405195888799, "learning_rate": 6.901796318376951e-09, "loss": 0.2494, "step": 10818 }, { "epoch": 0.9664381573364893, "grad_norm": 5.63962636968085, "learning_rate": 6.828845188321053e-09, "loss": 0.394, "step": 10820 }, { "epoch": 0.9666167965522632, "grad_norm": 11.782010243623937, "learning_rate": 6.756280330694197e-09, "loss": -0.7758, "step": 10822 }, { "epoch": 0.966795435768037, "grad_norm": 6.879480739781415, "learning_rate": 6.684101773718809e-09, "loss": -0.7123, "step": 10824 }, { "epoch": 0.9669740749838108, "grad_norm": 6.224207338169688, "learning_rate": 6.6123095454675426e-09, "loss": 0.1727, "step": 10826 }, { "epoch": 0.9671527141995847, "grad_norm": 4.5688004560071915, "learning_rate": 6.5409036738619525e-09, "loss": -0.5189, "step": 10828 }, { "epoch": 0.9673313534153585, "grad_norm": 4.974721139731434, "learning_rate": 6.469884186674157e-09, "loss": 0.6486, "step": 10830 }, { "epoch": 0.9675099926311324, "grad_norm": 10.028176095177948, "learning_rate": 6.399251111525505e-09, "loss": 0.5301, "step": 10832 }, { "epoch": 0.9676886318469062, "grad_norm": 15.169349405197083, "learning_rate": 6.329004475887245e-09, "loss": -0.317, "step": 10834 }, { "epoch": 0.96786727106268, "grad_norm": 5.6766703697516565, "learning_rate": 6.2591443070801884e-09, "loss": 0.4762, "step": 10836 }, { "epoch": 0.9680459102784539, "grad_norm": 6.037182356098021, "learning_rate": 6.189670632275157e-09, "loss": 0.1132, "step": 10838 }, { "epoch": 0.9682245494942278, "grad_norm": 10.75934567188495, "learning_rate": 6.120583478492092e-09, "loss": -0.6846, "step": 10840 }, { "epoch": 0.9684031887100015, "grad_norm": 4.249354369546437, "learning_rate": 6.051882872601277e-09, "loss": 0.7473, "step": 10842 }, { "epoch": 0.9685818279257754, "grad_norm": 11.198655688125758, "learning_rate": 5.983568841322228e-09, "loss": -0.5116, "step": 10844 }, { "epoch": 0.9687604671415493, "grad_norm": 6.955778220082398, "learning_rate": 5.915641411224137e-09, "loss": 0.5152, "step": 10846 }, { "epoch": 0.968939106357323, "grad_norm": 9.624978994371356, "learning_rate": 5.848100608725981e-09, "loss": -0.7437, "step": 10848 }, { "epoch": 0.9691177455730969, "grad_norm": 16.2792002143576, "learning_rate": 5.780946460096192e-09, "loss": -0.3415, "step": 10850 }, { "epoch": 0.9692963847888708, "grad_norm": 25.606265779452666, "learning_rate": 5.714178991452989e-09, "loss": 0.1457, "step": 10852 }, { "epoch": 0.9694750240046446, "grad_norm": 5.001996976696689, "learning_rate": 5.647798228764156e-09, "loss": -0.6126, "step": 10854 }, { "epoch": 0.9696536632204185, "grad_norm": 6.949066417754916, "learning_rate": 5.5818041978470395e-09, "loss": 0.1387, "step": 10856 }, { "epoch": 0.9698323024361923, "grad_norm": 2.1264605271375534, "learning_rate": 5.516196924368666e-09, "loss": 0.8729, "step": 10858 }, { "epoch": 0.9700109416519661, "grad_norm": 4.301440002271291, "learning_rate": 5.4509764338454e-09, "loss": 0.2754, "step": 10860 }, { "epoch": 0.97018958086774, "grad_norm": 10.856058396409352, "learning_rate": 5.386142751643508e-09, "loss": -0.3456, "step": 10862 }, { "epoch": 0.9703682200835139, "grad_norm": 4.479728817225314, "learning_rate": 5.321695902978373e-09, "loss": -0.526, "step": 10864 }, { "epoch": 0.9705468592992876, "grad_norm": 14.638917218799593, "learning_rate": 5.257635912915614e-09, "loss": -0.4745, "step": 10866 }, { "epoch": 0.9707254985150615, "grad_norm": 7.188718476880545, "learning_rate": 5.193962806369634e-09, "loss": 0.1141, "step": 10868 }, { "epoch": 0.9709041377308354, "grad_norm": 8.413539292867014, "learning_rate": 5.130676608104845e-09, "loss": 0.5127, "step": 10870 }, { "epoch": 0.9710827769466092, "grad_norm": 21.733729799238972, "learning_rate": 5.067777342735113e-09, "loss": -0.6117, "step": 10872 }, { "epoch": 0.971261416162383, "grad_norm": 7.070169521171715, "learning_rate": 5.005265034723538e-09, "loss": -0.1446, "step": 10874 }, { "epoch": 0.9714400553781569, "grad_norm": 6.927135801953154, "learning_rate": 4.943139708383115e-09, "loss": 0.2505, "step": 10876 }, { "epoch": 0.9716186945939307, "grad_norm": 15.834596657128508, "learning_rate": 4.881401387876071e-09, "loss": -2.0157, "step": 10878 }, { "epoch": 0.9717973338097046, "grad_norm": 15.677282159332348, "learning_rate": 4.820050097214312e-09, "loss": -0.3954, "step": 10880 }, { "epoch": 0.9719759730254784, "grad_norm": 4.4965202864981775, "learning_rate": 4.75908586025886e-09, "loss": -0.017, "step": 10882 }, { "epoch": 0.9721546122412522, "grad_norm": 8.977079238825747, "learning_rate": 4.698508700720638e-09, "loss": -0.163, "step": 10884 }, { "epoch": 0.9723332514570261, "grad_norm": 7.114981048623083, "learning_rate": 4.638318642159689e-09, "loss": -0.3765, "step": 10886 }, { "epoch": 0.9725118906728, "grad_norm": 11.634954968714895, "learning_rate": 4.578515707985731e-09, "loss": -0.8692, "step": 10888 }, { "epoch": 0.9726905298885737, "grad_norm": 9.137218081695377, "learning_rate": 4.5190999214578255e-09, "loss": -0.1957, "step": 10890 }, { "epoch": 0.9728691691043476, "grad_norm": 10.769496537167488, "learning_rate": 4.460071305684376e-09, "loss": -0.8314, "step": 10892 }, { "epoch": 0.9730478083201215, "grad_norm": 10.74177946668241, "learning_rate": 4.401429883623353e-09, "loss": 0.2019, "step": 10894 }, { "epoch": 0.9732264475358953, "grad_norm": 12.196803840587961, "learning_rate": 4.3431756780819564e-09, "loss": 1.0964, "step": 10896 }, { "epoch": 0.9734050867516691, "grad_norm": 6.5048501919173445, "learning_rate": 4.2853087117169505e-09, "loss": -0.7406, "step": 10898 }, { "epoch": 0.973583725967443, "grad_norm": 4.128281184208347, "learning_rate": 4.2278290070346665e-09, "loss": -0.8194, "step": 10900 }, { "epoch": 0.9737623651832168, "grad_norm": 3.209481087689081, "learning_rate": 4.170736586390222e-09, "loss": -0.1576, "step": 10902 }, { "epoch": 0.9739410043989907, "grad_norm": 22.083285733537537, "learning_rate": 4.1140314719887434e-09, "loss": 0.1711, "step": 10904 }, { "epoch": 0.9741196436147646, "grad_norm": 9.21538603878047, "learning_rate": 4.0577136858843676e-09, "loss": -0.1073, "step": 10906 }, { "epoch": 0.9742982828305383, "grad_norm": 11.65925522586563, "learning_rate": 4.0017832499805724e-09, "loss": -0.5899, "step": 10908 }, { "epoch": 0.9744769220463122, "grad_norm": 10.041343293318745, "learning_rate": 3.946240186030514e-09, "loss": 0.4535, "step": 10910 }, { "epoch": 0.9746555612620861, "grad_norm": 6.299621740217758, "learning_rate": 3.891084515636245e-09, "loss": -0.7919, "step": 10912 }, { "epoch": 0.9748342004778598, "grad_norm": 11.959869038046635, "learning_rate": 3.836316260249606e-09, "loss": 0.2113, "step": 10914 }, { "epoch": 0.9750128396936337, "grad_norm": 12.33892496183793, "learning_rate": 3.7819354411713355e-09, "loss": -1.1883, "step": 10916 }, { "epoch": 0.9751914789094076, "grad_norm": 7.781499039061939, "learning_rate": 3.727942079551627e-09, "loss": -0.4536, "step": 10918 }, { "epoch": 0.9753701181251815, "grad_norm": 8.68800143983383, "learning_rate": 3.6743361963902376e-09, "loss": 0.2364, "step": 10920 }, { "epoch": 0.9755487573409553, "grad_norm": 10.909534211254988, "learning_rate": 3.6211178125359343e-09, "loss": 0.0044, "step": 10922 }, { "epoch": 0.9757273965567291, "grad_norm": 5.118305206536228, "learning_rate": 3.5682869486868274e-09, "loss": -0.8956, "step": 10924 }, { "epoch": 0.975906035772503, "grad_norm": 7.545028808215173, "learning_rate": 3.515843625390369e-09, "loss": -0.3428, "step": 10926 }, { "epoch": 0.9760846749882768, "grad_norm": 9.493289384347289, "learning_rate": 3.4637878630431326e-09, "loss": -1.1621, "step": 10928 }, { "epoch": 0.9762633142040507, "grad_norm": 8.150546714372236, "learning_rate": 3.412119681891257e-09, "loss": 0.0183, "step": 10930 }, { "epoch": 0.9764419534198245, "grad_norm": 7.860562608302472, "learning_rate": 3.360839102029778e-09, "loss": 0.1943, "step": 10932 }, { "epoch": 0.9766205926355983, "grad_norm": 4.055887092613924, "learning_rate": 3.309946143403408e-09, "loss": 0.8141, "step": 10934 }, { "epoch": 0.9767992318513722, "grad_norm": 7.0540713086548745, "learning_rate": 3.259440825805648e-09, "loss": -0.1014, "step": 10936 }, { "epoch": 0.9769778710671461, "grad_norm": 7.665287336026462, "learning_rate": 3.209323168879563e-09, "loss": -0.3637, "step": 10938 }, { "epoch": 0.9771565102829198, "grad_norm": 6.889486690274446, "learning_rate": 3.159593192117338e-09, "loss": -0.7691, "step": 10940 }, { "epoch": 0.9773351494986937, "grad_norm": 7.8209056343499155, "learning_rate": 3.1102509148602794e-09, "loss": 0.6456, "step": 10942 }, { "epoch": 0.9775137887144676, "grad_norm": 4.469673968208428, "learning_rate": 3.061296356299037e-09, "loss": 0.5379, "step": 10944 }, { "epoch": 0.9776924279302414, "grad_norm": 11.077715910553126, "learning_rate": 3.0127295354734905e-09, "loss": -1.2877, "step": 10946 }, { "epoch": 0.9778710671460152, "grad_norm": 21.585139411368257, "learning_rate": 2.964550471272642e-09, "loss": 0.463, "step": 10948 }, { "epoch": 0.9780497063617891, "grad_norm": 17.034717787724627, "learning_rate": 2.916759182434503e-09, "loss": 1.3567, "step": 10950 }, { "epoch": 0.9782283455775629, "grad_norm": 6.108655023712705, "learning_rate": 2.8693556875467595e-09, "loss": -1.3236, "step": 10952 }, { "epoch": 0.9784069847933368, "grad_norm": 17.43997185801064, "learning_rate": 2.8223400050457757e-09, "loss": -0.9225, "step": 10954 }, { "epoch": 0.9785856240091106, "grad_norm": 6.510174930407916, "learning_rate": 2.775712153217369e-09, "loss": 0.1306, "step": 10956 }, { "epoch": 0.9787642632248844, "grad_norm": 9.486343204705433, "learning_rate": 2.7294721501963656e-09, "loss": -0.2062, "step": 10958 }, { "epoch": 0.9789429024406583, "grad_norm": 15.357060581126285, "learning_rate": 2.6836200139668254e-09, "loss": -1.2625, "step": 10960 }, { "epoch": 0.9791215416564322, "grad_norm": 17.068845581993912, "learning_rate": 2.6381557623620375e-09, "loss": -1.4281, "step": 10962 }, { "epoch": 0.9793001808722059, "grad_norm": 8.591159499474454, "learning_rate": 2.593079413064192e-09, "loss": -0.6589, "step": 10964 }, { "epoch": 0.9794788200879798, "grad_norm": 17.20091184816093, "learning_rate": 2.5483909836049312e-09, "loss": -0.8748, "step": 10966 }, { "epoch": 0.9796574593037537, "grad_norm": 4.222586010993649, "learning_rate": 2.5040904913646856e-09, "loss": 0.8351, "step": 10968 }, { "epoch": 0.9798360985195275, "grad_norm": 5.142036406558405, "learning_rate": 2.460177953573339e-09, "loss": 0.7436, "step": 10970 }, { "epoch": 0.9800147377353013, "grad_norm": 6.222496859226683, "learning_rate": 2.416653387309564e-09, "loss": -0.6561, "step": 10972 }, { "epoch": 0.9801933769510752, "grad_norm": 8.461437038000511, "learning_rate": 2.3735168095013746e-09, "loss": 0.3992, "step": 10974 }, { "epoch": 0.980372016166849, "grad_norm": 7.768439721120554, "learning_rate": 2.3307682369257955e-09, "loss": -0.2503, "step": 10976 }, { "epoch": 0.9805506553826229, "grad_norm": 15.14770871200106, "learning_rate": 2.2884076862089707e-09, "loss": 0.6292, "step": 10978 }, { "epoch": 0.9807292945983968, "grad_norm": 5.317076742367794, "learning_rate": 2.2464351738261667e-09, "loss": -0.3447, "step": 10980 }, { "epoch": 0.9809079338141705, "grad_norm": 5.524425024433397, "learning_rate": 2.204850716101547e-09, "loss": 0.0565, "step": 10982 }, { "epoch": 0.9810865730299444, "grad_norm": 2.1583725806636003, "learning_rate": 2.1636543292086195e-09, "loss": -2.0503, "step": 10984 }, { "epoch": 0.9812652122457183, "grad_norm": 11.70691133780764, "learning_rate": 2.1228460291697893e-09, "loss": 0.0411, "step": 10986 }, { "epoch": 0.981443851461492, "grad_norm": 6.4106468928810205, "learning_rate": 2.0824258318565825e-09, "loss": -0.0603, "step": 10988 }, { "epoch": 0.9816224906772659, "grad_norm": 5.103835903531361, "learning_rate": 2.042393752989424e-09, "loss": 0.4448, "step": 10990 }, { "epoch": 0.9818011298930398, "grad_norm": 5.968991579289823, "learning_rate": 2.002749808138193e-09, "loss": -1.6013, "step": 10992 }, { "epoch": 0.9819797691088136, "grad_norm": 9.507408465763069, "learning_rate": 1.963494012721223e-09, "loss": 0.3484, "step": 10994 }, { "epoch": 0.9821584083245875, "grad_norm": 9.447926401780288, "learning_rate": 1.924626382006411e-09, "loss": -0.3786, "step": 10996 }, { "epoch": 0.9823370475403613, "grad_norm": 4.599786438338231, "learning_rate": 1.886146931110444e-09, "loss": 0.3843, "step": 10998 }, { "epoch": 0.9825156867561351, "grad_norm": 3.0385616252576284, "learning_rate": 1.8480556749991271e-09, "loss": 0.9107, "step": 11000 }, { "epoch": 0.982694325971909, "grad_norm": 10.9401998620894, "learning_rate": 1.8103526284870552e-09, "loss": -1.5925, "step": 11002 }, { "epoch": 0.9828729651876829, "grad_norm": 10.45555955393768, "learning_rate": 1.7730378062381646e-09, "loss": 0.2465, "step": 11004 }, { "epoch": 0.9830516044034566, "grad_norm": 4.755994327204909, "learning_rate": 1.7361112227651798e-09, "loss": 0.2021, "step": 11006 }, { "epoch": 0.9832302436192305, "grad_norm": 11.092572216858922, "learning_rate": 1.6995728924299456e-09, "loss": -0.855, "step": 11008 }, { "epoch": 0.9834088828350044, "grad_norm": 5.361883794063885, "learning_rate": 1.6634228294433172e-09, "loss": -0.659, "step": 11010 }, { "epoch": 0.9835875220507782, "grad_norm": 11.631050750157286, "learning_rate": 1.6276610478648256e-09, "loss": 0.135, "step": 11012 }, { "epoch": 0.983766161266552, "grad_norm": 5.168423610285485, "learning_rate": 1.592287561603567e-09, "loss": -0.6268, "step": 11014 }, { "epoch": 0.9839448004823259, "grad_norm": 9.743473705316365, "learning_rate": 1.5573023844169807e-09, "loss": 0.097, "step": 11016 }, { "epoch": 0.9841234396980997, "grad_norm": 20.290881675037944, "learning_rate": 1.5227055299120717e-09, "loss": -0.7961, "step": 11018 }, { "epoch": 0.9843020789138736, "grad_norm": 4.1858505872223635, "learning_rate": 1.4884970115444096e-09, "loss": -0.9782, "step": 11020 }, { "epoch": 0.9844807181296474, "grad_norm": 7.626256370222892, "learning_rate": 1.454676842618685e-09, "loss": -0.9681, "step": 11022 }, { "epoch": 0.9846593573454212, "grad_norm": 13.352643017826974, "learning_rate": 1.4212450362883765e-09, "loss": -0.3139, "step": 11024 }, { "epoch": 0.9848379965611951, "grad_norm": 5.863675179867165, "learning_rate": 1.388201605556305e-09, "loss": -0.0556, "step": 11026 }, { "epoch": 0.985016635776969, "grad_norm": 12.55449789838163, "learning_rate": 1.355546563273746e-09, "loss": -0.7406, "step": 11028 }, { "epoch": 0.9851952749927427, "grad_norm": 13.024409339676826, "learning_rate": 1.3232799221414292e-09, "loss": -0.1412, "step": 11030 }, { "epoch": 0.9853739142085166, "grad_norm": 6.728082181703044, "learning_rate": 1.2914016947086493e-09, "loss": 0.2833, "step": 11032 }, { "epoch": 0.9855525534242905, "grad_norm": 10.195732675130063, "learning_rate": 1.2599118933737107e-09, "loss": -1.2488, "step": 11034 }, { "epoch": 0.9857311926400643, "grad_norm": 12.194371560433373, "learning_rate": 1.228810530383817e-09, "loss": -0.5336, "step": 11036 }, { "epoch": 0.9859098318558381, "grad_norm": 3.6078778749592004, "learning_rate": 1.198097617835403e-09, "loss": 0.1308, "step": 11038 }, { "epoch": 0.986088471071612, "grad_norm": 7.139118594948829, "learning_rate": 1.1677731676733581e-09, "loss": -0.1591, "step": 11040 }, { "epoch": 0.9862671102873858, "grad_norm": 16.220818712900925, "learning_rate": 1.137837191691915e-09, "loss": 0.0999, "step": 11042 }, { "epoch": 0.9864457495031597, "grad_norm": 5.286714514427241, "learning_rate": 1.108289701533982e-09, "loss": -0.7614, "step": 11044 }, { "epoch": 0.9866243887189335, "grad_norm": 9.151118692686524, "learning_rate": 1.079130708691256e-09, "loss": -0.1344, "step": 11046 }, { "epoch": 0.9868030279347074, "grad_norm": 7.922616295675682, "learning_rate": 1.050360224504665e-09, "loss": -0.2105, "step": 11048 }, { "epoch": 0.9869816671504812, "grad_norm": 7.408949219284442, "learning_rate": 1.0219782601638137e-09, "loss": 0.7879, "step": 11050 }, { "epoch": 0.9871603063662551, "grad_norm": 14.80738194890184, "learning_rate": 9.93984826707317e-10, "loss": 0.4491, "step": 11052 }, { "epoch": 0.987338945582029, "grad_norm": 9.763023090879528, "learning_rate": 9.663799350224655e-10, "loss": 0.3755, "step": 11054 }, { "epoch": 0.9875175847978027, "grad_norm": 8.674799795341551, "learning_rate": 9.39163595845671e-10, "loss": 0.1487, "step": 11056 }, { "epoch": 0.9876962240135766, "grad_norm": 19.225787107231216, "learning_rate": 9.123358197621333e-10, "loss": -0.0409, "step": 11058 }, { "epoch": 0.9878748632293505, "grad_norm": 4.102444334146474, "learning_rate": 8.858966172059501e-10, "loss": -0.7279, "step": 11060 }, { "epoch": 0.9880535024451242, "grad_norm": 15.705603135461969, "learning_rate": 8.59845998460118e-10, "loss": -0.2768, "step": 11062 }, { "epoch": 0.9882321416608981, "grad_norm": 6.532229993388432, "learning_rate": 8.341839736563106e-10, "loss": -0.5822, "step": 11064 }, { "epoch": 0.988410780876672, "grad_norm": 8.526827493820607, "learning_rate": 8.089105527754325e-10, "loss": -0.0271, "step": 11066 }, { "epoch": 0.9885894200924458, "grad_norm": 13.97562848419778, "learning_rate": 7.840257456468436e-10, "loss": -0.6748, "step": 11068 }, { "epoch": 0.9887680593082196, "grad_norm": 4.183882332046182, "learning_rate": 7.595295619490238e-10, "loss": -0.4046, "step": 11070 }, { "epoch": 0.9889466985239935, "grad_norm": 9.943065308519913, "learning_rate": 7.354220112092413e-10, "loss": -0.9186, "step": 11072 }, { "epoch": 0.9891253377397673, "grad_norm": 5.581033925896422, "learning_rate": 7.117031028036624e-10, "loss": -0.4947, "step": 11074 }, { "epoch": 0.9893039769555412, "grad_norm": 4.363095900270923, "learning_rate": 6.883728459571303e-10, "loss": 0.297, "step": 11076 }, { "epoch": 0.989482616171315, "grad_norm": 7.971118051562363, "learning_rate": 6.654312497434977e-10, "loss": -0.0866, "step": 11078 }, { "epoch": 0.9896612553870888, "grad_norm": 18.829097485258686, "learning_rate": 6.428783230854051e-10, "loss": -1.1787, "step": 11080 }, { "epoch": 0.9898398946028627, "grad_norm": 4.596045718411376, "learning_rate": 6.207140747542805e-10, "loss": -1.117, "step": 11082 }, { "epoch": 0.9900185338186366, "grad_norm": 6.979260921316275, "learning_rate": 5.989385133704506e-10, "loss": -0.8225, "step": 11084 }, { "epoch": 0.9901971730344103, "grad_norm": 7.692042055678591, "learning_rate": 5.775516474031405e-10, "loss": 1.1631, "step": 11086 }, { "epoch": 0.9903758122501842, "grad_norm": 11.423226210046725, "learning_rate": 5.565534851702525e-10, "loss": -1.3411, "step": 11088 }, { "epoch": 0.9905544514659581, "grad_norm": 4.033605079849234, "learning_rate": 5.359440348384758e-10, "loss": 0.301, "step": 11090 }, { "epoch": 0.9907330906817319, "grad_norm": 5.41054351191822, "learning_rate": 5.157233044235099e-10, "loss": 0.7839, "step": 11092 }, { "epoch": 0.9909117298975058, "grad_norm": 5.6342768148033056, "learning_rate": 4.958913017897304e-10, "loss": 0.1718, "step": 11094 }, { "epoch": 0.9910903691132796, "grad_norm": 8.711929333248722, "learning_rate": 4.764480346504118e-10, "loss": -0.059, "step": 11096 }, { "epoch": 0.9912690083290534, "grad_norm": 14.643816458616, "learning_rate": 4.573935105675053e-10, "loss": -0.1419, "step": 11098 }, { "epoch": 0.9914476475448273, "grad_norm": 8.870960014541259, "learning_rate": 4.3872773695197154e-10, "loss": 0.2118, "step": 11100 }, { "epoch": 0.9916262867606012, "grad_norm": 10.603715828315835, "learning_rate": 4.204507210633368e-10, "loss": -0.7078, "step": 11102 }, { "epoch": 0.9918049259763749, "grad_norm": 4.673799966614927, "learning_rate": 4.025624700101371e-10, "loss": -0.0131, "step": 11104 }, { "epoch": 0.9919835651921488, "grad_norm": 6.407064544252598, "learning_rate": 3.85062990749585e-10, "loss": -0.137, "step": 11106 }, { "epoch": 0.9921622044079227, "grad_norm": 15.183355574411852, "learning_rate": 3.679522900877918e-10, "loss": -0.3141, "step": 11108 }, { "epoch": 0.9923408436236965, "grad_norm": 47.08339354024543, "learning_rate": 3.512303746795453e-10, "loss": -1.7037, "step": 11110 }, { "epoch": 0.9925194828394703, "grad_norm": 5.460933483378798, "learning_rate": 3.3489725102842094e-10, "loss": -0.8234, "step": 11112 }, { "epoch": 0.9926981220552442, "grad_norm": 4.621027090667254, "learning_rate": 3.1895292548678175e-10, "loss": 0.3461, "step": 11114 }, { "epoch": 0.992876761271018, "grad_norm": 2.4868974655605376, "learning_rate": 3.033974042560006e-10, "loss": 0.4766, "step": 11116 }, { "epoch": 0.9930554004867919, "grad_norm": 8.882039891229635, "learning_rate": 2.8823069338601567e-10, "loss": 0.1138, "step": 11118 }, { "epoch": 0.9932340397025657, "grad_norm": 8.996559757117101, "learning_rate": 2.7345279877555306e-10, "loss": -0.8051, "step": 11120 }, { "epoch": 0.9934126789183395, "grad_norm": 17.426162588996036, "learning_rate": 2.5906372617212623e-10, "loss": -0.8797, "step": 11122 }, { "epoch": 0.9935913181341134, "grad_norm": 9.394490238346034, "learning_rate": 2.4506348117214747e-10, "loss": -0.7599, "step": 11124 }, { "epoch": 0.9937699573498873, "grad_norm": 9.600184963351822, "learning_rate": 2.3145206922059458e-10, "loss": -0.137, "step": 11126 }, { "epoch": 0.993948596565661, "grad_norm": 6.89586870658845, "learning_rate": 2.1822949561134397e-10, "loss": -0.6446, "step": 11128 }, { "epoch": 0.9941272357814349, "grad_norm": 5.523167453838832, "learning_rate": 2.0539576548717075e-10, "loss": -0.4887, "step": 11130 }, { "epoch": 0.9943058749972088, "grad_norm": 8.737591910254032, "learning_rate": 1.929508838393046e-10, "loss": -0.334, "step": 11132 }, { "epoch": 0.9944845142129826, "grad_norm": 15.22076515470005, "learning_rate": 1.8089485550798477e-10, "loss": 0.0077, "step": 11134 }, { "epoch": 0.9946631534287564, "grad_norm": 9.21619237561244, "learning_rate": 1.692276851822383e-10, "loss": 0.5525, "step": 11136 }, { "epoch": 0.9948417926445303, "grad_norm": 6.570531671340745, "learning_rate": 1.5794937739954662e-10, "loss": -1.2073, "step": 11138 }, { "epoch": 0.9950204318603041, "grad_norm": 3.769125929471818, "learning_rate": 1.4705993654651195e-10, "loss": -0.0096, "step": 11140 }, { "epoch": 0.995199071076078, "grad_norm": 10.237161923742814, "learning_rate": 1.3655936685841307e-10, "loss": -1.5398, "step": 11142 }, { "epoch": 0.9953777102918518, "grad_norm": 20.452016358895552, "learning_rate": 1.2644767241898336e-10, "loss": -0.3221, "step": 11144 }, { "epoch": 0.9955563495076256, "grad_norm": 10.620200116414724, "learning_rate": 1.1672485716118784e-10, "loss": -0.6979, "step": 11146 }, { "epoch": 0.9957349887233995, "grad_norm": 15.140123105071464, "learning_rate": 1.0739092486633516e-10, "loss": -0.9164, "step": 11148 }, { "epoch": 0.9959136279391734, "grad_norm": 10.205929427710071, "learning_rate": 9.844587916474356e-11, "loss": -0.4778, "step": 11150 }, { "epoch": 0.9960922671549471, "grad_norm": 10.766225590445124, "learning_rate": 8.988972353540792e-11, "loss": -1.7908, "step": 11152 }, { "epoch": 0.996270906370721, "grad_norm": 5.124047167522953, "learning_rate": 8.17224613059997e-11, "loss": 1.5006, "step": 11154 }, { "epoch": 0.9964495455864949, "grad_norm": 8.486512197730383, "learning_rate": 7.394409565308901e-11, "loss": -0.9282, "step": 11156 }, { "epoch": 0.9966281848022687, "grad_norm": 15.605482438980228, "learning_rate": 6.655462960181158e-11, "loss": -0.0252, "step": 11158 }, { "epoch": 0.9968068240180425, "grad_norm": 3.9239788111932206, "learning_rate": 5.95540660260907e-11, "loss": -0.1919, "step": 11160 }, { "epoch": 0.9969854632338164, "grad_norm": 8.74706807742868, "learning_rate": 5.294240764874835e-11, "loss": 0.1436, "step": 11162 }, { "epoch": 0.9971641024495902, "grad_norm": 11.890319579684611, "learning_rate": 4.6719657041283115e-11, "loss": -1.2637, "step": 11164 }, { "epoch": 0.9973427416653641, "grad_norm": 3.034232019734971, "learning_rate": 4.088581662387014e-11, "loss": 0.3186, "step": 11166 }, { "epoch": 0.997521380881138, "grad_norm": 9.213857613352868, "learning_rate": 3.544088866536121e-11, "loss": -1.2049, "step": 11168 }, { "epoch": 0.9977000200969117, "grad_norm": 8.982388112501779, "learning_rate": 3.038487528350675e-11, "loss": -0.9177, "step": 11170 }, { "epoch": 0.9978786593126856, "grad_norm": 12.378828593161277, "learning_rate": 2.5717778444622752e-11, "loss": 0.5692, "step": 11172 }, { "epoch": 0.9980572985284595, "grad_norm": 11.155724435602803, "learning_rate": 2.1439599964145906e-11, "loss": -0.5715, "step": 11174 }, { "epoch": 0.9982359377442332, "grad_norm": 15.765458044674707, "learning_rate": 1.7550341505745413e-11, "loss": -0.4739, "step": 11176 }, { "epoch": 0.9984145769600071, "grad_norm": 9.698307039057287, "learning_rate": 1.4050004582211173e-11, "loss": -0.795, "step": 11178 }, { "epoch": 0.998593216175781, "grad_norm": 18.59437695896766, "learning_rate": 1.093859055478763e-11, "loss": 0.0643, "step": 11180 }, { "epoch": 0.9987718553915549, "grad_norm": 12.827160890803338, "learning_rate": 8.216100633617885e-12, "loss": -1.5371, "step": 11182 }, { "epoch": 0.9989504946073287, "grad_norm": 8.42095681813594, "learning_rate": 5.882535877632655e-12, "loss": 0.569, "step": 11184 }, { "epoch": 0.9991291338231025, "grad_norm": 13.190696581998973, "learning_rate": 3.937897194439266e-12, "loss": -0.3791, "step": 11186 }, { "epoch": 0.9993077730388764, "grad_norm": 6.573446100336809, "learning_rate": 2.3821853402106185e-12, "loss": -0.348, "step": 11188 }, { "epoch": 0.9994864122546502, "grad_norm": 9.958166740864867, "learning_rate": 1.2154009201292837e-12, "loss": 0.7883, "step": 11190 }, { "epoch": 0.9996650514704241, "grad_norm": 5.930873666768824, "learning_rate": 4.3754438794341155e-13, "loss": 0.2164, "step": 11192 }, { "epoch": 0.9998436906861979, "grad_norm": 8.556649318430303, "learning_rate": 4.861604629979865e-14, "loss": -0.3744, "step": 11194 } ], "logging_steps": 2, "max_steps": 11195, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 236493063192576.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }