{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4878, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002050020500205002, "grad_norm": 37.64109909379259, "learning_rate": 1.360544217687075e-06, "loss": 4.3738, "step": 1 }, { "epoch": 0.0004100041000410004, "grad_norm": 39.87283600719748, "learning_rate": 2.72108843537415e-06, "loss": 4.512, "step": 2 }, { "epoch": 0.0006150061500615006, "grad_norm": 39.472896772216814, "learning_rate": 4.081632653061224e-06, "loss": 4.4425, "step": 3 }, { "epoch": 0.0008200082000820008, "grad_norm": 33.38905207047185, "learning_rate": 5.4421768707483e-06, "loss": 3.9203, "step": 4 }, { "epoch": 0.001025010250102501, "grad_norm": 23.08525793257891, "learning_rate": 6.802721088435375e-06, "loss": 3.4012, "step": 5 }, { "epoch": 0.0012300123001230013, "grad_norm": 19.088184046352346, "learning_rate": 8.163265306122448e-06, "loss": 2.9879, "step": 6 }, { "epoch": 0.0014350143501435015, "grad_norm": 20.338037765865536, "learning_rate": 9.523809523809523e-06, "loss": 2.8328, "step": 7 }, { "epoch": 0.0016400164001640015, "grad_norm": 14.542801875019975, "learning_rate": 1.08843537414966e-05, "loss": 2.5144, "step": 8 }, { "epoch": 0.0018450184501845018, "grad_norm": 12.089573241966134, "learning_rate": 1.2244897959183674e-05, "loss": 2.3532, "step": 9 }, { "epoch": 0.002050020500205002, "grad_norm": 8.053574726057994, "learning_rate": 1.360544217687075e-05, "loss": 2.1302, "step": 10 }, { "epoch": 0.002255022550225502, "grad_norm": 7.013914761875336, "learning_rate": 1.4965986394557824e-05, "loss": 2.1662, "step": 11 }, { "epoch": 0.0024600246002460025, "grad_norm": 5.418822437305117, "learning_rate": 1.6326530612244897e-05, "loss": 1.9452, "step": 12 }, { "epoch": 0.0026650266502665026, "grad_norm": 5.940146989570587, "learning_rate": 1.7687074829931973e-05, "loss": 2.0287, "step": 13 }, { "epoch": 0.002870028700287003, "grad_norm": 4.234885410824282, "learning_rate": 1.9047619047619046e-05, "loss": 1.8651, "step": 14 }, { "epoch": 0.003075030750307503, "grad_norm": 3.4291547117840477, "learning_rate": 2.0408163265306123e-05, "loss": 1.8889, "step": 15 }, { "epoch": 0.003280032800328003, "grad_norm": 3.078114123353822, "learning_rate": 2.17687074829932e-05, "loss": 1.8215, "step": 16 }, { "epoch": 0.0034850348503485036, "grad_norm": 3.1064321131215844, "learning_rate": 2.3129251700680275e-05, "loss": 1.8451, "step": 17 }, { "epoch": 0.0036900369003690036, "grad_norm": 2.71846922315322, "learning_rate": 2.448979591836735e-05, "loss": 1.797, "step": 18 }, { "epoch": 0.003895038950389504, "grad_norm": 2.266772231433325, "learning_rate": 2.5850340136054425e-05, "loss": 1.7298, "step": 19 }, { "epoch": 0.004100041000410004, "grad_norm": 2.456088314124704, "learning_rate": 2.72108843537415e-05, "loss": 1.7609, "step": 20 }, { "epoch": 0.004305043050430504, "grad_norm": 2.0487428566103847, "learning_rate": 2.857142857142857e-05, "loss": 1.6864, "step": 21 }, { "epoch": 0.004510045100451004, "grad_norm": 2.267998391844328, "learning_rate": 2.9931972789115647e-05, "loss": 1.6707, "step": 22 }, { "epoch": 0.004715047150471505, "grad_norm": 2.0069255800287094, "learning_rate": 3.1292517006802724e-05, "loss": 1.6673, "step": 23 }, { "epoch": 0.004920049200492005, "grad_norm": 2.1781103798419172, "learning_rate": 3.265306122448979e-05, "loss": 1.666, "step": 24 }, { "epoch": 0.005125051250512505, "grad_norm": 1.6937443021011496, "learning_rate": 3.401360544217687e-05, "loss": 1.5955, "step": 25 }, { "epoch": 0.005330053300533005, "grad_norm": 1.8065755233050094, "learning_rate": 3.5374149659863946e-05, "loss": 1.658, "step": 26 }, { "epoch": 0.005535055350553505, "grad_norm": 1.9182580321381462, "learning_rate": 3.673469387755102e-05, "loss": 1.6824, "step": 27 }, { "epoch": 0.005740057400574006, "grad_norm": 1.4236834414305823, "learning_rate": 3.809523809523809e-05, "loss": 1.6489, "step": 28 }, { "epoch": 0.005945059450594506, "grad_norm": 1.8731003710510907, "learning_rate": 3.945578231292517e-05, "loss": 1.6246, "step": 29 }, { "epoch": 0.006150061500615006, "grad_norm": 1.6001336131350723, "learning_rate": 4.0816326530612245e-05, "loss": 1.5944, "step": 30 }, { "epoch": 0.006355063550635506, "grad_norm": 1.6408864666073666, "learning_rate": 4.217687074829932e-05, "loss": 1.6628, "step": 31 }, { "epoch": 0.006560065600656006, "grad_norm": 1.393804100860193, "learning_rate": 4.35374149659864e-05, "loss": 1.5598, "step": 32 }, { "epoch": 0.006765067650676507, "grad_norm": 1.4316132023407333, "learning_rate": 4.4897959183673474e-05, "loss": 1.6082, "step": 33 }, { "epoch": 0.006970069700697007, "grad_norm": 1.3589272465171653, "learning_rate": 4.625850340136055e-05, "loss": 1.5753, "step": 34 }, { "epoch": 0.007175071750717507, "grad_norm": 1.223106309031031, "learning_rate": 4.761904761904762e-05, "loss": 1.6179, "step": 35 }, { "epoch": 0.007380073800738007, "grad_norm": 1.3729279883180139, "learning_rate": 4.89795918367347e-05, "loss": 1.6669, "step": 36 }, { "epoch": 0.007585075850758507, "grad_norm": 1.2255963668981733, "learning_rate": 5.034013605442177e-05, "loss": 1.6251, "step": 37 }, { "epoch": 0.007790077900779008, "grad_norm": 1.3998796560107785, "learning_rate": 5.170068027210885e-05, "loss": 1.5948, "step": 38 }, { "epoch": 0.007995079950799507, "grad_norm": 1.3886588280175667, "learning_rate": 5.3061224489795926e-05, "loss": 1.5789, "step": 39 }, { "epoch": 0.008200082000820008, "grad_norm": 1.3232325802476532, "learning_rate": 5.4421768707483e-05, "loss": 1.6008, "step": 40 }, { "epoch": 0.008405084050840509, "grad_norm": 1.0695923303603694, "learning_rate": 5.5782312925170065e-05, "loss": 1.5568, "step": 41 }, { "epoch": 0.008610086100861008, "grad_norm": 1.4754731221757968, "learning_rate": 5.714285714285714e-05, "loss": 1.6707, "step": 42 }, { "epoch": 0.008815088150881509, "grad_norm": 1.1993902293465908, "learning_rate": 5.850340136054422e-05, "loss": 1.5834, "step": 43 }, { "epoch": 0.009020090200902008, "grad_norm": 1.333962688219652, "learning_rate": 5.9863945578231295e-05, "loss": 1.6117, "step": 44 }, { "epoch": 0.00922509225092251, "grad_norm": 1.177419286591825, "learning_rate": 6.122448979591838e-05, "loss": 1.5804, "step": 45 }, { "epoch": 0.00943009430094301, "grad_norm": 1.1413837752740432, "learning_rate": 6.258503401360545e-05, "loss": 1.5583, "step": 46 }, { "epoch": 0.00963509635096351, "grad_norm": 1.1203710530412794, "learning_rate": 6.394557823129253e-05, "loss": 1.5605, "step": 47 }, { "epoch": 0.00984009840098401, "grad_norm": 1.1063122050829752, "learning_rate": 6.530612244897959e-05, "loss": 1.5567, "step": 48 }, { "epoch": 0.01004510045100451, "grad_norm": 1.184928744026521, "learning_rate": 6.666666666666667e-05, "loss": 1.5321, "step": 49 }, { "epoch": 0.01025010250102501, "grad_norm": 1.188774308015982, "learning_rate": 6.802721088435374e-05, "loss": 1.6235, "step": 50 }, { "epoch": 0.010455104551045511, "grad_norm": 1.122080209760507, "learning_rate": 6.938775510204082e-05, "loss": 1.577, "step": 51 }, { "epoch": 0.01066010660106601, "grad_norm": 1.1881047432968916, "learning_rate": 7.074829931972789e-05, "loss": 1.5724, "step": 52 }, { "epoch": 0.010865108651086511, "grad_norm": 1.094093048422763, "learning_rate": 7.210884353741498e-05, "loss": 1.5447, "step": 53 }, { "epoch": 0.01107011070110701, "grad_norm": 1.307737210298449, "learning_rate": 7.346938775510205e-05, "loss": 1.5103, "step": 54 }, { "epoch": 0.011275112751127511, "grad_norm": 1.0517542954361705, "learning_rate": 7.482993197278913e-05, "loss": 1.6141, "step": 55 }, { "epoch": 0.011480114801148012, "grad_norm": 1.0023397214832885, "learning_rate": 7.619047619047618e-05, "loss": 1.571, "step": 56 }, { "epoch": 0.011685116851168511, "grad_norm": 1.045679620732055, "learning_rate": 7.755102040816327e-05, "loss": 1.5603, "step": 57 }, { "epoch": 0.011890118901189012, "grad_norm": 0.9846175647305268, "learning_rate": 7.891156462585034e-05, "loss": 1.5654, "step": 58 }, { "epoch": 0.012095120951209511, "grad_norm": 1.0438531186227216, "learning_rate": 8.027210884353742e-05, "loss": 1.5724, "step": 59 }, { "epoch": 0.012300123001230012, "grad_norm": 1.018743767783559, "learning_rate": 8.163265306122449e-05, "loss": 1.5669, "step": 60 }, { "epoch": 0.012505125051250513, "grad_norm": 1.0821685280571838, "learning_rate": 8.299319727891157e-05, "loss": 1.5832, "step": 61 }, { "epoch": 0.012710127101271012, "grad_norm": 1.2877899437969236, "learning_rate": 8.435374149659864e-05, "loss": 1.616, "step": 62 }, { "epoch": 0.012915129151291513, "grad_norm": 1.0758679535113083, "learning_rate": 8.571428571428571e-05, "loss": 1.5676, "step": 63 }, { "epoch": 0.013120131201312012, "grad_norm": 1.0735525982150025, "learning_rate": 8.70748299319728e-05, "loss": 1.6061, "step": 64 }, { "epoch": 0.013325133251332513, "grad_norm": 1.2104333826852989, "learning_rate": 8.843537414965987e-05, "loss": 1.6127, "step": 65 }, { "epoch": 0.013530135301353014, "grad_norm": 1.3035430704098177, "learning_rate": 8.979591836734695e-05, "loss": 1.6724, "step": 66 }, { "epoch": 0.013735137351373513, "grad_norm": 0.984580171641528, "learning_rate": 9.115646258503402e-05, "loss": 1.5247, "step": 67 }, { "epoch": 0.013940139401394014, "grad_norm": 1.0972435438699233, "learning_rate": 9.25170068027211e-05, "loss": 1.5786, "step": 68 }, { "epoch": 0.014145141451414513, "grad_norm": 1.2511354478770902, "learning_rate": 9.387755102040817e-05, "loss": 1.6113, "step": 69 }, { "epoch": 0.014350143501435014, "grad_norm": 1.0936337767540483, "learning_rate": 9.523809523809524e-05, "loss": 1.5919, "step": 70 }, { "epoch": 0.014555145551455515, "grad_norm": 1.1467496227035352, "learning_rate": 9.659863945578231e-05, "loss": 1.5329, "step": 71 }, { "epoch": 0.014760147601476014, "grad_norm": 1.0713353060352995, "learning_rate": 9.79591836734694e-05, "loss": 1.5832, "step": 72 }, { "epoch": 0.014965149651496515, "grad_norm": 1.1166945816101541, "learning_rate": 9.931972789115646e-05, "loss": 1.6824, "step": 73 }, { "epoch": 0.015170151701517014, "grad_norm": 1.0960393805979314, "learning_rate": 0.00010068027210884355, "loss": 1.6052, "step": 74 }, { "epoch": 0.015375153751537515, "grad_norm": 1.048126877391856, "learning_rate": 0.00010204081632653062, "loss": 1.5975, "step": 75 }, { "epoch": 0.015580155801558016, "grad_norm": 1.055404329447733, "learning_rate": 0.0001034013605442177, "loss": 1.6247, "step": 76 }, { "epoch": 0.015785157851578517, "grad_norm": 1.088923103794291, "learning_rate": 0.00010476190476190477, "loss": 1.565, "step": 77 }, { "epoch": 0.015990159901599015, "grad_norm": 1.1753599911471933, "learning_rate": 0.00010612244897959185, "loss": 1.5744, "step": 78 }, { "epoch": 0.016195161951619515, "grad_norm": 1.0794471964691739, "learning_rate": 0.00010748299319727892, "loss": 1.6342, "step": 79 }, { "epoch": 0.016400164001640016, "grad_norm": 1.2035928559538578, "learning_rate": 0.000108843537414966, "loss": 1.5847, "step": 80 }, { "epoch": 0.016605166051660517, "grad_norm": 1.08114649790515, "learning_rate": 0.00011020408163265306, "loss": 1.615, "step": 81 }, { "epoch": 0.016810168101681018, "grad_norm": 1.2086894815182474, "learning_rate": 0.00011156462585034013, "loss": 1.6343, "step": 82 }, { "epoch": 0.017015170151701516, "grad_norm": 1.0410041600988618, "learning_rate": 0.00011292517006802721, "loss": 1.629, "step": 83 }, { "epoch": 0.017220172201722016, "grad_norm": 1.0784682769702671, "learning_rate": 0.00011428571428571428, "loss": 1.5933, "step": 84 }, { "epoch": 0.017425174251742517, "grad_norm": 1.0787477943305062, "learning_rate": 0.00011564625850340137, "loss": 1.6087, "step": 85 }, { "epoch": 0.017630176301763018, "grad_norm": 0.9608305364472287, "learning_rate": 0.00011700680272108844, "loss": 1.5138, "step": 86 }, { "epoch": 0.01783517835178352, "grad_norm": 0.9577791495814476, "learning_rate": 0.00011836734693877552, "loss": 1.575, "step": 87 }, { "epoch": 0.018040180401804017, "grad_norm": 0.9781754947236491, "learning_rate": 0.00011972789115646259, "loss": 1.5526, "step": 88 }, { "epoch": 0.018245182451824517, "grad_norm": 1.1099549338633823, "learning_rate": 0.00012108843537414967, "loss": 1.6376, "step": 89 }, { "epoch": 0.01845018450184502, "grad_norm": 1.0700630463054988, "learning_rate": 0.00012244897959183676, "loss": 1.6277, "step": 90 }, { "epoch": 0.01865518655186552, "grad_norm": 1.069946434823473, "learning_rate": 0.0001238095238095238, "loss": 1.6369, "step": 91 }, { "epoch": 0.01886018860188602, "grad_norm": 0.977616253015782, "learning_rate": 0.0001251700680272109, "loss": 1.6097, "step": 92 }, { "epoch": 0.019065190651906518, "grad_norm": 1.2977078345338526, "learning_rate": 0.00012653061224489798, "loss": 1.5614, "step": 93 }, { "epoch": 0.01927019270192702, "grad_norm": 1.1905228370313738, "learning_rate": 0.00012789115646258506, "loss": 1.5899, "step": 94 }, { "epoch": 0.01947519475194752, "grad_norm": 0.9076860345777181, "learning_rate": 0.00012925170068027212, "loss": 1.5657, "step": 95 }, { "epoch": 0.01968019680196802, "grad_norm": 1.0262019407725596, "learning_rate": 0.00013061224489795917, "loss": 1.6335, "step": 96 }, { "epoch": 0.01988519885198852, "grad_norm": 1.2489537278474532, "learning_rate": 0.00013197278911564626, "loss": 1.6117, "step": 97 }, { "epoch": 0.02009020090200902, "grad_norm": 1.1470594814333361, "learning_rate": 0.00013333333333333334, "loss": 1.6462, "step": 98 }, { "epoch": 0.02029520295202952, "grad_norm": 1.1140813099111917, "learning_rate": 0.0001346938775510204, "loss": 1.6675, "step": 99 }, { "epoch": 0.02050020500205002, "grad_norm": 1.0347428297472658, "learning_rate": 0.00013605442176870748, "loss": 1.6536, "step": 100 }, { "epoch": 0.02070520705207052, "grad_norm": 1.0859214215544615, "learning_rate": 0.00013741496598639456, "loss": 1.6088, "step": 101 }, { "epoch": 0.020910209102091022, "grad_norm": 1.049032445550708, "learning_rate": 0.00013877551020408165, "loss": 1.636, "step": 102 }, { "epoch": 0.02111521115211152, "grad_norm": 1.0986846920925164, "learning_rate": 0.0001401360544217687, "loss": 1.6125, "step": 103 }, { "epoch": 0.02132021320213202, "grad_norm": 1.1108375489491855, "learning_rate": 0.00014149659863945578, "loss": 1.6426, "step": 104 }, { "epoch": 0.02152521525215252, "grad_norm": 1.177355850105982, "learning_rate": 0.00014285714285714287, "loss": 1.5887, "step": 105 }, { "epoch": 0.021730217302173022, "grad_norm": 1.1216542459849195, "learning_rate": 0.00014421768707482995, "loss": 1.6183, "step": 106 }, { "epoch": 0.021935219352193523, "grad_norm": 1.1743373962795476, "learning_rate": 0.000145578231292517, "loss": 1.6305, "step": 107 }, { "epoch": 0.02214022140221402, "grad_norm": 1.23704891266505, "learning_rate": 0.0001469387755102041, "loss": 1.6428, "step": 108 }, { "epoch": 0.02234522345223452, "grad_norm": 1.23714819830845, "learning_rate": 0.00014829931972789117, "loss": 1.7324, "step": 109 }, { "epoch": 0.022550225502255022, "grad_norm": 1.0262073791537545, "learning_rate": 0.00014965986394557826, "loss": 1.5315, "step": 110 }, { "epoch": 0.022755227552275523, "grad_norm": 1.1553886431113178, "learning_rate": 0.0001510204081632653, "loss": 1.6211, "step": 111 }, { "epoch": 0.022960229602296024, "grad_norm": 1.1861448771352119, "learning_rate": 0.00015238095238095237, "loss": 1.631, "step": 112 }, { "epoch": 0.02316523165231652, "grad_norm": 1.314362679837142, "learning_rate": 0.00015374149659863945, "loss": 1.6005, "step": 113 }, { "epoch": 0.023370233702337023, "grad_norm": 1.263537873458596, "learning_rate": 0.00015510204081632654, "loss": 1.6394, "step": 114 }, { "epoch": 0.023575235752357523, "grad_norm": 1.1888656577304768, "learning_rate": 0.00015646258503401362, "loss": 1.6573, "step": 115 }, { "epoch": 0.023780237802378024, "grad_norm": 1.1256448238432635, "learning_rate": 0.00015782312925170067, "loss": 1.6569, "step": 116 }, { "epoch": 0.023985239852398525, "grad_norm": 1.215308548571746, "learning_rate": 0.00015918367346938776, "loss": 1.6432, "step": 117 }, { "epoch": 0.024190241902419023, "grad_norm": 1.235957365691106, "learning_rate": 0.00016054421768707484, "loss": 1.632, "step": 118 }, { "epoch": 0.024395243952439524, "grad_norm": 1.3100431872941194, "learning_rate": 0.00016190476190476192, "loss": 1.6065, "step": 119 }, { "epoch": 0.024600246002460024, "grad_norm": 1.2873096863935316, "learning_rate": 0.00016326530612244898, "loss": 1.4878, "step": 120 }, { "epoch": 0.024805248052480525, "grad_norm": 1.2096947815483081, "learning_rate": 0.00016462585034013606, "loss": 1.6348, "step": 121 }, { "epoch": 0.025010250102501026, "grad_norm": 1.263927652256806, "learning_rate": 0.00016598639455782315, "loss": 1.632, "step": 122 }, { "epoch": 0.025215252152521524, "grad_norm": 1.3027019149962984, "learning_rate": 0.00016734693877551023, "loss": 1.6461, "step": 123 }, { "epoch": 0.025420254202542025, "grad_norm": 1.4804585808697446, "learning_rate": 0.00016870748299319729, "loss": 1.6369, "step": 124 }, { "epoch": 0.025625256252562525, "grad_norm": 1.03261017329102, "learning_rate": 0.00017006802721088434, "loss": 1.6201, "step": 125 }, { "epoch": 0.025830258302583026, "grad_norm": 1.1685157501083643, "learning_rate": 0.00017142857142857143, "loss": 1.6189, "step": 126 }, { "epoch": 0.026035260352603527, "grad_norm": 1.2129101850548476, "learning_rate": 0.0001727891156462585, "loss": 1.5909, "step": 127 }, { "epoch": 0.026240262402624025, "grad_norm": 1.2418719064099584, "learning_rate": 0.0001741496598639456, "loss": 1.6522, "step": 128 }, { "epoch": 0.026445264452644526, "grad_norm": 1.2136958419140977, "learning_rate": 0.00017551020408163265, "loss": 1.609, "step": 129 }, { "epoch": 0.026650266502665027, "grad_norm": 1.4799462684969362, "learning_rate": 0.00017687074829931973, "loss": 1.7233, "step": 130 }, { "epoch": 0.026855268552685527, "grad_norm": 1.1277518979638743, "learning_rate": 0.00017823129251700681, "loss": 1.6089, "step": 131 }, { "epoch": 0.02706027060270603, "grad_norm": 2.624680936006109, "learning_rate": 0.0001795918367346939, "loss": 1.6087, "step": 132 }, { "epoch": 0.027265272652726526, "grad_norm": 1.1282762242532551, "learning_rate": 0.00018095238095238095, "loss": 1.621, "step": 133 }, { "epoch": 0.027470274702747027, "grad_norm": 1.4754640362528888, "learning_rate": 0.00018231292517006804, "loss": 1.6509, "step": 134 }, { "epoch": 0.027675276752767528, "grad_norm": 1.163950069566443, "learning_rate": 0.00018367346938775512, "loss": 1.6341, "step": 135 }, { "epoch": 0.02788027880278803, "grad_norm": 1.0401044592245412, "learning_rate": 0.0001850340136054422, "loss": 1.7007, "step": 136 }, { "epoch": 0.02808528085280853, "grad_norm": 1.3591068252321274, "learning_rate": 0.00018639455782312926, "loss": 1.6996, "step": 137 }, { "epoch": 0.028290282902829027, "grad_norm": 1.1397043865244645, "learning_rate": 0.00018775510204081634, "loss": 1.5753, "step": 138 }, { "epoch": 0.028495284952849528, "grad_norm": 1.1540356283967317, "learning_rate": 0.00018911564625850343, "loss": 1.6087, "step": 139 }, { "epoch": 0.02870028700287003, "grad_norm": 1.230927536460245, "learning_rate": 0.00019047619047619048, "loss": 1.5921, "step": 140 }, { "epoch": 0.02890528905289053, "grad_norm": 1.127823608621212, "learning_rate": 0.00019183673469387756, "loss": 1.5908, "step": 141 }, { "epoch": 0.02911029110291103, "grad_norm": 1.1887684798697176, "learning_rate": 0.00019319727891156462, "loss": 1.6862, "step": 142 }, { "epoch": 0.029315293152931528, "grad_norm": 1.2863024046618226, "learning_rate": 0.0001945578231292517, "loss": 1.6094, "step": 143 }, { "epoch": 0.02952029520295203, "grad_norm": 1.1885397366217607, "learning_rate": 0.0001959183673469388, "loss": 1.6782, "step": 144 }, { "epoch": 0.02972529725297253, "grad_norm": 1.184431128873404, "learning_rate": 0.00019727891156462587, "loss": 1.71, "step": 145 }, { "epoch": 0.02993029930299303, "grad_norm": 1.43125176648162, "learning_rate": 0.00019863945578231293, "loss": 1.7075, "step": 146 }, { "epoch": 0.03013530135301353, "grad_norm": 1.2742070457194576, "learning_rate": 0.0002, "loss": 1.6954, "step": 147 }, { "epoch": 0.03034030340303403, "grad_norm": 1.6480154811113514, "learning_rate": 0.00019999997795227224, "loss": 1.6228, "step": 148 }, { "epoch": 0.03054530545305453, "grad_norm": 1.0680986570634565, "learning_rate": 0.0001999999118090987, "loss": 1.7318, "step": 149 }, { "epoch": 0.03075030750307503, "grad_norm": 1.2622281390071541, "learning_rate": 0.0001999998015705085, "loss": 1.7252, "step": 150 }, { "epoch": 0.03095530955309553, "grad_norm": 1.1040247667751586, "learning_rate": 0.00019999964723655032, "loss": 1.6719, "step": 151 }, { "epoch": 0.031160311603116032, "grad_norm": 1.165309851848606, "learning_rate": 0.00019999944880729213, "loss": 1.6872, "step": 152 }, { "epoch": 0.03136531365313653, "grad_norm": 1.1139968864900518, "learning_rate": 0.00019999920628282152, "loss": 1.7099, "step": 153 }, { "epoch": 0.031570315703157034, "grad_norm": 1.1039470575016952, "learning_rate": 0.00019999891966324538, "loss": 1.6291, "step": 154 }, { "epoch": 0.03177531775317753, "grad_norm": 1.1360166943272085, "learning_rate": 0.0001999985889486901, "loss": 1.6737, "step": 155 }, { "epoch": 0.03198031980319803, "grad_norm": 0.958675839167699, "learning_rate": 0.00019999821413930146, "loss": 1.6348, "step": 156 }, { "epoch": 0.03218532185321853, "grad_norm": 1.1670953989857722, "learning_rate": 0.00019999779523524483, "loss": 1.7131, "step": 157 }, { "epoch": 0.03239032390323903, "grad_norm": 1.082095257350303, "learning_rate": 0.00019999733223670488, "loss": 1.648, "step": 158 }, { "epoch": 0.032595325953259535, "grad_norm": 1.1171693717316644, "learning_rate": 0.00019999682514388578, "loss": 1.7464, "step": 159 }, { "epoch": 0.03280032800328003, "grad_norm": 1.1401801511264775, "learning_rate": 0.00019999627395701113, "loss": 1.7225, "step": 160 }, { "epoch": 0.03300533005330053, "grad_norm": 0.9789030820958351, "learning_rate": 0.00019999567867632397, "loss": 1.7047, "step": 161 }, { "epoch": 0.033210332103321034, "grad_norm": 1.1084676506755051, "learning_rate": 0.0001999950393020868, "loss": 1.7534, "step": 162 }, { "epoch": 0.03341533415334153, "grad_norm": 1.0600543936710014, "learning_rate": 0.00019999435583458155, "loss": 1.672, "step": 163 }, { "epoch": 0.033620336203362036, "grad_norm": 1.113180803512727, "learning_rate": 0.00019999362827410964, "loss": 1.7815, "step": 164 }, { "epoch": 0.033825338253382534, "grad_norm": 0.9440610859866324, "learning_rate": 0.0001999928566209918, "loss": 1.7094, "step": 165 }, { "epoch": 0.03403034030340303, "grad_norm": 1.059843009867599, "learning_rate": 0.0001999920408755684, "loss": 1.7048, "step": 166 }, { "epoch": 0.034235342353423535, "grad_norm": 0.94896147910256, "learning_rate": 0.00019999118103819906, "loss": 1.691, "step": 167 }, { "epoch": 0.03444034440344403, "grad_norm": 1.0680880654418765, "learning_rate": 0.000199990277109263, "loss": 1.6689, "step": 168 }, { "epoch": 0.03464534645346454, "grad_norm": 0.992212593994851, "learning_rate": 0.00019998932908915873, "loss": 1.6764, "step": 169 }, { "epoch": 0.034850348503485035, "grad_norm": 0.879423198436075, "learning_rate": 0.00019998833697830435, "loss": 1.6399, "step": 170 }, { "epoch": 0.03505535055350553, "grad_norm": 1.2176693077785534, "learning_rate": 0.0001999873007771373, "loss": 1.7688, "step": 171 }, { "epoch": 0.035260352603526036, "grad_norm": 1.013637605210522, "learning_rate": 0.00019998622048611453, "loss": 1.6707, "step": 172 }, { "epoch": 0.035465354653546534, "grad_norm": 0.9420062932703167, "learning_rate": 0.0001999850961057124, "loss": 1.6589, "step": 173 }, { "epoch": 0.03567035670356704, "grad_norm": 0.9351476084159396, "learning_rate": 0.00019998392763642667, "loss": 1.7395, "step": 174 }, { "epoch": 0.035875358753587536, "grad_norm": 1.0370641703483614, "learning_rate": 0.00019998271507877261, "loss": 1.7044, "step": 175 }, { "epoch": 0.03608036080360803, "grad_norm": 0.920277981500977, "learning_rate": 0.0001999814584332849, "loss": 1.7103, "step": 176 }, { "epoch": 0.03628536285362854, "grad_norm": 1.0533965924810595, "learning_rate": 0.00019998015770051765, "loss": 1.691, "step": 177 }, { "epoch": 0.036490364903649035, "grad_norm": 1.0051367563688869, "learning_rate": 0.00019997881288104445, "loss": 1.7118, "step": 178 }, { "epoch": 0.03669536695366954, "grad_norm": 0.9093082446698368, "learning_rate": 0.00019997742397545828, "loss": 1.6682, "step": 179 }, { "epoch": 0.03690036900369004, "grad_norm": 1.0155664884370854, "learning_rate": 0.0001999759909843716, "loss": 1.7184, "step": 180 }, { "epoch": 0.037105371053710534, "grad_norm": 0.9544214348599572, "learning_rate": 0.0001999745139084163, "loss": 1.6963, "step": 181 }, { "epoch": 0.03731037310373104, "grad_norm": 0.9560958102567276, "learning_rate": 0.00019997299274824367, "loss": 1.695, "step": 182 }, { "epoch": 0.037515375153751536, "grad_norm": 0.988594578922743, "learning_rate": 0.0001999714275045245, "loss": 1.6722, "step": 183 }, { "epoch": 0.03772037720377204, "grad_norm": 1.0361367366457916, "learning_rate": 0.00019996981817794898, "loss": 1.7246, "step": 184 }, { "epoch": 0.03792537925379254, "grad_norm": 0.9208549225862263, "learning_rate": 0.00019996816476922677, "loss": 1.6338, "step": 185 }, { "epoch": 0.038130381303813035, "grad_norm": 1.0410273936492669, "learning_rate": 0.0001999664672790869, "loss": 1.6747, "step": 186 }, { "epoch": 0.03833538335383354, "grad_norm": 0.9382463828768859, "learning_rate": 0.00019996472570827796, "loss": 1.6842, "step": 187 }, { "epoch": 0.03854038540385404, "grad_norm": 0.853513525121089, "learning_rate": 0.00019996294005756785, "loss": 1.6848, "step": 188 }, { "epoch": 0.03874538745387454, "grad_norm": 0.9463063527448772, "learning_rate": 0.00019996111032774395, "loss": 1.6699, "step": 189 }, { "epoch": 0.03895038950389504, "grad_norm": 0.9675038054864104, "learning_rate": 0.00019995923651961315, "loss": 1.7363, "step": 190 }, { "epoch": 0.039155391553915536, "grad_norm": 1.2525486011718874, "learning_rate": 0.00019995731863400165, "loss": 1.6369, "step": 191 }, { "epoch": 0.03936039360393604, "grad_norm": 1.0253006271832148, "learning_rate": 0.00019995535667175518, "loss": 1.6919, "step": 192 }, { "epoch": 0.03956539565395654, "grad_norm": 0.962317240667466, "learning_rate": 0.00019995335063373887, "loss": 1.7173, "step": 193 }, { "epoch": 0.03977039770397704, "grad_norm": 0.9703756024479744, "learning_rate": 0.00019995130052083727, "loss": 1.7198, "step": 194 }, { "epoch": 0.03997539975399754, "grad_norm": 0.870466862148783, "learning_rate": 0.00019994920633395445, "loss": 1.6581, "step": 195 }, { "epoch": 0.04018040180401804, "grad_norm": 0.926969236942892, "learning_rate": 0.0001999470680740138, "loss": 1.6509, "step": 196 }, { "epoch": 0.04038540385403854, "grad_norm": 0.9586452572040121, "learning_rate": 0.0001999448857419582, "loss": 1.6437, "step": 197 }, { "epoch": 0.04059040590405904, "grad_norm": 1.2221310341386278, "learning_rate": 0.00019994265933874998, "loss": 1.6887, "step": 198 }, { "epoch": 0.04079540795407954, "grad_norm": 0.9575123751994977, "learning_rate": 0.00019994038886537085, "loss": 1.6664, "step": 199 }, { "epoch": 0.04100041000410004, "grad_norm": 0.9562693613955552, "learning_rate": 0.00019993807432282202, "loss": 1.7664, "step": 200 }, { "epoch": 0.04120541205412054, "grad_norm": 0.9364057810167934, "learning_rate": 0.00019993571571212408, "loss": 1.6861, "step": 201 }, { "epoch": 0.04141041410414104, "grad_norm": 0.9607321302901767, "learning_rate": 0.00019993331303431707, "loss": 1.7155, "step": 202 }, { "epoch": 0.04161541615416154, "grad_norm": 0.8967762993782126, "learning_rate": 0.00019993086629046045, "loss": 1.6838, "step": 203 }, { "epoch": 0.041820418204182044, "grad_norm": 0.9405470283620619, "learning_rate": 0.00019992837548163316, "loss": 1.7276, "step": 204 }, { "epoch": 0.04202542025420254, "grad_norm": 0.9271912358418682, "learning_rate": 0.0001999258406089335, "loss": 1.6796, "step": 205 }, { "epoch": 0.04223042230422304, "grad_norm": 0.966775595641443, "learning_rate": 0.00019992326167347923, "loss": 1.6944, "step": 206 }, { "epoch": 0.042435424354243544, "grad_norm": 0.9040603671143699, "learning_rate": 0.00019992063867640757, "loss": 1.6618, "step": 207 }, { "epoch": 0.04264042640426404, "grad_norm": 0.9709079011282012, "learning_rate": 0.00019991797161887512, "loss": 1.7549, "step": 208 }, { "epoch": 0.042845428454284545, "grad_norm": 1.6669592548994134, "learning_rate": 0.00019991526050205797, "loss": 1.6914, "step": 209 }, { "epoch": 0.04305043050430504, "grad_norm": 0.8671344062949641, "learning_rate": 0.00019991250532715155, "loss": 1.6139, "step": 210 }, { "epoch": 0.04325543255432554, "grad_norm": 0.908591899397052, "learning_rate": 0.0001999097060953708, "loss": 1.6494, "step": 211 }, { "epoch": 0.043460434604346045, "grad_norm": 0.929737140473697, "learning_rate": 0.00019990686280795003, "loss": 1.7305, "step": 212 }, { "epoch": 0.04366543665436654, "grad_norm": 1.0175328138250153, "learning_rate": 0.000199903975466143, "loss": 1.6901, "step": 213 }, { "epoch": 0.043870438704387046, "grad_norm": 0.945359063406858, "learning_rate": 0.00019990104407122293, "loss": 1.6753, "step": 214 }, { "epoch": 0.044075440754407544, "grad_norm": 0.8947645059347635, "learning_rate": 0.00019989806862448243, "loss": 1.761, "step": 215 }, { "epoch": 0.04428044280442804, "grad_norm": 0.7859015453280959, "learning_rate": 0.00019989504912723346, "loss": 1.6966, "step": 216 }, { "epoch": 0.044485444854448546, "grad_norm": 0.9367836223750419, "learning_rate": 0.0001998919855808076, "loss": 1.7264, "step": 217 }, { "epoch": 0.04469044690446904, "grad_norm": 1.1755111563714267, "learning_rate": 0.00019988887798655562, "loss": 1.7141, "step": 218 }, { "epoch": 0.04489544895448955, "grad_norm": 0.9180246013524452, "learning_rate": 0.00019988572634584793, "loss": 1.7313, "step": 219 }, { "epoch": 0.045100451004510045, "grad_norm": 0.8905061476312363, "learning_rate": 0.00019988253066007417, "loss": 1.6064, "step": 220 }, { "epoch": 0.04530545305453054, "grad_norm": 0.9164868611871474, "learning_rate": 0.00019987929093064354, "loss": 1.6908, "step": 221 }, { "epoch": 0.04551045510455105, "grad_norm": 0.8809112992446848, "learning_rate": 0.00019987600715898462, "loss": 1.7134, "step": 222 }, { "epoch": 0.045715457154571544, "grad_norm": 0.8130758075460126, "learning_rate": 0.00019987267934654538, "loss": 1.6908, "step": 223 }, { "epoch": 0.04592045920459205, "grad_norm": 0.959569750555629, "learning_rate": 0.00019986930749479328, "loss": 1.7207, "step": 224 }, { "epoch": 0.046125461254612546, "grad_norm": 0.9399740349053441, "learning_rate": 0.00019986589160521509, "loss": 1.7304, "step": 225 }, { "epoch": 0.04633046330463304, "grad_norm": 0.9749818376521407, "learning_rate": 0.00019986243167931705, "loss": 1.7212, "step": 226 }, { "epoch": 0.04653546535465355, "grad_norm": 0.9480976624242922, "learning_rate": 0.00019985892771862493, "loss": 1.6559, "step": 227 }, { "epoch": 0.046740467404674045, "grad_norm": 0.8757724926796171, "learning_rate": 0.00019985537972468372, "loss": 1.6985, "step": 228 }, { "epoch": 0.04694546945469455, "grad_norm": 1.0392770845993302, "learning_rate": 0.000199851787699058, "loss": 1.6915, "step": 229 }, { "epoch": 0.04715047150471505, "grad_norm": 0.825488039548811, "learning_rate": 0.00019984815164333163, "loss": 1.6358, "step": 230 }, { "epoch": 0.047355473554735544, "grad_norm": 0.8603098218807111, "learning_rate": 0.00019984447155910797, "loss": 1.6625, "step": 231 }, { "epoch": 0.04756047560475605, "grad_norm": 1.0881930338910415, "learning_rate": 0.00019984074744800977, "loss": 1.744, "step": 232 }, { "epoch": 0.047765477654776546, "grad_norm": 0.9004790977146169, "learning_rate": 0.0001998369793116792, "loss": 1.6156, "step": 233 }, { "epoch": 0.04797047970479705, "grad_norm": 0.843192453484823, "learning_rate": 0.00019983316715177783, "loss": 1.7418, "step": 234 }, { "epoch": 0.04817548175481755, "grad_norm": 0.8986673302180079, "learning_rate": 0.0001998293109699866, "loss": 1.6954, "step": 235 }, { "epoch": 0.048380483804838045, "grad_norm": 0.9264618698411912, "learning_rate": 0.000199825410768006, "loss": 1.7137, "step": 236 }, { "epoch": 0.04858548585485855, "grad_norm": 0.8689597161934539, "learning_rate": 0.0001998214665475558, "loss": 1.6582, "step": 237 }, { "epoch": 0.04879048790487905, "grad_norm": 0.8772130277417363, "learning_rate": 0.00019981747831037522, "loss": 1.6941, "step": 238 }, { "epoch": 0.04899548995489955, "grad_norm": 0.8340591687106946, "learning_rate": 0.00019981344605822288, "loss": 1.6634, "step": 239 }, { "epoch": 0.04920049200492005, "grad_norm": 0.8401965767330165, "learning_rate": 0.00019980936979287686, "loss": 1.678, "step": 240 }, { "epoch": 0.049405494054940546, "grad_norm": 0.8245301241762782, "learning_rate": 0.00019980524951613456, "loss": 1.669, "step": 241 }, { "epoch": 0.04961049610496105, "grad_norm": 0.9437689960366863, "learning_rate": 0.00019980108522981284, "loss": 1.738, "step": 242 }, { "epoch": 0.04981549815498155, "grad_norm": 0.9763198363780384, "learning_rate": 0.000199796876935748, "loss": 1.668, "step": 243 }, { "epoch": 0.05002050020500205, "grad_norm": 0.8737629260795708, "learning_rate": 0.00019979262463579568, "loss": 1.6792, "step": 244 }, { "epoch": 0.05022550225502255, "grad_norm": 0.8069493184489265, "learning_rate": 0.00019978832833183097, "loss": 1.7035, "step": 245 }, { "epoch": 0.05043050430504305, "grad_norm": 0.8681544137254046, "learning_rate": 0.0001997839880257483, "loss": 1.6365, "step": 246 }, { "epoch": 0.05063550635506355, "grad_norm": 0.8722461307414754, "learning_rate": 0.0001997796037194616, "loss": 1.6715, "step": 247 }, { "epoch": 0.05084050840508405, "grad_norm": 0.7593083712776789, "learning_rate": 0.0001997751754149041, "loss": 1.6472, "step": 248 }, { "epoch": 0.051045510455104554, "grad_norm": 0.8123117432522353, "learning_rate": 0.00019977070311402853, "loss": 1.7593, "step": 249 }, { "epoch": 0.05125051250512505, "grad_norm": 0.7658545177246406, "learning_rate": 0.00019976618681880694, "loss": 1.6805, "step": 250 }, { "epoch": 0.05145551455514555, "grad_norm": 0.8504380870043369, "learning_rate": 0.00019976162653123083, "loss": 1.7382, "step": 251 }, { "epoch": 0.05166051660516605, "grad_norm": 0.8765622953723591, "learning_rate": 0.00019975702225331107, "loss": 1.7509, "step": 252 }, { "epoch": 0.05186551865518655, "grad_norm": 0.906069518948904, "learning_rate": 0.0001997523739870779, "loss": 1.6519, "step": 253 }, { "epoch": 0.052070520705207055, "grad_norm": 0.8265904007741197, "learning_rate": 0.0001997476817345811, "loss": 1.6882, "step": 254 }, { "epoch": 0.05227552275522755, "grad_norm": 0.8681084240084533, "learning_rate": 0.00019974294549788963, "loss": 1.6868, "step": 255 }, { "epoch": 0.05248052480524805, "grad_norm": 0.7676002631197196, "learning_rate": 0.00019973816527909198, "loss": 1.6594, "step": 256 }, { "epoch": 0.052685526855268554, "grad_norm": 0.9402313473097638, "learning_rate": 0.00019973334108029607, "loss": 1.6828, "step": 257 }, { "epoch": 0.05289052890528905, "grad_norm": 0.9680655375503149, "learning_rate": 0.00019972847290362905, "loss": 1.7108, "step": 258 }, { "epoch": 0.053095530955309556, "grad_norm": 0.8961177011725427, "learning_rate": 0.0001997235607512377, "loss": 1.669, "step": 259 }, { "epoch": 0.05330053300533005, "grad_norm": 0.8980329048421593, "learning_rate": 0.00019971860462528792, "loss": 1.6589, "step": 260 }, { "epoch": 0.05350553505535055, "grad_norm": 0.8786908981440502, "learning_rate": 0.00019971360452796522, "loss": 1.7079, "step": 261 }, { "epoch": 0.053710537105371055, "grad_norm": 0.8939300913741398, "learning_rate": 0.0001997085604614744, "loss": 1.6911, "step": 262 }, { "epoch": 0.05391553915539155, "grad_norm": 0.9058662042202257, "learning_rate": 0.0001997034724280396, "loss": 1.6606, "step": 263 }, { "epoch": 0.05412054120541206, "grad_norm": 0.916508823598797, "learning_rate": 0.0001996983404299045, "loss": 1.7122, "step": 264 }, { "epoch": 0.054325543255432554, "grad_norm": 0.8464400647917526, "learning_rate": 0.00019969316446933206, "loss": 1.8475, "step": 265 }, { "epoch": 0.05453054530545305, "grad_norm": 0.8758257718918342, "learning_rate": 0.00019968794454860463, "loss": 1.6787, "step": 266 }, { "epoch": 0.054735547355473556, "grad_norm": 0.7925659885251065, "learning_rate": 0.00019968268067002394, "loss": 1.6086, "step": 267 }, { "epoch": 0.05494054940549405, "grad_norm": 0.8450354500328823, "learning_rate": 0.00019967737283591114, "loss": 1.6648, "step": 268 }, { "epoch": 0.05514555145551456, "grad_norm": 0.9036819266280411, "learning_rate": 0.00019967202104860673, "loss": 1.6666, "step": 269 }, { "epoch": 0.055350553505535055, "grad_norm": 0.8565219317533235, "learning_rate": 0.00019966662531047065, "loss": 1.7605, "step": 270 }, { "epoch": 0.05555555555555555, "grad_norm": 0.9283608775604035, "learning_rate": 0.0001996611856238821, "loss": 1.6799, "step": 271 }, { "epoch": 0.05576055760557606, "grad_norm": 0.8540262917400903, "learning_rate": 0.0001996557019912398, "loss": 1.6752, "step": 272 }, { "epoch": 0.055965559655596554, "grad_norm": 0.797370579432595, "learning_rate": 0.00019965017441496175, "loss": 1.6608, "step": 273 }, { "epoch": 0.05617056170561706, "grad_norm": 0.8447672234496054, "learning_rate": 0.00019964460289748534, "loss": 1.6921, "step": 274 }, { "epoch": 0.056375563755637556, "grad_norm": 0.7526738281793146, "learning_rate": 0.00019963898744126743, "loss": 1.6387, "step": 275 }, { "epoch": 0.056580565805658053, "grad_norm": 0.8521597244164438, "learning_rate": 0.0001996333280487841, "loss": 1.7606, "step": 276 }, { "epoch": 0.05678556785567856, "grad_norm": 0.8451918881710495, "learning_rate": 0.00019962762472253097, "loss": 1.6698, "step": 277 }, { "epoch": 0.056990569905699055, "grad_norm": 0.7823593836795394, "learning_rate": 0.00019962187746502285, "loss": 1.6631, "step": 278 }, { "epoch": 0.05719557195571956, "grad_norm": 0.8325494714864385, "learning_rate": 0.0001996160862787941, "loss": 1.646, "step": 279 }, { "epoch": 0.05740057400574006, "grad_norm": 0.9395873033868068, "learning_rate": 0.0001996102511663983, "loss": 1.7329, "step": 280 }, { "epoch": 0.057605576055760555, "grad_norm": 0.90380560375249, "learning_rate": 0.00019960437213040853, "loss": 1.6352, "step": 281 }, { "epoch": 0.05781057810578106, "grad_norm": 0.7899481343839726, "learning_rate": 0.00019959844917341718, "loss": 1.6621, "step": 282 }, { "epoch": 0.058015580155801556, "grad_norm": 0.9016359282524641, "learning_rate": 0.0001995924822980359, "loss": 1.6813, "step": 283 }, { "epoch": 0.05822058220582206, "grad_norm": 0.7609504593299513, "learning_rate": 0.00019958647150689597, "loss": 1.6573, "step": 284 }, { "epoch": 0.05842558425584256, "grad_norm": 0.8335768095218871, "learning_rate": 0.00019958041680264777, "loss": 1.6838, "step": 285 }, { "epoch": 0.058630586305863056, "grad_norm": 0.8152716191306715, "learning_rate": 0.00019957431818796114, "loss": 1.7209, "step": 286 }, { "epoch": 0.05883558835588356, "grad_norm": 0.8147120853791615, "learning_rate": 0.0001995681756655254, "loss": 1.7223, "step": 287 }, { "epoch": 0.05904059040590406, "grad_norm": 0.9314119965471911, "learning_rate": 0.000199561989238049, "loss": 1.7462, "step": 288 }, { "epoch": 0.05924559245592456, "grad_norm": 0.848695964389857, "learning_rate": 0.0001995557589082599, "loss": 1.6657, "step": 289 }, { "epoch": 0.05945059450594506, "grad_norm": 0.8647246480101162, "learning_rate": 0.00019954948467890547, "loss": 1.7354, "step": 290 }, { "epoch": 0.05965559655596556, "grad_norm": 0.785338177840371, "learning_rate": 0.0001995431665527523, "loss": 1.6301, "step": 291 }, { "epoch": 0.05986059860598606, "grad_norm": 0.9148906004988794, "learning_rate": 0.00019953680453258635, "loss": 1.7205, "step": 292 }, { "epoch": 0.06006560065600656, "grad_norm": 0.8427839862925017, "learning_rate": 0.0001995303986212131, "loss": 1.6924, "step": 293 }, { "epoch": 0.06027060270602706, "grad_norm": 0.8366619935581524, "learning_rate": 0.00019952394882145717, "loss": 1.7019, "step": 294 }, { "epoch": 0.06047560475604756, "grad_norm": 0.9462889093224218, "learning_rate": 0.00019951745513616264, "loss": 1.6461, "step": 295 }, { "epoch": 0.06068060680606806, "grad_norm": 0.8181337956924931, "learning_rate": 0.00019951091756819297, "loss": 1.7102, "step": 296 }, { "epoch": 0.06088560885608856, "grad_norm": 0.8745010970962003, "learning_rate": 0.00019950433612043092, "loss": 1.6653, "step": 297 }, { "epoch": 0.06109061090610906, "grad_norm": 0.8691592008417229, "learning_rate": 0.0001994977107957786, "loss": 1.7435, "step": 298 }, { "epoch": 0.061295612956129564, "grad_norm": 0.8262244757756005, "learning_rate": 0.00019949104159715743, "loss": 1.7613, "step": 299 }, { "epoch": 0.06150061500615006, "grad_norm": 0.8767746516904414, "learning_rate": 0.0001994843285275083, "loss": 1.6388, "step": 300 }, { "epoch": 0.06170561705617056, "grad_norm": 0.735136392661012, "learning_rate": 0.00019947757158979136, "loss": 1.7056, "step": 301 }, { "epoch": 0.06191061910619106, "grad_norm": 0.8063600969530592, "learning_rate": 0.00019947077078698606, "loss": 1.6949, "step": 302 }, { "epoch": 0.06211562115621156, "grad_norm": 0.7916804685850515, "learning_rate": 0.00019946392612209127, "loss": 1.735, "step": 303 }, { "epoch": 0.062320623206232065, "grad_norm": 0.7717510489712706, "learning_rate": 0.00019945703759812519, "loss": 1.704, "step": 304 }, { "epoch": 0.06252562525625256, "grad_norm": 0.8214275311030498, "learning_rate": 0.00019945010521812536, "loss": 1.7546, "step": 305 }, { "epoch": 0.06273062730627306, "grad_norm": 0.9410459345695534, "learning_rate": 0.00019944312898514862, "loss": 1.6898, "step": 306 }, { "epoch": 0.06293562935629356, "grad_norm": 0.7562640868994822, "learning_rate": 0.00019943610890227115, "loss": 1.6685, "step": 307 }, { "epoch": 0.06314063140631407, "grad_norm": 0.8090080988717784, "learning_rate": 0.0001994290449725885, "loss": 1.7533, "step": 308 }, { "epoch": 0.06334563345633457, "grad_norm": 0.834008023305076, "learning_rate": 0.00019942193719921556, "loss": 1.7246, "step": 309 }, { "epoch": 0.06355063550635506, "grad_norm": 0.7497143199335253, "learning_rate": 0.00019941478558528655, "loss": 1.7394, "step": 310 }, { "epoch": 0.06375563755637556, "grad_norm": 0.7627945575098661, "learning_rate": 0.000199407590133955, "loss": 1.6969, "step": 311 }, { "epoch": 0.06396063960639606, "grad_norm": 0.9400529224551543, "learning_rate": 0.0001994003508483937, "loss": 1.751, "step": 312 }, { "epoch": 0.06416564165641657, "grad_norm": 0.8363970953361202, "learning_rate": 0.00019939306773179497, "loss": 1.664, "step": 313 }, { "epoch": 0.06437064370643707, "grad_norm": 0.7992189104603543, "learning_rate": 0.00019938574078737022, "loss": 1.7102, "step": 314 }, { "epoch": 0.06457564575645756, "grad_norm": 0.7383565024737827, "learning_rate": 0.00019937837001835038, "loss": 1.6802, "step": 315 }, { "epoch": 0.06478064780647806, "grad_norm": 0.8490497834561951, "learning_rate": 0.0001993709554279856, "loss": 1.6754, "step": 316 }, { "epoch": 0.06498564985649856, "grad_norm": 0.824252780929852, "learning_rate": 0.00019936349701954535, "loss": 1.6617, "step": 317 }, { "epoch": 0.06519065190651907, "grad_norm": 0.8708350937462481, "learning_rate": 0.0001993559947963185, "loss": 1.6481, "step": 318 }, { "epoch": 0.06539565395653957, "grad_norm": 0.9448886748422978, "learning_rate": 0.00019934844876161317, "loss": 1.6939, "step": 319 }, { "epoch": 0.06560065600656007, "grad_norm": 0.8886030172642768, "learning_rate": 0.00019934085891875678, "loss": 1.6466, "step": 320 }, { "epoch": 0.06580565805658056, "grad_norm": 0.8468417149960004, "learning_rate": 0.00019933322527109613, "loss": 1.7076, "step": 321 }, { "epoch": 0.06601066010660106, "grad_norm": 0.8356183208795587, "learning_rate": 0.0001993255478219973, "loss": 1.6777, "step": 322 }, { "epoch": 0.06621566215662157, "grad_norm": 0.8176615780960169, "learning_rate": 0.00019931782657484578, "loss": 1.6329, "step": 323 }, { "epoch": 0.06642066420664207, "grad_norm": 0.9234714415387821, "learning_rate": 0.00019931006153304617, "loss": 1.7228, "step": 324 }, { "epoch": 0.06662566625666257, "grad_norm": 0.8750201930968287, "learning_rate": 0.00019930225270002255, "loss": 1.7187, "step": 325 }, { "epoch": 0.06683066830668306, "grad_norm": 0.8043754773013777, "learning_rate": 0.0001992944000792183, "loss": 1.6983, "step": 326 }, { "epoch": 0.06703567035670356, "grad_norm": 1.176385094398134, "learning_rate": 0.000199286503674096, "loss": 1.6879, "step": 327 }, { "epoch": 0.06724067240672407, "grad_norm": 0.8257921560671576, "learning_rate": 0.00019927856348813766, "loss": 1.7166, "step": 328 }, { "epoch": 0.06744567445674457, "grad_norm": 0.7395939677267889, "learning_rate": 0.0001992705795248445, "loss": 1.7063, "step": 329 }, { "epoch": 0.06765067650676507, "grad_norm": 0.8505478671569628, "learning_rate": 0.00019926255178773713, "loss": 1.6846, "step": 330 }, { "epoch": 0.06785567855678556, "grad_norm": 1.0025970595269902, "learning_rate": 0.00019925448028035536, "loss": 1.7244, "step": 331 }, { "epoch": 0.06806068060680606, "grad_norm": 0.8071768060875504, "learning_rate": 0.00019924636500625838, "loss": 1.6797, "step": 332 }, { "epoch": 0.06826568265682657, "grad_norm": 0.8500723882929231, "learning_rate": 0.0001992382059690247, "loss": 1.7133, "step": 333 }, { "epoch": 0.06847068470684707, "grad_norm": 0.8353911832574507, "learning_rate": 0.00019923000317225204, "loss": 1.6779, "step": 334 }, { "epoch": 0.06867568675686757, "grad_norm": 0.7469721061109655, "learning_rate": 0.00019922175661955748, "loss": 1.5978, "step": 335 }, { "epoch": 0.06888068880688807, "grad_norm": 0.8370216466399697, "learning_rate": 0.00019921346631457737, "loss": 1.6644, "step": 336 }, { "epoch": 0.06908569085690856, "grad_norm": 0.7795077165915985, "learning_rate": 0.00019920513226096733, "loss": 1.6899, "step": 337 }, { "epoch": 0.06929069290692907, "grad_norm": 0.7989381240543669, "learning_rate": 0.00019919675446240236, "loss": 1.6641, "step": 338 }, { "epoch": 0.06949569495694957, "grad_norm": 0.8202459565119227, "learning_rate": 0.00019918833292257662, "loss": 1.764, "step": 339 }, { "epoch": 0.06970069700697007, "grad_norm": 0.9642826930192796, "learning_rate": 0.00019917986764520363, "loss": 1.7256, "step": 340 }, { "epoch": 0.06990569905699057, "grad_norm": 0.7751249580268084, "learning_rate": 0.00019917135863401628, "loss": 1.724, "step": 341 }, { "epoch": 0.07011070110701106, "grad_norm": 0.8240115578533698, "learning_rate": 0.0001991628058927666, "loss": 1.7282, "step": 342 }, { "epoch": 0.07031570315703158, "grad_norm": 0.8694534341023328, "learning_rate": 0.0001991542094252259, "loss": 1.7179, "step": 343 }, { "epoch": 0.07052070520705207, "grad_norm": 0.7949885187461482, "learning_rate": 0.00019914556923518494, "loss": 1.6659, "step": 344 }, { "epoch": 0.07072570725707257, "grad_norm": 0.7884711254464456, "learning_rate": 0.00019913688532645357, "loss": 1.6789, "step": 345 }, { "epoch": 0.07093070930709307, "grad_norm": 0.7936316579720082, "learning_rate": 0.00019912815770286107, "loss": 1.6737, "step": 346 }, { "epoch": 0.07113571135711357, "grad_norm": 0.7975937243843574, "learning_rate": 0.00019911938636825585, "loss": 1.7021, "step": 347 }, { "epoch": 0.07134071340713408, "grad_norm": 0.768223286707478, "learning_rate": 0.0001991105713265057, "loss": 1.7509, "step": 348 }, { "epoch": 0.07154571545715457, "grad_norm": 0.7908301258100003, "learning_rate": 0.0001991017125814977, "loss": 1.6639, "step": 349 }, { "epoch": 0.07175071750717507, "grad_norm": 0.7863374328870854, "learning_rate": 0.00019909281013713806, "loss": 1.6864, "step": 350 }, { "epoch": 0.07195571955719557, "grad_norm": 0.7432698938040034, "learning_rate": 0.00019908386399735241, "loss": 1.7412, "step": 351 }, { "epoch": 0.07216072160721607, "grad_norm": 0.816977470055735, "learning_rate": 0.00019907487416608564, "loss": 1.7051, "step": 352 }, { "epoch": 0.07236572365723658, "grad_norm": 0.7736641813354557, "learning_rate": 0.00019906584064730175, "loss": 1.7509, "step": 353 }, { "epoch": 0.07257072570725707, "grad_norm": 0.7890465259589068, "learning_rate": 0.00019905676344498418, "loss": 1.5788, "step": 354 }, { "epoch": 0.07277572775727757, "grad_norm": 0.8288921793208962, "learning_rate": 0.00019904764256313556, "loss": 1.7062, "step": 355 }, { "epoch": 0.07298072980729807, "grad_norm": 0.8620745516366974, "learning_rate": 0.00019903847800577777, "loss": 1.7138, "step": 356 }, { "epoch": 0.07318573185731857, "grad_norm": 0.7864264586400553, "learning_rate": 0.00019902926977695195, "loss": 1.6925, "step": 357 }, { "epoch": 0.07339073390733908, "grad_norm": 0.7068687312949009, "learning_rate": 0.00019902001788071853, "loss": 1.7022, "step": 358 }, { "epoch": 0.07359573595735958, "grad_norm": 0.8337630012991832, "learning_rate": 0.0001990107223211572, "loss": 1.6438, "step": 359 }, { "epoch": 0.07380073800738007, "grad_norm": 0.8748687360789178, "learning_rate": 0.00019900138310236683, "loss": 1.7435, "step": 360 }, { "epoch": 0.07400574005740057, "grad_norm": 0.8398150383516015, "learning_rate": 0.00019899200022846562, "loss": 1.7629, "step": 361 }, { "epoch": 0.07421074210742107, "grad_norm": 0.7886652412451903, "learning_rate": 0.00019898257370359098, "loss": 1.7215, "step": 362 }, { "epoch": 0.07441574415744158, "grad_norm": 0.833456831173047, "learning_rate": 0.00019897310353189957, "loss": 1.7028, "step": 363 }, { "epoch": 0.07462074620746208, "grad_norm": 0.7372738914896392, "learning_rate": 0.0001989635897175673, "loss": 1.698, "step": 364 }, { "epoch": 0.07482574825748257, "grad_norm": 0.8129197926773489, "learning_rate": 0.0001989540322647894, "loss": 1.7716, "step": 365 }, { "epoch": 0.07503075030750307, "grad_norm": 0.7706484417008176, "learning_rate": 0.00019894443117778022, "loss": 1.632, "step": 366 }, { "epoch": 0.07523575235752357, "grad_norm": 0.7547385265793601, "learning_rate": 0.00019893478646077338, "loss": 1.6862, "step": 367 }, { "epoch": 0.07544075440754408, "grad_norm": 0.7988981756827476, "learning_rate": 0.0001989250981180218, "loss": 1.7207, "step": 368 }, { "epoch": 0.07564575645756458, "grad_norm": 0.8369643222520285, "learning_rate": 0.00019891536615379755, "loss": 1.6916, "step": 369 }, { "epoch": 0.07585075850758508, "grad_norm": 0.7968054048841254, "learning_rate": 0.00019890559057239205, "loss": 1.6586, "step": 370 }, { "epoch": 0.07605576055760557, "grad_norm": 0.7636168310737993, "learning_rate": 0.00019889577137811583, "loss": 1.6611, "step": 371 }, { "epoch": 0.07626076260762607, "grad_norm": 0.7681478549081471, "learning_rate": 0.00019888590857529875, "loss": 1.6902, "step": 372 }, { "epoch": 0.07646576465764658, "grad_norm": 0.7249456750008301, "learning_rate": 0.00019887600216828984, "loss": 1.6903, "step": 373 }, { "epoch": 0.07667076670766708, "grad_norm": 0.7452739123590649, "learning_rate": 0.00019886605216145738, "loss": 1.6806, "step": 374 }, { "epoch": 0.07687576875768758, "grad_norm": 0.868138692805327, "learning_rate": 0.00019885605855918885, "loss": 1.7422, "step": 375 }, { "epoch": 0.07708077080770807, "grad_norm": 0.8676648084050012, "learning_rate": 0.00019884602136589102, "loss": 1.6858, "step": 376 }, { "epoch": 0.07728577285772857, "grad_norm": 0.816884434773197, "learning_rate": 0.0001988359405859898, "loss": 1.7284, "step": 377 }, { "epoch": 0.07749077490774908, "grad_norm": 0.8149320377988114, "learning_rate": 0.00019882581622393034, "loss": 1.6996, "step": 378 }, { "epoch": 0.07769577695776958, "grad_norm": 0.7684164517583669, "learning_rate": 0.00019881564828417707, "loss": 1.6851, "step": 379 }, { "epoch": 0.07790077900779008, "grad_norm": 0.8709540920827276, "learning_rate": 0.00019880543677121358, "loss": 1.6891, "step": 380 }, { "epoch": 0.07810578105781057, "grad_norm": 0.7534280894079938, "learning_rate": 0.00019879518168954265, "loss": 1.6665, "step": 381 }, { "epoch": 0.07831078310783107, "grad_norm": 0.8437338486777195, "learning_rate": 0.00019878488304368635, "loss": 1.7022, "step": 382 }, { "epoch": 0.07851578515785158, "grad_norm": 0.8125361927980784, "learning_rate": 0.00019877454083818585, "loss": 1.7219, "step": 383 }, { "epoch": 0.07872078720787208, "grad_norm": 0.75216898844302, "learning_rate": 0.00019876415507760165, "loss": 1.6029, "step": 384 }, { "epoch": 0.07892578925789258, "grad_norm": 0.8817636028790754, "learning_rate": 0.00019875372576651337, "loss": 1.7671, "step": 385 }, { "epoch": 0.07913079130791308, "grad_norm": 0.8658881974299575, "learning_rate": 0.00019874325290951988, "loss": 1.7312, "step": 386 }, { "epoch": 0.07933579335793357, "grad_norm": 0.7908139405025414, "learning_rate": 0.00019873273651123925, "loss": 1.6827, "step": 387 }, { "epoch": 0.07954079540795408, "grad_norm": 0.8606154137481271, "learning_rate": 0.0001987221765763087, "loss": 1.7425, "step": 388 }, { "epoch": 0.07974579745797458, "grad_norm": 0.7930288347953397, "learning_rate": 0.00019871157310938467, "loss": 1.6605, "step": 389 }, { "epoch": 0.07995079950799508, "grad_norm": 0.8315190946529247, "learning_rate": 0.00019870092611514286, "loss": 1.697, "step": 390 }, { "epoch": 0.08015580155801558, "grad_norm": 0.7642307128697646, "learning_rate": 0.00019869023559827807, "loss": 1.667, "step": 391 }, { "epoch": 0.08036080360803607, "grad_norm": 0.8535484452935961, "learning_rate": 0.00019867950156350435, "loss": 1.6685, "step": 392 }, { "epoch": 0.08056580565805659, "grad_norm": 0.8219053835318966, "learning_rate": 0.00019866872401555488, "loss": 1.6292, "step": 393 }, { "epoch": 0.08077080770807708, "grad_norm": 0.7499777228628962, "learning_rate": 0.00019865790295918212, "loss": 1.7191, "step": 394 }, { "epoch": 0.08097580975809758, "grad_norm": 0.8495663893303287, "learning_rate": 0.00019864703839915767, "loss": 1.6794, "step": 395 }, { "epoch": 0.08118081180811808, "grad_norm": 0.7346984863887028, "learning_rate": 0.00019863613034027224, "loss": 1.6282, "step": 396 }, { "epoch": 0.08138581385813858, "grad_norm": 0.8050009341689788, "learning_rate": 0.00019862517878733586, "loss": 1.7637, "step": 397 }, { "epoch": 0.08159081590815909, "grad_norm": 0.7988611683570271, "learning_rate": 0.00019861418374517764, "loss": 1.7244, "step": 398 }, { "epoch": 0.08179581795817958, "grad_norm": 0.7577691801900096, "learning_rate": 0.0001986031452186459, "loss": 1.7389, "step": 399 }, { "epoch": 0.08200082000820008, "grad_norm": 0.7556795415346234, "learning_rate": 0.0001985920632126081, "loss": 1.6342, "step": 400 }, { "epoch": 0.08220582205822058, "grad_norm": 0.8482017309815681, "learning_rate": 0.0001985809377319509, "loss": 1.7032, "step": 401 }, { "epoch": 0.08241082410824108, "grad_norm": 0.787147249291876, "learning_rate": 0.00019856976878158023, "loss": 1.7228, "step": 402 }, { "epoch": 0.08261582615826159, "grad_norm": 0.8036672897169851, "learning_rate": 0.00019855855636642094, "loss": 1.6603, "step": 403 }, { "epoch": 0.08282082820828209, "grad_norm": 0.7641728093193555, "learning_rate": 0.00019854730049141732, "loss": 1.6314, "step": 404 }, { "epoch": 0.08302583025830258, "grad_norm": 0.7653906874680237, "learning_rate": 0.00019853600116153262, "loss": 1.72, "step": 405 }, { "epoch": 0.08323083230832308, "grad_norm": 0.7857851929745933, "learning_rate": 0.00019852465838174937, "loss": 1.6812, "step": 406 }, { "epoch": 0.08343583435834358, "grad_norm": 0.8000632649586302, "learning_rate": 0.0001985132721570692, "loss": 1.6784, "step": 407 }, { "epoch": 0.08364083640836409, "grad_norm": 0.7996010748331727, "learning_rate": 0.00019850184249251294, "loss": 1.7292, "step": 408 }, { "epoch": 0.08384583845838459, "grad_norm": 0.8035675536154948, "learning_rate": 0.00019849036939312056, "loss": 1.7375, "step": 409 }, { "epoch": 0.08405084050840508, "grad_norm": 0.8122035791807035, "learning_rate": 0.00019847885286395113, "loss": 1.6656, "step": 410 }, { "epoch": 0.08425584255842558, "grad_norm": 0.7621885251273879, "learning_rate": 0.00019846729291008293, "loss": 1.7645, "step": 411 }, { "epoch": 0.08446084460844608, "grad_norm": 0.7011620695678992, "learning_rate": 0.0001984556895366134, "loss": 1.6803, "step": 412 }, { "epoch": 0.08466584665846659, "grad_norm": 0.8582839410110316, "learning_rate": 0.0001984440427486591, "loss": 1.7871, "step": 413 }, { "epoch": 0.08487084870848709, "grad_norm": 0.8347710297475587, "learning_rate": 0.00019843235255135572, "loss": 1.7437, "step": 414 }, { "epoch": 0.08507585075850758, "grad_norm": 0.8325109206375484, "learning_rate": 0.00019842061894985807, "loss": 1.7213, "step": 415 }, { "epoch": 0.08528085280852808, "grad_norm": 0.8158999680605145, "learning_rate": 0.00019840884194934018, "loss": 1.7098, "step": 416 }, { "epoch": 0.08548585485854858, "grad_norm": 0.7540846343454949, "learning_rate": 0.0001983970215549952, "loss": 1.6895, "step": 417 }, { "epoch": 0.08569085690856909, "grad_norm": 0.7971443040047358, "learning_rate": 0.0001983851577720353, "loss": 1.6831, "step": 418 }, { "epoch": 0.08589585895858959, "grad_norm": 0.8281239117924155, "learning_rate": 0.00019837325060569197, "loss": 1.6004, "step": 419 }, { "epoch": 0.08610086100861009, "grad_norm": 0.7655996559675265, "learning_rate": 0.00019836130006121563, "loss": 1.6425, "step": 420 }, { "epoch": 0.08630586305863058, "grad_norm": 0.781090999746419, "learning_rate": 0.00019834930614387602, "loss": 1.6565, "step": 421 }, { "epoch": 0.08651086510865108, "grad_norm": 0.7906233152409404, "learning_rate": 0.00019833726885896185, "loss": 1.6829, "step": 422 }, { "epoch": 0.08671586715867159, "grad_norm": 0.8806550648859682, "learning_rate": 0.00019832518821178102, "loss": 1.6959, "step": 423 }, { "epoch": 0.08692086920869209, "grad_norm": 0.8347538964383344, "learning_rate": 0.00019831306420766057, "loss": 1.7088, "step": 424 }, { "epoch": 0.08712587125871259, "grad_norm": 0.7325629458284244, "learning_rate": 0.00019830089685194663, "loss": 1.6846, "step": 425 }, { "epoch": 0.08733087330873308, "grad_norm": 0.765571450743036, "learning_rate": 0.00019828868615000443, "loss": 1.6897, "step": 426 }, { "epoch": 0.08753587535875358, "grad_norm": 0.8480287539216526, "learning_rate": 0.00019827643210721838, "loss": 1.7078, "step": 427 }, { "epoch": 0.08774087740877409, "grad_norm": 0.7425628970025092, "learning_rate": 0.00019826413472899193, "loss": 1.633, "step": 428 }, { "epoch": 0.08794587945879459, "grad_norm": 0.7986674418566917, "learning_rate": 0.0001982517940207476, "loss": 1.7174, "step": 429 }, { "epoch": 0.08815088150881509, "grad_norm": 0.8721087815441965, "learning_rate": 0.00019823940998792722, "loss": 1.7366, "step": 430 }, { "epoch": 0.08835588355883559, "grad_norm": 0.7875399357031263, "learning_rate": 0.00019822698263599145, "loss": 1.5826, "step": 431 }, { "epoch": 0.08856088560885608, "grad_norm": 0.8162070818499303, "learning_rate": 0.00019821451197042026, "loss": 1.709, "step": 432 }, { "epoch": 0.0887658876588766, "grad_norm": 0.7635777512510616, "learning_rate": 0.00019820199799671265, "loss": 1.6264, "step": 433 }, { "epoch": 0.08897088970889709, "grad_norm": 0.8896320078667912, "learning_rate": 0.0001981894407203867, "loss": 1.6593, "step": 434 }, { "epoch": 0.08917589175891759, "grad_norm": 0.8711397890377418, "learning_rate": 0.0001981768401469796, "loss": 1.7198, "step": 435 }, { "epoch": 0.08938089380893809, "grad_norm": 0.8172766441468787, "learning_rate": 0.00019816419628204758, "loss": 1.7353, "step": 436 }, { "epoch": 0.08958589585895858, "grad_norm": 0.7827133814345119, "learning_rate": 0.00019815150913116608, "loss": 1.7884, "step": 437 }, { "epoch": 0.0897908979089791, "grad_norm": 0.8487907274850991, "learning_rate": 0.00019813877869992954, "loss": 1.7279, "step": 438 }, { "epoch": 0.08999589995899959, "grad_norm": 0.8387400254977331, "learning_rate": 0.0001981260049939515, "loss": 1.6778, "step": 439 }, { "epoch": 0.09020090200902009, "grad_norm": 0.8368595835848883, "learning_rate": 0.00019811318801886456, "loss": 1.714, "step": 440 }, { "epoch": 0.09040590405904059, "grad_norm": 0.8700540729481459, "learning_rate": 0.00019810032778032043, "loss": 1.7357, "step": 441 }, { "epoch": 0.09061090610906108, "grad_norm": 0.8994143848786682, "learning_rate": 0.00019808742428398994, "loss": 1.7405, "step": 442 }, { "epoch": 0.0908159081590816, "grad_norm": 0.7740404614686632, "learning_rate": 0.00019807447753556287, "loss": 1.7154, "step": 443 }, { "epoch": 0.0910209102091021, "grad_norm": 0.7928740791403559, "learning_rate": 0.0001980614875407482, "loss": 1.6624, "step": 444 }, { "epoch": 0.09122591225912259, "grad_norm": 0.7870107002717357, "learning_rate": 0.00019804845430527391, "loss": 1.708, "step": 445 }, { "epoch": 0.09143091430914309, "grad_norm": 0.7895707063600054, "learning_rate": 0.00019803537783488707, "loss": 1.7503, "step": 446 }, { "epoch": 0.09163591635916359, "grad_norm": 0.8489490388680045, "learning_rate": 0.0001980222581353538, "loss": 1.7634, "step": 447 }, { "epoch": 0.0918409184091841, "grad_norm": 0.8287261838512044, "learning_rate": 0.00019800909521245933, "loss": 1.6905, "step": 448 }, { "epoch": 0.0920459204592046, "grad_norm": 0.7462095172109476, "learning_rate": 0.00019799588907200782, "loss": 1.8165, "step": 449 }, { "epoch": 0.09225092250922509, "grad_norm": 0.7309335585458496, "learning_rate": 0.00019798263971982266, "loss": 1.6134, "step": 450 }, { "epoch": 0.09245592455924559, "grad_norm": 0.8290559074120037, "learning_rate": 0.0001979693471617462, "loss": 1.7024, "step": 451 }, { "epoch": 0.09266092660926609, "grad_norm": 0.7986537368313037, "learning_rate": 0.00019795601140363983, "loss": 1.7512, "step": 452 }, { "epoch": 0.0928659286592866, "grad_norm": 0.7763318066313323, "learning_rate": 0.00019794263245138404, "loss": 1.7358, "step": 453 }, { "epoch": 0.0930709307093071, "grad_norm": 0.8431382846534751, "learning_rate": 0.00019792921031087829, "loss": 1.6584, "step": 454 }, { "epoch": 0.09327593275932759, "grad_norm": 0.8732411541366695, "learning_rate": 0.0001979157449880412, "loss": 1.6535, "step": 455 }, { "epoch": 0.09348093480934809, "grad_norm": 0.7938674820974778, "learning_rate": 0.00019790223648881035, "loss": 1.6841, "step": 456 }, { "epoch": 0.09368593685936859, "grad_norm": 0.7838316116341935, "learning_rate": 0.00019788868481914233, "loss": 1.6705, "step": 457 }, { "epoch": 0.0938909389093891, "grad_norm": 0.8464944297811771, "learning_rate": 0.00019787508998501285, "loss": 1.6512, "step": 458 }, { "epoch": 0.0940959409594096, "grad_norm": 0.7274538896666675, "learning_rate": 0.00019786145199241658, "loss": 1.6973, "step": 459 }, { "epoch": 0.0943009430094301, "grad_norm": 0.8452629491526242, "learning_rate": 0.00019784777084736732, "loss": 1.7159, "step": 460 }, { "epoch": 0.09450594505945059, "grad_norm": 0.8259490676322622, "learning_rate": 0.00019783404655589776, "loss": 1.6919, "step": 461 }, { "epoch": 0.09471094710947109, "grad_norm": 0.7959521989923912, "learning_rate": 0.00019782027912405975, "loss": 1.7484, "step": 462 }, { "epoch": 0.0949159491594916, "grad_norm": 0.8355678732640808, "learning_rate": 0.00019780646855792404, "loss": 1.6646, "step": 463 }, { "epoch": 0.0951209512095121, "grad_norm": 0.8871398787753433, "learning_rate": 0.0001977926148635805, "loss": 1.7233, "step": 464 }, { "epoch": 0.0953259532595326, "grad_norm": 0.8427949928791173, "learning_rate": 0.000197778718047138, "loss": 1.7198, "step": 465 }, { "epoch": 0.09553095530955309, "grad_norm": 0.8112619766607961, "learning_rate": 0.00019776477811472436, "loss": 1.6703, "step": 466 }, { "epoch": 0.09573595735957359, "grad_norm": 0.7727413832903378, "learning_rate": 0.00019775079507248645, "loss": 1.6463, "step": 467 }, { "epoch": 0.0959409594095941, "grad_norm": 0.8525189308487066, "learning_rate": 0.00019773676892659024, "loss": 1.6884, "step": 468 }, { "epoch": 0.0961459614596146, "grad_norm": 0.7999931325392722, "learning_rate": 0.0001977226996832205, "loss": 1.6569, "step": 469 }, { "epoch": 0.0963509635096351, "grad_norm": 0.8190908180073811, "learning_rate": 0.00019770858734858126, "loss": 1.7679, "step": 470 }, { "epoch": 0.0965559655596556, "grad_norm": 0.7800499019875347, "learning_rate": 0.0001976944319288953, "loss": 1.624, "step": 471 }, { "epoch": 0.09676096760967609, "grad_norm": 0.7463117231647134, "learning_rate": 0.00019768023343040455, "loss": 1.6632, "step": 472 }, { "epoch": 0.0969659696596966, "grad_norm": 0.7659423912829482, "learning_rate": 0.00019766599185936997, "loss": 1.6501, "step": 473 }, { "epoch": 0.0971709717097171, "grad_norm": 0.8057912288118586, "learning_rate": 0.00019765170722207135, "loss": 1.7021, "step": 474 }, { "epoch": 0.0973759737597376, "grad_norm": 0.7579472265489474, "learning_rate": 0.00019763737952480762, "loss": 1.6855, "step": 475 }, { "epoch": 0.0975809758097581, "grad_norm": 0.795453799926076, "learning_rate": 0.00019762300877389666, "loss": 1.6781, "step": 476 }, { "epoch": 0.09778597785977859, "grad_norm": 0.7803483098398056, "learning_rate": 0.00019760859497567528, "loss": 1.6827, "step": 477 }, { "epoch": 0.0979909799097991, "grad_norm": 0.7606664546018259, "learning_rate": 0.00019759413813649933, "loss": 1.7018, "step": 478 }, { "epoch": 0.0981959819598196, "grad_norm": 0.8243364804331386, "learning_rate": 0.00019757963826274357, "loss": 1.6747, "step": 479 }, { "epoch": 0.0984009840098401, "grad_norm": 0.8193616865124249, "learning_rate": 0.00019756509536080185, "loss": 1.6041, "step": 480 }, { "epoch": 0.0986059860598606, "grad_norm": 0.7639841242141624, "learning_rate": 0.0001975505094370869, "loss": 1.694, "step": 481 }, { "epoch": 0.09881098810988109, "grad_norm": 0.8576414677090153, "learning_rate": 0.00019753588049803046, "loss": 1.734, "step": 482 }, { "epoch": 0.0990159901599016, "grad_norm": 0.8721771050311198, "learning_rate": 0.00019752120855008324, "loss": 1.647, "step": 483 }, { "epoch": 0.0992209922099221, "grad_norm": 0.8216809030017547, "learning_rate": 0.00019750649359971488, "loss": 1.676, "step": 484 }, { "epoch": 0.0994259942599426, "grad_norm": 0.741288134401714, "learning_rate": 0.000197491735653414, "loss": 1.6838, "step": 485 }, { "epoch": 0.0996309963099631, "grad_norm": 0.7470529216950845, "learning_rate": 0.00019747693471768818, "loss": 1.7296, "step": 486 }, { "epoch": 0.0998359983599836, "grad_norm": 0.7504402483351444, "learning_rate": 0.000197462090799064, "loss": 1.7273, "step": 487 }, { "epoch": 0.1000410004100041, "grad_norm": 0.7446405182211473, "learning_rate": 0.00019744720390408687, "loss": 1.668, "step": 488 }, { "epoch": 0.1002460024600246, "grad_norm": 0.7099938307259624, "learning_rate": 0.00019743227403932134, "loss": 1.7785, "step": 489 }, { "epoch": 0.1004510045100451, "grad_norm": 0.7746770926232007, "learning_rate": 0.00019741730121135075, "loss": 1.7409, "step": 490 }, { "epoch": 0.1006560065600656, "grad_norm": 0.8024043188757939, "learning_rate": 0.0001974022854267774, "loss": 1.6947, "step": 491 }, { "epoch": 0.1008610086100861, "grad_norm": 0.7006027936532472, "learning_rate": 0.00019738722669222268, "loss": 1.6709, "step": 492 }, { "epoch": 0.1010660106601066, "grad_norm": 0.7513617348789843, "learning_rate": 0.00019737212501432666, "loss": 1.742, "step": 493 }, { "epoch": 0.1012710127101271, "grad_norm": 0.7470340242865157, "learning_rate": 0.0001973569803997486, "loss": 1.6897, "step": 494 }, { "epoch": 0.1014760147601476, "grad_norm": 0.8020519910234862, "learning_rate": 0.00019734179285516655, "loss": 1.7027, "step": 495 }, { "epoch": 0.1016810168101681, "grad_norm": 0.7248567489397829, "learning_rate": 0.00019732656238727754, "loss": 1.6378, "step": 496 }, { "epoch": 0.1018860188601886, "grad_norm": 0.763222840367488, "learning_rate": 0.0001973112890027975, "loss": 1.6748, "step": 497 }, { "epoch": 0.10209102091020911, "grad_norm": 0.7729051614464107, "learning_rate": 0.00019729597270846133, "loss": 1.7187, "step": 498 }, { "epoch": 0.1022960229602296, "grad_norm": 0.7125702383653151, "learning_rate": 0.00019728061351102273, "loss": 1.6378, "step": 499 }, { "epoch": 0.1025010250102501, "grad_norm": 0.7155720204867984, "learning_rate": 0.00019726521141725454, "loss": 1.6643, "step": 500 }, { "epoch": 0.1027060270602706, "grad_norm": 0.7089598253301342, "learning_rate": 0.00019724976643394827, "loss": 1.6806, "step": 501 }, { "epoch": 0.1029110291102911, "grad_norm": 0.7082371126527723, "learning_rate": 0.00019723427856791452, "loss": 1.6904, "step": 502 }, { "epoch": 0.10311603116031161, "grad_norm": 0.7447287455120091, "learning_rate": 0.00019721874782598273, "loss": 1.6732, "step": 503 }, { "epoch": 0.1033210332103321, "grad_norm": 0.7286536803119806, "learning_rate": 0.00019720317421500122, "loss": 1.645, "step": 504 }, { "epoch": 0.1035260352603526, "grad_norm": 2.770519946109433, "learning_rate": 0.0001971875577418373, "loss": 1.7472, "step": 505 }, { "epoch": 0.1037310373103731, "grad_norm": 0.783805035999857, "learning_rate": 0.00019717189841337703, "loss": 1.7358, "step": 506 }, { "epoch": 0.1039360393603936, "grad_norm": 0.7637995545329411, "learning_rate": 0.00019715619623652554, "loss": 1.6203, "step": 507 }, { "epoch": 0.10414104141041411, "grad_norm": 0.6924701868056873, "learning_rate": 0.00019714045121820676, "loss": 1.6707, "step": 508 }, { "epoch": 0.1043460434604346, "grad_norm": 0.8118324325378106, "learning_rate": 0.00019712466336536353, "loss": 1.737, "step": 509 }, { "epoch": 0.1045510455104551, "grad_norm": 0.7902623460751316, "learning_rate": 0.0001971088326849576, "loss": 1.7185, "step": 510 }, { "epoch": 0.1047560475604756, "grad_norm": 0.7316929483305475, "learning_rate": 0.0001970929591839695, "loss": 1.6915, "step": 511 }, { "epoch": 0.1049610496104961, "grad_norm": 0.8508758741225556, "learning_rate": 0.0001970770428693988, "loss": 1.669, "step": 512 }, { "epoch": 0.10516605166051661, "grad_norm": 0.737216755724445, "learning_rate": 0.0001970610837482638, "loss": 1.6925, "step": 513 }, { "epoch": 0.10537105371053711, "grad_norm": 0.7490703230727511, "learning_rate": 0.00019704508182760185, "loss": 1.7485, "step": 514 }, { "epoch": 0.1055760557605576, "grad_norm": 0.7208283831084737, "learning_rate": 0.00019702903711446898, "loss": 1.6914, "step": 515 }, { "epoch": 0.1057810578105781, "grad_norm": 0.7705109170182252, "learning_rate": 0.0001970129496159402, "loss": 1.7231, "step": 516 }, { "epoch": 0.1059860598605986, "grad_norm": 0.7913632313341308, "learning_rate": 0.0001969968193391094, "loss": 1.6956, "step": 517 }, { "epoch": 0.10619106191061911, "grad_norm": 0.7524549480699058, "learning_rate": 0.00019698064629108928, "loss": 1.6691, "step": 518 }, { "epoch": 0.10639606396063961, "grad_norm": 0.8414835918561137, "learning_rate": 0.0001969644304790114, "loss": 1.6683, "step": 519 }, { "epoch": 0.1066010660106601, "grad_norm": 0.8179155243555126, "learning_rate": 0.0001969481719100262, "loss": 1.7008, "step": 520 }, { "epoch": 0.1068060680606806, "grad_norm": 0.7488520165577811, "learning_rate": 0.00019693187059130303, "loss": 1.7221, "step": 521 }, { "epoch": 0.1070110701107011, "grad_norm": 0.7098979253267371, "learning_rate": 0.00019691552653002992, "loss": 1.6796, "step": 522 }, { "epoch": 0.10721607216072161, "grad_norm": 0.7276703610950348, "learning_rate": 0.00019689913973341397, "loss": 1.6978, "step": 523 }, { "epoch": 0.10742107421074211, "grad_norm": 1.4401586354659643, "learning_rate": 0.00019688271020868093, "loss": 1.7208, "step": 524 }, { "epoch": 0.10762607626076261, "grad_norm": 0.7654467696451249, "learning_rate": 0.0001968662379630755, "loss": 1.7143, "step": 525 }, { "epoch": 0.1078310783107831, "grad_norm": 0.7932918174797919, "learning_rate": 0.0001968497230038612, "loss": 1.7658, "step": 526 }, { "epoch": 0.1080360803608036, "grad_norm": 0.7214865764464613, "learning_rate": 0.00019683316533832042, "loss": 1.7363, "step": 527 }, { "epoch": 0.10824108241082411, "grad_norm": 0.7710833669927666, "learning_rate": 0.00019681656497375424, "loss": 1.6911, "step": 528 }, { "epoch": 0.10844608446084461, "grad_norm": 2.082117028514879, "learning_rate": 0.00019679992191748275, "loss": 1.7145, "step": 529 }, { "epoch": 0.10865108651086511, "grad_norm": 1.5991309954490074, "learning_rate": 0.00019678323617684473, "loss": 1.7231, "step": 530 }, { "epoch": 0.1088560885608856, "grad_norm": 2.3948981536411247, "learning_rate": 0.00019676650775919788, "loss": 1.6851, "step": 531 }, { "epoch": 0.1090610906109061, "grad_norm": 3.4599222886447807, "learning_rate": 0.0001967497366719186, "loss": 1.8015, "step": 532 }, { "epoch": 0.10926609266092661, "grad_norm": 1.178961869403467, "learning_rate": 0.0001967329229224023, "loss": 1.6913, "step": 533 }, { "epoch": 0.10947109471094711, "grad_norm": 1.0419999568710827, "learning_rate": 0.0001967160665180629, "loss": 1.6393, "step": 534 }, { "epoch": 0.10967609676096761, "grad_norm": 0.8422855168488247, "learning_rate": 0.00019669916746633347, "loss": 1.7423, "step": 535 }, { "epoch": 0.1098810988109881, "grad_norm": 4.052938668367732, "learning_rate": 0.00019668222577466567, "loss": 1.7677, "step": 536 }, { "epoch": 0.1100861008610086, "grad_norm": 0.924623638056083, "learning_rate": 0.00019666524145053004, "loss": 1.6939, "step": 537 }, { "epoch": 0.11029110291102912, "grad_norm": 0.8238440764742335, "learning_rate": 0.0001966482145014158, "loss": 1.6461, "step": 538 }, { "epoch": 0.11049610496104961, "grad_norm": 0.8970963953123221, "learning_rate": 0.00019663114493483115, "loss": 1.6731, "step": 539 }, { "epoch": 0.11070110701107011, "grad_norm": 0.7865007821667908, "learning_rate": 0.00019661403275830297, "loss": 1.6787, "step": 540 }, { "epoch": 0.11090610906109061, "grad_norm": 0.8696028850679443, "learning_rate": 0.00019659687797937697, "loss": 1.7659, "step": 541 }, { "epoch": 0.1111111111111111, "grad_norm": 0.6906295733072493, "learning_rate": 0.00019657968060561758, "loss": 1.6525, "step": 542 }, { "epoch": 0.11131611316113162, "grad_norm": 0.7930376310943481, "learning_rate": 0.0001965624406446081, "loss": 1.6842, "step": 543 }, { "epoch": 0.11152111521115211, "grad_norm": 0.7544524600342143, "learning_rate": 0.00019654515810395057, "loss": 1.6905, "step": 544 }, { "epoch": 0.11172611726117261, "grad_norm": 0.7725631779594652, "learning_rate": 0.00019652783299126578, "loss": 1.632, "step": 545 }, { "epoch": 0.11193111931119311, "grad_norm": 0.7642907196358375, "learning_rate": 0.00019651046531419332, "loss": 1.8001, "step": 546 }, { "epoch": 0.1121361213612136, "grad_norm": 0.7457051675697578, "learning_rate": 0.00019649305508039159, "loss": 1.6379, "step": 547 }, { "epoch": 0.11234112341123412, "grad_norm": 0.7815886279365971, "learning_rate": 0.00019647560229753768, "loss": 1.752, "step": 548 }, { "epoch": 0.11254612546125461, "grad_norm": 0.7001482181012725, "learning_rate": 0.00019645810697332746, "loss": 1.6749, "step": 549 }, { "epoch": 0.11275112751127511, "grad_norm": 0.7021756980401541, "learning_rate": 0.0001964405691154756, "loss": 1.7534, "step": 550 }, { "epoch": 0.11295612956129561, "grad_norm": 0.7041752086506201, "learning_rate": 0.00019642298873171545, "loss": 1.7196, "step": 551 }, { "epoch": 0.11316113161131611, "grad_norm": 0.6641202881414029, "learning_rate": 0.00019640536582979923, "loss": 1.7212, "step": 552 }, { "epoch": 0.11336613366133662, "grad_norm": 0.8219760989551604, "learning_rate": 0.00019638770041749778, "loss": 1.7443, "step": 553 }, { "epoch": 0.11357113571135712, "grad_norm": 0.7446323188545191, "learning_rate": 0.0001963699925026008, "loss": 1.6992, "step": 554 }, { "epoch": 0.11377613776137761, "grad_norm": 0.7283067844851286, "learning_rate": 0.0001963522420929166, "loss": 1.6881, "step": 555 }, { "epoch": 0.11398113981139811, "grad_norm": 0.7547374240703352, "learning_rate": 0.0001963344491962724, "loss": 1.7041, "step": 556 }, { "epoch": 0.11418614186141861, "grad_norm": 0.8045748005183614, "learning_rate": 0.00019631661382051396, "loss": 1.6051, "step": 557 }, { "epoch": 0.11439114391143912, "grad_norm": 0.7879592335697111, "learning_rate": 0.00019629873597350596, "loss": 1.6769, "step": 558 }, { "epoch": 0.11459614596145962, "grad_norm": 0.8023384806228847, "learning_rate": 0.00019628081566313164, "loss": 1.6205, "step": 559 }, { "epoch": 0.11480114801148011, "grad_norm": 0.757443234258085, "learning_rate": 0.0001962628528972931, "loss": 1.6705, "step": 560 }, { "epoch": 0.11500615006150061, "grad_norm": 0.8172873659873222, "learning_rate": 0.00019624484768391106, "loss": 1.7583, "step": 561 }, { "epoch": 0.11521115211152111, "grad_norm": 0.7089928380076084, "learning_rate": 0.00019622680003092503, "loss": 1.7084, "step": 562 }, { "epoch": 0.11541615416154162, "grad_norm": 0.7292017391593942, "learning_rate": 0.0001962087099462932, "loss": 1.737, "step": 563 }, { "epoch": 0.11562115621156212, "grad_norm": 0.7595599987912868, "learning_rate": 0.0001961905774379925, "loss": 1.6998, "step": 564 }, { "epoch": 0.11582615826158262, "grad_norm": 0.7094092757884572, "learning_rate": 0.0001961724025140185, "loss": 1.688, "step": 565 }, { "epoch": 0.11603116031160311, "grad_norm": 0.762874147705992, "learning_rate": 0.00019615418518238552, "loss": 1.7624, "step": 566 }, { "epoch": 0.11623616236162361, "grad_norm": 0.8326950889290972, "learning_rate": 0.00019613592545112657, "loss": 1.7572, "step": 567 }, { "epoch": 0.11644116441164412, "grad_norm": 0.7155392474035641, "learning_rate": 0.0001961176233282934, "loss": 1.6575, "step": 568 }, { "epoch": 0.11664616646166462, "grad_norm": 0.7732444983449245, "learning_rate": 0.00019609927882195636, "loss": 1.722, "step": 569 }, { "epoch": 0.11685116851168512, "grad_norm": 0.8822083366753746, "learning_rate": 0.0001960808919402046, "loss": 1.7837, "step": 570 }, { "epoch": 0.11705617056170561, "grad_norm": 0.7530112763599104, "learning_rate": 0.0001960624626911459, "loss": 1.7197, "step": 571 }, { "epoch": 0.11726117261172611, "grad_norm": 0.7286752953774666, "learning_rate": 0.00019604399108290665, "loss": 1.6545, "step": 572 }, { "epoch": 0.11746617466174662, "grad_norm": 0.7897632469994833, "learning_rate": 0.00019602547712363203, "loss": 1.7346, "step": 573 }, { "epoch": 0.11767117671176712, "grad_norm": 0.7938108321618835, "learning_rate": 0.0001960069208214859, "loss": 1.7572, "step": 574 }, { "epoch": 0.11787617876178762, "grad_norm": 0.8020492204411614, "learning_rate": 0.0001959883221846507, "loss": 1.6684, "step": 575 }, { "epoch": 0.11808118081180811, "grad_norm": 0.8291826844922005, "learning_rate": 0.00019596968122132755, "loss": 1.7271, "step": 576 }, { "epoch": 0.11828618286182861, "grad_norm": 0.7087079948977139, "learning_rate": 0.00019595099793973635, "loss": 1.6655, "step": 577 }, { "epoch": 0.11849118491184912, "grad_norm": 0.819297167312269, "learning_rate": 0.0001959322723481155, "loss": 1.6903, "step": 578 }, { "epoch": 0.11869618696186962, "grad_norm": 0.8177083385090965, "learning_rate": 0.0001959135044547222, "loss": 1.7223, "step": 579 }, { "epoch": 0.11890118901189012, "grad_norm": 0.7209537141489044, "learning_rate": 0.0001958946942678322, "loss": 1.6286, "step": 580 }, { "epoch": 0.11910619106191062, "grad_norm": 0.7611356839436053, "learning_rate": 0.00019587584179573994, "loss": 1.7327, "step": 581 }, { "epoch": 0.11931119311193111, "grad_norm": 0.8254627704961581, "learning_rate": 0.0001958569470467585, "loss": 1.6685, "step": 582 }, { "epoch": 0.11951619516195162, "grad_norm": 0.7622126317435031, "learning_rate": 0.00019583801002921963, "loss": 1.6962, "step": 583 }, { "epoch": 0.11972119721197212, "grad_norm": 0.817331535155199, "learning_rate": 0.0001958190307514737, "loss": 1.7762, "step": 584 }, { "epoch": 0.11992619926199262, "grad_norm": 0.7629200159897975, "learning_rate": 0.00019580000922188965, "loss": 1.6366, "step": 585 }, { "epoch": 0.12013120131201312, "grad_norm": 0.7079526051805777, "learning_rate": 0.00019578094544885516, "loss": 1.634, "step": 586 }, { "epoch": 0.12033620336203361, "grad_norm": 0.7599249140746483, "learning_rate": 0.0001957618394407765, "loss": 1.7381, "step": 587 }, { "epoch": 0.12054120541205413, "grad_norm": 0.82356941117184, "learning_rate": 0.0001957426912060785, "loss": 1.765, "step": 588 }, { "epoch": 0.12074620746207462, "grad_norm": 0.6894290046436214, "learning_rate": 0.00019572350075320469, "loss": 1.6965, "step": 589 }, { "epoch": 0.12095120951209512, "grad_norm": 0.6762523177558277, "learning_rate": 0.0001957042680906172, "loss": 1.6276, "step": 590 }, { "epoch": 0.12115621156211562, "grad_norm": 0.8909921375408455, "learning_rate": 0.00019568499322679674, "loss": 1.7439, "step": 591 }, { "epoch": 0.12136121361213612, "grad_norm": 0.7602235580506228, "learning_rate": 0.00019566567617024263, "loss": 1.674, "step": 592 }, { "epoch": 0.12156621566215663, "grad_norm": 0.7242445798680968, "learning_rate": 0.00019564631692947288, "loss": 1.7531, "step": 593 }, { "epoch": 0.12177121771217712, "grad_norm": 0.8480253501470826, "learning_rate": 0.00019562691551302397, "loss": 1.7475, "step": 594 }, { "epoch": 0.12197621976219762, "grad_norm": 0.7729121548165052, "learning_rate": 0.00019560747192945107, "loss": 1.7089, "step": 595 }, { "epoch": 0.12218122181221812, "grad_norm": 0.7076690388262546, "learning_rate": 0.00019558798618732792, "loss": 1.6762, "step": 596 }, { "epoch": 0.12238622386223862, "grad_norm": 0.7922373701126969, "learning_rate": 0.00019556845829524683, "loss": 1.7165, "step": 597 }, { "epoch": 0.12259122591225913, "grad_norm": 0.7028133998075322, "learning_rate": 0.00019554888826181873, "loss": 1.6457, "step": 598 }, { "epoch": 0.12279622796227962, "grad_norm": 0.6977790685235766, "learning_rate": 0.0001955292760956731, "loss": 1.6802, "step": 599 }, { "epoch": 0.12300123001230012, "grad_norm": 0.7493226703610272, "learning_rate": 0.00019550962180545808, "loss": 1.6978, "step": 600 }, { "epoch": 0.12320623206232062, "grad_norm": 0.8600862131180389, "learning_rate": 0.00019548992539984022, "loss": 1.7207, "step": 601 }, { "epoch": 0.12341123411234112, "grad_norm": 0.7623787496255919, "learning_rate": 0.00019547018688750476, "loss": 1.6811, "step": 602 }, { "epoch": 0.12361623616236163, "grad_norm": 0.7632695969304624, "learning_rate": 0.0001954504062771555, "loss": 1.671, "step": 603 }, { "epoch": 0.12382123821238213, "grad_norm": 0.6927444238734469, "learning_rate": 0.00019543058357751483, "loss": 1.6437, "step": 604 }, { "epoch": 0.12402624026240262, "grad_norm": 0.7282969189338665, "learning_rate": 0.00019541071879732367, "loss": 1.6515, "step": 605 }, { "epoch": 0.12423124231242312, "grad_norm": 0.7525430459845069, "learning_rate": 0.0001953908119453414, "loss": 1.6612, "step": 606 }, { "epoch": 0.12443624436244362, "grad_norm": 0.7143221822535928, "learning_rate": 0.00019537086303034608, "loss": 1.7161, "step": 607 }, { "epoch": 0.12464124641246413, "grad_norm": 0.7008301427591075, "learning_rate": 0.00019535087206113427, "loss": 1.6549, "step": 608 }, { "epoch": 0.12484624846248463, "grad_norm": 0.771609790409738, "learning_rate": 0.0001953308390465211, "loss": 1.671, "step": 609 }, { "epoch": 0.12505125051250512, "grad_norm": 0.7008284466642101, "learning_rate": 0.00019531076399534022, "loss": 1.7197, "step": 610 }, { "epoch": 0.12525625256252562, "grad_norm": 0.7175186999596657, "learning_rate": 0.00019529064691644376, "loss": 1.6728, "step": 611 }, { "epoch": 0.12546125461254612, "grad_norm": 0.8027127988740639, "learning_rate": 0.00019527048781870247, "loss": 1.7976, "step": 612 }, { "epoch": 0.12566625666256662, "grad_norm": 0.6970642714660732, "learning_rate": 0.00019525028671100566, "loss": 1.73, "step": 613 }, { "epoch": 0.12587125871258711, "grad_norm": 0.7145094642434252, "learning_rate": 0.000195230043602261, "loss": 1.7257, "step": 614 }, { "epoch": 0.12607626076260764, "grad_norm": 0.6747921321796597, "learning_rate": 0.0001952097585013948, "loss": 1.7103, "step": 615 }, { "epoch": 0.12628126281262814, "grad_norm": 0.6894592468090761, "learning_rate": 0.00019518943141735195, "loss": 1.6761, "step": 616 }, { "epoch": 0.12648626486264863, "grad_norm": 0.7051256329334021, "learning_rate": 0.0001951690623590957, "loss": 1.7204, "step": 617 }, { "epoch": 0.12669126691266913, "grad_norm": 0.6235098069211322, "learning_rate": 0.0001951486513356079, "loss": 1.7023, "step": 618 }, { "epoch": 0.12689626896268963, "grad_norm": 0.7556495063175042, "learning_rate": 0.00019512819835588885, "loss": 1.7459, "step": 619 }, { "epoch": 0.12710127101271013, "grad_norm": 0.7032330727817956, "learning_rate": 0.00019510770342895742, "loss": 1.7094, "step": 620 }, { "epoch": 0.12730627306273062, "grad_norm": 0.6963801753782002, "learning_rate": 0.0001950871665638509, "loss": 1.6947, "step": 621 }, { "epoch": 0.12751127511275112, "grad_norm": 0.7427477546158143, "learning_rate": 0.0001950665877696252, "loss": 1.6702, "step": 622 }, { "epoch": 0.12771627716277162, "grad_norm": 0.7175560457683797, "learning_rate": 0.00019504596705535455, "loss": 1.733, "step": 623 }, { "epoch": 0.12792127921279212, "grad_norm": 0.6382014645533433, "learning_rate": 0.00019502530443013178, "loss": 1.6044, "step": 624 }, { "epoch": 0.12812628126281264, "grad_norm": 0.6931547392933766, "learning_rate": 0.00019500459990306817, "loss": 1.6547, "step": 625 }, { "epoch": 0.12833128331283314, "grad_norm": 0.8131052747319877, "learning_rate": 0.00019498385348329348, "loss": 1.7869, "step": 626 }, { "epoch": 0.12853628536285364, "grad_norm": 0.7495456567167106, "learning_rate": 0.00019496306517995587, "loss": 1.7483, "step": 627 }, { "epoch": 0.12874128741287413, "grad_norm": 0.7062520390299838, "learning_rate": 0.00019494223500222217, "loss": 1.6604, "step": 628 }, { "epoch": 0.12894628946289463, "grad_norm": 0.7931400753068338, "learning_rate": 0.00019492136295927743, "loss": 1.7362, "step": 629 }, { "epoch": 0.12915129151291513, "grad_norm": 0.6875574892712922, "learning_rate": 0.00019490044906032532, "loss": 1.6809, "step": 630 }, { "epoch": 0.12935629356293563, "grad_norm": 0.7111801502584136, "learning_rate": 0.0001948794933145879, "loss": 1.6546, "step": 631 }, { "epoch": 0.12956129561295612, "grad_norm": 0.7613366380444706, "learning_rate": 0.00019485849573130573, "loss": 1.7999, "step": 632 }, { "epoch": 0.12976629766297662, "grad_norm": 0.662224975718407, "learning_rate": 0.00019483745631973775, "loss": 1.6292, "step": 633 }, { "epoch": 0.12997129971299712, "grad_norm": 0.7246334231070312, "learning_rate": 0.0001948163750891614, "loss": 1.6925, "step": 634 }, { "epoch": 0.13017630176301764, "grad_norm": 0.6538882243932387, "learning_rate": 0.0001947952520488726, "loss": 1.659, "step": 635 }, { "epoch": 0.13038130381303814, "grad_norm": 0.6964780144077147, "learning_rate": 0.00019477408720818554, "loss": 1.7207, "step": 636 }, { "epoch": 0.13058630586305864, "grad_norm": 0.9548657397064115, "learning_rate": 0.00019475288057643303, "loss": 1.678, "step": 637 }, { "epoch": 0.13079130791307914, "grad_norm": 0.6384891194382379, "learning_rate": 0.00019473163216296625, "loss": 1.6983, "step": 638 }, { "epoch": 0.13099630996309963, "grad_norm": 0.7488985765381595, "learning_rate": 0.0001947103419771547, "loss": 1.6761, "step": 639 }, { "epoch": 0.13120131201312013, "grad_norm": 0.6866424805132988, "learning_rate": 0.00019468901002838644, "loss": 1.7165, "step": 640 }, { "epoch": 0.13140631406314063, "grad_norm": 0.763162900878345, "learning_rate": 0.0001946676363260679, "loss": 1.7092, "step": 641 }, { "epoch": 0.13161131611316113, "grad_norm": 0.6962827620119562, "learning_rate": 0.0001946462208796239, "loss": 1.6458, "step": 642 }, { "epoch": 0.13181631816318162, "grad_norm": 0.8119141383482595, "learning_rate": 0.00019462476369849766, "loss": 1.689, "step": 643 }, { "epoch": 0.13202132021320212, "grad_norm": 0.6672873571751815, "learning_rate": 0.00019460326479215083, "loss": 1.6127, "step": 644 }, { "epoch": 0.13222632226322265, "grad_norm": 0.7079483424952373, "learning_rate": 0.00019458172417006347, "loss": 1.7251, "step": 645 }, { "epoch": 0.13243132431324314, "grad_norm": 0.7584391354266112, "learning_rate": 0.00019456014184173398, "loss": 1.6707, "step": 646 }, { "epoch": 0.13263632636326364, "grad_norm": 0.716327893186426, "learning_rate": 0.00019453851781667925, "loss": 1.6356, "step": 647 }, { "epoch": 0.13284132841328414, "grad_norm": 0.6541907454010327, "learning_rate": 0.00019451685210443442, "loss": 1.7062, "step": 648 }, { "epoch": 0.13304633046330464, "grad_norm": 0.7137558788176999, "learning_rate": 0.00019449514471455313, "loss": 1.6513, "step": 649 }, { "epoch": 0.13325133251332513, "grad_norm": 0.7223891571495126, "learning_rate": 0.00019447339565660732, "loss": 1.7102, "step": 650 }, { "epoch": 0.13345633456334563, "grad_norm": 0.735645043490125, "learning_rate": 0.00019445160494018735, "loss": 1.7034, "step": 651 }, { "epoch": 0.13366133661336613, "grad_norm": 0.7096162370659085, "learning_rate": 0.00019442977257490193, "loss": 1.7026, "step": 652 }, { "epoch": 0.13386633866338662, "grad_norm": 0.7283081633487957, "learning_rate": 0.0001944078985703782, "loss": 1.7265, "step": 653 }, { "epoch": 0.13407134071340712, "grad_norm": 0.7076346579111047, "learning_rate": 0.00019438598293626148, "loss": 1.6433, "step": 654 }, { "epoch": 0.13427634276342765, "grad_norm": 0.8176454103089839, "learning_rate": 0.0001943640256822157, "loss": 1.7193, "step": 655 }, { "epoch": 0.13448134481344814, "grad_norm": 0.7026108842601033, "learning_rate": 0.00019434202681792293, "loss": 1.6104, "step": 656 }, { "epoch": 0.13468634686346864, "grad_norm": 0.685796468425738, "learning_rate": 0.00019431998635308372, "loss": 1.5332, "step": 657 }, { "epoch": 0.13489134891348914, "grad_norm": 0.7127802289338453, "learning_rate": 0.0001942979042974168, "loss": 1.6274, "step": 658 }, { "epoch": 0.13509635096350964, "grad_norm": 0.717598671490414, "learning_rate": 0.00019427578066065954, "loss": 1.7171, "step": 659 }, { "epoch": 0.13530135301353013, "grad_norm": 0.6912432544716882, "learning_rate": 0.00019425361545256727, "loss": 1.764, "step": 660 }, { "epoch": 0.13550635506355063, "grad_norm": 0.8142900011304077, "learning_rate": 0.00019423140868291396, "loss": 1.6733, "step": 661 }, { "epoch": 0.13571135711357113, "grad_norm": 0.6982401576159545, "learning_rate": 0.00019420916036149178, "loss": 1.7152, "step": 662 }, { "epoch": 0.13591635916359163, "grad_norm": 0.6730554983695302, "learning_rate": 0.00019418687049811115, "loss": 1.6932, "step": 663 }, { "epoch": 0.13612136121361212, "grad_norm": 0.7400434973007438, "learning_rate": 0.00019416453910260097, "loss": 1.7901, "step": 664 }, { "epoch": 0.13632636326363265, "grad_norm": 0.7113339420294928, "learning_rate": 0.0001941421661848083, "loss": 1.6646, "step": 665 }, { "epoch": 0.13653136531365315, "grad_norm": 0.6665150137351554, "learning_rate": 0.00019411975175459865, "loss": 1.6461, "step": 666 }, { "epoch": 0.13673636736367364, "grad_norm": 0.7453441081986116, "learning_rate": 0.00019409729582185574, "loss": 1.6918, "step": 667 }, { "epoch": 0.13694136941369414, "grad_norm": 0.7024419398270235, "learning_rate": 0.0001940747983964816, "loss": 1.6069, "step": 668 }, { "epoch": 0.13714637146371464, "grad_norm": 0.6826807669155491, "learning_rate": 0.00019405225948839657, "loss": 1.694, "step": 669 }, { "epoch": 0.13735137351373514, "grad_norm": 0.6680462216322685, "learning_rate": 0.0001940296791075393, "loss": 1.6868, "step": 670 }, { "epoch": 0.13755637556375563, "grad_norm": 0.8221542322696126, "learning_rate": 0.0001940070572638667, "loss": 1.6666, "step": 671 }, { "epoch": 0.13776137761377613, "grad_norm": 0.6739792335836811, "learning_rate": 0.00019398439396735398, "loss": 1.7057, "step": 672 }, { "epoch": 0.13796637966379663, "grad_norm": 0.6915752955334832, "learning_rate": 0.00019396168922799462, "loss": 1.6484, "step": 673 }, { "epoch": 0.13817138171381713, "grad_norm": 0.7598970632238722, "learning_rate": 0.00019393894305580041, "loss": 1.6509, "step": 674 }, { "epoch": 0.13837638376383765, "grad_norm": 0.6784006628222828, "learning_rate": 0.00019391615546080133, "loss": 1.6495, "step": 675 }, { "epoch": 0.13858138581385815, "grad_norm": 0.6894897436652209, "learning_rate": 0.0001938933264530457, "loss": 1.6764, "step": 676 }, { "epoch": 0.13878638786387865, "grad_norm": 0.7857036837790523, "learning_rate": 0.00019387045604260007, "loss": 1.6664, "step": 677 }, { "epoch": 0.13899138991389914, "grad_norm": 0.6818001039808111, "learning_rate": 0.00019384754423954926, "loss": 1.6526, "step": 678 }, { "epoch": 0.13919639196391964, "grad_norm": 0.7081724548685108, "learning_rate": 0.00019382459105399632, "loss": 1.6996, "step": 679 }, { "epoch": 0.13940139401394014, "grad_norm": 0.7140795694108882, "learning_rate": 0.00019380159649606257, "loss": 1.7172, "step": 680 }, { "epoch": 0.13960639606396064, "grad_norm": 0.7080091135319612, "learning_rate": 0.00019377856057588755, "loss": 1.6917, "step": 681 }, { "epoch": 0.13981139811398113, "grad_norm": 0.6761647836996737, "learning_rate": 0.00019375548330362907, "loss": 1.7723, "step": 682 }, { "epoch": 0.14001640016400163, "grad_norm": 0.7143847128372665, "learning_rate": 0.00019373236468946318, "loss": 1.6908, "step": 683 }, { "epoch": 0.14022140221402213, "grad_norm": 0.7447363936876037, "learning_rate": 0.0001937092047435841, "loss": 1.6374, "step": 684 }, { "epoch": 0.14042640426404265, "grad_norm": 0.6796516121340412, "learning_rate": 0.00019368600347620428, "loss": 1.6395, "step": 685 }, { "epoch": 0.14063140631406315, "grad_norm": 0.6497045167602054, "learning_rate": 0.00019366276089755453, "loss": 1.6955, "step": 686 }, { "epoch": 0.14083640836408365, "grad_norm": 0.730287449803201, "learning_rate": 0.00019363947701788372, "loss": 1.8079, "step": 687 }, { "epoch": 0.14104141041410415, "grad_norm": 0.8091140477761806, "learning_rate": 0.00019361615184745895, "loss": 1.6728, "step": 688 }, { "epoch": 0.14124641246412464, "grad_norm": 0.6323788011111765, "learning_rate": 0.00019359278539656557, "loss": 1.646, "step": 689 }, { "epoch": 0.14145141451414514, "grad_norm": 0.7258790043908935, "learning_rate": 0.00019356937767550715, "loss": 1.7093, "step": 690 }, { "epoch": 0.14165641656416564, "grad_norm": 0.6949124425464823, "learning_rate": 0.00019354592869460545, "loss": 1.6549, "step": 691 }, { "epoch": 0.14186141861418614, "grad_norm": 0.7000728118326219, "learning_rate": 0.00019352243846420034, "loss": 1.7468, "step": 692 }, { "epoch": 0.14206642066420663, "grad_norm": 0.7011285591169198, "learning_rate": 0.00019349890699464997, "loss": 1.6919, "step": 693 }, { "epoch": 0.14227142271422713, "grad_norm": 0.668775875766485, "learning_rate": 0.0001934753342963307, "loss": 1.69, "step": 694 }, { "epoch": 0.14247642476424766, "grad_norm": 0.7273460222566939, "learning_rate": 0.0001934517203796369, "loss": 1.6517, "step": 695 }, { "epoch": 0.14268142681426815, "grad_norm": 0.7469698129504518, "learning_rate": 0.0001934280652549814, "loss": 1.7333, "step": 696 }, { "epoch": 0.14288642886428865, "grad_norm": 0.5847816123055689, "learning_rate": 0.0001934043689327949, "loss": 1.7141, "step": 697 }, { "epoch": 0.14309143091430915, "grad_norm": 0.7884207755015578, "learning_rate": 0.00019338063142352644, "loss": 1.7298, "step": 698 }, { "epoch": 0.14329643296432965, "grad_norm": 0.8044078385037076, "learning_rate": 0.00019335685273764322, "loss": 1.7462, "step": 699 }, { "epoch": 0.14350143501435014, "grad_norm": 0.6380879417186065, "learning_rate": 0.0001933330328856305, "loss": 1.6637, "step": 700 }, { "epoch": 0.14370643706437064, "grad_norm": 0.7217261357612341, "learning_rate": 0.0001933091718779918, "loss": 1.7333, "step": 701 }, { "epoch": 0.14391143911439114, "grad_norm": 0.8240663565497209, "learning_rate": 0.0001932852697252487, "loss": 1.608, "step": 702 }, { "epoch": 0.14411644116441163, "grad_norm": 0.6671549962916544, "learning_rate": 0.000193261326437941, "loss": 1.7626, "step": 703 }, { "epoch": 0.14432144321443213, "grad_norm": 0.7294815007273447, "learning_rate": 0.0001932373420266266, "loss": 1.6816, "step": 704 }, { "epoch": 0.14452644526445266, "grad_norm": 0.7136490706611349, "learning_rate": 0.0001932133165018815, "loss": 1.7224, "step": 705 }, { "epoch": 0.14473144731447316, "grad_norm": 0.6768368867330093, "learning_rate": 0.0001931892498742999, "loss": 1.7045, "step": 706 }, { "epoch": 0.14493644936449365, "grad_norm": 0.6901595801919572, "learning_rate": 0.00019316514215449404, "loss": 1.69, "step": 707 }, { "epoch": 0.14514145141451415, "grad_norm": 0.7379399680837766, "learning_rate": 0.0001931409933530944, "loss": 1.7071, "step": 708 }, { "epoch": 0.14534645346453465, "grad_norm": 0.6950229857578474, "learning_rate": 0.00019311680348074945, "loss": 1.6706, "step": 709 }, { "epoch": 0.14555145551455514, "grad_norm": 1.4239867677236318, "learning_rate": 0.00019309257254812584, "loss": 1.6475, "step": 710 }, { "epoch": 0.14575645756457564, "grad_norm": 0.7853253803550803, "learning_rate": 0.00019306830056590833, "loss": 1.7422, "step": 711 }, { "epoch": 0.14596145961459614, "grad_norm": 0.7912920302745574, "learning_rate": 0.00019304398754479976, "loss": 1.6676, "step": 712 }, { "epoch": 0.14616646166461664, "grad_norm": 0.7559316805604154, "learning_rate": 0.000193019633495521, "loss": 1.705, "step": 713 }, { "epoch": 0.14637146371463713, "grad_norm": 0.7515552055905841, "learning_rate": 0.00019299523842881118, "loss": 1.6541, "step": 714 }, { "epoch": 0.14657646576465766, "grad_norm": 0.7718033361975094, "learning_rate": 0.00019297080235542731, "loss": 1.6959, "step": 715 }, { "epoch": 0.14678146781467816, "grad_norm": 0.7365498016658, "learning_rate": 0.0001929463252861447, "loss": 1.6972, "step": 716 }, { "epoch": 0.14698646986469865, "grad_norm": 0.6738686695891294, "learning_rate": 0.00019292180723175654, "loss": 1.675, "step": 717 }, { "epoch": 0.14719147191471915, "grad_norm": 0.7561118156387349, "learning_rate": 0.0001928972482030742, "loss": 1.7018, "step": 718 }, { "epoch": 0.14739647396473965, "grad_norm": 0.7795391791753485, "learning_rate": 0.0001928726482109271, "loss": 1.7041, "step": 719 }, { "epoch": 0.14760147601476015, "grad_norm": 0.6935708735515262, "learning_rate": 0.00019284800726616274, "loss": 1.6432, "step": 720 }, { "epoch": 0.14780647806478064, "grad_norm": 0.7664408672918853, "learning_rate": 0.00019282332537964663, "loss": 1.7302, "step": 721 }, { "epoch": 0.14801148011480114, "grad_norm": 0.7477278209068755, "learning_rate": 0.0001927986025622624, "loss": 1.6377, "step": 722 }, { "epoch": 0.14821648216482164, "grad_norm": 0.7276436261402113, "learning_rate": 0.0001927738388249116, "loss": 1.6981, "step": 723 }, { "epoch": 0.14842148421484214, "grad_norm": 0.8276994419711222, "learning_rate": 0.000192749034178514, "loss": 1.7433, "step": 724 }, { "epoch": 0.14862648626486266, "grad_norm": 0.749452837493542, "learning_rate": 0.00019272418863400728, "loss": 1.6612, "step": 725 }, { "epoch": 0.14883148831488316, "grad_norm": 0.7116894702731099, "learning_rate": 0.0001926993022023472, "loss": 1.7382, "step": 726 }, { "epoch": 0.14903649036490366, "grad_norm": 0.8298799172038066, "learning_rate": 0.0001926743748945076, "loss": 1.6468, "step": 727 }, { "epoch": 0.14924149241492415, "grad_norm": 0.7418561397260032, "learning_rate": 0.00019264940672148018, "loss": 1.6234, "step": 728 }, { "epoch": 0.14944649446494465, "grad_norm": 0.691296953981057, "learning_rate": 0.00019262439769427488, "loss": 1.6744, "step": 729 }, { "epoch": 0.14965149651496515, "grad_norm": 0.7064986352931485, "learning_rate": 0.00019259934782391946, "loss": 1.7109, "step": 730 }, { "epoch": 0.14985649856498565, "grad_norm": 0.7862650639141375, "learning_rate": 0.00019257425712145986, "loss": 1.6965, "step": 731 }, { "epoch": 0.15006150061500614, "grad_norm": 0.7759906572416613, "learning_rate": 0.00019254912559795982, "loss": 1.7832, "step": 732 }, { "epoch": 0.15026650266502664, "grad_norm": 0.689959023978982, "learning_rate": 0.00019252395326450132, "loss": 1.7431, "step": 733 }, { "epoch": 0.15047150471504714, "grad_norm": 0.7773747442610998, "learning_rate": 0.00019249874013218415, "loss": 1.674, "step": 734 }, { "epoch": 0.15067650676506766, "grad_norm": 0.7046889912343489, "learning_rate": 0.0001924734862121262, "loss": 1.6346, "step": 735 }, { "epoch": 0.15088150881508816, "grad_norm": 0.7343048491563514, "learning_rate": 0.00019244819151546322, "loss": 1.7504, "step": 736 }, { "epoch": 0.15108651086510866, "grad_norm": 0.7836332111854394, "learning_rate": 0.00019242285605334912, "loss": 1.7218, "step": 737 }, { "epoch": 0.15129151291512916, "grad_norm": 0.719339195666881, "learning_rate": 0.00019239747983695562, "loss": 1.7019, "step": 738 }, { "epoch": 0.15149651496514965, "grad_norm": 0.6783349731793807, "learning_rate": 0.00019237206287747252, "loss": 1.6142, "step": 739 }, { "epoch": 0.15170151701517015, "grad_norm": 0.7290436025627312, "learning_rate": 0.0001923466051861075, "loss": 1.6786, "step": 740 }, { "epoch": 0.15190651906519065, "grad_norm": 0.7727689220580422, "learning_rate": 0.00019232110677408625, "loss": 1.7007, "step": 741 }, { "epoch": 0.15211152111521115, "grad_norm": 0.7224680246507847, "learning_rate": 0.00019229556765265246, "loss": 1.6901, "step": 742 }, { "epoch": 0.15231652316523164, "grad_norm": 0.7535700570650502, "learning_rate": 0.00019226998783306772, "loss": 1.6844, "step": 743 }, { "epoch": 0.15252152521525214, "grad_norm": 0.8030946274710645, "learning_rate": 0.00019224436732661148, "loss": 1.7006, "step": 744 }, { "epoch": 0.15272652726527267, "grad_norm": 0.7401564290099959, "learning_rate": 0.0001922187061445813, "loss": 1.6807, "step": 745 }, { "epoch": 0.15293152931529316, "grad_norm": 0.8159603172747175, "learning_rate": 0.00019219300429829258, "loss": 1.7427, "step": 746 }, { "epoch": 0.15313653136531366, "grad_norm": 0.7196371365597042, "learning_rate": 0.00019216726179907868, "loss": 1.6721, "step": 747 }, { "epoch": 0.15334153341533416, "grad_norm": 0.7042282056199732, "learning_rate": 0.00019214147865829082, "loss": 1.7774, "step": 748 }, { "epoch": 0.15354653546535466, "grad_norm": 0.7038330647815587, "learning_rate": 0.00019211565488729825, "loss": 1.6748, "step": 749 }, { "epoch": 0.15375153751537515, "grad_norm": 0.7215543246141798, "learning_rate": 0.00019208979049748805, "loss": 1.6464, "step": 750 }, { "epoch": 0.15395653956539565, "grad_norm": 0.650350189502158, "learning_rate": 0.00019206388550026523, "loss": 1.805, "step": 751 }, { "epoch": 0.15416154161541615, "grad_norm": 0.7248020516056501, "learning_rate": 0.00019203793990705273, "loss": 1.6476, "step": 752 }, { "epoch": 0.15436654366543665, "grad_norm": 0.6850130547924245, "learning_rate": 0.00019201195372929139, "loss": 1.6508, "step": 753 }, { "epoch": 0.15457154571545714, "grad_norm": 0.6584037461711865, "learning_rate": 0.0001919859269784399, "loss": 1.6975, "step": 754 }, { "epoch": 0.15477654776547767, "grad_norm": 0.7042533368717044, "learning_rate": 0.00019195985966597494, "loss": 1.7084, "step": 755 }, { "epoch": 0.15498154981549817, "grad_norm": 0.6916924771265472, "learning_rate": 0.00019193375180339095, "loss": 1.6243, "step": 756 }, { "epoch": 0.15518655186551866, "grad_norm": 0.789790297021731, "learning_rate": 0.0001919076034022003, "loss": 1.7046, "step": 757 }, { "epoch": 0.15539155391553916, "grad_norm": 0.6875331912721294, "learning_rate": 0.00019188141447393334, "loss": 1.6407, "step": 758 }, { "epoch": 0.15559655596555966, "grad_norm": 0.7752788318017302, "learning_rate": 0.00019185518503013807, "loss": 1.7957, "step": 759 }, { "epoch": 0.15580155801558015, "grad_norm": 0.8078684289774277, "learning_rate": 0.00019182891508238056, "loss": 1.6765, "step": 760 }, { "epoch": 0.15600656006560065, "grad_norm": 0.6386796570512407, "learning_rate": 0.00019180260464224467, "loss": 1.6398, "step": 761 }, { "epoch": 0.15621156211562115, "grad_norm": 0.6455166552222789, "learning_rate": 0.00019177625372133209, "loss": 1.6925, "step": 762 }, { "epoch": 0.15641656416564165, "grad_norm": 0.810862189886265, "learning_rate": 0.00019174986233126234, "loss": 1.7278, "step": 763 }, { "epoch": 0.15662156621566214, "grad_norm": 0.6163496250678044, "learning_rate": 0.00019172343048367289, "loss": 1.6303, "step": 764 }, { "epoch": 0.15682656826568267, "grad_norm": 0.696217271270026, "learning_rate": 0.00019169695819021892, "loss": 1.7329, "step": 765 }, { "epoch": 0.15703157031570317, "grad_norm": 0.7349137675712252, "learning_rate": 0.00019167044546257355, "loss": 1.673, "step": 766 }, { "epoch": 0.15723657236572366, "grad_norm": 0.7847045456766413, "learning_rate": 0.0001916438923124277, "loss": 1.6088, "step": 767 }, { "epoch": 0.15744157441574416, "grad_norm": 0.7747429199903232, "learning_rate": 0.00019161729875149006, "loss": 1.7312, "step": 768 }, { "epoch": 0.15764657646576466, "grad_norm": 0.7437304522919976, "learning_rate": 0.0001915906647914872, "loss": 1.7226, "step": 769 }, { "epoch": 0.15785157851578516, "grad_norm": 0.7334915002288332, "learning_rate": 0.00019156399044416352, "loss": 1.6969, "step": 770 }, { "epoch": 0.15805658056580565, "grad_norm": 0.6953103054492031, "learning_rate": 0.00019153727572128113, "loss": 1.6887, "step": 771 }, { "epoch": 0.15826158261582615, "grad_norm": 0.687499632492689, "learning_rate": 0.00019151052063462007, "loss": 1.7419, "step": 772 }, { "epoch": 0.15846658466584665, "grad_norm": 0.7805948008431428, "learning_rate": 0.00019148372519597808, "loss": 1.749, "step": 773 }, { "epoch": 0.15867158671586715, "grad_norm": 0.7818982203947401, "learning_rate": 0.00019145688941717075, "loss": 1.6652, "step": 774 }, { "epoch": 0.15887658876588767, "grad_norm": 0.7320495222334545, "learning_rate": 0.0001914300133100314, "loss": 1.6915, "step": 775 }, { "epoch": 0.15908159081590817, "grad_norm": 0.7804260936083108, "learning_rate": 0.00019140309688641123, "loss": 1.6963, "step": 776 }, { "epoch": 0.15928659286592867, "grad_norm": 0.7604663864321175, "learning_rate": 0.0001913761401581791, "loss": 1.6859, "step": 777 }, { "epoch": 0.15949159491594916, "grad_norm": 0.6727302874956032, "learning_rate": 0.00019134914313722178, "loss": 1.6377, "step": 778 }, { "epoch": 0.15969659696596966, "grad_norm": 0.7101389386956383, "learning_rate": 0.00019132210583544366, "loss": 1.7482, "step": 779 }, { "epoch": 0.15990159901599016, "grad_norm": 0.6984364332599773, "learning_rate": 0.00019129502826476698, "loss": 1.7494, "step": 780 }, { "epoch": 0.16010660106601066, "grad_norm": 0.7207480762012807, "learning_rate": 0.00019126791043713173, "loss": 1.674, "step": 781 }, { "epoch": 0.16031160311603115, "grad_norm": 0.6498071125057682, "learning_rate": 0.0001912407523644956, "loss": 1.716, "step": 782 }, { "epoch": 0.16051660516605165, "grad_norm": 0.7053619225609432, "learning_rate": 0.00019121355405883413, "loss": 1.579, "step": 783 }, { "epoch": 0.16072160721607215, "grad_norm": 0.674479155987102, "learning_rate": 0.0001911863155321405, "loss": 1.7246, "step": 784 }, { "epoch": 0.16092660926609267, "grad_norm": 0.7343009952595465, "learning_rate": 0.00019115903679642565, "loss": 1.7329, "step": 785 }, { "epoch": 0.16113161131611317, "grad_norm": 0.7132288209781151, "learning_rate": 0.0001911317178637183, "loss": 1.7128, "step": 786 }, { "epoch": 0.16133661336613367, "grad_norm": 0.6988174737208028, "learning_rate": 0.00019110435874606486, "loss": 1.7224, "step": 787 }, { "epoch": 0.16154161541615417, "grad_norm": 0.688544636732044, "learning_rate": 0.0001910769594555294, "loss": 1.7219, "step": 788 }, { "epoch": 0.16174661746617466, "grad_norm": 0.6968777239911965, "learning_rate": 0.00019104952000419378, "loss": 1.7181, "step": 789 }, { "epoch": 0.16195161951619516, "grad_norm": 0.6527150151185663, "learning_rate": 0.00019102204040415761, "loss": 1.6811, "step": 790 }, { "epoch": 0.16215662156621566, "grad_norm": 0.6982138890248064, "learning_rate": 0.00019099452066753808, "loss": 1.6319, "step": 791 }, { "epoch": 0.16236162361623616, "grad_norm": 0.7445818886153832, "learning_rate": 0.00019096696080647018, "loss": 1.66, "step": 792 }, { "epoch": 0.16256662566625665, "grad_norm": 0.6518191707772367, "learning_rate": 0.00019093936083310653, "loss": 1.7224, "step": 793 }, { "epoch": 0.16277162771627715, "grad_norm": 0.7155219264389436, "learning_rate": 0.00019091172075961748, "loss": 1.7605, "step": 794 }, { "epoch": 0.16297662976629768, "grad_norm": 0.7073441135239922, "learning_rate": 0.00019088404059819104, "loss": 1.7127, "step": 795 }, { "epoch": 0.16318163181631817, "grad_norm": 0.7002062904148844, "learning_rate": 0.0001908563203610329, "loss": 1.701, "step": 796 }, { "epoch": 0.16338663386633867, "grad_norm": 0.6533771171765951, "learning_rate": 0.00019082856006036645, "loss": 1.635, "step": 797 }, { "epoch": 0.16359163591635917, "grad_norm": 0.7396803765413693, "learning_rate": 0.0001908007597084327, "loss": 1.7317, "step": 798 }, { "epoch": 0.16379663796637967, "grad_norm": 0.6843103151239934, "learning_rate": 0.0001907729193174903, "loss": 1.6857, "step": 799 }, { "epoch": 0.16400164001640016, "grad_norm": 0.6891857006068521, "learning_rate": 0.00019074503889981566, "loss": 1.6891, "step": 800 }, { "epoch": 0.16420664206642066, "grad_norm": 0.7208825918374903, "learning_rate": 0.0001907171184677028, "loss": 1.7782, "step": 801 }, { "epoch": 0.16441164411644116, "grad_norm": 0.7424150106814328, "learning_rate": 0.0001906891580334633, "loss": 1.719, "step": 802 }, { "epoch": 0.16461664616646166, "grad_norm": 0.6813977567751245, "learning_rate": 0.00019066115760942647, "loss": 1.7918, "step": 803 }, { "epoch": 0.16482164821648215, "grad_norm": 0.6815153922978643, "learning_rate": 0.00019063311720793918, "loss": 1.7102, "step": 804 }, { "epoch": 0.16502665026650268, "grad_norm": 0.7434835896386757, "learning_rate": 0.00019060503684136603, "loss": 1.6669, "step": 805 }, { "epoch": 0.16523165231652318, "grad_norm": 0.7176791513974383, "learning_rate": 0.00019057691652208915, "loss": 1.6751, "step": 806 }, { "epoch": 0.16543665436654367, "grad_norm": 0.6404635546952481, "learning_rate": 0.00019054875626250834, "loss": 1.6802, "step": 807 }, { "epoch": 0.16564165641656417, "grad_norm": 0.7862699666116406, "learning_rate": 0.00019052055607504103, "loss": 1.7165, "step": 808 }, { "epoch": 0.16584665846658467, "grad_norm": 0.6748901758704504, "learning_rate": 0.00019049231597212214, "loss": 1.6411, "step": 809 }, { "epoch": 0.16605166051660517, "grad_norm": 0.6334195790465038, "learning_rate": 0.0001904640359662043, "loss": 1.666, "step": 810 }, { "epoch": 0.16625666256662566, "grad_norm": 0.7032533821534248, "learning_rate": 0.00019043571606975777, "loss": 1.7189, "step": 811 }, { "epoch": 0.16646166461664616, "grad_norm": 0.6736035565240697, "learning_rate": 0.00019040735629527027, "loss": 1.7225, "step": 812 }, { "epoch": 0.16666666666666666, "grad_norm": 0.6990622565895293, "learning_rate": 0.00019037895665524715, "loss": 1.6483, "step": 813 }, { "epoch": 0.16687166871668715, "grad_norm": 0.7308111216648203, "learning_rate": 0.00019035051716221143, "loss": 1.6895, "step": 814 }, { "epoch": 0.16707667076670768, "grad_norm": 0.7051146623961817, "learning_rate": 0.00019032203782870364, "loss": 1.6705, "step": 815 }, { "epoch": 0.16728167281672818, "grad_norm": 0.7347551716579923, "learning_rate": 0.0001902935186672818, "loss": 1.7335, "step": 816 }, { "epoch": 0.16748667486674867, "grad_norm": 0.7324400798011572, "learning_rate": 0.00019026495969052162, "loss": 1.695, "step": 817 }, { "epoch": 0.16769167691676917, "grad_norm": 0.74982494417203, "learning_rate": 0.00019023636091101626, "loss": 1.652, "step": 818 }, { "epoch": 0.16789667896678967, "grad_norm": 0.7942630371480432, "learning_rate": 0.00019020772234137656, "loss": 1.7479, "step": 819 }, { "epoch": 0.16810168101681017, "grad_norm": 0.7354769200197937, "learning_rate": 0.00019017904399423077, "loss": 1.7174, "step": 820 }, { "epoch": 0.16830668306683066, "grad_norm": 0.7562489211350594, "learning_rate": 0.00019015032588222473, "loss": 1.6347, "step": 821 }, { "epoch": 0.16851168511685116, "grad_norm": 0.7595626084321155, "learning_rate": 0.00019012156801802182, "loss": 1.6914, "step": 822 }, { "epoch": 0.16871668716687166, "grad_norm": 0.7214950578904973, "learning_rate": 0.000190092770414303, "loss": 1.6705, "step": 823 }, { "epoch": 0.16892168921689216, "grad_norm": 0.7447618573876167, "learning_rate": 0.00019006393308376672, "loss": 1.6672, "step": 824 }, { "epoch": 0.16912669126691268, "grad_norm": 0.7445575734469274, "learning_rate": 0.00019003505603912884, "loss": 1.7343, "step": 825 }, { "epoch": 0.16933169331693318, "grad_norm": 0.6733678011951022, "learning_rate": 0.0001900061392931229, "loss": 1.7441, "step": 826 }, { "epoch": 0.16953669536695368, "grad_norm": 0.762440487524822, "learning_rate": 0.00018997718285849983, "loss": 1.6341, "step": 827 }, { "epoch": 0.16974169741697417, "grad_norm": 0.701991498412628, "learning_rate": 0.0001899481867480281, "loss": 1.6574, "step": 828 }, { "epoch": 0.16994669946699467, "grad_norm": 0.6739744404871206, "learning_rate": 0.0001899191509744937, "loss": 1.6725, "step": 829 }, { "epoch": 0.17015170151701517, "grad_norm": 0.6718533792642212, "learning_rate": 0.0001898900755507001, "loss": 1.6701, "step": 830 }, { "epoch": 0.17035670356703567, "grad_norm": 0.6811662941338024, "learning_rate": 0.00018986096048946824, "loss": 1.7758, "step": 831 }, { "epoch": 0.17056170561705616, "grad_norm": 0.6874609101501581, "learning_rate": 0.0001898318058036365, "loss": 1.7216, "step": 832 }, { "epoch": 0.17076670766707666, "grad_norm": 0.6533975558087971, "learning_rate": 0.00018980261150606075, "loss": 1.7028, "step": 833 }, { "epoch": 0.17097170971709716, "grad_norm": 0.7098335837225809, "learning_rate": 0.00018977337760961444, "loss": 1.7797, "step": 834 }, { "epoch": 0.17117671176711768, "grad_norm": 0.6179015872900394, "learning_rate": 0.00018974410412718836, "loss": 1.688, "step": 835 }, { "epoch": 0.17138171381713818, "grad_norm": 0.580510879130731, "learning_rate": 0.0001897147910716907, "loss": 1.7012, "step": 836 }, { "epoch": 0.17158671586715868, "grad_norm": 0.6795642698500958, "learning_rate": 0.0001896854384560473, "loss": 1.7463, "step": 837 }, { "epoch": 0.17179171791717918, "grad_norm": 0.5666775996494776, "learning_rate": 0.00018965604629320125, "loss": 1.6507, "step": 838 }, { "epoch": 0.17199671996719967, "grad_norm": 0.6045763823421743, "learning_rate": 0.00018962661459611318, "loss": 1.7533, "step": 839 }, { "epoch": 0.17220172201722017, "grad_norm": 0.6508738877098017, "learning_rate": 0.0001895971433777612, "loss": 1.6705, "step": 840 }, { "epoch": 0.17240672406724067, "grad_norm": 0.7193160516803674, "learning_rate": 0.00018956763265114065, "loss": 1.5835, "step": 841 }, { "epoch": 0.17261172611726117, "grad_norm": 0.6129779610520512, "learning_rate": 0.00018953808242926453, "loss": 1.6987, "step": 842 }, { "epoch": 0.17281672816728166, "grad_norm": 0.7416867858127081, "learning_rate": 0.0001895084927251631, "loss": 1.7034, "step": 843 }, { "epoch": 0.17302173021730216, "grad_norm": 0.6270794650525814, "learning_rate": 0.00018947886355188406, "loss": 1.6292, "step": 844 }, { "epoch": 0.1732267322673227, "grad_norm": 0.6314331647234465, "learning_rate": 0.00018944919492249254, "loss": 1.726, "step": 845 }, { "epoch": 0.17343173431734318, "grad_norm": 0.6365832806860144, "learning_rate": 0.00018941948685007108, "loss": 1.7083, "step": 846 }, { "epoch": 0.17363673636736368, "grad_norm": 0.6234270131402238, "learning_rate": 0.00018938973934771956, "loss": 1.6867, "step": 847 }, { "epoch": 0.17384173841738418, "grad_norm": 0.6091048529498941, "learning_rate": 0.0001893599524285553, "loss": 1.6649, "step": 848 }, { "epoch": 0.17404674046740468, "grad_norm": 0.6382079176903626, "learning_rate": 0.00018933012610571295, "loss": 1.6588, "step": 849 }, { "epoch": 0.17425174251742517, "grad_norm": 0.7037016853945891, "learning_rate": 0.0001893002603923446, "loss": 1.7812, "step": 850 }, { "epoch": 0.17445674456744567, "grad_norm": 0.6517788002418122, "learning_rate": 0.00018927035530161962, "loss": 1.6209, "step": 851 }, { "epoch": 0.17466174661746617, "grad_norm": 0.595878747391538, "learning_rate": 0.00018924041084672486, "loss": 1.6769, "step": 852 }, { "epoch": 0.17486674866748667, "grad_norm": 0.6481032387337692, "learning_rate": 0.00018921042704086443, "loss": 1.6431, "step": 853 }, { "epoch": 0.17507175071750716, "grad_norm": 0.6672757210081901, "learning_rate": 0.00018918040389725982, "loss": 1.7228, "step": 854 }, { "epoch": 0.1752767527675277, "grad_norm": 0.6868433443528669, "learning_rate": 0.00018915034142914986, "loss": 1.7128, "step": 855 }, { "epoch": 0.17548175481754819, "grad_norm": 0.6167996729418488, "learning_rate": 0.00018912023964979077, "loss": 1.6251, "step": 856 }, { "epoch": 0.17568675686756868, "grad_norm": 0.7049193924498683, "learning_rate": 0.00018909009857245605, "loss": 1.7385, "step": 857 }, { "epoch": 0.17589175891758918, "grad_norm": 0.754097457272695, "learning_rate": 0.00018905991821043652, "loss": 1.6502, "step": 858 }, { "epoch": 0.17609676096760968, "grad_norm": 0.738043389759603, "learning_rate": 0.00018902969857704043, "loss": 1.6915, "step": 859 }, { "epoch": 0.17630176301763018, "grad_norm": 0.6476270549640606, "learning_rate": 0.00018899943968559316, "loss": 1.6814, "step": 860 }, { "epoch": 0.17650676506765067, "grad_norm": 0.7189972551247747, "learning_rate": 0.00018896914154943758, "loss": 1.5685, "step": 861 }, { "epoch": 0.17671176711767117, "grad_norm": 0.7085270944540953, "learning_rate": 0.00018893880418193376, "loss": 1.7272, "step": 862 }, { "epoch": 0.17691676916769167, "grad_norm": 0.6505751176170754, "learning_rate": 0.00018890842759645908, "loss": 1.7331, "step": 863 }, { "epoch": 0.17712177121771217, "grad_norm": 0.7960554282230355, "learning_rate": 0.00018887801180640827, "loss": 1.7332, "step": 864 }, { "epoch": 0.1773267732677327, "grad_norm": 0.6346770840549366, "learning_rate": 0.00018884755682519328, "loss": 1.6385, "step": 865 }, { "epoch": 0.1775317753177532, "grad_norm": 0.6484925128459751, "learning_rate": 0.00018881706266624343, "loss": 1.6236, "step": 866 }, { "epoch": 0.17773677736777369, "grad_norm": 0.6834546493061789, "learning_rate": 0.0001887865293430052, "loss": 1.6875, "step": 867 }, { "epoch": 0.17794177941779418, "grad_norm": 0.625820435126923, "learning_rate": 0.00018875595686894243, "loss": 1.6337, "step": 868 }, { "epoch": 0.17814678146781468, "grad_norm": 0.6976994235085078, "learning_rate": 0.00018872534525753615, "loss": 1.6852, "step": 869 }, { "epoch": 0.17835178351783518, "grad_norm": 0.663386461429031, "learning_rate": 0.00018869469452228476, "loss": 1.6919, "step": 870 }, { "epoch": 0.17855678556785567, "grad_norm": 0.6523917836752499, "learning_rate": 0.00018866400467670378, "loss": 1.7506, "step": 871 }, { "epoch": 0.17876178761787617, "grad_norm": 0.642552322851459, "learning_rate": 0.00018863327573432606, "loss": 1.7302, "step": 872 }, { "epoch": 0.17896678966789667, "grad_norm": 0.696356842279368, "learning_rate": 0.00018860250770870167, "loss": 1.6714, "step": 873 }, { "epoch": 0.17917179171791717, "grad_norm": 0.6254447508296375, "learning_rate": 0.0001885717006133979, "loss": 1.5611, "step": 874 }, { "epoch": 0.1793767937679377, "grad_norm": 0.6867999913543839, "learning_rate": 0.0001885408544619993, "loss": 1.7095, "step": 875 }, { "epoch": 0.1795817958179582, "grad_norm": 0.6469042868572288, "learning_rate": 0.0001885099692681076, "loss": 1.6229, "step": 876 }, { "epoch": 0.1797867978679787, "grad_norm": 0.6901766814219229, "learning_rate": 0.00018847904504534175, "loss": 1.7213, "step": 877 }, { "epoch": 0.17999179991799918, "grad_norm": 0.6621394493706978, "learning_rate": 0.00018844808180733797, "loss": 1.6127, "step": 878 }, { "epoch": 0.18019680196801968, "grad_norm": 0.6281790144588328, "learning_rate": 0.00018841707956774963, "loss": 1.6385, "step": 879 }, { "epoch": 0.18040180401804018, "grad_norm": 0.6411798812730471, "learning_rate": 0.00018838603834024729, "loss": 1.7314, "step": 880 }, { "epoch": 0.18060680606806068, "grad_norm": 0.6230387534082905, "learning_rate": 0.0001883549581385187, "loss": 1.6461, "step": 881 }, { "epoch": 0.18081180811808117, "grad_norm": 0.6955757939286624, "learning_rate": 0.00018832383897626892, "loss": 1.7007, "step": 882 }, { "epoch": 0.18101681016810167, "grad_norm": 0.6160689276828045, "learning_rate": 0.00018829268086721995, "loss": 1.6704, "step": 883 }, { "epoch": 0.18122181221812217, "grad_norm": 0.6294389998455219, "learning_rate": 0.0001882614838251112, "loss": 1.6471, "step": 884 }, { "epoch": 0.1814268142681427, "grad_norm": 0.6831616359839734, "learning_rate": 0.00018823024786369908, "loss": 1.6823, "step": 885 }, { "epoch": 0.1816318163181632, "grad_norm": 0.6948313729204673, "learning_rate": 0.00018819897299675726, "loss": 1.6644, "step": 886 }, { "epoch": 0.1818368183681837, "grad_norm": 0.5933553903879749, "learning_rate": 0.00018816765923807655, "loss": 1.6355, "step": 887 }, { "epoch": 0.1820418204182042, "grad_norm": 0.6718657080059676, "learning_rate": 0.00018813630660146488, "loss": 1.7208, "step": 888 }, { "epoch": 0.18224682246822468, "grad_norm": 0.6431331703803308, "learning_rate": 0.00018810491510074735, "loss": 1.6422, "step": 889 }, { "epoch": 0.18245182451824518, "grad_norm": 0.6522599073402241, "learning_rate": 0.00018807348474976618, "loss": 1.7161, "step": 890 }, { "epoch": 0.18265682656826568, "grad_norm": 0.6525287583387367, "learning_rate": 0.00018804201556238068, "loss": 1.6337, "step": 891 }, { "epoch": 0.18286182861828618, "grad_norm": 0.7264408965183254, "learning_rate": 0.0001880105075524674, "loss": 1.6296, "step": 892 }, { "epoch": 0.18306683066830667, "grad_norm": 0.7192954633367987, "learning_rate": 0.0001879789607339199, "loss": 1.6373, "step": 893 }, { "epoch": 0.18327183271832717, "grad_norm": 0.6692160236756574, "learning_rate": 0.0001879473751206489, "loss": 1.6973, "step": 894 }, { "epoch": 0.1834768347683477, "grad_norm": 0.665613575409276, "learning_rate": 0.00018791575072658225, "loss": 1.6229, "step": 895 }, { "epoch": 0.1836818368183682, "grad_norm": 0.7646965856440251, "learning_rate": 0.00018788408756566485, "loss": 1.7205, "step": 896 }, { "epoch": 0.1838868388683887, "grad_norm": 0.6247188692917274, "learning_rate": 0.00018785238565185866, "loss": 1.5969, "step": 897 }, { "epoch": 0.1840918409184092, "grad_norm": 0.6727511736404558, "learning_rate": 0.00018782064499914288, "loss": 1.6933, "step": 898 }, { "epoch": 0.18429684296842969, "grad_norm": 0.6824196078723912, "learning_rate": 0.00018778886562151366, "loss": 1.6754, "step": 899 }, { "epoch": 0.18450184501845018, "grad_norm": 0.7009488925557734, "learning_rate": 0.00018775704753298423, "loss": 1.6492, "step": 900 }, { "epoch": 0.18470684706847068, "grad_norm": 0.5620775171970334, "learning_rate": 0.0001877251907475849, "loss": 1.5734, "step": 901 }, { "epoch": 0.18491184911849118, "grad_norm": 0.662085497525188, "learning_rate": 0.00018769329527936317, "loss": 1.6812, "step": 902 }, { "epoch": 0.18511685116851168, "grad_norm": 0.7093135331815954, "learning_rate": 0.0001876613611423834, "loss": 1.6756, "step": 903 }, { "epoch": 0.18532185321853217, "grad_norm": 0.626843350279162, "learning_rate": 0.00018762938835072712, "loss": 1.6753, "step": 904 }, { "epoch": 0.1855268552685527, "grad_norm": 0.6209614968378987, "learning_rate": 0.00018759737691849288, "loss": 1.6498, "step": 905 }, { "epoch": 0.1857318573185732, "grad_norm": 0.7326164217270169, "learning_rate": 0.00018756532685979628, "loss": 1.711, "step": 906 }, { "epoch": 0.1859368593685937, "grad_norm": 0.6614405610821985, "learning_rate": 0.0001875332381887699, "loss": 1.6676, "step": 907 }, { "epoch": 0.1861418614186142, "grad_norm": 0.6623463439047697, "learning_rate": 0.00018750111091956346, "loss": 1.7013, "step": 908 }, { "epoch": 0.1863468634686347, "grad_norm": 0.6543491722596658, "learning_rate": 0.00018746894506634353, "loss": 1.7193, "step": 909 }, { "epoch": 0.18655186551865519, "grad_norm": 0.6192123062294089, "learning_rate": 0.00018743674064329387, "loss": 1.6958, "step": 910 }, { "epoch": 0.18675686756867568, "grad_norm": 0.559686087942116, "learning_rate": 0.00018740449766461512, "loss": 1.6373, "step": 911 }, { "epoch": 0.18696186961869618, "grad_norm": 0.6982378232968304, "learning_rate": 0.000187372216144525, "loss": 1.7026, "step": 912 }, { "epoch": 0.18716687166871668, "grad_norm": 0.710073928723775, "learning_rate": 0.00018733989609725815, "loss": 1.6801, "step": 913 }, { "epoch": 0.18737187371873718, "grad_norm": 0.6245826793678664, "learning_rate": 0.00018730753753706626, "loss": 1.6767, "step": 914 }, { "epoch": 0.1875768757687577, "grad_norm": 0.6103701115701096, "learning_rate": 0.00018727514047821802, "loss": 1.6452, "step": 915 }, { "epoch": 0.1877818778187782, "grad_norm": 0.686021057243264, "learning_rate": 0.00018724270493499903, "loss": 1.6239, "step": 916 }, { "epoch": 0.1879868798687987, "grad_norm": 0.6986842278961767, "learning_rate": 0.00018721023092171187, "loss": 1.6669, "step": 917 }, { "epoch": 0.1881918819188192, "grad_norm": 0.6354306436352476, "learning_rate": 0.00018717771845267614, "loss": 1.6364, "step": 918 }, { "epoch": 0.1883968839688397, "grad_norm": 0.7265544328486528, "learning_rate": 0.00018714516754222835, "loss": 1.6864, "step": 919 }, { "epoch": 0.1886018860188602, "grad_norm": 0.6790080191377459, "learning_rate": 0.00018711257820472197, "loss": 1.7714, "step": 920 }, { "epoch": 0.18880688806888068, "grad_norm": 0.6033377256121235, "learning_rate": 0.00018707995045452744, "loss": 1.7136, "step": 921 }, { "epoch": 0.18901189011890118, "grad_norm": 0.6661358074015494, "learning_rate": 0.00018704728430603206, "loss": 1.715, "step": 922 }, { "epoch": 0.18921689216892168, "grad_norm": 0.6739728687339908, "learning_rate": 0.00018701457977364017, "loss": 1.7122, "step": 923 }, { "epoch": 0.18942189421894218, "grad_norm": 0.708908289034046, "learning_rate": 0.00018698183687177296, "loss": 1.7211, "step": 924 }, { "epoch": 0.1896268962689627, "grad_norm": 0.6053915906514942, "learning_rate": 0.00018694905561486856, "loss": 1.6773, "step": 925 }, { "epoch": 0.1898318983189832, "grad_norm": 0.7007139325779282, "learning_rate": 0.00018691623601738199, "loss": 1.6532, "step": 926 }, { "epoch": 0.1900369003690037, "grad_norm": 0.7269221330685796, "learning_rate": 0.00018688337809378528, "loss": 1.6779, "step": 927 }, { "epoch": 0.1902419024190242, "grad_norm": 0.6831541499910486, "learning_rate": 0.00018685048185856722, "loss": 1.6481, "step": 928 }, { "epoch": 0.1904469044690447, "grad_norm": 0.6311514207678236, "learning_rate": 0.00018681754732623355, "loss": 1.6201, "step": 929 }, { "epoch": 0.1906519065190652, "grad_norm": 0.7593897817772871, "learning_rate": 0.00018678457451130691, "loss": 1.7087, "step": 930 }, { "epoch": 0.1908569085690857, "grad_norm": 0.7043794145600288, "learning_rate": 0.00018675156342832684, "loss": 1.6666, "step": 931 }, { "epoch": 0.19106191061910618, "grad_norm": 0.6435374384674419, "learning_rate": 0.0001867185140918497, "loss": 1.6296, "step": 932 }, { "epoch": 0.19126691266912668, "grad_norm": 0.6863965252841963, "learning_rate": 0.00018668542651644878, "loss": 1.6131, "step": 933 }, { "epoch": 0.19147191471914718, "grad_norm": 0.6623138147392169, "learning_rate": 0.00018665230071671413, "loss": 1.7351, "step": 934 }, { "epoch": 0.1916769167691677, "grad_norm": 0.5431701316896971, "learning_rate": 0.00018661913670725276, "loss": 1.6592, "step": 935 }, { "epoch": 0.1918819188191882, "grad_norm": 0.6212699878028247, "learning_rate": 0.00018658593450268852, "loss": 1.7047, "step": 936 }, { "epoch": 0.1920869208692087, "grad_norm": 0.6761152604779588, "learning_rate": 0.00018655269411766207, "loss": 1.6299, "step": 937 }, { "epoch": 0.1922919229192292, "grad_norm": 0.5955755276333651, "learning_rate": 0.00018651941556683085, "loss": 1.7577, "step": 938 }, { "epoch": 0.1924969249692497, "grad_norm": 0.6937537649155028, "learning_rate": 0.00018648609886486923, "loss": 1.6609, "step": 939 }, { "epoch": 0.1927019270192702, "grad_norm": 0.62982357910632, "learning_rate": 0.00018645274402646835, "loss": 1.6935, "step": 940 }, { "epoch": 0.1929069290692907, "grad_norm": 0.6297491935607556, "learning_rate": 0.00018641935106633622, "loss": 1.6599, "step": 941 }, { "epoch": 0.1931119311193112, "grad_norm": 0.6171966103138852, "learning_rate": 0.00018638591999919755, "loss": 1.6043, "step": 942 }, { "epoch": 0.19331693316933168, "grad_norm": 0.6342148064043517, "learning_rate": 0.00018635245083979394, "loss": 1.699, "step": 943 }, { "epoch": 0.19352193521935218, "grad_norm": 0.6778005036022733, "learning_rate": 0.00018631894360288383, "loss": 1.7222, "step": 944 }, { "epoch": 0.1937269372693727, "grad_norm": 0.7039712152490014, "learning_rate": 0.00018628539830324229, "loss": 1.6682, "step": 945 }, { "epoch": 0.1939319393193932, "grad_norm": 0.6359100004005492, "learning_rate": 0.00018625181495566135, "loss": 1.7002, "step": 946 }, { "epoch": 0.1941369413694137, "grad_norm": 0.6493205974074798, "learning_rate": 0.00018621819357494973, "loss": 1.6289, "step": 947 }, { "epoch": 0.1943419434194342, "grad_norm": 0.7134232018326085, "learning_rate": 0.00018618453417593286, "loss": 1.6818, "step": 948 }, { "epoch": 0.1945469454694547, "grad_norm": 0.6617690737765433, "learning_rate": 0.0001861508367734531, "loss": 1.6865, "step": 949 }, { "epoch": 0.1947519475194752, "grad_norm": 0.635854748112604, "learning_rate": 0.00018611710138236945, "loss": 1.6846, "step": 950 }, { "epoch": 0.1949569495694957, "grad_norm": 0.6819488168694777, "learning_rate": 0.00018608332801755764, "loss": 1.6825, "step": 951 }, { "epoch": 0.1951619516195162, "grad_norm": 0.6442455936564754, "learning_rate": 0.00018604951669391019, "loss": 1.6546, "step": 952 }, { "epoch": 0.19536695366953669, "grad_norm": 0.6260761239670712, "learning_rate": 0.00018601566742633643, "loss": 1.616, "step": 953 }, { "epoch": 0.19557195571955718, "grad_norm": 0.6311195148986711, "learning_rate": 0.0001859817802297623, "loss": 1.615, "step": 954 }, { "epoch": 0.1957769577695777, "grad_norm": 0.7400128305595713, "learning_rate": 0.00018594785511913048, "loss": 1.7351, "step": 955 }, { "epoch": 0.1959819598195982, "grad_norm": 0.7062002100164632, "learning_rate": 0.0001859138921094005, "loss": 1.6898, "step": 956 }, { "epoch": 0.1961869618696187, "grad_norm": 0.7025409544089614, "learning_rate": 0.0001858798912155484, "loss": 1.7283, "step": 957 }, { "epoch": 0.1963919639196392, "grad_norm": 0.7267700125822197, "learning_rate": 0.00018584585245256708, "loss": 1.6672, "step": 958 }, { "epoch": 0.1965969659696597, "grad_norm": 0.6454570816655464, "learning_rate": 0.00018581177583546605, "loss": 1.6633, "step": 959 }, { "epoch": 0.1968019680196802, "grad_norm": 0.681269214064641, "learning_rate": 0.00018577766137927161, "loss": 1.6807, "step": 960 }, { "epoch": 0.1970069700697007, "grad_norm": 0.6016372216397006, "learning_rate": 0.00018574350909902662, "loss": 1.6529, "step": 961 }, { "epoch": 0.1972119721197212, "grad_norm": 0.7040037068472542, "learning_rate": 0.00018570931900979077, "loss": 1.6621, "step": 962 }, { "epoch": 0.1974169741697417, "grad_norm": 0.7182140442483879, "learning_rate": 0.00018567509112664022, "loss": 1.6996, "step": 963 }, { "epoch": 0.19762197621976219, "grad_norm": 0.6923275670242, "learning_rate": 0.00018564082546466805, "loss": 1.7104, "step": 964 }, { "epoch": 0.1978269782697827, "grad_norm": 0.7235893988059019, "learning_rate": 0.0001856065220389837, "loss": 1.7492, "step": 965 }, { "epoch": 0.1980319803198032, "grad_norm": 0.6825868110391046, "learning_rate": 0.00018557218086471356, "loss": 1.7028, "step": 966 }, { "epoch": 0.1982369823698237, "grad_norm": 0.6804354496100514, "learning_rate": 0.00018553780195700042, "loss": 1.6618, "step": 967 }, { "epoch": 0.1984419844198442, "grad_norm": 0.6574339754924009, "learning_rate": 0.00018550338533100392, "loss": 1.63, "step": 968 }, { "epoch": 0.1986469864698647, "grad_norm": 0.7600921012603404, "learning_rate": 0.00018546893100190016, "loss": 1.6948, "step": 969 }, { "epoch": 0.1988519885198852, "grad_norm": 0.6543012879369916, "learning_rate": 0.00018543443898488197, "loss": 1.6899, "step": 970 }, { "epoch": 0.1990569905699057, "grad_norm": 0.7578265469318429, "learning_rate": 0.0001853999092951587, "loss": 1.7701, "step": 971 }, { "epoch": 0.1992619926199262, "grad_norm": 0.8386036267324629, "learning_rate": 0.00018536534194795644, "loss": 1.5844, "step": 972 }, { "epoch": 0.1994669946699467, "grad_norm": 0.7164929122871192, "learning_rate": 0.0001853307369585178, "loss": 1.6906, "step": 973 }, { "epoch": 0.1996719967199672, "grad_norm": 0.7492342299544166, "learning_rate": 0.00018529609434210197, "loss": 1.6171, "step": 974 }, { "epoch": 0.1998769987699877, "grad_norm": 0.6558700088496812, "learning_rate": 0.00018526141411398484, "loss": 1.6507, "step": 975 }, { "epoch": 0.2000820008200082, "grad_norm": 0.6426340152263696, "learning_rate": 0.00018522669628945874, "loss": 1.5916, "step": 976 }, { "epoch": 0.2002870028700287, "grad_norm": 0.6934145519717209, "learning_rate": 0.00018519194088383273, "loss": 1.651, "step": 977 }, { "epoch": 0.2004920049200492, "grad_norm": 0.7040967656789024, "learning_rate": 0.00018515714791243228, "loss": 1.6111, "step": 978 }, { "epoch": 0.2006970069700697, "grad_norm": 0.6901121663671723, "learning_rate": 0.00018512231739059958, "loss": 1.679, "step": 979 }, { "epoch": 0.2009020090200902, "grad_norm": 0.6868686799005908, "learning_rate": 0.00018508744933369328, "loss": 1.6811, "step": 980 }, { "epoch": 0.2011070110701107, "grad_norm": 0.746174738997187, "learning_rate": 0.0001850525437570886, "loss": 1.7036, "step": 981 }, { "epoch": 0.2013120131201312, "grad_norm": 0.7323903706351386, "learning_rate": 0.00018501760067617733, "loss": 1.6839, "step": 982 }, { "epoch": 0.2015170151701517, "grad_norm": 0.6249017402280476, "learning_rate": 0.00018498262010636774, "loss": 1.6363, "step": 983 }, { "epoch": 0.2017220172201722, "grad_norm": 0.6545683006113007, "learning_rate": 0.00018494760206308475, "loss": 1.7319, "step": 984 }, { "epoch": 0.20192701927019271, "grad_norm": 0.6704929354264713, "learning_rate": 0.00018491254656176966, "loss": 1.6923, "step": 985 }, { "epoch": 0.2021320213202132, "grad_norm": 0.6023514081810556, "learning_rate": 0.00018487745361788039, "loss": 1.6631, "step": 986 }, { "epoch": 0.2023370233702337, "grad_norm": 0.7083014769759648, "learning_rate": 0.00018484232324689125, "loss": 1.7633, "step": 987 }, { "epoch": 0.2025420254202542, "grad_norm": 0.6047125819281758, "learning_rate": 0.00018480715546429326, "loss": 1.6393, "step": 988 }, { "epoch": 0.2027470274702747, "grad_norm": 0.64552835821049, "learning_rate": 0.00018477195028559376, "loss": 1.7111, "step": 989 }, { "epoch": 0.2029520295202952, "grad_norm": 0.5766011316751815, "learning_rate": 0.0001847367077263166, "loss": 1.7569, "step": 990 }, { "epoch": 0.2031570315703157, "grad_norm": 0.6472249738364082, "learning_rate": 0.00018470142780200222, "loss": 1.6724, "step": 991 }, { "epoch": 0.2033620336203362, "grad_norm": 0.6631421149372152, "learning_rate": 0.00018466611052820738, "loss": 1.7017, "step": 992 }, { "epoch": 0.2035670356703567, "grad_norm": 0.67922295872642, "learning_rate": 0.00018463075592050547, "loss": 1.6453, "step": 993 }, { "epoch": 0.2037720377203772, "grad_norm": 0.5695165455860778, "learning_rate": 0.0001845953639944862, "loss": 1.693, "step": 994 }, { "epoch": 0.20397703977039772, "grad_norm": 0.6029213831421523, "learning_rate": 0.00018455993476575584, "loss": 1.7288, "step": 995 }, { "epoch": 0.20418204182041821, "grad_norm": 0.7098523384647641, "learning_rate": 0.00018452446824993704, "loss": 1.6359, "step": 996 }, { "epoch": 0.2043870438704387, "grad_norm": 0.6320691249332998, "learning_rate": 0.000184488964462669, "loss": 1.6041, "step": 997 }, { "epoch": 0.2045920459204592, "grad_norm": 0.6523600893387305, "learning_rate": 0.00018445342341960716, "loss": 1.7956, "step": 998 }, { "epoch": 0.2047970479704797, "grad_norm": 0.6267989345857562, "learning_rate": 0.0001844178451364236, "loss": 1.6364, "step": 999 }, { "epoch": 0.2050020500205002, "grad_norm": 0.6630669793804784, "learning_rate": 0.00018438222962880666, "loss": 1.6816, "step": 1000 }, { "epoch": 0.2052070520705207, "grad_norm": 0.6527480820089179, "learning_rate": 0.00018434657691246122, "loss": 1.7495, "step": 1001 }, { "epoch": 0.2054120541205412, "grad_norm": 0.6253097938893096, "learning_rate": 0.00018431088700310844, "loss": 1.6822, "step": 1002 }, { "epoch": 0.2056170561705617, "grad_norm": 0.6066983766470995, "learning_rate": 0.00018427515991648603, "loss": 1.6449, "step": 1003 }, { "epoch": 0.2058220582205822, "grad_norm": 0.6705916229756751, "learning_rate": 0.00018423939566834793, "loss": 1.7344, "step": 1004 }, { "epoch": 0.20602706027060272, "grad_norm": 0.5808359319763892, "learning_rate": 0.0001842035942744646, "loss": 1.6742, "step": 1005 }, { "epoch": 0.20623206232062322, "grad_norm": 0.6844397083927419, "learning_rate": 0.0001841677557506228, "loss": 1.6905, "step": 1006 }, { "epoch": 0.2064370643706437, "grad_norm": 0.6498127791345648, "learning_rate": 0.0001841318801126257, "loss": 1.6887, "step": 1007 }, { "epoch": 0.2066420664206642, "grad_norm": 0.5929726972211439, "learning_rate": 0.0001840959673762929, "loss": 1.6322, "step": 1008 }, { "epoch": 0.2068470684706847, "grad_norm": 0.6521214221380964, "learning_rate": 0.00018406001755746015, "loss": 1.646, "step": 1009 }, { "epoch": 0.2070520705207052, "grad_norm": 0.6553444612125416, "learning_rate": 0.00018402403067197974, "loss": 1.7022, "step": 1010 }, { "epoch": 0.2072570725707257, "grad_norm": 0.6878399963942156, "learning_rate": 0.0001839880067357203, "loss": 1.7154, "step": 1011 }, { "epoch": 0.2074620746207462, "grad_norm": 0.6195305531040249, "learning_rate": 0.00018395194576456667, "loss": 1.6426, "step": 1012 }, { "epoch": 0.2076670766707667, "grad_norm": 0.6540177502815521, "learning_rate": 0.00018391584777442015, "loss": 1.651, "step": 1013 }, { "epoch": 0.2078720787207872, "grad_norm": 0.6667688860618947, "learning_rate": 0.00018387971278119834, "loss": 1.6757, "step": 1014 }, { "epoch": 0.20807708077080772, "grad_norm": 0.6866339014112675, "learning_rate": 0.00018384354080083504, "loss": 1.66, "step": 1015 }, { "epoch": 0.20828208282082822, "grad_norm": 0.5714898507843518, "learning_rate": 0.00018380733184928053, "loss": 1.6507, "step": 1016 }, { "epoch": 0.20848708487084872, "grad_norm": 0.7040472841317994, "learning_rate": 0.0001837710859425013, "loss": 1.6743, "step": 1017 }, { "epoch": 0.2086920869208692, "grad_norm": 0.7176484615257038, "learning_rate": 0.0001837348030964801, "loss": 1.7361, "step": 1018 }, { "epoch": 0.2088970889708897, "grad_norm": 0.6394810283324819, "learning_rate": 0.00018369848332721607, "loss": 1.6316, "step": 1019 }, { "epoch": 0.2091020910209102, "grad_norm": 0.7614099541056943, "learning_rate": 0.00018366212665072454, "loss": 1.7467, "step": 1020 }, { "epoch": 0.2093070930709307, "grad_norm": 0.6270839442565453, "learning_rate": 0.00018362573308303718, "loss": 1.6569, "step": 1021 }, { "epoch": 0.2095120951209512, "grad_norm": 0.609601236813474, "learning_rate": 0.0001835893026402019, "loss": 1.5895, "step": 1022 }, { "epoch": 0.2097170971709717, "grad_norm": 0.6098604247930536, "learning_rate": 0.00018355283533828286, "loss": 1.6569, "step": 1023 }, { "epoch": 0.2099220992209922, "grad_norm": 0.6809722972139036, "learning_rate": 0.00018351633119336044, "loss": 1.6978, "step": 1024 }, { "epoch": 0.21012710127101272, "grad_norm": 0.6063103095680642, "learning_rate": 0.00018347979022153137, "loss": 1.7016, "step": 1025 }, { "epoch": 0.21033210332103322, "grad_norm": 0.6132134343622095, "learning_rate": 0.00018344321243890854, "loss": 1.6838, "step": 1026 }, { "epoch": 0.21053710537105372, "grad_norm": 0.6482611541562687, "learning_rate": 0.00018340659786162108, "loss": 1.5998, "step": 1027 }, { "epoch": 0.21074210742107422, "grad_norm": 0.5718708862079491, "learning_rate": 0.00018336994650581438, "loss": 1.7523, "step": 1028 }, { "epoch": 0.2109471094710947, "grad_norm": 0.5876548902833577, "learning_rate": 0.00018333325838765, "loss": 1.7048, "step": 1029 }, { "epoch": 0.2111521115211152, "grad_norm": 0.6689386890967747, "learning_rate": 0.00018329653352330572, "loss": 1.6088, "step": 1030 }, { "epoch": 0.2113571135711357, "grad_norm": 0.6344376859708848, "learning_rate": 0.00018325977192897554, "loss": 1.7159, "step": 1031 }, { "epoch": 0.2115621156211562, "grad_norm": 0.5688368287200323, "learning_rate": 0.00018322297362086972, "loss": 1.7146, "step": 1032 }, { "epoch": 0.2117671176711767, "grad_norm": 0.6200379089749919, "learning_rate": 0.00018318613861521455, "loss": 1.6564, "step": 1033 }, { "epoch": 0.2119721197211972, "grad_norm": 0.738264631673555, "learning_rate": 0.00018314926692825263, "loss": 1.6543, "step": 1034 }, { "epoch": 0.21217712177121772, "grad_norm": 0.5996884334912127, "learning_rate": 0.0001831123585762427, "loss": 1.6602, "step": 1035 }, { "epoch": 0.21238212382123822, "grad_norm": 0.665617761833811, "learning_rate": 0.00018307541357545965, "loss": 1.7042, "step": 1036 }, { "epoch": 0.21258712587125872, "grad_norm": 0.7386404176333486, "learning_rate": 0.00018303843194219458, "loss": 1.7546, "step": 1037 }, { "epoch": 0.21279212792127922, "grad_norm": 0.7153636445810029, "learning_rate": 0.00018300141369275469, "loss": 1.7095, "step": 1038 }, { "epoch": 0.21299712997129971, "grad_norm": 0.6267714193921318, "learning_rate": 0.00018296435884346336, "loss": 1.6147, "step": 1039 }, { "epoch": 0.2132021320213202, "grad_norm": 0.5966612011210995, "learning_rate": 0.00018292726741066007, "loss": 1.6725, "step": 1040 }, { "epoch": 0.2134071340713407, "grad_norm": 0.6435584621675465, "learning_rate": 0.00018289013941070046, "loss": 1.666, "step": 1041 }, { "epoch": 0.2136121361213612, "grad_norm": 0.696637634061216, "learning_rate": 0.0001828529748599563, "loss": 1.7, "step": 1042 }, { "epoch": 0.2138171381713817, "grad_norm": 0.6752273655174961, "learning_rate": 0.00018281577377481548, "loss": 1.6443, "step": 1043 }, { "epoch": 0.2140221402214022, "grad_norm": 0.7460891430531494, "learning_rate": 0.00018277853617168195, "loss": 1.7339, "step": 1044 }, { "epoch": 0.21422714227142273, "grad_norm": 0.7186007723286648, "learning_rate": 0.00018274126206697583, "loss": 1.7144, "step": 1045 }, { "epoch": 0.21443214432144322, "grad_norm": 0.6265002973175462, "learning_rate": 0.00018270395147713332, "loss": 1.6081, "step": 1046 }, { "epoch": 0.21463714637146372, "grad_norm": 0.6161241021074347, "learning_rate": 0.00018266660441860667, "loss": 1.6279, "step": 1047 }, { "epoch": 0.21484214842148422, "grad_norm": 0.6506433832876856, "learning_rate": 0.00018262922090786423, "loss": 1.6392, "step": 1048 }, { "epoch": 0.21504715047150472, "grad_norm": 0.6137088349058902, "learning_rate": 0.00018259180096139046, "loss": 1.5933, "step": 1049 }, { "epoch": 0.21525215252152521, "grad_norm": 0.6235691472210967, "learning_rate": 0.00018255434459568578, "loss": 1.6799, "step": 1050 }, { "epoch": 0.2154571545715457, "grad_norm": 0.6335587073147397, "learning_rate": 0.00018251685182726684, "loss": 1.7331, "step": 1051 }, { "epoch": 0.2156621566215662, "grad_norm": 0.6247519602576221, "learning_rate": 0.00018247932267266624, "loss": 1.7287, "step": 1052 }, { "epoch": 0.2158671586715867, "grad_norm": 0.6612000305498553, "learning_rate": 0.00018244175714843256, "loss": 1.7095, "step": 1053 }, { "epoch": 0.2160721607216072, "grad_norm": 0.698803591480772, "learning_rate": 0.00018240415527113056, "loss": 1.7888, "step": 1054 }, { "epoch": 0.21627716277162773, "grad_norm": 0.6422452000784236, "learning_rate": 0.0001823665170573409, "loss": 1.6688, "step": 1055 }, { "epoch": 0.21648216482164823, "grad_norm": 0.6150914827269612, "learning_rate": 0.00018232884252366037, "loss": 1.6808, "step": 1056 }, { "epoch": 0.21668716687166872, "grad_norm": 0.6327455073208014, "learning_rate": 0.0001822911316867017, "loss": 1.7, "step": 1057 }, { "epoch": 0.21689216892168922, "grad_norm": 0.5797578498575513, "learning_rate": 0.00018225338456309364, "loss": 1.6743, "step": 1058 }, { "epoch": 0.21709717097170972, "grad_norm": 0.6689974232616634, "learning_rate": 0.00018221560116948103, "loss": 1.6638, "step": 1059 }, { "epoch": 0.21730217302173022, "grad_norm": 0.6262350475559852, "learning_rate": 0.0001821777815225245, "loss": 1.6817, "step": 1060 }, { "epoch": 0.2175071750717507, "grad_norm": 0.6647200127526969, "learning_rate": 0.00018213992563890095, "loss": 1.6059, "step": 1061 }, { "epoch": 0.2177121771217712, "grad_norm": 0.6901827437214324, "learning_rate": 0.000182102033535303, "loss": 1.6737, "step": 1062 }, { "epoch": 0.2179171791717917, "grad_norm": 0.6446134041565922, "learning_rate": 0.00018206410522843936, "loss": 1.6196, "step": 1063 }, { "epoch": 0.2181221812218122, "grad_norm": 0.6626325366198685, "learning_rate": 0.00018202614073503472, "loss": 1.7149, "step": 1064 }, { "epoch": 0.21832718327183273, "grad_norm": 0.6738404387198728, "learning_rate": 0.00018198814007182968, "loss": 1.7126, "step": 1065 }, { "epoch": 0.21853218532185323, "grad_norm": 0.6412262519983919, "learning_rate": 0.0001819501032555808, "loss": 1.6831, "step": 1066 }, { "epoch": 0.21873718737187373, "grad_norm": 0.6720492541608051, "learning_rate": 0.0001819120303030606, "loss": 1.6247, "step": 1067 }, { "epoch": 0.21894218942189422, "grad_norm": 0.6070616714045508, "learning_rate": 0.00018187392123105751, "loss": 1.6523, "step": 1068 }, { "epoch": 0.21914719147191472, "grad_norm": 0.6303014408998721, "learning_rate": 0.00018183577605637588, "loss": 1.6995, "step": 1069 }, { "epoch": 0.21935219352193522, "grad_norm": 0.610438410446903, "learning_rate": 0.00018179759479583605, "loss": 1.7238, "step": 1070 }, { "epoch": 0.21955719557195572, "grad_norm": 0.5703559673359521, "learning_rate": 0.0001817593774662742, "loss": 1.6505, "step": 1071 }, { "epoch": 0.2197621976219762, "grad_norm": 0.6254315676741946, "learning_rate": 0.0001817211240845424, "loss": 1.7664, "step": 1072 }, { "epoch": 0.2199671996719967, "grad_norm": 0.6509528677848835, "learning_rate": 0.0001816828346675087, "loss": 1.618, "step": 1073 }, { "epoch": 0.2201722017220172, "grad_norm": 0.6047442337801903, "learning_rate": 0.00018164450923205698, "loss": 1.6593, "step": 1074 }, { "epoch": 0.22037720377203773, "grad_norm": 0.5747572431805756, "learning_rate": 0.000181606147795087, "loss": 1.6534, "step": 1075 }, { "epoch": 0.22058220582205823, "grad_norm": 0.692640993465633, "learning_rate": 0.00018156775037351443, "loss": 1.7792, "step": 1076 }, { "epoch": 0.22078720787207873, "grad_norm": 0.5732946235655187, "learning_rate": 0.00018152931698427077, "loss": 1.6232, "step": 1077 }, { "epoch": 0.22099220992209923, "grad_norm": 0.6856102460653584, "learning_rate": 0.0001814908476443034, "loss": 1.7241, "step": 1078 }, { "epoch": 0.22119721197211972, "grad_norm": 0.6430265909429804, "learning_rate": 0.00018145234237057554, "loss": 1.6716, "step": 1079 }, { "epoch": 0.22140221402214022, "grad_norm": 0.5978237061298097, "learning_rate": 0.00018141380118006633, "loss": 1.5965, "step": 1080 }, { "epoch": 0.22160721607216072, "grad_norm": 0.6535802650069463, "learning_rate": 0.0001813752240897706, "loss": 1.6572, "step": 1081 }, { "epoch": 0.22181221812218122, "grad_norm": 0.6183980048551903, "learning_rate": 0.00018133661111669914, "loss": 1.681, "step": 1082 }, { "epoch": 0.2220172201722017, "grad_norm": 0.5553454745738735, "learning_rate": 0.0001812979622778785, "loss": 1.651, "step": 1083 }, { "epoch": 0.2222222222222222, "grad_norm": 0.6686772813481492, "learning_rate": 0.0001812592775903511, "loss": 1.754, "step": 1084 }, { "epoch": 0.22242722427224274, "grad_norm": 0.6317618463239347, "learning_rate": 0.00018122055707117505, "loss": 1.6074, "step": 1085 }, { "epoch": 0.22263222632226323, "grad_norm": 0.6715124896586417, "learning_rate": 0.0001811818007374244, "loss": 1.6941, "step": 1086 }, { "epoch": 0.22283722837228373, "grad_norm": 0.6871089219892469, "learning_rate": 0.0001811430086061889, "loss": 1.6937, "step": 1087 }, { "epoch": 0.22304223042230423, "grad_norm": 0.6184197465342502, "learning_rate": 0.00018110418069457418, "loss": 1.7004, "step": 1088 }, { "epoch": 0.22324723247232472, "grad_norm": 0.676386424973752, "learning_rate": 0.0001810653170197015, "loss": 1.6206, "step": 1089 }, { "epoch": 0.22345223452234522, "grad_norm": 0.6523990157189616, "learning_rate": 0.00018102641759870804, "loss": 1.6688, "step": 1090 }, { "epoch": 0.22365723657236572, "grad_norm": 0.6129149497419799, "learning_rate": 0.00018098748244874666, "loss": 1.6234, "step": 1091 }, { "epoch": 0.22386223862238622, "grad_norm": 0.6246462746819592, "learning_rate": 0.00018094851158698597, "loss": 1.6615, "step": 1092 }, { "epoch": 0.22406724067240671, "grad_norm": 0.6510923258120017, "learning_rate": 0.00018090950503061036, "loss": 1.6218, "step": 1093 }, { "epoch": 0.2242722427224272, "grad_norm": 0.6329807738737093, "learning_rate": 0.0001808704627968199, "loss": 1.65, "step": 1094 }, { "epoch": 0.22447724477244774, "grad_norm": 0.6415947415364737, "learning_rate": 0.00018083138490283056, "loss": 1.7035, "step": 1095 }, { "epoch": 0.22468224682246823, "grad_norm": 0.6309501001858635, "learning_rate": 0.00018079227136587382, "loss": 1.714, "step": 1096 }, { "epoch": 0.22488724887248873, "grad_norm": 0.656375003661927, "learning_rate": 0.000180753122203197, "loss": 1.716, "step": 1097 }, { "epoch": 0.22509225092250923, "grad_norm": 0.5832571059442074, "learning_rate": 0.0001807139374320631, "loss": 1.5351, "step": 1098 }, { "epoch": 0.22529725297252973, "grad_norm": 0.6963227362958574, "learning_rate": 0.00018067471706975083, "loss": 1.7598, "step": 1099 }, { "epoch": 0.22550225502255022, "grad_norm": 0.6855640626310727, "learning_rate": 0.00018063546113355455, "loss": 1.6721, "step": 1100 }, { "epoch": 0.22570725707257072, "grad_norm": 0.6485668313776874, "learning_rate": 0.00018059616964078443, "loss": 1.7035, "step": 1101 }, { "epoch": 0.22591225912259122, "grad_norm": 0.62947514920102, "learning_rate": 0.00018055684260876614, "loss": 1.6161, "step": 1102 }, { "epoch": 0.22611726117261172, "grad_norm": 0.6629950604358232, "learning_rate": 0.00018051748005484117, "loss": 1.6534, "step": 1103 }, { "epoch": 0.22632226322263221, "grad_norm": 0.6555220358174337, "learning_rate": 0.00018047808199636657, "loss": 1.72, "step": 1104 }, { "epoch": 0.22652726527265274, "grad_norm": 0.657585363257024, "learning_rate": 0.00018043864845071513, "loss": 1.6073, "step": 1105 }, { "epoch": 0.22673226732267324, "grad_norm": 0.7428113892379641, "learning_rate": 0.0001803991794352753, "loss": 1.7752, "step": 1106 }, { "epoch": 0.22693726937269373, "grad_norm": 0.5902889006678823, "learning_rate": 0.000180359674967451, "loss": 1.6372, "step": 1107 }, { "epoch": 0.22714227142271423, "grad_norm": 0.6880548646796353, "learning_rate": 0.000180320135064662, "loss": 1.7503, "step": 1108 }, { "epoch": 0.22734727347273473, "grad_norm": 0.6351447864709977, "learning_rate": 0.00018028055974434354, "loss": 1.6692, "step": 1109 }, { "epoch": 0.22755227552275523, "grad_norm": 0.6463876645966141, "learning_rate": 0.00018024094902394658, "loss": 1.7113, "step": 1110 }, { "epoch": 0.22775727757277572, "grad_norm": 0.5800066938406596, "learning_rate": 0.0001802013029209377, "loss": 1.7219, "step": 1111 }, { "epoch": 0.22796227962279622, "grad_norm": 0.6459432254651825, "learning_rate": 0.0001801616214527989, "loss": 1.6143, "step": 1112 }, { "epoch": 0.22816728167281672, "grad_norm": 0.6808409468168838, "learning_rate": 0.00018012190463702799, "loss": 1.6921, "step": 1113 }, { "epoch": 0.22837228372283722, "grad_norm": 0.6271128415064747, "learning_rate": 0.00018008215249113823, "loss": 1.6554, "step": 1114 }, { "epoch": 0.22857728577285774, "grad_norm": 0.5682211232353036, "learning_rate": 0.00018004236503265858, "loss": 1.5751, "step": 1115 }, { "epoch": 0.22878228782287824, "grad_norm": 0.6811284162153839, "learning_rate": 0.00018000254227913348, "loss": 1.675, "step": 1116 }, { "epoch": 0.22898728987289874, "grad_norm": 0.6729865719086112, "learning_rate": 0.00017996268424812288, "loss": 1.7078, "step": 1117 }, { "epoch": 0.22919229192291923, "grad_norm": 0.5559498998406727, "learning_rate": 0.00017992279095720246, "loss": 1.5794, "step": 1118 }, { "epoch": 0.22939729397293973, "grad_norm": 0.6010677513988688, "learning_rate": 0.0001798828624239633, "loss": 1.6065, "step": 1119 }, { "epoch": 0.22960229602296023, "grad_norm": 0.6766942591577791, "learning_rate": 0.00017984289866601204, "loss": 1.7126, "step": 1120 }, { "epoch": 0.22980729807298073, "grad_norm": 0.6355849074308124, "learning_rate": 0.00017980289970097096, "loss": 1.7291, "step": 1121 }, { "epoch": 0.23001230012300122, "grad_norm": 0.6013376258637811, "learning_rate": 0.00017976286554647773, "loss": 1.6295, "step": 1122 }, { "epoch": 0.23021730217302172, "grad_norm": 0.6281327708449368, "learning_rate": 0.00017972279622018557, "loss": 1.7222, "step": 1123 }, { "epoch": 0.23042230422304222, "grad_norm": 0.6114395285130246, "learning_rate": 0.00017968269173976328, "loss": 1.6986, "step": 1124 }, { "epoch": 0.23062730627306274, "grad_norm": 0.5898172463075345, "learning_rate": 0.00017964255212289513, "loss": 1.6767, "step": 1125 }, { "epoch": 0.23083230832308324, "grad_norm": 0.5356245153773754, "learning_rate": 0.0001796023773872808, "loss": 1.6343, "step": 1126 }, { "epoch": 0.23103731037310374, "grad_norm": 0.6818053088312895, "learning_rate": 0.00017956216755063558, "loss": 1.693, "step": 1127 }, { "epoch": 0.23124231242312424, "grad_norm": 0.5810994751825839, "learning_rate": 0.00017952192263069017, "loss": 1.7253, "step": 1128 }, { "epoch": 0.23144731447314473, "grad_norm": 0.5725178214330862, "learning_rate": 0.00017948164264519072, "loss": 1.659, "step": 1129 }, { "epoch": 0.23165231652316523, "grad_norm": 0.623038594680975, "learning_rate": 0.0001794413276118989, "loss": 1.7561, "step": 1130 }, { "epoch": 0.23185731857318573, "grad_norm": 0.6555934366154492, "learning_rate": 0.00017940097754859177, "loss": 1.6605, "step": 1131 }, { "epoch": 0.23206232062320623, "grad_norm": 0.5497217183103144, "learning_rate": 0.00017936059247306195, "loss": 1.5735, "step": 1132 }, { "epoch": 0.23226732267322672, "grad_norm": 0.633888958656351, "learning_rate": 0.00017932017240311735, "loss": 1.6126, "step": 1133 }, { "epoch": 0.23247232472324722, "grad_norm": 0.6734381308538583, "learning_rate": 0.00017927971735658143, "loss": 1.7163, "step": 1134 }, { "epoch": 0.23267732677326775, "grad_norm": 0.6729603556436241, "learning_rate": 0.00017923922735129302, "loss": 1.6758, "step": 1135 }, { "epoch": 0.23288232882328824, "grad_norm": 0.6939613987632494, "learning_rate": 0.00017919870240510632, "loss": 1.7194, "step": 1136 }, { "epoch": 0.23308733087330874, "grad_norm": 0.677342108238906, "learning_rate": 0.00017915814253589108, "loss": 1.6796, "step": 1137 }, { "epoch": 0.23329233292332924, "grad_norm": 0.7101418129420499, "learning_rate": 0.00017911754776153224, "loss": 1.6604, "step": 1138 }, { "epoch": 0.23349733497334973, "grad_norm": 0.684114516013831, "learning_rate": 0.00017907691809993038, "loss": 1.722, "step": 1139 }, { "epoch": 0.23370233702337023, "grad_norm": 0.6880244159063225, "learning_rate": 0.00017903625356900128, "loss": 1.7303, "step": 1140 }, { "epoch": 0.23390733907339073, "grad_norm": 0.6773122681633772, "learning_rate": 0.00017899555418667614, "loss": 1.6886, "step": 1141 }, { "epoch": 0.23411234112341123, "grad_norm": 0.6800676743574444, "learning_rate": 0.0001789548199709015, "loss": 1.7234, "step": 1142 }, { "epoch": 0.23431734317343172, "grad_norm": 0.6115666392352647, "learning_rate": 0.00017891405093963938, "loss": 1.6425, "step": 1143 }, { "epoch": 0.23452234522345222, "grad_norm": 0.6441014373889458, "learning_rate": 0.000178873247110867, "loss": 1.7114, "step": 1144 }, { "epoch": 0.23472734727347275, "grad_norm": 0.6619614500831901, "learning_rate": 0.00017883240850257706, "loss": 1.6928, "step": 1145 }, { "epoch": 0.23493234932349324, "grad_norm": 0.5846167586860179, "learning_rate": 0.00017879153513277748, "loss": 1.6468, "step": 1146 }, { "epoch": 0.23513735137351374, "grad_norm": 0.6275117122748974, "learning_rate": 0.0001787506270194916, "loss": 1.7417, "step": 1147 }, { "epoch": 0.23534235342353424, "grad_norm": 0.7217133165951389, "learning_rate": 0.000178709684180758, "loss": 1.666, "step": 1148 }, { "epoch": 0.23554735547355474, "grad_norm": 0.6605461085192059, "learning_rate": 0.00017866870663463057, "loss": 1.7194, "step": 1149 }, { "epoch": 0.23575235752357523, "grad_norm": 0.6334386119819172, "learning_rate": 0.00017862769439917867, "loss": 1.6781, "step": 1150 }, { "epoch": 0.23595735957359573, "grad_norm": 0.6583847305168595, "learning_rate": 0.0001785866474924867, "loss": 1.6675, "step": 1151 }, { "epoch": 0.23616236162361623, "grad_norm": 0.7187168348467076, "learning_rate": 0.00017854556593265459, "loss": 1.6713, "step": 1152 }, { "epoch": 0.23636736367363673, "grad_norm": 0.543285806477839, "learning_rate": 0.0001785044497377974, "loss": 1.5351, "step": 1153 }, { "epoch": 0.23657236572365722, "grad_norm": 0.5968396343728228, "learning_rate": 0.00017846329892604547, "loss": 1.5917, "step": 1154 }, { "epoch": 0.23677736777367775, "grad_norm": 0.595296242453684, "learning_rate": 0.00017842211351554448, "loss": 1.6154, "step": 1155 }, { "epoch": 0.23698236982369825, "grad_norm": 0.655695938326144, "learning_rate": 0.0001783808935244553, "loss": 1.6976, "step": 1156 }, { "epoch": 0.23718737187371874, "grad_norm": 0.6603466475083676, "learning_rate": 0.00017833963897095407, "loss": 1.7848, "step": 1157 }, { "epoch": 0.23739237392373924, "grad_norm": 0.5965591505612715, "learning_rate": 0.00017829834987323219, "loss": 1.6585, "step": 1158 }, { "epoch": 0.23759737597375974, "grad_norm": 0.6846445124693009, "learning_rate": 0.0001782570262494963, "loss": 1.638, "step": 1159 }, { "epoch": 0.23780237802378024, "grad_norm": 0.6081023084140853, "learning_rate": 0.0001782156681179682, "loss": 1.6193, "step": 1160 }, { "epoch": 0.23800738007380073, "grad_norm": 0.6142293079588311, "learning_rate": 0.00017817427549688493, "loss": 1.6415, "step": 1161 }, { "epoch": 0.23821238212382123, "grad_norm": 0.5689166948788361, "learning_rate": 0.0001781328484044988, "loss": 1.6667, "step": 1162 }, { "epoch": 0.23841738417384173, "grad_norm": 0.5966731578529272, "learning_rate": 0.00017809138685907726, "loss": 1.6608, "step": 1163 }, { "epoch": 0.23862238622386223, "grad_norm": 0.5574581986963836, "learning_rate": 0.00017804989087890299, "loss": 1.6127, "step": 1164 }, { "epoch": 0.23882738827388275, "grad_norm": 0.5552595351059207, "learning_rate": 0.00017800836048227376, "loss": 1.6528, "step": 1165 }, { "epoch": 0.23903239032390325, "grad_norm": 0.5962411346751958, "learning_rate": 0.00017796679568750267, "loss": 1.6704, "step": 1166 }, { "epoch": 0.23923739237392375, "grad_norm": 0.7255158685221458, "learning_rate": 0.00017792519651291784, "loss": 1.7162, "step": 1167 }, { "epoch": 0.23944239442394424, "grad_norm": 0.6535718068137458, "learning_rate": 0.00017788356297686266, "loss": 1.7305, "step": 1168 }, { "epoch": 0.23964739647396474, "grad_norm": 0.6141326957283628, "learning_rate": 0.00017784189509769562, "loss": 1.7464, "step": 1169 }, { "epoch": 0.23985239852398524, "grad_norm": 0.6225349922606345, "learning_rate": 0.00017780019289379034, "loss": 1.6247, "step": 1170 }, { "epoch": 0.24005740057400574, "grad_norm": 0.6527944754509596, "learning_rate": 0.00017775845638353562, "loss": 1.7579, "step": 1171 }, { "epoch": 0.24026240262402623, "grad_norm": 0.545256922886162, "learning_rate": 0.00017771668558533535, "loss": 1.6288, "step": 1172 }, { "epoch": 0.24046740467404673, "grad_norm": 0.6322860390515759, "learning_rate": 0.00017767488051760857, "loss": 1.6815, "step": 1173 }, { "epoch": 0.24067240672406723, "grad_norm": 0.558527531232273, "learning_rate": 0.00017763304119878937, "loss": 1.679, "step": 1174 }, { "epoch": 0.24087740877408775, "grad_norm": 0.6015816114871074, "learning_rate": 0.00017759116764732707, "loss": 1.6893, "step": 1175 }, { "epoch": 0.24108241082410825, "grad_norm": 0.6444916956192004, "learning_rate": 0.0001775492598816859, "loss": 1.63, "step": 1176 }, { "epoch": 0.24128741287412875, "grad_norm": 0.6275065932751949, "learning_rate": 0.00017750731792034538, "loss": 1.6183, "step": 1177 }, { "epoch": 0.24149241492414925, "grad_norm": 0.6675104860296741, "learning_rate": 0.00017746534178179996, "loss": 1.7199, "step": 1178 }, { "epoch": 0.24169741697416974, "grad_norm": 0.6991590756154631, "learning_rate": 0.00017742333148455921, "loss": 1.7149, "step": 1179 }, { "epoch": 0.24190241902419024, "grad_norm": 0.5910431795162895, "learning_rate": 0.00017738128704714777, "loss": 1.72, "step": 1180 }, { "epoch": 0.24210742107421074, "grad_norm": 0.5400219386867433, "learning_rate": 0.00017733920848810527, "loss": 1.6114, "step": 1181 }, { "epoch": 0.24231242312423124, "grad_norm": 0.6883171673615529, "learning_rate": 0.00017729709582598656, "loss": 1.63, "step": 1182 }, { "epoch": 0.24251742517425173, "grad_norm": 0.6973382336237606, "learning_rate": 0.0001772549490793613, "loss": 1.7232, "step": 1183 }, { "epoch": 0.24272242722427223, "grad_norm": 0.5317517508919957, "learning_rate": 0.00017721276826681437, "loss": 1.5532, "step": 1184 }, { "epoch": 0.24292742927429276, "grad_norm": 0.6139132606059314, "learning_rate": 0.00017717055340694553, "loss": 1.6358, "step": 1185 }, { "epoch": 0.24313243132431325, "grad_norm": 0.6646427306471898, "learning_rate": 0.00017712830451836964, "loss": 1.663, "step": 1186 }, { "epoch": 0.24333743337433375, "grad_norm": 0.6247243553899692, "learning_rate": 0.00017708602161971653, "loss": 1.6915, "step": 1187 }, { "epoch": 0.24354243542435425, "grad_norm": 0.6688362626005366, "learning_rate": 0.0001770437047296311, "loss": 1.6821, "step": 1188 }, { "epoch": 0.24374743747437475, "grad_norm": 0.6187248183297874, "learning_rate": 0.00017700135386677305, "loss": 1.6545, "step": 1189 }, { "epoch": 0.24395243952439524, "grad_norm": 0.6568357054279115, "learning_rate": 0.00017695896904981731, "loss": 1.673, "step": 1190 }, { "epoch": 0.24415744157441574, "grad_norm": 0.5936537232963011, "learning_rate": 0.0001769165502974536, "loss": 1.6379, "step": 1191 }, { "epoch": 0.24436244362443624, "grad_norm": 0.596265149943637, "learning_rate": 0.00017687409762838664, "loss": 1.7245, "step": 1192 }, { "epoch": 0.24456744567445673, "grad_norm": 0.668349935288758, "learning_rate": 0.00017683161106133618, "loss": 1.6877, "step": 1193 }, { "epoch": 0.24477244772447723, "grad_norm": 0.6397956848249847, "learning_rate": 0.00017678909061503683, "loss": 1.7403, "step": 1194 }, { "epoch": 0.24497744977449776, "grad_norm": 0.615062511481778, "learning_rate": 0.00017674653630823822, "loss": 1.641, "step": 1195 }, { "epoch": 0.24518245182451825, "grad_norm": 0.729722158941603, "learning_rate": 0.00017670394815970478, "loss": 1.69, "step": 1196 }, { "epoch": 0.24538745387453875, "grad_norm": 0.5890849203624392, "learning_rate": 0.00017666132618821603, "loss": 1.7275, "step": 1197 }, { "epoch": 0.24559245592455925, "grad_norm": 0.5945395303668463, "learning_rate": 0.00017661867041256628, "loss": 1.6575, "step": 1198 }, { "epoch": 0.24579745797457975, "grad_norm": 0.656394857559786, "learning_rate": 0.00017657598085156481, "loss": 1.6021, "step": 1199 }, { "epoch": 0.24600246002460024, "grad_norm": 0.5612491650367858, "learning_rate": 0.00017653325752403575, "loss": 1.6334, "step": 1200 }, { "epoch": 0.24620746207462074, "grad_norm": 0.5995355142372665, "learning_rate": 0.0001764905004488182, "loss": 1.6672, "step": 1201 }, { "epoch": 0.24641246412464124, "grad_norm": 0.9692712141583573, "learning_rate": 0.000176447709644766, "loss": 1.6688, "step": 1202 }, { "epoch": 0.24661746617466174, "grad_norm": 0.6451967023076832, "learning_rate": 0.00017640488513074803, "loss": 1.6387, "step": 1203 }, { "epoch": 0.24682246822468223, "grad_norm": 0.6329026205423912, "learning_rate": 0.00017636202692564794, "loss": 1.5987, "step": 1204 }, { "epoch": 0.24702747027470276, "grad_norm": 0.5864552343557909, "learning_rate": 0.0001763191350483642, "loss": 1.6265, "step": 1205 }, { "epoch": 0.24723247232472326, "grad_norm": 0.6180913141017705, "learning_rate": 0.00017627620951781022, "loss": 1.6636, "step": 1206 }, { "epoch": 0.24743747437474375, "grad_norm": 0.5974270129407625, "learning_rate": 0.0001762332503529142, "loss": 1.6617, "step": 1207 }, { "epoch": 0.24764247642476425, "grad_norm": 0.5795015068206213, "learning_rate": 0.00017619025757261922, "loss": 1.6589, "step": 1208 }, { "epoch": 0.24784747847478475, "grad_norm": 0.6240424455080713, "learning_rate": 0.00017614723119588306, "loss": 1.6332, "step": 1209 }, { "epoch": 0.24805248052480525, "grad_norm": 0.5506171723169756, "learning_rate": 0.00017610417124167845, "loss": 1.641, "step": 1210 }, { "epoch": 0.24825748257482574, "grad_norm": 0.5821762097512913, "learning_rate": 0.00017606107772899287, "loss": 1.6975, "step": 1211 }, { "epoch": 0.24846248462484624, "grad_norm": 0.6268004380209853, "learning_rate": 0.0001760179506768286, "loss": 1.7219, "step": 1212 }, { "epoch": 0.24866748667486674, "grad_norm": 0.6683423084019315, "learning_rate": 0.00017597479010420268, "loss": 1.6747, "step": 1213 }, { "epoch": 0.24887248872488724, "grad_norm": 0.5853556399217581, "learning_rate": 0.00017593159603014705, "loss": 1.6004, "step": 1214 }, { "epoch": 0.24907749077490776, "grad_norm": 0.5836242010044278, "learning_rate": 0.00017588836847370816, "loss": 1.6399, "step": 1215 }, { "epoch": 0.24928249282492826, "grad_norm": 0.6125128198338414, "learning_rate": 0.0001758451074539476, "loss": 1.6455, "step": 1216 }, { "epoch": 0.24948749487494876, "grad_norm": 0.5888477564814605, "learning_rate": 0.00017580181298994138, "loss": 1.6125, "step": 1217 }, { "epoch": 0.24969249692496925, "grad_norm": 0.6456215977499503, "learning_rate": 0.00017575848510078046, "loss": 1.6527, "step": 1218 }, { "epoch": 0.24989749897498975, "grad_norm": 0.6259218740613081, "learning_rate": 0.0001757151238055704, "loss": 1.7374, "step": 1219 }, { "epoch": 0.25010250102501025, "grad_norm": 0.6256511247046743, "learning_rate": 0.00017567172912343163, "loss": 1.7331, "step": 1220 }, { "epoch": 0.25030750307503075, "grad_norm": 0.6469612239408504, "learning_rate": 0.00017562830107349921, "loss": 1.7232, "step": 1221 }, { "epoch": 0.25051250512505124, "grad_norm": 0.5977854832938785, "learning_rate": 0.00017558483967492294, "loss": 1.6107, "step": 1222 }, { "epoch": 0.25071750717507174, "grad_norm": 0.6316968216081972, "learning_rate": 0.0001755413449468673, "loss": 1.7046, "step": 1223 }, { "epoch": 0.25092250922509224, "grad_norm": 0.5939743168294715, "learning_rate": 0.00017549781690851148, "loss": 1.6284, "step": 1224 }, { "epoch": 0.25112751127511274, "grad_norm": 0.5745089711040662, "learning_rate": 0.0001754542555790494, "loss": 1.6245, "step": 1225 }, { "epoch": 0.25133251332513323, "grad_norm": 0.61712776627725, "learning_rate": 0.00017541066097768963, "loss": 1.6657, "step": 1226 }, { "epoch": 0.25153751537515373, "grad_norm": 0.5732872885962008, "learning_rate": 0.00017536703312365538, "loss": 1.695, "step": 1227 }, { "epoch": 0.25174251742517423, "grad_norm": 0.5875974180912894, "learning_rate": 0.0001753233720361846, "loss": 1.6971, "step": 1228 }, { "epoch": 0.2519475194751947, "grad_norm": 0.6930756963760735, "learning_rate": 0.00017527967773452977, "loss": 1.6736, "step": 1229 }, { "epoch": 0.2521525215252153, "grad_norm": 0.5669508245232758, "learning_rate": 0.00017523595023795813, "loss": 1.6108, "step": 1230 }, { "epoch": 0.2523575235752358, "grad_norm": 0.65153166234203, "learning_rate": 0.00017519218956575154, "loss": 1.701, "step": 1231 }, { "epoch": 0.2525625256252563, "grad_norm": 0.6157448600343771, "learning_rate": 0.00017514839573720643, "loss": 1.6984, "step": 1232 }, { "epoch": 0.25276752767527677, "grad_norm": 0.5775774658675717, "learning_rate": 0.0001751045687716339, "loss": 1.6983, "step": 1233 }, { "epoch": 0.25297252972529727, "grad_norm": 0.6765353133342762, "learning_rate": 0.0001750607086883597, "loss": 1.6635, "step": 1234 }, { "epoch": 0.25317753177531777, "grad_norm": 0.5587045697539772, "learning_rate": 0.00017501681550672406, "loss": 1.7145, "step": 1235 }, { "epoch": 0.25338253382533826, "grad_norm": 0.6258177026130144, "learning_rate": 0.0001749728892460819, "loss": 1.6848, "step": 1236 }, { "epoch": 0.25358753587535876, "grad_norm": 0.6721455377933682, "learning_rate": 0.00017492892992580273, "loss": 1.7098, "step": 1237 }, { "epoch": 0.25379253792537926, "grad_norm": 0.596785572111564, "learning_rate": 0.00017488493756527058, "loss": 1.5643, "step": 1238 }, { "epoch": 0.25399753997539976, "grad_norm": 0.5282985964042504, "learning_rate": 0.00017484091218388412, "loss": 1.6862, "step": 1239 }, { "epoch": 0.25420254202542025, "grad_norm": 0.6465647003155989, "learning_rate": 0.00017479685380105648, "loss": 1.7735, "step": 1240 }, { "epoch": 0.25440754407544075, "grad_norm": 0.7092872012738344, "learning_rate": 0.00017475276243621548, "loss": 1.7216, "step": 1241 }, { "epoch": 0.25461254612546125, "grad_norm": 0.6179172722131054, "learning_rate": 0.00017470863810880335, "loss": 1.6967, "step": 1242 }, { "epoch": 0.25481754817548175, "grad_norm": 0.5870969934010827, "learning_rate": 0.00017466448083827696, "loss": 1.6133, "step": 1243 }, { "epoch": 0.25502255022550224, "grad_norm": 0.626709306491909, "learning_rate": 0.00017462029064410764, "loss": 1.6671, "step": 1244 }, { "epoch": 0.25522755227552274, "grad_norm": 0.6497905454914743, "learning_rate": 0.00017457606754578121, "loss": 1.6596, "step": 1245 }, { "epoch": 0.25543255432554324, "grad_norm": 0.5855519005678879, "learning_rate": 0.00017453181156279812, "loss": 1.5915, "step": 1246 }, { "epoch": 0.25563755637556373, "grad_norm": 0.6294201366641414, "learning_rate": 0.00017448752271467322, "loss": 1.6856, "step": 1247 }, { "epoch": 0.25584255842558423, "grad_norm": 0.6313807726517735, "learning_rate": 0.00017444320102093586, "loss": 1.6572, "step": 1248 }, { "epoch": 0.25604756047560473, "grad_norm": 0.589907047746136, "learning_rate": 0.00017439884650112989, "loss": 1.7081, "step": 1249 }, { "epoch": 0.2562525625256253, "grad_norm": 0.5695463687038377, "learning_rate": 0.00017435445917481367, "loss": 1.6886, "step": 1250 }, { "epoch": 0.2564575645756458, "grad_norm": 0.6706794470638209, "learning_rate": 0.00017431003906156, "loss": 1.6505, "step": 1251 }, { "epoch": 0.2566625666256663, "grad_norm": 0.6487571707875301, "learning_rate": 0.0001742655861809561, "loss": 1.7099, "step": 1252 }, { "epoch": 0.2568675686756868, "grad_norm": 0.5843031658919652, "learning_rate": 0.0001742211005526037, "loss": 1.6746, "step": 1253 }, { "epoch": 0.2570725707257073, "grad_norm": 0.6033662248436049, "learning_rate": 0.0001741765821961189, "loss": 1.6349, "step": 1254 }, { "epoch": 0.25727757277572777, "grad_norm": 0.6140630362597016, "learning_rate": 0.00017413203113113228, "loss": 1.7308, "step": 1255 }, { "epoch": 0.25748257482574827, "grad_norm": 0.5967057128088326, "learning_rate": 0.00017408744737728885, "loss": 1.6947, "step": 1256 }, { "epoch": 0.25768757687576876, "grad_norm": 0.5534018256707625, "learning_rate": 0.00017404283095424802, "loss": 1.5626, "step": 1257 }, { "epoch": 0.25789257892578926, "grad_norm": 0.5719676468948501, "learning_rate": 0.00017399818188168365, "loss": 1.6869, "step": 1258 }, { "epoch": 0.25809758097580976, "grad_norm": 0.5886964261328439, "learning_rate": 0.00017395350017928383, "loss": 1.6181, "step": 1259 }, { "epoch": 0.25830258302583026, "grad_norm": 0.6594976215291675, "learning_rate": 0.00017390878586675127, "loss": 1.7694, "step": 1260 }, { "epoch": 0.25850758507585075, "grad_norm": 0.6469873592257641, "learning_rate": 0.00017386403896380294, "loss": 1.6883, "step": 1261 }, { "epoch": 0.25871258712587125, "grad_norm": 0.6166959802811376, "learning_rate": 0.00017381925949017015, "loss": 1.6766, "step": 1262 }, { "epoch": 0.25891758917589175, "grad_norm": 0.6189575888663595, "learning_rate": 0.00017377444746559864, "loss": 1.7222, "step": 1263 }, { "epoch": 0.25912259122591225, "grad_norm": 0.6133730278154782, "learning_rate": 0.00017372960290984842, "loss": 1.7051, "step": 1264 }, { "epoch": 0.25932759327593274, "grad_norm": 0.5794436823907965, "learning_rate": 0.00017368472584269399, "loss": 1.6266, "step": 1265 }, { "epoch": 0.25953259532595324, "grad_norm": 0.5921412182656112, "learning_rate": 0.00017363981628392404, "loss": 1.6364, "step": 1266 }, { "epoch": 0.25973759737597374, "grad_norm": 0.6056471099876192, "learning_rate": 0.00017359487425334166, "loss": 1.618, "step": 1267 }, { "epoch": 0.25994259942599424, "grad_norm": 0.7019773106193898, "learning_rate": 0.00017354989977076422, "loss": 1.7432, "step": 1268 }, { "epoch": 0.26014760147601473, "grad_norm": 0.6064269477414022, "learning_rate": 0.00017350489285602346, "loss": 1.6912, "step": 1269 }, { "epoch": 0.2603526035260353, "grad_norm": 0.5943643011595683, "learning_rate": 0.00017345985352896535, "loss": 1.6425, "step": 1270 }, { "epoch": 0.2605576055760558, "grad_norm": 0.6071028301014882, "learning_rate": 0.00017341478180945026, "loss": 1.6788, "step": 1271 }, { "epoch": 0.2607626076260763, "grad_norm": 0.6102675017067584, "learning_rate": 0.00017336967771735266, "loss": 1.7146, "step": 1272 }, { "epoch": 0.2609676096760968, "grad_norm": 0.5191364633878737, "learning_rate": 0.00017332454127256148, "loss": 1.6282, "step": 1273 }, { "epoch": 0.2611726117261173, "grad_norm": 0.605045078944176, "learning_rate": 0.0001732793724949798, "loss": 1.7719, "step": 1274 }, { "epoch": 0.2613776137761378, "grad_norm": 0.6608931979706598, "learning_rate": 0.00017323417140452504, "loss": 1.6727, "step": 1275 }, { "epoch": 0.26158261582615827, "grad_norm": 0.6501551072589721, "learning_rate": 0.0001731889380211288, "loss": 1.6668, "step": 1276 }, { "epoch": 0.26178761787617877, "grad_norm": 0.612729731177046, "learning_rate": 0.00017314367236473697, "loss": 1.6437, "step": 1277 }, { "epoch": 0.26199261992619927, "grad_norm": 0.6255821252613866, "learning_rate": 0.0001730983744553096, "loss": 1.6706, "step": 1278 }, { "epoch": 0.26219762197621976, "grad_norm": 0.6686075379709553, "learning_rate": 0.00017305304431282104, "loss": 1.6272, "step": 1279 }, { "epoch": 0.26240262402624026, "grad_norm": 0.5678836681217919, "learning_rate": 0.00017300768195725982, "loss": 1.666, "step": 1280 }, { "epoch": 0.26260762607626076, "grad_norm": 0.5876097229058804, "learning_rate": 0.0001729622874086287, "loss": 1.7364, "step": 1281 }, { "epoch": 0.26281262812628126, "grad_norm": 0.6272330518141753, "learning_rate": 0.0001729168606869446, "loss": 1.6717, "step": 1282 }, { "epoch": 0.26301763017630175, "grad_norm": 0.5425694659985298, "learning_rate": 0.0001728714018122386, "loss": 1.6753, "step": 1283 }, { "epoch": 0.26322263222632225, "grad_norm": 0.5444006285868122, "learning_rate": 0.000172825910804556, "loss": 1.6493, "step": 1284 }, { "epoch": 0.26342763427634275, "grad_norm": 0.5581509043738077, "learning_rate": 0.00017278038768395634, "loss": 1.6092, "step": 1285 }, { "epoch": 0.26363263632636325, "grad_norm": 0.6006936411981415, "learning_rate": 0.00017273483247051322, "loss": 1.6916, "step": 1286 }, { "epoch": 0.26383763837638374, "grad_norm": 0.6017265100904956, "learning_rate": 0.00017268924518431438, "loss": 1.6164, "step": 1287 }, { "epoch": 0.26404264042640424, "grad_norm": 0.6061915792148266, "learning_rate": 0.00017264362584546177, "loss": 1.6671, "step": 1288 }, { "epoch": 0.26424764247642474, "grad_norm": 0.5556108252651032, "learning_rate": 0.00017259797447407142, "loss": 1.6239, "step": 1289 }, { "epoch": 0.2644526445264453, "grad_norm": 0.661357957597831, "learning_rate": 0.00017255229109027355, "loss": 1.6603, "step": 1290 }, { "epoch": 0.2646576465764658, "grad_norm": 0.6175750504161309, "learning_rate": 0.00017250657571421245, "loss": 1.6146, "step": 1291 }, { "epoch": 0.2648626486264863, "grad_norm": 0.600257010147089, "learning_rate": 0.00017246082836604648, "loss": 1.6829, "step": 1292 }, { "epoch": 0.2650676506765068, "grad_norm": 0.5775177622266917, "learning_rate": 0.0001724150490659482, "loss": 1.7015, "step": 1293 }, { "epoch": 0.2652726527265273, "grad_norm": 0.6838693345849056, "learning_rate": 0.00017236923783410413, "loss": 1.7381, "step": 1294 }, { "epoch": 0.2654776547765478, "grad_norm": 0.6305676657796984, "learning_rate": 0.000172323394690715, "loss": 1.5973, "step": 1295 }, { "epoch": 0.2656826568265683, "grad_norm": 0.5119521897728015, "learning_rate": 0.00017227751965599554, "loss": 1.6255, "step": 1296 }, { "epoch": 0.2658876588765888, "grad_norm": 0.6786487420519335, "learning_rate": 0.00017223161275017453, "loss": 1.6456, "step": 1297 }, { "epoch": 0.26609266092660927, "grad_norm": 0.6986941688961231, "learning_rate": 0.0001721856739934949, "loss": 1.7205, "step": 1298 }, { "epoch": 0.26629766297662977, "grad_norm": 0.6502912351351416, "learning_rate": 0.00017213970340621346, "loss": 1.6901, "step": 1299 }, { "epoch": 0.26650266502665027, "grad_norm": 0.6331665213576733, "learning_rate": 0.00017209370100860122, "loss": 1.6167, "step": 1300 }, { "epoch": 0.26670766707667076, "grad_norm": 0.6334026362816706, "learning_rate": 0.00017204766682094312, "loss": 1.7432, "step": 1301 }, { "epoch": 0.26691266912669126, "grad_norm": 0.6276459470257831, "learning_rate": 0.00017200160086353815, "loss": 1.6979, "step": 1302 }, { "epoch": 0.26711767117671176, "grad_norm": 0.5802282455752659, "learning_rate": 0.00017195550315669928, "loss": 1.695, "step": 1303 }, { "epoch": 0.26732267322673225, "grad_norm": 0.6581337896490818, "learning_rate": 0.00017190937372075352, "loss": 1.6429, "step": 1304 }, { "epoch": 0.26752767527675275, "grad_norm": 0.6623965176565422, "learning_rate": 0.00017186321257604186, "loss": 1.6979, "step": 1305 }, { "epoch": 0.26773267732677325, "grad_norm": 0.6159210747516142, "learning_rate": 0.0001718170197429193, "loss": 1.6429, "step": 1306 }, { "epoch": 0.26793767937679375, "grad_norm": 0.5850424705820864, "learning_rate": 0.0001717707952417547, "loss": 1.6739, "step": 1307 }, { "epoch": 0.26814268142681424, "grad_norm": 0.6487051129394048, "learning_rate": 0.00017172453909293105, "loss": 1.6742, "step": 1308 }, { "epoch": 0.26834768347683474, "grad_norm": 0.6208189819600345, "learning_rate": 0.00017167825131684513, "loss": 1.6212, "step": 1309 }, { "epoch": 0.2685526855268553, "grad_norm": 0.594602984726979, "learning_rate": 0.00017163193193390784, "loss": 1.7176, "step": 1310 }, { "epoch": 0.2687576875768758, "grad_norm": 0.6677213097723341, "learning_rate": 0.0001715855809645438, "loss": 1.6581, "step": 1311 }, { "epoch": 0.2689626896268963, "grad_norm": 0.6438882694035374, "learning_rate": 0.00017153919842919176, "loss": 1.6564, "step": 1312 }, { "epoch": 0.2691676916769168, "grad_norm": 0.5995075789368377, "learning_rate": 0.00017149278434830433, "loss": 1.7549, "step": 1313 }, { "epoch": 0.2693726937269373, "grad_norm": 0.5750266252366152, "learning_rate": 0.00017144633874234799, "loss": 1.6935, "step": 1314 }, { "epoch": 0.2695776957769578, "grad_norm": 0.6608867219851539, "learning_rate": 0.0001713998616318031, "loss": 1.6916, "step": 1315 }, { "epoch": 0.2697826978269783, "grad_norm": 0.6304124991165608, "learning_rate": 0.000171353353037164, "loss": 1.6271, "step": 1316 }, { "epoch": 0.2699876998769988, "grad_norm": 0.5345250031806231, "learning_rate": 0.00017130681297893884, "loss": 1.6428, "step": 1317 }, { "epoch": 0.2701927019270193, "grad_norm": 0.6341024047899217, "learning_rate": 0.0001712602414776497, "loss": 1.752, "step": 1318 }, { "epoch": 0.27039770397703977, "grad_norm": 0.7167032559485453, "learning_rate": 0.00017121363855383248, "loss": 1.666, "step": 1319 }, { "epoch": 0.27060270602706027, "grad_norm": 0.599522774387851, "learning_rate": 0.00017116700422803693, "loss": 1.6305, "step": 1320 }, { "epoch": 0.27080770807708077, "grad_norm": 0.5570009859079458, "learning_rate": 0.0001711203385208267, "loss": 1.5971, "step": 1321 }, { "epoch": 0.27101271012710126, "grad_norm": 0.5968462144206931, "learning_rate": 0.00017107364145277923, "loss": 1.6723, "step": 1322 }, { "epoch": 0.27121771217712176, "grad_norm": 0.6055178925964502, "learning_rate": 0.00017102691304448581, "loss": 1.6337, "step": 1323 }, { "epoch": 0.27142271422714226, "grad_norm": 0.5554698383764707, "learning_rate": 0.00017098015331655156, "loss": 1.6817, "step": 1324 }, { "epoch": 0.27162771627716276, "grad_norm": 0.5744501417185245, "learning_rate": 0.00017093336228959536, "loss": 1.6714, "step": 1325 }, { "epoch": 0.27183271832718325, "grad_norm": 0.5497029266074408, "learning_rate": 0.00017088653998424994, "loss": 1.6724, "step": 1326 }, { "epoch": 0.27203772037720375, "grad_norm": 0.626441371749172, "learning_rate": 0.00017083968642116182, "loss": 1.6562, "step": 1327 }, { "epoch": 0.27224272242722425, "grad_norm": 0.566767549313504, "learning_rate": 0.00017079280162099128, "loss": 1.7045, "step": 1328 }, { "epoch": 0.27244772447724475, "grad_norm": 0.6688022993264132, "learning_rate": 0.0001707458856044124, "loss": 1.6831, "step": 1329 }, { "epoch": 0.2726527265272653, "grad_norm": 0.6527138355403727, "learning_rate": 0.000170698938392113, "loss": 1.6803, "step": 1330 }, { "epoch": 0.2728577285772858, "grad_norm": 0.5890437952057781, "learning_rate": 0.00017065196000479467, "loss": 1.643, "step": 1331 }, { "epoch": 0.2730627306273063, "grad_norm": 0.5615301477669931, "learning_rate": 0.00017060495046317272, "loss": 1.6531, "step": 1332 }, { "epoch": 0.2732677326773268, "grad_norm": 0.6048771172107056, "learning_rate": 0.00017055790978797627, "loss": 1.7048, "step": 1333 }, { "epoch": 0.2734727347273473, "grad_norm": 0.611478334204796, "learning_rate": 0.00017051083799994813, "loss": 1.6763, "step": 1334 }, { "epoch": 0.2736777367773678, "grad_norm": 0.6393942037768471, "learning_rate": 0.00017046373511984476, "loss": 1.7222, "step": 1335 }, { "epoch": 0.2738827388273883, "grad_norm": 0.6955381000036535, "learning_rate": 0.0001704166011684364, "loss": 1.6932, "step": 1336 }, { "epoch": 0.2740877408774088, "grad_norm": 0.5767841297419991, "learning_rate": 0.00017036943616650699, "loss": 1.6721, "step": 1337 }, { "epoch": 0.2742927429274293, "grad_norm": 0.5969083859379442, "learning_rate": 0.00017032224013485415, "loss": 1.619, "step": 1338 }, { "epoch": 0.2744977449774498, "grad_norm": 0.5438130904033252, "learning_rate": 0.00017027501309428922, "loss": 1.5779, "step": 1339 }, { "epoch": 0.2747027470274703, "grad_norm": 0.6023741598939625, "learning_rate": 0.00017022775506563714, "loss": 1.5599, "step": 1340 }, { "epoch": 0.27490774907749077, "grad_norm": 0.5376175874356415, "learning_rate": 0.0001701804660697366, "loss": 1.6476, "step": 1341 }, { "epoch": 0.27511275112751127, "grad_norm": 0.5870472264603122, "learning_rate": 0.0001701331461274398, "loss": 1.6252, "step": 1342 }, { "epoch": 0.27531775317753177, "grad_norm": 0.6088578288023003, "learning_rate": 0.0001700857952596128, "loss": 1.6766, "step": 1343 }, { "epoch": 0.27552275522755226, "grad_norm": 0.5608785246945501, "learning_rate": 0.0001700384134871351, "loss": 1.6357, "step": 1344 }, { "epoch": 0.27572775727757276, "grad_norm": 0.5514302257527606, "learning_rate": 0.00016999100083089994, "loss": 1.6706, "step": 1345 }, { "epoch": 0.27593275932759326, "grad_norm": 0.6264056553330709, "learning_rate": 0.00016994355731181414, "loss": 1.6735, "step": 1346 }, { "epoch": 0.27613776137761376, "grad_norm": 0.6268090004738055, "learning_rate": 0.00016989608295079814, "loss": 1.666, "step": 1347 }, { "epoch": 0.27634276342763425, "grad_norm": 0.6191028198003796, "learning_rate": 0.00016984857776878597, "loss": 1.6315, "step": 1348 }, { "epoch": 0.27654776547765475, "grad_norm": 0.596446209853012, "learning_rate": 0.00016980104178672528, "loss": 1.6243, "step": 1349 }, { "epoch": 0.2767527675276753, "grad_norm": 0.6415935522089514, "learning_rate": 0.00016975347502557722, "loss": 1.6834, "step": 1350 }, { "epoch": 0.2769577695776958, "grad_norm": 0.6927149512261636, "learning_rate": 0.00016970587750631664, "loss": 1.6837, "step": 1351 }, { "epoch": 0.2771627716277163, "grad_norm": 0.6023580006661222, "learning_rate": 0.00016965824924993186, "loss": 1.7345, "step": 1352 }, { "epoch": 0.2773677736777368, "grad_norm": 0.5884586926153903, "learning_rate": 0.00016961059027742473, "loss": 1.6748, "step": 1353 }, { "epoch": 0.2775727757277573, "grad_norm": 0.5902395958526696, "learning_rate": 0.00016956290060981077, "loss": 1.589, "step": 1354 }, { "epoch": 0.2777777777777778, "grad_norm": 0.6005857241077599, "learning_rate": 0.0001695151802681189, "loss": 1.6168, "step": 1355 }, { "epoch": 0.2779827798277983, "grad_norm": 0.6140206599296282, "learning_rate": 0.00016946742927339162, "loss": 1.655, "step": 1356 }, { "epoch": 0.2781877818778188, "grad_norm": 0.543111149759142, "learning_rate": 0.00016941964764668498, "loss": 1.6522, "step": 1357 }, { "epoch": 0.2783927839278393, "grad_norm": 0.5320484042760413, "learning_rate": 0.0001693718354090685, "loss": 1.597, "step": 1358 }, { "epoch": 0.2785977859778598, "grad_norm": 0.5717413309730874, "learning_rate": 0.0001693239925816252, "loss": 1.6199, "step": 1359 }, { "epoch": 0.2788027880278803, "grad_norm": 0.6147641595117893, "learning_rate": 0.00016927611918545157, "loss": 1.6841, "step": 1360 }, { "epoch": 0.2790077900779008, "grad_norm": 0.5576693273730527, "learning_rate": 0.0001692282152416576, "loss": 1.7787, "step": 1361 }, { "epoch": 0.27921279212792127, "grad_norm": 0.5728239367377934, "learning_rate": 0.0001691802807713668, "loss": 1.6289, "step": 1362 }, { "epoch": 0.27941779417794177, "grad_norm": 0.6177858903277267, "learning_rate": 0.00016913231579571608, "loss": 1.7331, "step": 1363 }, { "epoch": 0.27962279622796227, "grad_norm": 0.547970777812909, "learning_rate": 0.00016908432033585578, "loss": 1.6687, "step": 1364 }, { "epoch": 0.27982779827798276, "grad_norm": 0.5619612331214883, "learning_rate": 0.00016903629441294976, "loss": 1.7112, "step": 1365 }, { "epoch": 0.28003280032800326, "grad_norm": 0.6197396709335629, "learning_rate": 0.00016898823804817524, "loss": 1.7214, "step": 1366 }, { "epoch": 0.28023780237802376, "grad_norm": 0.6096890992042201, "learning_rate": 0.00016894015126272288, "loss": 1.756, "step": 1367 }, { "epoch": 0.28044280442804426, "grad_norm": 0.5746803751581485, "learning_rate": 0.00016889203407779679, "loss": 1.6358, "step": 1368 }, { "epoch": 0.28064780647806475, "grad_norm": 0.559456353139058, "learning_rate": 0.00016884388651461447, "loss": 1.6158, "step": 1369 }, { "epoch": 0.2808528085280853, "grad_norm": 0.6279177200550988, "learning_rate": 0.0001687957085944068, "loss": 1.7557, "step": 1370 }, { "epoch": 0.2810578105781058, "grad_norm": 0.5954126669108744, "learning_rate": 0.000168747500338418, "loss": 1.6827, "step": 1371 }, { "epoch": 0.2812628126281263, "grad_norm": 0.7091191735393801, "learning_rate": 0.00016869926176790582, "loss": 1.6465, "step": 1372 }, { "epoch": 0.2814678146781468, "grad_norm": 0.6094884965036207, "learning_rate": 0.00016865099290414124, "loss": 1.6946, "step": 1373 }, { "epoch": 0.2816728167281673, "grad_norm": 0.6288416307609498, "learning_rate": 0.00016860269376840856, "loss": 1.6302, "step": 1374 }, { "epoch": 0.2818778187781878, "grad_norm": 0.6149902924427048, "learning_rate": 0.00016855436438200562, "loss": 1.6473, "step": 1375 }, { "epoch": 0.2820828208282083, "grad_norm": 0.6553959592743003, "learning_rate": 0.0001685060047662434, "loss": 1.6361, "step": 1376 }, { "epoch": 0.2822878228782288, "grad_norm": 0.5531393188586536, "learning_rate": 0.00016845761494244633, "loss": 1.5902, "step": 1377 }, { "epoch": 0.2824928249282493, "grad_norm": 0.6114938874288971, "learning_rate": 0.00016840919493195212, "loss": 1.6819, "step": 1378 }, { "epoch": 0.2826978269782698, "grad_norm": 0.644743919034271, "learning_rate": 0.0001683607447561118, "loss": 1.7007, "step": 1379 }, { "epoch": 0.2829028290282903, "grad_norm": 0.5728313072151706, "learning_rate": 0.00016831226443628968, "loss": 1.5309, "step": 1380 }, { "epoch": 0.2831078310783108, "grad_norm": 0.5683682080497117, "learning_rate": 0.00016826375399386337, "loss": 1.67, "step": 1381 }, { "epoch": 0.2833128331283313, "grad_norm": 0.6075185871427463, "learning_rate": 0.00016821521345022377, "loss": 1.6961, "step": 1382 }, { "epoch": 0.2835178351783518, "grad_norm": 0.5778006045886844, "learning_rate": 0.0001681666428267751, "loss": 1.6771, "step": 1383 }, { "epoch": 0.28372283722837227, "grad_norm": 0.5696748002171197, "learning_rate": 0.00016811804214493476, "loss": 1.6813, "step": 1384 }, { "epoch": 0.28392783927839277, "grad_norm": 0.6336855026210187, "learning_rate": 0.00016806941142613342, "loss": 1.6587, "step": 1385 }, { "epoch": 0.28413284132841327, "grad_norm": 0.5908344627382948, "learning_rate": 0.0001680207506918151, "loss": 1.6944, "step": 1386 }, { "epoch": 0.28433784337843376, "grad_norm": 0.6040203227523477, "learning_rate": 0.00016797205996343687, "loss": 1.6738, "step": 1387 }, { "epoch": 0.28454284542845426, "grad_norm": 0.616407029349558, "learning_rate": 0.00016792333926246916, "loss": 1.7029, "step": 1388 }, { "epoch": 0.28474784747847476, "grad_norm": 0.6329408167169194, "learning_rate": 0.00016787458861039563, "loss": 1.6862, "step": 1389 }, { "epoch": 0.2849528495284953, "grad_norm": 0.7838731139616325, "learning_rate": 0.00016782580802871306, "loss": 1.6748, "step": 1390 }, { "epoch": 0.2851578515785158, "grad_norm": 0.5882036060554755, "learning_rate": 0.0001677769975389315, "loss": 1.6451, "step": 1391 }, { "epoch": 0.2853628536285363, "grad_norm": 0.5693591084489307, "learning_rate": 0.00016772815716257412, "loss": 1.7635, "step": 1392 }, { "epoch": 0.2855678556785568, "grad_norm": 0.5711515693203755, "learning_rate": 0.0001676792869211773, "loss": 1.6348, "step": 1393 }, { "epoch": 0.2857728577285773, "grad_norm": 0.5610445863235145, "learning_rate": 0.0001676303868362907, "loss": 1.6005, "step": 1394 }, { "epoch": 0.2859778597785978, "grad_norm": 0.5839332863795611, "learning_rate": 0.0001675814569294769, "loss": 1.6338, "step": 1395 }, { "epoch": 0.2861828618286183, "grad_norm": 0.5828673270389891, "learning_rate": 0.00016753249722231185, "loss": 1.7122, "step": 1396 }, { "epoch": 0.2863878638786388, "grad_norm": 0.6216480342251545, "learning_rate": 0.0001674835077363845, "loss": 1.6316, "step": 1397 }, { "epoch": 0.2865928659286593, "grad_norm": 0.5722777389771095, "learning_rate": 0.00016743448849329702, "loss": 1.6823, "step": 1398 }, { "epoch": 0.2867978679786798, "grad_norm": 0.6225080484074912, "learning_rate": 0.00016738543951466466, "loss": 1.6717, "step": 1399 }, { "epoch": 0.2870028700287003, "grad_norm": 0.5675434638445968, "learning_rate": 0.0001673363608221158, "loss": 1.5686, "step": 1400 }, { "epoch": 0.2872078720787208, "grad_norm": 0.6487391589510901, "learning_rate": 0.0001672872524372919, "loss": 1.7171, "step": 1401 }, { "epoch": 0.2874128741287413, "grad_norm": 0.5931338987262978, "learning_rate": 0.00016723811438184755, "loss": 1.7028, "step": 1402 }, { "epoch": 0.2876178761787618, "grad_norm": 0.5939444403769348, "learning_rate": 0.00016718894667745038, "loss": 1.5771, "step": 1403 }, { "epoch": 0.2878228782287823, "grad_norm": 0.6119809914439992, "learning_rate": 0.0001671397493457811, "loss": 1.7637, "step": 1404 }, { "epoch": 0.2880278802788028, "grad_norm": 0.5781933512143425, "learning_rate": 0.0001670905224085335, "loss": 1.6768, "step": 1405 }, { "epoch": 0.28823288232882327, "grad_norm": 0.5499145869768924, "learning_rate": 0.00016704126588741448, "loss": 1.7184, "step": 1406 }, { "epoch": 0.28843788437884377, "grad_norm": 0.608443581122789, "learning_rate": 0.00016699197980414384, "loss": 1.6659, "step": 1407 }, { "epoch": 0.28864288642886426, "grad_norm": 0.6494720628224756, "learning_rate": 0.00016694266418045456, "loss": 1.7825, "step": 1408 }, { "epoch": 0.28884788847888476, "grad_norm": 0.6269297302941028, "learning_rate": 0.00016689331903809256, "loss": 1.6731, "step": 1409 }, { "epoch": 0.2890528905289053, "grad_norm": 0.5830461601018152, "learning_rate": 0.00016684394439881687, "loss": 1.6496, "step": 1410 }, { "epoch": 0.2892578925789258, "grad_norm": 0.5944504329483641, "learning_rate": 0.0001667945402843994, "loss": 1.6719, "step": 1411 }, { "epoch": 0.2894628946289463, "grad_norm": 0.5614784103184588, "learning_rate": 0.0001667451067166251, "loss": 1.6328, "step": 1412 }, { "epoch": 0.2896678966789668, "grad_norm": 0.5938633061809798, "learning_rate": 0.00016669564371729197, "loss": 1.6245, "step": 1413 }, { "epoch": 0.2898728987289873, "grad_norm": 0.5712304151533564, "learning_rate": 0.00016664615130821092, "loss": 1.6089, "step": 1414 }, { "epoch": 0.2900779007790078, "grad_norm": 0.5981680777222429, "learning_rate": 0.0001665966295112059, "loss": 1.7199, "step": 1415 }, { "epoch": 0.2902829028290283, "grad_norm": 0.5856478174301754, "learning_rate": 0.00016654707834811369, "loss": 1.722, "step": 1416 }, { "epoch": 0.2904879048790488, "grad_norm": 0.6225004333262452, "learning_rate": 0.00016649749784078418, "loss": 1.6306, "step": 1417 }, { "epoch": 0.2906929069290693, "grad_norm": 0.5685468712927579, "learning_rate": 0.0001664478880110801, "loss": 1.7041, "step": 1418 }, { "epoch": 0.2908979089790898, "grad_norm": 0.6079538986000567, "learning_rate": 0.00016639824888087712, "loss": 1.7059, "step": 1419 }, { "epoch": 0.2911029110291103, "grad_norm": 0.6414222790850636, "learning_rate": 0.00016634858047206378, "loss": 1.6947, "step": 1420 }, { "epoch": 0.2913079130791308, "grad_norm": 0.6249494675095583, "learning_rate": 0.00016629888280654174, "loss": 1.6235, "step": 1421 }, { "epoch": 0.2915129151291513, "grad_norm": 0.6491004754604272, "learning_rate": 0.00016624915590622528, "loss": 1.7217, "step": 1422 }, { "epoch": 0.2917179171791718, "grad_norm": 0.606198026146697, "learning_rate": 0.00016619939979304173, "loss": 1.6218, "step": 1423 }, { "epoch": 0.2919229192291923, "grad_norm": 0.5877569466767635, "learning_rate": 0.00016614961448893132, "loss": 1.6471, "step": 1424 }, { "epoch": 0.2921279212792128, "grad_norm": 0.5656702517456443, "learning_rate": 0.00016609980001584706, "loss": 1.6128, "step": 1425 }, { "epoch": 0.2923329233292333, "grad_norm": 0.6154393901226662, "learning_rate": 0.0001660499563957549, "loss": 1.7274, "step": 1426 }, { "epoch": 0.29253792537925377, "grad_norm": 0.5835405308587939, "learning_rate": 0.0001660000836506336, "loss": 1.7261, "step": 1427 }, { "epoch": 0.29274292742927427, "grad_norm": 0.5706537439759921, "learning_rate": 0.00016595018180247476, "loss": 1.7385, "step": 1428 }, { "epoch": 0.29294792947929477, "grad_norm": 0.6479207054037849, "learning_rate": 0.00016590025087328283, "loss": 1.7044, "step": 1429 }, { "epoch": 0.2931529315293153, "grad_norm": 0.6301267350576135, "learning_rate": 0.00016585029088507513, "loss": 1.7019, "step": 1430 }, { "epoch": 0.2933579335793358, "grad_norm": 0.5958598241177567, "learning_rate": 0.00016580030185988167, "loss": 1.6502, "step": 1431 }, { "epoch": 0.2935629356293563, "grad_norm": 0.5585997523185458, "learning_rate": 0.0001657502838197454, "loss": 1.6493, "step": 1432 }, { "epoch": 0.2937679376793768, "grad_norm": 0.6396998664630253, "learning_rate": 0.00016570023678672195, "loss": 1.6624, "step": 1433 }, { "epoch": 0.2939729397293973, "grad_norm": 0.5452526554629904, "learning_rate": 0.00016565016078287984, "loss": 1.6854, "step": 1434 }, { "epoch": 0.2941779417794178, "grad_norm": 0.5566176716853116, "learning_rate": 0.00016560005583030028, "loss": 1.7564, "step": 1435 }, { "epoch": 0.2943829438294383, "grad_norm": 0.6130102024107688, "learning_rate": 0.00016554992195107725, "loss": 1.7088, "step": 1436 }, { "epoch": 0.2945879458794588, "grad_norm": 0.5785907747932231, "learning_rate": 0.00016549975916731757, "loss": 1.7137, "step": 1437 }, { "epoch": 0.2947929479294793, "grad_norm": 0.5004139534862897, "learning_rate": 0.00016544956750114072, "loss": 1.6113, "step": 1438 }, { "epoch": 0.2949979499794998, "grad_norm": 0.5775971071235202, "learning_rate": 0.00016539934697467894, "loss": 1.6734, "step": 1439 }, { "epoch": 0.2952029520295203, "grad_norm": 0.6062372497504022, "learning_rate": 0.0001653490976100772, "loss": 1.6311, "step": 1440 }, { "epoch": 0.2954079540795408, "grad_norm": 0.5859856426173122, "learning_rate": 0.0001652988194294932, "loss": 1.5166, "step": 1441 }, { "epoch": 0.2956129561295613, "grad_norm": 0.5967393507878219, "learning_rate": 0.00016524851245509735, "loss": 1.6911, "step": 1442 }, { "epoch": 0.2958179581795818, "grad_norm": 0.5767887239009115, "learning_rate": 0.0001651981767090727, "loss": 1.6983, "step": 1443 }, { "epoch": 0.2960229602296023, "grad_norm": 0.5523697251522889, "learning_rate": 0.000165147812213615, "loss": 1.6263, "step": 1444 }, { "epoch": 0.2962279622796228, "grad_norm": 0.6067968583365366, "learning_rate": 0.0001650974189909328, "loss": 1.7411, "step": 1445 }, { "epoch": 0.2964329643296433, "grad_norm": 0.6107939553536341, "learning_rate": 0.00016504699706324714, "loss": 1.6371, "step": 1446 }, { "epoch": 0.2966379663796638, "grad_norm": 0.607001639158129, "learning_rate": 0.00016499654645279183, "loss": 1.6997, "step": 1447 }, { "epoch": 0.2968429684296843, "grad_norm": 0.6500444406045572, "learning_rate": 0.00016494606718181332, "loss": 1.6451, "step": 1448 }, { "epoch": 0.29704797047970477, "grad_norm": 0.6063097225408537, "learning_rate": 0.0001648955592725706, "loss": 1.6879, "step": 1449 }, { "epoch": 0.2972529725297253, "grad_norm": 0.6001187813604127, "learning_rate": 0.00016484502274733545, "loss": 1.575, "step": 1450 }, { "epoch": 0.2974579745797458, "grad_norm": 0.5833045505796, "learning_rate": 0.0001647944576283921, "loss": 1.6644, "step": 1451 }, { "epoch": 0.2976629766297663, "grad_norm": 0.6376388274561628, "learning_rate": 0.0001647438639380375, "loss": 1.5922, "step": 1452 }, { "epoch": 0.2978679786797868, "grad_norm": 0.5577993845281046, "learning_rate": 0.00016469324169858122, "loss": 1.6857, "step": 1453 }, { "epoch": 0.2980729807298073, "grad_norm": 0.6069534255387015, "learning_rate": 0.00016464259093234532, "loss": 1.5942, "step": 1454 }, { "epoch": 0.2982779827798278, "grad_norm": 0.5878791282239109, "learning_rate": 0.00016459191166166446, "loss": 1.676, "step": 1455 }, { "epoch": 0.2984829848298483, "grad_norm": 0.5401765415796359, "learning_rate": 0.00016454120390888596, "loss": 1.5596, "step": 1456 }, { "epoch": 0.2986879868798688, "grad_norm": 0.5706706407852579, "learning_rate": 0.00016449046769636958, "loss": 1.634, "step": 1457 }, { "epoch": 0.2988929889298893, "grad_norm": 0.6488679600213937, "learning_rate": 0.0001644397030464877, "loss": 1.6726, "step": 1458 }, { "epoch": 0.2990979909799098, "grad_norm": 0.6207910747118691, "learning_rate": 0.00016438890998162525, "loss": 1.652, "step": 1459 }, { "epoch": 0.2993029930299303, "grad_norm": 0.48123702384748535, "learning_rate": 0.00016433808852417962, "loss": 1.6781, "step": 1460 }, { "epoch": 0.2995079950799508, "grad_norm": 0.6176812532996043, "learning_rate": 0.0001642872386965608, "loss": 1.6483, "step": 1461 }, { "epoch": 0.2997129971299713, "grad_norm": 0.5966631733091627, "learning_rate": 0.00016423636052119122, "loss": 1.6711, "step": 1462 }, { "epoch": 0.2999179991799918, "grad_norm": 0.54138892575733, "learning_rate": 0.00016418545402050586, "loss": 1.6333, "step": 1463 }, { "epoch": 0.3001230012300123, "grad_norm": 0.5082718777872532, "learning_rate": 0.0001641345192169522, "loss": 1.6375, "step": 1464 }, { "epoch": 0.3003280032800328, "grad_norm": 0.5653767507826191, "learning_rate": 0.00016408355613299014, "loss": 1.6154, "step": 1465 }, { "epoch": 0.3005330053300533, "grad_norm": 0.5577558551273679, "learning_rate": 0.00016403256479109209, "loss": 1.6291, "step": 1466 }, { "epoch": 0.3007380073800738, "grad_norm": 0.5751186685437938, "learning_rate": 0.0001639815452137429, "loss": 1.5786, "step": 1467 }, { "epoch": 0.3009430094300943, "grad_norm": 0.5339315480495862, "learning_rate": 0.00016393049742343988, "loss": 1.6837, "step": 1468 }, { "epoch": 0.3011480114801148, "grad_norm": 0.592096439017495, "learning_rate": 0.00016387942144269286, "loss": 1.6602, "step": 1469 }, { "epoch": 0.3013530135301353, "grad_norm": 0.6350021083036947, "learning_rate": 0.00016382831729402396, "loss": 1.7384, "step": 1470 }, { "epoch": 0.3015580155801558, "grad_norm": 0.5456170004296444, "learning_rate": 0.00016377718499996778, "loss": 1.6979, "step": 1471 }, { "epoch": 0.3017630176301763, "grad_norm": 0.5625163540139964, "learning_rate": 0.00016372602458307136, "loss": 1.65, "step": 1472 }, { "epoch": 0.3019680196801968, "grad_norm": 0.6275864662064967, "learning_rate": 0.00016367483606589413, "loss": 1.6612, "step": 1473 }, { "epoch": 0.3021730217302173, "grad_norm": 0.6654054205446531, "learning_rate": 0.00016362361947100788, "loss": 1.6183, "step": 1474 }, { "epoch": 0.3023780237802378, "grad_norm": 0.534824858068565, "learning_rate": 0.00016357237482099684, "loss": 1.6559, "step": 1475 }, { "epoch": 0.3025830258302583, "grad_norm": 0.6049666407438539, "learning_rate": 0.00016352110213845746, "loss": 1.6219, "step": 1476 }, { "epoch": 0.3027880278802788, "grad_norm": 0.6556382273694806, "learning_rate": 0.0001634698014459988, "loss": 1.6815, "step": 1477 }, { "epoch": 0.3029930299302993, "grad_norm": 0.6183860894338956, "learning_rate": 0.0001634184727662421, "loss": 1.6166, "step": 1478 }, { "epoch": 0.3031980319803198, "grad_norm": 0.6035312388612828, "learning_rate": 0.0001633671161218209, "loss": 1.666, "step": 1479 }, { "epoch": 0.3034030340303403, "grad_norm": 0.6224830450563352, "learning_rate": 0.0001633157315353812, "loss": 1.6779, "step": 1480 }, { "epoch": 0.3036080360803608, "grad_norm": 0.6158059189888997, "learning_rate": 0.0001632643190295813, "loss": 1.6379, "step": 1481 }, { "epoch": 0.3038130381303813, "grad_norm": 0.5943417474762814, "learning_rate": 0.00016321287862709175, "loss": 1.7575, "step": 1482 }, { "epoch": 0.3040180401804018, "grad_norm": 0.5940620362442668, "learning_rate": 0.0001631614103505954, "loss": 1.6821, "step": 1483 }, { "epoch": 0.3042230422304223, "grad_norm": 0.6384725486547611, "learning_rate": 0.00016310991422278744, "loss": 1.6872, "step": 1484 }, { "epoch": 0.3044280442804428, "grad_norm": 0.5853932926543159, "learning_rate": 0.00016305839026637534, "loss": 1.6891, "step": 1485 }, { "epoch": 0.3046330463304633, "grad_norm": 0.6556969925421489, "learning_rate": 0.0001630068385040788, "loss": 1.6724, "step": 1486 }, { "epoch": 0.3048380483804838, "grad_norm": 0.5740200304718273, "learning_rate": 0.0001629552589586298, "loss": 1.7759, "step": 1487 }, { "epoch": 0.3050430504305043, "grad_norm": 0.6272507977664349, "learning_rate": 0.00016290365165277262, "loss": 1.6311, "step": 1488 }, { "epoch": 0.3052480524805248, "grad_norm": 0.6192117676394726, "learning_rate": 0.0001628520166092637, "loss": 1.6603, "step": 1489 }, { "epoch": 0.30545305453054533, "grad_norm": 0.6666399759615597, "learning_rate": 0.00016280035385087175, "loss": 1.6987, "step": 1490 }, { "epoch": 0.30565805658056583, "grad_norm": 0.5537870439952756, "learning_rate": 0.0001627486634003777, "loss": 1.6197, "step": 1491 }, { "epoch": 0.3058630586305863, "grad_norm": 0.6271339691764065, "learning_rate": 0.0001626969452805747, "loss": 1.6324, "step": 1492 }, { "epoch": 0.3060680606806068, "grad_norm": 0.5900091056753516, "learning_rate": 0.00016264519951426806, "loss": 1.6604, "step": 1493 }, { "epoch": 0.3062730627306273, "grad_norm": 0.55510744309782, "learning_rate": 0.00016259342612427537, "loss": 1.7371, "step": 1494 }, { "epoch": 0.3064780647806478, "grad_norm": 0.5225734408907443, "learning_rate": 0.0001625416251334263, "loss": 1.6408, "step": 1495 }, { "epoch": 0.3066830668306683, "grad_norm": 0.633984094717057, "learning_rate": 0.00016248979656456275, "loss": 1.722, "step": 1496 }, { "epoch": 0.3068880688806888, "grad_norm": 0.5940183625252852, "learning_rate": 0.0001624379404405387, "loss": 1.669, "step": 1497 }, { "epoch": 0.3070930709307093, "grad_norm": 0.5117203968935347, "learning_rate": 0.00016238605678422046, "loss": 1.6204, "step": 1498 }, { "epoch": 0.3072980729807298, "grad_norm": 0.6375268570205453, "learning_rate": 0.00016233414561848627, "loss": 1.6866, "step": 1499 }, { "epoch": 0.3075030750307503, "grad_norm": 0.5880112788446207, "learning_rate": 0.0001622822069662266, "loss": 1.6545, "step": 1500 }, { "epoch": 0.3077080770807708, "grad_norm": 0.5735574502024673, "learning_rate": 0.00016223024085034414, "loss": 1.7094, "step": 1501 }, { "epoch": 0.3079130791307913, "grad_norm": 0.5758395382762499, "learning_rate": 0.00016217824729375345, "loss": 1.5721, "step": 1502 }, { "epoch": 0.3081180811808118, "grad_norm": 0.5523134159331391, "learning_rate": 0.00016212622631938138, "loss": 1.6624, "step": 1503 }, { "epoch": 0.3083230832308323, "grad_norm": 0.5646346101504085, "learning_rate": 0.00016207417795016684, "loss": 1.7276, "step": 1504 }, { "epoch": 0.3085280852808528, "grad_norm": 0.5749170166885552, "learning_rate": 0.00016202210220906074, "loss": 1.7067, "step": 1505 }, { "epoch": 0.3087330873308733, "grad_norm": 0.4782237386560857, "learning_rate": 0.00016196999911902618, "loss": 1.6002, "step": 1506 }, { "epoch": 0.3089380893808938, "grad_norm": 0.5679524765129618, "learning_rate": 0.00016191786870303822, "loss": 1.7205, "step": 1507 }, { "epoch": 0.3091430914309143, "grad_norm": 0.5053090688123828, "learning_rate": 0.00016186571098408402, "loss": 1.6143, "step": 1508 }, { "epoch": 0.3093480934809348, "grad_norm": 0.5542317603451554, "learning_rate": 0.00016181352598516275, "loss": 1.6587, "step": 1509 }, { "epoch": 0.30955309553095534, "grad_norm": 0.5658256166476654, "learning_rate": 0.00016176131372928562, "loss": 1.6991, "step": 1510 }, { "epoch": 0.30975809758097583, "grad_norm": 0.550839221938382, "learning_rate": 0.00016170907423947585, "loss": 1.5961, "step": 1511 }, { "epoch": 0.30996309963099633, "grad_norm": 0.5130365852102582, "learning_rate": 0.00016165680753876872, "loss": 1.6286, "step": 1512 }, { "epoch": 0.31016810168101683, "grad_norm": 0.5201506371904974, "learning_rate": 0.00016160451365021146, "loss": 1.6852, "step": 1513 }, { "epoch": 0.3103731037310373, "grad_norm": 0.4659832141335574, "learning_rate": 0.00016155219259686326, "loss": 1.6011, "step": 1514 }, { "epoch": 0.3105781057810578, "grad_norm": 0.6116647912874431, "learning_rate": 0.00016149984440179537, "loss": 1.6497, "step": 1515 }, { "epoch": 0.3107831078310783, "grad_norm": 0.5308969478843171, "learning_rate": 0.00016144746908809094, "loss": 1.7091, "step": 1516 }, { "epoch": 0.3109881098810988, "grad_norm": 0.5203820445129897, "learning_rate": 0.0001613950666788451, "loss": 1.643, "step": 1517 }, { "epoch": 0.3111931119311193, "grad_norm": 0.6183169755345523, "learning_rate": 0.000161342637197165, "loss": 1.6003, "step": 1518 }, { "epoch": 0.3113981139811398, "grad_norm": 0.6136747127270937, "learning_rate": 0.00016129018066616952, "loss": 1.8026, "step": 1519 }, { "epoch": 0.3116031160311603, "grad_norm": 0.5817723945364702, "learning_rate": 0.00016123769710898976, "loss": 1.6406, "step": 1520 }, { "epoch": 0.3118081180811808, "grad_norm": 0.5735548512583484, "learning_rate": 0.00016118518654876848, "loss": 1.6114, "step": 1521 }, { "epoch": 0.3120131201312013, "grad_norm": 0.6138611699549323, "learning_rate": 0.00016113264900866048, "loss": 1.6961, "step": 1522 }, { "epoch": 0.3122181221812218, "grad_norm": 0.5379988913748793, "learning_rate": 0.0001610800845118324, "loss": 1.6559, "step": 1523 }, { "epoch": 0.3124231242312423, "grad_norm": 0.6183425338983181, "learning_rate": 0.00016102749308146284, "loss": 1.7289, "step": 1524 }, { "epoch": 0.3126281262812628, "grad_norm": 0.57905816432174, "learning_rate": 0.00016097487474074228, "loss": 1.7162, "step": 1525 }, { "epoch": 0.3128331283312833, "grad_norm": 0.5750642849271494, "learning_rate": 0.0001609222295128729, "loss": 1.6312, "step": 1526 }, { "epoch": 0.3130381303813038, "grad_norm": 0.5534029028893083, "learning_rate": 0.0001608695574210689, "loss": 1.6657, "step": 1527 }, { "epoch": 0.3132431324313243, "grad_norm": 0.6227328178019984, "learning_rate": 0.00016081685848855627, "loss": 1.657, "step": 1528 }, { "epoch": 0.3134481344813448, "grad_norm": 0.646428670960226, "learning_rate": 0.00016076413273857288, "loss": 1.6409, "step": 1529 }, { "epoch": 0.31365313653136534, "grad_norm": 0.5703451530633927, "learning_rate": 0.0001607113801943684, "loss": 1.6329, "step": 1530 }, { "epoch": 0.31385813858138584, "grad_norm": 0.6117652726218614, "learning_rate": 0.00016065860087920424, "loss": 1.6421, "step": 1531 }, { "epoch": 0.31406314063140633, "grad_norm": 0.6227058046968832, "learning_rate": 0.00016060579481635368, "loss": 1.6384, "step": 1532 }, { "epoch": 0.31426814268142683, "grad_norm": 0.5327861257160884, "learning_rate": 0.0001605529620291019, "loss": 1.6584, "step": 1533 }, { "epoch": 0.31447314473144733, "grad_norm": 0.5321587551317813, "learning_rate": 0.00016050010254074564, "loss": 1.5922, "step": 1534 }, { "epoch": 0.3146781467814678, "grad_norm": 0.5705952632728047, "learning_rate": 0.00016044721637459354, "loss": 1.7038, "step": 1535 }, { "epoch": 0.3148831488314883, "grad_norm": 0.5914355141960834, "learning_rate": 0.0001603943035539661, "loss": 1.7166, "step": 1536 }, { "epoch": 0.3150881508815088, "grad_norm": 0.5264366172331062, "learning_rate": 0.00016034136410219538, "loss": 1.5967, "step": 1537 }, { "epoch": 0.3152931529315293, "grad_norm": 0.5339024874637761, "learning_rate": 0.00016028839804262528, "loss": 1.6049, "step": 1538 }, { "epoch": 0.3154981549815498, "grad_norm": 0.5836726609422397, "learning_rate": 0.00016023540539861144, "loss": 1.7084, "step": 1539 }, { "epoch": 0.3157031570315703, "grad_norm": 0.5805632490811234, "learning_rate": 0.0001601823861935212, "loss": 1.6971, "step": 1540 }, { "epoch": 0.3159081590815908, "grad_norm": 0.6403114053296829, "learning_rate": 0.00016012934045073367, "loss": 1.7083, "step": 1541 }, { "epoch": 0.3161131611316113, "grad_norm": 0.5258402890664735, "learning_rate": 0.00016007626819363954, "loss": 1.6404, "step": 1542 }, { "epoch": 0.3163181631816318, "grad_norm": 0.5970121779241416, "learning_rate": 0.0001600231694456413, "loss": 1.7279, "step": 1543 }, { "epoch": 0.3165231652316523, "grad_norm": 0.6343125666489673, "learning_rate": 0.00015997004423015304, "loss": 1.7945, "step": 1544 }, { "epoch": 0.3167281672816728, "grad_norm": 0.5153636063707583, "learning_rate": 0.00015991689257060065, "loss": 1.6076, "step": 1545 }, { "epoch": 0.3169331693316933, "grad_norm": 0.5160392265344995, "learning_rate": 0.0001598637144904215, "loss": 1.7185, "step": 1546 }, { "epoch": 0.3171381713817138, "grad_norm": 0.5967646540854691, "learning_rate": 0.00015981051001306482, "loss": 1.682, "step": 1547 }, { "epoch": 0.3173431734317343, "grad_norm": 0.5775798643998891, "learning_rate": 0.00015975727916199125, "loss": 1.7016, "step": 1548 }, { "epoch": 0.3175481754817548, "grad_norm": 0.5264697985756588, "learning_rate": 0.00015970402196067326, "loss": 1.6893, "step": 1549 }, { "epoch": 0.31775317753177534, "grad_norm": 0.6081792600824154, "learning_rate": 0.0001596507384325948, "loss": 1.5229, "step": 1550 }, { "epoch": 0.31795817958179584, "grad_norm": 0.6325072580282736, "learning_rate": 0.00015959742860125153, "loss": 1.7011, "step": 1551 }, { "epoch": 0.31816318163181634, "grad_norm": 0.6172374078055283, "learning_rate": 0.00015954409249015065, "loss": 1.6627, "step": 1552 }, { "epoch": 0.31836818368183684, "grad_norm": 0.5210368811839309, "learning_rate": 0.00015949073012281093, "loss": 1.602, "step": 1553 }, { "epoch": 0.31857318573185733, "grad_norm": 0.5824052977175983, "learning_rate": 0.00015943734152276277, "loss": 1.6087, "step": 1554 }, { "epoch": 0.31877818778187783, "grad_norm": 0.5799045635179078, "learning_rate": 0.00015938392671354813, "loss": 1.6054, "step": 1555 }, { "epoch": 0.31898318983189833, "grad_norm": 0.6101232325561027, "learning_rate": 0.00015933048571872051, "loss": 1.6746, "step": 1556 }, { "epoch": 0.3191881918819188, "grad_norm": 0.62031143873723, "learning_rate": 0.00015927701856184495, "loss": 1.6361, "step": 1557 }, { "epoch": 0.3193931939319393, "grad_norm": 0.5340591908608283, "learning_rate": 0.00015922352526649803, "loss": 1.6567, "step": 1558 }, { "epoch": 0.3195981959819598, "grad_norm": 0.5921128155516477, "learning_rate": 0.00015917000585626785, "loss": 1.611, "step": 1559 }, { "epoch": 0.3198031980319803, "grad_norm": 0.6005879551200329, "learning_rate": 0.0001591164603547541, "loss": 1.6542, "step": 1560 }, { "epoch": 0.3200082000820008, "grad_norm": 0.5478549313862134, "learning_rate": 0.00015906288878556784, "loss": 1.594, "step": 1561 }, { "epoch": 0.3202132021320213, "grad_norm": 0.6228574999405933, "learning_rate": 0.0001590092911723317, "loss": 1.7298, "step": 1562 }, { "epoch": 0.3204182041820418, "grad_norm": 0.5571955643386896, "learning_rate": 0.0001589556675386799, "loss": 1.6305, "step": 1563 }, { "epoch": 0.3206232062320623, "grad_norm": 0.6699193358734897, "learning_rate": 0.00015890201790825788, "loss": 1.7347, "step": 1564 }, { "epoch": 0.3208282082820828, "grad_norm": 0.6115176380293189, "learning_rate": 0.0001588483423047228, "loss": 1.6538, "step": 1565 }, { "epoch": 0.3210332103321033, "grad_norm": 0.5855253007786319, "learning_rate": 0.00015879464075174308, "loss": 1.6841, "step": 1566 }, { "epoch": 0.3212382123821238, "grad_norm": 0.709009552707181, "learning_rate": 0.00015874091327299872, "loss": 1.7253, "step": 1567 }, { "epoch": 0.3214432144321443, "grad_norm": 0.5932666319209314, "learning_rate": 0.00015868715989218109, "loss": 1.6043, "step": 1568 }, { "epoch": 0.3216482164821648, "grad_norm": 0.5963803147610166, "learning_rate": 0.00015863338063299294, "loss": 1.6697, "step": 1569 }, { "epoch": 0.32185321853218535, "grad_norm": 0.5729312161917175, "learning_rate": 0.00015857957551914853, "loss": 1.6554, "step": 1570 }, { "epoch": 0.32205822058220585, "grad_norm": 0.5969413652818404, "learning_rate": 0.00015852574457437345, "loss": 1.614, "step": 1571 }, { "epoch": 0.32226322263222634, "grad_norm": 0.5692049023093891, "learning_rate": 0.0001584718878224047, "loss": 1.6431, "step": 1572 }, { "epoch": 0.32246822468224684, "grad_norm": 0.622346988005485, "learning_rate": 0.00015841800528699072, "loss": 1.6848, "step": 1573 }, { "epoch": 0.32267322673226734, "grad_norm": 0.5616108191195761, "learning_rate": 0.00015836409699189114, "loss": 1.6516, "step": 1574 }, { "epoch": 0.32287822878228783, "grad_norm": 0.5671225321927746, "learning_rate": 0.00015831016296087715, "loss": 1.6504, "step": 1575 }, { "epoch": 0.32308323083230833, "grad_norm": 0.5191009492610145, "learning_rate": 0.0001582562032177312, "loss": 1.5916, "step": 1576 }, { "epoch": 0.32328823288232883, "grad_norm": 0.6255391702873446, "learning_rate": 0.0001582022177862471, "loss": 1.6749, "step": 1577 }, { "epoch": 0.3234932349323493, "grad_norm": 0.5342099977843426, "learning_rate": 0.00015814820669022986, "loss": 1.6707, "step": 1578 }, { "epoch": 0.3236982369823698, "grad_norm": 0.5699650352474503, "learning_rate": 0.00015809416995349608, "loss": 1.676, "step": 1579 }, { "epoch": 0.3239032390323903, "grad_norm": 0.5466164647150458, "learning_rate": 0.00015804010759987343, "loss": 1.6765, "step": 1580 }, { "epoch": 0.3241082410824108, "grad_norm": 0.5409749893919559, "learning_rate": 0.00015798601965320096, "loss": 1.6276, "step": 1581 }, { "epoch": 0.3243132431324313, "grad_norm": 0.5854379903493903, "learning_rate": 0.00015793190613732892, "loss": 1.6655, "step": 1582 }, { "epoch": 0.3245182451824518, "grad_norm": 0.6131935964876746, "learning_rate": 0.00015787776707611902, "loss": 1.7083, "step": 1583 }, { "epoch": 0.3247232472324723, "grad_norm": 0.657230494200856, "learning_rate": 0.00015782360249344407, "loss": 1.6877, "step": 1584 }, { "epoch": 0.3249282492824928, "grad_norm": 0.5313191465744274, "learning_rate": 0.00015776941241318822, "loss": 1.631, "step": 1585 }, { "epoch": 0.3251332513325133, "grad_norm": 0.5697330884026001, "learning_rate": 0.00015771519685924682, "loss": 1.7189, "step": 1586 }, { "epoch": 0.3253382533825338, "grad_norm": 0.5956595244876132, "learning_rate": 0.00015766095585552646, "loss": 1.6337, "step": 1587 }, { "epoch": 0.3255432554325543, "grad_norm": 0.593133601810476, "learning_rate": 0.00015760668942594496, "loss": 1.6399, "step": 1588 }, { "epoch": 0.3257482574825748, "grad_norm": 0.5761780065409972, "learning_rate": 0.00015755239759443135, "loss": 1.6087, "step": 1589 }, { "epoch": 0.32595325953259535, "grad_norm": 0.5925984526938471, "learning_rate": 0.00015749808038492585, "loss": 1.6524, "step": 1590 }, { "epoch": 0.32615826158261585, "grad_norm": 0.5850255560121983, "learning_rate": 0.00015744373782137992, "loss": 1.6432, "step": 1591 }, { "epoch": 0.32636326363263635, "grad_norm": 0.5748735732717458, "learning_rate": 0.0001573893699277561, "loss": 1.5956, "step": 1592 }, { "epoch": 0.32656826568265684, "grad_norm": 0.6065485410032901, "learning_rate": 0.0001573349767280282, "loss": 1.7322, "step": 1593 }, { "epoch": 0.32677326773267734, "grad_norm": 0.6207846211881545, "learning_rate": 0.00015728055824618112, "loss": 1.6414, "step": 1594 }, { "epoch": 0.32697826978269784, "grad_norm": 0.60966945180391, "learning_rate": 0.00015722611450621102, "loss": 1.6396, "step": 1595 }, { "epoch": 0.32718327183271834, "grad_norm": 0.5487266395738928, "learning_rate": 0.000157171645532125, "loss": 1.6069, "step": 1596 }, { "epoch": 0.32738827388273883, "grad_norm": 0.5600983596625937, "learning_rate": 0.00015711715134794147, "loss": 1.6288, "step": 1597 }, { "epoch": 0.32759327593275933, "grad_norm": 0.5725634986973878, "learning_rate": 0.00015706263197768987, "loss": 1.5998, "step": 1598 }, { "epoch": 0.32779827798277983, "grad_norm": 0.5586418168855353, "learning_rate": 0.0001570080874454108, "loss": 1.589, "step": 1599 }, { "epoch": 0.3280032800328003, "grad_norm": 0.5485713679637177, "learning_rate": 0.00015695351777515583, "loss": 1.503, "step": 1600 }, { "epoch": 0.3282082820828208, "grad_norm": 0.5823726402188383, "learning_rate": 0.0001568989229909878, "loss": 1.6207, "step": 1601 }, { "epoch": 0.3284132841328413, "grad_norm": 0.5557323266145563, "learning_rate": 0.0001568443031169805, "loss": 1.6109, "step": 1602 }, { "epoch": 0.3286182861828618, "grad_norm": 0.6657897515465551, "learning_rate": 0.00015678965817721881, "loss": 1.7639, "step": 1603 }, { "epoch": 0.3288232882328823, "grad_norm": 0.5733910269341344, "learning_rate": 0.00015673498819579864, "loss": 1.6422, "step": 1604 }, { "epoch": 0.3290282902829028, "grad_norm": 0.585377799360969, "learning_rate": 0.00015668029319682698, "loss": 1.6102, "step": 1605 }, { "epoch": 0.3292332923329233, "grad_norm": 0.5546926420462501, "learning_rate": 0.00015662557320442186, "loss": 1.6394, "step": 1606 }, { "epoch": 0.3294382943829438, "grad_norm": 0.6253775735216174, "learning_rate": 0.0001565708282427123, "loss": 1.6266, "step": 1607 }, { "epoch": 0.3296432964329643, "grad_norm": 0.618952334138591, "learning_rate": 0.00015651605833583832, "loss": 1.7086, "step": 1608 }, { "epoch": 0.3298482984829848, "grad_norm": 0.5609377010825004, "learning_rate": 0.00015646126350795102, "loss": 1.6711, "step": 1609 }, { "epoch": 0.33005330053300536, "grad_norm": 0.5116265674264238, "learning_rate": 0.00015640644378321235, "loss": 1.6449, "step": 1610 }, { "epoch": 0.33025830258302585, "grad_norm": 0.555263215164451, "learning_rate": 0.00015635159918579535, "loss": 1.5872, "step": 1611 }, { "epoch": 0.33046330463304635, "grad_norm": 0.6432144082072414, "learning_rate": 0.00015629672973988402, "loss": 1.6744, "step": 1612 }, { "epoch": 0.33066830668306685, "grad_norm": 0.5855426576651509, "learning_rate": 0.00015624183546967323, "loss": 1.6365, "step": 1613 }, { "epoch": 0.33087330873308735, "grad_norm": 0.6499437957468279, "learning_rate": 0.00015618691639936896, "loss": 1.6525, "step": 1614 }, { "epoch": 0.33107831078310784, "grad_norm": 0.5002458007657454, "learning_rate": 0.0001561319725531879, "loss": 1.5731, "step": 1615 }, { "epoch": 0.33128331283312834, "grad_norm": 0.6286863989121065, "learning_rate": 0.0001560770039553579, "loss": 1.599, "step": 1616 }, { "epoch": 0.33148831488314884, "grad_norm": 0.6046846610346077, "learning_rate": 0.00015602201063011752, "loss": 1.6664, "step": 1617 }, { "epoch": 0.33169331693316934, "grad_norm": 0.5189878297018823, "learning_rate": 0.0001559669926017164, "loss": 1.6235, "step": 1618 }, { "epoch": 0.33189831898318983, "grad_norm": 0.6094463477253609, "learning_rate": 0.00015591194989441492, "loss": 1.7501, "step": 1619 }, { "epoch": 0.33210332103321033, "grad_norm": 0.647981605618956, "learning_rate": 0.0001558568825324845, "loss": 1.6398, "step": 1620 }, { "epoch": 0.3323083230832308, "grad_norm": 0.55411513030306, "learning_rate": 0.00015580179054020725, "loss": 1.6643, "step": 1621 }, { "epoch": 0.3325133251332513, "grad_norm": 0.5647748332443943, "learning_rate": 0.00015574667394187627, "loss": 1.7195, "step": 1622 }, { "epoch": 0.3327183271832718, "grad_norm": 0.6149250387843381, "learning_rate": 0.00015569153276179547, "loss": 1.6611, "step": 1623 }, { "epoch": 0.3329233292332923, "grad_norm": 0.5870375631238135, "learning_rate": 0.00015563636702427966, "loss": 1.7075, "step": 1624 }, { "epoch": 0.3331283312833128, "grad_norm": 0.5428384129030388, "learning_rate": 0.00015558117675365437, "loss": 1.6616, "step": 1625 }, { "epoch": 0.3333333333333333, "grad_norm": 0.6197037259504439, "learning_rate": 0.00015552596197425595, "loss": 1.6958, "step": 1626 }, { "epoch": 0.3335383353833538, "grad_norm": 0.5624511768510775, "learning_rate": 0.00015547072271043173, "loss": 1.5972, "step": 1627 }, { "epoch": 0.3337433374333743, "grad_norm": 0.5187645316511276, "learning_rate": 0.00015541545898653961, "loss": 1.6799, "step": 1628 }, { "epoch": 0.3339483394833948, "grad_norm": 0.6312735507952534, "learning_rate": 0.00015536017082694846, "loss": 1.6578, "step": 1629 }, { "epoch": 0.33415334153341536, "grad_norm": 0.6156743275382334, "learning_rate": 0.0001553048582560378, "loss": 1.716, "step": 1630 }, { "epoch": 0.33435834358343586, "grad_norm": 0.5900175590897995, "learning_rate": 0.00015524952129819796, "loss": 1.634, "step": 1631 }, { "epoch": 0.33456334563345635, "grad_norm": 0.568716879683154, "learning_rate": 0.00015519415997783002, "loss": 1.6427, "step": 1632 }, { "epoch": 0.33476834768347685, "grad_norm": 0.5502966428997514, "learning_rate": 0.00015513877431934584, "loss": 1.6725, "step": 1633 }, { "epoch": 0.33497334973349735, "grad_norm": 0.6151753382081785, "learning_rate": 0.00015508336434716795, "loss": 1.6782, "step": 1634 }, { "epoch": 0.33517835178351785, "grad_norm": 0.5829160585638562, "learning_rate": 0.00015502793008572964, "loss": 1.6899, "step": 1635 }, { "epoch": 0.33538335383353834, "grad_norm": 0.5931408186265753, "learning_rate": 0.00015497247155947492, "loss": 1.6837, "step": 1636 }, { "epoch": 0.33558835588355884, "grad_norm": 0.6026650377425974, "learning_rate": 0.00015491698879285842, "loss": 1.6257, "step": 1637 }, { "epoch": 0.33579335793357934, "grad_norm": 0.532461746324293, "learning_rate": 0.00015486148181034553, "loss": 1.6284, "step": 1638 }, { "epoch": 0.33599835998359984, "grad_norm": 0.6386340893470719, "learning_rate": 0.00015480595063641238, "loss": 1.6796, "step": 1639 }, { "epoch": 0.33620336203362033, "grad_norm": 0.6037468828026835, "learning_rate": 0.00015475039529554564, "loss": 1.7667, "step": 1640 }, { "epoch": 0.33640836408364083, "grad_norm": 0.5635709449103589, "learning_rate": 0.00015469481581224272, "loss": 1.648, "step": 1641 }, { "epoch": 0.33661336613366133, "grad_norm": 0.5084373853514397, "learning_rate": 0.00015463921221101158, "loss": 1.5599, "step": 1642 }, { "epoch": 0.3368183681836818, "grad_norm": 0.6391914419141972, "learning_rate": 0.00015458358451637093, "loss": 1.6874, "step": 1643 }, { "epoch": 0.3370233702337023, "grad_norm": 0.5950859759723373, "learning_rate": 0.00015452793275285006, "loss": 1.6331, "step": 1644 }, { "epoch": 0.3372283722837228, "grad_norm": 0.6211635976751108, "learning_rate": 0.00015447225694498887, "loss": 1.7091, "step": 1645 }, { "epoch": 0.3374333743337433, "grad_norm": 0.5343127736045816, "learning_rate": 0.00015441655711733785, "loss": 1.7163, "step": 1646 }, { "epoch": 0.3376383763837638, "grad_norm": 0.565977935539105, "learning_rate": 0.00015436083329445805, "loss": 1.7085, "step": 1647 }, { "epoch": 0.3378433784337843, "grad_norm": 0.5406296483162867, "learning_rate": 0.00015430508550092124, "loss": 1.6425, "step": 1648 }, { "epoch": 0.3380483804838048, "grad_norm": 0.5231616714524926, "learning_rate": 0.00015424931376130957, "loss": 1.7039, "step": 1649 }, { "epoch": 0.33825338253382536, "grad_norm": 0.5323725080074326, "learning_rate": 0.00015419351810021592, "loss": 1.6244, "step": 1650 }, { "epoch": 0.33845838458384586, "grad_norm": 0.5129775750096033, "learning_rate": 0.00015413769854224357, "loss": 1.6414, "step": 1651 }, { "epoch": 0.33866338663386636, "grad_norm": 0.5444353015609091, "learning_rate": 0.00015408185511200646, "loss": 1.6455, "step": 1652 }, { "epoch": 0.33886838868388686, "grad_norm": 0.551735368139582, "learning_rate": 0.00015402598783412897, "loss": 1.6714, "step": 1653 }, { "epoch": 0.33907339073390735, "grad_norm": 0.5638749902872925, "learning_rate": 0.00015397009673324608, "loss": 1.685, "step": 1654 }, { "epoch": 0.33927839278392785, "grad_norm": 0.5810194079182676, "learning_rate": 0.00015391418183400313, "loss": 1.6446, "step": 1655 }, { "epoch": 0.33948339483394835, "grad_norm": 0.6126328711012577, "learning_rate": 0.00015385824316105614, "loss": 1.596, "step": 1656 }, { "epoch": 0.33968839688396885, "grad_norm": 0.5403590423760342, "learning_rate": 0.00015380228073907156, "loss": 1.6428, "step": 1657 }, { "epoch": 0.33989339893398934, "grad_norm": 0.602602566251628, "learning_rate": 0.00015374629459272612, "loss": 1.6756, "step": 1658 }, { "epoch": 0.34009840098400984, "grad_norm": 0.5880330031291576, "learning_rate": 0.0001536902847467073, "loss": 1.6478, "step": 1659 }, { "epoch": 0.34030340303403034, "grad_norm": 0.5668568450745457, "learning_rate": 0.00015363425122571285, "loss": 1.5807, "step": 1660 }, { "epoch": 0.34050840508405084, "grad_norm": 0.5932088461639127, "learning_rate": 0.000153578194054451, "loss": 1.6765, "step": 1661 }, { "epoch": 0.34071340713407133, "grad_norm": 0.5600105702542542, "learning_rate": 0.00015352211325764042, "loss": 1.6032, "step": 1662 }, { "epoch": 0.34091840918409183, "grad_norm": 0.521513431101829, "learning_rate": 0.0001534660088600102, "loss": 1.6388, "step": 1663 }, { "epoch": 0.34112341123411233, "grad_norm": 0.5789111646759075, "learning_rate": 0.00015340988088629982, "loss": 1.694, "step": 1664 }, { "epoch": 0.3413284132841328, "grad_norm": 0.5369205610119117, "learning_rate": 0.0001533537293612592, "loss": 1.6439, "step": 1665 }, { "epoch": 0.3415334153341533, "grad_norm": 0.5287426687052339, "learning_rate": 0.00015329755430964855, "loss": 1.605, "step": 1666 }, { "epoch": 0.3417384173841738, "grad_norm": 0.4963182573882921, "learning_rate": 0.00015324135575623857, "loss": 1.6616, "step": 1667 }, { "epoch": 0.3419434194341943, "grad_norm": 0.5323323111210633, "learning_rate": 0.00015318513372581026, "loss": 1.681, "step": 1668 }, { "epoch": 0.3421484214842148, "grad_norm": 0.546287224160045, "learning_rate": 0.00015312888824315493, "loss": 1.6302, "step": 1669 }, { "epoch": 0.34235342353423537, "grad_norm": 0.5344070123931473, "learning_rate": 0.0001530726193330743, "loss": 1.6694, "step": 1670 }, { "epoch": 0.34255842558425587, "grad_norm": 0.6280400876524551, "learning_rate": 0.00015301632702038046, "loss": 1.6097, "step": 1671 }, { "epoch": 0.34276342763427636, "grad_norm": 0.5232553469435289, "learning_rate": 0.00015296001132989573, "loss": 1.6069, "step": 1672 }, { "epoch": 0.34296842968429686, "grad_norm": 0.5873367814016014, "learning_rate": 0.00015290367228645274, "loss": 1.6551, "step": 1673 }, { "epoch": 0.34317343173431736, "grad_norm": 0.6222353004332912, "learning_rate": 0.00015284730991489446, "loss": 1.6964, "step": 1674 }, { "epoch": 0.34337843378433786, "grad_norm": 0.544747566749662, "learning_rate": 0.00015279092424007418, "loss": 1.6713, "step": 1675 }, { "epoch": 0.34358343583435835, "grad_norm": 0.5902207579849043, "learning_rate": 0.00015273451528685539, "loss": 1.5981, "step": 1676 }, { "epoch": 0.34378843788437885, "grad_norm": 0.6093540192670327, "learning_rate": 0.00015267808308011183, "loss": 1.6595, "step": 1677 }, { "epoch": 0.34399343993439935, "grad_norm": 0.5849310757415744, "learning_rate": 0.0001526216276447276, "loss": 1.6207, "step": 1678 }, { "epoch": 0.34419844198441985, "grad_norm": 0.6392302966849692, "learning_rate": 0.00015256514900559694, "loss": 1.6301, "step": 1679 }, { "epoch": 0.34440344403444034, "grad_norm": 0.5962228685231873, "learning_rate": 0.00015250864718762438, "loss": 1.6631, "step": 1680 }, { "epoch": 0.34460844608446084, "grad_norm": 0.5853347135318259, "learning_rate": 0.00015245212221572468, "loss": 1.6164, "step": 1681 }, { "epoch": 0.34481344813448134, "grad_norm": 0.6602196760535952, "learning_rate": 0.00015239557411482276, "loss": 1.7026, "step": 1682 }, { "epoch": 0.34501845018450183, "grad_norm": 0.6030016779364876, "learning_rate": 0.00015233900290985373, "loss": 1.6896, "step": 1683 }, { "epoch": 0.34522345223452233, "grad_norm": 0.526912577459109, "learning_rate": 0.00015228240862576303, "loss": 1.6117, "step": 1684 }, { "epoch": 0.34542845428454283, "grad_norm": 0.5955254016227488, "learning_rate": 0.00015222579128750603, "loss": 1.6221, "step": 1685 }, { "epoch": 0.3456334563345633, "grad_norm": 0.5871478192634015, "learning_rate": 0.00015216915092004847, "loss": 1.6877, "step": 1686 }, { "epoch": 0.3458384583845838, "grad_norm": 0.5482498742214277, "learning_rate": 0.00015211248754836616, "loss": 1.6323, "step": 1687 }, { "epoch": 0.3460434604346043, "grad_norm": 0.5450644757691485, "learning_rate": 0.00015205580119744512, "loss": 1.6376, "step": 1688 }, { "epoch": 0.3462484624846248, "grad_norm": 0.5960625000649314, "learning_rate": 0.00015199909189228137, "loss": 1.6232, "step": 1689 }, { "epoch": 0.3464534645346454, "grad_norm": 0.6607425068589716, "learning_rate": 0.00015194235965788124, "loss": 1.6867, "step": 1690 }, { "epoch": 0.34665846658466587, "grad_norm": 0.5775691030515819, "learning_rate": 0.000151885604519261, "loss": 1.6173, "step": 1691 }, { "epoch": 0.34686346863468637, "grad_norm": 0.5702122112404413, "learning_rate": 0.0001518288265014471, "loss": 1.6367, "step": 1692 }, { "epoch": 0.34706847068470686, "grad_norm": 0.6564208205979628, "learning_rate": 0.00015177202562947602, "loss": 1.6145, "step": 1693 }, { "epoch": 0.34727347273472736, "grad_norm": 0.65692230602941, "learning_rate": 0.00015171520192839446, "loss": 1.692, "step": 1694 }, { "epoch": 0.34747847478474786, "grad_norm": 0.5449801153723827, "learning_rate": 0.000151658355423259, "loss": 1.6202, "step": 1695 }, { "epoch": 0.34768347683476836, "grad_norm": 0.6291062430582015, "learning_rate": 0.00015160148613913642, "loss": 1.6894, "step": 1696 }, { "epoch": 0.34788847888478885, "grad_norm": 0.5761359293304741, "learning_rate": 0.0001515445941011035, "loss": 1.5725, "step": 1697 }, { "epoch": 0.34809348093480935, "grad_norm": 0.6176068847705807, "learning_rate": 0.00015148767933424696, "loss": 1.6317, "step": 1698 }, { "epoch": 0.34829848298482985, "grad_norm": 0.6028913335303212, "learning_rate": 0.00015143074186366374, "loss": 1.604, "step": 1699 }, { "epoch": 0.34850348503485035, "grad_norm": 0.5862828710950136, "learning_rate": 0.0001513737817144606, "loss": 1.6069, "step": 1700 }, { "epoch": 0.34870848708487084, "grad_norm": 0.5465012014851335, "learning_rate": 0.00015131679891175438, "loss": 1.6571, "step": 1701 }, { "epoch": 0.34891348913489134, "grad_norm": 0.6720528422400531, "learning_rate": 0.00015125979348067195, "loss": 1.698, "step": 1702 }, { "epoch": 0.34911849118491184, "grad_norm": 0.620343406904573, "learning_rate": 0.00015120276544635007, "loss": 1.6841, "step": 1703 }, { "epoch": 0.34932349323493234, "grad_norm": 0.591826032855559, "learning_rate": 0.00015114571483393552, "loss": 1.674, "step": 1704 }, { "epoch": 0.34952849528495283, "grad_norm": 0.550277802412764, "learning_rate": 0.00015108864166858506, "loss": 1.6216, "step": 1705 }, { "epoch": 0.34973349733497333, "grad_norm": 0.6161607732083954, "learning_rate": 0.00015103154597546532, "loss": 1.6167, "step": 1706 }, { "epoch": 0.34993849938499383, "grad_norm": 0.6225743336682613, "learning_rate": 0.00015097442777975295, "loss": 1.6702, "step": 1707 }, { "epoch": 0.3501435014350143, "grad_norm": 0.5317790917490676, "learning_rate": 0.00015091728710663445, "loss": 1.6735, "step": 1708 }, { "epoch": 0.3503485034850348, "grad_norm": 0.5742252874836843, "learning_rate": 0.00015086012398130624, "loss": 1.6381, "step": 1709 }, { "epoch": 0.3505535055350554, "grad_norm": 0.5961741041096219, "learning_rate": 0.00015080293842897468, "loss": 1.6527, "step": 1710 }, { "epoch": 0.3507585075850759, "grad_norm": 0.5224316752967613, "learning_rate": 0.00015074573047485604, "loss": 1.6825, "step": 1711 }, { "epoch": 0.35096350963509637, "grad_norm": 0.4834365629521206, "learning_rate": 0.00015068850014417635, "loss": 1.6302, "step": 1712 }, { "epoch": 0.35116851168511687, "grad_norm": 0.6025243605637712, "learning_rate": 0.00015063124746217166, "loss": 1.6865, "step": 1713 }, { "epoch": 0.35137351373513737, "grad_norm": 0.609654396206412, "learning_rate": 0.00015057397245408772, "loss": 1.5896, "step": 1714 }, { "epoch": 0.35157851578515786, "grad_norm": 0.5094118161082266, "learning_rate": 0.0001505166751451803, "loss": 1.6228, "step": 1715 }, { "epoch": 0.35178351783517836, "grad_norm": 0.5719282652035563, "learning_rate": 0.00015045935556071485, "loss": 1.7014, "step": 1716 }, { "epoch": 0.35198851988519886, "grad_norm": 0.5765880868026615, "learning_rate": 0.0001504020137259667, "loss": 1.5913, "step": 1717 }, { "epoch": 0.35219352193521936, "grad_norm": 0.6040820788939356, "learning_rate": 0.00015034464966622103, "loss": 1.6797, "step": 1718 }, { "epoch": 0.35239852398523985, "grad_norm": 0.6038907066362674, "learning_rate": 0.00015028726340677277, "loss": 1.6659, "step": 1719 }, { "epoch": 0.35260352603526035, "grad_norm": 0.5331098424480955, "learning_rate": 0.00015022985497292662, "loss": 1.6802, "step": 1720 }, { "epoch": 0.35280852808528085, "grad_norm": 0.555754030532374, "learning_rate": 0.00015017242438999711, "loss": 1.7191, "step": 1721 }, { "epoch": 0.35301353013530135, "grad_norm": 0.556487224859208, "learning_rate": 0.00015011497168330851, "loss": 1.5971, "step": 1722 }, { "epoch": 0.35321853218532184, "grad_norm": 0.5401759358557732, "learning_rate": 0.00015005749687819488, "loss": 1.6382, "step": 1723 }, { "epoch": 0.35342353423534234, "grad_norm": 0.49642833456506713, "learning_rate": 0.00015000000000000001, "loss": 1.5671, "step": 1724 }, { "epoch": 0.35362853628536284, "grad_norm": 0.5617076341377524, "learning_rate": 0.00014994248107407735, "loss": 1.6429, "step": 1725 }, { "epoch": 0.35383353833538334, "grad_norm": 0.6082603658900633, "learning_rate": 0.00014988494012579018, "loss": 1.6493, "step": 1726 }, { "epoch": 0.35403854038540383, "grad_norm": 0.5330591965008474, "learning_rate": 0.00014982737718051143, "loss": 1.5926, "step": 1727 }, { "epoch": 0.35424354243542433, "grad_norm": 0.5419773966322774, "learning_rate": 0.00014976979226362372, "loss": 1.6846, "step": 1728 }, { "epoch": 0.3544485444854448, "grad_norm": 0.6110764865851752, "learning_rate": 0.0001497121854005194, "loss": 1.5879, "step": 1729 }, { "epoch": 0.3546535465354654, "grad_norm": 0.5978549863044518, "learning_rate": 0.0001496545566166005, "loss": 1.5818, "step": 1730 }, { "epoch": 0.3548585485854859, "grad_norm": 0.5631804875064101, "learning_rate": 0.00014959690593727867, "loss": 1.6403, "step": 1731 }, { "epoch": 0.3550635506355064, "grad_norm": 0.6052697837634529, "learning_rate": 0.00014953923338797525, "loss": 1.6593, "step": 1732 }, { "epoch": 0.3552685526855269, "grad_norm": 0.6019382886706088, "learning_rate": 0.00014948153899412117, "loss": 1.6565, "step": 1733 }, { "epoch": 0.35547355473554737, "grad_norm": 0.5938042790425807, "learning_rate": 0.00014942382278115713, "loss": 1.6956, "step": 1734 }, { "epoch": 0.35567855678556787, "grad_norm": 0.5434100540156978, "learning_rate": 0.00014936608477453327, "loss": 1.6359, "step": 1735 }, { "epoch": 0.35588355883558837, "grad_norm": 0.5709215426087542, "learning_rate": 0.00014930832499970942, "loss": 1.6779, "step": 1736 }, { "epoch": 0.35608856088560886, "grad_norm": 0.6170412210009857, "learning_rate": 0.00014925054348215514, "loss": 1.6539, "step": 1737 }, { "epoch": 0.35629356293562936, "grad_norm": 0.6001765732036815, "learning_rate": 0.00014919274024734932, "loss": 1.6624, "step": 1738 }, { "epoch": 0.35649856498564986, "grad_norm": 0.5466509317661203, "learning_rate": 0.00014913491532078058, "loss": 1.7094, "step": 1739 }, { "epoch": 0.35670356703567035, "grad_norm": 0.5779924064284437, "learning_rate": 0.00014907706872794714, "loss": 1.6246, "step": 1740 }, { "epoch": 0.35690856908569085, "grad_norm": 0.5697276052761565, "learning_rate": 0.0001490192004943567, "loss": 1.6454, "step": 1741 }, { "epoch": 0.35711357113571135, "grad_norm": 0.6025969182717065, "learning_rate": 0.0001489613106455265, "loss": 1.6029, "step": 1742 }, { "epoch": 0.35731857318573185, "grad_norm": 0.6160056016225646, "learning_rate": 0.00014890339920698334, "loss": 1.6772, "step": 1743 }, { "epoch": 0.35752357523575234, "grad_norm": 0.5591041568403124, "learning_rate": 0.00014884546620426355, "loss": 1.6816, "step": 1744 }, { "epoch": 0.35772857728577284, "grad_norm": 0.5766152081156878, "learning_rate": 0.00014878751166291294, "loss": 1.6527, "step": 1745 }, { "epoch": 0.35793357933579334, "grad_norm": 0.5234920585905708, "learning_rate": 0.00014872953560848677, "loss": 1.6888, "step": 1746 }, { "epoch": 0.35813858138581384, "grad_norm": 0.5272317927038603, "learning_rate": 0.00014867153806654996, "loss": 1.661, "step": 1747 }, { "epoch": 0.35834358343583433, "grad_norm": 0.575154394340279, "learning_rate": 0.00014861351906267673, "loss": 1.6422, "step": 1748 }, { "epoch": 0.35854858548585483, "grad_norm": 0.5360553303951028, "learning_rate": 0.0001485554786224508, "loss": 1.5351, "step": 1749 }, { "epoch": 0.3587535875358754, "grad_norm": 0.6010369925056703, "learning_rate": 0.00014849741677146541, "loss": 1.6325, "step": 1750 }, { "epoch": 0.3589585895858959, "grad_norm": 0.5568039112696883, "learning_rate": 0.0001484393335353232, "loss": 1.6794, "step": 1751 }, { "epoch": 0.3591635916359164, "grad_norm": 0.559377587463221, "learning_rate": 0.00014838122893963618, "loss": 1.6792, "step": 1752 }, { "epoch": 0.3593685936859369, "grad_norm": 0.5909510959520973, "learning_rate": 0.00014832310301002587, "loss": 1.6569, "step": 1753 }, { "epoch": 0.3595735957359574, "grad_norm": 0.591970821601008, "learning_rate": 0.0001482649557721232, "loss": 1.644, "step": 1754 }, { "epoch": 0.35977859778597787, "grad_norm": 0.5003211660043793, "learning_rate": 0.00014820678725156844, "loss": 1.6719, "step": 1755 }, { "epoch": 0.35998359983599837, "grad_norm": 0.5433120973749493, "learning_rate": 0.00014814859747401123, "loss": 1.629, "step": 1756 }, { "epoch": 0.36018860188601887, "grad_norm": 0.5958620106137993, "learning_rate": 0.00014809038646511062, "loss": 1.6826, "step": 1757 }, { "epoch": 0.36039360393603936, "grad_norm": 0.5122995884938817, "learning_rate": 0.00014803215425053504, "loss": 1.5742, "step": 1758 }, { "epoch": 0.36059860598605986, "grad_norm": 0.47327092439572627, "learning_rate": 0.00014797390085596228, "loss": 1.6212, "step": 1759 }, { "epoch": 0.36080360803608036, "grad_norm": 0.544121867106752, "learning_rate": 0.0001479156263070794, "loss": 1.5911, "step": 1760 }, { "epoch": 0.36100861008610086, "grad_norm": 0.6177706205829336, "learning_rate": 0.0001478573306295828, "loss": 1.7169, "step": 1761 }, { "epoch": 0.36121361213612135, "grad_norm": 0.5279672432137881, "learning_rate": 0.0001477990138491783, "loss": 1.6308, "step": 1762 }, { "epoch": 0.36141861418614185, "grad_norm": 0.5108946200192143, "learning_rate": 0.00014774067599158093, "loss": 1.5732, "step": 1763 }, { "epoch": 0.36162361623616235, "grad_norm": 0.6326442389629837, "learning_rate": 0.00014768231708251498, "loss": 1.6418, "step": 1764 }, { "epoch": 0.36182861828618285, "grad_norm": 0.6183147647223144, "learning_rate": 0.0001476239371477141, "loss": 1.5786, "step": 1765 }, { "epoch": 0.36203362033620334, "grad_norm": 0.550509136570209, "learning_rate": 0.0001475655362129212, "loss": 1.6161, "step": 1766 }, { "epoch": 0.36223862238622384, "grad_norm": 0.4983729819066215, "learning_rate": 0.00014750711430388847, "loss": 1.5951, "step": 1767 }, { "epoch": 0.36244362443624434, "grad_norm": 0.5600708867513791, "learning_rate": 0.00014744867144637726, "loss": 1.6295, "step": 1768 }, { "epoch": 0.36264862648626484, "grad_norm": 0.6329404127859258, "learning_rate": 0.00014739020766615826, "loss": 1.6863, "step": 1769 }, { "epoch": 0.3628536285362854, "grad_norm": 0.5341822075269688, "learning_rate": 0.0001473317229890113, "loss": 1.6154, "step": 1770 }, { "epoch": 0.3630586305863059, "grad_norm": 0.5684149684186564, "learning_rate": 0.00014727321744072546, "loss": 1.6974, "step": 1771 }, { "epoch": 0.3632636326363264, "grad_norm": 0.586534189304384, "learning_rate": 0.0001472146910470991, "loss": 1.6452, "step": 1772 }, { "epoch": 0.3634686346863469, "grad_norm": 0.607537100747522, "learning_rate": 0.00014715614383393964, "loss": 1.6763, "step": 1773 }, { "epoch": 0.3636736367363674, "grad_norm": 0.5842371244180671, "learning_rate": 0.00014709757582706374, "loss": 1.7317, "step": 1774 }, { "epoch": 0.3638786387863879, "grad_norm": 0.5684426519415506, "learning_rate": 0.00014703898705229726, "loss": 1.692, "step": 1775 }, { "epoch": 0.3640836408364084, "grad_norm": 0.572071732472923, "learning_rate": 0.00014698037753547514, "loss": 1.668, "step": 1776 }, { "epoch": 0.36428864288642887, "grad_norm": 0.5922093196382778, "learning_rate": 0.00014692174730244158, "loss": 1.6961, "step": 1777 }, { "epoch": 0.36449364493644937, "grad_norm": 0.5753554886135926, "learning_rate": 0.00014686309637904977, "loss": 1.6365, "step": 1778 }, { "epoch": 0.36469864698646987, "grad_norm": 0.5485439284483765, "learning_rate": 0.00014680442479116215, "loss": 1.6975, "step": 1779 }, { "epoch": 0.36490364903649036, "grad_norm": 0.5224373889447186, "learning_rate": 0.00014674573256465024, "loss": 1.6442, "step": 1780 }, { "epoch": 0.36510865108651086, "grad_norm": 0.5813866042653603, "learning_rate": 0.00014668701972539458, "loss": 1.6447, "step": 1781 }, { "epoch": 0.36531365313653136, "grad_norm": 0.6148780578774461, "learning_rate": 0.00014662828629928494, "loss": 1.6979, "step": 1782 }, { "epoch": 0.36551865518655186, "grad_norm": 0.5992253572346069, "learning_rate": 0.00014656953231222006, "loss": 1.7374, "step": 1783 }, { "epoch": 0.36572365723657235, "grad_norm": 0.5861449043198859, "learning_rate": 0.00014651075779010774, "loss": 1.6908, "step": 1784 }, { "epoch": 0.36592865928659285, "grad_norm": 0.6133331719326407, "learning_rate": 0.00014645196275886498, "loss": 1.6957, "step": 1785 }, { "epoch": 0.36613366133661335, "grad_norm": 0.6204151873011282, "learning_rate": 0.00014639314724441754, "loss": 1.6192, "step": 1786 }, { "epoch": 0.36633866338663384, "grad_norm": 0.5432801305625949, "learning_rate": 0.00014633431127270057, "loss": 1.5985, "step": 1787 }, { "epoch": 0.36654366543665434, "grad_norm": 0.6008319852011252, "learning_rate": 0.000146275454869658, "loss": 1.6924, "step": 1788 }, { "epoch": 0.36674866748667484, "grad_norm": 0.6011855401980891, "learning_rate": 0.00014621657806124274, "loss": 1.6985, "step": 1789 }, { "epoch": 0.3669536695366954, "grad_norm": 0.6085108987066442, "learning_rate": 0.0001461576808734169, "loss": 1.6171, "step": 1790 }, { "epoch": 0.3671586715867159, "grad_norm": 0.6231654246481261, "learning_rate": 0.00014609876333215142, "loss": 1.6552, "step": 1791 }, { "epoch": 0.3673636736367364, "grad_norm": 0.5804498492413966, "learning_rate": 0.00014603982546342625, "loss": 1.6389, "step": 1792 }, { "epoch": 0.3675686756867569, "grad_norm": 0.5492002912186064, "learning_rate": 0.00014598086729323035, "loss": 1.5882, "step": 1793 }, { "epoch": 0.3677736777367774, "grad_norm": 0.5950241329323683, "learning_rate": 0.00014592188884756155, "loss": 1.6166, "step": 1794 }, { "epoch": 0.3679786797867979, "grad_norm": 0.5332968393927306, "learning_rate": 0.00014586289015242667, "loss": 1.6097, "step": 1795 }, { "epoch": 0.3681836818368184, "grad_norm": 0.6291082116455485, "learning_rate": 0.00014580387123384146, "loss": 1.7253, "step": 1796 }, { "epoch": 0.3683886838868389, "grad_norm": 0.5459247424660225, "learning_rate": 0.00014574483211783062, "loss": 1.6186, "step": 1797 }, { "epoch": 0.36859368593685937, "grad_norm": 0.5359738738014396, "learning_rate": 0.00014568577283042766, "loss": 1.7112, "step": 1798 }, { "epoch": 0.36879868798687987, "grad_norm": 0.5912518102722361, "learning_rate": 0.00014562669339767504, "loss": 1.6439, "step": 1799 }, { "epoch": 0.36900369003690037, "grad_norm": 0.5282021873923861, "learning_rate": 0.00014556759384562416, "loss": 1.5995, "step": 1800 }, { "epoch": 0.36920869208692086, "grad_norm": 0.5600266130812795, "learning_rate": 0.0001455084742003352, "loss": 1.69, "step": 1801 }, { "epoch": 0.36941369413694136, "grad_norm": 0.5464969202308386, "learning_rate": 0.00014544933448787725, "loss": 1.7284, "step": 1802 }, { "epoch": 0.36961869618696186, "grad_norm": 0.48886262438418515, "learning_rate": 0.0001453901747343282, "loss": 1.6371, "step": 1803 }, { "epoch": 0.36982369823698236, "grad_norm": 0.5025122945208039, "learning_rate": 0.00014533099496577488, "loss": 1.6813, "step": 1804 }, { "epoch": 0.37002870028700285, "grad_norm": 0.5624916134960064, "learning_rate": 0.0001452717952083128, "loss": 1.611, "step": 1805 }, { "epoch": 0.37023370233702335, "grad_norm": 0.5870215069472015, "learning_rate": 0.00014521257548804644, "loss": 1.6393, "step": 1806 }, { "epoch": 0.37043870438704385, "grad_norm": 0.549446872033983, "learning_rate": 0.00014515333583108896, "loss": 1.6776, "step": 1807 }, { "epoch": 0.37064370643706435, "grad_norm": 0.5300151703158199, "learning_rate": 0.00014509407626356232, "loss": 1.6533, "step": 1808 }, { "epoch": 0.37084870848708484, "grad_norm": 0.5989410462999809, "learning_rate": 0.00014503479681159738, "loss": 1.6537, "step": 1809 }, { "epoch": 0.3710537105371054, "grad_norm": 0.5368992858540935, "learning_rate": 0.00014497549750133365, "loss": 1.6781, "step": 1810 }, { "epoch": 0.3712587125871259, "grad_norm": 0.5533226806844579, "learning_rate": 0.0001449161783589194, "loss": 1.6581, "step": 1811 }, { "epoch": 0.3714637146371464, "grad_norm": 0.5241552700758864, "learning_rate": 0.00014485683941051173, "loss": 1.5942, "step": 1812 }, { "epoch": 0.3716687166871669, "grad_norm": 0.6058610852237856, "learning_rate": 0.00014479748068227637, "loss": 1.5724, "step": 1813 }, { "epoch": 0.3718737187371874, "grad_norm": 0.5110595905909968, "learning_rate": 0.00014473810220038785, "loss": 1.5825, "step": 1814 }, { "epoch": 0.3720787207872079, "grad_norm": 0.5783753480313738, "learning_rate": 0.0001446787039910294, "loss": 1.6987, "step": 1815 }, { "epoch": 0.3722837228372284, "grad_norm": 0.5400468144955473, "learning_rate": 0.00014461928608039285, "loss": 1.7266, "step": 1816 }, { "epoch": 0.3724887248872489, "grad_norm": 0.5860991322357123, "learning_rate": 0.0001445598484946789, "loss": 1.636, "step": 1817 }, { "epoch": 0.3726937269372694, "grad_norm": 0.5023613601584973, "learning_rate": 0.00014450039126009677, "loss": 1.647, "step": 1818 }, { "epoch": 0.3728987289872899, "grad_norm": 0.5650956274875231, "learning_rate": 0.0001444409144028644, "loss": 1.6734, "step": 1819 }, { "epoch": 0.37310373103731037, "grad_norm": 0.46927417237734576, "learning_rate": 0.00014438141794920838, "loss": 1.6833, "step": 1820 }, { "epoch": 0.37330873308733087, "grad_norm": 0.5283852968314336, "learning_rate": 0.00014432190192536397, "loss": 1.6737, "step": 1821 }, { "epoch": 0.37351373513735137, "grad_norm": 0.5085186939183504, "learning_rate": 0.000144262366357575, "loss": 1.6372, "step": 1822 }, { "epoch": 0.37371873718737186, "grad_norm": 0.5205053198340397, "learning_rate": 0.00014420281127209398, "loss": 1.5857, "step": 1823 }, { "epoch": 0.37392373923739236, "grad_norm": 0.5229325072501138, "learning_rate": 0.00014414323669518193, "loss": 1.6506, "step": 1824 }, { "epoch": 0.37412874128741286, "grad_norm": 0.5629653343937632, "learning_rate": 0.00014408364265310864, "loss": 1.691, "step": 1825 }, { "epoch": 0.37433374333743336, "grad_norm": 0.5134321816295735, "learning_rate": 0.00014402402917215227, "loss": 1.5935, "step": 1826 }, { "epoch": 0.37453874538745385, "grad_norm": 0.5466101442618813, "learning_rate": 0.0001439643962785997, "loss": 1.738, "step": 1827 }, { "epoch": 0.37474374743747435, "grad_norm": 0.5238125414218799, "learning_rate": 0.00014390474399874636, "loss": 1.5999, "step": 1828 }, { "epoch": 0.37494874948749485, "grad_norm": 0.5218377363580129, "learning_rate": 0.00014384507235889614, "loss": 1.6034, "step": 1829 }, { "epoch": 0.3751537515375154, "grad_norm": 0.4976050208989313, "learning_rate": 0.00014378538138536153, "loss": 1.5986, "step": 1830 }, { "epoch": 0.3753587535875359, "grad_norm": 0.5137796969843305, "learning_rate": 0.00014372567110446357, "loss": 1.5881, "step": 1831 }, { "epoch": 0.3755637556375564, "grad_norm": 0.5736930190020063, "learning_rate": 0.00014366594154253175, "loss": 1.6483, "step": 1832 }, { "epoch": 0.3757687576875769, "grad_norm": 0.5116032297411255, "learning_rate": 0.00014360619272590412, "loss": 1.5853, "step": 1833 }, { "epoch": 0.3759737597375974, "grad_norm": 0.4980688711218655, "learning_rate": 0.00014354642468092713, "loss": 1.6644, "step": 1834 }, { "epoch": 0.3761787617876179, "grad_norm": 0.5377789744253809, "learning_rate": 0.00014348663743395584, "loss": 1.6568, "step": 1835 }, { "epoch": 0.3763837638376384, "grad_norm": 0.5230764135862036, "learning_rate": 0.0001434268310113537, "loss": 1.6075, "step": 1836 }, { "epoch": 0.3765887658876589, "grad_norm": 0.504561126454411, "learning_rate": 0.00014336700543949256, "loss": 1.6486, "step": 1837 }, { "epoch": 0.3767937679376794, "grad_norm": 0.4701493381619338, "learning_rate": 0.00014330716074475286, "loss": 1.6083, "step": 1838 }, { "epoch": 0.3769987699876999, "grad_norm": 0.5138330976926028, "learning_rate": 0.00014324729695352337, "loss": 1.6443, "step": 1839 }, { "epoch": 0.3772037720377204, "grad_norm": 0.5286145108714454, "learning_rate": 0.00014318741409220128, "loss": 1.5607, "step": 1840 }, { "epoch": 0.3774087740877409, "grad_norm": 0.5723966036621873, "learning_rate": 0.00014312751218719224, "loss": 1.6693, "step": 1841 }, { "epoch": 0.37761377613776137, "grad_norm": 0.49228538180092907, "learning_rate": 0.00014306759126491022, "loss": 1.68, "step": 1842 }, { "epoch": 0.37781877818778187, "grad_norm": 0.5511010808124289, "learning_rate": 0.00014300765135177764, "loss": 1.6871, "step": 1843 }, { "epoch": 0.37802378023780236, "grad_norm": 0.5467359429891819, "learning_rate": 0.0001429476924742253, "loss": 1.6168, "step": 1844 }, { "epoch": 0.37822878228782286, "grad_norm": 0.5543801792450568, "learning_rate": 0.00014288771465869235, "loss": 1.6691, "step": 1845 }, { "epoch": 0.37843378433784336, "grad_norm": 0.49778237935472697, "learning_rate": 0.00014282771793162625, "loss": 1.6203, "step": 1846 }, { "epoch": 0.37863878638786386, "grad_norm": 0.5977987421200576, "learning_rate": 0.00014276770231948284, "loss": 1.668, "step": 1847 }, { "epoch": 0.37884378843788435, "grad_norm": 0.5387975611512261, "learning_rate": 0.00014270766784872627, "loss": 1.6272, "step": 1848 }, { "epoch": 0.37904879048790485, "grad_norm": 0.501506501292781, "learning_rate": 0.00014264761454582903, "loss": 1.5806, "step": 1849 }, { "epoch": 0.3792537925379254, "grad_norm": 0.4617814254616416, "learning_rate": 0.0001425875424372719, "loss": 1.5993, "step": 1850 }, { "epoch": 0.3794587945879459, "grad_norm": 0.5358879751000583, "learning_rate": 0.00014252745154954392, "loss": 1.55, "step": 1851 }, { "epoch": 0.3796637966379664, "grad_norm": 0.4923664734760799, "learning_rate": 0.00014246734190914245, "loss": 1.8, "step": 1852 }, { "epoch": 0.3798687986879869, "grad_norm": 0.481058797874573, "learning_rate": 0.00014240721354257313, "loss": 1.5733, "step": 1853 }, { "epoch": 0.3800738007380074, "grad_norm": 0.4933341040676817, "learning_rate": 0.0001423470664763498, "loss": 1.6176, "step": 1854 }, { "epoch": 0.3802788027880279, "grad_norm": 0.502656617402608, "learning_rate": 0.00014228690073699466, "loss": 1.5912, "step": 1855 }, { "epoch": 0.3804838048380484, "grad_norm": 0.5286207683211155, "learning_rate": 0.00014222671635103802, "loss": 1.6505, "step": 1856 }, { "epoch": 0.3806888068880689, "grad_norm": 0.5127352122792401, "learning_rate": 0.0001421665133450184, "loss": 1.6597, "step": 1857 }, { "epoch": 0.3808938089380894, "grad_norm": 0.4977091356272721, "learning_rate": 0.0001421062917454827, "loss": 1.6198, "step": 1858 }, { "epoch": 0.3810988109881099, "grad_norm": 0.5021022640916121, "learning_rate": 0.0001420460515789858, "loss": 1.639, "step": 1859 }, { "epoch": 0.3813038130381304, "grad_norm": 0.5326355391478207, "learning_rate": 0.00014198579287209097, "loss": 1.6867, "step": 1860 }, { "epoch": 0.3815088150881509, "grad_norm": 0.5146240562095568, "learning_rate": 0.00014192551565136953, "loss": 1.6063, "step": 1861 }, { "epoch": 0.3817138171381714, "grad_norm": 0.4966864123885708, "learning_rate": 0.00014186521994340095, "loss": 1.6287, "step": 1862 }, { "epoch": 0.38191881918819187, "grad_norm": 0.5473012060097575, "learning_rate": 0.00014180490577477293, "loss": 1.614, "step": 1863 }, { "epoch": 0.38212382123821237, "grad_norm": 0.5408838808758386, "learning_rate": 0.00014174457317208132, "loss": 1.6388, "step": 1864 }, { "epoch": 0.38232882328823287, "grad_norm": 0.49167247744856524, "learning_rate": 0.00014168422216193, "loss": 1.6015, "step": 1865 }, { "epoch": 0.38253382533825336, "grad_norm": 0.5568825321692835, "learning_rate": 0.00014162385277093103, "loss": 1.6386, "step": 1866 }, { "epoch": 0.38273882738827386, "grad_norm": 0.603523886052413, "learning_rate": 0.00014156346502570453, "loss": 1.7053, "step": 1867 }, { "epoch": 0.38294382943829436, "grad_norm": 0.5085498766412403, "learning_rate": 0.00014150305895287886, "loss": 1.5173, "step": 1868 }, { "epoch": 0.38314883148831486, "grad_norm": 0.5122342458911501, "learning_rate": 0.0001414426345790903, "loss": 1.6655, "step": 1869 }, { "epoch": 0.3833538335383354, "grad_norm": 0.5193474716248473, "learning_rate": 0.00014138219193098321, "loss": 1.608, "step": 1870 }, { "epoch": 0.3835588355883559, "grad_norm": 0.5403398184208354, "learning_rate": 0.00014132173103521012, "loss": 1.6687, "step": 1871 }, { "epoch": 0.3837638376383764, "grad_norm": 0.4666837183754349, "learning_rate": 0.00014126125191843146, "loss": 1.5998, "step": 1872 }, { "epoch": 0.3839688396883969, "grad_norm": 0.4917514933571927, "learning_rate": 0.00014120075460731583, "loss": 1.7046, "step": 1873 }, { "epoch": 0.3841738417384174, "grad_norm": 0.4792509097853011, "learning_rate": 0.00014114023912853977, "loss": 1.6512, "step": 1874 }, { "epoch": 0.3843788437884379, "grad_norm": 0.5248543484097465, "learning_rate": 0.00014107970550878787, "loss": 1.5534, "step": 1875 }, { "epoch": 0.3845838458384584, "grad_norm": 0.4330975430003677, "learning_rate": 0.00014101915377475274, "loss": 1.5329, "step": 1876 }, { "epoch": 0.3847888478884789, "grad_norm": 0.5271389145154657, "learning_rate": 0.00014095858395313484, "loss": 1.6893, "step": 1877 }, { "epoch": 0.3849938499384994, "grad_norm": 0.5275935385395334, "learning_rate": 0.0001408979960706428, "loss": 1.6249, "step": 1878 }, { "epoch": 0.3851988519885199, "grad_norm": 0.5251051425457449, "learning_rate": 0.00014083739015399314, "loss": 1.6248, "step": 1879 }, { "epoch": 0.3854038540385404, "grad_norm": 0.5472960361944075, "learning_rate": 0.0001407767662299102, "loss": 1.7137, "step": 1880 }, { "epoch": 0.3856088560885609, "grad_norm": 0.4842862703840487, "learning_rate": 0.00014071612432512651, "loss": 1.6399, "step": 1881 }, { "epoch": 0.3858138581385814, "grad_norm": 0.514186897854033, "learning_rate": 0.0001406554644663823, "loss": 1.7584, "step": 1882 }, { "epoch": 0.3860188601886019, "grad_norm": 0.5256250504230642, "learning_rate": 0.00014059478668042581, "loss": 1.6759, "step": 1883 }, { "epoch": 0.3862238622386224, "grad_norm": 0.47708847797483583, "learning_rate": 0.00014053409099401323, "loss": 1.5653, "step": 1884 }, { "epoch": 0.38642886428864287, "grad_norm": 0.4808074567168712, "learning_rate": 0.00014047337743390865, "loss": 1.6085, "step": 1885 }, { "epoch": 0.38663386633866337, "grad_norm": 0.5379097049586811, "learning_rate": 0.00014041264602688387, "loss": 1.671, "step": 1886 }, { "epoch": 0.38683886838868387, "grad_norm": 0.5590844068440689, "learning_rate": 0.00014035189679971875, "loss": 1.6947, "step": 1887 }, { "epoch": 0.38704387043870436, "grad_norm": 0.46778803409473096, "learning_rate": 0.00014029112977920088, "loss": 1.6003, "step": 1888 }, { "epoch": 0.38724887248872486, "grad_norm": 0.5235748950788491, "learning_rate": 0.00014023034499212588, "loss": 1.6214, "step": 1889 }, { "epoch": 0.3874538745387454, "grad_norm": 0.5084301034829668, "learning_rate": 0.00014016954246529696, "loss": 1.6589, "step": 1890 }, { "epoch": 0.3876588765887659, "grad_norm": 0.5150116757044244, "learning_rate": 0.00014010872222552532, "loss": 1.6186, "step": 1891 }, { "epoch": 0.3878638786387864, "grad_norm": 0.46063976559991104, "learning_rate": 0.00014004788429962988, "loss": 1.5996, "step": 1892 }, { "epoch": 0.3880688806888069, "grad_norm": 0.5333616264893559, "learning_rate": 0.00013998702871443748, "loss": 1.7368, "step": 1893 }, { "epoch": 0.3882738827388274, "grad_norm": 0.5231380511987593, "learning_rate": 0.00013992615549678262, "loss": 1.5673, "step": 1894 }, { "epoch": 0.3884788847888479, "grad_norm": 0.46415379036185195, "learning_rate": 0.0001398652646735076, "loss": 1.681, "step": 1895 }, { "epoch": 0.3886838868388684, "grad_norm": 0.5021717862089831, "learning_rate": 0.00013980435627146252, "loss": 1.6704, "step": 1896 }, { "epoch": 0.3888888888888889, "grad_norm": 0.5488608334361356, "learning_rate": 0.00013974343031750524, "loss": 1.5421, "step": 1897 }, { "epoch": 0.3890938909389094, "grad_norm": 0.514548825932426, "learning_rate": 0.00013968248683850134, "loss": 1.6595, "step": 1898 }, { "epoch": 0.3892988929889299, "grad_norm": 0.6140825978519333, "learning_rate": 0.0001396215258613241, "loss": 1.6863, "step": 1899 }, { "epoch": 0.3895038950389504, "grad_norm": 0.5235700004370976, "learning_rate": 0.00013956054741285452, "loss": 1.6869, "step": 1900 }, { "epoch": 0.3897088970889709, "grad_norm": 0.5156685806462629, "learning_rate": 0.00013949955151998136, "loss": 1.6603, "step": 1901 }, { "epoch": 0.3899138991389914, "grad_norm": 0.5547814540033799, "learning_rate": 0.00013943853820960105, "loss": 1.5975, "step": 1902 }, { "epoch": 0.3901189011890119, "grad_norm": 0.4763428662757902, "learning_rate": 0.00013937750750861767, "loss": 1.592, "step": 1903 }, { "epoch": 0.3903239032390324, "grad_norm": 0.47301906468085203, "learning_rate": 0.00013931645944394297, "loss": 1.6213, "step": 1904 }, { "epoch": 0.3905289052890529, "grad_norm": 0.5168194189397619, "learning_rate": 0.00013925539404249638, "loss": 1.6669, "step": 1905 }, { "epoch": 0.39073390733907337, "grad_norm": 0.517618601215397, "learning_rate": 0.000139194311331205, "loss": 1.6276, "step": 1906 }, { "epoch": 0.39093890938909387, "grad_norm": 0.5740641387947559, "learning_rate": 0.00013913321133700345, "loss": 1.6484, "step": 1907 }, { "epoch": 0.39114391143911437, "grad_norm": 0.5220024956764855, "learning_rate": 0.00013907209408683415, "loss": 1.6518, "step": 1908 }, { "epoch": 0.39134891348913486, "grad_norm": 0.5069357463688486, "learning_rate": 0.00013901095960764696, "loss": 1.6506, "step": 1909 }, { "epoch": 0.3915539155391554, "grad_norm": 0.5162482614187639, "learning_rate": 0.00013894980792639945, "loss": 1.594, "step": 1910 }, { "epoch": 0.3917589175891759, "grad_norm": 0.5475161774302121, "learning_rate": 0.00013888863907005668, "loss": 1.6714, "step": 1911 }, { "epoch": 0.3919639196391964, "grad_norm": 0.639273711472952, "learning_rate": 0.0001388274530655914, "loss": 1.7005, "step": 1912 }, { "epoch": 0.3921689216892169, "grad_norm": 0.5459673609382728, "learning_rate": 0.00013876624993998382, "loss": 1.6095, "step": 1913 }, { "epoch": 0.3923739237392374, "grad_norm": 0.572023981264735, "learning_rate": 0.00013870502972022173, "loss": 1.5871, "step": 1914 }, { "epoch": 0.3925789257892579, "grad_norm": 0.5467813608844581, "learning_rate": 0.00013864379243330046, "loss": 1.6066, "step": 1915 }, { "epoch": 0.3927839278392784, "grad_norm": 0.5896010275575714, "learning_rate": 0.00013858253810622293, "loss": 1.7256, "step": 1916 }, { "epoch": 0.3929889298892989, "grad_norm": 0.5430639565544005, "learning_rate": 0.00013852126676599944, "loss": 1.7053, "step": 1917 }, { "epoch": 0.3931939319393194, "grad_norm": 0.5367496954101093, "learning_rate": 0.00013845997843964792, "loss": 1.6492, "step": 1918 }, { "epoch": 0.3933989339893399, "grad_norm": 0.5096222945458827, "learning_rate": 0.0001383986731541937, "loss": 1.6632, "step": 1919 }, { "epoch": 0.3936039360393604, "grad_norm": 0.5328538003146586, "learning_rate": 0.00013833735093666963, "loss": 1.6541, "step": 1920 }, { "epoch": 0.3938089380893809, "grad_norm": 0.5460442800553386, "learning_rate": 0.00013827601181411604, "loss": 1.6917, "step": 1921 }, { "epoch": 0.3940139401394014, "grad_norm": 0.5199979660322286, "learning_rate": 0.00013821465581358072, "loss": 1.565, "step": 1922 }, { "epoch": 0.3942189421894219, "grad_norm": 0.5130904636502194, "learning_rate": 0.0001381532829621188, "loss": 1.6758, "step": 1923 }, { "epoch": 0.3944239442394424, "grad_norm": 0.5419366826007934, "learning_rate": 0.000138091893286793, "loss": 1.7313, "step": 1924 }, { "epoch": 0.3946289462894629, "grad_norm": 0.456210471109287, "learning_rate": 0.0001380304868146733, "loss": 1.54, "step": 1925 }, { "epoch": 0.3948339483394834, "grad_norm": 0.5375926738816738, "learning_rate": 0.00013796906357283723, "loss": 1.6607, "step": 1926 }, { "epoch": 0.3950389503895039, "grad_norm": 0.5489172672263147, "learning_rate": 0.0001379076235883696, "loss": 1.6175, "step": 1927 }, { "epoch": 0.39524395243952437, "grad_norm": 0.5168211728003793, "learning_rate": 0.0001378461668883627, "loss": 1.6294, "step": 1928 }, { "epoch": 0.39544895448954487, "grad_norm": 0.5087723008887542, "learning_rate": 0.0001377846934999161, "loss": 1.6459, "step": 1929 }, { "epoch": 0.3956539565395654, "grad_norm": 0.5186032448868557, "learning_rate": 0.00013772320345013678, "loss": 1.6256, "step": 1930 }, { "epoch": 0.3958589585895859, "grad_norm": 0.5702188115361192, "learning_rate": 0.00013766169676613906, "loss": 1.62, "step": 1931 }, { "epoch": 0.3960639606396064, "grad_norm": 0.5450350941952552, "learning_rate": 0.00013760017347504462, "loss": 1.5974, "step": 1932 }, { "epoch": 0.3962689626896269, "grad_norm": 0.52807741403906, "learning_rate": 0.00013753863360398241, "loss": 1.5844, "step": 1933 }, { "epoch": 0.3964739647396474, "grad_norm": 0.5277606054926492, "learning_rate": 0.0001374770771800887, "loss": 1.6242, "step": 1934 }, { "epoch": 0.3966789667896679, "grad_norm": 0.5345399675824376, "learning_rate": 0.00013741550423050712, "loss": 1.6462, "step": 1935 }, { "epoch": 0.3968839688396884, "grad_norm": 0.4757854703010009, "learning_rate": 0.00013735391478238848, "loss": 1.6136, "step": 1936 }, { "epoch": 0.3970889708897089, "grad_norm": 0.4883807837040311, "learning_rate": 0.00013729230886289098, "loss": 1.6681, "step": 1937 }, { "epoch": 0.3972939729397294, "grad_norm": 0.5201768366633468, "learning_rate": 0.00013723068649918, "loss": 1.6626, "step": 1938 }, { "epoch": 0.3974989749897499, "grad_norm": 0.5931031713663376, "learning_rate": 0.00013716904771842825, "loss": 1.7142, "step": 1939 }, { "epoch": 0.3977039770397704, "grad_norm": 0.5531884250131377, "learning_rate": 0.00013710739254781554, "loss": 1.6061, "step": 1940 }, { "epoch": 0.3979089790897909, "grad_norm": 0.5256474386396165, "learning_rate": 0.00013704572101452911, "loss": 1.721, "step": 1941 }, { "epoch": 0.3981139811398114, "grad_norm": 0.545159683838341, "learning_rate": 0.00013698403314576325, "loss": 1.6526, "step": 1942 }, { "epoch": 0.3983189831898319, "grad_norm": 0.4998367611503313, "learning_rate": 0.00013692232896871947, "loss": 1.6041, "step": 1943 }, { "epoch": 0.3985239852398524, "grad_norm": 0.5440881416443756, "learning_rate": 0.00013686060851060656, "loss": 1.6655, "step": 1944 }, { "epoch": 0.3987289872898729, "grad_norm": 0.49882710323321633, "learning_rate": 0.00013679887179864043, "loss": 1.6669, "step": 1945 }, { "epoch": 0.3989339893398934, "grad_norm": 0.5022680898309423, "learning_rate": 0.00013673711886004415, "loss": 1.6235, "step": 1946 }, { "epoch": 0.3991389913899139, "grad_norm": 0.6028555679768342, "learning_rate": 0.00013667534972204795, "loss": 1.6772, "step": 1947 }, { "epoch": 0.3993439934399344, "grad_norm": 0.5403072958120235, "learning_rate": 0.00013661356441188922, "loss": 1.6852, "step": 1948 }, { "epoch": 0.3995489954899549, "grad_norm": 0.5067037244078386, "learning_rate": 0.0001365517629568125, "loss": 1.6407, "step": 1949 }, { "epoch": 0.3997539975399754, "grad_norm": 0.5480829305600373, "learning_rate": 0.0001364899453840694, "loss": 1.6229, "step": 1950 }, { "epoch": 0.3999589995899959, "grad_norm": 0.5392124978523058, "learning_rate": 0.0001364281117209187, "loss": 1.6596, "step": 1951 }, { "epoch": 0.4001640016400164, "grad_norm": 0.5411960766962421, "learning_rate": 0.00013636626199462615, "loss": 1.5984, "step": 1952 }, { "epoch": 0.4003690036900369, "grad_norm": 0.464630641605827, "learning_rate": 0.00013630439623246474, "loss": 1.6089, "step": 1953 }, { "epoch": 0.4005740057400574, "grad_norm": 0.5363660824215917, "learning_rate": 0.00013624251446171445, "loss": 1.6299, "step": 1954 }, { "epoch": 0.4007790077900779, "grad_norm": 0.5249787036699148, "learning_rate": 0.00013618061670966227, "loss": 1.5612, "step": 1955 }, { "epoch": 0.4009840098400984, "grad_norm": 0.524575800254269, "learning_rate": 0.0001361187030036024, "loss": 1.6192, "step": 1956 }, { "epoch": 0.4011890118901189, "grad_norm": 0.49667668786480623, "learning_rate": 0.00013605677337083586, "loss": 1.6444, "step": 1957 }, { "epoch": 0.4013940139401394, "grad_norm": 0.5029661817388345, "learning_rate": 0.0001359948278386709, "loss": 1.6532, "step": 1958 }, { "epoch": 0.4015990159901599, "grad_norm": 0.5761850896172349, "learning_rate": 0.00013593286643442265, "loss": 1.6693, "step": 1959 }, { "epoch": 0.4018040180401804, "grad_norm": 0.5288200268116514, "learning_rate": 0.00013587088918541322, "loss": 1.6374, "step": 1960 }, { "epoch": 0.4020090200902009, "grad_norm": 0.5156510963772725, "learning_rate": 0.00013580889611897184, "loss": 1.6024, "step": 1961 }, { "epoch": 0.4022140221402214, "grad_norm": 0.5272588040940779, "learning_rate": 0.0001357468872624346, "loss": 1.6536, "step": 1962 }, { "epoch": 0.4024190241902419, "grad_norm": 0.5901458443834909, "learning_rate": 0.00013568486264314456, "loss": 1.7249, "step": 1963 }, { "epoch": 0.4026240262402624, "grad_norm": 0.5200718559261266, "learning_rate": 0.00013562282228845183, "loss": 1.6553, "step": 1964 }, { "epoch": 0.4028290282902829, "grad_norm": 0.4973690946582959, "learning_rate": 0.0001355607662257133, "loss": 1.6603, "step": 1965 }, { "epoch": 0.4030340303403034, "grad_norm": 0.5626474630298155, "learning_rate": 0.00013549869448229294, "loss": 1.6599, "step": 1966 }, { "epoch": 0.4032390323903239, "grad_norm": 0.5581489194311449, "learning_rate": 0.00013543660708556157, "loss": 1.6086, "step": 1967 }, { "epoch": 0.4034440344403444, "grad_norm": 0.4949076014961063, "learning_rate": 0.00013537450406289685, "loss": 1.5967, "step": 1968 }, { "epoch": 0.4036490364903649, "grad_norm": 0.5435251256139924, "learning_rate": 0.00013531238544168343, "loss": 1.6389, "step": 1969 }, { "epoch": 0.40385403854038543, "grad_norm": 0.5145533922449875, "learning_rate": 0.0001352502512493128, "loss": 1.5798, "step": 1970 }, { "epoch": 0.4040590405904059, "grad_norm": 0.5072794838453296, "learning_rate": 0.0001351881015131833, "loss": 1.667, "step": 1971 }, { "epoch": 0.4042640426404264, "grad_norm": 0.5029798339796385, "learning_rate": 0.0001351259362607002, "loss": 1.5781, "step": 1972 }, { "epoch": 0.4044690446904469, "grad_norm": 0.5736183300894925, "learning_rate": 0.00013506375551927547, "loss": 1.7497, "step": 1973 }, { "epoch": 0.4046740467404674, "grad_norm": 0.526322658976693, "learning_rate": 0.000135001559316328, "loss": 1.6223, "step": 1974 }, { "epoch": 0.4048790487904879, "grad_norm": 0.5781298647896926, "learning_rate": 0.00013493934767928352, "loss": 1.6686, "step": 1975 }, { "epoch": 0.4050840508405084, "grad_norm": 0.641387973875099, "learning_rate": 0.00013487712063557452, "loss": 1.6772, "step": 1976 }, { "epoch": 0.4052890528905289, "grad_norm": 0.5555644827933304, "learning_rate": 0.00013481487821264033, "loss": 1.6931, "step": 1977 }, { "epoch": 0.4054940549405494, "grad_norm": 0.5601542479062248, "learning_rate": 0.000134752620437927, "loss": 1.6774, "step": 1978 }, { "epoch": 0.4056990569905699, "grad_norm": 0.5273240386483041, "learning_rate": 0.00013469034733888736, "loss": 1.5764, "step": 1979 }, { "epoch": 0.4059040590405904, "grad_norm": 0.47925132004844073, "learning_rate": 0.00013462805894298106, "loss": 1.6306, "step": 1980 }, { "epoch": 0.4061090610906109, "grad_norm": 0.5273996375854236, "learning_rate": 0.00013456575527767445, "loss": 1.6449, "step": 1981 }, { "epoch": 0.4063140631406314, "grad_norm": 0.562906886797999, "learning_rate": 0.00013450343637044058, "loss": 1.5631, "step": 1982 }, { "epoch": 0.4065190651906519, "grad_norm": 0.5425512826454009, "learning_rate": 0.00013444110224875925, "loss": 1.59, "step": 1983 }, { "epoch": 0.4067240672406724, "grad_norm": 0.5032492279285105, "learning_rate": 0.00013437875294011704, "loss": 1.6719, "step": 1984 }, { "epoch": 0.4069290692906929, "grad_norm": 0.5717038458224182, "learning_rate": 0.00013431638847200708, "loss": 1.6815, "step": 1985 }, { "epoch": 0.4071340713407134, "grad_norm": 0.5357452489607015, "learning_rate": 0.00013425400887192933, "loss": 1.6323, "step": 1986 }, { "epoch": 0.4073390733907339, "grad_norm": 0.5835632135711694, "learning_rate": 0.00013419161416739032, "loss": 1.6831, "step": 1987 }, { "epoch": 0.4075440754407544, "grad_norm": 0.503318136097742, "learning_rate": 0.0001341292043859033, "loss": 1.6387, "step": 1988 }, { "epoch": 0.4077490774907749, "grad_norm": 0.5136942658239361, "learning_rate": 0.00013406677955498818, "loss": 1.699, "step": 1989 }, { "epoch": 0.40795407954079543, "grad_norm": 0.5437296027247385, "learning_rate": 0.00013400433970217135, "loss": 1.6169, "step": 1990 }, { "epoch": 0.40815908159081593, "grad_norm": 0.5467446983910647, "learning_rate": 0.0001339418848549861, "loss": 1.6595, "step": 1991 }, { "epoch": 0.40836408364083643, "grad_norm": 0.5219812157735233, "learning_rate": 0.00013387941504097213, "loss": 1.6329, "step": 1992 }, { "epoch": 0.4085690856908569, "grad_norm": 0.5435807763490486, "learning_rate": 0.00013381693028767573, "loss": 1.6553, "step": 1993 }, { "epoch": 0.4087740877408774, "grad_norm": 0.5081232456586999, "learning_rate": 0.00013375443062264988, "loss": 1.6199, "step": 1994 }, { "epoch": 0.4089790897908979, "grad_norm": 0.5085429537660069, "learning_rate": 0.0001336919160734541, "loss": 1.6778, "step": 1995 }, { "epoch": 0.4091840918409184, "grad_norm": 0.48189414321970997, "learning_rate": 0.00013362938666765443, "loss": 1.6444, "step": 1996 }, { "epoch": 0.4093890938909389, "grad_norm": 0.4986801106157369, "learning_rate": 0.00013356684243282356, "loss": 1.6475, "step": 1997 }, { "epoch": 0.4095940959409594, "grad_norm": 0.5597030678284673, "learning_rate": 0.00013350428339654058, "loss": 1.7235, "step": 1998 }, { "epoch": 0.4097990979909799, "grad_norm": 0.5187518865617184, "learning_rate": 0.00013344170958639123, "loss": 1.598, "step": 1999 }, { "epoch": 0.4100041000410004, "grad_norm": 0.45624808165714237, "learning_rate": 0.00013337912102996772, "loss": 1.6117, "step": 2000 }, { "epoch": 0.4102091020910209, "grad_norm": 0.5382054962947498, "learning_rate": 0.00013331651775486873, "loss": 1.6095, "step": 2001 }, { "epoch": 0.4104141041410414, "grad_norm": 0.49445168708768333, "learning_rate": 0.00013325389978869947, "loss": 1.596, "step": 2002 }, { "epoch": 0.4106191061910619, "grad_norm": 0.521899673559171, "learning_rate": 0.00013319126715907165, "loss": 1.6692, "step": 2003 }, { "epoch": 0.4108241082410824, "grad_norm": 0.485367847507791, "learning_rate": 0.00013312861989360337, "loss": 1.6457, "step": 2004 }, { "epoch": 0.4110291102911029, "grad_norm": 0.5371500642272338, "learning_rate": 0.0001330659580199192, "loss": 1.7048, "step": 2005 }, { "epoch": 0.4112341123411234, "grad_norm": 0.49676254692300204, "learning_rate": 0.00013300328156565027, "loss": 1.582, "step": 2006 }, { "epoch": 0.4114391143911439, "grad_norm": 0.5460817293620392, "learning_rate": 0.000132940590558434, "loss": 1.6407, "step": 2007 }, { "epoch": 0.4116441164411644, "grad_norm": 0.5394735353452728, "learning_rate": 0.00013287788502591426, "loss": 1.6469, "step": 2008 }, { "epoch": 0.4118491184911849, "grad_norm": 0.5590812181570827, "learning_rate": 0.00013281516499574135, "loss": 1.6113, "step": 2009 }, { "epoch": 0.41205412054120544, "grad_norm": 0.5234343425293484, "learning_rate": 0.00013275243049557192, "loss": 1.684, "step": 2010 }, { "epoch": 0.41225912259122593, "grad_norm": 0.5291689075493777, "learning_rate": 0.00013268968155306913, "loss": 1.5735, "step": 2011 }, { "epoch": 0.41246412464124643, "grad_norm": 0.4888944096601653, "learning_rate": 0.00013262691819590234, "loss": 1.6448, "step": 2012 }, { "epoch": 0.41266912669126693, "grad_norm": 0.520106352497343, "learning_rate": 0.00013256414045174735, "loss": 1.6199, "step": 2013 }, { "epoch": 0.4128741287412874, "grad_norm": 0.5488147932232325, "learning_rate": 0.00013250134834828626, "loss": 1.6238, "step": 2014 }, { "epoch": 0.4130791307913079, "grad_norm": 0.4875942848981133, "learning_rate": 0.00013243854191320758, "loss": 1.6183, "step": 2015 }, { "epoch": 0.4132841328413284, "grad_norm": 0.5087531930253176, "learning_rate": 0.0001323757211742061, "loss": 1.668, "step": 2016 }, { "epoch": 0.4134891348913489, "grad_norm": 0.6033249451828697, "learning_rate": 0.0001323128861589829, "loss": 1.6017, "step": 2017 }, { "epoch": 0.4136941369413694, "grad_norm": 0.47327438166821995, "learning_rate": 0.00013225003689524534, "loss": 1.6135, "step": 2018 }, { "epoch": 0.4138991389913899, "grad_norm": 0.5351667404128417, "learning_rate": 0.00013218717341070707, "loss": 1.6695, "step": 2019 }, { "epoch": 0.4141041410414104, "grad_norm": 0.5132694973313963, "learning_rate": 0.00013212429573308812, "loss": 1.6484, "step": 2020 }, { "epoch": 0.4143091430914309, "grad_norm": 0.5228646219304549, "learning_rate": 0.00013206140389011463, "loss": 1.6422, "step": 2021 }, { "epoch": 0.4145141451414514, "grad_norm": 0.6261495414317594, "learning_rate": 0.000131998497909519, "loss": 1.6358, "step": 2022 }, { "epoch": 0.4147191471914719, "grad_norm": 0.48951632321683874, "learning_rate": 0.00013193557781904, "loss": 1.7117, "step": 2023 }, { "epoch": 0.4149241492414924, "grad_norm": 0.5509823018811135, "learning_rate": 0.0001318726436464225, "loss": 1.6334, "step": 2024 }, { "epoch": 0.4151291512915129, "grad_norm": 0.49880550857827843, "learning_rate": 0.0001318096954194176, "loss": 1.62, "step": 2025 }, { "epoch": 0.4153341533415334, "grad_norm": 0.5283465471770175, "learning_rate": 0.00013174673316578256, "loss": 1.5566, "step": 2026 }, { "epoch": 0.4155391553915539, "grad_norm": 0.5231478124824578, "learning_rate": 0.00013168375691328095, "loss": 1.608, "step": 2027 }, { "epoch": 0.4157441574415744, "grad_norm": 0.450931378405646, "learning_rate": 0.0001316207666896824, "loss": 1.6248, "step": 2028 }, { "epoch": 0.4159491594915949, "grad_norm": 0.5493052752380496, "learning_rate": 0.00013155776252276276, "loss": 1.6463, "step": 2029 }, { "epoch": 0.41615416154161544, "grad_norm": 0.5969219675262446, "learning_rate": 0.00013149474444030393, "loss": 1.6508, "step": 2030 }, { "epoch": 0.41635916359163594, "grad_norm": 0.5039481102809289, "learning_rate": 0.00013143171247009415, "loss": 1.6625, "step": 2031 }, { "epoch": 0.41656416564165644, "grad_norm": 0.5525457894141088, "learning_rate": 0.00013136866663992754, "loss": 1.6552, "step": 2032 }, { "epoch": 0.41676916769167693, "grad_norm": 0.4631889378508682, "learning_rate": 0.00013130560697760445, "loss": 1.5278, "step": 2033 }, { "epoch": 0.41697416974169743, "grad_norm": 0.539112757024435, "learning_rate": 0.0001312425335109314, "loss": 1.6333, "step": 2034 }, { "epoch": 0.41717917179171793, "grad_norm": 0.5262788373972446, "learning_rate": 0.0001311794462677209, "loss": 1.6967, "step": 2035 }, { "epoch": 0.4173841738417384, "grad_norm": 0.5160680580463141, "learning_rate": 0.00013111634527579152, "loss": 1.5911, "step": 2036 }, { "epoch": 0.4175891758917589, "grad_norm": 0.4818656997471046, "learning_rate": 0.00013105323056296798, "loss": 1.6154, "step": 2037 }, { "epoch": 0.4177941779417794, "grad_norm": 0.5487802832339057, "learning_rate": 0.00013099010215708088, "loss": 1.6189, "step": 2038 }, { "epoch": 0.4179991799917999, "grad_norm": 0.5101340652052012, "learning_rate": 0.00013092696008596715, "loss": 1.5977, "step": 2039 }, { "epoch": 0.4182041820418204, "grad_norm": 0.49215672965071877, "learning_rate": 0.00013086380437746947, "loss": 1.5728, "step": 2040 }, { "epoch": 0.4184091840918409, "grad_norm": 0.5350264579579491, "learning_rate": 0.00013080063505943666, "loss": 1.6559, "step": 2041 }, { "epoch": 0.4186141861418614, "grad_norm": 0.5426506186064547, "learning_rate": 0.00013073745215972353, "loss": 1.6676, "step": 2042 }, { "epoch": 0.4188191881918819, "grad_norm": 0.5272612696715079, "learning_rate": 0.00013067425570619082, "loss": 1.5666, "step": 2043 }, { "epoch": 0.4190241902419024, "grad_norm": 0.5741411108582464, "learning_rate": 0.00013061104572670537, "loss": 1.6532, "step": 2044 }, { "epoch": 0.4192291922919229, "grad_norm": 0.5280309590888705, "learning_rate": 0.00013054782224913988, "loss": 1.5775, "step": 2045 }, { "epoch": 0.4194341943419434, "grad_norm": 0.5375310097761675, "learning_rate": 0.00013048458530137298, "loss": 1.6103, "step": 2046 }, { "epoch": 0.4196391963919639, "grad_norm": 0.49084963477795623, "learning_rate": 0.00013042133491128935, "loss": 1.6106, "step": 2047 }, { "epoch": 0.4198441984419844, "grad_norm": 0.5639861533825508, "learning_rate": 0.0001303580711067795, "loss": 1.6796, "step": 2048 }, { "epoch": 0.4200492004920049, "grad_norm": 0.4982352636643307, "learning_rate": 0.0001302947939157399, "loss": 1.5828, "step": 2049 }, { "epoch": 0.42025420254202545, "grad_norm": 0.5384516572175626, "learning_rate": 0.00013023150336607297, "loss": 1.5727, "step": 2050 }, { "epoch": 0.42045920459204594, "grad_norm": 0.5117938323017456, "learning_rate": 0.00013016819948568687, "loss": 1.6528, "step": 2051 }, { "epoch": 0.42066420664206644, "grad_norm": 0.5079816934398997, "learning_rate": 0.00013010488230249582, "loss": 1.6354, "step": 2052 }, { "epoch": 0.42086920869208694, "grad_norm": 0.5288042480301147, "learning_rate": 0.00013004155184441978, "loss": 1.6638, "step": 2053 }, { "epoch": 0.42107421074210744, "grad_norm": 0.5075058718298264, "learning_rate": 0.0001299782081393846, "loss": 1.6204, "step": 2054 }, { "epoch": 0.42127921279212793, "grad_norm": 0.5159503492853633, "learning_rate": 0.00012991485121532201, "loss": 1.6047, "step": 2055 }, { "epoch": 0.42148421484214843, "grad_norm": 0.4858756341486414, "learning_rate": 0.00012985148110016947, "loss": 1.6458, "step": 2056 }, { "epoch": 0.4216892168921689, "grad_norm": 0.5130547018084762, "learning_rate": 0.00012978809782187038, "loss": 1.6129, "step": 2057 }, { "epoch": 0.4218942189421894, "grad_norm": 0.5477055075489994, "learning_rate": 0.00012972470140837385, "loss": 1.6163, "step": 2058 }, { "epoch": 0.4220992209922099, "grad_norm": 0.4999949520517846, "learning_rate": 0.00012966129188763485, "loss": 1.623, "step": 2059 }, { "epoch": 0.4223042230422304, "grad_norm": 0.5172887157516003, "learning_rate": 0.00012959786928761407, "loss": 1.6731, "step": 2060 }, { "epoch": 0.4225092250922509, "grad_norm": 0.5066172544344552, "learning_rate": 0.00012953443363627803, "loss": 1.6093, "step": 2061 }, { "epoch": 0.4227142271422714, "grad_norm": 0.5445742369368239, "learning_rate": 0.00012947098496159893, "loss": 1.7007, "step": 2062 }, { "epoch": 0.4229192291922919, "grad_norm": 0.5746502222729577, "learning_rate": 0.00012940752329155473, "loss": 1.6079, "step": 2063 }, { "epoch": 0.4231242312423124, "grad_norm": 0.5676578296876167, "learning_rate": 0.00012934404865412924, "loss": 1.6303, "step": 2064 }, { "epoch": 0.4233292332923329, "grad_norm": 0.524013077402647, "learning_rate": 0.0001292805610773118, "loss": 1.5972, "step": 2065 }, { "epoch": 0.4235342353423534, "grad_norm": 0.5191795081846384, "learning_rate": 0.00012921706058909756, "loss": 1.6343, "step": 2066 }, { "epoch": 0.4237392373923739, "grad_norm": 0.8243378793273545, "learning_rate": 0.00012915354721748738, "loss": 1.6146, "step": 2067 }, { "epoch": 0.4239442394423944, "grad_norm": 0.5216625499768733, "learning_rate": 0.00012909002099048775, "loss": 1.6531, "step": 2068 }, { "epoch": 0.4241492414924149, "grad_norm": 0.534217068140855, "learning_rate": 0.00012902648193611086, "loss": 1.6983, "step": 2069 }, { "epoch": 0.42435424354243545, "grad_norm": 0.5537807518013668, "learning_rate": 0.00012896293008237455, "loss": 1.6889, "step": 2070 }, { "epoch": 0.42455924559245595, "grad_norm": 0.5000376429203801, "learning_rate": 0.00012889936545730225, "loss": 1.6549, "step": 2071 }, { "epoch": 0.42476424764247644, "grad_norm": 0.5222234108418661, "learning_rate": 0.0001288357880889232, "loss": 1.5681, "step": 2072 }, { "epoch": 0.42496924969249694, "grad_norm": 0.5040226674591807, "learning_rate": 0.00012877219800527193, "loss": 1.5993, "step": 2073 }, { "epoch": 0.42517425174251744, "grad_norm": 0.49077622400216847, "learning_rate": 0.00012870859523438893, "loss": 1.5894, "step": 2074 }, { "epoch": 0.42537925379253794, "grad_norm": 0.5056206302040633, "learning_rate": 0.0001286449798043201, "loss": 1.6337, "step": 2075 }, { "epoch": 0.42558425584255843, "grad_norm": 0.5542238499148647, "learning_rate": 0.00012858135174311693, "loss": 1.625, "step": 2076 }, { "epoch": 0.42578925789257893, "grad_norm": 0.5501214964368766, "learning_rate": 0.00012851771107883655, "loss": 1.5964, "step": 2077 }, { "epoch": 0.42599425994259943, "grad_norm": 0.49752788455468033, "learning_rate": 0.00012845405783954152, "loss": 1.71, "step": 2078 }, { "epoch": 0.4261992619926199, "grad_norm": 0.47643060629196343, "learning_rate": 0.00012839039205330007, "loss": 1.6201, "step": 2079 }, { "epoch": 0.4264042640426404, "grad_norm": 0.5343716011444284, "learning_rate": 0.00012832671374818597, "loss": 1.648, "step": 2080 }, { "epoch": 0.4266092660926609, "grad_norm": 0.6646075197067107, "learning_rate": 0.00012826302295227836, "loss": 1.6491, "step": 2081 }, { "epoch": 0.4268142681426814, "grad_norm": 0.5388197627268985, "learning_rate": 0.00012819931969366207, "loss": 1.6477, "step": 2082 }, { "epoch": 0.4270192701927019, "grad_norm": 0.5198449165623379, "learning_rate": 0.0001281356040004273, "loss": 1.581, "step": 2083 }, { "epoch": 0.4272242722427224, "grad_norm": 0.5877476016414378, "learning_rate": 0.00012807187590066979, "loss": 1.6241, "step": 2084 }, { "epoch": 0.4274292742927429, "grad_norm": 0.4992588613805797, "learning_rate": 0.00012800813542249072, "loss": 1.5306, "step": 2085 }, { "epoch": 0.4276342763427634, "grad_norm": 0.540205030159357, "learning_rate": 0.00012794438259399672, "loss": 1.6179, "step": 2086 }, { "epoch": 0.4278392783927839, "grad_norm": 0.5133839360614646, "learning_rate": 0.00012788061744329997, "loss": 1.5674, "step": 2087 }, { "epoch": 0.4280442804428044, "grad_norm": 0.5306368694523946, "learning_rate": 0.00012781683999851795, "loss": 1.686, "step": 2088 }, { "epoch": 0.4282492824928249, "grad_norm": 0.5421592198170199, "learning_rate": 0.0001277530502877736, "loss": 1.6195, "step": 2089 }, { "epoch": 0.42845428454284545, "grad_norm": 0.5069745819988721, "learning_rate": 0.00012768924833919532, "loss": 1.6021, "step": 2090 }, { "epoch": 0.42865928659286595, "grad_norm": 0.5322301428680724, "learning_rate": 0.00012762543418091689, "loss": 1.5605, "step": 2091 }, { "epoch": 0.42886428864288645, "grad_norm": 0.6321245928069295, "learning_rate": 0.00012756160784107738, "loss": 1.6462, "step": 2092 }, { "epoch": 0.42906929069290695, "grad_norm": 0.5389888107131428, "learning_rate": 0.00012749776934782133, "loss": 1.6231, "step": 2093 }, { "epoch": 0.42927429274292744, "grad_norm": 0.5934225226676983, "learning_rate": 0.00012743391872929865, "loss": 1.582, "step": 2094 }, { "epoch": 0.42947929479294794, "grad_norm": 0.5620097205105296, "learning_rate": 0.00012737005601366457, "loss": 1.6466, "step": 2095 }, { "epoch": 0.42968429684296844, "grad_norm": 0.5022648535916745, "learning_rate": 0.00012730618122907959, "loss": 1.6392, "step": 2096 }, { "epoch": 0.42988929889298894, "grad_norm": 0.5248833817638882, "learning_rate": 0.0001272422944037096, "loss": 1.6297, "step": 2097 }, { "epoch": 0.43009430094300943, "grad_norm": 0.5733973485514842, "learning_rate": 0.0001271783955657258, "loss": 1.6626, "step": 2098 }, { "epoch": 0.43029930299302993, "grad_norm": 0.6084702847018025, "learning_rate": 0.0001271144847433047, "loss": 1.6381, "step": 2099 }, { "epoch": 0.43050430504305043, "grad_norm": 0.54498369637951, "learning_rate": 0.00012705056196462801, "loss": 1.6562, "step": 2100 }, { "epoch": 0.4307093070930709, "grad_norm": 0.5633386996255672, "learning_rate": 0.0001269866272578828, "loss": 1.6587, "step": 2101 }, { "epoch": 0.4309143091430914, "grad_norm": 0.5619127811316713, "learning_rate": 0.0001269226806512614, "loss": 1.5643, "step": 2102 }, { "epoch": 0.4311193111931119, "grad_norm": 0.598425179783864, "learning_rate": 0.0001268587221729613, "loss": 1.6259, "step": 2103 }, { "epoch": 0.4313243132431324, "grad_norm": 0.5719151954946781, "learning_rate": 0.00012679475185118535, "loss": 1.6203, "step": 2104 }, { "epoch": 0.4315293152931529, "grad_norm": 0.5356397109583548, "learning_rate": 0.0001267307697141415, "loss": 1.5678, "step": 2105 }, { "epoch": 0.4317343173431734, "grad_norm": 0.6203681803670734, "learning_rate": 0.00012666677579004296, "loss": 1.6861, "step": 2106 }, { "epoch": 0.4319393193931939, "grad_norm": 0.5969382161746908, "learning_rate": 0.0001266027701071082, "loss": 1.6506, "step": 2107 }, { "epoch": 0.4321443214432144, "grad_norm": 0.5877935423111643, "learning_rate": 0.00012653875269356076, "loss": 1.6532, "step": 2108 }, { "epoch": 0.4323493234932349, "grad_norm": 0.5052465647166653, "learning_rate": 0.00012647472357762938, "loss": 1.6828, "step": 2109 }, { "epoch": 0.43255432554325546, "grad_norm": 0.530408932339115, "learning_rate": 0.0001264106827875481, "loss": 1.569, "step": 2110 }, { "epoch": 0.43275932759327596, "grad_norm": 0.6346565978648429, "learning_rate": 0.00012634663035155595, "loss": 1.6071, "step": 2111 }, { "epoch": 0.43296432964329645, "grad_norm": 0.6032169894221104, "learning_rate": 0.00012628256629789713, "loss": 1.6688, "step": 2112 }, { "epoch": 0.43316933169331695, "grad_norm": 0.5281126936510784, "learning_rate": 0.00012621849065482093, "loss": 1.5989, "step": 2113 }, { "epoch": 0.43337433374333745, "grad_norm": 0.5435861810416938, "learning_rate": 0.0001261544034505819, "loss": 1.6458, "step": 2114 }, { "epoch": 0.43357933579335795, "grad_norm": 0.5993840394271078, "learning_rate": 0.00012609030471343952, "loss": 1.6393, "step": 2115 }, { "epoch": 0.43378433784337844, "grad_norm": 0.6159968377473376, "learning_rate": 0.0001260261944716584, "loss": 1.6695, "step": 2116 }, { "epoch": 0.43398933989339894, "grad_norm": 0.5408960905874177, "learning_rate": 0.00012596207275350832, "loss": 1.6045, "step": 2117 }, { "epoch": 0.43419434194341944, "grad_norm": 0.55375972011674, "learning_rate": 0.00012589793958726398, "loss": 1.6004, "step": 2118 }, { "epoch": 0.43439934399343993, "grad_norm": 0.552016902209634, "learning_rate": 0.00012583379500120517, "loss": 1.6282, "step": 2119 }, { "epoch": 0.43460434604346043, "grad_norm": 0.5532532312459124, "learning_rate": 0.00012576963902361684, "loss": 1.6509, "step": 2120 }, { "epoch": 0.43480934809348093, "grad_norm": 0.5572167228417764, "learning_rate": 0.00012570547168278874, "loss": 1.605, "step": 2121 }, { "epoch": 0.4350143501435014, "grad_norm": 0.5864773233007516, "learning_rate": 0.00012564129300701585, "loss": 1.6421, "step": 2122 }, { "epoch": 0.4352193521935219, "grad_norm": 0.5040098130329621, "learning_rate": 0.00012557710302459803, "loss": 1.6982, "step": 2123 }, { "epoch": 0.4354243542435424, "grad_norm": 0.49952204858797405, "learning_rate": 0.00012551290176384005, "loss": 1.6008, "step": 2124 }, { "epoch": 0.4356293562935629, "grad_norm": 0.5224613033959592, "learning_rate": 0.00012544868925305189, "loss": 1.5299, "step": 2125 }, { "epoch": 0.4358343583435834, "grad_norm": 0.5036024977194129, "learning_rate": 0.00012538446552054822, "loss": 1.6216, "step": 2126 }, { "epoch": 0.4360393603936039, "grad_norm": 0.5065015134324423, "learning_rate": 0.0001253202305946489, "loss": 1.6242, "step": 2127 }, { "epoch": 0.4362443624436244, "grad_norm": 0.4865923876353277, "learning_rate": 0.00012525598450367854, "loss": 1.6299, "step": 2128 }, { "epoch": 0.4364493644936449, "grad_norm": 0.5001369984663168, "learning_rate": 0.00012519172727596675, "loss": 1.6086, "step": 2129 }, { "epoch": 0.43665436654366546, "grad_norm": 0.5336689201633212, "learning_rate": 0.0001251274589398481, "loss": 1.5928, "step": 2130 }, { "epoch": 0.43685936859368596, "grad_norm": 0.458484602444854, "learning_rate": 0.00012506317952366196, "loss": 1.567, "step": 2131 }, { "epoch": 0.43706437064370646, "grad_norm": 0.45472408648008505, "learning_rate": 0.0001249988890557526, "loss": 1.6658, "step": 2132 }, { "epoch": 0.43726937269372695, "grad_norm": 0.48451902869427593, "learning_rate": 0.0001249345875644693, "loss": 1.5793, "step": 2133 }, { "epoch": 0.43747437474374745, "grad_norm": 0.54992603870482, "learning_rate": 0.000124870275078166, "loss": 1.661, "step": 2134 }, { "epoch": 0.43767937679376795, "grad_norm": 0.5082912037596098, "learning_rate": 0.00012480595162520162, "loss": 1.6473, "step": 2135 }, { "epoch": 0.43788437884378845, "grad_norm": 0.4724840655210908, "learning_rate": 0.00012474161723393987, "loss": 1.5916, "step": 2136 }, { "epoch": 0.43808938089380894, "grad_norm": 0.5049927808634362, "learning_rate": 0.0001246772719327493, "loss": 1.6583, "step": 2137 }, { "epoch": 0.43829438294382944, "grad_norm": 0.530508083576109, "learning_rate": 0.0001246129157500033, "loss": 1.6635, "step": 2138 }, { "epoch": 0.43849938499384994, "grad_norm": 0.5122346630969103, "learning_rate": 0.00012454854871407994, "loss": 1.6209, "step": 2139 }, { "epoch": 0.43870438704387044, "grad_norm": 0.5120420117474423, "learning_rate": 0.0001244841708533622, "loss": 1.5957, "step": 2140 }, { "epoch": 0.43890938909389093, "grad_norm": 0.4866815909875715, "learning_rate": 0.0001244197821962378, "loss": 1.6452, "step": 2141 }, { "epoch": 0.43911439114391143, "grad_norm": 0.5279550794563505, "learning_rate": 0.0001243553827710992, "loss": 1.5392, "step": 2142 }, { "epoch": 0.43931939319393193, "grad_norm": 0.44556466936063477, "learning_rate": 0.00012429097260634365, "loss": 1.5182, "step": 2143 }, { "epoch": 0.4395243952439524, "grad_norm": 0.5552509582306528, "learning_rate": 0.00012422655173037304, "loss": 1.6877, "step": 2144 }, { "epoch": 0.4397293972939729, "grad_norm": 0.5215355483501207, "learning_rate": 0.00012416212017159412, "loss": 1.6233, "step": 2145 }, { "epoch": 0.4399343993439934, "grad_norm": 0.5189189052206612, "learning_rate": 0.00012409767795841823, "loss": 1.6281, "step": 2146 }, { "epoch": 0.4401394013940139, "grad_norm": 0.4843645387729046, "learning_rate": 0.0001240332251192615, "loss": 1.618, "step": 2147 }, { "epoch": 0.4403444034440344, "grad_norm": 0.48282550360680987, "learning_rate": 0.00012396876168254466, "loss": 1.592, "step": 2148 }, { "epoch": 0.4405494054940549, "grad_norm": 0.6274710133982132, "learning_rate": 0.0001239042876766932, "loss": 1.6735, "step": 2149 }, { "epoch": 0.44075440754407547, "grad_norm": 0.48471373703752735, "learning_rate": 0.00012383980313013715, "loss": 1.5537, "step": 2150 }, { "epoch": 0.44095940959409596, "grad_norm": 0.5384884989516128, "learning_rate": 0.00012377530807131137, "loss": 1.646, "step": 2151 }, { "epoch": 0.44116441164411646, "grad_norm": 0.49710256286569837, "learning_rate": 0.00012371080252865515, "loss": 1.5203, "step": 2152 }, { "epoch": 0.44136941369413696, "grad_norm": 0.5523918815485819, "learning_rate": 0.00012364628653061257, "loss": 1.6173, "step": 2153 }, { "epoch": 0.44157441574415746, "grad_norm": 0.4714398565088359, "learning_rate": 0.00012358176010563224, "loss": 1.5593, "step": 2154 }, { "epoch": 0.44177941779417795, "grad_norm": 0.4778642717551116, "learning_rate": 0.00012351722328216735, "loss": 1.6605, "step": 2155 }, { "epoch": 0.44198441984419845, "grad_norm": 0.5217475080141332, "learning_rate": 0.00012345267608867574, "loss": 1.6409, "step": 2156 }, { "epoch": 0.44218942189421895, "grad_norm": 0.4879055418506378, "learning_rate": 0.0001233881185536198, "loss": 1.6221, "step": 2157 }, { "epoch": 0.44239442394423945, "grad_norm": 0.5236382595855923, "learning_rate": 0.0001233235507054664, "loss": 1.5952, "step": 2158 }, { "epoch": 0.44259942599425994, "grad_norm": 0.5052041265063858, "learning_rate": 0.00012325897257268708, "loss": 1.6354, "step": 2159 }, { "epoch": 0.44280442804428044, "grad_norm": 0.5391553084710383, "learning_rate": 0.0001231943841837579, "loss": 1.6056, "step": 2160 }, { "epoch": 0.44300943009430094, "grad_norm": 0.5317024944837467, "learning_rate": 0.00012312978556715932, "loss": 1.6136, "step": 2161 }, { "epoch": 0.44321443214432144, "grad_norm": 0.45039348648677635, "learning_rate": 0.00012306517675137645, "loss": 1.5624, "step": 2162 }, { "epoch": 0.44341943419434193, "grad_norm": 0.5589837897894556, "learning_rate": 0.00012300055776489884, "loss": 1.6923, "step": 2163 }, { "epoch": 0.44362443624436243, "grad_norm": 0.5731646696151985, "learning_rate": 0.00012293592863622045, "loss": 1.6387, "step": 2164 }, { "epoch": 0.4438294382943829, "grad_norm": 0.5298811885026682, "learning_rate": 0.00012287128939383993, "loss": 1.6285, "step": 2165 }, { "epoch": 0.4440344403444034, "grad_norm": 0.5608041756463177, "learning_rate": 0.00012280664006626013, "loss": 1.6672, "step": 2166 }, { "epoch": 0.4442394423944239, "grad_norm": 0.4611347879059605, "learning_rate": 0.0001227419806819885, "loss": 1.5744, "step": 2167 }, { "epoch": 0.4444444444444444, "grad_norm": 0.5225137696362192, "learning_rate": 0.0001226773112695369, "loss": 1.5742, "step": 2168 }, { "epoch": 0.4446494464944649, "grad_norm": 0.5269321270877484, "learning_rate": 0.0001226126318574216, "loss": 1.6185, "step": 2169 }, { "epoch": 0.44485444854448547, "grad_norm": 0.5102159914914142, "learning_rate": 0.0001225479424741633, "loss": 1.6252, "step": 2170 }, { "epoch": 0.44505945059450597, "grad_norm": 0.5620848269228764, "learning_rate": 0.000122483243148287, "loss": 1.6388, "step": 2171 }, { "epoch": 0.44526445264452646, "grad_norm": 0.5909929809325606, "learning_rate": 0.00012241853390832226, "loss": 1.6388, "step": 2172 }, { "epoch": 0.44546945469454696, "grad_norm": 0.5645828183647269, "learning_rate": 0.0001223538147828029, "loss": 1.658, "step": 2173 }, { "epoch": 0.44567445674456746, "grad_norm": 0.5378105266035673, "learning_rate": 0.00012228908580026702, "loss": 1.6329, "step": 2174 }, { "epoch": 0.44587945879458796, "grad_norm": 0.5018137468578521, "learning_rate": 0.00012222434698925727, "loss": 1.6595, "step": 2175 }, { "epoch": 0.44608446084460845, "grad_norm": 0.48769179572466265, "learning_rate": 0.0001221595983783205, "loss": 1.5996, "step": 2176 }, { "epoch": 0.44628946289462895, "grad_norm": 0.5453521504564052, "learning_rate": 0.0001220948399960078, "loss": 1.6718, "step": 2177 }, { "epoch": 0.44649446494464945, "grad_norm": 0.5356855434269039, "learning_rate": 0.00012203007187087485, "loss": 1.581, "step": 2178 }, { "epoch": 0.44669946699466995, "grad_norm": 0.44856708369186865, "learning_rate": 0.00012196529403148132, "loss": 1.5513, "step": 2179 }, { "epoch": 0.44690446904469044, "grad_norm": 0.5143991891610954, "learning_rate": 0.00012190050650639131, "loss": 1.6605, "step": 2180 }, { "epoch": 0.44710947109471094, "grad_norm": 0.5569965352253903, "learning_rate": 0.00012183570932417323, "loss": 1.6337, "step": 2181 }, { "epoch": 0.44731447314473144, "grad_norm": 0.4846297856392185, "learning_rate": 0.00012177090251339965, "loss": 1.6488, "step": 2182 }, { "epoch": 0.44751947519475194, "grad_norm": 0.5676152994023149, "learning_rate": 0.00012170608610264742, "loss": 1.6705, "step": 2183 }, { "epoch": 0.44772447724477243, "grad_norm": 0.46163168541793304, "learning_rate": 0.00012164126012049766, "loss": 1.6689, "step": 2184 }, { "epoch": 0.44792947929479293, "grad_norm": 0.5139110587076279, "learning_rate": 0.00012157642459553564, "loss": 1.6773, "step": 2185 }, { "epoch": 0.44813448134481343, "grad_norm": 0.46633175126525284, "learning_rate": 0.00012151157955635097, "loss": 1.5892, "step": 2186 }, { "epoch": 0.4483394833948339, "grad_norm": 0.49448660065559547, "learning_rate": 0.00012144672503153726, "loss": 1.5627, "step": 2187 }, { "epoch": 0.4485444854448544, "grad_norm": 0.5250935178167044, "learning_rate": 0.00012138186104969247, "loss": 1.5623, "step": 2188 }, { "epoch": 0.4487494874948749, "grad_norm": 0.5407192805562584, "learning_rate": 0.00012131698763941863, "loss": 1.5759, "step": 2189 }, { "epoch": 0.4489544895448955, "grad_norm": 0.5106637399093619, "learning_rate": 0.00012125210482932203, "loss": 1.6665, "step": 2190 }, { "epoch": 0.44915949159491597, "grad_norm": 0.5293031553006489, "learning_rate": 0.00012118721264801299, "loss": 1.6175, "step": 2191 }, { "epoch": 0.44936449364493647, "grad_norm": 0.5468323908564092, "learning_rate": 0.000121122311124106, "loss": 1.6102, "step": 2192 }, { "epoch": 0.44956949569495697, "grad_norm": 0.523231532303066, "learning_rate": 0.0001210574002862197, "loss": 1.7186, "step": 2193 }, { "epoch": 0.44977449774497746, "grad_norm": 0.5222157286598067, "learning_rate": 0.00012099248016297681, "loss": 1.6269, "step": 2194 }, { "epoch": 0.44997949979499796, "grad_norm": 0.5543478262983981, "learning_rate": 0.00012092755078300422, "loss": 1.6321, "step": 2195 }, { "epoch": 0.45018450184501846, "grad_norm": 0.5250057809043659, "learning_rate": 0.00012086261217493276, "loss": 1.6546, "step": 2196 }, { "epoch": 0.45038950389503896, "grad_norm": 0.4766727878388731, "learning_rate": 0.00012079766436739742, "loss": 1.6267, "step": 2197 }, { "epoch": 0.45059450594505945, "grad_norm": 0.535968447646884, "learning_rate": 0.00012073270738903726, "loss": 1.6451, "step": 2198 }, { "epoch": 0.45079950799507995, "grad_norm": 0.5460785149856947, "learning_rate": 0.00012066774126849529, "loss": 1.7289, "step": 2199 }, { "epoch": 0.45100451004510045, "grad_norm": 0.5448945869127142, "learning_rate": 0.00012060276603441871, "loss": 1.6506, "step": 2200 }, { "epoch": 0.45120951209512095, "grad_norm": 0.49201681757479115, "learning_rate": 0.00012053778171545857, "loss": 1.6035, "step": 2201 }, { "epoch": 0.45141451414514144, "grad_norm": 0.47186569033147796, "learning_rate": 0.00012047278834027005, "loss": 1.4887, "step": 2202 }, { "epoch": 0.45161951619516194, "grad_norm": 0.47916073409169957, "learning_rate": 0.00012040778593751227, "loss": 1.6477, "step": 2203 }, { "epoch": 0.45182451824518244, "grad_norm": 0.4987732650660998, "learning_rate": 0.00012034277453584828, "loss": 1.6708, "step": 2204 }, { "epoch": 0.45202952029520294, "grad_norm": 0.496588246317768, "learning_rate": 0.00012027775416394522, "loss": 1.5524, "step": 2205 }, { "epoch": 0.45223452234522343, "grad_norm": 0.4828001014108145, "learning_rate": 0.0001202127248504741, "loss": 1.5849, "step": 2206 }, { "epoch": 0.45243952439524393, "grad_norm": 0.482606435953857, "learning_rate": 0.00012014768662410985, "loss": 1.595, "step": 2207 }, { "epoch": 0.45264452644526443, "grad_norm": 0.5584448365120266, "learning_rate": 0.00012008263951353143, "loss": 1.6303, "step": 2208 }, { "epoch": 0.4528495284952849, "grad_norm": 0.5141864628725594, "learning_rate": 0.00012001758354742163, "loss": 1.652, "step": 2209 }, { "epoch": 0.4530545305453055, "grad_norm": 0.470178592338048, "learning_rate": 0.00011995251875446718, "loss": 1.5825, "step": 2210 }, { "epoch": 0.453259532595326, "grad_norm": 0.48622644202780685, "learning_rate": 0.0001198874451633587, "loss": 1.6344, "step": 2211 }, { "epoch": 0.4534645346453465, "grad_norm": 0.5137268092341475, "learning_rate": 0.00011982236280279066, "loss": 1.6607, "step": 2212 }, { "epoch": 0.45366953669536697, "grad_norm": 0.508893577800389, "learning_rate": 0.0001197572717014615, "loss": 1.674, "step": 2213 }, { "epoch": 0.45387453874538747, "grad_norm": 0.48372042066740545, "learning_rate": 0.00011969217188807333, "loss": 1.6283, "step": 2214 }, { "epoch": 0.45407954079540797, "grad_norm": 0.4944984056659895, "learning_rate": 0.00011962706339133229, "loss": 1.6082, "step": 2215 }, { "epoch": 0.45428454284542846, "grad_norm": 0.5325089566366165, "learning_rate": 0.00011956194623994827, "loss": 1.569, "step": 2216 }, { "epoch": 0.45448954489544896, "grad_norm": 0.5011500795056895, "learning_rate": 0.00011949682046263491, "loss": 1.6693, "step": 2217 }, { "epoch": 0.45469454694546946, "grad_norm": 0.46754227705442525, "learning_rate": 0.00011943168608810978, "loss": 1.7038, "step": 2218 }, { "epoch": 0.45489954899548996, "grad_norm": 0.5483068720363576, "learning_rate": 0.00011936654314509415, "loss": 1.6316, "step": 2219 }, { "epoch": 0.45510455104551045, "grad_norm": 0.5096240811125679, "learning_rate": 0.00011930139166231308, "loss": 1.5714, "step": 2220 }, { "epoch": 0.45530955309553095, "grad_norm": 0.4813416419550769, "learning_rate": 0.00011923623166849547, "loss": 1.5927, "step": 2221 }, { "epoch": 0.45551455514555145, "grad_norm": 0.5170365330513925, "learning_rate": 0.00011917106319237386, "loss": 1.5518, "step": 2222 }, { "epoch": 0.45571955719557194, "grad_norm": 0.4884764179572433, "learning_rate": 0.0001191058862626846, "loss": 1.5164, "step": 2223 }, { "epoch": 0.45592455924559244, "grad_norm": 0.4954391531247397, "learning_rate": 0.00011904070090816777, "loss": 1.6353, "step": 2224 }, { "epoch": 0.45612956129561294, "grad_norm": 0.4960297156689326, "learning_rate": 0.00011897550715756713, "loss": 1.5118, "step": 2225 }, { "epoch": 0.45633456334563344, "grad_norm": 0.5390779402823423, "learning_rate": 0.0001189103050396302, "loss": 1.6791, "step": 2226 }, { "epoch": 0.45653956539565393, "grad_norm": 0.499269726115277, "learning_rate": 0.0001188450945831081, "loss": 1.5772, "step": 2227 }, { "epoch": 0.45674456744567443, "grad_norm": 0.46350476901104354, "learning_rate": 0.00011877987581675572, "loss": 1.6041, "step": 2228 }, { "epoch": 0.45694956949569493, "grad_norm": 0.47674721816803073, "learning_rate": 0.00011871464876933155, "loss": 1.5654, "step": 2229 }, { "epoch": 0.4571545715457155, "grad_norm": 0.4576385907402217, "learning_rate": 0.00011864941346959775, "loss": 1.642, "step": 2230 }, { "epoch": 0.457359573595736, "grad_norm": 0.4837016287745565, "learning_rate": 0.00011858416994632013, "loss": 1.6331, "step": 2231 }, { "epoch": 0.4575645756457565, "grad_norm": 0.5604741232557158, "learning_rate": 0.0001185189182282681, "loss": 1.699, "step": 2232 }, { "epoch": 0.457769577695777, "grad_norm": 0.4315380507020846, "learning_rate": 0.00011845365834421474, "loss": 1.5785, "step": 2233 }, { "epoch": 0.45797457974579747, "grad_norm": 0.5565812745284081, "learning_rate": 0.0001183883903229367, "loss": 1.6175, "step": 2234 }, { "epoch": 0.45817958179581797, "grad_norm": 0.5204126419888842, "learning_rate": 0.00011832311419321414, "loss": 1.5621, "step": 2235 }, { "epoch": 0.45838458384583847, "grad_norm": 0.43601270242126144, "learning_rate": 0.00011825782998383092, "loss": 1.5952, "step": 2236 }, { "epoch": 0.45858958589585896, "grad_norm": 0.4497791685004938, "learning_rate": 0.00011819253772357442, "loss": 1.6251, "step": 2237 }, { "epoch": 0.45879458794587946, "grad_norm": 0.4717757418463207, "learning_rate": 0.00011812723744123553, "loss": 1.5702, "step": 2238 }, { "epoch": 0.45899958999589996, "grad_norm": 0.49821996077055847, "learning_rate": 0.00011806192916560872, "loss": 1.6545, "step": 2239 }, { "epoch": 0.45920459204592046, "grad_norm": 0.46129632190603165, "learning_rate": 0.00011799661292549195, "loss": 1.5715, "step": 2240 }, { "epoch": 0.45940959409594095, "grad_norm": 0.49406280764032334, "learning_rate": 0.00011793128874968675, "loss": 1.632, "step": 2241 }, { "epoch": 0.45961459614596145, "grad_norm": 0.49000898358974676, "learning_rate": 0.00011786595666699809, "loss": 1.6326, "step": 2242 }, { "epoch": 0.45981959819598195, "grad_norm": 0.4356473279545904, "learning_rate": 0.00011780061670623448, "loss": 1.5789, "step": 2243 }, { "epoch": 0.46002460024600245, "grad_norm": 0.4892664839892892, "learning_rate": 0.00011773526889620783, "loss": 1.6733, "step": 2244 }, { "epoch": 0.46022960229602294, "grad_norm": 0.5556881400551208, "learning_rate": 0.00011766991326573356, "loss": 1.6861, "step": 2245 }, { "epoch": 0.46043460434604344, "grad_norm": 0.4486446257422821, "learning_rate": 0.00011760454984363058, "loss": 1.5268, "step": 2246 }, { "epoch": 0.46063960639606394, "grad_norm": 0.49179939605265005, "learning_rate": 0.0001175391786587211, "loss": 1.6432, "step": 2247 }, { "epoch": 0.46084460844608444, "grad_norm": 0.527555706448139, "learning_rate": 0.00011747379973983095, "loss": 1.5776, "step": 2248 }, { "epoch": 0.46104961049610493, "grad_norm": 0.4944724729281564, "learning_rate": 0.00011740841311578919, "loss": 1.6567, "step": 2249 }, { "epoch": 0.4612546125461255, "grad_norm": 0.5034188954175542, "learning_rate": 0.00011734301881542835, "loss": 1.6355, "step": 2250 }, { "epoch": 0.461459614596146, "grad_norm": 0.4692162254222976, "learning_rate": 0.00011727761686758438, "loss": 1.5915, "step": 2251 }, { "epoch": 0.4616646166461665, "grad_norm": 0.49030093156236876, "learning_rate": 0.00011721220730109654, "loss": 1.5702, "step": 2252 }, { "epoch": 0.461869618696187, "grad_norm": 0.5293979426920176, "learning_rate": 0.00011714679014480751, "loss": 1.6379, "step": 2253 }, { "epoch": 0.4620746207462075, "grad_norm": 0.5064174921529848, "learning_rate": 0.00011708136542756325, "loss": 1.5728, "step": 2254 }, { "epoch": 0.462279622796228, "grad_norm": 0.4937639548400506, "learning_rate": 0.00011701593317821306, "loss": 1.5463, "step": 2255 }, { "epoch": 0.46248462484624847, "grad_norm": 0.4848169917908273, "learning_rate": 0.00011695049342560968, "loss": 1.6266, "step": 2256 }, { "epoch": 0.46268962689626897, "grad_norm": 0.505922621676798, "learning_rate": 0.00011688504619860899, "loss": 1.6066, "step": 2257 }, { "epoch": 0.46289462894628947, "grad_norm": 0.5175374609665985, "learning_rate": 0.00011681959152607025, "loss": 1.6234, "step": 2258 }, { "epoch": 0.46309963099630996, "grad_norm": 0.48630453982276306, "learning_rate": 0.00011675412943685604, "loss": 1.5987, "step": 2259 }, { "epoch": 0.46330463304633046, "grad_norm": 0.5137283850061084, "learning_rate": 0.0001166886599598321, "loss": 1.562, "step": 2260 }, { "epoch": 0.46350963509635096, "grad_norm": 0.4843909314091931, "learning_rate": 0.00011662318312386755, "loss": 1.5956, "step": 2261 }, { "epoch": 0.46371463714637146, "grad_norm": 0.4937067253155184, "learning_rate": 0.00011655769895783469, "loss": 1.6087, "step": 2262 }, { "epoch": 0.46391963919639195, "grad_norm": 0.49294092335364975, "learning_rate": 0.00011649220749060903, "loss": 1.647, "step": 2263 }, { "epoch": 0.46412464124641245, "grad_norm": 0.5069275258145326, "learning_rate": 0.00011642670875106938, "loss": 1.5944, "step": 2264 }, { "epoch": 0.46432964329643295, "grad_norm": 0.5247022335682496, "learning_rate": 0.00011636120276809763, "loss": 1.6094, "step": 2265 }, { "epoch": 0.46453464534645345, "grad_norm": 0.5302073199734748, "learning_rate": 0.00011629568957057903, "loss": 1.6542, "step": 2266 }, { "epoch": 0.46473964739647394, "grad_norm": 0.4614223163381095, "learning_rate": 0.00011623016918740188, "loss": 1.5973, "step": 2267 }, { "epoch": 0.46494464944649444, "grad_norm": 0.5359077012270838, "learning_rate": 0.00011616464164745768, "loss": 1.7003, "step": 2268 }, { "epoch": 0.46514965149651494, "grad_norm": 0.5368926793827554, "learning_rate": 0.00011609910697964114, "loss": 1.6342, "step": 2269 }, { "epoch": 0.4653546535465355, "grad_norm": 0.469705446395763, "learning_rate": 0.00011603356521285005, "loss": 1.6145, "step": 2270 }, { "epoch": 0.465559655596556, "grad_norm": 0.5097166245134442, "learning_rate": 0.00011596801637598531, "loss": 1.6119, "step": 2271 }, { "epoch": 0.4657646576465765, "grad_norm": 0.5139429972702196, "learning_rate": 0.00011590246049795101, "loss": 1.6085, "step": 2272 }, { "epoch": 0.465969659696597, "grad_norm": 0.5458882595022456, "learning_rate": 0.00011583689760765435, "loss": 1.6548, "step": 2273 }, { "epoch": 0.4661746617466175, "grad_norm": 0.49188711206373154, "learning_rate": 0.00011577132773400552, "loss": 1.627, "step": 2274 }, { "epoch": 0.466379663796638, "grad_norm": 0.5369889449614228, "learning_rate": 0.00011570575090591791, "loss": 1.6578, "step": 2275 }, { "epoch": 0.4665846658466585, "grad_norm": 0.5823023557728096, "learning_rate": 0.00011564016715230788, "loss": 1.6762, "step": 2276 }, { "epoch": 0.466789667896679, "grad_norm": 0.4705088760551964, "learning_rate": 0.00011557457650209488, "loss": 1.6055, "step": 2277 }, { "epoch": 0.46699466994669947, "grad_norm": 0.4726782938818058, "learning_rate": 0.00011550897898420148, "loss": 1.5717, "step": 2278 }, { "epoch": 0.46719967199671997, "grad_norm": 0.4821132831803551, "learning_rate": 0.0001154433746275531, "loss": 1.5156, "step": 2279 }, { "epoch": 0.46740467404674046, "grad_norm": 0.4802441590712986, "learning_rate": 0.00011537776346107834, "loss": 1.5545, "step": 2280 }, { "epoch": 0.46760967609676096, "grad_norm": 0.5087575727183979, "learning_rate": 0.00011531214551370877, "loss": 1.6351, "step": 2281 }, { "epoch": 0.46781467814678146, "grad_norm": 0.5143936428207051, "learning_rate": 0.00011524652081437886, "loss": 1.6002, "step": 2282 }, { "epoch": 0.46801968019680196, "grad_norm": 0.48573209051853344, "learning_rate": 0.00011518088939202614, "loss": 1.6696, "step": 2283 }, { "epoch": 0.46822468224682245, "grad_norm": 0.4568152073951472, "learning_rate": 0.00011511525127559109, "loss": 1.5745, "step": 2284 }, { "epoch": 0.46842968429684295, "grad_norm": 0.5141425459674389, "learning_rate": 0.00011504960649401712, "loss": 1.6114, "step": 2285 }, { "epoch": 0.46863468634686345, "grad_norm": 0.5425349486881768, "learning_rate": 0.00011498395507625066, "loss": 1.5927, "step": 2286 }, { "epoch": 0.46883968839688395, "grad_norm": 0.5401999411034183, "learning_rate": 0.00011491829705124093, "loss": 1.657, "step": 2287 }, { "epoch": 0.46904469044690444, "grad_norm": 0.4930794446289379, "learning_rate": 0.00011485263244794016, "loss": 1.5945, "step": 2288 }, { "epoch": 0.46924969249692494, "grad_norm": 0.48302905761849846, "learning_rate": 0.00011478696129530346, "loss": 1.5507, "step": 2289 }, { "epoch": 0.4694546945469455, "grad_norm": 0.5529478748509801, "learning_rate": 0.0001147212836222888, "loss": 1.5967, "step": 2290 }, { "epoch": 0.469659696596966, "grad_norm": 0.45504910634552553, "learning_rate": 0.00011465559945785711, "loss": 1.6189, "step": 2291 }, { "epoch": 0.4698646986469865, "grad_norm": 0.5223227317092374, "learning_rate": 0.00011458990883097205, "loss": 1.6803, "step": 2292 }, { "epoch": 0.470069700697007, "grad_norm": 0.5004932491352585, "learning_rate": 0.00011452421177060022, "loss": 1.6869, "step": 2293 }, { "epoch": 0.4702747027470275, "grad_norm": 0.5147278474125473, "learning_rate": 0.0001144585083057111, "loss": 1.5897, "step": 2294 }, { "epoch": 0.470479704797048, "grad_norm": 0.48215347737227754, "learning_rate": 0.00011439279846527682, "loss": 1.5188, "step": 2295 }, { "epoch": 0.4706847068470685, "grad_norm": 0.5580465090243764, "learning_rate": 0.00011432708227827254, "loss": 1.6698, "step": 2296 }, { "epoch": 0.470889708897089, "grad_norm": 0.5304498002445579, "learning_rate": 0.00011426135977367604, "loss": 1.5778, "step": 2297 }, { "epoch": 0.4710947109471095, "grad_norm": 0.48671076063650104, "learning_rate": 0.00011419563098046799, "loss": 1.5694, "step": 2298 }, { "epoch": 0.47129971299712997, "grad_norm": 0.5422502064405047, "learning_rate": 0.00011412989592763181, "loss": 1.6876, "step": 2299 }, { "epoch": 0.47150471504715047, "grad_norm": 0.534484966022562, "learning_rate": 0.00011406415464415363, "loss": 1.6413, "step": 2300 }, { "epoch": 0.47170971709717097, "grad_norm": 0.4825865458285362, "learning_rate": 0.00011399840715902244, "loss": 1.6293, "step": 2301 }, { "epoch": 0.47191471914719146, "grad_norm": 0.4854239660113782, "learning_rate": 0.00011393265350122981, "loss": 1.5754, "step": 2302 }, { "epoch": 0.47211972119721196, "grad_norm": 0.5029086619176066, "learning_rate": 0.00011386689369977015, "loss": 1.6192, "step": 2303 }, { "epoch": 0.47232472324723246, "grad_norm": 0.5394439778676559, "learning_rate": 0.00011380112778364058, "loss": 1.5888, "step": 2304 }, { "epoch": 0.47252972529725296, "grad_norm": 0.5421504249480066, "learning_rate": 0.00011373535578184082, "loss": 1.5933, "step": 2305 }, { "epoch": 0.47273472734727345, "grad_norm": 0.4329304297479539, "learning_rate": 0.00011366957772337337, "loss": 1.6202, "step": 2306 }, { "epoch": 0.47293972939729395, "grad_norm": 0.43476931795384094, "learning_rate": 0.00011360379363724338, "loss": 1.5888, "step": 2307 }, { "epoch": 0.47314473144731445, "grad_norm": 0.531836936820513, "learning_rate": 0.00011353800355245856, "loss": 1.6634, "step": 2308 }, { "epoch": 0.47334973349733495, "grad_norm": 0.5147397557535468, "learning_rate": 0.00011347220749802945, "loss": 1.6116, "step": 2309 }, { "epoch": 0.4735547355473555, "grad_norm": 0.5518973499148925, "learning_rate": 0.00011340640550296906, "loss": 1.6636, "step": 2310 }, { "epoch": 0.473759737597376, "grad_norm": 0.4909563420940473, "learning_rate": 0.0001133405975962931, "loss": 1.6831, "step": 2311 }, { "epoch": 0.4739647396473965, "grad_norm": 0.5198956369396392, "learning_rate": 0.00011327478380701989, "loss": 1.6843, "step": 2312 }, { "epoch": 0.474169741697417, "grad_norm": 0.49830301694805923, "learning_rate": 0.00011320896416417026, "loss": 1.514, "step": 2313 }, { "epoch": 0.4743747437474375, "grad_norm": 0.5187545398255399, "learning_rate": 0.00011314313869676769, "loss": 1.5757, "step": 2314 }, { "epoch": 0.474579745797458, "grad_norm": 0.5384269747925001, "learning_rate": 0.00011307730743383826, "loss": 1.5965, "step": 2315 }, { "epoch": 0.4747847478474785, "grad_norm": 0.5378151236519214, "learning_rate": 0.00011301147040441055, "loss": 1.6185, "step": 2316 }, { "epoch": 0.474989749897499, "grad_norm": 0.521754909949357, "learning_rate": 0.00011294562763751573, "loss": 1.5829, "step": 2317 }, { "epoch": 0.4751947519475195, "grad_norm": 0.4697886691531133, "learning_rate": 0.0001128797791621874, "loss": 1.6663, "step": 2318 }, { "epoch": 0.47539975399754, "grad_norm": 0.5309026896781432, "learning_rate": 0.00011281392500746177, "loss": 1.6035, "step": 2319 }, { "epoch": 0.4756047560475605, "grad_norm": 0.46459856486107975, "learning_rate": 0.00011274806520237755, "loss": 1.5916, "step": 2320 }, { "epoch": 0.47580975809758097, "grad_norm": 0.48680313444577894, "learning_rate": 0.00011268219977597594, "loss": 1.6259, "step": 2321 }, { "epoch": 0.47601476014760147, "grad_norm": 0.5067966113169905, "learning_rate": 0.00011261632875730052, "loss": 1.6574, "step": 2322 }, { "epoch": 0.47621976219762197, "grad_norm": 0.5131417472436789, "learning_rate": 0.00011255045217539748, "loss": 1.6761, "step": 2323 }, { "epoch": 0.47642476424764246, "grad_norm": 0.4841404237792854, "learning_rate": 0.00011248457005931539, "loss": 1.5836, "step": 2324 }, { "epoch": 0.47662976629766296, "grad_norm": 0.5071375986370167, "learning_rate": 0.00011241868243810525, "loss": 1.6052, "step": 2325 }, { "epoch": 0.47683476834768346, "grad_norm": 0.4528402009254952, "learning_rate": 0.00011235278934082057, "loss": 1.6842, "step": 2326 }, { "epoch": 0.47703977039770395, "grad_norm": 0.49836550778390243, "learning_rate": 0.0001122868907965171, "loss": 1.6185, "step": 2327 }, { "epoch": 0.47724477244772445, "grad_norm": 0.5145929849826683, "learning_rate": 0.0001122209868342532, "loss": 1.6534, "step": 2328 }, { "epoch": 0.47744977449774495, "grad_norm": 0.45886796707133826, "learning_rate": 0.00011215507748308948, "loss": 1.5393, "step": 2329 }, { "epoch": 0.4776547765477655, "grad_norm": 0.4694812409190323, "learning_rate": 0.00011208916277208894, "loss": 1.5387, "step": 2330 }, { "epoch": 0.477859778597786, "grad_norm": 0.4637910427532401, "learning_rate": 0.00011202324273031706, "loss": 1.5485, "step": 2331 }, { "epoch": 0.4780647806478065, "grad_norm": 0.5353326062311138, "learning_rate": 0.0001119573173868415, "loss": 1.6811, "step": 2332 }, { "epoch": 0.478269782697827, "grad_norm": 0.4982408592050999, "learning_rate": 0.00011189138677073236, "loss": 1.6201, "step": 2333 }, { "epoch": 0.4784747847478475, "grad_norm": 0.4953129986805206, "learning_rate": 0.00011182545091106209, "loss": 1.6302, "step": 2334 }, { "epoch": 0.478679786797868, "grad_norm": 0.49332846201190206, "learning_rate": 0.00011175950983690536, "loss": 1.5992, "step": 2335 }, { "epoch": 0.4788847888478885, "grad_norm": 0.45243072914605553, "learning_rate": 0.0001116935635773392, "loss": 1.6021, "step": 2336 }, { "epoch": 0.479089790897909, "grad_norm": 0.48561084414706707, "learning_rate": 0.00011162761216144294, "loss": 1.6843, "step": 2337 }, { "epoch": 0.4792947929479295, "grad_norm": 0.47927300604352957, "learning_rate": 0.00011156165561829805, "loss": 1.6517, "step": 2338 }, { "epoch": 0.47949979499795, "grad_norm": 0.4442454083340828, "learning_rate": 0.00011149569397698853, "loss": 1.5565, "step": 2339 }, { "epoch": 0.4797047970479705, "grad_norm": 0.5381660359712772, "learning_rate": 0.00011142972726660037, "loss": 1.6523, "step": 2340 }, { "epoch": 0.479909799097991, "grad_norm": 0.45053015629064147, "learning_rate": 0.00011136375551622189, "loss": 1.5825, "step": 2341 }, { "epoch": 0.48011480114801147, "grad_norm": 0.46369572366146095, "learning_rate": 0.00011129777875494367, "loss": 1.5612, "step": 2342 }, { "epoch": 0.48031980319803197, "grad_norm": 0.48810575669816464, "learning_rate": 0.0001112317970118584, "loss": 1.6376, "step": 2343 }, { "epoch": 0.48052480524805247, "grad_norm": 0.5254086860127589, "learning_rate": 0.00011116581031606113, "loss": 1.6935, "step": 2344 }, { "epoch": 0.48072980729807296, "grad_norm": 0.5124948665863931, "learning_rate": 0.00011109981869664891, "loss": 1.6198, "step": 2345 }, { "epoch": 0.48093480934809346, "grad_norm": 0.5461771124928989, "learning_rate": 0.00011103382218272107, "loss": 1.6161, "step": 2346 }, { "epoch": 0.48113981139811396, "grad_norm": 0.4863473469789765, "learning_rate": 0.0001109678208033791, "loss": 1.592, "step": 2347 }, { "epoch": 0.48134481344813446, "grad_norm": 0.5046390734295799, "learning_rate": 0.00011090181458772658, "loss": 1.6116, "step": 2348 }, { "epoch": 0.48154981549815495, "grad_norm": 0.4737685706404132, "learning_rate": 0.00011083580356486925, "loss": 1.5607, "step": 2349 }, { "epoch": 0.4817548175481755, "grad_norm": 0.5236025428121257, "learning_rate": 0.00011076978776391498, "loss": 1.6434, "step": 2350 }, { "epoch": 0.481959819598196, "grad_norm": 0.5230584848789296, "learning_rate": 0.00011070376721397373, "loss": 1.6571, "step": 2351 }, { "epoch": 0.4821648216482165, "grad_norm": 0.4918779821776992, "learning_rate": 0.00011063774194415762, "loss": 1.5898, "step": 2352 }, { "epoch": 0.482369823698237, "grad_norm": 0.4748366711117525, "learning_rate": 0.00011057171198358069, "loss": 1.566, "step": 2353 }, { "epoch": 0.4825748257482575, "grad_norm": 0.4954311061768946, "learning_rate": 0.00011050567736135922, "loss": 1.5944, "step": 2354 }, { "epoch": 0.482779827798278, "grad_norm": 0.47698109405814104, "learning_rate": 0.00011043963810661145, "loss": 1.4945, "step": 2355 }, { "epoch": 0.4829848298482985, "grad_norm": 0.4504676414625187, "learning_rate": 0.0001103735942484577, "loss": 1.5752, "step": 2356 }, { "epoch": 0.483189831898319, "grad_norm": 0.48840594738611287, "learning_rate": 0.00011030754581602034, "loss": 1.5283, "step": 2357 }, { "epoch": 0.4833948339483395, "grad_norm": 0.5245347530391755, "learning_rate": 0.0001102414928384237, "loss": 1.6494, "step": 2358 }, { "epoch": 0.48359983599836, "grad_norm": 0.4850473636673112, "learning_rate": 0.0001101754353447941, "loss": 1.6233, "step": 2359 }, { "epoch": 0.4838048380483805, "grad_norm": 0.5221234980748952, "learning_rate": 0.00011010937336425997, "loss": 1.6097, "step": 2360 }, { "epoch": 0.484009840098401, "grad_norm": 0.5243373889101719, "learning_rate": 0.00011004330692595159, "loss": 1.6357, "step": 2361 }, { "epoch": 0.4842148421484215, "grad_norm": 0.4331562470316017, "learning_rate": 0.00010997723605900128, "loss": 1.609, "step": 2362 }, { "epoch": 0.484419844198442, "grad_norm": 0.43944438677907005, "learning_rate": 0.00010991116079254326, "loss": 1.5822, "step": 2363 }, { "epoch": 0.48462484624846247, "grad_norm": 0.4669528875625807, "learning_rate": 0.00010984508115571377, "loss": 1.6129, "step": 2364 }, { "epoch": 0.48482984829848297, "grad_norm": 0.5302574842280688, "learning_rate": 0.0001097789971776509, "loss": 1.5272, "step": 2365 }, { "epoch": 0.48503485034850347, "grad_norm": 0.48915275804868724, "learning_rate": 0.00010971290888749465, "loss": 1.6127, "step": 2366 }, { "epoch": 0.48523985239852396, "grad_norm": 0.46774279092863985, "learning_rate": 0.00010964681631438702, "loss": 1.61, "step": 2367 }, { "epoch": 0.48544485444854446, "grad_norm": 0.48916150402670916, "learning_rate": 0.00010958071948747175, "loss": 1.5986, "step": 2368 }, { "epoch": 0.48564985649856496, "grad_norm": 0.5381627483880119, "learning_rate": 0.00010951461843589464, "loss": 1.6588, "step": 2369 }, { "epoch": 0.4858548585485855, "grad_norm": 0.49255876080831035, "learning_rate": 0.00010944851318880314, "loss": 1.6333, "step": 2370 }, { "epoch": 0.486059860598606, "grad_norm": 0.4530107037969976, "learning_rate": 0.00010938240377534673, "loss": 1.6044, "step": 2371 }, { "epoch": 0.4862648626486265, "grad_norm": 0.4679867863257569, "learning_rate": 0.00010931629022467664, "loss": 1.5914, "step": 2372 }, { "epoch": 0.486469864698647, "grad_norm": 0.4609246427105402, "learning_rate": 0.0001092501725659459, "loss": 1.6302, "step": 2373 }, { "epoch": 0.4866748667486675, "grad_norm": 0.5634552571191087, "learning_rate": 0.00010918405082830947, "loss": 1.6898, "step": 2374 }, { "epoch": 0.486879868798688, "grad_norm": 0.4347474955091194, "learning_rate": 0.00010911792504092398, "loss": 1.6411, "step": 2375 }, { "epoch": 0.4870848708487085, "grad_norm": 0.46431622202420253, "learning_rate": 0.0001090517952329479, "loss": 1.6204, "step": 2376 }, { "epoch": 0.487289872898729, "grad_norm": 0.47692010798204787, "learning_rate": 0.00010898566143354152, "loss": 1.5208, "step": 2377 }, { "epoch": 0.4874948749487495, "grad_norm": 0.5378789929228168, "learning_rate": 0.00010891952367186673, "loss": 1.5894, "step": 2378 }, { "epoch": 0.48769987699877, "grad_norm": 0.5123897483953619, "learning_rate": 0.00010885338197708741, "loss": 1.5772, "step": 2379 }, { "epoch": 0.4879048790487905, "grad_norm": 0.4584599283163656, "learning_rate": 0.00010878723637836896, "loss": 1.5962, "step": 2380 }, { "epoch": 0.488109881098811, "grad_norm": 0.4762694224577129, "learning_rate": 0.00010872108690487859, "loss": 1.6019, "step": 2381 }, { "epoch": 0.4883148831488315, "grad_norm": 0.5050385453733731, "learning_rate": 0.00010865493358578525, "loss": 1.5604, "step": 2382 }, { "epoch": 0.488519885198852, "grad_norm": 0.5524601479762588, "learning_rate": 0.00010858877645025947, "loss": 1.7108, "step": 2383 }, { "epoch": 0.4887248872488725, "grad_norm": 0.46286418508676025, "learning_rate": 0.00010852261552747365, "loss": 1.6257, "step": 2384 }, { "epoch": 0.488929889298893, "grad_norm": 0.5073850683660276, "learning_rate": 0.00010845645084660168, "loss": 1.6136, "step": 2385 }, { "epoch": 0.48913489134891347, "grad_norm": 0.5496010084900519, "learning_rate": 0.00010839028243681913, "loss": 1.6215, "step": 2386 }, { "epoch": 0.48933989339893397, "grad_norm": 0.45297160435671774, "learning_rate": 0.00010832411032730338, "loss": 1.6088, "step": 2387 }, { "epoch": 0.48954489544895446, "grad_norm": 0.49003040788274843, "learning_rate": 0.00010825793454723325, "loss": 1.6281, "step": 2388 }, { "epoch": 0.48974989749897496, "grad_norm": 0.4549031176294867, "learning_rate": 0.00010819175512578926, "loss": 1.5464, "step": 2389 }, { "epoch": 0.4899548995489955, "grad_norm": 0.49100533756003345, "learning_rate": 0.00010812557209215354, "loss": 1.5634, "step": 2390 }, { "epoch": 0.490159901599016, "grad_norm": 0.5769889879117706, "learning_rate": 0.00010805938547550975, "loss": 1.5911, "step": 2391 }, { "epoch": 0.4903649036490365, "grad_norm": 0.4287139312395359, "learning_rate": 0.00010799319530504328, "loss": 1.5459, "step": 2392 }, { "epoch": 0.490569905699057, "grad_norm": 0.43376555126658445, "learning_rate": 0.00010792700160994091, "loss": 1.5647, "step": 2393 }, { "epoch": 0.4907749077490775, "grad_norm": 0.5233643726619, "learning_rate": 0.00010786080441939106, "loss": 1.6711, "step": 2394 }, { "epoch": 0.490979909799098, "grad_norm": 0.5633323902721322, "learning_rate": 0.00010779460376258373, "loss": 1.5535, "step": 2395 }, { "epoch": 0.4911849118491185, "grad_norm": 0.47732599941857085, "learning_rate": 0.00010772839966871033, "loss": 1.5987, "step": 2396 }, { "epoch": 0.491389913899139, "grad_norm": 0.46067596959487644, "learning_rate": 0.00010766219216696389, "loss": 1.5897, "step": 2397 }, { "epoch": 0.4915949159491595, "grad_norm": 0.43869446298812403, "learning_rate": 0.00010759598128653891, "loss": 1.5501, "step": 2398 }, { "epoch": 0.49179991799918, "grad_norm": 0.512174786123945, "learning_rate": 0.00010752976705663141, "loss": 1.6311, "step": 2399 }, { "epoch": 0.4920049200492005, "grad_norm": 0.5075030594159609, "learning_rate": 0.00010746354950643882, "loss": 1.5888, "step": 2400 }, { "epoch": 0.492209922099221, "grad_norm": 0.4652475912328771, "learning_rate": 0.00010739732866516006, "loss": 1.5667, "step": 2401 }, { "epoch": 0.4924149241492415, "grad_norm": 0.5109471502775814, "learning_rate": 0.00010733110456199553, "loss": 1.5664, "step": 2402 }, { "epoch": 0.492619926199262, "grad_norm": 0.41415626725373245, "learning_rate": 0.00010726487722614704, "loss": 1.5234, "step": 2403 }, { "epoch": 0.4928249282492825, "grad_norm": 0.453900556364923, "learning_rate": 0.00010719864668681789, "loss": 1.5408, "step": 2404 }, { "epoch": 0.493029930299303, "grad_norm": 0.5159658103391713, "learning_rate": 0.00010713241297321266, "loss": 1.6307, "step": 2405 }, { "epoch": 0.4932349323493235, "grad_norm": 0.4495477608661638, "learning_rate": 0.00010706617611453744, "loss": 1.6134, "step": 2406 }, { "epoch": 0.49343993439934397, "grad_norm": 0.46861893482379374, "learning_rate": 0.00010699993613999966, "loss": 1.6052, "step": 2407 }, { "epoch": 0.49364493644936447, "grad_norm": 0.4572047369011586, "learning_rate": 0.00010693369307880816, "loss": 1.5282, "step": 2408 }, { "epoch": 0.49384993849938497, "grad_norm": 0.5203231874347147, "learning_rate": 0.00010686744696017314, "loss": 1.7314, "step": 2409 }, { "epoch": 0.4940549405494055, "grad_norm": 0.47779029984765703, "learning_rate": 0.00010680119781330608, "loss": 1.6489, "step": 2410 }, { "epoch": 0.494259942599426, "grad_norm": 0.47885386449454903, "learning_rate": 0.00010673494566741986, "loss": 1.6085, "step": 2411 }, { "epoch": 0.4944649446494465, "grad_norm": 0.5108249036297599, "learning_rate": 0.0001066686905517287, "loss": 1.5901, "step": 2412 }, { "epoch": 0.494669946699467, "grad_norm": 0.5196745731885813, "learning_rate": 0.00010660243249544803, "loss": 1.7071, "step": 2413 }, { "epoch": 0.4948749487494875, "grad_norm": 0.5023310126977671, "learning_rate": 0.00010653617152779469, "loss": 1.5968, "step": 2414 }, { "epoch": 0.495079950799508, "grad_norm": 0.48776548190121755, "learning_rate": 0.00010646990767798673, "loss": 1.6243, "step": 2415 }, { "epoch": 0.4952849528495285, "grad_norm": 0.4604749085603614, "learning_rate": 0.00010640364097524351, "loss": 1.5148, "step": 2416 }, { "epoch": 0.495489954899549, "grad_norm": 0.5058738074819576, "learning_rate": 0.00010633737144878567, "loss": 1.6582, "step": 2417 }, { "epoch": 0.4956949569495695, "grad_norm": 0.4705812627629428, "learning_rate": 0.00010627109912783497, "loss": 1.5764, "step": 2418 }, { "epoch": 0.49589995899959, "grad_norm": 0.4207336641892229, "learning_rate": 0.00010620482404161455, "loss": 1.5807, "step": 2419 }, { "epoch": 0.4961049610496105, "grad_norm": 0.49470056794660433, "learning_rate": 0.00010613854621934876, "loss": 1.6516, "step": 2420 }, { "epoch": 0.496309963099631, "grad_norm": 0.5011553114874971, "learning_rate": 0.00010607226569026296, "loss": 1.5954, "step": 2421 }, { "epoch": 0.4965149651496515, "grad_norm": 0.4715568061798656, "learning_rate": 0.00010600598248358402, "loss": 1.5588, "step": 2422 }, { "epoch": 0.496719967199672, "grad_norm": 0.4465718327712777, "learning_rate": 0.00010593969662853971, "loss": 1.6177, "step": 2423 }, { "epoch": 0.4969249692496925, "grad_norm": 0.46102051514978815, "learning_rate": 0.00010587340815435913, "loss": 1.5527, "step": 2424 }, { "epoch": 0.497129971299713, "grad_norm": 0.4847758311808002, "learning_rate": 0.00010580711709027247, "loss": 1.6046, "step": 2425 }, { "epoch": 0.4973349733497335, "grad_norm": 0.48483871309018783, "learning_rate": 0.00010574082346551106, "loss": 1.5434, "step": 2426 }, { "epoch": 0.497539975399754, "grad_norm": 0.4632410477156264, "learning_rate": 0.00010567452730930743, "loss": 1.5945, "step": 2427 }, { "epoch": 0.4977449774497745, "grad_norm": 0.4528789393023613, "learning_rate": 0.00010560822865089507, "loss": 1.5985, "step": 2428 }, { "epoch": 0.49794997949979497, "grad_norm": 0.4999662475226222, "learning_rate": 0.0001055419275195088, "loss": 1.6006, "step": 2429 }, { "epoch": 0.4981549815498155, "grad_norm": 0.4824404460357333, "learning_rate": 0.00010547562394438432, "loss": 1.6129, "step": 2430 }, { "epoch": 0.498359983599836, "grad_norm": 0.51597552922563, "learning_rate": 0.0001054093179547585, "loss": 1.6651, "step": 2431 }, { "epoch": 0.4985649856498565, "grad_norm": 0.47077000695063503, "learning_rate": 0.00010534300957986934, "loss": 1.5855, "step": 2432 }, { "epoch": 0.498769987699877, "grad_norm": 0.5511054571725165, "learning_rate": 0.00010527669884895573, "loss": 1.6265, "step": 2433 }, { "epoch": 0.4989749897498975, "grad_norm": 0.4873107508785988, "learning_rate": 0.00010521038579125772, "loss": 1.6211, "step": 2434 }, { "epoch": 0.499179991799918, "grad_norm": 0.48425913975104357, "learning_rate": 0.00010514407043601639, "loss": 1.5269, "step": 2435 }, { "epoch": 0.4993849938499385, "grad_norm": 0.4448744832694071, "learning_rate": 0.00010507775281247376, "loss": 1.6123, "step": 2436 }, { "epoch": 0.499589995899959, "grad_norm": 0.5021480905739342, "learning_rate": 0.00010501143294987291, "loss": 1.5658, "step": 2437 }, { "epoch": 0.4997949979499795, "grad_norm": 0.47015251130786984, "learning_rate": 0.00010494511087745787, "loss": 1.5841, "step": 2438 }, { "epoch": 0.5, "grad_norm": 0.4515258663005253, "learning_rate": 0.00010487878662447362, "loss": 1.4943, "step": 2439 }, { "epoch": 0.5002050020500205, "grad_norm": 0.47194872939643995, "learning_rate": 0.00010481246022016621, "loss": 1.574, "step": 2440 }, { "epoch": 0.500410004100041, "grad_norm": 0.5223748220714067, "learning_rate": 0.00010474613169378255, "loss": 1.6495, "step": 2441 }, { "epoch": 0.5006150061500615, "grad_norm": 0.45534207125689324, "learning_rate": 0.0001046798010745705, "loss": 1.5767, "step": 2442 }, { "epoch": 0.500820008200082, "grad_norm": 0.48462075831370743, "learning_rate": 0.00010461346839177886, "loss": 1.6031, "step": 2443 }, { "epoch": 0.5010250102501025, "grad_norm": 0.520479204301412, "learning_rate": 0.0001045471336746573, "loss": 1.61, "step": 2444 }, { "epoch": 0.501230012300123, "grad_norm": 0.5010954605082324, "learning_rate": 0.00010448079695245642, "loss": 1.6324, "step": 2445 }, { "epoch": 0.5014350143501435, "grad_norm": 0.47110560381163424, "learning_rate": 0.00010441445825442772, "loss": 1.5896, "step": 2446 }, { "epoch": 0.501640016400164, "grad_norm": 0.4666942644677452, "learning_rate": 0.00010434811760982354, "loss": 1.6386, "step": 2447 }, { "epoch": 0.5018450184501845, "grad_norm": 0.5054809604278697, "learning_rate": 0.00010428177504789713, "loss": 1.6291, "step": 2448 }, { "epoch": 0.502050020500205, "grad_norm": 0.45789489860046845, "learning_rate": 0.00010421543059790249, "loss": 1.5651, "step": 2449 }, { "epoch": 0.5022550225502255, "grad_norm": 0.5122351289085794, "learning_rate": 0.00010414908428909451, "loss": 1.622, "step": 2450 }, { "epoch": 0.502460024600246, "grad_norm": 0.46720146443111, "learning_rate": 0.00010408273615072893, "loss": 1.6171, "step": 2451 }, { "epoch": 0.5026650266502665, "grad_norm": 0.4929517202147766, "learning_rate": 0.00010401638621206225, "loss": 1.5988, "step": 2452 }, { "epoch": 0.502870028700287, "grad_norm": 0.5219468133294127, "learning_rate": 0.00010395003450235177, "loss": 1.6357, "step": 2453 }, { "epoch": 0.5030750307503075, "grad_norm": 0.5131930453579404, "learning_rate": 0.00010388368105085557, "loss": 1.6377, "step": 2454 }, { "epoch": 0.503280032800328, "grad_norm": 0.48978927651683835, "learning_rate": 0.00010381732588683253, "loss": 1.559, "step": 2455 }, { "epoch": 0.5034850348503485, "grad_norm": 0.464008509874065, "learning_rate": 0.00010375096903954224, "loss": 1.4978, "step": 2456 }, { "epoch": 0.503690036900369, "grad_norm": 0.47675478064905874, "learning_rate": 0.0001036846105382451, "loss": 1.5878, "step": 2457 }, { "epoch": 0.5038950389503895, "grad_norm": 0.4882697008509031, "learning_rate": 0.00010361825041220212, "loss": 1.5783, "step": 2458 }, { "epoch": 0.5041000410004101, "grad_norm": 0.4925912638521127, "learning_rate": 0.00010355188869067515, "loss": 1.6102, "step": 2459 }, { "epoch": 0.5043050430504306, "grad_norm": 0.5024673878362161, "learning_rate": 0.00010348552540292671, "loss": 1.6067, "step": 2460 }, { "epoch": 0.504510045100451, "grad_norm": 0.5238039575010716, "learning_rate": 0.0001034191605782199, "loss": 1.632, "step": 2461 }, { "epoch": 0.5047150471504716, "grad_norm": 0.5558373580576796, "learning_rate": 0.00010335279424581871, "loss": 1.4658, "step": 2462 }, { "epoch": 0.504920049200492, "grad_norm": 0.5475872001937412, "learning_rate": 0.00010328642643498762, "loss": 1.5872, "step": 2463 }, { "epoch": 0.5051250512505125, "grad_norm": 0.5025179634215936, "learning_rate": 0.00010322005717499181, "loss": 1.5393, "step": 2464 }, { "epoch": 0.505330053300533, "grad_norm": 0.5029049452595459, "learning_rate": 0.00010315368649509716, "loss": 1.5776, "step": 2465 }, { "epoch": 0.5055350553505535, "grad_norm": 0.48131739466367407, "learning_rate": 0.00010308731442457005, "loss": 1.553, "step": 2466 }, { "epoch": 0.505740057400574, "grad_norm": 0.5111585139078691, "learning_rate": 0.00010302094099267759, "loss": 1.6123, "step": 2467 }, { "epoch": 0.5059450594505945, "grad_norm": 0.5173421848686773, "learning_rate": 0.00010295456622868745, "loss": 1.6605, "step": 2468 }, { "epoch": 0.506150061500615, "grad_norm": 0.5308233016317108, "learning_rate": 0.00010288819016186781, "loss": 1.6493, "step": 2469 }, { "epoch": 0.5063550635506355, "grad_norm": 0.4715321020694935, "learning_rate": 0.00010282181282148764, "loss": 1.6209, "step": 2470 }, { "epoch": 0.506560065600656, "grad_norm": 0.5830310639222391, "learning_rate": 0.00010275543423681621, "loss": 1.5886, "step": 2471 }, { "epoch": 0.5067650676506765, "grad_norm": 0.503817186060206, "learning_rate": 0.00010268905443712352, "loss": 1.6209, "step": 2472 }, { "epoch": 0.506970069700697, "grad_norm": 0.46038191349913693, "learning_rate": 0.00010262267345168002, "loss": 1.5915, "step": 2473 }, { "epoch": 0.5071750717507175, "grad_norm": 0.4758399941440051, "learning_rate": 0.0001025562913097567, "loss": 1.6005, "step": 2474 }, { "epoch": 0.507380073800738, "grad_norm": 0.4290919832977104, "learning_rate": 0.0001024899080406251, "loss": 1.6065, "step": 2475 }, { "epoch": 0.5075850758507585, "grad_norm": 0.4875553974177856, "learning_rate": 0.00010242352367355721, "loss": 1.6644, "step": 2476 }, { "epoch": 0.507790077900779, "grad_norm": 0.5208232481313042, "learning_rate": 0.0001023571382378255, "loss": 1.5928, "step": 2477 }, { "epoch": 0.5079950799507995, "grad_norm": 0.4855824646514368, "learning_rate": 0.00010229075176270298, "loss": 1.5313, "step": 2478 }, { "epoch": 0.50820008200082, "grad_norm": 0.5354337933570282, "learning_rate": 0.000102224364277463, "loss": 1.5763, "step": 2479 }, { "epoch": 0.5084050840508405, "grad_norm": 0.5246556148006474, "learning_rate": 0.00010215797581137947, "loss": 1.626, "step": 2480 }, { "epoch": 0.508610086100861, "grad_norm": 0.5248200890079275, "learning_rate": 0.00010209158639372669, "loss": 1.6198, "step": 2481 }, { "epoch": 0.5088150881508815, "grad_norm": 0.512113162776252, "learning_rate": 0.00010202519605377933, "loss": 1.6075, "step": 2482 }, { "epoch": 0.509020090200902, "grad_norm": 0.47168642330826854, "learning_rate": 0.00010195880482081259, "loss": 1.5638, "step": 2483 }, { "epoch": 0.5092250922509225, "grad_norm": 0.5586806365233687, "learning_rate": 0.0001018924127241019, "loss": 1.6259, "step": 2484 }, { "epoch": 0.509430094300943, "grad_norm": 0.46263332699102744, "learning_rate": 0.00010182601979292324, "loss": 1.5757, "step": 2485 }, { "epoch": 0.5096350963509635, "grad_norm": 0.5169451668995334, "learning_rate": 0.00010175962605655278, "loss": 1.6019, "step": 2486 }, { "epoch": 0.509840098400984, "grad_norm": 0.5106679612665614, "learning_rate": 0.00010169323154426727, "loss": 1.5749, "step": 2487 }, { "epoch": 0.5100451004510045, "grad_norm": 0.47639820709974806, "learning_rate": 0.00010162683628534353, "loss": 1.5993, "step": 2488 }, { "epoch": 0.510250102501025, "grad_norm": 0.4797874742922257, "learning_rate": 0.00010156044030905893, "loss": 1.5926, "step": 2489 }, { "epoch": 0.5104551045510455, "grad_norm": 0.5110094133179703, "learning_rate": 0.00010149404364469108, "loss": 1.6174, "step": 2490 }, { "epoch": 0.510660106601066, "grad_norm": 0.45701149123203805, "learning_rate": 0.00010142764632151791, "loss": 1.5263, "step": 2491 }, { "epoch": 0.5108651086510865, "grad_norm": 0.4606429909667692, "learning_rate": 0.00010136124836881756, "loss": 1.6431, "step": 2492 }, { "epoch": 0.511070110701107, "grad_norm": 0.5528467574561731, "learning_rate": 0.00010129484981586852, "loss": 1.6401, "step": 2493 }, { "epoch": 0.5112751127511275, "grad_norm": 0.4853128402441001, "learning_rate": 0.00010122845069194957, "loss": 1.6239, "step": 2494 }, { "epoch": 0.511480114801148, "grad_norm": 0.5058352624526249, "learning_rate": 0.00010116205102633973, "loss": 1.5764, "step": 2495 }, { "epoch": 0.5116851168511685, "grad_norm": 0.5215394579725485, "learning_rate": 0.00010109565084831816, "loss": 1.5929, "step": 2496 }, { "epoch": 0.511890118901189, "grad_norm": 0.4582735441208923, "learning_rate": 0.00010102925018716436, "loss": 1.5617, "step": 2497 }, { "epoch": 0.5120951209512095, "grad_norm": 0.5491773649701894, "learning_rate": 0.000100962849072158, "loss": 1.6699, "step": 2498 }, { "epoch": 0.5123001230012301, "grad_norm": 0.4811743553150433, "learning_rate": 0.00010089644753257897, "loss": 1.5442, "step": 2499 }, { "epoch": 0.5125051250512506, "grad_norm": 0.49544065018608047, "learning_rate": 0.0001008300455977073, "loss": 1.6331, "step": 2500 }, { "epoch": 0.5127101271012711, "grad_norm": 0.4564115367370197, "learning_rate": 0.00010076364329682327, "loss": 1.5818, "step": 2501 }, { "epoch": 0.5129151291512916, "grad_norm": 0.49393267098549415, "learning_rate": 0.0001006972406592072, "loss": 1.6238, "step": 2502 }, { "epoch": 0.5131201312013121, "grad_norm": 0.4919227152974293, "learning_rate": 0.00010063083771413975, "loss": 1.6198, "step": 2503 }, { "epoch": 0.5133251332513326, "grad_norm": 0.4983782900020245, "learning_rate": 0.00010056443449090148, "loss": 1.6591, "step": 2504 }, { "epoch": 0.513530135301353, "grad_norm": 0.4379587385398632, "learning_rate": 0.00010049803101877328, "loss": 1.5746, "step": 2505 }, { "epoch": 0.5137351373513735, "grad_norm": 0.459744160917418, "learning_rate": 0.00010043162732703601, "loss": 1.6218, "step": 2506 }, { "epoch": 0.513940139401394, "grad_norm": 0.5324166525407472, "learning_rate": 0.00010036522344497073, "loss": 1.6129, "step": 2507 }, { "epoch": 0.5141451414514145, "grad_norm": 0.4835041887056759, "learning_rate": 0.0001002988194018585, "loss": 1.5706, "step": 2508 }, { "epoch": 0.514350143501435, "grad_norm": 0.4737438506372438, "learning_rate": 0.00010023241522698048, "loss": 1.5815, "step": 2509 }, { "epoch": 0.5145551455514555, "grad_norm": 0.45369081143961243, "learning_rate": 0.00010016601094961792, "loss": 1.5817, "step": 2510 }, { "epoch": 0.514760147601476, "grad_norm": 0.431106950670304, "learning_rate": 0.00010009960659905211, "loss": 1.5066, "step": 2511 }, { "epoch": 0.5149651496514965, "grad_norm": 0.4955192895411924, "learning_rate": 0.00010003320220456425, "loss": 1.6076, "step": 2512 }, { "epoch": 0.515170151701517, "grad_norm": 0.540572428181615, "learning_rate": 9.996679779543578e-05, "loss": 1.5916, "step": 2513 }, { "epoch": 0.5153751537515375, "grad_norm": 0.47234105520451763, "learning_rate": 9.990039340094793e-05, "loss": 1.558, "step": 2514 }, { "epoch": 0.515580155801558, "grad_norm": 0.47920848127457555, "learning_rate": 9.983398905038211e-05, "loss": 1.6319, "step": 2515 }, { "epoch": 0.5157851578515785, "grad_norm": 0.49213997433948753, "learning_rate": 9.976758477301951e-05, "loss": 1.6662, "step": 2516 }, { "epoch": 0.515990159901599, "grad_norm": 0.45054389918426824, "learning_rate": 9.97011805981415e-05, "loss": 1.5518, "step": 2517 }, { "epoch": 0.5161951619516195, "grad_norm": 0.5029764414406267, "learning_rate": 9.96347765550293e-05, "loss": 1.6486, "step": 2518 }, { "epoch": 0.51640016400164, "grad_norm": 0.5052305700272539, "learning_rate": 9.9568372672964e-05, "loss": 1.5791, "step": 2519 }, { "epoch": 0.5166051660516605, "grad_norm": 0.5420813213312199, "learning_rate": 9.950196898122677e-05, "loss": 1.5652, "step": 2520 }, { "epoch": 0.516810168101681, "grad_norm": 0.4344333235712946, "learning_rate": 9.943556550909853e-05, "loss": 1.5429, "step": 2521 }, { "epoch": 0.5170151701517015, "grad_norm": 0.4864341815769745, "learning_rate": 9.936916228586028e-05, "loss": 1.6041, "step": 2522 }, { "epoch": 0.517220172201722, "grad_norm": 0.5164224728911496, "learning_rate": 9.93027593407928e-05, "loss": 1.656, "step": 2523 }, { "epoch": 0.5174251742517425, "grad_norm": 0.48558373722109194, "learning_rate": 9.923635670317677e-05, "loss": 1.5759, "step": 2524 }, { "epoch": 0.517630176301763, "grad_norm": 0.48590969028469266, "learning_rate": 9.916995440229274e-05, "loss": 1.5563, "step": 2525 }, { "epoch": 0.5178351783517835, "grad_norm": 0.4967037758031681, "learning_rate": 9.910355246742104e-05, "loss": 1.6096, "step": 2526 }, { "epoch": 0.518040180401804, "grad_norm": 0.49675997747506484, "learning_rate": 9.903715092784201e-05, "loss": 1.5492, "step": 2527 }, { "epoch": 0.5182451824518245, "grad_norm": 0.45251477918910415, "learning_rate": 9.897074981283566e-05, "loss": 1.5897, "step": 2528 }, { "epoch": 0.518450184501845, "grad_norm": 0.5039776535215352, "learning_rate": 9.890434915168186e-05, "loss": 1.6139, "step": 2529 }, { "epoch": 0.5186551865518655, "grad_norm": 0.5034529955913656, "learning_rate": 9.883794897366032e-05, "loss": 1.5903, "step": 2530 }, { "epoch": 0.518860188601886, "grad_norm": 0.5014220844700772, "learning_rate": 9.877154930805044e-05, "loss": 1.6361, "step": 2531 }, { "epoch": 0.5190651906519065, "grad_norm": 0.4733108419139549, "learning_rate": 9.870515018413147e-05, "loss": 1.6271, "step": 2532 }, { "epoch": 0.519270192701927, "grad_norm": 0.47555218542064365, "learning_rate": 9.863875163118246e-05, "loss": 1.5793, "step": 2533 }, { "epoch": 0.5194751947519475, "grad_norm": 0.512479108337581, "learning_rate": 9.857235367848212e-05, "loss": 1.5722, "step": 2534 }, { "epoch": 0.519680196801968, "grad_norm": 0.4411341485803346, "learning_rate": 9.850595635530894e-05, "loss": 1.5596, "step": 2535 }, { "epoch": 0.5198851988519885, "grad_norm": 0.4894139127533064, "learning_rate": 9.84395596909411e-05, "loss": 1.5474, "step": 2536 }, { "epoch": 0.520090200902009, "grad_norm": 0.5169028198689807, "learning_rate": 9.837316371465647e-05, "loss": 1.6659, "step": 2537 }, { "epoch": 0.5202952029520295, "grad_norm": 0.4271890419775494, "learning_rate": 9.830676845573277e-05, "loss": 1.5635, "step": 2538 }, { "epoch": 0.5205002050020501, "grad_norm": 0.49725971368584343, "learning_rate": 9.824037394344723e-05, "loss": 1.5679, "step": 2539 }, { "epoch": 0.5207052070520706, "grad_norm": 0.49802112891904055, "learning_rate": 9.81739802070768e-05, "loss": 1.5815, "step": 2540 }, { "epoch": 0.5209102091020911, "grad_norm": 0.48247149935574224, "learning_rate": 9.810758727589813e-05, "loss": 1.6449, "step": 2541 }, { "epoch": 0.5211152111521116, "grad_norm": 0.42232875110504203, "learning_rate": 9.80411951791874e-05, "loss": 1.5436, "step": 2542 }, { "epoch": 0.5213202132021321, "grad_norm": 0.4608539619915591, "learning_rate": 9.797480394622067e-05, "loss": 1.5227, "step": 2543 }, { "epoch": 0.5215252152521526, "grad_norm": 0.46229272710756314, "learning_rate": 9.790841360627335e-05, "loss": 1.6085, "step": 2544 }, { "epoch": 0.5217302173021731, "grad_norm": 0.5008077375281823, "learning_rate": 9.784202418862055e-05, "loss": 1.5789, "step": 2545 }, { "epoch": 0.5219352193521936, "grad_norm": 0.4803787861926077, "learning_rate": 9.777563572253704e-05, "loss": 1.6053, "step": 2546 }, { "epoch": 0.522140221402214, "grad_norm": 0.46517282070826577, "learning_rate": 9.770924823729707e-05, "loss": 1.5822, "step": 2547 }, { "epoch": 0.5223452234522346, "grad_norm": 0.5128084079117348, "learning_rate": 9.76428617621745e-05, "loss": 1.614, "step": 2548 }, { "epoch": 0.522550225502255, "grad_norm": 0.4634951299647063, "learning_rate": 9.757647632644281e-05, "loss": 1.6387, "step": 2549 }, { "epoch": 0.5227552275522755, "grad_norm": 0.4738270248530166, "learning_rate": 9.751009195937492e-05, "loss": 1.571, "step": 2550 }, { "epoch": 0.522960229602296, "grad_norm": 0.500545655030884, "learning_rate": 9.744370869024333e-05, "loss": 1.6107, "step": 2551 }, { "epoch": 0.5231652316523165, "grad_norm": 0.5018393676972772, "learning_rate": 9.737732654832001e-05, "loss": 1.6351, "step": 2552 }, { "epoch": 0.523370233702337, "grad_norm": 0.48920127556465665, "learning_rate": 9.73109455628765e-05, "loss": 1.5841, "step": 2553 }, { "epoch": 0.5235752357523575, "grad_norm": 0.4893372441157476, "learning_rate": 9.724456576318381e-05, "loss": 1.6178, "step": 2554 }, { "epoch": 0.523780237802378, "grad_norm": 0.5170917820909494, "learning_rate": 9.717818717851239e-05, "loss": 1.619, "step": 2555 }, { "epoch": 0.5239852398523985, "grad_norm": 0.51058600115176, "learning_rate": 9.711180983813221e-05, "loss": 1.6348, "step": 2556 }, { "epoch": 0.524190241902419, "grad_norm": 0.49067760840444696, "learning_rate": 9.70454337713126e-05, "loss": 1.5937, "step": 2557 }, { "epoch": 0.5243952439524395, "grad_norm": 0.5159856838872472, "learning_rate": 9.697905900732242e-05, "loss": 1.6992, "step": 2558 }, { "epoch": 0.52460024600246, "grad_norm": 0.4923969095974549, "learning_rate": 9.691268557542997e-05, "loss": 1.6061, "step": 2559 }, { "epoch": 0.5248052480524805, "grad_norm": 0.45800071463730624, "learning_rate": 9.684631350490287e-05, "loss": 1.536, "step": 2560 }, { "epoch": 0.525010250102501, "grad_norm": 0.5360345920950236, "learning_rate": 9.67799428250082e-05, "loss": 1.6033, "step": 2561 }, { "epoch": 0.5252152521525215, "grad_norm": 0.5180160431365916, "learning_rate": 9.67135735650124e-05, "loss": 1.6462, "step": 2562 }, { "epoch": 0.525420254202542, "grad_norm": 0.4812536913627767, "learning_rate": 9.664720575418131e-05, "loss": 1.6137, "step": 2563 }, { "epoch": 0.5256252562525625, "grad_norm": 0.4809066363559525, "learning_rate": 9.658083942178009e-05, "loss": 1.5367, "step": 2564 }, { "epoch": 0.525830258302583, "grad_norm": 0.48273280601119806, "learning_rate": 9.651447459707333e-05, "loss": 1.5507, "step": 2565 }, { "epoch": 0.5260352603526035, "grad_norm": 0.45050581541491375, "learning_rate": 9.644811130932487e-05, "loss": 1.4679, "step": 2566 }, { "epoch": 0.526240262402624, "grad_norm": 0.500768133112126, "learning_rate": 9.63817495877979e-05, "loss": 1.5782, "step": 2567 }, { "epoch": 0.5264452644526445, "grad_norm": 0.5284710322872105, "learning_rate": 9.631538946175496e-05, "loss": 1.6695, "step": 2568 }, { "epoch": 0.526650266502665, "grad_norm": 0.5109318990102362, "learning_rate": 9.624903096045777e-05, "loss": 1.564, "step": 2569 }, { "epoch": 0.5268552685526855, "grad_norm": 0.48838063559915174, "learning_rate": 9.618267411316748e-05, "loss": 1.6029, "step": 2570 }, { "epoch": 0.527060270602706, "grad_norm": 0.5276404347631407, "learning_rate": 9.611631894914445e-05, "loss": 1.581, "step": 2571 }, { "epoch": 0.5272652726527265, "grad_norm": 0.5322367855685195, "learning_rate": 9.604996549764825e-05, "loss": 1.6502, "step": 2572 }, { "epoch": 0.527470274702747, "grad_norm": 0.4821398347525406, "learning_rate": 9.598361378793779e-05, "loss": 1.5425, "step": 2573 }, { "epoch": 0.5276752767527675, "grad_norm": 0.5156658004918746, "learning_rate": 9.59172638492711e-05, "loss": 1.6408, "step": 2574 }, { "epoch": 0.527880278802788, "grad_norm": 0.5282466669936305, "learning_rate": 9.585091571090548e-05, "loss": 1.6889, "step": 2575 }, { "epoch": 0.5280852808528085, "grad_norm": 0.4733323569592494, "learning_rate": 9.578456940209754e-05, "loss": 1.6095, "step": 2576 }, { "epoch": 0.528290282902829, "grad_norm": 0.4811658428753099, "learning_rate": 9.571822495210289e-05, "loss": 1.6596, "step": 2577 }, { "epoch": 0.5284952849528495, "grad_norm": 0.5167827792861637, "learning_rate": 9.565188239017647e-05, "loss": 1.6315, "step": 2578 }, { "epoch": 0.5287002870028701, "grad_norm": 0.5012575618177271, "learning_rate": 9.55855417455723e-05, "loss": 1.5874, "step": 2579 }, { "epoch": 0.5289052890528906, "grad_norm": 0.5413913504440435, "learning_rate": 9.551920304754359e-05, "loss": 1.6183, "step": 2580 }, { "epoch": 0.5291102911029111, "grad_norm": 0.46071158692215214, "learning_rate": 9.545286632534273e-05, "loss": 1.541, "step": 2581 }, { "epoch": 0.5293152931529316, "grad_norm": 0.4477291072986434, "learning_rate": 9.538653160822117e-05, "loss": 1.5624, "step": 2582 }, { "epoch": 0.5295202952029521, "grad_norm": 0.5111728290860247, "learning_rate": 9.532019892542954e-05, "loss": 1.5701, "step": 2583 }, { "epoch": 0.5297252972529726, "grad_norm": 0.44421690248757384, "learning_rate": 9.525386830621747e-05, "loss": 1.5369, "step": 2584 }, { "epoch": 0.5299302993029931, "grad_norm": 0.4383958783572774, "learning_rate": 9.518753977983378e-05, "loss": 1.5485, "step": 2585 }, { "epoch": 0.5301353013530136, "grad_norm": 0.472800731554737, "learning_rate": 9.51212133755264e-05, "loss": 1.5338, "step": 2586 }, { "epoch": 0.5303403034030341, "grad_norm": 0.5423207566409595, "learning_rate": 9.505488912254217e-05, "loss": 1.6267, "step": 2587 }, { "epoch": 0.5305453054530546, "grad_norm": 0.45905269694253686, "learning_rate": 9.498856705012713e-05, "loss": 1.5588, "step": 2588 }, { "epoch": 0.5307503075030751, "grad_norm": 0.4937161742446509, "learning_rate": 9.492224718752628e-05, "loss": 1.5922, "step": 2589 }, { "epoch": 0.5309553095530956, "grad_norm": 0.4766148408996389, "learning_rate": 9.485592956398363e-05, "loss": 1.6414, "step": 2590 }, { "epoch": 0.531160311603116, "grad_norm": 0.49227759639704805, "learning_rate": 9.478961420874227e-05, "loss": 1.5908, "step": 2591 }, { "epoch": 0.5313653136531366, "grad_norm": 0.4478427512634655, "learning_rate": 9.472330115104428e-05, "loss": 1.5812, "step": 2592 }, { "epoch": 0.531570315703157, "grad_norm": 0.4427938910898017, "learning_rate": 9.465699042013068e-05, "loss": 1.5835, "step": 2593 }, { "epoch": 0.5317753177531775, "grad_norm": 0.4865116701435559, "learning_rate": 9.45906820452415e-05, "loss": 1.6174, "step": 2594 }, { "epoch": 0.531980319803198, "grad_norm": 0.4578424745835675, "learning_rate": 9.452437605561572e-05, "loss": 1.5859, "step": 2595 }, { "epoch": 0.5321853218532185, "grad_norm": 0.46196737986894526, "learning_rate": 9.445807248049121e-05, "loss": 1.5602, "step": 2596 }, { "epoch": 0.532390323903239, "grad_norm": 0.4447995981027458, "learning_rate": 9.439177134910493e-05, "loss": 1.557, "step": 2597 }, { "epoch": 0.5325953259532595, "grad_norm": 0.44419772585666994, "learning_rate": 9.432547269069261e-05, "loss": 1.6468, "step": 2598 }, { "epoch": 0.53280032800328, "grad_norm": 0.507291563731087, "learning_rate": 9.425917653448897e-05, "loss": 1.5493, "step": 2599 }, { "epoch": 0.5330053300533005, "grad_norm": 0.452533551154994, "learning_rate": 9.419288290972757e-05, "loss": 1.5742, "step": 2600 }, { "epoch": 0.533210332103321, "grad_norm": 0.43701229369428923, "learning_rate": 9.412659184564088e-05, "loss": 1.6192, "step": 2601 }, { "epoch": 0.5334153341533415, "grad_norm": 0.48292773924668453, "learning_rate": 9.40603033714603e-05, "loss": 1.6176, "step": 2602 }, { "epoch": 0.533620336203362, "grad_norm": 0.467810898431718, "learning_rate": 9.3994017516416e-05, "loss": 1.5941, "step": 2603 }, { "epoch": 0.5338253382533825, "grad_norm": 0.4384255556257593, "learning_rate": 9.392773430973705e-05, "loss": 1.5881, "step": 2604 }, { "epoch": 0.534030340303403, "grad_norm": 0.45708780221103745, "learning_rate": 9.38614537806513e-05, "loss": 1.5534, "step": 2605 }, { "epoch": 0.5342353423534235, "grad_norm": 0.41758424556981916, "learning_rate": 9.379517595838548e-05, "loss": 1.5603, "step": 2606 }, { "epoch": 0.534440344403444, "grad_norm": 0.48014776181606317, "learning_rate": 9.372890087216505e-05, "loss": 1.5584, "step": 2607 }, { "epoch": 0.5346453464534645, "grad_norm": 0.435177373212569, "learning_rate": 9.366262855121436e-05, "loss": 1.5959, "step": 2608 }, { "epoch": 0.534850348503485, "grad_norm": 0.46847034893299017, "learning_rate": 9.35963590247565e-05, "loss": 1.5881, "step": 2609 }, { "epoch": 0.5350553505535055, "grad_norm": 0.4550717051165404, "learning_rate": 9.353009232201328e-05, "loss": 1.5379, "step": 2610 }, { "epoch": 0.535260352603526, "grad_norm": 0.4200978439413184, "learning_rate": 9.346382847220534e-05, "loss": 1.5902, "step": 2611 }, { "epoch": 0.5354653546535465, "grad_norm": 0.48924871328365954, "learning_rate": 9.339756750455199e-05, "loss": 1.5238, "step": 2612 }, { "epoch": 0.535670356703567, "grad_norm": 0.46890992504076023, "learning_rate": 9.333130944827132e-05, "loss": 1.5627, "step": 2613 }, { "epoch": 0.5358753587535875, "grad_norm": 0.5351852825224304, "learning_rate": 9.326505433258015e-05, "loss": 1.6476, "step": 2614 }, { "epoch": 0.536080360803608, "grad_norm": 0.5287386691839768, "learning_rate": 9.319880218669394e-05, "loss": 1.6258, "step": 2615 }, { "epoch": 0.5362853628536285, "grad_norm": 0.45695847329910527, "learning_rate": 9.31325530398269e-05, "loss": 1.711, "step": 2616 }, { "epoch": 0.536490364903649, "grad_norm": 0.4694246732168661, "learning_rate": 9.306630692119182e-05, "loss": 1.4855, "step": 2617 }, { "epoch": 0.5366953669536695, "grad_norm": 0.47905575407870277, "learning_rate": 9.300006386000033e-05, "loss": 1.5275, "step": 2618 }, { "epoch": 0.5369003690036901, "grad_norm": 0.44527898617751693, "learning_rate": 9.293382388546259e-05, "loss": 1.5784, "step": 2619 }, { "epoch": 0.5371053710537106, "grad_norm": 0.5034381066746775, "learning_rate": 9.286758702678736e-05, "loss": 1.5708, "step": 2620 }, { "epoch": 0.5373103731037311, "grad_norm": 0.5198418243115236, "learning_rate": 9.280135331318216e-05, "loss": 1.6699, "step": 2621 }, { "epoch": 0.5375153751537516, "grad_norm": 0.4610284443386145, "learning_rate": 9.273512277385297e-05, "loss": 1.6123, "step": 2622 }, { "epoch": 0.5377203772037721, "grad_norm": 0.47474226913694023, "learning_rate": 9.266889543800447e-05, "loss": 1.564, "step": 2623 }, { "epoch": 0.5379253792537926, "grad_norm": 0.4726079947220515, "learning_rate": 9.260267133483997e-05, "loss": 1.5539, "step": 2624 }, { "epoch": 0.5381303813038131, "grad_norm": 0.5144171408522766, "learning_rate": 9.253645049356119e-05, "loss": 1.64, "step": 2625 }, { "epoch": 0.5383353833538336, "grad_norm": 0.5433169961057769, "learning_rate": 9.247023294336862e-05, "loss": 1.6346, "step": 2626 }, { "epoch": 0.5385403854038541, "grad_norm": 0.5338001873146371, "learning_rate": 9.24040187134611e-05, "loss": 1.6284, "step": 2627 }, { "epoch": 0.5387453874538746, "grad_norm": 0.48403605698163754, "learning_rate": 9.233780783303611e-05, "loss": 1.5404, "step": 2628 }, { "epoch": 0.5389503895038951, "grad_norm": 0.47396167503238784, "learning_rate": 9.22716003312897e-05, "loss": 1.5824, "step": 2629 }, { "epoch": 0.5391553915539156, "grad_norm": 0.4890984894825298, "learning_rate": 9.22053962374163e-05, "loss": 1.5415, "step": 2630 }, { "epoch": 0.5393603936039361, "grad_norm": 0.5012797003146663, "learning_rate": 9.213919558060897e-05, "loss": 1.6447, "step": 2631 }, { "epoch": 0.5395653956539566, "grad_norm": 0.501901005795332, "learning_rate": 9.207299839005911e-05, "loss": 1.6074, "step": 2632 }, { "epoch": 0.5397703977039771, "grad_norm": 0.43227741500071715, "learning_rate": 9.200680469495672e-05, "loss": 1.569, "step": 2633 }, { "epoch": 0.5399753997539976, "grad_norm": 0.4836658396704612, "learning_rate": 9.194061452449024e-05, "loss": 1.5177, "step": 2634 }, { "epoch": 0.540180401804018, "grad_norm": 0.5198709557637989, "learning_rate": 9.187442790784648e-05, "loss": 1.6129, "step": 2635 }, { "epoch": 0.5403854038540385, "grad_norm": 0.48987872009318717, "learning_rate": 9.180824487421077e-05, "loss": 1.6051, "step": 2636 }, { "epoch": 0.540590405904059, "grad_norm": 0.49629354038737566, "learning_rate": 9.174206545276677e-05, "loss": 1.6167, "step": 2637 }, { "epoch": 0.5407954079540795, "grad_norm": 0.45153789191171245, "learning_rate": 9.167588967269666e-05, "loss": 1.593, "step": 2638 }, { "epoch": 0.5410004100041, "grad_norm": 0.47009419797315166, "learning_rate": 9.160971756318087e-05, "loss": 1.58, "step": 2639 }, { "epoch": 0.5412054120541205, "grad_norm": 0.4676357691060261, "learning_rate": 9.154354915339836e-05, "loss": 1.594, "step": 2640 }, { "epoch": 0.541410414104141, "grad_norm": 0.49308540622553537, "learning_rate": 9.147738447252639e-05, "loss": 1.5343, "step": 2641 }, { "epoch": 0.5416154161541615, "grad_norm": 0.4432118342796589, "learning_rate": 9.141122354974055e-05, "loss": 1.5569, "step": 2642 }, { "epoch": 0.541820418204182, "grad_norm": 0.4464565682562934, "learning_rate": 9.13450664142148e-05, "loss": 1.6124, "step": 2643 }, { "epoch": 0.5420254202542025, "grad_norm": 0.4722069223751032, "learning_rate": 9.127891309512141e-05, "loss": 1.5676, "step": 2644 }, { "epoch": 0.542230422304223, "grad_norm": 0.4208935145134012, "learning_rate": 9.121276362163106e-05, "loss": 1.5593, "step": 2645 }, { "epoch": 0.5424354243542435, "grad_norm": 0.49514854171077044, "learning_rate": 9.114661802291262e-05, "loss": 1.6249, "step": 2646 }, { "epoch": 0.542640426404264, "grad_norm": 0.4806808338103088, "learning_rate": 9.108047632813328e-05, "loss": 1.5678, "step": 2647 }, { "epoch": 0.5428454284542845, "grad_norm": 0.4687343613699098, "learning_rate": 9.101433856645854e-05, "loss": 1.5483, "step": 2648 }, { "epoch": 0.543050430504305, "grad_norm": 0.5117833042240184, "learning_rate": 9.09482047670521e-05, "loss": 1.6888, "step": 2649 }, { "epoch": 0.5432554325543255, "grad_norm": 0.47515329692938435, "learning_rate": 9.088207495907603e-05, "loss": 1.642, "step": 2650 }, { "epoch": 0.543460434604346, "grad_norm": 0.5008671300898089, "learning_rate": 9.081594917169055e-05, "loss": 1.5892, "step": 2651 }, { "epoch": 0.5436654366543665, "grad_norm": 0.4941233205944891, "learning_rate": 9.074982743405413e-05, "loss": 1.6168, "step": 2652 }, { "epoch": 0.543870438704387, "grad_norm": 0.47430444833549046, "learning_rate": 9.068370977532341e-05, "loss": 1.6107, "step": 2653 }, { "epoch": 0.5440754407544075, "grad_norm": 0.4441716597389282, "learning_rate": 9.061759622465332e-05, "loss": 1.5605, "step": 2654 }, { "epoch": 0.544280442804428, "grad_norm": 0.482702388115561, "learning_rate": 9.055148681119688e-05, "loss": 1.6001, "step": 2655 }, { "epoch": 0.5444854448544485, "grad_norm": 0.49679740261205135, "learning_rate": 9.048538156410538e-05, "loss": 1.5632, "step": 2656 }, { "epoch": 0.544690446904469, "grad_norm": 0.4989006409344413, "learning_rate": 9.041928051252826e-05, "loss": 1.6114, "step": 2657 }, { "epoch": 0.5448954489544895, "grad_norm": 0.45274305074276694, "learning_rate": 9.0353183685613e-05, "loss": 1.5803, "step": 2658 }, { "epoch": 0.5451004510045101, "grad_norm": 0.4769916990922205, "learning_rate": 9.028709111250537e-05, "loss": 1.6207, "step": 2659 }, { "epoch": 0.5453054530545306, "grad_norm": 0.4555478084149483, "learning_rate": 9.022100282234913e-05, "loss": 1.5347, "step": 2660 }, { "epoch": 0.5455104551045511, "grad_norm": 0.46890901886584824, "learning_rate": 9.015491884428623e-05, "loss": 1.6012, "step": 2661 }, { "epoch": 0.5457154571545716, "grad_norm": 0.5110587419253257, "learning_rate": 9.008883920745675e-05, "loss": 1.6358, "step": 2662 }, { "epoch": 0.5459204592045921, "grad_norm": 0.4971322165718797, "learning_rate": 9.002276394099874e-05, "loss": 1.6863, "step": 2663 }, { "epoch": 0.5461254612546126, "grad_norm": 0.4301596112623638, "learning_rate": 8.995669307404845e-05, "loss": 1.5963, "step": 2664 }, { "epoch": 0.5463304633046331, "grad_norm": 0.46577192480695356, "learning_rate": 8.989062663574006e-05, "loss": 1.5492, "step": 2665 }, { "epoch": 0.5465354653546536, "grad_norm": 0.49235849065317666, "learning_rate": 8.98245646552059e-05, "loss": 1.6296, "step": 2666 }, { "epoch": 0.5467404674046741, "grad_norm": 0.4929929729188508, "learning_rate": 8.975850716157634e-05, "loss": 1.6624, "step": 2667 }, { "epoch": 0.5469454694546946, "grad_norm": 0.5125100632119998, "learning_rate": 8.969245418397969e-05, "loss": 1.6665, "step": 2668 }, { "epoch": 0.5471504715047151, "grad_norm": 0.4588157206541979, "learning_rate": 8.962640575154232e-05, "loss": 1.5865, "step": 2669 }, { "epoch": 0.5473554735547356, "grad_norm": 0.4657542785898101, "learning_rate": 8.956036189338858e-05, "loss": 1.571, "step": 2670 }, { "epoch": 0.5475604756047561, "grad_norm": 0.525947865233076, "learning_rate": 8.949432263864079e-05, "loss": 1.5987, "step": 2671 }, { "epoch": 0.5477654776547766, "grad_norm": 0.4968635537358625, "learning_rate": 8.942828801641933e-05, "loss": 1.5802, "step": 2672 }, { "epoch": 0.5479704797047971, "grad_norm": 0.4541055800603526, "learning_rate": 8.936225805584242e-05, "loss": 1.5398, "step": 2673 }, { "epoch": 0.5481754817548176, "grad_norm": 0.44786853103890034, "learning_rate": 8.929623278602627e-05, "loss": 1.5615, "step": 2674 }, { "epoch": 0.5483804838048381, "grad_norm": 0.515978158095013, "learning_rate": 8.923021223608504e-05, "loss": 1.623, "step": 2675 }, { "epoch": 0.5485854858548586, "grad_norm": 0.4758521821455385, "learning_rate": 8.916419643513074e-05, "loss": 1.5767, "step": 2676 }, { "epoch": 0.548790487904879, "grad_norm": 0.42388582225051624, "learning_rate": 8.909818541227343e-05, "loss": 1.54, "step": 2677 }, { "epoch": 0.5489954899548996, "grad_norm": 0.46264576269985275, "learning_rate": 8.90321791966209e-05, "loss": 1.5786, "step": 2678 }, { "epoch": 0.54920049200492, "grad_norm": 0.47999391205809916, "learning_rate": 8.896617781727894e-05, "loss": 1.5743, "step": 2679 }, { "epoch": 0.5494054940549405, "grad_norm": 0.46458200088613294, "learning_rate": 8.890018130335111e-05, "loss": 1.5876, "step": 2680 }, { "epoch": 0.549610496104961, "grad_norm": 0.48492234649104576, "learning_rate": 8.883418968393892e-05, "loss": 1.591, "step": 2681 }, { "epoch": 0.5498154981549815, "grad_norm": 0.4871763740885184, "learning_rate": 8.87682029881416e-05, "loss": 1.6211, "step": 2682 }, { "epoch": 0.550020500205002, "grad_norm": 0.4922734701906859, "learning_rate": 8.870222124505635e-05, "loss": 1.6959, "step": 2683 }, { "epoch": 0.5502255022550225, "grad_norm": 0.4631013858494112, "learning_rate": 8.863624448377814e-05, "loss": 1.6305, "step": 2684 }, { "epoch": 0.550430504305043, "grad_norm": 0.4516747226217731, "learning_rate": 8.857027273339967e-05, "loss": 1.5598, "step": 2685 }, { "epoch": 0.5506355063550635, "grad_norm": 0.49884978796348173, "learning_rate": 8.85043060230115e-05, "loss": 1.5981, "step": 2686 }, { "epoch": 0.550840508405084, "grad_norm": 0.4440033032555108, "learning_rate": 8.843834438170193e-05, "loss": 1.6103, "step": 2687 }, { "epoch": 0.5510455104551045, "grad_norm": 0.46783955392220905, "learning_rate": 8.837238783855709e-05, "loss": 1.6407, "step": 2688 }, { "epoch": 0.551250512505125, "grad_norm": 0.4601370042279884, "learning_rate": 8.830643642266082e-05, "loss": 1.56, "step": 2689 }, { "epoch": 0.5514555145551455, "grad_norm": 0.5260210732647638, "learning_rate": 8.824049016309465e-05, "loss": 1.642, "step": 2690 }, { "epoch": 0.551660516605166, "grad_norm": 0.4295839060842287, "learning_rate": 8.817454908893795e-05, "loss": 1.6299, "step": 2691 }, { "epoch": 0.5518655186551865, "grad_norm": 0.4605964301695693, "learning_rate": 8.810861322926764e-05, "loss": 1.5491, "step": 2692 }, { "epoch": 0.552070520705207, "grad_norm": 0.5081232754819388, "learning_rate": 8.80426826131585e-05, "loss": 1.5578, "step": 2693 }, { "epoch": 0.5522755227552275, "grad_norm": 0.46776415319094106, "learning_rate": 8.797675726968297e-05, "loss": 1.5834, "step": 2694 }, { "epoch": 0.552480524805248, "grad_norm": 0.4762230514657226, "learning_rate": 8.791083722791108e-05, "loss": 1.5943, "step": 2695 }, { "epoch": 0.5526855268552685, "grad_norm": 0.4621670020844566, "learning_rate": 8.784492251691057e-05, "loss": 1.5191, "step": 2696 }, { "epoch": 0.552890528905289, "grad_norm": 0.43661422338656364, "learning_rate": 8.777901316574685e-05, "loss": 1.579, "step": 2697 }, { "epoch": 0.5530955309553095, "grad_norm": 0.47112068475656355, "learning_rate": 8.771310920348292e-05, "loss": 1.6154, "step": 2698 }, { "epoch": 0.5533005330053301, "grad_norm": 0.4726113720612676, "learning_rate": 8.764721065917947e-05, "loss": 1.5752, "step": 2699 }, { "epoch": 0.5535055350553506, "grad_norm": 0.4655246624426711, "learning_rate": 8.758131756189476e-05, "loss": 1.5813, "step": 2700 }, { "epoch": 0.5537105371053711, "grad_norm": 0.5219761208482683, "learning_rate": 8.751542994068464e-05, "loss": 1.5848, "step": 2701 }, { "epoch": 0.5539155391553916, "grad_norm": 0.4323111530410708, "learning_rate": 8.744954782460254e-05, "loss": 1.5232, "step": 2702 }, { "epoch": 0.5541205412054121, "grad_norm": 0.4983349944847731, "learning_rate": 8.73836712426995e-05, "loss": 1.5749, "step": 2703 }, { "epoch": 0.5543255432554326, "grad_norm": 0.4645968573838534, "learning_rate": 8.731780022402409e-05, "loss": 1.6401, "step": 2704 }, { "epoch": 0.5545305453054531, "grad_norm": 0.4544524966052656, "learning_rate": 8.725193479762247e-05, "loss": 1.5581, "step": 2705 }, { "epoch": 0.5547355473554736, "grad_norm": 0.47219152650339763, "learning_rate": 8.718607499253825e-05, "loss": 1.5865, "step": 2706 }, { "epoch": 0.5549405494054941, "grad_norm": 0.4931596434888753, "learning_rate": 8.712022083781264e-05, "loss": 1.6059, "step": 2707 }, { "epoch": 0.5551455514555146, "grad_norm": 0.46198286930221866, "learning_rate": 8.70543723624843e-05, "loss": 1.6138, "step": 2708 }, { "epoch": 0.5553505535055351, "grad_norm": 0.4008861442313229, "learning_rate": 8.698852959558944e-05, "loss": 1.5455, "step": 2709 }, { "epoch": 0.5555555555555556, "grad_norm": 0.48589720803198055, "learning_rate": 8.692269256616175e-05, "loss": 1.6954, "step": 2710 }, { "epoch": 0.5557605576055761, "grad_norm": 0.4590082932518373, "learning_rate": 8.685686130323232e-05, "loss": 1.5966, "step": 2711 }, { "epoch": 0.5559655596555966, "grad_norm": 0.47823333668560974, "learning_rate": 8.679103583582979e-05, "loss": 1.5673, "step": 2712 }, { "epoch": 0.5561705617056171, "grad_norm": 0.49813589210415066, "learning_rate": 8.672521619298016e-05, "loss": 1.6617, "step": 2713 }, { "epoch": 0.5563755637556376, "grad_norm": 0.44496314213167276, "learning_rate": 8.665940240370688e-05, "loss": 1.5597, "step": 2714 }, { "epoch": 0.5565805658056581, "grad_norm": 0.45661253320674594, "learning_rate": 8.659359449703095e-05, "loss": 1.6677, "step": 2715 }, { "epoch": 0.5567855678556786, "grad_norm": 0.408860296952707, "learning_rate": 8.652779250197056e-05, "loss": 1.5866, "step": 2716 }, { "epoch": 0.5569905699056991, "grad_norm": 0.4379583156761515, "learning_rate": 8.646199644754146e-05, "loss": 1.5629, "step": 2717 }, { "epoch": 0.5571955719557196, "grad_norm": 0.4435669081957282, "learning_rate": 8.639620636275667e-05, "loss": 1.5433, "step": 2718 }, { "epoch": 0.5574005740057401, "grad_norm": 0.5128765961711079, "learning_rate": 8.633042227662662e-05, "loss": 1.6223, "step": 2719 }, { "epoch": 0.5576055760557606, "grad_norm": 0.4859469667685511, "learning_rate": 8.626464421815919e-05, "loss": 1.6443, "step": 2720 }, { "epoch": 0.557810578105781, "grad_norm": 0.454298878880921, "learning_rate": 8.619887221635944e-05, "loss": 1.5565, "step": 2721 }, { "epoch": 0.5580155801558015, "grad_norm": 0.5197011290371658, "learning_rate": 8.613310630022986e-05, "loss": 1.5898, "step": 2722 }, { "epoch": 0.558220582205822, "grad_norm": 0.5013274459520718, "learning_rate": 8.606734649877022e-05, "loss": 1.6439, "step": 2723 }, { "epoch": 0.5584255842558425, "grad_norm": 0.47436418576967115, "learning_rate": 8.600159284097757e-05, "loss": 1.5735, "step": 2724 }, { "epoch": 0.558630586305863, "grad_norm": 0.47253807238704093, "learning_rate": 8.593584535584637e-05, "loss": 1.5755, "step": 2725 }, { "epoch": 0.5588355883558835, "grad_norm": 0.4871736311217662, "learning_rate": 8.58701040723682e-05, "loss": 1.5583, "step": 2726 }, { "epoch": 0.559040590405904, "grad_norm": 0.48761585720030787, "learning_rate": 8.580436901953202e-05, "loss": 1.6417, "step": 2727 }, { "epoch": 0.5592455924559245, "grad_norm": 0.4513739946761981, "learning_rate": 8.573864022632398e-05, "loss": 1.5474, "step": 2728 }, { "epoch": 0.559450594505945, "grad_norm": 0.4399086954887682, "learning_rate": 8.567291772172751e-05, "loss": 1.5521, "step": 2729 }, { "epoch": 0.5596555965559655, "grad_norm": 0.47760950875664, "learning_rate": 8.560720153472319e-05, "loss": 1.6213, "step": 2730 }, { "epoch": 0.559860598605986, "grad_norm": 0.47082550165766734, "learning_rate": 8.554149169428894e-05, "loss": 1.6389, "step": 2731 }, { "epoch": 0.5600656006560065, "grad_norm": 0.4552501017617182, "learning_rate": 8.547578822939979e-05, "loss": 1.6262, "step": 2732 }, { "epoch": 0.560270602706027, "grad_norm": 0.41940772156927014, "learning_rate": 8.541009116902797e-05, "loss": 1.5014, "step": 2733 }, { "epoch": 0.5604756047560475, "grad_norm": 0.5082813672622422, "learning_rate": 8.534440054214294e-05, "loss": 1.6688, "step": 2734 }, { "epoch": 0.560680606806068, "grad_norm": 0.4204459505094942, "learning_rate": 8.52787163777112e-05, "loss": 1.5351, "step": 2735 }, { "epoch": 0.5608856088560885, "grad_norm": 0.4802801163655674, "learning_rate": 8.521303870469655e-05, "loss": 1.5699, "step": 2736 }, { "epoch": 0.561090610906109, "grad_norm": 0.44683758676501706, "learning_rate": 8.514736755205986e-05, "loss": 1.5248, "step": 2737 }, { "epoch": 0.5612956129561295, "grad_norm": 0.47643246461329575, "learning_rate": 8.508170294875909e-05, "loss": 1.6018, "step": 2738 }, { "epoch": 0.5615006150061501, "grad_norm": 0.5081701375502914, "learning_rate": 8.501604492374939e-05, "loss": 1.5642, "step": 2739 }, { "epoch": 0.5617056170561706, "grad_norm": 0.4349787653384339, "learning_rate": 8.495039350598288e-05, "loss": 1.5287, "step": 2740 }, { "epoch": 0.5619106191061911, "grad_norm": 0.48786456697125097, "learning_rate": 8.488474872440892e-05, "loss": 1.5895, "step": 2741 }, { "epoch": 0.5621156211562116, "grad_norm": 0.4311788620603534, "learning_rate": 8.48191106079739e-05, "loss": 1.5655, "step": 2742 }, { "epoch": 0.5623206232062321, "grad_norm": 0.4794441517134273, "learning_rate": 8.475347918562118e-05, "loss": 1.6106, "step": 2743 }, { "epoch": 0.5625256252562526, "grad_norm": 0.45767858156296126, "learning_rate": 8.468785448629128e-05, "loss": 1.5493, "step": 2744 }, { "epoch": 0.5627306273062731, "grad_norm": 0.4974461426461692, "learning_rate": 8.46222365389217e-05, "loss": 1.6653, "step": 2745 }, { "epoch": 0.5629356293562936, "grad_norm": 0.4744386568612818, "learning_rate": 8.45566253724469e-05, "loss": 1.6077, "step": 2746 }, { "epoch": 0.5631406314063141, "grad_norm": 0.4515951826847733, "learning_rate": 8.449102101579856e-05, "loss": 1.6145, "step": 2747 }, { "epoch": 0.5633456334563346, "grad_norm": 0.4662380834371251, "learning_rate": 8.442542349790514e-05, "loss": 1.5551, "step": 2748 }, { "epoch": 0.5635506355063551, "grad_norm": 0.48473926644768106, "learning_rate": 8.435983284769216e-05, "loss": 1.5523, "step": 2749 }, { "epoch": 0.5637556375563756, "grad_norm": 0.44151878917462695, "learning_rate": 8.429424909408214e-05, "loss": 1.5974, "step": 2750 }, { "epoch": 0.5639606396063961, "grad_norm": 0.43500873928167183, "learning_rate": 8.42286722659945e-05, "loss": 1.5042, "step": 2751 }, { "epoch": 0.5641656416564166, "grad_norm": 0.46636021809359474, "learning_rate": 8.416310239234566e-05, "loss": 1.5779, "step": 2752 }, { "epoch": 0.5643706437064371, "grad_norm": 0.47471533160762547, "learning_rate": 8.409753950204901e-05, "loss": 1.5122, "step": 2753 }, { "epoch": 0.5645756457564576, "grad_norm": 0.45922278719459037, "learning_rate": 8.403198362401473e-05, "loss": 1.5727, "step": 2754 }, { "epoch": 0.5647806478064781, "grad_norm": 0.41621380912394257, "learning_rate": 8.396643478715001e-05, "loss": 1.5294, "step": 2755 }, { "epoch": 0.5649856498564986, "grad_norm": 0.48339088637946764, "learning_rate": 8.390089302035888e-05, "loss": 1.5857, "step": 2756 }, { "epoch": 0.5651906519065191, "grad_norm": 0.45192567408837614, "learning_rate": 8.38353583525423e-05, "loss": 1.542, "step": 2757 }, { "epoch": 0.5653956539565396, "grad_norm": 0.4719122920371752, "learning_rate": 8.376983081259814e-05, "loss": 1.6055, "step": 2758 }, { "epoch": 0.5656006560065601, "grad_norm": 0.4495282078593574, "learning_rate": 8.370431042942099e-05, "loss": 1.6092, "step": 2759 }, { "epoch": 0.5658056580565806, "grad_norm": 0.4911656806600491, "learning_rate": 8.36387972319024e-05, "loss": 1.5596, "step": 2760 }, { "epoch": 0.5660106601066011, "grad_norm": 0.44589803030364894, "learning_rate": 8.357329124893067e-05, "loss": 1.5874, "step": 2761 }, { "epoch": 0.5662156621566216, "grad_norm": 0.44043619793201755, "learning_rate": 8.350779250939098e-05, "loss": 1.5944, "step": 2762 }, { "epoch": 0.566420664206642, "grad_norm": 0.490881176486932, "learning_rate": 8.344230104216535e-05, "loss": 1.5374, "step": 2763 }, { "epoch": 0.5666256662566626, "grad_norm": 0.4473742243308702, "learning_rate": 8.337681687613247e-05, "loss": 1.5579, "step": 2764 }, { "epoch": 0.566830668306683, "grad_norm": 0.4336860241181803, "learning_rate": 8.331134004016794e-05, "loss": 1.5435, "step": 2765 }, { "epoch": 0.5670356703567035, "grad_norm": 0.5058022455119351, "learning_rate": 8.324587056314401e-05, "loss": 1.6264, "step": 2766 }, { "epoch": 0.567240672406724, "grad_norm": 0.5185403358228308, "learning_rate": 8.318040847392976e-05, "loss": 1.638, "step": 2767 }, { "epoch": 0.5674456744567445, "grad_norm": 0.44435853900434275, "learning_rate": 8.311495380139104e-05, "loss": 1.6164, "step": 2768 }, { "epoch": 0.567650676506765, "grad_norm": 0.48967934204395624, "learning_rate": 8.304950657439033e-05, "loss": 1.6625, "step": 2769 }, { "epoch": 0.5678556785567855, "grad_norm": 0.5194040898941595, "learning_rate": 8.298406682178694e-05, "loss": 1.5344, "step": 2770 }, { "epoch": 0.568060680606806, "grad_norm": 0.45524359340987325, "learning_rate": 8.291863457243679e-05, "loss": 1.5712, "step": 2771 }, { "epoch": 0.5682656826568265, "grad_norm": 0.45186248322350536, "learning_rate": 8.285320985519254e-05, "loss": 1.6038, "step": 2772 }, { "epoch": 0.568470684706847, "grad_norm": 0.49936259381381526, "learning_rate": 8.278779269890347e-05, "loss": 1.5869, "step": 2773 }, { "epoch": 0.5686756867568675, "grad_norm": 0.48542247967144936, "learning_rate": 8.272238313241563e-05, "loss": 1.5745, "step": 2774 }, { "epoch": 0.568880688806888, "grad_norm": 0.46993731641929926, "learning_rate": 8.265698118457166e-05, "loss": 1.5529, "step": 2775 }, { "epoch": 0.5690856908569085, "grad_norm": 0.5098162873204044, "learning_rate": 8.259158688421085e-05, "loss": 1.5573, "step": 2776 }, { "epoch": 0.569290692906929, "grad_norm": 0.4711388927942418, "learning_rate": 8.25262002601691e-05, "loss": 1.5464, "step": 2777 }, { "epoch": 0.5694956949569495, "grad_norm": 0.46566844609010144, "learning_rate": 8.24608213412789e-05, "loss": 1.5588, "step": 2778 }, { "epoch": 0.5697006970069701, "grad_norm": 0.4433806059014826, "learning_rate": 8.239545015636944e-05, "loss": 1.5686, "step": 2779 }, { "epoch": 0.5699056990569906, "grad_norm": 0.42027399491719564, "learning_rate": 8.233008673426646e-05, "loss": 1.5258, "step": 2780 }, { "epoch": 0.5701107011070111, "grad_norm": 0.4664372984386003, "learning_rate": 8.226473110379221e-05, "loss": 1.6087, "step": 2781 }, { "epoch": 0.5703157031570316, "grad_norm": 0.4909885728039717, "learning_rate": 8.219938329376556e-05, "loss": 1.5991, "step": 2782 }, { "epoch": 0.5705207052070521, "grad_norm": 0.43739714020024856, "learning_rate": 8.213404333300191e-05, "loss": 1.4763, "step": 2783 }, { "epoch": 0.5707257072570726, "grad_norm": 0.44265461264361633, "learning_rate": 8.206871125031324e-05, "loss": 1.4952, "step": 2784 }, { "epoch": 0.5709307093070931, "grad_norm": 0.4751427399786054, "learning_rate": 8.200338707450806e-05, "loss": 1.531, "step": 2785 }, { "epoch": 0.5711357113571136, "grad_norm": 0.47996445696949575, "learning_rate": 8.19380708343913e-05, "loss": 1.5095, "step": 2786 }, { "epoch": 0.5713407134071341, "grad_norm": 0.4642615540618763, "learning_rate": 8.187276255876451e-05, "loss": 1.5285, "step": 2787 }, { "epoch": 0.5715457154571546, "grad_norm": 0.46879634584675467, "learning_rate": 8.180746227642562e-05, "loss": 1.5427, "step": 2788 }, { "epoch": 0.5717507175071751, "grad_norm": 0.4327553497951608, "learning_rate": 8.174217001616908e-05, "loss": 1.5685, "step": 2789 }, { "epoch": 0.5719557195571956, "grad_norm": 0.48044823109466844, "learning_rate": 8.167688580678587e-05, "loss": 1.5816, "step": 2790 }, { "epoch": 0.5721607216072161, "grad_norm": 0.43271691731053397, "learning_rate": 8.161160967706333e-05, "loss": 1.5133, "step": 2791 }, { "epoch": 0.5723657236572366, "grad_norm": 0.43865516134348287, "learning_rate": 8.154634165578527e-05, "loss": 1.5587, "step": 2792 }, { "epoch": 0.5725707257072571, "grad_norm": 0.5041463736308865, "learning_rate": 8.148108177173191e-05, "loss": 1.6045, "step": 2793 }, { "epoch": 0.5727757277572776, "grad_norm": 0.5002933100013144, "learning_rate": 8.141583005367988e-05, "loss": 1.6006, "step": 2794 }, { "epoch": 0.5729807298072981, "grad_norm": 0.48374712959962823, "learning_rate": 8.135058653040226e-05, "loss": 1.581, "step": 2795 }, { "epoch": 0.5731857318573186, "grad_norm": 0.4894348464778049, "learning_rate": 8.128535123066846e-05, "loss": 1.5838, "step": 2796 }, { "epoch": 0.5733907339073391, "grad_norm": 0.4848920999677346, "learning_rate": 8.122012418324429e-05, "loss": 1.5297, "step": 2797 }, { "epoch": 0.5735957359573596, "grad_norm": 0.5055371614414685, "learning_rate": 8.115490541689192e-05, "loss": 1.6007, "step": 2798 }, { "epoch": 0.5738007380073801, "grad_norm": 0.4945907352973538, "learning_rate": 8.108969496036979e-05, "loss": 1.5596, "step": 2799 }, { "epoch": 0.5740057400574006, "grad_norm": 0.4670049217763332, "learning_rate": 8.102449284243287e-05, "loss": 1.5708, "step": 2800 }, { "epoch": 0.5742107421074211, "grad_norm": 0.4608502951675274, "learning_rate": 8.095929909183226e-05, "loss": 1.5495, "step": 2801 }, { "epoch": 0.5744157441574416, "grad_norm": 0.5198947623278084, "learning_rate": 8.089411373731541e-05, "loss": 1.6064, "step": 2802 }, { "epoch": 0.5746207462074621, "grad_norm": 0.48376635257919165, "learning_rate": 8.082893680762619e-05, "loss": 1.5722, "step": 2803 }, { "epoch": 0.5748257482574826, "grad_norm": 0.49674901594952897, "learning_rate": 8.076376833150458e-05, "loss": 1.5457, "step": 2804 }, { "epoch": 0.5750307503075031, "grad_norm": 0.4955390689538443, "learning_rate": 8.069860833768693e-05, "loss": 1.5985, "step": 2805 }, { "epoch": 0.5752357523575236, "grad_norm": 0.4699349188279682, "learning_rate": 8.063345685490589e-05, "loss": 1.5834, "step": 2806 }, { "epoch": 0.575440754407544, "grad_norm": 0.4704767130444989, "learning_rate": 8.056831391189023e-05, "loss": 1.5617, "step": 2807 }, { "epoch": 0.5756457564575646, "grad_norm": 0.48457633227697255, "learning_rate": 8.050317953736512e-05, "loss": 1.5693, "step": 2808 }, { "epoch": 0.575850758507585, "grad_norm": 0.4292273459988384, "learning_rate": 8.043805376005177e-05, "loss": 1.59, "step": 2809 }, { "epoch": 0.5760557605576055, "grad_norm": 0.4577162302998883, "learning_rate": 8.03729366086677e-05, "loss": 1.5494, "step": 2810 }, { "epoch": 0.576260762607626, "grad_norm": 0.4954568906574125, "learning_rate": 8.030782811192668e-05, "loss": 1.6075, "step": 2811 }, { "epoch": 0.5764657646576465, "grad_norm": 0.4744112646384498, "learning_rate": 8.024272829853852e-05, "loss": 1.5645, "step": 2812 }, { "epoch": 0.576670766707667, "grad_norm": 0.5179082114337228, "learning_rate": 8.017763719720936e-05, "loss": 1.5323, "step": 2813 }, { "epoch": 0.5768757687576875, "grad_norm": 0.4702611550792192, "learning_rate": 8.011255483664133e-05, "loss": 1.5291, "step": 2814 }, { "epoch": 0.577080770807708, "grad_norm": 0.47217938018014816, "learning_rate": 8.004748124553283e-05, "loss": 1.6742, "step": 2815 }, { "epoch": 0.5772857728577285, "grad_norm": 0.5022788038282318, "learning_rate": 7.99824164525784e-05, "loss": 1.6052, "step": 2816 }, { "epoch": 0.577490774907749, "grad_norm": 0.4297384242177964, "learning_rate": 7.99173604864686e-05, "loss": 1.5042, "step": 2817 }, { "epoch": 0.5776957769577695, "grad_norm": 0.4577017852336908, "learning_rate": 7.985231337589019e-05, "loss": 1.5592, "step": 2818 }, { "epoch": 0.5779007790077901, "grad_norm": 0.46778619294343626, "learning_rate": 7.978727514952595e-05, "loss": 1.6089, "step": 2819 }, { "epoch": 0.5781057810578106, "grad_norm": 0.474161425400192, "learning_rate": 7.972224583605483e-05, "loss": 1.5598, "step": 2820 }, { "epoch": 0.5783107831078311, "grad_norm": 0.46254774541116084, "learning_rate": 7.965722546415173e-05, "loss": 1.5729, "step": 2821 }, { "epoch": 0.5785157851578516, "grad_norm": 0.4528521920224102, "learning_rate": 7.959221406248775e-05, "loss": 1.6784, "step": 2822 }, { "epoch": 0.5787207872078721, "grad_norm": 0.42230356506918815, "learning_rate": 7.952721165972996e-05, "loss": 1.5009, "step": 2823 }, { "epoch": 0.5789257892578926, "grad_norm": 0.4981664901815227, "learning_rate": 7.946221828454144e-05, "loss": 1.6676, "step": 2824 }, { "epoch": 0.5791307913079131, "grad_norm": 0.44921689065838266, "learning_rate": 7.939723396558132e-05, "loss": 1.5823, "step": 2825 }, { "epoch": 0.5793357933579336, "grad_norm": 0.4472783342030656, "learning_rate": 7.93322587315047e-05, "loss": 1.6223, "step": 2826 }, { "epoch": 0.5795407954079541, "grad_norm": 0.45704025689837224, "learning_rate": 7.926729261096276e-05, "loss": 1.5228, "step": 2827 }, { "epoch": 0.5797457974579746, "grad_norm": 0.48113985417457633, "learning_rate": 7.92023356326026e-05, "loss": 1.6116, "step": 2828 }, { "epoch": 0.5799507995079951, "grad_norm": 0.4810649777128658, "learning_rate": 7.913738782506727e-05, "loss": 1.5653, "step": 2829 }, { "epoch": 0.5801558015580156, "grad_norm": 0.45546034023084525, "learning_rate": 7.907244921699581e-05, "loss": 1.5618, "step": 2830 }, { "epoch": 0.5803608036080361, "grad_norm": 0.42028535047155097, "learning_rate": 7.900751983702317e-05, "loss": 1.5681, "step": 2831 }, { "epoch": 0.5805658056580566, "grad_norm": 0.4554780118570541, "learning_rate": 7.894259971378031e-05, "loss": 1.5828, "step": 2832 }, { "epoch": 0.5807708077080771, "grad_norm": 0.43349384022146037, "learning_rate": 7.887768887589403e-05, "loss": 1.5456, "step": 2833 }, { "epoch": 0.5809758097580976, "grad_norm": 0.4589527668069594, "learning_rate": 7.881278735198705e-05, "loss": 1.594, "step": 2834 }, { "epoch": 0.5811808118081181, "grad_norm": 0.43096675233898796, "learning_rate": 7.8747895170678e-05, "loss": 1.5244, "step": 2835 }, { "epoch": 0.5813858138581386, "grad_norm": 0.4302936709112231, "learning_rate": 7.868301236058138e-05, "loss": 1.5938, "step": 2836 }, { "epoch": 0.5815908159081591, "grad_norm": 0.4454121200108281, "learning_rate": 7.861813895030754e-05, "loss": 1.5697, "step": 2837 }, { "epoch": 0.5817958179581796, "grad_norm": 0.41208843086439834, "learning_rate": 7.855327496846276e-05, "loss": 1.5315, "step": 2838 }, { "epoch": 0.5820008200082001, "grad_norm": 0.46822681477448247, "learning_rate": 7.848842044364905e-05, "loss": 1.6061, "step": 2839 }, { "epoch": 0.5822058220582206, "grad_norm": 0.4172105089296651, "learning_rate": 7.842357540446437e-05, "loss": 1.5323, "step": 2840 }, { "epoch": 0.5824108241082411, "grad_norm": 0.4881793993515432, "learning_rate": 7.835873987950238e-05, "loss": 1.5293, "step": 2841 }, { "epoch": 0.5826158261582616, "grad_norm": 0.46743189424655723, "learning_rate": 7.829391389735259e-05, "loss": 1.5823, "step": 2842 }, { "epoch": 0.5828208282082821, "grad_norm": 0.46323901263219186, "learning_rate": 7.822909748660039e-05, "loss": 1.5939, "step": 2843 }, { "epoch": 0.5830258302583026, "grad_norm": 0.4603240255662383, "learning_rate": 7.816429067582678e-05, "loss": 1.6043, "step": 2844 }, { "epoch": 0.5832308323083231, "grad_norm": 0.45580500878688224, "learning_rate": 7.809949349360872e-05, "loss": 1.5585, "step": 2845 }, { "epoch": 0.5834358343583436, "grad_norm": 0.42889856701096885, "learning_rate": 7.803470596851872e-05, "loss": 1.5065, "step": 2846 }, { "epoch": 0.5836408364083641, "grad_norm": 0.4981112831154715, "learning_rate": 7.796992812912516e-05, "loss": 1.6294, "step": 2847 }, { "epoch": 0.5838458384583846, "grad_norm": 0.4798442243225811, "learning_rate": 7.790516000399219e-05, "loss": 1.5568, "step": 2848 }, { "epoch": 0.5840508405084051, "grad_norm": 0.4622015940788796, "learning_rate": 7.784040162167954e-05, "loss": 1.5457, "step": 2849 }, { "epoch": 0.5842558425584256, "grad_norm": 0.4353184257811204, "learning_rate": 7.777565301074275e-05, "loss": 1.5658, "step": 2850 }, { "epoch": 0.584460844608446, "grad_norm": 0.41631359808026636, "learning_rate": 7.7710914199733e-05, "loss": 1.5555, "step": 2851 }, { "epoch": 0.5846658466584665, "grad_norm": 0.469145393073749, "learning_rate": 7.764618521719715e-05, "loss": 1.5743, "step": 2852 }, { "epoch": 0.584870848708487, "grad_norm": 0.47258341502657575, "learning_rate": 7.758146609167773e-05, "loss": 1.5345, "step": 2853 }, { "epoch": 0.5850758507585075, "grad_norm": 0.4346090686240297, "learning_rate": 7.7516756851713e-05, "loss": 1.5774, "step": 2854 }, { "epoch": 0.585280852808528, "grad_norm": 0.48693640936995536, "learning_rate": 7.745205752583673e-05, "loss": 1.6069, "step": 2855 }, { "epoch": 0.5854858548585485, "grad_norm": 0.4448892411221868, "learning_rate": 7.738736814257843e-05, "loss": 1.5614, "step": 2856 }, { "epoch": 0.585690856908569, "grad_norm": 0.5150479486429531, "learning_rate": 7.732268873046313e-05, "loss": 1.5747, "step": 2857 }, { "epoch": 0.5858958589585895, "grad_norm": 0.4709818682182925, "learning_rate": 7.72580193180115e-05, "loss": 1.5686, "step": 2858 }, { "epoch": 0.5861008610086101, "grad_norm": 0.43790425227806434, "learning_rate": 7.71933599337399e-05, "loss": 1.52, "step": 2859 }, { "epoch": 0.5863058630586306, "grad_norm": 0.4413626622421896, "learning_rate": 7.71287106061601e-05, "loss": 1.5015, "step": 2860 }, { "epoch": 0.5865108651086511, "grad_norm": 0.4310407818275572, "learning_rate": 7.706407136377956e-05, "loss": 1.55, "step": 2861 }, { "epoch": 0.5867158671586716, "grad_norm": 0.4731523562710143, "learning_rate": 7.69994422351012e-05, "loss": 1.5154, "step": 2862 }, { "epoch": 0.5869208692086921, "grad_norm": 0.4871955782599282, "learning_rate": 7.69348232486236e-05, "loss": 1.6519, "step": 2863 }, { "epoch": 0.5871258712587126, "grad_norm": 0.5002230614648069, "learning_rate": 7.687021443284071e-05, "loss": 1.6242, "step": 2864 }, { "epoch": 0.5873308733087331, "grad_norm": 0.4649034147981404, "learning_rate": 7.680561581624212e-05, "loss": 1.6344, "step": 2865 }, { "epoch": 0.5875358753587536, "grad_norm": 0.5040437033568057, "learning_rate": 7.674102742731293e-05, "loss": 1.6426, "step": 2866 }, { "epoch": 0.5877408774087741, "grad_norm": 0.4806300106642581, "learning_rate": 7.667644929453362e-05, "loss": 1.5363, "step": 2867 }, { "epoch": 0.5879458794587946, "grad_norm": 0.43324569620733566, "learning_rate": 7.661188144638027e-05, "loss": 1.5575, "step": 2868 }, { "epoch": 0.5881508815088151, "grad_norm": 0.45123913290457685, "learning_rate": 7.654732391132429e-05, "loss": 1.5476, "step": 2869 }, { "epoch": 0.5883558835588356, "grad_norm": 0.4234195302813191, "learning_rate": 7.648277671783266e-05, "loss": 1.5205, "step": 2870 }, { "epoch": 0.5885608856088561, "grad_norm": 0.40813996975126404, "learning_rate": 7.641823989436781e-05, "loss": 1.52, "step": 2871 }, { "epoch": 0.5887658876588766, "grad_norm": 0.4788725993372061, "learning_rate": 7.635371346938746e-05, "loss": 1.5362, "step": 2872 }, { "epoch": 0.5889708897088971, "grad_norm": 0.42564232016355286, "learning_rate": 7.628919747134489e-05, "loss": 1.5739, "step": 2873 }, { "epoch": 0.5891758917589176, "grad_norm": 0.465141720475574, "learning_rate": 7.622469192868867e-05, "loss": 1.5213, "step": 2874 }, { "epoch": 0.5893808938089381, "grad_norm": 0.45931080212450043, "learning_rate": 7.616019686986285e-05, "loss": 1.6028, "step": 2875 }, { "epoch": 0.5895858958589586, "grad_norm": 0.4633635984104622, "learning_rate": 7.609571232330685e-05, "loss": 1.5936, "step": 2876 }, { "epoch": 0.5897908979089791, "grad_norm": 0.4664872502605939, "learning_rate": 7.603123831745536e-05, "loss": 1.5708, "step": 2877 }, { "epoch": 0.5899958999589996, "grad_norm": 0.40842845857718346, "learning_rate": 7.596677488073854e-05, "loss": 1.541, "step": 2878 }, { "epoch": 0.5902009020090201, "grad_norm": 0.46007633693727107, "learning_rate": 7.590232204158179e-05, "loss": 1.5702, "step": 2879 }, { "epoch": 0.5904059040590406, "grad_norm": 0.5008518912866813, "learning_rate": 7.583787982840588e-05, "loss": 1.5923, "step": 2880 }, { "epoch": 0.5906109061090611, "grad_norm": 0.4334617236720966, "learning_rate": 7.577344826962697e-05, "loss": 1.4979, "step": 2881 }, { "epoch": 0.5908159081590816, "grad_norm": 0.4256248857790475, "learning_rate": 7.570902739365637e-05, "loss": 1.5362, "step": 2882 }, { "epoch": 0.5910209102091021, "grad_norm": 0.4478164990192499, "learning_rate": 7.564461722890081e-05, "loss": 1.5848, "step": 2883 }, { "epoch": 0.5912259122591226, "grad_norm": 0.46074969260142695, "learning_rate": 7.558021780376223e-05, "loss": 1.5268, "step": 2884 }, { "epoch": 0.5914309143091431, "grad_norm": 0.4867293073168773, "learning_rate": 7.551582914663781e-05, "loss": 1.5988, "step": 2885 }, { "epoch": 0.5916359163591636, "grad_norm": 0.3882639942277087, "learning_rate": 7.54514512859201e-05, "loss": 1.4282, "step": 2886 }, { "epoch": 0.5918409184091841, "grad_norm": 0.3998976149246561, "learning_rate": 7.538708424999674e-05, "loss": 1.5167, "step": 2887 }, { "epoch": 0.5920459204592046, "grad_norm": 0.5057311694400677, "learning_rate": 7.532272806725072e-05, "loss": 1.4919, "step": 2888 }, { "epoch": 0.5922509225092251, "grad_norm": 0.4794172732968755, "learning_rate": 7.525838276606016e-05, "loss": 1.6711, "step": 2889 }, { "epoch": 0.5924559245592456, "grad_norm": 0.4422451479514918, "learning_rate": 7.519404837479837e-05, "loss": 1.5541, "step": 2890 }, { "epoch": 0.5926609266092661, "grad_norm": 0.5146714235555389, "learning_rate": 7.5129724921834e-05, "loss": 1.5923, "step": 2891 }, { "epoch": 0.5928659286592866, "grad_norm": 0.45403884245738774, "learning_rate": 7.506541243553072e-05, "loss": 1.6134, "step": 2892 }, { "epoch": 0.593070930709307, "grad_norm": 0.4374507581359449, "learning_rate": 7.50011109442474e-05, "loss": 1.5332, "step": 2893 }, { "epoch": 0.5932759327593276, "grad_norm": 0.4406527983008887, "learning_rate": 7.493682047633808e-05, "loss": 1.5565, "step": 2894 }, { "epoch": 0.593480934809348, "grad_norm": 0.4726042059440526, "learning_rate": 7.487254106015195e-05, "loss": 1.596, "step": 2895 }, { "epoch": 0.5936859368593685, "grad_norm": 0.448435253083987, "learning_rate": 7.480827272403326e-05, "loss": 1.6385, "step": 2896 }, { "epoch": 0.593890938909389, "grad_norm": 0.4494791698334838, "learning_rate": 7.474401549632147e-05, "loss": 1.4956, "step": 2897 }, { "epoch": 0.5940959409594095, "grad_norm": 0.4699834326011467, "learning_rate": 7.467976940535112e-05, "loss": 1.5578, "step": 2898 }, { "epoch": 0.5943009430094301, "grad_norm": 0.5087872607468341, "learning_rate": 7.461553447945179e-05, "loss": 1.6666, "step": 2899 }, { "epoch": 0.5945059450594506, "grad_norm": 0.44851189394034247, "learning_rate": 7.455131074694816e-05, "loss": 1.5925, "step": 2900 }, { "epoch": 0.5947109471094711, "grad_norm": 0.4175534470770163, "learning_rate": 7.448709823615995e-05, "loss": 1.5458, "step": 2901 }, { "epoch": 0.5949159491594916, "grad_norm": 0.47400132301893866, "learning_rate": 7.442289697540201e-05, "loss": 1.5097, "step": 2902 }, { "epoch": 0.5951209512095121, "grad_norm": 0.46887493732744584, "learning_rate": 7.435870699298416e-05, "loss": 1.5297, "step": 2903 }, { "epoch": 0.5953259532595326, "grad_norm": 0.48584437331528135, "learning_rate": 7.429452831721127e-05, "loss": 1.6051, "step": 2904 }, { "epoch": 0.5955309553095531, "grad_norm": 0.5025077618232665, "learning_rate": 7.42303609763832e-05, "loss": 1.6578, "step": 2905 }, { "epoch": 0.5957359573595736, "grad_norm": 0.449442576433012, "learning_rate": 7.41662049987948e-05, "loss": 1.5585, "step": 2906 }, { "epoch": 0.5959409594095941, "grad_norm": 0.42670517925271795, "learning_rate": 7.410206041273606e-05, "loss": 1.5461, "step": 2907 }, { "epoch": 0.5961459614596146, "grad_norm": 0.4680815828320162, "learning_rate": 7.40379272464917e-05, "loss": 1.6232, "step": 2908 }, { "epoch": 0.5963509635096351, "grad_norm": 0.4538627587316065, "learning_rate": 7.397380552834161e-05, "loss": 1.553, "step": 2909 }, { "epoch": 0.5965559655596556, "grad_norm": 0.4526768698468136, "learning_rate": 7.39096952865605e-05, "loss": 1.6067, "step": 2910 }, { "epoch": 0.5967609676096761, "grad_norm": 0.4253044617278166, "learning_rate": 7.384559654941814e-05, "loss": 1.5509, "step": 2911 }, { "epoch": 0.5969659696596966, "grad_norm": 0.4803130212739422, "learning_rate": 7.378150934517906e-05, "loss": 1.6028, "step": 2912 }, { "epoch": 0.5971709717097171, "grad_norm": 0.4688828197748933, "learning_rate": 7.371743370210289e-05, "loss": 1.5739, "step": 2913 }, { "epoch": 0.5973759737597376, "grad_norm": 0.4647481537688531, "learning_rate": 7.365336964844408e-05, "loss": 1.5398, "step": 2914 }, { "epoch": 0.5975809758097581, "grad_norm": 0.4291945269309361, "learning_rate": 7.35893172124519e-05, "loss": 1.5232, "step": 2915 }, { "epoch": 0.5977859778597786, "grad_norm": 0.43486850161559637, "learning_rate": 7.352527642237064e-05, "loss": 1.5937, "step": 2916 }, { "epoch": 0.5979909799097991, "grad_norm": 0.47206834237971973, "learning_rate": 7.346124730643929e-05, "loss": 1.5586, "step": 2917 }, { "epoch": 0.5981959819598196, "grad_norm": 0.4498992161060695, "learning_rate": 7.339722989289183e-05, "loss": 1.5829, "step": 2918 }, { "epoch": 0.5984009840098401, "grad_norm": 0.46517941039745553, "learning_rate": 7.333322420995708e-05, "loss": 1.5481, "step": 2919 }, { "epoch": 0.5986059860598606, "grad_norm": 0.5022982277225991, "learning_rate": 7.326923028585854e-05, "loss": 1.6315, "step": 2920 }, { "epoch": 0.5988109881098811, "grad_norm": 0.46503977065256796, "learning_rate": 7.32052481488147e-05, "loss": 1.5343, "step": 2921 }, { "epoch": 0.5990159901599016, "grad_norm": 0.44620462033036284, "learning_rate": 7.31412778270387e-05, "loss": 1.5502, "step": 2922 }, { "epoch": 0.5992209922099221, "grad_norm": 0.4496059349665212, "learning_rate": 7.307731934873862e-05, "loss": 1.6046, "step": 2923 }, { "epoch": 0.5994259942599426, "grad_norm": 0.4321324175035921, "learning_rate": 7.301337274211722e-05, "loss": 1.5535, "step": 2924 }, { "epoch": 0.5996309963099631, "grad_norm": 0.5175574511849769, "learning_rate": 7.294943803537202e-05, "loss": 1.6659, "step": 2925 }, { "epoch": 0.5998359983599836, "grad_norm": 0.3977693749582312, "learning_rate": 7.288551525669536e-05, "loss": 1.5706, "step": 2926 }, { "epoch": 0.6000410004100041, "grad_norm": 0.43619380730220897, "learning_rate": 7.282160443427424e-05, "loss": 1.6211, "step": 2927 }, { "epoch": 0.6002460024600246, "grad_norm": 0.45241328781708706, "learning_rate": 7.275770559629042e-05, "loss": 1.5221, "step": 2928 }, { "epoch": 0.6004510045100451, "grad_norm": 0.483219973559941, "learning_rate": 7.269381877092045e-05, "loss": 1.5598, "step": 2929 }, { "epoch": 0.6006560065600656, "grad_norm": 0.47601924043064653, "learning_rate": 7.262994398633547e-05, "loss": 1.592, "step": 2930 }, { "epoch": 0.6008610086100861, "grad_norm": 0.46329133087345176, "learning_rate": 7.256608127070137e-05, "loss": 1.6068, "step": 2931 }, { "epoch": 0.6010660106601066, "grad_norm": 0.45790279170063364, "learning_rate": 7.250223065217869e-05, "loss": 1.5248, "step": 2932 }, { "epoch": 0.6012710127101271, "grad_norm": 0.45302107601922315, "learning_rate": 7.243839215892263e-05, "loss": 1.5888, "step": 2933 }, { "epoch": 0.6014760147601476, "grad_norm": 0.41069801676078876, "learning_rate": 7.237456581908315e-05, "loss": 1.4927, "step": 2934 }, { "epoch": 0.6016810168101681, "grad_norm": 0.41391511299416306, "learning_rate": 7.231075166080467e-05, "loss": 1.5581, "step": 2935 }, { "epoch": 0.6018860188601886, "grad_norm": 0.4244492935350169, "learning_rate": 7.224694971222641e-05, "loss": 1.5608, "step": 2936 }, { "epoch": 0.602091020910209, "grad_norm": 0.49046412759116587, "learning_rate": 7.218316000148207e-05, "loss": 1.607, "step": 2937 }, { "epoch": 0.6022960229602295, "grad_norm": 0.4678840773151214, "learning_rate": 7.211938255670003e-05, "loss": 1.5151, "step": 2938 }, { "epoch": 0.6025010250102502, "grad_norm": 0.46787063802713913, "learning_rate": 7.205561740600329e-05, "loss": 1.5338, "step": 2939 }, { "epoch": 0.6027060270602707, "grad_norm": 0.4345510102151128, "learning_rate": 7.19918645775093e-05, "loss": 1.5101, "step": 2940 }, { "epoch": 0.6029110291102912, "grad_norm": 0.518363603649018, "learning_rate": 7.192812409933025e-05, "loss": 1.5883, "step": 2941 }, { "epoch": 0.6031160311603116, "grad_norm": 0.44056034890151863, "learning_rate": 7.186439599957273e-05, "loss": 1.5402, "step": 2942 }, { "epoch": 0.6033210332103321, "grad_norm": 0.4791752040908096, "learning_rate": 7.180068030633798e-05, "loss": 1.5683, "step": 2943 }, { "epoch": 0.6035260352603526, "grad_norm": 0.4828693826400533, "learning_rate": 7.173697704772164e-05, "loss": 1.5392, "step": 2944 }, { "epoch": 0.6037310373103731, "grad_norm": 0.4514751248424061, "learning_rate": 7.167328625181404e-05, "loss": 1.5748, "step": 2945 }, { "epoch": 0.6039360393603936, "grad_norm": 0.4711325186459684, "learning_rate": 7.160960794669992e-05, "loss": 1.4962, "step": 2946 }, { "epoch": 0.6041410414104141, "grad_norm": 0.4502351071834052, "learning_rate": 7.15459421604585e-05, "loss": 1.5814, "step": 2947 }, { "epoch": 0.6043460434604346, "grad_norm": 0.4587706708821412, "learning_rate": 7.148228892116351e-05, "loss": 1.502, "step": 2948 }, { "epoch": 0.6045510455104551, "grad_norm": 0.4155835311109425, "learning_rate": 7.141864825688307e-05, "loss": 1.5445, "step": 2949 }, { "epoch": 0.6047560475604756, "grad_norm": 0.40179931469561625, "learning_rate": 7.13550201956799e-05, "loss": 1.512, "step": 2950 }, { "epoch": 0.6049610496104961, "grad_norm": 0.4925202936110988, "learning_rate": 7.129140476561108e-05, "loss": 1.6364, "step": 2951 }, { "epoch": 0.6051660516605166, "grad_norm": 0.44635459860991183, "learning_rate": 7.122780199472809e-05, "loss": 1.534, "step": 2952 }, { "epoch": 0.6053710537105371, "grad_norm": 0.3957148211831439, "learning_rate": 7.116421191107687e-05, "loss": 1.577, "step": 2953 }, { "epoch": 0.6055760557605576, "grad_norm": 0.4461287587990288, "learning_rate": 7.110063454269777e-05, "loss": 1.5355, "step": 2954 }, { "epoch": 0.6057810578105781, "grad_norm": 0.42016913288286717, "learning_rate": 7.103706991762546e-05, "loss": 1.5818, "step": 2955 }, { "epoch": 0.6059860598605986, "grad_norm": 0.4681872730089598, "learning_rate": 7.097351806388915e-05, "loss": 1.5473, "step": 2956 }, { "epoch": 0.6061910619106191, "grad_norm": 0.4467933961188191, "learning_rate": 7.090997900951227e-05, "loss": 1.5756, "step": 2957 }, { "epoch": 0.6063960639606396, "grad_norm": 0.45454303945035157, "learning_rate": 7.084645278251263e-05, "loss": 1.5476, "step": 2958 }, { "epoch": 0.6066010660106601, "grad_norm": 0.4242354162291587, "learning_rate": 7.078293941090249e-05, "loss": 1.5584, "step": 2959 }, { "epoch": 0.6068060680606806, "grad_norm": 0.43653224264582896, "learning_rate": 7.071943892268822e-05, "loss": 1.5486, "step": 2960 }, { "epoch": 0.6070110701107011, "grad_norm": 0.43139034505263063, "learning_rate": 7.065595134587078e-05, "loss": 1.5366, "step": 2961 }, { "epoch": 0.6072160721607216, "grad_norm": 0.4514582820547564, "learning_rate": 7.059247670844528e-05, "loss": 1.4895, "step": 2962 }, { "epoch": 0.6074210742107421, "grad_norm": 0.47433438538277, "learning_rate": 7.052901503840111e-05, "loss": 1.5186, "step": 2963 }, { "epoch": 0.6076260762607626, "grad_norm": 0.4941721400249152, "learning_rate": 7.046556636372202e-05, "loss": 1.5917, "step": 2964 }, { "epoch": 0.6078310783107831, "grad_norm": 0.49800041869459405, "learning_rate": 7.040213071238592e-05, "loss": 1.4808, "step": 2965 }, { "epoch": 0.6080360803608036, "grad_norm": 0.452958066822433, "learning_rate": 7.033870811236516e-05, "loss": 1.5035, "step": 2966 }, { "epoch": 0.6082410824108241, "grad_norm": 0.47066943263641875, "learning_rate": 7.027529859162616e-05, "loss": 1.5362, "step": 2967 }, { "epoch": 0.6084460844608446, "grad_norm": 0.4459782854808815, "learning_rate": 7.021190217812966e-05, "loss": 1.5301, "step": 2968 }, { "epoch": 0.6086510865108651, "grad_norm": 0.46638297805865353, "learning_rate": 7.014851889983057e-05, "loss": 1.5454, "step": 2969 }, { "epoch": 0.6088560885608856, "grad_norm": 0.46390237728714767, "learning_rate": 7.008514878467805e-05, "loss": 1.532, "step": 2970 }, { "epoch": 0.6090610906109061, "grad_norm": 0.44795921069907085, "learning_rate": 7.002179186061542e-05, "loss": 1.5688, "step": 2971 }, { "epoch": 0.6092660926609266, "grad_norm": 0.4690095808970144, "learning_rate": 6.995844815558026e-05, "loss": 1.5604, "step": 2972 }, { "epoch": 0.6094710947109471, "grad_norm": 0.45825983851594926, "learning_rate": 6.98951176975042e-05, "loss": 1.5627, "step": 2973 }, { "epoch": 0.6096760967609676, "grad_norm": 0.5150166896673117, "learning_rate": 6.983180051431315e-05, "loss": 1.5798, "step": 2974 }, { "epoch": 0.6098810988109881, "grad_norm": 0.455109123650538, "learning_rate": 6.976849663392708e-05, "loss": 1.5797, "step": 2975 }, { "epoch": 0.6100861008610086, "grad_norm": 0.4702531173078538, "learning_rate": 6.97052060842601e-05, "loss": 1.5718, "step": 2976 }, { "epoch": 0.6102911029110291, "grad_norm": 0.4495664668884594, "learning_rate": 6.964192889322053e-05, "loss": 1.5585, "step": 2977 }, { "epoch": 0.6104961049610496, "grad_norm": 0.46226082522956324, "learning_rate": 6.957866508871068e-05, "loss": 1.5363, "step": 2978 }, { "epoch": 0.6107011070110702, "grad_norm": 0.4707280057172589, "learning_rate": 6.951541469862706e-05, "loss": 1.6113, "step": 2979 }, { "epoch": 0.6109061090610907, "grad_norm": 0.48183454794360314, "learning_rate": 6.945217775086017e-05, "loss": 1.5371, "step": 2980 }, { "epoch": 0.6111111111111112, "grad_norm": 0.4589876764504603, "learning_rate": 6.938895427329463e-05, "loss": 1.6195, "step": 2981 }, { "epoch": 0.6113161131611317, "grad_norm": 0.45775643294615465, "learning_rate": 6.932574429380918e-05, "loss": 1.5202, "step": 2982 }, { "epoch": 0.6115211152111522, "grad_norm": 0.4798856422208839, "learning_rate": 6.926254784027648e-05, "loss": 1.5764, "step": 2983 }, { "epoch": 0.6117261172611727, "grad_norm": 0.4403281723941011, "learning_rate": 6.919936494056336e-05, "loss": 1.5124, "step": 2984 }, { "epoch": 0.6119311193111932, "grad_norm": 0.43176902468149503, "learning_rate": 6.913619562253055e-05, "loss": 1.6334, "step": 2985 }, { "epoch": 0.6121361213612136, "grad_norm": 0.4597713414597451, "learning_rate": 6.907303991403289e-05, "loss": 1.6131, "step": 2986 }, { "epoch": 0.6123411234112341, "grad_norm": 0.45445690196581606, "learning_rate": 6.900989784291911e-05, "loss": 1.5836, "step": 2987 }, { "epoch": 0.6125461254612546, "grad_norm": 0.43526419128306093, "learning_rate": 6.894676943703206e-05, "loss": 1.4932, "step": 2988 }, { "epoch": 0.6127511275112751, "grad_norm": 0.4779583093014876, "learning_rate": 6.888365472420851e-05, "loss": 1.53, "step": 2989 }, { "epoch": 0.6129561295612956, "grad_norm": 0.4576740543678265, "learning_rate": 6.882055373227914e-05, "loss": 1.5736, "step": 2990 }, { "epoch": 0.6131611316113161, "grad_norm": 0.4481675687238009, "learning_rate": 6.875746648906863e-05, "loss": 1.5595, "step": 2991 }, { "epoch": 0.6133661336613366, "grad_norm": 0.4761378801904838, "learning_rate": 6.869439302239556e-05, "loss": 1.5474, "step": 2992 }, { "epoch": 0.6135711357113571, "grad_norm": 0.45375696383716857, "learning_rate": 6.863133336007248e-05, "loss": 1.5704, "step": 2993 }, { "epoch": 0.6137761377613776, "grad_norm": 0.4662242508913138, "learning_rate": 6.856828752990589e-05, "loss": 1.5479, "step": 2994 }, { "epoch": 0.6139811398113981, "grad_norm": 0.4469917888154684, "learning_rate": 6.850525555969607e-05, "loss": 1.5636, "step": 2995 }, { "epoch": 0.6141861418614186, "grad_norm": 0.44574423297664095, "learning_rate": 6.844223747723728e-05, "loss": 1.4877, "step": 2996 }, { "epoch": 0.6143911439114391, "grad_norm": 0.421475813096146, "learning_rate": 6.83792333103176e-05, "loss": 1.5472, "step": 2997 }, { "epoch": 0.6145961459614596, "grad_norm": 0.4376875543122658, "learning_rate": 6.831624308671905e-05, "loss": 1.641, "step": 2998 }, { "epoch": 0.6148011480114801, "grad_norm": 0.47070626573915497, "learning_rate": 6.825326683421744e-05, "loss": 1.5454, "step": 2999 }, { "epoch": 0.6150061500615006, "grad_norm": 0.4923536137605395, "learning_rate": 6.819030458058243e-05, "loss": 1.5655, "step": 3000 }, { "epoch": 0.6152111521115211, "grad_norm": 0.4589559562216384, "learning_rate": 6.812735635357753e-05, "loss": 1.5332, "step": 3001 }, { "epoch": 0.6154161541615416, "grad_norm": 0.41406116254882996, "learning_rate": 6.806442218096001e-05, "loss": 1.5288, "step": 3002 }, { "epoch": 0.6156211562115621, "grad_norm": 0.47434444684201854, "learning_rate": 6.800150209048097e-05, "loss": 1.506, "step": 3003 }, { "epoch": 0.6158261582615826, "grad_norm": 0.42832017576009973, "learning_rate": 6.79385961098854e-05, "loss": 1.5521, "step": 3004 }, { "epoch": 0.6160311603116031, "grad_norm": 0.4675808043623191, "learning_rate": 6.787570426691189e-05, "loss": 1.5702, "step": 3005 }, { "epoch": 0.6162361623616236, "grad_norm": 0.465647503517337, "learning_rate": 6.781282658929294e-05, "loss": 1.5811, "step": 3006 }, { "epoch": 0.6164411644116441, "grad_norm": 0.4424898207483636, "learning_rate": 6.774996310475473e-05, "loss": 1.5018, "step": 3007 }, { "epoch": 0.6166461664616646, "grad_norm": 0.44481342522395206, "learning_rate": 6.768711384101712e-05, "loss": 1.5328, "step": 3008 }, { "epoch": 0.6168511685116851, "grad_norm": 0.4698399725371324, "learning_rate": 6.762427882579389e-05, "loss": 1.4813, "step": 3009 }, { "epoch": 0.6170561705617056, "grad_norm": 0.49707658871041654, "learning_rate": 6.756145808679243e-05, "loss": 1.546, "step": 3010 }, { "epoch": 0.6172611726117261, "grad_norm": 0.4426897382888866, "learning_rate": 6.749865165171375e-05, "loss": 1.5276, "step": 3011 }, { "epoch": 0.6174661746617466, "grad_norm": 0.38791195809148604, "learning_rate": 6.74358595482527e-05, "loss": 1.5666, "step": 3012 }, { "epoch": 0.6176711767117671, "grad_norm": 0.4221514508427358, "learning_rate": 6.737308180409767e-05, "loss": 1.538, "step": 3013 }, { "epoch": 0.6178761787617876, "grad_norm": 0.4572047201575024, "learning_rate": 6.731031844693087e-05, "loss": 1.5332, "step": 3014 }, { "epoch": 0.6180811808118081, "grad_norm": 0.43861767506070165, "learning_rate": 6.724756950442807e-05, "loss": 1.544, "step": 3015 }, { "epoch": 0.6182861828618286, "grad_norm": 0.4323427771692878, "learning_rate": 6.718483500425867e-05, "loss": 1.5908, "step": 3016 }, { "epoch": 0.6184911849118491, "grad_norm": 0.4746355695421071, "learning_rate": 6.712211497408578e-05, "loss": 1.6562, "step": 3017 }, { "epoch": 0.6186961869618696, "grad_norm": 0.44815873191171124, "learning_rate": 6.705940944156603e-05, "loss": 1.5668, "step": 3018 }, { "epoch": 0.6189011890118902, "grad_norm": 0.43563679181631687, "learning_rate": 6.699671843434972e-05, "loss": 1.5005, "step": 3019 }, { "epoch": 0.6191061910619107, "grad_norm": 0.4571595922823676, "learning_rate": 6.69340419800808e-05, "loss": 1.5652, "step": 3020 }, { "epoch": 0.6193111931119312, "grad_norm": 0.45875034758171485, "learning_rate": 6.687138010639667e-05, "loss": 1.578, "step": 3021 }, { "epoch": 0.6195161951619517, "grad_norm": 0.4253248575499902, "learning_rate": 6.680873284092839e-05, "loss": 1.4906, "step": 3022 }, { "epoch": 0.6197211972119722, "grad_norm": 0.4531013198035524, "learning_rate": 6.674610021130055e-05, "loss": 1.6082, "step": 3023 }, { "epoch": 0.6199261992619927, "grad_norm": 0.4152722403328041, "learning_rate": 6.668348224513126e-05, "loss": 1.5313, "step": 3024 }, { "epoch": 0.6201312013120132, "grad_norm": 0.4393915941181943, "learning_rate": 6.662087897003229e-05, "loss": 1.5132, "step": 3025 }, { "epoch": 0.6203362033620337, "grad_norm": 0.47087162990327114, "learning_rate": 6.655829041360877e-05, "loss": 1.5645, "step": 3026 }, { "epoch": 0.6205412054120542, "grad_norm": 0.46168923411024254, "learning_rate": 6.649571660345944e-05, "loss": 1.583, "step": 3027 }, { "epoch": 0.6207462074620747, "grad_norm": 0.43839687490525603, "learning_rate": 6.643315756717648e-05, "loss": 1.5532, "step": 3028 }, { "epoch": 0.6209512095120951, "grad_norm": 0.5046946912544114, "learning_rate": 6.637061333234557e-05, "loss": 1.6101, "step": 3029 }, { "epoch": 0.6211562115621156, "grad_norm": 0.4703506483765253, "learning_rate": 6.630808392654593e-05, "loss": 1.6161, "step": 3030 }, { "epoch": 0.6213612136121361, "grad_norm": 0.4410805848457068, "learning_rate": 6.624556937735013e-05, "loss": 1.5445, "step": 3031 }, { "epoch": 0.6215662156621566, "grad_norm": 0.5680585085077579, "learning_rate": 6.61830697123243e-05, "loss": 1.5846, "step": 3032 }, { "epoch": 0.6217712177121771, "grad_norm": 0.42924480457354586, "learning_rate": 6.612058495902791e-05, "loss": 1.6052, "step": 3033 }, { "epoch": 0.6219762197621976, "grad_norm": 0.42982813747209114, "learning_rate": 6.605811514501392e-05, "loss": 1.5308, "step": 3034 }, { "epoch": 0.6221812218122181, "grad_norm": 0.4441246106502223, "learning_rate": 6.599566029782863e-05, "loss": 1.5295, "step": 3035 }, { "epoch": 0.6223862238622386, "grad_norm": 0.4326730877644339, "learning_rate": 6.593322044501185e-05, "loss": 1.5984, "step": 3036 }, { "epoch": 0.6225912259122591, "grad_norm": 0.4358961385466027, "learning_rate": 6.587079561409672e-05, "loss": 1.6145, "step": 3037 }, { "epoch": 0.6227962279622796, "grad_norm": 0.4578112174036775, "learning_rate": 6.580838583260968e-05, "loss": 1.5642, "step": 3038 }, { "epoch": 0.6230012300123001, "grad_norm": 0.48652487147283446, "learning_rate": 6.57459911280707e-05, "loss": 1.6143, "step": 3039 }, { "epoch": 0.6232062320623206, "grad_norm": 0.5141630065593029, "learning_rate": 6.568361152799293e-05, "loss": 1.5935, "step": 3040 }, { "epoch": 0.6234112341123411, "grad_norm": 0.4431000907807186, "learning_rate": 6.562124705988297e-05, "loss": 1.5108, "step": 3041 }, { "epoch": 0.6236162361623616, "grad_norm": 0.4801120183870235, "learning_rate": 6.555889775124076e-05, "loss": 1.5576, "step": 3042 }, { "epoch": 0.6238212382123821, "grad_norm": 0.46990654971245066, "learning_rate": 6.549656362955944e-05, "loss": 1.5396, "step": 3043 }, { "epoch": 0.6240262402624026, "grad_norm": 0.5185763461791878, "learning_rate": 6.54342447223256e-05, "loss": 1.4894, "step": 3044 }, { "epoch": 0.6242312423124231, "grad_norm": 0.4982719135297392, "learning_rate": 6.537194105701895e-05, "loss": 1.5106, "step": 3045 }, { "epoch": 0.6244362443624436, "grad_norm": 0.46857862001505124, "learning_rate": 6.530965266111264e-05, "loss": 1.5691, "step": 3046 }, { "epoch": 0.6246412464124641, "grad_norm": 0.47676696594678053, "learning_rate": 6.524737956207304e-05, "loss": 1.5025, "step": 3047 }, { "epoch": 0.6248462484624846, "grad_norm": 0.494878130134965, "learning_rate": 6.518512178735968e-05, "loss": 1.659, "step": 3048 }, { "epoch": 0.6250512505125051, "grad_norm": 0.4555208676122304, "learning_rate": 6.512287936442549e-05, "loss": 1.5844, "step": 3049 }, { "epoch": 0.6252562525625256, "grad_norm": 0.4666617153415811, "learning_rate": 6.50606523207165e-05, "loss": 1.5279, "step": 3050 }, { "epoch": 0.6254612546125461, "grad_norm": 0.5084049715324417, "learning_rate": 6.4998440683672e-05, "loss": 1.5811, "step": 3051 }, { "epoch": 0.6256662566625666, "grad_norm": 0.4538076629443115, "learning_rate": 6.493624448072457e-05, "loss": 1.5516, "step": 3052 }, { "epoch": 0.6258712587125871, "grad_norm": 0.43887531584056944, "learning_rate": 6.487406373929982e-05, "loss": 1.6212, "step": 3053 }, { "epoch": 0.6260762607626076, "grad_norm": 0.40300359557326754, "learning_rate": 6.48118984868167e-05, "loss": 1.5393, "step": 3054 }, { "epoch": 0.6262812628126281, "grad_norm": 0.46929896086456635, "learning_rate": 6.474974875068721e-05, "loss": 1.5414, "step": 3055 }, { "epoch": 0.6264862648626486, "grad_norm": 0.4600063367614203, "learning_rate": 6.468761455831656e-05, "loss": 1.5997, "step": 3056 }, { "epoch": 0.6266912669126691, "grad_norm": 0.43472745047654127, "learning_rate": 6.462549593710316e-05, "loss": 1.5287, "step": 3057 }, { "epoch": 0.6268962689626896, "grad_norm": 0.47110124986521373, "learning_rate": 6.456339291443845e-05, "loss": 1.5291, "step": 3058 }, { "epoch": 0.6271012710127102, "grad_norm": 0.4340888098912825, "learning_rate": 6.450130551770706e-05, "loss": 1.5416, "step": 3059 }, { "epoch": 0.6273062730627307, "grad_norm": 0.4541200900506299, "learning_rate": 6.443923377428672e-05, "loss": 1.5921, "step": 3060 }, { "epoch": 0.6275112751127512, "grad_norm": 0.4133637532071334, "learning_rate": 6.43771777115482e-05, "loss": 1.5532, "step": 3061 }, { "epoch": 0.6277162771627717, "grad_norm": 0.4112910106629686, "learning_rate": 6.431513735685543e-05, "loss": 1.5015, "step": 3062 }, { "epoch": 0.6279212792127922, "grad_norm": 0.4435137536551326, "learning_rate": 6.425311273756543e-05, "loss": 1.5479, "step": 3063 }, { "epoch": 0.6281262812628127, "grad_norm": 0.4787810811384867, "learning_rate": 6.419110388102818e-05, "loss": 1.5751, "step": 3064 }, { "epoch": 0.6283312833128332, "grad_norm": 0.4617157378231925, "learning_rate": 6.41291108145868e-05, "loss": 1.5023, "step": 3065 }, { "epoch": 0.6285362853628537, "grad_norm": 0.4203353332527203, "learning_rate": 6.406713356557739e-05, "loss": 1.4986, "step": 3066 }, { "epoch": 0.6287412874128742, "grad_norm": 0.43506829203387276, "learning_rate": 6.400517216132909e-05, "loss": 1.5543, "step": 3067 }, { "epoch": 0.6289462894628947, "grad_norm": 0.412736930042528, "learning_rate": 6.394322662916415e-05, "loss": 1.5562, "step": 3068 }, { "epoch": 0.6291512915129152, "grad_norm": 0.44163430888022454, "learning_rate": 6.388129699639762e-05, "loss": 1.606, "step": 3069 }, { "epoch": 0.6293562935629357, "grad_norm": 0.48693563184484756, "learning_rate": 6.381938329033775e-05, "loss": 1.5168, "step": 3070 }, { "epoch": 0.6295612956129562, "grad_norm": 0.4685244259782071, "learning_rate": 6.37574855382856e-05, "loss": 1.572, "step": 3071 }, { "epoch": 0.6297662976629766, "grad_norm": 0.43973339157362423, "learning_rate": 6.369560376753527e-05, "loss": 1.5996, "step": 3072 }, { "epoch": 0.6299712997129971, "grad_norm": 0.4196159961602663, "learning_rate": 6.363373800537387e-05, "loss": 1.5748, "step": 3073 }, { "epoch": 0.6301763017630176, "grad_norm": 0.5040263111844968, "learning_rate": 6.357188827908133e-05, "loss": 1.6471, "step": 3074 }, { "epoch": 0.6303813038130381, "grad_norm": 0.4638902429714762, "learning_rate": 6.351005461593063e-05, "loss": 1.5586, "step": 3075 }, { "epoch": 0.6305863058630586, "grad_norm": 0.4529245288933407, "learning_rate": 6.344823704318752e-05, "loss": 1.5208, "step": 3076 }, { "epoch": 0.6307913079130791, "grad_norm": 0.46428297219333026, "learning_rate": 6.338643558811082e-05, "loss": 1.5182, "step": 3077 }, { "epoch": 0.6309963099630996, "grad_norm": 0.4213468150289308, "learning_rate": 6.332465027795208e-05, "loss": 1.5517, "step": 3078 }, { "epoch": 0.6312013120131201, "grad_norm": 0.4111582335445511, "learning_rate": 6.326288113995589e-05, "loss": 1.4768, "step": 3079 }, { "epoch": 0.6314063140631406, "grad_norm": 0.4050386293384675, "learning_rate": 6.320112820135961e-05, "loss": 1.5074, "step": 3080 }, { "epoch": 0.6316113161131611, "grad_norm": 0.45694246954957957, "learning_rate": 6.313939148939347e-05, "loss": 1.6012, "step": 3081 }, { "epoch": 0.6318163181631816, "grad_norm": 0.476477422781459, "learning_rate": 6.307767103128057e-05, "loss": 1.632, "step": 3082 }, { "epoch": 0.6320213202132021, "grad_norm": 0.42829792876761863, "learning_rate": 6.301596685423679e-05, "loss": 1.5791, "step": 3083 }, { "epoch": 0.6322263222632226, "grad_norm": 0.4468566056534431, "learning_rate": 6.295427898547091e-05, "loss": 1.5671, "step": 3084 }, { "epoch": 0.6324313243132431, "grad_norm": 0.4608232611540185, "learning_rate": 6.289260745218447e-05, "loss": 1.6174, "step": 3085 }, { "epoch": 0.6326363263632636, "grad_norm": 0.4296584707129513, "learning_rate": 6.283095228157179e-05, "loss": 1.511, "step": 3086 }, { "epoch": 0.6328413284132841, "grad_norm": 0.37268380637691445, "learning_rate": 6.276931350082003e-05, "loss": 1.4874, "step": 3087 }, { "epoch": 0.6330463304633046, "grad_norm": 0.523482712003226, "learning_rate": 6.270769113710903e-05, "loss": 1.6072, "step": 3088 }, { "epoch": 0.6332513325133251, "grad_norm": 0.472150025473708, "learning_rate": 6.264608521761153e-05, "loss": 1.5999, "step": 3089 }, { "epoch": 0.6334563345633456, "grad_norm": 0.4744383769578823, "learning_rate": 6.258449576949292e-05, "loss": 1.52, "step": 3090 }, { "epoch": 0.6336613366133661, "grad_norm": 0.45943524836929384, "learning_rate": 6.252292281991133e-05, "loss": 1.5093, "step": 3091 }, { "epoch": 0.6338663386633866, "grad_norm": 0.463725695131211, "learning_rate": 6.246136639601764e-05, "loss": 1.5473, "step": 3092 }, { "epoch": 0.6340713407134071, "grad_norm": 0.46094504465199243, "learning_rate": 6.23998265249554e-05, "loss": 1.5483, "step": 3093 }, { "epoch": 0.6342763427634276, "grad_norm": 0.48481260909743107, "learning_rate": 6.233830323386091e-05, "loss": 1.5721, "step": 3094 }, { "epoch": 0.6344813448134481, "grad_norm": 0.43536682799752646, "learning_rate": 6.227679654986323e-05, "loss": 1.5682, "step": 3095 }, { "epoch": 0.6346863468634686, "grad_norm": 0.4807136300208779, "learning_rate": 6.221530650008391e-05, "loss": 1.5898, "step": 3096 }, { "epoch": 0.6348913489134891, "grad_norm": 0.45622830239550205, "learning_rate": 6.215383311163733e-05, "loss": 1.578, "step": 3097 }, { "epoch": 0.6350963509635096, "grad_norm": 0.4196356993103045, "learning_rate": 6.209237641163041e-05, "loss": 1.5267, "step": 3098 }, { "epoch": 0.6353013530135302, "grad_norm": 0.42762962872414567, "learning_rate": 6.203093642716278e-05, "loss": 1.4724, "step": 3099 }, { "epoch": 0.6355063550635507, "grad_norm": 0.48169889092815393, "learning_rate": 6.196951318532672e-05, "loss": 1.6326, "step": 3100 }, { "epoch": 0.6357113571135712, "grad_norm": 0.3938422042991795, "learning_rate": 6.190810671320704e-05, "loss": 1.5175, "step": 3101 }, { "epoch": 0.6359163591635917, "grad_norm": 0.40589681316749804, "learning_rate": 6.184671703788124e-05, "loss": 1.5179, "step": 3102 }, { "epoch": 0.6361213612136122, "grad_norm": 0.5312270393941052, "learning_rate": 6.178534418641932e-05, "loss": 1.5355, "step": 3103 }, { "epoch": 0.6363263632636327, "grad_norm": 0.40999673496993094, "learning_rate": 6.172398818588394e-05, "loss": 1.5355, "step": 3104 }, { "epoch": 0.6365313653136532, "grad_norm": 0.45408557733919874, "learning_rate": 6.166264906333038e-05, "loss": 1.5046, "step": 3105 }, { "epoch": 0.6367363673636737, "grad_norm": 0.40139532139574635, "learning_rate": 6.160132684580632e-05, "loss": 1.4976, "step": 3106 }, { "epoch": 0.6369413694136942, "grad_norm": 0.4391190505685638, "learning_rate": 6.154002156035212e-05, "loss": 1.5184, "step": 3107 }, { "epoch": 0.6371463714637147, "grad_norm": 0.42102805174258645, "learning_rate": 6.147873323400057e-05, "loss": 1.5964, "step": 3108 }, { "epoch": 0.6373513735137352, "grad_norm": 0.47359380008561136, "learning_rate": 6.14174618937771e-05, "loss": 1.5208, "step": 3109 }, { "epoch": 0.6375563755637557, "grad_norm": 0.4229507042072085, "learning_rate": 6.135620756669953e-05, "loss": 1.5073, "step": 3110 }, { "epoch": 0.6377613776137762, "grad_norm": 0.41421593544702223, "learning_rate": 6.129497027977829e-05, "loss": 1.5434, "step": 3111 }, { "epoch": 0.6379663796637967, "grad_norm": 0.4652491878822899, "learning_rate": 6.123375006001621e-05, "loss": 1.5935, "step": 3112 }, { "epoch": 0.6381713817138172, "grad_norm": 0.4805623615374848, "learning_rate": 6.117254693440864e-05, "loss": 1.6429, "step": 3113 }, { "epoch": 0.6383763837638377, "grad_norm": 0.44277759439079867, "learning_rate": 6.111136092994334e-05, "loss": 1.5765, "step": 3114 }, { "epoch": 0.6385813858138581, "grad_norm": 0.44702645540004915, "learning_rate": 6.105019207360056e-05, "loss": 1.5363, "step": 3115 }, { "epoch": 0.6387863878638786, "grad_norm": 0.4859048689662486, "learning_rate": 6.0989040392353045e-05, "loss": 1.5809, "step": 3116 }, { "epoch": 0.6389913899138991, "grad_norm": 0.45622043939948925, "learning_rate": 6.092790591316586e-05, "loss": 1.55, "step": 3117 }, { "epoch": 0.6391963919639196, "grad_norm": 0.45598645945246613, "learning_rate": 6.0866788662996566e-05, "loss": 1.5495, "step": 3118 }, { "epoch": 0.6394013940139401, "grad_norm": 0.48336684790519213, "learning_rate": 6.080568866879504e-05, "loss": 1.502, "step": 3119 }, { "epoch": 0.6396063960639606, "grad_norm": 0.4475737666285707, "learning_rate": 6.074460595750362e-05, "loss": 1.5202, "step": 3120 }, { "epoch": 0.6398113981139811, "grad_norm": 0.439372112039752, "learning_rate": 6.068354055605705e-05, "loss": 1.5591, "step": 3121 }, { "epoch": 0.6400164001640016, "grad_norm": 0.4896458467327331, "learning_rate": 6.0622492491382355e-05, "loss": 1.5066, "step": 3122 }, { "epoch": 0.6402214022140221, "grad_norm": 0.4758426880877212, "learning_rate": 6.056146179039899e-05, "loss": 1.5882, "step": 3123 }, { "epoch": 0.6404264042640426, "grad_norm": 0.42875447883323625, "learning_rate": 6.050044848001866e-05, "loss": 1.5841, "step": 3124 }, { "epoch": 0.6406314063140631, "grad_norm": 0.44377805630181605, "learning_rate": 6.043945258714553e-05, "loss": 1.5293, "step": 3125 }, { "epoch": 0.6408364083640836, "grad_norm": 0.4687476067771096, "learning_rate": 6.037847413867594e-05, "loss": 1.5161, "step": 3126 }, { "epoch": 0.6410414104141041, "grad_norm": 0.48434801119612425, "learning_rate": 6.03175131614987e-05, "loss": 1.5752, "step": 3127 }, { "epoch": 0.6412464124641246, "grad_norm": 0.44642566908429804, "learning_rate": 6.025656968249479e-05, "loss": 1.5015, "step": 3128 }, { "epoch": 0.6414514145141451, "grad_norm": 0.4948960693915527, "learning_rate": 6.01956437285375e-05, "loss": 1.5151, "step": 3129 }, { "epoch": 0.6416564165641656, "grad_norm": 0.44075415990186484, "learning_rate": 6.0134735326492456e-05, "loss": 1.4731, "step": 3130 }, { "epoch": 0.6418614186141861, "grad_norm": 0.43010664451961794, "learning_rate": 6.0073844503217416e-05, "loss": 1.4747, "step": 3131 }, { "epoch": 0.6420664206642066, "grad_norm": 0.4980942805847485, "learning_rate": 6.001297128556254e-05, "loss": 1.6157, "step": 3132 }, { "epoch": 0.6422714227142271, "grad_norm": 0.4839110453278517, "learning_rate": 5.995211570037013e-05, "loss": 1.5369, "step": 3133 }, { "epoch": 0.6424764247642476, "grad_norm": 0.48502879785065045, "learning_rate": 5.9891277774474706e-05, "loss": 1.5872, "step": 3134 }, { "epoch": 0.6426814268142681, "grad_norm": 0.46185359359460143, "learning_rate": 5.983045753470308e-05, "loss": 1.5725, "step": 3135 }, { "epoch": 0.6428864288642886, "grad_norm": 0.431272990971999, "learning_rate": 5.9769655007874135e-05, "loss": 1.5558, "step": 3136 }, { "epoch": 0.6430914309143091, "grad_norm": 0.45513265309122675, "learning_rate": 5.97088702207991e-05, "loss": 1.6232, "step": 3137 }, { "epoch": 0.6432964329643296, "grad_norm": 0.46927313281518557, "learning_rate": 5.964810320028129e-05, "loss": 1.5666, "step": 3138 }, { "epoch": 0.6435014350143502, "grad_norm": 0.4662299114881269, "learning_rate": 5.958735397311617e-05, "loss": 1.5095, "step": 3139 }, { "epoch": 0.6437064370643707, "grad_norm": 0.47693210481377235, "learning_rate": 5.9526622566091404e-05, "loss": 1.5459, "step": 3140 }, { "epoch": 0.6439114391143912, "grad_norm": 0.4575681722182784, "learning_rate": 5.946590900598676e-05, "loss": 1.5084, "step": 3141 }, { "epoch": 0.6441164411644117, "grad_norm": 0.42513599949011427, "learning_rate": 5.940521331957418e-05, "loss": 1.5618, "step": 3142 }, { "epoch": 0.6443214432144322, "grad_norm": 0.4498039731205022, "learning_rate": 5.934453553361774e-05, "loss": 1.552, "step": 3143 }, { "epoch": 0.6445264452644527, "grad_norm": 0.4746241405188141, "learning_rate": 5.928387567487352e-05, "loss": 1.5377, "step": 3144 }, { "epoch": 0.6447314473144732, "grad_norm": 0.46130790069385874, "learning_rate": 5.9223233770089805e-05, "loss": 1.5467, "step": 3145 }, { "epoch": 0.6449364493644937, "grad_norm": 0.4683276467710355, "learning_rate": 5.91626098460069e-05, "loss": 1.5173, "step": 3146 }, { "epoch": 0.6451414514145142, "grad_norm": 0.4583855578057574, "learning_rate": 5.9102003929357176e-05, "loss": 1.5856, "step": 3147 }, { "epoch": 0.6453464534645347, "grad_norm": 0.4573042182018581, "learning_rate": 5.904141604686515e-05, "loss": 1.4664, "step": 3148 }, { "epoch": 0.6455514555145552, "grad_norm": 0.417714253556272, "learning_rate": 5.8980846225247286e-05, "loss": 1.4652, "step": 3149 }, { "epoch": 0.6457564575645757, "grad_norm": 0.4687947482213487, "learning_rate": 5.8920294491212135e-05, "loss": 1.5532, "step": 3150 }, { "epoch": 0.6459614596145962, "grad_norm": 0.47899408800382387, "learning_rate": 5.885976087146023e-05, "loss": 1.6239, "step": 3151 }, { "epoch": 0.6461664616646167, "grad_norm": 0.47064112213968223, "learning_rate": 5.879924539268421e-05, "loss": 1.513, "step": 3152 }, { "epoch": 0.6463714637146372, "grad_norm": 0.4678637247192265, "learning_rate": 5.873874808156856e-05, "loss": 1.5423, "step": 3153 }, { "epoch": 0.6465764657646577, "grad_norm": 0.4546401482607076, "learning_rate": 5.8678268964789917e-05, "loss": 1.5566, "step": 3154 }, { "epoch": 0.6467814678146782, "grad_norm": 0.46931630190362994, "learning_rate": 5.861780806901682e-05, "loss": 1.5479, "step": 3155 }, { "epoch": 0.6469864698646987, "grad_norm": 0.4940517346671692, "learning_rate": 5.855736542090973e-05, "loss": 1.5689, "step": 3156 }, { "epoch": 0.6471914719147192, "grad_norm": 0.4275033835012508, "learning_rate": 5.8496941047121166e-05, "loss": 1.5947, "step": 3157 }, { "epoch": 0.6473964739647396, "grad_norm": 0.4676661954109531, "learning_rate": 5.843653497429546e-05, "loss": 1.5834, "step": 3158 }, { "epoch": 0.6476014760147601, "grad_norm": 0.4585392099430612, "learning_rate": 5.8376147229069e-05, "loss": 1.5086, "step": 3159 }, { "epoch": 0.6478064780647806, "grad_norm": 0.4370536235935077, "learning_rate": 5.831577783807005e-05, "loss": 1.6054, "step": 3160 }, { "epoch": 0.6480114801148011, "grad_norm": 0.4869207263266161, "learning_rate": 5.8255426827918736e-05, "loss": 1.6038, "step": 3161 }, { "epoch": 0.6482164821648216, "grad_norm": 0.44697840626255725, "learning_rate": 5.819509422522711e-05, "loss": 1.5771, "step": 3162 }, { "epoch": 0.6484214842148421, "grad_norm": 0.4372204434607352, "learning_rate": 5.813478005659905e-05, "loss": 1.5428, "step": 3163 }, { "epoch": 0.6486264862648626, "grad_norm": 0.44608074032163564, "learning_rate": 5.80744843486305e-05, "loss": 1.5268, "step": 3164 }, { "epoch": 0.6488314883148831, "grad_norm": 0.482804218009497, "learning_rate": 5.8014207127909046e-05, "loss": 1.5367, "step": 3165 }, { "epoch": 0.6490364903649036, "grad_norm": 0.44096838777295444, "learning_rate": 5.795394842101423e-05, "loss": 1.5316, "step": 3166 }, { "epoch": 0.6492414924149241, "grad_norm": 0.4031261525943413, "learning_rate": 5.789370825451737e-05, "loss": 1.474, "step": 3167 }, { "epoch": 0.6494464944649446, "grad_norm": 0.4685714831295918, "learning_rate": 5.7833486654981606e-05, "loss": 1.5835, "step": 3168 }, { "epoch": 0.6496514965149651, "grad_norm": 0.4502305900481912, "learning_rate": 5.7773283648961995e-05, "loss": 1.5674, "step": 3169 }, { "epoch": 0.6498564985649856, "grad_norm": 0.47007172799125435, "learning_rate": 5.771309926300534e-05, "loss": 1.5817, "step": 3170 }, { "epoch": 0.6500615006150061, "grad_norm": 0.4636312018459254, "learning_rate": 5.7652933523650197e-05, "loss": 1.5721, "step": 3171 }, { "epoch": 0.6502665026650266, "grad_norm": 0.46894903589726933, "learning_rate": 5.759278645742692e-05, "loss": 1.5265, "step": 3172 }, { "epoch": 0.6504715047150471, "grad_norm": 0.39333860872717535, "learning_rate": 5.753265809085757e-05, "loss": 1.5062, "step": 3173 }, { "epoch": 0.6506765067650676, "grad_norm": 0.4329914069624682, "learning_rate": 5.7472548450456086e-05, "loss": 1.5056, "step": 3174 }, { "epoch": 0.6508815088150881, "grad_norm": 0.4356872910419979, "learning_rate": 5.741245756272813e-05, "loss": 1.5391, "step": 3175 }, { "epoch": 0.6510865108651086, "grad_norm": 0.4015684419558019, "learning_rate": 5.735238545417101e-05, "loss": 1.519, "step": 3176 }, { "epoch": 0.6512915129151291, "grad_norm": 0.5035392397900486, "learning_rate": 5.729233215127378e-05, "loss": 1.6376, "step": 3177 }, { "epoch": 0.6514965149651496, "grad_norm": 0.45269166269271116, "learning_rate": 5.723229768051719e-05, "loss": 1.6163, "step": 3178 }, { "epoch": 0.6517015170151702, "grad_norm": 0.4479504049392582, "learning_rate": 5.717228206837375e-05, "loss": 1.4901, "step": 3179 }, { "epoch": 0.6519065190651907, "grad_norm": 0.47281687678925094, "learning_rate": 5.711228534130766e-05, "loss": 1.5553, "step": 3180 }, { "epoch": 0.6521115211152112, "grad_norm": 0.44998228518805594, "learning_rate": 5.7052307525774704e-05, "loss": 1.6461, "step": 3181 }, { "epoch": 0.6523165231652317, "grad_norm": 0.43728922449546276, "learning_rate": 5.699234864822239e-05, "loss": 1.5376, "step": 3182 }, { "epoch": 0.6525215252152522, "grad_norm": 0.4618517835386227, "learning_rate": 5.6932408735089804e-05, "loss": 1.5749, "step": 3183 }, { "epoch": 0.6527265272652727, "grad_norm": 0.47972418886107165, "learning_rate": 5.687248781280781e-05, "loss": 1.5501, "step": 3184 }, { "epoch": 0.6529315293152932, "grad_norm": 0.49097487992129674, "learning_rate": 5.681258590779872e-05, "loss": 1.5995, "step": 3185 }, { "epoch": 0.6531365313653137, "grad_norm": 0.4101905104609192, "learning_rate": 5.675270304647664e-05, "loss": 1.5156, "step": 3186 }, { "epoch": 0.6533415334153342, "grad_norm": 0.4578466938795278, "learning_rate": 5.669283925524715e-05, "loss": 1.5198, "step": 3187 }, { "epoch": 0.6535465354653547, "grad_norm": 0.4252736251730851, "learning_rate": 5.663299456050743e-05, "loss": 1.525, "step": 3188 }, { "epoch": 0.6537515375153752, "grad_norm": 0.4693685063599757, "learning_rate": 5.657316898864634e-05, "loss": 1.543, "step": 3189 }, { "epoch": 0.6539565395653957, "grad_norm": 0.4534799718928922, "learning_rate": 5.6513362566044156e-05, "loss": 1.5692, "step": 3190 }, { "epoch": 0.6541615416154162, "grad_norm": 0.5014795228701405, "learning_rate": 5.645357531907288e-05, "loss": 1.5632, "step": 3191 }, { "epoch": 0.6543665436654367, "grad_norm": 0.47029548478402916, "learning_rate": 5.639380727409593e-05, "loss": 1.5434, "step": 3192 }, { "epoch": 0.6545715457154572, "grad_norm": 0.4814289997456462, "learning_rate": 5.633405845746826e-05, "loss": 1.5905, "step": 3193 }, { "epoch": 0.6547765477654777, "grad_norm": 0.44070980057757314, "learning_rate": 5.6274328895536453e-05, "loss": 1.4297, "step": 3194 }, { "epoch": 0.6549815498154982, "grad_norm": 0.45404925459138346, "learning_rate": 5.621461861463846e-05, "loss": 1.5642, "step": 3195 }, { "epoch": 0.6551865518655187, "grad_norm": 0.4681611431930925, "learning_rate": 5.615492764110388e-05, "loss": 1.5062, "step": 3196 }, { "epoch": 0.6553915539155392, "grad_norm": 0.4803046238257875, "learning_rate": 5.6095256001253674e-05, "loss": 1.5625, "step": 3197 }, { "epoch": 0.6555965559655597, "grad_norm": 0.43861031958124674, "learning_rate": 5.6035603721400286e-05, "loss": 1.5785, "step": 3198 }, { "epoch": 0.6558015580155802, "grad_norm": 0.4450815930691942, "learning_rate": 5.597597082784776e-05, "loss": 1.5822, "step": 3199 }, { "epoch": 0.6560065600656007, "grad_norm": 0.42804738567236916, "learning_rate": 5.59163573468914e-05, "loss": 1.576, "step": 3200 }, { "epoch": 0.6562115621156211, "grad_norm": 0.4095625581118568, "learning_rate": 5.585676330481806e-05, "loss": 1.5105, "step": 3201 }, { "epoch": 0.6564165641656416, "grad_norm": 0.4052058646869245, "learning_rate": 5.5797188727906066e-05, "loss": 1.5147, "step": 3202 }, { "epoch": 0.6566215662156621, "grad_norm": 0.44528867818643275, "learning_rate": 5.5737633642425e-05, "loss": 1.5739, "step": 3203 }, { "epoch": 0.6568265682656826, "grad_norm": 0.45313991909875634, "learning_rate": 5.567809807463606e-05, "loss": 1.583, "step": 3204 }, { "epoch": 0.6570315703157031, "grad_norm": 0.4707462858715875, "learning_rate": 5.561858205079165e-05, "loss": 1.5937, "step": 3205 }, { "epoch": 0.6572365723657236, "grad_norm": 0.47426278351042955, "learning_rate": 5.555908559713561e-05, "loss": 1.6182, "step": 3206 }, { "epoch": 0.6574415744157441, "grad_norm": 0.4475531963539952, "learning_rate": 5.549960873990325e-05, "loss": 1.5851, "step": 3207 }, { "epoch": 0.6576465764657646, "grad_norm": 0.40005703959252115, "learning_rate": 5.544015150532109e-05, "loss": 1.5213, "step": 3208 }, { "epoch": 0.6578515785157851, "grad_norm": 0.43718927586236406, "learning_rate": 5.538071391960715e-05, "loss": 1.5161, "step": 3209 }, { "epoch": 0.6580565805658056, "grad_norm": 0.4504802482071545, "learning_rate": 5.5321296008970646e-05, "loss": 1.5618, "step": 3210 }, { "epoch": 0.6582615826158261, "grad_norm": 0.43689191329818405, "learning_rate": 5.526189779961215e-05, "loss": 1.5378, "step": 3211 }, { "epoch": 0.6584665846658466, "grad_norm": 0.4447015078021641, "learning_rate": 5.520251931772364e-05, "loss": 1.5398, "step": 3212 }, { "epoch": 0.6586715867158671, "grad_norm": 0.4398048376276036, "learning_rate": 5.514316058948827e-05, "loss": 1.5153, "step": 3213 }, { "epoch": 0.6588765887658876, "grad_norm": 0.4308897243836838, "learning_rate": 5.508382164108059e-05, "loss": 1.5565, "step": 3214 }, { "epoch": 0.6590815908159081, "grad_norm": 0.4270836856207494, "learning_rate": 5.5024502498666375e-05, "loss": 1.5176, "step": 3215 }, { "epoch": 0.6592865928659286, "grad_norm": 0.45852784704097116, "learning_rate": 5.496520318840266e-05, "loss": 1.594, "step": 3216 }, { "epoch": 0.6594915949159491, "grad_norm": 0.48635835007725897, "learning_rate": 5.490592373643768e-05, "loss": 1.5736, "step": 3217 }, { "epoch": 0.6596965969659696, "grad_norm": 0.4684679165980292, "learning_rate": 5.484666416891109e-05, "loss": 1.5488, "step": 3218 }, { "epoch": 0.6599015990159902, "grad_norm": 0.4344646996812627, "learning_rate": 5.478742451195358e-05, "loss": 1.5833, "step": 3219 }, { "epoch": 0.6601066010660107, "grad_norm": 0.44405561678189354, "learning_rate": 5.472820479168721e-05, "loss": 1.484, "step": 3220 }, { "epoch": 0.6603116031160312, "grad_norm": 0.4440539122831839, "learning_rate": 5.466900503422516e-05, "loss": 1.4991, "step": 3221 }, { "epoch": 0.6605166051660517, "grad_norm": 0.44579948169081185, "learning_rate": 5.46098252656718e-05, "loss": 1.5371, "step": 3222 }, { "epoch": 0.6607216072160722, "grad_norm": 0.4639735878052296, "learning_rate": 5.455066551212278e-05, "loss": 1.5072, "step": 3223 }, { "epoch": 0.6609266092660927, "grad_norm": 0.4371141112010233, "learning_rate": 5.44915257996648e-05, "loss": 1.5633, "step": 3224 }, { "epoch": 0.6611316113161132, "grad_norm": 0.4671515981857899, "learning_rate": 5.443240615437586e-05, "loss": 1.6421, "step": 3225 }, { "epoch": 0.6613366133661337, "grad_norm": 0.45830368772484725, "learning_rate": 5.437330660232498e-05, "loss": 1.493, "step": 3226 }, { "epoch": 0.6615416154161542, "grad_norm": 0.423427476354327, "learning_rate": 5.431422716957236e-05, "loss": 1.4763, "step": 3227 }, { "epoch": 0.6617466174661747, "grad_norm": 0.43795866049945825, "learning_rate": 5.4255167882169424e-05, "loss": 1.5489, "step": 3228 }, { "epoch": 0.6619516195161952, "grad_norm": 0.45181197556742075, "learning_rate": 5.419612876615854e-05, "loss": 1.5402, "step": 3229 }, { "epoch": 0.6621566215662157, "grad_norm": 0.4701909936533608, "learning_rate": 5.413710984757335e-05, "loss": 1.5774, "step": 3230 }, { "epoch": 0.6623616236162362, "grad_norm": 0.44082413536912224, "learning_rate": 5.407811115243849e-05, "loss": 1.5174, "step": 3231 }, { "epoch": 0.6625666256662567, "grad_norm": 0.5038142283139778, "learning_rate": 5.4019132706769706e-05, "loss": 1.5697, "step": 3232 }, { "epoch": 0.6627716277162772, "grad_norm": 0.4519124230988949, "learning_rate": 5.396017453657376e-05, "loss": 1.6061, "step": 3233 }, { "epoch": 0.6629766297662977, "grad_norm": 0.4543715581578266, "learning_rate": 5.3901236667848586e-05, "loss": 1.4984, "step": 3234 }, { "epoch": 0.6631816318163182, "grad_norm": 0.5382084445769868, "learning_rate": 5.384231912658311e-05, "loss": 1.6021, "step": 3235 }, { "epoch": 0.6633866338663387, "grad_norm": 0.4543702242725902, "learning_rate": 5.3783421938757286e-05, "loss": 1.5822, "step": 3236 }, { "epoch": 0.6635916359163592, "grad_norm": 0.43404116282001404, "learning_rate": 5.3724545130342074e-05, "loss": 1.5109, "step": 3237 }, { "epoch": 0.6637966379663797, "grad_norm": 0.49184910316176467, "learning_rate": 5.3665688727299444e-05, "loss": 1.4924, "step": 3238 }, { "epoch": 0.6640016400164002, "grad_norm": 0.45457346271665605, "learning_rate": 5.360685275558244e-05, "loss": 1.5243, "step": 3239 }, { "epoch": 0.6642066420664207, "grad_norm": 0.45648174844956557, "learning_rate": 5.3548037241135065e-05, "loss": 1.5364, "step": 3240 }, { "epoch": 0.6644116441164412, "grad_norm": 0.4304824196228037, "learning_rate": 5.348924220989227e-05, "loss": 1.5507, "step": 3241 }, { "epoch": 0.6646166461664617, "grad_norm": 0.4132082078195509, "learning_rate": 5.3430467687779985e-05, "loss": 1.518, "step": 3242 }, { "epoch": 0.6648216482164822, "grad_norm": 0.44485408074420546, "learning_rate": 5.337171370071508e-05, "loss": 1.5063, "step": 3243 }, { "epoch": 0.6650266502665027, "grad_norm": 0.4610816590170921, "learning_rate": 5.331298027460539e-05, "loss": 1.49, "step": 3244 }, { "epoch": 0.6652316523165231, "grad_norm": 0.45830666473374365, "learning_rate": 5.325426743534978e-05, "loss": 1.6203, "step": 3245 }, { "epoch": 0.6654366543665436, "grad_norm": 0.4364399927710269, "learning_rate": 5.3195575208837865e-05, "loss": 1.5124, "step": 3246 }, { "epoch": 0.6656416564165641, "grad_norm": 0.44074298374785986, "learning_rate": 5.3136903620950276e-05, "loss": 1.5157, "step": 3247 }, { "epoch": 0.6658466584665846, "grad_norm": 0.48338441506509955, "learning_rate": 5.3078252697558464e-05, "loss": 1.5949, "step": 3248 }, { "epoch": 0.6660516605166051, "grad_norm": 0.42034387716725086, "learning_rate": 5.301962246452485e-05, "loss": 1.5054, "step": 3249 }, { "epoch": 0.6662566625666256, "grad_norm": 0.45845655682944, "learning_rate": 5.296101294770276e-05, "loss": 1.4709, "step": 3250 }, { "epoch": 0.6664616646166461, "grad_norm": 0.48613464354798214, "learning_rate": 5.290242417293628e-05, "loss": 1.5544, "step": 3251 }, { "epoch": 0.6666666666666666, "grad_norm": 0.46059748913873183, "learning_rate": 5.28438561660604e-05, "loss": 1.5271, "step": 3252 }, { "epoch": 0.6668716687166871, "grad_norm": 0.4780978020855852, "learning_rate": 5.278530895290091e-05, "loss": 1.5691, "step": 3253 }, { "epoch": 0.6670766707667076, "grad_norm": 0.4893768677333458, "learning_rate": 5.27267825592745e-05, "loss": 1.5178, "step": 3254 }, { "epoch": 0.6672816728167281, "grad_norm": 0.46629887929368835, "learning_rate": 5.266827701098871e-05, "loss": 1.5838, "step": 3255 }, { "epoch": 0.6674866748667486, "grad_norm": 0.4871967092805627, "learning_rate": 5.260979233384178e-05, "loss": 1.5383, "step": 3256 }, { "epoch": 0.6676916769167691, "grad_norm": 0.41135390415159157, "learning_rate": 5.255132855362277e-05, "loss": 1.5044, "step": 3257 }, { "epoch": 0.6678966789667896, "grad_norm": 0.40528359300517297, "learning_rate": 5.249288569611155e-05, "loss": 1.4979, "step": 3258 }, { "epoch": 0.6681016810168102, "grad_norm": 0.4689360016431024, "learning_rate": 5.2434463787078816e-05, "loss": 1.5467, "step": 3259 }, { "epoch": 0.6683066830668307, "grad_norm": 0.46261930828341913, "learning_rate": 5.237606285228591e-05, "loss": 1.5157, "step": 3260 }, { "epoch": 0.6685116851168512, "grad_norm": 0.4159058853386433, "learning_rate": 5.2317682917485055e-05, "loss": 1.544, "step": 3261 }, { "epoch": 0.6687166871668717, "grad_norm": 0.4564289006751174, "learning_rate": 5.2259324008419116e-05, "loss": 1.5863, "step": 3262 }, { "epoch": 0.6689216892168922, "grad_norm": 0.389663625442729, "learning_rate": 5.2200986150821696e-05, "loss": 1.5405, "step": 3263 }, { "epoch": 0.6691266912669127, "grad_norm": 0.4177727704252784, "learning_rate": 5.2142669370417205e-05, "loss": 1.5592, "step": 3264 }, { "epoch": 0.6693316933169332, "grad_norm": 0.41924533000753, "learning_rate": 5.208437369292061e-05, "loss": 1.5364, "step": 3265 }, { "epoch": 0.6695366953669537, "grad_norm": 0.41375648844573704, "learning_rate": 5.202609914403773e-05, "loss": 1.5578, "step": 3266 }, { "epoch": 0.6697416974169742, "grad_norm": 0.4499001075657649, "learning_rate": 5.196784574946496e-05, "loss": 1.5027, "step": 3267 }, { "epoch": 0.6699466994669947, "grad_norm": 0.44061258187447844, "learning_rate": 5.190961353488941e-05, "loss": 1.4833, "step": 3268 }, { "epoch": 0.6701517015170152, "grad_norm": 0.4564597456267822, "learning_rate": 5.18514025259888e-05, "loss": 1.5635, "step": 3269 }, { "epoch": 0.6703567035670357, "grad_norm": 0.4876368927800723, "learning_rate": 5.179321274843156e-05, "loss": 1.6083, "step": 3270 }, { "epoch": 0.6705617056170562, "grad_norm": 0.41844214114797085, "learning_rate": 5.173504422787679e-05, "loss": 1.5222, "step": 3271 }, { "epoch": 0.6707667076670767, "grad_norm": 0.39595399126139963, "learning_rate": 5.167689698997413e-05, "loss": 1.4209, "step": 3272 }, { "epoch": 0.6709717097170972, "grad_norm": 0.4353923004952022, "learning_rate": 5.161877106036386e-05, "loss": 1.5611, "step": 3273 }, { "epoch": 0.6711767117671177, "grad_norm": 0.48766291706185577, "learning_rate": 5.156066646467683e-05, "loss": 1.5934, "step": 3274 }, { "epoch": 0.6713817138171382, "grad_norm": 0.41107440161040765, "learning_rate": 5.150258322853461e-05, "loss": 1.5048, "step": 3275 }, { "epoch": 0.6715867158671587, "grad_norm": 0.4089277441236762, "learning_rate": 5.1444521377549204e-05, "loss": 1.4841, "step": 3276 }, { "epoch": 0.6717917179171792, "grad_norm": 0.4200621045097158, "learning_rate": 5.13864809373233e-05, "loss": 1.5143, "step": 3277 }, { "epoch": 0.6719967199671997, "grad_norm": 0.4524101612533242, "learning_rate": 5.132846193345007e-05, "loss": 1.5806, "step": 3278 }, { "epoch": 0.6722017220172202, "grad_norm": 0.4868826051583284, "learning_rate": 5.1270464391513215e-05, "loss": 1.5266, "step": 3279 }, { "epoch": 0.6724067240672407, "grad_norm": 0.4352576675761379, "learning_rate": 5.1212488337087114e-05, "loss": 1.5244, "step": 3280 }, { "epoch": 0.6726117261172612, "grad_norm": 0.47956965952267, "learning_rate": 5.115453379573647e-05, "loss": 1.5969, "step": 3281 }, { "epoch": 0.6728167281672817, "grad_norm": 0.4109248912388438, "learning_rate": 5.109660079301668e-05, "loss": 1.5469, "step": 3282 }, { "epoch": 0.6730217302173022, "grad_norm": 0.42311964836028715, "learning_rate": 5.103868935447354e-05, "loss": 1.553, "step": 3283 }, { "epoch": 0.6732267322673227, "grad_norm": 0.4251106940803215, "learning_rate": 5.098079950564332e-05, "loss": 1.5754, "step": 3284 }, { "epoch": 0.6734317343173432, "grad_norm": 0.4142761889856098, "learning_rate": 5.092293127205288e-05, "loss": 1.5349, "step": 3285 }, { "epoch": 0.6736367363673637, "grad_norm": 0.4786030878073973, "learning_rate": 5.086508467921942e-05, "loss": 1.6177, "step": 3286 }, { "epoch": 0.6738417384173842, "grad_norm": 0.4448873820802366, "learning_rate": 5.080725975265073e-05, "loss": 1.564, "step": 3287 }, { "epoch": 0.6740467404674046, "grad_norm": 0.42762099261673503, "learning_rate": 5.074945651784491e-05, "loss": 1.4838, "step": 3288 }, { "epoch": 0.6742517425174251, "grad_norm": 0.43942819574509145, "learning_rate": 5.069167500029056e-05, "loss": 1.4998, "step": 3289 }, { "epoch": 0.6744567445674456, "grad_norm": 0.4593525714765017, "learning_rate": 5.0633915225466765e-05, "loss": 1.5516, "step": 3290 }, { "epoch": 0.6746617466174661, "grad_norm": 0.46519959158845237, "learning_rate": 5.0576177218842925e-05, "loss": 1.5615, "step": 3291 }, { "epoch": 0.6748667486674866, "grad_norm": 0.44795557772312, "learning_rate": 5.051846100587882e-05, "loss": 1.5078, "step": 3292 }, { "epoch": 0.6750717507175071, "grad_norm": 0.4374852977059392, "learning_rate": 5.046076661202479e-05, "loss": 1.5441, "step": 3293 }, { "epoch": 0.6752767527675276, "grad_norm": 0.4518116152808659, "learning_rate": 5.040309406272135e-05, "loss": 1.5561, "step": 3294 }, { "epoch": 0.6754817548175481, "grad_norm": 0.45109543035592686, "learning_rate": 5.034544338339953e-05, "loss": 1.4883, "step": 3295 }, { "epoch": 0.6756867568675686, "grad_norm": 0.4595021682880069, "learning_rate": 5.028781459948062e-05, "loss": 1.5424, "step": 3296 }, { "epoch": 0.6758917589175891, "grad_norm": 0.4580091531377571, "learning_rate": 5.02302077363763e-05, "loss": 1.5855, "step": 3297 }, { "epoch": 0.6760967609676096, "grad_norm": 0.5037125658735325, "learning_rate": 5.01726228194886e-05, "loss": 1.6201, "step": 3298 }, { "epoch": 0.6763017630176302, "grad_norm": 0.4596821987836919, "learning_rate": 5.011505987420982e-05, "loss": 1.5818, "step": 3299 }, { "epoch": 0.6765067650676507, "grad_norm": 0.43253371192535567, "learning_rate": 5.005751892592265e-05, "loss": 1.5097, "step": 3300 }, { "epoch": 0.6767117671176712, "grad_norm": 0.4520097086298899, "learning_rate": 5.000000000000002e-05, "loss": 1.5738, "step": 3301 }, { "epoch": 0.6769167691676917, "grad_norm": 0.45520370211679917, "learning_rate": 4.9942503121805106e-05, "loss": 1.5482, "step": 3302 }, { "epoch": 0.6771217712177122, "grad_norm": 0.4190783952091041, "learning_rate": 4.9885028316691495e-05, "loss": 1.5249, "step": 3303 }, { "epoch": 0.6773267732677327, "grad_norm": 0.4167142270934749, "learning_rate": 4.9827575610002895e-05, "loss": 1.5026, "step": 3304 }, { "epoch": 0.6775317753177532, "grad_norm": 0.4794154988242696, "learning_rate": 4.977014502707341e-05, "loss": 1.478, "step": 3305 }, { "epoch": 0.6777367773677737, "grad_norm": 0.5122653574706574, "learning_rate": 4.9712736593227285e-05, "loss": 1.5957, "step": 3306 }, { "epoch": 0.6779417794177942, "grad_norm": 0.43751156107248174, "learning_rate": 4.9655350333779014e-05, "loss": 1.5047, "step": 3307 }, { "epoch": 0.6781467814678147, "grad_norm": 0.47794919233487926, "learning_rate": 4.9597986274033316e-05, "loss": 1.5505, "step": 3308 }, { "epoch": 0.6783517835178352, "grad_norm": 0.44200923561022265, "learning_rate": 4.9540644439285156e-05, "loss": 1.4832, "step": 3309 }, { "epoch": 0.6785567855678557, "grad_norm": 0.49638739841323437, "learning_rate": 4.9483324854819714e-05, "loss": 1.5028, "step": 3310 }, { "epoch": 0.6787617876178762, "grad_norm": 0.42288256790014744, "learning_rate": 4.942602754591229e-05, "loss": 1.4888, "step": 3311 }, { "epoch": 0.6789667896678967, "grad_norm": 0.5217752470278747, "learning_rate": 4.93687525378284e-05, "loss": 1.5406, "step": 3312 }, { "epoch": 0.6791717917179172, "grad_norm": 0.44987998197624973, "learning_rate": 4.931149985582367e-05, "loss": 1.5479, "step": 3313 }, { "epoch": 0.6793767937679377, "grad_norm": 0.45820887889094164, "learning_rate": 4.9254269525143984e-05, "loss": 1.5327, "step": 3314 }, { "epoch": 0.6795817958179582, "grad_norm": 0.4531900413901263, "learning_rate": 4.919706157102533e-05, "loss": 1.5592, "step": 3315 }, { "epoch": 0.6797867978679787, "grad_norm": 0.48091473278906766, "learning_rate": 4.9139876018693795e-05, "loss": 1.6223, "step": 3316 }, { "epoch": 0.6799917999179992, "grad_norm": 0.4563396264811171, "learning_rate": 4.908271289336561e-05, "loss": 1.5207, "step": 3317 }, { "epoch": 0.6801968019680197, "grad_norm": 0.4540193442254131, "learning_rate": 4.9025572220247076e-05, "loss": 1.53, "step": 3318 }, { "epoch": 0.6804018040180402, "grad_norm": 0.4633787206835466, "learning_rate": 4.896845402453466e-05, "loss": 1.5992, "step": 3319 }, { "epoch": 0.6806068060680607, "grad_norm": 0.4472115524336386, "learning_rate": 4.891135833141495e-05, "loss": 1.5669, "step": 3320 }, { "epoch": 0.6808118081180812, "grad_norm": 0.38442693827120805, "learning_rate": 4.8854285166064485e-05, "loss": 1.5442, "step": 3321 }, { "epoch": 0.6810168101681017, "grad_norm": 0.4214897296888067, "learning_rate": 4.879723455364996e-05, "loss": 1.5529, "step": 3322 }, { "epoch": 0.6812218122181222, "grad_norm": 0.45359008292760117, "learning_rate": 4.8740206519328105e-05, "loss": 1.4759, "step": 3323 }, { "epoch": 0.6814268142681427, "grad_norm": 0.46672478571553133, "learning_rate": 4.868320108824563e-05, "loss": 1.5533, "step": 3324 }, { "epoch": 0.6816318163181632, "grad_norm": 0.4712752016035746, "learning_rate": 4.86262182855394e-05, "loss": 1.5446, "step": 3325 }, { "epoch": 0.6818368183681837, "grad_norm": 0.47240359661998654, "learning_rate": 4.856925813633627e-05, "loss": 1.6226, "step": 3326 }, { "epoch": 0.6820418204182042, "grad_norm": 0.46320753602402087, "learning_rate": 4.8512320665753044e-05, "loss": 1.5242, "step": 3327 }, { "epoch": 0.6822468224682247, "grad_norm": 0.4199712064906492, "learning_rate": 4.8455405898896555e-05, "loss": 1.5466, "step": 3328 }, { "epoch": 0.6824518245182452, "grad_norm": 0.4323237118538542, "learning_rate": 4.839851386086358e-05, "loss": 1.5201, "step": 3329 }, { "epoch": 0.6826568265682657, "grad_norm": 0.481619147844327, "learning_rate": 4.8341644576740985e-05, "loss": 1.5663, "step": 3330 }, { "epoch": 0.6828618286182861, "grad_norm": 0.45568849997630817, "learning_rate": 4.828479807160557e-05, "loss": 1.5195, "step": 3331 }, { "epoch": 0.6830668306683066, "grad_norm": 0.85738628176556, "learning_rate": 4.8227974370524e-05, "loss": 1.5824, "step": 3332 }, { "epoch": 0.6832718327183271, "grad_norm": 0.4437049961280124, "learning_rate": 4.817117349855297e-05, "loss": 1.534, "step": 3333 }, { "epoch": 0.6834768347683476, "grad_norm": 0.42864014184004096, "learning_rate": 4.8114395480739025e-05, "loss": 1.4808, "step": 3334 }, { "epoch": 0.6836818368183681, "grad_norm": 0.477092822492682, "learning_rate": 4.805764034211876e-05, "loss": 1.5487, "step": 3335 }, { "epoch": 0.6838868388683886, "grad_norm": 0.48019371943636857, "learning_rate": 4.800090810771862e-05, "loss": 1.5015, "step": 3336 }, { "epoch": 0.6840918409184091, "grad_norm": 0.49980573444650966, "learning_rate": 4.794419880255492e-05, "loss": 1.5497, "step": 3337 }, { "epoch": 0.6842968429684296, "grad_norm": 0.4334981434938236, "learning_rate": 4.788751245163387e-05, "loss": 1.5272, "step": 3338 }, { "epoch": 0.6845018450184502, "grad_norm": 0.46447989784280636, "learning_rate": 4.783084907995156e-05, "loss": 1.4925, "step": 3339 }, { "epoch": 0.6847068470684707, "grad_norm": 0.41757449203661123, "learning_rate": 4.7774208712493984e-05, "loss": 1.4792, "step": 3340 }, { "epoch": 0.6849118491184912, "grad_norm": 0.5092435407892048, "learning_rate": 4.7717591374237005e-05, "loss": 1.546, "step": 3341 }, { "epoch": 0.6851168511685117, "grad_norm": 0.4427989129199794, "learning_rate": 4.7660997090146276e-05, "loss": 1.5872, "step": 3342 }, { "epoch": 0.6853218532185322, "grad_norm": 0.47937769179808537, "learning_rate": 4.760442588517728e-05, "loss": 1.5642, "step": 3343 }, { "epoch": 0.6855268552685527, "grad_norm": 0.41708844290352937, "learning_rate": 4.754787778427533e-05, "loss": 1.5376, "step": 3344 }, { "epoch": 0.6857318573185732, "grad_norm": 0.4586409613086554, "learning_rate": 4.74913528123756e-05, "loss": 1.5336, "step": 3345 }, { "epoch": 0.6859368593685937, "grad_norm": 0.45281816391435126, "learning_rate": 4.7434850994403065e-05, "loss": 1.558, "step": 3346 }, { "epoch": 0.6861418614186142, "grad_norm": 0.4163603490325916, "learning_rate": 4.7378372355272435e-05, "loss": 1.5023, "step": 3347 }, { "epoch": 0.6863468634686347, "grad_norm": 0.4062199185217212, "learning_rate": 4.732191691988822e-05, "loss": 1.4881, "step": 3348 }, { "epoch": 0.6865518655186552, "grad_norm": 0.4241142312480205, "learning_rate": 4.7265484713144644e-05, "loss": 1.5349, "step": 3349 }, { "epoch": 0.6867568675686757, "grad_norm": 0.46004240101197413, "learning_rate": 4.720907575992585e-05, "loss": 1.5553, "step": 3350 }, { "epoch": 0.6869618696186962, "grad_norm": 0.45672962079221424, "learning_rate": 4.715269008510552e-05, "loss": 1.4798, "step": 3351 }, { "epoch": 0.6871668716687167, "grad_norm": 0.42500130650678186, "learning_rate": 4.7096327713547276e-05, "loss": 1.508, "step": 3352 }, { "epoch": 0.6873718737187372, "grad_norm": 0.4330170385822167, "learning_rate": 4.703998867010431e-05, "loss": 1.5096, "step": 3353 }, { "epoch": 0.6875768757687577, "grad_norm": 0.4568956383472607, "learning_rate": 4.698367297961954e-05, "loss": 1.4984, "step": 3354 }, { "epoch": 0.6877818778187782, "grad_norm": 0.42969367527050895, "learning_rate": 4.69273806669257e-05, "loss": 1.5486, "step": 3355 }, { "epoch": 0.6879868798687987, "grad_norm": 0.4492036849163193, "learning_rate": 4.687111175684509e-05, "loss": 1.5559, "step": 3356 }, { "epoch": 0.6881918819188192, "grad_norm": 0.45026674633707314, "learning_rate": 4.681486627418978e-05, "loss": 1.4977, "step": 3357 }, { "epoch": 0.6883968839688397, "grad_norm": 0.446967550670305, "learning_rate": 4.675864424376146e-05, "loss": 1.51, "step": 3358 }, { "epoch": 0.6886018860188602, "grad_norm": 0.43557138512954835, "learning_rate": 4.670244569035145e-05, "loss": 1.5446, "step": 3359 }, { "epoch": 0.6888068880688807, "grad_norm": 0.41776030770884587, "learning_rate": 4.664627063874083e-05, "loss": 1.5121, "step": 3360 }, { "epoch": 0.6890118901189012, "grad_norm": 0.47047269463768904, "learning_rate": 4.659011911370017e-05, "loss": 1.5202, "step": 3361 }, { "epoch": 0.6892168921689217, "grad_norm": 0.4487549917119925, "learning_rate": 4.653399113998981e-05, "loss": 1.5904, "step": 3362 }, { "epoch": 0.6894218942189422, "grad_norm": 0.42411164246405403, "learning_rate": 4.647788674235961e-05, "loss": 1.5071, "step": 3363 }, { "epoch": 0.6896268962689627, "grad_norm": 0.4603334336763966, "learning_rate": 4.6421805945549015e-05, "loss": 1.5399, "step": 3364 }, { "epoch": 0.6898318983189832, "grad_norm": 0.48212398952038743, "learning_rate": 4.6365748774287176e-05, "loss": 1.4934, "step": 3365 }, { "epoch": 0.6900369003690037, "grad_norm": 0.3761357649447258, "learning_rate": 4.630971525329274e-05, "loss": 1.4323, "step": 3366 }, { "epoch": 0.6902419024190242, "grad_norm": 0.4414865511782292, "learning_rate": 4.6253705407273886e-05, "loss": 1.5461, "step": 3367 }, { "epoch": 0.6904469044690447, "grad_norm": 0.4455325320566932, "learning_rate": 4.61977192609285e-05, "loss": 1.5684, "step": 3368 }, { "epoch": 0.6906519065190652, "grad_norm": 0.43676154900615155, "learning_rate": 4.614175683894384e-05, "loss": 1.4868, "step": 3369 }, { "epoch": 0.6908569085690857, "grad_norm": 0.4256562964118114, "learning_rate": 4.6085818165996876e-05, "loss": 1.4887, "step": 3370 }, { "epoch": 0.6910619106191062, "grad_norm": 0.4254030274358486, "learning_rate": 4.602990326675397e-05, "loss": 1.5416, "step": 3371 }, { "epoch": 0.6912669126691267, "grad_norm": 0.4643846977970754, "learning_rate": 4.597401216587104e-05, "loss": 1.6195, "step": 3372 }, { "epoch": 0.6914719147191472, "grad_norm": 0.4546663920223562, "learning_rate": 4.5918144887993574e-05, "loss": 1.5839, "step": 3373 }, { "epoch": 0.6916769167691676, "grad_norm": 0.4286593978294174, "learning_rate": 4.586230145775647e-05, "loss": 1.5487, "step": 3374 }, { "epoch": 0.6918819188191881, "grad_norm": 0.430485559822579, "learning_rate": 4.58064818997841e-05, "loss": 1.5554, "step": 3375 }, { "epoch": 0.6920869208692086, "grad_norm": 0.42554754188571264, "learning_rate": 4.575068623869045e-05, "loss": 1.5089, "step": 3376 }, { "epoch": 0.6922919229192291, "grad_norm": 0.42310839889276436, "learning_rate": 4.569491449907878e-05, "loss": 1.4908, "step": 3377 }, { "epoch": 0.6924969249692496, "grad_norm": 0.4358770635002334, "learning_rate": 4.563916670554196e-05, "loss": 1.5499, "step": 3378 }, { "epoch": 0.6927019270192702, "grad_norm": 0.4076771917822465, "learning_rate": 4.55834428826622e-05, "loss": 1.4985, "step": 3379 }, { "epoch": 0.6929069290692907, "grad_norm": 0.4705652288905525, "learning_rate": 4.552774305501115e-05, "loss": 1.5249, "step": 3380 }, { "epoch": 0.6931119311193112, "grad_norm": 0.427408820052831, "learning_rate": 4.547206724714996e-05, "loss": 1.5432, "step": 3381 }, { "epoch": 0.6933169331693317, "grad_norm": 0.39254391299183433, "learning_rate": 4.54164154836291e-05, "loss": 1.4498, "step": 3382 }, { "epoch": 0.6935219352193522, "grad_norm": 0.4132924271080056, "learning_rate": 4.536078778898845e-05, "loss": 1.5393, "step": 3383 }, { "epoch": 0.6937269372693727, "grad_norm": 0.4287777964160761, "learning_rate": 4.530518418775733e-05, "loss": 1.5807, "step": 3384 }, { "epoch": 0.6939319393193932, "grad_norm": 0.43879863236690086, "learning_rate": 4.5249604704454363e-05, "loss": 1.5951, "step": 3385 }, { "epoch": 0.6941369413694137, "grad_norm": 0.4132170990575669, "learning_rate": 4.5194049363587634e-05, "loss": 1.5102, "step": 3386 }, { "epoch": 0.6943419434194342, "grad_norm": 0.4036200593937283, "learning_rate": 4.513851818965449e-05, "loss": 1.5177, "step": 3387 }, { "epoch": 0.6945469454694547, "grad_norm": 0.41810042999117114, "learning_rate": 4.5083011207141614e-05, "loss": 1.5447, "step": 3388 }, { "epoch": 0.6947519475194752, "grad_norm": 0.409223608940766, "learning_rate": 4.502752844052514e-05, "loss": 1.5719, "step": 3389 }, { "epoch": 0.6949569495694957, "grad_norm": 0.44314024370613014, "learning_rate": 4.4972069914270366e-05, "loss": 1.5703, "step": 3390 }, { "epoch": 0.6951619516195162, "grad_norm": 0.4475381624454827, "learning_rate": 4.4916635652832076e-05, "loss": 1.5426, "step": 3391 }, { "epoch": 0.6953669536695367, "grad_norm": 0.4408687213248625, "learning_rate": 4.4861225680654194e-05, "loss": 1.4948, "step": 3392 }, { "epoch": 0.6955719557195572, "grad_norm": 0.4010041155055108, "learning_rate": 4.480584002216999e-05, "loss": 1.4558, "step": 3393 }, { "epoch": 0.6957769577695777, "grad_norm": 0.4457869383136856, "learning_rate": 4.4750478701802065e-05, "loss": 1.5658, "step": 3394 }, { "epoch": 0.6959819598195982, "grad_norm": 0.4400401036204604, "learning_rate": 4.469514174396221e-05, "loss": 1.566, "step": 3395 }, { "epoch": 0.6961869618696187, "grad_norm": 0.47764958247925376, "learning_rate": 4.4639829173051554e-05, "loss": 1.5338, "step": 3396 }, { "epoch": 0.6963919639196392, "grad_norm": 0.40103978052738076, "learning_rate": 4.45845410134604e-05, "loss": 1.4861, "step": 3397 }, { "epoch": 0.6965969659696597, "grad_norm": 0.4470805029496455, "learning_rate": 4.4529277289568314e-05, "loss": 1.4873, "step": 3398 }, { "epoch": 0.6968019680196802, "grad_norm": 0.42769937686655585, "learning_rate": 4.447403802574406e-05, "loss": 1.5501, "step": 3399 }, { "epoch": 0.6970069700697007, "grad_norm": 0.42509408103612056, "learning_rate": 4.4418823246345653e-05, "loss": 1.5076, "step": 3400 }, { "epoch": 0.6972119721197212, "grad_norm": 0.4392847101699351, "learning_rate": 4.4363632975720356e-05, "loss": 1.5428, "step": 3401 }, { "epoch": 0.6974169741697417, "grad_norm": 0.41661042923817515, "learning_rate": 4.430846723820453e-05, "loss": 1.5129, "step": 3402 }, { "epoch": 0.6976219762197622, "grad_norm": 0.4166677899972221, "learning_rate": 4.425332605812377e-05, "loss": 1.5491, "step": 3403 }, { "epoch": 0.6978269782697827, "grad_norm": 0.4173682753715901, "learning_rate": 4.4198209459792785e-05, "loss": 1.5259, "step": 3404 }, { "epoch": 0.6980319803198032, "grad_norm": 0.43395100736859943, "learning_rate": 4.414311746751551e-05, "loss": 1.5057, "step": 3405 }, { "epoch": 0.6982369823698237, "grad_norm": 0.4854154804817381, "learning_rate": 4.4088050105585075e-05, "loss": 1.5854, "step": 3406 }, { "epoch": 0.6984419844198442, "grad_norm": 0.4217382420864878, "learning_rate": 4.403300739828363e-05, "loss": 1.4926, "step": 3407 }, { "epoch": 0.6986469864698647, "grad_norm": 0.4175918016545473, "learning_rate": 4.397798936988251e-05, "loss": 1.5669, "step": 3408 }, { "epoch": 0.6988519885198852, "grad_norm": 0.4418920860537964, "learning_rate": 4.392299604464213e-05, "loss": 1.4934, "step": 3409 }, { "epoch": 0.6990569905699057, "grad_norm": 0.4412124670566063, "learning_rate": 4.386802744681209e-05, "loss": 1.5221, "step": 3410 }, { "epoch": 0.6992619926199262, "grad_norm": 0.5069530891898816, "learning_rate": 4.3813083600631065e-05, "loss": 1.5709, "step": 3411 }, { "epoch": 0.6994669946699467, "grad_norm": 0.4596403264248444, "learning_rate": 4.3758164530326785e-05, "loss": 1.5157, "step": 3412 }, { "epoch": 0.6996719967199672, "grad_norm": 0.45137278859473307, "learning_rate": 4.370327026011602e-05, "loss": 1.5145, "step": 3413 }, { "epoch": 0.6998769987699877, "grad_norm": 0.41587400036515626, "learning_rate": 4.364840081420466e-05, "loss": 1.5342, "step": 3414 }, { "epoch": 0.7000820008200082, "grad_norm": 0.4743037472631479, "learning_rate": 4.359355621678764e-05, "loss": 1.5002, "step": 3415 }, { "epoch": 0.7002870028700287, "grad_norm": 0.42520225732492695, "learning_rate": 4.353873649204899e-05, "loss": 1.5329, "step": 3416 }, { "epoch": 0.7004920049200491, "grad_norm": 0.45840189764321404, "learning_rate": 4.348394166416169e-05, "loss": 1.4784, "step": 3417 }, { "epoch": 0.7006970069700696, "grad_norm": 0.42071088490187714, "learning_rate": 4.3429171757287735e-05, "loss": 1.5151, "step": 3418 }, { "epoch": 0.7009020090200903, "grad_norm": 0.48064593064725697, "learning_rate": 4.337442679557815e-05, "loss": 1.5222, "step": 3419 }, { "epoch": 0.7011070110701108, "grad_norm": 0.419227455622307, "learning_rate": 4.3319706803173e-05, "loss": 1.5203, "step": 3420 }, { "epoch": 0.7013120131201313, "grad_norm": 0.4478070461742405, "learning_rate": 4.3265011804201374e-05, "loss": 1.5226, "step": 3421 }, { "epoch": 0.7015170151701517, "grad_norm": 0.38939450798439873, "learning_rate": 4.321034182278122e-05, "loss": 1.4921, "step": 3422 }, { "epoch": 0.7017220172201722, "grad_norm": 0.432943091577237, "learning_rate": 4.315569688301953e-05, "loss": 1.5492, "step": 3423 }, { "epoch": 0.7019270192701927, "grad_norm": 0.4522085470669943, "learning_rate": 4.310107700901224e-05, "loss": 1.5218, "step": 3424 }, { "epoch": 0.7021320213202132, "grad_norm": 0.4389529856307205, "learning_rate": 4.304648222484414e-05, "loss": 1.5248, "step": 3425 }, { "epoch": 0.7023370233702337, "grad_norm": 0.43361807006065684, "learning_rate": 4.299191255458922e-05, "loss": 1.4982, "step": 3426 }, { "epoch": 0.7025420254202542, "grad_norm": 0.4428330728296285, "learning_rate": 4.293736802231014e-05, "loss": 1.5575, "step": 3427 }, { "epoch": 0.7027470274702747, "grad_norm": 0.42304663484500776, "learning_rate": 4.288284865205856e-05, "loss": 1.5525, "step": 3428 }, { "epoch": 0.7029520295202952, "grad_norm": 0.5375403529385177, "learning_rate": 4.2828354467875046e-05, "loss": 1.5681, "step": 3429 }, { "epoch": 0.7031570315703157, "grad_norm": 0.4469907031456321, "learning_rate": 4.277388549378902e-05, "loss": 1.5519, "step": 3430 }, { "epoch": 0.7033620336203362, "grad_norm": 0.4434889157923991, "learning_rate": 4.271944175381886e-05, "loss": 1.5557, "step": 3431 }, { "epoch": 0.7035670356703567, "grad_norm": 0.4573709635194942, "learning_rate": 4.266502327197182e-05, "loss": 1.4891, "step": 3432 }, { "epoch": 0.7037720377203772, "grad_norm": 0.40044529720496036, "learning_rate": 4.261063007224393e-05, "loss": 1.481, "step": 3433 }, { "epoch": 0.7039770397703977, "grad_norm": 0.4065579144032816, "learning_rate": 4.255626217862013e-05, "loss": 1.509, "step": 3434 }, { "epoch": 0.7041820418204182, "grad_norm": 0.4417422837865661, "learning_rate": 4.250191961507416e-05, "loss": 1.5363, "step": 3435 }, { "epoch": 0.7043870438704387, "grad_norm": 0.45869242898495816, "learning_rate": 4.244760240556864e-05, "loss": 1.5098, "step": 3436 }, { "epoch": 0.7045920459204592, "grad_norm": 0.440833702806443, "learning_rate": 4.2393310574055045e-05, "loss": 1.5766, "step": 3437 }, { "epoch": 0.7047970479704797, "grad_norm": 0.44301830700045536, "learning_rate": 4.233904414447355e-05, "loss": 1.601, "step": 3438 }, { "epoch": 0.7050020500205002, "grad_norm": 0.41138186532763177, "learning_rate": 4.228480314075321e-05, "loss": 1.5381, "step": 3439 }, { "epoch": 0.7052070520705207, "grad_norm": 0.4181858285284662, "learning_rate": 4.2230587586811774e-05, "loss": 1.4558, "step": 3440 }, { "epoch": 0.7054120541205412, "grad_norm": 0.45318187511742036, "learning_rate": 4.217639750655594e-05, "loss": 1.5367, "step": 3441 }, { "epoch": 0.7056170561705617, "grad_norm": 0.461080252842011, "learning_rate": 4.2122232923880976e-05, "loss": 1.5358, "step": 3442 }, { "epoch": 0.7058220582205822, "grad_norm": 0.4282880729133403, "learning_rate": 4.20680938626711e-05, "loss": 1.5008, "step": 3443 }, { "epoch": 0.7060270602706027, "grad_norm": 0.4420501366242678, "learning_rate": 4.201398034679911e-05, "loss": 1.5725, "step": 3444 }, { "epoch": 0.7062320623206232, "grad_norm": 0.4230491945615498, "learning_rate": 4.195989240012659e-05, "loss": 1.4954, "step": 3445 }, { "epoch": 0.7064370643706437, "grad_norm": 0.4564392948570072, "learning_rate": 4.1905830046503935e-05, "loss": 1.5392, "step": 3446 }, { "epoch": 0.7066420664206642, "grad_norm": 0.44668778153424543, "learning_rate": 4.185179330977011e-05, "loss": 1.5944, "step": 3447 }, { "epoch": 0.7068470684706847, "grad_norm": 0.45313738343263005, "learning_rate": 4.1797782213752944e-05, "loss": 1.546, "step": 3448 }, { "epoch": 0.7070520705207052, "grad_norm": 0.44923932056304106, "learning_rate": 4.174379678226883e-05, "loss": 1.5248, "step": 3449 }, { "epoch": 0.7072570725707257, "grad_norm": 0.4315630861222875, "learning_rate": 4.168983703912285e-05, "loss": 1.5486, "step": 3450 }, { "epoch": 0.7074620746207462, "grad_norm": 0.41190529391375, "learning_rate": 4.163590300810888e-05, "loss": 1.5493, "step": 3451 }, { "epoch": 0.7076670766707667, "grad_norm": 0.4392506153198524, "learning_rate": 4.1581994713009295e-05, "loss": 1.5037, "step": 3452 }, { "epoch": 0.7078720787207872, "grad_norm": 0.4400255584273175, "learning_rate": 4.152811217759529e-05, "loss": 1.5175, "step": 3453 }, { "epoch": 0.7080770807708077, "grad_norm": 0.4448843962161563, "learning_rate": 4.1474255425626576e-05, "loss": 1.5294, "step": 3454 }, { "epoch": 0.7082820828208282, "grad_norm": 0.4293907220316177, "learning_rate": 4.142042448085148e-05, "loss": 1.5179, "step": 3455 }, { "epoch": 0.7084870848708487, "grad_norm": 0.4604403949206365, "learning_rate": 4.136661936700709e-05, "loss": 1.5544, "step": 3456 }, { "epoch": 0.7086920869208692, "grad_norm": 0.4221718115519598, "learning_rate": 4.1312840107818964e-05, "loss": 1.5202, "step": 3457 }, { "epoch": 0.7088970889708897, "grad_norm": 0.4438852978714222, "learning_rate": 4.125908672700129e-05, "loss": 1.5205, "step": 3458 }, { "epoch": 0.7091020910209103, "grad_norm": 0.42779241322623174, "learning_rate": 4.1205359248256946e-05, "loss": 1.5026, "step": 3459 }, { "epoch": 0.7093070930709308, "grad_norm": 0.44449370777953856, "learning_rate": 4.115165769527723e-05, "loss": 1.5675, "step": 3460 }, { "epoch": 0.7095120951209513, "grad_norm": 0.4115172080529009, "learning_rate": 4.109798209174213e-05, "loss": 1.5289, "step": 3461 }, { "epoch": 0.7097170971709718, "grad_norm": 0.42032460048347703, "learning_rate": 4.104433246132015e-05, "loss": 1.4504, "step": 3462 }, { "epoch": 0.7099220992209923, "grad_norm": 0.458318128519686, "learning_rate": 4.099070882766829e-05, "loss": 1.5374, "step": 3463 }, { "epoch": 0.7101271012710128, "grad_norm": 0.4239279152205268, "learning_rate": 4.09371112144322e-05, "loss": 1.503, "step": 3464 }, { "epoch": 0.7103321033210332, "grad_norm": 0.4339864582306654, "learning_rate": 4.088353964524593e-05, "loss": 1.5237, "step": 3465 }, { "epoch": 0.7105371053710537, "grad_norm": 0.3952308073103569, "learning_rate": 4.0829994143732164e-05, "loss": 1.4593, "step": 3466 }, { "epoch": 0.7107421074210742, "grad_norm": 0.408357519264169, "learning_rate": 4.077647473350201e-05, "loss": 1.4765, "step": 3467 }, { "epoch": 0.7109471094710947, "grad_norm": 0.44430598642048014, "learning_rate": 4.072298143815507e-05, "loss": 1.5329, "step": 3468 }, { "epoch": 0.7111521115211152, "grad_norm": 0.4325669955567285, "learning_rate": 4.066951428127952e-05, "loss": 1.4894, "step": 3469 }, { "epoch": 0.7113571135711357, "grad_norm": 0.41909485491285814, "learning_rate": 4.0616073286451864e-05, "loss": 1.5386, "step": 3470 }, { "epoch": 0.7115621156211562, "grad_norm": 0.39404165115905665, "learning_rate": 4.056265847723724e-05, "loss": 1.535, "step": 3471 }, { "epoch": 0.7117671176711767, "grad_norm": 0.5034144390339675, "learning_rate": 4.0509269877189106e-05, "loss": 1.5463, "step": 3472 }, { "epoch": 0.7119721197211972, "grad_norm": 0.4680054728828605, "learning_rate": 4.04559075098494e-05, "loss": 1.5466, "step": 3473 }, { "epoch": 0.7121771217712177, "grad_norm": 0.43312439318892254, "learning_rate": 4.040257139874848e-05, "loss": 1.5269, "step": 3474 }, { "epoch": 0.7123821238212382, "grad_norm": 0.44383045655181763, "learning_rate": 4.034926156740518e-05, "loss": 1.4704, "step": 3475 }, { "epoch": 0.7125871258712587, "grad_norm": 0.44043217741475005, "learning_rate": 4.029597803932675e-05, "loss": 1.5555, "step": 3476 }, { "epoch": 0.7127921279212792, "grad_norm": 0.4128799344301832, "learning_rate": 4.024272083800876e-05, "loss": 1.5333, "step": 3477 }, { "epoch": 0.7129971299712997, "grad_norm": 0.4234364538758268, "learning_rate": 4.0189489986935226e-05, "loss": 1.4906, "step": 3478 }, { "epoch": 0.7132021320213202, "grad_norm": 0.4189761608780559, "learning_rate": 4.01362855095785e-05, "loss": 1.6038, "step": 3479 }, { "epoch": 0.7134071340713407, "grad_norm": 0.4434163998455193, "learning_rate": 4.008310742939939e-05, "loss": 1.519, "step": 3480 }, { "epoch": 0.7136121361213612, "grad_norm": 0.4747082498066494, "learning_rate": 4.002995576984696e-05, "loss": 1.5524, "step": 3481 }, { "epoch": 0.7138171381713817, "grad_norm": 0.424749214420041, "learning_rate": 3.9976830554358746e-05, "loss": 1.4631, "step": 3482 }, { "epoch": 0.7140221402214022, "grad_norm": 0.4474781010886821, "learning_rate": 3.992373180636051e-05, "loss": 1.5556, "step": 3483 }, { "epoch": 0.7142271422714227, "grad_norm": 0.4272012681059497, "learning_rate": 3.9870659549266354e-05, "loss": 1.4721, "step": 3484 }, { "epoch": 0.7144321443214432, "grad_norm": 0.45784549195994245, "learning_rate": 3.9817613806478804e-05, "loss": 1.587, "step": 3485 }, { "epoch": 0.7146371463714637, "grad_norm": 0.39280557750909423, "learning_rate": 3.976459460138856e-05, "loss": 1.4343, "step": 3486 }, { "epoch": 0.7148421484214842, "grad_norm": 0.4702743874616334, "learning_rate": 3.971160195737475e-05, "loss": 1.5719, "step": 3487 }, { "epoch": 0.7150471504715047, "grad_norm": 0.46514364183722834, "learning_rate": 3.965863589780466e-05, "loss": 1.5651, "step": 3488 }, { "epoch": 0.7152521525215252, "grad_norm": 0.43992963173696936, "learning_rate": 3.9605696446033945e-05, "loss": 1.5687, "step": 3489 }, { "epoch": 0.7154571545715457, "grad_norm": 0.4117199720267242, "learning_rate": 3.9552783625406464e-05, "loss": 1.518, "step": 3490 }, { "epoch": 0.7156621566215662, "grad_norm": 0.4355313104177029, "learning_rate": 3.9499897459254375e-05, "loss": 1.5807, "step": 3491 }, { "epoch": 0.7158671586715867, "grad_norm": 0.42866631057671767, "learning_rate": 3.944703797089814e-05, "loss": 1.5364, "step": 3492 }, { "epoch": 0.7160721607216072, "grad_norm": 0.43935661240317686, "learning_rate": 3.939420518364633e-05, "loss": 1.4922, "step": 3493 }, { "epoch": 0.7162771627716277, "grad_norm": 0.47896483328035433, "learning_rate": 3.9341399120795816e-05, "loss": 1.5388, "step": 3494 }, { "epoch": 0.7164821648216482, "grad_norm": 0.43778865996397376, "learning_rate": 3.928861980563163e-05, "loss": 1.5275, "step": 3495 }, { "epoch": 0.7166871668716687, "grad_norm": 0.48464463006108066, "learning_rate": 3.923586726142711e-05, "loss": 1.5944, "step": 3496 }, { "epoch": 0.7168921689216892, "grad_norm": 0.4465115615789122, "learning_rate": 3.9183141511443725e-05, "loss": 1.5275, "step": 3497 }, { "epoch": 0.7170971709717097, "grad_norm": 0.4524076301786442, "learning_rate": 3.913044257893114e-05, "loss": 1.56, "step": 3498 }, { "epoch": 0.7173021730217303, "grad_norm": 0.4717705849785628, "learning_rate": 3.907777048712715e-05, "loss": 1.5876, "step": 3499 }, { "epoch": 0.7175071750717508, "grad_norm": 0.461517906057566, "learning_rate": 3.902512525925775e-05, "loss": 1.5465, "step": 3500 }, { "epoch": 0.7177121771217713, "grad_norm": 0.4207830208243561, "learning_rate": 3.897250691853712e-05, "loss": 1.5323, "step": 3501 }, { "epoch": 0.7179171791717918, "grad_norm": 0.4655345510157027, "learning_rate": 3.891991548816759e-05, "loss": 1.504, "step": 3502 }, { "epoch": 0.7181221812218123, "grad_norm": 0.44410473596029776, "learning_rate": 3.8867350991339555e-05, "loss": 1.5581, "step": 3503 }, { "epoch": 0.7183271832718328, "grad_norm": 0.4554069522872964, "learning_rate": 3.881481345123158e-05, "loss": 1.529, "step": 3504 }, { "epoch": 0.7185321853218533, "grad_norm": 0.40766495934876845, "learning_rate": 3.876230289101027e-05, "loss": 1.4977, "step": 3505 }, { "epoch": 0.7187371873718738, "grad_norm": 0.43066675659376075, "learning_rate": 3.8709819333830455e-05, "loss": 1.5454, "step": 3506 }, { "epoch": 0.7189421894218943, "grad_norm": 0.41372498726595935, "learning_rate": 3.865736280283503e-05, "loss": 1.5124, "step": 3507 }, { "epoch": 0.7191471914719147, "grad_norm": 0.4623352781739944, "learning_rate": 3.8604933321154904e-05, "loss": 1.5, "step": 3508 }, { "epoch": 0.7193521935219352, "grad_norm": 0.45849104121148104, "learning_rate": 3.855253091190909e-05, "loss": 1.4792, "step": 3509 }, { "epoch": 0.7195571955719557, "grad_norm": 0.41535432797406957, "learning_rate": 3.8500155598204644e-05, "loss": 1.4615, "step": 3510 }, { "epoch": 0.7197621976219762, "grad_norm": 0.4321507939070184, "learning_rate": 3.8447807403136726e-05, "loss": 1.5123, "step": 3511 }, { "epoch": 0.7199671996719967, "grad_norm": 0.4480024118598738, "learning_rate": 3.8395486349788554e-05, "loss": 1.529, "step": 3512 }, { "epoch": 0.7201722017220172, "grad_norm": 0.44537613264683973, "learning_rate": 3.8343192461231294e-05, "loss": 1.4935, "step": 3513 }, { "epoch": 0.7203772037720377, "grad_norm": 0.4178029213699762, "learning_rate": 3.829092576052416e-05, "loss": 1.4947, "step": 3514 }, { "epoch": 0.7205822058220582, "grad_norm": 0.4751344059662765, "learning_rate": 3.82386862707144e-05, "loss": 1.5313, "step": 3515 }, { "epoch": 0.7207872078720787, "grad_norm": 0.4409610574528483, "learning_rate": 3.818647401483724e-05, "loss": 1.5332, "step": 3516 }, { "epoch": 0.7209922099220992, "grad_norm": 0.4039428968645369, "learning_rate": 3.813428901591598e-05, "loss": 1.5064, "step": 3517 }, { "epoch": 0.7211972119721197, "grad_norm": 0.42390271115736167, "learning_rate": 3.808213129696177e-05, "loss": 1.4504, "step": 3518 }, { "epoch": 0.7214022140221402, "grad_norm": 0.425234606125503, "learning_rate": 3.8030000880973835e-05, "loss": 1.5143, "step": 3519 }, { "epoch": 0.7216072160721607, "grad_norm": 0.48586531785319365, "learning_rate": 3.7977897790939254e-05, "loss": 1.5753, "step": 3520 }, { "epoch": 0.7218122181221812, "grad_norm": 0.42897234277321256, "learning_rate": 3.7925822049833193e-05, "loss": 1.5014, "step": 3521 }, { "epoch": 0.7220172201722017, "grad_norm": 0.43760456001409564, "learning_rate": 3.7873773680618616e-05, "loss": 1.505, "step": 3522 }, { "epoch": 0.7222222222222222, "grad_norm": 0.47664240088393667, "learning_rate": 3.7821752706246584e-05, "loss": 1.5474, "step": 3523 }, { "epoch": 0.7224272242722427, "grad_norm": 0.43316928152618966, "learning_rate": 3.7769759149655916e-05, "loss": 1.522, "step": 3524 }, { "epoch": 0.7226322263222632, "grad_norm": 0.4664159211339371, "learning_rate": 3.771779303377342e-05, "loss": 1.536, "step": 3525 }, { "epoch": 0.7228372283722837, "grad_norm": 0.4638344468560447, "learning_rate": 3.766585438151375e-05, "loss": 1.5076, "step": 3526 }, { "epoch": 0.7230422304223042, "grad_norm": 0.4158251119345487, "learning_rate": 3.7613943215779556e-05, "loss": 1.4753, "step": 3527 }, { "epoch": 0.7232472324723247, "grad_norm": 0.412504154057362, "learning_rate": 3.7562059559461296e-05, "loss": 1.5147, "step": 3528 }, { "epoch": 0.7234522345223452, "grad_norm": 0.44755225470966187, "learning_rate": 3.75102034354373e-05, "loss": 1.554, "step": 3529 }, { "epoch": 0.7236572365723657, "grad_norm": 0.4360438766040676, "learning_rate": 3.745837486657374e-05, "loss": 1.4688, "step": 3530 }, { "epoch": 0.7238622386223862, "grad_norm": 0.4928569866627821, "learning_rate": 3.740657387572464e-05, "loss": 1.5451, "step": 3531 }, { "epoch": 0.7240672406724067, "grad_norm": 0.4256922044416373, "learning_rate": 3.735480048573194e-05, "loss": 1.4341, "step": 3532 }, { "epoch": 0.7242722427224272, "grad_norm": 0.4230515874731703, "learning_rate": 3.730305471942531e-05, "loss": 1.4926, "step": 3533 }, { "epoch": 0.7244772447724477, "grad_norm": 0.4475356759214047, "learning_rate": 3.725133659962232e-05, "loss": 1.5184, "step": 3534 }, { "epoch": 0.7246822468224682, "grad_norm": 0.4718617660111662, "learning_rate": 3.719964614912829e-05, "loss": 1.5251, "step": 3535 }, { "epoch": 0.7248872488724887, "grad_norm": 0.47987041064474306, "learning_rate": 3.7147983390736316e-05, "loss": 1.5386, "step": 3536 }, { "epoch": 0.7250922509225092, "grad_norm": 0.44374350064458823, "learning_rate": 3.7096348347227405e-05, "loss": 1.5154, "step": 3537 }, { "epoch": 0.7252972529725297, "grad_norm": 0.4701861993828469, "learning_rate": 3.704474104137019e-05, "loss": 1.4923, "step": 3538 }, { "epoch": 0.7255022550225503, "grad_norm": 0.4374661785625125, "learning_rate": 3.6993161495921226e-05, "loss": 1.4979, "step": 3539 }, { "epoch": 0.7257072570725708, "grad_norm": 0.4554914814027369, "learning_rate": 3.6941609733624706e-05, "loss": 1.5469, "step": 3540 }, { "epoch": 0.7259122591225913, "grad_norm": 0.45357691624776486, "learning_rate": 3.689008577721258e-05, "loss": 1.4659, "step": 3541 }, { "epoch": 0.7261172611726118, "grad_norm": 0.41709035606040246, "learning_rate": 3.683858964940464e-05, "loss": 1.5781, "step": 3542 }, { "epoch": 0.7263222632226323, "grad_norm": 0.5011181446991144, "learning_rate": 3.678712137290827e-05, "loss": 1.5754, "step": 3543 }, { "epoch": 0.7265272652726528, "grad_norm": 0.4208079192141814, "learning_rate": 3.6735680970418705e-05, "loss": 1.4918, "step": 3544 }, { "epoch": 0.7267322673226733, "grad_norm": 0.4477359199473054, "learning_rate": 3.668426846461881e-05, "loss": 1.5365, "step": 3545 }, { "epoch": 0.7269372693726938, "grad_norm": 0.42703503289716865, "learning_rate": 3.66328838781791e-05, "loss": 1.4746, "step": 3546 }, { "epoch": 0.7271422714227143, "grad_norm": 0.48872266369857126, "learning_rate": 3.658152723375794e-05, "loss": 1.5011, "step": 3547 }, { "epoch": 0.7273472734727348, "grad_norm": 0.42437991818125514, "learning_rate": 3.653019855400123e-05, "loss": 1.4812, "step": 3548 }, { "epoch": 0.7275522755227553, "grad_norm": 0.44980773572727784, "learning_rate": 3.647889786154254e-05, "loss": 1.453, "step": 3549 }, { "epoch": 0.7277572775727758, "grad_norm": 0.40202533560929643, "learning_rate": 3.642762517900322e-05, "loss": 1.4669, "step": 3550 }, { "epoch": 0.7279622796227962, "grad_norm": 0.3943361566093643, "learning_rate": 3.6376380528992125e-05, "loss": 1.4786, "step": 3551 }, { "epoch": 0.7281672816728167, "grad_norm": 0.4725434254209094, "learning_rate": 3.632516393410589e-05, "loss": 1.4986, "step": 3552 }, { "epoch": 0.7283722837228372, "grad_norm": 0.46756230394120024, "learning_rate": 3.6273975416928675e-05, "loss": 1.5168, "step": 3553 }, { "epoch": 0.7285772857728577, "grad_norm": 0.5025578152294731, "learning_rate": 3.622281500003224e-05, "loss": 1.6007, "step": 3554 }, { "epoch": 0.7287822878228782, "grad_norm": 0.4668190146540949, "learning_rate": 3.6171682705976085e-05, "loss": 1.5661, "step": 3555 }, { "epoch": 0.7289872898728987, "grad_norm": 0.4563147554874595, "learning_rate": 3.612057855730715e-05, "loss": 1.5565, "step": 3556 }, { "epoch": 0.7291922919229192, "grad_norm": 0.4917818577673773, "learning_rate": 3.6069502576560124e-05, "loss": 1.5121, "step": 3557 }, { "epoch": 0.7293972939729397, "grad_norm": 0.44992302787004207, "learning_rate": 3.6018454786257136e-05, "loss": 1.5631, "step": 3558 }, { "epoch": 0.7296022960229602, "grad_norm": 0.4195053738853119, "learning_rate": 3.5967435208907943e-05, "loss": 1.5098, "step": 3559 }, { "epoch": 0.7298072980729807, "grad_norm": 0.4229655260905307, "learning_rate": 3.59164438670099e-05, "loss": 1.4849, "step": 3560 }, { "epoch": 0.7300123001230012, "grad_norm": 0.4777103833690583, "learning_rate": 3.58654807830478e-05, "loss": 1.5169, "step": 3561 }, { "epoch": 0.7302173021730217, "grad_norm": 0.4190670776897408, "learning_rate": 3.581454597949414e-05, "loss": 1.4915, "step": 3562 }, { "epoch": 0.7304223042230422, "grad_norm": 0.42907633890684843, "learning_rate": 3.576363947880881e-05, "loss": 1.4984, "step": 3563 }, { "epoch": 0.7306273062730627, "grad_norm": 0.4634321069214964, "learning_rate": 3.571276130343925e-05, "loss": 1.5566, "step": 3564 }, { "epoch": 0.7308323083230832, "grad_norm": 0.48664513689765004, "learning_rate": 3.56619114758204e-05, "loss": 1.5414, "step": 3565 }, { "epoch": 0.7310373103731037, "grad_norm": 0.4603575252245323, "learning_rate": 3.561109001837475e-05, "loss": 1.494, "step": 3566 }, { "epoch": 0.7312423124231242, "grad_norm": 0.41840996358847155, "learning_rate": 3.5560296953512295e-05, "loss": 1.5179, "step": 3567 }, { "epoch": 0.7314473144731447, "grad_norm": 0.4373800476355417, "learning_rate": 3.550953230363044e-05, "loss": 1.5279, "step": 3568 }, { "epoch": 0.7316523165231652, "grad_norm": 0.4318747201927023, "learning_rate": 3.5458796091114076e-05, "loss": 1.4996, "step": 3569 }, { "epoch": 0.7318573185731857, "grad_norm": 0.4345557389320695, "learning_rate": 3.5408088338335545e-05, "loss": 1.5158, "step": 3570 }, { "epoch": 0.7320623206232062, "grad_norm": 0.4022959055144593, "learning_rate": 3.5357409067654676e-05, "loss": 1.466, "step": 3571 }, { "epoch": 0.7322673226732267, "grad_norm": 0.42845587270388763, "learning_rate": 3.530675830141877e-05, "loss": 1.6099, "step": 3572 }, { "epoch": 0.7324723247232472, "grad_norm": 0.4221213743064392, "learning_rate": 3.525613606196249e-05, "loss": 1.5098, "step": 3573 }, { "epoch": 0.7326773267732677, "grad_norm": 0.47495821094180796, "learning_rate": 3.5205542371607925e-05, "loss": 1.4911, "step": 3574 }, { "epoch": 0.7328823288232882, "grad_norm": 0.4469084513198053, "learning_rate": 3.515497725266458e-05, "loss": 1.5727, "step": 3575 }, { "epoch": 0.7330873308733087, "grad_norm": 0.4260853954451166, "learning_rate": 3.510444072742938e-05, "loss": 1.5017, "step": 3576 }, { "epoch": 0.7332923329233292, "grad_norm": 0.45678735882094973, "learning_rate": 3.50539328181867e-05, "loss": 1.4577, "step": 3577 }, { "epoch": 0.7334973349733497, "grad_norm": 0.39521063279384955, "learning_rate": 3.5003453547208176e-05, "loss": 1.524, "step": 3578 }, { "epoch": 0.7337023370233703, "grad_norm": 0.48379438386037976, "learning_rate": 3.4953002936752874e-05, "loss": 1.5225, "step": 3579 }, { "epoch": 0.7339073390733908, "grad_norm": 0.45083580542993734, "learning_rate": 3.490258100906724e-05, "loss": 1.4978, "step": 3580 }, { "epoch": 0.7341123411234113, "grad_norm": 0.4326580781488519, "learning_rate": 3.485218778638499e-05, "loss": 1.4629, "step": 3581 }, { "epoch": 0.7343173431734318, "grad_norm": 0.45949642165557864, "learning_rate": 3.4801823290927315e-05, "loss": 1.5132, "step": 3582 }, { "epoch": 0.7345223452234523, "grad_norm": 0.4970509096604444, "learning_rate": 3.475148754490267e-05, "loss": 1.527, "step": 3583 }, { "epoch": 0.7347273472734728, "grad_norm": 0.43460755566178566, "learning_rate": 3.470118057050681e-05, "loss": 1.4843, "step": 3584 }, { "epoch": 0.7349323493234933, "grad_norm": 0.4798452017293138, "learning_rate": 3.4650902389922824e-05, "loss": 1.5169, "step": 3585 }, { "epoch": 0.7351373513735138, "grad_norm": 0.45479290964325825, "learning_rate": 3.460065302532108e-05, "loss": 1.5349, "step": 3586 }, { "epoch": 0.7353423534235343, "grad_norm": 0.43749084533346677, "learning_rate": 3.455043249885928e-05, "loss": 1.5355, "step": 3587 }, { "epoch": 0.7355473554735548, "grad_norm": 0.39210104845206367, "learning_rate": 3.450024083268245e-05, "loss": 1.5071, "step": 3588 }, { "epoch": 0.7357523575235753, "grad_norm": 0.43219956287284417, "learning_rate": 3.445007804892278e-05, "loss": 1.4867, "step": 3589 }, { "epoch": 0.7359573595735958, "grad_norm": 0.4508015978885218, "learning_rate": 3.439994416969978e-05, "loss": 1.4995, "step": 3590 }, { "epoch": 0.7361623616236163, "grad_norm": 0.4155368053450593, "learning_rate": 3.4349839217120194e-05, "loss": 1.5017, "step": 3591 }, { "epoch": 0.7363673636736368, "grad_norm": 0.4678831400591447, "learning_rate": 3.429976321327805e-05, "loss": 1.4535, "step": 3592 }, { "epoch": 0.7365723657236573, "grad_norm": 0.48997655861328476, "learning_rate": 3.4249716180254624e-05, "loss": 1.6354, "step": 3593 }, { "epoch": 0.7367773677736777, "grad_norm": 0.4403336749040627, "learning_rate": 3.419969814011835e-05, "loss": 1.5564, "step": 3594 }, { "epoch": 0.7369823698236982, "grad_norm": 0.4238528814210788, "learning_rate": 3.414970911492491e-05, "loss": 1.4502, "step": 3595 }, { "epoch": 0.7371873718737187, "grad_norm": 0.4035819565287794, "learning_rate": 3.4099749126717175e-05, "loss": 1.4993, "step": 3596 }, { "epoch": 0.7373923739237392, "grad_norm": 0.4428776576547031, "learning_rate": 3.404981819752524e-05, "loss": 1.5182, "step": 3597 }, { "epoch": 0.7375973759737597, "grad_norm": 0.46546257726278006, "learning_rate": 3.399991634936641e-05, "loss": 1.5818, "step": 3598 }, { "epoch": 0.7378023780237802, "grad_norm": 0.42164558427264753, "learning_rate": 3.395004360424512e-05, "loss": 1.4588, "step": 3599 }, { "epoch": 0.7380073800738007, "grad_norm": 0.4169000668927247, "learning_rate": 3.390019998415297e-05, "loss": 1.4747, "step": 3600 }, { "epoch": 0.7382123821238212, "grad_norm": 0.44287746933564504, "learning_rate": 3.3850385511068695e-05, "loss": 1.497, "step": 3601 }, { "epoch": 0.7384173841738417, "grad_norm": 0.44780791429689304, "learning_rate": 3.380060020695825e-05, "loss": 1.5425, "step": 3602 }, { "epoch": 0.7386223862238622, "grad_norm": 0.4459467111624526, "learning_rate": 3.3750844093774736e-05, "loss": 1.587, "step": 3603 }, { "epoch": 0.7388273882738827, "grad_norm": 0.43272062877269973, "learning_rate": 3.3701117193458295e-05, "loss": 1.5392, "step": 3604 }, { "epoch": 0.7390323903239032, "grad_norm": 0.4560160315304973, "learning_rate": 3.365141952793622e-05, "loss": 1.5285, "step": 3605 }, { "epoch": 0.7392373923739237, "grad_norm": 0.44449981896343366, "learning_rate": 3.360175111912291e-05, "loss": 1.545, "step": 3606 }, { "epoch": 0.7394423944239442, "grad_norm": 0.4727726552862745, "learning_rate": 3.35521119889199e-05, "loss": 1.4878, "step": 3607 }, { "epoch": 0.7396473964739647, "grad_norm": 0.4701232802966624, "learning_rate": 3.350250215921581e-05, "loss": 1.5061, "step": 3608 }, { "epoch": 0.7398523985239852, "grad_norm": 0.44168940116918903, "learning_rate": 3.345292165188632e-05, "loss": 1.4634, "step": 3609 }, { "epoch": 0.7400574005740057, "grad_norm": 0.4429834658934189, "learning_rate": 3.3403370488794136e-05, "loss": 1.5374, "step": 3610 }, { "epoch": 0.7402624026240262, "grad_norm": 0.4390432074897609, "learning_rate": 3.335384869178908e-05, "loss": 1.5059, "step": 3611 }, { "epoch": 0.7404674046740467, "grad_norm": 0.46439204073258106, "learning_rate": 3.330435628270806e-05, "loss": 1.5173, "step": 3612 }, { "epoch": 0.7406724067240672, "grad_norm": 0.4687132771064403, "learning_rate": 3.325489328337491e-05, "loss": 1.5584, "step": 3613 }, { "epoch": 0.7408774087740877, "grad_norm": 0.41477307140111364, "learning_rate": 3.320545971560063e-05, "loss": 1.523, "step": 3614 }, { "epoch": 0.7410824108241082, "grad_norm": 0.4449021863987743, "learning_rate": 3.3156055601183155e-05, "loss": 1.523, "step": 3615 }, { "epoch": 0.7412874128741287, "grad_norm": 0.4166686958201675, "learning_rate": 3.310668096190741e-05, "loss": 1.4762, "step": 3616 }, { "epoch": 0.7414924149241492, "grad_norm": 0.4375628871819494, "learning_rate": 3.305733581954544e-05, "loss": 1.5376, "step": 3617 }, { "epoch": 0.7416974169741697, "grad_norm": 0.4449591766063655, "learning_rate": 3.300802019585615e-05, "loss": 1.4523, "step": 3618 }, { "epoch": 0.7419024190241903, "grad_norm": 0.4170242072564302, "learning_rate": 3.2958734112585546e-05, "loss": 1.4179, "step": 3619 }, { "epoch": 0.7421074210742108, "grad_norm": 0.4479486715367858, "learning_rate": 3.290947759146651e-05, "loss": 1.4986, "step": 3620 }, { "epoch": 0.7423124231242313, "grad_norm": 0.4577372804717908, "learning_rate": 3.286025065421892e-05, "loss": 1.4656, "step": 3621 }, { "epoch": 0.7425174251742518, "grad_norm": 0.4285175199785407, "learning_rate": 3.281105332254966e-05, "loss": 1.5027, "step": 3622 }, { "epoch": 0.7427224272242723, "grad_norm": 0.447407183446742, "learning_rate": 3.276188561815249e-05, "loss": 1.5739, "step": 3623 }, { "epoch": 0.7429274292742928, "grad_norm": 0.413495389841362, "learning_rate": 3.2712747562708115e-05, "loss": 1.5002, "step": 3624 }, { "epoch": 0.7431324313243133, "grad_norm": 0.46420309863004994, "learning_rate": 3.2663639177884234e-05, "loss": 1.5632, "step": 3625 }, { "epoch": 0.7433374333743338, "grad_norm": 0.455108248395503, "learning_rate": 3.261456048533535e-05, "loss": 1.5226, "step": 3626 }, { "epoch": 0.7435424354243543, "grad_norm": 0.4374069729309448, "learning_rate": 3.2565511506703005e-05, "loss": 1.4733, "step": 3627 }, { "epoch": 0.7437474374743748, "grad_norm": 0.4354544465623817, "learning_rate": 3.251649226361555e-05, "loss": 1.4362, "step": 3628 }, { "epoch": 0.7439524395243953, "grad_norm": 0.49078041968050845, "learning_rate": 3.246750277768819e-05, "loss": 1.5765, "step": 3629 }, { "epoch": 0.7441574415744158, "grad_norm": 0.41825159367516007, "learning_rate": 3.2418543070523135e-05, "loss": 1.4936, "step": 3630 }, { "epoch": 0.7443624436244363, "grad_norm": 0.478845903588315, "learning_rate": 3.2369613163709356e-05, "loss": 1.5562, "step": 3631 }, { "epoch": 0.7445674456744568, "grad_norm": 0.4737596458641103, "learning_rate": 3.232071307882268e-05, "loss": 1.4573, "step": 3632 }, { "epoch": 0.7447724477244773, "grad_norm": 0.42698103983244834, "learning_rate": 3.227184283742591e-05, "loss": 1.5207, "step": 3633 }, { "epoch": 0.7449774497744978, "grad_norm": 0.4209080963414414, "learning_rate": 3.222300246106852e-05, "loss": 1.4845, "step": 3634 }, { "epoch": 0.7451824518245183, "grad_norm": 0.47586484877735274, "learning_rate": 3.217419197128695e-05, "loss": 1.5105, "step": 3635 }, { "epoch": 0.7453874538745388, "grad_norm": 0.4039924718246903, "learning_rate": 3.21254113896044e-05, "loss": 1.4905, "step": 3636 }, { "epoch": 0.7455924559245592, "grad_norm": 0.46500934435606234, "learning_rate": 3.207666073753084e-05, "loss": 1.5188, "step": 3637 }, { "epoch": 0.7457974579745797, "grad_norm": 0.44496747035294837, "learning_rate": 3.202794003656316e-05, "loss": 1.5542, "step": 3638 }, { "epoch": 0.7460024600246002, "grad_norm": 0.4130377147171396, "learning_rate": 3.1979249308184957e-05, "loss": 1.5801, "step": 3639 }, { "epoch": 0.7462074620746207, "grad_norm": 0.4387364316566754, "learning_rate": 3.193058857386658e-05, "loss": 1.5146, "step": 3640 }, { "epoch": 0.7464124641246412, "grad_norm": 0.4655716114982046, "learning_rate": 3.188195785506527e-05, "loss": 1.5578, "step": 3641 }, { "epoch": 0.7466174661746617, "grad_norm": 0.44108467006116464, "learning_rate": 3.18333571732249e-05, "loss": 1.5426, "step": 3642 }, { "epoch": 0.7468224682246822, "grad_norm": 0.42221178090823297, "learning_rate": 3.178478654977624e-05, "loss": 1.4577, "step": 3643 }, { "epoch": 0.7470274702747027, "grad_norm": 0.46314649034235084, "learning_rate": 3.173624600613666e-05, "loss": 1.5696, "step": 3644 }, { "epoch": 0.7472324723247232, "grad_norm": 0.45585573972573246, "learning_rate": 3.168773556371034e-05, "loss": 1.547, "step": 3645 }, { "epoch": 0.7474374743747437, "grad_norm": 0.43666099128498387, "learning_rate": 3.163925524388822e-05, "loss": 1.5721, "step": 3646 }, { "epoch": 0.7476424764247642, "grad_norm": 0.4013041281766275, "learning_rate": 3.1590805068047865e-05, "loss": 1.5191, "step": 3647 }, { "epoch": 0.7478474784747847, "grad_norm": 0.43691889920123905, "learning_rate": 3.154238505755367e-05, "loss": 1.5473, "step": 3648 }, { "epoch": 0.7480524805248052, "grad_norm": 0.40429236742212604, "learning_rate": 3.149399523375661e-05, "loss": 1.4341, "step": 3649 }, { "epoch": 0.7482574825748257, "grad_norm": 0.449955144337816, "learning_rate": 3.144563561799438e-05, "loss": 1.5013, "step": 3650 }, { "epoch": 0.7484624846248462, "grad_norm": 0.4290599110243751, "learning_rate": 3.139730623159144e-05, "loss": 1.502, "step": 3651 }, { "epoch": 0.7486674866748667, "grad_norm": 0.42449902930507555, "learning_rate": 3.1349007095858786e-05, "loss": 1.5444, "step": 3652 }, { "epoch": 0.7488724887248872, "grad_norm": 0.4134993070461911, "learning_rate": 3.1300738232094184e-05, "loss": 1.4699, "step": 3653 }, { "epoch": 0.7490774907749077, "grad_norm": 0.457843350728091, "learning_rate": 3.125249966158201e-05, "loss": 1.5093, "step": 3654 }, { "epoch": 0.7492824928249282, "grad_norm": 0.42878523779372857, "learning_rate": 3.120429140559327e-05, "loss": 1.5284, "step": 3655 }, { "epoch": 0.7494874948749487, "grad_norm": 0.43503319476679875, "learning_rate": 3.115611348538556e-05, "loss": 1.5945, "step": 3656 }, { "epoch": 0.7496924969249692, "grad_norm": 0.4240016972637633, "learning_rate": 3.110796592220322e-05, "loss": 1.5532, "step": 3657 }, { "epoch": 0.7498974989749897, "grad_norm": 0.41016701176151243, "learning_rate": 3.1059848737277144e-05, "loss": 1.5155, "step": 3658 }, { "epoch": 0.7501025010250103, "grad_norm": 0.4308807045137664, "learning_rate": 3.10117619518248e-05, "loss": 1.4694, "step": 3659 }, { "epoch": 0.7503075030750308, "grad_norm": 0.43167365000093716, "learning_rate": 3.096370558705028e-05, "loss": 1.4964, "step": 3660 }, { "epoch": 0.7505125051250513, "grad_norm": 0.472214979490928, "learning_rate": 3.091567966414424e-05, "loss": 1.5867, "step": 3661 }, { "epoch": 0.7507175071750718, "grad_norm": 0.40197351486886024, "learning_rate": 3.086768420428392e-05, "loss": 1.5349, "step": 3662 }, { "epoch": 0.7509225092250923, "grad_norm": 0.448438770927794, "learning_rate": 3.081971922863319e-05, "loss": 1.4753, "step": 3663 }, { "epoch": 0.7511275112751128, "grad_norm": 0.4077326521952239, "learning_rate": 3.07717847583424e-05, "loss": 1.5048, "step": 3664 }, { "epoch": 0.7513325133251333, "grad_norm": 0.38823784037535536, "learning_rate": 3.072388081454848e-05, "loss": 1.5137, "step": 3665 }, { "epoch": 0.7515375153751538, "grad_norm": 0.47072160673614233, "learning_rate": 3.067600741837483e-05, "loss": 1.5082, "step": 3666 }, { "epoch": 0.7517425174251743, "grad_norm": 0.4735992508468832, "learning_rate": 3.0628164590931506e-05, "loss": 1.5767, "step": 3667 }, { "epoch": 0.7519475194751948, "grad_norm": 0.4488441421334702, "learning_rate": 3.0580352353315025e-05, "loss": 1.5457, "step": 3668 }, { "epoch": 0.7521525215252153, "grad_norm": 0.4401092562868568, "learning_rate": 3.05325707266084e-05, "loss": 1.5085, "step": 3669 }, { "epoch": 0.7523575235752358, "grad_norm": 0.4860138617916938, "learning_rate": 3.048481973188114e-05, "loss": 1.4712, "step": 3670 }, { "epoch": 0.7525625256252563, "grad_norm": 0.4383024949833615, "learning_rate": 3.043709939018925e-05, "loss": 1.5238, "step": 3671 }, { "epoch": 0.7527675276752768, "grad_norm": 0.39104857183460157, "learning_rate": 3.0389409722575258e-05, "loss": 1.5181, "step": 3672 }, { "epoch": 0.7529725297252973, "grad_norm": 0.4595604434658205, "learning_rate": 3.0341750750068164e-05, "loss": 1.5245, "step": 3673 }, { "epoch": 0.7531775317753178, "grad_norm": 0.4868060482313587, "learning_rate": 3.0294122493683374e-05, "loss": 1.5371, "step": 3674 }, { "epoch": 0.7533825338253383, "grad_norm": 0.5347300234622846, "learning_rate": 3.0246524974422808e-05, "loss": 1.5285, "step": 3675 }, { "epoch": 0.7535875358753588, "grad_norm": 0.41544729019195703, "learning_rate": 3.0198958213274753e-05, "loss": 1.451, "step": 3676 }, { "epoch": 0.7537925379253793, "grad_norm": 0.39348240252758643, "learning_rate": 3.0151422231214022e-05, "loss": 1.4317, "step": 3677 }, { "epoch": 0.7539975399753998, "grad_norm": 0.41045158941177545, "learning_rate": 3.010391704920187e-05, "loss": 1.4807, "step": 3678 }, { "epoch": 0.7542025420254203, "grad_norm": 0.4969316040115307, "learning_rate": 3.005644268818588e-05, "loss": 1.5515, "step": 3679 }, { "epoch": 0.7544075440754408, "grad_norm": 0.4212094218849045, "learning_rate": 3.0008999169100103e-05, "loss": 1.5152, "step": 3680 }, { "epoch": 0.7546125461254612, "grad_norm": 0.44353103713081743, "learning_rate": 2.9961586512864947e-05, "loss": 1.4816, "step": 3681 }, { "epoch": 0.7548175481754817, "grad_norm": 0.4032243754555605, "learning_rate": 2.991420474038721e-05, "loss": 1.5447, "step": 3682 }, { "epoch": 0.7550225502255022, "grad_norm": 0.41984635839243095, "learning_rate": 2.9866853872560198e-05, "loss": 1.5768, "step": 3683 }, { "epoch": 0.7552275522755227, "grad_norm": 0.4162000812606667, "learning_rate": 2.9819533930263433e-05, "loss": 1.513, "step": 3684 }, { "epoch": 0.7554325543255432, "grad_norm": 0.45318731289781133, "learning_rate": 2.977224493436288e-05, "loss": 1.6093, "step": 3685 }, { "epoch": 0.7556375563755637, "grad_norm": 0.5160044152215718, "learning_rate": 2.9724986905710815e-05, "loss": 1.5745, "step": 3686 }, { "epoch": 0.7558425584255842, "grad_norm": 0.4170517604407413, "learning_rate": 2.967775986514585e-05, "loss": 1.5149, "step": 3687 }, { "epoch": 0.7560475604756047, "grad_norm": 0.44447149009848486, "learning_rate": 2.963056383349301e-05, "loss": 1.5859, "step": 3688 }, { "epoch": 0.7562525625256252, "grad_norm": 0.4370067312058709, "learning_rate": 2.9583398831563625e-05, "loss": 1.5362, "step": 3689 }, { "epoch": 0.7564575645756457, "grad_norm": 0.4635620192162683, "learning_rate": 2.9536264880155285e-05, "loss": 1.5339, "step": 3690 }, { "epoch": 0.7566625666256662, "grad_norm": 0.44190281887382143, "learning_rate": 2.9489162000051918e-05, "loss": 1.4467, "step": 3691 }, { "epoch": 0.7568675686756867, "grad_norm": 0.45642912860996926, "learning_rate": 2.944209021202372e-05, "loss": 1.4937, "step": 3692 }, { "epoch": 0.7570725707257072, "grad_norm": 0.421560964474826, "learning_rate": 2.9395049536827255e-05, "loss": 1.4959, "step": 3693 }, { "epoch": 0.7572775727757277, "grad_norm": 0.41581850160653455, "learning_rate": 2.9348039995205347e-05, "loss": 1.528, "step": 3694 }, { "epoch": 0.7574825748257482, "grad_norm": 0.42611710433717004, "learning_rate": 2.9301061607887025e-05, "loss": 1.494, "step": 3695 }, { "epoch": 0.7576875768757687, "grad_norm": 0.4052948784561073, "learning_rate": 2.925411439558764e-05, "loss": 1.447, "step": 3696 }, { "epoch": 0.7578925789257892, "grad_norm": 0.41957288838456885, "learning_rate": 2.9207198379008736e-05, "loss": 1.5447, "step": 3697 }, { "epoch": 0.7580975809758097, "grad_norm": 0.40537087922710396, "learning_rate": 2.9160313578838184e-05, "loss": 1.5367, "step": 3698 }, { "epoch": 0.7583025830258303, "grad_norm": 0.4377696412426714, "learning_rate": 2.9113460015750072e-05, "loss": 1.4184, "step": 3699 }, { "epoch": 0.7585075850758508, "grad_norm": 0.4677032980285699, "learning_rate": 2.9066637710404675e-05, "loss": 1.5133, "step": 3700 }, { "epoch": 0.7587125871258713, "grad_norm": 0.4225395198726482, "learning_rate": 2.901984668344848e-05, "loss": 1.5249, "step": 3701 }, { "epoch": 0.7589175891758918, "grad_norm": 0.4499503976105507, "learning_rate": 2.8973086955514195e-05, "loss": 1.5726, "step": 3702 }, { "epoch": 0.7591225912259123, "grad_norm": 0.43737479063184526, "learning_rate": 2.892635854722079e-05, "loss": 1.5447, "step": 3703 }, { "epoch": 0.7593275932759328, "grad_norm": 0.43614796834189923, "learning_rate": 2.8879661479173305e-05, "loss": 1.4912, "step": 3704 }, { "epoch": 0.7595325953259533, "grad_norm": 0.4184546091278081, "learning_rate": 2.883299577196308e-05, "loss": 1.4966, "step": 3705 }, { "epoch": 0.7597375973759738, "grad_norm": 0.42526362858254263, "learning_rate": 2.8786361446167554e-05, "loss": 1.4569, "step": 3706 }, { "epoch": 0.7599425994259943, "grad_norm": 0.4333005626080129, "learning_rate": 2.87397585223503e-05, "loss": 1.4848, "step": 3707 }, { "epoch": 0.7601476014760148, "grad_norm": 0.43166615036754713, "learning_rate": 2.8693187021061162e-05, "loss": 1.5183, "step": 3708 }, { "epoch": 0.7603526035260353, "grad_norm": 0.4484933790335611, "learning_rate": 2.8646646962836e-05, "loss": 1.5217, "step": 3709 }, { "epoch": 0.7605576055760558, "grad_norm": 0.45378671876285576, "learning_rate": 2.8600138368196906e-05, "loss": 1.4772, "step": 3710 }, { "epoch": 0.7607626076260763, "grad_norm": 0.41390657265820774, "learning_rate": 2.855366125765204e-05, "loss": 1.5491, "step": 3711 }, { "epoch": 0.7609676096760968, "grad_norm": 0.4412676751496288, "learning_rate": 2.8507215651695662e-05, "loss": 1.4833, "step": 3712 }, { "epoch": 0.7611726117261173, "grad_norm": 0.3907761825902976, "learning_rate": 2.846080157080824e-05, "loss": 1.43, "step": 3713 }, { "epoch": 0.7613776137761378, "grad_norm": 0.40938901971793557, "learning_rate": 2.84144190354562e-05, "loss": 1.546, "step": 3714 }, { "epoch": 0.7615826158261583, "grad_norm": 0.4446398114123376, "learning_rate": 2.83680680660922e-05, "loss": 1.5346, "step": 3715 }, { "epoch": 0.7617876178761788, "grad_norm": 0.4211783605102619, "learning_rate": 2.8321748683154893e-05, "loss": 1.4768, "step": 3716 }, { "epoch": 0.7619926199261993, "grad_norm": 0.5057481065322842, "learning_rate": 2.8275460907068973e-05, "loss": 1.5884, "step": 3717 }, { "epoch": 0.7621976219762198, "grad_norm": 0.4171268273113572, "learning_rate": 2.822920475824531e-05, "loss": 1.5121, "step": 3718 }, { "epoch": 0.7624026240262403, "grad_norm": 0.41297430867511664, "learning_rate": 2.818298025708075e-05, "loss": 1.481, "step": 3719 }, { "epoch": 0.7626076260762608, "grad_norm": 0.48358578305399613, "learning_rate": 2.8136787423958143e-05, "loss": 1.6013, "step": 3720 }, { "epoch": 0.7628126281262813, "grad_norm": 0.44146485953194536, "learning_rate": 2.80906262792465e-05, "loss": 1.5073, "step": 3721 }, { "epoch": 0.7630176301763018, "grad_norm": 0.45987917876869283, "learning_rate": 2.804449684330074e-05, "loss": 1.5012, "step": 3722 }, { "epoch": 0.7632226322263223, "grad_norm": 0.4481092401353202, "learning_rate": 2.7998399136461883e-05, "loss": 1.5307, "step": 3723 }, { "epoch": 0.7634276342763427, "grad_norm": 0.4293490599275573, "learning_rate": 2.795233317905691e-05, "loss": 1.4638, "step": 3724 }, { "epoch": 0.7636326363263632, "grad_norm": 0.4307387926492202, "learning_rate": 2.790629899139878e-05, "loss": 1.4905, "step": 3725 }, { "epoch": 0.7638376383763837, "grad_norm": 0.419529915008727, "learning_rate": 2.786029659378655e-05, "loss": 1.5036, "step": 3726 }, { "epoch": 0.7640426404264042, "grad_norm": 0.44201203902214875, "learning_rate": 2.7814326006505108e-05, "loss": 1.4848, "step": 3727 }, { "epoch": 0.7642476424764247, "grad_norm": 0.44574520823671954, "learning_rate": 2.7768387249825457e-05, "loss": 1.4805, "step": 3728 }, { "epoch": 0.7644526445264452, "grad_norm": 0.446416947006107, "learning_rate": 2.7722480344004488e-05, "loss": 1.5296, "step": 3729 }, { "epoch": 0.7646576465764657, "grad_norm": 0.4147510631807401, "learning_rate": 2.767660530928503e-05, "loss": 1.5204, "step": 3730 }, { "epoch": 0.7648626486264862, "grad_norm": 0.3886932394257091, "learning_rate": 2.7630762165895884e-05, "loss": 1.5465, "step": 3731 }, { "epoch": 0.7650676506765067, "grad_norm": 0.45896780333900133, "learning_rate": 2.7584950934051824e-05, "loss": 1.5259, "step": 3732 }, { "epoch": 0.7652726527265272, "grad_norm": 0.3553960761467297, "learning_rate": 2.753917163395353e-05, "loss": 1.4615, "step": 3733 }, { "epoch": 0.7654776547765477, "grad_norm": 0.4052434456126481, "learning_rate": 2.7493424285787584e-05, "loss": 1.4696, "step": 3734 }, { "epoch": 0.7656826568265682, "grad_norm": 0.42573193143725796, "learning_rate": 2.7447708909726477e-05, "loss": 1.4805, "step": 3735 }, { "epoch": 0.7658876588765887, "grad_norm": 0.419615061517486, "learning_rate": 2.7402025525928586e-05, "loss": 1.512, "step": 3736 }, { "epoch": 0.7660926609266092, "grad_norm": 0.48440651924170874, "learning_rate": 2.7356374154538254e-05, "loss": 1.5044, "step": 3737 }, { "epoch": 0.7662976629766297, "grad_norm": 0.43254781622370125, "learning_rate": 2.7310754815685624e-05, "loss": 1.439, "step": 3738 }, { "epoch": 0.7665026650266503, "grad_norm": 0.41026555705590156, "learning_rate": 2.72651675294868e-05, "loss": 1.5002, "step": 3739 }, { "epoch": 0.7667076670766708, "grad_norm": 0.42262351065521786, "learning_rate": 2.7219612316043675e-05, "loss": 1.5122, "step": 3740 }, { "epoch": 0.7669126691266913, "grad_norm": 0.4161255322382909, "learning_rate": 2.7174089195443987e-05, "loss": 1.5414, "step": 3741 }, { "epoch": 0.7671176711767118, "grad_norm": 0.45746283051097364, "learning_rate": 2.7128598187761445e-05, "loss": 1.4854, "step": 3742 }, { "epoch": 0.7673226732267323, "grad_norm": 0.43693662542708245, "learning_rate": 2.7083139313055427e-05, "loss": 1.5436, "step": 3743 }, { "epoch": 0.7675276752767528, "grad_norm": 0.4130326354056413, "learning_rate": 2.7037712591371322e-05, "loss": 1.4443, "step": 3744 }, { "epoch": 0.7677326773267733, "grad_norm": 0.4418125758373944, "learning_rate": 2.6992318042740207e-05, "loss": 1.5299, "step": 3745 }, { "epoch": 0.7679376793767938, "grad_norm": 0.416507573368006, "learning_rate": 2.6946955687178997e-05, "loss": 1.5628, "step": 3746 }, { "epoch": 0.7681426814268143, "grad_norm": 0.4767874644098243, "learning_rate": 2.6901625544690434e-05, "loss": 1.5442, "step": 3747 }, { "epoch": 0.7683476834768348, "grad_norm": 0.4311472497908615, "learning_rate": 2.6856327635263045e-05, "loss": 1.5098, "step": 3748 }, { "epoch": 0.7685526855268553, "grad_norm": 0.41426591296824494, "learning_rate": 2.681106197887121e-05, "loss": 1.504, "step": 3749 }, { "epoch": 0.7687576875768758, "grad_norm": 0.49482238443008325, "learning_rate": 2.6765828595474984e-05, "loss": 1.5308, "step": 3750 }, { "epoch": 0.7689626896268963, "grad_norm": 0.4224667104451078, "learning_rate": 2.6720627505020223e-05, "loss": 1.4902, "step": 3751 }, { "epoch": 0.7691676916769168, "grad_norm": 0.423281626185111, "learning_rate": 2.667545872743854e-05, "loss": 1.4967, "step": 3752 }, { "epoch": 0.7693726937269373, "grad_norm": 0.4742526981176098, "learning_rate": 2.6630322282647334e-05, "loss": 1.527, "step": 3753 }, { "epoch": 0.7695776957769578, "grad_norm": 0.3730177478733768, "learning_rate": 2.6585218190549765e-05, "loss": 1.4448, "step": 3754 }, { "epoch": 0.7697826978269783, "grad_norm": 0.41758851794312185, "learning_rate": 2.6540146471034655e-05, "loss": 1.5105, "step": 3755 }, { "epoch": 0.7699876998769988, "grad_norm": 0.4211132054987742, "learning_rate": 2.6495107143976572e-05, "loss": 1.4988, "step": 3756 }, { "epoch": 0.7701927019270193, "grad_norm": 0.43243630872812017, "learning_rate": 2.6450100229235795e-05, "loss": 1.514, "step": 3757 }, { "epoch": 0.7703977039770398, "grad_norm": 0.3954026428145335, "learning_rate": 2.6405125746658354e-05, "loss": 1.468, "step": 3758 }, { "epoch": 0.7706027060270603, "grad_norm": 0.44975464494216955, "learning_rate": 2.636018371607598e-05, "loss": 1.524, "step": 3759 }, { "epoch": 0.7708077080770808, "grad_norm": 0.46102853090538465, "learning_rate": 2.6315274157306037e-05, "loss": 1.525, "step": 3760 }, { "epoch": 0.7710127101271013, "grad_norm": 0.43896308471768103, "learning_rate": 2.62703970901516e-05, "loss": 1.5073, "step": 3761 }, { "epoch": 0.7712177121771218, "grad_norm": 0.45642037283080394, "learning_rate": 2.6225552534401388e-05, "loss": 1.5704, "step": 3762 }, { "epoch": 0.7714227142271423, "grad_norm": 0.4447712367579291, "learning_rate": 2.618074050982985e-05, "loss": 1.5141, "step": 3763 }, { "epoch": 0.7716277162771628, "grad_norm": 0.4482384118927603, "learning_rate": 2.6135961036197064e-05, "loss": 1.5647, "step": 3764 }, { "epoch": 0.7718327183271833, "grad_norm": 0.39946221087218386, "learning_rate": 2.609121413324872e-05, "loss": 1.4841, "step": 3765 }, { "epoch": 0.7720377203772038, "grad_norm": 0.4481475554027438, "learning_rate": 2.6046499820716175e-05, "loss": 1.4693, "step": 3766 }, { "epoch": 0.7722427224272242, "grad_norm": 0.44491421232607, "learning_rate": 2.600181811831638e-05, "loss": 1.5038, "step": 3767 }, { "epoch": 0.7724477244772447, "grad_norm": 0.41022193169088217, "learning_rate": 2.595716904575196e-05, "loss": 1.5196, "step": 3768 }, { "epoch": 0.7726527265272652, "grad_norm": 0.44776584797913777, "learning_rate": 2.591255262271115e-05, "loss": 1.5324, "step": 3769 }, { "epoch": 0.7728577285772857, "grad_norm": 0.4468971228438905, "learning_rate": 2.5867968868867742e-05, "loss": 1.4574, "step": 3770 }, { "epoch": 0.7730627306273062, "grad_norm": 0.4154437197892019, "learning_rate": 2.5823417803881146e-05, "loss": 1.5207, "step": 3771 }, { "epoch": 0.7732677326773267, "grad_norm": 0.40255791747056074, "learning_rate": 2.5778899447396333e-05, "loss": 1.5128, "step": 3772 }, { "epoch": 0.7734727347273472, "grad_norm": 0.4227332462897162, "learning_rate": 2.573441381904389e-05, "loss": 1.4981, "step": 3773 }, { "epoch": 0.7736777367773677, "grad_norm": 0.4548120702954003, "learning_rate": 2.5689960938440007e-05, "loss": 1.5347, "step": 3774 }, { "epoch": 0.7738827388273882, "grad_norm": 0.4105391306983932, "learning_rate": 2.564554082518633e-05, "loss": 1.5332, "step": 3775 }, { "epoch": 0.7740877408774087, "grad_norm": 0.4425576334630604, "learning_rate": 2.5601153498870134e-05, "loss": 1.4919, "step": 3776 }, { "epoch": 0.7742927429274292, "grad_norm": 0.40735099549486947, "learning_rate": 2.5556798979064167e-05, "loss": 1.4936, "step": 3777 }, { "epoch": 0.7744977449774497, "grad_norm": 0.4295662874853571, "learning_rate": 2.551247728532682e-05, "loss": 1.5295, "step": 3778 }, { "epoch": 0.7747027470274703, "grad_norm": 0.4294894073726961, "learning_rate": 2.546818843720189e-05, "loss": 1.5059, "step": 3779 }, { "epoch": 0.7749077490774908, "grad_norm": 0.4071088870604697, "learning_rate": 2.5423932454218802e-05, "loss": 1.4806, "step": 3780 }, { "epoch": 0.7751127511275113, "grad_norm": 0.40890864399769267, "learning_rate": 2.53797093558924e-05, "loss": 1.5332, "step": 3781 }, { "epoch": 0.7753177531775318, "grad_norm": 0.3821532203417036, "learning_rate": 2.5335519161723042e-05, "loss": 1.4722, "step": 3782 }, { "epoch": 0.7755227552275523, "grad_norm": 0.44180206360413943, "learning_rate": 2.5291361891196652e-05, "loss": 1.5044, "step": 3783 }, { "epoch": 0.7757277572775728, "grad_norm": 0.4375162346001822, "learning_rate": 2.524723756378452e-05, "loss": 1.5256, "step": 3784 }, { "epoch": 0.7759327593275933, "grad_norm": 0.4520353377518609, "learning_rate": 2.5203146198943518e-05, "loss": 1.4936, "step": 3785 }, { "epoch": 0.7761377613776138, "grad_norm": 0.4239453294922117, "learning_rate": 2.515908781611591e-05, "loss": 1.4457, "step": 3786 }, { "epoch": 0.7763427634276343, "grad_norm": 0.4736515691015482, "learning_rate": 2.511506243472944e-05, "loss": 1.5642, "step": 3787 }, { "epoch": 0.7765477654776548, "grad_norm": 0.4586532717800692, "learning_rate": 2.507107007419729e-05, "loss": 1.5255, "step": 3788 }, { "epoch": 0.7767527675276753, "grad_norm": 0.4602919398343417, "learning_rate": 2.5027110753918094e-05, "loss": 1.4812, "step": 3789 }, { "epoch": 0.7769577695776958, "grad_norm": 0.45572410680384345, "learning_rate": 2.4983184493275947e-05, "loss": 1.5065, "step": 3790 }, { "epoch": 0.7771627716277163, "grad_norm": 0.39740112063896643, "learning_rate": 2.4939291311640324e-05, "loss": 1.4841, "step": 3791 }, { "epoch": 0.7773677736777368, "grad_norm": 0.4497098650548763, "learning_rate": 2.4895431228366107e-05, "loss": 1.509, "step": 3792 }, { "epoch": 0.7775727757277573, "grad_norm": 0.4824480597776566, "learning_rate": 2.485160426279357e-05, "loss": 1.5171, "step": 3793 }, { "epoch": 0.7777777777777778, "grad_norm": 0.4018467361664024, "learning_rate": 2.480781043424849e-05, "loss": 1.4425, "step": 3794 }, { "epoch": 0.7779827798277983, "grad_norm": 0.4574345048065237, "learning_rate": 2.4764049762041874e-05, "loss": 1.4661, "step": 3795 }, { "epoch": 0.7781877818778188, "grad_norm": 0.48877264975976215, "learning_rate": 2.4720322265470254e-05, "loss": 1.5726, "step": 3796 }, { "epoch": 0.7783927839278393, "grad_norm": 0.3935915969637343, "learning_rate": 2.4676627963815447e-05, "loss": 1.4706, "step": 3797 }, { "epoch": 0.7785977859778598, "grad_norm": 0.4493680826055248, "learning_rate": 2.463296687634462e-05, "loss": 1.4976, "step": 3798 }, { "epoch": 0.7788027880278803, "grad_norm": 0.43619506147666215, "learning_rate": 2.4589339022310386e-05, "loss": 1.5902, "step": 3799 }, { "epoch": 0.7790077900779008, "grad_norm": 0.4270168149107795, "learning_rate": 2.454574442095059e-05, "loss": 1.4938, "step": 3800 }, { "epoch": 0.7792127921279213, "grad_norm": 0.46626381524641214, "learning_rate": 2.4502183091488527e-05, "loss": 1.5499, "step": 3801 }, { "epoch": 0.7794177941779418, "grad_norm": 0.4406457859726337, "learning_rate": 2.445865505313274e-05, "loss": 1.5216, "step": 3802 }, { "epoch": 0.7796227962279623, "grad_norm": 0.4267729505363921, "learning_rate": 2.441516032507708e-05, "loss": 1.4458, "step": 3803 }, { "epoch": 0.7798277982779828, "grad_norm": 0.42507403015298584, "learning_rate": 2.4371698926500806e-05, "loss": 1.498, "step": 3804 }, { "epoch": 0.7800328003280033, "grad_norm": 0.4601021312533811, "learning_rate": 2.432827087656836e-05, "loss": 1.523, "step": 3805 }, { "epoch": 0.7802378023780238, "grad_norm": 0.4462546492731634, "learning_rate": 2.4284876194429596e-05, "loss": 1.6108, "step": 3806 }, { "epoch": 0.7804428044280443, "grad_norm": 0.4088646354381824, "learning_rate": 2.4241514899219575e-05, "loss": 1.5244, "step": 3807 }, { "epoch": 0.7806478064780648, "grad_norm": 0.4644591353783464, "learning_rate": 2.4198187010058614e-05, "loss": 1.4856, "step": 3808 }, { "epoch": 0.7808528085280853, "grad_norm": 0.471857822205609, "learning_rate": 2.415489254605242e-05, "loss": 1.5513, "step": 3809 }, { "epoch": 0.7810578105781057, "grad_norm": 0.4721726959382495, "learning_rate": 2.4111631526291846e-05, "loss": 1.5458, "step": 3810 }, { "epoch": 0.7812628126281262, "grad_norm": 0.4426483710039198, "learning_rate": 2.4068403969852992e-05, "loss": 1.5261, "step": 3811 }, { "epoch": 0.7814678146781467, "grad_norm": 0.4153366517004238, "learning_rate": 2.4025209895797328e-05, "loss": 1.4858, "step": 3812 }, { "epoch": 0.7816728167281672, "grad_norm": 0.4335632999552693, "learning_rate": 2.3982049323171407e-05, "loss": 1.5498, "step": 3813 }, { "epoch": 0.7818778187781877, "grad_norm": 0.4467160923534647, "learning_rate": 2.3938922271007147e-05, "loss": 1.5025, "step": 3814 }, { "epoch": 0.7820828208282082, "grad_norm": 0.42915778177766384, "learning_rate": 2.389582875832157e-05, "loss": 1.5277, "step": 3815 }, { "epoch": 0.7822878228782287, "grad_norm": 0.44923044257124095, "learning_rate": 2.3852768804116955e-05, "loss": 1.5139, "step": 3816 }, { "epoch": 0.7824928249282492, "grad_norm": 0.4593342289691706, "learning_rate": 2.3809742427380823e-05, "loss": 1.4862, "step": 3817 }, { "epoch": 0.7826978269782697, "grad_norm": 0.4280863693950598, "learning_rate": 2.3766749647085783e-05, "loss": 1.4976, "step": 3818 }, { "epoch": 0.7829028290282903, "grad_norm": 0.4577424306022073, "learning_rate": 2.372379048218979e-05, "loss": 1.5314, "step": 3819 }, { "epoch": 0.7831078310783108, "grad_norm": 0.4543289912894854, "learning_rate": 2.3680864951635828e-05, "loss": 1.4736, "step": 3820 }, { "epoch": 0.7833128331283313, "grad_norm": 0.42654384403920753, "learning_rate": 2.3637973074352114e-05, "loss": 1.5521, "step": 3821 }, { "epoch": 0.7835178351783518, "grad_norm": 0.4562389187444123, "learning_rate": 2.359511486925199e-05, "loss": 1.5124, "step": 3822 }, { "epoch": 0.7837228372283723, "grad_norm": 0.40968177686695917, "learning_rate": 2.3552290355233998e-05, "loss": 1.4837, "step": 3823 }, { "epoch": 0.7839278392783928, "grad_norm": 0.4059699670313258, "learning_rate": 2.3509499551181825e-05, "loss": 1.4929, "step": 3824 }, { "epoch": 0.7841328413284133, "grad_norm": 0.405307950074977, "learning_rate": 2.3466742475964264e-05, "loss": 1.5005, "step": 3825 }, { "epoch": 0.7843378433784338, "grad_norm": 0.40571983667174233, "learning_rate": 2.342401914843523e-05, "loss": 1.4375, "step": 3826 }, { "epoch": 0.7845428454284543, "grad_norm": 0.44588680860509766, "learning_rate": 2.3381329587433732e-05, "loss": 1.5187, "step": 3827 }, { "epoch": 0.7847478474784748, "grad_norm": 0.4251091228911665, "learning_rate": 2.3338673811783973e-05, "loss": 1.5011, "step": 3828 }, { "epoch": 0.7849528495284953, "grad_norm": 0.5775313580442965, "learning_rate": 2.329605184029523e-05, "loss": 1.4137, "step": 3829 }, { "epoch": 0.7851578515785158, "grad_norm": 0.4372130112039747, "learning_rate": 2.3253463691761822e-05, "loss": 1.5516, "step": 3830 }, { "epoch": 0.7853628536285363, "grad_norm": 0.40720589927125134, "learning_rate": 2.3210909384963196e-05, "loss": 1.4588, "step": 3831 }, { "epoch": 0.7855678556785568, "grad_norm": 0.4308913794465252, "learning_rate": 2.3168388938663834e-05, "loss": 1.4919, "step": 3832 }, { "epoch": 0.7857728577285773, "grad_norm": 0.45017543815028255, "learning_rate": 2.312590237161335e-05, "loss": 1.5043, "step": 3833 }, { "epoch": 0.7859778597785978, "grad_norm": 0.42222677565598876, "learning_rate": 2.3083449702546424e-05, "loss": 1.4907, "step": 3834 }, { "epoch": 0.7861828618286183, "grad_norm": 0.46441047332515534, "learning_rate": 2.3041030950182706e-05, "loss": 1.5141, "step": 3835 }, { "epoch": 0.7863878638786388, "grad_norm": 0.43232420546690575, "learning_rate": 2.2998646133226966e-05, "loss": 1.5137, "step": 3836 }, { "epoch": 0.7865928659286593, "grad_norm": 0.4416596017767906, "learning_rate": 2.2956295270368965e-05, "loss": 1.5068, "step": 3837 }, { "epoch": 0.7867978679786798, "grad_norm": 0.4207706303938015, "learning_rate": 2.2913978380283452e-05, "loss": 1.5564, "step": 3838 }, { "epoch": 0.7870028700287003, "grad_norm": 0.4533635629054324, "learning_rate": 2.2871695481630374e-05, "loss": 1.5492, "step": 3839 }, { "epoch": 0.7872078720787208, "grad_norm": 0.39896485535293114, "learning_rate": 2.2829446593054493e-05, "loss": 1.521, "step": 3840 }, { "epoch": 0.7874128741287413, "grad_norm": 0.3840082909604336, "learning_rate": 2.2787231733185677e-05, "loss": 1.3995, "step": 3841 }, { "epoch": 0.7876178761787618, "grad_norm": 0.44081458862908096, "learning_rate": 2.274505092063873e-05, "loss": 1.5166, "step": 3842 }, { "epoch": 0.7878228782287823, "grad_norm": 0.4761511352908657, "learning_rate": 2.2702904174013473e-05, "loss": 1.5456, "step": 3843 }, { "epoch": 0.7880278802788028, "grad_norm": 0.41244978122658543, "learning_rate": 2.266079151189472e-05, "loss": 1.5009, "step": 3844 }, { "epoch": 0.7882328823288233, "grad_norm": 0.41179707509773916, "learning_rate": 2.2618712952852272e-05, "loss": 1.4848, "step": 3845 }, { "epoch": 0.7884378843788438, "grad_norm": 0.40759702927550595, "learning_rate": 2.2576668515440825e-05, "loss": 1.5098, "step": 3846 }, { "epoch": 0.7886428864288643, "grad_norm": 0.43357995391244253, "learning_rate": 2.2534658218200076e-05, "loss": 1.4126, "step": 3847 }, { "epoch": 0.7888478884788848, "grad_norm": 0.43548024078421804, "learning_rate": 2.2492682079654636e-05, "loss": 1.4655, "step": 3848 }, { "epoch": 0.7890528905289053, "grad_norm": 0.4647389956747395, "learning_rate": 2.2450740118314085e-05, "loss": 1.4997, "step": 3849 }, { "epoch": 0.7892578925789258, "grad_norm": 0.4407727544031389, "learning_rate": 2.2408832352672947e-05, "loss": 1.5237, "step": 3850 }, { "epoch": 0.7894628946289463, "grad_norm": 0.42325027119403114, "learning_rate": 2.2366958801210636e-05, "loss": 1.5467, "step": 3851 }, { "epoch": 0.7896678966789668, "grad_norm": 0.4578310381512447, "learning_rate": 2.2325119482391467e-05, "loss": 1.5244, "step": 3852 }, { "epoch": 0.7898728987289872, "grad_norm": 0.44357675293153465, "learning_rate": 2.2283314414664656e-05, "loss": 1.5064, "step": 3853 }, { "epoch": 0.7900779007790077, "grad_norm": 0.4401491866332217, "learning_rate": 2.2241543616464377e-05, "loss": 1.5059, "step": 3854 }, { "epoch": 0.7902829028290282, "grad_norm": 0.46135008317059095, "learning_rate": 2.2199807106209668e-05, "loss": 1.5634, "step": 3855 }, { "epoch": 0.7904879048790487, "grad_norm": 0.4694352985697873, "learning_rate": 2.21581049023044e-05, "loss": 1.4694, "step": 3856 }, { "epoch": 0.7906929069290692, "grad_norm": 0.4392205865831773, "learning_rate": 2.211643702313736e-05, "loss": 1.53, "step": 3857 }, { "epoch": 0.7908979089790897, "grad_norm": 0.41607616712802875, "learning_rate": 2.2074803487082162e-05, "loss": 1.4657, "step": 3858 }, { "epoch": 0.7911029110291103, "grad_norm": 0.44218210236891825, "learning_rate": 2.2033204312497334e-05, "loss": 1.5534, "step": 3859 }, { "epoch": 0.7913079130791308, "grad_norm": 0.4932536598157732, "learning_rate": 2.1991639517726237e-05, "loss": 1.5265, "step": 3860 }, { "epoch": 0.7915129151291513, "grad_norm": 0.4105257144866682, "learning_rate": 2.195010912109704e-05, "loss": 1.4614, "step": 3861 }, { "epoch": 0.7917179171791718, "grad_norm": 0.4672175431037312, "learning_rate": 2.1908613140922763e-05, "loss": 1.5033, "step": 3862 }, { "epoch": 0.7919229192291923, "grad_norm": 0.46516287665259515, "learning_rate": 2.18671515955012e-05, "loss": 1.476, "step": 3863 }, { "epoch": 0.7921279212792128, "grad_norm": 0.46221775791896275, "learning_rate": 2.1825724503115062e-05, "loss": 1.5368, "step": 3864 }, { "epoch": 0.7923329233292333, "grad_norm": 0.41708550953083207, "learning_rate": 2.1784331882031816e-05, "loss": 1.5361, "step": 3865 }, { "epoch": 0.7925379253792538, "grad_norm": 0.45251360223720916, "learning_rate": 2.1742973750503725e-05, "loss": 1.5699, "step": 3866 }, { "epoch": 0.7927429274292743, "grad_norm": 0.46660172623426666, "learning_rate": 2.1701650126767824e-05, "loss": 1.5297, "step": 3867 }, { "epoch": 0.7929479294792948, "grad_norm": 0.44926145668220285, "learning_rate": 2.166036102904594e-05, "loss": 1.4898, "step": 3868 }, { "epoch": 0.7931529315293153, "grad_norm": 0.41078091933158717, "learning_rate": 2.1619106475544738e-05, "loss": 1.5204, "step": 3869 }, { "epoch": 0.7933579335793358, "grad_norm": 0.4512073660324291, "learning_rate": 2.1577886484455535e-05, "loss": 1.467, "step": 3870 }, { "epoch": 0.7935629356293563, "grad_norm": 0.42707121612272103, "learning_rate": 2.1536701073954558e-05, "loss": 1.4385, "step": 3871 }, { "epoch": 0.7937679376793768, "grad_norm": 0.44128032778263787, "learning_rate": 2.1495550262202645e-05, "loss": 1.5384, "step": 3872 }, { "epoch": 0.7939729397293973, "grad_norm": 0.40817953139629315, "learning_rate": 2.145443406734542e-05, "loss": 1.4615, "step": 3873 }, { "epoch": 0.7941779417794178, "grad_norm": 0.46034884654284425, "learning_rate": 2.141335250751331e-05, "loss": 1.5602, "step": 3874 }, { "epoch": 0.7943829438294383, "grad_norm": 0.43680395226362767, "learning_rate": 2.1372305600821353e-05, "loss": 1.5229, "step": 3875 }, { "epoch": 0.7945879458794588, "grad_norm": 0.42813043860691685, "learning_rate": 2.133129336536944e-05, "loss": 1.5277, "step": 3876 }, { "epoch": 0.7947929479294793, "grad_norm": 0.42083255647281076, "learning_rate": 2.1290315819242067e-05, "loss": 1.5161, "step": 3877 }, { "epoch": 0.7949979499794998, "grad_norm": 0.4183760460201756, "learning_rate": 2.124937298050843e-05, "loss": 1.4829, "step": 3878 }, { "epoch": 0.7952029520295203, "grad_norm": 0.43184957674153296, "learning_rate": 2.1208464867222544e-05, "loss": 1.517, "step": 3879 }, { "epoch": 0.7954079540795408, "grad_norm": 0.4963474553298808, "learning_rate": 2.1167591497422943e-05, "loss": 1.5076, "step": 3880 }, { "epoch": 0.7956129561295613, "grad_norm": 0.4104173597290443, "learning_rate": 2.1126752889133007e-05, "loss": 1.4726, "step": 3881 }, { "epoch": 0.7958179581795818, "grad_norm": 0.4510374130605548, "learning_rate": 2.1085949060360654e-05, "loss": 1.484, "step": 3882 }, { "epoch": 0.7960229602296023, "grad_norm": 0.43901542900714546, "learning_rate": 2.104518002909851e-05, "loss": 1.5393, "step": 3883 }, { "epoch": 0.7962279622796228, "grad_norm": 0.43084078735416814, "learning_rate": 2.1004445813323904e-05, "loss": 1.531, "step": 3884 }, { "epoch": 0.7964329643296433, "grad_norm": 0.4016919037064875, "learning_rate": 2.0963746430998756e-05, "loss": 1.4731, "step": 3885 }, { "epoch": 0.7966379663796638, "grad_norm": 0.45113990388412745, "learning_rate": 2.0923081900069618e-05, "loss": 1.4641, "step": 3886 }, { "epoch": 0.7968429684296843, "grad_norm": 0.4419446300529957, "learning_rate": 2.0882452238467755e-05, "loss": 1.487, "step": 3887 }, { "epoch": 0.7970479704797048, "grad_norm": 0.3995835644738447, "learning_rate": 2.084185746410894e-05, "loss": 1.4708, "step": 3888 }, { "epoch": 0.7972529725297253, "grad_norm": 0.4434416885001936, "learning_rate": 2.0801297594893687e-05, "loss": 1.5754, "step": 3889 }, { "epoch": 0.7974579745797458, "grad_norm": 0.47581857973876973, "learning_rate": 2.0760772648707016e-05, "loss": 1.5696, "step": 3890 }, { "epoch": 0.7976629766297663, "grad_norm": 0.4605248574137105, "learning_rate": 2.0720282643418576e-05, "loss": 1.5073, "step": 3891 }, { "epoch": 0.7978679786797868, "grad_norm": 0.4230751268743984, "learning_rate": 2.0679827596882663e-05, "loss": 1.4895, "step": 3892 }, { "epoch": 0.7980729807298073, "grad_norm": 0.4258769779341127, "learning_rate": 2.0639407526938082e-05, "loss": 1.4905, "step": 3893 }, { "epoch": 0.7982779827798278, "grad_norm": 0.4480096158252072, "learning_rate": 2.0599022451408222e-05, "loss": 1.4983, "step": 3894 }, { "epoch": 0.7984829848298483, "grad_norm": 0.4550163765958765, "learning_rate": 2.055867238810113e-05, "loss": 1.5582, "step": 3895 }, { "epoch": 0.7986879868798687, "grad_norm": 0.4391129718992249, "learning_rate": 2.0518357354809293e-05, "loss": 1.4178, "step": 3896 }, { "epoch": 0.7988929889298892, "grad_norm": 0.4421320958025758, "learning_rate": 2.0478077369309855e-05, "loss": 1.4899, "step": 3897 }, { "epoch": 0.7990979909799097, "grad_norm": 0.4022570824055907, "learning_rate": 2.0437832449364447e-05, "loss": 1.515, "step": 3898 }, { "epoch": 0.7993029930299304, "grad_norm": 0.4054839765666077, "learning_rate": 2.0397622612719202e-05, "loss": 1.4254, "step": 3899 }, { "epoch": 0.7995079950799509, "grad_norm": 0.4223934393524632, "learning_rate": 2.0357447877104895e-05, "loss": 1.4388, "step": 3900 }, { "epoch": 0.7997129971299713, "grad_norm": 0.41923905777489334, "learning_rate": 2.0317308260236732e-05, "loss": 1.4717, "step": 3901 }, { "epoch": 0.7999179991799918, "grad_norm": 0.4094556483191631, "learning_rate": 2.0277203779814447e-05, "loss": 1.5066, "step": 3902 }, { "epoch": 0.8001230012300123, "grad_norm": 0.4396441396721408, "learning_rate": 2.023713445352232e-05, "loss": 1.4918, "step": 3903 }, { "epoch": 0.8003280032800328, "grad_norm": 0.42859447348634844, "learning_rate": 2.0197100299029058e-05, "loss": 1.5656, "step": 3904 }, { "epoch": 0.8005330053300533, "grad_norm": 0.4468662706630017, "learning_rate": 2.015710133398797e-05, "loss": 1.5262, "step": 3905 }, { "epoch": 0.8007380073800738, "grad_norm": 0.43107449757424876, "learning_rate": 2.011713757603675e-05, "loss": 1.5235, "step": 3906 }, { "epoch": 0.8009430094300943, "grad_norm": 0.4134033966088403, "learning_rate": 2.0077209042797562e-05, "loss": 1.5365, "step": 3907 }, { "epoch": 0.8011480114801148, "grad_norm": 0.44781800542569394, "learning_rate": 2.003731575187714e-05, "loss": 1.5678, "step": 3908 }, { "epoch": 0.8013530135301353, "grad_norm": 0.4370194704013605, "learning_rate": 1.999745772086655e-05, "loss": 1.4628, "step": 3909 }, { "epoch": 0.8015580155801558, "grad_norm": 0.45195350027248254, "learning_rate": 1.995763496734143e-05, "loss": 1.47, "step": 3910 }, { "epoch": 0.8017630176301763, "grad_norm": 0.46484496837610184, "learning_rate": 1.9917847508861775e-05, "loss": 1.5817, "step": 3911 }, { "epoch": 0.8019680196801968, "grad_norm": 0.43686203379316896, "learning_rate": 1.9878095362972037e-05, "loss": 1.515, "step": 3912 }, { "epoch": 0.8021730217302173, "grad_norm": 0.3990794971106179, "learning_rate": 1.9838378547201132e-05, "loss": 1.4321, "step": 3913 }, { "epoch": 0.8023780237802378, "grad_norm": 0.38340188102664985, "learning_rate": 1.9798697079062332e-05, "loss": 1.5401, "step": 3914 }, { "epoch": 0.8025830258302583, "grad_norm": 0.41838839024518004, "learning_rate": 1.9759050976053407e-05, "loss": 1.4874, "step": 3915 }, { "epoch": 0.8027880278802788, "grad_norm": 0.39920374043593765, "learning_rate": 1.9719440255656474e-05, "loss": 1.4701, "step": 3916 }, { "epoch": 0.8029930299302993, "grad_norm": 0.4104848011698136, "learning_rate": 1.9679864935338042e-05, "loss": 1.5035, "step": 3917 }, { "epoch": 0.8031980319803198, "grad_norm": 0.41447789993178596, "learning_rate": 1.9640325032549024e-05, "loss": 1.4034, "step": 3918 }, { "epoch": 0.8034030340303403, "grad_norm": 0.42359174165888985, "learning_rate": 1.960082056472473e-05, "loss": 1.5512, "step": 3919 }, { "epoch": 0.8036080360803608, "grad_norm": 0.4631687679898118, "learning_rate": 1.956135154928487e-05, "loss": 1.5347, "step": 3920 }, { "epoch": 0.8038130381303813, "grad_norm": 0.405143043430309, "learning_rate": 1.9521918003633442e-05, "loss": 1.502, "step": 3921 }, { "epoch": 0.8040180401804018, "grad_norm": 0.4242304095629188, "learning_rate": 1.9482519945158872e-05, "loss": 1.5119, "step": 3922 }, { "epoch": 0.8042230422304223, "grad_norm": 0.4420297907550419, "learning_rate": 1.944315739123388e-05, "loss": 1.4655, "step": 3923 }, { "epoch": 0.8044280442804428, "grad_norm": 0.4531359342668859, "learning_rate": 1.940383035921558e-05, "loss": 1.5734, "step": 3924 }, { "epoch": 0.8046330463304633, "grad_norm": 0.40392690504627216, "learning_rate": 1.9364538866445436e-05, "loss": 1.4831, "step": 3925 }, { "epoch": 0.8048380483804838, "grad_norm": 0.4186726502831277, "learning_rate": 1.93252829302492e-05, "loss": 1.4891, "step": 3926 }, { "epoch": 0.8050430504305043, "grad_norm": 0.4193498525156862, "learning_rate": 1.9286062567936937e-05, "loss": 1.3818, "step": 3927 }, { "epoch": 0.8052480524805248, "grad_norm": 0.407285709244237, "learning_rate": 1.924687779680302e-05, "loss": 1.4818, "step": 3928 }, { "epoch": 0.8054530545305453, "grad_norm": 0.42675081334514614, "learning_rate": 1.9207728634126187e-05, "loss": 1.5283, "step": 3929 }, { "epoch": 0.8056580565805658, "grad_norm": 0.43830907293385163, "learning_rate": 1.916861509716945e-05, "loss": 1.5343, "step": 3930 }, { "epoch": 0.8058630586305863, "grad_norm": 0.43622926126059786, "learning_rate": 1.9129537203180102e-05, "loss": 1.4821, "step": 3931 }, { "epoch": 0.8060680606806068, "grad_norm": 0.40695212633150213, "learning_rate": 1.9090494969389696e-05, "loss": 1.4858, "step": 3932 }, { "epoch": 0.8062730627306273, "grad_norm": 0.44589757663149765, "learning_rate": 1.9051488413014064e-05, "loss": 1.5816, "step": 3933 }, { "epoch": 0.8064780647806478, "grad_norm": 0.3736714034831028, "learning_rate": 1.901251755125335e-05, "loss": 1.4148, "step": 3934 }, { "epoch": 0.8066830668306683, "grad_norm": 0.4262764833584188, "learning_rate": 1.8973582401291967e-05, "loss": 1.4005, "step": 3935 }, { "epoch": 0.8068880688806888, "grad_norm": 0.4119971066379156, "learning_rate": 1.8934682980298502e-05, "loss": 1.4796, "step": 3936 }, { "epoch": 0.8070930709307093, "grad_norm": 0.4056155244054997, "learning_rate": 1.8895819305425856e-05, "loss": 1.4493, "step": 3937 }, { "epoch": 0.8072980729807298, "grad_norm": 0.437297861093439, "learning_rate": 1.8856991393811097e-05, "loss": 1.5224, "step": 3938 }, { "epoch": 0.8075030750307504, "grad_norm": 0.427907085522041, "learning_rate": 1.88181992625756e-05, "loss": 1.5368, "step": 3939 }, { "epoch": 0.8077080770807709, "grad_norm": 0.4026043917708178, "learning_rate": 1.8779442928824963e-05, "loss": 1.4875, "step": 3940 }, { "epoch": 0.8079130791307914, "grad_norm": 0.4175682348813712, "learning_rate": 1.8740722409648947e-05, "loss": 1.4663, "step": 3941 }, { "epoch": 0.8081180811808119, "grad_norm": 0.44468124745085347, "learning_rate": 1.8702037722121523e-05, "loss": 1.5126, "step": 3942 }, { "epoch": 0.8083230832308324, "grad_norm": 0.4554108651114654, "learning_rate": 1.8663388883300905e-05, "loss": 1.5663, "step": 3943 }, { "epoch": 0.8085280852808528, "grad_norm": 0.4346400885220679, "learning_rate": 1.8624775910229418e-05, "loss": 1.4504, "step": 3944 }, { "epoch": 0.8087330873308733, "grad_norm": 0.4289981398511209, "learning_rate": 1.8586198819933686e-05, "loss": 1.4381, "step": 3945 }, { "epoch": 0.8089380893808938, "grad_norm": 0.4515270386667911, "learning_rate": 1.854765762942445e-05, "loss": 1.5434, "step": 3946 }, { "epoch": 0.8091430914309143, "grad_norm": 0.43332729340732434, "learning_rate": 1.8509152355696623e-05, "loss": 1.491, "step": 3947 }, { "epoch": 0.8093480934809348, "grad_norm": 0.4365381482984787, "learning_rate": 1.8470683015729272e-05, "loss": 1.4699, "step": 3948 }, { "epoch": 0.8095530955309553, "grad_norm": 0.39474061589884146, "learning_rate": 1.84322496264856e-05, "loss": 1.4424, "step": 3949 }, { "epoch": 0.8097580975809758, "grad_norm": 0.418374954458447, "learning_rate": 1.8393852204912997e-05, "loss": 1.4725, "step": 3950 }, { "epoch": 0.8099630996309963, "grad_norm": 0.47782494393803604, "learning_rate": 1.8355490767943028e-05, "loss": 1.5437, "step": 3951 }, { "epoch": 0.8101681016810168, "grad_norm": 0.42455096584701413, "learning_rate": 1.8317165332491303e-05, "loss": 1.5727, "step": 3952 }, { "epoch": 0.8103731037310373, "grad_norm": 0.4425374586757662, "learning_rate": 1.8278875915457618e-05, "loss": 1.4644, "step": 3953 }, { "epoch": 0.8105781057810578, "grad_norm": 0.3935435645028859, "learning_rate": 1.8240622533725814e-05, "loss": 1.5015, "step": 3954 }, { "epoch": 0.8107831078310783, "grad_norm": 0.47922103077379513, "learning_rate": 1.820240520416394e-05, "loss": 1.5803, "step": 3955 }, { "epoch": 0.8109881098810988, "grad_norm": 0.44487982531220893, "learning_rate": 1.8164223943624113e-05, "loss": 1.5156, "step": 3956 }, { "epoch": 0.8111931119311193, "grad_norm": 0.43633927709533915, "learning_rate": 1.8126078768942512e-05, "loss": 1.4503, "step": 3957 }, { "epoch": 0.8113981139811398, "grad_norm": 0.43501552606041066, "learning_rate": 1.8087969696939433e-05, "loss": 1.4699, "step": 3958 }, { "epoch": 0.8116031160311603, "grad_norm": 0.43768237180377356, "learning_rate": 1.8049896744419216e-05, "loss": 1.5217, "step": 3959 }, { "epoch": 0.8118081180811808, "grad_norm": 0.4233845148180157, "learning_rate": 1.801185992817034e-05, "loss": 1.4626, "step": 3960 }, { "epoch": 0.8120131201312013, "grad_norm": 0.5149738101072708, "learning_rate": 1.7973859264965288e-05, "loss": 1.537, "step": 3961 }, { "epoch": 0.8122181221812218, "grad_norm": 0.4469575878396721, "learning_rate": 1.7935894771560647e-05, "loss": 1.4982, "step": 3962 }, { "epoch": 0.8124231242312423, "grad_norm": 0.40035316266162, "learning_rate": 1.7897966464697034e-05, "loss": 1.4477, "step": 3963 }, { "epoch": 0.8126281262812628, "grad_norm": 0.3951811453006624, "learning_rate": 1.7860074361099067e-05, "loss": 1.4195, "step": 3964 }, { "epoch": 0.8128331283312833, "grad_norm": 0.4246267451127877, "learning_rate": 1.7822218477475494e-05, "loss": 1.5358, "step": 3965 }, { "epoch": 0.8130381303813038, "grad_norm": 0.43655999424816394, "learning_rate": 1.7784398830519e-05, "loss": 1.4701, "step": 3966 }, { "epoch": 0.8132431324313243, "grad_norm": 0.43946568029204325, "learning_rate": 1.7746615436906365e-05, "loss": 1.4982, "step": 3967 }, { "epoch": 0.8134481344813448, "grad_norm": 0.4001602392142344, "learning_rate": 1.7708868313298332e-05, "loss": 1.4555, "step": 3968 }, { "epoch": 0.8136531365313653, "grad_norm": 0.4371306036918452, "learning_rate": 1.767115747633965e-05, "loss": 1.5094, "step": 3969 }, { "epoch": 0.8138581385813858, "grad_norm": 0.45326464186107945, "learning_rate": 1.763348294265912e-05, "loss": 1.5277, "step": 3970 }, { "epoch": 0.8140631406314063, "grad_norm": 0.3970819593068182, "learning_rate": 1.7595844728869448e-05, "loss": 1.441, "step": 3971 }, { "epoch": 0.8142681426814268, "grad_norm": 0.39982819099467, "learning_rate": 1.7558242851567442e-05, "loss": 1.4894, "step": 3972 }, { "epoch": 0.8144731447314473, "grad_norm": 0.44818881706609065, "learning_rate": 1.752067732733378e-05, "loss": 1.4921, "step": 3973 }, { "epoch": 0.8146781467814678, "grad_norm": 0.3899482663502815, "learning_rate": 1.7483148172733145e-05, "loss": 1.4848, "step": 3974 }, { "epoch": 0.8148831488314883, "grad_norm": 0.4363047074612505, "learning_rate": 1.7445655404314208e-05, "loss": 1.5074, "step": 3975 }, { "epoch": 0.8150881508815088, "grad_norm": 0.45261111833740086, "learning_rate": 1.7408199038609586e-05, "loss": 1.5241, "step": 3976 }, { "epoch": 0.8152931529315293, "grad_norm": 0.45222049740683995, "learning_rate": 1.737077909213579e-05, "loss": 1.5032, "step": 3977 }, { "epoch": 0.8154981549815498, "grad_norm": 0.4480219783858042, "learning_rate": 1.7333395581393365e-05, "loss": 1.5194, "step": 3978 }, { "epoch": 0.8157031570315704, "grad_norm": 0.4117301766459738, "learning_rate": 1.7296048522866692e-05, "loss": 1.4641, "step": 3979 }, { "epoch": 0.8159081590815909, "grad_norm": 0.46184463055887004, "learning_rate": 1.7258737933024182e-05, "loss": 1.567, "step": 3980 }, { "epoch": 0.8161131611316114, "grad_norm": 0.42427677205086417, "learning_rate": 1.7221463828318073e-05, "loss": 1.4557, "step": 3981 }, { "epoch": 0.8163181631816319, "grad_norm": 0.42847948146774567, "learning_rate": 1.718422622518455e-05, "loss": 1.5771, "step": 3982 }, { "epoch": 0.8165231652316524, "grad_norm": 0.41843569083962945, "learning_rate": 1.7147025140043727e-05, "loss": 1.4736, "step": 3983 }, { "epoch": 0.8167281672816729, "grad_norm": 0.41914757382645457, "learning_rate": 1.7109860589299552e-05, "loss": 1.4841, "step": 3984 }, { "epoch": 0.8169331693316934, "grad_norm": 0.41093866418822345, "learning_rate": 1.7072732589339955e-05, "loss": 1.4927, "step": 3985 }, { "epoch": 0.8171381713817139, "grad_norm": 0.4060646927708431, "learning_rate": 1.7035641156536675e-05, "loss": 1.5237, "step": 3986 }, { "epoch": 0.8173431734317343, "grad_norm": 0.4553394166052772, "learning_rate": 1.6998586307245313e-05, "loss": 1.5359, "step": 3987 }, { "epoch": 0.8175481754817548, "grad_norm": 0.48167049978287974, "learning_rate": 1.696156805780543e-05, "loss": 1.492, "step": 3988 }, { "epoch": 0.8177531775317753, "grad_norm": 0.45061653390771095, "learning_rate": 1.6924586424540346e-05, "loss": 1.5123, "step": 3989 }, { "epoch": 0.8179581795817958, "grad_norm": 0.38615392837604146, "learning_rate": 1.688764142375733e-05, "loss": 1.4522, "step": 3990 }, { "epoch": 0.8181631816318163, "grad_norm": 0.40736554512568307, "learning_rate": 1.6850733071747405e-05, "loss": 1.4938, "step": 3991 }, { "epoch": 0.8183681836818368, "grad_norm": 0.40560956105470275, "learning_rate": 1.68138613847855e-05, "loss": 1.5218, "step": 3992 }, { "epoch": 0.8185731857318573, "grad_norm": 0.41524187637052146, "learning_rate": 1.6777026379130324e-05, "loss": 1.426, "step": 3993 }, { "epoch": 0.8187781877818778, "grad_norm": 0.4325522247923781, "learning_rate": 1.6740228071024454e-05, "loss": 1.5093, "step": 3994 }, { "epoch": 0.8189831898318983, "grad_norm": 0.40411280399317184, "learning_rate": 1.6703466476694307e-05, "loss": 1.4346, "step": 3995 }, { "epoch": 0.8191881918819188, "grad_norm": 0.435211648607629, "learning_rate": 1.6666741612350034e-05, "loss": 1.5092, "step": 3996 }, { "epoch": 0.8193931939319393, "grad_norm": 0.4101536614618035, "learning_rate": 1.663005349418566e-05, "loss": 1.4298, "step": 3997 }, { "epoch": 0.8195981959819598, "grad_norm": 0.4396975459223735, "learning_rate": 1.6593402138378934e-05, "loss": 1.4857, "step": 3998 }, { "epoch": 0.8198031980319803, "grad_norm": 0.430574743329003, "learning_rate": 1.6556787561091492e-05, "loss": 1.4968, "step": 3999 }, { "epoch": 0.8200082000820008, "grad_norm": 0.40527803868456297, "learning_rate": 1.652020977846864e-05, "loss": 1.4781, "step": 4000 }, { "epoch": 0.8202132021320213, "grad_norm": 0.40336534632349746, "learning_rate": 1.6483668806639584e-05, "loss": 1.482, "step": 4001 }, { "epoch": 0.8204182041820418, "grad_norm": 0.43810860147071806, "learning_rate": 1.6447164661717197e-05, "loss": 1.4851, "step": 4002 }, { "epoch": 0.8206232062320623, "grad_norm": 0.4659763698416814, "learning_rate": 1.6410697359798122e-05, "loss": 1.5014, "step": 4003 }, { "epoch": 0.8208282082820828, "grad_norm": 0.4008227714251599, "learning_rate": 1.6374266916962832e-05, "loss": 1.4652, "step": 4004 }, { "epoch": 0.8210332103321033, "grad_norm": 0.44413733118072135, "learning_rate": 1.6337873349275456e-05, "loss": 1.4906, "step": 4005 }, { "epoch": 0.8212382123821238, "grad_norm": 0.4387813098192902, "learning_rate": 1.6301516672783945e-05, "loss": 1.5313, "step": 4006 }, { "epoch": 0.8214432144321443, "grad_norm": 0.43535659339060073, "learning_rate": 1.6265196903519918e-05, "loss": 1.5104, "step": 4007 }, { "epoch": 0.8216482164821648, "grad_norm": 0.4231400775472947, "learning_rate": 1.6228914057498746e-05, "loss": 1.4356, "step": 4008 }, { "epoch": 0.8218532185321853, "grad_norm": 0.4139599366722531, "learning_rate": 1.619266815071948e-05, "loss": 1.499, "step": 4009 }, { "epoch": 0.8220582205822058, "grad_norm": 0.42914480568592833, "learning_rate": 1.6156459199164952e-05, "loss": 1.4612, "step": 4010 }, { "epoch": 0.8222632226322263, "grad_norm": 0.4251087675544272, "learning_rate": 1.612028721880169e-05, "loss": 1.4902, "step": 4011 }, { "epoch": 0.8224682246822468, "grad_norm": 0.45028571857062394, "learning_rate": 1.6084152225579863e-05, "loss": 1.5486, "step": 4012 }, { "epoch": 0.8226732267322673, "grad_norm": 0.43502191375973126, "learning_rate": 1.6048054235433364e-05, "loss": 1.5055, "step": 4013 }, { "epoch": 0.8228782287822878, "grad_norm": 0.40416886315713924, "learning_rate": 1.6011993264279734e-05, "loss": 1.4766, "step": 4014 }, { "epoch": 0.8230832308323083, "grad_norm": 0.44534120537498745, "learning_rate": 1.5975969328020257e-05, "loss": 1.4721, "step": 4015 }, { "epoch": 0.8232882328823288, "grad_norm": 0.43038848235478067, "learning_rate": 1.593998244253988e-05, "loss": 1.4657, "step": 4016 }, { "epoch": 0.8234932349323493, "grad_norm": 0.4447565003571557, "learning_rate": 1.5904032623707144e-05, "loss": 1.5198, "step": 4017 }, { "epoch": 0.8236982369823698, "grad_norm": 0.41264652318624195, "learning_rate": 1.58681198873743e-05, "loss": 1.4702, "step": 4018 }, { "epoch": 0.8239032390323904, "grad_norm": 0.39932582475887524, "learning_rate": 1.5832244249377204e-05, "loss": 1.4538, "step": 4019 }, { "epoch": 0.8241082410824109, "grad_norm": 0.4157218665292185, "learning_rate": 1.5796405725535402e-05, "loss": 1.5086, "step": 4020 }, { "epoch": 0.8243132431324314, "grad_norm": 0.4526047443566796, "learning_rate": 1.5760604331652075e-05, "loss": 1.5082, "step": 4021 }, { "epoch": 0.8245182451824519, "grad_norm": 0.4309206360954103, "learning_rate": 1.5724840083514005e-05, "loss": 1.5425, "step": 4022 }, { "epoch": 0.8247232472324724, "grad_norm": 0.44352321613135137, "learning_rate": 1.5689112996891576e-05, "loss": 1.5858, "step": 4023 }, { "epoch": 0.8249282492824929, "grad_norm": 0.4273080895507938, "learning_rate": 1.56534230875388e-05, "loss": 1.4687, "step": 4024 }, { "epoch": 0.8251332513325134, "grad_norm": 0.4649697050286364, "learning_rate": 1.561777037119333e-05, "loss": 1.5155, "step": 4025 }, { "epoch": 0.8253382533825339, "grad_norm": 0.43779085962740655, "learning_rate": 1.5582154863576414e-05, "loss": 1.4941, "step": 4026 }, { "epoch": 0.8255432554325544, "grad_norm": 0.42229959210095425, "learning_rate": 1.5546576580392846e-05, "loss": 1.485, "step": 4027 }, { "epoch": 0.8257482574825749, "grad_norm": 0.4050420990768643, "learning_rate": 1.551103553733104e-05, "loss": 1.4905, "step": 4028 }, { "epoch": 0.8259532595325954, "grad_norm": 0.39236556199383155, "learning_rate": 1.5475531750062955e-05, "loss": 1.4381, "step": 4029 }, { "epoch": 0.8261582615826158, "grad_norm": 0.4037580485948823, "learning_rate": 1.5440065234244162e-05, "loss": 1.5478, "step": 4030 }, { "epoch": 0.8263632636326363, "grad_norm": 0.42387433258676543, "learning_rate": 1.5404636005513805e-05, "loss": 1.5062, "step": 4031 }, { "epoch": 0.8265682656826568, "grad_norm": 0.4003384499584203, "learning_rate": 1.536924407949456e-05, "loss": 1.5205, "step": 4032 }, { "epoch": 0.8267732677326773, "grad_norm": 0.4461250096921476, "learning_rate": 1.533388947179264e-05, "loss": 1.5256, "step": 4033 }, { "epoch": 0.8269782697826978, "grad_norm": 0.4261393558615221, "learning_rate": 1.5298572197997797e-05, "loss": 1.5124, "step": 4034 }, { "epoch": 0.8271832718327183, "grad_norm": 0.4543693909621413, "learning_rate": 1.5263292273683404e-05, "loss": 1.5163, "step": 4035 }, { "epoch": 0.8273882738827388, "grad_norm": 0.394591475219828, "learning_rate": 1.5228049714406246e-05, "loss": 1.4583, "step": 4036 }, { "epoch": 0.8275932759327593, "grad_norm": 0.42123587137950136, "learning_rate": 1.5192844535706741e-05, "loss": 1.5258, "step": 4037 }, { "epoch": 0.8277982779827798, "grad_norm": 0.42469824137124834, "learning_rate": 1.5157676753108752e-05, "loss": 1.4822, "step": 4038 }, { "epoch": 0.8280032800328003, "grad_norm": 0.46304443157857894, "learning_rate": 1.512254638211964e-05, "loss": 1.4912, "step": 4039 }, { "epoch": 0.8282082820828208, "grad_norm": 0.41214509834924007, "learning_rate": 1.508745343823037e-05, "loss": 1.4867, "step": 4040 }, { "epoch": 0.8284132841328413, "grad_norm": 0.43337673883934186, "learning_rate": 1.5052397936915264e-05, "loss": 1.5154, "step": 4041 }, { "epoch": 0.8286182861828618, "grad_norm": 0.42173127670929955, "learning_rate": 1.5017379893632255e-05, "loss": 1.4693, "step": 4042 }, { "epoch": 0.8288232882328823, "grad_norm": 0.40105356786212654, "learning_rate": 1.4982399323822705e-05, "loss": 1.4558, "step": 4043 }, { "epoch": 0.8290282902829028, "grad_norm": 0.4134886774282266, "learning_rate": 1.4947456242911406e-05, "loss": 1.4603, "step": 4044 }, { "epoch": 0.8292332923329233, "grad_norm": 0.407878632342707, "learning_rate": 1.4912550666306747e-05, "loss": 1.4935, "step": 4045 }, { "epoch": 0.8294382943829438, "grad_norm": 0.4287419898274544, "learning_rate": 1.4877682609400423e-05, "loss": 1.4902, "step": 4046 }, { "epoch": 0.8296432964329643, "grad_norm": 0.4287778705999938, "learning_rate": 1.4842852087567727e-05, "loss": 1.539, "step": 4047 }, { "epoch": 0.8298482984829848, "grad_norm": 0.39679546858806153, "learning_rate": 1.4808059116167305e-05, "loss": 1.4377, "step": 4048 }, { "epoch": 0.8300533005330053, "grad_norm": 0.40336653116303844, "learning_rate": 1.4773303710541275e-05, "loss": 1.4537, "step": 4049 }, { "epoch": 0.8302583025830258, "grad_norm": 0.4434921199495249, "learning_rate": 1.4738585886015178e-05, "loss": 1.4598, "step": 4050 }, { "epoch": 0.8304633046330463, "grad_norm": 0.4862007546427389, "learning_rate": 1.4703905657898043e-05, "loss": 1.5946, "step": 4051 }, { "epoch": 0.8306683066830668, "grad_norm": 0.4508330156615731, "learning_rate": 1.4669263041482218e-05, "loss": 1.556, "step": 4052 }, { "epoch": 0.8308733087330873, "grad_norm": 0.40871876236025084, "learning_rate": 1.4634658052043582e-05, "loss": 1.4902, "step": 4053 }, { "epoch": 0.8310783107831078, "grad_norm": 0.40849565334971594, "learning_rate": 1.4600090704841318e-05, "loss": 1.4955, "step": 4054 }, { "epoch": 0.8312833128331283, "grad_norm": 0.44929879887737095, "learning_rate": 1.4565561015118057e-05, "loss": 1.5442, "step": 4055 }, { "epoch": 0.8314883148831488, "grad_norm": 0.44082578485236096, "learning_rate": 1.453106899809985e-05, "loss": 1.5276, "step": 4056 }, { "epoch": 0.8316933169331693, "grad_norm": 0.41489356572284175, "learning_rate": 1.4496614668996077e-05, "loss": 1.4893, "step": 4057 }, { "epoch": 0.8318983189831898, "grad_norm": 0.44764787361825054, "learning_rate": 1.4462198042999565e-05, "loss": 1.4971, "step": 4058 }, { "epoch": 0.8321033210332104, "grad_norm": 0.397129027366937, "learning_rate": 1.4427819135286469e-05, "loss": 1.4764, "step": 4059 }, { "epoch": 0.8323083230832309, "grad_norm": 0.44725605325884854, "learning_rate": 1.43934779610163e-05, "loss": 1.5393, "step": 4060 }, { "epoch": 0.8325133251332514, "grad_norm": 0.4017898606940109, "learning_rate": 1.4359174535331999e-05, "loss": 1.5071, "step": 4061 }, { "epoch": 0.8327183271832719, "grad_norm": 0.4165684258014967, "learning_rate": 1.4324908873359766e-05, "loss": 1.514, "step": 4062 }, { "epoch": 0.8329233292332924, "grad_norm": 0.4608710002230284, "learning_rate": 1.429068099020926e-05, "loss": 1.5183, "step": 4063 }, { "epoch": 0.8331283312833129, "grad_norm": 0.4607040741739288, "learning_rate": 1.4256490900973385e-05, "loss": 1.4828, "step": 4064 }, { "epoch": 0.8333333333333334, "grad_norm": 0.4067657365697677, "learning_rate": 1.4222338620728404e-05, "loss": 1.5249, "step": 4065 }, { "epoch": 0.8335383353833539, "grad_norm": 0.4312227878730755, "learning_rate": 1.418822416453397e-05, "loss": 1.4182, "step": 4066 }, { "epoch": 0.8337433374333744, "grad_norm": 0.4260066857339835, "learning_rate": 1.4154147547432971e-05, "loss": 1.4421, "step": 4067 }, { "epoch": 0.8339483394833949, "grad_norm": 0.44743336030759656, "learning_rate": 1.4120108784451625e-05, "loss": 1.4926, "step": 4068 }, { "epoch": 0.8341533415334154, "grad_norm": 0.41742696939301815, "learning_rate": 1.4086107890599543e-05, "loss": 1.5156, "step": 4069 }, { "epoch": 0.8343583435834359, "grad_norm": 0.41146580888849155, "learning_rate": 1.405214488086951e-05, "loss": 1.4777, "step": 4070 }, { "epoch": 0.8345633456334564, "grad_norm": 0.4748583072519751, "learning_rate": 1.4018219770237717e-05, "loss": 1.5351, "step": 4071 }, { "epoch": 0.8347683476834769, "grad_norm": 0.4219038451439774, "learning_rate": 1.3984332573663584e-05, "loss": 1.455, "step": 4072 }, { "epoch": 0.8349733497334973, "grad_norm": 0.4327922181388339, "learning_rate": 1.39504833060898e-05, "loss": 1.4365, "step": 4073 }, { "epoch": 0.8351783517835178, "grad_norm": 0.4239352174034709, "learning_rate": 1.3916671982442386e-05, "loss": 1.5566, "step": 4074 }, { "epoch": 0.8353833538335383, "grad_norm": 0.40703109422690775, "learning_rate": 1.3882898617630569e-05, "loss": 1.4885, "step": 4075 }, { "epoch": 0.8355883558835588, "grad_norm": 0.44321100817299836, "learning_rate": 1.3849163226546902e-05, "loss": 1.4935, "step": 4076 }, { "epoch": 0.8357933579335793, "grad_norm": 0.411309352068566, "learning_rate": 1.3815465824067153e-05, "loss": 1.4615, "step": 4077 }, { "epoch": 0.8359983599835998, "grad_norm": 0.4211245698043953, "learning_rate": 1.3781806425050303e-05, "loss": 1.5172, "step": 4078 }, { "epoch": 0.8362033620336203, "grad_norm": 0.4185606618630242, "learning_rate": 1.3748185044338669e-05, "loss": 1.4531, "step": 4079 }, { "epoch": 0.8364083640836408, "grad_norm": 0.4270472197962592, "learning_rate": 1.3714601696757712e-05, "loss": 1.5748, "step": 4080 }, { "epoch": 0.8366133661336613, "grad_norm": 0.42700289622647514, "learning_rate": 1.3681056397116198e-05, "loss": 1.4934, "step": 4081 }, { "epoch": 0.8368183681836818, "grad_norm": 0.43607904301376715, "learning_rate": 1.3647549160206075e-05, "loss": 1.4756, "step": 4082 }, { "epoch": 0.8370233702337023, "grad_norm": 0.39851169789580204, "learning_rate": 1.3614080000802487e-05, "loss": 1.443, "step": 4083 }, { "epoch": 0.8372283722837228, "grad_norm": 0.43119601429217375, "learning_rate": 1.358064893366382e-05, "loss": 1.5449, "step": 4084 }, { "epoch": 0.8374333743337433, "grad_norm": 0.4251188737325228, "learning_rate": 1.3547255973531648e-05, "loss": 1.505, "step": 4085 }, { "epoch": 0.8376383763837638, "grad_norm": 0.46717160726417184, "learning_rate": 1.351390113513078e-05, "loss": 1.5261, "step": 4086 }, { "epoch": 0.8378433784337843, "grad_norm": 0.44398503012810975, "learning_rate": 1.3480584433169174e-05, "loss": 1.5036, "step": 4087 }, { "epoch": 0.8380483804838048, "grad_norm": 0.43747392112788513, "learning_rate": 1.3447305882337968e-05, "loss": 1.4621, "step": 4088 }, { "epoch": 0.8382533825338253, "grad_norm": 0.4424733247384556, "learning_rate": 1.3414065497311478e-05, "loss": 1.5323, "step": 4089 }, { "epoch": 0.8384583845838458, "grad_norm": 0.43783215535801406, "learning_rate": 1.3380863292747214e-05, "loss": 1.472, "step": 4090 }, { "epoch": 0.8386633866338663, "grad_norm": 0.40336961353481776, "learning_rate": 1.3347699283285875e-05, "loss": 1.4637, "step": 4091 }, { "epoch": 0.8388683886838868, "grad_norm": 0.4323102209883956, "learning_rate": 1.331457348355125e-05, "loss": 1.5092, "step": 4092 }, { "epoch": 0.8390733907339073, "grad_norm": 0.41398547995065305, "learning_rate": 1.3281485908150315e-05, "loss": 1.513, "step": 4093 }, { "epoch": 0.8392783927839278, "grad_norm": 0.4104016327713903, "learning_rate": 1.3248436571673162e-05, "loss": 1.5075, "step": 4094 }, { "epoch": 0.8394833948339483, "grad_norm": 0.45945721944098505, "learning_rate": 1.3215425488693078e-05, "loss": 1.5099, "step": 4095 }, { "epoch": 0.8396883968839688, "grad_norm": 0.41985751920089753, "learning_rate": 1.3182452673766454e-05, "loss": 1.4774, "step": 4096 }, { "epoch": 0.8398933989339893, "grad_norm": 0.4424241484434764, "learning_rate": 1.3149518141432804e-05, "loss": 1.4572, "step": 4097 }, { "epoch": 0.8400984009840098, "grad_norm": 0.434656299403232, "learning_rate": 1.3116621906214743e-05, "loss": 1.4646, "step": 4098 }, { "epoch": 0.8403034030340304, "grad_norm": 0.4157358420521401, "learning_rate": 1.3083763982618025e-05, "loss": 1.4931, "step": 4099 }, { "epoch": 0.8405084050840509, "grad_norm": 0.4457006185067519, "learning_rate": 1.3050944385131447e-05, "loss": 1.5404, "step": 4100 }, { "epoch": 0.8407134071340714, "grad_norm": 0.44221406300249744, "learning_rate": 1.3018163128227057e-05, "loss": 1.5158, "step": 4101 }, { "epoch": 0.8409184091840919, "grad_norm": 0.4218222688641252, "learning_rate": 1.2985420226359846e-05, "loss": 1.4694, "step": 4102 }, { "epoch": 0.8411234112341124, "grad_norm": 0.46390991997664527, "learning_rate": 1.2952715693967964e-05, "loss": 1.5141, "step": 4103 }, { "epoch": 0.8413284132841329, "grad_norm": 0.4365061168954958, "learning_rate": 1.2920049545472602e-05, "loss": 1.4831, "step": 4104 }, { "epoch": 0.8415334153341534, "grad_norm": 0.3834868482231454, "learning_rate": 1.2887421795278044e-05, "loss": 1.4766, "step": 4105 }, { "epoch": 0.8417384173841739, "grad_norm": 0.4150280334364376, "learning_rate": 1.2854832457771648e-05, "loss": 1.4822, "step": 4106 }, { "epoch": 0.8419434194341944, "grad_norm": 0.4132803357544623, "learning_rate": 1.2822281547323867e-05, "loss": 1.4133, "step": 4107 }, { "epoch": 0.8421484214842149, "grad_norm": 0.4423716162927889, "learning_rate": 1.278976907828815e-05, "loss": 1.5493, "step": 4108 }, { "epoch": 0.8423534235342354, "grad_norm": 0.43658856717276107, "learning_rate": 1.2757295065001007e-05, "loss": 1.5239, "step": 4109 }, { "epoch": 0.8425584255842559, "grad_norm": 0.42346545413445763, "learning_rate": 1.2724859521781996e-05, "loss": 1.5179, "step": 4110 }, { "epoch": 0.8427634276342764, "grad_norm": 0.42778902649789075, "learning_rate": 1.269246246293374e-05, "loss": 1.401, "step": 4111 }, { "epoch": 0.8429684296842969, "grad_norm": 0.4473573092377989, "learning_rate": 1.2660103902741871e-05, "loss": 1.5053, "step": 4112 }, { "epoch": 0.8431734317343174, "grad_norm": 0.41684770909553376, "learning_rate": 1.262778385547504e-05, "loss": 1.5044, "step": 4113 }, { "epoch": 0.8433784337843379, "grad_norm": 0.4922006371516634, "learning_rate": 1.2595502335384912e-05, "loss": 1.4563, "step": 4114 }, { "epoch": 0.8435834358343584, "grad_norm": 0.43190174808785736, "learning_rate": 1.2563259356706147e-05, "loss": 1.5369, "step": 4115 }, { "epoch": 0.8437884378843789, "grad_norm": 0.4313134129047217, "learning_rate": 1.253105493365646e-05, "loss": 1.4374, "step": 4116 }, { "epoch": 0.8439934399343993, "grad_norm": 0.4023212907054902, "learning_rate": 1.2498889080436549e-05, "loss": 1.4541, "step": 4117 }, { "epoch": 0.8441984419844198, "grad_norm": 0.42201721492040123, "learning_rate": 1.2466761811230098e-05, "loss": 1.4871, "step": 4118 }, { "epoch": 0.8444034440344403, "grad_norm": 0.46135414242003453, "learning_rate": 1.2434673140203745e-05, "loss": 1.5035, "step": 4119 }, { "epoch": 0.8446084460844608, "grad_norm": 0.4290380524381426, "learning_rate": 1.2402623081507126e-05, "loss": 1.5248, "step": 4120 }, { "epoch": 0.8448134481344813, "grad_norm": 0.4125116008014693, "learning_rate": 1.2370611649272878e-05, "loss": 1.5291, "step": 4121 }, { "epoch": 0.8450184501845018, "grad_norm": 0.41585347883182866, "learning_rate": 1.2338638857616613e-05, "loss": 1.463, "step": 4122 }, { "epoch": 0.8452234522345223, "grad_norm": 0.4045722738621591, "learning_rate": 1.2306704720636852e-05, "loss": 1.4222, "step": 4123 }, { "epoch": 0.8454284542845428, "grad_norm": 0.48315250910190094, "learning_rate": 1.227480925241511e-05, "loss": 1.5062, "step": 4124 }, { "epoch": 0.8456334563345633, "grad_norm": 0.4262544886355197, "learning_rate": 1.22429524670158e-05, "loss": 1.5121, "step": 4125 }, { "epoch": 0.8458384583845838, "grad_norm": 0.4728518838554152, "learning_rate": 1.2211134378486378e-05, "loss": 1.4759, "step": 4126 }, { "epoch": 0.8460434604346043, "grad_norm": 0.38890553968018216, "learning_rate": 1.2179355000857119e-05, "loss": 1.506, "step": 4127 }, { "epoch": 0.8462484624846248, "grad_norm": 0.4217522643826517, "learning_rate": 1.2147614348141335e-05, "loss": 1.5279, "step": 4128 }, { "epoch": 0.8464534645346453, "grad_norm": 0.40840689414950304, "learning_rate": 1.2115912434335187e-05, "loss": 1.4975, "step": 4129 }, { "epoch": 0.8466584665846658, "grad_norm": 0.4337534256898972, "learning_rate": 1.2084249273417759e-05, "loss": 1.4863, "step": 4130 }, { "epoch": 0.8468634686346863, "grad_norm": 0.44411041644152915, "learning_rate": 1.2052624879351104e-05, "loss": 1.5731, "step": 4131 }, { "epoch": 0.8470684706847068, "grad_norm": 0.4020542945675568, "learning_rate": 1.2021039266080104e-05, "loss": 1.4646, "step": 4132 }, { "epoch": 0.8472734727347273, "grad_norm": 0.4031473741737517, "learning_rate": 1.1989492447532613e-05, "loss": 1.4506, "step": 4133 }, { "epoch": 0.8474784747847478, "grad_norm": 0.3980220562439092, "learning_rate": 1.195798443761933e-05, "loss": 1.4365, "step": 4134 }, { "epoch": 0.8476834768347683, "grad_norm": 0.44446476612360397, "learning_rate": 1.1926515250233839e-05, "loss": 1.5009, "step": 4135 }, { "epoch": 0.8478884788847888, "grad_norm": 0.4360524507061598, "learning_rate": 1.1895084899252663e-05, "loss": 1.4678, "step": 4136 }, { "epoch": 0.8480934809348093, "grad_norm": 0.4334059789136883, "learning_rate": 1.1863693398535114e-05, "loss": 1.4738, "step": 4137 }, { "epoch": 0.8482984829848298, "grad_norm": 0.3767589040866388, "learning_rate": 1.1832340761923444e-05, "loss": 1.4884, "step": 4138 }, { "epoch": 0.8485034850348504, "grad_norm": 0.42308555232716255, "learning_rate": 1.1801027003242749e-05, "loss": 1.4751, "step": 4139 }, { "epoch": 0.8487084870848709, "grad_norm": 0.37688539315308583, "learning_rate": 1.1769752136300927e-05, "loss": 1.4351, "step": 4140 }, { "epoch": 0.8489134891348914, "grad_norm": 0.40308373744729736, "learning_rate": 1.1738516174888836e-05, "loss": 1.4775, "step": 4141 }, { "epoch": 0.8491184911849119, "grad_norm": 0.4557699319332778, "learning_rate": 1.170731913278007e-05, "loss": 1.4945, "step": 4142 }, { "epoch": 0.8493234932349324, "grad_norm": 0.4262927329939197, "learning_rate": 1.1676161023731114e-05, "loss": 1.5031, "step": 4143 }, { "epoch": 0.8495284952849529, "grad_norm": 0.40261358364790967, "learning_rate": 1.1645041861481288e-05, "loss": 1.4495, "step": 4144 }, { "epoch": 0.8497334973349734, "grad_norm": 0.44112937158919285, "learning_rate": 1.1613961659752715e-05, "loss": 1.5159, "step": 4145 }, { "epoch": 0.8499384993849939, "grad_norm": 0.4204954968162001, "learning_rate": 1.1582920432250388e-05, "loss": 1.5136, "step": 4146 }, { "epoch": 0.8501435014350144, "grad_norm": 0.4125128231081385, "learning_rate": 1.1551918192662048e-05, "loss": 1.4665, "step": 4147 }, { "epoch": 0.8503485034850349, "grad_norm": 0.4337987555691318, "learning_rate": 1.1520954954658247e-05, "loss": 1.4972, "step": 4148 }, { "epoch": 0.8505535055350554, "grad_norm": 0.4546567480474969, "learning_rate": 1.149003073189242e-05, "loss": 1.5252, "step": 4149 }, { "epoch": 0.8507585075850759, "grad_norm": 0.4052719865626236, "learning_rate": 1.1459145538000705e-05, "loss": 1.4715, "step": 4150 }, { "epoch": 0.8509635096350964, "grad_norm": 0.4425343036806855, "learning_rate": 1.1428299386602104e-05, "loss": 1.5461, "step": 4151 }, { "epoch": 0.8511685116851169, "grad_norm": 0.4179351243946177, "learning_rate": 1.139749229129834e-05, "loss": 1.4199, "step": 4152 }, { "epoch": 0.8513735137351374, "grad_norm": 0.39347999398500727, "learning_rate": 1.1366724265673933e-05, "loss": 1.4521, "step": 4153 }, { "epoch": 0.8515785157851579, "grad_norm": 0.49832723323802064, "learning_rate": 1.1335995323296222e-05, "loss": 1.4861, "step": 4154 }, { "epoch": 0.8517835178351784, "grad_norm": 0.4168686029341923, "learning_rate": 1.1305305477715256e-05, "loss": 1.5202, "step": 4155 }, { "epoch": 0.8519885198851989, "grad_norm": 0.4824042221259518, "learning_rate": 1.1274654742463841e-05, "loss": 1.5418, "step": 4156 }, { "epoch": 0.8521935219352194, "grad_norm": 0.4225640926450485, "learning_rate": 1.1244043131057592e-05, "loss": 1.4781, "step": 4157 }, { "epoch": 0.8523985239852399, "grad_norm": 0.4083433149237101, "learning_rate": 1.1213470656994817e-05, "loss": 1.4527, "step": 4158 }, { "epoch": 0.8526035260352604, "grad_norm": 0.39471799456166173, "learning_rate": 1.1182937333756582e-05, "loss": 1.4227, "step": 4159 }, { "epoch": 0.8528085280852808, "grad_norm": 0.4162050440104547, "learning_rate": 1.1152443174806725e-05, "loss": 1.4783, "step": 4160 }, { "epoch": 0.8530135301353013, "grad_norm": 0.4929613126886529, "learning_rate": 1.1121988193591737e-05, "loss": 1.5586, "step": 4161 }, { "epoch": 0.8532185321853218, "grad_norm": 0.4340736942317783, "learning_rate": 1.109157240354094e-05, "loss": 1.4842, "step": 4162 }, { "epoch": 0.8534235342353423, "grad_norm": 0.41727988054721904, "learning_rate": 1.1061195818066284e-05, "loss": 1.4777, "step": 4163 }, { "epoch": 0.8536285362853628, "grad_norm": 0.43616071509035476, "learning_rate": 1.1030858450562442e-05, "loss": 1.4103, "step": 4164 }, { "epoch": 0.8538335383353833, "grad_norm": 0.4246995343847903, "learning_rate": 1.100056031440685e-05, "loss": 1.4944, "step": 4165 }, { "epoch": 0.8540385403854038, "grad_norm": 0.42822049962519326, "learning_rate": 1.0970301422959583e-05, "loss": 1.4971, "step": 4166 }, { "epoch": 0.8542435424354243, "grad_norm": 0.43015427213922725, "learning_rate": 1.0940081789563461e-05, "loss": 1.496, "step": 4167 }, { "epoch": 0.8544485444854448, "grad_norm": 0.4510875221798475, "learning_rate": 1.0909901427543968e-05, "loss": 1.5228, "step": 4168 }, { "epoch": 0.8546535465354653, "grad_norm": 0.41347933720167424, "learning_rate": 1.0879760350209234e-05, "loss": 1.501, "step": 4169 }, { "epoch": 0.8548585485854858, "grad_norm": 0.3948400745747476, "learning_rate": 1.0849658570850152e-05, "loss": 1.5529, "step": 4170 }, { "epoch": 0.8550635506355063, "grad_norm": 0.4064343396996718, "learning_rate": 1.0819596102740193e-05, "loss": 1.4636, "step": 4171 }, { "epoch": 0.8552685526855268, "grad_norm": 0.42274600906019216, "learning_rate": 1.0789572959135597e-05, "loss": 1.4089, "step": 4172 }, { "epoch": 0.8554735547355473, "grad_norm": 0.45452119318393625, "learning_rate": 1.0759589153275163e-05, "loss": 1.4974, "step": 4173 }, { "epoch": 0.8556785567855678, "grad_norm": 0.4309073461580129, "learning_rate": 1.0729644698380403e-05, "loss": 1.5576, "step": 4174 }, { "epoch": 0.8558835588355883, "grad_norm": 0.42434758651070265, "learning_rate": 1.0699739607655435e-05, "loss": 1.4597, "step": 4175 }, { "epoch": 0.8560885608856088, "grad_norm": 0.4077456640838372, "learning_rate": 1.0669873894287052e-05, "loss": 1.4549, "step": 4176 }, { "epoch": 0.8562935629356293, "grad_norm": 0.4380778822990617, "learning_rate": 1.0640047571444722e-05, "loss": 1.4855, "step": 4177 }, { "epoch": 0.8564985649856498, "grad_norm": 0.47499256880701546, "learning_rate": 1.0610260652280469e-05, "loss": 1.537, "step": 4178 }, { "epoch": 0.8567035670356704, "grad_norm": 0.4848525994042098, "learning_rate": 1.058051314992896e-05, "loss": 1.4817, "step": 4179 }, { "epoch": 0.8569085690856909, "grad_norm": 0.4382805146474575, "learning_rate": 1.0550805077507475e-05, "loss": 1.5312, "step": 4180 }, { "epoch": 0.8571135711357114, "grad_norm": 0.43980349710025407, "learning_rate": 1.0521136448115954e-05, "loss": 1.5196, "step": 4181 }, { "epoch": 0.8573185731857319, "grad_norm": 0.4279471456597939, "learning_rate": 1.049150727483692e-05, "loss": 1.4811, "step": 4182 }, { "epoch": 0.8575235752357524, "grad_norm": 0.42014260987782875, "learning_rate": 1.0461917570735491e-05, "loss": 1.4568, "step": 4183 }, { "epoch": 0.8577285772857729, "grad_norm": 0.41942169326765594, "learning_rate": 1.0432367348859362e-05, "loss": 1.4781, "step": 4184 }, { "epoch": 0.8579335793357934, "grad_norm": 0.40641492600052964, "learning_rate": 1.0402856622238832e-05, "loss": 1.499, "step": 4185 }, { "epoch": 0.8581385813858139, "grad_norm": 0.41570132072114757, "learning_rate": 1.0373385403886792e-05, "loss": 1.4406, "step": 4186 }, { "epoch": 0.8583435834358344, "grad_norm": 0.4223311677636492, "learning_rate": 1.034395370679876e-05, "loss": 1.4797, "step": 4187 }, { "epoch": 0.8585485854858549, "grad_norm": 0.42141961497704095, "learning_rate": 1.0314561543952729e-05, "loss": 1.4437, "step": 4188 }, { "epoch": 0.8587535875358754, "grad_norm": 0.4363700177051303, "learning_rate": 1.028520892830932e-05, "loss": 1.5351, "step": 4189 }, { "epoch": 0.8589585895858959, "grad_norm": 0.4385994172438943, "learning_rate": 1.0255895872811683e-05, "loss": 1.4796, "step": 4190 }, { "epoch": 0.8591635916359164, "grad_norm": 0.444436929476376, "learning_rate": 1.0226622390385554e-05, "loss": 1.4326, "step": 4191 }, { "epoch": 0.8593685936859369, "grad_norm": 0.3943030917234177, "learning_rate": 1.0197388493939242e-05, "loss": 1.4727, "step": 4192 }, { "epoch": 0.8595735957359574, "grad_norm": 0.42236713500496903, "learning_rate": 1.0168194196363534e-05, "loss": 1.4825, "step": 4193 }, { "epoch": 0.8597785977859779, "grad_norm": 0.3747138113033476, "learning_rate": 1.01390395105318e-05, "loss": 1.4119, "step": 4194 }, { "epoch": 0.8599835998359984, "grad_norm": 0.4678661263058261, "learning_rate": 1.0109924449299901e-05, "loss": 1.4742, "step": 4195 }, { "epoch": 0.8601886018860189, "grad_norm": 0.4444176502569113, "learning_rate": 1.0080849025506279e-05, "loss": 1.4235, "step": 4196 }, { "epoch": 0.8603936039360394, "grad_norm": 0.43547922030464714, "learning_rate": 1.0051813251971898e-05, "loss": 1.4836, "step": 4197 }, { "epoch": 0.8605986059860599, "grad_norm": 0.4315554574480792, "learning_rate": 1.0022817141500196e-05, "loss": 1.5749, "step": 4198 }, { "epoch": 0.8608036080360804, "grad_norm": 0.4929229154152233, "learning_rate": 9.993860706877135e-06, "loss": 1.5223, "step": 4199 }, { "epoch": 0.8610086100861009, "grad_norm": 0.4230669418185058, "learning_rate": 9.964943960871186e-06, "loss": 1.4754, "step": 4200 }, { "epoch": 0.8612136121361214, "grad_norm": 0.4065688935199412, "learning_rate": 9.93606691623329e-06, "loss": 1.471, "step": 4201 }, { "epoch": 0.8614186141861419, "grad_norm": 0.42764817804893823, "learning_rate": 9.907229585696986e-06, "loss": 1.4991, "step": 4202 }, { "epoch": 0.8616236162361623, "grad_norm": 0.3699568942068727, "learning_rate": 9.878431981978176e-06, "loss": 1.4631, "step": 4203 }, { "epoch": 0.8618286182861828, "grad_norm": 0.4057556544599368, "learning_rate": 9.849674117775299e-06, "loss": 1.4801, "step": 4204 }, { "epoch": 0.8620336203362033, "grad_norm": 0.45998238538523883, "learning_rate": 9.820956005769278e-06, "loss": 1.535, "step": 4205 }, { "epoch": 0.8622386223862238, "grad_norm": 0.4489879082621855, "learning_rate": 9.792277658623461e-06, "loss": 1.5197, "step": 4206 }, { "epoch": 0.8624436244362443, "grad_norm": 0.42107743766253847, "learning_rate": 9.763639088983722e-06, "loss": 1.4833, "step": 4207 }, { "epoch": 0.8626486264862648, "grad_norm": 0.3804768939174557, "learning_rate": 9.735040309478394e-06, "loss": 1.4694, "step": 4208 }, { "epoch": 0.8628536285362853, "grad_norm": 0.40233385635308083, "learning_rate": 9.706481332718208e-06, "loss": 1.4571, "step": 4209 }, { "epoch": 0.8630586305863058, "grad_norm": 0.39613851885223883, "learning_rate": 9.677962171296395e-06, "loss": 1.472, "step": 4210 }, { "epoch": 0.8632636326363263, "grad_norm": 0.4129840646275053, "learning_rate": 9.649482837788559e-06, "loss": 1.4308, "step": 4211 }, { "epoch": 0.8634686346863468, "grad_norm": 0.4394922531420773, "learning_rate": 9.621043344752834e-06, "loss": 1.4982, "step": 4212 }, { "epoch": 0.8636736367363673, "grad_norm": 0.38478743119041986, "learning_rate": 9.592643704729753e-06, "loss": 1.5066, "step": 4213 }, { "epoch": 0.8638786387863878, "grad_norm": 0.4101073038765194, "learning_rate": 9.564283930242257e-06, "loss": 1.5096, "step": 4214 }, { "epoch": 0.8640836408364083, "grad_norm": 0.3925096037052107, "learning_rate": 9.535964033795708e-06, "loss": 1.4631, "step": 4215 }, { "epoch": 0.8642886428864288, "grad_norm": 0.39001733363386804, "learning_rate": 9.507684027877884e-06, "loss": 1.4507, "step": 4216 }, { "epoch": 0.8644936449364493, "grad_norm": 0.3972002241726259, "learning_rate": 9.479443924959008e-06, "loss": 1.3943, "step": 4217 }, { "epoch": 0.8646986469864698, "grad_norm": 0.4507564362276557, "learning_rate": 9.451243737491654e-06, "loss": 1.5709, "step": 4218 }, { "epoch": 0.8649036490364904, "grad_norm": 0.38149621005196965, "learning_rate": 9.423083477910854e-06, "loss": 1.4806, "step": 4219 }, { "epoch": 0.8651086510865109, "grad_norm": 0.44735436609976137, "learning_rate": 9.394963158633995e-06, "loss": 1.4901, "step": 4220 }, { "epoch": 0.8653136531365314, "grad_norm": 0.4369129859042145, "learning_rate": 9.366882792060827e-06, "loss": 1.5002, "step": 4221 }, { "epoch": 0.8655186551865519, "grad_norm": 0.42121606214503354, "learning_rate": 9.338842390573566e-06, "loss": 1.478, "step": 4222 }, { "epoch": 0.8657236572365724, "grad_norm": 0.3897416846088761, "learning_rate": 9.31084196653671e-06, "loss": 1.4581, "step": 4223 }, { "epoch": 0.8659286592865929, "grad_norm": 0.40879126238424246, "learning_rate": 9.282881532297205e-06, "loss": 1.4583, "step": 4224 }, { "epoch": 0.8661336613366134, "grad_norm": 0.44276131025032817, "learning_rate": 9.254961100184333e-06, "loss": 1.509, "step": 4225 }, { "epoch": 0.8663386633866339, "grad_norm": 0.4394094281916402, "learning_rate": 9.227080682509693e-06, "loss": 1.5744, "step": 4226 }, { "epoch": 0.8665436654366544, "grad_norm": 0.410236230190187, "learning_rate": 9.199240291567336e-06, "loss": 1.4625, "step": 4227 }, { "epoch": 0.8667486674866749, "grad_norm": 0.4708193078012326, "learning_rate": 9.171439939633564e-06, "loss": 1.5104, "step": 4228 }, { "epoch": 0.8669536695366954, "grad_norm": 0.4028185800653282, "learning_rate": 9.143679638967106e-06, "loss": 1.4632, "step": 4229 }, { "epoch": 0.8671586715867159, "grad_norm": 0.36620970294973504, "learning_rate": 9.115959401808983e-06, "loss": 1.4854, "step": 4230 }, { "epoch": 0.8673636736367364, "grad_norm": 0.39047334149894813, "learning_rate": 9.088279240382536e-06, "loss": 1.4853, "step": 4231 }, { "epoch": 0.8675686756867569, "grad_norm": 0.40868567495099245, "learning_rate": 9.060639166893493e-06, "loss": 1.5342, "step": 4232 }, { "epoch": 0.8677736777367774, "grad_norm": 0.4747592219893723, "learning_rate": 9.033039193529857e-06, "loss": 1.5702, "step": 4233 }, { "epoch": 0.8679786797867979, "grad_norm": 0.4599886482785605, "learning_rate": 9.00547933246193e-06, "loss": 1.467, "step": 4234 }, { "epoch": 0.8681836818368184, "grad_norm": 0.39518441818371003, "learning_rate": 8.977959595842412e-06, "loss": 1.421, "step": 4235 }, { "epoch": 0.8683886838868389, "grad_norm": 0.4170806018914923, "learning_rate": 8.950479995806215e-06, "loss": 1.4702, "step": 4236 }, { "epoch": 0.8685936859368594, "grad_norm": 0.4344307439072057, "learning_rate": 8.923040544470629e-06, "loss": 1.5347, "step": 4237 }, { "epoch": 0.8687986879868799, "grad_norm": 0.46008997249726113, "learning_rate": 8.895641253935182e-06, "loss": 1.536, "step": 4238 }, { "epoch": 0.8690036900369004, "grad_norm": 0.4382179003295939, "learning_rate": 8.868282136281703e-06, "loss": 1.5422, "step": 4239 }, { "epoch": 0.8692086920869209, "grad_norm": 0.3935056480118967, "learning_rate": 8.840963203574348e-06, "loss": 1.5094, "step": 4240 }, { "epoch": 0.8694136941369414, "grad_norm": 0.4756323404083744, "learning_rate": 8.813684467859507e-06, "loss": 1.5178, "step": 4241 }, { "epoch": 0.8696186961869619, "grad_norm": 0.42978310761280036, "learning_rate": 8.786445941165878e-06, "loss": 1.4612, "step": 4242 }, { "epoch": 0.8698236982369824, "grad_norm": 0.43004323838058317, "learning_rate": 8.759247635504408e-06, "loss": 1.4408, "step": 4243 }, { "epoch": 0.8700287002870029, "grad_norm": 0.412104087334505, "learning_rate": 8.732089562868295e-06, "loss": 1.4698, "step": 4244 }, { "epoch": 0.8702337023370234, "grad_norm": 0.4366207912804082, "learning_rate": 8.704971735233048e-06, "loss": 1.4923, "step": 4245 }, { "epoch": 0.8704387043870438, "grad_norm": 0.45767928416432124, "learning_rate": 8.677894164556356e-06, "loss": 1.5269, "step": 4246 }, { "epoch": 0.8706437064370643, "grad_norm": 0.4178834734045859, "learning_rate": 8.650856862778245e-06, "loss": 1.4923, "step": 4247 }, { "epoch": 0.8708487084870848, "grad_norm": 0.38339128832077085, "learning_rate": 8.623859841820903e-06, "loss": 1.4559, "step": 4248 }, { "epoch": 0.8710537105371053, "grad_norm": 0.3641696256162045, "learning_rate": 8.596903113588806e-06, "loss": 1.4266, "step": 4249 }, { "epoch": 0.8712587125871258, "grad_norm": 0.4227922466624673, "learning_rate": 8.569986689968611e-06, "loss": 1.4577, "step": 4250 }, { "epoch": 0.8714637146371463, "grad_norm": 0.4074902888434092, "learning_rate": 8.543110582829272e-06, "loss": 1.4784, "step": 4251 }, { "epoch": 0.8716687166871668, "grad_norm": 0.3779493043347049, "learning_rate": 8.51627480402193e-06, "loss": 1.4892, "step": 4252 }, { "epoch": 0.8718737187371873, "grad_norm": 0.40829898208490795, "learning_rate": 8.489479365379949e-06, "loss": 1.5291, "step": 4253 }, { "epoch": 0.8720787207872078, "grad_norm": 0.4526017370721461, "learning_rate": 8.462724278718882e-06, "loss": 1.4941, "step": 4254 }, { "epoch": 0.8722837228372283, "grad_norm": 0.41561719708546446, "learning_rate": 8.4360095558365e-06, "loss": 1.4973, "step": 4255 }, { "epoch": 0.8724887248872488, "grad_norm": 0.43827811565348723, "learning_rate": 8.409335208512803e-06, "loss": 1.5097, "step": 4256 }, { "epoch": 0.8726937269372693, "grad_norm": 0.41387120080020373, "learning_rate": 8.382701248509949e-06, "loss": 1.5527, "step": 4257 }, { "epoch": 0.8728987289872898, "grad_norm": 0.39352095279882277, "learning_rate": 8.356107687572324e-06, "loss": 1.4886, "step": 4258 }, { "epoch": 0.8731037310373104, "grad_norm": 0.4465117634916217, "learning_rate": 8.329554537426465e-06, "loss": 1.4878, "step": 4259 }, { "epoch": 0.8733087330873309, "grad_norm": 0.44303676088370064, "learning_rate": 8.303041809781088e-06, "loss": 1.4686, "step": 4260 }, { "epoch": 0.8735137351373514, "grad_norm": 0.5095313285999533, "learning_rate": 8.27656951632715e-06, "loss": 1.5157, "step": 4261 }, { "epoch": 0.8737187371873719, "grad_norm": 0.39851770952819626, "learning_rate": 8.250137668737667e-06, "loss": 1.458, "step": 4262 }, { "epoch": 0.8739237392373924, "grad_norm": 0.4203748814655289, "learning_rate": 8.223746278667942e-06, "loss": 1.53, "step": 4263 }, { "epoch": 0.8741287412874129, "grad_norm": 0.4693413530901096, "learning_rate": 8.197395357755355e-06, "loss": 1.5987, "step": 4264 }, { "epoch": 0.8743337433374334, "grad_norm": 0.3998092303594904, "learning_rate": 8.171084917619454e-06, "loss": 1.4713, "step": 4265 }, { "epoch": 0.8745387453874539, "grad_norm": 0.40235441996765325, "learning_rate": 8.144814969861936e-06, "loss": 1.5324, "step": 4266 }, { "epoch": 0.8747437474374744, "grad_norm": 0.405840537687244, "learning_rate": 8.11858552606668e-06, "loss": 1.4868, "step": 4267 }, { "epoch": 0.8749487494874949, "grad_norm": 0.4351379641523539, "learning_rate": 8.092396597799689e-06, "loss": 1.4358, "step": 4268 }, { "epoch": 0.8751537515375154, "grad_norm": 0.4022996855143869, "learning_rate": 8.066248196609072e-06, "loss": 1.4827, "step": 4269 }, { "epoch": 0.8753587535875359, "grad_norm": 0.4619074994391109, "learning_rate": 8.040140334025082e-06, "loss": 1.515, "step": 4270 }, { "epoch": 0.8755637556375564, "grad_norm": 0.3989868523353025, "learning_rate": 8.014073021560086e-06, "loss": 1.4552, "step": 4271 }, { "epoch": 0.8757687576875769, "grad_norm": 0.41904062067240616, "learning_rate": 7.988046270708616e-06, "loss": 1.4556, "step": 4272 }, { "epoch": 0.8759737597375974, "grad_norm": 0.4504709236134617, "learning_rate": 7.962060092947277e-06, "loss": 1.4927, "step": 4273 }, { "epoch": 0.8761787617876179, "grad_norm": 0.44010629176765953, "learning_rate": 7.936114499734792e-06, "loss": 1.5425, "step": 4274 }, { "epoch": 0.8763837638376384, "grad_norm": 0.4100243902057019, "learning_rate": 7.91020950251199e-06, "loss": 1.4879, "step": 4275 }, { "epoch": 0.8765887658876589, "grad_norm": 0.42681235433148745, "learning_rate": 7.884345112701764e-06, "loss": 1.4558, "step": 4276 }, { "epoch": 0.8767937679376794, "grad_norm": 0.39334661084575034, "learning_rate": 7.858521341709168e-06, "loss": 1.5146, "step": 4277 }, { "epoch": 0.8769987699876999, "grad_norm": 0.38736696773150486, "learning_rate": 7.83273820092133e-06, "loss": 1.4604, "step": 4278 }, { "epoch": 0.8772037720377204, "grad_norm": 0.39709696775443526, "learning_rate": 7.80699570170742e-06, "loss": 1.432, "step": 4279 }, { "epoch": 0.8774087740877409, "grad_norm": 0.41612812643447483, "learning_rate": 7.781293855418703e-06, "loss": 1.4924, "step": 4280 }, { "epoch": 0.8776137761377614, "grad_norm": 0.46812269509607357, "learning_rate": 7.755632673388525e-06, "loss": 1.5099, "step": 4281 }, { "epoch": 0.8778187781877819, "grad_norm": 0.4170027286603011, "learning_rate": 7.7300121669323e-06, "loss": 1.4918, "step": 4282 }, { "epoch": 0.8780237802378024, "grad_norm": 0.373162785284076, "learning_rate": 7.704432347347535e-06, "loss": 1.4712, "step": 4283 }, { "epoch": 0.8782287822878229, "grad_norm": 0.41775581774521026, "learning_rate": 7.67889322591374e-06, "loss": 1.4908, "step": 4284 }, { "epoch": 0.8784337843378434, "grad_norm": 0.440317165079676, "learning_rate": 7.653394813892523e-06, "loss": 1.4927, "step": 4285 }, { "epoch": 0.8786387863878639, "grad_norm": 0.41508998038173794, "learning_rate": 7.627937122527507e-06, "loss": 1.5474, "step": 4286 }, { "epoch": 0.8788437884378844, "grad_norm": 0.44862409038027806, "learning_rate": 7.6025201630443795e-06, "loss": 1.5374, "step": 4287 }, { "epoch": 0.8790487904879049, "grad_norm": 0.4386583881089859, "learning_rate": 7.577143946650889e-06, "loss": 1.5678, "step": 4288 }, { "epoch": 0.8792537925379253, "grad_norm": 0.41497831541122543, "learning_rate": 7.551808484536782e-06, "loss": 1.4867, "step": 4289 }, { "epoch": 0.8794587945879458, "grad_norm": 0.3986380875785754, "learning_rate": 7.526513787873835e-06, "loss": 1.5332, "step": 4290 }, { "epoch": 0.8796637966379663, "grad_norm": 0.4397287389332245, "learning_rate": 7.501259867815847e-06, "loss": 1.4814, "step": 4291 }, { "epoch": 0.8798687986879868, "grad_norm": 0.40803663498113696, "learning_rate": 7.476046735498676e-06, "loss": 1.459, "step": 4292 }, { "epoch": 0.8800738007380073, "grad_norm": 0.4113532718027076, "learning_rate": 7.450874402040176e-06, "loss": 1.4226, "step": 4293 }, { "epoch": 0.8802788027880278, "grad_norm": 0.4696285085274754, "learning_rate": 7.4257428785401764e-06, "loss": 1.4852, "step": 4294 }, { "epoch": 0.8804838048380483, "grad_norm": 0.48518595019249383, "learning_rate": 7.400652176080558e-06, "loss": 1.5037, "step": 4295 }, { "epoch": 0.8806888068880688, "grad_norm": 0.4099246803436755, "learning_rate": 7.375602305725138e-06, "loss": 1.4554, "step": 4296 }, { "epoch": 0.8808938089380893, "grad_norm": 0.3764477098910107, "learning_rate": 7.350593278519824e-06, "loss": 1.4645, "step": 4297 }, { "epoch": 0.8810988109881098, "grad_norm": 0.39809139125590737, "learning_rate": 7.325625105492428e-06, "loss": 1.4922, "step": 4298 }, { "epoch": 0.8813038130381304, "grad_norm": 0.40945707627666006, "learning_rate": 7.3006977976528004e-06, "loss": 1.4548, "step": 4299 }, { "epoch": 0.8815088150881509, "grad_norm": 0.38372289975393553, "learning_rate": 7.275811365992735e-06, "loss": 1.448, "step": 4300 }, { "epoch": 0.8817138171381714, "grad_norm": 0.47339773925572554, "learning_rate": 7.250965821486011e-06, "loss": 1.5102, "step": 4301 }, { "epoch": 0.8819188191881919, "grad_norm": 0.4036016508790276, "learning_rate": 7.22616117508842e-06, "loss": 1.58, "step": 4302 }, { "epoch": 0.8821238212382124, "grad_norm": 0.4572132388300134, "learning_rate": 7.201397437737634e-06, "loss": 1.469, "step": 4303 }, { "epoch": 0.8823288232882329, "grad_norm": 0.41993333750887724, "learning_rate": 7.176674620353374e-06, "loss": 1.4395, "step": 4304 }, { "epoch": 0.8825338253382534, "grad_norm": 0.4491953266519826, "learning_rate": 7.151992733837276e-06, "loss": 1.5266, "step": 4305 }, { "epoch": 0.8827388273882739, "grad_norm": 0.42705442120420445, "learning_rate": 7.127351789072911e-06, "loss": 1.5475, "step": 4306 }, { "epoch": 0.8829438294382944, "grad_norm": 0.3786944821213462, "learning_rate": 7.1027517969258104e-06, "loss": 1.4423, "step": 4307 }, { "epoch": 0.8831488314883149, "grad_norm": 0.4187861064282639, "learning_rate": 7.078192768243486e-06, "loss": 1.483, "step": 4308 }, { "epoch": 0.8833538335383354, "grad_norm": 0.4475937168579179, "learning_rate": 7.053674713855319e-06, "loss": 1.4535, "step": 4309 }, { "epoch": 0.8835588355883559, "grad_norm": 0.4117847844233788, "learning_rate": 7.029197644572694e-06, "loss": 1.5013, "step": 4310 }, { "epoch": 0.8837638376383764, "grad_norm": 0.42153387898085704, "learning_rate": 7.004761571188856e-06, "loss": 1.4541, "step": 4311 }, { "epoch": 0.8839688396883969, "grad_norm": 0.42043585894663543, "learning_rate": 6.980366504479008e-06, "loss": 1.4958, "step": 4312 }, { "epoch": 0.8841738417384174, "grad_norm": 0.39502934468044415, "learning_rate": 6.956012455200278e-06, "loss": 1.5121, "step": 4313 }, { "epoch": 0.8843788437884379, "grad_norm": 0.4262024364755581, "learning_rate": 6.931699434091676e-06, "loss": 1.4827, "step": 4314 }, { "epoch": 0.8845838458384584, "grad_norm": 0.4103014261063067, "learning_rate": 6.90742745187416e-06, "loss": 1.4557, "step": 4315 }, { "epoch": 0.8847888478884789, "grad_norm": 0.4273168355342068, "learning_rate": 6.883196519250568e-06, "loss": 1.4911, "step": 4316 }, { "epoch": 0.8849938499384994, "grad_norm": 0.44556455395940653, "learning_rate": 6.859006646905619e-06, "loss": 1.4668, "step": 4317 }, { "epoch": 0.8851988519885199, "grad_norm": 0.4154958473790276, "learning_rate": 6.834857845505971e-06, "loss": 1.46, "step": 4318 }, { "epoch": 0.8854038540385404, "grad_norm": 0.4063941186814045, "learning_rate": 6.810750125700127e-06, "loss": 1.4725, "step": 4319 }, { "epoch": 0.8856088560885609, "grad_norm": 0.4007699245160234, "learning_rate": 6.7866834981185175e-06, "loss": 1.4726, "step": 4320 }, { "epoch": 0.8858138581385814, "grad_norm": 0.43461484437042064, "learning_rate": 6.762657973373432e-06, "loss": 1.4577, "step": 4321 }, { "epoch": 0.8860188601886019, "grad_norm": 0.387988967347175, "learning_rate": 6.738673562059006e-06, "loss": 1.4631, "step": 4322 }, { "epoch": 0.8862238622386224, "grad_norm": 0.3888541817566676, "learning_rate": 6.714730274751302e-06, "loss": 1.462, "step": 4323 }, { "epoch": 0.8864288642886429, "grad_norm": 0.398133842018511, "learning_rate": 6.69082812200823e-06, "loss": 1.4749, "step": 4324 }, { "epoch": 0.8866338663386634, "grad_norm": 0.39116550605028294, "learning_rate": 6.666967114369504e-06, "loss": 1.4567, "step": 4325 }, { "epoch": 0.8868388683886839, "grad_norm": 0.4240717354402672, "learning_rate": 6.643147262356808e-06, "loss": 1.4466, "step": 4326 }, { "epoch": 0.8870438704387044, "grad_norm": 0.41052038294049636, "learning_rate": 6.61936857647355e-06, "loss": 1.4516, "step": 4327 }, { "epoch": 0.8872488724887249, "grad_norm": 0.4224533246185816, "learning_rate": 6.59563106720511e-06, "loss": 1.4958, "step": 4328 }, { "epoch": 0.8874538745387454, "grad_norm": 0.45107254009332937, "learning_rate": 6.571934745018626e-06, "loss": 1.5279, "step": 4329 }, { "epoch": 0.8876588765887659, "grad_norm": 0.4035330292488205, "learning_rate": 6.548279620363074e-06, "loss": 1.4749, "step": 4330 }, { "epoch": 0.8878638786387864, "grad_norm": 0.4367041439211082, "learning_rate": 6.524665703669331e-06, "loss": 1.4704, "step": 4331 }, { "epoch": 0.8880688806888068, "grad_norm": 0.43217366715307726, "learning_rate": 6.501093005350023e-06, "loss": 1.441, "step": 4332 }, { "epoch": 0.8882738827388273, "grad_norm": 0.45891051651383613, "learning_rate": 6.477561535799681e-06, "loss": 1.5233, "step": 4333 }, { "epoch": 0.8884788847888478, "grad_norm": 0.4183459553877193, "learning_rate": 6.454071305394582e-06, "loss": 1.4864, "step": 4334 }, { "epoch": 0.8886838868388683, "grad_norm": 0.41425569288733005, "learning_rate": 6.430622324492852e-06, "loss": 1.4419, "step": 4335 }, { "epoch": 0.8888888888888888, "grad_norm": 0.4397703457312809, "learning_rate": 6.4072146034344415e-06, "loss": 1.4862, "step": 4336 }, { "epoch": 0.8890938909389093, "grad_norm": 0.42607442763953934, "learning_rate": 6.383848152541072e-06, "loss": 1.493, "step": 4337 }, { "epoch": 0.8892988929889298, "grad_norm": 0.4145253654502572, "learning_rate": 6.360522982116301e-06, "loss": 1.475, "step": 4338 }, { "epoch": 0.8895038950389504, "grad_norm": 0.39209549785040015, "learning_rate": 6.33723910244548e-06, "loss": 1.4827, "step": 4339 }, { "epoch": 0.8897088970889709, "grad_norm": 0.43560293264071587, "learning_rate": 6.313996523795717e-06, "loss": 1.5003, "step": 4340 }, { "epoch": 0.8899138991389914, "grad_norm": 0.5100273568085638, "learning_rate": 6.290795256415927e-06, "loss": 1.5628, "step": 4341 }, { "epoch": 0.8901189011890119, "grad_norm": 0.40451975659736783, "learning_rate": 6.2676353105368346e-06, "loss": 1.4569, "step": 4342 }, { "epoch": 0.8903239032390324, "grad_norm": 0.4219556299115111, "learning_rate": 6.244516696370928e-06, "loss": 1.5075, "step": 4343 }, { "epoch": 0.8905289052890529, "grad_norm": 0.4381633536779827, "learning_rate": 6.221439424112463e-06, "loss": 1.507, "step": 4344 }, { "epoch": 0.8907339073390734, "grad_norm": 0.4274465298179701, "learning_rate": 6.198403503937467e-06, "loss": 1.4717, "step": 4345 }, { "epoch": 0.8909389093890939, "grad_norm": 0.4357826663308224, "learning_rate": 6.175408946003703e-06, "loss": 1.5013, "step": 4346 }, { "epoch": 0.8911439114391144, "grad_norm": 0.46362662071190486, "learning_rate": 6.152455760450748e-06, "loss": 1.5811, "step": 4347 }, { "epoch": 0.8913489134891349, "grad_norm": 0.4260789101277547, "learning_rate": 6.1295439573999415e-06, "loss": 1.5608, "step": 4348 }, { "epoch": 0.8915539155391554, "grad_norm": 0.39134435123359895, "learning_rate": 6.106673546954322e-06, "loss": 1.4752, "step": 4349 }, { "epoch": 0.8917589175891759, "grad_norm": 0.41104812737606716, "learning_rate": 6.083844539198691e-06, "loss": 1.4942, "step": 4350 }, { "epoch": 0.8919639196391964, "grad_norm": 0.3850017330641284, "learning_rate": 6.061056944199606e-06, "loss": 1.4849, "step": 4351 }, { "epoch": 0.8921689216892169, "grad_norm": 0.39519086707393414, "learning_rate": 6.0383107720053735e-06, "loss": 1.4646, "step": 4352 }, { "epoch": 0.8923739237392374, "grad_norm": 0.38712510667130645, "learning_rate": 6.0156060326460264e-06, "loss": 1.4497, "step": 4353 }, { "epoch": 0.8925789257892579, "grad_norm": 0.3929588382694851, "learning_rate": 5.992942736133322e-06, "loss": 1.4404, "step": 4354 }, { "epoch": 0.8927839278392784, "grad_norm": 0.39897917401525135, "learning_rate": 5.9703208924607345e-06, "loss": 1.4297, "step": 4355 }, { "epoch": 0.8929889298892989, "grad_norm": 0.3839579187159569, "learning_rate": 5.947740511603461e-06, "loss": 1.4661, "step": 4356 }, { "epoch": 0.8931939319393194, "grad_norm": 0.4413139091397072, "learning_rate": 5.925201603518415e-06, "loss": 1.4465, "step": 4357 }, { "epoch": 0.8933989339893399, "grad_norm": 0.40779105483386197, "learning_rate": 5.902704178144269e-06, "loss": 1.5334, "step": 4358 }, { "epoch": 0.8936039360393604, "grad_norm": 0.42566553139562646, "learning_rate": 5.880248245401354e-06, "loss": 1.4879, "step": 4359 }, { "epoch": 0.8938089380893809, "grad_norm": 0.43877516219285406, "learning_rate": 5.857833815191704e-06, "loss": 1.5261, "step": 4360 }, { "epoch": 0.8940139401394014, "grad_norm": 0.4126074243371334, "learning_rate": 5.835460897399059e-06, "loss": 1.5265, "step": 4361 }, { "epoch": 0.8942189421894219, "grad_norm": 0.4197863581825808, "learning_rate": 5.813129501888859e-06, "loss": 1.4589, "step": 4362 }, { "epoch": 0.8944239442394424, "grad_norm": 0.4162512697969112, "learning_rate": 5.79083963850825e-06, "loss": 1.5739, "step": 4363 }, { "epoch": 0.8946289462894629, "grad_norm": 0.40482578653204193, "learning_rate": 5.768591317086047e-06, "loss": 1.4723, "step": 4364 }, { "epoch": 0.8948339483394834, "grad_norm": 0.5031846296513467, "learning_rate": 5.746384547432737e-06, "loss": 1.5114, "step": 4365 }, { "epoch": 0.8950389503895039, "grad_norm": 0.41312402290596706, "learning_rate": 5.724219339340508e-06, "loss": 1.4926, "step": 4366 }, { "epoch": 0.8952439524395244, "grad_norm": 0.4476948110488828, "learning_rate": 5.702095702583188e-06, "loss": 1.4996, "step": 4367 }, { "epoch": 0.8954489544895449, "grad_norm": 0.41201960879148447, "learning_rate": 5.6800136469163156e-06, "loss": 1.5225, "step": 4368 }, { "epoch": 0.8956539565395654, "grad_norm": 0.38809535534862694, "learning_rate": 5.657973182077081e-06, "loss": 1.4487, "step": 4369 }, { "epoch": 0.8958589585895859, "grad_norm": 0.40451963783488726, "learning_rate": 5.635974317784309e-06, "loss": 1.4064, "step": 4370 }, { "epoch": 0.8960639606396064, "grad_norm": 0.43671851752447083, "learning_rate": 5.614017063738519e-06, "loss": 1.5012, "step": 4371 }, { "epoch": 0.8962689626896269, "grad_norm": 0.3791427685973977, "learning_rate": 5.592101429621821e-06, "loss": 1.4475, "step": 4372 }, { "epoch": 0.8964739647396474, "grad_norm": 0.41866960933738107, "learning_rate": 5.570227425098051e-06, "loss": 1.5183, "step": 4373 }, { "epoch": 0.8966789667896679, "grad_norm": 0.4105122314505505, "learning_rate": 5.54839505981265e-06, "loss": 1.4754, "step": 4374 }, { "epoch": 0.8968839688396884, "grad_norm": 0.4168407798458137, "learning_rate": 5.526604343392694e-06, "loss": 1.4888, "step": 4375 }, { "epoch": 0.8970889708897088, "grad_norm": 0.3777130109190239, "learning_rate": 5.504855285446897e-06, "loss": 1.4894, "step": 4376 }, { "epoch": 0.8972939729397293, "grad_norm": 0.4572916946220848, "learning_rate": 5.483147895565588e-06, "loss": 1.5311, "step": 4377 }, { "epoch": 0.8974989749897498, "grad_norm": 0.3817856617287783, "learning_rate": 5.461482183320754e-06, "loss": 1.4343, "step": 4378 }, { "epoch": 0.8977039770397705, "grad_norm": 0.37738546683367635, "learning_rate": 5.439858158266009e-06, "loss": 1.4145, "step": 4379 }, { "epoch": 0.897908979089791, "grad_norm": 0.3952501044243125, "learning_rate": 5.418275829936537e-06, "loss": 1.4981, "step": 4380 }, { "epoch": 0.8981139811398114, "grad_norm": 0.4167985162739483, "learning_rate": 5.396735207849179e-06, "loss": 1.4914, "step": 4381 }, { "epoch": 0.8983189831898319, "grad_norm": 0.38839115524610257, "learning_rate": 5.375236301502351e-06, "loss": 1.4656, "step": 4382 }, { "epoch": 0.8985239852398524, "grad_norm": 0.42589564232486415, "learning_rate": 5.353779120376101e-06, "loss": 1.471, "step": 4383 }, { "epoch": 0.8987289872898729, "grad_norm": 0.3959255466586945, "learning_rate": 5.332363673932106e-06, "loss": 1.4355, "step": 4384 }, { "epoch": 0.8989339893398934, "grad_norm": 0.41647974152015393, "learning_rate": 5.310989971613567e-06, "loss": 1.4511, "step": 4385 }, { "epoch": 0.8991389913899139, "grad_norm": 0.4438481168520766, "learning_rate": 5.289658022845323e-06, "loss": 1.4887, "step": 4386 }, { "epoch": 0.8993439934399344, "grad_norm": 0.42804066215262604, "learning_rate": 5.268367837033783e-06, "loss": 1.4732, "step": 4387 }, { "epoch": 0.8995489954899549, "grad_norm": 0.4001595505609998, "learning_rate": 5.247119423566982e-06, "loss": 1.4831, "step": 4388 }, { "epoch": 0.8997539975399754, "grad_norm": 0.4357052463910056, "learning_rate": 5.225912791814469e-06, "loss": 1.4839, "step": 4389 }, { "epoch": 0.8999589995899959, "grad_norm": 0.37603492396208343, "learning_rate": 5.204747951127442e-06, "loss": 1.4468, "step": 4390 }, { "epoch": 0.9001640016400164, "grad_norm": 0.4362640423523535, "learning_rate": 5.183624910838602e-06, "loss": 1.4993, "step": 4391 }, { "epoch": 0.9003690036900369, "grad_norm": 0.42870809562502443, "learning_rate": 5.162543680262266e-06, "loss": 1.4781, "step": 4392 }, { "epoch": 0.9005740057400574, "grad_norm": 0.4912405403685505, "learning_rate": 5.141504268694297e-06, "loss": 1.5798, "step": 4393 }, { "epoch": 0.9007790077900779, "grad_norm": 0.4237245324241607, "learning_rate": 5.120506685412108e-06, "loss": 1.4802, "step": 4394 }, { "epoch": 0.9009840098400984, "grad_norm": 0.4298838163077209, "learning_rate": 5.099550939674691e-06, "loss": 1.5339, "step": 4395 }, { "epoch": 0.9011890118901189, "grad_norm": 0.4261848250576572, "learning_rate": 5.078637040722589e-06, "loss": 1.4837, "step": 4396 }, { "epoch": 0.9013940139401394, "grad_norm": 0.4087698210055334, "learning_rate": 5.057764997777847e-06, "loss": 1.472, "step": 4397 }, { "epoch": 0.9015990159901599, "grad_norm": 0.40548444024710545, "learning_rate": 5.036934820044126e-06, "loss": 1.4768, "step": 4398 }, { "epoch": 0.9018040180401804, "grad_norm": 0.39760237227440365, "learning_rate": 5.016146516706566e-06, "loss": 1.4723, "step": 4399 }, { "epoch": 0.9020090200902009, "grad_norm": 0.45649469436456097, "learning_rate": 4.995400096931846e-06, "loss": 1.4644, "step": 4400 }, { "epoch": 0.9022140221402214, "grad_norm": 0.3840876202952878, "learning_rate": 4.974695569868237e-06, "loss": 1.4985, "step": 4401 }, { "epoch": 0.9024190241902419, "grad_norm": 0.48057576272888625, "learning_rate": 4.954032944645459e-06, "loss": 1.455, "step": 4402 }, { "epoch": 0.9026240262402624, "grad_norm": 0.3772560654634475, "learning_rate": 4.933412230374812e-06, "loss": 1.4148, "step": 4403 }, { "epoch": 0.9028290282902829, "grad_norm": 0.40135335040807557, "learning_rate": 4.9128334361491e-06, "loss": 1.4279, "step": 4404 }, { "epoch": 0.9030340303403034, "grad_norm": 0.4313473353658115, "learning_rate": 4.892296571042598e-06, "loss": 1.4683, "step": 4405 }, { "epoch": 0.9032390323903239, "grad_norm": 0.40619140288261074, "learning_rate": 4.871801644111173e-06, "loss": 1.5104, "step": 4406 }, { "epoch": 0.9034440344403444, "grad_norm": 0.4084792119047769, "learning_rate": 4.8513486643921195e-06, "loss": 1.4179, "step": 4407 }, { "epoch": 0.9036490364903649, "grad_norm": 0.4254646699105221, "learning_rate": 4.830937640904309e-06, "loss": 1.5176, "step": 4408 }, { "epoch": 0.9038540385403854, "grad_norm": 0.40021577405821507, "learning_rate": 4.810568582648056e-06, "loss": 1.4892, "step": 4409 }, { "epoch": 0.9040590405904059, "grad_norm": 0.41631489380610787, "learning_rate": 4.790241498605174e-06, "loss": 1.445, "step": 4410 }, { "epoch": 0.9042640426404264, "grad_norm": 0.45863252323245773, "learning_rate": 4.769956397739017e-06, "loss": 1.4509, "step": 4411 }, { "epoch": 0.9044690446904469, "grad_norm": 0.38862650656314107, "learning_rate": 4.749713288994373e-06, "loss": 1.4867, "step": 4412 }, { "epoch": 0.9046740467404674, "grad_norm": 0.46409065828767937, "learning_rate": 4.729512181297524e-06, "loss": 1.5809, "step": 4413 }, { "epoch": 0.9048790487904879, "grad_norm": 0.9264537185083902, "learning_rate": 4.709353083556267e-06, "loss": 1.5154, "step": 4414 }, { "epoch": 0.9050840508405084, "grad_norm": 0.43895061320621714, "learning_rate": 4.689236004659825e-06, "loss": 1.5032, "step": 4415 }, { "epoch": 0.9052890528905289, "grad_norm": 0.4284209690941435, "learning_rate": 4.669160953478913e-06, "loss": 1.5023, "step": 4416 }, { "epoch": 0.9054940549405494, "grad_norm": 0.42568915094453375, "learning_rate": 4.649127938865749e-06, "loss": 1.5208, "step": 4417 }, { "epoch": 0.9056990569905699, "grad_norm": 0.40906890209747016, "learning_rate": 4.629136969653936e-06, "loss": 1.4958, "step": 4418 }, { "epoch": 0.9059040590405905, "grad_norm": 0.4296248118132678, "learning_rate": 4.609188054658631e-06, "loss": 1.5011, "step": 4419 }, { "epoch": 0.906109061090611, "grad_norm": 0.44818031416034615, "learning_rate": 4.589281202676366e-06, "loss": 1.4743, "step": 4420 }, { "epoch": 0.9063140631406315, "grad_norm": 0.4074294804428407, "learning_rate": 4.569416422485151e-06, "loss": 1.5113, "step": 4421 }, { "epoch": 0.906519065190652, "grad_norm": 0.45007376441789937, "learning_rate": 4.549593722844492e-06, "loss": 1.4651, "step": 4422 }, { "epoch": 0.9067240672406724, "grad_norm": 0.4337646770574745, "learning_rate": 4.529813112495251e-06, "loss": 1.5071, "step": 4423 }, { "epoch": 0.906929069290693, "grad_norm": 0.4768651715415433, "learning_rate": 4.5100746001598194e-06, "loss": 1.5528, "step": 4424 }, { "epoch": 0.9071340713407134, "grad_norm": 0.4207868137640321, "learning_rate": 4.490378194541955e-06, "loss": 1.4583, "step": 4425 }, { "epoch": 0.9073390733907339, "grad_norm": 0.3956413454971181, "learning_rate": 4.47072390432689e-06, "loss": 1.4614, "step": 4426 }, { "epoch": 0.9075440754407544, "grad_norm": 0.41323175023849396, "learning_rate": 4.451111738181279e-06, "loss": 1.4825, "step": 4427 }, { "epoch": 0.9077490774907749, "grad_norm": 0.42953635702334275, "learning_rate": 4.431541704753173e-06, "loss": 1.5218, "step": 4428 }, { "epoch": 0.9079540795407954, "grad_norm": 0.4599875582005663, "learning_rate": 4.412013812672089e-06, "loss": 1.4912, "step": 4429 }, { "epoch": 0.9081590815908159, "grad_norm": 0.43144965301230526, "learning_rate": 4.392528070548951e-06, "loss": 1.5, "step": 4430 }, { "epoch": 0.9083640836408364, "grad_norm": 0.4220856446726741, "learning_rate": 4.373084486976053e-06, "loss": 1.4709, "step": 4431 }, { "epoch": 0.9085690856908569, "grad_norm": 0.4178747146754646, "learning_rate": 4.353683070527148e-06, "loss": 1.4553, "step": 4432 }, { "epoch": 0.9087740877408774, "grad_norm": 0.4175328176169357, "learning_rate": 4.3343238297573695e-06, "loss": 1.5339, "step": 4433 }, { "epoch": 0.9089790897908979, "grad_norm": 0.3971593483845302, "learning_rate": 4.31500677320329e-06, "loss": 1.4506, "step": 4434 }, { "epoch": 0.9091840918409184, "grad_norm": 0.4050944744498629, "learning_rate": 4.295731909382827e-06, "loss": 1.4915, "step": 4435 }, { "epoch": 0.9093890938909389, "grad_norm": 0.4462672095431968, "learning_rate": 4.276499246795329e-06, "loss": 1.533, "step": 4436 }, { "epoch": 0.9095940959409594, "grad_norm": 0.39833843094022603, "learning_rate": 4.257308793921522e-06, "loss": 1.4537, "step": 4437 }, { "epoch": 0.9097990979909799, "grad_norm": 0.37633381980439107, "learning_rate": 4.238160559223514e-06, "loss": 1.4676, "step": 4438 }, { "epoch": 0.9100041000410004, "grad_norm": 0.39343908088699486, "learning_rate": 4.219054551144841e-06, "loss": 1.5038, "step": 4439 }, { "epoch": 0.9102091020910209, "grad_norm": 0.3890658655239858, "learning_rate": 4.199990778110363e-06, "loss": 1.4893, "step": 4440 }, { "epoch": 0.9104141041410414, "grad_norm": 0.42989092807082757, "learning_rate": 4.180969248526334e-06, "loss": 1.3789, "step": 4441 }, { "epoch": 0.9106191061910619, "grad_norm": 0.4090079859228634, "learning_rate": 4.161989970780366e-06, "loss": 1.4405, "step": 4442 }, { "epoch": 0.9108241082410824, "grad_norm": 0.43302347273677383, "learning_rate": 4.143052953241488e-06, "loss": 1.4864, "step": 4443 }, { "epoch": 0.9110291102911029, "grad_norm": 0.36420720128816036, "learning_rate": 4.124158204260064e-06, "loss": 1.4864, "step": 4444 }, { "epoch": 0.9112341123411234, "grad_norm": 0.4015953520775453, "learning_rate": 4.105305732167819e-06, "loss": 1.4506, "step": 4445 }, { "epoch": 0.9114391143911439, "grad_norm": 0.4028126478295471, "learning_rate": 4.086495545277824e-06, "loss": 1.4718, "step": 4446 }, { "epoch": 0.9116441164411644, "grad_norm": 0.44682379527963934, "learning_rate": 4.067727651884501e-06, "loss": 1.4948, "step": 4447 }, { "epoch": 0.9118491184911849, "grad_norm": 0.3951347802065758, "learning_rate": 4.049002060263663e-06, "loss": 1.498, "step": 4448 }, { "epoch": 0.9120541205412054, "grad_norm": 0.48157101415511205, "learning_rate": 4.030318778672448e-06, "loss": 1.5343, "step": 4449 }, { "epoch": 0.9122591225912259, "grad_norm": 0.42879029221658904, "learning_rate": 4.011677815349335e-06, "loss": 1.4847, "step": 4450 }, { "epoch": 0.9124641246412464, "grad_norm": 0.3888410941702474, "learning_rate": 3.993079178514125e-06, "loss": 1.4367, "step": 4451 }, { "epoch": 0.9126691266912669, "grad_norm": 0.4298310911224064, "learning_rate": 3.97452287636797e-06, "loss": 1.4636, "step": 4452 }, { "epoch": 0.9128741287412874, "grad_norm": 0.4663686910828376, "learning_rate": 3.9560089170933565e-06, "loss": 1.5502, "step": 4453 }, { "epoch": 0.9130791307913079, "grad_norm": 0.45299088870355975, "learning_rate": 3.937537308854133e-06, "loss": 1.5273, "step": 4454 }, { "epoch": 0.9132841328413284, "grad_norm": 0.44430264039157247, "learning_rate": 3.919108059795406e-06, "loss": 1.4836, "step": 4455 }, { "epoch": 0.9134891348913489, "grad_norm": 0.40772800661490394, "learning_rate": 3.900721178043654e-06, "loss": 1.4779, "step": 4456 }, { "epoch": 0.9136941369413694, "grad_norm": 0.45301093810920023, "learning_rate": 3.882376671706622e-06, "loss": 1.5227, "step": 4457 }, { "epoch": 0.9138991389913899, "grad_norm": 0.4090074161991738, "learning_rate": 3.864074548873431e-06, "loss": 1.4663, "step": 4458 }, { "epoch": 0.9141041410414105, "grad_norm": 0.42870978658495645, "learning_rate": 3.845814817614502e-06, "loss": 1.4628, "step": 4459 }, { "epoch": 0.914309143091431, "grad_norm": 0.4251652358938823, "learning_rate": 3.827597485981527e-06, "loss": 1.4736, "step": 4460 }, { "epoch": 0.9145141451414515, "grad_norm": 0.43075237257587945, "learning_rate": 3.8094225620075253e-06, "loss": 1.4821, "step": 4461 }, { "epoch": 0.914719147191472, "grad_norm": 0.40917543461982836, "learning_rate": 3.7912900537067976e-06, "loss": 1.5301, "step": 4462 }, { "epoch": 0.9149241492414925, "grad_norm": 0.4016414367307887, "learning_rate": 3.7731999690749585e-06, "loss": 1.4666, "step": 4463 }, { "epoch": 0.915129151291513, "grad_norm": 0.43876664911346974, "learning_rate": 3.7551523160889278e-06, "loss": 1.4816, "step": 4464 }, { "epoch": 0.9153341533415335, "grad_norm": 0.4088555704556853, "learning_rate": 3.737147102706906e-06, "loss": 1.4387, "step": 4465 }, { "epoch": 0.915539155391554, "grad_norm": 0.42785444759701224, "learning_rate": 3.7191843368683645e-06, "loss": 1.5544, "step": 4466 }, { "epoch": 0.9157441574415744, "grad_norm": 0.368992514426279, "learning_rate": 3.701264026494067e-06, "loss": 1.4259, "step": 4467 }, { "epoch": 0.9159491594915949, "grad_norm": 0.41956990721389625, "learning_rate": 3.683386179486037e-06, "loss": 1.4885, "step": 4468 }, { "epoch": 0.9161541615416154, "grad_norm": 0.38627572576098745, "learning_rate": 3.665550803727613e-06, "loss": 1.4264, "step": 4469 }, { "epoch": 0.9163591635916359, "grad_norm": 0.43194735714894306, "learning_rate": 3.6477579070833933e-06, "loss": 1.5109, "step": 4470 }, { "epoch": 0.9165641656416564, "grad_norm": 0.4276156872704722, "learning_rate": 3.630007497399224e-06, "loss": 1.4828, "step": 4471 }, { "epoch": 0.9167691676916769, "grad_norm": 0.3947192775074258, "learning_rate": 3.612299582502232e-06, "loss": 1.4867, "step": 4472 }, { "epoch": 0.9169741697416974, "grad_norm": 0.41848054789026884, "learning_rate": 3.5946341702007836e-06, "loss": 1.4689, "step": 4473 }, { "epoch": 0.9171791717917179, "grad_norm": 0.40582495797375623, "learning_rate": 3.5770112682845468e-06, "loss": 1.4884, "step": 4474 }, { "epoch": 0.9173841738417384, "grad_norm": 0.39984132673246475, "learning_rate": 3.5594308845244286e-06, "loss": 1.4434, "step": 4475 }, { "epoch": 0.9175891758917589, "grad_norm": 0.44777856455775167, "learning_rate": 3.5418930266725605e-06, "loss": 1.4633, "step": 4476 }, { "epoch": 0.9177941779417794, "grad_norm": 0.419848422486166, "learning_rate": 3.5243977024623453e-06, "loss": 1.518, "step": 4477 }, { "epoch": 0.9179991799917999, "grad_norm": 0.42144448705675136, "learning_rate": 3.5069449196084126e-06, "loss": 1.5145, "step": 4478 }, { "epoch": 0.9182041820418204, "grad_norm": 0.4297846511344611, "learning_rate": 3.4895346858066724e-06, "loss": 1.4509, "step": 4479 }, { "epoch": 0.9184091840918409, "grad_norm": 0.43441525212771925, "learning_rate": 3.4721670087342282e-06, "loss": 1.4791, "step": 4480 }, { "epoch": 0.9186141861418614, "grad_norm": 0.41349419501085394, "learning_rate": 3.4548418960494433e-06, "loss": 1.4903, "step": 4481 }, { "epoch": 0.9188191881918819, "grad_norm": 0.4608204075693658, "learning_rate": 3.437559355391917e-06, "loss": 1.5354, "step": 4482 }, { "epoch": 0.9190241902419024, "grad_norm": 0.44247800859939584, "learning_rate": 3.420319394382432e-06, "loss": 1.5231, "step": 4483 }, { "epoch": 0.9192291922919229, "grad_norm": 0.41916841496271157, "learning_rate": 3.4031220206230617e-06, "loss": 1.4664, "step": 4484 }, { "epoch": 0.9194341943419434, "grad_norm": 0.41373473630690827, "learning_rate": 3.385967241697041e-06, "loss": 1.38, "step": 4485 }, { "epoch": 0.9196391963919639, "grad_norm": 0.418404612139362, "learning_rate": 3.3688550651688632e-06, "loss": 1.4544, "step": 4486 }, { "epoch": 0.9198441984419844, "grad_norm": 0.4181703166344262, "learning_rate": 3.3517854985842147e-06, "loss": 1.4959, "step": 4487 }, { "epoch": 0.9200492004920049, "grad_norm": 0.4222133418582104, "learning_rate": 3.3347585494699963e-06, "loss": 1.443, "step": 4488 }, { "epoch": 0.9202542025420254, "grad_norm": 0.4101106198461772, "learning_rate": 3.317774225334336e-06, "loss": 1.4424, "step": 4489 }, { "epoch": 0.9204592045920459, "grad_norm": 0.39972761456560146, "learning_rate": 3.300832533666509e-06, "loss": 1.3824, "step": 4490 }, { "epoch": 0.9206642066420664, "grad_norm": 0.4282198779684472, "learning_rate": 3.2839334819370846e-06, "loss": 1.4889, "step": 4491 }, { "epoch": 0.9208692086920869, "grad_norm": 0.4483828393661136, "learning_rate": 3.2670770775977467e-06, "loss": 1.5322, "step": 4492 }, { "epoch": 0.9210742107421074, "grad_norm": 0.4381826226976708, "learning_rate": 3.250263328081382e-06, "loss": 1.3928, "step": 4493 }, { "epoch": 0.9212792127921279, "grad_norm": 0.4450788638055356, "learning_rate": 3.2334922408021384e-06, "loss": 1.4922, "step": 4494 }, { "epoch": 0.9214842148421484, "grad_norm": 0.4066091165142853, "learning_rate": 3.2167638231552777e-06, "loss": 1.4956, "step": 4495 }, { "epoch": 0.9216892168921689, "grad_norm": 0.4132591086685028, "learning_rate": 3.200078082517255e-06, "loss": 1.4969, "step": 4496 }, { "epoch": 0.9218942189421894, "grad_norm": 0.4103944125872859, "learning_rate": 3.1834350262457625e-06, "loss": 1.4701, "step": 4497 }, { "epoch": 0.9220992209922099, "grad_norm": 0.38472458134357945, "learning_rate": 3.1668346616795963e-06, "loss": 1.4379, "step": 4498 }, { "epoch": 0.9223042230422305, "grad_norm": 0.4267717543937199, "learning_rate": 3.1502769961387903e-06, "loss": 1.5257, "step": 4499 }, { "epoch": 0.922509225092251, "grad_norm": 0.41878321656621137, "learning_rate": 3.1337620369245037e-06, "loss": 1.5126, "step": 4500 }, { "epoch": 0.9227142271422715, "grad_norm": 0.418269177368489, "learning_rate": 3.117289791319089e-06, "loss": 1.4299, "step": 4501 }, { "epoch": 0.922919229192292, "grad_norm": 0.39811051005006315, "learning_rate": 3.1008602665860586e-06, "loss": 1.4175, "step": 4502 }, { "epoch": 0.9231242312423125, "grad_norm": 0.3991435884314393, "learning_rate": 3.0844734699700726e-06, "loss": 1.4738, "step": 4503 }, { "epoch": 0.923329233292333, "grad_norm": 0.408831360944762, "learning_rate": 3.0681294086969957e-06, "loss": 1.5171, "step": 4504 }, { "epoch": 0.9235342353423535, "grad_norm": 0.4138747600359767, "learning_rate": 3.051828089973796e-06, "loss": 1.4676, "step": 4505 }, { "epoch": 0.923739237392374, "grad_norm": 0.4045311013648829, "learning_rate": 3.0355695209886126e-06, "loss": 1.4878, "step": 4506 }, { "epoch": 0.9239442394423945, "grad_norm": 0.407115128351299, "learning_rate": 3.019353708910733e-06, "loss": 1.4807, "step": 4507 }, { "epoch": 0.924149241492415, "grad_norm": 0.4003536976833089, "learning_rate": 3.003180660890592e-06, "loss": 1.4305, "step": 4508 }, { "epoch": 0.9243542435424354, "grad_norm": 0.4014028430429668, "learning_rate": 2.9870503840597973e-06, "loss": 1.4639, "step": 4509 }, { "epoch": 0.924559245592456, "grad_norm": 0.4244954691294579, "learning_rate": 2.9709628855310367e-06, "loss": 1.4561, "step": 4510 }, { "epoch": 0.9247642476424764, "grad_norm": 0.41609049800610626, "learning_rate": 2.95491817239818e-06, "loss": 1.4355, "step": 4511 }, { "epoch": 0.9249692496924969, "grad_norm": 0.41175801146054786, "learning_rate": 2.93891625173619e-06, "loss": 1.5555, "step": 4512 }, { "epoch": 0.9251742517425174, "grad_norm": 0.39186745527876476, "learning_rate": 2.9229571306012226e-06, "loss": 1.4959, "step": 4513 }, { "epoch": 0.9253792537925379, "grad_norm": 0.3806252485752179, "learning_rate": 2.9070408160305153e-06, "loss": 1.4773, "step": 4514 }, { "epoch": 0.9255842558425584, "grad_norm": 0.43030039070004605, "learning_rate": 2.8911673150424313e-06, "loss": 1.517, "step": 4515 }, { "epoch": 0.9257892578925789, "grad_norm": 0.3983980134298662, "learning_rate": 2.875336634636472e-06, "loss": 1.5188, "step": 4516 }, { "epoch": 0.9259942599425994, "grad_norm": 0.3708434983240725, "learning_rate": 2.859548781793242e-06, "loss": 1.449, "step": 4517 }, { "epoch": 0.9261992619926199, "grad_norm": 0.44734482673099224, "learning_rate": 2.8438037634744617e-06, "loss": 1.4864, "step": 4518 }, { "epoch": 0.9264042640426404, "grad_norm": 0.3998819406117882, "learning_rate": 2.8281015866229776e-06, "loss": 1.5016, "step": 4519 }, { "epoch": 0.9266092660926609, "grad_norm": 0.3758323865602232, "learning_rate": 2.8124422581627287e-06, "loss": 1.452, "step": 4520 }, { "epoch": 0.9268142681426814, "grad_norm": 0.4157014156700869, "learning_rate": 2.796825784998791e-06, "loss": 1.5021, "step": 4521 }, { "epoch": 0.9270192701927019, "grad_norm": 0.41259368015487885, "learning_rate": 2.7812521740172904e-06, "loss": 1.4436, "step": 4522 }, { "epoch": 0.9272242722427224, "grad_norm": 0.3876767690598938, "learning_rate": 2.7657214320854773e-06, "loss": 1.4316, "step": 4523 }, { "epoch": 0.9274292742927429, "grad_norm": 0.44028876979510134, "learning_rate": 2.7502335660517185e-06, "loss": 1.5091, "step": 4524 }, { "epoch": 0.9276342763427634, "grad_norm": 0.40281137782747767, "learning_rate": 2.734788582745473e-06, "loss": 1.3714, "step": 4525 }, { "epoch": 0.9278392783927839, "grad_norm": 0.3976192134347834, "learning_rate": 2.7193864889772603e-06, "loss": 1.5117, "step": 4526 }, { "epoch": 0.9280442804428044, "grad_norm": 0.40219946408255786, "learning_rate": 2.7040272915387022e-06, "loss": 1.4581, "step": 4527 }, { "epoch": 0.9282492824928249, "grad_norm": 0.38864124668356803, "learning_rate": 2.6887109972025037e-06, "loss": 1.4581, "step": 4528 }, { "epoch": 0.9284542845428454, "grad_norm": 0.4060343137796328, "learning_rate": 2.6734376127224625e-06, "loss": 1.4097, "step": 4529 }, { "epoch": 0.9286592865928659, "grad_norm": 0.39372678668660055, "learning_rate": 2.658207144833447e-06, "loss": 1.4593, "step": 4530 }, { "epoch": 0.9288642886428864, "grad_norm": 0.40514047654048657, "learning_rate": 2.6430196002514065e-06, "loss": 1.4801, "step": 4531 }, { "epoch": 0.9290692906929069, "grad_norm": 0.3775697515584575, "learning_rate": 2.627874985673351e-06, "loss": 1.3906, "step": 4532 }, { "epoch": 0.9292742927429274, "grad_norm": 0.4530877012632655, "learning_rate": 2.6127733077773497e-06, "loss": 1.4922, "step": 4533 }, { "epoch": 0.9294792947929479, "grad_norm": 0.3857017010570035, "learning_rate": 2.597714573222576e-06, "loss": 1.5006, "step": 4534 }, { "epoch": 0.9296842968429684, "grad_norm": 0.43168979898810245, "learning_rate": 2.5826987886492627e-06, "loss": 1.5039, "step": 4535 }, { "epoch": 0.9298892988929889, "grad_norm": 0.42538292559643276, "learning_rate": 2.5677259606786684e-06, "loss": 1.4594, "step": 4536 }, { "epoch": 0.9300943009430094, "grad_norm": 0.3840161220880411, "learning_rate": 2.552796095913124e-06, "loss": 1.4574, "step": 4537 }, { "epoch": 0.9302993029930299, "grad_norm": 0.4227867781729992, "learning_rate": 2.5379092009360284e-06, "loss": 1.4985, "step": 4538 }, { "epoch": 0.9305043050430505, "grad_norm": 0.40578227368197406, "learning_rate": 2.5230652823118204e-06, "loss": 1.4651, "step": 4539 }, { "epoch": 0.930709307093071, "grad_norm": 0.39310472945149344, "learning_rate": 2.50826434658602e-06, "loss": 1.4618, "step": 4540 }, { "epoch": 0.9309143091430915, "grad_norm": 0.4408790962793832, "learning_rate": 2.4935064002851395e-06, "loss": 1.4854, "step": 4541 }, { "epoch": 0.931119311193112, "grad_norm": 0.39326167074612256, "learning_rate": 2.478791449916773e-06, "loss": 1.4778, "step": 4542 }, { "epoch": 0.9313243132431325, "grad_norm": 0.42048960730523716, "learning_rate": 2.4641195019695306e-06, "loss": 1.4709, "step": 4543 }, { "epoch": 0.931529315293153, "grad_norm": 0.44237083359266777, "learning_rate": 2.4494905629130925e-06, "loss": 1.4857, "step": 4544 }, { "epoch": 0.9317343173431735, "grad_norm": 0.44301680845058533, "learning_rate": 2.4349046391981546e-06, "loss": 1.5511, "step": 4545 }, { "epoch": 0.931939319393194, "grad_norm": 0.43117470621383425, "learning_rate": 2.420361737256438e-06, "loss": 1.4436, "step": 4546 }, { "epoch": 0.9321443214432145, "grad_norm": 0.40973765300621207, "learning_rate": 2.4058618635007133e-06, "loss": 1.4737, "step": 4547 }, { "epoch": 0.932349323493235, "grad_norm": 0.4172904348028181, "learning_rate": 2.3914050243247445e-06, "loss": 1.5277, "step": 4548 }, { "epoch": 0.9325543255432555, "grad_norm": 0.40977859284544466, "learning_rate": 2.3769912261033533e-06, "loss": 1.463, "step": 4549 }, { "epoch": 0.932759327593276, "grad_norm": 0.4269107879704024, "learning_rate": 2.3626204751923784e-06, "loss": 1.4333, "step": 4550 }, { "epoch": 0.9329643296432965, "grad_norm": 0.4154100739547564, "learning_rate": 2.3482927779286623e-06, "loss": 1.5191, "step": 4551 }, { "epoch": 0.933169331693317, "grad_norm": 0.36183219664822497, "learning_rate": 2.334008140630062e-06, "loss": 1.3816, "step": 4552 }, { "epoch": 0.9333743337433374, "grad_norm": 0.452358022642829, "learning_rate": 2.3197665695954607e-06, "loss": 1.5761, "step": 4553 }, { "epoch": 0.933579335793358, "grad_norm": 0.4097403150247577, "learning_rate": 2.3055680711047355e-06, "loss": 1.5097, "step": 4554 }, { "epoch": 0.9337843378433784, "grad_norm": 0.43341915366844846, "learning_rate": 2.291412651418778e-06, "loss": 1.4573, "step": 4555 }, { "epoch": 0.9339893398933989, "grad_norm": 0.4427625650634552, "learning_rate": 2.277300316779507e-06, "loss": 1.471, "step": 4556 }, { "epoch": 0.9341943419434194, "grad_norm": 0.4007717730377636, "learning_rate": 2.2632310734097994e-06, "loss": 1.4694, "step": 4557 }, { "epoch": 0.9343993439934399, "grad_norm": 0.4798354637907963, "learning_rate": 2.2492049275135486e-06, "loss": 1.5728, "step": 4558 }, { "epoch": 0.9346043460434604, "grad_norm": 0.432366661718664, "learning_rate": 2.2352218852756625e-06, "loss": 1.4707, "step": 4559 }, { "epoch": 0.9348093480934809, "grad_norm": 0.4460440923710054, "learning_rate": 2.22128195286202e-06, "loss": 1.4969, "step": 4560 }, { "epoch": 0.9350143501435014, "grad_norm": 0.4684781583418065, "learning_rate": 2.207385136419504e-06, "loss": 1.5261, "step": 4561 }, { "epoch": 0.9352193521935219, "grad_norm": 0.43713686511410216, "learning_rate": 2.19353144207598e-06, "loss": 1.5117, "step": 4562 }, { "epoch": 0.9354243542435424, "grad_norm": 0.43758544839160146, "learning_rate": 2.179720875940272e-06, "loss": 1.5151, "step": 4563 }, { "epoch": 0.9356293562935629, "grad_norm": 0.4129268916678531, "learning_rate": 2.165953444102242e-06, "loss": 1.4388, "step": 4564 }, { "epoch": 0.9358343583435834, "grad_norm": 0.4491329177134426, "learning_rate": 2.1522291526326898e-06, "loss": 1.519, "step": 4565 }, { "epoch": 0.9360393603936039, "grad_norm": 0.40560543303076346, "learning_rate": 2.1385480075834076e-06, "loss": 1.435, "step": 4566 }, { "epoch": 0.9362443624436244, "grad_norm": 0.4248879851154153, "learning_rate": 2.1249100149871693e-06, "loss": 1.4664, "step": 4567 }, { "epoch": 0.9364493644936449, "grad_norm": 0.4368460992615822, "learning_rate": 2.111315180857687e-06, "loss": 1.4749, "step": 4568 }, { "epoch": 0.9366543665436654, "grad_norm": 0.36533356517722737, "learning_rate": 2.0977635111896654e-06, "loss": 1.4435, "step": 4569 }, { "epoch": 0.9368593685936859, "grad_norm": 0.4010820531220128, "learning_rate": 2.0842550119588024e-06, "loss": 1.4344, "step": 4570 }, { "epoch": 0.9370643706437064, "grad_norm": 0.4258966255979595, "learning_rate": 2.0707896891216995e-06, "loss": 1.5236, "step": 4571 }, { "epoch": 0.9372693726937269, "grad_norm": 0.37911085021378865, "learning_rate": 2.057367548615974e-06, "loss": 1.4584, "step": 4572 }, { "epoch": 0.9374743747437474, "grad_norm": 0.40465394133403687, "learning_rate": 2.04398859636018e-06, "loss": 1.4883, "step": 4573 }, { "epoch": 0.9376793767937679, "grad_norm": 0.4372930249798691, "learning_rate": 2.03065283825381e-06, "loss": 1.4839, "step": 4574 }, { "epoch": 0.9378843788437884, "grad_norm": 0.3937186132948349, "learning_rate": 2.0173602801773495e-06, "loss": 1.507, "step": 4575 }, { "epoch": 0.9380893808938089, "grad_norm": 0.411973499164815, "learning_rate": 2.0041109279921864e-06, "loss": 1.4148, "step": 4576 }, { "epoch": 0.9382943829438294, "grad_norm": 0.44493513969349274, "learning_rate": 1.990904787540704e-06, "loss": 1.5318, "step": 4577 }, { "epoch": 0.9384993849938499, "grad_norm": 0.396585601401724, "learning_rate": 1.97774186464621e-06, "loss": 1.4856, "step": 4578 }, { "epoch": 0.9387043870438705, "grad_norm": 0.3936346698096212, "learning_rate": 1.964622165112939e-06, "loss": 1.4564, "step": 4579 }, { "epoch": 0.938909389093891, "grad_norm": 0.44329940607578605, "learning_rate": 1.951545694726098e-06, "loss": 1.5444, "step": 4580 }, { "epoch": 0.9391143911439115, "grad_norm": 0.42419625776220793, "learning_rate": 1.9385124592518065e-06, "loss": 1.4748, "step": 4581 }, { "epoch": 0.939319393193932, "grad_norm": 0.4154748513673259, "learning_rate": 1.925522464437135e-06, "loss": 1.4738, "step": 4582 }, { "epoch": 0.9395243952439525, "grad_norm": 0.43896780954596126, "learning_rate": 1.91257571601009e-06, "loss": 1.4588, "step": 4583 }, { "epoch": 0.939729397293973, "grad_norm": 0.44005980233680353, "learning_rate": 1.8996722196795713e-06, "loss": 1.4943, "step": 4584 }, { "epoch": 0.9399343993439935, "grad_norm": 0.419656120553632, "learning_rate": 1.8868119811354611e-06, "loss": 1.5182, "step": 4585 }, { "epoch": 0.940139401394014, "grad_norm": 0.37749984616269355, "learning_rate": 1.8739950060485234e-06, "loss": 1.579, "step": 4586 }, { "epoch": 0.9403444034440345, "grad_norm": 0.41286570468517103, "learning_rate": 1.8612213000704704e-06, "loss": 1.5313, "step": 4587 }, { "epoch": 0.940549405494055, "grad_norm": 0.4106493276962578, "learning_rate": 1.8484908688339186e-06, "loss": 1.4712, "step": 4588 }, { "epoch": 0.9407544075440755, "grad_norm": 0.47054730258968175, "learning_rate": 1.8358037179524224e-06, "loss": 1.4799, "step": 4589 }, { "epoch": 0.940959409594096, "grad_norm": 0.4324103409490006, "learning_rate": 1.8231598530204287e-06, "loss": 1.5168, "step": 4590 }, { "epoch": 0.9411644116441165, "grad_norm": 0.41466057803080025, "learning_rate": 1.810559279613322e-06, "loss": 1.4592, "step": 4591 }, { "epoch": 0.941369413694137, "grad_norm": 0.4051172885912199, "learning_rate": 1.7980020032873468e-06, "loss": 1.4895, "step": 4592 }, { "epoch": 0.9415744157441575, "grad_norm": 0.40438540291362046, "learning_rate": 1.7854880295797405e-06, "loss": 1.4249, "step": 4593 }, { "epoch": 0.941779417794178, "grad_norm": 0.4430811178964784, "learning_rate": 1.7730173640085445e-06, "loss": 1.5037, "step": 4594 }, { "epoch": 0.9419844198441985, "grad_norm": 0.4055527400379786, "learning_rate": 1.7605900120728047e-06, "loss": 1.4637, "step": 4595 }, { "epoch": 0.942189421894219, "grad_norm": 0.4025049571858752, "learning_rate": 1.748205979252393e-06, "loss": 1.4788, "step": 4596 }, { "epoch": 0.9423944239442394, "grad_norm": 0.4123895047425271, "learning_rate": 1.7358652710081081e-06, "loss": 1.44, "step": 4597 }, { "epoch": 0.9425994259942599, "grad_norm": 0.4060466673787726, "learning_rate": 1.72356789278163e-06, "loss": 1.499, "step": 4598 }, { "epoch": 0.9428044280442804, "grad_norm": 0.39758461289611335, "learning_rate": 1.711313849995555e-06, "loss": 1.4974, "step": 4599 }, { "epoch": 0.9430094300943009, "grad_norm": 0.38959189170047004, "learning_rate": 1.6991031480533715e-06, "loss": 1.4806, "step": 4600 }, { "epoch": 0.9432144321443214, "grad_norm": 0.3882309791214617, "learning_rate": 1.686935792339439e-06, "loss": 1.4639, "step": 4601 }, { "epoch": 0.9434194341943419, "grad_norm": 0.47363832224666175, "learning_rate": 1.6748117882189883e-06, "loss": 1.5344, "step": 4602 }, { "epoch": 0.9436244362443624, "grad_norm": 0.4576299615926088, "learning_rate": 1.6627311410381652e-06, "loss": 1.4975, "step": 4603 }, { "epoch": 0.9438294382943829, "grad_norm": 0.40109436131476384, "learning_rate": 1.650693856123997e-06, "loss": 1.5051, "step": 4604 }, { "epoch": 0.9440344403444034, "grad_norm": 0.3986627337671457, "learning_rate": 1.6386999387843716e-06, "loss": 1.4442, "step": 4605 }, { "epoch": 0.9442394423944239, "grad_norm": 0.42906664911223463, "learning_rate": 1.626749394308058e-06, "loss": 1.4606, "step": 4606 }, { "epoch": 0.9444444444444444, "grad_norm": 0.44572873121667206, "learning_rate": 1.614842227964708e-06, "loss": 1.4834, "step": 4607 }, { "epoch": 0.9446494464944649, "grad_norm": 0.4143428816862491, "learning_rate": 1.6029784450048323e-06, "loss": 1.4857, "step": 4608 }, { "epoch": 0.9448544485444854, "grad_norm": 0.38526828484852355, "learning_rate": 1.5911580506598245e-06, "loss": 1.5359, "step": 4609 }, { "epoch": 0.9450594505945059, "grad_norm": 0.38978783684109825, "learning_rate": 1.579381050141948e-06, "loss": 1.4901, "step": 4610 }, { "epoch": 0.9452644526445264, "grad_norm": 0.42659787671916094, "learning_rate": 1.5676474486443272e-06, "loss": 1.4861, "step": 4611 }, { "epoch": 0.9454694546945469, "grad_norm": 0.4151510242273399, "learning_rate": 1.5559572513409338e-06, "loss": 1.4169, "step": 4612 }, { "epoch": 0.9456744567445674, "grad_norm": 0.4169208646551611, "learning_rate": 1.5443104633866112e-06, "loss": 1.445, "step": 4613 }, { "epoch": 0.9458794587945879, "grad_norm": 0.43940355005413406, "learning_rate": 1.5327070899170736e-06, "loss": 1.4002, "step": 4614 }, { "epoch": 0.9460844608446084, "grad_norm": 0.42457098167300533, "learning_rate": 1.521147136048895e-06, "loss": 1.4624, "step": 4615 }, { "epoch": 0.9462894628946289, "grad_norm": 0.427672103467531, "learning_rate": 1.5096306068794641e-06, "loss": 1.4695, "step": 4616 }, { "epoch": 0.9464944649446494, "grad_norm": 0.434582645115717, "learning_rate": 1.4981575074870635e-06, "loss": 1.4816, "step": 4617 }, { "epoch": 0.9466994669946699, "grad_norm": 0.3983691978804189, "learning_rate": 1.4867278429308018e-06, "loss": 1.4416, "step": 4618 }, { "epoch": 0.9469044690446905, "grad_norm": 0.40122954706122715, "learning_rate": 1.4753416182506363e-06, "loss": 1.472, "step": 4619 }, { "epoch": 0.947109471094711, "grad_norm": 0.4340474962124046, "learning_rate": 1.4639988384673842e-06, "loss": 1.5026, "step": 4620 }, { "epoch": 0.9473144731447315, "grad_norm": 0.42517397394612205, "learning_rate": 1.4526995085826888e-06, "loss": 1.5021, "step": 4621 }, { "epoch": 0.947519475194752, "grad_norm": 0.4030591178317383, "learning_rate": 1.4414436335790538e-06, "loss": 1.4922, "step": 4622 }, { "epoch": 0.9477244772447725, "grad_norm": 0.3886046334608059, "learning_rate": 1.4302312184197974e-06, "loss": 1.4632, "step": 4623 }, { "epoch": 0.947929479294793, "grad_norm": 0.40698460786524404, "learning_rate": 1.4190622680490873e-06, "loss": 1.4846, "step": 4624 }, { "epoch": 0.9481344813448135, "grad_norm": 0.401137773814169, "learning_rate": 1.4079367873919059e-06, "loss": 1.4501, "step": 4625 }, { "epoch": 0.948339483394834, "grad_norm": 0.4222612713513452, "learning_rate": 1.396854781354129e-06, "loss": 1.4041, "step": 4626 }, { "epoch": 0.9485444854448545, "grad_norm": 0.3859821651219978, "learning_rate": 1.3858162548223807e-06, "loss": 1.4602, "step": 4627 }, { "epoch": 0.948749487494875, "grad_norm": 0.4538824938074429, "learning_rate": 1.3748212126641569e-06, "loss": 1.5304, "step": 4628 }, { "epoch": 0.9489544895448955, "grad_norm": 0.3996911278353664, "learning_rate": 1.3638696597277679e-06, "loss": 1.467, "step": 4629 }, { "epoch": 0.949159491594916, "grad_norm": 0.4175656017910606, "learning_rate": 1.3529616008423506e-06, "loss": 1.491, "step": 4630 }, { "epoch": 0.9493644936449365, "grad_norm": 0.39212672558834766, "learning_rate": 1.3420970408178913e-06, "loss": 1.494, "step": 4631 }, { "epoch": 0.949569495694957, "grad_norm": 0.42694031876878125, "learning_rate": 1.331275984445135e-06, "loss": 1.5484, "step": 4632 }, { "epoch": 0.9497744977449775, "grad_norm": 0.4112477329366927, "learning_rate": 1.3204984364956874e-06, "loss": 1.4851, "step": 4633 }, { "epoch": 0.949979499794998, "grad_norm": 0.42367707448676856, "learning_rate": 1.3097644017219468e-06, "loss": 1.5204, "step": 4634 }, { "epoch": 0.9501845018450185, "grad_norm": 0.4244367293639013, "learning_rate": 1.2990738848571494e-06, "loss": 1.4597, "step": 4635 }, { "epoch": 0.950389503895039, "grad_norm": 0.46912325802861377, "learning_rate": 1.288426890615335e-06, "loss": 1.5255, "step": 4636 }, { "epoch": 0.9505945059450595, "grad_norm": 0.4526593116073533, "learning_rate": 1.2778234236913155e-06, "loss": 1.4554, "step": 4637 }, { "epoch": 0.95079950799508, "grad_norm": 0.43815685939747784, "learning_rate": 1.2672634887607614e-06, "loss": 1.4792, "step": 4638 }, { "epoch": 0.9510045100451004, "grad_norm": 0.43757886235224264, "learning_rate": 1.256747090480115e-06, "loss": 1.4616, "step": 4639 }, { "epoch": 0.951209512095121, "grad_norm": 0.40585951067855275, "learning_rate": 1.2462742334866218e-06, "loss": 1.5298, "step": 4640 }, { "epoch": 0.9514145141451414, "grad_norm": 0.4651036546841763, "learning_rate": 1.2358449223983547e-06, "loss": 1.5235, "step": 4641 }, { "epoch": 0.9516195161951619, "grad_norm": 0.48241150699281893, "learning_rate": 1.2254591618141686e-06, "loss": 1.508, "step": 4642 }, { "epoch": 0.9518245182451824, "grad_norm": 0.4260358302361235, "learning_rate": 1.2151169563136888e-06, "loss": 1.4758, "step": 4643 }, { "epoch": 0.9520295202952029, "grad_norm": 0.39055915828560206, "learning_rate": 1.2048183104573563e-06, "loss": 1.4866, "step": 4644 }, { "epoch": 0.9522345223452234, "grad_norm": 0.4335721694380325, "learning_rate": 1.1945632287864383e-06, "loss": 1.4905, "step": 4645 }, { "epoch": 0.9524395243952439, "grad_norm": 0.4446314618662908, "learning_rate": 1.1843517158229288e-06, "loss": 1.4474, "step": 4646 }, { "epoch": 0.9526445264452644, "grad_norm": 0.41233027881018863, "learning_rate": 1.1741837760696595e-06, "loss": 1.4767, "step": 4647 }, { "epoch": 0.9528495284952849, "grad_norm": 0.4340371603042303, "learning_rate": 1.1640594140102213e-06, "loss": 1.4323, "step": 4648 }, { "epoch": 0.9530545305453054, "grad_norm": 0.4366875349622588, "learning_rate": 1.1539786341089876e-06, "loss": 1.4748, "step": 4649 }, { "epoch": 0.9532595325953259, "grad_norm": 0.45489270178308966, "learning_rate": 1.143941440811147e-06, "loss": 1.5774, "step": 4650 }, { "epoch": 0.9534645346453464, "grad_norm": 0.39725661609282786, "learning_rate": 1.1339478385426262e-06, "loss": 1.4378, "step": 4651 }, { "epoch": 0.9536695366953669, "grad_norm": 0.4253148346347174, "learning_rate": 1.1239978317101662e-06, "loss": 1.4674, "step": 4652 }, { "epoch": 0.9538745387453874, "grad_norm": 0.4306067235387545, "learning_rate": 1.114091424701258e-06, "loss": 1.5214, "step": 4653 }, { "epoch": 0.9540795407954079, "grad_norm": 0.4015431996269364, "learning_rate": 1.1042286218841736e-06, "loss": 1.5021, "step": 4654 }, { "epoch": 0.9542845428454284, "grad_norm": 0.4722780786684882, "learning_rate": 1.0944094276079675e-06, "loss": 1.4955, "step": 4655 }, { "epoch": 0.9544895448954489, "grad_norm": 0.4313275113611618, "learning_rate": 1.0846338462024541e-06, "loss": 1.4772, "step": 4656 }, { "epoch": 0.9546945469454694, "grad_norm": 0.43272841039882, "learning_rate": 1.0749018819782297e-06, "loss": 1.4857, "step": 4657 }, { "epoch": 0.9548995489954899, "grad_norm": 0.43086454656866835, "learning_rate": 1.0652135392266394e-06, "loss": 1.5047, "step": 4658 }, { "epoch": 0.9551045510455105, "grad_norm": 0.3803434158641607, "learning_rate": 1.055568822219799e-06, "loss": 1.478, "step": 4659 }, { "epoch": 0.955309553095531, "grad_norm": 0.4023398093422054, "learning_rate": 1.0459677352106067e-06, "loss": 1.4558, "step": 4660 }, { "epoch": 0.9555145551455515, "grad_norm": 0.39676256246624875, "learning_rate": 1.036410282432687e-06, "loss": 1.5035, "step": 4661 }, { "epoch": 0.955719557195572, "grad_norm": 0.4255844743479049, "learning_rate": 1.0268964681004356e-06, "loss": 1.459, "step": 4662 }, { "epoch": 0.9559245592455925, "grad_norm": 0.448379318965663, "learning_rate": 1.0174262964090408e-06, "loss": 1.5082, "step": 4663 }, { "epoch": 0.956129561295613, "grad_norm": 0.3985825209838232, "learning_rate": 1.0079997715343959e-06, "loss": 1.4754, "step": 4664 }, { "epoch": 0.9563345633456335, "grad_norm": 0.43058774652209775, "learning_rate": 9.986168976331866e-07, "loss": 1.4845, "step": 4665 }, { "epoch": 0.956539565395654, "grad_norm": 0.3794112998420427, "learning_rate": 9.892776788428149e-07, "loss": 1.4555, "step": 4666 }, { "epoch": 0.9567445674456745, "grad_norm": 0.40235105057860043, "learning_rate": 9.79982119281464e-07, "loss": 1.4668, "step": 4667 }, { "epoch": 0.956949569495695, "grad_norm": 0.3792903886393411, "learning_rate": 9.707302230480553e-07, "loss": 1.5232, "step": 4668 }, { "epoch": 0.9571545715457155, "grad_norm": 0.41672471938273514, "learning_rate": 9.615219942222474e-07, "loss": 1.4407, "step": 4669 }, { "epoch": 0.957359573595736, "grad_norm": 0.42753742021273905, "learning_rate": 9.523574368644483e-07, "loss": 1.5231, "step": 4670 }, { "epoch": 0.9575645756457565, "grad_norm": 0.39432676701692954, "learning_rate": 9.432365550158251e-07, "loss": 1.4167, "step": 4671 }, { "epoch": 0.957769577695777, "grad_norm": 0.42024170158675034, "learning_rate": 9.341593526982606e-07, "loss": 1.4961, "step": 4672 }, { "epoch": 0.9579745797457975, "grad_norm": 0.41605481659116417, "learning_rate": 9.251258339143864e-07, "loss": 1.515, "step": 4673 }, { "epoch": 0.958179581795818, "grad_norm": 0.41798006553937567, "learning_rate": 9.161360026475829e-07, "loss": 1.4719, "step": 4674 }, { "epoch": 0.9583845838458385, "grad_norm": 0.4066078260983904, "learning_rate": 9.071898628619569e-07, "loss": 1.4714, "step": 4675 }, { "epoch": 0.958589585895859, "grad_norm": 0.40563154867170487, "learning_rate": 8.982874185023415e-07, "loss": 1.4738, "step": 4676 }, { "epoch": 0.9587945879458795, "grad_norm": 0.4023707092057499, "learning_rate": 8.89428673494308e-07, "loss": 1.4289, "step": 4677 }, { "epoch": 0.9589995899959, "grad_norm": 0.4294599983077088, "learning_rate": 8.80613631744176e-07, "loss": 1.4649, "step": 4678 }, { "epoch": 0.9592045920459205, "grad_norm": 0.42785494858028356, "learning_rate": 8.718422971389584e-07, "loss": 1.4668, "step": 4679 }, { "epoch": 0.959409594095941, "grad_norm": 0.4218830364066306, "learning_rate": 8.63114673546428e-07, "loss": 1.5228, "step": 4680 }, { "epoch": 0.9596145961459615, "grad_norm": 0.427049384612903, "learning_rate": 8.544307648150729e-07, "loss": 1.4823, "step": 4681 }, { "epoch": 0.959819598195982, "grad_norm": 0.45085133029000424, "learning_rate": 8.45790574774108e-07, "loss": 1.5397, "step": 4682 }, { "epoch": 0.9600246002460024, "grad_norm": 0.42410111273667755, "learning_rate": 8.371941072334299e-07, "loss": 1.4962, "step": 4683 }, { "epoch": 0.9602296022960229, "grad_norm": 0.41234092871602834, "learning_rate": 8.286413659837288e-07, "loss": 1.5249, "step": 4684 }, { "epoch": 0.9604346043460434, "grad_norm": 0.4120491097994356, "learning_rate": 8.201323547963547e-07, "loss": 1.4971, "step": 4685 }, { "epoch": 0.9606396063960639, "grad_norm": 0.418622126870649, "learning_rate": 8.116670774234058e-07, "loss": 1.4799, "step": 4686 }, { "epoch": 0.9608446084460844, "grad_norm": 0.4239843854934979, "learning_rate": 8.032455375976744e-07, "loss": 1.6007, "step": 4687 }, { "epoch": 0.9610496104961049, "grad_norm": 0.36989986033355643, "learning_rate": 7.948677390326786e-07, "loss": 1.4977, "step": 4688 }, { "epoch": 0.9612546125461254, "grad_norm": 0.3757542674850543, "learning_rate": 7.865336854226524e-07, "loss": 1.4711, "step": 4689 }, { "epoch": 0.9614596145961459, "grad_norm": 0.4709794100569463, "learning_rate": 7.782433804425227e-07, "loss": 1.4974, "step": 4690 }, { "epoch": 0.9616646166461664, "grad_norm": 0.41583974910439636, "learning_rate": 7.699968277479652e-07, "loss": 1.4301, "step": 4691 }, { "epoch": 0.9618696186961869, "grad_norm": 0.4265693210591978, "learning_rate": 7.617940309753047e-07, "loss": 1.4841, "step": 4692 }, { "epoch": 0.9620746207462074, "grad_norm": 0.40143824887457236, "learning_rate": 7.536349937416143e-07, "loss": 1.5417, "step": 4693 }, { "epoch": 0.9622796227962279, "grad_norm": 0.4361494700553997, "learning_rate": 7.455197196446495e-07, "loss": 1.4935, "step": 4694 }, { "epoch": 0.9624846248462484, "grad_norm": 0.41389207386012566, "learning_rate": 7.374482122628922e-07, "loss": 1.4911, "step": 4695 }, { "epoch": 0.9626896268962689, "grad_norm": 0.4182687843116768, "learning_rate": 7.294204751555067e-07, "loss": 1.4344, "step": 4696 }, { "epoch": 0.9628946289462894, "grad_norm": 0.41550251715708325, "learning_rate": 7.214365118623611e-07, "loss": 1.4439, "step": 4697 }, { "epoch": 0.9630996309963099, "grad_norm": 0.39844119833303215, "learning_rate": 7.134963259040172e-07, "loss": 1.5171, "step": 4698 }, { "epoch": 0.9633046330463305, "grad_norm": 0.4374964656548517, "learning_rate": 7.055999207817188e-07, "loss": 1.4718, "step": 4699 }, { "epoch": 0.963509635096351, "grad_norm": 0.42050163623983694, "learning_rate": 6.977472999774471e-07, "loss": 1.5295, "step": 4700 }, { "epoch": 0.9637146371463715, "grad_norm": 0.42229955545226977, "learning_rate": 6.899384669538433e-07, "loss": 1.4836, "step": 4701 }, { "epoch": 0.963919639196392, "grad_norm": 0.4253691954860886, "learning_rate": 6.821734251542533e-07, "loss": 1.5109, "step": 4702 }, { "epoch": 0.9641246412464125, "grad_norm": 0.4510284699224987, "learning_rate": 6.744521780026936e-07, "loss": 1.5006, "step": 4703 }, { "epoch": 0.964329643296433, "grad_norm": 0.40629505372498653, "learning_rate": 6.667747289038851e-07, "loss": 1.465, "step": 4704 }, { "epoch": 0.9645346453464535, "grad_norm": 0.4127960730083617, "learning_rate": 6.591410812432419e-07, "loss": 1.4636, "step": 4705 }, { "epoch": 0.964739647396474, "grad_norm": 0.3797148049331649, "learning_rate": 6.515512383868605e-07, "loss": 1.448, "step": 4706 }, { "epoch": 0.9649446494464945, "grad_norm": 0.39124058476983414, "learning_rate": 6.440052036815081e-07, "loss": 1.477, "step": 4707 }, { "epoch": 0.965149651496515, "grad_norm": 0.39564688560498384, "learning_rate": 6.365029804546452e-07, "loss": 1.51, "step": 4708 }, { "epoch": 0.9653546535465355, "grad_norm": 0.40609961326935906, "learning_rate": 6.290445720144144e-07, "loss": 1.4874, "step": 4709 }, { "epoch": 0.965559655596556, "grad_norm": 0.4606722567468486, "learning_rate": 6.216299816496185e-07, "loss": 1.4852, "step": 4710 }, { "epoch": 0.9657646576465765, "grad_norm": 0.4190921743361472, "learning_rate": 6.142592126297753e-07, "loss": 1.4367, "step": 4711 }, { "epoch": 0.965969659696597, "grad_norm": 0.43796809660481956, "learning_rate": 6.069322682050516e-07, "loss": 1.4808, "step": 4712 }, { "epoch": 0.9661746617466175, "grad_norm": 0.4149639021606424, "learning_rate": 5.996491516062963e-07, "loss": 1.4742, "step": 4713 }, { "epoch": 0.966379663796638, "grad_norm": 0.38530540213969156, "learning_rate": 5.924098660450295e-07, "loss": 1.5079, "step": 4714 }, { "epoch": 0.9665846658466585, "grad_norm": 0.4184697124524958, "learning_rate": 5.852144147134531e-07, "loss": 1.5402, "step": 4715 }, { "epoch": 0.966789667896679, "grad_norm": 0.4456164181437981, "learning_rate": 5.780628007844401e-07, "loss": 1.4669, "step": 4716 }, { "epoch": 0.9669946699466995, "grad_norm": 0.4454321522803196, "learning_rate": 5.709550274115128e-07, "loss": 1.5302, "step": 4717 }, { "epoch": 0.96719967199672, "grad_norm": 0.4414207175769216, "learning_rate": 5.638910977288747e-07, "loss": 1.4703, "step": 4718 }, { "epoch": 0.9674046740467405, "grad_norm": 0.4659810004199522, "learning_rate": 5.568710148514122e-07, "loss": 1.5372, "step": 4719 }, { "epoch": 0.967609676096761, "grad_norm": 0.4106833448285871, "learning_rate": 5.498947818746602e-07, "loss": 1.4944, "step": 4720 }, { "epoch": 0.9678146781467815, "grad_norm": 0.37805980965060865, "learning_rate": 5.429624018748136e-07, "loss": 1.4878, "step": 4721 }, { "epoch": 0.968019680196802, "grad_norm": 0.42625196752407407, "learning_rate": 5.360738779087382e-07, "loss": 1.4859, "step": 4722 }, { "epoch": 0.9682246822468225, "grad_norm": 0.43478381156727663, "learning_rate": 5.292292130139598e-07, "loss": 1.5147, "step": 4723 }, { "epoch": 0.968429684296843, "grad_norm": 0.4002246285727642, "learning_rate": 5.22428410208664e-07, "loss": 1.4095, "step": 4724 }, { "epoch": 0.9686346863468634, "grad_norm": 0.38366057516835866, "learning_rate": 5.156714724917078e-07, "loss": 1.4476, "step": 4725 }, { "epoch": 0.968839688396884, "grad_norm": 0.4337626103727618, "learning_rate": 5.089584028425743e-07, "loss": 1.5306, "step": 4726 }, { "epoch": 0.9690446904469044, "grad_norm": 0.40252390973139135, "learning_rate": 5.022892042214289e-07, "loss": 1.4648, "step": 4727 }, { "epoch": 0.9692496924969249, "grad_norm": 0.38813388482940925, "learning_rate": 4.956638795690971e-07, "loss": 1.4158, "step": 4728 }, { "epoch": 0.9694546945469454, "grad_norm": 0.37727259572021915, "learning_rate": 4.890824318070419e-07, "loss": 1.4199, "step": 4729 }, { "epoch": 0.9696596965969659, "grad_norm": 0.4256671856842206, "learning_rate": 4.825448638373642e-07, "loss": 1.4919, "step": 4730 }, { "epoch": 0.9698646986469864, "grad_norm": 0.47119797029152144, "learning_rate": 4.7605117854284676e-07, "loss": 1.5173, "step": 4731 }, { "epoch": 0.9700697006970069, "grad_norm": 0.414881165668109, "learning_rate": 4.6960137878692134e-07, "loss": 1.5033, "step": 4732 }, { "epoch": 0.9702747027470274, "grad_norm": 0.38346657561480907, "learning_rate": 4.631954674136463e-07, "loss": 1.5122, "step": 4733 }, { "epoch": 0.9704797047970479, "grad_norm": 0.4258394678264236, "learning_rate": 4.568334472477287e-07, "loss": 1.4828, "step": 4734 }, { "epoch": 0.9706847068470684, "grad_norm": 0.4156653037754415, "learning_rate": 4.505153210945467e-07, "loss": 1.5297, "step": 4735 }, { "epoch": 0.9708897088970889, "grad_norm": 0.43030859498255475, "learning_rate": 4.442410917400941e-07, "loss": 1.5136, "step": 4736 }, { "epoch": 0.9710947109471094, "grad_norm": 0.4424199234429633, "learning_rate": 4.380107619510243e-07, "loss": 1.4358, "step": 4737 }, { "epoch": 0.9712997129971299, "grad_norm": 0.39855901712653724, "learning_rate": 4.318243344746287e-07, "loss": 1.4813, "step": 4738 }, { "epoch": 0.9715047150471505, "grad_norm": 0.4160824523828196, "learning_rate": 4.2568181203884725e-07, "loss": 1.4974, "step": 4739 }, { "epoch": 0.971709717097171, "grad_norm": 0.44560965593088836, "learning_rate": 4.195831973522468e-07, "loss": 1.4886, "step": 4740 }, { "epoch": 0.9719147191471915, "grad_norm": 0.4410754690295941, "learning_rate": 4.1352849310404287e-07, "loss": 1.5187, "step": 4741 }, { "epoch": 0.972119721197212, "grad_norm": 0.401686922030873, "learning_rate": 4.0751770196407745e-07, "loss": 1.4874, "step": 4742 }, { "epoch": 0.9723247232472325, "grad_norm": 0.4375544509971017, "learning_rate": 4.015508265828527e-07, "loss": 1.5499, "step": 4743 }, { "epoch": 0.972529725297253, "grad_norm": 0.4071523224615726, "learning_rate": 3.95627869591475e-07, "loss": 1.4506, "step": 4744 }, { "epoch": 0.9727347273472735, "grad_norm": 0.38503078562721504, "learning_rate": 3.8974883360169966e-07, "loss": 1.4753, "step": 4745 }, { "epoch": 0.972939729397294, "grad_norm": 0.4491544181139598, "learning_rate": 3.8391372120591964e-07, "loss": 1.4853, "step": 4746 }, { "epoch": 0.9731447314473145, "grad_norm": 0.41571417430065877, "learning_rate": 3.7812253497715445e-07, "loss": 1.5118, "step": 4747 }, { "epoch": 0.973349733497335, "grad_norm": 0.5144917204932947, "learning_rate": 3.7237527746905034e-07, "loss": 1.4622, "step": 4748 }, { "epoch": 0.9735547355473555, "grad_norm": 0.48049674908218165, "learning_rate": 3.6667195121589115e-07, "loss": 1.5274, "step": 4749 }, { "epoch": 0.973759737597376, "grad_norm": 0.4126431333133009, "learning_rate": 3.6101255873257634e-07, "loss": 1.4432, "step": 4750 }, { "epoch": 0.9739647396473965, "grad_norm": 0.42070510748039774, "learning_rate": 3.553971025146541e-07, "loss": 1.5106, "step": 4751 }, { "epoch": 0.974169741697417, "grad_norm": 0.4427413259298651, "learning_rate": 3.498255850382659e-07, "loss": 1.5155, "step": 4752 }, { "epoch": 0.9743747437474375, "grad_norm": 0.4420356563874356, "learning_rate": 3.4429800876021324e-07, "loss": 1.523, "step": 4753 }, { "epoch": 0.974579745797458, "grad_norm": 0.39347009097955726, "learning_rate": 3.38814376117913e-07, "loss": 1.4368, "step": 4754 }, { "epoch": 0.9747847478474785, "grad_norm": 0.34641017895382636, "learning_rate": 3.3337468952937546e-07, "loss": 1.351, "step": 4755 }, { "epoch": 0.974989749897499, "grad_norm": 0.41593376371532825, "learning_rate": 3.2797895139327074e-07, "loss": 1.4888, "step": 4756 }, { "epoch": 0.9751947519475195, "grad_norm": 0.4328847199977505, "learning_rate": 3.226271640888734e-07, "loss": 1.4963, "step": 4757 }, { "epoch": 0.97539975399754, "grad_norm": 0.3986398509048014, "learning_rate": 3.173193299760735e-07, "loss": 1.5254, "step": 4758 }, { "epoch": 0.9756047560475605, "grad_norm": 0.4188488332973968, "learning_rate": 3.1205545139538775e-07, "loss": 1.5053, "step": 4759 }, { "epoch": 0.975809758097581, "grad_norm": 0.40041212468923243, "learning_rate": 3.0683553066793715e-07, "loss": 1.4585, "step": 4760 }, { "epoch": 0.9760147601476015, "grad_norm": 0.4201099386305583, "learning_rate": 3.0165957009549163e-07, "loss": 1.5044, "step": 4761 }, { "epoch": 0.976219762197622, "grad_norm": 0.41623579088204754, "learning_rate": 2.9652757196039216e-07, "loss": 1.4784, "step": 4762 }, { "epoch": 0.9764247642476425, "grad_norm": 0.43686370709521993, "learning_rate": 2.9143953852562856e-07, "loss": 1.4495, "step": 4763 }, { "epoch": 0.976629766297663, "grad_norm": 0.44562982363815135, "learning_rate": 2.86395472034795e-07, "loss": 1.4928, "step": 4764 }, { "epoch": 0.9768347683476835, "grad_norm": 0.3599273158143983, "learning_rate": 2.813953747120901e-07, "loss": 1.401, "step": 4765 }, { "epoch": 0.977039770397704, "grad_norm": 0.4292916923316039, "learning_rate": 2.7643924876232794e-07, "loss": 1.5005, "step": 4766 }, { "epoch": 0.9772447724477245, "grad_norm": 0.4145690105930325, "learning_rate": 2.715270963709382e-07, "loss": 1.5025, "step": 4767 }, { "epoch": 0.977449774497745, "grad_norm": 0.3959551114664484, "learning_rate": 2.6665891970395487e-07, "loss": 1.4598, "step": 4768 }, { "epoch": 0.9776547765477654, "grad_norm": 0.46354582222510776, "learning_rate": 2.618347209080163e-07, "loss": 1.5248, "step": 4769 }, { "epoch": 0.977859778597786, "grad_norm": 0.4060424677087937, "learning_rate": 2.570545021103876e-07, "loss": 1.4273, "step": 4770 }, { "epoch": 0.9780647806478064, "grad_norm": 0.3675780316713538, "learning_rate": 2.52318265418916e-07, "loss": 1.4516, "step": 4771 }, { "epoch": 0.9782697826978269, "grad_norm": 0.43631992772309053, "learning_rate": 2.476260129220864e-07, "loss": 1.491, "step": 4772 }, { "epoch": 0.9784747847478474, "grad_norm": 0.4515275514757521, "learning_rate": 2.429777466889438e-07, "loss": 1.4841, "step": 4773 }, { "epoch": 0.9786797867978679, "grad_norm": 0.4189350600473239, "learning_rate": 2.3837346876918187e-07, "loss": 1.4818, "step": 4774 }, { "epoch": 0.9788847888478884, "grad_norm": 0.4449532304337471, "learning_rate": 2.338131811930655e-07, "loss": 1.4825, "step": 4775 }, { "epoch": 0.9790897908979089, "grad_norm": 0.3975122126376216, "learning_rate": 2.2929688597147503e-07, "loss": 1.4481, "step": 4776 }, { "epoch": 0.9792947929479294, "grad_norm": 0.43959619992170107, "learning_rate": 2.2482458509590632e-07, "loss": 1.4507, "step": 4777 }, { "epoch": 0.9794997949979499, "grad_norm": 0.45413831139471583, "learning_rate": 2.203962805384263e-07, "loss": 1.4736, "step": 4778 }, { "epoch": 0.9797047970479705, "grad_norm": 0.41676242646049017, "learning_rate": 2.1601197425170638e-07, "loss": 1.4565, "step": 4779 }, { "epoch": 0.979909799097991, "grad_norm": 0.4636818934551148, "learning_rate": 2.116716681690556e-07, "loss": 1.5103, "step": 4780 }, { "epoch": 0.9801148011480115, "grad_norm": 0.44965586992755496, "learning_rate": 2.073753642043208e-07, "loss": 1.4701, "step": 4781 }, { "epoch": 0.980319803198032, "grad_norm": 0.38922209473061264, "learning_rate": 2.0312306425200877e-07, "loss": 1.457, "step": 4782 }, { "epoch": 0.9805248052480525, "grad_norm": 0.432587203511188, "learning_rate": 1.989147701871641e-07, "loss": 1.4537, "step": 4783 }, { "epoch": 0.980729807298073, "grad_norm": 0.405219187548281, "learning_rate": 1.9475048386546902e-07, "loss": 1.4231, "step": 4784 }, { "epoch": 0.9809348093480935, "grad_norm": 0.4308760032805639, "learning_rate": 1.906302071231658e-07, "loss": 1.4876, "step": 4785 }, { "epoch": 0.981139811398114, "grad_norm": 0.4550905711420411, "learning_rate": 1.8655394177712336e-07, "loss": 1.4504, "step": 4786 }, { "epoch": 0.9813448134481345, "grad_norm": 0.42541104380579137, "learning_rate": 1.8252168962479277e-07, "loss": 1.5008, "step": 4787 }, { "epoch": 0.981549815498155, "grad_norm": 0.3829103190083179, "learning_rate": 1.7853345244420726e-07, "loss": 1.4594, "step": 4788 }, { "epoch": 0.9817548175481755, "grad_norm": 0.4159772737147079, "learning_rate": 1.7458923199400457e-07, "loss": 1.4758, "step": 4789 }, { "epoch": 0.981959819598196, "grad_norm": 0.4486934333537602, "learning_rate": 1.7068903001339342e-07, "loss": 1.4261, "step": 4790 }, { "epoch": 0.9821648216482165, "grad_norm": 0.42196303577083094, "learning_rate": 1.6683284822219813e-07, "loss": 1.4501, "step": 4791 }, { "epoch": 0.982369823698237, "grad_norm": 0.41955075559599125, "learning_rate": 1.6302068832081407e-07, "loss": 1.4533, "step": 4792 }, { "epoch": 0.9825748257482575, "grad_norm": 0.4007589889956423, "learning_rate": 1.5925255199024104e-07, "loss": 1.4553, "step": 4793 }, { "epoch": 0.982779827798278, "grad_norm": 0.40973927985609965, "learning_rate": 1.5552844089203877e-07, "loss": 1.4301, "step": 4794 }, { "epoch": 0.9829848298482985, "grad_norm": 0.4290011542576445, "learning_rate": 1.518483566683826e-07, "loss": 1.4963, "step": 4795 }, { "epoch": 0.983189831898319, "grad_norm": 0.39700694003378206, "learning_rate": 1.4821230094200777e-07, "loss": 1.4408, "step": 4796 }, { "epoch": 0.9833948339483395, "grad_norm": 0.4468864256479169, "learning_rate": 1.446202753162762e-07, "loss": 1.4466, "step": 4797 }, { "epoch": 0.98359983599836, "grad_norm": 0.3857946333915545, "learning_rate": 1.4107228137508754e-07, "loss": 1.452, "step": 4798 }, { "epoch": 0.9838048380483805, "grad_norm": 0.4088873547940262, "learning_rate": 1.3756832068294588e-07, "loss": 1.4245, "step": 4799 }, { "epoch": 0.984009840098401, "grad_norm": 0.41877538837429634, "learning_rate": 1.3410839478493754e-07, "loss": 1.5136, "step": 4800 }, { "epoch": 0.9842148421484215, "grad_norm": 0.3890165045313717, "learning_rate": 1.3069250520675314e-07, "loss": 1.417, "step": 4801 }, { "epoch": 0.984419844198442, "grad_norm": 0.41222355070570493, "learning_rate": 1.2732065345462118e-07, "loss": 1.4547, "step": 4802 }, { "epoch": 0.9846248462484625, "grad_norm": 0.4149785733436685, "learning_rate": 1.2399284101538566e-07, "loss": 1.4654, "step": 4803 }, { "epoch": 0.984829848298483, "grad_norm": 0.4480904638205778, "learning_rate": 1.2070906935646165e-07, "loss": 1.4858, "step": 4804 }, { "epoch": 0.9850348503485035, "grad_norm": 0.3965491757927723, "learning_rate": 1.1746933992584642e-07, "loss": 1.4479, "step": 4805 }, { "epoch": 0.985239852398524, "grad_norm": 0.4519263292626218, "learning_rate": 1.1427365415209723e-07, "loss": 1.4798, "step": 4806 }, { "epoch": 0.9854448544485445, "grad_norm": 0.4286642561964939, "learning_rate": 1.1112201344438689e-07, "loss": 1.4771, "step": 4807 }, { "epoch": 0.985649856498565, "grad_norm": 0.40895973603783775, "learning_rate": 1.0801441919242594e-07, "loss": 1.444, "step": 4808 }, { "epoch": 0.9858548585485855, "grad_norm": 0.45609641739854473, "learning_rate": 1.0495087276654048e-07, "loss": 1.5198, "step": 4809 }, { "epoch": 0.986059860598606, "grad_norm": 0.444124756879576, "learning_rate": 1.0193137551759436e-07, "loss": 1.4881, "step": 4810 }, { "epoch": 0.9862648626486265, "grad_norm": 0.4255164966940527, "learning_rate": 9.895592877706695e-08, "loss": 1.4958, "step": 4811 }, { "epoch": 0.986469864698647, "grad_norm": 0.39245316606553554, "learning_rate": 9.602453385699762e-08, "loss": 1.4554, "step": 4812 }, { "epoch": 0.9866748667486674, "grad_norm": 0.39263972070907505, "learning_rate": 9.313719204997462e-08, "loss": 1.4835, "step": 4813 }, { "epoch": 0.9868798687986879, "grad_norm": 0.4833814181160818, "learning_rate": 9.029390462921284e-08, "loss": 1.518, "step": 4814 }, { "epoch": 0.9870848708487084, "grad_norm": 0.4062334357911581, "learning_rate": 8.749467284845381e-08, "loss": 1.4927, "step": 4815 }, { "epoch": 0.9872898728987289, "grad_norm": 0.41795026215893444, "learning_rate": 8.47394979420324e-08, "loss": 1.4618, "step": 4816 }, { "epoch": 0.9874948749487494, "grad_norm": 0.4159697756371367, "learning_rate": 8.202838112486566e-08, "loss": 1.4918, "step": 4817 }, { "epoch": 0.9876998769987699, "grad_norm": 0.40522404226150216, "learning_rate": 7.936132359243064e-08, "loss": 1.469, "step": 4818 }, { "epoch": 0.9879048790487905, "grad_norm": 0.3864043385033911, "learning_rate": 7.673832652077551e-08, "loss": 1.4025, "step": 4819 }, { "epoch": 0.988109881098811, "grad_norm": 0.4079666960997385, "learning_rate": 7.415939106651948e-08, "loss": 1.4871, "step": 4820 }, { "epoch": 0.9883148831488315, "grad_norm": 0.37265363065866836, "learning_rate": 7.162451836685291e-08, "loss": 1.4625, "step": 4821 }, { "epoch": 0.988519885198852, "grad_norm": 0.4075121362007614, "learning_rate": 6.913370953955945e-08, "loss": 1.4401, "step": 4822 }, { "epoch": 0.9887248872488725, "grad_norm": 0.36437806832101377, "learning_rate": 6.66869656829494e-08, "loss": 1.4126, "step": 4823 }, { "epoch": 0.988929889298893, "grad_norm": 0.41253358562330233, "learning_rate": 6.428428787593754e-08, "loss": 1.4728, "step": 4824 }, { "epoch": 0.9891348913489135, "grad_norm": 0.4309973749764567, "learning_rate": 6.192567717798747e-08, "loss": 1.4838, "step": 4825 }, { "epoch": 0.989339893398934, "grad_norm": 0.400551961981492, "learning_rate": 5.961113462915613e-08, "loss": 1.4772, "step": 4826 }, { "epoch": 0.9895448954489545, "grad_norm": 0.42354319590893913, "learning_rate": 5.734066125003823e-08, "loss": 1.4291, "step": 4827 }, { "epoch": 0.989749897498975, "grad_norm": 0.4432523371427619, "learning_rate": 5.5114258041799596e-08, "loss": 1.4644, "step": 4828 }, { "epoch": 0.9899548995489955, "grad_norm": 0.44130520419860686, "learning_rate": 5.293192598621044e-08, "loss": 1.4911, "step": 4829 }, { "epoch": 0.990159901599016, "grad_norm": 0.4159219994022452, "learning_rate": 5.079366604555658e-08, "loss": 1.419, "step": 4830 }, { "epoch": 0.9903649036490365, "grad_norm": 0.3898255846453458, "learning_rate": 4.869947916271711e-08, "loss": 1.4689, "step": 4831 }, { "epoch": 0.990569905699057, "grad_norm": 0.43950526834446957, "learning_rate": 4.6649366261142243e-08, "loss": 1.4529, "step": 4832 }, { "epoch": 0.9907749077490775, "grad_norm": 0.40683627802699623, "learning_rate": 4.4643328244831085e-08, "loss": 1.4632, "step": 4833 }, { "epoch": 0.990979909799098, "grad_norm": 0.4198115460878348, "learning_rate": 4.2681365998364916e-08, "loss": 1.4538, "step": 4834 }, { "epoch": 0.9911849118491185, "grad_norm": 0.3978429993506763, "learning_rate": 4.076348038687394e-08, "loss": 1.4341, "step": 4835 }, { "epoch": 0.991389913899139, "grad_norm": 0.4568937319876624, "learning_rate": 3.888967225604834e-08, "loss": 1.5123, "step": 4836 }, { "epoch": 0.9915949159491595, "grad_norm": 0.3697961838990177, "learning_rate": 3.705994243217159e-08, "loss": 1.4678, "step": 4837 }, { "epoch": 0.99179991799918, "grad_norm": 0.37875620032364216, "learning_rate": 3.5274291722053877e-08, "loss": 1.4273, "step": 4838 }, { "epoch": 0.9920049200492005, "grad_norm": 0.3851465693595728, "learning_rate": 3.353272091309867e-08, "loss": 1.4268, "step": 4839 }, { "epoch": 0.992209922099221, "grad_norm": 0.43416259824483977, "learning_rate": 3.183523077324724e-08, "loss": 1.5395, "step": 4840 }, { "epoch": 0.9924149241492415, "grad_norm": 0.3877409953073631, "learning_rate": 3.018182205102304e-08, "loss": 1.4802, "step": 4841 }, { "epoch": 0.992619926199262, "grad_norm": 0.43388487465277253, "learning_rate": 2.8572495475509555e-08, "loss": 1.5293, "step": 4842 }, { "epoch": 0.9928249282492825, "grad_norm": 0.4090449695298792, "learning_rate": 2.7007251756339113e-08, "loss": 1.488, "step": 4843 }, { "epoch": 0.993029930299303, "grad_norm": 0.4054120593911369, "learning_rate": 2.5486091583715176e-08, "loss": 1.4195, "step": 4844 }, { "epoch": 0.9932349323493235, "grad_norm": 0.42585963293773976, "learning_rate": 2.400901562840119e-08, "loss": 1.477, "step": 4845 }, { "epoch": 0.993439934399344, "grad_norm": 0.40256109649364047, "learning_rate": 2.2576024541720587e-08, "loss": 1.4544, "step": 4846 }, { "epoch": 0.9936449364493645, "grad_norm": 0.42821230925476933, "learning_rate": 2.1187118955556805e-08, "loss": 1.5493, "step": 4847 }, { "epoch": 0.993849938499385, "grad_norm": 0.42042153418732986, "learning_rate": 1.9842299482353277e-08, "loss": 1.5471, "step": 4848 }, { "epoch": 0.9940549405494055, "grad_norm": 0.3768683670469088, "learning_rate": 1.8541566715113424e-08, "loss": 1.395, "step": 4849 }, { "epoch": 0.994259942599426, "grad_norm": 0.4369814528504174, "learning_rate": 1.7284921227400662e-08, "loss": 1.5094, "step": 4850 }, { "epoch": 0.9944649446494465, "grad_norm": 0.3841243443062922, "learning_rate": 1.607236357333841e-08, "loss": 1.4953, "step": 4851 }, { "epoch": 0.994669946699467, "grad_norm": 0.4315875249745099, "learning_rate": 1.4903894287610075e-08, "loss": 1.5117, "step": 4852 }, { "epoch": 0.9948749487494875, "grad_norm": 0.3728734240249131, "learning_rate": 1.3779513885470163e-08, "loss": 1.4637, "step": 4853 }, { "epoch": 0.995079950799508, "grad_norm": 0.4321237298735864, "learning_rate": 1.2699222862699867e-08, "loss": 1.4809, "step": 4854 }, { "epoch": 0.9952849528495284, "grad_norm": 0.40885449785880884, "learning_rate": 1.166302169566258e-08, "loss": 1.5055, "step": 4855 }, { "epoch": 0.995489954899549, "grad_norm": 0.4496581284435422, "learning_rate": 1.0670910841281689e-08, "loss": 1.5178, "step": 4856 }, { "epoch": 0.9956949569495694, "grad_norm": 0.42099984898874393, "learning_rate": 9.722890737029478e-09, "loss": 1.4703, "step": 4857 }, { "epoch": 0.9958999589995899, "grad_norm": 0.43382081968722686, "learning_rate": 8.818961800949321e-09, "loss": 1.4281, "step": 4858 }, { "epoch": 0.9961049610496105, "grad_norm": 0.4338470114430421, "learning_rate": 7.959124431622389e-09, "loss": 1.5158, "step": 4859 }, { "epoch": 0.996309963099631, "grad_norm": 0.4132278308700588, "learning_rate": 7.1433790082009456e-09, "loss": 1.5049, "step": 4860 }, { "epoch": 0.9965149651496515, "grad_norm": 0.43225488319709143, "learning_rate": 6.371725890386149e-09, "loss": 1.4922, "step": 4861 }, { "epoch": 0.996719967199672, "grad_norm": 0.41067329939197106, "learning_rate": 5.644165418450253e-09, "loss": 1.4199, "step": 4862 }, { "epoch": 0.9969249692496925, "grad_norm": 0.3685522577926695, "learning_rate": 4.960697913203305e-09, "loss": 1.4375, "step": 4863 }, { "epoch": 0.997129971299713, "grad_norm": 0.452913901435384, "learning_rate": 4.321323676037547e-09, "loss": 1.5359, "step": 4864 }, { "epoch": 0.9973349733497335, "grad_norm": 0.3931547223995384, "learning_rate": 3.726042988883016e-09, "loss": 1.5742, "step": 4865 }, { "epoch": 0.997539975399754, "grad_norm": 0.39494048963820855, "learning_rate": 3.1748561142297407e-09, "loss": 1.4847, "step": 4866 }, { "epoch": 0.9977449774497745, "grad_norm": 0.4412776056160834, "learning_rate": 2.6677632951277454e-09, "loss": 1.4449, "step": 4867 }, { "epoch": 0.997949979499795, "grad_norm": 0.41272395540200857, "learning_rate": 2.2047647551759474e-09, "loss": 1.4359, "step": 4868 }, { "epoch": 0.9981549815498155, "grad_norm": 0.40679571389735636, "learning_rate": 1.7858606985443616e-09, "loss": 1.3884, "step": 4869 }, { "epoch": 0.998359983599836, "grad_norm": 0.41502356742027807, "learning_rate": 1.411051309940792e-09, "loss": 1.481, "step": 4870 }, { "epoch": 0.9985649856498565, "grad_norm": 0.43328652001773216, "learning_rate": 1.08033675464414e-09, "loss": 1.516, "step": 4871 }, { "epoch": 0.998769987699877, "grad_norm": 0.40995482546673423, "learning_rate": 7.937171784933029e-10, "loss": 1.5096, "step": 4872 }, { "epoch": 0.9989749897498975, "grad_norm": 0.40505711826231994, "learning_rate": 5.51192707864967e-10, "loss": 1.532, "step": 4873 }, { "epoch": 0.999179991799918, "grad_norm": 0.4058151772170968, "learning_rate": 3.527634496958143e-10, "loss": 1.4582, "step": 4874 }, { "epoch": 0.9993849938499385, "grad_norm": 0.3900087938588259, "learning_rate": 1.9842949149362355e-10, "loss": 1.468, "step": 4875 }, { "epoch": 0.999589995899959, "grad_norm": 0.42760432050046554, "learning_rate": 8.819090131506613e-11, "loss": 1.4997, "step": 4876 }, { "epoch": 0.9997949979499795, "grad_norm": 0.44867171037446835, "learning_rate": 2.204772775460384e-11, "loss": 1.5601, "step": 4877 }, { "epoch": 1.0, "grad_norm": 0.4227223601971009, "learning_rate": 0.0, "loss": 1.4835, "step": 4878 }, { "epoch": 1.0, "step": 4878, "total_flos": 1495789164199936.0, "train_loss": 1.5954388801364343, "train_runtime": 53290.4384, "train_samples_per_second": 11.715, "train_steps_per_second": 0.092 } ], "logging_steps": 1.0, "max_steps": 4878, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1495789164199936.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }