{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.99978561321766, "eval_steps": 500, "global_step": 8744, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00045735846899252504, "grad_norm": 3.3551074933769645, "learning_rate": 3.802281368821293e-07, "loss": 19.0829, "step": 2 }, { "epoch": 0.0009147169379850501, "grad_norm": 2.4466283018584645, "learning_rate": 7.604562737642586e-07, "loss": 18.7639, "step": 4 }, { "epoch": 0.001372075406977575, "grad_norm": 2.388886860789934, "learning_rate": 1.140684410646388e-06, "loss": 18.6481, "step": 6 }, { "epoch": 0.0018294338759701002, "grad_norm": 3.1920876493442645, "learning_rate": 1.5209125475285172e-06, "loss": 19.0513, "step": 8 }, { "epoch": 0.002286792344962625, "grad_norm": 2.569783164944378, "learning_rate": 1.9011406844106463e-06, "loss": 18.2638, "step": 10 }, { "epoch": 0.00274415081395515, "grad_norm": 2.866119102553481, "learning_rate": 2.281368821292776e-06, "loss": 18.2513, "step": 12 }, { "epoch": 0.0032015092829476752, "grad_norm": 2.4832582662958087, "learning_rate": 2.661596958174905e-06, "loss": 17.33, "step": 14 }, { "epoch": 0.0036588677519402003, "grad_norm": 2.2252221071633493, "learning_rate": 3.0418250950570345e-06, "loss": 16.791, "step": 16 }, { "epoch": 0.004116226220932726, "grad_norm": 2.24883653441383, "learning_rate": 3.422053231939164e-06, "loss": 16.0591, "step": 18 }, { "epoch": 0.00457358468992525, "grad_norm": 1.5751616349614006, "learning_rate": 3.8022813688212926e-06, "loss": 15.3076, "step": 20 }, { "epoch": 0.005030943158917775, "grad_norm": 1.513069830498891, "learning_rate": 4.182509505703422e-06, "loss": 14.6188, "step": 22 }, { "epoch": 0.0054883016279103, "grad_norm": 1.5002287849155653, "learning_rate": 4.562737642585552e-06, "loss": 13.3248, "step": 24 }, { "epoch": 0.005945660096902825, "grad_norm": 1.6086836876351434, "learning_rate": 4.942965779467681e-06, "loss": 12.2963, "step": 26 }, { "epoch": 0.0064030185658953504, "grad_norm": 1.3165910630712214, "learning_rate": 5.32319391634981e-06, "loss": 11.4807, "step": 28 }, { "epoch": 0.0068603770348878755, "grad_norm": 1.0017817835952896, "learning_rate": 5.703422053231939e-06, "loss": 10.635, "step": 30 }, { "epoch": 0.007317735503880401, "grad_norm": 0.7852701512556258, "learning_rate": 6.083650190114069e-06, "loss": 10.4066, "step": 32 }, { "epoch": 0.007775093972872926, "grad_norm": 0.8226240740476531, "learning_rate": 6.4638783269961976e-06, "loss": 9.6691, "step": 34 }, { "epoch": 0.008232452441865452, "grad_norm": 0.7922591242868186, "learning_rate": 6.844106463878328e-06, "loss": 9.396, "step": 36 }, { "epoch": 0.008689810910857975, "grad_norm": 0.9343505409637223, "learning_rate": 7.224334600760456e-06, "loss": 8.9035, "step": 38 }, { "epoch": 0.0091471693798505, "grad_norm": 0.7207006438918167, "learning_rate": 7.604562737642585e-06, "loss": 8.841, "step": 40 }, { "epoch": 0.009604527848843025, "grad_norm": 0.756708934341404, "learning_rate": 7.984790874524716e-06, "loss": 8.3431, "step": 42 }, { "epoch": 0.01006188631783555, "grad_norm": 0.6519910578494815, "learning_rate": 8.365019011406844e-06, "loss": 7.9055, "step": 44 }, { "epoch": 0.010519244786828075, "grad_norm": 0.7246919980717632, "learning_rate": 8.745247148288973e-06, "loss": 7.7243, "step": 46 }, { "epoch": 0.0109766032558206, "grad_norm": 0.796192175833102, "learning_rate": 9.125475285171103e-06, "loss": 7.4463, "step": 48 }, { "epoch": 0.011433961724813126, "grad_norm": 0.8738701883150013, "learning_rate": 9.505703422053232e-06, "loss": 7.4786, "step": 50 }, { "epoch": 0.01189132019380565, "grad_norm": 0.5781242783000669, "learning_rate": 9.885931558935362e-06, "loss": 7.3652, "step": 52 }, { "epoch": 0.012348678662798176, "grad_norm": 0.5999037675376463, "learning_rate": 1.0266159695817491e-05, "loss": 6.789, "step": 54 }, { "epoch": 0.012806037131790701, "grad_norm": 0.8365792992621859, "learning_rate": 1.064638783269962e-05, "loss": 6.7222, "step": 56 }, { "epoch": 0.013263395600783226, "grad_norm": 0.6583466868515011, "learning_rate": 1.102661596958175e-05, "loss": 6.4043, "step": 58 }, { "epoch": 0.013720754069775751, "grad_norm": 0.6272154404860449, "learning_rate": 1.1406844106463879e-05, "loss": 6.3377, "step": 60 }, { "epoch": 0.014178112538768276, "grad_norm": 0.7496098457243506, "learning_rate": 1.1787072243346007e-05, "loss": 6.2573, "step": 62 }, { "epoch": 0.014635471007760801, "grad_norm": 0.7376157395014756, "learning_rate": 1.2167300380228138e-05, "loss": 6.3595, "step": 64 }, { "epoch": 0.015092829476753326, "grad_norm": 0.5269377098911081, "learning_rate": 1.2547528517110266e-05, "loss": 5.9776, "step": 66 }, { "epoch": 0.015550187945745851, "grad_norm": 0.8701796247041962, "learning_rate": 1.2927756653992395e-05, "loss": 5.9556, "step": 68 }, { "epoch": 0.016007546414738377, "grad_norm": 0.564648525115316, "learning_rate": 1.3307984790874526e-05, "loss": 6.1945, "step": 70 }, { "epoch": 0.016464904883730903, "grad_norm": 0.6025454676950756, "learning_rate": 1.3688212927756656e-05, "loss": 5.894, "step": 72 }, { "epoch": 0.016922263352723427, "grad_norm": 0.6467459803376329, "learning_rate": 1.4068441064638785e-05, "loss": 5.6313, "step": 74 }, { "epoch": 0.01737962182171595, "grad_norm": 0.6176661072226, "learning_rate": 1.4448669201520912e-05, "loss": 5.898, "step": 76 }, { "epoch": 0.017836980290708477, "grad_norm": 0.8033614111250089, "learning_rate": 1.4828897338403042e-05, "loss": 5.7163, "step": 78 }, { "epoch": 0.018294338759701, "grad_norm": 0.8898793160663013, "learning_rate": 1.520912547528517e-05, "loss": 5.6707, "step": 80 }, { "epoch": 0.018751697228693527, "grad_norm": 0.5913529698347065, "learning_rate": 1.55893536121673e-05, "loss": 5.636, "step": 82 }, { "epoch": 0.01920905569768605, "grad_norm": 1.144825087184718, "learning_rate": 1.596958174904943e-05, "loss": 5.5091, "step": 84 }, { "epoch": 0.019666414166678577, "grad_norm": 0.5920023545688767, "learning_rate": 1.634980988593156e-05, "loss": 5.3832, "step": 86 }, { "epoch": 0.0201237726356711, "grad_norm": 0.5521565574693933, "learning_rate": 1.673003802281369e-05, "loss": 5.5723, "step": 88 }, { "epoch": 0.020581131104663627, "grad_norm": 0.709965596977381, "learning_rate": 1.711026615969582e-05, "loss": 5.3693, "step": 90 }, { "epoch": 0.02103848957365615, "grad_norm": 0.5249987420566565, "learning_rate": 1.7490494296577946e-05, "loss": 5.2865, "step": 92 }, { "epoch": 0.021495848042648678, "grad_norm": 0.6178345728680745, "learning_rate": 1.7870722433460076e-05, "loss": 5.2769, "step": 94 }, { "epoch": 0.0219532065116412, "grad_norm": 0.5841268665407164, "learning_rate": 1.8250950570342207e-05, "loss": 5.5173, "step": 96 }, { "epoch": 0.022410564980633728, "grad_norm": 0.5123147769331493, "learning_rate": 1.8631178707224337e-05, "loss": 5.4546, "step": 98 }, { "epoch": 0.02286792344962625, "grad_norm": 0.5366883170704826, "learning_rate": 1.9011406844106464e-05, "loss": 5.5489, "step": 100 }, { "epoch": 0.023325281918618778, "grad_norm": 0.7364425367921082, "learning_rate": 1.9391634980988594e-05, "loss": 5.1592, "step": 102 }, { "epoch": 0.0237826403876113, "grad_norm": 0.679542964607147, "learning_rate": 1.9771863117870725e-05, "loss": 5.1572, "step": 104 }, { "epoch": 0.024239998856603828, "grad_norm": 0.5944147404362704, "learning_rate": 2.0152091254752852e-05, "loss": 5.1168, "step": 106 }, { "epoch": 0.02469735732559635, "grad_norm": 0.5638869458850444, "learning_rate": 2.0532319391634982e-05, "loss": 4.9088, "step": 108 }, { "epoch": 0.02515471579458888, "grad_norm": 0.5532212556179864, "learning_rate": 2.0912547528517112e-05, "loss": 4.8944, "step": 110 }, { "epoch": 0.025612074263581402, "grad_norm": 0.5510164664935576, "learning_rate": 2.129277566539924e-05, "loss": 5.1309, "step": 112 }, { "epoch": 0.02606943273257393, "grad_norm": 0.6346661298977294, "learning_rate": 2.167300380228137e-05, "loss": 5.054, "step": 114 }, { "epoch": 0.026526791201566452, "grad_norm": 0.5614472976147326, "learning_rate": 2.20532319391635e-05, "loss": 4.9479, "step": 116 }, { "epoch": 0.02698414967055898, "grad_norm": 0.7170783797216205, "learning_rate": 2.2433460076045627e-05, "loss": 5.2157, "step": 118 }, { "epoch": 0.027441508139551502, "grad_norm": 0.549744895814218, "learning_rate": 2.2813688212927758e-05, "loss": 5.195, "step": 120 }, { "epoch": 0.02789886660854403, "grad_norm": 0.574415834820402, "learning_rate": 2.3193916349809888e-05, "loss": 5.1553, "step": 122 }, { "epoch": 0.028356225077536552, "grad_norm": 0.7486947941369442, "learning_rate": 2.3574144486692015e-05, "loss": 5.039, "step": 124 }, { "epoch": 0.02881358354652908, "grad_norm": 0.5227356090653664, "learning_rate": 2.3954372623574145e-05, "loss": 4.9132, "step": 126 }, { "epoch": 0.029270942015521603, "grad_norm": 0.8515458061611789, "learning_rate": 2.4334600760456276e-05, "loss": 5.069, "step": 128 }, { "epoch": 0.02972830048451413, "grad_norm": 0.5435823774121269, "learning_rate": 2.4714828897338406e-05, "loss": 5.0201, "step": 130 }, { "epoch": 0.030185658953506653, "grad_norm": 0.548658840367355, "learning_rate": 2.5095057034220533e-05, "loss": 4.7683, "step": 132 }, { "epoch": 0.03064301742249918, "grad_norm": 0.6142400790567315, "learning_rate": 2.5475285171102663e-05, "loss": 4.7301, "step": 134 }, { "epoch": 0.031100375891491703, "grad_norm": 0.6493586236909965, "learning_rate": 2.585551330798479e-05, "loss": 5.1568, "step": 136 }, { "epoch": 0.03155773436048423, "grad_norm": 0.6771311205114989, "learning_rate": 2.6235741444866924e-05, "loss": 4.9326, "step": 138 }, { "epoch": 0.03201509282947675, "grad_norm": 0.6390251325954068, "learning_rate": 2.661596958174905e-05, "loss": 5.0001, "step": 140 }, { "epoch": 0.032472451298469276, "grad_norm": 0.6196904193234224, "learning_rate": 2.6996197718631178e-05, "loss": 5.0021, "step": 142 }, { "epoch": 0.03292980976746181, "grad_norm": 0.6001830695927343, "learning_rate": 2.7376425855513312e-05, "loss": 4.7421, "step": 144 }, { "epoch": 0.03338716823645433, "grad_norm": 0.5647240750702043, "learning_rate": 2.775665399239544e-05, "loss": 4.881, "step": 146 }, { "epoch": 0.033844526705446853, "grad_norm": 0.6160585484036919, "learning_rate": 2.813688212927757e-05, "loss": 4.852, "step": 148 }, { "epoch": 0.03430188517443938, "grad_norm": 0.570753475738382, "learning_rate": 2.8517110266159696e-05, "loss": 4.8589, "step": 150 }, { "epoch": 0.0347592436434319, "grad_norm": 0.6562712075575997, "learning_rate": 2.8897338403041823e-05, "loss": 4.7183, "step": 152 }, { "epoch": 0.03521660211242443, "grad_norm": 0.6687562550047219, "learning_rate": 2.9277566539923957e-05, "loss": 4.9118, "step": 154 }, { "epoch": 0.035673960581416954, "grad_norm": 0.6428338327609473, "learning_rate": 2.9657794676806084e-05, "loss": 4.8735, "step": 156 }, { "epoch": 0.03613131905040948, "grad_norm": 0.8700461824261346, "learning_rate": 3.0038022813688214e-05, "loss": 5.0099, "step": 158 }, { "epoch": 0.036588677519402, "grad_norm": 0.6726606122981983, "learning_rate": 3.041825095057034e-05, "loss": 4.9975, "step": 160 }, { "epoch": 0.03704603598839453, "grad_norm": 0.4943442999208959, "learning_rate": 3.0798479087452475e-05, "loss": 4.8269, "step": 162 }, { "epoch": 0.037503394457387054, "grad_norm": 0.6036826337821003, "learning_rate": 3.11787072243346e-05, "loss": 4.8756, "step": 164 }, { "epoch": 0.03796075292637958, "grad_norm": 0.5694769133509333, "learning_rate": 3.155893536121673e-05, "loss": 4.6217, "step": 166 }, { "epoch": 0.0384181113953721, "grad_norm": 0.6120219574165127, "learning_rate": 3.193916349809886e-05, "loss": 4.8661, "step": 168 }, { "epoch": 0.03887546986436463, "grad_norm": 0.6410642141724815, "learning_rate": 3.231939163498099e-05, "loss": 4.7803, "step": 170 }, { "epoch": 0.039332828333357155, "grad_norm": 0.5866802594071411, "learning_rate": 3.269961977186312e-05, "loss": 4.6984, "step": 172 }, { "epoch": 0.03979018680234968, "grad_norm": 0.5090616561292535, "learning_rate": 3.307984790874525e-05, "loss": 4.6926, "step": 174 }, { "epoch": 0.0402475452713422, "grad_norm": 0.7196982389800881, "learning_rate": 3.346007604562738e-05, "loss": 4.6526, "step": 176 }, { "epoch": 0.04070490374033473, "grad_norm": 0.6078650277786868, "learning_rate": 3.384030418250951e-05, "loss": 4.6726, "step": 178 }, { "epoch": 0.041162262209327255, "grad_norm": 0.5992365619344687, "learning_rate": 3.422053231939164e-05, "loss": 4.8182, "step": 180 }, { "epoch": 0.04161962067831978, "grad_norm": 0.5097015165159354, "learning_rate": 3.4600760456273765e-05, "loss": 4.71, "step": 182 }, { "epoch": 0.0420769791473123, "grad_norm": 0.5458849801194426, "learning_rate": 3.498098859315589e-05, "loss": 4.6454, "step": 184 }, { "epoch": 0.04253433761630483, "grad_norm": 0.668299096943703, "learning_rate": 3.5361216730038026e-05, "loss": 4.6913, "step": 186 }, { "epoch": 0.042991696085297355, "grad_norm": 0.604609612074097, "learning_rate": 3.574144486692015e-05, "loss": 4.7287, "step": 188 }, { "epoch": 0.04344905455428988, "grad_norm": 0.533025131978597, "learning_rate": 3.612167300380228e-05, "loss": 4.754, "step": 190 }, { "epoch": 0.0439064130232824, "grad_norm": 0.5830220231923342, "learning_rate": 3.6501901140684413e-05, "loss": 4.8934, "step": 192 }, { "epoch": 0.04436377149227493, "grad_norm": 0.5480072018000458, "learning_rate": 3.688212927756654e-05, "loss": 4.6093, "step": 194 }, { "epoch": 0.044821129961267456, "grad_norm": 0.5668487924660356, "learning_rate": 3.7262357414448674e-05, "loss": 4.7475, "step": 196 }, { "epoch": 0.04527848843025998, "grad_norm": 0.8064183628626438, "learning_rate": 3.76425855513308e-05, "loss": 4.847, "step": 198 }, { "epoch": 0.0457358468992525, "grad_norm": 0.5474699870901841, "learning_rate": 3.802281368821293e-05, "loss": 4.6355, "step": 200 }, { "epoch": 0.04619320536824503, "grad_norm": 0.5485706743663757, "learning_rate": 3.840304182509506e-05, "loss": 4.6615, "step": 202 }, { "epoch": 0.046650563837237556, "grad_norm": 0.6388451577391124, "learning_rate": 3.878326996197719e-05, "loss": 4.7315, "step": 204 }, { "epoch": 0.04710792230623008, "grad_norm": 0.5535674001617467, "learning_rate": 3.916349809885932e-05, "loss": 4.8283, "step": 206 }, { "epoch": 0.0475652807752226, "grad_norm": 0.5316587446433735, "learning_rate": 3.954372623574145e-05, "loss": 4.8591, "step": 208 }, { "epoch": 0.04802263924421513, "grad_norm": 0.5351430236964024, "learning_rate": 3.9923954372623577e-05, "loss": 4.8031, "step": 210 }, { "epoch": 0.048479997713207656, "grad_norm": 0.4313764670045017, "learning_rate": 4.0304182509505703e-05, "loss": 4.6255, "step": 212 }, { "epoch": 0.04893735618220018, "grad_norm": 0.445034576155005, "learning_rate": 4.068441064638783e-05, "loss": 4.4574, "step": 214 }, { "epoch": 0.0493947146511927, "grad_norm": 0.6056213678776625, "learning_rate": 4.1064638783269964e-05, "loss": 4.7162, "step": 216 }, { "epoch": 0.049852073120185234, "grad_norm": 0.5620002115697225, "learning_rate": 4.144486692015209e-05, "loss": 4.6948, "step": 218 }, { "epoch": 0.05030943158917776, "grad_norm": 0.6072070364329526, "learning_rate": 4.1825095057034225e-05, "loss": 4.734, "step": 220 }, { "epoch": 0.05076679005817028, "grad_norm": 0.6371284991866716, "learning_rate": 4.220532319391635e-05, "loss": 4.5288, "step": 222 }, { "epoch": 0.051224148527162804, "grad_norm": 0.5619896321799719, "learning_rate": 4.258555133079848e-05, "loss": 4.7133, "step": 224 }, { "epoch": 0.05168150699615533, "grad_norm": 0.4604786674329856, "learning_rate": 4.296577946768061e-05, "loss": 4.7385, "step": 226 }, { "epoch": 0.05213886546514786, "grad_norm": 0.6751657008838486, "learning_rate": 4.334600760456274e-05, "loss": 4.4741, "step": 228 }, { "epoch": 0.05259622393414038, "grad_norm": 0.47658466850374326, "learning_rate": 4.3726235741444873e-05, "loss": 4.645, "step": 230 }, { "epoch": 0.053053582403132904, "grad_norm": 0.44431777749312595, "learning_rate": 4.4106463878327e-05, "loss": 4.582, "step": 232 }, { "epoch": 0.05351094087212543, "grad_norm": 0.6001571985786964, "learning_rate": 4.448669201520913e-05, "loss": 4.5858, "step": 234 }, { "epoch": 0.05396829934111796, "grad_norm": 0.5510019441888783, "learning_rate": 4.4866920152091254e-05, "loss": 4.7075, "step": 236 }, { "epoch": 0.05442565781011048, "grad_norm": 0.5474898015992852, "learning_rate": 4.524714828897338e-05, "loss": 4.7387, "step": 238 }, { "epoch": 0.054883016279103004, "grad_norm": 0.5616492089106279, "learning_rate": 4.5627376425855515e-05, "loss": 4.5684, "step": 240 }, { "epoch": 0.05534037474809553, "grad_norm": 0.5422436654849554, "learning_rate": 4.600760456273764e-05, "loss": 4.6231, "step": 242 }, { "epoch": 0.05579773321708806, "grad_norm": 0.5380050127547484, "learning_rate": 4.6387832699619776e-05, "loss": 4.6868, "step": 244 }, { "epoch": 0.05625509168608058, "grad_norm": 0.46041679065869534, "learning_rate": 4.67680608365019e-05, "loss": 4.6583, "step": 246 }, { "epoch": 0.056712450155073105, "grad_norm": 0.46667830106446745, "learning_rate": 4.714828897338403e-05, "loss": 4.8297, "step": 248 }, { "epoch": 0.05716980862406563, "grad_norm": 0.47207402396331233, "learning_rate": 4.7528517110266163e-05, "loss": 4.7188, "step": 250 }, { "epoch": 0.05762716709305816, "grad_norm": 0.5672996578244436, "learning_rate": 4.790874524714829e-05, "loss": 4.576, "step": 252 }, { "epoch": 0.05808452556205068, "grad_norm": 0.4771316912628846, "learning_rate": 4.8288973384030424e-05, "loss": 4.5866, "step": 254 }, { "epoch": 0.058541884031043205, "grad_norm": 0.5197782287950196, "learning_rate": 4.866920152091255e-05, "loss": 4.6504, "step": 256 }, { "epoch": 0.05899924250003573, "grad_norm": 0.5593502817073599, "learning_rate": 4.904942965779468e-05, "loss": 4.5811, "step": 258 }, { "epoch": 0.05945660096902826, "grad_norm": 0.4692667814525092, "learning_rate": 4.942965779467681e-05, "loss": 4.4996, "step": 260 }, { "epoch": 0.05991395943802078, "grad_norm": 0.4495506221719415, "learning_rate": 4.980988593155894e-05, "loss": 4.7288, "step": 262 }, { "epoch": 0.060371317907013305, "grad_norm": 0.704236614169144, "learning_rate": 4.999999828479661e-05, "loss": 4.688, "step": 264 }, { "epoch": 0.06082867637600583, "grad_norm": 0.6309222730336195, "learning_rate": 4.999998456317088e-05, "loss": 4.4899, "step": 266 }, { "epoch": 0.06128603484499836, "grad_norm": 0.41830254532958183, "learning_rate": 4.9999957119926955e-05, "loss": 4.4786, "step": 268 }, { "epoch": 0.06174339331399088, "grad_norm": 0.4955200873430737, "learning_rate": 4.9999915955079904e-05, "loss": 4.4559, "step": 270 }, { "epoch": 0.062200751782983406, "grad_norm": 0.5880073862309305, "learning_rate": 4.99998610686523e-05, "loss": 4.616, "step": 272 }, { "epoch": 0.06265811025197593, "grad_norm": 0.5230240968583285, "learning_rate": 4.9999792460674284e-05, "loss": 4.8392, "step": 274 }, { "epoch": 0.06311546872096846, "grad_norm": 0.5047335671578798, "learning_rate": 4.9999710131183505e-05, "loss": 4.5103, "step": 276 }, { "epoch": 0.06357282718996098, "grad_norm": 0.4784568750555856, "learning_rate": 4.999961408022517e-05, "loss": 4.6313, "step": 278 }, { "epoch": 0.0640301856589535, "grad_norm": 0.5347782665734103, "learning_rate": 4.9999504307851975e-05, "loss": 4.6283, "step": 280 }, { "epoch": 0.06448754412794604, "grad_norm": 0.5155766782370667, "learning_rate": 4.999938081412417e-05, "loss": 4.5535, "step": 282 }, { "epoch": 0.06494490259693855, "grad_norm": 0.4752095356359774, "learning_rate": 4.999924359910956e-05, "loss": 4.4611, "step": 284 }, { "epoch": 0.06540226106593108, "grad_norm": 0.52950865390323, "learning_rate": 4.999909266288344e-05, "loss": 4.671, "step": 286 }, { "epoch": 0.06585961953492361, "grad_norm": 0.4876960146360037, "learning_rate": 4.999892800552864e-05, "loss": 4.7854, "step": 288 }, { "epoch": 0.06631697800391613, "grad_norm": 0.47255883907619983, "learning_rate": 4.999874962713557e-05, "loss": 4.5277, "step": 290 }, { "epoch": 0.06677433647290866, "grad_norm": 1.2386194964509987, "learning_rate": 4.9998557527802115e-05, "loss": 4.5802, "step": 292 }, { "epoch": 0.06723169494190118, "grad_norm": 0.47261929958712445, "learning_rate": 4.9998351707633704e-05, "loss": 4.4238, "step": 294 }, { "epoch": 0.06768905341089371, "grad_norm": 0.4485979517544277, "learning_rate": 4.9998132166743314e-05, "loss": 4.5022, "step": 296 }, { "epoch": 0.06814641187988624, "grad_norm": 0.5243514039524344, "learning_rate": 4.999789890525145e-05, "loss": 4.5813, "step": 298 }, { "epoch": 0.06860377034887875, "grad_norm": 0.5912712205058703, "learning_rate": 4.999765192328613e-05, "loss": 4.3929, "step": 300 }, { "epoch": 0.06906112881787128, "grad_norm": 0.5267842185389456, "learning_rate": 4.999739122098291e-05, "loss": 4.4278, "step": 302 }, { "epoch": 0.0695184872868638, "grad_norm": 0.573139421911795, "learning_rate": 4.99971167984849e-05, "loss": 4.5442, "step": 304 }, { "epoch": 0.06997584575585633, "grad_norm": 0.5065432017355053, "learning_rate": 4.9996828655942706e-05, "loss": 4.6063, "step": 306 }, { "epoch": 0.07043320422484886, "grad_norm": 0.46417065782503303, "learning_rate": 4.9996526793514484e-05, "loss": 4.3368, "step": 308 }, { "epoch": 0.07089056269384138, "grad_norm": 0.45941098455022866, "learning_rate": 4.999621121136591e-05, "loss": 4.6093, "step": 310 }, { "epoch": 0.07134792116283391, "grad_norm": 0.5017652939431354, "learning_rate": 4.99958819096702e-05, "loss": 4.3646, "step": 312 }, { "epoch": 0.07180527963182644, "grad_norm": 0.5119334054586856, "learning_rate": 4.999553888860809e-05, "loss": 4.3873, "step": 314 }, { "epoch": 0.07226263810081895, "grad_norm": 0.45166338945211004, "learning_rate": 4.999518214836787e-05, "loss": 4.6178, "step": 316 }, { "epoch": 0.07271999656981148, "grad_norm": 0.47605119698442805, "learning_rate": 4.999481168914533e-05, "loss": 4.7236, "step": 318 }, { "epoch": 0.073177355038804, "grad_norm": 0.41586279412425764, "learning_rate": 4.9994427511143805e-05, "loss": 4.3503, "step": 320 }, { "epoch": 0.07363471350779653, "grad_norm": 0.4412356190323672, "learning_rate": 4.999402961457415e-05, "loss": 4.6399, "step": 322 }, { "epoch": 0.07409207197678906, "grad_norm": 0.3868326924783169, "learning_rate": 4.999361799965476e-05, "loss": 4.5127, "step": 324 }, { "epoch": 0.07454943044578158, "grad_norm": 0.7118563321205401, "learning_rate": 4.999319266661156e-05, "loss": 4.5033, "step": 326 }, { "epoch": 0.07500678891477411, "grad_norm": 0.3907153215890235, "learning_rate": 4.9992753615678e-05, "loss": 4.5105, "step": 328 }, { "epoch": 0.07546414738376664, "grad_norm": 0.46149004130503657, "learning_rate": 4.999230084709505e-05, "loss": 4.6453, "step": 330 }, { "epoch": 0.07592150585275916, "grad_norm": 0.3820056781908504, "learning_rate": 4.999183436111123e-05, "loss": 4.6401, "step": 332 }, { "epoch": 0.07637886432175169, "grad_norm": 0.5910260307369181, "learning_rate": 4.9991354157982586e-05, "loss": 4.4435, "step": 334 }, { "epoch": 0.0768362227907442, "grad_norm": 0.4998400551557144, "learning_rate": 4.999086023797266e-05, "loss": 4.6003, "step": 336 }, { "epoch": 0.07729358125973673, "grad_norm": 0.4371622689351097, "learning_rate": 4.999035260135256e-05, "loss": 4.4732, "step": 338 }, { "epoch": 0.07775093972872926, "grad_norm": 0.4093455522443536, "learning_rate": 4.998983124840092e-05, "loss": 4.4184, "step": 340 }, { "epoch": 0.07820829819772178, "grad_norm": 0.5067086767465206, "learning_rate": 4.998929617940388e-05, "loss": 4.7203, "step": 342 }, { "epoch": 0.07866565666671431, "grad_norm": 0.4752084694956521, "learning_rate": 4.998874739465511e-05, "loss": 4.4428, "step": 344 }, { "epoch": 0.07912301513570684, "grad_norm": 0.4543340108556913, "learning_rate": 4.998818489445585e-05, "loss": 4.4543, "step": 346 }, { "epoch": 0.07958037360469936, "grad_norm": 0.44432176839852794, "learning_rate": 4.9987608679114816e-05, "loss": 4.4305, "step": 348 }, { "epoch": 0.08003773207369189, "grad_norm": 1.9485503065405165, "learning_rate": 4.9987018748948264e-05, "loss": 4.5212, "step": 350 }, { "epoch": 0.0804950905426844, "grad_norm": 0.5509956970462221, "learning_rate": 4.998641510428001e-05, "loss": 4.6024, "step": 352 }, { "epoch": 0.08095244901167693, "grad_norm": 0.41483061099752455, "learning_rate": 4.998579774544136e-05, "loss": 4.4685, "step": 354 }, { "epoch": 0.08140980748066946, "grad_norm": 0.48517931024906413, "learning_rate": 4.9985166672771156e-05, "loss": 4.6583, "step": 356 }, { "epoch": 0.08186716594966198, "grad_norm": 0.7602689590874913, "learning_rate": 4.998452188661578e-05, "loss": 4.561, "step": 358 }, { "epoch": 0.08232452441865451, "grad_norm": 0.44561966123766916, "learning_rate": 4.998386338732913e-05, "loss": 4.5582, "step": 360 }, { "epoch": 0.08278188288764704, "grad_norm": 0.42897362796491334, "learning_rate": 4.998319117527264e-05, "loss": 4.3794, "step": 362 }, { "epoch": 0.08323924135663956, "grad_norm": 0.5344457155697423, "learning_rate": 4.998250525081525e-05, "loss": 4.5247, "step": 364 }, { "epoch": 0.08369659982563209, "grad_norm": 0.5427483584775837, "learning_rate": 4.998180561433345e-05, "loss": 4.6686, "step": 366 }, { "epoch": 0.0841539582946246, "grad_norm": 0.48810630716145986, "learning_rate": 4.998109226621124e-05, "loss": 4.5503, "step": 368 }, { "epoch": 0.08461131676361713, "grad_norm": 0.49007703528335533, "learning_rate": 4.998036520684016e-05, "loss": 4.4104, "step": 370 }, { "epoch": 0.08506867523260966, "grad_norm": 0.4561770028023576, "learning_rate": 4.997962443661926e-05, "loss": 4.5392, "step": 372 }, { "epoch": 0.08552603370160218, "grad_norm": 0.41245327742716453, "learning_rate": 4.997886995595512e-05, "loss": 4.343, "step": 374 }, { "epoch": 0.08598339217059471, "grad_norm": 0.40237006314351836, "learning_rate": 4.997810176526186e-05, "loss": 4.5761, "step": 376 }, { "epoch": 0.08644075063958723, "grad_norm": 0.5343506311136086, "learning_rate": 4.99773198649611e-05, "loss": 4.4244, "step": 378 }, { "epoch": 0.08689810910857976, "grad_norm": 0.4822196536614066, "learning_rate": 4.9976524255482014e-05, "loss": 4.6193, "step": 380 }, { "epoch": 0.08735546757757229, "grad_norm": 0.43283191114063974, "learning_rate": 4.997571493726127e-05, "loss": 4.4416, "step": 382 }, { "epoch": 0.0878128260465648, "grad_norm": 0.4303038424893233, "learning_rate": 4.9974891910743076e-05, "loss": 4.4111, "step": 384 }, { "epoch": 0.08827018451555733, "grad_norm": 0.40492897561802965, "learning_rate": 4.997405517637917e-05, "loss": 4.4454, "step": 386 }, { "epoch": 0.08872754298454986, "grad_norm": 0.4666318082846702, "learning_rate": 4.99732047346288e-05, "loss": 4.3857, "step": 388 }, { "epoch": 0.08918490145354238, "grad_norm": 0.4100910316568464, "learning_rate": 4.997234058595874e-05, "loss": 4.5297, "step": 390 }, { "epoch": 0.08964225992253491, "grad_norm": 0.4980737417117768, "learning_rate": 4.997146273084331e-05, "loss": 4.4436, "step": 392 }, { "epoch": 0.09009961839152743, "grad_norm": 0.5489806102549226, "learning_rate": 4.9970571169764315e-05, "loss": 4.3563, "step": 394 }, { "epoch": 0.09055697686051996, "grad_norm": 0.43959780536651355, "learning_rate": 4.996966590321111e-05, "loss": 4.3979, "step": 396 }, { "epoch": 0.09101433532951249, "grad_norm": 0.48903647953805796, "learning_rate": 4.996874693168055e-05, "loss": 4.4742, "step": 398 }, { "epoch": 0.091471693798505, "grad_norm": 0.5060562869907603, "learning_rate": 4.996781425567705e-05, "loss": 4.5327, "step": 400 }, { "epoch": 0.09192905226749754, "grad_norm": 0.46636441680547824, "learning_rate": 4.9966867875712494e-05, "loss": 4.5412, "step": 402 }, { "epoch": 0.09238641073649007, "grad_norm": 0.5366427249731298, "learning_rate": 4.996590779230635e-05, "loss": 4.2796, "step": 404 }, { "epoch": 0.09284376920548258, "grad_norm": 0.43739922248355223, "learning_rate": 4.9964934005985555e-05, "loss": 4.4745, "step": 406 }, { "epoch": 0.09330112767447511, "grad_norm": 0.424878957986423, "learning_rate": 4.996394651728459e-05, "loss": 4.4854, "step": 408 }, { "epoch": 0.09375848614346763, "grad_norm": 0.38862603842203, "learning_rate": 4.996294532674545e-05, "loss": 4.5344, "step": 410 }, { "epoch": 0.09421584461246016, "grad_norm": 0.3874107488783466, "learning_rate": 4.996193043491766e-05, "loss": 4.524, "step": 412 }, { "epoch": 0.09467320308145269, "grad_norm": 0.43984098484396517, "learning_rate": 4.9960901842358254e-05, "loss": 4.4715, "step": 414 }, { "epoch": 0.0951305615504452, "grad_norm": 0.5460963837173978, "learning_rate": 4.995985954963179e-05, "loss": 4.3423, "step": 416 }, { "epoch": 0.09558792001943774, "grad_norm": 0.4607399177576217, "learning_rate": 4.9958803557310344e-05, "loss": 4.3411, "step": 418 }, { "epoch": 0.09604527848843027, "grad_norm": 0.47178970854499785, "learning_rate": 4.995773386597352e-05, "loss": 4.4485, "step": 420 }, { "epoch": 0.09650263695742278, "grad_norm": 0.5897175736028717, "learning_rate": 4.9956650476208434e-05, "loss": 4.434, "step": 422 }, { "epoch": 0.09695999542641531, "grad_norm": 0.4520444647462453, "learning_rate": 4.995555338860971e-05, "loss": 4.3824, "step": 424 }, { "epoch": 0.09741735389540783, "grad_norm": 0.412997531674516, "learning_rate": 4.995444260377952e-05, "loss": 4.2389, "step": 426 }, { "epoch": 0.09787471236440036, "grad_norm": 0.540015249434364, "learning_rate": 4.995331812232752e-05, "loss": 4.2762, "step": 428 }, { "epoch": 0.09833207083339289, "grad_norm": 0.4668200020268364, "learning_rate": 4.995217994487089e-05, "loss": 4.4495, "step": 430 }, { "epoch": 0.0987894293023854, "grad_norm": 0.4180617540934258, "learning_rate": 4.9951028072034365e-05, "loss": 4.2697, "step": 432 }, { "epoch": 0.09924678777137794, "grad_norm": 0.48066122960228874, "learning_rate": 4.9949862504450143e-05, "loss": 4.3609, "step": 434 }, { "epoch": 0.09970414624037047, "grad_norm": 0.38199261038932636, "learning_rate": 4.9948683242757974e-05, "loss": 4.4871, "step": 436 }, { "epoch": 0.10016150470936298, "grad_norm": 0.562840438183233, "learning_rate": 4.9947490287605106e-05, "loss": 4.3871, "step": 438 }, { "epoch": 0.10061886317835551, "grad_norm": 0.6003362617377126, "learning_rate": 4.9946283639646315e-05, "loss": 4.4021, "step": 440 }, { "epoch": 0.10107622164734803, "grad_norm": 0.5400844230709532, "learning_rate": 4.9945063299543894e-05, "loss": 4.3691, "step": 442 }, { "epoch": 0.10153358011634056, "grad_norm": 0.584554540623405, "learning_rate": 4.994382926796764e-05, "loss": 4.5115, "step": 444 }, { "epoch": 0.10199093858533309, "grad_norm": 0.5112376098717012, "learning_rate": 4.994258154559487e-05, "loss": 4.4808, "step": 446 }, { "epoch": 0.10244829705432561, "grad_norm": 0.504213431714183, "learning_rate": 4.9941320133110405e-05, "loss": 4.4753, "step": 448 }, { "epoch": 0.10290565552331814, "grad_norm": 0.4443755582813989, "learning_rate": 4.994004503120661e-05, "loss": 4.3166, "step": 450 }, { "epoch": 0.10336301399231065, "grad_norm": 0.46169172424123167, "learning_rate": 4.993875624058332e-05, "loss": 4.5212, "step": 452 }, { "epoch": 0.10382037246130318, "grad_norm": 0.4275295516167809, "learning_rate": 4.993745376194793e-05, "loss": 4.2718, "step": 454 }, { "epoch": 0.10427773093029571, "grad_norm": 0.5486019659554374, "learning_rate": 4.993613759601532e-05, "loss": 4.4255, "step": 456 }, { "epoch": 0.10473508939928823, "grad_norm": 0.4970478285105538, "learning_rate": 4.993480774350787e-05, "loss": 4.4294, "step": 458 }, { "epoch": 0.10519244786828076, "grad_norm": 0.3752768011529072, "learning_rate": 4.9933464205155514e-05, "loss": 4.406, "step": 460 }, { "epoch": 0.10564980633727329, "grad_norm": 0.48585950342015016, "learning_rate": 4.993210698169566e-05, "loss": 4.4009, "step": 462 }, { "epoch": 0.10610716480626581, "grad_norm": 0.39690221182581, "learning_rate": 4.993073607387324e-05, "loss": 4.4962, "step": 464 }, { "epoch": 0.10656452327525834, "grad_norm": 0.474372255093721, "learning_rate": 4.99293514824407e-05, "loss": 4.5608, "step": 466 }, { "epoch": 0.10702188174425085, "grad_norm": 0.487376190627298, "learning_rate": 4.992795320815799e-05, "loss": 4.3461, "step": 468 }, { "epoch": 0.10747924021324338, "grad_norm": 0.43581732662498207, "learning_rate": 4.992654125179259e-05, "loss": 4.2466, "step": 470 }, { "epoch": 0.10793659868223592, "grad_norm": 0.4335575432309154, "learning_rate": 4.992511561411944e-05, "loss": 4.3763, "step": 472 }, { "epoch": 0.10839395715122843, "grad_norm": 0.568434300678429, "learning_rate": 4.992367629592106e-05, "loss": 4.4681, "step": 474 }, { "epoch": 0.10885131562022096, "grad_norm": 0.3911823873629191, "learning_rate": 4.992222329798743e-05, "loss": 4.3538, "step": 476 }, { "epoch": 0.10930867408921349, "grad_norm": 0.43478767016952863, "learning_rate": 4.992075662111604e-05, "loss": 4.3885, "step": 478 }, { "epoch": 0.10976603255820601, "grad_norm": 0.49991071917222146, "learning_rate": 4.9919276266111896e-05, "loss": 4.4903, "step": 480 }, { "epoch": 0.11022339102719854, "grad_norm": 0.4820076557981935, "learning_rate": 4.991778223378753e-05, "loss": 4.5505, "step": 482 }, { "epoch": 0.11068074949619106, "grad_norm": 0.5824711820325594, "learning_rate": 4.9916274524962944e-05, "loss": 4.2437, "step": 484 }, { "epoch": 0.11113810796518359, "grad_norm": 0.46823793098244326, "learning_rate": 4.991475314046568e-05, "loss": 4.062, "step": 486 }, { "epoch": 0.11159546643417612, "grad_norm": 0.5068352318907661, "learning_rate": 4.991321808113077e-05, "loss": 4.3534, "step": 488 }, { "epoch": 0.11205282490316863, "grad_norm": 0.4855388533281759, "learning_rate": 4.991166934780076e-05, "loss": 4.2383, "step": 490 }, { "epoch": 0.11251018337216116, "grad_norm": 0.4854790656323182, "learning_rate": 4.991010694132568e-05, "loss": 4.5757, "step": 492 }, { "epoch": 0.11296754184115369, "grad_norm": 0.4305934718401256, "learning_rate": 4.9908530862563093e-05, "loss": 4.2188, "step": 494 }, { "epoch": 0.11342490031014621, "grad_norm": 0.6512927880755747, "learning_rate": 4.990694111237806e-05, "loss": 4.2209, "step": 496 }, { "epoch": 0.11388225877913874, "grad_norm": 0.4574132610653411, "learning_rate": 4.990533769164312e-05, "loss": 4.4201, "step": 498 }, { "epoch": 0.11433961724813126, "grad_norm": 0.4124235248478477, "learning_rate": 4.9903720601238354e-05, "loss": 4.2979, "step": 500 }, { "epoch": 0.11479697571712379, "grad_norm": 0.5082144980070653, "learning_rate": 4.990208984205131e-05, "loss": 4.3578, "step": 502 }, { "epoch": 0.11525433418611632, "grad_norm": 0.40822329311650607, "learning_rate": 4.990044541497706e-05, "loss": 4.4059, "step": 504 }, { "epoch": 0.11571169265510883, "grad_norm": 0.5915796617778301, "learning_rate": 4.989878732091818e-05, "loss": 4.4258, "step": 506 }, { "epoch": 0.11616905112410136, "grad_norm": 0.7509146120390933, "learning_rate": 4.989711556078473e-05, "loss": 4.5206, "step": 508 }, { "epoch": 0.1166264095930939, "grad_norm": 0.5593908797839143, "learning_rate": 4.989543013549429e-05, "loss": 4.4023, "step": 510 }, { "epoch": 0.11708376806208641, "grad_norm": 0.6104400326506482, "learning_rate": 4.989373104597192e-05, "loss": 4.5751, "step": 512 }, { "epoch": 0.11754112653107894, "grad_norm": 0.5322284103501466, "learning_rate": 4.98920182931502e-05, "loss": 4.5141, "step": 514 }, { "epoch": 0.11799848500007146, "grad_norm": 0.5626752898160374, "learning_rate": 4.98902918779692e-05, "loss": 4.2717, "step": 516 }, { "epoch": 0.11845584346906399, "grad_norm": 0.40465154156680533, "learning_rate": 4.9888551801376484e-05, "loss": 4.4241, "step": 518 }, { "epoch": 0.11891320193805652, "grad_norm": 0.5362000970362284, "learning_rate": 4.988679806432712e-05, "loss": 4.3247, "step": 520 }, { "epoch": 0.11937056040704903, "grad_norm": 0.5491428491013071, "learning_rate": 4.9885030667783675e-05, "loss": 4.3871, "step": 522 }, { "epoch": 0.11982791887604156, "grad_norm": 0.47016824113051897, "learning_rate": 4.988324961271621e-05, "loss": 4.4139, "step": 524 }, { "epoch": 0.12028527734503408, "grad_norm": 0.3708655609457125, "learning_rate": 4.9881454900102286e-05, "loss": 4.3231, "step": 526 }, { "epoch": 0.12074263581402661, "grad_norm": 0.4905749501983597, "learning_rate": 4.9879646530926955e-05, "loss": 4.4291, "step": 528 }, { "epoch": 0.12119999428301914, "grad_norm": 0.6381727128681015, "learning_rate": 4.987782450618277e-05, "loss": 4.5112, "step": 530 }, { "epoch": 0.12165735275201166, "grad_norm": 0.5980838916168789, "learning_rate": 4.987598882686978e-05, "loss": 4.415, "step": 532 }, { "epoch": 0.12211471122100419, "grad_norm": 0.5548733681905925, "learning_rate": 4.9874139493995514e-05, "loss": 4.447, "step": 534 }, { "epoch": 0.12257206968999672, "grad_norm": 0.4260067473871173, "learning_rate": 4.9872276508575015e-05, "loss": 4.2832, "step": 536 }, { "epoch": 0.12302942815898923, "grad_norm": 0.34983942784803773, "learning_rate": 4.987039987163081e-05, "loss": 4.3519, "step": 538 }, { "epoch": 0.12348678662798176, "grad_norm": 0.605763271439878, "learning_rate": 4.986850958419292e-05, "loss": 4.1615, "step": 540 }, { "epoch": 0.12394414509697428, "grad_norm": 0.5449558643965982, "learning_rate": 4.986660564729886e-05, "loss": 4.5099, "step": 542 }, { "epoch": 0.12440150356596681, "grad_norm": 0.44674240559365364, "learning_rate": 4.986468806199363e-05, "loss": 4.3103, "step": 544 }, { "epoch": 0.12485886203495934, "grad_norm": 0.4609338954202693, "learning_rate": 4.986275682932972e-05, "loss": 4.4346, "step": 546 }, { "epoch": 0.12531622050395186, "grad_norm": 0.3569164141527082, "learning_rate": 4.9860811950367123e-05, "loss": 4.3837, "step": 548 }, { "epoch": 0.1257735789729444, "grad_norm": 0.5110255197658521, "learning_rate": 4.985885342617333e-05, "loss": 4.2208, "step": 550 }, { "epoch": 0.12623093744193692, "grad_norm": 0.5396194621427803, "learning_rate": 4.985688125782327e-05, "loss": 4.3531, "step": 552 }, { "epoch": 0.12668829591092945, "grad_norm": 0.4587510672564656, "learning_rate": 4.985489544639943e-05, "loss": 4.1794, "step": 554 }, { "epoch": 0.12714565437992195, "grad_norm": 0.4702897215234871, "learning_rate": 4.985289599299173e-05, "loss": 4.2658, "step": 556 }, { "epoch": 0.12760301284891448, "grad_norm": 0.5508744451798323, "learning_rate": 4.9850882898697625e-05, "loss": 4.3698, "step": 558 }, { "epoch": 0.128060371317907, "grad_norm": 0.6008723126711031, "learning_rate": 4.9848856164622015e-05, "loss": 4.3569, "step": 560 }, { "epoch": 0.12851772978689954, "grad_norm": 0.5410244111266905, "learning_rate": 4.98468157918773e-05, "loss": 4.3027, "step": 562 }, { "epoch": 0.12897508825589207, "grad_norm": 0.40941638518945633, "learning_rate": 4.984476178158338e-05, "loss": 4.2479, "step": 564 }, { "epoch": 0.12943244672488458, "grad_norm": 0.4691865001295337, "learning_rate": 4.984269413486763e-05, "loss": 4.4598, "step": 566 }, { "epoch": 0.1298898051938771, "grad_norm": 0.4029440429319586, "learning_rate": 4.9840612852864896e-05, "loss": 4.4718, "step": 568 }, { "epoch": 0.13034716366286964, "grad_norm": 0.41271694648390794, "learning_rate": 4.9838517936717535e-05, "loss": 4.3854, "step": 570 }, { "epoch": 0.13080452213186217, "grad_norm": 0.3959981633707284, "learning_rate": 4.983640938757537e-05, "loss": 4.3587, "step": 572 }, { "epoch": 0.1312618806008547, "grad_norm": 0.3760320845393137, "learning_rate": 4.98342872065957e-05, "loss": 4.3991, "step": 574 }, { "epoch": 0.13171923906984723, "grad_norm": 0.4254504152144642, "learning_rate": 4.9832151394943335e-05, "loss": 4.3783, "step": 576 }, { "epoch": 0.13217659753883973, "grad_norm": 0.47864356374219813, "learning_rate": 4.9830001953790525e-05, "loss": 4.2907, "step": 578 }, { "epoch": 0.13263395600783226, "grad_norm": 0.5431069122831936, "learning_rate": 4.982783888431704e-05, "loss": 4.32, "step": 580 }, { "epoch": 0.1330913144768248, "grad_norm": 0.4660606324172639, "learning_rate": 4.9825662187710107e-05, "loss": 4.0385, "step": 582 }, { "epoch": 0.13354867294581732, "grad_norm": 0.49924852106336626, "learning_rate": 4.982347186516444e-05, "loss": 4.3703, "step": 584 }, { "epoch": 0.13400603141480985, "grad_norm": 0.4015319374714519, "learning_rate": 4.982126791788223e-05, "loss": 4.3085, "step": 586 }, { "epoch": 0.13446338988380235, "grad_norm": 0.42398681833924373, "learning_rate": 4.981905034707315e-05, "loss": 4.2078, "step": 588 }, { "epoch": 0.13492074835279488, "grad_norm": 0.3932586199948823, "learning_rate": 4.981681915395434e-05, "loss": 4.4648, "step": 590 }, { "epoch": 0.13537810682178741, "grad_norm": 0.40028067218835006, "learning_rate": 4.981457433975042e-05, "loss": 4.1587, "step": 592 }, { "epoch": 0.13583546529077994, "grad_norm": 0.4583953161665657, "learning_rate": 4.9812315905693515e-05, "loss": 4.3486, "step": 594 }, { "epoch": 0.13629282375977247, "grad_norm": 0.41641239712813816, "learning_rate": 4.981004385302317e-05, "loss": 4.3699, "step": 596 }, { "epoch": 0.13675018222876498, "grad_norm": 0.37614829364832253, "learning_rate": 4.980775818298645e-05, "loss": 4.3065, "step": 598 }, { "epoch": 0.1372075406977575, "grad_norm": 0.46397318381889224, "learning_rate": 4.980545889683789e-05, "loss": 4.3448, "step": 600 }, { "epoch": 0.13766489916675004, "grad_norm": 0.5396234385733142, "learning_rate": 4.980314599583946e-05, "loss": 4.4545, "step": 602 }, { "epoch": 0.13812225763574257, "grad_norm": 0.4031063001568575, "learning_rate": 4.980081948126066e-05, "loss": 4.3134, "step": 604 }, { "epoch": 0.1385796161047351, "grad_norm": 0.4350636799832895, "learning_rate": 4.9798479354378416e-05, "loss": 4.1932, "step": 606 }, { "epoch": 0.1390369745737276, "grad_norm": 0.4539875995007464, "learning_rate": 4.9796125616477136e-05, "loss": 4.4557, "step": 608 }, { "epoch": 0.13949433304272013, "grad_norm": 0.43924477618503105, "learning_rate": 4.979375826884872e-05, "loss": 4.2627, "step": 610 }, { "epoch": 0.13995169151171266, "grad_norm": 0.4367027511805271, "learning_rate": 4.979137731279251e-05, "loss": 4.2565, "step": 612 }, { "epoch": 0.1404090499807052, "grad_norm": 0.4530438482029177, "learning_rate": 4.978898274961534e-05, "loss": 4.2312, "step": 614 }, { "epoch": 0.14086640844969772, "grad_norm": 0.3745931365279475, "learning_rate": 4.978657458063151e-05, "loss": 4.1596, "step": 616 }, { "epoch": 0.14132376691869025, "grad_norm": 0.4893146717892107, "learning_rate": 4.978415280716275e-05, "loss": 4.1975, "step": 618 }, { "epoch": 0.14178112538768275, "grad_norm": 0.36203811586312673, "learning_rate": 4.9781717430538314e-05, "loss": 4.3553, "step": 620 }, { "epoch": 0.14223848385667529, "grad_norm": 0.36977962913083795, "learning_rate": 4.977926845209488e-05, "loss": 4.2472, "step": 622 }, { "epoch": 0.14269584232566782, "grad_norm": 0.48135653716098503, "learning_rate": 4.9776805873176614e-05, "loss": 4.2511, "step": 624 }, { "epoch": 0.14315320079466035, "grad_norm": 0.49200855514116815, "learning_rate": 4.977432969513514e-05, "loss": 4.3583, "step": 626 }, { "epoch": 0.14361055926365288, "grad_norm": 0.5499879250501398, "learning_rate": 4.9771839919329544e-05, "loss": 4.3017, "step": 628 }, { "epoch": 0.14406791773264538, "grad_norm": 0.40843429590440916, "learning_rate": 4.976933654712638e-05, "loss": 4.2227, "step": 630 }, { "epoch": 0.1445252762016379, "grad_norm": 0.4307534780256741, "learning_rate": 4.976681957989965e-05, "loss": 4.1948, "step": 632 }, { "epoch": 0.14498263467063044, "grad_norm": 0.4711373886216359, "learning_rate": 4.9764289019030835e-05, "loss": 4.4109, "step": 634 }, { "epoch": 0.14543999313962297, "grad_norm": 0.8982529823897103, "learning_rate": 4.9761744865908875e-05, "loss": 4.3422, "step": 636 }, { "epoch": 0.1458973516086155, "grad_norm": 0.40191492356994024, "learning_rate": 4.975918712193017e-05, "loss": 4.345, "step": 638 }, { "epoch": 0.146354710077608, "grad_norm": 0.4280776122939098, "learning_rate": 4.975661578849857e-05, "loss": 4.3537, "step": 640 }, { "epoch": 0.14681206854660053, "grad_norm": 0.5525699815833672, "learning_rate": 4.97540308670254e-05, "loss": 4.3268, "step": 642 }, { "epoch": 0.14726942701559306, "grad_norm": 0.35432278707646664, "learning_rate": 4.975143235892942e-05, "loss": 4.4384, "step": 644 }, { "epoch": 0.1477267854845856, "grad_norm": 0.3581384032505779, "learning_rate": 4.974882026563686e-05, "loss": 4.2909, "step": 646 }, { "epoch": 0.14818414395357812, "grad_norm": 0.39776903063301466, "learning_rate": 4.9746194588581415e-05, "loss": 4.146, "step": 648 }, { "epoch": 0.14864150242257065, "grad_norm": 0.3970706907796864, "learning_rate": 4.974355532920422e-05, "loss": 4.3623, "step": 650 }, { "epoch": 0.14909886089156316, "grad_norm": 0.4403673453519781, "learning_rate": 4.974090248895389e-05, "loss": 4.3143, "step": 652 }, { "epoch": 0.1495562193605557, "grad_norm": 0.42841798335836856, "learning_rate": 4.973823606928645e-05, "loss": 4.3299, "step": 654 }, { "epoch": 0.15001357782954822, "grad_norm": 0.458085384998003, "learning_rate": 4.973555607166542e-05, "loss": 4.1672, "step": 656 }, { "epoch": 0.15047093629854075, "grad_norm": 0.41602219768937665, "learning_rate": 4.973286249756176e-05, "loss": 4.3428, "step": 658 }, { "epoch": 0.15092829476753328, "grad_norm": 0.39444931992501087, "learning_rate": 4.9730155348453875e-05, "loss": 4.2794, "step": 660 }, { "epoch": 0.15138565323652578, "grad_norm": 0.37296867549593454, "learning_rate": 4.972743462582762e-05, "loss": 4.4652, "step": 662 }, { "epoch": 0.1518430117055183, "grad_norm": 0.4187924121615629, "learning_rate": 4.97247003311763e-05, "loss": 4.3531, "step": 664 }, { "epoch": 0.15230037017451084, "grad_norm": 0.4819733546280132, "learning_rate": 4.972195246600069e-05, "loss": 4.2542, "step": 666 }, { "epoch": 0.15275772864350337, "grad_norm": 0.4697801555427181, "learning_rate": 4.9719191031808986e-05, "loss": 4.1524, "step": 668 }, { "epoch": 0.1532150871124959, "grad_norm": 0.41912402273895255, "learning_rate": 4.9716416030116854e-05, "loss": 4.4193, "step": 670 }, { "epoch": 0.1536724455814884, "grad_norm": 0.4671259881035427, "learning_rate": 4.9713627462447373e-05, "loss": 4.0239, "step": 672 }, { "epoch": 0.15412980405048093, "grad_norm": 0.43029695118478495, "learning_rate": 4.971082533033111e-05, "loss": 4.2472, "step": 674 }, { "epoch": 0.15458716251947346, "grad_norm": 0.4096355584547117, "learning_rate": 4.9708009635306054e-05, "loss": 4.2843, "step": 676 }, { "epoch": 0.155044520988466, "grad_norm": 0.47033610617367516, "learning_rate": 4.9705180378917646e-05, "loss": 4.3138, "step": 678 }, { "epoch": 0.15550187945745853, "grad_norm": 0.5245736729906754, "learning_rate": 4.970233756271875e-05, "loss": 4.4103, "step": 680 }, { "epoch": 0.15595923792645103, "grad_norm": 0.4077886092104909, "learning_rate": 4.9699481188269704e-05, "loss": 4.3074, "step": 682 }, { "epoch": 0.15641659639544356, "grad_norm": 0.4977277443880797, "learning_rate": 4.969661125713826e-05, "loss": 4.2748, "step": 684 }, { "epoch": 0.1568739548644361, "grad_norm": 0.48450211425985246, "learning_rate": 4.969372777089963e-05, "loss": 4.2399, "step": 686 }, { "epoch": 0.15733131333342862, "grad_norm": 0.43177850169637705, "learning_rate": 4.969083073113646e-05, "loss": 4.2564, "step": 688 }, { "epoch": 0.15778867180242115, "grad_norm": 0.40837173994927317, "learning_rate": 4.9687920139438825e-05, "loss": 4.3257, "step": 690 }, { "epoch": 0.15824603027141368, "grad_norm": 0.7381973218896656, "learning_rate": 4.968499599740426e-05, "loss": 4.2462, "step": 692 }, { "epoch": 0.15870338874040618, "grad_norm": 0.41929812569571184, "learning_rate": 4.9682058306637726e-05, "loss": 4.2455, "step": 694 }, { "epoch": 0.1591607472093987, "grad_norm": 0.3746347038926595, "learning_rate": 4.96791070687516e-05, "loss": 4.1557, "step": 696 }, { "epoch": 0.15961810567839124, "grad_norm": 0.44106380716405563, "learning_rate": 4.967614228536573e-05, "loss": 4.0681, "step": 698 }, { "epoch": 0.16007546414738377, "grad_norm": 0.49635665415260305, "learning_rate": 4.967316395810737e-05, "loss": 4.1946, "step": 700 }, { "epoch": 0.1605328226163763, "grad_norm": 0.4613114472684592, "learning_rate": 4.9670172088611235e-05, "loss": 4.1123, "step": 702 }, { "epoch": 0.1609901810853688, "grad_norm": 0.5213013517118192, "learning_rate": 4.966716667851945e-05, "loss": 4.3719, "step": 704 }, { "epoch": 0.16144753955436134, "grad_norm": 0.44654096176389874, "learning_rate": 4.966414772948157e-05, "loss": 4.3414, "step": 706 }, { "epoch": 0.16190489802335387, "grad_norm": 0.5186274992835929, "learning_rate": 4.96611152431546e-05, "loss": 4.3486, "step": 708 }, { "epoch": 0.1623622564923464, "grad_norm": 0.42153623896442066, "learning_rate": 4.965806922120297e-05, "loss": 4.3506, "step": 710 }, { "epoch": 0.16281961496133893, "grad_norm": 0.46624562102912837, "learning_rate": 4.9655009665298535e-05, "loss": 4.0646, "step": 712 }, { "epoch": 0.16327697343033143, "grad_norm": 0.37578328106305525, "learning_rate": 4.965193657712057e-05, "loss": 4.198, "step": 714 }, { "epoch": 0.16373433189932396, "grad_norm": 0.4063036406389832, "learning_rate": 4.964884995835578e-05, "loss": 4.2025, "step": 716 }, { "epoch": 0.1641916903683165, "grad_norm": 0.38510266176892244, "learning_rate": 4.964574981069832e-05, "loss": 4.1379, "step": 718 }, { "epoch": 0.16464904883730902, "grad_norm": 0.4787810543137835, "learning_rate": 4.964263613584974e-05, "loss": 4.1435, "step": 720 }, { "epoch": 0.16510640730630155, "grad_norm": 0.34861970123512487, "learning_rate": 4.9639508935519044e-05, "loss": 4.1878, "step": 722 }, { "epoch": 0.16556376577529408, "grad_norm": 0.49080232950853137, "learning_rate": 4.963636821142261e-05, "loss": 4.1459, "step": 724 }, { "epoch": 0.16602112424428658, "grad_norm": 0.5069174875498057, "learning_rate": 4.963321396528431e-05, "loss": 4.193, "step": 726 }, { "epoch": 0.1664784827132791, "grad_norm": 0.39631111606319563, "learning_rate": 4.963004619883538e-05, "loss": 4.2801, "step": 728 }, { "epoch": 0.16693584118227164, "grad_norm": 0.4876255362829698, "learning_rate": 4.962686491381449e-05, "loss": 4.2183, "step": 730 }, { "epoch": 0.16739319965126417, "grad_norm": 0.5471161446921264, "learning_rate": 4.962367011196775e-05, "loss": 4.1666, "step": 732 }, { "epoch": 0.1678505581202567, "grad_norm": 0.4428500135760926, "learning_rate": 4.962046179504867e-05, "loss": 4.3429, "step": 734 }, { "epoch": 0.1683079165892492, "grad_norm": 0.5092844829991566, "learning_rate": 4.9617239964818174e-05, "loss": 4.3524, "step": 736 }, { "epoch": 0.16876527505824174, "grad_norm": 0.4201936950964535, "learning_rate": 4.961400462304463e-05, "loss": 4.2899, "step": 738 }, { "epoch": 0.16922263352723427, "grad_norm": 0.5000729507633912, "learning_rate": 4.961075577150379e-05, "loss": 4.1752, "step": 740 }, { "epoch": 0.1696799919962268, "grad_norm": 0.41262817269959984, "learning_rate": 4.9607493411978845e-05, "loss": 4.2678, "step": 742 }, { "epoch": 0.17013735046521933, "grad_norm": 0.4064017709252772, "learning_rate": 4.960421754626038e-05, "loss": 4.3302, "step": 744 }, { "epoch": 0.17059470893421183, "grad_norm": 0.4255506783629371, "learning_rate": 4.960092817614641e-05, "loss": 4.3676, "step": 746 }, { "epoch": 0.17105206740320436, "grad_norm": 0.41596363082990184, "learning_rate": 4.959762530344235e-05, "loss": 4.2646, "step": 748 }, { "epoch": 0.1715094258721969, "grad_norm": 0.42077406530411854, "learning_rate": 4.959430892996104e-05, "loss": 4.183, "step": 750 }, { "epoch": 0.17196678434118942, "grad_norm": 0.3375262131048065, "learning_rate": 4.959097905752272e-05, "loss": 4.1479, "step": 752 }, { "epoch": 0.17242414281018195, "grad_norm": 0.43830249619283373, "learning_rate": 4.958763568795503e-05, "loss": 4.2235, "step": 754 }, { "epoch": 0.17288150127917445, "grad_norm": 0.4024544720934303, "learning_rate": 4.958427882309304e-05, "loss": 4.2943, "step": 756 }, { "epoch": 0.17333885974816698, "grad_norm": 0.5631799561826425, "learning_rate": 4.958090846477921e-05, "loss": 4.3533, "step": 758 }, { "epoch": 0.17379621821715951, "grad_norm": 0.5246321013549204, "learning_rate": 4.9577524614863415e-05, "loss": 4.2919, "step": 760 }, { "epoch": 0.17425357668615205, "grad_norm": 0.40198810377002975, "learning_rate": 4.957412727520293e-05, "loss": 4.4857, "step": 762 }, { "epoch": 0.17471093515514458, "grad_norm": 0.4045182511181305, "learning_rate": 4.957071644766244e-05, "loss": 4.1993, "step": 764 }, { "epoch": 0.1751682936241371, "grad_norm": 0.42890743192916364, "learning_rate": 4.956729213411403e-05, "loss": 4.1865, "step": 766 }, { "epoch": 0.1756256520931296, "grad_norm": 0.5034291278183217, "learning_rate": 4.956385433643717e-05, "loss": 4.3641, "step": 768 }, { "epoch": 0.17608301056212214, "grad_norm": 0.43129360697214036, "learning_rate": 4.956040305651877e-05, "loss": 4.3095, "step": 770 }, { "epoch": 0.17654036903111467, "grad_norm": 0.4382023623047202, "learning_rate": 4.95569382962531e-05, "loss": 4.2891, "step": 772 }, { "epoch": 0.1769977275001072, "grad_norm": 0.5244652351971236, "learning_rate": 4.955346005754186e-05, "loss": 4.2858, "step": 774 }, { "epoch": 0.17745508596909973, "grad_norm": 0.42362172950749616, "learning_rate": 4.9549968342294116e-05, "loss": 4.2833, "step": 776 }, { "epoch": 0.17791244443809223, "grad_norm": 0.4712293540418697, "learning_rate": 4.954646315242636e-05, "loss": 4.1656, "step": 778 }, { "epoch": 0.17836980290708476, "grad_norm": 0.3761261679509831, "learning_rate": 4.954294448986247e-05, "loss": 4.2122, "step": 780 }, { "epoch": 0.1788271613760773, "grad_norm": 0.4399249786121146, "learning_rate": 4.953941235653371e-05, "loss": 4.2064, "step": 782 }, { "epoch": 0.17928451984506982, "grad_norm": 0.505663211449017, "learning_rate": 4.953586675437875e-05, "loss": 4.287, "step": 784 }, { "epoch": 0.17974187831406235, "grad_norm": 0.4440489124901161, "learning_rate": 4.953230768534365e-05, "loss": 4.2095, "step": 786 }, { "epoch": 0.18019923678305486, "grad_norm": 0.42440571958906037, "learning_rate": 4.9528735151381845e-05, "loss": 4.1148, "step": 788 }, { "epoch": 0.18065659525204739, "grad_norm": 0.4878403924107201, "learning_rate": 4.9525149154454186e-05, "loss": 4.1621, "step": 790 }, { "epoch": 0.18111395372103992, "grad_norm": 0.36207336691180125, "learning_rate": 4.95215496965289e-05, "loss": 4.3215, "step": 792 }, { "epoch": 0.18157131219003245, "grad_norm": 0.41346887104735275, "learning_rate": 4.951793677958161e-05, "loss": 4.2692, "step": 794 }, { "epoch": 0.18202867065902498, "grad_norm": 0.4390424886738455, "learning_rate": 4.95143104055953e-05, "loss": 4.2115, "step": 796 }, { "epoch": 0.1824860291280175, "grad_norm": 0.40220360027889934, "learning_rate": 4.9510670576560384e-05, "loss": 4.3958, "step": 798 }, { "epoch": 0.18294338759701, "grad_norm": 0.4459177388154749, "learning_rate": 4.9507017294474615e-05, "loss": 4.3137, "step": 800 }, { "epoch": 0.18340074606600254, "grad_norm": 0.548316798825633, "learning_rate": 4.950335056134316e-05, "loss": 4.3573, "step": 802 }, { "epoch": 0.18385810453499507, "grad_norm": 0.4447440484470524, "learning_rate": 4.9499670379178576e-05, "loss": 4.2277, "step": 804 }, { "epoch": 0.1843154630039876, "grad_norm": 0.43020002835857557, "learning_rate": 4.949597675000076e-05, "loss": 4.1221, "step": 806 }, { "epoch": 0.18477282147298013, "grad_norm": 0.7997983529348888, "learning_rate": 4.9492269675837034e-05, "loss": 4.1685, "step": 808 }, { "epoch": 0.18523017994197263, "grad_norm": 0.5095140852342939, "learning_rate": 4.9488549158722075e-05, "loss": 4.4395, "step": 810 }, { "epoch": 0.18568753841096516, "grad_norm": 0.3632681034129438, "learning_rate": 4.948481520069796e-05, "loss": 4.3286, "step": 812 }, { "epoch": 0.1861448968799577, "grad_norm": 0.4965656919238741, "learning_rate": 4.948106780381409e-05, "loss": 4.2052, "step": 814 }, { "epoch": 0.18660225534895022, "grad_norm": 0.4171007001781783, "learning_rate": 4.9477306970127324e-05, "loss": 4.2955, "step": 816 }, { "epoch": 0.18705961381794275, "grad_norm": 0.4851664721752726, "learning_rate": 4.9473532701701834e-05, "loss": 4.2082, "step": 818 }, { "epoch": 0.18751697228693526, "grad_norm": 0.39661568783234535, "learning_rate": 4.946974500060917e-05, "loss": 4.2256, "step": 820 }, { "epoch": 0.1879743307559278, "grad_norm": 0.4244736532621646, "learning_rate": 4.946594386892829e-05, "loss": 4.2285, "step": 822 }, { "epoch": 0.18843168922492032, "grad_norm": 0.4106556168555614, "learning_rate": 4.9462129308745496e-05, "loss": 4.367, "step": 824 }, { "epoch": 0.18888904769391285, "grad_norm": 0.4731233353881579, "learning_rate": 4.945830132215446e-05, "loss": 4.3111, "step": 826 }, { "epoch": 0.18934640616290538, "grad_norm": 0.3826946933636588, "learning_rate": 4.945445991125624e-05, "loss": 4.2695, "step": 828 }, { "epoch": 0.18980376463189788, "grad_norm": 0.48856029929096934, "learning_rate": 4.9450605078159244e-05, "loss": 4.2434, "step": 830 }, { "epoch": 0.1902611231008904, "grad_norm": 0.5023940706337474, "learning_rate": 4.944673682497926e-05, "loss": 4.2522, "step": 832 }, { "epoch": 0.19071848156988294, "grad_norm": 0.5851048211389537, "learning_rate": 4.944285515383944e-05, "loss": 4.281, "step": 834 }, { "epoch": 0.19117584003887547, "grad_norm": 0.4168973899845162, "learning_rate": 4.9438960066870286e-05, "loss": 4.0986, "step": 836 }, { "epoch": 0.191633198507868, "grad_norm": 0.44253209102775093, "learning_rate": 4.943505156620969e-05, "loss": 4.2425, "step": 838 }, { "epoch": 0.19209055697686053, "grad_norm": 0.4835234355187809, "learning_rate": 4.943112965400288e-05, "loss": 4.3181, "step": 840 }, { "epoch": 0.19254791544585304, "grad_norm": 0.4341522606249232, "learning_rate": 4.9427194332402465e-05, "loss": 4.2694, "step": 842 }, { "epoch": 0.19300527391484557, "grad_norm": 0.5495759749142858, "learning_rate": 4.9423245603568405e-05, "loss": 4.264, "step": 844 }, { "epoch": 0.1934626323838381, "grad_norm": 0.3880197239119609, "learning_rate": 4.941928346966801e-05, "loss": 4.2671, "step": 846 }, { "epoch": 0.19391999085283063, "grad_norm": 0.3831523437761347, "learning_rate": 4.941530793287596e-05, "loss": 4.3369, "step": 848 }, { "epoch": 0.19437734932182316, "grad_norm": 0.4615750994605913, "learning_rate": 4.941131899537429e-05, "loss": 4.032, "step": 850 }, { "epoch": 0.19483470779081566, "grad_norm": 0.4673153380210486, "learning_rate": 4.9407316659352395e-05, "loss": 4.1582, "step": 852 }, { "epoch": 0.1952920662598082, "grad_norm": 0.36594592868991427, "learning_rate": 4.9403300927007015e-05, "loss": 4.367, "step": 854 }, { "epoch": 0.19574942472880072, "grad_norm": 0.5701725931875244, "learning_rate": 4.939927180054224e-05, "loss": 4.1343, "step": 856 }, { "epoch": 0.19620678319779325, "grad_norm": 0.43190933080046007, "learning_rate": 4.939522928216951e-05, "loss": 4.3522, "step": 858 }, { "epoch": 0.19666414166678578, "grad_norm": 0.44163198522339003, "learning_rate": 4.9391173374107634e-05, "loss": 4.161, "step": 860 }, { "epoch": 0.19712150013577828, "grad_norm": 0.5301252126335756, "learning_rate": 4.9387104078582754e-05, "loss": 4.1208, "step": 862 }, { "epoch": 0.1975788586047708, "grad_norm": 0.4833681039251679, "learning_rate": 4.938302139782837e-05, "loss": 4.1145, "step": 864 }, { "epoch": 0.19803621707376334, "grad_norm": 0.45847998967595704, "learning_rate": 4.937892533408532e-05, "loss": 4.2546, "step": 866 }, { "epoch": 0.19849357554275587, "grad_norm": 0.4140409068109302, "learning_rate": 4.937481588960179e-05, "loss": 4.3722, "step": 868 }, { "epoch": 0.1989509340117484, "grad_norm": 0.3854444102878219, "learning_rate": 4.93706930666333e-05, "loss": 4.2923, "step": 870 }, { "epoch": 0.19940829248074093, "grad_norm": 0.3808578099927782, "learning_rate": 4.936655686744274e-05, "loss": 4.0758, "step": 872 }, { "epoch": 0.19986565094973344, "grad_norm": 0.4934949010664815, "learning_rate": 4.936240729430031e-05, "loss": 4.1498, "step": 874 }, { "epoch": 0.20032300941872597, "grad_norm": 0.353234479609793, "learning_rate": 4.935824434948358e-05, "loss": 4.0375, "step": 876 }, { "epoch": 0.2007803678877185, "grad_norm": 0.3997873739529265, "learning_rate": 4.935406803527743e-05, "loss": 4.0881, "step": 878 }, { "epoch": 0.20123772635671103, "grad_norm": 0.5821383844892509, "learning_rate": 4.934987835397411e-05, "loss": 4.235, "step": 880 }, { "epoch": 0.20169508482570356, "grad_norm": 0.39493128824759494, "learning_rate": 4.934567530787318e-05, "loss": 4.3517, "step": 882 }, { "epoch": 0.20215244329469606, "grad_norm": 0.43011940143913746, "learning_rate": 4.934145889928155e-05, "loss": 4.0984, "step": 884 }, { "epoch": 0.2026098017636886, "grad_norm": 0.398046750713475, "learning_rate": 4.933722913051345e-05, "loss": 4.2514, "step": 886 }, { "epoch": 0.20306716023268112, "grad_norm": 0.4745633452272418, "learning_rate": 4.9332986003890455e-05, "loss": 4.1059, "step": 888 }, { "epoch": 0.20352451870167365, "grad_norm": 0.4035727424485657, "learning_rate": 4.932872952174148e-05, "loss": 4.2065, "step": 890 }, { "epoch": 0.20398187717066618, "grad_norm": 0.43163895067261343, "learning_rate": 4.9324459686402744e-05, "loss": 4.2978, "step": 892 }, { "epoch": 0.20443923563965868, "grad_norm": 0.36444512426951303, "learning_rate": 4.932017650021783e-05, "loss": 4.1107, "step": 894 }, { "epoch": 0.20489659410865121, "grad_norm": 0.43452395073974864, "learning_rate": 4.93158799655376e-05, "loss": 4.2925, "step": 896 }, { "epoch": 0.20535395257764374, "grad_norm": 0.5199168502369919, "learning_rate": 4.9311570084720305e-05, "loss": 4.2074, "step": 898 }, { "epoch": 0.20581131104663627, "grad_norm": 0.4341234288251736, "learning_rate": 4.9307246860131465e-05, "loss": 4.2103, "step": 900 }, { "epoch": 0.2062686695156288, "grad_norm": 0.4537684377492016, "learning_rate": 4.930291029414395e-05, "loss": 4.2505, "step": 902 }, { "epoch": 0.2067260279846213, "grad_norm": 0.4221140749237413, "learning_rate": 4.929856038913796e-05, "loss": 4.1782, "step": 904 }, { "epoch": 0.20718338645361384, "grad_norm": 0.47632070567199863, "learning_rate": 4.9294197147500994e-05, "loss": 4.1623, "step": 906 }, { "epoch": 0.20764074492260637, "grad_norm": 0.37104642465886745, "learning_rate": 4.928982057162789e-05, "loss": 4.0696, "step": 908 }, { "epoch": 0.2080981033915989, "grad_norm": 0.3902733746076985, "learning_rate": 4.92854306639208e-05, "loss": 4.1962, "step": 910 }, { "epoch": 0.20855546186059143, "grad_norm": 0.4318200683438465, "learning_rate": 4.928102742678918e-05, "loss": 4.2151, "step": 912 }, { "epoch": 0.20901282032958396, "grad_norm": 0.5240073731555246, "learning_rate": 4.927661086264982e-05, "loss": 4.159, "step": 914 }, { "epoch": 0.20947017879857646, "grad_norm": 0.4556664196596032, "learning_rate": 4.9272180973926815e-05, "loss": 4.1415, "step": 916 }, { "epoch": 0.209927537267569, "grad_norm": 0.3907778440904355, "learning_rate": 4.926773776305159e-05, "loss": 4.0813, "step": 918 }, { "epoch": 0.21038489573656152, "grad_norm": 0.39865127139456313, "learning_rate": 4.9263281232462854e-05, "loss": 4.069, "step": 920 }, { "epoch": 0.21084225420555405, "grad_norm": 0.45819956460205263, "learning_rate": 4.925881138460664e-05, "loss": 4.1371, "step": 922 }, { "epoch": 0.21129961267454658, "grad_norm": 0.5592327331261363, "learning_rate": 4.92543282219363e-05, "loss": 4.2184, "step": 924 }, { "epoch": 0.21175697114353909, "grad_norm": 0.4985817374680436, "learning_rate": 4.924983174691247e-05, "loss": 4.2873, "step": 926 }, { "epoch": 0.21221432961253162, "grad_norm": 0.4415157317375305, "learning_rate": 4.9245321962003126e-05, "loss": 4.0898, "step": 928 }, { "epoch": 0.21267168808152415, "grad_norm": 0.5635951031530353, "learning_rate": 4.924079886968352e-05, "loss": 3.9397, "step": 930 }, { "epoch": 0.21312904655051668, "grad_norm": 0.43908150112537014, "learning_rate": 4.9236262472436236e-05, "loss": 4.1943, "step": 932 }, { "epoch": 0.2135864050195092, "grad_norm": 0.49571243409658655, "learning_rate": 4.9231712772751124e-05, "loss": 4.068, "step": 934 }, { "epoch": 0.2140437634885017, "grad_norm": 0.5502286804069196, "learning_rate": 4.922714977312536e-05, "loss": 4.1539, "step": 936 }, { "epoch": 0.21450112195749424, "grad_norm": 0.44179561624929475, "learning_rate": 4.9222573476063414e-05, "loss": 4.1026, "step": 938 }, { "epoch": 0.21495848042648677, "grad_norm": 0.40931215167820767, "learning_rate": 4.921798388407707e-05, "loss": 3.9477, "step": 940 }, { "epoch": 0.2154158388954793, "grad_norm": 0.3933295275549851, "learning_rate": 4.921338099968538e-05, "loss": 4.007, "step": 942 }, { "epoch": 0.21587319736447183, "grad_norm": 0.3981309347144545, "learning_rate": 4.920876482541471e-05, "loss": 4.2269, "step": 944 }, { "epoch": 0.21633055583346436, "grad_norm": 0.4170850520840093, "learning_rate": 4.920413536379872e-05, "loss": 4.1673, "step": 946 }, { "epoch": 0.21678791430245686, "grad_norm": 0.3579041192975476, "learning_rate": 4.919949261737835e-05, "loss": 4.1244, "step": 948 }, { "epoch": 0.2172452727714494, "grad_norm": 0.42184667138955245, "learning_rate": 4.9194836588701855e-05, "loss": 4.1606, "step": 950 }, { "epoch": 0.21770263124044192, "grad_norm": 0.3957060328957601, "learning_rate": 4.919016728032476e-05, "loss": 3.9914, "step": 952 }, { "epoch": 0.21815998970943445, "grad_norm": 0.4594072382507427, "learning_rate": 4.918548469480988e-05, "loss": 4.1293, "step": 954 }, { "epoch": 0.21861734817842698, "grad_norm": 0.428002399794366, "learning_rate": 4.918078883472733e-05, "loss": 4.185, "step": 956 }, { "epoch": 0.2190747066474195, "grad_norm": 0.39338786562768413, "learning_rate": 4.917607970265451e-05, "loss": 4.1882, "step": 958 }, { "epoch": 0.21953206511641202, "grad_norm": 0.367554846096408, "learning_rate": 4.917135730117608e-05, "loss": 4.082, "step": 960 }, { "epoch": 0.21998942358540455, "grad_norm": 0.3827638789849269, "learning_rate": 4.916662163288401e-05, "loss": 4.214, "step": 962 }, { "epoch": 0.22044678205439708, "grad_norm": 0.4039151329081977, "learning_rate": 4.9161872700377545e-05, "loss": 4.0012, "step": 964 }, { "epoch": 0.2209041405233896, "grad_norm": 0.435268740950026, "learning_rate": 4.915711050626321e-05, "loss": 4.1507, "step": 966 }, { "epoch": 0.2213614989923821, "grad_norm": 0.44810630051717265, "learning_rate": 4.91523350531548e-05, "loss": 4.1221, "step": 968 }, { "epoch": 0.22181885746137464, "grad_norm": 0.3922361526099465, "learning_rate": 4.91475463436734e-05, "loss": 4.2392, "step": 970 }, { "epoch": 0.22227621593036717, "grad_norm": 0.4860510745516278, "learning_rate": 4.914274438044737e-05, "loss": 4.1545, "step": 972 }, { "epoch": 0.2227335743993597, "grad_norm": 0.440031154678342, "learning_rate": 4.9137929166112324e-05, "loss": 4.2148, "step": 974 }, { "epoch": 0.22319093286835223, "grad_norm": 0.40737573832409585, "learning_rate": 4.9133100703311174e-05, "loss": 4.1015, "step": 976 }, { "epoch": 0.22364829133734473, "grad_norm": 0.41574137786439447, "learning_rate": 4.91282589946941e-05, "loss": 4.0222, "step": 978 }, { "epoch": 0.22410564980633726, "grad_norm": 0.410269440064237, "learning_rate": 4.912340404291854e-05, "loss": 4.0509, "step": 980 }, { "epoch": 0.2245630082753298, "grad_norm": 0.4328417055487813, "learning_rate": 4.9118535850649205e-05, "loss": 4.1032, "step": 982 }, { "epoch": 0.22502036674432233, "grad_norm": 0.377646880090817, "learning_rate": 4.9113654420558084e-05, "loss": 4.0786, "step": 984 }, { "epoch": 0.22547772521331486, "grad_norm": 0.5105221567978043, "learning_rate": 4.910875975532442e-05, "loss": 4.0561, "step": 986 }, { "epoch": 0.22593508368230739, "grad_norm": 0.38890507665313856, "learning_rate": 4.910385185763472e-05, "loss": 4.2821, "step": 988 }, { "epoch": 0.2263924421512999, "grad_norm": 0.5324153180463788, "learning_rate": 4.909893073018277e-05, "loss": 4.0549, "step": 990 }, { "epoch": 0.22684980062029242, "grad_norm": 0.369045124113275, "learning_rate": 4.9093996375669585e-05, "loss": 4.0604, "step": 992 }, { "epoch": 0.22730715908928495, "grad_norm": 0.4089466059313856, "learning_rate": 4.9089048796803475e-05, "loss": 4.2894, "step": 994 }, { "epoch": 0.22776451755827748, "grad_norm": 0.4042791095460842, "learning_rate": 4.908408799629998e-05, "loss": 4.123, "step": 996 }, { "epoch": 0.22822187602727, "grad_norm": 0.42606965193048596, "learning_rate": 4.9079113976881924e-05, "loss": 4.1994, "step": 998 }, { "epoch": 0.2286792344962625, "grad_norm": 0.7234900123866423, "learning_rate": 4.907412674127937e-05, "loss": 4.3043, "step": 1000 }, { "epoch": 0.22913659296525504, "grad_norm": 0.3852986550299473, "learning_rate": 4.9069126292229625e-05, "loss": 4.2234, "step": 1002 }, { "epoch": 0.22959395143424757, "grad_norm": 0.4257941812773922, "learning_rate": 4.906411263247728e-05, "loss": 4.0682, "step": 1004 }, { "epoch": 0.2300513099032401, "grad_norm": 0.4274535454779611, "learning_rate": 4.905908576477415e-05, "loss": 4.2909, "step": 1006 }, { "epoch": 0.23050866837223263, "grad_norm": 0.44862540491786823, "learning_rate": 4.90540456918793e-05, "loss": 3.9026, "step": 1008 }, { "epoch": 0.23096602684122514, "grad_norm": 0.4146357652521922, "learning_rate": 4.9048992416559056e-05, "loss": 4.1739, "step": 1010 }, { "epoch": 0.23142338531021767, "grad_norm": 0.4344343680009707, "learning_rate": 4.904392594158698e-05, "loss": 4.0791, "step": 1012 }, { "epoch": 0.2318807437792102, "grad_norm": 0.4906009959769452, "learning_rate": 4.903884626974389e-05, "loss": 4.2878, "step": 1014 }, { "epoch": 0.23233810224820273, "grad_norm": 0.4375674161739725, "learning_rate": 4.903375340381783e-05, "loss": 4.1565, "step": 1016 }, { "epoch": 0.23279546071719526, "grad_norm": 0.39691287408167836, "learning_rate": 4.902864734660411e-05, "loss": 4.1612, "step": 1018 }, { "epoch": 0.2332528191861878, "grad_norm": 0.40251774405231167, "learning_rate": 4.902352810090526e-05, "loss": 4.0598, "step": 1020 }, { "epoch": 0.2337101776551803, "grad_norm": 0.3369488905444477, "learning_rate": 4.9018395669531045e-05, "loss": 4.2986, "step": 1022 }, { "epoch": 0.23416753612417282, "grad_norm": 0.3363040654064602, "learning_rate": 4.9013250055298496e-05, "loss": 4.0292, "step": 1024 }, { "epoch": 0.23462489459316535, "grad_norm": 0.46628125495059297, "learning_rate": 4.900809126103185e-05, "loss": 4.133, "step": 1026 }, { "epoch": 0.23508225306215788, "grad_norm": 0.41589833786713226, "learning_rate": 4.90029192895626e-05, "loss": 4.0852, "step": 1028 }, { "epoch": 0.2355396115311504, "grad_norm": 0.3616983733400844, "learning_rate": 4.8997734143729436e-05, "loss": 3.9588, "step": 1030 }, { "epoch": 0.2359969700001429, "grad_norm": 0.5632292206288387, "learning_rate": 4.8992535826378325e-05, "loss": 4.2464, "step": 1032 }, { "epoch": 0.23645432846913544, "grad_norm": 0.4231729145333774, "learning_rate": 4.898732434036244e-05, "loss": 4.2278, "step": 1034 }, { "epoch": 0.23691168693812797, "grad_norm": 0.584447925525632, "learning_rate": 4.898209968854217e-05, "loss": 4.1048, "step": 1036 }, { "epoch": 0.2373690454071205, "grad_norm": 0.44704408447870914, "learning_rate": 4.897686187378516e-05, "loss": 4.216, "step": 1038 }, { "epoch": 0.23782640387611304, "grad_norm": 0.5415564082486002, "learning_rate": 4.897161089896625e-05, "loss": 4.0804, "step": 1040 }, { "epoch": 0.23828376234510554, "grad_norm": 0.46736321350400695, "learning_rate": 4.896634676696753e-05, "loss": 4.1924, "step": 1042 }, { "epoch": 0.23874112081409807, "grad_norm": 0.3325012358843271, "learning_rate": 4.896106948067829e-05, "loss": 4.1566, "step": 1044 }, { "epoch": 0.2391984792830906, "grad_norm": 0.5336364271039986, "learning_rate": 4.8955779042995046e-05, "loss": 4.2759, "step": 1046 }, { "epoch": 0.23965583775208313, "grad_norm": 0.5002808378209448, "learning_rate": 4.8950475456821535e-05, "loss": 4.0537, "step": 1048 }, { "epoch": 0.24011319622107566, "grad_norm": 0.6550273404712444, "learning_rate": 4.894515872506872e-05, "loss": 4.2286, "step": 1050 }, { "epoch": 0.24057055469006816, "grad_norm": 0.40267304953485655, "learning_rate": 4.893982885065476e-05, "loss": 4.1084, "step": 1052 }, { "epoch": 0.2410279131590607, "grad_norm": 0.45384747359364935, "learning_rate": 4.893448583650504e-05, "loss": 4.1743, "step": 1054 }, { "epoch": 0.24148527162805322, "grad_norm": 0.4321906439004163, "learning_rate": 4.8929129685552145e-05, "loss": 4.0546, "step": 1056 }, { "epoch": 0.24194263009704575, "grad_norm": 0.4226355584627252, "learning_rate": 4.892376040073589e-05, "loss": 4.115, "step": 1058 }, { "epoch": 0.24239998856603828, "grad_norm": 0.46073320678393104, "learning_rate": 4.89183779850033e-05, "loss": 4.2194, "step": 1060 }, { "epoch": 0.2428573470350308, "grad_norm": 0.3362421162826196, "learning_rate": 4.8912982441308565e-05, "loss": 4.1978, "step": 1062 }, { "epoch": 0.24331470550402332, "grad_norm": 0.33804217561647204, "learning_rate": 4.8907573772613126e-05, "loss": 3.9311, "step": 1064 }, { "epoch": 0.24377206397301585, "grad_norm": 0.3792149377669765, "learning_rate": 4.8902151981885614e-05, "loss": 4.0878, "step": 1066 }, { "epoch": 0.24422942244200838, "grad_norm": 0.3597164605062607, "learning_rate": 4.889671707210186e-05, "loss": 4.1841, "step": 1068 }, { "epoch": 0.2446867809110009, "grad_norm": 0.36362479998208347, "learning_rate": 4.8891269046244895e-05, "loss": 4.1788, "step": 1070 }, { "epoch": 0.24514413937999344, "grad_norm": 0.4431645305525156, "learning_rate": 4.888580790730495e-05, "loss": 4.1065, "step": 1072 }, { "epoch": 0.24560149784898594, "grad_norm": 0.4207659023700046, "learning_rate": 4.8880333658279445e-05, "loss": 4.1694, "step": 1074 }, { "epoch": 0.24605885631797847, "grad_norm": 0.40740269572831983, "learning_rate": 4.8874846302173015e-05, "loss": 4.1696, "step": 1076 }, { "epoch": 0.246516214786971, "grad_norm": 0.36265355919033226, "learning_rate": 4.8869345841997485e-05, "loss": 4.0937, "step": 1078 }, { "epoch": 0.24697357325596353, "grad_norm": 0.6168065404346054, "learning_rate": 4.8863832280771844e-05, "loss": 4.1027, "step": 1080 }, { "epoch": 0.24743093172495606, "grad_norm": 0.3906149099954179, "learning_rate": 4.8858305621522304e-05, "loss": 4.2687, "step": 1082 }, { "epoch": 0.24788829019394856, "grad_norm": 0.4480960915364177, "learning_rate": 4.8852765867282264e-05, "loss": 4.048, "step": 1084 }, { "epoch": 0.2483456486629411, "grad_norm": 0.47930111933712194, "learning_rate": 4.8847213021092284e-05, "loss": 4.2548, "step": 1086 }, { "epoch": 0.24880300713193362, "grad_norm": 0.4089710368228549, "learning_rate": 4.884164708600014e-05, "loss": 3.9758, "step": 1088 }, { "epoch": 0.24926036560092615, "grad_norm": 0.3817157409836684, "learning_rate": 4.883606806506078e-05, "loss": 4.1731, "step": 1090 }, { "epoch": 0.24971772406991868, "grad_norm": 0.38445414604449873, "learning_rate": 4.883047596133633e-05, "loss": 4.2386, "step": 1092 }, { "epoch": 0.2501750825389112, "grad_norm": 0.44364086743373266, "learning_rate": 4.88248707778961e-05, "loss": 4.1526, "step": 1094 }, { "epoch": 0.2506324410079037, "grad_norm": 0.38830641237083074, "learning_rate": 4.8819252517816574e-05, "loss": 4.1647, "step": 1096 }, { "epoch": 0.25108979947689625, "grad_norm": 0.3794473540110916, "learning_rate": 4.8813621184181426e-05, "loss": 4.0475, "step": 1098 }, { "epoch": 0.2515471579458888, "grad_norm": 0.41680582424806123, "learning_rate": 4.8807976780081497e-05, "loss": 4.1857, "step": 1100 }, { "epoch": 0.2520045164148813, "grad_norm": 0.38907046941844925, "learning_rate": 4.8802319308614805e-05, "loss": 4.3508, "step": 1102 }, { "epoch": 0.25246187488387384, "grad_norm": 0.4928661844286053, "learning_rate": 4.8796648772886533e-05, "loss": 4.0325, "step": 1104 }, { "epoch": 0.25291923335286637, "grad_norm": 0.3906449503572378, "learning_rate": 4.879096517600905e-05, "loss": 4.0679, "step": 1106 }, { "epoch": 0.2533765918218589, "grad_norm": 0.48325290223092326, "learning_rate": 4.878526852110187e-05, "loss": 4.1726, "step": 1108 }, { "epoch": 0.25383395029085143, "grad_norm": 0.3894399405402806, "learning_rate": 4.8779558811291696e-05, "loss": 4.1357, "step": 1110 }, { "epoch": 0.2542913087598439, "grad_norm": 0.3675811362067191, "learning_rate": 4.8773836049712386e-05, "loss": 4.2219, "step": 1112 }, { "epoch": 0.25474866722883643, "grad_norm": 0.34999102001931703, "learning_rate": 4.876810023950497e-05, "loss": 4.0073, "step": 1114 }, { "epoch": 0.25520602569782896, "grad_norm": 0.47347854776236115, "learning_rate": 4.876235138381762e-05, "loss": 4.1642, "step": 1116 }, { "epoch": 0.2556633841668215, "grad_norm": 0.40658691080803444, "learning_rate": 4.8756589485805694e-05, "loss": 4.0232, "step": 1118 }, { "epoch": 0.256120742635814, "grad_norm": 0.42638082023770385, "learning_rate": 4.8750814548631696e-05, "loss": 4.1014, "step": 1120 }, { "epoch": 0.25657810110480656, "grad_norm": 0.4453176698886525, "learning_rate": 4.8745026575465287e-05, "loss": 4.2792, "step": 1122 }, { "epoch": 0.2570354595737991, "grad_norm": 0.4014136108875806, "learning_rate": 4.8739225569483274e-05, "loss": 4.0175, "step": 1124 }, { "epoch": 0.2574928180427916, "grad_norm": 0.41929588677403756, "learning_rate": 4.873341153386964e-05, "loss": 4.0515, "step": 1126 }, { "epoch": 0.25795017651178415, "grad_norm": 0.36562634410774114, "learning_rate": 4.87275844718155e-05, "loss": 4.0634, "step": 1128 }, { "epoch": 0.2584075349807767, "grad_norm": 0.36840025512130425, "learning_rate": 4.8721744386519116e-05, "loss": 4.1478, "step": 1130 }, { "epoch": 0.25886489344976915, "grad_norm": 0.380554174745952, "learning_rate": 4.8715891281185923e-05, "loss": 4.0316, "step": 1132 }, { "epoch": 0.2593222519187617, "grad_norm": 0.3951895669668604, "learning_rate": 4.871002515902847e-05, "loss": 4.0947, "step": 1134 }, { "epoch": 0.2597796103877542, "grad_norm": 0.3565760542544204, "learning_rate": 4.870414602326648e-05, "loss": 4.2654, "step": 1136 }, { "epoch": 0.26023696885674674, "grad_norm": 0.4377924418568552, "learning_rate": 4.86982538771268e-05, "loss": 4.0676, "step": 1138 }, { "epoch": 0.2606943273257393, "grad_norm": 0.49317294575461645, "learning_rate": 4.869234872384343e-05, "loss": 4.1616, "step": 1140 }, { "epoch": 0.2611516857947318, "grad_norm": 0.3652190138906539, "learning_rate": 4.8686430566657483e-05, "loss": 4.135, "step": 1142 }, { "epoch": 0.26160904426372433, "grad_norm": 0.4150248626075004, "learning_rate": 4.868049940881725e-05, "loss": 4.179, "step": 1144 }, { "epoch": 0.26206640273271686, "grad_norm": 0.4538753807614683, "learning_rate": 4.8674555253578124e-05, "loss": 4.2604, "step": 1146 }, { "epoch": 0.2625237612017094, "grad_norm": 0.7484859841078618, "learning_rate": 4.866859810420264e-05, "loss": 4.1734, "step": 1148 }, { "epoch": 0.2629811196707019, "grad_norm": 0.3605638204541479, "learning_rate": 4.86626279639605e-05, "loss": 4.2405, "step": 1150 }, { "epoch": 0.26343847813969445, "grad_norm": 0.4780436309440052, "learning_rate": 4.865664483612846e-05, "loss": 3.9334, "step": 1152 }, { "epoch": 0.26389583660868693, "grad_norm": 0.5000413937123571, "learning_rate": 4.865064872399048e-05, "loss": 4.0027, "step": 1154 }, { "epoch": 0.26435319507767946, "grad_norm": 0.5593375742235845, "learning_rate": 4.8644639630837605e-05, "loss": 4.0987, "step": 1156 }, { "epoch": 0.264810553546672, "grad_norm": 0.43557766519004915, "learning_rate": 4.8638617559968025e-05, "loss": 4.0444, "step": 1158 }, { "epoch": 0.2652679120156645, "grad_norm": 0.5131447121275712, "learning_rate": 4.863258251468704e-05, "loss": 4.0812, "step": 1160 }, { "epoch": 0.26572527048465705, "grad_norm": 0.3714529808409048, "learning_rate": 4.862653449830707e-05, "loss": 4.0771, "step": 1162 }, { "epoch": 0.2661826289536496, "grad_norm": 0.4099129272961706, "learning_rate": 4.862047351414767e-05, "loss": 4.0123, "step": 1164 }, { "epoch": 0.2666399874226421, "grad_norm": 0.351561472149512, "learning_rate": 4.861439956553549e-05, "loss": 4.0267, "step": 1166 }, { "epoch": 0.26709734589163464, "grad_norm": 0.4304311905552597, "learning_rate": 4.860831265580432e-05, "loss": 4.0863, "step": 1168 }, { "epoch": 0.26755470436062717, "grad_norm": 0.5569000710303572, "learning_rate": 4.860221278829505e-05, "loss": 4.0603, "step": 1170 }, { "epoch": 0.2680120628296197, "grad_norm": 0.582002395836333, "learning_rate": 4.859609996635568e-05, "loss": 4.0747, "step": 1172 }, { "epoch": 0.2684694212986122, "grad_norm": 0.4191726743420539, "learning_rate": 4.8589974193341324e-05, "loss": 4.0847, "step": 1174 }, { "epoch": 0.2689267797676047, "grad_norm": 0.48040772647540203, "learning_rate": 4.8583835472614206e-05, "loss": 4.1493, "step": 1176 }, { "epoch": 0.26938413823659724, "grad_norm": 0.8586063068224904, "learning_rate": 4.857768380754366e-05, "loss": 4.0681, "step": 1178 }, { "epoch": 0.26984149670558977, "grad_norm": 0.459043036425448, "learning_rate": 4.8571519201506115e-05, "loss": 4.1095, "step": 1180 }, { "epoch": 0.2702988551745823, "grad_norm": 0.5219025667413592, "learning_rate": 4.856534165788511e-05, "loss": 4.3058, "step": 1182 }, { "epoch": 0.27075621364357483, "grad_norm": 0.4335633172704861, "learning_rate": 4.855915118007128e-05, "loss": 4.3092, "step": 1184 }, { "epoch": 0.27121357211256736, "grad_norm": 0.44236359543986886, "learning_rate": 4.8552947771462364e-05, "loss": 4.1002, "step": 1186 }, { "epoch": 0.2716709305815599, "grad_norm": 0.4501923501006197, "learning_rate": 4.85467314354632e-05, "loss": 4.1851, "step": 1188 }, { "epoch": 0.2721282890505524, "grad_norm": 0.3991125596386875, "learning_rate": 4.854050217548571e-05, "loss": 4.1304, "step": 1190 }, { "epoch": 0.27258564751954495, "grad_norm": 0.4330598448040076, "learning_rate": 4.853425999494893e-05, "loss": 4.1496, "step": 1192 }, { "epoch": 0.2730430059885375, "grad_norm": 0.37360623749237215, "learning_rate": 4.852800489727895e-05, "loss": 4.0241, "step": 1194 }, { "epoch": 0.27350036445752995, "grad_norm": 0.44420085153068706, "learning_rate": 4.852173688590901e-05, "loss": 4.2196, "step": 1196 }, { "epoch": 0.2739577229265225, "grad_norm": 0.34210382030618125, "learning_rate": 4.851545596427938e-05, "loss": 4.2563, "step": 1198 }, { "epoch": 0.274415081395515, "grad_norm": 0.5401261381893442, "learning_rate": 4.850916213583743e-05, "loss": 4.1098, "step": 1200 }, { "epoch": 0.27487243986450755, "grad_norm": 0.4929235041631298, "learning_rate": 4.850285540403764e-05, "loss": 4.1356, "step": 1202 }, { "epoch": 0.2753297983335001, "grad_norm": 0.3686539403615571, "learning_rate": 4.849653577234155e-05, "loss": 4.0866, "step": 1204 }, { "epoch": 0.2757871568024926, "grad_norm": 0.4491978184343017, "learning_rate": 4.8490203244217786e-05, "loss": 4.0643, "step": 1206 }, { "epoch": 0.27624451527148514, "grad_norm": 0.3952239725252422, "learning_rate": 4.8483857823142045e-05, "loss": 4.0482, "step": 1208 }, { "epoch": 0.27670187374047767, "grad_norm": 0.4364371843911835, "learning_rate": 4.8477499512597115e-05, "loss": 4.1332, "step": 1210 }, { "epoch": 0.2771592322094702, "grad_norm": 0.5060244552315368, "learning_rate": 4.8471128316072845e-05, "loss": 4.2291, "step": 1212 }, { "epoch": 0.2776165906784627, "grad_norm": 0.4229971830959631, "learning_rate": 4.846474423706617e-05, "loss": 4.1257, "step": 1214 }, { "epoch": 0.2780739491474552, "grad_norm": 0.4510857140644861, "learning_rate": 4.845834727908107e-05, "loss": 4.0093, "step": 1216 }, { "epoch": 0.27853130761644773, "grad_norm": 0.3590625980732305, "learning_rate": 4.845193744562864e-05, "loss": 4.0587, "step": 1218 }, { "epoch": 0.27898866608544026, "grad_norm": 0.4037611171207559, "learning_rate": 4.8445514740226986e-05, "loss": 4.0503, "step": 1220 }, { "epoch": 0.2794460245544328, "grad_norm": 0.35451740774180296, "learning_rate": 4.843907916640133e-05, "loss": 4.2357, "step": 1222 }, { "epoch": 0.2799033830234253, "grad_norm": 0.43938940645912156, "learning_rate": 4.843263072768391e-05, "loss": 4.0438, "step": 1224 }, { "epoch": 0.28036074149241785, "grad_norm": 0.4016334411655353, "learning_rate": 4.842616942761406e-05, "loss": 4.0345, "step": 1226 }, { "epoch": 0.2808180999614104, "grad_norm": 0.43504886637729495, "learning_rate": 4.841969526973818e-05, "loss": 4.3126, "step": 1228 }, { "epoch": 0.2812754584304029, "grad_norm": 0.3878045492717429, "learning_rate": 4.841320825760967e-05, "loss": 4.1309, "step": 1230 }, { "epoch": 0.28173281689939544, "grad_norm": 0.4578989871618079, "learning_rate": 4.840670839478906e-05, "loss": 4.1485, "step": 1232 }, { "epoch": 0.282190175368388, "grad_norm": 0.436447444163878, "learning_rate": 4.8400195684843876e-05, "loss": 4.1153, "step": 1234 }, { "epoch": 0.2826475338373805, "grad_norm": 0.35818479235212625, "learning_rate": 4.839367013134873e-05, "loss": 4.1096, "step": 1236 }, { "epoch": 0.283104892306373, "grad_norm": 0.3336886384897768, "learning_rate": 4.838713173788526e-05, "loss": 4.0913, "step": 1238 }, { "epoch": 0.2835622507753655, "grad_norm": 0.4135233770148718, "learning_rate": 4.838058050804217e-05, "loss": 4.1117, "step": 1240 }, { "epoch": 0.28401960924435804, "grad_norm": 0.4635887354266814, "learning_rate": 4.83740164454152e-05, "loss": 4.0058, "step": 1242 }, { "epoch": 0.28447696771335057, "grad_norm": 0.39163940322962393, "learning_rate": 4.836743955360713e-05, "loss": 3.9987, "step": 1244 }, { "epoch": 0.2849343261823431, "grad_norm": 0.4057623430561721, "learning_rate": 4.8360849836227795e-05, "loss": 4.0986, "step": 1246 }, { "epoch": 0.28539168465133563, "grad_norm": 0.3533871770359048, "learning_rate": 4.835424729689405e-05, "loss": 4.022, "step": 1248 }, { "epoch": 0.28584904312032816, "grad_norm": 0.35449370076510683, "learning_rate": 4.8347631939229796e-05, "loss": 4.0729, "step": 1250 }, { "epoch": 0.2863064015893207, "grad_norm": 0.4478508231834435, "learning_rate": 4.834100376686599e-05, "loss": 3.9732, "step": 1252 }, { "epoch": 0.2867637600583132, "grad_norm": 0.33811772162383064, "learning_rate": 4.8334362783440585e-05, "loss": 3.9269, "step": 1254 }, { "epoch": 0.28722111852730575, "grad_norm": 0.4514122689949391, "learning_rate": 4.83277089925986e-05, "loss": 3.9284, "step": 1256 }, { "epoch": 0.2876784769962983, "grad_norm": 0.4318185327439502, "learning_rate": 4.832104239799205e-05, "loss": 4.1406, "step": 1258 }, { "epoch": 0.28813583546529076, "grad_norm": 0.4207143890091369, "learning_rate": 4.831436300328001e-05, "loss": 3.9264, "step": 1260 }, { "epoch": 0.2885931939342833, "grad_norm": 0.36635220663880297, "learning_rate": 4.830767081212857e-05, "loss": 4.1082, "step": 1262 }, { "epoch": 0.2890505524032758, "grad_norm": 0.6987044291572505, "learning_rate": 4.830096582821083e-05, "loss": 4.1007, "step": 1264 }, { "epoch": 0.28950791087226835, "grad_norm": 0.3763566709037951, "learning_rate": 4.829424805520692e-05, "loss": 4.083, "step": 1266 }, { "epoch": 0.2899652693412609, "grad_norm": 0.38568338839339594, "learning_rate": 4.8287517496804e-05, "loss": 4.2419, "step": 1268 }, { "epoch": 0.2904226278102534, "grad_norm": 0.4091672690607882, "learning_rate": 4.828077415669623e-05, "loss": 4.0831, "step": 1270 }, { "epoch": 0.29087998627924594, "grad_norm": 0.4093876858558383, "learning_rate": 4.82740180385848e-05, "loss": 3.999, "step": 1272 }, { "epoch": 0.29133734474823847, "grad_norm": 0.3770311897363152, "learning_rate": 4.826724914617791e-05, "loss": 4.1606, "step": 1274 }, { "epoch": 0.291794703217231, "grad_norm": 0.4627554651903174, "learning_rate": 4.8260467483190764e-05, "loss": 4.1052, "step": 1276 }, { "epoch": 0.29225206168622353, "grad_norm": 0.434968607252081, "learning_rate": 4.8253673053345574e-05, "loss": 4.0775, "step": 1278 }, { "epoch": 0.292709420155216, "grad_norm": 0.4772065297887098, "learning_rate": 4.824686586037157e-05, "loss": 4.2657, "step": 1280 }, { "epoch": 0.29316677862420853, "grad_norm": 0.39034010988779344, "learning_rate": 4.824004590800498e-05, "loss": 4.1105, "step": 1282 }, { "epoch": 0.29362413709320107, "grad_norm": 0.45976186603315644, "learning_rate": 4.8233213199989046e-05, "loss": 4.1706, "step": 1284 }, { "epoch": 0.2940814955621936, "grad_norm": 0.42930254598382867, "learning_rate": 4.8226367740074e-05, "loss": 4.1074, "step": 1286 }, { "epoch": 0.2945388540311861, "grad_norm": 0.492763696695971, "learning_rate": 4.821950953201707e-05, "loss": 3.9981, "step": 1288 }, { "epoch": 0.29499621250017866, "grad_norm": 0.37179106398191636, "learning_rate": 4.8212638579582495e-05, "loss": 4.059, "step": 1290 }, { "epoch": 0.2954535709691712, "grad_norm": 0.38826268188166746, "learning_rate": 4.820575488654149e-05, "loss": 3.9647, "step": 1292 }, { "epoch": 0.2959109294381637, "grad_norm": 0.3613578165587881, "learning_rate": 4.819885845667228e-05, "loss": 4.0798, "step": 1294 }, { "epoch": 0.29636828790715625, "grad_norm": 0.3689925689078236, "learning_rate": 4.8191949293760075e-05, "loss": 4.2952, "step": 1296 }, { "epoch": 0.2968256463761488, "grad_norm": 0.429098366962909, "learning_rate": 4.818502740159707e-05, "loss": 4.0687, "step": 1298 }, { "epoch": 0.2972830048451413, "grad_norm": 0.3957331074599359, "learning_rate": 4.8178092783982454e-05, "loss": 4.3308, "step": 1300 }, { "epoch": 0.2977403633141338, "grad_norm": 0.43150120205231074, "learning_rate": 4.817114544472239e-05, "loss": 3.9934, "step": 1302 }, { "epoch": 0.2981977217831263, "grad_norm": 0.3794727220318756, "learning_rate": 4.816418538763004e-05, "loss": 4.1695, "step": 1304 }, { "epoch": 0.29865508025211884, "grad_norm": 0.4202406449567351, "learning_rate": 4.815721261652553e-05, "loss": 4.1456, "step": 1306 }, { "epoch": 0.2991124387211114, "grad_norm": 0.3371544795621402, "learning_rate": 4.815022713523597e-05, "loss": 3.9673, "step": 1308 }, { "epoch": 0.2995697971901039, "grad_norm": 0.3828288288915709, "learning_rate": 4.814322894759544e-05, "loss": 4.1505, "step": 1310 }, { "epoch": 0.30002715565909643, "grad_norm": 0.408826288459337, "learning_rate": 4.813621805744502e-05, "loss": 4.1221, "step": 1312 }, { "epoch": 0.30048451412808896, "grad_norm": 0.447967827632661, "learning_rate": 4.812919446863272e-05, "loss": 4.2322, "step": 1314 }, { "epoch": 0.3009418725970815, "grad_norm": 0.39263444942457476, "learning_rate": 4.812215818501357e-05, "loss": 4.249, "step": 1316 }, { "epoch": 0.301399231066074, "grad_norm": 0.3838318468468975, "learning_rate": 4.8115109210449504e-05, "loss": 4.072, "step": 1318 }, { "epoch": 0.30185658953506656, "grad_norm": 0.3791296192382075, "learning_rate": 4.810804754880949e-05, "loss": 3.9772, "step": 1320 }, { "epoch": 0.30231394800405903, "grad_norm": 0.3732090941828782, "learning_rate": 4.8100973203969415e-05, "loss": 4.1334, "step": 1322 }, { "epoch": 0.30277130647305156, "grad_norm": 0.4658665983928153, "learning_rate": 4.809388617981213e-05, "loss": 4.1564, "step": 1324 }, { "epoch": 0.3032286649420441, "grad_norm": 0.4322109510909491, "learning_rate": 4.808678648022747e-05, "loss": 4.017, "step": 1326 }, { "epoch": 0.3036860234110366, "grad_norm": 0.3222341579911351, "learning_rate": 4.8079674109112205e-05, "loss": 4.1394, "step": 1328 }, { "epoch": 0.30414338188002915, "grad_norm": 0.36028742025522914, "learning_rate": 4.807254907037008e-05, "loss": 4.0709, "step": 1330 }, { "epoch": 0.3046007403490217, "grad_norm": 0.4479396023164055, "learning_rate": 4.806541136791175e-05, "loss": 4.0295, "step": 1332 }, { "epoch": 0.3050580988180142, "grad_norm": 0.4536358485854838, "learning_rate": 4.805826100565488e-05, "loss": 4.0566, "step": 1334 }, { "epoch": 0.30551545728700674, "grad_norm": 0.464219724306813, "learning_rate": 4.805109798752404e-05, "loss": 4.1135, "step": 1336 }, { "epoch": 0.3059728157559993, "grad_norm": 0.4540057829885572, "learning_rate": 4.804392231745077e-05, "loss": 4.0478, "step": 1338 }, { "epoch": 0.3064301742249918, "grad_norm": 0.49583622112822334, "learning_rate": 4.803673399937353e-05, "loss": 4.083, "step": 1340 }, { "epoch": 0.30688753269398433, "grad_norm": 0.44599355517743056, "learning_rate": 4.802953303723775e-05, "loss": 4.1186, "step": 1342 }, { "epoch": 0.3073448911629768, "grad_norm": 0.41962534170787397, "learning_rate": 4.8022319434995784e-05, "loss": 3.8929, "step": 1344 }, { "epoch": 0.30780224963196934, "grad_norm": 0.5660655345197312, "learning_rate": 4.801509319660692e-05, "loss": 4.0529, "step": 1346 }, { "epoch": 0.30825960810096187, "grad_norm": 0.4123941495218162, "learning_rate": 4.8007854326037394e-05, "loss": 4.3309, "step": 1348 }, { "epoch": 0.3087169665699544, "grad_norm": 0.44811853768598475, "learning_rate": 4.800060282726037e-05, "loss": 4.0358, "step": 1350 }, { "epoch": 0.30917432503894693, "grad_norm": 0.36820301689665996, "learning_rate": 4.799333870425593e-05, "loss": 4.0932, "step": 1352 }, { "epoch": 0.30963168350793946, "grad_norm": 0.4155924440124309, "learning_rate": 4.798606196101111e-05, "loss": 4.0114, "step": 1354 }, { "epoch": 0.310089041976932, "grad_norm": 1.281713169396861, "learning_rate": 4.797877260151985e-05, "loss": 4.0816, "step": 1356 }, { "epoch": 0.3105464004459245, "grad_norm": 0.4285811660148584, "learning_rate": 4.797147062978304e-05, "loss": 3.9925, "step": 1358 }, { "epoch": 0.31100375891491705, "grad_norm": 0.6922297772656372, "learning_rate": 4.7964156049808475e-05, "loss": 4.1342, "step": 1360 }, { "epoch": 0.3114611173839096, "grad_norm": 0.4060405778291266, "learning_rate": 4.795682886561086e-05, "loss": 3.9782, "step": 1362 }, { "epoch": 0.31191847585290206, "grad_norm": 0.5259587219362964, "learning_rate": 4.794948908121183e-05, "loss": 4.0273, "step": 1364 }, { "epoch": 0.3123758343218946, "grad_norm": 0.4347749549707768, "learning_rate": 4.794213670063995e-05, "loss": 4.1397, "step": 1366 }, { "epoch": 0.3128331927908871, "grad_norm": 0.5459585958508978, "learning_rate": 4.793477172793067e-05, "loss": 4.1575, "step": 1368 }, { "epoch": 0.31329055125987965, "grad_norm": 0.5287295182862715, "learning_rate": 4.7927394167126385e-05, "loss": 3.9992, "step": 1370 }, { "epoch": 0.3137479097288722, "grad_norm": 0.42849693083693313, "learning_rate": 4.792000402227636e-05, "loss": 3.9854, "step": 1372 }, { "epoch": 0.3142052681978647, "grad_norm": 0.45731885940699135, "learning_rate": 4.791260129743681e-05, "loss": 4.0322, "step": 1374 }, { "epoch": 0.31466262666685724, "grad_norm": 0.3713687680741365, "learning_rate": 4.7905185996670807e-05, "loss": 4.0317, "step": 1376 }, { "epoch": 0.31511998513584977, "grad_norm": 0.44264615875170893, "learning_rate": 4.789775812404837e-05, "loss": 3.9654, "step": 1378 }, { "epoch": 0.3155773436048423, "grad_norm": 0.36974559640228805, "learning_rate": 4.789031768364638e-05, "loss": 4.0599, "step": 1380 }, { "epoch": 0.31603470207383483, "grad_norm": 0.41485313289972464, "learning_rate": 4.788286467954865e-05, "loss": 4.1601, "step": 1382 }, { "epoch": 0.31649206054282736, "grad_norm": 0.38353853080761924, "learning_rate": 4.7875399115845874e-05, "loss": 4.1558, "step": 1384 }, { "epoch": 0.31694941901181983, "grad_norm": 0.4875162005789263, "learning_rate": 4.7867920996635626e-05, "loss": 3.8778, "step": 1386 }, { "epoch": 0.31740677748081236, "grad_norm": 0.4198072942502154, "learning_rate": 4.78604303260224e-05, "loss": 4.0541, "step": 1388 }, { "epoch": 0.3178641359498049, "grad_norm": 0.4382989094486286, "learning_rate": 4.785292710811756e-05, "loss": 4.0259, "step": 1390 }, { "epoch": 0.3183214944187974, "grad_norm": 0.4047184096502753, "learning_rate": 4.784541134703935e-05, "loss": 4.2253, "step": 1392 }, { "epoch": 0.31877885288778995, "grad_norm": 0.3484084104360977, "learning_rate": 4.7837883046912925e-05, "loss": 4.115, "step": 1394 }, { "epoch": 0.3192362113567825, "grad_norm": 0.3850258271732019, "learning_rate": 4.7830342211870286e-05, "loss": 4.0733, "step": 1396 }, { "epoch": 0.319693569825775, "grad_norm": 0.3904914627704978, "learning_rate": 4.782278884605035e-05, "loss": 4.0625, "step": 1398 }, { "epoch": 0.32015092829476754, "grad_norm": 0.38472418392490715, "learning_rate": 4.7815222953598883e-05, "loss": 3.9192, "step": 1400 }, { "epoch": 0.3206082867637601, "grad_norm": 0.44646083272441134, "learning_rate": 4.780764453866855e-05, "loss": 3.8855, "step": 1402 }, { "epoch": 0.3210656452327526, "grad_norm": 0.4290017464642221, "learning_rate": 4.7800053605418874e-05, "loss": 3.9861, "step": 1404 }, { "epoch": 0.32152300370174514, "grad_norm": 0.48050182507810413, "learning_rate": 4.7792450158016256e-05, "loss": 4.0913, "step": 1406 }, { "epoch": 0.3219803621707376, "grad_norm": 0.4871060075314635, "learning_rate": 4.778483420063395e-05, "loss": 4.1194, "step": 1408 }, { "epoch": 0.32243772063973014, "grad_norm": 0.48411834746797755, "learning_rate": 4.777720573745211e-05, "loss": 4.1946, "step": 1410 }, { "epoch": 0.32289507910872267, "grad_norm": 0.46130821708341774, "learning_rate": 4.7769564772657714e-05, "loss": 4.1832, "step": 1412 }, { "epoch": 0.3233524375777152, "grad_norm": 0.5089951462092674, "learning_rate": 4.776191131044464e-05, "loss": 4.2024, "step": 1414 }, { "epoch": 0.32380979604670773, "grad_norm": 0.29834146702101605, "learning_rate": 4.7754245355013586e-05, "loss": 4.126, "step": 1416 }, { "epoch": 0.32426715451570026, "grad_norm": 0.44077674741720463, "learning_rate": 4.774656691057213e-05, "loss": 4.1253, "step": 1418 }, { "epoch": 0.3247245129846928, "grad_norm": 0.3860127719263389, "learning_rate": 4.773887598133472e-05, "loss": 4.1366, "step": 1420 }, { "epoch": 0.3251818714536853, "grad_norm": 0.4472559132416972, "learning_rate": 4.773117257152262e-05, "loss": 4.0427, "step": 1422 }, { "epoch": 0.32563922992267785, "grad_norm": 0.38862899083627944, "learning_rate": 4.772345668536397e-05, "loss": 4.0318, "step": 1424 }, { "epoch": 0.3260965883916704, "grad_norm": 0.44427759814581075, "learning_rate": 4.7715728327093744e-05, "loss": 3.9172, "step": 1426 }, { "epoch": 0.32655394686066286, "grad_norm": 0.5942131544958608, "learning_rate": 4.770798750095378e-05, "loss": 4.2652, "step": 1428 }, { "epoch": 0.3270113053296554, "grad_norm": 0.4815896458187602, "learning_rate": 4.770023421119274e-05, "loss": 4.1117, "step": 1430 }, { "epoch": 0.3274686637986479, "grad_norm": 0.49427385004353674, "learning_rate": 4.7692468462066126e-05, "loss": 3.9305, "step": 1432 }, { "epoch": 0.32792602226764045, "grad_norm": 0.49514018048651065, "learning_rate": 4.7684690257836294e-05, "loss": 4.0789, "step": 1434 }, { "epoch": 0.328383380736633, "grad_norm": 0.37001219294407617, "learning_rate": 4.767689960277244e-05, "loss": 4.1227, "step": 1436 }, { "epoch": 0.3288407392056255, "grad_norm": 0.3927806312968316, "learning_rate": 4.766909650115056e-05, "loss": 4.0818, "step": 1438 }, { "epoch": 0.32929809767461804, "grad_norm": 0.3798108461754218, "learning_rate": 4.766128095725352e-05, "loss": 3.9657, "step": 1440 }, { "epoch": 0.32975545614361057, "grad_norm": 0.4078557484812662, "learning_rate": 4.765345297537099e-05, "loss": 3.9253, "step": 1442 }, { "epoch": 0.3302128146126031, "grad_norm": 0.43355259399293555, "learning_rate": 4.7645612559799483e-05, "loss": 4.0125, "step": 1444 }, { "epoch": 0.33067017308159563, "grad_norm": 0.4268668797001289, "learning_rate": 4.763775971484234e-05, "loss": 4.0323, "step": 1446 }, { "epoch": 0.33112753155058816, "grad_norm": 0.4671776786460804, "learning_rate": 4.7629894444809675e-05, "loss": 3.991, "step": 1448 }, { "epoch": 0.33158489001958064, "grad_norm": 0.43904425355730137, "learning_rate": 4.7622016754018494e-05, "loss": 4.2519, "step": 1450 }, { "epoch": 0.33204224848857317, "grad_norm": 0.46291760072290566, "learning_rate": 4.7614126646792576e-05, "loss": 3.957, "step": 1452 }, { "epoch": 0.3324996069575657, "grad_norm": 0.46572996658919724, "learning_rate": 4.760622412746253e-05, "loss": 4.0936, "step": 1454 }, { "epoch": 0.3329569654265582, "grad_norm": 0.42598460529610666, "learning_rate": 4.7598309200365765e-05, "loss": 4.0031, "step": 1456 }, { "epoch": 0.33341432389555076, "grad_norm": 0.3832520277762222, "learning_rate": 4.759038186984651e-05, "loss": 3.9472, "step": 1458 }, { "epoch": 0.3338716823645433, "grad_norm": 0.4713828374949059, "learning_rate": 4.7582442140255803e-05, "loss": 4.1158, "step": 1460 }, { "epoch": 0.3343290408335358, "grad_norm": 0.404821414957125, "learning_rate": 4.757449001595149e-05, "loss": 4.2809, "step": 1462 }, { "epoch": 0.33478639930252835, "grad_norm": 0.41804530431986686, "learning_rate": 4.7566525501298196e-05, "loss": 4.2595, "step": 1464 }, { "epoch": 0.3352437577715209, "grad_norm": 0.4102225840379128, "learning_rate": 4.755854860066738e-05, "loss": 4.0861, "step": 1466 }, { "epoch": 0.3357011162405134, "grad_norm": 0.3593753317126526, "learning_rate": 4.755055931843728e-05, "loss": 3.9355, "step": 1468 }, { "epoch": 0.3361584747095059, "grad_norm": 0.4985227816817837, "learning_rate": 4.754255765899294e-05, "loss": 4.1351, "step": 1470 }, { "epoch": 0.3366158331784984, "grad_norm": 0.42717577546333335, "learning_rate": 4.753454362672617e-05, "loss": 4.0383, "step": 1472 }, { "epoch": 0.33707319164749094, "grad_norm": 0.46076616238599555, "learning_rate": 4.7526517226035614e-05, "loss": 4.197, "step": 1474 }, { "epoch": 0.3375305501164835, "grad_norm": 0.3978573371269621, "learning_rate": 4.7518478461326685e-05, "loss": 3.9806, "step": 1476 }, { "epoch": 0.337987908585476, "grad_norm": 0.4074937415081195, "learning_rate": 4.751042733701156e-05, "loss": 4.1627, "step": 1478 }, { "epoch": 0.33844526705446853, "grad_norm": 0.45834435825381664, "learning_rate": 4.7502363857509234e-05, "loss": 4.0019, "step": 1480 }, { "epoch": 0.33890262552346107, "grad_norm": 0.49774281813298704, "learning_rate": 4.749428802724547e-05, "loss": 3.9919, "step": 1482 }, { "epoch": 0.3393599839924536, "grad_norm": 0.5081899560604626, "learning_rate": 4.748619985065281e-05, "loss": 3.9679, "step": 1484 }, { "epoch": 0.3398173424614461, "grad_norm": 0.4489367553617666, "learning_rate": 4.7478099332170556e-05, "loss": 4.0647, "step": 1486 }, { "epoch": 0.34027470093043866, "grad_norm": 0.5204038820409277, "learning_rate": 4.746998647624482e-05, "loss": 4.1496, "step": 1488 }, { "epoch": 0.3407320593994312, "grad_norm": 0.47192559634142356, "learning_rate": 4.746186128732845e-05, "loss": 3.968, "step": 1490 }, { "epoch": 0.34118941786842366, "grad_norm": 0.36628919655340864, "learning_rate": 4.7453723769881086e-05, "loss": 4.2166, "step": 1492 }, { "epoch": 0.3416467763374162, "grad_norm": 0.38509600634394847, "learning_rate": 4.744557392836913e-05, "loss": 3.8184, "step": 1494 }, { "epoch": 0.3421041348064087, "grad_norm": 0.3155302708170363, "learning_rate": 4.743741176726574e-05, "loss": 4.0422, "step": 1496 }, { "epoch": 0.34256149327540125, "grad_norm": 0.5527003517058654, "learning_rate": 4.742923729105084e-05, "loss": 4.0399, "step": 1498 }, { "epoch": 0.3430188517443938, "grad_norm": 0.4993473800640869, "learning_rate": 4.7421050504211116e-05, "loss": 4.1263, "step": 1500 }, { "epoch": 0.3434762102133863, "grad_norm": 0.5668443761122108, "learning_rate": 4.7412851411240006e-05, "loss": 4.1092, "step": 1502 }, { "epoch": 0.34393356868237884, "grad_norm": 0.3767806335466956, "learning_rate": 4.740464001663771e-05, "loss": 3.9746, "step": 1504 }, { "epoch": 0.3443909271513714, "grad_norm": 0.4235134450789171, "learning_rate": 4.739641632491119e-05, "loss": 4.0832, "step": 1506 }, { "epoch": 0.3448482856203639, "grad_norm": 0.43047492962674977, "learning_rate": 4.7388180340574106e-05, "loss": 3.9095, "step": 1508 }, { "epoch": 0.34530564408935643, "grad_norm": 0.48213071183393325, "learning_rate": 4.7379932068146936e-05, "loss": 4.0219, "step": 1510 }, { "epoch": 0.3457630025583489, "grad_norm": 0.4265927621915085, "learning_rate": 4.737167151215686e-05, "loss": 3.9268, "step": 1512 }, { "epoch": 0.34622036102734144, "grad_norm": 0.498251495214574, "learning_rate": 4.73633986771378e-05, "loss": 3.8539, "step": 1514 }, { "epoch": 0.34667771949633397, "grad_norm": 0.4866671625171818, "learning_rate": 4.7355113567630435e-05, "loss": 4.2009, "step": 1516 }, { "epoch": 0.3471350779653265, "grad_norm": 0.4580394351673183, "learning_rate": 4.734681618818216e-05, "loss": 4.1868, "step": 1518 }, { "epoch": 0.34759243643431903, "grad_norm": 0.4344688039803089, "learning_rate": 4.733850654334714e-05, "loss": 4.1894, "step": 1520 }, { "epoch": 0.34804979490331156, "grad_norm": 0.45097842342080324, "learning_rate": 4.733018463768622e-05, "loss": 3.9728, "step": 1522 }, { "epoch": 0.3485071533723041, "grad_norm": 0.37173436735360915, "learning_rate": 4.7321850475767024e-05, "loss": 3.9774, "step": 1524 }, { "epoch": 0.3489645118412966, "grad_norm": 0.4990556113066851, "learning_rate": 4.7313504062163875e-05, "loss": 4.0971, "step": 1526 }, { "epoch": 0.34942187031028915, "grad_norm": 0.49833039834148046, "learning_rate": 4.730514540145783e-05, "loss": 3.9478, "step": 1528 }, { "epoch": 0.3498792287792817, "grad_norm": 0.5683006702006999, "learning_rate": 4.7296774498236665e-05, "loss": 4.0148, "step": 1530 }, { "epoch": 0.3503365872482742, "grad_norm": 0.5649201200360637, "learning_rate": 4.7288391357094875e-05, "loss": 3.9562, "step": 1532 }, { "epoch": 0.3507939457172667, "grad_norm": 0.35292985765192336, "learning_rate": 4.7279995982633673e-05, "loss": 3.9047, "step": 1534 }, { "epoch": 0.3512513041862592, "grad_norm": 0.40461863738258347, "learning_rate": 4.727158837946099e-05, "loss": 3.9586, "step": 1536 }, { "epoch": 0.35170866265525175, "grad_norm": 0.4089987088290309, "learning_rate": 4.726316855219147e-05, "loss": 3.9753, "step": 1538 }, { "epoch": 0.3521660211242443, "grad_norm": 0.4630507662966671, "learning_rate": 4.7254736505446445e-05, "loss": 3.9814, "step": 1540 }, { "epoch": 0.3526233795932368, "grad_norm": 0.38759556022725106, "learning_rate": 4.724629224385398e-05, "loss": 3.9623, "step": 1542 }, { "epoch": 0.35308073806222934, "grad_norm": 0.3985651784037873, "learning_rate": 4.723783577204885e-05, "loss": 3.8335, "step": 1544 }, { "epoch": 0.35353809653122187, "grad_norm": 0.4462357203285216, "learning_rate": 4.7229367094672495e-05, "loss": 4.0382, "step": 1546 }, { "epoch": 0.3539954550002144, "grad_norm": 0.4267524003093774, "learning_rate": 4.722088621637309e-05, "loss": 4.2873, "step": 1548 }, { "epoch": 0.35445281346920693, "grad_norm": 0.628690931529832, "learning_rate": 4.721239314180549e-05, "loss": 4.1078, "step": 1550 }, { "epoch": 0.35491017193819946, "grad_norm": 0.4312795794750195, "learning_rate": 4.7203887875631234e-05, "loss": 4.187, "step": 1552 }, { "epoch": 0.355367530407192, "grad_norm": 0.4331363464839913, "learning_rate": 4.7195370422518584e-05, "loss": 4.1172, "step": 1554 }, { "epoch": 0.35582488887618446, "grad_norm": 0.3868954957823588, "learning_rate": 4.718684078714246e-05, "loss": 3.9994, "step": 1556 }, { "epoch": 0.356282247345177, "grad_norm": 0.6746932074273162, "learning_rate": 4.717829897418449e-05, "loss": 3.9736, "step": 1558 }, { "epoch": 0.3567396058141695, "grad_norm": 0.4699310682353554, "learning_rate": 4.716974498833297e-05, "loss": 3.9426, "step": 1560 }, { "epoch": 0.35719696428316206, "grad_norm": 0.4107234484379063, "learning_rate": 4.716117883428289e-05, "loss": 3.9966, "step": 1562 }, { "epoch": 0.3576543227521546, "grad_norm": 0.41336281157301347, "learning_rate": 4.7152600516735905e-05, "loss": 4.165, "step": 1564 }, { "epoch": 0.3581116812211471, "grad_norm": 0.35144890963468955, "learning_rate": 4.714401004040036e-05, "loss": 4.0746, "step": 1566 }, { "epoch": 0.35856903969013965, "grad_norm": 0.4268934440258615, "learning_rate": 4.7135407409991263e-05, "loss": 3.8966, "step": 1568 }, { "epoch": 0.3590263981591322, "grad_norm": 0.42146005357035166, "learning_rate": 4.7126792630230306e-05, "loss": 4.0858, "step": 1570 }, { "epoch": 0.3594837566281247, "grad_norm": 0.49818847737607075, "learning_rate": 4.7118165705845826e-05, "loss": 3.9246, "step": 1572 }, { "epoch": 0.35994111509711724, "grad_norm": 0.3751383963153299, "learning_rate": 4.710952664157285e-05, "loss": 4.0943, "step": 1574 }, { "epoch": 0.3603984735661097, "grad_norm": 0.4363817842798091, "learning_rate": 4.710087544215306e-05, "loss": 3.9612, "step": 1576 }, { "epoch": 0.36085583203510224, "grad_norm": 0.5367372368040008, "learning_rate": 4.70922121123348e-05, "loss": 4.0938, "step": 1578 }, { "epoch": 0.36131319050409477, "grad_norm": 0.45509727994365384, "learning_rate": 4.7083536656873064e-05, "loss": 4.2776, "step": 1580 }, { "epoch": 0.3617705489730873, "grad_norm": 0.49122313224565145, "learning_rate": 4.7074849080529495e-05, "loss": 3.9622, "step": 1582 }, { "epoch": 0.36222790744207983, "grad_norm": 0.3588244118094515, "learning_rate": 4.7066149388072414e-05, "loss": 4.0319, "step": 1584 }, { "epoch": 0.36268526591107236, "grad_norm": 0.41312256779149525, "learning_rate": 4.7057437584276784e-05, "loss": 4.0382, "step": 1586 }, { "epoch": 0.3631426243800649, "grad_norm": 0.41356681447792953, "learning_rate": 4.704871367392419e-05, "loss": 4.0851, "step": 1588 }, { "epoch": 0.3635999828490574, "grad_norm": 0.4380589218755476, "learning_rate": 4.70399776618029e-05, "loss": 4.0573, "step": 1590 }, { "epoch": 0.36405734131804995, "grad_norm": 0.4135530144346039, "learning_rate": 4.70312295527078e-05, "loss": 4.0444, "step": 1592 }, { "epoch": 0.3645146997870425, "grad_norm": 0.3533636577919328, "learning_rate": 4.7022469351440415e-05, "loss": 3.873, "step": 1594 }, { "epoch": 0.364972058256035, "grad_norm": 0.44451551895598923, "learning_rate": 4.701369706280892e-05, "loss": 4.1236, "step": 1596 }, { "epoch": 0.3654294167250275, "grad_norm": 0.47805837466215106, "learning_rate": 4.7004912691628125e-05, "loss": 3.9104, "step": 1598 }, { "epoch": 0.36588677519402, "grad_norm": 0.39231206883748176, "learning_rate": 4.6996116242719445e-05, "loss": 4.0434, "step": 1600 }, { "epoch": 0.36634413366301255, "grad_norm": 0.3778638757777496, "learning_rate": 4.698730772091096e-05, "loss": 4.2107, "step": 1602 }, { "epoch": 0.3668014921320051, "grad_norm": 0.6012007628918247, "learning_rate": 4.6978487131037354e-05, "loss": 4.0072, "step": 1604 }, { "epoch": 0.3672588506009976, "grad_norm": 0.45063254086855337, "learning_rate": 4.696965447793993e-05, "loss": 4.0412, "step": 1606 }, { "epoch": 0.36771620906999014, "grad_norm": 0.4915790429941996, "learning_rate": 4.6960809766466654e-05, "loss": 3.951, "step": 1608 }, { "epoch": 0.36817356753898267, "grad_norm": 0.37523933561446604, "learning_rate": 4.695195300147204e-05, "loss": 4.1007, "step": 1610 }, { "epoch": 0.3686309260079752, "grad_norm": 0.44585452638445844, "learning_rate": 4.694308418781729e-05, "loss": 3.9259, "step": 1612 }, { "epoch": 0.36908828447696773, "grad_norm": 0.40460600529497776, "learning_rate": 4.693420333037016e-05, "loss": 4.0927, "step": 1614 }, { "epoch": 0.36954564294596026, "grad_norm": 0.4731666551895427, "learning_rate": 4.692531043400506e-05, "loss": 3.9236, "step": 1616 }, { "epoch": 0.37000300141495274, "grad_norm": 0.4839524918246229, "learning_rate": 4.691640550360299e-05, "loss": 4.2235, "step": 1618 }, { "epoch": 0.37046035988394527, "grad_norm": 0.3676587793641765, "learning_rate": 4.690748854405155e-05, "loss": 4.1651, "step": 1620 }, { "epoch": 0.3709177183529378, "grad_norm": 0.41929050265154444, "learning_rate": 4.689855956024494e-05, "loss": 4.2991, "step": 1622 }, { "epoch": 0.37137507682193033, "grad_norm": 0.3802024065714107, "learning_rate": 4.688961855708397e-05, "loss": 4.1835, "step": 1624 }, { "epoch": 0.37183243529092286, "grad_norm": 0.39146510384135175, "learning_rate": 4.6880665539476054e-05, "loss": 3.8873, "step": 1626 }, { "epoch": 0.3722897937599154, "grad_norm": 0.3369103362388764, "learning_rate": 4.687170051233519e-05, "loss": 4.1943, "step": 1628 }, { "epoch": 0.3727471522289079, "grad_norm": 0.3567789893598691, "learning_rate": 4.686272348058196e-05, "loss": 4.0621, "step": 1630 }, { "epoch": 0.37320451069790045, "grad_norm": 0.42504384992687133, "learning_rate": 4.685373444914355e-05, "loss": 4.113, "step": 1632 }, { "epoch": 0.373661869166893, "grad_norm": 0.34753206799759634, "learning_rate": 4.684473342295372e-05, "loss": 3.9632, "step": 1634 }, { "epoch": 0.3741192276358855, "grad_norm": 0.5331421142997365, "learning_rate": 4.683572040695282e-05, "loss": 3.9618, "step": 1636 }, { "epoch": 0.37457658610487804, "grad_norm": 0.4291448634667697, "learning_rate": 4.682669540608778e-05, "loss": 4.0793, "step": 1638 }, { "epoch": 0.3750339445738705, "grad_norm": 0.33892018236336696, "learning_rate": 4.6817658425312105e-05, "loss": 3.9704, "step": 1640 }, { "epoch": 0.37549130304286304, "grad_norm": 0.4299304448749609, "learning_rate": 4.680860946958589e-05, "loss": 3.9697, "step": 1642 }, { "epoch": 0.3759486615118556, "grad_norm": 0.41220785835434376, "learning_rate": 4.679954854387578e-05, "loss": 4.0702, "step": 1644 }, { "epoch": 0.3764060199808481, "grad_norm": 0.3896248219917681, "learning_rate": 4.6790475653155e-05, "loss": 3.9355, "step": 1646 }, { "epoch": 0.37686337844984064, "grad_norm": 0.3874748814040244, "learning_rate": 4.678139080240335e-05, "loss": 4.1275, "step": 1648 }, { "epoch": 0.37732073691883317, "grad_norm": 0.4490269458794642, "learning_rate": 4.6772293996607175e-05, "loss": 3.9608, "step": 1650 }, { "epoch": 0.3777780953878257, "grad_norm": 0.560418090913043, "learning_rate": 4.67631852407594e-05, "loss": 4.1016, "step": 1652 }, { "epoch": 0.3782354538568182, "grad_norm": 0.5647598541973884, "learning_rate": 4.675406453985951e-05, "loss": 3.9384, "step": 1654 }, { "epoch": 0.37869281232581076, "grad_norm": 0.39723414668648427, "learning_rate": 4.674493189891354e-05, "loss": 3.8871, "step": 1656 }, { "epoch": 0.3791501707948033, "grad_norm": 0.45399716725705036, "learning_rate": 4.6735787322934054e-05, "loss": 4.0854, "step": 1658 }, { "epoch": 0.37960752926379576, "grad_norm": 0.3800146859556072, "learning_rate": 4.672663081694022e-05, "loss": 4.2049, "step": 1660 }, { "epoch": 0.3800648877327883, "grad_norm": 0.4194503798584145, "learning_rate": 4.671746238595771e-05, "loss": 4.2073, "step": 1662 }, { "epoch": 0.3805222462017808, "grad_norm": 0.374163310139277, "learning_rate": 4.670828203501876e-05, "loss": 3.8838, "step": 1664 }, { "epoch": 0.38097960467077335, "grad_norm": 0.38454593641251755, "learning_rate": 4.669908976916214e-05, "loss": 3.834, "step": 1666 }, { "epoch": 0.3814369631397659, "grad_norm": 0.44702996788121047, "learning_rate": 4.668988559343316e-05, "loss": 4.1838, "step": 1668 }, { "epoch": 0.3818943216087584, "grad_norm": 0.38465163193057383, "learning_rate": 4.668066951288368e-05, "loss": 3.9484, "step": 1670 }, { "epoch": 0.38235168007775094, "grad_norm": 0.477814171927242, "learning_rate": 4.6671441532572075e-05, "loss": 4.0527, "step": 1672 }, { "epoch": 0.3828090385467435, "grad_norm": 1.0140405473257903, "learning_rate": 4.666220165756326e-05, "loss": 3.9863, "step": 1674 }, { "epoch": 0.383266397015736, "grad_norm": 0.3892510857122883, "learning_rate": 4.6652949892928705e-05, "loss": 3.8728, "step": 1676 }, { "epoch": 0.38372375548472853, "grad_norm": 0.471358654007882, "learning_rate": 4.664368624374635e-05, "loss": 3.9102, "step": 1678 }, { "epoch": 0.38418111395372107, "grad_norm": 0.4199184683728097, "learning_rate": 4.66344107151007e-05, "loss": 3.9832, "step": 1680 }, { "epoch": 0.38463847242271354, "grad_norm": 0.342343976983988, "learning_rate": 4.662512331208276e-05, "loss": 3.98, "step": 1682 }, { "epoch": 0.38509583089170607, "grad_norm": 0.32096534584059344, "learning_rate": 4.6615824039790085e-05, "loss": 3.9137, "step": 1684 }, { "epoch": 0.3855531893606986, "grad_norm": 0.4318762261391222, "learning_rate": 4.660651290332669e-05, "loss": 4.0542, "step": 1686 }, { "epoch": 0.38601054782969113, "grad_norm": 0.4082666010077212, "learning_rate": 4.659718990780316e-05, "loss": 3.957, "step": 1688 }, { "epoch": 0.38646790629868366, "grad_norm": 0.4251707407238887, "learning_rate": 4.658785505833655e-05, "loss": 4.1245, "step": 1690 }, { "epoch": 0.3869252647676762, "grad_norm": 0.4294691050989548, "learning_rate": 4.657850836005042e-05, "loss": 4.1152, "step": 1692 }, { "epoch": 0.3873826232366687, "grad_norm": 0.47499792261673257, "learning_rate": 4.6569149818074864e-05, "loss": 3.9565, "step": 1694 }, { "epoch": 0.38783998170566125, "grad_norm": 0.3787810915281079, "learning_rate": 4.6559779437546446e-05, "loss": 4.0442, "step": 1696 }, { "epoch": 0.3882973401746538, "grad_norm": 0.33487412242411313, "learning_rate": 4.6550397223608254e-05, "loss": 4.0127, "step": 1698 }, { "epoch": 0.3887546986436463, "grad_norm": 0.4432757462599212, "learning_rate": 4.654100318140985e-05, "loss": 3.9987, "step": 1700 }, { "epoch": 0.38921205711263884, "grad_norm": 0.4191375504778241, "learning_rate": 4.65315973161073e-05, "loss": 3.9297, "step": 1702 }, { "epoch": 0.3896694155816313, "grad_norm": 0.4053920710095493, "learning_rate": 4.652217963286314e-05, "loss": 4.1528, "step": 1704 }, { "epoch": 0.39012677405062385, "grad_norm": 0.45842193881344867, "learning_rate": 4.651275013684643e-05, "loss": 4.0861, "step": 1706 }, { "epoch": 0.3905841325196164, "grad_norm": 0.5046920858830295, "learning_rate": 4.6503308833232674e-05, "loss": 4.0968, "step": 1708 }, { "epoch": 0.3910414909886089, "grad_norm": 0.4069265271152959, "learning_rate": 4.649385572720388e-05, "loss": 3.8426, "step": 1710 }, { "epoch": 0.39149884945760144, "grad_norm": 0.40081792221196805, "learning_rate": 4.648439082394853e-05, "loss": 3.9988, "step": 1712 }, { "epoch": 0.39195620792659397, "grad_norm": 0.3873552805856637, "learning_rate": 4.647491412866157e-05, "loss": 3.8732, "step": 1714 }, { "epoch": 0.3924135663955865, "grad_norm": 0.4262167252280559, "learning_rate": 4.6465425646544435e-05, "loss": 3.9313, "step": 1716 }, { "epoch": 0.39287092486457903, "grad_norm": 0.4420885534754303, "learning_rate": 4.645592538280502e-05, "loss": 3.9823, "step": 1718 }, { "epoch": 0.39332828333357156, "grad_norm": 0.4944226511814841, "learning_rate": 4.644641334265769e-05, "loss": 3.9729, "step": 1720 }, { "epoch": 0.3937856418025641, "grad_norm": 0.4660688333120682, "learning_rate": 4.643688953132326e-05, "loss": 3.8095, "step": 1722 }, { "epoch": 0.39424300027155657, "grad_norm": 0.3782908367595077, "learning_rate": 4.642735395402904e-05, "loss": 3.9776, "step": 1724 }, { "epoch": 0.3947003587405491, "grad_norm": 0.42674990369508503, "learning_rate": 4.641780661600875e-05, "loss": 4.0203, "step": 1726 }, { "epoch": 0.3951577172095416, "grad_norm": 0.3958157506183125, "learning_rate": 4.64082475225026e-05, "loss": 4.0403, "step": 1728 }, { "epoch": 0.39561507567853416, "grad_norm": 0.5046943888474583, "learning_rate": 4.639867667875725e-05, "loss": 4.1197, "step": 1730 }, { "epoch": 0.3960724341475267, "grad_norm": 0.43266172158310634, "learning_rate": 4.638909409002579e-05, "loss": 4.1516, "step": 1732 }, { "epoch": 0.3965297926165192, "grad_norm": 0.5055839643106107, "learning_rate": 4.637949976156778e-05, "loss": 3.9843, "step": 1734 }, { "epoch": 0.39698715108551175, "grad_norm": 0.44646971070245056, "learning_rate": 4.63698936986492e-05, "loss": 3.9035, "step": 1736 }, { "epoch": 0.3974445095545043, "grad_norm": 0.47224305748165585, "learning_rate": 4.636027590654249e-05, "loss": 4.0246, "step": 1738 }, { "epoch": 0.3979018680234968, "grad_norm": 0.38661541184241466, "learning_rate": 4.635064639052652e-05, "loss": 3.9206, "step": 1740 }, { "epoch": 0.39835922649248934, "grad_norm": 0.48231305891288595, "learning_rate": 4.6341005155886584e-05, "loss": 3.8412, "step": 1742 }, { "epoch": 0.39881658496148187, "grad_norm": 0.5685232653726305, "learning_rate": 4.633135220791444e-05, "loss": 4.1812, "step": 1744 }, { "epoch": 0.39927394343047434, "grad_norm": 0.403868886374197, "learning_rate": 4.6321687551908235e-05, "loss": 4.0437, "step": 1746 }, { "epoch": 0.3997313018994669, "grad_norm": 0.3843184452785042, "learning_rate": 4.6312011193172575e-05, "loss": 3.9575, "step": 1748 }, { "epoch": 0.4001886603684594, "grad_norm": 0.44557366325573006, "learning_rate": 4.6302323137018464e-05, "loss": 3.9139, "step": 1750 }, { "epoch": 0.40064601883745193, "grad_norm": 0.5422000795603532, "learning_rate": 4.6292623388763336e-05, "loss": 4.054, "step": 1752 }, { "epoch": 0.40110337730644446, "grad_norm": 0.40205498860649996, "learning_rate": 4.628291195373106e-05, "loss": 4.0085, "step": 1754 }, { "epoch": 0.401560735775437, "grad_norm": 0.3795381958892446, "learning_rate": 4.627318883725189e-05, "loss": 3.9226, "step": 1756 }, { "epoch": 0.4020180942444295, "grad_norm": 0.369009085334022, "learning_rate": 4.6263454044662505e-05, "loss": 3.9397, "step": 1758 }, { "epoch": 0.40247545271342205, "grad_norm": 0.47338015709508835, "learning_rate": 4.625370758130599e-05, "loss": 3.9499, "step": 1760 }, { "epoch": 0.4029328111824146, "grad_norm": 0.4352354638478579, "learning_rate": 4.624394945253185e-05, "loss": 3.9139, "step": 1762 }, { "epoch": 0.4033901696514071, "grad_norm": 0.5451820600090206, "learning_rate": 4.623417966369598e-05, "loss": 3.9196, "step": 1764 }, { "epoch": 0.4038475281203996, "grad_norm": 0.4095364383338292, "learning_rate": 4.622439822016067e-05, "loss": 3.9336, "step": 1766 }, { "epoch": 0.4043048865893921, "grad_norm": 0.39573700468905393, "learning_rate": 4.621460512729461e-05, "loss": 3.9012, "step": 1768 }, { "epoch": 0.40476224505838465, "grad_norm": 0.2874437966558066, "learning_rate": 4.6204800390472894e-05, "loss": 3.9974, "step": 1770 }, { "epoch": 0.4052196035273772, "grad_norm": 0.35297393394807447, "learning_rate": 4.6194984015076994e-05, "loss": 3.9681, "step": 1772 }, { "epoch": 0.4056769619963697, "grad_norm": 0.4766497297656967, "learning_rate": 4.618515600649477e-05, "loss": 3.9512, "step": 1774 }, { "epoch": 0.40613432046536224, "grad_norm": 0.5175349379461585, "learning_rate": 4.617531637012048e-05, "loss": 4.0868, "step": 1776 }, { "epoch": 0.40659167893435477, "grad_norm": 0.3998154765929621, "learning_rate": 4.616546511135477e-05, "loss": 4.0423, "step": 1778 }, { "epoch": 0.4070490374033473, "grad_norm": 0.4530914063990631, "learning_rate": 4.615560223560462e-05, "loss": 3.9647, "step": 1780 }, { "epoch": 0.40750639587233983, "grad_norm": 0.4114348690569461, "learning_rate": 4.614572774828345e-05, "loss": 3.8344, "step": 1782 }, { "epoch": 0.40796375434133236, "grad_norm": 0.36922714188345995, "learning_rate": 4.613584165481101e-05, "loss": 3.865, "step": 1784 }, { "epoch": 0.4084211128103249, "grad_norm": 0.5333868415882274, "learning_rate": 4.6125943960613414e-05, "loss": 3.8595, "step": 1786 }, { "epoch": 0.40887847127931737, "grad_norm": 0.37606241806483315, "learning_rate": 4.611603467112319e-05, "loss": 3.8755, "step": 1788 }, { "epoch": 0.4093358297483099, "grad_norm": 0.45658805746913933, "learning_rate": 4.610611379177917e-05, "loss": 3.811, "step": 1790 }, { "epoch": 0.40979318821730243, "grad_norm": 0.3054322042823779, "learning_rate": 4.609618132802661e-05, "loss": 3.9584, "step": 1792 }, { "epoch": 0.41025054668629496, "grad_norm": 0.4731932025515925, "learning_rate": 4.6086237285317065e-05, "loss": 4.0509, "step": 1794 }, { "epoch": 0.4107079051552875, "grad_norm": 0.467249582943595, "learning_rate": 4.607628166910849e-05, "loss": 4.0159, "step": 1796 }, { "epoch": 0.41116526362428, "grad_norm": 0.3759256105453405, "learning_rate": 4.606631448486517e-05, "loss": 3.8217, "step": 1798 }, { "epoch": 0.41162262209327255, "grad_norm": 0.36692039433111395, "learning_rate": 4.6056335738057744e-05, "loss": 3.8677, "step": 1800 }, { "epoch": 0.4120799805622651, "grad_norm": 0.39493060908861183, "learning_rate": 4.604634543416319e-05, "loss": 4.0541, "step": 1802 }, { "epoch": 0.4125373390312576, "grad_norm": 0.36289454972328317, "learning_rate": 4.603634357866485e-05, "loss": 3.7394, "step": 1804 }, { "epoch": 0.41299469750025014, "grad_norm": 0.42728285277135847, "learning_rate": 4.602633017705239e-05, "loss": 3.7485, "step": 1806 }, { "epoch": 0.4134520559692426, "grad_norm": 0.3682001637534586, "learning_rate": 4.601630523482181e-05, "loss": 4.0231, "step": 1808 }, { "epoch": 0.41390941443823515, "grad_norm": 0.39746747960510626, "learning_rate": 4.600626875747545e-05, "loss": 3.9018, "step": 1810 }, { "epoch": 0.4143667729072277, "grad_norm": 0.4792833514431452, "learning_rate": 4.5996220750521986e-05, "loss": 4.1277, "step": 1812 }, { "epoch": 0.4148241313762202, "grad_norm": 0.40262864035589396, "learning_rate": 4.598616121947642e-05, "loss": 3.9282, "step": 1814 }, { "epoch": 0.41528148984521274, "grad_norm": 0.4910852866880256, "learning_rate": 4.597609016986008e-05, "loss": 4.0143, "step": 1816 }, { "epoch": 0.41573884831420527, "grad_norm": 0.31803287543696424, "learning_rate": 4.5966007607200595e-05, "loss": 4.0273, "step": 1818 }, { "epoch": 0.4161962067831978, "grad_norm": 0.42412908038428754, "learning_rate": 4.5955913537031954e-05, "loss": 3.961, "step": 1820 }, { "epoch": 0.4166535652521903, "grad_norm": 0.5356990817728126, "learning_rate": 4.5945807964894426e-05, "loss": 3.8224, "step": 1822 }, { "epoch": 0.41711092372118286, "grad_norm": 0.35966937820692857, "learning_rate": 4.5935690896334616e-05, "loss": 4.0504, "step": 1824 }, { "epoch": 0.4175682821901754, "grad_norm": 0.3315994708943086, "learning_rate": 4.592556233690541e-05, "loss": 4.148, "step": 1826 }, { "epoch": 0.4180256406591679, "grad_norm": 0.3783873317991214, "learning_rate": 4.5915422292166056e-05, "loss": 4.0195, "step": 1828 }, { "epoch": 0.4184829991281604, "grad_norm": 0.4576730216232918, "learning_rate": 4.5905270767682034e-05, "loss": 3.908, "step": 1830 }, { "epoch": 0.4189403575971529, "grad_norm": 0.34834877432627676, "learning_rate": 4.589510776902518e-05, "loss": 3.9809, "step": 1832 }, { "epoch": 0.41939771606614545, "grad_norm": 0.3744099465045935, "learning_rate": 4.58849333017736e-05, "loss": 4.0908, "step": 1834 }, { "epoch": 0.419855074535138, "grad_norm": 0.35242013166804437, "learning_rate": 4.587474737151172e-05, "loss": 3.8601, "step": 1836 }, { "epoch": 0.4203124330041305, "grad_norm": 0.359390320658643, "learning_rate": 4.586454998383023e-05, "loss": 3.8451, "step": 1838 }, { "epoch": 0.42076979147312304, "grad_norm": 0.4506666153409513, "learning_rate": 4.5854341144326104e-05, "loss": 3.9073, "step": 1840 }, { "epoch": 0.4212271499421156, "grad_norm": 0.3769323906606771, "learning_rate": 4.584412085860264e-05, "loss": 3.9798, "step": 1842 }, { "epoch": 0.4216845084111081, "grad_norm": 0.40400504669257165, "learning_rate": 4.583388913226939e-05, "loss": 4.1447, "step": 1844 }, { "epoch": 0.42214186688010064, "grad_norm": 0.4045554361668142, "learning_rate": 4.5823645970942195e-05, "loss": 4.0264, "step": 1846 }, { "epoch": 0.42259922534909317, "grad_norm": 0.3459795046411297, "learning_rate": 4.5813391380243166e-05, "loss": 4.0664, "step": 1848 }, { "epoch": 0.4230565838180857, "grad_norm": 0.38900052844798616, "learning_rate": 4.5803125365800676e-05, "loss": 3.9418, "step": 1850 }, { "epoch": 0.42351394228707817, "grad_norm": 0.4724130398842176, "learning_rate": 4.57928479332494e-05, "loss": 4.0484, "step": 1852 }, { "epoch": 0.4239713007560707, "grad_norm": 0.40284695684351945, "learning_rate": 4.578255908823025e-05, "loss": 4.0002, "step": 1854 }, { "epoch": 0.42442865922506323, "grad_norm": 0.40486646693754635, "learning_rate": 4.5772258836390426e-05, "loss": 4.0154, "step": 1856 }, { "epoch": 0.42488601769405576, "grad_norm": 0.39633416398145566, "learning_rate": 4.576194718338336e-05, "loss": 3.8284, "step": 1858 }, { "epoch": 0.4253433761630483, "grad_norm": 0.43038161307978656, "learning_rate": 4.575162413486876e-05, "loss": 3.8752, "step": 1860 }, { "epoch": 0.4258007346320408, "grad_norm": 0.4566864930441063, "learning_rate": 4.574128969651261e-05, "loss": 4.0951, "step": 1862 }, { "epoch": 0.42625809310103335, "grad_norm": 0.4819200508046788, "learning_rate": 4.57309438739871e-05, "loss": 3.9046, "step": 1864 }, { "epoch": 0.4267154515700259, "grad_norm": 0.559807425919425, "learning_rate": 4.57205866729707e-05, "loss": 3.8691, "step": 1866 }, { "epoch": 0.4271728100390184, "grad_norm": 0.4291931256066799, "learning_rate": 4.5710218099148104e-05, "loss": 3.8654, "step": 1868 }, { "epoch": 0.42763016850801094, "grad_norm": 0.3907558603439137, "learning_rate": 4.5699838158210274e-05, "loss": 3.8679, "step": 1870 }, { "epoch": 0.4280875269770034, "grad_norm": 0.5321495350384869, "learning_rate": 4.5689446855854385e-05, "loss": 4.0026, "step": 1872 }, { "epoch": 0.42854488544599595, "grad_norm": 0.3938250500438462, "learning_rate": 4.567904419778387e-05, "loss": 4.0676, "step": 1874 }, { "epoch": 0.4290022439149885, "grad_norm": 0.4333423538675181, "learning_rate": 4.566863018970838e-05, "loss": 3.8521, "step": 1876 }, { "epoch": 0.429459602383981, "grad_norm": 0.34585097217197064, "learning_rate": 4.56582048373438e-05, "loss": 3.8779, "step": 1878 }, { "epoch": 0.42991696085297354, "grad_norm": 0.34367221255588276, "learning_rate": 4.564776814641224e-05, "loss": 3.9652, "step": 1880 }, { "epoch": 0.43037431932196607, "grad_norm": 0.45819050738172834, "learning_rate": 4.5637320122642035e-05, "loss": 3.8865, "step": 1882 }, { "epoch": 0.4308316777909586, "grad_norm": 0.3387129594814874, "learning_rate": 4.5626860771767753e-05, "loss": 3.9618, "step": 1884 }, { "epoch": 0.43128903625995113, "grad_norm": 0.4914873380727833, "learning_rate": 4.561639009953015e-05, "loss": 4.0406, "step": 1886 }, { "epoch": 0.43174639472894366, "grad_norm": 0.40612616852497985, "learning_rate": 4.5605908111676224e-05, "loss": 3.9451, "step": 1888 }, { "epoch": 0.4322037531979362, "grad_norm": 0.5180137096932627, "learning_rate": 4.5595414813959156e-05, "loss": 3.9242, "step": 1890 }, { "epoch": 0.4326611116669287, "grad_norm": 0.38449811591938077, "learning_rate": 4.5584910212138363e-05, "loss": 3.9741, "step": 1892 }, { "epoch": 0.4331184701359212, "grad_norm": 0.3692800470484141, "learning_rate": 4.557439431197945e-05, "loss": 3.82, "step": 1894 }, { "epoch": 0.4335758286049137, "grad_norm": 0.39105391795999384, "learning_rate": 4.556386711925424e-05, "loss": 4.0034, "step": 1896 }, { "epoch": 0.43403318707390626, "grad_norm": 0.3451188640629319, "learning_rate": 4.555332863974072e-05, "loss": 3.9881, "step": 1898 }, { "epoch": 0.4344905455428988, "grad_norm": 0.5258713122202964, "learning_rate": 4.5542778879223106e-05, "loss": 4.0113, "step": 1900 }, { "epoch": 0.4349479040118913, "grad_norm": 0.41611493854958115, "learning_rate": 4.55322178434918e-05, "loss": 4.0349, "step": 1902 }, { "epoch": 0.43540526248088385, "grad_norm": 0.48176319700495374, "learning_rate": 4.552164553834336e-05, "loss": 3.9975, "step": 1904 }, { "epoch": 0.4358626209498764, "grad_norm": 0.6621864971605154, "learning_rate": 4.551106196958058e-05, "loss": 3.9536, "step": 1906 }, { "epoch": 0.4363199794188689, "grad_norm": 0.3823140460886242, "learning_rate": 4.5500467143012405e-05, "loss": 3.8961, "step": 1908 }, { "epoch": 0.43677733788786144, "grad_norm": 0.42754143670243233, "learning_rate": 4.5489861064453955e-05, "loss": 3.9424, "step": 1910 }, { "epoch": 0.43723469635685397, "grad_norm": 0.38799180385201243, "learning_rate": 4.547924373972655e-05, "loss": 4.0322, "step": 1912 }, { "epoch": 0.43769205482584644, "grad_norm": 0.4121934524754162, "learning_rate": 4.546861517465766e-05, "loss": 3.9598, "step": 1914 }, { "epoch": 0.438149413294839, "grad_norm": 0.4703642705767704, "learning_rate": 4.545797537508093e-05, "loss": 4.0665, "step": 1916 }, { "epoch": 0.4386067717638315, "grad_norm": 0.3887644710177913, "learning_rate": 4.544732434683619e-05, "loss": 3.982, "step": 1918 }, { "epoch": 0.43906413023282403, "grad_norm": 0.39397737129577887, "learning_rate": 4.54366620957694e-05, "loss": 3.8346, "step": 1920 }, { "epoch": 0.43952148870181657, "grad_norm": 0.4024972345126852, "learning_rate": 4.5425988627732704e-05, "loss": 3.8783, "step": 1922 }, { "epoch": 0.4399788471708091, "grad_norm": 0.37819110592012084, "learning_rate": 4.54153039485844e-05, "loss": 3.8102, "step": 1924 }, { "epoch": 0.4404362056398016, "grad_norm": 0.4610882043973611, "learning_rate": 4.540460806418893e-05, "loss": 3.9688, "step": 1926 }, { "epoch": 0.44089356410879416, "grad_norm": 0.4041750258961, "learning_rate": 4.539390098041688e-05, "loss": 4.0251, "step": 1928 }, { "epoch": 0.4413509225777867, "grad_norm": 0.4653021051284635, "learning_rate": 4.538318270314502e-05, "loss": 3.9194, "step": 1930 }, { "epoch": 0.4418082810467792, "grad_norm": 0.45804159857248206, "learning_rate": 4.537245323825622e-05, "loss": 3.9133, "step": 1932 }, { "epoch": 0.44226563951577175, "grad_norm": 0.39870221563206304, "learning_rate": 4.536171259163951e-05, "loss": 3.9261, "step": 1934 }, { "epoch": 0.4427229979847642, "grad_norm": 0.4627601504034401, "learning_rate": 4.535096076919007e-05, "loss": 3.9784, "step": 1936 }, { "epoch": 0.44318035645375675, "grad_norm": 0.5108828232466974, "learning_rate": 4.534019777680918e-05, "loss": 4.0444, "step": 1938 }, { "epoch": 0.4436377149227493, "grad_norm": 0.3562846618654936, "learning_rate": 4.532942362040428e-05, "loss": 3.7625, "step": 1940 }, { "epoch": 0.4440950733917418, "grad_norm": 0.4061191604901255, "learning_rate": 4.531863830588893e-05, "loss": 3.8106, "step": 1942 }, { "epoch": 0.44455243186073434, "grad_norm": 0.4307787130597295, "learning_rate": 4.5307841839182794e-05, "loss": 4.1319, "step": 1944 }, { "epoch": 0.4450097903297269, "grad_norm": 0.4611725127254357, "learning_rate": 4.529703422621171e-05, "loss": 4.1878, "step": 1946 }, { "epoch": 0.4454671487987194, "grad_norm": 0.4126453981485674, "learning_rate": 4.528621547290757e-05, "loss": 3.7447, "step": 1948 }, { "epoch": 0.44592450726771193, "grad_norm": 0.48914501846843284, "learning_rate": 4.5275385585208425e-05, "loss": 3.9629, "step": 1950 }, { "epoch": 0.44638186573670446, "grad_norm": 0.47137525065214847, "learning_rate": 4.526454456905841e-05, "loss": 3.8819, "step": 1952 }, { "epoch": 0.446839224205697, "grad_norm": 0.4701587964461521, "learning_rate": 4.525369243040779e-05, "loss": 3.9527, "step": 1954 }, { "epoch": 0.44729658267468947, "grad_norm": 0.3613877324606178, "learning_rate": 4.5242829175212925e-05, "loss": 3.8921, "step": 1956 }, { "epoch": 0.447753941143682, "grad_norm": 0.40163342261483703, "learning_rate": 4.523195480943627e-05, "loss": 3.9719, "step": 1958 }, { "epoch": 0.44821129961267453, "grad_norm": 0.3959714832490052, "learning_rate": 4.522106933904639e-05, "loss": 3.7399, "step": 1960 }, { "epoch": 0.44866865808166706, "grad_norm": 0.4114035759073606, "learning_rate": 4.521017277001793e-05, "loss": 3.8936, "step": 1962 }, { "epoch": 0.4491260165506596, "grad_norm": 0.38988997924932217, "learning_rate": 4.519926510833165e-05, "loss": 3.9242, "step": 1964 }, { "epoch": 0.4495833750196521, "grad_norm": 0.38976173379056356, "learning_rate": 4.5188346359974374e-05, "loss": 3.9289, "step": 1966 }, { "epoch": 0.45004073348864465, "grad_norm": 0.38698229816277013, "learning_rate": 4.517741653093903e-05, "loss": 3.9033, "step": 1968 }, { "epoch": 0.4504980919576372, "grad_norm": 0.4621759361899341, "learning_rate": 4.516647562722461e-05, "loss": 3.9836, "step": 1970 }, { "epoch": 0.4509554504266297, "grad_norm": 0.44323843092001813, "learning_rate": 4.51555236548362e-05, "loss": 3.9898, "step": 1972 }, { "epoch": 0.45141280889562224, "grad_norm": 0.3532696917000019, "learning_rate": 4.514456061978495e-05, "loss": 3.9848, "step": 1974 }, { "epoch": 0.45187016736461477, "grad_norm": 0.4490179193463773, "learning_rate": 4.513358652808809e-05, "loss": 3.9723, "step": 1976 }, { "epoch": 0.45232752583360725, "grad_norm": 0.42243364222120006, "learning_rate": 4.5122601385768915e-05, "loss": 3.9745, "step": 1978 }, { "epoch": 0.4527848843025998, "grad_norm": 0.47261146459339176, "learning_rate": 4.511160519885679e-05, "loss": 3.949, "step": 1980 }, { "epoch": 0.4532422427715923, "grad_norm": 0.3762748624818492, "learning_rate": 4.510059797338714e-05, "loss": 3.7482, "step": 1982 }, { "epoch": 0.45369960124058484, "grad_norm": 0.3938556589930499, "learning_rate": 4.508957971540143e-05, "loss": 3.7438, "step": 1984 }, { "epoch": 0.45415695970957737, "grad_norm": 0.4790501470148904, "learning_rate": 4.507855043094721e-05, "loss": 3.9648, "step": 1986 }, { "epoch": 0.4546143181785699, "grad_norm": 0.45464332011987696, "learning_rate": 4.506751012607807e-05, "loss": 3.8803, "step": 1988 }, { "epoch": 0.45507167664756243, "grad_norm": 0.3946597390518656, "learning_rate": 4.505645880685364e-05, "loss": 3.8056, "step": 1990 }, { "epoch": 0.45552903511655496, "grad_norm": 0.4604020969392829, "learning_rate": 4.504539647933962e-05, "loss": 3.8282, "step": 1992 }, { "epoch": 0.4559863935855475, "grad_norm": 0.4238111782641236, "learning_rate": 4.503432314960771e-05, "loss": 3.9465, "step": 1994 }, { "epoch": 0.45644375205454, "grad_norm": 0.6612854053198568, "learning_rate": 4.502323882373569e-05, "loss": 3.8365, "step": 1996 }, { "epoch": 0.45690111052353255, "grad_norm": 0.4408297360705524, "learning_rate": 4.501214350780736e-05, "loss": 3.9276, "step": 1998 }, { "epoch": 0.457358468992525, "grad_norm": 0.5285881336713264, "learning_rate": 4.5001037207912544e-05, "loss": 3.9909, "step": 2000 }, { "epoch": 0.45781582746151755, "grad_norm": 0.44303377942363287, "learning_rate": 4.49899199301471e-05, "loss": 3.9262, "step": 2002 }, { "epoch": 0.4582731859305101, "grad_norm": 0.3602363381531898, "learning_rate": 4.497879168061293e-05, "loss": 3.9458, "step": 2004 }, { "epoch": 0.4587305443995026, "grad_norm": 0.5518402578147364, "learning_rate": 4.496765246541792e-05, "loss": 3.9244, "step": 2006 }, { "epoch": 0.45918790286849515, "grad_norm": 0.4054209796791101, "learning_rate": 4.4956502290676006e-05, "loss": 4.1551, "step": 2008 }, { "epoch": 0.4596452613374877, "grad_norm": 0.3984005854840745, "learning_rate": 4.494534116250714e-05, "loss": 4.0444, "step": 2010 }, { "epoch": 0.4601026198064802, "grad_norm": 0.3796006358432771, "learning_rate": 4.4934169087037246e-05, "loss": 3.8656, "step": 2012 }, { "epoch": 0.46055997827547274, "grad_norm": 0.49514114352023986, "learning_rate": 4.492298607039832e-05, "loss": 3.9668, "step": 2014 }, { "epoch": 0.46101733674446527, "grad_norm": 0.4677568564134187, "learning_rate": 4.4911792118728314e-05, "loss": 3.8581, "step": 2016 }, { "epoch": 0.4614746952134578, "grad_norm": 0.3979909863230056, "learning_rate": 4.490058723817119e-05, "loss": 3.8497, "step": 2018 }, { "epoch": 0.46193205368245027, "grad_norm": 0.3816322310119046, "learning_rate": 4.4889371434876925e-05, "loss": 4.1003, "step": 2020 }, { "epoch": 0.4623894121514428, "grad_norm": 0.4763059394503744, "learning_rate": 4.4878144715001483e-05, "loss": 3.9048, "step": 2022 }, { "epoch": 0.46284677062043533, "grad_norm": 0.439943099786037, "learning_rate": 4.486690708470681e-05, "loss": 4.0054, "step": 2024 }, { "epoch": 0.46330412908942786, "grad_norm": 0.39506060901847695, "learning_rate": 4.485565855016086e-05, "loss": 3.8257, "step": 2026 }, { "epoch": 0.4637614875584204, "grad_norm": 0.507164380151235, "learning_rate": 4.484439911753755e-05, "loss": 3.8116, "step": 2028 }, { "epoch": 0.4642188460274129, "grad_norm": 0.3517928004934264, "learning_rate": 4.483312879301679e-05, "loss": 4.1032, "step": 2030 }, { "epoch": 0.46467620449640545, "grad_norm": 0.4868724641501284, "learning_rate": 4.482184758278447e-05, "loss": 4.0363, "step": 2032 }, { "epoch": 0.465133562965398, "grad_norm": 0.3704320529507017, "learning_rate": 4.481055549303246e-05, "loss": 3.8924, "step": 2034 }, { "epoch": 0.4655909214343905, "grad_norm": 0.4337222965528921, "learning_rate": 4.479925252995858e-05, "loss": 4.022, "step": 2036 }, { "epoch": 0.46604827990338304, "grad_norm": 0.45959425643755003, "learning_rate": 4.478793869976664e-05, "loss": 3.9694, "step": 2038 }, { "epoch": 0.4665056383723756, "grad_norm": 0.379687634220806, "learning_rate": 4.47766140086664e-05, "loss": 3.7455, "step": 2040 }, { "epoch": 0.46696299684136805, "grad_norm": 0.3643564085691381, "learning_rate": 4.476527846287359e-05, "loss": 3.9781, "step": 2042 }, { "epoch": 0.4674203553103606, "grad_norm": 0.37331801941165293, "learning_rate": 4.4753932068609905e-05, "loss": 3.9788, "step": 2044 }, { "epoch": 0.4678777137793531, "grad_norm": 0.4145000233483394, "learning_rate": 4.474257483210298e-05, "loss": 4.0959, "step": 2046 }, { "epoch": 0.46833507224834564, "grad_norm": 0.4811065906026452, "learning_rate": 4.473120675958638e-05, "loss": 3.8895, "step": 2048 }, { "epoch": 0.46879243071733817, "grad_norm": 0.36178001807705673, "learning_rate": 4.471982785729969e-05, "loss": 3.8536, "step": 2050 }, { "epoch": 0.4692497891863307, "grad_norm": 0.45214611407150573, "learning_rate": 4.470843813148835e-05, "loss": 3.9636, "step": 2052 }, { "epoch": 0.46970714765532323, "grad_norm": 0.354344949541451, "learning_rate": 4.469703758840381e-05, "loss": 4.0852, "step": 2054 }, { "epoch": 0.47016450612431576, "grad_norm": 0.4136868698385038, "learning_rate": 4.468562623430341e-05, "loss": 3.9995, "step": 2056 }, { "epoch": 0.4706218645933083, "grad_norm": 0.40462373798257223, "learning_rate": 4.4674204075450453e-05, "loss": 4.0681, "step": 2058 }, { "epoch": 0.4710792230623008, "grad_norm": 0.3554823107805154, "learning_rate": 4.466277111811416e-05, "loss": 3.9651, "step": 2060 }, { "epoch": 0.4715365815312933, "grad_norm": 0.3565772243605559, "learning_rate": 4.465132736856969e-05, "loss": 3.84, "step": 2062 }, { "epoch": 0.4719939400002858, "grad_norm": 0.4766461033264771, "learning_rate": 4.4639872833098106e-05, "loss": 4.1294, "step": 2064 }, { "epoch": 0.47245129846927836, "grad_norm": 0.39922723854431563, "learning_rate": 4.462840751798642e-05, "loss": 4.0072, "step": 2066 }, { "epoch": 0.4729086569382709, "grad_norm": 0.37593880996412454, "learning_rate": 4.4616931429527515e-05, "loss": 4.0839, "step": 2068 }, { "epoch": 0.4733660154072634, "grad_norm": 0.5716272901601355, "learning_rate": 4.460544457402024e-05, "loss": 3.9974, "step": 2070 }, { "epoch": 0.47382337387625595, "grad_norm": 0.37400869634278955, "learning_rate": 4.459394695776932e-05, "loss": 3.8849, "step": 2072 }, { "epoch": 0.4742807323452485, "grad_norm": 0.43400549026062174, "learning_rate": 4.4582438587085386e-05, "loss": 4.0874, "step": 2074 }, { "epoch": 0.474738090814241, "grad_norm": 0.4370005174618501, "learning_rate": 4.4570919468285e-05, "loss": 4.0194, "step": 2076 }, { "epoch": 0.47519544928323354, "grad_norm": 0.49897876397700974, "learning_rate": 4.455938960769058e-05, "loss": 4.1444, "step": 2078 }, { "epoch": 0.47565280775222607, "grad_norm": 0.4573945614733294, "learning_rate": 4.454784901163049e-05, "loss": 3.8808, "step": 2080 }, { "epoch": 0.4761101662212186, "grad_norm": 0.4686632711921496, "learning_rate": 4.4536297686438944e-05, "loss": 3.9526, "step": 2082 }, { "epoch": 0.4765675246902111, "grad_norm": 0.4715631471329907, "learning_rate": 4.452473563845606e-05, "loss": 3.8911, "step": 2084 }, { "epoch": 0.4770248831592036, "grad_norm": 0.36521493380613956, "learning_rate": 4.451316287402785e-05, "loss": 3.9481, "step": 2086 }, { "epoch": 0.47748224162819614, "grad_norm": 0.40592993452438725, "learning_rate": 4.450157939950619e-05, "loss": 3.9903, "step": 2088 }, { "epoch": 0.47793960009718867, "grad_norm": 0.3593604458059372, "learning_rate": 4.4489985221248856e-05, "loss": 3.8986, "step": 2090 }, { "epoch": 0.4783969585661812, "grad_norm": 0.41183514985453623, "learning_rate": 4.447838034561948e-05, "loss": 4.0246, "step": 2092 }, { "epoch": 0.4788543170351737, "grad_norm": 0.3076751896504972, "learning_rate": 4.4466764778987566e-05, "loss": 3.8906, "step": 2094 }, { "epoch": 0.47931167550416626, "grad_norm": 0.42919939234255944, "learning_rate": 4.4455138527728514e-05, "loss": 3.8234, "step": 2096 }, { "epoch": 0.4797690339731588, "grad_norm": 0.44324690278633805, "learning_rate": 4.444350159822355e-05, "loss": 3.9136, "step": 2098 }, { "epoch": 0.4802263924421513, "grad_norm": 0.39823403492768333, "learning_rate": 4.443185399685978e-05, "loss": 3.8478, "step": 2100 }, { "epoch": 0.48068375091114385, "grad_norm": 0.40021114513528777, "learning_rate": 4.442019573003018e-05, "loss": 4.0368, "step": 2102 }, { "epoch": 0.4811411093801363, "grad_norm": 0.42314152103553315, "learning_rate": 4.4408526804133545e-05, "loss": 3.8783, "step": 2104 }, { "epoch": 0.48159846784912885, "grad_norm": 0.4503507201092336, "learning_rate": 4.439684722557455e-05, "loss": 4.1049, "step": 2106 }, { "epoch": 0.4820558263181214, "grad_norm": 0.3837815689248353, "learning_rate": 4.43851570007637e-05, "loss": 3.7798, "step": 2108 }, { "epoch": 0.4825131847871139, "grad_norm": 0.9687794096389566, "learning_rate": 4.437345613611737e-05, "loss": 3.8696, "step": 2110 }, { "epoch": 0.48297054325610644, "grad_norm": 0.3746323770376514, "learning_rate": 4.436174463805774e-05, "loss": 4.0475, "step": 2112 }, { "epoch": 0.483427901725099, "grad_norm": 0.5486121516345279, "learning_rate": 4.4350022513012844e-05, "loss": 3.7814, "step": 2114 }, { "epoch": 0.4838852601940915, "grad_norm": 0.36737948329457454, "learning_rate": 4.433828976741655e-05, "loss": 3.8002, "step": 2116 }, { "epoch": 0.48434261866308403, "grad_norm": 0.38061822935324424, "learning_rate": 4.432654640770856e-05, "loss": 3.9853, "step": 2118 }, { "epoch": 0.48479997713207656, "grad_norm": 0.33934680735560374, "learning_rate": 4.4314792440334377e-05, "loss": 3.8662, "step": 2120 }, { "epoch": 0.4852573356010691, "grad_norm": 0.36106987049305106, "learning_rate": 4.430302787174535e-05, "loss": 3.9217, "step": 2122 }, { "epoch": 0.4857146940700616, "grad_norm": 0.4352496035035599, "learning_rate": 4.429125270839864e-05, "loss": 3.8212, "step": 2124 }, { "epoch": 0.4861720525390541, "grad_norm": 0.46416736728003527, "learning_rate": 4.427946695675723e-05, "loss": 3.8954, "step": 2126 }, { "epoch": 0.48662941100804663, "grad_norm": 0.47514093717458, "learning_rate": 4.426767062328989e-05, "loss": 3.8747, "step": 2128 }, { "epoch": 0.48708676947703916, "grad_norm": 0.3628134843614156, "learning_rate": 4.4255863714471244e-05, "loss": 4.0775, "step": 2130 }, { "epoch": 0.4875441279460317, "grad_norm": 0.49735689426877355, "learning_rate": 4.424404623678167e-05, "loss": 4.0369, "step": 2132 }, { "epoch": 0.4880014864150242, "grad_norm": 0.46987335385833134, "learning_rate": 4.423221819670736e-05, "loss": 3.8745, "step": 2134 }, { "epoch": 0.48845884488401675, "grad_norm": 0.4067948245968839, "learning_rate": 4.4220379600740334e-05, "loss": 4.0434, "step": 2136 }, { "epoch": 0.4889162033530093, "grad_norm": 0.3895674504223055, "learning_rate": 4.4208530455378374e-05, "loss": 3.8947, "step": 2138 }, { "epoch": 0.4893735618220018, "grad_norm": 0.39776200647533, "learning_rate": 4.4196670767125066e-05, "loss": 3.8309, "step": 2140 }, { "epoch": 0.48983092029099434, "grad_norm": 0.432727291657325, "learning_rate": 4.418480054248977e-05, "loss": 4.0227, "step": 2142 }, { "epoch": 0.4902882787599869, "grad_norm": 0.5084168872277336, "learning_rate": 4.4172919787987646e-05, "loss": 3.9885, "step": 2144 }, { "epoch": 0.4907456372289794, "grad_norm": 0.4718670838052002, "learning_rate": 4.416102851013962e-05, "loss": 3.79, "step": 2146 }, { "epoch": 0.4912029956979719, "grad_norm": 0.32863631599941073, "learning_rate": 4.4149126715472405e-05, "loss": 4.0808, "step": 2148 }, { "epoch": 0.4916603541669644, "grad_norm": 0.4174034181398685, "learning_rate": 4.413721441051848e-05, "loss": 3.8063, "step": 2150 }, { "epoch": 0.49211771263595694, "grad_norm": 0.46865721420043405, "learning_rate": 4.4125291601816073e-05, "loss": 3.9948, "step": 2152 }, { "epoch": 0.49257507110494947, "grad_norm": 0.41676798025863065, "learning_rate": 4.4113358295909234e-05, "loss": 3.9384, "step": 2154 }, { "epoch": 0.493032429573942, "grad_norm": 0.5433654159550148, "learning_rate": 4.4101414499347704e-05, "loss": 4.0714, "step": 2156 }, { "epoch": 0.49348978804293453, "grad_norm": 0.39243556988115874, "learning_rate": 4.408946021868704e-05, "loss": 3.9471, "step": 2158 }, { "epoch": 0.49394714651192706, "grad_norm": 0.45909396547386844, "learning_rate": 4.407749546048851e-05, "loss": 3.9231, "step": 2160 }, { "epoch": 0.4944045049809196, "grad_norm": 0.44646061577912927, "learning_rate": 4.4065520231319156e-05, "loss": 3.8968, "step": 2162 }, { "epoch": 0.4948618634499121, "grad_norm": 0.48916902326550177, "learning_rate": 4.4053534537751776e-05, "loss": 3.9293, "step": 2164 }, { "epoch": 0.49531922191890465, "grad_norm": 0.399930773645546, "learning_rate": 4.404153838636489e-05, "loss": 4.0176, "step": 2166 }, { "epoch": 0.4957765803878971, "grad_norm": 0.44233373239183477, "learning_rate": 4.402953178374276e-05, "loss": 4.1561, "step": 2168 }, { "epoch": 0.49623393885688966, "grad_norm": 0.6064024586056512, "learning_rate": 4.4017514736475396e-05, "loss": 3.9597, "step": 2170 }, { "epoch": 0.4966912973258822, "grad_norm": 0.395581481100878, "learning_rate": 4.4005487251158545e-05, "loss": 4.0772, "step": 2172 }, { "epoch": 0.4971486557948747, "grad_norm": 0.34941357718658433, "learning_rate": 4.399344933439366e-05, "loss": 3.9374, "step": 2174 }, { "epoch": 0.49760601426386725, "grad_norm": 0.42441302254168023, "learning_rate": 4.3981400992787936e-05, "loss": 3.9454, "step": 2176 }, { "epoch": 0.4980633727328598, "grad_norm": 0.5012103570686948, "learning_rate": 4.396934223295429e-05, "loss": 4.0481, "step": 2178 }, { "epoch": 0.4985207312018523, "grad_norm": 0.4263264726184198, "learning_rate": 4.395727306151135e-05, "loss": 3.7812, "step": 2180 }, { "epoch": 0.49897808967084484, "grad_norm": 0.37482798127768696, "learning_rate": 4.394519348508347e-05, "loss": 4.1065, "step": 2182 }, { "epoch": 0.49943544813983737, "grad_norm": 0.39308217650817406, "learning_rate": 4.39331035103007e-05, "loss": 3.8929, "step": 2184 }, { "epoch": 0.4998928066088299, "grad_norm": 0.32417079846699876, "learning_rate": 4.392100314379881e-05, "loss": 3.9866, "step": 2186 }, { "epoch": 0.5003501650778224, "grad_norm": 0.5510726590686273, "learning_rate": 4.390889239221926e-05, "loss": 3.9482, "step": 2188 }, { "epoch": 0.5008075235468149, "grad_norm": 0.44633981094806924, "learning_rate": 4.3896771262209224e-05, "loss": 4.0627, "step": 2190 }, { "epoch": 0.5012648820158074, "grad_norm": 0.3513838738067671, "learning_rate": 4.3884639760421575e-05, "loss": 3.9076, "step": 2192 }, { "epoch": 0.5017222404848, "grad_norm": 0.47948172998775634, "learning_rate": 4.387249789351485e-05, "loss": 3.8992, "step": 2194 }, { "epoch": 0.5021795989537925, "grad_norm": 0.34067270968476265, "learning_rate": 4.3860345668153325e-05, "loss": 3.9894, "step": 2196 }, { "epoch": 0.502636957422785, "grad_norm": 0.34095593134275504, "learning_rate": 4.38481830910069e-05, "loss": 4.0158, "step": 2198 }, { "epoch": 0.5030943158917776, "grad_norm": 0.42484844464720745, "learning_rate": 4.3836010168751215e-05, "loss": 3.949, "step": 2200 }, { "epoch": 0.5035516743607701, "grad_norm": 0.38482437072605313, "learning_rate": 4.382382690806754e-05, "loss": 3.8651, "step": 2202 }, { "epoch": 0.5040090328297626, "grad_norm": 0.35688155143189143, "learning_rate": 4.3811633315642866e-05, "loss": 3.8276, "step": 2204 }, { "epoch": 0.5044663912987551, "grad_norm": 0.431273481172038, "learning_rate": 4.379942939816981e-05, "loss": 3.9725, "step": 2206 }, { "epoch": 0.5049237497677477, "grad_norm": 0.34062045776768385, "learning_rate": 4.378721516234667e-05, "loss": 4.0961, "step": 2208 }, { "epoch": 0.5053811082367402, "grad_norm": 0.4155909918732224, "learning_rate": 4.3774990614877445e-05, "loss": 3.9832, "step": 2210 }, { "epoch": 0.5058384667057327, "grad_norm": 0.47413878795791486, "learning_rate": 4.376275576247173e-05, "loss": 3.9745, "step": 2212 }, { "epoch": 0.5062958251747253, "grad_norm": 0.4420350246598642, "learning_rate": 4.3750510611844825e-05, "loss": 3.9971, "step": 2214 }, { "epoch": 0.5067531836437178, "grad_norm": 0.4425910139774328, "learning_rate": 4.373825516971766e-05, "loss": 3.9747, "step": 2216 }, { "epoch": 0.5072105421127103, "grad_norm": 0.461841956674269, "learning_rate": 4.3725989442816815e-05, "loss": 3.7541, "step": 2218 }, { "epoch": 0.5076679005817029, "grad_norm": 0.5213652914145236, "learning_rate": 4.371371343787454e-05, "loss": 4.0536, "step": 2220 }, { "epoch": 0.5081252590506953, "grad_norm": 0.35709996136141964, "learning_rate": 4.3701427161628674e-05, "loss": 3.8176, "step": 2222 }, { "epoch": 0.5085826175196878, "grad_norm": 0.3952373003394258, "learning_rate": 4.368913062082274e-05, "loss": 4.1018, "step": 2224 }, { "epoch": 0.5090399759886803, "grad_norm": 0.4449871279965765, "learning_rate": 4.367682382220588e-05, "loss": 3.9783, "step": 2226 }, { "epoch": 0.5094973344576729, "grad_norm": 0.41665052939001046, "learning_rate": 4.366450677253287e-05, "loss": 3.9598, "step": 2228 }, { "epoch": 0.5099546929266654, "grad_norm": 0.39021735855896805, "learning_rate": 4.3652179478564106e-05, "loss": 3.813, "step": 2230 }, { "epoch": 0.5104120513956579, "grad_norm": 0.448970593449532, "learning_rate": 4.36398419470656e-05, "loss": 3.875, "step": 2232 }, { "epoch": 0.5108694098646505, "grad_norm": 0.3340261221331573, "learning_rate": 4.3627494184809e-05, "loss": 4.1339, "step": 2234 }, { "epoch": 0.511326768333643, "grad_norm": 0.48987575347367435, "learning_rate": 4.3615136198571574e-05, "loss": 3.9134, "step": 2236 }, { "epoch": 0.5117841268026355, "grad_norm": 0.5360267131166191, "learning_rate": 4.360276799513616e-05, "loss": 3.933, "step": 2238 }, { "epoch": 0.512241485271628, "grad_norm": 0.4360242203625824, "learning_rate": 4.359038958129125e-05, "loss": 3.9053, "step": 2240 }, { "epoch": 0.5126988437406206, "grad_norm": 0.46366759172114647, "learning_rate": 4.3578000963830925e-05, "loss": 3.8077, "step": 2242 }, { "epoch": 0.5131562022096131, "grad_norm": 0.4104407096087864, "learning_rate": 4.3565602149554855e-05, "loss": 3.8881, "step": 2244 }, { "epoch": 0.5136135606786056, "grad_norm": 0.518100949501135, "learning_rate": 4.355319314526832e-05, "loss": 3.8746, "step": 2246 }, { "epoch": 0.5140709191475982, "grad_norm": 0.3885862745019246, "learning_rate": 4.35407739577822e-05, "loss": 3.7874, "step": 2248 }, { "epoch": 0.5145282776165907, "grad_norm": 0.46817451755019923, "learning_rate": 4.352834459391294e-05, "loss": 3.9398, "step": 2250 }, { "epoch": 0.5149856360855832, "grad_norm": 0.35234961874868015, "learning_rate": 4.3515905060482586e-05, "loss": 4.0653, "step": 2252 }, { "epoch": 0.5154429945545758, "grad_norm": 0.4581566509799023, "learning_rate": 4.350345536431877e-05, "loss": 3.9559, "step": 2254 }, { "epoch": 0.5159003530235683, "grad_norm": 0.5158351701057694, "learning_rate": 4.349099551225468e-05, "loss": 3.6979, "step": 2256 }, { "epoch": 0.5163577114925608, "grad_norm": 0.45266167202573765, "learning_rate": 4.347852551112911e-05, "loss": 3.9955, "step": 2258 }, { "epoch": 0.5168150699615534, "grad_norm": 0.41108328187925297, "learning_rate": 4.34660453677864e-05, "loss": 3.9645, "step": 2260 }, { "epoch": 0.5172724284305459, "grad_norm": 0.4863797815304017, "learning_rate": 4.345355508907646e-05, "loss": 4.1442, "step": 2262 }, { "epoch": 0.5177297868995383, "grad_norm": 0.4642134648872519, "learning_rate": 4.3441054681854774e-05, "loss": 4.0717, "step": 2264 }, { "epoch": 0.5181871453685308, "grad_norm": 0.43369060156373257, "learning_rate": 4.342854415298239e-05, "loss": 3.8668, "step": 2266 }, { "epoch": 0.5186445038375234, "grad_norm": 0.39384410051613084, "learning_rate": 4.3416023509325873e-05, "loss": 3.9318, "step": 2268 }, { "epoch": 0.5191018623065159, "grad_norm": 0.4490341636567963, "learning_rate": 4.340349275775739e-05, "loss": 3.8576, "step": 2270 }, { "epoch": 0.5195592207755084, "grad_norm": 0.4136676885376808, "learning_rate": 4.339095190515463e-05, "loss": 3.9086, "step": 2272 }, { "epoch": 0.520016579244501, "grad_norm": 0.47391970096243047, "learning_rate": 4.3378400958400824e-05, "loss": 4.0519, "step": 2274 }, { "epoch": 0.5204739377134935, "grad_norm": 0.390660112710215, "learning_rate": 4.3365839924384746e-05, "loss": 4.0399, "step": 2276 }, { "epoch": 0.520931296182486, "grad_norm": 0.34661805585597283, "learning_rate": 4.335326881000071e-05, "loss": 3.9909, "step": 2278 }, { "epoch": 0.5213886546514785, "grad_norm": 0.4914553510093784, "learning_rate": 4.334068762214856e-05, "loss": 4.0579, "step": 2280 }, { "epoch": 0.5218460131204711, "grad_norm": 0.39521797769000055, "learning_rate": 4.332809636773368e-05, "loss": 3.9088, "step": 2282 }, { "epoch": 0.5223033715894636, "grad_norm": 0.3867235472410489, "learning_rate": 4.3315495053666966e-05, "loss": 4.0222, "step": 2284 }, { "epoch": 0.5227607300584561, "grad_norm": 0.4105103553622794, "learning_rate": 4.3302883686864826e-05, "loss": 3.8677, "step": 2286 }, { "epoch": 0.5232180885274487, "grad_norm": 0.3562244933182647, "learning_rate": 4.3290262274249215e-05, "loss": 3.6953, "step": 2288 }, { "epoch": 0.5236754469964412, "grad_norm": 0.41681156221948656, "learning_rate": 4.327763082274759e-05, "loss": 4.0408, "step": 2290 }, { "epoch": 0.5241328054654337, "grad_norm": 0.40939465613895815, "learning_rate": 4.326498933929288e-05, "loss": 3.9302, "step": 2292 }, { "epoch": 0.5245901639344263, "grad_norm": 0.3617504847469918, "learning_rate": 4.325233783082358e-05, "loss": 3.9739, "step": 2294 }, { "epoch": 0.5250475224034188, "grad_norm": 0.3950624760882867, "learning_rate": 4.323967630428367e-05, "loss": 3.8562, "step": 2296 }, { "epoch": 0.5255048808724113, "grad_norm": 0.4827030613391404, "learning_rate": 4.32270047666226e-05, "loss": 4.0707, "step": 2298 }, { "epoch": 0.5259622393414038, "grad_norm": 0.49424439497872696, "learning_rate": 4.321432322479535e-05, "loss": 4.0663, "step": 2300 }, { "epoch": 0.5264195978103964, "grad_norm": 0.44063761131690155, "learning_rate": 4.320163168576236e-05, "loss": 3.807, "step": 2302 }, { "epoch": 0.5268769562793889, "grad_norm": 0.45316082714113914, "learning_rate": 4.318893015648958e-05, "loss": 3.8935, "step": 2304 }, { "epoch": 0.5273343147483813, "grad_norm": 0.3461822164200853, "learning_rate": 4.317621864394844e-05, "loss": 3.9603, "step": 2306 }, { "epoch": 0.5277916732173739, "grad_norm": 0.4147872699572864, "learning_rate": 4.3163497155115835e-05, "loss": 3.8075, "step": 2308 }, { "epoch": 0.5282490316863664, "grad_norm": 0.45573284874291453, "learning_rate": 4.315076569697415e-05, "loss": 3.9377, "step": 2310 }, { "epoch": 0.5287063901553589, "grad_norm": 0.4340303302265985, "learning_rate": 4.3138024276511234e-05, "loss": 4.0123, "step": 2312 }, { "epoch": 0.5291637486243514, "grad_norm": 0.38367553662584286, "learning_rate": 4.3125272900720425e-05, "loss": 3.9909, "step": 2314 }, { "epoch": 0.529621107093344, "grad_norm": 0.407627009450293, "learning_rate": 4.311251157660049e-05, "loss": 3.7645, "step": 2316 }, { "epoch": 0.5300784655623365, "grad_norm": 0.3504165516492917, "learning_rate": 4.3099740311155676e-05, "loss": 3.9268, "step": 2318 }, { "epoch": 0.530535824031329, "grad_norm": 0.4693344922624895, "learning_rate": 4.3086959111395685e-05, "loss": 3.894, "step": 2320 }, { "epoch": 0.5309931825003216, "grad_norm": 0.38608686282762705, "learning_rate": 4.307416798433568e-05, "loss": 4.0157, "step": 2322 }, { "epoch": 0.5314505409693141, "grad_norm": 0.456986253919817, "learning_rate": 4.306136693699625e-05, "loss": 3.8114, "step": 2324 }, { "epoch": 0.5319078994383066, "grad_norm": 0.3732742495417293, "learning_rate": 4.304855597640346e-05, "loss": 3.8628, "step": 2326 }, { "epoch": 0.5323652579072992, "grad_norm": 0.4164792732208229, "learning_rate": 4.3035735109588785e-05, "loss": 3.719, "step": 2328 }, { "epoch": 0.5328226163762917, "grad_norm": 0.6117811626686213, "learning_rate": 4.3022904343589146e-05, "loss": 3.8829, "step": 2330 }, { "epoch": 0.5332799748452842, "grad_norm": 0.44550021715726174, "learning_rate": 4.3010063685446914e-05, "loss": 3.8152, "step": 2332 }, { "epoch": 0.5337373333142768, "grad_norm": 0.4583097816687352, "learning_rate": 4.299721314220988e-05, "loss": 3.9869, "step": 2334 }, { "epoch": 0.5341946917832693, "grad_norm": 0.40126429814565345, "learning_rate": 4.298435272093125e-05, "loss": 3.8737, "step": 2336 }, { "epoch": 0.5346520502522618, "grad_norm": 0.5228839053445065, "learning_rate": 4.2971482428669664e-05, "loss": 3.8305, "step": 2338 }, { "epoch": 0.5351094087212543, "grad_norm": 0.36071954975239456, "learning_rate": 4.2958602272489176e-05, "loss": 3.9695, "step": 2340 }, { "epoch": 0.5355667671902469, "grad_norm": 0.4045866328725946, "learning_rate": 4.294571225945925e-05, "loss": 3.857, "step": 2342 }, { "epoch": 0.5360241256592394, "grad_norm": 0.4117481764062222, "learning_rate": 4.2932812396654765e-05, "loss": 3.9871, "step": 2344 }, { "epoch": 0.5364814841282319, "grad_norm": 0.4210469809128684, "learning_rate": 4.291990269115601e-05, "loss": 3.9357, "step": 2346 }, { "epoch": 0.5369388425972244, "grad_norm": 0.411441569248354, "learning_rate": 4.290698315004866e-05, "loss": 3.9543, "step": 2348 }, { "epoch": 0.5373962010662169, "grad_norm": 0.450641171327911, "learning_rate": 4.289405378042381e-05, "loss": 3.8987, "step": 2350 }, { "epoch": 0.5378535595352094, "grad_norm": 0.38747943854386097, "learning_rate": 4.288111458937795e-05, "loss": 4.0353, "step": 2352 }, { "epoch": 0.5383109180042019, "grad_norm": 0.40281706811106593, "learning_rate": 4.286816558401292e-05, "loss": 3.8722, "step": 2354 }, { "epoch": 0.5387682764731945, "grad_norm": 0.45649490511433705, "learning_rate": 4.285520677143601e-05, "loss": 3.9558, "step": 2356 }, { "epoch": 0.539225634942187, "grad_norm": 0.4155723287435847, "learning_rate": 4.284223815875983e-05, "loss": 3.8198, "step": 2358 }, { "epoch": 0.5396829934111795, "grad_norm": 0.5126389826756109, "learning_rate": 4.2829259753102416e-05, "loss": 3.9191, "step": 2360 }, { "epoch": 0.5401403518801721, "grad_norm": 0.4652202107589849, "learning_rate": 4.281627156158715e-05, "loss": 4.0689, "step": 2362 }, { "epoch": 0.5405977103491646, "grad_norm": 0.41628357044515674, "learning_rate": 4.2803273591342815e-05, "loss": 3.9889, "step": 2364 }, { "epoch": 0.5410550688181571, "grad_norm": 0.4354745608114201, "learning_rate": 4.2790265849503526e-05, "loss": 4.0531, "step": 2366 }, { "epoch": 0.5415124272871497, "grad_norm": 0.468143881886823, "learning_rate": 4.277724834320878e-05, "loss": 3.8868, "step": 2368 }, { "epoch": 0.5419697857561422, "grad_norm": 0.5177947832482748, "learning_rate": 4.276422107960344e-05, "loss": 3.738, "step": 2370 }, { "epoch": 0.5424271442251347, "grad_norm": 0.4020065450064365, "learning_rate": 4.275118406583771e-05, "loss": 3.9065, "step": 2372 }, { "epoch": 0.5428845026941272, "grad_norm": 0.45841363362372983, "learning_rate": 4.273813730906715e-05, "loss": 3.8521, "step": 2374 }, { "epoch": 0.5433418611631198, "grad_norm": 0.5368564912050254, "learning_rate": 4.2725080816452666e-05, "loss": 3.8125, "step": 2376 }, { "epoch": 0.5437992196321123, "grad_norm": 0.437866466172258, "learning_rate": 4.271201459516053e-05, "loss": 3.8947, "step": 2378 }, { "epoch": 0.5442565781011048, "grad_norm": 0.41149092999841747, "learning_rate": 4.269893865236231e-05, "loss": 3.8927, "step": 2380 }, { "epoch": 0.5447139365700974, "grad_norm": 0.36560557876260014, "learning_rate": 4.268585299523494e-05, "loss": 4.0142, "step": 2382 }, { "epoch": 0.5451712950390899, "grad_norm": 0.44096769641779826, "learning_rate": 4.26727576309607e-05, "loss": 3.9137, "step": 2384 }, { "epoch": 0.5456286535080824, "grad_norm": 0.47281350916662135, "learning_rate": 4.2659652566727156e-05, "loss": 3.8614, "step": 2386 }, { "epoch": 0.546086011977075, "grad_norm": 0.565832781695442, "learning_rate": 4.2646537809727224e-05, "loss": 3.9305, "step": 2388 }, { "epoch": 0.5465433704460674, "grad_norm": 0.4512962070910915, "learning_rate": 4.2633413367159136e-05, "loss": 3.8981, "step": 2390 }, { "epoch": 0.5470007289150599, "grad_norm": 0.3790165455535681, "learning_rate": 4.2620279246226445e-05, "loss": 3.8172, "step": 2392 }, { "epoch": 0.5474580873840524, "grad_norm": 0.33502332367766224, "learning_rate": 4.260713545413801e-05, "loss": 3.9043, "step": 2394 }, { "epoch": 0.547915445853045, "grad_norm": 0.42527876084570726, "learning_rate": 4.259398199810801e-05, "loss": 3.9183, "step": 2396 }, { "epoch": 0.5483728043220375, "grad_norm": 0.5219118744848763, "learning_rate": 4.258081888535589e-05, "loss": 3.8163, "step": 2398 }, { "epoch": 0.54883016279103, "grad_norm": 0.49316224957123456, "learning_rate": 4.256764612310644e-05, "loss": 3.7031, "step": 2400 }, { "epoch": 0.5492875212600226, "grad_norm": 0.4910309994763148, "learning_rate": 4.255446371858973e-05, "loss": 4.0239, "step": 2402 }, { "epoch": 0.5497448797290151, "grad_norm": 0.5137347628893262, "learning_rate": 4.254127167904112e-05, "loss": 3.9322, "step": 2404 }, { "epoch": 0.5502022381980076, "grad_norm": 0.4633309567628373, "learning_rate": 4.252807001170126e-05, "loss": 3.8558, "step": 2406 }, { "epoch": 0.5506595966670002, "grad_norm": 0.37260090798878054, "learning_rate": 4.2514858723816074e-05, "loss": 3.9038, "step": 2408 }, { "epoch": 0.5511169551359927, "grad_norm": 0.41279182154160105, "learning_rate": 4.25016378226368e-05, "loss": 3.89, "step": 2410 }, { "epoch": 0.5515743136049852, "grad_norm": 0.3970860768503931, "learning_rate": 4.24884073154199e-05, "loss": 4.1317, "step": 2412 }, { "epoch": 0.5520316720739777, "grad_norm": 0.441757501252436, "learning_rate": 4.247516720942716e-05, "loss": 3.9079, "step": 2414 }, { "epoch": 0.5524890305429703, "grad_norm": 0.4068818314150209, "learning_rate": 4.246191751192559e-05, "loss": 3.9479, "step": 2416 }, { "epoch": 0.5529463890119628, "grad_norm": 0.6707600724355829, "learning_rate": 4.244865823018751e-05, "loss": 3.7154, "step": 2418 }, { "epoch": 0.5534037474809553, "grad_norm": 0.4994716684165115, "learning_rate": 4.2435389371490463e-05, "loss": 3.7612, "step": 2420 }, { "epoch": 0.5538611059499479, "grad_norm": 0.4336063451913701, "learning_rate": 4.2422110943117264e-05, "loss": 3.8955, "step": 2422 }, { "epoch": 0.5543184644189404, "grad_norm": 0.48859726407135706, "learning_rate": 4.240882295235598e-05, "loss": 3.8881, "step": 2424 }, { "epoch": 0.5547758228879329, "grad_norm": 0.4590392059690037, "learning_rate": 4.239552540649991e-05, "loss": 3.8737, "step": 2426 }, { "epoch": 0.5552331813569255, "grad_norm": 0.3818781212337321, "learning_rate": 4.2382218312847646e-05, "loss": 3.8696, "step": 2428 }, { "epoch": 0.555690539825918, "grad_norm": 0.4360554381776462, "learning_rate": 4.236890167870295e-05, "loss": 4.1347, "step": 2430 }, { "epoch": 0.5561478982949104, "grad_norm": 0.439591347891016, "learning_rate": 4.235557551137489e-05, "loss": 3.9601, "step": 2432 }, { "epoch": 0.5566052567639029, "grad_norm": 0.41408463185020333, "learning_rate": 4.234223981817771e-05, "loss": 3.893, "step": 2434 }, { "epoch": 0.5570626152328955, "grad_norm": 0.435906146572986, "learning_rate": 4.232889460643091e-05, "loss": 3.7972, "step": 2436 }, { "epoch": 0.557519973701888, "grad_norm": 0.45835650089260743, "learning_rate": 4.231553988345922e-05, "loss": 3.9396, "step": 2438 }, { "epoch": 0.5579773321708805, "grad_norm": 0.4174756858507089, "learning_rate": 4.230217565659258e-05, "loss": 3.9256, "step": 2440 }, { "epoch": 0.558434690639873, "grad_norm": 0.3666889377173741, "learning_rate": 4.228880193316613e-05, "loss": 3.894, "step": 2442 }, { "epoch": 0.5588920491088656, "grad_norm": 0.4046752191396049, "learning_rate": 4.227541872052026e-05, "loss": 3.8682, "step": 2444 }, { "epoch": 0.5593494075778581, "grad_norm": 0.36617016374575967, "learning_rate": 4.2262026026000544e-05, "loss": 4.0709, "step": 2446 }, { "epoch": 0.5598067660468506, "grad_norm": 0.43252376469055975, "learning_rate": 4.224862385695776e-05, "loss": 3.9231, "step": 2448 }, { "epoch": 0.5602641245158432, "grad_norm": 0.44017322457447783, "learning_rate": 4.223521222074789e-05, "loss": 3.8878, "step": 2450 }, { "epoch": 0.5607214829848357, "grad_norm": 0.3772819349942624, "learning_rate": 4.2221791124732105e-05, "loss": 3.9183, "step": 2452 }, { "epoch": 0.5611788414538282, "grad_norm": 0.47268199062402627, "learning_rate": 4.22083605762768e-05, "loss": 3.801, "step": 2454 }, { "epoch": 0.5616361999228208, "grad_norm": 0.5005878926215539, "learning_rate": 4.2194920582753504e-05, "loss": 3.8063, "step": 2456 }, { "epoch": 0.5620935583918133, "grad_norm": 0.4312074654100118, "learning_rate": 4.2181471151538975e-05, "loss": 3.8209, "step": 2458 }, { "epoch": 0.5625509168608058, "grad_norm": 0.37853523652377163, "learning_rate": 4.2168012290015136e-05, "loss": 3.8513, "step": 2460 }, { "epoch": 0.5630082753297984, "grad_norm": 0.37231759492142624, "learning_rate": 4.215454400556909e-05, "loss": 3.9851, "step": 2462 }, { "epoch": 0.5634656337987909, "grad_norm": 0.5893617896726332, "learning_rate": 4.2141066305593105e-05, "loss": 3.9955, "step": 2464 }, { "epoch": 0.5639229922677834, "grad_norm": 0.47295645960909183, "learning_rate": 4.212757919748461e-05, "loss": 3.8529, "step": 2466 }, { "epoch": 0.564380350736776, "grad_norm": 0.7288054843280639, "learning_rate": 4.211408268864622e-05, "loss": 3.7305, "step": 2468 }, { "epoch": 0.5648377092057685, "grad_norm": 0.4936500949223552, "learning_rate": 4.210057678648569e-05, "loss": 4.0138, "step": 2470 }, { "epoch": 0.565295067674761, "grad_norm": 0.5990338374660151, "learning_rate": 4.208706149841594e-05, "loss": 3.907, "step": 2472 }, { "epoch": 0.5657524261437534, "grad_norm": 0.42803282754889627, "learning_rate": 4.207353683185503e-05, "loss": 3.9811, "step": 2474 }, { "epoch": 0.566209784612746, "grad_norm": 0.574275630287614, "learning_rate": 4.20600027942262e-05, "loss": 3.8066, "step": 2476 }, { "epoch": 0.5666671430817385, "grad_norm": 0.4557620627020042, "learning_rate": 4.2046459392957794e-05, "loss": 3.8475, "step": 2478 }, { "epoch": 0.567124501550731, "grad_norm": 0.331498537752255, "learning_rate": 4.2032906635483316e-05, "loss": 4.055, "step": 2480 }, { "epoch": 0.5675818600197235, "grad_norm": 0.4128815165429017, "learning_rate": 4.20193445292414e-05, "loss": 3.9735, "step": 2482 }, { "epoch": 0.5680392184887161, "grad_norm": 0.3580255673631693, "learning_rate": 4.2005773081675806e-05, "loss": 3.881, "step": 2484 }, { "epoch": 0.5684965769577086, "grad_norm": 0.3567219118236316, "learning_rate": 4.199219230023544e-05, "loss": 3.9022, "step": 2486 }, { "epoch": 0.5689539354267011, "grad_norm": 0.504740504974268, "learning_rate": 4.197860219237431e-05, "loss": 3.8798, "step": 2488 }, { "epoch": 0.5694112938956937, "grad_norm": 0.45386920850037726, "learning_rate": 4.196500276555155e-05, "loss": 3.9732, "step": 2490 }, { "epoch": 0.5698686523646862, "grad_norm": 0.4252540929744231, "learning_rate": 4.195139402723143e-05, "loss": 3.8995, "step": 2492 }, { "epoch": 0.5703260108336787, "grad_norm": 0.5365869096414737, "learning_rate": 4.193777598488328e-05, "loss": 3.7591, "step": 2494 }, { "epoch": 0.5707833693026713, "grad_norm": 0.3982094878971331, "learning_rate": 4.1924148645981585e-05, "loss": 3.9117, "step": 2496 }, { "epoch": 0.5712407277716638, "grad_norm": 0.384335252986298, "learning_rate": 4.191051201800591e-05, "loss": 4.0497, "step": 2498 }, { "epoch": 0.5716980862406563, "grad_norm": 0.38501828026412294, "learning_rate": 4.1896866108440935e-05, "loss": 3.7935, "step": 2500 }, { "epoch": 0.5721554447096489, "grad_norm": 0.4439269310854397, "learning_rate": 4.1883210924776416e-05, "loss": 3.7576, "step": 2502 }, { "epoch": 0.5726128031786414, "grad_norm": 0.5079421244753765, "learning_rate": 4.18695464745072e-05, "loss": 4.0842, "step": 2504 }, { "epoch": 0.5730701616476339, "grad_norm": 0.3963891464651516, "learning_rate": 4.1855872765133234e-05, "loss": 3.8732, "step": 2506 }, { "epoch": 0.5735275201166264, "grad_norm": 0.41626157340722736, "learning_rate": 4.1842189804159535e-05, "loss": 3.6758, "step": 2508 }, { "epoch": 0.573984878585619, "grad_norm": 0.3480008378498361, "learning_rate": 4.1828497599096205e-05, "loss": 3.8602, "step": 2510 }, { "epoch": 0.5744422370546115, "grad_norm": 0.3479624589129522, "learning_rate": 4.181479615745841e-05, "loss": 3.7341, "step": 2512 }, { "epoch": 0.574899595523604, "grad_norm": 0.45300399874718067, "learning_rate": 4.180108548676641e-05, "loss": 3.9569, "step": 2514 }, { "epoch": 0.5753569539925966, "grad_norm": 0.4121109117235215, "learning_rate": 4.17873655945455e-05, "loss": 3.7541, "step": 2516 }, { "epoch": 0.575814312461589, "grad_norm": 0.5329072291940614, "learning_rate": 4.177363648832605e-05, "loss": 3.8017, "step": 2518 }, { "epoch": 0.5762716709305815, "grad_norm": 0.40329178900916685, "learning_rate": 4.175989817564349e-05, "loss": 3.8828, "step": 2520 }, { "epoch": 0.576729029399574, "grad_norm": 0.4168053773622359, "learning_rate": 4.1746150664038296e-05, "loss": 3.8207, "step": 2522 }, { "epoch": 0.5771863878685666, "grad_norm": 0.4275025736859577, "learning_rate": 4.1732393961056004e-05, "loss": 3.8071, "step": 2524 }, { "epoch": 0.5776437463375591, "grad_norm": 0.3900061855681495, "learning_rate": 4.171862807424718e-05, "loss": 3.843, "step": 2526 }, { "epoch": 0.5781011048065516, "grad_norm": 0.42432880273759155, "learning_rate": 4.1704853011167444e-05, "loss": 3.9742, "step": 2528 }, { "epoch": 0.5785584632755442, "grad_norm": 0.40350730939127405, "learning_rate": 4.169106877937744e-05, "loss": 3.7644, "step": 2530 }, { "epoch": 0.5790158217445367, "grad_norm": 0.35247363595580217, "learning_rate": 4.167727538644286e-05, "loss": 3.8848, "step": 2532 }, { "epoch": 0.5794731802135292, "grad_norm": 0.43805648970049105, "learning_rate": 4.1663472839934406e-05, "loss": 3.9792, "step": 2534 }, { "epoch": 0.5799305386825218, "grad_norm": 0.4010442655239697, "learning_rate": 4.164966114742782e-05, "loss": 4.0562, "step": 2536 }, { "epoch": 0.5803878971515143, "grad_norm": 0.4268903024703729, "learning_rate": 4.163584031650386e-05, "loss": 3.7774, "step": 2538 }, { "epoch": 0.5808452556205068, "grad_norm": 0.457632940316729, "learning_rate": 4.162201035474829e-05, "loss": 3.9032, "step": 2540 }, { "epoch": 0.5813026140894993, "grad_norm": 0.5082648390082802, "learning_rate": 4.1608171269751906e-05, "loss": 3.7605, "step": 2542 }, { "epoch": 0.5817599725584919, "grad_norm": 0.4764189894922461, "learning_rate": 4.1594323069110474e-05, "loss": 3.7713, "step": 2544 }, { "epoch": 0.5822173310274844, "grad_norm": 0.40776774354495365, "learning_rate": 4.1580465760424815e-05, "loss": 3.8374, "step": 2546 }, { "epoch": 0.5826746894964769, "grad_norm": 0.44085255218910163, "learning_rate": 4.156659935130071e-05, "loss": 3.8284, "step": 2548 }, { "epoch": 0.5831320479654695, "grad_norm": 0.42080349304926024, "learning_rate": 4.155272384934893e-05, "loss": 3.8275, "step": 2550 }, { "epoch": 0.583589406434462, "grad_norm": 0.49420889281079955, "learning_rate": 4.153883926218528e-05, "loss": 3.9353, "step": 2552 }, { "epoch": 0.5840467649034545, "grad_norm": 0.3894646576883193, "learning_rate": 4.1524945597430514e-05, "loss": 3.8663, "step": 2554 }, { "epoch": 0.5845041233724471, "grad_norm": 0.6018572989105916, "learning_rate": 4.151104286271038e-05, "loss": 4.1529, "step": 2556 }, { "epoch": 0.5849614818414396, "grad_norm": 0.452070456056268, "learning_rate": 4.1497131065655595e-05, "loss": 4.1034, "step": 2558 }, { "epoch": 0.585418840310432, "grad_norm": 0.5021183320928201, "learning_rate": 4.148321021390187e-05, "loss": 3.8062, "step": 2560 }, { "epoch": 0.5858761987794245, "grad_norm": 0.5048248729996009, "learning_rate": 4.146928031508988e-05, "loss": 3.8898, "step": 2562 }, { "epoch": 0.5863335572484171, "grad_norm": 0.4262234202960524, "learning_rate": 4.1455341376865234e-05, "loss": 3.9831, "step": 2564 }, { "epoch": 0.5867909157174096, "grad_norm": 0.45587746160170517, "learning_rate": 4.144139340687856e-05, "loss": 3.8219, "step": 2566 }, { "epoch": 0.5872482741864021, "grad_norm": 0.49801526436813465, "learning_rate": 4.142743641278538e-05, "loss": 3.848, "step": 2568 }, { "epoch": 0.5877056326553947, "grad_norm": 0.3757215948199426, "learning_rate": 4.1413470402246227e-05, "loss": 3.9332, "step": 2570 }, { "epoch": 0.5881629911243872, "grad_norm": 0.44917789771040834, "learning_rate": 4.139949538292655e-05, "loss": 3.7466, "step": 2572 }, { "epoch": 0.5886203495933797, "grad_norm": 0.4305322666005306, "learning_rate": 4.138551136249673e-05, "loss": 3.9151, "step": 2574 }, { "epoch": 0.5890777080623723, "grad_norm": 0.4241324356252531, "learning_rate": 4.137151834863213e-05, "loss": 3.7537, "step": 2576 }, { "epoch": 0.5895350665313648, "grad_norm": 0.378215632335273, "learning_rate": 4.1357516349013016e-05, "loss": 4.086, "step": 2578 }, { "epoch": 0.5899924250003573, "grad_norm": 0.44499632000740974, "learning_rate": 4.1343505371324596e-05, "loss": 3.9008, "step": 2580 }, { "epoch": 0.5904497834693498, "grad_norm": 0.4801336996566025, "learning_rate": 4.132948542325702e-05, "loss": 3.939, "step": 2582 }, { "epoch": 0.5909071419383424, "grad_norm": 0.6309509919430609, "learning_rate": 4.131545651250532e-05, "loss": 3.8921, "step": 2584 }, { "epoch": 0.5913645004073349, "grad_norm": 0.39980368043356196, "learning_rate": 4.130141864676951e-05, "loss": 3.8445, "step": 2586 }, { "epoch": 0.5918218588763274, "grad_norm": 0.34669489222750643, "learning_rate": 4.128737183375445e-05, "loss": 4.1341, "step": 2588 }, { "epoch": 0.59227921734532, "grad_norm": 0.4503066117737585, "learning_rate": 4.127331608116997e-05, "loss": 3.8294, "step": 2590 }, { "epoch": 0.5927365758143125, "grad_norm": 0.3467469485945542, "learning_rate": 4.125925139673077e-05, "loss": 3.7049, "step": 2592 }, { "epoch": 0.593193934283305, "grad_norm": 0.3534894164260029, "learning_rate": 4.1245177788156466e-05, "loss": 3.7662, "step": 2594 }, { "epoch": 0.5936512927522976, "grad_norm": 0.406774787134679, "learning_rate": 4.123109526317157e-05, "loss": 3.6905, "step": 2596 }, { "epoch": 0.5941086512212901, "grad_norm": 0.4607048579056137, "learning_rate": 4.121700382950549e-05, "loss": 3.9784, "step": 2598 }, { "epoch": 0.5945660096902826, "grad_norm": 0.4372649753627753, "learning_rate": 4.120290349489252e-05, "loss": 3.7584, "step": 2600 }, { "epoch": 0.595023368159275, "grad_norm": 0.4035648962111905, "learning_rate": 4.118879426707184e-05, "loss": 3.7303, "step": 2602 }, { "epoch": 0.5954807266282676, "grad_norm": 0.4056023239885993, "learning_rate": 4.117467615378752e-05, "loss": 3.8144, "step": 2604 }, { "epoch": 0.5959380850972601, "grad_norm": 0.3830262968980694, "learning_rate": 4.116054916278848e-05, "loss": 3.8619, "step": 2606 }, { "epoch": 0.5963954435662526, "grad_norm": 0.3892331312195756, "learning_rate": 4.1146413301828554e-05, "loss": 3.8199, "step": 2608 }, { "epoch": 0.5968528020352452, "grad_norm": 0.35771767305072233, "learning_rate": 4.113226857866641e-05, "loss": 3.8327, "step": 2610 }, { "epoch": 0.5973101605042377, "grad_norm": 0.37513644994353795, "learning_rate": 4.111811500106559e-05, "loss": 3.8246, "step": 2612 }, { "epoch": 0.5977675189732302, "grad_norm": 0.41886280376627427, "learning_rate": 4.110395257679451e-05, "loss": 3.863, "step": 2614 }, { "epoch": 0.5982248774422227, "grad_norm": 0.4623722441241289, "learning_rate": 4.108978131362643e-05, "loss": 3.9582, "step": 2616 }, { "epoch": 0.5986822359112153, "grad_norm": 0.386323669222756, "learning_rate": 4.1075601219339446e-05, "loss": 3.5948, "step": 2618 }, { "epoch": 0.5991395943802078, "grad_norm": 0.3504956057323402, "learning_rate": 4.106141230171652e-05, "loss": 3.6537, "step": 2620 }, { "epoch": 0.5995969528492003, "grad_norm": 0.7715621329608352, "learning_rate": 4.104721456854547e-05, "loss": 3.7421, "step": 2622 }, { "epoch": 0.6000543113181929, "grad_norm": 0.3825077478708685, "learning_rate": 4.103300802761892e-05, "loss": 3.7996, "step": 2624 }, { "epoch": 0.6005116697871854, "grad_norm": 0.4389497374586291, "learning_rate": 4.101879268673434e-05, "loss": 3.8876, "step": 2626 }, { "epoch": 0.6009690282561779, "grad_norm": 0.368702623589009, "learning_rate": 4.1004568553694056e-05, "loss": 3.8646, "step": 2628 }, { "epoch": 0.6014263867251705, "grad_norm": 0.4443596969791653, "learning_rate": 4.099033563630518e-05, "loss": 3.647, "step": 2630 }, { "epoch": 0.601883745194163, "grad_norm": 0.3647565112483842, "learning_rate": 4.097609394237966e-05, "loss": 3.9552, "step": 2632 }, { "epoch": 0.6023411036631555, "grad_norm": 0.3700675830462978, "learning_rate": 4.0961843479734276e-05, "loss": 3.8772, "step": 2634 }, { "epoch": 0.602798462132148, "grad_norm": 0.5370695342112992, "learning_rate": 4.094758425619061e-05, "loss": 3.8884, "step": 2636 }, { "epoch": 0.6032558206011406, "grad_norm": 0.38342452269016597, "learning_rate": 4.093331627957504e-05, "loss": 3.996, "step": 2638 }, { "epoch": 0.6037131790701331, "grad_norm": 0.3410261783955063, "learning_rate": 4.091903955771877e-05, "loss": 3.8963, "step": 2640 }, { "epoch": 0.6041705375391256, "grad_norm": 0.3746217966257781, "learning_rate": 4.090475409845778e-05, "loss": 3.7786, "step": 2642 }, { "epoch": 0.6046278960081181, "grad_norm": 0.36307534155690036, "learning_rate": 4.0890459909632875e-05, "loss": 3.8682, "step": 2644 }, { "epoch": 0.6050852544771106, "grad_norm": 0.46661333442103853, "learning_rate": 4.087615699908963e-05, "loss": 3.8037, "step": 2646 }, { "epoch": 0.6055426129461031, "grad_norm": 0.3540826118056686, "learning_rate": 4.086184537467842e-05, "loss": 4.0169, "step": 2648 }, { "epoch": 0.6059999714150957, "grad_norm": 0.3455867894868125, "learning_rate": 4.084752504425439e-05, "loss": 3.8456, "step": 2650 }, { "epoch": 0.6064573298840882, "grad_norm": 0.4297543040276966, "learning_rate": 4.083319601567746e-05, "loss": 3.7385, "step": 2652 }, { "epoch": 0.6069146883530807, "grad_norm": 0.5144187226375146, "learning_rate": 4.081885829681234e-05, "loss": 3.8902, "step": 2654 }, { "epoch": 0.6073720468220732, "grad_norm": 0.31228291600856684, "learning_rate": 4.0804511895528506e-05, "loss": 3.9657, "step": 2656 }, { "epoch": 0.6078294052910658, "grad_norm": 0.39702561741916564, "learning_rate": 4.07901568197002e-05, "loss": 3.788, "step": 2658 }, { "epoch": 0.6082867637600583, "grad_norm": 0.47523499686485643, "learning_rate": 4.0775793077206414e-05, "loss": 4.0014, "step": 2660 }, { "epoch": 0.6087441222290508, "grad_norm": 0.3545768573623851, "learning_rate": 4.07614206759309e-05, "loss": 3.7999, "step": 2662 }, { "epoch": 0.6092014806980434, "grad_norm": 0.4489394937030464, "learning_rate": 4.074703962376219e-05, "loss": 3.8128, "step": 2664 }, { "epoch": 0.6096588391670359, "grad_norm": 0.506070391265186, "learning_rate": 4.073264992859352e-05, "loss": 3.9439, "step": 2666 }, { "epoch": 0.6101161976360284, "grad_norm": 0.40546374161701254, "learning_rate": 4.071825159832289e-05, "loss": 3.8147, "step": 2668 }, { "epoch": 0.610573556105021, "grad_norm": 0.46764886587057014, "learning_rate": 4.070384464085305e-05, "loss": 3.7728, "step": 2670 }, { "epoch": 0.6110309145740135, "grad_norm": 0.3354532721883977, "learning_rate": 4.0689429064091476e-05, "loss": 3.8166, "step": 2672 }, { "epoch": 0.611488273043006, "grad_norm": 0.3469719850000598, "learning_rate": 4.0675004875950364e-05, "loss": 3.9825, "step": 2674 }, { "epoch": 0.6119456315119985, "grad_norm": 0.3391527736924668, "learning_rate": 4.066057208434666e-05, "loss": 3.7793, "step": 2676 }, { "epoch": 0.6124029899809911, "grad_norm": 0.38340699491895647, "learning_rate": 4.064613069720201e-05, "loss": 3.8274, "step": 2678 }, { "epoch": 0.6128603484499836, "grad_norm": 0.43496302315974616, "learning_rate": 4.0631680722442794e-05, "loss": 3.9237, "step": 2680 }, { "epoch": 0.6133177069189761, "grad_norm": 0.40729119682122306, "learning_rate": 4.061722216800009e-05, "loss": 3.6955, "step": 2682 }, { "epoch": 0.6137750653879687, "grad_norm": 0.4296125173167651, "learning_rate": 4.0602755041809696e-05, "loss": 3.7663, "step": 2684 }, { "epoch": 0.6142324238569611, "grad_norm": 0.46931512463443187, "learning_rate": 4.058827935181212e-05, "loss": 3.7626, "step": 2686 }, { "epoch": 0.6146897823259536, "grad_norm": 0.4049123235271883, "learning_rate": 4.0573795105952553e-05, "loss": 3.944, "step": 2688 }, { "epoch": 0.6151471407949461, "grad_norm": 0.41717953906518024, "learning_rate": 4.05593023121809e-05, "loss": 3.7606, "step": 2690 }, { "epoch": 0.6156044992639387, "grad_norm": 0.341636118550547, "learning_rate": 4.0544800978451734e-05, "loss": 3.7722, "step": 2692 }, { "epoch": 0.6160618577329312, "grad_norm": 0.4239120400325462, "learning_rate": 4.0530291112724353e-05, "loss": 4.003, "step": 2694 }, { "epoch": 0.6165192162019237, "grad_norm": 0.3926164409101662, "learning_rate": 4.05157727229627e-05, "loss": 3.7939, "step": 2696 }, { "epoch": 0.6169765746709163, "grad_norm": 0.4345805306251296, "learning_rate": 4.05012458171354e-05, "loss": 3.8623, "step": 2698 }, { "epoch": 0.6174339331399088, "grad_norm": 0.4867769338202521, "learning_rate": 4.04867104032158e-05, "loss": 3.893, "step": 2700 }, { "epoch": 0.6178912916089013, "grad_norm": 0.4769597508425607, "learning_rate": 4.047216648918185e-05, "loss": 3.6479, "step": 2702 }, { "epoch": 0.6183486500778939, "grad_norm": 0.3988825052010137, "learning_rate": 4.045761408301621e-05, "loss": 3.8204, "step": 2704 }, { "epoch": 0.6188060085468864, "grad_norm": 0.43078725700631687, "learning_rate": 4.044305319270618e-05, "loss": 3.9355, "step": 2706 }, { "epoch": 0.6192633670158789, "grad_norm": 0.6051912964104249, "learning_rate": 4.042848382624374e-05, "loss": 3.8699, "step": 2708 }, { "epoch": 0.6197207254848714, "grad_norm": 0.5969659146877233, "learning_rate": 4.041390599162548e-05, "loss": 3.9309, "step": 2710 }, { "epoch": 0.620178083953864, "grad_norm": 0.3268384347197687, "learning_rate": 4.039931969685268e-05, "loss": 3.7218, "step": 2712 }, { "epoch": 0.6206354424228565, "grad_norm": 0.5342547766650901, "learning_rate": 4.0384724949931254e-05, "loss": 3.7562, "step": 2714 }, { "epoch": 0.621092800891849, "grad_norm": 0.5121741270655108, "learning_rate": 4.037012175887174e-05, "loss": 3.7754, "step": 2716 }, { "epoch": 0.6215501593608416, "grad_norm": 0.49652358829921256, "learning_rate": 4.0355510131689314e-05, "loss": 3.7615, "step": 2718 }, { "epoch": 0.6220075178298341, "grad_norm": 0.35934931207152854, "learning_rate": 4.03408900764038e-05, "loss": 3.8644, "step": 2720 }, { "epoch": 0.6224648762988266, "grad_norm": 0.5084473676140149, "learning_rate": 4.0326261601039625e-05, "loss": 3.8377, "step": 2722 }, { "epoch": 0.6229222347678192, "grad_norm": 0.392288214833285, "learning_rate": 4.031162471362585e-05, "loss": 3.8296, "step": 2724 }, { "epoch": 0.6233795932368117, "grad_norm": 0.5180112933470613, "learning_rate": 4.0296979422196166e-05, "loss": 3.777, "step": 2726 }, { "epoch": 0.6238369517058041, "grad_norm": 0.48003116312790445, "learning_rate": 4.028232573478884e-05, "loss": 3.8656, "step": 2728 }, { "epoch": 0.6242943101747966, "grad_norm": 0.4404747341489712, "learning_rate": 4.026766365944678e-05, "loss": 3.8951, "step": 2730 }, { "epoch": 0.6247516686437892, "grad_norm": 0.4265990147315021, "learning_rate": 4.025299320421749e-05, "loss": 3.7409, "step": 2732 }, { "epoch": 0.6252090271127817, "grad_norm": 0.41751682056304223, "learning_rate": 4.0238314377153064e-05, "loss": 3.7073, "step": 2734 }, { "epoch": 0.6256663855817742, "grad_norm": 0.36650775324985024, "learning_rate": 4.02236271863102e-05, "loss": 3.927, "step": 2736 }, { "epoch": 0.6261237440507668, "grad_norm": 0.5238299251016272, "learning_rate": 4.020893163975018e-05, "loss": 4.0157, "step": 2738 }, { "epoch": 0.6265811025197593, "grad_norm": 0.45015575706828337, "learning_rate": 4.0194227745538894e-05, "loss": 3.7434, "step": 2740 }, { "epoch": 0.6270384609887518, "grad_norm": 0.3898310221580035, "learning_rate": 4.017951551174677e-05, "loss": 3.7589, "step": 2742 }, { "epoch": 0.6274958194577444, "grad_norm": 0.43627735003019386, "learning_rate": 4.016479494644885e-05, "loss": 3.9672, "step": 2744 }, { "epoch": 0.6279531779267369, "grad_norm": 0.449239083534799, "learning_rate": 4.015006605772474e-05, "loss": 3.6762, "step": 2746 }, { "epoch": 0.6284105363957294, "grad_norm": 0.42629770576903087, "learning_rate": 4.0135328853658613e-05, "loss": 3.756, "step": 2748 }, { "epoch": 0.6288678948647219, "grad_norm": 0.43809667838573885, "learning_rate": 4.0120583342339204e-05, "loss": 3.8412, "step": 2750 }, { "epoch": 0.6293252533337145, "grad_norm": 0.3960946145132889, "learning_rate": 4.0105829531859805e-05, "loss": 3.851, "step": 2752 }, { "epoch": 0.629782611802707, "grad_norm": 0.40323192745608116, "learning_rate": 4.009106743031828e-05, "loss": 3.7235, "step": 2754 }, { "epoch": 0.6302399702716995, "grad_norm": 0.39140025455961686, "learning_rate": 4.0076297045817015e-05, "loss": 3.7939, "step": 2756 }, { "epoch": 0.6306973287406921, "grad_norm": 0.3456209052638334, "learning_rate": 4.006151838646297e-05, "loss": 3.7559, "step": 2758 }, { "epoch": 0.6311546872096846, "grad_norm": 0.4530808657495433, "learning_rate": 4.004673146036763e-05, "loss": 3.9235, "step": 2760 }, { "epoch": 0.6316120456786771, "grad_norm": 0.39500830861902053, "learning_rate": 4.0031936275647015e-05, "loss": 4.0715, "step": 2762 }, { "epoch": 0.6320694041476697, "grad_norm": 0.4534051949621997, "learning_rate": 4.00171328404217e-05, "loss": 3.8418, "step": 2764 }, { "epoch": 0.6325267626166622, "grad_norm": 0.389429322398243, "learning_rate": 4.000232116281676e-05, "loss": 3.914, "step": 2766 }, { "epoch": 0.6329841210856547, "grad_norm": 0.40287482756807175, "learning_rate": 3.9987501250961814e-05, "loss": 3.9472, "step": 2768 }, { "epoch": 0.6334414795546471, "grad_norm": 0.6181702816907975, "learning_rate": 3.9972673112991e-05, "loss": 3.9595, "step": 2770 }, { "epoch": 0.6338988380236397, "grad_norm": 0.40908233285593665, "learning_rate": 3.9957836757042944e-05, "loss": 3.7749, "step": 2772 }, { "epoch": 0.6343561964926322, "grad_norm": 0.42349143652494253, "learning_rate": 3.994299219126083e-05, "loss": 3.7392, "step": 2774 }, { "epoch": 0.6348135549616247, "grad_norm": 0.44516569361204866, "learning_rate": 3.99281394237923e-05, "loss": 3.8779, "step": 2776 }, { "epoch": 0.6352709134306173, "grad_norm": 0.4012501121235628, "learning_rate": 3.991327846278954e-05, "loss": 3.9607, "step": 2778 }, { "epoch": 0.6357282718996098, "grad_norm": 0.35758966145190396, "learning_rate": 3.989840931640919e-05, "loss": 3.8332, "step": 2780 }, { "epoch": 0.6361856303686023, "grad_norm": 0.47077931780692406, "learning_rate": 3.988353199281242e-05, "loss": 3.8842, "step": 2782 }, { "epoch": 0.6366429888375948, "grad_norm": 0.36858188385275586, "learning_rate": 3.986864650016487e-05, "loss": 3.9244, "step": 2784 }, { "epoch": 0.6371003473065874, "grad_norm": 0.49285235232341096, "learning_rate": 3.985375284663666e-05, "loss": 3.9362, "step": 2786 }, { "epoch": 0.6375577057755799, "grad_norm": 0.4581144001451367, "learning_rate": 3.983885104040241e-05, "loss": 3.7878, "step": 2788 }, { "epoch": 0.6380150642445724, "grad_norm": 0.4277037518134039, "learning_rate": 3.982394108964119e-05, "loss": 4.0028, "step": 2790 }, { "epoch": 0.638472422713565, "grad_norm": 0.421057297185095, "learning_rate": 3.980902300253655e-05, "loss": 3.8827, "step": 2792 }, { "epoch": 0.6389297811825575, "grad_norm": 0.42476925756975525, "learning_rate": 3.9794096787276516e-05, "loss": 3.8837, "step": 2794 }, { "epoch": 0.63938713965155, "grad_norm": 0.3877460052633709, "learning_rate": 3.977916245205355e-05, "loss": 3.7618, "step": 2796 }, { "epoch": 0.6398444981205426, "grad_norm": 0.36125483413946546, "learning_rate": 3.976422000506461e-05, "loss": 3.8692, "step": 2798 }, { "epoch": 0.6403018565895351, "grad_norm": 0.5116901826528434, "learning_rate": 3.974926945451106e-05, "loss": 3.8682, "step": 2800 }, { "epoch": 0.6407592150585276, "grad_norm": 0.4570394988008439, "learning_rate": 3.9734310808598764e-05, "loss": 3.7994, "step": 2802 }, { "epoch": 0.6412165735275202, "grad_norm": 0.39245664320645024, "learning_rate": 3.971934407553797e-05, "loss": 4.0547, "step": 2804 }, { "epoch": 0.6416739319965127, "grad_norm": 0.35818192181081815, "learning_rate": 3.9704369263543406e-05, "loss": 3.8524, "step": 2806 }, { "epoch": 0.6421312904655052, "grad_norm": 0.4018855958493937, "learning_rate": 3.968938638083423e-05, "loss": 3.6524, "step": 2808 }, { "epoch": 0.6425886489344977, "grad_norm": 0.5131935532435535, "learning_rate": 3.9674395435634005e-05, "loss": 3.7606, "step": 2810 }, { "epoch": 0.6430460074034903, "grad_norm": 0.4287993181407368, "learning_rate": 3.965939643617076e-05, "loss": 4.1291, "step": 2812 }, { "epoch": 0.6435033658724827, "grad_norm": 0.4539469794221516, "learning_rate": 3.9644389390676904e-05, "loss": 3.7412, "step": 2814 }, { "epoch": 0.6439607243414752, "grad_norm": 0.43625207441432456, "learning_rate": 3.962937430738929e-05, "loss": 3.9417, "step": 2816 }, { "epoch": 0.6444180828104678, "grad_norm": 0.44424962473516977, "learning_rate": 3.9614351194549174e-05, "loss": 3.8441, "step": 2818 }, { "epoch": 0.6448754412794603, "grad_norm": 0.5480029327112231, "learning_rate": 3.959932006040222e-05, "loss": 3.7712, "step": 2820 }, { "epoch": 0.6453327997484528, "grad_norm": 0.4010905591052417, "learning_rate": 3.958428091319847e-05, "loss": 3.8591, "step": 2822 }, { "epoch": 0.6457901582174453, "grad_norm": 0.3897669141193358, "learning_rate": 3.956923376119241e-05, "loss": 3.8183, "step": 2824 }, { "epoch": 0.6462475166864379, "grad_norm": 0.3704745497424541, "learning_rate": 3.9554178612642886e-05, "loss": 3.6278, "step": 2826 }, { "epoch": 0.6467048751554304, "grad_norm": 0.46124173485191877, "learning_rate": 3.953911547581315e-05, "loss": 3.7865, "step": 2828 }, { "epoch": 0.6471622336244229, "grad_norm": 0.3554326971865547, "learning_rate": 3.9524044358970825e-05, "loss": 3.7826, "step": 2830 }, { "epoch": 0.6476195920934155, "grad_norm": 0.5176700150593785, "learning_rate": 3.950896527038792e-05, "loss": 3.753, "step": 2832 }, { "epoch": 0.648076950562408, "grad_norm": 0.5103911730828897, "learning_rate": 3.949387821834083e-05, "loss": 3.842, "step": 2834 }, { "epoch": 0.6485343090314005, "grad_norm": 0.4756136343722125, "learning_rate": 3.94787832111103e-05, "loss": 3.7352, "step": 2836 }, { "epoch": 0.648991667500393, "grad_norm": 0.6219595147216614, "learning_rate": 3.946368025698146e-05, "loss": 3.8624, "step": 2838 }, { "epoch": 0.6494490259693856, "grad_norm": 0.5293299252935251, "learning_rate": 3.944856936424379e-05, "loss": 4.1392, "step": 2840 }, { "epoch": 0.6499063844383781, "grad_norm": 0.5203381918672801, "learning_rate": 3.943345054119112e-05, "loss": 4.053, "step": 2842 }, { "epoch": 0.6503637429073706, "grad_norm": 0.40345929870692204, "learning_rate": 3.941832379612166e-05, "loss": 3.9161, "step": 2844 }, { "epoch": 0.6508211013763632, "grad_norm": 0.398166751498829, "learning_rate": 3.940318913733795e-05, "loss": 3.7775, "step": 2846 }, { "epoch": 0.6512784598453557, "grad_norm": 0.4967971680819227, "learning_rate": 3.938804657314687e-05, "loss": 3.7708, "step": 2848 }, { "epoch": 0.6517358183143482, "grad_norm": 0.3778442956625349, "learning_rate": 3.9372896111859645e-05, "loss": 3.8975, "step": 2850 }, { "epoch": 0.6521931767833408, "grad_norm": 0.5451524034945537, "learning_rate": 3.9357737761791846e-05, "loss": 3.7463, "step": 2852 }, { "epoch": 0.6526505352523333, "grad_norm": 0.4216696451324443, "learning_rate": 3.9342571531263346e-05, "loss": 3.9378, "step": 2854 }, { "epoch": 0.6531078937213257, "grad_norm": 0.4512341609087668, "learning_rate": 3.932739742859836e-05, "loss": 3.8488, "step": 2856 }, { "epoch": 0.6535652521903182, "grad_norm": 0.45219645234942996, "learning_rate": 3.931221546212542e-05, "loss": 3.7417, "step": 2858 }, { "epoch": 0.6540226106593108, "grad_norm": 0.43180554031503465, "learning_rate": 3.929702564017741e-05, "loss": 3.8034, "step": 2860 }, { "epoch": 0.6544799691283033, "grad_norm": 0.42673317774850905, "learning_rate": 3.928182797109145e-05, "loss": 3.9858, "step": 2862 }, { "epoch": 0.6549373275972958, "grad_norm": 0.4099999084414403, "learning_rate": 3.926662246320902e-05, "loss": 3.8786, "step": 2864 }, { "epoch": 0.6553946860662884, "grad_norm": 0.5124801038741276, "learning_rate": 3.925140912487591e-05, "loss": 3.9405, "step": 2866 }, { "epoch": 0.6558520445352809, "grad_norm": 0.41256076264069774, "learning_rate": 3.923618796444218e-05, "loss": 3.8234, "step": 2868 }, { "epoch": 0.6563094030042734, "grad_norm": 0.35414956088924154, "learning_rate": 3.922095899026219e-05, "loss": 3.9502, "step": 2870 }, { "epoch": 0.656766761473266, "grad_norm": 0.4047726582069079, "learning_rate": 3.920572221069459e-05, "loss": 3.6238, "step": 2872 }, { "epoch": 0.6572241199422585, "grad_norm": 0.4313410068627315, "learning_rate": 3.919047763410233e-05, "loss": 3.8189, "step": 2874 }, { "epoch": 0.657681478411251, "grad_norm": 0.40075216950027437, "learning_rate": 3.91752252688526e-05, "loss": 3.7203, "step": 2876 }, { "epoch": 0.6581388368802435, "grad_norm": 0.5802934548781453, "learning_rate": 3.915996512331691e-05, "loss": 3.8096, "step": 2878 }, { "epoch": 0.6585961953492361, "grad_norm": 0.48621357867189435, "learning_rate": 3.914469720587103e-05, "loss": 3.9283, "step": 2880 }, { "epoch": 0.6590535538182286, "grad_norm": 0.34795260924240645, "learning_rate": 3.912942152489497e-05, "loss": 3.9101, "step": 2882 }, { "epoch": 0.6595109122872211, "grad_norm": 0.5149363629828271, "learning_rate": 3.911413808877301e-05, "loss": 3.6438, "step": 2884 }, { "epoch": 0.6599682707562137, "grad_norm": 0.33494689066713995, "learning_rate": 3.909884690589372e-05, "loss": 3.7304, "step": 2886 }, { "epoch": 0.6604256292252062, "grad_norm": 0.6078356593802006, "learning_rate": 3.908354798464987e-05, "loss": 3.8039, "step": 2888 }, { "epoch": 0.6608829876941987, "grad_norm": 0.6011036200192731, "learning_rate": 3.9068241333438516e-05, "loss": 4.0118, "step": 2890 }, { "epoch": 0.6613403461631913, "grad_norm": 0.44293631312105114, "learning_rate": 3.9052926960660945e-05, "loss": 3.9565, "step": 2892 }, { "epoch": 0.6617977046321838, "grad_norm": 0.4500055802846991, "learning_rate": 3.9037604874722676e-05, "loss": 3.6642, "step": 2894 }, { "epoch": 0.6622550631011763, "grad_norm": 0.4608550725394292, "learning_rate": 3.902227508403348e-05, "loss": 3.7921, "step": 2896 }, { "epoch": 0.6627124215701687, "grad_norm": 0.40425509545624677, "learning_rate": 3.900693759700732e-05, "loss": 3.6993, "step": 2898 }, { "epoch": 0.6631697800391613, "grad_norm": 0.5160508853436468, "learning_rate": 3.8991592422062436e-05, "loss": 3.8624, "step": 2900 }, { "epoch": 0.6636271385081538, "grad_norm": 0.4375082083340785, "learning_rate": 3.897623956762123e-05, "loss": 3.972, "step": 2902 }, { "epoch": 0.6640844969771463, "grad_norm": 0.4299466652007656, "learning_rate": 3.896087904211036e-05, "loss": 3.829, "step": 2904 }, { "epoch": 0.6645418554461389, "grad_norm": 0.3657986570874337, "learning_rate": 3.894551085396069e-05, "loss": 3.8989, "step": 2906 }, { "epoch": 0.6649992139151314, "grad_norm": 0.5217793805359103, "learning_rate": 3.8930135011607276e-05, "loss": 3.9337, "step": 2908 }, { "epoch": 0.6654565723841239, "grad_norm": 0.496611107619201, "learning_rate": 3.891475152348938e-05, "loss": 3.8689, "step": 2910 }, { "epoch": 0.6659139308531165, "grad_norm": 0.4193341600285363, "learning_rate": 3.889936039805044e-05, "loss": 3.8211, "step": 2912 }, { "epoch": 0.666371289322109, "grad_norm": 0.4914369006521371, "learning_rate": 3.8883961643738144e-05, "loss": 3.8305, "step": 2914 }, { "epoch": 0.6668286477911015, "grad_norm": 0.41930496677119933, "learning_rate": 3.8868555269004314e-05, "loss": 3.9965, "step": 2916 }, { "epoch": 0.667286006260094, "grad_norm": 0.883584992679874, "learning_rate": 3.885314128230496e-05, "loss": 4.0149, "step": 2918 }, { "epoch": 0.6677433647290866, "grad_norm": 0.45767637513968323, "learning_rate": 3.88377196921003e-05, "loss": 3.8803, "step": 2920 }, { "epoch": 0.6682007231980791, "grad_norm": 0.5774052237482825, "learning_rate": 3.88222905068547e-05, "loss": 3.7208, "step": 2922 }, { "epoch": 0.6686580816670716, "grad_norm": 0.39188777899705884, "learning_rate": 3.8806853735036687e-05, "loss": 3.6787, "step": 2924 }, { "epoch": 0.6691154401360642, "grad_norm": 0.3981221952294643, "learning_rate": 3.879140938511898e-05, "loss": 3.7608, "step": 2926 }, { "epoch": 0.6695727986050567, "grad_norm": 0.4971250192931404, "learning_rate": 3.877595746557844e-05, "loss": 3.8455, "step": 2928 }, { "epoch": 0.6700301570740492, "grad_norm": 0.4222611143875233, "learning_rate": 3.876049798489609e-05, "loss": 4.0106, "step": 2930 }, { "epoch": 0.6704875155430418, "grad_norm": 0.45216438809102055, "learning_rate": 3.874503095155709e-05, "loss": 3.8139, "step": 2932 }, { "epoch": 0.6709448740120343, "grad_norm": 0.4228883687622838, "learning_rate": 3.8729556374050755e-05, "loss": 3.8146, "step": 2934 }, { "epoch": 0.6714022324810268, "grad_norm": 0.40642646478216643, "learning_rate": 3.871407426087056e-05, "loss": 3.8802, "step": 2936 }, { "epoch": 0.6718595909500193, "grad_norm": 0.43541917511188544, "learning_rate": 3.869858462051407e-05, "loss": 3.9085, "step": 2938 }, { "epoch": 0.6723169494190118, "grad_norm": 0.4538828004961495, "learning_rate": 3.868308746148301e-05, "loss": 3.8888, "step": 2940 }, { "epoch": 0.6727743078880043, "grad_norm": 0.36051292218236863, "learning_rate": 3.8667582792283244e-05, "loss": 3.9532, "step": 2942 }, { "epoch": 0.6732316663569968, "grad_norm": 0.4951877882240551, "learning_rate": 3.865207062142474e-05, "loss": 3.9699, "step": 2944 }, { "epoch": 0.6736890248259894, "grad_norm": 0.46859768936114243, "learning_rate": 3.8636550957421566e-05, "loss": 3.9952, "step": 2946 }, { "epoch": 0.6741463832949819, "grad_norm": 0.35146073881922907, "learning_rate": 3.8621023808791955e-05, "loss": 3.6976, "step": 2948 }, { "epoch": 0.6746037417639744, "grad_norm": 0.35828730551871396, "learning_rate": 3.860548918405821e-05, "loss": 3.9402, "step": 2950 }, { "epoch": 0.675061100232967, "grad_norm": 0.5811014364974806, "learning_rate": 3.858994709174672e-05, "loss": 3.7499, "step": 2952 }, { "epoch": 0.6755184587019595, "grad_norm": 0.5027469636397669, "learning_rate": 3.8574397540388006e-05, "loss": 3.8016, "step": 2954 }, { "epoch": 0.675975817170952, "grad_norm": 0.4468608006975784, "learning_rate": 3.85588405385167e-05, "loss": 3.8377, "step": 2956 }, { "epoch": 0.6764331756399445, "grad_norm": 0.41125042608059315, "learning_rate": 3.8543276094671465e-05, "loss": 3.8149, "step": 2958 }, { "epoch": 0.6768905341089371, "grad_norm": 0.4005862618700311, "learning_rate": 3.852770421739509e-05, "loss": 3.7754, "step": 2960 }, { "epoch": 0.6773478925779296, "grad_norm": 0.44308074732897457, "learning_rate": 3.851212491523444e-05, "loss": 3.808, "step": 2962 }, { "epoch": 0.6778052510469221, "grad_norm": 0.4820955385093477, "learning_rate": 3.849653819674044e-05, "loss": 3.8276, "step": 2964 }, { "epoch": 0.6782626095159147, "grad_norm": 0.48175254134085405, "learning_rate": 3.848094407046811e-05, "loss": 3.8275, "step": 2966 }, { "epoch": 0.6787199679849072, "grad_norm": 0.43160530739853975, "learning_rate": 3.846534254497651e-05, "loss": 3.8548, "step": 2968 }, { "epoch": 0.6791773264538997, "grad_norm": 0.47722974561491666, "learning_rate": 3.844973362882877e-05, "loss": 3.8165, "step": 2970 }, { "epoch": 0.6796346849228923, "grad_norm": 0.4174135924197083, "learning_rate": 3.843411733059209e-05, "loss": 3.6944, "step": 2972 }, { "epoch": 0.6800920433918848, "grad_norm": 0.4374851667069412, "learning_rate": 3.84184936588377e-05, "loss": 3.8476, "step": 2974 }, { "epoch": 0.6805494018608773, "grad_norm": 0.443672069906201, "learning_rate": 3.840286262214089e-05, "loss": 3.7194, "step": 2976 }, { "epoch": 0.6810067603298698, "grad_norm": 0.41453832600892043, "learning_rate": 3.8387224229081e-05, "loss": 3.7312, "step": 2978 }, { "epoch": 0.6814641187988624, "grad_norm": 0.4053091194347145, "learning_rate": 3.837157848824138e-05, "loss": 3.9708, "step": 2980 }, { "epoch": 0.6819214772678548, "grad_norm": 0.4949996929215914, "learning_rate": 3.835592540820945e-05, "loss": 3.6243, "step": 2982 }, { "epoch": 0.6823788357368473, "grad_norm": 0.40438266894277775, "learning_rate": 3.8340264997576626e-05, "loss": 3.7226, "step": 2984 }, { "epoch": 0.6828361942058399, "grad_norm": 0.4722102086169007, "learning_rate": 3.8324597264938354e-05, "loss": 3.7177, "step": 2986 }, { "epoch": 0.6832935526748324, "grad_norm": 0.4637111682520117, "learning_rate": 3.8308922218894125e-05, "loss": 3.834, "step": 2988 }, { "epoch": 0.6837509111438249, "grad_norm": 0.49406225917455604, "learning_rate": 3.829323986804741e-05, "loss": 4.1257, "step": 2990 }, { "epoch": 0.6842082696128174, "grad_norm": 0.40454440440644607, "learning_rate": 3.8277550221005706e-05, "loss": 3.753, "step": 2992 }, { "epoch": 0.68466562808181, "grad_norm": 0.4649450848823882, "learning_rate": 3.826185328638051e-05, "loss": 3.8323, "step": 2994 }, { "epoch": 0.6851229865508025, "grad_norm": 0.3884100103740279, "learning_rate": 3.824614907278733e-05, "loss": 3.86, "step": 2996 }, { "epoch": 0.685580345019795, "grad_norm": 0.604709198042463, "learning_rate": 3.823043758884565e-05, "loss": 3.8581, "step": 2998 }, { "epoch": 0.6860377034887876, "grad_norm": 0.4066420277508114, "learning_rate": 3.821471884317896e-05, "loss": 3.7533, "step": 3000 }, { "epoch": 0.6864950619577801, "grad_norm": 0.40859787393680613, "learning_rate": 3.819899284441474e-05, "loss": 3.619, "step": 3002 }, { "epoch": 0.6869524204267726, "grad_norm": 0.32087151663494035, "learning_rate": 3.818325960118442e-05, "loss": 3.8463, "step": 3004 }, { "epoch": 0.6874097788957652, "grad_norm": 0.3781805659679584, "learning_rate": 3.8167519122123444e-05, "loss": 3.6393, "step": 3006 }, { "epoch": 0.6878671373647577, "grad_norm": 0.4417536389741262, "learning_rate": 3.81517714158712e-05, "loss": 3.8753, "step": 3008 }, { "epoch": 0.6883244958337502, "grad_norm": 0.3735745078906581, "learning_rate": 3.813601649107106e-05, "loss": 3.8722, "step": 3010 }, { "epoch": 0.6887818543027427, "grad_norm": 0.3984872001616906, "learning_rate": 3.812025435637036e-05, "loss": 3.776, "step": 3012 }, { "epoch": 0.6892392127717353, "grad_norm": 0.42810874824585254, "learning_rate": 3.8104485020420365e-05, "loss": 3.718, "step": 3014 }, { "epoch": 0.6896965712407278, "grad_norm": 0.598880337045831, "learning_rate": 3.808870849187634e-05, "loss": 3.9992, "step": 3016 }, { "epoch": 0.6901539297097203, "grad_norm": 0.5987891137840223, "learning_rate": 3.807292477939743e-05, "loss": 3.7529, "step": 3018 }, { "epoch": 0.6906112881787129, "grad_norm": 0.3614084103029486, "learning_rate": 3.805713389164681e-05, "loss": 3.8156, "step": 3020 }, { "epoch": 0.6910686466477054, "grad_norm": 0.5141158705257172, "learning_rate": 3.804133583729151e-05, "loss": 3.8642, "step": 3022 }, { "epoch": 0.6915260051166978, "grad_norm": 0.4576446459840137, "learning_rate": 3.802553062500256e-05, "loss": 3.7261, "step": 3024 }, { "epoch": 0.6919833635856903, "grad_norm": 0.5796233684096449, "learning_rate": 3.8009718263454866e-05, "loss": 3.8908, "step": 3026 }, { "epoch": 0.6924407220546829, "grad_norm": 0.4552407199357875, "learning_rate": 3.7993898761327284e-05, "loss": 3.9079, "step": 3028 }, { "epoch": 0.6928980805236754, "grad_norm": 0.4188354988495028, "learning_rate": 3.797807212730259e-05, "loss": 3.9447, "step": 3030 }, { "epoch": 0.6933554389926679, "grad_norm": 0.4124034176089167, "learning_rate": 3.796223837006748e-05, "loss": 3.7841, "step": 3032 }, { "epoch": 0.6938127974616605, "grad_norm": 0.4313477605776826, "learning_rate": 3.794639749831254e-05, "loss": 3.8445, "step": 3034 }, { "epoch": 0.694270155930653, "grad_norm": 0.5419578472633946, "learning_rate": 3.793054952073227e-05, "loss": 3.7664, "step": 3036 }, { "epoch": 0.6947275143996455, "grad_norm": 0.46806449038143105, "learning_rate": 3.791469444602507e-05, "loss": 3.8567, "step": 3038 }, { "epoch": 0.6951848728686381, "grad_norm": 0.4564297622428044, "learning_rate": 3.789883228289324e-05, "loss": 3.9102, "step": 3040 }, { "epoch": 0.6956422313376306, "grad_norm": 0.4662326291507138, "learning_rate": 3.7882963040042976e-05, "loss": 3.8092, "step": 3042 }, { "epoch": 0.6960995898066231, "grad_norm": 0.42048451190325525, "learning_rate": 3.786708672618433e-05, "loss": 3.8879, "step": 3044 }, { "epoch": 0.6965569482756157, "grad_norm": 0.334656758005154, "learning_rate": 3.7851203350031286e-05, "loss": 3.8806, "step": 3046 }, { "epoch": 0.6970143067446082, "grad_norm": 0.36568182805209665, "learning_rate": 3.783531292030165e-05, "loss": 3.7153, "step": 3048 }, { "epoch": 0.6974716652136007, "grad_norm": 0.4386781428798825, "learning_rate": 3.781941544571712e-05, "loss": 3.7245, "step": 3050 }, { "epoch": 0.6979290236825932, "grad_norm": 0.5348645828562764, "learning_rate": 3.780351093500328e-05, "loss": 4.036, "step": 3052 }, { "epoch": 0.6983863821515858, "grad_norm": 0.6153834571048032, "learning_rate": 3.778759939688956e-05, "loss": 3.9229, "step": 3054 }, { "epoch": 0.6988437406205783, "grad_norm": 0.4254065098174298, "learning_rate": 3.777168084010925e-05, "loss": 3.9156, "step": 3056 }, { "epoch": 0.6993010990895708, "grad_norm": 0.435629815385603, "learning_rate": 3.775575527339947e-05, "loss": 3.8988, "step": 3058 }, { "epoch": 0.6997584575585634, "grad_norm": 0.4239433237491541, "learning_rate": 3.7739822705501224e-05, "loss": 3.874, "step": 3060 }, { "epoch": 0.7002158160275559, "grad_norm": 0.36362293541021523, "learning_rate": 3.772388314515934e-05, "loss": 3.7781, "step": 3062 }, { "epoch": 0.7006731744965484, "grad_norm": 0.4483110129846119, "learning_rate": 3.770793660112248e-05, "loss": 3.6252, "step": 3064 }, { "epoch": 0.7011305329655408, "grad_norm": 0.44368971898756865, "learning_rate": 3.769198308214316e-05, "loss": 3.8039, "step": 3066 }, { "epoch": 0.7015878914345334, "grad_norm": 0.37872260844448613, "learning_rate": 3.767602259697769e-05, "loss": 3.7579, "step": 3068 }, { "epoch": 0.7020452499035259, "grad_norm": 0.41248651395773683, "learning_rate": 3.7660055154386235e-05, "loss": 3.7564, "step": 3070 }, { "epoch": 0.7025026083725184, "grad_norm": 0.4710429377695463, "learning_rate": 3.764408076313275e-05, "loss": 4.0196, "step": 3072 }, { "epoch": 0.702959966841511, "grad_norm": 0.49438634801540743, "learning_rate": 3.7628099431985045e-05, "loss": 3.7871, "step": 3074 }, { "epoch": 0.7034173253105035, "grad_norm": 0.36401991451853877, "learning_rate": 3.7612111169714695e-05, "loss": 3.6606, "step": 3076 }, { "epoch": 0.703874683779496, "grad_norm": 0.5626705758755977, "learning_rate": 3.75961159850971e-05, "loss": 3.6926, "step": 3078 }, { "epoch": 0.7043320422484886, "grad_norm": 0.4311470385664636, "learning_rate": 3.758011388691148e-05, "loss": 3.8219, "step": 3080 }, { "epoch": 0.7047894007174811, "grad_norm": 0.4222941612178756, "learning_rate": 3.75641048839408e-05, "loss": 3.6809, "step": 3082 }, { "epoch": 0.7052467591864736, "grad_norm": 0.37836602623603327, "learning_rate": 3.7548088984971864e-05, "loss": 3.7726, "step": 3084 }, { "epoch": 0.7057041176554661, "grad_norm": 0.3527682291304711, "learning_rate": 3.753206619879522e-05, "loss": 3.8085, "step": 3086 }, { "epoch": 0.7061614761244587, "grad_norm": 0.4691288847331719, "learning_rate": 3.7516036534205224e-05, "loss": 3.5887, "step": 3088 }, { "epoch": 0.7066188345934512, "grad_norm": 0.3887940986189847, "learning_rate": 3.7500000000000003e-05, "loss": 3.8793, "step": 3090 }, { "epoch": 0.7070761930624437, "grad_norm": 0.3800472109730045, "learning_rate": 3.7483956604981446e-05, "loss": 3.8381, "step": 3092 }, { "epoch": 0.7075335515314363, "grad_norm": 0.4408759924085462, "learning_rate": 3.746790635795521e-05, "loss": 3.8116, "step": 3094 }, { "epoch": 0.7079909100004288, "grad_norm": 0.5079418311119246, "learning_rate": 3.745184926773073e-05, "loss": 3.8456, "step": 3096 }, { "epoch": 0.7084482684694213, "grad_norm": 0.452773747283195, "learning_rate": 3.743578534312116e-05, "loss": 3.9367, "step": 3098 }, { "epoch": 0.7089056269384139, "grad_norm": 0.39398958773457865, "learning_rate": 3.741971459294344e-05, "loss": 3.7305, "step": 3100 }, { "epoch": 0.7093629854074064, "grad_norm": 0.49327880733103907, "learning_rate": 3.740363702601824e-05, "loss": 3.6973, "step": 3102 }, { "epoch": 0.7098203438763989, "grad_norm": 0.4519710975706805, "learning_rate": 3.738755265116998e-05, "loss": 3.87, "step": 3104 }, { "epoch": 0.7102777023453914, "grad_norm": 0.4209729900847439, "learning_rate": 3.73714614772268e-05, "loss": 4.0508, "step": 3106 }, { "epoch": 0.710735060814384, "grad_norm": 0.4619467564438502, "learning_rate": 3.735536351302059e-05, "loss": 3.8154, "step": 3108 }, { "epoch": 0.7111924192833764, "grad_norm": 0.46264967300805854, "learning_rate": 3.733925876738696e-05, "loss": 3.9493, "step": 3110 }, { "epoch": 0.7116497777523689, "grad_norm": 0.37163923226801826, "learning_rate": 3.7323147249165255e-05, "loss": 3.9676, "step": 3112 }, { "epoch": 0.7121071362213615, "grad_norm": 0.4690983113971863, "learning_rate": 3.73070289671985e-05, "loss": 3.8051, "step": 3114 }, { "epoch": 0.712564494690354, "grad_norm": 0.39937018640808175, "learning_rate": 3.729090393033347e-05, "loss": 3.8559, "step": 3116 }, { "epoch": 0.7130218531593465, "grad_norm": 0.4432590555975417, "learning_rate": 3.727477214742063e-05, "loss": 3.7815, "step": 3118 }, { "epoch": 0.713479211628339, "grad_norm": 0.3620199848277868, "learning_rate": 3.7258633627314166e-05, "loss": 3.7853, "step": 3120 }, { "epoch": 0.7139365700973316, "grad_norm": 0.5913948733244633, "learning_rate": 3.724248837887193e-05, "loss": 3.8202, "step": 3122 }, { "epoch": 0.7143939285663241, "grad_norm": 0.4369531176611581, "learning_rate": 3.7226336410955495e-05, "loss": 4.0248, "step": 3124 }, { "epoch": 0.7148512870353166, "grad_norm": 0.4934576086255713, "learning_rate": 3.721017773243011e-05, "loss": 3.7567, "step": 3126 }, { "epoch": 0.7153086455043092, "grad_norm": 0.4413289027680742, "learning_rate": 3.719401235216471e-05, "loss": 3.7523, "step": 3128 }, { "epoch": 0.7157660039733017, "grad_norm": 0.40448452561442316, "learning_rate": 3.717784027903191e-05, "loss": 3.9136, "step": 3130 }, { "epoch": 0.7162233624422942, "grad_norm": 0.45411499987994136, "learning_rate": 3.716166152190798e-05, "loss": 3.679, "step": 3132 }, { "epoch": 0.7166807209112868, "grad_norm": 0.39536919696105816, "learning_rate": 3.7145476089672884e-05, "loss": 3.7208, "step": 3134 }, { "epoch": 0.7171380793802793, "grad_norm": 0.4690574750180005, "learning_rate": 3.712928399121026e-05, "loss": 3.8189, "step": 3136 }, { "epoch": 0.7175954378492718, "grad_norm": 0.43946460285032496, "learning_rate": 3.711308523540735e-05, "loss": 3.7909, "step": 3138 }, { "epoch": 0.7180527963182644, "grad_norm": 0.42276972800344537, "learning_rate": 3.709687983115512e-05, "loss": 3.7311, "step": 3140 }, { "epoch": 0.7185101547872569, "grad_norm": 0.5060919738719521, "learning_rate": 3.708066778734812e-05, "loss": 3.9038, "step": 3142 }, { "epoch": 0.7189675132562494, "grad_norm": 0.39277767215155107, "learning_rate": 3.70644491128846e-05, "loss": 3.6845, "step": 3144 }, { "epoch": 0.7194248717252419, "grad_norm": 0.3747338502863775, "learning_rate": 3.704822381666641e-05, "loss": 3.663, "step": 3146 }, { "epoch": 0.7198822301942345, "grad_norm": 0.48121563305697823, "learning_rate": 3.703199190759904e-05, "loss": 3.7292, "step": 3148 }, { "epoch": 0.720339588663227, "grad_norm": 0.4465680909123813, "learning_rate": 3.701575339459164e-05, "loss": 3.839, "step": 3150 }, { "epoch": 0.7207969471322194, "grad_norm": 0.4356150039281254, "learning_rate": 3.6999508286556944e-05, "loss": 3.7198, "step": 3152 }, { "epoch": 0.721254305601212, "grad_norm": 0.4335541406358734, "learning_rate": 3.698325659241133e-05, "loss": 3.9995, "step": 3154 }, { "epoch": 0.7217116640702045, "grad_norm": 0.4023518422618495, "learning_rate": 3.6966998321074786e-05, "loss": 3.8893, "step": 3156 }, { "epoch": 0.722169022539197, "grad_norm": 0.3684994614246527, "learning_rate": 3.695073348147092e-05, "loss": 3.7627, "step": 3158 }, { "epoch": 0.7226263810081895, "grad_norm": 0.44183401462668365, "learning_rate": 3.693446208252691e-05, "loss": 3.6569, "step": 3160 }, { "epoch": 0.7230837394771821, "grad_norm": 0.37059431096948137, "learning_rate": 3.6918184133173574e-05, "loss": 3.7738, "step": 3162 }, { "epoch": 0.7235410979461746, "grad_norm": 0.37273867593241583, "learning_rate": 3.690189964234532e-05, "loss": 3.8703, "step": 3164 }, { "epoch": 0.7239984564151671, "grad_norm": 0.37713756891331973, "learning_rate": 3.6885608618980114e-05, "loss": 3.6392, "step": 3166 }, { "epoch": 0.7244558148841597, "grad_norm": 0.3939122736045461, "learning_rate": 3.686931107201954e-05, "loss": 3.8434, "step": 3168 }, { "epoch": 0.7249131733531522, "grad_norm": 0.42384544020762044, "learning_rate": 3.6853007010408754e-05, "loss": 3.604, "step": 3170 }, { "epoch": 0.7253705318221447, "grad_norm": 0.41798256468002076, "learning_rate": 3.6836696443096475e-05, "loss": 3.9651, "step": 3172 }, { "epoch": 0.7258278902911373, "grad_norm": 0.45145352972608804, "learning_rate": 3.6820379379035026e-05, "loss": 4.1053, "step": 3174 }, { "epoch": 0.7262852487601298, "grad_norm": 0.5152974388739235, "learning_rate": 3.680405582718025e-05, "loss": 3.9421, "step": 3176 }, { "epoch": 0.7267426072291223, "grad_norm": 0.48512635868396314, "learning_rate": 3.678772579649159e-05, "loss": 3.7827, "step": 3178 }, { "epoch": 0.7271999656981148, "grad_norm": 0.4720265947078898, "learning_rate": 3.677138929593202e-05, "loss": 3.7882, "step": 3180 }, { "epoch": 0.7276573241671074, "grad_norm": 0.43186986023070617, "learning_rate": 3.675504633446808e-05, "loss": 3.804, "step": 3182 }, { "epoch": 0.7281146826360999, "grad_norm": 0.478629818739386, "learning_rate": 3.6738696921069844e-05, "loss": 3.9516, "step": 3184 }, { "epoch": 0.7285720411050924, "grad_norm": 0.5264846585747481, "learning_rate": 3.672234106471094e-05, "loss": 3.9984, "step": 3186 }, { "epoch": 0.729029399574085, "grad_norm": 0.5187037769084513, "learning_rate": 3.6705978774368536e-05, "loss": 3.8808, "step": 3188 }, { "epoch": 0.7294867580430775, "grad_norm": 0.5212659056727277, "learning_rate": 3.668961005902329e-05, "loss": 3.8002, "step": 3190 }, { "epoch": 0.72994411651207, "grad_norm": 0.3683058164784097, "learning_rate": 3.667323492765946e-05, "loss": 3.7502, "step": 3192 }, { "epoch": 0.7304014749810624, "grad_norm": 0.45293532709753537, "learning_rate": 3.665685338926475e-05, "loss": 3.7732, "step": 3194 }, { "epoch": 0.730858833450055, "grad_norm": 0.595358151465155, "learning_rate": 3.664046545283042e-05, "loss": 3.8765, "step": 3196 }, { "epoch": 0.7313161919190475, "grad_norm": 0.41806695800402666, "learning_rate": 3.6624071127351256e-05, "loss": 3.8418, "step": 3198 }, { "epoch": 0.73177355038804, "grad_norm": 0.45048015793067636, "learning_rate": 3.660767042182551e-05, "loss": 3.7443, "step": 3200 }, { "epoch": 0.7322309088570326, "grad_norm": 0.5221753061584139, "learning_rate": 3.659126334525496e-05, "loss": 3.7697, "step": 3202 }, { "epoch": 0.7326882673260251, "grad_norm": 0.4951773763033662, "learning_rate": 3.657484990664488e-05, "loss": 3.762, "step": 3204 }, { "epoch": 0.7331456257950176, "grad_norm": 0.412252536874603, "learning_rate": 3.6558430115004036e-05, "loss": 3.8338, "step": 3206 }, { "epoch": 0.7336029842640102, "grad_norm": 0.4459736918015953, "learning_rate": 3.6542003979344676e-05, "loss": 3.6733, "step": 3208 }, { "epoch": 0.7340603427330027, "grad_norm": 0.3923679187581678, "learning_rate": 3.652557150868253e-05, "loss": 3.8256, "step": 3210 }, { "epoch": 0.7345177012019952, "grad_norm": 0.4058103199484141, "learning_rate": 3.65091327120368e-05, "loss": 3.8073, "step": 3212 }, { "epoch": 0.7349750596709878, "grad_norm": 0.5165387351586336, "learning_rate": 3.6492687598430195e-05, "loss": 3.7943, "step": 3214 }, { "epoch": 0.7354324181399803, "grad_norm": 0.463648774777557, "learning_rate": 3.647623617688883e-05, "loss": 3.8723, "step": 3216 }, { "epoch": 0.7358897766089728, "grad_norm": 0.4047185569246085, "learning_rate": 3.645977845644233e-05, "loss": 3.8173, "step": 3218 }, { "epoch": 0.7363471350779653, "grad_norm": 0.49604583869321406, "learning_rate": 3.644331444612376e-05, "loss": 3.7902, "step": 3220 }, { "epoch": 0.7368044935469579, "grad_norm": 0.36824882604123116, "learning_rate": 3.6426844154969653e-05, "loss": 3.8501, "step": 3222 }, { "epoch": 0.7372618520159504, "grad_norm": 0.40781864512406635, "learning_rate": 3.641036759201995e-05, "loss": 3.7471, "step": 3224 }, { "epoch": 0.7377192104849429, "grad_norm": 0.49776278602563334, "learning_rate": 3.639388476631809e-05, "loss": 3.7692, "step": 3226 }, { "epoch": 0.7381765689539355, "grad_norm": 0.48538332246143506, "learning_rate": 3.63773956869109e-05, "loss": 3.9504, "step": 3228 }, { "epoch": 0.738633927422928, "grad_norm": 0.41825441598584595, "learning_rate": 3.636090036284867e-05, "loss": 3.666, "step": 3230 }, { "epoch": 0.7390912858919205, "grad_norm": 0.351964858068059, "learning_rate": 3.63443988031851e-05, "loss": 3.7604, "step": 3232 }, { "epoch": 0.739548644360913, "grad_norm": 0.3752289708781062, "learning_rate": 3.632789101697732e-05, "loss": 3.8685, "step": 3234 }, { "epoch": 0.7400060028299055, "grad_norm": 0.4013363160375532, "learning_rate": 3.631137701328589e-05, "loss": 3.8349, "step": 3236 }, { "epoch": 0.740463361298898, "grad_norm": 0.5054018167933305, "learning_rate": 3.6294856801174756e-05, "loss": 3.7493, "step": 3238 }, { "epoch": 0.7409207197678905, "grad_norm": 0.5026197348705252, "learning_rate": 3.627833038971129e-05, "loss": 3.8749, "step": 3240 }, { "epoch": 0.7413780782368831, "grad_norm": 0.4524760861426803, "learning_rate": 3.626179778796626e-05, "loss": 3.7992, "step": 3242 }, { "epoch": 0.7418354367058756, "grad_norm": 0.3225088415486383, "learning_rate": 3.6245259005013843e-05, "loss": 3.921, "step": 3244 }, { "epoch": 0.7422927951748681, "grad_norm": 0.43185098622712864, "learning_rate": 3.6228714049931575e-05, "loss": 3.8652, "step": 3246 }, { "epoch": 0.7427501536438607, "grad_norm": 0.49892022844152606, "learning_rate": 3.6212162931800435e-05, "loss": 3.8329, "step": 3248 }, { "epoch": 0.7432075121128532, "grad_norm": 0.4340948182358497, "learning_rate": 3.6195605659704736e-05, "loss": 3.7537, "step": 3250 }, { "epoch": 0.7436648705818457, "grad_norm": 0.4095694567844691, "learning_rate": 3.617904224273218e-05, "loss": 3.8312, "step": 3252 }, { "epoch": 0.7441222290508382, "grad_norm": 0.3818251664381552, "learning_rate": 3.6162472689973864e-05, "loss": 3.7843, "step": 3254 }, { "epoch": 0.7445795875198308, "grad_norm": 0.3613992775633283, "learning_rate": 3.614589701052423e-05, "loss": 3.7193, "step": 3256 }, { "epoch": 0.7450369459888233, "grad_norm": 0.4020384954577239, "learning_rate": 3.612931521348108e-05, "loss": 3.7773, "step": 3258 }, { "epoch": 0.7454943044578158, "grad_norm": 0.44333114198033163, "learning_rate": 3.611272730794561e-05, "loss": 3.8458, "step": 3260 }, { "epoch": 0.7459516629268084, "grad_norm": 0.4583588416052719, "learning_rate": 3.609613330302231e-05, "loss": 3.9095, "step": 3262 }, { "epoch": 0.7464090213958009, "grad_norm": 0.5498120864722642, "learning_rate": 3.607953320781907e-05, "loss": 3.9289, "step": 3264 }, { "epoch": 0.7468663798647934, "grad_norm": 0.4383082417219103, "learning_rate": 3.606292703144709e-05, "loss": 3.7484, "step": 3266 }, { "epoch": 0.747323738333786, "grad_norm": 0.42210161339294006, "learning_rate": 3.604631478302093e-05, "loss": 3.8605, "step": 3268 }, { "epoch": 0.7477810968027785, "grad_norm": 0.586037919658241, "learning_rate": 3.602969647165847e-05, "loss": 3.6559, "step": 3270 }, { "epoch": 0.748238455271771, "grad_norm": 0.41604017803108556, "learning_rate": 3.601307210648092e-05, "loss": 3.6756, "step": 3272 }, { "epoch": 0.7486958137407635, "grad_norm": 0.42105458985166605, "learning_rate": 3.59964416966128e-05, "loss": 3.7727, "step": 3274 }, { "epoch": 0.7491531722097561, "grad_norm": 0.3870523343370536, "learning_rate": 3.5979805251181994e-05, "loss": 3.7475, "step": 3276 }, { "epoch": 0.7496105306787485, "grad_norm": 0.4800643614742949, "learning_rate": 3.596316277931964e-05, "loss": 3.8804, "step": 3278 }, { "epoch": 0.750067889147741, "grad_norm": 0.448116343000953, "learning_rate": 3.5946514290160207e-05, "loss": 3.7406, "step": 3280 }, { "epoch": 0.7505252476167336, "grad_norm": 0.33203812760354556, "learning_rate": 3.592985979284148e-05, "loss": 3.9074, "step": 3282 }, { "epoch": 0.7509826060857261, "grad_norm": 0.4425425630508959, "learning_rate": 3.591319929650453e-05, "loss": 3.8922, "step": 3284 }, { "epoch": 0.7514399645547186, "grad_norm": 0.6024269729297634, "learning_rate": 3.589653281029371e-05, "loss": 3.7705, "step": 3286 }, { "epoch": 0.7518973230237112, "grad_norm": 0.4113135404980838, "learning_rate": 3.5879860343356697e-05, "loss": 3.6013, "step": 3288 }, { "epoch": 0.7523546814927037, "grad_norm": 0.4328639271889946, "learning_rate": 3.58631819048444e-05, "loss": 3.8238, "step": 3290 }, { "epoch": 0.7528120399616962, "grad_norm": 0.4955576605828133, "learning_rate": 3.584649750391105e-05, "loss": 3.8443, "step": 3292 }, { "epoch": 0.7532693984306887, "grad_norm": 0.4352751427344632, "learning_rate": 3.582980714971411e-05, "loss": 3.9655, "step": 3294 }, { "epoch": 0.7537267568996813, "grad_norm": 0.41682284130378733, "learning_rate": 3.5813110851414356e-05, "loss": 3.7029, "step": 3296 }, { "epoch": 0.7541841153686738, "grad_norm": 0.39799115480725145, "learning_rate": 3.579640861817579e-05, "loss": 3.6133, "step": 3298 }, { "epoch": 0.7546414738376663, "grad_norm": 0.4797262020838129, "learning_rate": 3.577970045916569e-05, "loss": 3.6565, "step": 3300 }, { "epoch": 0.7550988323066589, "grad_norm": 0.4651863214256715, "learning_rate": 3.576298638355457e-05, "loss": 3.7984, "step": 3302 }, { "epoch": 0.7555561907756514, "grad_norm": 0.4435329685579703, "learning_rate": 3.5746266400516216e-05, "loss": 3.7691, "step": 3304 }, { "epoch": 0.7560135492446439, "grad_norm": 0.6902297403539421, "learning_rate": 3.572954051922763e-05, "loss": 3.6468, "step": 3306 }, { "epoch": 0.7564709077136365, "grad_norm": 0.3910301586883713, "learning_rate": 3.5712808748869076e-05, "loss": 3.6997, "step": 3308 }, { "epoch": 0.756928266182629, "grad_norm": 0.43652753379999176, "learning_rate": 3.569607109862403e-05, "loss": 3.6522, "step": 3310 }, { "epoch": 0.7573856246516215, "grad_norm": 0.4090006762209854, "learning_rate": 3.567932757767921e-05, "loss": 3.8538, "step": 3312 }, { "epoch": 0.757842983120614, "grad_norm": 0.41230740241345465, "learning_rate": 3.566257819522454e-05, "loss": 3.6468, "step": 3314 }, { "epoch": 0.7583003415896066, "grad_norm": 0.6873692691659119, "learning_rate": 3.5645822960453177e-05, "loss": 3.6443, "step": 3316 }, { "epoch": 0.7587577000585991, "grad_norm": 0.4267345221337882, "learning_rate": 3.5629061882561494e-05, "loss": 3.7302, "step": 3318 }, { "epoch": 0.7592150585275915, "grad_norm": 0.45312937372661855, "learning_rate": 3.561229497074904e-05, "loss": 3.8704, "step": 3320 }, { "epoch": 0.759672416996584, "grad_norm": 0.43296188847402556, "learning_rate": 3.5595522234218605e-05, "loss": 3.9193, "step": 3322 }, { "epoch": 0.7601297754655766, "grad_norm": 0.4450686439601137, "learning_rate": 3.557874368217614e-05, "loss": 3.7005, "step": 3324 }, { "epoch": 0.7605871339345691, "grad_norm": 0.6836976762518702, "learning_rate": 3.556195932383084e-05, "loss": 3.7807, "step": 3326 }, { "epoch": 0.7610444924035616, "grad_norm": 0.5256457932268245, "learning_rate": 3.554516916839501e-05, "loss": 3.8154, "step": 3328 }, { "epoch": 0.7615018508725542, "grad_norm": 0.3742845584926679, "learning_rate": 3.55283732250842e-05, "loss": 3.7457, "step": 3330 }, { "epoch": 0.7619592093415467, "grad_norm": 0.4701628215529568, "learning_rate": 3.551157150311713e-05, "loss": 3.9576, "step": 3332 }, { "epoch": 0.7624165678105392, "grad_norm": 0.47530243744440864, "learning_rate": 3.549476401171566e-05, "loss": 3.8349, "step": 3334 }, { "epoch": 0.7628739262795318, "grad_norm": 0.5998671533620281, "learning_rate": 3.547795076010483e-05, "loss": 3.8057, "step": 3336 }, { "epoch": 0.7633312847485243, "grad_norm": 0.4256376321741368, "learning_rate": 3.5461131757512864e-05, "loss": 3.7373, "step": 3338 }, { "epoch": 0.7637886432175168, "grad_norm": 0.4618466027580383, "learning_rate": 3.5444307013171126e-05, "loss": 3.8101, "step": 3340 }, { "epoch": 0.7642460016865094, "grad_norm": 0.3574678777054946, "learning_rate": 3.54274765363141e-05, "loss": 3.8564, "step": 3342 }, { "epoch": 0.7647033601555019, "grad_norm": 0.4992839036431568, "learning_rate": 3.541064033617949e-05, "loss": 3.6612, "step": 3344 }, { "epoch": 0.7651607186244944, "grad_norm": 0.47402049449986283, "learning_rate": 3.539379842200807e-05, "loss": 3.9422, "step": 3346 }, { "epoch": 0.765618077093487, "grad_norm": 0.4258582544250805, "learning_rate": 3.537695080304378e-05, "loss": 3.8721, "step": 3348 }, { "epoch": 0.7660754355624795, "grad_norm": 0.37743252327991067, "learning_rate": 3.536009748853369e-05, "loss": 3.8216, "step": 3350 }, { "epoch": 0.766532794031472, "grad_norm": 0.39914580572696884, "learning_rate": 3.5343238487728017e-05, "loss": 3.7722, "step": 3352 }, { "epoch": 0.7669901525004645, "grad_norm": 0.40314106502607266, "learning_rate": 3.532637380988006e-05, "loss": 3.8543, "step": 3354 }, { "epoch": 0.7674475109694571, "grad_norm": 0.42956877715531994, "learning_rate": 3.5309503464246235e-05, "loss": 3.7105, "step": 3356 }, { "epoch": 0.7679048694384496, "grad_norm": 0.3764905116809457, "learning_rate": 3.529262746008611e-05, "loss": 3.7764, "step": 3358 }, { "epoch": 0.7683622279074421, "grad_norm": 0.39820778526496486, "learning_rate": 3.527574580666233e-05, "loss": 3.6741, "step": 3360 }, { "epoch": 0.7688195863764345, "grad_norm": 0.46985965358051884, "learning_rate": 3.525885851324064e-05, "loss": 3.7023, "step": 3362 }, { "epoch": 0.7692769448454271, "grad_norm": 0.36075932865281474, "learning_rate": 3.524196558908988e-05, "loss": 3.7853, "step": 3364 }, { "epoch": 0.7697343033144196, "grad_norm": 0.4018985359039462, "learning_rate": 3.522506704348201e-05, "loss": 3.9082, "step": 3366 }, { "epoch": 0.7701916617834121, "grad_norm": 0.513319148227877, "learning_rate": 3.520816288569202e-05, "loss": 3.9537, "step": 3368 }, { "epoch": 0.7706490202524047, "grad_norm": 0.4603716075936913, "learning_rate": 3.519125312499802e-05, "loss": 3.7942, "step": 3370 }, { "epoch": 0.7711063787213972, "grad_norm": 0.46127683465840985, "learning_rate": 3.517433777068121e-05, "loss": 3.5954, "step": 3372 }, { "epoch": 0.7715637371903897, "grad_norm": 0.5307837291319375, "learning_rate": 3.515741683202582e-05, "loss": 3.9818, "step": 3374 }, { "epoch": 0.7720210956593823, "grad_norm": 0.5613854545566022, "learning_rate": 3.5140490318319155e-05, "loss": 3.7865, "step": 3376 }, { "epoch": 0.7724784541283748, "grad_norm": 0.4680710484899067, "learning_rate": 3.512355823885159e-05, "loss": 3.621, "step": 3378 }, { "epoch": 0.7729358125973673, "grad_norm": 0.4924207013815382, "learning_rate": 3.510662060291656e-05, "loss": 3.957, "step": 3380 }, { "epoch": 0.7733931710663599, "grad_norm": 0.46961888211174235, "learning_rate": 3.508967741981054e-05, "loss": 3.817, "step": 3382 }, { "epoch": 0.7738505295353524, "grad_norm": 0.4291363041825334, "learning_rate": 3.5072728698833044e-05, "loss": 3.78, "step": 3384 }, { "epoch": 0.7743078880043449, "grad_norm": 0.4603320364033778, "learning_rate": 3.505577444928665e-05, "loss": 3.7183, "step": 3386 }, { "epoch": 0.7747652464733374, "grad_norm": 0.49417672006610014, "learning_rate": 3.503881468047693e-05, "loss": 3.7611, "step": 3388 }, { "epoch": 0.77522260494233, "grad_norm": 0.3343682787567288, "learning_rate": 3.502184940171252e-05, "loss": 3.596, "step": 3390 }, { "epoch": 0.7756799634113225, "grad_norm": 0.4478468969994614, "learning_rate": 3.500487862230507e-05, "loss": 3.7377, "step": 3392 }, { "epoch": 0.776137321880315, "grad_norm": 0.4206589727866931, "learning_rate": 3.4987902351569254e-05, "loss": 3.8651, "step": 3394 }, { "epoch": 0.7765946803493076, "grad_norm": 0.4194171444883656, "learning_rate": 3.4970920598822745e-05, "loss": 3.7687, "step": 3396 }, { "epoch": 0.7770520388183001, "grad_norm": 0.5804034261797569, "learning_rate": 3.4953933373386226e-05, "loss": 3.7822, "step": 3398 }, { "epoch": 0.7775093972872926, "grad_norm": 0.5171857390128866, "learning_rate": 3.493694068458341e-05, "loss": 3.8483, "step": 3400 }, { "epoch": 0.7779667557562852, "grad_norm": 0.4481741992858383, "learning_rate": 3.491994254174098e-05, "loss": 3.7947, "step": 3402 }, { "epoch": 0.7784241142252777, "grad_norm": 0.3743633787169613, "learning_rate": 3.490293895418862e-05, "loss": 3.9025, "step": 3404 }, { "epoch": 0.7788814726942701, "grad_norm": 0.4783036521542891, "learning_rate": 3.4885929931259014e-05, "loss": 3.7438, "step": 3406 }, { "epoch": 0.7793388311632626, "grad_norm": 0.4464651549199825, "learning_rate": 3.4868915482287816e-05, "loss": 3.7824, "step": 3408 }, { "epoch": 0.7797961896322552, "grad_norm": 0.5628667801221436, "learning_rate": 3.4851895616613664e-05, "loss": 3.838, "step": 3410 }, { "epoch": 0.7802535481012477, "grad_norm": 0.3842843041765097, "learning_rate": 3.4834870343578165e-05, "loss": 3.8631, "step": 3412 }, { "epoch": 0.7807109065702402, "grad_norm": 0.3726079465642191, "learning_rate": 3.4817839672525895e-05, "loss": 3.6927, "step": 3414 }, { "epoch": 0.7811682650392328, "grad_norm": 0.3881214595946487, "learning_rate": 3.48008036128044e-05, "loss": 3.8617, "step": 3416 }, { "epoch": 0.7816256235082253, "grad_norm": 0.42451577721045136, "learning_rate": 3.4783762173764174e-05, "loss": 3.8849, "step": 3418 }, { "epoch": 0.7820829819772178, "grad_norm": 0.4504997068387262, "learning_rate": 3.476671536475867e-05, "loss": 3.8643, "step": 3420 }, { "epoch": 0.7825403404462103, "grad_norm": 0.5106212990765592, "learning_rate": 3.474966319514429e-05, "loss": 3.8182, "step": 3422 }, { "epoch": 0.7829976989152029, "grad_norm": 0.35016015424683816, "learning_rate": 3.473260567428037e-05, "loss": 3.7754, "step": 3424 }, { "epoch": 0.7834550573841954, "grad_norm": 0.4351942062945953, "learning_rate": 3.471554281152917e-05, "loss": 3.8511, "step": 3426 }, { "epoch": 0.7839124158531879, "grad_norm": 0.35441411947869855, "learning_rate": 3.469847461625593e-05, "loss": 3.6893, "step": 3428 }, { "epoch": 0.7843697743221805, "grad_norm": 0.46306478065096945, "learning_rate": 3.4681401097828774e-05, "loss": 3.7348, "step": 3430 }, { "epoch": 0.784827132791173, "grad_norm": 0.4700636259090095, "learning_rate": 3.466432226561875e-05, "loss": 3.9511, "step": 3432 }, { "epoch": 0.7852844912601655, "grad_norm": 0.45648574878512677, "learning_rate": 3.464723812899985e-05, "loss": 3.9615, "step": 3434 }, { "epoch": 0.7857418497291581, "grad_norm": 0.4745610373414307, "learning_rate": 3.463014869734894e-05, "loss": 3.7038, "step": 3436 }, { "epoch": 0.7861992081981506, "grad_norm": 0.39521030492228804, "learning_rate": 3.461305398004583e-05, "loss": 4.0322, "step": 3438 }, { "epoch": 0.7866565666671431, "grad_norm": 0.3745468103313991, "learning_rate": 3.4595953986473195e-05, "loss": 3.9484, "step": 3440 }, { "epoch": 0.7871139251361357, "grad_norm": 0.48199403764471815, "learning_rate": 3.4578848726016645e-05, "loss": 3.7113, "step": 3442 }, { "epoch": 0.7875712836051282, "grad_norm": 0.5427629918002372, "learning_rate": 3.456173820806464e-05, "loss": 3.8146, "step": 3444 }, { "epoch": 0.7880286420741207, "grad_norm": 0.6044004059415296, "learning_rate": 3.454462244200856e-05, "loss": 3.927, "step": 3446 }, { "epoch": 0.7884860005431131, "grad_norm": 0.44019030305532436, "learning_rate": 3.452750143724265e-05, "loss": 3.7849, "step": 3448 }, { "epoch": 0.7889433590121057, "grad_norm": 0.3981518188737778, "learning_rate": 3.451037520316402e-05, "loss": 3.7225, "step": 3450 }, { "epoch": 0.7894007174810982, "grad_norm": 0.3976706998706614, "learning_rate": 3.449324374917268e-05, "loss": 3.838, "step": 3452 }, { "epoch": 0.7898580759500907, "grad_norm": 0.5177347486064916, "learning_rate": 3.447610708467146e-05, "loss": 3.8608, "step": 3454 }, { "epoch": 0.7903154344190833, "grad_norm": 0.492997653192463, "learning_rate": 3.4458965219066106e-05, "loss": 3.6782, "step": 3456 }, { "epoch": 0.7907727928880758, "grad_norm": 0.47599884218988736, "learning_rate": 3.444181816176517e-05, "loss": 3.7831, "step": 3458 }, { "epoch": 0.7912301513570683, "grad_norm": 0.4842528989160565, "learning_rate": 3.4424665922180064e-05, "loss": 3.6502, "step": 3460 }, { "epoch": 0.7916875098260608, "grad_norm": 0.40541745120433603, "learning_rate": 3.440750850972509e-05, "loss": 3.8047, "step": 3462 }, { "epoch": 0.7921448682950534, "grad_norm": 0.492048213565771, "learning_rate": 3.4390345933817326e-05, "loss": 3.759, "step": 3464 }, { "epoch": 0.7926022267640459, "grad_norm": 0.47908182706124425, "learning_rate": 3.43731782038767e-05, "loss": 3.818, "step": 3466 }, { "epoch": 0.7930595852330384, "grad_norm": 0.48616382014145604, "learning_rate": 3.4356005329326005e-05, "loss": 3.7935, "step": 3468 }, { "epoch": 0.793516943702031, "grad_norm": 0.3957449496459659, "learning_rate": 3.4338827319590814e-05, "loss": 3.7612, "step": 3470 }, { "epoch": 0.7939743021710235, "grad_norm": 0.3862588826179707, "learning_rate": 3.432164418409954e-05, "loss": 3.8339, "step": 3472 }, { "epoch": 0.794431660640016, "grad_norm": 0.4051618123582954, "learning_rate": 3.430445593228342e-05, "loss": 3.8281, "step": 3474 }, { "epoch": 0.7948890191090086, "grad_norm": 0.39658865473405375, "learning_rate": 3.428726257357647e-05, "loss": 3.8147, "step": 3476 }, { "epoch": 0.7953463775780011, "grad_norm": 0.4903292469157574, "learning_rate": 3.427006411741552e-05, "loss": 3.8867, "step": 3478 }, { "epoch": 0.7958037360469936, "grad_norm": 0.4569082339106562, "learning_rate": 3.425286057324022e-05, "loss": 3.8002, "step": 3480 }, { "epoch": 0.7962610945159861, "grad_norm": 0.3559304267977607, "learning_rate": 3.4235651950492974e-05, "loss": 3.8688, "step": 3482 }, { "epoch": 0.7967184529849787, "grad_norm": 0.5073411074076994, "learning_rate": 3.4218438258619e-05, "loss": 3.712, "step": 3484 }, { "epoch": 0.7971758114539712, "grad_norm": 0.4206086508703834, "learning_rate": 3.4201219507066305e-05, "loss": 3.8764, "step": 3486 }, { "epoch": 0.7976331699229637, "grad_norm": 0.5343165806297578, "learning_rate": 3.4183995705285646e-05, "loss": 3.8036, "step": 3488 }, { "epoch": 0.7980905283919562, "grad_norm": 0.4277279474480947, "learning_rate": 3.416676686273057e-05, "loss": 3.7062, "step": 3490 }, { "epoch": 0.7985478868609487, "grad_norm": 0.4277756553483029, "learning_rate": 3.4149532988857396e-05, "loss": 3.7023, "step": 3492 }, { "epoch": 0.7990052453299412, "grad_norm": 0.5091969617716279, "learning_rate": 3.413229409312518e-05, "loss": 3.6183, "step": 3494 }, { "epoch": 0.7994626037989337, "grad_norm": 0.4593901119211797, "learning_rate": 3.4115050184995754e-05, "loss": 3.8487, "step": 3496 }, { "epoch": 0.7999199622679263, "grad_norm": 0.5406700710847181, "learning_rate": 3.409780127393371e-05, "loss": 3.782, "step": 3498 }, { "epoch": 0.8003773207369188, "grad_norm": 0.451264656732348, "learning_rate": 3.408054736940635e-05, "loss": 3.8631, "step": 3500 }, { "epoch": 0.8008346792059113, "grad_norm": 0.46541539473204724, "learning_rate": 3.406328848088376e-05, "loss": 3.8492, "step": 3502 }, { "epoch": 0.8012920376749039, "grad_norm": 0.4753842092440012, "learning_rate": 3.404602461783873e-05, "loss": 3.3891, "step": 3504 }, { "epoch": 0.8017493961438964, "grad_norm": 0.4118977948192034, "learning_rate": 3.402875578974679e-05, "loss": 3.8202, "step": 3506 }, { "epoch": 0.8022067546128889, "grad_norm": 0.5980623398967078, "learning_rate": 3.401148200608621e-05, "loss": 3.6457, "step": 3508 }, { "epoch": 0.8026641130818815, "grad_norm": 0.44110348538375393, "learning_rate": 3.3994203276337955e-05, "loss": 3.8049, "step": 3510 }, { "epoch": 0.803121471550874, "grad_norm": 0.6806852318250726, "learning_rate": 3.397691960998571e-05, "loss": 3.8512, "step": 3512 }, { "epoch": 0.8035788300198665, "grad_norm": 0.3453364933533194, "learning_rate": 3.395963101651589e-05, "loss": 3.7816, "step": 3514 }, { "epoch": 0.804036188488859, "grad_norm": 0.4230980980349126, "learning_rate": 3.394233750541759e-05, "loss": 3.7764, "step": 3516 }, { "epoch": 0.8044935469578516, "grad_norm": 0.6761669315221336, "learning_rate": 3.392503908618262e-05, "loss": 3.565, "step": 3518 }, { "epoch": 0.8049509054268441, "grad_norm": 0.4585607806024426, "learning_rate": 3.390773576830548e-05, "loss": 3.6896, "step": 3520 }, { "epoch": 0.8054082638958366, "grad_norm": 0.4465146986441863, "learning_rate": 3.3890427561283344e-05, "loss": 3.623, "step": 3522 }, { "epoch": 0.8058656223648292, "grad_norm": 0.42235183259298376, "learning_rate": 3.38731144746161e-05, "loss": 3.6934, "step": 3524 }, { "epoch": 0.8063229808338217, "grad_norm": 0.3610141936890677, "learning_rate": 3.385579651780628e-05, "loss": 3.6054, "step": 3526 }, { "epoch": 0.8067803393028142, "grad_norm": 0.4517374663300466, "learning_rate": 3.383847370035912e-05, "loss": 3.7847, "step": 3528 }, { "epoch": 0.8072376977718068, "grad_norm": 0.3763521770498964, "learning_rate": 3.382114603178249e-05, "loss": 3.9278, "step": 3530 }, { "epoch": 0.8076950562407992, "grad_norm": 0.5066741802835913, "learning_rate": 3.380381352158697e-05, "loss": 3.7797, "step": 3532 }, { "epoch": 0.8081524147097917, "grad_norm": 0.41886744631448264, "learning_rate": 3.3786476179285745e-05, "loss": 3.7847, "step": 3534 }, { "epoch": 0.8086097731787842, "grad_norm": 0.44412367225575194, "learning_rate": 3.376913401439468e-05, "loss": 3.9674, "step": 3536 }, { "epoch": 0.8090671316477768, "grad_norm": 0.478660623808247, "learning_rate": 3.37517870364323e-05, "loss": 3.758, "step": 3538 }, { "epoch": 0.8095244901167693, "grad_norm": 0.6102246205882698, "learning_rate": 3.373443525491974e-05, "loss": 3.8085, "step": 3540 }, { "epoch": 0.8099818485857618, "grad_norm": 0.4541036790367762, "learning_rate": 3.371707867938079e-05, "loss": 3.9194, "step": 3542 }, { "epoch": 0.8104392070547544, "grad_norm": 0.5460517954502465, "learning_rate": 3.369971731934186e-05, "loss": 3.7441, "step": 3544 }, { "epoch": 0.8108965655237469, "grad_norm": 0.416061183960025, "learning_rate": 3.368235118433201e-05, "loss": 3.8559, "step": 3546 }, { "epoch": 0.8113539239927394, "grad_norm": 0.4962137032213832, "learning_rate": 3.3664980283882905e-05, "loss": 3.6596, "step": 3548 }, { "epoch": 0.811811282461732, "grad_norm": 0.5469057891897818, "learning_rate": 3.36476046275288e-05, "loss": 3.8509, "step": 3550 }, { "epoch": 0.8122686409307245, "grad_norm": 0.6386712683811865, "learning_rate": 3.363022422480662e-05, "loss": 3.7633, "step": 3552 }, { "epoch": 0.812725999399717, "grad_norm": 0.38321870544677217, "learning_rate": 3.3612839085255836e-05, "loss": 3.68, "step": 3554 }, { "epoch": 0.8131833578687095, "grad_norm": 0.3871786099907317, "learning_rate": 3.359544921841855e-05, "loss": 3.7872, "step": 3556 }, { "epoch": 0.8136407163377021, "grad_norm": 0.431009029015053, "learning_rate": 3.357805463383946e-05, "loss": 3.6871, "step": 3558 }, { "epoch": 0.8140980748066946, "grad_norm": 0.5556607908343656, "learning_rate": 3.356065534106584e-05, "loss": 3.835, "step": 3560 }, { "epoch": 0.8145554332756871, "grad_norm": 0.4266305904050093, "learning_rate": 3.354325134964755e-05, "loss": 3.8039, "step": 3562 }, { "epoch": 0.8150127917446797, "grad_norm": 0.4391228515578958, "learning_rate": 3.3525842669137046e-05, "loss": 3.7154, "step": 3564 }, { "epoch": 0.8154701502136722, "grad_norm": 0.5166735247443967, "learning_rate": 3.3508429309089334e-05, "loss": 3.6707, "step": 3566 }, { "epoch": 0.8159275086826647, "grad_norm": 0.4144065615053979, "learning_rate": 3.3491011279061996e-05, "loss": 3.7616, "step": 3568 }, { "epoch": 0.8163848671516573, "grad_norm": 0.5080168972941009, "learning_rate": 3.347358858861519e-05, "loss": 3.9227, "step": 3570 }, { "epoch": 0.8168422256206498, "grad_norm": 0.4659947962235026, "learning_rate": 3.345616124731162e-05, "loss": 3.825, "step": 3572 }, { "epoch": 0.8172995840896422, "grad_norm": 0.47065327803826695, "learning_rate": 3.3438729264716534e-05, "loss": 3.7437, "step": 3574 }, { "epoch": 0.8177569425586347, "grad_norm": 0.4960428046552968, "learning_rate": 3.342129265039775e-05, "loss": 3.7555, "step": 3576 }, { "epoch": 0.8182143010276273, "grad_norm": 0.3998451983897789, "learning_rate": 3.3403851413925614e-05, "loss": 3.8293, "step": 3578 }, { "epoch": 0.8186716594966198, "grad_norm": 0.420606507494237, "learning_rate": 3.338640556487301e-05, "loss": 3.9021, "step": 3580 }, { "epoch": 0.8191290179656123, "grad_norm": 0.4548664992581972, "learning_rate": 3.336895511281536e-05, "loss": 3.6885, "step": 3582 }, { "epoch": 0.8195863764346049, "grad_norm": 0.4468055924620551, "learning_rate": 3.3351500067330596e-05, "loss": 3.6476, "step": 3584 }, { "epoch": 0.8200437349035974, "grad_norm": 0.3899023619721872, "learning_rate": 3.3334040437999184e-05, "loss": 3.7661, "step": 3586 }, { "epoch": 0.8205010933725899, "grad_norm": 0.4536615023733629, "learning_rate": 3.331657623440412e-05, "loss": 3.8965, "step": 3588 }, { "epoch": 0.8209584518415824, "grad_norm": 0.4647897961503501, "learning_rate": 3.3299107466130884e-05, "loss": 3.7072, "step": 3590 }, { "epoch": 0.821415810310575, "grad_norm": 0.3799217223807014, "learning_rate": 3.3281634142767474e-05, "loss": 3.771, "step": 3592 }, { "epoch": 0.8218731687795675, "grad_norm": 0.4380396171772733, "learning_rate": 3.326415627390439e-05, "loss": 3.6249, "step": 3594 }, { "epoch": 0.82233052724856, "grad_norm": 0.4838014868263983, "learning_rate": 3.324667386913462e-05, "loss": 3.8264, "step": 3596 }, { "epoch": 0.8227878857175526, "grad_norm": 0.3819954190291601, "learning_rate": 3.322918693805365e-05, "loss": 3.6407, "step": 3598 }, { "epoch": 0.8232452441865451, "grad_norm": 0.4216812359921469, "learning_rate": 3.321169549025943e-05, "loss": 3.6274, "step": 3600 }, { "epoch": 0.8237026026555376, "grad_norm": 0.5385233352709071, "learning_rate": 3.319419953535242e-05, "loss": 3.7227, "step": 3602 }, { "epoch": 0.8241599611245302, "grad_norm": 0.5524503429675205, "learning_rate": 3.3176699082935545e-05, "loss": 3.7095, "step": 3604 }, { "epoch": 0.8246173195935227, "grad_norm": 0.4548035804468364, "learning_rate": 3.3159194142614175e-05, "loss": 3.6396, "step": 3606 }, { "epoch": 0.8250746780625152, "grad_norm": 0.4469688463705029, "learning_rate": 3.3141684723996165e-05, "loss": 3.6319, "step": 3608 }, { "epoch": 0.8255320365315078, "grad_norm": 0.43925899032224536, "learning_rate": 3.312417083669183e-05, "loss": 3.6725, "step": 3610 }, { "epoch": 0.8259893950005003, "grad_norm": 0.4100657590315923, "learning_rate": 3.310665249031392e-05, "loss": 3.7626, "step": 3612 }, { "epoch": 0.8264467534694928, "grad_norm": 0.3878288135278857, "learning_rate": 3.308912969447765e-05, "loss": 3.8346, "step": 3614 }, { "epoch": 0.8269041119384852, "grad_norm": 0.4407882753155129, "learning_rate": 3.307160245880068e-05, "loss": 3.7997, "step": 3616 }, { "epoch": 0.8273614704074778, "grad_norm": 0.4975290036972302, "learning_rate": 3.3054070792903073e-05, "loss": 3.743, "step": 3618 }, { "epoch": 0.8278188288764703, "grad_norm": 0.45287492033567894, "learning_rate": 3.3036534706407366e-05, "loss": 3.691, "step": 3620 }, { "epoch": 0.8282761873454628, "grad_norm": 0.38413933065404676, "learning_rate": 3.301899420893851e-05, "loss": 3.6947, "step": 3622 }, { "epoch": 0.8287335458144554, "grad_norm": 0.5219673748837554, "learning_rate": 3.3001449310123854e-05, "loss": 3.8363, "step": 3624 }, { "epoch": 0.8291909042834479, "grad_norm": 0.4563879392425312, "learning_rate": 3.2983900019593195e-05, "loss": 3.7478, "step": 3626 }, { "epoch": 0.8296482627524404, "grad_norm": 0.41323847918942963, "learning_rate": 3.296634634697871e-05, "loss": 3.688, "step": 3628 }, { "epoch": 0.8301056212214329, "grad_norm": 0.4355030294339246, "learning_rate": 3.294878830191501e-05, "loss": 3.8204, "step": 3630 }, { "epoch": 0.8305629796904255, "grad_norm": 0.5307588831624893, "learning_rate": 3.2931225894039084e-05, "loss": 3.8007, "step": 3632 }, { "epoch": 0.831020338159418, "grad_norm": 0.3585407483859201, "learning_rate": 3.291365913299033e-05, "loss": 3.7018, "step": 3634 }, { "epoch": 0.8314776966284105, "grad_norm": 0.44873993275222923, "learning_rate": 3.2896088028410534e-05, "loss": 3.8399, "step": 3636 }, { "epoch": 0.8319350550974031, "grad_norm": 0.423201259998355, "learning_rate": 3.287851258994385e-05, "loss": 3.688, "step": 3638 }, { "epoch": 0.8323924135663956, "grad_norm": 0.3727534473267454, "learning_rate": 3.286093282723682e-05, "loss": 3.8436, "step": 3640 }, { "epoch": 0.8328497720353881, "grad_norm": 0.5605196265001059, "learning_rate": 3.2843348749938376e-05, "loss": 3.7772, "step": 3642 }, { "epoch": 0.8333071305043807, "grad_norm": 0.5115344174179982, "learning_rate": 3.28257603676998e-05, "loss": 3.5949, "step": 3644 }, { "epoch": 0.8337644889733732, "grad_norm": 0.4600324959783095, "learning_rate": 3.280816769017473e-05, "loss": 3.834, "step": 3646 }, { "epoch": 0.8342218474423657, "grad_norm": 0.5123458688685749, "learning_rate": 3.279057072701918e-05, "loss": 3.6878, "step": 3648 }, { "epoch": 0.8346792059113582, "grad_norm": 0.5002984407964486, "learning_rate": 3.2772969487891514e-05, "loss": 3.8555, "step": 3650 }, { "epoch": 0.8351365643803508, "grad_norm": 0.42283547527202753, "learning_rate": 3.275536398245243e-05, "loss": 3.6961, "step": 3652 }, { "epoch": 0.8355939228493433, "grad_norm": 0.41880176992807505, "learning_rate": 3.273775422036498e-05, "loss": 3.7126, "step": 3654 }, { "epoch": 0.8360512813183358, "grad_norm": 0.3993702050639071, "learning_rate": 3.2720140211294525e-05, "loss": 3.6455, "step": 3656 }, { "epoch": 0.8365086397873283, "grad_norm": 0.4907457336768913, "learning_rate": 3.270252196490881e-05, "loss": 3.5651, "step": 3658 }, { "epoch": 0.8369659982563208, "grad_norm": 0.4165157124141858, "learning_rate": 3.268489949087786e-05, "loss": 3.7057, "step": 3660 }, { "epoch": 0.8374233567253133, "grad_norm": 0.5453917688943265, "learning_rate": 3.266727279887404e-05, "loss": 3.8323, "step": 3662 }, { "epoch": 0.8378807151943058, "grad_norm": 0.5462738275023149, "learning_rate": 3.264964189857202e-05, "loss": 3.7684, "step": 3664 }, { "epoch": 0.8383380736632984, "grad_norm": 0.6203180903800212, "learning_rate": 3.263200679964879e-05, "loss": 3.9536, "step": 3666 }, { "epoch": 0.8387954321322909, "grad_norm": 0.41901560729395504, "learning_rate": 3.2614367511783636e-05, "loss": 3.7845, "step": 3668 }, { "epoch": 0.8392527906012834, "grad_norm": 0.6062752059730424, "learning_rate": 3.259672404465815e-05, "loss": 3.7106, "step": 3670 }, { "epoch": 0.839710149070276, "grad_norm": 0.427312425096457, "learning_rate": 3.257907640795622e-05, "loss": 4.0044, "step": 3672 }, { "epoch": 0.8401675075392685, "grad_norm": 0.43637700366322646, "learning_rate": 3.2561424611364e-05, "loss": 3.676, "step": 3674 }, { "epoch": 0.840624866008261, "grad_norm": 0.4607297784897688, "learning_rate": 3.2543768664569966e-05, "loss": 3.9212, "step": 3676 }, { "epoch": 0.8410822244772536, "grad_norm": 0.36389739586364056, "learning_rate": 3.2526108577264834e-05, "loss": 3.8987, "step": 3678 }, { "epoch": 0.8415395829462461, "grad_norm": 0.4797266524797574, "learning_rate": 3.250844435914162e-05, "loss": 3.9284, "step": 3680 }, { "epoch": 0.8419969414152386, "grad_norm": 0.4429382306977418, "learning_rate": 3.249077601989558e-05, "loss": 3.8695, "step": 3682 }, { "epoch": 0.8424542998842312, "grad_norm": 0.3958557670493589, "learning_rate": 3.247310356922428e-05, "loss": 3.6824, "step": 3684 }, { "epoch": 0.8429116583532237, "grad_norm": 0.3712182890067637, "learning_rate": 3.2455427016827477e-05, "loss": 3.6358, "step": 3686 }, { "epoch": 0.8433690168222162, "grad_norm": 0.44486508563817506, "learning_rate": 3.243774637240723e-05, "loss": 3.6789, "step": 3688 }, { "epoch": 0.8438263752912087, "grad_norm": 0.45149501771490813, "learning_rate": 3.242006164566782e-05, "loss": 3.6926, "step": 3690 }, { "epoch": 0.8442837337602013, "grad_norm": 0.4885269309900831, "learning_rate": 3.2402372846315785e-05, "loss": 3.7033, "step": 3692 }, { "epoch": 0.8447410922291938, "grad_norm": 0.8315786644111065, "learning_rate": 3.238467998405988e-05, "loss": 3.7217, "step": 3694 }, { "epoch": 0.8451984506981863, "grad_norm": 0.40514765246677575, "learning_rate": 3.23669830686111e-05, "loss": 3.7752, "step": 3696 }, { "epoch": 0.8456558091671789, "grad_norm": 0.4291059025860556, "learning_rate": 3.2349282109682665e-05, "loss": 3.6988, "step": 3698 }, { "epoch": 0.8461131676361714, "grad_norm": 0.4579408023184631, "learning_rate": 3.233157711699002e-05, "loss": 3.6935, "step": 3700 }, { "epoch": 0.8465705261051638, "grad_norm": 0.46208158846434855, "learning_rate": 3.231386810025081e-05, "loss": 3.7533, "step": 3702 }, { "epoch": 0.8470278845741563, "grad_norm": 0.5085085573348463, "learning_rate": 3.229615506918489e-05, "loss": 3.6094, "step": 3704 }, { "epoch": 0.8474852430431489, "grad_norm": 0.5039851400440224, "learning_rate": 3.227843803351434e-05, "loss": 3.7266, "step": 3706 }, { "epoch": 0.8479426015121414, "grad_norm": 0.4407101941095418, "learning_rate": 3.226071700296341e-05, "loss": 3.7304, "step": 3708 }, { "epoch": 0.8483999599811339, "grad_norm": 0.5902412540380796, "learning_rate": 3.224299198725855e-05, "loss": 3.6376, "step": 3710 }, { "epoch": 0.8488573184501265, "grad_norm": 0.5788135552165508, "learning_rate": 3.222526299612842e-05, "loss": 3.7739, "step": 3712 }, { "epoch": 0.849314676919119, "grad_norm": 0.3806217919764134, "learning_rate": 3.220753003930382e-05, "loss": 3.7433, "step": 3714 }, { "epoch": 0.8497720353881115, "grad_norm": 0.4621369709149404, "learning_rate": 3.218979312651778e-05, "loss": 3.8062, "step": 3716 }, { "epoch": 0.850229393857104, "grad_norm": 0.40086946464692197, "learning_rate": 3.217205226750545e-05, "loss": 3.706, "step": 3718 }, { "epoch": 0.8506867523260966, "grad_norm": 0.4544516577720729, "learning_rate": 3.215430747200416e-05, "loss": 3.7709, "step": 3720 }, { "epoch": 0.8511441107950891, "grad_norm": 0.3954432305248917, "learning_rate": 3.213655874975344e-05, "loss": 3.7724, "step": 3722 }, { "epoch": 0.8516014692640816, "grad_norm": 0.5528961591886957, "learning_rate": 3.211880611049492e-05, "loss": 3.6969, "step": 3724 }, { "epoch": 0.8520588277330742, "grad_norm": 0.3742742071292524, "learning_rate": 3.21010495639724e-05, "loss": 3.6471, "step": 3726 }, { "epoch": 0.8525161862020667, "grad_norm": 0.5259729441204186, "learning_rate": 3.208328911993185e-05, "loss": 3.8199, "step": 3728 }, { "epoch": 0.8529735446710592, "grad_norm": 0.42955911625750876, "learning_rate": 3.206552478812133e-05, "loss": 3.5856, "step": 3730 }, { "epoch": 0.8534309031400518, "grad_norm": 0.4296562183437548, "learning_rate": 3.2047756578291085e-05, "loss": 3.7852, "step": 3732 }, { "epoch": 0.8538882616090443, "grad_norm": 0.4541911703318133, "learning_rate": 3.202998450019345e-05, "loss": 3.7166, "step": 3734 }, { "epoch": 0.8543456200780368, "grad_norm": 0.6106162512005093, "learning_rate": 3.2012208563582906e-05, "loss": 3.9606, "step": 3736 }, { "epoch": 0.8548029785470294, "grad_norm": 0.5456520302403286, "learning_rate": 3.1994428778216036e-05, "loss": 3.8368, "step": 3738 }, { "epoch": 0.8552603370160219, "grad_norm": 0.5834185305322563, "learning_rate": 3.197664515385156e-05, "loss": 3.8677, "step": 3740 }, { "epoch": 0.8557176954850144, "grad_norm": 0.4439911395350862, "learning_rate": 3.195885770025026e-05, "loss": 3.6589, "step": 3742 }, { "epoch": 0.8561750539540068, "grad_norm": 0.34515759156546194, "learning_rate": 3.194106642717508e-05, "loss": 3.7389, "step": 3744 }, { "epoch": 0.8566324124229994, "grad_norm": 0.3849261379915617, "learning_rate": 3.1923271344391e-05, "loss": 3.6793, "step": 3746 }, { "epoch": 0.8570897708919919, "grad_norm": 0.5111825769357381, "learning_rate": 3.190547246166513e-05, "loss": 3.7454, "step": 3748 }, { "epoch": 0.8575471293609844, "grad_norm": 0.5411404352099385, "learning_rate": 3.188766978876666e-05, "loss": 3.8404, "step": 3750 }, { "epoch": 0.858004487829977, "grad_norm": 0.42545829563976917, "learning_rate": 3.1869863335466847e-05, "loss": 3.5795, "step": 3752 }, { "epoch": 0.8584618462989695, "grad_norm": 0.5066072980967996, "learning_rate": 3.1852053111539044e-05, "loss": 3.6807, "step": 3754 }, { "epoch": 0.858919204767962, "grad_norm": 0.48938416088781594, "learning_rate": 3.1834239126758644e-05, "loss": 3.6605, "step": 3756 }, { "epoch": 0.8593765632369545, "grad_norm": 0.4609584318677727, "learning_rate": 3.181642139090314e-05, "loss": 3.7297, "step": 3758 }, { "epoch": 0.8598339217059471, "grad_norm": 0.4949772804517922, "learning_rate": 3.179859991375204e-05, "loss": 3.8276, "step": 3760 }, { "epoch": 0.8602912801749396, "grad_norm": 0.46692539185705, "learning_rate": 3.178077470508696e-05, "loss": 3.6586, "step": 3762 }, { "epoch": 0.8607486386439321, "grad_norm": 0.4503875950680773, "learning_rate": 3.1762945774691506e-05, "loss": 3.7509, "step": 3764 }, { "epoch": 0.8612059971129247, "grad_norm": 0.4792987876300533, "learning_rate": 3.1745113132351376e-05, "loss": 3.6941, "step": 3766 }, { "epoch": 0.8616633555819172, "grad_norm": 0.39173928595366186, "learning_rate": 3.172727678785427e-05, "loss": 3.6572, "step": 3768 }, { "epoch": 0.8621207140509097, "grad_norm": 0.4026238105623713, "learning_rate": 3.170943675098994e-05, "loss": 3.5588, "step": 3770 }, { "epoch": 0.8625780725199023, "grad_norm": 0.4149891183759372, "learning_rate": 3.169159303155017e-05, "loss": 3.8561, "step": 3772 }, { "epoch": 0.8630354309888948, "grad_norm": 0.4206742222738918, "learning_rate": 3.1673745639328737e-05, "loss": 3.8537, "step": 3774 }, { "epoch": 0.8634927894578873, "grad_norm": 0.5667544117012507, "learning_rate": 3.165589458412145e-05, "loss": 3.6492, "step": 3776 }, { "epoch": 0.8639501479268799, "grad_norm": 0.416658772950104, "learning_rate": 3.163803987572615e-05, "loss": 3.7311, "step": 3778 }, { "epoch": 0.8644075063958724, "grad_norm": 0.4133067161054988, "learning_rate": 3.1620181523942645e-05, "loss": 3.7237, "step": 3780 }, { "epoch": 0.8648648648648649, "grad_norm": 0.38605955564522637, "learning_rate": 3.1602319538572766e-05, "loss": 3.7288, "step": 3782 }, { "epoch": 0.8653222233338574, "grad_norm": 0.3531371245370439, "learning_rate": 3.158445392942033e-05, "loss": 3.7778, "step": 3784 }, { "epoch": 0.8657795818028499, "grad_norm": 0.4680216216468356, "learning_rate": 3.156658470629115e-05, "loss": 3.838, "step": 3786 }, { "epoch": 0.8662369402718424, "grad_norm": 0.4542010126630042, "learning_rate": 3.154871187899302e-05, "loss": 3.6913, "step": 3788 }, { "epoch": 0.8666942987408349, "grad_norm": 0.39288270816044174, "learning_rate": 3.15308354573357e-05, "loss": 3.8193, "step": 3790 }, { "epoch": 0.8671516572098275, "grad_norm": 0.4133682008073959, "learning_rate": 3.151295545113095e-05, "loss": 3.7259, "step": 3792 }, { "epoch": 0.86760901567882, "grad_norm": 0.49383390655295484, "learning_rate": 3.1495071870192465e-05, "loss": 3.7498, "step": 3794 }, { "epoch": 0.8680663741478125, "grad_norm": 0.4551810610681232, "learning_rate": 3.147718472433593e-05, "loss": 3.7883, "step": 3796 }, { "epoch": 0.868523732616805, "grad_norm": 0.4813239232793019, "learning_rate": 3.145929402337896e-05, "loss": 3.6768, "step": 3798 }, { "epoch": 0.8689810910857976, "grad_norm": 0.4334697889857738, "learning_rate": 3.144139977714115e-05, "loss": 3.685, "step": 3800 }, { "epoch": 0.8694384495547901, "grad_norm": 0.38880962058765445, "learning_rate": 3.142350199544403e-05, "loss": 3.7347, "step": 3802 }, { "epoch": 0.8698958080237826, "grad_norm": 0.41665566151708366, "learning_rate": 3.140560068811104e-05, "loss": 3.9073, "step": 3804 }, { "epoch": 0.8703531664927752, "grad_norm": 0.42942234874896723, "learning_rate": 3.1387695864967614e-05, "loss": 3.7393, "step": 3806 }, { "epoch": 0.8708105249617677, "grad_norm": 0.4021010361202014, "learning_rate": 3.136978753584107e-05, "loss": 3.5565, "step": 3808 }, { "epoch": 0.8712678834307602, "grad_norm": 0.4815162157693268, "learning_rate": 3.135187571056065e-05, "loss": 3.8723, "step": 3810 }, { "epoch": 0.8717252418997528, "grad_norm": 0.5928783292522086, "learning_rate": 3.1333960398957566e-05, "loss": 3.8489, "step": 3812 }, { "epoch": 0.8721826003687453, "grad_norm": 0.4432183600190478, "learning_rate": 3.131604161086486e-05, "loss": 3.8649, "step": 3814 }, { "epoch": 0.8726399588377378, "grad_norm": 0.43638186964748643, "learning_rate": 3.129811935611757e-05, "loss": 3.633, "step": 3816 }, { "epoch": 0.8730973173067303, "grad_norm": 0.4998167715516185, "learning_rate": 3.128019364455258e-05, "loss": 3.7861, "step": 3818 }, { "epoch": 0.8735546757757229, "grad_norm": 0.60905245045043, "learning_rate": 3.126226448600868e-05, "loss": 3.6353, "step": 3820 }, { "epoch": 0.8740120342447154, "grad_norm": 0.4034724652823444, "learning_rate": 3.124433189032656e-05, "loss": 3.3877, "step": 3822 }, { "epoch": 0.8744693927137079, "grad_norm": 0.37340813764752195, "learning_rate": 3.122639586734881e-05, "loss": 3.7796, "step": 3824 }, { "epoch": 0.8749267511827005, "grad_norm": 0.41885819674228075, "learning_rate": 3.1208456426919876e-05, "loss": 3.768, "step": 3826 }, { "epoch": 0.8753841096516929, "grad_norm": 0.44917103057833974, "learning_rate": 3.119051357888608e-05, "loss": 3.7582, "step": 3828 }, { "epoch": 0.8758414681206854, "grad_norm": 0.42667957348164154, "learning_rate": 3.117256733309565e-05, "loss": 3.7078, "step": 3830 }, { "epoch": 0.876298826589678, "grad_norm": 0.43122780162235796, "learning_rate": 3.115461769939862e-05, "loss": 3.7635, "step": 3832 }, { "epoch": 0.8767561850586705, "grad_norm": 0.363711604684245, "learning_rate": 3.113666468764695e-05, "loss": 3.5817, "step": 3834 }, { "epoch": 0.877213543527663, "grad_norm": 0.43882076966958816, "learning_rate": 3.1118708307694414e-05, "loss": 3.7281, "step": 3836 }, { "epoch": 0.8776709019966555, "grad_norm": 0.42269554519611663, "learning_rate": 3.110074856939662e-05, "loss": 3.6577, "step": 3838 }, { "epoch": 0.8781282604656481, "grad_norm": 0.32118185045937814, "learning_rate": 3.1082785482611054e-05, "loss": 3.5827, "step": 3840 }, { "epoch": 0.8785856189346406, "grad_norm": 0.4423364105478104, "learning_rate": 3.106481905719703e-05, "loss": 3.5903, "step": 3842 }, { "epoch": 0.8790429774036331, "grad_norm": 0.4509315015535317, "learning_rate": 3.104684930301569e-05, "loss": 3.97, "step": 3844 }, { "epoch": 0.8795003358726257, "grad_norm": 0.4254544079815276, "learning_rate": 3.102887622993001e-05, "loss": 3.6602, "step": 3846 }, { "epoch": 0.8799576943416182, "grad_norm": 0.47350441676942906, "learning_rate": 3.1010899847804766e-05, "loss": 3.727, "step": 3848 }, { "epoch": 0.8804150528106107, "grad_norm": 0.5040552673010138, "learning_rate": 3.099292016650658e-05, "loss": 3.8261, "step": 3850 }, { "epoch": 0.8808724112796033, "grad_norm": 0.4527827812167353, "learning_rate": 3.097493719590387e-05, "loss": 3.7218, "step": 3852 }, { "epoch": 0.8813297697485958, "grad_norm": 0.5475431175759422, "learning_rate": 3.0956950945866844e-05, "loss": 3.6993, "step": 3854 }, { "epoch": 0.8817871282175883, "grad_norm": 0.3565910355017739, "learning_rate": 3.093896142626755e-05, "loss": 3.6117, "step": 3856 }, { "epoch": 0.8822444866865808, "grad_norm": 0.5396270836088961, "learning_rate": 3.0920968646979794e-05, "loss": 3.6616, "step": 3858 }, { "epoch": 0.8827018451555734, "grad_norm": 0.44887094716701936, "learning_rate": 3.090297261787919e-05, "loss": 3.77, "step": 3860 }, { "epoch": 0.8831592036245659, "grad_norm": 0.3702691402884174, "learning_rate": 3.0884973348843126e-05, "loss": 3.7467, "step": 3862 }, { "epoch": 0.8836165620935584, "grad_norm": 0.37935656893985575, "learning_rate": 3.086697084975076e-05, "loss": 3.6025, "step": 3864 }, { "epoch": 0.884073920562551, "grad_norm": 0.4755103268529848, "learning_rate": 3.084896513048306e-05, "loss": 3.6344, "step": 3866 }, { "epoch": 0.8845312790315435, "grad_norm": 0.44679043443189653, "learning_rate": 3.0830956200922723e-05, "loss": 3.7741, "step": 3868 }, { "epoch": 0.8849886375005359, "grad_norm": 0.45101427240266345, "learning_rate": 3.081294407095421e-05, "loss": 3.616, "step": 3870 }, { "epoch": 0.8854459959695284, "grad_norm": 0.4925733295301023, "learning_rate": 3.079492875046377e-05, "loss": 3.7483, "step": 3872 }, { "epoch": 0.885903354438521, "grad_norm": 0.49605289801654745, "learning_rate": 3.0776910249339375e-05, "loss": 3.7728, "step": 3874 }, { "epoch": 0.8863607129075135, "grad_norm": 0.5433401580198606, "learning_rate": 3.0758888577470744e-05, "loss": 3.6312, "step": 3876 }, { "epoch": 0.886818071376506, "grad_norm": 0.5011466706048979, "learning_rate": 3.074086374474935e-05, "loss": 3.7261, "step": 3878 }, { "epoch": 0.8872754298454986, "grad_norm": 0.40156932483617763, "learning_rate": 3.07228357610684e-05, "loss": 3.6667, "step": 3880 }, { "epoch": 0.8877327883144911, "grad_norm": 0.31857704257233943, "learning_rate": 3.070480463632282e-05, "loss": 3.6771, "step": 3882 }, { "epoch": 0.8881901467834836, "grad_norm": 0.49029589263235046, "learning_rate": 3.068677038040925e-05, "loss": 3.7791, "step": 3884 }, { "epoch": 0.8886475052524762, "grad_norm": 0.43972054715874054, "learning_rate": 3.066873300322608e-05, "loss": 3.6955, "step": 3886 }, { "epoch": 0.8891048637214687, "grad_norm": 0.3483392350234565, "learning_rate": 3.065069251467339e-05, "loss": 3.7299, "step": 3888 }, { "epoch": 0.8895622221904612, "grad_norm": 0.4217270662407163, "learning_rate": 3.063264892465299e-05, "loss": 3.7282, "step": 3890 }, { "epoch": 0.8900195806594537, "grad_norm": 0.5399287078297009, "learning_rate": 3.0614602243068344e-05, "loss": 3.5997, "step": 3892 }, { "epoch": 0.8904769391284463, "grad_norm": 0.4878694705332139, "learning_rate": 3.059655247982467e-05, "loss": 3.7588, "step": 3894 }, { "epoch": 0.8909342975974388, "grad_norm": 0.42296281720930334, "learning_rate": 3.0578499644828845e-05, "loss": 3.8763, "step": 3896 }, { "epoch": 0.8913916560664313, "grad_norm": 0.4093089661957519, "learning_rate": 3.056044374798944e-05, "loss": 3.6426, "step": 3898 }, { "epoch": 0.8918490145354239, "grad_norm": 0.3298594184742828, "learning_rate": 3.054238479921671e-05, "loss": 3.641, "step": 3900 }, { "epoch": 0.8923063730044164, "grad_norm": 0.4989727166185952, "learning_rate": 3.0524322808422565e-05, "loss": 3.8151, "step": 3902 }, { "epoch": 0.8927637314734089, "grad_norm": 0.3938102365769879, "learning_rate": 3.0506257785520624e-05, "loss": 3.7842, "step": 3904 }, { "epoch": 0.8932210899424015, "grad_norm": 0.41791104426394143, "learning_rate": 3.0488189740426133e-05, "loss": 3.8342, "step": 3906 }, { "epoch": 0.893678448411394, "grad_norm": 0.4327220230006306, "learning_rate": 3.0470118683056016e-05, "loss": 3.7366, "step": 3908 }, { "epoch": 0.8941358068803865, "grad_norm": 0.35862383913614815, "learning_rate": 3.0452044623328835e-05, "loss": 3.8079, "step": 3910 }, { "epoch": 0.8945931653493789, "grad_norm": 0.520241474195404, "learning_rate": 3.0433967571164823e-05, "loss": 3.6211, "step": 3912 }, { "epoch": 0.8950505238183715, "grad_norm": 0.43572628277760467, "learning_rate": 3.0415887536485836e-05, "loss": 3.6008, "step": 3914 }, { "epoch": 0.895507882287364, "grad_norm": 0.5524217510047917, "learning_rate": 3.039780452921538e-05, "loss": 3.7577, "step": 3916 }, { "epoch": 0.8959652407563565, "grad_norm": 0.38644208548322584, "learning_rate": 3.0379718559278575e-05, "loss": 3.7728, "step": 3918 }, { "epoch": 0.8964225992253491, "grad_norm": 0.42542528443881517, "learning_rate": 3.0361629636602184e-05, "loss": 3.7734, "step": 3920 }, { "epoch": 0.8968799576943416, "grad_norm": 0.45582803652799153, "learning_rate": 3.0343537771114583e-05, "loss": 3.7058, "step": 3922 }, { "epoch": 0.8973373161633341, "grad_norm": 0.42061908417349214, "learning_rate": 3.032544297274577e-05, "loss": 3.7421, "step": 3924 }, { "epoch": 0.8977946746323267, "grad_norm": 0.44766074639370673, "learning_rate": 3.0307345251427344e-05, "loss": 3.5887, "step": 3926 }, { "epoch": 0.8982520331013192, "grad_norm": 0.4982877859247366, "learning_rate": 3.0289244617092506e-05, "loss": 3.7717, "step": 3928 }, { "epoch": 0.8987093915703117, "grad_norm": 0.4711685412299576, "learning_rate": 3.027114107967608e-05, "loss": 3.7485, "step": 3930 }, { "epoch": 0.8991667500393042, "grad_norm": 0.38863405206566776, "learning_rate": 3.0253034649114442e-05, "loss": 3.5179, "step": 3932 }, { "epoch": 0.8996241085082968, "grad_norm": 0.4201434770531495, "learning_rate": 3.0234925335345595e-05, "loss": 3.6304, "step": 3934 }, { "epoch": 0.9000814669772893, "grad_norm": 0.4639479877972599, "learning_rate": 3.0216813148309103e-05, "loss": 3.7364, "step": 3936 }, { "epoch": 0.9005388254462818, "grad_norm": 0.3860623539965568, "learning_rate": 3.0198698097946117e-05, "loss": 3.6389, "step": 3938 }, { "epoch": 0.9009961839152744, "grad_norm": 0.5100144696052964, "learning_rate": 3.0180580194199348e-05, "loss": 3.6747, "step": 3940 }, { "epoch": 0.9014535423842669, "grad_norm": 0.3165447261749984, "learning_rate": 3.0162459447013085e-05, "loss": 3.8278, "step": 3942 }, { "epoch": 0.9019109008532594, "grad_norm": 0.46961470955506557, "learning_rate": 3.0144335866333173e-05, "loss": 3.7323, "step": 3944 }, { "epoch": 0.902368259322252, "grad_norm": 0.48269443302887866, "learning_rate": 3.0126209462107014e-05, "loss": 3.7443, "step": 3946 }, { "epoch": 0.9028256177912445, "grad_norm": 0.42237580561949123, "learning_rate": 3.010808024428356e-05, "loss": 3.6266, "step": 3948 }, { "epoch": 0.903282976260237, "grad_norm": 0.41447723059702585, "learning_rate": 3.0089948222813297e-05, "loss": 3.6318, "step": 3950 }, { "epoch": 0.9037403347292295, "grad_norm": 0.4358876182703237, "learning_rate": 3.0071813407648265e-05, "loss": 3.4978, "step": 3952 }, { "epoch": 0.9041976931982221, "grad_norm": 0.49129562709608493, "learning_rate": 3.0053675808742028e-05, "loss": 3.6261, "step": 3954 }, { "epoch": 0.9046550516672145, "grad_norm": 0.48923043119215165, "learning_rate": 3.0035535436049678e-05, "loss": 3.7221, "step": 3956 }, { "epoch": 0.905112410136207, "grad_norm": 0.3690543906591148, "learning_rate": 3.0017392299527835e-05, "loss": 3.7222, "step": 3958 }, { "epoch": 0.9055697686051996, "grad_norm": 0.4414020639406735, "learning_rate": 2.9999246409134635e-05, "loss": 3.5938, "step": 3960 }, { "epoch": 0.9060271270741921, "grad_norm": 0.38060942052642577, "learning_rate": 2.9981097774829713e-05, "loss": 3.778, "step": 3962 }, { "epoch": 0.9064844855431846, "grad_norm": 0.5261602331061599, "learning_rate": 2.9962946406574242e-05, "loss": 3.5259, "step": 3964 }, { "epoch": 0.9069418440121771, "grad_norm": 0.42863967755124366, "learning_rate": 2.9944792314330853e-05, "loss": 3.7273, "step": 3966 }, { "epoch": 0.9073992024811697, "grad_norm": 0.5200486439180624, "learning_rate": 2.9926635508063696e-05, "loss": 3.8336, "step": 3968 }, { "epoch": 0.9078565609501622, "grad_norm": 0.5512527720063767, "learning_rate": 2.9908475997738416e-05, "loss": 3.5691, "step": 3970 }, { "epoch": 0.9083139194191547, "grad_norm": 0.4057895178057235, "learning_rate": 2.9890313793322127e-05, "loss": 3.6379, "step": 3972 }, { "epoch": 0.9087712778881473, "grad_norm": 0.43593236516847533, "learning_rate": 2.9872148904783436e-05, "loss": 3.7791, "step": 3974 }, { "epoch": 0.9092286363571398, "grad_norm": 0.4151136084535376, "learning_rate": 2.9853981342092406e-05, "loss": 3.5697, "step": 3976 }, { "epoch": 0.9096859948261323, "grad_norm": 0.4487006973502205, "learning_rate": 2.9835811115220584e-05, "loss": 3.6888, "step": 3978 }, { "epoch": 0.9101433532951249, "grad_norm": 0.5698116402434925, "learning_rate": 2.9817638234140975e-05, "loss": 3.7152, "step": 3980 }, { "epoch": 0.9106007117641174, "grad_norm": 0.3832380304178157, "learning_rate": 2.9799462708828034e-05, "loss": 3.803, "step": 3982 }, { "epoch": 0.9110580702331099, "grad_norm": 0.2962919834825323, "learning_rate": 2.978128454925767e-05, "loss": 3.6081, "step": 3984 }, { "epoch": 0.9115154287021024, "grad_norm": 0.41821754038776254, "learning_rate": 2.9763103765407246e-05, "loss": 3.6638, "step": 3986 }, { "epoch": 0.911972787171095, "grad_norm": 0.4441385690176825, "learning_rate": 2.974492036725555e-05, "loss": 3.643, "step": 3988 }, { "epoch": 0.9124301456400875, "grad_norm": 0.43943862891130187, "learning_rate": 2.9726734364782827e-05, "loss": 3.7349, "step": 3990 }, { "epoch": 0.91288750410908, "grad_norm": 0.43166432380347325, "learning_rate": 2.970854576797073e-05, "loss": 3.5048, "step": 3992 }, { "epoch": 0.9133448625780726, "grad_norm": 0.33276294419901065, "learning_rate": 2.9690354586802342e-05, "loss": 3.7739, "step": 3994 }, { "epoch": 0.9138022210470651, "grad_norm": 0.49858834247292705, "learning_rate": 2.967216083126217e-05, "loss": 3.6668, "step": 3996 }, { "epoch": 0.9142595795160575, "grad_norm": 0.42544666758321714, "learning_rate": 2.9653964511336134e-05, "loss": 3.774, "step": 3998 }, { "epoch": 0.91471693798505, "grad_norm": 0.4640245512014409, "learning_rate": 2.9635765637011558e-05, "loss": 3.8177, "step": 4000 }, { "epoch": 0.9151742964540426, "grad_norm": 0.5028915538960013, "learning_rate": 2.9617564218277156e-05, "loss": 3.653, "step": 4002 }, { "epoch": 0.9156316549230351, "grad_norm": 0.4525726338834894, "learning_rate": 2.959936026512307e-05, "loss": 3.5576, "step": 4004 }, { "epoch": 0.9160890133920276, "grad_norm": 0.3902919112090336, "learning_rate": 2.95811537875408e-05, "loss": 3.706, "step": 4006 }, { "epoch": 0.9165463718610202, "grad_norm": 0.4583573298963765, "learning_rate": 2.9562944795523256e-05, "loss": 3.7205, "step": 4008 }, { "epoch": 0.9170037303300127, "grad_norm": 0.4027466099653823, "learning_rate": 2.954473329906471e-05, "loss": 3.4986, "step": 4010 }, { "epoch": 0.9174610887990052, "grad_norm": 0.5424383028606614, "learning_rate": 2.9526519308160816e-05, "loss": 3.5863, "step": 4012 }, { "epoch": 0.9179184472679978, "grad_norm": 0.524564014783054, "learning_rate": 2.9508302832808603e-05, "loss": 3.5562, "step": 4014 }, { "epoch": 0.9183758057369903, "grad_norm": 0.43658600429211314, "learning_rate": 2.9490083883006448e-05, "loss": 3.6012, "step": 4016 }, { "epoch": 0.9188331642059828, "grad_norm": 0.48707426811910826, "learning_rate": 2.947186246875411e-05, "loss": 3.6575, "step": 4018 }, { "epoch": 0.9192905226749754, "grad_norm": 0.5007716125893995, "learning_rate": 2.945363860005268e-05, "loss": 3.7513, "step": 4020 }, { "epoch": 0.9197478811439679, "grad_norm": 0.4354786840865428, "learning_rate": 2.94354122869046e-05, "loss": 3.7187, "step": 4022 }, { "epoch": 0.9202052396129604, "grad_norm": 0.4520752520065985, "learning_rate": 2.9417183539313654e-05, "loss": 3.7935, "step": 4024 }, { "epoch": 0.9206625980819529, "grad_norm": 0.364196935889873, "learning_rate": 2.9398952367284978e-05, "loss": 3.6567, "step": 4026 }, { "epoch": 0.9211199565509455, "grad_norm": 0.41659244432588827, "learning_rate": 2.938071878082501e-05, "loss": 3.6673, "step": 4028 }, { "epoch": 0.921577315019938, "grad_norm": 0.5798211604256233, "learning_rate": 2.9362482789941537e-05, "loss": 3.707, "step": 4030 }, { "epoch": 0.9220346734889305, "grad_norm": 0.39370917608164385, "learning_rate": 2.934424440464366e-05, "loss": 3.7164, "step": 4032 }, { "epoch": 0.9224920319579231, "grad_norm": 0.42584569250872956, "learning_rate": 2.9326003634941784e-05, "loss": 3.7004, "step": 4034 }, { "epoch": 0.9229493904269156, "grad_norm": 0.44574584288164276, "learning_rate": 2.9307760490847636e-05, "loss": 3.6718, "step": 4036 }, { "epoch": 0.9234067488959081, "grad_norm": 0.5131741567337438, "learning_rate": 2.928951498237424e-05, "loss": 3.6343, "step": 4038 }, { "epoch": 0.9238641073649005, "grad_norm": 0.48226221817224707, "learning_rate": 2.927126711953591e-05, "loss": 3.638, "step": 4040 }, { "epoch": 0.9243214658338931, "grad_norm": 0.46172060958048916, "learning_rate": 2.9253016912348274e-05, "loss": 3.5599, "step": 4042 }, { "epoch": 0.9247788243028856, "grad_norm": 0.41235008260902567, "learning_rate": 2.9234764370828227e-05, "loss": 3.6057, "step": 4044 }, { "epoch": 0.9252361827718781, "grad_norm": 0.4066366959527684, "learning_rate": 2.9216509504993943e-05, "loss": 3.6636, "step": 4046 }, { "epoch": 0.9256935412408707, "grad_norm": 0.4928050783622937, "learning_rate": 2.91982523248649e-05, "loss": 3.8753, "step": 4048 }, { "epoch": 0.9261508997098632, "grad_norm": 0.44719330648601857, "learning_rate": 2.917999284046181e-05, "loss": 3.7419, "step": 4050 }, { "epoch": 0.9266082581788557, "grad_norm": 0.49974697386534844, "learning_rate": 2.9161731061806663e-05, "loss": 3.6794, "step": 4052 }, { "epoch": 0.9270656166478483, "grad_norm": 0.4618063835464869, "learning_rate": 2.9143466998922726e-05, "loss": 3.7771, "step": 4054 }, { "epoch": 0.9275229751168408, "grad_norm": 0.4879419444866207, "learning_rate": 2.9125200661834496e-05, "loss": 3.7583, "step": 4056 }, { "epoch": 0.9279803335858333, "grad_norm": 0.42847357850535905, "learning_rate": 2.9106932060567726e-05, "loss": 3.4876, "step": 4058 }, { "epoch": 0.9284376920548258, "grad_norm": 0.46555184679342093, "learning_rate": 2.908866120514942e-05, "loss": 3.7016, "step": 4060 }, { "epoch": 0.9288950505238184, "grad_norm": 0.4370653343343347, "learning_rate": 2.907038810560781e-05, "loss": 3.6254, "step": 4062 }, { "epoch": 0.9293524089928109, "grad_norm": 0.42826448103632836, "learning_rate": 2.9052112771972352e-05, "loss": 3.7215, "step": 4064 }, { "epoch": 0.9298097674618034, "grad_norm": 0.43775644620160986, "learning_rate": 2.9033835214273757e-05, "loss": 3.8835, "step": 4066 }, { "epoch": 0.930267125930796, "grad_norm": 0.46605778684889226, "learning_rate": 2.9015555442543924e-05, "loss": 3.728, "step": 4068 }, { "epoch": 0.9307244843997885, "grad_norm": 0.37947935827758095, "learning_rate": 2.899727346681599e-05, "loss": 3.6166, "step": 4070 }, { "epoch": 0.931181842868781, "grad_norm": 0.4945039732583457, "learning_rate": 2.8978989297124282e-05, "loss": 3.7344, "step": 4072 }, { "epoch": 0.9316392013377736, "grad_norm": 0.47132825692413827, "learning_rate": 2.8960702943504343e-05, "loss": 3.5296, "step": 4074 }, { "epoch": 0.9320965598067661, "grad_norm": 0.38963725138504535, "learning_rate": 2.894241441599293e-05, "loss": 3.7672, "step": 4076 }, { "epoch": 0.9325539182757586, "grad_norm": 0.41812797436989113, "learning_rate": 2.8924123724627954e-05, "loss": 3.5825, "step": 4078 }, { "epoch": 0.9330112767447512, "grad_norm": 0.47935737221845093, "learning_rate": 2.8905830879448555e-05, "loss": 3.7837, "step": 4080 }, { "epoch": 0.9334686352137436, "grad_norm": 0.437515411786694, "learning_rate": 2.8887535890495028e-05, "loss": 3.5188, "step": 4082 }, { "epoch": 0.9339259936827361, "grad_norm": 0.39107754242827847, "learning_rate": 2.886923876780886e-05, "loss": 3.8986, "step": 4084 }, { "epoch": 0.9343833521517286, "grad_norm": 0.4807655379263563, "learning_rate": 2.8850939521432685e-05, "loss": 3.6077, "step": 4086 }, { "epoch": 0.9348407106207212, "grad_norm": 0.494177142215018, "learning_rate": 2.883263816141034e-05, "loss": 3.7219, "step": 4088 }, { "epoch": 0.9352980690897137, "grad_norm": 0.42471891392276695, "learning_rate": 2.8814334697786798e-05, "loss": 3.8311, "step": 4090 }, { "epoch": 0.9357554275587062, "grad_norm": 0.5496991938853829, "learning_rate": 2.8796029140608173e-05, "loss": 3.7742, "step": 4092 }, { "epoch": 0.9362127860276988, "grad_norm": 0.3933639442761843, "learning_rate": 2.8777721499921774e-05, "loss": 3.8492, "step": 4094 }, { "epoch": 0.9366701444966913, "grad_norm": 0.5021093993589811, "learning_rate": 2.8759411785776014e-05, "loss": 3.6724, "step": 4096 }, { "epoch": 0.9371275029656838, "grad_norm": 0.41389951557168864, "learning_rate": 2.8741100008220446e-05, "loss": 3.7711, "step": 4098 }, { "epoch": 0.9375848614346763, "grad_norm": 0.5455052797427384, "learning_rate": 2.8722786177305772e-05, "loss": 3.6599, "step": 4100 }, { "epoch": 0.9380422199036689, "grad_norm": 0.42638202594234614, "learning_rate": 2.870447030308381e-05, "loss": 3.5891, "step": 4102 }, { "epoch": 0.9384995783726614, "grad_norm": 0.5356682882543555, "learning_rate": 2.868615239560752e-05, "loss": 3.5718, "step": 4104 }, { "epoch": 0.9389569368416539, "grad_norm": 0.45228052372723554, "learning_rate": 2.8667832464930938e-05, "loss": 3.7028, "step": 4106 }, { "epoch": 0.9394142953106465, "grad_norm": 0.44247312655447535, "learning_rate": 2.8649510521109246e-05, "loss": 3.7038, "step": 4108 }, { "epoch": 0.939871653779639, "grad_norm": 0.41878687923927255, "learning_rate": 2.8631186574198725e-05, "loss": 3.7361, "step": 4110 }, { "epoch": 0.9403290122486315, "grad_norm": 0.6041332652187428, "learning_rate": 2.8612860634256745e-05, "loss": 3.5059, "step": 4112 }, { "epoch": 0.940786370717624, "grad_norm": 0.39946388362942487, "learning_rate": 2.859453271134176e-05, "loss": 3.766, "step": 4114 }, { "epoch": 0.9412437291866166, "grad_norm": 0.5403314388788294, "learning_rate": 2.857620281551334e-05, "loss": 3.8317, "step": 4116 }, { "epoch": 0.9417010876556091, "grad_norm": 0.4467198902981519, "learning_rate": 2.8557870956832132e-05, "loss": 3.6807, "step": 4118 }, { "epoch": 0.9421584461246016, "grad_norm": 0.5465684303934716, "learning_rate": 2.8539537145359834e-05, "loss": 3.6345, "step": 4120 }, { "epoch": 0.9426158045935942, "grad_norm": 0.3819726177382719, "learning_rate": 2.8521201391159253e-05, "loss": 3.652, "step": 4122 }, { "epoch": 0.9430731630625866, "grad_norm": 0.5721416143730768, "learning_rate": 2.8502863704294235e-05, "loss": 3.7077, "step": 4124 }, { "epoch": 0.9435305215315791, "grad_norm": 0.5669881790970819, "learning_rate": 2.848452409482969e-05, "loss": 3.7046, "step": 4126 }, { "epoch": 0.9439878800005717, "grad_norm": 0.5338736708885088, "learning_rate": 2.8466182572831597e-05, "loss": 3.5359, "step": 4128 }, { "epoch": 0.9444452384695642, "grad_norm": 0.47356552747242286, "learning_rate": 2.8447839148366978e-05, "loss": 3.507, "step": 4130 }, { "epoch": 0.9449025969385567, "grad_norm": 0.4313516814838825, "learning_rate": 2.842949383150389e-05, "loss": 3.6234, "step": 4132 }, { "epoch": 0.9453599554075492, "grad_norm": 0.4760069188208185, "learning_rate": 2.8411146632311436e-05, "loss": 3.6206, "step": 4134 }, { "epoch": 0.9458173138765418, "grad_norm": 0.534347284724855, "learning_rate": 2.8392797560859765e-05, "loss": 3.4613, "step": 4136 }, { "epoch": 0.9462746723455343, "grad_norm": 0.4544144403690042, "learning_rate": 2.8374446627220037e-05, "loss": 3.7686, "step": 4138 }, { "epoch": 0.9467320308145268, "grad_norm": 0.5182967756888305, "learning_rate": 2.8356093841464436e-05, "loss": 3.7179, "step": 4140 }, { "epoch": 0.9471893892835194, "grad_norm": 0.4395798902994463, "learning_rate": 2.8337739213666153e-05, "loss": 3.664, "step": 4142 }, { "epoch": 0.9476467477525119, "grad_norm": 0.561466618855996, "learning_rate": 2.8319382753899428e-05, "loss": 3.6621, "step": 4144 }, { "epoch": 0.9481041062215044, "grad_norm": 0.5273659595475036, "learning_rate": 2.8301024472239463e-05, "loss": 3.76, "step": 4146 }, { "epoch": 0.948561464690497, "grad_norm": 0.4896602840687107, "learning_rate": 2.828266437876247e-05, "loss": 3.6016, "step": 4148 }, { "epoch": 0.9490188231594895, "grad_norm": 0.3552190930963978, "learning_rate": 2.826430248354568e-05, "loss": 3.6035, "step": 4150 }, { "epoch": 0.949476181628482, "grad_norm": 0.47632552336560596, "learning_rate": 2.824593879666729e-05, "loss": 3.7245, "step": 4152 }, { "epoch": 0.9499335400974745, "grad_norm": 0.5568874681834494, "learning_rate": 2.822757332820649e-05, "loss": 3.6313, "step": 4154 }, { "epoch": 0.9503908985664671, "grad_norm": 0.4449746205293396, "learning_rate": 2.8209206088243432e-05, "loss": 3.5646, "step": 4156 }, { "epoch": 0.9508482570354596, "grad_norm": 0.4295466278809233, "learning_rate": 2.8190837086859267e-05, "loss": 3.7276, "step": 4158 }, { "epoch": 0.9513056155044521, "grad_norm": 0.472163992295982, "learning_rate": 2.817246633413609e-05, "loss": 3.6791, "step": 4160 }, { "epoch": 0.9517629739734447, "grad_norm": 0.5655296118797333, "learning_rate": 2.815409384015696e-05, "loss": 3.4834, "step": 4162 }, { "epoch": 0.9522203324424372, "grad_norm": 0.3800259592575555, "learning_rate": 2.8135719615005908e-05, "loss": 3.6686, "step": 4164 }, { "epoch": 0.9526776909114296, "grad_norm": 0.4833617841908092, "learning_rate": 2.8117343668767898e-05, "loss": 3.7913, "step": 4166 }, { "epoch": 0.9531350493804222, "grad_norm": 0.38240866867189605, "learning_rate": 2.8098966011528854e-05, "loss": 3.8371, "step": 4168 }, { "epoch": 0.9535924078494147, "grad_norm": 0.4365215600163239, "learning_rate": 2.8080586653375613e-05, "loss": 3.7135, "step": 4170 }, { "epoch": 0.9540497663184072, "grad_norm": 0.4120692916106754, "learning_rate": 2.806220560439598e-05, "loss": 3.6547, "step": 4172 }, { "epoch": 0.9545071247873997, "grad_norm": 0.46818128099793693, "learning_rate": 2.8043822874678667e-05, "loss": 3.7794, "step": 4174 }, { "epoch": 0.9549644832563923, "grad_norm": 0.4475589606533091, "learning_rate": 2.8025438474313292e-05, "loss": 3.6483, "step": 4176 }, { "epoch": 0.9554218417253848, "grad_norm": 0.4228487785547249, "learning_rate": 2.8007052413390446e-05, "loss": 3.5491, "step": 4178 }, { "epoch": 0.9558792001943773, "grad_norm": 0.6143685197148879, "learning_rate": 2.798866470200157e-05, "loss": 3.5428, "step": 4180 }, { "epoch": 0.9563365586633699, "grad_norm": 0.36420620304656137, "learning_rate": 2.797027535023904e-05, "loss": 3.7188, "step": 4182 }, { "epoch": 0.9567939171323624, "grad_norm": 0.5117548864286942, "learning_rate": 2.795188436819613e-05, "loss": 3.5618, "step": 4184 }, { "epoch": 0.9572512756013549, "grad_norm": 0.41543570919003836, "learning_rate": 2.7933491765967013e-05, "loss": 3.5476, "step": 4186 }, { "epoch": 0.9577086340703475, "grad_norm": 0.4904494402761393, "learning_rate": 2.791509755364674e-05, "loss": 3.5473, "step": 4188 }, { "epoch": 0.95816599253934, "grad_norm": 0.40654932512403946, "learning_rate": 2.7896701741331255e-05, "loss": 3.8333, "step": 4190 }, { "epoch": 0.9586233510083325, "grad_norm": 0.33963726611024564, "learning_rate": 2.7878304339117377e-05, "loss": 3.5229, "step": 4192 }, { "epoch": 0.959080709477325, "grad_norm": 0.36552272980224204, "learning_rate": 2.785990535710279e-05, "loss": 3.6528, "step": 4194 }, { "epoch": 0.9595380679463176, "grad_norm": 0.3602476099280465, "learning_rate": 2.7841504805386064e-05, "loss": 3.5829, "step": 4196 }, { "epoch": 0.9599954264153101, "grad_norm": 0.4336406751104916, "learning_rate": 2.7823102694066607e-05, "loss": 3.7719, "step": 4198 }, { "epoch": 0.9604527848843026, "grad_norm": 0.3914804247472425, "learning_rate": 2.7804699033244707e-05, "loss": 3.6911, "step": 4200 }, { "epoch": 0.9609101433532952, "grad_norm": 0.4293876468678296, "learning_rate": 2.7786293833021487e-05, "loss": 3.551, "step": 4202 }, { "epoch": 0.9613675018222877, "grad_norm": 0.39728365981914116, "learning_rate": 2.77678871034989e-05, "loss": 3.5824, "step": 4204 }, { "epoch": 0.9618248602912802, "grad_norm": 0.33922942311255894, "learning_rate": 2.774947885477979e-05, "loss": 3.7459, "step": 4206 }, { "epoch": 0.9622822187602726, "grad_norm": 0.3447670629122563, "learning_rate": 2.7731069096967788e-05, "loss": 3.6961, "step": 4208 }, { "epoch": 0.9627395772292652, "grad_norm": 0.480966325854035, "learning_rate": 2.7712657840167344e-05, "loss": 3.8588, "step": 4210 }, { "epoch": 0.9631969356982577, "grad_norm": 0.43368764668247917, "learning_rate": 2.769424509448379e-05, "loss": 3.6735, "step": 4212 }, { "epoch": 0.9636542941672502, "grad_norm": 0.4458464884228377, "learning_rate": 2.7675830870023223e-05, "loss": 3.8078, "step": 4214 }, { "epoch": 0.9641116526362428, "grad_norm": 0.38978323839509726, "learning_rate": 2.7657415176892553e-05, "loss": 3.681, "step": 4216 }, { "epoch": 0.9645690111052353, "grad_norm": 0.4160091238454119, "learning_rate": 2.7638998025199537e-05, "loss": 3.6635, "step": 4218 }, { "epoch": 0.9650263695742278, "grad_norm": 0.4544654946548009, "learning_rate": 2.762057942505269e-05, "loss": 3.6419, "step": 4220 }, { "epoch": 0.9654837280432204, "grad_norm": 0.43419849047870657, "learning_rate": 2.7602159386561337e-05, "loss": 3.6037, "step": 4222 }, { "epoch": 0.9659410865122129, "grad_norm": 0.40830107248437764, "learning_rate": 2.75837379198356e-05, "loss": 3.6803, "step": 4224 }, { "epoch": 0.9663984449812054, "grad_norm": 0.5388896222644853, "learning_rate": 2.7565315034986384e-05, "loss": 3.5567, "step": 4226 }, { "epoch": 0.966855803450198, "grad_norm": 0.44913398938163424, "learning_rate": 2.7546890742125353e-05, "loss": 3.7091, "step": 4228 }, { "epoch": 0.9673131619191905, "grad_norm": 0.4662311110109165, "learning_rate": 2.7528465051364972e-05, "loss": 3.5642, "step": 4230 }, { "epoch": 0.967770520388183, "grad_norm": 0.47850186075149725, "learning_rate": 2.751003797281844e-05, "loss": 3.7346, "step": 4232 }, { "epoch": 0.9682278788571755, "grad_norm": 0.5150908101673795, "learning_rate": 2.7491609516599757e-05, "loss": 3.7244, "step": 4234 }, { "epoch": 0.9686852373261681, "grad_norm": 0.4222017541620794, "learning_rate": 2.747317969282366e-05, "loss": 3.616, "step": 4236 }, { "epoch": 0.9691425957951606, "grad_norm": 0.4390700132602774, "learning_rate": 2.7454748511605604e-05, "loss": 3.6111, "step": 4238 }, { "epoch": 0.9695999542641531, "grad_norm": 0.4566853510324598, "learning_rate": 2.7436315983061856e-05, "loss": 3.6961, "step": 4240 }, { "epoch": 0.9700573127331457, "grad_norm": 0.4887185846656897, "learning_rate": 2.7417882117309372e-05, "loss": 3.5841, "step": 4242 }, { "epoch": 0.9705146712021382, "grad_norm": 0.43743274376119295, "learning_rate": 2.7399446924465853e-05, "loss": 3.5352, "step": 4244 }, { "epoch": 0.9709720296711307, "grad_norm": 0.4023139164798792, "learning_rate": 2.7381010414649745e-05, "loss": 3.7189, "step": 4246 }, { "epoch": 0.9714293881401233, "grad_norm": 0.4183502239006486, "learning_rate": 2.736257259798019e-05, "loss": 3.6897, "step": 4248 }, { "epoch": 0.9718867466091158, "grad_norm": 0.49307049156305266, "learning_rate": 2.734413348457707e-05, "loss": 3.6766, "step": 4250 }, { "epoch": 0.9723441050781082, "grad_norm": 0.710618461035283, "learning_rate": 2.7325693084560965e-05, "loss": 3.764, "step": 4252 }, { "epoch": 0.9728014635471007, "grad_norm": 0.46713675411544847, "learning_rate": 2.730725140805318e-05, "loss": 3.762, "step": 4254 }, { "epoch": 0.9732588220160933, "grad_norm": 0.43721380409582605, "learning_rate": 2.7288808465175692e-05, "loss": 3.7228, "step": 4256 }, { "epoch": 0.9737161804850858, "grad_norm": 0.4506885863055504, "learning_rate": 2.7270364266051196e-05, "loss": 3.7169, "step": 4258 }, { "epoch": 0.9741735389540783, "grad_norm": 0.38647557328972104, "learning_rate": 2.7251918820803056e-05, "loss": 3.6339, "step": 4260 }, { "epoch": 0.9746308974230709, "grad_norm": 0.5435768399811546, "learning_rate": 2.723347213955535e-05, "loss": 3.7342, "step": 4262 }, { "epoch": 0.9750882558920634, "grad_norm": 0.4928370231762921, "learning_rate": 2.7215024232432805e-05, "loss": 3.5492, "step": 4264 }, { "epoch": 0.9755456143610559, "grad_norm": 0.41413790538033757, "learning_rate": 2.7196575109560833e-05, "loss": 3.6397, "step": 4266 }, { "epoch": 0.9760029728300484, "grad_norm": 0.4581206453176247, "learning_rate": 2.7178124781065522e-05, "loss": 3.7635, "step": 4268 }, { "epoch": 0.976460331299041, "grad_norm": 0.3975509803996893, "learning_rate": 2.7159673257073615e-05, "loss": 3.7069, "step": 4270 }, { "epoch": 0.9769176897680335, "grad_norm": 0.4922076024385599, "learning_rate": 2.714122054771249e-05, "loss": 3.553, "step": 4272 }, { "epoch": 0.977375048237026, "grad_norm": 0.4161482049001532, "learning_rate": 2.712276666311021e-05, "loss": 3.6581, "step": 4274 }, { "epoch": 0.9778324067060186, "grad_norm": 0.3241691666341615, "learning_rate": 2.7104311613395467e-05, "loss": 3.5494, "step": 4276 }, { "epoch": 0.9782897651750111, "grad_norm": 0.46517312377729264, "learning_rate": 2.7085855408697586e-05, "loss": 3.6776, "step": 4278 }, { "epoch": 0.9787471236440036, "grad_norm": 0.41713651352683123, "learning_rate": 2.706739805914653e-05, "loss": 3.5955, "step": 4280 }, { "epoch": 0.9792044821129962, "grad_norm": 0.3404742016471815, "learning_rate": 2.7048939574872918e-05, "loss": 3.5473, "step": 4282 }, { "epoch": 0.9796618405819887, "grad_norm": 0.5251754547030735, "learning_rate": 2.703047996600793e-05, "loss": 3.8624, "step": 4284 }, { "epoch": 0.9801191990509812, "grad_norm": 0.3657212301921671, "learning_rate": 2.7012019242683434e-05, "loss": 3.7155, "step": 4286 }, { "epoch": 0.9805765575199737, "grad_norm": 0.4325948154012243, "learning_rate": 2.6993557415031857e-05, "loss": 3.6184, "step": 4288 }, { "epoch": 0.9810339159889663, "grad_norm": 0.5438527930865295, "learning_rate": 2.6975094493186254e-05, "loss": 3.6223, "step": 4290 }, { "epoch": 0.9814912744579588, "grad_norm": 0.39909124036398186, "learning_rate": 2.6956630487280288e-05, "loss": 3.7175, "step": 4292 }, { "epoch": 0.9819486329269512, "grad_norm": 0.560356134267714, "learning_rate": 2.693816540744819e-05, "loss": 3.7657, "step": 4294 }, { "epoch": 0.9824059913959438, "grad_norm": 0.38051973506512454, "learning_rate": 2.6919699263824806e-05, "loss": 3.8314, "step": 4296 }, { "epoch": 0.9828633498649363, "grad_norm": 0.44382697498915874, "learning_rate": 2.6901232066545568e-05, "loss": 3.6788, "step": 4298 }, { "epoch": 0.9833207083339288, "grad_norm": 0.6583272890425113, "learning_rate": 2.6882763825746455e-05, "loss": 3.8366, "step": 4300 }, { "epoch": 0.9837780668029213, "grad_norm": 0.41243835624401304, "learning_rate": 2.6864294551564047e-05, "loss": 3.5172, "step": 4302 }, { "epoch": 0.9842354252719139, "grad_norm": 0.3949637921856153, "learning_rate": 2.6845824254135494e-05, "loss": 3.6461, "step": 4304 }, { "epoch": 0.9846927837409064, "grad_norm": 0.4595461228242493, "learning_rate": 2.6827352943598483e-05, "loss": 3.6278, "step": 4306 }, { "epoch": 0.9851501422098989, "grad_norm": 0.538252207688173, "learning_rate": 2.6808880630091277e-05, "loss": 3.7287, "step": 4308 }, { "epoch": 0.9856075006788915, "grad_norm": 0.40080589530972077, "learning_rate": 2.6790407323752685e-05, "loss": 3.8098, "step": 4310 }, { "epoch": 0.986064859147884, "grad_norm": 0.4810416674817023, "learning_rate": 2.6771933034722056e-05, "loss": 3.7726, "step": 4312 }, { "epoch": 0.9865222176168765, "grad_norm": 0.5410619777832473, "learning_rate": 2.675345777313929e-05, "loss": 3.6238, "step": 4314 }, { "epoch": 0.9869795760858691, "grad_norm": 0.5237220161719, "learning_rate": 2.67349815491448e-05, "loss": 3.6397, "step": 4316 }, { "epoch": 0.9874369345548616, "grad_norm": 0.3944842999915644, "learning_rate": 2.671650437287954e-05, "loss": 3.5877, "step": 4318 }, { "epoch": 0.9878942930238541, "grad_norm": 0.4917924900480585, "learning_rate": 2.669802625448501e-05, "loss": 3.741, "step": 4320 }, { "epoch": 0.9883516514928467, "grad_norm": 0.4131093841463855, "learning_rate": 2.6679547204103173e-05, "loss": 3.6826, "step": 4322 }, { "epoch": 0.9888090099618392, "grad_norm": 0.39437190699989166, "learning_rate": 2.6661067231876548e-05, "loss": 3.8106, "step": 4324 }, { "epoch": 0.9892663684308317, "grad_norm": 0.44382677354356737, "learning_rate": 2.664258634794815e-05, "loss": 3.5128, "step": 4326 }, { "epoch": 0.9897237268998242, "grad_norm": 0.42978668802027825, "learning_rate": 2.6624104562461484e-05, "loss": 3.7012, "step": 4328 }, { "epoch": 0.9901810853688168, "grad_norm": 0.47422041476364407, "learning_rate": 2.6605621885560555e-05, "loss": 3.7291, "step": 4330 }, { "epoch": 0.9906384438378093, "grad_norm": 0.44071480938252566, "learning_rate": 2.658713832738987e-05, "loss": 3.7019, "step": 4332 }, { "epoch": 0.9910958023068018, "grad_norm": 0.5271183238521483, "learning_rate": 2.6568653898094397e-05, "loss": 3.7796, "step": 4334 }, { "epoch": 0.9915531607757943, "grad_norm": 0.45471651830198323, "learning_rate": 2.6550168607819602e-05, "loss": 3.6447, "step": 4336 }, { "epoch": 0.9920105192447868, "grad_norm": 0.5670410807964367, "learning_rate": 2.653168246671141e-05, "loss": 3.791, "step": 4338 }, { "epoch": 0.9924678777137793, "grad_norm": 0.38814819942313733, "learning_rate": 2.6513195484916213e-05, "loss": 3.6313, "step": 4340 }, { "epoch": 0.9929252361827718, "grad_norm": 0.5219288335662398, "learning_rate": 2.6494707672580875e-05, "loss": 3.6306, "step": 4342 }, { "epoch": 0.9933825946517644, "grad_norm": 0.435449204377514, "learning_rate": 2.6476219039852718e-05, "loss": 3.6154, "step": 4344 }, { "epoch": 0.9938399531207569, "grad_norm": 0.42988892082509095, "learning_rate": 2.6457729596879495e-05, "loss": 3.5782, "step": 4346 }, { "epoch": 0.9942973115897494, "grad_norm": 0.37236489876616663, "learning_rate": 2.643923935380942e-05, "loss": 3.7356, "step": 4348 }, { "epoch": 0.994754670058742, "grad_norm": 0.4640244066934927, "learning_rate": 2.6420748320791137e-05, "loss": 3.4627, "step": 4350 }, { "epoch": 0.9952120285277345, "grad_norm": 0.5286452727166071, "learning_rate": 2.640225650797374e-05, "loss": 3.6456, "step": 4352 }, { "epoch": 0.995669386996727, "grad_norm": 0.4123014752630153, "learning_rate": 2.6383763925506726e-05, "loss": 3.6902, "step": 4354 }, { "epoch": 0.9961267454657196, "grad_norm": 0.4916153867563911, "learning_rate": 2.636527058354003e-05, "loss": 3.7298, "step": 4356 }, { "epoch": 0.9965841039347121, "grad_norm": 0.3959863663876846, "learning_rate": 2.6346776492224006e-05, "loss": 3.6421, "step": 4358 }, { "epoch": 0.9970414624037046, "grad_norm": 0.40166588502559875, "learning_rate": 2.632828166170942e-05, "loss": 3.6122, "step": 4360 }, { "epoch": 0.9974988208726971, "grad_norm": 0.4118297948327621, "learning_rate": 2.6309786102147422e-05, "loss": 3.6329, "step": 4362 }, { "epoch": 0.9979561793416897, "grad_norm": 0.6328641140457593, "learning_rate": 2.6291289823689587e-05, "loss": 3.6687, "step": 4364 }, { "epoch": 0.9984135378106822, "grad_norm": 0.5722948398879372, "learning_rate": 2.627279283648788e-05, "loss": 3.7246, "step": 4366 }, { "epoch": 0.9988708962796747, "grad_norm": 0.44104008974211756, "learning_rate": 2.6254295150694646e-05, "loss": 3.6237, "step": 4368 }, { "epoch": 0.9993282547486673, "grad_norm": 0.5490120684716454, "learning_rate": 2.6235796776462624e-05, "loss": 3.6452, "step": 4370 }, { "epoch": 0.9997856132176598, "grad_norm": 0.5423201384991757, "learning_rate": 2.6217297723944924e-05, "loss": 3.8059, "step": 4372 }, { "epoch": 1.0004573584689924, "grad_norm": 0.4659152464026524, "learning_rate": 2.619879800329502e-05, "loss": 4.319, "step": 4374 }, { "epoch": 1.000914716937985, "grad_norm": 0.4505500802339011, "learning_rate": 2.6180297624666777e-05, "loss": 3.7971, "step": 4376 }, { "epoch": 1.0013720754069775, "grad_norm": 0.4206025218483967, "learning_rate": 2.6161796598214393e-05, "loss": 3.7509, "step": 4378 }, { "epoch": 1.0018294338759701, "grad_norm": 0.5090450587367139, "learning_rate": 2.6143294934092445e-05, "loss": 3.5718, "step": 4380 }, { "epoch": 1.0022867923449625, "grad_norm": 0.411358062582081, "learning_rate": 2.6124792642455848e-05, "loss": 3.8469, "step": 4382 }, { "epoch": 1.0027441508139552, "grad_norm": 0.4112935601444204, "learning_rate": 2.6106289733459854e-05, "loss": 3.7216, "step": 4384 }, { "epoch": 1.0032015092829476, "grad_norm": 0.5058891847288761, "learning_rate": 2.6087786217260078e-05, "loss": 3.5444, "step": 4386 }, { "epoch": 1.0036588677519402, "grad_norm": 0.5222734061255677, "learning_rate": 2.606928210401245e-05, "loss": 3.6559, "step": 4388 }, { "epoch": 1.0041162262209327, "grad_norm": 0.5465398578959461, "learning_rate": 2.6050777403873222e-05, "loss": 3.5939, "step": 4390 }, { "epoch": 1.0045735846899253, "grad_norm": 0.36467598237430787, "learning_rate": 2.6032272126998985e-05, "loss": 3.5566, "step": 4392 }, { "epoch": 1.0050309431589177, "grad_norm": 0.5004877751045459, "learning_rate": 2.601376628354665e-05, "loss": 3.7688, "step": 4394 }, { "epoch": 1.0054883016279104, "grad_norm": 0.37869063859574814, "learning_rate": 2.59952598836734e-05, "loss": 3.5359, "step": 4396 }, { "epoch": 1.0059456600969028, "grad_norm": 0.4043331222133971, "learning_rate": 2.5976752937536775e-05, "loss": 3.5542, "step": 4398 }, { "epoch": 1.0064030185658954, "grad_norm": 0.5379189510823899, "learning_rate": 2.5958245455294583e-05, "loss": 3.699, "step": 4400 }, { "epoch": 1.0068603770348878, "grad_norm": 0.4551430193428084, "learning_rate": 2.5939737447104935e-05, "loss": 3.62, "step": 4402 }, { "epoch": 1.0073177355038805, "grad_norm": 0.6346927307529422, "learning_rate": 2.5921228923126235e-05, "loss": 3.7399, "step": 4404 }, { "epoch": 1.007775093972873, "grad_norm": 0.4788331515857012, "learning_rate": 2.5902719893517154e-05, "loss": 3.5185, "step": 4406 }, { "epoch": 1.0082324524418655, "grad_norm": 0.4218198952184157, "learning_rate": 2.5884210368436658e-05, "loss": 3.7334, "step": 4408 }, { "epoch": 1.008689810910858, "grad_norm": 0.45207249969768143, "learning_rate": 2.5865700358043983e-05, "loss": 3.6267, "step": 4410 }, { "epoch": 1.0091471693798506, "grad_norm": 0.4658064450293089, "learning_rate": 2.584718987249862e-05, "loss": 3.5644, "step": 4412 }, { "epoch": 1.009604527848843, "grad_norm": 0.5050052221424788, "learning_rate": 2.5828678921960332e-05, "loss": 3.6099, "step": 4414 }, { "epoch": 1.0100618863178354, "grad_norm": 0.5424348964828669, "learning_rate": 2.5810167516589134e-05, "loss": 3.6469, "step": 4416 }, { "epoch": 1.010519244786828, "grad_norm": 0.3937304272403995, "learning_rate": 2.5791655666545282e-05, "loss": 3.6891, "step": 4418 }, { "epoch": 1.0109766032558205, "grad_norm": 0.4918825476381197, "learning_rate": 2.5773143381989294e-05, "loss": 3.553, "step": 4420 }, { "epoch": 1.0114339617248131, "grad_norm": 0.45902333195710854, "learning_rate": 2.5754630673081915e-05, "loss": 3.6749, "step": 4422 }, { "epoch": 1.0118913201938056, "grad_norm": 0.4400829802911825, "learning_rate": 2.573611754998412e-05, "loss": 3.5966, "step": 4424 }, { "epoch": 1.0123486786627982, "grad_norm": 0.509961700731084, "learning_rate": 2.571760402285711e-05, "loss": 3.5825, "step": 4426 }, { "epoch": 1.0128060371317906, "grad_norm": 0.38189907478825763, "learning_rate": 2.5699090101862328e-05, "loss": 3.7551, "step": 4428 }, { "epoch": 1.0132633956007833, "grad_norm": 0.43105635649163965, "learning_rate": 2.568057579716141e-05, "loss": 3.6498, "step": 4430 }, { "epoch": 1.0137207540697757, "grad_norm": 0.40926234609895124, "learning_rate": 2.566206111891621e-05, "loss": 3.6181, "step": 4432 }, { "epoch": 1.0141781125387683, "grad_norm": 0.4962237480628246, "learning_rate": 2.5643546077288787e-05, "loss": 3.6861, "step": 4434 }, { "epoch": 1.0146354710077607, "grad_norm": 0.5274230332291929, "learning_rate": 2.5625030682441403e-05, "loss": 3.5271, "step": 4436 }, { "epoch": 1.0150928294767534, "grad_norm": 0.5147352633986058, "learning_rate": 2.560651494453652e-05, "loss": 3.617, "step": 4438 }, { "epoch": 1.0155501879457458, "grad_norm": 0.4315941384255532, "learning_rate": 2.5587998873736763e-05, "loss": 3.6186, "step": 4440 }, { "epoch": 1.0160075464147384, "grad_norm": 0.5785776119263384, "learning_rate": 2.5569482480204964e-05, "loss": 3.7023, "step": 4442 }, { "epoch": 1.0164649048837309, "grad_norm": 0.5684126996066565, "learning_rate": 2.5550965774104136e-05, "loss": 3.5608, "step": 4444 }, { "epoch": 1.0169222633527235, "grad_norm": 0.3849159566688693, "learning_rate": 2.5532448765597432e-05, "loss": 3.6488, "step": 4446 }, { "epoch": 1.017379621821716, "grad_norm": 0.6041060587200449, "learning_rate": 2.55139314648482e-05, "loss": 3.5278, "step": 4448 }, { "epoch": 1.0178369802907086, "grad_norm": 0.4255526392647679, "learning_rate": 2.5495413882019947e-05, "loss": 3.5788, "step": 4450 }, { "epoch": 1.018294338759701, "grad_norm": 0.46543953485352707, "learning_rate": 2.5476896027276313e-05, "loss": 3.8987, "step": 4452 }, { "epoch": 1.0187516972286936, "grad_norm": 0.46332769241546945, "learning_rate": 2.5458377910781118e-05, "loss": 3.7862, "step": 4454 }, { "epoch": 1.019209055697686, "grad_norm": 0.4252762030221371, "learning_rate": 2.5439859542698297e-05, "loss": 3.6774, "step": 4456 }, { "epoch": 1.0196664141666785, "grad_norm": 0.5020449661959904, "learning_rate": 2.5421340933191935e-05, "loss": 3.6817, "step": 4458 }, { "epoch": 1.0201237726356711, "grad_norm": 0.5129251310154699, "learning_rate": 2.540282209242626e-05, "loss": 3.6165, "step": 4460 }, { "epoch": 1.0205811311046635, "grad_norm": 0.3974138086669078, "learning_rate": 2.5384303030565602e-05, "loss": 3.6308, "step": 4462 }, { "epoch": 1.0210384895736562, "grad_norm": 0.4080204166932552, "learning_rate": 2.5365783757774442e-05, "loss": 3.598, "step": 4464 }, { "epoch": 1.0214958480426486, "grad_norm": 0.4086145034683508, "learning_rate": 2.5347264284217354e-05, "loss": 3.625, "step": 4466 }, { "epoch": 1.0219532065116412, "grad_norm": 0.6652182003077568, "learning_rate": 2.532874462005903e-05, "loss": 3.6417, "step": 4468 }, { "epoch": 1.0224105649806337, "grad_norm": 0.49098540147067626, "learning_rate": 2.5310224775464263e-05, "loss": 3.6882, "step": 4470 }, { "epoch": 1.0228679234496263, "grad_norm": 0.5115362526248567, "learning_rate": 2.5291704760597966e-05, "loss": 3.5985, "step": 4472 }, { "epoch": 1.0233252819186187, "grad_norm": 0.48033037751779867, "learning_rate": 2.527318458562511e-05, "loss": 3.4019, "step": 4474 }, { "epoch": 1.0237826403876114, "grad_norm": 0.5132899151539496, "learning_rate": 2.5254664260710777e-05, "loss": 3.5247, "step": 4476 }, { "epoch": 1.0242399988566038, "grad_norm": 0.46019852125115945, "learning_rate": 2.523614379602013e-05, "loss": 3.842, "step": 4478 }, { "epoch": 1.0246973573255964, "grad_norm": 0.6169214520265159, "learning_rate": 2.52176232017184e-05, "loss": 3.5125, "step": 4480 }, { "epoch": 1.0251547157945888, "grad_norm": 0.46159616382441127, "learning_rate": 2.5199102487970893e-05, "loss": 3.6081, "step": 4482 }, { "epoch": 1.0256120742635815, "grad_norm": 0.469373256868054, "learning_rate": 2.5180581664942997e-05, "loss": 3.7203, "step": 4484 }, { "epoch": 1.026069432732574, "grad_norm": 0.45100176688332405, "learning_rate": 2.5162060742800127e-05, "loss": 3.667, "step": 4486 }, { "epoch": 1.0265267912015665, "grad_norm": 0.5389111347812154, "learning_rate": 2.5143539731707772e-05, "loss": 3.6154, "step": 4488 }, { "epoch": 1.026984149670559, "grad_norm": 0.5872416668930364, "learning_rate": 2.5125018641831484e-05, "loss": 3.5373, "step": 4490 }, { "epoch": 1.0274415081395516, "grad_norm": 0.4582293204570124, "learning_rate": 2.5106497483336823e-05, "loss": 3.7065, "step": 4492 }, { "epoch": 1.027898866608544, "grad_norm": 0.5124190976090442, "learning_rate": 2.508797626638941e-05, "loss": 3.5325, "step": 4494 }, { "epoch": 1.0283562250775367, "grad_norm": 0.48403758941051395, "learning_rate": 2.5069455001154907e-05, "loss": 3.6917, "step": 4496 }, { "epoch": 1.028813583546529, "grad_norm": 0.5139511567557846, "learning_rate": 2.5050933697798974e-05, "loss": 3.7988, "step": 4498 }, { "epoch": 1.0292709420155215, "grad_norm": 0.4556878389057201, "learning_rate": 2.5032412366487318e-05, "loss": 3.741, "step": 4500 }, { "epoch": 1.0297283004845141, "grad_norm": 0.556544727296556, "learning_rate": 2.501389101738564e-05, "loss": 3.6018, "step": 4502 }, { "epoch": 1.0301856589535066, "grad_norm": 0.4566022049116683, "learning_rate": 2.499536966065967e-05, "loss": 3.5702, "step": 4504 }, { "epoch": 1.0306430174224992, "grad_norm": 0.40300026819537305, "learning_rate": 2.497684830647513e-05, "loss": 3.5541, "step": 4506 }, { "epoch": 1.0311003758914916, "grad_norm": 0.4784389823726162, "learning_rate": 2.495832696499775e-05, "loss": 3.5829, "step": 4508 }, { "epoch": 1.0315577343604843, "grad_norm": 0.7629522655174178, "learning_rate": 2.4939805646393243e-05, "loss": 3.5266, "step": 4510 }, { "epoch": 1.0320150928294767, "grad_norm": 0.5154818711890227, "learning_rate": 2.4921284360827322e-05, "loss": 3.6205, "step": 4512 }, { "epoch": 1.0324724512984693, "grad_norm": 0.46115105741115536, "learning_rate": 2.4902763118465645e-05, "loss": 3.6055, "step": 4514 }, { "epoch": 1.0329298097674617, "grad_norm": 0.5717534712605588, "learning_rate": 2.4884241929473905e-05, "loss": 3.6651, "step": 4516 }, { "epoch": 1.0333871682364544, "grad_norm": 0.49413919260171746, "learning_rate": 2.4865720804017734e-05, "loss": 3.5982, "step": 4518 }, { "epoch": 1.0338445267054468, "grad_norm": 0.5724382888650232, "learning_rate": 2.4847199752262716e-05, "loss": 3.647, "step": 4520 }, { "epoch": 1.0343018851744394, "grad_norm": 0.5625981933930211, "learning_rate": 2.482867878437441e-05, "loss": 3.7637, "step": 4522 }, { "epoch": 1.0347592436434319, "grad_norm": 0.6057492252570034, "learning_rate": 2.481015791051835e-05, "loss": 3.6629, "step": 4524 }, { "epoch": 1.0352166021124245, "grad_norm": 0.45583818675717086, "learning_rate": 2.479163714085997e-05, "loss": 3.7316, "step": 4526 }, { "epoch": 1.035673960581417, "grad_norm": 0.5974824440743177, "learning_rate": 2.4773116485564682e-05, "loss": 3.7362, "step": 4528 }, { "epoch": 1.0361313190504096, "grad_norm": 0.49039708814969973, "learning_rate": 2.4754595954797837e-05, "loss": 3.5853, "step": 4530 }, { "epoch": 1.036588677519402, "grad_norm": 0.5212500399308224, "learning_rate": 2.4736075558724698e-05, "loss": 3.8123, "step": 4532 }, { "epoch": 1.0370460359883946, "grad_norm": 0.46402895064410127, "learning_rate": 2.4717555307510462e-05, "loss": 3.7599, "step": 4534 }, { "epoch": 1.037503394457387, "grad_norm": 0.5645350360505844, "learning_rate": 2.4699035211320257e-05, "loss": 3.691, "step": 4536 }, { "epoch": 1.0379607529263797, "grad_norm": 0.41039659633391584, "learning_rate": 2.4680515280319103e-05, "loss": 3.6457, "step": 4538 }, { "epoch": 1.038418111395372, "grad_norm": 0.48114443773077026, "learning_rate": 2.4661995524671952e-05, "loss": 3.6957, "step": 4540 }, { "epoch": 1.0388754698643647, "grad_norm": 0.5115912475151908, "learning_rate": 2.4643475954543658e-05, "loss": 3.5199, "step": 4542 }, { "epoch": 1.0393328283333572, "grad_norm": 0.5078851207745306, "learning_rate": 2.4624956580098952e-05, "loss": 3.5163, "step": 4544 }, { "epoch": 1.0397901868023496, "grad_norm": 0.5403183116787821, "learning_rate": 2.4606437411502487e-05, "loss": 3.6194, "step": 4546 }, { "epoch": 1.0402475452713422, "grad_norm": 0.4686992776766681, "learning_rate": 2.4587918458918763e-05, "loss": 3.529, "step": 4548 }, { "epoch": 1.0407049037403346, "grad_norm": 0.4187596227479675, "learning_rate": 2.4569399732512205e-05, "loss": 3.5312, "step": 4550 }, { "epoch": 1.0411622622093273, "grad_norm": 0.44839571316181637, "learning_rate": 2.4550881242447098e-05, "loss": 3.6103, "step": 4552 }, { "epoch": 1.0416196206783197, "grad_norm": 0.4752398098872048, "learning_rate": 2.4532362998887583e-05, "loss": 3.4833, "step": 4554 }, { "epoch": 1.0420769791473123, "grad_norm": 0.515439144508881, "learning_rate": 2.4513845011997682e-05, "loss": 3.5413, "step": 4556 }, { "epoch": 1.0425343376163048, "grad_norm": 0.7383425651802666, "learning_rate": 2.449532729194128e-05, "loss": 3.6433, "step": 4558 }, { "epoch": 1.0429916960852974, "grad_norm": 0.4385872342257959, "learning_rate": 2.4476809848882085e-05, "loss": 3.4894, "step": 4560 }, { "epoch": 1.0434490545542898, "grad_norm": 0.486868485369209, "learning_rate": 2.4458292692983693e-05, "loss": 3.7343, "step": 4562 }, { "epoch": 1.0439064130232825, "grad_norm": 0.5331134091869206, "learning_rate": 2.443977583440952e-05, "loss": 3.5342, "step": 4564 }, { "epoch": 1.0443637714922749, "grad_norm": 0.40202405876432357, "learning_rate": 2.4421259283322818e-05, "loss": 3.6993, "step": 4566 }, { "epoch": 1.0448211299612675, "grad_norm": 0.4674044848341989, "learning_rate": 2.4402743049886677e-05, "loss": 3.6685, "step": 4568 }, { "epoch": 1.04527848843026, "grad_norm": 0.44114652098414825, "learning_rate": 2.4384227144264016e-05, "loss": 3.7447, "step": 4570 }, { "epoch": 1.0457358468992526, "grad_norm": 0.46693533276491667, "learning_rate": 2.436571157661756e-05, "loss": 3.5061, "step": 4572 }, { "epoch": 1.046193205368245, "grad_norm": 0.4965979499478453, "learning_rate": 2.4347196357109872e-05, "loss": 3.6563, "step": 4574 }, { "epoch": 1.0466505638372376, "grad_norm": 0.4030836949146051, "learning_rate": 2.4328681495903287e-05, "loss": 3.5903, "step": 4576 }, { "epoch": 1.04710792230623, "grad_norm": 0.4554644406149152, "learning_rate": 2.4310167003159978e-05, "loss": 3.6573, "step": 4578 }, { "epoch": 1.0475652807752227, "grad_norm": 0.5532342113288097, "learning_rate": 2.4291652889041918e-05, "loss": 3.7548, "step": 4580 }, { "epoch": 1.0480226392442151, "grad_norm": 0.49425616817069695, "learning_rate": 2.427313916371082e-05, "loss": 3.636, "step": 4582 }, { "epoch": 1.0484799977132075, "grad_norm": 0.4553870106499823, "learning_rate": 2.4254625837328255e-05, "loss": 3.5345, "step": 4584 }, { "epoch": 1.0489373561822002, "grad_norm": 0.44251375597245957, "learning_rate": 2.423611292005553e-05, "loss": 3.5247, "step": 4586 }, { "epoch": 1.0493947146511926, "grad_norm": 0.4235218717124592, "learning_rate": 2.421760042205374e-05, "loss": 3.5435, "step": 4588 }, { "epoch": 1.0498520731201852, "grad_norm": 0.4927450685201102, "learning_rate": 2.419908835348374e-05, "loss": 3.7478, "step": 4590 }, { "epoch": 1.0503094315891777, "grad_norm": 0.481895777678649, "learning_rate": 2.418057672450617e-05, "loss": 3.714, "step": 4592 }, { "epoch": 1.0507667900581703, "grad_norm": 0.5394222607025181, "learning_rate": 2.4162065545281408e-05, "loss": 3.6774, "step": 4594 }, { "epoch": 1.0512241485271627, "grad_norm": 0.5204210334281062, "learning_rate": 2.4143554825969594e-05, "loss": 3.6623, "step": 4596 }, { "epoch": 1.0516815069961554, "grad_norm": 0.8267578860363296, "learning_rate": 2.4125044576730626e-05, "loss": 3.6023, "step": 4598 }, { "epoch": 1.0521388654651478, "grad_norm": 0.5341576217752481, "learning_rate": 2.4106534807724117e-05, "loss": 3.4541, "step": 4600 }, { "epoch": 1.0525962239341404, "grad_norm": 0.5187588449666276, "learning_rate": 2.4088025529109453e-05, "loss": 3.6722, "step": 4602 }, { "epoch": 1.0530535824031328, "grad_norm": 0.4626641665177215, "learning_rate": 2.40695167510457e-05, "loss": 3.7447, "step": 4604 }, { "epoch": 1.0535109408721255, "grad_norm": 0.7947282485025278, "learning_rate": 2.4051008483691706e-05, "loss": 3.4861, "step": 4606 }, { "epoch": 1.053968299341118, "grad_norm": 0.4960448623959092, "learning_rate": 2.4032500737206012e-05, "loss": 3.5629, "step": 4608 }, { "epoch": 1.0544256578101106, "grad_norm": 0.3782175234626828, "learning_rate": 2.401399352174685e-05, "loss": 3.6621, "step": 4610 }, { "epoch": 1.054883016279103, "grad_norm": 0.4868417195709296, "learning_rate": 2.3995486847472204e-05, "loss": 3.5938, "step": 4612 }, { "epoch": 1.0553403747480956, "grad_norm": 0.47884538148570827, "learning_rate": 2.3976980724539748e-05, "loss": 3.7371, "step": 4614 }, { "epoch": 1.055797733217088, "grad_norm": 0.3287803259340645, "learning_rate": 2.395847516310682e-05, "loss": 3.7125, "step": 4616 }, { "epoch": 1.0562550916860807, "grad_norm": 0.4988085887073523, "learning_rate": 2.3939970173330496e-05, "loss": 3.7071, "step": 4618 }, { "epoch": 1.056712450155073, "grad_norm": 0.454077902556929, "learning_rate": 2.3921465765367518e-05, "loss": 3.6327, "step": 4620 }, { "epoch": 1.0571698086240657, "grad_norm": 0.5256575009416337, "learning_rate": 2.3902961949374302e-05, "loss": 3.5921, "step": 4622 }, { "epoch": 1.0576271670930582, "grad_norm": 0.6111221453966155, "learning_rate": 2.3884458735506945e-05, "loss": 3.6245, "step": 4624 }, { "epoch": 1.0580845255620508, "grad_norm": 0.4479543888117191, "learning_rate": 2.3865956133921226e-05, "loss": 3.5779, "step": 4626 }, { "epoch": 1.0585418840310432, "grad_norm": 0.4753998347246816, "learning_rate": 2.3847454154772563e-05, "loss": 3.6259, "step": 4628 }, { "epoch": 1.0589992425000356, "grad_norm": 0.5598542722905084, "learning_rate": 2.3828952808216064e-05, "loss": 3.5646, "step": 4630 }, { "epoch": 1.0594566009690283, "grad_norm": 0.5789832883738939, "learning_rate": 2.3810452104406444e-05, "loss": 3.6368, "step": 4632 }, { "epoch": 1.0599139594380207, "grad_norm": 0.5577210498291517, "learning_rate": 2.3791952053498114e-05, "loss": 3.7355, "step": 4634 }, { "epoch": 1.0603713179070133, "grad_norm": 0.3883944532521342, "learning_rate": 2.377345266564511e-05, "loss": 3.538, "step": 4636 }, { "epoch": 1.0608286763760058, "grad_norm": 0.46422760463552754, "learning_rate": 2.375495395100108e-05, "loss": 3.5622, "step": 4638 }, { "epoch": 1.0612860348449984, "grad_norm": 0.4269998069434433, "learning_rate": 2.3736455919719334e-05, "loss": 3.4803, "step": 4640 }, { "epoch": 1.0617433933139908, "grad_norm": 0.5155837013582838, "learning_rate": 2.3717958581952805e-05, "loss": 3.5757, "step": 4642 }, { "epoch": 1.0622007517829835, "grad_norm": 0.5677402440706456, "learning_rate": 2.3699461947854007e-05, "loss": 3.7027, "step": 4644 }, { "epoch": 1.0626581102519759, "grad_norm": 0.47276612257938366, "learning_rate": 2.3680966027575123e-05, "loss": 3.8242, "step": 4646 }, { "epoch": 1.0631154687209685, "grad_norm": 0.5219431084601169, "learning_rate": 2.366247083126791e-05, "loss": 3.6552, "step": 4648 }, { "epoch": 1.063572827189961, "grad_norm": 0.47236206420464044, "learning_rate": 2.364397636908373e-05, "loss": 3.7313, "step": 4650 }, { "epoch": 1.0640301856589536, "grad_norm": 0.495219895283115, "learning_rate": 2.362548265117355e-05, "loss": 3.7014, "step": 4652 }, { "epoch": 1.064487544127946, "grad_norm": 0.4575939138357865, "learning_rate": 2.3606989687687933e-05, "loss": 3.6942, "step": 4654 }, { "epoch": 1.0649449025969386, "grad_norm": 0.47054576221925404, "learning_rate": 2.358849748877701e-05, "loss": 3.5175, "step": 4656 }, { "epoch": 1.065402261065931, "grad_norm": 0.4106884364766329, "learning_rate": 2.3570006064590517e-05, "loss": 3.6322, "step": 4658 }, { "epoch": 1.0658596195349237, "grad_norm": 0.4188424720007509, "learning_rate": 2.355151542527772e-05, "loss": 3.4315, "step": 4660 }, { "epoch": 1.0663169780039161, "grad_norm": 0.4569019721663023, "learning_rate": 2.3533025580987517e-05, "loss": 3.5925, "step": 4662 }, { "epoch": 1.0667743364729088, "grad_norm": 0.6999984102571691, "learning_rate": 2.3514536541868334e-05, "loss": 3.448, "step": 4664 }, { "epoch": 1.0672316949419012, "grad_norm": 0.5325655916050609, "learning_rate": 2.3496048318068137e-05, "loss": 3.6744, "step": 4666 }, { "epoch": 1.0676890534108936, "grad_norm": 0.512571745014298, "learning_rate": 2.3477560919734484e-05, "loss": 3.6125, "step": 4668 }, { "epoch": 1.0681464118798862, "grad_norm": 0.5542247346222074, "learning_rate": 2.345907435701447e-05, "loss": 3.5227, "step": 4670 }, { "epoch": 1.0686037703488787, "grad_norm": 0.49704472564014773, "learning_rate": 2.3440588640054695e-05, "loss": 3.5404, "step": 4672 }, { "epoch": 1.0690611288178713, "grad_norm": 0.6328911908990911, "learning_rate": 2.3422103779001347e-05, "loss": 4.0243, "step": 4674 }, { "epoch": 1.0695184872868637, "grad_norm": 0.47477998531958754, "learning_rate": 2.3403619784000126e-05, "loss": 3.5265, "step": 4676 }, { "epoch": 1.0699758457558564, "grad_norm": 0.5005037630989608, "learning_rate": 2.338513666519622e-05, "loss": 3.4611, "step": 4678 }, { "epoch": 1.0704332042248488, "grad_norm": 0.4271913504270028, "learning_rate": 2.3366654432734393e-05, "loss": 3.6297, "step": 4680 }, { "epoch": 1.0708905626938414, "grad_norm": 0.799683390491655, "learning_rate": 2.3348173096758895e-05, "loss": 3.5966, "step": 4682 }, { "epoch": 1.0713479211628338, "grad_norm": 0.4197400431289865, "learning_rate": 2.332969266741347e-05, "loss": 3.6208, "step": 4684 }, { "epoch": 1.0718052796318265, "grad_norm": 0.570180250537184, "learning_rate": 2.3311213154841397e-05, "loss": 3.712, "step": 4686 }, { "epoch": 1.072262638100819, "grad_norm": 0.7552890252161344, "learning_rate": 2.329273456918543e-05, "loss": 3.5902, "step": 4688 }, { "epoch": 1.0727199965698115, "grad_norm": 0.3875268142405728, "learning_rate": 2.3274256920587816e-05, "loss": 3.6637, "step": 4690 }, { "epoch": 1.073177355038804, "grad_norm": 0.5485207997066989, "learning_rate": 2.3255780219190304e-05, "loss": 3.5894, "step": 4692 }, { "epoch": 1.0736347135077966, "grad_norm": 0.44669090226893804, "learning_rate": 2.3237304475134078e-05, "loss": 3.572, "step": 4694 }, { "epoch": 1.074092071976789, "grad_norm": 0.5408205858245743, "learning_rate": 2.3218829698559857e-05, "loss": 3.5901, "step": 4696 }, { "epoch": 1.0745494304457817, "grad_norm": 0.5520886534908356, "learning_rate": 2.3200355899607804e-05, "loss": 3.6962, "step": 4698 }, { "epoch": 1.075006788914774, "grad_norm": 0.6500348558617023, "learning_rate": 2.318188308841751e-05, "loss": 3.6783, "step": 4700 }, { "epoch": 1.0754641473837667, "grad_norm": 0.49700146331322237, "learning_rate": 2.3163411275128084e-05, "loss": 3.6067, "step": 4702 }, { "epoch": 1.0759215058527591, "grad_norm": 0.3570974576568477, "learning_rate": 2.3144940469878062e-05, "loss": 3.6581, "step": 4704 }, { "epoch": 1.0763788643217518, "grad_norm": 0.4828973504230568, "learning_rate": 2.31264706828054e-05, "loss": 3.4305, "step": 4706 }, { "epoch": 1.0768362227907442, "grad_norm": 0.3912040614620348, "learning_rate": 2.310800192404753e-05, "loss": 3.5271, "step": 4708 }, { "epoch": 1.0772935812597368, "grad_norm": 0.5631844277307094, "learning_rate": 2.3089534203741327e-05, "loss": 3.5922, "step": 4710 }, { "epoch": 1.0777509397287293, "grad_norm": 0.529846187890805, "learning_rate": 2.3071067532023044e-05, "loss": 3.6724, "step": 4712 }, { "epoch": 1.0782082981977217, "grad_norm": 0.5162671165769186, "learning_rate": 2.3052601919028413e-05, "loss": 3.5508, "step": 4714 }, { "epoch": 1.0786656566667143, "grad_norm": 0.5955363556104104, "learning_rate": 2.3034137374892566e-05, "loss": 3.4774, "step": 4716 }, { "epoch": 1.0791230151357067, "grad_norm": 0.4027266670917644, "learning_rate": 2.301567390975003e-05, "loss": 3.6921, "step": 4718 }, { "epoch": 1.0795803736046994, "grad_norm": 0.3629240515417792, "learning_rate": 2.2997211533734783e-05, "loss": 3.5914, "step": 4720 }, { "epoch": 1.0800377320736918, "grad_norm": 0.48141284392093026, "learning_rate": 2.2978750256980144e-05, "loss": 3.6414, "step": 4722 }, { "epoch": 1.0804950905426844, "grad_norm": 0.4042519069855486, "learning_rate": 2.296029008961888e-05, "loss": 3.6365, "step": 4724 }, { "epoch": 1.0809524490116769, "grad_norm": 0.4592321011425102, "learning_rate": 2.2941831041783152e-05, "loss": 3.6265, "step": 4726 }, { "epoch": 1.0814098074806695, "grad_norm": 0.3714445764701019, "learning_rate": 2.2923373123604442e-05, "loss": 3.4668, "step": 4728 }, { "epoch": 1.081867165949662, "grad_norm": 0.48069900329896, "learning_rate": 2.2904916345213694e-05, "loss": 3.4424, "step": 4730 }, { "epoch": 1.0823245244186546, "grad_norm": 0.43194538249574604, "learning_rate": 2.2886460716741183e-05, "loss": 3.5892, "step": 4732 }, { "epoch": 1.082781882887647, "grad_norm": 0.361904612303048, "learning_rate": 2.2868006248316536e-05, "loss": 3.5946, "step": 4734 }, { "epoch": 1.0832392413566396, "grad_norm": 0.525781989132502, "learning_rate": 2.2849552950068787e-05, "loss": 3.5542, "step": 4736 }, { "epoch": 1.083696599825632, "grad_norm": 0.48733844525482356, "learning_rate": 2.283110083212631e-05, "loss": 3.5258, "step": 4738 }, { "epoch": 1.0841539582946247, "grad_norm": 0.5410571306192895, "learning_rate": 2.2812649904616808e-05, "loss": 3.6598, "step": 4740 }, { "epoch": 1.084611316763617, "grad_norm": 0.3924043185244331, "learning_rate": 2.2794200177667366e-05, "loss": 3.5235, "step": 4742 }, { "epoch": 1.0850686752326097, "grad_norm": 0.4436001900866852, "learning_rate": 2.2775751661404402e-05, "loss": 3.5478, "step": 4744 }, { "epoch": 1.0855260337016022, "grad_norm": 0.4782874205476945, "learning_rate": 2.2757304365953637e-05, "loss": 3.5235, "step": 4746 }, { "epoch": 1.0859833921705948, "grad_norm": 0.445330351756136, "learning_rate": 2.2738858301440173e-05, "loss": 3.5968, "step": 4748 }, { "epoch": 1.0864407506395872, "grad_norm": 0.5020777625517501, "learning_rate": 2.2720413477988382e-05, "loss": 3.6542, "step": 4750 }, { "epoch": 1.0868981091085796, "grad_norm": 0.5511025107336053, "learning_rate": 2.2701969905722006e-05, "loss": 3.6028, "step": 4752 }, { "epoch": 1.0873554675775723, "grad_norm": 0.6604107924680466, "learning_rate": 2.268352759476408e-05, "loss": 3.6948, "step": 4754 }, { "epoch": 1.0878128260465647, "grad_norm": 0.5366015166025558, "learning_rate": 2.2665086555236927e-05, "loss": 3.5722, "step": 4756 }, { "epoch": 1.0882701845155573, "grad_norm": 0.43190475943752327, "learning_rate": 2.2646646797262193e-05, "loss": 3.6127, "step": 4758 }, { "epoch": 1.0887275429845498, "grad_norm": 0.4799768829987898, "learning_rate": 2.262820833096084e-05, "loss": 3.5443, "step": 4760 }, { "epoch": 1.0891849014535424, "grad_norm": 0.6263359097699657, "learning_rate": 2.260977116645306e-05, "loss": 3.4652, "step": 4762 }, { "epoch": 1.0896422599225348, "grad_norm": 0.39479505576068874, "learning_rate": 2.2591335313858393e-05, "loss": 3.4741, "step": 4764 }, { "epoch": 1.0900996183915275, "grad_norm": 0.38944770786263716, "learning_rate": 2.2572900783295644e-05, "loss": 3.5501, "step": 4766 }, { "epoch": 1.0905569768605199, "grad_norm": 0.48575090523977543, "learning_rate": 2.2554467584882852e-05, "loss": 3.6144, "step": 4768 }, { "epoch": 1.0910143353295125, "grad_norm": 0.38585342541664924, "learning_rate": 2.2536035728737374e-05, "loss": 3.6717, "step": 4770 }, { "epoch": 1.091471693798505, "grad_norm": 0.4111879542832852, "learning_rate": 2.2517605224975823e-05, "loss": 3.5123, "step": 4772 }, { "epoch": 1.0919290522674976, "grad_norm": 0.46159482412721403, "learning_rate": 2.2499176083714032e-05, "loss": 3.5958, "step": 4774 }, { "epoch": 1.09238641073649, "grad_norm": 0.386928501466309, "learning_rate": 2.248074831506714e-05, "loss": 3.5536, "step": 4776 }, { "epoch": 1.0928437692054827, "grad_norm": 0.4147170848685094, "learning_rate": 2.2462321929149486e-05, "loss": 3.5709, "step": 4778 }, { "epoch": 1.093301127674475, "grad_norm": 0.4681039490696786, "learning_rate": 2.2443896936074666e-05, "loss": 3.4447, "step": 4780 }, { "epoch": 1.0937584861434677, "grad_norm": 0.5002273698251032, "learning_rate": 2.242547334595554e-05, "loss": 3.4285, "step": 4782 }, { "epoch": 1.0942158446124601, "grad_norm": 0.48960372361542853, "learning_rate": 2.2407051168904147e-05, "loss": 3.6662, "step": 4784 }, { "epoch": 1.0946732030814528, "grad_norm": 0.4679509982657135, "learning_rate": 2.238863041503179e-05, "loss": 3.524, "step": 4786 }, { "epoch": 1.0951305615504452, "grad_norm": 0.5340679826003027, "learning_rate": 2.2370211094448983e-05, "loss": 3.6146, "step": 4788 }, { "epoch": 1.0955879200194378, "grad_norm": 0.5680211952059547, "learning_rate": 2.2351793217265427e-05, "loss": 3.6444, "step": 4790 }, { "epoch": 1.0960452784884303, "grad_norm": 0.49395860176717327, "learning_rate": 2.2333376793590067e-05, "loss": 3.4452, "step": 4792 }, { "epoch": 1.096502636957423, "grad_norm": 0.469038715458428, "learning_rate": 2.231496183353104e-05, "loss": 3.5352, "step": 4794 }, { "epoch": 1.0969599954264153, "grad_norm": 0.4710311696253619, "learning_rate": 2.2296548347195654e-05, "loss": 3.5698, "step": 4796 }, { "epoch": 1.0974173538954077, "grad_norm": 0.42734654580828374, "learning_rate": 2.2278136344690444e-05, "loss": 3.5454, "step": 4798 }, { "epoch": 1.0978747123644004, "grad_norm": 0.4865433549881363, "learning_rate": 2.225972583612112e-05, "loss": 3.5354, "step": 4800 }, { "epoch": 1.0983320708333928, "grad_norm": 0.5808028244268427, "learning_rate": 2.2241316831592543e-05, "loss": 3.5968, "step": 4802 }, { "epoch": 1.0987894293023854, "grad_norm": 0.6044763122347454, "learning_rate": 2.2222909341208803e-05, "loss": 3.5975, "step": 4804 }, { "epoch": 1.0992467877713779, "grad_norm": 0.4715685348175149, "learning_rate": 2.2204503375073103e-05, "loss": 3.6146, "step": 4806 }, { "epoch": 1.0997041462403705, "grad_norm": 0.4200363180659229, "learning_rate": 2.218609894328784e-05, "loss": 3.6309, "step": 4808 }, { "epoch": 1.100161504709363, "grad_norm": 0.6529276855511089, "learning_rate": 2.216769605595458e-05, "loss": 3.4797, "step": 4810 }, { "epoch": 1.1006188631783556, "grad_norm": 0.4464472150315652, "learning_rate": 2.2149294723174e-05, "loss": 3.6102, "step": 4812 }, { "epoch": 1.101076221647348, "grad_norm": 0.5444745651361085, "learning_rate": 2.213089495504597e-05, "loss": 3.6737, "step": 4814 }, { "epoch": 1.1015335801163406, "grad_norm": 0.5080703667505495, "learning_rate": 2.2112496761669483e-05, "loss": 3.7037, "step": 4816 }, { "epoch": 1.101990938585333, "grad_norm": 0.7171934522327763, "learning_rate": 2.2094100153142634e-05, "loss": 3.7236, "step": 4818 }, { "epoch": 1.1024482970543257, "grad_norm": 0.45735979905036656, "learning_rate": 2.2075705139562705e-05, "loss": 3.6689, "step": 4820 }, { "epoch": 1.102905655523318, "grad_norm": 0.5495618483783455, "learning_rate": 2.2057311731026083e-05, "loss": 3.5677, "step": 4822 }, { "epoch": 1.1033630139923107, "grad_norm": 0.5096083358199656, "learning_rate": 2.2038919937628236e-05, "loss": 3.5772, "step": 4824 }, { "epoch": 1.1038203724613032, "grad_norm": 0.48266336057296433, "learning_rate": 2.20205297694638e-05, "loss": 3.5686, "step": 4826 }, { "epoch": 1.1042777309302958, "grad_norm": 0.4328147144467549, "learning_rate": 2.2002141236626504e-05, "loss": 3.5997, "step": 4828 }, { "epoch": 1.1047350893992882, "grad_norm": 0.3981916940959488, "learning_rate": 2.198375434920914e-05, "loss": 3.5149, "step": 4830 }, { "epoch": 1.1051924478682809, "grad_norm": 0.4970162292916201, "learning_rate": 2.196536911730365e-05, "loss": 3.6113, "step": 4832 }, { "epoch": 1.1056498063372733, "grad_norm": 0.4959019117320036, "learning_rate": 2.1946985551001055e-05, "loss": 3.4665, "step": 4834 }, { "epoch": 1.1061071648062657, "grad_norm": 0.5644920098133743, "learning_rate": 2.1928603660391426e-05, "loss": 3.5948, "step": 4836 }, { "epoch": 1.1065645232752583, "grad_norm": 0.4986836569982581, "learning_rate": 2.191022345556397e-05, "loss": 3.4605, "step": 4838 }, { "epoch": 1.1070218817442508, "grad_norm": 0.5884677930622262, "learning_rate": 2.189184494660691e-05, "loss": 3.5544, "step": 4840 }, { "epoch": 1.1074792402132434, "grad_norm": 0.4984192297764929, "learning_rate": 2.1873468143607574e-05, "loss": 3.5052, "step": 4842 }, { "epoch": 1.1079365986822358, "grad_norm": 0.46470710605288373, "learning_rate": 2.1855093056652373e-05, "loss": 3.5089, "step": 4844 }, { "epoch": 1.1083939571512285, "grad_norm": 0.44684054235177717, "learning_rate": 2.1836719695826716e-05, "loss": 3.5207, "step": 4846 }, { "epoch": 1.1088513156202209, "grad_norm": 0.5153866130122791, "learning_rate": 2.1818348071215117e-05, "loss": 3.641, "step": 4848 }, { "epoch": 1.1093086740892135, "grad_norm": 0.38661766657764046, "learning_rate": 2.179997819290113e-05, "loss": 3.6129, "step": 4850 }, { "epoch": 1.109766032558206, "grad_norm": 0.5146140968403796, "learning_rate": 2.178161007096731e-05, "loss": 3.5865, "step": 4852 }, { "epoch": 1.1102233910271986, "grad_norm": 0.43229628947287246, "learning_rate": 2.1763243715495296e-05, "loss": 3.7279, "step": 4854 }, { "epoch": 1.110680749496191, "grad_norm": 0.5623512601231685, "learning_rate": 2.1744879136565748e-05, "loss": 3.7006, "step": 4856 }, { "epoch": 1.1111381079651836, "grad_norm": 0.6128777555496727, "learning_rate": 2.1726516344258313e-05, "loss": 3.8373, "step": 4858 }, { "epoch": 1.111595466434176, "grad_norm": 0.4632182511053723, "learning_rate": 2.170815534865171e-05, "loss": 3.4641, "step": 4860 }, { "epoch": 1.1120528249031687, "grad_norm": 0.4995984014360312, "learning_rate": 2.168979615982365e-05, "loss": 3.7056, "step": 4862 }, { "epoch": 1.1125101833721611, "grad_norm": 0.4172627734736958, "learning_rate": 2.167143878785083e-05, "loss": 3.4312, "step": 4864 }, { "epoch": 1.1129675418411538, "grad_norm": 0.4527263861174326, "learning_rate": 2.1653083242808995e-05, "loss": 3.4786, "step": 4866 }, { "epoch": 1.1134249003101462, "grad_norm": 0.4357490583465434, "learning_rate": 2.163472953477284e-05, "loss": 3.4353, "step": 4868 }, { "epoch": 1.1138822587791388, "grad_norm": 0.47540521027281984, "learning_rate": 2.161637767381608e-05, "loss": 3.4529, "step": 4870 }, { "epoch": 1.1143396172481312, "grad_norm": 0.3788377955628856, "learning_rate": 2.159802767001143e-05, "loss": 3.495, "step": 4872 }, { "epoch": 1.1147969757171239, "grad_norm": 0.4381626934374628, "learning_rate": 2.1579679533430537e-05, "loss": 3.5797, "step": 4874 }, { "epoch": 1.1152543341861163, "grad_norm": 0.4723525687456671, "learning_rate": 2.1561333274144057e-05, "loss": 3.5892, "step": 4876 }, { "epoch": 1.115711692655109, "grad_norm": 0.4553352088213555, "learning_rate": 2.154298890222164e-05, "loss": 3.6007, "step": 4878 }, { "epoch": 1.1161690511241014, "grad_norm": 0.4631998662887061, "learning_rate": 2.152464642773183e-05, "loss": 3.5346, "step": 4880 }, { "epoch": 1.1166264095930938, "grad_norm": 0.42421586894537583, "learning_rate": 2.1506305860742197e-05, "loss": 3.5495, "step": 4882 }, { "epoch": 1.1170837680620864, "grad_norm": 0.4775208389767974, "learning_rate": 2.1487967211319245e-05, "loss": 3.5591, "step": 4884 }, { "epoch": 1.1175411265310788, "grad_norm": 0.41516411187735924, "learning_rate": 2.1469630489528388e-05, "loss": 3.6399, "step": 4886 }, { "epoch": 1.1179984850000715, "grad_norm": 0.4277562697568106, "learning_rate": 2.1451295705434028e-05, "loss": 3.5838, "step": 4888 }, { "epoch": 1.118455843469064, "grad_norm": 0.38479298528222616, "learning_rate": 2.1432962869099503e-05, "loss": 3.5806, "step": 4890 }, { "epoch": 1.1189132019380565, "grad_norm": 0.5129534262379408, "learning_rate": 2.1414631990587035e-05, "loss": 3.6027, "step": 4892 }, { "epoch": 1.119370560407049, "grad_norm": 0.4656987061504488, "learning_rate": 2.1396303079957832e-05, "loss": 3.5728, "step": 4894 }, { "epoch": 1.1198279188760416, "grad_norm": 0.6915099933616555, "learning_rate": 2.137797614727197e-05, "loss": 3.5515, "step": 4896 }, { "epoch": 1.120285277345034, "grad_norm": 0.5532543311348169, "learning_rate": 2.1359651202588472e-05, "loss": 3.5677, "step": 4898 }, { "epoch": 1.1207426358140267, "grad_norm": 0.3979163033382228, "learning_rate": 2.134132825596527e-05, "loss": 3.6212, "step": 4900 }, { "epoch": 1.121199994283019, "grad_norm": 0.40970197679677295, "learning_rate": 2.1323007317459168e-05, "loss": 3.5795, "step": 4902 }, { "epoch": 1.1216573527520117, "grad_norm": 0.384869653416238, "learning_rate": 2.1304688397125895e-05, "loss": 3.6135, "step": 4904 }, { "epoch": 1.1221147112210041, "grad_norm": 0.4531342735656986, "learning_rate": 2.1286371505020092e-05, "loss": 3.6715, "step": 4906 }, { "epoch": 1.1225720696899968, "grad_norm": 0.41410350596512796, "learning_rate": 2.1268056651195227e-05, "loss": 3.3813, "step": 4908 }, { "epoch": 1.1230294281589892, "grad_norm": 0.38162850211342403, "learning_rate": 2.124974384570369e-05, "loss": 3.5778, "step": 4910 }, { "epoch": 1.1234867866279818, "grad_norm": 0.4596675901166249, "learning_rate": 2.1231433098596768e-05, "loss": 3.6024, "step": 4912 }, { "epoch": 1.1239441450969743, "grad_norm": 0.46585014135795233, "learning_rate": 2.121312441992455e-05, "loss": 3.68, "step": 4914 }, { "epoch": 1.124401503565967, "grad_norm": 0.5202434505433509, "learning_rate": 2.119481781973606e-05, "loss": 3.6104, "step": 4916 }, { "epoch": 1.1248588620349593, "grad_norm": 0.4681348419937417, "learning_rate": 2.117651330807915e-05, "loss": 3.4272, "step": 4918 }, { "epoch": 1.1253162205039517, "grad_norm": 0.5035588449376511, "learning_rate": 2.1158210895000502e-05, "loss": 3.6004, "step": 4920 }, { "epoch": 1.1257735789729444, "grad_norm": 0.48256699699773853, "learning_rate": 2.1139910590545704e-05, "loss": 3.3679, "step": 4922 }, { "epoch": 1.126230937441937, "grad_norm": 0.5312765619706004, "learning_rate": 2.1121612404759126e-05, "loss": 3.628, "step": 4924 }, { "epoch": 1.1266882959109294, "grad_norm": 0.44226453265156473, "learning_rate": 2.110331634768401e-05, "loss": 3.7015, "step": 4926 }, { "epoch": 1.1271456543799219, "grad_norm": 0.4400449031978945, "learning_rate": 2.108502242936244e-05, "loss": 3.5073, "step": 4928 }, { "epoch": 1.1276030128489145, "grad_norm": 0.4750873686097116, "learning_rate": 2.106673065983529e-05, "loss": 3.5903, "step": 4930 }, { "epoch": 1.128060371317907, "grad_norm": 0.4957736176373071, "learning_rate": 2.104844104914227e-05, "loss": 3.5462, "step": 4932 }, { "epoch": 1.1285177297868996, "grad_norm": 0.4446282232402044, "learning_rate": 2.1030153607321938e-05, "loss": 3.5739, "step": 4934 }, { "epoch": 1.128975088255892, "grad_norm": 0.37746073092281573, "learning_rate": 2.1011868344411596e-05, "loss": 3.718, "step": 4936 }, { "epoch": 1.1294324467248846, "grad_norm": 0.5650736424044033, "learning_rate": 2.09935852704474e-05, "loss": 3.452, "step": 4938 }, { "epoch": 1.129889805193877, "grad_norm": 0.524018036895915, "learning_rate": 2.0975304395464304e-05, "loss": 3.5807, "step": 4940 }, { "epoch": 1.1303471636628697, "grad_norm": 0.37344578689893326, "learning_rate": 2.0957025729496017e-05, "loss": 3.4562, "step": 4942 }, { "epoch": 1.130804522131862, "grad_norm": 0.5015811655249425, "learning_rate": 2.0938749282575078e-05, "loss": 3.6744, "step": 4944 }, { "epoch": 1.1312618806008548, "grad_norm": 0.4220409949772455, "learning_rate": 2.0920475064732794e-05, "loss": 3.707, "step": 4946 }, { "epoch": 1.1317192390698472, "grad_norm": 0.5106500429434867, "learning_rate": 2.0902203085999224e-05, "loss": 3.5503, "step": 4948 }, { "epoch": 1.1321765975388398, "grad_norm": 0.38110977157941867, "learning_rate": 2.088393335640323e-05, "loss": 3.5846, "step": 4950 }, { "epoch": 1.1326339560078322, "grad_norm": 0.4032906251192298, "learning_rate": 2.0865665885972444e-05, "loss": 3.5893, "step": 4952 }, { "epoch": 1.1330913144768249, "grad_norm": 0.5410724706428417, "learning_rate": 2.0847400684733208e-05, "loss": 3.893, "step": 4954 }, { "epoch": 1.1335486729458173, "grad_norm": 0.6159954873266519, "learning_rate": 2.0829137762710693e-05, "loss": 3.6201, "step": 4956 }, { "epoch": 1.13400603141481, "grad_norm": 0.43964946175603803, "learning_rate": 2.0810877129928747e-05, "loss": 3.6595, "step": 4958 }, { "epoch": 1.1344633898838024, "grad_norm": 0.6040973020613638, "learning_rate": 2.079261879641e-05, "loss": 3.6764, "step": 4960 }, { "epoch": 1.134920748352795, "grad_norm": 0.553595394613357, "learning_rate": 2.0774362772175833e-05, "loss": 3.611, "step": 4962 }, { "epoch": 1.1353781068217874, "grad_norm": 0.57787949272223, "learning_rate": 2.0756109067246312e-05, "loss": 3.6754, "step": 4964 }, { "epoch": 1.1358354652907798, "grad_norm": 0.6338628599583698, "learning_rate": 2.073785769164027e-05, "loss": 3.7436, "step": 4966 }, { "epoch": 1.1362928237597725, "grad_norm": 0.4801689546234419, "learning_rate": 2.0719608655375262e-05, "loss": 3.5159, "step": 4968 }, { "epoch": 1.136750182228765, "grad_norm": 0.38949803865303734, "learning_rate": 2.0701361968467532e-05, "loss": 3.572, "step": 4970 }, { "epoch": 1.1372075406977575, "grad_norm": 0.46351091054257715, "learning_rate": 2.068311764093204e-05, "loss": 3.5185, "step": 4972 }, { "epoch": 1.13766489916675, "grad_norm": 0.4929076330685147, "learning_rate": 2.0664875682782492e-05, "loss": 3.6229, "step": 4974 }, { "epoch": 1.1381222576357426, "grad_norm": 0.520893147530744, "learning_rate": 2.0646636104031223e-05, "loss": 3.3895, "step": 4976 }, { "epoch": 1.138579616104735, "grad_norm": 0.5129135394007496, "learning_rate": 2.0628398914689326e-05, "loss": 3.5893, "step": 4978 }, { "epoch": 1.1390369745737277, "grad_norm": 0.37070334125990834, "learning_rate": 2.0610164124766563e-05, "loss": 3.6013, "step": 4980 }, { "epoch": 1.13949433304272, "grad_norm": 0.4417344451426038, "learning_rate": 2.0591931744271344e-05, "loss": 3.3871, "step": 4982 }, { "epoch": 1.1399516915117127, "grad_norm": 0.4539953948577201, "learning_rate": 2.0573701783210816e-05, "loss": 3.596, "step": 4984 }, { "epoch": 1.1404090499807051, "grad_norm": 0.520710860793736, "learning_rate": 2.0555474251590746e-05, "loss": 3.6136, "step": 4986 }, { "epoch": 1.1408664084496978, "grad_norm": 0.44512872082580696, "learning_rate": 2.0537249159415593e-05, "loss": 3.5073, "step": 4988 }, { "epoch": 1.1413237669186902, "grad_norm": 0.6449520471401539, "learning_rate": 2.0519026516688494e-05, "loss": 3.3711, "step": 4990 }, { "epoch": 1.1417811253876828, "grad_norm": 0.4514095291488662, "learning_rate": 2.0500806333411194e-05, "loss": 3.5424, "step": 4992 }, { "epoch": 1.1422384838566753, "grad_norm": 0.4914992682596078, "learning_rate": 2.048258861958412e-05, "loss": 3.4878, "step": 4994 }, { "epoch": 1.142695842325668, "grad_norm": 0.4471746127219642, "learning_rate": 2.0464373385206362e-05, "loss": 3.5755, "step": 4996 }, { "epoch": 1.1431532007946603, "grad_norm": 0.41396292802781803, "learning_rate": 2.0446160640275598e-05, "loss": 3.6245, "step": 4998 }, { "epoch": 1.143610559263653, "grad_norm": 0.5066645337936198, "learning_rate": 2.042795039478817e-05, "loss": 3.602, "step": 5000 }, { "epoch": 1.1440679177326454, "grad_norm": 0.3755228847636335, "learning_rate": 2.0409742658739072e-05, "loss": 3.556, "step": 5002 }, { "epoch": 1.1445252762016378, "grad_norm": 0.5239177322835973, "learning_rate": 2.0391537442121868e-05, "loss": 3.5736, "step": 5004 }, { "epoch": 1.1449826346706304, "grad_norm": 0.4920985126280637, "learning_rate": 2.0373334754928766e-05, "loss": 3.5818, "step": 5006 }, { "epoch": 1.145439993139623, "grad_norm": 0.4980102375990898, "learning_rate": 2.0355134607150612e-05, "loss": 3.5622, "step": 5008 }, { "epoch": 1.1458973516086155, "grad_norm": 0.4764093559466664, "learning_rate": 2.0336937008776792e-05, "loss": 3.5784, "step": 5010 }, { "epoch": 1.146354710077608, "grad_norm": 0.4222908266576998, "learning_rate": 2.0318741969795368e-05, "loss": 3.6848, "step": 5012 }, { "epoch": 1.1468120685466006, "grad_norm": 0.475374272613009, "learning_rate": 2.0300549500192936e-05, "loss": 3.5795, "step": 5014 }, { "epoch": 1.147269427015593, "grad_norm": 0.4841517272237649, "learning_rate": 2.0282359609954712e-05, "loss": 3.6169, "step": 5016 }, { "epoch": 1.1477267854845856, "grad_norm": 0.3956616514563504, "learning_rate": 2.026417230906451e-05, "loss": 3.5885, "step": 5018 }, { "epoch": 1.148184143953578, "grad_norm": 0.5439464082826709, "learning_rate": 2.024598760750468e-05, "loss": 3.5344, "step": 5020 }, { "epoch": 1.1486415024225707, "grad_norm": 0.38123189851907174, "learning_rate": 2.0227805515256173e-05, "loss": 3.6467, "step": 5022 }, { "epoch": 1.149098860891563, "grad_norm": 0.4554368925649383, "learning_rate": 2.020962604229853e-05, "loss": 3.522, "step": 5024 }, { "epoch": 1.1495562193605557, "grad_norm": 0.4685422277920717, "learning_rate": 2.0191449198609797e-05, "loss": 3.5444, "step": 5026 }, { "epoch": 1.1500135778295482, "grad_norm": 0.5567090444176506, "learning_rate": 2.0173274994166613e-05, "loss": 3.5842, "step": 5028 }, { "epoch": 1.1504709362985408, "grad_norm": 0.4337680397624681, "learning_rate": 2.015510343894419e-05, "loss": 3.5125, "step": 5030 }, { "epoch": 1.1509282947675332, "grad_norm": 0.45191263925857644, "learning_rate": 2.0136934542916227e-05, "loss": 3.6947, "step": 5032 }, { "epoch": 1.1513856532365259, "grad_norm": 0.4316506535672618, "learning_rate": 2.011876831605501e-05, "loss": 3.6088, "step": 5034 }, { "epoch": 1.1518430117055183, "grad_norm": 0.6208667220241012, "learning_rate": 2.010060476833136e-05, "loss": 3.4763, "step": 5036 }, { "epoch": 1.152300370174511, "grad_norm": 0.5026363665555613, "learning_rate": 2.0082443909714592e-05, "loss": 3.5665, "step": 5038 }, { "epoch": 1.1527577286435033, "grad_norm": 0.4752506742828819, "learning_rate": 2.0064285750172584e-05, "loss": 3.6675, "step": 5040 }, { "epoch": 1.153215087112496, "grad_norm": 0.4532990614591216, "learning_rate": 2.0046130299671698e-05, "loss": 3.3932, "step": 5042 }, { "epoch": 1.1536724455814884, "grad_norm": 0.397710604597067, "learning_rate": 2.0027977568176832e-05, "loss": 3.5327, "step": 5044 }, { "epoch": 1.154129804050481, "grad_norm": 0.4412967917511769, "learning_rate": 2.0009827565651407e-05, "loss": 3.545, "step": 5046 }, { "epoch": 1.1545871625194735, "grad_norm": 0.4697682130235734, "learning_rate": 1.9991680302057294e-05, "loss": 3.4696, "step": 5048 }, { "epoch": 1.1550445209884659, "grad_norm": 0.430081395025061, "learning_rate": 1.9973535787354906e-05, "loss": 3.6421, "step": 5050 }, { "epoch": 1.1555018794574585, "grad_norm": 0.48029766293147125, "learning_rate": 1.9955394031503146e-05, "loss": 3.6525, "step": 5052 }, { "epoch": 1.155959237926451, "grad_norm": 0.4585408766775968, "learning_rate": 1.993725504445937e-05, "loss": 3.6058, "step": 5054 }, { "epoch": 1.1564165963954436, "grad_norm": 0.4509562517947735, "learning_rate": 1.9919118836179436e-05, "loss": 3.507, "step": 5056 }, { "epoch": 1.156873954864436, "grad_norm": 0.46456140871303425, "learning_rate": 1.9900985416617696e-05, "loss": 3.4728, "step": 5058 }, { "epoch": 1.1573313133334286, "grad_norm": 0.45773738531297026, "learning_rate": 1.9882854795726925e-05, "loss": 3.6854, "step": 5060 }, { "epoch": 1.157788671802421, "grad_norm": 0.5548014456612551, "learning_rate": 1.9864726983458392e-05, "loss": 3.4583, "step": 5062 }, { "epoch": 1.1582460302714137, "grad_norm": 0.5144747417429839, "learning_rate": 1.9846601989761848e-05, "loss": 3.4847, "step": 5064 }, { "epoch": 1.1587033887404061, "grad_norm": 0.5328355577239682, "learning_rate": 1.982847982458543e-05, "loss": 3.6321, "step": 5066 }, { "epoch": 1.1591607472093988, "grad_norm": 0.5311058634279724, "learning_rate": 1.9810360497875788e-05, "loss": 3.4238, "step": 5068 }, { "epoch": 1.1596181056783912, "grad_norm": 0.439814474103844, "learning_rate": 1.979224401957797e-05, "loss": 3.5123, "step": 5070 }, { "epoch": 1.1600754641473838, "grad_norm": 0.45338631362724063, "learning_rate": 1.977413039963548e-05, "loss": 3.6437, "step": 5072 }, { "epoch": 1.1605328226163762, "grad_norm": 0.48211171917553297, "learning_rate": 1.9756019647990275e-05, "loss": 3.6991, "step": 5074 }, { "epoch": 1.1609901810853689, "grad_norm": 0.456153793512326, "learning_rate": 1.9737911774582685e-05, "loss": 3.642, "step": 5076 }, { "epoch": 1.1614475395543613, "grad_norm": 0.4366482208568653, "learning_rate": 1.9719806789351498e-05, "loss": 3.6701, "step": 5078 }, { "epoch": 1.161904898023354, "grad_norm": 0.45800999232182166, "learning_rate": 1.970170470223392e-05, "loss": 3.6297, "step": 5080 }, { "epoch": 1.1623622564923464, "grad_norm": 0.47909910478984025, "learning_rate": 1.968360552316554e-05, "loss": 3.7526, "step": 5082 }, { "epoch": 1.162819614961339, "grad_norm": 0.47884791767268664, "learning_rate": 1.966550926208036e-05, "loss": 3.4246, "step": 5084 }, { "epoch": 1.1632769734303314, "grad_norm": 0.4487970279979814, "learning_rate": 1.9647415928910813e-05, "loss": 3.632, "step": 5086 }, { "epoch": 1.1637343318993238, "grad_norm": 0.42276736648008584, "learning_rate": 1.9629325533587667e-05, "loss": 3.7687, "step": 5088 }, { "epoch": 1.1641916903683165, "grad_norm": 0.519900255448766, "learning_rate": 1.9611238086040116e-05, "loss": 3.5975, "step": 5090 }, { "epoch": 1.1646490488373091, "grad_norm": 0.5627249956494589, "learning_rate": 1.959315359619575e-05, "loss": 3.6793, "step": 5092 }, { "epoch": 1.1651064073063016, "grad_norm": 0.48644910785931617, "learning_rate": 1.9575072073980483e-05, "loss": 3.7403, "step": 5094 }, { "epoch": 1.165563765775294, "grad_norm": 0.51210627551272, "learning_rate": 1.9556993529318644e-05, "loss": 3.5184, "step": 5096 }, { "epoch": 1.1660211242442866, "grad_norm": 0.6159078635600548, "learning_rate": 1.9538917972132924e-05, "loss": 3.573, "step": 5098 }, { "epoch": 1.166478482713279, "grad_norm": 0.452810605915918, "learning_rate": 1.952084541234435e-05, "loss": 3.5957, "step": 5100 }, { "epoch": 1.1669358411822717, "grad_norm": 0.4953965070706457, "learning_rate": 1.950277585987233e-05, "loss": 3.5534, "step": 5102 }, { "epoch": 1.167393199651264, "grad_norm": 0.5363632568499834, "learning_rate": 1.94847093246346e-05, "loss": 3.6606, "step": 5104 }, { "epoch": 1.1678505581202567, "grad_norm": 0.427860502694519, "learning_rate": 1.946664581654725e-05, "loss": 3.452, "step": 5106 }, { "epoch": 1.1683079165892492, "grad_norm": 0.4582580663550682, "learning_rate": 1.9448585345524727e-05, "loss": 3.5154, "step": 5108 }, { "epoch": 1.1687652750582418, "grad_norm": 0.5425853336039373, "learning_rate": 1.9430527921479768e-05, "loss": 3.5777, "step": 5110 }, { "epoch": 1.1692226335272342, "grad_norm": 0.46997720292786155, "learning_rate": 1.9412473554323467e-05, "loss": 3.5713, "step": 5112 }, { "epoch": 1.1696799919962269, "grad_norm": 0.46937259018199395, "learning_rate": 1.9394422253965264e-05, "loss": 3.5919, "step": 5114 }, { "epoch": 1.1701373504652193, "grad_norm": 0.5516804598709898, "learning_rate": 1.9376374030312848e-05, "loss": 3.4412, "step": 5116 }, { "epoch": 1.170594708934212, "grad_norm": 0.4982303356284815, "learning_rate": 1.9358328893272273e-05, "loss": 3.6157, "step": 5118 }, { "epoch": 1.1710520674032043, "grad_norm": 0.4913751013121771, "learning_rate": 1.9340286852747902e-05, "loss": 3.729, "step": 5120 }, { "epoch": 1.171509425872197, "grad_norm": 0.43228412146667, "learning_rate": 1.9322247918642354e-05, "loss": 3.6179, "step": 5122 }, { "epoch": 1.1719667843411894, "grad_norm": 0.44903351655045975, "learning_rate": 1.930421210085658e-05, "loss": 3.5993, "step": 5124 }, { "epoch": 1.172424142810182, "grad_norm": 0.47608584658776604, "learning_rate": 1.9286179409289824e-05, "loss": 3.6463, "step": 5126 }, { "epoch": 1.1728815012791745, "grad_norm": 0.4631581076861507, "learning_rate": 1.9268149853839575e-05, "loss": 3.6299, "step": 5128 }, { "epoch": 1.173338859748167, "grad_norm": 0.4547769917252239, "learning_rate": 1.925012344440165e-05, "loss": 3.5662, "step": 5130 }, { "epoch": 1.1737962182171595, "grad_norm": 0.5274421600070844, "learning_rate": 1.9232100190870094e-05, "loss": 3.8253, "step": 5132 }, { "epoch": 1.174253576686152, "grad_norm": 0.46809067451316805, "learning_rate": 1.9214080103137254e-05, "loss": 3.6535, "step": 5134 }, { "epoch": 1.1747109351551446, "grad_norm": 0.4936427597488999, "learning_rate": 1.9196063191093733e-05, "loss": 3.7175, "step": 5136 }, { "epoch": 1.1751682936241372, "grad_norm": 0.5231437997846704, "learning_rate": 1.917804946462837e-05, "loss": 3.5486, "step": 5138 }, { "epoch": 1.1756256520931296, "grad_norm": 0.4790025841308764, "learning_rate": 1.916003893362827e-05, "loss": 3.5642, "step": 5140 }, { "epoch": 1.176083010562122, "grad_norm": 0.48615621875954834, "learning_rate": 1.9142031607978805e-05, "loss": 3.4491, "step": 5142 }, { "epoch": 1.1765403690311147, "grad_norm": 0.44161837046851793, "learning_rate": 1.9124027497563544e-05, "loss": 3.5658, "step": 5144 }, { "epoch": 1.1769977275001071, "grad_norm": 0.45883733139012123, "learning_rate": 1.9106026612264316e-05, "loss": 3.6105, "step": 5146 }, { "epoch": 1.1774550859690998, "grad_norm": 0.49644303228491116, "learning_rate": 1.90880289619612e-05, "loss": 3.5245, "step": 5148 }, { "epoch": 1.1779124444380922, "grad_norm": 0.5529135693269555, "learning_rate": 1.9070034556532456e-05, "loss": 3.5414, "step": 5150 }, { "epoch": 1.1783698029070848, "grad_norm": 0.4149440162883647, "learning_rate": 1.9052043405854584e-05, "loss": 3.6826, "step": 5152 }, { "epoch": 1.1788271613760772, "grad_norm": 0.4561510004780452, "learning_rate": 1.9034055519802322e-05, "loss": 3.5228, "step": 5154 }, { "epoch": 1.1792845198450699, "grad_norm": 0.43783549660210097, "learning_rate": 1.901607090824856e-05, "loss": 3.8124, "step": 5156 }, { "epoch": 1.1797418783140623, "grad_norm": 0.5467225543486963, "learning_rate": 1.8998089581064453e-05, "loss": 3.3958, "step": 5158 }, { "epoch": 1.180199236783055, "grad_norm": 0.4555261722963931, "learning_rate": 1.8980111548119294e-05, "loss": 3.527, "step": 5160 }, { "epoch": 1.1806565952520474, "grad_norm": 0.493187116329579, "learning_rate": 1.896213681928062e-05, "loss": 3.6296, "step": 5162 }, { "epoch": 1.18111395372104, "grad_norm": 0.45583374095191836, "learning_rate": 1.8944165404414124e-05, "loss": 3.5498, "step": 5164 }, { "epoch": 1.1815713121900324, "grad_norm": 0.3681568580145138, "learning_rate": 1.8926197313383685e-05, "loss": 3.4995, "step": 5166 }, { "epoch": 1.182028670659025, "grad_norm": 0.5036573756518338, "learning_rate": 1.890823255605136e-05, "loss": 3.5896, "step": 5168 }, { "epoch": 1.1824860291280175, "grad_norm": 0.5109390258766278, "learning_rate": 1.889027114227739e-05, "loss": 3.3093, "step": 5170 }, { "epoch": 1.18294338759701, "grad_norm": 0.43708485252448487, "learning_rate": 1.8872313081920152e-05, "loss": 3.5823, "step": 5172 }, { "epoch": 1.1834007460660025, "grad_norm": 0.45073145057458686, "learning_rate": 1.8854358384836194e-05, "loss": 3.5497, "step": 5174 }, { "epoch": 1.1838581045349952, "grad_norm": 0.43592291366621316, "learning_rate": 1.883640706088025e-05, "loss": 3.5513, "step": 5176 }, { "epoch": 1.1843154630039876, "grad_norm": 0.46627147586749207, "learning_rate": 1.8818459119905144e-05, "loss": 3.4647, "step": 5178 }, { "epoch": 1.18477282147298, "grad_norm": 0.6504109926442613, "learning_rate": 1.8800514571761882e-05, "loss": 3.5384, "step": 5180 }, { "epoch": 1.1852301799419727, "grad_norm": 0.4820368230117307, "learning_rate": 1.878257342629962e-05, "loss": 3.6745, "step": 5182 }, { "epoch": 1.185687538410965, "grad_norm": 0.5959989469842121, "learning_rate": 1.8764635693365594e-05, "loss": 3.5819, "step": 5184 }, { "epoch": 1.1861448968799577, "grad_norm": 0.5260096877043596, "learning_rate": 1.8746701382805225e-05, "loss": 3.5486, "step": 5186 }, { "epoch": 1.1866022553489501, "grad_norm": 0.42722450419782315, "learning_rate": 1.8728770504462016e-05, "loss": 3.6225, "step": 5188 }, { "epoch": 1.1870596138179428, "grad_norm": 0.42148128434144916, "learning_rate": 1.87108430681776e-05, "loss": 3.3376, "step": 5190 }, { "epoch": 1.1875169722869352, "grad_norm": 0.5630069673320525, "learning_rate": 1.8692919083791725e-05, "loss": 3.4983, "step": 5192 }, { "epoch": 1.1879743307559278, "grad_norm": 0.5104731768511597, "learning_rate": 1.867499856114224e-05, "loss": 3.4542, "step": 5194 }, { "epoch": 1.1884316892249203, "grad_norm": 0.39118756792464354, "learning_rate": 1.865708151006509e-05, "loss": 3.7181, "step": 5196 }, { "epoch": 1.188889047693913, "grad_norm": 0.5301036246680454, "learning_rate": 1.863916794039432e-05, "loss": 3.7218, "step": 5198 }, { "epoch": 1.1893464061629053, "grad_norm": 0.41417991125138004, "learning_rate": 1.8621257861962062e-05, "loss": 3.6275, "step": 5200 }, { "epoch": 1.189803764631898, "grad_norm": 0.3763840881892339, "learning_rate": 1.860335128459853e-05, "loss": 3.34, "step": 5202 }, { "epoch": 1.1902611231008904, "grad_norm": 0.5036710441562385, "learning_rate": 1.8585448218132036e-05, "loss": 3.5982, "step": 5204 }, { "epoch": 1.190718481569883, "grad_norm": 0.35115658320483867, "learning_rate": 1.8567548672388922e-05, "loss": 3.5512, "step": 5206 }, { "epoch": 1.1911758400388754, "grad_norm": 0.4912209784693028, "learning_rate": 1.8549652657193634e-05, "loss": 3.6994, "step": 5208 }, { "epoch": 1.191633198507868, "grad_norm": 0.5128436936509839, "learning_rate": 1.8531760182368685e-05, "loss": 3.6254, "step": 5210 }, { "epoch": 1.1920905569768605, "grad_norm": 0.4734710102371027, "learning_rate": 1.8513871257734606e-05, "loss": 3.6239, "step": 5212 }, { "epoch": 1.1925479154458531, "grad_norm": 0.48006299571570543, "learning_rate": 1.8495985893110018e-05, "loss": 3.4603, "step": 5214 }, { "epoch": 1.1930052739148456, "grad_norm": 0.41700737454276504, "learning_rate": 1.847810409831156e-05, "loss": 3.5026, "step": 5216 }, { "epoch": 1.193462632383838, "grad_norm": 0.4110051407643348, "learning_rate": 1.846022588315393e-05, "loss": 3.4609, "step": 5218 }, { "epoch": 1.1939199908528306, "grad_norm": 0.5118370792935444, "learning_rate": 1.8442351257449868e-05, "loss": 3.6293, "step": 5220 }, { "epoch": 1.1943773493218233, "grad_norm": 0.496965468465826, "learning_rate": 1.842448023101011e-05, "loss": 3.5566, "step": 5222 }, { "epoch": 1.1948347077908157, "grad_norm": 0.41702901065663966, "learning_rate": 1.8406612813643454e-05, "loss": 3.6107, "step": 5224 }, { "epoch": 1.195292066259808, "grad_norm": 0.47181885997969375, "learning_rate": 1.83887490151567e-05, "loss": 3.5484, "step": 5226 }, { "epoch": 1.1957494247288007, "grad_norm": 0.628691879604583, "learning_rate": 1.8370888845354654e-05, "loss": 3.5994, "step": 5228 }, { "epoch": 1.1962067831977932, "grad_norm": 0.5531201044501779, "learning_rate": 1.835303231404014e-05, "loss": 3.5572, "step": 5230 }, { "epoch": 1.1966641416667858, "grad_norm": 0.46498887817864415, "learning_rate": 1.8335179431014002e-05, "loss": 3.7491, "step": 5232 }, { "epoch": 1.1971215001357782, "grad_norm": 0.42347630608020176, "learning_rate": 1.831733020607504e-05, "loss": 3.5004, "step": 5234 }, { "epoch": 1.1975788586047709, "grad_norm": 0.43480370929687834, "learning_rate": 1.8299484649020076e-05, "loss": 3.6017, "step": 5236 }, { "epoch": 1.1980362170737633, "grad_norm": 0.637080996388286, "learning_rate": 1.8281642769643937e-05, "loss": 3.4212, "step": 5238 }, { "epoch": 1.198493575542756, "grad_norm": 0.5080570805706739, "learning_rate": 1.8263804577739375e-05, "loss": 3.5399, "step": 5240 }, { "epoch": 1.1989509340117483, "grad_norm": 0.45866863672578206, "learning_rate": 1.8245970083097157e-05, "loss": 3.6287, "step": 5242 }, { "epoch": 1.199408292480741, "grad_norm": 0.4699381076358804, "learning_rate": 1.8228139295506037e-05, "loss": 3.5267, "step": 5244 }, { "epoch": 1.1998656509497334, "grad_norm": 0.598892379667803, "learning_rate": 1.8210312224752685e-05, "loss": 3.4946, "step": 5246 }, { "epoch": 1.200323009418726, "grad_norm": 0.44073135972594996, "learning_rate": 1.819248888062177e-05, "loss": 3.4653, "step": 5248 }, { "epoch": 1.2007803678877185, "grad_norm": 0.4675486747371268, "learning_rate": 1.81746692728959e-05, "loss": 3.4487, "step": 5250 }, { "epoch": 1.201237726356711, "grad_norm": 0.5076221989805885, "learning_rate": 1.815685341135563e-05, "loss": 3.6159, "step": 5252 }, { "epoch": 1.2016950848257035, "grad_norm": 0.493825864999431, "learning_rate": 1.8139041305779476e-05, "loss": 3.7206, "step": 5254 }, { "epoch": 1.202152443294696, "grad_norm": 0.43368366099680294, "learning_rate": 1.8121232965943867e-05, "loss": 3.6497, "step": 5256 }, { "epoch": 1.2026098017636886, "grad_norm": 0.35216860975226527, "learning_rate": 1.810342840162319e-05, "loss": 3.4596, "step": 5258 }, { "epoch": 1.2030671602326812, "grad_norm": 0.49746330329563115, "learning_rate": 1.8085627622589747e-05, "loss": 3.5562, "step": 5260 }, { "epoch": 1.2035245187016737, "grad_norm": 0.48678175516880473, "learning_rate": 1.8067830638613757e-05, "loss": 3.5728, "step": 5262 }, { "epoch": 1.203981877170666, "grad_norm": 0.5963377277046392, "learning_rate": 1.8050037459463366e-05, "loss": 3.5706, "step": 5264 }, { "epoch": 1.2044392356396587, "grad_norm": 0.5411261710693771, "learning_rate": 1.803224809490463e-05, "loss": 3.3237, "step": 5266 }, { "epoch": 1.2048965941086511, "grad_norm": 0.5769715530252213, "learning_rate": 1.8014462554701505e-05, "loss": 3.5144, "step": 5268 }, { "epoch": 1.2053539525776438, "grad_norm": 0.5476760447104164, "learning_rate": 1.7996680848615854e-05, "loss": 3.6761, "step": 5270 }, { "epoch": 1.2058113110466362, "grad_norm": 0.5963707106737254, "learning_rate": 1.7978902986407452e-05, "loss": 3.6843, "step": 5272 }, { "epoch": 1.2062686695156288, "grad_norm": 0.48577093661290655, "learning_rate": 1.796112897783392e-05, "loss": 3.5071, "step": 5274 }, { "epoch": 1.2067260279846213, "grad_norm": 0.4000662443322203, "learning_rate": 1.794335883265081e-05, "loss": 3.6144, "step": 5276 }, { "epoch": 1.207183386453614, "grad_norm": 0.4188851800192103, "learning_rate": 1.7925592560611516e-05, "loss": 3.6298, "step": 5278 }, { "epoch": 1.2076407449226063, "grad_norm": 0.46659557838482824, "learning_rate": 1.7907830171467333e-05, "loss": 3.5048, "step": 5280 }, { "epoch": 1.208098103391599, "grad_norm": 0.4321572677012214, "learning_rate": 1.7890071674967424e-05, "loss": 3.6577, "step": 5282 }, { "epoch": 1.2085554618605914, "grad_norm": 0.3960271745518873, "learning_rate": 1.787231708085879e-05, "loss": 3.5902, "step": 5284 }, { "epoch": 1.209012820329584, "grad_norm": 0.46961133023294305, "learning_rate": 1.7854566398886325e-05, "loss": 3.5589, "step": 5286 }, { "epoch": 1.2094701787985764, "grad_norm": 0.5062495902730908, "learning_rate": 1.783681963879275e-05, "loss": 3.6595, "step": 5288 }, { "epoch": 1.209927537267569, "grad_norm": 0.4330299251349207, "learning_rate": 1.781907681031864e-05, "loss": 3.4022, "step": 5290 }, { "epoch": 1.2103848957365615, "grad_norm": 0.4897011976876315, "learning_rate": 1.7801337923202415e-05, "loss": 3.4965, "step": 5292 }, { "epoch": 1.2108422542055541, "grad_norm": 0.5904741518450373, "learning_rate": 1.7783602987180338e-05, "loss": 3.3423, "step": 5294 }, { "epoch": 1.2112996126745466, "grad_norm": 0.5083302719253324, "learning_rate": 1.7765872011986484e-05, "loss": 3.5203, "step": 5296 }, { "epoch": 1.2117569711435392, "grad_norm": 0.4171716987103417, "learning_rate": 1.7748145007352775e-05, "loss": 3.4962, "step": 5298 }, { "epoch": 1.2122143296125316, "grad_norm": 0.4793202233631763, "learning_rate": 1.7730421983008938e-05, "loss": 3.7669, "step": 5300 }, { "epoch": 1.212671688081524, "grad_norm": 0.4067554996871294, "learning_rate": 1.7712702948682525e-05, "loss": 3.5416, "step": 5302 }, { "epoch": 1.2131290465505167, "grad_norm": 0.5736894568536575, "learning_rate": 1.76949879140989e-05, "loss": 3.503, "step": 5304 }, { "epoch": 1.2135864050195093, "grad_norm": 0.5551420954782219, "learning_rate": 1.7677276888981212e-05, "loss": 3.4553, "step": 5306 }, { "epoch": 1.2140437634885017, "grad_norm": 0.567656958002331, "learning_rate": 1.7659569883050436e-05, "loss": 3.4735, "step": 5308 }, { "epoch": 1.2145011219574942, "grad_norm": 0.4788896772494813, "learning_rate": 1.764186690602533e-05, "loss": 3.462, "step": 5310 }, { "epoch": 1.2149584804264868, "grad_norm": 0.49111669102681854, "learning_rate": 1.7624167967622436e-05, "loss": 3.4444, "step": 5312 }, { "epoch": 1.2154158388954792, "grad_norm": 0.4230808931236131, "learning_rate": 1.7606473077556076e-05, "loss": 3.5379, "step": 5314 }, { "epoch": 1.2158731973644719, "grad_norm": 0.6045149150267317, "learning_rate": 1.7588782245538372e-05, "loss": 3.6166, "step": 5316 }, { "epoch": 1.2163305558334643, "grad_norm": 0.3927964509645845, "learning_rate": 1.7571095481279192e-05, "loss": 3.3798, "step": 5318 }, { "epoch": 1.216787914302457, "grad_norm": 0.40054139810786793, "learning_rate": 1.7553412794486184e-05, "loss": 3.4332, "step": 5320 }, { "epoch": 1.2172452727714493, "grad_norm": 0.5010062166178549, "learning_rate": 1.7535734194864766e-05, "loss": 3.4289, "step": 5322 }, { "epoch": 1.217702631240442, "grad_norm": 0.4989476155647745, "learning_rate": 1.7518059692118094e-05, "loss": 3.5333, "step": 5324 }, { "epoch": 1.2181599897094344, "grad_norm": 0.42882753494119247, "learning_rate": 1.750038929594709e-05, "loss": 3.5643, "step": 5326 }, { "epoch": 1.218617348178427, "grad_norm": 0.433359856282901, "learning_rate": 1.7482723016050418e-05, "loss": 3.4724, "step": 5328 }, { "epoch": 1.2190747066474195, "grad_norm": 0.37482792164568324, "learning_rate": 1.7465060862124476e-05, "loss": 3.5516, "step": 5330 }, { "epoch": 1.219532065116412, "grad_norm": 0.5652132558930499, "learning_rate": 1.7447402843863415e-05, "loss": 3.6543, "step": 5332 }, { "epoch": 1.2199894235854045, "grad_norm": 0.4011856373893596, "learning_rate": 1.7429748970959087e-05, "loss": 3.635, "step": 5334 }, { "epoch": 1.2204467820543972, "grad_norm": 0.4537771891757753, "learning_rate": 1.7412099253101094e-05, "loss": 3.5094, "step": 5336 }, { "epoch": 1.2209041405233896, "grad_norm": 0.44085838278551137, "learning_rate": 1.7394453699976758e-05, "loss": 3.5767, "step": 5338 }, { "epoch": 1.221361498992382, "grad_norm": 0.4878542902590196, "learning_rate": 1.73768123212711e-05, "loss": 3.531, "step": 5340 }, { "epoch": 1.2218188574613746, "grad_norm": 0.5101902972276359, "learning_rate": 1.7359175126666848e-05, "loss": 3.3448, "step": 5342 }, { "epoch": 1.2222762159303673, "grad_norm": 0.4870765626038108, "learning_rate": 1.7341542125844462e-05, "loss": 3.4453, "step": 5344 }, { "epoch": 1.2227335743993597, "grad_norm": 0.40650662239775415, "learning_rate": 1.732391332848206e-05, "loss": 3.5239, "step": 5346 }, { "epoch": 1.2231909328683521, "grad_norm": 0.45016630000634195, "learning_rate": 1.7306288744255482e-05, "loss": 3.3375, "step": 5348 }, { "epoch": 1.2236482913373448, "grad_norm": 0.47207241412994283, "learning_rate": 1.7288668382838253e-05, "loss": 3.5394, "step": 5350 }, { "epoch": 1.2241056498063372, "grad_norm": 0.4499173909762217, "learning_rate": 1.727105225390156e-05, "loss": 3.6032, "step": 5352 }, { "epoch": 1.2245630082753298, "grad_norm": 0.50931725203923, "learning_rate": 1.7253440367114294e-05, "loss": 3.5545, "step": 5354 }, { "epoch": 1.2250203667443222, "grad_norm": 0.4757255960119849, "learning_rate": 1.7235832732142997e-05, "loss": 3.4911, "step": 5356 }, { "epoch": 1.2254777252133149, "grad_norm": 0.4222360937641643, "learning_rate": 1.721822935865188e-05, "loss": 3.5235, "step": 5358 }, { "epoch": 1.2259350836823073, "grad_norm": 0.5253338694495081, "learning_rate": 1.720063025630283e-05, "loss": 3.6028, "step": 5360 }, { "epoch": 1.2263924421513, "grad_norm": 0.4247390345560719, "learning_rate": 1.7183035434755375e-05, "loss": 3.5229, "step": 5362 }, { "epoch": 1.2268498006202924, "grad_norm": 0.4646714315862229, "learning_rate": 1.7165444903666695e-05, "loss": 3.5066, "step": 5364 }, { "epoch": 1.227307159089285, "grad_norm": 0.469202363766247, "learning_rate": 1.714785867269163e-05, "loss": 3.6687, "step": 5366 }, { "epoch": 1.2277645175582774, "grad_norm": 0.5581998703516691, "learning_rate": 1.7130276751482632e-05, "loss": 3.5795, "step": 5368 }, { "epoch": 1.22822187602727, "grad_norm": 0.509140566840058, "learning_rate": 1.711269914968981e-05, "loss": 3.5753, "step": 5370 }, { "epoch": 1.2286792344962625, "grad_norm": 0.5111063430340284, "learning_rate": 1.7095125876960898e-05, "loss": 3.5723, "step": 5372 }, { "epoch": 1.2291365929652551, "grad_norm": 0.5095725442904006, "learning_rate": 1.7077556942941248e-05, "loss": 3.4341, "step": 5374 }, { "epoch": 1.2295939514342475, "grad_norm": 0.3938744620752611, "learning_rate": 1.7059992357273836e-05, "loss": 3.645, "step": 5376 }, { "epoch": 1.2300513099032402, "grad_norm": 0.4024340333844091, "learning_rate": 1.7042432129599252e-05, "loss": 3.403, "step": 5378 }, { "epoch": 1.2305086683722326, "grad_norm": 0.44512299638065306, "learning_rate": 1.7024876269555684e-05, "loss": 3.4236, "step": 5380 }, { "epoch": 1.2309660268412252, "grad_norm": 0.47054840647527235, "learning_rate": 1.7007324786778935e-05, "loss": 3.4642, "step": 5382 }, { "epoch": 1.2314233853102177, "grad_norm": 0.43044190504828556, "learning_rate": 1.6989777690902405e-05, "loss": 3.5618, "step": 5384 }, { "epoch": 1.23188074377921, "grad_norm": 0.4048481191666587, "learning_rate": 1.6972234991557073e-05, "loss": 3.3923, "step": 5386 }, { "epoch": 1.2323381022482027, "grad_norm": 0.42226533139877626, "learning_rate": 1.695469669837152e-05, "loss": 3.5506, "step": 5388 }, { "epoch": 1.2327954607171954, "grad_norm": 0.5855834654304888, "learning_rate": 1.6937162820971906e-05, "loss": 3.5965, "step": 5390 }, { "epoch": 1.2332528191861878, "grad_norm": 0.5652022357231651, "learning_rate": 1.691963336898195e-05, "loss": 3.5612, "step": 5392 }, { "epoch": 1.2337101776551802, "grad_norm": 0.566786907799838, "learning_rate": 1.6902108352022973e-05, "loss": 3.5476, "step": 5394 }, { "epoch": 1.2341675361241728, "grad_norm": 0.5131222355218684, "learning_rate": 1.688458777971383e-05, "loss": 3.5966, "step": 5396 }, { "epoch": 1.2346248945931653, "grad_norm": 0.5308713124261432, "learning_rate": 1.6867071661670958e-05, "loss": 3.7142, "step": 5398 }, { "epoch": 1.235082253062158, "grad_norm": 0.5668450070220067, "learning_rate": 1.6849560007508347e-05, "loss": 3.4733, "step": 5400 }, { "epoch": 1.2355396115311503, "grad_norm": 0.46567040356463507, "learning_rate": 1.683205282683752e-05, "loss": 3.5922, "step": 5402 }, { "epoch": 1.235996970000143, "grad_norm": 0.42252508162363445, "learning_rate": 1.6814550129267565e-05, "loss": 3.6143, "step": 5404 }, { "epoch": 1.2364543284691354, "grad_norm": 0.42405901829436476, "learning_rate": 1.6797051924405106e-05, "loss": 3.5172, "step": 5406 }, { "epoch": 1.236911686938128, "grad_norm": 0.50944979884924, "learning_rate": 1.677955822185428e-05, "loss": 3.5073, "step": 5408 }, { "epoch": 1.2373690454071204, "grad_norm": 0.4439017889167246, "learning_rate": 1.6762069031216785e-05, "loss": 3.4024, "step": 5410 }, { "epoch": 1.237826403876113, "grad_norm": 0.4587600809283746, "learning_rate": 1.6744584362091825e-05, "loss": 3.5035, "step": 5412 }, { "epoch": 1.2382837623451055, "grad_norm": 0.4003722209887568, "learning_rate": 1.672710422407611e-05, "loss": 3.4931, "step": 5414 }, { "epoch": 1.2387411208140982, "grad_norm": 0.381605522704547, "learning_rate": 1.6709628626763896e-05, "loss": 3.3722, "step": 5416 }, { "epoch": 1.2391984792830906, "grad_norm": 0.46856245421650694, "learning_rate": 1.669215757974692e-05, "loss": 3.4495, "step": 5418 }, { "epoch": 1.2396558377520832, "grad_norm": 0.5215762339635664, "learning_rate": 1.6674691092614427e-05, "loss": 3.5185, "step": 5420 }, { "epoch": 1.2401131962210756, "grad_norm": 0.4947561870369288, "learning_rate": 1.6657229174953164e-05, "loss": 3.4963, "step": 5422 }, { "epoch": 1.240570554690068, "grad_norm": 0.40435700268913505, "learning_rate": 1.6639771836347366e-05, "loss": 3.3916, "step": 5424 }, { "epoch": 1.2410279131590607, "grad_norm": 0.45307413999634477, "learning_rate": 1.6622319086378757e-05, "loss": 3.7069, "step": 5426 }, { "epoch": 1.2414852716280533, "grad_norm": 0.5028417121457592, "learning_rate": 1.6604870934626544e-05, "loss": 3.5635, "step": 5428 }, { "epoch": 1.2419426300970458, "grad_norm": 0.6172874224934546, "learning_rate": 1.6587427390667405e-05, "loss": 3.5313, "step": 5430 }, { "epoch": 1.2423999885660382, "grad_norm": 0.4973251498930154, "learning_rate": 1.6569988464075487e-05, "loss": 3.5731, "step": 5432 }, { "epoch": 1.2428573470350308, "grad_norm": 0.5505633092088974, "learning_rate": 1.6552554164422423e-05, "loss": 3.5803, "step": 5434 }, { "epoch": 1.2433147055040232, "grad_norm": 0.41910192590051176, "learning_rate": 1.6535124501277276e-05, "loss": 3.5555, "step": 5436 }, { "epoch": 1.2437720639730159, "grad_norm": 0.5220948549974634, "learning_rate": 1.6517699484206584e-05, "loss": 3.4051, "step": 5438 }, { "epoch": 1.2442294224420083, "grad_norm": 0.42028617138278346, "learning_rate": 1.6500279122774337e-05, "loss": 3.5514, "step": 5440 }, { "epoch": 1.244686780911001, "grad_norm": 0.4773470096508055, "learning_rate": 1.6482863426541953e-05, "loss": 3.4796, "step": 5442 }, { "epoch": 1.2451441393799934, "grad_norm": 0.5356263372344481, "learning_rate": 1.6465452405068305e-05, "loss": 3.4535, "step": 5444 }, { "epoch": 1.245601497848986, "grad_norm": 0.4618926255008054, "learning_rate": 1.6448046067909692e-05, "loss": 3.8844, "step": 5446 }, { "epoch": 1.2460588563179784, "grad_norm": 0.5092131737927245, "learning_rate": 1.643064442461984e-05, "loss": 3.6012, "step": 5448 }, { "epoch": 1.246516214786971, "grad_norm": 0.4328684195740673, "learning_rate": 1.6413247484749918e-05, "loss": 3.4728, "step": 5450 }, { "epoch": 1.2469735732559635, "grad_norm": 0.4087633732699101, "learning_rate": 1.6395855257848485e-05, "loss": 3.5861, "step": 5452 }, { "epoch": 1.2474309317249561, "grad_norm": 0.4295651521760435, "learning_rate": 1.6378467753461525e-05, "loss": 3.5576, "step": 5454 }, { "epoch": 1.2478882901939485, "grad_norm": 0.391913881639279, "learning_rate": 1.6361084981132447e-05, "loss": 3.5108, "step": 5456 }, { "epoch": 1.2483456486629412, "grad_norm": 0.5344014936426984, "learning_rate": 1.6343706950402026e-05, "loss": 3.6078, "step": 5458 }, { "epoch": 1.2488030071319336, "grad_norm": 0.4226513536208411, "learning_rate": 1.6326333670808475e-05, "loss": 3.5289, "step": 5460 }, { "epoch": 1.2492603656009262, "grad_norm": 0.4507439635936292, "learning_rate": 1.6308965151887373e-05, "loss": 3.5571, "step": 5462 }, { "epoch": 1.2497177240699187, "grad_norm": 0.3962002943855282, "learning_rate": 1.6291601403171692e-05, "loss": 3.39, "step": 5464 }, { "epoch": 1.2501750825389113, "grad_norm": 0.5361181927687068, "learning_rate": 1.6274242434191785e-05, "loss": 3.526, "step": 5466 }, { "epoch": 1.2506324410079037, "grad_norm": 0.40846433905962554, "learning_rate": 1.6256888254475393e-05, "loss": 3.3406, "step": 5468 }, { "epoch": 1.2510897994768961, "grad_norm": 0.48521888193489976, "learning_rate": 1.623953887354761e-05, "loss": 3.5266, "step": 5470 }, { "epoch": 1.2515471579458888, "grad_norm": 0.44667790874999397, "learning_rate": 1.6222194300930907e-05, "loss": 3.4981, "step": 5472 }, { "epoch": 1.2520045164148814, "grad_norm": 0.45252411562059397, "learning_rate": 1.6204854546145122e-05, "loss": 3.3163, "step": 5474 }, { "epoch": 1.2524618748838738, "grad_norm": 0.5319925616253435, "learning_rate": 1.6187519618707426e-05, "loss": 3.7199, "step": 5476 }, { "epoch": 1.2529192333528663, "grad_norm": 0.450013891641456, "learning_rate": 1.6170189528132367e-05, "loss": 3.5658, "step": 5478 }, { "epoch": 1.253376591821859, "grad_norm": 0.4921155755911593, "learning_rate": 1.615286428393182e-05, "loss": 3.5778, "step": 5480 }, { "epoch": 1.2538339502908515, "grad_norm": 0.5297499403258178, "learning_rate": 1.6135543895615006e-05, "loss": 3.4041, "step": 5482 }, { "epoch": 1.254291308759844, "grad_norm": 0.449687795817879, "learning_rate": 1.6118228372688494e-05, "loss": 3.435, "step": 5484 }, { "epoch": 1.2547486672288364, "grad_norm": 0.43339606237598755, "learning_rate": 1.610091772465615e-05, "loss": 3.5394, "step": 5486 }, { "epoch": 1.255206025697829, "grad_norm": 0.6400186879439623, "learning_rate": 1.6083611961019197e-05, "loss": 3.4168, "step": 5488 }, { "epoch": 1.2556633841668214, "grad_norm": 0.47731413741590767, "learning_rate": 1.606631109127616e-05, "loss": 3.5922, "step": 5490 }, { "epoch": 1.256120742635814, "grad_norm": 0.44303779871238247, "learning_rate": 1.604901512492288e-05, "loss": 3.4282, "step": 5492 }, { "epoch": 1.2565781011048065, "grad_norm": 0.5155375724423752, "learning_rate": 1.6031724071452515e-05, "loss": 3.5441, "step": 5494 }, { "epoch": 1.2570354595737991, "grad_norm": 0.45757303973344265, "learning_rate": 1.6014437940355515e-05, "loss": 3.4088, "step": 5496 }, { "epoch": 1.2574928180427916, "grad_norm": 0.4339403365321458, "learning_rate": 1.5997156741119635e-05, "loss": 3.5957, "step": 5498 }, { "epoch": 1.2579501765117842, "grad_norm": 0.39507545103677283, "learning_rate": 1.597988048322992e-05, "loss": 3.7054, "step": 5500 }, { "epoch": 1.2584075349807766, "grad_norm": 0.47372852420191536, "learning_rate": 1.5962609176168707e-05, "loss": 3.5999, "step": 5502 }, { "epoch": 1.2588648934497693, "grad_norm": 0.8425614851863498, "learning_rate": 1.594534282941561e-05, "loss": 3.557, "step": 5504 }, { "epoch": 1.2593222519187617, "grad_norm": 0.43718179983986216, "learning_rate": 1.592808145244753e-05, "loss": 3.5972, "step": 5506 }, { "epoch": 1.259779610387754, "grad_norm": 0.3769848271612805, "learning_rate": 1.591082505473863e-05, "loss": 3.4745, "step": 5508 }, { "epoch": 1.2602369688567467, "grad_norm": 0.6384219926098161, "learning_rate": 1.5893573645760337e-05, "loss": 3.4298, "step": 5510 }, { "epoch": 1.2606943273257394, "grad_norm": 0.4428499799277663, "learning_rate": 1.587632723498136e-05, "loss": 3.4069, "step": 5512 }, { "epoch": 1.2611516857947318, "grad_norm": 0.4973120260951915, "learning_rate": 1.585908583186764e-05, "loss": 3.5661, "step": 5514 }, { "epoch": 1.2616090442637242, "grad_norm": 0.579489695277189, "learning_rate": 1.5841849445882384e-05, "loss": 3.4764, "step": 5516 }, { "epoch": 1.2620664027327169, "grad_norm": 0.4772511337760379, "learning_rate": 1.582461808648605e-05, "loss": 3.452, "step": 5518 }, { "epoch": 1.2625237612017095, "grad_norm": 0.4578350530283575, "learning_rate": 1.5807391763136315e-05, "loss": 3.5203, "step": 5520 }, { "epoch": 1.262981119670702, "grad_norm": 0.5176464683467511, "learning_rate": 1.5790170485288118e-05, "loss": 3.6078, "step": 5522 }, { "epoch": 1.2634384781396943, "grad_norm": 0.5671420349730785, "learning_rate": 1.5772954262393613e-05, "loss": 3.605, "step": 5524 }, { "epoch": 1.263895836608687, "grad_norm": 0.39043706187563626, "learning_rate": 1.575574310390218e-05, "loss": 3.5675, "step": 5526 }, { "epoch": 1.2643531950776794, "grad_norm": 0.43162258680118104, "learning_rate": 1.5738537019260428e-05, "loss": 3.5053, "step": 5528 }, { "epoch": 1.264810553546672, "grad_norm": 0.4311744969380363, "learning_rate": 1.5721336017912175e-05, "loss": 3.7449, "step": 5530 }, { "epoch": 1.2652679120156645, "grad_norm": 0.4148775212681664, "learning_rate": 1.5704140109298445e-05, "loss": 3.7047, "step": 5532 }, { "epoch": 1.265725270484657, "grad_norm": 0.4733370457199189, "learning_rate": 1.5686949302857472e-05, "loss": 3.6391, "step": 5534 }, { "epoch": 1.2661826289536495, "grad_norm": 0.42032049022581536, "learning_rate": 1.56697636080247e-05, "loss": 3.5224, "step": 5536 }, { "epoch": 1.2666399874226422, "grad_norm": 0.43823876336485035, "learning_rate": 1.5652583034232742e-05, "loss": 3.4757, "step": 5538 }, { "epoch": 1.2670973458916346, "grad_norm": 0.4415291302889481, "learning_rate": 1.5635407590911426e-05, "loss": 3.4389, "step": 5540 }, { "epoch": 1.2675547043606272, "grad_norm": 0.47506616533127877, "learning_rate": 1.5618237287487746e-05, "loss": 3.5206, "step": 5542 }, { "epoch": 1.2680120628296196, "grad_norm": 0.47673020773252744, "learning_rate": 1.560107213338588e-05, "loss": 3.6171, "step": 5544 }, { "epoch": 1.268469421298612, "grad_norm": 0.624245522012106, "learning_rate": 1.5583912138027195e-05, "loss": 3.4669, "step": 5546 }, { "epoch": 1.2689267797676047, "grad_norm": 0.5550619661259669, "learning_rate": 1.5566757310830192e-05, "loss": 3.5143, "step": 5548 }, { "epoch": 1.2693841382365973, "grad_norm": 0.5274550991473989, "learning_rate": 1.5549607661210568e-05, "loss": 3.68, "step": 5550 }, { "epoch": 1.2698414967055898, "grad_norm": 0.45317748193343715, "learning_rate": 1.5532463198581175e-05, "loss": 3.5963, "step": 5552 }, { "epoch": 1.2702988551745822, "grad_norm": 0.38486293588471326, "learning_rate": 1.5515323932351994e-05, "loss": 3.5109, "step": 5554 }, { "epoch": 1.2707562136435748, "grad_norm": 0.43741235536333045, "learning_rate": 1.5498189871930176e-05, "loss": 3.3996, "step": 5556 }, { "epoch": 1.2712135721125675, "grad_norm": 0.4712080087849803, "learning_rate": 1.5481061026720013e-05, "loss": 3.4749, "step": 5558 }, { "epoch": 1.2716709305815599, "grad_norm": 0.45320924436175974, "learning_rate": 1.5463937406122924e-05, "loss": 3.4905, "step": 5560 }, { "epoch": 1.2721282890505523, "grad_norm": 0.4923576333410807, "learning_rate": 1.5446819019537463e-05, "loss": 3.5393, "step": 5562 }, { "epoch": 1.272585647519545, "grad_norm": 0.5167550506160425, "learning_rate": 1.5429705876359324e-05, "loss": 3.6178, "step": 5564 }, { "epoch": 1.2730430059885376, "grad_norm": 0.4484792012088108, "learning_rate": 1.5412597985981307e-05, "loss": 3.3284, "step": 5566 }, { "epoch": 1.27350036445753, "grad_norm": 0.4757582647975846, "learning_rate": 1.539549535779334e-05, "loss": 3.6008, "step": 5568 }, { "epoch": 1.2739577229265224, "grad_norm": 0.47327839283475887, "learning_rate": 1.5378398001182447e-05, "loss": 3.5918, "step": 5570 }, { "epoch": 1.274415081395515, "grad_norm": 0.5072548959344433, "learning_rate": 1.536130592553278e-05, "loss": 3.417, "step": 5572 }, { "epoch": 1.2748724398645075, "grad_norm": 0.4175066844896181, "learning_rate": 1.5344219140225586e-05, "loss": 3.3315, "step": 5574 }, { "epoch": 1.2753297983335001, "grad_norm": 0.45409229228083897, "learning_rate": 1.532713765463919e-05, "loss": 3.5413, "step": 5576 }, { "epoch": 1.2757871568024926, "grad_norm": 0.5262999482813858, "learning_rate": 1.5310061478149036e-05, "loss": 3.7039, "step": 5578 }, { "epoch": 1.2762445152714852, "grad_norm": 0.408488032740015, "learning_rate": 1.5292990620127637e-05, "loss": 3.3826, "step": 5580 }, { "epoch": 1.2767018737404776, "grad_norm": 0.45273035227030933, "learning_rate": 1.5275925089944588e-05, "loss": 3.4902, "step": 5582 }, { "epoch": 1.2771592322094703, "grad_norm": 0.5321916201258965, "learning_rate": 1.5258864896966558e-05, "loss": 3.7906, "step": 5584 }, { "epoch": 1.2776165906784627, "grad_norm": 0.43213550972068765, "learning_rate": 1.5241810050557307e-05, "loss": 3.3148, "step": 5586 }, { "epoch": 1.2780739491474553, "grad_norm": 0.34248372411689987, "learning_rate": 1.5224760560077628e-05, "loss": 3.5202, "step": 5588 }, { "epoch": 1.2785313076164477, "grad_norm": 0.595100999041709, "learning_rate": 1.5207716434885394e-05, "loss": 3.464, "step": 5590 }, { "epoch": 1.2789886660854402, "grad_norm": 0.5066581811267536, "learning_rate": 1.5190677684335542e-05, "loss": 3.5581, "step": 5592 }, { "epoch": 1.2794460245544328, "grad_norm": 0.4954162837904768, "learning_rate": 1.5173644317780031e-05, "loss": 3.6643, "step": 5594 }, { "epoch": 1.2799033830234254, "grad_norm": 0.4738234460761632, "learning_rate": 1.5156616344567892e-05, "loss": 3.5484, "step": 5596 }, { "epoch": 1.2803607414924179, "grad_norm": 0.46462533130895645, "learning_rate": 1.513959377404518e-05, "loss": 3.5073, "step": 5598 }, { "epoch": 1.2808180999614103, "grad_norm": 0.422580217866516, "learning_rate": 1.5122576615554985e-05, "loss": 3.5579, "step": 5600 }, { "epoch": 1.281275458430403, "grad_norm": 0.42402950020720814, "learning_rate": 1.5105564878437439e-05, "loss": 3.7178, "step": 5602 }, { "epoch": 1.2817328168993956, "grad_norm": 0.5625411389460047, "learning_rate": 1.5088558572029688e-05, "loss": 3.4472, "step": 5604 }, { "epoch": 1.282190175368388, "grad_norm": 0.508772954118353, "learning_rate": 1.5071557705665895e-05, "loss": 3.6505, "step": 5606 }, { "epoch": 1.2826475338373804, "grad_norm": 0.43330985733166155, "learning_rate": 1.5054562288677249e-05, "loss": 3.396, "step": 5608 }, { "epoch": 1.283104892306373, "grad_norm": 0.43120926236120055, "learning_rate": 1.503757233039193e-05, "loss": 3.322, "step": 5610 }, { "epoch": 1.2835622507753655, "grad_norm": 0.4508525494495274, "learning_rate": 1.5020587840135142e-05, "loss": 3.5276, "step": 5612 }, { "epoch": 1.284019609244358, "grad_norm": 0.5046140741370726, "learning_rate": 1.5003608827229081e-05, "loss": 3.6306, "step": 5614 }, { "epoch": 1.2844769677133505, "grad_norm": 0.39651943859781746, "learning_rate": 1.4986635300992925e-05, "loss": 3.5077, "step": 5616 }, { "epoch": 1.2849343261823432, "grad_norm": 0.5023571125347502, "learning_rate": 1.4969667270742848e-05, "loss": 3.463, "step": 5618 }, { "epoch": 1.2853916846513356, "grad_norm": 0.47069752163539486, "learning_rate": 1.4952704745792023e-05, "loss": 3.5059, "step": 5620 }, { "epoch": 1.2858490431203282, "grad_norm": 0.5142716362091632, "learning_rate": 1.4935747735450573e-05, "loss": 3.5417, "step": 5622 }, { "epoch": 1.2863064015893206, "grad_norm": 0.708970208985435, "learning_rate": 1.4918796249025612e-05, "loss": 3.4874, "step": 5624 }, { "epoch": 1.2867637600583133, "grad_norm": 0.46549446977368586, "learning_rate": 1.4901850295821229e-05, "loss": 3.4426, "step": 5626 }, { "epoch": 1.2872211185273057, "grad_norm": 0.49712491438176465, "learning_rate": 1.4884909885138454e-05, "loss": 3.5427, "step": 5628 }, { "epoch": 1.2876784769962983, "grad_norm": 0.5136898023138587, "learning_rate": 1.4867975026275293e-05, "loss": 3.6043, "step": 5630 }, { "epoch": 1.2881358354652908, "grad_norm": 0.6061935024874799, "learning_rate": 1.485104572852669e-05, "loss": 3.5783, "step": 5632 }, { "epoch": 1.2885931939342834, "grad_norm": 0.557618924487293, "learning_rate": 1.4834122001184553e-05, "loss": 3.507, "step": 5634 }, { "epoch": 1.2890505524032758, "grad_norm": 0.49776540544237236, "learning_rate": 1.4817203853537726e-05, "loss": 3.5465, "step": 5636 }, { "epoch": 1.2895079108722682, "grad_norm": 0.48692596029288865, "learning_rate": 1.4800291294871983e-05, "loss": 3.3939, "step": 5638 }, { "epoch": 1.2899652693412609, "grad_norm": 0.5877982792612039, "learning_rate": 1.4783384334470035e-05, "loss": 3.4541, "step": 5640 }, { "epoch": 1.2904226278102535, "grad_norm": 0.42682805999890544, "learning_rate": 1.4766482981611537e-05, "loss": 3.4437, "step": 5642 }, { "epoch": 1.290879986279246, "grad_norm": 0.4848589325524474, "learning_rate": 1.4749587245573032e-05, "loss": 3.6479, "step": 5644 }, { "epoch": 1.2913373447482384, "grad_norm": 0.47945568621707974, "learning_rate": 1.473269713562801e-05, "loss": 3.5132, "step": 5646 }, { "epoch": 1.291794703217231, "grad_norm": 0.44938719535270427, "learning_rate": 1.4715812661046865e-05, "loss": 3.5, "step": 5648 }, { "epoch": 1.2922520616862236, "grad_norm": 0.5349999916664907, "learning_rate": 1.4698933831096887e-05, "loss": 3.5831, "step": 5650 }, { "epoch": 1.292709420155216, "grad_norm": 0.6607206741966782, "learning_rate": 1.468206065504228e-05, "loss": 3.522, "step": 5652 }, { "epoch": 1.2931667786242085, "grad_norm": 0.4546603794464916, "learning_rate": 1.466519314214414e-05, "loss": 3.4362, "step": 5654 }, { "epoch": 1.2936241370932011, "grad_norm": 0.4561625971751402, "learning_rate": 1.4648331301660457e-05, "loss": 3.5008, "step": 5656 }, { "epoch": 1.2940814955621935, "grad_norm": 0.46613692873613677, "learning_rate": 1.4631475142846107e-05, "loss": 3.5327, "step": 5658 }, { "epoch": 1.2945388540311862, "grad_norm": 0.4102478288410578, "learning_rate": 1.4614624674952842e-05, "loss": 3.4801, "step": 5660 }, { "epoch": 1.2949962125001786, "grad_norm": 0.42395936893958375, "learning_rate": 1.4597779907229297e-05, "loss": 3.4296, "step": 5662 }, { "epoch": 1.2954535709691712, "grad_norm": 0.4450732809977983, "learning_rate": 1.4580940848920984e-05, "loss": 3.4159, "step": 5664 }, { "epoch": 1.2959109294381637, "grad_norm": 0.40764671258965923, "learning_rate": 1.456410750927027e-05, "loss": 3.3933, "step": 5666 }, { "epoch": 1.2963682879071563, "grad_norm": 0.4519581239732129, "learning_rate": 1.454727989751637e-05, "loss": 3.5719, "step": 5668 }, { "epoch": 1.2968256463761487, "grad_norm": 0.3822836239286625, "learning_rate": 1.45304580228954e-05, "loss": 3.6657, "step": 5670 }, { "epoch": 1.2972830048451414, "grad_norm": 0.5061531245000942, "learning_rate": 1.4513641894640276e-05, "loss": 3.5366, "step": 5672 }, { "epoch": 1.2977403633141338, "grad_norm": 0.515939241079423, "learning_rate": 1.4496831521980802e-05, "loss": 3.6522, "step": 5674 }, { "epoch": 1.2981977217831262, "grad_norm": 0.47004037315878927, "learning_rate": 1.4480026914143601e-05, "loss": 3.4444, "step": 5676 }, { "epoch": 1.2986550802521188, "grad_norm": 0.6420129534151386, "learning_rate": 1.4463228080352114e-05, "loss": 3.5341, "step": 5678 }, { "epoch": 1.2991124387211115, "grad_norm": 0.5713781564475539, "learning_rate": 1.4446435029826666e-05, "loss": 3.5294, "step": 5680 }, { "epoch": 1.299569797190104, "grad_norm": 0.4559803969588761, "learning_rate": 1.4429647771784358e-05, "loss": 3.5557, "step": 5682 }, { "epoch": 1.3000271556590963, "grad_norm": 0.3839365940111249, "learning_rate": 1.4412866315439123e-05, "loss": 3.4273, "step": 5684 }, { "epoch": 1.300484514128089, "grad_norm": 0.48655278788473405, "learning_rate": 1.4396090670001738e-05, "loss": 3.6182, "step": 5686 }, { "epoch": 1.3009418725970816, "grad_norm": 0.49223587767189986, "learning_rate": 1.4379320844679745e-05, "loss": 3.4949, "step": 5688 }, { "epoch": 1.301399231066074, "grad_norm": 0.5438338598719736, "learning_rate": 1.436255684867752e-05, "loss": 3.5537, "step": 5690 }, { "epoch": 1.3018565895350664, "grad_norm": 0.5965585039263731, "learning_rate": 1.4345798691196261e-05, "loss": 3.5259, "step": 5692 }, { "epoch": 1.302313948004059, "grad_norm": 0.4155967645447607, "learning_rate": 1.4329046381433897e-05, "loss": 3.5019, "step": 5694 }, { "epoch": 1.3027713064730515, "grad_norm": 0.4659901115551208, "learning_rate": 1.4312299928585202e-05, "loss": 3.5808, "step": 5696 }, { "epoch": 1.3032286649420441, "grad_norm": 0.534604059170912, "learning_rate": 1.429555934184174e-05, "loss": 3.5839, "step": 5698 }, { "epoch": 1.3036860234110366, "grad_norm": 0.5056490663475878, "learning_rate": 1.4278824630391796e-05, "loss": 3.5405, "step": 5700 }, { "epoch": 1.3041433818800292, "grad_norm": 0.3924503972667657, "learning_rate": 1.4262095803420486e-05, "loss": 3.5877, "step": 5702 }, { "epoch": 1.3046007403490216, "grad_norm": 0.47289383646884314, "learning_rate": 1.4245372870109702e-05, "loss": 3.6569, "step": 5704 }, { "epoch": 1.3050580988180143, "grad_norm": 0.49401523672864966, "learning_rate": 1.4228655839638033e-05, "loss": 3.5613, "step": 5706 }, { "epoch": 1.3055154572870067, "grad_norm": 0.5336278265017912, "learning_rate": 1.4211944721180898e-05, "loss": 3.3741, "step": 5708 }, { "epoch": 1.3059728157559993, "grad_norm": 0.5143754441362506, "learning_rate": 1.4195239523910463e-05, "loss": 3.5458, "step": 5710 }, { "epoch": 1.3064301742249917, "grad_norm": 0.5373217630949013, "learning_rate": 1.417854025699559e-05, "loss": 3.6885, "step": 5712 }, { "epoch": 1.3068875326939844, "grad_norm": 0.415245166541441, "learning_rate": 1.4161846929601949e-05, "loss": 3.4204, "step": 5714 }, { "epoch": 1.3073448911629768, "grad_norm": 0.548399408218543, "learning_rate": 1.4145159550891917e-05, "loss": 3.6034, "step": 5716 }, { "epoch": 1.3078022496319694, "grad_norm": 0.4384387707301001, "learning_rate": 1.4128478130024606e-05, "loss": 3.5051, "step": 5718 }, { "epoch": 1.3082596081009619, "grad_norm": 0.4155157426050645, "learning_rate": 1.411180267615588e-05, "loss": 3.5379, "step": 5720 }, { "epoch": 1.3087169665699543, "grad_norm": 0.428368625099205, "learning_rate": 1.4095133198438303e-05, "loss": 3.492, "step": 5722 }, { "epoch": 1.309174325038947, "grad_norm": 0.4648727920820337, "learning_rate": 1.4078469706021166e-05, "loss": 3.6358, "step": 5724 }, { "epoch": 1.3096316835079396, "grad_norm": 0.5419666181151885, "learning_rate": 1.406181220805049e-05, "loss": 3.515, "step": 5726 }, { "epoch": 1.310089041976932, "grad_norm": 0.44650658913499625, "learning_rate": 1.404516071366899e-05, "loss": 3.508, "step": 5728 }, { "epoch": 1.3105464004459244, "grad_norm": 0.4563887718220785, "learning_rate": 1.4028515232016074e-05, "loss": 3.5244, "step": 5730 }, { "epoch": 1.311003758914917, "grad_norm": 0.685261071776412, "learning_rate": 1.4011875772227889e-05, "loss": 3.5334, "step": 5732 }, { "epoch": 1.3114611173839097, "grad_norm": 0.42631060390389103, "learning_rate": 1.3995242343437231e-05, "loss": 3.5103, "step": 5734 }, { "epoch": 1.311918475852902, "grad_norm": 0.5563925742547172, "learning_rate": 1.3978614954773628e-05, "loss": 3.6742, "step": 5736 }, { "epoch": 1.3123758343218945, "grad_norm": 0.5014348007161828, "learning_rate": 1.3961993615363267e-05, "loss": 3.5093, "step": 5738 }, { "epoch": 1.3128331927908872, "grad_norm": 0.43048143731613336, "learning_rate": 1.3945378334329004e-05, "loss": 3.6046, "step": 5740 }, { "epoch": 1.3132905512598796, "grad_norm": 0.5215041481869851, "learning_rate": 1.392876912079042e-05, "loss": 3.4458, "step": 5742 }, { "epoch": 1.3137479097288722, "grad_norm": 0.4938892282885408, "learning_rate": 1.3912165983863696e-05, "loss": 3.4635, "step": 5744 }, { "epoch": 1.3142052681978647, "grad_norm": 0.51470564754064, "learning_rate": 1.3895568932661726e-05, "loss": 3.5361, "step": 5746 }, { "epoch": 1.3146626266668573, "grad_norm": 0.4183423342471019, "learning_rate": 1.3878977976294077e-05, "loss": 3.5152, "step": 5748 }, { "epoch": 1.3151199851358497, "grad_norm": 0.4137059602031915, "learning_rate": 1.3862393123866902e-05, "loss": 3.5635, "step": 5750 }, { "epoch": 1.3155773436048424, "grad_norm": 0.44957415577863796, "learning_rate": 1.384581438448307e-05, "loss": 3.5678, "step": 5752 }, { "epoch": 1.3160347020738348, "grad_norm": 0.40945368949619765, "learning_rate": 1.382924176724209e-05, "loss": 3.5799, "step": 5754 }, { "epoch": 1.3164920605428274, "grad_norm": 0.4593124029107795, "learning_rate": 1.3812675281240055e-05, "loss": 3.3326, "step": 5756 }, { "epoch": 1.3169494190118198, "grad_norm": 0.5416643680621551, "learning_rate": 1.3796114935569749e-05, "loss": 3.4716, "step": 5758 }, { "epoch": 1.3174067774808123, "grad_norm": 0.6044878736104781, "learning_rate": 1.3779560739320585e-05, "loss": 3.4362, "step": 5760 }, { "epoch": 1.317864135949805, "grad_norm": 0.43581194146187674, "learning_rate": 1.3763012701578545e-05, "loss": 3.4081, "step": 5762 }, { "epoch": 1.3183214944187975, "grad_norm": 0.6607311837601929, "learning_rate": 1.374647083142629e-05, "loss": 3.262, "step": 5764 }, { "epoch": 1.31877885288779, "grad_norm": 0.4790027241201328, "learning_rate": 1.3729935137943094e-05, "loss": 3.373, "step": 5766 }, { "epoch": 1.3192362113567824, "grad_norm": 0.3827965596831197, "learning_rate": 1.3713405630204781e-05, "loss": 3.3463, "step": 5768 }, { "epoch": 1.319693569825775, "grad_norm": 0.41613749836709735, "learning_rate": 1.3696882317283841e-05, "loss": 3.5232, "step": 5770 }, { "epoch": 1.3201509282947677, "grad_norm": 0.3540312657800308, "learning_rate": 1.3680365208249363e-05, "loss": 3.4137, "step": 5772 }, { "epoch": 1.32060828676376, "grad_norm": 0.4039850737285897, "learning_rate": 1.3663854312166968e-05, "loss": 3.4641, "step": 5774 }, { "epoch": 1.3210656452327525, "grad_norm": 0.3882958207955974, "learning_rate": 1.3647349638098943e-05, "loss": 3.7439, "step": 5776 }, { "epoch": 1.3215230037017451, "grad_norm": 0.6716429509274272, "learning_rate": 1.3630851195104122e-05, "loss": 3.4837, "step": 5778 }, { "epoch": 1.3219803621707376, "grad_norm": 0.5587014848013082, "learning_rate": 1.3614358992237908e-05, "loss": 3.5279, "step": 5780 }, { "epoch": 1.3224377206397302, "grad_norm": 0.44769694424618006, "learning_rate": 1.3597873038552312e-05, "loss": 3.4518, "step": 5782 }, { "epoch": 1.3228950791087226, "grad_norm": 0.46610504021623334, "learning_rate": 1.3581393343095894e-05, "loss": 3.4545, "step": 5784 }, { "epoch": 1.3233524375777153, "grad_norm": 0.4117148440789212, "learning_rate": 1.3564919914913771e-05, "loss": 3.5027, "step": 5786 }, { "epoch": 1.3238097960467077, "grad_norm": 0.4752059922780135, "learning_rate": 1.3548452763047653e-05, "loss": 3.5712, "step": 5788 }, { "epoch": 1.3242671545157003, "grad_norm": 0.551038776551175, "learning_rate": 1.3531991896535776e-05, "loss": 3.3732, "step": 5790 }, { "epoch": 1.3247245129846927, "grad_norm": 0.42154033950461506, "learning_rate": 1.3515537324412918e-05, "loss": 3.555, "step": 5792 }, { "epoch": 1.3251818714536854, "grad_norm": 0.44507150654143046, "learning_rate": 1.3499089055710449e-05, "loss": 3.5406, "step": 5794 }, { "epoch": 1.3256392299226778, "grad_norm": 0.46547214123942965, "learning_rate": 1.348264709945623e-05, "loss": 3.4977, "step": 5796 }, { "epoch": 1.3260965883916704, "grad_norm": 0.48729699688731115, "learning_rate": 1.3466211464674672e-05, "loss": 3.3898, "step": 5798 }, { "epoch": 1.3265539468606629, "grad_norm": 0.4507562779165526, "learning_rate": 1.3449782160386737e-05, "loss": 3.7081, "step": 5800 }, { "epoch": 1.3270113053296555, "grad_norm": 0.4708121240812599, "learning_rate": 1.3433359195609876e-05, "loss": 3.4962, "step": 5802 }, { "epoch": 1.327468663798648, "grad_norm": 0.4178818050758134, "learning_rate": 1.3416942579358117e-05, "loss": 3.3984, "step": 5804 }, { "epoch": 1.3279260222676403, "grad_norm": 0.4822187111741872, "learning_rate": 1.3400532320641917e-05, "loss": 3.5979, "step": 5806 }, { "epoch": 1.328383380736633, "grad_norm": 0.40691927652826043, "learning_rate": 1.3384128428468322e-05, "loss": 3.4582, "step": 5808 }, { "epoch": 1.3288407392056256, "grad_norm": 0.5009423554697392, "learning_rate": 1.3367730911840876e-05, "loss": 3.6649, "step": 5810 }, { "epoch": 1.329298097674618, "grad_norm": 0.5344873410644809, "learning_rate": 1.3351339779759558e-05, "loss": 3.5808, "step": 5812 }, { "epoch": 1.3297554561436105, "grad_norm": 0.44436465998448726, "learning_rate": 1.333495504122091e-05, "loss": 3.5601, "step": 5814 }, { "epoch": 1.330212814612603, "grad_norm": 0.4503517789209028, "learning_rate": 1.331857670521796e-05, "loss": 3.6822, "step": 5816 }, { "epoch": 1.3306701730815957, "grad_norm": 0.4446330994647704, "learning_rate": 1.3302204780740168e-05, "loss": 3.5277, "step": 5818 }, { "epoch": 1.3311275315505882, "grad_norm": 0.443310083682527, "learning_rate": 1.3285839276773532e-05, "loss": 3.4664, "step": 5820 }, { "epoch": 1.3315848900195806, "grad_norm": 0.46980310399356445, "learning_rate": 1.3269480202300522e-05, "loss": 3.4228, "step": 5822 }, { "epoch": 1.3320422484885732, "grad_norm": 0.40355092029993594, "learning_rate": 1.3253127566300022e-05, "loss": 3.3926, "step": 5824 }, { "epoch": 1.3324996069575656, "grad_norm": 0.5137304700494597, "learning_rate": 1.3236781377747448e-05, "loss": 3.4917, "step": 5826 }, { "epoch": 1.3329569654265583, "grad_norm": 0.503343312067732, "learning_rate": 1.3220441645614667e-05, "loss": 3.4334, "step": 5828 }, { "epoch": 1.3334143238955507, "grad_norm": 0.5018622765169337, "learning_rate": 1.3204108378869951e-05, "loss": 3.5595, "step": 5830 }, { "epoch": 1.3338716823645433, "grad_norm": 0.5331349591289827, "learning_rate": 1.3187781586478087e-05, "loss": 3.5498, "step": 5832 }, { "epoch": 1.3343290408335358, "grad_norm": 0.5279657439783868, "learning_rate": 1.3171461277400269e-05, "loss": 3.4756, "step": 5834 }, { "epoch": 1.3347863993025284, "grad_norm": 0.5051003730753667, "learning_rate": 1.3155147460594139e-05, "loss": 3.4202, "step": 5836 }, { "epoch": 1.3352437577715208, "grad_norm": 0.5783846246426819, "learning_rate": 1.3138840145013797e-05, "loss": 3.6981, "step": 5838 }, { "epoch": 1.3357011162405135, "grad_norm": 0.6041752688573789, "learning_rate": 1.3122539339609753e-05, "loss": 3.5392, "step": 5840 }, { "epoch": 1.3361584747095059, "grad_norm": 0.4469314892951075, "learning_rate": 1.3106245053328937e-05, "loss": 3.3233, "step": 5842 }, { "epoch": 1.3366158331784983, "grad_norm": 0.41738358650617724, "learning_rate": 1.3089957295114733e-05, "loss": 3.5857, "step": 5844 }, { "epoch": 1.337073191647491, "grad_norm": 0.5233336388329912, "learning_rate": 1.3073676073906915e-05, "loss": 3.4488, "step": 5846 }, { "epoch": 1.3375305501164836, "grad_norm": 0.40815882465230147, "learning_rate": 1.3057401398641665e-05, "loss": 3.4354, "step": 5848 }, { "epoch": 1.337987908585476, "grad_norm": 0.5426144438768903, "learning_rate": 1.3041133278251602e-05, "loss": 3.5258, "step": 5850 }, { "epoch": 1.3384452670544684, "grad_norm": 0.5243581958756244, "learning_rate": 1.3024871721665722e-05, "loss": 3.5055, "step": 5852 }, { "epoch": 1.338902625523461, "grad_norm": 0.5323767213992869, "learning_rate": 1.3008616737809414e-05, "loss": 3.6515, "step": 5854 }, { "epoch": 1.3393599839924537, "grad_norm": 0.42063056227930096, "learning_rate": 1.2992368335604493e-05, "loss": 3.4171, "step": 5856 }, { "epoch": 1.3398173424614461, "grad_norm": 0.48988699288664767, "learning_rate": 1.2976126523969127e-05, "loss": 3.3567, "step": 5858 }, { "epoch": 1.3402747009304385, "grad_norm": 0.47882227511449443, "learning_rate": 1.2959891311817876e-05, "loss": 3.4965, "step": 5860 }, { "epoch": 1.3407320593994312, "grad_norm": 0.5923020123805817, "learning_rate": 1.2943662708061677e-05, "loss": 3.6379, "step": 5862 }, { "epoch": 1.3411894178684236, "grad_norm": 0.4705500215225369, "learning_rate": 1.2927440721607858e-05, "loss": 3.4707, "step": 5864 }, { "epoch": 1.3416467763374162, "grad_norm": 0.49154210540020243, "learning_rate": 1.2911225361360096e-05, "loss": 3.3738, "step": 5866 }, { "epoch": 1.3421041348064087, "grad_norm": 0.5418670509837711, "learning_rate": 1.2895016636218427e-05, "loss": 3.5191, "step": 5868 }, { "epoch": 1.3425614932754013, "grad_norm": 0.5431774204975027, "learning_rate": 1.2878814555079255e-05, "loss": 3.52, "step": 5870 }, { "epoch": 1.3430188517443937, "grad_norm": 0.5457894313667564, "learning_rate": 1.2862619126835362e-05, "loss": 3.5089, "step": 5872 }, { "epoch": 1.3434762102133864, "grad_norm": 0.41208451544100644, "learning_rate": 1.2846430360375817e-05, "loss": 3.4881, "step": 5874 }, { "epoch": 1.3439335686823788, "grad_norm": 0.4974023575546293, "learning_rate": 1.2830248264586082e-05, "loss": 3.3826, "step": 5876 }, { "epoch": 1.3443909271513714, "grad_norm": 0.4592014043127329, "learning_rate": 1.2814072848347964e-05, "loss": 3.5412, "step": 5878 }, { "epoch": 1.3448482856203638, "grad_norm": 0.47976637884471207, "learning_rate": 1.2797904120539545e-05, "loss": 3.4569, "step": 5880 }, { "epoch": 1.3453056440893565, "grad_norm": 0.420844213971399, "learning_rate": 1.2781742090035298e-05, "loss": 3.4358, "step": 5882 }, { "epoch": 1.345763002558349, "grad_norm": 0.414823408759608, "learning_rate": 1.276558676570601e-05, "loss": 3.4186, "step": 5884 }, { "epoch": 1.3462203610273415, "grad_norm": 0.4460886499009814, "learning_rate": 1.2749438156418744e-05, "loss": 3.5078, "step": 5886 }, { "epoch": 1.346677719496334, "grad_norm": 0.4229050882129444, "learning_rate": 1.2733296271036933e-05, "loss": 3.6503, "step": 5888 }, { "epoch": 1.3471350779653264, "grad_norm": 0.6086381957213491, "learning_rate": 1.2717161118420279e-05, "loss": 3.4308, "step": 5890 }, { "epoch": 1.347592436434319, "grad_norm": 0.5378930772602194, "learning_rate": 1.2701032707424804e-05, "loss": 3.5904, "step": 5892 }, { "epoch": 1.3480497949033117, "grad_norm": 0.4342625760655026, "learning_rate": 1.2684911046902836e-05, "loss": 3.3994, "step": 5894 }, { "epoch": 1.348507153372304, "grad_norm": 0.53018463121758, "learning_rate": 1.2668796145702993e-05, "loss": 3.5119, "step": 5896 }, { "epoch": 1.3489645118412965, "grad_norm": 0.39480911516601885, "learning_rate": 1.2652688012670168e-05, "loss": 3.4294, "step": 5898 }, { "epoch": 1.3494218703102892, "grad_norm": 0.5162080011995123, "learning_rate": 1.2636586656645572e-05, "loss": 3.4149, "step": 5900 }, { "epoch": 1.3498792287792818, "grad_norm": 0.3843757668154131, "learning_rate": 1.2620492086466662e-05, "loss": 3.5299, "step": 5902 }, { "epoch": 1.3503365872482742, "grad_norm": 0.4638187905211292, "learning_rate": 1.2604404310967183e-05, "loss": 3.2007, "step": 5904 }, { "epoch": 1.3507939457172666, "grad_norm": 0.46632399608479663, "learning_rate": 1.258832333897717e-05, "loss": 3.6496, "step": 5906 }, { "epoch": 1.3512513041862593, "grad_norm": 0.6375576413966715, "learning_rate": 1.2572249179322893e-05, "loss": 3.4473, "step": 5908 }, { "epoch": 1.3517086626552517, "grad_norm": 0.5287110017593959, "learning_rate": 1.2556181840826891e-05, "loss": 3.4552, "step": 5910 }, { "epoch": 1.3521660211242443, "grad_norm": 0.5336251457662734, "learning_rate": 1.2540121332307986e-05, "loss": 3.5838, "step": 5912 }, { "epoch": 1.3526233795932368, "grad_norm": 0.49244139949602045, "learning_rate": 1.2524067662581219e-05, "loss": 3.6018, "step": 5914 }, { "epoch": 1.3530807380622294, "grad_norm": 0.4669611494151254, "learning_rate": 1.2508020840457877e-05, "loss": 3.3949, "step": 5916 }, { "epoch": 1.3535380965312218, "grad_norm": 0.47658369923952376, "learning_rate": 1.2491980874745518e-05, "loss": 3.5366, "step": 5918 }, { "epoch": 1.3539954550002145, "grad_norm": 0.6245311647538696, "learning_rate": 1.2475947774247911e-05, "loss": 3.4968, "step": 5920 }, { "epoch": 1.3544528134692069, "grad_norm": 0.5906878362260539, "learning_rate": 1.2459921547765068e-05, "loss": 3.4442, "step": 5922 }, { "epoch": 1.3549101719381995, "grad_norm": 0.41234146380612857, "learning_rate": 1.2443902204093214e-05, "loss": 3.513, "step": 5924 }, { "epoch": 1.355367530407192, "grad_norm": 0.5017202111540421, "learning_rate": 1.242788975202482e-05, "loss": 3.4134, "step": 5926 }, { "epoch": 1.3558248888761844, "grad_norm": 0.44602313977671393, "learning_rate": 1.241188420034856e-05, "loss": 3.49, "step": 5928 }, { "epoch": 1.356282247345177, "grad_norm": 0.4435218311740403, "learning_rate": 1.2395885557849309e-05, "loss": 3.4632, "step": 5930 }, { "epoch": 1.3567396058141696, "grad_norm": 0.4614890726269179, "learning_rate": 1.2379893833308173e-05, "loss": 3.559, "step": 5932 }, { "epoch": 1.357196964283162, "grad_norm": 0.4690504859387067, "learning_rate": 1.236390903550247e-05, "loss": 3.3533, "step": 5934 }, { "epoch": 1.3576543227521545, "grad_norm": 0.44911529344834883, "learning_rate": 1.234793117320566e-05, "loss": 3.4554, "step": 5936 }, { "epoch": 1.3581116812211471, "grad_norm": 0.35891879191425197, "learning_rate": 1.233196025518745e-05, "loss": 3.7404, "step": 5938 }, { "epoch": 1.3585690396901398, "grad_norm": 0.4704938399543018, "learning_rate": 1.2315996290213738e-05, "loss": 3.535, "step": 5940 }, { "epoch": 1.3590263981591322, "grad_norm": 0.4506899914850529, "learning_rate": 1.2300039287046553e-05, "loss": 3.5475, "step": 5942 }, { "epoch": 1.3594837566281246, "grad_norm": 0.4605851038628502, "learning_rate": 1.2284089254444154e-05, "loss": 3.7185, "step": 5944 }, { "epoch": 1.3599411150971172, "grad_norm": 0.5648552805038931, "learning_rate": 1.2268146201160972e-05, "loss": 3.4837, "step": 5946 }, { "epoch": 1.3603984735661097, "grad_norm": 0.4179635270407368, "learning_rate": 1.2252210135947556e-05, "loss": 3.3835, "step": 5948 }, { "epoch": 1.3608558320351023, "grad_norm": 0.36306350088744943, "learning_rate": 1.2236281067550686e-05, "loss": 3.374, "step": 5950 }, { "epoch": 1.3613131905040947, "grad_norm": 0.4725102128080661, "learning_rate": 1.2220359004713258e-05, "loss": 3.4757, "step": 5952 }, { "epoch": 1.3617705489730874, "grad_norm": 0.4192605777536619, "learning_rate": 1.2204443956174332e-05, "loss": 3.4923, "step": 5954 }, { "epoch": 1.3622279074420798, "grad_norm": 0.3930419156044653, "learning_rate": 1.2188535930669137e-05, "loss": 3.4787, "step": 5956 }, { "epoch": 1.3626852659110724, "grad_norm": 0.42974752527071414, "learning_rate": 1.2172634936929022e-05, "loss": 3.5806, "step": 5958 }, { "epoch": 1.3631426243800648, "grad_norm": 0.4377999980157126, "learning_rate": 1.2156740983681484e-05, "loss": 3.6074, "step": 5960 }, { "epoch": 1.3635999828490575, "grad_norm": 0.41464279106273855, "learning_rate": 1.2140854079650169e-05, "loss": 3.5529, "step": 5962 }, { "epoch": 1.36405734131805, "grad_norm": 0.4585576285282471, "learning_rate": 1.212497423355484e-05, "loss": 3.4788, "step": 5964 }, { "epoch": 1.3645146997870425, "grad_norm": 0.42643447953068175, "learning_rate": 1.2109101454111377e-05, "loss": 3.5207, "step": 5966 }, { "epoch": 1.364972058256035, "grad_norm": 0.44116423732495574, "learning_rate": 1.2093235750031817e-05, "loss": 3.613, "step": 5968 }, { "epoch": 1.3654294167250276, "grad_norm": 0.45028384449073866, "learning_rate": 1.2077377130024278e-05, "loss": 3.439, "step": 5970 }, { "epoch": 1.36588677519402, "grad_norm": 0.4775379391522447, "learning_rate": 1.2061525602792994e-05, "loss": 3.5584, "step": 5972 }, { "epoch": 1.3663441336630124, "grad_norm": 0.39219044764045546, "learning_rate": 1.2045681177038335e-05, "loss": 3.4492, "step": 5974 }, { "epoch": 1.366801492132005, "grad_norm": 0.43965335946566875, "learning_rate": 1.2029843861456738e-05, "loss": 3.3274, "step": 5976 }, { "epoch": 1.3672588506009977, "grad_norm": 0.549282849452467, "learning_rate": 1.2014013664740758e-05, "loss": 3.5357, "step": 5978 }, { "epoch": 1.3677162090699901, "grad_norm": 0.45525305991634396, "learning_rate": 1.1998190595579029e-05, "loss": 3.5538, "step": 5980 }, { "epoch": 1.3681735675389826, "grad_norm": 0.4832136937853628, "learning_rate": 1.1982374662656293e-05, "loss": 3.5538, "step": 5982 }, { "epoch": 1.3686309260079752, "grad_norm": 0.42904849655404526, "learning_rate": 1.1966565874653358e-05, "loss": 3.3724, "step": 5984 }, { "epoch": 1.3690882844769678, "grad_norm": 0.410243906009564, "learning_rate": 1.1950764240247103e-05, "loss": 3.5416, "step": 5986 }, { "epoch": 1.3695456429459603, "grad_norm": 0.44414571525991703, "learning_rate": 1.1934969768110519e-05, "loss": 3.4395, "step": 5988 }, { "epoch": 1.3700030014149527, "grad_norm": 0.4224136587098436, "learning_rate": 1.1919182466912621e-05, "loss": 3.5937, "step": 5990 }, { "epoch": 1.3704603598839453, "grad_norm": 0.5416190847870149, "learning_rate": 1.1903402345318504e-05, "loss": 3.6724, "step": 5992 }, { "epoch": 1.3709177183529377, "grad_norm": 0.5137353139945486, "learning_rate": 1.1887629411989343e-05, "loss": 3.4214, "step": 5994 }, { "epoch": 1.3713750768219304, "grad_norm": 0.47134191451473734, "learning_rate": 1.187186367558234e-05, "loss": 3.4109, "step": 5996 }, { "epoch": 1.3718324352909228, "grad_norm": 0.49856769033415205, "learning_rate": 1.1856105144750748e-05, "loss": 3.5105, "step": 5998 }, { "epoch": 1.3722897937599154, "grad_norm": 0.4975122761529659, "learning_rate": 1.1840353828143885e-05, "loss": 3.5219, "step": 6000 }, { "epoch": 1.3727471522289079, "grad_norm": 0.544348959053581, "learning_rate": 1.1824609734407116e-05, "loss": 3.4026, "step": 6002 }, { "epoch": 1.3732045106979005, "grad_norm": 0.44922672762214044, "learning_rate": 1.1808872872181784e-05, "loss": 3.4813, "step": 6004 }, { "epoch": 1.373661869166893, "grad_norm": 0.4331350735735584, "learning_rate": 1.1793143250105334e-05, "loss": 3.1844, "step": 6006 }, { "epoch": 1.3741192276358856, "grad_norm": 0.5722938504598043, "learning_rate": 1.1777420876811197e-05, "loss": 3.5462, "step": 6008 }, { "epoch": 1.374576586104878, "grad_norm": 0.41548798460979613, "learning_rate": 1.1761705760928823e-05, "loss": 3.492, "step": 6010 }, { "epoch": 1.3750339445738704, "grad_norm": 0.4087695157644686, "learning_rate": 1.1745997911083711e-05, "loss": 3.5954, "step": 6012 }, { "epoch": 1.375491303042863, "grad_norm": 0.4991279537391535, "learning_rate": 1.173029733589734e-05, "loss": 3.4988, "step": 6014 }, { "epoch": 1.3759486615118557, "grad_norm": 0.6083677477007551, "learning_rate": 1.1714604043987199e-05, "loss": 3.533, "step": 6016 }, { "epoch": 1.376406019980848, "grad_norm": 0.43136111224006346, "learning_rate": 1.1698918043966805e-05, "loss": 3.3, "step": 6018 }, { "epoch": 1.3768633784498405, "grad_norm": 0.5816683309808146, "learning_rate": 1.1683239344445645e-05, "loss": 3.439, "step": 6020 }, { "epoch": 1.3773207369188332, "grad_norm": 0.4554956182861621, "learning_rate": 1.1667567954029207e-05, "loss": 3.4385, "step": 6022 }, { "epoch": 1.3777780953878258, "grad_norm": 0.5593022475002563, "learning_rate": 1.1651903881318982e-05, "loss": 3.3735, "step": 6024 }, { "epoch": 1.3782354538568182, "grad_norm": 0.4305707004235215, "learning_rate": 1.1636247134912424e-05, "loss": 3.5962, "step": 6026 }, { "epoch": 1.3786928123258106, "grad_norm": 0.39219390514356306, "learning_rate": 1.1620597723402965e-05, "loss": 3.4084, "step": 6028 }, { "epoch": 1.3791501707948033, "grad_norm": 0.49675590899331423, "learning_rate": 1.160495565538004e-05, "loss": 3.5896, "step": 6030 }, { "epoch": 1.3796075292637957, "grad_norm": 0.5520864942448176, "learning_rate": 1.1589320939429021e-05, "loss": 3.4538, "step": 6032 }, { "epoch": 1.3800648877327883, "grad_norm": 0.42969313531128, "learning_rate": 1.1573693584131254e-05, "loss": 3.6293, "step": 6034 }, { "epoch": 1.3805222462017808, "grad_norm": 0.4873832073670207, "learning_rate": 1.1558073598064065e-05, "loss": 3.4379, "step": 6036 }, { "epoch": 1.3809796046707734, "grad_norm": 0.5020956289862012, "learning_rate": 1.1542460989800705e-05, "loss": 3.5981, "step": 6038 }, { "epoch": 1.3814369631397658, "grad_norm": 0.3599191774930033, "learning_rate": 1.1526855767910393e-05, "loss": 3.4284, "step": 6040 }, { "epoch": 1.3818943216087585, "grad_norm": 0.5605916659971756, "learning_rate": 1.1511257940958284e-05, "loss": 3.5445, "step": 6042 }, { "epoch": 1.3823516800777509, "grad_norm": 0.4673873167861074, "learning_rate": 1.149566751750549e-05, "loss": 3.4874, "step": 6044 }, { "epoch": 1.3828090385467435, "grad_norm": 0.5022625396249254, "learning_rate": 1.1480084506109052e-05, "loss": 3.4324, "step": 6046 }, { "epoch": 1.383266397015736, "grad_norm": 0.4238861325556137, "learning_rate": 1.146450891532192e-05, "loss": 3.3701, "step": 6048 }, { "epoch": 1.3837237554847286, "grad_norm": 0.4623631236151379, "learning_rate": 1.1448940753693016e-05, "loss": 3.446, "step": 6050 }, { "epoch": 1.384181113953721, "grad_norm": 0.5380824204899382, "learning_rate": 1.1433380029767152e-05, "loss": 3.5815, "step": 6052 }, { "epoch": 1.3846384724227137, "grad_norm": 0.5845877550800317, "learning_rate": 1.1417826752085054e-05, "loss": 3.5199, "step": 6054 }, { "epoch": 1.385095830891706, "grad_norm": 0.40945862068923666, "learning_rate": 1.1402280929183393e-05, "loss": 3.6676, "step": 6056 }, { "epoch": 1.3855531893606985, "grad_norm": 0.5056226610994882, "learning_rate": 1.1386742569594716e-05, "loss": 3.5651, "step": 6058 }, { "epoch": 1.3860105478296911, "grad_norm": 0.4187630565942159, "learning_rate": 1.137121168184748e-05, "loss": 3.3449, "step": 6060 }, { "epoch": 1.3864679062986838, "grad_norm": 0.46762727751555816, "learning_rate": 1.1355688274466054e-05, "loss": 3.3213, "step": 6062 }, { "epoch": 1.3869252647676762, "grad_norm": 0.42545213140840765, "learning_rate": 1.134017235597071e-05, "loss": 3.3284, "step": 6064 }, { "epoch": 1.3873826232366686, "grad_norm": 0.404622835715053, "learning_rate": 1.1324663934877564e-05, "loss": 3.4138, "step": 6066 }, { "epoch": 1.3878399817056613, "grad_norm": 0.42293947640655244, "learning_rate": 1.1309163019698668e-05, "loss": 3.3011, "step": 6068 }, { "epoch": 1.388297340174654, "grad_norm": 0.4672307507962137, "learning_rate": 1.1293669618941923e-05, "loss": 3.5037, "step": 6070 }, { "epoch": 1.3887546986436463, "grad_norm": 0.39228230808431097, "learning_rate": 1.1278183741111111e-05, "loss": 3.5117, "step": 6072 }, { "epoch": 1.3892120571126387, "grad_norm": 0.5034484558049963, "learning_rate": 1.1262705394705908e-05, "loss": 3.5832, "step": 6074 }, { "epoch": 1.3896694155816314, "grad_norm": 0.40855264985790674, "learning_rate": 1.1247234588221823e-05, "loss": 3.6008, "step": 6076 }, { "epoch": 1.3901267740506238, "grad_norm": 0.3892853462522474, "learning_rate": 1.1231771330150236e-05, "loss": 3.6244, "step": 6078 }, { "epoch": 1.3905841325196164, "grad_norm": 0.4304084971053582, "learning_rate": 1.1216315628978405e-05, "loss": 3.5124, "step": 6080 }, { "epoch": 1.3910414909886089, "grad_norm": 0.43886840915212805, "learning_rate": 1.1200867493189419e-05, "loss": 3.5157, "step": 6082 }, { "epoch": 1.3914988494576015, "grad_norm": 0.6147004005230378, "learning_rate": 1.1185426931262208e-05, "loss": 3.4957, "step": 6084 }, { "epoch": 1.391956207926594, "grad_norm": 0.5016273896260002, "learning_rate": 1.1169993951671576e-05, "loss": 3.4669, "step": 6086 }, { "epoch": 1.3924135663955866, "grad_norm": 0.41180186075236674, "learning_rate": 1.1154568562888134e-05, "loss": 3.3489, "step": 6088 }, { "epoch": 1.392870924864579, "grad_norm": 0.43552102666542025, "learning_rate": 1.1139150773378337e-05, "loss": 3.3033, "step": 6090 }, { "epoch": 1.3933282833335716, "grad_norm": 0.35319256642504393, "learning_rate": 1.1123740591604486e-05, "loss": 3.4925, "step": 6092 }, { "epoch": 1.393785641802564, "grad_norm": 0.5111265760076998, "learning_rate": 1.1108338026024681e-05, "loss": 3.3674, "step": 6094 }, { "epoch": 1.3942430002715565, "grad_norm": 0.5431868574783656, "learning_rate": 1.1092943085092853e-05, "loss": 3.3639, "step": 6096 }, { "epoch": 1.394700358740549, "grad_norm": 0.4163165548745906, "learning_rate": 1.1077555777258736e-05, "loss": 3.3506, "step": 6098 }, { "epoch": 1.3951577172095417, "grad_norm": 0.4532136994726007, "learning_rate": 1.106217611096791e-05, "loss": 3.5373, "step": 6100 }, { "epoch": 1.3956150756785342, "grad_norm": 0.4049584692983474, "learning_rate": 1.104680409466172e-05, "loss": 3.3672, "step": 6102 }, { "epoch": 1.3960724341475266, "grad_norm": 0.4437659160016295, "learning_rate": 1.1031439736777327e-05, "loss": 3.3063, "step": 6104 }, { "epoch": 1.3965297926165192, "grad_norm": 0.4762387817793702, "learning_rate": 1.1016083045747702e-05, "loss": 3.3887, "step": 6106 }, { "epoch": 1.3969871510855119, "grad_norm": 0.410012901324145, "learning_rate": 1.1000734030001591e-05, "loss": 3.4101, "step": 6108 }, { "epoch": 1.3974445095545043, "grad_norm": 0.4959363011692415, "learning_rate": 1.098539269796352e-05, "loss": 3.3632, "step": 6110 }, { "epoch": 1.3979018680234967, "grad_norm": 0.5119751149738712, "learning_rate": 1.0970059058053835e-05, "loss": 3.4098, "step": 6112 }, { "epoch": 1.3983592264924893, "grad_norm": 0.5855792921583796, "learning_rate": 1.095473311868862e-05, "loss": 3.3906, "step": 6114 }, { "epoch": 1.398816584961482, "grad_norm": 0.3885720474631175, "learning_rate": 1.0939414888279739e-05, "loss": 3.3693, "step": 6116 }, { "epoch": 1.3992739434304744, "grad_norm": 0.500584737275434, "learning_rate": 1.0924104375234856e-05, "loss": 3.3432, "step": 6118 }, { "epoch": 1.3997313018994668, "grad_norm": 0.7271053788159432, "learning_rate": 1.0908801587957365e-05, "loss": 3.5089, "step": 6120 }, { "epoch": 1.4001886603684595, "grad_norm": 0.40669378470006673, "learning_rate": 1.089350653484642e-05, "loss": 3.5053, "step": 6122 }, { "epoch": 1.4006460188374519, "grad_norm": 0.4016908700201258, "learning_rate": 1.0878219224296964e-05, "loss": 3.4473, "step": 6124 }, { "epoch": 1.4011033773064445, "grad_norm": 0.45544831113938766, "learning_rate": 1.0862939664699657e-05, "loss": 3.6021, "step": 6126 }, { "epoch": 1.401560735775437, "grad_norm": 0.431715876721248, "learning_rate": 1.0847667864440905e-05, "loss": 3.5142, "step": 6128 }, { "epoch": 1.4020180942444296, "grad_norm": 0.46667470889598156, "learning_rate": 1.0832403831902885e-05, "loss": 3.4459, "step": 6130 }, { "epoch": 1.402475452713422, "grad_norm": 0.45884715524859715, "learning_rate": 1.0817147575463482e-05, "loss": 3.4485, "step": 6132 }, { "epoch": 1.4029328111824146, "grad_norm": 0.3886280130911874, "learning_rate": 1.0801899103496313e-05, "loss": 3.4839, "step": 6134 }, { "epoch": 1.403390169651407, "grad_norm": 0.40463961019645417, "learning_rate": 1.0786658424370748e-05, "loss": 3.3989, "step": 6136 }, { "epoch": 1.4038475281203997, "grad_norm": 0.3960334017073638, "learning_rate": 1.0771425546451857e-05, "loss": 3.6125, "step": 6138 }, { "epoch": 1.4043048865893921, "grad_norm": 0.5168383511762327, "learning_rate": 1.0756200478100426e-05, "loss": 3.3351, "step": 6140 }, { "epoch": 1.4047622450583845, "grad_norm": 0.41112322580935345, "learning_rate": 1.0740983227672979e-05, "loss": 3.375, "step": 6142 }, { "epoch": 1.4052196035273772, "grad_norm": 0.39234336552208005, "learning_rate": 1.0725773803521725e-05, "loss": 3.5064, "step": 6144 }, { "epoch": 1.4056769619963698, "grad_norm": 0.47219618167634053, "learning_rate": 1.0710572213994577e-05, "loss": 3.3835, "step": 6146 }, { "epoch": 1.4061343204653622, "grad_norm": 0.5632181351812889, "learning_rate": 1.0695378467435175e-05, "loss": 3.4843, "step": 6148 }, { "epoch": 1.4065916789343547, "grad_norm": 0.4182574977122916, "learning_rate": 1.0680192572182824e-05, "loss": 3.6276, "step": 6150 }, { "epoch": 1.4070490374033473, "grad_norm": 0.48980367213660464, "learning_rate": 1.0665014536572531e-05, "loss": 3.3915, "step": 6152 }, { "epoch": 1.40750639587234, "grad_norm": 0.5023044588613698, "learning_rate": 1.0649844368934986e-05, "loss": 3.5561, "step": 6154 }, { "epoch": 1.4079637543413324, "grad_norm": 0.4576343892959544, "learning_rate": 1.0634682077596577e-05, "loss": 3.5892, "step": 6156 }, { "epoch": 1.4084211128103248, "grad_norm": 0.42570122132949423, "learning_rate": 1.061952767087935e-05, "loss": 3.4972, "step": 6158 }, { "epoch": 1.4088784712793174, "grad_norm": 0.4167698573442946, "learning_rate": 1.0604381157101021e-05, "loss": 3.4793, "step": 6160 }, { "epoch": 1.4093358297483098, "grad_norm": 0.36124495502763887, "learning_rate": 1.0589242544574995e-05, "loss": 3.4585, "step": 6162 }, { "epoch": 1.4097931882173025, "grad_norm": 0.3979304396990232, "learning_rate": 1.0574111841610326e-05, "loss": 3.472, "step": 6164 }, { "epoch": 1.410250546686295, "grad_norm": 0.4542873535732895, "learning_rate": 1.0558989056511712e-05, "loss": 3.4347, "step": 6166 }, { "epoch": 1.4107079051552875, "grad_norm": 0.43077800393913773, "learning_rate": 1.0543874197579545e-05, "loss": 3.4292, "step": 6168 }, { "epoch": 1.41116526362428, "grad_norm": 0.4422136871886983, "learning_rate": 1.0528767273109829e-05, "loss": 3.5421, "step": 6170 }, { "epoch": 1.4116226220932726, "grad_norm": 0.4402119337182308, "learning_rate": 1.0513668291394224e-05, "loss": 3.4575, "step": 6172 }, { "epoch": 1.412079980562265, "grad_norm": 0.5091571025865834, "learning_rate": 1.049857726072005e-05, "loss": 3.5607, "step": 6174 }, { "epoch": 1.4125373390312577, "grad_norm": 0.47978310393382295, "learning_rate": 1.0483494189370232e-05, "loss": 3.3648, "step": 6176 }, { "epoch": 1.41299469750025, "grad_norm": 0.40488019501667644, "learning_rate": 1.0468419085623338e-05, "loss": 3.42, "step": 6178 }, { "epoch": 1.4134520559692425, "grad_norm": 0.4405601122268259, "learning_rate": 1.0453351957753582e-05, "loss": 3.5965, "step": 6180 }, { "epoch": 1.4139094144382351, "grad_norm": 0.4182153313115168, "learning_rate": 1.0438292814030776e-05, "loss": 3.4234, "step": 6182 }, { "epoch": 1.4143667729072278, "grad_norm": 0.44232795493095595, "learning_rate": 1.0423241662720347e-05, "loss": 3.4426, "step": 6184 }, { "epoch": 1.4148241313762202, "grad_norm": 0.4906750252604982, "learning_rate": 1.0408198512083368e-05, "loss": 3.4274, "step": 6186 }, { "epoch": 1.4152814898452126, "grad_norm": 0.534597843396439, "learning_rate": 1.0393163370376482e-05, "loss": 3.4658, "step": 6188 }, { "epoch": 1.4157388483142053, "grad_norm": 0.40160093902785865, "learning_rate": 1.0378136245851952e-05, "loss": 3.3557, "step": 6190 }, { "epoch": 1.416196206783198, "grad_norm": 0.5168483555671193, "learning_rate": 1.036311714675766e-05, "loss": 3.5708, "step": 6192 }, { "epoch": 1.4166535652521903, "grad_norm": 0.4660114719816551, "learning_rate": 1.0348106081337047e-05, "loss": 3.4053, "step": 6194 }, { "epoch": 1.4171109237211827, "grad_norm": 0.4939256283302286, "learning_rate": 1.0333103057829163e-05, "loss": 3.4905, "step": 6196 }, { "epoch": 1.4175682821901754, "grad_norm": 0.4986968941009901, "learning_rate": 1.0318108084468655e-05, "loss": 3.5067, "step": 6198 }, { "epoch": 1.418025640659168, "grad_norm": 0.4923991550516166, "learning_rate": 1.0303121169485735e-05, "loss": 3.4997, "step": 6200 }, { "epoch": 1.4184829991281604, "grad_norm": 0.48982403972583977, "learning_rate": 1.0288142321106186e-05, "loss": 3.4245, "step": 6202 }, { "epoch": 1.4189403575971529, "grad_norm": 0.4312772818242469, "learning_rate": 1.0273171547551397e-05, "loss": 3.4812, "step": 6204 }, { "epoch": 1.4193977160661455, "grad_norm": 0.4572429492723856, "learning_rate": 1.0258208857038286e-05, "loss": 3.5011, "step": 6206 }, { "epoch": 1.419855074535138, "grad_norm": 0.4387856734138229, "learning_rate": 1.0243254257779348e-05, "loss": 3.4793, "step": 6208 }, { "epoch": 1.4203124330041306, "grad_norm": 0.41000728626871275, "learning_rate": 1.0228307757982655e-05, "loss": 3.5296, "step": 6210 }, { "epoch": 1.420769791473123, "grad_norm": 0.511337691670005, "learning_rate": 1.0213369365851814e-05, "loss": 3.3191, "step": 6212 }, { "epoch": 1.4212271499421156, "grad_norm": 0.5145670412035341, "learning_rate": 1.019843908958598e-05, "loss": 3.3889, "step": 6214 }, { "epoch": 1.421684508411108, "grad_norm": 0.4451777605702211, "learning_rate": 1.0183516937379855e-05, "loss": 3.4314, "step": 6216 }, { "epoch": 1.4221418668801007, "grad_norm": 0.6696446014563231, "learning_rate": 1.0168602917423706e-05, "loss": 3.402, "step": 6218 }, { "epoch": 1.422599225349093, "grad_norm": 0.4849339613246409, "learning_rate": 1.0153697037903306e-05, "loss": 3.3412, "step": 6220 }, { "epoch": 1.4230565838180858, "grad_norm": 0.5087061135298326, "learning_rate": 1.0138799306999964e-05, "loss": 3.4259, "step": 6222 }, { "epoch": 1.4235139422870782, "grad_norm": 0.5094211335148858, "learning_rate": 1.012390973289054e-05, "loss": 3.7094, "step": 6224 }, { "epoch": 1.4239713007560706, "grad_norm": 0.42473780664586536, "learning_rate": 1.0109028323747396e-05, "loss": 3.6283, "step": 6226 }, { "epoch": 1.4244286592250632, "grad_norm": 0.39607747813853716, "learning_rate": 1.0094155087738408e-05, "loss": 3.4801, "step": 6228 }, { "epoch": 1.4248860176940559, "grad_norm": 0.5993976281263297, "learning_rate": 1.0079290033026992e-05, "loss": 3.4085, "step": 6230 }, { "epoch": 1.4253433761630483, "grad_norm": 0.5450516139534358, "learning_rate": 1.0064433167772045e-05, "loss": 3.5812, "step": 6232 }, { "epoch": 1.4258007346320407, "grad_norm": 0.5335490742826684, "learning_rate": 1.0049584500127979e-05, "loss": 3.4172, "step": 6234 }, { "epoch": 1.4262580931010334, "grad_norm": 0.6300068104845109, "learning_rate": 1.0034744038244723e-05, "loss": 3.4902, "step": 6236 }, { "epoch": 1.426715451570026, "grad_norm": 0.5622238965045155, "learning_rate": 1.0019911790267678e-05, "loss": 3.4984, "step": 6238 }, { "epoch": 1.4271728100390184, "grad_norm": 0.46367281405089494, "learning_rate": 1.0005087764337742e-05, "loss": 3.5999, "step": 6240 }, { "epoch": 1.4276301685080108, "grad_norm": 0.5229652039037539, "learning_rate": 9.990271968591319e-06, "loss": 3.3632, "step": 6242 }, { "epoch": 1.4280875269770035, "grad_norm": 0.4319817071414837, "learning_rate": 9.975464411160277e-06, "loss": 3.3781, "step": 6244 }, { "epoch": 1.428544885445996, "grad_norm": 0.42167584698877475, "learning_rate": 9.960665100171951e-06, "loss": 3.5543, "step": 6246 }, { "epoch": 1.4290022439149885, "grad_norm": 0.48117769862443066, "learning_rate": 9.94587404374919e-06, "loss": 3.3767, "step": 6248 }, { "epoch": 1.429459602383981, "grad_norm": 0.5914224629970294, "learning_rate": 9.93109125001028e-06, "loss": 3.4452, "step": 6250 }, { "epoch": 1.4299169608529736, "grad_norm": 0.4469410914118218, "learning_rate": 9.916316727068964e-06, "loss": 3.3262, "step": 6252 }, { "epoch": 1.430374319321966, "grad_norm": 0.4360629390054412, "learning_rate": 9.901550483034486e-06, "loss": 3.419, "step": 6254 }, { "epoch": 1.4308316777909587, "grad_norm": 0.4028683177903958, "learning_rate": 9.886792526011512e-06, "loss": 3.4096, "step": 6256 }, { "epoch": 1.431289036259951, "grad_norm": 0.5373167157950128, "learning_rate": 9.872042864100156e-06, "loss": 3.4823, "step": 6258 }, { "epoch": 1.4317463947289437, "grad_norm": 0.49887945856497773, "learning_rate": 9.857301505396014e-06, "loss": 3.4426, "step": 6260 }, { "epoch": 1.4322037531979361, "grad_norm": 0.4519821586284205, "learning_rate": 9.842568457990092e-06, "loss": 3.4498, "step": 6262 }, { "epoch": 1.4326611116669288, "grad_norm": 0.3586355978622111, "learning_rate": 9.827843729968835e-06, "loss": 3.4746, "step": 6264 }, { "epoch": 1.4331184701359212, "grad_norm": 0.5109670228040646, "learning_rate": 9.813127329414154e-06, "loss": 3.5117, "step": 6266 }, { "epoch": 1.4335758286049138, "grad_norm": 0.5045803204275072, "learning_rate": 9.798419264403355e-06, "loss": 3.414, "step": 6268 }, { "epoch": 1.4340331870739063, "grad_norm": 0.5156979942673341, "learning_rate": 9.783719543009181e-06, "loss": 3.5018, "step": 6270 }, { "epoch": 1.4344905455428987, "grad_norm": 0.383978314123897, "learning_rate": 9.769028173299787e-06, "loss": 3.3099, "step": 6272 }, { "epoch": 1.4349479040118913, "grad_norm": 0.48467722047472367, "learning_rate": 9.754345163338771e-06, "loss": 3.3487, "step": 6274 }, { "epoch": 1.435405262480884, "grad_norm": 0.5149098841515835, "learning_rate": 9.739670521185116e-06, "loss": 3.3998, "step": 6276 }, { "epoch": 1.4358626209498764, "grad_norm": 0.5153654485627465, "learning_rate": 9.72500425489321e-06, "loss": 3.3177, "step": 6278 }, { "epoch": 1.4363199794188688, "grad_norm": 0.43644336418332225, "learning_rate": 9.710346372512871e-06, "loss": 3.3762, "step": 6280 }, { "epoch": 1.4367773378878614, "grad_norm": 0.41397624646910347, "learning_rate": 9.695696882089295e-06, "loss": 3.488, "step": 6282 }, { "epoch": 1.437234696356854, "grad_norm": 0.4211853359465133, "learning_rate": 9.68105579166306e-06, "loss": 3.4118, "step": 6284 }, { "epoch": 1.4376920548258465, "grad_norm": 0.5390438131880524, "learning_rate": 9.66642310927017e-06, "loss": 3.2008, "step": 6286 }, { "epoch": 1.438149413294839, "grad_norm": 0.5975295786539215, "learning_rate": 9.651798842941986e-06, "loss": 3.428, "step": 6288 }, { "epoch": 1.4386067717638316, "grad_norm": 0.5074261295883207, "learning_rate": 9.637183000705243e-06, "loss": 3.5797, "step": 6290 }, { "epoch": 1.439064130232824, "grad_norm": 0.4269668800253306, "learning_rate": 9.62257559058209e-06, "loss": 3.357, "step": 6292 }, { "epoch": 1.4395214887018166, "grad_norm": 0.42506239517181454, "learning_rate": 9.607976620590011e-06, "loss": 3.3733, "step": 6294 }, { "epoch": 1.439978847170809, "grad_norm": 0.38158939529219676, "learning_rate": 9.593386098741863e-06, "loss": 3.4046, "step": 6296 }, { "epoch": 1.4404362056398017, "grad_norm": 0.4954556544079114, "learning_rate": 9.578804033045894e-06, "loss": 3.5064, "step": 6298 }, { "epoch": 1.440893564108794, "grad_norm": 0.5045509649720952, "learning_rate": 9.564230431505678e-06, "loss": 3.4745, "step": 6300 }, { "epoch": 1.4413509225777867, "grad_norm": 0.5113892404330705, "learning_rate": 9.549665302120146e-06, "loss": 3.4637, "step": 6302 }, { "epoch": 1.4418082810467792, "grad_norm": 0.5515680307429802, "learning_rate": 9.535108652883609e-06, "loss": 3.3769, "step": 6304 }, { "epoch": 1.4422656395157718, "grad_norm": 0.3949742804073016, "learning_rate": 9.520560491785697e-06, "loss": 3.4391, "step": 6306 }, { "epoch": 1.4427229979847642, "grad_norm": 0.5208269754657517, "learning_rate": 9.506020826811374e-06, "loss": 3.4687, "step": 6308 }, { "epoch": 1.4431803564537566, "grad_norm": 0.4700577296149496, "learning_rate": 9.491489665940973e-06, "loss": 3.534, "step": 6310 }, { "epoch": 1.4436377149227493, "grad_norm": 0.4605007041714531, "learning_rate": 9.476967017150133e-06, "loss": 3.3965, "step": 6312 }, { "epoch": 1.444095073391742, "grad_norm": 0.49190772142839073, "learning_rate": 9.462452888409817e-06, "loss": 3.4738, "step": 6314 }, { "epoch": 1.4445524318607343, "grad_norm": 0.386418177616872, "learning_rate": 9.447947287686343e-06, "loss": 3.396, "step": 6316 }, { "epoch": 1.4450097903297268, "grad_norm": 0.5819407655159109, "learning_rate": 9.433450222941323e-06, "loss": 3.5124, "step": 6318 }, { "epoch": 1.4454671487987194, "grad_norm": 0.4583546153098236, "learning_rate": 9.418961702131674e-06, "loss": 3.4747, "step": 6320 }, { "epoch": 1.445924507267712, "grad_norm": 0.42659316389376, "learning_rate": 9.40448173320966e-06, "loss": 3.4891, "step": 6322 }, { "epoch": 1.4463818657367045, "grad_norm": 0.485086318671975, "learning_rate": 9.390010324122823e-06, "loss": 3.5511, "step": 6324 }, { "epoch": 1.4468392242056969, "grad_norm": 0.693170613128674, "learning_rate": 9.375547482814002e-06, "loss": 3.3355, "step": 6326 }, { "epoch": 1.4472965826746895, "grad_norm": 0.4839810338185941, "learning_rate": 9.361093217221362e-06, "loss": 3.4905, "step": 6328 }, { "epoch": 1.447753941143682, "grad_norm": 0.434528563133473, "learning_rate": 9.346647535278339e-06, "loss": 3.5061, "step": 6330 }, { "epoch": 1.4482112996126746, "grad_norm": 0.5454105114231302, "learning_rate": 9.332210444913662e-06, "loss": 3.4443, "step": 6332 }, { "epoch": 1.448668658081667, "grad_norm": 0.3753376473824839, "learning_rate": 9.317781954051339e-06, "loss": 3.4218, "step": 6334 }, { "epoch": 1.4491260165506596, "grad_norm": 0.43559174242411564, "learning_rate": 9.303362070610676e-06, "loss": 3.4371, "step": 6336 }, { "epoch": 1.449583375019652, "grad_norm": 0.468874724007436, "learning_rate": 9.288950802506241e-06, "loss": 3.4705, "step": 6338 }, { "epoch": 1.4500407334886447, "grad_norm": 0.5702554857156571, "learning_rate": 9.274548157647863e-06, "loss": 3.4651, "step": 6340 }, { "epoch": 1.4504980919576371, "grad_norm": 0.422339907313629, "learning_rate": 9.26015414394067e-06, "loss": 3.503, "step": 6342 }, { "epoch": 1.4509554504266298, "grad_norm": 0.5488849216816315, "learning_rate": 9.245768769285024e-06, "loss": 3.5285, "step": 6344 }, { "epoch": 1.4514128088956222, "grad_norm": 0.4207483718894969, "learning_rate": 9.231392041576545e-06, "loss": 3.5988, "step": 6346 }, { "epoch": 1.4518701673646148, "grad_norm": 0.46176977504979266, "learning_rate": 9.217023968706135e-06, "loss": 3.2535, "step": 6348 }, { "epoch": 1.4523275258336072, "grad_norm": 0.48540631605199563, "learning_rate": 9.202664558559918e-06, "loss": 3.6156, "step": 6350 }, { "epoch": 1.4527848843025999, "grad_norm": 0.43079246285020567, "learning_rate": 9.188313819019264e-06, "loss": 3.4641, "step": 6352 }, { "epoch": 1.4532422427715923, "grad_norm": 0.42369103741663827, "learning_rate": 9.173971757960814e-06, "loss": 3.4859, "step": 6354 }, { "epoch": 1.4536996012405847, "grad_norm": 0.5107380445615013, "learning_rate": 9.159638383256408e-06, "loss": 3.4897, "step": 6356 }, { "epoch": 1.4541569597095774, "grad_norm": 0.44768867308683824, "learning_rate": 9.14531370277313e-06, "loss": 3.5805, "step": 6358 }, { "epoch": 1.45461431817857, "grad_norm": 0.45148155186436023, "learning_rate": 9.130997724373316e-06, "loss": 3.5815, "step": 6360 }, { "epoch": 1.4550716766475624, "grad_norm": 0.5662918238320225, "learning_rate": 9.116690455914496e-06, "loss": 3.3342, "step": 6362 }, { "epoch": 1.4555290351165548, "grad_norm": 0.3875268526925196, "learning_rate": 9.102391905249422e-06, "loss": 3.4452, "step": 6364 }, { "epoch": 1.4559863935855475, "grad_norm": 0.5102764799974514, "learning_rate": 9.088102080226083e-06, "loss": 3.5039, "step": 6366 }, { "epoch": 1.4564437520545401, "grad_norm": 0.5371299600991387, "learning_rate": 9.073820988687657e-06, "loss": 3.4977, "step": 6368 }, { "epoch": 1.4569011105235325, "grad_norm": 0.5034669247344906, "learning_rate": 9.05954863847253e-06, "loss": 3.4002, "step": 6370 }, { "epoch": 1.457358468992525, "grad_norm": 0.45753039303387927, "learning_rate": 9.045285037414315e-06, "loss": 3.4735, "step": 6372 }, { "epoch": 1.4578158274615176, "grad_norm": 0.4879583282774723, "learning_rate": 9.031030193341792e-06, "loss": 3.398, "step": 6374 }, { "epoch": 1.45827318593051, "grad_norm": 0.6142950728961389, "learning_rate": 9.016784114078939e-06, "loss": 3.5007, "step": 6376 }, { "epoch": 1.4587305443995027, "grad_norm": 0.4936091765444133, "learning_rate": 9.002546807444948e-06, "loss": 3.4799, "step": 6378 }, { "epoch": 1.459187902868495, "grad_norm": 0.3910691025603187, "learning_rate": 8.988318281254174e-06, "loss": 3.5, "step": 6380 }, { "epoch": 1.4596452613374877, "grad_norm": 0.8470678440697476, "learning_rate": 8.974098543316147e-06, "loss": 3.4247, "step": 6382 }, { "epoch": 1.4601026198064802, "grad_norm": 0.46300963791255917, "learning_rate": 8.959887601435602e-06, "loss": 3.3488, "step": 6384 }, { "epoch": 1.4605599782754728, "grad_norm": 0.6024262803433222, "learning_rate": 8.94568546341242e-06, "loss": 3.3715, "step": 6386 }, { "epoch": 1.4610173367444652, "grad_norm": 0.4370922504578511, "learning_rate": 8.931492137041658e-06, "loss": 3.6713, "step": 6388 }, { "epoch": 1.4614746952134579, "grad_norm": 0.4461245577941738, "learning_rate": 8.917307630113528e-06, "loss": 3.3796, "step": 6390 }, { "epoch": 1.4619320536824503, "grad_norm": 0.4651418579509632, "learning_rate": 8.90313195041343e-06, "loss": 3.5215, "step": 6392 }, { "epoch": 1.4623894121514427, "grad_norm": 0.5280423940577654, "learning_rate": 8.888965105721885e-06, "loss": 3.2994, "step": 6394 }, { "epoch": 1.4628467706204353, "grad_norm": 0.5727472459384703, "learning_rate": 8.874807103814578e-06, "loss": 3.6571, "step": 6396 }, { "epoch": 1.463304129089428, "grad_norm": 0.4727786039769676, "learning_rate": 8.860657952462353e-06, "loss": 3.5401, "step": 6398 }, { "epoch": 1.4637614875584204, "grad_norm": 0.49823936781614364, "learning_rate": 8.846517659431181e-06, "loss": 3.4918, "step": 6400 }, { "epoch": 1.4642188460274128, "grad_norm": 0.4411162208457492, "learning_rate": 8.832386232482165e-06, "loss": 3.4517, "step": 6402 }, { "epoch": 1.4646762044964055, "grad_norm": 0.4718427830239078, "learning_rate": 8.818263679371566e-06, "loss": 3.4191, "step": 6404 }, { "epoch": 1.465133562965398, "grad_norm": 0.45990903525090027, "learning_rate": 8.804150007850753e-06, "loss": 3.4443, "step": 6406 }, { "epoch": 1.4655909214343905, "grad_norm": 0.4983912305970846, "learning_rate": 8.790045225666218e-06, "loss": 3.5946, "step": 6408 }, { "epoch": 1.466048279903383, "grad_norm": 0.48996534222533855, "learning_rate": 8.775949340559602e-06, "loss": 3.3701, "step": 6410 }, { "epoch": 1.4665056383723756, "grad_norm": 0.5082230009140336, "learning_rate": 8.761862360267637e-06, "loss": 3.1957, "step": 6412 }, { "epoch": 1.466962996841368, "grad_norm": 0.41126013662142036, "learning_rate": 8.747784292522163e-06, "loss": 3.4586, "step": 6414 }, { "epoch": 1.4674203553103606, "grad_norm": 0.5720950708993424, "learning_rate": 8.733715145050154e-06, "loss": 3.4871, "step": 6416 }, { "epoch": 1.467877713779353, "grad_norm": 0.4355300250022835, "learning_rate": 8.71965492557367e-06, "loss": 3.3824, "step": 6418 }, { "epoch": 1.4683350722483457, "grad_norm": 0.44830021035965717, "learning_rate": 8.705603641809864e-06, "loss": 3.5372, "step": 6420 }, { "epoch": 1.4687924307173381, "grad_norm": 0.5175003569136416, "learning_rate": 8.691561301471013e-06, "loss": 3.515, "step": 6422 }, { "epoch": 1.4692497891863308, "grad_norm": 0.40017584645862225, "learning_rate": 8.677527912264456e-06, "loss": 3.5538, "step": 6424 }, { "epoch": 1.4697071476553232, "grad_norm": 0.443139546885964, "learning_rate": 8.663503481892623e-06, "loss": 3.6154, "step": 6426 }, { "epoch": 1.4701645061243158, "grad_norm": 0.41584570194851816, "learning_rate": 8.64948801805305e-06, "loss": 3.5373, "step": 6428 }, { "epoch": 1.4706218645933082, "grad_norm": 0.45905780610657476, "learning_rate": 8.63548152843833e-06, "loss": 3.514, "step": 6430 }, { "epoch": 1.4710792230623009, "grad_norm": 0.4964018037985336, "learning_rate": 8.621484020736124e-06, "loss": 3.5874, "step": 6432 }, { "epoch": 1.4715365815312933, "grad_norm": 0.45344052345955704, "learning_rate": 8.607495502629193e-06, "loss": 3.5999, "step": 6434 }, { "epoch": 1.471993940000286, "grad_norm": 0.4561551981897701, "learning_rate": 8.593515981795338e-06, "loss": 3.4204, "step": 6436 }, { "epoch": 1.4724512984692784, "grad_norm": 0.5631405838599899, "learning_rate": 8.579545465907421e-06, "loss": 3.592, "step": 6438 }, { "epoch": 1.4729086569382708, "grad_norm": 0.4286564507758937, "learning_rate": 8.565583962633388e-06, "loss": 3.5819, "step": 6440 }, { "epoch": 1.4733660154072634, "grad_norm": 0.47836129950248857, "learning_rate": 8.55163147963621e-06, "loss": 3.469, "step": 6442 }, { "epoch": 1.473823373876256, "grad_norm": 0.5518888604830983, "learning_rate": 8.537688024573908e-06, "loss": 3.5609, "step": 6444 }, { "epoch": 1.4742807323452485, "grad_norm": 0.4292177406801869, "learning_rate": 8.52375360509958e-06, "loss": 3.4468, "step": 6446 }, { "epoch": 1.474738090814241, "grad_norm": 0.47353533498768924, "learning_rate": 8.509828228861327e-06, "loss": 3.4419, "step": 6448 }, { "epoch": 1.4751954492832335, "grad_norm": 0.39475174209640346, "learning_rate": 8.495911903502304e-06, "loss": 3.5039, "step": 6450 }, { "epoch": 1.4756528077522262, "grad_norm": 0.6354796399247307, "learning_rate": 8.48200463666069e-06, "loss": 3.5933, "step": 6452 }, { "epoch": 1.4761101662212186, "grad_norm": 0.6627995542550645, "learning_rate": 8.46810643596971e-06, "loss": 3.7007, "step": 6454 }, { "epoch": 1.476567524690211, "grad_norm": 0.40775586681366965, "learning_rate": 8.454217309057597e-06, "loss": 3.5398, "step": 6456 }, { "epoch": 1.4770248831592037, "grad_norm": 0.5216849866317288, "learning_rate": 8.440337263547599e-06, "loss": 3.4679, "step": 6458 }, { "epoch": 1.477482241628196, "grad_norm": 0.3667265250735279, "learning_rate": 8.426466307058e-06, "loss": 3.4351, "step": 6460 }, { "epoch": 1.4779396000971887, "grad_norm": 0.38212260536713855, "learning_rate": 8.412604447202077e-06, "loss": 3.6374, "step": 6462 }, { "epoch": 1.4783969585661811, "grad_norm": 0.4631036517722299, "learning_rate": 8.398751691588116e-06, "loss": 3.3477, "step": 6464 }, { "epoch": 1.4788543170351738, "grad_norm": 0.4891255670169358, "learning_rate": 8.384908047819426e-06, "loss": 3.4095, "step": 6466 }, { "epoch": 1.4793116755041662, "grad_norm": 0.5163069463638279, "learning_rate": 8.371073523494288e-06, "loss": 3.3759, "step": 6468 }, { "epoch": 1.4797690339731588, "grad_norm": 0.4282397067808818, "learning_rate": 8.357248126205983e-06, "loss": 3.5233, "step": 6470 }, { "epoch": 1.4802263924421513, "grad_norm": 0.43513205579139896, "learning_rate": 8.343431863542805e-06, "loss": 3.4706, "step": 6472 }, { "epoch": 1.480683750911144, "grad_norm": 0.47526917297100457, "learning_rate": 8.329624743088008e-06, "loss": 3.3804, "step": 6474 }, { "epoch": 1.4811411093801363, "grad_norm": 0.47500153026836245, "learning_rate": 8.315826772419833e-06, "loss": 3.3489, "step": 6476 }, { "epoch": 1.4815984678491287, "grad_norm": 0.5028261541073864, "learning_rate": 8.302037959111519e-06, "loss": 3.4595, "step": 6478 }, { "epoch": 1.4820558263181214, "grad_norm": 0.49818678741920974, "learning_rate": 8.288258310731257e-06, "loss": 3.3465, "step": 6480 }, { "epoch": 1.482513184787114, "grad_norm": 0.5894651650626799, "learning_rate": 8.274487834842201e-06, "loss": 3.3575, "step": 6482 }, { "epoch": 1.4829705432561064, "grad_norm": 0.4781635468346734, "learning_rate": 8.260726539002509e-06, "loss": 3.33, "step": 6484 }, { "epoch": 1.4834279017250989, "grad_norm": 0.5924253242852057, "learning_rate": 8.246974430765261e-06, "loss": 3.492, "step": 6486 }, { "epoch": 1.4838852601940915, "grad_norm": 0.6158842983329786, "learning_rate": 8.233231517678503e-06, "loss": 3.3905, "step": 6488 }, { "epoch": 1.4843426186630841, "grad_norm": 0.4264226160092272, "learning_rate": 8.219497807285257e-06, "loss": 3.4153, "step": 6490 }, { "epoch": 1.4847999771320766, "grad_norm": 0.6356612943641646, "learning_rate": 8.205773307123465e-06, "loss": 3.3048, "step": 6492 }, { "epoch": 1.485257335601069, "grad_norm": 0.45983721452035775, "learning_rate": 8.19205802472602e-06, "loss": 3.5514, "step": 6494 }, { "epoch": 1.4857146940700616, "grad_norm": 0.5109531411859536, "learning_rate": 8.178351967620781e-06, "loss": 3.4142, "step": 6496 }, { "epoch": 1.486172052539054, "grad_norm": 0.5644445451658556, "learning_rate": 8.164655143330513e-06, "loss": 3.4462, "step": 6498 }, { "epoch": 1.4866294110080467, "grad_norm": 0.39378643548507525, "learning_rate": 8.150967559372913e-06, "loss": 3.4236, "step": 6500 }, { "epoch": 1.487086769477039, "grad_norm": 0.48485243248303267, "learning_rate": 8.137289223260638e-06, "loss": 3.4356, "step": 6502 }, { "epoch": 1.4875441279460317, "grad_norm": 0.44493292643257715, "learning_rate": 8.12362014250124e-06, "loss": 3.3941, "step": 6504 }, { "epoch": 1.4880014864150242, "grad_norm": 0.6088298657878365, "learning_rate": 8.109960324597199e-06, "loss": 3.5004, "step": 6506 }, { "epoch": 1.4884588448840168, "grad_norm": 0.49121183239834454, "learning_rate": 8.096309777045905e-06, "loss": 3.4518, "step": 6508 }, { "epoch": 1.4889162033530092, "grad_norm": 0.5272183869725625, "learning_rate": 8.08266850733968e-06, "loss": 3.5646, "step": 6510 }, { "epoch": 1.4893735618220019, "grad_norm": 0.4480886591191078, "learning_rate": 8.069036522965736e-06, "loss": 3.4556, "step": 6512 }, { "epoch": 1.4898309202909943, "grad_norm": 0.46144788578110013, "learning_rate": 8.055413831406184e-06, "loss": 3.5695, "step": 6514 }, { "epoch": 1.490288278759987, "grad_norm": 0.4093621696034024, "learning_rate": 8.041800440138058e-06, "loss": 3.3417, "step": 6516 }, { "epoch": 1.4907456372289793, "grad_norm": 0.6152135936315386, "learning_rate": 8.028196356633266e-06, "loss": 3.3301, "step": 6518 }, { "epoch": 1.491202995697972, "grad_norm": 0.41704541119071387, "learning_rate": 8.014601588358608e-06, "loss": 3.5537, "step": 6520 }, { "epoch": 1.4916603541669644, "grad_norm": 0.46559739127009997, "learning_rate": 8.001016142775788e-06, "loss": 3.4938, "step": 6522 }, { "epoch": 1.4921177126359568, "grad_norm": 0.4376449685377457, "learning_rate": 7.98744002734138e-06, "loss": 3.5023, "step": 6524 }, { "epoch": 1.4925750711049495, "grad_norm": 0.47894632069159815, "learning_rate": 7.97387324950683e-06, "loss": 3.3456, "step": 6526 }, { "epoch": 1.493032429573942, "grad_norm": 0.4489516085882967, "learning_rate": 7.960315816718483e-06, "loss": 3.3587, "step": 6528 }, { "epoch": 1.4934897880429345, "grad_norm": 0.30033413984708707, "learning_rate": 7.946767736417535e-06, "loss": 3.3355, "step": 6530 }, { "epoch": 1.493947146511927, "grad_norm": 0.805109117291754, "learning_rate": 7.933229016040043e-06, "loss": 3.3496, "step": 6532 }, { "epoch": 1.4944045049809196, "grad_norm": 0.4969319149025408, "learning_rate": 7.919699663016956e-06, "loss": 3.4321, "step": 6534 }, { "epoch": 1.4948618634499122, "grad_norm": 0.48150133035303694, "learning_rate": 7.90617968477405e-06, "loss": 3.4819, "step": 6536 }, { "epoch": 1.4953192219189047, "grad_norm": 0.5032347115261389, "learning_rate": 7.892669088731968e-06, "loss": 3.4152, "step": 6538 }, { "epoch": 1.495776580387897, "grad_norm": 0.44636099337947666, "learning_rate": 7.879167882306219e-06, "loss": 3.5968, "step": 6540 }, { "epoch": 1.4962339388568897, "grad_norm": 0.4906202978163948, "learning_rate": 7.86567607290713e-06, "loss": 3.2416, "step": 6542 }, { "epoch": 1.4966912973258821, "grad_norm": 0.5964370958618087, "learning_rate": 7.85219366793988e-06, "loss": 3.3545, "step": 6544 }, { "epoch": 1.4971486557948748, "grad_norm": 0.5201539058523621, "learning_rate": 7.838720674804506e-06, "loss": 3.6601, "step": 6546 }, { "epoch": 1.4976060142638672, "grad_norm": 0.43946156811550674, "learning_rate": 7.825257100895858e-06, "loss": 3.6176, "step": 6548 }, { "epoch": 1.4980633727328598, "grad_norm": 0.5810495071821337, "learning_rate": 7.811802953603606e-06, "loss": 3.3713, "step": 6550 }, { "epoch": 1.4985207312018523, "grad_norm": 0.501199029675935, "learning_rate": 7.798358240312285e-06, "loss": 3.4941, "step": 6552 }, { "epoch": 1.498978089670845, "grad_norm": 0.4833644662345073, "learning_rate": 7.784922968401214e-06, "loss": 3.5278, "step": 6554 }, { "epoch": 1.4994354481398373, "grad_norm": 0.4741123406872166, "learning_rate": 7.771497145244541e-06, "loss": 3.4167, "step": 6556 }, { "epoch": 1.49989280660883, "grad_norm": 0.5482715757815068, "learning_rate": 7.758080778211246e-06, "loss": 3.495, "step": 6558 }, { "epoch": 1.5003501650778224, "grad_norm": 0.47757892403269764, "learning_rate": 7.744673874665095e-06, "loss": 3.4962, "step": 6560 }, { "epoch": 1.5008075235468148, "grad_norm": 0.5218681323576094, "learning_rate": 7.731276441964671e-06, "loss": 3.3993, "step": 6562 }, { "epoch": 1.5012648820158074, "grad_norm": 0.43620725783029873, "learning_rate": 7.717888487463348e-06, "loss": 3.497, "step": 6564 }, { "epoch": 1.5017222404848, "grad_norm": 0.450559125680468, "learning_rate": 7.704510018509326e-06, "loss": 3.3177, "step": 6566 }, { "epoch": 1.5021795989537925, "grad_norm": 0.5397228122063775, "learning_rate": 7.691141042445563e-06, "loss": 3.4175, "step": 6568 }, { "epoch": 1.502636957422785, "grad_norm": 0.5363696188548975, "learning_rate": 7.677781566609821e-06, "loss": 3.4613, "step": 6570 }, { "epoch": 1.5030943158917776, "grad_norm": 0.457700500198426, "learning_rate": 7.664431598334666e-06, "loss": 3.3364, "step": 6572 }, { "epoch": 1.5035516743607702, "grad_norm": 0.5073513578144677, "learning_rate": 7.651091144947414e-06, "loss": 3.3741, "step": 6574 }, { "epoch": 1.5040090328297626, "grad_norm": 0.3907639447101771, "learning_rate": 7.637760213770176e-06, "loss": 3.3954, "step": 6576 }, { "epoch": 1.504466391298755, "grad_norm": 0.4385614291772359, "learning_rate": 7.624438812119844e-06, "loss": 3.4935, "step": 6578 }, { "epoch": 1.5049237497677477, "grad_norm": 0.5521789294075887, "learning_rate": 7.6111269473080645e-06, "loss": 3.4067, "step": 6580 }, { "epoch": 1.5053811082367403, "grad_norm": 0.47050596991368626, "learning_rate": 7.597824626641248e-06, "loss": 3.3713, "step": 6582 }, { "epoch": 1.5058384667057327, "grad_norm": 0.5264856491831973, "learning_rate": 7.584531857420585e-06, "loss": 3.5135, "step": 6584 }, { "epoch": 1.5062958251747252, "grad_norm": 0.41902640031478516, "learning_rate": 7.571248646942008e-06, "loss": 3.5188, "step": 6586 }, { "epoch": 1.5067531836437178, "grad_norm": 0.4916327998922038, "learning_rate": 7.557975002496198e-06, "loss": 3.5103, "step": 6588 }, { "epoch": 1.5072105421127104, "grad_norm": 0.5181142369660583, "learning_rate": 7.544710931368612e-06, "loss": 3.5438, "step": 6590 }, { "epoch": 1.5076679005817029, "grad_norm": 0.5753130962196754, "learning_rate": 7.531456440839427e-06, "loss": 3.3504, "step": 6592 }, { "epoch": 1.5081252590506953, "grad_norm": 0.47314868842504193, "learning_rate": 7.518211538183559e-06, "loss": 3.468, "step": 6594 }, { "epoch": 1.508582617519688, "grad_norm": 0.3772507712923208, "learning_rate": 7.504976230670693e-06, "loss": 3.3961, "step": 6596 }, { "epoch": 1.5090399759886803, "grad_norm": 0.49541850956198935, "learning_rate": 7.491750525565214e-06, "loss": 3.542, "step": 6598 }, { "epoch": 1.5094973344576728, "grad_norm": 0.45656659886206324, "learning_rate": 7.478534430126246e-06, "loss": 3.4762, "step": 6600 }, { "epoch": 1.5099546929266654, "grad_norm": 0.4479306204049181, "learning_rate": 7.465327951607656e-06, "loss": 3.3292, "step": 6602 }, { "epoch": 1.510412051395658, "grad_norm": 0.5120426055887592, "learning_rate": 7.452131097258009e-06, "loss": 3.3358, "step": 6604 }, { "epoch": 1.5108694098646505, "grad_norm": 0.44939934790576264, "learning_rate": 7.4389438743205916e-06, "loss": 3.4064, "step": 6606 }, { "epoch": 1.5113267683336429, "grad_norm": 0.49453395523367905, "learning_rate": 7.425766290033429e-06, "loss": 3.3889, "step": 6608 }, { "epoch": 1.5117841268026355, "grad_norm": 0.5122475548960918, "learning_rate": 7.412598351629221e-06, "loss": 3.6807, "step": 6610 }, { "epoch": 1.5122414852716282, "grad_norm": 0.4770939474577048, "learning_rate": 7.399440066335386e-06, "loss": 3.4585, "step": 6612 }, { "epoch": 1.5126988437406206, "grad_norm": 0.5647797217161867, "learning_rate": 7.386291441374063e-06, "loss": 3.4432, "step": 6614 }, { "epoch": 1.513156202209613, "grad_norm": 0.4918690514113928, "learning_rate": 7.373152483962065e-06, "loss": 3.4735, "step": 6616 }, { "epoch": 1.5136135606786056, "grad_norm": 0.39081488762349925, "learning_rate": 7.360023201310898e-06, "loss": 3.4303, "step": 6618 }, { "epoch": 1.5140709191475983, "grad_norm": 0.5099991595971906, "learning_rate": 7.346903600626781e-06, "loss": 3.4295, "step": 6620 }, { "epoch": 1.5145282776165907, "grad_norm": 0.39976910990496395, "learning_rate": 7.333793689110599e-06, "loss": 3.4742, "step": 6622 }, { "epoch": 1.5149856360855831, "grad_norm": 0.4520675389069545, "learning_rate": 7.3206934739579235e-06, "loss": 3.5476, "step": 6624 }, { "epoch": 1.5154429945545758, "grad_norm": 0.46816705377328927, "learning_rate": 7.307602962358998e-06, "loss": 3.2906, "step": 6626 }, { "epoch": 1.5159003530235684, "grad_norm": 0.5396104874522802, "learning_rate": 7.29452216149876e-06, "loss": 3.5769, "step": 6628 }, { "epoch": 1.5163577114925608, "grad_norm": 0.36058531005593203, "learning_rate": 7.2814510785568e-06, "loss": 3.4433, "step": 6630 }, { "epoch": 1.5168150699615532, "grad_norm": 0.4847882261701968, "learning_rate": 7.26838972070737e-06, "loss": 3.4776, "step": 6632 }, { "epoch": 1.5172724284305459, "grad_norm": 0.4448099647021994, "learning_rate": 7.255338095119404e-06, "loss": 3.4897, "step": 6634 }, { "epoch": 1.5177297868995383, "grad_norm": 0.529678995069793, "learning_rate": 7.242296208956484e-06, "loss": 3.6041, "step": 6636 }, { "epoch": 1.5181871453685307, "grad_norm": 0.47869841529284257, "learning_rate": 7.229264069376832e-06, "loss": 3.5209, "step": 6638 }, { "epoch": 1.5186445038375234, "grad_norm": 0.4856445533419523, "learning_rate": 7.216241683533348e-06, "loss": 3.5091, "step": 6640 }, { "epoch": 1.519101862306516, "grad_norm": 0.4845252265543472, "learning_rate": 7.2032290585735625e-06, "loss": 3.4424, "step": 6642 }, { "epoch": 1.5195592207755084, "grad_norm": 0.43112401988049964, "learning_rate": 7.190226201639641e-06, "loss": 3.5151, "step": 6644 }, { "epoch": 1.5200165792445008, "grad_norm": 0.39024559669283126, "learning_rate": 7.177233119868412e-06, "loss": 3.6281, "step": 6646 }, { "epoch": 1.5204739377134935, "grad_norm": 0.46545703085661144, "learning_rate": 7.164249820391316e-06, "loss": 3.3518, "step": 6648 }, { "epoch": 1.5209312961824861, "grad_norm": 0.4473243907985932, "learning_rate": 7.151276310334428e-06, "loss": 3.4995, "step": 6650 }, { "epoch": 1.5213886546514785, "grad_norm": 0.4566813050253589, "learning_rate": 7.138312596818467e-06, "loss": 3.4906, "step": 6652 }, { "epoch": 1.521846013120471, "grad_norm": 0.4212506230290274, "learning_rate": 7.125358686958752e-06, "loss": 3.3786, "step": 6654 }, { "epoch": 1.5223033715894636, "grad_norm": 0.42661104799331073, "learning_rate": 7.112414587865232e-06, "loss": 3.4547, "step": 6656 }, { "epoch": 1.5227607300584562, "grad_norm": 0.4163574104224081, "learning_rate": 7.099480306642478e-06, "loss": 3.407, "step": 6658 }, { "epoch": 1.5232180885274487, "grad_norm": 0.49676133841123954, "learning_rate": 7.086555850389662e-06, "loss": 3.4796, "step": 6660 }, { "epoch": 1.523675446996441, "grad_norm": 0.5429739917529555, "learning_rate": 7.0736412262005585e-06, "loss": 3.5399, "step": 6662 }, { "epoch": 1.5241328054654337, "grad_norm": 0.6682855178450562, "learning_rate": 7.0607364411635675e-06, "loss": 3.5543, "step": 6664 }, { "epoch": 1.5245901639344264, "grad_norm": 0.5986309953820683, "learning_rate": 7.047841502361668e-06, "loss": 3.607, "step": 6666 }, { "epoch": 1.5250475224034188, "grad_norm": 0.4933547151878582, "learning_rate": 7.034956416872432e-06, "loss": 3.3653, "step": 6668 }, { "epoch": 1.5255048808724112, "grad_norm": 0.49214370472141183, "learning_rate": 7.0220811917680515e-06, "loss": 3.3947, "step": 6670 }, { "epoch": 1.5259622393414038, "grad_norm": 0.468316958990777, "learning_rate": 7.009215834115279e-06, "loss": 3.3916, "step": 6672 }, { "epoch": 1.5264195978103965, "grad_norm": 0.4946630678872832, "learning_rate": 6.996360350975451e-06, "loss": 3.5201, "step": 6674 }, { "epoch": 1.526876956279389, "grad_norm": 0.38820301703077115, "learning_rate": 6.9835147494045075e-06, "loss": 3.4388, "step": 6676 }, { "epoch": 1.5273343147483813, "grad_norm": 0.5250599611191679, "learning_rate": 6.9706790364529456e-06, "loss": 3.4083, "step": 6678 }, { "epoch": 1.527791673217374, "grad_norm": 0.4601750290670514, "learning_rate": 6.957853219165836e-06, "loss": 3.2821, "step": 6680 }, { "epoch": 1.5282490316863664, "grad_norm": 0.42000626891430415, "learning_rate": 6.945037304582819e-06, "loss": 3.5576, "step": 6682 }, { "epoch": 1.5287063901553588, "grad_norm": 0.4355805217251271, "learning_rate": 6.932231299738115e-06, "loss": 3.422, "step": 6684 }, { "epoch": 1.5291637486243514, "grad_norm": 0.5676246692242359, "learning_rate": 6.9194352116604825e-06, "loss": 3.4162, "step": 6686 }, { "epoch": 1.529621107093344, "grad_norm": 0.5364506236389615, "learning_rate": 6.906649047373246e-06, "loss": 3.6153, "step": 6688 }, { "epoch": 1.5300784655623365, "grad_norm": 0.45345536060767344, "learning_rate": 6.8938728138942946e-06, "loss": 3.353, "step": 6690 }, { "epoch": 1.530535824031329, "grad_norm": 0.437443065344656, "learning_rate": 6.88110651823605e-06, "loss": 3.6704, "step": 6692 }, { "epoch": 1.5309931825003216, "grad_norm": 0.44410952993495667, "learning_rate": 6.868350167405479e-06, "loss": 3.5308, "step": 6694 }, { "epoch": 1.5314505409693142, "grad_norm": 0.4741165365202421, "learning_rate": 6.855603768404112e-06, "loss": 3.6456, "step": 6696 }, { "epoch": 1.5319078994383066, "grad_norm": 0.46315641860335977, "learning_rate": 6.842867328227994e-06, "loss": 3.3303, "step": 6698 }, { "epoch": 1.532365257907299, "grad_norm": 0.3919902784360919, "learning_rate": 6.8301408538677055e-06, "loss": 3.5194, "step": 6700 }, { "epoch": 1.5328226163762917, "grad_norm": 0.5351246455529333, "learning_rate": 6.817424352308377e-06, "loss": 3.4732, "step": 6702 }, { "epoch": 1.5332799748452843, "grad_norm": 0.4766472913009843, "learning_rate": 6.804717830529647e-06, "loss": 3.373, "step": 6704 }, { "epoch": 1.5337373333142768, "grad_norm": 0.5379666232494426, "learning_rate": 6.792021295505671e-06, "loss": 3.4091, "step": 6706 }, { "epoch": 1.5341946917832692, "grad_norm": 0.5077031531348151, "learning_rate": 6.779334754205152e-06, "loss": 3.2492, "step": 6708 }, { "epoch": 1.5346520502522618, "grad_norm": 0.5206777213010256, "learning_rate": 6.766658213591279e-06, "loss": 3.434, "step": 6710 }, { "epoch": 1.5351094087212545, "grad_norm": 0.39170990013713003, "learning_rate": 6.753991680621755e-06, "loss": 3.3231, "step": 6712 }, { "epoch": 1.5355667671902469, "grad_norm": 0.402904582570569, "learning_rate": 6.741335162248813e-06, "loss": 3.3507, "step": 6714 }, { "epoch": 1.5360241256592393, "grad_norm": 0.46415203784831, "learning_rate": 6.7286886654191664e-06, "loss": 3.3812, "step": 6716 }, { "epoch": 1.536481484128232, "grad_norm": 0.600547330282234, "learning_rate": 6.71605219707403e-06, "loss": 3.3528, "step": 6718 }, { "epoch": 1.5369388425972244, "grad_norm": 0.48768118034667063, "learning_rate": 6.703425764149132e-06, "loss": 3.5123, "step": 6720 }, { "epoch": 1.5373962010662168, "grad_norm": 0.446891861710845, "learning_rate": 6.690809373574675e-06, "loss": 3.4797, "step": 6722 }, { "epoch": 1.5378535595352094, "grad_norm": 0.4745456982505356, "learning_rate": 6.678203032275346e-06, "loss": 3.2536, "step": 6724 }, { "epoch": 1.538310918004202, "grad_norm": 0.5045683008449044, "learning_rate": 6.6656067471703385e-06, "loss": 3.491, "step": 6726 }, { "epoch": 1.5387682764731945, "grad_norm": 0.4499527780604556, "learning_rate": 6.653020525173309e-06, "loss": 3.311, "step": 6728 }, { "epoch": 1.539225634942187, "grad_norm": 0.3929942568318095, "learning_rate": 6.640444373192389e-06, "loss": 3.4893, "step": 6730 }, { "epoch": 1.5396829934111795, "grad_norm": 0.5252362276129993, "learning_rate": 6.6278782981301995e-06, "loss": 3.3209, "step": 6732 }, { "epoch": 1.5401403518801722, "grad_norm": 0.39167978927597286, "learning_rate": 6.615322306883815e-06, "loss": 3.3384, "step": 6734 }, { "epoch": 1.5405977103491646, "grad_norm": 0.584377883455231, "learning_rate": 6.602776406344774e-06, "loss": 3.5401, "step": 6736 }, { "epoch": 1.541055068818157, "grad_norm": 0.6402202118196686, "learning_rate": 6.590240603399095e-06, "loss": 3.4381, "step": 6738 }, { "epoch": 1.5415124272871497, "grad_norm": 0.5805189981185714, "learning_rate": 6.577714904927237e-06, "loss": 3.5715, "step": 6740 }, { "epoch": 1.5419697857561423, "grad_norm": 0.4597364392087894, "learning_rate": 6.565199317804119e-06, "loss": 3.626, "step": 6742 }, { "epoch": 1.5424271442251347, "grad_norm": 0.43498222459999925, "learning_rate": 6.5526938488991e-06, "loss": 3.2862, "step": 6744 }, { "epoch": 1.5428845026941271, "grad_norm": 0.6488530769120567, "learning_rate": 6.5401985050760115e-06, "loss": 3.3167, "step": 6746 }, { "epoch": 1.5433418611631198, "grad_norm": 0.562601610119167, "learning_rate": 6.527713293193105e-06, "loss": 3.3696, "step": 6748 }, { "epoch": 1.5437992196321124, "grad_norm": 0.4704488645295614, "learning_rate": 6.5152382201030645e-06, "loss": 3.3486, "step": 6750 }, { "epoch": 1.5442565781011048, "grad_norm": 0.4831202621770493, "learning_rate": 6.5027732926530445e-06, "loss": 3.5174, "step": 6752 }, { "epoch": 1.5447139365700973, "grad_norm": 0.4229350018715706, "learning_rate": 6.490318517684593e-06, "loss": 3.2941, "step": 6754 }, { "epoch": 1.54517129503909, "grad_norm": 0.35897348613641633, "learning_rate": 6.477873902033699e-06, "loss": 3.3135, "step": 6756 }, { "epoch": 1.5456286535080825, "grad_norm": 0.44138050848246785, "learning_rate": 6.4654394525307885e-06, "loss": 3.5623, "step": 6758 }, { "epoch": 1.546086011977075, "grad_norm": 0.4435547217667641, "learning_rate": 6.45301517600069e-06, "loss": 3.5886, "step": 6760 }, { "epoch": 1.5465433704460674, "grad_norm": 0.518113057789227, "learning_rate": 6.4406010792626505e-06, "loss": 3.3498, "step": 6762 }, { "epoch": 1.54700072891506, "grad_norm": 0.5128368654570604, "learning_rate": 6.428197169130346e-06, "loss": 3.2689, "step": 6764 }, { "epoch": 1.5474580873840524, "grad_norm": 0.3943385229455799, "learning_rate": 6.415803452411842e-06, "loss": 3.5288, "step": 6766 }, { "epoch": 1.5479154458530449, "grad_norm": 0.507623461236526, "learning_rate": 6.403419935909608e-06, "loss": 3.5113, "step": 6768 }, { "epoch": 1.5483728043220375, "grad_norm": 0.4771196516725131, "learning_rate": 6.391046626420541e-06, "loss": 3.3226, "step": 6770 }, { "epoch": 1.5488301627910301, "grad_norm": 0.4757944390233732, "learning_rate": 6.378683530735913e-06, "loss": 3.5598, "step": 6772 }, { "epoch": 1.5492875212600226, "grad_norm": 0.37458490206241796, "learning_rate": 6.366330655641381e-06, "loss": 3.2417, "step": 6774 }, { "epoch": 1.549744879729015, "grad_norm": 0.4303125690080619, "learning_rate": 6.353988007917027e-06, "loss": 3.6315, "step": 6776 }, { "epoch": 1.5502022381980076, "grad_norm": 0.46602806720607304, "learning_rate": 6.341655594337292e-06, "loss": 3.63, "step": 6778 }, { "epoch": 1.5506595966670003, "grad_norm": 0.4522283825323262, "learning_rate": 6.329333421670996e-06, "loss": 3.3884, "step": 6780 }, { "epoch": 1.5511169551359927, "grad_norm": 0.48339848373875255, "learning_rate": 6.317021496681366e-06, "loss": 3.3748, "step": 6782 }, { "epoch": 1.551574313604985, "grad_norm": 0.5238325706253463, "learning_rate": 6.304719826125982e-06, "loss": 3.4394, "step": 6784 }, { "epoch": 1.5520316720739777, "grad_norm": 0.5561000139570175, "learning_rate": 6.292428416756791e-06, "loss": 3.5476, "step": 6786 }, { "epoch": 1.5524890305429704, "grad_norm": 0.594949339184489, "learning_rate": 6.280147275320131e-06, "loss": 3.3252, "step": 6788 }, { "epoch": 1.5529463890119628, "grad_norm": 0.47353561819867845, "learning_rate": 6.267876408556688e-06, "loss": 3.4472, "step": 6790 }, { "epoch": 1.5534037474809552, "grad_norm": 0.4323151785036954, "learning_rate": 6.255615823201505e-06, "loss": 3.4664, "step": 6792 }, { "epoch": 1.5538611059499479, "grad_norm": 0.5308333333682664, "learning_rate": 6.243365525984002e-06, "loss": 3.4536, "step": 6794 }, { "epoch": 1.5543184644189405, "grad_norm": 0.6418078133132137, "learning_rate": 6.231125523627932e-06, "loss": 3.5515, "step": 6796 }, { "epoch": 1.554775822887933, "grad_norm": 0.48327274422324584, "learning_rate": 6.218895822851403e-06, "loss": 3.4341, "step": 6798 }, { "epoch": 1.5552331813569253, "grad_norm": 0.5011597237187982, "learning_rate": 6.206676430366867e-06, "loss": 3.4466, "step": 6800 }, { "epoch": 1.555690539825918, "grad_norm": 0.600752495721212, "learning_rate": 6.1944673528811335e-06, "loss": 3.4676, "step": 6802 }, { "epoch": 1.5561478982949104, "grad_norm": 0.46633644220569276, "learning_rate": 6.182268597095334e-06, "loss": 3.3314, "step": 6804 }, { "epoch": 1.5566052567639028, "grad_norm": 0.48346994815125877, "learning_rate": 6.170080169704928e-06, "loss": 3.5272, "step": 6806 }, { "epoch": 1.5570626152328955, "grad_norm": 0.49217593467082266, "learning_rate": 6.157902077399735e-06, "loss": 3.5044, "step": 6808 }, { "epoch": 1.557519973701888, "grad_norm": 0.45003970222702494, "learning_rate": 6.1457343268638776e-06, "loss": 3.3542, "step": 6810 }, { "epoch": 1.5579773321708805, "grad_norm": 0.5052190137173362, "learning_rate": 6.1335769247758e-06, "loss": 3.376, "step": 6812 }, { "epoch": 1.558434690639873, "grad_norm": 0.5585929497132871, "learning_rate": 6.121429877808291e-06, "loss": 3.3467, "step": 6814 }, { "epoch": 1.5588920491088656, "grad_norm": 0.46853289344798066, "learning_rate": 6.1092931926284315e-06, "loss": 3.3559, "step": 6816 }, { "epoch": 1.5593494075778582, "grad_norm": 0.5182251826249786, "learning_rate": 6.097166875897623e-06, "loss": 3.4463, "step": 6818 }, { "epoch": 1.5598067660468506, "grad_norm": 0.587597897672041, "learning_rate": 6.08505093427158e-06, "loss": 3.3307, "step": 6820 }, { "epoch": 1.560264124515843, "grad_norm": 0.5869682908989837, "learning_rate": 6.072945374400324e-06, "loss": 3.5918, "step": 6822 }, { "epoch": 1.5607214829848357, "grad_norm": 0.45598611581625165, "learning_rate": 6.060850202928159e-06, "loss": 3.4706, "step": 6824 }, { "epoch": 1.5611788414538283, "grad_norm": 0.5260418964297356, "learning_rate": 6.04876542649373e-06, "loss": 3.5085, "step": 6826 }, { "epoch": 1.5616361999228208, "grad_norm": 0.48919256997396304, "learning_rate": 6.036691051729912e-06, "loss": 3.2327, "step": 6828 }, { "epoch": 1.5620935583918132, "grad_norm": 0.525959770198055, "learning_rate": 6.0246270852639285e-06, "loss": 3.6515, "step": 6830 }, { "epoch": 1.5625509168608058, "grad_norm": 0.40816747701397527, "learning_rate": 6.0125735337172805e-06, "loss": 3.5215, "step": 6832 }, { "epoch": 1.5630082753297985, "grad_norm": 0.4609508512470675, "learning_rate": 6.000530403705715e-06, "loss": 3.6303, "step": 6834 }, { "epoch": 1.5634656337987909, "grad_norm": 0.4668063238616122, "learning_rate": 5.988497701839299e-06, "loss": 3.467, "step": 6836 }, { "epoch": 1.5639229922677833, "grad_norm": 0.5015750752711604, "learning_rate": 5.976475434722367e-06, "loss": 3.4232, "step": 6838 }, { "epoch": 1.564380350736776, "grad_norm": 0.5201780552188269, "learning_rate": 5.964463608953516e-06, "loss": 3.4396, "step": 6840 }, { "epoch": 1.5648377092057686, "grad_norm": 0.6356793209934088, "learning_rate": 5.952462231125611e-06, "loss": 3.6425, "step": 6842 }, { "epoch": 1.565295067674761, "grad_norm": 0.4872079114816146, "learning_rate": 5.9404713078258016e-06, "loss": 3.4504, "step": 6844 }, { "epoch": 1.5657524261437534, "grad_norm": 0.4241404108035351, "learning_rate": 5.928490845635481e-06, "loss": 3.333, "step": 6846 }, { "epoch": 1.566209784612746, "grad_norm": 0.4830645708732424, "learning_rate": 5.9165208511303e-06, "loss": 3.4073, "step": 6848 }, { "epoch": 1.5666671430817385, "grad_norm": 0.5932591316574892, "learning_rate": 5.904561330880182e-06, "loss": 3.5312, "step": 6850 }, { "epoch": 1.567124501550731, "grad_norm": 0.5589219309692235, "learning_rate": 5.892612291449284e-06, "loss": 3.4655, "step": 6852 }, { "epoch": 1.5675818600197235, "grad_norm": 0.48163690681600185, "learning_rate": 5.880673739396012e-06, "loss": 3.3949, "step": 6854 }, { "epoch": 1.5680392184887162, "grad_norm": 0.38961222009440394, "learning_rate": 5.86874568127303e-06, "loss": 3.4269, "step": 6856 }, { "epoch": 1.5684965769577086, "grad_norm": 0.4798364588172053, "learning_rate": 5.856828123627228e-06, "loss": 3.4469, "step": 6858 }, { "epoch": 1.568953935426701, "grad_norm": 0.39214663729618776, "learning_rate": 5.844921072999737e-06, "loss": 3.3802, "step": 6860 }, { "epoch": 1.5694112938956937, "grad_norm": 0.41253394362925006, "learning_rate": 5.8330245359259125e-06, "loss": 3.5317, "step": 6862 }, { "epoch": 1.5698686523646863, "grad_norm": 0.4966590994086676, "learning_rate": 5.821138518935363e-06, "loss": 3.384, "step": 6864 }, { "epoch": 1.5703260108336787, "grad_norm": 0.5004596385668301, "learning_rate": 5.809263028551901e-06, "loss": 3.5652, "step": 6866 }, { "epoch": 1.5707833693026712, "grad_norm": 0.4432898815660629, "learning_rate": 5.797398071293564e-06, "loss": 3.3828, "step": 6868 }, { "epoch": 1.5712407277716638, "grad_norm": 0.42889417759871634, "learning_rate": 5.78554365367262e-06, "loss": 3.6081, "step": 6870 }, { "epoch": 1.5716980862406564, "grad_norm": 0.512248107601336, "learning_rate": 5.773699782195543e-06, "loss": 3.5756, "step": 6872 }, { "epoch": 1.5721554447096489, "grad_norm": 0.5079701399043443, "learning_rate": 5.761866463363014e-06, "loss": 3.4243, "step": 6874 }, { "epoch": 1.5726128031786413, "grad_norm": 0.4718837573015674, "learning_rate": 5.7500437036699385e-06, "loss": 3.4333, "step": 6876 }, { "epoch": 1.573070161647634, "grad_norm": 0.5614062377691865, "learning_rate": 5.73823150960541e-06, "loss": 3.3929, "step": 6878 }, { "epoch": 1.5735275201166266, "grad_norm": 0.487973169409985, "learning_rate": 5.726429887652726e-06, "loss": 3.5013, "step": 6880 }, { "epoch": 1.573984878585619, "grad_norm": 0.5261969352933775, "learning_rate": 5.714638844289394e-06, "loss": 3.4158, "step": 6882 }, { "epoch": 1.5744422370546114, "grad_norm": 0.4686926488760119, "learning_rate": 5.7028583859871e-06, "loss": 3.576, "step": 6884 }, { "epoch": 1.574899595523604, "grad_norm": 0.4949272739564533, "learning_rate": 5.691088519211721e-06, "loss": 3.3532, "step": 6886 }, { "epoch": 1.5753569539925967, "grad_norm": 0.5933169492279271, "learning_rate": 5.679329250423346e-06, "loss": 3.5451, "step": 6888 }, { "epoch": 1.5758143124615889, "grad_norm": 0.4764356131558197, "learning_rate": 5.6675805860761985e-06, "loss": 3.4531, "step": 6890 }, { "epoch": 1.5762716709305815, "grad_norm": 0.45009779132452815, "learning_rate": 5.655842532618722e-06, "loss": 3.4723, "step": 6892 }, { "epoch": 1.5767290293995742, "grad_norm": 0.5040250830293456, "learning_rate": 5.644115096493541e-06, "loss": 3.5377, "step": 6894 }, { "epoch": 1.5771863878685666, "grad_norm": 0.49224368788903894, "learning_rate": 5.6323982841374055e-06, "loss": 3.3254, "step": 6896 }, { "epoch": 1.577643746337559, "grad_norm": 0.44464166205273487, "learning_rate": 5.620692101981279e-06, "loss": 3.4455, "step": 6898 }, { "epoch": 1.5781011048065516, "grad_norm": 0.4743916047247181, "learning_rate": 5.6089965564502825e-06, "loss": 3.3792, "step": 6900 }, { "epoch": 1.5785584632755443, "grad_norm": 0.460689720539708, "learning_rate": 5.597311653963686e-06, "loss": 3.48, "step": 6902 }, { "epoch": 1.5790158217445367, "grad_norm": 0.6285603916875918, "learning_rate": 5.585637400934917e-06, "loss": 3.4569, "step": 6904 }, { "epoch": 1.5794731802135291, "grad_norm": 0.4472508323921829, "learning_rate": 5.573973803771581e-06, "loss": 3.6598, "step": 6906 }, { "epoch": 1.5799305386825218, "grad_norm": 0.5267584767123494, "learning_rate": 5.562320868875409e-06, "loss": 3.4918, "step": 6908 }, { "epoch": 1.5803878971515144, "grad_norm": 0.4654644740619929, "learning_rate": 5.550678602642287e-06, "loss": 3.3191, "step": 6910 }, { "epoch": 1.5808452556205068, "grad_norm": 0.40350548139675524, "learning_rate": 5.539047011462256e-06, "loss": 3.3239, "step": 6912 }, { "epoch": 1.5813026140894992, "grad_norm": 0.5015799771885479, "learning_rate": 5.5274261017194905e-06, "loss": 3.3957, "step": 6914 }, { "epoch": 1.5817599725584919, "grad_norm": 0.4509427202891442, "learning_rate": 5.515815879792296e-06, "loss": 3.4551, "step": 6916 }, { "epoch": 1.5822173310274845, "grad_norm": 0.48714627919205705, "learning_rate": 5.504216352053113e-06, "loss": 3.5451, "step": 6918 }, { "epoch": 1.582674689496477, "grad_norm": 0.4918617048248058, "learning_rate": 5.492627524868529e-06, "loss": 3.4552, "step": 6920 }, { "epoch": 1.5831320479654694, "grad_norm": 0.41802794491862294, "learning_rate": 5.481049404599245e-06, "loss": 3.4715, "step": 6922 }, { "epoch": 1.583589406434462, "grad_norm": 0.4262041389147559, "learning_rate": 5.469481997600073e-06, "loss": 3.4547, "step": 6924 }, { "epoch": 1.5840467649034546, "grad_norm": 0.5032646767209601, "learning_rate": 5.457925310219977e-06, "loss": 3.3773, "step": 6926 }, { "epoch": 1.584504123372447, "grad_norm": 0.4876503339687891, "learning_rate": 5.446379348802011e-06, "loss": 3.3744, "step": 6928 }, { "epoch": 1.5849614818414395, "grad_norm": 0.44211760733530686, "learning_rate": 5.434844119683341e-06, "loss": 3.398, "step": 6930 }, { "epoch": 1.5854188403104321, "grad_norm": 0.46860753119575527, "learning_rate": 5.42331962919527e-06, "loss": 3.417, "step": 6932 }, { "epoch": 1.5858761987794245, "grad_norm": 0.5012021514239035, "learning_rate": 5.411805883663177e-06, "loss": 3.493, "step": 6934 }, { "epoch": 1.586333557248417, "grad_norm": 0.4614357276127069, "learning_rate": 5.4003028894065474e-06, "loss": 3.3868, "step": 6936 }, { "epoch": 1.5867909157174096, "grad_norm": 0.5927216760198417, "learning_rate": 5.388810652738987e-06, "loss": 3.4102, "step": 6938 }, { "epoch": 1.5872482741864022, "grad_norm": 0.6067174033897402, "learning_rate": 5.377329179968179e-06, "loss": 3.3119, "step": 6940 }, { "epoch": 1.5877056326553947, "grad_norm": 0.5213504290629379, "learning_rate": 5.365858477395894e-06, "loss": 3.403, "step": 6942 }, { "epoch": 1.588162991124387, "grad_norm": 0.4898010709869246, "learning_rate": 5.354398551318021e-06, "loss": 3.3735, "step": 6944 }, { "epoch": 1.5886203495933797, "grad_norm": 0.39772454220590003, "learning_rate": 5.342949408024487e-06, "loss": 3.3806, "step": 6946 }, { "epoch": 1.5890777080623724, "grad_norm": 0.50637770207724, "learning_rate": 5.331511053799335e-06, "loss": 3.4859, "step": 6948 }, { "epoch": 1.5895350665313648, "grad_norm": 0.5105837982999115, "learning_rate": 5.320083494920697e-06, "loss": 3.5043, "step": 6950 }, { "epoch": 1.5899924250003572, "grad_norm": 0.5414152356567699, "learning_rate": 5.308666737660734e-06, "loss": 3.3522, "step": 6952 }, { "epoch": 1.5904497834693498, "grad_norm": 0.4790754196803757, "learning_rate": 5.297260788285716e-06, "loss": 3.4859, "step": 6954 }, { "epoch": 1.5909071419383425, "grad_norm": 0.4850195626118398, "learning_rate": 5.285865653055985e-06, "loss": 3.5465, "step": 6956 }, { "epoch": 1.591364500407335, "grad_norm": 0.4325534437764098, "learning_rate": 5.274481338225904e-06, "loss": 3.5273, "step": 6958 }, { "epoch": 1.5918218588763273, "grad_norm": 0.4571477790364264, "learning_rate": 5.2631078500439415e-06, "loss": 3.4129, "step": 6960 }, { "epoch": 1.59227921734532, "grad_norm": 0.598906312977832, "learning_rate": 5.251745194752622e-06, "loss": 3.3843, "step": 6962 }, { "epoch": 1.5927365758143126, "grad_norm": 0.4347197372198258, "learning_rate": 5.240393378588479e-06, "loss": 3.3862, "step": 6964 }, { "epoch": 1.593193934283305, "grad_norm": 0.4246095932834489, "learning_rate": 5.2290524077821454e-06, "loss": 3.3812, "step": 6966 }, { "epoch": 1.5936512927522974, "grad_norm": 0.5338496107201668, "learning_rate": 5.217722288558288e-06, "loss": 3.5275, "step": 6968 }, { "epoch": 1.59410865122129, "grad_norm": 0.4220771128800826, "learning_rate": 5.206403027135609e-06, "loss": 3.2825, "step": 6970 }, { "epoch": 1.5945660096902827, "grad_norm": 0.4520992474750735, "learning_rate": 5.1950946297268546e-06, "loss": 3.5705, "step": 6972 }, { "epoch": 1.595023368159275, "grad_norm": 0.44910107605532784, "learning_rate": 5.183797102538801e-06, "loss": 3.388, "step": 6974 }, { "epoch": 1.5954807266282676, "grad_norm": 0.4132440863992175, "learning_rate": 5.1725104517722805e-06, "loss": 3.4707, "step": 6976 }, { "epoch": 1.5959380850972602, "grad_norm": 0.42812265339786054, "learning_rate": 5.161234683622135e-06, "loss": 3.4372, "step": 6978 }, { "epoch": 1.5963954435662526, "grad_norm": 0.41339179365191364, "learning_rate": 5.149969804277233e-06, "loss": 3.2909, "step": 6980 }, { "epoch": 1.596852802035245, "grad_norm": 0.5128202449625184, "learning_rate": 5.13871581992049e-06, "loss": 3.587, "step": 6982 }, { "epoch": 1.5973101605042377, "grad_norm": 0.4162187572874207, "learning_rate": 5.1274727367288115e-06, "loss": 3.3857, "step": 6984 }, { "epoch": 1.5977675189732303, "grad_norm": 0.4838737078013568, "learning_rate": 5.116240560873128e-06, "loss": 3.3362, "step": 6986 }, { "epoch": 1.5982248774422227, "grad_norm": 0.4785464841273278, "learning_rate": 5.105019298518407e-06, "loss": 3.3705, "step": 6988 }, { "epoch": 1.5986822359112152, "grad_norm": 0.41880559481046215, "learning_rate": 5.093808955823595e-06, "loss": 3.3779, "step": 6990 }, { "epoch": 1.5991395943802078, "grad_norm": 0.5877261517877517, "learning_rate": 5.0826095389416575e-06, "loss": 3.3568, "step": 6992 }, { "epoch": 1.5995969528492004, "grad_norm": 0.6309371050020506, "learning_rate": 5.071421054019568e-06, "loss": 3.3852, "step": 6994 }, { "epoch": 1.6000543113181929, "grad_norm": 0.4983490569473855, "learning_rate": 5.060243507198292e-06, "loss": 3.4277, "step": 6996 }, { "epoch": 1.6005116697871853, "grad_norm": 0.42995610944619667, "learning_rate": 5.04907690461279e-06, "loss": 3.2822, "step": 6998 }, { "epoch": 1.600969028256178, "grad_norm": 0.4477656705053413, "learning_rate": 5.037921252392033e-06, "loss": 3.2475, "step": 7000 }, { "epoch": 1.6014263867251706, "grad_norm": 0.4916071876686, "learning_rate": 5.02677655665896e-06, "loss": 3.4211, "step": 7002 }, { "epoch": 1.601883745194163, "grad_norm": 0.6463713993805098, "learning_rate": 5.0156428235304975e-06, "loss": 3.5176, "step": 7004 }, { "epoch": 1.6023411036631554, "grad_norm": 0.5045119496963361, "learning_rate": 5.004520059117587e-06, "loss": 3.5358, "step": 7006 }, { "epoch": 1.602798462132148, "grad_norm": 0.41656252235017616, "learning_rate": 4.993408269525096e-06, "loss": 3.3593, "step": 7008 }, { "epoch": 1.6032558206011407, "grad_norm": 0.3971952632982731, "learning_rate": 4.982307460851909e-06, "loss": 3.348, "step": 7010 }, { "epoch": 1.603713179070133, "grad_norm": 0.4208883297584999, "learning_rate": 4.9712176391908925e-06, "loss": 3.3188, "step": 7012 }, { "epoch": 1.6041705375391255, "grad_norm": 0.5189315949142734, "learning_rate": 4.9601388106288255e-06, "loss": 3.5026, "step": 7014 }, { "epoch": 1.6046278960081182, "grad_norm": 0.5853051142677199, "learning_rate": 4.949070981246512e-06, "loss": 3.5807, "step": 7016 }, { "epoch": 1.6050852544771106, "grad_norm": 0.44130803593775253, "learning_rate": 4.938014157118704e-06, "loss": 3.4007, "step": 7018 }, { "epoch": 1.605542612946103, "grad_norm": 0.48548043150118314, "learning_rate": 4.926968344314084e-06, "loss": 3.5171, "step": 7020 }, { "epoch": 1.6059999714150957, "grad_norm": 0.5321080907151704, "learning_rate": 4.915933548895324e-06, "loss": 3.3646, "step": 7022 }, { "epoch": 1.6064573298840883, "grad_norm": 0.4403751618663567, "learning_rate": 4.90490977691905e-06, "loss": 3.2759, "step": 7024 }, { "epoch": 1.6069146883530807, "grad_norm": 0.5054566831489745, "learning_rate": 4.893897034435798e-06, "loss": 3.4578, "step": 7026 }, { "epoch": 1.6073720468220731, "grad_norm": 0.5070781680178125, "learning_rate": 4.8828953274900975e-06, "loss": 3.6003, "step": 7028 }, { "epoch": 1.6078294052910658, "grad_norm": 0.4607834477726319, "learning_rate": 4.871904662120402e-06, "loss": 3.3698, "step": 7030 }, { "epoch": 1.6082867637600584, "grad_norm": 0.5195223585242721, "learning_rate": 4.860925044359096e-06, "loss": 3.456, "step": 7032 }, { "epoch": 1.6087441222290508, "grad_norm": 0.4490152810423271, "learning_rate": 4.849956480232512e-06, "loss": 3.4077, "step": 7034 }, { "epoch": 1.6092014806980433, "grad_norm": 0.49548073688251804, "learning_rate": 4.838998975760906e-06, "loss": 3.2963, "step": 7036 }, { "epoch": 1.609658839167036, "grad_norm": 0.5189108909300614, "learning_rate": 4.828052536958477e-06, "loss": 3.2454, "step": 7038 }, { "epoch": 1.6101161976360285, "grad_norm": 0.5799125666322893, "learning_rate": 4.817117169833338e-06, "loss": 3.4561, "step": 7040 }, { "epoch": 1.610573556105021, "grad_norm": 0.49199250309454357, "learning_rate": 4.806192880387528e-06, "loss": 3.501, "step": 7042 }, { "epoch": 1.6110309145740134, "grad_norm": 0.4795430888643962, "learning_rate": 4.795279674617017e-06, "loss": 3.5055, "step": 7044 }, { "epoch": 1.611488273043006, "grad_norm": 0.4713360816338353, "learning_rate": 4.784377558511677e-06, "loss": 3.3277, "step": 7046 }, { "epoch": 1.6119456315119987, "grad_norm": 0.4132111271750334, "learning_rate": 4.773486538055291e-06, "loss": 3.4968, "step": 7048 }, { "epoch": 1.612402989980991, "grad_norm": 0.5001812546738276, "learning_rate": 4.762606619225574e-06, "loss": 3.4036, "step": 7050 }, { "epoch": 1.6128603484499835, "grad_norm": 0.472184079357913, "learning_rate": 4.751737807994125e-06, "loss": 3.3569, "step": 7052 }, { "epoch": 1.6133177069189761, "grad_norm": 0.46174253863970655, "learning_rate": 4.7408801103264514e-06, "loss": 3.33, "step": 7054 }, { "epoch": 1.6137750653879688, "grad_norm": 0.35885291945965553, "learning_rate": 4.730033532181974e-06, "loss": 3.3839, "step": 7056 }, { "epoch": 1.614232423856961, "grad_norm": 0.37577610369229575, "learning_rate": 4.719198079513998e-06, "loss": 3.4867, "step": 7058 }, { "epoch": 1.6146897823259536, "grad_norm": 0.4033592888793026, "learning_rate": 4.708373758269713e-06, "loss": 3.5797, "step": 7060 }, { "epoch": 1.6151471407949463, "grad_norm": 0.5670288873004647, "learning_rate": 4.697560574390237e-06, "loss": 3.4011, "step": 7062 }, { "epoch": 1.6156044992639387, "grad_norm": 0.6031784547825824, "learning_rate": 4.686758533810517e-06, "loss": 3.3554, "step": 7064 }, { "epoch": 1.616061857732931, "grad_norm": 0.8131994391343246, "learning_rate": 4.675967642459431e-06, "loss": 3.4407, "step": 7066 }, { "epoch": 1.6165192162019237, "grad_norm": 0.466575330569329, "learning_rate": 4.665187906259736e-06, "loss": 3.3884, "step": 7068 }, { "epoch": 1.6169765746709164, "grad_norm": 0.44831844171564317, "learning_rate": 4.654419331128024e-06, "loss": 3.2959, "step": 7070 }, { "epoch": 1.6174339331399088, "grad_norm": 0.5246347428335847, "learning_rate": 4.643661922974804e-06, "loss": 3.571, "step": 7072 }, { "epoch": 1.6178912916089012, "grad_norm": 0.44805666672534067, "learning_rate": 4.632915687704456e-06, "loss": 3.3523, "step": 7074 }, { "epoch": 1.6183486500778939, "grad_norm": 0.48317855303741863, "learning_rate": 4.622180631215186e-06, "loss": 3.3115, "step": 7076 }, { "epoch": 1.6188060085468865, "grad_norm": 0.4326239557361096, "learning_rate": 4.611456759399108e-06, "loss": 3.3182, "step": 7078 }, { "epoch": 1.619263367015879, "grad_norm": 0.5684406707406806, "learning_rate": 4.600744078142185e-06, "loss": 3.3774, "step": 7080 }, { "epoch": 1.6197207254848713, "grad_norm": 0.486210146155778, "learning_rate": 4.590042593324218e-06, "loss": 3.4213, "step": 7082 }, { "epoch": 1.620178083953864, "grad_norm": 0.47305474940202863, "learning_rate": 4.579352310818882e-06, "loss": 3.3554, "step": 7084 }, { "epoch": 1.6206354424228566, "grad_norm": 0.47947347880300717, "learning_rate": 4.56867323649372e-06, "loss": 3.3778, "step": 7086 }, { "epoch": 1.621092800891849, "grad_norm": 0.5043499789360029, "learning_rate": 4.55800537621007e-06, "loss": 3.2852, "step": 7088 }, { "epoch": 1.6215501593608415, "grad_norm": 0.4404846123325355, "learning_rate": 4.547348735823173e-06, "loss": 3.4487, "step": 7090 }, { "epoch": 1.622007517829834, "grad_norm": 0.48907811563442516, "learning_rate": 4.536703321182079e-06, "loss": 3.2857, "step": 7092 }, { "epoch": 1.6224648762988267, "grad_norm": 0.5518058823432704, "learning_rate": 4.526069138129674e-06, "loss": 3.5484, "step": 7094 }, { "epoch": 1.6229222347678192, "grad_norm": 0.4188965811319681, "learning_rate": 4.515446192502706e-06, "loss": 3.4731, "step": 7096 }, { "epoch": 1.6233795932368116, "grad_norm": 0.5365803280480763, "learning_rate": 4.504834490131726e-06, "loss": 3.6044, "step": 7098 }, { "epoch": 1.6238369517058042, "grad_norm": 0.4374016923808013, "learning_rate": 4.494234036841139e-06, "loss": 3.322, "step": 7100 }, { "epoch": 1.6242943101747966, "grad_norm": 0.6206361536443222, "learning_rate": 4.483644838449155e-06, "loss": 3.3203, "step": 7102 }, { "epoch": 1.624751668643789, "grad_norm": 0.6052664634201869, "learning_rate": 4.473066900767811e-06, "loss": 3.3625, "step": 7104 }, { "epoch": 1.6252090271127817, "grad_norm": 0.5179124723905513, "learning_rate": 4.4625002296029815e-06, "loss": 3.3368, "step": 7106 }, { "epoch": 1.6256663855817743, "grad_norm": 0.535760858191654, "learning_rate": 4.451944830754334e-06, "loss": 3.5729, "step": 7108 }, { "epoch": 1.6261237440507668, "grad_norm": 0.5101492596508092, "learning_rate": 4.4414007100153505e-06, "loss": 3.3071, "step": 7110 }, { "epoch": 1.6265811025197592, "grad_norm": 0.5109002942645722, "learning_rate": 4.4308678731733496e-06, "loss": 3.3639, "step": 7112 }, { "epoch": 1.6270384609887518, "grad_norm": 0.40889892414135265, "learning_rate": 4.420346326009425e-06, "loss": 3.4079, "step": 7114 }, { "epoch": 1.6274958194577445, "grad_norm": 0.38129648409892464, "learning_rate": 4.409836074298482e-06, "loss": 3.3674, "step": 7116 }, { "epoch": 1.6279531779267369, "grad_norm": 0.4985793614380998, "learning_rate": 4.399337123809244e-06, "loss": 3.4618, "step": 7118 }, { "epoch": 1.6284105363957293, "grad_norm": 0.4657989273120769, "learning_rate": 4.38884948030421e-06, "loss": 3.428, "step": 7120 }, { "epoch": 1.628867894864722, "grad_norm": 0.5292388771628999, "learning_rate": 4.378373149539677e-06, "loss": 3.4071, "step": 7122 }, { "epoch": 1.6293252533337146, "grad_norm": 0.5329791706087541, "learning_rate": 4.367908137265755e-06, "loss": 3.363, "step": 7124 }, { "epoch": 1.629782611802707, "grad_norm": 0.5341202669726284, "learning_rate": 4.3574544492263006e-06, "loss": 3.5078, "step": 7126 }, { "epoch": 1.6302399702716994, "grad_norm": 0.7538908724624837, "learning_rate": 4.347012091158986e-06, "loss": 3.4019, "step": 7128 }, { "epoch": 1.630697328740692, "grad_norm": 0.40823128708060546, "learning_rate": 4.33658106879527e-06, "loss": 3.1842, "step": 7130 }, { "epoch": 1.6311546872096847, "grad_norm": 0.49844715264519524, "learning_rate": 4.326161387860356e-06, "loss": 3.4552, "step": 7132 }, { "epoch": 1.6316120456786771, "grad_norm": 0.48323820907290904, "learning_rate": 4.315753054073252e-06, "loss": 3.4335, "step": 7134 }, { "epoch": 1.6320694041476695, "grad_norm": 0.4664932444605626, "learning_rate": 4.305356073146744e-06, "loss": 3.4042, "step": 7136 }, { "epoch": 1.6325267626166622, "grad_norm": 0.4177428731487335, "learning_rate": 4.294970450787339e-06, "loss": 3.5559, "step": 7138 }, { "epoch": 1.6329841210856548, "grad_norm": 0.5392030449798957, "learning_rate": 4.284596192695364e-06, "loss": 3.5041, "step": 7140 }, { "epoch": 1.633441479554647, "grad_norm": 0.45979605795222855, "learning_rate": 4.274233304564895e-06, "loss": 3.4718, "step": 7142 }, { "epoch": 1.6338988380236397, "grad_norm": 0.4737932543019145, "learning_rate": 4.263881792083735e-06, "loss": 3.5743, "step": 7144 }, { "epoch": 1.6343561964926323, "grad_norm": 0.5470439649867719, "learning_rate": 4.25354166093348e-06, "loss": 3.33, "step": 7146 }, { "epoch": 1.6348135549616247, "grad_norm": 0.4982972656336507, "learning_rate": 4.24321291678948e-06, "loss": 3.4716, "step": 7148 }, { "epoch": 1.6352709134306171, "grad_norm": 0.5382283709371108, "learning_rate": 4.232895565320794e-06, "loss": 3.3067, "step": 7150 }, { "epoch": 1.6357282718996098, "grad_norm": 0.35968880842883955, "learning_rate": 4.222589612190278e-06, "loss": 3.5033, "step": 7152 }, { "epoch": 1.6361856303686024, "grad_norm": 0.5303828511193062, "learning_rate": 4.2122950630545e-06, "loss": 3.3475, "step": 7154 }, { "epoch": 1.6366429888375948, "grad_norm": 0.46420836099037277, "learning_rate": 4.202011923563778e-06, "loss": 3.4298, "step": 7156 }, { "epoch": 1.6371003473065873, "grad_norm": 0.5134809015728431, "learning_rate": 4.191740199362173e-06, "loss": 3.4578, "step": 7158 }, { "epoch": 1.63755770577558, "grad_norm": 0.5412140905700707, "learning_rate": 4.18147989608747e-06, "loss": 3.3624, "step": 7160 }, { "epoch": 1.6380150642445725, "grad_norm": 0.5480422233878988, "learning_rate": 4.1712310193711994e-06, "loss": 3.6978, "step": 7162 }, { "epoch": 1.638472422713565, "grad_norm": 0.5111800699977811, "learning_rate": 4.1609935748386035e-06, "loss": 3.4327, "step": 7164 }, { "epoch": 1.6389297811825574, "grad_norm": 0.44394960481084966, "learning_rate": 4.150767568108657e-06, "loss": 3.416, "step": 7166 }, { "epoch": 1.63938713965155, "grad_norm": 0.43956324575125805, "learning_rate": 4.140553004794068e-06, "loss": 3.5401, "step": 7168 }, { "epoch": 1.6398444981205427, "grad_norm": 0.41136112352472615, "learning_rate": 4.130349890501247e-06, "loss": 3.491, "step": 7170 }, { "epoch": 1.640301856589535, "grad_norm": 0.4351807325451384, "learning_rate": 4.12015823083032e-06, "loss": 3.3431, "step": 7172 }, { "epoch": 1.6407592150585275, "grad_norm": 0.4951588238847547, "learning_rate": 4.1099780313751476e-06, "loss": 3.4855, "step": 7174 }, { "epoch": 1.6412165735275202, "grad_norm": 0.4688813343438891, "learning_rate": 4.099809297723278e-06, "loss": 3.4777, "step": 7176 }, { "epoch": 1.6416739319965128, "grad_norm": 0.4804205211221203, "learning_rate": 4.089652035455965e-06, "loss": 3.2942, "step": 7178 }, { "epoch": 1.6421312904655052, "grad_norm": 0.7169324866129466, "learning_rate": 4.079506250148199e-06, "loss": 3.4047, "step": 7180 }, { "epoch": 1.6425886489344976, "grad_norm": 0.49275544111401454, "learning_rate": 4.069371947368619e-06, "loss": 3.3264, "step": 7182 }, { "epoch": 1.6430460074034903, "grad_norm": 0.5013989012391568, "learning_rate": 4.059249132679604e-06, "loss": 3.3805, "step": 7184 }, { "epoch": 1.6435033658724827, "grad_norm": 0.5000080465622975, "learning_rate": 4.0491378116372245e-06, "loss": 3.4684, "step": 7186 }, { "epoch": 1.643960724341475, "grad_norm": 0.5957363682210604, "learning_rate": 4.039037989791205e-06, "loss": 3.4287, "step": 7188 }, { "epoch": 1.6444180828104678, "grad_norm": 0.4493308383586327, "learning_rate": 4.028949672685001e-06, "loss": 3.5099, "step": 7190 }, { "epoch": 1.6448754412794604, "grad_norm": 0.46113436745910996, "learning_rate": 4.018872865855744e-06, "loss": 3.5359, "step": 7192 }, { "epoch": 1.6453327997484528, "grad_norm": 0.33851101320824195, "learning_rate": 4.008807574834225e-06, "loss": 3.5139, "step": 7194 }, { "epoch": 1.6457901582174452, "grad_norm": 0.4473686099697901, "learning_rate": 3.998753805144936e-06, "loss": 3.2324, "step": 7196 }, { "epoch": 1.6462475166864379, "grad_norm": 0.4540204047415453, "learning_rate": 3.988711562306055e-06, "loss": 3.2904, "step": 7198 }, { "epoch": 1.6467048751554305, "grad_norm": 0.5598585308571414, "learning_rate": 3.978680851829392e-06, "loss": 3.3138, "step": 7200 }, { "epoch": 1.647162233624423, "grad_norm": 0.46690332324254086, "learning_rate": 3.968661679220468e-06, "loss": 3.3744, "step": 7202 }, { "epoch": 1.6476195920934154, "grad_norm": 0.4796280988762636, "learning_rate": 3.958654049978469e-06, "loss": 3.2283, "step": 7204 }, { "epoch": 1.648076950562408, "grad_norm": 0.566726041820009, "learning_rate": 3.9486579695962065e-06, "loss": 3.5089, "step": 7206 }, { "epoch": 1.6485343090314006, "grad_norm": 0.4855007349189437, "learning_rate": 3.938673443560201e-06, "loss": 3.4915, "step": 7208 }, { "epoch": 1.648991667500393, "grad_norm": 0.48892317728283236, "learning_rate": 3.9287004773506e-06, "loss": 3.3613, "step": 7210 }, { "epoch": 1.6494490259693855, "grad_norm": 0.444223578363424, "learning_rate": 3.918739076441214e-06, "loss": 3.1972, "step": 7212 }, { "epoch": 1.6499063844383781, "grad_norm": 0.5485666269731914, "learning_rate": 3.908789246299518e-06, "loss": 3.3865, "step": 7214 }, { "epoch": 1.6503637429073708, "grad_norm": 0.4407651177949417, "learning_rate": 3.8988509923866216e-06, "loss": 3.3358, "step": 7216 }, { "epoch": 1.6508211013763632, "grad_norm": 0.44558036845530963, "learning_rate": 3.888924320157278e-06, "loss": 3.4496, "step": 7218 }, { "epoch": 1.6512784598453556, "grad_norm": 0.3855090052162681, "learning_rate": 3.879009235059905e-06, "loss": 3.4439, "step": 7220 }, { "epoch": 1.6517358183143482, "grad_norm": 0.4226175590194055, "learning_rate": 3.869105742536536e-06, "loss": 3.3382, "step": 7222 }, { "epoch": 1.6521931767833409, "grad_norm": 0.5358863639347369, "learning_rate": 3.859213848022849e-06, "loss": 3.3326, "step": 7224 }, { "epoch": 1.6526505352523333, "grad_norm": 0.5430027793265181, "learning_rate": 3.849333556948173e-06, "loss": 3.3865, "step": 7226 }, { "epoch": 1.6531078937213257, "grad_norm": 0.4489346808496771, "learning_rate": 3.83946487473544e-06, "loss": 3.4416, "step": 7228 }, { "epoch": 1.6535652521903184, "grad_norm": 0.4451337924218525, "learning_rate": 3.829607806801238e-06, "loss": 3.3579, "step": 7230 }, { "epoch": 1.6540226106593108, "grad_norm": 0.682016460350109, "learning_rate": 3.819762358555759e-06, "loss": 3.3158, "step": 7232 }, { "epoch": 1.6544799691283032, "grad_norm": 0.3821260759739675, "learning_rate": 3.8099285354028235e-06, "loss": 3.3832, "step": 7234 }, { "epoch": 1.6549373275972958, "grad_norm": 0.4989304726559839, "learning_rate": 3.80010634273989e-06, "loss": 3.3608, "step": 7236 }, { "epoch": 1.6553946860662885, "grad_norm": 0.42270735468719034, "learning_rate": 3.79029578595799e-06, "loss": 3.3802, "step": 7238 }, { "epoch": 1.655852044535281, "grad_norm": 0.516892608979177, "learning_rate": 3.780496870441813e-06, "loss": 3.4044, "step": 7240 }, { "epoch": 1.6563094030042733, "grad_norm": 0.37424717279537234, "learning_rate": 3.7707096015696468e-06, "loss": 3.4405, "step": 7242 }, { "epoch": 1.656766761473266, "grad_norm": 0.4703492539957999, "learning_rate": 3.7609339847133634e-06, "loss": 3.4909, "step": 7244 }, { "epoch": 1.6572241199422586, "grad_norm": 0.5311639379465765, "learning_rate": 3.7511700252384654e-06, "loss": 3.2452, "step": 7246 }, { "epoch": 1.657681478411251, "grad_norm": 0.42487406547782547, "learning_rate": 3.7414177285040623e-06, "loss": 3.4918, "step": 7248 }, { "epoch": 1.6581388368802434, "grad_norm": 0.5453322584622352, "learning_rate": 3.7316770998628265e-06, "loss": 3.4334, "step": 7250 }, { "epoch": 1.658596195349236, "grad_norm": 0.46909743784402175, "learning_rate": 3.721948144661061e-06, "loss": 3.3354, "step": 7252 }, { "epoch": 1.6590535538182287, "grad_norm": 0.47399190287776316, "learning_rate": 3.7122308682386557e-06, "loss": 3.512, "step": 7254 }, { "epoch": 1.6595109122872211, "grad_norm": 0.43732179690885514, "learning_rate": 3.70252527592907e-06, "loss": 3.5514, "step": 7256 }, { "epoch": 1.6599682707562136, "grad_norm": 0.45464695818559037, "learning_rate": 3.6928313730593662e-06, "loss": 3.5065, "step": 7258 }, { "epoch": 1.6604256292252062, "grad_norm": 0.4348234489491334, "learning_rate": 3.6831491649502087e-06, "loss": 3.2677, "step": 7260 }, { "epoch": 1.6608829876941988, "grad_norm": 0.5366097918527339, "learning_rate": 3.6734786569157935e-06, "loss": 3.2644, "step": 7262 }, { "epoch": 1.6613403461631913, "grad_norm": 0.4566502572537467, "learning_rate": 3.6638198542639416e-06, "loss": 3.3397, "step": 7264 }, { "epoch": 1.6617977046321837, "grad_norm": 0.41755477511690714, "learning_rate": 3.6541727622960433e-06, "loss": 3.3892, "step": 7266 }, { "epoch": 1.6622550631011763, "grad_norm": 0.5928210470320439, "learning_rate": 3.644537386307023e-06, "loss": 3.6389, "step": 7268 }, { "epoch": 1.6627124215701687, "grad_norm": 0.5891913032866246, "learning_rate": 3.6349137315854235e-06, "loss": 3.507, "step": 7270 }, { "epoch": 1.6631697800391612, "grad_norm": 0.4370363708021512, "learning_rate": 3.625301803413325e-06, "loss": 3.427, "step": 7272 }, { "epoch": 1.6636271385081538, "grad_norm": 0.4545912532600161, "learning_rate": 3.615701607066374e-06, "loss": 3.3587, "step": 7274 }, { "epoch": 1.6640844969771464, "grad_norm": 0.5192888310972869, "learning_rate": 3.6061131478137946e-06, "loss": 3.4275, "step": 7276 }, { "epoch": 1.6645418554461389, "grad_norm": 0.6505132565287444, "learning_rate": 3.5965364309183514e-06, "loss": 3.4911, "step": 7278 }, { "epoch": 1.6649992139151313, "grad_norm": 0.5248959563110636, "learning_rate": 3.5869714616363626e-06, "loss": 3.4269, "step": 7280 }, { "epoch": 1.665456572384124, "grad_norm": 0.5126912144088447, "learning_rate": 3.5774182452177174e-06, "loss": 3.2585, "step": 7282 }, { "epoch": 1.6659139308531166, "grad_norm": 0.503683440163081, "learning_rate": 3.5678767869058412e-06, "loss": 3.4778, "step": 7284 }, { "epoch": 1.666371289322109, "grad_norm": 0.41193592344528446, "learning_rate": 3.5583470919376944e-06, "loss": 3.5089, "step": 7286 }, { "epoch": 1.6668286477911014, "grad_norm": 0.5399605554285294, "learning_rate": 3.54882916554381e-06, "loss": 3.3927, "step": 7288 }, { "epoch": 1.667286006260094, "grad_norm": 0.46940447893793313, "learning_rate": 3.539323012948231e-06, "loss": 3.3893, "step": 7290 }, { "epoch": 1.6677433647290867, "grad_norm": 0.5444031574311862, "learning_rate": 3.529828639368568e-06, "loss": 3.4489, "step": 7292 }, { "epoch": 1.668200723198079, "grad_norm": 0.5214416829325149, "learning_rate": 3.5203460500159407e-06, "loss": 3.5749, "step": 7294 }, { "epoch": 1.6686580816670715, "grad_norm": 0.36105270198650413, "learning_rate": 3.5108752500950082e-06, "loss": 3.4248, "step": 7296 }, { "epoch": 1.6691154401360642, "grad_norm": 0.5221294456641535, "learning_rate": 3.501416244803979e-06, "loss": 3.6373, "step": 7298 }, { "epoch": 1.6695727986050568, "grad_norm": 0.5349275977618462, "learning_rate": 3.4919690393345444e-06, "loss": 3.3111, "step": 7300 }, { "epoch": 1.6700301570740492, "grad_norm": 0.41722745311175213, "learning_rate": 3.48253363887196e-06, "loss": 3.415, "step": 7302 }, { "epoch": 1.6704875155430416, "grad_norm": 0.6234188344737838, "learning_rate": 3.473110048594999e-06, "loss": 3.5063, "step": 7304 }, { "epoch": 1.6709448740120343, "grad_norm": 0.42146042480867496, "learning_rate": 3.4636982736759166e-06, "loss": 3.5808, "step": 7306 }, { "epoch": 1.671402232481027, "grad_norm": 0.47889057961444764, "learning_rate": 3.4542983192805217e-06, "loss": 3.4381, "step": 7308 }, { "epoch": 1.6718595909500193, "grad_norm": 0.44668840033703455, "learning_rate": 3.4449101905681293e-06, "loss": 3.4181, "step": 7310 }, { "epoch": 1.6723169494190118, "grad_norm": 0.4610888992204366, "learning_rate": 3.435533892691531e-06, "loss": 3.4231, "step": 7312 }, { "epoch": 1.6727743078880044, "grad_norm": 0.4185637223429644, "learning_rate": 3.4261694307970633e-06, "loss": 3.3598, "step": 7314 }, { "epoch": 1.6732316663569968, "grad_norm": 0.6127033548067448, "learning_rate": 3.4168168100245646e-06, "loss": 3.3535, "step": 7316 }, { "epoch": 1.6736890248259892, "grad_norm": 0.5022014493494453, "learning_rate": 3.407476035507337e-06, "loss": 3.4052, "step": 7318 }, { "epoch": 1.6741463832949819, "grad_norm": 0.4293239095573725, "learning_rate": 3.398147112372219e-06, "loss": 3.484, "step": 7320 }, { "epoch": 1.6746037417639745, "grad_norm": 0.4051799068760361, "learning_rate": 3.3888300457395357e-06, "loss": 3.4602, "step": 7322 }, { "epoch": 1.675061100232967, "grad_norm": 0.4968714138117516, "learning_rate": 3.3795248407230835e-06, "loss": 3.3929, "step": 7324 }, { "epoch": 1.6755184587019594, "grad_norm": 0.5120662353874784, "learning_rate": 3.3702315024301793e-06, "loss": 3.3572, "step": 7326 }, { "epoch": 1.675975817170952, "grad_norm": 0.3822396916944918, "learning_rate": 3.3609500359616075e-06, "loss": 3.3692, "step": 7328 }, { "epoch": 1.6764331756399447, "grad_norm": 0.5153717517273928, "learning_rate": 3.3516804464116314e-06, "loss": 3.5297, "step": 7330 }, { "epoch": 1.676890534108937, "grad_norm": 0.5554095122650896, "learning_rate": 3.3424227388680174e-06, "loss": 3.463, "step": 7332 }, { "epoch": 1.6773478925779295, "grad_norm": 0.4964699857894146, "learning_rate": 3.3331769184119956e-06, "loss": 3.2881, "step": 7334 }, { "epoch": 1.6778052510469221, "grad_norm": 0.4495184520587586, "learning_rate": 3.3239429901182644e-06, "loss": 3.3741, "step": 7336 }, { "epoch": 1.6782626095159148, "grad_norm": 0.5073358793267886, "learning_rate": 3.314720959055018e-06, "loss": 3.4197, "step": 7338 }, { "epoch": 1.6787199679849072, "grad_norm": 0.49577950865316206, "learning_rate": 3.3055108302839005e-06, "loss": 3.3806, "step": 7340 }, { "epoch": 1.6791773264538996, "grad_norm": 0.4362370620546952, "learning_rate": 3.296312608860025e-06, "loss": 3.4174, "step": 7342 }, { "epoch": 1.6796346849228923, "grad_norm": 0.5509590511250513, "learning_rate": 3.287126299831983e-06, "loss": 3.3097, "step": 7344 }, { "epoch": 1.680092043391885, "grad_norm": 0.5097469165628298, "learning_rate": 3.2779519082418163e-06, "loss": 3.4014, "step": 7346 }, { "epoch": 1.6805494018608773, "grad_norm": 0.5240579884470833, "learning_rate": 3.268789439125017e-06, "loss": 3.4529, "step": 7348 }, { "epoch": 1.6810067603298697, "grad_norm": 0.48169963069938837, "learning_rate": 3.2596388975105615e-06, "loss": 3.438, "step": 7350 }, { "epoch": 1.6814641187988624, "grad_norm": 0.4607760906480398, "learning_rate": 3.2505002884208545e-06, "loss": 3.3901, "step": 7352 }, { "epoch": 1.6819214772678548, "grad_norm": 0.46871310724671345, "learning_rate": 3.2413736168717595e-06, "loss": 3.4296, "step": 7354 }, { "epoch": 1.6823788357368472, "grad_norm": 0.4118804295692823, "learning_rate": 3.2322588878725803e-06, "loss": 3.3939, "step": 7356 }, { "epoch": 1.6828361942058399, "grad_norm": 0.6306275688852514, "learning_rate": 3.2231561064260814e-06, "loss": 3.6808, "step": 7358 }, { "epoch": 1.6832935526748325, "grad_norm": 0.46119231923040066, "learning_rate": 3.214065277528472e-06, "loss": 3.4271, "step": 7360 }, { "epoch": 1.683750911143825, "grad_norm": 0.5513474009872484, "learning_rate": 3.204986406169366e-06, "loss": 3.2661, "step": 7362 }, { "epoch": 1.6842082696128173, "grad_norm": 0.4612361942334082, "learning_rate": 3.1959194973318536e-06, "loss": 3.6606, "step": 7364 }, { "epoch": 1.68466562808181, "grad_norm": 0.42917397764453813, "learning_rate": 3.1868645559924526e-06, "loss": 3.4946, "step": 7366 }, { "epoch": 1.6851229865508026, "grad_norm": 0.5315100650390578, "learning_rate": 3.1778215871210825e-06, "loss": 3.4116, "step": 7368 }, { "epoch": 1.685580345019795, "grad_norm": 0.6414170137680097, "learning_rate": 3.1687905956811233e-06, "loss": 3.439, "step": 7370 }, { "epoch": 1.6860377034887875, "grad_norm": 0.5023408275655691, "learning_rate": 3.1597715866293843e-06, "loss": 3.3541, "step": 7372 }, { "epoch": 1.68649506195778, "grad_norm": 0.36529681238145917, "learning_rate": 3.1507645649160616e-06, "loss": 3.365, "step": 7374 }, { "epoch": 1.6869524204267727, "grad_norm": 0.4622134957988826, "learning_rate": 3.141769535484804e-06, "loss": 3.3357, "step": 7376 }, { "epoch": 1.6874097788957652, "grad_norm": 0.5745911720447506, "learning_rate": 3.1327865032726804e-06, "loss": 3.2168, "step": 7378 }, { "epoch": 1.6878671373647576, "grad_norm": 0.4435507071621757, "learning_rate": 3.1238154732101427e-06, "loss": 3.4948, "step": 7380 }, { "epoch": 1.6883244958337502, "grad_norm": 0.4504548522467269, "learning_rate": 3.1148564502210902e-06, "loss": 3.4081, "step": 7382 }, { "epoch": 1.6887818543027429, "grad_norm": 0.4398845504619363, "learning_rate": 3.105909439222815e-06, "loss": 3.3962, "step": 7384 }, { "epoch": 1.6892392127717353, "grad_norm": 0.5995359593281769, "learning_rate": 3.0969744451260136e-06, "loss": 3.5803, "step": 7386 }, { "epoch": 1.6896965712407277, "grad_norm": 0.44881528897435535, "learning_rate": 3.088051472834799e-06, "loss": 3.4499, "step": 7388 }, { "epoch": 1.6901539297097203, "grad_norm": 0.42045780846667563, "learning_rate": 3.0791405272466768e-06, "loss": 3.3381, "step": 7390 }, { "epoch": 1.690611288178713, "grad_norm": 0.5255363857764402, "learning_rate": 3.0702416132525504e-06, "loss": 3.3883, "step": 7392 }, { "epoch": 1.6910686466477054, "grad_norm": 0.4549676490905048, "learning_rate": 3.0613547357367266e-06, "loss": 3.5408, "step": 7394 }, { "epoch": 1.6915260051166978, "grad_norm": 0.4620182814985664, "learning_rate": 3.0524798995769044e-06, "loss": 3.537, "step": 7396 }, { "epoch": 1.6919833635856905, "grad_norm": 0.47283853215916166, "learning_rate": 3.043617109644159e-06, "loss": 3.4247, "step": 7398 }, { "epoch": 1.6924407220546829, "grad_norm": 0.5150433062428117, "learning_rate": 3.034766370802983e-06, "loss": 3.3769, "step": 7400 }, { "epoch": 1.6928980805236753, "grad_norm": 0.42236014127942295, "learning_rate": 3.0259276879112286e-06, "loss": 3.3443, "step": 7402 }, { "epoch": 1.693355438992668, "grad_norm": 0.42239901882088376, "learning_rate": 3.0171010658201352e-06, "loss": 3.389, "step": 7404 }, { "epoch": 1.6938127974616606, "grad_norm": 0.45792105042069026, "learning_rate": 3.0082865093743417e-06, "loss": 3.4995, "step": 7406 }, { "epoch": 1.694270155930653, "grad_norm": 0.4361247689324967, "learning_rate": 2.999484023411839e-06, "loss": 3.4059, "step": 7408 }, { "epoch": 1.6947275143996454, "grad_norm": 0.5219009678484656, "learning_rate": 2.9906936127640035e-06, "loss": 3.4262, "step": 7410 }, { "epoch": 1.695184872868638, "grad_norm": 0.5511144076856765, "learning_rate": 2.98191528225559e-06, "loss": 3.3765, "step": 7412 }, { "epoch": 1.6956422313376307, "grad_norm": 0.47241240903071086, "learning_rate": 2.97314903670472e-06, "loss": 3.257, "step": 7414 }, { "epoch": 1.6960995898066231, "grad_norm": 0.44684102589500996, "learning_rate": 2.9643948809228716e-06, "loss": 3.3494, "step": 7416 }, { "epoch": 1.6965569482756155, "grad_norm": 0.40334031838222, "learning_rate": 2.9556528197148937e-06, "loss": 3.5024, "step": 7418 }, { "epoch": 1.6970143067446082, "grad_norm": 0.3915330631915312, "learning_rate": 2.9469228578790033e-06, "loss": 3.2614, "step": 7420 }, { "epoch": 1.6974716652136008, "grad_norm": 0.5744541230770859, "learning_rate": 2.938205000206784e-06, "loss": 3.431, "step": 7422 }, { "epoch": 1.6979290236825932, "grad_norm": 0.48857678434257606, "learning_rate": 2.9294992514831366e-06, "loss": 3.3754, "step": 7424 }, { "epoch": 1.6983863821515857, "grad_norm": 0.6162291945761995, "learning_rate": 2.920805616486358e-06, "loss": 3.3939, "step": 7426 }, { "epoch": 1.6988437406205783, "grad_norm": 0.39020460395164575, "learning_rate": 2.9121240999880892e-06, "loss": 3.1669, "step": 7428 }, { "epoch": 1.699301099089571, "grad_norm": 0.5441381088823035, "learning_rate": 2.903454706753292e-06, "loss": 3.3777, "step": 7430 }, { "epoch": 1.6997584575585634, "grad_norm": 0.41946652586218774, "learning_rate": 2.8947974415403034e-06, "loss": 3.5471, "step": 7432 }, { "epoch": 1.7002158160275558, "grad_norm": 0.47515588072311166, "learning_rate": 2.886152309100801e-06, "loss": 3.5203, "step": 7434 }, { "epoch": 1.7006731744965484, "grad_norm": 0.48937041306081597, "learning_rate": 2.87751931417978e-06, "loss": 3.3715, "step": 7436 }, { "epoch": 1.7011305329655408, "grad_norm": 0.48192015902607, "learning_rate": 2.8688984615155983e-06, "loss": 3.4858, "step": 7438 }, { "epoch": 1.7015878914345333, "grad_norm": 0.5947209751010754, "learning_rate": 2.860289755839948e-06, "loss": 3.3818, "step": 7440 }, { "epoch": 1.702045249903526, "grad_norm": 0.5538767679005902, "learning_rate": 2.851693201877831e-06, "loss": 3.4624, "step": 7442 }, { "epoch": 1.7025026083725185, "grad_norm": 0.41853684295196225, "learning_rate": 2.8431088043476106e-06, "loss": 3.3079, "step": 7444 }, { "epoch": 1.702959966841511, "grad_norm": 0.5179427392104704, "learning_rate": 2.834536567960955e-06, "loss": 3.484, "step": 7446 }, { "epoch": 1.7034173253105034, "grad_norm": 0.6257980973051118, "learning_rate": 2.8259764974228582e-06, "loss": 3.4229, "step": 7448 }, { "epoch": 1.703874683779496, "grad_norm": 0.47101581048953894, "learning_rate": 2.8174285974316596e-06, "loss": 3.4688, "step": 7450 }, { "epoch": 1.7043320422484887, "grad_norm": 0.5305466764626927, "learning_rate": 2.8088928726789946e-06, "loss": 3.3833, "step": 7452 }, { "epoch": 1.704789400717481, "grad_norm": 0.4762068187231043, "learning_rate": 2.8003693278498224e-06, "loss": 3.367, "step": 7454 }, { "epoch": 1.7052467591864735, "grad_norm": 0.5428626965475307, "learning_rate": 2.7918579676224214e-06, "loss": 3.4382, "step": 7456 }, { "epoch": 1.7057041176554661, "grad_norm": 0.47572623813341136, "learning_rate": 2.783358796668384e-06, "loss": 3.3197, "step": 7458 }, { "epoch": 1.7061614761244588, "grad_norm": 0.5213750374573809, "learning_rate": 2.7748718196525998e-06, "loss": 3.5766, "step": 7460 }, { "epoch": 1.7066188345934512, "grad_norm": 0.5677332362274722, "learning_rate": 2.766397041233282e-06, "loss": 3.4169, "step": 7462 }, { "epoch": 1.7070761930624436, "grad_norm": 0.521688614186557, "learning_rate": 2.7579344660619374e-06, "loss": 3.4447, "step": 7464 }, { "epoch": 1.7075335515314363, "grad_norm": 0.4669705470873278, "learning_rate": 2.7494840987833727e-06, "loss": 3.2753, "step": 7466 }, { "epoch": 1.707990910000429, "grad_norm": 0.37605340345810745, "learning_rate": 2.7410459440357083e-06, "loss": 3.4513, "step": 7468 }, { "epoch": 1.7084482684694213, "grad_norm": 0.44024288842838516, "learning_rate": 2.732620006450348e-06, "loss": 3.5941, "step": 7470 }, { "epoch": 1.7089056269384137, "grad_norm": 0.5655573040405997, "learning_rate": 2.7242062906519954e-06, "loss": 3.5645, "step": 7472 }, { "epoch": 1.7093629854074064, "grad_norm": 0.6678536076047801, "learning_rate": 2.7158048012586397e-06, "loss": 3.4465, "step": 7474 }, { "epoch": 1.709820343876399, "grad_norm": 0.5588292779087977, "learning_rate": 2.707415542881575e-06, "loss": 3.5095, "step": 7476 }, { "epoch": 1.7102777023453914, "grad_norm": 0.4481398022972209, "learning_rate": 2.699038520125366e-06, "loss": 3.3256, "step": 7478 }, { "epoch": 1.7107350608143839, "grad_norm": 0.474966268220094, "learning_rate": 2.690673737587865e-06, "loss": 3.3682, "step": 7480 }, { "epoch": 1.7111924192833765, "grad_norm": 0.3969935450389457, "learning_rate": 2.6823211998602188e-06, "loss": 3.5128, "step": 7482 }, { "epoch": 1.711649777752369, "grad_norm": 0.4561804653116081, "learning_rate": 2.6739809115268355e-06, "loss": 3.3937, "step": 7484 }, { "epoch": 1.7121071362213613, "grad_norm": 0.4304189005821778, "learning_rate": 2.665652877165406e-06, "loss": 3.3245, "step": 7486 }, { "epoch": 1.712564494690354, "grad_norm": 0.5045749456126922, "learning_rate": 2.6573371013469017e-06, "loss": 3.4788, "step": 7488 }, { "epoch": 1.7130218531593466, "grad_norm": 0.4476077341415899, "learning_rate": 2.6490335886355714e-06, "loss": 3.2833, "step": 7490 }, { "epoch": 1.713479211628339, "grad_norm": 0.6222116737900213, "learning_rate": 2.6407423435889034e-06, "loss": 3.3143, "step": 7492 }, { "epoch": 1.7139365700973315, "grad_norm": 0.512395745815273, "learning_rate": 2.6324633707576817e-06, "loss": 3.175, "step": 7494 }, { "epoch": 1.714393928566324, "grad_norm": 0.4196975818709798, "learning_rate": 2.624196674685958e-06, "loss": 3.4301, "step": 7496 }, { "epoch": 1.7148512870353168, "grad_norm": 0.5152321966557587, "learning_rate": 2.6159422599110084e-06, "loss": 3.326, "step": 7498 }, { "epoch": 1.7153086455043092, "grad_norm": 0.4520966436328179, "learning_rate": 2.6077001309634113e-06, "loss": 3.4087, "step": 7500 }, { "epoch": 1.7157660039733016, "grad_norm": 0.5962707316795041, "learning_rate": 2.5994702923669763e-06, "loss": 3.4934, "step": 7502 }, { "epoch": 1.7162233624422942, "grad_norm": 0.40635311211973385, "learning_rate": 2.59125274863877e-06, "loss": 3.5087, "step": 7504 }, { "epoch": 1.7166807209112869, "grad_norm": 0.3997063348551112, "learning_rate": 2.5830475042891237e-06, "loss": 3.4168, "step": 7506 }, { "epoch": 1.7171380793802793, "grad_norm": 0.36979560670858, "learning_rate": 2.5748545638216102e-06, "loss": 3.3721, "step": 7508 }, { "epoch": 1.7175954378492717, "grad_norm": 0.49449063898180534, "learning_rate": 2.5666739317330353e-06, "loss": 3.2801, "step": 7510 }, { "epoch": 1.7180527963182644, "grad_norm": 0.534513463379788, "learning_rate": 2.5585056125134788e-06, "loss": 3.2443, "step": 7512 }, { "epoch": 1.718510154787257, "grad_norm": 0.47350849211065515, "learning_rate": 2.5503496106462355e-06, "loss": 3.4972, "step": 7514 }, { "epoch": 1.7189675132562494, "grad_norm": 0.46599095895596215, "learning_rate": 2.5422059306078496e-06, "loss": 3.4491, "step": 7516 }, { "epoch": 1.7194248717252418, "grad_norm": 0.4589989030851916, "learning_rate": 2.534074576868109e-06, "loss": 3.2534, "step": 7518 }, { "epoch": 1.7198822301942345, "grad_norm": 0.4480141452318781, "learning_rate": 2.525955553890025e-06, "loss": 3.4623, "step": 7520 }, { "epoch": 1.7203395886632271, "grad_norm": 0.4250543847068681, "learning_rate": 2.5178488661298432e-06, "loss": 3.3697, "step": 7522 }, { "epoch": 1.7207969471322193, "grad_norm": 0.4551695703141242, "learning_rate": 2.5097545180370497e-06, "loss": 3.4011, "step": 7524 }, { "epoch": 1.721254305601212, "grad_norm": 0.6021707358053433, "learning_rate": 2.5016725140543458e-06, "loss": 3.4281, "step": 7526 }, { "epoch": 1.7217116640702046, "grad_norm": 0.600415163649355, "learning_rate": 2.4936028586176553e-06, "loss": 3.5424, "step": 7528 }, { "epoch": 1.722169022539197, "grad_norm": 0.6422535734689742, "learning_rate": 2.485545556156138e-06, "loss": 3.2632, "step": 7530 }, { "epoch": 1.7226263810081894, "grad_norm": 0.5090504448987191, "learning_rate": 2.477500611092162e-06, "loss": 3.357, "step": 7532 }, { "epoch": 1.723083739477182, "grad_norm": 0.4992006588136776, "learning_rate": 2.469468027841318e-06, "loss": 3.3891, "step": 7534 }, { "epoch": 1.7235410979461747, "grad_norm": 0.47016980994394353, "learning_rate": 2.4614478108124035e-06, "loss": 3.4757, "step": 7536 }, { "epoch": 1.7239984564151671, "grad_norm": 0.46886256773679, "learning_rate": 2.453439964407447e-06, "loss": 3.3961, "step": 7538 }, { "epoch": 1.7244558148841596, "grad_norm": 0.5491112084188383, "learning_rate": 2.445444493021673e-06, "loss": 3.3967, "step": 7540 }, { "epoch": 1.7249131733531522, "grad_norm": 0.6126535933460561, "learning_rate": 2.4374614010435057e-06, "loss": 3.291, "step": 7542 }, { "epoch": 1.7253705318221448, "grad_norm": 0.4955435126904875, "learning_rate": 2.4294906928546e-06, "loss": 3.3752, "step": 7544 }, { "epoch": 1.7258278902911373, "grad_norm": 0.49171111349134417, "learning_rate": 2.4215323728297905e-06, "loss": 3.4621, "step": 7546 }, { "epoch": 1.7262852487601297, "grad_norm": 0.4737633122257205, "learning_rate": 2.413586445337118e-06, "loss": 3.3431, "step": 7548 }, { "epoch": 1.7267426072291223, "grad_norm": 0.4890471136092009, "learning_rate": 2.40565291473783e-06, "loss": 3.4014, "step": 7550 }, { "epoch": 1.727199965698115, "grad_norm": 0.6572850557005893, "learning_rate": 2.3977317853863757e-06, "loss": 3.5006, "step": 7552 }, { "epoch": 1.7276573241671074, "grad_norm": 0.466249903635419, "learning_rate": 2.389823061630361e-06, "loss": 3.3144, "step": 7554 }, { "epoch": 1.7281146826360998, "grad_norm": 0.6309793080747381, "learning_rate": 2.3819267478106255e-06, "loss": 3.4934, "step": 7556 }, { "epoch": 1.7285720411050924, "grad_norm": 0.5365634432536095, "learning_rate": 2.3740428482611822e-06, "loss": 3.3455, "step": 7558 }, { "epoch": 1.729029399574085, "grad_norm": 0.586972251203723, "learning_rate": 2.366171367309214e-06, "loss": 3.4552, "step": 7560 }, { "epoch": 1.7294867580430775, "grad_norm": 0.448168098127638, "learning_rate": 2.3583123092751155e-06, "loss": 3.4642, "step": 7562 }, { "epoch": 1.72994411651207, "grad_norm": 0.5851486490772034, "learning_rate": 2.350465678472441e-06, "loss": 3.363, "step": 7564 }, { "epoch": 1.7304014749810626, "grad_norm": 0.5077138658247429, "learning_rate": 2.3426314792079324e-06, "loss": 3.3452, "step": 7566 }, { "epoch": 1.730858833450055, "grad_norm": 0.6034004570660553, "learning_rate": 2.3348097157815135e-06, "loss": 3.3838, "step": 7568 }, { "epoch": 1.7313161919190474, "grad_norm": 0.41885586652938117, "learning_rate": 2.3270003924862728e-06, "loss": 3.4115, "step": 7570 }, { "epoch": 1.73177355038804, "grad_norm": 0.5393001751147353, "learning_rate": 2.3192035136084726e-06, "loss": 3.5008, "step": 7572 }, { "epoch": 1.7322309088570327, "grad_norm": 0.5002823568838584, "learning_rate": 2.3114190834275546e-06, "loss": 3.3268, "step": 7574 }, { "epoch": 1.732688267326025, "grad_norm": 0.45856506998819335, "learning_rate": 2.303647106216114e-06, "loss": 3.4883, "step": 7576 }, { "epoch": 1.7331456257950175, "grad_norm": 0.547160210487202, "learning_rate": 2.29588758623992e-06, "loss": 3.2901, "step": 7578 }, { "epoch": 1.7336029842640102, "grad_norm": 0.5432520340092759, "learning_rate": 2.288140527757904e-06, "loss": 3.2902, "step": 7580 }, { "epoch": 1.7340603427330028, "grad_norm": 0.539281855748032, "learning_rate": 2.2804059350221536e-06, "loss": 3.2773, "step": 7582 }, { "epoch": 1.7345177012019952, "grad_norm": 0.5368277348861344, "learning_rate": 2.272683812277915e-06, "loss": 3.437, "step": 7584 }, { "epoch": 1.7349750596709876, "grad_norm": 0.5160844085614813, "learning_rate": 2.2649741637635946e-06, "loss": 3.3468, "step": 7586 }, { "epoch": 1.7354324181399803, "grad_norm": 0.4784555825691421, "learning_rate": 2.25727699371075e-06, "loss": 3.2863, "step": 7588 }, { "epoch": 1.735889776608973, "grad_norm": 0.5255248171375251, "learning_rate": 2.2495923063440867e-06, "loss": 3.4975, "step": 7590 }, { "epoch": 1.7363471350779653, "grad_norm": 0.5335118165231463, "learning_rate": 2.2419201058814594e-06, "loss": 3.3578, "step": 7592 }, { "epoch": 1.7368044935469578, "grad_norm": 0.5335932551503156, "learning_rate": 2.234260396533874e-06, "loss": 3.5142, "step": 7594 }, { "epoch": 1.7372618520159504, "grad_norm": 0.43715715598870736, "learning_rate": 2.226613182505477e-06, "loss": 3.3709, "step": 7596 }, { "epoch": 1.737719210484943, "grad_norm": 0.46011596140628114, "learning_rate": 2.218978467993554e-06, "loss": 3.2714, "step": 7598 }, { "epoch": 1.7381765689539355, "grad_norm": 0.5094803636789412, "learning_rate": 2.211356257188538e-06, "loss": 3.3663, "step": 7600 }, { "epoch": 1.7386339274229279, "grad_norm": 0.4990895147303341, "learning_rate": 2.2037465542739917e-06, "loss": 3.3666, "step": 7602 }, { "epoch": 1.7390912858919205, "grad_norm": 0.41162719360031186, "learning_rate": 2.196149363426611e-06, "loss": 3.4827, "step": 7604 }, { "epoch": 1.7395486443609132, "grad_norm": 0.4530614117696004, "learning_rate": 2.1885646888162386e-06, "loss": 3.3918, "step": 7606 }, { "epoch": 1.7400060028299054, "grad_norm": 0.4210076599726763, "learning_rate": 2.1809925346058303e-06, "loss": 3.3695, "step": 7608 }, { "epoch": 1.740463361298898, "grad_norm": 0.35958404266151905, "learning_rate": 2.1734329049514752e-06, "loss": 3.6157, "step": 7610 }, { "epoch": 1.7409207197678906, "grad_norm": 0.4731857706999957, "learning_rate": 2.165885804002396e-06, "loss": 3.3701, "step": 7612 }, { "epoch": 1.741378078236883, "grad_norm": 0.4621830453458115, "learning_rate": 2.1583512359009324e-06, "loss": 3.3694, "step": 7614 }, { "epoch": 1.7418354367058755, "grad_norm": 0.49531280445366294, "learning_rate": 2.1508292047825368e-06, "loss": 3.4113, "step": 7616 }, { "epoch": 1.7422927951748681, "grad_norm": 0.47173435376172207, "learning_rate": 2.1433197147757988e-06, "loss": 3.3326, "step": 7618 }, { "epoch": 1.7427501536438608, "grad_norm": 0.4864911644888969, "learning_rate": 2.135822770002413e-06, "loss": 3.3513, "step": 7620 }, { "epoch": 1.7432075121128532, "grad_norm": 0.5392235501522565, "learning_rate": 2.1283383745771853e-06, "loss": 3.3382, "step": 7622 }, { "epoch": 1.7436648705818456, "grad_norm": 0.46799371426693426, "learning_rate": 2.1208665326080486e-06, "loss": 3.5027, "step": 7624 }, { "epoch": 1.7441222290508382, "grad_norm": 0.49633852109104387, "learning_rate": 2.1134072481960284e-06, "loss": 3.4965, "step": 7626 }, { "epoch": 1.7445795875198309, "grad_norm": 0.4228604780495045, "learning_rate": 2.1059605254352632e-06, "loss": 3.4155, "step": 7628 }, { "epoch": 1.7450369459888233, "grad_norm": 0.4639525003784567, "learning_rate": 2.0985263684130093e-06, "loss": 3.5642, "step": 7630 }, { "epoch": 1.7454943044578157, "grad_norm": 0.47099401262754087, "learning_rate": 2.0911047812096073e-06, "loss": 3.3916, "step": 7632 }, { "epoch": 1.7459516629268084, "grad_norm": 0.5281557683615793, "learning_rate": 2.083695767898508e-06, "loss": 3.4846, "step": 7634 }, { "epoch": 1.746409021395801, "grad_norm": 0.5022066418667731, "learning_rate": 2.076299332546264e-06, "loss": 3.47, "step": 7636 }, { "epoch": 1.7468663798647934, "grad_norm": 0.5571191972426498, "learning_rate": 2.068915479212516e-06, "loss": 3.4097, "step": 7638 }, { "epoch": 1.7473237383337858, "grad_norm": 0.4560023569801134, "learning_rate": 2.0615442119500034e-06, "loss": 3.4615, "step": 7640 }, { "epoch": 1.7477810968027785, "grad_norm": 0.5420634729743247, "learning_rate": 2.054185534804562e-06, "loss": 3.4318, "step": 7642 }, { "epoch": 1.7482384552717711, "grad_norm": 0.5221939973638123, "learning_rate": 2.046839451815108e-06, "loss": 3.2964, "step": 7644 }, { "epoch": 1.7486958137407635, "grad_norm": 0.48113754680187637, "learning_rate": 2.0395059670136523e-06, "loss": 3.3834, "step": 7646 }, { "epoch": 1.749153172209756, "grad_norm": 0.4481943474798285, "learning_rate": 2.0321850844252837e-06, "loss": 3.5106, "step": 7648 }, { "epoch": 1.7496105306787486, "grad_norm": 0.46986726489528746, "learning_rate": 2.0248768080681853e-06, "loss": 3.4169, "step": 7650 }, { "epoch": 1.750067889147741, "grad_norm": 0.555935758059401, "learning_rate": 2.0175811419536143e-06, "loss": 3.3179, "step": 7652 }, { "epoch": 1.7505252476167334, "grad_norm": 0.4336350910995268, "learning_rate": 2.010298090085899e-06, "loss": 3.4586, "step": 7654 }, { "epoch": 1.750982606085726, "grad_norm": 0.5253000571316423, "learning_rate": 2.003027656462461e-06, "loss": 3.3147, "step": 7656 }, { "epoch": 1.7514399645547187, "grad_norm": 0.6386270303176859, "learning_rate": 1.995769845073786e-06, "loss": 3.3455, "step": 7658 }, { "epoch": 1.7518973230237112, "grad_norm": 0.466684926547144, "learning_rate": 1.988524659903426e-06, "loss": 3.551, "step": 7660 }, { "epoch": 1.7523546814927036, "grad_norm": 0.4839041328568623, "learning_rate": 1.9812921049280175e-06, "loss": 3.3551, "step": 7662 }, { "epoch": 1.7528120399616962, "grad_norm": 0.48126495883036396, "learning_rate": 1.974072184117259e-06, "loss": 3.3661, "step": 7664 }, { "epoch": 1.7532693984306889, "grad_norm": 0.40340271262744803, "learning_rate": 1.9668649014339013e-06, "loss": 3.3057, "step": 7666 }, { "epoch": 1.7537267568996813, "grad_norm": 0.48662711148767895, "learning_rate": 1.9596702608337847e-06, "loss": 3.4556, "step": 7668 }, { "epoch": 1.7541841153686737, "grad_norm": 0.6276408431441598, "learning_rate": 1.9524882662657866e-06, "loss": 3.381, "step": 7670 }, { "epoch": 1.7546414738376663, "grad_norm": 0.5129606014723473, "learning_rate": 1.9453189216718515e-06, "loss": 3.3549, "step": 7672 }, { "epoch": 1.755098832306659, "grad_norm": 0.6558750307487712, "learning_rate": 1.9381622309869924e-06, "loss": 3.1965, "step": 7674 }, { "epoch": 1.7555561907756514, "grad_norm": 0.5358078046188278, "learning_rate": 1.931018198139259e-06, "loss": 3.5874, "step": 7676 }, { "epoch": 1.7560135492446438, "grad_norm": 0.41330799600341167, "learning_rate": 1.923886827049759e-06, "loss": 3.521, "step": 7678 }, { "epoch": 1.7564709077136365, "grad_norm": 0.4850392094473882, "learning_rate": 1.9167681216326615e-06, "loss": 3.3897, "step": 7680 }, { "epoch": 1.756928266182629, "grad_norm": 0.4341886751975281, "learning_rate": 1.9096620857951692e-06, "loss": 3.2681, "step": 7682 }, { "epoch": 1.7573856246516215, "grad_norm": 0.42478893733149475, "learning_rate": 1.902568723437534e-06, "loss": 3.3764, "step": 7684 }, { "epoch": 1.757842983120614, "grad_norm": 0.4610014079128289, "learning_rate": 1.8954880384530639e-06, "loss": 3.5366, "step": 7686 }, { "epoch": 1.7583003415896066, "grad_norm": 0.545110826991665, "learning_rate": 1.8884200347280945e-06, "loss": 3.3175, "step": 7688 }, { "epoch": 1.7587577000585992, "grad_norm": 0.554916817986981, "learning_rate": 1.8813647161420006e-06, "loss": 3.2882, "step": 7690 }, { "epoch": 1.7592150585275914, "grad_norm": 0.5478540920260371, "learning_rate": 1.8743220865672123e-06, "loss": 3.6183, "step": 7692 }, { "epoch": 1.759672416996584, "grad_norm": 0.5005955725318237, "learning_rate": 1.8672921498691736e-06, "loss": 3.392, "step": 7694 }, { "epoch": 1.7601297754655767, "grad_norm": 0.5753447981206331, "learning_rate": 1.8602749099063705e-06, "loss": 3.4213, "step": 7696 }, { "epoch": 1.7605871339345691, "grad_norm": 0.5756656815425275, "learning_rate": 1.85327037053033e-06, "loss": 3.5785, "step": 7698 }, { "epoch": 1.7610444924035615, "grad_norm": 0.4370771305849002, "learning_rate": 1.8462785355855911e-06, "loss": 3.4867, "step": 7700 }, { "epoch": 1.7615018508725542, "grad_norm": 0.4749984402379979, "learning_rate": 1.839299408909728e-06, "loss": 3.3983, "step": 7702 }, { "epoch": 1.7619592093415468, "grad_norm": 0.6131584627700555, "learning_rate": 1.832332994333344e-06, "loss": 3.3945, "step": 7704 }, { "epoch": 1.7624165678105392, "grad_norm": 0.4488686067736178, "learning_rate": 1.8253792956800602e-06, "loss": 3.2382, "step": 7706 }, { "epoch": 1.7628739262795317, "grad_norm": 0.4915487595254695, "learning_rate": 1.8184383167665158e-06, "loss": 3.4264, "step": 7708 }, { "epoch": 1.7633312847485243, "grad_norm": 0.5535168450556778, "learning_rate": 1.8115100614023662e-06, "loss": 3.4557, "step": 7710 }, { "epoch": 1.763788643217517, "grad_norm": 0.4638921148729875, "learning_rate": 1.8045945333903024e-06, "loss": 3.43, "step": 7712 }, { "epoch": 1.7642460016865094, "grad_norm": 0.5206632972360786, "learning_rate": 1.7976917365260081e-06, "loss": 3.432, "step": 7714 }, { "epoch": 1.7647033601555018, "grad_norm": 0.39406682997524684, "learning_rate": 1.790801674598186e-06, "loss": 3.4229, "step": 7716 }, { "epoch": 1.7651607186244944, "grad_norm": 0.5228668347459156, "learning_rate": 1.783924351388555e-06, "loss": 3.2939, "step": 7718 }, { "epoch": 1.765618077093487, "grad_norm": 0.5748185120144058, "learning_rate": 1.777059770671835e-06, "loss": 3.3314, "step": 7720 }, { "epoch": 1.7660754355624795, "grad_norm": 0.801521666083439, "learning_rate": 1.770207936215751e-06, "loss": 3.4156, "step": 7722 }, { "epoch": 1.766532794031472, "grad_norm": 0.406220343314253, "learning_rate": 1.7633688517810443e-06, "loss": 3.3158, "step": 7724 }, { "epoch": 1.7669901525004645, "grad_norm": 0.5380511263960076, "learning_rate": 1.7565425211214432e-06, "loss": 3.1802, "step": 7726 }, { "epoch": 1.7674475109694572, "grad_norm": 0.6190629061948396, "learning_rate": 1.749728947983681e-06, "loss": 3.4124, "step": 7728 }, { "epoch": 1.7679048694384496, "grad_norm": 0.6488022525229081, "learning_rate": 1.7429281361074968e-06, "loss": 3.4112, "step": 7730 }, { "epoch": 1.768362227907442, "grad_norm": 0.47236108627582235, "learning_rate": 1.736140089225613e-06, "loss": 3.5469, "step": 7732 }, { "epoch": 1.7688195863764347, "grad_norm": 0.4954061774589049, "learning_rate": 1.7293648110637467e-06, "loss": 3.435, "step": 7734 }, { "epoch": 1.769276944845427, "grad_norm": 0.4127070535848239, "learning_rate": 1.7226023053406259e-06, "loss": 3.3979, "step": 7736 }, { "epoch": 1.7697343033144195, "grad_norm": 0.5727612168934143, "learning_rate": 1.7158525757679427e-06, "loss": 3.244, "step": 7738 }, { "epoch": 1.7701916617834121, "grad_norm": 0.5218898040133679, "learning_rate": 1.7091156260503866e-06, "loss": 3.3794, "step": 7740 }, { "epoch": 1.7706490202524048, "grad_norm": 0.3633113151332789, "learning_rate": 1.7023914598856412e-06, "loss": 3.4249, "step": 7742 }, { "epoch": 1.7711063787213972, "grad_norm": 0.5726091310858294, "learning_rate": 1.6956800809643625e-06, "loss": 3.3543, "step": 7744 }, { "epoch": 1.7715637371903896, "grad_norm": 0.6108354283210595, "learning_rate": 1.6889814929701903e-06, "loss": 3.467, "step": 7746 }, { "epoch": 1.7720210956593823, "grad_norm": 0.7686910544645994, "learning_rate": 1.682295699579753e-06, "loss": 3.5573, "step": 7748 }, { "epoch": 1.772478454128375, "grad_norm": 0.43986114210092475, "learning_rate": 1.67562270446264e-06, "loss": 3.4752, "step": 7750 }, { "epoch": 1.7729358125973673, "grad_norm": 0.4029962933029111, "learning_rate": 1.6689625112814272e-06, "loss": 3.32, "step": 7752 }, { "epoch": 1.7733931710663597, "grad_norm": 0.4403972571501088, "learning_rate": 1.6623151236916683e-06, "loss": 3.3864, "step": 7754 }, { "epoch": 1.7738505295353524, "grad_norm": 0.5734438695735736, "learning_rate": 1.6556805453418756e-06, "loss": 3.2906, "step": 7756 }, { "epoch": 1.774307888004345, "grad_norm": 0.5614371324640112, "learning_rate": 1.6490587798735356e-06, "loss": 3.307, "step": 7758 }, { "epoch": 1.7747652464733374, "grad_norm": 0.5593518801125514, "learning_rate": 1.6424498309211139e-06, "loss": 3.3427, "step": 7760 }, { "epoch": 1.7752226049423299, "grad_norm": 0.5469156522627621, "learning_rate": 1.6358537021120253e-06, "loss": 3.3818, "step": 7762 }, { "epoch": 1.7756799634113225, "grad_norm": 0.5104238126864359, "learning_rate": 1.6292703970666512e-06, "loss": 3.2068, "step": 7764 }, { "epoch": 1.7761373218803151, "grad_norm": 0.6024234605188769, "learning_rate": 1.6226999193983405e-06, "loss": 3.4357, "step": 7766 }, { "epoch": 1.7765946803493076, "grad_norm": 0.5972511564190073, "learning_rate": 1.6161422727134028e-06, "loss": 3.5122, "step": 7768 }, { "epoch": 1.7770520388183, "grad_norm": 0.4369251016203628, "learning_rate": 1.609597460611098e-06, "loss": 3.2938, "step": 7770 }, { "epoch": 1.7775093972872926, "grad_norm": 0.5379795585474998, "learning_rate": 1.6030654866836415e-06, "loss": 3.3783, "step": 7772 }, { "epoch": 1.7779667557562853, "grad_norm": 0.5475450770228822, "learning_rate": 1.5965463545162152e-06, "loss": 3.5476, "step": 7774 }, { "epoch": 1.7784241142252777, "grad_norm": 0.49565534669541134, "learning_rate": 1.5900400676869348e-06, "loss": 3.5354, "step": 7776 }, { "epoch": 1.77888147269427, "grad_norm": 0.5433175592629044, "learning_rate": 1.5835466297668717e-06, "loss": 3.5681, "step": 7778 }, { "epoch": 1.7793388311632627, "grad_norm": 0.5105631935879819, "learning_rate": 1.5770660443200552e-06, "loss": 3.457, "step": 7780 }, { "epoch": 1.7797961896322552, "grad_norm": 0.5010493116000427, "learning_rate": 1.5705983149034486e-06, "loss": 3.578, "step": 7782 }, { "epoch": 1.7802535481012476, "grad_norm": 0.4653546707073854, "learning_rate": 1.5641434450669596e-06, "loss": 3.4599, "step": 7784 }, { "epoch": 1.7807109065702402, "grad_norm": 0.4574644367018063, "learning_rate": 1.5577014383534455e-06, "loss": 3.3799, "step": 7786 }, { "epoch": 1.7811682650392329, "grad_norm": 0.5481548863596184, "learning_rate": 1.5512722982987004e-06, "loss": 3.3912, "step": 7788 }, { "epoch": 1.7816256235082253, "grad_norm": 0.503988983696175, "learning_rate": 1.5448560284314434e-06, "loss": 3.4752, "step": 7790 }, { "epoch": 1.7820829819772177, "grad_norm": 0.41866476273230313, "learning_rate": 1.5384526322733546e-06, "loss": 3.3764, "step": 7792 }, { "epoch": 1.7825403404462103, "grad_norm": 0.40557309757612536, "learning_rate": 1.5320621133390307e-06, "loss": 3.4129, "step": 7794 }, { "epoch": 1.782997698915203, "grad_norm": 0.5231926188639475, "learning_rate": 1.5256844751359966e-06, "loss": 3.3257, "step": 7796 }, { "epoch": 1.7834550573841954, "grad_norm": 0.4134593565419243, "learning_rate": 1.519319721164722e-06, "loss": 3.4337, "step": 7798 }, { "epoch": 1.7839124158531878, "grad_norm": 0.49268207224675276, "learning_rate": 1.5129678549186011e-06, "loss": 3.2904, "step": 7800 }, { "epoch": 1.7843697743221805, "grad_norm": 0.4328500194330994, "learning_rate": 1.5066288798839396e-06, "loss": 3.4393, "step": 7802 }, { "epoch": 1.784827132791173, "grad_norm": 0.4764479827868581, "learning_rate": 1.5003027995399932e-06, "loss": 3.238, "step": 7804 }, { "epoch": 1.7852844912601655, "grad_norm": 0.5741506782717752, "learning_rate": 1.4939896173589179e-06, "loss": 3.4775, "step": 7806 }, { "epoch": 1.785741849729158, "grad_norm": 0.46900661120299725, "learning_rate": 1.4876893368057975e-06, "loss": 3.4635, "step": 7808 }, { "epoch": 1.7861992081981506, "grad_norm": 0.6135268964646512, "learning_rate": 1.4814019613386437e-06, "loss": 3.4955, "step": 7810 }, { "epoch": 1.7866565666671432, "grad_norm": 0.4617066189742393, "learning_rate": 1.4751274944083737e-06, "loss": 3.4338, "step": 7812 }, { "epoch": 1.7871139251361357, "grad_norm": 0.4199600379812865, "learning_rate": 1.468865939458819e-06, "loss": 3.3016, "step": 7814 }, { "epoch": 1.787571283605128, "grad_norm": 0.4196972090733541, "learning_rate": 1.4626172999267357e-06, "loss": 3.5598, "step": 7816 }, { "epoch": 1.7880286420741207, "grad_norm": 0.6346818604587351, "learning_rate": 1.4563815792417779e-06, "loss": 3.4953, "step": 7818 }, { "epoch": 1.7884860005431131, "grad_norm": 0.47770042267593615, "learning_rate": 1.4501587808265132e-06, "loss": 3.3738, "step": 7820 }, { "epoch": 1.7889433590121055, "grad_norm": 0.43994137237921277, "learning_rate": 1.4439489080964292e-06, "loss": 3.2784, "step": 7822 }, { "epoch": 1.7894007174810982, "grad_norm": 0.4185483959613396, "learning_rate": 1.4377519644598964e-06, "loss": 3.3342, "step": 7824 }, { "epoch": 1.7898580759500908, "grad_norm": 0.6102992409214635, "learning_rate": 1.4315679533182081e-06, "loss": 3.2763, "step": 7826 }, { "epoch": 1.7903154344190833, "grad_norm": 0.4897400693056992, "learning_rate": 1.4253968780655435e-06, "loss": 3.4103, "step": 7828 }, { "epoch": 1.7907727928880757, "grad_norm": 0.5370659273087723, "learning_rate": 1.4192387420890018e-06, "loss": 3.5422, "step": 7830 }, { "epoch": 1.7912301513570683, "grad_norm": 0.3966055811634553, "learning_rate": 1.4130935487685625e-06, "loss": 3.1575, "step": 7832 }, { "epoch": 1.791687509826061, "grad_norm": 0.4638527637046136, "learning_rate": 1.4069613014771027e-06, "loss": 3.3676, "step": 7834 }, { "epoch": 1.7921448682950534, "grad_norm": 0.551202220878916, "learning_rate": 1.4008420035804077e-06, "loss": 3.3602, "step": 7836 }, { "epoch": 1.7926022267640458, "grad_norm": 0.5164515199887624, "learning_rate": 1.3947356584371407e-06, "loss": 3.4635, "step": 7838 }, { "epoch": 1.7930595852330384, "grad_norm": 0.5650335844996053, "learning_rate": 1.3886422693988626e-06, "loss": 3.4673, "step": 7840 }, { "epoch": 1.793516943702031, "grad_norm": 0.41697708924451277, "learning_rate": 1.3825618398100232e-06, "loss": 3.4066, "step": 7842 }, { "epoch": 1.7939743021710235, "grad_norm": 0.6533784484086262, "learning_rate": 1.3764943730079582e-06, "loss": 3.3942, "step": 7844 }, { "epoch": 1.794431660640016, "grad_norm": 0.414019888358691, "learning_rate": 1.3704398723228818e-06, "loss": 3.3292, "step": 7846 }, { "epoch": 1.7948890191090086, "grad_norm": 0.43192749502114164, "learning_rate": 1.3643983410779076e-06, "loss": 3.4716, "step": 7848 }, { "epoch": 1.7953463775780012, "grad_norm": 0.44580298733452645, "learning_rate": 1.358369782589014e-06, "loss": 3.3616, "step": 7850 }, { "epoch": 1.7958037360469936, "grad_norm": 0.5883712831375335, "learning_rate": 1.352354200165068e-06, "loss": 3.3811, "step": 7852 }, { "epoch": 1.796261094515986, "grad_norm": 0.4265007624121825, "learning_rate": 1.3463515971078144e-06, "loss": 3.5347, "step": 7854 }, { "epoch": 1.7967184529849787, "grad_norm": 0.5027254152722233, "learning_rate": 1.3403619767118736e-06, "loss": 3.2791, "step": 7856 }, { "epoch": 1.7971758114539713, "grad_norm": 0.43088126382018077, "learning_rate": 1.3343853422647324e-06, "loss": 3.3564, "step": 7858 }, { "epoch": 1.7976331699229637, "grad_norm": 0.48203529282022944, "learning_rate": 1.328421697046764e-06, "loss": 3.6435, "step": 7860 }, { "epoch": 1.7980905283919562, "grad_norm": 0.4402402313844796, "learning_rate": 1.3224710443311999e-06, "loss": 3.3478, "step": 7862 }, { "epoch": 1.7985478868609488, "grad_norm": 0.6562131008491983, "learning_rate": 1.3165333873841446e-06, "loss": 3.4434, "step": 7864 }, { "epoch": 1.7990052453299412, "grad_norm": 0.7183046204949554, "learning_rate": 1.3106087294645768e-06, "loss": 3.4365, "step": 7866 }, { "epoch": 1.7994626037989336, "grad_norm": 0.5757521571517731, "learning_rate": 1.3046970738243319e-06, "loss": 3.3988, "step": 7868 }, { "epoch": 1.7999199622679263, "grad_norm": 0.41754544293480983, "learning_rate": 1.2987984237081058e-06, "loss": 3.4707, "step": 7870 }, { "epoch": 1.800377320736919, "grad_norm": 0.3831566217574844, "learning_rate": 1.2929127823534697e-06, "loss": 3.3771, "step": 7872 }, { "epoch": 1.8008346792059113, "grad_norm": 0.47202960948791606, "learning_rate": 1.28704015299084e-06, "loss": 3.2562, "step": 7874 }, { "epoch": 1.8012920376749038, "grad_norm": 0.5293347109401146, "learning_rate": 1.281180538843499e-06, "loss": 3.416, "step": 7876 }, { "epoch": 1.8017493961438964, "grad_norm": 0.473221448820966, "learning_rate": 1.2753339431275878e-06, "loss": 3.3314, "step": 7878 }, { "epoch": 1.802206754612889, "grad_norm": 0.5826063451164119, "learning_rate": 1.2695003690520985e-06, "loss": 3.4987, "step": 7880 }, { "epoch": 1.8026641130818815, "grad_norm": 0.8487661101170496, "learning_rate": 1.263679819818872e-06, "loss": 3.5716, "step": 7882 }, { "epoch": 1.8031214715508739, "grad_norm": 0.42412344141166997, "learning_rate": 1.2578722986226033e-06, "loss": 3.4948, "step": 7884 }, { "epoch": 1.8035788300198665, "grad_norm": 0.5426124940866524, "learning_rate": 1.252077808650845e-06, "loss": 3.4803, "step": 7886 }, { "epoch": 1.8040361884888592, "grad_norm": 0.44986881820934066, "learning_rate": 1.2462963530839838e-06, "loss": 3.5373, "step": 7888 }, { "epoch": 1.8044935469578516, "grad_norm": 0.544570016233581, "learning_rate": 1.240527935095262e-06, "loss": 3.3967, "step": 7890 }, { "epoch": 1.804950905426844, "grad_norm": 0.5012245977058111, "learning_rate": 1.2347725578507608e-06, "loss": 3.3567, "step": 7892 }, { "epoch": 1.8054082638958366, "grad_norm": 0.5042047725285169, "learning_rate": 1.2290302245094087e-06, "loss": 3.3738, "step": 7894 }, { "epoch": 1.8058656223648293, "grad_norm": 0.49508916711615414, "learning_rate": 1.2233009382229683e-06, "loss": 3.458, "step": 7896 }, { "epoch": 1.8063229808338217, "grad_norm": 0.4915185499823669, "learning_rate": 1.217584702136046e-06, "loss": 3.4169, "step": 7898 }, { "epoch": 1.8067803393028141, "grad_norm": 0.43303641163316176, "learning_rate": 1.2118815193860878e-06, "loss": 3.5007, "step": 7900 }, { "epoch": 1.8072376977718068, "grad_norm": 0.5036365171541963, "learning_rate": 1.206191393103362e-06, "loss": 3.5132, "step": 7902 }, { "epoch": 1.8076950562407992, "grad_norm": 0.5462155044016638, "learning_rate": 1.2005143264109898e-06, "loss": 3.4034, "step": 7904 }, { "epoch": 1.8081524147097916, "grad_norm": 0.4754212268518142, "learning_rate": 1.194850322424912e-06, "loss": 3.4913, "step": 7906 }, { "epoch": 1.8086097731787842, "grad_norm": 0.46933485738222797, "learning_rate": 1.1891993842538974e-06, "loss": 3.3934, "step": 7908 }, { "epoch": 1.8090671316477769, "grad_norm": 0.4884395647890034, "learning_rate": 1.1835615149995594e-06, "loss": 3.3588, "step": 7910 }, { "epoch": 1.8095244901167693, "grad_norm": 0.4873584878154004, "learning_rate": 1.1779367177563173e-06, "loss": 3.3998, "step": 7912 }, { "epoch": 1.8099818485857617, "grad_norm": 0.565152457733129, "learning_rate": 1.172324995611429e-06, "loss": 3.3724, "step": 7914 }, { "epoch": 1.8104392070547544, "grad_norm": 0.4407911487916101, "learning_rate": 1.1667263516449728e-06, "loss": 3.3732, "step": 7916 }, { "epoch": 1.810896565523747, "grad_norm": 0.6409032496503422, "learning_rate": 1.1611407889298515e-06, "loss": 3.3823, "step": 7918 }, { "epoch": 1.8113539239927394, "grad_norm": 0.5410068938173928, "learning_rate": 1.1555683105317821e-06, "loss": 3.3646, "step": 7920 }, { "epoch": 1.8118112824617318, "grad_norm": 0.4261951534581487, "learning_rate": 1.1500089195093045e-06, "loss": 3.3592, "step": 7922 }, { "epoch": 1.8122686409307245, "grad_norm": 0.5026773653110301, "learning_rate": 1.1444626189137746e-06, "loss": 3.4282, "step": 7924 }, { "epoch": 1.8127259993997171, "grad_norm": 0.4905114018080396, "learning_rate": 1.1389294117893607e-06, "loss": 3.6681, "step": 7926 }, { "epoch": 1.8131833578687095, "grad_norm": 0.460936804948703, "learning_rate": 1.1334093011730495e-06, "loss": 3.3745, "step": 7928 }, { "epoch": 1.813640716337702, "grad_norm": 0.6044281662177022, "learning_rate": 1.1279022900946374e-06, "loss": 3.3984, "step": 7930 }, { "epoch": 1.8140980748066946, "grad_norm": 0.5635855584201023, "learning_rate": 1.1224083815767255e-06, "loss": 3.4486, "step": 7932 }, { "epoch": 1.8145554332756872, "grad_norm": 0.5865698390495466, "learning_rate": 1.1169275786347355e-06, "loss": 3.4804, "step": 7934 }, { "epoch": 1.8150127917446797, "grad_norm": 0.5766989931470516, "learning_rate": 1.111459884276883e-06, "loss": 3.3751, "step": 7936 }, { "epoch": 1.815470150213672, "grad_norm": 0.4759971986726266, "learning_rate": 1.1060053015041955e-06, "loss": 3.3838, "step": 7938 }, { "epoch": 1.8159275086826647, "grad_norm": 0.5593998035067779, "learning_rate": 1.1005638333105056e-06, "loss": 3.5339, "step": 7940 }, { "epoch": 1.8163848671516574, "grad_norm": 0.4350873464551365, "learning_rate": 1.0951354826824467e-06, "loss": 3.3033, "step": 7942 }, { "epoch": 1.8168422256206498, "grad_norm": 0.5347362457297062, "learning_rate": 1.0897202525994438e-06, "loss": 3.4227, "step": 7944 }, { "epoch": 1.8172995840896422, "grad_norm": 0.3630466729285259, "learning_rate": 1.0843181460337304e-06, "loss": 3.2098, "step": 7946 }, { "epoch": 1.8177569425586348, "grad_norm": 0.4904333523909993, "learning_rate": 1.0789291659503347e-06, "loss": 3.4121, "step": 7948 }, { "epoch": 1.8182143010276273, "grad_norm": 0.4561607025236509, "learning_rate": 1.0735533153070826e-06, "loss": 3.3887, "step": 7950 }, { "epoch": 1.8186716594966197, "grad_norm": 0.4724080088040703, "learning_rate": 1.068190597054583e-06, "loss": 3.5605, "step": 7952 }, { "epoch": 1.8191290179656123, "grad_norm": 0.4527178612397044, "learning_rate": 1.0628410141362566e-06, "loss": 3.5123, "step": 7954 }, { "epoch": 1.819586376434605, "grad_norm": 0.3754624058968918, "learning_rate": 1.0575045694882908e-06, "loss": 3.3826, "step": 7956 }, { "epoch": 1.8200437349035974, "grad_norm": 0.4922849165165368, "learning_rate": 1.0521812660396785e-06, "loss": 3.3872, "step": 7958 }, { "epoch": 1.8205010933725898, "grad_norm": 0.49377613420976646, "learning_rate": 1.0468711067121966e-06, "loss": 3.4076, "step": 7960 }, { "epoch": 1.8209584518415824, "grad_norm": 0.5708899252978321, "learning_rate": 1.0415740944204027e-06, "loss": 3.4466, "step": 7962 }, { "epoch": 1.821415810310575, "grad_norm": 0.4049863050935758, "learning_rate": 1.0362902320716434e-06, "loss": 3.3574, "step": 7964 }, { "epoch": 1.8218731687795675, "grad_norm": 0.6428618341040117, "learning_rate": 1.0310195225660484e-06, "loss": 3.3422, "step": 7966 }, { "epoch": 1.82233052724856, "grad_norm": 0.47346671426609993, "learning_rate": 1.0257619687965236e-06, "loss": 3.4201, "step": 7968 }, { "epoch": 1.8227878857175526, "grad_norm": 0.5781890215092388, "learning_rate": 1.0205175736487522e-06, "loss": 3.5244, "step": 7970 }, { "epoch": 1.8232452441865452, "grad_norm": 0.46761231643986007, "learning_rate": 1.0152863400012092e-06, "loss": 3.3733, "step": 7972 }, { "epoch": 1.8237026026555376, "grad_norm": 0.7655521280291633, "learning_rate": 1.0100682707251313e-06, "loss": 3.3529, "step": 7974 }, { "epoch": 1.82415996112453, "grad_norm": 0.44278885352208874, "learning_rate": 1.0048633686845305e-06, "loss": 3.265, "step": 7976 }, { "epoch": 1.8246173195935227, "grad_norm": 0.6174301684427264, "learning_rate": 9.996716367362018e-07, "loss": 3.3687, "step": 7978 }, { "epoch": 1.8250746780625153, "grad_norm": 0.43260845580233964, "learning_rate": 9.944930777297023e-07, "loss": 3.342, "step": 7980 }, { "epoch": 1.8255320365315078, "grad_norm": 0.4294196421030754, "learning_rate": 9.89327694507361e-07, "loss": 3.3578, "step": 7982 }, { "epoch": 1.8259893950005002, "grad_norm": 0.47684671963070074, "learning_rate": 9.841754899042794e-07, "loss": 3.4788, "step": 7984 }, { "epoch": 1.8264467534694928, "grad_norm": 0.45585134412528217, "learning_rate": 9.790364667483231e-07, "loss": 3.549, "step": 7986 }, { "epoch": 1.8269041119384852, "grad_norm": 0.5370971400861465, "learning_rate": 9.739106278601169e-07, "loss": 3.4943, "step": 7988 }, { "epoch": 1.8273614704074777, "grad_norm": 0.45645741359746295, "learning_rate": 9.687979760530624e-07, "loss": 3.3555, "step": 7990 }, { "epoch": 1.8278188288764703, "grad_norm": 0.5503487749074486, "learning_rate": 9.6369851413331e-07, "loss": 3.5381, "step": 7992 }, { "epoch": 1.828276187345463, "grad_norm": 0.5474859367375984, "learning_rate": 9.58612244899776e-07, "loss": 3.409, "step": 7994 }, { "epoch": 1.8287335458144554, "grad_norm": 0.43130620023545846, "learning_rate": 9.53539171144141e-07, "loss": 3.2717, "step": 7996 }, { "epoch": 1.8291909042834478, "grad_norm": 0.5349754807834131, "learning_rate": 9.484792956508337e-07, "loss": 3.4077, "step": 7998 }, { "epoch": 1.8296482627524404, "grad_norm": 0.6393623702441658, "learning_rate": 9.434326211970435e-07, "loss": 3.4731, "step": 8000 }, { "epoch": 1.830105621221433, "grad_norm": 0.5707420403739661, "learning_rate": 9.383991505527129e-07, "loss": 3.3815, "step": 8002 }, { "epoch": 1.8305629796904255, "grad_norm": 0.6021516317900876, "learning_rate": 9.333788864805404e-07, "loss": 3.3188, "step": 8004 }, { "epoch": 1.831020338159418, "grad_norm": 0.548734989612678, "learning_rate": 9.283718317359746e-07, "loss": 3.4343, "step": 8006 }, { "epoch": 1.8314776966284105, "grad_norm": 0.4498987441584451, "learning_rate": 9.233779890672061e-07, "loss": 3.3197, "step": 8008 }, { "epoch": 1.8319350550974032, "grad_norm": 0.4238563211792064, "learning_rate": 9.183973612151897e-07, "loss": 3.4249, "step": 8010 }, { "epoch": 1.8323924135663956, "grad_norm": 0.5746650259115482, "learning_rate": 9.134299509136135e-07, "loss": 3.4086, "step": 8012 }, { "epoch": 1.832849772035388, "grad_norm": 0.6365156961574862, "learning_rate": 9.084757608889132e-07, "loss": 3.2547, "step": 8014 }, { "epoch": 1.8333071305043807, "grad_norm": 0.5475006611702468, "learning_rate": 9.03534793860275e-07, "loss": 3.3327, "step": 8016 }, { "epoch": 1.8337644889733733, "grad_norm": 0.4028853502532983, "learning_rate": 8.98607052539624e-07, "loss": 3.368, "step": 8018 }, { "epoch": 1.8342218474423657, "grad_norm": 0.4007157285199031, "learning_rate": 8.936925396316159e-07, "loss": 3.3272, "step": 8020 }, { "epoch": 1.8346792059113581, "grad_norm": 0.48435783355811196, "learning_rate": 8.887912578336683e-07, "loss": 3.5316, "step": 8022 }, { "epoch": 1.8351365643803508, "grad_norm": 0.4507732694768975, "learning_rate": 8.839032098359151e-07, "loss": 3.2471, "step": 8024 }, { "epoch": 1.8355939228493434, "grad_norm": 0.4016541617604914, "learning_rate": 8.790283983212355e-07, "loss": 3.3139, "step": 8026 }, { "epoch": 1.8360512813183358, "grad_norm": 0.5210335886940582, "learning_rate": 8.741668259652475e-07, "loss": 3.2422, "step": 8028 }, { "epoch": 1.8365086397873283, "grad_norm": 0.5075026226915513, "learning_rate": 8.693184954362943e-07, "loss": 3.4262, "step": 8030 }, { "epoch": 1.836965998256321, "grad_norm": 0.5432266062172968, "learning_rate": 8.644834093954556e-07, "loss": 3.4653, "step": 8032 }, { "epoch": 1.8374233567253133, "grad_norm": 0.5194542619839344, "learning_rate": 8.596615704965422e-07, "loss": 3.2897, "step": 8034 }, { "epoch": 1.8378807151943057, "grad_norm": 0.4516467283160176, "learning_rate": 8.548529813860951e-07, "loss": 3.5149, "step": 8036 }, { "epoch": 1.8383380736632984, "grad_norm": 0.6485329982800361, "learning_rate": 8.500576447033753e-07, "loss": 3.4664, "step": 8038 }, { "epoch": 1.838795432132291, "grad_norm": 0.40996865114876285, "learning_rate": 8.452755630803833e-07, "loss": 3.428, "step": 8040 }, { "epoch": 1.8392527906012834, "grad_norm": 0.3923641589654944, "learning_rate": 8.40506739141833e-07, "loss": 3.4604, "step": 8042 }, { "epoch": 1.8397101490702759, "grad_norm": 0.544344250656941, "learning_rate": 8.357511755051589e-07, "loss": 3.6791, "step": 8044 }, { "epoch": 1.8401675075392685, "grad_norm": 0.4565613931620274, "learning_rate": 8.310088747805339e-07, "loss": 3.1941, "step": 8046 }, { "epoch": 1.8406248660082611, "grad_norm": 0.5179516002640988, "learning_rate": 8.262798395708371e-07, "loss": 3.2461, "step": 8048 }, { "epoch": 1.8410822244772536, "grad_norm": 0.38317830261033714, "learning_rate": 8.2156407247167e-07, "loss": 3.5501, "step": 8050 }, { "epoch": 1.841539582946246, "grad_norm": 0.4053154429532563, "learning_rate": 8.16861576071351e-07, "loss": 3.4674, "step": 8052 }, { "epoch": 1.8419969414152386, "grad_norm": 0.5374336512224067, "learning_rate": 8.121723529509212e-07, "loss": 3.3311, "step": 8054 }, { "epoch": 1.8424542998842313, "grad_norm": 0.46728518813879527, "learning_rate": 8.074964056841272e-07, "loss": 3.3843, "step": 8056 }, { "epoch": 1.8429116583532237, "grad_norm": 0.5615441359975891, "learning_rate": 8.028337368374328e-07, "loss": 3.4645, "step": 8058 }, { "epoch": 1.843369016822216, "grad_norm": 0.534494673283997, "learning_rate": 7.981843489700158e-07, "loss": 3.5, "step": 8060 }, { "epoch": 1.8438263752912087, "grad_norm": 0.5850918325661616, "learning_rate": 7.935482446337627e-07, "loss": 3.3208, "step": 8062 }, { "epoch": 1.8442837337602014, "grad_norm": 0.4413983504677196, "learning_rate": 7.889254263732687e-07, "loss": 3.4427, "step": 8064 }, { "epoch": 1.8447410922291938, "grad_norm": 0.6327042663698074, "learning_rate": 7.843158967258374e-07, "loss": 3.4276, "step": 8066 }, { "epoch": 1.8451984506981862, "grad_norm": 0.523554717646439, "learning_rate": 7.797196582214783e-07, "loss": 3.3319, "step": 8068 }, { "epoch": 1.8456558091671789, "grad_norm": 0.4054627677167464, "learning_rate": 7.751367133829068e-07, "loss": 3.4017, "step": 8070 }, { "epoch": 1.8461131676361715, "grad_norm": 0.579558116941108, "learning_rate": 7.705670647255414e-07, "loss": 3.3939, "step": 8072 }, { "epoch": 1.8465705261051637, "grad_norm": 0.5105703732278896, "learning_rate": 7.660107147575008e-07, "loss": 3.4691, "step": 8074 }, { "epoch": 1.8470278845741563, "grad_norm": 0.5271521891474213, "learning_rate": 7.614676659796066e-07, "loss": 3.3856, "step": 8076 }, { "epoch": 1.847485243043149, "grad_norm": 0.4999017022783233, "learning_rate": 7.569379208853839e-07, "loss": 3.4497, "step": 8078 }, { "epoch": 1.8479426015121414, "grad_norm": 0.6510780527755249, "learning_rate": 7.524214819610464e-07, "loss": 3.5255, "step": 8080 }, { "epoch": 1.8483999599811338, "grad_norm": 0.6230571590519544, "learning_rate": 7.479183516855115e-07, "loss": 3.46, "step": 8082 }, { "epoch": 1.8488573184501265, "grad_norm": 0.40631418454710444, "learning_rate": 7.434285325303908e-07, "loss": 3.4545, "step": 8084 }, { "epoch": 1.849314676919119, "grad_norm": 0.611617924552453, "learning_rate": 7.389520269599882e-07, "loss": 3.4454, "step": 8086 }, { "epoch": 1.8497720353881115, "grad_norm": 0.4574309989090923, "learning_rate": 7.344888374312992e-07, "loss": 3.358, "step": 8088 }, { "epoch": 1.850229393857104, "grad_norm": 0.5106969634652082, "learning_rate": 7.300389663940172e-07, "loss": 3.3791, "step": 8090 }, { "epoch": 1.8506867523260966, "grad_norm": 0.5723690941916845, "learning_rate": 7.256024162905217e-07, "loss": 3.5005, "step": 8092 }, { "epoch": 1.8511441107950892, "grad_norm": 0.541949873638587, "learning_rate": 7.211791895558706e-07, "loss": 3.3581, "step": 8094 }, { "epoch": 1.8516014692640816, "grad_norm": 0.5076664641278256, "learning_rate": 7.167692886178301e-07, "loss": 3.5261, "step": 8096 }, { "epoch": 1.852058827733074, "grad_norm": 0.504634921757126, "learning_rate": 7.123727158968335e-07, "loss": 3.395, "step": 8098 }, { "epoch": 1.8525161862020667, "grad_norm": 0.4781823379443397, "learning_rate": 7.079894738060061e-07, "loss": 3.4156, "step": 8100 }, { "epoch": 1.8529735446710593, "grad_norm": 0.5104007491399983, "learning_rate": 7.036195647511623e-07, "loss": 3.3239, "step": 8102 }, { "epoch": 1.8534309031400518, "grad_norm": 0.5117776851349375, "learning_rate": 6.992629911307891e-07, "loss": 3.4392, "step": 8104 }, { "epoch": 1.8538882616090442, "grad_norm": 0.4673849097020322, "learning_rate": 6.949197553360514e-07, "loss": 3.5358, "step": 8106 }, { "epoch": 1.8543456200780368, "grad_norm": 0.4973357865070217, "learning_rate": 6.905898597508121e-07, "loss": 3.4026, "step": 8108 }, { "epoch": 1.8548029785470295, "grad_norm": 0.6681206290706045, "learning_rate": 6.862733067515892e-07, "loss": 3.7181, "step": 8110 }, { "epoch": 1.8552603370160219, "grad_norm": 0.38921607559859556, "learning_rate": 6.819700987075905e-07, "loss": 3.4253, "step": 8112 }, { "epoch": 1.8557176954850143, "grad_norm": 0.4713131920162311, "learning_rate": 6.776802379806991e-07, "loss": 3.5246, "step": 8114 }, { "epoch": 1.856175053954007, "grad_norm": 0.4399085672974245, "learning_rate": 6.7340372692547e-07, "loss": 3.2032, "step": 8116 }, { "epoch": 1.8566324124229994, "grad_norm": 0.48696044811101724, "learning_rate": 6.69140567889126e-07, "loss": 3.2676, "step": 8118 }, { "epoch": 1.8570897708919918, "grad_norm": 0.611793341053467, "learning_rate": 6.648907632115703e-07, "loss": 3.4743, "step": 8120 }, { "epoch": 1.8575471293609844, "grad_norm": 0.5394217634649278, "learning_rate": 6.606543152253702e-07, "loss": 3.3965, "step": 8122 }, { "epoch": 1.858004487829977, "grad_norm": 0.5866449789712973, "learning_rate": 6.564312262557659e-07, "loss": 3.3662, "step": 8124 }, { "epoch": 1.8584618462989695, "grad_norm": 0.5082747189805507, "learning_rate": 6.522214986206615e-07, "loss": 3.4919, "step": 8126 }, { "epoch": 1.858919204767962, "grad_norm": 0.4918557214412503, "learning_rate": 6.48025134630631e-07, "loss": 3.5703, "step": 8128 }, { "epoch": 1.8593765632369545, "grad_norm": 0.4789713966155637, "learning_rate": 6.438421365889124e-07, "loss": 3.471, "step": 8130 }, { "epoch": 1.8598339217059472, "grad_norm": 0.5332473624745887, "learning_rate": 6.396725067914028e-07, "loss": 3.3072, "step": 8132 }, { "epoch": 1.8602912801749396, "grad_norm": 0.40509981779689075, "learning_rate": 6.355162475266713e-07, "loss": 3.3769, "step": 8134 }, { "epoch": 1.860748638643932, "grad_norm": 0.6012965518591807, "learning_rate": 6.313733610759404e-07, "loss": 3.2558, "step": 8136 }, { "epoch": 1.8612059971129247, "grad_norm": 0.553818976409725, "learning_rate": 6.272438497130966e-07, "loss": 3.4472, "step": 8138 }, { "epoch": 1.8616633555819173, "grad_norm": 0.5576073801009935, "learning_rate": 6.231277157046883e-07, "loss": 3.3609, "step": 8140 }, { "epoch": 1.8621207140509097, "grad_norm": 0.5103669401845564, "learning_rate": 6.190249613099136e-07, "loss": 3.2827, "step": 8142 }, { "epoch": 1.8625780725199022, "grad_norm": 0.5795405816062017, "learning_rate": 6.149355887806296e-07, "loss": 3.3481, "step": 8144 }, { "epoch": 1.8630354309888948, "grad_norm": 0.4145728870334549, "learning_rate": 6.10859600361352e-07, "loss": 3.3606, "step": 8146 }, { "epoch": 1.8634927894578874, "grad_norm": 0.5056100179199613, "learning_rate": 6.067969982892497e-07, "loss": 3.4819, "step": 8148 }, { "epoch": 1.8639501479268799, "grad_norm": 0.5859666435623331, "learning_rate": 6.027477847941415e-07, "loss": 3.2792, "step": 8150 }, { "epoch": 1.8644075063958723, "grad_norm": 0.41073429609799983, "learning_rate": 5.987119620984999e-07, "loss": 3.347, "step": 8152 }, { "epoch": 1.864864864864865, "grad_norm": 0.5256202791435168, "learning_rate": 5.946895324174473e-07, "loss": 3.4104, "step": 8154 }, { "epoch": 1.8653222233338576, "grad_norm": 0.5927051335155105, "learning_rate": 5.906804979587538e-07, "loss": 3.4494, "step": 8156 }, { "epoch": 1.8657795818028498, "grad_norm": 0.4737257718757929, "learning_rate": 5.866848609228398e-07, "loss": 3.3117, "step": 8158 }, { "epoch": 1.8662369402718424, "grad_norm": 0.4617708399478847, "learning_rate": 5.827026235027705e-07, "loss": 3.5523, "step": 8160 }, { "epoch": 1.866694298740835, "grad_norm": 0.5394046361125682, "learning_rate": 5.787337878842531e-07, "loss": 3.4484, "step": 8162 }, { "epoch": 1.8671516572098275, "grad_norm": 0.5130006008386346, "learning_rate": 5.747783562456476e-07, "loss": 3.2929, "step": 8164 }, { "epoch": 1.8676090156788199, "grad_norm": 0.6086924226857339, "learning_rate": 5.708363307579512e-07, "loss": 3.3788, "step": 8166 }, { "epoch": 1.8680663741478125, "grad_norm": 0.5646708507437246, "learning_rate": 5.669077135848022e-07, "loss": 3.2525, "step": 8168 }, { "epoch": 1.8685237326168052, "grad_norm": 0.44038708939688676, "learning_rate": 5.629925068824843e-07, "loss": 3.4636, "step": 8170 }, { "epoch": 1.8689810910857976, "grad_norm": 0.489368312029977, "learning_rate": 5.590907127999173e-07, "loss": 3.431, "step": 8172 }, { "epoch": 1.86943844955479, "grad_norm": 0.37430729067456686, "learning_rate": 5.552023334786577e-07, "loss": 3.2568, "step": 8174 }, { "epoch": 1.8698958080237826, "grad_norm": 0.4144081975834886, "learning_rate": 5.513273710528983e-07, "loss": 3.4264, "step": 8176 }, { "epoch": 1.8703531664927753, "grad_norm": 0.6344460020122275, "learning_rate": 5.474658276494793e-07, "loss": 3.4847, "step": 8178 }, { "epoch": 1.8708105249617677, "grad_norm": 0.6124940180972964, "learning_rate": 5.436177053878583e-07, "loss": 3.4654, "step": 8180 }, { "epoch": 1.8712678834307601, "grad_norm": 0.572493367885505, "learning_rate": 5.397830063801374e-07, "loss": 3.7249, "step": 8182 }, { "epoch": 1.8717252418997528, "grad_norm": 0.3974945841426294, "learning_rate": 5.359617327310551e-07, "loss": 3.5398, "step": 8184 }, { "epoch": 1.8721826003687454, "grad_norm": 0.4609506572849873, "learning_rate": 5.321538865379671e-07, "loss": 3.2795, "step": 8186 }, { "epoch": 1.8726399588377378, "grad_norm": 0.5198579205759257, "learning_rate": 5.283594698908684e-07, "loss": 3.3265, "step": 8188 }, { "epoch": 1.8730973173067302, "grad_norm": 0.44416155065461294, "learning_rate": 5.245784848723873e-07, "loss": 3.2017, "step": 8190 }, { "epoch": 1.8735546757757229, "grad_norm": 0.5032758095798667, "learning_rate": 5.208109335577693e-07, "loss": 3.4846, "step": 8192 }, { "epoch": 1.8740120342447155, "grad_norm": 0.5098071611224714, "learning_rate": 5.17056818014891e-07, "loss": 3.2615, "step": 8194 }, { "epoch": 1.874469392713708, "grad_norm": 0.4796413337286478, "learning_rate": 5.133161403042597e-07, "loss": 3.4786, "step": 8196 }, { "epoch": 1.8749267511827004, "grad_norm": 0.44547635125800306, "learning_rate": 5.095889024789996e-07, "loss": 3.4436, "step": 8198 }, { "epoch": 1.875384109651693, "grad_norm": 0.4726687658244941, "learning_rate": 5.058751065848605e-07, "loss": 3.2792, "step": 8200 }, { "epoch": 1.8758414681206854, "grad_norm": 0.542487637998658, "learning_rate": 5.021747546602173e-07, "loss": 3.4293, "step": 8202 }, { "epoch": 1.8762988265896778, "grad_norm": 0.4469473260090913, "learning_rate": 4.98487848736065e-07, "loss": 3.4127, "step": 8204 }, { "epoch": 1.8767561850586705, "grad_norm": 0.6105965572749299, "learning_rate": 4.948143908360125e-07, "loss": 3.4169, "step": 8206 }, { "epoch": 1.8772135435276631, "grad_norm": 0.670731391822081, "learning_rate": 4.911543829762999e-07, "loss": 3.6233, "step": 8208 }, { "epoch": 1.8776709019966555, "grad_norm": 0.6186722388677711, "learning_rate": 4.875078271657729e-07, "loss": 3.5485, "step": 8210 }, { "epoch": 1.878128260465648, "grad_norm": 0.5472607206006403, "learning_rate": 4.838747254058973e-07, "loss": 3.3834, "step": 8212 }, { "epoch": 1.8785856189346406, "grad_norm": 0.5438427922666144, "learning_rate": 4.802550796907584e-07, "loss": 3.43, "step": 8214 }, { "epoch": 1.8790429774036332, "grad_norm": 0.6461187752209643, "learning_rate": 4.766488920070561e-07, "loss": 3.5619, "step": 8216 }, { "epoch": 1.8795003358726257, "grad_norm": 0.46648679194401627, "learning_rate": 4.7305616433409307e-07, "loss": 3.2496, "step": 8218 }, { "epoch": 1.879957694341618, "grad_norm": 0.426669602694443, "learning_rate": 4.694768986437975e-07, "loss": 3.443, "step": 8220 }, { "epoch": 1.8804150528106107, "grad_norm": 0.4452420898454528, "learning_rate": 4.659110969007036e-07, "loss": 3.235, "step": 8222 }, { "epoch": 1.8808724112796034, "grad_norm": 0.4976191011274204, "learning_rate": 4.623587610619512e-07, "loss": 3.1992, "step": 8224 }, { "epoch": 1.8813297697485958, "grad_norm": 0.47647308073529443, "learning_rate": 4.5881989307729743e-07, "loss": 3.4432, "step": 8226 }, { "epoch": 1.8817871282175882, "grad_norm": 0.4433514934629077, "learning_rate": 4.5529449488910247e-07, "loss": 3.3357, "step": 8228 }, { "epoch": 1.8822444866865808, "grad_norm": 0.45847998967595704, "learning_rate": 4.517825684323324e-07, "loss": 3.3599, "step": 8230 }, { "epoch": 1.8827018451555735, "grad_norm": 0.6054026906062393, "learning_rate": 4.482841156345646e-07, "loss": 3.5132, "step": 8232 }, { "epoch": 1.883159203624566, "grad_norm": 0.41560712646830694, "learning_rate": 4.4479913841597445e-07, "loss": 3.2713, "step": 8234 }, { "epoch": 1.8836165620935583, "grad_norm": 0.6178055819103337, "learning_rate": 4.413276386893428e-07, "loss": 3.4187, "step": 8236 }, { "epoch": 1.884073920562551, "grad_norm": 0.4675207072532172, "learning_rate": 4.378696183600567e-07, "loss": 3.5124, "step": 8238 }, { "epoch": 1.8845312790315436, "grad_norm": 0.38148697226642314, "learning_rate": 4.344250793261062e-07, "loss": 3.4023, "step": 8240 }, { "epoch": 1.8849886375005358, "grad_norm": 0.5548081871158157, "learning_rate": 4.309940234780735e-07, "loss": 3.6169, "step": 8242 }, { "epoch": 1.8854459959695284, "grad_norm": 0.6669888810484937, "learning_rate": 4.2757645269914657e-07, "loss": 3.4186, "step": 8244 }, { "epoch": 1.885903354438521, "grad_norm": 0.37593569843117414, "learning_rate": 4.2417236886511115e-07, "loss": 3.5561, "step": 8246 }, { "epoch": 1.8863607129075135, "grad_norm": 0.4828120981603635, "learning_rate": 4.2078177384435035e-07, "loss": 3.2684, "step": 8248 }, { "epoch": 1.886818071376506, "grad_norm": 0.49657807025061107, "learning_rate": 4.174046694978395e-07, "loss": 3.493, "step": 8250 }, { "epoch": 1.8872754298454986, "grad_norm": 0.4968888076927007, "learning_rate": 4.140410576791598e-07, "loss": 3.4065, "step": 8252 }, { "epoch": 1.8877327883144912, "grad_norm": 0.4803346896583703, "learning_rate": 4.106909402344761e-07, "loss": 3.4103, "step": 8254 }, { "epoch": 1.8881901467834836, "grad_norm": 0.4018744907998853, "learning_rate": 4.0735431900255084e-07, "loss": 3.1973, "step": 8256 }, { "epoch": 1.888647505252476, "grad_norm": 0.4688491080733809, "learning_rate": 4.0403119581474134e-07, "loss": 3.2465, "step": 8258 }, { "epoch": 1.8891048637214687, "grad_norm": 0.4981575188498278, "learning_rate": 4.0072157249499143e-07, "loss": 3.4271, "step": 8260 }, { "epoch": 1.8895622221904613, "grad_norm": 0.48627185112954113, "learning_rate": 3.9742545085983685e-07, "loss": 3.456, "step": 8262 }, { "epoch": 1.8900195806594537, "grad_norm": 0.3578828850695929, "learning_rate": 3.9414283271840545e-07, "loss": 3.4642, "step": 8264 }, { "epoch": 1.8904769391284462, "grad_norm": 0.5223554843471174, "learning_rate": 3.908737198724144e-07, "loss": 3.4462, "step": 8266 }, { "epoch": 1.8909342975974388, "grad_norm": 0.47662534455717864, "learning_rate": 3.876181141161589e-07, "loss": 3.4943, "step": 8268 }, { "epoch": 1.8913916560664314, "grad_norm": 0.3451065584282744, "learning_rate": 3.8437601723653184e-07, "loss": 3.3663, "step": 8270 }, { "epoch": 1.8918490145354239, "grad_norm": 0.5960199982901078, "learning_rate": 3.81147431013007e-07, "loss": 3.2903, "step": 8272 }, { "epoch": 1.8923063730044163, "grad_norm": 0.5601739207610242, "learning_rate": 3.779323572176391e-07, "loss": 3.398, "step": 8274 }, { "epoch": 1.892763731473409, "grad_norm": 0.4288718023867523, "learning_rate": 3.747307976150749e-07, "loss": 3.3866, "step": 8276 }, { "epoch": 1.8932210899424016, "grad_norm": 0.49221880374239957, "learning_rate": 3.715427539625338e-07, "loss": 3.4165, "step": 8278 }, { "epoch": 1.893678448411394, "grad_norm": 0.5875153975802465, "learning_rate": 3.683682280098244e-07, "loss": 3.5221, "step": 8280 }, { "epoch": 1.8941358068803864, "grad_norm": 0.48441885165171233, "learning_rate": 3.652072214993335e-07, "loss": 3.4135, "step": 8282 }, { "epoch": 1.894593165349379, "grad_norm": 0.470628930413601, "learning_rate": 3.620597361660261e-07, "loss": 3.3811, "step": 8284 }, { "epoch": 1.8950505238183715, "grad_norm": 0.47382071002589476, "learning_rate": 3.5892577373744517e-07, "loss": 3.4005, "step": 8286 }, { "epoch": 1.8955078822873639, "grad_norm": 0.5282798934959725, "learning_rate": 3.558053359337177e-07, "loss": 3.4188, "step": 8288 }, { "epoch": 1.8959652407563565, "grad_norm": 0.43817968024590626, "learning_rate": 3.526984244675402e-07, "loss": 3.4708, "step": 8290 }, { "epoch": 1.8964225992253492, "grad_norm": 0.4861595597041388, "learning_rate": 3.4960504104418757e-07, "loss": 3.3145, "step": 8292 }, { "epoch": 1.8968799576943416, "grad_norm": 0.4736875679377452, "learning_rate": 3.465251873615072e-07, "loss": 3.4374, "step": 8294 }, { "epoch": 1.897337316163334, "grad_norm": 0.5903843808370807, "learning_rate": 3.434588651099302e-07, "loss": 3.4589, "step": 8296 }, { "epoch": 1.8977946746323267, "grad_norm": 0.5716433999365822, "learning_rate": 3.4040607597244646e-07, "loss": 3.4588, "step": 8298 }, { "epoch": 1.8982520331013193, "grad_norm": 0.4573123914878292, "learning_rate": 3.3736682162462954e-07, "loss": 3.5629, "step": 8300 }, { "epoch": 1.8987093915703117, "grad_norm": 0.4961989433918873, "learning_rate": 3.3434110373462e-07, "loss": 3.291, "step": 8302 }, { "epoch": 1.8991667500393041, "grad_norm": 0.5541781921431077, "learning_rate": 3.3132892396312845e-07, "loss": 3.4361, "step": 8304 }, { "epoch": 1.8996241085082968, "grad_norm": 0.4143244976091064, "learning_rate": 3.2833028396343226e-07, "loss": 3.2977, "step": 8306 }, { "epoch": 1.9000814669772894, "grad_norm": 0.514347111257631, "learning_rate": 3.2534518538138436e-07, "loss": 3.3301, "step": 8308 }, { "epoch": 1.9005388254462818, "grad_norm": 0.5637338668984625, "learning_rate": 3.2237362985539923e-07, "loss": 3.4954, "step": 8310 }, { "epoch": 1.9009961839152743, "grad_norm": 0.4365830178105238, "learning_rate": 3.1941561901645825e-07, "loss": 3.4025, "step": 8312 }, { "epoch": 1.901453542384267, "grad_norm": 0.44999829530393054, "learning_rate": 3.164711544881127e-07, "loss": 3.4885, "step": 8314 }, { "epoch": 1.9019109008532595, "grad_norm": 0.505059039942074, "learning_rate": 3.1354023788647546e-07, "loss": 3.4508, "step": 8316 }, { "epoch": 1.902368259322252, "grad_norm": 0.5165218298952735, "learning_rate": 3.106228708202208e-07, "loss": 3.4039, "step": 8318 }, { "epoch": 1.9028256177912444, "grad_norm": 0.4508668933876769, "learning_rate": 3.0771905489059835e-07, "loss": 3.4118, "step": 8320 }, { "epoch": 1.903282976260237, "grad_norm": 0.5872042631930667, "learning_rate": 3.048287916914028e-07, "loss": 3.5113, "step": 8322 }, { "epoch": 1.9037403347292297, "grad_norm": 0.5912420107763439, "learning_rate": 3.019520828090011e-07, "loss": 3.4048, "step": 8324 }, { "epoch": 1.904197693198222, "grad_norm": 0.500524841461911, "learning_rate": 2.9908892982231927e-07, "loss": 3.3191, "step": 8326 }, { "epoch": 1.9046550516672145, "grad_norm": 0.5413976759458727, "learning_rate": 2.962393343028447e-07, "loss": 3.404, "step": 8328 }, { "epoch": 1.9051124101362071, "grad_norm": 0.4445460901984517, "learning_rate": 2.934032978146123e-07, "loss": 3.5586, "step": 8330 }, { "epoch": 1.9055697686051996, "grad_norm": 0.5879042107415888, "learning_rate": 2.9058082191423253e-07, "loss": 3.4885, "step": 8332 }, { "epoch": 1.906027127074192, "grad_norm": 0.5277363198990636, "learning_rate": 2.877719081508606e-07, "loss": 3.3589, "step": 8334 }, { "epoch": 1.9064844855431846, "grad_norm": 0.6159074038773894, "learning_rate": 2.849765580662078e-07, "loss": 3.3351, "step": 8336 }, { "epoch": 1.9069418440121773, "grad_norm": 0.6177838257390463, "learning_rate": 2.821947731945468e-07, "loss": 3.3138, "step": 8338 }, { "epoch": 1.9073992024811697, "grad_norm": 0.4261791399958019, "learning_rate": 2.794265550627062e-07, "loss": 3.5493, "step": 8340 }, { "epoch": 1.907856560950162, "grad_norm": 0.4511570453270829, "learning_rate": 2.766719051900568e-07, "loss": 3.3605, "step": 8342 }, { "epoch": 1.9083139194191547, "grad_norm": 0.5428234427342301, "learning_rate": 2.739308250885336e-07, "loss": 3.2701, "step": 8344 }, { "epoch": 1.9087712778881474, "grad_norm": 0.4108675465675168, "learning_rate": 2.712033162626221e-07, "loss": 3.3931, "step": 8346 }, { "epoch": 1.9092286363571398, "grad_norm": 0.5362889908351457, "learning_rate": 2.6848938020934966e-07, "loss": 3.4438, "step": 8348 }, { "epoch": 1.9096859948261322, "grad_norm": 0.4767236749725356, "learning_rate": 2.6578901841830816e-07, "loss": 3.4362, "step": 8350 }, { "epoch": 1.9101433532951249, "grad_norm": 0.5089915452921981, "learning_rate": 2.631022323716287e-07, "loss": 3.3371, "step": 8352 }, { "epoch": 1.9106007117641175, "grad_norm": 0.65748729323666, "learning_rate": 2.604290235439927e-07, "loss": 3.4848, "step": 8354 }, { "epoch": 1.91105807023311, "grad_norm": 0.5109711930484447, "learning_rate": 2.5776939340263206e-07, "loss": 3.3955, "step": 8356 }, { "epoch": 1.9115154287021023, "grad_norm": 0.4837228630399219, "learning_rate": 2.551233434073291e-07, "loss": 3.2806, "step": 8358 }, { "epoch": 1.911972787171095, "grad_norm": 0.582928190433262, "learning_rate": 2.524908750104027e-07, "loss": 3.428, "step": 8360 }, { "epoch": 1.9124301456400876, "grad_norm": 0.5855165368097649, "learning_rate": 2.498719896567248e-07, "loss": 3.4459, "step": 8362 }, { "epoch": 1.91288750410908, "grad_norm": 0.42625039178936536, "learning_rate": 2.4726668878371217e-07, "loss": 3.3244, "step": 8364 }, { "epoch": 1.9133448625780725, "grad_norm": 0.46590960144704346, "learning_rate": 2.446749738213183e-07, "loss": 3.4802, "step": 8366 }, { "epoch": 1.913802221047065, "grad_norm": 0.3853674891603292, "learning_rate": 2.4209684619204976e-07, "loss": 3.456, "step": 8368 }, { "epoch": 1.9142595795160575, "grad_norm": 0.4904580233039856, "learning_rate": 2.3953230731094953e-07, "loss": 3.3825, "step": 8370 }, { "epoch": 1.91471693798505, "grad_norm": 0.4319597505864069, "learning_rate": 2.369813585856029e-07, "loss": 3.4383, "step": 8372 }, { "epoch": 1.9151742964540426, "grad_norm": 0.442538875984195, "learning_rate": 2.3444400141613422e-07, "loss": 3.282, "step": 8374 }, { "epoch": 1.9156316549230352, "grad_norm": 0.4849627835457149, "learning_rate": 2.3192023719521562e-07, "loss": 3.4068, "step": 8376 }, { "epoch": 1.9160890133920276, "grad_norm": 0.47895924771213183, "learning_rate": 2.2941006730805015e-07, "loss": 3.3722, "step": 8378 }, { "epoch": 1.91654637186102, "grad_norm": 0.5164316399084954, "learning_rate": 2.269134931323802e-07, "loss": 3.2843, "step": 8380 }, { "epoch": 1.9170037303300127, "grad_norm": 0.4946700415118058, "learning_rate": 2.2443051603849296e-07, "loss": 3.439, "step": 8382 }, { "epoch": 1.9174610887990053, "grad_norm": 0.40643491571300805, "learning_rate": 2.219611373892011e-07, "loss": 3.5177, "step": 8384 }, { "epoch": 1.9179184472679978, "grad_norm": 0.4823260347247573, "learning_rate": 2.1950535853986487e-07, "loss": 3.3849, "step": 8386 }, { "epoch": 1.9183758057369902, "grad_norm": 0.3889116668936974, "learning_rate": 2.170631808383783e-07, "loss": 3.4268, "step": 8388 }, { "epoch": 1.9188331642059828, "grad_norm": 0.4603051033901907, "learning_rate": 2.146346056251608e-07, "loss": 3.3297, "step": 8390 }, { "epoch": 1.9192905226749755, "grad_norm": 0.45671381889967405, "learning_rate": 2.122196342331767e-07, "loss": 3.7068, "step": 8392 }, { "epoch": 1.9197478811439679, "grad_norm": 0.5248104139156843, "learning_rate": 2.0981826798791848e-07, "loss": 3.3994, "step": 8394 }, { "epoch": 1.9202052396129603, "grad_norm": 0.3986088814250305, "learning_rate": 2.074305082074124e-07, "loss": 3.3622, "step": 8396 }, { "epoch": 1.920662598081953, "grad_norm": 0.47393766993937514, "learning_rate": 2.0505635620221564e-07, "loss": 3.3381, "step": 8398 }, { "epoch": 1.9211199565509456, "grad_norm": 0.6112478770275732, "learning_rate": 2.026958132754192e-07, "loss": 3.2885, "step": 8400 }, { "epoch": 1.921577315019938, "grad_norm": 0.5509852849843931, "learning_rate": 2.0034888072263947e-07, "loss": 3.271, "step": 8402 }, { "epoch": 1.9220346734889304, "grad_norm": 0.4266372615148644, "learning_rate": 1.9801555983202935e-07, "loss": 3.3897, "step": 8404 }, { "epoch": 1.922492031957923, "grad_norm": 0.5271230165020444, "learning_rate": 1.9569585188426444e-07, "loss": 3.3056, "step": 8406 }, { "epoch": 1.9229493904269157, "grad_norm": 0.4405129432227205, "learning_rate": 1.9338975815255122e-07, "loss": 3.3749, "step": 8408 }, { "epoch": 1.9234067488959081, "grad_norm": 0.40888268883548, "learning_rate": 1.9109727990262728e-07, "loss": 3.3476, "step": 8410 }, { "epoch": 1.9238641073649005, "grad_norm": 0.5636857304023453, "learning_rate": 1.8881841839274715e-07, "loss": 3.4697, "step": 8412 }, { "epoch": 1.9243214658338932, "grad_norm": 0.5007489674558007, "learning_rate": 1.8655317487370762e-07, "loss": 3.3264, "step": 8414 }, { "epoch": 1.9247788243028856, "grad_norm": 0.49161562924119395, "learning_rate": 1.8430155058881404e-07, "loss": 3.424, "step": 8416 }, { "epoch": 1.925236182771878, "grad_norm": 0.5169124136764185, "learning_rate": 1.8206354677390846e-07, "loss": 3.3652, "step": 8418 }, { "epoch": 1.9256935412408707, "grad_norm": 0.5245585038437304, "learning_rate": 1.7983916465734996e-07, "loss": 3.3501, "step": 8420 }, { "epoch": 1.9261508997098633, "grad_norm": 0.4981616467644875, "learning_rate": 1.7762840546002856e-07, "loss": 3.3466, "step": 8422 }, { "epoch": 1.9266082581788557, "grad_norm": 0.434504180333887, "learning_rate": 1.754312703953459e-07, "loss": 3.5712, "step": 8424 }, { "epoch": 1.9270656166478481, "grad_norm": 0.44044207029000026, "learning_rate": 1.732477606692401e-07, "loss": 3.454, "step": 8426 }, { "epoch": 1.9275229751168408, "grad_norm": 0.42356766111379696, "learning_rate": 1.710778774801608e-07, "loss": 3.2843, "step": 8428 }, { "epoch": 1.9279803335858334, "grad_norm": 0.5185366697467843, "learning_rate": 1.6892162201907758e-07, "loss": 3.4038, "step": 8430 }, { "epoch": 1.9284376920548258, "grad_norm": 0.4590560532461942, "learning_rate": 1.6677899546948818e-07, "loss": 3.0913, "step": 8432 }, { "epoch": 1.9288950505238183, "grad_norm": 0.5798425163845959, "learning_rate": 1.6464999900740463e-07, "loss": 3.3888, "step": 8434 }, { "epoch": 1.929352408992811, "grad_norm": 0.48561520391081303, "learning_rate": 1.6253463380135615e-07, "loss": 3.3207, "step": 8436 }, { "epoch": 1.9298097674618035, "grad_norm": 0.45823346661128955, "learning_rate": 1.6043290101239728e-07, "loss": 3.4689, "step": 8438 }, { "epoch": 1.930267125930796, "grad_norm": 0.5048009632696129, "learning_rate": 1.5834480179408862e-07, "loss": 3.3677, "step": 8440 }, { "epoch": 1.9307244843997884, "grad_norm": 0.5260693162003122, "learning_rate": 1.5627033729252173e-07, "loss": 3.5196, "step": 8442 }, { "epoch": 1.931181842868781, "grad_norm": 0.501554546114877, "learning_rate": 1.542095086462969e-07, "loss": 3.4529, "step": 8444 }, { "epoch": 1.9316392013377737, "grad_norm": 0.4657662958139479, "learning_rate": 1.5216231698652606e-07, "loss": 3.492, "step": 8446 }, { "epoch": 1.932096559806766, "grad_norm": 0.5195478852555819, "learning_rate": 1.5012876343684646e-07, "loss": 3.2938, "step": 8448 }, { "epoch": 1.9325539182757585, "grad_norm": 0.47217794129689655, "learning_rate": 1.4810884911340416e-07, "loss": 3.5078, "step": 8450 }, { "epoch": 1.9330112767447512, "grad_norm": 0.5623827653150784, "learning_rate": 1.4610257512485405e-07, "loss": 3.502, "step": 8452 }, { "epoch": 1.9334686352137436, "grad_norm": 0.4995977005179884, "learning_rate": 1.441099425723763e-07, "loss": 3.2709, "step": 8454 }, { "epoch": 1.933925993682736, "grad_norm": 0.46085980940580223, "learning_rate": 1.4213095254965448e-07, "loss": 3.5015, "step": 8456 }, { "epoch": 1.9343833521517286, "grad_norm": 0.42320368952129894, "learning_rate": 1.4016560614288632e-07, "loss": 3.3106, "step": 8458 }, { "epoch": 1.9348407106207213, "grad_norm": 0.530984756225776, "learning_rate": 1.3821390443078398e-07, "loss": 3.4002, "step": 8460 }, { "epoch": 1.9352980690897137, "grad_norm": 0.4205817604512966, "learning_rate": 1.362758484845683e-07, "loss": 3.5311, "step": 8462 }, { "epoch": 1.935755427558706, "grad_norm": 0.4872545663747844, "learning_rate": 1.3435143936796902e-07, "loss": 3.3762, "step": 8464 }, { "epoch": 1.9362127860276988, "grad_norm": 0.4974656846561689, "learning_rate": 1.3244067813722726e-07, "loss": 3.4871, "step": 8466 }, { "epoch": 1.9366701444966914, "grad_norm": 0.5482226796037911, "learning_rate": 1.3054356584109307e-07, "loss": 3.5898, "step": 8468 }, { "epoch": 1.9371275029656838, "grad_norm": 0.45504190849300336, "learning_rate": 1.2866010352082515e-07, "loss": 3.4159, "step": 8470 }, { "epoch": 1.9375848614346762, "grad_norm": 0.3843443253535564, "learning_rate": 1.26790292210191e-07, "loss": 3.342, "step": 8472 }, { "epoch": 1.9380422199036689, "grad_norm": 0.409871811104271, "learning_rate": 1.2493413293546141e-07, "loss": 3.2917, "step": 8474 }, { "epoch": 1.9384995783726615, "grad_norm": 0.5872021823202607, "learning_rate": 1.230916267154242e-07, "loss": 3.3464, "step": 8476 }, { "epoch": 1.938956936841654, "grad_norm": 0.5237306656375638, "learning_rate": 1.2126277456136203e-07, "loss": 3.4477, "step": 8478 }, { "epoch": 1.9394142953106464, "grad_norm": 0.5584247306725737, "learning_rate": 1.1944757747706648e-07, "loss": 3.2856, "step": 8480 }, { "epoch": 1.939871653779639, "grad_norm": 0.458955707060575, "learning_rate": 1.1764603645883777e-07, "loss": 3.4544, "step": 8482 }, { "epoch": 1.9403290122486316, "grad_norm": 0.45704243312097564, "learning_rate": 1.1585815249548215e-07, "loss": 3.304, "step": 8484 }, { "epoch": 1.940786370717624, "grad_norm": 0.4925589446554345, "learning_rate": 1.1408392656830081e-07, "loss": 3.2472, "step": 8486 }, { "epoch": 1.9412437291866165, "grad_norm": 0.4437982848587262, "learning_rate": 1.123233596511064e-07, "loss": 3.31, "step": 8488 }, { "epoch": 1.9417010876556091, "grad_norm": 0.40284536628614925, "learning_rate": 1.1057645271021489e-07, "loss": 3.5022, "step": 8490 }, { "epoch": 1.9421584461246018, "grad_norm": 0.6633690972447436, "learning_rate": 1.0884320670443982e-07, "loss": 3.3836, "step": 8492 }, { "epoch": 1.9426158045935942, "grad_norm": 0.5145154343209393, "learning_rate": 1.0712362258510079e-07, "loss": 3.2658, "step": 8494 }, { "epoch": 1.9430731630625866, "grad_norm": 0.5575256005212563, "learning_rate": 1.054177012960178e-07, "loss": 3.3529, "step": 8496 }, { "epoch": 1.9435305215315792, "grad_norm": 0.4315240452267389, "learning_rate": 1.0372544377350857e-07, "loss": 3.3013, "step": 8498 }, { "epoch": 1.9439878800005717, "grad_norm": 0.49151172870464777, "learning_rate": 1.0204685094639955e-07, "loss": 3.4458, "step": 8500 }, { "epoch": 1.944445238469564, "grad_norm": 0.4278827740762339, "learning_rate": 1.0038192373600652e-07, "loss": 3.168, "step": 8502 }, { "epoch": 1.9449025969385567, "grad_norm": 0.43558632026267613, "learning_rate": 9.87306630561513e-08, "loss": 3.4366, "step": 8504 }, { "epoch": 1.9453599554075494, "grad_norm": 0.5493137478234283, "learning_rate": 9.709306981315614e-08, "loss": 3.5864, "step": 8506 }, { "epoch": 1.9458173138765418, "grad_norm": 0.5404425660512766, "learning_rate": 9.546914490583258e-08, "loss": 3.2952, "step": 8508 }, { "epoch": 1.9462746723455342, "grad_norm": 0.4972428507657726, "learning_rate": 9.385888922550102e-08, "loss": 3.4172, "step": 8510 }, { "epoch": 1.9467320308145268, "grad_norm": 0.5369214925741039, "learning_rate": 9.226230365597666e-08, "loss": 3.6069, "step": 8512 }, { "epoch": 1.9471893892835195, "grad_norm": 0.4814805023765526, "learning_rate": 9.067938907356133e-08, "loss": 3.3415, "step": 8514 }, { "epoch": 1.947646747752512, "grad_norm": 0.4759916263242484, "learning_rate": 8.911014634706838e-08, "loss": 3.3944, "step": 8516 }, { "epoch": 1.9481041062215043, "grad_norm": 0.4677074122968524, "learning_rate": 8.755457633780051e-08, "loss": 3.401, "step": 8518 }, { "epoch": 1.948561464690497, "grad_norm": 0.5364062614229792, "learning_rate": 8.601267989955253e-08, "loss": 3.378, "step": 8520 }, { "epoch": 1.9490188231594896, "grad_norm": 0.4531655457870821, "learning_rate": 8.448445787861692e-08, "loss": 3.3655, "step": 8522 }, { "epoch": 1.949476181628482, "grad_norm": 0.5809894685447325, "learning_rate": 8.29699111137866e-08, "loss": 3.4364, "step": 8524 }, { "epoch": 1.9499335400974744, "grad_norm": 0.49905022177882874, "learning_rate": 8.14690404363383e-08, "loss": 3.4214, "step": 8526 }, { "epoch": 1.950390898566467, "grad_norm": 0.4236423419251686, "learning_rate": 7.998184667004915e-08, "loss": 3.5004, "step": 8528 }, { "epoch": 1.9508482570354597, "grad_norm": 0.4025566500727466, "learning_rate": 7.850833063118845e-08, "loss": 3.3049, "step": 8530 }, { "epoch": 1.9513056155044521, "grad_norm": 0.4385445421023422, "learning_rate": 7.704849312851759e-08, "loss": 3.4601, "step": 8532 }, { "epoch": 1.9517629739734446, "grad_norm": 0.4918872886489554, "learning_rate": 7.560233496329006e-08, "loss": 3.4782, "step": 8534 }, { "epoch": 1.9522203324424372, "grad_norm": 0.5261776499725135, "learning_rate": 7.416985692924872e-08, "loss": 3.4748, "step": 8536 }, { "epoch": 1.9526776909114296, "grad_norm": 0.4613123840178905, "learning_rate": 7.275105981263686e-08, "loss": 3.3505, "step": 8538 }, { "epoch": 1.953135049380422, "grad_norm": 0.4965860072214, "learning_rate": 7.134594439217601e-08, "loss": 3.3663, "step": 8540 }, { "epoch": 1.9535924078494147, "grad_norm": 0.47310380789942386, "learning_rate": 6.995451143909093e-08, "loss": 3.4954, "step": 8542 }, { "epoch": 1.9540497663184073, "grad_norm": 0.46923180296809275, "learning_rate": 6.85767617170846e-08, "loss": 3.3179, "step": 8544 }, { "epoch": 1.9545071247873997, "grad_norm": 0.4968652808240485, "learning_rate": 6.721269598236324e-08, "loss": 3.445, "step": 8546 }, { "epoch": 1.9549644832563922, "grad_norm": 0.6227508846339462, "learning_rate": 6.586231498360573e-08, "loss": 3.305, "step": 8548 }, { "epoch": 1.9554218417253848, "grad_norm": 0.5994139610730456, "learning_rate": 6.452561946199698e-08, "loss": 3.3832, "step": 8550 }, { "epoch": 1.9558792001943774, "grad_norm": 0.5693059164150898, "learning_rate": 6.320261015120011e-08, "loss": 3.4205, "step": 8552 }, { "epoch": 1.9563365586633699, "grad_norm": 0.43921012093702827, "learning_rate": 6.189328777736481e-08, "loss": 3.2606, "step": 8554 }, { "epoch": 1.9567939171323623, "grad_norm": 0.688359438625909, "learning_rate": 6.059765305913845e-08, "loss": 3.44, "step": 8556 }, { "epoch": 1.957251275601355, "grad_norm": 0.5138374032900701, "learning_rate": 5.9315706707646634e-08, "loss": 3.4259, "step": 8558 }, { "epoch": 1.9577086340703476, "grad_norm": 0.5151693180128719, "learning_rate": 5.8047449426501535e-08, "loss": 3.5642, "step": 8560 }, { "epoch": 1.95816599253934, "grad_norm": 0.6300825645068067, "learning_rate": 5.679288191181298e-08, "loss": 3.309, "step": 8562 }, { "epoch": 1.9586233510083324, "grad_norm": 0.48797194793464344, "learning_rate": 5.55520048521635e-08, "loss": 3.5267, "step": 8564 }, { "epoch": 1.959080709477325, "grad_norm": 0.5310292626846976, "learning_rate": 5.432481892862773e-08, "loss": 3.2704, "step": 8566 }, { "epoch": 1.9595380679463177, "grad_norm": 0.5253013903766665, "learning_rate": 5.3111324814766884e-08, "loss": 3.5063, "step": 8568 }, { "epoch": 1.95999542641531, "grad_norm": 0.46359984072138605, "learning_rate": 5.1911523176623177e-08, "loss": 3.4648, "step": 8570 }, { "epoch": 1.9604527848843025, "grad_norm": 0.5208056601484127, "learning_rate": 5.072541467272818e-08, "loss": 3.5653, "step": 8572 }, { "epoch": 1.9609101433532952, "grad_norm": 0.38187628725524814, "learning_rate": 4.9552999954097234e-08, "loss": 3.2728, "step": 8574 }, { "epoch": 1.9613675018222878, "grad_norm": 0.4942571178525763, "learning_rate": 4.8394279664221164e-08, "loss": 3.34, "step": 8576 }, { "epoch": 1.9618248602912802, "grad_norm": 0.5110716185855503, "learning_rate": 4.724925443908845e-08, "loss": 3.3132, "step": 8578 }, { "epoch": 1.9622822187602726, "grad_norm": 0.6733162525185391, "learning_rate": 4.611792490716027e-08, "loss": 3.4732, "step": 8580 }, { "epoch": 1.9627395772292653, "grad_norm": 0.5306929304226732, "learning_rate": 4.5000291689381576e-08, "loss": 3.6847, "step": 8582 }, { "epoch": 1.9631969356982577, "grad_norm": 0.6312466545771718, "learning_rate": 4.3896355399183906e-08, "loss": 3.2746, "step": 8584 }, { "epoch": 1.9636542941672501, "grad_norm": 0.5371477980308563, "learning_rate": 4.28061166424798e-08, "loss": 3.395, "step": 8586 }, { "epoch": 1.9641116526362428, "grad_norm": 0.4586456834803098, "learning_rate": 4.172957601766558e-08, "loss": 3.318, "step": 8588 }, { "epoch": 1.9645690111052354, "grad_norm": 0.5417515675936183, "learning_rate": 4.066673411561306e-08, "loss": 3.2431, "step": 8590 }, { "epoch": 1.9650263695742278, "grad_norm": 0.46142406967895194, "learning_rate": 3.961759151967781e-08, "loss": 3.4812, "step": 8592 }, { "epoch": 1.9654837280432202, "grad_norm": 0.4542683776944216, "learning_rate": 3.858214880570199e-08, "loss": 3.4702, "step": 8594 }, { "epoch": 1.9659410865122129, "grad_norm": 0.44384972096582986, "learning_rate": 3.756040654200321e-08, "loss": 3.3624, "step": 8596 }, { "epoch": 1.9663984449812055, "grad_norm": 0.49099549257555825, "learning_rate": 3.6552365289377334e-08, "loss": 3.5825, "step": 8598 }, { "epoch": 1.966855803450198, "grad_norm": 0.44855740834790997, "learning_rate": 3.555802560110677e-08, "loss": 3.3844, "step": 8600 }, { "epoch": 1.9673131619191904, "grad_norm": 0.5129775950482331, "learning_rate": 3.4577388022946636e-08, "loss": 3.4178, "step": 8602 }, { "epoch": 1.967770520388183, "grad_norm": 0.44923516948380654, "learning_rate": 3.3610453093135815e-08, "loss": 3.2635, "step": 8604 }, { "epoch": 1.9682278788571757, "grad_norm": 0.5871496505273031, "learning_rate": 3.265722134238869e-08, "loss": 3.5091, "step": 8606 }, { "epoch": 1.968685237326168, "grad_norm": 0.4248675069729021, "learning_rate": 3.171769329390617e-08, "loss": 3.4902, "step": 8608 }, { "epoch": 1.9691425957951605, "grad_norm": 0.488735475982906, "learning_rate": 3.079186946335633e-08, "loss": 3.2365, "step": 8610 }, { "epoch": 1.9695999542641531, "grad_norm": 0.5312580220233882, "learning_rate": 2.987975035889656e-08, "loss": 3.4841, "step": 8612 }, { "epoch": 1.9700573127331458, "grad_norm": 0.6017289860205673, "learning_rate": 2.8981336481154198e-08, "loss": 3.454, "step": 8614 }, { "epoch": 1.9705146712021382, "grad_norm": 0.4407822916639934, "learning_rate": 2.8096628323240336e-08, "loss": 3.5349, "step": 8616 }, { "epoch": 1.9709720296711306, "grad_norm": 0.5529618351254184, "learning_rate": 2.7225626370736003e-08, "loss": 3.433, "step": 8618 }, { "epoch": 1.9714293881401233, "grad_norm": 0.47748145631663147, "learning_rate": 2.6368331101706e-08, "loss": 3.2916, "step": 8620 }, { "epoch": 1.971886746609116, "grad_norm": 0.5380175039872159, "learning_rate": 2.5524742986690607e-08, "loss": 3.2187, "step": 8622 }, { "epoch": 1.972344105078108, "grad_norm": 0.3709748730234905, "learning_rate": 2.469486248870556e-08, "loss": 3.3117, "step": 8624 }, { "epoch": 1.9728014635471007, "grad_norm": 0.49082015098632875, "learning_rate": 2.387869006324206e-08, "loss": 3.4005, "step": 8626 }, { "epoch": 1.9732588220160934, "grad_norm": 0.35526330481492735, "learning_rate": 2.3076226158266766e-08, "loss": 3.4603, "step": 8628 }, { "epoch": 1.9737161804850858, "grad_norm": 0.42928569386910403, "learning_rate": 2.2287471214230138e-08, "loss": 3.4088, "step": 8630 }, { "epoch": 1.9741735389540782, "grad_norm": 0.4506814942150869, "learning_rate": 2.1512425664046988e-08, "loss": 3.5376, "step": 8632 }, { "epoch": 1.9746308974230709, "grad_norm": 0.587791104938818, "learning_rate": 2.075108993311592e-08, "loss": 3.268, "step": 8634 }, { "epoch": 1.9750882558920635, "grad_norm": 0.4952222272517707, "learning_rate": 2.0003464439305452e-08, "loss": 3.4813, "step": 8636 }, { "epoch": 1.975545614361056, "grad_norm": 0.547001687770799, "learning_rate": 1.9269549592959567e-08, "loss": 3.4033, "step": 8638 }, { "epoch": 1.9760029728300483, "grad_norm": 0.5223360286670289, "learning_rate": 1.8549345796900485e-08, "loss": 3.4574, "step": 8640 }, { "epoch": 1.976460331299041, "grad_norm": 0.49521607383976957, "learning_rate": 1.784285344642589e-08, "loss": 3.3646, "step": 8642 }, { "epoch": 1.9769176897680336, "grad_norm": 0.5289598637824537, "learning_rate": 1.7150072929300597e-08, "loss": 3.3189, "step": 8644 }, { "epoch": 1.977375048237026, "grad_norm": 0.44934312508429053, "learning_rate": 1.647100462577045e-08, "loss": 3.2683, "step": 8646 }, { "epoch": 1.9778324067060185, "grad_norm": 0.5684985747407546, "learning_rate": 1.580564890854841e-08, "loss": 3.3752, "step": 8648 }, { "epoch": 1.978289765175011, "grad_norm": 0.6696677434929064, "learning_rate": 1.515400614282847e-08, "loss": 3.4847, "step": 8650 }, { "epoch": 1.9787471236440037, "grad_norm": 0.466633293187501, "learning_rate": 1.4516076686271752e-08, "loss": 3.5175, "step": 8652 }, { "epoch": 1.9792044821129962, "grad_norm": 0.4672556262914936, "learning_rate": 1.3891860889020392e-08, "loss": 3.4006, "step": 8654 }, { "epoch": 1.9796618405819886, "grad_norm": 0.49306422063290595, "learning_rate": 1.3281359093678114e-08, "loss": 3.3393, "step": 8656 }, { "epoch": 1.9801191990509812, "grad_norm": 0.4896304905953396, "learning_rate": 1.2684571635335207e-08, "loss": 3.411, "step": 8658 }, { "epoch": 1.9805765575199739, "grad_norm": 0.5720366192263269, "learning_rate": 1.2101498841540771e-08, "loss": 3.3659, "step": 8660 }, { "epoch": 1.9810339159889663, "grad_norm": 0.4018616055915735, "learning_rate": 1.1532141032324917e-08, "loss": 3.3462, "step": 8662 }, { "epoch": 1.9814912744579587, "grad_norm": 0.4973190374823219, "learning_rate": 1.0976498520190448e-08, "loss": 3.6199, "step": 8664 }, { "epoch": 1.9819486329269513, "grad_norm": 0.4835447301963435, "learning_rate": 1.0434571610110077e-08, "loss": 3.4341, "step": 8666 }, { "epoch": 1.9824059913959438, "grad_norm": 0.43584254197967787, "learning_rate": 9.906360599526431e-09, "loss": 3.5657, "step": 8668 }, { "epoch": 1.9828633498649362, "grad_norm": 0.5025302522956713, "learning_rate": 9.391865778357601e-09, "loss": 3.3907, "step": 8670 }, { "epoch": 1.9833207083339288, "grad_norm": 0.4493353816674565, "learning_rate": 8.891087428988809e-09, "loss": 3.3463, "step": 8672 }, { "epoch": 1.9837780668029215, "grad_norm": 0.4157927547441489, "learning_rate": 8.4040258262863e-09, "loss": 3.3807, "step": 8674 }, { "epoch": 1.9842354252719139, "grad_norm": 0.49615356505229063, "learning_rate": 7.930681237575122e-09, "loss": 3.4419, "step": 8676 }, { "epoch": 1.9846927837409063, "grad_norm": 0.5519689105476976, "learning_rate": 7.471053922658567e-09, "loss": 3.3788, "step": 8678 }, { "epoch": 1.985150142209899, "grad_norm": 0.5633628108938563, "learning_rate": 7.025144133812611e-09, "loss": 3.4719, "step": 8680 }, { "epoch": 1.9856075006788916, "grad_norm": 0.5147062555079349, "learning_rate": 6.592952115777595e-09, "loss": 3.6666, "step": 8682 }, { "epoch": 1.986064859147884, "grad_norm": 0.6033251069712908, "learning_rate": 6.174478105774873e-09, "loss": 3.3263, "step": 8684 }, { "epoch": 1.9865222176168764, "grad_norm": 0.4890152411738034, "learning_rate": 5.769722333484606e-09, "loss": 3.4314, "step": 8686 }, { "epoch": 1.986979576085869, "grad_norm": 0.46411402509408395, "learning_rate": 5.378685021062424e-09, "loss": 3.349, "step": 8688 }, { "epoch": 1.9874369345548617, "grad_norm": 0.5496085648845892, "learning_rate": 5.001366383139416e-09, "loss": 3.3452, "step": 8690 }, { "epoch": 1.9878942930238541, "grad_norm": 0.4907182380135756, "learning_rate": 4.637766626811035e-09, "loss": 3.4213, "step": 8692 }, { "epoch": 1.9883516514928465, "grad_norm": 0.42758907828928927, "learning_rate": 4.287885951642645e-09, "loss": 3.3036, "step": 8694 }, { "epoch": 1.9888090099618392, "grad_norm": 0.5150954098884467, "learning_rate": 3.951724549675073e-09, "loss": 3.4393, "step": 8696 }, { "epoch": 1.9892663684308318, "grad_norm": 0.4062889337223136, "learning_rate": 3.629282605413509e-09, "loss": 3.2911, "step": 8698 }, { "epoch": 1.9897237268998242, "grad_norm": 0.4810901128539133, "learning_rate": 3.320560295833053e-09, "loss": 3.3842, "step": 8700 }, { "epoch": 1.9901810853688167, "grad_norm": 0.48613922249785385, "learning_rate": 3.0255577903842704e-09, "loss": 3.5236, "step": 8702 }, { "epoch": 1.9906384438378093, "grad_norm": 0.4866935552012408, "learning_rate": 2.744275250982087e-09, "loss": 3.1802, "step": 8704 }, { "epoch": 1.991095802306802, "grad_norm": 0.5007088524057388, "learning_rate": 2.4767128320113408e-09, "loss": 3.3278, "step": 8706 }, { "epoch": 1.9915531607757941, "grad_norm": 0.606423025904209, "learning_rate": 2.2228706803295586e-09, "loss": 3.3554, "step": 8708 }, { "epoch": 1.9920105192447868, "grad_norm": 0.46995906666803505, "learning_rate": 1.9827489352614026e-09, "loss": 3.3631, "step": 8710 }, { "epoch": 1.9924678777137794, "grad_norm": 0.4809658146554809, "learning_rate": 1.756347728598673e-09, "loss": 3.4817, "step": 8712 }, { "epoch": 1.9929252361827718, "grad_norm": 0.4308793098769786, "learning_rate": 1.5436671846114082e-09, "loss": 3.3152, "step": 8714 }, { "epoch": 1.9933825946517643, "grad_norm": 0.41189508154716237, "learning_rate": 1.3447074200256816e-09, "loss": 3.332, "step": 8716 }, { "epoch": 1.993839953120757, "grad_norm": 0.3434140123938779, "learning_rate": 1.159468544048581e-09, "loss": 3.4472, "step": 8718 }, { "epoch": 1.9942973115897495, "grad_norm": 0.4781226120683466, "learning_rate": 9.879506583487797e-10, "loss": 3.3721, "step": 8720 }, { "epoch": 1.994754670058742, "grad_norm": 0.5920154432299344, "learning_rate": 8.301538570676393e-10, "loss": 3.2953, "step": 8722 }, { "epoch": 1.9952120285277344, "grad_norm": 0.5400628469241122, "learning_rate": 6.860782268108823e-10, "loss": 3.3934, "step": 8724 }, { "epoch": 1.995669386996727, "grad_norm": 0.5779055874960531, "learning_rate": 5.557238466624704e-10, "loss": 3.4832, "step": 8726 }, { "epoch": 1.9961267454657197, "grad_norm": 0.5135477880715323, "learning_rate": 4.3909078816517514e-10, "loss": 3.3407, "step": 8728 }, { "epoch": 1.996584103934712, "grad_norm": 0.5105331897725597, "learning_rate": 3.361791153344562e-10, "loss": 3.3554, "step": 8730 }, { "epoch": 1.9970414624037045, "grad_norm": 0.5411465207616597, "learning_rate": 2.4698888465846115e-10, "loss": 3.5588, "step": 8732 }, { "epoch": 1.9974988208726971, "grad_norm": 0.5880420781215231, "learning_rate": 1.7152014508692305e-10, "loss": 3.4176, "step": 8734 }, { "epoch": 1.9979561793416898, "grad_norm": 0.6484672355727926, "learning_rate": 1.0977293804226296e-10, "loss": 3.5863, "step": 8736 }, { "epoch": 1.9984135378106822, "grad_norm": 0.49974797274763727, "learning_rate": 6.174729741958985e-11, "loss": 3.3429, "step": 8738 }, { "epoch": 1.9988708962796746, "grad_norm": 0.6099848507735399, "learning_rate": 2.7443249572822738e-11, "loss": 3.3886, "step": 8740 }, { "epoch": 1.9993282547486673, "grad_norm": 0.9381506886990069, "learning_rate": 6.860813334119698e-12, "loss": 3.4229, "step": 8742 }, { "epoch": 1.99978561321766, "grad_norm": 0.42894637609783665, "learning_rate": 0.0, "loss": 3.2766, "step": 8744 } ], "logging_steps": 2, "max_steps": 8744, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.191684474667008e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }