{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2457, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002442002442002442, "grad_norm": 2.3893706798553467, "learning_rate": 8.130081300813009e-08, "loss": 1.9268020391464233, "step": 2 }, { "epoch": 0.004884004884004884, "grad_norm": 2.7525486946105957, "learning_rate": 2.439024390243903e-07, "loss": 2.098186492919922, "step": 4 }, { "epoch": 0.007326007326007326, "grad_norm": 11.947481155395508, "learning_rate": 4.0650406504065046e-07, "loss": 2.4138333797454834, "step": 6 }, { "epoch": 0.009768009768009768, "grad_norm": 2.4891531467437744, "learning_rate": 5.691056910569106e-07, "loss": 1.953867793083191, "step": 8 }, { "epoch": 0.01221001221001221, "grad_norm": 3.49385666847229, "learning_rate": 7.317073170731707e-07, "loss": 2.1249871253967285, "step": 10 }, { "epoch": 0.014652014652014652, "grad_norm": 9.621060371398926, "learning_rate": 8.94308943089431e-07, "loss": 1.870603322982788, "step": 12 }, { "epoch": 0.017094017094017096, "grad_norm": 1.2622815370559692, "learning_rate": 1.0569105691056912e-06, "loss": 1.646697998046875, "step": 14 }, { "epoch": 0.019536019536019536, "grad_norm": 16.232858657836914, "learning_rate": 1.2195121951219514e-06, "loss": 1.6898235082626343, "step": 16 }, { "epoch": 0.02197802197802198, "grad_norm": 10.059346199035645, "learning_rate": 1.3821138211382116e-06, "loss": 1.8439620733261108, "step": 18 }, { "epoch": 0.02442002442002442, "grad_norm": 3.1513400077819824, "learning_rate": 1.5447154471544717e-06, "loss": 1.6882305145263672, "step": 20 }, { "epoch": 0.026862026862026864, "grad_norm": 5.707210540771484, "learning_rate": 1.707317073170732e-06, "loss": 1.4086613655090332, "step": 22 }, { "epoch": 0.029304029304029304, "grad_norm": 1.5729589462280273, "learning_rate": 1.8699186991869919e-06, "loss": 1.22359037399292, "step": 24 }, { "epoch": 0.031746031746031744, "grad_norm": 1.6034835577011108, "learning_rate": 2.0325203252032523e-06, "loss": 1.6794222593307495, "step": 26 }, { "epoch": 0.03418803418803419, "grad_norm": 4.907107353210449, "learning_rate": 2.1951219512195125e-06, "loss": 1.7425767183303833, "step": 28 }, { "epoch": 0.03663003663003663, "grad_norm": 3.0787065029144287, "learning_rate": 2.3577235772357727e-06, "loss": 1.1433881521224976, "step": 30 }, { "epoch": 0.03907203907203907, "grad_norm": 2.307734966278076, "learning_rate": 2.5203252032520324e-06, "loss": 1.0014692544937134, "step": 32 }, { "epoch": 0.04151404151404151, "grad_norm": 2.102328062057495, "learning_rate": 2.682926829268293e-06, "loss": 1.558118224143982, "step": 34 }, { "epoch": 0.04395604395604396, "grad_norm": 2.508723020553589, "learning_rate": 2.845528455284553e-06, "loss": 1.2752659320831299, "step": 36 }, { "epoch": 0.0463980463980464, "grad_norm": 1.2697498798370361, "learning_rate": 3.0081300813008134e-06, "loss": 1.5238615274429321, "step": 38 }, { "epoch": 0.04884004884004884, "grad_norm": 9.991412162780762, "learning_rate": 3.1707317073170736e-06, "loss": 1.3837251663208008, "step": 40 }, { "epoch": 0.05128205128205128, "grad_norm": 1.6207857131958008, "learning_rate": 3.3333333333333333e-06, "loss": 1.4677042961120605, "step": 42 }, { "epoch": 0.05372405372405373, "grad_norm": 1.4186246395111084, "learning_rate": 3.495934959349594e-06, "loss": 1.4204754829406738, "step": 44 }, { "epoch": 0.05616605616605617, "grad_norm": 3.8226752281188965, "learning_rate": 3.6585365853658537e-06, "loss": 1.3837416172027588, "step": 46 }, { "epoch": 0.05860805860805861, "grad_norm": 2.2121241092681885, "learning_rate": 3.821138211382115e-06, "loss": 1.0739210844039917, "step": 48 }, { "epoch": 0.06105006105006105, "grad_norm": 2.6986029148101807, "learning_rate": 3.983739837398374e-06, "loss": 0.9827917814254761, "step": 50 }, { "epoch": 0.06349206349206349, "grad_norm": 1.9821456670761108, "learning_rate": 4.146341463414634e-06, "loss": 1.3439877033233643, "step": 52 }, { "epoch": 0.06593406593406594, "grad_norm": 20.799144744873047, "learning_rate": 4.308943089430894e-06, "loss": 1.0445363521575928, "step": 54 }, { "epoch": 0.06837606837606838, "grad_norm": 2.018078088760376, "learning_rate": 4.471544715447155e-06, "loss": 1.3443750143051147, "step": 56 }, { "epoch": 0.07081807081807082, "grad_norm": 1.7203161716461182, "learning_rate": 4.634146341463416e-06, "loss": 1.3475321531295776, "step": 58 }, { "epoch": 0.07326007326007326, "grad_norm": 1.489026665687561, "learning_rate": 4.796747967479675e-06, "loss": 1.4037724733352661, "step": 60 }, { "epoch": 0.0757020757020757, "grad_norm": 5.072752475738525, "learning_rate": 4.959349593495935e-06, "loss": 1.0292410850524902, "step": 62 }, { "epoch": 0.07814407814407814, "grad_norm": 1.9865821599960327, "learning_rate": 5.121951219512195e-06, "loss": 1.6453887224197388, "step": 64 }, { "epoch": 0.08058608058608059, "grad_norm": 8.150779724121094, "learning_rate": 5.2845528455284555e-06, "loss": 1.1829452514648438, "step": 66 }, { "epoch": 0.08302808302808302, "grad_norm": 1.6878677606582642, "learning_rate": 5.447154471544716e-06, "loss": 1.1011936664581299, "step": 68 }, { "epoch": 0.08547008547008547, "grad_norm": 0.6670809388160706, "learning_rate": 5.609756097560977e-06, "loss": 1.1616472005844116, "step": 70 }, { "epoch": 0.08791208791208792, "grad_norm": 1.4617451429367065, "learning_rate": 5.772357723577237e-06, "loss": 1.3839240074157715, "step": 72 }, { "epoch": 0.09035409035409035, "grad_norm": 1.9361579418182373, "learning_rate": 5.934959349593496e-06, "loss": 1.4121818542480469, "step": 74 }, { "epoch": 0.0927960927960928, "grad_norm": 3.0737693309783936, "learning_rate": 6.0975609756097564e-06, "loss": 1.416529893875122, "step": 76 }, { "epoch": 0.09523809523809523, "grad_norm": 1.4835634231567383, "learning_rate": 6.260162601626017e-06, "loss": 1.222318410873413, "step": 78 }, { "epoch": 0.09768009768009768, "grad_norm": 3.82383394241333, "learning_rate": 6.422764227642278e-06, "loss": 1.1652414798736572, "step": 80 }, { "epoch": 0.10012210012210013, "grad_norm": 13.640969276428223, "learning_rate": 6.585365853658538e-06, "loss": 1.0345308780670166, "step": 82 }, { "epoch": 0.10256410256410256, "grad_norm": 9.584553718566895, "learning_rate": 6.747967479674797e-06, "loss": 1.0803658962249756, "step": 84 }, { "epoch": 0.10500610500610501, "grad_norm": 1.267685055732727, "learning_rate": 6.910569105691057e-06, "loss": 1.249168038368225, "step": 86 }, { "epoch": 0.10744810744810745, "grad_norm": 1.5764743089675903, "learning_rate": 7.0731707317073175e-06, "loss": 1.4062129259109497, "step": 88 }, { "epoch": 0.10989010989010989, "grad_norm": 0.6901059746742249, "learning_rate": 7.2357723577235786e-06, "loss": 1.1516574621200562, "step": 90 }, { "epoch": 0.11233211233211234, "grad_norm": 2.498300552368164, "learning_rate": 7.398373983739838e-06, "loss": 1.4968205690383911, "step": 92 }, { "epoch": 0.11477411477411477, "grad_norm": 8.051702499389648, "learning_rate": 7.560975609756098e-06, "loss": 1.1234135627746582, "step": 94 }, { "epoch": 0.11721611721611722, "grad_norm": 2.431464433670044, "learning_rate": 7.723577235772358e-06, "loss": 1.4027729034423828, "step": 96 }, { "epoch": 0.11965811965811966, "grad_norm": 2.105727434158325, "learning_rate": 7.886178861788618e-06, "loss": 1.3487744331359863, "step": 98 }, { "epoch": 0.1221001221001221, "grad_norm": 2.983010768890381, "learning_rate": 8.048780487804879e-06, "loss": 1.074942708015442, "step": 100 }, { "epoch": 0.12454212454212454, "grad_norm": 3.559720039367676, "learning_rate": 8.21138211382114e-06, "loss": 1.0268707275390625, "step": 102 }, { "epoch": 0.12698412698412698, "grad_norm": 1.4598705768585205, "learning_rate": 8.373983739837399e-06, "loss": 0.9993240833282471, "step": 104 }, { "epoch": 0.12942612942612944, "grad_norm": 1.1988660097122192, "learning_rate": 8.536585365853658e-06, "loss": 0.9525761604309082, "step": 106 }, { "epoch": 0.13186813186813187, "grad_norm": 27.485122680664062, "learning_rate": 8.69918699186992e-06, "loss": 1.3531205654144287, "step": 108 }, { "epoch": 0.1343101343101343, "grad_norm": 2.1461856365203857, "learning_rate": 8.86178861788618e-06, "loss": 1.6010525226593018, "step": 110 }, { "epoch": 0.13675213675213677, "grad_norm": 2.505549430847168, "learning_rate": 9.02439024390244e-06, "loss": 1.41323721408844, "step": 112 }, { "epoch": 0.1391941391941392, "grad_norm": 1.478813648223877, "learning_rate": 9.1869918699187e-06, "loss": 1.1879425048828125, "step": 114 }, { "epoch": 0.14163614163614163, "grad_norm": 1.3980270624160767, "learning_rate": 9.34959349593496e-06, "loss": 1.3165570497512817, "step": 116 }, { "epoch": 0.14407814407814407, "grad_norm": 3.0620999336242676, "learning_rate": 9.51219512195122e-06, "loss": 1.2242571115493774, "step": 118 }, { "epoch": 0.14652014652014653, "grad_norm": 1.2746002674102783, "learning_rate": 9.67479674796748e-06, "loss": 1.3259317874908447, "step": 120 }, { "epoch": 0.14896214896214896, "grad_norm": 1.4318238496780396, "learning_rate": 9.837398373983741e-06, "loss": 1.3270224332809448, "step": 122 }, { "epoch": 0.1514041514041514, "grad_norm": 2.3391611576080322, "learning_rate": 1e-05, "loss": 1.3085572719573975, "step": 124 }, { "epoch": 0.15384615384615385, "grad_norm": 1.5901521444320679, "learning_rate": 1.0162601626016262e-05, "loss": 1.1765004396438599, "step": 126 }, { "epoch": 0.1562881562881563, "grad_norm": 1.361893892288208, "learning_rate": 1.0325203252032521e-05, "loss": 1.4069057703018188, "step": 128 }, { "epoch": 0.15873015873015872, "grad_norm": 2.779815673828125, "learning_rate": 1.0487804878048782e-05, "loss": 1.2615665197372437, "step": 130 }, { "epoch": 0.16117216117216118, "grad_norm": 1.3855739831924438, "learning_rate": 1.065040650406504e-05, "loss": 1.0511776208877563, "step": 132 }, { "epoch": 0.16361416361416362, "grad_norm": 1.6709128618240356, "learning_rate": 1.0813008130081301e-05, "loss": 1.1562919616699219, "step": 134 }, { "epoch": 0.16605616605616605, "grad_norm": 2.7451095581054688, "learning_rate": 1.0975609756097562e-05, "loss": 1.1669853925704956, "step": 136 }, { "epoch": 0.1684981684981685, "grad_norm": 1.3229765892028809, "learning_rate": 1.1138211382113821e-05, "loss": 1.132803201675415, "step": 138 }, { "epoch": 0.17094017094017094, "grad_norm": 1.1329905986785889, "learning_rate": 1.1300813008130082e-05, "loss": 1.078572392463684, "step": 140 }, { "epoch": 0.17338217338217338, "grad_norm": 1.5809731483459473, "learning_rate": 1.1463414634146342e-05, "loss": 1.3616305589675903, "step": 142 }, { "epoch": 0.17582417582417584, "grad_norm": 1.0317999124526978, "learning_rate": 1.1626016260162603e-05, "loss": 1.1173185110092163, "step": 144 }, { "epoch": 0.17826617826617827, "grad_norm": 2.4350783824920654, "learning_rate": 1.1788617886178864e-05, "loss": 0.9900561571121216, "step": 146 }, { "epoch": 0.1807081807081807, "grad_norm": 1.8645095825195312, "learning_rate": 1.1951219512195123e-05, "loss": 0.9898566007614136, "step": 148 }, { "epoch": 0.18315018315018314, "grad_norm": 2.060671091079712, "learning_rate": 1.2113821138211384e-05, "loss": 1.3455116748809814, "step": 150 }, { "epoch": 0.1855921855921856, "grad_norm": 4.001134395599365, "learning_rate": 1.2276422764227642e-05, "loss": 1.4696322679519653, "step": 152 }, { "epoch": 0.18803418803418803, "grad_norm": 1.4498295783996582, "learning_rate": 1.2439024390243903e-05, "loss": 1.343530297279358, "step": 154 }, { "epoch": 0.19047619047619047, "grad_norm": 6.848788261413574, "learning_rate": 1.2601626016260164e-05, "loss": 1.6466281414031982, "step": 156 }, { "epoch": 0.19291819291819293, "grad_norm": 3.0440895557403564, "learning_rate": 1.2764227642276423e-05, "loss": 1.1969726085662842, "step": 158 }, { "epoch": 0.19536019536019536, "grad_norm": 3.5766472816467285, "learning_rate": 1.2926829268292684e-05, "loss": 0.961052656173706, "step": 160 }, { "epoch": 0.1978021978021978, "grad_norm": 4.323490619659424, "learning_rate": 1.3089430894308943e-05, "loss": 1.4117612838745117, "step": 162 }, { "epoch": 0.20024420024420025, "grad_norm": 1.282266616821289, "learning_rate": 1.3252032520325204e-05, "loss": 1.319150447845459, "step": 164 }, { "epoch": 0.2026862026862027, "grad_norm": 1.7024965286254883, "learning_rate": 1.3414634146341466e-05, "loss": 1.3318078517913818, "step": 166 }, { "epoch": 0.20512820512820512, "grad_norm": 4.461455821990967, "learning_rate": 1.3577235772357725e-05, "loss": 1.1935322284698486, "step": 168 }, { "epoch": 0.20757020757020758, "grad_norm": 4.874426364898682, "learning_rate": 1.3739837398373986e-05, "loss": 0.9753493666648865, "step": 170 }, { "epoch": 0.21001221001221002, "grad_norm": 1.221576452255249, "learning_rate": 1.3902439024390244e-05, "loss": 1.0886809825897217, "step": 172 }, { "epoch": 0.21245421245421245, "grad_norm": 1.046645998954773, "learning_rate": 1.4065040650406505e-05, "loss": 1.3587074279785156, "step": 174 }, { "epoch": 0.2148962148962149, "grad_norm": 1.0372843742370605, "learning_rate": 1.4227642276422766e-05, "loss": 1.2677640914916992, "step": 176 }, { "epoch": 0.21733821733821734, "grad_norm": 3.766371250152588, "learning_rate": 1.4390243902439025e-05, "loss": 1.3696472644805908, "step": 178 }, { "epoch": 0.21978021978021978, "grad_norm": 0.8646840453147888, "learning_rate": 1.4552845528455286e-05, "loss": 1.0324885845184326, "step": 180 }, { "epoch": 0.2222222222222222, "grad_norm": 2.4293770790100098, "learning_rate": 1.4715447154471545e-05, "loss": 0.9206986427307129, "step": 182 }, { "epoch": 0.22466422466422467, "grad_norm": 1.333274006843567, "learning_rate": 1.4878048780487806e-05, "loss": 1.4569449424743652, "step": 184 }, { "epoch": 0.2271062271062271, "grad_norm": 10.24266529083252, "learning_rate": 1.5040650406504067e-05, "loss": 1.3317943811416626, "step": 186 }, { "epoch": 0.22954822954822954, "grad_norm": 1.4686931371688843, "learning_rate": 1.5203252032520327e-05, "loss": 1.4440950155258179, "step": 188 }, { "epoch": 0.231990231990232, "grad_norm": 3.4735898971557617, "learning_rate": 1.5365853658536586e-05, "loss": 1.4389160871505737, "step": 190 }, { "epoch": 0.23443223443223443, "grad_norm": 1.2915067672729492, "learning_rate": 1.5528455284552847e-05, "loss": 1.383222222328186, "step": 192 }, { "epoch": 0.23687423687423687, "grad_norm": 1.2586745023727417, "learning_rate": 1.5691056910569108e-05, "loss": 1.3772218227386475, "step": 194 }, { "epoch": 0.23931623931623933, "grad_norm": 5.940347194671631, "learning_rate": 1.585365853658537e-05, "loss": 1.152698040008545, "step": 196 }, { "epoch": 0.24175824175824176, "grad_norm": 1.017399787902832, "learning_rate": 1.6016260162601627e-05, "loss": 1.3445426225662231, "step": 198 }, { "epoch": 0.2442002442002442, "grad_norm": 2.1003332138061523, "learning_rate": 1.6178861788617888e-05, "loss": 1.4353071451187134, "step": 200 }, { "epoch": 0.24664224664224665, "grad_norm": 1.2850189208984375, "learning_rate": 1.6341463414634145e-05, "loss": 1.3451241254806519, "step": 202 }, { "epoch": 0.2490842490842491, "grad_norm": 1.576464295387268, "learning_rate": 1.6504065040650406e-05, "loss": 1.0413107872009277, "step": 204 }, { "epoch": 0.2515262515262515, "grad_norm": 2.5853071212768555, "learning_rate": 1.6666666666666667e-05, "loss": 1.319067120552063, "step": 206 }, { "epoch": 0.25396825396825395, "grad_norm": 1.365488052368164, "learning_rate": 1.682926829268293e-05, "loss": 1.1813554763793945, "step": 208 }, { "epoch": 0.2564102564102564, "grad_norm": 2.6422295570373535, "learning_rate": 1.699186991869919e-05, "loss": 0.9516135454177856, "step": 210 }, { "epoch": 0.2588522588522589, "grad_norm": 3.197498321533203, "learning_rate": 1.7154471544715447e-05, "loss": 0.7080205678939819, "step": 212 }, { "epoch": 0.2612942612942613, "grad_norm": 0.6656016111373901, "learning_rate": 1.7317073170731708e-05, "loss": 1.2077349424362183, "step": 214 }, { "epoch": 0.26373626373626374, "grad_norm": 1.1304625272750854, "learning_rate": 1.747967479674797e-05, "loss": 1.340700626373291, "step": 216 }, { "epoch": 0.2661782661782662, "grad_norm": 1.5265967845916748, "learning_rate": 1.7642276422764227e-05, "loss": 1.2717061042785645, "step": 218 }, { "epoch": 0.2686202686202686, "grad_norm": 1.4525116682052612, "learning_rate": 1.7804878048780488e-05, "loss": 1.3473409414291382, "step": 220 }, { "epoch": 0.27106227106227104, "grad_norm": 1.32387113571167, "learning_rate": 1.796747967479675e-05, "loss": 1.0149593353271484, "step": 222 }, { "epoch": 0.27350427350427353, "grad_norm": 3.1132187843322754, "learning_rate": 1.813008130081301e-05, "loss": 1.0153287649154663, "step": 224 }, { "epoch": 0.27594627594627597, "grad_norm": 1.0930202007293701, "learning_rate": 1.829268292682927e-05, "loss": 1.3861629962921143, "step": 226 }, { "epoch": 0.2783882783882784, "grad_norm": 1.287597417831421, "learning_rate": 1.845528455284553e-05, "loss": 1.4393969774246216, "step": 228 }, { "epoch": 0.28083028083028083, "grad_norm": 2.620121717453003, "learning_rate": 1.861788617886179e-05, "loss": 1.3899247646331787, "step": 230 }, { "epoch": 0.28327228327228327, "grad_norm": 8.519104957580566, "learning_rate": 1.878048780487805e-05, "loss": 1.5776143074035645, "step": 232 }, { "epoch": 0.2857142857142857, "grad_norm": 1.6813069581985474, "learning_rate": 1.8943089430894312e-05, "loss": 1.1832704544067383, "step": 234 }, { "epoch": 0.28815628815628813, "grad_norm": 0.8178291320800781, "learning_rate": 1.9105691056910573e-05, "loss": 1.4064499139785767, "step": 236 }, { "epoch": 0.2905982905982906, "grad_norm": 1.324826717376709, "learning_rate": 1.926829268292683e-05, "loss": 1.3489106893539429, "step": 238 }, { "epoch": 0.29304029304029305, "grad_norm": 1.8617048263549805, "learning_rate": 1.943089430894309e-05, "loss": 1.2338833808898926, "step": 240 }, { "epoch": 0.2954822954822955, "grad_norm": 1.1062006950378418, "learning_rate": 1.959349593495935e-05, "loss": 1.1017166376113892, "step": 242 }, { "epoch": 0.2979242979242979, "grad_norm": 1.151795744895935, "learning_rate": 1.975609756097561e-05, "loss": 1.4224822521209717, "step": 244 }, { "epoch": 0.30036630036630035, "grad_norm": 1.1693967580795288, "learning_rate": 1.991869918699187e-05, "loss": 0.9105122089385986, "step": 246 }, { "epoch": 0.3028083028083028, "grad_norm": 2.165432929992676, "learning_rate": 1.9999990914795638e-05, "loss": 1.3537715673446655, "step": 248 }, { "epoch": 0.3052503052503053, "grad_norm": 2.523041009902954, "learning_rate": 1.9999918233270764e-05, "loss": 1.1235604286193848, "step": 250 }, { "epoch": 0.3076923076923077, "grad_norm": 4.936850547790527, "learning_rate": 1.999977287080797e-05, "loss": 1.2547414302825928, "step": 252 }, { "epoch": 0.31013431013431014, "grad_norm": 1.219511866569519, "learning_rate": 1.9999554828581173e-05, "loss": 1.4373202323913574, "step": 254 }, { "epoch": 0.3125763125763126, "grad_norm": 1.137669324874878, "learning_rate": 1.9999264108351216e-05, "loss": 1.3956284523010254, "step": 256 }, { "epoch": 0.315018315018315, "grad_norm": 1.6814566850662231, "learning_rate": 1.999890071246588e-05, "loss": 1.4139020442962646, "step": 258 }, { "epoch": 0.31746031746031744, "grad_norm": 1.1596673727035522, "learning_rate": 1.9998464643859853e-05, "loss": 1.3567984104156494, "step": 260 }, { "epoch": 0.3199023199023199, "grad_norm": 2.8471524715423584, "learning_rate": 1.999795590605471e-05, "loss": 1.6041795015335083, "step": 262 }, { "epoch": 0.32234432234432236, "grad_norm": 3.1703484058380127, "learning_rate": 1.9997374503158877e-05, "loss": 0.9505234956741333, "step": 264 }, { "epoch": 0.3247863247863248, "grad_norm": 1.568231463432312, "learning_rate": 1.9996720439867617e-05, "loss": 1.1375908851623535, "step": 266 }, { "epoch": 0.32722832722832723, "grad_norm": 1.1084926128387451, "learning_rate": 1.9995993721462966e-05, "loss": 1.5744917392730713, "step": 268 }, { "epoch": 0.32967032967032966, "grad_norm": 2.8259096145629883, "learning_rate": 1.9995194353813707e-05, "loss": 1.1887890100479126, "step": 270 }, { "epoch": 0.3321123321123321, "grad_norm": 1.1199963092803955, "learning_rate": 1.999432234337532e-05, "loss": 1.4438523054122925, "step": 272 }, { "epoch": 0.33455433455433453, "grad_norm": 3.761988878250122, "learning_rate": 1.999337769718993e-05, "loss": 1.2220399379730225, "step": 274 }, { "epoch": 0.336996336996337, "grad_norm": 1.841293454170227, "learning_rate": 1.9992360422886246e-05, "loss": 1.1481637954711914, "step": 276 }, { "epoch": 0.33943833943833945, "grad_norm": 1.215539813041687, "learning_rate": 1.9991270528679508e-05, "loss": 1.5834959745407104, "step": 278 }, { "epoch": 0.3418803418803419, "grad_norm": 0.9015586972236633, "learning_rate": 1.9990108023371403e-05, "loss": 1.4441936016082764, "step": 280 }, { "epoch": 0.3443223443223443, "grad_norm": 1.5563743114471436, "learning_rate": 1.9988872916350022e-05, "loss": 1.376705288887024, "step": 282 }, { "epoch": 0.34676434676434675, "grad_norm": 7.034574031829834, "learning_rate": 1.9987565217589756e-05, "loss": 1.4534231424331665, "step": 284 }, { "epoch": 0.3492063492063492, "grad_norm": 0.9118156433105469, "learning_rate": 1.9986184937651227e-05, "loss": 1.2641198635101318, "step": 286 }, { "epoch": 0.3516483516483517, "grad_norm": 3.323513984680176, "learning_rate": 1.9984732087681215e-05, "loss": 1.834381341934204, "step": 288 }, { "epoch": 0.3540903540903541, "grad_norm": 3.959578037261963, "learning_rate": 1.9983206679412542e-05, "loss": 1.1039708852767944, "step": 290 }, { "epoch": 0.35653235653235654, "grad_norm": 0.8664885759353638, "learning_rate": 1.9981608725164002e-05, "loss": 1.4267356395721436, "step": 292 }, { "epoch": 0.358974358974359, "grad_norm": 1.372922658920288, "learning_rate": 1.9979938237840247e-05, "loss": 1.198704481124878, "step": 294 }, { "epoch": 0.3614163614163614, "grad_norm": 2.058027982711792, "learning_rate": 1.9978195230931686e-05, "loss": 1.1538225412368774, "step": 296 }, { "epoch": 0.36385836385836384, "grad_norm": 4.946676254272461, "learning_rate": 1.997637971851438e-05, "loss": 1.5473830699920654, "step": 298 }, { "epoch": 0.3663003663003663, "grad_norm": 2.0882294178009033, "learning_rate": 1.9974491715249917e-05, "loss": 1.357876181602478, "step": 300 }, { "epoch": 0.36874236874236876, "grad_norm": 3.573915958404541, "learning_rate": 1.9972531236385314e-05, "loss": 1.0178381204605103, "step": 302 }, { "epoch": 0.3711843711843712, "grad_norm": 1.7873722314834595, "learning_rate": 1.997049829775287e-05, "loss": 1.327938199043274, "step": 304 }, { "epoch": 0.37362637362637363, "grad_norm": 1.3761481046676636, "learning_rate": 1.996839291577006e-05, "loss": 1.4819612503051758, "step": 306 }, { "epoch": 0.37606837606837606, "grad_norm": 1.9104338884353638, "learning_rate": 1.996621510743938e-05, "loss": 1.418102741241455, "step": 308 }, { "epoch": 0.3785103785103785, "grad_norm": 1.1609731912612915, "learning_rate": 1.9963964890348236e-05, "loss": 1.4227708578109741, "step": 310 }, { "epoch": 0.38095238095238093, "grad_norm": 3.0023646354675293, "learning_rate": 1.9961642282668776e-05, "loss": 1.1034045219421387, "step": 312 }, { "epoch": 0.3833943833943834, "grad_norm": 4.011119842529297, "learning_rate": 1.9959247303157763e-05, "loss": 1.4926037788391113, "step": 314 }, { "epoch": 0.38583638583638585, "grad_norm": 4.095101356506348, "learning_rate": 1.995677997115641e-05, "loss": 0.8862283229827881, "step": 316 }, { "epoch": 0.3882783882783883, "grad_norm": 1.9095430374145508, "learning_rate": 1.9954240306590235e-05, "loss": 1.15045166015625, "step": 318 }, { "epoch": 0.3907203907203907, "grad_norm": 1.4787174463272095, "learning_rate": 1.9951628329968885e-05, "loss": 1.4402953386306763, "step": 320 }, { "epoch": 0.39316239316239315, "grad_norm": 2.3421995639801025, "learning_rate": 1.9948944062385994e-05, "loss": 1.456636667251587, "step": 322 }, { "epoch": 0.3956043956043956, "grad_norm": 1.1577990055084229, "learning_rate": 1.9946187525518986e-05, "loss": 1.4146589040756226, "step": 324 }, { "epoch": 0.398046398046398, "grad_norm": 1.954940676689148, "learning_rate": 1.994335874162892e-05, "loss": 1.3673632144927979, "step": 326 }, { "epoch": 0.4004884004884005, "grad_norm": 1.90268874168396, "learning_rate": 1.9940457733560293e-05, "loss": 1.3601889610290527, "step": 328 }, { "epoch": 0.40293040293040294, "grad_norm": 4.163765907287598, "learning_rate": 1.993748452474088e-05, "loss": 0.9897390007972717, "step": 330 }, { "epoch": 0.4053724053724054, "grad_norm": 3.35142183303833, "learning_rate": 1.9934439139181516e-05, "loss": 0.6906993389129639, "step": 332 }, { "epoch": 0.4078144078144078, "grad_norm": 1.405617356300354, "learning_rate": 1.993132160147593e-05, "loss": 1.1328214406967163, "step": 334 }, { "epoch": 0.41025641025641024, "grad_norm": 1.5693705081939697, "learning_rate": 1.9928131936800514e-05, "loss": 1.4789706468582153, "step": 336 }, { "epoch": 0.4126984126984127, "grad_norm": 1.2349439859390259, "learning_rate": 1.9924870170914157e-05, "loss": 1.0828137397766113, "step": 338 }, { "epoch": 0.41514041514041516, "grad_norm": 5.380833148956299, "learning_rate": 1.9921536330158007e-05, "loss": 1.1599012613296509, "step": 340 }, { "epoch": 0.4175824175824176, "grad_norm": 2.7839314937591553, "learning_rate": 1.9918130441455273e-05, "loss": 1.6682945489883423, "step": 342 }, { "epoch": 0.42002442002442003, "grad_norm": 1.93392813205719, "learning_rate": 1.9914652532311005e-05, "loss": 0.9947870969772339, "step": 344 }, { "epoch": 0.42246642246642246, "grad_norm": 1.5755698680877686, "learning_rate": 1.991110263081186e-05, "loss": 1.315640926361084, "step": 346 }, { "epoch": 0.4249084249084249, "grad_norm": 2.16658878326416, "learning_rate": 1.9907480765625906e-05, "loss": 1.39967679977417, "step": 348 }, { "epoch": 0.42735042735042733, "grad_norm": 2.711895704269409, "learning_rate": 1.9903786966002352e-05, "loss": 0.9204920530319214, "step": 350 }, { "epoch": 0.4297924297924298, "grad_norm": 2.3947465419769287, "learning_rate": 1.9900021261771348e-05, "loss": 1.1823644638061523, "step": 352 }, { "epoch": 0.43223443223443225, "grad_norm": 1.8362082242965698, "learning_rate": 1.9896183683343706e-05, "loss": 1.3596951961517334, "step": 354 }, { "epoch": 0.4346764346764347, "grad_norm": 2.1142735481262207, "learning_rate": 1.989227426171069e-05, "loss": 1.03623628616333, "step": 356 }, { "epoch": 0.4371184371184371, "grad_norm": 4.102887153625488, "learning_rate": 1.9888293028443747e-05, "loss": 1.240249514579773, "step": 358 }, { "epoch": 0.43956043956043955, "grad_norm": 1.5868562459945679, "learning_rate": 1.9884240015694248e-05, "loss": 1.281577467918396, "step": 360 }, { "epoch": 0.442002442002442, "grad_norm": 1.967654824256897, "learning_rate": 1.988011525619325e-05, "loss": 1.1424391269683838, "step": 362 }, { "epoch": 0.4444444444444444, "grad_norm": 1.7741585969924927, "learning_rate": 1.9875918783251207e-05, "loss": 1.2371528148651123, "step": 364 }, { "epoch": 0.4468864468864469, "grad_norm": 2.0856261253356934, "learning_rate": 1.9871650630757716e-05, "loss": 1.4550820589065552, "step": 366 }, { "epoch": 0.44932844932844934, "grad_norm": 1.4822794198989868, "learning_rate": 1.9867310833181234e-05, "loss": 1.1890130043029785, "step": 368 }, { "epoch": 0.4517704517704518, "grad_norm": 0.6963343620300293, "learning_rate": 1.986289942556881e-05, "loss": 1.2029908895492554, "step": 370 }, { "epoch": 0.4542124542124542, "grad_norm": 1.5182689428329468, "learning_rate": 1.9858416443545794e-05, "loss": 1.3851736783981323, "step": 372 }, { "epoch": 0.45665445665445664, "grad_norm": 1.8810380697250366, "learning_rate": 1.9853861923315555e-05, "loss": 1.0434424877166748, "step": 374 }, { "epoch": 0.4590964590964591, "grad_norm": 1.6905688047409058, "learning_rate": 1.984923590165918e-05, "loss": 1.301484227180481, "step": 376 }, { "epoch": 0.46153846153846156, "grad_norm": 2.4930872917175293, "learning_rate": 1.9844538415935187e-05, "loss": 1.0400949716567993, "step": 378 }, { "epoch": 0.463980463980464, "grad_norm": 4.205483913421631, "learning_rate": 1.983976950407922e-05, "loss": 0.9666699767112732, "step": 380 }, { "epoch": 0.46642246642246643, "grad_norm": 1.9438555240631104, "learning_rate": 1.983492920460373e-05, "loss": 1.3446414470672607, "step": 382 }, { "epoch": 0.46886446886446886, "grad_norm": 1.6146860122680664, "learning_rate": 1.983001755659769e-05, "loss": 1.2357232570648193, "step": 384 }, { "epoch": 0.4713064713064713, "grad_norm": 2.4254696369171143, "learning_rate": 1.9825034599726263e-05, "loss": 1.2619645595550537, "step": 386 }, { "epoch": 0.47374847374847373, "grad_norm": 2.857746124267578, "learning_rate": 1.9819980374230468e-05, "loss": 1.6904096603393555, "step": 388 }, { "epoch": 0.47619047619047616, "grad_norm": 1.4608720541000366, "learning_rate": 1.981485492092689e-05, "loss": 0.9965710639953613, "step": 390 }, { "epoch": 0.47863247863247865, "grad_norm": 61.0188102722168, "learning_rate": 1.9809658281207318e-05, "loss": 0.9120445251464844, "step": 392 }, { "epoch": 0.4810744810744811, "grad_norm": 2.121208429336548, "learning_rate": 1.980439049703843e-05, "loss": 1.0203512907028198, "step": 394 }, { "epoch": 0.4835164835164835, "grad_norm": 1.042589783668518, "learning_rate": 1.979905161096144e-05, "loss": 1.3058192729949951, "step": 396 }, { "epoch": 0.48595848595848595, "grad_norm": 1.507728934288025, "learning_rate": 1.9793641666091773e-05, "loss": 1.3444452285766602, "step": 398 }, { "epoch": 0.4884004884004884, "grad_norm": 4.658176422119141, "learning_rate": 1.9788160706118698e-05, "loss": 0.6673938035964966, "step": 400 }, { "epoch": 0.4908424908424908, "grad_norm": 1.1496187448501587, "learning_rate": 1.978260877530499e-05, "loss": 1.3050227165222168, "step": 402 }, { "epoch": 0.4932844932844933, "grad_norm": 0.8402596712112427, "learning_rate": 1.9776985918486552e-05, "loss": 1.4215201139450073, "step": 404 }, { "epoch": 0.49572649572649574, "grad_norm": 8.28558349609375, "learning_rate": 1.9771292181072076e-05, "loss": 0.8944355845451355, "step": 406 }, { "epoch": 0.4981684981684982, "grad_norm": 2.735724449157715, "learning_rate": 1.9765527609042676e-05, "loss": 1.0254771709442139, "step": 408 }, { "epoch": 0.5006105006105006, "grad_norm": 1.9720531702041626, "learning_rate": 1.9759692248951482e-05, "loss": 1.3571816682815552, "step": 410 }, { "epoch": 0.503052503052503, "grad_norm": 14.514373779296875, "learning_rate": 1.975378614792332e-05, "loss": 0.6523332595825195, "step": 412 }, { "epoch": 0.5054945054945055, "grad_norm": 1.5351808071136475, "learning_rate": 1.9747809353654276e-05, "loss": 1.3964738845825195, "step": 414 }, { "epoch": 0.5079365079365079, "grad_norm": 1.1067290306091309, "learning_rate": 1.974176191441135e-05, "loss": 1.3599458932876587, "step": 416 }, { "epoch": 0.5103785103785103, "grad_norm": 0.968450665473938, "learning_rate": 1.973564387903204e-05, "loss": 1.1259132623672485, "step": 418 }, { "epoch": 0.5128205128205128, "grad_norm": 0.7555665373802185, "learning_rate": 1.972945529692398e-05, "loss": 1.3250101804733276, "step": 420 }, { "epoch": 0.5152625152625152, "grad_norm": 1.294765830039978, "learning_rate": 1.97231962180645e-05, "loss": 1.3246148824691772, "step": 422 }, { "epoch": 0.5177045177045178, "grad_norm": 3.749925374984741, "learning_rate": 1.9716866693000248e-05, "loss": 1.3295143842697144, "step": 424 }, { "epoch": 0.5201465201465202, "grad_norm": 1.7079066038131714, "learning_rate": 1.9710466772846784e-05, "loss": 1.1310526132583618, "step": 426 }, { "epoch": 0.5225885225885226, "grad_norm": 1.0455013513565063, "learning_rate": 1.9703996509288153e-05, "loss": 1.341339111328125, "step": 428 }, { "epoch": 0.525030525030525, "grad_norm": 2.6277689933776855, "learning_rate": 1.9697455954576478e-05, "loss": 0.984380841255188, "step": 430 }, { "epoch": 0.5274725274725275, "grad_norm": 3.6414973735809326, "learning_rate": 1.9690845161531532e-05, "loss": 0.6374328136444092, "step": 432 }, { "epoch": 0.5299145299145299, "grad_norm": 0.9854040741920471, "learning_rate": 1.968416418354032e-05, "loss": 1.363136887550354, "step": 434 }, { "epoch": 0.5323565323565324, "grad_norm": 1.02694833278656, "learning_rate": 1.967741307455663e-05, "loss": 1.3728197813034058, "step": 436 }, { "epoch": 0.5347985347985348, "grad_norm": 1.2664965391159058, "learning_rate": 1.967059188910062e-05, "loss": 1.3319021463394165, "step": 438 }, { "epoch": 0.5372405372405372, "grad_norm": 0.8867588043212891, "learning_rate": 1.9663700682258367e-05, "loss": 1.299553394317627, "step": 440 }, { "epoch": 0.5396825396825397, "grad_norm": 13.338286399841309, "learning_rate": 1.9656739509681413e-05, "loss": 1.1493945121765137, "step": 442 }, { "epoch": 0.5421245421245421, "grad_norm": 2.412151575088501, "learning_rate": 1.9649708427586333e-05, "loss": 1.0136598348617554, "step": 444 }, { "epoch": 0.5445665445665445, "grad_norm": 2.4818806648254395, "learning_rate": 1.964260749275427e-05, "loss": 1.1629705429077148, "step": 446 }, { "epoch": 0.5470085470085471, "grad_norm": 1.1341965198516846, "learning_rate": 1.963543676253048e-05, "loss": 1.1858645677566528, "step": 448 }, { "epoch": 0.5494505494505495, "grad_norm": 1.6893372535705566, "learning_rate": 1.962819629482386e-05, "loss": 1.1235462427139282, "step": 450 }, { "epoch": 0.5518925518925519, "grad_norm": 1.6189004182815552, "learning_rate": 1.9620886148106498e-05, "loss": 0.9178623557090759, "step": 452 }, { "epoch": 0.5543345543345544, "grad_norm": 1.3195807933807373, "learning_rate": 1.9613506381413194e-05, "loss": 1.377665400505066, "step": 454 }, { "epoch": 0.5567765567765568, "grad_norm": 1.4087958335876465, "learning_rate": 1.960605705434097e-05, "loss": 1.3081351518630981, "step": 456 }, { "epoch": 0.5592185592185592, "grad_norm": 0.652862012386322, "learning_rate": 1.95985382270486e-05, "loss": 0.8939856290817261, "step": 458 }, { "epoch": 0.5616605616605617, "grad_norm": 3.38787579536438, "learning_rate": 1.9590949960256132e-05, "loss": 1.266584873199463, "step": 460 }, { "epoch": 0.5641025641025641, "grad_norm": 1.0980466604232788, "learning_rate": 1.9583292315244383e-05, "loss": 1.2569012641906738, "step": 462 }, { "epoch": 0.5665445665445665, "grad_norm": 4.170780181884766, "learning_rate": 1.9575565353854448e-05, "loss": 0.641703724861145, "step": 464 }, { "epoch": 0.568986568986569, "grad_norm": 1.1431292295455933, "learning_rate": 1.9567769138487208e-05, "loss": 1.567794680595398, "step": 466 }, { "epoch": 0.5714285714285714, "grad_norm": 1.7932583093643188, "learning_rate": 1.955990373210281e-05, "loss": 1.3980201482772827, "step": 468 }, { "epoch": 0.5738705738705738, "grad_norm": 2.625420570373535, "learning_rate": 1.9551969198220188e-05, "loss": 1.1457037925720215, "step": 470 }, { "epoch": 0.5763125763125763, "grad_norm": 4.886669635772705, "learning_rate": 1.954396560091652e-05, "loss": 1.344892144203186, "step": 472 }, { "epoch": 0.5787545787545788, "grad_norm": 2.250831127166748, "learning_rate": 1.953589300482671e-05, "loss": 0.9534360766410828, "step": 474 }, { "epoch": 0.5811965811965812, "grad_norm": 2.8664050102233887, "learning_rate": 1.9527751475142904e-05, "loss": 1.0838558673858643, "step": 476 }, { "epoch": 0.5836385836385837, "grad_norm": 0.9391406774520874, "learning_rate": 1.951954107761391e-05, "loss": 1.2320207357406616, "step": 478 }, { "epoch": 0.5860805860805861, "grad_norm": 1.4157171249389648, "learning_rate": 1.9511261878544715e-05, "loss": 1.3821120262145996, "step": 480 }, { "epoch": 0.5885225885225885, "grad_norm": 4.214658737182617, "learning_rate": 1.950291394479592e-05, "loss": 0.5741876363754272, "step": 482 }, { "epoch": 0.590964590964591, "grad_norm": 2.0429494380950928, "learning_rate": 1.9494497343783212e-05, "loss": 1.1259833574295044, "step": 484 }, { "epoch": 0.5934065934065934, "grad_norm": 0.9556084275245667, "learning_rate": 1.9486012143476813e-05, "loss": 1.1523076295852661, "step": 486 }, { "epoch": 0.5958485958485958, "grad_norm": 5.83870792388916, "learning_rate": 1.9477458412400934e-05, "loss": 1.0496693849563599, "step": 488 }, { "epoch": 0.5982905982905983, "grad_norm": 1.3661986589431763, "learning_rate": 1.946883621963323e-05, "loss": 1.1105148792266846, "step": 490 }, { "epoch": 0.6007326007326007, "grad_norm": 1.4116313457489014, "learning_rate": 1.946014563480422e-05, "loss": 0.9300603866577148, "step": 492 }, { "epoch": 0.6031746031746031, "grad_norm": 1.300858974456787, "learning_rate": 1.9451386728096758e-05, "loss": 1.0661330223083496, "step": 494 }, { "epoch": 0.6056166056166056, "grad_norm": 1.8178846836090088, "learning_rate": 1.9442559570245433e-05, "loss": 1.304194450378418, "step": 496 }, { "epoch": 0.608058608058608, "grad_norm": 1.6697763204574585, "learning_rate": 1.9433664232536014e-05, "loss": 0.6469916105270386, "step": 498 }, { "epoch": 0.6105006105006106, "grad_norm": 1.206526279449463, "learning_rate": 1.9424700786804877e-05, "loss": 0.9863432049751282, "step": 500 }, { "epoch": 0.612942612942613, "grad_norm": 1.7002737522125244, "learning_rate": 1.9415669305438413e-05, "loss": 1.2856956720352173, "step": 502 }, { "epoch": 0.6153846153846154, "grad_norm": 1.4255826473236084, "learning_rate": 1.9406569861372466e-05, "loss": 1.3286441564559937, "step": 504 }, { "epoch": 0.6178266178266179, "grad_norm": 1.0831611156463623, "learning_rate": 1.9397402528091707e-05, "loss": 1.3130193948745728, "step": 506 }, { "epoch": 0.6202686202686203, "grad_norm": 1.9533292055130005, "learning_rate": 1.9388167379629076e-05, "loss": 1.380988597869873, "step": 508 }, { "epoch": 0.6227106227106227, "grad_norm": 3.5476789474487305, "learning_rate": 1.9378864490565172e-05, "loss": 1.3338630199432373, "step": 510 }, { "epoch": 0.6251526251526252, "grad_norm": 3.0227179527282715, "learning_rate": 1.9369493936027642e-05, "loss": 1.2690256834030151, "step": 512 }, { "epoch": 0.6275946275946276, "grad_norm": 0.7818955779075623, "learning_rate": 1.9360055791690584e-05, "loss": 1.1770192384719849, "step": 514 }, { "epoch": 0.63003663003663, "grad_norm": 0.7348341941833496, "learning_rate": 1.935055013377393e-05, "loss": 1.119304895401001, "step": 516 }, { "epoch": 0.6324786324786325, "grad_norm": 1.465811848640442, "learning_rate": 1.934097703904284e-05, "loss": 1.34721040725708, "step": 518 }, { "epoch": 0.6349206349206349, "grad_norm": 1.2145129442214966, "learning_rate": 1.933133658480707e-05, "loss": 0.9806722402572632, "step": 520 }, { "epoch": 0.6373626373626373, "grad_norm": 2.869335174560547, "learning_rate": 1.9321628848920358e-05, "loss": 1.0333569049835205, "step": 522 }, { "epoch": 0.6398046398046398, "grad_norm": 2.509185552597046, "learning_rate": 1.9311853909779785e-05, "loss": 1.087817907333374, "step": 524 }, { "epoch": 0.6422466422466423, "grad_norm": 1.7746318578720093, "learning_rate": 1.9302011846325156e-05, "loss": 1.3438972234725952, "step": 526 }, { "epoch": 0.6446886446886447, "grad_norm": 0.9185584783554077, "learning_rate": 1.9292102738038347e-05, "loss": 1.38664972782135, "step": 528 }, { "epoch": 0.6471306471306472, "grad_norm": 1.1560609340667725, "learning_rate": 1.9282126664942667e-05, "loss": 1.1136956214904785, "step": 530 }, { "epoch": 0.6495726495726496, "grad_norm": 1.5920125246047974, "learning_rate": 1.927208370760223e-05, "loss": 1.0266146659851074, "step": 532 }, { "epoch": 0.652014652014652, "grad_norm": 2.174090623855591, "learning_rate": 1.9261973947121273e-05, "loss": 1.6666396856307983, "step": 534 }, { "epoch": 0.6544566544566545, "grad_norm": 1.7790899276733398, "learning_rate": 1.925179746514352e-05, "loss": 0.9882057309150696, "step": 536 }, { "epoch": 0.6568986568986569, "grad_norm": 1.3070317506790161, "learning_rate": 1.9241554343851537e-05, "loss": 1.368809461593628, "step": 538 }, { "epoch": 0.6593406593406593, "grad_norm": 1.5976839065551758, "learning_rate": 1.923124466596602e-05, "loss": 1.3585935831069946, "step": 540 }, { "epoch": 0.6617826617826618, "grad_norm": 1.836732268333435, "learning_rate": 1.922086851474519e-05, "loss": 1.0160579681396484, "step": 542 }, { "epoch": 0.6642246642246642, "grad_norm": 4.108547687530518, "learning_rate": 1.9210425973984074e-05, "loss": 1.3244247436523438, "step": 544 }, { "epoch": 0.6666666666666666, "grad_norm": 1.7101798057556152, "learning_rate": 1.9199917128013836e-05, "loss": 1.2471184730529785, "step": 546 }, { "epoch": 0.6691086691086691, "grad_norm": 1.3308701515197754, "learning_rate": 1.918934206170112e-05, "loss": 1.3621915578842163, "step": 548 }, { "epoch": 0.6715506715506715, "grad_norm": 1.1020407676696777, "learning_rate": 1.917870086044734e-05, "loss": 1.230018973350525, "step": 550 }, { "epoch": 0.673992673992674, "grad_norm": 19.01947021484375, "learning_rate": 1.9167993610187988e-05, "loss": 1.0613629817962646, "step": 552 }, { "epoch": 0.6764346764346765, "grad_norm": 1.0684137344360352, "learning_rate": 1.915722039739197e-05, "loss": 1.1644939184188843, "step": 554 }, { "epoch": 0.6788766788766789, "grad_norm": 1.4123005867004395, "learning_rate": 1.9146381309060874e-05, "loss": 0.9099707007408142, "step": 556 }, { "epoch": 0.6813186813186813, "grad_norm": 3.2105636596679688, "learning_rate": 1.913547643272828e-05, "loss": 1.228736400604248, "step": 558 }, { "epoch": 0.6837606837606838, "grad_norm": 0.4815189242362976, "learning_rate": 1.912450585645907e-05, "loss": 1.3034601211547852, "step": 560 }, { "epoch": 0.6862026862026862, "grad_norm": 2.001192569732666, "learning_rate": 1.9113469668848675e-05, "loss": 1.072668433189392, "step": 562 }, { "epoch": 0.6886446886446886, "grad_norm": 1.3243483304977417, "learning_rate": 1.9102367959022417e-05, "loss": 1.3628251552581787, "step": 564 }, { "epoch": 0.6910866910866911, "grad_norm": 1.6034096479415894, "learning_rate": 1.909120081663473e-05, "loss": 1.1910985708236694, "step": 566 }, { "epoch": 0.6935286935286935, "grad_norm": 1.6782633066177368, "learning_rate": 1.9079968331868487e-05, "loss": 1.4165751934051514, "step": 568 }, { "epoch": 0.6959706959706959, "grad_norm": 0.8705784678459167, "learning_rate": 1.9068670595434228e-05, "loss": 1.1330338716506958, "step": 570 }, { "epoch": 0.6984126984126984, "grad_norm": 3.466735601425171, "learning_rate": 1.9057307698569458e-05, "loss": 1.0612688064575195, "step": 572 }, { "epoch": 0.7008547008547008, "grad_norm": 2.736870765686035, "learning_rate": 1.9045879733037907e-05, "loss": 1.4824306964874268, "step": 574 }, { "epoch": 0.7032967032967034, "grad_norm": 1.9692933559417725, "learning_rate": 1.9034386791128766e-05, "loss": 1.28273606300354, "step": 576 }, { "epoch": 0.7057387057387058, "grad_norm": 0.8525418043136597, "learning_rate": 1.9022828965655975e-05, "loss": 1.2495508193969727, "step": 578 }, { "epoch": 0.7081807081807082, "grad_norm": 0.8721325993537903, "learning_rate": 1.9011206349957444e-05, "loss": 1.2048630714416504, "step": 580 }, { "epoch": 0.7106227106227107, "grad_norm": 1.3199268579483032, "learning_rate": 1.899951903789431e-05, "loss": 1.2845754623413086, "step": 582 }, { "epoch": 0.7130647130647131, "grad_norm": 1.1963062286376953, "learning_rate": 1.8987767123850197e-05, "loss": 1.2032135725021362, "step": 584 }, { "epoch": 0.7155067155067155, "grad_norm": 1.1792757511138916, "learning_rate": 1.8975950702730425e-05, "loss": 1.375983715057373, "step": 586 }, { "epoch": 0.717948717948718, "grad_norm": 1.8274788856506348, "learning_rate": 1.8964069869961254e-05, "loss": 1.1112651824951172, "step": 588 }, { "epoch": 0.7203907203907204, "grad_norm": 1.0463271141052246, "learning_rate": 1.8952124721489115e-05, "loss": 1.0283359289169312, "step": 590 }, { "epoch": 0.7228327228327228, "grad_norm": 1.1223207712173462, "learning_rate": 1.8940115353779847e-05, "loss": 0.9025493860244751, "step": 592 }, { "epoch": 0.7252747252747253, "grad_norm": 1.450899600982666, "learning_rate": 1.8928041863817896e-05, "loss": 1.2699706554412842, "step": 594 }, { "epoch": 0.7277167277167277, "grad_norm": 2.5641753673553467, "learning_rate": 1.891590434910554e-05, "loss": 1.0194693803787231, "step": 596 }, { "epoch": 0.7301587301587301, "grad_norm": 0.7553045153617859, "learning_rate": 1.890370290766212e-05, "loss": 1.160589337348938, "step": 598 }, { "epoch": 0.7326007326007326, "grad_norm": 1.1860005855560303, "learning_rate": 1.8891437638023212e-05, "loss": 1.2648638486862183, "step": 600 }, { "epoch": 0.7350427350427351, "grad_norm": 1.1435580253601074, "learning_rate": 1.8879108639239864e-05, "loss": 1.3810834884643555, "step": 602 }, { "epoch": 0.7374847374847375, "grad_norm": 0.9142278432846069, "learning_rate": 1.8866716010877774e-05, "loss": 1.2209972143173218, "step": 604 }, { "epoch": 0.73992673992674, "grad_norm": 3.2111129760742188, "learning_rate": 1.885425985301651e-05, "loss": 1.510741949081421, "step": 606 }, { "epoch": 0.7423687423687424, "grad_norm": 1.5610990524291992, "learning_rate": 1.884174026624868e-05, "loss": 1.3180582523345947, "step": 608 }, { "epoch": 0.7448107448107448, "grad_norm": 0.7449647188186646, "learning_rate": 1.8829157351679116e-05, "loss": 0.9663639664649963, "step": 610 }, { "epoch": 0.7472527472527473, "grad_norm": 1.3256258964538574, "learning_rate": 1.881651121092408e-05, "loss": 1.2966718673706055, "step": 612 }, { "epoch": 0.7496947496947497, "grad_norm": 1.0234135389328003, "learning_rate": 1.880380194611044e-05, "loss": 1.2717726230621338, "step": 614 }, { "epoch": 0.7521367521367521, "grad_norm": 2.811690092086792, "learning_rate": 1.8791029659874817e-05, "loss": 1.0650262832641602, "step": 616 }, { "epoch": 0.7545787545787546, "grad_norm": 1.469228744506836, "learning_rate": 1.877819445536279e-05, "loss": 1.6179522275924683, "step": 618 }, { "epoch": 0.757020757020757, "grad_norm": 2.5131025314331055, "learning_rate": 1.8765296436228043e-05, "loss": 1.1963871717453003, "step": 620 }, { "epoch": 0.7594627594627594, "grad_norm": 1.2842845916748047, "learning_rate": 1.875233570663154e-05, "loss": 0.9286983013153076, "step": 622 }, { "epoch": 0.7619047619047619, "grad_norm": 1.0976072549819946, "learning_rate": 1.8739312371240678e-05, "loss": 1.2990517616271973, "step": 624 }, { "epoch": 0.7643467643467643, "grad_norm": 1.3670490980148315, "learning_rate": 1.8726226535228425e-05, "loss": 1.352059006690979, "step": 626 }, { "epoch": 0.7667887667887668, "grad_norm": 2.016474485397339, "learning_rate": 1.871307830427251e-05, "loss": 1.1491894721984863, "step": 628 }, { "epoch": 0.7692307692307693, "grad_norm": 1.5183488130569458, "learning_rate": 1.8699867784554537e-05, "loss": 1.3350757360458374, "step": 630 }, { "epoch": 0.7716727716727717, "grad_norm": 0.8359405398368835, "learning_rate": 1.868659508275914e-05, "loss": 1.0210474729537964, "step": 632 }, { "epoch": 0.7741147741147741, "grad_norm": 1.0358965396881104, "learning_rate": 1.867326030607311e-05, "loss": 1.0034987926483154, "step": 634 }, { "epoch": 0.7765567765567766, "grad_norm": 2.3178768157958984, "learning_rate": 1.8659863562184552e-05, "loss": 1.3230623006820679, "step": 636 }, { "epoch": 0.778998778998779, "grad_norm": 1.5217390060424805, "learning_rate": 1.8646404959281986e-05, "loss": 1.3143547773361206, "step": 638 }, { "epoch": 0.7814407814407814, "grad_norm": 1.7523036003112793, "learning_rate": 1.8632884606053506e-05, "loss": 0.9751634001731873, "step": 640 }, { "epoch": 0.7838827838827839, "grad_norm": 2.0202057361602783, "learning_rate": 1.861930261168587e-05, "loss": 1.1349761486053467, "step": 642 }, { "epoch": 0.7863247863247863, "grad_norm": 0.9345976710319519, "learning_rate": 1.860565908586365e-05, "loss": 1.2226810455322266, "step": 644 }, { "epoch": 0.7887667887667887, "grad_norm": 1.210115909576416, "learning_rate": 1.859195413876831e-05, "loss": 1.0119144916534424, "step": 646 }, { "epoch": 0.7912087912087912, "grad_norm": 1.0988825559616089, "learning_rate": 1.857818788107734e-05, "loss": 1.26012122631073, "step": 648 }, { "epoch": 0.7936507936507936, "grad_norm": 4.83104944229126, "learning_rate": 1.856436042396338e-05, "loss": 0.5898873209953308, "step": 650 }, { "epoch": 0.796092796092796, "grad_norm": 1.161339282989502, "learning_rate": 1.8550471879093275e-05, "loss": 0.8887655138969421, "step": 652 }, { "epoch": 0.7985347985347986, "grad_norm": 1.4048727750778198, "learning_rate": 1.8536522358627205e-05, "loss": 1.2602205276489258, "step": 654 }, { "epoch": 0.800976800976801, "grad_norm": 2.1626598834991455, "learning_rate": 1.852251197521778e-05, "loss": 1.2750191688537598, "step": 656 }, { "epoch": 0.8034188034188035, "grad_norm": 2.365673065185547, "learning_rate": 1.8508440842009113e-05, "loss": 0.5839018225669861, "step": 658 }, { "epoch": 0.8058608058608059, "grad_norm": 1.4860225915908813, "learning_rate": 1.849430907263592e-05, "loss": 1.297167181968689, "step": 660 }, { "epoch": 0.8083028083028083, "grad_norm": 1.04447603225708, "learning_rate": 1.8480116781222604e-05, "loss": 1.2555423974990845, "step": 662 }, { "epoch": 0.8107448107448108, "grad_norm": 0.8101674318313599, "learning_rate": 1.846586408238232e-05, "loss": 1.3545968532562256, "step": 664 }, { "epoch": 0.8131868131868132, "grad_norm": 1.1193162202835083, "learning_rate": 1.8451551091216064e-05, "loss": 0.9384480118751526, "step": 666 }, { "epoch": 0.8156288156288156, "grad_norm": 1.269223928451538, "learning_rate": 1.8437177923311728e-05, "loss": 1.0872721672058105, "step": 668 }, { "epoch": 0.818070818070818, "grad_norm": 1.7073310613632202, "learning_rate": 1.842274469474318e-05, "loss": 1.4501525163650513, "step": 670 }, { "epoch": 0.8205128205128205, "grad_norm": 1.2747077941894531, "learning_rate": 1.8408251522069323e-05, "loss": 1.296190857887268, "step": 672 }, { "epoch": 0.8229548229548229, "grad_norm": 1.145330786705017, "learning_rate": 1.8393698522333158e-05, "loss": 1.076781153678894, "step": 674 }, { "epoch": 0.8253968253968254, "grad_norm": 1.0505316257476807, "learning_rate": 1.837908581306082e-05, "loss": 0.963850200176239, "step": 676 }, { "epoch": 0.8278388278388278, "grad_norm": 4.262927055358887, "learning_rate": 1.8364413512260656e-05, "loss": 1.2688353061676025, "step": 678 }, { "epoch": 0.8302808302808303, "grad_norm": 2.2526209354400635, "learning_rate": 1.8349681738422245e-05, "loss": 1.3245513439178467, "step": 680 }, { "epoch": 0.8327228327228328, "grad_norm": 1.7615208625793457, "learning_rate": 1.8334890610515465e-05, "loss": 1.2618424892425537, "step": 682 }, { "epoch": 0.8351648351648352, "grad_norm": 2.4765729904174805, "learning_rate": 1.8320040247989516e-05, "loss": 0.9116923213005066, "step": 684 }, { "epoch": 0.8376068376068376, "grad_norm": 2.0831899642944336, "learning_rate": 1.8305130770771966e-05, "loss": 1.4006067514419556, "step": 686 }, { "epoch": 0.8400488400488401, "grad_norm": 3.837216854095459, "learning_rate": 1.829016229926777e-05, "loss": 1.3707760572433472, "step": 688 }, { "epoch": 0.8424908424908425, "grad_norm": 12.806596755981445, "learning_rate": 1.827513495435831e-05, "loss": 1.0350643396377563, "step": 690 }, { "epoch": 0.8449328449328449, "grad_norm": 1.426324486732483, "learning_rate": 1.826004885740042e-05, "loss": 1.3101565837860107, "step": 692 }, { "epoch": 0.8473748473748474, "grad_norm": 0.7182126045227051, "learning_rate": 1.8244904130225383e-05, "loss": 1.1183477640151978, "step": 694 }, { "epoch": 0.8498168498168498, "grad_norm": 1.0692784786224365, "learning_rate": 1.8229700895137977e-05, "loss": 1.2185040712356567, "step": 696 }, { "epoch": 0.8522588522588522, "grad_norm": 1.405985951423645, "learning_rate": 1.821443927491548e-05, "loss": 1.0439921617507935, "step": 698 }, { "epoch": 0.8547008547008547, "grad_norm": 0.9861589074134827, "learning_rate": 1.819911939280665e-05, "loss": 1.179707646369934, "step": 700 }, { "epoch": 0.8571428571428571, "grad_norm": 1.3593485355377197, "learning_rate": 1.8183741372530778e-05, "loss": 1.1061705350875854, "step": 702 }, { "epoch": 0.8595848595848596, "grad_norm": 2.342923402786255, "learning_rate": 1.816830533827665e-05, "loss": 1.0052831172943115, "step": 704 }, { "epoch": 0.8620268620268621, "grad_norm": 1.4813743829727173, "learning_rate": 1.815281141470155e-05, "loss": 0.5395532250404358, "step": 706 }, { "epoch": 0.8644688644688645, "grad_norm": 1.3919825553894043, "learning_rate": 1.8137259726930283e-05, "loss": 1.2419100999832153, "step": 708 }, { "epoch": 0.8669108669108669, "grad_norm": 3.034050464630127, "learning_rate": 1.8121650400554125e-05, "loss": 0.9318399429321289, "step": 710 }, { "epoch": 0.8693528693528694, "grad_norm": 4.048087120056152, "learning_rate": 1.8105983561629827e-05, "loss": 1.4534571170806885, "step": 712 }, { "epoch": 0.8717948717948718, "grad_norm": 13.133171081542969, "learning_rate": 1.8090259336678598e-05, "loss": 1.6200733184814453, "step": 714 }, { "epoch": 0.8742368742368742, "grad_norm": 1.3102926015853882, "learning_rate": 1.8074477852685088e-05, "loss": 1.4871742725372314, "step": 716 }, { "epoch": 0.8766788766788767, "grad_norm": 0.9029149413108826, "learning_rate": 1.805863923709635e-05, "loss": 1.0001909732818604, "step": 718 }, { "epoch": 0.8791208791208791, "grad_norm": 0.828899621963501, "learning_rate": 1.8042743617820814e-05, "loss": 1.2416490316390991, "step": 720 }, { "epoch": 0.8815628815628815, "grad_norm": 2.1641383171081543, "learning_rate": 1.8026791123227255e-05, "loss": 0.8903718590736389, "step": 722 }, { "epoch": 0.884004884004884, "grad_norm": 1.445026159286499, "learning_rate": 1.8010781882143773e-05, "loss": 1.285760521888733, "step": 724 }, { "epoch": 0.8864468864468864, "grad_norm": 0.9921174645423889, "learning_rate": 1.799471602385672e-05, "loss": 1.2185858488082886, "step": 726 }, { "epoch": 0.8888888888888888, "grad_norm": 1.5229535102844238, "learning_rate": 1.797859367810968e-05, "loss": 1.2078474760055542, "step": 728 }, { "epoch": 0.8913308913308914, "grad_norm": 1.6648898124694824, "learning_rate": 1.7962414975102416e-05, "loss": 1.4831866025924683, "step": 730 }, { "epoch": 0.8937728937728938, "grad_norm": 3.4526195526123047, "learning_rate": 1.794618004548982e-05, "loss": 1.2522797584533691, "step": 732 }, { "epoch": 0.8962148962148963, "grad_norm": 1.0352317094802856, "learning_rate": 1.7929889020380842e-05, "loss": 1.0359210968017578, "step": 734 }, { "epoch": 0.8986568986568987, "grad_norm": 0.8629250526428223, "learning_rate": 1.791354203133746e-05, "loss": 0.8198949098587036, "step": 736 }, { "epoch": 0.9010989010989011, "grad_norm": 4.816531658172607, "learning_rate": 1.7897139210373594e-05, "loss": 0.9690486788749695, "step": 738 }, { "epoch": 0.9035409035409036, "grad_norm": 2.7800450325012207, "learning_rate": 1.7880680689954047e-05, "loss": 1.0706011056900024, "step": 740 }, { "epoch": 0.905982905982906, "grad_norm": 1.3503133058547974, "learning_rate": 1.786416660299344e-05, "loss": 0.9173503518104553, "step": 742 }, { "epoch": 0.9084249084249084, "grad_norm": 0.9783918261528015, "learning_rate": 1.7847597082855133e-05, "loss": 0.9544399976730347, "step": 744 }, { "epoch": 0.9108669108669109, "grad_norm": 1.6359418630599976, "learning_rate": 1.7830972263350142e-05, "loss": 1.2056411504745483, "step": 746 }, { "epoch": 0.9133089133089133, "grad_norm": 1.5760291814804077, "learning_rate": 1.7814292278736084e-05, "loss": 0.9109166264533997, "step": 748 }, { "epoch": 0.9157509157509157, "grad_norm": 1.4765530824661255, "learning_rate": 1.7797557263716054e-05, "loss": 1.401995301246643, "step": 750 }, { "epoch": 0.9181929181929182, "grad_norm": 0.7756912708282471, "learning_rate": 1.7780767353437573e-05, "loss": 1.2727299928665161, "step": 752 }, { "epoch": 0.9206349206349206, "grad_norm": 0.8636785745620728, "learning_rate": 1.7763922683491476e-05, "loss": 1.2869514226913452, "step": 754 }, { "epoch": 0.9230769230769231, "grad_norm": 0.8454907536506653, "learning_rate": 1.7747023389910815e-05, "loss": 1.2656826972961426, "step": 756 }, { "epoch": 0.9255189255189256, "grad_norm": 1.3287631273269653, "learning_rate": 1.773006960916978e-05, "loss": 1.3375307321548462, "step": 758 }, { "epoch": 0.927960927960928, "grad_norm": 1.5437045097351074, "learning_rate": 1.7713061478182582e-05, "loss": 0.8308702111244202, "step": 760 }, { "epoch": 0.9304029304029304, "grad_norm": 3.6134531497955322, "learning_rate": 1.7695999134302348e-05, "loss": 1.2227895259857178, "step": 762 }, { "epoch": 0.9328449328449329, "grad_norm": 1.38361394405365, "learning_rate": 1.767888271532001e-05, "loss": 0.9452077150344849, "step": 764 }, { "epoch": 0.9352869352869353, "grad_norm": 1.8651083707809448, "learning_rate": 1.7661712359463202e-05, "loss": 0.6139346957206726, "step": 766 }, { "epoch": 0.9377289377289377, "grad_norm": 1.1716833114624023, "learning_rate": 1.7644488205395136e-05, "loss": 0.9175626039505005, "step": 768 }, { "epoch": 0.9401709401709402, "grad_norm": 4.235447406768799, "learning_rate": 1.7627210392213484e-05, "loss": 0.7235321402549744, "step": 770 }, { "epoch": 0.9426129426129426, "grad_norm": 0.8599190711975098, "learning_rate": 1.7609879059449256e-05, "loss": 1.1240880489349365, "step": 772 }, { "epoch": 0.945054945054945, "grad_norm": 3.387906789779663, "learning_rate": 1.7592494347065667e-05, "loss": 1.3139581680297852, "step": 774 }, { "epoch": 0.9474969474969475, "grad_norm": 1.6255816221237183, "learning_rate": 1.7575056395457017e-05, "loss": 1.2285006046295166, "step": 776 }, { "epoch": 0.9499389499389499, "grad_norm": 2.0512325763702393, "learning_rate": 1.7557565345447548e-05, "loss": 0.9121115207672119, "step": 778 }, { "epoch": 0.9523809523809523, "grad_norm": 1.003928542137146, "learning_rate": 1.754002133829031e-05, "loss": 1.1289280652999878, "step": 780 }, { "epoch": 0.9548229548229549, "grad_norm": 6.144791126251221, "learning_rate": 1.752242451566603e-05, "loss": 1.1398252248764038, "step": 782 }, { "epoch": 0.9572649572649573, "grad_norm": 0.8303928375244141, "learning_rate": 1.7504775019681946e-05, "loss": 1.263461709022522, "step": 784 }, { "epoch": 0.9597069597069597, "grad_norm": 1.0771842002868652, "learning_rate": 1.7487072992870683e-05, "loss": 1.2938859462738037, "step": 786 }, { "epoch": 0.9621489621489622, "grad_norm": 1.3151885271072388, "learning_rate": 1.746931857818908e-05, "loss": 1.3971589803695679, "step": 788 }, { "epoch": 0.9645909645909646, "grad_norm": 2.2546122074127197, "learning_rate": 1.7451511919017054e-05, "loss": 1.341101884841919, "step": 790 }, { "epoch": 0.967032967032967, "grad_norm": 0.76347416639328, "learning_rate": 1.743365315915643e-05, "loss": 1.0966370105743408, "step": 792 }, { "epoch": 0.9694749694749695, "grad_norm": 1.2820730209350586, "learning_rate": 1.7415742442829792e-05, "loss": 1.3368990421295166, "step": 794 }, { "epoch": 0.9719169719169719, "grad_norm": 0.7520409226417542, "learning_rate": 1.7397779914679303e-05, "loss": 1.2155550718307495, "step": 796 }, { "epoch": 0.9743589743589743, "grad_norm": 0.652754545211792, "learning_rate": 1.7379765719765542e-05, "loss": 1.2150750160217285, "step": 798 }, { "epoch": 0.9768009768009768, "grad_norm": 0.8119310736656189, "learning_rate": 1.7361700003566348e-05, "loss": 1.2871735095977783, "step": 800 }, { "epoch": 0.9792429792429792, "grad_norm": 2.2065281867980957, "learning_rate": 1.734358291197562e-05, "loss": 0.9395040273666382, "step": 802 }, { "epoch": 0.9816849816849816, "grad_norm": 0.936976432800293, "learning_rate": 1.732541459130215e-05, "loss": 1.1477895975112915, "step": 804 }, { "epoch": 0.9841269841269841, "grad_norm": 1.3355202674865723, "learning_rate": 1.730719518826846e-05, "loss": 1.573718547821045, "step": 806 }, { "epoch": 0.9865689865689866, "grad_norm": 4.425434112548828, "learning_rate": 1.7288924850009576e-05, "loss": 0.9391233325004578, "step": 808 }, { "epoch": 0.989010989010989, "grad_norm": 0.7173460721969604, "learning_rate": 1.7270603724071876e-05, "loss": 1.364790916442871, "step": 810 }, { "epoch": 0.9914529914529915, "grad_norm": 0.7534496784210205, "learning_rate": 1.725223195841189e-05, "loss": 1.2704541683197021, "step": 812 }, { "epoch": 0.9938949938949939, "grad_norm": 1.4058549404144287, "learning_rate": 1.7233809701395087e-05, "loss": 1.35564386844635, "step": 814 }, { "epoch": 0.9963369963369964, "grad_norm": 0.8958796858787537, "learning_rate": 1.72153371017947e-05, "loss": 1.233031153678894, "step": 816 }, { "epoch": 0.9987789987789988, "grad_norm": 0.7508826851844788, "learning_rate": 1.7196814308790516e-05, "loss": 1.1463748216629028, "step": 818 }, { "epoch": 1.0012210012210012, "grad_norm": 0.6122261881828308, "learning_rate": 1.717824147196767e-05, "loss": 1.007127285003662, "step": 820 }, { "epoch": 1.0036630036630036, "grad_norm": 1.004014015197754, "learning_rate": 1.7159618741315433e-05, "loss": 1.0883307456970215, "step": 822 }, { "epoch": 1.006105006105006, "grad_norm": 1.8373212814331055, "learning_rate": 1.7140946267226006e-05, "loss": 0.4619407653808594, "step": 824 }, { "epoch": 1.0085470085470085, "grad_norm": 7.073435306549072, "learning_rate": 1.712222420049331e-05, "loss": 0.8937675356864929, "step": 826 }, { "epoch": 1.010989010989011, "grad_norm": 3.16390061378479, "learning_rate": 1.7103452692311756e-05, "loss": 0.7834187150001526, "step": 828 }, { "epoch": 1.0134310134310134, "grad_norm": 1.286433458328247, "learning_rate": 1.708463189427504e-05, "loss": 0.7017002105712891, "step": 830 }, { "epoch": 1.0158730158730158, "grad_norm": 2.5467231273651123, "learning_rate": 1.7065761958374905e-05, "loss": 0.9201502203941345, "step": 832 }, { "epoch": 1.0183150183150182, "grad_norm": 1.361122965812683, "learning_rate": 1.7046843036999912e-05, "loss": 0.9217178821563721, "step": 834 }, { "epoch": 1.0207570207570207, "grad_norm": 2.6307156085968018, "learning_rate": 1.7027875282934224e-05, "loss": 1.00894033908844, "step": 836 }, { "epoch": 1.0231990231990231, "grad_norm": 0.9444079995155334, "learning_rate": 1.7008858849356363e-05, "loss": 1.0666855573654175, "step": 838 }, { "epoch": 1.0256410256410255, "grad_norm": 1.807748556137085, "learning_rate": 1.6989793889837966e-05, "loss": 0.7795441746711731, "step": 840 }, { "epoch": 1.028083028083028, "grad_norm": 4.041755199432373, "learning_rate": 1.6970680558342566e-05, "loss": 0.7524101734161377, "step": 842 }, { "epoch": 1.0305250305250304, "grad_norm": 0.885811448097229, "learning_rate": 1.695151900922432e-05, "loss": 0.9602640271186829, "step": 844 }, { "epoch": 1.032967032967033, "grad_norm": 0.9917791485786438, "learning_rate": 1.6932309397226792e-05, "loss": 0.8459327816963196, "step": 846 }, { "epoch": 1.0354090354090355, "grad_norm": 0.9382413029670715, "learning_rate": 1.6913051877481676e-05, "loss": 1.1561813354492188, "step": 848 }, { "epoch": 1.037851037851038, "grad_norm": 1.5294519662857056, "learning_rate": 1.6893746605507567e-05, "loss": 0.7689896821975708, "step": 850 }, { "epoch": 1.0402930402930404, "grad_norm": 1.7145957946777344, "learning_rate": 1.6874393737208688e-05, "loss": 0.5241991281509399, "step": 852 }, { "epoch": 1.0427350427350428, "grad_norm": 0.781104326248169, "learning_rate": 1.685499342887364e-05, "loss": 1.0428876876831055, "step": 854 }, { "epoch": 1.0451770451770452, "grad_norm": 1.5123246908187866, "learning_rate": 1.6835545837174132e-05, "loss": 0.668832004070282, "step": 856 }, { "epoch": 1.0476190476190477, "grad_norm": 1.0035831928253174, "learning_rate": 1.681605111916373e-05, "loss": 1.2478870153427124, "step": 858 }, { "epoch": 1.05006105006105, "grad_norm": 0.9146220684051514, "learning_rate": 1.679650943227657e-05, "loss": 0.8985828161239624, "step": 860 }, { "epoch": 1.0525030525030525, "grad_norm": 1.358199119567871, "learning_rate": 1.6776920934326103e-05, "loss": 1.0257023572921753, "step": 862 }, { "epoch": 1.054945054945055, "grad_norm": 1.0113524198532104, "learning_rate": 1.675728578350381e-05, "loss": 1.0212005376815796, "step": 864 }, { "epoch": 1.0573870573870574, "grad_norm": 2.236260175704956, "learning_rate": 1.673760413837793e-05, "loss": 1.4508510828018188, "step": 866 }, { "epoch": 1.0598290598290598, "grad_norm": 2.680145740509033, "learning_rate": 1.6717876157892175e-05, "loss": 0.5031489729881287, "step": 868 }, { "epoch": 1.0622710622710623, "grad_norm": 1.7734426259994507, "learning_rate": 1.6698102001364456e-05, "loss": 0.9893677234649658, "step": 870 }, { "epoch": 1.0647130647130647, "grad_norm": 1.0509651899337769, "learning_rate": 1.6678281828485576e-05, "loss": 0.897520124912262, "step": 872 }, { "epoch": 1.0671550671550671, "grad_norm": 1.6916723251342773, "learning_rate": 1.6658415799317966e-05, "loss": 0.7381224036216736, "step": 874 }, { "epoch": 1.0695970695970696, "grad_norm": 1.0783177614212036, "learning_rate": 1.6638504074294375e-05, "loss": 0.9826089143753052, "step": 876 }, { "epoch": 1.072039072039072, "grad_norm": 0.9295514225959778, "learning_rate": 1.6618546814216586e-05, "loss": 1.0204219818115234, "step": 878 }, { "epoch": 1.0744810744810744, "grad_norm": 2.3482747077941895, "learning_rate": 1.65985441802541e-05, "loss": 0.6614128947257996, "step": 880 }, { "epoch": 1.0769230769230769, "grad_norm": 0.9849045276641846, "learning_rate": 1.6578496333942848e-05, "loss": 0.9977365732192993, "step": 882 }, { "epoch": 1.0793650793650793, "grad_norm": 4.873172283172607, "learning_rate": 1.655840343718389e-05, "loss": 0.6593250036239624, "step": 884 }, { "epoch": 1.0818070818070817, "grad_norm": 60.08795928955078, "learning_rate": 1.6538265652242103e-05, "loss": 0.7343877553939819, "step": 886 }, { "epoch": 1.0842490842490842, "grad_norm": 1.1528880596160889, "learning_rate": 1.6518083141744862e-05, "loss": 1.0775821208953857, "step": 888 }, { "epoch": 1.0866910866910866, "grad_norm": 0.5812370181083679, "learning_rate": 1.649785606868073e-05, "loss": 0.7265040874481201, "step": 890 }, { "epoch": 1.089133089133089, "grad_norm": 0.9050455093383789, "learning_rate": 1.647758459639816e-05, "loss": 0.94173663854599, "step": 892 }, { "epoch": 1.0915750915750915, "grad_norm": 1.2509444952011108, "learning_rate": 1.6457268888604143e-05, "loss": 1.1309514045715332, "step": 894 }, { "epoch": 1.0940170940170941, "grad_norm": 1.1489883661270142, "learning_rate": 1.643690910936292e-05, "loss": 1.1048157215118408, "step": 896 }, { "epoch": 1.0964590964590966, "grad_norm": 3.072650909423828, "learning_rate": 1.6416505423094636e-05, "loss": 0.8980664014816284, "step": 898 }, { "epoch": 1.098901098901099, "grad_norm": 1.031434416770935, "learning_rate": 1.639605799457401e-05, "loss": 0.6644148826599121, "step": 900 }, { "epoch": 1.1013431013431014, "grad_norm": 3.342662811279297, "learning_rate": 1.6375566988929025e-05, "loss": 0.6176282167434692, "step": 902 }, { "epoch": 1.1037851037851039, "grad_norm": 0.8162381649017334, "learning_rate": 1.6355032571639574e-05, "loss": 0.5790269374847412, "step": 904 }, { "epoch": 1.1062271062271063, "grad_norm": 1.7559690475463867, "learning_rate": 1.6334454908536123e-05, "loss": 0.8540843725204468, "step": 906 }, { "epoch": 1.1086691086691087, "grad_norm": 2.1038284301757812, "learning_rate": 1.631383416579839e-05, "loss": 1.0307986736297607, "step": 908 }, { "epoch": 1.1111111111111112, "grad_norm": 0.8097777366638184, "learning_rate": 1.6293170509954e-05, "loss": 0.7846847176551819, "step": 910 }, { "epoch": 1.1135531135531136, "grad_norm": 0.7294727563858032, "learning_rate": 1.6272464107877112e-05, "loss": 1.0868881940841675, "step": 912 }, { "epoch": 1.115995115995116, "grad_norm": 1.1073777675628662, "learning_rate": 1.6251715126787114e-05, "loss": 0.6077226400375366, "step": 914 }, { "epoch": 1.1184371184371185, "grad_norm": 1.3670622110366821, "learning_rate": 1.623092373424723e-05, "loss": 0.7134993076324463, "step": 916 }, { "epoch": 1.120879120879121, "grad_norm": 1.8728268146514893, "learning_rate": 1.6210090098163206e-05, "loss": 1.1230908632278442, "step": 918 }, { "epoch": 1.1233211233211233, "grad_norm": 0.782214343547821, "learning_rate": 1.618921438678192e-05, "loss": 0.9432562589645386, "step": 920 }, { "epoch": 1.1257631257631258, "grad_norm": 1.2588818073272705, "learning_rate": 1.616829676869005e-05, "loss": 0.8601541519165039, "step": 922 }, { "epoch": 1.1282051282051282, "grad_norm": 1.1834020614624023, "learning_rate": 1.61473374128127e-05, "loss": 0.7565584778785706, "step": 924 }, { "epoch": 1.1306471306471306, "grad_norm": 1.476582646369934, "learning_rate": 1.612633648841203e-05, "loss": 0.6475503444671631, "step": 926 }, { "epoch": 1.133089133089133, "grad_norm": 1.7382149696350098, "learning_rate": 1.61052941650859e-05, "loss": 0.5194863677024841, "step": 928 }, { "epoch": 1.1355311355311355, "grad_norm": 1.6398006677627563, "learning_rate": 1.608421061276651e-05, "loss": 0.8809158205986023, "step": 930 }, { "epoch": 1.137973137973138, "grad_norm": 1.1977638006210327, "learning_rate": 1.6063086001718986e-05, "loss": 1.0729451179504395, "step": 932 }, { "epoch": 1.1404151404151404, "grad_norm": 1.9817147254943848, "learning_rate": 1.6041920502540058e-05, "loss": 1.008049726486206, "step": 934 }, { "epoch": 1.1428571428571428, "grad_norm": 1.1614291667938232, "learning_rate": 1.6020714286156646e-05, "loss": 0.8578592538833618, "step": 936 }, { "epoch": 1.1452991452991452, "grad_norm": 0.9589775800704956, "learning_rate": 1.59994675238245e-05, "loss": 0.9546090960502625, "step": 938 }, { "epoch": 1.1477411477411477, "grad_norm": 0.889543354511261, "learning_rate": 1.5978180387126797e-05, "loss": 1.0442495346069336, "step": 940 }, { "epoch": 1.15018315018315, "grad_norm": 1.1197261810302734, "learning_rate": 1.5956853047972776e-05, "loss": 0.8928858637809753, "step": 942 }, { "epoch": 1.1526251526251525, "grad_norm": 0.7546731233596802, "learning_rate": 1.5935485678596328e-05, "loss": 0.8579668998718262, "step": 944 }, { "epoch": 1.155067155067155, "grad_norm": 1.2320284843444824, "learning_rate": 1.5914078451554637e-05, "loss": 0.683056652545929, "step": 946 }, { "epoch": 1.1575091575091574, "grad_norm": 1.5659480094909668, "learning_rate": 1.5892631539726754e-05, "loss": 0.6238126754760742, "step": 948 }, { "epoch": 1.1599511599511598, "grad_norm": 1.120065450668335, "learning_rate": 1.5871145116312207e-05, "loss": 0.9421287178993225, "step": 950 }, { "epoch": 1.1623931623931625, "grad_norm": 0.9711224436759949, "learning_rate": 1.5849619354829627e-05, "loss": 0.9722180366516113, "step": 952 }, { "epoch": 1.164835164835165, "grad_norm": 1.5788224935531616, "learning_rate": 1.5828054429115317e-05, "loss": 0.9436995983123779, "step": 954 }, { "epoch": 1.1672771672771673, "grad_norm": 0.5967025756835938, "learning_rate": 1.580645051332186e-05, "loss": 0.8100671768188477, "step": 956 }, { "epoch": 1.1697191697191698, "grad_norm": 0.7621123790740967, "learning_rate": 1.5784807781916714e-05, "loss": 0.7545087337493896, "step": 958 }, { "epoch": 1.1721611721611722, "grad_norm": 1.0470103025436401, "learning_rate": 1.5763126409680803e-05, "loss": 1.0842094421386719, "step": 960 }, { "epoch": 1.1746031746031746, "grad_norm": 1.1259769201278687, "learning_rate": 1.5741406571707108e-05, "loss": 0.7638933062553406, "step": 962 }, { "epoch": 1.177045177045177, "grad_norm": 0.513518750667572, "learning_rate": 1.571964844339924e-05, "loss": 0.6498727798461914, "step": 964 }, { "epoch": 1.1794871794871795, "grad_norm": 0.5694072246551514, "learning_rate": 1.569785220047003e-05, "loss": 0.983795702457428, "step": 966 }, { "epoch": 1.181929181929182, "grad_norm": 0.9271643161773682, "learning_rate": 1.5676018018940134e-05, "loss": 1.1204752922058105, "step": 968 }, { "epoch": 1.1843711843711844, "grad_norm": 1.4760109186172485, "learning_rate": 1.5654146075136565e-05, "loss": 0.7088498473167419, "step": 970 }, { "epoch": 1.1868131868131868, "grad_norm": 1.260972023010254, "learning_rate": 1.5632236545691308e-05, "loss": 0.9644913077354431, "step": 972 }, { "epoch": 1.1892551892551892, "grad_norm": 0.883178174495697, "learning_rate": 1.561028960753988e-05, "loss": 0.7552489638328552, "step": 974 }, { "epoch": 1.1916971916971917, "grad_norm": 4.277756214141846, "learning_rate": 1.5588305437919884e-05, "loss": 0.6645691990852356, "step": 976 }, { "epoch": 1.1941391941391941, "grad_norm": 1.147638201713562, "learning_rate": 1.556628421436962e-05, "loss": 0.8974350094795227, "step": 978 }, { "epoch": 1.1965811965811965, "grad_norm": 2.6772568225860596, "learning_rate": 1.554422611472661e-05, "loss": 1.0676953792572021, "step": 980 }, { "epoch": 1.199023199023199, "grad_norm": 1.5516761541366577, "learning_rate": 1.552213131712617e-05, "loss": 1.0465797185897827, "step": 982 }, { "epoch": 1.2014652014652014, "grad_norm": 0.5330000519752502, "learning_rate": 1.55e-05, "loss": 1.1170203685760498, "step": 984 }, { "epoch": 1.2039072039072038, "grad_norm": 1.7233712673187256, "learning_rate": 1.5477832342074713e-05, "loss": 0.7278258800506592, "step": 986 }, { "epoch": 1.2063492063492063, "grad_norm": 4.363593101501465, "learning_rate": 1.545562852237039e-05, "loss": 0.7073162794113159, "step": 988 }, { "epoch": 1.2087912087912087, "grad_norm": 1.1713706254959106, "learning_rate": 1.5433388720199156e-05, "loss": 0.891094982624054, "step": 990 }, { "epoch": 1.2112332112332111, "grad_norm": 0.9442173838615417, "learning_rate": 1.5411113115163722e-05, "loss": 0.9304923415184021, "step": 992 }, { "epoch": 1.2136752136752136, "grad_norm": 2.135201930999756, "learning_rate": 1.538880188715593e-05, "loss": 0.9996479749679565, "step": 994 }, { "epoch": 1.2161172161172162, "grad_norm": 1.68083918094635, "learning_rate": 1.5366455216355298e-05, "loss": 0.8368605971336365, "step": 996 }, { "epoch": 1.2185592185592187, "grad_norm": 0.7228335738182068, "learning_rate": 1.534407328322758e-05, "loss": 0.9793355464935303, "step": 998 }, { "epoch": 1.221001221001221, "grad_norm": 3.5241169929504395, "learning_rate": 1.5321656268523294e-05, "loss": 0.6125832796096802, "step": 1000 }, { "epoch": 1.2234432234432235, "grad_norm": 0.628485381603241, "learning_rate": 1.5299204353276268e-05, "loss": 0.7384300827980042, "step": 1002 }, { "epoch": 1.225885225885226, "grad_norm": 0.8416216373443604, "learning_rate": 1.5276717718802183e-05, "loss": 0.9433239698410034, "step": 1004 }, { "epoch": 1.2283272283272284, "grad_norm": 1.3178609609603882, "learning_rate": 1.5254196546697088e-05, "loss": 0.9707098603248596, "step": 1006 }, { "epoch": 1.2307692307692308, "grad_norm": 1.0210210084915161, "learning_rate": 1.523164101883597e-05, "loss": 0.5824246406555176, "step": 1008 }, { "epoch": 1.2332112332112333, "grad_norm": 0.7243679165840149, "learning_rate": 1.5209051317371242e-05, "loss": 1.0274351835250854, "step": 1010 }, { "epoch": 1.2356532356532357, "grad_norm": 0.7745081782341003, "learning_rate": 1.5186427624731313e-05, "loss": 0.6757472157478333, "step": 1012 }, { "epoch": 1.2380952380952381, "grad_norm": 0.5712753534317017, "learning_rate": 1.5163770123619083e-05, "loss": 1.041149616241455, "step": 1014 }, { "epoch": 1.2405372405372406, "grad_norm": 1.5870078802108765, "learning_rate": 1.5141078997010486e-05, "loss": 0.886056125164032, "step": 1016 }, { "epoch": 1.242979242979243, "grad_norm": 0.9383798837661743, "learning_rate": 1.5118354428153008e-05, "loss": 0.9722467660903931, "step": 1018 }, { "epoch": 1.2454212454212454, "grad_norm": 3.283290147781372, "learning_rate": 1.5095596600564197e-05, "loss": 0.6366119980812073, "step": 1020 }, { "epoch": 1.2478632478632479, "grad_norm": 3.411051034927368, "learning_rate": 1.5072805698030197e-05, "loss": 0.7901923656463623, "step": 1022 }, { "epoch": 1.2503052503052503, "grad_norm": 1.0399166345596313, "learning_rate": 1.504998190460426e-05, "loss": 0.9346777200698853, "step": 1024 }, { "epoch": 1.2527472527472527, "grad_norm": 0.6323780417442322, "learning_rate": 1.5027125404605246e-05, "loss": 0.8927645087242126, "step": 1026 }, { "epoch": 1.2551892551892552, "grad_norm": 0.7854591608047485, "learning_rate": 1.500423638261615e-05, "loss": 0.8685034513473511, "step": 1028 }, { "epoch": 1.2576312576312576, "grad_norm": 0.7747111320495605, "learning_rate": 1.4981315023482605e-05, "loss": 0.8063104152679443, "step": 1030 }, { "epoch": 1.26007326007326, "grad_norm": 0.7940489649772644, "learning_rate": 1.4958361512311394e-05, "loss": 1.0881439447402954, "step": 1032 }, { "epoch": 1.2625152625152625, "grad_norm": 3.6989586353302, "learning_rate": 1.4935376034468944e-05, "loss": 1.1380131244659424, "step": 1034 }, { "epoch": 1.264957264957265, "grad_norm": 0.5151039361953735, "learning_rate": 1.4912358775579841e-05, "loss": 0.6871868968009949, "step": 1036 }, { "epoch": 1.2673992673992673, "grad_norm": 1.3680596351623535, "learning_rate": 1.4889309921525325e-05, "loss": 0.6862649321556091, "step": 1038 }, { "epoch": 1.2698412698412698, "grad_norm": 0.6552305221557617, "learning_rate": 1.4866229658441793e-05, "loss": 0.7429234385490417, "step": 1040 }, { "epoch": 1.2722832722832722, "grad_norm": 0.5459038019180298, "learning_rate": 1.4843118172719289e-05, "loss": 0.9307520389556885, "step": 1042 }, { "epoch": 1.2747252747252746, "grad_norm": 0.5527384281158447, "learning_rate": 1.4819975650999998e-05, "loss": 0.7104328274726868, "step": 1044 }, { "epoch": 1.277167277167277, "grad_norm": 1.2261544466018677, "learning_rate": 1.4796802280176762e-05, "loss": 1.0070260763168335, "step": 1046 }, { "epoch": 1.2796092796092795, "grad_norm": 1.9242292642593384, "learning_rate": 1.4773598247391527e-05, "loss": 0.690989077091217, "step": 1048 }, { "epoch": 1.282051282051282, "grad_norm": 1.825949788093567, "learning_rate": 1.4750363740033881e-05, "loss": 0.42399048805236816, "step": 1050 }, { "epoch": 1.2844932844932844, "grad_norm": 1.841841459274292, "learning_rate": 1.4727098945739497e-05, "loss": 1.0426183938980103, "step": 1052 }, { "epoch": 1.2869352869352868, "grad_norm": 0.51153963804245, "learning_rate": 1.470380405238865e-05, "loss": 0.8385255336761475, "step": 1054 }, { "epoch": 1.2893772893772895, "grad_norm": 2.656769275665283, "learning_rate": 1.4680479248104678e-05, "loss": 0.6596496105194092, "step": 1056 }, { "epoch": 1.291819291819292, "grad_norm": 1.2762665748596191, "learning_rate": 1.4657124721252476e-05, "loss": 1.232382893562317, "step": 1058 }, { "epoch": 1.2942612942612943, "grad_norm": 1.1065174341201782, "learning_rate": 1.4633740660436974e-05, "loss": 1.0262730121612549, "step": 1060 }, { "epoch": 1.2967032967032968, "grad_norm": 3.235954999923706, "learning_rate": 1.4610327254501607e-05, "loss": 0.6136125326156616, "step": 1062 }, { "epoch": 1.2991452991452992, "grad_norm": 0.5966620445251465, "learning_rate": 1.4586884692526791e-05, "loss": 0.8876266479492188, "step": 1064 }, { "epoch": 1.3015873015873016, "grad_norm": 2.7788665294647217, "learning_rate": 1.4563413163828397e-05, "loss": 0.7026379108428955, "step": 1066 }, { "epoch": 1.304029304029304, "grad_norm": 0.7998191714286804, "learning_rate": 1.4539912857956234e-05, "loss": 0.9727767705917358, "step": 1068 }, { "epoch": 1.3064713064713065, "grad_norm": 1.385021686553955, "learning_rate": 1.4516383964692495e-05, "loss": 0.7625731825828552, "step": 1070 }, { "epoch": 1.308913308913309, "grad_norm": 1.5408962965011597, "learning_rate": 1.4492826674050248e-05, "loss": 0.9061781167984009, "step": 1072 }, { "epoch": 1.3113553113553114, "grad_norm": 6.768632888793945, "learning_rate": 1.4469241176271884e-05, "loss": 0.7514428496360779, "step": 1074 }, { "epoch": 1.3137973137973138, "grad_norm": 0.7883042097091675, "learning_rate": 1.4445627661827589e-05, "loss": 0.6796785593032837, "step": 1076 }, { "epoch": 1.3162393162393162, "grad_norm": 1.3659143447875977, "learning_rate": 1.4421986321413801e-05, "loss": 0.9605479836463928, "step": 1078 }, { "epoch": 1.3186813186813187, "grad_norm": 1.356332778930664, "learning_rate": 1.439831734595168e-05, "loss": 0.8200567364692688, "step": 1080 }, { "epoch": 1.321123321123321, "grad_norm": 1.2193089723587036, "learning_rate": 1.4374620926585556e-05, "loss": 0.881037175655365, "step": 1082 }, { "epoch": 1.3235653235653235, "grad_norm": 0.5569941401481628, "learning_rate": 1.4350897254681386e-05, "loss": 0.8864683508872986, "step": 1084 }, { "epoch": 1.326007326007326, "grad_norm": 1.2279424667358398, "learning_rate": 1.4327146521825213e-05, "loss": 1.0031923055648804, "step": 1086 }, { "epoch": 1.3284493284493284, "grad_norm": 7.039901256561279, "learning_rate": 1.4303368919821619e-05, "loss": 1.0991631746292114, "step": 1088 }, { "epoch": 1.3308913308913308, "grad_norm": 0.7994674444198608, "learning_rate": 1.4279564640692172e-05, "loss": 0.6553327441215515, "step": 1090 }, { "epoch": 1.3333333333333333, "grad_norm": 5.8774871826171875, "learning_rate": 1.4255733876673874e-05, "loss": 0.7461038827896118, "step": 1092 }, { "epoch": 1.3357753357753357, "grad_norm": 0.7029107213020325, "learning_rate": 1.4231876820217623e-05, "loss": 0.9785415530204773, "step": 1094 }, { "epoch": 1.3382173382173383, "grad_norm": 3.4110426902770996, "learning_rate": 1.4207993663986636e-05, "loss": 0.47891128063201904, "step": 1096 }, { "epoch": 1.3406593406593408, "grad_norm": 1.4747514724731445, "learning_rate": 1.4184084600854906e-05, "loss": 1.1681262254714966, "step": 1098 }, { "epoch": 1.3431013431013432, "grad_norm": 1.336816668510437, "learning_rate": 1.4160149823905654e-05, "loss": 1.0751440525054932, "step": 1100 }, { "epoch": 1.3455433455433456, "grad_norm": 0.80948805809021, "learning_rate": 1.4136189526429749e-05, "loss": 1.000352144241333, "step": 1102 }, { "epoch": 1.347985347985348, "grad_norm": 2.687490701675415, "learning_rate": 1.4112203901924153e-05, "loss": 0.8417548537254333, "step": 1104 }, { "epoch": 1.3504273504273505, "grad_norm": 0.8591554760932922, "learning_rate": 1.4088193144090376e-05, "loss": 0.9740299582481384, "step": 1106 }, { "epoch": 1.352869352869353, "grad_norm": 3.9168152809143066, "learning_rate": 1.406415744683289e-05, "loss": 0.7925201058387756, "step": 1108 }, { "epoch": 1.3553113553113554, "grad_norm": 0.8020510673522949, "learning_rate": 1.4040097004257567e-05, "loss": 1.042458415031433, "step": 1110 }, { "epoch": 1.3577533577533578, "grad_norm": 1.342916488647461, "learning_rate": 1.4016012010670125e-05, "loss": 0.9074981808662415, "step": 1112 }, { "epoch": 1.3601953601953602, "grad_norm": 1.7544145584106445, "learning_rate": 1.3991902660574544e-05, "loss": 0.8596875667572021, "step": 1114 }, { "epoch": 1.3626373626373627, "grad_norm": 2.7417960166931152, "learning_rate": 1.39677691486715e-05, "loss": 0.5096735954284668, "step": 1116 }, { "epoch": 1.3650793650793651, "grad_norm": 6.50905704498291, "learning_rate": 1.3943611669856797e-05, "loss": 0.8825461268424988, "step": 1118 }, { "epoch": 1.3675213675213675, "grad_norm": 1.5938875675201416, "learning_rate": 1.3919430419219787e-05, "loss": 0.9512450695037842, "step": 1120 }, { "epoch": 1.36996336996337, "grad_norm": 2.952125072479248, "learning_rate": 1.389522559204179e-05, "loss": 0.9308354258537292, "step": 1122 }, { "epoch": 1.3724053724053724, "grad_norm": 0.7429002523422241, "learning_rate": 1.387099738379454e-05, "loss": 0.8262976408004761, "step": 1124 }, { "epoch": 1.3748473748473748, "grad_norm": 2.061551809310913, "learning_rate": 1.3846745990138581e-05, "loss": 1.28501558303833, "step": 1126 }, { "epoch": 1.3772893772893773, "grad_norm": 0.9269969463348389, "learning_rate": 1.382247160692169e-05, "loss": 0.9468799829483032, "step": 1128 }, { "epoch": 1.3797313797313797, "grad_norm": 0.8824846744537354, "learning_rate": 1.3798174430177314e-05, "loss": 0.6640329360961914, "step": 1130 }, { "epoch": 1.3821733821733821, "grad_norm": 0.633753776550293, "learning_rate": 1.3773854656122962e-05, "loss": 0.7266710996627808, "step": 1132 }, { "epoch": 1.3846153846153846, "grad_norm": 5.053553581237793, "learning_rate": 1.3749512481158649e-05, "loss": 0.5124362707138062, "step": 1134 }, { "epoch": 1.387057387057387, "grad_norm": 1.3869932889938354, "learning_rate": 1.3725148101865275e-05, "loss": 0.6932591199874878, "step": 1136 }, { "epoch": 1.3894993894993894, "grad_norm": 0.8337790369987488, "learning_rate": 1.3700761715003068e-05, "loss": 1.0207314491271973, "step": 1138 }, { "epoch": 1.3919413919413919, "grad_norm": 2.2834839820861816, "learning_rate": 1.3676353517509981e-05, "loss": 0.8703376650810242, "step": 1140 }, { "epoch": 1.3943833943833943, "grad_norm": 1.934580683708191, "learning_rate": 1.3651923706500105e-05, "loss": 0.9365097284317017, "step": 1142 }, { "epoch": 1.3968253968253967, "grad_norm": 2.526843786239624, "learning_rate": 1.362747247926207e-05, "loss": 0.7051898837089539, "step": 1144 }, { "epoch": 1.3992673992673992, "grad_norm": 0.8698064684867859, "learning_rate": 1.3603000033257465e-05, "loss": 1.0435025691986084, "step": 1146 }, { "epoch": 1.4017094017094016, "grad_norm": 2.076078176498413, "learning_rate": 1.3578506566119236e-05, "loss": 0.8728469610214233, "step": 1148 }, { "epoch": 1.404151404151404, "grad_norm": 0.8785778880119324, "learning_rate": 1.355399227565008e-05, "loss": 0.7566535472869873, "step": 1150 }, { "epoch": 1.4065934065934065, "grad_norm": 1.0821596384048462, "learning_rate": 1.352945735982087e-05, "loss": 0.7982299327850342, "step": 1152 }, { "epoch": 1.409035409035409, "grad_norm": 1.226269006729126, "learning_rate": 1.3504902016769039e-05, "loss": 0.7825957536697388, "step": 1154 }, { "epoch": 1.4114774114774113, "grad_norm": 1.9049503803253174, "learning_rate": 1.348032644479698e-05, "loss": 0.6891085505485535, "step": 1156 }, { "epoch": 1.4139194139194138, "grad_norm": 1.1582715511322021, "learning_rate": 1.3455730842370462e-05, "loss": 0.8980281352996826, "step": 1158 }, { "epoch": 1.4163614163614164, "grad_norm": 0.8849154114723206, "learning_rate": 1.3431115408117002e-05, "loss": 0.8913061618804932, "step": 1160 }, { "epoch": 1.4188034188034189, "grad_norm": 1.0964971780776978, "learning_rate": 1.3406480340824272e-05, "loss": 0.7366968393325806, "step": 1162 }, { "epoch": 1.4212454212454213, "grad_norm": 13.473047256469727, "learning_rate": 1.3381825839438514e-05, "loss": 0.6932869553565979, "step": 1164 }, { "epoch": 1.4236874236874237, "grad_norm": 1.122653603553772, "learning_rate": 1.3357152103062892e-05, "loss": 1.1828283071517944, "step": 1166 }, { "epoch": 1.4261294261294262, "grad_norm": 0.561507523059845, "learning_rate": 1.3332459330955921e-05, "loss": 0.966327428817749, "step": 1168 }, { "epoch": 1.4285714285714286, "grad_norm": 2.9495770931243896, "learning_rate": 1.3307747722529838e-05, "loss": 0.8709004521369934, "step": 1170 }, { "epoch": 1.431013431013431, "grad_norm": 0.6762902140617371, "learning_rate": 1.3283017477348993e-05, "loss": 0.9068043231964111, "step": 1172 }, { "epoch": 1.4334554334554335, "grad_norm": 0.7292370796203613, "learning_rate": 1.3258268795128258e-05, "loss": 0.9378133416175842, "step": 1174 }, { "epoch": 1.435897435897436, "grad_norm": 0.974267303943634, "learning_rate": 1.3233501875731376e-05, "loss": 1.0176819562911987, "step": 1176 }, { "epoch": 1.4383394383394383, "grad_norm": 5.0265116691589355, "learning_rate": 1.320871691916938e-05, "loss": 0.7393254041671753, "step": 1178 }, { "epoch": 1.4407814407814408, "grad_norm": 3.240424394607544, "learning_rate": 1.3183914125598966e-05, "loss": 0.8406731486320496, "step": 1180 }, { "epoch": 1.4432234432234432, "grad_norm": 0.9493277668952942, "learning_rate": 1.3159093695320881e-05, "loss": 0.756401002407074, "step": 1182 }, { "epoch": 1.4456654456654456, "grad_norm": 0.9762367010116577, "learning_rate": 1.313425582877829e-05, "loss": 1.055999755859375, "step": 1184 }, { "epoch": 1.448107448107448, "grad_norm": 0.6565649509429932, "learning_rate": 1.3109400726555179e-05, "loss": 0.8509088754653931, "step": 1186 }, { "epoch": 1.4505494505494505, "grad_norm": 2.6168346405029297, "learning_rate": 1.3084528589374718e-05, "loss": 0.7348777651786804, "step": 1188 }, { "epoch": 1.452991452991453, "grad_norm": 1.5224627256393433, "learning_rate": 1.305963961809765e-05, "loss": 0.9267134666442871, "step": 1190 }, { "epoch": 1.4554334554334554, "grad_norm": 0.7623134255409241, "learning_rate": 1.3034734013720669e-05, "loss": 0.8056920170783997, "step": 1192 }, { "epoch": 1.4578754578754578, "grad_norm": 1.4244619607925415, "learning_rate": 1.3009811977374784e-05, "loss": 0.6724956631660461, "step": 1194 }, { "epoch": 1.4603174603174602, "grad_norm": 0.7519621253013611, "learning_rate": 1.2984873710323711e-05, "loss": 0.6628673076629639, "step": 1196 }, { "epoch": 1.462759462759463, "grad_norm": 0.7634888887405396, "learning_rate": 1.2959919413962242e-05, "loss": 0.8408687710762024, "step": 1198 }, { "epoch": 1.4652014652014653, "grad_norm": 1.9624353647232056, "learning_rate": 1.2934949289814611e-05, "loss": 1.1985151767730713, "step": 1200 }, { "epoch": 1.4676434676434678, "grad_norm": 1.5909016132354736, "learning_rate": 1.290996353953288e-05, "loss": 0.9667496681213379, "step": 1202 }, { "epoch": 1.4700854700854702, "grad_norm": 0.8254397511482239, "learning_rate": 1.2884962364895304e-05, "loss": 0.9893684983253479, "step": 1204 }, { "epoch": 1.4725274725274726, "grad_norm": 0.9778246879577637, "learning_rate": 1.2859945967804687e-05, "loss": 0.8230042457580566, "step": 1206 }, { "epoch": 1.474969474969475, "grad_norm": 2.8977315425872803, "learning_rate": 1.2834914550286789e-05, "loss": 0.7464233040809631, "step": 1208 }, { "epoch": 1.4774114774114775, "grad_norm": 16.703990936279297, "learning_rate": 1.2809868314488647e-05, "loss": 0.8318718671798706, "step": 1210 }, { "epoch": 1.47985347985348, "grad_norm": 1.9694427251815796, "learning_rate": 1.2784807462676983e-05, "loss": 0.8906052708625793, "step": 1212 }, { "epoch": 1.4822954822954824, "grad_norm": 0.8902061581611633, "learning_rate": 1.2759732197236548e-05, "loss": 0.9788769483566284, "step": 1214 }, { "epoch": 1.4847374847374848, "grad_norm": 0.8015345335006714, "learning_rate": 1.2734642720668494e-05, "loss": 0.9402112364768982, "step": 1216 }, { "epoch": 1.4871794871794872, "grad_norm": 2.7102816104888916, "learning_rate": 1.2709539235588739e-05, "loss": 0.27936387062072754, "step": 1218 }, { "epoch": 1.4896214896214897, "grad_norm": 0.5606179237365723, "learning_rate": 1.2684421944726323e-05, "loss": 0.7066472768783569, "step": 1220 }, { "epoch": 1.492063492063492, "grad_norm": 1.7472079992294312, "learning_rate": 1.2659291050921798e-05, "loss": 0.8000496029853821, "step": 1222 }, { "epoch": 1.4945054945054945, "grad_norm": 3.1667306423187256, "learning_rate": 1.263414675712554e-05, "loss": 0.733214259147644, "step": 1224 }, { "epoch": 1.496947496947497, "grad_norm": 1.6288788318634033, "learning_rate": 1.2608989266396165e-05, "loss": 0.8229939341545105, "step": 1226 }, { "epoch": 1.4993894993894994, "grad_norm": 3.6219799518585205, "learning_rate": 1.2583818781898855e-05, "loss": 0.4456430971622467, "step": 1228 }, { "epoch": 1.5018315018315018, "grad_norm": 1.921484351158142, "learning_rate": 1.2558635506903717e-05, "loss": 0.6831130981445312, "step": 1230 }, { "epoch": 1.5042735042735043, "grad_norm": 0.4906938970088959, "learning_rate": 1.253343964478417e-05, "loss": 0.6764166951179504, "step": 1232 }, { "epoch": 1.5067155067155067, "grad_norm": 1.23770272731781, "learning_rate": 1.250823139901527e-05, "loss": 0.9079239368438721, "step": 1234 }, { "epoch": 1.5091575091575091, "grad_norm": 0.9974614977836609, "learning_rate": 1.2483010973172077e-05, "loss": 0.9452921748161316, "step": 1236 }, { "epoch": 1.5115995115995116, "grad_norm": 0.9079129099845886, "learning_rate": 1.2457778570928026e-05, "loss": 0.8234338760375977, "step": 1238 }, { "epoch": 1.514041514041514, "grad_norm": 0.9488117098808289, "learning_rate": 1.2432534396053261e-05, "loss": 0.8415461778640747, "step": 1240 }, { "epoch": 1.5164835164835164, "grad_norm": 0.7722516059875488, "learning_rate": 1.2407278652413001e-05, "loss": 1.0288302898406982, "step": 1242 }, { "epoch": 1.5189255189255189, "grad_norm": 3.5721123218536377, "learning_rate": 1.2382011543965896e-05, "loss": 0.7554802298545837, "step": 1244 }, { "epoch": 1.5213675213675213, "grad_norm": 0.6691564917564392, "learning_rate": 1.2356733274762367e-05, "loss": 0.7608579397201538, "step": 1246 }, { "epoch": 1.5238095238095237, "grad_norm": 0.9692053198814392, "learning_rate": 1.2331444048942969e-05, "loss": 0.8119852542877197, "step": 1248 }, { "epoch": 1.5262515262515262, "grad_norm": 1.7576018571853638, "learning_rate": 1.2306144070736747e-05, "loss": 1.1432095766067505, "step": 1250 }, { "epoch": 1.5286935286935286, "grad_norm": 2.8032066822052, "learning_rate": 1.228083354445957e-05, "loss": 0.7118352055549622, "step": 1252 }, { "epoch": 1.531135531135531, "grad_norm": 1.4581559896469116, "learning_rate": 1.2255512674512491e-05, "loss": 0.9391320943832397, "step": 1254 }, { "epoch": 1.5335775335775335, "grad_norm": 0.9859986305236816, "learning_rate": 1.2230181665380101e-05, "loss": 1.0426268577575684, "step": 1256 }, { "epoch": 1.536019536019536, "grad_norm": 0.6827996373176575, "learning_rate": 1.220484072162887e-05, "loss": 0.35382741689682007, "step": 1258 }, { "epoch": 1.5384615384615383, "grad_norm": 4.84762716293335, "learning_rate": 1.2179490047905495e-05, "loss": 0.6097034215927124, "step": 1260 }, { "epoch": 1.5409035409035408, "grad_norm": 1.7744395732879639, "learning_rate": 1.2154129848935258e-05, "loss": 0.6083784103393555, "step": 1262 }, { "epoch": 1.5433455433455432, "grad_norm": 2.7440474033355713, "learning_rate": 1.2128760329520355e-05, "loss": 0.7916078567504883, "step": 1264 }, { "epoch": 1.5457875457875456, "grad_norm": 1.4891324043273926, "learning_rate": 1.210338169453825e-05, "loss": 0.8106079697608948, "step": 1266 }, { "epoch": 1.5482295482295483, "grad_norm": 0.9212846159934998, "learning_rate": 1.2077994148940033e-05, "loss": 0.8362663984298706, "step": 1268 }, { "epoch": 1.5506715506715507, "grad_norm": 1.9237959384918213, "learning_rate": 1.2052597897748746e-05, "loss": 0.4818616807460785, "step": 1270 }, { "epoch": 1.5531135531135531, "grad_norm": 3.0629465579986572, "learning_rate": 1.202719314605773e-05, "loss": 1.0731854438781738, "step": 1272 }, { "epoch": 1.5555555555555556, "grad_norm": 1.05351984500885, "learning_rate": 1.2001780099028988e-05, "loss": 0.943490207195282, "step": 1274 }, { "epoch": 1.557997557997558, "grad_norm": 4.432197570800781, "learning_rate": 1.1976358961891504e-05, "loss": 1.3021904230117798, "step": 1276 }, { "epoch": 1.5604395604395604, "grad_norm": 1.0480554103851318, "learning_rate": 1.1950929939939596e-05, "loss": 0.7510530948638916, "step": 1278 }, { "epoch": 1.5628815628815629, "grad_norm": 1.6610548496246338, "learning_rate": 1.192549323853126e-05, "loss": 0.9113296270370483, "step": 1280 }, { "epoch": 1.5653235653235653, "grad_norm": 4.827365875244141, "learning_rate": 1.1900049063086508e-05, "loss": 0.6182503700256348, "step": 1282 }, { "epoch": 1.5677655677655677, "grad_norm": 0.8534301519393921, "learning_rate": 1.1874597619085712e-05, "loss": 0.9308310151100159, "step": 1284 }, { "epoch": 1.5702075702075702, "grad_norm": 0.9158720970153809, "learning_rate": 1.1849139112067937e-05, "loss": 0.9331011772155762, "step": 1286 }, { "epoch": 1.5726495726495726, "grad_norm": 12.01048755645752, "learning_rate": 1.18236737476293e-05, "loss": 0.490848183631897, "step": 1288 }, { "epoch": 1.575091575091575, "grad_norm": 0.9270315170288086, "learning_rate": 1.1798201731421286e-05, "loss": 0.7262513637542725, "step": 1290 }, { "epoch": 1.5775335775335775, "grad_norm": 1.8197249174118042, "learning_rate": 1.1772723269149096e-05, "loss": 0.43270692229270935, "step": 1292 }, { "epoch": 1.5799755799755801, "grad_norm": 1.0564115047454834, "learning_rate": 1.1747238566569993e-05, "loss": 0.6380181908607483, "step": 1294 }, { "epoch": 1.5824175824175826, "grad_norm": 0.937374472618103, "learning_rate": 1.1721747829491639e-05, "loss": 0.9579664468765259, "step": 1296 }, { "epoch": 1.584859584859585, "grad_norm": 0.9189720749855042, "learning_rate": 1.169625126377042e-05, "loss": 1.1132162809371948, "step": 1298 }, { "epoch": 1.5873015873015874, "grad_norm": 1.5094869136810303, "learning_rate": 1.1670749075309798e-05, "loss": 0.9595221877098083, "step": 1300 }, { "epoch": 1.5897435897435899, "grad_norm": 3.5550084114074707, "learning_rate": 1.164524147005864e-05, "loss": 1.0293970108032227, "step": 1302 }, { "epoch": 1.5921855921855923, "grad_norm": 8.070341110229492, "learning_rate": 1.1619728654009561e-05, "loss": 0.9469819664955139, "step": 1304 }, { "epoch": 1.5946275946275947, "grad_norm": 2.206435203552246, "learning_rate": 1.1594210833197252e-05, "loss": 0.6112901568412781, "step": 1306 }, { "epoch": 1.5970695970695972, "grad_norm": 0.7995406985282898, "learning_rate": 1.156868821369683e-05, "loss": 0.9325740337371826, "step": 1308 }, { "epoch": 1.5995115995115996, "grad_norm": 1.177374243736267, "learning_rate": 1.1543161001622154e-05, "loss": 0.821311891078949, "step": 1310 }, { "epoch": 1.601953601953602, "grad_norm": 1.0490672588348389, "learning_rate": 1.1517629403124175e-05, "loss": 0.8008186221122742, "step": 1312 }, { "epoch": 1.6043956043956045, "grad_norm": 1.078908085823059, "learning_rate": 1.1492093624389274e-05, "loss": 0.9607588648796082, "step": 1314 }, { "epoch": 1.606837606837607, "grad_norm": 0.9914792776107788, "learning_rate": 1.1466553871637585e-05, "loss": 1.0678871870040894, "step": 1316 }, { "epoch": 1.6092796092796093, "grad_norm": 0.9516023993492126, "learning_rate": 1.1441010351121332e-05, "loss": 0.927726686000824, "step": 1318 }, { "epoch": 1.6117216117216118, "grad_norm": 1.6526710987091064, "learning_rate": 1.1415463269123172e-05, "loss": 1.1496163606643677, "step": 1320 }, { "epoch": 1.6141636141636142, "grad_norm": 0.8162203431129456, "learning_rate": 1.1389912831954524e-05, "loss": 0.849646270275116, "step": 1322 }, { "epoch": 1.6166056166056166, "grad_norm": 0.7434989809989929, "learning_rate": 1.1364359245953897e-05, "loss": 1.0158569812774658, "step": 1324 }, { "epoch": 1.619047619047619, "grad_norm": 2.0639302730560303, "learning_rate": 1.1338802717485234e-05, "loss": 0.6589023470878601, "step": 1326 }, { "epoch": 1.6214896214896215, "grad_norm": 1.0379024744033813, "learning_rate": 1.1313243452936235e-05, "loss": 0.9295322895050049, "step": 1328 }, { "epoch": 1.623931623931624, "grad_norm": 1.181497573852539, "learning_rate": 1.1287681658716706e-05, "loss": 1.0116742849349976, "step": 1330 }, { "epoch": 1.6263736263736264, "grad_norm": 4.863892078399658, "learning_rate": 1.1262117541256872e-05, "loss": 0.8862733244895935, "step": 1332 }, { "epoch": 1.6288156288156288, "grad_norm": 0.7002055644989014, "learning_rate": 1.1236551307005722e-05, "loss": 0.9096848368644714, "step": 1334 }, { "epoch": 1.6312576312576312, "grad_norm": 0.6345333456993103, "learning_rate": 1.1210983162429347e-05, "loss": 0.5657076835632324, "step": 1336 }, { "epoch": 1.6336996336996337, "grad_norm": 2.7891440391540527, "learning_rate": 1.1185413314009254e-05, "loss": 0.9815369248390198, "step": 1338 }, { "epoch": 1.636141636141636, "grad_norm": 6.338902473449707, "learning_rate": 1.1159841968240714e-05, "loss": 0.5724242925643921, "step": 1340 }, { "epoch": 1.6385836385836385, "grad_norm": 2.7349283695220947, "learning_rate": 1.1134269331631096e-05, "loss": 0.4281773269176483, "step": 1342 }, { "epoch": 1.641025641025641, "grad_norm": 1.9846585988998413, "learning_rate": 1.1108695610698187e-05, "loss": 1.0027917623519897, "step": 1344 }, { "epoch": 1.6434676434676434, "grad_norm": 0.6990553140640259, "learning_rate": 1.1083121011968531e-05, "loss": 0.9550279378890991, "step": 1346 }, { "epoch": 1.6459096459096458, "grad_norm": 2.958153486251831, "learning_rate": 1.1057545741975768e-05, "loss": 0.6426241993904114, "step": 1348 }, { "epoch": 1.6483516483516483, "grad_norm": 0.8284672498703003, "learning_rate": 1.1031970007258947e-05, "loss": 0.8278497457504272, "step": 1350 }, { "epoch": 1.6507936507936507, "grad_norm": 0.7631545066833496, "learning_rate": 1.1006394014360882e-05, "loss": 0.9407053589820862, "step": 1352 }, { "epoch": 1.6532356532356531, "grad_norm": 4.05110502243042, "learning_rate": 1.0980817969826458e-05, "loss": 0.9099552035331726, "step": 1354 }, { "epoch": 1.6556776556776556, "grad_norm": 1.0000635385513306, "learning_rate": 1.0955242080200994e-05, "loss": 0.9383828639984131, "step": 1356 }, { "epoch": 1.658119658119658, "grad_norm": 1.1321988105773926, "learning_rate": 1.0929666552028545e-05, "loss": 0.52699214220047, "step": 1358 }, { "epoch": 1.6605616605616604, "grad_norm": 1.246857762336731, "learning_rate": 1.0904091591850255e-05, "loss": 0.6198506355285645, "step": 1360 }, { "epoch": 1.6630036630036629, "grad_norm": 1.0080903768539429, "learning_rate": 1.0878517406202674e-05, "loss": 0.9911934733390808, "step": 1362 }, { "epoch": 1.6654456654456653, "grad_norm": 0.8918383121490479, "learning_rate": 1.0852944201616097e-05, "loss": 1.0504215955734253, "step": 1364 }, { "epoch": 1.6678876678876677, "grad_norm": 1.0392669439315796, "learning_rate": 1.082737218461291e-05, "loss": 1.0229471921920776, "step": 1366 }, { "epoch": 1.6703296703296702, "grad_norm": 0.8570772409439087, "learning_rate": 1.080180156170589e-05, "loss": 1.049717903137207, "step": 1368 }, { "epoch": 1.6727716727716728, "grad_norm": 0.9958022236824036, "learning_rate": 1.0776232539396567e-05, "loss": 1.006693720817566, "step": 1370 }, { "epoch": 1.6752136752136753, "grad_norm": 0.882525622844696, "learning_rate": 1.0750665324173542e-05, "loss": 0.615381121635437, "step": 1372 }, { "epoch": 1.6776556776556777, "grad_norm": 0.9473522305488586, "learning_rate": 1.0725100122510819e-05, "loss": 0.36105355620384216, "step": 1374 }, { "epoch": 1.6800976800976801, "grad_norm": 3.743011236190796, "learning_rate": 1.0699537140866146e-05, "loss": 1.1695616245269775, "step": 1376 }, { "epoch": 1.6825396825396826, "grad_norm": 0.823453962802887, "learning_rate": 1.0673976585679341e-05, "loss": 0.9196591377258301, "step": 1378 }, { "epoch": 1.684981684981685, "grad_norm": 0.5954387187957764, "learning_rate": 1.0648418663370628e-05, "loss": 0.7695765495300293, "step": 1380 }, { "epoch": 1.6874236874236874, "grad_norm": 2.546109437942505, "learning_rate": 1.0622863580338967e-05, "loss": 1.0195831060409546, "step": 1382 }, { "epoch": 1.6898656898656899, "grad_norm": 0.7414639592170715, "learning_rate": 1.0597311542960385e-05, "loss": 0.8976457715034485, "step": 1384 }, { "epoch": 1.6923076923076923, "grad_norm": 0.6246572732925415, "learning_rate": 1.0571762757586321e-05, "loss": 0.9752371907234192, "step": 1386 }, { "epoch": 1.6947496947496947, "grad_norm": 0.8245002627372742, "learning_rate": 1.0546217430541947e-05, "loss": 0.9225857257843018, "step": 1388 }, { "epoch": 1.6971916971916972, "grad_norm": 0.7589647769927979, "learning_rate": 1.0520675768124507e-05, "loss": 0.47266364097595215, "step": 1390 }, { "epoch": 1.6996336996336996, "grad_norm": 0.8037369847297668, "learning_rate": 1.0495137976601648e-05, "loss": 0.8273367881774902, "step": 1392 }, { "epoch": 1.702075702075702, "grad_norm": 0.9903712868690491, "learning_rate": 1.0469604262209765e-05, "loss": 0.7290286421775818, "step": 1394 }, { "epoch": 1.7045177045177047, "grad_norm": 2.0067808628082275, "learning_rate": 1.0444074831152317e-05, "loss": 0.9373266100883484, "step": 1396 }, { "epoch": 1.7069597069597071, "grad_norm": 20.187288284301758, "learning_rate": 1.0418549889598175e-05, "loss": 0.8240612149238586, "step": 1398 }, { "epoch": 1.7094017094017095, "grad_norm": 4.022505283355713, "learning_rate": 1.0393029643679962e-05, "loss": 0.44202497601509094, "step": 1400 }, { "epoch": 1.711843711843712, "grad_norm": 5.573869705200195, "learning_rate": 1.0367514299492366e-05, "loss": 0.9583691954612732, "step": 1402 }, { "epoch": 1.7142857142857144, "grad_norm": 1.5996133089065552, "learning_rate": 1.0342004063090503e-05, "loss": 1.0398838520050049, "step": 1404 }, { "epoch": 1.7167277167277168, "grad_norm": 2.385746717453003, "learning_rate": 1.0316499140488232e-05, "loss": 0.4760570824146271, "step": 1406 }, { "epoch": 1.7191697191697193, "grad_norm": 0.8254954218864441, "learning_rate": 1.0290999737656497e-05, "loss": 0.907942533493042, "step": 1408 }, { "epoch": 1.7216117216117217, "grad_norm": 8.329554557800293, "learning_rate": 1.026550606052168e-05, "loss": 0.6862547397613525, "step": 1410 }, { "epoch": 1.7240537240537241, "grad_norm": 2.332361936569214, "learning_rate": 1.0240018314963909e-05, "loss": 0.8768781423568726, "step": 1412 }, { "epoch": 1.7264957264957266, "grad_norm": 2.285680055618286, "learning_rate": 1.0214536706815418e-05, "loss": 0.986327588558197, "step": 1414 }, { "epoch": 1.728937728937729, "grad_norm": 3.5364201068878174, "learning_rate": 1.0189061441858873e-05, "loss": 0.8355549573898315, "step": 1416 }, { "epoch": 1.7313797313797314, "grad_norm": 0.8595628142356873, "learning_rate": 1.0163592725825712e-05, "loss": 0.8929445743560791, "step": 1418 }, { "epoch": 1.7338217338217339, "grad_norm": 15.206433296203613, "learning_rate": 1.0138130764394496e-05, "loss": 0.7870601415634155, "step": 1420 }, { "epoch": 1.7362637362637363, "grad_norm": 2.8101370334625244, "learning_rate": 1.0112675763189224e-05, "loss": 0.7534129023551941, "step": 1422 }, { "epoch": 1.7387057387057387, "grad_norm": 1.858702540397644, "learning_rate": 1.0087227927777696e-05, "loss": 0.8370426893234253, "step": 1424 }, { "epoch": 1.7411477411477412, "grad_norm": 2.0665295124053955, "learning_rate": 1.006178746366984e-05, "loss": 0.6909109354019165, "step": 1426 }, { "epoch": 1.7435897435897436, "grad_norm": 0.9323246479034424, "learning_rate": 1.0036354576316052e-05, "loss": 1.014011263847351, "step": 1428 }, { "epoch": 1.746031746031746, "grad_norm": 1.75360107421875, "learning_rate": 1.0010929471105548e-05, "loss": 1.2392351627349854, "step": 1430 }, { "epoch": 1.7484737484737485, "grad_norm": 1.979491949081421, "learning_rate": 9.98551235336469e-06, "loss": 0.6340602040290833, "step": 1432 }, { "epoch": 1.750915750915751, "grad_norm": 2.876166343688965, "learning_rate": 9.960103428355337e-06, "loss": 0.7525686621665955, "step": 1434 }, { "epoch": 1.7533577533577533, "grad_norm": 1.366552710533142, "learning_rate": 9.934702901273187e-06, "loss": 0.6044411063194275, "step": 1436 }, { "epoch": 1.7557997557997558, "grad_norm": 0.689400315284729, "learning_rate": 9.90931097724612e-06, "loss": 0.4377739727497101, "step": 1438 }, { "epoch": 1.7582417582417582, "grad_norm": 0.8386373519897461, "learning_rate": 9.883927861332538e-06, "loss": 0.909875214099884, "step": 1440 }, { "epoch": 1.7606837606837606, "grad_norm": 7.745026111602783, "learning_rate": 9.85855375851971e-06, "loss": 0.7949923872947693, "step": 1442 }, { "epoch": 1.763125763125763, "grad_norm": 2.948460340499878, "learning_rate": 9.833188873722122e-06, "loss": 0.6595785021781921, "step": 1444 }, { "epoch": 1.7655677655677655, "grad_norm": 0.7448163032531738, "learning_rate": 9.80783341177981e-06, "loss": 1.0280483961105347, "step": 1446 }, { "epoch": 1.768009768009768, "grad_norm": 0.7969598770141602, "learning_rate": 9.782487577456724e-06, "loss": 1.0123943090438843, "step": 1448 }, { "epoch": 1.7704517704517704, "grad_norm": 0.9583572149276733, "learning_rate": 9.75715157543905e-06, "loss": 0.8486643433570862, "step": 1450 }, { "epoch": 1.7728937728937728, "grad_norm": 2.09142804145813, "learning_rate": 9.731825610333587e-06, "loss": 0.3455406129360199, "step": 1452 }, { "epoch": 1.7753357753357752, "grad_norm": 0.9442964196205139, "learning_rate": 9.706509886666067e-06, "loss": 0.8303570747375488, "step": 1454 }, { "epoch": 1.7777777777777777, "grad_norm": 1.240134358406067, "learning_rate": 9.681204608879518e-06, "loss": 0.5113586187362671, "step": 1456 }, { "epoch": 1.7802197802197801, "grad_norm": 1.1532829999923706, "learning_rate": 9.655909981332614e-06, "loss": 0.8892757892608643, "step": 1458 }, { "epoch": 1.7826617826617825, "grad_norm": 1.5256012678146362, "learning_rate": 9.63062620829801e-06, "loss": 0.8083629608154297, "step": 1460 }, { "epoch": 1.785103785103785, "grad_norm": 1.8043534755706787, "learning_rate": 9.605353493960717e-06, "loss": 0.9189132452011108, "step": 1462 }, { "epoch": 1.7875457875457874, "grad_norm": 0.841884434223175, "learning_rate": 9.580092042416427e-06, "loss": 0.6249831318855286, "step": 1464 }, { "epoch": 1.7899877899877898, "grad_norm": 2.1716599464416504, "learning_rate": 9.554842057669886e-06, "loss": 0.6827890872955322, "step": 1466 }, { "epoch": 1.7924297924297923, "grad_norm": 3.5236616134643555, "learning_rate": 9.529603743633229e-06, "loss": 0.7608170509338379, "step": 1468 }, { "epoch": 1.7948717948717947, "grad_norm": 1.99154531955719, "learning_rate": 9.504377304124346e-06, "loss": 0.9152241945266724, "step": 1470 }, { "epoch": 1.7973137973137974, "grad_norm": 0.8060831427574158, "learning_rate": 9.47916294286523e-06, "loss": 0.8515353202819824, "step": 1472 }, { "epoch": 1.7997557997557998, "grad_norm": 5.8603363037109375, "learning_rate": 9.453960863480333e-06, "loss": 0.5703706741333008, "step": 1474 }, { "epoch": 1.8021978021978022, "grad_norm": 7.417604446411133, "learning_rate": 9.428771269494926e-06, "loss": 0.7551999092102051, "step": 1476 }, { "epoch": 1.8046398046398047, "grad_norm": 1.034999966621399, "learning_rate": 9.403594364333444e-06, "loss": 0.6955189108848572, "step": 1478 }, { "epoch": 1.807081807081807, "grad_norm": 0.9549148678779602, "learning_rate": 9.378430351317854e-06, "loss": 0.42793938517570496, "step": 1480 }, { "epoch": 1.8095238095238095, "grad_norm": 1.3916822671890259, "learning_rate": 9.353279433666014e-06, "loss": 0.6840672492980957, "step": 1482 }, { "epoch": 1.811965811965812, "grad_norm": 0.854276716709137, "learning_rate": 9.328141814490021e-06, "loss": 0.893316924571991, "step": 1484 }, { "epoch": 1.8144078144078144, "grad_norm": 1.491588830947876, "learning_rate": 9.303017696794578e-06, "loss": 0.872158944606781, "step": 1486 }, { "epoch": 1.8168498168498168, "grad_norm": 1.8033097982406616, "learning_rate": 9.277907283475358e-06, "loss": 0.6238676905632019, "step": 1488 }, { "epoch": 1.8192918192918193, "grad_norm": 0.8885567784309387, "learning_rate": 9.252810777317351e-06, "loss": 0.6716984510421753, "step": 1490 }, { "epoch": 1.8217338217338217, "grad_norm": 1.0771310329437256, "learning_rate": 9.227728380993253e-06, "loss": 0.8512567281723022, "step": 1492 }, { "epoch": 1.8241758241758241, "grad_norm": 1.4891635179519653, "learning_rate": 9.202660297061798e-06, "loss": 0.5891348123550415, "step": 1494 }, { "epoch": 1.8266178266178266, "grad_norm": 1.5767910480499268, "learning_rate": 9.177606727966142e-06, "loss": 0.8717406392097473, "step": 1496 }, { "epoch": 1.8290598290598292, "grad_norm": 0.8637403845787048, "learning_rate": 9.15256787603222e-06, "loss": 1.3341138362884521, "step": 1498 }, { "epoch": 1.8315018315018317, "grad_norm": 1.3066986799240112, "learning_rate": 9.127543943467128e-06, "loss": 1.2278974056243896, "step": 1500 }, { "epoch": 1.833943833943834, "grad_norm": 1.3648895025253296, "learning_rate": 9.102535132357457e-06, "loss": 0.6873140335083008, "step": 1502 }, { "epoch": 1.8363858363858365, "grad_norm": 0.45770537853240967, "learning_rate": 9.077541644667697e-06, "loss": 0.7067763209342957, "step": 1504 }, { "epoch": 1.838827838827839, "grad_norm": 2.4009127616882324, "learning_rate": 9.052563682238587e-06, "loss": 0.6803405284881592, "step": 1506 }, { "epoch": 1.8412698412698414, "grad_norm": 1.205779790878296, "learning_rate": 9.02760144678548e-06, "loss": 0.6593731641769409, "step": 1508 }, { "epoch": 1.8437118437118438, "grad_norm": 0.640776515007019, "learning_rate": 9.00265513989673e-06, "loss": 0.8603323101997375, "step": 1510 }, { "epoch": 1.8461538461538463, "grad_norm": 1.0433986186981201, "learning_rate": 8.977724963032056e-06, "loss": 0.8412877917289734, "step": 1512 }, { "epoch": 1.8485958485958487, "grad_norm": 1.245303750038147, "learning_rate": 8.952811117520914e-06, "loss": 1.0396430492401123, "step": 1514 }, { "epoch": 1.8510378510378511, "grad_norm": 1.5737297534942627, "learning_rate": 8.927913804560864e-06, "loss": 0.6088389754295349, "step": 1516 }, { "epoch": 1.8534798534798536, "grad_norm": 0.9162042140960693, "learning_rate": 8.903033225215975e-06, "loss": 1.1635559797286987, "step": 1518 }, { "epoch": 1.855921855921856, "grad_norm": 1.7877050638198853, "learning_rate": 8.878169580415154e-06, "loss": 0.631327748298645, "step": 1520 }, { "epoch": 1.8583638583638584, "grad_norm": 3.03653883934021, "learning_rate": 8.85332307095057e-06, "loss": 0.902554452419281, "step": 1522 }, { "epoch": 1.8608058608058609, "grad_norm": 1.9247746467590332, "learning_rate": 8.828493897475998e-06, "loss": 0.8101663589477539, "step": 1524 }, { "epoch": 1.8632478632478633, "grad_norm": 1.386506199836731, "learning_rate": 8.803682260505216e-06, "loss": 0.7383776903152466, "step": 1526 }, { "epoch": 1.8656898656898657, "grad_norm": 1.1092829704284668, "learning_rate": 8.778888360410385e-06, "loss": 0.7297862768173218, "step": 1528 }, { "epoch": 1.8681318681318682, "grad_norm": 0.7110038995742798, "learning_rate": 8.754112397420426e-06, "loss": 0.8971010446548462, "step": 1530 }, { "epoch": 1.8705738705738706, "grad_norm": 1.9106638431549072, "learning_rate": 8.729354571619404e-06, "loss": 0.7592481374740601, "step": 1532 }, { "epoch": 1.873015873015873, "grad_norm": 0.805887758731842, "learning_rate": 8.704615082944914e-06, "loss": 0.8079948425292969, "step": 1534 }, { "epoch": 1.8754578754578755, "grad_norm": 0.6133478283882141, "learning_rate": 8.679894131186462e-06, "loss": 1.000016450881958, "step": 1536 }, { "epoch": 1.877899877899878, "grad_norm": 0.6692440509796143, "learning_rate": 8.655191915983859e-06, "loss": 0.8313310742378235, "step": 1538 }, { "epoch": 1.8803418803418803, "grad_norm": 0.9560274481773376, "learning_rate": 8.630508636825602e-06, "loss": 0.9431169033050537, "step": 1540 }, { "epoch": 1.8827838827838828, "grad_norm": 1.700568675994873, "learning_rate": 8.605844493047269e-06, "loss": 0.9815627336502075, "step": 1542 }, { "epoch": 1.8852258852258852, "grad_norm": 1.308621883392334, "learning_rate": 8.581199683829899e-06, "loss": 0.7461444735527039, "step": 1544 }, { "epoch": 1.8876678876678876, "grad_norm": 1.2452470064163208, "learning_rate": 8.556574408198399e-06, "loss": 0.9441168904304504, "step": 1546 }, { "epoch": 1.89010989010989, "grad_norm": 3.298710823059082, "learning_rate": 8.531968865019919e-06, "loss": 0.8527262210845947, "step": 1548 }, { "epoch": 1.8925518925518925, "grad_norm": 0.8520393967628479, "learning_rate": 8.507383253002264e-06, "loss": 0.47991418838500977, "step": 1550 }, { "epoch": 1.894993894993895, "grad_norm": 1.5283163785934448, "learning_rate": 8.482817770692276e-06, "loss": 0.8953297138214111, "step": 1552 }, { "epoch": 1.8974358974358974, "grad_norm": 2.6013505458831787, "learning_rate": 8.458272616474226e-06, "loss": 0.598823070526123, "step": 1554 }, { "epoch": 1.8998778998778998, "grad_norm": 6.25869083404541, "learning_rate": 8.43374798856824e-06, "loss": 1.0903539657592773, "step": 1556 }, { "epoch": 1.9023199023199022, "grad_norm": 0.7708169221878052, "learning_rate": 8.40924408502866e-06, "loss": 0.6560428738594055, "step": 1558 }, { "epoch": 1.9047619047619047, "grad_norm": 1.3442054986953735, "learning_rate": 8.384761103742476e-06, "loss": 0.553628146648407, "step": 1560 }, { "epoch": 1.907203907203907, "grad_norm": 0.8295760750770569, "learning_rate": 8.360299242427713e-06, "loss": 0.8809893727302551, "step": 1562 }, { "epoch": 1.9096459096459095, "grad_norm": 1.2123860120773315, "learning_rate": 8.335858698631829e-06, "loss": 0.7752953171730042, "step": 1564 }, { "epoch": 1.912087912087912, "grad_norm": 1.137731909751892, "learning_rate": 8.311439669730139e-06, "loss": 0.937446653842926, "step": 1566 }, { "epoch": 1.9145299145299144, "grad_norm": 1.4613070487976074, "learning_rate": 8.287042352924206e-06, "loss": 0.9597198963165283, "step": 1568 }, { "epoch": 1.9169719169719168, "grad_norm": 7.560548305511475, "learning_rate": 8.26266694524024e-06, "loss": 0.6756553053855896, "step": 1570 }, { "epoch": 1.9194139194139193, "grad_norm": 0.7736316919326782, "learning_rate": 8.238313643527533e-06, "loss": 0.8379277586936951, "step": 1572 }, { "epoch": 1.9218559218559217, "grad_norm": 2.3948774337768555, "learning_rate": 8.213982644456856e-06, "loss": 0.7130874991416931, "step": 1574 }, { "epoch": 1.9242979242979243, "grad_norm": 2.804558753967285, "learning_rate": 8.189674144518864e-06, "loss": 0.7871428728103638, "step": 1576 }, { "epoch": 1.9267399267399268, "grad_norm": 3.343308925628662, "learning_rate": 8.165388340022507e-06, "loss": 0.7644234895706177, "step": 1578 }, { "epoch": 1.9291819291819292, "grad_norm": 0.9689104557037354, "learning_rate": 8.14112542709347e-06, "loss": 0.9481227397918701, "step": 1580 }, { "epoch": 1.9316239316239316, "grad_norm": 0.9340876936912537, "learning_rate": 8.116885601672557e-06, "loss": 0.2258923351764679, "step": 1582 }, { "epoch": 1.934065934065934, "grad_norm": 1.9040846824645996, "learning_rate": 8.09266905951413e-06, "loss": 0.5065496563911438, "step": 1584 }, { "epoch": 1.9365079365079365, "grad_norm": 2.174138069152832, "learning_rate": 8.068475996184527e-06, "loss": 0.5920478701591492, "step": 1586 }, { "epoch": 1.938949938949939, "grad_norm": 0.8130704760551453, "learning_rate": 8.044306607060466e-06, "loss": 0.9720399379730225, "step": 1588 }, { "epoch": 1.9413919413919414, "grad_norm": 0.833109974861145, "learning_rate": 8.02016108732748e-06, "loss": 1.0517313480377197, "step": 1590 }, { "epoch": 1.9438339438339438, "grad_norm": 2.0496108531951904, "learning_rate": 7.996039631978352e-06, "loss": 1.0347234010696411, "step": 1592 }, { "epoch": 1.9462759462759462, "grad_norm": 1.0047261714935303, "learning_rate": 7.97194243581151e-06, "loss": 0.6489905118942261, "step": 1594 }, { "epoch": 1.9487179487179487, "grad_norm": 1.0025273561477661, "learning_rate": 7.947869693429486e-06, "loss": 0.568684458732605, "step": 1596 }, { "epoch": 1.9511599511599511, "grad_norm": 1.1909536123275757, "learning_rate": 7.923821599237322e-06, "loss": 0.6664155125617981, "step": 1598 }, { "epoch": 1.9536019536019538, "grad_norm": 1.6859694719314575, "learning_rate": 7.899798347441005e-06, "loss": 0.7015742063522339, "step": 1600 }, { "epoch": 1.9560439560439562, "grad_norm": 0.6844836473464966, "learning_rate": 7.87580013204591e-06, "loss": 0.9169449210166931, "step": 1602 }, { "epoch": 1.9584859584859586, "grad_norm": 2.2930445671081543, "learning_rate": 7.85182714685522e-06, "loss": 0.8345751762390137, "step": 1604 }, { "epoch": 1.960927960927961, "grad_norm": 2.5689308643341064, "learning_rate": 7.827879585468363e-06, "loss": 1.1974244117736816, "step": 1606 }, { "epoch": 1.9633699633699635, "grad_norm": 1.2992660999298096, "learning_rate": 7.803957641279457e-06, "loss": 1.1730899810791016, "step": 1608 }, { "epoch": 1.965811965811966, "grad_norm": 1.0391148328781128, "learning_rate": 7.780061507475738e-06, "loss": 0.9335651397705078, "step": 1610 }, { "epoch": 1.9682539682539684, "grad_norm": 3.6143672466278076, "learning_rate": 7.756191377036004e-06, "loss": 0.8546837568283081, "step": 1612 }, { "epoch": 1.9706959706959708, "grad_norm": 0.9346309304237366, "learning_rate": 7.732347442729062e-06, "loss": 1.0305918455123901, "step": 1614 }, { "epoch": 1.9731379731379732, "grad_norm": 0.9905077815055847, "learning_rate": 7.708529897112158e-06, "loss": 0.8775286674499512, "step": 1616 }, { "epoch": 1.9755799755799757, "grad_norm": 0.6666707396507263, "learning_rate": 7.684738932529441e-06, "loss": 0.8464508056640625, "step": 1618 }, { "epoch": 1.978021978021978, "grad_norm": 1.0916727781295776, "learning_rate": 7.660974741110387e-06, "loss": 1.035678505897522, "step": 1620 }, { "epoch": 1.9804639804639805, "grad_norm": 0.7847446203231812, "learning_rate": 7.637237514768265e-06, "loss": 0.6054593324661255, "step": 1622 }, { "epoch": 1.982905982905983, "grad_norm": 2.2946202754974365, "learning_rate": 7.613527445198576e-06, "loss": 0.45836907625198364, "step": 1624 }, { "epoch": 1.9853479853479854, "grad_norm": 9.175978660583496, "learning_rate": 7.5898447238775264e-06, "loss": 0.7117047905921936, "step": 1626 }, { "epoch": 1.9877899877899878, "grad_norm": 3.764439105987549, "learning_rate": 7.566189542060445e-06, "loss": 1.0821315050125122, "step": 1628 }, { "epoch": 1.9902319902319903, "grad_norm": 0.9272487163543701, "learning_rate": 7.5425620907802655e-06, "loss": 1.1502904891967773, "step": 1630 }, { "epoch": 1.9926739926739927, "grad_norm": 1.1519207954406738, "learning_rate": 7.518962560845986e-06, "loss": 0.8673257231712341, "step": 1632 }, { "epoch": 1.9951159951159951, "grad_norm": 0.6419383883476257, "learning_rate": 7.4953911428411085e-06, "loss": 0.75059574842453, "step": 1634 }, { "epoch": 1.9975579975579976, "grad_norm": 1.7326091527938843, "learning_rate": 7.4718480271221125e-06, "loss": 1.0258231163024902, "step": 1636 }, { "epoch": 2.0, "grad_norm": 0.8297693133354187, "learning_rate": 7.448333403816926e-06, "loss": 0.9197133779525757, "step": 1638 }, { "epoch": 2.0024420024420024, "grad_norm": 0.842572808265686, "learning_rate": 7.424847462823361e-06, "loss": 0.6060487627983093, "step": 1640 }, { "epoch": 2.004884004884005, "grad_norm": 1.4340323209762573, "learning_rate": 7.401390393807615e-06, "loss": 0.47724178433418274, "step": 1642 }, { "epoch": 2.0073260073260073, "grad_norm": 0.6351611018180847, "learning_rate": 7.37796238620272e-06, "loss": 0.5051848292350769, "step": 1644 }, { "epoch": 2.0097680097680097, "grad_norm": 3.20005202293396, "learning_rate": 7.3545636292070055e-06, "loss": 0.438951700925827, "step": 1646 }, { "epoch": 2.012210012210012, "grad_norm": 1.5867102146148682, "learning_rate": 7.331194311782597e-06, "loss": 0.528706431388855, "step": 1648 }, { "epoch": 2.0146520146520146, "grad_norm": 2.449397325515747, "learning_rate": 7.307854622653863e-06, "loss": 0.3387841284275055, "step": 1650 }, { "epoch": 2.017094017094017, "grad_norm": 5.5735626220703125, "learning_rate": 7.284544750305902e-06, "loss": 0.6135000586509705, "step": 1652 }, { "epoch": 2.0195360195360195, "grad_norm": 2.001272439956665, "learning_rate": 7.261264882983024e-06, "loss": 0.4525635838508606, "step": 1654 }, { "epoch": 2.021978021978022, "grad_norm": 1.0277931690216064, "learning_rate": 7.238015208687226e-06, "loss": 0.4565449655056, "step": 1656 }, { "epoch": 2.0244200244200243, "grad_norm": 1.670928716659546, "learning_rate": 7.214795915176671e-06, "loss": 0.4369199872016907, "step": 1658 }, { "epoch": 2.0268620268620268, "grad_norm": 1.4175351858139038, "learning_rate": 7.191607189964181e-06, "loss": 0.6220426559448242, "step": 1660 }, { "epoch": 2.029304029304029, "grad_norm": 1.3668700456619263, "learning_rate": 7.16844922031571e-06, "loss": 0.557952880859375, "step": 1662 }, { "epoch": 2.0317460317460316, "grad_norm": 0.9909934401512146, "learning_rate": 7.145322193248838e-06, "loss": 0.2245861142873764, "step": 1664 }, { "epoch": 2.034188034188034, "grad_norm": 6.492028713226318, "learning_rate": 7.122226295531267e-06, "loss": 0.40176424384117126, "step": 1666 }, { "epoch": 2.0366300366300365, "grad_norm": 0.9408150911331177, "learning_rate": 7.099161713679308e-06, "loss": 0.4665899872779846, "step": 1668 }, { "epoch": 2.039072039072039, "grad_norm": 1.566773533821106, "learning_rate": 7.07612863395636e-06, "loss": 0.6036043763160706, "step": 1670 }, { "epoch": 2.0415140415140414, "grad_norm": 1.2262314558029175, "learning_rate": 7.053127242371434e-06, "loss": 0.5682324171066284, "step": 1672 }, { "epoch": 2.043956043956044, "grad_norm": 0.9549220204353333, "learning_rate": 7.030157724677631e-06, "loss": 0.5213257074356079, "step": 1674 }, { "epoch": 2.0463980463980462, "grad_norm": 1.66300368309021, "learning_rate": 7.0072202663706405e-06, "loss": 0.3227638006210327, "step": 1676 }, { "epoch": 2.0488400488400487, "grad_norm": 1.2017823457717896, "learning_rate": 6.984315052687258e-06, "loss": 0.5378082990646362, "step": 1678 }, { "epoch": 2.051282051282051, "grad_norm": 0.8874703645706177, "learning_rate": 6.96144226860388e-06, "loss": 0.49545711278915405, "step": 1680 }, { "epoch": 2.0537240537240535, "grad_norm": 1.3648614883422852, "learning_rate": 6.938602098835e-06, "loss": 0.3199822008609772, "step": 1682 }, { "epoch": 2.056166056166056, "grad_norm": 2.5054514408111572, "learning_rate": 6.915794727831743e-06, "loss": 0.3839988112449646, "step": 1684 }, { "epoch": 2.0586080586080584, "grad_norm": 2.381861925125122, "learning_rate": 6.893020339780341e-06, "loss": 0.3781861662864685, "step": 1686 }, { "epoch": 2.061050061050061, "grad_norm": 2.2430403232574463, "learning_rate": 6.870279118600679e-06, "loss": 0.6202837824821472, "step": 1688 }, { "epoch": 2.0634920634920633, "grad_norm": 2.3006107807159424, "learning_rate": 6.847571247944791e-06, "loss": 0.46027785539627075, "step": 1690 }, { "epoch": 2.065934065934066, "grad_norm": 1.330511450767517, "learning_rate": 6.8248969111953825e-06, "loss": 0.31774628162384033, "step": 1692 }, { "epoch": 2.0683760683760686, "grad_norm": 1.060591459274292, "learning_rate": 6.80225629146434e-06, "loss": 0.47486642003059387, "step": 1694 }, { "epoch": 2.070818070818071, "grad_norm": 1.2816616296768188, "learning_rate": 6.7796495715912694e-06, "loss": 0.4364372789859772, "step": 1696 }, { "epoch": 2.0732600732600734, "grad_norm": 1.004572868347168, "learning_rate": 6.757076934142013e-06, "loss": 0.4288478493690491, "step": 1698 }, { "epoch": 2.075702075702076, "grad_norm": 1.2579833269119263, "learning_rate": 6.734538561407158e-06, "loss": 0.4020456075668335, "step": 1700 }, { "epoch": 2.0781440781440783, "grad_norm": 1.9755547046661377, "learning_rate": 6.712034635400593e-06, "loss": 0.26895561814308167, "step": 1702 }, { "epoch": 2.0805860805860807, "grad_norm": 2.1291699409484863, "learning_rate": 6.689565337858019e-06, "loss": 0.2938929796218872, "step": 1704 }, { "epoch": 2.083028083028083, "grad_norm": 1.6085429191589355, "learning_rate": 6.6671308502354844e-06, "loss": 0.19200079143047333, "step": 1706 }, { "epoch": 2.0854700854700856, "grad_norm": 3.190870761871338, "learning_rate": 6.644731353707927e-06, "loss": 0.5591083765029907, "step": 1708 }, { "epoch": 2.087912087912088, "grad_norm": 1.8141244649887085, "learning_rate": 6.622367029167702e-06, "loss": 0.2770901918411255, "step": 1710 }, { "epoch": 2.0903540903540905, "grad_norm": 4.159117221832275, "learning_rate": 6.600038057223126e-06, "loss": 0.394546240568161, "step": 1712 }, { "epoch": 2.092796092796093, "grad_norm": 1.3365147113800049, "learning_rate": 6.577744618197017e-06, "loss": 0.4641517996788025, "step": 1714 }, { "epoch": 2.0952380952380953, "grad_norm": 0.9762091636657715, "learning_rate": 6.555486892125243e-06, "loss": 0.32657861709594727, "step": 1716 }, { "epoch": 2.0976800976800978, "grad_norm": 1.1228184700012207, "learning_rate": 6.533265058755256e-06, "loss": 0.6660332083702087, "step": 1718 }, { "epoch": 2.1001221001221, "grad_norm": 0.9115656614303589, "learning_rate": 6.5110792975446515e-06, "loss": 0.48777180910110474, "step": 1720 }, { "epoch": 2.1025641025641026, "grad_norm": 1.8341835737228394, "learning_rate": 6.488929787659721e-06, "loss": 0.6992468237876892, "step": 1722 }, { "epoch": 2.105006105006105, "grad_norm": 1.1542752981185913, "learning_rate": 6.466816707973991e-06, "loss": 0.3529256284236908, "step": 1724 }, { "epoch": 2.1074481074481075, "grad_norm": 19.553573608398438, "learning_rate": 6.444740237066791e-06, "loss": 0.45478177070617676, "step": 1726 }, { "epoch": 2.10989010989011, "grad_norm": 0.6075100898742676, "learning_rate": 6.422700553221817e-06, "loss": 0.3780288100242615, "step": 1728 }, { "epoch": 2.1123321123321124, "grad_norm": 0.8796222805976868, "learning_rate": 6.400697834425662e-06, "loss": 0.42669016122817993, "step": 1730 }, { "epoch": 2.114774114774115, "grad_norm": 0.9508007764816284, "learning_rate": 6.378732258366421e-06, "loss": 0.34392303228378296, "step": 1732 }, { "epoch": 2.1172161172161172, "grad_norm": 0.28383857011795044, "learning_rate": 6.356804002432225e-06, "loss": 0.1719311773777008, "step": 1734 }, { "epoch": 2.1196581196581197, "grad_norm": 1.0620123147964478, "learning_rate": 6.334913243709809e-06, "loss": 0.5892414450645447, "step": 1736 }, { "epoch": 2.122100122100122, "grad_norm": 1.1223015785217285, "learning_rate": 6.313060158983104e-06, "loss": 0.3725854456424713, "step": 1738 }, { "epoch": 2.1245421245421245, "grad_norm": 0.83611661195755, "learning_rate": 6.291244924731794e-06, "loss": 0.4878256618976593, "step": 1740 }, { "epoch": 2.126984126984127, "grad_norm": 1.6328321695327759, "learning_rate": 6.26946771712988e-06, "loss": 0.43116888403892517, "step": 1742 }, { "epoch": 2.1294261294261294, "grad_norm": 1.3364393711090088, "learning_rate": 6.247728712044283e-06, "loss": 0.37520939111709595, "step": 1744 }, { "epoch": 2.131868131868132, "grad_norm": 1.3389878273010254, "learning_rate": 6.226028085033413e-06, "loss": 0.5751076936721802, "step": 1746 }, { "epoch": 2.1343101343101343, "grad_norm": 1.8287776708602905, "learning_rate": 6.2043660113457325e-06, "loss": 0.20154741406440735, "step": 1748 }, { "epoch": 2.1367521367521367, "grad_norm": 1.4840490818023682, "learning_rate": 6.182742665918373e-06, "loss": 0.6898431777954102, "step": 1750 }, { "epoch": 2.139194139194139, "grad_norm": 0.9770026803016663, "learning_rate": 6.161158223375705e-06, "loss": 0.3924607038497925, "step": 1752 }, { "epoch": 2.1416361416361416, "grad_norm": 0.7722997069358826, "learning_rate": 6.13961285802792e-06, "loss": 0.43264567852020264, "step": 1754 }, { "epoch": 2.144078144078144, "grad_norm": 0.9995938539505005, "learning_rate": 6.118106743869641e-06, "loss": 0.5022901296615601, "step": 1756 }, { "epoch": 2.1465201465201464, "grad_norm": 0.5033841133117676, "learning_rate": 6.096640054578511e-06, "loss": 0.21431341767311096, "step": 1758 }, { "epoch": 2.148962148962149, "grad_norm": 1.137976050376892, "learning_rate": 6.075212963513776e-06, "loss": 0.4715498685836792, "step": 1760 }, { "epoch": 2.1514041514041513, "grad_norm": 0.9455146193504333, "learning_rate": 6.053825643714912e-06, "loss": 0.4320064187049866, "step": 1762 }, { "epoch": 2.1538461538461537, "grad_norm": 2.8845789432525635, "learning_rate": 6.032478267900206e-06, "loss": 0.3226162791252136, "step": 1764 }, { "epoch": 2.156288156288156, "grad_norm": 0.9458103179931641, "learning_rate": 6.011171008465363e-06, "loss": 0.2729605436325073, "step": 1766 }, { "epoch": 2.1587301587301586, "grad_norm": 1.9725005626678467, "learning_rate": 5.989904037482128e-06, "loss": 0.3462582230567932, "step": 1768 }, { "epoch": 2.161172161172161, "grad_norm": 2.0717337131500244, "learning_rate": 5.968677526696882e-06, "loss": 0.38312727212905884, "step": 1770 }, { "epoch": 2.1636141636141635, "grad_norm": 0.8864312767982483, "learning_rate": 5.947491647529267e-06, "loss": 0.353424072265625, "step": 1772 }, { "epoch": 2.166056166056166, "grad_norm": 1.0762509107589722, "learning_rate": 5.9263465710707814e-06, "loss": 0.5065031051635742, "step": 1774 }, { "epoch": 2.1684981684981683, "grad_norm": 0.7869840264320374, "learning_rate": 5.905242468083423e-06, "loss": 0.5348921418190002, "step": 1776 }, { "epoch": 2.1709401709401708, "grad_norm": 2.1878821849823, "learning_rate": 5.884179508998299e-06, "loss": 0.27236610651016235, "step": 1778 }, { "epoch": 2.173382173382173, "grad_norm": 0.9579680562019348, "learning_rate": 5.863157863914239e-06, "loss": 0.43548962473869324, "step": 1780 }, { "epoch": 2.1758241758241756, "grad_norm": 1.8547625541687012, "learning_rate": 5.8421777025964446e-06, "loss": 0.5892971754074097, "step": 1782 }, { "epoch": 2.178266178266178, "grad_norm": 0.9620394706726074, "learning_rate": 5.8212391944750965e-06, "loss": 0.4943884313106537, "step": 1784 }, { "epoch": 2.1807081807081805, "grad_norm": 2.7082159519195557, "learning_rate": 5.8003425086440015e-06, "loss": 0.5425156354904175, "step": 1786 }, { "epoch": 2.183150183150183, "grad_norm": 4.512080669403076, "learning_rate": 5.779487813859218e-06, "loss": 0.3213900625705719, "step": 1788 }, { "epoch": 2.185592185592186, "grad_norm": 0.9232001900672913, "learning_rate": 5.758675278537692e-06, "loss": 0.46233004331588745, "step": 1790 }, { "epoch": 2.1880341880341883, "grad_norm": 3.6497743129730225, "learning_rate": 5.737905070755907e-06, "loss": 0.480983167886734, "step": 1792 }, { "epoch": 2.1904761904761907, "grad_norm": 1.0851823091506958, "learning_rate": 5.717177358248522e-06, "loss": 0.2742152810096741, "step": 1794 }, { "epoch": 2.192918192918193, "grad_norm": 2.418455123901367, "learning_rate": 5.696492308407002e-06, "loss": 0.3769078254699707, "step": 1796 }, { "epoch": 2.1953601953601956, "grad_norm": 0.7429922223091125, "learning_rate": 5.675850088278298e-06, "loss": 0.40196555852890015, "step": 1798 }, { "epoch": 2.197802197802198, "grad_norm": 1.3570210933685303, "learning_rate": 5.655250864563469e-06, "loss": 0.3571450412273407, "step": 1800 }, { "epoch": 2.2002442002442004, "grad_norm": 1.8261560201644897, "learning_rate": 5.63469480361635e-06, "loss": 0.4585352838039398, "step": 1802 }, { "epoch": 2.202686202686203, "grad_norm": 2.33353328704834, "learning_rate": 5.614182071442201e-06, "loss": 0.4414786100387573, "step": 1804 }, { "epoch": 2.2051282051282053, "grad_norm": 1.7394614219665527, "learning_rate": 5.59371283369637e-06, "loss": 0.5657206177711487, "step": 1806 }, { "epoch": 2.2075702075702077, "grad_norm": 1.2605091333389282, "learning_rate": 5.573287255682967e-06, "loss": 0.5330032706260681, "step": 1808 }, { "epoch": 2.21001221001221, "grad_norm": 0.2691946029663086, "learning_rate": 5.552905502353502e-06, "loss": 0.2634370028972626, "step": 1810 }, { "epoch": 2.2124542124542126, "grad_norm": 0.983033299446106, "learning_rate": 5.532567738305576e-06, "loss": 0.4326469302177429, "step": 1812 }, { "epoch": 2.214896214896215, "grad_norm": 0.23342449963092804, "learning_rate": 5.512274127781552e-06, "loss": 0.1571735441684723, "step": 1814 }, { "epoch": 2.2173382173382175, "grad_norm": 1.2843339443206787, "learning_rate": 5.492024834667205e-06, "loss": 0.5355442762374878, "step": 1816 }, { "epoch": 2.21978021978022, "grad_norm": 0.949738621711731, "learning_rate": 5.471820022490422e-06, "loss": 0.38218754529953003, "step": 1818 }, { "epoch": 2.2222222222222223, "grad_norm": 0.8940930962562561, "learning_rate": 5.451659854419882e-06, "loss": 0.49747079610824585, "step": 1820 }, { "epoch": 2.2246642246642248, "grad_norm": 0.6108909249305725, "learning_rate": 5.431544493263714e-06, "loss": 0.2641042172908783, "step": 1822 }, { "epoch": 2.227106227106227, "grad_norm": 0.776020884513855, "learning_rate": 5.411474101468208e-06, "loss": 0.39929312467575073, "step": 1824 }, { "epoch": 2.2295482295482296, "grad_norm": 1.3689377307891846, "learning_rate": 5.3914488411165e-06, "loss": 0.2978437840938568, "step": 1826 }, { "epoch": 2.231990231990232, "grad_norm": 2.88201904296875, "learning_rate": 5.3714688739272396e-06, "loss": 0.3673563599586487, "step": 1828 }, { "epoch": 2.2344322344322345, "grad_norm": 2.748995065689087, "learning_rate": 5.351534361253312e-06, "loss": 0.29434409737586975, "step": 1830 }, { "epoch": 2.236874236874237, "grad_norm": 1.0924896001815796, "learning_rate": 5.331645464080526e-06, "loss": 0.46827900409698486, "step": 1832 }, { "epoch": 2.2393162393162394, "grad_norm": 0.8829333186149597, "learning_rate": 5.311802343026302e-06, "loss": 0.5047073364257812, "step": 1834 }, { "epoch": 2.241758241758242, "grad_norm": 1.315529227256775, "learning_rate": 5.292005158338394e-06, "loss": 0.40334218740463257, "step": 1836 }, { "epoch": 2.244200244200244, "grad_norm": 2.0851378440856934, "learning_rate": 5.272254069893579e-06, "loss": 0.5924956798553467, "step": 1838 }, { "epoch": 2.2466422466422467, "grad_norm": 3.3136000633239746, "learning_rate": 5.2525492371963785e-06, "loss": 0.31219542026519775, "step": 1840 }, { "epoch": 2.249084249084249, "grad_norm": 0.728590190410614, "learning_rate": 5.232890819377765e-06, "loss": 0.46928393840789795, "step": 1842 }, { "epoch": 2.2515262515262515, "grad_norm": 0.7545236945152283, "learning_rate": 5.213278975193874e-06, "loss": 0.4485982060432434, "step": 1844 }, { "epoch": 2.253968253968254, "grad_norm": 2.7309439182281494, "learning_rate": 5.193713863024722e-06, "loss": 0.3948480784893036, "step": 1846 }, { "epoch": 2.2564102564102564, "grad_norm": 0.3682626187801361, "learning_rate": 5.174195640872937e-06, "loss": 0.3254821300506592, "step": 1848 }, { "epoch": 2.258852258852259, "grad_norm": 1.932949423789978, "learning_rate": 5.154724466362473e-06, "loss": 0.43265148997306824, "step": 1850 }, { "epoch": 2.2612942612942613, "grad_norm": 1.3246240615844727, "learning_rate": 5.135300496737335e-06, "loss": 0.5352158546447754, "step": 1852 }, { "epoch": 2.2637362637362637, "grad_norm": 0.8921855688095093, "learning_rate": 5.115923888860321e-06, "loss": 0.6833795309066772, "step": 1854 }, { "epoch": 2.266178266178266, "grad_norm": 1.2048108577728271, "learning_rate": 5.096594799211748e-06, "loss": 0.6043341755867004, "step": 1856 }, { "epoch": 2.2686202686202686, "grad_norm": 1.5590717792510986, "learning_rate": 5.0773133838881806e-06, "loss": 0.6158211827278137, "step": 1858 }, { "epoch": 2.271062271062271, "grad_norm": 0.9362733364105225, "learning_rate": 5.058079798601184e-06, "loss": 0.7204128503799438, "step": 1860 }, { "epoch": 2.2735042735042734, "grad_norm": 1.0600636005401611, "learning_rate": 5.0388941986760675e-06, "loss": 0.32139068841934204, "step": 1862 }, { "epoch": 2.275946275946276, "grad_norm": 0.8406434059143066, "learning_rate": 5.019756739050606e-06, "loss": 0.29253455996513367, "step": 1864 }, { "epoch": 2.2783882783882783, "grad_norm": 2.3749077320098877, "learning_rate": 5.000667574273821e-06, "loss": 0.39995700120925903, "step": 1866 }, { "epoch": 2.2808302808302807, "grad_norm": 0.8223360180854797, "learning_rate": 4.981626858504718e-06, "loss": 0.45448631048202515, "step": 1868 }, { "epoch": 2.283272283272283, "grad_norm": 0.7664647698402405, "learning_rate": 4.962634745511027e-06, "loss": 0.42726626992225647, "step": 1870 }, { "epoch": 2.2857142857142856, "grad_norm": 1.1275815963745117, "learning_rate": 4.943691388667989e-06, "loss": 0.4752141237258911, "step": 1872 }, { "epoch": 2.288156288156288, "grad_norm": 2.4123940467834473, "learning_rate": 4.924796940957099e-06, "loss": 0.13898348808288574, "step": 1874 }, { "epoch": 2.2905982905982905, "grad_norm": 1.461748480796814, "learning_rate": 4.905951554964876e-06, "loss": 0.6339101791381836, "step": 1876 }, { "epoch": 2.293040293040293, "grad_norm": 2.0306098461151123, "learning_rate": 4.887155382881625e-06, "loss": 0.347889244556427, "step": 1878 }, { "epoch": 2.2954822954822953, "grad_norm": 1.3482933044433594, "learning_rate": 4.868408576500216e-06, "loss": 0.340035080909729, "step": 1880 }, { "epoch": 2.2979242979242978, "grad_norm": 4.910120010375977, "learning_rate": 4.849711287214856e-06, "loss": 0.5293861031532288, "step": 1882 }, { "epoch": 2.3003663003663, "grad_norm": 1.0976754426956177, "learning_rate": 4.8310636660198616e-06, "loss": 0.31249868869781494, "step": 1884 }, { "epoch": 2.3028083028083026, "grad_norm": 1.3118927478790283, "learning_rate": 4.812465863508448e-06, "loss": 0.5040943026542664, "step": 1886 }, { "epoch": 2.305250305250305, "grad_norm": 0.9740425944328308, "learning_rate": 4.7939180298715055e-06, "loss": 0.42627787590026855, "step": 1888 }, { "epoch": 2.3076923076923075, "grad_norm": 1.1387205123901367, "learning_rate": 4.775420314896384e-06, "loss": 0.44656771421432495, "step": 1890 }, { "epoch": 2.31013431013431, "grad_norm": 2.269031047821045, "learning_rate": 4.756972867965698e-06, "loss": 0.5736830830574036, "step": 1892 }, { "epoch": 2.3125763125763124, "grad_norm": 0.9688907265663147, "learning_rate": 4.738575838056104e-06, "loss": 0.4964962601661682, "step": 1894 }, { "epoch": 2.315018315018315, "grad_norm": 1.7838249206542969, "learning_rate": 4.7202293737371066e-06, "loss": 0.4222361445426941, "step": 1896 }, { "epoch": 2.317460317460317, "grad_norm": 1.0578351020812988, "learning_rate": 4.7019336231698576e-06, "loss": 0.5211227536201477, "step": 1898 }, { "epoch": 2.3199023199023197, "grad_norm": 1.8706358671188354, "learning_rate": 4.6836887341059525e-06, "loss": 0.8980540633201599, "step": 1900 }, { "epoch": 2.3223443223443225, "grad_norm": 1.151202917098999, "learning_rate": 4.6654948538862475e-06, "loss": 0.4475945234298706, "step": 1902 }, { "epoch": 2.324786324786325, "grad_norm": 4.294190406799316, "learning_rate": 4.647352129439665e-06, "loss": 0.251365065574646, "step": 1904 }, { "epoch": 2.3272283272283274, "grad_norm": 1.604580044746399, "learning_rate": 4.629260707282009e-06, "loss": 0.190834641456604, "step": 1906 }, { "epoch": 2.32967032967033, "grad_norm": 1.1880110502243042, "learning_rate": 4.6112207335147704e-06, "loss": 0.2842097878456116, "step": 1908 }, { "epoch": 2.3321123321123323, "grad_norm": 2.0477302074432373, "learning_rate": 4.593232353823968e-06, "loss": 0.23184801638126373, "step": 1910 }, { "epoch": 2.3345543345543347, "grad_norm": 1.7173128128051758, "learning_rate": 4.575295713478956e-06, "loss": 0.40144017338752747, "step": 1912 }, { "epoch": 2.336996336996337, "grad_norm": 0.9430311322212219, "learning_rate": 4.557410957331249e-06, "loss": 0.5639522075653076, "step": 1914 }, { "epoch": 2.3394383394383396, "grad_norm": 3.2917191982269287, "learning_rate": 4.539578229813372e-06, "loss": 0.636457622051239, "step": 1916 }, { "epoch": 2.341880341880342, "grad_norm": 1.405510663986206, "learning_rate": 4.521797674937672e-06, "loss": 0.26978304982185364, "step": 1918 }, { "epoch": 2.3443223443223444, "grad_norm": 2.574928045272827, "learning_rate": 4.5040694362951625e-06, "loss": 0.3309711515903473, "step": 1920 }, { "epoch": 2.346764346764347, "grad_norm": 1.7721152305603027, "learning_rate": 4.486393657054369e-06, "loss": 0.3379634618759155, "step": 1922 }, { "epoch": 2.3492063492063493, "grad_norm": 0.34488657116889954, "learning_rate": 4.468770479960171e-06, "loss": 0.2894682288169861, "step": 1924 }, { "epoch": 2.3516483516483517, "grad_norm": 1.060381531715393, "learning_rate": 4.451200047332638e-06, "loss": 0.44025763869285583, "step": 1926 }, { "epoch": 2.354090354090354, "grad_norm": 1.5222772359848022, "learning_rate": 4.433682501065897e-06, "loss": 0.3474840223789215, "step": 1928 }, { "epoch": 2.3565323565323566, "grad_norm": 2.951404094696045, "learning_rate": 4.416217982626981e-06, "loss": 0.3358984589576721, "step": 1930 }, { "epoch": 2.358974358974359, "grad_norm": 1.0801118612289429, "learning_rate": 4.398806633054675e-06, "loss": 0.3395053446292877, "step": 1932 }, { "epoch": 2.3614163614163615, "grad_norm": 2.127126693725586, "learning_rate": 4.381448592958394e-06, "loss": 0.5439938902854919, "step": 1934 }, { "epoch": 2.363858363858364, "grad_norm": 1.0053937435150146, "learning_rate": 4.36414400251704e-06, "loss": 0.2674437463283539, "step": 1936 }, { "epoch": 2.3663003663003663, "grad_norm": 0.9853598475456238, "learning_rate": 4.346893001477861e-06, "loss": 0.4141199290752411, "step": 1938 }, { "epoch": 2.3687423687423688, "grad_norm": 8.180671691894531, "learning_rate": 4.329695729155342e-06, "loss": 0.5360310673713684, "step": 1940 }, { "epoch": 2.371184371184371, "grad_norm": 0.22848689556121826, "learning_rate": 4.3125523244300686e-06, "loss": 0.25111788511276245, "step": 1942 }, { "epoch": 2.3736263736263736, "grad_norm": 1.5355631113052368, "learning_rate": 4.295462925747594e-06, "loss": 0.3430798351764679, "step": 1944 }, { "epoch": 2.376068376068376, "grad_norm": 1.6975699663162231, "learning_rate": 4.278427671117344e-06, "loss": 0.08609216660261154, "step": 1946 }, { "epoch": 2.3785103785103785, "grad_norm": 1.575578212738037, "learning_rate": 4.261446698111496e-06, "loss": 0.194163978099823, "step": 1948 }, { "epoch": 2.380952380952381, "grad_norm": 4.127973556518555, "learning_rate": 4.24452014386385e-06, "loss": 0.20009776949882507, "step": 1950 }, { "epoch": 2.3833943833943834, "grad_norm": 0.7139300107955933, "learning_rate": 4.22764814506874e-06, "loss": 0.12069036066532135, "step": 1952 }, { "epoch": 2.385836385836386, "grad_norm": 3.075773000717163, "learning_rate": 4.210830837979932e-06, "loss": 0.35760805010795593, "step": 1954 }, { "epoch": 2.3882783882783882, "grad_norm": 1.492324948310852, "learning_rate": 4.194068358409503e-06, "loss": 0.48620444536209106, "step": 1956 }, { "epoch": 2.3907203907203907, "grad_norm": 1.7053909301757812, "learning_rate": 4.17736084172677e-06, "loss": 0.20889446139335632, "step": 1958 }, { "epoch": 2.393162393162393, "grad_norm": 1.3225889205932617, "learning_rate": 4.160708422857178e-06, "loss": 0.5993058085441589, "step": 1960 }, { "epoch": 2.3956043956043955, "grad_norm": 1.3367353677749634, "learning_rate": 4.144111236281214e-06, "loss": 0.1960648149251938, "step": 1962 }, { "epoch": 2.398046398046398, "grad_norm": 2.359844446182251, "learning_rate": 4.127569416033332e-06, "loss": 0.5698574185371399, "step": 1964 }, { "epoch": 2.4004884004884004, "grad_norm": 1.1340882778167725, "learning_rate": 4.111083095700858e-06, "loss": 0.18890273571014404, "step": 1966 }, { "epoch": 2.402930402930403, "grad_norm": 2.4454874992370605, "learning_rate": 4.094652408422913e-06, "loss": 0.3097396492958069, "step": 1968 }, { "epoch": 2.4053724053724053, "grad_norm": 4.218069553375244, "learning_rate": 4.078277486889341e-06, "loss": 0.23327361047267914, "step": 1970 }, { "epoch": 2.4078144078144077, "grad_norm": 3.866490364074707, "learning_rate": 4.061958463339646e-06, "loss": 0.06529633700847626, "step": 1972 }, { "epoch": 2.41025641025641, "grad_norm": 0.4942020773887634, "learning_rate": 4.045695469561899e-06, "loss": 0.08752602338790894, "step": 1974 }, { "epoch": 2.4126984126984126, "grad_norm": 3.321356773376465, "learning_rate": 4.029488636891702e-06, "loss": 0.3558381199836731, "step": 1976 }, { "epoch": 2.415140415140415, "grad_norm": 3.152714729309082, "learning_rate": 4.013338096211109e-06, "loss": 0.3303931653499603, "step": 1978 }, { "epoch": 2.4175824175824174, "grad_norm": 0.6018658876419067, "learning_rate": 3.99724397794758e-06, "loss": 0.22131627798080444, "step": 1980 }, { "epoch": 2.42002442002442, "grad_norm": 1.3327726125717163, "learning_rate": 3.981206412072914e-06, "loss": 0.39478451013565063, "step": 1982 }, { "epoch": 2.4224664224664223, "grad_norm": 1.705815076828003, "learning_rate": 3.965225528102217e-06, "loss": 0.3109724521636963, "step": 1984 }, { "epoch": 2.4249084249084247, "grad_norm": 0.7618647217750549, "learning_rate": 3.949301455092845e-06, "loss": 0.5224888920783997, "step": 1986 }, { "epoch": 2.427350427350427, "grad_norm": 1.2163892984390259, "learning_rate": 3.933434321643356e-06, "loss": 0.4845066964626312, "step": 1988 }, { "epoch": 2.42979242979243, "grad_norm": 0.8843790292739868, "learning_rate": 3.917624255892489e-06, "loss": 0.5302805304527283, "step": 1990 }, { "epoch": 2.4322344322344325, "grad_norm": 1.2315729856491089, "learning_rate": 3.901871385518117e-06, "loss": 0.42821258306503296, "step": 1992 }, { "epoch": 2.434676434676435, "grad_norm": 0.9088804125785828, "learning_rate": 3.886175837736214e-06, "loss": 0.4940814673900604, "step": 1994 }, { "epoch": 2.4371184371184373, "grad_norm": 1.1520100831985474, "learning_rate": 3.870537739299836e-06, "loss": 0.3047824501991272, "step": 1996 }, { "epoch": 2.4395604395604398, "grad_norm": 0.7935906648635864, "learning_rate": 3.854957216498099e-06, "loss": 0.5371643900871277, "step": 1998 }, { "epoch": 2.442002442002442, "grad_norm": 1.0501606464385986, "learning_rate": 3.839434395155135e-06, "loss": 0.24889859557151794, "step": 2000 }, { "epoch": 2.4444444444444446, "grad_norm": 1.7994686365127563, "learning_rate": 3.8239694006291194e-06, "loss": 0.45958831906318665, "step": 2002 }, { "epoch": 2.446886446886447, "grad_norm": 0.9377945065498352, "learning_rate": 3.8085623578112136e-06, "loss": 0.22220918536186218, "step": 2004 }, { "epoch": 2.4493284493284495, "grad_norm": 1.056534767150879, "learning_rate": 3.793213391124586e-06, "loss": 0.29667913913726807, "step": 2006 }, { "epoch": 2.451770451770452, "grad_norm": 1.055069088935852, "learning_rate": 3.7779226245233937e-06, "loss": 0.7430405616760254, "step": 2008 }, { "epoch": 2.4542124542124544, "grad_norm": 1.062638282775879, "learning_rate": 3.7626901814917927e-06, "loss": 0.3536508083343506, "step": 2010 }, { "epoch": 2.456654456654457, "grad_norm": 2.2568395137786865, "learning_rate": 3.747516185042922e-06, "loss": 0.2591190040111542, "step": 2012 }, { "epoch": 2.4590964590964592, "grad_norm": 1.5303833484649658, "learning_rate": 3.7324007577179283e-06, "loss": 0.5008297562599182, "step": 2014 }, { "epoch": 2.4615384615384617, "grad_norm": 0.9226781725883484, "learning_rate": 3.7173440215849744e-06, "loss": 0.4963090121746063, "step": 2016 }, { "epoch": 2.463980463980464, "grad_norm": 0.9127579927444458, "learning_rate": 3.7023460982382355e-06, "loss": 0.5157759189605713, "step": 2018 }, { "epoch": 2.4664224664224665, "grad_norm": 7.223013401031494, "learning_rate": 3.687407108796942e-06, "loss": 0.4686001241207123, "step": 2020 }, { "epoch": 2.468864468864469, "grad_norm": 1.2899993658065796, "learning_rate": 3.672527173904388e-06, "loss": 0.25978168845176697, "step": 2022 }, { "epoch": 2.4713064713064714, "grad_norm": 5.451155662536621, "learning_rate": 3.6577064137269525e-06, "loss": 0.3640308380126953, "step": 2024 }, { "epoch": 2.473748473748474, "grad_norm": 10.173837661743164, "learning_rate": 3.6429449479531416e-06, "loss": 0.3720964193344116, "step": 2026 }, { "epoch": 2.4761904761904763, "grad_norm": 0.20691752433776855, "learning_rate": 3.6282428957926154e-06, "loss": 0.2083432972431183, "step": 2028 }, { "epoch": 2.4786324786324787, "grad_norm": 2.024094581604004, "learning_rate": 3.613600375975221e-06, "loss": 0.5114956498146057, "step": 2030 }, { "epoch": 2.481074481074481, "grad_norm": 1.2281562089920044, "learning_rate": 3.599017506750042e-06, "loss": 0.47537893056869507, "step": 2032 }, { "epoch": 2.4835164835164836, "grad_norm": 2.2216989994049072, "learning_rate": 3.5844944058844393e-06, "loss": 0.25453007221221924, "step": 2034 }, { "epoch": 2.485958485958486, "grad_norm": 2.591078281402588, "learning_rate": 3.570031190663098e-06, "loss": 0.5005137920379639, "step": 2036 }, { "epoch": 2.4884004884004884, "grad_norm": 0.48911339044570923, "learning_rate": 3.5556279778870862e-06, "loss": 0.5193389058113098, "step": 2038 }, { "epoch": 2.490842490842491, "grad_norm": 27.082082748413086, "learning_rate": 3.5412848838729075e-06, "loss": 0.5654491782188416, "step": 2040 }, { "epoch": 2.4932844932844933, "grad_norm": 1.6297937631607056, "learning_rate": 3.5270020244515583e-06, "loss": 0.5325220227241516, "step": 2042 }, { "epoch": 2.4957264957264957, "grad_norm": 0.9335009455680847, "learning_rate": 3.5127795149676014e-06, "loss": 0.38437139987945557, "step": 2044 }, { "epoch": 2.498168498168498, "grad_norm": 141.24978637695312, "learning_rate": 3.49861747027823e-06, "loss": 0.2638123035430908, "step": 2046 }, { "epoch": 2.5006105006105006, "grad_norm": 1.3640321493148804, "learning_rate": 3.484516004752334e-06, "loss": 0.4149170219898224, "step": 2048 }, { "epoch": 2.503052503052503, "grad_norm": 1.0066052675247192, "learning_rate": 3.4704752322695877e-06, "loss": 0.4781511425971985, "step": 2050 }, { "epoch": 2.5054945054945055, "grad_norm": 1.2308069467544556, "learning_rate": 3.456495266219525e-06, "loss": 0.7653157711029053, "step": 2052 }, { "epoch": 2.507936507936508, "grad_norm": 1.3373329639434814, "learning_rate": 3.442576219500614e-06, "loss": 0.36611488461494446, "step": 2054 }, { "epoch": 2.5103785103785103, "grad_norm": 1.555979609489441, "learning_rate": 3.428718204519369e-06, "loss": 0.531693696975708, "step": 2056 }, { "epoch": 2.5128205128205128, "grad_norm": 8.703025817871094, "learning_rate": 3.4149213331894193e-06, "loss": 0.18801343441009521, "step": 2058 }, { "epoch": 2.515262515262515, "grad_norm": 1.2803109884262085, "learning_rate": 3.4011857169306127e-06, "loss": 0.16657070815563202, "step": 2060 }, { "epoch": 2.5177045177045176, "grad_norm": 0.712373673915863, "learning_rate": 3.3875114666681235e-06, "loss": 0.2420540601015091, "step": 2062 }, { "epoch": 2.52014652014652, "grad_norm": 1.780391812324524, "learning_rate": 3.3738986928315474e-06, "loss": 0.4269709587097168, "step": 2064 }, { "epoch": 2.5225885225885225, "grad_norm": 1.2723828554153442, "learning_rate": 3.360347505354011e-06, "loss": 0.3732086420059204, "step": 2066 }, { "epoch": 2.525030525030525, "grad_norm": 2.761953353881836, "learning_rate": 3.3468580136712903e-06, "loss": 0.5551900863647461, "step": 2068 }, { "epoch": 2.5274725274725274, "grad_norm": 0.8927345275878906, "learning_rate": 3.333430326720921e-06, "loss": 0.5004504919052124, "step": 2070 }, { "epoch": 2.52991452991453, "grad_norm": 0.67017662525177, "learning_rate": 3.3200645529413165e-06, "loss": 0.31844204664230347, "step": 2072 }, { "epoch": 2.5323565323565322, "grad_norm": 1.6567728519439697, "learning_rate": 3.3067608002709006e-06, "loss": 0.592690646648407, "step": 2074 }, { "epoch": 2.5347985347985347, "grad_norm": 1.0990091562271118, "learning_rate": 3.2935191761472313e-06, "loss": 0.509267270565033, "step": 2076 }, { "epoch": 2.537240537240537, "grad_norm": 2.832087516784668, "learning_rate": 3.280339787506127e-06, "loss": 0.4890163540840149, "step": 2078 }, { "epoch": 2.5396825396825395, "grad_norm": 3.6818792819976807, "learning_rate": 3.2672227407808184e-06, "loss": 0.35127052664756775, "step": 2080 }, { "epoch": 2.542124542124542, "grad_norm": 0.9744904041290283, "learning_rate": 3.2541681419010716e-06, "loss": 0.4693216383457184, "step": 2082 }, { "epoch": 2.5445665445665444, "grad_norm": 0.9872434735298157, "learning_rate": 3.2411760962923434e-06, "loss": 0.47572940587997437, "step": 2084 }, { "epoch": 2.547008547008547, "grad_norm": 1.288815975189209, "learning_rate": 3.228246708874926e-06, "loss": 0.45491641759872437, "step": 2086 }, { "epoch": 2.5494505494505493, "grad_norm": 1.0426764488220215, "learning_rate": 3.2153800840631043e-06, "loss": 0.6177046298980713, "step": 2088 }, { "epoch": 2.5518925518925517, "grad_norm": 1.2259653806686401, "learning_rate": 3.202576325764307e-06, "loss": 0.45679447054862976, "step": 2090 }, { "epoch": 2.554334554334554, "grad_norm": 2.0075936317443848, "learning_rate": 3.1898355373782663e-06, "loss": 0.3028113842010498, "step": 2092 }, { "epoch": 2.5567765567765566, "grad_norm": 0.8422965407371521, "learning_rate": 3.177157821796191e-06, "loss": 0.2570323646068573, "step": 2094 }, { "epoch": 2.559218559218559, "grad_norm": 0.8695139288902283, "learning_rate": 3.1645432813999306e-06, "loss": 0.3652976155281067, "step": 2096 }, { "epoch": 2.5616605616605614, "grad_norm": 2.6163241863250732, "learning_rate": 3.1519920180611436e-06, "loss": 0.08200995624065399, "step": 2098 }, { "epoch": 2.564102564102564, "grad_norm": 0.7538577914237976, "learning_rate": 3.139504133140484e-06, "loss": 0.26613810658454895, "step": 2100 }, { "epoch": 2.5665445665445663, "grad_norm": 0.9928892254829407, "learning_rate": 3.127079727486781e-06, "loss": 0.39854198694229126, "step": 2102 }, { "epoch": 2.5689865689865687, "grad_norm": 2.9046833515167236, "learning_rate": 3.114718901436215e-06, "loss": 0.35459813475608826, "step": 2104 }, { "epoch": 2.571428571428571, "grad_norm": 0.8664820194244385, "learning_rate": 3.1024217548115195e-06, "loss": 0.3210771977901459, "step": 2106 }, { "epoch": 2.5738705738705736, "grad_norm": 1.5520901679992676, "learning_rate": 3.090188386921171e-06, "loss": 0.24245740473270416, "step": 2108 }, { "epoch": 2.576312576312576, "grad_norm": 1.7673155069351196, "learning_rate": 3.078018896558582e-06, "loss": 0.21324002742767334, "step": 2110 }, { "epoch": 2.578754578754579, "grad_norm": 0.730332612991333, "learning_rate": 3.0659133820013123e-06, "loss": 0.469443142414093, "step": 2112 }, { "epoch": 2.5811965811965814, "grad_norm": 1.5071324110031128, "learning_rate": 3.0538719410102612e-06, "loss": 0.16458410024642944, "step": 2114 }, { "epoch": 2.583638583638584, "grad_norm": 1.1855233907699585, "learning_rate": 3.0418946708288984e-06, "loss": 0.3730916976928711, "step": 2116 }, { "epoch": 2.586080586080586, "grad_norm": 1.2179559469223022, "learning_rate": 3.029981668182458e-06, "loss": 0.5398478507995605, "step": 2118 }, { "epoch": 2.5885225885225887, "grad_norm": 1.1000230312347412, "learning_rate": 3.0181330292771727e-06, "loss": 0.25115227699279785, "step": 2120 }, { "epoch": 2.590964590964591, "grad_norm": 1.437605857849121, "learning_rate": 3.0063488497994864e-06, "loss": 0.6454752087593079, "step": 2122 }, { "epoch": 2.5934065934065935, "grad_norm": 0.7121138572692871, "learning_rate": 2.994629224915288e-06, "loss": 0.30809617042541504, "step": 2124 }, { "epoch": 2.595848595848596, "grad_norm": 1.196258783340454, "learning_rate": 2.9829742492691436e-06, "loss": 0.1984136551618576, "step": 2126 }, { "epoch": 2.5982905982905984, "grad_norm": 3.140024423599243, "learning_rate": 2.971384016983522e-06, "loss": 0.4299178123474121, "step": 2128 }, { "epoch": 2.600732600732601, "grad_norm": 2.820770502090454, "learning_rate": 2.959858621658047e-06, "loss": 0.2969256043434143, "step": 2130 }, { "epoch": 2.6031746031746033, "grad_norm": 3.3160879611968994, "learning_rate": 2.94839815636874e-06, "loss": 0.2652299702167511, "step": 2132 }, { "epoch": 2.6056166056166057, "grad_norm": 0.7100194096565247, "learning_rate": 2.9370027136672536e-06, "loss": 0.34369128942489624, "step": 2134 }, { "epoch": 2.608058608058608, "grad_norm": 3.5660557746887207, "learning_rate": 2.925672385580145e-06, "loss": 0.30307111144065857, "step": 2136 }, { "epoch": 2.6105006105006106, "grad_norm": 0.9895382523536682, "learning_rate": 2.9144072636081233e-06, "loss": 0.2503519058227539, "step": 2138 }, { "epoch": 2.612942612942613, "grad_norm": 0.7191367745399475, "learning_rate": 2.9032074387253017e-06, "loss": 0.25583434104919434, "step": 2140 }, { "epoch": 2.6153846153846154, "grad_norm": 1.4276951551437378, "learning_rate": 2.892073001378481e-06, "loss": 0.3618330955505371, "step": 2142 }, { "epoch": 2.617826617826618, "grad_norm": 2.0080482959747314, "learning_rate": 2.881004041486406e-06, "loss": 0.4887958765029907, "step": 2144 }, { "epoch": 2.6202686202686203, "grad_norm": 0.8838030099868774, "learning_rate": 2.8700006484390395e-06, "loss": 0.46932682394981384, "step": 2146 }, { "epoch": 2.6227106227106227, "grad_norm": 1.1301337480545044, "learning_rate": 2.8590629110968503e-06, "loss": 0.3209373652935028, "step": 2148 }, { "epoch": 2.625152625152625, "grad_norm": 1.117184042930603, "learning_rate": 2.8481909177900874e-06, "loss": 0.468944787979126, "step": 2150 }, { "epoch": 2.6275946275946276, "grad_norm": 1.6847853660583496, "learning_rate": 2.837384756318063e-06, "loss": 0.439802885055542, "step": 2152 }, { "epoch": 2.63003663003663, "grad_norm": 1.6028481721878052, "learning_rate": 2.826644513948456e-06, "loss": 0.48533153533935547, "step": 2154 }, { "epoch": 2.6324786324786325, "grad_norm": 2.249617576599121, "learning_rate": 2.8159702774166e-06, "loss": 0.5256586670875549, "step": 2156 }, { "epoch": 2.634920634920635, "grad_norm": 1.6403663158416748, "learning_rate": 2.8053621329247767e-06, "loss": 0.5299547910690308, "step": 2158 }, { "epoch": 2.6373626373626373, "grad_norm": 1.569277048110962, "learning_rate": 2.7948201661415307e-06, "loss": 0.2885707914829254, "step": 2160 }, { "epoch": 2.6398046398046398, "grad_norm": 1.2910041809082031, "learning_rate": 2.7843444622009746e-06, "loss": 0.34332627058029175, "step": 2162 }, { "epoch": 2.642246642246642, "grad_norm": 1.1258636713027954, "learning_rate": 2.773935105702096e-06, "loss": 0.3300524652004242, "step": 2164 }, { "epoch": 2.6446886446886446, "grad_norm": 1.1584712266921997, "learning_rate": 2.763592180708081e-06, "loss": 0.4990626871585846, "step": 2166 }, { "epoch": 2.647130647130647, "grad_norm": 0.8549714684486389, "learning_rate": 2.7533157707456336e-06, "loss": 0.42835402488708496, "step": 2168 }, { "epoch": 2.6495726495726495, "grad_norm": 0.7408347129821777, "learning_rate": 2.7431059588042945e-06, "loss": 0.504192590713501, "step": 2170 }, { "epoch": 2.652014652014652, "grad_norm": 1.2692267894744873, "learning_rate": 2.7329628273357815e-06, "loss": 0.5846405029296875, "step": 2172 }, { "epoch": 2.6544566544566544, "grad_norm": 1.1758378744125366, "learning_rate": 2.72288645825332e-06, "loss": 0.4775027632713318, "step": 2174 }, { "epoch": 2.656898656898657, "grad_norm": 1.020842432975769, "learning_rate": 2.7128769329309744e-06, "loss": 0.2678804397583008, "step": 2176 }, { "epoch": 2.659340659340659, "grad_norm": 0.7583962082862854, "learning_rate": 2.702934332203002e-06, "loss": 0.4422096908092499, "step": 2178 }, { "epoch": 2.6617826617826617, "grad_norm": 2.3237428665161133, "learning_rate": 2.6930587363631932e-06, "loss": 0.4233754575252533, "step": 2180 }, { "epoch": 2.664224664224664, "grad_norm": 0.6809400916099548, "learning_rate": 2.6832502251642223e-06, "loss": 0.40418240427970886, "step": 2182 }, { "epoch": 2.6666666666666665, "grad_norm": 0.7269507050514221, "learning_rate": 2.6735088778170105e-06, "loss": 0.2588379979133606, "step": 2184 }, { "epoch": 2.669108669108669, "grad_norm": 1.2103101015090942, "learning_rate": 2.66383477299008e-06, "loss": 0.39823517203330994, "step": 2186 }, { "epoch": 2.6715506715506714, "grad_norm": 0.9755131006240845, "learning_rate": 2.6542279888089163e-06, "loss": 0.3795110881328583, "step": 2188 }, { "epoch": 2.6739926739926743, "grad_norm": 0.9968971610069275, "learning_rate": 2.6446886028553476e-06, "loss": 0.5400364995002747, "step": 2190 }, { "epoch": 2.6764346764346767, "grad_norm": 2.260093927383423, "learning_rate": 2.6352166921669076e-06, "loss": 0.5039065480232239, "step": 2192 }, { "epoch": 2.678876678876679, "grad_norm": 2.027021646499634, "learning_rate": 2.625812333236222e-06, "loss": 0.13939893245697021, "step": 2194 }, { "epoch": 2.6813186813186816, "grad_norm": 0.9278559684753418, "learning_rate": 2.61647560201038e-06, "loss": 0.33114153146743774, "step": 2196 }, { "epoch": 2.683760683760684, "grad_norm": 0.7097818851470947, "learning_rate": 2.6072065738903335e-06, "loss": 0.521342396736145, "step": 2198 }, { "epoch": 2.6862026862026864, "grad_norm": 1.291375756263733, "learning_rate": 2.5980053237302816e-06, "loss": 0.4681139588356018, "step": 2200 }, { "epoch": 2.688644688644689, "grad_norm": 4.184018611907959, "learning_rate": 2.588871925837062e-06, "loss": 0.28020548820495605, "step": 2202 }, { "epoch": 2.6910866910866913, "grad_norm": 2.3299784660339355, "learning_rate": 2.5798064539695604e-06, "loss": 0.5311964750289917, "step": 2204 }, { "epoch": 2.6935286935286937, "grad_norm": 2.1903867721557617, "learning_rate": 2.5708089813381088e-06, "loss": 0.12289441376924515, "step": 2206 }, { "epoch": 2.695970695970696, "grad_norm": 2.261828899383545, "learning_rate": 2.561879580603893e-06, "loss": 0.47109082341194153, "step": 2208 }, { "epoch": 2.6984126984126986, "grad_norm": 1.110669493675232, "learning_rate": 2.5530183238783728e-06, "loss": 0.3485221564769745, "step": 2210 }, { "epoch": 2.700854700854701, "grad_norm": 1.4882543087005615, "learning_rate": 2.5442252827226925e-06, "loss": 0.5045080184936523, "step": 2212 }, { "epoch": 2.7032967032967035, "grad_norm": 1.0571633577346802, "learning_rate": 2.5355005281471046e-06, "loss": 0.2372823804616928, "step": 2214 }, { "epoch": 2.705738705738706, "grad_norm": 0.6153679490089417, "learning_rate": 2.526844130610399e-06, "loss": 0.2721218168735504, "step": 2216 }, { "epoch": 2.7081807081807083, "grad_norm": 1.4203280210494995, "learning_rate": 2.5182561600193317e-06, "loss": 0.311516672372818, "step": 2218 }, { "epoch": 2.7106227106227108, "grad_norm": 2.5656776428222656, "learning_rate": 2.5097366857280636e-06, "loss": 0.1073763519525528, "step": 2220 }, { "epoch": 2.713064713064713, "grad_norm": 1.4745090007781982, "learning_rate": 2.501285776537593e-06, "loss": 0.358319491147995, "step": 2222 }, { "epoch": 2.7155067155067156, "grad_norm": 1.3464287519454956, "learning_rate": 2.4929035006952106e-06, "loss": 0.21015426516532898, "step": 2224 }, { "epoch": 2.717948717948718, "grad_norm": 1.07408607006073, "learning_rate": 2.4845899258939362e-06, "loss": 0.25736236572265625, "step": 2226 }, { "epoch": 2.7203907203907205, "grad_norm": 2.0369420051574707, "learning_rate": 2.4763451192719816e-06, "loss": 0.2484760284423828, "step": 2228 }, { "epoch": 2.722832722832723, "grad_norm": 1.062886357307434, "learning_rate": 2.4681691474122064e-06, "loss": 0.4695739150047302, "step": 2230 }, { "epoch": 2.7252747252747254, "grad_norm": 3.0891904830932617, "learning_rate": 2.4600620763415754e-06, "loss": 0.2893969714641571, "step": 2232 }, { "epoch": 2.727716727716728, "grad_norm": 0.8144769072532654, "learning_rate": 2.4520239715306325e-06, "loss": 0.5152880549430847, "step": 2234 }, { "epoch": 2.7301587301587302, "grad_norm": 1.6376501321792603, "learning_rate": 2.4440548978929678e-06, "loss": 0.7832448482513428, "step": 2236 }, { "epoch": 2.7326007326007327, "grad_norm": 0.9825433492660522, "learning_rate": 2.4361549197846914e-06, "loss": 0.376642107963562, "step": 2238 }, { "epoch": 2.735042735042735, "grad_norm": 1.621090054512024, "learning_rate": 2.42832410100392e-06, "loss": 0.26889967918395996, "step": 2240 }, { "epoch": 2.7374847374847375, "grad_norm": 0.8367129564285278, "learning_rate": 2.420562504790256e-06, "loss": 0.5269310474395752, "step": 2242 }, { "epoch": 2.73992673992674, "grad_norm": 2.0027148723602295, "learning_rate": 2.412870193824278e-06, "loss": 0.2715807557106018, "step": 2244 }, { "epoch": 2.7423687423687424, "grad_norm": 1.490946650505066, "learning_rate": 2.4052472302270365e-06, "loss": 0.2188037633895874, "step": 2246 }, { "epoch": 2.744810744810745, "grad_norm": 1.6017478704452515, "learning_rate": 2.3976936755595533e-06, "loss": 0.4869040846824646, "step": 2248 }, { "epoch": 2.7472527472527473, "grad_norm": 1.3607432842254639, "learning_rate": 2.390209590822319e-06, "loss": 0.40255841612815857, "step": 2250 }, { "epoch": 2.7496947496947497, "grad_norm": 1.5456528663635254, "learning_rate": 2.3827950364548034e-06, "loss": 0.6289904117584229, "step": 2252 }, { "epoch": 2.752136752136752, "grad_norm": 1.5753426551818848, "learning_rate": 2.375450072334972e-06, "loss": 0.5615298748016357, "step": 2254 }, { "epoch": 2.7545787545787546, "grad_norm": 1.4261977672576904, "learning_rate": 2.3681747577787924e-06, "loss": 0.2363334745168686, "step": 2256 }, { "epoch": 2.757020757020757, "grad_norm": 1.1819992065429688, "learning_rate": 2.3609691515397628e-06, "loss": 0.4858379364013672, "step": 2258 }, { "epoch": 2.7594627594627594, "grad_norm": 1.9267686605453491, "learning_rate": 2.3538333118084396e-06, "loss": 0.5177884697914124, "step": 2260 }, { "epoch": 2.761904761904762, "grad_norm": 1.1344858407974243, "learning_rate": 2.3467672962119565e-06, "loss": 0.5373342037200928, "step": 2262 }, { "epoch": 2.7643467643467643, "grad_norm": 0.8637273907661438, "learning_rate": 2.3397711618135725e-06, "loss": 0.43640759587287903, "step": 2264 }, { "epoch": 2.7667887667887667, "grad_norm": 1.145462155342102, "learning_rate": 2.332844965112201e-06, "loss": 0.3964022099971771, "step": 2266 }, { "epoch": 2.769230769230769, "grad_norm": 0.5111590623855591, "learning_rate": 2.3259887620419573e-06, "loss": 0.3127731680870056, "step": 2268 }, { "epoch": 2.7716727716727716, "grad_norm": 0.791425347328186, "learning_rate": 2.3192026079717086e-06, "loss": 0.2613333463668823, "step": 2270 }, { "epoch": 2.774114774114774, "grad_norm": 0.23441043496131897, "learning_rate": 2.3124865577046252e-06, "loss": 0.07839272171258926, "step": 2272 }, { "epoch": 2.7765567765567765, "grad_norm": 1.0026205778121948, "learning_rate": 2.3058406654777355e-06, "loss": 0.502284824848175, "step": 2274 }, { "epoch": 2.778998778998779, "grad_norm": 0.9165741801261902, "learning_rate": 2.299264984961492e-06, "loss": 0.6292468905448914, "step": 2276 }, { "epoch": 2.7814407814407813, "grad_norm": 1.3016325235366821, "learning_rate": 2.2927595692593366e-06, "loss": 0.3484017252922058, "step": 2278 }, { "epoch": 2.7838827838827838, "grad_norm": 1.573944091796875, "learning_rate": 2.286324470907269e-06, "loss": 0.18759427964687347, "step": 2280 }, { "epoch": 2.786324786324786, "grad_norm": 2.0719950199127197, "learning_rate": 2.279959741873426e-06, "loss": 0.419060617685318, "step": 2282 }, { "epoch": 2.7887667887667886, "grad_norm": 1.6407965421676636, "learning_rate": 2.2736654335576634e-06, "loss": 0.4783077836036682, "step": 2284 }, { "epoch": 2.791208791208791, "grad_norm": 1.0861320495605469, "learning_rate": 2.267441596791132e-06, "loss": 0.4703105390071869, "step": 2286 }, { "epoch": 2.7936507936507935, "grad_norm": 0.9553175568580627, "learning_rate": 2.2612882818358784e-06, "loss": 0.41585975885391235, "step": 2288 }, { "epoch": 2.796092796092796, "grad_norm": 9.468893051147461, "learning_rate": 2.2552055383844327e-06, "loss": 0.08420296758413315, "step": 2290 }, { "epoch": 2.7985347985347984, "grad_norm": 2.4556336402893066, "learning_rate": 2.2491934155594063e-06, "loss": 0.35032370686531067, "step": 2292 }, { "epoch": 2.800976800976801, "grad_norm": 1.1944650411605835, "learning_rate": 2.243251961913099e-06, "loss": 0.36088746786117554, "step": 2294 }, { "epoch": 2.8034188034188032, "grad_norm": 0.9773551821708679, "learning_rate": 2.2373812254271074e-06, "loss": 0.42339953780174255, "step": 2296 }, { "epoch": 2.8058608058608057, "grad_norm": 1.2944077253341675, "learning_rate": 2.231581253511929e-06, "loss": 0.1882065087556839, "step": 2298 }, { "epoch": 2.808302808302808, "grad_norm": 1.328771948814392, "learning_rate": 2.2258520930065902e-06, "loss": 0.33834829926490784, "step": 2300 }, { "epoch": 2.8107448107448105, "grad_norm": 1.5796797275543213, "learning_rate": 2.2201937901782632e-06, "loss": 0.5746235847473145, "step": 2302 }, { "epoch": 2.813186813186813, "grad_norm": 0.1405964195728302, "learning_rate": 2.2146063907218928e-06, "loss": 0.2884528338909149, "step": 2304 }, { "epoch": 2.8156288156288154, "grad_norm": 0.7891967296600342, "learning_rate": 2.2090899397598235e-06, "loss": 0.34547799825668335, "step": 2306 }, { "epoch": 2.818070818070818, "grad_norm": 1.0902297496795654, "learning_rate": 2.2036444818414424e-06, "loss": 0.4068155288696289, "step": 2308 }, { "epoch": 2.8205128205128203, "grad_norm": 1.061621904373169, "learning_rate": 2.198270060942815e-06, "loss": 0.4539620876312256, "step": 2310 }, { "epoch": 2.8229548229548227, "grad_norm": 0.9649152755737305, "learning_rate": 2.192966720466328e-06, "loss": 0.22723491489887238, "step": 2312 }, { "epoch": 2.825396825396825, "grad_norm": 10.881244659423828, "learning_rate": 2.1877345032403458e-06, "loss": 0.287578284740448, "step": 2314 }, { "epoch": 2.8278388278388276, "grad_norm": 2.314340829849243, "learning_rate": 2.182573451518859e-06, "loss": 0.4537888169288635, "step": 2316 }, { "epoch": 2.8302808302808304, "grad_norm": 1.7877088785171509, "learning_rate": 2.1774836069811415e-06, "loss": 0.3850943446159363, "step": 2318 }, { "epoch": 2.832722832722833, "grad_norm": 0.8207268714904785, "learning_rate": 2.1724650107314217e-06, "loss": 0.22680553793907166, "step": 2320 }, { "epoch": 2.8351648351648353, "grad_norm": 1.7450029850006104, "learning_rate": 2.1675177032985435e-06, "loss": 0.34959569573402405, "step": 2322 }, { "epoch": 2.8376068376068377, "grad_norm": 0.28571420907974243, "learning_rate": 2.1626417246356398e-06, "loss": 0.08046525716781616, "step": 2324 }, { "epoch": 2.84004884004884, "grad_norm": 1.3986101150512695, "learning_rate": 2.1578371141198154e-06, "loss": 0.3989933431148529, "step": 2326 }, { "epoch": 2.8424908424908426, "grad_norm": 1.5185210704803467, "learning_rate": 2.15310391055182e-06, "loss": 0.27708202600479126, "step": 2328 }, { "epoch": 2.844932844932845, "grad_norm": 2.8958606719970703, "learning_rate": 2.1484421521557453e-06, "loss": 0.24901802837848663, "step": 2330 }, { "epoch": 2.8473748473748475, "grad_norm": 1.108059048652649, "learning_rate": 2.143851876578706e-06, "loss": 0.45619091391563416, "step": 2332 }, { "epoch": 2.84981684981685, "grad_norm": 1.0437735319137573, "learning_rate": 2.1393331208905436e-06, "loss": 0.07932747900485992, "step": 2334 }, { "epoch": 2.8522588522588523, "grad_norm": 1.237439513206482, "learning_rate": 2.134885921583522e-06, "loss": 0.5910269021987915, "step": 2336 }, { "epoch": 2.8547008547008548, "grad_norm": 1.1078741550445557, "learning_rate": 2.1305103145720383e-06, "loss": 0.3153696656227112, "step": 2338 }, { "epoch": 2.857142857142857, "grad_norm": 1.034421682357788, "learning_rate": 2.1262063351923255e-06, "loss": 0.47363409399986267, "step": 2340 }, { "epoch": 2.8595848595848596, "grad_norm": 1.1710708141326904, "learning_rate": 2.121974018202172e-06, "loss": 0.48734188079833984, "step": 2342 }, { "epoch": 2.862026862026862, "grad_norm": 2.568005084991455, "learning_rate": 2.1178133977806413e-06, "loss": 0.19048890471458435, "step": 2344 }, { "epoch": 2.8644688644688645, "grad_norm": 1.4728940725326538, "learning_rate": 2.113724507527794e-06, "loss": 0.6129634976387024, "step": 2346 }, { "epoch": 2.866910866910867, "grad_norm": 0.22239279747009277, "learning_rate": 2.1097073804644163e-06, "loss": 0.2763885259628296, "step": 2348 }, { "epoch": 2.8693528693528694, "grad_norm": 0.6631549000740051, "learning_rate": 2.105762049031753e-06, "loss": 0.2500677704811096, "step": 2350 }, { "epoch": 2.871794871794872, "grad_norm": 1.0234497785568237, "learning_rate": 2.1018885450912487e-06, "loss": 0.45614075660705566, "step": 2352 }, { "epoch": 2.8742368742368742, "grad_norm": 1.8352830410003662, "learning_rate": 2.098086899924288e-06, "loss": 0.3945198953151703, "step": 2354 }, { "epoch": 2.8766788766788767, "grad_norm": 0.8980585932731628, "learning_rate": 2.0943571442319437e-06, "loss": 0.49924108386039734, "step": 2356 }, { "epoch": 2.879120879120879, "grad_norm": 25.131999969482422, "learning_rate": 2.090699308134726e-06, "loss": 0.4753328263759613, "step": 2358 }, { "epoch": 2.8815628815628815, "grad_norm": 1.648654818534851, "learning_rate": 2.0871134211723417e-06, "loss": 0.23788021504878998, "step": 2360 }, { "epoch": 2.884004884004884, "grad_norm": 1.9093987941741943, "learning_rate": 2.0835995123034603e-06, "loss": 0.32568857073783875, "step": 2362 }, { "epoch": 2.8864468864468864, "grad_norm": 1.0956945419311523, "learning_rate": 2.0801576099054696e-06, "loss": 0.6228987574577332, "step": 2364 }, { "epoch": 2.888888888888889, "grad_norm": 0.992882490158081, "learning_rate": 2.0767877417742564e-06, "loss": 0.39544668793678284, "step": 2366 }, { "epoch": 2.8913308913308913, "grad_norm": 4.350165367126465, "learning_rate": 2.0734899351239744e-06, "loss": 0.3747745156288147, "step": 2368 }, { "epoch": 2.8937728937728937, "grad_norm": 1.0189871788024902, "learning_rate": 2.0702642165868326e-06, "loss": 0.3083977997303009, "step": 2370 }, { "epoch": 2.896214896214896, "grad_norm": 1.012895107269287, "learning_rate": 2.0671106122128717e-06, "loss": 0.388817697763443, "step": 2372 }, { "epoch": 2.8986568986568986, "grad_norm": 0.2986360788345337, "learning_rate": 2.064029147469759e-06, "loss": 0.3050660490989685, "step": 2374 }, { "epoch": 2.901098901098901, "grad_norm": 3.99959397315979, "learning_rate": 2.0610198472425817e-06, "loss": 0.42830216884613037, "step": 2376 }, { "epoch": 2.9035409035409034, "grad_norm": 0.9284391403198242, "learning_rate": 2.0580827358336447e-06, "loss": 0.4124550223350525, "step": 2378 }, { "epoch": 2.905982905982906, "grad_norm": 1.0101871490478516, "learning_rate": 2.055217836962276e-06, "loss": 0.34032320976257324, "step": 2380 }, { "epoch": 2.9084249084249083, "grad_norm": 2.9604995250701904, "learning_rate": 2.0524251737646367e-06, "loss": 0.5842119455337524, "step": 2382 }, { "epoch": 2.9108669108669107, "grad_norm": 1.806335687637329, "learning_rate": 2.049704768793527e-06, "loss": 0.308889776468277, "step": 2384 }, { "epoch": 2.913308913308913, "grad_norm": 1.2805176973342896, "learning_rate": 2.0470566440182126e-06, "loss": 0.736882746219635, "step": 2386 }, { "epoch": 2.9157509157509156, "grad_norm": 1.484055995941162, "learning_rate": 2.0444808208242414e-06, "loss": 0.3669341504573822, "step": 2388 }, { "epoch": 2.918192918192918, "grad_norm": 2.3404009342193604, "learning_rate": 2.041977320013275e-06, "loss": 0.303989052772522, "step": 2390 }, { "epoch": 2.9206349206349205, "grad_norm": 4.0918097496032715, "learning_rate": 2.0395461618029175e-06, "loss": 0.4449572265148163, "step": 2392 }, { "epoch": 2.9230769230769234, "grad_norm": 1.0222722291946411, "learning_rate": 2.0371873658265546e-06, "loss": 0.31565719842910767, "step": 2394 }, { "epoch": 2.925518925518926, "grad_norm": 1.7059550285339355, "learning_rate": 2.0349009511331912e-06, "loss": 0.24595557153224945, "step": 2396 }, { "epoch": 2.927960927960928, "grad_norm": 3.1395483016967773, "learning_rate": 2.032686936187305e-06, "loss": 0.30839934945106506, "step": 2398 }, { "epoch": 2.9304029304029307, "grad_norm": 2.6876676082611084, "learning_rate": 2.0305453388686876e-06, "loss": 0.32078707218170166, "step": 2400 }, { "epoch": 2.932844932844933, "grad_norm": 1.2002466917037964, "learning_rate": 2.0284761764723087e-06, "loss": 0.27718839049339294, "step": 2402 }, { "epoch": 2.9352869352869355, "grad_norm": 1.524316668510437, "learning_rate": 2.026479465708171e-06, "loss": 0.18042829632759094, "step": 2404 }, { "epoch": 2.937728937728938, "grad_norm": 2.9133212566375732, "learning_rate": 2.0245552227011777e-06, "loss": 0.5652621984481812, "step": 2406 }, { "epoch": 2.9401709401709404, "grad_norm": 1.6217875480651855, "learning_rate": 2.022703462991003e-06, "loss": 0.28077784180641174, "step": 2408 }, { "epoch": 2.942612942612943, "grad_norm": 0.957901656627655, "learning_rate": 2.0209242015319625e-06, "loss": 0.312043696641922, "step": 2410 }, { "epoch": 2.9450549450549453, "grad_norm": 0.7723997235298157, "learning_rate": 2.0192174526928982e-06, "loss": 0.42037639021873474, "step": 2412 }, { "epoch": 2.9474969474969477, "grad_norm": 1.0776695013046265, "learning_rate": 2.0175832302570575e-06, "loss": 0.5173778533935547, "step": 2414 }, { "epoch": 2.94993894993895, "grad_norm": 0.926655650138855, "learning_rate": 2.016021547421984e-06, "loss": 0.46436506509780884, "step": 2416 }, { "epoch": 2.9523809523809526, "grad_norm": 1.5396034717559814, "learning_rate": 2.0145324167994134e-06, "loss": 0.24875374138355255, "step": 2418 }, { "epoch": 2.954822954822955, "grad_norm": 1.1180499792099, "learning_rate": 2.0131158504151655e-06, "loss": 0.35978463292121887, "step": 2420 }, { "epoch": 2.9572649572649574, "grad_norm": 0.9617190957069397, "learning_rate": 2.0117718597090543e-06, "loss": 0.3947286605834961, "step": 2422 }, { "epoch": 2.95970695970696, "grad_norm": 1.0433496236801147, "learning_rate": 2.010500455534788e-06, "loss": 0.28263401985168457, "step": 2424 }, { "epoch": 2.9621489621489623, "grad_norm": 1.070198893547058, "learning_rate": 2.0093016481598885e-06, "loss": 0.5800071954727173, "step": 2426 }, { "epoch": 2.9645909645909647, "grad_norm": 3.0985279083251953, "learning_rate": 2.0081754472656034e-06, "loss": 0.1977805346250534, "step": 2428 }, { "epoch": 2.967032967032967, "grad_norm": 1.0906306505203247, "learning_rate": 2.0071218619468327e-06, "loss": 0.3762721121311188, "step": 2430 }, { "epoch": 2.9694749694749696, "grad_norm": 0.7913962602615356, "learning_rate": 2.0061409007120475e-06, "loss": 0.3768196403980255, "step": 2432 }, { "epoch": 2.971916971916972, "grad_norm": 1.3056226968765259, "learning_rate": 2.005232571483231e-06, "loss": 0.46781641244888306, "step": 2434 }, { "epoch": 2.9743589743589745, "grad_norm": 1.005242109298706, "learning_rate": 2.0043968815958075e-06, "loss": 0.25440388917922974, "step": 2436 }, { "epoch": 2.976800976800977, "grad_norm": 3.3108999729156494, "learning_rate": 2.003633837798584e-06, "loss": 0.12983591854572296, "step": 2438 }, { "epoch": 2.9792429792429793, "grad_norm": 1.743328332901001, "learning_rate": 2.0029434462537e-06, "loss": 0.43715769052505493, "step": 2440 }, { "epoch": 2.9816849816849818, "grad_norm": 1.05440092086792, "learning_rate": 2.002325712536572e-06, "loss": 0.4317605495452881, "step": 2442 }, { "epoch": 2.984126984126984, "grad_norm": 2.774752616882324, "learning_rate": 2.001780641635854e-06, "loss": 0.39571458101272583, "step": 2444 }, { "epoch": 2.9865689865689866, "grad_norm": 1.0296354293823242, "learning_rate": 2.001308237953393e-06, "loss": 0.4417667090892792, "step": 2446 }, { "epoch": 2.989010989010989, "grad_norm": 1.3123754262924194, "learning_rate": 2.000908505304195e-06, "loss": 0.5195387601852417, "step": 2448 }, { "epoch": 2.9914529914529915, "grad_norm": 2.177339553833008, "learning_rate": 2.0005814469163937e-06, "loss": 0.19710102677345276, "step": 2450 }, { "epoch": 2.993894993894994, "grad_norm": 1.543820858001709, "learning_rate": 2.0003270654312266e-06, "loss": 0.4630212187767029, "step": 2452 }, { "epoch": 2.9963369963369964, "grad_norm": 1.5547709465026855, "learning_rate": 2.000145362903009e-06, "loss": 0.6292054057121277, "step": 2454 }, { "epoch": 2.998778998778999, "grad_norm": 0.33734217286109924, "learning_rate": 2.0000363407991222e-06, "loss": 0.16045792400836945, "step": 2456 }, { "epoch": 3.0, "step": 2457, "total_flos": 2.578606960937009e+18, "train_loss": 0.8228938954362648, "train_runtime": 8271.5959, "train_samples_per_second": 4.753, "train_steps_per_second": 0.297 } ], "logging_steps": 2, "max_steps": 2457, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.578606960937009e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }