{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 42880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0023320895522388058, "grad_norm": 2.2295329380712103, "learning_rate": 4.6641791044776116e-07, "loss": 0.8501, "step": 5 }, { "epoch": 0.0046641791044776115, "grad_norm": 2.076003633520795, "learning_rate": 9.328358208955223e-07, "loss": 0.8447, "step": 10 }, { "epoch": 0.006996268656716418, "grad_norm": 1.6931098442538834, "learning_rate": 1.3992537313432837e-06, "loss": 0.8637, "step": 15 }, { "epoch": 0.009328358208955223, "grad_norm": 1.2199856677414933, "learning_rate": 1.8656716417910446e-06, "loss": 0.8254, "step": 20 }, { "epoch": 0.01166044776119403, "grad_norm": 1.1645759707013204, "learning_rate": 2.3320895522388064e-06, "loss": 0.8174, "step": 25 }, { "epoch": 0.013992537313432836, "grad_norm": 0.9986660790082118, "learning_rate": 2.7985074626865674e-06, "loss": 0.7811, "step": 30 }, { "epoch": 0.01632462686567164, "grad_norm": 0.7134449734517674, "learning_rate": 3.2649253731343283e-06, "loss": 0.7694, "step": 35 }, { "epoch": 0.018656716417910446, "grad_norm": 0.6939871764718141, "learning_rate": 3.7313432835820893e-06, "loss": 0.7488, "step": 40 }, { "epoch": 0.020988805970149255, "grad_norm": 0.634235550379677, "learning_rate": 4.1977611940298515e-06, "loss": 0.7372, "step": 45 }, { "epoch": 0.02332089552238806, "grad_norm": 0.5048500892750544, "learning_rate": 4.664179104477613e-06, "loss": 0.7061, "step": 50 }, { "epoch": 0.025652985074626867, "grad_norm": 0.47513866531921245, "learning_rate": 5.130597014925373e-06, "loss": 0.6963, "step": 55 }, { "epoch": 0.027985074626865673, "grad_norm": 0.47798146587613805, "learning_rate": 5.597014925373135e-06, "loss": 0.6919, "step": 60 }, { "epoch": 0.03031716417910448, "grad_norm": 0.46778253813722226, "learning_rate": 6.063432835820896e-06, "loss": 0.6782, "step": 65 }, { "epoch": 0.03264925373134328, "grad_norm": 0.4337173900972249, "learning_rate": 6.529850746268657e-06, "loss": 0.6499, "step": 70 }, { "epoch": 0.034981343283582086, "grad_norm": 0.42556390514120546, "learning_rate": 6.996268656716418e-06, "loss": 0.6545, "step": 75 }, { "epoch": 0.03731343283582089, "grad_norm": 0.43242960976295775, "learning_rate": 7.4626865671641785e-06, "loss": 0.6602, "step": 80 }, { "epoch": 0.039645522388059705, "grad_norm": 0.3996448525480252, "learning_rate": 7.92910447761194e-06, "loss": 0.6528, "step": 85 }, { "epoch": 0.04197761194029851, "grad_norm": 0.4483198810815304, "learning_rate": 8.395522388059703e-06, "loss": 0.6509, "step": 90 }, { "epoch": 0.044309701492537316, "grad_norm": 0.4590559809790449, "learning_rate": 8.861940298507463e-06, "loss": 0.6361, "step": 95 }, { "epoch": 0.04664179104477612, "grad_norm": 0.45042646355317134, "learning_rate": 9.328358208955226e-06, "loss": 0.6292, "step": 100 }, { "epoch": 0.04897388059701493, "grad_norm": 0.4422215234464473, "learning_rate": 9.794776119402986e-06, "loss": 0.6551, "step": 105 }, { "epoch": 0.051305970149253734, "grad_norm": 0.4146839065005811, "learning_rate": 1.0261194029850747e-05, "loss": 0.6478, "step": 110 }, { "epoch": 0.05363805970149254, "grad_norm": 0.46535328810266424, "learning_rate": 1.0727611940298509e-05, "loss": 0.6469, "step": 115 }, { "epoch": 0.055970149253731345, "grad_norm": 0.44429545207074567, "learning_rate": 1.119402985074627e-05, "loss": 0.6405, "step": 120 }, { "epoch": 0.05830223880597015, "grad_norm": 0.4672546211423811, "learning_rate": 1.166044776119403e-05, "loss": 0.6226, "step": 125 }, { "epoch": 0.06063432835820896, "grad_norm": 0.42932384206154806, "learning_rate": 1.2126865671641792e-05, "loss": 0.6209, "step": 130 }, { "epoch": 0.06296641791044776, "grad_norm": 0.5080229335382829, "learning_rate": 1.2593283582089551e-05, "loss": 0.6655, "step": 135 }, { "epoch": 0.06529850746268656, "grad_norm": 0.4660099606845085, "learning_rate": 1.3059701492537313e-05, "loss": 0.6361, "step": 140 }, { "epoch": 0.06763059701492537, "grad_norm": 0.495400455736784, "learning_rate": 1.3526119402985074e-05, "loss": 0.6197, "step": 145 }, { "epoch": 0.06996268656716417, "grad_norm": 0.4962205841934173, "learning_rate": 1.3992537313432836e-05, "loss": 0.6351, "step": 150 }, { "epoch": 0.07229477611940298, "grad_norm": 0.4342314310453569, "learning_rate": 1.4458955223880596e-05, "loss": 0.6201, "step": 155 }, { "epoch": 0.07462686567164178, "grad_norm": 0.42555461801622013, "learning_rate": 1.4925373134328357e-05, "loss": 0.6234, "step": 160 }, { "epoch": 0.07695895522388059, "grad_norm": 0.4425553400877895, "learning_rate": 1.539179104477612e-05, "loss": 0.6236, "step": 165 }, { "epoch": 0.07929104477611941, "grad_norm": 0.472955390099434, "learning_rate": 1.585820895522388e-05, "loss": 0.6035, "step": 170 }, { "epoch": 0.08162313432835822, "grad_norm": 0.4809394371753047, "learning_rate": 1.6324626865671644e-05, "loss": 0.5938, "step": 175 }, { "epoch": 0.08395522388059702, "grad_norm": 0.45985081046453197, "learning_rate": 1.6791044776119406e-05, "loss": 0.6112, "step": 180 }, { "epoch": 0.08628731343283583, "grad_norm": 0.5128989915868833, "learning_rate": 1.7257462686567165e-05, "loss": 0.6067, "step": 185 }, { "epoch": 0.08861940298507463, "grad_norm": 0.5061502876856064, "learning_rate": 1.7723880597014927e-05, "loss": 0.6029, "step": 190 }, { "epoch": 0.09095149253731344, "grad_norm": 0.47723123689915764, "learning_rate": 1.819029850746269e-05, "loss": 0.6263, "step": 195 }, { "epoch": 0.09328358208955224, "grad_norm": 0.47008494350531754, "learning_rate": 1.865671641791045e-05, "loss": 0.6091, "step": 200 }, { "epoch": 0.09561567164179105, "grad_norm": 0.4960314457530383, "learning_rate": 1.912313432835821e-05, "loss": 0.6254, "step": 205 }, { "epoch": 0.09794776119402986, "grad_norm": 0.5030531556186032, "learning_rate": 1.9589552238805972e-05, "loss": 0.6195, "step": 210 }, { "epoch": 0.10027985074626866, "grad_norm": 0.5074494000055438, "learning_rate": 2.0055970149253735e-05, "loss": 0.6131, "step": 215 }, { "epoch": 0.10261194029850747, "grad_norm": 0.5026103692955317, "learning_rate": 2.0522388059701493e-05, "loss": 0.6053, "step": 220 }, { "epoch": 0.10494402985074627, "grad_norm": 0.4952198361114178, "learning_rate": 2.0988805970149256e-05, "loss": 0.6031, "step": 225 }, { "epoch": 0.10727611940298508, "grad_norm": 0.45753939967656854, "learning_rate": 2.1455223880597018e-05, "loss": 0.5953, "step": 230 }, { "epoch": 0.10960820895522388, "grad_norm": 0.4946436938458718, "learning_rate": 2.1921641791044777e-05, "loss": 0.5842, "step": 235 }, { "epoch": 0.11194029850746269, "grad_norm": 0.53112229556647, "learning_rate": 2.238805970149254e-05, "loss": 0.6296, "step": 240 }, { "epoch": 0.1142723880597015, "grad_norm": 0.5195471733662039, "learning_rate": 2.28544776119403e-05, "loss": 0.6115, "step": 245 }, { "epoch": 0.1166044776119403, "grad_norm": 0.46145825716317784, "learning_rate": 2.332089552238806e-05, "loss": 0.5963, "step": 250 }, { "epoch": 0.11893656716417911, "grad_norm": 0.5161160634034617, "learning_rate": 2.3787313432835822e-05, "loss": 0.5922, "step": 255 }, { "epoch": 0.12126865671641791, "grad_norm": 0.5163207309277418, "learning_rate": 2.4253731343283584e-05, "loss": 0.6003, "step": 260 }, { "epoch": 0.12360074626865672, "grad_norm": 0.515461602959512, "learning_rate": 2.4720149253731347e-05, "loss": 0.5974, "step": 265 }, { "epoch": 0.1259328358208955, "grad_norm": 0.5661264267602301, "learning_rate": 2.5186567164179102e-05, "loss": 0.5949, "step": 270 }, { "epoch": 0.12826492537313433, "grad_norm": 0.5237279820968304, "learning_rate": 2.5652985074626868e-05, "loss": 0.5917, "step": 275 }, { "epoch": 0.13059701492537312, "grad_norm": 0.5029081329751228, "learning_rate": 2.6119402985074626e-05, "loss": 0.6035, "step": 280 }, { "epoch": 0.13292910447761194, "grad_norm": 0.5222101178561755, "learning_rate": 2.658582089552239e-05, "loss": 0.5985, "step": 285 }, { "epoch": 0.13526119402985073, "grad_norm": 0.5203469486301953, "learning_rate": 2.7052238805970147e-05, "loss": 0.6031, "step": 290 }, { "epoch": 0.13759328358208955, "grad_norm": 0.5159066130648681, "learning_rate": 2.7518656716417913e-05, "loss": 0.5812, "step": 295 }, { "epoch": 0.13992537313432835, "grad_norm": 0.5427720067953925, "learning_rate": 2.7985074626865672e-05, "loss": 0.5879, "step": 300 }, { "epoch": 0.14225746268656717, "grad_norm": 0.5160495844776948, "learning_rate": 2.8451492537313434e-05, "loss": 0.5776, "step": 305 }, { "epoch": 0.14458955223880596, "grad_norm": 0.5822141320258251, "learning_rate": 2.8917910447761193e-05, "loss": 0.6006, "step": 310 }, { "epoch": 0.14692164179104478, "grad_norm": 0.5087791828023077, "learning_rate": 2.9384328358208955e-05, "loss": 0.6039, "step": 315 }, { "epoch": 0.14925373134328357, "grad_norm": 0.5541301178131879, "learning_rate": 2.9850746268656714e-05, "loss": 0.5904, "step": 320 }, { "epoch": 0.1515858208955224, "grad_norm": 0.4602568467684823, "learning_rate": 3.031716417910448e-05, "loss": 0.5911, "step": 325 }, { "epoch": 0.15391791044776118, "grad_norm": 0.48388507286961585, "learning_rate": 3.078358208955224e-05, "loss": 0.5938, "step": 330 }, { "epoch": 0.15625, "grad_norm": 0.5612358034324315, "learning_rate": 3.125e-05, "loss": 0.5915, "step": 335 }, { "epoch": 0.15858208955223882, "grad_norm": 0.5188139815530584, "learning_rate": 3.171641791044776e-05, "loss": 0.6149, "step": 340 }, { "epoch": 0.1609141791044776, "grad_norm": 0.509099940413524, "learning_rate": 3.2182835820895525e-05, "loss": 0.5979, "step": 345 }, { "epoch": 0.16324626865671643, "grad_norm": 0.5074329245083399, "learning_rate": 3.264925373134329e-05, "loss": 0.5971, "step": 350 }, { "epoch": 0.16557835820895522, "grad_norm": 0.570510926509384, "learning_rate": 3.311567164179105e-05, "loss": 0.5904, "step": 355 }, { "epoch": 0.16791044776119404, "grad_norm": 0.5326284625656521, "learning_rate": 3.358208955223881e-05, "loss": 0.5965, "step": 360 }, { "epoch": 0.17024253731343283, "grad_norm": 0.5109884076312404, "learning_rate": 3.404850746268657e-05, "loss": 0.591, "step": 365 }, { "epoch": 0.17257462686567165, "grad_norm": 0.4741394953145448, "learning_rate": 3.451492537313433e-05, "loss": 0.582, "step": 370 }, { "epoch": 0.17490671641791045, "grad_norm": 0.6050725674218901, "learning_rate": 3.498134328358209e-05, "loss": 0.5742, "step": 375 }, { "epoch": 0.17723880597014927, "grad_norm": 0.5230144972448404, "learning_rate": 3.5447761194029854e-05, "loss": 0.5849, "step": 380 }, { "epoch": 0.17957089552238806, "grad_norm": 0.48073124926504845, "learning_rate": 3.5914179104477616e-05, "loss": 0.5733, "step": 385 }, { "epoch": 0.18190298507462688, "grad_norm": 0.5500233403806325, "learning_rate": 3.638059701492538e-05, "loss": 0.587, "step": 390 }, { "epoch": 0.18423507462686567, "grad_norm": 0.43955225616530763, "learning_rate": 3.6847014925373134e-05, "loss": 0.5679, "step": 395 }, { "epoch": 0.1865671641791045, "grad_norm": 0.5478378915926606, "learning_rate": 3.73134328358209e-05, "loss": 0.5929, "step": 400 }, { "epoch": 0.18889925373134328, "grad_norm": 0.5120045573095398, "learning_rate": 3.777985074626866e-05, "loss": 0.5918, "step": 405 }, { "epoch": 0.1912313432835821, "grad_norm": 0.5104230024559651, "learning_rate": 3.824626865671642e-05, "loss": 0.5971, "step": 410 }, { "epoch": 0.1935634328358209, "grad_norm": 0.49704091288274654, "learning_rate": 3.871268656716418e-05, "loss": 0.5788, "step": 415 }, { "epoch": 0.1958955223880597, "grad_norm": 0.6106308270800648, "learning_rate": 3.9179104477611945e-05, "loss": 0.5935, "step": 420 }, { "epoch": 0.1982276119402985, "grad_norm": 0.5263574845840019, "learning_rate": 3.96455223880597e-05, "loss": 0.5659, "step": 425 }, { "epoch": 0.20055970149253732, "grad_norm": 0.4885734431665078, "learning_rate": 4.011194029850747e-05, "loss": 0.6024, "step": 430 }, { "epoch": 0.20289179104477612, "grad_norm": 0.5645574220780547, "learning_rate": 4.0578358208955225e-05, "loss": 0.5875, "step": 435 }, { "epoch": 0.20522388059701493, "grad_norm": 0.48875996533864763, "learning_rate": 4.104477611940299e-05, "loss": 0.5728, "step": 440 }, { "epoch": 0.20755597014925373, "grad_norm": 0.4841538383903449, "learning_rate": 4.151119402985075e-05, "loss": 0.5633, "step": 445 }, { "epoch": 0.20988805970149255, "grad_norm": 0.469724963071256, "learning_rate": 4.197761194029851e-05, "loss": 0.5825, "step": 450 }, { "epoch": 0.21222014925373134, "grad_norm": 0.4970754876466534, "learning_rate": 4.244402985074627e-05, "loss": 0.581, "step": 455 }, { "epoch": 0.21455223880597016, "grad_norm": 0.47575387598595414, "learning_rate": 4.2910447761194036e-05, "loss": 0.584, "step": 460 }, { "epoch": 0.21688432835820895, "grad_norm": 0.5036987391450833, "learning_rate": 4.337686567164179e-05, "loss": 0.5959, "step": 465 }, { "epoch": 0.21921641791044777, "grad_norm": 0.5368498125244545, "learning_rate": 4.384328358208955e-05, "loss": 0.5827, "step": 470 }, { "epoch": 0.22154850746268656, "grad_norm": 0.5327193645804722, "learning_rate": 4.4309701492537316e-05, "loss": 0.5892, "step": 475 }, { "epoch": 0.22388059701492538, "grad_norm": 0.46877641201938425, "learning_rate": 4.477611940298508e-05, "loss": 0.5882, "step": 480 }, { "epoch": 0.22621268656716417, "grad_norm": 0.6248399116032307, "learning_rate": 4.524253731343284e-05, "loss": 0.6064, "step": 485 }, { "epoch": 0.228544776119403, "grad_norm": 0.43461058898712157, "learning_rate": 4.57089552238806e-05, "loss": 0.5404, "step": 490 }, { "epoch": 0.23087686567164178, "grad_norm": 0.46868138061617515, "learning_rate": 4.617537313432836e-05, "loss": 0.5866, "step": 495 }, { "epoch": 0.2332089552238806, "grad_norm": 0.4832877005784685, "learning_rate": 4.664179104477612e-05, "loss": 0.5613, "step": 500 }, { "epoch": 0.2355410447761194, "grad_norm": 0.4558074476951364, "learning_rate": 4.710820895522388e-05, "loss": 0.5671, "step": 505 }, { "epoch": 0.23787313432835822, "grad_norm": 0.5366670664425746, "learning_rate": 4.7574626865671644e-05, "loss": 0.5766, "step": 510 }, { "epoch": 0.240205223880597, "grad_norm": 0.4542966753786887, "learning_rate": 4.8041044776119407e-05, "loss": 0.58, "step": 515 }, { "epoch": 0.24253731343283583, "grad_norm": 0.48987918147478154, "learning_rate": 4.850746268656717e-05, "loss": 0.5816, "step": 520 }, { "epoch": 0.24486940298507462, "grad_norm": 0.5163300706677894, "learning_rate": 4.8973880597014924e-05, "loss": 0.5666, "step": 525 }, { "epoch": 0.24720149253731344, "grad_norm": 0.4927091604878387, "learning_rate": 4.944029850746269e-05, "loss": 0.574, "step": 530 }, { "epoch": 0.24953358208955223, "grad_norm": 0.49169368786047923, "learning_rate": 4.990671641791045e-05, "loss": 0.575, "step": 535 }, { "epoch": 0.251865671641791, "grad_norm": 0.5498088444156006, "learning_rate": 4.9999982870865717e-05, "loss": 0.5624, "step": 540 }, { "epoch": 0.25419776119402987, "grad_norm": 0.4947811396796278, "learning_rate": 4.999991328380238e-05, "loss": 0.576, "step": 545 }, { "epoch": 0.25652985074626866, "grad_norm": 0.5116626322425373, "learning_rate": 4.999979016840452e-05, "loss": 0.5702, "step": 550 }, { "epoch": 0.25886194029850745, "grad_norm": 0.5418657892507784, "learning_rate": 4.999961352496503e-05, "loss": 0.5643, "step": 555 }, { "epoch": 0.26119402985074625, "grad_norm": 0.45605282350976517, "learning_rate": 4.9999383353904156e-05, "loss": 0.5894, "step": 560 }, { "epoch": 0.2635261194029851, "grad_norm": 0.4425730413616792, "learning_rate": 4.999909965576949e-05, "loss": 0.5783, "step": 565 }, { "epoch": 0.2658582089552239, "grad_norm": 0.4091575791592399, "learning_rate": 4.9998762431235955e-05, "loss": 0.584, "step": 570 }, { "epoch": 0.2681902985074627, "grad_norm": 0.4694658550972048, "learning_rate": 4.999837168110584e-05, "loss": 0.563, "step": 575 }, { "epoch": 0.27052238805970147, "grad_norm": 0.44268221210479863, "learning_rate": 4.999792740630874e-05, "loss": 0.5679, "step": 580 }, { "epoch": 0.2728544776119403, "grad_norm": 0.434972756462237, "learning_rate": 4.999742960790161e-05, "loss": 0.5681, "step": 585 }, { "epoch": 0.2751865671641791, "grad_norm": 0.4263728295406491, "learning_rate": 4.999687828706874e-05, "loss": 0.5508, "step": 590 }, { "epoch": 0.2775186567164179, "grad_norm": 0.44992597479158, "learning_rate": 4.9996273445121744e-05, "loss": 0.5714, "step": 595 }, { "epoch": 0.2798507462686567, "grad_norm": 0.5464963868412782, "learning_rate": 4.999561508349957e-05, "loss": 0.5798, "step": 600 }, { "epoch": 0.28218283582089554, "grad_norm": 0.4036339704804201, "learning_rate": 4.9994903203768486e-05, "loss": 0.5471, "step": 605 }, { "epoch": 0.28451492537313433, "grad_norm": 0.5223581157481912, "learning_rate": 4.99941378076221e-05, "loss": 0.572, "step": 610 }, { "epoch": 0.2868470149253731, "grad_norm": 0.4412008973823726, "learning_rate": 4.999331889688131e-05, "loss": 0.5791, "step": 615 }, { "epoch": 0.2891791044776119, "grad_norm": 0.5086061777921279, "learning_rate": 4.999244647349435e-05, "loss": 0.5561, "step": 620 }, { "epoch": 0.29151119402985076, "grad_norm": 0.4164356940793962, "learning_rate": 4.999152053953675e-05, "loss": 0.5582, "step": 625 }, { "epoch": 0.29384328358208955, "grad_norm": 0.39593864037351323, "learning_rate": 4.999054109721136e-05, "loss": 0.5812, "step": 630 }, { "epoch": 0.29617537313432835, "grad_norm": 0.41529828168523664, "learning_rate": 4.9989508148848315e-05, "loss": 0.585, "step": 635 }, { "epoch": 0.29850746268656714, "grad_norm": 0.41401622682171924, "learning_rate": 4.998842169690504e-05, "loss": 0.576, "step": 640 }, { "epoch": 0.300839552238806, "grad_norm": 0.4402628536417918, "learning_rate": 4.998728174396626e-05, "loss": 0.5606, "step": 645 }, { "epoch": 0.3031716417910448, "grad_norm": 0.5058837360616752, "learning_rate": 4.998608829274398e-05, "loss": 0.569, "step": 650 }, { "epoch": 0.30550373134328357, "grad_norm": 0.5244539263641883, "learning_rate": 4.998484134607746e-05, "loss": 0.5729, "step": 655 }, { "epoch": 0.30783582089552236, "grad_norm": 0.4201765669794548, "learning_rate": 4.998354090693326e-05, "loss": 0.5717, "step": 660 }, { "epoch": 0.3101679104477612, "grad_norm": 0.41587213936098155, "learning_rate": 4.9982186978405175e-05, "loss": 0.556, "step": 665 }, { "epoch": 0.3125, "grad_norm": 0.3923261607162458, "learning_rate": 4.9980779563714274e-05, "loss": 0.5781, "step": 670 }, { "epoch": 0.3148320895522388, "grad_norm": 0.4353288420164073, "learning_rate": 4.9979318666208855e-05, "loss": 0.5473, "step": 675 }, { "epoch": 0.31716417910447764, "grad_norm": 0.3759454813104066, "learning_rate": 4.997780428936446e-05, "loss": 0.5573, "step": 680 }, { "epoch": 0.31949626865671643, "grad_norm": 0.41734883044163185, "learning_rate": 4.9976236436783865e-05, "loss": 0.5775, "step": 685 }, { "epoch": 0.3218283582089552, "grad_norm": 0.42610361209989317, "learning_rate": 4.997461511219705e-05, "loss": 0.5571, "step": 690 }, { "epoch": 0.324160447761194, "grad_norm": 0.4521159293396472, "learning_rate": 4.997294031946124e-05, "loss": 0.5746, "step": 695 }, { "epoch": 0.32649253731343286, "grad_norm": 0.41624558419427676, "learning_rate": 4.9971212062560844e-05, "loss": 0.552, "step": 700 }, { "epoch": 0.32882462686567165, "grad_norm": 0.41064404200415605, "learning_rate": 4.9969430345607445e-05, "loss": 0.559, "step": 705 }, { "epoch": 0.33115671641791045, "grad_norm": 0.45565570121911403, "learning_rate": 4.996759517283986e-05, "loss": 0.5552, "step": 710 }, { "epoch": 0.33348880597014924, "grad_norm": 0.42912644318681875, "learning_rate": 4.996570654862402e-05, "loss": 0.5815, "step": 715 }, { "epoch": 0.3358208955223881, "grad_norm": 0.41115213549546004, "learning_rate": 4.996376447745307e-05, "loss": 0.5679, "step": 720 }, { "epoch": 0.3381529850746269, "grad_norm": 0.40570892089654226, "learning_rate": 4.996176896394728e-05, "loss": 0.5636, "step": 725 }, { "epoch": 0.34048507462686567, "grad_norm": 0.41301208062222433, "learning_rate": 4.995972001285406e-05, "loss": 0.5741, "step": 730 }, { "epoch": 0.34281716417910446, "grad_norm": 0.3806697117708081, "learning_rate": 4.995761762904797e-05, "loss": 0.5628, "step": 735 }, { "epoch": 0.3451492537313433, "grad_norm": 0.4492848161350124, "learning_rate": 4.995546181753069e-05, "loss": 0.5784, "step": 740 }, { "epoch": 0.3474813432835821, "grad_norm": 0.40705609725571307, "learning_rate": 4.9953252583430965e-05, "loss": 0.5526, "step": 745 }, { "epoch": 0.3498134328358209, "grad_norm": 0.3971807082040081, "learning_rate": 4.9950989932004684e-05, "loss": 0.5631, "step": 750 }, { "epoch": 0.3521455223880597, "grad_norm": 0.41199147527442126, "learning_rate": 4.9948673868634806e-05, "loss": 0.558, "step": 755 }, { "epoch": 0.35447761194029853, "grad_norm": 0.41147804012481887, "learning_rate": 4.9946304398831336e-05, "loss": 0.5554, "step": 760 }, { "epoch": 0.3568097014925373, "grad_norm": 0.38159821955380585, "learning_rate": 4.9943881528231365e-05, "loss": 0.557, "step": 765 }, { "epoch": 0.3591417910447761, "grad_norm": 0.4134491620413654, "learning_rate": 4.994140526259901e-05, "loss": 0.5536, "step": 770 }, { "epoch": 0.3614738805970149, "grad_norm": 0.46065068700959344, "learning_rate": 4.993887560782541e-05, "loss": 0.5557, "step": 775 }, { "epoch": 0.36380597014925375, "grad_norm": 0.44111664551891794, "learning_rate": 4.993629256992876e-05, "loss": 0.5632, "step": 780 }, { "epoch": 0.36613805970149255, "grad_norm": 0.38242083999278226, "learning_rate": 4.99336561550542e-05, "loss": 0.5522, "step": 785 }, { "epoch": 0.36847014925373134, "grad_norm": 0.3960239963198183, "learning_rate": 4.993096636947389e-05, "loss": 0.5785, "step": 790 }, { "epoch": 0.37080223880597013, "grad_norm": 0.4392834730710291, "learning_rate": 4.992822321958695e-05, "loss": 0.5448, "step": 795 }, { "epoch": 0.373134328358209, "grad_norm": 0.4236495892352232, "learning_rate": 4.992542671191948e-05, "loss": 0.5691, "step": 800 }, { "epoch": 0.37546641791044777, "grad_norm": 0.4023321178572999, "learning_rate": 4.992257685312448e-05, "loss": 0.559, "step": 805 }, { "epoch": 0.37779850746268656, "grad_norm": 0.4520138826986025, "learning_rate": 4.991967364998191e-05, "loss": 0.5605, "step": 810 }, { "epoch": 0.38013059701492535, "grad_norm": 0.4255435712890787, "learning_rate": 4.991671710939861e-05, "loss": 0.5489, "step": 815 }, { "epoch": 0.3824626865671642, "grad_norm": 0.3756742774439683, "learning_rate": 4.991370723840834e-05, "loss": 0.5409, "step": 820 }, { "epoch": 0.384794776119403, "grad_norm": 0.4208899259681843, "learning_rate": 4.9910644044171714e-05, "loss": 0.5506, "step": 825 }, { "epoch": 0.3871268656716418, "grad_norm": 0.46765402860212946, "learning_rate": 4.9907527533976214e-05, "loss": 0.5614, "step": 830 }, { "epoch": 0.3894589552238806, "grad_norm": 0.418915156201997, "learning_rate": 4.9904357715236164e-05, "loss": 0.5643, "step": 835 }, { "epoch": 0.3917910447761194, "grad_norm": 0.40003446505434603, "learning_rate": 4.990113459549271e-05, "loss": 0.558, "step": 840 }, { "epoch": 0.3941231343283582, "grad_norm": 0.4138597952328836, "learning_rate": 4.98978581824138e-05, "loss": 0.5678, "step": 845 }, { "epoch": 0.396455223880597, "grad_norm": 0.43334983532124594, "learning_rate": 4.9894528483794175e-05, "loss": 0.5611, "step": 850 }, { "epoch": 0.3987873134328358, "grad_norm": 0.36835058809576865, "learning_rate": 4.9891145507555346e-05, "loss": 0.565, "step": 855 }, { "epoch": 0.40111940298507465, "grad_norm": 0.384808108418171, "learning_rate": 4.9887709261745566e-05, "loss": 0.5577, "step": 860 }, { "epoch": 0.40345149253731344, "grad_norm": 0.4015689018193905, "learning_rate": 4.988421975453982e-05, "loss": 0.5689, "step": 865 }, { "epoch": 0.40578358208955223, "grad_norm": 0.4397180108747486, "learning_rate": 4.9880676994239805e-05, "loss": 0.5609, "step": 870 }, { "epoch": 0.408115671641791, "grad_norm": 0.4206072707120865, "learning_rate": 4.9877080989273925e-05, "loss": 0.5732, "step": 875 }, { "epoch": 0.41044776119402987, "grad_norm": 0.43715797665945394, "learning_rate": 4.987343174819723e-05, "loss": 0.5417, "step": 880 }, { "epoch": 0.41277985074626866, "grad_norm": 0.4047779753136153, "learning_rate": 4.9869729279691425e-05, "loss": 0.5408, "step": 885 }, { "epoch": 0.41511194029850745, "grad_norm": 0.3854994757250592, "learning_rate": 4.9865973592564876e-05, "loss": 0.5702, "step": 890 }, { "epoch": 0.41744402985074625, "grad_norm": 0.4003343884313177, "learning_rate": 4.9862164695752524e-05, "loss": 0.5565, "step": 895 }, { "epoch": 0.4197761194029851, "grad_norm": 0.43212414161632823, "learning_rate": 4.98583025983159e-05, "loss": 0.5591, "step": 900 }, { "epoch": 0.4221082089552239, "grad_norm": 0.4183933276689782, "learning_rate": 4.985438730944314e-05, "loss": 0.5466, "step": 905 }, { "epoch": 0.4244402985074627, "grad_norm": 0.3831981632222535, "learning_rate": 4.985041883844888e-05, "loss": 0.5545, "step": 910 }, { "epoch": 0.42677238805970147, "grad_norm": 0.4030650697212643, "learning_rate": 4.9846397194774294e-05, "loss": 0.5461, "step": 915 }, { "epoch": 0.4291044776119403, "grad_norm": 0.3677001002235363, "learning_rate": 4.984232238798707e-05, "loss": 0.5483, "step": 920 }, { "epoch": 0.4314365671641791, "grad_norm": 0.39923985255605793, "learning_rate": 4.983819442778134e-05, "loss": 0.5446, "step": 925 }, { "epoch": 0.4337686567164179, "grad_norm": 0.3609775815554434, "learning_rate": 4.983401332397775e-05, "loss": 0.5302, "step": 930 }, { "epoch": 0.4361007462686567, "grad_norm": 0.4075429451841239, "learning_rate": 4.9829779086523295e-05, "loss": 0.5645, "step": 935 }, { "epoch": 0.43843283582089554, "grad_norm": 0.39192485111109526, "learning_rate": 4.982549172549145e-05, "loss": 0.5589, "step": 940 }, { "epoch": 0.44076492537313433, "grad_norm": 0.39993493767810395, "learning_rate": 4.9821151251082035e-05, "loss": 0.5737, "step": 945 }, { "epoch": 0.4430970149253731, "grad_norm": 0.4186646892211082, "learning_rate": 4.981675767362125e-05, "loss": 0.5482, "step": 950 }, { "epoch": 0.4454291044776119, "grad_norm": 0.4637400472930426, "learning_rate": 4.98123110035616e-05, "loss": 0.5531, "step": 955 }, { "epoch": 0.44776119402985076, "grad_norm": 0.4067265443583475, "learning_rate": 4.980781125148194e-05, "loss": 0.5519, "step": 960 }, { "epoch": 0.45009328358208955, "grad_norm": 0.4318758995422347, "learning_rate": 4.980325842808737e-05, "loss": 0.5619, "step": 965 }, { "epoch": 0.45242537313432835, "grad_norm": 0.411832983619724, "learning_rate": 4.979865254420929e-05, "loss": 0.5346, "step": 970 }, { "epoch": 0.45475746268656714, "grad_norm": 0.40954522187906084, "learning_rate": 4.9793993610805276e-05, "loss": 0.5533, "step": 975 }, { "epoch": 0.457089552238806, "grad_norm": 0.38226692338905877, "learning_rate": 4.9789281638959184e-05, "loss": 0.571, "step": 980 }, { "epoch": 0.4594216417910448, "grad_norm": 0.39791193610676584, "learning_rate": 4.978451663988099e-05, "loss": 0.5462, "step": 985 }, { "epoch": 0.46175373134328357, "grad_norm": 0.40774611590411897, "learning_rate": 4.977969862490685e-05, "loss": 0.5445, "step": 990 }, { "epoch": 0.46408582089552236, "grad_norm": 0.372198751387616, "learning_rate": 4.977482760549905e-05, "loss": 0.5428, "step": 995 }, { "epoch": 0.4664179104477612, "grad_norm": 0.38217827901801454, "learning_rate": 4.976990359324597e-05, "loss": 0.5403, "step": 1000 }, { "epoch": 0.46875, "grad_norm": 0.3957281965976151, "learning_rate": 4.9764926599862065e-05, "loss": 0.5609, "step": 1005 }, { "epoch": 0.4710820895522388, "grad_norm": 0.37772635878670907, "learning_rate": 4.9759896637187826e-05, "loss": 0.5473, "step": 1010 }, { "epoch": 0.47341417910447764, "grad_norm": 0.3652221392783123, "learning_rate": 4.9754813717189765e-05, "loss": 0.5564, "step": 1015 }, { "epoch": 0.47574626865671643, "grad_norm": 0.3652469430383942, "learning_rate": 4.974967785196039e-05, "loss": 0.5404, "step": 1020 }, { "epoch": 0.4780783582089552, "grad_norm": 0.36579411989906985, "learning_rate": 4.974448905371816e-05, "loss": 0.5502, "step": 1025 }, { "epoch": 0.480410447761194, "grad_norm": 0.3713440506580523, "learning_rate": 4.973924733480747e-05, "loss": 0.5444, "step": 1030 }, { "epoch": 0.48274253731343286, "grad_norm": 0.3678565409040218, "learning_rate": 4.9733952707698606e-05, "loss": 0.5354, "step": 1035 }, { "epoch": 0.48507462686567165, "grad_norm": 0.3626115529857116, "learning_rate": 4.9728605184987724e-05, "loss": 0.542, "step": 1040 }, { "epoch": 0.48740671641791045, "grad_norm": 0.37253632119315827, "learning_rate": 4.972320477939685e-05, "loss": 0.5316, "step": 1045 }, { "epoch": 0.48973880597014924, "grad_norm": 0.3705996160437411, "learning_rate": 4.971775150377378e-05, "loss": 0.5507, "step": 1050 }, { "epoch": 0.4920708955223881, "grad_norm": 0.358241975779833, "learning_rate": 4.971224537109211e-05, "loss": 0.5265, "step": 1055 }, { "epoch": 0.4944029850746269, "grad_norm": 0.40794664312471457, "learning_rate": 4.970668639445119e-05, "loss": 0.5364, "step": 1060 }, { "epoch": 0.49673507462686567, "grad_norm": 0.3975538963313393, "learning_rate": 4.970107458707608e-05, "loss": 0.5467, "step": 1065 }, { "epoch": 0.49906716417910446, "grad_norm": 0.41481074104861354, "learning_rate": 4.969540996231754e-05, "loss": 0.56, "step": 1070 }, { "epoch": 0.5013992537313433, "grad_norm": 0.3948087462273658, "learning_rate": 4.968969253365196e-05, "loss": 0.5505, "step": 1075 }, { "epoch": 0.503731343283582, "grad_norm": 0.3744272127291213, "learning_rate": 4.9683922314681374e-05, "loss": 0.5487, "step": 1080 }, { "epoch": 0.5060634328358209, "grad_norm": 0.3859303445252345, "learning_rate": 4.96780993191334e-05, "loss": 0.547, "step": 1085 }, { "epoch": 0.5083955223880597, "grad_norm": 0.39250939077407293, "learning_rate": 4.9672223560861204e-05, "loss": 0.5589, "step": 1090 }, { "epoch": 0.5107276119402985, "grad_norm": 0.3793482689528634, "learning_rate": 4.9666295053843495e-05, "loss": 0.5361, "step": 1095 }, { "epoch": 0.5130597014925373, "grad_norm": 0.3759471113718543, "learning_rate": 4.966031381218447e-05, "loss": 0.5598, "step": 1100 }, { "epoch": 0.5153917910447762, "grad_norm": 0.3820158935725623, "learning_rate": 4.9654279850113775e-05, "loss": 0.55, "step": 1105 }, { "epoch": 0.5177238805970149, "grad_norm": 0.40343283105317984, "learning_rate": 4.964819318198648e-05, "loss": 0.5425, "step": 1110 }, { "epoch": 0.5200559701492538, "grad_norm": 0.3976625683280339, "learning_rate": 4.9642053822283066e-05, "loss": 0.5357, "step": 1115 }, { "epoch": 0.5223880597014925, "grad_norm": 0.34911756043276876, "learning_rate": 4.9635861785609333e-05, "loss": 0.5621, "step": 1120 }, { "epoch": 0.5247201492537313, "grad_norm": 0.36057729241798103, "learning_rate": 4.9629617086696434e-05, "loss": 0.5385, "step": 1125 }, { "epoch": 0.5270522388059702, "grad_norm": 0.3350907877386989, "learning_rate": 4.962331974040079e-05, "loss": 0.5411, "step": 1130 }, { "epoch": 0.5293843283582089, "grad_norm": 0.39047882235983533, "learning_rate": 4.961696976170409e-05, "loss": 0.5311, "step": 1135 }, { "epoch": 0.5317164179104478, "grad_norm": 0.4014893974981797, "learning_rate": 4.961056716571322e-05, "loss": 0.5302, "step": 1140 }, { "epoch": 0.5340485074626866, "grad_norm": 0.4241978975949145, "learning_rate": 4.960411196766025e-05, "loss": 0.5551, "step": 1145 }, { "epoch": 0.5363805970149254, "grad_norm": 0.42546168192379435, "learning_rate": 4.95976041829024e-05, "loss": 0.5363, "step": 1150 }, { "epoch": 0.5387126865671642, "grad_norm": 0.337011122200343, "learning_rate": 4.9591043826921984e-05, "loss": 0.5536, "step": 1155 }, { "epoch": 0.5410447761194029, "grad_norm": 0.34305834891770093, "learning_rate": 4.95844309153264e-05, "loss": 0.5461, "step": 1160 }, { "epoch": 0.5433768656716418, "grad_norm": 0.3416477558846636, "learning_rate": 4.9577765463848065e-05, "loss": 0.525, "step": 1165 }, { "epoch": 0.5457089552238806, "grad_norm": 0.34059287949176814, "learning_rate": 4.957104748834441e-05, "loss": 0.5497, "step": 1170 }, { "epoch": 0.5480410447761194, "grad_norm": 0.36442892174189684, "learning_rate": 4.9564277004797784e-05, "loss": 0.5343, "step": 1175 }, { "epoch": 0.5503731343283582, "grad_norm": 0.35064546351853537, "learning_rate": 4.95574540293155e-05, "loss": 0.5351, "step": 1180 }, { "epoch": 0.5527052238805971, "grad_norm": 0.3960831009505344, "learning_rate": 4.9550578578129734e-05, "loss": 0.5528, "step": 1185 }, { "epoch": 0.5550373134328358, "grad_norm": 0.3941351383467762, "learning_rate": 4.954365066759748e-05, "loss": 0.5375, "step": 1190 }, { "epoch": 0.5573694029850746, "grad_norm": 0.3668895373159287, "learning_rate": 4.95366703142006e-05, "loss": 0.5405, "step": 1195 }, { "epoch": 0.5597014925373134, "grad_norm": 0.36042341857179533, "learning_rate": 4.952963753454563e-05, "loss": 0.532, "step": 1200 }, { "epoch": 0.5620335820895522, "grad_norm": 0.3992814832744831, "learning_rate": 4.95225523453639e-05, "loss": 0.5325, "step": 1205 }, { "epoch": 0.5643656716417911, "grad_norm": 0.4028510807063909, "learning_rate": 4.951541476351141e-05, "loss": 0.5252, "step": 1210 }, { "epoch": 0.5666977611940298, "grad_norm": 0.42129744360789534, "learning_rate": 4.9508224805968784e-05, "loss": 0.5379, "step": 1215 }, { "epoch": 0.5690298507462687, "grad_norm": 0.3713937825556482, "learning_rate": 4.950098248984127e-05, "loss": 0.5482, "step": 1220 }, { "epoch": 0.5713619402985075, "grad_norm": 0.3580475063653948, "learning_rate": 4.949368783235867e-05, "loss": 0.5566, "step": 1225 }, { "epoch": 0.5736940298507462, "grad_norm": 0.34895266516882334, "learning_rate": 4.9486340850875316e-05, "loss": 0.5424, "step": 1230 }, { "epoch": 0.5760261194029851, "grad_norm": 0.34648840102526307, "learning_rate": 4.947894156287001e-05, "loss": 0.5396, "step": 1235 }, { "epoch": 0.5783582089552238, "grad_norm": 0.3575446766402884, "learning_rate": 4.947148998594601e-05, "loss": 0.5281, "step": 1240 }, { "epoch": 0.5806902985074627, "grad_norm": 0.4101764251769675, "learning_rate": 4.946398613783096e-05, "loss": 0.5344, "step": 1245 }, { "epoch": 0.5830223880597015, "grad_norm": 0.33332597588786006, "learning_rate": 4.945643003637686e-05, "loss": 0.5414, "step": 1250 }, { "epoch": 0.5853544776119403, "grad_norm": 0.40404282849248474, "learning_rate": 4.944882169956001e-05, "loss": 0.5458, "step": 1255 }, { "epoch": 0.5876865671641791, "grad_norm": 0.36464067116640114, "learning_rate": 4.9441161145481016e-05, "loss": 0.5325, "step": 1260 }, { "epoch": 0.590018656716418, "grad_norm": 0.35277563261199946, "learning_rate": 4.9433448392364694e-05, "loss": 0.5516, "step": 1265 }, { "epoch": 0.5923507462686567, "grad_norm": 0.32771425847930086, "learning_rate": 4.942568345856002e-05, "loss": 0.5332, "step": 1270 }, { "epoch": 0.5946828358208955, "grad_norm": 0.33833243875653346, "learning_rate": 4.941786636254014e-05, "loss": 0.5258, "step": 1275 }, { "epoch": 0.5970149253731343, "grad_norm": 0.3497744005386503, "learning_rate": 4.940999712290229e-05, "loss": 0.5269, "step": 1280 }, { "epoch": 0.5993470149253731, "grad_norm": 0.33841894660949384, "learning_rate": 4.940207575836775e-05, "loss": 0.5361, "step": 1285 }, { "epoch": 0.601679104477612, "grad_norm": 0.3811025241133377, "learning_rate": 4.9394102287781816e-05, "loss": 0.5418, "step": 1290 }, { "epoch": 0.6040111940298507, "grad_norm": 0.34091515778594245, "learning_rate": 4.938607673011375e-05, "loss": 0.5235, "step": 1295 }, { "epoch": 0.6063432835820896, "grad_norm": 0.3883983144481749, "learning_rate": 4.9377999104456704e-05, "loss": 0.5284, "step": 1300 }, { "epoch": 0.6086753731343284, "grad_norm": 0.34329959776358226, "learning_rate": 4.9369869430027756e-05, "loss": 0.5244, "step": 1305 }, { "epoch": 0.6110074626865671, "grad_norm": 0.3408142894445899, "learning_rate": 4.9361687726167746e-05, "loss": 0.5456, "step": 1310 }, { "epoch": 0.613339552238806, "grad_norm": 0.33087354200314084, "learning_rate": 4.9353454012341346e-05, "loss": 0.5354, "step": 1315 }, { "epoch": 0.6156716417910447, "grad_norm": 0.3723701706806508, "learning_rate": 4.934516830813693e-05, "loss": 0.5424, "step": 1320 }, { "epoch": 0.6180037313432836, "grad_norm": 0.3732417172799275, "learning_rate": 4.9336830633266565e-05, "loss": 0.5358, "step": 1325 }, { "epoch": 0.6203358208955224, "grad_norm": 0.34512859934646883, "learning_rate": 4.932844100756599e-05, "loss": 0.5336, "step": 1330 }, { "epoch": 0.6226679104477612, "grad_norm": 0.3974168785588145, "learning_rate": 4.931999945099449e-05, "loss": 0.5393, "step": 1335 }, { "epoch": 0.625, "grad_norm": 0.38578760573550785, "learning_rate": 4.931150598363494e-05, "loss": 0.5253, "step": 1340 }, { "epoch": 0.6273320895522388, "grad_norm": 0.3620054406740747, "learning_rate": 4.9302960625693666e-05, "loss": 0.5289, "step": 1345 }, { "epoch": 0.6296641791044776, "grad_norm": 0.34885855958230283, "learning_rate": 4.929436339750049e-05, "loss": 0.5497, "step": 1350 }, { "epoch": 0.6319962686567164, "grad_norm": 0.30723787880779496, "learning_rate": 4.9285714319508607e-05, "loss": 0.5275, "step": 1355 }, { "epoch": 0.6343283582089553, "grad_norm": 0.328932824470315, "learning_rate": 4.927701341229457e-05, "loss": 0.537, "step": 1360 }, { "epoch": 0.636660447761194, "grad_norm": 0.3733544778977239, "learning_rate": 4.9268260696558264e-05, "loss": 0.5358, "step": 1365 }, { "epoch": 0.6389925373134329, "grad_norm": 0.35468746828054404, "learning_rate": 4.925945619312277e-05, "loss": 0.5283, "step": 1370 }, { "epoch": 0.6413246268656716, "grad_norm": 0.37870167235760666, "learning_rate": 4.925059992293443e-05, "loss": 0.5359, "step": 1375 }, { "epoch": 0.6436567164179104, "grad_norm": 0.33991959812553946, "learning_rate": 4.924169190706271e-05, "loss": 0.5142, "step": 1380 }, { "epoch": 0.6459888059701493, "grad_norm": 0.3527811037292783, "learning_rate": 4.92327321667002e-05, "loss": 0.5173, "step": 1385 }, { "epoch": 0.648320895522388, "grad_norm": 0.35576245249963906, "learning_rate": 4.922372072316253e-05, "loss": 0.5415, "step": 1390 }, { "epoch": 0.6506529850746269, "grad_norm": 0.3816050519320196, "learning_rate": 4.9214657597888354e-05, "loss": 0.5593, "step": 1395 }, { "epoch": 0.6529850746268657, "grad_norm": 0.3784172271368758, "learning_rate": 4.920554281243925e-05, "loss": 0.5349, "step": 1400 }, { "epoch": 0.6553171641791045, "grad_norm": 0.41918359184853343, "learning_rate": 4.919637638849972e-05, "loss": 0.5223, "step": 1405 }, { "epoch": 0.6576492537313433, "grad_norm": 0.3483923490974592, "learning_rate": 4.918715834787711e-05, "loss": 0.5362, "step": 1410 }, { "epoch": 0.659981343283582, "grad_norm": 0.35911416690460024, "learning_rate": 4.917788871250157e-05, "loss": 0.5261, "step": 1415 }, { "epoch": 0.6623134328358209, "grad_norm": 0.35255576025375807, "learning_rate": 4.9168567504425994e-05, "loss": 0.5171, "step": 1420 }, { "epoch": 0.6646455223880597, "grad_norm": 0.3240403361781962, "learning_rate": 4.915919474582596e-05, "loss": 0.5266, "step": 1425 }, { "epoch": 0.6669776119402985, "grad_norm": 0.32886939787359015, "learning_rate": 4.914977045899969e-05, "loss": 0.523, "step": 1430 }, { "epoch": 0.6693097014925373, "grad_norm": 0.3774778237712222, "learning_rate": 4.914029466636801e-05, "loss": 0.5288, "step": 1435 }, { "epoch": 0.6716417910447762, "grad_norm": 0.3479403080485948, "learning_rate": 4.913076739047425e-05, "loss": 0.5248, "step": 1440 }, { "epoch": 0.6739738805970149, "grad_norm": 0.3123438536856514, "learning_rate": 4.9121188653984266e-05, "loss": 0.5328, "step": 1445 }, { "epoch": 0.6763059701492538, "grad_norm": 0.31681862386055243, "learning_rate": 4.9111558479686296e-05, "loss": 0.5289, "step": 1450 }, { "epoch": 0.6786380597014925, "grad_norm": 0.33797280594013307, "learning_rate": 4.910187689049099e-05, "loss": 0.5305, "step": 1455 }, { "epoch": 0.6809701492537313, "grad_norm": 0.34794284430567773, "learning_rate": 4.909214390943127e-05, "loss": 0.5237, "step": 1460 }, { "epoch": 0.6833022388059702, "grad_norm": 0.34060452305873434, "learning_rate": 4.908235955966236e-05, "loss": 0.5299, "step": 1465 }, { "epoch": 0.6856343283582089, "grad_norm": 0.37524257100875796, "learning_rate": 4.907252386446169e-05, "loss": 0.5259, "step": 1470 }, { "epoch": 0.6879664179104478, "grad_norm": 0.322855430906761, "learning_rate": 4.906263684722883e-05, "loss": 0.5248, "step": 1475 }, { "epoch": 0.6902985074626866, "grad_norm": 0.33929045505492195, "learning_rate": 4.905269853148543e-05, "loss": 0.5293, "step": 1480 }, { "epoch": 0.6926305970149254, "grad_norm": 0.3294623783236448, "learning_rate": 4.9042708940875225e-05, "loss": 0.5186, "step": 1485 }, { "epoch": 0.6949626865671642, "grad_norm": 0.34559814457245464, "learning_rate": 4.903266809916392e-05, "loss": 0.5256, "step": 1490 }, { "epoch": 0.6972947761194029, "grad_norm": 0.3946789966913834, "learning_rate": 4.902257603023912e-05, "loss": 0.5449, "step": 1495 }, { "epoch": 0.6996268656716418, "grad_norm": 0.3575651270807026, "learning_rate": 4.901243275811034e-05, "loss": 0.5321, "step": 1500 }, { "epoch": 0.7019589552238806, "grad_norm": 0.3369126910084287, "learning_rate": 4.9002238306908884e-05, "loss": 0.5308, "step": 1505 }, { "epoch": 0.7042910447761194, "grad_norm": 0.3377269627124363, "learning_rate": 4.899199270088782e-05, "loss": 0.5475, "step": 1510 }, { "epoch": 0.7066231343283582, "grad_norm": 0.3628842803473672, "learning_rate": 4.8981695964421934e-05, "loss": 0.526, "step": 1515 }, { "epoch": 0.7089552238805971, "grad_norm": 0.3408743210265883, "learning_rate": 4.897134812200763e-05, "loss": 0.5254, "step": 1520 }, { "epoch": 0.7112873134328358, "grad_norm": 0.34717547261512677, "learning_rate": 4.8960949198262896e-05, "loss": 0.5308, "step": 1525 }, { "epoch": 0.7136194029850746, "grad_norm": 0.353400013411367, "learning_rate": 4.895049921792727e-05, "loss": 0.5363, "step": 1530 }, { "epoch": 0.7159514925373134, "grad_norm": 0.3269264274262196, "learning_rate": 4.893999820586172e-05, "loss": 0.5453, "step": 1535 }, { "epoch": 0.7182835820895522, "grad_norm": 0.35621241069628895, "learning_rate": 4.892944618704865e-05, "loss": 0.5281, "step": 1540 }, { "epoch": 0.7206156716417911, "grad_norm": 0.3343478410155707, "learning_rate": 4.89188431865918e-05, "loss": 0.5397, "step": 1545 }, { "epoch": 0.7229477611940298, "grad_norm": 0.3377818564981383, "learning_rate": 4.89081892297162e-05, "loss": 0.5467, "step": 1550 }, { "epoch": 0.7252798507462687, "grad_norm": 0.337309084827813, "learning_rate": 4.8897484341768104e-05, "loss": 0.5254, "step": 1555 }, { "epoch": 0.7276119402985075, "grad_norm": 0.32537494988086507, "learning_rate": 4.8886728548214933e-05, "loss": 0.5199, "step": 1560 }, { "epoch": 0.7299440298507462, "grad_norm": 0.3262972769755828, "learning_rate": 4.887592187464522e-05, "loss": 0.5252, "step": 1565 }, { "epoch": 0.7322761194029851, "grad_norm": 0.3493755908357834, "learning_rate": 4.886506434676854e-05, "loss": 0.5255, "step": 1570 }, { "epoch": 0.7346082089552238, "grad_norm": 0.31856757993549556, "learning_rate": 4.885415599041545e-05, "loss": 0.5281, "step": 1575 }, { "epoch": 0.7369402985074627, "grad_norm": 0.3682779198559107, "learning_rate": 4.884319683153746e-05, "loss": 0.54, "step": 1580 }, { "epoch": 0.7392723880597015, "grad_norm": 0.3521786540911041, "learning_rate": 4.883218689620688e-05, "loss": 0.5249, "step": 1585 }, { "epoch": 0.7416044776119403, "grad_norm": 0.3414863269345257, "learning_rate": 4.882112621061687e-05, "loss": 0.5188, "step": 1590 }, { "epoch": 0.7439365671641791, "grad_norm": 0.3442003478238611, "learning_rate": 4.881001480108131e-05, "loss": 0.5301, "step": 1595 }, { "epoch": 0.746268656716418, "grad_norm": 0.32953662329750516, "learning_rate": 4.8798852694034775e-05, "loss": 0.5275, "step": 1600 }, { "epoch": 0.7486007462686567, "grad_norm": 0.3748611706584024, "learning_rate": 4.878763991603241e-05, "loss": 0.544, "step": 1605 }, { "epoch": 0.7509328358208955, "grad_norm": 0.3724819236373765, "learning_rate": 4.877637649374994e-05, "loss": 0.5409, "step": 1610 }, { "epoch": 0.7532649253731343, "grad_norm": 0.33363727054495096, "learning_rate": 4.876506245398358e-05, "loss": 0.5221, "step": 1615 }, { "epoch": 0.7555970149253731, "grad_norm": 0.42778648037225797, "learning_rate": 4.875369782364994e-05, "loss": 0.5218, "step": 1620 }, { "epoch": 0.757929104477612, "grad_norm": 0.3139401308806014, "learning_rate": 4.8742282629786005e-05, "loss": 0.5287, "step": 1625 }, { "epoch": 0.7602611940298507, "grad_norm": 0.3145458843448436, "learning_rate": 4.8730816899549046e-05, "loss": 0.5245, "step": 1630 }, { "epoch": 0.7625932835820896, "grad_norm": 0.3555008738845568, "learning_rate": 4.871930066021658e-05, "loss": 0.5067, "step": 1635 }, { "epoch": 0.7649253731343284, "grad_norm": 0.3398705279264097, "learning_rate": 4.8707733939186254e-05, "loss": 0.5145, "step": 1640 }, { "epoch": 0.7672574626865671, "grad_norm": 0.3658988957974236, "learning_rate": 4.869611676397584e-05, "loss": 0.5256, "step": 1645 }, { "epoch": 0.769589552238806, "grad_norm": 0.3448359212897554, "learning_rate": 4.868444916222313e-05, "loss": 0.5375, "step": 1650 }, { "epoch": 0.7719216417910447, "grad_norm": 0.3342623787843734, "learning_rate": 4.867273116168591e-05, "loss": 0.5284, "step": 1655 }, { "epoch": 0.7742537313432836, "grad_norm": 0.3725217681687816, "learning_rate": 4.8660962790241824e-05, "loss": 0.5426, "step": 1660 }, { "epoch": 0.7765858208955224, "grad_norm": 0.3568470173736074, "learning_rate": 4.864914407588837e-05, "loss": 0.5232, "step": 1665 }, { "epoch": 0.7789179104477612, "grad_norm": 0.3483279707807047, "learning_rate": 4.863727504674282e-05, "loss": 0.5274, "step": 1670 }, { "epoch": 0.78125, "grad_norm": 0.31746117266106816, "learning_rate": 4.8625355731042174e-05, "loss": 0.5332, "step": 1675 }, { "epoch": 0.7835820895522388, "grad_norm": 0.32577393074487054, "learning_rate": 4.861338615714299e-05, "loss": 0.5162, "step": 1680 }, { "epoch": 0.7859141791044776, "grad_norm": 0.35513543984392393, "learning_rate": 4.860136635352145e-05, "loss": 0.514, "step": 1685 }, { "epoch": 0.7882462686567164, "grad_norm": 0.32468687669980567, "learning_rate": 4.8589296348773244e-05, "loss": 0.5276, "step": 1690 }, { "epoch": 0.7905783582089553, "grad_norm": 0.3507065992522076, "learning_rate": 4.857717617161345e-05, "loss": 0.5207, "step": 1695 }, { "epoch": 0.792910447761194, "grad_norm": 0.34417358472237963, "learning_rate": 4.856500585087654e-05, "loss": 0.5312, "step": 1700 }, { "epoch": 0.7952425373134329, "grad_norm": 0.3441275496645877, "learning_rate": 4.855278541551626e-05, "loss": 0.5229, "step": 1705 }, { "epoch": 0.7975746268656716, "grad_norm": 0.3176543345653708, "learning_rate": 4.85405148946056e-05, "loss": 0.5063, "step": 1710 }, { "epoch": 0.7999067164179104, "grad_norm": 0.3332577359226092, "learning_rate": 4.8528194317336703e-05, "loss": 0.5409, "step": 1715 }, { "epoch": 0.8022388059701493, "grad_norm": 0.3624329695220031, "learning_rate": 4.851582371302078e-05, "loss": 0.5256, "step": 1720 }, { "epoch": 0.804570895522388, "grad_norm": 0.36372957091196234, "learning_rate": 4.8503403111088075e-05, "loss": 0.5438, "step": 1725 }, { "epoch": 0.8069029850746269, "grad_norm": 0.34926474001302205, "learning_rate": 4.849093254108778e-05, "loss": 0.5243, "step": 1730 }, { "epoch": 0.8092350746268657, "grad_norm": 0.3252922426684505, "learning_rate": 4.8478412032687956e-05, "loss": 0.5205, "step": 1735 }, { "epoch": 0.8115671641791045, "grad_norm": 0.3169329800907355, "learning_rate": 4.8465841615675464e-05, "loss": 0.5124, "step": 1740 }, { "epoch": 0.8138992537313433, "grad_norm": 0.31602372855375255, "learning_rate": 4.84532213199559e-05, "loss": 0.5272, "step": 1745 }, { "epoch": 0.816231343283582, "grad_norm": 0.3256964898729751, "learning_rate": 4.844055117555355e-05, "loss": 0.5136, "step": 1750 }, { "epoch": 0.8185634328358209, "grad_norm": 0.33772107142920493, "learning_rate": 4.8427831212611276e-05, "loss": 0.5099, "step": 1755 }, { "epoch": 0.8208955223880597, "grad_norm": 0.36353040358534994, "learning_rate": 4.8415061461390444e-05, "loss": 0.5274, "step": 1760 }, { "epoch": 0.8232276119402985, "grad_norm": 0.34619201145710204, "learning_rate": 4.840224195227088e-05, "loss": 0.5278, "step": 1765 }, { "epoch": 0.8255597014925373, "grad_norm": 0.3685964741465242, "learning_rate": 4.8389372715750814e-05, "loss": 0.5299, "step": 1770 }, { "epoch": 0.8278917910447762, "grad_norm": 0.3081229181666821, "learning_rate": 4.8376453782446724e-05, "loss": 0.5316, "step": 1775 }, { "epoch": 0.8302238805970149, "grad_norm": 0.32654019838928633, "learning_rate": 4.836348518309337e-05, "loss": 0.5292, "step": 1780 }, { "epoch": 0.8325559701492538, "grad_norm": 0.3653143263829344, "learning_rate": 4.835046694854364e-05, "loss": 0.5285, "step": 1785 }, { "epoch": 0.8348880597014925, "grad_norm": 0.3408282105841519, "learning_rate": 4.833739910976853e-05, "loss": 0.5214, "step": 1790 }, { "epoch": 0.8372201492537313, "grad_norm": 0.3160814276238013, "learning_rate": 4.8324281697857024e-05, "loss": 0.5226, "step": 1795 }, { "epoch": 0.8395522388059702, "grad_norm": 0.3290489799156316, "learning_rate": 4.831111474401604e-05, "loss": 0.5333, "step": 1800 }, { "epoch": 0.8418843283582089, "grad_norm": 0.333467511639791, "learning_rate": 4.8297898279570385e-05, "loss": 0.504, "step": 1805 }, { "epoch": 0.8442164179104478, "grad_norm": 0.33221654324934774, "learning_rate": 4.828463233596264e-05, "loss": 0.527, "step": 1810 }, { "epoch": 0.8465485074626866, "grad_norm": 0.3241986162098286, "learning_rate": 4.827131694475309e-05, "loss": 0.5197, "step": 1815 }, { "epoch": 0.8488805970149254, "grad_norm": 0.32319061252353704, "learning_rate": 4.825795213761967e-05, "loss": 0.5255, "step": 1820 }, { "epoch": 0.8512126865671642, "grad_norm": 0.3450627464566209, "learning_rate": 4.824453794635788e-05, "loss": 0.5283, "step": 1825 }, { "epoch": 0.8535447761194029, "grad_norm": 0.33776695127447615, "learning_rate": 4.8231074402880686e-05, "loss": 0.5093, "step": 1830 }, { "epoch": 0.8558768656716418, "grad_norm": 0.35521299223320174, "learning_rate": 4.82175615392185e-05, "loss": 0.5226, "step": 1835 }, { "epoch": 0.8582089552238806, "grad_norm": 0.3526673730896415, "learning_rate": 4.8203999387519036e-05, "loss": 0.5558, "step": 1840 }, { "epoch": 0.8605410447761194, "grad_norm": 0.32963132191265515, "learning_rate": 4.81903879800473e-05, "loss": 0.5463, "step": 1845 }, { "epoch": 0.8628731343283582, "grad_norm": 0.33773979272497684, "learning_rate": 4.817672734918543e-05, "loss": 0.5286, "step": 1850 }, { "epoch": 0.8652052238805971, "grad_norm": 0.3407382267165379, "learning_rate": 4.816301752743271e-05, "loss": 0.5258, "step": 1855 }, { "epoch": 0.8675373134328358, "grad_norm": 0.36487440679950267, "learning_rate": 4.8149258547405466e-05, "loss": 0.5314, "step": 1860 }, { "epoch": 0.8698694029850746, "grad_norm": 0.3533737217654913, "learning_rate": 4.8135450441836905e-05, "loss": 0.5226, "step": 1865 }, { "epoch": 0.8722014925373134, "grad_norm": 0.3491908795931541, "learning_rate": 4.8121593243577176e-05, "loss": 0.5121, "step": 1870 }, { "epoch": 0.8745335820895522, "grad_norm": 0.3264992553747918, "learning_rate": 4.8107686985593194e-05, "loss": 0.5152, "step": 1875 }, { "epoch": 0.8768656716417911, "grad_norm": 0.3268900758163521, "learning_rate": 4.809373170096859e-05, "loss": 0.5062, "step": 1880 }, { "epoch": 0.8791977611940298, "grad_norm": 0.3165482567491455, "learning_rate": 4.8079727422903615e-05, "loss": 0.5214, "step": 1885 }, { "epoch": 0.8815298507462687, "grad_norm": 0.3390361077734908, "learning_rate": 4.806567418471511e-05, "loss": 0.5205, "step": 1890 }, { "epoch": 0.8838619402985075, "grad_norm": 0.3052669190883162, "learning_rate": 4.805157201983637e-05, "loss": 0.52, "step": 1895 }, { "epoch": 0.8861940298507462, "grad_norm": 0.3222061220442884, "learning_rate": 4.803742096181711e-05, "loss": 0.5027, "step": 1900 }, { "epoch": 0.8885261194029851, "grad_norm": 0.31738935943585744, "learning_rate": 4.802322104432334e-05, "loss": 0.5088, "step": 1905 }, { "epoch": 0.8908582089552238, "grad_norm": 0.3207484468661816, "learning_rate": 4.800897230113732e-05, "loss": 0.529, "step": 1910 }, { "epoch": 0.8931902985074627, "grad_norm": 0.3060682658460953, "learning_rate": 4.799467476615748e-05, "loss": 0.5232, "step": 1915 }, { "epoch": 0.8955223880597015, "grad_norm": 0.35278139640243344, "learning_rate": 4.7980328473398314e-05, "loss": 0.5231, "step": 1920 }, { "epoch": 0.8978544776119403, "grad_norm": 0.34302859315120093, "learning_rate": 4.7965933456990306e-05, "loss": 0.5272, "step": 1925 }, { "epoch": 0.9001865671641791, "grad_norm": 0.328047772712654, "learning_rate": 4.795148975117988e-05, "loss": 0.5311, "step": 1930 }, { "epoch": 0.902518656716418, "grad_norm": 0.31496077205458617, "learning_rate": 4.7936997390329266e-05, "loss": 0.5213, "step": 1935 }, { "epoch": 0.9048507462686567, "grad_norm": 0.29856283518006344, "learning_rate": 4.7922456408916465e-05, "loss": 0.5353, "step": 1940 }, { "epoch": 0.9071828358208955, "grad_norm": 0.32433494379432704, "learning_rate": 4.790786684153516e-05, "loss": 0.5151, "step": 1945 }, { "epoch": 0.9095149253731343, "grad_norm": 0.3284799042682583, "learning_rate": 4.7893228722894584e-05, "loss": 0.5129, "step": 1950 }, { "epoch": 0.9118470149253731, "grad_norm": 0.3470162776586131, "learning_rate": 4.787854208781951e-05, "loss": 0.5183, "step": 1955 }, { "epoch": 0.914179104477612, "grad_norm": 0.2959001352801515, "learning_rate": 4.786380697125012e-05, "loss": 0.5136, "step": 1960 }, { "epoch": 0.9165111940298507, "grad_norm": 0.3396088978261471, "learning_rate": 4.784902340824195e-05, "loss": 0.5262, "step": 1965 }, { "epoch": 0.9188432835820896, "grad_norm": 0.3273368184008834, "learning_rate": 4.7834191433965756e-05, "loss": 0.5201, "step": 1970 }, { "epoch": 0.9211753731343284, "grad_norm": 0.34277366674591425, "learning_rate": 4.781931108370751e-05, "loss": 0.5142, "step": 1975 }, { "epoch": 0.9235074626865671, "grad_norm": 0.29157514895351394, "learning_rate": 4.780438239286824e-05, "loss": 0.505, "step": 1980 }, { "epoch": 0.925839552238806, "grad_norm": 0.3186231330293528, "learning_rate": 4.7789405396964004e-05, "loss": 0.5168, "step": 1985 }, { "epoch": 0.9281716417910447, "grad_norm": 0.33246111585519117, "learning_rate": 4.777438013162576e-05, "loss": 0.5186, "step": 1990 }, { "epoch": 0.9305037313432836, "grad_norm": 0.30532390031294376, "learning_rate": 4.775930663259932e-05, "loss": 0.5067, "step": 1995 }, { "epoch": 0.9328358208955224, "grad_norm": 0.3126978989124814, "learning_rate": 4.774418493574523e-05, "loss": 0.5327, "step": 2000 }, { "epoch": 0.9351679104477612, "grad_norm": 0.32810377762280085, "learning_rate": 4.77290150770387e-05, "loss": 0.5163, "step": 2005 }, { "epoch": 0.9375, "grad_norm": 0.32584031278508513, "learning_rate": 4.771379709256953e-05, "loss": 0.5084, "step": 2010 }, { "epoch": 0.9398320895522388, "grad_norm": 0.3597717139309028, "learning_rate": 4.769853101854201e-05, "loss": 0.5207, "step": 2015 }, { "epoch": 0.9421641791044776, "grad_norm": 0.3346781319238501, "learning_rate": 4.768321689127483e-05, "loss": 0.5092, "step": 2020 }, { "epoch": 0.9444962686567164, "grad_norm": 0.29808476543581425, "learning_rate": 4.766785474720102e-05, "loss": 0.5147, "step": 2025 }, { "epoch": 0.9468283582089553, "grad_norm": 0.3458162733582178, "learning_rate": 4.765244462286782e-05, "loss": 0.5197, "step": 2030 }, { "epoch": 0.949160447761194, "grad_norm": 0.31045521519422287, "learning_rate": 4.763698655493664e-05, "loss": 0.4991, "step": 2035 }, { "epoch": 0.9514925373134329, "grad_norm": 0.3024611057468114, "learning_rate": 4.7621480580182925e-05, "loss": 0.521, "step": 2040 }, { "epoch": 0.9538246268656716, "grad_norm": 0.3319637122867193, "learning_rate": 4.760592673549611e-05, "loss": 0.5232, "step": 2045 }, { "epoch": 0.9561567164179104, "grad_norm": 0.33550836502792386, "learning_rate": 4.759032505787952e-05, "loss": 0.5097, "step": 2050 }, { "epoch": 0.9584888059701493, "grad_norm": 0.3298609959987978, "learning_rate": 4.7574675584450256e-05, "loss": 0.5256, "step": 2055 }, { "epoch": 0.960820895522388, "grad_norm": 0.3326522250375082, "learning_rate": 4.755897835243916e-05, "loss": 0.5013, "step": 2060 }, { "epoch": 0.9631529850746269, "grad_norm": 0.34444397587068326, "learning_rate": 4.754323339919064e-05, "loss": 0.518, "step": 2065 }, { "epoch": 0.9654850746268657, "grad_norm": 0.35556298427252975, "learning_rate": 4.752744076216268e-05, "loss": 0.5263, "step": 2070 }, { "epoch": 0.9678171641791045, "grad_norm": 0.333852747876224, "learning_rate": 4.751160047892672e-05, "loss": 0.5164, "step": 2075 }, { "epoch": 0.9701492537313433, "grad_norm": 0.3215532264640461, "learning_rate": 4.74957125871675e-05, "loss": 0.5181, "step": 2080 }, { "epoch": 0.972481343283582, "grad_norm": 0.3566315009881727, "learning_rate": 4.747977712468305e-05, "loss": 0.5243, "step": 2085 }, { "epoch": 0.9748134328358209, "grad_norm": 0.36590696358575703, "learning_rate": 4.746379412938459e-05, "loss": 0.5289, "step": 2090 }, { "epoch": 0.9771455223880597, "grad_norm": 0.3263213346072128, "learning_rate": 4.7447763639296384e-05, "loss": 0.5245, "step": 2095 }, { "epoch": 0.9794776119402985, "grad_norm": 0.33158275181977964, "learning_rate": 4.743168569255572e-05, "loss": 0.514, "step": 2100 }, { "epoch": 0.9818097014925373, "grad_norm": 0.3414952504671106, "learning_rate": 4.741556032741278e-05, "loss": 0.51, "step": 2105 }, { "epoch": 0.9841417910447762, "grad_norm": 0.31381790674843224, "learning_rate": 4.739938758223055e-05, "loss": 0.5128, "step": 2110 }, { "epoch": 0.9864738805970149, "grad_norm": 0.34615669054875065, "learning_rate": 4.738316749548473e-05, "loss": 0.5416, "step": 2115 }, { "epoch": 0.9888059701492538, "grad_norm": 0.32382830203402907, "learning_rate": 4.736690010576368e-05, "loss": 0.5169, "step": 2120 }, { "epoch": 0.9911380597014925, "grad_norm": 0.2980802702248772, "learning_rate": 4.735058545176824e-05, "loss": 0.5142, "step": 2125 }, { "epoch": 0.9934701492537313, "grad_norm": 0.6012864662337598, "learning_rate": 4.733422357231176e-05, "loss": 0.5264, "step": 2130 }, { "epoch": 0.9958022388059702, "grad_norm": 0.3468477851689769, "learning_rate": 4.731781450631988e-05, "loss": 0.518, "step": 2135 }, { "epoch": 0.9981343283582089, "grad_norm": 0.3347251750353037, "learning_rate": 4.730135829283055e-05, "loss": 0.5218, "step": 2140 }, { "epoch": 1.0004664179104477, "grad_norm": 0.34493107963746067, "learning_rate": 4.728485497099385e-05, "loss": 0.5044, "step": 2145 }, { "epoch": 1.0027985074626866, "grad_norm": 0.32458829282143936, "learning_rate": 4.726830458007194e-05, "loss": 0.4614, "step": 2150 }, { "epoch": 1.0051305970149254, "grad_norm": 0.32255863805156076, "learning_rate": 4.725170715943898e-05, "loss": 0.4779, "step": 2155 }, { "epoch": 1.007462686567164, "grad_norm": 0.32715875045446985, "learning_rate": 4.723506274858101e-05, "loss": 0.4654, "step": 2160 }, { "epoch": 1.009794776119403, "grad_norm": 0.3399494609203243, "learning_rate": 4.721837138709582e-05, "loss": 0.4984, "step": 2165 }, { "epoch": 1.0121268656716418, "grad_norm": 0.32859330798699127, "learning_rate": 4.720163311469296e-05, "loss": 0.4823, "step": 2170 }, { "epoch": 1.0144589552238805, "grad_norm": 0.3596012918492095, "learning_rate": 4.718484797119355e-05, "loss": 0.4972, "step": 2175 }, { "epoch": 1.0167910447761195, "grad_norm": 0.31902889700552106, "learning_rate": 4.7168015996530204e-05, "loss": 0.4671, "step": 2180 }, { "epoch": 1.0191231343283582, "grad_norm": 0.3358585247278866, "learning_rate": 4.715113723074699e-05, "loss": 0.4661, "step": 2185 }, { "epoch": 1.021455223880597, "grad_norm": 0.3161058107105468, "learning_rate": 4.7134211713999264e-05, "loss": 0.4649, "step": 2190 }, { "epoch": 1.023787313432836, "grad_norm": 0.3090966363067558, "learning_rate": 4.711723948655362e-05, "loss": 0.4747, "step": 2195 }, { "epoch": 1.0261194029850746, "grad_norm": 0.33208495346814326, "learning_rate": 4.7100220588787755e-05, "loss": 0.4638, "step": 2200 }, { "epoch": 1.0284514925373134, "grad_norm": 0.37977143949869246, "learning_rate": 4.7083155061190426e-05, "loss": 0.4939, "step": 2205 }, { "epoch": 1.0307835820895523, "grad_norm": 4.123640145086634, "learning_rate": 4.706604294436132e-05, "loss": 0.4722, "step": 2210 }, { "epoch": 1.033115671641791, "grad_norm": 0.3357747265602457, "learning_rate": 4.704888427901094e-05, "loss": 0.4672, "step": 2215 }, { "epoch": 1.0354477611940298, "grad_norm": 0.3483278494946721, "learning_rate": 4.703167910596055e-05, "loss": 0.4918, "step": 2220 }, { "epoch": 1.0377798507462686, "grad_norm": 0.36945917088328467, "learning_rate": 4.701442746614206e-05, "loss": 0.5095, "step": 2225 }, { "epoch": 1.0401119402985075, "grad_norm": 0.37622665582505943, "learning_rate": 4.699712940059791e-05, "loss": 0.4687, "step": 2230 }, { "epoch": 1.0424440298507462, "grad_norm": 0.3302084693156725, "learning_rate": 4.697978495048099e-05, "loss": 0.4814, "step": 2235 }, { "epoch": 1.044776119402985, "grad_norm": 0.33794444291268205, "learning_rate": 4.696239415705458e-05, "loss": 0.4857, "step": 2240 }, { "epoch": 1.047108208955224, "grad_norm": 0.3615939089944461, "learning_rate": 4.694495706169214e-05, "loss": 0.4772, "step": 2245 }, { "epoch": 1.0494402985074627, "grad_norm": 0.30227356548792883, "learning_rate": 4.692747370587737e-05, "loss": 0.4887, "step": 2250 }, { "epoch": 1.0517723880597014, "grad_norm": 0.3050251641467046, "learning_rate": 4.690994413120394e-05, "loss": 0.4706, "step": 2255 }, { "epoch": 1.0541044776119404, "grad_norm": 0.3197357303075886, "learning_rate": 4.689236837937556e-05, "loss": 0.4629, "step": 2260 }, { "epoch": 1.056436567164179, "grad_norm": 0.3338358993351683, "learning_rate": 4.687474649220573e-05, "loss": 0.4908, "step": 2265 }, { "epoch": 1.0587686567164178, "grad_norm": 0.3286438116481475, "learning_rate": 4.685707851161773e-05, "loss": 0.4712, "step": 2270 }, { "epoch": 1.0611007462686568, "grad_norm": 0.31842783075913145, "learning_rate": 4.683936447964452e-05, "loss": 0.4849, "step": 2275 }, { "epoch": 1.0634328358208955, "grad_norm": 0.3163050069757611, "learning_rate": 4.6821604438428594e-05, "loss": 0.4864, "step": 2280 }, { "epoch": 1.0657649253731343, "grad_norm": 0.325945172003011, "learning_rate": 4.680379843022192e-05, "loss": 0.4796, "step": 2285 }, { "epoch": 1.0680970149253732, "grad_norm": 0.31819012766194577, "learning_rate": 4.678594649738581e-05, "loss": 0.4544, "step": 2290 }, { "epoch": 1.070429104477612, "grad_norm": 0.3427585639787564, "learning_rate": 4.676804868239083e-05, "loss": 0.4546, "step": 2295 }, { "epoch": 1.0727611940298507, "grad_norm": 0.30206039801062196, "learning_rate": 4.6750105027816716e-05, "loss": 0.4737, "step": 2300 }, { "epoch": 1.0750932835820897, "grad_norm": 0.29717982852468494, "learning_rate": 4.673211557635225e-05, "loss": 0.4713, "step": 2305 }, { "epoch": 1.0774253731343284, "grad_norm": 0.34808617006404324, "learning_rate": 4.671408037079519e-05, "loss": 0.4812, "step": 2310 }, { "epoch": 1.0797574626865671, "grad_norm": 0.32938489470654436, "learning_rate": 4.669599945405208e-05, "loss": 0.4794, "step": 2315 }, { "epoch": 1.0820895522388059, "grad_norm": 0.34412545122661975, "learning_rate": 4.6677872869138304e-05, "loss": 0.4686, "step": 2320 }, { "epoch": 1.0844216417910448, "grad_norm": 0.3074804070103023, "learning_rate": 4.6659700659177814e-05, "loss": 0.4718, "step": 2325 }, { "epoch": 1.0867537313432836, "grad_norm": 0.3241184686148533, "learning_rate": 4.6641482867403156e-05, "loss": 0.4768, "step": 2330 }, { "epoch": 1.0890858208955223, "grad_norm": 0.3284572757053119, "learning_rate": 4.662321953715529e-05, "loss": 0.4693, "step": 2335 }, { "epoch": 1.0914179104477613, "grad_norm": 0.3555279076014111, "learning_rate": 4.660491071188353e-05, "loss": 0.4748, "step": 2340 }, { "epoch": 1.09375, "grad_norm": 0.3408189065140399, "learning_rate": 4.658655643514541e-05, "loss": 0.4986, "step": 2345 }, { "epoch": 1.0960820895522387, "grad_norm": 0.30123259180537115, "learning_rate": 4.656815675060662e-05, "loss": 0.4818, "step": 2350 }, { "epoch": 1.0984141791044777, "grad_norm": 0.2990172702620939, "learning_rate": 4.654971170204083e-05, "loss": 0.4732, "step": 2355 }, { "epoch": 1.1007462686567164, "grad_norm": 0.3210860116743587, "learning_rate": 4.6531221333329694e-05, "loss": 0.4791, "step": 2360 }, { "epoch": 1.1030783582089552, "grad_norm": 0.333540984484251, "learning_rate": 4.6512685688462645e-05, "loss": 0.4797, "step": 2365 }, { "epoch": 1.1054104477611941, "grad_norm": 0.31403075830403704, "learning_rate": 4.649410481153683e-05, "loss": 0.4791, "step": 2370 }, { "epoch": 1.1077425373134329, "grad_norm": 0.31823131812727906, "learning_rate": 4.6475478746757025e-05, "loss": 0.4684, "step": 2375 }, { "epoch": 1.1100746268656716, "grad_norm": 0.31736593651150546, "learning_rate": 4.64568075384355e-05, "loss": 0.4824, "step": 2380 }, { "epoch": 1.1124067164179103, "grad_norm": 0.33552683340739253, "learning_rate": 4.643809123099192e-05, "loss": 0.4765, "step": 2385 }, { "epoch": 1.1147388059701493, "grad_norm": 0.3321379731812115, "learning_rate": 4.641932986895325e-05, "loss": 0.4688, "step": 2390 }, { "epoch": 1.117070895522388, "grad_norm": 0.27956248071487355, "learning_rate": 4.640052349695363e-05, "loss": 0.4767, "step": 2395 }, { "epoch": 1.1194029850746268, "grad_norm": 0.31369622957119425, "learning_rate": 4.6381672159734287e-05, "loss": 0.4619, "step": 2400 }, { "epoch": 1.1217350746268657, "grad_norm": 0.29968480978023204, "learning_rate": 4.636277590214344e-05, "loss": 0.4778, "step": 2405 }, { "epoch": 1.1240671641791045, "grad_norm": 0.30934378204168783, "learning_rate": 4.634383476913615e-05, "loss": 0.4787, "step": 2410 }, { "epoch": 1.1263992537313432, "grad_norm": 0.3067470992355944, "learning_rate": 4.632484880577425e-05, "loss": 0.4613, "step": 2415 }, { "epoch": 1.1287313432835822, "grad_norm": 0.3229510661600592, "learning_rate": 4.6305818057226226e-05, "loss": 0.4743, "step": 2420 }, { "epoch": 1.131063432835821, "grad_norm": 0.3407174916674853, "learning_rate": 4.62867425687671e-05, "loss": 0.4929, "step": 2425 }, { "epoch": 1.1333955223880596, "grad_norm": 0.330992623340824, "learning_rate": 4.626762238577836e-05, "loss": 0.4848, "step": 2430 }, { "epoch": 1.1357276119402986, "grad_norm": 0.32364036956866926, "learning_rate": 4.624845755374779e-05, "loss": 0.4808, "step": 2435 }, { "epoch": 1.1380597014925373, "grad_norm": 0.315343408632216, "learning_rate": 4.622924811826942e-05, "loss": 0.4696, "step": 2440 }, { "epoch": 1.140391791044776, "grad_norm": 0.31349888146809796, "learning_rate": 4.620999412504338e-05, "loss": 0.4732, "step": 2445 }, { "epoch": 1.142723880597015, "grad_norm": 0.28788632904562184, "learning_rate": 4.619069561987581e-05, "loss": 0.4794, "step": 2450 }, { "epoch": 1.1450559701492538, "grad_norm": 0.3096391992080239, "learning_rate": 4.6171352648678755e-05, "loss": 0.4626, "step": 2455 }, { "epoch": 1.1473880597014925, "grad_norm": 0.29658069026928063, "learning_rate": 4.615196525747003e-05, "loss": 0.4675, "step": 2460 }, { "epoch": 1.1497201492537314, "grad_norm": 0.3218746330495119, "learning_rate": 4.613253349237314e-05, "loss": 0.4824, "step": 2465 }, { "epoch": 1.1520522388059702, "grad_norm": 0.3096843335141755, "learning_rate": 4.611305739961715e-05, "loss": 0.4707, "step": 2470 }, { "epoch": 1.154384328358209, "grad_norm": 0.33464505068352085, "learning_rate": 4.609353702553659e-05, "loss": 0.4778, "step": 2475 }, { "epoch": 1.1567164179104479, "grad_norm": 0.3087989448174087, "learning_rate": 4.607397241657133e-05, "loss": 0.4854, "step": 2480 }, { "epoch": 1.1590485074626866, "grad_norm": 0.30638461235794034, "learning_rate": 4.605436361926648e-05, "loss": 0.463, "step": 2485 }, { "epoch": 1.1613805970149254, "grad_norm": 0.3019339851496765, "learning_rate": 4.6034710680272274e-05, "loss": 0.4592, "step": 2490 }, { "epoch": 1.163712686567164, "grad_norm": 0.3016520187236193, "learning_rate": 4.601501364634397e-05, "loss": 0.4787, "step": 2495 }, { "epoch": 1.166044776119403, "grad_norm": 0.32133477597555454, "learning_rate": 4.599527256434171e-05, "loss": 0.4886, "step": 2500 }, { "epoch": 1.1683768656716418, "grad_norm": 0.3196229392784417, "learning_rate": 4.597548748123046e-05, "loss": 0.4752, "step": 2505 }, { "epoch": 1.1707089552238805, "grad_norm": 0.3213996270674388, "learning_rate": 4.595565844407982e-05, "loss": 0.4761, "step": 2510 }, { "epoch": 1.1730410447761195, "grad_norm": 0.3093130518353402, "learning_rate": 4.5935785500064014e-05, "loss": 0.4675, "step": 2515 }, { "epoch": 1.1753731343283582, "grad_norm": 0.33806099859117544, "learning_rate": 4.5915868696461685e-05, "loss": 0.4882, "step": 2520 }, { "epoch": 1.177705223880597, "grad_norm": 0.3018628444424992, "learning_rate": 4.589590808065583e-05, "loss": 0.4712, "step": 2525 }, { "epoch": 1.180037313432836, "grad_norm": 0.31244170683175976, "learning_rate": 4.587590370013367e-05, "loss": 0.4813, "step": 2530 }, { "epoch": 1.1823694029850746, "grad_norm": 0.31648337655355924, "learning_rate": 4.585585560248657e-05, "loss": 0.4697, "step": 2535 }, { "epoch": 1.1847014925373134, "grad_norm": 0.3377765239325855, "learning_rate": 4.5835763835409864e-05, "loss": 0.4856, "step": 2540 }, { "epoch": 1.1870335820895521, "grad_norm": 0.2955591321792683, "learning_rate": 4.58156284467028e-05, "loss": 0.4688, "step": 2545 }, { "epoch": 1.189365671641791, "grad_norm": 0.3196114564133787, "learning_rate": 4.579544948426841e-05, "loss": 0.4818, "step": 2550 }, { "epoch": 1.1916977611940298, "grad_norm": 0.30057262686683534, "learning_rate": 4.577522699611336e-05, "loss": 0.4797, "step": 2555 }, { "epoch": 1.1940298507462686, "grad_norm": 0.29421490705804, "learning_rate": 4.57549610303479e-05, "loss": 0.4732, "step": 2560 }, { "epoch": 1.1963619402985075, "grad_norm": 0.2939063438309448, "learning_rate": 4.573465163518569e-05, "loss": 0.4761, "step": 2565 }, { "epoch": 1.1986940298507462, "grad_norm": 0.3257510440708279, "learning_rate": 4.571429885894373e-05, "loss": 0.4801, "step": 2570 }, { "epoch": 1.201026119402985, "grad_norm": 0.30626345483261114, "learning_rate": 4.569390275004221e-05, "loss": 0.4877, "step": 2575 }, { "epoch": 1.203358208955224, "grad_norm": 0.30910640812583706, "learning_rate": 4.567346335700442e-05, "loss": 0.4905, "step": 2580 }, { "epoch": 1.2056902985074627, "grad_norm": 0.30792277458018735, "learning_rate": 4.565298072845662e-05, "loss": 0.4643, "step": 2585 }, { "epoch": 1.2080223880597014, "grad_norm": 0.33049642211902874, "learning_rate": 4.563245491312793e-05, "loss": 0.482, "step": 2590 }, { "epoch": 1.2103544776119404, "grad_norm": 0.3291522206559048, "learning_rate": 4.5611885959850216e-05, "loss": 0.4602, "step": 2595 }, { "epoch": 1.212686567164179, "grad_norm": 0.2989649950083559, "learning_rate": 4.559127391755796e-05, "loss": 0.46, "step": 2600 }, { "epoch": 1.2150186567164178, "grad_norm": 0.3073834687450615, "learning_rate": 4.557061883528818e-05, "loss": 0.4812, "step": 2605 }, { "epoch": 1.2173507462686568, "grad_norm": 0.32418815488792685, "learning_rate": 4.554992076218026e-05, "loss": 0.4708, "step": 2610 }, { "epoch": 1.2196828358208955, "grad_norm": 0.3240320131714874, "learning_rate": 4.552917974747588e-05, "loss": 0.4753, "step": 2615 }, { "epoch": 1.2220149253731343, "grad_norm": 0.31114133595445564, "learning_rate": 4.5508395840518884e-05, "loss": 0.4591, "step": 2620 }, { "epoch": 1.2243470149253732, "grad_norm": 0.341104643138175, "learning_rate": 4.548756909075511e-05, "loss": 0.4797, "step": 2625 }, { "epoch": 1.226679104477612, "grad_norm": 0.3485227182883163, "learning_rate": 4.5466699547732405e-05, "loss": 0.4665, "step": 2630 }, { "epoch": 1.2290111940298507, "grad_norm": 0.30302622201981266, "learning_rate": 4.544578726110035e-05, "loss": 0.4643, "step": 2635 }, { "epoch": 1.2313432835820897, "grad_norm": 0.3261989009266306, "learning_rate": 4.5424832280610245e-05, "loss": 0.4871, "step": 2640 }, { "epoch": 1.2336753731343284, "grad_norm": 0.31320494601692955, "learning_rate": 4.540383465611496e-05, "loss": 0.463, "step": 2645 }, { "epoch": 1.2360074626865671, "grad_norm": 0.287838077303706, "learning_rate": 4.5382794437568824e-05, "loss": 0.4819, "step": 2650 }, { "epoch": 1.2383395522388059, "grad_norm": 0.3173152226733926, "learning_rate": 4.5361711675027484e-05, "loss": 0.4726, "step": 2655 }, { "epoch": 1.2406716417910448, "grad_norm": 0.2921006154759842, "learning_rate": 4.53405864186478e-05, "loss": 0.4828, "step": 2660 }, { "epoch": 1.2430037313432836, "grad_norm": 0.3132785348431443, "learning_rate": 4.531941871868775e-05, "loss": 0.4753, "step": 2665 }, { "epoch": 1.2453358208955223, "grad_norm": 0.31719546946749805, "learning_rate": 4.5298208625506253e-05, "loss": 0.4727, "step": 2670 }, { "epoch": 1.2476679104477613, "grad_norm": 0.34894532270827705, "learning_rate": 4.527695618956312e-05, "loss": 0.4828, "step": 2675 }, { "epoch": 1.25, "grad_norm": 0.32093183202162795, "learning_rate": 4.5255661461418854e-05, "loss": 0.4895, "step": 2680 }, { "epoch": 1.2523320895522387, "grad_norm": 0.2908623169007248, "learning_rate": 4.5234324491734624e-05, "loss": 0.4726, "step": 2685 }, { "epoch": 1.2546641791044777, "grad_norm": 0.3316192187469941, "learning_rate": 4.521294533127206e-05, "loss": 0.4882, "step": 2690 }, { "epoch": 1.2569962686567164, "grad_norm": 0.29404858284920904, "learning_rate": 4.519152403089317e-05, "loss": 0.4662, "step": 2695 }, { "epoch": 1.2593283582089552, "grad_norm": 0.302122161033726, "learning_rate": 4.517006064156023e-05, "loss": 0.4725, "step": 2700 }, { "epoch": 1.261660447761194, "grad_norm": 0.3214588988239813, "learning_rate": 4.5148555214335616e-05, "loss": 0.4661, "step": 2705 }, { "epoch": 1.2639925373134329, "grad_norm": 0.3101209385057379, "learning_rate": 4.512700780038174e-05, "loss": 0.4656, "step": 2710 }, { "epoch": 1.2663246268656716, "grad_norm": 0.2990772457017377, "learning_rate": 4.510541845096091e-05, "loss": 0.4522, "step": 2715 }, { "epoch": 1.2686567164179103, "grad_norm": 0.3015980707729143, "learning_rate": 4.5083787217435175e-05, "loss": 0.4691, "step": 2720 }, { "epoch": 1.2709888059701493, "grad_norm": 0.3054074565425917, "learning_rate": 4.506211415126624e-05, "loss": 0.4791, "step": 2725 }, { "epoch": 1.273320895522388, "grad_norm": 0.30950533403450803, "learning_rate": 4.504039930401535e-05, "loss": 0.485, "step": 2730 }, { "epoch": 1.2756529850746268, "grad_norm": 0.2973848210633481, "learning_rate": 4.501864272734311e-05, "loss": 0.4928, "step": 2735 }, { "epoch": 1.2779850746268657, "grad_norm": 0.30084732409820364, "learning_rate": 4.4996844473009425e-05, "loss": 0.4765, "step": 2740 }, { "epoch": 1.2803171641791045, "grad_norm": 0.30624624529937566, "learning_rate": 4.497500459287335e-05, "loss": 0.4631, "step": 2745 }, { "epoch": 1.2826492537313432, "grad_norm": 0.293799963618475, "learning_rate": 4.4953123138892984e-05, "loss": 0.4538, "step": 2750 }, { "epoch": 1.2849813432835822, "grad_norm": 0.2969036981139745, "learning_rate": 4.4931200163125306e-05, "loss": 0.4725, "step": 2755 }, { "epoch": 1.287313432835821, "grad_norm": 0.30347983071770374, "learning_rate": 4.4909235717726086e-05, "loss": 0.4755, "step": 2760 }, { "epoch": 1.2896455223880596, "grad_norm": 0.29676733670130784, "learning_rate": 4.488722985494978e-05, "loss": 0.4812, "step": 2765 }, { "epoch": 1.2919776119402986, "grad_norm": 0.30234546010089725, "learning_rate": 4.486518262714931e-05, "loss": 0.4748, "step": 2770 }, { "epoch": 1.2943097014925373, "grad_norm": 0.2921873147622258, "learning_rate": 4.484309408677609e-05, "loss": 0.4727, "step": 2775 }, { "epoch": 1.296641791044776, "grad_norm": 0.31693923569987387, "learning_rate": 4.4820964286379764e-05, "loss": 0.4869, "step": 2780 }, { "epoch": 1.298973880597015, "grad_norm": 0.3075068478130031, "learning_rate": 4.479879327860816e-05, "loss": 0.4791, "step": 2785 }, { "epoch": 1.3013059701492538, "grad_norm": 0.3249105382692282, "learning_rate": 4.477658111620711e-05, "loss": 0.4698, "step": 2790 }, { "epoch": 1.3036380597014925, "grad_norm": 0.32563884630995865, "learning_rate": 4.47543278520204e-05, "loss": 0.4706, "step": 2795 }, { "epoch": 1.3059701492537314, "grad_norm": 0.29258174085451616, "learning_rate": 4.4732033538989556e-05, "loss": 0.4493, "step": 2800 }, { "epoch": 1.3083022388059702, "grad_norm": 0.3106219913540466, "learning_rate": 4.47096982301538e-05, "loss": 0.4748, "step": 2805 }, { "epoch": 1.310634328358209, "grad_norm": 0.30485204063720533, "learning_rate": 4.468732197864984e-05, "loss": 0.48, "step": 2810 }, { "epoch": 1.3129664179104479, "grad_norm": 0.3121415118222675, "learning_rate": 4.4664904837711835e-05, "loss": 0.4793, "step": 2815 }, { "epoch": 1.3152985074626866, "grad_norm": 0.30251302169174843, "learning_rate": 4.4642446860671185e-05, "loss": 0.4672, "step": 2820 }, { "epoch": 1.3176305970149254, "grad_norm": 0.3088262138414778, "learning_rate": 4.461994810095647e-05, "loss": 0.4663, "step": 2825 }, { "epoch": 1.3199626865671643, "grad_norm": 0.32419860969825015, "learning_rate": 4.4597408612093265e-05, "loss": 0.4761, "step": 2830 }, { "epoch": 1.322294776119403, "grad_norm": 0.29581049819823596, "learning_rate": 4.457482844770408e-05, "loss": 0.469, "step": 2835 }, { "epoch": 1.3246268656716418, "grad_norm": 0.3275321151645879, "learning_rate": 4.455220766150814e-05, "loss": 0.4916, "step": 2840 }, { "epoch": 1.3269589552238805, "grad_norm": 0.28268537801141486, "learning_rate": 4.452954630732136e-05, "loss": 0.4669, "step": 2845 }, { "epoch": 1.3292910447761195, "grad_norm": 0.30305754093106707, "learning_rate": 4.450684443905615e-05, "loss": 0.4818, "step": 2850 }, { "epoch": 1.3316231343283582, "grad_norm": 0.31350563776648027, "learning_rate": 4.44841021107213e-05, "loss": 0.4824, "step": 2855 }, { "epoch": 1.333955223880597, "grad_norm": 0.2991408162187756, "learning_rate": 4.4461319376421875e-05, "loss": 0.464, "step": 2860 }, { "epoch": 1.3362873134328357, "grad_norm": 0.3033395304840415, "learning_rate": 4.443849629035903e-05, "loss": 0.459, "step": 2865 }, { "epoch": 1.3386194029850746, "grad_norm": 0.31943981197493226, "learning_rate": 4.441563290682996e-05, "loss": 0.4684, "step": 2870 }, { "epoch": 1.3409514925373134, "grad_norm": 0.31174865707585836, "learning_rate": 4.43927292802277e-05, "loss": 0.4754, "step": 2875 }, { "epoch": 1.3432835820895521, "grad_norm": 0.29530526069490054, "learning_rate": 4.436978546504105e-05, "loss": 0.4686, "step": 2880 }, { "epoch": 1.345615671641791, "grad_norm": 0.31831590823804545, "learning_rate": 4.43468015158544e-05, "loss": 0.4796, "step": 2885 }, { "epoch": 1.3479477611940298, "grad_norm": 0.3184428209926445, "learning_rate": 4.432377748734763e-05, "loss": 0.4708, "step": 2890 }, { "epoch": 1.3502798507462686, "grad_norm": 0.31235315733927943, "learning_rate": 4.430071343429597e-05, "loss": 0.4608, "step": 2895 }, { "epoch": 1.3526119402985075, "grad_norm": 0.3268236250627049, "learning_rate": 4.427760941156986e-05, "loss": 0.4586, "step": 2900 }, { "epoch": 1.3549440298507462, "grad_norm": 0.2771098744883647, "learning_rate": 4.4254465474134856e-05, "loss": 0.4732, "step": 2905 }, { "epoch": 1.357276119402985, "grad_norm": 0.32389550568197406, "learning_rate": 4.423128167705144e-05, "loss": 0.4713, "step": 2910 }, { "epoch": 1.359608208955224, "grad_norm": 0.2774307433189656, "learning_rate": 4.4208058075474945e-05, "loss": 0.4577, "step": 2915 }, { "epoch": 1.3619402985074627, "grad_norm": 0.29487574490755114, "learning_rate": 4.418479472465539e-05, "loss": 0.4833, "step": 2920 }, { "epoch": 1.3642723880597014, "grad_norm": 0.29962345536204466, "learning_rate": 4.416149167993737e-05, "loss": 0.4783, "step": 2925 }, { "epoch": 1.3666044776119404, "grad_norm": 0.32604941089423306, "learning_rate": 4.413814899675991e-05, "loss": 0.4818, "step": 2930 }, { "epoch": 1.368936567164179, "grad_norm": 0.3161143379581694, "learning_rate": 4.411476673065631e-05, "loss": 0.475, "step": 2935 }, { "epoch": 1.3712686567164178, "grad_norm": 0.31004566959658714, "learning_rate": 4.409134493725409e-05, "loss": 0.4541, "step": 2940 }, { "epoch": 1.3736007462686568, "grad_norm": 0.35236814002864336, "learning_rate": 4.406788367227475e-05, "loss": 0.4733, "step": 2945 }, { "epoch": 1.3759328358208955, "grad_norm": 0.3558252739909897, "learning_rate": 4.404438299153376e-05, "loss": 0.5035, "step": 2950 }, { "epoch": 1.3782649253731343, "grad_norm": 0.347244804173969, "learning_rate": 4.4020842950940294e-05, "loss": 0.4937, "step": 2955 }, { "epoch": 1.3805970149253732, "grad_norm": 0.2973702681802193, "learning_rate": 4.3997263606497225e-05, "loss": 0.4605, "step": 2960 }, { "epoch": 1.382929104477612, "grad_norm": 0.3099205702136001, "learning_rate": 4.397364501430088e-05, "loss": 0.4837, "step": 2965 }, { "epoch": 1.3852611940298507, "grad_norm": 0.2905842814089538, "learning_rate": 4.3949987230541e-05, "loss": 0.4769, "step": 2970 }, { "epoch": 1.3875932835820897, "grad_norm": 0.3044680139119459, "learning_rate": 4.392629031150054e-05, "loss": 0.4773, "step": 2975 }, { "epoch": 1.3899253731343284, "grad_norm": 0.3076308924029483, "learning_rate": 4.390255431355557e-05, "loss": 0.4579, "step": 2980 }, { "epoch": 1.3922574626865671, "grad_norm": 0.30147813004623364, "learning_rate": 4.387877929317512e-05, "loss": 0.4792, "step": 2985 }, { "epoch": 1.394589552238806, "grad_norm": 0.31939821920434697, "learning_rate": 4.3854965306921064e-05, "loss": 0.4767, "step": 2990 }, { "epoch": 1.3969216417910448, "grad_norm": 0.2917685051193323, "learning_rate": 4.383111241144798e-05, "loss": 0.4599, "step": 2995 }, { "epoch": 1.3992537313432836, "grad_norm": 0.2746264818048184, "learning_rate": 4.380722066350303e-05, "loss": 0.4722, "step": 3000 }, { "epoch": 1.4015858208955223, "grad_norm": 0.2865435923006377, "learning_rate": 4.378329011992575e-05, "loss": 0.4768, "step": 3005 }, { "epoch": 1.4039179104477613, "grad_norm": 0.3226790099733525, "learning_rate": 4.375932083764803e-05, "loss": 0.4593, "step": 3010 }, { "epoch": 1.40625, "grad_norm": 0.2981577652966081, "learning_rate": 4.37353128736939e-05, "loss": 0.477, "step": 3015 }, { "epoch": 1.4085820895522387, "grad_norm": 0.3204239036383832, "learning_rate": 4.3711266285179415e-05, "loss": 0.4716, "step": 3020 }, { "epoch": 1.4109141791044777, "grad_norm": 0.2908240984012701, "learning_rate": 4.3687181129312534e-05, "loss": 0.4806, "step": 3025 }, { "epoch": 1.4132462686567164, "grad_norm": 0.2906255210780697, "learning_rate": 4.366305746339293e-05, "loss": 0.4797, "step": 3030 }, { "epoch": 1.4155783582089552, "grad_norm": 0.31273767669877334, "learning_rate": 4.363889534481195e-05, "loss": 0.4601, "step": 3035 }, { "epoch": 1.417910447761194, "grad_norm": 0.3055252429103392, "learning_rate": 4.361469483105236e-05, "loss": 0.4654, "step": 3040 }, { "epoch": 1.4202425373134329, "grad_norm": 0.30329047563801764, "learning_rate": 4.3590455979688335e-05, "loss": 0.4714, "step": 3045 }, { "epoch": 1.4225746268656716, "grad_norm": 0.2903272038228953, "learning_rate": 4.3566178848385194e-05, "loss": 0.4975, "step": 3050 }, { "epoch": 1.4249067164179103, "grad_norm": 0.28983415957505865, "learning_rate": 4.3541863494899385e-05, "loss": 0.463, "step": 3055 }, { "epoch": 1.4272388059701493, "grad_norm": 0.28124016038155175, "learning_rate": 4.351750997707824e-05, "loss": 0.4544, "step": 3060 }, { "epoch": 1.429570895522388, "grad_norm": 0.2834138117437917, "learning_rate": 4.34931183528599e-05, "loss": 0.4584, "step": 3065 }, { "epoch": 1.4319029850746268, "grad_norm": 0.30964249099582436, "learning_rate": 4.346868868027318e-05, "loss": 0.4676, "step": 3070 }, { "epoch": 1.4342350746268657, "grad_norm": 0.2991736709833795, "learning_rate": 4.344422101743739e-05, "loss": 0.4547, "step": 3075 }, { "epoch": 1.4365671641791045, "grad_norm": 0.30248185404835864, "learning_rate": 4.341971542256225e-05, "loss": 0.4729, "step": 3080 }, { "epoch": 1.4388992537313432, "grad_norm": 0.3065773284546266, "learning_rate": 4.339517195394768e-05, "loss": 0.4664, "step": 3085 }, { "epoch": 1.4412313432835822, "grad_norm": 0.26914958437285724, "learning_rate": 4.3370590669983736e-05, "loss": 0.4732, "step": 3090 }, { "epoch": 1.443563432835821, "grad_norm": 0.2741493630505988, "learning_rate": 4.334597162915045e-05, "loss": 0.4687, "step": 3095 }, { "epoch": 1.4458955223880596, "grad_norm": 0.29553119821881635, "learning_rate": 4.332131489001762e-05, "loss": 0.4764, "step": 3100 }, { "epoch": 1.4482276119402986, "grad_norm": 0.3036096319265361, "learning_rate": 4.3296620511244804e-05, "loss": 0.4631, "step": 3105 }, { "epoch": 1.4505597014925373, "grad_norm": 0.2873404314367724, "learning_rate": 4.327188855158106e-05, "loss": 0.4707, "step": 3110 }, { "epoch": 1.452891791044776, "grad_norm": 0.277819659258999, "learning_rate": 4.3247119069864856e-05, "loss": 0.4709, "step": 3115 }, { "epoch": 1.455223880597015, "grad_norm": 0.31673240532077707, "learning_rate": 4.322231212502394e-05, "loss": 0.4558, "step": 3120 }, { "epoch": 1.4575559701492538, "grad_norm": 0.33516815129867766, "learning_rate": 4.3197467776075185e-05, "loss": 0.4706, "step": 3125 }, { "epoch": 1.4598880597014925, "grad_norm": 0.29697398847514433, "learning_rate": 4.317258608212444e-05, "loss": 0.4695, "step": 3130 }, { "epoch": 1.4622201492537314, "grad_norm": 0.29612879088485305, "learning_rate": 4.3147667102366415e-05, "loss": 0.4689, "step": 3135 }, { "epoch": 1.4645522388059702, "grad_norm": 0.2918459917810315, "learning_rate": 4.3122710896084504e-05, "loss": 0.4756, "step": 3140 }, { "epoch": 1.466884328358209, "grad_norm": 0.2898604674814688, "learning_rate": 4.309771752265069e-05, "loss": 0.4627, "step": 3145 }, { "epoch": 1.4692164179104479, "grad_norm": 0.290504861349296, "learning_rate": 4.307268704152535e-05, "loss": 0.4558, "step": 3150 }, { "epoch": 1.4715485074626866, "grad_norm": 0.2865342815638596, "learning_rate": 4.3047619512257164e-05, "loss": 0.4641, "step": 3155 }, { "epoch": 1.4738805970149254, "grad_norm": 0.2918197614523999, "learning_rate": 4.302251499448294e-05, "loss": 0.468, "step": 3160 }, { "epoch": 1.4762126865671643, "grad_norm": 0.28866616902027686, "learning_rate": 4.29973735479275e-05, "loss": 0.4849, "step": 3165 }, { "epoch": 1.478544776119403, "grad_norm": 0.2689182131597249, "learning_rate": 4.297219523240349e-05, "loss": 0.4593, "step": 3170 }, { "epoch": 1.4808768656716418, "grad_norm": 0.31154070881723384, "learning_rate": 4.2946980107811295e-05, "loss": 0.4752, "step": 3175 }, { "epoch": 1.4832089552238805, "grad_norm": 0.2813249137796694, "learning_rate": 4.292172823413887e-05, "loss": 0.4685, "step": 3180 }, { "epoch": 1.4855410447761195, "grad_norm": 0.2922000191302423, "learning_rate": 4.289643967146158e-05, "loss": 0.4714, "step": 3185 }, { "epoch": 1.4878731343283582, "grad_norm": 0.30338983604911735, "learning_rate": 4.28711144799421e-05, "loss": 0.4649, "step": 3190 }, { "epoch": 1.490205223880597, "grad_norm": 0.2693070260579693, "learning_rate": 4.2845752719830206e-05, "loss": 0.4624, "step": 3195 }, { "epoch": 1.4925373134328357, "grad_norm": 0.2952534239751394, "learning_rate": 4.282035445146272e-05, "loss": 0.4794, "step": 3200 }, { "epoch": 1.4948694029850746, "grad_norm": 0.30107734118778406, "learning_rate": 4.2794919735263295e-05, "loss": 0.4685, "step": 3205 }, { "epoch": 1.4972014925373134, "grad_norm": 0.30472188682032275, "learning_rate": 4.276944863174229e-05, "loss": 0.4697, "step": 3210 }, { "epoch": 1.4995335820895521, "grad_norm": 0.299840861864034, "learning_rate": 4.2743941201496644e-05, "loss": 0.4678, "step": 3215 }, { "epoch": 1.501865671641791, "grad_norm": 0.29268345647404764, "learning_rate": 4.271839750520972e-05, "loss": 0.4701, "step": 3220 }, { "epoch": 1.5041977611940298, "grad_norm": 0.28650834159456856, "learning_rate": 4.2692817603651134e-05, "loss": 0.4498, "step": 3225 }, { "epoch": 1.5065298507462686, "grad_norm": 0.2920019138301745, "learning_rate": 4.2667201557676673e-05, "loss": 0.4627, "step": 3230 }, { "epoch": 1.5088619402985075, "grad_norm": 0.2899800218858267, "learning_rate": 4.2641549428228087e-05, "loss": 0.464, "step": 3235 }, { "epoch": 1.5111940298507462, "grad_norm": 0.28426036002972965, "learning_rate": 4.261586127633297e-05, "loss": 0.4603, "step": 3240 }, { "epoch": 1.513526119402985, "grad_norm": 0.2876443496268544, "learning_rate": 4.259013716310465e-05, "loss": 0.4631, "step": 3245 }, { "epoch": 1.515858208955224, "grad_norm": 0.3575185938677565, "learning_rate": 4.256437714974196e-05, "loss": 0.4688, "step": 3250 }, { "epoch": 1.5181902985074627, "grad_norm": 0.3099167959851916, "learning_rate": 4.253858129752916e-05, "loss": 0.4704, "step": 3255 }, { "epoch": 1.5205223880597014, "grad_norm": 0.298694922156145, "learning_rate": 4.251274966783579e-05, "loss": 0.4702, "step": 3260 }, { "epoch": 1.5228544776119404, "grad_norm": 0.32263846233216037, "learning_rate": 4.24868823221165e-05, "loss": 0.4637, "step": 3265 }, { "epoch": 1.525186567164179, "grad_norm": 0.28256695936657295, "learning_rate": 4.246097932191088e-05, "loss": 0.4651, "step": 3270 }, { "epoch": 1.5275186567164178, "grad_norm": 0.314824918153698, "learning_rate": 4.2435040728843376e-05, "loss": 0.4675, "step": 3275 }, { "epoch": 1.5298507462686568, "grad_norm": 0.3171458133861504, "learning_rate": 4.2409066604623096e-05, "loss": 0.472, "step": 3280 }, { "epoch": 1.5321828358208955, "grad_norm": 0.28792894046074463, "learning_rate": 4.23830570110437e-05, "loss": 0.466, "step": 3285 }, { "epoch": 1.5345149253731343, "grad_norm": 0.2771371511430686, "learning_rate": 4.2357012009983185e-05, "loss": 0.4857, "step": 3290 }, { "epoch": 1.5368470149253732, "grad_norm": 0.2975515171520846, "learning_rate": 4.2330931663403844e-05, "loss": 0.4653, "step": 3295 }, { "epoch": 1.539179104477612, "grad_norm": 0.33035702632946984, "learning_rate": 4.230481603335201e-05, "loss": 0.4601, "step": 3300 }, { "epoch": 1.5415111940298507, "grad_norm": 0.2948164232727813, "learning_rate": 4.227866518195797e-05, "loss": 0.4636, "step": 3305 }, { "epoch": 1.5438432835820897, "grad_norm": 0.2831394398329794, "learning_rate": 4.225247917143582e-05, "loss": 0.4846, "step": 3310 }, { "epoch": 1.5461753731343284, "grad_norm": 0.2994184766946835, "learning_rate": 4.22262580640833e-05, "loss": 0.4794, "step": 3315 }, { "epoch": 1.5485074626865671, "grad_norm": 0.3139414845017238, "learning_rate": 4.220000192228161e-05, "loss": 0.477, "step": 3320 }, { "epoch": 1.550839552238806, "grad_norm": 0.27914098155752737, "learning_rate": 4.217371080849535e-05, "loss": 0.4676, "step": 3325 }, { "epoch": 1.5531716417910446, "grad_norm": 0.2864772085729057, "learning_rate": 4.2147384785272284e-05, "loss": 0.4568, "step": 3330 }, { "epoch": 1.5555037313432836, "grad_norm": 0.2984697512112631, "learning_rate": 4.212102391524324e-05, "loss": 0.4761, "step": 3335 }, { "epoch": 1.5578358208955225, "grad_norm": 0.3051851337102855, "learning_rate": 4.209462826112195e-05, "loss": 0.4703, "step": 3340 }, { "epoch": 1.560167910447761, "grad_norm": 0.29931825838646026, "learning_rate": 4.2068197885704904e-05, "loss": 0.4754, "step": 3345 }, { "epoch": 1.5625, "grad_norm": 0.2941237425955385, "learning_rate": 4.204173285187117e-05, "loss": 0.4784, "step": 3350 }, { "epoch": 1.564832089552239, "grad_norm": 0.2893825747685523, "learning_rate": 4.201523322258231e-05, "loss": 0.4564, "step": 3355 }, { "epoch": 1.5671641791044775, "grad_norm": 0.3189548189044172, "learning_rate": 4.1988699060882144e-05, "loss": 0.4654, "step": 3360 }, { "epoch": 1.5694962686567164, "grad_norm": 0.2997107815969718, "learning_rate": 4.196213042989668e-05, "loss": 0.4719, "step": 3365 }, { "epoch": 1.5718283582089554, "grad_norm": 0.3057305437373418, "learning_rate": 4.193552739283393e-05, "loss": 0.4778, "step": 3370 }, { "epoch": 1.574160447761194, "grad_norm": 0.3021601528652931, "learning_rate": 4.190889001298373e-05, "loss": 0.4656, "step": 3375 }, { "epoch": 1.5764925373134329, "grad_norm": 0.2843190875389752, "learning_rate": 4.188221835371766e-05, "loss": 0.4672, "step": 3380 }, { "epoch": 1.5788246268656716, "grad_norm": 0.2969376080633963, "learning_rate": 4.1855512478488816e-05, "loss": 0.4591, "step": 3385 }, { "epoch": 1.5811567164179103, "grad_norm": 0.27683056010772356, "learning_rate": 4.182877245083172e-05, "loss": 0.4757, "step": 3390 }, { "epoch": 1.5834888059701493, "grad_norm": 0.29448673555878324, "learning_rate": 4.180199833436213e-05, "loss": 0.4554, "step": 3395 }, { "epoch": 1.585820895522388, "grad_norm": 0.29840081924940687, "learning_rate": 4.1775190192776905e-05, "loss": 0.4898, "step": 3400 }, { "epoch": 1.5881529850746268, "grad_norm": 0.2869616164265809, "learning_rate": 4.1748348089853864e-05, "loss": 0.4683, "step": 3405 }, { "epoch": 1.5904850746268657, "grad_norm": 0.3035890251488408, "learning_rate": 4.172147208945159e-05, "loss": 0.4846, "step": 3410 }, { "epoch": 1.5928171641791045, "grad_norm": 0.28962994882244664, "learning_rate": 4.1694562255509354e-05, "loss": 0.4626, "step": 3415 }, { "epoch": 1.5951492537313432, "grad_norm": 0.29985351439568614, "learning_rate": 4.1667618652046894e-05, "loss": 0.4609, "step": 3420 }, { "epoch": 1.5974813432835822, "grad_norm": 0.29680635474933514, "learning_rate": 4.164064134316428e-05, "loss": 0.4872, "step": 3425 }, { "epoch": 1.599813432835821, "grad_norm": 0.2982755166256606, "learning_rate": 4.161363039304177e-05, "loss": 0.4822, "step": 3430 }, { "epoch": 1.6021455223880596, "grad_norm": 0.30050486692742423, "learning_rate": 4.158658586593969e-05, "loss": 0.4643, "step": 3435 }, { "epoch": 1.6044776119402986, "grad_norm": 0.2996446910306368, "learning_rate": 4.155950782619819e-05, "loss": 0.4572, "step": 3440 }, { "epoch": 1.6068097014925373, "grad_norm": 0.30142421667431196, "learning_rate": 4.153239633823721e-05, "loss": 0.4722, "step": 3445 }, { "epoch": 1.609141791044776, "grad_norm": 0.33955518592425504, "learning_rate": 4.1505251466556206e-05, "loss": 0.4821, "step": 3450 }, { "epoch": 1.611473880597015, "grad_norm": 0.2932917944667033, "learning_rate": 4.1478073275734105e-05, "loss": 0.4704, "step": 3455 }, { "epoch": 1.6138059701492538, "grad_norm": 0.29288126576481205, "learning_rate": 4.145086183042907e-05, "loss": 0.4619, "step": 3460 }, { "epoch": 1.6161380597014925, "grad_norm": 0.2891243478522058, "learning_rate": 4.142361719537838e-05, "loss": 0.456, "step": 3465 }, { "epoch": 1.6184701492537314, "grad_norm": 0.29667973290745303, "learning_rate": 4.13963394353983e-05, "loss": 0.4668, "step": 3470 }, { "epoch": 1.6208022388059702, "grad_norm": 0.31319625702313963, "learning_rate": 4.136902861538387e-05, "loss": 0.4778, "step": 3475 }, { "epoch": 1.623134328358209, "grad_norm": 0.28091025461838476, "learning_rate": 4.13416848003088e-05, "loss": 0.4688, "step": 3480 }, { "epoch": 1.6254664179104479, "grad_norm": 0.2897453685263902, "learning_rate": 4.1314308055225295e-05, "loss": 0.4835, "step": 3485 }, { "epoch": 1.6277985074626866, "grad_norm": 0.2825265118490395, "learning_rate": 4.128689844526388e-05, "loss": 0.4543, "step": 3490 }, { "epoch": 1.6301305970149254, "grad_norm": 0.29898539437032745, "learning_rate": 4.125945603563331e-05, "loss": 0.4683, "step": 3495 }, { "epoch": 1.6324626865671643, "grad_norm": 0.2781701648860489, "learning_rate": 4.123198089162033e-05, "loss": 0.4536, "step": 3500 }, { "epoch": 1.6347947761194028, "grad_norm": 0.29064518765259745, "learning_rate": 4.1204473078589575e-05, "loss": 0.4766, "step": 3505 }, { "epoch": 1.6371268656716418, "grad_norm": 0.28976326992631135, "learning_rate": 4.117693266198342e-05, "loss": 0.4669, "step": 3510 }, { "epoch": 1.6394589552238807, "grad_norm": 0.30834676759086455, "learning_rate": 4.114935970732178e-05, "loss": 0.48, "step": 3515 }, { "epoch": 1.6417910447761193, "grad_norm": 0.2821308878120327, "learning_rate": 4.112175428020199e-05, "loss": 0.4723, "step": 3520 }, { "epoch": 1.6441231343283582, "grad_norm": 0.28338344689298606, "learning_rate": 4.1094116446298645e-05, "loss": 0.4626, "step": 3525 }, { "epoch": 1.6464552238805972, "grad_norm": 0.29777125112364844, "learning_rate": 4.1066446271363426e-05, "loss": 0.4754, "step": 3530 }, { "epoch": 1.6487873134328357, "grad_norm": 0.29853117825634373, "learning_rate": 4.103874382122496e-05, "loss": 0.4708, "step": 3535 }, { "epoch": 1.6511194029850746, "grad_norm": 0.38361061541876845, "learning_rate": 4.1011009161788655e-05, "loss": 0.4748, "step": 3540 }, { "epoch": 1.6534514925373134, "grad_norm": 0.2905841338990493, "learning_rate": 4.098324235903655e-05, "loss": 0.4488, "step": 3545 }, { "epoch": 1.6557835820895521, "grad_norm": 0.28885109578709295, "learning_rate": 4.095544347902715e-05, "loss": 0.4855, "step": 3550 }, { "epoch": 1.658115671641791, "grad_norm": 0.3451254809440575, "learning_rate": 4.092761258789529e-05, "loss": 0.4877, "step": 3555 }, { "epoch": 1.6604477611940298, "grad_norm": 0.2988637118232085, "learning_rate": 4.089974975185192e-05, "loss": 0.4754, "step": 3560 }, { "epoch": 1.6627798507462686, "grad_norm": 0.29525265169346, "learning_rate": 4.087185503718404e-05, "loss": 0.4532, "step": 3565 }, { "epoch": 1.6651119402985075, "grad_norm": 0.2925658623053148, "learning_rate": 4.084392851025447e-05, "loss": 0.4572, "step": 3570 }, { "epoch": 1.6674440298507462, "grad_norm": 0.3058262088492079, "learning_rate": 4.081597023750169e-05, "loss": 0.4687, "step": 3575 }, { "epoch": 1.669776119402985, "grad_norm": 0.672289051466199, "learning_rate": 4.078798028543974e-05, "loss": 0.495, "step": 3580 }, { "epoch": 1.672108208955224, "grad_norm": 0.29465403109223975, "learning_rate": 4.0759958720658e-05, "loss": 0.4694, "step": 3585 }, { "epoch": 1.6744402985074627, "grad_norm": 0.31241270007307387, "learning_rate": 4.073190560982106e-05, "loss": 0.4606, "step": 3590 }, { "epoch": 1.6767723880597014, "grad_norm": 0.3277236459034337, "learning_rate": 4.07038210196686e-05, "loss": 0.4684, "step": 3595 }, { "epoch": 1.6791044776119404, "grad_norm": 0.2829776462479434, "learning_rate": 4.067570501701513e-05, "loss": 0.466, "step": 3600 }, { "epoch": 1.681436567164179, "grad_norm": 0.315442438482844, "learning_rate": 4.064755766874993e-05, "loss": 0.4624, "step": 3605 }, { "epoch": 1.6837686567164178, "grad_norm": 0.29458034159243274, "learning_rate": 4.061937904183685e-05, "loss": 0.4598, "step": 3610 }, { "epoch": 1.6861007462686568, "grad_norm": 0.297419369575184, "learning_rate": 4.0591169203314145e-05, "loss": 0.4831, "step": 3615 }, { "epoch": 1.6884328358208955, "grad_norm": 0.33331977205232516, "learning_rate": 4.056292822029432e-05, "loss": 0.4851, "step": 3620 }, { "epoch": 1.6907649253731343, "grad_norm": 0.2873330608447031, "learning_rate": 4.053465615996397e-05, "loss": 0.4672, "step": 3625 }, { "epoch": 1.6930970149253732, "grad_norm": 0.28495409327431825, "learning_rate": 4.050635308958366e-05, "loss": 0.4784, "step": 3630 }, { "epoch": 1.695429104477612, "grad_norm": 0.2887251294628341, "learning_rate": 4.047801907648769e-05, "loss": 0.4621, "step": 3635 }, { "epoch": 1.6977611940298507, "grad_norm": 0.2908278476120155, "learning_rate": 4.0449654188083985e-05, "loss": 0.4634, "step": 3640 }, { "epoch": 1.7000932835820897, "grad_norm": 0.30778136007529744, "learning_rate": 4.042125849185394e-05, "loss": 0.4754, "step": 3645 }, { "epoch": 1.7024253731343284, "grad_norm": 0.27188538376324906, "learning_rate": 4.0392832055352205e-05, "loss": 0.4726, "step": 3650 }, { "epoch": 1.7047574626865671, "grad_norm": 0.29367109953817416, "learning_rate": 4.036437494620661e-05, "loss": 0.4654, "step": 3655 }, { "epoch": 1.707089552238806, "grad_norm": 0.28872238672526207, "learning_rate": 4.033588723211793e-05, "loss": 0.4724, "step": 3660 }, { "epoch": 1.7094216417910446, "grad_norm": 0.3080232243674097, "learning_rate": 4.030736898085974e-05, "loss": 0.4704, "step": 3665 }, { "epoch": 1.7117537313432836, "grad_norm": 0.2800675916024454, "learning_rate": 4.02788202602783e-05, "loss": 0.4711, "step": 3670 }, { "epoch": 1.7140858208955225, "grad_norm": 0.3041141028391375, "learning_rate": 4.025024113829233e-05, "loss": 0.474, "step": 3675 }, { "epoch": 1.716417910447761, "grad_norm": 0.28528269723526445, "learning_rate": 4.022163168289287e-05, "loss": 0.4606, "step": 3680 }, { "epoch": 1.71875, "grad_norm": 0.28970960831666037, "learning_rate": 4.019299196214315e-05, "loss": 0.4728, "step": 3685 }, { "epoch": 1.721082089552239, "grad_norm": 0.2950071270891838, "learning_rate": 4.016432204417839e-05, "loss": 0.4733, "step": 3690 }, { "epoch": 1.7234141791044775, "grad_norm": 0.2807367305849103, "learning_rate": 4.0135621997205654e-05, "loss": 0.4508, "step": 3695 }, { "epoch": 1.7257462686567164, "grad_norm": 0.29100263738159415, "learning_rate": 4.010689188950367e-05, "loss": 0.4801, "step": 3700 }, { "epoch": 1.7280783582089554, "grad_norm": 0.2983770142313376, "learning_rate": 4.00781317894227e-05, "loss": 0.4762, "step": 3705 }, { "epoch": 1.730410447761194, "grad_norm": 0.28109635975044434, "learning_rate": 4.004934176538436e-05, "loss": 0.4649, "step": 3710 }, { "epoch": 1.7327425373134329, "grad_norm": 0.3472664674987914, "learning_rate": 4.002052188588144e-05, "loss": 0.4753, "step": 3715 }, { "epoch": 1.7350746268656716, "grad_norm": 0.2956397470634308, "learning_rate": 3.999167221947777e-05, "loss": 0.4721, "step": 3720 }, { "epoch": 1.7374067164179103, "grad_norm": 0.28123664148109456, "learning_rate": 3.9962792834808034e-05, "loss": 0.4795, "step": 3725 }, { "epoch": 1.7397388059701493, "grad_norm": 0.30513612770241016, "learning_rate": 3.993388380057763e-05, "loss": 0.4683, "step": 3730 }, { "epoch": 1.742070895522388, "grad_norm": 0.30130916202984026, "learning_rate": 3.9904945185562484e-05, "loss": 0.4632, "step": 3735 }, { "epoch": 1.7444029850746268, "grad_norm": 0.28221079998789494, "learning_rate": 3.987597705860891e-05, "loss": 0.471, "step": 3740 }, { "epoch": 1.7467350746268657, "grad_norm": 0.2742339603692919, "learning_rate": 3.9846979488633415e-05, "loss": 0.4721, "step": 3745 }, { "epoch": 1.7490671641791045, "grad_norm": 0.2814091603897577, "learning_rate": 3.9817952544622554e-05, "loss": 0.4485, "step": 3750 }, { "epoch": 1.7513992537313432, "grad_norm": 0.26394655530166977, "learning_rate": 3.978889629563277e-05, "loss": 0.4723, "step": 3755 }, { "epoch": 1.7537313432835822, "grad_norm": 0.27261154776793245, "learning_rate": 3.9759810810790236e-05, "loss": 0.4775, "step": 3760 }, { "epoch": 1.756063432835821, "grad_norm": 0.2905087199684674, "learning_rate": 3.9730696159290656e-05, "loss": 0.467, "step": 3765 }, { "epoch": 1.7583955223880596, "grad_norm": 0.3053987677661337, "learning_rate": 3.970155241039914e-05, "loss": 0.4624, "step": 3770 }, { "epoch": 1.7607276119402986, "grad_norm": 0.35828326306462377, "learning_rate": 3.967237963345001e-05, "loss": 0.4704, "step": 3775 }, { "epoch": 1.7630597014925373, "grad_norm": 0.3242378836697219, "learning_rate": 3.964317789784664e-05, "loss": 0.4723, "step": 3780 }, { "epoch": 1.765391791044776, "grad_norm": 0.2785762183888276, "learning_rate": 3.961394727306133e-05, "loss": 0.4638, "step": 3785 }, { "epoch": 1.767723880597015, "grad_norm": 0.2885820777031719, "learning_rate": 3.958468782863508e-05, "loss": 0.4555, "step": 3790 }, { "epoch": 1.7700559701492538, "grad_norm": 0.384071014074954, "learning_rate": 3.955539963417746e-05, "loss": 0.4625, "step": 3795 }, { "epoch": 1.7723880597014925, "grad_norm": 0.29486483247966055, "learning_rate": 3.952608275936644e-05, "loss": 0.4726, "step": 3800 }, { "epoch": 1.7747201492537314, "grad_norm": 0.26687817737368436, "learning_rate": 3.949673727394823e-05, "loss": 0.4509, "step": 3805 }, { "epoch": 1.7770522388059702, "grad_norm": 0.284896836661788, "learning_rate": 3.946736324773707e-05, "loss": 0.4677, "step": 3810 }, { "epoch": 1.779384328358209, "grad_norm": 0.2783237864052887, "learning_rate": 3.943796075061517e-05, "loss": 0.4628, "step": 3815 }, { "epoch": 1.7817164179104479, "grad_norm": 0.2735438873817288, "learning_rate": 3.940852985253239e-05, "loss": 0.4458, "step": 3820 }, { "epoch": 1.7840485074626866, "grad_norm": 0.2916140758264907, "learning_rate": 3.937907062350622e-05, "loss": 0.4712, "step": 3825 }, { "epoch": 1.7863805970149254, "grad_norm": 0.2924784597308533, "learning_rate": 3.9349583133621535e-05, "loss": 0.4666, "step": 3830 }, { "epoch": 1.7887126865671643, "grad_norm": 0.29144443534025816, "learning_rate": 3.9320067453030415e-05, "loss": 0.4639, "step": 3835 }, { "epoch": 1.7910447761194028, "grad_norm": 0.3164103883365729, "learning_rate": 3.9290523651952046e-05, "loss": 0.4669, "step": 3840 }, { "epoch": 1.7933768656716418, "grad_norm": 0.277626969701622, "learning_rate": 3.926095180067249e-05, "loss": 0.4553, "step": 3845 }, { "epoch": 1.7957089552238807, "grad_norm": 0.32377416477682147, "learning_rate": 3.923135196954456e-05, "loss": 0.4845, "step": 3850 }, { "epoch": 1.7980410447761193, "grad_norm": 0.264927997599163, "learning_rate": 3.92017242289876e-05, "loss": 0.4627, "step": 3855 }, { "epoch": 1.8003731343283582, "grad_norm": 0.26910667338704186, "learning_rate": 3.9172068649487405e-05, "loss": 0.4533, "step": 3860 }, { "epoch": 1.8027052238805972, "grad_norm": 0.3012627305931757, "learning_rate": 3.914238530159595e-05, "loss": 0.4725, "step": 3865 }, { "epoch": 1.8050373134328357, "grad_norm": 0.2825760484687656, "learning_rate": 3.9112674255931294e-05, "loss": 0.4692, "step": 3870 }, { "epoch": 1.8073694029850746, "grad_norm": 0.2835609077313444, "learning_rate": 3.908293558317741e-05, "loss": 0.4567, "step": 3875 }, { "epoch": 1.8097014925373134, "grad_norm": 0.2706212044365537, "learning_rate": 3.9053169354083946e-05, "loss": 0.4578, "step": 3880 }, { "epoch": 1.8120335820895521, "grad_norm": 0.28888485330700714, "learning_rate": 3.9023375639466156e-05, "loss": 0.4525, "step": 3885 }, { "epoch": 1.814365671641791, "grad_norm": 0.2821249411638743, "learning_rate": 3.8993554510204664e-05, "loss": 0.4619, "step": 3890 }, { "epoch": 1.8166977611940298, "grad_norm": 0.29986062319312823, "learning_rate": 3.896370603724531e-05, "loss": 0.4731, "step": 3895 }, { "epoch": 1.8190298507462686, "grad_norm": 0.2743138401562156, "learning_rate": 3.893383029159899e-05, "loss": 0.4578, "step": 3900 }, { "epoch": 1.8213619402985075, "grad_norm": 0.3071675546564976, "learning_rate": 3.89039273443415e-05, "loss": 0.4584, "step": 3905 }, { "epoch": 1.8236940298507462, "grad_norm": 0.29121424520242284, "learning_rate": 3.887399726661332e-05, "loss": 0.463, "step": 3910 }, { "epoch": 1.826026119402985, "grad_norm": 0.29615302397841037, "learning_rate": 3.88440401296195e-05, "loss": 0.4713, "step": 3915 }, { "epoch": 1.828358208955224, "grad_norm": 0.29150532281543085, "learning_rate": 3.881405600462943e-05, "loss": 0.4666, "step": 3920 }, { "epoch": 1.8306902985074627, "grad_norm": 0.31348699166645305, "learning_rate": 3.8784044962976776e-05, "loss": 0.4644, "step": 3925 }, { "epoch": 1.8330223880597014, "grad_norm": 0.28691559725015614, "learning_rate": 3.8754007076059155e-05, "loss": 0.4757, "step": 3930 }, { "epoch": 1.8353544776119404, "grad_norm": 0.3168729951555514, "learning_rate": 3.8723942415338105e-05, "loss": 0.4725, "step": 3935 }, { "epoch": 1.837686567164179, "grad_norm": 0.2982331204057746, "learning_rate": 3.869385105233884e-05, "loss": 0.4623, "step": 3940 }, { "epoch": 1.8400186567164178, "grad_norm": 0.3240696041324729, "learning_rate": 3.8663733058650104e-05, "loss": 0.4826, "step": 3945 }, { "epoch": 1.8423507462686568, "grad_norm": 0.28625568790802364, "learning_rate": 3.8633588505924e-05, "loss": 0.46, "step": 3950 }, { "epoch": 1.8446828358208955, "grad_norm": 0.2998401188840404, "learning_rate": 3.8603417465875816e-05, "loss": 0.4636, "step": 3955 }, { "epoch": 1.8470149253731343, "grad_norm": 0.27394284330749197, "learning_rate": 3.857322001028385e-05, "loss": 0.4689, "step": 3960 }, { "epoch": 1.8493470149253732, "grad_norm": 0.2773738924500118, "learning_rate": 3.854299621098925e-05, "loss": 0.4736, "step": 3965 }, { "epoch": 1.851679104477612, "grad_norm": 0.3115525055908915, "learning_rate": 3.851274613989582e-05, "loss": 0.4624, "step": 3970 }, { "epoch": 1.8540111940298507, "grad_norm": 0.290224255052661, "learning_rate": 3.848246986896989e-05, "loss": 0.4655, "step": 3975 }, { "epoch": 1.8563432835820897, "grad_norm": 0.2838303053176141, "learning_rate": 3.84521674702401e-05, "loss": 0.4656, "step": 3980 }, { "epoch": 1.8586753731343284, "grad_norm": 0.28457308156875305, "learning_rate": 3.8421839015797265e-05, "loss": 0.4621, "step": 3985 }, { "epoch": 1.8610074626865671, "grad_norm": 0.299732530595295, "learning_rate": 3.839148457779418e-05, "loss": 0.478, "step": 3990 }, { "epoch": 1.863339552238806, "grad_norm": 0.3025731856867041, "learning_rate": 3.8361104228445455e-05, "loss": 0.4768, "step": 3995 }, { "epoch": 1.8656716417910446, "grad_norm": 0.31658159541808834, "learning_rate": 3.8330698040027345e-05, "loss": 0.4838, "step": 4000 }, { "epoch": 1.8680037313432836, "grad_norm": 0.29050832605019716, "learning_rate": 3.83002660848776e-05, "loss": 0.4778, "step": 4005 }, { "epoch": 1.8703358208955225, "grad_norm": 0.2937532075565073, "learning_rate": 3.826980843539521e-05, "loss": 0.4787, "step": 4010 }, { "epoch": 1.872667910447761, "grad_norm": 0.2750179858628357, "learning_rate": 3.823932516404036e-05, "loss": 0.4513, "step": 4015 }, { "epoch": 1.875, "grad_norm": 0.30785506251418904, "learning_rate": 3.8208816343334156e-05, "loss": 0.465, "step": 4020 }, { "epoch": 1.877332089552239, "grad_norm": 0.2832427919921074, "learning_rate": 3.81782820458585e-05, "loss": 0.4607, "step": 4025 }, { "epoch": 1.8796641791044775, "grad_norm": 0.28757436200080694, "learning_rate": 3.814772234425588e-05, "loss": 0.4681, "step": 4030 }, { "epoch": 1.8819962686567164, "grad_norm": 0.2821054178127719, "learning_rate": 3.8117137311229255e-05, "loss": 0.4596, "step": 4035 }, { "epoch": 1.8843283582089554, "grad_norm": 0.27997802697698426, "learning_rate": 3.808652701954183e-05, "loss": 0.464, "step": 4040 }, { "epoch": 1.886660447761194, "grad_norm": 0.2690748037397427, "learning_rate": 3.805589154201691e-05, "loss": 0.4608, "step": 4045 }, { "epoch": 1.8889925373134329, "grad_norm": 2.0484745231493022, "learning_rate": 3.80252309515377e-05, "loss": 0.4568, "step": 4050 }, { "epoch": 1.8913246268656716, "grad_norm": 0.28093252685438097, "learning_rate": 3.799454532104718e-05, "loss": 0.4591, "step": 4055 }, { "epoch": 1.8936567164179103, "grad_norm": 0.2898740878837333, "learning_rate": 3.7963834723547866e-05, "loss": 0.472, "step": 4060 }, { "epoch": 1.8959888059701493, "grad_norm": 0.28763186213038794, "learning_rate": 3.793309923210171e-05, "loss": 0.4623, "step": 4065 }, { "epoch": 1.898320895522388, "grad_norm": 0.28090490270197704, "learning_rate": 3.7902338919829854e-05, "loss": 0.4749, "step": 4070 }, { "epoch": 1.9006529850746268, "grad_norm": 0.27552897227958767, "learning_rate": 3.78715538599125e-05, "loss": 0.4583, "step": 4075 }, { "epoch": 1.9029850746268657, "grad_norm": 0.27158948936653277, "learning_rate": 3.784074412558875e-05, "loss": 0.4514, "step": 4080 }, { "epoch": 1.9053171641791045, "grad_norm": 0.28390682602524214, "learning_rate": 3.7809909790156355e-05, "loss": 0.4852, "step": 4085 }, { "epoch": 1.9076492537313432, "grad_norm": 0.2966360846270045, "learning_rate": 3.777905092697166e-05, "loss": 0.456, "step": 4090 }, { "epoch": 1.9099813432835822, "grad_norm": 0.2852300405818221, "learning_rate": 3.77481676094493e-05, "loss": 0.4653, "step": 4095 }, { "epoch": 1.912313432835821, "grad_norm": 0.28900220058177695, "learning_rate": 3.771725991106214e-05, "loss": 0.473, "step": 4100 }, { "epoch": 1.9146455223880596, "grad_norm": 0.28326871866010084, "learning_rate": 3.7686327905341014e-05, "loss": 0.4693, "step": 4105 }, { "epoch": 1.9169776119402986, "grad_norm": 0.29866821938798094, "learning_rate": 3.765537166587458e-05, "loss": 0.4677, "step": 4110 }, { "epoch": 1.9193097014925373, "grad_norm": 0.30156769610238693, "learning_rate": 3.762439126630919e-05, "loss": 0.4703, "step": 4115 }, { "epoch": 1.921641791044776, "grad_norm": 0.27715136807046664, "learning_rate": 3.7593386780348625e-05, "loss": 0.4639, "step": 4120 }, { "epoch": 1.923973880597015, "grad_norm": 0.28959038634124634, "learning_rate": 3.756235828175401e-05, "loss": 0.4698, "step": 4125 }, { "epoch": 1.9263059701492538, "grad_norm": 0.28416411032499583, "learning_rate": 3.753130584434357e-05, "loss": 0.4722, "step": 4130 }, { "epoch": 1.9286380597014925, "grad_norm": 0.29376633520604595, "learning_rate": 3.750022954199248e-05, "loss": 0.4738, "step": 4135 }, { "epoch": 1.9309701492537314, "grad_norm": 0.27276075070858236, "learning_rate": 3.7469129448632704e-05, "loss": 0.4615, "step": 4140 }, { "epoch": 1.9333022388059702, "grad_norm": 0.28949062930315683, "learning_rate": 3.743800563825283e-05, "loss": 0.47, "step": 4145 }, { "epoch": 1.935634328358209, "grad_norm": 0.3118606871404512, "learning_rate": 3.74068581848978e-05, "loss": 0.471, "step": 4150 }, { "epoch": 1.9379664179104479, "grad_norm": 0.270098927616095, "learning_rate": 3.737568716266888e-05, "loss": 0.4637, "step": 4155 }, { "epoch": 1.9402985074626866, "grad_norm": 0.2729465438261843, "learning_rate": 3.734449264572336e-05, "loss": 0.4592, "step": 4160 }, { "epoch": 1.9426305970149254, "grad_norm": 0.2655069265458174, "learning_rate": 3.7313274708274445e-05, "loss": 0.4666, "step": 4165 }, { "epoch": 1.9449626865671643, "grad_norm": 0.29821813902496036, "learning_rate": 3.7282033424591043e-05, "loss": 0.4476, "step": 4170 }, { "epoch": 1.9472947761194028, "grad_norm": 0.2643337902845149, "learning_rate": 3.725076886899763e-05, "loss": 0.4503, "step": 4175 }, { "epoch": 1.9496268656716418, "grad_norm": 0.29438810857720943, "learning_rate": 3.721948111587399e-05, "loss": 0.4973, "step": 4180 }, { "epoch": 1.9519589552238807, "grad_norm": 0.2720269034870123, "learning_rate": 3.718817023965519e-05, "loss": 0.4427, "step": 4185 }, { "epoch": 1.9542910447761193, "grad_norm": 0.2804856095653858, "learning_rate": 3.715683631483121e-05, "loss": 0.4717, "step": 4190 }, { "epoch": 1.9566231343283582, "grad_norm": 0.28330070902628207, "learning_rate": 3.712547941594693e-05, "loss": 0.4552, "step": 4195 }, { "epoch": 1.9589552238805972, "grad_norm": 0.2827024094853824, "learning_rate": 3.709409961760186e-05, "loss": 0.472, "step": 4200 }, { "epoch": 1.9612873134328357, "grad_norm": 0.5816781367593028, "learning_rate": 3.706269699444998e-05, "loss": 0.4611, "step": 4205 }, { "epoch": 1.9636194029850746, "grad_norm": 0.2757115740280917, "learning_rate": 3.703127162119959e-05, "loss": 0.4656, "step": 4210 }, { "epoch": 1.9659514925373134, "grad_norm": 0.28114169673420236, "learning_rate": 3.699982357261312e-05, "loss": 0.4537, "step": 4215 }, { "epoch": 1.9682835820895521, "grad_norm": 0.2844187646725584, "learning_rate": 3.69683529235069e-05, "loss": 0.475, "step": 4220 }, { "epoch": 1.970615671641791, "grad_norm": 0.309527049913776, "learning_rate": 3.693685974875109e-05, "loss": 0.4768, "step": 4225 }, { "epoch": 1.9729477611940298, "grad_norm": 0.2871649360790269, "learning_rate": 3.69053441232694e-05, "loss": 0.4637, "step": 4230 }, { "epoch": 1.9752798507462686, "grad_norm": 0.28026198403635133, "learning_rate": 3.6873806122038964e-05, "loss": 0.4626, "step": 4235 }, { "epoch": 1.9776119402985075, "grad_norm": 0.2880512260659336, "learning_rate": 3.684224582009014e-05, "loss": 0.4854, "step": 4240 }, { "epoch": 1.9799440298507462, "grad_norm": 0.2718163970445502, "learning_rate": 3.6810663292506344e-05, "loss": 0.4614, "step": 4245 }, { "epoch": 1.982276119402985, "grad_norm": 0.2862854800089581, "learning_rate": 3.677905861442387e-05, "loss": 0.4594, "step": 4250 }, { "epoch": 1.984608208955224, "grad_norm": 0.28195789175487307, "learning_rate": 3.6747431861031716e-05, "loss": 0.4671, "step": 4255 }, { "epoch": 1.9869402985074627, "grad_norm": 0.2646943857945916, "learning_rate": 3.67157831075714e-05, "loss": 0.464, "step": 4260 }, { "epoch": 1.9892723880597014, "grad_norm": 0.28323805663028806, "learning_rate": 3.6684112429336745e-05, "loss": 0.4661, "step": 4265 }, { "epoch": 1.9916044776119404, "grad_norm": 0.29216496583579926, "learning_rate": 3.665241990167378e-05, "loss": 0.4483, "step": 4270 }, { "epoch": 1.993936567164179, "grad_norm": 0.2731463625816152, "learning_rate": 3.6620705599980494e-05, "loss": 0.4575, "step": 4275 }, { "epoch": 1.9962686567164178, "grad_norm": 0.28944924926883736, "learning_rate": 3.6588969599706665e-05, "loss": 0.4443, "step": 4280 }, { "epoch": 1.9986007462686568, "grad_norm": 0.27672033948776054, "learning_rate": 3.655721197635371e-05, "loss": 0.4623, "step": 4285 }, { "epoch": 2.0009328358208953, "grad_norm": 0.31881062120766374, "learning_rate": 3.652543280547449e-05, "loss": 0.4381, "step": 4290 }, { "epoch": 2.0032649253731343, "grad_norm": 0.3234252965401319, "learning_rate": 3.6493632162673125e-05, "loss": 0.4075, "step": 4295 }, { "epoch": 2.0055970149253732, "grad_norm": 0.3089535798277932, "learning_rate": 3.6461810123604805e-05, "loss": 0.4066, "step": 4300 }, { "epoch": 2.0079291044776117, "grad_norm": 0.3115759624286921, "learning_rate": 3.6429966763975636e-05, "loss": 0.4311, "step": 4305 }, { "epoch": 2.0102611940298507, "grad_norm": 0.290849998223141, "learning_rate": 3.639810215954245e-05, "loss": 0.4161, "step": 4310 }, { "epoch": 2.0125932835820897, "grad_norm": 0.29411478250003953, "learning_rate": 3.6366216386112605e-05, "loss": 0.4066, "step": 4315 }, { "epoch": 2.014925373134328, "grad_norm": 0.3011674560000593, "learning_rate": 3.633430951954383e-05, "loss": 0.4074, "step": 4320 }, { "epoch": 2.017257462686567, "grad_norm": 0.27912593536689817, "learning_rate": 3.6302381635744056e-05, "loss": 0.3939, "step": 4325 }, { "epoch": 2.019589552238806, "grad_norm": 0.31547567931295845, "learning_rate": 3.6270432810671176e-05, "loss": 0.4157, "step": 4330 }, { "epoch": 2.0219216417910446, "grad_norm": 0.2876896407561548, "learning_rate": 3.623846312033294e-05, "loss": 0.4086, "step": 4335 }, { "epoch": 2.0242537313432836, "grad_norm": 0.29958531291670554, "learning_rate": 3.6206472640786696e-05, "loss": 0.4319, "step": 4340 }, { "epoch": 2.0265858208955225, "grad_norm": 0.2909953700947549, "learning_rate": 3.617446144813929e-05, "loss": 0.4013, "step": 4345 }, { "epoch": 2.028917910447761, "grad_norm": 0.30395664083832746, "learning_rate": 3.614242961854683e-05, "loss": 0.4061, "step": 4350 }, { "epoch": 2.03125, "grad_norm": 0.2730208347332672, "learning_rate": 3.611037722821452e-05, "loss": 0.4076, "step": 4355 }, { "epoch": 2.033582089552239, "grad_norm": 0.3093650115610087, "learning_rate": 3.607830435339648e-05, "loss": 0.4095, "step": 4360 }, { "epoch": 2.0359141791044775, "grad_norm": 0.31154904508890424, "learning_rate": 3.604621107039555e-05, "loss": 0.4198, "step": 4365 }, { "epoch": 2.0382462686567164, "grad_norm": 0.3084031333669663, "learning_rate": 3.601409745556315e-05, "loss": 0.4183, "step": 4370 }, { "epoch": 2.0405783582089554, "grad_norm": 0.2931179319757642, "learning_rate": 3.598196358529906e-05, "loss": 0.399, "step": 4375 }, { "epoch": 2.042910447761194, "grad_norm": 0.28412962899673655, "learning_rate": 3.5949809536051235e-05, "loss": 0.4076, "step": 4380 }, { "epoch": 2.045242537313433, "grad_norm": 0.28297789743046964, "learning_rate": 3.591763538431563e-05, "loss": 0.4068, "step": 4385 }, { "epoch": 2.047574626865672, "grad_norm": 0.27851763427328957, "learning_rate": 3.5885441206636065e-05, "loss": 0.4012, "step": 4390 }, { "epoch": 2.0499067164179103, "grad_norm": 0.275165718685644, "learning_rate": 3.585322707960397e-05, "loss": 0.4204, "step": 4395 }, { "epoch": 2.0522388059701493, "grad_norm": 0.3008755875657365, "learning_rate": 3.5820993079858235e-05, "loss": 0.4039, "step": 4400 }, { "epoch": 2.0545708955223883, "grad_norm": 0.2807282932074267, "learning_rate": 3.5788739284085044e-05, "loss": 0.418, "step": 4405 }, { "epoch": 2.0569029850746268, "grad_norm": 0.2966847393025324, "learning_rate": 3.575646576901767e-05, "loss": 0.4128, "step": 4410 }, { "epoch": 2.0592350746268657, "grad_norm": 0.30112551355981815, "learning_rate": 3.57241726114363e-05, "loss": 0.4146, "step": 4415 }, { "epoch": 2.0615671641791047, "grad_norm": 0.28467605922969297, "learning_rate": 3.5691859888167846e-05, "loss": 0.4028, "step": 4420 }, { "epoch": 2.063899253731343, "grad_norm": 0.3138889203462376, "learning_rate": 3.5659527676085774e-05, "loss": 0.3968, "step": 4425 }, { "epoch": 2.066231343283582, "grad_norm": 0.3014015236671151, "learning_rate": 3.56271760521099e-05, "loss": 0.4051, "step": 4430 }, { "epoch": 2.0685634328358207, "grad_norm": 0.2989313942817169, "learning_rate": 3.559480509320625e-05, "loss": 0.4132, "step": 4435 }, { "epoch": 2.0708955223880596, "grad_norm": 0.28614756550952414, "learning_rate": 3.556241487638682e-05, "loss": 0.4019, "step": 4440 }, { "epoch": 2.0732276119402986, "grad_norm": 0.3117635770223885, "learning_rate": 3.5530005478709446e-05, "loss": 0.4268, "step": 4445 }, { "epoch": 2.075559701492537, "grad_norm": 0.29911669634266624, "learning_rate": 3.549757697727759e-05, "loss": 0.4051, "step": 4450 }, { "epoch": 2.077891791044776, "grad_norm": 0.30602404809586486, "learning_rate": 3.546512944924014e-05, "loss": 0.4121, "step": 4455 }, { "epoch": 2.080223880597015, "grad_norm": 0.30821696586457453, "learning_rate": 3.5432662971791264e-05, "loss": 0.4087, "step": 4460 }, { "epoch": 2.0825559701492535, "grad_norm": 0.28446566182131433, "learning_rate": 3.540017762217023e-05, "loss": 0.4021, "step": 4465 }, { "epoch": 2.0848880597014925, "grad_norm": 0.2686132598725414, "learning_rate": 3.5367673477661174e-05, "loss": 0.4093, "step": 4470 }, { "epoch": 2.0872201492537314, "grad_norm": 0.2715088327711594, "learning_rate": 3.533515061559297e-05, "loss": 0.4125, "step": 4475 }, { "epoch": 2.08955223880597, "grad_norm": 0.31237045766727517, "learning_rate": 3.5302609113339e-05, "loss": 0.4062, "step": 4480 }, { "epoch": 2.091884328358209, "grad_norm": 0.2831837416876725, "learning_rate": 3.5270049048317016e-05, "loss": 0.4155, "step": 4485 }, { "epoch": 2.094216417910448, "grad_norm": 0.2808865750856891, "learning_rate": 3.5237470497988905e-05, "loss": 0.3985, "step": 4490 }, { "epoch": 2.0965485074626864, "grad_norm": 0.3024949339139431, "learning_rate": 3.520487353986056e-05, "loss": 0.3986, "step": 4495 }, { "epoch": 2.0988805970149254, "grad_norm": 0.29069870202721354, "learning_rate": 3.517225825148164e-05, "loss": 0.418, "step": 4500 }, { "epoch": 2.1012126865671643, "grad_norm": 0.29796361860195353, "learning_rate": 3.513962471044543e-05, "loss": 0.4252, "step": 4505 }, { "epoch": 2.103544776119403, "grad_norm": 0.3094148838212992, "learning_rate": 3.510697299438864e-05, "loss": 0.412, "step": 4510 }, { "epoch": 2.105876865671642, "grad_norm": 0.3142811503649403, "learning_rate": 3.50743031809912e-05, "loss": 0.4211, "step": 4515 }, { "epoch": 2.1082089552238807, "grad_norm": 0.307890629394432, "learning_rate": 3.504161534797612e-05, "loss": 0.4065, "step": 4520 }, { "epoch": 2.1105410447761193, "grad_norm": 0.31004076773910627, "learning_rate": 3.500890957310926e-05, "loss": 0.4271, "step": 4525 }, { "epoch": 2.112873134328358, "grad_norm": 0.30982793641269996, "learning_rate": 3.497618593419916e-05, "loss": 0.4106, "step": 4530 }, { "epoch": 2.115205223880597, "grad_norm": 0.3173158741449482, "learning_rate": 3.494344450909689e-05, "loss": 0.4247, "step": 4535 }, { "epoch": 2.1175373134328357, "grad_norm": 0.2865273532733797, "learning_rate": 3.491068537569581e-05, "loss": 0.4097, "step": 4540 }, { "epoch": 2.1198694029850746, "grad_norm": 0.2852854631406371, "learning_rate": 3.4877908611931406e-05, "loss": 0.4203, "step": 4545 }, { "epoch": 2.1222014925373136, "grad_norm": 0.3181956134452494, "learning_rate": 3.484511429578113e-05, "loss": 0.4097, "step": 4550 }, { "epoch": 2.124533582089552, "grad_norm": 0.27827136842068667, "learning_rate": 3.481230250526416e-05, "loss": 0.4062, "step": 4555 }, { "epoch": 2.126865671641791, "grad_norm": 0.30357870752192456, "learning_rate": 3.477947331844127e-05, "loss": 0.4071, "step": 4560 }, { "epoch": 2.12919776119403, "grad_norm": 0.29558755102180173, "learning_rate": 3.4746626813414624e-05, "loss": 0.414, "step": 4565 }, { "epoch": 2.1315298507462686, "grad_norm": 0.28423449153726155, "learning_rate": 3.471376306832756e-05, "loss": 0.3935, "step": 4570 }, { "epoch": 2.1338619402985075, "grad_norm": 0.31224475373485744, "learning_rate": 3.468088216136445e-05, "loss": 0.4114, "step": 4575 }, { "epoch": 2.1361940298507465, "grad_norm": 0.28514884647086647, "learning_rate": 3.4647984170750506e-05, "loss": 0.4111, "step": 4580 }, { "epoch": 2.138526119402985, "grad_norm": 0.2864394918090475, "learning_rate": 3.4615069174751566e-05, "loss": 0.4108, "step": 4585 }, { "epoch": 2.140858208955224, "grad_norm": 0.2764059725136837, "learning_rate": 3.4582137251673916e-05, "loss": 0.4023, "step": 4590 }, { "epoch": 2.143190298507463, "grad_norm": 0.28515932005907696, "learning_rate": 3.454918847986414e-05, "loss": 0.4087, "step": 4595 }, { "epoch": 2.1455223880597014, "grad_norm": 0.29727286731571656, "learning_rate": 3.451622293770889e-05, "loss": 0.4257, "step": 4600 }, { "epoch": 2.1478544776119404, "grad_norm": 0.28416641744600796, "learning_rate": 3.448324070363469e-05, "loss": 0.4076, "step": 4605 }, { "epoch": 2.1501865671641793, "grad_norm": 0.29320159858259165, "learning_rate": 3.445024185610783e-05, "loss": 0.4064, "step": 4610 }, { "epoch": 2.152518656716418, "grad_norm": 0.283828474794063, "learning_rate": 3.441722647363408e-05, "loss": 0.4157, "step": 4615 }, { "epoch": 2.154850746268657, "grad_norm": 0.29078041366345425, "learning_rate": 3.438419463475857e-05, "loss": 0.414, "step": 4620 }, { "epoch": 2.1571828358208953, "grad_norm": 0.2966156078514873, "learning_rate": 3.435114641806557e-05, "loss": 0.4133, "step": 4625 }, { "epoch": 2.1595149253731343, "grad_norm": 0.3077525844098013, "learning_rate": 3.43180819021783e-05, "loss": 0.4239, "step": 4630 }, { "epoch": 2.1618470149253732, "grad_norm": 0.28926088209210626, "learning_rate": 3.428500116575881e-05, "loss": 0.4202, "step": 4635 }, { "epoch": 2.1641791044776117, "grad_norm": 0.2850711423951347, "learning_rate": 3.425190428750767e-05, "loss": 0.4042, "step": 4640 }, { "epoch": 2.1665111940298507, "grad_norm": 0.2974387179861358, "learning_rate": 3.4218791346163894e-05, "loss": 0.4076, "step": 4645 }, { "epoch": 2.1688432835820897, "grad_norm": 0.27917353314482707, "learning_rate": 3.41856624205047e-05, "loss": 0.4139, "step": 4650 }, { "epoch": 2.171175373134328, "grad_norm": 0.2959290848121045, "learning_rate": 3.415251758934534e-05, "loss": 0.4278, "step": 4655 }, { "epoch": 2.173507462686567, "grad_norm": 0.2784635874033752, "learning_rate": 3.4119356931538894e-05, "loss": 0.4026, "step": 4660 }, { "epoch": 2.175839552238806, "grad_norm": 0.2820581248639051, "learning_rate": 3.408618052597611e-05, "loss": 0.4122, "step": 4665 }, { "epoch": 2.1781716417910446, "grad_norm": 0.2763241945473573, "learning_rate": 3.405298845158518e-05, "loss": 0.417, "step": 4670 }, { "epoch": 2.1805037313432836, "grad_norm": 0.2801906275625635, "learning_rate": 3.4019780787331586e-05, "loss": 0.4127, "step": 4675 }, { "epoch": 2.1828358208955225, "grad_norm": 0.2745667654376543, "learning_rate": 3.3986557612217904e-05, "loss": 0.4144, "step": 4680 }, { "epoch": 2.185167910447761, "grad_norm": 0.2983871656504047, "learning_rate": 3.3953319005283606e-05, "loss": 0.415, "step": 4685 }, { "epoch": 2.1875, "grad_norm": 0.2771835420407509, "learning_rate": 3.392006504560487e-05, "loss": 0.4204, "step": 4690 }, { "epoch": 2.189832089552239, "grad_norm": 0.2911936826756233, "learning_rate": 3.388679581229441e-05, "loss": 0.4098, "step": 4695 }, { "epoch": 2.1921641791044775, "grad_norm": 0.2866212714288774, "learning_rate": 3.3853511384501256e-05, "loss": 0.4033, "step": 4700 }, { "epoch": 2.1944962686567164, "grad_norm": 0.2902128614080767, "learning_rate": 3.382021184141062e-05, "loss": 0.3994, "step": 4705 }, { "epoch": 2.1968283582089554, "grad_norm": 0.2887110502467031, "learning_rate": 3.378689726224364e-05, "loss": 0.4071, "step": 4710 }, { "epoch": 2.199160447761194, "grad_norm": 0.30578899646712004, "learning_rate": 3.3753567726257255e-05, "loss": 0.4141, "step": 4715 }, { "epoch": 2.201492537313433, "grad_norm": 0.29022555808244327, "learning_rate": 3.372022331274397e-05, "loss": 0.4124, "step": 4720 }, { "epoch": 2.203824626865672, "grad_norm": 0.28496991883498973, "learning_rate": 3.368686410103167e-05, "loss": 0.4047, "step": 4725 }, { "epoch": 2.2061567164179103, "grad_norm": 0.29257995522528646, "learning_rate": 3.3653490170483485e-05, "loss": 0.3957, "step": 4730 }, { "epoch": 2.2084888059701493, "grad_norm": 0.27740660197245065, "learning_rate": 3.3620101600497526e-05, "loss": 0.4041, "step": 4735 }, { "epoch": 2.2108208955223883, "grad_norm": 0.28011945944388206, "learning_rate": 3.358669847050676e-05, "loss": 0.4099, "step": 4740 }, { "epoch": 2.2131529850746268, "grad_norm": 0.29087374906241387, "learning_rate": 3.355328085997876e-05, "loss": 0.4034, "step": 4745 }, { "epoch": 2.2154850746268657, "grad_norm": 0.27504003709460073, "learning_rate": 3.351984884841558e-05, "loss": 0.4088, "step": 4750 }, { "epoch": 2.2178171641791047, "grad_norm": 0.3059413679156015, "learning_rate": 3.348640251535352e-05, "loss": 0.4215, "step": 4755 }, { "epoch": 2.220149253731343, "grad_norm": 0.2813060973258817, "learning_rate": 3.3452941940362946e-05, "loss": 0.426, "step": 4760 }, { "epoch": 2.222481343283582, "grad_norm": 0.2931172429059915, "learning_rate": 3.341946720304812e-05, "loss": 0.4081, "step": 4765 }, { "epoch": 2.2248134328358207, "grad_norm": 0.28572413514067374, "learning_rate": 3.3385978383046996e-05, "loss": 0.4088, "step": 4770 }, { "epoch": 2.2271455223880596, "grad_norm": 0.28811896069722165, "learning_rate": 3.335247556003101e-05, "loss": 0.4226, "step": 4775 }, { "epoch": 2.2294776119402986, "grad_norm": 0.280713035566458, "learning_rate": 3.331895881370495e-05, "loss": 0.4083, "step": 4780 }, { "epoch": 2.231809701492537, "grad_norm": 0.2899028401042888, "learning_rate": 3.32854282238067e-05, "loss": 0.4221, "step": 4785 }, { "epoch": 2.234141791044776, "grad_norm": 0.2740089895988837, "learning_rate": 3.3251883870107066e-05, "loss": 0.4081, "step": 4790 }, { "epoch": 2.236473880597015, "grad_norm": 0.29839334016583247, "learning_rate": 3.3218325832409616e-05, "loss": 0.4265, "step": 4795 }, { "epoch": 2.2388059701492535, "grad_norm": 0.2747649481068881, "learning_rate": 3.3184754190550506e-05, "loss": 0.408, "step": 4800 }, { "epoch": 2.2411380597014925, "grad_norm": 0.2728134873027034, "learning_rate": 3.31511690243982e-05, "loss": 0.4243, "step": 4805 }, { "epoch": 2.2434701492537314, "grad_norm": 0.29621549643357226, "learning_rate": 3.3117570413853373e-05, "loss": 0.414, "step": 4810 }, { "epoch": 2.24580223880597, "grad_norm": 0.283174115917414, "learning_rate": 3.308395843884866e-05, "loss": 0.4145, "step": 4815 }, { "epoch": 2.248134328358209, "grad_norm": 0.28264304752865343, "learning_rate": 3.305033317934852e-05, "loss": 0.412, "step": 4820 }, { "epoch": 2.250466417910448, "grad_norm": 0.298121488132451, "learning_rate": 3.301669471534899e-05, "loss": 0.4122, "step": 4825 }, { "epoch": 2.2527985074626864, "grad_norm": 0.27848014096094087, "learning_rate": 3.298304312687754e-05, "loss": 0.4168, "step": 4830 }, { "epoch": 2.2551305970149254, "grad_norm": 0.31432047529164525, "learning_rate": 3.2949378493992854e-05, "loss": 0.4243, "step": 4835 }, { "epoch": 2.2574626865671643, "grad_norm": 0.2874725423615602, "learning_rate": 3.2915700896784655e-05, "loss": 0.4133, "step": 4840 }, { "epoch": 2.259794776119403, "grad_norm": 0.28204336099997873, "learning_rate": 3.288201041537348e-05, "loss": 0.4189, "step": 4845 }, { "epoch": 2.262126865671642, "grad_norm": 0.3037832069339439, "learning_rate": 3.284830712991057e-05, "loss": 0.4211, "step": 4850 }, { "epoch": 2.2644589552238807, "grad_norm": 0.281877806513229, "learning_rate": 3.281459112057759e-05, "loss": 0.3972, "step": 4855 }, { "epoch": 2.2667910447761193, "grad_norm": 0.29261172535107915, "learning_rate": 3.2780862467586486e-05, "loss": 0.4068, "step": 4860 }, { "epoch": 2.269123134328358, "grad_norm": 0.3045351607173564, "learning_rate": 3.2747121251179294e-05, "loss": 0.4164, "step": 4865 }, { "epoch": 2.271455223880597, "grad_norm": 0.282652839953475, "learning_rate": 3.271336755162792e-05, "loss": 0.4132, "step": 4870 }, { "epoch": 2.2737873134328357, "grad_norm": 0.3074390301651007, "learning_rate": 3.267960144923397e-05, "loss": 0.4245, "step": 4875 }, { "epoch": 2.2761194029850746, "grad_norm": 0.2958738568560361, "learning_rate": 3.264582302432856e-05, "loss": 0.4197, "step": 4880 }, { "epoch": 2.2784514925373136, "grad_norm": 0.2742844721716757, "learning_rate": 3.261203235727214e-05, "loss": 0.4038, "step": 4885 }, { "epoch": 2.280783582089552, "grad_norm": 0.30018453895381486, "learning_rate": 3.2578229528454266e-05, "loss": 0.4216, "step": 4890 }, { "epoch": 2.283115671641791, "grad_norm": 0.29739896496090495, "learning_rate": 3.254441461829344e-05, "loss": 0.4282, "step": 4895 }, { "epoch": 2.28544776119403, "grad_norm": 0.2861256332684493, "learning_rate": 3.251058770723688e-05, "loss": 0.4046, "step": 4900 }, { "epoch": 2.2877798507462686, "grad_norm": 0.28062797639378995, "learning_rate": 3.247674887576038e-05, "loss": 0.4024, "step": 4905 }, { "epoch": 2.2901119402985075, "grad_norm": 0.28692223224511243, "learning_rate": 3.2442898204368086e-05, "loss": 0.4104, "step": 4910 }, { "epoch": 2.2924440298507465, "grad_norm": 0.29068288005576576, "learning_rate": 3.240903577359232e-05, "loss": 0.4249, "step": 4915 }, { "epoch": 2.294776119402985, "grad_norm": 0.276238363217899, "learning_rate": 3.237516166399336e-05, "loss": 0.4109, "step": 4920 }, { "epoch": 2.297108208955224, "grad_norm": 0.29178494630318774, "learning_rate": 3.234127595615927e-05, "loss": 0.4018, "step": 4925 }, { "epoch": 2.299440298507463, "grad_norm": 0.28113019063245775, "learning_rate": 3.230737873070574e-05, "loss": 0.431, "step": 4930 }, { "epoch": 2.3017723880597014, "grad_norm": 0.29752843425004905, "learning_rate": 3.2273470068275816e-05, "loss": 0.4197, "step": 4935 }, { "epoch": 2.3041044776119404, "grad_norm": 0.2829418020096784, "learning_rate": 3.223955004953979e-05, "loss": 0.4177, "step": 4940 }, { "epoch": 2.3064365671641793, "grad_norm": 0.295566531410958, "learning_rate": 3.220561875519495e-05, "loss": 0.4157, "step": 4945 }, { "epoch": 2.308768656716418, "grad_norm": 0.28117721097906195, "learning_rate": 3.2171676265965415e-05, "loss": 0.4038, "step": 4950 }, { "epoch": 2.311100746268657, "grad_norm": 0.27368260654418836, "learning_rate": 3.2137722662601934e-05, "loss": 0.4046, "step": 4955 }, { "epoch": 2.3134328358208958, "grad_norm": 0.2641813634369348, "learning_rate": 3.21037580258817e-05, "loss": 0.4086, "step": 4960 }, { "epoch": 2.3157649253731343, "grad_norm": 0.277953490308689, "learning_rate": 3.206978243660817e-05, "loss": 0.4069, "step": 4965 }, { "epoch": 2.3180970149253732, "grad_norm": 0.2822738341850038, "learning_rate": 3.203579597561082e-05, "loss": 0.3918, "step": 4970 }, { "epoch": 2.3204291044776117, "grad_norm": 0.3045144166316868, "learning_rate": 3.200179872374503e-05, "loss": 0.4033, "step": 4975 }, { "epoch": 2.3227611940298507, "grad_norm": 0.27775668768674633, "learning_rate": 3.1967790761891826e-05, "loss": 0.4133, "step": 4980 }, { "epoch": 2.3250932835820897, "grad_norm": 0.2886189226909925, "learning_rate": 3.1933772170957716e-05, "loss": 0.4074, "step": 4985 }, { "epoch": 2.327425373134328, "grad_norm": 0.2768265978833099, "learning_rate": 3.189974303187452e-05, "loss": 0.4001, "step": 4990 }, { "epoch": 2.329757462686567, "grad_norm": 0.32922759406669644, "learning_rate": 3.186570342559912e-05, "loss": 0.4005, "step": 4995 }, { "epoch": 2.332089552238806, "grad_norm": 0.2837682808638154, "learning_rate": 3.1831653433113317e-05, "loss": 0.4065, "step": 5000 }, { "epoch": 2.3344216417910446, "grad_norm": 0.3054394846042669, "learning_rate": 3.179759313542362e-05, "loss": 0.4244, "step": 5005 }, { "epoch": 2.3367537313432836, "grad_norm": 0.279926107413865, "learning_rate": 3.176352261356105e-05, "loss": 0.4099, "step": 5010 }, { "epoch": 2.3390858208955225, "grad_norm": 0.27185638800771716, "learning_rate": 3.172944194858096e-05, "loss": 0.3942, "step": 5015 }, { "epoch": 2.341417910447761, "grad_norm": 0.30180978463253916, "learning_rate": 3.169535122156283e-05, "loss": 0.4231, "step": 5020 }, { "epoch": 2.34375, "grad_norm": 0.2854288081765936, "learning_rate": 3.166125051361007e-05, "loss": 0.4034, "step": 5025 }, { "epoch": 2.346082089552239, "grad_norm": 0.2884698683601375, "learning_rate": 3.162713990584983e-05, "loss": 0.4121, "step": 5030 }, { "epoch": 2.3484141791044775, "grad_norm": 0.27583665066088314, "learning_rate": 3.159301947943285e-05, "loss": 0.4198, "step": 5035 }, { "epoch": 2.3507462686567164, "grad_norm": 0.3126373897124649, "learning_rate": 3.155888931553319e-05, "loss": 0.4178, "step": 5040 }, { "epoch": 2.3530783582089554, "grad_norm": 0.2897024235219797, "learning_rate": 3.152474949534808e-05, "loss": 0.4121, "step": 5045 }, { "epoch": 2.355410447761194, "grad_norm": 0.2755452100355404, "learning_rate": 3.1490600100097746e-05, "loss": 0.4, "step": 5050 }, { "epoch": 2.357742537313433, "grad_norm": 0.29041520052571956, "learning_rate": 3.145644121102517e-05, "loss": 0.4106, "step": 5055 }, { "epoch": 2.360074626865672, "grad_norm": 0.2899280890509911, "learning_rate": 3.142227290939595e-05, "loss": 0.4146, "step": 5060 }, { "epoch": 2.3624067164179103, "grad_norm": 0.285337952542912, "learning_rate": 3.1388095276498013e-05, "loss": 0.4142, "step": 5065 }, { "epoch": 2.3647388059701493, "grad_norm": 0.2946518805148902, "learning_rate": 3.1353908393641574e-05, "loss": 0.4081, "step": 5070 }, { "epoch": 2.3670708955223883, "grad_norm": 0.28375300948262283, "learning_rate": 3.131971234215877e-05, "loss": 0.4112, "step": 5075 }, { "epoch": 2.3694029850746268, "grad_norm": 0.3035253701239221, "learning_rate": 3.128550720340362e-05, "loss": 0.4141, "step": 5080 }, { "epoch": 2.3717350746268657, "grad_norm": 0.2834693486604403, "learning_rate": 3.125129305875172e-05, "loss": 0.4166, "step": 5085 }, { "epoch": 2.3740671641791042, "grad_norm": 0.29223002722238856, "learning_rate": 3.1217069989600097e-05, "loss": 0.4243, "step": 5090 }, { "epoch": 2.376399253731343, "grad_norm": 0.2926067361321854, "learning_rate": 3.118283807736703e-05, "loss": 0.4163, "step": 5095 }, { "epoch": 2.378731343283582, "grad_norm": 0.2915469453930346, "learning_rate": 3.1148597403491816e-05, "loss": 0.4287, "step": 5100 }, { "epoch": 2.3810634328358207, "grad_norm": 0.28096999325653965, "learning_rate": 3.1114348049434583e-05, "loss": 0.4134, "step": 5105 }, { "epoch": 2.3833955223880596, "grad_norm": 0.27422227316379466, "learning_rate": 3.108009009667615e-05, "loss": 0.3971, "step": 5110 }, { "epoch": 2.3857276119402986, "grad_norm": 0.29994815078710574, "learning_rate": 3.104582362671778e-05, "loss": 0.4366, "step": 5115 }, { "epoch": 2.388059701492537, "grad_norm": 0.2698596228248138, "learning_rate": 3.1011548721080955e-05, "loss": 0.4025, "step": 5120 }, { "epoch": 2.390391791044776, "grad_norm": 0.2665850872966264, "learning_rate": 3.097726546130729e-05, "loss": 0.4062, "step": 5125 }, { "epoch": 2.392723880597015, "grad_norm": 0.2690518553636494, "learning_rate": 3.094297392895825e-05, "loss": 0.4124, "step": 5130 }, { "epoch": 2.3950559701492535, "grad_norm": 0.2969039669493591, "learning_rate": 3.090867420561495e-05, "loss": 0.4272, "step": 5135 }, { "epoch": 2.3973880597014925, "grad_norm": 0.3010487080930608, "learning_rate": 3.0874366372878036e-05, "loss": 0.432, "step": 5140 }, { "epoch": 2.3997201492537314, "grad_norm": 0.26903384809089204, "learning_rate": 3.0840050512367444e-05, "loss": 0.3955, "step": 5145 }, { "epoch": 2.40205223880597, "grad_norm": 0.273490278287781, "learning_rate": 3.0805726705722156e-05, "loss": 0.412, "step": 5150 }, { "epoch": 2.404384328358209, "grad_norm": 0.2786197828089237, "learning_rate": 3.077139503460012e-05, "loss": 0.4044, "step": 5155 }, { "epoch": 2.406716417910448, "grad_norm": 0.27655360077441493, "learning_rate": 3.073705558067797e-05, "loss": 0.419, "step": 5160 }, { "epoch": 2.4090485074626864, "grad_norm": 0.28030844779850084, "learning_rate": 3.070270842565084e-05, "loss": 0.4188, "step": 5165 }, { "epoch": 2.4113805970149254, "grad_norm": 0.28320723471401543, "learning_rate": 3.0668353651232226e-05, "loss": 0.4119, "step": 5170 }, { "epoch": 2.4137126865671643, "grad_norm": 0.28308707598912264, "learning_rate": 3.063399133915371e-05, "loss": 0.4034, "step": 5175 }, { "epoch": 2.416044776119403, "grad_norm": 0.2707636581420033, "learning_rate": 3.059962157116481e-05, "loss": 0.3926, "step": 5180 }, { "epoch": 2.418376865671642, "grad_norm": 0.28587463868343044, "learning_rate": 3.056524442903282e-05, "loss": 0.4185, "step": 5185 }, { "epoch": 2.4207089552238807, "grad_norm": 0.286026501129167, "learning_rate": 3.053085999454254e-05, "loss": 0.417, "step": 5190 }, { "epoch": 2.4230410447761193, "grad_norm": 0.2793548465337239, "learning_rate": 3.0496468349496115e-05, "loss": 0.4204, "step": 5195 }, { "epoch": 2.425373134328358, "grad_norm": 0.27963384866280944, "learning_rate": 3.046206957571288e-05, "loss": 0.4204, "step": 5200 }, { "epoch": 2.427705223880597, "grad_norm": 0.2756064164686805, "learning_rate": 3.0427663755029108e-05, "loss": 0.4084, "step": 5205 }, { "epoch": 2.4300373134328357, "grad_norm": 0.2779997502071945, "learning_rate": 3.0393250969297826e-05, "loss": 0.4154, "step": 5210 }, { "epoch": 2.4323694029850746, "grad_norm": 0.27305040164317934, "learning_rate": 3.0358831300388657e-05, "loss": 0.4254, "step": 5215 }, { "epoch": 2.4347014925373136, "grad_norm": 0.2940125906703864, "learning_rate": 3.0324404830187564e-05, "loss": 0.4116, "step": 5220 }, { "epoch": 2.437033582089552, "grad_norm": 0.2899419232336015, "learning_rate": 3.0289971640596737e-05, "loss": 0.4048, "step": 5225 }, { "epoch": 2.439365671641791, "grad_norm": 0.28289920459318507, "learning_rate": 3.0255531813534322e-05, "loss": 0.4019, "step": 5230 }, { "epoch": 2.44169776119403, "grad_norm": 0.2801297827139387, "learning_rate": 3.022108543093425e-05, "loss": 0.4182, "step": 5235 }, { "epoch": 2.4440298507462686, "grad_norm": 0.2797185886482608, "learning_rate": 3.0186632574746055e-05, "loss": 0.4138, "step": 5240 }, { "epoch": 2.4463619402985075, "grad_norm": 0.29051187663402706, "learning_rate": 3.0152173326934692e-05, "loss": 0.4129, "step": 5245 }, { "epoch": 2.4486940298507465, "grad_norm": 0.29061965535814815, "learning_rate": 3.0117707769480285e-05, "loss": 0.402, "step": 5250 }, { "epoch": 2.451026119402985, "grad_norm": 0.27919097210525046, "learning_rate": 3.008323598437802e-05, "loss": 0.4146, "step": 5255 }, { "epoch": 2.453358208955224, "grad_norm": 0.27387967959242243, "learning_rate": 3.0048758053637844e-05, "loss": 0.4058, "step": 5260 }, { "epoch": 2.455690298507463, "grad_norm": 0.2785060988404884, "learning_rate": 3.001427405928435e-05, "loss": 0.412, "step": 5265 }, { "epoch": 2.4580223880597014, "grad_norm": 0.2718636278675604, "learning_rate": 2.9979784083356567e-05, "loss": 0.4205, "step": 5270 }, { "epoch": 2.4603544776119404, "grad_norm": 0.2889429306316699, "learning_rate": 2.994528820790774e-05, "loss": 0.4192, "step": 5275 }, { "epoch": 2.4626865671641793, "grad_norm": 0.272595186091201, "learning_rate": 2.9910786515005146e-05, "loss": 0.4075, "step": 5280 }, { "epoch": 2.465018656716418, "grad_norm": 0.28724151209192234, "learning_rate": 2.987627908672992e-05, "loss": 0.4107, "step": 5285 }, { "epoch": 2.467350746268657, "grad_norm": 0.2798109206114257, "learning_rate": 2.9841766005176808e-05, "loss": 0.4153, "step": 5290 }, { "epoch": 2.4696828358208958, "grad_norm": 0.26845909997363254, "learning_rate": 2.9807247352454055e-05, "loss": 0.411, "step": 5295 }, { "epoch": 2.4720149253731343, "grad_norm": 0.28826189173677547, "learning_rate": 2.977272321068311e-05, "loss": 0.4166, "step": 5300 }, { "epoch": 2.4743470149253732, "grad_norm": 0.2912001478234137, "learning_rate": 2.9738193661998526e-05, "loss": 0.406, "step": 5305 }, { "epoch": 2.4766791044776117, "grad_norm": 0.2762305785925404, "learning_rate": 2.9703658788547674e-05, "loss": 0.4032, "step": 5310 }, { "epoch": 2.4790111940298507, "grad_norm": 0.26653070578673477, "learning_rate": 2.9669118672490627e-05, "loss": 0.4145, "step": 5315 }, { "epoch": 2.4813432835820897, "grad_norm": 0.2863920248449496, "learning_rate": 2.9634573395999916e-05, "loss": 0.4376, "step": 5320 }, { "epoch": 2.483675373134328, "grad_norm": 0.28304711234868335, "learning_rate": 2.9600023041260355e-05, "loss": 0.3949, "step": 5325 }, { "epoch": 2.486007462686567, "grad_norm": 0.281305045250197, "learning_rate": 2.9565467690468834e-05, "loss": 0.4117, "step": 5330 }, { "epoch": 2.488339552238806, "grad_norm": 0.270975649685555, "learning_rate": 2.953090742583413e-05, "loss": 0.4067, "step": 5335 }, { "epoch": 2.4906716417910446, "grad_norm": 0.2960558887534767, "learning_rate": 2.949634232957671e-05, "loss": 0.4258, "step": 5340 }, { "epoch": 2.4930037313432836, "grad_norm": 0.29266302887281775, "learning_rate": 2.9461772483928547e-05, "loss": 0.4121, "step": 5345 }, { "epoch": 2.4953358208955225, "grad_norm": 0.27916248623782414, "learning_rate": 2.9427197971132886e-05, "loss": 0.4121, "step": 5350 }, { "epoch": 2.497667910447761, "grad_norm": 0.27675343730611807, "learning_rate": 2.9392618873444112e-05, "loss": 0.4145, "step": 5355 }, { "epoch": 2.5, "grad_norm": 0.28919269153632127, "learning_rate": 2.9358035273127483e-05, "loss": 0.4113, "step": 5360 }, { "epoch": 2.502332089552239, "grad_norm": 0.2891928211462449, "learning_rate": 2.9323447252458986e-05, "loss": 0.4052, "step": 5365 }, { "epoch": 2.5046641791044775, "grad_norm": 0.295739992547136, "learning_rate": 2.9288854893725128e-05, "loss": 0.4178, "step": 5370 }, { "epoch": 2.5069962686567164, "grad_norm": 0.2778464143088341, "learning_rate": 2.9254258279222724e-05, "loss": 0.4114, "step": 5375 }, { "epoch": 2.5093283582089554, "grad_norm": 0.2863320332550059, "learning_rate": 2.921965749125873e-05, "loss": 0.4224, "step": 5380 }, { "epoch": 2.511660447761194, "grad_norm": 0.2796082085080106, "learning_rate": 2.9185052612150004e-05, "loss": 0.4116, "step": 5385 }, { "epoch": 2.513992537313433, "grad_norm": 0.293872741321864, "learning_rate": 2.9150443724223174e-05, "loss": 0.4083, "step": 5390 }, { "epoch": 2.5163246268656714, "grad_norm": 0.2841304283933895, "learning_rate": 2.9115830909814374e-05, "loss": 0.4155, "step": 5395 }, { "epoch": 2.5186567164179103, "grad_norm": 0.3044938711586594, "learning_rate": 2.9081214251269095e-05, "loss": 0.4162, "step": 5400 }, { "epoch": 2.5209888059701493, "grad_norm": 0.28560716141656156, "learning_rate": 2.904659383094197e-05, "loss": 0.4182, "step": 5405 }, { "epoch": 2.523320895522388, "grad_norm": 0.27985480179977357, "learning_rate": 2.9011969731196565e-05, "loss": 0.4038, "step": 5410 }, { "epoch": 2.5256529850746268, "grad_norm": 0.28346041394700144, "learning_rate": 2.897734203440524e-05, "loss": 0.3995, "step": 5415 }, { "epoch": 2.5279850746268657, "grad_norm": 0.3036031056892552, "learning_rate": 2.894271082294887e-05, "loss": 0.4168, "step": 5420 }, { "epoch": 2.5303171641791042, "grad_norm": 0.2881290656483007, "learning_rate": 2.8908076179216715e-05, "loss": 0.3989, "step": 5425 }, { "epoch": 2.532649253731343, "grad_norm": 0.2894984276593726, "learning_rate": 2.8873438185606194e-05, "loss": 0.4152, "step": 5430 }, { "epoch": 2.534981343283582, "grad_norm": 0.2667291751962678, "learning_rate": 2.8838796924522694e-05, "loss": 0.4101, "step": 5435 }, { "epoch": 2.5373134328358207, "grad_norm": 0.3051411835422506, "learning_rate": 2.8804152478379377e-05, "loss": 0.4179, "step": 5440 }, { "epoch": 2.5396455223880596, "grad_norm": 0.2903418609575051, "learning_rate": 2.8769504929596986e-05, "loss": 0.4057, "step": 5445 }, { "epoch": 2.5419776119402986, "grad_norm": 0.2862755248530218, "learning_rate": 2.8734854360603646e-05, "loss": 0.4253, "step": 5450 }, { "epoch": 2.544309701492537, "grad_norm": 0.276296409809769, "learning_rate": 2.870020085383466e-05, "loss": 0.4042, "step": 5455 }, { "epoch": 2.546641791044776, "grad_norm": 0.29574012251645554, "learning_rate": 2.8665544491732315e-05, "loss": 0.4215, "step": 5460 }, { "epoch": 2.548973880597015, "grad_norm": 0.29793312097768215, "learning_rate": 2.8630885356745716e-05, "loss": 0.4256, "step": 5465 }, { "epoch": 2.5513059701492535, "grad_norm": 0.30154186807140637, "learning_rate": 2.859622353133054e-05, "loss": 0.4036, "step": 5470 }, { "epoch": 2.5536380597014925, "grad_norm": 0.28859173630686297, "learning_rate": 2.8561559097948863e-05, "loss": 0.4196, "step": 5475 }, { "epoch": 2.5559701492537314, "grad_norm": 0.29324586353741733, "learning_rate": 2.852689213906899e-05, "loss": 0.415, "step": 5480 }, { "epoch": 2.55830223880597, "grad_norm": 0.27211043707371596, "learning_rate": 2.849222273716522e-05, "loss": 0.3965, "step": 5485 }, { "epoch": 2.560634328358209, "grad_norm": 0.2860570477037576, "learning_rate": 2.8457550974717655e-05, "loss": 0.4161, "step": 5490 }, { "epoch": 2.562966417910448, "grad_norm": 0.2902630248354323, "learning_rate": 2.8422876934212027e-05, "loss": 0.4092, "step": 5495 }, { "epoch": 2.5652985074626864, "grad_norm": 0.2864719052145671, "learning_rate": 2.8388200698139484e-05, "loss": 0.4239, "step": 5500 }, { "epoch": 2.5676305970149254, "grad_norm": 0.27692153023764576, "learning_rate": 2.8353522348996388e-05, "loss": 0.4072, "step": 5505 }, { "epoch": 2.5699626865671643, "grad_norm": 0.2653822901709693, "learning_rate": 2.8318841969284145e-05, "loss": 0.4155, "step": 5510 }, { "epoch": 2.572294776119403, "grad_norm": 0.28472816326939837, "learning_rate": 2.8284159641508972e-05, "loss": 0.4094, "step": 5515 }, { "epoch": 2.574626865671642, "grad_norm": 0.31374644208042596, "learning_rate": 2.824947544818175e-05, "loss": 0.4093, "step": 5520 }, { "epoch": 2.5769589552238807, "grad_norm": 0.28907979869724953, "learning_rate": 2.8214789471817754e-05, "loss": 0.409, "step": 5525 }, { "epoch": 2.5792910447761193, "grad_norm": 0.2765191952721562, "learning_rate": 2.8180101794936542e-05, "loss": 0.4235, "step": 5530 }, { "epoch": 2.581623134328358, "grad_norm": 0.28165337816905683, "learning_rate": 2.8145412500061702e-05, "loss": 0.4074, "step": 5535 }, { "epoch": 2.583955223880597, "grad_norm": 0.27406497909290994, "learning_rate": 2.8110721669720663e-05, "loss": 0.4105, "step": 5540 }, { "epoch": 2.5862873134328357, "grad_norm": 0.2855005389725674, "learning_rate": 2.8076029386444524e-05, "loss": 0.4051, "step": 5545 }, { "epoch": 2.5886194029850746, "grad_norm": 0.30352887983172216, "learning_rate": 2.804133573276783e-05, "loss": 0.4197, "step": 5550 }, { "epoch": 2.5909514925373136, "grad_norm": 0.3056166787627263, "learning_rate": 2.800664079122839e-05, "loss": 0.4062, "step": 5555 }, { "epoch": 2.593283582089552, "grad_norm": 0.2867043712327039, "learning_rate": 2.7971944644367066e-05, "loss": 0.4124, "step": 5560 }, { "epoch": 2.595615671641791, "grad_norm": 0.2911292757152944, "learning_rate": 2.79372473747276e-05, "loss": 0.4065, "step": 5565 }, { "epoch": 2.59794776119403, "grad_norm": 0.29257223418200046, "learning_rate": 2.7902549064856405e-05, "loss": 0.4187, "step": 5570 }, { "epoch": 2.6002798507462686, "grad_norm": 0.279366591514459, "learning_rate": 2.7867849797302357e-05, "loss": 0.423, "step": 5575 }, { "epoch": 2.6026119402985075, "grad_norm": 0.28156039699964514, "learning_rate": 2.7833149654616637e-05, "loss": 0.4179, "step": 5580 }, { "epoch": 2.6049440298507465, "grad_norm": 0.3042048077887387, "learning_rate": 2.7798448719352467e-05, "loss": 0.4077, "step": 5585 }, { "epoch": 2.607276119402985, "grad_norm": 0.2968325147942465, "learning_rate": 2.7763747074065e-05, "loss": 0.4051, "step": 5590 }, { "epoch": 2.609608208955224, "grad_norm": 0.27546321496151016, "learning_rate": 2.7729044801311032e-05, "loss": 0.4077, "step": 5595 }, { "epoch": 2.611940298507463, "grad_norm": 0.3132370975356417, "learning_rate": 2.7694341983648884e-05, "loss": 0.4094, "step": 5600 }, { "epoch": 2.6142723880597014, "grad_norm": 0.2985687158099164, "learning_rate": 2.7659638703638173e-05, "loss": 0.4233, "step": 5605 }, { "epoch": 2.6166044776119404, "grad_norm": 0.28648261352353926, "learning_rate": 2.76249350438396e-05, "loss": 0.4104, "step": 5610 }, { "epoch": 2.6189365671641793, "grad_norm": 0.2985524558666485, "learning_rate": 2.7590231086814782e-05, "loss": 0.415, "step": 5615 }, { "epoch": 2.621268656716418, "grad_norm": 0.28173110703312193, "learning_rate": 2.7555526915126033e-05, "loss": 0.3981, "step": 5620 }, { "epoch": 2.623600746268657, "grad_norm": 0.27296364044258065, "learning_rate": 2.7520822611336176e-05, "loss": 0.4008, "step": 5625 }, { "epoch": 2.6259328358208958, "grad_norm": 0.2690360276152885, "learning_rate": 2.7486118258008374e-05, "loss": 0.4018, "step": 5630 }, { "epoch": 2.6282649253731343, "grad_norm": 0.2676482576845998, "learning_rate": 2.7451413937705878e-05, "loss": 0.3996, "step": 5635 }, { "epoch": 2.6305970149253732, "grad_norm": 0.2824074503955977, "learning_rate": 2.7416709732991863e-05, "loss": 0.4158, "step": 5640 }, { "epoch": 2.632929104477612, "grad_norm": 0.2669455037136894, "learning_rate": 2.7382005726429256e-05, "loss": 0.4113, "step": 5645 }, { "epoch": 2.6352611940298507, "grad_norm": 0.2812601155294345, "learning_rate": 2.7347302000580475e-05, "loss": 0.416, "step": 5650 }, { "epoch": 2.6375932835820897, "grad_norm": 0.3034684383954149, "learning_rate": 2.7312598638007308e-05, "loss": 0.4047, "step": 5655 }, { "epoch": 2.6399253731343286, "grad_norm": 0.29449519228968557, "learning_rate": 2.727789572127064e-05, "loss": 0.4224, "step": 5660 }, { "epoch": 2.642257462686567, "grad_norm": 0.25706282265145036, "learning_rate": 2.724319333293033e-05, "loss": 0.4031, "step": 5665 }, { "epoch": 2.644589552238806, "grad_norm": 0.26346309892487835, "learning_rate": 2.7208491555544964e-05, "loss": 0.4079, "step": 5670 }, { "epoch": 2.6469216417910446, "grad_norm": 0.2862179794472313, "learning_rate": 2.7173790471671662e-05, "loss": 0.4156, "step": 5675 }, { "epoch": 2.6492537313432836, "grad_norm": 0.28999236646436255, "learning_rate": 2.7139090163865932e-05, "loss": 0.4147, "step": 5680 }, { "epoch": 2.6515858208955225, "grad_norm": 0.28844716555697525, "learning_rate": 2.7104390714681393e-05, "loss": 0.4201, "step": 5685 }, { "epoch": 2.653917910447761, "grad_norm": 0.3095265631285178, "learning_rate": 2.7069692206669633e-05, "loss": 0.4175, "step": 5690 }, { "epoch": 2.65625, "grad_norm": 0.28013870811075176, "learning_rate": 2.7034994722380036e-05, "loss": 0.4158, "step": 5695 }, { "epoch": 2.658582089552239, "grad_norm": 0.2797690435217867, "learning_rate": 2.7000298344359494e-05, "loss": 0.4014, "step": 5700 }, { "epoch": 2.6609141791044775, "grad_norm": 0.2943187411412463, "learning_rate": 2.6965603155152302e-05, "loss": 0.4134, "step": 5705 }, { "epoch": 2.6632462686567164, "grad_norm": 0.2940132675917417, "learning_rate": 2.6930909237299934e-05, "loss": 0.4183, "step": 5710 }, { "epoch": 2.6655783582089554, "grad_norm": 0.2811997753260035, "learning_rate": 2.6896216673340814e-05, "loss": 0.4102, "step": 5715 }, { "epoch": 2.667910447761194, "grad_norm": 0.2856257771269045, "learning_rate": 2.686152554581016e-05, "loss": 0.4284, "step": 5720 }, { "epoch": 2.670242537313433, "grad_norm": 0.2930136555941714, "learning_rate": 2.682683593723977e-05, "loss": 0.4122, "step": 5725 }, { "epoch": 2.6725746268656714, "grad_norm": 0.27474058410083874, "learning_rate": 2.6792147930157812e-05, "loss": 0.4042, "step": 5730 }, { "epoch": 2.6749067164179103, "grad_norm": 0.2892002405425968, "learning_rate": 2.6757461607088692e-05, "loss": 0.415, "step": 5735 }, { "epoch": 2.6772388059701493, "grad_norm": 0.2975159305537817, "learning_rate": 2.6722777050552737e-05, "loss": 0.4238, "step": 5740 }, { "epoch": 2.679570895522388, "grad_norm": 0.27949534759912076, "learning_rate": 2.668809434306615e-05, "loss": 0.4141, "step": 5745 }, { "epoch": 2.6819029850746268, "grad_norm": 0.33228322279106837, "learning_rate": 2.6653413567140668e-05, "loss": 0.4149, "step": 5750 }, { "epoch": 2.6842350746268657, "grad_norm": 0.28018849008992913, "learning_rate": 2.661873480528347e-05, "loss": 0.4071, "step": 5755 }, { "epoch": 2.6865671641791042, "grad_norm": 0.27904157705623367, "learning_rate": 2.6584058139996942e-05, "loss": 0.4215, "step": 5760 }, { "epoch": 2.688899253731343, "grad_norm": 0.2690268102699883, "learning_rate": 2.654938365377847e-05, "loss": 0.4097, "step": 5765 }, { "epoch": 2.691231343283582, "grad_norm": 0.2848496606484559, "learning_rate": 2.651471142912026e-05, "loss": 0.4124, "step": 5770 }, { "epoch": 2.6935634328358207, "grad_norm": 0.2819800621977774, "learning_rate": 2.6480041548509137e-05, "loss": 0.4066, "step": 5775 }, { "epoch": 2.6958955223880596, "grad_norm": 0.2752947238055661, "learning_rate": 2.644537409442635e-05, "loss": 0.3982, "step": 5780 }, { "epoch": 2.6982276119402986, "grad_norm": 0.2851708828309587, "learning_rate": 2.6410709149347385e-05, "loss": 0.409, "step": 5785 }, { "epoch": 2.700559701492537, "grad_norm": 0.27582020176579347, "learning_rate": 2.6376046795741733e-05, "loss": 0.4075, "step": 5790 }, { "epoch": 2.702891791044776, "grad_norm": 0.27094950828861036, "learning_rate": 2.6341387116072763e-05, "loss": 0.4004, "step": 5795 }, { "epoch": 2.705223880597015, "grad_norm": 0.26626199583050103, "learning_rate": 2.630673019279742e-05, "loss": 0.4052, "step": 5800 }, { "epoch": 2.7075559701492535, "grad_norm": 0.2780641156394685, "learning_rate": 2.6272076108366163e-05, "loss": 0.409, "step": 5805 }, { "epoch": 2.7098880597014925, "grad_norm": 0.31554513537562723, "learning_rate": 2.623742494522264e-05, "loss": 0.4162, "step": 5810 }, { "epoch": 2.7122201492537314, "grad_norm": 0.29162289545767994, "learning_rate": 2.620277678580358e-05, "loss": 0.4217, "step": 5815 }, { "epoch": 2.71455223880597, "grad_norm": 0.30115957339701943, "learning_rate": 2.616813171253855e-05, "loss": 0.4109, "step": 5820 }, { "epoch": 2.716884328358209, "grad_norm": 0.2728025274428326, "learning_rate": 2.6133489807849786e-05, "loss": 0.393, "step": 5825 }, { "epoch": 2.719216417910448, "grad_norm": 0.27739879726231587, "learning_rate": 2.609885115415198e-05, "loss": 0.4006, "step": 5830 }, { "epoch": 2.7215485074626864, "grad_norm": 0.2838470031218356, "learning_rate": 2.6064215833852113e-05, "loss": 0.4164, "step": 5835 }, { "epoch": 2.7238805970149254, "grad_norm": 0.2806398888601681, "learning_rate": 2.602958392934917e-05, "loss": 0.4073, "step": 5840 }, { "epoch": 2.7262126865671643, "grad_norm": 0.27647042511319375, "learning_rate": 2.5994955523034098e-05, "loss": 0.4067, "step": 5845 }, { "epoch": 2.728544776119403, "grad_norm": 0.2985486410590037, "learning_rate": 2.5960330697289447e-05, "loss": 0.4222, "step": 5850 }, { "epoch": 2.730876865671642, "grad_norm": 0.25670922036870814, "learning_rate": 2.5925709534489295e-05, "loss": 0.3985, "step": 5855 }, { "epoch": 2.7332089552238807, "grad_norm": 0.273585312045461, "learning_rate": 2.589109211699899e-05, "loss": 0.4108, "step": 5860 }, { "epoch": 2.7355410447761193, "grad_norm": 0.27569045155004457, "learning_rate": 2.5856478527174955e-05, "loss": 0.4067, "step": 5865 }, { "epoch": 2.737873134328358, "grad_norm": 0.2952661527029023, "learning_rate": 2.5821868847364534e-05, "loss": 0.4129, "step": 5870 }, { "epoch": 2.740205223880597, "grad_norm": 0.27913552337919617, "learning_rate": 2.578726315990576e-05, "loss": 0.4152, "step": 5875 }, { "epoch": 2.7425373134328357, "grad_norm": 0.27645680873293765, "learning_rate": 2.575266154712715e-05, "loss": 0.4103, "step": 5880 }, { "epoch": 2.7448694029850746, "grad_norm": 0.2920431452453726, "learning_rate": 2.571806409134756e-05, "loss": 0.4313, "step": 5885 }, { "epoch": 2.7472014925373136, "grad_norm": 0.2795411735452722, "learning_rate": 2.5683470874875913e-05, "loss": 0.4203, "step": 5890 }, { "epoch": 2.749533582089552, "grad_norm": 0.293790750731234, "learning_rate": 2.564888198001109e-05, "loss": 0.4014, "step": 5895 }, { "epoch": 2.751865671641791, "grad_norm": 0.2901650542584019, "learning_rate": 2.5614297489041673e-05, "loss": 0.4216, "step": 5900 }, { "epoch": 2.75419776119403, "grad_norm": 0.2642768095436784, "learning_rate": 2.5579717484245756e-05, "loss": 0.3909, "step": 5905 }, { "epoch": 2.7565298507462686, "grad_norm": 0.2927024500556146, "learning_rate": 2.554514204789078e-05, "loss": 0.4164, "step": 5910 }, { "epoch": 2.7588619402985075, "grad_norm": 0.2959140046547698, "learning_rate": 2.551057126223329e-05, "loss": 0.4093, "step": 5915 }, { "epoch": 2.7611940298507465, "grad_norm": 0.2727602256413882, "learning_rate": 2.54760052095188e-05, "loss": 0.4116, "step": 5920 }, { "epoch": 2.763526119402985, "grad_norm": 0.2866022430150923, "learning_rate": 2.544144397198155e-05, "loss": 0.4112, "step": 5925 }, { "epoch": 2.765858208955224, "grad_norm": 0.27130649578383254, "learning_rate": 2.5406887631844312e-05, "loss": 0.4155, "step": 5930 }, { "epoch": 2.768190298507463, "grad_norm": 0.2841338621062099, "learning_rate": 2.5372336271318225e-05, "loss": 0.4148, "step": 5935 }, { "epoch": 2.7705223880597014, "grad_norm": 0.28408543549899484, "learning_rate": 2.5337789972602566e-05, "loss": 0.4134, "step": 5940 }, { "epoch": 2.7728544776119404, "grad_norm": 0.2889653215395783, "learning_rate": 2.530324881788459e-05, "loss": 0.4244, "step": 5945 }, { "epoch": 2.7751865671641793, "grad_norm": 0.2713795251198719, "learning_rate": 2.5268712889339296e-05, "loss": 0.3997, "step": 5950 }, { "epoch": 2.777518656716418, "grad_norm": 0.27549897395536704, "learning_rate": 2.5234182269129253e-05, "loss": 0.4112, "step": 5955 }, { "epoch": 2.779850746268657, "grad_norm": 0.2592251976098974, "learning_rate": 2.519965703940441e-05, "loss": 0.4027, "step": 5960 }, { "epoch": 2.7821828358208958, "grad_norm": 0.2782935244608504, "learning_rate": 2.5165137282301877e-05, "loss": 0.4167, "step": 5965 }, { "epoch": 2.7845149253731343, "grad_norm": 0.27066599513779727, "learning_rate": 2.5130623079945754e-05, "loss": 0.4009, "step": 5970 }, { "epoch": 2.7868470149253732, "grad_norm": 0.2742891736600173, "learning_rate": 2.5096114514446934e-05, "loss": 0.3975, "step": 5975 }, { "epoch": 2.789179104477612, "grad_norm": 0.2672327035602551, "learning_rate": 2.5061611667902878e-05, "loss": 0.4143, "step": 5980 }, { "epoch": 2.7915111940298507, "grad_norm": 0.2887518035034608, "learning_rate": 2.5027114622397473e-05, "loss": 0.4169, "step": 5985 }, { "epoch": 2.7938432835820897, "grad_norm": 0.28189038855450327, "learning_rate": 2.4992623460000763e-05, "loss": 0.4234, "step": 5990 }, { "epoch": 2.7961753731343286, "grad_norm": 0.2786487780773611, "learning_rate": 2.495813826276884e-05, "loss": 0.412, "step": 5995 }, { "epoch": 2.798507462686567, "grad_norm": 0.2719145853057059, "learning_rate": 2.4923659112743576e-05, "loss": 0.4038, "step": 6000 }, { "epoch": 2.800839552238806, "grad_norm": 0.28628815227823545, "learning_rate": 2.4889186091952444e-05, "loss": 0.422, "step": 6005 }, { "epoch": 2.8031716417910446, "grad_norm": 0.2723191517372468, "learning_rate": 2.485471928240839e-05, "loss": 0.4099, "step": 6010 }, { "epoch": 2.8055037313432836, "grad_norm": 0.27234552020102326, "learning_rate": 2.4820258766109515e-05, "loss": 0.4031, "step": 6015 }, { "epoch": 2.8078358208955225, "grad_norm": 0.27535451636920344, "learning_rate": 2.4785804625039005e-05, "loss": 0.4095, "step": 6020 }, { "epoch": 2.810167910447761, "grad_norm": 0.27388942690615536, "learning_rate": 2.4751356941164855e-05, "loss": 0.4153, "step": 6025 }, { "epoch": 2.8125, "grad_norm": 0.30022023654850966, "learning_rate": 2.4716915796439678e-05, "loss": 0.4148, "step": 6030 }, { "epoch": 2.814832089552239, "grad_norm": 0.2853605005989762, "learning_rate": 2.4682481272800572e-05, "loss": 0.4117, "step": 6035 }, { "epoch": 2.8171641791044775, "grad_norm": 0.27850833798474206, "learning_rate": 2.4648053452168857e-05, "loss": 0.4063, "step": 6040 }, { "epoch": 2.8194962686567164, "grad_norm": 0.28022304120694014, "learning_rate": 2.4613632416449893e-05, "loss": 0.4042, "step": 6045 }, { "epoch": 2.8218283582089554, "grad_norm": 0.26850900759003576, "learning_rate": 2.4579218247532947e-05, "loss": 0.4039, "step": 6050 }, { "epoch": 2.824160447761194, "grad_norm": 0.29441160018662854, "learning_rate": 2.4544811027290893e-05, "loss": 0.4125, "step": 6055 }, { "epoch": 2.826492537313433, "grad_norm": 0.2821462383420028, "learning_rate": 2.4510410837580106e-05, "loss": 0.4083, "step": 6060 }, { "epoch": 2.8288246268656714, "grad_norm": 0.2668705297066501, "learning_rate": 2.447601776024024e-05, "loss": 0.387, "step": 6065 }, { "epoch": 2.8311567164179103, "grad_norm": 0.26822185455445424, "learning_rate": 2.4441631877093995e-05, "loss": 0.4035, "step": 6070 }, { "epoch": 2.8334888059701493, "grad_norm": 0.28542217389622304, "learning_rate": 2.4407253269947006e-05, "loss": 0.4239, "step": 6075 }, { "epoch": 2.835820895522388, "grad_norm": 0.27797192550616395, "learning_rate": 2.437288202058755e-05, "loss": 0.4238, "step": 6080 }, { "epoch": 2.8381529850746268, "grad_norm": 0.2925242797976724, "learning_rate": 2.4338518210786416e-05, "loss": 0.425, "step": 6085 }, { "epoch": 2.8404850746268657, "grad_norm": 0.28831398853519075, "learning_rate": 2.430416192229672e-05, "loss": 0.4175, "step": 6090 }, { "epoch": 2.8428171641791042, "grad_norm": 0.29353608995620356, "learning_rate": 2.4269813236853632e-05, "loss": 0.409, "step": 6095 }, { "epoch": 2.845149253731343, "grad_norm": 0.28621297644531635, "learning_rate": 2.423547223617429e-05, "loss": 0.4098, "step": 6100 }, { "epoch": 2.847481343283582, "grad_norm": 0.2953980427056792, "learning_rate": 2.42011390019575e-05, "loss": 0.4071, "step": 6105 }, { "epoch": 2.8498134328358207, "grad_norm": 0.2754264861976559, "learning_rate": 2.4166813615883625e-05, "loss": 0.3986, "step": 6110 }, { "epoch": 2.8521455223880596, "grad_norm": 0.2951236268952993, "learning_rate": 2.4132496159614366e-05, "loss": 0.4152, "step": 6115 }, { "epoch": 2.8544776119402986, "grad_norm": 0.27554678619833345, "learning_rate": 2.4098186714792504e-05, "loss": 0.4072, "step": 6120 }, { "epoch": 2.856809701492537, "grad_norm": 0.2914017018490711, "learning_rate": 2.4063885363041822e-05, "loss": 0.4257, "step": 6125 }, { "epoch": 2.859141791044776, "grad_norm": 0.2800604474920407, "learning_rate": 2.4029592185966804e-05, "loss": 0.4149, "step": 6130 }, { "epoch": 2.861473880597015, "grad_norm": 0.28548957069923653, "learning_rate": 2.399530726515251e-05, "loss": 0.405, "step": 6135 }, { "epoch": 2.8638059701492535, "grad_norm": 0.2774201042557407, "learning_rate": 2.396103068216437e-05, "loss": 0.4112, "step": 6140 }, { "epoch": 2.8661380597014925, "grad_norm": 0.273388980880744, "learning_rate": 2.3926762518547928e-05, "loss": 0.3976, "step": 6145 }, { "epoch": 2.8684701492537314, "grad_norm": 0.2804872162667335, "learning_rate": 2.3892502855828762e-05, "loss": 0.4131, "step": 6150 }, { "epoch": 2.87080223880597, "grad_norm": 0.27949669719196674, "learning_rate": 2.3858251775512176e-05, "loss": 0.4188, "step": 6155 }, { "epoch": 2.873134328358209, "grad_norm": 0.25983655843989606, "learning_rate": 2.3824009359083073e-05, "loss": 0.4107, "step": 6160 }, { "epoch": 2.875466417910448, "grad_norm": 0.2754673105506416, "learning_rate": 2.378977568800576e-05, "loss": 0.4205, "step": 6165 }, { "epoch": 2.8777985074626864, "grad_norm": 0.28518601105020835, "learning_rate": 2.375555084372371e-05, "loss": 0.4191, "step": 6170 }, { "epoch": 2.8801305970149254, "grad_norm": 0.28788519456011885, "learning_rate": 2.3721334907659424e-05, "loss": 0.4155, "step": 6175 }, { "epoch": 2.8824626865671643, "grad_norm": 0.2770491640874757, "learning_rate": 2.368712796121419e-05, "loss": 0.394, "step": 6180 }, { "epoch": 2.884794776119403, "grad_norm": 0.2814497527622503, "learning_rate": 2.3652930085767904e-05, "loss": 0.4068, "step": 6185 }, { "epoch": 2.887126865671642, "grad_norm": 0.27003535724671546, "learning_rate": 2.3618741362678915e-05, "loss": 0.4051, "step": 6190 }, { "epoch": 2.8894589552238807, "grad_norm": 0.28587410487025533, "learning_rate": 2.358456187328376e-05, "loss": 0.4, "step": 6195 }, { "epoch": 2.8917910447761193, "grad_norm": 0.28006737047252983, "learning_rate": 2.355039169889704e-05, "loss": 0.3964, "step": 6200 }, { "epoch": 2.894123134328358, "grad_norm": 0.27039253822544085, "learning_rate": 2.3516230920811166e-05, "loss": 0.4069, "step": 6205 }, { "epoch": 2.896455223880597, "grad_norm": 0.27152563572079286, "learning_rate": 2.3482079620296223e-05, "loss": 0.3973, "step": 6210 }, { "epoch": 2.8987873134328357, "grad_norm": 0.2622421448419082, "learning_rate": 2.3447937878599725e-05, "loss": 0.4132, "step": 6215 }, { "epoch": 2.9011194029850746, "grad_norm": 0.28811310790730704, "learning_rate": 2.3413805776946453e-05, "loss": 0.416, "step": 6220 }, { "epoch": 2.9034514925373136, "grad_norm": 0.27430743895091675, "learning_rate": 2.337968339653826e-05, "loss": 0.4136, "step": 6225 }, { "epoch": 2.905783582089552, "grad_norm": 0.27918670253148936, "learning_rate": 2.3345570818553874e-05, "loss": 0.415, "step": 6230 }, { "epoch": 2.908115671641791, "grad_norm": 0.28554768365250915, "learning_rate": 2.331146812414869e-05, "loss": 0.4052, "step": 6235 }, { "epoch": 2.91044776119403, "grad_norm": 0.2940493856184246, "learning_rate": 2.3277375394454594e-05, "loss": 0.4188, "step": 6240 }, { "epoch": 2.9127798507462686, "grad_norm": 0.27488954873907623, "learning_rate": 2.324329271057976e-05, "loss": 0.4104, "step": 6245 }, { "epoch": 2.9151119402985075, "grad_norm": 0.28880902465659813, "learning_rate": 2.3209220153608486e-05, "loss": 0.4178, "step": 6250 }, { "epoch": 2.9174440298507465, "grad_norm": 0.29052244267674837, "learning_rate": 2.3175157804600954e-05, "loss": 0.4222, "step": 6255 }, { "epoch": 2.919776119402985, "grad_norm": 0.2627300930416156, "learning_rate": 2.3141105744593065e-05, "loss": 0.3983, "step": 6260 }, { "epoch": 2.922108208955224, "grad_norm": 0.280243129650194, "learning_rate": 2.310706405459625e-05, "loss": 0.3902, "step": 6265 }, { "epoch": 2.924440298507463, "grad_norm": 0.2781334130034801, "learning_rate": 2.3073032815597263e-05, "loss": 0.3998, "step": 6270 }, { "epoch": 2.9267723880597014, "grad_norm": 0.272955033103825, "learning_rate": 2.3039012108557982e-05, "loss": 0.3972, "step": 6275 }, { "epoch": 2.9291044776119404, "grad_norm": 0.28126265525899574, "learning_rate": 2.3005002014415274e-05, "loss": 0.4096, "step": 6280 }, { "epoch": 2.9314365671641793, "grad_norm": 0.2789063156339077, "learning_rate": 2.297100261408069e-05, "loss": 0.4257, "step": 6285 }, { "epoch": 2.933768656716418, "grad_norm": 0.28078572532746543, "learning_rate": 2.2937013988440405e-05, "loss": 0.4153, "step": 6290 }, { "epoch": 2.936100746268657, "grad_norm": 0.281368167429435, "learning_rate": 2.2903036218354912e-05, "loss": 0.4071, "step": 6295 }, { "epoch": 2.9384328358208958, "grad_norm": 0.2703619206258202, "learning_rate": 2.2869069384658908e-05, "loss": 0.4165, "step": 6300 }, { "epoch": 2.9407649253731343, "grad_norm": 0.26845853459048463, "learning_rate": 2.283511356816106e-05, "loss": 0.4165, "step": 6305 }, { "epoch": 2.9430970149253732, "grad_norm": 0.2885452454815897, "learning_rate": 2.280116884964383e-05, "loss": 0.3965, "step": 6310 }, { "epoch": 2.945429104477612, "grad_norm": 0.27554312886701704, "learning_rate": 2.276723530986327e-05, "loss": 0.42, "step": 6315 }, { "epoch": 2.9477611940298507, "grad_norm": 0.27776439348336873, "learning_rate": 2.273331302954883e-05, "loss": 0.4111, "step": 6320 }, { "epoch": 2.9500932835820897, "grad_norm": 0.2786780529733663, "learning_rate": 2.26994020894032e-05, "loss": 0.4089, "step": 6325 }, { "epoch": 2.9524253731343286, "grad_norm": 0.2792189653311061, "learning_rate": 2.266550257010207e-05, "loss": 0.4058, "step": 6330 }, { "epoch": 2.954757462686567, "grad_norm": 0.2822817511378771, "learning_rate": 2.2631614552293963e-05, "loss": 0.4059, "step": 6335 }, { "epoch": 2.957089552238806, "grad_norm": 0.2966584941970794, "learning_rate": 2.2597738116600048e-05, "loss": 0.4081, "step": 6340 }, { "epoch": 2.9594216417910446, "grad_norm": 0.2726401926914794, "learning_rate": 2.2563873343613916e-05, "loss": 0.4102, "step": 6345 }, { "epoch": 2.9617537313432836, "grad_norm": 0.2766867578012419, "learning_rate": 2.2530020313901446e-05, "loss": 0.4113, "step": 6350 }, { "epoch": 2.9640858208955225, "grad_norm": 0.273401769171858, "learning_rate": 2.249617910800056e-05, "loss": 0.4, "step": 6355 }, { "epoch": 2.966417910447761, "grad_norm": 0.2906625571583108, "learning_rate": 2.2462349806421035e-05, "loss": 0.4084, "step": 6360 }, { "epoch": 2.96875, "grad_norm": 0.2852874501292654, "learning_rate": 2.2428532489644368e-05, "loss": 0.4027, "step": 6365 }, { "epoch": 2.971082089552239, "grad_norm": 0.28738659379610354, "learning_rate": 2.2394727238123497e-05, "loss": 0.4128, "step": 6370 }, { "epoch": 2.9734141791044775, "grad_norm": 0.28162509035492167, "learning_rate": 2.236093413228269e-05, "loss": 0.4191, "step": 6375 }, { "epoch": 2.9757462686567164, "grad_norm": 0.268894067022804, "learning_rate": 2.2327153252517323e-05, "loss": 0.416, "step": 6380 }, { "epoch": 2.9780783582089554, "grad_norm": 0.28601838537920177, "learning_rate": 2.2293384679193645e-05, "loss": 0.4163, "step": 6385 }, { "epoch": 2.980410447761194, "grad_norm": 0.2784046526239699, "learning_rate": 2.2259628492648676e-05, "loss": 0.4213, "step": 6390 }, { "epoch": 2.982742537313433, "grad_norm": 0.30020855587450185, "learning_rate": 2.2225884773189936e-05, "loss": 0.4084, "step": 6395 }, { "epoch": 2.9850746268656714, "grad_norm": 0.27350322779970815, "learning_rate": 2.2192153601095293e-05, "loss": 0.4065, "step": 6400 }, { "epoch": 2.9874067164179103, "grad_norm": 0.2867640977109047, "learning_rate": 2.2158435056612775e-05, "loss": 0.4088, "step": 6405 }, { "epoch": 2.9897388059701493, "grad_norm": 0.2825535767474217, "learning_rate": 2.2124729219960343e-05, "loss": 0.4004, "step": 6410 }, { "epoch": 2.992070895522388, "grad_norm": 0.2802532264395162, "learning_rate": 2.2091036171325754e-05, "loss": 0.4182, "step": 6415 }, { "epoch": 2.9944029850746268, "grad_norm": 0.27606126866272457, "learning_rate": 2.2057355990866328e-05, "loss": 0.3903, "step": 6420 }, { "epoch": 2.9967350746268657, "grad_norm": 0.2992510407621408, "learning_rate": 2.2023688758708767e-05, "loss": 0.4232, "step": 6425 }, { "epoch": 2.9990671641791042, "grad_norm": 0.2961903818676689, "learning_rate": 2.199003455494898e-05, "loss": 0.4181, "step": 6430 }, { "epoch": 3.001399253731343, "grad_norm": 0.29447608965123995, "learning_rate": 2.1956393459651864e-05, "loss": 0.3603, "step": 6435 }, { "epoch": 3.003731343283582, "grad_norm": 0.3455794601417332, "learning_rate": 2.1922765552851155e-05, "loss": 0.3571, "step": 6440 }, { "epoch": 3.006063432835821, "grad_norm": 0.29839339850864605, "learning_rate": 2.1889150914549195e-05, "loss": 0.3572, "step": 6445 }, { "epoch": 3.0083955223880596, "grad_norm": 0.2983698363796104, "learning_rate": 2.1855549624716755e-05, "loss": 0.3578, "step": 6450 }, { "epoch": 3.0107276119402986, "grad_norm": 0.2979562037621808, "learning_rate": 2.182196176329287e-05, "loss": 0.3583, "step": 6455 }, { "epoch": 3.013059701492537, "grad_norm": 0.2892786140279995, "learning_rate": 2.1788387410184603e-05, "loss": 0.3484, "step": 6460 }, { "epoch": 3.015391791044776, "grad_norm": 0.29390305693348817, "learning_rate": 2.1754826645266895e-05, "loss": 0.3504, "step": 6465 }, { "epoch": 3.017723880597015, "grad_norm": 0.29901633330822297, "learning_rate": 2.172127954838238e-05, "loss": 0.359, "step": 6470 }, { "epoch": 3.0200559701492535, "grad_norm": 0.2789710410650097, "learning_rate": 2.1687746199341118e-05, "loss": 0.3463, "step": 6475 }, { "epoch": 3.0223880597014925, "grad_norm": 0.2963856176949294, "learning_rate": 2.165422667792053e-05, "loss": 0.3484, "step": 6480 }, { "epoch": 3.0247201492537314, "grad_norm": 0.2919256718078638, "learning_rate": 2.162072106386509e-05, "loss": 0.3547, "step": 6485 }, { "epoch": 3.02705223880597, "grad_norm": 0.3100798615711843, "learning_rate": 2.158722943688621e-05, "loss": 0.3534, "step": 6490 }, { "epoch": 3.029384328358209, "grad_norm": 0.30040176737252433, "learning_rate": 2.1553751876662014e-05, "loss": 0.3598, "step": 6495 }, { "epoch": 3.031716417910448, "grad_norm": 0.30082506062590647, "learning_rate": 2.1520288462837175e-05, "loss": 0.3567, "step": 6500 }, { "epoch": 3.0340485074626864, "grad_norm": 0.3000515122135271, "learning_rate": 2.148683927502269e-05, "loss": 0.351, "step": 6505 }, { "epoch": 3.0363805970149254, "grad_norm": 0.28579595143852315, "learning_rate": 2.1453404392795735e-05, "loss": 0.3552, "step": 6510 }, { "epoch": 3.0387126865671643, "grad_norm": 0.30209809935248516, "learning_rate": 2.1419983895699437e-05, "loss": 0.3555, "step": 6515 }, { "epoch": 3.041044776119403, "grad_norm": 0.3032678191228412, "learning_rate": 2.1386577863242708e-05, "loss": 0.3587, "step": 6520 }, { "epoch": 3.043376865671642, "grad_norm": 0.2923717618432948, "learning_rate": 2.135318637490004e-05, "loss": 0.353, "step": 6525 }, { "epoch": 3.0457089552238807, "grad_norm": 0.29094999055387305, "learning_rate": 2.131980951011134e-05, "loss": 0.3471, "step": 6530 }, { "epoch": 3.0480410447761193, "grad_norm": 0.290577265042397, "learning_rate": 2.1286447348281695e-05, "loss": 0.3562, "step": 6535 }, { "epoch": 3.050373134328358, "grad_norm": 0.29568071360146014, "learning_rate": 2.1253099968781237e-05, "loss": 0.3606, "step": 6540 }, { "epoch": 3.052705223880597, "grad_norm": 0.3127875610676049, "learning_rate": 2.1219767450944938e-05, "loss": 0.3561, "step": 6545 }, { "epoch": 3.0550373134328357, "grad_norm": 0.29583046406332286, "learning_rate": 2.1186449874072385e-05, "loss": 0.359, "step": 6550 }, { "epoch": 3.0573694029850746, "grad_norm": 0.29410336443585583, "learning_rate": 2.115314731742764e-05, "loss": 0.3502, "step": 6555 }, { "epoch": 3.0597014925373136, "grad_norm": 0.3078855148116754, "learning_rate": 2.1119859860239023e-05, "loss": 0.3551, "step": 6560 }, { "epoch": 3.062033582089552, "grad_norm": 0.30468395748571503, "learning_rate": 2.108658758169893e-05, "loss": 0.3432, "step": 6565 }, { "epoch": 3.064365671641791, "grad_norm": 0.29890200470101985, "learning_rate": 2.105333056096367e-05, "loss": 0.3529, "step": 6570 }, { "epoch": 3.06669776119403, "grad_norm": 0.29080684806906665, "learning_rate": 2.1020088877153215e-05, "loss": 0.3423, "step": 6575 }, { "epoch": 3.0690298507462686, "grad_norm": 0.3097531024507575, "learning_rate": 2.0986862609351077e-05, "loss": 0.3584, "step": 6580 }, { "epoch": 3.0713619402985075, "grad_norm": 0.30316614170818473, "learning_rate": 2.0953651836604083e-05, "loss": 0.3592, "step": 6585 }, { "epoch": 3.0736940298507465, "grad_norm": 0.3155692103883952, "learning_rate": 2.0920456637922194e-05, "loss": 0.3547, "step": 6590 }, { "epoch": 3.076026119402985, "grad_norm": 0.30067457508880385, "learning_rate": 2.088727709227833e-05, "loss": 0.3552, "step": 6595 }, { "epoch": 3.078358208955224, "grad_norm": 0.29600198957184537, "learning_rate": 2.085411327860815e-05, "loss": 0.3583, "step": 6600 }, { "epoch": 3.080690298507463, "grad_norm": 0.30844352273172254, "learning_rate": 2.0820965275809913e-05, "loss": 0.3533, "step": 6605 }, { "epoch": 3.0830223880597014, "grad_norm": 0.29052588804988433, "learning_rate": 2.0787833162744257e-05, "loss": 0.3542, "step": 6610 }, { "epoch": 3.0853544776119404, "grad_norm": 0.2867141374871356, "learning_rate": 2.0754717018234003e-05, "loss": 0.3599, "step": 6615 }, { "epoch": 3.0876865671641793, "grad_norm": 0.3234908170210263, "learning_rate": 2.072161692106399e-05, "loss": 0.3661, "step": 6620 }, { "epoch": 3.090018656716418, "grad_norm": 0.31554727624628637, "learning_rate": 2.0688532949980882e-05, "loss": 0.3527, "step": 6625 }, { "epoch": 3.092350746268657, "grad_norm": 0.29801561092221956, "learning_rate": 2.0655465183692972e-05, "loss": 0.343, "step": 6630 }, { "epoch": 3.0946828358208953, "grad_norm": 0.3036097791219128, "learning_rate": 2.0622413700870026e-05, "loss": 0.3581, "step": 6635 }, { "epoch": 3.0970149253731343, "grad_norm": 0.3189472543996483, "learning_rate": 2.0589378580143016e-05, "loss": 0.3579, "step": 6640 }, { "epoch": 3.0993470149253732, "grad_norm": 0.2976173557848646, "learning_rate": 2.0556359900104054e-05, "loss": 0.3553, "step": 6645 }, { "epoch": 3.1016791044776117, "grad_norm": 0.29259763711258235, "learning_rate": 2.0523357739306087e-05, "loss": 0.3587, "step": 6650 }, { "epoch": 3.1040111940298507, "grad_norm": 0.3006248239553923, "learning_rate": 2.049037217626279e-05, "loss": 0.3674, "step": 6655 }, { "epoch": 3.1063432835820897, "grad_norm": 0.29352228439662065, "learning_rate": 2.0457403289448353e-05, "loss": 0.3541, "step": 6660 }, { "epoch": 3.108675373134328, "grad_norm": 0.2961940658971492, "learning_rate": 2.0424451157297264e-05, "loss": 0.3542, "step": 6665 }, { "epoch": 3.111007462686567, "grad_norm": 0.290049340482157, "learning_rate": 2.0391515858204184e-05, "loss": 0.3508, "step": 6670 }, { "epoch": 3.113339552238806, "grad_norm": 0.30384121278658505, "learning_rate": 2.0358597470523706e-05, "loss": 0.3567, "step": 6675 }, { "epoch": 3.1156716417910446, "grad_norm": 0.2904261269557149, "learning_rate": 2.0325696072570195e-05, "loss": 0.3499, "step": 6680 }, { "epoch": 3.1180037313432836, "grad_norm": 0.30665085997334063, "learning_rate": 2.0292811742617607e-05, "loss": 0.3551, "step": 6685 }, { "epoch": 3.1203358208955225, "grad_norm": 0.3012630149466457, "learning_rate": 2.0259944558899274e-05, "loss": 0.3596, "step": 6690 }, { "epoch": 3.122667910447761, "grad_norm": 0.29914131490926643, "learning_rate": 2.022709459960776e-05, "loss": 0.3578, "step": 6695 }, { "epoch": 3.125, "grad_norm": 0.3005799635263445, "learning_rate": 2.0194261942894628e-05, "loss": 0.354, "step": 6700 }, { "epoch": 3.127332089552239, "grad_norm": 0.2946536199624025, "learning_rate": 2.016144666687029e-05, "loss": 0.3567, "step": 6705 }, { "epoch": 3.1296641791044775, "grad_norm": 0.2965017063959669, "learning_rate": 2.0128648849603798e-05, "loss": 0.3419, "step": 6710 }, { "epoch": 3.1319962686567164, "grad_norm": 0.3210844018432783, "learning_rate": 2.009586856912269e-05, "loss": 0.3494, "step": 6715 }, { "epoch": 3.1343283582089554, "grad_norm": 0.30539425074075616, "learning_rate": 2.006310590341276e-05, "loss": 0.3498, "step": 6720 }, { "epoch": 3.136660447761194, "grad_norm": 0.3089313715361165, "learning_rate": 2.00303609304179e-05, "loss": 0.3676, "step": 6725 }, { "epoch": 3.138992537313433, "grad_norm": 0.2866234733628717, "learning_rate": 1.9997633728039933e-05, "loss": 0.3568, "step": 6730 }, { "epoch": 3.141324626865672, "grad_norm": 0.2829313288884777, "learning_rate": 1.996492437413838e-05, "loss": 0.3491, "step": 6735 }, { "epoch": 3.1436567164179103, "grad_norm": 0.28957969372300074, "learning_rate": 1.99322329465303e-05, "loss": 0.3518, "step": 6740 }, { "epoch": 3.1459888059701493, "grad_norm": 0.29690568529927974, "learning_rate": 1.989955952299012e-05, "loss": 0.3588, "step": 6745 }, { "epoch": 3.1483208955223883, "grad_norm": 0.31041497087069536, "learning_rate": 1.986690418124942e-05, "loss": 0.357, "step": 6750 }, { "epoch": 3.1506529850746268, "grad_norm": 0.28719082167290644, "learning_rate": 1.983426699899677e-05, "loss": 0.3514, "step": 6755 }, { "epoch": 3.1529850746268657, "grad_norm": 0.28973405339250263, "learning_rate": 1.9801648053877548e-05, "loss": 0.3583, "step": 6760 }, { "epoch": 3.1553171641791047, "grad_norm": 0.3025138616470345, "learning_rate": 1.9769047423493707e-05, "loss": 0.3431, "step": 6765 }, { "epoch": 3.157649253731343, "grad_norm": 0.2992747076101132, "learning_rate": 1.9736465185403675e-05, "loss": 0.3557, "step": 6770 }, { "epoch": 3.159981343283582, "grad_norm": 0.2980866387033325, "learning_rate": 1.9703901417122106e-05, "loss": 0.3588, "step": 6775 }, { "epoch": 3.1623134328358207, "grad_norm": 0.31429916830712795, "learning_rate": 1.96713561961197e-05, "loss": 0.3557, "step": 6780 }, { "epoch": 3.1646455223880596, "grad_norm": 0.2969627700809447, "learning_rate": 1.9638829599823056e-05, "loss": 0.3601, "step": 6785 }, { "epoch": 3.1669776119402986, "grad_norm": 0.3016229284531387, "learning_rate": 1.9606321705614427e-05, "loss": 0.3661, "step": 6790 }, { "epoch": 3.169309701492537, "grad_norm": 0.2996671120171106, "learning_rate": 1.957383259083162e-05, "loss": 0.353, "step": 6795 }, { "epoch": 3.171641791044776, "grad_norm": 0.29953874692442234, "learning_rate": 1.9541362332767737e-05, "loss": 0.3617, "step": 6800 }, { "epoch": 3.173973880597015, "grad_norm": 0.29798395780821113, "learning_rate": 1.950891100867102e-05, "loss": 0.3614, "step": 6805 }, { "epoch": 3.1763059701492535, "grad_norm": 0.29693447605592366, "learning_rate": 1.9476478695744683e-05, "loss": 0.3464, "step": 6810 }, { "epoch": 3.1786380597014925, "grad_norm": 0.3021593605869337, "learning_rate": 1.944406547114667e-05, "loss": 0.3393, "step": 6815 }, { "epoch": 3.1809701492537314, "grad_norm": 0.3115430377376789, "learning_rate": 1.9411671411989568e-05, "loss": 0.3651, "step": 6820 }, { "epoch": 3.18330223880597, "grad_norm": 0.29574269341167575, "learning_rate": 1.937929659534034e-05, "loss": 0.3513, "step": 6825 }, { "epoch": 3.185634328358209, "grad_norm": 0.30458990095504884, "learning_rate": 1.9346941098220157e-05, "loss": 0.3555, "step": 6830 }, { "epoch": 3.187966417910448, "grad_norm": 0.2889301935054613, "learning_rate": 1.931460499760426e-05, "loss": 0.3545, "step": 6835 }, { "epoch": 3.1902985074626864, "grad_norm": 0.28622291784133747, "learning_rate": 1.9282288370421708e-05, "loss": 0.3564, "step": 6840 }, { "epoch": 3.1926305970149254, "grad_norm": 0.29924446087839024, "learning_rate": 1.9249991293555276e-05, "loss": 0.3474, "step": 6845 }, { "epoch": 3.1949626865671643, "grad_norm": 0.30677374520233397, "learning_rate": 1.9217713843841195e-05, "loss": 0.3417, "step": 6850 }, { "epoch": 3.197294776119403, "grad_norm": 0.3148998715161739, "learning_rate": 1.9185456098068998e-05, "loss": 0.355, "step": 6855 }, { "epoch": 3.199626865671642, "grad_norm": 0.2937117801207235, "learning_rate": 1.9153218132981375e-05, "loss": 0.3431, "step": 6860 }, { "epoch": 3.2019589552238807, "grad_norm": 0.29009130936505945, "learning_rate": 1.912100002527392e-05, "loss": 0.3499, "step": 6865 }, { "epoch": 3.2042910447761193, "grad_norm": 0.29537675494856114, "learning_rate": 1.9088801851595008e-05, "loss": 0.3578, "step": 6870 }, { "epoch": 3.206623134328358, "grad_norm": 0.2858903892018682, "learning_rate": 1.9056623688545588e-05, "loss": 0.3478, "step": 6875 }, { "epoch": 3.208955223880597, "grad_norm": 0.3241333633993836, "learning_rate": 1.9024465612678993e-05, "loss": 0.3577, "step": 6880 }, { "epoch": 3.2112873134328357, "grad_norm": 0.28589251207963373, "learning_rate": 1.8992327700500772e-05, "loss": 0.3597, "step": 6885 }, { "epoch": 3.2136194029850746, "grad_norm": 0.3035438052986818, "learning_rate": 1.8960210028468512e-05, "loss": 0.3495, "step": 6890 }, { "epoch": 3.2159514925373136, "grad_norm": 0.28910591792630547, "learning_rate": 1.8928112672991626e-05, "loss": 0.3504, "step": 6895 }, { "epoch": 3.218283582089552, "grad_norm": 0.3037201698001736, "learning_rate": 1.8896035710431225e-05, "loss": 0.3612, "step": 6900 }, { "epoch": 3.220615671641791, "grad_norm": 0.28882676056459866, "learning_rate": 1.8863979217099874e-05, "loss": 0.3659, "step": 6905 }, { "epoch": 3.22294776119403, "grad_norm": 0.2989668551793024, "learning_rate": 1.8831943269261467e-05, "loss": 0.3545, "step": 6910 }, { "epoch": 3.2252798507462686, "grad_norm": 0.29193133471757204, "learning_rate": 1.8799927943130986e-05, "loss": 0.3603, "step": 6915 }, { "epoch": 3.2276119402985075, "grad_norm": 0.30949296435681634, "learning_rate": 1.8767933314874382e-05, "loss": 0.3633, "step": 6920 }, { "epoch": 3.2299440298507465, "grad_norm": 0.3065242706339426, "learning_rate": 1.8735959460608364e-05, "loss": 0.369, "step": 6925 }, { "epoch": 3.232276119402985, "grad_norm": 0.2953858643806452, "learning_rate": 1.8704006456400202e-05, "loss": 0.366, "step": 6930 }, { "epoch": 3.234608208955224, "grad_norm": 0.3018249414875586, "learning_rate": 1.8672074378267573e-05, "loss": 0.3556, "step": 6935 }, { "epoch": 3.236940298507463, "grad_norm": 0.3005298801189957, "learning_rate": 1.8640163302178377e-05, "loss": 0.3617, "step": 6940 }, { "epoch": 3.2392723880597014, "grad_norm": 0.28458140255596587, "learning_rate": 1.8608273304050515e-05, "loss": 0.3671, "step": 6945 }, { "epoch": 3.2416044776119404, "grad_norm": 0.29491303982906303, "learning_rate": 1.8576404459751796e-05, "loss": 0.3568, "step": 6950 }, { "epoch": 3.2439365671641793, "grad_norm": 0.30018786507788925, "learning_rate": 1.8544556845099657e-05, "loss": 0.3596, "step": 6955 }, { "epoch": 3.246268656716418, "grad_norm": 0.28218276847430507, "learning_rate": 1.851273053586105e-05, "loss": 0.355, "step": 6960 }, { "epoch": 3.248600746268657, "grad_norm": 0.30337390779467155, "learning_rate": 1.8480925607752248e-05, "loss": 0.3634, "step": 6965 }, { "epoch": 3.2509328358208958, "grad_norm": 0.2850643019325219, "learning_rate": 1.8449142136438628e-05, "loss": 0.3642, "step": 6970 }, { "epoch": 3.2532649253731343, "grad_norm": 0.30795699046928965, "learning_rate": 1.8417380197534558e-05, "loss": 0.3593, "step": 6975 }, { "epoch": 3.2555970149253732, "grad_norm": 0.2969312938375177, "learning_rate": 1.8385639866603144e-05, "loss": 0.3598, "step": 6980 }, { "epoch": 3.2579291044776117, "grad_norm": 0.3111317706880131, "learning_rate": 1.8353921219156102e-05, "loss": 0.3672, "step": 6985 }, { "epoch": 3.2602611940298507, "grad_norm": 0.293661219738354, "learning_rate": 1.8322224330653576e-05, "loss": 0.3587, "step": 6990 }, { "epoch": 3.2625932835820897, "grad_norm": 0.29492244148923374, "learning_rate": 1.8290549276503915e-05, "loss": 0.3625, "step": 6995 }, { "epoch": 3.264925373134328, "grad_norm": 0.2936169682190564, "learning_rate": 1.825889613206355e-05, "loss": 0.3578, "step": 7000 }, { "epoch": 3.267257462686567, "grad_norm": 0.28325832009960206, "learning_rate": 1.8227264972636758e-05, "loss": 0.3416, "step": 7005 }, { "epoch": 3.269589552238806, "grad_norm": 0.28991524900544435, "learning_rate": 1.8195655873475554e-05, "loss": 0.3695, "step": 7010 }, { "epoch": 3.2719216417910446, "grad_norm": 0.29631605420318874, "learning_rate": 1.8164068909779437e-05, "loss": 0.357, "step": 7015 }, { "epoch": 3.2742537313432836, "grad_norm": 0.2951203632471497, "learning_rate": 1.8132504156695245e-05, "loss": 0.3646, "step": 7020 }, { "epoch": 3.2765858208955225, "grad_norm": 0.29822315612233086, "learning_rate": 1.8100961689317003e-05, "loss": 0.3563, "step": 7025 }, { "epoch": 3.278917910447761, "grad_norm": 0.30523298724271275, "learning_rate": 1.806944158268568e-05, "loss": 0.35, "step": 7030 }, { "epoch": 3.28125, "grad_norm": 0.2893648521599262, "learning_rate": 1.803794391178908e-05, "loss": 0.3537, "step": 7035 }, { "epoch": 3.283582089552239, "grad_norm": 0.30598195630866526, "learning_rate": 1.8006468751561628e-05, "loss": 0.3548, "step": 7040 }, { "epoch": 3.2859141791044775, "grad_norm": 0.2881423074803722, "learning_rate": 1.797501617688417e-05, "loss": 0.3469, "step": 7045 }, { "epoch": 3.2882462686567164, "grad_norm": 0.28597811785300375, "learning_rate": 1.7943586262583846e-05, "loss": 0.3595, "step": 7050 }, { "epoch": 3.2905783582089554, "grad_norm": 0.29221752262721756, "learning_rate": 1.791217908343386e-05, "loss": 0.3636, "step": 7055 }, { "epoch": 3.292910447761194, "grad_norm": 0.2983849711552808, "learning_rate": 1.7880794714153366e-05, "loss": 0.3561, "step": 7060 }, { "epoch": 3.295242537313433, "grad_norm": 0.29908190509128885, "learning_rate": 1.784943322940722e-05, "loss": 0.3603, "step": 7065 }, { "epoch": 3.297574626865672, "grad_norm": 0.3026869235356606, "learning_rate": 1.7818094703805837e-05, "loss": 0.3525, "step": 7070 }, { "epoch": 3.2999067164179103, "grad_norm": 0.2957506159452286, "learning_rate": 1.7786779211905048e-05, "loss": 0.3689, "step": 7075 }, { "epoch": 3.3022388059701493, "grad_norm": 0.301492493788461, "learning_rate": 1.775548682820582e-05, "loss": 0.3575, "step": 7080 }, { "epoch": 3.3045708955223883, "grad_norm": 0.2982471245972684, "learning_rate": 1.7724217627154204e-05, "loss": 0.341, "step": 7085 }, { "epoch": 3.3069029850746268, "grad_norm": 0.2824816660797434, "learning_rate": 1.7692971683141063e-05, "loss": 0.3552, "step": 7090 }, { "epoch": 3.3092350746268657, "grad_norm": 0.30590967771629135, "learning_rate": 1.766174907050196e-05, "loss": 0.3489, "step": 7095 }, { "epoch": 3.3115671641791042, "grad_norm": 0.2993562561790773, "learning_rate": 1.7630549863516914e-05, "loss": 0.3596, "step": 7100 }, { "epoch": 3.313899253731343, "grad_norm": 0.29634582089281203, "learning_rate": 1.75993741364103e-05, "loss": 0.3507, "step": 7105 }, { "epoch": 3.316231343283582, "grad_norm": 0.28979586820760966, "learning_rate": 1.7568221963350605e-05, "loss": 0.3526, "step": 7110 }, { "epoch": 3.3185634328358207, "grad_norm": 0.2998540775071929, "learning_rate": 1.7537093418450294e-05, "loss": 0.3652, "step": 7115 }, { "epoch": 3.3208955223880596, "grad_norm": 0.2884637652647664, "learning_rate": 1.750598857576561e-05, "loss": 0.3579, "step": 7120 }, { "epoch": 3.3232276119402986, "grad_norm": 0.3020758480237682, "learning_rate": 1.7474907509296412e-05, "loss": 0.3575, "step": 7125 }, { "epoch": 3.325559701492537, "grad_norm": 0.2994130658722527, "learning_rate": 1.7443850292986007e-05, "loss": 0.3633, "step": 7130 }, { "epoch": 3.327891791044776, "grad_norm": 0.3022593992134436, "learning_rate": 1.7412817000720937e-05, "loss": 0.3692, "step": 7135 }, { "epoch": 3.330223880597015, "grad_norm": 0.2989695268525266, "learning_rate": 1.738180770633085e-05, "loss": 0.351, "step": 7140 }, { "epoch": 3.3325559701492535, "grad_norm": 0.3016182900510012, "learning_rate": 1.7350822483588277e-05, "loss": 0.37, "step": 7145 }, { "epoch": 3.3348880597014925, "grad_norm": 0.29528198780478276, "learning_rate": 1.7319861406208504e-05, "loss": 0.3479, "step": 7150 }, { "epoch": 3.3372201492537314, "grad_norm": 0.30763309702601777, "learning_rate": 1.728892454784938e-05, "loss": 0.358, "step": 7155 }, { "epoch": 3.33955223880597, "grad_norm": 0.289355823489564, "learning_rate": 1.7258011982111094e-05, "loss": 0.356, "step": 7160 }, { "epoch": 3.341884328358209, "grad_norm": 0.291997999864154, "learning_rate": 1.72271237825361e-05, "loss": 0.3507, "step": 7165 }, { "epoch": 3.344216417910448, "grad_norm": 0.2862786767923253, "learning_rate": 1.7196260022608828e-05, "loss": 0.3577, "step": 7170 }, { "epoch": 3.3465485074626864, "grad_norm": 0.29916374705043713, "learning_rate": 1.716542077575561e-05, "loss": 0.3617, "step": 7175 }, { "epoch": 3.3488805970149254, "grad_norm": 0.29293315198292913, "learning_rate": 1.7134606115344427e-05, "loss": 0.3669, "step": 7180 }, { "epoch": 3.3512126865671643, "grad_norm": 0.2965563505699907, "learning_rate": 1.710381611468479e-05, "loss": 0.3485, "step": 7185 }, { "epoch": 3.353544776119403, "grad_norm": 0.28617524302166936, "learning_rate": 1.7073050847027537e-05, "loss": 0.3509, "step": 7190 }, { "epoch": 3.355876865671642, "grad_norm": 0.29980421076951325, "learning_rate": 1.704231038556465e-05, "loss": 0.3628, "step": 7195 }, { "epoch": 3.3582089552238807, "grad_norm": 0.29749248017489394, "learning_rate": 1.701159480342911e-05, "loss": 0.3614, "step": 7200 }, { "epoch": 3.3605410447761193, "grad_norm": 0.3033475269047319, "learning_rate": 1.6980904173694727e-05, "loss": 0.3543, "step": 7205 }, { "epoch": 3.362873134328358, "grad_norm": 0.3104454034130757, "learning_rate": 1.695023856937591e-05, "loss": 0.3596, "step": 7210 }, { "epoch": 3.365205223880597, "grad_norm": 0.2891899425126918, "learning_rate": 1.691959806342756e-05, "loss": 0.3543, "step": 7215 }, { "epoch": 3.3675373134328357, "grad_norm": 0.2830918264381227, "learning_rate": 1.688898272874485e-05, "loss": 0.3526, "step": 7220 }, { "epoch": 3.3698694029850746, "grad_norm": 0.29642244772042775, "learning_rate": 1.685839263816308e-05, "loss": 0.3564, "step": 7225 }, { "epoch": 3.3722014925373136, "grad_norm": 0.3027726761517845, "learning_rate": 1.68278278644575e-05, "loss": 0.3528, "step": 7230 }, { "epoch": 3.374533582089552, "grad_norm": 0.29970322770662877, "learning_rate": 1.679728848034311e-05, "loss": 0.372, "step": 7235 }, { "epoch": 3.376865671641791, "grad_norm": 0.310164620010251, "learning_rate": 1.6766774558474523e-05, "loss": 0.3622, "step": 7240 }, { "epoch": 3.37919776119403, "grad_norm": 0.28976992598803347, "learning_rate": 1.6736286171445763e-05, "loss": 0.3508, "step": 7245 }, { "epoch": 3.3815298507462686, "grad_norm": 0.3055185755758128, "learning_rate": 1.670582339179012e-05, "loss": 0.3562, "step": 7250 }, { "epoch": 3.3838619402985075, "grad_norm": 0.29315170621686065, "learning_rate": 1.667538629197996e-05, "loss": 0.3573, "step": 7255 }, { "epoch": 3.3861940298507465, "grad_norm": 0.31256247191430114, "learning_rate": 1.664497494442654e-05, "loss": 0.3661, "step": 7260 }, { "epoch": 3.388526119402985, "grad_norm": 0.2927451773553934, "learning_rate": 1.6614589421479876e-05, "loss": 0.3566, "step": 7265 }, { "epoch": 3.390858208955224, "grad_norm": 0.3057016980465087, "learning_rate": 1.6584229795428514e-05, "loss": 0.3703, "step": 7270 }, { "epoch": 3.393190298507463, "grad_norm": 0.28887104263183544, "learning_rate": 1.655389613849943e-05, "loss": 0.3536, "step": 7275 }, { "epoch": 3.3955223880597014, "grad_norm": 0.30096723143735166, "learning_rate": 1.6523588522857784e-05, "loss": 0.352, "step": 7280 }, { "epoch": 3.3978544776119404, "grad_norm": 0.2963666471500546, "learning_rate": 1.6493307020606796e-05, "loss": 0.3576, "step": 7285 }, { "epoch": 3.4001865671641793, "grad_norm": 0.3153839312827231, "learning_rate": 1.6463051703787557e-05, "loss": 0.364, "step": 7290 }, { "epoch": 3.402518656716418, "grad_norm": 0.308337115378691, "learning_rate": 1.6432822644378888e-05, "loss": 0.3556, "step": 7295 }, { "epoch": 3.404850746268657, "grad_norm": 0.2908645436082711, "learning_rate": 1.6402619914297087e-05, "loss": 0.3606, "step": 7300 }, { "epoch": 3.4071828358208958, "grad_norm": 0.3053957448604142, "learning_rate": 1.6372443585395875e-05, "loss": 0.3545, "step": 7305 }, { "epoch": 3.4095149253731343, "grad_norm": 0.2997207426350879, "learning_rate": 1.634229372946611e-05, "loss": 0.3483, "step": 7310 }, { "epoch": 3.4118470149253732, "grad_norm": 0.288922523926626, "learning_rate": 1.6312170418235705e-05, "loss": 0.3705, "step": 7315 }, { "epoch": 3.4141791044776117, "grad_norm": 0.28517609910564695, "learning_rate": 1.6282073723369427e-05, "loss": 0.3498, "step": 7320 }, { "epoch": 3.4165111940298507, "grad_norm": 0.2968035508891607, "learning_rate": 1.625200371646867e-05, "loss": 0.3488, "step": 7325 }, { "epoch": 3.4188432835820897, "grad_norm": 0.30421710948514274, "learning_rate": 1.622196046907141e-05, "loss": 0.3586, "step": 7330 }, { "epoch": 3.421175373134328, "grad_norm": 0.28737596048474884, "learning_rate": 1.619194405265189e-05, "loss": 0.3452, "step": 7335 }, { "epoch": 3.423507462686567, "grad_norm": 0.3000961249319295, "learning_rate": 1.616195453862057e-05, "loss": 0.3419, "step": 7340 }, { "epoch": 3.425839552238806, "grad_norm": 0.29376616438648107, "learning_rate": 1.6131991998323893e-05, "loss": 0.3479, "step": 7345 }, { "epoch": 3.4281716417910446, "grad_norm": 0.3071367625652116, "learning_rate": 1.6102056503044115e-05, "loss": 0.354, "step": 7350 }, { "epoch": 3.4305037313432836, "grad_norm": 0.2951722407093875, "learning_rate": 1.6072148123999182e-05, "loss": 0.3627, "step": 7355 }, { "epoch": 3.4328358208955225, "grad_norm": 0.31279544754907523, "learning_rate": 1.6042266932342498e-05, "loss": 0.3599, "step": 7360 }, { "epoch": 3.435167910447761, "grad_norm": 0.3015943433010243, "learning_rate": 1.60124129991628e-05, "loss": 0.3552, "step": 7365 }, { "epoch": 3.4375, "grad_norm": 0.2878769505903286, "learning_rate": 1.5982586395483983e-05, "loss": 0.3519, "step": 7370 }, { "epoch": 3.439832089552239, "grad_norm": 0.2944002778546623, "learning_rate": 1.595278719226491e-05, "loss": 0.3529, "step": 7375 }, { "epoch": 3.4421641791044775, "grad_norm": 0.28464717367395115, "learning_rate": 1.5923015460399277e-05, "loss": 0.3643, "step": 7380 }, { "epoch": 3.4444962686567164, "grad_norm": 0.29830865332603757, "learning_rate": 1.589327127071539e-05, "loss": 0.3633, "step": 7385 }, { "epoch": 3.4468283582089554, "grad_norm": 0.29296548097541697, "learning_rate": 1.5863554693976065e-05, "loss": 0.3543, "step": 7390 }, { "epoch": 3.449160447761194, "grad_norm": 0.2877989597636989, "learning_rate": 1.5833865800878422e-05, "loss": 0.3586, "step": 7395 }, { "epoch": 3.451492537313433, "grad_norm": 0.2962568262303169, "learning_rate": 1.580420466205369e-05, "loss": 0.361, "step": 7400 }, { "epoch": 3.453824626865672, "grad_norm": 0.28992670359207906, "learning_rate": 1.577457134806711e-05, "loss": 0.347, "step": 7405 }, { "epoch": 3.4561567164179103, "grad_norm": 0.29820404695422453, "learning_rate": 1.5744965929417693e-05, "loss": 0.3519, "step": 7410 }, { "epoch": 3.4584888059701493, "grad_norm": 0.3002075298689392, "learning_rate": 1.57153884765381e-05, "loss": 0.3519, "step": 7415 }, { "epoch": 3.4608208955223883, "grad_norm": 0.29866627176412436, "learning_rate": 1.5685839059794476e-05, "loss": 0.3627, "step": 7420 }, { "epoch": 3.4631529850746268, "grad_norm": 0.29842700945335937, "learning_rate": 1.5656317749486225e-05, "loss": 0.3509, "step": 7425 }, { "epoch": 3.4654850746268657, "grad_norm": 0.31758551946271896, "learning_rate": 1.562682461584594e-05, "loss": 0.368, "step": 7430 }, { "epoch": 3.4678171641791042, "grad_norm": 0.30506288166415924, "learning_rate": 1.559735972903912e-05, "loss": 0.3534, "step": 7435 }, { "epoch": 3.470149253731343, "grad_norm": 0.2880499003011923, "learning_rate": 1.5567923159164108e-05, "loss": 0.3625, "step": 7440 }, { "epoch": 3.472481343283582, "grad_norm": 0.2918919233433875, "learning_rate": 1.553851497625187e-05, "loss": 0.3727, "step": 7445 }, { "epoch": 3.4748134328358207, "grad_norm": 0.2902964320978122, "learning_rate": 1.5509135250265835e-05, "loss": 0.3558, "step": 7450 }, { "epoch": 3.4771455223880596, "grad_norm": 0.28739197221190255, "learning_rate": 1.547978405110171e-05, "loss": 0.3502, "step": 7455 }, { "epoch": 3.4794776119402986, "grad_norm": 0.30476342446357835, "learning_rate": 1.545046144858738e-05, "loss": 0.3633, "step": 7460 }, { "epoch": 3.481809701492537, "grad_norm": 0.30387668228537973, "learning_rate": 1.5421167512482655e-05, "loss": 0.3599, "step": 7465 }, { "epoch": 3.484141791044776, "grad_norm": 0.29283297526670193, "learning_rate": 1.539190231247917e-05, "loss": 0.3495, "step": 7470 }, { "epoch": 3.486473880597015, "grad_norm": 0.29971779022893624, "learning_rate": 1.5362665918200193e-05, "loss": 0.349, "step": 7475 }, { "epoch": 3.4888059701492535, "grad_norm": 0.28242251730110957, "learning_rate": 1.533345839920045e-05, "loss": 0.3536, "step": 7480 }, { "epoch": 3.4911380597014925, "grad_norm": 0.2971343119306769, "learning_rate": 1.5304279824966e-05, "loss": 0.3565, "step": 7485 }, { "epoch": 3.4934701492537314, "grad_norm": 0.29622543843611676, "learning_rate": 1.5275130264913994e-05, "loss": 0.3713, "step": 7490 }, { "epoch": 3.49580223880597, "grad_norm": 0.2870654017898968, "learning_rate": 1.5246009788392606e-05, "loss": 0.3517, "step": 7495 }, { "epoch": 3.498134328358209, "grad_norm": 0.29915027478335077, "learning_rate": 1.5216918464680776e-05, "loss": 0.3599, "step": 7500 }, { "epoch": 3.500466417910448, "grad_norm": 0.3013275046457301, "learning_rate": 1.5187856362988123e-05, "loss": 0.3573, "step": 7505 }, { "epoch": 3.5027985074626864, "grad_norm": 0.28914092912421346, "learning_rate": 1.5158823552454737e-05, "loss": 0.362, "step": 7510 }, { "epoch": 3.5051305970149254, "grad_norm": 0.30079812125188365, "learning_rate": 1.5129820102151e-05, "loss": 0.361, "step": 7515 }, { "epoch": 3.5074626865671643, "grad_norm": 0.2952008525853324, "learning_rate": 1.5100846081077479e-05, "loss": 0.3505, "step": 7520 }, { "epoch": 3.509794776119403, "grad_norm": 0.3036031486925564, "learning_rate": 1.5071901558164692e-05, "loss": 0.3648, "step": 7525 }, { "epoch": 3.512126865671642, "grad_norm": 0.2966496719622429, "learning_rate": 1.5042986602273017e-05, "loss": 0.3549, "step": 7530 }, { "epoch": 3.5144589552238807, "grad_norm": 0.2989521146994412, "learning_rate": 1.5014101282192452e-05, "loss": 0.3611, "step": 7535 }, { "epoch": 3.5167910447761193, "grad_norm": 0.287234145558702, "learning_rate": 1.498524566664253e-05, "loss": 0.3559, "step": 7540 }, { "epoch": 3.519123134328358, "grad_norm": 0.30900948854246685, "learning_rate": 1.4956419824272083e-05, "loss": 0.3499, "step": 7545 }, { "epoch": 3.521455223880597, "grad_norm": 0.2859798976432502, "learning_rate": 1.4927623823659126e-05, "loss": 0.3538, "step": 7550 }, { "epoch": 3.5237873134328357, "grad_norm": 0.30882126023403517, "learning_rate": 1.4898857733310673e-05, "loss": 0.3575, "step": 7555 }, { "epoch": 3.5261194029850746, "grad_norm": 0.31362168432796605, "learning_rate": 1.4870121621662594e-05, "loss": 0.3664, "step": 7560 }, { "epoch": 3.5284514925373136, "grad_norm": 0.2852958812450785, "learning_rate": 1.4841415557079413e-05, "loss": 0.3546, "step": 7565 }, { "epoch": 3.530783582089552, "grad_norm": 0.29145333587542527, "learning_rate": 1.4812739607854199e-05, "loss": 0.3676, "step": 7570 }, { "epoch": 3.533115671641791, "grad_norm": 0.28793150077340407, "learning_rate": 1.4784093842208351e-05, "loss": 0.3477, "step": 7575 }, { "epoch": 3.53544776119403, "grad_norm": 0.2799788774837976, "learning_rate": 1.4755478328291476e-05, "loss": 0.3559, "step": 7580 }, { "epoch": 3.5377798507462686, "grad_norm": 0.29158782434899155, "learning_rate": 1.4726893134181214e-05, "loss": 0.345, "step": 7585 }, { "epoch": 3.5401119402985075, "grad_norm": 0.29176848527231125, "learning_rate": 1.4698338327883044e-05, "loss": 0.357, "step": 7590 }, { "epoch": 3.5424440298507465, "grad_norm": 0.2799745538167895, "learning_rate": 1.4669813977330193e-05, "loss": 0.349, "step": 7595 }, { "epoch": 3.544776119402985, "grad_norm": 0.28854357087585775, "learning_rate": 1.4641320150383391e-05, "loss": 0.347, "step": 7600 }, { "epoch": 3.547108208955224, "grad_norm": 0.283423557372721, "learning_rate": 1.461285691483078e-05, "loss": 0.3538, "step": 7605 }, { "epoch": 3.549440298507463, "grad_norm": 0.28166018080528465, "learning_rate": 1.458442433838772e-05, "loss": 0.3581, "step": 7610 }, { "epoch": 3.5517723880597014, "grad_norm": 0.28503603366141994, "learning_rate": 1.4556022488696614e-05, "loss": 0.3508, "step": 7615 }, { "epoch": 3.5541044776119404, "grad_norm": 0.2906894001997848, "learning_rate": 1.4527651433326786e-05, "loss": 0.3589, "step": 7620 }, { "epoch": 3.5564365671641793, "grad_norm": 0.30956170334950045, "learning_rate": 1.4499311239774277e-05, "loss": 0.3568, "step": 7625 }, { "epoch": 3.558768656716418, "grad_norm": 0.3036039738274132, "learning_rate": 1.4471001975461735e-05, "loss": 0.3682, "step": 7630 }, { "epoch": 3.561100746268657, "grad_norm": 0.3001675692633625, "learning_rate": 1.4442723707738199e-05, "loss": 0.3584, "step": 7635 }, { "epoch": 3.5634328358208958, "grad_norm": 0.2780253986656024, "learning_rate": 1.4414476503878968e-05, "loss": 0.3675, "step": 7640 }, { "epoch": 3.5657649253731343, "grad_norm": 0.29174570094961516, "learning_rate": 1.4386260431085457e-05, "loss": 0.3608, "step": 7645 }, { "epoch": 3.5680970149253732, "grad_norm": 0.2889730988928366, "learning_rate": 1.4358075556485016e-05, "loss": 0.367, "step": 7650 }, { "epoch": 3.570429104477612, "grad_norm": 0.2810334548221009, "learning_rate": 1.4329921947130748e-05, "loss": 0.3456, "step": 7655 }, { "epoch": 3.5727611940298507, "grad_norm": 0.30766763309941214, "learning_rate": 1.430179967000141e-05, "loss": 0.3531, "step": 7660 }, { "epoch": 3.5750932835820897, "grad_norm": 0.28722344733813815, "learning_rate": 1.4273708792001182e-05, "loss": 0.3584, "step": 7665 }, { "epoch": 3.5774253731343286, "grad_norm": 0.3097303207605004, "learning_rate": 1.424564937995957e-05, "loss": 0.376, "step": 7670 }, { "epoch": 3.579757462686567, "grad_norm": 0.2995816885189167, "learning_rate": 1.4217621500631222e-05, "loss": 0.3605, "step": 7675 }, { "epoch": 3.582089552238806, "grad_norm": 0.29712473130266287, "learning_rate": 1.4189625220695746e-05, "loss": 0.3554, "step": 7680 }, { "epoch": 3.5844216417910446, "grad_norm": 0.3027485538895017, "learning_rate": 1.41616606067576e-05, "loss": 0.3633, "step": 7685 }, { "epoch": 3.5867537313432836, "grad_norm": 0.29489151194414664, "learning_rate": 1.413372772534588e-05, "loss": 0.3524, "step": 7690 }, { "epoch": 3.5890858208955225, "grad_norm": 0.292583965280972, "learning_rate": 1.410582664291421e-05, "loss": 0.362, "step": 7695 }, { "epoch": 3.591417910447761, "grad_norm": 0.2876977033369693, "learning_rate": 1.4077957425840563e-05, "loss": 0.3534, "step": 7700 }, { "epoch": 3.59375, "grad_norm": 0.29468129202074855, "learning_rate": 1.4050120140427081e-05, "loss": 0.348, "step": 7705 }, { "epoch": 3.596082089552239, "grad_norm": 0.30877443498774065, "learning_rate": 1.4022314852899968e-05, "loss": 0.3532, "step": 7710 }, { "epoch": 3.5984141791044775, "grad_norm": 0.28976380559332493, "learning_rate": 1.3994541629409275e-05, "loss": 0.3484, "step": 7715 }, { "epoch": 3.6007462686567164, "grad_norm": 0.3045931762631833, "learning_rate": 1.3966800536028802e-05, "loss": 0.3687, "step": 7720 }, { "epoch": 3.6030783582089554, "grad_norm": 0.2835477452949352, "learning_rate": 1.3939091638755882e-05, "loss": 0.335, "step": 7725 }, { "epoch": 3.605410447761194, "grad_norm": 0.3076252865433502, "learning_rate": 1.3911415003511258e-05, "loss": 0.3618, "step": 7730 }, { "epoch": 3.607742537313433, "grad_norm": 0.33569531992338836, "learning_rate": 1.3883770696138946e-05, "loss": 0.3727, "step": 7735 }, { "epoch": 3.6100746268656714, "grad_norm": 0.29189920451522394, "learning_rate": 1.3856158782406007e-05, "loss": 0.3562, "step": 7740 }, { "epoch": 3.6124067164179103, "grad_norm": 0.2899329673371963, "learning_rate": 1.3828579328002473e-05, "loss": 0.358, "step": 7745 }, { "epoch": 3.6147388059701493, "grad_norm": 0.3017440143397427, "learning_rate": 1.3801032398541153e-05, "loss": 0.3526, "step": 7750 }, { "epoch": 3.617070895522388, "grad_norm": 0.303793278927911, "learning_rate": 1.3773518059557445e-05, "loss": 0.3546, "step": 7755 }, { "epoch": 3.6194029850746268, "grad_norm": 0.2946026133373514, "learning_rate": 1.3746036376509252e-05, "loss": 0.3525, "step": 7760 }, { "epoch": 3.6217350746268657, "grad_norm": 0.304345730892658, "learning_rate": 1.3718587414776756e-05, "loss": 0.3495, "step": 7765 }, { "epoch": 3.6240671641791042, "grad_norm": 0.30338403262410285, "learning_rate": 1.3691171239662315e-05, "loss": 0.3676, "step": 7770 }, { "epoch": 3.626399253731343, "grad_norm": 0.2859063338286154, "learning_rate": 1.366378791639028e-05, "loss": 0.3528, "step": 7775 }, { "epoch": 3.628731343283582, "grad_norm": 0.29511808064651746, "learning_rate": 1.3636437510106836e-05, "loss": 0.3618, "step": 7780 }, { "epoch": 3.6310634328358207, "grad_norm": 0.29365694276668197, "learning_rate": 1.3609120085879872e-05, "loss": 0.3581, "step": 7785 }, { "epoch": 3.6333955223880596, "grad_norm": 0.290573663092171, "learning_rate": 1.3581835708698796e-05, "loss": 0.3572, "step": 7790 }, { "epoch": 3.6357276119402986, "grad_norm": 0.28657513815805086, "learning_rate": 1.3554584443474405e-05, "loss": 0.363, "step": 7795 }, { "epoch": 3.638059701492537, "grad_norm": 0.27696862101376607, "learning_rate": 1.352736635503873e-05, "loss": 0.3443, "step": 7800 }, { "epoch": 3.640391791044776, "grad_norm": 0.29787867818816366, "learning_rate": 1.3500181508144855e-05, "loss": 0.3538, "step": 7805 }, { "epoch": 3.642723880597015, "grad_norm": 0.28101614582384843, "learning_rate": 1.3473029967466779e-05, "loss": 0.3578, "step": 7810 }, { "epoch": 3.6450559701492535, "grad_norm": 0.29354399795481195, "learning_rate": 1.3445911797599293e-05, "loss": 0.3653, "step": 7815 }, { "epoch": 3.6473880597014925, "grad_norm": 0.28858163371757295, "learning_rate": 1.3418827063057754e-05, "loss": 0.3504, "step": 7820 }, { "epoch": 3.6497201492537314, "grad_norm": 0.33101143148391066, "learning_rate": 1.3391775828278023e-05, "loss": 0.3559, "step": 7825 }, { "epoch": 3.65205223880597, "grad_norm": 0.285785531139151, "learning_rate": 1.3364758157616219e-05, "loss": 0.3509, "step": 7830 }, { "epoch": 3.654384328358209, "grad_norm": 0.28108023186016284, "learning_rate": 1.3337774115348639e-05, "loss": 0.3632, "step": 7835 }, { "epoch": 3.656716417910448, "grad_norm": 0.309326555412937, "learning_rate": 1.3310823765671571e-05, "loss": 0.3537, "step": 7840 }, { "epoch": 3.6590485074626864, "grad_norm": 0.307994629223912, "learning_rate": 1.3283907172701135e-05, "loss": 0.361, "step": 7845 }, { "epoch": 3.6613805970149254, "grad_norm": 0.28676986392850884, "learning_rate": 1.3257024400473162e-05, "loss": 0.3471, "step": 7850 }, { "epoch": 3.6637126865671643, "grad_norm": 0.28477197200691134, "learning_rate": 1.3230175512943e-05, "loss": 0.3449, "step": 7855 }, { "epoch": 3.666044776119403, "grad_norm": 0.29018174765669597, "learning_rate": 1.3203360573985394e-05, "loss": 0.3501, "step": 7860 }, { "epoch": 3.668376865671642, "grad_norm": 0.28193922253204096, "learning_rate": 1.3176579647394338e-05, "loss": 0.3443, "step": 7865 }, { "epoch": 3.6707089552238807, "grad_norm": 0.32210272509483917, "learning_rate": 1.314983279688288e-05, "loss": 0.3522, "step": 7870 }, { "epoch": 3.6730410447761193, "grad_norm": 0.2794951773054552, "learning_rate": 1.3123120086083026e-05, "loss": 0.3593, "step": 7875 }, { "epoch": 3.675373134328358, "grad_norm": 0.30271891497726117, "learning_rate": 1.3096441578545544e-05, "loss": 0.364, "step": 7880 }, { "epoch": 3.677705223880597, "grad_norm": 0.2888862955619843, "learning_rate": 1.306979733773983e-05, "loss": 0.3529, "step": 7885 }, { "epoch": 3.6800373134328357, "grad_norm": 0.28077915909282974, "learning_rate": 1.3043187427053788e-05, "loss": 0.3486, "step": 7890 }, { "epoch": 3.6823694029850746, "grad_norm": 0.2939679079731848, "learning_rate": 1.3016611909793613e-05, "loss": 0.3614, "step": 7895 }, { "epoch": 3.6847014925373136, "grad_norm": 0.2923391691036835, "learning_rate": 1.2990070849183678e-05, "loss": 0.3567, "step": 7900 }, { "epoch": 3.687033582089552, "grad_norm": 0.29811605769707217, "learning_rate": 1.2963564308366416e-05, "loss": 0.3501, "step": 7905 }, { "epoch": 3.689365671641791, "grad_norm": 0.29643761933870993, "learning_rate": 1.2937092350402097e-05, "loss": 0.3609, "step": 7910 }, { "epoch": 3.69169776119403, "grad_norm": 0.27701403097709604, "learning_rate": 1.2910655038268749e-05, "loss": 0.3546, "step": 7915 }, { "epoch": 3.6940298507462686, "grad_norm": 0.2741148064892688, "learning_rate": 1.2884252434861938e-05, "loss": 0.3491, "step": 7920 }, { "epoch": 3.6963619402985075, "grad_norm": 0.3004249868750194, "learning_rate": 1.2857884602994706e-05, "loss": 0.3512, "step": 7925 }, { "epoch": 3.6986940298507465, "grad_norm": 0.2920751318358904, "learning_rate": 1.2831551605397321e-05, "loss": 0.3617, "step": 7930 }, { "epoch": 3.701026119402985, "grad_norm": 0.28243593579231224, "learning_rate": 1.2805253504717213e-05, "loss": 0.3437, "step": 7935 }, { "epoch": 3.703358208955224, "grad_norm": 0.29181414356043495, "learning_rate": 1.2778990363518785e-05, "loss": 0.3629, "step": 7940 }, { "epoch": 3.705690298507463, "grad_norm": 0.29544487129690233, "learning_rate": 1.2752762244283255e-05, "loss": 0.3699, "step": 7945 }, { "epoch": 3.7080223880597014, "grad_norm": 0.28719022286813756, "learning_rate": 1.2726569209408545e-05, "loss": 0.3532, "step": 7950 }, { "epoch": 3.7103544776119404, "grad_norm": 0.2930316645371656, "learning_rate": 1.2700411321209078e-05, "loss": 0.3624, "step": 7955 }, { "epoch": 3.7126865671641793, "grad_norm": 0.2817495146297711, "learning_rate": 1.2674288641915688e-05, "loss": 0.3523, "step": 7960 }, { "epoch": 3.715018656716418, "grad_norm": 0.2848332804582776, "learning_rate": 1.264820123367545e-05, "loss": 0.3656, "step": 7965 }, { "epoch": 3.717350746268657, "grad_norm": 0.29119030089794146, "learning_rate": 1.26221491585515e-05, "loss": 0.3552, "step": 7970 }, { "epoch": 3.7196828358208958, "grad_norm": 0.2961301640188612, "learning_rate": 1.2596132478522938e-05, "loss": 0.3627, "step": 7975 }, { "epoch": 3.7220149253731343, "grad_norm": 0.292111639749612, "learning_rate": 1.2570151255484639e-05, "loss": 0.3567, "step": 7980 }, { "epoch": 3.7243470149253732, "grad_norm": 0.2895274874614945, "learning_rate": 1.2544205551247148e-05, "loss": 0.3529, "step": 7985 }, { "epoch": 3.726679104477612, "grad_norm": 0.29797667057108973, "learning_rate": 1.251829542753648e-05, "loss": 0.3528, "step": 7990 }, { "epoch": 3.7290111940298507, "grad_norm": 0.29423312869195745, "learning_rate": 1.249242094599404e-05, "loss": 0.361, "step": 7995 }, { "epoch": 3.7313432835820897, "grad_norm": 0.2958015752765381, "learning_rate": 1.246658216817639e-05, "loss": 0.3634, "step": 8000 }, { "epoch": 3.7336753731343286, "grad_norm": 0.29363428771875905, "learning_rate": 1.2440779155555202e-05, "loss": 0.3511, "step": 8005 }, { "epoch": 3.736007462686567, "grad_norm": 0.29801364241452183, "learning_rate": 1.2415011969517016e-05, "loss": 0.3641, "step": 8010 }, { "epoch": 3.738339552238806, "grad_norm": 0.2880988824363315, "learning_rate": 1.2389280671363175e-05, "loss": 0.35, "step": 8015 }, { "epoch": 3.7406716417910446, "grad_norm": 0.2885482807102054, "learning_rate": 1.2363585322309615e-05, "loss": 0.3519, "step": 8020 }, { "epoch": 3.7430037313432836, "grad_norm": 0.30675495831513316, "learning_rate": 1.2337925983486768e-05, "loss": 0.3534, "step": 8025 }, { "epoch": 3.7453358208955225, "grad_norm": 0.2841278299744841, "learning_rate": 1.2312302715939394e-05, "loss": 0.3493, "step": 8030 }, { "epoch": 3.747667910447761, "grad_norm": 0.30117309297543066, "learning_rate": 1.2286715580626418e-05, "loss": 0.3558, "step": 8035 }, { "epoch": 3.75, "grad_norm": 0.2823771377104651, "learning_rate": 1.2261164638420832e-05, "loss": 0.3519, "step": 8040 }, { "epoch": 3.752332089552239, "grad_norm": 0.29374733286752874, "learning_rate": 1.2235649950109492e-05, "loss": 0.358, "step": 8045 }, { "epoch": 3.7546641791044775, "grad_norm": 0.2963910065816967, "learning_rate": 1.2210171576393037e-05, "loss": 0.3611, "step": 8050 }, { "epoch": 3.7569962686567164, "grad_norm": 0.2922835220716444, "learning_rate": 1.2184729577885695e-05, "loss": 0.3674, "step": 8055 }, { "epoch": 3.7593283582089554, "grad_norm": 0.3106542599906194, "learning_rate": 1.2159324015115148e-05, "loss": 0.3606, "step": 8060 }, { "epoch": 3.761660447761194, "grad_norm": 0.3032163738996274, "learning_rate": 1.2133954948522423e-05, "loss": 0.3642, "step": 8065 }, { "epoch": 3.763992537313433, "grad_norm": 0.30650395066997793, "learning_rate": 1.210862243846168e-05, "loss": 0.3523, "step": 8070 }, { "epoch": 3.7663246268656714, "grad_norm": 0.30803691550525786, "learning_rate": 1.2083326545200154e-05, "loss": 0.3667, "step": 8075 }, { "epoch": 3.7686567164179103, "grad_norm": 0.2940103098374772, "learning_rate": 1.205806732891793e-05, "loss": 0.3528, "step": 8080 }, { "epoch": 3.7709888059701493, "grad_norm": 0.28067520868217627, "learning_rate": 1.2032844849707853e-05, "loss": 0.3457, "step": 8085 }, { "epoch": 3.773320895522388, "grad_norm": 0.27532752224412865, "learning_rate": 1.2007659167575377e-05, "loss": 0.3509, "step": 8090 }, { "epoch": 3.7756529850746268, "grad_norm": 0.2941075409451071, "learning_rate": 1.1982510342438395e-05, "loss": 0.3524, "step": 8095 }, { "epoch": 3.7779850746268657, "grad_norm": 0.29520084957584974, "learning_rate": 1.195739843412713e-05, "loss": 0.3495, "step": 8100 }, { "epoch": 3.7803171641791042, "grad_norm": 0.29448227900788065, "learning_rate": 1.1932323502383978e-05, "loss": 0.3567, "step": 8105 }, { "epoch": 3.782649253731343, "grad_norm": 0.29410419980712876, "learning_rate": 1.1907285606863351e-05, "loss": 0.3535, "step": 8110 }, { "epoch": 3.784981343283582, "grad_norm": 0.30711921370898143, "learning_rate": 1.1882284807131576e-05, "loss": 0.3596, "step": 8115 }, { "epoch": 3.7873134328358207, "grad_norm": 0.2913109263179348, "learning_rate": 1.1857321162666692e-05, "loss": 0.3522, "step": 8120 }, { "epoch": 3.7896455223880596, "grad_norm": 0.2710680798841491, "learning_rate": 1.1832394732858377e-05, "loss": 0.346, "step": 8125 }, { "epoch": 3.7919776119402986, "grad_norm": 0.28893957555077326, "learning_rate": 1.1807505577007765e-05, "loss": 0.368, "step": 8130 }, { "epoch": 3.794309701492537, "grad_norm": 0.2881348312683764, "learning_rate": 1.1782653754327295e-05, "loss": 0.3679, "step": 8135 }, { "epoch": 3.796641791044776, "grad_norm": 0.2946006432438279, "learning_rate": 1.1757839323940616e-05, "loss": 0.3585, "step": 8140 }, { "epoch": 3.798973880597015, "grad_norm": 0.2925912935476016, "learning_rate": 1.1733062344882396e-05, "loss": 0.3606, "step": 8145 }, { "epoch": 3.8013059701492535, "grad_norm": 0.29429844548593664, "learning_rate": 1.1708322876098215e-05, "loss": 0.3528, "step": 8150 }, { "epoch": 3.8036380597014925, "grad_norm": 0.30887179915703444, "learning_rate": 1.1683620976444426e-05, "loss": 0.361, "step": 8155 }, { "epoch": 3.8059701492537314, "grad_norm": 0.2824530021645635, "learning_rate": 1.1658956704687974e-05, "loss": 0.3554, "step": 8160 }, { "epoch": 3.80830223880597, "grad_norm": 0.29728704854787397, "learning_rate": 1.1634330119506317e-05, "loss": 0.3699, "step": 8165 }, { "epoch": 3.810634328358209, "grad_norm": 0.31014776929680377, "learning_rate": 1.1609741279487236e-05, "loss": 0.3637, "step": 8170 }, { "epoch": 3.812966417910448, "grad_norm": 0.2926088847742517, "learning_rate": 1.1585190243128707e-05, "loss": 0.353, "step": 8175 }, { "epoch": 3.8152985074626864, "grad_norm": 0.2979630047546316, "learning_rate": 1.15606770688388e-05, "loss": 0.3478, "step": 8180 }, { "epoch": 3.8176305970149254, "grad_norm": 0.2976899412274589, "learning_rate": 1.1536201814935473e-05, "loss": 0.343, "step": 8185 }, { "epoch": 3.8199626865671643, "grad_norm": 0.28808497584348125, "learning_rate": 1.1511764539646494e-05, "loss": 0.3576, "step": 8190 }, { "epoch": 3.822294776119403, "grad_norm": 0.2973608263770689, "learning_rate": 1.1487365301109281e-05, "loss": 0.3573, "step": 8195 }, { "epoch": 3.824626865671642, "grad_norm": 0.29968897586866633, "learning_rate": 1.1463004157370735e-05, "loss": 0.352, "step": 8200 }, { "epoch": 3.8269589552238807, "grad_norm": 0.3096440988535845, "learning_rate": 1.1438681166387162e-05, "loss": 0.3683, "step": 8205 }, { "epoch": 3.8292910447761193, "grad_norm": 0.2741502239142194, "learning_rate": 1.1414396386024064e-05, "loss": 0.3474, "step": 8210 }, { "epoch": 3.831623134328358, "grad_norm": 0.2942294302511282, "learning_rate": 1.1390149874056065e-05, "loss": 0.3663, "step": 8215 }, { "epoch": 3.833955223880597, "grad_norm": 0.29114614997015453, "learning_rate": 1.1365941688166747e-05, "loss": 0.3583, "step": 8220 }, { "epoch": 3.8362873134328357, "grad_norm": 0.2919755709631297, "learning_rate": 1.134177188594849e-05, "loss": 0.3568, "step": 8225 }, { "epoch": 3.8386194029850746, "grad_norm": 0.31013207944739696, "learning_rate": 1.1317640524902383e-05, "loss": 0.3727, "step": 8230 }, { "epoch": 3.8409514925373136, "grad_norm": 0.2856057412020683, "learning_rate": 1.129354766243804e-05, "loss": 0.3538, "step": 8235 }, { "epoch": 3.843283582089552, "grad_norm": 0.29246266396022763, "learning_rate": 1.1269493355873498e-05, "loss": 0.3523, "step": 8240 }, { "epoch": 3.845615671641791, "grad_norm": 0.29027597856372317, "learning_rate": 1.1245477662435076e-05, "loss": 0.3425, "step": 8245 }, { "epoch": 3.84794776119403, "grad_norm": 0.2934215355286048, "learning_rate": 1.1221500639257204e-05, "loss": 0.3541, "step": 8250 }, { "epoch": 3.8502798507462686, "grad_norm": 0.2933097062976139, "learning_rate": 1.1197562343382341e-05, "loss": 0.3593, "step": 8255 }, { "epoch": 3.8526119402985075, "grad_norm": 0.29229414621215566, "learning_rate": 1.1173662831760798e-05, "loss": 0.3604, "step": 8260 }, { "epoch": 3.8549440298507465, "grad_norm": 0.2831261775593604, "learning_rate": 1.1149802161250607e-05, "loss": 0.3654, "step": 8265 }, { "epoch": 3.857276119402985, "grad_norm": 0.2977216803387459, "learning_rate": 1.1125980388617425e-05, "loss": 0.3626, "step": 8270 }, { "epoch": 3.859608208955224, "grad_norm": 0.2849481821852572, "learning_rate": 1.1102197570534334e-05, "loss": 0.3597, "step": 8275 }, { "epoch": 3.861940298507463, "grad_norm": 0.28879730272017745, "learning_rate": 1.1078453763581776e-05, "loss": 0.3544, "step": 8280 }, { "epoch": 3.8642723880597014, "grad_norm": 0.2992038514698661, "learning_rate": 1.1054749024247348e-05, "loss": 0.3568, "step": 8285 }, { "epoch": 3.8666044776119404, "grad_norm": 0.290194347044365, "learning_rate": 1.103108340892573e-05, "loss": 0.3594, "step": 8290 }, { "epoch": 3.8689365671641793, "grad_norm": 0.2826735937951113, "learning_rate": 1.100745697391852e-05, "loss": 0.3462, "step": 8295 }, { "epoch": 3.871268656716418, "grad_norm": 0.30339028437867, "learning_rate": 1.0983869775434091e-05, "loss": 0.3618, "step": 8300 }, { "epoch": 3.873600746268657, "grad_norm": 0.29347235850619174, "learning_rate": 1.096032186958749e-05, "loss": 0.3477, "step": 8305 }, { "epoch": 3.8759328358208958, "grad_norm": 0.30531661397067245, "learning_rate": 1.0936813312400263e-05, "loss": 0.3649, "step": 8310 }, { "epoch": 3.8782649253731343, "grad_norm": 0.2920153441382587, "learning_rate": 1.091334415980036e-05, "loss": 0.3522, "step": 8315 }, { "epoch": 3.8805970149253732, "grad_norm": 0.2898738483350599, "learning_rate": 1.0889914467621986e-05, "loss": 0.3524, "step": 8320 }, { "epoch": 3.882929104477612, "grad_norm": 0.2927608130841681, "learning_rate": 1.0866524291605452e-05, "loss": 0.3539, "step": 8325 }, { "epoch": 3.8852611940298507, "grad_norm": 0.291260644337128, "learning_rate": 1.0843173687397079e-05, "loss": 0.3428, "step": 8330 }, { "epoch": 3.8875932835820897, "grad_norm": 0.29104190176788935, "learning_rate": 1.0819862710549025e-05, "loss": 0.3581, "step": 8335 }, { "epoch": 3.8899253731343286, "grad_norm": 0.28615462821266957, "learning_rate": 1.0796591416519192e-05, "loss": 0.3496, "step": 8340 }, { "epoch": 3.892257462686567, "grad_norm": 0.28479520893776716, "learning_rate": 1.0773359860671054e-05, "loss": 0.3581, "step": 8345 }, { "epoch": 3.894589552238806, "grad_norm": 0.29339780850022235, "learning_rate": 1.0750168098273569e-05, "loss": 0.3585, "step": 8350 }, { "epoch": 3.8969216417910446, "grad_norm": 0.30283711110137923, "learning_rate": 1.0727016184501e-05, "loss": 0.3697, "step": 8355 }, { "epoch": 3.8992537313432836, "grad_norm": 0.28342685713093285, "learning_rate": 1.0703904174432836e-05, "loss": 0.3609, "step": 8360 }, { "epoch": 3.9015858208955225, "grad_norm": 0.29642245266867284, "learning_rate": 1.0680832123053603e-05, "loss": 0.3631, "step": 8365 }, { "epoch": 3.903917910447761, "grad_norm": 0.3035803762319652, "learning_rate": 1.0657800085252789e-05, "loss": 0.376, "step": 8370 }, { "epoch": 3.90625, "grad_norm": 0.2878292075424635, "learning_rate": 1.0634808115824668e-05, "loss": 0.3612, "step": 8375 }, { "epoch": 3.908582089552239, "grad_norm": 0.3124247861272246, "learning_rate": 1.0611856269468203e-05, "loss": 0.371, "step": 8380 }, { "epoch": 3.9109141791044775, "grad_norm": 0.28175311990483765, "learning_rate": 1.0588944600786907e-05, "loss": 0.3471, "step": 8385 }, { "epoch": 3.9132462686567164, "grad_norm": 0.29159135713904916, "learning_rate": 1.0566073164288687e-05, "loss": 0.3548, "step": 8390 }, { "epoch": 3.9155783582089554, "grad_norm": 0.28670551273703254, "learning_rate": 1.0543242014385758e-05, "loss": 0.3429, "step": 8395 }, { "epoch": 3.917910447761194, "grad_norm": 0.28966346089598266, "learning_rate": 1.052045120539447e-05, "loss": 0.3499, "step": 8400 }, { "epoch": 3.920242537313433, "grad_norm": 0.3115340194096633, "learning_rate": 1.0497700791535221e-05, "loss": 0.3726, "step": 8405 }, { "epoch": 3.9225746268656714, "grad_norm": 0.2885119382485302, "learning_rate": 1.0474990826932301e-05, "loss": 0.3682, "step": 8410 }, { "epoch": 3.9249067164179103, "grad_norm": 0.3142237475041356, "learning_rate": 1.0452321365613758e-05, "loss": 0.3819, "step": 8415 }, { "epoch": 3.9272388059701493, "grad_norm": 0.3125381133506531, "learning_rate": 1.0429692461511298e-05, "loss": 0.3616, "step": 8420 }, { "epoch": 3.929570895522388, "grad_norm": 0.28552193526696856, "learning_rate": 1.0407104168460116e-05, "loss": 0.3564, "step": 8425 }, { "epoch": 3.9319029850746268, "grad_norm": 0.28394987960843554, "learning_rate": 1.0384556540198825e-05, "loss": 0.3537, "step": 8430 }, { "epoch": 3.9342350746268657, "grad_norm": 0.27654343131951353, "learning_rate": 1.0362049630369259e-05, "loss": 0.3433, "step": 8435 }, { "epoch": 3.9365671641791042, "grad_norm": 0.2884305314595042, "learning_rate": 1.033958349251641e-05, "loss": 0.3572, "step": 8440 }, { "epoch": 3.938899253731343, "grad_norm": 0.3035815017958473, "learning_rate": 1.0317158180088254e-05, "loss": 0.368, "step": 8445 }, { "epoch": 3.941231343283582, "grad_norm": 0.3056491792197943, "learning_rate": 1.0294773746435638e-05, "loss": 0.3693, "step": 8450 }, { "epoch": 3.9435634328358207, "grad_norm": 0.281986310165486, "learning_rate": 1.0272430244812175e-05, "loss": 0.3711, "step": 8455 }, { "epoch": 3.9458955223880596, "grad_norm": 0.287792904143575, "learning_rate": 1.0250127728374098e-05, "loss": 0.3491, "step": 8460 }, { "epoch": 3.9482276119402986, "grad_norm": 0.29150716736764803, "learning_rate": 1.0227866250180105e-05, "loss": 0.3514, "step": 8465 }, { "epoch": 3.950559701492537, "grad_norm": 0.2919223772377872, "learning_rate": 1.02056458631913e-05, "loss": 0.363, "step": 8470 }, { "epoch": 3.952891791044776, "grad_norm": 0.29230864112679494, "learning_rate": 1.0183466620270996e-05, "loss": 0.3637, "step": 8475 }, { "epoch": 3.955223880597015, "grad_norm": 0.28050753278457957, "learning_rate": 1.0161328574184645e-05, "loss": 0.3535, "step": 8480 }, { "epoch": 3.9575559701492535, "grad_norm": 0.28252236271125614, "learning_rate": 1.0139231777599689e-05, "loss": 0.3566, "step": 8485 }, { "epoch": 3.9598880597014925, "grad_norm": 0.29454183963626634, "learning_rate": 1.0117176283085419e-05, "loss": 0.3558, "step": 8490 }, { "epoch": 3.9622201492537314, "grad_norm": 0.29459515152753013, "learning_rate": 1.009516214311289e-05, "loss": 0.3657, "step": 8495 }, { "epoch": 3.96455223880597, "grad_norm": 0.2912167756061136, "learning_rate": 1.0073189410054742e-05, "loss": 0.3535, "step": 8500 }, { "epoch": 3.966884328358209, "grad_norm": 0.2928723709535208, "learning_rate": 1.0051258136185132e-05, "loss": 0.3513, "step": 8505 }, { "epoch": 3.969216417910448, "grad_norm": 0.2951023915723867, "learning_rate": 1.0029368373679583e-05, "loss": 0.3629, "step": 8510 }, { "epoch": 3.9715485074626864, "grad_norm": 0.2805828728418148, "learning_rate": 1.0007520174614836e-05, "loss": 0.3455, "step": 8515 }, { "epoch": 3.9738805970149254, "grad_norm": 0.28900705523090153, "learning_rate": 9.98571359096878e-06, "loss": 0.3507, "step": 8520 }, { "epoch": 3.9762126865671643, "grad_norm": 0.29555417977639625, "learning_rate": 9.96394867462028e-06, "loss": 0.3674, "step": 8525 }, { "epoch": 3.978544776119403, "grad_norm": 0.30331556284055283, "learning_rate": 9.94222547734909e-06, "loss": 0.3464, "step": 8530 }, { "epoch": 3.980876865671642, "grad_norm": 0.29561691167240595, "learning_rate": 9.92054405083569e-06, "loss": 0.3585, "step": 8535 }, { "epoch": 3.9832089552238807, "grad_norm": 0.4256219583039644, "learning_rate": 9.898904446661188e-06, "loss": 0.3565, "step": 8540 }, { "epoch": 3.9855410447761193, "grad_norm": 0.27799642447777484, "learning_rate": 9.87730671630722e-06, "loss": 0.3493, "step": 8545 }, { "epoch": 3.987873134328358, "grad_norm": 0.28989803469079184, "learning_rate": 9.855750911155784e-06, "loss": 0.3556, "step": 8550 }, { "epoch": 3.990205223880597, "grad_norm": 0.2961064358107011, "learning_rate": 9.834237082489126e-06, "loss": 0.3494, "step": 8555 }, { "epoch": 3.9925373134328357, "grad_norm": 0.31032202853602864, "learning_rate": 9.812765281489655e-06, "loss": 0.3673, "step": 8560 }, { "epoch": 3.9948694029850746, "grad_norm": 0.2992096961247, "learning_rate": 9.79133555923976e-06, "loss": 0.3532, "step": 8565 }, { "epoch": 3.9972014925373136, "grad_norm": 0.2928695992537825, "learning_rate": 9.76994796672176e-06, "loss": 0.359, "step": 8570 }, { "epoch": 3.999533582089552, "grad_norm": 0.28695245819173015, "learning_rate": 9.748602554817721e-06, "loss": 0.3651, "step": 8575 }, { "epoch": 4.001865671641791, "grad_norm": 0.29002450461091217, "learning_rate": 9.72729937430936e-06, "loss": 0.3134, "step": 8580 }, { "epoch": 4.00419776119403, "grad_norm": 0.33626284850202026, "learning_rate": 9.706038475877938e-06, "loss": 0.3087, "step": 8585 }, { "epoch": 4.0065298507462686, "grad_norm": 0.34050524388004166, "learning_rate": 9.6848199101041e-06, "loss": 0.3162, "step": 8590 }, { "epoch": 4.008861940298507, "grad_norm": 0.3146727499335174, "learning_rate": 9.66364372746781e-06, "loss": 0.3055, "step": 8595 }, { "epoch": 4.0111940298507465, "grad_norm": 0.29461586110361526, "learning_rate": 9.64250997834819e-06, "loss": 0.3148, "step": 8600 }, { "epoch": 4.013526119402985, "grad_norm": 0.33635544785300936, "learning_rate": 9.621418713023389e-06, "loss": 0.3089, "step": 8605 }, { "epoch": 4.0158582089552235, "grad_norm": 0.29728720805342124, "learning_rate": 9.60036998167052e-06, "loss": 0.2927, "step": 8610 }, { "epoch": 4.018190298507463, "grad_norm": 0.3051156147123375, "learning_rate": 9.579363834365484e-06, "loss": 0.3125, "step": 8615 }, { "epoch": 4.020522388059701, "grad_norm": 0.3025349904353988, "learning_rate": 9.558400321082863e-06, "loss": 0.3133, "step": 8620 }, { "epoch": 4.02285447761194, "grad_norm": 0.31994426906710727, "learning_rate": 9.537479491695845e-06, "loss": 0.3211, "step": 8625 }, { "epoch": 4.025186567164179, "grad_norm": 0.3297641628965963, "learning_rate": 9.516601395976038e-06, "loss": 0.3166, "step": 8630 }, { "epoch": 4.027518656716418, "grad_norm": 0.31549249214451097, "learning_rate": 9.495766083593407e-06, "loss": 0.3045, "step": 8635 }, { "epoch": 4.029850746268656, "grad_norm": 0.32345858084193363, "learning_rate": 9.474973604116112e-06, "loss": 0.311, "step": 8640 }, { "epoch": 4.032182835820896, "grad_norm": 0.3223613214915529, "learning_rate": 9.454224007010428e-06, "loss": 0.3128, "step": 8645 }, { "epoch": 4.034514925373134, "grad_norm": 0.3049220003183433, "learning_rate": 9.433517341640621e-06, "loss": 0.3051, "step": 8650 }, { "epoch": 4.036847014925373, "grad_norm": 0.31637606367045745, "learning_rate": 9.41285365726878e-06, "loss": 0.3119, "step": 8655 }, { "epoch": 4.039179104477612, "grad_norm": 0.30549836009291187, "learning_rate": 9.39223300305479e-06, "loss": 0.3189, "step": 8660 }, { "epoch": 4.041511194029851, "grad_norm": 0.3035498327748312, "learning_rate": 9.371655428056122e-06, "loss": 0.3148, "step": 8665 }, { "epoch": 4.043843283582089, "grad_norm": 0.3154182794432834, "learning_rate": 9.351120981227788e-06, "loss": 0.3143, "step": 8670 }, { "epoch": 4.046175373134329, "grad_norm": 0.31414646524222867, "learning_rate": 9.330629711422196e-06, "loss": 0.3106, "step": 8675 }, { "epoch": 4.048507462686567, "grad_norm": 0.3158450908221077, "learning_rate": 9.310181667389003e-06, "loss": 0.3221, "step": 8680 }, { "epoch": 4.050839552238806, "grad_norm": 0.2993836297546226, "learning_rate": 9.289776897775074e-06, "loss": 0.3097, "step": 8685 }, { "epoch": 4.053171641791045, "grad_norm": 0.309791509140291, "learning_rate": 9.269415451124283e-06, "loss": 0.3115, "step": 8690 }, { "epoch": 4.055503731343284, "grad_norm": 0.30600571097508633, "learning_rate": 9.249097375877458e-06, "loss": 0.3031, "step": 8695 }, { "epoch": 4.057835820895522, "grad_norm": 0.32116266560398526, "learning_rate": 9.22882272037225e-06, "loss": 0.3125, "step": 8700 }, { "epoch": 4.0601679104477615, "grad_norm": 0.3173273936289824, "learning_rate": 9.208591532842995e-06, "loss": 0.3166, "step": 8705 }, { "epoch": 4.0625, "grad_norm": 0.3253453927734838, "learning_rate": 9.188403861420615e-06, "loss": 0.3212, "step": 8710 }, { "epoch": 4.0648320895522385, "grad_norm": 0.3290519566013205, "learning_rate": 9.16825975413253e-06, "loss": 0.3203, "step": 8715 }, { "epoch": 4.067164179104478, "grad_norm": 0.299674337060201, "learning_rate": 9.148159258902488e-06, "loss": 0.3176, "step": 8720 }, { "epoch": 4.069496268656716, "grad_norm": 0.30987669068176316, "learning_rate": 9.128102423550511e-06, "loss": 0.3067, "step": 8725 }, { "epoch": 4.071828358208955, "grad_norm": 0.3154394089584995, "learning_rate": 9.108089295792726e-06, "loss": 0.3226, "step": 8730 }, { "epoch": 4.074160447761194, "grad_norm": 0.3169870831754904, "learning_rate": 9.088119923241295e-06, "loss": 0.3149, "step": 8735 }, { "epoch": 4.076492537313433, "grad_norm": 0.324622958513319, "learning_rate": 9.068194353404288e-06, "loss": 0.3204, "step": 8740 }, { "epoch": 4.078824626865671, "grad_norm": 0.3057669940921324, "learning_rate": 9.04831263368554e-06, "loss": 0.3083, "step": 8745 }, { "epoch": 4.081156716417911, "grad_norm": 0.30832034501071065, "learning_rate": 9.028474811384597e-06, "loss": 0.3126, "step": 8750 }, { "epoch": 4.083488805970149, "grad_norm": 0.3346293050669514, "learning_rate": 9.008680933696545e-06, "loss": 0.3212, "step": 8755 }, { "epoch": 4.085820895522388, "grad_norm": 0.3238456372596288, "learning_rate": 8.98893104771194e-06, "loss": 0.3212, "step": 8760 }, { "epoch": 4.088152985074627, "grad_norm": 0.3194037973567986, "learning_rate": 8.969225200416678e-06, "loss": 0.3176, "step": 8765 }, { "epoch": 4.090485074626866, "grad_norm": 0.3115448794044634, "learning_rate": 8.94956343869187e-06, "loss": 0.32, "step": 8770 }, { "epoch": 4.092817164179104, "grad_norm": 0.3312404819956108, "learning_rate": 8.929945809313773e-06, "loss": 0.2992, "step": 8775 }, { "epoch": 4.095149253731344, "grad_norm": 0.3094918619869502, "learning_rate": 8.910372358953614e-06, "loss": 0.3025, "step": 8780 }, { "epoch": 4.097481343283582, "grad_norm": 0.3149685554142559, "learning_rate": 8.890843134177555e-06, "loss": 0.3094, "step": 8785 }, { "epoch": 4.099813432835821, "grad_norm": 0.30234642848369625, "learning_rate": 8.871358181446519e-06, "loss": 0.3063, "step": 8790 }, { "epoch": 4.10214552238806, "grad_norm": 0.3047195786959947, "learning_rate": 8.851917547116111e-06, "loss": 0.3065, "step": 8795 }, { "epoch": 4.104477611940299, "grad_norm": 0.30817607727839985, "learning_rate": 8.83252127743649e-06, "loss": 0.3177, "step": 8800 }, { "epoch": 4.106809701492537, "grad_norm": 0.32389403040056725, "learning_rate": 8.813169418552294e-06, "loss": 0.3192, "step": 8805 }, { "epoch": 4.1091417910447765, "grad_norm": 0.309655071791805, "learning_rate": 8.793862016502477e-06, "loss": 0.3135, "step": 8810 }, { "epoch": 4.111473880597015, "grad_norm": 0.31769604613286423, "learning_rate": 8.774599117220254e-06, "loss": 0.3162, "step": 8815 }, { "epoch": 4.1138059701492535, "grad_norm": 0.3137267870937617, "learning_rate": 8.755380766532945e-06, "loss": 0.3136, "step": 8820 }, { "epoch": 4.116138059701493, "grad_norm": 0.3286052794870223, "learning_rate": 8.736207010161899e-06, "loss": 0.3171, "step": 8825 }, { "epoch": 4.1184701492537314, "grad_norm": 0.32135047949417067, "learning_rate": 8.71707789372236e-06, "loss": 0.3081, "step": 8830 }, { "epoch": 4.12080223880597, "grad_norm": 0.30895730625647005, "learning_rate": 8.697993462723392e-06, "loss": 0.3183, "step": 8835 }, { "epoch": 4.123134328358209, "grad_norm": 0.3153339838923634, "learning_rate": 8.678953762567739e-06, "loss": 0.3059, "step": 8840 }, { "epoch": 4.125466417910448, "grad_norm": 0.31140445503712555, "learning_rate": 8.659958838551722e-06, "loss": 0.3059, "step": 8845 }, { "epoch": 4.127798507462686, "grad_norm": 0.3208038052118783, "learning_rate": 8.641008735865153e-06, "loss": 0.3172, "step": 8850 }, { "epoch": 4.130130597014926, "grad_norm": 0.31182779546510214, "learning_rate": 8.62210349959119e-06, "loss": 0.3096, "step": 8855 }, { "epoch": 4.132462686567164, "grad_norm": 0.30823897230124725, "learning_rate": 8.60324317470627e-06, "loss": 0.3162, "step": 8860 }, { "epoch": 4.134794776119403, "grad_norm": 0.30549449535725226, "learning_rate": 8.584427806079988e-06, "loss": 0.3105, "step": 8865 }, { "epoch": 4.137126865671641, "grad_norm": 0.3276870411189968, "learning_rate": 8.565657438474963e-06, "loss": 0.317, "step": 8870 }, { "epoch": 4.139458955223881, "grad_norm": 0.3255550760285615, "learning_rate": 8.546932116546775e-06, "loss": 0.3112, "step": 8875 }, { "epoch": 4.141791044776119, "grad_norm": 0.3211721281662896, "learning_rate": 8.528251884843829e-06, "loss": 0.3082, "step": 8880 }, { "epoch": 4.144123134328359, "grad_norm": 0.3084207444044092, "learning_rate": 8.509616787807263e-06, "loss": 0.3209, "step": 8885 }, { "epoch": 4.146455223880597, "grad_norm": 0.3153955686042942, "learning_rate": 8.491026869770832e-06, "loss": 0.3165, "step": 8890 }, { "epoch": 4.148787313432836, "grad_norm": 0.317890751027894, "learning_rate": 8.472482174960808e-06, "loss": 0.3141, "step": 8895 }, { "epoch": 4.151119402985074, "grad_norm": 0.3099177527455459, "learning_rate": 8.453982747495881e-06, "loss": 0.3094, "step": 8900 }, { "epoch": 4.153451492537314, "grad_norm": 0.3101414685406431, "learning_rate": 8.435528631387052e-06, "loss": 0.3152, "step": 8905 }, { "epoch": 4.155783582089552, "grad_norm": 0.3105806744254732, "learning_rate": 8.417119870537503e-06, "loss": 0.3222, "step": 8910 }, { "epoch": 4.158115671641791, "grad_norm": 0.3158640114460028, "learning_rate": 8.398756508742536e-06, "loss": 0.3023, "step": 8915 }, { "epoch": 4.16044776119403, "grad_norm": 0.3072726624126907, "learning_rate": 8.380438589689438e-06, "loss": 0.3122, "step": 8920 }, { "epoch": 4.1627798507462686, "grad_norm": 0.3232308284724611, "learning_rate": 8.36216615695738e-06, "loss": 0.3179, "step": 8925 }, { "epoch": 4.165111940298507, "grad_norm": 0.3218974867633839, "learning_rate": 8.343939254017336e-06, "loss": 0.3173, "step": 8930 }, { "epoch": 4.1674440298507465, "grad_norm": 0.31838158374627895, "learning_rate": 8.325757924231938e-06, "loss": 0.3136, "step": 8935 }, { "epoch": 4.169776119402985, "grad_norm": 0.32198349049591396, "learning_rate": 8.307622210855425e-06, "loss": 0.3014, "step": 8940 }, { "epoch": 4.1721082089552235, "grad_norm": 0.3224937724074064, "learning_rate": 8.289532157033481e-06, "loss": 0.3221, "step": 8945 }, { "epoch": 4.174440298507463, "grad_norm": 0.31021544495813436, "learning_rate": 8.271487805803193e-06, "loss": 0.3022, "step": 8950 }, { "epoch": 4.176772388059701, "grad_norm": 0.30814214259571354, "learning_rate": 8.253489200092912e-06, "loss": 0.303, "step": 8955 }, { "epoch": 4.17910447761194, "grad_norm": 0.29981152368282815, "learning_rate": 8.235536382722133e-06, "loss": 0.3063, "step": 8960 }, { "epoch": 4.181436567164179, "grad_norm": 0.3103627312429476, "learning_rate": 8.217629396401465e-06, "loss": 0.3204, "step": 8965 }, { "epoch": 4.183768656716418, "grad_norm": 0.32366246552113753, "learning_rate": 8.199768283732432e-06, "loss": 0.3072, "step": 8970 }, { "epoch": 4.186100746268656, "grad_norm": 0.31436166041436625, "learning_rate": 8.181953087207467e-06, "loss": 0.311, "step": 8975 }, { "epoch": 4.188432835820896, "grad_norm": 0.3152166006644819, "learning_rate": 8.164183849209741e-06, "loss": 0.3016, "step": 8980 }, { "epoch": 4.190764925373134, "grad_norm": 0.31943270360697545, "learning_rate": 8.146460612013083e-06, "loss": 0.3173, "step": 8985 }, { "epoch": 4.193097014925373, "grad_norm": 0.31439371675822525, "learning_rate": 8.128783417781909e-06, "loss": 0.3127, "step": 8990 }, { "epoch": 4.195429104477612, "grad_norm": 0.30843036771253723, "learning_rate": 8.111152308571065e-06, "loss": 0.3109, "step": 8995 }, { "epoch": 4.197761194029851, "grad_norm": 0.3040692895732532, "learning_rate": 8.09356732632579e-06, "loss": 0.3129, "step": 9000 }, { "epoch": 4.200093283582089, "grad_norm": 0.3063649124775844, "learning_rate": 8.07602851288157e-06, "loss": 0.3038, "step": 9005 }, { "epoch": 4.202425373134329, "grad_norm": 0.3173369384118131, "learning_rate": 8.058535909964041e-06, "loss": 0.3141, "step": 9010 }, { "epoch": 4.204757462686567, "grad_norm": 0.31489353483827565, "learning_rate": 8.041089559188929e-06, "loss": 0.3065, "step": 9015 }, { "epoch": 4.207089552238806, "grad_norm": 0.3044616332675753, "learning_rate": 8.023689502061897e-06, "loss": 0.2985, "step": 9020 }, { "epoch": 4.209421641791045, "grad_norm": 0.3101325979565472, "learning_rate": 8.006335779978494e-06, "loss": 0.3094, "step": 9025 }, { "epoch": 4.211753731343284, "grad_norm": 0.31224166301645534, "learning_rate": 7.989028434224028e-06, "loss": 0.3126, "step": 9030 }, { "epoch": 4.214085820895522, "grad_norm": 0.32158220579616065, "learning_rate": 7.971767505973468e-06, "loss": 0.3163, "step": 9035 }, { "epoch": 4.2164179104477615, "grad_norm": 0.3133654915101628, "learning_rate": 7.95455303629137e-06, "loss": 0.3166, "step": 9040 }, { "epoch": 4.21875, "grad_norm": 0.32602493914807396, "learning_rate": 7.937385066131745e-06, "loss": 0.3224, "step": 9045 }, { "epoch": 4.2210820895522385, "grad_norm": 0.3150913343853468, "learning_rate": 7.920263636337994e-06, "loss": 0.309, "step": 9050 }, { "epoch": 4.223414179104478, "grad_norm": 0.3253696674418175, "learning_rate": 7.90318878764279e-06, "loss": 0.3159, "step": 9055 }, { "epoch": 4.225746268656716, "grad_norm": 0.31969486028013766, "learning_rate": 7.886160560667984e-06, "loss": 0.3189, "step": 9060 }, { "epoch": 4.228078358208955, "grad_norm": 0.30487204490459674, "learning_rate": 7.869178995924525e-06, "loss": 0.3138, "step": 9065 }, { "epoch": 4.230410447761194, "grad_norm": 0.3228721669434475, "learning_rate": 7.852244133812332e-06, "loss": 0.3171, "step": 9070 }, { "epoch": 4.232742537313433, "grad_norm": 0.32390417755974726, "learning_rate": 7.83535601462022e-06, "loss": 0.3159, "step": 9075 }, { "epoch": 4.235074626865671, "grad_norm": 0.30918725164534683, "learning_rate": 7.818514678525822e-06, "loss": 0.3187, "step": 9080 }, { "epoch": 4.237406716417911, "grad_norm": 0.30862873142342656, "learning_rate": 7.80172016559544e-06, "loss": 0.3148, "step": 9085 }, { "epoch": 4.239738805970149, "grad_norm": 0.3247762515037869, "learning_rate": 7.784972515784004e-06, "loss": 0.304, "step": 9090 }, { "epoch": 4.242070895522388, "grad_norm": 0.3144946482458086, "learning_rate": 7.768271768934955e-06, "loss": 0.3156, "step": 9095 }, { "epoch": 4.244402985074627, "grad_norm": 0.30641586399500176, "learning_rate": 7.751617964780131e-06, "loss": 0.3208, "step": 9100 }, { "epoch": 4.246735074626866, "grad_norm": 0.3281896040357453, "learning_rate": 7.73501114293971e-06, "loss": 0.319, "step": 9105 }, { "epoch": 4.249067164179104, "grad_norm": 0.3067166567247068, "learning_rate": 7.71845134292208e-06, "loss": 0.3173, "step": 9110 }, { "epoch": 4.251399253731344, "grad_norm": 0.321386958303076, "learning_rate": 7.70193860412378e-06, "loss": 0.3189, "step": 9115 }, { "epoch": 4.253731343283582, "grad_norm": 0.31565098123213453, "learning_rate": 7.68547296582938e-06, "loss": 0.3144, "step": 9120 }, { "epoch": 4.256063432835821, "grad_norm": 0.30297565593361153, "learning_rate": 7.669054467211388e-06, "loss": 0.3076, "step": 9125 }, { "epoch": 4.25839552238806, "grad_norm": 0.3237276061761198, "learning_rate": 7.652683147330177e-06, "loss": 0.3183, "step": 9130 }, { "epoch": 4.260727611940299, "grad_norm": 0.30441683085101673, "learning_rate": 7.636359045133873e-06, "loss": 0.314, "step": 9135 }, { "epoch": 4.263059701492537, "grad_norm": 0.32362249302808843, "learning_rate": 7.620082199458269e-06, "loss": 0.3141, "step": 9140 }, { "epoch": 4.2653917910447765, "grad_norm": 0.3223196171135628, "learning_rate": 7.603852649026738e-06, "loss": 0.3143, "step": 9145 }, { "epoch": 4.267723880597015, "grad_norm": 0.32189305748704267, "learning_rate": 7.587670432450131e-06, "loss": 0.3098, "step": 9150 }, { "epoch": 4.2700559701492535, "grad_norm": 0.3312261323152844, "learning_rate": 7.5715355882266815e-06, "loss": 0.3189, "step": 9155 }, { "epoch": 4.272388059701493, "grad_norm": 0.32889092078293924, "learning_rate": 7.5554481547419395e-06, "loss": 0.3154, "step": 9160 }, { "epoch": 4.2747201492537314, "grad_norm": 0.3199672441575963, "learning_rate": 7.539408170268644e-06, "loss": 0.3231, "step": 9165 }, { "epoch": 4.27705223880597, "grad_norm": 0.32067370578240506, "learning_rate": 7.523415672966675e-06, "loss": 0.3017, "step": 9170 }, { "epoch": 4.279384328358209, "grad_norm": 0.3119390948689613, "learning_rate": 7.507470700882905e-06, "loss": 0.3101, "step": 9175 }, { "epoch": 4.281716417910448, "grad_norm": 0.3023053467260447, "learning_rate": 7.491573291951176e-06, "loss": 0.3001, "step": 9180 }, { "epoch": 4.284048507462686, "grad_norm": 0.322710819840888, "learning_rate": 7.475723483992149e-06, "loss": 0.3267, "step": 9185 }, { "epoch": 4.286380597014926, "grad_norm": 0.32556580558679654, "learning_rate": 7.459921314713253e-06, "loss": 0.3167, "step": 9190 }, { "epoch": 4.288712686567164, "grad_norm": 0.30383904854616484, "learning_rate": 7.444166821708584e-06, "loss": 0.3056, "step": 9195 }, { "epoch": 4.291044776119403, "grad_norm": 0.31564422134400894, "learning_rate": 7.4284600424588045e-06, "loss": 0.3137, "step": 9200 }, { "epoch": 4.293376865671641, "grad_norm": 0.31421931846934953, "learning_rate": 7.412801014331075e-06, "loss": 0.3041, "step": 9205 }, { "epoch": 4.295708955223881, "grad_norm": 0.3066546144311205, "learning_rate": 7.397189774578939e-06, "loss": 0.3099, "step": 9210 }, { "epoch": 4.298041044776119, "grad_norm": 0.31187213501582517, "learning_rate": 7.38162636034226e-06, "loss": 0.311, "step": 9215 }, { "epoch": 4.300373134328359, "grad_norm": 0.31474221882403147, "learning_rate": 7.366110808647128e-06, "loss": 0.3216, "step": 9220 }, { "epoch": 4.302705223880597, "grad_norm": 0.31248321219341946, "learning_rate": 7.350643156405751e-06, "loss": 0.3067, "step": 9225 }, { "epoch": 4.305037313432836, "grad_norm": 0.32415173346840953, "learning_rate": 7.335223440416391e-06, "loss": 0.3077, "step": 9230 }, { "epoch": 4.307369402985074, "grad_norm": 0.3179933635843986, "learning_rate": 7.319851697363271e-06, "loss": 0.3106, "step": 9235 }, { "epoch": 4.309701492537314, "grad_norm": 0.31425355077599126, "learning_rate": 7.304527963816472e-06, "loss": 0.3047, "step": 9240 }, { "epoch": 4.312033582089552, "grad_norm": 0.3182936582995094, "learning_rate": 7.289252276231863e-06, "loss": 0.3086, "step": 9245 }, { "epoch": 4.314365671641791, "grad_norm": 0.3273698813295788, "learning_rate": 7.27402467095102e-06, "loss": 0.3143, "step": 9250 }, { "epoch": 4.31669776119403, "grad_norm": 0.3261639824380815, "learning_rate": 7.258845184201111e-06, "loss": 0.3236, "step": 9255 }, { "epoch": 4.3190298507462686, "grad_norm": 0.3032145553864473, "learning_rate": 7.243713852094848e-06, "loss": 0.3102, "step": 9260 }, { "epoch": 4.321361940298507, "grad_norm": 0.31806544717252216, "learning_rate": 7.228630710630356e-06, "loss": 0.3179, "step": 9265 }, { "epoch": 4.3236940298507465, "grad_norm": 0.3091395304455638, "learning_rate": 7.21359579569114e-06, "loss": 0.3082, "step": 9270 }, { "epoch": 4.326026119402985, "grad_norm": 0.31677198074475166, "learning_rate": 7.198609143045948e-06, "loss": 0.3137, "step": 9275 }, { "epoch": 4.3283582089552235, "grad_norm": 0.3013078297988274, "learning_rate": 7.183670788348726e-06, "loss": 0.304, "step": 9280 }, { "epoch": 4.330690298507463, "grad_norm": 0.3148810507249331, "learning_rate": 7.168780767138512e-06, "loss": 0.2966, "step": 9285 }, { "epoch": 4.333022388059701, "grad_norm": 0.3170505193000523, "learning_rate": 7.1539391148393474e-06, "loss": 0.3123, "step": 9290 }, { "epoch": 4.33535447761194, "grad_norm": 0.3344234874322577, "learning_rate": 7.139145866760217e-06, "loss": 0.3292, "step": 9295 }, { "epoch": 4.337686567164179, "grad_norm": 0.32160923845280837, "learning_rate": 7.124401058094938e-06, "loss": 0.3144, "step": 9300 }, { "epoch": 4.340018656716418, "grad_norm": 0.32558531430592386, "learning_rate": 7.109704723922094e-06, "loss": 0.3161, "step": 9305 }, { "epoch": 4.342350746268656, "grad_norm": 0.30215865961176647, "learning_rate": 7.0950568992049494e-06, "loss": 0.3156, "step": 9310 }, { "epoch": 4.344682835820896, "grad_norm": 0.312793956612791, "learning_rate": 7.080457618791344e-06, "loss": 0.3154, "step": 9315 }, { "epoch": 4.347014925373134, "grad_norm": 0.2900081506291846, "learning_rate": 7.0659069174136544e-06, "loss": 0.3086, "step": 9320 }, { "epoch": 4.349347014925373, "grad_norm": 0.3147125363762862, "learning_rate": 7.051404829688663e-06, "loss": 0.3065, "step": 9325 }, { "epoch": 4.351679104477612, "grad_norm": 0.31378330342686284, "learning_rate": 7.036951390117512e-06, "loss": 0.3183, "step": 9330 }, { "epoch": 4.354011194029851, "grad_norm": 0.3146955699979967, "learning_rate": 7.022546633085604e-06, "loss": 0.3086, "step": 9335 }, { "epoch": 4.356343283582089, "grad_norm": 0.3102439076702596, "learning_rate": 7.008190592862514e-06, "loss": 0.3159, "step": 9340 }, { "epoch": 4.358675373134329, "grad_norm": 0.3083100730334388, "learning_rate": 6.9938833036019365e-06, "loss": 0.3118, "step": 9345 }, { "epoch": 4.361007462686567, "grad_norm": 0.33345960798746554, "learning_rate": 6.979624799341565e-06, "loss": 0.3169, "step": 9350 }, { "epoch": 4.363339552238806, "grad_norm": 0.31031671437953723, "learning_rate": 6.965415114003046e-06, "loss": 0.3172, "step": 9355 }, { "epoch": 4.365671641791045, "grad_norm": 0.31122494037803433, "learning_rate": 6.951254281391881e-06, "loss": 0.31, "step": 9360 }, { "epoch": 4.368003731343284, "grad_norm": 0.3310842319371813, "learning_rate": 6.937142335197338e-06, "loss": 0.3163, "step": 9365 }, { "epoch": 4.370335820895522, "grad_norm": 0.3266247532483072, "learning_rate": 6.9230793089924005e-06, "loss": 0.3081, "step": 9370 }, { "epoch": 4.3726679104477615, "grad_norm": 0.3160049405098009, "learning_rate": 6.909065236233644e-06, "loss": 0.3142, "step": 9375 }, { "epoch": 4.375, "grad_norm": 0.31751455260183614, "learning_rate": 6.8951001502612065e-06, "loss": 0.3054, "step": 9380 }, { "epoch": 4.3773320895522385, "grad_norm": 0.3169421254859183, "learning_rate": 6.881184084298675e-06, "loss": 0.3256, "step": 9385 }, { "epoch": 4.379664179104478, "grad_norm": 0.3116499988090621, "learning_rate": 6.867317071453007e-06, "loss": 0.3047, "step": 9390 }, { "epoch": 4.381996268656716, "grad_norm": 0.3027211097393815, "learning_rate": 6.8534991447144706e-06, "loss": 0.2999, "step": 9395 }, { "epoch": 4.384328358208955, "grad_norm": 0.30799580833483214, "learning_rate": 6.839730336956554e-06, "loss": 0.3026, "step": 9400 }, { "epoch": 4.386660447761194, "grad_norm": 0.3125447210745162, "learning_rate": 6.826010680935886e-06, "loss": 0.3127, "step": 9405 }, { "epoch": 4.388992537313433, "grad_norm": 0.3135573006545542, "learning_rate": 6.812340209292164e-06, "loss": 0.3145, "step": 9410 }, { "epoch": 4.391324626865671, "grad_norm": 0.33046297751458564, "learning_rate": 6.79871895454807e-06, "loss": 0.3232, "step": 9415 }, { "epoch": 4.393656716417911, "grad_norm": 0.31992957018526746, "learning_rate": 6.785146949109206e-06, "loss": 0.3087, "step": 9420 }, { "epoch": 4.395988805970149, "grad_norm": 0.3083363513241552, "learning_rate": 6.771624225263994e-06, "loss": 0.3008, "step": 9425 }, { "epoch": 4.398320895522388, "grad_norm": 0.32115573055455376, "learning_rate": 6.758150815183618e-06, "loss": 0.3211, "step": 9430 }, { "epoch": 4.400652985074627, "grad_norm": 0.3118255348524424, "learning_rate": 6.7447267509219494e-06, "loss": 0.3152, "step": 9435 }, { "epoch": 4.402985074626866, "grad_norm": 0.30976798621989876, "learning_rate": 6.7313520644154555e-06, "loss": 0.3132, "step": 9440 }, { "epoch": 4.405317164179104, "grad_norm": 0.3109715811421837, "learning_rate": 6.718026787483131e-06, "loss": 0.3029, "step": 9445 }, { "epoch": 4.407649253731344, "grad_norm": 0.31069446660274447, "learning_rate": 6.704750951826438e-06, "loss": 0.3184, "step": 9450 }, { "epoch": 4.409981343283582, "grad_norm": 0.3137778669065622, "learning_rate": 6.691524589029188e-06, "loss": 0.3183, "step": 9455 }, { "epoch": 4.412313432835821, "grad_norm": 0.3120167533097256, "learning_rate": 6.6783477305575215e-06, "loss": 0.3161, "step": 9460 }, { "epoch": 4.41464552238806, "grad_norm": 0.31485240472068454, "learning_rate": 6.665220407759788e-06, "loss": 0.3189, "step": 9465 }, { "epoch": 4.416977611940299, "grad_norm": 0.31193107546933735, "learning_rate": 6.652142651866497e-06, "loss": 0.3063, "step": 9470 }, { "epoch": 4.419309701492537, "grad_norm": 0.3040665162851122, "learning_rate": 6.639114493990238e-06, "loss": 0.3114, "step": 9475 }, { "epoch": 4.4216417910447765, "grad_norm": 0.31650079759954197, "learning_rate": 6.626135965125597e-06, "loss": 0.3078, "step": 9480 }, { "epoch": 4.423973880597015, "grad_norm": 0.32424038608897016, "learning_rate": 6.613207096149099e-06, "loss": 0.313, "step": 9485 }, { "epoch": 4.4263059701492535, "grad_norm": 0.30214938721311874, "learning_rate": 6.600327917819114e-06, "loss": 0.3083, "step": 9490 }, { "epoch": 4.428638059701493, "grad_norm": 0.3172182664051574, "learning_rate": 6.587498460775811e-06, "loss": 0.3088, "step": 9495 }, { "epoch": 4.4309701492537314, "grad_norm": 0.32712447336488193, "learning_rate": 6.574718755541061e-06, "loss": 0.3229, "step": 9500 }, { "epoch": 4.43330223880597, "grad_norm": 0.3004275740872496, "learning_rate": 6.561988832518367e-06, "loss": 0.309, "step": 9505 }, { "epoch": 4.435634328358209, "grad_norm": 0.32180864365797557, "learning_rate": 6.5493087219928114e-06, "loss": 0.3256, "step": 9510 }, { "epoch": 4.437966417910448, "grad_norm": 0.32083923931934416, "learning_rate": 6.536678454130965e-06, "loss": 0.3117, "step": 9515 }, { "epoch": 4.440298507462686, "grad_norm": 0.31086398229118484, "learning_rate": 6.52409805898081e-06, "loss": 0.3108, "step": 9520 }, { "epoch": 4.442630597014926, "grad_norm": 0.3157325366555264, "learning_rate": 6.511567566471697e-06, "loss": 0.3215, "step": 9525 }, { "epoch": 4.444962686567164, "grad_norm": 0.3128950545238299, "learning_rate": 6.499087006414245e-06, "loss": 0.3134, "step": 9530 }, { "epoch": 4.447294776119403, "grad_norm": 0.3120100270556806, "learning_rate": 6.4866564085002826e-06, "loss": 0.3075, "step": 9535 }, { "epoch": 4.449626865671641, "grad_norm": 0.33091833388470304, "learning_rate": 6.474275802302776e-06, "loss": 0.3239, "step": 9540 }, { "epoch": 4.451958955223881, "grad_norm": 0.31938910425740324, "learning_rate": 6.461945217275761e-06, "loss": 0.3134, "step": 9545 }, { "epoch": 4.454291044776119, "grad_norm": 0.32532022254048837, "learning_rate": 6.449664682754278e-06, "loss": 0.3039, "step": 9550 }, { "epoch": 4.456623134328359, "grad_norm": 0.3055026466031164, "learning_rate": 6.4374342279542726e-06, "loss": 0.3204, "step": 9555 }, { "epoch": 4.458955223880597, "grad_norm": 0.2958013163739507, "learning_rate": 6.425253881972573e-06, "loss": 0.302, "step": 9560 }, { "epoch": 4.461287313432836, "grad_norm": 0.3112662258913266, "learning_rate": 6.4131236737867795e-06, "loss": 0.3092, "step": 9565 }, { "epoch": 4.463619402985074, "grad_norm": 0.30246537004372054, "learning_rate": 6.4010436322552204e-06, "loss": 0.3045, "step": 9570 }, { "epoch": 4.465951492537314, "grad_norm": 0.3179609148442766, "learning_rate": 6.389013786116878e-06, "loss": 0.3102, "step": 9575 }, { "epoch": 4.468283582089552, "grad_norm": 0.3098076847750162, "learning_rate": 6.377034163991308e-06, "loss": 0.3097, "step": 9580 }, { "epoch": 4.470615671641791, "grad_norm": 0.3063427984939342, "learning_rate": 6.365104794378582e-06, "loss": 0.3057, "step": 9585 }, { "epoch": 4.47294776119403, "grad_norm": 0.31456699898495105, "learning_rate": 6.353225705659234e-06, "loss": 0.3113, "step": 9590 }, { "epoch": 4.4752798507462686, "grad_norm": 0.32116179349119583, "learning_rate": 6.341396926094155e-06, "loss": 0.3196, "step": 9595 }, { "epoch": 4.477611940298507, "grad_norm": 0.3134511044698331, "learning_rate": 6.329618483824559e-06, "loss": 0.3265, "step": 9600 }, { "epoch": 4.4799440298507465, "grad_norm": 0.32916329518341536, "learning_rate": 6.317890406871914e-06, "loss": 0.3249, "step": 9605 }, { "epoch": 4.482276119402985, "grad_norm": 0.32035564956686297, "learning_rate": 6.306212723137846e-06, "loss": 0.3161, "step": 9610 }, { "epoch": 4.4846082089552235, "grad_norm": 0.32161224071198696, "learning_rate": 6.2945854604041135e-06, "loss": 0.3189, "step": 9615 }, { "epoch": 4.486940298507463, "grad_norm": 0.31313425253710436, "learning_rate": 6.283008646332507e-06, "loss": 0.3142, "step": 9620 }, { "epoch": 4.489272388059701, "grad_norm": 0.30960005742676017, "learning_rate": 6.271482308464807e-06, "loss": 0.3072, "step": 9625 }, { "epoch": 4.49160447761194, "grad_norm": 0.32234775156311685, "learning_rate": 6.2600064742227e-06, "loss": 0.3177, "step": 9630 }, { "epoch": 4.493936567164179, "grad_norm": 0.3096093420310791, "learning_rate": 6.248581170907729e-06, "loss": 0.3145, "step": 9635 }, { "epoch": 4.496268656716418, "grad_norm": 0.32484903863918807, "learning_rate": 6.237206425701223e-06, "loss": 0.3201, "step": 9640 }, { "epoch": 4.498600746268656, "grad_norm": 0.32321957314753663, "learning_rate": 6.225882265664218e-06, "loss": 0.3209, "step": 9645 }, { "epoch": 4.500932835820896, "grad_norm": 0.3255971314401222, "learning_rate": 6.214608717737426e-06, "loss": 0.3218, "step": 9650 }, { "epoch": 4.503264925373134, "grad_norm": 0.2923830837157011, "learning_rate": 6.2033858087411275e-06, "loss": 0.2978, "step": 9655 }, { "epoch": 4.505597014925373, "grad_norm": 0.3171272853430279, "learning_rate": 6.192213565375147e-06, "loss": 0.3149, "step": 9660 }, { "epoch": 4.507929104477612, "grad_norm": 0.31386421682739657, "learning_rate": 6.1810920142187726e-06, "loss": 0.3116, "step": 9665 }, { "epoch": 4.510261194029851, "grad_norm": 0.31564354902715813, "learning_rate": 6.170021181730681e-06, "loss": 0.3247, "step": 9670 }, { "epoch": 4.512593283582089, "grad_norm": 0.32590661703654167, "learning_rate": 6.159001094248904e-06, "loss": 0.3227, "step": 9675 }, { "epoch": 4.514925373134329, "grad_norm": 0.3174014912807721, "learning_rate": 6.1480317779907285e-06, "loss": 0.3109, "step": 9680 }, { "epoch": 4.517257462686567, "grad_norm": 0.3125804785267247, "learning_rate": 6.1371132590526744e-06, "loss": 0.3087, "step": 9685 }, { "epoch": 4.519589552238806, "grad_norm": 0.30608732292736945, "learning_rate": 6.126245563410399e-06, "loss": 0.3172, "step": 9690 }, { "epoch": 4.521921641791045, "grad_norm": 0.30925082314152713, "learning_rate": 6.115428716918657e-06, "loss": 0.316, "step": 9695 }, { "epoch": 4.524253731343284, "grad_norm": 0.31850442442497634, "learning_rate": 6.104662745311222e-06, "loss": 0.3101, "step": 9700 }, { "epoch": 4.526585820895522, "grad_norm": 0.30884949170154735, "learning_rate": 6.093947674200838e-06, "loss": 0.3206, "step": 9705 }, { "epoch": 4.5289179104477615, "grad_norm": 0.3073826112809431, "learning_rate": 6.083283529079157e-06, "loss": 0.3074, "step": 9710 }, { "epoch": 4.53125, "grad_norm": 0.3098958380398544, "learning_rate": 6.072670335316676e-06, "loss": 0.3188, "step": 9715 }, { "epoch": 4.5335820895522385, "grad_norm": 0.311949342572286, "learning_rate": 6.062108118162669e-06, "loss": 0.3107, "step": 9720 }, { "epoch": 4.535914179104478, "grad_norm": 0.3191186998839216, "learning_rate": 6.051596902745143e-06, "loss": 0.3105, "step": 9725 }, { "epoch": 4.538246268656716, "grad_norm": 0.30294658217909026, "learning_rate": 6.0411367140707625e-06, "loss": 0.3081, "step": 9730 }, { "epoch": 4.540578358208955, "grad_norm": 0.30646672169994305, "learning_rate": 6.030727577024802e-06, "loss": 0.3058, "step": 9735 }, { "epoch": 4.542910447761194, "grad_norm": 0.3070710595862018, "learning_rate": 6.020369516371085e-06, "loss": 0.3187, "step": 9740 }, { "epoch": 4.545242537313433, "grad_norm": 0.3233005534553956, "learning_rate": 6.010062556751906e-06, "loss": 0.3257, "step": 9745 }, { "epoch": 4.547574626865671, "grad_norm": 0.31475505910043794, "learning_rate": 5.999806722688007e-06, "loss": 0.3091, "step": 9750 }, { "epoch": 4.549906716417911, "grad_norm": 0.3316235369717215, "learning_rate": 5.98960203857849e-06, "loss": 0.3141, "step": 9755 }, { "epoch": 4.552238805970149, "grad_norm": 0.31520917469036164, "learning_rate": 5.9794485287007696e-06, "loss": 0.3187, "step": 9760 }, { "epoch": 4.554570895522388, "grad_norm": 0.3061968703181662, "learning_rate": 5.9693462172105165e-06, "loss": 0.3207, "step": 9765 }, { "epoch": 4.556902985074627, "grad_norm": 0.3080188083913628, "learning_rate": 5.959295128141596e-06, "loss": 0.3121, "step": 9770 }, { "epoch": 4.559235074626866, "grad_norm": 0.32577751957131945, "learning_rate": 5.949295285406015e-06, "loss": 0.3193, "step": 9775 }, { "epoch": 4.561567164179104, "grad_norm": 0.3137134608643252, "learning_rate": 5.93934671279386e-06, "loss": 0.3162, "step": 9780 }, { "epoch": 4.563899253731344, "grad_norm": 0.3245497285389817, "learning_rate": 5.929449433973249e-06, "loss": 0.3055, "step": 9785 }, { "epoch": 4.566231343283582, "grad_norm": 0.32393984303129214, "learning_rate": 5.919603472490263e-06, "loss": 0.3273, "step": 9790 }, { "epoch": 4.568563432835821, "grad_norm": 0.32853893073697416, "learning_rate": 5.909808851768898e-06, "loss": 0.3043, "step": 9795 }, { "epoch": 4.57089552238806, "grad_norm": 0.31078785437739787, "learning_rate": 5.900065595111014e-06, "loss": 0.3153, "step": 9800 }, { "epoch": 4.573227611940299, "grad_norm": 0.3146152918897088, "learning_rate": 5.890373725696271e-06, "loss": 0.315, "step": 9805 }, { "epoch": 4.575559701492537, "grad_norm": 0.31820728627644107, "learning_rate": 5.880733266582066e-06, "loss": 0.3148, "step": 9810 }, { "epoch": 4.5778917910447765, "grad_norm": 0.3008475950935232, "learning_rate": 5.871144240703507e-06, "loss": 0.3058, "step": 9815 }, { "epoch": 4.580223880597015, "grad_norm": 0.31242150747046804, "learning_rate": 5.8616066708733255e-06, "loss": 0.3095, "step": 9820 }, { "epoch": 4.5825559701492535, "grad_norm": 0.3322775299107112, "learning_rate": 5.852120579781838e-06, "loss": 0.315, "step": 9825 }, { "epoch": 4.584888059701493, "grad_norm": 0.31778235126301185, "learning_rate": 5.8426859899969034e-06, "loss": 0.3267, "step": 9830 }, { "epoch": 4.5872201492537314, "grad_norm": 0.3200236380881401, "learning_rate": 5.833302923963837e-06, "loss": 0.3196, "step": 9835 }, { "epoch": 4.58955223880597, "grad_norm": 0.3296895436181347, "learning_rate": 5.8239714040053936e-06, "loss": 0.324, "step": 9840 }, { "epoch": 4.5918843283582085, "grad_norm": 0.29558216990953023, "learning_rate": 5.814691452321687e-06, "loss": 0.3058, "step": 9845 }, { "epoch": 4.594216417910448, "grad_norm": 0.30976737100102036, "learning_rate": 5.805463090990154e-06, "loss": 0.3182, "step": 9850 }, { "epoch": 4.596548507462686, "grad_norm": 0.307151737940936, "learning_rate": 5.796286341965492e-06, "loss": 0.3089, "step": 9855 }, { "epoch": 4.598880597014926, "grad_norm": 0.323579388155557, "learning_rate": 5.787161227079613e-06, "loss": 0.3189, "step": 9860 }, { "epoch": 4.601212686567164, "grad_norm": 0.32366176436725375, "learning_rate": 5.778087768041589e-06, "loss": 0.3153, "step": 9865 }, { "epoch": 4.603544776119403, "grad_norm": 0.31966823984980225, "learning_rate": 5.769065986437591e-06, "loss": 0.3187, "step": 9870 }, { "epoch": 4.605876865671641, "grad_norm": 0.32935418031501373, "learning_rate": 5.7600959037308626e-06, "loss": 0.3086, "step": 9875 }, { "epoch": 4.608208955223881, "grad_norm": 0.31411529075759415, "learning_rate": 5.7511775412616415e-06, "loss": 0.3163, "step": 9880 }, { "epoch": 4.610541044776119, "grad_norm": 0.30455591380827685, "learning_rate": 5.742310920247127e-06, "loss": 0.3018, "step": 9885 }, { "epoch": 4.612873134328359, "grad_norm": 0.31122927044143517, "learning_rate": 5.733496061781418e-06, "loss": 0.3195, "step": 9890 }, { "epoch": 4.615205223880597, "grad_norm": 0.3245639285418736, "learning_rate": 5.7247329868354705e-06, "loss": 0.3112, "step": 9895 }, { "epoch": 4.617537313432836, "grad_norm": 0.3129893975669735, "learning_rate": 5.716021716257047e-06, "loss": 0.3186, "step": 9900 }, { "epoch": 4.619869402985074, "grad_norm": 0.3133463144824551, "learning_rate": 5.707362270770665e-06, "loss": 0.3091, "step": 9905 }, { "epoch": 4.622201492537314, "grad_norm": 0.3089608905940591, "learning_rate": 5.698754670977544e-06, "loss": 0.3069, "step": 9910 }, { "epoch": 4.624533582089552, "grad_norm": 0.31553464859391045, "learning_rate": 5.690198937355561e-06, "loss": 0.3223, "step": 9915 }, { "epoch": 4.6268656716417915, "grad_norm": 0.3094332341555593, "learning_rate": 5.6816950902592005e-06, "loss": 0.3074, "step": 9920 }, { "epoch": 4.62919776119403, "grad_norm": 0.323929037041858, "learning_rate": 5.673243149919512e-06, "loss": 0.304, "step": 9925 }, { "epoch": 4.6315298507462686, "grad_norm": 0.3080886874823638, "learning_rate": 5.664843136444054e-06, "loss": 0.3166, "step": 9930 }, { "epoch": 4.633861940298507, "grad_norm": 0.323773619163542, "learning_rate": 5.6564950698168385e-06, "loss": 0.3066, "step": 9935 }, { "epoch": 4.6361940298507465, "grad_norm": 0.30955918780638436, "learning_rate": 5.648198969898311e-06, "loss": 0.3077, "step": 9940 }, { "epoch": 4.638526119402985, "grad_norm": 0.3267849659965636, "learning_rate": 5.639954856425273e-06, "loss": 0.313, "step": 9945 }, { "epoch": 4.6408582089552235, "grad_norm": 0.3067641751166403, "learning_rate": 5.631762749010855e-06, "loss": 0.3077, "step": 9950 }, { "epoch": 4.643190298507463, "grad_norm": 0.3314442299746067, "learning_rate": 5.6236226671444555e-06, "loss": 0.3183, "step": 9955 }, { "epoch": 4.645522388059701, "grad_norm": 0.30285794545252454, "learning_rate": 5.615534630191708e-06, "loss": 0.3035, "step": 9960 }, { "epoch": 4.64785447761194, "grad_norm": 0.3193964837219883, "learning_rate": 5.607498657394424e-06, "loss": 0.3173, "step": 9965 }, { "epoch": 4.650186567164179, "grad_norm": 0.31554284532707494, "learning_rate": 5.59951476787056e-06, "loss": 0.3185, "step": 9970 }, { "epoch": 4.652518656716418, "grad_norm": 0.31384595373206736, "learning_rate": 5.591582980614151e-06, "loss": 0.3068, "step": 9975 }, { "epoch": 4.654850746268656, "grad_norm": 0.32072026204665194, "learning_rate": 5.583703314495294e-06, "loss": 0.3075, "step": 9980 }, { "epoch": 4.657182835820896, "grad_norm": 0.3045696494941392, "learning_rate": 5.5758757882600706e-06, "loss": 0.3142, "step": 9985 }, { "epoch": 4.659514925373134, "grad_norm": 0.31082319478312015, "learning_rate": 5.568100420530533e-06, "loss": 0.32, "step": 9990 }, { "epoch": 4.661847014925373, "grad_norm": 0.3024613441589873, "learning_rate": 5.560377229804644e-06, "loss": 0.3114, "step": 9995 }, { "epoch": 4.664179104477612, "grad_norm": 0.30260492687100504, "learning_rate": 5.55270623445622e-06, "loss": 0.3115, "step": 10000 }, { "epoch": 4.666511194029851, "grad_norm": 0.30232804473891833, "learning_rate": 5.545087452734928e-06, "loss": 0.3158, "step": 10005 }, { "epoch": 4.668843283582089, "grad_norm": 0.3110822076814464, "learning_rate": 5.537520902766193e-06, "loss": 0.3206, "step": 10010 }, { "epoch": 4.671175373134329, "grad_norm": 0.3121586215028074, "learning_rate": 5.5300066025511885e-06, "loss": 0.3115, "step": 10015 }, { "epoch": 4.673507462686567, "grad_norm": 0.3076162047086666, "learning_rate": 5.522544569966786e-06, "loss": 0.3192, "step": 10020 }, { "epoch": 4.675839552238806, "grad_norm": 0.30078077269058356, "learning_rate": 5.515134822765504e-06, "loss": 0.3027, "step": 10025 }, { "epoch": 4.678171641791045, "grad_norm": 0.32944434985470217, "learning_rate": 5.507777378575474e-06, "loss": 0.3231, "step": 10030 }, { "epoch": 4.680503731343284, "grad_norm": 0.3356419706218018, "learning_rate": 5.500472254900392e-06, "loss": 0.3201, "step": 10035 }, { "epoch": 4.682835820895522, "grad_norm": 0.3035287423708558, "learning_rate": 5.4932194691194905e-06, "loss": 0.2992, "step": 10040 }, { "epoch": 4.6851679104477615, "grad_norm": 0.329979423870201, "learning_rate": 5.486019038487483e-06, "loss": 0.3192, "step": 10045 }, { "epoch": 4.6875, "grad_norm": 0.31744214575763574, "learning_rate": 5.4788709801345244e-06, "loss": 0.315, "step": 10050 }, { "epoch": 4.6898320895522385, "grad_norm": 0.3209755570552515, "learning_rate": 5.471775311066177e-06, "loss": 0.3203, "step": 10055 }, { "epoch": 4.692164179104478, "grad_norm": 0.3167904048222757, "learning_rate": 5.464732048163365e-06, "loss": 0.3155, "step": 10060 }, { "epoch": 4.694496268656716, "grad_norm": 0.30366354150139746, "learning_rate": 5.4577412081823355e-06, "loss": 0.3195, "step": 10065 }, { "epoch": 4.696828358208955, "grad_norm": 0.3277864988054558, "learning_rate": 5.450802807754625e-06, "loss": 0.3263, "step": 10070 }, { "epoch": 4.699160447761194, "grad_norm": 0.32899134092332627, "learning_rate": 5.443916863387002e-06, "loss": 0.3157, "step": 10075 }, { "epoch": 4.701492537313433, "grad_norm": 0.32279822575297357, "learning_rate": 5.437083391461452e-06, "loss": 0.318, "step": 10080 }, { "epoch": 4.703824626865671, "grad_norm": 0.31817387526755603, "learning_rate": 5.43030240823512e-06, "loss": 0.3179, "step": 10085 }, { "epoch": 4.706156716417911, "grad_norm": 0.319581445888398, "learning_rate": 5.423573929840277e-06, "loss": 0.3045, "step": 10090 }, { "epoch": 4.708488805970149, "grad_norm": 0.3141819923660517, "learning_rate": 5.416897972284287e-06, "loss": 0.3121, "step": 10095 }, { "epoch": 4.710820895522388, "grad_norm": 0.31179688751651263, "learning_rate": 5.410274551449559e-06, "loss": 0.3146, "step": 10100 }, { "epoch": 4.713152985074627, "grad_norm": 0.3064104614287999, "learning_rate": 5.403703683093517e-06, "loss": 0.3088, "step": 10105 }, { "epoch": 4.715485074626866, "grad_norm": 0.31976295181396064, "learning_rate": 5.397185382848568e-06, "loss": 0.321, "step": 10110 }, { "epoch": 4.717817164179104, "grad_norm": 0.3151962910889956, "learning_rate": 5.39071966622204e-06, "loss": 0.3183, "step": 10115 }, { "epoch": 4.720149253731344, "grad_norm": 0.3102478176701361, "learning_rate": 5.384306548596178e-06, "loss": 0.3187, "step": 10120 }, { "epoch": 4.722481343283582, "grad_norm": 0.29722456289925236, "learning_rate": 5.377946045228084e-06, "loss": 0.3068, "step": 10125 }, { "epoch": 4.724813432835821, "grad_norm": 0.3068470197965488, "learning_rate": 5.37163817124969e-06, "loss": 0.3243, "step": 10130 }, { "epoch": 4.72714552238806, "grad_norm": 0.3223281565053617, "learning_rate": 5.36538294166772e-06, "loss": 0.3163, "step": 10135 }, { "epoch": 4.729477611940299, "grad_norm": 0.3132281603892288, "learning_rate": 5.3591803713636545e-06, "loss": 0.315, "step": 10140 }, { "epoch": 4.731809701492537, "grad_norm": 0.3076055174173578, "learning_rate": 5.353030475093694e-06, "loss": 0.3181, "step": 10145 }, { "epoch": 4.7341417910447765, "grad_norm": 0.32547349236974366, "learning_rate": 5.346933267488726e-06, "loss": 0.32, "step": 10150 }, { "epoch": 4.736473880597015, "grad_norm": 0.3220746084859488, "learning_rate": 5.340888763054291e-06, "loss": 0.3141, "step": 10155 }, { "epoch": 4.7388059701492535, "grad_norm": 0.2999493359984161, "learning_rate": 5.3348969761705446e-06, "loss": 0.3117, "step": 10160 }, { "epoch": 4.741138059701493, "grad_norm": 0.3138694317764045, "learning_rate": 5.328957921092224e-06, "loss": 0.3098, "step": 10165 }, { "epoch": 4.7434701492537314, "grad_norm": 0.31135830909382434, "learning_rate": 5.323071611948619e-06, "loss": 0.3159, "step": 10170 }, { "epoch": 4.74580223880597, "grad_norm": 0.3276581343897369, "learning_rate": 5.317238062743527e-06, "loss": 0.3203, "step": 10175 }, { "epoch": 4.7481343283582085, "grad_norm": 0.31942184368521176, "learning_rate": 5.311457287355232e-06, "loss": 0.3059, "step": 10180 }, { "epoch": 4.750466417910448, "grad_norm": 0.32233074310798976, "learning_rate": 5.3057292995364695e-06, "loss": 0.3139, "step": 10185 }, { "epoch": 4.752798507462686, "grad_norm": 0.3197885231839519, "learning_rate": 5.300054112914385e-06, "loss": 0.3138, "step": 10190 }, { "epoch": 4.755130597014926, "grad_norm": 0.3073239712431677, "learning_rate": 5.294431740990509e-06, "loss": 0.3048, "step": 10195 }, { "epoch": 4.757462686567164, "grad_norm": 0.3273091480745681, "learning_rate": 5.288862197140726e-06, "loss": 0.3164, "step": 10200 }, { "epoch": 4.759794776119403, "grad_norm": 0.3165498317692603, "learning_rate": 5.283345494615238e-06, "loss": 0.3198, "step": 10205 }, { "epoch": 4.762126865671641, "grad_norm": 0.30677842416650786, "learning_rate": 5.277881646538537e-06, "loss": 0.3174, "step": 10210 }, { "epoch": 4.764458955223881, "grad_norm": 0.33097318597186326, "learning_rate": 5.272470665909368e-06, "loss": 0.3109, "step": 10215 }, { "epoch": 4.766791044776119, "grad_norm": 0.3067247860791447, "learning_rate": 5.267112565600707e-06, "loss": 0.3164, "step": 10220 }, { "epoch": 4.769123134328359, "grad_norm": 0.32213167035259604, "learning_rate": 5.261807358359719e-06, "loss": 0.31, "step": 10225 }, { "epoch": 4.771455223880597, "grad_norm": 0.32129302279705746, "learning_rate": 5.25655505680774e-06, "loss": 0.3112, "step": 10230 }, { "epoch": 4.773787313432836, "grad_norm": 0.30731743933544003, "learning_rate": 5.2513556734402384e-06, "loss": 0.32, "step": 10235 }, { "epoch": 4.776119402985074, "grad_norm": 0.31624010138604564, "learning_rate": 5.2462092206267864e-06, "loss": 0.3177, "step": 10240 }, { "epoch": 4.778451492537314, "grad_norm": 0.2999926311088851, "learning_rate": 5.241115710611033e-06, "loss": 0.3081, "step": 10245 }, { "epoch": 4.780783582089552, "grad_norm": 0.31868994551337204, "learning_rate": 5.236075155510675e-06, "loss": 0.3271, "step": 10250 }, { "epoch": 4.7831156716417915, "grad_norm": 0.3188880985978943, "learning_rate": 5.231087567317425e-06, "loss": 0.3035, "step": 10255 }, { "epoch": 4.78544776119403, "grad_norm": 0.2975355144549426, "learning_rate": 5.2261529578969905e-06, "loss": 0.3073, "step": 10260 }, { "epoch": 4.7877798507462686, "grad_norm": 0.3064823985694433, "learning_rate": 5.22127133898903e-06, "loss": 0.3093, "step": 10265 }, { "epoch": 4.790111940298507, "grad_norm": 0.31263613947296776, "learning_rate": 5.216442722207141e-06, "loss": 0.3167, "step": 10270 }, { "epoch": 4.7924440298507465, "grad_norm": 0.3094329041195748, "learning_rate": 5.211667119038829e-06, "loss": 0.3227, "step": 10275 }, { "epoch": 4.794776119402985, "grad_norm": 0.30770653543899973, "learning_rate": 5.206944540845476e-06, "loss": 0.3089, "step": 10280 }, { "epoch": 4.7971082089552235, "grad_norm": 0.3071760999523295, "learning_rate": 5.202274998862312e-06, "loss": 0.3229, "step": 10285 }, { "epoch": 4.799440298507463, "grad_norm": 0.31880184565220154, "learning_rate": 5.197658504198392e-06, "loss": 0.3188, "step": 10290 }, { "epoch": 4.801772388059701, "grad_norm": 0.3244002372851874, "learning_rate": 5.1930950678365715e-06, "loss": 0.3226, "step": 10295 }, { "epoch": 4.80410447761194, "grad_norm": 0.31189750419867435, "learning_rate": 5.188584700633478e-06, "loss": 0.3116, "step": 10300 }, { "epoch": 4.806436567164179, "grad_norm": 0.3147556532824331, "learning_rate": 5.184127413319482e-06, "loss": 0.3087, "step": 10305 }, { "epoch": 4.808768656716418, "grad_norm": 0.3062542284527385, "learning_rate": 5.179723216498677e-06, "loss": 0.3015, "step": 10310 }, { "epoch": 4.811100746268656, "grad_norm": 0.30414655771864124, "learning_rate": 5.17537212064885e-06, "loss": 0.3254, "step": 10315 }, { "epoch": 4.813432835820896, "grad_norm": 0.31569863631312906, "learning_rate": 5.171074136121461e-06, "loss": 0.3049, "step": 10320 }, { "epoch": 4.815764925373134, "grad_norm": 0.33897421898143415, "learning_rate": 5.166829273141612e-06, "loss": 0.3179, "step": 10325 }, { "epoch": 4.818097014925373, "grad_norm": 0.32453223076194404, "learning_rate": 5.162637541808031e-06, "loss": 0.3112, "step": 10330 }, { "epoch": 4.820429104477612, "grad_norm": 0.31386945189075266, "learning_rate": 5.158498952093038e-06, "loss": 0.3156, "step": 10335 }, { "epoch": 4.822761194029851, "grad_norm": 0.30493886063483444, "learning_rate": 5.154413513842533e-06, "loss": 0.3063, "step": 10340 }, { "epoch": 4.825093283582089, "grad_norm": 0.3077443261292822, "learning_rate": 5.1503812367759575e-06, "loss": 0.3131, "step": 10345 }, { "epoch": 4.827425373134329, "grad_norm": 0.31497260805611127, "learning_rate": 5.146402130486288e-06, "loss": 0.3204, "step": 10350 }, { "epoch": 4.829757462686567, "grad_norm": 0.3037036278157516, "learning_rate": 5.142476204440002e-06, "loss": 0.3105, "step": 10355 }, { "epoch": 4.832089552238806, "grad_norm": 0.30696638199603155, "learning_rate": 5.138603467977062e-06, "loss": 0.3199, "step": 10360 }, { "epoch": 4.834421641791045, "grad_norm": 0.31979254334390783, "learning_rate": 5.134783930310883e-06, "loss": 0.3252, "step": 10365 }, { "epoch": 4.836753731343284, "grad_norm": 0.31014111158900004, "learning_rate": 5.131017600528324e-06, "loss": 0.3104, "step": 10370 }, { "epoch": 4.839085820895522, "grad_norm": 0.32016767672617874, "learning_rate": 5.127304487589658e-06, "loss": 0.3185, "step": 10375 }, { "epoch": 4.8414179104477615, "grad_norm": 0.3163161583521059, "learning_rate": 5.123644600328549e-06, "loss": 0.3172, "step": 10380 }, { "epoch": 4.84375, "grad_norm": 0.3183278995478433, "learning_rate": 5.120037947452043e-06, "loss": 0.3165, "step": 10385 }, { "epoch": 4.8460820895522385, "grad_norm": 0.3068556974279603, "learning_rate": 5.116484537540532e-06, "loss": 0.3058, "step": 10390 }, { "epoch": 4.848414179104478, "grad_norm": 0.3188458939171224, "learning_rate": 5.11298437904774e-06, "loss": 0.317, "step": 10395 }, { "epoch": 4.850746268656716, "grad_norm": 0.3043582604280459, "learning_rate": 5.1095374803007115e-06, "loss": 0.3146, "step": 10400 }, { "epoch": 4.853078358208955, "grad_norm": 0.31174271327484376, "learning_rate": 5.1061438494997726e-06, "loss": 0.3154, "step": 10405 }, { "epoch": 4.855410447761194, "grad_norm": 0.3264175427536592, "learning_rate": 5.102803494718532e-06, "loss": 0.3193, "step": 10410 }, { "epoch": 4.857742537313433, "grad_norm": 0.31605823451059634, "learning_rate": 5.099516423903844e-06, "loss": 0.319, "step": 10415 }, { "epoch": 4.860074626865671, "grad_norm": 0.3019979674490088, "learning_rate": 5.096282644875807e-06, "loss": 0.2992, "step": 10420 }, { "epoch": 4.862406716417911, "grad_norm": 0.3255443883520004, "learning_rate": 5.093102165327729e-06, "loss": 0.3125, "step": 10425 }, { "epoch": 4.864738805970149, "grad_norm": 0.31377114042475823, "learning_rate": 5.089974992826117e-06, "loss": 0.306, "step": 10430 }, { "epoch": 4.867070895522388, "grad_norm": 0.30128572832080486, "learning_rate": 5.086901134810658e-06, "loss": 0.3124, "step": 10435 }, { "epoch": 4.869402985074627, "grad_norm": 0.33196099108489213, "learning_rate": 5.083880598594204e-06, "loss": 0.3215, "step": 10440 }, { "epoch": 4.871735074626866, "grad_norm": 0.3092183955297128, "learning_rate": 5.080913391362749e-06, "loss": 0.3123, "step": 10445 }, { "epoch": 4.874067164179104, "grad_norm": 0.3085023568763702, "learning_rate": 5.0779995201754225e-06, "loss": 0.3209, "step": 10450 }, { "epoch": 4.876399253731344, "grad_norm": 0.31567946170630096, "learning_rate": 5.07513899196445e-06, "loss": 0.3167, "step": 10455 }, { "epoch": 4.878731343283582, "grad_norm": 0.31533380424871116, "learning_rate": 5.072331813535166e-06, "loss": 0.3053, "step": 10460 }, { "epoch": 4.881063432835821, "grad_norm": 0.30312794255971315, "learning_rate": 5.069577991565977e-06, "loss": 0.3035, "step": 10465 }, { "epoch": 4.88339552238806, "grad_norm": 0.31852435679254615, "learning_rate": 5.066877532608349e-06, "loss": 0.3203, "step": 10470 }, { "epoch": 4.885727611940299, "grad_norm": 0.320858159049413, "learning_rate": 5.064230443086805e-06, "loss": 0.3146, "step": 10475 }, { "epoch": 4.888059701492537, "grad_norm": 0.3263919608105869, "learning_rate": 5.06163672929889e-06, "loss": 0.3126, "step": 10480 }, { "epoch": 4.8903917910447765, "grad_norm": 0.31853861087305485, "learning_rate": 5.059096397415167e-06, "loss": 0.3181, "step": 10485 }, { "epoch": 4.892723880597015, "grad_norm": 0.3125053420833613, "learning_rate": 5.056609453479208e-06, "loss": 0.3096, "step": 10490 }, { "epoch": 4.8950559701492535, "grad_norm": 0.30924650707850115, "learning_rate": 5.0541759034075645e-06, "loss": 0.3131, "step": 10495 }, { "epoch": 4.897388059701493, "grad_norm": 0.3052144591941686, "learning_rate": 5.051795752989764e-06, "loss": 0.3106, "step": 10500 }, { "epoch": 4.8997201492537314, "grad_norm": 0.3106176481687058, "learning_rate": 5.049469007888298e-06, "loss": 0.306, "step": 10505 }, { "epoch": 4.90205223880597, "grad_norm": 0.3389149063968058, "learning_rate": 5.047195673638596e-06, "loss": 0.3275, "step": 10510 }, { "epoch": 4.9043843283582085, "grad_norm": 0.3144226280409235, "learning_rate": 5.044975755649028e-06, "loss": 0.3051, "step": 10515 }, { "epoch": 4.906716417910448, "grad_norm": 0.3200007857695069, "learning_rate": 5.042809259200885e-06, "loss": 0.3152, "step": 10520 }, { "epoch": 4.909048507462686, "grad_norm": 0.3193925817411586, "learning_rate": 5.040696189448356e-06, "loss": 0.3163, "step": 10525 }, { "epoch": 4.911380597014926, "grad_norm": 0.3100880143328751, "learning_rate": 5.038636551418533e-06, "loss": 0.3033, "step": 10530 }, { "epoch": 4.913712686567164, "grad_norm": 0.322011618953854, "learning_rate": 5.036630350011395e-06, "loss": 0.3239, "step": 10535 }, { "epoch": 4.916044776119403, "grad_norm": 0.31259093543701244, "learning_rate": 5.034677589999783e-06, "loss": 0.3178, "step": 10540 }, { "epoch": 4.918376865671641, "grad_norm": 0.32073760950709745, "learning_rate": 5.032778276029403e-06, "loss": 0.3048, "step": 10545 }, { "epoch": 4.920708955223881, "grad_norm": 0.3211413693826086, "learning_rate": 5.030932412618815e-06, "loss": 0.3044, "step": 10550 }, { "epoch": 4.923041044776119, "grad_norm": 0.3135189327750762, "learning_rate": 5.029140004159409e-06, "loss": 0.3138, "step": 10555 }, { "epoch": 4.925373134328359, "grad_norm": 0.3187071918842242, "learning_rate": 5.02740105491541e-06, "loss": 0.3133, "step": 10560 }, { "epoch": 4.927705223880597, "grad_norm": 0.2998107707115894, "learning_rate": 5.025715569023859e-06, "loss": 0.3219, "step": 10565 }, { "epoch": 4.930037313432836, "grad_norm": 0.3235954234174985, "learning_rate": 5.024083550494606e-06, "loss": 0.3122, "step": 10570 }, { "epoch": 4.932369402985074, "grad_norm": 0.30648737851850577, "learning_rate": 5.0225050032102965e-06, "loss": 0.3082, "step": 10575 }, { "epoch": 4.934701492537314, "grad_norm": 0.3143240565698902, "learning_rate": 5.020979930926365e-06, "loss": 0.3151, "step": 10580 }, { "epoch": 4.937033582089552, "grad_norm": 0.32088149135950106, "learning_rate": 5.0195083372710345e-06, "loss": 0.3132, "step": 10585 }, { "epoch": 4.9393656716417915, "grad_norm": 0.3318394026129433, "learning_rate": 5.018090225745291e-06, "loss": 0.3071, "step": 10590 }, { "epoch": 4.94169776119403, "grad_norm": 0.33093220062650414, "learning_rate": 5.016725599722889e-06, "loss": 0.3314, "step": 10595 }, { "epoch": 4.9440298507462686, "grad_norm": 0.31243339431356787, "learning_rate": 5.0154144624503365e-06, "loss": 0.31, "step": 10600 }, { "epoch": 4.946361940298507, "grad_norm": 0.31698064253447866, "learning_rate": 5.014156817046891e-06, "loss": 0.3208, "step": 10605 }, { "epoch": 4.9486940298507465, "grad_norm": 0.3138631032332983, "learning_rate": 5.012952666504542e-06, "loss": 0.3175, "step": 10610 }, { "epoch": 4.951026119402985, "grad_norm": 0.31798861220751123, "learning_rate": 5.011802013688029e-06, "loss": 0.3229, "step": 10615 }, { "epoch": 4.9533582089552235, "grad_norm": 0.30446977534613207, "learning_rate": 5.010704861334803e-06, "loss": 0.3129, "step": 10620 }, { "epoch": 4.955690298507463, "grad_norm": 0.3283495902759374, "learning_rate": 5.0096612120550436e-06, "loss": 0.3226, "step": 10625 }, { "epoch": 4.958022388059701, "grad_norm": 0.3281503098358507, "learning_rate": 5.008671068331634e-06, "loss": 0.3173, "step": 10630 }, { "epoch": 4.96035447761194, "grad_norm": 0.3170052155152725, "learning_rate": 5.007734432520179e-06, "loss": 0.3179, "step": 10635 }, { "epoch": 4.962686567164179, "grad_norm": 0.31739455544952183, "learning_rate": 5.0068513068489765e-06, "loss": 0.3172, "step": 10640 }, { "epoch": 4.965018656716418, "grad_norm": 0.3139369209779813, "learning_rate": 5.006021693419021e-06, "loss": 0.3091, "step": 10645 }, { "epoch": 4.967350746268656, "grad_norm": 0.3177375618248317, "learning_rate": 5.0052455942040045e-06, "loss": 0.3153, "step": 10650 }, { "epoch": 4.969682835820896, "grad_norm": 0.30180313462665614, "learning_rate": 5.0045230110503e-06, "loss": 0.3113, "step": 10655 }, { "epoch": 4.972014925373134, "grad_norm": 0.32179223088280756, "learning_rate": 5.003853945676969e-06, "loss": 0.3155, "step": 10660 }, { "epoch": 4.974347014925373, "grad_norm": 0.3145240981789299, "learning_rate": 5.003238399675746e-06, "loss": 0.3158, "step": 10665 }, { "epoch": 4.976679104477612, "grad_norm": 0.30821100313936856, "learning_rate": 5.002676374511046e-06, "loss": 0.3063, "step": 10670 }, { "epoch": 4.979011194029851, "grad_norm": 0.31483642532427264, "learning_rate": 5.002167871519951e-06, "loss": 0.3153, "step": 10675 }, { "epoch": 4.981343283582089, "grad_norm": 0.31036425533089596, "learning_rate": 5.001712891912217e-06, "loss": 0.3224, "step": 10680 }, { "epoch": 4.983675373134329, "grad_norm": 0.31468429025060357, "learning_rate": 5.001311436770255e-06, "loss": 0.3171, "step": 10685 }, { "epoch": 4.986007462686567, "grad_norm": 0.3112534800471989, "learning_rate": 5.000963507049151e-06, "loss": 0.3157, "step": 10690 }, { "epoch": 4.988339552238806, "grad_norm": 0.3158579409870581, "learning_rate": 5.000669103576643e-06, "loss": 0.3101, "step": 10695 }, { "epoch": 4.990671641791045, "grad_norm": 0.31259960854316443, "learning_rate": 5.000428227053131e-06, "loss": 0.3158, "step": 10700 }, { "epoch": 4.993003731343284, "grad_norm": 0.3126887504788149, "learning_rate": 5.000240878051671e-06, "loss": 0.3134, "step": 10705 }, { "epoch": 4.995335820895522, "grad_norm": 0.326172987456135, "learning_rate": 5.000107057017976e-06, "loss": 0.3154, "step": 10710 }, { "epoch": 4.9976679104477615, "grad_norm": 0.3109071483421273, "learning_rate": 5.000026764270413e-06, "loss": 0.3111, "step": 10715 }, { "epoch": 5.0, "grad_norm": 0.34171871946179255, "learning_rate": 5e-06, "loss": 0.3113, "step": 10720 }, { "epoch": 5.0023320895522385, "grad_norm": 0.4194112784736783, "learning_rate": 4.5250331177161014e-05, "loss": 0.2876, "step": 10725 }, { "epoch": 5.004664179104478, "grad_norm": 0.3545863890564702, "learning_rate": 4.5244998253598994e-05, "loss": 0.2919, "step": 10730 }, { "epoch": 5.006996268656716, "grad_norm": 0.3867573071168342, "learning_rate": 4.5239662691525744e-05, "loss": 0.2995, "step": 10735 }, { "epoch": 5.009328358208955, "grad_norm": 0.37692298332879576, "learning_rate": 4.5234324491734624e-05, "loss": 0.3048, "step": 10740 }, { "epoch": 5.011660447761194, "grad_norm": 0.3900706241619746, "learning_rate": 4.522898365501938e-05, "loss": 0.3032, "step": 10745 }, { "epoch": 5.013992537313433, "grad_norm": 0.35826258877287953, "learning_rate": 4.5223640182174115e-05, "loss": 0.3009, "step": 10750 }, { "epoch": 5.016324626865671, "grad_norm": 0.3823801566260137, "learning_rate": 4.5218294073993374e-05, "loss": 0.3107, "step": 10755 }, { "epoch": 5.018656716417911, "grad_norm": 0.38331596651898153, "learning_rate": 4.521294533127206e-05, "loss": 0.3139, "step": 10760 }, { "epoch": 5.020988805970149, "grad_norm": 0.3886977545728376, "learning_rate": 4.5207593954805494e-05, "loss": 0.3229, "step": 10765 }, { "epoch": 5.023320895522388, "grad_norm": 0.3908599967959028, "learning_rate": 4.520223994538937e-05, "loss": 0.3189, "step": 10770 }, { "epoch": 5.025652985074627, "grad_norm": 0.3846431104282232, "learning_rate": 4.519688330381976e-05, "loss": 0.3204, "step": 10775 }, { "epoch": 5.027985074626866, "grad_norm": 0.38736416941840285, "learning_rate": 4.519152403089317e-05, "loss": 0.3159, "step": 10780 }, { "epoch": 5.030317164179104, "grad_norm": 0.37348197277385037, "learning_rate": 4.518616212740647e-05, "loss": 0.3213, "step": 10785 }, { "epoch": 5.032649253731344, "grad_norm": 0.37447914010545635, "learning_rate": 4.51807975941569e-05, "loss": 0.3222, "step": 10790 }, { "epoch": 5.034981343283582, "grad_norm": 0.3667616983838328, "learning_rate": 4.517543043194214e-05, "loss": 0.3183, "step": 10795 }, { "epoch": 5.037313432835821, "grad_norm": 0.3785869892185965, "learning_rate": 4.517006064156023e-05, "loss": 0.3195, "step": 10800 }, { "epoch": 5.03964552238806, "grad_norm": 0.4020500561892113, "learning_rate": 4.516468822380959e-05, "loss": 0.3197, "step": 10805 }, { "epoch": 5.041977611940299, "grad_norm": 0.37706204913789226, "learning_rate": 4.515931317948907e-05, "loss": 0.3202, "step": 10810 }, { "epoch": 5.044309701492537, "grad_norm": 0.39736891314526, "learning_rate": 4.515393550939787e-05, "loss": 0.3292, "step": 10815 }, { "epoch": 5.0466417910447765, "grad_norm": 0.3830084745361982, "learning_rate": 4.5148555214335616e-05, "loss": 0.3362, "step": 10820 }, { "epoch": 5.048973880597015, "grad_norm": 0.373649415703944, "learning_rate": 4.514317229510228e-05, "loss": 0.3314, "step": 10825 }, { "epoch": 5.0513059701492535, "grad_norm": 0.37875122199102834, "learning_rate": 4.5137786752498285e-05, "loss": 0.334, "step": 10830 }, { "epoch": 5.053638059701493, "grad_norm": 0.3696141649314653, "learning_rate": 4.513239858732438e-05, "loss": 0.326, "step": 10835 }, { "epoch": 5.0559701492537314, "grad_norm": 0.3620823641301571, "learning_rate": 4.512700780038174e-05, "loss": 0.3472, "step": 10840 }, { "epoch": 5.05830223880597, "grad_norm": 0.36616102464488354, "learning_rate": 4.5121614392471934e-05, "loss": 0.3382, "step": 10845 }, { "epoch": 5.060634328358209, "grad_norm": 0.3655001412880012, "learning_rate": 4.5116218364396904e-05, "loss": 0.3303, "step": 10850 }, { "epoch": 5.062966417910448, "grad_norm": 0.38694820351739173, "learning_rate": 4.511081971695899e-05, "loss": 0.3326, "step": 10855 }, { "epoch": 5.065298507462686, "grad_norm": 0.36824078950466527, "learning_rate": 4.510541845096091e-05, "loss": 0.3156, "step": 10860 }, { "epoch": 5.067630597014926, "grad_norm": 0.34407786603771756, "learning_rate": 4.510001456720579e-05, "loss": 0.3287, "step": 10865 }, { "epoch": 5.069962686567164, "grad_norm": 0.3918749400870992, "learning_rate": 4.509460806649714e-05, "loss": 0.3436, "step": 10870 }, { "epoch": 5.072294776119403, "grad_norm": 0.3452678251019426, "learning_rate": 4.508919894963884e-05, "loss": 0.3279, "step": 10875 }, { "epoch": 5.074626865671641, "grad_norm": 0.3992701768358375, "learning_rate": 4.5083787217435175e-05, "loss": 0.3394, "step": 10880 }, { "epoch": 5.076958955223881, "grad_norm": 0.42819186836124956, "learning_rate": 4.507837287069083e-05, "loss": 0.3319, "step": 10885 }, { "epoch": 5.079291044776119, "grad_norm": 0.3716757481612679, "learning_rate": 4.507295591021087e-05, "loss": 0.3361, "step": 10890 }, { "epoch": 5.081623134328359, "grad_norm": 0.37986019049962216, "learning_rate": 4.5067536336800724e-05, "loss": 0.3396, "step": 10895 }, { "epoch": 5.083955223880597, "grad_norm": 0.37420439359965124, "learning_rate": 4.506211415126624e-05, "loss": 0.3345, "step": 10900 }, { "epoch": 5.086287313432836, "grad_norm": 0.38378158672044504, "learning_rate": 4.5056689354413664e-05, "loss": 0.3409, "step": 10905 }, { "epoch": 5.088619402985074, "grad_norm": 0.37987642438664554, "learning_rate": 4.505126194704958e-05, "loss": 0.3328, "step": 10910 }, { "epoch": 5.090951492537314, "grad_norm": 0.3604064235737157, "learning_rate": 4.504583192998101e-05, "loss": 0.3316, "step": 10915 }, { "epoch": 5.093283582089552, "grad_norm": 0.3571873815856108, "learning_rate": 4.504039930401535e-05, "loss": 0.3376, "step": 10920 }, { "epoch": 5.095615671641791, "grad_norm": 0.38082126568184427, "learning_rate": 4.503496406996037e-05, "loss": 0.3514, "step": 10925 }, { "epoch": 5.09794776119403, "grad_norm": 0.38152297631820337, "learning_rate": 4.5029526228624226e-05, "loss": 0.3416, "step": 10930 }, { "epoch": 5.1002798507462686, "grad_norm": 0.3748925654535182, "learning_rate": 4.50240857808155e-05, "loss": 0.3377, "step": 10935 }, { "epoch": 5.102611940298507, "grad_norm": 0.3729101048694972, "learning_rate": 4.501864272734311e-05, "loss": 0.3426, "step": 10940 }, { "epoch": 5.1049440298507465, "grad_norm": 0.3757280852831875, "learning_rate": 4.50131970690164e-05, "loss": 0.3348, "step": 10945 }, { "epoch": 5.107276119402985, "grad_norm": 0.35949075716670026, "learning_rate": 4.500774880664508e-05, "loss": 0.3362, "step": 10950 }, { "epoch": 5.1096082089552235, "grad_norm": 0.35267737261160914, "learning_rate": 4.500229794103925e-05, "loss": 0.3276, "step": 10955 }, { "epoch": 5.111940298507463, "grad_norm": 0.3506620809605052, "learning_rate": 4.4996844473009425e-05, "loss": 0.3344, "step": 10960 }, { "epoch": 5.114272388059701, "grad_norm": 0.36299910558739407, "learning_rate": 4.499138840336646e-05, "loss": 0.3299, "step": 10965 }, { "epoch": 5.11660447761194, "grad_norm": 0.35925210576690825, "learning_rate": 4.498592973292162e-05, "loss": 0.3322, "step": 10970 }, { "epoch": 5.118936567164179, "grad_norm": 0.36741923073465793, "learning_rate": 4.498046846248658e-05, "loss": 0.3424, "step": 10975 }, { "epoch": 5.121268656716418, "grad_norm": 0.3759294250923111, "learning_rate": 4.497500459287335e-05, "loss": 0.3322, "step": 10980 }, { "epoch": 5.123600746268656, "grad_norm": 0.35887049534044446, "learning_rate": 4.496953812489438e-05, "loss": 0.3414, "step": 10985 }, { "epoch": 5.125932835820896, "grad_norm": 0.35632685699780986, "learning_rate": 4.496406905936246e-05, "loss": 0.3298, "step": 10990 }, { "epoch": 5.128264925373134, "grad_norm": 0.3850181400319679, "learning_rate": 4.49585973970908e-05, "loss": 0.3594, "step": 10995 }, { "epoch": 5.130597014925373, "grad_norm": 0.36780280767666884, "learning_rate": 4.4953123138892984e-05, "loss": 0.3427, "step": 11000 }, { "epoch": 5.132929104477612, "grad_norm": 0.359390489620687, "learning_rate": 4.4947646285582974e-05, "loss": 0.3375, "step": 11005 }, { "epoch": 5.135261194029851, "grad_norm": 0.37234735298551674, "learning_rate": 4.4942166837975134e-05, "loss": 0.3378, "step": 11010 }, { "epoch": 5.137593283582089, "grad_norm": 0.37807973052473964, "learning_rate": 4.49366847968842e-05, "loss": 0.3394, "step": 11015 }, { "epoch": 5.139925373134329, "grad_norm": 0.35619537631681825, "learning_rate": 4.4931200163125306e-05, "loss": 0.3338, "step": 11020 }, { "epoch": 5.142257462686567, "grad_norm": 0.3791313037748476, "learning_rate": 4.492571293751395e-05, "loss": 0.3402, "step": 11025 }, { "epoch": 5.144589552238806, "grad_norm": 0.34001259328057537, "learning_rate": 4.492022312086605e-05, "loss": 0.3357, "step": 11030 }, { "epoch": 5.146921641791045, "grad_norm": 0.35931046646915105, "learning_rate": 4.491473071399787e-05, "loss": 0.3391, "step": 11035 }, { "epoch": 5.149253731343284, "grad_norm": 0.364393452475412, "learning_rate": 4.4909235717726086e-05, "loss": 0.3516, "step": 11040 }, { "epoch": 5.151585820895522, "grad_norm": 0.3530087729288539, "learning_rate": 4.490373813286776e-05, "loss": 0.3393, "step": 11045 }, { "epoch": 5.1539179104477615, "grad_norm": 0.37788637763777166, "learning_rate": 4.4898237960240315e-05, "loss": 0.3408, "step": 11050 }, { "epoch": 5.15625, "grad_norm": 0.3559814737396872, "learning_rate": 4.4892735200661584e-05, "loss": 0.3418, "step": 11055 }, { "epoch": 5.1585820895522385, "grad_norm": 0.35911877295534445, "learning_rate": 4.488722985494978e-05, "loss": 0.3423, "step": 11060 }, { "epoch": 5.160914179104478, "grad_norm": 0.36538095338821996, "learning_rate": 4.488172192392347e-05, "loss": 0.3473, "step": 11065 }, { "epoch": 5.163246268656716, "grad_norm": 0.3536183929427922, "learning_rate": 4.487621140840165e-05, "loss": 0.3415, "step": 11070 }, { "epoch": 5.165578358208955, "grad_norm": 0.3504431068171028, "learning_rate": 4.487069830920369e-05, "loss": 0.3466, "step": 11075 }, { "epoch": 5.167910447761194, "grad_norm": 0.3330366175289514, "learning_rate": 4.486518262714931e-05, "loss": 0.3469, "step": 11080 }, { "epoch": 5.170242537313433, "grad_norm": 0.35900260196789546, "learning_rate": 4.4859664363058665e-05, "loss": 0.3483, "step": 11085 }, { "epoch": 5.172574626865671, "grad_norm": 0.38423136332809926, "learning_rate": 4.485414351775224e-05, "loss": 0.3424, "step": 11090 }, { "epoch": 5.174906716417911, "grad_norm": 0.35540435949465154, "learning_rate": 4.484862009205096e-05, "loss": 0.3465, "step": 11095 }, { "epoch": 5.177238805970149, "grad_norm": 0.36963983386024135, "learning_rate": 4.484309408677609e-05, "loss": 0.3367, "step": 11100 }, { "epoch": 5.179570895522388, "grad_norm": 0.36020051496675287, "learning_rate": 4.48375655027493e-05, "loss": 0.3437, "step": 11105 }, { "epoch": 5.181902985074627, "grad_norm": 0.3513083919822455, "learning_rate": 4.483203434079263e-05, "loss": 0.3335, "step": 11110 }, { "epoch": 5.184235074626866, "grad_norm": 0.3596088903630333, "learning_rate": 4.4826500601728515e-05, "loss": 0.345, "step": 11115 }, { "epoch": 5.186567164179104, "grad_norm": 0.346793856174051, "learning_rate": 4.4820964286379764e-05, "loss": 0.3481, "step": 11120 }, { "epoch": 5.188899253731344, "grad_norm": 0.3366645491477908, "learning_rate": 4.481542539556959e-05, "loss": 0.3468, "step": 11125 }, { "epoch": 5.191231343283582, "grad_norm": 0.3742317616460274, "learning_rate": 4.480988393012155e-05, "loss": 0.3371, "step": 11130 }, { "epoch": 5.193563432835821, "grad_norm": 0.3507651574777929, "learning_rate": 4.4804339890859625e-05, "loss": 0.3362, "step": 11135 }, { "epoch": 5.19589552238806, "grad_norm": 0.3656114859400276, "learning_rate": 4.479879327860816e-05, "loss": 0.3541, "step": 11140 }, { "epoch": 5.198227611940299, "grad_norm": 0.356581017891171, "learning_rate": 4.479324409419186e-05, "loss": 0.3417, "step": 11145 }, { "epoch": 5.200559701492537, "grad_norm": 0.3648190081253754, "learning_rate": 4.478769233843587e-05, "loss": 0.3505, "step": 11150 }, { "epoch": 5.2028917910447765, "grad_norm": 0.3578554685407242, "learning_rate": 4.478213801216566e-05, "loss": 0.3526, "step": 11155 }, { "epoch": 5.205223880597015, "grad_norm": 0.3389870233019987, "learning_rate": 4.477658111620711e-05, "loss": 0.3429, "step": 11160 }, { "epoch": 5.2075559701492535, "grad_norm": 0.36283488435840144, "learning_rate": 4.477102165138648e-05, "loss": 0.3487, "step": 11165 }, { "epoch": 5.209888059701493, "grad_norm": 0.34907245093737216, "learning_rate": 4.4765459618530405e-05, "loss": 0.349, "step": 11170 }, { "epoch": 5.2122201492537314, "grad_norm": 0.35453346831406407, "learning_rate": 4.4759895018465906e-05, "loss": 0.3396, "step": 11175 }, { "epoch": 5.21455223880597, "grad_norm": 0.361516732564419, "learning_rate": 4.47543278520204e-05, "loss": 0.3463, "step": 11180 }, { "epoch": 5.216884328358209, "grad_norm": 0.3647071851087732, "learning_rate": 4.474875812002165e-05, "loss": 0.3403, "step": 11185 }, { "epoch": 5.219216417910448, "grad_norm": 0.3831202441617135, "learning_rate": 4.474318582329783e-05, "loss": 0.3402, "step": 11190 }, { "epoch": 5.221548507462686, "grad_norm": 0.3803647263083268, "learning_rate": 4.473761096267749e-05, "loss": 0.3568, "step": 11195 }, { "epoch": 5.223880597014926, "grad_norm": 0.3659507484071894, "learning_rate": 4.4732033538989556e-05, "loss": 0.3566, "step": 11200 }, { "epoch": 5.226212686567164, "grad_norm": 0.34574842817813717, "learning_rate": 4.4726453553063343e-05, "loss": 0.334, "step": 11205 }, { "epoch": 5.228544776119403, "grad_norm": 0.35800261790966115, "learning_rate": 4.4720871005728526e-05, "loss": 0.3385, "step": 11210 }, { "epoch": 5.230876865671641, "grad_norm": 0.34186114599684436, "learning_rate": 4.4715285897815196e-05, "loss": 0.3507, "step": 11215 }, { "epoch": 5.233208955223881, "grad_norm": 0.3656586355373762, "learning_rate": 4.47096982301538e-05, "loss": 0.3486, "step": 11220 }, { "epoch": 5.235541044776119, "grad_norm": 0.34479910557046806, "learning_rate": 4.470410800357515e-05, "loss": 0.3543, "step": 11225 }, { "epoch": 5.237873134328359, "grad_norm": 0.3608766753026699, "learning_rate": 4.469851521891049e-05, "loss": 0.3486, "step": 11230 }, { "epoch": 5.240205223880597, "grad_norm": 0.37017371305116714, "learning_rate": 4.469291987699139e-05, "loss": 0.3496, "step": 11235 }, { "epoch": 5.242537313432836, "grad_norm": 0.37185741368073216, "learning_rate": 4.468732197864984e-05, "loss": 0.3525, "step": 11240 }, { "epoch": 5.244869402985074, "grad_norm": 0.358233892055795, "learning_rate": 4.468172152471818e-05, "loss": 0.3419, "step": 11245 }, { "epoch": 5.247201492537314, "grad_norm": 0.3426622295223703, "learning_rate": 4.467611851602916e-05, "loss": 0.3556, "step": 11250 }, { "epoch": 5.249533582089552, "grad_norm": 0.34376779857337375, "learning_rate": 4.467051295341587e-05, "loss": 0.3439, "step": 11255 }, { "epoch": 5.251865671641791, "grad_norm": 0.37039564968898736, "learning_rate": 4.4664904837711835e-05, "loss": 0.351, "step": 11260 }, { "epoch": 5.25419776119403, "grad_norm": 0.3486481013529, "learning_rate": 4.4659294169750896e-05, "loss": 0.3551, "step": 11265 }, { "epoch": 5.2565298507462686, "grad_norm": 0.3620672709462342, "learning_rate": 4.465368095036733e-05, "loss": 0.3423, "step": 11270 }, { "epoch": 5.258861940298507, "grad_norm": 0.3506984981316624, "learning_rate": 4.464806518039575e-05, "loss": 0.341, "step": 11275 }, { "epoch": 5.2611940298507465, "grad_norm": 0.35714743778187125, "learning_rate": 4.4642446860671185e-05, "loss": 0.3529, "step": 11280 }, { "epoch": 5.263526119402985, "grad_norm": 0.37884266146258927, "learning_rate": 4.463682599202902e-05, "loss": 0.3634, "step": 11285 }, { "epoch": 5.2658582089552235, "grad_norm": 0.36508662519458374, "learning_rate": 4.463120257530501e-05, "loss": 0.357, "step": 11290 }, { "epoch": 5.268190298507463, "grad_norm": 0.36275586852228453, "learning_rate": 4.462557661133532e-05, "loss": 0.3377, "step": 11295 }, { "epoch": 5.270522388059701, "grad_norm": 0.3429710239747169, "learning_rate": 4.461994810095647e-05, "loss": 0.3393, "step": 11300 }, { "epoch": 5.27285447761194, "grad_norm": 0.37510449726743045, "learning_rate": 4.4614317045005365e-05, "loss": 0.3554, "step": 11305 }, { "epoch": 5.275186567164179, "grad_norm": 0.3590792211388695, "learning_rate": 4.46086834443193e-05, "loss": 0.3553, "step": 11310 }, { "epoch": 5.277518656716418, "grad_norm": 0.3375875292752857, "learning_rate": 4.460304729973592e-05, "loss": 0.3472, "step": 11315 }, { "epoch": 5.279850746268656, "grad_norm": 0.3490831333836901, "learning_rate": 4.4597408612093265e-05, "loss": 0.3457, "step": 11320 }, { "epoch": 5.282182835820896, "grad_norm": 0.36513944287949684, "learning_rate": 4.4591767382229776e-05, "loss": 0.3538, "step": 11325 }, { "epoch": 5.284514925373134, "grad_norm": 0.3545389933418666, "learning_rate": 4.458612361098423e-05, "loss": 0.3589, "step": 11330 }, { "epoch": 5.286847014925373, "grad_norm": 0.38088804237942586, "learning_rate": 4.458047729919581e-05, "loss": 0.356, "step": 11335 }, { "epoch": 5.289179104477612, "grad_norm": 0.3791395921536223, "learning_rate": 4.457482844770408e-05, "loss": 0.3525, "step": 11340 }, { "epoch": 5.291511194029851, "grad_norm": 0.39661371756092695, "learning_rate": 4.456917705734894e-05, "loss": 0.3621, "step": 11345 }, { "epoch": 5.293843283582089, "grad_norm": 0.3682609324862478, "learning_rate": 4.456352312897072e-05, "loss": 0.3481, "step": 11350 }, { "epoch": 5.296175373134329, "grad_norm": 0.3435125557186274, "learning_rate": 4.45578666634101e-05, "loss": 0.3456, "step": 11355 }, { "epoch": 5.298507462686567, "grad_norm": 0.3566911851957441, "learning_rate": 4.455220766150814e-05, "loss": 0.3469, "step": 11360 }, { "epoch": 5.300839552238806, "grad_norm": 0.3539278902789977, "learning_rate": 4.454654612410628e-05, "loss": 0.3573, "step": 11365 }, { "epoch": 5.303171641791045, "grad_norm": 0.3341737500371164, "learning_rate": 4.454088205204634e-05, "loss": 0.3452, "step": 11370 }, { "epoch": 5.305503731343284, "grad_norm": 0.324600169894544, "learning_rate": 4.453521544617051e-05, "loss": 0.342, "step": 11375 }, { "epoch": 5.307835820895522, "grad_norm": 0.36450230420164725, "learning_rate": 4.452954630732136e-05, "loss": 0.3473, "step": 11380 }, { "epoch": 5.3101679104477615, "grad_norm": 0.340555681853518, "learning_rate": 4.452387463634185e-05, "loss": 0.3529, "step": 11385 }, { "epoch": 5.3125, "grad_norm": 0.35960323650815634, "learning_rate": 4.451820043407527e-05, "loss": 0.3482, "step": 11390 }, { "epoch": 5.3148320895522385, "grad_norm": 0.3693394881651317, "learning_rate": 4.451252370136536e-05, "loss": 0.3701, "step": 11395 }, { "epoch": 5.317164179104478, "grad_norm": 0.3530690896453175, "learning_rate": 4.450684443905615e-05, "loss": 0.3542, "step": 11400 }, { "epoch": 5.319496268656716, "grad_norm": 0.3420814990957236, "learning_rate": 4.450116264799214e-05, "loss": 0.357, "step": 11405 }, { "epoch": 5.321828358208955, "grad_norm": 0.3553314848095541, "learning_rate": 4.4495478329018125e-05, "loss": 0.3508, "step": 11410 }, { "epoch": 5.324160447761194, "grad_norm": 0.33073963642894916, "learning_rate": 4.448979148297932e-05, "loss": 0.3528, "step": 11415 }, { "epoch": 5.326492537313433, "grad_norm": 0.34376612913130605, "learning_rate": 4.44841021107213e-05, "loss": 0.3661, "step": 11420 }, { "epoch": 5.328824626865671, "grad_norm": 0.3325831968686565, "learning_rate": 4.4478410213090035e-05, "loss": 0.3489, "step": 11425 }, { "epoch": 5.331156716417911, "grad_norm": 0.33807370700662515, "learning_rate": 4.447271579093185e-05, "loss": 0.3587, "step": 11430 }, { "epoch": 5.333488805970149, "grad_norm": 0.3439270118477338, "learning_rate": 4.446701884509343e-05, "loss": 0.3483, "step": 11435 }, { "epoch": 5.335820895522388, "grad_norm": 0.3658448817787607, "learning_rate": 4.4461319376421875e-05, "loss": 0.3609, "step": 11440 }, { "epoch": 5.338152985074627, "grad_norm": 0.35370718492014946, "learning_rate": 4.445561738576464e-05, "loss": 0.3471, "step": 11445 }, { "epoch": 5.340485074626866, "grad_norm": 0.3571431868815221, "learning_rate": 4.444991287396955e-05, "loss": 0.3574, "step": 11450 }, { "epoch": 5.342817164179104, "grad_norm": 0.3536802571956055, "learning_rate": 4.444420584188482e-05, "loss": 0.3513, "step": 11455 }, { "epoch": 5.345149253731344, "grad_norm": 0.3294935070734058, "learning_rate": 4.443849629035903e-05, "loss": 0.3635, "step": 11460 }, { "epoch": 5.347481343283582, "grad_norm": 0.34264378496224246, "learning_rate": 4.443278422024113e-05, "loss": 0.3585, "step": 11465 }, { "epoch": 5.349813432835821, "grad_norm": 0.34425802192337623, "learning_rate": 4.4427069632380455e-05, "loss": 0.3367, "step": 11470 }, { "epoch": 5.35214552238806, "grad_norm": 0.3444041233417056, "learning_rate": 4.4421352527626706e-05, "loss": 0.3439, "step": 11475 }, { "epoch": 5.354477611940299, "grad_norm": 0.3506277052854596, "learning_rate": 4.441563290682996e-05, "loss": 0.3575, "step": 11480 }, { "epoch": 5.356809701492537, "grad_norm": 0.3459701101438178, "learning_rate": 4.440991077084067e-05, "loss": 0.3499, "step": 11485 }, { "epoch": 5.3591417910447765, "grad_norm": 0.34231815741286226, "learning_rate": 4.4404186120509674e-05, "loss": 0.3453, "step": 11490 }, { "epoch": 5.361473880597015, "grad_norm": 0.34362933668976864, "learning_rate": 4.4398458956688156e-05, "loss": 0.3478, "step": 11495 }, { "epoch": 5.3638059701492535, "grad_norm": 0.3522428823046336, "learning_rate": 4.43927292802277e-05, "loss": 0.3567, "step": 11500 }, { "epoch": 5.366138059701493, "grad_norm": 1.6116880328642222, "learning_rate": 4.4386997091980255e-05, "loss": 0.3582, "step": 11505 }, { "epoch": 5.3684701492537314, "grad_norm": 0.5183535568809745, "learning_rate": 4.438126239279814e-05, "loss": 0.3544, "step": 11510 }, { "epoch": 5.37080223880597, "grad_norm": 0.33637642768029347, "learning_rate": 4.437552518353405e-05, "loss": 0.3517, "step": 11515 }, { "epoch": 5.373134328358209, "grad_norm": 0.3411358144794998, "learning_rate": 4.436978546504105e-05, "loss": 0.3551, "step": 11520 }, { "epoch": 5.375466417910448, "grad_norm": 0.340232667044874, "learning_rate": 4.436404323817258e-05, "loss": 0.3554, "step": 11525 }, { "epoch": 5.377798507462686, "grad_norm": 0.3538410789277539, "learning_rate": 4.435829850378247e-05, "loss": 0.3557, "step": 11530 }, { "epoch": 5.380130597014926, "grad_norm": 0.34376318925797533, "learning_rate": 4.435255126272489e-05, "loss": 0.3545, "step": 11535 }, { "epoch": 5.382462686567164, "grad_norm": 0.4025696435829934, "learning_rate": 4.43468015158544e-05, "loss": 0.3558, "step": 11540 }, { "epoch": 5.384794776119403, "grad_norm": 0.3339562480182389, "learning_rate": 4.434104926402594e-05, "loss": 0.3539, "step": 11545 }, { "epoch": 5.387126865671641, "grad_norm": 0.3460431734624195, "learning_rate": 4.433529450809481e-05, "loss": 0.3386, "step": 11550 }, { "epoch": 5.389458955223881, "grad_norm": 0.3409769804570632, "learning_rate": 4.432953724891669e-05, "loss": 0.3466, "step": 11555 }, { "epoch": 5.391791044776119, "grad_norm": 0.34178850902294594, "learning_rate": 4.432377748734763e-05, "loss": 0.3577, "step": 11560 }, { "epoch": 5.394123134328359, "grad_norm": 0.35342356161013316, "learning_rate": 4.4318015224244044e-05, "loss": 0.3541, "step": 11565 }, { "epoch": 5.396455223880597, "grad_norm": 0.3395943147658307, "learning_rate": 4.431225046046274e-05, "loss": 0.3464, "step": 11570 }, { "epoch": 5.398787313432836, "grad_norm": 0.33696544354638924, "learning_rate": 4.4306483196860866e-05, "loss": 0.3618, "step": 11575 }, { "epoch": 5.401119402985074, "grad_norm": 0.3619131584245415, "learning_rate": 4.430071343429597e-05, "loss": 0.359, "step": 11580 }, { "epoch": 5.403451492537314, "grad_norm": 0.3344476799736475, "learning_rate": 4.429494117362595e-05, "loss": 0.348, "step": 11585 }, { "epoch": 5.405783582089552, "grad_norm": 0.35319502657092017, "learning_rate": 4.4289166415709096e-05, "loss": 0.3589, "step": 11590 }, { "epoch": 5.408115671641791, "grad_norm": 0.37316879198998965, "learning_rate": 4.428338916140406e-05, "loss": 0.3615, "step": 11595 }, { "epoch": 5.41044776119403, "grad_norm": 0.33510455466915956, "learning_rate": 4.427760941156986e-05, "loss": 0.3498, "step": 11600 }, { "epoch": 5.4127798507462686, "grad_norm": 0.32863669694783704, "learning_rate": 4.427182716706589e-05, "loss": 0.354, "step": 11605 }, { "epoch": 5.415111940298507, "grad_norm": 0.34300444620938597, "learning_rate": 4.426604242875191e-05, "loss": 0.3591, "step": 11610 }, { "epoch": 5.4174440298507465, "grad_norm": 0.34651613937006187, "learning_rate": 4.426025519748807e-05, "loss": 0.3463, "step": 11615 }, { "epoch": 5.419776119402985, "grad_norm": 0.3391524387798896, "learning_rate": 4.4254465474134856e-05, "loss": 0.3643, "step": 11620 }, { "epoch": 5.4221082089552235, "grad_norm": 0.3330938294240374, "learning_rate": 4.424867325955315e-05, "loss": 0.3533, "step": 11625 }, { "epoch": 5.424440298507463, "grad_norm": 0.3199813674411649, "learning_rate": 4.42428785546042e-05, "loss": 0.3461, "step": 11630 }, { "epoch": 5.426772388059701, "grad_norm": 0.3363406692340255, "learning_rate": 4.4237081360149646e-05, "loss": 0.3544, "step": 11635 }, { "epoch": 5.42910447761194, "grad_norm": 0.33773381305562916, "learning_rate": 4.423128167705144e-05, "loss": 0.3482, "step": 11640 }, { "epoch": 5.431436567164179, "grad_norm": 0.3501381985053473, "learning_rate": 4.4225479506171956e-05, "loss": 0.3537, "step": 11645 }, { "epoch": 5.433768656716418, "grad_norm": 0.32756293326002905, "learning_rate": 4.4219674848373924e-05, "loss": 0.349, "step": 11650 }, { "epoch": 5.436100746268656, "grad_norm": 0.34757171201283954, "learning_rate": 4.421386770452042e-05, "loss": 0.3674, "step": 11655 }, { "epoch": 5.438432835820896, "grad_norm": 0.3355477984681319, "learning_rate": 4.4208058075474945e-05, "loss": 0.3534, "step": 11660 }, { "epoch": 5.440764925373134, "grad_norm": 0.32670133771306614, "learning_rate": 4.4202245962101314e-05, "loss": 0.3445, "step": 11665 }, { "epoch": 5.443097014925373, "grad_norm": 0.3415938465428368, "learning_rate": 4.419643136526373e-05, "loss": 0.3498, "step": 11670 }, { "epoch": 5.445429104477612, "grad_norm": 0.3490741563372888, "learning_rate": 4.419061428582678e-05, "loss": 0.3583, "step": 11675 }, { "epoch": 5.447761194029851, "grad_norm": 0.3706767475899418, "learning_rate": 4.418479472465539e-05, "loss": 0.3664, "step": 11680 }, { "epoch": 5.450093283582089, "grad_norm": 0.34555738921820567, "learning_rate": 4.41789726826149e-05, "loss": 0.3572, "step": 11685 }, { "epoch": 5.452425373134329, "grad_norm": 0.3343603505619401, "learning_rate": 4.417314816057096e-05, "loss": 0.362, "step": 11690 }, { "epoch": 5.454757462686567, "grad_norm": 0.34412192107506806, "learning_rate": 4.416732115938965e-05, "loss": 0.3524, "step": 11695 }, { "epoch": 5.457089552238806, "grad_norm": 0.32973143506626446, "learning_rate": 4.416149167993737e-05, "loss": 0.3737, "step": 11700 }, { "epoch": 5.459421641791045, "grad_norm": 0.3406137466930918, "learning_rate": 4.415565972308092e-05, "loss": 0.3613, "step": 11705 }, { "epoch": 5.461753731343284, "grad_norm": 0.33274499257406825, "learning_rate": 4.4149825289687454e-05, "loss": 0.3548, "step": 11710 }, { "epoch": 5.464085820895522, "grad_norm": 0.33554901729617886, "learning_rate": 4.414398838062448e-05, "loss": 0.3588, "step": 11715 }, { "epoch": 5.4664179104477615, "grad_norm": 0.3608394704808852, "learning_rate": 4.413814899675991e-05, "loss": 0.3558, "step": 11720 }, { "epoch": 5.46875, "grad_norm": 0.3404420570001265, "learning_rate": 4.413230713896199e-05, "loss": 0.3659, "step": 11725 }, { "epoch": 5.4710820895522385, "grad_norm": 0.35372160512487977, "learning_rate": 4.4126462808099364e-05, "loss": 0.3571, "step": 11730 }, { "epoch": 5.473414179104478, "grad_norm": 0.36063333245123674, "learning_rate": 4.4120616005041014e-05, "loss": 0.3629, "step": 11735 }, { "epoch": 5.475746268656716, "grad_norm": 0.33280686713467106, "learning_rate": 4.411476673065631e-05, "loss": 0.3552, "step": 11740 }, { "epoch": 5.478078358208955, "grad_norm": 0.3443407457267361, "learning_rate": 4.4108914985814985e-05, "loss": 0.3553, "step": 11745 }, { "epoch": 5.480410447761194, "grad_norm": 0.31382134373176246, "learning_rate": 4.410306077138713e-05, "loss": 0.3582, "step": 11750 }, { "epoch": 5.482742537313433, "grad_norm": 0.3323661131927893, "learning_rate": 4.409720408824323e-05, "loss": 0.3534, "step": 11755 }, { "epoch": 5.485074626865671, "grad_norm": 0.33900622962032917, "learning_rate": 4.409134493725409e-05, "loss": 0.3617, "step": 11760 }, { "epoch": 5.487406716417911, "grad_norm": 0.35587419142189525, "learning_rate": 4.408548331929092e-05, "loss": 0.3678, "step": 11765 }, { "epoch": 5.489738805970149, "grad_norm": 0.3299278443273275, "learning_rate": 4.407961923522529e-05, "loss": 0.3619, "step": 11770 }, { "epoch": 5.492070895522388, "grad_norm": 0.34551320278962067, "learning_rate": 4.407375268592914e-05, "loss": 0.3585, "step": 11775 }, { "epoch": 5.494402985074627, "grad_norm": 0.3378346575611808, "learning_rate": 4.406788367227475e-05, "loss": 0.356, "step": 11780 }, { "epoch": 5.496735074626866, "grad_norm": 0.3495271486530321, "learning_rate": 4.4062012195134814e-05, "loss": 0.3692, "step": 11785 }, { "epoch": 5.499067164179104, "grad_norm": 0.35153671499253303, "learning_rate": 4.4056138255382335e-05, "loss": 0.3666, "step": 11790 }, { "epoch": 5.501399253731344, "grad_norm": 0.3362686913444013, "learning_rate": 4.405026185389073e-05, "loss": 0.36, "step": 11795 }, { "epoch": 5.503731343283582, "grad_norm": 0.33337089889258875, "learning_rate": 4.404438299153376e-05, "loss": 0.357, "step": 11800 }, { "epoch": 5.506063432835821, "grad_norm": 0.33073747506227325, "learning_rate": 4.4038501669185544e-05, "loss": 0.3603, "step": 11805 }, { "epoch": 5.50839552238806, "grad_norm": 0.3454293883578366, "learning_rate": 4.4032617887720604e-05, "loss": 0.3582, "step": 11810 }, { "epoch": 5.510727611940299, "grad_norm": 0.326749387947174, "learning_rate": 4.402673164801377e-05, "loss": 0.3492, "step": 11815 }, { "epoch": 5.513059701492537, "grad_norm": 0.3520200542659728, "learning_rate": 4.4020842950940294e-05, "loss": 0.3594, "step": 11820 }, { "epoch": 5.5153917910447765, "grad_norm": 0.34776639441872687, "learning_rate": 4.401495179737576e-05, "loss": 0.3623, "step": 11825 }, { "epoch": 5.517723880597015, "grad_norm": 0.34720393562185886, "learning_rate": 4.400905818819613e-05, "loss": 0.3557, "step": 11830 }, { "epoch": 5.5200559701492535, "grad_norm": 0.3417939763899824, "learning_rate": 4.400316212427772e-05, "loss": 0.3527, "step": 11835 }, { "epoch": 5.522388059701493, "grad_norm": 0.32962626326820343, "learning_rate": 4.3997263606497225e-05, "loss": 0.3599, "step": 11840 }, { "epoch": 5.5247201492537314, "grad_norm": 0.3359065568669348, "learning_rate": 4.3991362635731684e-05, "loss": 0.3493, "step": 11845 }, { "epoch": 5.52705223880597, "grad_norm": 0.34272156070238113, "learning_rate": 4.3985459212858535e-05, "loss": 0.3568, "step": 11850 }, { "epoch": 5.5293843283582085, "grad_norm": 0.3373769418410964, "learning_rate": 4.397955333875555e-05, "loss": 0.3425, "step": 11855 }, { "epoch": 5.531716417910448, "grad_norm": 0.3346663032112375, "learning_rate": 4.397364501430088e-05, "loss": 0.3561, "step": 11860 }, { "epoch": 5.534048507462686, "grad_norm": 0.3600065410143563, "learning_rate": 4.3967734240373025e-05, "loss": 0.3533, "step": 11865 }, { "epoch": 5.536380597014926, "grad_norm": 0.3420304262277709, "learning_rate": 4.396182101785089e-05, "loss": 0.3547, "step": 11870 }, { "epoch": 5.538712686567164, "grad_norm": 0.3272959469895073, "learning_rate": 4.3955905347613666e-05, "loss": 0.3548, "step": 11875 }, { "epoch": 5.541044776119403, "grad_norm": 0.359691613815329, "learning_rate": 4.3949987230541e-05, "loss": 0.3599, "step": 11880 }, { "epoch": 5.543376865671641, "grad_norm": 0.3615817119550993, "learning_rate": 4.394406666751284e-05, "loss": 0.3669, "step": 11885 }, { "epoch": 5.545708955223881, "grad_norm": 0.317585703942768, "learning_rate": 4.3938143659409515e-05, "loss": 0.3604, "step": 11890 }, { "epoch": 5.548041044776119, "grad_norm": 0.3503202457510897, "learning_rate": 4.393221820711173e-05, "loss": 0.3475, "step": 11895 }, { "epoch": 5.550373134328359, "grad_norm": 0.3375563709568005, "learning_rate": 4.392629031150054e-05, "loss": 0.3677, "step": 11900 }, { "epoch": 5.552705223880597, "grad_norm": 0.34175231682937807, "learning_rate": 4.392035997345736e-05, "loss": 0.3738, "step": 11905 }, { "epoch": 5.555037313432836, "grad_norm": 0.34344080830541673, "learning_rate": 4.391442719386398e-05, "loss": 0.3453, "step": 11910 }, { "epoch": 5.557369402985074, "grad_norm": 0.3381690250105537, "learning_rate": 4.390849197360254e-05, "loss": 0.3482, "step": 11915 }, { "epoch": 5.559701492537314, "grad_norm": 0.3465365271843039, "learning_rate": 4.390255431355557e-05, "loss": 0.365, "step": 11920 }, { "epoch": 5.562033582089552, "grad_norm": 0.3350788121139953, "learning_rate": 4.389661421460592e-05, "loss": 0.3594, "step": 11925 }, { "epoch": 5.5643656716417915, "grad_norm": 0.3278841404539431, "learning_rate": 4.389067167763683e-05, "loss": 0.3617, "step": 11930 }, { "epoch": 5.56669776119403, "grad_norm": 0.32228330763266927, "learning_rate": 4.388472670353191e-05, "loss": 0.3644, "step": 11935 }, { "epoch": 5.5690298507462686, "grad_norm": 0.33704295278352786, "learning_rate": 4.387877929317512e-05, "loss": 0.3615, "step": 11940 }, { "epoch": 5.571361940298507, "grad_norm": 0.33642711369493183, "learning_rate": 4.387282944745077e-05, "loss": 0.3556, "step": 11945 }, { "epoch": 5.5736940298507465, "grad_norm": 0.34602327804662236, "learning_rate": 4.3866877167243554e-05, "loss": 0.3701, "step": 11950 }, { "epoch": 5.576026119402985, "grad_norm": 0.3598105532814673, "learning_rate": 4.3860922453438515e-05, "loss": 0.3734, "step": 11955 }, { "epoch": 5.5783582089552235, "grad_norm": 0.3455341484902863, "learning_rate": 4.3854965306921064e-05, "loss": 0.3653, "step": 11960 }, { "epoch": 5.580690298507463, "grad_norm": 0.33764839259263696, "learning_rate": 4.3849005728576975e-05, "loss": 0.3608, "step": 11965 }, { "epoch": 5.583022388059701, "grad_norm": 0.3330071090933281, "learning_rate": 4.384304371929238e-05, "loss": 0.3539, "step": 11970 }, { "epoch": 5.58535447761194, "grad_norm": 0.3343912237864895, "learning_rate": 4.383707927995377e-05, "loss": 0.358, "step": 11975 }, { "epoch": 5.587686567164179, "grad_norm": 0.3452325440955779, "learning_rate": 4.383111241144798e-05, "loss": 0.3486, "step": 11980 }, { "epoch": 5.590018656716418, "grad_norm": 0.36614284946699344, "learning_rate": 4.3825143114662266e-05, "loss": 0.3706, "step": 11985 }, { "epoch": 5.592350746268656, "grad_norm": 0.3272524864142572, "learning_rate": 4.3819171390484184e-05, "loss": 0.3511, "step": 11990 }, { "epoch": 5.594682835820896, "grad_norm": 0.3463939397494372, "learning_rate": 4.381319723980167e-05, "loss": 0.3661, "step": 11995 }, { "epoch": 5.597014925373134, "grad_norm": 0.34742539235443204, "learning_rate": 4.380722066350303e-05, "loss": 0.3685, "step": 12000 }, { "epoch": 5.599347014925373, "grad_norm": 0.3375687041721783, "learning_rate": 4.380124166247691e-05, "loss": 0.3502, "step": 12005 }, { "epoch": 5.601679104477612, "grad_norm": 0.33611580382389405, "learning_rate": 4.3795260237612353e-05, "loss": 0.3686, "step": 12010 }, { "epoch": 5.604011194029851, "grad_norm": 0.3316080515216353, "learning_rate": 4.378927638979871e-05, "loss": 0.3535, "step": 12015 }, { "epoch": 5.606343283582089, "grad_norm": 0.3400368361359211, "learning_rate": 4.378329011992575e-05, "loss": 0.3635, "step": 12020 }, { "epoch": 5.608675373134329, "grad_norm": 0.3247500545758322, "learning_rate": 4.377730142888356e-05, "loss": 0.3533, "step": 12025 }, { "epoch": 5.611007462686567, "grad_norm": 0.3464576774798346, "learning_rate": 4.37713103175626e-05, "loss": 0.3708, "step": 12030 }, { "epoch": 5.613339552238806, "grad_norm": 0.3372448766083085, "learning_rate": 4.376531678685369e-05, "loss": 0.3499, "step": 12035 }, { "epoch": 5.615671641791045, "grad_norm": 0.3278479439268278, "learning_rate": 4.375932083764803e-05, "loss": 0.3575, "step": 12040 }, { "epoch": 5.618003731343284, "grad_norm": 0.34330433135779187, "learning_rate": 4.3753322470837135e-05, "loss": 0.3662, "step": 12045 }, { "epoch": 5.620335820895522, "grad_norm": 0.32991214326254725, "learning_rate": 4.3747321687312916e-05, "loss": 0.3552, "step": 12050 }, { "epoch": 5.6226679104477615, "grad_norm": 0.3528896149701725, "learning_rate": 4.3741318487967634e-05, "loss": 0.3646, "step": 12055 }, { "epoch": 5.625, "grad_norm": 0.34263719981221247, "learning_rate": 4.37353128736939e-05, "loss": 0.3541, "step": 12060 }, { "epoch": 5.6273320895522385, "grad_norm": 0.343184502412897, "learning_rate": 4.3729304845384695e-05, "loss": 0.3615, "step": 12065 }, { "epoch": 5.629664179104478, "grad_norm": 0.32046453531758573, "learning_rate": 4.3723294403933355e-05, "loss": 0.3639, "step": 12070 }, { "epoch": 5.631996268656716, "grad_norm": 0.36098131717325777, "learning_rate": 4.371728155023358e-05, "loss": 0.3719, "step": 12075 }, { "epoch": 5.634328358208955, "grad_norm": 0.32628194082175616, "learning_rate": 4.3711266285179415e-05, "loss": 0.3611, "step": 12080 }, { "epoch": 5.636660447761194, "grad_norm": 0.32951508571120486, "learning_rate": 4.370524860966529e-05, "loss": 0.3789, "step": 12085 }, { "epoch": 5.638992537313433, "grad_norm": 0.3498243605578393, "learning_rate": 4.369922852458594e-05, "loss": 0.3542, "step": 12090 }, { "epoch": 5.641324626865671, "grad_norm": 0.3300798272078043, "learning_rate": 4.369320603083653e-05, "loss": 0.3572, "step": 12095 }, { "epoch": 5.643656716417911, "grad_norm": 0.33154458033421647, "learning_rate": 4.3687181129312534e-05, "loss": 0.3633, "step": 12100 }, { "epoch": 5.645988805970149, "grad_norm": 0.33065869194286607, "learning_rate": 4.368115382090979e-05, "loss": 0.3576, "step": 12105 }, { "epoch": 5.648320895522388, "grad_norm": 0.3598687868604048, "learning_rate": 4.3675124106524514e-05, "loss": 0.3648, "step": 12110 }, { "epoch": 5.650652985074627, "grad_norm": 0.34138456147236185, "learning_rate": 4.366909198705325e-05, "loss": 0.3674, "step": 12115 }, { "epoch": 5.652985074626866, "grad_norm": 0.3473161069914176, "learning_rate": 4.366305746339293e-05, "loss": 0.361, "step": 12120 }, { "epoch": 5.655317164179104, "grad_norm": 0.32428275839190607, "learning_rate": 4.365702053644083e-05, "loss": 0.3478, "step": 12125 }, { "epoch": 5.657649253731344, "grad_norm": 0.3458318407411648, "learning_rate": 4.365098120709458e-05, "loss": 0.3803, "step": 12130 }, { "epoch": 5.659981343283582, "grad_norm": 0.3336111141692966, "learning_rate": 4.364493947625217e-05, "loss": 0.3587, "step": 12135 }, { "epoch": 5.662313432835821, "grad_norm": 0.3206589700128947, "learning_rate": 4.363889534481195e-05, "loss": 0.3579, "step": 12140 }, { "epoch": 5.66464552238806, "grad_norm": 0.3424038641332398, "learning_rate": 4.3632848813672614e-05, "loss": 0.3608, "step": 12145 }, { "epoch": 5.666977611940299, "grad_norm": 0.33278759321494084, "learning_rate": 4.3626799883733236e-05, "loss": 0.363, "step": 12150 }, { "epoch": 5.669309701492537, "grad_norm": 0.3378907544402892, "learning_rate": 4.362074855589322e-05, "loss": 0.3574, "step": 12155 }, { "epoch": 5.6716417910447765, "grad_norm": 0.3390242332447116, "learning_rate": 4.361469483105236e-05, "loss": 0.3599, "step": 12160 }, { "epoch": 5.673973880597015, "grad_norm": 0.3309585199723721, "learning_rate": 4.3608638710110775e-05, "loss": 0.3644, "step": 12165 }, { "epoch": 5.6763059701492535, "grad_norm": 0.3381528172345653, "learning_rate": 4.360258019396895e-05, "loss": 0.3563, "step": 12170 }, { "epoch": 5.678638059701493, "grad_norm": 0.33273926428847694, "learning_rate": 4.3596519283527745e-05, "loss": 0.3474, "step": 12175 }, { "epoch": 5.6809701492537314, "grad_norm": 0.36800593065600956, "learning_rate": 4.3590455979688335e-05, "loss": 0.3605, "step": 12180 }, { "epoch": 5.68330223880597, "grad_norm": 0.3484409322953236, "learning_rate": 4.358439028335229e-05, "loss": 0.3657, "step": 12185 }, { "epoch": 5.6856343283582085, "grad_norm": 0.32666383874596355, "learning_rate": 4.357832219542151e-05, "loss": 0.35, "step": 12190 }, { "epoch": 5.687966417910448, "grad_norm": 0.29941619214788623, "learning_rate": 4.357225171679828e-05, "loss": 0.3534, "step": 12195 }, { "epoch": 5.690298507462686, "grad_norm": 0.3503561199221976, "learning_rate": 4.3566178848385194e-05, "loss": 0.3759, "step": 12200 }, { "epoch": 5.692630597014926, "grad_norm": 0.33930623254128767, "learning_rate": 4.3560103591085264e-05, "loss": 0.3678, "step": 12205 }, { "epoch": 5.694962686567164, "grad_norm": 0.3532572985805629, "learning_rate": 4.35540259458018e-05, "loss": 0.3802, "step": 12210 }, { "epoch": 5.697294776119403, "grad_norm": 0.339504434515697, "learning_rate": 4.3547945913438494e-05, "loss": 0.369, "step": 12215 }, { "epoch": 5.699626865671641, "grad_norm": 0.3293001589329846, "learning_rate": 4.3541863494899385e-05, "loss": 0.3595, "step": 12220 }, { "epoch": 5.701958955223881, "grad_norm": 0.3300839256133122, "learning_rate": 4.353577869108887e-05, "loss": 0.3433, "step": 12225 }, { "epoch": 5.704291044776119, "grad_norm": 0.3500106982699641, "learning_rate": 4.352969150291172e-05, "loss": 0.3664, "step": 12230 }, { "epoch": 5.706623134328359, "grad_norm": 0.33216113634790306, "learning_rate": 4.3523601931273024e-05, "loss": 0.3646, "step": 12235 }, { "epoch": 5.708955223880597, "grad_norm": 0.3492980105690514, "learning_rate": 4.351750997707824e-05, "loss": 0.372, "step": 12240 }, { "epoch": 5.711287313432836, "grad_norm": 0.33453559852200676, "learning_rate": 4.351141564123319e-05, "loss": 0.3629, "step": 12245 }, { "epoch": 5.713619402985074, "grad_norm": 0.32106439132466974, "learning_rate": 4.3505318924644036e-05, "loss": 0.3594, "step": 12250 }, { "epoch": 5.715951492537314, "grad_norm": 0.35471810482133354, "learning_rate": 4.349921982821732e-05, "loss": 0.3696, "step": 12255 }, { "epoch": 5.718283582089552, "grad_norm": 0.34198375303551914, "learning_rate": 4.34931183528599e-05, "loss": 0.3597, "step": 12260 }, { "epoch": 5.7206156716417915, "grad_norm": 0.3236682740567985, "learning_rate": 4.3487014499479016e-05, "loss": 0.3607, "step": 12265 }, { "epoch": 5.72294776119403, "grad_norm": 0.33562747761750783, "learning_rate": 4.348090826898225e-05, "loss": 0.3643, "step": 12270 }, { "epoch": 5.7252798507462686, "grad_norm": 0.3197418835309336, "learning_rate": 4.3474799662277534e-05, "loss": 0.3649, "step": 12275 }, { "epoch": 5.727611940298507, "grad_norm": 0.36693331282178626, "learning_rate": 4.346868868027318e-05, "loss": 0.3622, "step": 12280 }, { "epoch": 5.7299440298507465, "grad_norm": 0.40868920782721524, "learning_rate": 4.3462575323877804e-05, "loss": 0.3798, "step": 12285 }, { "epoch": 5.732276119402985, "grad_norm": 0.3962504829650375, "learning_rate": 4.345645959400043e-05, "loss": 0.3583, "step": 12290 }, { "epoch": 5.7346082089552235, "grad_norm": 0.32121454626128154, "learning_rate": 4.345034149155039e-05, "loss": 0.3716, "step": 12295 }, { "epoch": 5.736940298507463, "grad_norm": 0.3498192274567818, "learning_rate": 4.344422101743739e-05, "loss": 0.3742, "step": 12300 }, { "epoch": 5.739272388059701, "grad_norm": 0.3381673251267072, "learning_rate": 4.343809817257149e-05, "loss": 0.3596, "step": 12305 }, { "epoch": 5.74160447761194, "grad_norm": 0.3377912636969226, "learning_rate": 4.3431972957863106e-05, "loss": 0.3761, "step": 12310 }, { "epoch": 5.743936567164179, "grad_norm": 0.3219426430714035, "learning_rate": 4.342584537422298e-05, "loss": 0.3584, "step": 12315 }, { "epoch": 5.746268656716418, "grad_norm": 0.3315703730208313, "learning_rate": 4.341971542256225e-05, "loss": 0.3647, "step": 12320 }, { "epoch": 5.748600746268656, "grad_norm": 0.35553844714104743, "learning_rate": 4.341358310379235e-05, "loss": 0.3646, "step": 12325 }, { "epoch": 5.750932835820896, "grad_norm": 0.33904693174085626, "learning_rate": 4.340744841882512e-05, "loss": 0.3651, "step": 12330 }, { "epoch": 5.753264925373134, "grad_norm": 0.38548817860251705, "learning_rate": 4.3401311368572723e-05, "loss": 0.3578, "step": 12335 }, { "epoch": 5.755597014925373, "grad_norm": 0.3337230687819924, "learning_rate": 4.339517195394768e-05, "loss": 0.3569, "step": 12340 }, { "epoch": 5.757929104477612, "grad_norm": 0.3462916431310772, "learning_rate": 4.3389030175862854e-05, "loss": 0.3623, "step": 12345 }, { "epoch": 5.760261194029851, "grad_norm": 0.3195207573130104, "learning_rate": 4.3382886035231484e-05, "loss": 0.3629, "step": 12350 }, { "epoch": 5.762593283582089, "grad_norm": 0.35331313117451335, "learning_rate": 4.337673953296714e-05, "loss": 0.3691, "step": 12355 }, { "epoch": 5.764925373134329, "grad_norm": 0.34280844921831904, "learning_rate": 4.3370590669983736e-05, "loss": 0.3749, "step": 12360 }, { "epoch": 5.767257462686567, "grad_norm": 0.33642372222817535, "learning_rate": 4.3364439447195565e-05, "loss": 0.3637, "step": 12365 }, { "epoch": 5.769589552238806, "grad_norm": 0.3426070166332215, "learning_rate": 4.335828586551725e-05, "loss": 0.3665, "step": 12370 }, { "epoch": 5.771921641791045, "grad_norm": 0.32720151646754664, "learning_rate": 4.335212992586376e-05, "loss": 0.3597, "step": 12375 }, { "epoch": 5.774253731343284, "grad_norm": 0.3328695423932092, "learning_rate": 4.334597162915045e-05, "loss": 0.3691, "step": 12380 }, { "epoch": 5.776585820895522, "grad_norm": 0.43907068548343764, "learning_rate": 4.333981097629296e-05, "loss": 0.3626, "step": 12385 }, { "epoch": 5.7789179104477615, "grad_norm": 0.3114377221361768, "learning_rate": 4.333364796820735e-05, "loss": 0.3626, "step": 12390 }, { "epoch": 5.78125, "grad_norm": 0.3456237327057742, "learning_rate": 4.332748260580999e-05, "loss": 0.3573, "step": 12395 }, { "epoch": 5.7835820895522385, "grad_norm": 0.3409606705656817, "learning_rate": 4.332131489001762e-05, "loss": 0.3674, "step": 12400 }, { "epoch": 5.785914179104478, "grad_norm": 0.5572930434987605, "learning_rate": 4.331514482174731e-05, "loss": 0.3708, "step": 12405 }, { "epoch": 5.788246268656716, "grad_norm": 0.32821782848670567, "learning_rate": 4.3308972401916495e-05, "loss": 0.3602, "step": 12410 }, { "epoch": 5.790578358208955, "grad_norm": 0.3256145278393219, "learning_rate": 4.330279763144296e-05, "loss": 0.3674, "step": 12415 }, { "epoch": 5.792910447761194, "grad_norm": 0.3453307216552716, "learning_rate": 4.3296620511244804e-05, "loss": 0.3701, "step": 12420 }, { "epoch": 5.795242537313433, "grad_norm": 0.31536207609964156, "learning_rate": 4.3290441042240544e-05, "loss": 0.3631, "step": 12425 }, { "epoch": 5.797574626865671, "grad_norm": 0.3185872389514879, "learning_rate": 4.3284259225348985e-05, "loss": 0.3683, "step": 12430 }, { "epoch": 5.799906716417911, "grad_norm": 0.31678887401090317, "learning_rate": 4.327807506148931e-05, "loss": 0.3609, "step": 12435 }, { "epoch": 5.802238805970149, "grad_norm": 5.362240689159592, "learning_rate": 4.327188855158106e-05, "loss": 0.3648, "step": 12440 }, { "epoch": 5.804570895522388, "grad_norm": 0.3680100011349524, "learning_rate": 4.3265699696544085e-05, "loss": 0.3662, "step": 12445 }, { "epoch": 5.806902985074627, "grad_norm": 0.3489902261220409, "learning_rate": 4.325950849729862e-05, "loss": 0.3529, "step": 12450 }, { "epoch": 5.809235074626866, "grad_norm": 0.31107007483141536, "learning_rate": 4.325331495476523e-05, "loss": 0.3502, "step": 12455 }, { "epoch": 5.811567164179104, "grad_norm": 0.32684005064106736, "learning_rate": 4.3247119069864856e-05, "loss": 0.3649, "step": 12460 }, { "epoch": 5.813899253731344, "grad_norm": 0.34675183104621815, "learning_rate": 4.3240920843518746e-05, "loss": 0.3665, "step": 12465 }, { "epoch": 5.816231343283582, "grad_norm": 0.33553561008491567, "learning_rate": 4.323472027664852e-05, "loss": 0.3613, "step": 12470 }, { "epoch": 5.818563432835821, "grad_norm": 0.3309632064703169, "learning_rate": 4.322851737017615e-05, "loss": 0.3786, "step": 12475 }, { "epoch": 5.82089552238806, "grad_norm": 0.34763820049723354, "learning_rate": 4.322231212502394e-05, "loss": 0.3631, "step": 12480 }, { "epoch": 5.823227611940299, "grad_norm": 0.334861046576397, "learning_rate": 4.321610454211456e-05, "loss": 0.3705, "step": 12485 }, { "epoch": 5.825559701492537, "grad_norm": 0.3221303022128312, "learning_rate": 4.320989462237101e-05, "loss": 0.3642, "step": 12490 }, { "epoch": 5.8278917910447765, "grad_norm": 0.33558965431959126, "learning_rate": 4.3203682366716645e-05, "loss": 0.3674, "step": 12495 }, { "epoch": 5.830223880597015, "grad_norm": 0.3332687598544487, "learning_rate": 4.3197467776075185e-05, "loss": 0.3682, "step": 12500 }, { "epoch": 5.8325559701492535, "grad_norm": 0.3175264701801688, "learning_rate": 4.3191250851370655e-05, "loss": 0.3549, "step": 12505 }, { "epoch": 5.834888059701493, "grad_norm": 0.3419660126884338, "learning_rate": 4.318503159352748e-05, "loss": 0.3698, "step": 12510 }, { "epoch": 5.8372201492537314, "grad_norm": 0.332201393587079, "learning_rate": 4.317881000347037e-05, "loss": 0.3643, "step": 12515 }, { "epoch": 5.83955223880597, "grad_norm": 0.3564285517731832, "learning_rate": 4.317258608212444e-05, "loss": 0.3715, "step": 12520 }, { "epoch": 5.8418843283582085, "grad_norm": 0.3521222489694544, "learning_rate": 4.316635983041512e-05, "loss": 0.3713, "step": 12525 }, { "epoch": 5.844216417910448, "grad_norm": 0.3496173897022177, "learning_rate": 4.31601312492682e-05, "loss": 0.3848, "step": 12530 }, { "epoch": 5.846548507462686, "grad_norm": 0.32276113429018, "learning_rate": 4.3153900339609804e-05, "loss": 0.3596, "step": 12535 }, { "epoch": 5.848880597014926, "grad_norm": 0.33308430168012393, "learning_rate": 4.3147667102366415e-05, "loss": 0.3766, "step": 12540 }, { "epoch": 5.851212686567164, "grad_norm": 0.3120020597133076, "learning_rate": 4.3141431538464846e-05, "loss": 0.3581, "step": 12545 }, { "epoch": 5.853544776119403, "grad_norm": 0.3246178495518034, "learning_rate": 4.313519364883227e-05, "loss": 0.3668, "step": 12550 }, { "epoch": 5.855876865671641, "grad_norm": 0.3459745146926963, "learning_rate": 4.31289534343962e-05, "loss": 0.3586, "step": 12555 }, { "epoch": 5.858208955223881, "grad_norm": 0.3176850488906768, "learning_rate": 4.3122710896084504e-05, "loss": 0.3717, "step": 12560 }, { "epoch": 5.860541044776119, "grad_norm": 0.3548068536528352, "learning_rate": 4.311646603482538e-05, "loss": 0.3601, "step": 12565 }, { "epoch": 5.862873134328359, "grad_norm": 0.3255350917912054, "learning_rate": 4.3110218851547384e-05, "loss": 0.3747, "step": 12570 }, { "epoch": 5.865205223880597, "grad_norm": 0.3284684416002701, "learning_rate": 4.31039693471794e-05, "loss": 0.3592, "step": 12575 }, { "epoch": 5.867537313432836, "grad_norm": 0.3219081661810986, "learning_rate": 4.309771752265069e-05, "loss": 0.3633, "step": 12580 }, { "epoch": 5.869869402985074, "grad_norm": 0.3293709639632858, "learning_rate": 4.309146337889082e-05, "loss": 0.3685, "step": 12585 }, { "epoch": 5.872201492537314, "grad_norm": 0.3306842461655779, "learning_rate": 4.308520691682974e-05, "loss": 0.3678, "step": 12590 }, { "epoch": 5.874533582089552, "grad_norm": 0.32041432118459123, "learning_rate": 4.30789481373977e-05, "loss": 0.3569, "step": 12595 }, { "epoch": 5.8768656716417915, "grad_norm": 0.3218622160229452, "learning_rate": 4.307268704152535e-05, "loss": 0.3711, "step": 12600 }, { "epoch": 5.87919776119403, "grad_norm": 0.31609034967269006, "learning_rate": 4.306642363014363e-05, "loss": 0.3667, "step": 12605 }, { "epoch": 5.8815298507462686, "grad_norm": 0.3155232916640764, "learning_rate": 4.3060157904183873e-05, "loss": 0.3665, "step": 12610 }, { "epoch": 5.883861940298507, "grad_norm": 0.3203346379307875, "learning_rate": 4.305388986457772e-05, "loss": 0.362, "step": 12615 }, { "epoch": 5.8861940298507465, "grad_norm": 0.34207677184048774, "learning_rate": 4.3047619512257164e-05, "loss": 0.3778, "step": 12620 }, { "epoch": 5.888526119402985, "grad_norm": 0.3349771879217811, "learning_rate": 4.304134684815455e-05, "loss": 0.3584, "step": 12625 }, { "epoch": 5.8908582089552235, "grad_norm": 0.3457728133506728, "learning_rate": 4.3035071873202563e-05, "loss": 0.3819, "step": 12630 }, { "epoch": 5.893190298507463, "grad_norm": 0.33821134288386456, "learning_rate": 4.3028794588334246e-05, "loss": 0.3648, "step": 12635 }, { "epoch": 5.895522388059701, "grad_norm": 0.32841645648993406, "learning_rate": 4.302251499448294e-05, "loss": 0.3729, "step": 12640 }, { "epoch": 5.89785447761194, "grad_norm": 0.3200702365992664, "learning_rate": 4.301623309258239e-05, "loss": 0.3571, "step": 12645 }, { "epoch": 5.900186567164179, "grad_norm": 0.3367089902316932, "learning_rate": 4.3009948883566645e-05, "loss": 0.3783, "step": 12650 }, { "epoch": 5.902518656716418, "grad_norm": 0.3222288031944619, "learning_rate": 4.3003662368370094e-05, "loss": 0.37, "step": 12655 }, { "epoch": 5.904850746268656, "grad_norm": 0.3455621398024207, "learning_rate": 4.29973735479275e-05, "loss": 0.3696, "step": 12660 }, { "epoch": 5.907182835820896, "grad_norm": 0.4287342454610744, "learning_rate": 4.299108242317393e-05, "loss": 0.3613, "step": 12665 }, { "epoch": 5.909514925373134, "grad_norm": 0.3210586337303363, "learning_rate": 4.298478899504485e-05, "loss": 0.3587, "step": 12670 }, { "epoch": 5.911847014925373, "grad_norm": 0.3123896290918195, "learning_rate": 4.297849326447599e-05, "loss": 0.3544, "step": 12675 }, { "epoch": 5.914179104477612, "grad_norm": 0.3349954492971426, "learning_rate": 4.297219523240349e-05, "loss": 0.3576, "step": 12680 }, { "epoch": 5.916511194029851, "grad_norm": 0.30826894338129734, "learning_rate": 4.2965894899763796e-05, "loss": 0.3683, "step": 12685 }, { "epoch": 5.918843283582089, "grad_norm": 0.31465848398968654, "learning_rate": 4.2959592267493715e-05, "loss": 0.3688, "step": 12690 }, { "epoch": 5.921175373134329, "grad_norm": 0.3302978442782693, "learning_rate": 4.29532873365304e-05, "loss": 0.3719, "step": 12695 }, { "epoch": 5.923507462686567, "grad_norm": 0.32371955055004675, "learning_rate": 4.2946980107811295e-05, "loss": 0.37, "step": 12700 }, { "epoch": 5.925839552238806, "grad_norm": 0.3145163580987525, "learning_rate": 4.2940670582274265e-05, "loss": 0.362, "step": 12705 }, { "epoch": 5.928171641791045, "grad_norm": 0.3441259735360635, "learning_rate": 4.2934358760857454e-05, "loss": 0.3673, "step": 12710 }, { "epoch": 5.930503731343284, "grad_norm": 0.31624136709987427, "learning_rate": 4.2928044644499374e-05, "loss": 0.3627, "step": 12715 }, { "epoch": 5.932835820895522, "grad_norm": 0.32411377516695355, "learning_rate": 4.292172823413887e-05, "loss": 0.3658, "step": 12720 }, { "epoch": 5.9351679104477615, "grad_norm": 0.3399390233931739, "learning_rate": 4.2915409530715144e-05, "loss": 0.3627, "step": 12725 }, { "epoch": 5.9375, "grad_norm": 0.3590082726783239, "learning_rate": 4.2909088535167714e-05, "loss": 0.3666, "step": 12730 }, { "epoch": 5.9398320895522385, "grad_norm": 0.33934753911074655, "learning_rate": 4.2902765248436454e-05, "loss": 0.3715, "step": 12735 }, { "epoch": 5.942164179104478, "grad_norm": 0.3210709841635274, "learning_rate": 4.289643967146158e-05, "loss": 0.3492, "step": 12740 }, { "epoch": 5.944496268656716, "grad_norm": 0.302324577860987, "learning_rate": 4.2890111805183646e-05, "loss": 0.3565, "step": 12745 }, { "epoch": 5.946828358208955, "grad_norm": 0.33912467763610743, "learning_rate": 4.288378165054354e-05, "loss": 0.3647, "step": 12750 }, { "epoch": 5.949160447761194, "grad_norm": 0.33371363020280925, "learning_rate": 4.28774492084825e-05, "loss": 0.3633, "step": 12755 }, { "epoch": 5.951492537313433, "grad_norm": 0.3302091539438975, "learning_rate": 4.28711144799421e-05, "loss": 0.3711, "step": 12760 }, { "epoch": 5.953824626865671, "grad_norm": 0.3292174311197712, "learning_rate": 4.286477746586424e-05, "loss": 0.3672, "step": 12765 }, { "epoch": 5.956156716417911, "grad_norm": 0.3403491332146517, "learning_rate": 4.2858438167191185e-05, "loss": 0.3675, "step": 12770 }, { "epoch": 5.958488805970149, "grad_norm": 0.3149315169994156, "learning_rate": 4.285209658486553e-05, "loss": 0.3677, "step": 12775 }, { "epoch": 5.960820895522388, "grad_norm": 0.32388612151333596, "learning_rate": 4.2845752719830206e-05, "loss": 0.3852, "step": 12780 }, { "epoch": 5.963152985074627, "grad_norm": 0.3381472473959259, "learning_rate": 4.283940657302848e-05, "loss": 0.3682, "step": 12785 }, { "epoch": 5.965485074626866, "grad_norm": 0.3189510401727534, "learning_rate": 4.283305814540397e-05, "loss": 0.3608, "step": 12790 }, { "epoch": 5.967817164179104, "grad_norm": 0.31189293137152885, "learning_rate": 4.282670743790062e-05, "loss": 0.3586, "step": 12795 }, { "epoch": 5.970149253731344, "grad_norm": 0.34723366370561004, "learning_rate": 4.282035445146272e-05, "loss": 0.3696, "step": 12800 }, { "epoch": 5.972481343283582, "grad_norm": 0.32504392559323025, "learning_rate": 4.28139991870349e-05, "loss": 0.3582, "step": 12805 }, { "epoch": 5.974813432835821, "grad_norm": 0.3211879121662421, "learning_rate": 4.2807641645562134e-05, "loss": 0.3819, "step": 12810 }, { "epoch": 5.97714552238806, "grad_norm": 0.3213565728820816, "learning_rate": 4.280128182798972e-05, "loss": 0.3661, "step": 12815 }, { "epoch": 5.979477611940299, "grad_norm": 0.33359514646596217, "learning_rate": 4.2794919735263295e-05, "loss": 0.3609, "step": 12820 }, { "epoch": 5.981809701492537, "grad_norm": 0.31440644248348804, "learning_rate": 4.278855536832885e-05, "loss": 0.3677, "step": 12825 }, { "epoch": 5.9841417910447765, "grad_norm": 0.33488604847778636, "learning_rate": 4.278218872813271e-05, "loss": 0.3677, "step": 12830 }, { "epoch": 5.986473880597015, "grad_norm": 0.4263870262189326, "learning_rate": 4.277581981562152e-05, "loss": 0.3635, "step": 12835 }, { "epoch": 5.9888059701492535, "grad_norm": 0.3296876798602788, "learning_rate": 4.276944863174229e-05, "loss": 0.3675, "step": 12840 }, { "epoch": 5.991138059701493, "grad_norm": 0.30967578191075623, "learning_rate": 4.2763075177442346e-05, "loss": 0.3549, "step": 12845 }, { "epoch": 5.9934701492537314, "grad_norm": 0.31498012489227384, "learning_rate": 4.275669945366936e-05, "loss": 0.3561, "step": 12850 }, { "epoch": 5.99580223880597, "grad_norm": 0.31228203019206496, "learning_rate": 4.275032146137135e-05, "loss": 0.3713, "step": 12855 }, { "epoch": 5.9981343283582085, "grad_norm": 0.3244738875823224, "learning_rate": 4.2743941201496644e-05, "loss": 0.3717, "step": 12860 }, { "epoch": 6.000466417910448, "grad_norm": 0.42267503343282464, "learning_rate": 4.2737558674993936e-05, "loss": 0.3621, "step": 12865 }, { "epoch": 6.002798507462686, "grad_norm": 0.477298213055688, "learning_rate": 4.2731173882812264e-05, "loss": 0.2937, "step": 12870 }, { "epoch": 6.005130597014926, "grad_norm": 0.36590833343376344, "learning_rate": 4.2724786825900955e-05, "loss": 0.2973, "step": 12875 }, { "epoch": 6.007462686567164, "grad_norm": 0.516847537488958, "learning_rate": 4.271839750520972e-05, "loss": 0.2912, "step": 12880 }, { "epoch": 6.009794776119403, "grad_norm": 0.3849563920357841, "learning_rate": 4.271200592168858e-05, "loss": 0.2955, "step": 12885 }, { "epoch": 6.012126865671642, "grad_norm": 0.3672981552368816, "learning_rate": 4.2705612076287907e-05, "loss": 0.291, "step": 12890 }, { "epoch": 6.014458955223881, "grad_norm": 0.3821246322820867, "learning_rate": 4.269921596995842e-05, "loss": 0.3009, "step": 12895 }, { "epoch": 6.016791044776119, "grad_norm": 0.36581283497230227, "learning_rate": 4.2692817603651134e-05, "loss": 0.2974, "step": 12900 }, { "epoch": 6.019123134328358, "grad_norm": 0.3831881842029943, "learning_rate": 4.268641697831744e-05, "loss": 0.3047, "step": 12905 }, { "epoch": 6.021455223880597, "grad_norm": 0.4350014700976339, "learning_rate": 4.2680014094909035e-05, "loss": 0.2955, "step": 12910 }, { "epoch": 6.023787313432836, "grad_norm": 0.4162156081266002, "learning_rate": 4.267360895437799e-05, "loss": 0.2945, "step": 12915 }, { "epoch": 6.026119402985074, "grad_norm": 0.3721573213277028, "learning_rate": 4.2667201557676673e-05, "loss": 0.2935, "step": 12920 }, { "epoch": 6.028451492537314, "grad_norm": 0.35284081070480694, "learning_rate": 4.2660791905757794e-05, "loss": 0.2898, "step": 12925 }, { "epoch": 6.030783582089552, "grad_norm": 0.6264671837026738, "learning_rate": 4.2654379999574425e-05, "loss": 0.3069, "step": 12930 }, { "epoch": 6.033115671641791, "grad_norm": 0.3626925678934575, "learning_rate": 4.2647965840079945e-05, "loss": 0.288, "step": 12935 }, { "epoch": 6.03544776119403, "grad_norm": 0.3627266849220394, "learning_rate": 4.2641549428228087e-05, "loss": 0.299, "step": 12940 }, { "epoch": 6.0377798507462686, "grad_norm": 0.3765393335285203, "learning_rate": 4.263513076497289e-05, "loss": 0.3043, "step": 12945 }, { "epoch": 6.040111940298507, "grad_norm": 0.3951208849694295, "learning_rate": 4.2628709851268775e-05, "loss": 0.3059, "step": 12950 }, { "epoch": 6.0424440298507465, "grad_norm": 0.37940439940044346, "learning_rate": 4.262228668807044e-05, "loss": 0.3016, "step": 12955 }, { "epoch": 6.044776119402985, "grad_norm": 0.38684993515210797, "learning_rate": 4.261586127633297e-05, "loss": 0.304, "step": 12960 }, { "epoch": 6.0471082089552235, "grad_norm": 0.39214814873434456, "learning_rate": 4.260943361701176e-05, "loss": 0.3001, "step": 12965 }, { "epoch": 6.049440298507463, "grad_norm": 0.36722200451549336, "learning_rate": 4.2603003711062536e-05, "loss": 0.2975, "step": 12970 }, { "epoch": 6.051772388059701, "grad_norm": 0.36137834383351747, "learning_rate": 4.259657155944136e-05, "loss": 0.2972, "step": 12975 }, { "epoch": 6.05410447761194, "grad_norm": 0.3865012189391187, "learning_rate": 4.259013716310465e-05, "loss": 0.3018, "step": 12980 }, { "epoch": 6.056436567164179, "grad_norm": 0.3831205846353468, "learning_rate": 4.258370052300911e-05, "loss": 0.2969, "step": 12985 }, { "epoch": 6.058768656716418, "grad_norm": 0.3648037173923533, "learning_rate": 4.2577261640111834e-05, "loss": 0.2924, "step": 12990 }, { "epoch": 6.061100746268656, "grad_norm": 0.3565088927880556, "learning_rate": 4.25708205153702e-05, "loss": 0.294, "step": 12995 }, { "epoch": 6.063432835820896, "grad_norm": 0.3663040265821174, "learning_rate": 4.256437714974196e-05, "loss": 0.2915, "step": 13000 }, { "epoch": 6.065764925373134, "grad_norm": 0.35844997590530764, "learning_rate": 4.2557931544185166e-05, "loss": 0.2983, "step": 13005 }, { "epoch": 6.068097014925373, "grad_norm": 0.3733507612906886, "learning_rate": 4.255148369965822e-05, "loss": 0.2897, "step": 13010 }, { "epoch": 6.070429104477612, "grad_norm": 0.3661153388895778, "learning_rate": 4.254503361711987e-05, "loss": 0.2988, "step": 13015 }, { "epoch": 6.072761194029851, "grad_norm": 0.356407683954056, "learning_rate": 4.253858129752916e-05, "loss": 0.2967, "step": 13020 }, { "epoch": 6.075093283582089, "grad_norm": 0.34958542182250424, "learning_rate": 4.2532126741845506e-05, "loss": 0.2954, "step": 13025 }, { "epoch": 6.077425373134329, "grad_norm": 0.36270681112527065, "learning_rate": 4.252566995102864e-05, "loss": 0.2973, "step": 13030 }, { "epoch": 6.079757462686567, "grad_norm": 0.3694114305073947, "learning_rate": 4.25192109260386e-05, "loss": 0.3009, "step": 13035 }, { "epoch": 6.082089552238806, "grad_norm": 0.371665560928459, "learning_rate": 4.251274966783579e-05, "loss": 0.2955, "step": 13040 }, { "epoch": 6.084421641791045, "grad_norm": 0.38324982927796303, "learning_rate": 4.250628617738096e-05, "loss": 0.3077, "step": 13045 }, { "epoch": 6.086753731343284, "grad_norm": 0.37013846095940073, "learning_rate": 4.2499820455635154e-05, "loss": 0.302, "step": 13050 }, { "epoch": 6.089085820895522, "grad_norm": 0.3647663480197423, "learning_rate": 4.2493352503559756e-05, "loss": 0.2975, "step": 13055 }, { "epoch": 6.0914179104477615, "grad_norm": 0.35874306991150345, "learning_rate": 4.24868823221165e-05, "loss": 0.3041, "step": 13060 }, { "epoch": 6.09375, "grad_norm": 0.3919279331770141, "learning_rate": 4.248040991226743e-05, "loss": 0.311, "step": 13065 }, { "epoch": 6.0960820895522385, "grad_norm": 0.3830338290764689, "learning_rate": 4.2473935274974944e-05, "loss": 0.3016, "step": 13070 }, { "epoch": 6.098414179104478, "grad_norm": 0.36302892728323904, "learning_rate": 4.246745841120174e-05, "loss": 0.302, "step": 13075 }, { "epoch": 6.100746268656716, "grad_norm": 0.4092525158695316, "learning_rate": 4.246097932191088e-05, "loss": 0.3043, "step": 13080 }, { "epoch": 6.103078358208955, "grad_norm": 0.4001479322803608, "learning_rate": 4.245449800806574e-05, "loss": 0.3062, "step": 13085 }, { "epoch": 6.105410447761194, "grad_norm": 0.3636022678290163, "learning_rate": 4.2448014470630034e-05, "loss": 0.2965, "step": 13090 }, { "epoch": 6.107742537313433, "grad_norm": 0.3945517199999036, "learning_rate": 4.244152871056779e-05, "loss": 0.2941, "step": 13095 }, { "epoch": 6.110074626865671, "grad_norm": 0.3834596174227562, "learning_rate": 4.2435040728843376e-05, "loss": 0.3065, "step": 13100 }, { "epoch": 6.112406716417911, "grad_norm": 0.38429487139698176, "learning_rate": 4.242855052642151e-05, "loss": 0.3161, "step": 13105 }, { "epoch": 6.114738805970149, "grad_norm": 0.37070770420041177, "learning_rate": 4.2422058104267215e-05, "loss": 0.3105, "step": 13110 }, { "epoch": 6.117070895522388, "grad_norm": 0.390114607876701, "learning_rate": 4.241556346334584e-05, "loss": 0.3067, "step": 13115 }, { "epoch": 6.119402985074627, "grad_norm": 0.3653038797622989, "learning_rate": 4.2409066604623096e-05, "loss": 0.3055, "step": 13120 }, { "epoch": 6.121735074626866, "grad_norm": 0.36557494840927274, "learning_rate": 4.2402567529065e-05, "loss": 0.3102, "step": 13125 }, { "epoch": 6.124067164179104, "grad_norm": 0.36352771968671244, "learning_rate": 4.239606623763789e-05, "loss": 0.3, "step": 13130 }, { "epoch": 6.126399253731344, "grad_norm": 0.3661183565757262, "learning_rate": 4.2389562731308454e-05, "loss": 0.3031, "step": 13135 }, { "epoch": 6.128731343283582, "grad_norm": 0.35326887708423577, "learning_rate": 4.23830570110437e-05, "loss": 0.3016, "step": 13140 }, { "epoch": 6.131063432835821, "grad_norm": 0.35744319387819545, "learning_rate": 4.237654907781096e-05, "loss": 0.3049, "step": 13145 }, { "epoch": 6.13339552238806, "grad_norm": 0.36481180684522385, "learning_rate": 4.237003893257791e-05, "loss": 0.3027, "step": 13150 }, { "epoch": 6.135727611940299, "grad_norm": 0.34955688488532255, "learning_rate": 4.236352657631254e-05, "loss": 0.3025, "step": 13155 }, { "epoch": 6.138059701492537, "grad_norm": 0.3567612147505793, "learning_rate": 4.2357012009983185e-05, "loss": 0.3045, "step": 13160 }, { "epoch": 6.1403917910447765, "grad_norm": 0.3633567785598711, "learning_rate": 4.2350495234558494e-05, "loss": 0.3062, "step": 13165 }, { "epoch": 6.142723880597015, "grad_norm": 0.3770212071055855, "learning_rate": 4.234397625100745e-05, "loss": 0.308, "step": 13170 }, { "epoch": 6.1450559701492535, "grad_norm": 0.36857126297664045, "learning_rate": 4.233745506029934e-05, "loss": 0.2926, "step": 13175 }, { "epoch": 6.147388059701493, "grad_norm": 0.37341768692054167, "learning_rate": 4.2330931663403844e-05, "loss": 0.3105, "step": 13180 }, { "epoch": 6.1497201492537314, "grad_norm": 0.3734437677105757, "learning_rate": 4.232440606129089e-05, "loss": 0.298, "step": 13185 }, { "epoch": 6.15205223880597, "grad_norm": 0.5633515729798091, "learning_rate": 4.231787825493081e-05, "loss": 0.3066, "step": 13190 }, { "epoch": 6.154384328358209, "grad_norm": 0.3550622339098281, "learning_rate": 4.231134824529419e-05, "loss": 0.2931, "step": 13195 }, { "epoch": 6.156716417910448, "grad_norm": 0.3644346093395638, "learning_rate": 4.230481603335201e-05, "loss": 0.3091, "step": 13200 }, { "epoch": 6.159048507462686, "grad_norm": 0.33671785773152246, "learning_rate": 4.229828162007553e-05, "loss": 0.3024, "step": 13205 }, { "epoch": 6.161380597014926, "grad_norm": 0.5695790211348859, "learning_rate": 4.229174500643634e-05, "loss": 0.3083, "step": 13210 }, { "epoch": 6.163712686567164, "grad_norm": 0.3868482996058648, "learning_rate": 4.228520619340641e-05, "loss": 0.3055, "step": 13215 }, { "epoch": 6.166044776119403, "grad_norm": 0.37452957381262947, "learning_rate": 4.227866518195797e-05, "loss": 0.3093, "step": 13220 }, { "epoch": 6.168376865671641, "grad_norm": 0.37133543527614227, "learning_rate": 4.227212197306362e-05, "loss": 0.3056, "step": 13225 }, { "epoch": 6.170708955223881, "grad_norm": 0.370872495199131, "learning_rate": 4.226557656769626e-05, "loss": 0.3071, "step": 13230 }, { "epoch": 6.173041044776119, "grad_norm": 0.3739117125144943, "learning_rate": 4.225902896682914e-05, "loss": 0.2976, "step": 13235 }, { "epoch": 6.175373134328359, "grad_norm": 0.36557502928094676, "learning_rate": 4.225247917143582e-05, "loss": 0.3123, "step": 13240 }, { "epoch": 6.177705223880597, "grad_norm": 0.3727053453869346, "learning_rate": 4.2245927182490194e-05, "loss": 0.3042, "step": 13245 }, { "epoch": 6.180037313432836, "grad_norm": 0.3803592747637171, "learning_rate": 4.223937300096648e-05, "loss": 0.3079, "step": 13250 }, { "epoch": 6.182369402985074, "grad_norm": 0.3571484557375909, "learning_rate": 4.223281662783922e-05, "loss": 0.3029, "step": 13255 }, { "epoch": 6.184701492537314, "grad_norm": 0.3738002462370214, "learning_rate": 4.22262580640833e-05, "loss": 0.308, "step": 13260 }, { "epoch": 6.187033582089552, "grad_norm": 0.36265048492015867, "learning_rate": 4.221969731067388e-05, "loss": 0.3054, "step": 13265 }, { "epoch": 6.189365671641791, "grad_norm": 0.3753796642633425, "learning_rate": 4.221313436858651e-05, "loss": 0.3118, "step": 13270 }, { "epoch": 6.19169776119403, "grad_norm": 0.36694284765209784, "learning_rate": 4.2206569238797025e-05, "loss": 0.3111, "step": 13275 }, { "epoch": 6.1940298507462686, "grad_norm": 0.38533331324559505, "learning_rate": 4.220000192228161e-05, "loss": 0.3023, "step": 13280 }, { "epoch": 6.196361940298507, "grad_norm": 0.3931070529685477, "learning_rate": 4.2193432420016746e-05, "loss": 0.3027, "step": 13285 }, { "epoch": 6.1986940298507465, "grad_norm": 0.38840926980562046, "learning_rate": 4.218686073297926e-05, "loss": 0.3076, "step": 13290 }, { "epoch": 6.201026119402985, "grad_norm": 0.3721568756584136, "learning_rate": 4.218028686214631e-05, "loss": 0.3165, "step": 13295 }, { "epoch": 6.2033582089552235, "grad_norm": 0.36723852580337984, "learning_rate": 4.217371080849535e-05, "loss": 0.3141, "step": 13300 }, { "epoch": 6.205690298507463, "grad_norm": 0.39622641695482586, "learning_rate": 4.216713257300418e-05, "loss": 0.3077, "step": 13305 }, { "epoch": 6.208022388059701, "grad_norm": 0.3860302029057169, "learning_rate": 4.216055215665093e-05, "loss": 0.3108, "step": 13310 }, { "epoch": 6.21035447761194, "grad_norm": 0.3565989937127332, "learning_rate": 4.215396956041404e-05, "loss": 0.3029, "step": 13315 }, { "epoch": 6.212686567164179, "grad_norm": 0.3782856141469653, "learning_rate": 4.2147384785272284e-05, "loss": 0.3011, "step": 13320 }, { "epoch": 6.215018656716418, "grad_norm": 0.3499258424619151, "learning_rate": 4.214079783220474e-05, "loss": 0.3182, "step": 13325 }, { "epoch": 6.217350746268656, "grad_norm": 0.3840536824085021, "learning_rate": 4.213420870219084e-05, "loss": 0.3142, "step": 13330 }, { "epoch": 6.219682835820896, "grad_norm": 0.35696682443013444, "learning_rate": 4.212761739621032e-05, "loss": 0.3063, "step": 13335 }, { "epoch": 6.222014925373134, "grad_norm": 0.3926138271815239, "learning_rate": 4.212102391524324e-05, "loss": 0.3097, "step": 13340 }, { "epoch": 6.224347014925373, "grad_norm": 0.3535257321233668, "learning_rate": 4.211442826027e-05, "loss": 0.3019, "step": 13345 }, { "epoch": 6.226679104477612, "grad_norm": 0.3604882126446137, "learning_rate": 4.210783043227129e-05, "loss": 0.3031, "step": 13350 }, { "epoch": 6.229011194029851, "grad_norm": 0.35278780595697834, "learning_rate": 4.210123043222816e-05, "loss": 0.3095, "step": 13355 }, { "epoch": 6.231343283582089, "grad_norm": 0.36908430351875676, "learning_rate": 4.209462826112195e-05, "loss": 0.2994, "step": 13360 }, { "epoch": 6.233675373134329, "grad_norm": 0.3521048199428409, "learning_rate": 4.2088023919934366e-05, "loss": 0.2957, "step": 13365 }, { "epoch": 6.236007462686567, "grad_norm": 0.36545291652508505, "learning_rate": 4.2081417409647386e-05, "loss": 0.3012, "step": 13370 }, { "epoch": 6.238339552238806, "grad_norm": 0.35103519549194867, "learning_rate": 4.207480873124335e-05, "loss": 0.3172, "step": 13375 }, { "epoch": 6.240671641791045, "grad_norm": 0.350632609652498, "learning_rate": 4.2068197885704904e-05, "loss": 0.3034, "step": 13380 }, { "epoch": 6.243003731343284, "grad_norm": 0.36416579798459175, "learning_rate": 4.2061584874015006e-05, "loss": 0.2999, "step": 13385 }, { "epoch": 6.245335820895522, "grad_norm": 0.35635542134682663, "learning_rate": 4.205496969715696e-05, "loss": 0.3022, "step": 13390 }, { "epoch": 6.2476679104477615, "grad_norm": 0.35732590836950906, "learning_rate": 4.2048352356114366e-05, "loss": 0.3133, "step": 13395 }, { "epoch": 6.25, "grad_norm": 0.34866356814840005, "learning_rate": 4.204173285187117e-05, "loss": 0.3161, "step": 13400 }, { "epoch": 6.2523320895522385, "grad_norm": 0.3373783554002995, "learning_rate": 4.203511118541163e-05, "loss": 0.3059, "step": 13405 }, { "epoch": 6.254664179104478, "grad_norm": 0.34046773545646086, "learning_rate": 4.202848735772031e-05, "loss": 0.3044, "step": 13410 }, { "epoch": 6.256996268656716, "grad_norm": 0.36284142920395673, "learning_rate": 4.202186136978213e-05, "loss": 0.3107, "step": 13415 }, { "epoch": 6.259328358208955, "grad_norm": 0.3969878401920931, "learning_rate": 4.201523322258231e-05, "loss": 0.314, "step": 13420 }, { "epoch": 6.261660447761194, "grad_norm": 0.36022223261229613, "learning_rate": 4.2008602917106365e-05, "loss": 0.3052, "step": 13425 }, { "epoch": 6.263992537313433, "grad_norm": 0.36678044762391215, "learning_rate": 4.2001970454340185e-05, "loss": 0.3034, "step": 13430 }, { "epoch": 6.266324626865671, "grad_norm": 0.35764452223265164, "learning_rate": 4.199533583526994e-05, "loss": 0.3039, "step": 13435 }, { "epoch": 6.268656716417911, "grad_norm": 0.3476623010710145, "learning_rate": 4.1988699060882144e-05, "loss": 0.3059, "step": 13440 }, { "epoch": 6.270988805970149, "grad_norm": 0.3597368602148695, "learning_rate": 4.198206013216361e-05, "loss": 0.3126, "step": 13445 }, { "epoch": 6.273320895522388, "grad_norm": 0.4056695921341811, "learning_rate": 4.197541905010149e-05, "loss": 0.3001, "step": 13450 }, { "epoch": 6.275652985074627, "grad_norm": 0.3494795135653706, "learning_rate": 4.196877581568326e-05, "loss": 0.3049, "step": 13455 }, { "epoch": 6.277985074626866, "grad_norm": 0.3698177398536701, "learning_rate": 4.196213042989668e-05, "loss": 0.3112, "step": 13460 }, { "epoch": 6.280317164179104, "grad_norm": 0.3468755413336929, "learning_rate": 4.195548289372988e-05, "loss": 0.3025, "step": 13465 }, { "epoch": 6.282649253731344, "grad_norm": 0.3547011514768028, "learning_rate": 4.194883320817127e-05, "loss": 0.3131, "step": 13470 }, { "epoch": 6.284981343283582, "grad_norm": 0.3605595832924173, "learning_rate": 4.1942181374209596e-05, "loss": 0.308, "step": 13475 }, { "epoch": 6.287313432835821, "grad_norm": 0.37182906337178845, "learning_rate": 4.193552739283393e-05, "loss": 0.3115, "step": 13480 }, { "epoch": 6.28964552238806, "grad_norm": 0.34952069555517457, "learning_rate": 4.192887126503364e-05, "loss": 0.3102, "step": 13485 }, { "epoch": 6.291977611940299, "grad_norm": 0.35471319814857555, "learning_rate": 4.192221299179845e-05, "loss": 0.3046, "step": 13490 }, { "epoch": 6.294309701492537, "grad_norm": 0.3503425519283247, "learning_rate": 4.191555257411837e-05, "loss": 0.2996, "step": 13495 }, { "epoch": 6.2966417910447765, "grad_norm": 0.3813924358374529, "learning_rate": 4.190889001298373e-05, "loss": 0.3115, "step": 13500 }, { "epoch": 6.298973880597015, "grad_norm": 0.35793076969863247, "learning_rate": 4.190222530938521e-05, "loss": 0.3117, "step": 13505 }, { "epoch": 6.3013059701492535, "grad_norm": 0.3511531064596747, "learning_rate": 4.189555846431377e-05, "loss": 0.3027, "step": 13510 }, { "epoch": 6.303638059701493, "grad_norm": 0.3504128798713619, "learning_rate": 4.188888947876071e-05, "loss": 0.3107, "step": 13515 }, { "epoch": 6.3059701492537314, "grad_norm": 0.3838820172962709, "learning_rate": 4.188221835371766e-05, "loss": 0.3069, "step": 13520 }, { "epoch": 6.30830223880597, "grad_norm": 0.3660656785682886, "learning_rate": 4.187554509017653e-05, "loss": 0.3152, "step": 13525 }, { "epoch": 6.310634328358209, "grad_norm": 0.35916114074078226, "learning_rate": 4.1868869689129584e-05, "loss": 0.3112, "step": 13530 }, { "epoch": 6.312966417910448, "grad_norm": 0.36382116952194393, "learning_rate": 4.186219215156938e-05, "loss": 0.3059, "step": 13535 }, { "epoch": 6.315298507462686, "grad_norm": 0.3563852322612261, "learning_rate": 4.1855512478488816e-05, "loss": 0.3129, "step": 13540 }, { "epoch": 6.317630597014926, "grad_norm": 0.371918146649218, "learning_rate": 4.184883067088108e-05, "loss": 0.3017, "step": 13545 }, { "epoch": 6.319962686567164, "grad_norm": 0.3535154947558169, "learning_rate": 4.184214672973971e-05, "loss": 0.3128, "step": 13550 }, { "epoch": 6.322294776119403, "grad_norm": 0.357525457750877, "learning_rate": 4.183546065605855e-05, "loss": 0.309, "step": 13555 }, { "epoch": 6.324626865671641, "grad_norm": 0.3546403496492115, "learning_rate": 4.182877245083172e-05, "loss": 0.3042, "step": 13560 }, { "epoch": 6.326958955223881, "grad_norm": 0.3603514565673951, "learning_rate": 4.1822082115053717e-05, "loss": 0.315, "step": 13565 }, { "epoch": 6.329291044776119, "grad_norm": 0.371358178869777, "learning_rate": 4.181538964971933e-05, "loss": 0.3114, "step": 13570 }, { "epoch": 6.331623134328359, "grad_norm": 0.3783594056108107, "learning_rate": 4.180869505582366e-05, "loss": 0.3009, "step": 13575 }, { "epoch": 6.333955223880597, "grad_norm": 0.35851215260794556, "learning_rate": 4.180199833436213e-05, "loss": 0.3071, "step": 13580 }, { "epoch": 6.336287313432836, "grad_norm": 0.3413820037235899, "learning_rate": 4.179529948633047e-05, "loss": 0.3094, "step": 13585 }, { "epoch": 6.338619402985074, "grad_norm": 0.37510600400552174, "learning_rate": 4.178859851272475e-05, "loss": 0.3151, "step": 13590 }, { "epoch": 6.340951492537314, "grad_norm": 0.37402749064775026, "learning_rate": 4.1781895414541326e-05, "loss": 0.3138, "step": 13595 }, { "epoch": 6.343283582089552, "grad_norm": 0.36882383528555074, "learning_rate": 4.1775190192776905e-05, "loss": 0.3073, "step": 13600 }, { "epoch": 6.345615671641791, "grad_norm": 0.3600492996291712, "learning_rate": 4.176848284842847e-05, "loss": 0.3205, "step": 13605 }, { "epoch": 6.34794776119403, "grad_norm": 0.37395454550831436, "learning_rate": 4.176177338249334e-05, "loss": 0.3066, "step": 13610 }, { "epoch": 6.3502798507462686, "grad_norm": 0.36703411276711817, "learning_rate": 4.1755061795969155e-05, "loss": 0.3056, "step": 13615 }, { "epoch": 6.352611940298507, "grad_norm": 0.38774068192996564, "learning_rate": 4.1748348089853864e-05, "loss": 0.3182, "step": 13620 }, { "epoch": 6.3549440298507465, "grad_norm": 0.3509078391341999, "learning_rate": 4.1741632265145715e-05, "loss": 0.3107, "step": 13625 }, { "epoch": 6.357276119402985, "grad_norm": 0.3427121984397021, "learning_rate": 4.173491432284332e-05, "loss": 0.3198, "step": 13630 }, { "epoch": 6.3596082089552235, "grad_norm": 0.3617491731780856, "learning_rate": 4.172819426394554e-05, "loss": 0.3245, "step": 13635 }, { "epoch": 6.361940298507463, "grad_norm": 0.347070779785552, "learning_rate": 4.172147208945159e-05, "loss": 0.308, "step": 13640 }, { "epoch": 6.364272388059701, "grad_norm": 0.3451731110635797, "learning_rate": 4.171474780036101e-05, "loss": 0.3048, "step": 13645 }, { "epoch": 6.36660447761194, "grad_norm": 0.37364759343863585, "learning_rate": 4.170802139767362e-05, "loss": 0.3077, "step": 13650 }, { "epoch": 6.368936567164179, "grad_norm": 0.3706899535082302, "learning_rate": 4.170129288238958e-05, "loss": 0.3199, "step": 13655 }, { "epoch": 6.371268656716418, "grad_norm": 0.3675637351221338, "learning_rate": 4.1694562255509354e-05, "loss": 0.3185, "step": 13660 }, { "epoch": 6.373600746268656, "grad_norm": 0.3352975884130223, "learning_rate": 4.1687829518033726e-05, "loss": 0.3055, "step": 13665 }, { "epoch": 6.375932835820896, "grad_norm": 0.3437811509513926, "learning_rate": 4.168109467096378e-05, "loss": 0.3139, "step": 13670 }, { "epoch": 6.378264925373134, "grad_norm": 0.35277330346116403, "learning_rate": 4.1674357715300924e-05, "loss": 0.2937, "step": 13675 }, { "epoch": 6.380597014925373, "grad_norm": 0.36956156576814025, "learning_rate": 4.1667618652046894e-05, "loss": 0.3183, "step": 13680 }, { "epoch": 6.382929104477612, "grad_norm": 0.36932829367948933, "learning_rate": 4.1660877482203704e-05, "loss": 0.3143, "step": 13685 }, { "epoch": 6.385261194029851, "grad_norm": 0.38505925193656937, "learning_rate": 4.165413420677372e-05, "loss": 0.3056, "step": 13690 }, { "epoch": 6.387593283582089, "grad_norm": 0.3593592920407849, "learning_rate": 4.164738882675958e-05, "loss": 0.3236, "step": 13695 }, { "epoch": 6.389925373134329, "grad_norm": 0.3723164846670845, "learning_rate": 4.164064134316428e-05, "loss": 0.3225, "step": 13700 }, { "epoch": 6.392257462686567, "grad_norm": 0.36638812333962656, "learning_rate": 4.163389175699109e-05, "loss": 0.314, "step": 13705 }, { "epoch": 6.394589552238806, "grad_norm": 0.35800322408344293, "learning_rate": 4.162714006924362e-05, "loss": 0.3276, "step": 13710 }, { "epoch": 6.396921641791045, "grad_norm": 0.3689192473446033, "learning_rate": 4.1620386280925776e-05, "loss": 0.3149, "step": 13715 }, { "epoch": 6.399253731343284, "grad_norm": 0.3597735048718556, "learning_rate": 4.161363039304177e-05, "loss": 0.3089, "step": 13720 }, { "epoch": 6.401585820895522, "grad_norm": 0.36388492314977655, "learning_rate": 4.160687240659616e-05, "loss": 0.3201, "step": 13725 }, { "epoch": 6.4039179104477615, "grad_norm": 0.3557666281911261, "learning_rate": 4.160011232259378e-05, "loss": 0.31, "step": 13730 }, { "epoch": 6.40625, "grad_norm": 0.36324906142496, "learning_rate": 4.1593350142039806e-05, "loss": 0.3149, "step": 13735 }, { "epoch": 6.4085820895522385, "grad_norm": 0.3616049896092365, "learning_rate": 4.158658586593969e-05, "loss": 0.3114, "step": 13740 }, { "epoch": 6.410914179104478, "grad_norm": 0.41053362346614614, "learning_rate": 4.157981949529922e-05, "loss": 0.3155, "step": 13745 }, { "epoch": 6.413246268656716, "grad_norm": 0.41507149408780614, "learning_rate": 4.1573051031124486e-05, "loss": 0.3127, "step": 13750 }, { "epoch": 6.415578358208955, "grad_norm": 0.36586852053532937, "learning_rate": 4.156628047442191e-05, "loss": 0.3084, "step": 13755 }, { "epoch": 6.417910447761194, "grad_norm": 0.3561017930503077, "learning_rate": 4.155950782619819e-05, "loss": 0.3118, "step": 13760 }, { "epoch": 6.420242537313433, "grad_norm": 0.38606334958256083, "learning_rate": 4.155273308746037e-05, "loss": 0.3269, "step": 13765 }, { "epoch": 6.422574626865671, "grad_norm": 0.3630463250906746, "learning_rate": 4.1545956259215776e-05, "loss": 0.3221, "step": 13770 }, { "epoch": 6.424906716417911, "grad_norm": 0.37115348650078606, "learning_rate": 4.153917734247208e-05, "loss": 0.3161, "step": 13775 }, { "epoch": 6.427238805970149, "grad_norm": 0.3529372756052765, "learning_rate": 4.153239633823721e-05, "loss": 0.3177, "step": 13780 }, { "epoch": 6.429570895522388, "grad_norm": 0.36597055717529275, "learning_rate": 4.152561324751945e-05, "loss": 0.3202, "step": 13785 }, { "epoch": 6.431902985074627, "grad_norm": 0.33448216392084584, "learning_rate": 4.151882807132739e-05, "loss": 0.3132, "step": 13790 }, { "epoch": 6.434235074626866, "grad_norm": 0.3604970209210281, "learning_rate": 4.1512040810669905e-05, "loss": 0.3218, "step": 13795 }, { "epoch": 6.436567164179104, "grad_norm": 0.3446694328054215, "learning_rate": 4.1505251466556206e-05, "loss": 0.3077, "step": 13800 }, { "epoch": 6.438899253731344, "grad_norm": 0.3463882010063856, "learning_rate": 4.1498460039995805e-05, "loss": 0.309, "step": 13805 }, { "epoch": 6.441231343283582, "grad_norm": 0.37207839877527304, "learning_rate": 4.149166653199852e-05, "loss": 0.3233, "step": 13810 }, { "epoch": 6.443563432835821, "grad_norm": 0.34304653061931284, "learning_rate": 4.148487094357447e-05, "loss": 0.312, "step": 13815 }, { "epoch": 6.44589552238806, "grad_norm": 0.3828092907107444, "learning_rate": 4.1478073275734105e-05, "loss": 0.3167, "step": 13820 }, { "epoch": 6.448227611940299, "grad_norm": 0.3453180480024246, "learning_rate": 4.147127352948817e-05, "loss": 0.3025, "step": 13825 }, { "epoch": 6.450559701492537, "grad_norm": 0.3430081661227673, "learning_rate": 4.146447170584772e-05, "loss": 0.3184, "step": 13830 }, { "epoch": 6.4528917910447765, "grad_norm": 0.36274639440752776, "learning_rate": 4.145766780582413e-05, "loss": 0.3112, "step": 13835 }, { "epoch": 6.455223880597015, "grad_norm": 0.3361930084330603, "learning_rate": 4.145086183042907e-05, "loss": 0.3025, "step": 13840 }, { "epoch": 6.4575559701492535, "grad_norm": 0.34532661907484635, "learning_rate": 4.1444053780674506e-05, "loss": 0.3059, "step": 13845 }, { "epoch": 6.459888059701493, "grad_norm": 0.36633418004368057, "learning_rate": 4.143724365757275e-05, "loss": 0.3158, "step": 13850 }, { "epoch": 6.4622201492537314, "grad_norm": 0.36432661238615516, "learning_rate": 4.143043146213641e-05, "loss": 0.3147, "step": 13855 }, { "epoch": 6.46455223880597, "grad_norm": 0.3793693605146012, "learning_rate": 4.142361719537838e-05, "loss": 0.3132, "step": 13860 }, { "epoch": 6.466884328358209, "grad_norm": 0.34509198353910187, "learning_rate": 4.1416800858311875e-05, "loss": 0.3168, "step": 13865 }, { "epoch": 6.469216417910448, "grad_norm": 0.3669657524016061, "learning_rate": 4.140998245195042e-05, "loss": 0.321, "step": 13870 }, { "epoch": 6.471548507462686, "grad_norm": 0.36293177301215285, "learning_rate": 4.1403161977307845e-05, "loss": 0.3137, "step": 13875 }, { "epoch": 6.473880597014926, "grad_norm": 0.35646951793702286, "learning_rate": 4.13963394353983e-05, "loss": 0.3097, "step": 13880 }, { "epoch": 6.476212686567164, "grad_norm": 0.36539936967899533, "learning_rate": 4.1389514827236214e-05, "loss": 0.3161, "step": 13885 }, { "epoch": 6.478544776119403, "grad_norm": 0.3691122098503804, "learning_rate": 4.138268815383636e-05, "loss": 0.3187, "step": 13890 }, { "epoch": 6.480876865671641, "grad_norm": 0.3494954500072802, "learning_rate": 4.137585941621379e-05, "loss": 0.3133, "step": 13895 }, { "epoch": 6.483208955223881, "grad_norm": 0.36740693330458, "learning_rate": 4.136902861538387e-05, "loss": 0.3234, "step": 13900 }, { "epoch": 6.485541044776119, "grad_norm": 0.3528809082986268, "learning_rate": 4.136219575236228e-05, "loss": 0.3149, "step": 13905 }, { "epoch": 6.487873134328359, "grad_norm": 0.35289737858965003, "learning_rate": 4.135536082816499e-05, "loss": 0.3168, "step": 13910 }, { "epoch": 6.490205223880597, "grad_norm": 0.3634039099445142, "learning_rate": 4.13485238438083e-05, "loss": 0.3174, "step": 13915 }, { "epoch": 6.492537313432836, "grad_norm": 0.33536766136670976, "learning_rate": 4.13416848003088e-05, "loss": 0.3058, "step": 13920 }, { "epoch": 6.494869402985074, "grad_norm": 0.34876005541524235, "learning_rate": 4.133484369868339e-05, "loss": 0.3114, "step": 13925 }, { "epoch": 6.497201492537314, "grad_norm": 0.3507987311841924, "learning_rate": 4.132800053994927e-05, "loss": 0.3086, "step": 13930 }, { "epoch": 6.499533582089552, "grad_norm": 0.3918306118057363, "learning_rate": 4.132115532512397e-05, "loss": 0.3191, "step": 13935 }, { "epoch": 6.5018656716417915, "grad_norm": 0.3729813873738966, "learning_rate": 4.1314308055225295e-05, "loss": 0.3283, "step": 13940 }, { "epoch": 6.50419776119403, "grad_norm": 0.347371439184102, "learning_rate": 4.130745873127136e-05, "loss": 0.3093, "step": 13945 }, { "epoch": 6.5065298507462686, "grad_norm": 0.34023267017226505, "learning_rate": 4.1300607354280605e-05, "loss": 0.3067, "step": 13950 }, { "epoch": 6.508861940298507, "grad_norm": 0.3708224764800474, "learning_rate": 4.129375392527177e-05, "loss": 0.3247, "step": 13955 }, { "epoch": 6.5111940298507465, "grad_norm": 0.34160907535924306, "learning_rate": 4.128689844526388e-05, "loss": 0.317, "step": 13960 }, { "epoch": 6.513526119402985, "grad_norm": 0.35572331257467993, "learning_rate": 4.128004091527629e-05, "loss": 0.3105, "step": 13965 }, { "epoch": 6.5158582089552235, "grad_norm": 0.3785876190440269, "learning_rate": 4.1273181336328646e-05, "loss": 0.3122, "step": 13970 }, { "epoch": 6.518190298507463, "grad_norm": 0.3730026106391999, "learning_rate": 4.1266319709440895e-05, "loss": 0.3112, "step": 13975 }, { "epoch": 6.520522388059701, "grad_norm": 0.3804462773170987, "learning_rate": 4.125945603563331e-05, "loss": 0.3195, "step": 13980 }, { "epoch": 6.52285447761194, "grad_norm": 0.3488329106073035, "learning_rate": 4.1252590315926435e-05, "loss": 0.313, "step": 13985 }, { "epoch": 6.525186567164179, "grad_norm": 0.3550087744126233, "learning_rate": 4.124572255134115e-05, "loss": 0.3159, "step": 13990 }, { "epoch": 6.527518656716418, "grad_norm": 0.33614163133769254, "learning_rate": 4.123885274289862e-05, "loss": 0.3274, "step": 13995 }, { "epoch": 6.529850746268656, "grad_norm": 0.3409109020153499, "learning_rate": 4.123198089162033e-05, "loss": 0.31, "step": 14000 }, { "epoch": 6.532182835820896, "grad_norm": 0.31696435348858315, "learning_rate": 4.122510699852803e-05, "loss": 0.314, "step": 14005 }, { "epoch": 6.534514925373134, "grad_norm": 0.3564016725900514, "learning_rate": 4.121823106464384e-05, "loss": 0.3098, "step": 14010 }, { "epoch": 6.536847014925373, "grad_norm": 0.3623537481722302, "learning_rate": 4.121135309099013e-05, "loss": 0.3159, "step": 14015 }, { "epoch": 6.539179104477612, "grad_norm": 0.37984360136463097, "learning_rate": 4.1204473078589575e-05, "loss": 0.3212, "step": 14020 }, { "epoch": 6.541511194029851, "grad_norm": 0.35184292790258903, "learning_rate": 4.119759102846518e-05, "loss": 0.3098, "step": 14025 }, { "epoch": 6.543843283582089, "grad_norm": 0.34231035856068265, "learning_rate": 4.119070694164024e-05, "loss": 0.3151, "step": 14030 }, { "epoch": 6.546175373134329, "grad_norm": 0.3430842248743943, "learning_rate": 4.1183820819138355e-05, "loss": 0.3181, "step": 14035 }, { "epoch": 6.548507462686567, "grad_norm": 0.3557286946985154, "learning_rate": 4.117693266198342e-05, "loss": 0.3121, "step": 14040 }, { "epoch": 6.550839552238806, "grad_norm": 0.3575247007136557, "learning_rate": 4.117004247119964e-05, "loss": 0.3171, "step": 14045 }, { "epoch": 6.553171641791045, "grad_norm": 0.34784873109499587, "learning_rate": 4.116315024781152e-05, "loss": 0.3208, "step": 14050 }, { "epoch": 6.555503731343284, "grad_norm": 0.3580596725481573, "learning_rate": 4.115625599284386e-05, "loss": 0.3143, "step": 14055 }, { "epoch": 6.557835820895522, "grad_norm": 0.3455796055142391, "learning_rate": 4.114935970732178e-05, "loss": 0.3135, "step": 14060 }, { "epoch": 6.5601679104477615, "grad_norm": 0.3463574707781244, "learning_rate": 4.114246139227069e-05, "loss": 0.3174, "step": 14065 }, { "epoch": 6.5625, "grad_norm": 0.3542089472143169, "learning_rate": 4.113556104871631e-05, "loss": 0.3192, "step": 14070 }, { "epoch": 6.5648320895522385, "grad_norm": 0.36010583464312784, "learning_rate": 4.112865867768464e-05, "loss": 0.3178, "step": 14075 }, { "epoch": 6.567164179104478, "grad_norm": 0.3601782107178461, "learning_rate": 4.112175428020199e-05, "loss": 0.3126, "step": 14080 }, { "epoch": 6.569496268656716, "grad_norm": 0.35423432677631883, "learning_rate": 4.1114847857295006e-05, "loss": 0.3159, "step": 14085 }, { "epoch": 6.571828358208955, "grad_norm": 0.37273747348472835, "learning_rate": 4.110793940999059e-05, "loss": 0.3244, "step": 14090 }, { "epoch": 6.574160447761194, "grad_norm": 0.34731737576119515, "learning_rate": 4.110102893931597e-05, "loss": 0.3264, "step": 14095 }, { "epoch": 6.576492537313433, "grad_norm": 0.3830704880306235, "learning_rate": 4.1094116446298645e-05, "loss": 0.3251, "step": 14100 }, { "epoch": 6.578824626865671, "grad_norm": 0.37797059271035327, "learning_rate": 4.1087201931966463e-05, "loss": 0.324, "step": 14105 }, { "epoch": 6.581156716417911, "grad_norm": 0.3411876811266971, "learning_rate": 4.108028539734753e-05, "loss": 0.3144, "step": 14110 }, { "epoch": 6.583488805970149, "grad_norm": 0.3567926834732331, "learning_rate": 4.1073366843470285e-05, "loss": 0.3147, "step": 14115 }, { "epoch": 6.585820895522388, "grad_norm": 0.3489032095606202, "learning_rate": 4.1066446271363426e-05, "loss": 0.3179, "step": 14120 }, { "epoch": 6.588152985074627, "grad_norm": 0.3366434915190471, "learning_rate": 4.1059523682056e-05, "loss": 0.3181, "step": 14125 }, { "epoch": 6.590485074626866, "grad_norm": 0.355278664165609, "learning_rate": 4.1052599076577306e-05, "loss": 0.309, "step": 14130 }, { "epoch": 6.592817164179104, "grad_norm": 0.35488591551270693, "learning_rate": 4.104567245595699e-05, "loss": 0.3286, "step": 14135 }, { "epoch": 6.595149253731344, "grad_norm": 0.3783438673194741, "learning_rate": 4.103874382122496e-05, "loss": 0.3179, "step": 14140 }, { "epoch": 6.597481343283582, "grad_norm": 0.35309975050332754, "learning_rate": 4.103181317341144e-05, "loss": 0.319, "step": 14145 }, { "epoch": 6.599813432835821, "grad_norm": 0.3552658191241982, "learning_rate": 4.1024880513546955e-05, "loss": 0.3229, "step": 14150 }, { "epoch": 6.60214552238806, "grad_norm": 0.33485337041392654, "learning_rate": 4.101794584266232e-05, "loss": 0.3156, "step": 14155 }, { "epoch": 6.604477611940299, "grad_norm": 0.3739819128953085, "learning_rate": 4.1011009161788655e-05, "loss": 0.3226, "step": 14160 }, { "epoch": 6.606809701492537, "grad_norm": 0.35597889584045217, "learning_rate": 4.100407047195738e-05, "loss": 0.3124, "step": 14165 }, { "epoch": 6.6091417910447765, "grad_norm": 0.3622635733303684, "learning_rate": 4.099712977420021e-05, "loss": 0.3212, "step": 14170 }, { "epoch": 6.611473880597015, "grad_norm": 0.369280077678431, "learning_rate": 4.099018706954916e-05, "loss": 0.3258, "step": 14175 }, { "epoch": 6.6138059701492535, "grad_norm": 0.3533709315925628, "learning_rate": 4.098324235903655e-05, "loss": 0.325, "step": 14180 }, { "epoch": 6.616138059701493, "grad_norm": 0.3635462224401335, "learning_rate": 4.0976295643694986e-05, "loss": 0.334, "step": 14185 }, { "epoch": 6.6184701492537314, "grad_norm": 0.3561046385227732, "learning_rate": 4.0969346924557374e-05, "loss": 0.3233, "step": 14190 }, { "epoch": 6.62080223880597, "grad_norm": 0.36277646646736655, "learning_rate": 4.096239620265693e-05, "loss": 0.3177, "step": 14195 }, { "epoch": 6.6231343283582085, "grad_norm": 0.3688003974289781, "learning_rate": 4.095544347902715e-05, "loss": 0.3198, "step": 14200 }, { "epoch": 6.625466417910448, "grad_norm": 0.3438516348674343, "learning_rate": 4.0948488754701846e-05, "loss": 0.3137, "step": 14205 }, { "epoch": 6.627798507462686, "grad_norm": 0.36500152072876896, "learning_rate": 4.094153203071512e-05, "loss": 0.3133, "step": 14210 }, { "epoch": 6.630130597014926, "grad_norm": 0.3670148139985481, "learning_rate": 4.0934573308101376e-05, "loss": 0.3134, "step": 14215 }, { "epoch": 6.632462686567164, "grad_norm": 0.36044335264970107, "learning_rate": 4.092761258789529e-05, "loss": 0.3174, "step": 14220 }, { "epoch": 6.634794776119403, "grad_norm": 0.34843119726959354, "learning_rate": 4.092064987113186e-05, "loss": 0.3134, "step": 14225 }, { "epoch": 6.637126865671641, "grad_norm": 0.362040618411942, "learning_rate": 4.091368515884638e-05, "loss": 0.3278, "step": 14230 }, { "epoch": 6.639458955223881, "grad_norm": 0.35051667793341706, "learning_rate": 4.0906718452074435e-05, "loss": 0.3134, "step": 14235 }, { "epoch": 6.641791044776119, "grad_norm": 0.37204448415983815, "learning_rate": 4.089974975185192e-05, "loss": 0.3161, "step": 14240 }, { "epoch": 6.644123134328359, "grad_norm": 0.3673022385252952, "learning_rate": 4.0892779059214994e-05, "loss": 0.3208, "step": 14245 }, { "epoch": 6.646455223880597, "grad_norm": 0.34001178485429434, "learning_rate": 4.088580637520015e-05, "loss": 0.307, "step": 14250 }, { "epoch": 6.648787313432836, "grad_norm": 0.35045306893980854, "learning_rate": 4.087883170084414e-05, "loss": 0.3174, "step": 14255 }, { "epoch": 6.651119402985074, "grad_norm": 0.3563521410149195, "learning_rate": 4.087185503718404e-05, "loss": 0.3123, "step": 14260 }, { "epoch": 6.653451492537314, "grad_norm": 0.3520792602639194, "learning_rate": 4.0864876385257225e-05, "loss": 0.3107, "step": 14265 }, { "epoch": 6.655783582089552, "grad_norm": 0.35199051541832593, "learning_rate": 4.0857895746101335e-05, "loss": 0.3195, "step": 14270 }, { "epoch": 6.6581156716417915, "grad_norm": 0.3547485837387309, "learning_rate": 4.085091312075434e-05, "loss": 0.3223, "step": 14275 }, { "epoch": 6.66044776119403, "grad_norm": 0.36442811997528435, "learning_rate": 4.084392851025447e-05, "loss": 0.3169, "step": 14280 }, { "epoch": 6.6627798507462686, "grad_norm": 0.33361004342333606, "learning_rate": 4.083694191564028e-05, "loss": 0.3192, "step": 14285 }, { "epoch": 6.665111940298507, "grad_norm": 0.36597667246046955, "learning_rate": 4.082995333795063e-05, "loss": 0.3227, "step": 14290 }, { "epoch": 6.6674440298507465, "grad_norm": 0.351235287431919, "learning_rate": 4.0822962778224613e-05, "loss": 0.3157, "step": 14295 }, { "epoch": 6.669776119402985, "grad_norm": 0.3662387328412152, "learning_rate": 4.081597023750169e-05, "loss": 0.3215, "step": 14300 }, { "epoch": 6.6721082089552235, "grad_norm": 0.3448648645392874, "learning_rate": 4.0808975716821574e-05, "loss": 0.3145, "step": 14305 }, { "epoch": 6.674440298507463, "grad_norm": 0.3527422664137503, "learning_rate": 4.0801979217224285e-05, "loss": 0.308, "step": 14310 }, { "epoch": 6.676772388059701, "grad_norm": 0.35844536543214367, "learning_rate": 4.079498073975013e-05, "loss": 0.322, "step": 14315 }, { "epoch": 6.67910447761194, "grad_norm": 0.34948612804318585, "learning_rate": 4.078798028543974e-05, "loss": 0.315, "step": 14320 }, { "epoch": 6.681436567164179, "grad_norm": 0.32957516330876585, "learning_rate": 4.078097785533398e-05, "loss": 0.3187, "step": 14325 }, { "epoch": 6.683768656716418, "grad_norm": 0.37627974324610763, "learning_rate": 4.0773973450474055e-05, "loss": 0.334, "step": 14330 }, { "epoch": 6.686100746268656, "grad_norm": 0.3671190479003281, "learning_rate": 4.076696707190147e-05, "loss": 0.3236, "step": 14335 }, { "epoch": 6.688432835820896, "grad_norm": 0.3579115812141965, "learning_rate": 4.0759958720658e-05, "loss": 0.323, "step": 14340 }, { "epoch": 6.690764925373134, "grad_norm": 0.3671402707307194, "learning_rate": 4.07529483977857e-05, "loss": 0.3269, "step": 14345 }, { "epoch": 6.693097014925373, "grad_norm": 0.3441226013276048, "learning_rate": 4.074593610432695e-05, "loss": 0.3219, "step": 14350 }, { "epoch": 6.695429104477612, "grad_norm": 0.3501565075337504, "learning_rate": 4.073892184132442e-05, "loss": 0.3112, "step": 14355 }, { "epoch": 6.697761194029851, "grad_norm": 0.37482713955935204, "learning_rate": 4.073190560982106e-05, "loss": 0.3201, "step": 14360 }, { "epoch": 6.700093283582089, "grad_norm": 0.3870700368314624, "learning_rate": 4.072488741086011e-05, "loss": 0.3285, "step": 14365 }, { "epoch": 6.702425373134329, "grad_norm": 0.36044457545527603, "learning_rate": 4.071786724548511e-05, "loss": 0.3138, "step": 14370 }, { "epoch": 6.704757462686567, "grad_norm": 0.3446394953347121, "learning_rate": 4.07108451147399e-05, "loss": 0.3184, "step": 14375 }, { "epoch": 6.707089552238806, "grad_norm": 0.34148580008024015, "learning_rate": 4.07038210196686e-05, "loss": 0.3186, "step": 14380 }, { "epoch": 6.709421641791045, "grad_norm": 0.36059676968987076, "learning_rate": 4.0696794961315605e-05, "loss": 0.3252, "step": 14385 }, { "epoch": 6.711753731343284, "grad_norm": 0.3565905127769683, "learning_rate": 4.068976694072565e-05, "loss": 0.3217, "step": 14390 }, { "epoch": 6.714085820895522, "grad_norm": 0.3519812064646689, "learning_rate": 4.068273695894373e-05, "loss": 0.3147, "step": 14395 }, { "epoch": 6.7164179104477615, "grad_norm": 0.3403193337195221, "learning_rate": 4.067570501701513e-05, "loss": 0.3105, "step": 14400 }, { "epoch": 6.71875, "grad_norm": 0.3574092684794178, "learning_rate": 4.066867111598542e-05, "loss": 0.3258, "step": 14405 }, { "epoch": 6.7210820895522385, "grad_norm": 0.3541254123229967, "learning_rate": 4.0661635256900505e-05, "loss": 0.3176, "step": 14410 }, { "epoch": 6.723414179104478, "grad_norm": 0.36373791951304413, "learning_rate": 4.065459744080652e-05, "loss": 0.3239, "step": 14415 }, { "epoch": 6.725746268656716, "grad_norm": 0.3392088648618669, "learning_rate": 4.064755766874993e-05, "loss": 0.3176, "step": 14420 }, { "epoch": 6.728078358208955, "grad_norm": 0.3450339775688847, "learning_rate": 4.064051594177749e-05, "loss": 0.3174, "step": 14425 }, { "epoch": 6.730410447761194, "grad_norm": 0.3878340843890398, "learning_rate": 4.0633472260936224e-05, "loss": 0.3179, "step": 14430 }, { "epoch": 6.732742537313433, "grad_norm": 0.3611700151735808, "learning_rate": 4.0626426627273474e-05, "loss": 0.3246, "step": 14435 }, { "epoch": 6.735074626865671, "grad_norm": 0.3600687321890789, "learning_rate": 4.061937904183685e-05, "loss": 0.3228, "step": 14440 }, { "epoch": 6.737406716417911, "grad_norm": 0.34308526491828384, "learning_rate": 4.061232950567427e-05, "loss": 0.3246, "step": 14445 }, { "epoch": 6.739738805970149, "grad_norm": 0.35426543647311354, "learning_rate": 4.060527801983391e-05, "loss": 0.3155, "step": 14450 }, { "epoch": 6.742070895522388, "grad_norm": 0.34158238224780374, "learning_rate": 4.0598224585364276e-05, "loss": 0.3191, "step": 14455 }, { "epoch": 6.744402985074627, "grad_norm": 0.35514874634470217, "learning_rate": 4.0591169203314145e-05, "loss": 0.3074, "step": 14460 }, { "epoch": 6.746735074626866, "grad_norm": 0.3630226134126628, "learning_rate": 4.058411187473257e-05, "loss": 0.3227, "step": 14465 }, { "epoch": 6.749067164179104, "grad_norm": 0.3563801488190617, "learning_rate": 4.057705260066894e-05, "loss": 0.324, "step": 14470 }, { "epoch": 6.751399253731344, "grad_norm": 0.3647670688137035, "learning_rate": 4.056999138217287e-05, "loss": 0.3176, "step": 14475 }, { "epoch": 6.753731343283582, "grad_norm": 0.3472201155385524, "learning_rate": 4.056292822029432e-05, "loss": 0.3156, "step": 14480 }, { "epoch": 6.756063432835821, "grad_norm": 0.3360459133134726, "learning_rate": 4.055586311608349e-05, "loss": 0.3121, "step": 14485 }, { "epoch": 6.75839552238806, "grad_norm": 0.33915080532614883, "learning_rate": 4.05487960705909e-05, "loss": 0.3199, "step": 14490 }, { "epoch": 6.760727611940299, "grad_norm": 0.34936666685890155, "learning_rate": 4.054172708486737e-05, "loss": 0.3145, "step": 14495 }, { "epoch": 6.763059701492537, "grad_norm": 0.33867262211433785, "learning_rate": 4.053465615996397e-05, "loss": 0.3239, "step": 14500 }, { "epoch": 6.7653917910447765, "grad_norm": 0.3372021226474374, "learning_rate": 4.05275832969321e-05, "loss": 0.3291, "step": 14505 }, { "epoch": 6.767723880597015, "grad_norm": 0.3761637792105787, "learning_rate": 4.0520508496823395e-05, "loss": 0.319, "step": 14510 }, { "epoch": 6.7700559701492535, "grad_norm": 0.3395465551867107, "learning_rate": 4.051343176068984e-05, "loss": 0.3051, "step": 14515 }, { "epoch": 6.772388059701493, "grad_norm": 0.3486289625064907, "learning_rate": 4.050635308958366e-05, "loss": 0.3196, "step": 14520 }, { "epoch": 6.7747201492537314, "grad_norm": 0.3530566006021559, "learning_rate": 4.04992724845574e-05, "loss": 0.3249, "step": 14525 }, { "epoch": 6.77705223880597, "grad_norm": 0.37217626071647236, "learning_rate": 4.0492189946663864e-05, "loss": 0.3253, "step": 14530 }, { "epoch": 6.7793843283582085, "grad_norm": 0.37676270564376674, "learning_rate": 4.048510547695616e-05, "loss": 0.31, "step": 14535 }, { "epoch": 6.781716417910448, "grad_norm": 0.3529923544995505, "learning_rate": 4.047801907648769e-05, "loss": 0.3211, "step": 14540 }, { "epoch": 6.784048507462686, "grad_norm": 0.3719762652725489, "learning_rate": 4.047093074631213e-05, "loss": 0.3246, "step": 14545 }, { "epoch": 6.786380597014926, "grad_norm": 0.3533254790293355, "learning_rate": 4.046384048748344e-05, "loss": 0.3254, "step": 14550 }, { "epoch": 6.788712686567164, "grad_norm": 0.3611672812139889, "learning_rate": 4.045674830105587e-05, "loss": 0.3304, "step": 14555 }, { "epoch": 6.791044776119403, "grad_norm": 0.3628341125312176, "learning_rate": 4.0449654188083985e-05, "loss": 0.3138, "step": 14560 }, { "epoch": 6.793376865671641, "grad_norm": 0.3534038783313159, "learning_rate": 4.0442558149622586e-05, "loss": 0.3203, "step": 14565 }, { "epoch": 6.795708955223881, "grad_norm": 0.351943063412137, "learning_rate": 4.04354601867268e-05, "loss": 0.3202, "step": 14570 }, { "epoch": 6.798041044776119, "grad_norm": 0.33716610392212937, "learning_rate": 4.0428360300452024e-05, "loss": 0.3086, "step": 14575 }, { "epoch": 6.800373134328359, "grad_norm": 0.34352658725675145, "learning_rate": 4.042125849185394e-05, "loss": 0.3249, "step": 14580 }, { "epoch": 6.802705223880597, "grad_norm": 0.3553889971241233, "learning_rate": 4.0414154761988506e-05, "loss": 0.3271, "step": 14585 }, { "epoch": 6.805037313432836, "grad_norm": 0.35541967667406316, "learning_rate": 4.040704911191201e-05, "loss": 0.3173, "step": 14590 }, { "epoch": 6.807369402985074, "grad_norm": 0.3461379209602243, "learning_rate": 4.0399941542680956e-05, "loss": 0.3212, "step": 14595 }, { "epoch": 6.809701492537314, "grad_norm": 0.34136602598239785, "learning_rate": 4.0392832055352205e-05, "loss": 0.3187, "step": 14600 }, { "epoch": 6.812033582089552, "grad_norm": 0.3421972480942586, "learning_rate": 4.038572065098286e-05, "loss": 0.3214, "step": 14605 }, { "epoch": 6.8143656716417915, "grad_norm": 0.3485602584230024, "learning_rate": 4.0378607330630304e-05, "loss": 0.3184, "step": 14610 }, { "epoch": 6.81669776119403, "grad_norm": 0.37152657660821614, "learning_rate": 4.037149209535222e-05, "loss": 0.3308, "step": 14615 }, { "epoch": 6.8190298507462686, "grad_norm": 0.3370432219852723, "learning_rate": 4.036437494620661e-05, "loss": 0.3192, "step": 14620 }, { "epoch": 6.821361940298507, "grad_norm": 0.3468459146895508, "learning_rate": 4.0357255884251694e-05, "loss": 0.3205, "step": 14625 }, { "epoch": 6.8236940298507465, "grad_norm": 0.3427905063304711, "learning_rate": 4.0350134910546e-05, "loss": 0.3222, "step": 14630 }, { "epoch": 6.826026119402985, "grad_norm": 0.32613759855568764, "learning_rate": 4.0343012026148384e-05, "loss": 0.3208, "step": 14635 }, { "epoch": 6.8283582089552235, "grad_norm": 0.34266819525600484, "learning_rate": 4.033588723211793e-05, "loss": 0.3294, "step": 14640 }, { "epoch": 6.830690298507463, "grad_norm": 0.3610116580835583, "learning_rate": 4.032876052951402e-05, "loss": 0.3105, "step": 14645 }, { "epoch": 6.833022388059701, "grad_norm": 0.35027660071453665, "learning_rate": 4.032163191939633e-05, "loss": 0.3142, "step": 14650 }, { "epoch": 6.83535447761194, "grad_norm": 0.341755026255196, "learning_rate": 4.0314501402824825e-05, "loss": 0.3125, "step": 14655 }, { "epoch": 6.837686567164179, "grad_norm": 0.3499357043458593, "learning_rate": 4.030736898085974e-05, "loss": 0.3266, "step": 14660 }, { "epoch": 6.840018656716418, "grad_norm": 0.3611812612877206, "learning_rate": 4.03002346545616e-05, "loss": 0.3301, "step": 14665 }, { "epoch": 6.842350746268656, "grad_norm": 0.362210540578717, "learning_rate": 4.02930984249912e-05, "loss": 0.3219, "step": 14670 }, { "epoch": 6.844682835820896, "grad_norm": 0.35113018440147525, "learning_rate": 4.028596029320965e-05, "loss": 0.3261, "step": 14675 }, { "epoch": 6.847014925373134, "grad_norm": 0.3569163296337719, "learning_rate": 4.02788202602783e-05, "loss": 0.3189, "step": 14680 }, { "epoch": 6.849347014925373, "grad_norm": 0.3541764220502976, "learning_rate": 4.027167832725882e-05, "loss": 0.3184, "step": 14685 }, { "epoch": 6.851679104477612, "grad_norm": 0.34995982741449516, "learning_rate": 4.026453449521313e-05, "loss": 0.3143, "step": 14690 }, { "epoch": 6.854011194029851, "grad_norm": 0.3635783243040878, "learning_rate": 4.025738876520347e-05, "loss": 0.3232, "step": 14695 }, { "epoch": 6.856343283582089, "grad_norm": 0.3457267067711015, "learning_rate": 4.025024113829233e-05, "loss": 0.3187, "step": 14700 }, { "epoch": 6.858675373134329, "grad_norm": 0.3431808968227393, "learning_rate": 4.024309161554249e-05, "loss": 0.3297, "step": 14705 }, { "epoch": 6.861007462686567, "grad_norm": 0.3473273651615546, "learning_rate": 4.023594019801702e-05, "loss": 0.3157, "step": 14710 }, { "epoch": 6.863339552238806, "grad_norm": 0.37276791435198664, "learning_rate": 4.022878688677927e-05, "loss": 0.33, "step": 14715 }, { "epoch": 6.865671641791045, "grad_norm": 0.34826405530034427, "learning_rate": 4.022163168289287e-05, "loss": 0.3267, "step": 14720 }, { "epoch": 6.868003731343284, "grad_norm": 0.3651069985384104, "learning_rate": 4.021447458742172e-05, "loss": 0.3304, "step": 14725 }, { "epoch": 6.870335820895522, "grad_norm": 0.3684525744834592, "learning_rate": 4.020731560143002e-05, "loss": 0.3273, "step": 14730 }, { "epoch": 6.8726679104477615, "grad_norm": 0.35243806291623087, "learning_rate": 4.0200154725982245e-05, "loss": 0.3312, "step": 14735 }, { "epoch": 6.875, "grad_norm": 0.3707428607214628, "learning_rate": 4.019299196214315e-05, "loss": 0.3166, "step": 14740 }, { "epoch": 6.8773320895522385, "grad_norm": 0.36308833652605843, "learning_rate": 4.0185827310977756e-05, "loss": 0.3217, "step": 14745 }, { "epoch": 6.879664179104478, "grad_norm": 0.3626178160598027, "learning_rate": 4.017866077355139e-05, "loss": 0.3387, "step": 14750 }, { "epoch": 6.881996268656716, "grad_norm": 0.33688431174568007, "learning_rate": 4.0171492350929644e-05, "loss": 0.321, "step": 14755 }, { "epoch": 6.884328358208955, "grad_norm": 0.3309270327255603, "learning_rate": 4.016432204417839e-05, "loss": 0.3223, "step": 14760 }, { "epoch": 6.886660447761194, "grad_norm": 0.35001909964432565, "learning_rate": 4.015714985436379e-05, "loss": 0.3227, "step": 14765 }, { "epoch": 6.888992537313433, "grad_norm": 0.3416846349091918, "learning_rate": 4.014997578255227e-05, "loss": 0.3274, "step": 14770 }, { "epoch": 6.891324626865671, "grad_norm": 0.3424439066043812, "learning_rate": 4.014279982981057e-05, "loss": 0.3249, "step": 14775 }, { "epoch": 6.893656716417911, "grad_norm": 0.3518174914240323, "learning_rate": 4.0135621997205654e-05, "loss": 0.3203, "step": 14780 }, { "epoch": 6.895988805970149, "grad_norm": 0.36200899964594335, "learning_rate": 4.0128442285804815e-05, "loss": 0.3277, "step": 14785 }, { "epoch": 6.898320895522388, "grad_norm": 0.358226875745305, "learning_rate": 4.01212606966756e-05, "loss": 0.327, "step": 14790 }, { "epoch": 6.900652985074627, "grad_norm": 0.3532927491663648, "learning_rate": 4.0114077230885847e-05, "loss": 0.3155, "step": 14795 }, { "epoch": 6.902985074626866, "grad_norm": 0.3344970170321555, "learning_rate": 4.010689188950367e-05, "loss": 0.3181, "step": 14800 }, { "epoch": 6.905317164179104, "grad_norm": 0.33854073751168173, "learning_rate": 4.009970467359746e-05, "loss": 0.311, "step": 14805 }, { "epoch": 6.907649253731344, "grad_norm": 0.3616734464322388, "learning_rate": 4.009251558423588e-05, "loss": 0.333, "step": 14810 }, { "epoch": 6.909981343283582, "grad_norm": 0.3517867918624647, "learning_rate": 4.008532462248789e-05, "loss": 0.3187, "step": 14815 }, { "epoch": 6.912313432835821, "grad_norm": 0.35261897519745145, "learning_rate": 4.00781317894227e-05, "loss": 0.3288, "step": 14820 }, { "epoch": 6.91464552238806, "grad_norm": 0.3257856631654078, "learning_rate": 4.007093708610984e-05, "loss": 0.3156, "step": 14825 }, { "epoch": 6.916977611940299, "grad_norm": 0.35652301559583355, "learning_rate": 4.006374051361907e-05, "loss": 0.3157, "step": 14830 }, { "epoch": 6.919309701492537, "grad_norm": 0.3500278387020452, "learning_rate": 4.005654207302047e-05, "loss": 0.3231, "step": 14835 }, { "epoch": 6.9216417910447765, "grad_norm": 0.3392599393908404, "learning_rate": 4.004934176538436e-05, "loss": 0.3166, "step": 14840 }, { "epoch": 6.923973880597015, "grad_norm": 0.34640575906117044, "learning_rate": 4.004213959178137e-05, "loss": 0.3358, "step": 14845 }, { "epoch": 6.9263059701492535, "grad_norm": 0.3518877921516153, "learning_rate": 4.0034935553282396e-05, "loss": 0.3193, "step": 14850 }, { "epoch": 6.928638059701493, "grad_norm": 0.3444331145126578, "learning_rate": 4.00277296509586e-05, "loss": 0.3247, "step": 14855 }, { "epoch": 6.9309701492537314, "grad_norm": 0.3439236616091342, "learning_rate": 4.002052188588144e-05, "loss": 0.3299, "step": 14860 }, { "epoch": 6.93330223880597, "grad_norm": 0.3501587253214695, "learning_rate": 4.001331225912263e-05, "loss": 0.3232, "step": 14865 }, { "epoch": 6.9356343283582085, "grad_norm": 0.3426464902841555, "learning_rate": 4.000610077175419e-05, "loss": 0.3168, "step": 14870 }, { "epoch": 6.937966417910448, "grad_norm": 0.32752372804620744, "learning_rate": 3.999888742484838e-05, "loss": 0.3293, "step": 14875 }, { "epoch": 6.940298507462686, "grad_norm": 0.34836590008570484, "learning_rate": 3.999167221947777e-05, "loss": 0.3263, "step": 14880 }, { "epoch": 6.942630597014926, "grad_norm": 0.3394033378282361, "learning_rate": 3.9984455156715176e-05, "loss": 0.3211, "step": 14885 }, { "epoch": 6.944962686567164, "grad_norm": 0.3500639683797342, "learning_rate": 3.997723623763372e-05, "loss": 0.3307, "step": 14890 }, { "epoch": 6.947294776119403, "grad_norm": 0.37188996355059045, "learning_rate": 3.997001546330679e-05, "loss": 0.3131, "step": 14895 }, { "epoch": 6.949626865671641, "grad_norm": 0.3555833764232195, "learning_rate": 3.9962792834808034e-05, "loss": 0.3337, "step": 14900 }, { "epoch": 6.951958955223881, "grad_norm": 0.34087877730583344, "learning_rate": 3.9955568353211384e-05, "loss": 0.32, "step": 14905 }, { "epoch": 6.954291044776119, "grad_norm": 0.36744094999078747, "learning_rate": 3.9948342019591066e-05, "loss": 0.3232, "step": 14910 }, { "epoch": 6.956623134328359, "grad_norm": 0.3432899842481004, "learning_rate": 3.994111383502156e-05, "loss": 0.3355, "step": 14915 }, { "epoch": 6.958955223880597, "grad_norm": 0.364878190304042, "learning_rate": 3.993388380057763e-05, "loss": 0.3337, "step": 14920 }, { "epoch": 6.961287313432836, "grad_norm": 0.3316304511298171, "learning_rate": 3.992665191733431e-05, "loss": 0.3201, "step": 14925 }, { "epoch": 6.963619402985074, "grad_norm": 0.3457480711943086, "learning_rate": 3.9919418186366905e-05, "loss": 0.3271, "step": 14930 }, { "epoch": 6.965951492537314, "grad_norm": 0.36897351281589635, "learning_rate": 3.991218260875101e-05, "loss": 0.334, "step": 14935 }, { "epoch": 6.968283582089552, "grad_norm": 0.36257859517739693, "learning_rate": 3.9904945185562484e-05, "loss": 0.3177, "step": 14940 }, { "epoch": 6.9706156716417915, "grad_norm": 0.33277338557965536, "learning_rate": 3.989770591787747e-05, "loss": 0.3176, "step": 14945 }, { "epoch": 6.97294776119403, "grad_norm": 0.3338303109890001, "learning_rate": 3.989046480677236e-05, "loss": 0.3178, "step": 14950 }, { "epoch": 6.9752798507462686, "grad_norm": 0.3379091122898388, "learning_rate": 3.988322185332386e-05, "loss": 0.3291, "step": 14955 }, { "epoch": 6.977611940298507, "grad_norm": 0.3634840652263816, "learning_rate": 3.987597705860891e-05, "loss": 0.3398, "step": 14960 }, { "epoch": 6.9799440298507465, "grad_norm": 0.3323345849356805, "learning_rate": 3.9868730423704754e-05, "loss": 0.3258, "step": 14965 }, { "epoch": 6.982276119402985, "grad_norm": 0.3244616148240234, "learning_rate": 3.986148194968888e-05, "loss": 0.3216, "step": 14970 }, { "epoch": 6.9846082089552235, "grad_norm": 0.3518735018394079, "learning_rate": 3.985423163763909e-05, "loss": 0.3327, "step": 14975 }, { "epoch": 6.986940298507463, "grad_norm": 0.3538687966341984, "learning_rate": 3.9846979488633415e-05, "loss": 0.3248, "step": 14980 }, { "epoch": 6.989272388059701, "grad_norm": 0.36358946538819714, "learning_rate": 3.9839725503750185e-05, "loss": 0.3313, "step": 14985 }, { "epoch": 6.99160447761194, "grad_norm": 0.347563847116603, "learning_rate": 3.9832469684068007e-05, "loss": 0.322, "step": 14990 }, { "epoch": 6.993936567164179, "grad_norm": 0.3522878239273907, "learning_rate": 3.982521203066575e-05, "loss": 0.3284, "step": 14995 }, { "epoch": 6.996268656716418, "grad_norm": 0.3495800353587978, "learning_rate": 3.9817952544622554e-05, "loss": 0.325, "step": 15000 }, { "epoch": 6.998600746268656, "grad_norm": 0.3267978829116378, "learning_rate": 3.981069122701784e-05, "loss": 0.3236, "step": 15005 }, { "epoch": 7.000932835820896, "grad_norm": 0.3652618225993664, "learning_rate": 3.9803428078931276e-05, "loss": 0.2969, "step": 15010 }, { "epoch": 7.003264925373134, "grad_norm": 0.4308162004499188, "learning_rate": 3.979616310144284e-05, "loss": 0.251, "step": 15015 }, { "epoch": 7.005597014925373, "grad_norm": 0.39016249232877487, "learning_rate": 3.978889629563277e-05, "loss": 0.2446, "step": 15020 }, { "epoch": 7.007929104477612, "grad_norm": 0.384540030623715, "learning_rate": 3.9781627662581575e-05, "loss": 0.2526, "step": 15025 }, { "epoch": 7.010261194029851, "grad_norm": 0.3769395356578833, "learning_rate": 3.977435720337e-05, "loss": 0.2485, "step": 15030 }, { "epoch": 7.012593283582089, "grad_norm": 0.41343035115571397, "learning_rate": 3.976708491907912e-05, "loss": 0.2548, "step": 15035 }, { "epoch": 7.014925373134329, "grad_norm": 0.3812060372212582, "learning_rate": 3.9759810810790236e-05, "loss": 0.2435, "step": 15040 }, { "epoch": 7.017257462686567, "grad_norm": 0.37075369250138435, "learning_rate": 3.9752534879584954e-05, "loss": 0.2537, "step": 15045 }, { "epoch": 7.019589552238806, "grad_norm": 0.4022492953657317, "learning_rate": 3.9745257126545146e-05, "loss": 0.2539, "step": 15050 }, { "epoch": 7.021921641791045, "grad_norm": 0.3862723105390634, "learning_rate": 3.973797755275291e-05, "loss": 0.2513, "step": 15055 }, { "epoch": 7.024253731343284, "grad_norm": 0.36859104275555393, "learning_rate": 3.9730696159290656e-05, "loss": 0.2385, "step": 15060 }, { "epoch": 7.026585820895522, "grad_norm": 0.41096701072028136, "learning_rate": 3.9723412947241085e-05, "loss": 0.2574, "step": 15065 }, { "epoch": 7.0289179104477615, "grad_norm": 0.37246988903043615, "learning_rate": 3.971612791768712e-05, "loss": 0.2529, "step": 15070 }, { "epoch": 7.03125, "grad_norm": 0.37870626738117047, "learning_rate": 3.970884107171198e-05, "loss": 0.2497, "step": 15075 }, { "epoch": 7.0335820895522385, "grad_norm": 0.4042879760913247, "learning_rate": 3.970155241039914e-05, "loss": 0.2462, "step": 15080 }, { "epoch": 7.035914179104478, "grad_norm": 0.3822068633512142, "learning_rate": 3.969426193483237e-05, "loss": 0.2508, "step": 15085 }, { "epoch": 7.038246268656716, "grad_norm": 0.3920119812968771, "learning_rate": 3.968696964609568e-05, "loss": 0.2453, "step": 15090 }, { "epoch": 7.040578358208955, "grad_norm": 0.38841003910500393, "learning_rate": 3.967967554527338e-05, "loss": 0.254, "step": 15095 }, { "epoch": 7.042910447761194, "grad_norm": 0.38553288126621976, "learning_rate": 3.967237963345001e-05, "loss": 0.2407, "step": 15100 }, { "epoch": 7.045242537313433, "grad_norm": 0.38075018686384127, "learning_rate": 3.966508191171041e-05, "loss": 0.2434, "step": 15105 }, { "epoch": 7.047574626865671, "grad_norm": 0.3773436190924883, "learning_rate": 3.9657782381139696e-05, "loss": 0.259, "step": 15110 }, { "epoch": 7.049906716417911, "grad_norm": 0.40210027437544066, "learning_rate": 3.965048104282323e-05, "loss": 0.2569, "step": 15115 }, { "epoch": 7.052238805970149, "grad_norm": 0.38342532690048636, "learning_rate": 3.964317789784664e-05, "loss": 0.249, "step": 15120 }, { "epoch": 7.054570895522388, "grad_norm": 0.36867273629568353, "learning_rate": 3.963587294729585e-05, "loss": 0.2467, "step": 15125 }, { "epoch": 7.056902985074627, "grad_norm": 0.3818470458113307, "learning_rate": 3.962856619225703e-05, "loss": 0.2532, "step": 15130 }, { "epoch": 7.059235074626866, "grad_norm": 0.3900955283618013, "learning_rate": 3.962125763381661e-05, "loss": 0.2522, "step": 15135 }, { "epoch": 7.061567164179104, "grad_norm": 0.3664324214234312, "learning_rate": 3.961394727306133e-05, "loss": 0.2504, "step": 15140 }, { "epoch": 7.063899253731344, "grad_norm": 0.38251396736389126, "learning_rate": 3.9606635111078156e-05, "loss": 0.2546, "step": 15145 }, { "epoch": 7.066231343283582, "grad_norm": 0.41458243386584287, "learning_rate": 3.9599321148954325e-05, "loss": 0.2464, "step": 15150 }, { "epoch": 7.068563432835821, "grad_norm": 0.3874755330480218, "learning_rate": 3.959200538777738e-05, "loss": 0.2513, "step": 15155 }, { "epoch": 7.07089552238806, "grad_norm": 0.3788332498786341, "learning_rate": 3.958468782863508e-05, "loss": 0.2516, "step": 15160 }, { "epoch": 7.073227611940299, "grad_norm": 0.38788173499962464, "learning_rate": 3.957736847261548e-05, "loss": 0.2497, "step": 15165 }, { "epoch": 7.075559701492537, "grad_norm": 0.4407900481817876, "learning_rate": 3.9570047320806916e-05, "loss": 0.2527, "step": 15170 }, { "epoch": 7.0778917910447765, "grad_norm": 0.4096120925404942, "learning_rate": 3.956272437429796e-05, "loss": 0.2473, "step": 15175 }, { "epoch": 7.080223880597015, "grad_norm": 0.4416173773699138, "learning_rate": 3.955539963417746e-05, "loss": 0.2512, "step": 15180 }, { "epoch": 7.0825559701492535, "grad_norm": 0.39966592649068433, "learning_rate": 3.954807310153454e-05, "loss": 0.2535, "step": 15185 }, { "epoch": 7.084888059701493, "grad_norm": 0.4008157114812891, "learning_rate": 3.954074477745859e-05, "loss": 0.2494, "step": 15190 }, { "epoch": 7.0872201492537314, "grad_norm": 0.4122488082091167, "learning_rate": 3.9533414663039246e-05, "loss": 0.251, "step": 15195 }, { "epoch": 7.08955223880597, "grad_norm": 0.36907719390628047, "learning_rate": 3.952608275936644e-05, "loss": 0.2558, "step": 15200 }, { "epoch": 7.091884328358209, "grad_norm": 0.386401424987472, "learning_rate": 3.951874906753035e-05, "loss": 0.2566, "step": 15205 }, { "epoch": 7.094216417910448, "grad_norm": 0.3857035432540853, "learning_rate": 3.9511413588621435e-05, "loss": 0.2575, "step": 15210 }, { "epoch": 7.096548507462686, "grad_norm": 0.3962388049925765, "learning_rate": 3.9504076323730396e-05, "loss": 0.2529, "step": 15215 }, { "epoch": 7.098880597014926, "grad_norm": 0.38076977633517944, "learning_rate": 3.949673727394823e-05, "loss": 0.2514, "step": 15220 }, { "epoch": 7.101212686567164, "grad_norm": 0.39906750276060354, "learning_rate": 3.948939644036616e-05, "loss": 0.2478, "step": 15225 }, { "epoch": 7.103544776119403, "grad_norm": 0.39351480053601023, "learning_rate": 3.9482053824075716e-05, "loss": 0.2576, "step": 15230 }, { "epoch": 7.105876865671641, "grad_norm": 0.40115090375408247, "learning_rate": 3.947470942616868e-05, "loss": 0.2636, "step": 15235 }, { "epoch": 7.108208955223881, "grad_norm": 0.4017002347990251, "learning_rate": 3.946736324773707e-05, "loss": 0.2644, "step": 15240 }, { "epoch": 7.110541044776119, "grad_norm": 0.3856159224436017, "learning_rate": 3.946001528987322e-05, "loss": 0.253, "step": 15245 }, { "epoch": 7.112873134328359, "grad_norm": 0.3735037022789581, "learning_rate": 3.945266555366968e-05, "loss": 0.2528, "step": 15250 }, { "epoch": 7.115205223880597, "grad_norm": 0.369448165680739, "learning_rate": 3.94453140402193e-05, "loss": 0.2585, "step": 15255 }, { "epoch": 7.117537313432836, "grad_norm": 0.36419925294654765, "learning_rate": 3.943796075061517e-05, "loss": 0.2425, "step": 15260 }, { "epoch": 7.119869402985074, "grad_norm": 0.3572209362300009, "learning_rate": 3.943060568595065e-05, "loss": 0.2482, "step": 15265 }, { "epoch": 7.122201492537314, "grad_norm": 0.4006770406114497, "learning_rate": 3.942324884731938e-05, "loss": 0.2611, "step": 15270 }, { "epoch": 7.124533582089552, "grad_norm": 0.3739664502545242, "learning_rate": 3.941589023581524e-05, "loss": 0.2535, "step": 15275 }, { "epoch": 7.126865671641791, "grad_norm": 0.3723391298950818, "learning_rate": 3.940852985253239e-05, "loss": 0.2498, "step": 15280 }, { "epoch": 7.12919776119403, "grad_norm": 0.373256562020474, "learning_rate": 3.940116769856526e-05, "loss": 0.263, "step": 15285 }, { "epoch": 7.1315298507462686, "grad_norm": 0.38155403195177856, "learning_rate": 3.9393803775008506e-05, "loss": 0.25, "step": 15290 }, { "epoch": 7.133861940298507, "grad_norm": 0.38311292991903906, "learning_rate": 3.9386438082957096e-05, "loss": 0.2582, "step": 15295 }, { "epoch": 7.1361940298507465, "grad_norm": 0.39510084100127435, "learning_rate": 3.937907062350622e-05, "loss": 0.2538, "step": 15300 }, { "epoch": 7.138526119402985, "grad_norm": 0.3870374419544877, "learning_rate": 3.937170139775137e-05, "loss": 0.2571, "step": 15305 }, { "epoch": 7.1408582089552235, "grad_norm": 0.39196240368685725, "learning_rate": 3.9364330406788265e-05, "loss": 0.25, "step": 15310 }, { "epoch": 7.143190298507463, "grad_norm": 0.3837799510375093, "learning_rate": 3.9356957651712894e-05, "loss": 0.2695, "step": 15315 }, { "epoch": 7.145522388059701, "grad_norm": 0.3703758989528494, "learning_rate": 3.9349583133621535e-05, "loss": 0.2541, "step": 15320 }, { "epoch": 7.14785447761194, "grad_norm": 0.3851907500301571, "learning_rate": 3.934220685361069e-05, "loss": 0.252, "step": 15325 }, { "epoch": 7.150186567164179, "grad_norm": 0.37326337092829265, "learning_rate": 3.933482881277715e-05, "loss": 0.2476, "step": 15330 }, { "epoch": 7.152518656716418, "grad_norm": 0.39521159387847293, "learning_rate": 3.9327449012217955e-05, "loss": 0.2496, "step": 15335 }, { "epoch": 7.154850746268656, "grad_norm": 0.38135878383032046, "learning_rate": 3.9320067453030415e-05, "loss": 0.2536, "step": 15340 }, { "epoch": 7.157182835820896, "grad_norm": 0.3891030491602235, "learning_rate": 3.9312684136312094e-05, "loss": 0.2552, "step": 15345 }, { "epoch": 7.159514925373134, "grad_norm": 0.3706478411975886, "learning_rate": 3.930529906316083e-05, "loss": 0.2491, "step": 15350 }, { "epoch": 7.161847014925373, "grad_norm": 0.3907135972366671, "learning_rate": 3.9297912234674694e-05, "loss": 0.2654, "step": 15355 }, { "epoch": 7.164179104477612, "grad_norm": 0.37726714462496297, "learning_rate": 3.9290523651952046e-05, "loss": 0.2572, "step": 15360 }, { "epoch": 7.166511194029851, "grad_norm": 0.35891123654511936, "learning_rate": 3.92831333160915e-05, "loss": 0.2526, "step": 15365 }, { "epoch": 7.168843283582089, "grad_norm": 0.3898271079595198, "learning_rate": 3.927574122819193e-05, "loss": 0.2573, "step": 15370 }, { "epoch": 7.171175373134329, "grad_norm": 0.379019496914522, "learning_rate": 3.9268347389352464e-05, "loss": 0.2543, "step": 15375 }, { "epoch": 7.173507462686567, "grad_norm": 0.376268693385115, "learning_rate": 3.926095180067249e-05, "loss": 0.2522, "step": 15380 }, { "epoch": 7.175839552238806, "grad_norm": 0.37440720586746057, "learning_rate": 3.925355446325167e-05, "loss": 0.2567, "step": 15385 }, { "epoch": 7.178171641791045, "grad_norm": 0.36855914947473895, "learning_rate": 3.924615537818992e-05, "loss": 0.2564, "step": 15390 }, { "epoch": 7.180503731343284, "grad_norm": 0.38276140561266325, "learning_rate": 3.92387545465874e-05, "loss": 0.2603, "step": 15395 }, { "epoch": 7.182835820895522, "grad_norm": 0.38643149900802315, "learning_rate": 3.923135196954456e-05, "loss": 0.2566, "step": 15400 }, { "epoch": 7.1851679104477615, "grad_norm": 0.3688748775349814, "learning_rate": 3.922394764816208e-05, "loss": 0.2563, "step": 15405 }, { "epoch": 7.1875, "grad_norm": 0.4044429046927919, "learning_rate": 3.92165415835409e-05, "loss": 0.2619, "step": 15410 }, { "epoch": 7.1898320895522385, "grad_norm": 0.38130778904342527, "learning_rate": 3.920913377678226e-05, "loss": 0.2595, "step": 15415 }, { "epoch": 7.192164179104478, "grad_norm": 0.39168490149108903, "learning_rate": 3.92017242289876e-05, "loss": 0.2586, "step": 15420 }, { "epoch": 7.194496268656716, "grad_norm": 0.3843595239233218, "learning_rate": 3.919431294125868e-05, "loss": 0.2586, "step": 15425 }, { "epoch": 7.196828358208955, "grad_norm": 0.3766249431592982, "learning_rate": 3.918689991469746e-05, "loss": 0.2619, "step": 15430 }, { "epoch": 7.199160447761194, "grad_norm": 0.3885492388782359, "learning_rate": 3.91794851504062e-05, "loss": 0.2626, "step": 15435 }, { "epoch": 7.201492537313433, "grad_norm": 0.376258721543359, "learning_rate": 3.9172068649487405e-05, "loss": 0.2571, "step": 15440 }, { "epoch": 7.203824626865671, "grad_norm": 0.3590883690813897, "learning_rate": 3.916465041304383e-05, "loss": 0.2672, "step": 15445 }, { "epoch": 7.206156716417911, "grad_norm": 0.40209663365929704, "learning_rate": 3.91572304421785e-05, "loss": 0.2695, "step": 15450 }, { "epoch": 7.208488805970149, "grad_norm": 0.403080709687087, "learning_rate": 3.9149808737994705e-05, "loss": 0.2586, "step": 15455 }, { "epoch": 7.210820895522388, "grad_norm": 0.37930551148127184, "learning_rate": 3.914238530159595e-05, "loss": 0.2511, "step": 15460 }, { "epoch": 7.213152985074627, "grad_norm": 0.42091154483781074, "learning_rate": 3.9134960134086055e-05, "loss": 0.2685, "step": 15465 }, { "epoch": 7.215485074626866, "grad_norm": 0.3678148687496611, "learning_rate": 3.9127533236569077e-05, "loss": 0.2623, "step": 15470 }, { "epoch": 7.217817164179104, "grad_norm": 0.3640872209956744, "learning_rate": 3.91201046101493e-05, "loss": 0.248, "step": 15475 }, { "epoch": 7.220149253731344, "grad_norm": 0.3886038083633255, "learning_rate": 3.9112674255931294e-05, "loss": 0.2627, "step": 15480 }, { "epoch": 7.222481343283582, "grad_norm": 0.3838666271998195, "learning_rate": 3.9105242175019905e-05, "loss": 0.2586, "step": 15485 }, { "epoch": 7.224813432835821, "grad_norm": 0.3835515249906745, "learning_rate": 3.909780836852019e-05, "loss": 0.2613, "step": 15490 }, { "epoch": 7.22714552238806, "grad_norm": 0.3959740513559202, "learning_rate": 3.9090372837537496e-05, "loss": 0.2594, "step": 15495 }, { "epoch": 7.229477611940299, "grad_norm": 0.39114949577363195, "learning_rate": 3.908293558317741e-05, "loss": 0.2691, "step": 15500 }, { "epoch": 7.231809701492537, "grad_norm": 0.3776938562852783, "learning_rate": 3.907549660654577e-05, "loss": 0.2638, "step": 15505 }, { "epoch": 7.2341417910447765, "grad_norm": 0.3704013664183536, "learning_rate": 3.9068055908748706e-05, "loss": 0.2564, "step": 15510 }, { "epoch": 7.236473880597015, "grad_norm": 0.3880676743260775, "learning_rate": 3.9060613490892556e-05, "loss": 0.2597, "step": 15515 }, { "epoch": 7.2388059701492535, "grad_norm": 0.37048761939667785, "learning_rate": 3.9053169354083946e-05, "loss": 0.2557, "step": 15520 }, { "epoch": 7.241138059701493, "grad_norm": 0.3596796404363834, "learning_rate": 3.904572349942974e-05, "loss": 0.2566, "step": 15525 }, { "epoch": 7.2434701492537314, "grad_norm": 0.39606366187092695, "learning_rate": 3.903827592803708e-05, "loss": 0.265, "step": 15530 }, { "epoch": 7.24580223880597, "grad_norm": 0.3636017530137616, "learning_rate": 3.903082664101334e-05, "loss": 0.2619, "step": 15535 }, { "epoch": 7.248134328358209, "grad_norm": 0.3817332794581375, "learning_rate": 3.9023375639466156e-05, "loss": 0.2582, "step": 15540 }, { "epoch": 7.250466417910448, "grad_norm": 0.3756645383078816, "learning_rate": 3.901592292450342e-05, "loss": 0.2604, "step": 15545 }, { "epoch": 7.252798507462686, "grad_norm": 0.373742690081245, "learning_rate": 3.900846849723328e-05, "loss": 0.263, "step": 15550 }, { "epoch": 7.255130597014926, "grad_norm": 0.38471278052529323, "learning_rate": 3.9001012358764146e-05, "loss": 0.2607, "step": 15555 }, { "epoch": 7.257462686567164, "grad_norm": 0.3806494892579137, "learning_rate": 3.8993554510204664e-05, "loss": 0.2639, "step": 15560 }, { "epoch": 7.259794776119403, "grad_norm": 0.38230016184107823, "learning_rate": 3.898609495266375e-05, "loss": 0.2642, "step": 15565 }, { "epoch": 7.262126865671641, "grad_norm": 0.40220717770316583, "learning_rate": 3.897863368725056e-05, "loss": 0.2613, "step": 15570 }, { "epoch": 7.264458955223881, "grad_norm": 0.39173666327052253, "learning_rate": 3.8971170715074526e-05, "loss": 0.2608, "step": 15575 }, { "epoch": 7.266791044776119, "grad_norm": 0.3877733646053974, "learning_rate": 3.896370603724531e-05, "loss": 0.2581, "step": 15580 }, { "epoch": 7.269123134328359, "grad_norm": 0.37515935885746027, "learning_rate": 3.895623965487284e-05, "loss": 0.2613, "step": 15585 }, { "epoch": 7.271455223880597, "grad_norm": 0.40034525325904247, "learning_rate": 3.8948771569067305e-05, "loss": 0.2673, "step": 15590 }, { "epoch": 7.273787313432836, "grad_norm": 0.3675720161840688, "learning_rate": 3.8941301780939124e-05, "loss": 0.2538, "step": 15595 }, { "epoch": 7.276119402985074, "grad_norm": 0.3820050610967914, "learning_rate": 3.893383029159899e-05, "loss": 0.2616, "step": 15600 }, { "epoch": 7.278451492537314, "grad_norm": 0.36772136952472634, "learning_rate": 3.892635710215785e-05, "loss": 0.2596, "step": 15605 }, { "epoch": 7.280783582089552, "grad_norm": 0.3748047802735308, "learning_rate": 3.891888221372688e-05, "loss": 0.257, "step": 15610 }, { "epoch": 7.283115671641791, "grad_norm": 0.39065125867455786, "learning_rate": 3.891140562741753e-05, "loss": 0.2663, "step": 15615 }, { "epoch": 7.28544776119403, "grad_norm": 0.39014829855172517, "learning_rate": 3.89039273443415e-05, "loss": 0.2639, "step": 15620 }, { "epoch": 7.2877798507462686, "grad_norm": 0.41035987852795575, "learning_rate": 3.889644736561073e-05, "loss": 0.2745, "step": 15625 }, { "epoch": 7.290111940298507, "grad_norm": 0.3836000789287239, "learning_rate": 3.888896569233744e-05, "loss": 0.2609, "step": 15630 }, { "epoch": 7.2924440298507465, "grad_norm": 0.3982784353180335, "learning_rate": 3.888148232563407e-05, "loss": 0.2739, "step": 15635 }, { "epoch": 7.294776119402985, "grad_norm": 0.38844158633157616, "learning_rate": 3.887399726661332e-05, "loss": 0.2583, "step": 15640 }, { "epoch": 7.2971082089552235, "grad_norm": 0.36646337542775514, "learning_rate": 3.886651051638815e-05, "loss": 0.2694, "step": 15645 }, { "epoch": 7.299440298507463, "grad_norm": 0.3869362751226481, "learning_rate": 3.885902207607178e-05, "loss": 0.263, "step": 15650 }, { "epoch": 7.301772388059701, "grad_norm": 0.3703193093051725, "learning_rate": 3.885153194677766e-05, "loss": 0.2659, "step": 15655 }, { "epoch": 7.30410447761194, "grad_norm": 0.3836755800016129, "learning_rate": 3.88440401296195e-05, "loss": 0.2622, "step": 15660 }, { "epoch": 7.306436567164179, "grad_norm": 0.406279267032629, "learning_rate": 3.8836546625711254e-05, "loss": 0.2599, "step": 15665 }, { "epoch": 7.308768656716418, "grad_norm": 0.3872978158428735, "learning_rate": 3.8829051436167144e-05, "loss": 0.2704, "step": 15670 }, { "epoch": 7.311100746268656, "grad_norm": 0.3807889246246793, "learning_rate": 3.882155456210164e-05, "loss": 0.2703, "step": 15675 }, { "epoch": 7.313432835820896, "grad_norm": 0.405766388697164, "learning_rate": 3.881405600462943e-05, "loss": 0.2712, "step": 15680 }, { "epoch": 7.315764925373134, "grad_norm": 0.3958291957419486, "learning_rate": 3.880655576486551e-05, "loss": 0.2591, "step": 15685 }, { "epoch": 7.318097014925373, "grad_norm": 0.3831585739998603, "learning_rate": 3.879905384392508e-05, "loss": 0.2657, "step": 15690 }, { "epoch": 7.320429104477612, "grad_norm": 0.3802033728181818, "learning_rate": 3.8791550242923594e-05, "loss": 0.2633, "step": 15695 }, { "epoch": 7.322761194029851, "grad_norm": 0.3830531398399696, "learning_rate": 3.8784044962976776e-05, "loss": 0.271, "step": 15700 }, { "epoch": 7.325093283582089, "grad_norm": 0.390181307802734, "learning_rate": 3.877653800520058e-05, "loss": 0.2597, "step": 15705 }, { "epoch": 7.327425373134329, "grad_norm": 0.3878611603685835, "learning_rate": 3.8769029370711234e-05, "loss": 0.2656, "step": 15710 }, { "epoch": 7.329757462686567, "grad_norm": 0.37316463627498453, "learning_rate": 3.876151906062519e-05, "loss": 0.2619, "step": 15715 }, { "epoch": 7.332089552238806, "grad_norm": 0.3919164090538949, "learning_rate": 3.8754007076059155e-05, "loss": 0.2645, "step": 15720 }, { "epoch": 7.334421641791045, "grad_norm": 0.38811080456406727, "learning_rate": 3.8746493418130096e-05, "loss": 0.2619, "step": 15725 }, { "epoch": 7.336753731343284, "grad_norm": 0.3792090655401604, "learning_rate": 3.873897808795522e-05, "loss": 0.2611, "step": 15730 }, { "epoch": 7.339085820895522, "grad_norm": 0.40524292970975356, "learning_rate": 3.8731461086652e-05, "loss": 0.2686, "step": 15735 }, { "epoch": 7.3414179104477615, "grad_norm": 0.37643294872819033, "learning_rate": 3.8723942415338105e-05, "loss": 0.2676, "step": 15740 }, { "epoch": 7.34375, "grad_norm": 0.4014429838961305, "learning_rate": 3.871642207513152e-05, "loss": 0.2683, "step": 15745 }, { "epoch": 7.3460820895522385, "grad_norm": 0.3803706129286758, "learning_rate": 3.870890006715044e-05, "loss": 0.2689, "step": 15750 }, { "epoch": 7.348414179104478, "grad_norm": 0.35993748109585344, "learning_rate": 3.870137639251331e-05, "loss": 0.2663, "step": 15755 }, { "epoch": 7.350746268656716, "grad_norm": 0.37966833525731897, "learning_rate": 3.869385105233884e-05, "loss": 0.2624, "step": 15760 }, { "epoch": 7.353078358208955, "grad_norm": 0.3752062149971955, "learning_rate": 3.868632404774597e-05, "loss": 0.2664, "step": 15765 }, { "epoch": 7.355410447761194, "grad_norm": 0.38437505229883323, "learning_rate": 3.867879537985388e-05, "loss": 0.2652, "step": 15770 }, { "epoch": 7.357742537313433, "grad_norm": 0.40657738431718266, "learning_rate": 3.867126504978204e-05, "loss": 0.275, "step": 15775 }, { "epoch": 7.360074626865671, "grad_norm": 0.40011665635968824, "learning_rate": 3.8663733058650104e-05, "loss": 0.2663, "step": 15780 }, { "epoch": 7.362406716417911, "grad_norm": 0.3748838201520957, "learning_rate": 3.865619940757804e-05, "loss": 0.2696, "step": 15785 }, { "epoch": 7.364738805970149, "grad_norm": 0.3825201912277946, "learning_rate": 3.8648664097686e-05, "loss": 0.267, "step": 15790 }, { "epoch": 7.367070895522388, "grad_norm": 0.38983835307761816, "learning_rate": 3.8641127130094434e-05, "loss": 0.2576, "step": 15795 }, { "epoch": 7.369402985074627, "grad_norm": 0.3654074359747031, "learning_rate": 3.8633588505924e-05, "loss": 0.2677, "step": 15800 }, { "epoch": 7.371735074626866, "grad_norm": 0.3793361201715317, "learning_rate": 3.862604822629564e-05, "loss": 0.2632, "step": 15805 }, { "epoch": 7.374067164179104, "grad_norm": 0.3908416390336342, "learning_rate": 3.861850629233051e-05, "loss": 0.2683, "step": 15810 }, { "epoch": 7.376399253731344, "grad_norm": 0.40477165141568194, "learning_rate": 3.861096270515001e-05, "loss": 0.2652, "step": 15815 }, { "epoch": 7.378731343283582, "grad_norm": 0.39130226062393675, "learning_rate": 3.8603417465875816e-05, "loss": 0.2798, "step": 15820 }, { "epoch": 7.381063432835821, "grad_norm": 0.3751021055694078, "learning_rate": 3.8595870575629834e-05, "loss": 0.2567, "step": 15825 }, { "epoch": 7.38339552238806, "grad_norm": 0.3947030386027147, "learning_rate": 3.858832203553421e-05, "loss": 0.2717, "step": 15830 }, { "epoch": 7.385727611940299, "grad_norm": 0.3645353030204309, "learning_rate": 3.8580771846711335e-05, "loss": 0.2708, "step": 15835 }, { "epoch": 7.388059701492537, "grad_norm": 0.37060967286496294, "learning_rate": 3.857322001028385e-05, "loss": 0.2658, "step": 15840 }, { "epoch": 7.3903917910447765, "grad_norm": 0.35766338726325686, "learning_rate": 3.856566652737465e-05, "loss": 0.2667, "step": 15845 }, { "epoch": 7.392723880597015, "grad_norm": 0.393211341967248, "learning_rate": 3.855811139910686e-05, "loss": 0.2679, "step": 15850 }, { "epoch": 7.3950559701492535, "grad_norm": 0.38683896977285925, "learning_rate": 3.855055462660385e-05, "loss": 0.2675, "step": 15855 }, { "epoch": 7.397388059701493, "grad_norm": 0.38186685030455586, "learning_rate": 3.854299621098925e-05, "loss": 0.2623, "step": 15860 }, { "epoch": 7.3997201492537314, "grad_norm": 0.3921155333406873, "learning_rate": 3.853543615338691e-05, "loss": 0.2679, "step": 15865 }, { "epoch": 7.40205223880597, "grad_norm": 0.37232919623029237, "learning_rate": 3.8527874454920955e-05, "loss": 0.2646, "step": 15870 }, { "epoch": 7.404384328358209, "grad_norm": 0.39707754761892233, "learning_rate": 3.852031111671573e-05, "loss": 0.2764, "step": 15875 }, { "epoch": 7.406716417910448, "grad_norm": 0.3852660233946518, "learning_rate": 3.851274613989582e-05, "loss": 0.2688, "step": 15880 }, { "epoch": 7.409048507462686, "grad_norm": 0.3760578343049283, "learning_rate": 3.850517952558608e-05, "loss": 0.2658, "step": 15885 }, { "epoch": 7.411380597014926, "grad_norm": 0.3546390005096932, "learning_rate": 3.849761127491158e-05, "loss": 0.2661, "step": 15890 }, { "epoch": 7.413712686567164, "grad_norm": 0.3821462056697725, "learning_rate": 3.849004138899766e-05, "loss": 0.2766, "step": 15895 }, { "epoch": 7.416044776119403, "grad_norm": 0.37013931975157166, "learning_rate": 3.848246986896989e-05, "loss": 0.2689, "step": 15900 }, { "epoch": 7.418376865671641, "grad_norm": 0.37489932724113423, "learning_rate": 3.847489671595406e-05, "loss": 0.2625, "step": 15905 }, { "epoch": 7.420708955223881, "grad_norm": 0.3837458689868867, "learning_rate": 3.8467321931076255e-05, "loss": 0.2731, "step": 15910 }, { "epoch": 7.423041044776119, "grad_norm": 0.4033788948250503, "learning_rate": 3.845974551546276e-05, "loss": 0.2645, "step": 15915 }, { "epoch": 7.425373134328359, "grad_norm": 0.3713198186375398, "learning_rate": 3.84521674702401e-05, "loss": 0.2691, "step": 15920 }, { "epoch": 7.427705223880597, "grad_norm": 0.3803754834284782, "learning_rate": 3.844458779653508e-05, "loss": 0.2739, "step": 15925 }, { "epoch": 7.430037313432836, "grad_norm": 0.3829707190195545, "learning_rate": 3.8437006495474716e-05, "loss": 0.2679, "step": 15930 }, { "epoch": 7.432369402985074, "grad_norm": 0.36922290025875876, "learning_rate": 3.842942356818627e-05, "loss": 0.2715, "step": 15935 }, { "epoch": 7.434701492537314, "grad_norm": 0.3873951726391043, "learning_rate": 3.8421839015797265e-05, "loss": 0.2641, "step": 15940 }, { "epoch": 7.437033582089552, "grad_norm": 0.39603395739724584, "learning_rate": 3.841425283943544e-05, "loss": 0.2686, "step": 15945 }, { "epoch": 7.439365671641791, "grad_norm": 0.37890526468202573, "learning_rate": 3.840666504022879e-05, "loss": 0.2722, "step": 15950 }, { "epoch": 7.44169776119403, "grad_norm": 0.3789171665052328, "learning_rate": 3.839907561930554e-05, "loss": 0.266, "step": 15955 }, { "epoch": 7.4440298507462686, "grad_norm": 0.3760471542252113, "learning_rate": 3.839148457779418e-05, "loss": 0.2687, "step": 15960 }, { "epoch": 7.446361940298507, "grad_norm": 0.3652312846552105, "learning_rate": 3.838389191682341e-05, "loss": 0.2705, "step": 15965 }, { "epoch": 7.4486940298507465, "grad_norm": 0.37735460269693333, "learning_rate": 3.837629763752219e-05, "loss": 0.2607, "step": 15970 }, { "epoch": 7.451026119402985, "grad_norm": 0.3696620317557275, "learning_rate": 3.8368701741019734e-05, "loss": 0.2726, "step": 15975 }, { "epoch": 7.4533582089552235, "grad_norm": 0.37465041353918666, "learning_rate": 3.8361104228445455e-05, "loss": 0.2651, "step": 15980 }, { "epoch": 7.455690298507463, "grad_norm": 0.4159146074060093, "learning_rate": 3.835350510092904e-05, "loss": 0.2675, "step": 15985 }, { "epoch": 7.458022388059701, "grad_norm": 0.39304973528016074, "learning_rate": 3.834590435960041e-05, "loss": 0.2632, "step": 15990 }, { "epoch": 7.46035447761194, "grad_norm": 0.361428390202475, "learning_rate": 3.833830200558971e-05, "loss": 0.269, "step": 15995 }, { "epoch": 7.462686567164179, "grad_norm": 0.3712829228568899, "learning_rate": 3.8330698040027345e-05, "loss": 0.2688, "step": 16000 }, { "epoch": 7.465018656716418, "grad_norm": 0.39019390663203746, "learning_rate": 3.832309246404396e-05, "loss": 0.2717, "step": 16005 }, { "epoch": 7.467350746268656, "grad_norm": 0.3960533996289103, "learning_rate": 3.8315485278770423e-05, "loss": 0.2638, "step": 16010 }, { "epoch": 7.469682835820896, "grad_norm": 0.39712322339935463, "learning_rate": 3.830787648533785e-05, "loss": 0.269, "step": 16015 }, { "epoch": 7.472014925373134, "grad_norm": 0.35108814804596067, "learning_rate": 3.83002660848776e-05, "loss": 0.2658, "step": 16020 }, { "epoch": 7.474347014925373, "grad_norm": 0.3827986118682957, "learning_rate": 3.829265407852125e-05, "loss": 0.2634, "step": 16025 }, { "epoch": 7.476679104477612, "grad_norm": 0.3749197455270441, "learning_rate": 3.828504046740065e-05, "loss": 0.2641, "step": 16030 }, { "epoch": 7.479011194029851, "grad_norm": 0.3765984547101378, "learning_rate": 3.827742525264787e-05, "loss": 0.274, "step": 16035 }, { "epoch": 7.481343283582089, "grad_norm": 0.3904481377376915, "learning_rate": 3.826980843539521e-05, "loss": 0.2772, "step": 16040 }, { "epoch": 7.483675373134329, "grad_norm": 0.3789233890649732, "learning_rate": 3.826219001677523e-05, "loss": 0.2685, "step": 16045 }, { "epoch": 7.486007462686567, "grad_norm": 0.3886622566150852, "learning_rate": 3.82545699979207e-05, "loss": 0.2592, "step": 16050 }, { "epoch": 7.488339552238806, "grad_norm": 0.39713698689930954, "learning_rate": 3.824694837996466e-05, "loss": 0.2622, "step": 16055 }, { "epoch": 7.490671641791045, "grad_norm": 0.374205085095704, "learning_rate": 3.823932516404036e-05, "loss": 0.2752, "step": 16060 }, { "epoch": 7.493003731343284, "grad_norm": 0.38013632760164884, "learning_rate": 3.823170035128131e-05, "loss": 0.2745, "step": 16065 }, { "epoch": 7.495335820895522, "grad_norm": 0.3729376066009225, "learning_rate": 3.822407394282123e-05, "loss": 0.2631, "step": 16070 }, { "epoch": 7.4976679104477615, "grad_norm": 0.3741880998379404, "learning_rate": 3.821644593979411e-05, "loss": 0.2637, "step": 16075 }, { "epoch": 7.5, "grad_norm": 0.42208273692022885, "learning_rate": 3.8208816343334156e-05, "loss": 0.2678, "step": 16080 }, { "epoch": 7.5023320895522385, "grad_norm": 0.3781178092189157, "learning_rate": 3.820118515457582e-05, "loss": 0.2647, "step": 16085 }, { "epoch": 7.504664179104478, "grad_norm": 0.3774599333898893, "learning_rate": 3.819355237465377e-05, "loss": 0.2642, "step": 16090 }, { "epoch": 7.506996268656716, "grad_norm": 0.36035662248501965, "learning_rate": 3.818591800470295e-05, "loss": 0.2682, "step": 16095 }, { "epoch": 7.509328358208955, "grad_norm": 0.38774026271171336, "learning_rate": 3.81782820458585e-05, "loss": 0.2635, "step": 16100 }, { "epoch": 7.511660447761194, "grad_norm": 0.39975724617160985, "learning_rate": 3.817064449925582e-05, "loss": 0.2705, "step": 16105 }, { "epoch": 7.513992537313433, "grad_norm": 0.36913804676704437, "learning_rate": 3.816300536603054e-05, "loss": 0.2713, "step": 16110 }, { "epoch": 7.516324626865671, "grad_norm": 0.3770593373507304, "learning_rate": 3.815536464731853e-05, "loss": 0.267, "step": 16115 }, { "epoch": 7.518656716417911, "grad_norm": 0.37190913321436225, "learning_rate": 3.814772234425588e-05, "loss": 0.2664, "step": 16120 }, { "epoch": 7.520988805970149, "grad_norm": 0.37768616475948735, "learning_rate": 3.814007845797894e-05, "loss": 0.2729, "step": 16125 }, { "epoch": 7.523320895522388, "grad_norm": 0.38948504667697204, "learning_rate": 3.813243298962428e-05, "loss": 0.268, "step": 16130 }, { "epoch": 7.525652985074627, "grad_norm": 0.35453953423456525, "learning_rate": 3.8124785940328704e-05, "loss": 0.268, "step": 16135 }, { "epoch": 7.527985074626866, "grad_norm": 0.3844719566337298, "learning_rate": 3.8117137311229255e-05, "loss": 0.2804, "step": 16140 }, { "epoch": 7.530317164179104, "grad_norm": 0.3888099105414525, "learning_rate": 3.810948710346322e-05, "loss": 0.2733, "step": 16145 }, { "epoch": 7.532649253731344, "grad_norm": 0.36110468174633203, "learning_rate": 3.81018353181681e-05, "loss": 0.2698, "step": 16150 }, { "epoch": 7.534981343283582, "grad_norm": 0.37823511652997727, "learning_rate": 3.8094181956481645e-05, "loss": 0.2696, "step": 16155 }, { "epoch": 7.537313432835821, "grad_norm": 0.37134065704539937, "learning_rate": 3.808652701954183e-05, "loss": 0.2707, "step": 16160 }, { "epoch": 7.53964552238806, "grad_norm": 0.3495860603013858, "learning_rate": 3.807887050848689e-05, "loss": 0.2641, "step": 16165 }, { "epoch": 7.541977611940299, "grad_norm": 0.3794447569125387, "learning_rate": 3.807121242445526e-05, "loss": 0.2703, "step": 16170 }, { "epoch": 7.544309701492537, "grad_norm": 0.39586029656817456, "learning_rate": 3.806355276858562e-05, "loss": 0.2769, "step": 16175 }, { "epoch": 7.5466417910447765, "grad_norm": 0.37914549073327275, "learning_rate": 3.805589154201691e-05, "loss": 0.2691, "step": 16180 }, { "epoch": 7.548973880597015, "grad_norm": 0.3908233046897201, "learning_rate": 3.804822874588825e-05, "loss": 0.2745, "step": 16185 }, { "epoch": 7.5513059701492535, "grad_norm": 0.3754191116700516, "learning_rate": 3.804056438133905e-05, "loss": 0.2654, "step": 16190 }, { "epoch": 7.553638059701493, "grad_norm": 0.37255927344521367, "learning_rate": 3.803289844950893e-05, "loss": 0.2562, "step": 16195 }, { "epoch": 7.5559701492537314, "grad_norm": 0.384992983096584, "learning_rate": 3.80252309515377e-05, "loss": 0.2766, "step": 16200 }, { "epoch": 7.55830223880597, "grad_norm": 0.3878054245621602, "learning_rate": 3.801756188856549e-05, "loss": 0.269, "step": 16205 }, { "epoch": 7.5606343283582085, "grad_norm": 0.37759459382944527, "learning_rate": 3.800989126173259e-05, "loss": 0.2782, "step": 16210 }, { "epoch": 7.562966417910448, "grad_norm": 0.366510019892454, "learning_rate": 3.800221907217956e-05, "loss": 0.2716, "step": 16215 }, { "epoch": 7.565298507462686, "grad_norm": 0.3950084397410356, "learning_rate": 3.799454532104718e-05, "loss": 0.2763, "step": 16220 }, { "epoch": 7.567630597014926, "grad_norm": 0.38418126339274977, "learning_rate": 3.7986870009476454e-05, "loss": 0.2715, "step": 16225 }, { "epoch": 7.569962686567164, "grad_norm": 0.3792547402416868, "learning_rate": 3.7979193138608646e-05, "loss": 0.2712, "step": 16230 }, { "epoch": 7.572294776119403, "grad_norm": 0.3744686233233901, "learning_rate": 3.797151470958521e-05, "loss": 0.2754, "step": 16235 }, { "epoch": 7.574626865671641, "grad_norm": 0.39354530185078157, "learning_rate": 3.7963834723547866e-05, "loss": 0.2744, "step": 16240 }, { "epoch": 7.576958955223881, "grad_norm": 0.3945194199210772, "learning_rate": 3.795615318163856e-05, "loss": 0.2753, "step": 16245 }, { "epoch": 7.579291044776119, "grad_norm": 0.3916656383009546, "learning_rate": 3.794847008499946e-05, "loss": 0.2743, "step": 16250 }, { "epoch": 7.581623134328359, "grad_norm": 0.3875872193013707, "learning_rate": 3.7940785434772965e-05, "loss": 0.2816, "step": 16255 }, { "epoch": 7.583955223880597, "grad_norm": 0.3954438384544729, "learning_rate": 3.793309923210171e-05, "loss": 0.2828, "step": 16260 }, { "epoch": 7.586287313432836, "grad_norm": 0.39638007295681926, "learning_rate": 3.792541147812856e-05, "loss": 0.2726, "step": 16265 }, { "epoch": 7.588619402985074, "grad_norm": 0.384943668578718, "learning_rate": 3.791772217399661e-05, "loss": 0.2689, "step": 16270 }, { "epoch": 7.590951492537314, "grad_norm": 0.36413723111457674, "learning_rate": 3.791003132084919e-05, "loss": 0.2635, "step": 16275 }, { "epoch": 7.593283582089552, "grad_norm": 0.37892491731905226, "learning_rate": 3.7902338919829854e-05, "loss": 0.2686, "step": 16280 }, { "epoch": 7.5956156716417915, "grad_norm": 0.38714098458332274, "learning_rate": 3.789464497208238e-05, "loss": 0.2678, "step": 16285 }, { "epoch": 7.59794776119403, "grad_norm": 0.39045961543435703, "learning_rate": 3.788694947875079e-05, "loss": 0.2703, "step": 16290 }, { "epoch": 7.6002798507462686, "grad_norm": 0.3649326381320554, "learning_rate": 3.787925244097935e-05, "loss": 0.2801, "step": 16295 }, { "epoch": 7.602611940298507, "grad_norm": 0.3680542789150815, "learning_rate": 3.78715538599125e-05, "loss": 0.2718, "step": 16300 }, { "epoch": 7.6049440298507465, "grad_norm": 0.37906560452921106, "learning_rate": 3.786385373669497e-05, "loss": 0.2757, "step": 16305 }, { "epoch": 7.607276119402985, "grad_norm": 0.385554275687325, "learning_rate": 3.7856152072471686e-05, "loss": 0.2636, "step": 16310 }, { "epoch": 7.6096082089552235, "grad_norm": 0.3716920984172636, "learning_rate": 3.7848448868387806e-05, "loss": 0.272, "step": 16315 }, { "epoch": 7.611940298507463, "grad_norm": 0.3900833817244157, "learning_rate": 3.784074412558875e-05, "loss": 0.2751, "step": 16320 }, { "epoch": 7.614272388059701, "grad_norm": 0.3830513871300066, "learning_rate": 3.7833037845220097e-05, "loss": 0.2685, "step": 16325 }, { "epoch": 7.61660447761194, "grad_norm": 0.3813640960715144, "learning_rate": 3.782533002842773e-05, "loss": 0.2722, "step": 16330 }, { "epoch": 7.618936567164179, "grad_norm": 0.3874501021246566, "learning_rate": 3.781762067635771e-05, "loss": 0.2728, "step": 16335 }, { "epoch": 7.621268656716418, "grad_norm": 0.3830203224587388, "learning_rate": 3.7809909790156355e-05, "loss": 0.2724, "step": 16340 }, { "epoch": 7.623600746268656, "grad_norm": 0.378341257303565, "learning_rate": 3.78021973709702e-05, "loss": 0.278, "step": 16345 }, { "epoch": 7.625932835820896, "grad_norm": 0.39246510550165636, "learning_rate": 3.7794483419946e-05, "loss": 0.2867, "step": 16350 }, { "epoch": 7.628264925373134, "grad_norm": 0.3459984655988357, "learning_rate": 3.778676793823075e-05, "loss": 0.2637, "step": 16355 }, { "epoch": 7.630597014925373, "grad_norm": 0.3998638590402181, "learning_rate": 3.777905092697166e-05, "loss": 0.2735, "step": 16360 }, { "epoch": 7.632929104477612, "grad_norm": 0.38921727905005143, "learning_rate": 3.7771332387316186e-05, "loss": 0.2686, "step": 16365 }, { "epoch": 7.635261194029851, "grad_norm": 0.37274086860930467, "learning_rate": 3.7763612320412e-05, "loss": 0.2732, "step": 16370 }, { "epoch": 7.637593283582089, "grad_norm": 0.3832623619529381, "learning_rate": 3.7755890727406994e-05, "loss": 0.2781, "step": 16375 }, { "epoch": 7.639925373134329, "grad_norm": 0.37385154509973184, "learning_rate": 3.77481676094493e-05, "loss": 0.2754, "step": 16380 }, { "epoch": 7.642257462686567, "grad_norm": 0.3898307891608889, "learning_rate": 3.774044296768728e-05, "loss": 0.2671, "step": 16385 }, { "epoch": 7.644589552238806, "grad_norm": 0.36867220045460947, "learning_rate": 3.77327168032695e-05, "loss": 0.2658, "step": 16390 }, { "epoch": 7.646921641791045, "grad_norm": 0.36041180364775455, "learning_rate": 3.772498911734478e-05, "loss": 0.2665, "step": 16395 }, { "epoch": 7.649253731343284, "grad_norm": 0.41348752622073026, "learning_rate": 3.771725991106214e-05, "loss": 0.2763, "step": 16400 }, { "epoch": 7.651585820895522, "grad_norm": 0.3765567612624428, "learning_rate": 3.7709529185570846e-05, "loss": 0.2757, "step": 16405 }, { "epoch": 7.6539179104477615, "grad_norm": 0.36725900923928495, "learning_rate": 3.770179694202038e-05, "loss": 0.2741, "step": 16410 }, { "epoch": 7.65625, "grad_norm": 0.3842228691648146, "learning_rate": 3.7694063181560456e-05, "loss": 0.2755, "step": 16415 }, { "epoch": 7.6585820895522385, "grad_norm": 0.36548655533501334, "learning_rate": 3.7686327905341014e-05, "loss": 0.274, "step": 16420 }, { "epoch": 7.660914179104478, "grad_norm": 0.3652176067400715, "learning_rate": 3.76785911145122e-05, "loss": 0.273, "step": 16425 }, { "epoch": 7.663246268656716, "grad_norm": 0.38212742788310167, "learning_rate": 3.767085281022441e-05, "loss": 0.2763, "step": 16430 }, { "epoch": 7.665578358208955, "grad_norm": 0.3756528634826962, "learning_rate": 3.766311299362826e-05, "loss": 0.2692, "step": 16435 }, { "epoch": 7.667910447761194, "grad_norm": 0.37954538593678544, "learning_rate": 3.765537166587458e-05, "loss": 0.2797, "step": 16440 }, { "epoch": 7.670242537313433, "grad_norm": 0.38879840096899904, "learning_rate": 3.764762882811444e-05, "loss": 0.2793, "step": 16445 }, { "epoch": 7.672574626865671, "grad_norm": 0.36704146606836807, "learning_rate": 3.763988448149912e-05, "loss": 0.2692, "step": 16450 }, { "epoch": 7.674906716417911, "grad_norm": 0.3952071220869639, "learning_rate": 3.763213862718012e-05, "loss": 0.2752, "step": 16455 }, { "epoch": 7.677238805970149, "grad_norm": 0.365617042905023, "learning_rate": 3.762439126630919e-05, "loss": 0.2696, "step": 16460 }, { "epoch": 7.679570895522388, "grad_norm": 0.37575910190935913, "learning_rate": 3.761664240003828e-05, "loss": 0.2701, "step": 16465 }, { "epoch": 7.681902985074627, "grad_norm": 0.38756557738106506, "learning_rate": 3.7608892029519576e-05, "loss": 0.2736, "step": 16470 }, { "epoch": 7.684235074626866, "grad_norm": 0.3608790890298827, "learning_rate": 3.7601140155905485e-05, "loss": 0.2712, "step": 16475 }, { "epoch": 7.686567164179104, "grad_norm": 0.363239898191367, "learning_rate": 3.7593386780348625e-05, "loss": 0.27, "step": 16480 }, { "epoch": 7.688899253731344, "grad_norm": 0.39700787646360153, "learning_rate": 3.758563190400187e-05, "loss": 0.2807, "step": 16485 }, { "epoch": 7.691231343283582, "grad_norm": 0.3794290881350517, "learning_rate": 3.757787552801827e-05, "loss": 0.2786, "step": 16490 }, { "epoch": 7.693563432835821, "grad_norm": 0.3653469069859092, "learning_rate": 3.757011765355115e-05, "loss": 0.2699, "step": 16495 }, { "epoch": 7.69589552238806, "grad_norm": 0.3796818804766615, "learning_rate": 3.756235828175401e-05, "loss": 0.2677, "step": 16500 }, { "epoch": 7.698227611940299, "grad_norm": 0.3862641891685225, "learning_rate": 3.755459741378061e-05, "loss": 0.2685, "step": 16505 }, { "epoch": 7.700559701492537, "grad_norm": 0.36571075626490973, "learning_rate": 3.75468350507849e-05, "loss": 0.2742, "step": 16510 }, { "epoch": 7.7028917910447765, "grad_norm": 0.3675016453098096, "learning_rate": 3.753907119392108e-05, "loss": 0.2654, "step": 16515 }, { "epoch": 7.705223880597015, "grad_norm": 0.3752240816941197, "learning_rate": 3.753130584434357e-05, "loss": 0.2732, "step": 16520 }, { "epoch": 7.7075559701492535, "grad_norm": 0.3788214663168051, "learning_rate": 3.752353900320698e-05, "loss": 0.2753, "step": 16525 }, { "epoch": 7.709888059701493, "grad_norm": 0.37757316084497267, "learning_rate": 3.7515770671666175e-05, "loss": 0.2737, "step": 16530 }, { "epoch": 7.7122201492537314, "grad_norm": 0.36909832011003874, "learning_rate": 3.750800085087625e-05, "loss": 0.2646, "step": 16535 }, { "epoch": 7.71455223880597, "grad_norm": 0.3754902087121156, "learning_rate": 3.750022954199248e-05, "loss": 0.2656, "step": 16540 }, { "epoch": 7.7168843283582085, "grad_norm": 0.3738482374580144, "learning_rate": 3.7492456746170385e-05, "loss": 0.2732, "step": 16545 }, { "epoch": 7.719216417910448, "grad_norm": 0.380165308488838, "learning_rate": 3.748468246456572e-05, "loss": 0.271, "step": 16550 }, { "epoch": 7.721548507462686, "grad_norm": 0.42021812868278013, "learning_rate": 3.747690669833443e-05, "loss": 0.2837, "step": 16555 }, { "epoch": 7.723880597014926, "grad_norm": 0.3720829422908175, "learning_rate": 3.7469129448632704e-05, "loss": 0.2829, "step": 16560 }, { "epoch": 7.726212686567164, "grad_norm": 0.37528294419908226, "learning_rate": 3.7461350716616955e-05, "loss": 0.2738, "step": 16565 }, { "epoch": 7.728544776119403, "grad_norm": 0.3693263273695873, "learning_rate": 3.7453570503443785e-05, "loss": 0.2747, "step": 16570 }, { "epoch": 7.730876865671641, "grad_norm": 0.36017667442905377, "learning_rate": 3.744578881027006e-05, "loss": 0.2785, "step": 16575 }, { "epoch": 7.733208955223881, "grad_norm": 0.3886163302090998, "learning_rate": 3.743800563825283e-05, "loss": 0.2694, "step": 16580 }, { "epoch": 7.735541044776119, "grad_norm": 0.36817451318975264, "learning_rate": 3.743022098854937e-05, "loss": 0.2758, "step": 16585 }, { "epoch": 7.737873134328359, "grad_norm": 0.3762095604278048, "learning_rate": 3.742243486231719e-05, "loss": 0.2718, "step": 16590 }, { "epoch": 7.740205223880597, "grad_norm": 0.37662906882654273, "learning_rate": 3.7414647260714025e-05, "loss": 0.2805, "step": 16595 }, { "epoch": 7.742537313432836, "grad_norm": 0.3800307526717601, "learning_rate": 3.74068581848978e-05, "loss": 0.2754, "step": 16600 }, { "epoch": 7.744869402985074, "grad_norm": 0.37178560322145016, "learning_rate": 3.739906763602669e-05, "loss": 0.2749, "step": 16605 }, { "epoch": 7.747201492537314, "grad_norm": 0.3831429937284738, "learning_rate": 3.7391275615259065e-05, "loss": 0.2717, "step": 16610 }, { "epoch": 7.749533582089552, "grad_norm": 0.35861288415786685, "learning_rate": 3.738348212375352e-05, "loss": 0.2756, "step": 16615 }, { "epoch": 7.7518656716417915, "grad_norm": 0.3857299140101131, "learning_rate": 3.737568716266888e-05, "loss": 0.2826, "step": 16620 }, { "epoch": 7.75419776119403, "grad_norm": 0.3768829995098099, "learning_rate": 3.7367890733164185e-05, "loss": 0.269, "step": 16625 }, { "epoch": 7.7565298507462686, "grad_norm": 0.3832608097923746, "learning_rate": 3.7360092836398686e-05, "loss": 0.2783, "step": 16630 }, { "epoch": 7.758861940298507, "grad_norm": 0.36897935328238485, "learning_rate": 3.7352293473531844e-05, "loss": 0.2718, "step": 16635 }, { "epoch": 7.7611940298507465, "grad_norm": 0.38015342206744557, "learning_rate": 3.734449264572336e-05, "loss": 0.2691, "step": 16640 }, { "epoch": 7.763526119402985, "grad_norm": 0.36732206223487335, "learning_rate": 3.733669035413315e-05, "loss": 0.2835, "step": 16645 }, { "epoch": 7.7658582089552235, "grad_norm": 0.3669779563193028, "learning_rate": 3.7328886599921327e-05, "loss": 0.2851, "step": 16650 }, { "epoch": 7.768190298507463, "grad_norm": 0.38739773447223785, "learning_rate": 3.732108138424824e-05, "loss": 0.2735, "step": 16655 }, { "epoch": 7.770522388059701, "grad_norm": 0.36978379407412393, "learning_rate": 3.7313274708274445e-05, "loss": 0.2654, "step": 16660 }, { "epoch": 7.77285447761194, "grad_norm": 0.3661943756837653, "learning_rate": 3.7305466573160725e-05, "loss": 0.2763, "step": 16665 }, { "epoch": 7.775186567164179, "grad_norm": 0.3683290187320757, "learning_rate": 3.729765698006808e-05, "loss": 0.277, "step": 16670 }, { "epoch": 7.777518656716418, "grad_norm": 0.3768220257036437, "learning_rate": 3.7289845930157704e-05, "loss": 0.2798, "step": 16675 }, { "epoch": 7.779850746268656, "grad_norm": 0.3771301122015155, "learning_rate": 3.7282033424591043e-05, "loss": 0.2724, "step": 16680 }, { "epoch": 7.782182835820896, "grad_norm": 0.36677089215533737, "learning_rate": 3.7274219464529736e-05, "loss": 0.2696, "step": 16685 }, { "epoch": 7.784514925373134, "grad_norm": 0.37328124538816776, "learning_rate": 3.726640405113564e-05, "loss": 0.2711, "step": 16690 }, { "epoch": 7.786847014925373, "grad_norm": 0.3731973244556859, "learning_rate": 3.725858718557084e-05, "loss": 0.2726, "step": 16695 }, { "epoch": 7.789179104477612, "grad_norm": 0.3689483106602507, "learning_rate": 3.725076886899763e-05, "loss": 0.2732, "step": 16700 }, { "epoch": 7.791511194029851, "grad_norm": 0.38914491431489584, "learning_rate": 3.72429491025785e-05, "loss": 0.2828, "step": 16705 }, { "epoch": 7.793843283582089, "grad_norm": 0.37549745713667676, "learning_rate": 3.723512788747619e-05, "loss": 0.2767, "step": 16710 }, { "epoch": 7.796175373134329, "grad_norm": 0.41358784148187183, "learning_rate": 3.722730522485364e-05, "loss": 0.2824, "step": 16715 }, { "epoch": 7.798507462686567, "grad_norm": 0.3920822257667621, "learning_rate": 3.721948111587399e-05, "loss": 0.2745, "step": 16720 }, { "epoch": 7.800839552238806, "grad_norm": 0.39190141761053443, "learning_rate": 3.721165556170065e-05, "loss": 0.2869, "step": 16725 }, { "epoch": 7.803171641791045, "grad_norm": 0.4005525506892468, "learning_rate": 3.720382856349715e-05, "loss": 0.2781, "step": 16730 }, { "epoch": 7.805503731343284, "grad_norm": 0.3922917701485776, "learning_rate": 3.719600012242733e-05, "loss": 0.2775, "step": 16735 }, { "epoch": 7.807835820895522, "grad_norm": 0.3831398321672898, "learning_rate": 3.718817023965519e-05, "loss": 0.2709, "step": 16740 }, { "epoch": 7.8101679104477615, "grad_norm": 0.3893584703092133, "learning_rate": 3.718033891634496e-05, "loss": 0.285, "step": 16745 }, { "epoch": 7.8125, "grad_norm": 0.37193207748943025, "learning_rate": 3.717250615366108e-05, "loss": 0.2702, "step": 16750 }, { "epoch": 7.8148320895522385, "grad_norm": 0.35799801622999, "learning_rate": 3.7164671952768206e-05, "loss": 0.2715, "step": 16755 }, { "epoch": 7.817164179104478, "grad_norm": 0.3958594044093376, "learning_rate": 3.715683631483121e-05, "loss": 0.2789, "step": 16760 }, { "epoch": 7.819496268656716, "grad_norm": 0.36603192146276703, "learning_rate": 3.7148999241015185e-05, "loss": 0.2682, "step": 16765 }, { "epoch": 7.821828358208955, "grad_norm": 0.38153439410515133, "learning_rate": 3.714116073248542e-05, "loss": 0.2763, "step": 16770 }, { "epoch": 7.824160447761194, "grad_norm": 0.3818610628064441, "learning_rate": 3.713332079040743e-05, "loss": 0.2759, "step": 16775 }, { "epoch": 7.826492537313433, "grad_norm": 0.38253139439547745, "learning_rate": 3.712547941594693e-05, "loss": 0.2759, "step": 16780 }, { "epoch": 7.828824626865671, "grad_norm": 0.35722526518692865, "learning_rate": 3.7117636610269875e-05, "loss": 0.2832, "step": 16785 }, { "epoch": 7.831156716417911, "grad_norm": 0.3638425268169718, "learning_rate": 3.71097923745424e-05, "loss": 0.2726, "step": 16790 }, { "epoch": 7.833488805970149, "grad_norm": 0.38544232196644374, "learning_rate": 3.710194670993087e-05, "loss": 0.2878, "step": 16795 }, { "epoch": 7.835820895522388, "grad_norm": 0.3677041735237463, "learning_rate": 3.709409961760186e-05, "loss": 0.2776, "step": 16800 }, { "epoch": 7.838152985074627, "grad_norm": 0.3758020037208962, "learning_rate": 3.708625109872217e-05, "loss": 0.2766, "step": 16805 }, { "epoch": 7.840485074626866, "grad_norm": 0.369798523781752, "learning_rate": 3.707840115445877e-05, "loss": 0.2849, "step": 16810 }, { "epoch": 7.842817164179104, "grad_norm": 0.38210927119346566, "learning_rate": 3.707054978597891e-05, "loss": 0.2791, "step": 16815 }, { "epoch": 7.845149253731344, "grad_norm": 0.36733546069483974, "learning_rate": 3.706269699444998e-05, "loss": 0.2854, "step": 16820 }, { "epoch": 7.847481343283582, "grad_norm": 0.3736871381046388, "learning_rate": 3.705484278103964e-05, "loss": 0.2741, "step": 16825 }, { "epoch": 7.849813432835821, "grad_norm": 0.38274829414000294, "learning_rate": 3.704698714691572e-05, "loss": 0.2775, "step": 16830 }, { "epoch": 7.85214552238806, "grad_norm": 0.3722007345258053, "learning_rate": 3.703913009324628e-05, "loss": 0.2791, "step": 16835 }, { "epoch": 7.854477611940299, "grad_norm": 0.3744185712546272, "learning_rate": 3.703127162119959e-05, "loss": 0.2727, "step": 16840 }, { "epoch": 7.856809701492537, "grad_norm": 0.39928173318226456, "learning_rate": 3.702341173194413e-05, "loss": 0.2813, "step": 16845 }, { "epoch": 7.8591417910447765, "grad_norm": 0.3673385889238369, "learning_rate": 3.701555042664861e-05, "loss": 0.2742, "step": 16850 }, { "epoch": 7.861473880597015, "grad_norm": 0.37666684359158464, "learning_rate": 3.7007687706481896e-05, "loss": 0.274, "step": 16855 }, { "epoch": 7.8638059701492535, "grad_norm": 0.3718904851203048, "learning_rate": 3.699982357261312e-05, "loss": 0.2706, "step": 16860 }, { "epoch": 7.866138059701493, "grad_norm": 0.3717845409969875, "learning_rate": 3.699195802621159e-05, "loss": 0.2735, "step": 16865 }, { "epoch": 7.8684701492537314, "grad_norm": 0.4041239269558037, "learning_rate": 3.6984091068446855e-05, "loss": 0.282, "step": 16870 }, { "epoch": 7.87080223880597, "grad_norm": 0.36687328450968043, "learning_rate": 3.697622270048864e-05, "loss": 0.2776, "step": 16875 }, { "epoch": 7.8731343283582085, "grad_norm": 0.3694497705146261, "learning_rate": 3.69683529235069e-05, "loss": 0.2751, "step": 16880 }, { "epoch": 7.875466417910448, "grad_norm": 0.3968666854180644, "learning_rate": 3.6960481738671806e-05, "loss": 0.2769, "step": 16885 }, { "epoch": 7.877798507462686, "grad_norm": 0.3734738180684915, "learning_rate": 3.695260914715372e-05, "loss": 0.2726, "step": 16890 }, { "epoch": 7.880130597014926, "grad_norm": 0.37810305246764314, "learning_rate": 3.6944735150123215e-05, "loss": 0.2785, "step": 16895 }, { "epoch": 7.882462686567164, "grad_norm": 0.3684120601518599, "learning_rate": 3.693685974875109e-05, "loss": 0.275, "step": 16900 }, { "epoch": 7.884794776119403, "grad_norm": 0.3806330187124303, "learning_rate": 3.6928982944208344e-05, "loss": 0.2794, "step": 16905 }, { "epoch": 7.887126865671641, "grad_norm": 0.3881894719008805, "learning_rate": 3.692110473766616e-05, "loss": 0.2676, "step": 16910 }, { "epoch": 7.889458955223881, "grad_norm": 0.36724372298324154, "learning_rate": 3.6913225130295974e-05, "loss": 0.2724, "step": 16915 }, { "epoch": 7.891791044776119, "grad_norm": 0.3623138894492127, "learning_rate": 3.69053441232694e-05, "loss": 0.2765, "step": 16920 }, { "epoch": 7.894123134328359, "grad_norm": 0.37487935624462493, "learning_rate": 3.689746171775827e-05, "loss": 0.2775, "step": 16925 }, { "epoch": 7.896455223880597, "grad_norm": 0.38286462302412994, "learning_rate": 3.688957791493462e-05, "loss": 0.276, "step": 16930 }, { "epoch": 7.898787313432836, "grad_norm": 0.37735012519601513, "learning_rate": 3.68816927159707e-05, "loss": 0.2822, "step": 16935 }, { "epoch": 7.901119402985074, "grad_norm": 0.36891695848748507, "learning_rate": 3.6873806122038964e-05, "loss": 0.2801, "step": 16940 }, { "epoch": 7.903451492537314, "grad_norm": 0.360258348996527, "learning_rate": 3.686591813431206e-05, "loss": 0.2786, "step": 16945 }, { "epoch": 7.905783582089552, "grad_norm": 0.4019989424643233, "learning_rate": 3.685802875396287e-05, "loss": 0.2803, "step": 16950 }, { "epoch": 7.9081156716417915, "grad_norm": 0.3796773941741753, "learning_rate": 3.6850137982164466e-05, "loss": 0.2746, "step": 16955 }, { "epoch": 7.91044776119403, "grad_norm": 0.37105850455008893, "learning_rate": 3.684224582009014e-05, "loss": 0.2783, "step": 16960 }, { "epoch": 7.9127798507462686, "grad_norm": 0.3761279082445865, "learning_rate": 3.683435226891335e-05, "loss": 0.2698, "step": 16965 }, { "epoch": 7.915111940298507, "grad_norm": 0.38044707112681947, "learning_rate": 3.682645732980783e-05, "loss": 0.281, "step": 16970 }, { "epoch": 7.9174440298507465, "grad_norm": 0.39664604998319336, "learning_rate": 3.681856100394745e-05, "loss": 0.2849, "step": 16975 }, { "epoch": 7.919776119402985, "grad_norm": 0.37961215229832923, "learning_rate": 3.6810663292506344e-05, "loss": 0.2749, "step": 16980 }, { "epoch": 7.9221082089552235, "grad_norm": 0.40455263526832635, "learning_rate": 3.68027641966588e-05, "loss": 0.2771, "step": 16985 }, { "epoch": 7.924440298507463, "grad_norm": 0.37883081174690547, "learning_rate": 3.6794863717579365e-05, "loss": 0.2727, "step": 16990 }, { "epoch": 7.926772388059701, "grad_norm": 0.3605438191024723, "learning_rate": 3.6786961856442737e-05, "loss": 0.2794, "step": 16995 }, { "epoch": 7.92910447761194, "grad_norm": 0.35377724597949683, "learning_rate": 3.677905861442387e-05, "loss": 0.2744, "step": 17000 }, { "epoch": 7.931436567164179, "grad_norm": 0.39308962584635243, "learning_rate": 3.677115399269789e-05, "loss": 0.2802, "step": 17005 }, { "epoch": 7.933768656716418, "grad_norm": 0.3652036215635847, "learning_rate": 3.676324799244014e-05, "loss": 0.279, "step": 17010 }, { "epoch": 7.936100746268656, "grad_norm": 0.3814574681140552, "learning_rate": 3.675534061482617e-05, "loss": 0.2785, "step": 17015 }, { "epoch": 7.938432835820896, "grad_norm": 0.3732184327438152, "learning_rate": 3.6747431861031716e-05, "loss": 0.2798, "step": 17020 }, { "epoch": 7.940764925373134, "grad_norm": 0.39521127597608596, "learning_rate": 3.6739521732232756e-05, "loss": 0.2748, "step": 17025 }, { "epoch": 7.943097014925373, "grad_norm": 0.38102606058045563, "learning_rate": 3.673161022960544e-05, "loss": 0.2715, "step": 17030 }, { "epoch": 7.945429104477612, "grad_norm": 0.3572797708707033, "learning_rate": 3.6723697354326134e-05, "loss": 0.2798, "step": 17035 }, { "epoch": 7.947761194029851, "grad_norm": 0.36732653367769375, "learning_rate": 3.67157831075714e-05, "loss": 0.2761, "step": 17040 }, { "epoch": 7.950093283582089, "grad_norm": 0.3755194836849493, "learning_rate": 3.670786749051801e-05, "loss": 0.2777, "step": 17045 }, { "epoch": 7.952425373134329, "grad_norm": 0.3640098971651658, "learning_rate": 3.6699950504342954e-05, "loss": 0.2683, "step": 17050 }, { "epoch": 7.954757462686567, "grad_norm": 0.36570697147924836, "learning_rate": 3.669203215022341e-05, "loss": 0.2725, "step": 17055 }, { "epoch": 7.957089552238806, "grad_norm": 0.37668997227880696, "learning_rate": 3.6684112429336745e-05, "loss": 0.273, "step": 17060 }, { "epoch": 7.959421641791045, "grad_norm": 0.38047335225824575, "learning_rate": 3.667619134286057e-05, "loss": 0.2711, "step": 17065 }, { "epoch": 7.961753731343284, "grad_norm": 0.4069315747526958, "learning_rate": 3.666826889197265e-05, "loss": 0.2765, "step": 17070 }, { "epoch": 7.964085820895522, "grad_norm": 0.36021925485841055, "learning_rate": 3.666034507785098e-05, "loss": 0.2838, "step": 17075 }, { "epoch": 7.9664179104477615, "grad_norm": 0.39134945562935364, "learning_rate": 3.665241990167378e-05, "loss": 0.279, "step": 17080 }, { "epoch": 7.96875, "grad_norm": 0.3561313505173692, "learning_rate": 3.664449336461943e-05, "loss": 0.275, "step": 17085 }, { "epoch": 7.9710820895522385, "grad_norm": 0.3902585525195908, "learning_rate": 3.663656546786653e-05, "loss": 0.2832, "step": 17090 }, { "epoch": 7.973414179104478, "grad_norm": 0.3651929416135597, "learning_rate": 3.6628636212593874e-05, "loss": 0.2787, "step": 17095 }, { "epoch": 7.975746268656716, "grad_norm": 0.3798951702619177, "learning_rate": 3.6620705599980494e-05, "loss": 0.2826, "step": 17100 }, { "epoch": 7.978078358208955, "grad_norm": 0.35662140507859835, "learning_rate": 3.6612773631205567e-05, "loss": 0.2803, "step": 17105 }, { "epoch": 7.980410447761194, "grad_norm": 0.36138341291753723, "learning_rate": 3.660484030744852e-05, "loss": 0.2778, "step": 17110 }, { "epoch": 7.982742537313433, "grad_norm": 0.378008900543922, "learning_rate": 3.659690562988894e-05, "loss": 0.2839, "step": 17115 }, { "epoch": 7.985074626865671, "grad_norm": 0.3805643470275995, "learning_rate": 3.6588969599706665e-05, "loss": 0.2693, "step": 17120 }, { "epoch": 7.987406716417911, "grad_norm": 0.3801489160738746, "learning_rate": 3.6581032218081685e-05, "loss": 0.2807, "step": 17125 }, { "epoch": 7.989738805970149, "grad_norm": 0.3838259826313762, "learning_rate": 3.6573093486194226e-05, "loss": 0.2751, "step": 17130 }, { "epoch": 7.992070895522388, "grad_norm": 0.3666301925365707, "learning_rate": 3.65651534052247e-05, "loss": 0.2731, "step": 17135 }, { "epoch": 7.994402985074627, "grad_norm": 0.3775755091490725, "learning_rate": 3.655721197635371e-05, "loss": 0.2766, "step": 17140 }, { "epoch": 7.996735074626866, "grad_norm": 0.36610187262312943, "learning_rate": 3.654926920076208e-05, "loss": 0.2781, "step": 17145 }, { "epoch": 7.999067164179104, "grad_norm": 0.383846580351878, "learning_rate": 3.654132507963083e-05, "loss": 0.2806, "step": 17150 }, { "epoch": 8.001399253731343, "grad_norm": 0.38501838719866915, "learning_rate": 3.653337961414116e-05, "loss": 0.2436, "step": 17155 }, { "epoch": 8.003731343283581, "grad_norm": 0.472206935019026, "learning_rate": 3.652543280547449e-05, "loss": 0.2126, "step": 17160 }, { "epoch": 8.006063432835822, "grad_norm": 0.3930909389269255, "learning_rate": 3.651748465481245e-05, "loss": 0.2027, "step": 17165 }, { "epoch": 8.00839552238806, "grad_norm": 0.4220678932318882, "learning_rate": 3.650953516333682e-05, "loss": 0.2048, "step": 17170 }, { "epoch": 8.010727611940299, "grad_norm": 0.3960022808218618, "learning_rate": 3.6501584332229645e-05, "loss": 0.2114, "step": 17175 }, { "epoch": 8.013059701492537, "grad_norm": 0.4320455967822555, "learning_rate": 3.6493632162673125e-05, "loss": 0.2008, "step": 17180 }, { "epoch": 8.015391791044776, "grad_norm": 0.38741375939257766, "learning_rate": 3.648567865584967e-05, "loss": 0.2107, "step": 17185 }, { "epoch": 8.017723880597014, "grad_norm": 0.4036528512578452, "learning_rate": 3.647772381294189e-05, "loss": 0.2054, "step": 17190 }, { "epoch": 8.020055970149254, "grad_norm": 1.9125959542802062, "learning_rate": 3.64697676351326e-05, "loss": 0.2133, "step": 17195 }, { "epoch": 8.022388059701493, "grad_norm": 0.4166417100022577, "learning_rate": 3.6461810123604805e-05, "loss": 0.2057, "step": 17200 }, { "epoch": 8.024720149253731, "grad_norm": 0.44870093800688005, "learning_rate": 3.645385127954171e-05, "loss": 0.2041, "step": 17205 }, { "epoch": 8.02705223880597, "grad_norm": 0.427467886846065, "learning_rate": 3.6445891104126714e-05, "loss": 0.2052, "step": 17210 }, { "epoch": 8.029384328358208, "grad_norm": 0.4164873986597023, "learning_rate": 3.643792959854342e-05, "loss": 0.2036, "step": 17215 }, { "epoch": 8.031716417910447, "grad_norm": 0.40751179789851155, "learning_rate": 3.6429966763975636e-05, "loss": 0.2048, "step": 17220 }, { "epoch": 8.034048507462687, "grad_norm": 0.42656603610209826, "learning_rate": 3.642200260160735e-05, "loss": 0.2012, "step": 17225 }, { "epoch": 8.036380597014926, "grad_norm": 0.39629749325189045, "learning_rate": 3.641403711262277e-05, "loss": 0.2102, "step": 17230 }, { "epoch": 8.038712686567164, "grad_norm": 0.40197502260040985, "learning_rate": 3.6406070298206265e-05, "loss": 0.2064, "step": 17235 }, { "epoch": 8.041044776119403, "grad_norm": 0.39471739645272175, "learning_rate": 3.639810215954245e-05, "loss": 0.2021, "step": 17240 }, { "epoch": 8.043376865671641, "grad_norm": 0.3936605987719547, "learning_rate": 3.639013269781609e-05, "loss": 0.1981, "step": 17245 }, { "epoch": 8.04570895522388, "grad_norm": 0.4054832631736671, "learning_rate": 3.638216191421218e-05, "loss": 0.2044, "step": 17250 }, { "epoch": 8.04804104477612, "grad_norm": 0.40249536102171446, "learning_rate": 3.637418980991589e-05, "loss": 0.2142, "step": 17255 }, { "epoch": 8.050373134328359, "grad_norm": 0.3978341672751216, "learning_rate": 3.6366216386112605e-05, "loss": 0.2064, "step": 17260 }, { "epoch": 8.052705223880597, "grad_norm": 0.4066578826621695, "learning_rate": 3.635824164398789e-05, "loss": 0.2113, "step": 17265 }, { "epoch": 8.055037313432836, "grad_norm": 0.4210350758299634, "learning_rate": 3.635026558472752e-05, "loss": 0.2024, "step": 17270 }, { "epoch": 8.057369402985074, "grad_norm": 0.40468950558373756, "learning_rate": 3.634228820951744e-05, "loss": 0.2036, "step": 17275 }, { "epoch": 8.059701492537313, "grad_norm": 0.4006505268732701, "learning_rate": 3.633430951954383e-05, "loss": 0.2062, "step": 17280 }, { "epoch": 8.062033582089553, "grad_norm": 0.4188079217739421, "learning_rate": 3.6326329515993055e-05, "loss": 0.2132, "step": 17285 }, { "epoch": 8.064365671641792, "grad_norm": 0.40504182811298756, "learning_rate": 3.631834820005163e-05, "loss": 0.2135, "step": 17290 }, { "epoch": 8.06669776119403, "grad_norm": 0.405224711549134, "learning_rate": 3.6310365572906314e-05, "loss": 0.2071, "step": 17295 }, { "epoch": 8.069029850746269, "grad_norm": 0.3977682831984614, "learning_rate": 3.6302381635744056e-05, "loss": 0.2059, "step": 17300 }, { "epoch": 8.071361940298507, "grad_norm": 0.407893177468354, "learning_rate": 3.629439638975199e-05, "loss": 0.2053, "step": 17305 }, { "epoch": 8.073694029850746, "grad_norm": 0.4078465953639046, "learning_rate": 3.628640983611744e-05, "loss": 0.2105, "step": 17310 }, { "epoch": 8.076026119402986, "grad_norm": 0.4371841923876009, "learning_rate": 3.6278421976027926e-05, "loss": 0.2071, "step": 17315 }, { "epoch": 8.078358208955224, "grad_norm": 0.4194670224096998, "learning_rate": 3.6270432810671176e-05, "loss": 0.2064, "step": 17320 }, { "epoch": 8.080690298507463, "grad_norm": 0.39050183775211467, "learning_rate": 3.6262442341235105e-05, "loss": 0.2076, "step": 17325 }, { "epoch": 8.083022388059701, "grad_norm": 0.4100920784868964, "learning_rate": 3.62544505689078e-05, "loss": 0.2069, "step": 17330 }, { "epoch": 8.08535447761194, "grad_norm": 0.3905096650620924, "learning_rate": 3.6246457494877585e-05, "loss": 0.2108, "step": 17335 }, { "epoch": 8.087686567164178, "grad_norm": 0.4542863911217664, "learning_rate": 3.623846312033294e-05, "loss": 0.2019, "step": 17340 }, { "epoch": 8.090018656716419, "grad_norm": 0.4250554849814726, "learning_rate": 3.623046744646254e-05, "loss": 0.2134, "step": 17345 }, { "epoch": 8.092350746268657, "grad_norm": 0.4078093294108674, "learning_rate": 3.622247047445529e-05, "loss": 0.2106, "step": 17350 }, { "epoch": 8.094682835820896, "grad_norm": 0.4049910753646204, "learning_rate": 3.6214472205500256e-05, "loss": 0.2085, "step": 17355 }, { "epoch": 8.097014925373134, "grad_norm": 0.4005322413157875, "learning_rate": 3.6206472640786696e-05, "loss": 0.2096, "step": 17360 }, { "epoch": 8.099347014925373, "grad_norm": 0.4190157157775978, "learning_rate": 3.6198471781504076e-05, "loss": 0.2107, "step": 17365 }, { "epoch": 8.101679104477611, "grad_norm": 0.43087216627936825, "learning_rate": 3.619046962884204e-05, "loss": 0.2079, "step": 17370 }, { "epoch": 8.104011194029852, "grad_norm": 0.39981706615854434, "learning_rate": 3.618246618399043e-05, "loss": 0.2159, "step": 17375 }, { "epoch": 8.10634328358209, "grad_norm": 0.42246545851732975, "learning_rate": 3.617446144813929e-05, "loss": 0.2073, "step": 17380 }, { "epoch": 8.108675373134329, "grad_norm": 0.40235320219697757, "learning_rate": 3.616645542247885e-05, "loss": 0.2091, "step": 17385 }, { "epoch": 8.111007462686567, "grad_norm": 0.39227574719341507, "learning_rate": 3.6158448108199515e-05, "loss": 0.1997, "step": 17390 }, { "epoch": 8.113339552238806, "grad_norm": 0.4069289254006938, "learning_rate": 3.6150439506491915e-05, "loss": 0.2063, "step": 17395 }, { "epoch": 8.115671641791044, "grad_norm": 0.39872633760424997, "learning_rate": 3.614242961854683e-05, "loss": 0.2138, "step": 17400 }, { "epoch": 8.118003731343284, "grad_norm": 0.4310145540100771, "learning_rate": 3.6134418445555275e-05, "loss": 0.2128, "step": 17405 }, { "epoch": 8.120335820895523, "grad_norm": 0.41510898170743116, "learning_rate": 3.6126405988708424e-05, "loss": 0.2071, "step": 17410 }, { "epoch": 8.122667910447761, "grad_norm": 0.40335678227186195, "learning_rate": 3.611839224919765e-05, "loss": 0.2115, "step": 17415 }, { "epoch": 8.125, "grad_norm": 0.39856434016494, "learning_rate": 3.611037722821452e-05, "loss": 0.2051, "step": 17420 }, { "epoch": 8.127332089552239, "grad_norm": 0.4169390373114269, "learning_rate": 3.6102360926950796e-05, "loss": 0.213, "step": 17425 }, { "epoch": 8.129664179104477, "grad_norm": 0.38907789932561754, "learning_rate": 3.609434334659842e-05, "loss": 0.2133, "step": 17430 }, { "epoch": 8.131996268656717, "grad_norm": 0.405986223710685, "learning_rate": 3.608632448834954e-05, "loss": 0.2174, "step": 17435 }, { "epoch": 8.134328358208956, "grad_norm": 0.387449618525237, "learning_rate": 3.607830435339648e-05, "loss": 0.2058, "step": 17440 }, { "epoch": 8.136660447761194, "grad_norm": 0.39394827140645666, "learning_rate": 3.6070282942931744e-05, "loss": 0.2124, "step": 17445 }, { "epoch": 8.138992537313433, "grad_norm": 0.4141705156009503, "learning_rate": 3.606226025814805e-05, "loss": 0.215, "step": 17450 }, { "epoch": 8.141324626865671, "grad_norm": 0.42361484373576314, "learning_rate": 3.605423630023829e-05, "loss": 0.212, "step": 17455 }, { "epoch": 8.14365671641791, "grad_norm": 0.3984390212622635, "learning_rate": 3.604621107039555e-05, "loss": 0.2077, "step": 17460 }, { "epoch": 8.145988805970148, "grad_norm": 0.4031292452370952, "learning_rate": 3.603818456981312e-05, "loss": 0.2096, "step": 17465 }, { "epoch": 8.148320895522389, "grad_norm": 0.41421847285199187, "learning_rate": 3.6030156799684435e-05, "loss": 0.2113, "step": 17470 }, { "epoch": 8.150652985074627, "grad_norm": 0.39298325574984533, "learning_rate": 3.602212776120317e-05, "loss": 0.212, "step": 17475 }, { "epoch": 8.152985074626866, "grad_norm": 0.4042426744566189, "learning_rate": 3.601409745556315e-05, "loss": 0.2144, "step": 17480 }, { "epoch": 8.155317164179104, "grad_norm": 0.42126586400325766, "learning_rate": 3.6006065883958425e-05, "loss": 0.2162, "step": 17485 }, { "epoch": 8.157649253731343, "grad_norm": 0.413740584096473, "learning_rate": 3.5998033047583194e-05, "loss": 0.2185, "step": 17490 }, { "epoch": 8.159981343283581, "grad_norm": 0.4102596811283832, "learning_rate": 3.598999894763187e-05, "loss": 0.2082, "step": 17495 }, { "epoch": 8.162313432835822, "grad_norm": 0.4122460877243404, "learning_rate": 3.598196358529906e-05, "loss": 0.2219, "step": 17500 }, { "epoch": 8.16464552238806, "grad_norm": 0.4017903442313962, "learning_rate": 3.597392696177953e-05, "loss": 0.214, "step": 17505 }, { "epoch": 8.166977611940299, "grad_norm": 0.4088333217076534, "learning_rate": 3.596588907826824e-05, "loss": 0.21, "step": 17510 }, { "epoch": 8.169309701492537, "grad_norm": 0.3942404107724422, "learning_rate": 3.595784993596036e-05, "loss": 0.2134, "step": 17515 }, { "epoch": 8.171641791044776, "grad_norm": 0.4242275672973836, "learning_rate": 3.5949809536051235e-05, "loss": 0.2142, "step": 17520 }, { "epoch": 8.173973880597014, "grad_norm": 0.39909309737226384, "learning_rate": 3.594176787973638e-05, "loss": 0.2104, "step": 17525 }, { "epoch": 8.176305970149254, "grad_norm": 0.4232687103317804, "learning_rate": 3.593372496821154e-05, "loss": 0.2111, "step": 17530 }, { "epoch": 8.178638059701493, "grad_norm": 0.4121659872641697, "learning_rate": 3.5925680802672584e-05, "loss": 0.2115, "step": 17535 }, { "epoch": 8.180970149253731, "grad_norm": 0.39697272371669484, "learning_rate": 3.591763538431563e-05, "loss": 0.2116, "step": 17540 }, { "epoch": 8.18330223880597, "grad_norm": 0.4094628725618236, "learning_rate": 3.590958871433695e-05, "loss": 0.2218, "step": 17545 }, { "epoch": 8.185634328358208, "grad_norm": 0.3954438293184553, "learning_rate": 3.5901540793933e-05, "loss": 0.2072, "step": 17550 }, { "epoch": 8.187966417910447, "grad_norm": 0.43358525331356745, "learning_rate": 3.5893491624300416e-05, "loss": 0.2158, "step": 17555 }, { "epoch": 8.190298507462687, "grad_norm": 0.4091497563831329, "learning_rate": 3.5885441206636065e-05, "loss": 0.2154, "step": 17560 }, { "epoch": 8.192630597014926, "grad_norm": 0.428680244247364, "learning_rate": 3.587738954213694e-05, "loss": 0.2101, "step": 17565 }, { "epoch": 8.194962686567164, "grad_norm": 0.3943038813903387, "learning_rate": 3.586933663200026e-05, "loss": 0.2127, "step": 17570 }, { "epoch": 8.197294776119403, "grad_norm": 0.44407542780865855, "learning_rate": 3.586128247742341e-05, "loss": 0.2148, "step": 17575 }, { "epoch": 8.199626865671641, "grad_norm": 0.40109854530435846, "learning_rate": 3.585322707960397e-05, "loss": 0.2117, "step": 17580 }, { "epoch": 8.20195895522388, "grad_norm": 0.40086989031896125, "learning_rate": 3.584517043973969e-05, "loss": 0.2164, "step": 17585 }, { "epoch": 8.20429104477612, "grad_norm": 0.39876130864137677, "learning_rate": 3.583711255902853e-05, "loss": 0.2094, "step": 17590 }, { "epoch": 8.206623134328359, "grad_norm": 0.4255817451811311, "learning_rate": 3.58290534386686e-05, "loss": 0.2143, "step": 17595 }, { "epoch": 8.208955223880597, "grad_norm": 0.3904496945668143, "learning_rate": 3.5820993079858235e-05, "loss": 0.2194, "step": 17600 }, { "epoch": 8.211287313432836, "grad_norm": 0.39526092317221234, "learning_rate": 3.581293148379592e-05, "loss": 0.2191, "step": 17605 }, { "epoch": 8.213619402985074, "grad_norm": 0.4112112915987695, "learning_rate": 3.580486865168034e-05, "loss": 0.2121, "step": 17610 }, { "epoch": 8.215951492537313, "grad_norm": 0.3890483825015122, "learning_rate": 3.579680458471037e-05, "loss": 0.2178, "step": 17615 }, { "epoch": 8.218283582089553, "grad_norm": 0.4460707505866275, "learning_rate": 3.5788739284085044e-05, "loss": 0.2147, "step": 17620 }, { "epoch": 8.220615671641792, "grad_norm": 0.39300285164336285, "learning_rate": 3.57806727510036e-05, "loss": 0.214, "step": 17625 }, { "epoch": 8.22294776119403, "grad_norm": 0.4235750266762855, "learning_rate": 3.577260498666546e-05, "loss": 0.2176, "step": 17630 }, { "epoch": 8.225279850746269, "grad_norm": 0.3885360046282675, "learning_rate": 3.5764535992270226e-05, "loss": 0.2159, "step": 17635 }, { "epoch": 8.227611940298507, "grad_norm": 0.42342689991891747, "learning_rate": 3.575646576901767e-05, "loss": 0.2173, "step": 17640 }, { "epoch": 8.229944029850746, "grad_norm": 0.396939842641254, "learning_rate": 3.5748394318107765e-05, "loss": 0.2151, "step": 17645 }, { "epoch": 8.232276119402986, "grad_norm": 0.3976543675163237, "learning_rate": 3.5740321640740646e-05, "loss": 0.2226, "step": 17650 }, { "epoch": 8.234608208955224, "grad_norm": 0.4204902997370208, "learning_rate": 3.573224773811665e-05, "loss": 0.2142, "step": 17655 }, { "epoch": 8.236940298507463, "grad_norm": 0.4224790534322908, "learning_rate": 3.57241726114363e-05, "loss": 0.2171, "step": 17660 }, { "epoch": 8.239272388059701, "grad_norm": 0.3968044602225004, "learning_rate": 3.5716096261900274e-05, "loss": 0.2154, "step": 17665 }, { "epoch": 8.24160447761194, "grad_norm": 0.40518747211042927, "learning_rate": 3.570801869070945e-05, "loss": 0.2085, "step": 17670 }, { "epoch": 8.243936567164178, "grad_norm": 0.41721247133811834, "learning_rate": 3.5699939899064894e-05, "loss": 0.2188, "step": 17675 }, { "epoch": 8.246268656716419, "grad_norm": 0.42055594096225246, "learning_rate": 3.5691859888167846e-05, "loss": 0.2171, "step": 17680 }, { "epoch": 8.248600746268657, "grad_norm": 0.4102644647627275, "learning_rate": 3.5683778659219714e-05, "loss": 0.2168, "step": 17685 }, { "epoch": 8.250932835820896, "grad_norm": 0.422333943451172, "learning_rate": 3.5675696213422105e-05, "loss": 0.2134, "step": 17690 }, { "epoch": 8.253264925373134, "grad_norm": 0.40709014182989134, "learning_rate": 3.56676125519768e-05, "loss": 0.2098, "step": 17695 }, { "epoch": 8.255597014925373, "grad_norm": 0.386166669469734, "learning_rate": 3.5659527676085774e-05, "loss": 0.2155, "step": 17700 }, { "epoch": 8.257929104477611, "grad_norm": 0.417613496140446, "learning_rate": 3.565144158695115e-05, "loss": 0.2204, "step": 17705 }, { "epoch": 8.260261194029852, "grad_norm": 0.3986918954588642, "learning_rate": 3.564335428577526e-05, "loss": 0.2149, "step": 17710 }, { "epoch": 8.26259328358209, "grad_norm": 0.3933692330157474, "learning_rate": 3.5635265773760625e-05, "loss": 0.2177, "step": 17715 }, { "epoch": 8.264925373134329, "grad_norm": 0.40345149074003106, "learning_rate": 3.56271760521099e-05, "loss": 0.2192, "step": 17720 }, { "epoch": 8.267257462686567, "grad_norm": 0.39759378731335293, "learning_rate": 3.561908512202597e-05, "loss": 0.2055, "step": 17725 }, { "epoch": 8.269589552238806, "grad_norm": 0.4182832602649684, "learning_rate": 3.561099298471187e-05, "loss": 0.2206, "step": 17730 }, { "epoch": 8.271921641791044, "grad_norm": 0.42742285145227277, "learning_rate": 3.560289964137083e-05, "loss": 0.2273, "step": 17735 }, { "epoch": 8.274253731343283, "grad_norm": 0.40708153709728956, "learning_rate": 3.559480509320625e-05, "loss": 0.219, "step": 17740 }, { "epoch": 8.276585820895523, "grad_norm": 0.4247578150468134, "learning_rate": 3.558670934142171e-05, "loss": 0.2169, "step": 17745 }, { "epoch": 8.278917910447761, "grad_norm": 0.381575842900316, "learning_rate": 3.557861238722097e-05, "loss": 0.2071, "step": 17750 }, { "epoch": 8.28125, "grad_norm": 0.4259620710528104, "learning_rate": 3.557051423180797e-05, "loss": 0.2177, "step": 17755 }, { "epoch": 8.283582089552239, "grad_norm": 0.43156012568956553, "learning_rate": 3.556241487638682e-05, "loss": 0.2217, "step": 17760 }, { "epoch": 8.285914179104477, "grad_norm": 0.40858219398978746, "learning_rate": 3.5554314322161844e-05, "loss": 0.217, "step": 17765 }, { "epoch": 8.288246268656717, "grad_norm": 0.4205288996358645, "learning_rate": 3.554621257033749e-05, "loss": 0.2177, "step": 17770 }, { "epoch": 8.290578358208956, "grad_norm": 0.3976169939474134, "learning_rate": 3.5538109622118414e-05, "loss": 0.2189, "step": 17775 }, { "epoch": 8.292910447761194, "grad_norm": 0.40730860301124666, "learning_rate": 3.5530005478709446e-05, "loss": 0.2266, "step": 17780 }, { "epoch": 8.295242537313433, "grad_norm": 0.3889102207519085, "learning_rate": 3.552190014131562e-05, "loss": 0.2168, "step": 17785 }, { "epoch": 8.297574626865671, "grad_norm": 0.4029483566141981, "learning_rate": 3.551379361114209e-05, "loss": 0.217, "step": 17790 }, { "epoch": 8.29990671641791, "grad_norm": 0.4131533814308135, "learning_rate": 3.550568588939423e-05, "loss": 0.2172, "step": 17795 }, { "epoch": 8.302238805970148, "grad_norm": 0.39897504511775184, "learning_rate": 3.549757697727759e-05, "loss": 0.2213, "step": 17800 }, { "epoch": 8.304570895522389, "grad_norm": 0.4188999798621589, "learning_rate": 3.5489466875997876e-05, "loss": 0.22, "step": 17805 }, { "epoch": 8.306902985074627, "grad_norm": 0.41018795000389036, "learning_rate": 3.548135558676098e-05, "loss": 0.2137, "step": 17810 }, { "epoch": 8.309235074626866, "grad_norm": 0.3968853815686748, "learning_rate": 3.547324311077299e-05, "loss": 0.2146, "step": 17815 }, { "epoch": 8.311567164179104, "grad_norm": 0.4108799621615323, "learning_rate": 3.546512944924014e-05, "loss": 0.2146, "step": 17820 }, { "epoch": 8.313899253731343, "grad_norm": 0.40142287338557875, "learning_rate": 3.5457014603368844e-05, "loss": 0.2133, "step": 17825 }, { "epoch": 8.316231343283581, "grad_norm": 0.414850972566944, "learning_rate": 3.544889857436573e-05, "loss": 0.2173, "step": 17830 }, { "epoch": 8.318563432835822, "grad_norm": 0.40961091798841387, "learning_rate": 3.544078136343755e-05, "loss": 0.2212, "step": 17835 }, { "epoch": 8.32089552238806, "grad_norm": 0.4061564238556182, "learning_rate": 3.5432662971791264e-05, "loss": 0.2184, "step": 17840 }, { "epoch": 8.323227611940299, "grad_norm": 0.4000079176893927, "learning_rate": 3.5424543400634e-05, "loss": 0.2208, "step": 17845 }, { "epoch": 8.325559701492537, "grad_norm": 0.401314211999886, "learning_rate": 3.541642265117306e-05, "loss": 0.2187, "step": 17850 }, { "epoch": 8.327891791044776, "grad_norm": 0.4024074146613495, "learning_rate": 3.5408300724615915e-05, "loss": 0.217, "step": 17855 }, { "epoch": 8.330223880597014, "grad_norm": 0.4072654822625798, "learning_rate": 3.540017762217023e-05, "loss": 0.2258, "step": 17860 }, { "epoch": 8.332555970149254, "grad_norm": 0.41931461538505066, "learning_rate": 3.539205334504382e-05, "loss": 0.2226, "step": 17865 }, { "epoch": 8.334888059701493, "grad_norm": 0.42100755681185303, "learning_rate": 3.5383927894444694e-05, "loss": 0.2213, "step": 17870 }, { "epoch": 8.337220149253731, "grad_norm": 0.40190812319038477, "learning_rate": 3.537580127158103e-05, "loss": 0.2189, "step": 17875 }, { "epoch": 8.33955223880597, "grad_norm": 2.6593010580589755, "learning_rate": 3.5367673477661174e-05, "loss": 0.2135, "step": 17880 }, { "epoch": 8.341884328358208, "grad_norm": 0.4259285393127884, "learning_rate": 3.5359544513893655e-05, "loss": 0.2216, "step": 17885 }, { "epoch": 8.344216417910447, "grad_norm": 0.41490464324990206, "learning_rate": 3.535141438148717e-05, "loss": 0.2205, "step": 17890 }, { "epoch": 8.346548507462687, "grad_norm": 0.3791543583877595, "learning_rate": 3.53432830816506e-05, "loss": 0.2219, "step": 17895 }, { "epoch": 8.348880597014926, "grad_norm": 0.4167826294378733, "learning_rate": 3.533515061559297e-05, "loss": 0.2201, "step": 17900 }, { "epoch": 8.351212686567164, "grad_norm": 0.4168356464321184, "learning_rate": 3.532701698452352e-05, "loss": 0.222, "step": 17905 }, { "epoch": 8.353544776119403, "grad_norm": 0.37981792031239436, "learning_rate": 3.5318882189651635e-05, "loss": 0.2206, "step": 17910 }, { "epoch": 8.355876865671641, "grad_norm": 0.392447299158986, "learning_rate": 3.531074623218689e-05, "loss": 0.2209, "step": 17915 }, { "epoch": 8.35820895522388, "grad_norm": 0.40751190332204273, "learning_rate": 3.5302609113339e-05, "loss": 0.216, "step": 17920 }, { "epoch": 8.36054104477612, "grad_norm": 0.4025108934328051, "learning_rate": 3.52944708343179e-05, "loss": 0.2212, "step": 17925 }, { "epoch": 8.362873134328359, "grad_norm": 0.37541472695413164, "learning_rate": 3.5286331396333675e-05, "loss": 0.2134, "step": 17930 }, { "epoch": 8.365205223880597, "grad_norm": 0.42613844186992966, "learning_rate": 3.527819080059657e-05, "loss": 0.222, "step": 17935 }, { "epoch": 8.367537313432836, "grad_norm": 0.4093197268715918, "learning_rate": 3.5270049048317016e-05, "loss": 0.2254, "step": 17940 }, { "epoch": 8.369869402985074, "grad_norm": 0.40315204323596715, "learning_rate": 3.5261906140705615e-05, "loss": 0.2173, "step": 17945 }, { "epoch": 8.372201492537313, "grad_norm": 0.4185616981860159, "learning_rate": 3.525376207897314e-05, "loss": 0.2192, "step": 17950 }, { "epoch": 8.374533582089553, "grad_norm": 0.41258517013005186, "learning_rate": 3.524561686433053e-05, "loss": 0.2222, "step": 17955 }, { "epoch": 8.376865671641792, "grad_norm": 0.41116225824865854, "learning_rate": 3.5237470497988905e-05, "loss": 0.2185, "step": 17960 }, { "epoch": 8.37919776119403, "grad_norm": 0.426160204577497, "learning_rate": 3.522932298115955e-05, "loss": 0.2212, "step": 17965 }, { "epoch": 8.381529850746269, "grad_norm": 0.41155270297109536, "learning_rate": 3.5221174315053935e-05, "loss": 0.2193, "step": 17970 }, { "epoch": 8.383861940298507, "grad_norm": 0.41744563604645135, "learning_rate": 3.5213024500883666e-05, "loss": 0.2256, "step": 17975 }, { "epoch": 8.386194029850746, "grad_norm": 0.3913684911114912, "learning_rate": 3.520487353986056e-05, "loss": 0.218, "step": 17980 }, { "epoch": 8.388526119402986, "grad_norm": 0.41633196276926504, "learning_rate": 3.5196721433196575e-05, "loss": 0.2186, "step": 17985 }, { "epoch": 8.390858208955224, "grad_norm": 0.43823485466735335, "learning_rate": 3.518856818210387e-05, "loss": 0.216, "step": 17990 }, { "epoch": 8.393190298507463, "grad_norm": 0.410715906367167, "learning_rate": 3.5180413787794724e-05, "loss": 0.2196, "step": 17995 }, { "epoch": 8.395522388059701, "grad_norm": 0.38453217898915526, "learning_rate": 3.517225825148164e-05, "loss": 0.2153, "step": 18000 }, { "epoch": 8.39785447761194, "grad_norm": 0.4115630879618281, "learning_rate": 3.516410157437727e-05, "loss": 0.223, "step": 18005 }, { "epoch": 8.400186567164178, "grad_norm": 0.39797727309947234, "learning_rate": 3.515594375769442e-05, "loss": 0.2224, "step": 18010 }, { "epoch": 8.402518656716419, "grad_norm": 0.4221138373561232, "learning_rate": 3.514778480264609e-05, "loss": 0.2163, "step": 18015 }, { "epoch": 8.404850746268657, "grad_norm": 0.4199944663522031, "learning_rate": 3.513962471044543e-05, "loss": 0.2256, "step": 18020 }, { "epoch": 8.407182835820896, "grad_norm": 0.4100447790190883, "learning_rate": 3.513146348230578e-05, "loss": 0.2184, "step": 18025 }, { "epoch": 8.409514925373134, "grad_norm": 0.3885716434856497, "learning_rate": 3.512330111944062e-05, "loss": 0.2099, "step": 18030 }, { "epoch": 8.411847014925373, "grad_norm": 0.39397417129508544, "learning_rate": 3.511513762306363e-05, "loss": 0.2225, "step": 18035 }, { "epoch": 8.414179104477611, "grad_norm": 0.40723015647177385, "learning_rate": 3.510697299438864e-05, "loss": 0.2197, "step": 18040 }, { "epoch": 8.416511194029852, "grad_norm": 0.4166942207068395, "learning_rate": 3.509880723462965e-05, "loss": 0.2267, "step": 18045 }, { "epoch": 8.41884328358209, "grad_norm": 0.3992672101924755, "learning_rate": 3.509064034500082e-05, "loss": 0.2163, "step": 18050 }, { "epoch": 8.421175373134329, "grad_norm": 0.4006164444275421, "learning_rate": 3.50824723267165e-05, "loss": 0.2288, "step": 18055 }, { "epoch": 8.423507462686567, "grad_norm": 0.49953825799645785, "learning_rate": 3.50743031809912e-05, "loss": 0.2171, "step": 18060 }, { "epoch": 8.425839552238806, "grad_norm": 0.4155153889368617, "learning_rate": 3.506613290903958e-05, "loss": 0.2288, "step": 18065 }, { "epoch": 8.428171641791044, "grad_norm": 0.4067872764716404, "learning_rate": 3.505796151207651e-05, "loss": 0.2248, "step": 18070 }, { "epoch": 8.430503731343283, "grad_norm": 0.3999291698840521, "learning_rate": 3.504978899131696e-05, "loss": 0.2305, "step": 18075 }, { "epoch": 8.432835820895523, "grad_norm": 0.42254726861038905, "learning_rate": 3.504161534797612e-05, "loss": 0.2161, "step": 18080 }, { "epoch": 8.435167910447761, "grad_norm": 0.3980579660566814, "learning_rate": 3.503344058326934e-05, "loss": 0.2151, "step": 18085 }, { "epoch": 8.4375, "grad_norm": 0.4171402528192449, "learning_rate": 3.5025264698412126e-05, "loss": 0.224, "step": 18090 }, { "epoch": 8.439832089552239, "grad_norm": 0.4095029446245325, "learning_rate": 3.5017087694620154e-05, "loss": 0.2265, "step": 18095 }, { "epoch": 8.442164179104477, "grad_norm": 0.4181183423534689, "learning_rate": 3.500890957310926e-05, "loss": 0.2311, "step": 18100 }, { "epoch": 8.444496268656717, "grad_norm": 0.39686120452419166, "learning_rate": 3.500073033509546e-05, "loss": 0.2241, "step": 18105 }, { "epoch": 8.446828358208956, "grad_norm": 0.4161440287998863, "learning_rate": 3.4992549981794915e-05, "loss": 0.2216, "step": 18110 }, { "epoch": 8.449160447761194, "grad_norm": 0.39092753176302986, "learning_rate": 3.498436851442398e-05, "loss": 0.2257, "step": 18115 }, { "epoch": 8.451492537313433, "grad_norm": 0.40186220967059433, "learning_rate": 3.497618593419916e-05, "loss": 0.2158, "step": 18120 }, { "epoch": 8.453824626865671, "grad_norm": 0.43616706309642883, "learning_rate": 3.496800224233713e-05, "loss": 0.2265, "step": 18125 }, { "epoch": 8.45615671641791, "grad_norm": 0.3942992552490923, "learning_rate": 3.495981744005471e-05, "loss": 0.22, "step": 18130 }, { "epoch": 8.458488805970148, "grad_norm": 0.39448682254633005, "learning_rate": 3.49516315285689e-05, "loss": 0.2177, "step": 18135 }, { "epoch": 8.460820895522389, "grad_norm": 0.4217001524132117, "learning_rate": 3.494344450909689e-05, "loss": 0.223, "step": 18140 }, { "epoch": 8.463152985074627, "grad_norm": 0.4105533168867623, "learning_rate": 3.4935256382855996e-05, "loss": 0.2224, "step": 18145 }, { "epoch": 8.465485074626866, "grad_norm": 0.43516440050214816, "learning_rate": 3.492706715106372e-05, "loss": 0.2256, "step": 18150 }, { "epoch": 8.467817164179104, "grad_norm": 0.3911329071644769, "learning_rate": 3.491887681493771e-05, "loss": 0.2164, "step": 18155 }, { "epoch": 8.470149253731343, "grad_norm": 0.40770627132023485, "learning_rate": 3.491068537569581e-05, "loss": 0.2231, "step": 18160 }, { "epoch": 8.472481343283581, "grad_norm": 0.42479345836597837, "learning_rate": 3.4902492834555994e-05, "loss": 0.2226, "step": 18165 }, { "epoch": 8.474813432835822, "grad_norm": 0.41512685105767866, "learning_rate": 3.489429919273642e-05, "loss": 0.2241, "step": 18170 }, { "epoch": 8.47714552238806, "grad_norm": 0.39128337904935434, "learning_rate": 3.488610445145539e-05, "loss": 0.2205, "step": 18175 }, { "epoch": 8.479477611940299, "grad_norm": 0.4027995728956263, "learning_rate": 3.4877908611931406e-05, "loss": 0.2254, "step": 18180 }, { "epoch": 8.481809701492537, "grad_norm": 0.4263220905236226, "learning_rate": 3.48697116753831e-05, "loss": 0.2335, "step": 18185 }, { "epoch": 8.484141791044776, "grad_norm": 0.41053649248298185, "learning_rate": 3.486151364302928e-05, "loss": 0.2246, "step": 18190 }, { "epoch": 8.486473880597014, "grad_norm": 0.442100385311907, "learning_rate": 3.485331451608891e-05, "loss": 0.2241, "step": 18195 }, { "epoch": 8.488805970149254, "grad_norm": 0.4037648145556154, "learning_rate": 3.484511429578113e-05, "loss": 0.2176, "step": 18200 }, { "epoch": 8.491138059701493, "grad_norm": 0.4173714613044079, "learning_rate": 3.483691298332522e-05, "loss": 0.2209, "step": 18205 }, { "epoch": 8.493470149253731, "grad_norm": 0.42507778076066716, "learning_rate": 3.482871057994065e-05, "loss": 0.2228, "step": 18210 }, { "epoch": 8.49580223880597, "grad_norm": 0.4160579057562592, "learning_rate": 3.4820507086847036e-05, "loss": 0.2321, "step": 18215 }, { "epoch": 8.498134328358208, "grad_norm": 0.41167574765875414, "learning_rate": 3.481230250526416e-05, "loss": 0.2282, "step": 18220 }, { "epoch": 8.500466417910447, "grad_norm": 0.4116592554065681, "learning_rate": 3.480409683641196e-05, "loss": 0.2313, "step": 18225 }, { "epoch": 8.502798507462687, "grad_norm": 0.4314817381949247, "learning_rate": 3.479589008151054e-05, "loss": 0.2192, "step": 18230 }, { "epoch": 8.505130597014926, "grad_norm": 0.4101947240500427, "learning_rate": 3.4787682241780164e-05, "loss": 0.2267, "step": 18235 }, { "epoch": 8.507462686567164, "grad_norm": 0.401037681134816, "learning_rate": 3.477947331844127e-05, "loss": 0.2316, "step": 18240 }, { "epoch": 8.509794776119403, "grad_norm": 0.41497520684396016, "learning_rate": 3.477126331271445e-05, "loss": 0.2261, "step": 18245 }, { "epoch": 8.512126865671641, "grad_norm": 0.4201295989647314, "learning_rate": 3.476305222582042e-05, "loss": 0.2214, "step": 18250 }, { "epoch": 8.51445895522388, "grad_norm": 0.42574591728180644, "learning_rate": 3.475484005898013e-05, "loss": 0.2211, "step": 18255 }, { "epoch": 8.51679104477612, "grad_norm": 0.4254231219898732, "learning_rate": 3.4746626813414624e-05, "loss": 0.2181, "step": 18260 }, { "epoch": 8.519123134328359, "grad_norm": 0.4122547535376834, "learning_rate": 3.473841249034514e-05, "loss": 0.2291, "step": 18265 }, { "epoch": 8.521455223880597, "grad_norm": 0.4154262097318366, "learning_rate": 3.4730197090993084e-05, "loss": 0.2289, "step": 18270 }, { "epoch": 8.523787313432836, "grad_norm": 0.41117533754032737, "learning_rate": 3.4721980616579984e-05, "loss": 0.223, "step": 18275 }, { "epoch": 8.526119402985074, "grad_norm": 0.4123464881977397, "learning_rate": 3.471376306832756e-05, "loss": 0.2328, "step": 18280 }, { "epoch": 8.528451492537313, "grad_norm": 0.4009298740898282, "learning_rate": 3.4705544447457686e-05, "loss": 0.2205, "step": 18285 }, { "epoch": 8.530783582089553, "grad_norm": 0.4005370579200843, "learning_rate": 3.4697324755192387e-05, "loss": 0.2271, "step": 18290 }, { "epoch": 8.533115671641792, "grad_norm": 0.4242145968254287, "learning_rate": 3.468910399275387e-05, "loss": 0.2328, "step": 18295 }, { "epoch": 8.53544776119403, "grad_norm": 0.39805956028925765, "learning_rate": 3.468088216136445e-05, "loss": 0.2273, "step": 18300 }, { "epoch": 8.537779850746269, "grad_norm": 0.4234042790957661, "learning_rate": 3.467265926224667e-05, "loss": 0.2281, "step": 18305 }, { "epoch": 8.540111940298507, "grad_norm": 0.4063629536462254, "learning_rate": 3.466443529662317e-05, "loss": 0.2251, "step": 18310 }, { "epoch": 8.542444029850746, "grad_norm": 0.39824466528456187, "learning_rate": 3.465621026571679e-05, "loss": 0.2202, "step": 18315 }, { "epoch": 8.544776119402986, "grad_norm": 0.41347827903018175, "learning_rate": 3.4647984170750506e-05, "loss": 0.2245, "step": 18320 }, { "epoch": 8.547108208955224, "grad_norm": 0.39888230091389304, "learning_rate": 3.463975701294747e-05, "loss": 0.2309, "step": 18325 }, { "epoch": 8.549440298507463, "grad_norm": 0.4054883184897346, "learning_rate": 3.463152879353097e-05, "loss": 0.2266, "step": 18330 }, { "epoch": 8.551772388059701, "grad_norm": 0.39721168001818585, "learning_rate": 3.462329951372446e-05, "loss": 0.2295, "step": 18335 }, { "epoch": 8.55410447761194, "grad_norm": 0.3990539352951013, "learning_rate": 3.4615069174751566e-05, "loss": 0.2244, "step": 18340 }, { "epoch": 8.556436567164178, "grad_norm": 0.39640614324025303, "learning_rate": 3.460683777783605e-05, "loss": 0.2327, "step": 18345 }, { "epoch": 8.558768656716419, "grad_norm": 0.42368183443351976, "learning_rate": 3.459860532420186e-05, "loss": 0.2318, "step": 18350 }, { "epoch": 8.561100746268657, "grad_norm": 0.40347291800745405, "learning_rate": 3.459037181507307e-05, "loss": 0.2251, "step": 18355 }, { "epoch": 8.563432835820896, "grad_norm": 0.4141131789889208, "learning_rate": 3.4582137251673916e-05, "loss": 0.2268, "step": 18360 }, { "epoch": 8.565764925373134, "grad_norm": 0.4116441754880187, "learning_rate": 3.4573901635228815e-05, "loss": 0.2257, "step": 18365 }, { "epoch": 8.568097014925373, "grad_norm": 0.38419432520103525, "learning_rate": 3.456566496696232e-05, "loss": 0.2182, "step": 18370 }, { "epoch": 8.570429104477611, "grad_norm": 0.4235548476687123, "learning_rate": 3.455742724809914e-05, "loss": 0.2256, "step": 18375 }, { "epoch": 8.572761194029852, "grad_norm": 0.4212871696545927, "learning_rate": 3.454918847986414e-05, "loss": 0.2296, "step": 18380 }, { "epoch": 8.57509328358209, "grad_norm": 0.38618316614030607, "learning_rate": 3.4540948663482356e-05, "loss": 0.223, "step": 18385 }, { "epoch": 8.577425373134329, "grad_norm": 0.3999772112321403, "learning_rate": 3.453270780017897e-05, "loss": 0.2312, "step": 18390 }, { "epoch": 8.579757462686567, "grad_norm": 0.39124826771942195, "learning_rate": 3.452446589117932e-05, "loss": 0.2241, "step": 18395 }, { "epoch": 8.582089552238806, "grad_norm": 0.42088116800825776, "learning_rate": 3.451622293770889e-05, "loss": 0.2278, "step": 18400 }, { "epoch": 8.584421641791044, "grad_norm": 0.41796742443178864, "learning_rate": 3.450797894099332e-05, "loss": 0.2223, "step": 18405 }, { "epoch": 8.586753731343283, "grad_norm": 0.42076183251199745, "learning_rate": 3.4499733902258446e-05, "loss": 0.2275, "step": 18410 }, { "epoch": 8.589085820895523, "grad_norm": 0.39407853564052925, "learning_rate": 3.4491487822730194e-05, "loss": 0.2219, "step": 18415 }, { "epoch": 8.591417910447761, "grad_norm": 0.4171464734369688, "learning_rate": 3.448324070363469e-05, "loss": 0.2263, "step": 18420 }, { "epoch": 8.59375, "grad_norm": 0.4143846023799813, "learning_rate": 3.447499254619821e-05, "loss": 0.2342, "step": 18425 }, { "epoch": 8.596082089552239, "grad_norm": 0.4235674689395619, "learning_rate": 3.446674335164716e-05, "loss": 0.2274, "step": 18430 }, { "epoch": 8.598414179104477, "grad_norm": 0.40217336048567415, "learning_rate": 3.445849312120813e-05, "loss": 0.2229, "step": 18435 }, { "epoch": 8.600746268656717, "grad_norm": 0.3887877047418478, "learning_rate": 3.445024185610783e-05, "loss": 0.2286, "step": 18440 }, { "epoch": 8.603078358208956, "grad_norm": 0.4136450183931288, "learning_rate": 3.444198955757316e-05, "loss": 0.2261, "step": 18445 }, { "epoch": 8.605410447761194, "grad_norm": 0.4348843808398468, "learning_rate": 3.443373622683116e-05, "loss": 0.23, "step": 18450 }, { "epoch": 8.607742537313433, "grad_norm": 0.4029048116474647, "learning_rate": 3.442548186510902e-05, "loss": 0.2304, "step": 18455 }, { "epoch": 8.610074626865671, "grad_norm": 0.412777984347724, "learning_rate": 3.441722647363408e-05, "loss": 0.2297, "step": 18460 }, { "epoch": 8.61240671641791, "grad_norm": 0.4138240253076594, "learning_rate": 3.4408970053633835e-05, "loss": 0.2228, "step": 18465 }, { "epoch": 8.614738805970148, "grad_norm": 0.4144314506065973, "learning_rate": 3.440071260633594e-05, "loss": 0.2313, "step": 18470 }, { "epoch": 8.617070895522389, "grad_norm": 0.38526906323801324, "learning_rate": 3.43924541329682e-05, "loss": 0.2276, "step": 18475 }, { "epoch": 8.619402985074627, "grad_norm": 0.3960665981575153, "learning_rate": 3.438419463475857e-05, "loss": 0.2202, "step": 18480 }, { "epoch": 8.621735074626866, "grad_norm": 0.4128587776498482, "learning_rate": 3.437593411293516e-05, "loss": 0.2274, "step": 18485 }, { "epoch": 8.624067164179104, "grad_norm": 0.41691404552760586, "learning_rate": 3.436767256872621e-05, "loss": 0.2296, "step": 18490 }, { "epoch": 8.626399253731343, "grad_norm": 0.39381572241406665, "learning_rate": 3.435941000336016e-05, "loss": 0.2239, "step": 18495 }, { "epoch": 8.628731343283581, "grad_norm": 0.41538881512455034, "learning_rate": 3.435114641806557e-05, "loss": 0.2294, "step": 18500 }, { "epoch": 8.631063432835822, "grad_norm": 0.4023197461593131, "learning_rate": 3.434288181407114e-05, "loss": 0.2252, "step": 18505 }, { "epoch": 8.63339552238806, "grad_norm": 0.4220720170034195, "learning_rate": 3.433461619260575e-05, "loss": 0.2316, "step": 18510 }, { "epoch": 8.635727611940299, "grad_norm": 0.4098063182343183, "learning_rate": 3.432634955489841e-05, "loss": 0.219, "step": 18515 }, { "epoch": 8.638059701492537, "grad_norm": 0.40633592661710644, "learning_rate": 3.43180819021783e-05, "loss": 0.2249, "step": 18520 }, { "epoch": 8.640391791044776, "grad_norm": 0.4356351405813253, "learning_rate": 3.430981323567475e-05, "loss": 0.231, "step": 18525 }, { "epoch": 8.642723880597014, "grad_norm": 0.4311314046860303, "learning_rate": 3.4301543556617206e-05, "loss": 0.2301, "step": 18530 }, { "epoch": 8.645055970149254, "grad_norm": 0.41649589613721555, "learning_rate": 3.42932728662353e-05, "loss": 0.2288, "step": 18535 }, { "epoch": 8.647388059701493, "grad_norm": 0.40957172634581457, "learning_rate": 3.428500116575881e-05, "loss": 0.2241, "step": 18540 }, { "epoch": 8.649720149253731, "grad_norm": 0.4330942414883423, "learning_rate": 3.427672845641765e-05, "loss": 0.2326, "step": 18545 }, { "epoch": 8.65205223880597, "grad_norm": 0.4062091073004812, "learning_rate": 3.42684547394419e-05, "loss": 0.2246, "step": 18550 }, { "epoch": 8.654384328358208, "grad_norm": 0.39778615270643747, "learning_rate": 3.4260180016061784e-05, "loss": 0.2224, "step": 18555 }, { "epoch": 8.656716417910447, "grad_norm": 0.39576927369619064, "learning_rate": 3.425190428750767e-05, "loss": 0.2228, "step": 18560 }, { "epoch": 8.659048507462687, "grad_norm": 0.40202649178750227, "learning_rate": 3.424362755501007e-05, "loss": 0.2244, "step": 18565 }, { "epoch": 8.661380597014926, "grad_norm": 0.40256854805741676, "learning_rate": 3.423534981979968e-05, "loss": 0.2312, "step": 18570 }, { "epoch": 8.663712686567164, "grad_norm": 0.38872730142675715, "learning_rate": 3.422707108310729e-05, "loss": 0.2207, "step": 18575 }, { "epoch": 8.666044776119403, "grad_norm": 0.4004948849282724, "learning_rate": 3.4218791346163894e-05, "loss": 0.2247, "step": 18580 }, { "epoch": 8.668376865671641, "grad_norm": 0.39544904893994465, "learning_rate": 3.421051061020059e-05, "loss": 0.2263, "step": 18585 }, { "epoch": 8.67070895522388, "grad_norm": 0.40593265087318275, "learning_rate": 3.420222887644866e-05, "loss": 0.2248, "step": 18590 }, { "epoch": 8.67304104477612, "grad_norm": 0.4271610047393061, "learning_rate": 3.419394614613951e-05, "loss": 0.2339, "step": 18595 }, { "epoch": 8.675373134328359, "grad_norm": 0.4167124148402504, "learning_rate": 3.41856624205047e-05, "loss": 0.2348, "step": 18600 }, { "epoch": 8.677705223880597, "grad_norm": 0.3944857178460424, "learning_rate": 3.417737770077595e-05, "loss": 0.2282, "step": 18605 }, { "epoch": 8.680037313432836, "grad_norm": 0.40788633174978367, "learning_rate": 3.4169091988185106e-05, "loss": 0.2206, "step": 18610 }, { "epoch": 8.682369402985074, "grad_norm": 0.42108935201726444, "learning_rate": 3.4160805283964184e-05, "loss": 0.2264, "step": 18615 }, { "epoch": 8.684701492537313, "grad_norm": 0.41008641750888736, "learning_rate": 3.415251758934534e-05, "loss": 0.2204, "step": 18620 }, { "epoch": 8.687033582089553, "grad_norm": 0.4149893026781698, "learning_rate": 3.414422890556087e-05, "loss": 0.2277, "step": 18625 }, { "epoch": 8.689365671641792, "grad_norm": 0.41248835867571787, "learning_rate": 3.413593923384321e-05, "loss": 0.2259, "step": 18630 }, { "epoch": 8.69169776119403, "grad_norm": 0.41473781982603597, "learning_rate": 3.4127648575424975e-05, "loss": 0.2287, "step": 18635 }, { "epoch": 8.694029850746269, "grad_norm": 0.4546020267019803, "learning_rate": 3.4119356931538894e-05, "loss": 0.2315, "step": 18640 }, { "epoch": 8.696361940298507, "grad_norm": 0.39881903509863176, "learning_rate": 3.411106430341786e-05, "loss": 0.2237, "step": 18645 }, { "epoch": 8.698694029850746, "grad_norm": 0.4041639349082663, "learning_rate": 3.410277069229491e-05, "loss": 0.2275, "step": 18650 }, { "epoch": 8.701026119402986, "grad_norm": 0.4007276428641424, "learning_rate": 3.409447609940322e-05, "loss": 0.2326, "step": 18655 }, { "epoch": 8.703358208955224, "grad_norm": 0.39048012194058357, "learning_rate": 3.408618052597611e-05, "loss": 0.228, "step": 18660 }, { "epoch": 8.705690298507463, "grad_norm": 0.39953911024074956, "learning_rate": 3.407788397324706e-05, "loss": 0.2255, "step": 18665 }, { "epoch": 8.708022388059701, "grad_norm": 0.38555392208795686, "learning_rate": 3.4069586442449684e-05, "loss": 0.2172, "step": 18670 }, { "epoch": 8.71035447761194, "grad_norm": 0.4103091814961729, "learning_rate": 3.406128793481776e-05, "loss": 0.2192, "step": 18675 }, { "epoch": 8.712686567164178, "grad_norm": 0.4039948465524476, "learning_rate": 3.405298845158518e-05, "loss": 0.2239, "step": 18680 }, { "epoch": 8.715018656716419, "grad_norm": 0.38788777619397663, "learning_rate": 3.4044687993985995e-05, "loss": 0.2184, "step": 18685 }, { "epoch": 8.717350746268657, "grad_norm": 0.409690014684569, "learning_rate": 3.403638656325442e-05, "loss": 0.232, "step": 18690 }, { "epoch": 8.719682835820896, "grad_norm": 0.3996863706806077, "learning_rate": 3.402808416062479e-05, "loss": 0.2292, "step": 18695 }, { "epoch": 8.722014925373134, "grad_norm": 0.41358231588439176, "learning_rate": 3.4019780787331586e-05, "loss": 0.2331, "step": 18700 }, { "epoch": 8.724347014925373, "grad_norm": 0.4091175566259438, "learning_rate": 3.4011476444609456e-05, "loss": 0.2262, "step": 18705 }, { "epoch": 8.726679104477611, "grad_norm": 0.3996970627919778, "learning_rate": 3.4003171133693154e-05, "loss": 0.2305, "step": 18710 }, { "epoch": 8.729011194029852, "grad_norm": 0.408157098981832, "learning_rate": 3.399486485581762e-05, "loss": 0.2262, "step": 18715 }, { "epoch": 8.73134328358209, "grad_norm": 0.398232054685269, "learning_rate": 3.3986557612217904e-05, "loss": 0.2281, "step": 18720 }, { "epoch": 8.733675373134329, "grad_norm": 0.389830439702745, "learning_rate": 3.3978249404129224e-05, "loss": 0.2317, "step": 18725 }, { "epoch": 8.736007462686567, "grad_norm": 0.39038938191475236, "learning_rate": 3.396994023278693e-05, "loss": 0.2239, "step": 18730 }, { "epoch": 8.738339552238806, "grad_norm": 0.4098175756828459, "learning_rate": 3.396163009942651e-05, "loss": 0.2314, "step": 18735 }, { "epoch": 8.740671641791044, "grad_norm": 0.4061404498669277, "learning_rate": 3.3953319005283606e-05, "loss": 0.2268, "step": 18740 }, { "epoch": 8.743003731343283, "grad_norm": 0.41496156001472884, "learning_rate": 3.3945006951594e-05, "loss": 0.2294, "step": 18745 }, { "epoch": 8.745335820895523, "grad_norm": 0.418768294260026, "learning_rate": 3.393669393959361e-05, "loss": 0.2304, "step": 18750 }, { "epoch": 8.747667910447761, "grad_norm": 0.4018090152238327, "learning_rate": 3.39283799705185e-05, "loss": 0.2284, "step": 18755 }, { "epoch": 8.75, "grad_norm": 0.40432593129645406, "learning_rate": 3.392006504560487e-05, "loss": 0.2283, "step": 18760 }, { "epoch": 8.752332089552239, "grad_norm": 0.3985484357364707, "learning_rate": 3.391174916608909e-05, "loss": 0.23, "step": 18765 }, { "epoch": 8.754664179104477, "grad_norm": 0.4051401131537755, "learning_rate": 3.390343233320764e-05, "loss": 0.2263, "step": 18770 }, { "epoch": 8.756996268656717, "grad_norm": 0.40483470349187306, "learning_rate": 3.3895114548197154e-05, "loss": 0.2322, "step": 18775 }, { "epoch": 8.759328358208956, "grad_norm": 0.4036105755307256, "learning_rate": 3.388679581229441e-05, "loss": 0.2276, "step": 18780 }, { "epoch": 8.761660447761194, "grad_norm": 0.38388368667632505, "learning_rate": 3.3878476126736314e-05, "loss": 0.2245, "step": 18785 }, { "epoch": 8.763992537313433, "grad_norm": 0.417892305901417, "learning_rate": 3.3870155492759936e-05, "loss": 0.2274, "step": 18790 }, { "epoch": 8.766324626865671, "grad_norm": 0.4019629632105521, "learning_rate": 3.386183391160248e-05, "loss": 0.2287, "step": 18795 }, { "epoch": 8.76865671641791, "grad_norm": 0.42030224640621144, "learning_rate": 3.3853511384501256e-05, "loss": 0.2294, "step": 18800 }, { "epoch": 8.770988805970148, "grad_norm": 0.40960452287176846, "learning_rate": 3.384518791269378e-05, "loss": 0.2273, "step": 18805 }, { "epoch": 8.773320895522389, "grad_norm": 0.4309298718250295, "learning_rate": 3.3836863497417645e-05, "loss": 0.2305, "step": 18810 }, { "epoch": 8.775652985074627, "grad_norm": 0.3860851010907989, "learning_rate": 3.3828538139910626e-05, "loss": 0.2268, "step": 18815 }, { "epoch": 8.777985074626866, "grad_norm": 0.3975148425617919, "learning_rate": 3.382021184141062e-05, "loss": 0.2268, "step": 18820 }, { "epoch": 8.780317164179104, "grad_norm": 0.41533881275157014, "learning_rate": 3.3811884603155665e-05, "loss": 0.2352, "step": 18825 }, { "epoch": 8.782649253731343, "grad_norm": 0.4132389196484029, "learning_rate": 3.3803556426383954e-05, "loss": 0.228, "step": 18830 }, { "epoch": 8.784981343283581, "grad_norm": 0.4051913235053636, "learning_rate": 3.379522731233379e-05, "loss": 0.2317, "step": 18835 }, { "epoch": 8.787313432835822, "grad_norm": 0.41186851159148036, "learning_rate": 3.378689726224364e-05, "loss": 0.2324, "step": 18840 }, { "epoch": 8.78964552238806, "grad_norm": 0.39039305966301147, "learning_rate": 3.377856627735211e-05, "loss": 0.2267, "step": 18845 }, { "epoch": 8.791977611940299, "grad_norm": 0.4091450771317349, "learning_rate": 3.3770234358897926e-05, "loss": 0.2251, "step": 18850 }, { "epoch": 8.794309701492537, "grad_norm": 0.410679849245423, "learning_rate": 3.376190150811997e-05, "loss": 0.2296, "step": 18855 }, { "epoch": 8.796641791044776, "grad_norm": 0.40723516746812377, "learning_rate": 3.3753567726257255e-05, "loss": 0.228, "step": 18860 }, { "epoch": 8.798973880597014, "grad_norm": 0.41239628225854397, "learning_rate": 3.3745233014548936e-05, "loss": 0.2341, "step": 18865 }, { "epoch": 8.801305970149254, "grad_norm": 0.40642256995739434, "learning_rate": 3.373689737423431e-05, "loss": 0.2332, "step": 18870 }, { "epoch": 8.803638059701493, "grad_norm": 0.402374484409927, "learning_rate": 3.372856080655279e-05, "loss": 0.2337, "step": 18875 }, { "epoch": 8.805970149253731, "grad_norm": 0.4188503653979963, "learning_rate": 3.372022331274397e-05, "loss": 0.2354, "step": 18880 }, { "epoch": 8.80830223880597, "grad_norm": 0.39538673577713834, "learning_rate": 3.3711884894047526e-05, "loss": 0.2293, "step": 18885 }, { "epoch": 8.810634328358208, "grad_norm": 0.416363070393141, "learning_rate": 3.3703545551703326e-05, "loss": 0.2369, "step": 18890 }, { "epoch": 8.812966417910447, "grad_norm": 0.39869907861051995, "learning_rate": 3.369520528695133e-05, "loss": 0.2261, "step": 18895 }, { "epoch": 8.815298507462687, "grad_norm": 0.4013582636127732, "learning_rate": 3.368686410103167e-05, "loss": 0.2272, "step": 18900 }, { "epoch": 8.817630597014926, "grad_norm": 0.41348950180778477, "learning_rate": 3.36785219951846e-05, "loss": 0.2351, "step": 18905 }, { "epoch": 8.819962686567164, "grad_norm": 0.4333988089431769, "learning_rate": 3.367017897065051e-05, "loss": 0.2405, "step": 18910 }, { "epoch": 8.822294776119403, "grad_norm": 0.39761933686927525, "learning_rate": 3.366183502866991e-05, "loss": 0.2294, "step": 18915 }, { "epoch": 8.824626865671641, "grad_norm": 0.3867852337682989, "learning_rate": 3.3653490170483485e-05, "loss": 0.2256, "step": 18920 }, { "epoch": 8.82695895522388, "grad_norm": 0.4286255125830081, "learning_rate": 3.364514439733203e-05, "loss": 0.2318, "step": 18925 }, { "epoch": 8.82929104477612, "grad_norm": 0.40459772722540516, "learning_rate": 3.363679771045648e-05, "loss": 0.2303, "step": 18930 }, { "epoch": 8.831623134328359, "grad_norm": 0.40405182356985986, "learning_rate": 3.3628450111097914e-05, "loss": 0.232, "step": 18935 }, { "epoch": 8.833955223880597, "grad_norm": 0.41184187460960237, "learning_rate": 3.3620101600497526e-05, "loss": 0.2335, "step": 18940 }, { "epoch": 8.836287313432836, "grad_norm": 0.4101994720895286, "learning_rate": 3.361175217989668e-05, "loss": 0.2342, "step": 18945 }, { "epoch": 8.838619402985074, "grad_norm": 0.4062812201912889, "learning_rate": 3.360340185053683e-05, "loss": 0.2299, "step": 18950 }, { "epoch": 8.840951492537313, "grad_norm": 0.4020479615772756, "learning_rate": 3.3595050613659605e-05, "loss": 0.2302, "step": 18955 }, { "epoch": 8.843283582089553, "grad_norm": 0.42006751792120806, "learning_rate": 3.358669847050676e-05, "loss": 0.2331, "step": 18960 }, { "epoch": 8.845615671641792, "grad_norm": 0.39648350972865615, "learning_rate": 3.3578345422320165e-05, "loss": 0.2268, "step": 18965 }, { "epoch": 8.84794776119403, "grad_norm": 0.38810563814157667, "learning_rate": 3.356999147034184e-05, "loss": 0.2295, "step": 18970 }, { "epoch": 8.850279850746269, "grad_norm": 0.4000419266086575, "learning_rate": 3.3561636615813945e-05, "loss": 0.2305, "step": 18975 }, { "epoch": 8.852611940298507, "grad_norm": 0.4463429398893989, "learning_rate": 3.355328085997876e-05, "loss": 0.2345, "step": 18980 }, { "epoch": 8.854944029850746, "grad_norm": 0.37968983366925313, "learning_rate": 3.3544924204078715e-05, "loss": 0.2309, "step": 18985 }, { "epoch": 8.857276119402986, "grad_norm": 0.40626646618997886, "learning_rate": 3.3536566649356356e-05, "loss": 0.2306, "step": 18990 }, { "epoch": 8.859608208955224, "grad_norm": 0.3996815979534987, "learning_rate": 3.352820819705437e-05, "loss": 0.2315, "step": 18995 }, { "epoch": 8.861940298507463, "grad_norm": 0.3997803733911581, "learning_rate": 3.351984884841558e-05, "loss": 0.2318, "step": 19000 }, { "epoch": 8.864272388059701, "grad_norm": 0.42370187076958044, "learning_rate": 3.351148860468295e-05, "loss": 0.2261, "step": 19005 }, { "epoch": 8.86660447761194, "grad_norm": 0.37631187094728447, "learning_rate": 3.350312746709956e-05, "loss": 0.2293, "step": 19010 }, { "epoch": 8.868936567164178, "grad_norm": 0.4058830856147769, "learning_rate": 3.3494765436908635e-05, "loss": 0.2255, "step": 19015 }, { "epoch": 8.871268656716419, "grad_norm": 0.3999102583295986, "learning_rate": 3.348640251535352e-05, "loss": 0.2296, "step": 19020 }, { "epoch": 8.873600746268657, "grad_norm": 0.3950141595202111, "learning_rate": 3.3478038703677714e-05, "loss": 0.2395, "step": 19025 }, { "epoch": 8.875932835820896, "grad_norm": 0.41395117906326745, "learning_rate": 3.346967400312482e-05, "loss": 0.2307, "step": 19030 }, { "epoch": 8.878264925373134, "grad_norm": 0.3973722530693121, "learning_rate": 3.346130841493861e-05, "loss": 0.2312, "step": 19035 }, { "epoch": 8.880597014925373, "grad_norm": 0.3924918803790972, "learning_rate": 3.3452941940362946e-05, "loss": 0.2307, "step": 19040 }, { "epoch": 8.882929104477611, "grad_norm": 0.4345122105064804, "learning_rate": 3.3444574580641855e-05, "loss": 0.2292, "step": 19045 }, { "epoch": 8.885261194029852, "grad_norm": 0.4201750348068134, "learning_rate": 3.343620633701948e-05, "loss": 0.2337, "step": 19050 }, { "epoch": 8.88759328358209, "grad_norm": 0.3919934480352503, "learning_rate": 3.3427837210740096e-05, "loss": 0.2266, "step": 19055 }, { "epoch": 8.889925373134329, "grad_norm": 0.39465420363599696, "learning_rate": 3.341946720304812e-05, "loss": 0.2318, "step": 19060 }, { "epoch": 8.892257462686567, "grad_norm": 0.432459478345014, "learning_rate": 3.341109631518809e-05, "loss": 0.2311, "step": 19065 }, { "epoch": 8.894589552238806, "grad_norm": 0.3978360704886682, "learning_rate": 3.340272454840466e-05, "loss": 0.233, "step": 19070 }, { "epoch": 8.896921641791044, "grad_norm": 0.4104199500028601, "learning_rate": 3.339435190394266e-05, "loss": 0.234, "step": 19075 }, { "epoch": 8.899253731343283, "grad_norm": 0.4064687928750594, "learning_rate": 3.3385978383046996e-05, "loss": 0.2278, "step": 19080 }, { "epoch": 8.901585820895523, "grad_norm": 0.40523747152325973, "learning_rate": 3.337760398696275e-05, "loss": 0.2266, "step": 19085 }, { "epoch": 8.903917910447761, "grad_norm": 0.4248628125095579, "learning_rate": 3.336922871693509e-05, "loss": 0.2376, "step": 19090 }, { "epoch": 8.90625, "grad_norm": 0.4320616521253333, "learning_rate": 3.3360852574209364e-05, "loss": 0.2429, "step": 19095 }, { "epoch": 8.908582089552239, "grad_norm": 0.40406079569934694, "learning_rate": 3.335247556003101e-05, "loss": 0.2316, "step": 19100 }, { "epoch": 8.910914179104477, "grad_norm": 0.4233233910265626, "learning_rate": 3.334409767564562e-05, "loss": 0.2278, "step": 19105 }, { "epoch": 8.913246268656717, "grad_norm": 0.39791787282054164, "learning_rate": 3.33357189222989e-05, "loss": 0.2331, "step": 19110 }, { "epoch": 8.915578358208956, "grad_norm": 0.4074481612831819, "learning_rate": 3.3327339301236685e-05, "loss": 0.2374, "step": 19115 }, { "epoch": 8.917910447761194, "grad_norm": 0.4041397614142612, "learning_rate": 3.331895881370495e-05, "loss": 0.2344, "step": 19120 }, { "epoch": 8.920242537313433, "grad_norm": 0.3950399474036515, "learning_rate": 3.33105774609498e-05, "loss": 0.2332, "step": 19125 }, { "epoch": 8.922574626865671, "grad_norm": 0.38633239946931813, "learning_rate": 3.3302195244217435e-05, "loss": 0.2329, "step": 19130 }, { "epoch": 8.92490671641791, "grad_norm": 0.3885085106745547, "learning_rate": 3.329381216475424e-05, "loss": 0.2304, "step": 19135 }, { "epoch": 8.927238805970148, "grad_norm": 0.39904449866159153, "learning_rate": 3.32854282238067e-05, "loss": 0.233, "step": 19140 }, { "epoch": 8.929570895522389, "grad_norm": 0.4064307057966676, "learning_rate": 3.327704342262139e-05, "loss": 0.2334, "step": 19145 }, { "epoch": 8.931902985074627, "grad_norm": 0.4066779753319839, "learning_rate": 3.326865776244509e-05, "loss": 0.2335, "step": 19150 }, { "epoch": 8.934235074626866, "grad_norm": 0.3988165335664172, "learning_rate": 3.326027124452464e-05, "loss": 0.2313, "step": 19155 }, { "epoch": 8.936567164179104, "grad_norm": 0.3986797407048292, "learning_rate": 3.3251883870107066e-05, "loss": 0.2371, "step": 19160 }, { "epoch": 8.938899253731343, "grad_norm": 0.39582322288799504, "learning_rate": 3.324349564043945e-05, "loss": 0.2319, "step": 19165 }, { "epoch": 8.941231343283581, "grad_norm": 0.3882380137906559, "learning_rate": 3.323510655676906e-05, "loss": 0.236, "step": 19170 }, { "epoch": 8.943563432835822, "grad_norm": 0.43979264965437437, "learning_rate": 3.322671662034329e-05, "loss": 0.2342, "step": 19175 }, { "epoch": 8.94589552238806, "grad_norm": 0.38432710472167814, "learning_rate": 3.3218325832409616e-05, "loss": 0.238, "step": 19180 }, { "epoch": 8.948227611940299, "grad_norm": 0.4277647675969534, "learning_rate": 3.3209934194215685e-05, "loss": 0.2369, "step": 19185 }, { "epoch": 8.950559701492537, "grad_norm": 0.4009937097127753, "learning_rate": 3.320154170700925e-05, "loss": 0.2324, "step": 19190 }, { "epoch": 8.952891791044776, "grad_norm": 0.3891076852961445, "learning_rate": 3.3193148372038186e-05, "loss": 0.236, "step": 19195 }, { "epoch": 8.955223880597014, "grad_norm": 0.4176506904014141, "learning_rate": 3.3184754190550506e-05, "loss": 0.2327, "step": 19200 }, { "epoch": 8.957555970149254, "grad_norm": 0.42081221058661683, "learning_rate": 3.317635916379434e-05, "loss": 0.2328, "step": 19205 }, { "epoch": 8.959888059701493, "grad_norm": 0.3847701234823632, "learning_rate": 3.316796329301796e-05, "loss": 0.2283, "step": 19210 }, { "epoch": 8.962220149253731, "grad_norm": 0.4114930693762871, "learning_rate": 3.315956657946975e-05, "loss": 0.2353, "step": 19215 }, { "epoch": 8.96455223880597, "grad_norm": 0.40856772494210936, "learning_rate": 3.31511690243982e-05, "loss": 0.234, "step": 19220 }, { "epoch": 8.966884328358208, "grad_norm": 0.4168523871820654, "learning_rate": 3.314277062905197e-05, "loss": 0.2385, "step": 19225 }, { "epoch": 8.969216417910447, "grad_norm": 0.4071551660790745, "learning_rate": 3.3134371394679806e-05, "loss": 0.2339, "step": 19230 }, { "epoch": 8.971548507462687, "grad_norm": 0.3959954771355168, "learning_rate": 3.3125971322530605e-05, "loss": 0.2317, "step": 19235 }, { "epoch": 8.973880597014926, "grad_norm": 0.40373914611778877, "learning_rate": 3.3117570413853373e-05, "loss": 0.2328, "step": 19240 }, { "epoch": 8.976212686567164, "grad_norm": 0.3971005295854398, "learning_rate": 3.3109168669897234e-05, "loss": 0.2339, "step": 19245 }, { "epoch": 8.978544776119403, "grad_norm": 0.4133579044103397, "learning_rate": 3.3100766091911464e-05, "loss": 0.2336, "step": 19250 }, { "epoch": 8.980876865671641, "grad_norm": 0.41516545864296334, "learning_rate": 3.309236268114543e-05, "loss": 0.2287, "step": 19255 }, { "epoch": 8.98320895522388, "grad_norm": 0.4220940702155678, "learning_rate": 3.308395843884866e-05, "loss": 0.2388, "step": 19260 }, { "epoch": 8.98554104477612, "grad_norm": 0.43469391384846806, "learning_rate": 3.3075553366270765e-05, "loss": 0.234, "step": 19265 }, { "epoch": 8.987873134328359, "grad_norm": 0.4098766979809459, "learning_rate": 3.30671474646615e-05, "loss": 0.2369, "step": 19270 }, { "epoch": 8.990205223880597, "grad_norm": 0.4165792885908616, "learning_rate": 3.305874073527076e-05, "loss": 0.2323, "step": 19275 }, { "epoch": 8.992537313432836, "grad_norm": 0.38390438813921096, "learning_rate": 3.305033317934852e-05, "loss": 0.2349, "step": 19280 }, { "epoch": 8.994869402985074, "grad_norm": 0.4075140993580146, "learning_rate": 3.304192479814492e-05, "loss": 0.2342, "step": 19285 }, { "epoch": 8.997201492537313, "grad_norm": 0.4021361321532038, "learning_rate": 3.30335155929102e-05, "loss": 0.2303, "step": 19290 }, { "epoch": 8.999533582089553, "grad_norm": 0.38711774690316175, "learning_rate": 3.3025105564894726e-05, "loss": 0.2308, "step": 19295 }, { "epoch": 9.001865671641792, "grad_norm": 0.40450492143395494, "learning_rate": 3.301669471534899e-05, "loss": 0.1815, "step": 19300 }, { "epoch": 9.00419776119403, "grad_norm": 0.45150852681803166, "learning_rate": 3.300828304552362e-05, "loss": 0.1606, "step": 19305 }, { "epoch": 9.006529850746269, "grad_norm": 0.4104820271521411, "learning_rate": 3.299987055666932e-05, "loss": 0.1622, "step": 19310 }, { "epoch": 9.008861940298507, "grad_norm": 0.43563559182715544, "learning_rate": 3.299145725003698e-05, "loss": 0.1628, "step": 19315 }, { "epoch": 9.011194029850746, "grad_norm": 0.43710785498395444, "learning_rate": 3.298304312687754e-05, "loss": 0.1708, "step": 19320 }, { "epoch": 9.013526119402986, "grad_norm": 0.4131764382737645, "learning_rate": 3.2974628188442135e-05, "loss": 0.1651, "step": 19325 }, { "epoch": 9.015858208955224, "grad_norm": 0.4077967902463252, "learning_rate": 3.2966212435981975e-05, "loss": 0.1649, "step": 19330 }, { "epoch": 9.018190298507463, "grad_norm": 0.41090541485404114, "learning_rate": 3.295779587074839e-05, "loss": 0.1627, "step": 19335 }, { "epoch": 9.020522388059701, "grad_norm": 0.39751291170339703, "learning_rate": 3.2949378493992854e-05, "loss": 0.164, "step": 19340 }, { "epoch": 9.02285447761194, "grad_norm": 0.4288501931403631, "learning_rate": 3.294096030696695e-05, "loss": 0.1657, "step": 19345 }, { "epoch": 9.025186567164178, "grad_norm": 0.40910733116012654, "learning_rate": 3.293254131092238e-05, "loss": 0.1602, "step": 19350 }, { "epoch": 9.027518656716419, "grad_norm": 0.39707871231143077, "learning_rate": 3.292412150711096e-05, "loss": 0.1561, "step": 19355 }, { "epoch": 9.029850746268657, "grad_norm": 0.4349092313211091, "learning_rate": 3.2915700896784655e-05, "loss": 0.1605, "step": 19360 }, { "epoch": 9.032182835820896, "grad_norm": 0.4052301081772827, "learning_rate": 3.29072794811955e-05, "loss": 0.1621, "step": 19365 }, { "epoch": 9.034514925373134, "grad_norm": 0.40407979284498274, "learning_rate": 3.28988572615957e-05, "loss": 0.162, "step": 19370 }, { "epoch": 9.036847014925373, "grad_norm": 0.42211519833905314, "learning_rate": 3.289043423923756e-05, "loss": 0.1635, "step": 19375 }, { "epoch": 9.039179104477611, "grad_norm": 0.42531754823019596, "learning_rate": 3.288201041537348e-05, "loss": 0.166, "step": 19380 }, { "epoch": 9.041511194029852, "grad_norm": 0.42234797442999966, "learning_rate": 3.2873585791256026e-05, "loss": 0.1611, "step": 19385 }, { "epoch": 9.04384328358209, "grad_norm": 0.4164390897577914, "learning_rate": 3.286516036813785e-05, "loss": 0.1651, "step": 19390 }, { "epoch": 9.046175373134329, "grad_norm": 0.41630763637187557, "learning_rate": 3.2856734147271734e-05, "loss": 0.1668, "step": 19395 }, { "epoch": 9.048507462686567, "grad_norm": 0.40623816073291746, "learning_rate": 3.284830712991057e-05, "loss": 0.1633, "step": 19400 }, { "epoch": 9.050839552238806, "grad_norm": 0.4489330985908478, "learning_rate": 3.283987931730738e-05, "loss": 0.1683, "step": 19405 }, { "epoch": 9.053171641791044, "grad_norm": 0.42035357936184703, "learning_rate": 3.28314507107153e-05, "loss": 0.1602, "step": 19410 }, { "epoch": 9.055503731343284, "grad_norm": 0.39367695867158997, "learning_rate": 3.282302131138758e-05, "loss": 0.1617, "step": 19415 }, { "epoch": 9.057835820895523, "grad_norm": 0.4070898498138727, "learning_rate": 3.281459112057759e-05, "loss": 0.1621, "step": 19420 }, { "epoch": 9.060167910447761, "grad_norm": 0.4304418686903167, "learning_rate": 3.280616013953882e-05, "loss": 0.1707, "step": 19425 }, { "epoch": 9.0625, "grad_norm": 0.4143279983361951, "learning_rate": 3.2797728369524875e-05, "loss": 0.1669, "step": 19430 }, { "epoch": 9.064832089552239, "grad_norm": 0.44925256154647014, "learning_rate": 3.2789295811789486e-05, "loss": 0.1706, "step": 19435 }, { "epoch": 9.067164179104477, "grad_norm": 0.42382198088594114, "learning_rate": 3.2780862467586486e-05, "loss": 0.1666, "step": 19440 }, { "epoch": 9.069496268656716, "grad_norm": 0.4557618296506824, "learning_rate": 3.2772428338169835e-05, "loss": 0.1643, "step": 19445 }, { "epoch": 9.071828358208956, "grad_norm": 0.4380121148651643, "learning_rate": 3.2763993424793604e-05, "loss": 0.166, "step": 19450 }, { "epoch": 9.074160447761194, "grad_norm": 0.3979703874950318, "learning_rate": 3.275555772871198e-05, "loss": 0.1668, "step": 19455 }, { "epoch": 9.076492537313433, "grad_norm": 0.40901658720474243, "learning_rate": 3.2747121251179294e-05, "loss": 0.1605, "step": 19460 }, { "epoch": 9.078824626865671, "grad_norm": 0.4283688944915248, "learning_rate": 3.2738683993449937e-05, "loss": 0.1612, "step": 19465 }, { "epoch": 9.08115671641791, "grad_norm": 0.42026058398656585, "learning_rate": 3.273024595677846e-05, "loss": 0.1628, "step": 19470 }, { "epoch": 9.083488805970148, "grad_norm": 0.41763382551131983, "learning_rate": 3.272180714241954e-05, "loss": 0.1679, "step": 19475 }, { "epoch": 9.085820895522389, "grad_norm": 0.41869672499351024, "learning_rate": 3.271336755162792e-05, "loss": 0.1598, "step": 19480 }, { "epoch": 9.088152985074627, "grad_norm": 0.4197390799397455, "learning_rate": 3.270492718565848e-05, "loss": 0.1629, "step": 19485 }, { "epoch": 9.090485074626866, "grad_norm": 0.43662011094543146, "learning_rate": 3.269648604576625e-05, "loss": 0.1635, "step": 19490 }, { "epoch": 9.092817164179104, "grad_norm": 0.42860527435030304, "learning_rate": 3.2688044133206336e-05, "loss": 0.1724, "step": 19495 }, { "epoch": 9.095149253731343, "grad_norm": 0.43548590128928144, "learning_rate": 3.267960144923397e-05, "loss": 0.1638, "step": 19500 }, { "epoch": 9.097481343283581, "grad_norm": 0.4289016148403282, "learning_rate": 3.267115799510448e-05, "loss": 0.1633, "step": 19505 }, { "epoch": 9.099813432835822, "grad_norm": 0.4280652561579429, "learning_rate": 3.266271377207335e-05, "loss": 0.1633, "step": 19510 }, { "epoch": 9.10214552238806, "grad_norm": 0.43444370464516224, "learning_rate": 3.265426878139615e-05, "loss": 0.1689, "step": 19515 }, { "epoch": 9.104477611940299, "grad_norm": 0.41921990290919114, "learning_rate": 3.264582302432856e-05, "loss": 0.1666, "step": 19520 }, { "epoch": 9.106809701492537, "grad_norm": 0.4152770834278002, "learning_rate": 3.263737650212639e-05, "loss": 0.1683, "step": 19525 }, { "epoch": 9.109141791044776, "grad_norm": 0.41519711020357375, "learning_rate": 3.262892921604556e-05, "loss": 0.1656, "step": 19530 }, { "epoch": 9.111473880597014, "grad_norm": 0.42307326179397897, "learning_rate": 3.262048116734209e-05, "loss": 0.1709, "step": 19535 }, { "epoch": 9.113805970149254, "grad_norm": 0.4225309597349603, "learning_rate": 3.261203235727214e-05, "loss": 0.1673, "step": 19540 }, { "epoch": 9.116138059701493, "grad_norm": 0.4143639657247927, "learning_rate": 3.2603582787091954e-05, "loss": 0.1682, "step": 19545 }, { "epoch": 9.118470149253731, "grad_norm": 0.4199302354017152, "learning_rate": 3.259513245805791e-05, "loss": 0.1689, "step": 19550 }, { "epoch": 9.12080223880597, "grad_norm": 0.416169712092717, "learning_rate": 3.258668137142648e-05, "loss": 0.1637, "step": 19555 }, { "epoch": 9.123134328358208, "grad_norm": 0.42914201623587184, "learning_rate": 3.2578229528454266e-05, "loss": 0.1708, "step": 19560 }, { "epoch": 9.125466417910447, "grad_norm": 0.4247990797229928, "learning_rate": 3.2569776930397986e-05, "loss": 0.1707, "step": 19565 }, { "epoch": 9.127798507462687, "grad_norm": 0.42988425704276656, "learning_rate": 3.256132357851445e-05, "loss": 0.1662, "step": 19570 }, { "epoch": 9.130130597014926, "grad_norm": 0.4234513790818353, "learning_rate": 3.255286947406058e-05, "loss": 0.1708, "step": 19575 }, { "epoch": 9.132462686567164, "grad_norm": 0.42085146436339504, "learning_rate": 3.254441461829344e-05, "loss": 0.1682, "step": 19580 }, { "epoch": 9.134794776119403, "grad_norm": 0.42576693475245664, "learning_rate": 3.253595901247017e-05, "loss": 0.1614, "step": 19585 }, { "epoch": 9.137126865671641, "grad_norm": 0.42977212106812857, "learning_rate": 3.252750265784806e-05, "loss": 0.1705, "step": 19590 }, { "epoch": 9.13945895522388, "grad_norm": 0.42541075566682235, "learning_rate": 3.251904555568446e-05, "loss": 0.1727, "step": 19595 }, { "epoch": 9.14179104477612, "grad_norm": 0.44988130236667306, "learning_rate": 3.251058770723688e-05, "loss": 0.1718, "step": 19600 }, { "epoch": 9.144123134328359, "grad_norm": 0.4284485372503761, "learning_rate": 3.2502129113762916e-05, "loss": 0.1692, "step": 19605 }, { "epoch": 9.146455223880597, "grad_norm": 0.403555173205888, "learning_rate": 3.249366977652028e-05, "loss": 0.1665, "step": 19610 }, { "epoch": 9.148787313432836, "grad_norm": 0.43133952198540043, "learning_rate": 3.248520969676679e-05, "loss": 0.1661, "step": 19615 }, { "epoch": 9.151119402985074, "grad_norm": 0.4056409054629659, "learning_rate": 3.247674887576038e-05, "loss": 0.1675, "step": 19620 }, { "epoch": 9.153451492537313, "grad_norm": 0.4122816299316605, "learning_rate": 3.24682873147591e-05, "loss": 0.1664, "step": 19625 }, { "epoch": 9.155783582089553, "grad_norm": 0.42431560863555223, "learning_rate": 3.24598250150211e-05, "loss": 0.174, "step": 19630 }, { "epoch": 9.158115671641792, "grad_norm": 0.4288782100252737, "learning_rate": 3.245136197780464e-05, "loss": 0.1691, "step": 19635 }, { "epoch": 9.16044776119403, "grad_norm": 0.4322754147969088, "learning_rate": 3.2442898204368086e-05, "loss": 0.17, "step": 19640 }, { "epoch": 9.162779850746269, "grad_norm": 0.4264468564194671, "learning_rate": 3.243443369596994e-05, "loss": 0.1671, "step": 19645 }, { "epoch": 9.165111940298507, "grad_norm": 0.43726080993608396, "learning_rate": 3.242596845386878e-05, "loss": 0.1725, "step": 19650 }, { "epoch": 9.167444029850746, "grad_norm": 0.436083910370301, "learning_rate": 3.24175024793233e-05, "loss": 0.1714, "step": 19655 }, { "epoch": 9.169776119402986, "grad_norm": 0.4213723074438591, "learning_rate": 3.240903577359232e-05, "loss": 0.1697, "step": 19660 }, { "epoch": 9.172108208955224, "grad_norm": 0.43505994196956566, "learning_rate": 3.2400568337934753e-05, "loss": 0.1662, "step": 19665 }, { "epoch": 9.174440298507463, "grad_norm": 0.4499296408043922, "learning_rate": 3.239210017360963e-05, "loss": 0.1728, "step": 19670 }, { "epoch": 9.176772388059701, "grad_norm": 0.439789306666143, "learning_rate": 3.238363128187609e-05, "loss": 0.171, "step": 19675 }, { "epoch": 9.17910447761194, "grad_norm": 0.42275260357246264, "learning_rate": 3.237516166399336e-05, "loss": 0.171, "step": 19680 }, { "epoch": 9.181436567164178, "grad_norm": 0.4451339486267121, "learning_rate": 3.23666913212208e-05, "loss": 0.1735, "step": 19685 }, { "epoch": 9.183768656716419, "grad_norm": 0.4338793887088181, "learning_rate": 3.2358220254817874e-05, "loss": 0.1675, "step": 19690 }, { "epoch": 9.186100746268657, "grad_norm": 0.4337864999528774, "learning_rate": 3.234974846604414e-05, "loss": 0.1762, "step": 19695 }, { "epoch": 9.188432835820896, "grad_norm": 0.42357357780003624, "learning_rate": 3.234127595615927e-05, "loss": 0.1693, "step": 19700 }, { "epoch": 9.190764925373134, "grad_norm": 0.40283919308247973, "learning_rate": 3.233280272642306e-05, "loss": 0.1704, "step": 19705 }, { "epoch": 9.193097014925373, "grad_norm": 0.4250748261273562, "learning_rate": 3.232432877809538e-05, "loss": 0.1732, "step": 19710 }, { "epoch": 9.195429104477611, "grad_norm": 0.4165388422849926, "learning_rate": 3.231585411243625e-05, "loss": 0.1654, "step": 19715 }, { "epoch": 9.197761194029852, "grad_norm": 0.44024816989019466, "learning_rate": 3.230737873070574e-05, "loss": 0.1743, "step": 19720 }, { "epoch": 9.20009328358209, "grad_norm": 0.4340777546476193, "learning_rate": 3.2298902634164076e-05, "loss": 0.174, "step": 19725 }, { "epoch": 9.202425373134329, "grad_norm": 0.4633244077982807, "learning_rate": 3.229042582407157e-05, "loss": 0.1744, "step": 19730 }, { "epoch": 9.204757462686567, "grad_norm": 0.42516653175607405, "learning_rate": 3.2281948301688644e-05, "loss": 0.1753, "step": 19735 }, { "epoch": 9.207089552238806, "grad_norm": 0.435941497489751, "learning_rate": 3.2273470068275816e-05, "loss": 0.176, "step": 19740 }, { "epoch": 9.209421641791044, "grad_norm": 0.41866846489413606, "learning_rate": 3.2264991125093734e-05, "loss": 0.1779, "step": 19745 }, { "epoch": 9.211753731343283, "grad_norm": 0.4339554431198841, "learning_rate": 3.225651147340312e-05, "loss": 0.1702, "step": 19750 }, { "epoch": 9.214085820895523, "grad_norm": 0.4483328106919701, "learning_rate": 3.224803111446482e-05, "loss": 0.1675, "step": 19755 }, { "epoch": 9.216417910447761, "grad_norm": 0.4275781916478092, "learning_rate": 3.223955004953979e-05, "loss": 0.1753, "step": 19760 }, { "epoch": 9.21875, "grad_norm": 0.440745591639815, "learning_rate": 3.223106827988908e-05, "loss": 0.1719, "step": 19765 }, { "epoch": 9.221082089552239, "grad_norm": 0.4194859938189719, "learning_rate": 3.222258580677385e-05, "loss": 0.1708, "step": 19770 }, { "epoch": 9.223414179104477, "grad_norm": 0.4448304441325004, "learning_rate": 3.221410263145534e-05, "loss": 0.1771, "step": 19775 }, { "epoch": 9.225746268656717, "grad_norm": 0.4592363745725991, "learning_rate": 3.220561875519495e-05, "loss": 0.1763, "step": 19780 }, { "epoch": 9.228078358208956, "grad_norm": 0.4428542830312022, "learning_rate": 3.219713417925414e-05, "loss": 0.1723, "step": 19785 }, { "epoch": 9.230410447761194, "grad_norm": 0.41004078062256943, "learning_rate": 3.218864890489446e-05, "loss": 0.1724, "step": 19790 }, { "epoch": 9.232742537313433, "grad_norm": 0.41757946222205555, "learning_rate": 3.218016293337763e-05, "loss": 0.1753, "step": 19795 }, { "epoch": 9.235074626865671, "grad_norm": 0.4260125899705012, "learning_rate": 3.2171676265965415e-05, "loss": 0.1761, "step": 19800 }, { "epoch": 9.23740671641791, "grad_norm": 0.44678686048771643, "learning_rate": 3.216318890391969e-05, "loss": 0.1749, "step": 19805 }, { "epoch": 9.239738805970148, "grad_norm": 0.4563930514649532, "learning_rate": 3.2154700848502454e-05, "loss": 0.1809, "step": 19810 }, { "epoch": 9.242070895522389, "grad_norm": 0.4374298232619285, "learning_rate": 3.21462121009758e-05, "loss": 0.1773, "step": 19815 }, { "epoch": 9.244402985074627, "grad_norm": 0.4435899780814908, "learning_rate": 3.2137722662601934e-05, "loss": 0.1739, "step": 19820 }, { "epoch": 9.246735074626866, "grad_norm": 0.42197757296977184, "learning_rate": 3.212923253464314e-05, "loss": 0.1742, "step": 19825 }, { "epoch": 9.249067164179104, "grad_norm": 0.4406265607269732, "learning_rate": 3.212074171836181e-05, "loss": 0.1758, "step": 19830 }, { "epoch": 9.251399253731343, "grad_norm": 0.4076009138946553, "learning_rate": 3.211225021502047e-05, "loss": 0.1705, "step": 19835 }, { "epoch": 9.253731343283581, "grad_norm": 0.4359874200861375, "learning_rate": 3.21037580258817e-05, "loss": 0.174, "step": 19840 }, { "epoch": 9.256063432835822, "grad_norm": 0.44244761951702083, "learning_rate": 3.209526515220824e-05, "loss": 0.1681, "step": 19845 }, { "epoch": 9.25839552238806, "grad_norm": 0.43358329965600867, "learning_rate": 3.208677159526287e-05, "loss": 0.1748, "step": 19850 }, { "epoch": 9.260727611940299, "grad_norm": 0.431789022806971, "learning_rate": 3.207827735630851e-05, "loss": 0.1746, "step": 19855 }, { "epoch": 9.263059701492537, "grad_norm": 0.4350935077339532, "learning_rate": 3.206978243660817e-05, "loss": 0.1776, "step": 19860 }, { "epoch": 9.265391791044776, "grad_norm": 0.4278685162932021, "learning_rate": 3.206128683742497e-05, "loss": 0.1726, "step": 19865 }, { "epoch": 9.267723880597014, "grad_norm": 0.41858151505802244, "learning_rate": 3.205279056002212e-05, "loss": 0.1723, "step": 19870 }, { "epoch": 9.270055970149254, "grad_norm": 0.4250656884220101, "learning_rate": 3.204429360566293e-05, "loss": 0.1738, "step": 19875 }, { "epoch": 9.272388059701493, "grad_norm": 0.4481909060520682, "learning_rate": 3.203579597561082e-05, "loss": 0.1737, "step": 19880 }, { "epoch": 9.274720149253731, "grad_norm": 0.41747272329986324, "learning_rate": 3.202729767112931e-05, "loss": 0.1742, "step": 19885 }, { "epoch": 9.27705223880597, "grad_norm": 0.42699898919333595, "learning_rate": 3.2018798693482015e-05, "loss": 0.1787, "step": 19890 }, { "epoch": 9.279384328358208, "grad_norm": 0.41731888293971714, "learning_rate": 3.201029904393265e-05, "loss": 0.178, "step": 19895 }, { "epoch": 9.281716417910447, "grad_norm": 0.43020029170847174, "learning_rate": 3.200179872374503e-05, "loss": 0.1733, "step": 19900 }, { "epoch": 9.284048507462687, "grad_norm": 0.42711745431835857, "learning_rate": 3.199329773418307e-05, "loss": 0.176, "step": 19905 }, { "epoch": 9.286380597014926, "grad_norm": 0.41994711362965864, "learning_rate": 3.198479607651079e-05, "loss": 0.1704, "step": 19910 }, { "epoch": 9.288712686567164, "grad_norm": 0.43665705994161985, "learning_rate": 3.1976293751992295e-05, "loss": 0.1759, "step": 19915 }, { "epoch": 9.291044776119403, "grad_norm": 0.42957818672953074, "learning_rate": 3.1967790761891826e-05, "loss": 0.1735, "step": 19920 }, { "epoch": 9.293376865671641, "grad_norm": 0.4263943874109768, "learning_rate": 3.195928710747366e-05, "loss": 0.1808, "step": 19925 }, { "epoch": 9.29570895522388, "grad_norm": 0.440081229481672, "learning_rate": 3.1950782790002236e-05, "loss": 0.1763, "step": 19930 }, { "epoch": 9.29804104477612, "grad_norm": 0.4305454327594806, "learning_rate": 3.194227781074205e-05, "loss": 0.1797, "step": 19935 }, { "epoch": 9.300373134328359, "grad_norm": 0.43470981432761907, "learning_rate": 3.1933772170957716e-05, "loss": 0.1748, "step": 19940 }, { "epoch": 9.302705223880597, "grad_norm": 0.4283531628287524, "learning_rate": 3.192526587191395e-05, "loss": 0.1802, "step": 19945 }, { "epoch": 9.305037313432836, "grad_norm": 0.44254473291393787, "learning_rate": 3.191675891487554e-05, "loss": 0.1758, "step": 19950 }, { "epoch": 9.307369402985074, "grad_norm": 0.4061168265630799, "learning_rate": 3.190825130110739e-05, "loss": 0.1713, "step": 19955 }, { "epoch": 9.309701492537313, "grad_norm": 0.461796388038234, "learning_rate": 3.189974303187452e-05, "loss": 0.1891, "step": 19960 }, { "epoch": 9.312033582089553, "grad_norm": 0.4242061789124302, "learning_rate": 3.1891234108442007e-05, "loss": 0.1776, "step": 19965 }, { "epoch": 9.314365671641792, "grad_norm": 0.4426136637402203, "learning_rate": 3.188272453207507e-05, "loss": 0.178, "step": 19970 }, { "epoch": 9.31669776119403, "grad_norm": 0.44137851092693725, "learning_rate": 3.1874214304038965e-05, "loss": 0.1725, "step": 19975 }, { "epoch": 9.319029850746269, "grad_norm": 0.4268068086865364, "learning_rate": 3.186570342559912e-05, "loss": 0.1731, "step": 19980 }, { "epoch": 9.321361940298507, "grad_norm": 0.4261647479310316, "learning_rate": 3.1857191898020996e-05, "loss": 0.1703, "step": 19985 }, { "epoch": 9.323694029850746, "grad_norm": 0.4562379049552641, "learning_rate": 3.184867972257019e-05, "loss": 0.1768, "step": 19990 }, { "epoch": 9.326026119402986, "grad_norm": 0.44738446112631863, "learning_rate": 3.184016690051237e-05, "loss": 0.1762, "step": 19995 }, { "epoch": 9.328358208955224, "grad_norm": 0.4241247203114402, "learning_rate": 3.1831653433113317e-05, "loss": 0.1782, "step": 20000 }, { "epoch": 9.330690298507463, "grad_norm": 0.4244802524774078, "learning_rate": 3.18231393216389e-05, "loss": 0.1779, "step": 20005 }, { "epoch": 9.333022388059701, "grad_norm": 0.4134808425928455, "learning_rate": 3.1814624567355087e-05, "loss": 0.1771, "step": 20010 }, { "epoch": 9.33535447761194, "grad_norm": 0.44468956308118884, "learning_rate": 3.180610917152795e-05, "loss": 0.178, "step": 20015 }, { "epoch": 9.337686567164178, "grad_norm": 0.4325789824032844, "learning_rate": 3.179759313542362e-05, "loss": 0.1797, "step": 20020 }, { "epoch": 9.340018656716419, "grad_norm": 0.44386442657733455, "learning_rate": 3.178907646030838e-05, "loss": 0.1823, "step": 20025 }, { "epoch": 9.342350746268657, "grad_norm": 0.44415392123217423, "learning_rate": 3.1780559147448554e-05, "loss": 0.1792, "step": 20030 }, { "epoch": 9.344682835820896, "grad_norm": 0.4337050102751632, "learning_rate": 3.1772041198110604e-05, "loss": 0.1758, "step": 20035 }, { "epoch": 9.347014925373134, "grad_norm": 0.4644437011053532, "learning_rate": 3.176352261356105e-05, "loss": 0.176, "step": 20040 }, { "epoch": 9.349347014925373, "grad_norm": 0.40869454041266995, "learning_rate": 3.1755003395066546e-05, "loss": 0.1786, "step": 20045 }, { "epoch": 9.351679104477611, "grad_norm": 0.42647247514465764, "learning_rate": 3.17464835438938e-05, "loss": 0.182, "step": 20050 }, { "epoch": 9.354011194029852, "grad_norm": 0.44498807894256254, "learning_rate": 3.173796306130963e-05, "loss": 0.1748, "step": 20055 }, { "epoch": 9.35634328358209, "grad_norm": 0.42074172962860723, "learning_rate": 3.172944194858096e-05, "loss": 0.1733, "step": 20060 }, { "epoch": 9.358675373134329, "grad_norm": 0.41077376066901666, "learning_rate": 3.17209202069748e-05, "loss": 0.1778, "step": 20065 }, { "epoch": 9.361007462686567, "grad_norm": 0.46410050868603725, "learning_rate": 3.171239783775825e-05, "loss": 0.1751, "step": 20070 }, { "epoch": 9.363339552238806, "grad_norm": 0.470675015038599, "learning_rate": 3.170387484219849e-05, "loss": 0.1852, "step": 20075 }, { "epoch": 9.365671641791044, "grad_norm": 0.4376499082931864, "learning_rate": 3.169535122156283e-05, "loss": 0.1705, "step": 20080 }, { "epoch": 9.368003731343283, "grad_norm": 0.41873941728889, "learning_rate": 3.1686826977118635e-05, "loss": 0.1685, "step": 20085 }, { "epoch": 9.370335820895523, "grad_norm": 0.43067501223059135, "learning_rate": 3.167830211013338e-05, "loss": 0.1754, "step": 20090 }, { "epoch": 9.372667910447761, "grad_norm": 0.4401961447666279, "learning_rate": 3.166977662187464e-05, "loss": 0.178, "step": 20095 }, { "epoch": 9.375, "grad_norm": 0.4432776930018639, "learning_rate": 3.166125051361007e-05, "loss": 0.18, "step": 20100 }, { "epoch": 9.377332089552239, "grad_norm": 0.4264381778398016, "learning_rate": 3.165272378660741e-05, "loss": 0.1765, "step": 20105 }, { "epoch": 9.379664179104477, "grad_norm": 0.4144592911503309, "learning_rate": 3.164419644213451e-05, "loss": 0.1805, "step": 20110 }, { "epoch": 9.381996268656717, "grad_norm": 0.4345023346090368, "learning_rate": 3.1635668481459306e-05, "loss": 0.1793, "step": 20115 }, { "epoch": 9.384328358208956, "grad_norm": 0.43433162609217285, "learning_rate": 3.162713990584983e-05, "loss": 0.1786, "step": 20120 }, { "epoch": 9.386660447761194, "grad_norm": 0.4475421459428423, "learning_rate": 3.1618610716574196e-05, "loss": 0.1767, "step": 20125 }, { "epoch": 9.388992537313433, "grad_norm": 0.4448584238263746, "learning_rate": 3.1610080914900604e-05, "loss": 0.1791, "step": 20130 }, { "epoch": 9.391324626865671, "grad_norm": 0.4561428228856879, "learning_rate": 3.1601550502097356e-05, "loss": 0.1845, "step": 20135 }, { "epoch": 9.39365671641791, "grad_norm": 0.45076164402922425, "learning_rate": 3.159301947943285e-05, "loss": 0.1788, "step": 20140 }, { "epoch": 9.395988805970148, "grad_norm": 0.5286011729427849, "learning_rate": 3.158448784817556e-05, "loss": 0.1784, "step": 20145 }, { "epoch": 9.398320895522389, "grad_norm": 0.42965573353578973, "learning_rate": 3.157595560959407e-05, "loss": 0.1769, "step": 20150 }, { "epoch": 9.400652985074627, "grad_norm": 0.4342154994817608, "learning_rate": 3.156742276495702e-05, "loss": 0.1774, "step": 20155 }, { "epoch": 9.402985074626866, "grad_norm": 0.42938280191939937, "learning_rate": 3.155888931553319e-05, "loss": 0.1768, "step": 20160 }, { "epoch": 9.405317164179104, "grad_norm": 0.4124560059497282, "learning_rate": 3.155035526259139e-05, "loss": 0.1737, "step": 20165 }, { "epoch": 9.407649253731343, "grad_norm": 0.43523311723823455, "learning_rate": 3.154182060740058e-05, "loss": 0.1788, "step": 20170 }, { "epoch": 9.409981343283581, "grad_norm": 0.4122331766522766, "learning_rate": 3.153328535122978e-05, "loss": 0.1815, "step": 20175 }, { "epoch": 9.412313432835822, "grad_norm": 0.45176762841153384, "learning_rate": 3.152474949534808e-05, "loss": 0.1825, "step": 20180 }, { "epoch": 9.41464552238806, "grad_norm": 0.41799671297897173, "learning_rate": 3.15162130410247e-05, "loss": 0.1728, "step": 20185 }, { "epoch": 9.416977611940299, "grad_norm": 0.42068444801006094, "learning_rate": 3.1507675989528915e-05, "loss": 0.182, "step": 20190 }, { "epoch": 9.419309701492537, "grad_norm": 0.4764054914850856, "learning_rate": 3.1499138342130114e-05, "loss": 0.1845, "step": 20195 }, { "epoch": 9.421641791044776, "grad_norm": 0.43131412298688454, "learning_rate": 3.1490600100097746e-05, "loss": 0.1793, "step": 20200 }, { "epoch": 9.423973880597014, "grad_norm": 0.4249982667143463, "learning_rate": 3.148206126470138e-05, "loss": 0.1782, "step": 20205 }, { "epoch": 9.426305970149254, "grad_norm": 0.4253888008457687, "learning_rate": 3.147352183721067e-05, "loss": 0.1771, "step": 20210 }, { "epoch": 9.428638059701493, "grad_norm": 0.4131876522878838, "learning_rate": 3.1464981818895325e-05, "loss": 0.173, "step": 20215 }, { "epoch": 9.430970149253731, "grad_norm": 0.43537283321196757, "learning_rate": 3.145644121102517e-05, "loss": 0.185, "step": 20220 }, { "epoch": 9.43330223880597, "grad_norm": 0.4366002040144386, "learning_rate": 3.1447900014870125e-05, "loss": 0.1737, "step": 20225 }, { "epoch": 9.435634328358208, "grad_norm": 0.4470632983540943, "learning_rate": 3.1439358231700165e-05, "loss": 0.1804, "step": 20230 }, { "epoch": 9.437966417910447, "grad_norm": 0.43484910934800936, "learning_rate": 3.143081586278539e-05, "loss": 0.184, "step": 20235 }, { "epoch": 9.440298507462687, "grad_norm": 0.42908267287818497, "learning_rate": 3.142227290939595e-05, "loss": 0.1803, "step": 20240 }, { "epoch": 9.442630597014926, "grad_norm": 0.45940690816671237, "learning_rate": 3.14137293728021e-05, "loss": 0.1788, "step": 20245 }, { "epoch": 9.444962686567164, "grad_norm": 0.4384968502817865, "learning_rate": 3.14051852542742e-05, "loss": 0.1841, "step": 20250 }, { "epoch": 9.447294776119403, "grad_norm": 0.4227860606961542, "learning_rate": 3.1396640555082665e-05, "loss": 0.1794, "step": 20255 }, { "epoch": 9.449626865671641, "grad_norm": 0.44496839007495836, "learning_rate": 3.1388095276498013e-05, "loss": 0.1836, "step": 20260 }, { "epoch": 9.45195895522388, "grad_norm": 0.41910235944043317, "learning_rate": 3.137954941979085e-05, "loss": 0.174, "step": 20265 }, { "epoch": 9.45429104477612, "grad_norm": 0.4387124595343625, "learning_rate": 3.1371002986231855e-05, "loss": 0.1782, "step": 20270 }, { "epoch": 9.456623134328359, "grad_norm": 0.43011950183124503, "learning_rate": 3.136245597709181e-05, "loss": 0.1798, "step": 20275 }, { "epoch": 9.458955223880597, "grad_norm": 0.4078466057028619, "learning_rate": 3.1353908393641574e-05, "loss": 0.176, "step": 20280 }, { "epoch": 9.461287313432836, "grad_norm": 0.42432570167633493, "learning_rate": 3.134536023715207e-05, "loss": 0.1833, "step": 20285 }, { "epoch": 9.463619402985074, "grad_norm": 0.4232052510349351, "learning_rate": 3.133681150889434e-05, "loss": 0.1789, "step": 20290 }, { "epoch": 9.465951492537313, "grad_norm": 0.41403968109497646, "learning_rate": 3.1328262210139515e-05, "loss": 0.1768, "step": 20295 }, { "epoch": 9.468283582089553, "grad_norm": 0.40448433324168387, "learning_rate": 3.131971234215877e-05, "loss": 0.1761, "step": 20300 }, { "epoch": 9.470615671641792, "grad_norm": 0.40737024686985235, "learning_rate": 3.13111619062234e-05, "loss": 0.1789, "step": 20305 }, { "epoch": 9.47294776119403, "grad_norm": 0.4506347232983131, "learning_rate": 3.1302610903604775e-05, "loss": 0.1772, "step": 20310 }, { "epoch": 9.475279850746269, "grad_norm": 0.4212265531437914, "learning_rate": 3.129405933557433e-05, "loss": 0.1795, "step": 20315 }, { "epoch": 9.477611940298507, "grad_norm": 0.439401525738801, "learning_rate": 3.128550720340362e-05, "loss": 0.179, "step": 20320 }, { "epoch": 9.479944029850746, "grad_norm": 0.4310725618619645, "learning_rate": 3.127695450836426e-05, "loss": 0.1757, "step": 20325 }, { "epoch": 9.482276119402986, "grad_norm": 0.41963305605018686, "learning_rate": 3.126840125172795e-05, "loss": 0.1771, "step": 20330 }, { "epoch": 9.484608208955224, "grad_norm": 0.43519651917854413, "learning_rate": 3.125984743476648e-05, "loss": 0.1788, "step": 20335 }, { "epoch": 9.486940298507463, "grad_norm": 0.4339595510726335, "learning_rate": 3.125129305875172e-05, "loss": 0.1841, "step": 20340 }, { "epoch": 9.489272388059701, "grad_norm": 0.44247429083923145, "learning_rate": 3.1242738124955624e-05, "loss": 0.18, "step": 20345 }, { "epoch": 9.49160447761194, "grad_norm": 0.4389933204135541, "learning_rate": 3.1234182634650234e-05, "loss": 0.1788, "step": 20350 }, { "epoch": 9.493936567164178, "grad_norm": 0.4270349246278426, "learning_rate": 3.122562658910765e-05, "loss": 0.187, "step": 20355 }, { "epoch": 9.496268656716419, "grad_norm": 0.4203290992586545, "learning_rate": 3.1217069989600097e-05, "loss": 0.1735, "step": 20360 }, { "epoch": 9.498600746268657, "grad_norm": 0.41839668197790464, "learning_rate": 3.1208512837399856e-05, "loss": 0.1816, "step": 20365 }, { "epoch": 9.500932835820896, "grad_norm": 0.42285346688707626, "learning_rate": 3.119995513377928e-05, "loss": 0.1831, "step": 20370 }, { "epoch": 9.503264925373134, "grad_norm": 0.43004580904526146, "learning_rate": 3.119139688001082e-05, "loss": 0.1838, "step": 20375 }, { "epoch": 9.505597014925373, "grad_norm": 0.4391744974773042, "learning_rate": 3.118283807736703e-05, "loss": 0.1776, "step": 20380 }, { "epoch": 9.507929104477611, "grad_norm": 0.43908843358352, "learning_rate": 3.1174278727120496e-05, "loss": 0.1788, "step": 20385 }, { "epoch": 9.510261194029852, "grad_norm": 0.4276030759108609, "learning_rate": 3.1165718830543914e-05, "loss": 0.1814, "step": 20390 }, { "epoch": 9.51259328358209, "grad_norm": 0.43955353368538647, "learning_rate": 3.115715838891007e-05, "loss": 0.1837, "step": 20395 }, { "epoch": 9.514925373134329, "grad_norm": 0.4093233847968292, "learning_rate": 3.1148597403491816e-05, "loss": 0.1817, "step": 20400 }, { "epoch": 9.517257462686567, "grad_norm": 0.42650043350859246, "learning_rate": 3.114003587556208e-05, "loss": 0.1824, "step": 20405 }, { "epoch": 9.519589552238806, "grad_norm": 0.42708022766993764, "learning_rate": 3.1131473806393876e-05, "loss": 0.182, "step": 20410 }, { "epoch": 9.521921641791044, "grad_norm": 0.4383278441961168, "learning_rate": 3.112291119726032e-05, "loss": 0.185, "step": 20415 }, { "epoch": 9.524253731343283, "grad_norm": 0.41283987406910955, "learning_rate": 3.1114348049434583e-05, "loss": 0.1756, "step": 20420 }, { "epoch": 9.526585820895523, "grad_norm": 0.415017721739395, "learning_rate": 3.110578436418992e-05, "loss": 0.1759, "step": 20425 }, { "epoch": 9.528917910447761, "grad_norm": 0.43648180257090674, "learning_rate": 3.109722014279967e-05, "loss": 0.1844, "step": 20430 }, { "epoch": 9.53125, "grad_norm": 0.42938310137285984, "learning_rate": 3.108865538653725e-05, "loss": 0.1784, "step": 20435 }, { "epoch": 9.533582089552239, "grad_norm": 0.45365355335849206, "learning_rate": 3.108009009667615e-05, "loss": 0.1848, "step": 20440 }, { "epoch": 9.535914179104477, "grad_norm": 0.42037430471940046, "learning_rate": 3.1071524274489966e-05, "loss": 0.1787, "step": 20445 }, { "epoch": 9.538246268656717, "grad_norm": 0.4228181484260728, "learning_rate": 3.106295792125233e-05, "loss": 0.1867, "step": 20450 }, { "epoch": 9.540578358208956, "grad_norm": 0.43701218887625964, "learning_rate": 3.1054391038237e-05, "loss": 0.1802, "step": 20455 }, { "epoch": 9.542910447761194, "grad_norm": 0.46864519629906565, "learning_rate": 3.104582362671778e-05, "loss": 0.1796, "step": 20460 }, { "epoch": 9.545242537313433, "grad_norm": 0.4503899330911187, "learning_rate": 3.103725568796854e-05, "loss": 0.1833, "step": 20465 }, { "epoch": 9.547574626865671, "grad_norm": 0.45221335733250223, "learning_rate": 3.102868722326328e-05, "loss": 0.183, "step": 20470 }, { "epoch": 9.54990671641791, "grad_norm": 0.4261242393367249, "learning_rate": 3.102011823387605e-05, "loss": 0.185, "step": 20475 }, { "epoch": 9.552238805970148, "grad_norm": 0.44119829856175785, "learning_rate": 3.1011548721080955e-05, "loss": 0.1813, "step": 20480 }, { "epoch": 9.554570895522389, "grad_norm": 0.41796817730873853, "learning_rate": 3.100297868615222e-05, "loss": 0.1809, "step": 20485 }, { "epoch": 9.556902985074627, "grad_norm": 0.455140053921012, "learning_rate": 3.099440813036411e-05, "loss": 0.1792, "step": 20490 }, { "epoch": 9.559235074626866, "grad_norm": 0.44015620589014226, "learning_rate": 3.0985837054990983e-05, "loss": 0.1795, "step": 20495 }, { "epoch": 9.561567164179104, "grad_norm": 0.4265923677657143, "learning_rate": 3.097726546130729e-05, "loss": 0.1802, "step": 20500 }, { "epoch": 9.563899253731343, "grad_norm": 0.42084731833364036, "learning_rate": 3.096869335058755e-05, "loss": 0.1832, "step": 20505 }, { "epoch": 9.566231343283581, "grad_norm": 0.4336015802286334, "learning_rate": 3.096012072410633e-05, "loss": 0.1802, "step": 20510 }, { "epoch": 9.568563432835822, "grad_norm": 0.42375541012189566, "learning_rate": 3.095154758313831e-05, "loss": 0.1819, "step": 20515 }, { "epoch": 9.57089552238806, "grad_norm": 0.42061189482687605, "learning_rate": 3.094297392895825e-05, "loss": 0.1812, "step": 20520 }, { "epoch": 9.573227611940299, "grad_norm": 0.43389840379054206, "learning_rate": 3.093439976284094e-05, "loss": 0.179, "step": 20525 }, { "epoch": 9.575559701492537, "grad_norm": 0.40932220683652626, "learning_rate": 3.0925825086061295e-05, "loss": 0.1805, "step": 20530 }, { "epoch": 9.577891791044776, "grad_norm": 0.4453311050077748, "learning_rate": 3.0917249899894285e-05, "loss": 0.1824, "step": 20535 }, { "epoch": 9.580223880597014, "grad_norm": 0.42072277450346623, "learning_rate": 3.090867420561495e-05, "loss": 0.184, "step": 20540 }, { "epoch": 9.582555970149254, "grad_norm": 0.4497103689415234, "learning_rate": 3.090009800449842e-05, "loss": 0.1828, "step": 20545 }, { "epoch": 9.584888059701493, "grad_norm": 0.43167188396720146, "learning_rate": 3.0891521297819906e-05, "loss": 0.1824, "step": 20550 }, { "epoch": 9.587220149253731, "grad_norm": 0.4528311763664512, "learning_rate": 3.088294408685466e-05, "loss": 0.1852, "step": 20555 }, { "epoch": 9.58955223880597, "grad_norm": 0.4543936318675092, "learning_rate": 3.0874366372878036e-05, "loss": 0.1851, "step": 20560 }, { "epoch": 9.591884328358208, "grad_norm": 0.4449083482745699, "learning_rate": 3.086578815716548e-05, "loss": 0.1845, "step": 20565 }, { "epoch": 9.594216417910447, "grad_norm": 0.4121875113198233, "learning_rate": 3.085720944099246e-05, "loss": 0.186, "step": 20570 }, { "epoch": 9.596548507462687, "grad_norm": 0.4565710446051817, "learning_rate": 3.0848630225634564e-05, "loss": 0.1844, "step": 20575 }, { "epoch": 9.598880597014926, "grad_norm": 0.43989740275197287, "learning_rate": 3.0840050512367444e-05, "loss": 0.1853, "step": 20580 }, { "epoch": 9.601212686567164, "grad_norm": 0.41749612118151413, "learning_rate": 3.08314703024668e-05, "loss": 0.1787, "step": 20585 }, { "epoch": 9.603544776119403, "grad_norm": 0.42288582874276875, "learning_rate": 3.082288959720845e-05, "loss": 0.1849, "step": 20590 }, { "epoch": 9.605876865671641, "grad_norm": 0.4236440165377373, "learning_rate": 3.081430839786825e-05, "loss": 0.1808, "step": 20595 }, { "epoch": 9.60820895522388, "grad_norm": 0.43883705075549173, "learning_rate": 3.0805726705722156e-05, "loss": 0.1797, "step": 20600 }, { "epoch": 9.61054104477612, "grad_norm": 0.41725220025926407, "learning_rate": 3.079714452204617e-05, "loss": 0.1808, "step": 20605 }, { "epoch": 9.612873134328359, "grad_norm": 0.4310222767924544, "learning_rate": 3.078856184811638e-05, "loss": 0.1885, "step": 20610 }, { "epoch": 9.615205223880597, "grad_norm": 0.4419956111266621, "learning_rate": 3.0779978685208956e-05, "loss": 0.186, "step": 20615 }, { "epoch": 9.617537313432836, "grad_norm": 0.5778060301515147, "learning_rate": 3.077139503460012e-05, "loss": 0.1786, "step": 20620 }, { "epoch": 9.619869402985074, "grad_norm": 0.42482161453931566, "learning_rate": 3.0762810897566184e-05, "loss": 0.1877, "step": 20625 }, { "epoch": 9.622201492537313, "grad_norm": 0.4081774969547128, "learning_rate": 3.0754226275383546e-05, "loss": 0.1808, "step": 20630 }, { "epoch": 9.624533582089553, "grad_norm": 0.46479561682260206, "learning_rate": 3.0745641169328627e-05, "loss": 0.1884, "step": 20635 }, { "epoch": 9.626865671641792, "grad_norm": 0.4530150536913963, "learning_rate": 3.073705558067797e-05, "loss": 0.1839, "step": 20640 }, { "epoch": 9.62919776119403, "grad_norm": 0.4173288448831521, "learning_rate": 3.072846951070816e-05, "loss": 0.1828, "step": 20645 }, { "epoch": 9.631529850746269, "grad_norm": 0.4252277204002397, "learning_rate": 3.071988296069586e-05, "loss": 0.1796, "step": 20650 }, { "epoch": 9.633861940298507, "grad_norm": 0.4283208582796829, "learning_rate": 3.071129593191783e-05, "loss": 0.181, "step": 20655 }, { "epoch": 9.636194029850746, "grad_norm": 0.42295375200822904, "learning_rate": 3.070270842565084e-05, "loss": 0.1857, "step": 20660 }, { "epoch": 9.638526119402986, "grad_norm": 0.4544854757910125, "learning_rate": 3.069412044317181e-05, "loss": 0.1858, "step": 20665 }, { "epoch": 9.640858208955224, "grad_norm": 0.42733627081021514, "learning_rate": 3.068553198575767e-05, "loss": 0.1849, "step": 20670 }, { "epoch": 9.643190298507463, "grad_norm": 0.43333129993314284, "learning_rate": 3.0676943054685445e-05, "loss": 0.1825, "step": 20675 }, { "epoch": 9.645522388059701, "grad_norm": 0.44287010150469625, "learning_rate": 3.0668353651232226e-05, "loss": 0.1847, "step": 20680 }, { "epoch": 9.64785447761194, "grad_norm": 0.44294072508641147, "learning_rate": 3.065976377667517e-05, "loss": 0.1836, "step": 20685 }, { "epoch": 9.650186567164178, "grad_norm": 0.44318072445548395, "learning_rate": 3.065117343229153e-05, "loss": 0.1779, "step": 20690 }, { "epoch": 9.652518656716419, "grad_norm": 0.4317610439185678, "learning_rate": 3.0642582619358576e-05, "loss": 0.18, "step": 20695 }, { "epoch": 9.654850746268657, "grad_norm": 0.4477960345411698, "learning_rate": 3.063399133915371e-05, "loss": 0.1843, "step": 20700 }, { "epoch": 9.657182835820896, "grad_norm": 0.4445591353966393, "learning_rate": 3.0625399592954346e-05, "loss": 0.1846, "step": 20705 }, { "epoch": 9.659514925373134, "grad_norm": 0.43697785333903194, "learning_rate": 3.0616807382038016e-05, "loss": 0.1829, "step": 20710 }, { "epoch": 9.661847014925373, "grad_norm": 0.43760074388862663, "learning_rate": 3.0608214707682286e-05, "loss": 0.1822, "step": 20715 }, { "epoch": 9.664179104477611, "grad_norm": 0.46685517070632576, "learning_rate": 3.059962157116481e-05, "loss": 0.1914, "step": 20720 }, { "epoch": 9.666511194029852, "grad_norm": 0.4411934768146901, "learning_rate": 3.059102797376331e-05, "loss": 0.187, "step": 20725 }, { "epoch": 9.66884328358209, "grad_norm": 0.44306080600769043, "learning_rate": 3.058243391675557e-05, "loss": 0.1871, "step": 20730 }, { "epoch": 9.671175373134329, "grad_norm": 0.4359363730069315, "learning_rate": 3.0573839401419426e-05, "loss": 0.1843, "step": 20735 }, { "epoch": 9.673507462686567, "grad_norm": 0.43737348357206757, "learning_rate": 3.056524442903282e-05, "loss": 0.1833, "step": 20740 }, { "epoch": 9.675839552238806, "grad_norm": 0.42307073822599534, "learning_rate": 3.055664900087374e-05, "loss": 0.1793, "step": 20745 }, { "epoch": 9.678171641791044, "grad_norm": 0.4297828766793949, "learning_rate": 3.054805311822023e-05, "loss": 0.1847, "step": 20750 }, { "epoch": 9.680503731343283, "grad_norm": 0.4159000211587784, "learning_rate": 3.0539456782350436e-05, "loss": 0.1826, "step": 20755 }, { "epoch": 9.682835820895523, "grad_norm": 0.46754278688978973, "learning_rate": 3.053085999454254e-05, "loss": 0.1943, "step": 20760 }, { "epoch": 9.685167910447761, "grad_norm": 0.45788330922394727, "learning_rate": 3.0522262756074796e-05, "loss": 0.1846, "step": 20765 }, { "epoch": 9.6875, "grad_norm": 0.4528444213882102, "learning_rate": 3.051366506822554e-05, "loss": 0.1891, "step": 20770 }, { "epoch": 9.689832089552239, "grad_norm": 0.42913967363462746, "learning_rate": 3.0505066932273157e-05, "loss": 0.1816, "step": 20775 }, { "epoch": 9.692164179104477, "grad_norm": 0.4410320024216381, "learning_rate": 3.0496468349496115e-05, "loss": 0.1832, "step": 20780 }, { "epoch": 9.694496268656717, "grad_norm": 0.4442067322582568, "learning_rate": 3.0487869321172947e-05, "loss": 0.191, "step": 20785 }, { "epoch": 9.696828358208956, "grad_norm": 0.4092719199465466, "learning_rate": 3.047926984858223e-05, "loss": 0.187, "step": 20790 }, { "epoch": 9.699160447761194, "grad_norm": 0.44402040343135984, "learning_rate": 3.047066993300264e-05, "loss": 0.184, "step": 20795 }, { "epoch": 9.701492537313433, "grad_norm": 0.42727058588241623, "learning_rate": 3.046206957571288e-05, "loss": 0.1812, "step": 20800 }, { "epoch": 9.703824626865671, "grad_norm": 0.43337169075986426, "learning_rate": 3.0453468777991768e-05, "loss": 0.1875, "step": 20805 }, { "epoch": 9.70615671641791, "grad_norm": 0.4185790792292514, "learning_rate": 3.0444867541118145e-05, "loss": 0.1869, "step": 20810 }, { "epoch": 9.708488805970148, "grad_norm": 0.44227004098311173, "learning_rate": 3.0436265866370922e-05, "loss": 0.1856, "step": 20815 }, { "epoch": 9.710820895522389, "grad_norm": 0.43185000958225694, "learning_rate": 3.0427663755029108e-05, "loss": 0.1884, "step": 20820 }, { "epoch": 9.713152985074627, "grad_norm": 0.42012354506230654, "learning_rate": 3.041906120837174e-05, "loss": 0.1846, "step": 20825 }, { "epoch": 9.715485074626866, "grad_norm": 0.4535503567562695, "learning_rate": 3.0410458227677934e-05, "loss": 0.1849, "step": 20830 }, { "epoch": 9.717817164179104, "grad_norm": 0.4190539766402466, "learning_rate": 3.040185481422689e-05, "loss": 0.1869, "step": 20835 }, { "epoch": 9.720149253731343, "grad_norm": 0.4239683802385783, "learning_rate": 3.0393250969297826e-05, "loss": 0.1833, "step": 20840 }, { "epoch": 9.722481343283581, "grad_norm": 0.4336860155501356, "learning_rate": 3.0384646694170073e-05, "loss": 0.1903, "step": 20845 }, { "epoch": 9.724813432835822, "grad_norm": 0.45558892888853153, "learning_rate": 3.0376041990122983e-05, "loss": 0.1812, "step": 20850 }, { "epoch": 9.72714552238806, "grad_norm": 0.43630014642898873, "learning_rate": 3.036743685843601e-05, "loss": 0.1865, "step": 20855 }, { "epoch": 9.729477611940299, "grad_norm": 0.40281020322829597, "learning_rate": 3.0358831300388657e-05, "loss": 0.1872, "step": 20860 }, { "epoch": 9.731809701492537, "grad_norm": 0.43258708538130425, "learning_rate": 3.035022531726047e-05, "loss": 0.1793, "step": 20865 }, { "epoch": 9.734141791044776, "grad_norm": 0.4520435068928705, "learning_rate": 3.0341618910331093e-05, "loss": 0.1887, "step": 20870 }, { "epoch": 9.736473880597014, "grad_norm": 0.440280955355701, "learning_rate": 3.0333012080880207e-05, "loss": 0.188, "step": 20875 }, { "epoch": 9.738805970149254, "grad_norm": 0.4672160402241151, "learning_rate": 3.0324404830187564e-05, "loss": 0.1899, "step": 20880 }, { "epoch": 9.741138059701493, "grad_norm": 0.438627835283304, "learning_rate": 3.0315797159532995e-05, "loss": 0.1833, "step": 20885 }, { "epoch": 9.743470149253731, "grad_norm": 0.42948442380945323, "learning_rate": 3.0307189070196358e-05, "loss": 0.1883, "step": 20890 }, { "epoch": 9.74580223880597, "grad_norm": 0.42380884423619386, "learning_rate": 3.0298580563457606e-05, "loss": 0.1869, "step": 20895 }, { "epoch": 9.748134328358208, "grad_norm": 0.4340731028311322, "learning_rate": 3.0289971640596737e-05, "loss": 0.1868, "step": 20900 }, { "epoch": 9.750466417910447, "grad_norm": 0.4368720042725163, "learning_rate": 3.0281362302893822e-05, "loss": 0.1866, "step": 20905 }, { "epoch": 9.752798507462687, "grad_norm": 0.4476868655616306, "learning_rate": 3.0272752551628975e-05, "loss": 0.1896, "step": 20910 }, { "epoch": 9.755130597014926, "grad_norm": 0.4292893477275638, "learning_rate": 3.026414238808239e-05, "loss": 0.1918, "step": 20915 }, { "epoch": 9.757462686567164, "grad_norm": 0.43659162503070453, "learning_rate": 3.0255531813534322e-05, "loss": 0.1863, "step": 20920 }, { "epoch": 9.759794776119403, "grad_norm": 0.4585278141026668, "learning_rate": 3.0246920829265067e-05, "loss": 0.1875, "step": 20925 }, { "epoch": 9.762126865671641, "grad_norm": 0.43208393367786113, "learning_rate": 3.0238309436555e-05, "loss": 0.1844, "step": 20930 }, { "epoch": 9.76445895522388, "grad_norm": 0.43191313196946196, "learning_rate": 3.0229697636684568e-05, "loss": 0.1858, "step": 20935 }, { "epoch": 9.76679104477612, "grad_norm": 0.4271331826726478, "learning_rate": 3.022108543093425e-05, "loss": 0.1851, "step": 20940 }, { "epoch": 9.769123134328359, "grad_norm": 0.4380520374520412, "learning_rate": 3.0212472820584587e-05, "loss": 0.1869, "step": 20945 }, { "epoch": 9.771455223880597, "grad_norm": 0.45217634762868353, "learning_rate": 3.020385980691621e-05, "loss": 0.1868, "step": 20950 }, { "epoch": 9.773787313432836, "grad_norm": 0.4375284493944394, "learning_rate": 3.019524639120979e-05, "loss": 0.1906, "step": 20955 }, { "epoch": 9.776119402985074, "grad_norm": 0.4109722892819675, "learning_rate": 3.0186632574746055e-05, "loss": 0.1824, "step": 20960 }, { "epoch": 9.778451492537313, "grad_norm": 0.4525198776396399, "learning_rate": 3.0178018358805793e-05, "loss": 0.1858, "step": 20965 }, { "epoch": 9.780783582089553, "grad_norm": 0.4428178078699812, "learning_rate": 3.016940374466986e-05, "loss": 0.1891, "step": 20970 }, { "epoch": 9.783115671641792, "grad_norm": 0.4383983428427838, "learning_rate": 3.0160788733619167e-05, "loss": 0.1875, "step": 20975 }, { "epoch": 9.78544776119403, "grad_norm": 0.434525699570895, "learning_rate": 3.0152173326934692e-05, "loss": 0.1831, "step": 20980 }, { "epoch": 9.787779850746269, "grad_norm": 0.42494965930846146, "learning_rate": 3.0143557525897444e-05, "loss": 0.1857, "step": 20985 }, { "epoch": 9.790111940298507, "grad_norm": 0.4674301035472428, "learning_rate": 3.0134941331788525e-05, "loss": 0.1925, "step": 20990 }, { "epoch": 9.792444029850746, "grad_norm": 0.42681845181922545, "learning_rate": 3.0126324745889067e-05, "loss": 0.191, "step": 20995 }, { "epoch": 9.794776119402986, "grad_norm": 0.4424510721318009, "learning_rate": 3.0117707769480285e-05, "loss": 0.1848, "step": 21000 }, { "epoch": 9.797108208955224, "grad_norm": 0.43565766122550165, "learning_rate": 3.0109090403843448e-05, "loss": 0.191, "step": 21005 }, { "epoch": 9.799440298507463, "grad_norm": 0.41154583881462514, "learning_rate": 3.0100472650259866e-05, "loss": 0.1841, "step": 21010 }, { "epoch": 9.801772388059701, "grad_norm": 0.43395696040379367, "learning_rate": 3.0091854510010907e-05, "loss": 0.1882, "step": 21015 }, { "epoch": 9.80410447761194, "grad_norm": 0.41340506675287847, "learning_rate": 3.008323598437802e-05, "loss": 0.1767, "step": 21020 }, { "epoch": 9.806436567164178, "grad_norm": 0.4280526171814063, "learning_rate": 3.0074617074642693e-05, "loss": 0.1879, "step": 21025 }, { "epoch": 9.808768656716419, "grad_norm": 0.43472710434408246, "learning_rate": 3.006599778208647e-05, "loss": 0.1903, "step": 21030 }, { "epoch": 9.811100746268657, "grad_norm": 0.4279478239345848, "learning_rate": 3.005737810799097e-05, "loss": 0.1868, "step": 21035 }, { "epoch": 9.813432835820896, "grad_norm": 0.41460638496719826, "learning_rate": 3.0048758053637844e-05, "loss": 0.183, "step": 21040 }, { "epoch": 9.815764925373134, "grad_norm": 0.4341098070982781, "learning_rate": 3.0040137620308812e-05, "loss": 0.1907, "step": 21045 }, { "epoch": 9.818097014925373, "grad_norm": 0.4274044924845387, "learning_rate": 3.0031516809285658e-05, "loss": 0.1895, "step": 21050 }, { "epoch": 9.820429104477611, "grad_norm": 0.42691736390601465, "learning_rate": 3.0022895621850207e-05, "loss": 0.1907, "step": 21055 }, { "epoch": 9.822761194029852, "grad_norm": 0.4382035473539771, "learning_rate": 3.001427405928435e-05, "loss": 0.1841, "step": 21060 }, { "epoch": 9.82509328358209, "grad_norm": 0.41888229556402384, "learning_rate": 3.0005652122870032e-05, "loss": 0.1911, "step": 21065 }, { "epoch": 9.827425373134329, "grad_norm": 0.4339378807014297, "learning_rate": 2.999702981388925e-05, "loss": 0.1845, "step": 21070 }, { "epoch": 9.829757462686567, "grad_norm": 0.44457288961256314, "learning_rate": 2.9988407133624057e-05, "loss": 0.1939, "step": 21075 }, { "epoch": 9.832089552238806, "grad_norm": 0.4339498405921915, "learning_rate": 2.9979784083356567e-05, "loss": 0.1839, "step": 21080 }, { "epoch": 9.834421641791044, "grad_norm": 0.4242770909409081, "learning_rate": 2.9971160664368946e-05, "loss": 0.184, "step": 21085 }, { "epoch": 9.836753731343283, "grad_norm": 0.4425744158121135, "learning_rate": 2.996253687794341e-05, "loss": 0.184, "step": 21090 }, { "epoch": 9.839085820895523, "grad_norm": 0.45352712620334945, "learning_rate": 2.9953912725362225e-05, "loss": 0.1862, "step": 21095 }, { "epoch": 9.841417910447761, "grad_norm": 0.4364692109505892, "learning_rate": 2.994528820790774e-05, "loss": 0.1909, "step": 21100 }, { "epoch": 9.84375, "grad_norm": 0.41952018566415783, "learning_rate": 2.9936663326862323e-05, "loss": 0.1901, "step": 21105 }, { "epoch": 9.846082089552239, "grad_norm": 0.425683654839295, "learning_rate": 2.9928038083508415e-05, "loss": 0.1861, "step": 21110 }, { "epoch": 9.848414179104477, "grad_norm": 0.43156220635951675, "learning_rate": 2.9919412479128513e-05, "loss": 0.1881, "step": 21115 }, { "epoch": 9.850746268656717, "grad_norm": 0.4321610507409509, "learning_rate": 2.9910786515005146e-05, "loss": 0.1858, "step": 21120 }, { "epoch": 9.853078358208956, "grad_norm": 0.43062297471100536, "learning_rate": 2.990216019242093e-05, "loss": 0.1909, "step": 21125 }, { "epoch": 9.855410447761194, "grad_norm": 0.43140089494214523, "learning_rate": 2.9893533512658507e-05, "loss": 0.1885, "step": 21130 }, { "epoch": 9.857742537313433, "grad_norm": 0.44164242630196204, "learning_rate": 2.988490647700058e-05, "loss": 0.1924, "step": 21135 }, { "epoch": 9.860074626865671, "grad_norm": 0.41934741001978715, "learning_rate": 2.987627908672992e-05, "loss": 0.1864, "step": 21140 }, { "epoch": 9.86240671641791, "grad_norm": 0.46167259105459885, "learning_rate": 2.9867651343129315e-05, "loss": 0.1886, "step": 21145 }, { "epoch": 9.864738805970148, "grad_norm": 0.41772414021386245, "learning_rate": 2.9859023247481644e-05, "loss": 0.1886, "step": 21150 }, { "epoch": 9.867070895522389, "grad_norm": 0.4360793086325806, "learning_rate": 2.985039480106982e-05, "loss": 0.1923, "step": 21155 }, { "epoch": 9.869402985074627, "grad_norm": 0.4376821618807655, "learning_rate": 2.9841766005176808e-05, "loss": 0.1892, "step": 21160 }, { "epoch": 9.871735074626866, "grad_norm": 0.43277693231896047, "learning_rate": 2.983313686108563e-05, "loss": 0.189, "step": 21165 }, { "epoch": 9.874067164179104, "grad_norm": 0.41826953137893974, "learning_rate": 2.982450737007935e-05, "loss": 0.1887, "step": 21170 }, { "epoch": 9.876399253731343, "grad_norm": 0.4249524444583628, "learning_rate": 2.9815877533441107e-05, "loss": 0.1874, "step": 21175 }, { "epoch": 9.878731343283581, "grad_norm": 0.5030363460714333, "learning_rate": 2.9807247352454055e-05, "loss": 0.1954, "step": 21180 }, { "epoch": 9.881063432835822, "grad_norm": 0.4250408107208624, "learning_rate": 2.9798616828401428e-05, "loss": 0.1871, "step": 21185 }, { "epoch": 9.88339552238806, "grad_norm": 0.4286870422613224, "learning_rate": 2.9789985962566503e-05, "loss": 0.1885, "step": 21190 }, { "epoch": 9.885727611940299, "grad_norm": 0.4206753936592002, "learning_rate": 2.9781354756232604e-05, "loss": 0.1908, "step": 21195 }, { "epoch": 9.888059701492537, "grad_norm": 0.4556541841966694, "learning_rate": 2.977272321068311e-05, "loss": 0.1919, "step": 21200 }, { "epoch": 9.890391791044776, "grad_norm": 0.4395534026848426, "learning_rate": 2.9764091327201456e-05, "loss": 0.1887, "step": 21205 }, { "epoch": 9.892723880597014, "grad_norm": 0.43449155701198977, "learning_rate": 2.975545910707111e-05, "loss": 0.1887, "step": 21210 }, { "epoch": 9.895055970149254, "grad_norm": 0.4454182629787171, "learning_rate": 2.9746826551575606e-05, "loss": 0.1863, "step": 21215 }, { "epoch": 9.897388059701493, "grad_norm": 0.40094561498635845, "learning_rate": 2.9738193661998526e-05, "loss": 0.1841, "step": 21220 }, { "epoch": 9.899720149253731, "grad_norm": 0.44080341305726894, "learning_rate": 2.9729560439623484e-05, "loss": 0.1893, "step": 21225 }, { "epoch": 9.90205223880597, "grad_norm": 0.43759257053523454, "learning_rate": 2.9720926885734167e-05, "loss": 0.1908, "step": 21230 }, { "epoch": 9.904384328358208, "grad_norm": 0.4439319508649315, "learning_rate": 2.97122930016143e-05, "loss": 0.1865, "step": 21235 }, { "epoch": 9.906716417910447, "grad_norm": 0.4276645500684391, "learning_rate": 2.9703658788547674e-05, "loss": 0.192, "step": 21240 }, { "epoch": 9.909048507462687, "grad_norm": 0.4458604128648658, "learning_rate": 2.9695024247818088e-05, "loss": 0.1861, "step": 21245 }, { "epoch": 9.911380597014926, "grad_norm": 0.4320482253005778, "learning_rate": 2.968638938070942e-05, "loss": 0.1912, "step": 21250 }, { "epoch": 9.913712686567164, "grad_norm": 0.43544502863114015, "learning_rate": 2.9677754188505614e-05, "loss": 0.1839, "step": 21255 }, { "epoch": 9.916044776119403, "grad_norm": 0.4348587679525351, "learning_rate": 2.9669118672490627e-05, "loss": 0.1928, "step": 21260 }, { "epoch": 9.918376865671641, "grad_norm": 0.42235558229222725, "learning_rate": 2.9660482833948466e-05, "loss": 0.1839, "step": 21265 }, { "epoch": 9.92070895522388, "grad_norm": 0.42984781972148917, "learning_rate": 2.9651846674163208e-05, "loss": 0.1855, "step": 21270 }, { "epoch": 9.92304104477612, "grad_norm": 0.4268281189798152, "learning_rate": 2.964321019441898e-05, "loss": 0.1917, "step": 21275 }, { "epoch": 9.925373134328359, "grad_norm": 0.4467408422322408, "learning_rate": 2.9634573395999916e-05, "loss": 0.1884, "step": 21280 }, { "epoch": 9.927705223880597, "grad_norm": 0.4195163848799163, "learning_rate": 2.962593628019024e-05, "loss": 0.1921, "step": 21285 }, { "epoch": 9.930037313432836, "grad_norm": 0.4478123218614824, "learning_rate": 2.9617298848274223e-05, "loss": 0.1894, "step": 21290 }, { "epoch": 9.932369402985074, "grad_norm": 0.3997732745663867, "learning_rate": 2.960866110153614e-05, "loss": 0.1868, "step": 21295 }, { "epoch": 9.934701492537313, "grad_norm": 0.44311983912300634, "learning_rate": 2.9600023041260355e-05, "loss": 0.1959, "step": 21300 }, { "epoch": 9.937033582089553, "grad_norm": 0.4298684754981774, "learning_rate": 2.9591384668731264e-05, "loss": 0.1909, "step": 21305 }, { "epoch": 9.939365671641792, "grad_norm": 0.45235146557778605, "learning_rate": 2.9582745985233312e-05, "loss": 0.1902, "step": 21310 }, { "epoch": 9.94169776119403, "grad_norm": 0.43881985981220967, "learning_rate": 2.9574106992050993e-05, "loss": 0.1931, "step": 21315 }, { "epoch": 9.944029850746269, "grad_norm": 0.4300936903666615, "learning_rate": 2.9565467690468834e-05, "loss": 0.1913, "step": 21320 }, { "epoch": 9.946361940298507, "grad_norm": 0.4368182360394131, "learning_rate": 2.9556828081771413e-05, "loss": 0.1908, "step": 21325 }, { "epoch": 9.948694029850746, "grad_norm": 0.4601899204696124, "learning_rate": 2.9548188167243372e-05, "loss": 0.1867, "step": 21330 }, { "epoch": 9.951026119402986, "grad_norm": 0.4405261395688998, "learning_rate": 2.953954794816937e-05, "loss": 0.1893, "step": 21335 }, { "epoch": 9.953358208955224, "grad_norm": 0.44333421457156913, "learning_rate": 2.953090742583413e-05, "loss": 0.1844, "step": 21340 }, { "epoch": 9.955690298507463, "grad_norm": 0.4425310304821343, "learning_rate": 2.952226660152242e-05, "loss": 0.188, "step": 21345 }, { "epoch": 9.958022388059701, "grad_norm": 0.46687822698293635, "learning_rate": 2.951362547651903e-05, "loss": 0.1917, "step": 21350 }, { "epoch": 9.96035447761194, "grad_norm": 0.44178605661529713, "learning_rate": 2.950498405210883e-05, "loss": 0.185, "step": 21355 }, { "epoch": 9.962686567164178, "grad_norm": 0.44033886922436444, "learning_rate": 2.949634232957671e-05, "loss": 0.1934, "step": 21360 }, { "epoch": 9.965018656716419, "grad_norm": 0.4487397071059582, "learning_rate": 2.9487700310207618e-05, "loss": 0.1917, "step": 21365 }, { "epoch": 9.967350746268657, "grad_norm": 0.43376269326461014, "learning_rate": 2.9479057995286528e-05, "loss": 0.1903, "step": 21370 }, { "epoch": 9.969682835820896, "grad_norm": 0.43152521825352724, "learning_rate": 2.947041538609848e-05, "loss": 0.1845, "step": 21375 }, { "epoch": 9.972014925373134, "grad_norm": 0.44058090692497015, "learning_rate": 2.9461772483928547e-05, "loss": 0.1938, "step": 21380 }, { "epoch": 9.974347014925373, "grad_norm": 0.42998186152040746, "learning_rate": 2.9453129290061832e-05, "loss": 0.1948, "step": 21385 }, { "epoch": 9.976679104477611, "grad_norm": 0.43332985241631317, "learning_rate": 2.944448580578351e-05, "loss": 0.1912, "step": 21390 }, { "epoch": 9.979011194029852, "grad_norm": 0.42926195348473667, "learning_rate": 2.9435842032378778e-05, "loss": 0.1912, "step": 21395 }, { "epoch": 9.98134328358209, "grad_norm": 0.44389807743052573, "learning_rate": 2.9427197971132886e-05, "loss": 0.1873, "step": 21400 }, { "epoch": 9.983675373134329, "grad_norm": 0.4469874056554037, "learning_rate": 2.941855362333112e-05, "loss": 0.1906, "step": 21405 }, { "epoch": 9.986007462686567, "grad_norm": 0.4368014069622708, "learning_rate": 2.9409908990258812e-05, "loss": 0.1961, "step": 21410 }, { "epoch": 9.988339552238806, "grad_norm": 0.426859973970227, "learning_rate": 2.9401264073201333e-05, "loss": 0.1916, "step": 21415 }, { "epoch": 9.990671641791044, "grad_norm": 0.43227903607877527, "learning_rate": 2.9392618873444112e-05, "loss": 0.1922, "step": 21420 }, { "epoch": 9.993003731343283, "grad_norm": 0.4409401403980574, "learning_rate": 2.938397339227259e-05, "loss": 0.182, "step": 21425 }, { "epoch": 9.995335820895523, "grad_norm": 0.4339283946517555, "learning_rate": 2.937532763097227e-05, "loss": 0.1938, "step": 21430 }, { "epoch": 9.997667910447761, "grad_norm": 0.4596304674975474, "learning_rate": 2.936668159082871e-05, "loss": 0.1918, "step": 21435 }, { "epoch": 10.0, "grad_norm": 0.48705170753091703, "learning_rate": 2.9358035273127483e-05, "loss": 0.1924, "step": 21440 }, { "epoch": 10.002332089552239, "grad_norm": 0.43389164686297016, "learning_rate": 2.9349388679154206e-05, "loss": 0.1316, "step": 21445 }, { "epoch": 10.004664179104477, "grad_norm": 0.4335380542469739, "learning_rate": 2.934074181019455e-05, "loss": 0.1296, "step": 21450 }, { "epoch": 10.006996268656716, "grad_norm": 0.42921144292605984, "learning_rate": 2.9332094667534238e-05, "loss": 0.1291, "step": 21455 }, { "epoch": 10.009328358208956, "grad_norm": 0.423321036389472, "learning_rate": 2.9323447252458986e-05, "loss": 0.127, "step": 21460 }, { "epoch": 10.011660447761194, "grad_norm": 0.46784411802175957, "learning_rate": 2.9314799566254603e-05, "loss": 0.1313, "step": 21465 }, { "epoch": 10.013992537313433, "grad_norm": 0.410570647903673, "learning_rate": 2.9306151610206916e-05, "loss": 0.1229, "step": 21470 }, { "epoch": 10.016324626865671, "grad_norm": 0.4292374962714226, "learning_rate": 2.9297503385601788e-05, "loss": 0.1265, "step": 21475 }, { "epoch": 10.01865671641791, "grad_norm": 0.3995677789463421, "learning_rate": 2.9288854893725128e-05, "loss": 0.1232, "step": 21480 }, { "epoch": 10.020988805970148, "grad_norm": 0.43991789841981815, "learning_rate": 2.928020613586288e-05, "loss": 0.1286, "step": 21485 }, { "epoch": 10.023320895522389, "grad_norm": 0.40502216869303087, "learning_rate": 2.9271557113301047e-05, "loss": 0.1263, "step": 21490 }, { "epoch": 10.025652985074627, "grad_norm": 0.4191829210600361, "learning_rate": 2.9262907827325638e-05, "loss": 0.1202, "step": 21495 }, { "epoch": 10.027985074626866, "grad_norm": 0.4201180899650098, "learning_rate": 2.9254258279222724e-05, "loss": 0.1255, "step": 21500 }, { "epoch": 10.030317164179104, "grad_norm": 0.428128369586942, "learning_rate": 2.9245608470278417e-05, "loss": 0.1267, "step": 21505 }, { "epoch": 10.032649253731343, "grad_norm": 0.4323863027501273, "learning_rate": 2.9236958401778854e-05, "loss": 0.1256, "step": 21510 }, { "epoch": 10.034981343283581, "grad_norm": 0.40973292603663247, "learning_rate": 2.9228308075010213e-05, "loss": 0.1253, "step": 21515 }, { "epoch": 10.037313432835822, "grad_norm": 0.4003274733341407, "learning_rate": 2.921965749125873e-05, "loss": 0.1243, "step": 21520 }, { "epoch": 10.03964552238806, "grad_norm": 0.46569849100446625, "learning_rate": 2.9211006651810645e-05, "loss": 0.1284, "step": 21525 }, { "epoch": 10.041977611940299, "grad_norm": 0.4378566714488795, "learning_rate": 2.920235555795227e-05, "loss": 0.1328, "step": 21530 }, { "epoch": 10.044309701492537, "grad_norm": 0.4441699503816044, "learning_rate": 2.919370421096993e-05, "loss": 0.1246, "step": 21535 }, { "epoch": 10.046641791044776, "grad_norm": 0.40944545035953267, "learning_rate": 2.9185052612150004e-05, "loss": 0.1279, "step": 21540 }, { "epoch": 10.048973880597014, "grad_norm": 0.4340183415498991, "learning_rate": 2.9176400762778906e-05, "loss": 0.1302, "step": 21545 }, { "epoch": 10.051305970149254, "grad_norm": 0.4337437722000339, "learning_rate": 2.9167748664143067e-05, "loss": 0.124, "step": 21550 }, { "epoch": 10.053638059701493, "grad_norm": 0.40966209287536004, "learning_rate": 2.9159096317528985e-05, "loss": 0.1242, "step": 21555 }, { "epoch": 10.055970149253731, "grad_norm": 0.4211196328368198, "learning_rate": 2.9150443724223174e-05, "loss": 0.123, "step": 21560 }, { "epoch": 10.05830223880597, "grad_norm": 0.44384716545989916, "learning_rate": 2.91417908855122e-05, "loss": 0.1284, "step": 21565 }, { "epoch": 10.060634328358208, "grad_norm": 0.43338215262775415, "learning_rate": 2.9133137802682646e-05, "loss": 0.1247, "step": 21570 }, { "epoch": 10.062966417910447, "grad_norm": 0.39194776336665, "learning_rate": 2.912448447702115e-05, "loss": 0.1218, "step": 21575 }, { "epoch": 10.065298507462687, "grad_norm": 0.4311410131237351, "learning_rate": 2.9115830909814374e-05, "loss": 0.1275, "step": 21580 }, { "epoch": 10.067630597014926, "grad_norm": 0.4235206190022118, "learning_rate": 2.9107177102349026e-05, "loss": 0.1265, "step": 21585 }, { "epoch": 10.069962686567164, "grad_norm": 0.4308715168404759, "learning_rate": 2.909852305591184e-05, "loss": 0.1284, "step": 21590 }, { "epoch": 10.072294776119403, "grad_norm": 0.44696955491557977, "learning_rate": 2.9089868771789598e-05, "loss": 0.1286, "step": 21595 }, { "epoch": 10.074626865671641, "grad_norm": 0.4595934127483725, "learning_rate": 2.9081214251269095e-05, "loss": 0.1302, "step": 21600 }, { "epoch": 10.07695895522388, "grad_norm": 0.445089438514597, "learning_rate": 2.9072559495637187e-05, "loss": 0.1309, "step": 21605 }, { "epoch": 10.07929104477612, "grad_norm": 0.4309810042007916, "learning_rate": 2.9063904506180746e-05, "loss": 0.1268, "step": 21610 }, { "epoch": 10.081623134328359, "grad_norm": 0.4171138771647604, "learning_rate": 2.9055249284186686e-05, "loss": 0.1278, "step": 21615 }, { "epoch": 10.083955223880597, "grad_norm": 0.4566854720899114, "learning_rate": 2.904659383094197e-05, "loss": 0.1234, "step": 21620 }, { "epoch": 10.086287313432836, "grad_norm": 0.4316863094615615, "learning_rate": 2.9037938147733557e-05, "loss": 0.1264, "step": 21625 }, { "epoch": 10.088619402985074, "grad_norm": 0.43467790610735196, "learning_rate": 2.902928223584848e-05, "loss": 0.1282, "step": 21630 }, { "epoch": 10.090951492537313, "grad_norm": 0.4271042329557096, "learning_rate": 2.9020626096573793e-05, "loss": 0.1305, "step": 21635 }, { "epoch": 10.093283582089553, "grad_norm": 0.40896954817910064, "learning_rate": 2.9011969731196565e-05, "loss": 0.127, "step": 21640 }, { "epoch": 10.095615671641792, "grad_norm": 0.44836498663425123, "learning_rate": 2.9003313141003934e-05, "loss": 0.1294, "step": 21645 }, { "epoch": 10.09794776119403, "grad_norm": 0.44873491468422155, "learning_rate": 2.8994656327283036e-05, "loss": 0.1329, "step": 21650 }, { "epoch": 10.100279850746269, "grad_norm": 0.46077001943248524, "learning_rate": 2.898599929132107e-05, "loss": 0.133, "step": 21655 }, { "epoch": 10.102611940298507, "grad_norm": 0.4353463128954848, "learning_rate": 2.897734203440524e-05, "loss": 0.1271, "step": 21660 }, { "epoch": 10.104944029850746, "grad_norm": 0.4281116621579128, "learning_rate": 2.8968684557822806e-05, "loss": 0.1278, "step": 21665 }, { "epoch": 10.107276119402986, "grad_norm": 0.4363494342983073, "learning_rate": 2.8960026862861057e-05, "loss": 0.1301, "step": 21670 }, { "epoch": 10.109608208955224, "grad_norm": 0.4364502028484613, "learning_rate": 2.8951368950807288e-05, "loss": 0.1311, "step": 21675 }, { "epoch": 10.111940298507463, "grad_norm": 0.4394759981081816, "learning_rate": 2.894271082294887e-05, "loss": 0.1305, "step": 21680 }, { "epoch": 10.114272388059701, "grad_norm": 0.4288884091154745, "learning_rate": 2.8934052480573175e-05, "loss": 0.1326, "step": 21685 }, { "epoch": 10.11660447761194, "grad_norm": 0.4283885172036093, "learning_rate": 2.8925393924967615e-05, "loss": 0.1287, "step": 21690 }, { "epoch": 10.118936567164178, "grad_norm": 0.42844949883019934, "learning_rate": 2.891673515741964e-05, "loss": 0.1318, "step": 21695 }, { "epoch": 10.121268656716419, "grad_norm": 0.4253950447353692, "learning_rate": 2.8908076179216715e-05, "loss": 0.131, "step": 21700 }, { "epoch": 10.123600746268657, "grad_norm": 0.4395314013540467, "learning_rate": 2.8899416991646354e-05, "loss": 0.1323, "step": 21705 }, { "epoch": 10.125932835820896, "grad_norm": 0.4160314740338468, "learning_rate": 2.88907575959961e-05, "loss": 0.1307, "step": 21710 }, { "epoch": 10.128264925373134, "grad_norm": 0.4167444804358023, "learning_rate": 2.8882097993553504e-05, "loss": 0.1278, "step": 21715 }, { "epoch": 10.130597014925373, "grad_norm": 0.4545464244560826, "learning_rate": 2.8873438185606194e-05, "loss": 0.1276, "step": 21720 }, { "epoch": 10.132929104477611, "grad_norm": 0.4222413990472604, "learning_rate": 2.8864778173441775e-05, "loss": 0.1339, "step": 21725 }, { "epoch": 10.135261194029852, "grad_norm": 0.44438626739737264, "learning_rate": 2.8856117958347923e-05, "loss": 0.1298, "step": 21730 }, { "epoch": 10.13759328358209, "grad_norm": 0.4323401315166159, "learning_rate": 2.884745754161232e-05, "loss": 0.134, "step": 21735 }, { "epoch": 10.139925373134329, "grad_norm": 0.459358286685257, "learning_rate": 2.8838796924522694e-05, "loss": 0.1316, "step": 21740 }, { "epoch": 10.142257462686567, "grad_norm": 0.4491224720363105, "learning_rate": 2.883013610836679e-05, "loss": 0.1327, "step": 21745 }, { "epoch": 10.144589552238806, "grad_norm": 0.4404907529864997, "learning_rate": 2.8821475094432393e-05, "loss": 0.13, "step": 21750 }, { "epoch": 10.146921641791044, "grad_norm": 0.45690123306584335, "learning_rate": 2.8812813884007306e-05, "loss": 0.1344, "step": 21755 }, { "epoch": 10.149253731343283, "grad_norm": 0.42241919357142826, "learning_rate": 2.8804152478379377e-05, "loss": 0.1301, "step": 21760 }, { "epoch": 10.151585820895523, "grad_norm": 0.4368148250333678, "learning_rate": 2.8795490878836468e-05, "loss": 0.128, "step": 21765 }, { "epoch": 10.153917910447761, "grad_norm": 0.43316453723323745, "learning_rate": 2.8786829086666483e-05, "loss": 0.1333, "step": 21770 }, { "epoch": 10.15625, "grad_norm": 0.44584127291537107, "learning_rate": 2.877816710315734e-05, "loss": 0.1298, "step": 21775 }, { "epoch": 10.158582089552239, "grad_norm": 0.43929113210594284, "learning_rate": 2.8769504929596986e-05, "loss": 0.1307, "step": 21780 }, { "epoch": 10.160914179104477, "grad_norm": 0.42311031886216544, "learning_rate": 2.876084256727342e-05, "loss": 0.1305, "step": 21785 }, { "epoch": 10.163246268656717, "grad_norm": 0.46929190796994397, "learning_rate": 2.8752180017474646e-05, "loss": 0.1314, "step": 21790 }, { "epoch": 10.165578358208956, "grad_norm": 0.450266810469857, "learning_rate": 2.8743517281488703e-05, "loss": 0.1304, "step": 21795 }, { "epoch": 10.167910447761194, "grad_norm": 0.4426691495551753, "learning_rate": 2.8734854360603646e-05, "loss": 0.133, "step": 21800 }, { "epoch": 10.170242537313433, "grad_norm": 0.44537089905177096, "learning_rate": 2.8726191256107582e-05, "loss": 0.1325, "step": 21805 }, { "epoch": 10.172574626865671, "grad_norm": 0.43771741666778335, "learning_rate": 2.8717527969288632e-05, "loss": 0.1316, "step": 21810 }, { "epoch": 10.17490671641791, "grad_norm": 0.429319378283208, "learning_rate": 2.870886450143493e-05, "loss": 0.1299, "step": 21815 }, { "epoch": 10.177238805970148, "grad_norm": 0.4278411952962587, "learning_rate": 2.870020085383466e-05, "loss": 0.1355, "step": 21820 }, { "epoch": 10.179570895522389, "grad_norm": 0.43843558986382286, "learning_rate": 2.8691537027776022e-05, "loss": 0.1332, "step": 21825 }, { "epoch": 10.181902985074627, "grad_norm": 0.43401189319106065, "learning_rate": 2.868287302454725e-05, "loss": 0.1317, "step": 21830 }, { "epoch": 10.184235074626866, "grad_norm": 0.44639106809814366, "learning_rate": 2.867420884543659e-05, "loss": 0.1313, "step": 21835 }, { "epoch": 10.186567164179104, "grad_norm": 0.42993323895216884, "learning_rate": 2.8665544491732315e-05, "loss": 0.137, "step": 21840 }, { "epoch": 10.188899253731343, "grad_norm": 0.4141727097827079, "learning_rate": 2.8656879964722753e-05, "loss": 0.134, "step": 21845 }, { "epoch": 10.191231343283581, "grad_norm": 0.4576859160687952, "learning_rate": 2.8648215265696227e-05, "loss": 0.1334, "step": 21850 }, { "epoch": 10.193563432835822, "grad_norm": 0.445970377885955, "learning_rate": 2.8639550395941085e-05, "loss": 0.1298, "step": 21855 }, { "epoch": 10.19589552238806, "grad_norm": 0.45957160807108216, "learning_rate": 2.8630885356745716e-05, "loss": 0.1331, "step": 21860 }, { "epoch": 10.198227611940299, "grad_norm": 0.44808483401343785, "learning_rate": 2.8622220149398533e-05, "loss": 0.1344, "step": 21865 }, { "epoch": 10.200559701492537, "grad_norm": 0.4420910238172746, "learning_rate": 2.8613554775187962e-05, "loss": 0.1327, "step": 21870 }, { "epoch": 10.202891791044776, "grad_norm": 0.45669347648659947, "learning_rate": 2.860488923540247e-05, "loss": 0.1284, "step": 21875 }, { "epoch": 10.205223880597014, "grad_norm": 0.4419255049599166, "learning_rate": 2.859622353133054e-05, "loss": 0.1356, "step": 21880 }, { "epoch": 10.207555970149254, "grad_norm": 0.44507699308255105, "learning_rate": 2.8587557664260662e-05, "loss": 0.132, "step": 21885 }, { "epoch": 10.209888059701493, "grad_norm": 0.8785843606613075, "learning_rate": 2.8578891635481387e-05, "loss": 0.1311, "step": 21890 }, { "epoch": 10.212220149253731, "grad_norm": 0.4394140585001209, "learning_rate": 2.857022544628126e-05, "loss": 0.1339, "step": 21895 }, { "epoch": 10.21455223880597, "grad_norm": 0.41664964495754325, "learning_rate": 2.8561559097948863e-05, "loss": 0.132, "step": 21900 }, { "epoch": 10.216884328358208, "grad_norm": 0.4382729792512176, "learning_rate": 2.8552892591772806e-05, "loss": 0.1331, "step": 21905 }, { "epoch": 10.219216417910447, "grad_norm": 0.4405092873252187, "learning_rate": 2.8544225929041697e-05, "loss": 0.136, "step": 21910 }, { "epoch": 10.221548507462687, "grad_norm": 0.43217159355589163, "learning_rate": 2.8535559111044206e-05, "loss": 0.1318, "step": 21915 }, { "epoch": 10.223880597014926, "grad_norm": 0.44392044636288674, "learning_rate": 2.852689213906899e-05, "loss": 0.1375, "step": 21920 }, { "epoch": 10.226212686567164, "grad_norm": 0.45139661880182863, "learning_rate": 2.851822501440476e-05, "loss": 0.1343, "step": 21925 }, { "epoch": 10.228544776119403, "grad_norm": 0.43566720259584973, "learning_rate": 2.850955773834022e-05, "loss": 0.1292, "step": 21930 }, { "epoch": 10.230876865671641, "grad_norm": 0.4427925130979913, "learning_rate": 2.850089031216412e-05, "loss": 0.1321, "step": 21935 }, { "epoch": 10.23320895522388, "grad_norm": 0.44246685345863024, "learning_rate": 2.849222273716522e-05, "loss": 0.14, "step": 21940 }, { "epoch": 10.23554104477612, "grad_norm": 0.4385994744098163, "learning_rate": 2.84835550146323e-05, "loss": 0.1342, "step": 21945 }, { "epoch": 10.237873134328359, "grad_norm": 0.42359866286904463, "learning_rate": 2.8474887145854183e-05, "loss": 0.1316, "step": 21950 }, { "epoch": 10.240205223880597, "grad_norm": 0.4471850074608068, "learning_rate": 2.8466219132119688e-05, "loss": 0.1313, "step": 21955 }, { "epoch": 10.242537313432836, "grad_norm": 0.4728306442765106, "learning_rate": 2.8457550974717655e-05, "loss": 0.1367, "step": 21960 }, { "epoch": 10.244869402985074, "grad_norm": 0.4418621331107783, "learning_rate": 2.8448882674936973e-05, "loss": 0.132, "step": 21965 }, { "epoch": 10.247201492537313, "grad_norm": 0.4424439806250325, "learning_rate": 2.8440214234066524e-05, "loss": 0.1344, "step": 21970 }, { "epoch": 10.249533582089553, "grad_norm": 0.4269709117147282, "learning_rate": 2.8431545653395236e-05, "loss": 0.1338, "step": 21975 }, { "epoch": 10.251865671641792, "grad_norm": 0.4804964916122285, "learning_rate": 2.8422876934212027e-05, "loss": 0.1403, "step": 21980 }, { "epoch": 10.25419776119403, "grad_norm": 0.45189455586552085, "learning_rate": 2.841420807780586e-05, "loss": 0.1342, "step": 21985 }, { "epoch": 10.256529850746269, "grad_norm": 0.449429131308178, "learning_rate": 2.8405539085465717e-05, "loss": 0.1299, "step": 21990 }, { "epoch": 10.258861940298507, "grad_norm": 0.47153384089042893, "learning_rate": 2.8396869958480587e-05, "loss": 0.1385, "step": 21995 }, { "epoch": 10.261194029850746, "grad_norm": 0.43613072170451495, "learning_rate": 2.8388200698139484e-05, "loss": 0.1339, "step": 22000 }, { "epoch": 10.263526119402986, "grad_norm": 0.4489105120004847, "learning_rate": 2.837953130573145e-05, "loss": 0.1387, "step": 22005 }, { "epoch": 10.265858208955224, "grad_norm": 0.45047199773737706, "learning_rate": 2.8370861782545537e-05, "loss": 0.1413, "step": 22010 }, { "epoch": 10.268190298507463, "grad_norm": 0.43628335957112996, "learning_rate": 2.8362192129870817e-05, "loss": 0.1375, "step": 22015 }, { "epoch": 10.270522388059701, "grad_norm": 0.42949669801679025, "learning_rate": 2.8353522348996388e-05, "loss": 0.1377, "step": 22020 }, { "epoch": 10.27285447761194, "grad_norm": 0.4610487164365232, "learning_rate": 2.8344852441211367e-05, "loss": 0.1342, "step": 22025 }, { "epoch": 10.275186567164178, "grad_norm": 0.4525016968474113, "learning_rate": 2.8336182407804886e-05, "loss": 0.1342, "step": 22030 }, { "epoch": 10.277518656716419, "grad_norm": 0.4386996631873114, "learning_rate": 2.8327512250066083e-05, "loss": 0.1347, "step": 22035 }, { "epoch": 10.279850746268657, "grad_norm": 0.4362750020805601, "learning_rate": 2.8318841969284145e-05, "loss": 0.1386, "step": 22040 }, { "epoch": 10.282182835820896, "grad_norm": 0.45386295626300316, "learning_rate": 2.8310171566748243e-05, "loss": 0.1347, "step": 22045 }, { "epoch": 10.284514925373134, "grad_norm": 0.44533006195802105, "learning_rate": 2.8301501043747608e-05, "loss": 0.1344, "step": 22050 }, { "epoch": 10.286847014925373, "grad_norm": 0.43828846452792275, "learning_rate": 2.829283040157143e-05, "loss": 0.1336, "step": 22055 }, { "epoch": 10.289179104477611, "grad_norm": 0.4535401781295859, "learning_rate": 2.8284159641508972e-05, "loss": 0.1354, "step": 22060 }, { "epoch": 10.291511194029852, "grad_norm": 0.4379590403449937, "learning_rate": 2.827548876484949e-05, "loss": 0.1356, "step": 22065 }, { "epoch": 10.29384328358209, "grad_norm": 0.4487765994477272, "learning_rate": 2.826681777288226e-05, "loss": 0.1349, "step": 22070 }, { "epoch": 10.296175373134329, "grad_norm": 0.44831411973204977, "learning_rate": 2.825814666689658e-05, "loss": 0.1382, "step": 22075 }, { "epoch": 10.298507462686567, "grad_norm": 0.4406328559812715, "learning_rate": 2.824947544818175e-05, "loss": 0.1376, "step": 22080 }, { "epoch": 10.300839552238806, "grad_norm": 0.4661002603382822, "learning_rate": 2.8240804118027092e-05, "loss": 0.1349, "step": 22085 }, { "epoch": 10.303171641791044, "grad_norm": 0.422208543933137, "learning_rate": 2.8232132677721972e-05, "loss": 0.1366, "step": 22090 }, { "epoch": 10.305503731343283, "grad_norm": 0.451425115969559, "learning_rate": 2.8223461128555727e-05, "loss": 0.1361, "step": 22095 }, { "epoch": 10.307835820895523, "grad_norm": 0.42847258225166474, "learning_rate": 2.8214789471817754e-05, "loss": 0.1361, "step": 22100 }, { "epoch": 10.310167910447761, "grad_norm": 0.4364141112359053, "learning_rate": 2.8206117708797432e-05, "loss": 0.1333, "step": 22105 }, { "epoch": 10.3125, "grad_norm": 0.45632347658536726, "learning_rate": 2.819744584078417e-05, "loss": 0.1366, "step": 22110 }, { "epoch": 10.314832089552239, "grad_norm": 0.43763497457854805, "learning_rate": 2.818877386906739e-05, "loss": 0.138, "step": 22115 }, { "epoch": 10.317164179104477, "grad_norm": 0.4230442172769159, "learning_rate": 2.8180101794936542e-05, "loss": 0.1365, "step": 22120 }, { "epoch": 10.319496268656717, "grad_norm": 0.43941826040154885, "learning_rate": 2.8171429619681073e-05, "loss": 0.1336, "step": 22125 }, { "epoch": 10.321828358208956, "grad_norm": 0.4420923447505594, "learning_rate": 2.8162757344590445e-05, "loss": 0.1348, "step": 22130 }, { "epoch": 10.324160447761194, "grad_norm": 0.4297975449923181, "learning_rate": 2.815408497095416e-05, "loss": 0.1378, "step": 22135 }, { "epoch": 10.326492537313433, "grad_norm": 0.46113850449614874, "learning_rate": 2.8145412500061702e-05, "loss": 0.135, "step": 22140 }, { "epoch": 10.328824626865671, "grad_norm": 0.4553498832157878, "learning_rate": 2.813673993320259e-05, "loss": 0.142, "step": 22145 }, { "epoch": 10.33115671641791, "grad_norm": 0.4493750050741348, "learning_rate": 2.812806727166635e-05, "loss": 0.1359, "step": 22150 }, { "epoch": 10.333488805970148, "grad_norm": 0.44128820246413875, "learning_rate": 2.811939451674252e-05, "loss": 0.1354, "step": 22155 }, { "epoch": 10.335820895522389, "grad_norm": 0.4625301721500628, "learning_rate": 2.8110721669720663e-05, "loss": 0.1394, "step": 22160 }, { "epoch": 10.338152985074627, "grad_norm": 0.44846072153289124, "learning_rate": 2.8102048731890345e-05, "loss": 0.1396, "step": 22165 }, { "epoch": 10.340485074626866, "grad_norm": 0.4618437460466973, "learning_rate": 2.8093375704541158e-05, "loss": 0.1401, "step": 22170 }, { "epoch": 10.342817164179104, "grad_norm": 0.4388797654055099, "learning_rate": 2.808470258896268e-05, "loss": 0.138, "step": 22175 }, { "epoch": 10.345149253731343, "grad_norm": 0.4376440197934354, "learning_rate": 2.8076029386444524e-05, "loss": 0.141, "step": 22180 }, { "epoch": 10.347481343283581, "grad_norm": 0.4231107299562372, "learning_rate": 2.806735609827633e-05, "loss": 0.1393, "step": 22185 }, { "epoch": 10.349813432835822, "grad_norm": 0.475700332320199, "learning_rate": 2.805868272574771e-05, "loss": 0.1352, "step": 22190 }, { "epoch": 10.35214552238806, "grad_norm": 0.44269038050464266, "learning_rate": 2.8050009270148326e-05, "loss": 0.1353, "step": 22195 }, { "epoch": 10.354477611940299, "grad_norm": 0.43187845457093654, "learning_rate": 2.804133573276783e-05, "loss": 0.1356, "step": 22200 }, { "epoch": 10.356809701492537, "grad_norm": 0.45476380932527993, "learning_rate": 2.803266211489591e-05, "loss": 0.1393, "step": 22205 }, { "epoch": 10.359141791044776, "grad_norm": 0.4435249925608204, "learning_rate": 2.8023988417822222e-05, "loss": 0.1446, "step": 22210 }, { "epoch": 10.361473880597014, "grad_norm": 0.44907644341268377, "learning_rate": 2.8015314642836476e-05, "loss": 0.1411, "step": 22215 }, { "epoch": 10.363805970149254, "grad_norm": 0.47480173732729863, "learning_rate": 2.800664079122839e-05, "loss": 0.1411, "step": 22220 }, { "epoch": 10.366138059701493, "grad_norm": 0.47836268243948565, "learning_rate": 2.799796686428766e-05, "loss": 0.1393, "step": 22225 }, { "epoch": 10.368470149253731, "grad_norm": 0.4541405176996518, "learning_rate": 2.7989292863304045e-05, "loss": 0.1379, "step": 22230 }, { "epoch": 10.37080223880597, "grad_norm": 0.4365258207121139, "learning_rate": 2.7980618789567258e-05, "loss": 0.1365, "step": 22235 }, { "epoch": 10.373134328358208, "grad_norm": 0.4359098426310621, "learning_rate": 2.7971944644367066e-05, "loss": 0.1365, "step": 22240 }, { "epoch": 10.375466417910447, "grad_norm": 0.4523888441313891, "learning_rate": 2.796327042899322e-05, "loss": 0.1375, "step": 22245 }, { "epoch": 10.377798507462687, "grad_norm": 0.4451877345267988, "learning_rate": 2.7954596144735512e-05, "loss": 0.1394, "step": 22250 }, { "epoch": 10.380130597014926, "grad_norm": 0.43262808180030976, "learning_rate": 2.7945921792883707e-05, "loss": 0.1336, "step": 22255 }, { "epoch": 10.382462686567164, "grad_norm": 0.43819879348398993, "learning_rate": 2.79372473747276e-05, "loss": 0.1374, "step": 22260 }, { "epoch": 10.384794776119403, "grad_norm": 0.45072886871299706, "learning_rate": 2.7928572891557003e-05, "loss": 0.1388, "step": 22265 }, { "epoch": 10.387126865671641, "grad_norm": 0.45876150989012054, "learning_rate": 2.7919898344661723e-05, "loss": 0.1422, "step": 22270 }, { "epoch": 10.38945895522388, "grad_norm": 0.452259856752294, "learning_rate": 2.791122373533157e-05, "loss": 0.1401, "step": 22275 }, { "epoch": 10.39179104477612, "grad_norm": 0.4328329970872166, "learning_rate": 2.7902549064856405e-05, "loss": 0.139, "step": 22280 }, { "epoch": 10.394123134328359, "grad_norm": 0.4591567706025057, "learning_rate": 2.7893874334526043e-05, "loss": 0.1377, "step": 22285 }, { "epoch": 10.396455223880597, "grad_norm": 0.4412468196774066, "learning_rate": 2.7885199545630343e-05, "loss": 0.1401, "step": 22290 }, { "epoch": 10.398787313432836, "grad_norm": 0.4767440421121956, "learning_rate": 2.7876524699459163e-05, "loss": 0.1394, "step": 22295 }, { "epoch": 10.401119402985074, "grad_norm": 0.4373085478390444, "learning_rate": 2.7867849797302357e-05, "loss": 0.1371, "step": 22300 }, { "epoch": 10.403451492537313, "grad_norm": 0.4446689597392482, "learning_rate": 2.7859174840449826e-05, "loss": 0.1389, "step": 22305 }, { "epoch": 10.405783582089553, "grad_norm": 0.4464939898421133, "learning_rate": 2.785049983019143e-05, "loss": 0.1401, "step": 22310 }, { "epoch": 10.408115671641792, "grad_norm": 0.44100141572165497, "learning_rate": 2.7841824767817065e-05, "loss": 0.1389, "step": 22315 }, { "epoch": 10.41044776119403, "grad_norm": 0.4643240842356437, "learning_rate": 2.7833149654616637e-05, "loss": 0.137, "step": 22320 }, { "epoch": 10.412779850746269, "grad_norm": 0.4560500298023937, "learning_rate": 2.782447449188004e-05, "loss": 0.1418, "step": 22325 }, { "epoch": 10.415111940298507, "grad_norm": 0.45458754761934766, "learning_rate": 2.7815799280897202e-05, "loss": 0.1393, "step": 22330 }, { "epoch": 10.417444029850746, "grad_norm": 0.4287761180265848, "learning_rate": 2.780712402295803e-05, "loss": 0.1361, "step": 22335 }, { "epoch": 10.419776119402986, "grad_norm": 0.4625716235505138, "learning_rate": 2.7798448719352467e-05, "loss": 0.1384, "step": 22340 }, { "epoch": 10.422108208955224, "grad_norm": 0.4512844516222381, "learning_rate": 2.778977337137044e-05, "loss": 0.1398, "step": 22345 }, { "epoch": 10.424440298507463, "grad_norm": 0.44315474550047923, "learning_rate": 2.7781097980301878e-05, "loss": 0.1453, "step": 22350 }, { "epoch": 10.426772388059701, "grad_norm": 0.4505086278979528, "learning_rate": 2.777242254743675e-05, "loss": 0.1446, "step": 22355 }, { "epoch": 10.42910447761194, "grad_norm": 0.4510756994945215, "learning_rate": 2.7763747074065e-05, "loss": 0.139, "step": 22360 }, { "epoch": 10.431436567164178, "grad_norm": 0.4617129941822893, "learning_rate": 2.775507156147658e-05, "loss": 0.1423, "step": 22365 }, { "epoch": 10.433768656716419, "grad_norm": 0.458262508373887, "learning_rate": 2.7746396010961462e-05, "loss": 0.1406, "step": 22370 }, { "epoch": 10.436100746268657, "grad_norm": 0.4649843303709964, "learning_rate": 2.773772042380962e-05, "loss": 0.1379, "step": 22375 }, { "epoch": 10.438432835820896, "grad_norm": 0.45356101070907245, "learning_rate": 2.7729044801311032e-05, "loss": 0.1386, "step": 22380 }, { "epoch": 10.440764925373134, "grad_norm": 0.4425124912558172, "learning_rate": 2.772036914475567e-05, "loss": 0.1437, "step": 22385 }, { "epoch": 10.443097014925373, "grad_norm": 0.47346950243228664, "learning_rate": 2.7711693455433534e-05, "loss": 0.1425, "step": 22390 }, { "epoch": 10.445429104477611, "grad_norm": 0.4442206444439273, "learning_rate": 2.7703017734634608e-05, "loss": 0.1437, "step": 22395 }, { "epoch": 10.447761194029852, "grad_norm": 0.44733711015542804, "learning_rate": 2.7694341983648884e-05, "loss": 0.1405, "step": 22400 }, { "epoch": 10.45009328358209, "grad_norm": 0.45721532232849954, "learning_rate": 2.768566620376638e-05, "loss": 0.142, "step": 22405 }, { "epoch": 10.452425373134329, "grad_norm": 0.4464869638800727, "learning_rate": 2.7676990396277085e-05, "loss": 0.1422, "step": 22410 }, { "epoch": 10.454757462686567, "grad_norm": 0.4731833776497934, "learning_rate": 2.7668314562471008e-05, "loss": 0.14, "step": 22415 }, { "epoch": 10.457089552238806, "grad_norm": 0.44660591741268796, "learning_rate": 2.7659638703638173e-05, "loss": 0.1371, "step": 22420 }, { "epoch": 10.459421641791044, "grad_norm": 0.44325749214387283, "learning_rate": 2.7650962821068595e-05, "loss": 0.1405, "step": 22425 }, { "epoch": 10.461753731343283, "grad_norm": 0.4309776301061395, "learning_rate": 2.764228691605229e-05, "loss": 0.1383, "step": 22430 }, { "epoch": 10.464085820895523, "grad_norm": 0.4561914003573803, "learning_rate": 2.7633610989879284e-05, "loss": 0.1464, "step": 22435 }, { "epoch": 10.466417910447761, "grad_norm": 0.4636196718424069, "learning_rate": 2.76249350438396e-05, "loss": 0.1382, "step": 22440 }, { "epoch": 10.46875, "grad_norm": 0.45054523562956444, "learning_rate": 2.761625907922328e-05, "loss": 0.1382, "step": 22445 }, { "epoch": 10.471082089552239, "grad_norm": 0.4559859069645858, "learning_rate": 2.7607583097320345e-05, "loss": 0.1422, "step": 22450 }, { "epoch": 10.473414179104477, "grad_norm": 0.4578214618452632, "learning_rate": 2.7598907099420835e-05, "loss": 0.1448, "step": 22455 }, { "epoch": 10.475746268656717, "grad_norm": 0.45611383419418206, "learning_rate": 2.7590231086814782e-05, "loss": 0.1416, "step": 22460 }, { "epoch": 10.478078358208956, "grad_norm": 0.44135221283565496, "learning_rate": 2.758155506079223e-05, "loss": 0.1414, "step": 22465 }, { "epoch": 10.480410447761194, "grad_norm": 0.45790300408660456, "learning_rate": 2.7572879022643228e-05, "loss": 0.1408, "step": 22470 }, { "epoch": 10.482742537313433, "grad_norm": 0.42912857056673526, "learning_rate": 2.7564202973657815e-05, "loss": 0.1385, "step": 22475 }, { "epoch": 10.485074626865671, "grad_norm": 0.4428718059076422, "learning_rate": 2.7555526915126033e-05, "loss": 0.1435, "step": 22480 }, { "epoch": 10.48740671641791, "grad_norm": 0.43258900700864084, "learning_rate": 2.754685084833793e-05, "loss": 0.1356, "step": 22485 }, { "epoch": 10.489738805970148, "grad_norm": 0.47123483491533913, "learning_rate": 2.7538174774583552e-05, "loss": 0.1432, "step": 22490 }, { "epoch": 10.492070895522389, "grad_norm": 0.4626166348392573, "learning_rate": 2.752949869515295e-05, "loss": 0.1426, "step": 22495 }, { "epoch": 10.494402985074627, "grad_norm": 0.46408540711486096, "learning_rate": 2.7520822611336176e-05, "loss": 0.1406, "step": 22500 }, { "epoch": 10.496735074626866, "grad_norm": 0.46711222871390723, "learning_rate": 2.7512146524423288e-05, "loss": 0.1389, "step": 22505 }, { "epoch": 10.499067164179104, "grad_norm": 0.4294303757380223, "learning_rate": 2.7503470435704322e-05, "loss": 0.1336, "step": 22510 }, { "epoch": 10.501399253731343, "grad_norm": 0.4580239754692455, "learning_rate": 2.7494794346469327e-05, "loss": 0.1444, "step": 22515 }, { "epoch": 10.503731343283581, "grad_norm": 0.45190062951224363, "learning_rate": 2.7486118258008374e-05, "loss": 0.1415, "step": 22520 }, { "epoch": 10.506063432835822, "grad_norm": 0.46696100579420546, "learning_rate": 2.74774421716115e-05, "loss": 0.1443, "step": 22525 }, { "epoch": 10.50839552238806, "grad_norm": 0.4675528897714873, "learning_rate": 2.746876608856876e-05, "loss": 0.1436, "step": 22530 }, { "epoch": 10.510727611940299, "grad_norm": 0.44329539664122786, "learning_rate": 2.7460090010170197e-05, "loss": 0.1435, "step": 22535 }, { "epoch": 10.513059701492537, "grad_norm": 0.4534014499232469, "learning_rate": 2.7451413937705878e-05, "loss": 0.1434, "step": 22540 }, { "epoch": 10.515391791044776, "grad_norm": 0.4324747222210287, "learning_rate": 2.7442737872465835e-05, "loss": 0.1428, "step": 22545 }, { "epoch": 10.517723880597014, "grad_norm": 0.4259701885552462, "learning_rate": 2.743406181574012e-05, "loss": 0.1453, "step": 22550 }, { "epoch": 10.520055970149254, "grad_norm": 0.46151069531104216, "learning_rate": 2.7425385768818784e-05, "loss": 0.1423, "step": 22555 }, { "epoch": 10.522388059701493, "grad_norm": 0.4686283424189893, "learning_rate": 2.7416709732991863e-05, "loss": 0.1402, "step": 22560 }, { "epoch": 10.524720149253731, "grad_norm": 0.44061837754880373, "learning_rate": 2.7408033709549413e-05, "loss": 0.1416, "step": 22565 }, { "epoch": 10.52705223880597, "grad_norm": 0.4813126791192727, "learning_rate": 2.7399357699781477e-05, "loss": 0.1485, "step": 22570 }, { "epoch": 10.529384328358208, "grad_norm": 0.46035720328778107, "learning_rate": 2.739068170497807e-05, "loss": 0.1426, "step": 22575 }, { "epoch": 10.531716417910447, "grad_norm": 0.43300833789326254, "learning_rate": 2.7382005726429256e-05, "loss": 0.1383, "step": 22580 }, { "epoch": 10.534048507462687, "grad_norm": 0.429286900215883, "learning_rate": 2.7373329765425053e-05, "loss": 0.1416, "step": 22585 }, { "epoch": 10.536380597014926, "grad_norm": 0.4618959867106397, "learning_rate": 2.736465382325551e-05, "loss": 0.1427, "step": 22590 }, { "epoch": 10.538712686567164, "grad_norm": 0.44782187552542735, "learning_rate": 2.7355977901210646e-05, "loss": 0.144, "step": 22595 }, { "epoch": 10.541044776119403, "grad_norm": 0.45336658104715694, "learning_rate": 2.7347302000580475e-05, "loss": 0.1464, "step": 22600 }, { "epoch": 10.543376865671641, "grad_norm": 0.4634718861104936, "learning_rate": 2.7338626122655047e-05, "loss": 0.1391, "step": 22605 }, { "epoch": 10.54570895522388, "grad_norm": 0.4320248548054759, "learning_rate": 2.7329950268724358e-05, "loss": 0.142, "step": 22610 }, { "epoch": 10.54804104477612, "grad_norm": 0.4557076628010814, "learning_rate": 2.7321274440078442e-05, "loss": 0.1447, "step": 22615 }, { "epoch": 10.550373134328359, "grad_norm": 0.44195741870526517, "learning_rate": 2.7312598638007308e-05, "loss": 0.1412, "step": 22620 }, { "epoch": 10.552705223880597, "grad_norm": 0.47814796953028044, "learning_rate": 2.7303922863800952e-05, "loss": 0.1406, "step": 22625 }, { "epoch": 10.555037313432836, "grad_norm": 0.45424131563668646, "learning_rate": 2.7295247118749395e-05, "loss": 0.1442, "step": 22630 }, { "epoch": 10.557369402985074, "grad_norm": 0.46379984281838105, "learning_rate": 2.728657140414262e-05, "loss": 0.1442, "step": 22635 }, { "epoch": 10.559701492537313, "grad_norm": 0.44453917360276624, "learning_rate": 2.727789572127064e-05, "loss": 0.1441, "step": 22640 }, { "epoch": 10.562033582089553, "grad_norm": 0.45390612748577863, "learning_rate": 2.726922007142344e-05, "loss": 0.1445, "step": 22645 }, { "epoch": 10.564365671641792, "grad_norm": 0.468057399676657, "learning_rate": 2.7260544455890996e-05, "loss": 0.1459, "step": 22650 }, { "epoch": 10.56669776119403, "grad_norm": 0.4553426604851544, "learning_rate": 2.7251868875963312e-05, "loss": 0.1445, "step": 22655 }, { "epoch": 10.569029850746269, "grad_norm": 0.46210869273241256, "learning_rate": 2.724319333293033e-05, "loss": 0.1439, "step": 22660 }, { "epoch": 10.571361940298507, "grad_norm": 0.46111250308943874, "learning_rate": 2.723451782808205e-05, "loss": 0.1439, "step": 22665 }, { "epoch": 10.573694029850746, "grad_norm": 0.4603755520525431, "learning_rate": 2.7225842362708427e-05, "loss": 0.1462, "step": 22670 }, { "epoch": 10.576026119402986, "grad_norm": 0.47186827416669525, "learning_rate": 2.7217166938099402e-05, "loss": 0.1425, "step": 22675 }, { "epoch": 10.578358208955224, "grad_norm": 0.4282022186179048, "learning_rate": 2.7208491555544964e-05, "loss": 0.1415, "step": 22680 }, { "epoch": 10.580690298507463, "grad_norm": 0.4364392645611192, "learning_rate": 2.7199816216335024e-05, "loss": 0.1437, "step": 22685 }, { "epoch": 10.583022388059701, "grad_norm": 0.4438448597156454, "learning_rate": 2.7191140921759546e-05, "loss": 0.1415, "step": 22690 }, { "epoch": 10.58535447761194, "grad_norm": 0.45285627920771193, "learning_rate": 2.7182465673108454e-05, "loss": 0.1447, "step": 22695 }, { "epoch": 10.587686567164178, "grad_norm": 0.4777924569355204, "learning_rate": 2.7173790471671662e-05, "loss": 0.1463, "step": 22700 }, { "epoch": 10.590018656716419, "grad_norm": 0.472878579459652, "learning_rate": 2.7165115318739114e-05, "loss": 0.1428, "step": 22705 }, { "epoch": 10.592350746268657, "grad_norm": 0.45118883915444685, "learning_rate": 2.7156440215600703e-05, "loss": 0.1451, "step": 22710 }, { "epoch": 10.594682835820896, "grad_norm": 0.45926889881287764, "learning_rate": 2.7147765163546347e-05, "loss": 0.1464, "step": 22715 }, { "epoch": 10.597014925373134, "grad_norm": 0.4754309913616897, "learning_rate": 2.7139090163865932e-05, "loss": 0.1486, "step": 22720 }, { "epoch": 10.599347014925373, "grad_norm": 0.4504294813596372, "learning_rate": 2.7130415217849354e-05, "loss": 0.1454, "step": 22725 }, { "epoch": 10.601679104477611, "grad_norm": 0.4500231692775403, "learning_rate": 2.712174032678648e-05, "loss": 0.1444, "step": 22730 }, { "epoch": 10.604011194029852, "grad_norm": 0.47410699793835187, "learning_rate": 2.711306549196721e-05, "loss": 0.145, "step": 22735 }, { "epoch": 10.60634328358209, "grad_norm": 0.4421035850025401, "learning_rate": 2.7104390714681393e-05, "loss": 0.1426, "step": 22740 }, { "epoch": 10.608675373134329, "grad_norm": 0.4543617295468848, "learning_rate": 2.7095715996218875e-05, "loss": 0.1411, "step": 22745 }, { "epoch": 10.611007462686567, "grad_norm": 0.4432137958650212, "learning_rate": 2.7087041337869522e-05, "loss": 0.1489, "step": 22750 }, { "epoch": 10.613339552238806, "grad_norm": 0.47083869337734635, "learning_rate": 2.7078366740923173e-05, "loss": 0.1443, "step": 22755 }, { "epoch": 10.615671641791044, "grad_norm": 0.4444496641605444, "learning_rate": 2.7069692206669633e-05, "loss": 0.1421, "step": 22760 }, { "epoch": 10.618003731343283, "grad_norm": 0.45123981864140905, "learning_rate": 2.7061017736398752e-05, "loss": 0.1506, "step": 22765 }, { "epoch": 10.620335820895523, "grad_norm": 0.484489140093528, "learning_rate": 2.7052343331400322e-05, "loss": 0.1493, "step": 22770 }, { "epoch": 10.622667910447761, "grad_norm": 0.4495267783257183, "learning_rate": 2.7043668992964157e-05, "loss": 0.1419, "step": 22775 }, { "epoch": 10.625, "grad_norm": 0.4529901875609145, "learning_rate": 2.7034994722380036e-05, "loss": 0.1397, "step": 22780 }, { "epoch": 10.627332089552239, "grad_norm": 0.44672180508347054, "learning_rate": 2.7026320520937738e-05, "loss": 0.1458, "step": 22785 }, { "epoch": 10.629664179104477, "grad_norm": 0.47409511574886104, "learning_rate": 2.701764638992705e-05, "loss": 0.1466, "step": 22790 }, { "epoch": 10.631996268656717, "grad_norm": 0.468536948216095, "learning_rate": 2.700897233063771e-05, "loss": 0.1417, "step": 22795 }, { "epoch": 10.634328358208956, "grad_norm": 0.4643357869300533, "learning_rate": 2.7000298344359494e-05, "loss": 0.1454, "step": 22800 }, { "epoch": 10.636660447761194, "grad_norm": 0.45690823202025954, "learning_rate": 2.6991624432382123e-05, "loss": 0.1396, "step": 22805 }, { "epoch": 10.638992537313433, "grad_norm": 0.46211021342946385, "learning_rate": 2.6982950595995315e-05, "loss": 0.1429, "step": 22810 }, { "epoch": 10.641324626865671, "grad_norm": 0.46244903145343624, "learning_rate": 2.6974276836488815e-05, "loss": 0.1478, "step": 22815 }, { "epoch": 10.64365671641791, "grad_norm": 0.4666769105978808, "learning_rate": 2.6965603155152302e-05, "loss": 0.1417, "step": 22820 }, { "epoch": 10.645988805970148, "grad_norm": 0.4621957571818168, "learning_rate": 2.695692955327549e-05, "loss": 0.1474, "step": 22825 }, { "epoch": 10.648320895522389, "grad_norm": 0.45902603367060835, "learning_rate": 2.6948256032148052e-05, "loss": 0.1449, "step": 22830 }, { "epoch": 10.650652985074627, "grad_norm": 0.4571305514639993, "learning_rate": 2.6939582593059638e-05, "loss": 0.1484, "step": 22835 }, { "epoch": 10.652985074626866, "grad_norm": 0.4507233535611882, "learning_rate": 2.6930909237299934e-05, "loss": 0.145, "step": 22840 }, { "epoch": 10.655317164179104, "grad_norm": 0.45827908273586876, "learning_rate": 2.692223596615857e-05, "loss": 0.1482, "step": 22845 }, { "epoch": 10.657649253731343, "grad_norm": 0.46784189837140233, "learning_rate": 2.691356278092519e-05, "loss": 0.1416, "step": 22850 }, { "epoch": 10.659981343283581, "grad_norm": 0.4603563755743808, "learning_rate": 2.6904889682889404e-05, "loss": 0.1461, "step": 22855 }, { "epoch": 10.662313432835822, "grad_norm": 0.4690903473263815, "learning_rate": 2.6896216673340814e-05, "loss": 0.1464, "step": 22860 }, { "epoch": 10.66464552238806, "grad_norm": 0.44154542833379146, "learning_rate": 2.6887543753569022e-05, "loss": 0.1457, "step": 22865 }, { "epoch": 10.666977611940299, "grad_norm": 0.44669722319039656, "learning_rate": 2.687887092486361e-05, "loss": 0.1453, "step": 22870 }, { "epoch": 10.669309701492537, "grad_norm": 0.4230373702870995, "learning_rate": 2.6870198188514135e-05, "loss": 0.1424, "step": 22875 }, { "epoch": 10.671641791044776, "grad_norm": 0.4571207940863704, "learning_rate": 2.686152554581016e-05, "loss": 0.1419, "step": 22880 }, { "epoch": 10.673973880597014, "grad_norm": 0.45533603562899777, "learning_rate": 2.6852852998041216e-05, "loss": 0.1396, "step": 22885 }, { "epoch": 10.676305970149254, "grad_norm": 0.4371567446323711, "learning_rate": 2.6844180546496833e-05, "loss": 0.1449, "step": 22890 }, { "epoch": 10.678638059701493, "grad_norm": 0.45885062379689834, "learning_rate": 2.6835508192466512e-05, "loss": 0.1448, "step": 22895 }, { "epoch": 10.680970149253731, "grad_norm": 0.44616362425378364, "learning_rate": 2.682683593723977e-05, "loss": 0.1439, "step": 22900 }, { "epoch": 10.68330223880597, "grad_norm": 0.45082562093207024, "learning_rate": 2.6818163782106065e-05, "loss": 0.1526, "step": 22905 }, { "epoch": 10.685634328358208, "grad_norm": 0.47512958068717326, "learning_rate": 2.680949172835487e-05, "loss": 0.1485, "step": 22910 }, { "epoch": 10.687966417910447, "grad_norm": 0.4510057122507526, "learning_rate": 2.680081977727565e-05, "loss": 0.1445, "step": 22915 }, { "epoch": 10.690298507462687, "grad_norm": 0.46408858392681424, "learning_rate": 2.6792147930157812e-05, "loss": 0.1458, "step": 22920 }, { "epoch": 10.692630597014926, "grad_norm": 0.43630080669793553, "learning_rate": 2.6783476188290813e-05, "loss": 0.1418, "step": 22925 }, { "epoch": 10.694962686567164, "grad_norm": 0.45326689487204713, "learning_rate": 2.6774804552964034e-05, "loss": 0.1468, "step": 22930 }, { "epoch": 10.697294776119403, "grad_norm": 0.46258813960255585, "learning_rate": 2.676613302546686e-05, "loss": 0.1401, "step": 22935 }, { "epoch": 10.699626865671641, "grad_norm": 0.4583023415512064, "learning_rate": 2.6757461607088692e-05, "loss": 0.1486, "step": 22940 }, { "epoch": 10.70195895522388, "grad_norm": 0.4439915451465055, "learning_rate": 2.6748790299118846e-05, "loss": 0.1459, "step": 22945 }, { "epoch": 10.70429104477612, "grad_norm": 0.46507616280614134, "learning_rate": 2.6740119102846707e-05, "loss": 0.1455, "step": 22950 }, { "epoch": 10.706623134328359, "grad_norm": 0.4430431078437158, "learning_rate": 2.6731448019561566e-05, "loss": 0.1449, "step": 22955 }, { "epoch": 10.708955223880597, "grad_norm": 0.463025860835336, "learning_rate": 2.6722777050552737e-05, "loss": 0.1485, "step": 22960 }, { "epoch": 10.711287313432836, "grad_norm": 0.4494482421596344, "learning_rate": 2.6714106197109522e-05, "loss": 0.1437, "step": 22965 }, { "epoch": 10.713619402985074, "grad_norm": 0.45180706147925065, "learning_rate": 2.6705435460521177e-05, "loss": 0.1447, "step": 22970 }, { "epoch": 10.715951492537313, "grad_norm": 0.440766298334755, "learning_rate": 2.6696764842076978e-05, "loss": 0.1438, "step": 22975 }, { "epoch": 10.718283582089553, "grad_norm": 0.45073509542845114, "learning_rate": 2.668809434306615e-05, "loss": 0.1441, "step": 22980 }, { "epoch": 10.720615671641792, "grad_norm": 0.4519967370421372, "learning_rate": 2.66794239647779e-05, "loss": 0.1447, "step": 22985 }, { "epoch": 10.72294776119403, "grad_norm": 0.4523653584170996, "learning_rate": 2.6670753708501454e-05, "loss": 0.1427, "step": 22990 }, { "epoch": 10.725279850746269, "grad_norm": 0.45596328810268255, "learning_rate": 2.6662083575525986e-05, "loss": 0.1456, "step": 22995 }, { "epoch": 10.727611940298507, "grad_norm": 0.48196510893095224, "learning_rate": 2.6653413567140668e-05, "loss": 0.15, "step": 23000 }, { "epoch": 10.729944029850746, "grad_norm": 0.44627132181870954, "learning_rate": 2.6644743684634637e-05, "loss": 0.1447, "step": 23005 }, { "epoch": 10.732276119402986, "grad_norm": 0.46254303504922245, "learning_rate": 2.6636073929297018e-05, "loss": 0.145, "step": 23010 }, { "epoch": 10.734608208955224, "grad_norm": 0.46774968792185967, "learning_rate": 2.6627404302416935e-05, "loss": 0.1495, "step": 23015 }, { "epoch": 10.736940298507463, "grad_norm": 0.47506427794068784, "learning_rate": 2.661873480528347e-05, "loss": 0.1522, "step": 23020 }, { "epoch": 10.739272388059701, "grad_norm": 0.472685336554548, "learning_rate": 2.66100654391857e-05, "loss": 0.1513, "step": 23025 }, { "epoch": 10.74160447761194, "grad_norm": 0.45337260346396807, "learning_rate": 2.660139620541267e-05, "loss": 0.1488, "step": 23030 }, { "epoch": 10.743936567164178, "grad_norm": 0.4403124090891152, "learning_rate": 2.6592727105253413e-05, "loss": 0.1446, "step": 23035 }, { "epoch": 10.746268656716419, "grad_norm": 0.4553484760964155, "learning_rate": 2.6584058139996942e-05, "loss": 0.1468, "step": 23040 }, { "epoch": 10.748600746268657, "grad_norm": 0.43099972829053923, "learning_rate": 2.6575389310932252e-05, "loss": 0.1483, "step": 23045 }, { "epoch": 10.750932835820896, "grad_norm": 0.4562936899896617, "learning_rate": 2.656672061934831e-05, "loss": 0.143, "step": 23050 }, { "epoch": 10.753264925373134, "grad_norm": 0.4620583412940851, "learning_rate": 2.6558052066534077e-05, "loss": 0.1437, "step": 23055 }, { "epoch": 10.755597014925373, "grad_norm": 0.433909873636648, "learning_rate": 2.654938365377847e-05, "loss": 0.1404, "step": 23060 }, { "epoch": 10.757929104477611, "grad_norm": 0.45064523894956027, "learning_rate": 2.6540715382370407e-05, "loss": 0.1432, "step": 23065 }, { "epoch": 10.760261194029852, "grad_norm": 0.46417589221705496, "learning_rate": 2.6532047253598776e-05, "loss": 0.1493, "step": 23070 }, { "epoch": 10.76259328358209, "grad_norm": 0.47691645171967517, "learning_rate": 2.6523379268752448e-05, "loss": 0.1473, "step": 23075 }, { "epoch": 10.764925373134329, "grad_norm": 0.4697778712884835, "learning_rate": 2.651471142912026e-05, "loss": 0.1447, "step": 23080 }, { "epoch": 10.767257462686567, "grad_norm": 0.4548256786610921, "learning_rate": 2.6506043735991042e-05, "loss": 0.1452, "step": 23085 }, { "epoch": 10.769589552238806, "grad_norm": 0.47040479790439865, "learning_rate": 2.6497376190653607e-05, "loss": 0.1463, "step": 23090 }, { "epoch": 10.771921641791044, "grad_norm": 0.4675280567585455, "learning_rate": 2.6488708794396712e-05, "loss": 0.1456, "step": 23095 }, { "epoch": 10.774253731343283, "grad_norm": 0.4545511319971777, "learning_rate": 2.6480041548509137e-05, "loss": 0.1496, "step": 23100 }, { "epoch": 10.776585820895523, "grad_norm": 0.46198029671230517, "learning_rate": 2.6471374454279617e-05, "loss": 0.1482, "step": 23105 }, { "epoch": 10.778917910447761, "grad_norm": 0.4614512414549411, "learning_rate": 2.6462707512996847e-05, "loss": 0.1481, "step": 23110 }, { "epoch": 10.78125, "grad_norm": 0.4901276088594684, "learning_rate": 2.645404072594954e-05, "loss": 0.1473, "step": 23115 }, { "epoch": 10.783582089552239, "grad_norm": 0.4694062818002839, "learning_rate": 2.644537409442635e-05, "loss": 0.1492, "step": 23120 }, { "epoch": 10.785914179104477, "grad_norm": 0.46736915403873874, "learning_rate": 2.6436707619715935e-05, "loss": 0.1481, "step": 23125 }, { "epoch": 10.788246268656717, "grad_norm": 0.43399373438820865, "learning_rate": 2.642804130310691e-05, "loss": 0.1481, "step": 23130 }, { "epoch": 10.790578358208956, "grad_norm": 0.48864625928407807, "learning_rate": 2.641937514588786e-05, "loss": 0.1492, "step": 23135 }, { "epoch": 10.792910447761194, "grad_norm": 0.4532076431195653, "learning_rate": 2.6410709149347385e-05, "loss": 0.143, "step": 23140 }, { "epoch": 10.795242537313433, "grad_norm": 0.45915316869828093, "learning_rate": 2.640204331477401e-05, "loss": 0.1475, "step": 23145 }, { "epoch": 10.797574626865671, "grad_norm": 0.4339518196494666, "learning_rate": 2.6393377643456284e-05, "loss": 0.1438, "step": 23150 }, { "epoch": 10.79990671641791, "grad_norm": 0.46314141661062086, "learning_rate": 2.6384712136682706e-05, "loss": 0.1508, "step": 23155 }, { "epoch": 10.802238805970148, "grad_norm": 0.443137586226443, "learning_rate": 2.6376046795741733e-05, "loss": 0.144, "step": 23160 }, { "epoch": 10.804570895522389, "grad_norm": 0.47164549204459744, "learning_rate": 2.6367381621921845e-05, "loss": 0.1471, "step": 23165 }, { "epoch": 10.806902985074627, "grad_norm": 0.42642436810342377, "learning_rate": 2.6358716616511446e-05, "loss": 0.1443, "step": 23170 }, { "epoch": 10.809235074626866, "grad_norm": 0.4570666526549436, "learning_rate": 2.635005178079896e-05, "loss": 0.1421, "step": 23175 }, { "epoch": 10.811567164179104, "grad_norm": 0.46075756418357067, "learning_rate": 2.6341387116072763e-05, "loss": 0.1465, "step": 23180 }, { "epoch": 10.813899253731343, "grad_norm": 0.45512396966871654, "learning_rate": 2.633272262362118e-05, "loss": 0.1484, "step": 23185 }, { "epoch": 10.816231343283581, "grad_norm": 0.48100762139872116, "learning_rate": 2.6324058304732574e-05, "loss": 0.1506, "step": 23190 }, { "epoch": 10.818563432835822, "grad_norm": 0.4678949377017707, "learning_rate": 2.6315394160695227e-05, "loss": 0.1513, "step": 23195 }, { "epoch": 10.82089552238806, "grad_norm": 0.4504866999549045, "learning_rate": 2.630673019279742e-05, "loss": 0.1429, "step": 23200 }, { "epoch": 10.823227611940299, "grad_norm": 0.4587692656038596, "learning_rate": 2.6298066402327404e-05, "loss": 0.1444, "step": 23205 }, { "epoch": 10.825559701492537, "grad_norm": 0.4570393246017119, "learning_rate": 2.6289402790573392e-05, "loss": 0.1492, "step": 23210 }, { "epoch": 10.827891791044776, "grad_norm": 0.47008363161896866, "learning_rate": 2.6280739358823588e-05, "loss": 0.1479, "step": 23215 }, { "epoch": 10.830223880597014, "grad_norm": 0.4584006120072726, "learning_rate": 2.6272076108366163e-05, "loss": 0.1506, "step": 23220 }, { "epoch": 10.832555970149254, "grad_norm": 0.43834337612355556, "learning_rate": 2.6263413040489248e-05, "loss": 0.1481, "step": 23225 }, { "epoch": 10.834888059701493, "grad_norm": 0.4564663140872489, "learning_rate": 2.6254750156480973e-05, "loss": 0.1456, "step": 23230 }, { "epoch": 10.837220149253731, "grad_norm": 0.43943758003551114, "learning_rate": 2.6246087457629408e-05, "loss": 0.1497, "step": 23235 }, { "epoch": 10.83955223880597, "grad_norm": 0.46576761116548765, "learning_rate": 2.623742494522264e-05, "loss": 0.1454, "step": 23240 }, { "epoch": 10.841884328358208, "grad_norm": 0.44717384575826846, "learning_rate": 2.6228762620548675e-05, "loss": 0.1469, "step": 23245 }, { "epoch": 10.844216417910447, "grad_norm": 0.4729064147754114, "learning_rate": 2.6220100484895527e-05, "loss": 0.1435, "step": 23250 }, { "epoch": 10.846548507462687, "grad_norm": 0.4959141382128313, "learning_rate": 2.6211438539551187e-05, "loss": 0.1484, "step": 23255 }, { "epoch": 10.848880597014926, "grad_norm": 0.4660423510783084, "learning_rate": 2.620277678580358e-05, "loss": 0.1502, "step": 23260 }, { "epoch": 10.851212686567164, "grad_norm": 0.4760617797584299, "learning_rate": 2.619411522494063e-05, "loss": 0.1449, "step": 23265 }, { "epoch": 10.853544776119403, "grad_norm": 0.4402736285917476, "learning_rate": 2.6185453858250242e-05, "loss": 0.1482, "step": 23270 }, { "epoch": 10.855876865671641, "grad_norm": 0.45537645179657527, "learning_rate": 2.6176792687020273e-05, "loss": 0.148, "step": 23275 }, { "epoch": 10.85820895522388, "grad_norm": 0.47182056275228296, "learning_rate": 2.616813171253855e-05, "loss": 0.1487, "step": 23280 }, { "epoch": 10.86054104477612, "grad_norm": 0.4708349059034256, "learning_rate": 2.615947093609288e-05, "loss": 0.1478, "step": 23285 }, { "epoch": 10.862873134328359, "grad_norm": 0.4445952835000037, "learning_rate": 2.615081035897104e-05, "loss": 0.1469, "step": 23290 }, { "epoch": 10.865205223880597, "grad_norm": 0.4591258842509111, "learning_rate": 2.614214998246077e-05, "loss": 0.1496, "step": 23295 }, { "epoch": 10.867537313432836, "grad_norm": 0.45449202725727245, "learning_rate": 2.6133489807849786e-05, "loss": 0.1448, "step": 23300 }, { "epoch": 10.869869402985074, "grad_norm": 0.44933017285772553, "learning_rate": 2.6124829836425784e-05, "loss": 0.149, "step": 23305 }, { "epoch": 10.872201492537313, "grad_norm": 0.47606523011145374, "learning_rate": 2.6116170069476397e-05, "loss": 0.1511, "step": 23310 }, { "epoch": 10.874533582089553, "grad_norm": 0.46261016535162675, "learning_rate": 2.6107510508289274e-05, "loss": 0.1514, "step": 23315 }, { "epoch": 10.876865671641792, "grad_norm": 0.44364644990775237, "learning_rate": 2.609885115415198e-05, "loss": 0.1429, "step": 23320 }, { "epoch": 10.87919776119403, "grad_norm": 0.4382419680312567, "learning_rate": 2.6090192008352115e-05, "loss": 0.1505, "step": 23325 }, { "epoch": 10.881529850746269, "grad_norm": 0.439392952231555, "learning_rate": 2.6081533072177183e-05, "loss": 0.147, "step": 23330 }, { "epoch": 10.883861940298507, "grad_norm": 0.45728951240620286, "learning_rate": 2.6072874346914688e-05, "loss": 0.1515, "step": 23335 }, { "epoch": 10.886194029850746, "grad_norm": 0.44149944808239244, "learning_rate": 2.6064215833852113e-05, "loss": 0.1495, "step": 23340 }, { "epoch": 10.888526119402986, "grad_norm": 0.4696103510031108, "learning_rate": 2.605555753427687e-05, "loss": 0.1487, "step": 23345 }, { "epoch": 10.890858208955224, "grad_norm": 0.4439227425635809, "learning_rate": 2.6046899449476397e-05, "loss": 0.1431, "step": 23350 }, { "epoch": 10.893190298507463, "grad_norm": 0.46067054716198663, "learning_rate": 2.6038241580738053e-05, "loss": 0.1487, "step": 23355 }, { "epoch": 10.895522388059701, "grad_norm": 0.4644097166261758, "learning_rate": 2.602958392934917e-05, "loss": 0.152, "step": 23360 }, { "epoch": 10.89785447761194, "grad_norm": 0.44868052332056935, "learning_rate": 2.6020926496597082e-05, "loss": 0.1473, "step": 23365 }, { "epoch": 10.900186567164178, "grad_norm": 0.4628989790941363, "learning_rate": 2.601226928376904e-05, "loss": 0.1457, "step": 23370 }, { "epoch": 10.902518656716419, "grad_norm": 0.4567685111317407, "learning_rate": 2.600361229215231e-05, "loss": 0.1496, "step": 23375 }, { "epoch": 10.904850746268657, "grad_norm": 0.45051957173276536, "learning_rate": 2.5994955523034098e-05, "loss": 0.1515, "step": 23380 }, { "epoch": 10.907182835820896, "grad_norm": 0.46697765450766976, "learning_rate": 2.598629897770157e-05, "loss": 0.1537, "step": 23385 }, { "epoch": 10.909514925373134, "grad_norm": 0.46303802910587105, "learning_rate": 2.5977642657441893e-05, "loss": 0.1471, "step": 23390 }, { "epoch": 10.911847014925373, "grad_norm": 0.46570117019974766, "learning_rate": 2.5968986563542153e-05, "loss": 0.1535, "step": 23395 }, { "epoch": 10.914179104477611, "grad_norm": 0.46204257571127494, "learning_rate": 2.5960330697289447e-05, "loss": 0.1458, "step": 23400 }, { "epoch": 10.916511194029852, "grad_norm": 0.43824706960247084, "learning_rate": 2.595167505997082e-05, "loss": 0.149, "step": 23405 }, { "epoch": 10.91884328358209, "grad_norm": 0.4648210423045219, "learning_rate": 2.5943019652873267e-05, "loss": 0.1496, "step": 23410 }, { "epoch": 10.921175373134329, "grad_norm": 0.4323096577202562, "learning_rate": 2.5934364477283786e-05, "loss": 0.1435, "step": 23415 }, { "epoch": 10.923507462686567, "grad_norm": 0.44543083597468514, "learning_rate": 2.5925709534489295e-05, "loss": 0.1508, "step": 23420 }, { "epoch": 10.925839552238806, "grad_norm": 0.46147281345238417, "learning_rate": 2.5917054825776717e-05, "loss": 0.1486, "step": 23425 }, { "epoch": 10.928171641791044, "grad_norm": 0.47489942373028443, "learning_rate": 2.5908400352432927e-05, "loss": 0.1445, "step": 23430 }, { "epoch": 10.930503731343283, "grad_norm": 0.45604098037099655, "learning_rate": 2.5899746115744743e-05, "loss": 0.1481, "step": 23435 }, { "epoch": 10.932835820895523, "grad_norm": 0.44500079567973616, "learning_rate": 2.589109211699899e-05, "loss": 0.1493, "step": 23440 }, { "epoch": 10.935167910447761, "grad_norm": 0.49084213622022244, "learning_rate": 2.5882438357482414e-05, "loss": 0.1456, "step": 23445 }, { "epoch": 10.9375, "grad_norm": 0.48524560160909236, "learning_rate": 2.5873784838481762e-05, "loss": 0.1485, "step": 23450 }, { "epoch": 10.939832089552239, "grad_norm": 0.4454741950878139, "learning_rate": 2.5865131561283718e-05, "loss": 0.1486, "step": 23455 }, { "epoch": 10.942164179104477, "grad_norm": 0.48378973400116865, "learning_rate": 2.5856478527174955e-05, "loss": 0.1516, "step": 23460 }, { "epoch": 10.944496268656717, "grad_norm": 0.45887642202566836, "learning_rate": 2.5847825737442088e-05, "loss": 0.1505, "step": 23465 }, { "epoch": 10.946828358208956, "grad_norm": 0.4530277366555616, "learning_rate": 2.5839173193371697e-05, "loss": 0.1479, "step": 23470 }, { "epoch": 10.949160447761194, "grad_norm": 0.44998644448099745, "learning_rate": 2.583052089625035e-05, "loss": 0.1478, "step": 23475 }, { "epoch": 10.951492537313433, "grad_norm": 0.463591731982233, "learning_rate": 2.5821868847364534e-05, "loss": 0.1493, "step": 23480 }, { "epoch": 10.953824626865671, "grad_norm": 0.4535975222353094, "learning_rate": 2.5813217048000753e-05, "loss": 0.1449, "step": 23485 }, { "epoch": 10.95615671641791, "grad_norm": 0.46983616722468036, "learning_rate": 2.5804565499445437e-05, "loss": 0.1511, "step": 23490 }, { "epoch": 10.958488805970148, "grad_norm": 0.45887403711563296, "learning_rate": 2.5795914202984978e-05, "loss": 0.1429, "step": 23495 }, { "epoch": 10.960820895522389, "grad_norm": 0.4528439428855959, "learning_rate": 2.578726315990576e-05, "loss": 0.1536, "step": 23500 }, { "epoch": 10.963152985074627, "grad_norm": 0.4645119847137578, "learning_rate": 2.577861237149409e-05, "loss": 0.1517, "step": 23505 }, { "epoch": 10.965485074626866, "grad_norm": 0.467943635963322, "learning_rate": 2.5769961839036277e-05, "loss": 0.1528, "step": 23510 }, { "epoch": 10.967817164179104, "grad_norm": 0.43988655102555113, "learning_rate": 2.5761311563818564e-05, "loss": 0.1504, "step": 23515 }, { "epoch": 10.970149253731343, "grad_norm": 0.4540228629351341, "learning_rate": 2.575266154712715e-05, "loss": 0.1498, "step": 23520 }, { "epoch": 10.972481343283581, "grad_norm": 0.4515835264224302, "learning_rate": 2.574401179024823e-05, "loss": 0.1516, "step": 23525 }, { "epoch": 10.974813432835822, "grad_norm": 0.45716802687411895, "learning_rate": 2.5735362294467928e-05, "loss": 0.1511, "step": 23530 }, { "epoch": 10.97714552238806, "grad_norm": 0.47398003492594604, "learning_rate": 2.5726713061072354e-05, "loss": 0.1529, "step": 23535 }, { "epoch": 10.979477611940299, "grad_norm": 0.46373937632685586, "learning_rate": 2.571806409134756e-05, "loss": 0.1552, "step": 23540 }, { "epoch": 10.981809701492537, "grad_norm": 0.44243738223960116, "learning_rate": 2.5709415386579548e-05, "loss": 0.1532, "step": 23545 }, { "epoch": 10.984141791044776, "grad_norm": 0.45535566943405953, "learning_rate": 2.570076694805432e-05, "loss": 0.1484, "step": 23550 }, { "epoch": 10.986473880597014, "grad_norm": 0.4502003386084863, "learning_rate": 2.5692118777057804e-05, "loss": 0.1468, "step": 23555 }, { "epoch": 10.988805970149254, "grad_norm": 0.4540468835204666, "learning_rate": 2.5683470874875913e-05, "loss": 0.1476, "step": 23560 }, { "epoch": 10.991138059701493, "grad_norm": 0.4671949231662656, "learning_rate": 2.56748232427945e-05, "loss": 0.1451, "step": 23565 }, { "epoch": 10.993470149253731, "grad_norm": 0.4575674005443204, "learning_rate": 2.566617588209937e-05, "loss": 0.1454, "step": 23570 }, { "epoch": 10.99580223880597, "grad_norm": 0.4855570596591255, "learning_rate": 2.5657528794076325e-05, "loss": 0.1502, "step": 23575 }, { "epoch": 10.998134328358208, "grad_norm": 0.45282127646286463, "learning_rate": 2.564888198001109e-05, "loss": 0.1504, "step": 23580 }, { "epoch": 11.000466417910447, "grad_norm": 0.3963358656207722, "learning_rate": 2.5640235441189374e-05, "loss": 0.1381, "step": 23585 }, { "epoch": 11.002798507462687, "grad_norm": 0.4693022664980193, "learning_rate": 2.563158917889683e-05, "loss": 0.0981, "step": 23590 }, { "epoch": 11.005130597014926, "grad_norm": 0.45389178847113193, "learning_rate": 2.562294319441907e-05, "loss": 0.0973, "step": 23595 }, { "epoch": 11.007462686567164, "grad_norm": 0.4502785459300183, "learning_rate": 2.5614297489041673e-05, "loss": 0.1011, "step": 23600 }, { "epoch": 11.009794776119403, "grad_norm": 0.4600830656014947, "learning_rate": 2.5605652064050168e-05, "loss": 0.0975, "step": 23605 }, { "epoch": 11.012126865671641, "grad_norm": 0.43492913736664096, "learning_rate": 2.559700692073006e-05, "loss": 0.0974, "step": 23610 }, { "epoch": 11.01445895522388, "grad_norm": 0.413109169874298, "learning_rate": 2.5588362060366788e-05, "loss": 0.1003, "step": 23615 }, { "epoch": 11.01679104477612, "grad_norm": 0.4369223741435439, "learning_rate": 2.5579717484245756e-05, "loss": 0.0947, "step": 23620 }, { "epoch": 11.019123134328359, "grad_norm": 0.44088315293686137, "learning_rate": 2.5571073193652344e-05, "loss": 0.096, "step": 23625 }, { "epoch": 11.021455223880597, "grad_norm": 0.4218739788340321, "learning_rate": 2.556242918987185e-05, "loss": 0.0922, "step": 23630 }, { "epoch": 11.023787313432836, "grad_norm": 0.4195073643226075, "learning_rate": 2.555378547418959e-05, "loss": 0.0934, "step": 23635 }, { "epoch": 11.026119402985074, "grad_norm": 0.42676441854974684, "learning_rate": 2.554514204789078e-05, "loss": 0.0959, "step": 23640 }, { "epoch": 11.028451492537313, "grad_norm": 0.4195414754299518, "learning_rate": 2.553649891226061e-05, "loss": 0.0943, "step": 23645 }, { "epoch": 11.030783582089553, "grad_norm": 0.41355089855288246, "learning_rate": 2.5527856068584244e-05, "loss": 0.0913, "step": 23650 }, { "epoch": 11.033115671641792, "grad_norm": 0.427082656692269, "learning_rate": 2.5519213518146778e-05, "loss": 0.0943, "step": 23655 }, { "epoch": 11.03544776119403, "grad_norm": 0.44368513194629544, "learning_rate": 2.551057126223329e-05, "loss": 0.0979, "step": 23660 }, { "epoch": 11.037779850746269, "grad_norm": 0.4071136226905678, "learning_rate": 2.5501929302128795e-05, "loss": 0.0949, "step": 23665 }, { "epoch": 11.040111940298507, "grad_norm": 0.42635058236051604, "learning_rate": 2.5493287639118265e-05, "loss": 0.0921, "step": 23670 }, { "epoch": 11.042444029850746, "grad_norm": 0.42390451585177075, "learning_rate": 2.548464627448664e-05, "loss": 0.0932, "step": 23675 }, { "epoch": 11.044776119402986, "grad_norm": 0.4093927327850756, "learning_rate": 2.54760052095188e-05, "loss": 0.093, "step": 23680 }, { "epoch": 11.047108208955224, "grad_norm": 0.421424872361411, "learning_rate": 2.54673644454996e-05, "loss": 0.0954, "step": 23685 }, { "epoch": 11.049440298507463, "grad_norm": 0.40758725251307704, "learning_rate": 2.545872398371383e-05, "loss": 0.0928, "step": 23690 }, { "epoch": 11.051772388059701, "grad_norm": 0.41627018923309395, "learning_rate": 2.5450083825446237e-05, "loss": 0.0957, "step": 23695 }, { "epoch": 11.05410447761194, "grad_norm": 0.42535897860639393, "learning_rate": 2.544144397198155e-05, "loss": 0.0972, "step": 23700 }, { "epoch": 11.056436567164178, "grad_norm": 0.4372761361191069, "learning_rate": 2.5432804424604412e-05, "loss": 0.093, "step": 23705 }, { "epoch": 11.058768656716419, "grad_norm": 0.41932787764177815, "learning_rate": 2.5424165184599457e-05, "loss": 0.0948, "step": 23710 }, { "epoch": 11.061100746268657, "grad_norm": 0.43390727362637627, "learning_rate": 2.541552625325125e-05, "loss": 0.0959, "step": 23715 }, { "epoch": 11.063432835820896, "grad_norm": 0.4207937681924418, "learning_rate": 2.5406887631844312e-05, "loss": 0.096, "step": 23720 }, { "epoch": 11.065764925373134, "grad_norm": 0.4345698687972853, "learning_rate": 2.539824932166313e-05, "loss": 0.0938, "step": 23725 }, { "epoch": 11.068097014925373, "grad_norm": 0.42194521360768306, "learning_rate": 2.5389611323992134e-05, "loss": 0.0918, "step": 23730 }, { "epoch": 11.070429104477611, "grad_norm": 0.42547686136742324, "learning_rate": 2.538097364011572e-05, "loss": 0.0928, "step": 23735 }, { "epoch": 11.072761194029852, "grad_norm": 0.4185387403232781, "learning_rate": 2.5372336271318225e-05, "loss": 0.0944, "step": 23740 }, { "epoch": 11.07509328358209, "grad_norm": 0.4454407177038836, "learning_rate": 2.5363699218883928e-05, "loss": 0.0943, "step": 23745 }, { "epoch": 11.077425373134329, "grad_norm": 0.4329913200968591, "learning_rate": 2.5355062484097103e-05, "loss": 0.0933, "step": 23750 }, { "epoch": 11.079757462686567, "grad_norm": 0.4354922921958569, "learning_rate": 2.534642606824193e-05, "loss": 0.0952, "step": 23755 }, { "epoch": 11.082089552238806, "grad_norm": 0.4223928111517238, "learning_rate": 2.5337789972602566e-05, "loss": 0.0963, "step": 23760 }, { "epoch": 11.084421641791044, "grad_norm": 0.44259942527305246, "learning_rate": 2.5329154198463122e-05, "loss": 0.0967, "step": 23765 }, { "epoch": 11.086753731343284, "grad_norm": 0.4251975230946546, "learning_rate": 2.5320518747107646e-05, "loss": 0.0936, "step": 23770 }, { "epoch": 11.089085820895523, "grad_norm": 0.44890416083406753, "learning_rate": 2.5311883619820148e-05, "loss": 0.0954, "step": 23775 }, { "epoch": 11.091417910447761, "grad_norm": 0.4017846537856625, "learning_rate": 2.530324881788459e-05, "loss": 0.0959, "step": 23780 }, { "epoch": 11.09375, "grad_norm": 0.42823888163879276, "learning_rate": 2.529461434258489e-05, "loss": 0.0924, "step": 23785 }, { "epoch": 11.096082089552239, "grad_norm": 0.43117253854528215, "learning_rate": 2.5285980195204906e-05, "loss": 0.0938, "step": 23790 }, { "epoch": 11.098414179104477, "grad_norm": 0.44476731082625365, "learning_rate": 2.5277346377028448e-05, "loss": 0.0975, "step": 23795 }, { "epoch": 11.100746268656716, "grad_norm": 0.41612915208509765, "learning_rate": 2.5268712889339296e-05, "loss": 0.0956, "step": 23800 }, { "epoch": 11.103078358208956, "grad_norm": 0.42512280636777194, "learning_rate": 2.5260079733421155e-05, "loss": 0.0974, "step": 23805 }, { "epoch": 11.105410447761194, "grad_norm": 0.4316016936410093, "learning_rate": 2.5251446910557704e-05, "loss": 0.0978, "step": 23810 }, { "epoch": 11.107742537313433, "grad_norm": 0.4215386961583547, "learning_rate": 2.524281442203254e-05, "loss": 0.096, "step": 23815 }, { "epoch": 11.110074626865671, "grad_norm": 0.4363269088627876, "learning_rate": 2.5234182269129253e-05, "loss": 0.0986, "step": 23820 }, { "epoch": 11.11240671641791, "grad_norm": 0.43139285539099237, "learning_rate": 2.522555045313136e-05, "loss": 0.096, "step": 23825 }, { "epoch": 11.114738805970148, "grad_norm": 0.46985674096064495, "learning_rate": 2.5216918975322303e-05, "loss": 0.0955, "step": 23830 }, { "epoch": 11.117070895522389, "grad_norm": 0.45028661516131757, "learning_rate": 2.520828783698554e-05, "loss": 0.0969, "step": 23835 }, { "epoch": 11.119402985074627, "grad_norm": 0.413924097273748, "learning_rate": 2.519965703940441e-05, "loss": 0.0961, "step": 23840 }, { "epoch": 11.121735074626866, "grad_norm": 0.4386198291104718, "learning_rate": 2.5191026583862237e-05, "loss": 0.0989, "step": 23845 }, { "epoch": 11.124067164179104, "grad_norm": 0.4338154933233585, "learning_rate": 2.5182396471642287e-05, "loss": 0.0976, "step": 23850 }, { "epoch": 11.126399253731343, "grad_norm": 0.44374445251178807, "learning_rate": 2.5173766704027775e-05, "loss": 0.0978, "step": 23855 }, { "epoch": 11.128731343283581, "grad_norm": 0.4380585465739345, "learning_rate": 2.5165137282301877e-05, "loss": 0.0967, "step": 23860 }, { "epoch": 11.131063432835822, "grad_norm": 0.4202841347936221, "learning_rate": 2.5156508207747686e-05, "loss": 0.0959, "step": 23865 }, { "epoch": 11.13339552238806, "grad_norm": 0.44634254522658307, "learning_rate": 2.5147879481648266e-05, "loss": 0.099, "step": 23870 }, { "epoch": 11.135727611940299, "grad_norm": 0.4377884886759619, "learning_rate": 2.5139251105286644e-05, "loss": 0.1007, "step": 23875 }, { "epoch": 11.138059701492537, "grad_norm": 0.46033861799748504, "learning_rate": 2.5130623079945754e-05, "loss": 0.1006, "step": 23880 }, { "epoch": 11.140391791044776, "grad_norm": 0.41158869360358036, "learning_rate": 2.5121995406908516e-05, "loss": 0.0959, "step": 23885 }, { "epoch": 11.142723880597014, "grad_norm": 0.4303955402547019, "learning_rate": 2.511336808745778e-05, "loss": 0.0977, "step": 23890 }, { "epoch": 11.145055970149254, "grad_norm": 0.4462532531440917, "learning_rate": 2.510474112287633e-05, "loss": 0.099, "step": 23895 }, { "epoch": 11.147388059701493, "grad_norm": 0.4505358398795935, "learning_rate": 2.5096114514446934e-05, "loss": 0.1001, "step": 23900 }, { "epoch": 11.149720149253731, "grad_norm": 0.4383331490542453, "learning_rate": 2.508748826345227e-05, "loss": 0.0984, "step": 23905 }, { "epoch": 11.15205223880597, "grad_norm": 0.4715248409395717, "learning_rate": 2.5078862371175e-05, "loss": 0.097, "step": 23910 }, { "epoch": 11.154384328358208, "grad_norm": 0.46772903286200146, "learning_rate": 2.507023683889769e-05, "loss": 0.0971, "step": 23915 }, { "epoch": 11.156716417910447, "grad_norm": 0.41510566746134503, "learning_rate": 2.5061611667902878e-05, "loss": 0.095, "step": 23920 }, { "epoch": 11.159048507462687, "grad_norm": 0.45240707673886726, "learning_rate": 2.505298685947306e-05, "loss": 0.0985, "step": 23925 }, { "epoch": 11.161380597014926, "grad_norm": 0.4410808258242447, "learning_rate": 2.504436241489064e-05, "loss": 0.096, "step": 23930 }, { "epoch": 11.163712686567164, "grad_norm": 0.44318300728969806, "learning_rate": 2.503573833543801e-05, "loss": 0.0994, "step": 23935 }, { "epoch": 11.166044776119403, "grad_norm": 0.4652553563843303, "learning_rate": 2.5027114622397473e-05, "loss": 0.0981, "step": 23940 }, { "epoch": 11.168376865671641, "grad_norm": 0.4435657889122711, "learning_rate": 2.5018491277051297e-05, "loss": 0.1002, "step": 23945 }, { "epoch": 11.17070895522388, "grad_norm": 0.41388607767703023, "learning_rate": 2.50098683006817e-05, "loss": 0.1001, "step": 23950 }, { "epoch": 11.17304104477612, "grad_norm": 0.45316211020106417, "learning_rate": 2.500124569457082e-05, "loss": 0.0973, "step": 23955 }, { "epoch": 11.175373134328359, "grad_norm": 0.42990700247532376, "learning_rate": 2.4992623460000763e-05, "loss": 0.1002, "step": 23960 }, { "epoch": 11.177705223880597, "grad_norm": 0.4484188113591902, "learning_rate": 2.4984001598253586e-05, "loss": 0.0997, "step": 23965 }, { "epoch": 11.180037313432836, "grad_norm": 0.4390626883582674, "learning_rate": 2.497538011061125e-05, "loss": 0.1, "step": 23970 }, { "epoch": 11.182369402985074, "grad_norm": 0.4442430788036036, "learning_rate": 2.4966758998355717e-05, "loss": 0.0959, "step": 23975 }, { "epoch": 11.184701492537313, "grad_norm": 0.44240738962323634, "learning_rate": 2.495813826276884e-05, "loss": 0.0989, "step": 23980 }, { "epoch": 11.187033582089553, "grad_norm": 0.4781757624592065, "learning_rate": 2.4949517905132454e-05, "loss": 0.1011, "step": 23985 }, { "epoch": 11.189365671641792, "grad_norm": 0.42507726968174736, "learning_rate": 2.4940897926728314e-05, "loss": 0.0993, "step": 23990 }, { "epoch": 11.19169776119403, "grad_norm": 0.46670003360105283, "learning_rate": 2.4932278328838138e-05, "loss": 0.1006, "step": 23995 }, { "epoch": 11.194029850746269, "grad_norm": 0.4428592587673833, "learning_rate": 2.4923659112743576e-05, "loss": 0.0954, "step": 24000 }, { "epoch": 11.196361940298507, "grad_norm": 0.4336608544045601, "learning_rate": 2.4915040279726204e-05, "loss": 0.0989, "step": 24005 }, { "epoch": 11.198694029850746, "grad_norm": 0.4332120881984919, "learning_rate": 2.490642183106759e-05, "loss": 0.1033, "step": 24010 }, { "epoch": 11.201026119402986, "grad_norm": 0.4354768224501455, "learning_rate": 2.48978037680492e-05, "loss": 0.0972, "step": 24015 }, { "epoch": 11.203358208955224, "grad_norm": 0.4304779055918563, "learning_rate": 2.4889186091952444e-05, "loss": 0.0992, "step": 24020 }, { "epoch": 11.205690298507463, "grad_norm": 0.45404746169122195, "learning_rate": 2.4880568804058713e-05, "loss": 0.0997, "step": 24025 }, { "epoch": 11.208022388059701, "grad_norm": 0.44257917044246287, "learning_rate": 2.48719519056493e-05, "loss": 0.1013, "step": 24030 }, { "epoch": 11.21035447761194, "grad_norm": 0.44347616997586076, "learning_rate": 2.486333539800546e-05, "loss": 0.099, "step": 24035 }, { "epoch": 11.212686567164178, "grad_norm": 0.4482277683016142, "learning_rate": 2.485471928240839e-05, "loss": 0.0989, "step": 24040 }, { "epoch": 11.215018656716419, "grad_norm": 0.44713255039939087, "learning_rate": 2.4846103560139206e-05, "loss": 0.1002, "step": 24045 }, { "epoch": 11.217350746268657, "grad_norm": 0.45208285687084043, "learning_rate": 2.4837488232479005e-05, "loss": 0.0973, "step": 24050 }, { "epoch": 11.219682835820896, "grad_norm": 0.4508632524714966, "learning_rate": 2.4828873300708783e-05, "loss": 0.098, "step": 24055 }, { "epoch": 11.222014925373134, "grad_norm": 0.4585383448511146, "learning_rate": 2.4820258766109515e-05, "loss": 0.1041, "step": 24060 }, { "epoch": 11.224347014925373, "grad_norm": 0.46364088409206383, "learning_rate": 2.48116446299621e-05, "loss": 0.1038, "step": 24065 }, { "epoch": 11.226679104477611, "grad_norm": 0.4483549253300209, "learning_rate": 2.4803030893547357e-05, "loss": 0.1025, "step": 24070 }, { "epoch": 11.229011194029852, "grad_norm": 0.4643021396266438, "learning_rate": 2.4794417558146093e-05, "loss": 0.0968, "step": 24075 }, { "epoch": 11.23134328358209, "grad_norm": 0.45023466217072944, "learning_rate": 2.4785804625039005e-05, "loss": 0.1029, "step": 24080 }, { "epoch": 11.233675373134329, "grad_norm": 0.4466340445252862, "learning_rate": 2.4777192095506775e-05, "loss": 0.1018, "step": 24085 }, { "epoch": 11.236007462686567, "grad_norm": 0.4539116630424583, "learning_rate": 2.4768579970829985e-05, "loss": 0.0972, "step": 24090 }, { "epoch": 11.238339552238806, "grad_norm": 0.45387816015717486, "learning_rate": 2.4759968252289183e-05, "loss": 0.0995, "step": 24095 }, { "epoch": 11.240671641791044, "grad_norm": 0.43247718362983456, "learning_rate": 2.4751356941164855e-05, "loss": 0.1021, "step": 24100 }, { "epoch": 11.243003731343283, "grad_norm": 0.4577724102445459, "learning_rate": 2.4742746038737404e-05, "loss": 0.103, "step": 24105 }, { "epoch": 11.245335820895523, "grad_norm": 0.47545320320766027, "learning_rate": 2.4734135546287208e-05, "loss": 0.099, "step": 24110 }, { "epoch": 11.247667910447761, "grad_norm": 0.4508395251011292, "learning_rate": 2.4725525465094557e-05, "loss": 0.1011, "step": 24115 }, { "epoch": 11.25, "grad_norm": 0.4531458273170598, "learning_rate": 2.4716915796439678e-05, "loss": 0.098, "step": 24120 }, { "epoch": 11.252332089552239, "grad_norm": 0.45937445516390524, "learning_rate": 2.4708306541602765e-05, "loss": 0.1016, "step": 24125 }, { "epoch": 11.254664179104477, "grad_norm": 0.45481544932597684, "learning_rate": 2.4699697701863916e-05, "loss": 0.1013, "step": 24130 }, { "epoch": 11.256996268656717, "grad_norm": 0.45998125930880185, "learning_rate": 2.4691089278503188e-05, "loss": 0.1042, "step": 24135 }, { "epoch": 11.259328358208956, "grad_norm": 0.46046085760784017, "learning_rate": 2.4682481272800572e-05, "loss": 0.1025, "step": 24140 }, { "epoch": 11.261660447761194, "grad_norm": 0.4711433531022589, "learning_rate": 2.4673873686035996e-05, "loss": 0.1023, "step": 24145 }, { "epoch": 11.263992537313433, "grad_norm": 0.42070326249554346, "learning_rate": 2.4665266519489328e-05, "loss": 0.0975, "step": 24150 }, { "epoch": 11.266324626865671, "grad_norm": 0.4460137127829196, "learning_rate": 2.4656659774440365e-05, "loss": 0.1015, "step": 24155 }, { "epoch": 11.26865671641791, "grad_norm": 0.4624074958600878, "learning_rate": 2.4648053452168857e-05, "loss": 0.1021, "step": 24160 }, { "epoch": 11.270988805970148, "grad_norm": 0.45903566819588965, "learning_rate": 2.4639447553954464e-05, "loss": 0.1018, "step": 24165 }, { "epoch": 11.273320895522389, "grad_norm": 0.4474610224258538, "learning_rate": 2.463084208107682e-05, "loss": 0.1041, "step": 24170 }, { "epoch": 11.275652985074627, "grad_norm": 0.44803259686823305, "learning_rate": 2.462223703481547e-05, "loss": 0.1013, "step": 24175 }, { "epoch": 11.277985074626866, "grad_norm": 0.4257792088866472, "learning_rate": 2.4613632416449893e-05, "loss": 0.1012, "step": 24180 }, { "epoch": 11.280317164179104, "grad_norm": 0.4547873711506659, "learning_rate": 2.4605028227259535e-05, "loss": 0.1052, "step": 24185 }, { "epoch": 11.282649253731343, "grad_norm": 0.4553064581985679, "learning_rate": 2.4596424468523728e-05, "loss": 0.1022, "step": 24190 }, { "epoch": 11.284981343283581, "grad_norm": 0.4600623060053633, "learning_rate": 2.458782114152179e-05, "loss": 0.1022, "step": 24195 }, { "epoch": 11.287313432835822, "grad_norm": 0.4545248427631926, "learning_rate": 2.4579218247532947e-05, "loss": 0.103, "step": 24200 }, { "epoch": 11.28964552238806, "grad_norm": 0.45994112190764125, "learning_rate": 2.4570615787836358e-05, "loss": 0.1006, "step": 24205 }, { "epoch": 11.291977611940299, "grad_norm": 0.4598466468060709, "learning_rate": 2.4562013763711145e-05, "loss": 0.1027, "step": 24210 }, { "epoch": 11.294309701492537, "grad_norm": 0.4544097333860385, "learning_rate": 2.4553412176436323e-05, "loss": 0.1022, "step": 24215 }, { "epoch": 11.296641791044776, "grad_norm": 0.44974379545817367, "learning_rate": 2.4544811027290893e-05, "loss": 0.1065, "step": 24220 }, { "epoch": 11.298973880597014, "grad_norm": 0.4436862971982961, "learning_rate": 2.4536210317553744e-05, "loss": 0.1044, "step": 24225 }, { "epoch": 11.301305970149254, "grad_norm": 0.4661329467144973, "learning_rate": 2.452761004850371e-05, "loss": 0.1027, "step": 24230 }, { "epoch": 11.303638059701493, "grad_norm": 0.4459830329331304, "learning_rate": 2.4519010221419604e-05, "loss": 0.1042, "step": 24235 }, { "epoch": 11.305970149253731, "grad_norm": 0.4513423945672678, "learning_rate": 2.4510410837580106e-05, "loss": 0.1003, "step": 24240 }, { "epoch": 11.30830223880597, "grad_norm": 0.44856285756707276, "learning_rate": 2.4501811898263878e-05, "loss": 0.0982, "step": 24245 }, { "epoch": 11.310634328358208, "grad_norm": 0.43478838212135645, "learning_rate": 2.4493213404749493e-05, "loss": 0.1041, "step": 24250 }, { "epoch": 11.312966417910447, "grad_norm": 0.4629657663816365, "learning_rate": 2.4484615358315456e-05, "loss": 0.1037, "step": 24255 }, { "epoch": 11.315298507462687, "grad_norm": 0.43734715942373226, "learning_rate": 2.447601776024024e-05, "loss": 0.1011, "step": 24260 }, { "epoch": 11.317630597014926, "grad_norm": 0.45530284074264216, "learning_rate": 2.4467420611802194e-05, "loss": 0.101, "step": 24265 }, { "epoch": 11.319962686567164, "grad_norm": 0.43652109484480045, "learning_rate": 2.4458823914279662e-05, "loss": 0.1017, "step": 24270 }, { "epoch": 11.322294776119403, "grad_norm": 0.4605664577145724, "learning_rate": 2.445022766895087e-05, "loss": 0.0993, "step": 24275 }, { "epoch": 11.324626865671641, "grad_norm": 0.453996657938861, "learning_rate": 2.4441631877093995e-05, "loss": 0.1014, "step": 24280 }, { "epoch": 11.32695895522388, "grad_norm": 0.4723333508217484, "learning_rate": 2.443303653998717e-05, "loss": 0.1045, "step": 24285 }, { "epoch": 11.32929104477612, "grad_norm": 0.4597114913677603, "learning_rate": 2.442444165890842e-05, "loss": 0.103, "step": 24290 }, { "epoch": 11.331623134328359, "grad_norm": 0.4525776654554929, "learning_rate": 2.441584723513573e-05, "loss": 0.103, "step": 24295 }, { "epoch": 11.333955223880597, "grad_norm": 0.4527228687413055, "learning_rate": 2.4407253269947006e-05, "loss": 0.1048, "step": 24300 }, { "epoch": 11.336287313432836, "grad_norm": 0.4438476152764902, "learning_rate": 2.4398659764620086e-05, "loss": 0.1045, "step": 24305 }, { "epoch": 11.338619402985074, "grad_norm": 0.4721257406696885, "learning_rate": 2.4390066720432746e-05, "loss": 0.1047, "step": 24310 }, { "epoch": 11.340951492537313, "grad_norm": 0.47934557914069054, "learning_rate": 2.438147413866269e-05, "loss": 0.1025, "step": 24315 }, { "epoch": 11.343283582089553, "grad_norm": 0.4570762830332563, "learning_rate": 2.437288202058755e-05, "loss": 0.1048, "step": 24320 }, { "epoch": 11.345615671641792, "grad_norm": 0.4423208807834774, "learning_rate": 2.4364290367484888e-05, "loss": 0.1034, "step": 24325 }, { "epoch": 11.34794776119403, "grad_norm": 0.4553322133460473, "learning_rate": 2.4355699180632207e-05, "loss": 0.1028, "step": 24330 }, { "epoch": 11.350279850746269, "grad_norm": 0.46251261533944943, "learning_rate": 2.4347108461306935e-05, "loss": 0.1043, "step": 24335 }, { "epoch": 11.352611940298507, "grad_norm": 0.4571595827705474, "learning_rate": 2.4338518210786416e-05, "loss": 0.102, "step": 24340 }, { "epoch": 11.354944029850746, "grad_norm": 0.44776059299078674, "learning_rate": 2.432992843034796e-05, "loss": 0.1019, "step": 24345 }, { "epoch": 11.357276119402986, "grad_norm": 0.4420681301322377, "learning_rate": 2.4321339121268766e-05, "loss": 0.1032, "step": 24350 }, { "epoch": 11.359608208955224, "grad_norm": 0.45008930780540907, "learning_rate": 2.431275028482599e-05, "loss": 0.1048, "step": 24355 }, { "epoch": 11.361940298507463, "grad_norm": 0.46238957083321114, "learning_rate": 2.430416192229672e-05, "loss": 0.103, "step": 24360 }, { "epoch": 11.364272388059701, "grad_norm": 0.4634388976307239, "learning_rate": 2.4295574034957935e-05, "loss": 0.1049, "step": 24365 }, { "epoch": 11.36660447761194, "grad_norm": 0.4610289898706834, "learning_rate": 2.42869866240866e-05, "loss": 0.1038, "step": 24370 }, { "epoch": 11.368936567164178, "grad_norm": 0.45631802108159036, "learning_rate": 2.427839969095957e-05, "loss": 0.1056, "step": 24375 }, { "epoch": 11.371268656716419, "grad_norm": 0.47034407119329885, "learning_rate": 2.4269813236853632e-05, "loss": 0.1019, "step": 24380 }, { "epoch": 11.373600746268657, "grad_norm": 0.45864940525172304, "learning_rate": 2.4261227263045527e-05, "loss": 0.1096, "step": 24385 }, { "epoch": 11.375932835820896, "grad_norm": 0.46774964940973485, "learning_rate": 2.4252641770811886e-05, "loss": 0.1045, "step": 24390 }, { "epoch": 11.378264925373134, "grad_norm": 0.4507826819922234, "learning_rate": 2.4244056761429307e-05, "loss": 0.103, "step": 24395 }, { "epoch": 11.380597014925373, "grad_norm": 0.4638836154791427, "learning_rate": 2.423547223617429e-05, "loss": 0.1055, "step": 24400 }, { "epoch": 11.382929104477611, "grad_norm": 0.48193893387784675, "learning_rate": 2.4226888196323268e-05, "loss": 0.1063, "step": 24405 }, { "epoch": 11.385261194029852, "grad_norm": 0.46207919172941453, "learning_rate": 2.4218304643152617e-05, "loss": 0.1029, "step": 24410 }, { "epoch": 11.38759328358209, "grad_norm": 0.44911280336233783, "learning_rate": 2.4209721577938617e-05, "loss": 0.102, "step": 24415 }, { "epoch": 11.389925373134329, "grad_norm": 0.48044539587938606, "learning_rate": 2.42011390019575e-05, "loss": 0.1054, "step": 24420 }, { "epoch": 11.392257462686567, "grad_norm": 0.46094277653961035, "learning_rate": 2.419255691648541e-05, "loss": 0.1081, "step": 24425 }, { "epoch": 11.394589552238806, "grad_norm": 0.44335639523186027, "learning_rate": 2.4183975322798407e-05, "loss": 0.1026, "step": 24430 }, { "epoch": 11.396921641791044, "grad_norm": 0.4535236932262161, "learning_rate": 2.417539422217251e-05, "loss": 0.1037, "step": 24435 }, { "epoch": 11.399253731343283, "grad_norm": 0.4762914858176701, "learning_rate": 2.4166813615883625e-05, "loss": 0.103, "step": 24440 }, { "epoch": 11.401585820895523, "grad_norm": 0.4564392547039997, "learning_rate": 2.415823350520764e-05, "loss": 0.1036, "step": 24445 }, { "epoch": 11.403917910447761, "grad_norm": 0.4870753736221522, "learning_rate": 2.4149653891420304e-05, "loss": 0.1056, "step": 24450 }, { "epoch": 11.40625, "grad_norm": 0.44589695059028844, "learning_rate": 2.4141074775797335e-05, "loss": 0.1015, "step": 24455 }, { "epoch": 11.408582089552239, "grad_norm": 0.4623010508441326, "learning_rate": 2.4132496159614366e-05, "loss": 0.1065, "step": 24460 }, { "epoch": 11.410914179104477, "grad_norm": 0.46367575175174397, "learning_rate": 2.412391804414694e-05, "loss": 0.1052, "step": 24465 }, { "epoch": 11.413246268656717, "grad_norm": 0.4733397030419729, "learning_rate": 2.4115340430670574e-05, "loss": 0.106, "step": 24470 }, { "epoch": 11.415578358208956, "grad_norm": 0.45963802632767414, "learning_rate": 2.410676332046065e-05, "loss": 0.1085, "step": 24475 }, { "epoch": 11.417910447761194, "grad_norm": 0.45431495665979726, "learning_rate": 2.4098186714792504e-05, "loss": 0.1039, "step": 24480 }, { "epoch": 11.420242537313433, "grad_norm": 0.47067957395112686, "learning_rate": 2.408961061494141e-05, "loss": 0.1094, "step": 24485 }, { "epoch": 11.422574626865671, "grad_norm": 0.45897703681921015, "learning_rate": 2.408103502218253e-05, "loss": 0.1059, "step": 24490 }, { "epoch": 11.42490671641791, "grad_norm": 0.4504499249669589, "learning_rate": 2.407245993779099e-05, "loss": 0.105, "step": 24495 }, { "epoch": 11.427238805970148, "grad_norm": 0.46488709413911455, "learning_rate": 2.4063885363041822e-05, "loss": 0.1026, "step": 24500 }, { "epoch": 11.429570895522389, "grad_norm": 0.46171037267640946, "learning_rate": 2.405531129920997e-05, "loss": 0.1055, "step": 24505 }, { "epoch": 11.431902985074627, "grad_norm": 0.4601309300624462, "learning_rate": 2.4046737747570326e-05, "loss": 0.1054, "step": 24510 }, { "epoch": 11.434235074626866, "grad_norm": 0.44141123273634963, "learning_rate": 2.4038164709397694e-05, "loss": 0.1026, "step": 24515 }, { "epoch": 11.436567164179104, "grad_norm": 0.4623522700651772, "learning_rate": 2.4029592185966804e-05, "loss": 0.1082, "step": 24520 }, { "epoch": 11.438899253731343, "grad_norm": 0.4606515582109767, "learning_rate": 2.40210201785523e-05, "loss": 0.1028, "step": 24525 }, { "epoch": 11.441231343283581, "grad_norm": 0.48871529853296286, "learning_rate": 2.4012448688428768e-05, "loss": 0.1028, "step": 24530 }, { "epoch": 11.443563432835822, "grad_norm": 0.45156800631246097, "learning_rate": 2.4003877716870698e-05, "loss": 0.103, "step": 24535 }, { "epoch": 11.44589552238806, "grad_norm": 0.45824920172004957, "learning_rate": 2.399530726515251e-05, "loss": 0.107, "step": 24540 }, { "epoch": 11.448227611940299, "grad_norm": 0.46600129861980283, "learning_rate": 2.3986737334548564e-05, "loss": 0.1079, "step": 24545 }, { "epoch": 11.450559701492537, "grad_norm": 0.4674391781191261, "learning_rate": 2.397816792633311e-05, "loss": 0.1085, "step": 24550 }, { "epoch": 11.452891791044776, "grad_norm": 0.505117061055309, "learning_rate": 2.396959904178034e-05, "loss": 0.1078, "step": 24555 }, { "epoch": 11.455223880597014, "grad_norm": 0.46181978938665497, "learning_rate": 2.396103068216437e-05, "loss": 0.1038, "step": 24560 }, { "epoch": 11.457555970149254, "grad_norm": 0.46358973618456173, "learning_rate": 2.395246284875922e-05, "loss": 0.1033, "step": 24565 }, { "epoch": 11.459888059701493, "grad_norm": 0.4514214024912243, "learning_rate": 2.3943895542838868e-05, "loss": 0.1048, "step": 24570 }, { "epoch": 11.462220149253731, "grad_norm": 0.4496873988831441, "learning_rate": 2.3935328765677172e-05, "loss": 0.1029, "step": 24575 }, { "epoch": 11.46455223880597, "grad_norm": 0.443389092296085, "learning_rate": 2.3926762518547928e-05, "loss": 0.107, "step": 24580 }, { "epoch": 11.466884328358208, "grad_norm": 0.44726095111461883, "learning_rate": 2.3918196802724874e-05, "loss": 0.1064, "step": 24585 }, { "epoch": 11.469216417910447, "grad_norm": 0.4801091209579104, "learning_rate": 2.3909631619481626e-05, "loss": 0.1091, "step": 24590 }, { "epoch": 11.471548507462687, "grad_norm": 0.4884169644620585, "learning_rate": 2.390106697009176e-05, "loss": 0.1092, "step": 24595 }, { "epoch": 11.473880597014926, "grad_norm": 0.4636664740732752, "learning_rate": 2.3892502855828762e-05, "loss": 0.1074, "step": 24600 }, { "epoch": 11.476212686567164, "grad_norm": 0.44549216814936066, "learning_rate": 2.388393927796601e-05, "loss": 0.1042, "step": 24605 }, { "epoch": 11.478544776119403, "grad_norm": 0.4619981631469214, "learning_rate": 2.387537623777686e-05, "loss": 0.1026, "step": 24610 }, { "epoch": 11.480876865671641, "grad_norm": 0.46921438369959806, "learning_rate": 2.3866813736534517e-05, "loss": 0.1072, "step": 24615 }, { "epoch": 11.48320895522388, "grad_norm": 0.46553100210727927, "learning_rate": 2.3858251775512176e-05, "loss": 0.1057, "step": 24620 }, { "epoch": 11.48554104477612, "grad_norm": 0.46235054258663283, "learning_rate": 2.3849690355982895e-05, "loss": 0.1094, "step": 24625 }, { "epoch": 11.487873134328359, "grad_norm": 0.4429432255579689, "learning_rate": 2.384112947921968e-05, "loss": 0.1045, "step": 24630 }, { "epoch": 11.490205223880597, "grad_norm": 0.45565288064470516, "learning_rate": 2.3832569146495464e-05, "loss": 0.1081, "step": 24635 }, { "epoch": 11.492537313432836, "grad_norm": 0.4368383305069309, "learning_rate": 2.3824009359083073e-05, "loss": 0.1048, "step": 24640 }, { "epoch": 11.494869402985074, "grad_norm": 0.4553331297232902, "learning_rate": 2.3815450118255272e-05, "loss": 0.1109, "step": 24645 }, { "epoch": 11.497201492537313, "grad_norm": 0.44731184434897975, "learning_rate": 2.380689142528474e-05, "loss": 0.1045, "step": 24650 }, { "epoch": 11.499533582089553, "grad_norm": 0.4369405597218285, "learning_rate": 2.3798333281444057e-05, "loss": 0.1072, "step": 24655 }, { "epoch": 11.501865671641792, "grad_norm": 0.45520898965650064, "learning_rate": 2.378977568800576e-05, "loss": 0.1056, "step": 24660 }, { "epoch": 11.50419776119403, "grad_norm": 0.46061054970164317, "learning_rate": 2.3781218646242264e-05, "loss": 0.102, "step": 24665 }, { "epoch": 11.506529850746269, "grad_norm": 0.4545719892432966, "learning_rate": 2.3772662157425925e-05, "loss": 0.106, "step": 24670 }, { "epoch": 11.508861940298507, "grad_norm": 0.4507781321988189, "learning_rate": 2.376410622282902e-05, "loss": 0.1073, "step": 24675 }, { "epoch": 11.511194029850746, "grad_norm": 0.4735611971471673, "learning_rate": 2.375555084372371e-05, "loss": 0.1099, "step": 24680 }, { "epoch": 11.513526119402986, "grad_norm": 0.455021240981216, "learning_rate": 2.3746996021382124e-05, "loss": 0.1062, "step": 24685 }, { "epoch": 11.515858208955224, "grad_norm": 0.44101237306581115, "learning_rate": 2.3738441757076268e-05, "loss": 0.1053, "step": 24690 }, { "epoch": 11.518190298507463, "grad_norm": 0.4669809209072245, "learning_rate": 2.3729888052078088e-05, "loss": 0.1106, "step": 24695 }, { "epoch": 11.520522388059701, "grad_norm": 0.4379598992833313, "learning_rate": 2.3721334907659424e-05, "loss": 0.1111, "step": 24700 }, { "epoch": 11.52285447761194, "grad_norm": 0.45697928737274673, "learning_rate": 2.371278232509206e-05, "loss": 0.1084, "step": 24705 }, { "epoch": 11.525186567164178, "grad_norm": 0.4602288412599557, "learning_rate": 2.370423030564768e-05, "loss": 0.1087, "step": 24710 }, { "epoch": 11.527518656716419, "grad_norm": 0.46972142138349554, "learning_rate": 2.3695678850597876e-05, "loss": 0.1091, "step": 24715 }, { "epoch": 11.529850746268657, "grad_norm": 0.45572246112805026, "learning_rate": 2.368712796121419e-05, "loss": 0.1067, "step": 24720 }, { "epoch": 11.532182835820896, "grad_norm": 0.4733344360004621, "learning_rate": 2.3678577638768035e-05, "loss": 0.1099, "step": 24725 }, { "epoch": 11.534514925373134, "grad_norm": 0.47525895987208255, "learning_rate": 2.367002788453077e-05, "loss": 0.1096, "step": 24730 }, { "epoch": 11.536847014925373, "grad_norm": 0.4539343818205457, "learning_rate": 2.366147869977367e-05, "loss": 0.107, "step": 24735 }, { "epoch": 11.539179104477611, "grad_norm": 0.47847414979608177, "learning_rate": 2.3652930085767904e-05, "loss": 0.1123, "step": 24740 }, { "epoch": 11.541511194029852, "grad_norm": 0.4603388473686356, "learning_rate": 2.364438204378458e-05, "loss": 0.1068, "step": 24745 }, { "epoch": 11.54384328358209, "grad_norm": 0.4684699306111713, "learning_rate": 2.3635834575094705e-05, "loss": 0.1081, "step": 24750 }, { "epoch": 11.546175373134329, "grad_norm": 0.46924533300628496, "learning_rate": 2.3627287680969195e-05, "loss": 0.1064, "step": 24755 }, { "epoch": 11.548507462686567, "grad_norm": 0.4606249009491358, "learning_rate": 2.3618741362678915e-05, "loss": 0.1075, "step": 24760 }, { "epoch": 11.550839552238806, "grad_norm": 0.4463022495658314, "learning_rate": 2.3610195621494595e-05, "loss": 0.1086, "step": 24765 }, { "epoch": 11.553171641791044, "grad_norm": 0.4809245815974111, "learning_rate": 2.360165045868693e-05, "loss": 0.1092, "step": 24770 }, { "epoch": 11.555503731343283, "grad_norm": 0.4751334872066319, "learning_rate": 2.3593105875526488e-05, "loss": 0.1089, "step": 24775 }, { "epoch": 11.557835820895523, "grad_norm": 0.49258849781796826, "learning_rate": 2.358456187328376e-05, "loss": 0.1074, "step": 24780 }, { "epoch": 11.560167910447761, "grad_norm": 0.4692669630775536, "learning_rate": 2.3576018453229182e-05, "loss": 0.1109, "step": 24785 }, { "epoch": 11.5625, "grad_norm": 0.4923454609537806, "learning_rate": 2.3567475616633046e-05, "loss": 0.1081, "step": 24790 }, { "epoch": 11.564832089552239, "grad_norm": 0.4509463954681857, "learning_rate": 2.3558933364765617e-05, "loss": 0.1076, "step": 24795 }, { "epoch": 11.567164179104477, "grad_norm": 0.4659898205596048, "learning_rate": 2.355039169889704e-05, "loss": 0.1066, "step": 24800 }, { "epoch": 11.569496268656717, "grad_norm": 0.4798283771012256, "learning_rate": 2.3541850620297368e-05, "loss": 0.1068, "step": 24805 }, { "epoch": 11.571828358208956, "grad_norm": 0.4495346042452578, "learning_rate": 2.3533310130236592e-05, "loss": 0.1081, "step": 24810 }, { "epoch": 11.574160447761194, "grad_norm": 0.458337435701614, "learning_rate": 2.3524770229984587e-05, "loss": 0.1062, "step": 24815 }, { "epoch": 11.576492537313433, "grad_norm": 0.4441444095403032, "learning_rate": 2.3516230920811166e-05, "loss": 0.1093, "step": 24820 }, { "epoch": 11.578824626865671, "grad_norm": 0.47213048647765865, "learning_rate": 2.3507692203986042e-05, "loss": 0.1072, "step": 24825 }, { "epoch": 11.58115671641791, "grad_norm": 0.47284104531416543, "learning_rate": 2.3499154080778823e-05, "loss": 0.1088, "step": 24830 }, { "epoch": 11.583488805970148, "grad_norm": 0.46177746506845674, "learning_rate": 2.349061655245907e-05, "loss": 0.111, "step": 24835 }, { "epoch": 11.585820895522389, "grad_norm": 0.4917241272817865, "learning_rate": 2.3482079620296223e-05, "loss": 0.1107, "step": 24840 }, { "epoch": 11.588152985074627, "grad_norm": 0.47871942580973725, "learning_rate": 2.347354328555963e-05, "loss": 0.112, "step": 24845 }, { "epoch": 11.590485074626866, "grad_norm": 0.45338370099311387, "learning_rate": 2.3465007549518576e-05, "loss": 0.103, "step": 24850 }, { "epoch": 11.592817164179104, "grad_norm": 0.46440331040868094, "learning_rate": 2.3456472413442248e-05, "loss": 0.1098, "step": 24855 }, { "epoch": 11.595149253731343, "grad_norm": 0.4580846638702107, "learning_rate": 2.3447937878599725e-05, "loss": 0.1085, "step": 24860 }, { "epoch": 11.597481343283581, "grad_norm": 0.439954142105912, "learning_rate": 2.3439403946260007e-05, "loss": 0.1084, "step": 24865 }, { "epoch": 11.599813432835822, "grad_norm": 0.46716577455282304, "learning_rate": 2.343087061769203e-05, "loss": 0.1096, "step": 24870 }, { "epoch": 11.60214552238806, "grad_norm": 0.4770966169212388, "learning_rate": 2.3422337894164594e-05, "loss": 0.1102, "step": 24875 }, { "epoch": 11.604477611940299, "grad_norm": 0.45723437833597386, "learning_rate": 2.3413805776946453e-05, "loss": 0.1117, "step": 24880 }, { "epoch": 11.606809701492537, "grad_norm": 0.4588748060778336, "learning_rate": 2.3405274267306244e-05, "loss": 0.1094, "step": 24885 }, { "epoch": 11.609141791044776, "grad_norm": 0.4536691399726953, "learning_rate": 2.3396743366512508e-05, "loss": 0.1073, "step": 24890 }, { "epoch": 11.611473880597014, "grad_norm": 0.45676869214715504, "learning_rate": 2.338821307583373e-05, "loss": 0.1087, "step": 24895 }, { "epoch": 11.613805970149254, "grad_norm": 0.4626584530986704, "learning_rate": 2.337968339653826e-05, "loss": 0.1069, "step": 24900 }, { "epoch": 11.616138059701493, "grad_norm": 0.48988579585190867, "learning_rate": 2.33711543298944e-05, "loss": 0.1082, "step": 24905 }, { "epoch": 11.618470149253731, "grad_norm": 0.4430802934307402, "learning_rate": 2.3362625877170336e-05, "loss": 0.1054, "step": 24910 }, { "epoch": 11.62080223880597, "grad_norm": 0.46742362223373957, "learning_rate": 2.3354098039634154e-05, "loss": 0.107, "step": 24915 }, { "epoch": 11.623134328358208, "grad_norm": 0.4618198606571937, "learning_rate": 2.3345570818553874e-05, "loss": 0.11, "step": 24920 }, { "epoch": 11.625466417910447, "grad_norm": 0.45119471066743494, "learning_rate": 2.3337044215197408e-05, "loss": 0.1094, "step": 24925 }, { "epoch": 11.627798507462687, "grad_norm": 0.4621858267076391, "learning_rate": 2.3328518230832587e-05, "loss": 0.1073, "step": 24930 }, { "epoch": 11.630130597014926, "grad_norm": 0.4689700640335267, "learning_rate": 2.3319992866727136e-05, "loss": 0.1127, "step": 24935 }, { "epoch": 11.632462686567164, "grad_norm": 0.4525250820071023, "learning_rate": 2.331146812414869e-05, "loss": 0.1131, "step": 24940 }, { "epoch": 11.634794776119403, "grad_norm": 0.46956234389917767, "learning_rate": 2.330294400436481e-05, "loss": 0.1097, "step": 24945 }, { "epoch": 11.637126865671641, "grad_norm": 0.4790292219195209, "learning_rate": 2.329442050864293e-05, "loss": 0.1099, "step": 24950 }, { "epoch": 11.63945895522388, "grad_norm": 0.46103824911260566, "learning_rate": 2.3285897638250437e-05, "loss": 0.1069, "step": 24955 }, { "epoch": 11.64179104477612, "grad_norm": 0.46333022973541527, "learning_rate": 2.3277375394454594e-05, "loss": 0.1089, "step": 24960 }, { "epoch": 11.644123134328359, "grad_norm": 0.4707419032557014, "learning_rate": 2.326885377852256e-05, "loss": 0.1131, "step": 24965 }, { "epoch": 11.646455223880597, "grad_norm": 0.46923604103894473, "learning_rate": 2.326033279172144e-05, "loss": 0.1104, "step": 24970 }, { "epoch": 11.648787313432836, "grad_norm": 0.46454913694964045, "learning_rate": 2.32518124353182e-05, "loss": 0.1104, "step": 24975 }, { "epoch": 11.651119402985074, "grad_norm": 0.47240083081415163, "learning_rate": 2.324329271057976e-05, "loss": 0.1072, "step": 24980 }, { "epoch": 11.653451492537313, "grad_norm": 0.4699029258449172, "learning_rate": 2.3234773618772916e-05, "loss": 0.1124, "step": 24985 }, { "epoch": 11.655783582089553, "grad_norm": 0.4614903611489475, "learning_rate": 2.322625516116435e-05, "loss": 0.1079, "step": 24990 }, { "epoch": 11.658115671641792, "grad_norm": 0.44682212006144406, "learning_rate": 2.3217737339020706e-05, "loss": 0.1062, "step": 24995 }, { "epoch": 11.66044776119403, "grad_norm": 0.5491440913047312, "learning_rate": 2.3209220153608486e-05, "loss": 0.1123, "step": 25000 }, { "epoch": 11.662779850746269, "grad_norm": 0.45258137184008224, "learning_rate": 2.3200703606194126e-05, "loss": 0.1093, "step": 25005 }, { "epoch": 11.665111940298507, "grad_norm": 0.4593378024239075, "learning_rate": 2.3192187698043944e-05, "loss": 0.1112, "step": 25010 }, { "epoch": 11.667444029850746, "grad_norm": 0.4675038951917833, "learning_rate": 2.3183672430424163e-05, "loss": 0.1101, "step": 25015 }, { "epoch": 11.669776119402986, "grad_norm": 0.4615753682239151, "learning_rate": 2.3175157804600954e-05, "loss": 0.1078, "step": 25020 }, { "epoch": 11.672108208955224, "grad_norm": 0.4587051165953658, "learning_rate": 2.3166643821840338e-05, "loss": 0.1067, "step": 25025 }, { "epoch": 11.674440298507463, "grad_norm": 0.4779820899316151, "learning_rate": 2.3158130483408262e-05, "loss": 0.1101, "step": 25030 }, { "epoch": 11.676772388059701, "grad_norm": 0.46477555399844017, "learning_rate": 2.3149617790570587e-05, "loss": 0.1079, "step": 25035 }, { "epoch": 11.67910447761194, "grad_norm": 0.45450438949424377, "learning_rate": 2.3141105744593065e-05, "loss": 0.1095, "step": 25040 }, { "epoch": 11.681436567164178, "grad_norm": 0.45808094684571343, "learning_rate": 2.3132594346741353e-05, "loss": 0.1071, "step": 25045 }, { "epoch": 11.683768656716419, "grad_norm": 0.46176145480547803, "learning_rate": 2.3124083598281022e-05, "loss": 0.1125, "step": 25050 }, { "epoch": 11.686100746268657, "grad_norm": 0.44904844683643236, "learning_rate": 2.3115573500477534e-05, "loss": 0.1073, "step": 25055 }, { "epoch": 11.688432835820896, "grad_norm": 0.46961506622851673, "learning_rate": 2.310706405459625e-05, "loss": 0.1092, "step": 25060 }, { "epoch": 11.690764925373134, "grad_norm": 0.4481047586648315, "learning_rate": 2.3098555261902455e-05, "loss": 0.1074, "step": 25065 }, { "epoch": 11.693097014925373, "grad_norm": 0.47128049188413207, "learning_rate": 2.3090047123661324e-05, "loss": 0.1162, "step": 25070 }, { "epoch": 11.695429104477611, "grad_norm": 0.4666335188953614, "learning_rate": 2.308153964113793e-05, "loss": 0.1059, "step": 25075 }, { "epoch": 11.697761194029852, "grad_norm": 0.4720353906191408, "learning_rate": 2.3073032815597263e-05, "loss": 0.1107, "step": 25080 }, { "epoch": 11.70009328358209, "grad_norm": 0.4642803495595102, "learning_rate": 2.3064526648304195e-05, "loss": 0.1123, "step": 25085 }, { "epoch": 11.702425373134329, "grad_norm": 0.4348112786217517, "learning_rate": 2.3056021140523516e-05, "loss": 0.1068, "step": 25090 }, { "epoch": 11.704757462686567, "grad_norm": 0.4868708436583097, "learning_rate": 2.304751629351992e-05, "loss": 0.1074, "step": 25095 }, { "epoch": 11.707089552238806, "grad_norm": 0.45688737978945326, "learning_rate": 2.3039012108557982e-05, "loss": 0.1102, "step": 25100 }, { "epoch": 11.709421641791044, "grad_norm": 0.4286515318338114, "learning_rate": 2.3030508586902215e-05, "loss": 0.1115, "step": 25105 }, { "epoch": 11.711753731343283, "grad_norm": 0.47686031679978313, "learning_rate": 2.3022005729817e-05, "loss": 0.1137, "step": 25110 }, { "epoch": 11.714085820895523, "grad_norm": 0.45831536215984564, "learning_rate": 2.301350353856661e-05, "loss": 0.1075, "step": 25115 }, { "epoch": 11.716417910447761, "grad_norm": 0.4817083573492822, "learning_rate": 2.3005002014415274e-05, "loss": 0.1108, "step": 25120 }, { "epoch": 11.71875, "grad_norm": 0.46619634151241873, "learning_rate": 2.2996501158627054e-05, "loss": 0.1107, "step": 25125 }, { "epoch": 11.721082089552239, "grad_norm": 0.46154795004506843, "learning_rate": 2.2988000972465978e-05, "loss": 0.1075, "step": 25130 }, { "epoch": 11.723414179104477, "grad_norm": 0.45569395196481344, "learning_rate": 2.297950145719593e-05, "loss": 0.1087, "step": 25135 }, { "epoch": 11.725746268656717, "grad_norm": 0.44907899134221524, "learning_rate": 2.297100261408069e-05, "loss": 0.1098, "step": 25140 }, { "epoch": 11.728078358208956, "grad_norm": 0.4733472332788013, "learning_rate": 2.2962504444383974e-05, "loss": 0.1101, "step": 25145 }, { "epoch": 11.730410447761194, "grad_norm": 0.4512241965461789, "learning_rate": 2.295400694936937e-05, "loss": 0.1086, "step": 25150 }, { "epoch": 11.732742537313433, "grad_norm": 0.4547935705277222, "learning_rate": 2.2945510130300386e-05, "loss": 0.11, "step": 25155 }, { "epoch": 11.735074626865671, "grad_norm": 0.47703903372297096, "learning_rate": 2.2937013988440405e-05, "loss": 0.1128, "step": 25160 }, { "epoch": 11.73740671641791, "grad_norm": 0.46499133283869754, "learning_rate": 2.2928518525052717e-05, "loss": 0.1092, "step": 25165 }, { "epoch": 11.739738805970148, "grad_norm": 0.4813656716986557, "learning_rate": 2.2920023741400533e-05, "loss": 0.1087, "step": 25170 }, { "epoch": 11.742070895522389, "grad_norm": 0.46956550830805355, "learning_rate": 2.2911529638746927e-05, "loss": 0.1073, "step": 25175 }, { "epoch": 11.744402985074627, "grad_norm": 0.4873928242812786, "learning_rate": 2.2903036218354912e-05, "loss": 0.1111, "step": 25180 }, { "epoch": 11.746735074626866, "grad_norm": 0.4493258798095712, "learning_rate": 2.289454348148736e-05, "loss": 0.1101, "step": 25185 }, { "epoch": 11.749067164179104, "grad_norm": 0.4618271107824901, "learning_rate": 2.288605142940707e-05, "loss": 0.1095, "step": 25190 }, { "epoch": 11.751399253731343, "grad_norm": 0.48022301782885657, "learning_rate": 2.2877560063376723e-05, "loss": 0.1113, "step": 25195 }, { "epoch": 11.753731343283581, "grad_norm": 0.474140136303263, "learning_rate": 2.2869069384658908e-05, "loss": 0.1119, "step": 25200 }, { "epoch": 11.756063432835822, "grad_norm": 0.48293741387313555, "learning_rate": 2.2860579394516107e-05, "loss": 0.113, "step": 25205 }, { "epoch": 11.75839552238806, "grad_norm": 0.4646679112540921, "learning_rate": 2.2852090094210698e-05, "loss": 0.108, "step": 25210 }, { "epoch": 11.760727611940299, "grad_norm": 0.4794470260815294, "learning_rate": 2.2843601485004955e-05, "loss": 0.1115, "step": 25215 }, { "epoch": 11.763059701492537, "grad_norm": 0.46913248723860745, "learning_rate": 2.283511356816106e-05, "loss": 0.1108, "step": 25220 }, { "epoch": 11.765391791044776, "grad_norm": 0.47531561658722926, "learning_rate": 2.2826626344941083e-05, "loss": 0.1107, "step": 25225 }, { "epoch": 11.767723880597014, "grad_norm": 0.46940680015065567, "learning_rate": 2.2818139816607e-05, "loss": 0.1107, "step": 25230 }, { "epoch": 11.770055970149254, "grad_norm": 0.47961849183461575, "learning_rate": 2.2809653984420652e-05, "loss": 0.11, "step": 25235 }, { "epoch": 11.772388059701493, "grad_norm": 0.4580811650864949, "learning_rate": 2.280116884964383e-05, "loss": 0.11, "step": 25240 }, { "epoch": 11.774720149253731, "grad_norm": 0.4474933170423436, "learning_rate": 2.279268441353818e-05, "loss": 0.1124, "step": 25245 }, { "epoch": 11.77705223880597, "grad_norm": 0.4839443807701576, "learning_rate": 2.2784200677365242e-05, "loss": 0.1107, "step": 25250 }, { "epoch": 11.779384328358208, "grad_norm": 0.4729706994835885, "learning_rate": 2.2775717642386497e-05, "loss": 0.1129, "step": 25255 }, { "epoch": 11.781716417910447, "grad_norm": 0.4602490594415257, "learning_rate": 2.276723530986327e-05, "loss": 0.1106, "step": 25260 }, { "epoch": 11.784048507462687, "grad_norm": 0.48512309004028337, "learning_rate": 2.27587536810568e-05, "loss": 0.1136, "step": 25265 }, { "epoch": 11.786380597014926, "grad_norm": 0.4578220229155314, "learning_rate": 2.2750272757228235e-05, "loss": 0.1108, "step": 25270 }, { "epoch": 11.788712686567164, "grad_norm": 0.4640232095348435, "learning_rate": 2.27417925396386e-05, "loss": 0.1116, "step": 25275 }, { "epoch": 11.791044776119403, "grad_norm": 0.45154594377054347, "learning_rate": 2.273331302954883e-05, "loss": 0.1093, "step": 25280 }, { "epoch": 11.793376865671641, "grad_norm": 0.4645847953647543, "learning_rate": 2.2724834228219742e-05, "loss": 0.1105, "step": 25285 }, { "epoch": 11.79570895522388, "grad_norm": 0.46478927263054026, "learning_rate": 2.271635613691205e-05, "loss": 0.1084, "step": 25290 }, { "epoch": 11.79804104477612, "grad_norm": 0.47872881493178177, "learning_rate": 2.2707878756886368e-05, "loss": 0.1125, "step": 25295 }, { "epoch": 11.800373134328359, "grad_norm": 0.4626912127832839, "learning_rate": 2.26994020894032e-05, "loss": 0.1115, "step": 25300 }, { "epoch": 11.802705223880597, "grad_norm": 0.44729803750514874, "learning_rate": 2.2690926135722946e-05, "loss": 0.109, "step": 25305 }, { "epoch": 11.805037313432836, "grad_norm": 0.4574591347280043, "learning_rate": 2.2682450897105905e-05, "loss": 0.1102, "step": 25310 }, { "epoch": 11.807369402985074, "grad_norm": 0.4776759069109248, "learning_rate": 2.2673976374812246e-05, "loss": 0.1102, "step": 25315 }, { "epoch": 11.809701492537313, "grad_norm": 0.45759264709260344, "learning_rate": 2.266550257010207e-05, "loss": 0.1117, "step": 25320 }, { "epoch": 11.812033582089553, "grad_norm": 0.48748623518035433, "learning_rate": 2.265702948423534e-05, "loss": 0.1136, "step": 25325 }, { "epoch": 11.814365671641792, "grad_norm": 0.46413311602771085, "learning_rate": 2.2648557118471918e-05, "loss": 0.1137, "step": 25330 }, { "epoch": 11.81669776119403, "grad_norm": 0.48189895053804144, "learning_rate": 2.264008547407158e-05, "loss": 0.1162, "step": 25335 }, { "epoch": 11.819029850746269, "grad_norm": 0.4959341438824467, "learning_rate": 2.2631614552293963e-05, "loss": 0.1111, "step": 25340 }, { "epoch": 11.821361940298507, "grad_norm": 0.4738115124465132, "learning_rate": 2.2623144354398623e-05, "loss": 0.1143, "step": 25345 }, { "epoch": 11.823694029850746, "grad_norm": 0.4752528131404825, "learning_rate": 2.2614674881644974e-05, "loss": 0.1114, "step": 25350 }, { "epoch": 11.826026119402986, "grad_norm": 0.48169775112637314, "learning_rate": 2.2606206135292385e-05, "loss": 0.1136, "step": 25355 }, { "epoch": 11.828358208955224, "grad_norm": 0.48265918480676084, "learning_rate": 2.2597738116600048e-05, "loss": 0.1153, "step": 25360 }, { "epoch": 11.830690298507463, "grad_norm": 0.49016424660315944, "learning_rate": 2.2589270826827073e-05, "loss": 0.1125, "step": 25365 }, { "epoch": 11.833022388059701, "grad_norm": 0.46545687149504084, "learning_rate": 2.2580804267232484e-05, "loss": 0.1098, "step": 25370 }, { "epoch": 11.83535447761194, "grad_norm": 0.4572714111753946, "learning_rate": 2.257233843907517e-05, "loss": 0.1064, "step": 25375 }, { "epoch": 11.837686567164178, "grad_norm": 0.4676624894633709, "learning_rate": 2.2563873343613916e-05, "loss": 0.11, "step": 25380 }, { "epoch": 11.840018656716419, "grad_norm": 0.4829486799163735, "learning_rate": 2.2555408982107407e-05, "loss": 0.1147, "step": 25385 }, { "epoch": 11.842350746268657, "grad_norm": 0.45398706427265584, "learning_rate": 2.2546945355814196e-05, "loss": 0.1076, "step": 25390 }, { "epoch": 11.844682835820896, "grad_norm": 0.4793002222868968, "learning_rate": 2.2538482465992762e-05, "loss": 0.1134, "step": 25395 }, { "epoch": 11.847014925373134, "grad_norm": 0.4726470134818361, "learning_rate": 2.2530020313901446e-05, "loss": 0.1084, "step": 25400 }, { "epoch": 11.849347014925373, "grad_norm": 0.4917520676295972, "learning_rate": 2.2521558900798495e-05, "loss": 0.1105, "step": 25405 }, { "epoch": 11.851679104477611, "grad_norm": 0.4910255490009665, "learning_rate": 2.2513098227942032e-05, "loss": 0.1126, "step": 25410 }, { "epoch": 11.854011194029852, "grad_norm": 0.47368973052486335, "learning_rate": 2.250463829659008e-05, "loss": 0.1115, "step": 25415 }, { "epoch": 11.85634328358209, "grad_norm": 0.48476350688608866, "learning_rate": 2.249617910800056e-05, "loss": 0.1126, "step": 25420 }, { "epoch": 11.858675373134329, "grad_norm": 0.47641715739258184, "learning_rate": 2.248772066343125e-05, "loss": 0.1132, "step": 25425 }, { "epoch": 11.861007462686567, "grad_norm": 0.49429586713307094, "learning_rate": 2.2479262964139863e-05, "loss": 0.114, "step": 25430 }, { "epoch": 11.863339552238806, "grad_norm": 0.4548578229511978, "learning_rate": 2.2470806011383972e-05, "loss": 0.1078, "step": 25435 }, { "epoch": 11.865671641791044, "grad_norm": 0.46654667947303335, "learning_rate": 2.2462349806421035e-05, "loss": 0.1098, "step": 25440 }, { "epoch": 11.868003731343283, "grad_norm": 0.47068578699834, "learning_rate": 2.245389435050842e-05, "loss": 0.1123, "step": 25445 }, { "epoch": 11.870335820895523, "grad_norm": 0.4639554917685537, "learning_rate": 2.244543964490336e-05, "loss": 0.1111, "step": 25450 }, { "epoch": 11.872667910447761, "grad_norm": 0.45819900607110625, "learning_rate": 2.2436985690863004e-05, "loss": 0.1144, "step": 25455 }, { "epoch": 11.875, "grad_norm": 0.4742478760672834, "learning_rate": 2.2428532489644368e-05, "loss": 0.1112, "step": 25460 }, { "epoch": 11.877332089552239, "grad_norm": 0.4657395676739404, "learning_rate": 2.2420080042504348e-05, "loss": 0.11, "step": 25465 }, { "epoch": 11.879664179104477, "grad_norm": 0.46376764370077894, "learning_rate": 2.2411628350699766e-05, "loss": 0.112, "step": 25470 }, { "epoch": 11.881996268656717, "grad_norm": 0.46128163931450306, "learning_rate": 2.2403177415487285e-05, "loss": 0.1118, "step": 25475 }, { "epoch": 11.884328358208956, "grad_norm": 0.4574151518941943, "learning_rate": 2.2394727238123497e-05, "loss": 0.1117, "step": 25480 }, { "epoch": 11.886660447761194, "grad_norm": 0.4623090146727044, "learning_rate": 2.2386277819864853e-05, "loss": 0.111, "step": 25485 }, { "epoch": 11.888992537313433, "grad_norm": 0.4832602180482283, "learning_rate": 2.23778291619677e-05, "loss": 0.1156, "step": 25490 }, { "epoch": 11.891324626865671, "grad_norm": 0.47929135253861627, "learning_rate": 2.2369381265688277e-05, "loss": 0.1114, "step": 25495 }, { "epoch": 11.89365671641791, "grad_norm": 0.4852924743238566, "learning_rate": 2.236093413228269e-05, "loss": 0.1112, "step": 25500 }, { "epoch": 11.895988805970148, "grad_norm": 0.48191370560927704, "learning_rate": 2.2352487763006975e-05, "loss": 0.1128, "step": 25505 }, { "epoch": 11.898320895522389, "grad_norm": 0.4727111053513991, "learning_rate": 2.2344042159117006e-05, "loss": 0.1132, "step": 25510 }, { "epoch": 11.900652985074627, "grad_norm": 0.4674939187901427, "learning_rate": 2.2335597321868568e-05, "loss": 0.115, "step": 25515 }, { "epoch": 11.902985074626866, "grad_norm": 0.46187120701897455, "learning_rate": 2.2327153252517323e-05, "loss": 0.1083, "step": 25520 }, { "epoch": 11.905317164179104, "grad_norm": 0.4764720003884542, "learning_rate": 2.2318709952318822e-05, "loss": 0.1111, "step": 25525 }, { "epoch": 11.907649253731343, "grad_norm": 0.4690304003592535, "learning_rate": 2.2310267422528523e-05, "loss": 0.1119, "step": 25530 }, { "epoch": 11.909981343283581, "grad_norm": 0.470853681137574, "learning_rate": 2.2301825664401733e-05, "loss": 0.1114, "step": 25535 }, { "epoch": 11.912313432835822, "grad_norm": 0.4692623095667456, "learning_rate": 2.2293384679193645e-05, "loss": 0.1095, "step": 25540 }, { "epoch": 11.91464552238806, "grad_norm": 0.4880829890999912, "learning_rate": 2.228494446815939e-05, "loss": 0.1112, "step": 25545 }, { "epoch": 11.916977611940299, "grad_norm": 0.4586350813572882, "learning_rate": 2.2276505032553912e-05, "loss": 0.1113, "step": 25550 }, { "epoch": 11.919309701492537, "grad_norm": 0.45592959768694097, "learning_rate": 2.2268066373632096e-05, "loss": 0.1115, "step": 25555 }, { "epoch": 11.921641791044776, "grad_norm": 0.4920831993284916, "learning_rate": 2.2259628492648676e-05, "loss": 0.1126, "step": 25560 }, { "epoch": 11.923973880597014, "grad_norm": 0.4717455942780976, "learning_rate": 2.2251191390858295e-05, "loss": 0.1118, "step": 25565 }, { "epoch": 11.926305970149254, "grad_norm": 0.4894178316206465, "learning_rate": 2.224275506951547e-05, "loss": 0.1149, "step": 25570 }, { "epoch": 11.928638059701493, "grad_norm": 0.4610737139018976, "learning_rate": 2.2234319529874586e-05, "loss": 0.1065, "step": 25575 }, { "epoch": 11.930970149253731, "grad_norm": 0.4798539025609699, "learning_rate": 2.2225884773189936e-05, "loss": 0.1105, "step": 25580 }, { "epoch": 11.93330223880597, "grad_norm": 0.4766974596211768, "learning_rate": 2.221745080071569e-05, "loss": 0.116, "step": 25585 }, { "epoch": 11.935634328358208, "grad_norm": 0.48368213509943764, "learning_rate": 2.2209017613705908e-05, "loss": 0.112, "step": 25590 }, { "epoch": 11.937966417910447, "grad_norm": 0.4687142228326237, "learning_rate": 2.22005852134145e-05, "loss": 0.1133, "step": 25595 }, { "epoch": 11.940298507462687, "grad_norm": 0.4715219068589985, "learning_rate": 2.2192153601095293e-05, "loss": 0.111, "step": 25600 }, { "epoch": 11.942630597014926, "grad_norm": 0.479576569294674, "learning_rate": 2.2183722778002004e-05, "loss": 0.11, "step": 25605 }, { "epoch": 11.944962686567164, "grad_norm": 0.4843101700616122, "learning_rate": 2.2175292745388186e-05, "loss": 0.1142, "step": 25610 }, { "epoch": 11.947294776119403, "grad_norm": 0.4798730939667242, "learning_rate": 2.2166863504507336e-05, "loss": 0.1144, "step": 25615 }, { "epoch": 11.949626865671641, "grad_norm": 0.4508975669882925, "learning_rate": 2.2158435056612775e-05, "loss": 0.1091, "step": 25620 }, { "epoch": 11.95195895522388, "grad_norm": 0.4605736058669018, "learning_rate": 2.215000740295774e-05, "loss": 0.1128, "step": 25625 }, { "epoch": 11.95429104477612, "grad_norm": 0.47077182115819727, "learning_rate": 2.2141580544795353e-05, "loss": 0.1127, "step": 25630 }, { "epoch": 11.956623134328359, "grad_norm": 0.4780620541312793, "learning_rate": 2.2133154483378587e-05, "loss": 0.1138, "step": 25635 }, { "epoch": 11.958955223880597, "grad_norm": 0.46350709455889866, "learning_rate": 2.2124729219960343e-05, "loss": 0.1122, "step": 25640 }, { "epoch": 11.961287313432836, "grad_norm": 0.4842524962552419, "learning_rate": 2.211630475579336e-05, "loss": 0.1115, "step": 25645 }, { "epoch": 11.963619402985074, "grad_norm": 0.4659487658926835, "learning_rate": 2.2107881092130266e-05, "loss": 0.1118, "step": 25650 }, { "epoch": 11.965951492537313, "grad_norm": 0.4607289037472573, "learning_rate": 2.209945823022361e-05, "loss": 0.1095, "step": 25655 }, { "epoch": 11.968283582089553, "grad_norm": 0.47113047177252615, "learning_rate": 2.2091036171325754e-05, "loss": 0.1105, "step": 25660 }, { "epoch": 11.970615671641792, "grad_norm": 0.45703528916808595, "learning_rate": 2.2082614916689002e-05, "loss": 0.1125, "step": 25665 }, { "epoch": 11.97294776119403, "grad_norm": 0.4717239000854319, "learning_rate": 2.2074194467565514e-05, "loss": 0.1119, "step": 25670 }, { "epoch": 11.975279850746269, "grad_norm": 0.4803217621381574, "learning_rate": 2.2065774825207304e-05, "loss": 0.1146, "step": 25675 }, { "epoch": 11.977611940298507, "grad_norm": 0.47506710153444937, "learning_rate": 2.2057355990866328e-05, "loss": 0.1114, "step": 25680 }, { "epoch": 11.979944029850746, "grad_norm": 0.47947065881714096, "learning_rate": 2.204893796579436e-05, "loss": 0.112, "step": 25685 }, { "epoch": 11.982276119402986, "grad_norm": 0.48260999967174417, "learning_rate": 2.2040520751243094e-05, "loss": 0.1134, "step": 25690 }, { "epoch": 11.984608208955224, "grad_norm": 0.4666160711524732, "learning_rate": 2.2032104348464082e-05, "loss": 0.1131, "step": 25695 }, { "epoch": 11.986940298507463, "grad_norm": 0.4795692067172651, "learning_rate": 2.2023688758708767e-05, "loss": 0.1157, "step": 25700 }, { "epoch": 11.989272388059701, "grad_norm": 0.47420155913851947, "learning_rate": 2.201527398322846e-05, "loss": 0.1144, "step": 25705 }, { "epoch": 11.99160447761194, "grad_norm": 0.4935165205493315, "learning_rate": 2.2006860023274363e-05, "loss": 0.1139, "step": 25710 }, { "epoch": 11.993936567164178, "grad_norm": 0.4716415738521498, "learning_rate": 2.199844688009755e-05, "loss": 0.1112, "step": 25715 }, { "epoch": 11.996268656716419, "grad_norm": 0.4493728448411317, "learning_rate": 2.199003455494898e-05, "loss": 0.1114, "step": 25720 }, { "epoch": 11.998600746268657, "grad_norm": 0.4822121861529902, "learning_rate": 2.198162304907947e-05, "loss": 0.1179, "step": 25725 }, { "epoch": 12.000932835820896, "grad_norm": 0.4001559151032121, "learning_rate": 2.1973212363739747e-05, "loss": 0.0985, "step": 25730 }, { "epoch": 12.003264925373134, "grad_norm": 0.47449674881396753, "learning_rate": 2.1964802500180388e-05, "loss": 0.0694, "step": 25735 }, { "epoch": 12.005597014925373, "grad_norm": 0.41889051272462496, "learning_rate": 2.1956393459651864e-05, "loss": 0.069, "step": 25740 }, { "epoch": 12.007929104477611, "grad_norm": 0.4266260451650987, "learning_rate": 2.1947985243404522e-05, "loss": 0.0697, "step": 25745 }, { "epoch": 12.010261194029852, "grad_norm": 0.41219622134817546, "learning_rate": 2.1939577852688576e-05, "loss": 0.0712, "step": 25750 }, { "epoch": 12.01259328358209, "grad_norm": 0.41537365143912586, "learning_rate": 2.1931171288754133e-05, "loss": 0.0657, "step": 25755 }, { "epoch": 12.014925373134329, "grad_norm": 0.4128002122368376, "learning_rate": 2.1922765552851155e-05, "loss": 0.0673, "step": 25760 }, { "epoch": 12.017257462686567, "grad_norm": 0.4236072838999586, "learning_rate": 2.1914360646229508e-05, "loss": 0.0658, "step": 25765 }, { "epoch": 12.019589552238806, "grad_norm": 0.37940685478838293, "learning_rate": 2.190595657013892e-05, "loss": 0.0668, "step": 25770 }, { "epoch": 12.021921641791044, "grad_norm": 0.40586267364306794, "learning_rate": 2.1897553325828984e-05, "loss": 0.0658, "step": 25775 }, { "epoch": 12.024253731343284, "grad_norm": 0.4056541183134531, "learning_rate": 2.1889150914549195e-05, "loss": 0.0694, "step": 25780 }, { "epoch": 12.026585820895523, "grad_norm": 0.3858822370293439, "learning_rate": 2.18807493375489e-05, "loss": 0.0662, "step": 25785 }, { "epoch": 12.028917910447761, "grad_norm": 0.4139678179137175, "learning_rate": 2.1872348596077348e-05, "loss": 0.0679, "step": 25790 }, { "epoch": 12.03125, "grad_norm": 0.3895755243781418, "learning_rate": 2.186394869138364e-05, "loss": 0.0653, "step": 25795 }, { "epoch": 12.033582089552239, "grad_norm": 0.4250634926700904, "learning_rate": 2.1855549624716755e-05, "loss": 0.0657, "step": 25800 }, { "epoch": 12.035914179104477, "grad_norm": 0.4047531610440189, "learning_rate": 2.1847151397325567e-05, "loss": 0.067, "step": 25805 }, { "epoch": 12.038246268656716, "grad_norm": 0.41623852264015065, "learning_rate": 2.1838754010458796e-05, "loss": 0.0687, "step": 25810 }, { "epoch": 12.040578358208956, "grad_norm": 0.41474679085126537, "learning_rate": 2.183035746536507e-05, "loss": 0.0703, "step": 25815 }, { "epoch": 12.042910447761194, "grad_norm": 0.4112187017371647, "learning_rate": 2.182196176329287e-05, "loss": 0.0683, "step": 25820 }, { "epoch": 12.045242537313433, "grad_norm": 0.41292425877687583, "learning_rate": 2.181356690549054e-05, "loss": 0.0682, "step": 25825 }, { "epoch": 12.047574626865671, "grad_norm": 0.42165061384082403, "learning_rate": 2.1805172893206342e-05, "loss": 0.0655, "step": 25830 }, { "epoch": 12.04990671641791, "grad_norm": 0.4196250605360094, "learning_rate": 2.179677972768836e-05, "loss": 0.0676, "step": 25835 }, { "epoch": 12.052238805970148, "grad_norm": 0.4242658128836897, "learning_rate": 2.1788387410184603e-05, "loss": 0.0701, "step": 25840 }, { "epoch": 12.054570895522389, "grad_norm": 0.4165420119368905, "learning_rate": 2.177999594194291e-05, "loss": 0.0668, "step": 25845 }, { "epoch": 12.056902985074627, "grad_norm": 0.3923030369869099, "learning_rate": 2.177160532421101e-05, "loss": 0.0669, "step": 25850 }, { "epoch": 12.059235074626866, "grad_norm": 0.42524038648514584, "learning_rate": 2.1763215558236515e-05, "loss": 0.069, "step": 25855 }, { "epoch": 12.061567164179104, "grad_norm": 0.4197184562800357, "learning_rate": 2.1754826645266895e-05, "loss": 0.0692, "step": 25860 }, { "epoch": 12.063899253731343, "grad_norm": 0.41685208249624767, "learning_rate": 2.1746438586549516e-05, "loss": 0.0678, "step": 25865 }, { "epoch": 12.066231343283581, "grad_norm": 0.423075715654552, "learning_rate": 2.1738051383331598e-05, "loss": 0.0663, "step": 25870 }, { "epoch": 12.068563432835822, "grad_norm": 0.3890966862182446, "learning_rate": 2.1729665036860225e-05, "loss": 0.0648, "step": 25875 }, { "epoch": 12.07089552238806, "grad_norm": 0.39959679097073, "learning_rate": 2.172127954838238e-05, "loss": 0.0656, "step": 25880 }, { "epoch": 12.073227611940299, "grad_norm": 0.42725512822165385, "learning_rate": 2.1712894919144888e-05, "loss": 0.0686, "step": 25885 }, { "epoch": 12.075559701492537, "grad_norm": 0.39052350409051734, "learning_rate": 2.1704511150394486e-05, "loss": 0.0647, "step": 25890 }, { "epoch": 12.077891791044776, "grad_norm": 0.4130540601718914, "learning_rate": 2.1696128243377743e-05, "loss": 0.0684, "step": 25895 }, { "epoch": 12.080223880597014, "grad_norm": 0.3943262735273733, "learning_rate": 2.1687746199341118e-05, "loss": 0.0689, "step": 25900 }, { "epoch": 12.082555970149254, "grad_norm": 0.4287077596595933, "learning_rate": 2.1679365019530956e-05, "loss": 0.0686, "step": 25905 }, { "epoch": 12.084888059701493, "grad_norm": 0.439420339651706, "learning_rate": 2.167098470519344e-05, "loss": 0.0678, "step": 25910 }, { "epoch": 12.087220149253731, "grad_norm": 0.40885731822492716, "learning_rate": 2.1662605257574647e-05, "loss": 0.0688, "step": 25915 }, { "epoch": 12.08955223880597, "grad_norm": 0.4216958459992359, "learning_rate": 2.165422667792053e-05, "loss": 0.0684, "step": 25920 }, { "epoch": 12.091884328358208, "grad_norm": 0.4238740132054451, "learning_rate": 2.1645848967476895e-05, "loss": 0.0681, "step": 25925 }, { "epoch": 12.094216417910447, "grad_norm": 0.4381813053733037, "learning_rate": 2.1637472127489427e-05, "loss": 0.0683, "step": 25930 }, { "epoch": 12.096548507462687, "grad_norm": 0.4364325313403454, "learning_rate": 2.1629096159203683e-05, "loss": 0.0691, "step": 25935 }, { "epoch": 12.098880597014926, "grad_norm": 0.40778382188980095, "learning_rate": 2.162072106386509e-05, "loss": 0.0675, "step": 25940 }, { "epoch": 12.101212686567164, "grad_norm": 0.43001271159184334, "learning_rate": 2.161234684271895e-05, "loss": 0.0693, "step": 25945 }, { "epoch": 12.103544776119403, "grad_norm": 0.4223810299719113, "learning_rate": 2.1603973497010417e-05, "loss": 0.069, "step": 25950 }, { "epoch": 12.105876865671641, "grad_norm": 0.4144735455612087, "learning_rate": 2.1595601027984535e-05, "loss": 0.0711, "step": 25955 }, { "epoch": 12.10820895522388, "grad_norm": 0.42502268936905524, "learning_rate": 2.158722943688621e-05, "loss": 0.068, "step": 25960 }, { "epoch": 12.11054104477612, "grad_norm": 0.4409980127441515, "learning_rate": 2.1578858724960215e-05, "loss": 0.07, "step": 25965 }, { "epoch": 12.112873134328359, "grad_norm": 0.4213309522937229, "learning_rate": 2.1570488893451203e-05, "loss": 0.0711, "step": 25970 }, { "epoch": 12.115205223880597, "grad_norm": 0.42078834157956957, "learning_rate": 2.1562119943603672e-05, "loss": 0.0694, "step": 25975 }, { "epoch": 12.117537313432836, "grad_norm": 0.48886982416087943, "learning_rate": 2.1553751876662014e-05, "loss": 0.0696, "step": 25980 }, { "epoch": 12.119869402985074, "grad_norm": 0.4389836151688408, "learning_rate": 2.154538469387048e-05, "loss": 0.0713, "step": 25985 }, { "epoch": 12.122201492537313, "grad_norm": 0.45102013736327173, "learning_rate": 2.1537018396473195e-05, "loss": 0.0703, "step": 25990 }, { "epoch": 12.124533582089553, "grad_norm": 0.41850393870332814, "learning_rate": 2.1528652985714143e-05, "loss": 0.0683, "step": 25995 }, { "epoch": 12.126865671641792, "grad_norm": 0.4204898960677714, "learning_rate": 2.1520288462837175e-05, "loss": 0.0708, "step": 26000 }, { "epoch": 12.12919776119403, "grad_norm": 0.42328243772240093, "learning_rate": 2.1511924829086015e-05, "loss": 0.0691, "step": 26005 }, { "epoch": 12.131529850746269, "grad_norm": 0.4407720887909399, "learning_rate": 2.1503562085704265e-05, "loss": 0.07, "step": 26010 }, { "epoch": 12.133861940298507, "grad_norm": 0.4238942130084169, "learning_rate": 2.149520023393538e-05, "loss": 0.0698, "step": 26015 }, { "epoch": 12.136194029850746, "grad_norm": 0.41175173337674476, "learning_rate": 2.148683927502269e-05, "loss": 0.0694, "step": 26020 }, { "epoch": 12.138526119402986, "grad_norm": 0.4129210798626654, "learning_rate": 2.1478479210209383e-05, "loss": 0.0673, "step": 26025 }, { "epoch": 12.140858208955224, "grad_norm": 0.44253972682716763, "learning_rate": 2.147012004073853e-05, "loss": 0.0699, "step": 26030 }, { "epoch": 12.143190298507463, "grad_norm": 0.43434312183044044, "learning_rate": 2.1461761767853038e-05, "loss": 0.0699, "step": 26035 }, { "epoch": 12.145522388059701, "grad_norm": 0.4316204177062014, "learning_rate": 2.1453404392795735e-05, "loss": 0.07, "step": 26040 }, { "epoch": 12.14785447761194, "grad_norm": 0.4193064915200322, "learning_rate": 2.1445047916809262e-05, "loss": 0.0692, "step": 26045 }, { "epoch": 12.150186567164178, "grad_norm": 0.43758925682792166, "learning_rate": 2.143669234113614e-05, "loss": 0.0706, "step": 26050 }, { "epoch": 12.152518656716419, "grad_norm": 0.43454094124318227, "learning_rate": 2.1428337667018782e-05, "loss": 0.0666, "step": 26055 }, { "epoch": 12.154850746268657, "grad_norm": 0.4565587691606919, "learning_rate": 2.1419983895699437e-05, "loss": 0.0715, "step": 26060 }, { "epoch": 12.157182835820896, "grad_norm": 0.43060175801437567, "learning_rate": 2.141163102842023e-05, "loss": 0.0681, "step": 26065 }, { "epoch": 12.159514925373134, "grad_norm": 0.43382240065195454, "learning_rate": 2.1403279066423166e-05, "loss": 0.0661, "step": 26070 }, { "epoch": 12.161847014925373, "grad_norm": 0.4585976175927166, "learning_rate": 2.1394928010950077e-05, "loss": 0.0705, "step": 26075 }, { "epoch": 12.164179104477611, "grad_norm": 0.4381030134799799, "learning_rate": 2.1386577863242708e-05, "loss": 0.0703, "step": 26080 }, { "epoch": 12.166511194029852, "grad_norm": 0.4259104016335515, "learning_rate": 2.1378228624542628e-05, "loss": 0.07, "step": 26085 }, { "epoch": 12.16884328358209, "grad_norm": 0.4469508048335161, "learning_rate": 2.136988029609131e-05, "loss": 0.0714, "step": 26090 }, { "epoch": 12.171175373134329, "grad_norm": 0.4710147647822976, "learning_rate": 2.1361532879130058e-05, "loss": 0.0731, "step": 26095 }, { "epoch": 12.173507462686567, "grad_norm": 0.445021580240508, "learning_rate": 2.135318637490004e-05, "loss": 0.0724, "step": 26100 }, { "epoch": 12.175839552238806, "grad_norm": 0.42504645329758, "learning_rate": 2.1344840784642322e-05, "loss": 0.073, "step": 26105 }, { "epoch": 12.178171641791044, "grad_norm": 0.42082185823188056, "learning_rate": 2.1336496109597804e-05, "loss": 0.0694, "step": 26110 }, { "epoch": 12.180503731343283, "grad_norm": 0.464147778696169, "learning_rate": 2.1328152351007264e-05, "loss": 0.0698, "step": 26115 }, { "epoch": 12.182835820895523, "grad_norm": 0.4420415888045598, "learning_rate": 2.131980951011134e-05, "loss": 0.0718, "step": 26120 }, { "epoch": 12.185167910447761, "grad_norm": 0.4311697077693, "learning_rate": 2.1311467588150518e-05, "loss": 0.0739, "step": 26125 }, { "epoch": 12.1875, "grad_norm": 0.448289242002901, "learning_rate": 2.1303126586365175e-05, "loss": 0.0696, "step": 26130 }, { "epoch": 12.189832089552239, "grad_norm": 0.42328523523591727, "learning_rate": 2.1294786505995534e-05, "loss": 0.0696, "step": 26135 }, { "epoch": 12.192164179104477, "grad_norm": 0.4243392229458906, "learning_rate": 2.1286447348281695e-05, "loss": 0.0711, "step": 26140 }, { "epoch": 12.194496268656717, "grad_norm": 0.44416742713846435, "learning_rate": 2.1278109114463594e-05, "loss": 0.071, "step": 26145 }, { "epoch": 12.196828358208956, "grad_norm": 0.4464203164739769, "learning_rate": 2.126977180578106e-05, "loss": 0.0713, "step": 26150 }, { "epoch": 12.199160447761194, "grad_norm": 0.42943966346346807, "learning_rate": 2.1261435423473765e-05, "loss": 0.0719, "step": 26155 }, { "epoch": 12.201492537313433, "grad_norm": 0.45261247667368454, "learning_rate": 2.1253099968781237e-05, "loss": 0.0747, "step": 26160 }, { "epoch": 12.203824626865671, "grad_norm": 0.43341532203477223, "learning_rate": 2.1244765442942904e-05, "loss": 0.0703, "step": 26165 }, { "epoch": 12.20615671641791, "grad_norm": 0.44983176449593554, "learning_rate": 2.1236431847198017e-05, "loss": 0.0711, "step": 26170 }, { "epoch": 12.208488805970148, "grad_norm": 0.4213774403171939, "learning_rate": 2.1228099182785693e-05, "loss": 0.0707, "step": 26175 }, { "epoch": 12.210820895522389, "grad_norm": 0.44497295877131054, "learning_rate": 2.1219767450944938e-05, "loss": 0.0743, "step": 26180 }, { "epoch": 12.213152985074627, "grad_norm": 0.4465808531303172, "learning_rate": 2.1211436652914585e-05, "loss": 0.0727, "step": 26185 }, { "epoch": 12.215485074626866, "grad_norm": 0.44631408902554864, "learning_rate": 2.1203106789933352e-05, "loss": 0.0712, "step": 26190 }, { "epoch": 12.217817164179104, "grad_norm": 0.4399484428353346, "learning_rate": 2.119477786323981e-05, "loss": 0.072, "step": 26195 }, { "epoch": 12.220149253731343, "grad_norm": 0.44477632780720483, "learning_rate": 2.1186449874072385e-05, "loss": 0.0744, "step": 26200 }, { "epoch": 12.222481343283581, "grad_norm": 0.4523526902366323, "learning_rate": 2.1178122823669373e-05, "loss": 0.0708, "step": 26205 }, { "epoch": 12.224813432835822, "grad_norm": 0.428180695483211, "learning_rate": 2.116979671326892e-05, "loss": 0.0707, "step": 26210 }, { "epoch": 12.22714552238806, "grad_norm": 0.4234327826153676, "learning_rate": 2.1161471544109057e-05, "loss": 0.0725, "step": 26215 }, { "epoch": 12.229477611940299, "grad_norm": 0.4530861064035394, "learning_rate": 2.115314731742764e-05, "loss": 0.0729, "step": 26220 }, { "epoch": 12.231809701492537, "grad_norm": 0.4505837264268026, "learning_rate": 2.1144824034462403e-05, "loss": 0.0721, "step": 26225 }, { "epoch": 12.234141791044776, "grad_norm": 0.44906484034866034, "learning_rate": 2.1136501696450943e-05, "loss": 0.0709, "step": 26230 }, { "epoch": 12.236473880597014, "grad_norm": 0.4102052450928335, "learning_rate": 2.112818030463071e-05, "loss": 0.0703, "step": 26235 }, { "epoch": 12.238805970149254, "grad_norm": 0.443398705878065, "learning_rate": 2.1119859860239023e-05, "loss": 0.0734, "step": 26240 }, { "epoch": 12.241138059701493, "grad_norm": 0.44502743338770173, "learning_rate": 2.1111540364513045e-05, "loss": 0.0719, "step": 26245 }, { "epoch": 12.243470149253731, "grad_norm": 0.433503626420238, "learning_rate": 2.1103221818689794e-05, "loss": 0.0708, "step": 26250 }, { "epoch": 12.24580223880597, "grad_norm": 0.4207559527824806, "learning_rate": 2.1094904224006185e-05, "loss": 0.0708, "step": 26255 }, { "epoch": 12.248134328358208, "grad_norm": 0.43998454805358905, "learning_rate": 2.108658758169893e-05, "loss": 0.0712, "step": 26260 }, { "epoch": 12.250466417910447, "grad_norm": 0.43363444806489937, "learning_rate": 2.107827189300467e-05, "loss": 0.0741, "step": 26265 }, { "epoch": 12.252798507462687, "grad_norm": 0.4331871248793562, "learning_rate": 2.1069957159159848e-05, "loss": 0.0707, "step": 26270 }, { "epoch": 12.255130597014926, "grad_norm": 0.42666447614676123, "learning_rate": 2.1061643381400785e-05, "loss": 0.0713, "step": 26275 }, { "epoch": 12.257462686567164, "grad_norm": 0.459216480095238, "learning_rate": 2.105333056096367e-05, "loss": 0.0734, "step": 26280 }, { "epoch": 12.259794776119403, "grad_norm": 0.43730174536563654, "learning_rate": 2.104501869908453e-05, "loss": 0.073, "step": 26285 }, { "epoch": 12.262126865671641, "grad_norm": 0.4505567672895469, "learning_rate": 2.1036707796999267e-05, "loss": 0.0724, "step": 26290 }, { "epoch": 12.26445895522388, "grad_norm": 0.42641312866982506, "learning_rate": 2.102839785594362e-05, "loss": 0.0718, "step": 26295 }, { "epoch": 12.26679104477612, "grad_norm": 0.44893917358870505, "learning_rate": 2.1020088877153215e-05, "loss": 0.0755, "step": 26300 }, { "epoch": 12.269123134328359, "grad_norm": 0.4352166671985724, "learning_rate": 2.1011780861863504e-05, "loss": 0.0734, "step": 26305 }, { "epoch": 12.271455223880597, "grad_norm": 0.4448711323776344, "learning_rate": 2.100347381130982e-05, "loss": 0.0724, "step": 26310 }, { "epoch": 12.273787313432836, "grad_norm": 0.4678280104164203, "learning_rate": 2.099516772672733e-05, "loss": 0.0731, "step": 26315 }, { "epoch": 12.276119402985074, "grad_norm": 0.45382031677405443, "learning_rate": 2.0986862609351077e-05, "loss": 0.0721, "step": 26320 }, { "epoch": 12.278451492537313, "grad_norm": 0.4511514063682234, "learning_rate": 2.0978558460415954e-05, "loss": 0.0701, "step": 26325 }, { "epoch": 12.280783582089553, "grad_norm": 0.44075233876261516, "learning_rate": 2.09702552811567e-05, "loss": 0.0728, "step": 26330 }, { "epoch": 12.283115671641792, "grad_norm": 0.47066641193204956, "learning_rate": 2.096195307280792e-05, "loss": 0.0748, "step": 26335 }, { "epoch": 12.28544776119403, "grad_norm": 0.4303073488816093, "learning_rate": 2.0953651836604083e-05, "loss": 0.0728, "step": 26340 }, { "epoch": 12.287779850746269, "grad_norm": 0.44833945792661367, "learning_rate": 2.094535157377949e-05, "loss": 0.0744, "step": 26345 }, { "epoch": 12.290111940298507, "grad_norm": 0.44949897595475125, "learning_rate": 2.093705228556832e-05, "loss": 0.0731, "step": 26350 }, { "epoch": 12.292444029850746, "grad_norm": 0.4543327093192926, "learning_rate": 2.0928753973204597e-05, "loss": 0.0722, "step": 26355 }, { "epoch": 12.294776119402986, "grad_norm": 0.45004848096225875, "learning_rate": 2.0920456637922194e-05, "loss": 0.0729, "step": 26360 }, { "epoch": 12.297108208955224, "grad_norm": 0.4396705341230525, "learning_rate": 2.0912160280954852e-05, "loss": 0.0723, "step": 26365 }, { "epoch": 12.299440298507463, "grad_norm": 0.44427623718499587, "learning_rate": 2.0903864903536147e-05, "loss": 0.074, "step": 26370 }, { "epoch": 12.301772388059701, "grad_norm": 0.47819547869232015, "learning_rate": 2.0895570506899544e-05, "loss": 0.0737, "step": 26375 }, { "epoch": 12.30410447761194, "grad_norm": 0.4788295387501211, "learning_rate": 2.088727709227833e-05, "loss": 0.0727, "step": 26380 }, { "epoch": 12.306436567164178, "grad_norm": 0.4690176824477929, "learning_rate": 2.0878984660905644e-05, "loss": 0.0748, "step": 26385 }, { "epoch": 12.308768656716419, "grad_norm": 0.4583516521158871, "learning_rate": 2.087069321401451e-05, "loss": 0.0739, "step": 26390 }, { "epoch": 12.311100746268657, "grad_norm": 0.462240368291497, "learning_rate": 2.0862402752837768e-05, "loss": 0.0753, "step": 26395 }, { "epoch": 12.313432835820896, "grad_norm": 0.46100227092033724, "learning_rate": 2.085411327860815e-05, "loss": 0.0757, "step": 26400 }, { "epoch": 12.315764925373134, "grad_norm": 0.4512351005157021, "learning_rate": 2.084582479255821e-05, "loss": 0.0708, "step": 26405 }, { "epoch": 12.318097014925373, "grad_norm": 0.43678267378333774, "learning_rate": 2.083753729592037e-05, "loss": 0.0726, "step": 26410 }, { "epoch": 12.320429104477611, "grad_norm": 0.44961425983852094, "learning_rate": 2.0829250789926898e-05, "loss": 0.073, "step": 26415 }, { "epoch": 12.322761194029852, "grad_norm": 0.45414144146527746, "learning_rate": 2.0820965275809913e-05, "loss": 0.0727, "step": 26420 }, { "epoch": 12.32509328358209, "grad_norm": 0.4459359895972579, "learning_rate": 2.081268075480141e-05, "loss": 0.0736, "step": 26425 }, { "epoch": 12.327425373134329, "grad_norm": 0.4629672399681973, "learning_rate": 2.0804397228133205e-05, "loss": 0.0732, "step": 26430 }, { "epoch": 12.329757462686567, "grad_norm": 0.45617569880782144, "learning_rate": 2.0796114697036972e-05, "loss": 0.0764, "step": 26435 }, { "epoch": 12.332089552238806, "grad_norm": 0.4449406654433593, "learning_rate": 2.0787833162744257e-05, "loss": 0.0726, "step": 26440 }, { "epoch": 12.334421641791044, "grad_norm": 0.4571773913973807, "learning_rate": 2.0779552626486444e-05, "loss": 0.0732, "step": 26445 }, { "epoch": 12.336753731343283, "grad_norm": 0.4518648831202513, "learning_rate": 2.077127308949476e-05, "loss": 0.0752, "step": 26450 }, { "epoch": 12.339085820895523, "grad_norm": 0.4413053149125824, "learning_rate": 2.0762994553000304e-05, "loss": 0.0721, "step": 26455 }, { "epoch": 12.341417910447761, "grad_norm": 0.4681907423212594, "learning_rate": 2.0754717018234003e-05, "loss": 0.0718, "step": 26460 }, { "epoch": 12.34375, "grad_norm": 0.4460484105662918, "learning_rate": 2.074644048642666e-05, "loss": 0.0735, "step": 26465 }, { "epoch": 12.346082089552239, "grad_norm": 0.4524962371800169, "learning_rate": 2.0738164958808905e-05, "loss": 0.0761, "step": 26470 }, { "epoch": 12.348414179104477, "grad_norm": 0.4692113335205492, "learning_rate": 2.072989043661124e-05, "loss": 0.076, "step": 26475 }, { "epoch": 12.350746268656717, "grad_norm": 0.43899896999158805, "learning_rate": 2.072161692106399e-05, "loss": 0.0748, "step": 26480 }, { "epoch": 12.353078358208956, "grad_norm": 0.44151241488045034, "learning_rate": 2.0713344413397368e-05, "loss": 0.0745, "step": 26485 }, { "epoch": 12.355410447761194, "grad_norm": 0.4753262887534398, "learning_rate": 2.0705072914841407e-05, "loss": 0.075, "step": 26490 }, { "epoch": 12.357742537313433, "grad_norm": 0.4474841194219162, "learning_rate": 2.0696802426625993e-05, "loss": 0.074, "step": 26495 }, { "epoch": 12.360074626865671, "grad_norm": 0.4303133175085121, "learning_rate": 2.0688532949980882e-05, "loss": 0.0706, "step": 26500 }, { "epoch": 12.36240671641791, "grad_norm": 0.4545044237911892, "learning_rate": 2.0680264486135665e-05, "loss": 0.0739, "step": 26505 }, { "epoch": 12.364738805970148, "grad_norm": 0.44169380896410887, "learning_rate": 2.0671997036319763e-05, "loss": 0.0724, "step": 26510 }, { "epoch": 12.367070895522389, "grad_norm": 0.4483662461312245, "learning_rate": 2.0663730601762494e-05, "loss": 0.0761, "step": 26515 }, { "epoch": 12.369402985074627, "grad_norm": 0.4571754596863445, "learning_rate": 2.0655465183692972e-05, "loss": 0.0738, "step": 26520 }, { "epoch": 12.371735074626866, "grad_norm": 0.4651939303730309, "learning_rate": 2.0647200783340214e-05, "loss": 0.0761, "step": 26525 }, { "epoch": 12.374067164179104, "grad_norm": 0.4537094091057783, "learning_rate": 2.063893740193304e-05, "loss": 0.076, "step": 26530 }, { "epoch": 12.376399253731343, "grad_norm": 0.44344145750367503, "learning_rate": 2.063067504070012e-05, "loss": 0.0751, "step": 26535 }, { "epoch": 12.378731343283581, "grad_norm": 0.45946814671387876, "learning_rate": 2.0622413700870026e-05, "loss": 0.0752, "step": 26540 }, { "epoch": 12.381063432835822, "grad_norm": 0.46231840243297856, "learning_rate": 2.0614153383671103e-05, "loss": 0.0761, "step": 26545 }, { "epoch": 12.38339552238806, "grad_norm": 0.4428889108258969, "learning_rate": 2.0605894090331607e-05, "loss": 0.0776, "step": 26550 }, { "epoch": 12.385727611940299, "grad_norm": 0.482823290974215, "learning_rate": 2.0597635822079607e-05, "loss": 0.0778, "step": 26555 }, { "epoch": 12.388059701492537, "grad_norm": 0.472328056146228, "learning_rate": 2.0589378580143016e-05, "loss": 0.0761, "step": 26560 }, { "epoch": 12.390391791044776, "grad_norm": 0.45947620192911187, "learning_rate": 2.058112236574963e-05, "loss": 0.0763, "step": 26565 }, { "epoch": 12.392723880597014, "grad_norm": 0.44023086939024925, "learning_rate": 2.057286718012705e-05, "loss": 0.0711, "step": 26570 }, { "epoch": 12.395055970149254, "grad_norm": 0.4556484113205283, "learning_rate": 2.0564613024502754e-05, "loss": 0.0734, "step": 26575 }, { "epoch": 12.397388059701493, "grad_norm": 0.4484626897639386, "learning_rate": 2.0556359900104054e-05, "loss": 0.0754, "step": 26580 }, { "epoch": 12.399720149253731, "grad_norm": 0.4481393341870258, "learning_rate": 2.0548107808158102e-05, "loss": 0.0754, "step": 26585 }, { "epoch": 12.40205223880597, "grad_norm": 0.45241279077049057, "learning_rate": 2.0539856749891918e-05, "loss": 0.0728, "step": 26590 }, { "epoch": 12.404384328358208, "grad_norm": 0.4570056894548275, "learning_rate": 2.0531606726532344e-05, "loss": 0.0757, "step": 26595 }, { "epoch": 12.406716417910447, "grad_norm": 0.4532450387636183, "learning_rate": 2.0523357739306087e-05, "loss": 0.0776, "step": 26600 }, { "epoch": 12.409048507462687, "grad_norm": 0.45522242288082837, "learning_rate": 2.0515109789439695e-05, "loss": 0.0761, "step": 26605 }, { "epoch": 12.411380597014926, "grad_norm": 0.4572640305620828, "learning_rate": 2.050686287815954e-05, "loss": 0.0769, "step": 26610 }, { "epoch": 12.413712686567164, "grad_norm": 0.44472620758525705, "learning_rate": 2.049861700669189e-05, "loss": 0.0751, "step": 26615 }, { "epoch": 12.416044776119403, "grad_norm": 0.4558730996552911, "learning_rate": 2.049037217626279e-05, "loss": 0.077, "step": 26620 }, { "epoch": 12.418376865671641, "grad_norm": 0.46419163392856677, "learning_rate": 2.0482128388098202e-05, "loss": 0.0742, "step": 26625 }, { "epoch": 12.42070895522388, "grad_norm": 0.4798883147209171, "learning_rate": 2.0473885643423885e-05, "loss": 0.0776, "step": 26630 }, { "epoch": 12.42304104477612, "grad_norm": 0.42776193718910377, "learning_rate": 2.046564394346544e-05, "loss": 0.0753, "step": 26635 }, { "epoch": 12.425373134328359, "grad_norm": 0.46551442552454303, "learning_rate": 2.0457403289448353e-05, "loss": 0.0732, "step": 26640 }, { "epoch": 12.427705223880597, "grad_norm": 0.46874660078594305, "learning_rate": 2.0449163682597915e-05, "loss": 0.0757, "step": 26645 }, { "epoch": 12.430037313432836, "grad_norm": 0.4573899412306608, "learning_rate": 2.0440925124139286e-05, "loss": 0.0781, "step": 26650 }, { "epoch": 12.432369402985074, "grad_norm": 0.4912298751608248, "learning_rate": 2.0432687615297458e-05, "loss": 0.0759, "step": 26655 }, { "epoch": 12.434701492537313, "grad_norm": 0.43688921796825586, "learning_rate": 2.0424451157297264e-05, "loss": 0.0749, "step": 26660 }, { "epoch": 12.437033582089553, "grad_norm": 0.45788178691706644, "learning_rate": 2.0416215751363392e-05, "loss": 0.0779, "step": 26665 }, { "epoch": 12.439365671641792, "grad_norm": 0.47707679984514495, "learning_rate": 2.040798139872037e-05, "loss": 0.0753, "step": 26670 }, { "epoch": 12.44169776119403, "grad_norm": 0.46697760802575666, "learning_rate": 2.0399748100592564e-05, "loss": 0.0787, "step": 26675 }, { "epoch": 12.444029850746269, "grad_norm": 0.4666642140908343, "learning_rate": 2.0391515858204184e-05, "loss": 0.078, "step": 26680 }, { "epoch": 12.446361940298507, "grad_norm": 0.45628353316563947, "learning_rate": 2.038328467277929e-05, "loss": 0.0758, "step": 26685 }, { "epoch": 12.448694029850746, "grad_norm": 0.45224165225648033, "learning_rate": 2.0375054545541776e-05, "loss": 0.0763, "step": 26690 }, { "epoch": 12.451026119402986, "grad_norm": 0.4524638616931825, "learning_rate": 2.0366825477715386e-05, "loss": 0.0771, "step": 26695 }, { "epoch": 12.453358208955224, "grad_norm": 0.45353387089855757, "learning_rate": 2.0358597470523706e-05, "loss": 0.0761, "step": 26700 }, { "epoch": 12.455690298507463, "grad_norm": 0.4614880970420699, "learning_rate": 2.035037052519016e-05, "loss": 0.0754, "step": 26705 }, { "epoch": 12.458022388059701, "grad_norm": 0.4699954224609055, "learning_rate": 2.034214464293801e-05, "loss": 0.0785, "step": 26710 }, { "epoch": 12.46035447761194, "grad_norm": 0.4425283133739075, "learning_rate": 2.0333919824990372e-05, "loss": 0.0759, "step": 26715 }, { "epoch": 12.462686567164178, "grad_norm": 0.448977282822987, "learning_rate": 2.0325696072570195e-05, "loss": 0.0753, "step": 26720 }, { "epoch": 12.465018656716419, "grad_norm": 0.46070442103081577, "learning_rate": 2.0317473386900275e-05, "loss": 0.0761, "step": 26725 }, { "epoch": 12.467350746268657, "grad_norm": 0.45057150510149735, "learning_rate": 2.0309251769203252e-05, "loss": 0.0768, "step": 26730 }, { "epoch": 12.469682835820896, "grad_norm": 0.4628684543602443, "learning_rate": 2.0301031220701582e-05, "loss": 0.0776, "step": 26735 }, { "epoch": 12.472014925373134, "grad_norm": 0.45372316838931337, "learning_rate": 2.0292811742617607e-05, "loss": 0.0747, "step": 26740 }, { "epoch": 12.474347014925373, "grad_norm": 0.4960823262178236, "learning_rate": 2.028459333617346e-05, "loss": 0.0776, "step": 26745 }, { "epoch": 12.476679104477611, "grad_norm": 0.4592168592063033, "learning_rate": 2.0276376002591164e-05, "loss": 0.0759, "step": 26750 }, { "epoch": 12.479011194029852, "grad_norm": 0.44771470919631984, "learning_rate": 2.0268159743092546e-05, "loss": 0.0781, "step": 26755 }, { "epoch": 12.48134328358209, "grad_norm": 0.4660300148049293, "learning_rate": 2.0259944558899274e-05, "loss": 0.0736, "step": 26760 }, { "epoch": 12.483675373134329, "grad_norm": 0.46384311486697316, "learning_rate": 2.0251730451232886e-05, "loss": 0.0779, "step": 26765 }, { "epoch": 12.486007462686567, "grad_norm": 0.48894643529261617, "learning_rate": 2.0243517421314727e-05, "loss": 0.0785, "step": 26770 }, { "epoch": 12.488339552238806, "grad_norm": 0.4543456635546845, "learning_rate": 2.0235305470366008e-05, "loss": 0.0748, "step": 26775 }, { "epoch": 12.490671641791044, "grad_norm": 0.46247439480082897, "learning_rate": 2.022709459960776e-05, "loss": 0.0781, "step": 26780 }, { "epoch": 12.493003731343283, "grad_norm": 0.47331358907096993, "learning_rate": 2.021888481026086e-05, "loss": 0.0761, "step": 26785 }, { "epoch": 12.495335820895523, "grad_norm": 0.4616089151284358, "learning_rate": 2.0210676103546028e-05, "loss": 0.0739, "step": 26790 }, { "epoch": 12.497667910447761, "grad_norm": 0.4479909674702392, "learning_rate": 2.0202468480683812e-05, "loss": 0.0761, "step": 26795 }, { "epoch": 12.5, "grad_norm": 0.45519461310964754, "learning_rate": 2.0194261942894628e-05, "loss": 0.0784, "step": 26800 }, { "epoch": 12.502332089552239, "grad_norm": 0.4733955635041723, "learning_rate": 2.0186056491398686e-05, "loss": 0.0771, "step": 26805 }, { "epoch": 12.504664179104477, "grad_norm": 0.47177882909913904, "learning_rate": 2.0177852127416063e-05, "loss": 0.0784, "step": 26810 }, { "epoch": 12.506996268656717, "grad_norm": 0.45594197893444705, "learning_rate": 2.0169648852166684e-05, "loss": 0.077, "step": 26815 }, { "epoch": 12.509328358208956, "grad_norm": 0.4585790658282467, "learning_rate": 2.016144666687029e-05, "loss": 0.0797, "step": 26820 }, { "epoch": 12.511660447761194, "grad_norm": 0.45738831054221835, "learning_rate": 2.015324557274645e-05, "loss": 0.0763, "step": 26825 }, { "epoch": 12.513992537313433, "grad_norm": 0.47819230706783267, "learning_rate": 2.0145045571014614e-05, "loss": 0.0785, "step": 26830 }, { "epoch": 12.516324626865671, "grad_norm": 0.4495594822353405, "learning_rate": 2.0136846662894028e-05, "loss": 0.077, "step": 26835 }, { "epoch": 12.51865671641791, "grad_norm": 0.45229444475837965, "learning_rate": 2.0128648849603798e-05, "loss": 0.0772, "step": 26840 }, { "epoch": 12.520988805970148, "grad_norm": 0.46288056223268526, "learning_rate": 2.0120452132362854e-05, "loss": 0.0784, "step": 26845 }, { "epoch": 12.523320895522389, "grad_norm": 0.4605338130534437, "learning_rate": 2.0112256512389976e-05, "loss": 0.078, "step": 26850 }, { "epoch": 12.525652985074627, "grad_norm": 0.4690143751305437, "learning_rate": 2.0104061990903773e-05, "loss": 0.0774, "step": 26855 }, { "epoch": 12.527985074626866, "grad_norm": 0.4371870733971387, "learning_rate": 2.009586856912269e-05, "loss": 0.0758, "step": 26860 }, { "epoch": 12.530317164179104, "grad_norm": 0.4927443751949473, "learning_rate": 2.008767624826501e-05, "loss": 0.0774, "step": 26865 }, { "epoch": 12.532649253731343, "grad_norm": 0.46082339002725403, "learning_rate": 2.0079485029548838e-05, "loss": 0.0787, "step": 26870 }, { "epoch": 12.534981343283581, "grad_norm": 0.4753557816904934, "learning_rate": 2.007129491419217e-05, "loss": 0.0786, "step": 26875 }, { "epoch": 12.537313432835822, "grad_norm": 0.4657689445699968, "learning_rate": 2.006310590341276e-05, "loss": 0.0807, "step": 26880 }, { "epoch": 12.53964552238806, "grad_norm": 0.49299248897830805, "learning_rate": 2.005491799842824e-05, "loss": 0.0791, "step": 26885 }, { "epoch": 12.541977611940299, "grad_norm": 0.4638955918001919, "learning_rate": 2.0046731200456097e-05, "loss": 0.077, "step": 26890 }, { "epoch": 12.544309701492537, "grad_norm": 0.4584996645805775, "learning_rate": 2.0038545510713597e-05, "loss": 0.0769, "step": 26895 }, { "epoch": 12.546641791044776, "grad_norm": 0.45771681196331987, "learning_rate": 2.00303609304179e-05, "loss": 0.0767, "step": 26900 }, { "epoch": 12.548973880597014, "grad_norm": 0.46076716225428865, "learning_rate": 2.0022177460785974e-05, "loss": 0.0765, "step": 26905 }, { "epoch": 12.551305970149254, "grad_norm": 0.490390050699006, "learning_rate": 2.0013995103034594e-05, "loss": 0.0802, "step": 26910 }, { "epoch": 12.553638059701493, "grad_norm": 0.4621842712559337, "learning_rate": 2.000581385838043e-05, "loss": 0.0771, "step": 26915 }, { "epoch": 12.555970149253731, "grad_norm": 0.46876332128141585, "learning_rate": 1.9997633728039933e-05, "loss": 0.0771, "step": 26920 }, { "epoch": 12.55830223880597, "grad_norm": 0.4606784538214288, "learning_rate": 1.9989454713229426e-05, "loss": 0.0761, "step": 26925 }, { "epoch": 12.560634328358208, "grad_norm": 0.45254769762971137, "learning_rate": 1.9981276815165046e-05, "loss": 0.076, "step": 26930 }, { "epoch": 12.562966417910447, "grad_norm": 0.47910816183728433, "learning_rate": 1.9973100035062753e-05, "loss": 0.081, "step": 26935 }, { "epoch": 12.565298507462687, "grad_norm": 0.43641662112888885, "learning_rate": 1.996492437413838e-05, "loss": 0.075, "step": 26940 }, { "epoch": 12.567630597014926, "grad_norm": 0.47187875205548124, "learning_rate": 1.9956749833607545e-05, "loss": 0.0785, "step": 26945 }, { "epoch": 12.569962686567164, "grad_norm": 0.4669448901031379, "learning_rate": 1.994857641468575e-05, "loss": 0.0793, "step": 26950 }, { "epoch": 12.572294776119403, "grad_norm": 0.4800760605938874, "learning_rate": 1.994040411858829e-05, "loss": 0.0783, "step": 26955 }, { "epoch": 12.574626865671641, "grad_norm": 0.461454715625054, "learning_rate": 1.99322329465303e-05, "loss": 0.0771, "step": 26960 }, { "epoch": 12.57695895522388, "grad_norm": 0.47754655944398766, "learning_rate": 1.992406289972677e-05, "loss": 0.0773, "step": 26965 }, { "epoch": 12.57929104477612, "grad_norm": 0.45953382042564234, "learning_rate": 1.9915893979392492e-05, "loss": 0.0786, "step": 26970 }, { "epoch": 12.581623134328359, "grad_norm": 0.4865729215370681, "learning_rate": 1.9907726186742122e-05, "loss": 0.0808, "step": 26975 }, { "epoch": 12.583955223880597, "grad_norm": 0.4985426423582949, "learning_rate": 1.989955952299012e-05, "loss": 0.0795, "step": 26980 }, { "epoch": 12.586287313432836, "grad_norm": 0.4949676756898525, "learning_rate": 1.9891393989350794e-05, "loss": 0.0809, "step": 26985 }, { "epoch": 12.588619402985074, "grad_norm": 0.45559049964206466, "learning_rate": 1.9883229587038287e-05, "loss": 0.079, "step": 26990 }, { "epoch": 12.590951492537313, "grad_norm": 0.4692877366277243, "learning_rate": 1.987506631726656e-05, "loss": 0.0788, "step": 26995 }, { "epoch": 12.593283582089553, "grad_norm": 0.452342010461331, "learning_rate": 1.986690418124942e-05, "loss": 0.0782, "step": 27000 }, { "epoch": 12.595615671641792, "grad_norm": 0.4632189004308772, "learning_rate": 1.985874318020049e-05, "loss": 0.0781, "step": 27005 }, { "epoch": 12.59794776119403, "grad_norm": 0.4734198321683892, "learning_rate": 1.9850583315333242e-05, "loss": 0.0793, "step": 27010 }, { "epoch": 12.600279850746269, "grad_norm": 0.47731816593052767, "learning_rate": 1.9842424587860958e-05, "loss": 0.0781, "step": 27015 }, { "epoch": 12.602611940298507, "grad_norm": 0.4574966605071352, "learning_rate": 1.983426699899677e-05, "loss": 0.0744, "step": 27020 }, { "epoch": 12.604944029850746, "grad_norm": 0.46353656385664194, "learning_rate": 1.982611054995364e-05, "loss": 0.0749, "step": 27025 }, { "epoch": 12.607276119402986, "grad_norm": 0.47156376749736817, "learning_rate": 1.9817955241944335e-05, "loss": 0.0806, "step": 27030 }, { "epoch": 12.609608208955224, "grad_norm": 0.48975134132460024, "learning_rate": 1.980980107618149e-05, "loss": 0.0783, "step": 27035 }, { "epoch": 12.611940298507463, "grad_norm": 0.452919379562146, "learning_rate": 1.9801648053877548e-05, "loss": 0.0757, "step": 27040 }, { "epoch": 12.614272388059701, "grad_norm": 0.4486843950293751, "learning_rate": 1.979349617624477e-05, "loss": 0.0772, "step": 27045 }, { "epoch": 12.61660447761194, "grad_norm": 0.4823106252593694, "learning_rate": 1.978534544449528e-05, "loss": 0.0791, "step": 27050 }, { "epoch": 12.618936567164178, "grad_norm": 0.45722036118303755, "learning_rate": 1.9777195859840997e-05, "loss": 0.0767, "step": 27055 }, { "epoch": 12.621268656716419, "grad_norm": 0.46782900620173645, "learning_rate": 1.9769047423493707e-05, "loss": 0.0779, "step": 27060 }, { "epoch": 12.623600746268657, "grad_norm": 0.4828892080227312, "learning_rate": 1.9760900136664994e-05, "loss": 0.074, "step": 27065 }, { "epoch": 12.625932835820896, "grad_norm": 0.47069744643305744, "learning_rate": 1.975275400056627e-05, "loss": 0.0771, "step": 27070 }, { "epoch": 12.628264925373134, "grad_norm": 0.4456537271326283, "learning_rate": 1.974460901640881e-05, "loss": 0.077, "step": 27075 }, { "epoch": 12.630597014925373, "grad_norm": 0.4616556478599221, "learning_rate": 1.9736465185403675e-05, "loss": 0.0827, "step": 27080 }, { "epoch": 12.632929104477611, "grad_norm": 0.4626613790961957, "learning_rate": 1.9728322508761794e-05, "loss": 0.08, "step": 27085 }, { "epoch": 12.635261194029852, "grad_norm": 0.4858166888311669, "learning_rate": 1.9720180987693888e-05, "loss": 0.0793, "step": 27090 }, { "epoch": 12.63759328358209, "grad_norm": 0.46733453603689384, "learning_rate": 1.9712040623410523e-05, "loss": 0.0799, "step": 27095 }, { "epoch": 12.639925373134329, "grad_norm": 0.4727439683921748, "learning_rate": 1.9703901417122106e-05, "loss": 0.0796, "step": 27100 }, { "epoch": 12.642257462686567, "grad_norm": 0.4657350028469426, "learning_rate": 1.9695763370038846e-05, "loss": 0.0792, "step": 27105 }, { "epoch": 12.644589552238806, "grad_norm": 0.4654840541443183, "learning_rate": 1.968762648337081e-05, "loss": 0.0805, "step": 27110 }, { "epoch": 12.646921641791044, "grad_norm": 0.4850194295384901, "learning_rate": 1.9679490758327862e-05, "loss": 0.0803, "step": 27115 }, { "epoch": 12.649253731343283, "grad_norm": 0.4502354236290112, "learning_rate": 1.96713561961197e-05, "loss": 0.0767, "step": 27120 }, { "epoch": 12.651585820895523, "grad_norm": 0.4611657762949326, "learning_rate": 1.966322279795587e-05, "loss": 0.0808, "step": 27125 }, { "epoch": 12.653917910447761, "grad_norm": 0.4796524453776827, "learning_rate": 1.9655090565045718e-05, "loss": 0.078, "step": 27130 }, { "epoch": 12.65625, "grad_norm": 0.4534453693926543, "learning_rate": 1.9646959498598444e-05, "loss": 0.08, "step": 27135 }, { "epoch": 12.658582089552239, "grad_norm": 0.4832577438729363, "learning_rate": 1.9638829599823056e-05, "loss": 0.0796, "step": 27140 }, { "epoch": 12.660914179104477, "grad_norm": 0.4746774893816295, "learning_rate": 1.963070086992837e-05, "loss": 0.0784, "step": 27145 }, { "epoch": 12.663246268656717, "grad_norm": 0.4776394684510085, "learning_rate": 1.9622573310123082e-05, "loss": 0.0813, "step": 27150 }, { "epoch": 12.665578358208956, "grad_norm": 0.4602465028218704, "learning_rate": 1.9614446921615654e-05, "loss": 0.0783, "step": 27155 }, { "epoch": 12.667910447761194, "grad_norm": 0.4751692854041869, "learning_rate": 1.9606321705614427e-05, "loss": 0.0793, "step": 27160 }, { "epoch": 12.670242537313433, "grad_norm": 0.46375235667513115, "learning_rate": 1.9598197663327534e-05, "loss": 0.0778, "step": 27165 }, { "epoch": 12.672574626865671, "grad_norm": 0.4817026377648107, "learning_rate": 1.9590074795962925e-05, "loss": 0.076, "step": 27170 }, { "epoch": 12.67490671641791, "grad_norm": 0.4643050036368286, "learning_rate": 1.9581953104728422e-05, "loss": 0.0805, "step": 27175 }, { "epoch": 12.677238805970148, "grad_norm": 0.46931653333065676, "learning_rate": 1.957383259083162e-05, "loss": 0.0809, "step": 27180 }, { "epoch": 12.679570895522389, "grad_norm": 0.4760160254990632, "learning_rate": 1.9565713255479974e-05, "loss": 0.076, "step": 27185 }, { "epoch": 12.681902985074627, "grad_norm": 0.47555860606674977, "learning_rate": 1.955759509988075e-05, "loss": 0.0782, "step": 27190 }, { "epoch": 12.684235074626866, "grad_norm": 0.4697634368929082, "learning_rate": 1.9549478125241034e-05, "loss": 0.0796, "step": 27195 }, { "epoch": 12.686567164179104, "grad_norm": 0.4594621567923427, "learning_rate": 1.9541362332767737e-05, "loss": 0.0792, "step": 27200 }, { "epoch": 12.688899253731343, "grad_norm": 0.48002686637676545, "learning_rate": 1.9533247723667613e-05, "loss": 0.0793, "step": 27205 }, { "epoch": 12.691231343283581, "grad_norm": 0.4437090063332973, "learning_rate": 1.952513429914723e-05, "loss": 0.0787, "step": 27210 }, { "epoch": 12.693563432835822, "grad_norm": 0.4797611910875342, "learning_rate": 1.9517022060412958e-05, "loss": 0.0776, "step": 27215 }, { "epoch": 12.69589552238806, "grad_norm": 0.44645515868084806, "learning_rate": 1.950891100867102e-05, "loss": 0.0811, "step": 27220 }, { "epoch": 12.698227611940299, "grad_norm": 0.46309597400050323, "learning_rate": 1.950080114512746e-05, "loss": 0.0804, "step": 27225 }, { "epoch": 12.700559701492537, "grad_norm": 0.4691436504228513, "learning_rate": 1.9492692470988115e-05, "loss": 0.0793, "step": 27230 }, { "epoch": 12.702891791044776, "grad_norm": 0.4774532027507049, "learning_rate": 1.9484584987458693e-05, "loss": 0.0817, "step": 27235 }, { "epoch": 12.705223880597014, "grad_norm": 0.4776266322105131, "learning_rate": 1.9476478695744683e-05, "loss": 0.0769, "step": 27240 }, { "epoch": 12.707555970149254, "grad_norm": 0.4565220089840565, "learning_rate": 1.9468373597051404e-05, "loss": 0.0821, "step": 27245 }, { "epoch": 12.709888059701493, "grad_norm": 0.4824534314746482, "learning_rate": 1.9460269692584034e-05, "loss": 0.0778, "step": 27250 }, { "epoch": 12.712220149253731, "grad_norm": 0.4778580271894171, "learning_rate": 1.9452166983547516e-05, "loss": 0.0821, "step": 27255 }, { "epoch": 12.71455223880597, "grad_norm": 0.47070721585877406, "learning_rate": 1.944406547114667e-05, "loss": 0.0776, "step": 27260 }, { "epoch": 12.716884328358208, "grad_norm": 0.4633195440851239, "learning_rate": 1.9435965156586105e-05, "loss": 0.0774, "step": 27265 }, { "epoch": 12.719216417910447, "grad_norm": 0.45182496139404743, "learning_rate": 1.9427866041070254e-05, "loss": 0.0813, "step": 27270 }, { "epoch": 12.721548507462687, "grad_norm": 0.48969382135084144, "learning_rate": 1.9419768125803382e-05, "loss": 0.0791, "step": 27275 }, { "epoch": 12.723880597014926, "grad_norm": 0.4696399841756536, "learning_rate": 1.9411671411989568e-05, "loss": 0.0787, "step": 27280 }, { "epoch": 12.726212686567164, "grad_norm": 0.4596426907099033, "learning_rate": 1.9403575900832726e-05, "loss": 0.0798, "step": 27285 }, { "epoch": 12.728544776119403, "grad_norm": 0.48810151747772795, "learning_rate": 1.9395481593536575e-05, "loss": 0.082, "step": 27290 }, { "epoch": 12.730876865671641, "grad_norm": 0.45988394284046125, "learning_rate": 1.9387388491304646e-05, "loss": 0.0779, "step": 27295 }, { "epoch": 12.73320895522388, "grad_norm": 0.47211520696277676, "learning_rate": 1.937929659534034e-05, "loss": 0.0786, "step": 27300 }, { "epoch": 12.73554104477612, "grad_norm": 0.4475543284377289, "learning_rate": 1.9371205906846808e-05, "loss": 0.0802, "step": 27305 }, { "epoch": 12.737873134328359, "grad_norm": 0.4683173298895976, "learning_rate": 1.9363116427027084e-05, "loss": 0.0791, "step": 27310 }, { "epoch": 12.740205223880597, "grad_norm": 0.44655623495763713, "learning_rate": 1.9355028157083988e-05, "loss": 0.0788, "step": 27315 }, { "epoch": 12.742537313432836, "grad_norm": 0.45888465998786315, "learning_rate": 1.9346941098220157e-05, "loss": 0.0786, "step": 27320 }, { "epoch": 12.744869402985074, "grad_norm": 0.46268351793158125, "learning_rate": 1.933885525163807e-05, "loss": 0.0796, "step": 27325 }, { "epoch": 12.747201492537313, "grad_norm": 0.48271646289177583, "learning_rate": 1.933077061854002e-05, "loss": 0.0797, "step": 27330 }, { "epoch": 12.749533582089553, "grad_norm": 0.4607906597785622, "learning_rate": 1.9322687200128103e-05, "loss": 0.0812, "step": 27335 }, { "epoch": 12.751865671641792, "grad_norm": 0.49009065628211274, "learning_rate": 1.931460499760426e-05, "loss": 0.0832, "step": 27340 }, { "epoch": 12.75419776119403, "grad_norm": 0.4549368895586004, "learning_rate": 1.930652401217021e-05, "loss": 0.079, "step": 27345 }, { "epoch": 12.756529850746269, "grad_norm": 0.44829566996341175, "learning_rate": 1.929844424502755e-05, "loss": 0.0791, "step": 27350 }, { "epoch": 12.758861940298507, "grad_norm": 0.46446948425451334, "learning_rate": 1.929036569737765e-05, "loss": 0.0803, "step": 27355 }, { "epoch": 12.761194029850746, "grad_norm": 0.45664398646136456, "learning_rate": 1.9282288370421708e-05, "loss": 0.0797, "step": 27360 }, { "epoch": 12.763526119402986, "grad_norm": 0.48650136229056207, "learning_rate": 1.9274212265360757e-05, "loss": 0.0786, "step": 27365 }, { "epoch": 12.765858208955224, "grad_norm": 0.4728628600296022, "learning_rate": 1.9266137383395626e-05, "loss": 0.0783, "step": 27370 }, { "epoch": 12.768190298507463, "grad_norm": 0.47659521408642996, "learning_rate": 1.925806372572697e-05, "loss": 0.0805, "step": 27375 }, { "epoch": 12.770522388059701, "grad_norm": 0.47324950740489996, "learning_rate": 1.9249991293555276e-05, "loss": 0.082, "step": 27380 }, { "epoch": 12.77285447761194, "grad_norm": 0.46849510974947534, "learning_rate": 1.9241920088080833e-05, "loss": 0.0775, "step": 27385 }, { "epoch": 12.775186567164178, "grad_norm": 0.46876800194662915, "learning_rate": 1.9233850110503748e-05, "loss": 0.0767, "step": 27390 }, { "epoch": 12.777518656716419, "grad_norm": 0.4789441536472238, "learning_rate": 1.9225781362023955e-05, "loss": 0.0796, "step": 27395 }, { "epoch": 12.779850746268657, "grad_norm": 0.4987821467060857, "learning_rate": 1.9217713843841195e-05, "loss": 0.08, "step": 27400 }, { "epoch": 12.782182835820896, "grad_norm": 0.4612511154329872, "learning_rate": 1.9209647557155025e-05, "loss": 0.0822, "step": 27405 }, { "epoch": 12.784514925373134, "grad_norm": 0.47143907191804735, "learning_rate": 1.9201582503164845e-05, "loss": 0.0772, "step": 27410 }, { "epoch": 12.786847014925373, "grad_norm": 0.47674603347426314, "learning_rate": 1.9193518683069833e-05, "loss": 0.0808, "step": 27415 }, { "epoch": 12.789179104477611, "grad_norm": 0.450703232741145, "learning_rate": 1.9185456098068998e-05, "loss": 0.0772, "step": 27420 }, { "epoch": 12.791511194029852, "grad_norm": 0.45731588433453124, "learning_rate": 1.9177394749361193e-05, "loss": 0.0809, "step": 27425 }, { "epoch": 12.79384328358209, "grad_norm": 0.4450334205211549, "learning_rate": 1.9169334638145037e-05, "loss": 0.0768, "step": 27430 }, { "epoch": 12.796175373134329, "grad_norm": 0.47646474013184714, "learning_rate": 1.9161275765619007e-05, "loss": 0.0814, "step": 27435 }, { "epoch": 12.798507462686567, "grad_norm": 0.4721766697701395, "learning_rate": 1.9153218132981375e-05, "loss": 0.0801, "step": 27440 }, { "epoch": 12.800839552238806, "grad_norm": 0.4807497703048021, "learning_rate": 1.9145161741430234e-05, "loss": 0.0799, "step": 27445 }, { "epoch": 12.803171641791044, "grad_norm": 0.4821832044382131, "learning_rate": 1.9137106592163495e-05, "loss": 0.0798, "step": 27450 }, { "epoch": 12.805503731343283, "grad_norm": 0.46736761238613617, "learning_rate": 1.9129052686378873e-05, "loss": 0.0795, "step": 27455 }, { "epoch": 12.807835820895523, "grad_norm": 0.4859419365554095, "learning_rate": 1.912100002527392e-05, "loss": 0.0832, "step": 27460 }, { "epoch": 12.810167910447761, "grad_norm": 0.4712357496667044, "learning_rate": 1.9112948610045982e-05, "loss": 0.0787, "step": 27465 }, { "epoch": 12.8125, "grad_norm": 0.47531067573787134, "learning_rate": 1.9104898441892222e-05, "loss": 0.0802, "step": 27470 }, { "epoch": 12.814832089552239, "grad_norm": 0.5435768079514604, "learning_rate": 1.909684952200964e-05, "loss": 0.0794, "step": 27475 }, { "epoch": 12.817164179104477, "grad_norm": 0.46991402300361607, "learning_rate": 1.9088801851595008e-05, "loss": 0.0796, "step": 27480 }, { "epoch": 12.819496268656717, "grad_norm": 0.4837285125484913, "learning_rate": 1.908075543184496e-05, "loss": 0.0789, "step": 27485 }, { "epoch": 12.821828358208956, "grad_norm": 0.4613772190098701, "learning_rate": 1.907271026395592e-05, "loss": 0.0812, "step": 27490 }, { "epoch": 12.824160447761194, "grad_norm": 0.49167076101281076, "learning_rate": 1.90646663491241e-05, "loss": 0.0805, "step": 27495 }, { "epoch": 12.826492537313433, "grad_norm": 0.4615102074137066, "learning_rate": 1.9056623688545588e-05, "loss": 0.0811, "step": 27500 }, { "epoch": 12.828824626865671, "grad_norm": 0.4639669322284747, "learning_rate": 1.904858228341623e-05, "loss": 0.0813, "step": 27505 }, { "epoch": 12.83115671641791, "grad_norm": 0.5031407298386842, "learning_rate": 1.9040542134931715e-05, "loss": 0.0814, "step": 27510 }, { "epoch": 12.833488805970148, "grad_norm": 0.4837643493470474, "learning_rate": 1.9032503244287537e-05, "loss": 0.0817, "step": 27515 }, { "epoch": 12.835820895522389, "grad_norm": 0.46791356953183466, "learning_rate": 1.9024465612678993e-05, "loss": 0.0774, "step": 27520 }, { "epoch": 12.838152985074627, "grad_norm": 0.4426828779806819, "learning_rate": 1.901642924130121e-05, "loss": 0.0793, "step": 27525 }, { "epoch": 12.840485074626866, "grad_norm": 0.4625167036643995, "learning_rate": 1.900839413134911e-05, "loss": 0.0785, "step": 27530 }, { "epoch": 12.842817164179104, "grad_norm": 0.4758768724989832, "learning_rate": 1.9000360284017448e-05, "loss": 0.0798, "step": 27535 }, { "epoch": 12.845149253731343, "grad_norm": 0.475075800307457, "learning_rate": 1.8992327700500772e-05, "loss": 0.0818, "step": 27540 }, { "epoch": 12.847481343283581, "grad_norm": 0.47767320315532685, "learning_rate": 1.8984296381993454e-05, "loss": 0.0814, "step": 27545 }, { "epoch": 12.849813432835822, "grad_norm": 0.4827784646424914, "learning_rate": 1.897626632968968e-05, "loss": 0.0818, "step": 27550 }, { "epoch": 12.85214552238806, "grad_norm": 0.48119043513708615, "learning_rate": 1.8968237544783423e-05, "loss": 0.0788, "step": 27555 }, { "epoch": 12.854477611940299, "grad_norm": 0.48721203131782503, "learning_rate": 1.8960210028468512e-05, "loss": 0.0798, "step": 27560 }, { "epoch": 12.856809701492537, "grad_norm": 0.4615943895315708, "learning_rate": 1.8952183781938538e-05, "loss": 0.0795, "step": 27565 }, { "epoch": 12.859141791044776, "grad_norm": 0.46185669470285745, "learning_rate": 1.8944158806386942e-05, "loss": 0.0811, "step": 27570 }, { "epoch": 12.861473880597014, "grad_norm": 0.4809283267521767, "learning_rate": 1.8936135103006957e-05, "loss": 0.0814, "step": 27575 }, { "epoch": 12.863805970149254, "grad_norm": 0.4781654071913427, "learning_rate": 1.8928112672991626e-05, "loss": 0.0825, "step": 27580 }, { "epoch": 12.866138059701493, "grad_norm": 0.47040388116197296, "learning_rate": 1.8920091517533818e-05, "loss": 0.0831, "step": 27585 }, { "epoch": 12.868470149253731, "grad_norm": 0.48550710913493533, "learning_rate": 1.8912071637826196e-05, "loss": 0.0802, "step": 27590 }, { "epoch": 12.87080223880597, "grad_norm": 0.485653820671467, "learning_rate": 1.890405303506123e-05, "loss": 0.08, "step": 27595 }, { "epoch": 12.873134328358208, "grad_norm": 0.4591761429029312, "learning_rate": 1.8896035710431225e-05, "loss": 0.0818, "step": 27600 }, { "epoch": 12.875466417910447, "grad_norm": 0.464102123052674, "learning_rate": 1.888801966512827e-05, "loss": 0.0797, "step": 27605 }, { "epoch": 12.877798507462687, "grad_norm": 0.4636963315250954, "learning_rate": 1.8880004900344283e-05, "loss": 0.0796, "step": 27610 }, { "epoch": 12.880130597014926, "grad_norm": 0.4817254321270912, "learning_rate": 1.8871991417270978e-05, "loss": 0.0805, "step": 27615 }, { "epoch": 12.882462686567164, "grad_norm": 0.4612327029060001, "learning_rate": 1.8863979217099874e-05, "loss": 0.0815, "step": 27620 }, { "epoch": 12.884794776119403, "grad_norm": 0.4579676365473029, "learning_rate": 1.8855968301022326e-05, "loss": 0.0808, "step": 27625 }, { "epoch": 12.887126865671641, "grad_norm": 0.4712356882743447, "learning_rate": 1.8847958670229465e-05, "loss": 0.0791, "step": 27630 }, { "epoch": 12.88945895522388, "grad_norm": 0.4752910370521795, "learning_rate": 1.883995032591226e-05, "loss": 0.0835, "step": 27635 }, { "epoch": 12.89179104477612, "grad_norm": 0.4565023232945777, "learning_rate": 1.8831943269261467e-05, "loss": 0.0794, "step": 27640 }, { "epoch": 12.894123134328359, "grad_norm": 0.4692961540029773, "learning_rate": 1.8823937501467648e-05, "loss": 0.0803, "step": 27645 }, { "epoch": 12.896455223880597, "grad_norm": 0.4611372068832633, "learning_rate": 1.8815933023721206e-05, "loss": 0.0807, "step": 27650 }, { "epoch": 12.898787313432836, "grad_norm": 0.459551696875981, "learning_rate": 1.880792983721231e-05, "loss": 0.0802, "step": 27655 }, { "epoch": 12.901119402985074, "grad_norm": 0.48685268825712263, "learning_rate": 1.8799927943130986e-05, "loss": 0.0843, "step": 27660 }, { "epoch": 12.903451492537313, "grad_norm": 0.4683882053218717, "learning_rate": 1.879192734266701e-05, "loss": 0.082, "step": 27665 }, { "epoch": 12.905783582089553, "grad_norm": 0.4637180858486111, "learning_rate": 1.878392803701e-05, "loss": 0.0807, "step": 27670 }, { "epoch": 12.908115671641792, "grad_norm": 0.4827506073996982, "learning_rate": 1.8775930027349386e-05, "loss": 0.0805, "step": 27675 }, { "epoch": 12.91044776119403, "grad_norm": 0.45227396316732354, "learning_rate": 1.8767933314874382e-05, "loss": 0.0798, "step": 27680 }, { "epoch": 12.912779850746269, "grad_norm": 0.49462799530532037, "learning_rate": 1.8759937900774038e-05, "loss": 0.0816, "step": 27685 }, { "epoch": 12.915111940298507, "grad_norm": 0.46903794911303176, "learning_rate": 1.875194378623718e-05, "loss": 0.0795, "step": 27690 }, { "epoch": 12.917444029850746, "grad_norm": 0.4950385816647987, "learning_rate": 1.8743950972452477e-05, "loss": 0.0807, "step": 27695 }, { "epoch": 12.919776119402986, "grad_norm": 0.4924682268269597, "learning_rate": 1.8735959460608364e-05, "loss": 0.0795, "step": 27700 }, { "epoch": 12.922108208955224, "grad_norm": 0.463587086784409, "learning_rate": 1.8727969251893107e-05, "loss": 0.0813, "step": 27705 }, { "epoch": 12.924440298507463, "grad_norm": 0.4539254532279128, "learning_rate": 1.871998034749478e-05, "loss": 0.0788, "step": 27710 }, { "epoch": 12.926772388059701, "grad_norm": 0.47173947207829664, "learning_rate": 1.8711992748601252e-05, "loss": 0.0806, "step": 27715 }, { "epoch": 12.92910447761194, "grad_norm": 0.48388330262804014, "learning_rate": 1.8704006456400202e-05, "loss": 0.0807, "step": 27720 }, { "epoch": 12.931436567164178, "grad_norm": 0.4683100606331587, "learning_rate": 1.8696021472079118e-05, "loss": 0.0781, "step": 27725 }, { "epoch": 12.933768656716419, "grad_norm": 0.4702877009118245, "learning_rate": 1.8688037796825285e-05, "loss": 0.0813, "step": 27730 }, { "epoch": 12.936100746268657, "grad_norm": 0.45958771222399575, "learning_rate": 1.8680055431825804e-05, "loss": 0.0828, "step": 27735 }, { "epoch": 12.938432835820896, "grad_norm": 0.463606808029065, "learning_rate": 1.8672074378267573e-05, "loss": 0.081, "step": 27740 }, { "epoch": 12.940764925373134, "grad_norm": 0.47002360369671936, "learning_rate": 1.8664094637337303e-05, "loss": 0.0825, "step": 27745 }, { "epoch": 12.943097014925373, "grad_norm": 0.5043958203722091, "learning_rate": 1.8656116210221502e-05, "loss": 0.0836, "step": 27750 }, { "epoch": 12.945429104477611, "grad_norm": 0.48561280732281453, "learning_rate": 1.8648139098106482e-05, "loss": 0.0821, "step": 27755 }, { "epoch": 12.947761194029852, "grad_norm": 0.4894851039729573, "learning_rate": 1.8640163302178377e-05, "loss": 0.0805, "step": 27760 }, { "epoch": 12.95009328358209, "grad_norm": 0.4716821887736091, "learning_rate": 1.8632188823623086e-05, "loss": 0.081, "step": 27765 }, { "epoch": 12.952425373134329, "grad_norm": 0.47433757369801005, "learning_rate": 1.8624215663626365e-05, "loss": 0.0856, "step": 27770 }, { "epoch": 12.954757462686567, "grad_norm": 0.4839667310069279, "learning_rate": 1.861624382337373e-05, "loss": 0.0815, "step": 27775 }, { "epoch": 12.957089552238806, "grad_norm": 0.4770678375832244, "learning_rate": 1.8608273304050515e-05, "loss": 0.0812, "step": 27780 }, { "epoch": 12.959421641791044, "grad_norm": 0.462717439364187, "learning_rate": 1.8600304106841876e-05, "loss": 0.0819, "step": 27785 }, { "epoch": 12.961753731343283, "grad_norm": 0.49457944852943964, "learning_rate": 1.859233623293274e-05, "loss": 0.0819, "step": 27790 }, { "epoch": 12.964085820895523, "grad_norm": 0.46128153034000535, "learning_rate": 1.8584369683507863e-05, "loss": 0.0815, "step": 27795 }, { "epoch": 12.966417910447761, "grad_norm": 0.47560282771194196, "learning_rate": 1.8576404459751796e-05, "loss": 0.0818, "step": 27800 }, { "epoch": 12.96875, "grad_norm": 0.4611637191923963, "learning_rate": 1.8568440562848876e-05, "loss": 0.0823, "step": 27805 }, { "epoch": 12.971082089552239, "grad_norm": 0.4663308825568393, "learning_rate": 1.8560477993983284e-05, "loss": 0.0827, "step": 27810 }, { "epoch": 12.973414179104477, "grad_norm": 0.48994413315204904, "learning_rate": 1.855251675433895e-05, "loss": 0.0833, "step": 27815 }, { "epoch": 12.975746268656717, "grad_norm": 0.46280199919631415, "learning_rate": 1.8544556845099657e-05, "loss": 0.0836, "step": 27820 }, { "epoch": 12.978078358208956, "grad_norm": 0.4797568647394782, "learning_rate": 1.8536598267448958e-05, "loss": 0.0824, "step": 27825 }, { "epoch": 12.980410447761194, "grad_norm": 0.4869809575323921, "learning_rate": 1.8528641022570202e-05, "loss": 0.0815, "step": 27830 }, { "epoch": 12.982742537313433, "grad_norm": 0.45110818625398963, "learning_rate": 1.8520685111646585e-05, "loss": 0.0839, "step": 27835 }, { "epoch": 12.985074626865671, "grad_norm": 0.48118451206076196, "learning_rate": 1.851273053586105e-05, "loss": 0.0826, "step": 27840 }, { "epoch": 12.98740671641791, "grad_norm": 0.47141488038485785, "learning_rate": 1.850477729639638e-05, "loss": 0.0834, "step": 27845 }, { "epoch": 12.989738805970148, "grad_norm": 0.4612249542595578, "learning_rate": 1.8496825394435146e-05, "loss": 0.0822, "step": 27850 }, { "epoch": 12.992070895522389, "grad_norm": 0.48486123219777105, "learning_rate": 1.8488874831159703e-05, "loss": 0.0829, "step": 27855 }, { "epoch": 12.994402985074627, "grad_norm": 0.4972426568044053, "learning_rate": 1.8480925607752248e-05, "loss": 0.0827, "step": 27860 }, { "epoch": 12.996735074626866, "grad_norm": 0.46936735516139816, "learning_rate": 1.847297772539473e-05, "loss": 0.0809, "step": 27865 }, { "epoch": 12.999067164179104, "grad_norm": 0.47956894745442, "learning_rate": 1.8465031185268943e-05, "loss": 0.0828, "step": 27870 }, { "epoch": 13.001399253731343, "grad_norm": 0.37236441304908463, "learning_rate": 1.845708598855645e-05, "loss": 0.0622, "step": 27875 }, { "epoch": 13.003731343283581, "grad_norm": 0.38929939203098146, "learning_rate": 1.8449142136438628e-05, "loss": 0.0465, "step": 27880 }, { "epoch": 13.006063432835822, "grad_norm": 0.3951201954830634, "learning_rate": 1.8441199630096655e-05, "loss": 0.0498, "step": 27885 }, { "epoch": 13.00839552238806, "grad_norm": 0.40450020923181423, "learning_rate": 1.84332584707115e-05, "loss": 0.0489, "step": 27890 }, { "epoch": 13.010727611940299, "grad_norm": 0.39023714783693697, "learning_rate": 1.8425318659463942e-05, "loss": 0.0462, "step": 27895 }, { "epoch": 13.013059701492537, "grad_norm": 0.3974733104491037, "learning_rate": 1.8417380197534558e-05, "loss": 0.0462, "step": 27900 }, { "epoch": 13.015391791044776, "grad_norm": 0.4289457348574876, "learning_rate": 1.8409443086103713e-05, "loss": 0.0497, "step": 27905 }, { "epoch": 13.017723880597014, "grad_norm": 0.41327053244626477, "learning_rate": 1.8401507326351575e-05, "loss": 0.0471, "step": 27910 }, { "epoch": 13.020055970149254, "grad_norm": 0.3950312081697639, "learning_rate": 1.839357291945813e-05, "loss": 0.0475, "step": 27915 }, { "epoch": 13.022388059701493, "grad_norm": 0.37709346031922103, "learning_rate": 1.8385639866603144e-05, "loss": 0.0442, "step": 27920 }, { "epoch": 13.024720149253731, "grad_norm": 0.4032210701808419, "learning_rate": 1.837770816896618e-05, "loss": 0.0449, "step": 27925 }, { "epoch": 13.02705223880597, "grad_norm": 0.35991059213641485, "learning_rate": 1.836977782772661e-05, "loss": 0.0459, "step": 27930 }, { "epoch": 13.029384328358208, "grad_norm": 0.40316897320346257, "learning_rate": 1.8361848844063594e-05, "loss": 0.0465, "step": 27935 }, { "epoch": 13.031716417910447, "grad_norm": 0.4152661847434969, "learning_rate": 1.8353921219156102e-05, "loss": 0.045, "step": 27940 }, { "epoch": 13.034048507462687, "grad_norm": 0.3548442152984328, "learning_rate": 1.83459949541829e-05, "loss": 0.046, "step": 27945 }, { "epoch": 13.036380597014926, "grad_norm": 0.41518209373681675, "learning_rate": 1.8338070050322544e-05, "loss": 0.0479, "step": 27950 }, { "epoch": 13.038712686567164, "grad_norm": 0.39393035315869024, "learning_rate": 1.8330146508753377e-05, "loss": 0.048, "step": 27955 }, { "epoch": 13.041044776119403, "grad_norm": 0.4330170493894648, "learning_rate": 1.8322224330653576e-05, "loss": 0.0464, "step": 27960 }, { "epoch": 13.043376865671641, "grad_norm": 0.41046690567031413, "learning_rate": 1.8314303517201076e-05, "loss": 0.0474, "step": 27965 }, { "epoch": 13.04570895522388, "grad_norm": 0.4243630917906131, "learning_rate": 1.830638406957364e-05, "loss": 0.0464, "step": 27970 }, { "epoch": 13.04804104477612, "grad_norm": 0.3891515092428246, "learning_rate": 1.829846598894881e-05, "loss": 0.0461, "step": 27975 }, { "epoch": 13.050373134328359, "grad_norm": 0.3952020944959068, "learning_rate": 1.8290549276503915e-05, "loss": 0.0449, "step": 27980 }, { "epoch": 13.052705223880597, "grad_norm": 0.40889833249164514, "learning_rate": 1.8282633933416115e-05, "loss": 0.0459, "step": 27985 }, { "epoch": 13.055037313432836, "grad_norm": 0.3817198728231914, "learning_rate": 1.8274719960862325e-05, "loss": 0.046, "step": 27990 }, { "epoch": 13.057369402985074, "grad_norm": 0.36679331767827617, "learning_rate": 1.82668073600193e-05, "loss": 0.0471, "step": 27995 }, { "epoch": 13.059701492537313, "grad_norm": 0.3993309913435297, "learning_rate": 1.825889613206355e-05, "loss": 0.0476, "step": 28000 }, { "epoch": 13.062033582089553, "grad_norm": 0.3996152603951055, "learning_rate": 1.82509862781714e-05, "loss": 0.0459, "step": 28005 }, { "epoch": 13.064365671641792, "grad_norm": 0.39296961485745535, "learning_rate": 1.824307779951898e-05, "loss": 0.0455, "step": 28010 }, { "epoch": 13.06669776119403, "grad_norm": 0.4009685970026534, "learning_rate": 1.8235170697282194e-05, "loss": 0.0468, "step": 28015 }, { "epoch": 13.069029850746269, "grad_norm": 0.39261957532551783, "learning_rate": 1.8227264972636758e-05, "loss": 0.0453, "step": 28020 }, { "epoch": 13.071361940298507, "grad_norm": 0.3977350199827115, "learning_rate": 1.8219360626758185e-05, "loss": 0.0467, "step": 28025 }, { "epoch": 13.073694029850746, "grad_norm": 0.40072990431856725, "learning_rate": 1.821145766082176e-05, "loss": 0.0476, "step": 28030 }, { "epoch": 13.076026119402986, "grad_norm": 0.39788601515213007, "learning_rate": 1.820355607600259e-05, "loss": 0.0468, "step": 28035 }, { "epoch": 13.078358208955224, "grad_norm": 0.3951009547762045, "learning_rate": 1.8195655873475554e-05, "loss": 0.0471, "step": 28040 }, { "epoch": 13.080690298507463, "grad_norm": 0.3851891902289224, "learning_rate": 1.818775705441535e-05, "loss": 0.0459, "step": 28045 }, { "epoch": 13.083022388059701, "grad_norm": 0.41553768093208243, "learning_rate": 1.8179859619996448e-05, "loss": 0.0462, "step": 28050 }, { "epoch": 13.08535447761194, "grad_norm": 0.41849108329544615, "learning_rate": 1.8171963571393112e-05, "loss": 0.0472, "step": 28055 }, { "epoch": 13.087686567164178, "grad_norm": 0.39478399783123747, "learning_rate": 1.8164068909779437e-05, "loss": 0.0476, "step": 28060 }, { "epoch": 13.090018656716419, "grad_norm": 0.44184012925811883, "learning_rate": 1.8156175636329252e-05, "loss": 0.0464, "step": 28065 }, { "epoch": 13.092350746268657, "grad_norm": 0.43346754404801785, "learning_rate": 1.814828375221623e-05, "loss": 0.0487, "step": 28070 }, { "epoch": 13.094682835820896, "grad_norm": 0.42655236004234687, "learning_rate": 1.814039325861382e-05, "loss": 0.0477, "step": 28075 }, { "epoch": 13.097014925373134, "grad_norm": 0.422912609918946, "learning_rate": 1.8132504156695245e-05, "loss": 0.048, "step": 28080 }, { "epoch": 13.099347014925373, "grad_norm": 0.4304099932976602, "learning_rate": 1.812461644763355e-05, "loss": 0.0466, "step": 28085 }, { "epoch": 13.101679104477611, "grad_norm": 0.4099814531545141, "learning_rate": 1.8116730132601565e-05, "loss": 0.0485, "step": 28090 }, { "epoch": 13.104011194029852, "grad_norm": 0.3931526675591652, "learning_rate": 1.8108845212771913e-05, "loss": 0.0497, "step": 28095 }, { "epoch": 13.10634328358209, "grad_norm": 0.4099483734563276, "learning_rate": 1.8100961689317003e-05, "loss": 0.0465, "step": 28100 }, { "epoch": 13.108675373134329, "grad_norm": 0.40751611517011993, "learning_rate": 1.8093079563409017e-05, "loss": 0.0475, "step": 28105 }, { "epoch": 13.111007462686567, "grad_norm": 0.39302977585411436, "learning_rate": 1.808519883621999e-05, "loss": 0.046, "step": 28110 }, { "epoch": 13.113339552238806, "grad_norm": 0.42013699811271643, "learning_rate": 1.807731950892167e-05, "loss": 0.0475, "step": 28115 }, { "epoch": 13.115671641791044, "grad_norm": 0.386808496001414, "learning_rate": 1.806944158268568e-05, "loss": 0.0459, "step": 28120 }, { "epoch": 13.118003731343284, "grad_norm": 0.3893854549979811, "learning_rate": 1.8061565058683362e-05, "loss": 0.0458, "step": 28125 }, { "epoch": 13.120335820895523, "grad_norm": 0.4250788434435963, "learning_rate": 1.805368993808589e-05, "loss": 0.0488, "step": 28130 }, { "epoch": 13.122667910447761, "grad_norm": 0.4252522886172523, "learning_rate": 1.8045816222064212e-05, "loss": 0.0466, "step": 28135 }, { "epoch": 13.125, "grad_norm": 0.39682094382634986, "learning_rate": 1.803794391178908e-05, "loss": 0.0441, "step": 28140 }, { "epoch": 13.127332089552239, "grad_norm": 0.42403878083243807, "learning_rate": 1.8030073008431032e-05, "loss": 0.0455, "step": 28145 }, { "epoch": 13.129664179104477, "grad_norm": 0.3987706311248275, "learning_rate": 1.8022203513160406e-05, "loss": 0.0478, "step": 28150 }, { "epoch": 13.131996268656717, "grad_norm": 0.41876240713571944, "learning_rate": 1.8014335427147288e-05, "loss": 0.0465, "step": 28155 }, { "epoch": 13.134328358208956, "grad_norm": 0.4314019595578977, "learning_rate": 1.8006468751561628e-05, "loss": 0.0458, "step": 28160 }, { "epoch": 13.136660447761194, "grad_norm": 0.4077694120248306, "learning_rate": 1.7998603487573095e-05, "loss": 0.0472, "step": 28165 }, { "epoch": 13.138992537313433, "grad_norm": 0.40500278505349274, "learning_rate": 1.7990739636351188e-05, "loss": 0.0466, "step": 28170 }, { "epoch": 13.141324626865671, "grad_norm": 0.4222073239710467, "learning_rate": 1.798287719906519e-05, "loss": 0.0462, "step": 28175 }, { "epoch": 13.14365671641791, "grad_norm": 0.4048199842501008, "learning_rate": 1.797501617688417e-05, "loss": 0.0489, "step": 28180 }, { "epoch": 13.145988805970148, "grad_norm": 0.3991353561176378, "learning_rate": 1.7967156570976974e-05, "loss": 0.0461, "step": 28185 }, { "epoch": 13.148320895522389, "grad_norm": 0.4222035409912974, "learning_rate": 1.795929838251227e-05, "loss": 0.049, "step": 28190 }, { "epoch": 13.150652985074627, "grad_norm": 0.42459806531020755, "learning_rate": 1.7951441612658488e-05, "loss": 0.049, "step": 28195 }, { "epoch": 13.152985074626866, "grad_norm": 0.3798219261567842, "learning_rate": 1.7943586262583846e-05, "loss": 0.0477, "step": 28200 }, { "epoch": 13.155317164179104, "grad_norm": 0.38567040179781337, "learning_rate": 1.7935732333456362e-05, "loss": 0.0464, "step": 28205 }, { "epoch": 13.157649253731343, "grad_norm": 0.41809785320324566, "learning_rate": 1.7927879826443844e-05, "loss": 0.048, "step": 28210 }, { "epoch": 13.159981343283581, "grad_norm": 0.4043341278801013, "learning_rate": 1.7920028742713885e-05, "loss": 0.048, "step": 28215 }, { "epoch": 13.162313432835822, "grad_norm": 0.40616853765645694, "learning_rate": 1.791217908343386e-05, "loss": 0.0486, "step": 28220 }, { "epoch": 13.16464552238806, "grad_norm": 0.3875328850291756, "learning_rate": 1.7904330849770957e-05, "loss": 0.0457, "step": 28225 }, { "epoch": 13.166977611940299, "grad_norm": 0.4087696133177745, "learning_rate": 1.78964840428921e-05, "loss": 0.0481, "step": 28230 }, { "epoch": 13.169309701492537, "grad_norm": 0.41019433871336264, "learning_rate": 1.788863866396407e-05, "loss": 0.0488, "step": 28235 }, { "epoch": 13.171641791044776, "grad_norm": 0.4266708797213414, "learning_rate": 1.7880794714153366e-05, "loss": 0.0511, "step": 28240 }, { "epoch": 13.173973880597014, "grad_norm": 0.40888646678008145, "learning_rate": 1.7872952194626337e-05, "loss": 0.048, "step": 28245 }, { "epoch": 13.176305970149254, "grad_norm": 0.44596198478016735, "learning_rate": 1.786511110654907e-05, "loss": 0.0497, "step": 28250 }, { "epoch": 13.178638059701493, "grad_norm": 0.39256632587338663, "learning_rate": 1.785727145108747e-05, "loss": 0.0467, "step": 28255 }, { "epoch": 13.180970149253731, "grad_norm": 0.4411511142381691, "learning_rate": 1.784943322940722e-05, "loss": 0.0504, "step": 28260 }, { "epoch": 13.18330223880597, "grad_norm": 0.4419521037089005, "learning_rate": 1.7841596442673785e-05, "loss": 0.0483, "step": 28265 }, { "epoch": 13.185634328358208, "grad_norm": 0.4411285203714949, "learning_rate": 1.7833761092052415e-05, "loss": 0.0479, "step": 28270 }, { "epoch": 13.187966417910447, "grad_norm": 0.41724146946171026, "learning_rate": 1.782592717870817e-05, "loss": 0.048, "step": 28275 }, { "epoch": 13.190298507462687, "grad_norm": 0.3856826740110639, "learning_rate": 1.7818094703805837e-05, "loss": 0.0495, "step": 28280 }, { "epoch": 13.192630597014926, "grad_norm": 0.4208317636424599, "learning_rate": 1.7810263668510084e-05, "loss": 0.0484, "step": 28285 }, { "epoch": 13.194962686567164, "grad_norm": 0.41672893124376886, "learning_rate": 1.780243407398527e-05, "loss": 0.0484, "step": 28290 }, { "epoch": 13.197294776119403, "grad_norm": 0.407447506180696, "learning_rate": 1.7794605921395607e-05, "loss": 0.0482, "step": 28295 }, { "epoch": 13.199626865671641, "grad_norm": 0.43460946042341486, "learning_rate": 1.7786779211905048e-05, "loss": 0.0484, "step": 28300 }, { "epoch": 13.20195895522388, "grad_norm": 0.40751128703571465, "learning_rate": 1.7778953946677353e-05, "loss": 0.0465, "step": 28305 }, { "epoch": 13.20429104477612, "grad_norm": 0.4237634328147941, "learning_rate": 1.7771130126876068e-05, "loss": 0.0462, "step": 28310 }, { "epoch": 13.206623134328359, "grad_norm": 0.43054284391219466, "learning_rate": 1.776330775366452e-05, "loss": 0.0487, "step": 28315 }, { "epoch": 13.208955223880597, "grad_norm": 0.41496669829578475, "learning_rate": 1.775548682820582e-05, "loss": 0.0484, "step": 28320 }, { "epoch": 13.211287313432836, "grad_norm": 0.42511511311119987, "learning_rate": 1.774766735166287e-05, "loss": 0.0471, "step": 28325 }, { "epoch": 13.213619402985074, "grad_norm": 0.42768885502713005, "learning_rate": 1.7739849325198334e-05, "loss": 0.0485, "step": 28330 }, { "epoch": 13.215951492537313, "grad_norm": 0.43048006079907114, "learning_rate": 1.7732032749974704e-05, "loss": 0.05, "step": 28335 }, { "epoch": 13.218283582089553, "grad_norm": 0.41352405861504193, "learning_rate": 1.7724217627154204e-05, "loss": 0.0482, "step": 28340 }, { "epoch": 13.220615671641792, "grad_norm": 0.4220599760948571, "learning_rate": 1.771640395789888e-05, "loss": 0.0493, "step": 28345 }, { "epoch": 13.22294776119403, "grad_norm": 0.43140989084463716, "learning_rate": 1.7708591743370555e-05, "loss": 0.0518, "step": 28350 }, { "epoch": 13.225279850746269, "grad_norm": 0.4114712339979646, "learning_rate": 1.7700780984730818e-05, "loss": 0.0492, "step": 28355 }, { "epoch": 13.227611940298507, "grad_norm": 0.4053353123096339, "learning_rate": 1.7692971683141063e-05, "loss": 0.0492, "step": 28360 }, { "epoch": 13.229944029850746, "grad_norm": 0.4313517926542267, "learning_rate": 1.7685163839762457e-05, "loss": 0.0477, "step": 28365 }, { "epoch": 13.232276119402986, "grad_norm": 0.41452314146277436, "learning_rate": 1.7677357455755954e-05, "loss": 0.0491, "step": 28370 }, { "epoch": 13.234608208955224, "grad_norm": 0.4466931553416193, "learning_rate": 1.7669552532282287e-05, "loss": 0.0484, "step": 28375 }, { "epoch": 13.236940298507463, "grad_norm": 0.4330569571497868, "learning_rate": 1.766174907050196e-05, "loss": 0.0481, "step": 28380 }, { "epoch": 13.239272388059701, "grad_norm": 0.42156346716655985, "learning_rate": 1.765394707157529e-05, "loss": 0.0497, "step": 28385 }, { "epoch": 13.24160447761194, "grad_norm": 0.411306330916367, "learning_rate": 1.764614653666235e-05, "loss": 0.049, "step": 28390 }, { "epoch": 13.243936567164178, "grad_norm": 0.44961854461692746, "learning_rate": 1.763834746692301e-05, "loss": 0.0489, "step": 28395 }, { "epoch": 13.246268656716419, "grad_norm": 0.45775665153790107, "learning_rate": 1.7630549863516914e-05, "loss": 0.0503, "step": 28400 }, { "epoch": 13.248600746268657, "grad_norm": 0.4414780796682878, "learning_rate": 1.76227537276035e-05, "loss": 0.049, "step": 28405 }, { "epoch": 13.250932835820896, "grad_norm": 0.4192283240062205, "learning_rate": 1.7614959060341968e-05, "loss": 0.0481, "step": 28410 }, { "epoch": 13.253264925373134, "grad_norm": 0.4152319943514378, "learning_rate": 1.7607165862891302e-05, "loss": 0.047, "step": 28415 }, { "epoch": 13.255597014925373, "grad_norm": 0.4388091399067465, "learning_rate": 1.75993741364103e-05, "loss": 0.0498, "step": 28420 }, { "epoch": 13.257929104477611, "grad_norm": 0.42143016980499576, "learning_rate": 1.7591583882057495e-05, "loss": 0.0506, "step": 28425 }, { "epoch": 13.260261194029852, "grad_norm": 0.43353160477852354, "learning_rate": 1.7583795100991246e-05, "loss": 0.0501, "step": 28430 }, { "epoch": 13.26259328358209, "grad_norm": 0.41956962859153085, "learning_rate": 1.7576007794369648e-05, "loss": 0.048, "step": 28435 }, { "epoch": 13.264925373134329, "grad_norm": 0.43964440523396237, "learning_rate": 1.7568221963350605e-05, "loss": 0.0493, "step": 28440 }, { "epoch": 13.267257462686567, "grad_norm": 0.4085268995304815, "learning_rate": 1.75604376090918e-05, "loss": 0.0467, "step": 28445 }, { "epoch": 13.269589552238806, "grad_norm": 0.4108907873025686, "learning_rate": 1.755265473275069e-05, "loss": 0.0483, "step": 28450 }, { "epoch": 13.271921641791044, "grad_norm": 0.4395394017487624, "learning_rate": 1.7544873335484514e-05, "loss": 0.0483, "step": 28455 }, { "epoch": 13.274253731343283, "grad_norm": 0.4425271197261562, "learning_rate": 1.7537093418450294e-05, "loss": 0.0499, "step": 28460 }, { "epoch": 13.276585820895523, "grad_norm": 0.42790966175787026, "learning_rate": 1.7529314982804817e-05, "loss": 0.0498, "step": 28465 }, { "epoch": 13.278917910447761, "grad_norm": 0.4545651796546929, "learning_rate": 1.7521538029704682e-05, "loss": 0.0519, "step": 28470 }, { "epoch": 13.28125, "grad_norm": 0.40069870603138874, "learning_rate": 1.7513762560306224e-05, "loss": 0.0502, "step": 28475 }, { "epoch": 13.283582089552239, "grad_norm": 0.4443307786168434, "learning_rate": 1.750598857576561e-05, "loss": 0.0488, "step": 28480 }, { "epoch": 13.285914179104477, "grad_norm": 0.4336902483531214, "learning_rate": 1.749821607723873e-05, "loss": 0.0489, "step": 28485 }, { "epoch": 13.288246268656717, "grad_norm": 0.44367814764247293, "learning_rate": 1.749044506588129e-05, "loss": 0.0517, "step": 28490 }, { "epoch": 13.290578358208956, "grad_norm": 0.41214907995101957, "learning_rate": 1.748267554284877e-05, "loss": 0.0493, "step": 28495 }, { "epoch": 13.292910447761194, "grad_norm": 0.4199544545869973, "learning_rate": 1.7474907509296412e-05, "loss": 0.049, "step": 28500 }, { "epoch": 13.295242537313433, "grad_norm": 0.4269320508862906, "learning_rate": 1.746714096637926e-05, "loss": 0.0497, "step": 28505 }, { "epoch": 13.297574626865671, "grad_norm": 0.4298789069202734, "learning_rate": 1.7459375915252123e-05, "loss": 0.0493, "step": 28510 }, { "epoch": 13.29990671641791, "grad_norm": 0.4349294088634044, "learning_rate": 1.745161235706958e-05, "loss": 0.0487, "step": 28515 }, { "epoch": 13.302238805970148, "grad_norm": 0.453585492907447, "learning_rate": 1.7443850292986007e-05, "loss": 0.0507, "step": 28520 }, { "epoch": 13.304570895522389, "grad_norm": 0.42207110442736434, "learning_rate": 1.7436089724155545e-05, "loss": 0.0477, "step": 28525 }, { "epoch": 13.306902985074627, "grad_norm": 0.413534523007601, "learning_rate": 1.742833065173212e-05, "loss": 0.0494, "step": 28530 }, { "epoch": 13.309235074626866, "grad_norm": 0.4369411584047025, "learning_rate": 1.7420573076869422e-05, "loss": 0.0506, "step": 28535 }, { "epoch": 13.311567164179104, "grad_norm": 0.4340420571631667, "learning_rate": 1.7412817000720937e-05, "loss": 0.0503, "step": 28540 }, { "epoch": 13.313899253731343, "grad_norm": 0.44195462857583856, "learning_rate": 1.7405062424439916e-05, "loss": 0.0503, "step": 28545 }, { "epoch": 13.316231343283581, "grad_norm": 0.4282808789141139, "learning_rate": 1.7397309349179393e-05, "loss": 0.0495, "step": 28550 }, { "epoch": 13.318563432835822, "grad_norm": 0.4393148122376941, "learning_rate": 1.7389557776092182e-05, "loss": 0.0507, "step": 28555 }, { "epoch": 13.32089552238806, "grad_norm": 0.41744671182532794, "learning_rate": 1.738180770633085e-05, "loss": 0.0485, "step": 28560 }, { "epoch": 13.323227611940299, "grad_norm": 0.4387686464413788, "learning_rate": 1.7374059141047772e-05, "loss": 0.0499, "step": 28565 }, { "epoch": 13.325559701492537, "grad_norm": 0.43524910721953175, "learning_rate": 1.7366312081395075e-05, "loss": 0.0486, "step": 28570 }, { "epoch": 13.327891791044776, "grad_norm": 0.43716315578412307, "learning_rate": 1.735856652852468e-05, "loss": 0.0513, "step": 28575 }, { "epoch": 13.330223880597014, "grad_norm": 0.4144886942878345, "learning_rate": 1.7350822483588277e-05, "loss": 0.0508, "step": 28580 }, { "epoch": 13.332555970149254, "grad_norm": 0.4449341030931336, "learning_rate": 1.734307994773734e-05, "loss": 0.0502, "step": 28585 }, { "epoch": 13.334888059701493, "grad_norm": 0.42762927128880046, "learning_rate": 1.7335338922123076e-05, "loss": 0.0505, "step": 28590 }, { "epoch": 13.337220149253731, "grad_norm": 0.4359923085406246, "learning_rate": 1.732759940789655e-05, "loss": 0.0501, "step": 28595 }, { "epoch": 13.33955223880597, "grad_norm": 0.430380544406577, "learning_rate": 1.7319861406208504e-05, "loss": 0.0505, "step": 28600 }, { "epoch": 13.341884328358208, "grad_norm": 0.42616721109902134, "learning_rate": 1.7312124918209548e-05, "loss": 0.0514, "step": 28605 }, { "epoch": 13.344216417910447, "grad_norm": 0.44254141266089875, "learning_rate": 1.7304389945050004e-05, "loss": 0.0519, "step": 28610 }, { "epoch": 13.346548507462687, "grad_norm": 0.4364966383474057, "learning_rate": 1.729665648787998e-05, "loss": 0.0496, "step": 28615 }, { "epoch": 13.348880597014926, "grad_norm": 0.44307306450601236, "learning_rate": 1.728892454784938e-05, "loss": 0.0514, "step": 28620 }, { "epoch": 13.351212686567164, "grad_norm": 0.44162770522840245, "learning_rate": 1.7281194126107864e-05, "loss": 0.0504, "step": 28625 }, { "epoch": 13.353544776119403, "grad_norm": 0.4262628170290931, "learning_rate": 1.7273465223804876e-05, "loss": 0.0484, "step": 28630 }, { "epoch": 13.355876865671641, "grad_norm": 0.4292376870793598, "learning_rate": 1.726573784208963e-05, "loss": 0.0504, "step": 28635 }, { "epoch": 13.35820895522388, "grad_norm": 0.428252288881246, "learning_rate": 1.7258011982111094e-05, "loss": 0.0508, "step": 28640 }, { "epoch": 13.36054104477612, "grad_norm": 0.4505696375419499, "learning_rate": 1.7250287645018067e-05, "loss": 0.0504, "step": 28645 }, { "epoch": 13.362873134328359, "grad_norm": 0.4352911937168258, "learning_rate": 1.7242564831959045e-05, "loss": 0.0512, "step": 28650 }, { "epoch": 13.365205223880597, "grad_norm": 0.43475667147720276, "learning_rate": 1.7234843544082368e-05, "loss": 0.0505, "step": 28655 }, { "epoch": 13.367537313432836, "grad_norm": 0.41375051573525246, "learning_rate": 1.72271237825361e-05, "loss": 0.0488, "step": 28660 }, { "epoch": 13.369869402985074, "grad_norm": 0.418730297894936, "learning_rate": 1.7219405548468098e-05, "loss": 0.0491, "step": 28665 }, { "epoch": 13.372201492537313, "grad_norm": 0.4613561546992675, "learning_rate": 1.7211688843025987e-05, "loss": 0.0529, "step": 28670 }, { "epoch": 13.374533582089553, "grad_norm": 0.41795574794555007, "learning_rate": 1.7203973667357177e-05, "loss": 0.0491, "step": 28675 }, { "epoch": 13.376865671641792, "grad_norm": 0.42875903341034743, "learning_rate": 1.7196260022608828e-05, "loss": 0.0502, "step": 28680 }, { "epoch": 13.37919776119403, "grad_norm": 0.4288406813150327, "learning_rate": 1.7188547909927903e-05, "loss": 0.0489, "step": 28685 }, { "epoch": 13.381529850746269, "grad_norm": 0.43534143451828516, "learning_rate": 1.7180837330461093e-05, "loss": 0.0526, "step": 28690 }, { "epoch": 13.383861940298507, "grad_norm": 0.443579905795443, "learning_rate": 1.7173128285354917e-05, "loss": 0.0509, "step": 28695 }, { "epoch": 13.386194029850746, "grad_norm": 0.461026221079942, "learning_rate": 1.716542077575561e-05, "loss": 0.0509, "step": 28700 }, { "epoch": 13.388526119402986, "grad_norm": 0.4475252874272554, "learning_rate": 1.7157714802809223e-05, "loss": 0.0516, "step": 28705 }, { "epoch": 13.390858208955224, "grad_norm": 0.4496477817042167, "learning_rate": 1.7150010367661546e-05, "loss": 0.0521, "step": 28710 }, { "epoch": 13.393190298507463, "grad_norm": 0.43400388016515107, "learning_rate": 1.7142307471458165e-05, "loss": 0.0532, "step": 28715 }, { "epoch": 13.395522388059701, "grad_norm": 0.4473308098891312, "learning_rate": 1.7134606115344427e-05, "loss": 0.0508, "step": 28720 }, { "epoch": 13.39785447761194, "grad_norm": 0.4466367473005122, "learning_rate": 1.7126906300465446e-05, "loss": 0.0508, "step": 28725 }, { "epoch": 13.400186567164178, "grad_norm": 0.4367289059017423, "learning_rate": 1.7119208027966116e-05, "loss": 0.05, "step": 28730 }, { "epoch": 13.402518656716419, "grad_norm": 0.4440363044384357, "learning_rate": 1.7111511298991088e-05, "loss": 0.0529, "step": 28735 }, { "epoch": 13.404850746268657, "grad_norm": 0.4378158673662948, "learning_rate": 1.710381611468479e-05, "loss": 0.0504, "step": 28740 }, { "epoch": 13.407182835820896, "grad_norm": 0.43672540624145056, "learning_rate": 1.7096122476191434e-05, "loss": 0.0498, "step": 28745 }, { "epoch": 13.409514925373134, "grad_norm": 0.45352202115205287, "learning_rate": 1.7088430384654984e-05, "loss": 0.0514, "step": 28750 }, { "epoch": 13.411847014925373, "grad_norm": 0.44388680080336956, "learning_rate": 1.7080739841219175e-05, "loss": 0.0531, "step": 28755 }, { "epoch": 13.414179104477611, "grad_norm": 0.4608614949543501, "learning_rate": 1.7073050847027537e-05, "loss": 0.0499, "step": 28760 }, { "epoch": 13.416511194029852, "grad_norm": 0.4231465227566958, "learning_rate": 1.7065363403223316e-05, "loss": 0.051, "step": 28765 }, { "epoch": 13.41884328358209, "grad_norm": 0.4529072183742898, "learning_rate": 1.7057677510949598e-05, "loss": 0.0536, "step": 28770 }, { "epoch": 13.421175373134329, "grad_norm": 0.42342746355468724, "learning_rate": 1.704999317134917e-05, "loss": 0.0511, "step": 28775 }, { "epoch": 13.423507462686567, "grad_norm": 0.43044509524192354, "learning_rate": 1.704231038556465e-05, "loss": 0.0498, "step": 28780 }, { "epoch": 13.425839552238806, "grad_norm": 0.46051609282563, "learning_rate": 1.7034629154738368e-05, "loss": 0.054, "step": 28785 }, { "epoch": 13.428171641791044, "grad_norm": 0.44361926592730566, "learning_rate": 1.702694948001246e-05, "loss": 0.0524, "step": 28790 }, { "epoch": 13.430503731343283, "grad_norm": 0.456038396168676, "learning_rate": 1.7019271362528823e-05, "loss": 0.0508, "step": 28795 }, { "epoch": 13.432835820895523, "grad_norm": 0.4247458627102662, "learning_rate": 1.701159480342911e-05, "loss": 0.0496, "step": 28800 }, { "epoch": 13.435167910447761, "grad_norm": 0.44683841790478257, "learning_rate": 1.7003919803854765e-05, "loss": 0.0516, "step": 28805 }, { "epoch": 13.4375, "grad_norm": 0.4271824206720838, "learning_rate": 1.6996246364946985e-05, "loss": 0.0516, "step": 28810 }, { "epoch": 13.439832089552239, "grad_norm": 0.44189573959596673, "learning_rate": 1.698857448784672e-05, "loss": 0.0504, "step": 28815 }, { "epoch": 13.442164179104477, "grad_norm": 0.45051378844551193, "learning_rate": 1.6980904173694727e-05, "loss": 0.0529, "step": 28820 }, { "epoch": 13.444496268656717, "grad_norm": 0.4560157457871684, "learning_rate": 1.6973235423631488e-05, "loss": 0.0524, "step": 28825 }, { "epoch": 13.446828358208956, "grad_norm": 0.4456213453624248, "learning_rate": 1.69655682387973e-05, "loss": 0.0519, "step": 28830 }, { "epoch": 13.449160447761194, "grad_norm": 0.4347548163370142, "learning_rate": 1.6957902620332163e-05, "loss": 0.0517, "step": 28835 }, { "epoch": 13.451492537313433, "grad_norm": 0.45139053480896074, "learning_rate": 1.695023856937591e-05, "loss": 0.0524, "step": 28840 }, { "epoch": 13.453824626865671, "grad_norm": 0.4571401674147697, "learning_rate": 1.6942576087068096e-05, "loss": 0.0512, "step": 28845 }, { "epoch": 13.45615671641791, "grad_norm": 0.43101007552277676, "learning_rate": 1.6934915174548073e-05, "loss": 0.051, "step": 28850 }, { "epoch": 13.458488805970148, "grad_norm": 0.4506685269156032, "learning_rate": 1.6927255832954934e-05, "loss": 0.0532, "step": 28855 }, { "epoch": 13.460820895522389, "grad_norm": 0.4493852789536838, "learning_rate": 1.691959806342756e-05, "loss": 0.0524, "step": 28860 }, { "epoch": 13.463152985074627, "grad_norm": 0.4590730272849826, "learning_rate": 1.691194186710457e-05, "loss": 0.0513, "step": 28865 }, { "epoch": 13.465485074626866, "grad_norm": 0.45000841332483854, "learning_rate": 1.690428724512439e-05, "loss": 0.0518, "step": 28870 }, { "epoch": 13.467817164179104, "grad_norm": 0.4557948856470383, "learning_rate": 1.6896634198625167e-05, "loss": 0.0506, "step": 28875 }, { "epoch": 13.470149253731343, "grad_norm": 0.4501526266096002, "learning_rate": 1.688898272874485e-05, "loss": 0.053, "step": 28880 }, { "epoch": 13.472481343283581, "grad_norm": 0.4616148661405171, "learning_rate": 1.688133283662113e-05, "loss": 0.0528, "step": 28885 }, { "epoch": 13.474813432835822, "grad_norm": 0.46905043400568786, "learning_rate": 1.6873684523391487e-05, "loss": 0.0536, "step": 28890 }, { "epoch": 13.47714552238806, "grad_norm": 0.4307658048180297, "learning_rate": 1.6866037790193135e-05, "loss": 0.0502, "step": 28895 }, { "epoch": 13.479477611940299, "grad_norm": 0.43568037675714405, "learning_rate": 1.685839263816308e-05, "loss": 0.0522, "step": 28900 }, { "epoch": 13.481809701492537, "grad_norm": 0.4119592028574455, "learning_rate": 1.6850749068438092e-05, "loss": 0.0506, "step": 28905 }, { "epoch": 13.484141791044776, "grad_norm": 0.4213200511579645, "learning_rate": 1.6843107082154675e-05, "loss": 0.0508, "step": 28910 }, { "epoch": 13.486473880597014, "grad_norm": 0.4449618562516258, "learning_rate": 1.6835466680449128e-05, "loss": 0.0519, "step": 28915 }, { "epoch": 13.488805970149254, "grad_norm": 0.453051858471691, "learning_rate": 1.68278278644575e-05, "loss": 0.0525, "step": 28920 }, { "epoch": 13.491138059701493, "grad_norm": 0.44805954970386697, "learning_rate": 1.6820190635315627e-05, "loss": 0.0536, "step": 28925 }, { "epoch": 13.493470149253731, "grad_norm": 0.43527665547625094, "learning_rate": 1.6812554994159073e-05, "loss": 0.053, "step": 28930 }, { "epoch": 13.49580223880597, "grad_norm": 0.4386663941233288, "learning_rate": 1.6804920942123207e-05, "loss": 0.0523, "step": 28935 }, { "epoch": 13.498134328358208, "grad_norm": 0.4610099164799043, "learning_rate": 1.679728848034311e-05, "loss": 0.0531, "step": 28940 }, { "epoch": 13.500466417910447, "grad_norm": 0.43210045057548047, "learning_rate": 1.6789657609953686e-05, "loss": 0.0517, "step": 28945 }, { "epoch": 13.502798507462687, "grad_norm": 0.44027834426023765, "learning_rate": 1.678202833208954e-05, "loss": 0.0526, "step": 28950 }, { "epoch": 13.505130597014926, "grad_norm": 0.45416654206993984, "learning_rate": 1.6774400647885106e-05, "loss": 0.0527, "step": 28955 }, { "epoch": 13.507462686567164, "grad_norm": 0.4638559285040807, "learning_rate": 1.6766774558474523e-05, "loss": 0.0516, "step": 28960 }, { "epoch": 13.509794776119403, "grad_norm": 0.44995654477206637, "learning_rate": 1.675915006499173e-05, "loss": 0.053, "step": 28965 }, { "epoch": 13.512126865671641, "grad_norm": 0.44831034772267137, "learning_rate": 1.675152716857041e-05, "loss": 0.0534, "step": 28970 }, { "epoch": 13.51445895522388, "grad_norm": 0.44512003782522136, "learning_rate": 1.6743905870344017e-05, "loss": 0.0518, "step": 28975 }, { "epoch": 13.51679104477612, "grad_norm": 0.4613804106004079, "learning_rate": 1.6736286171445763e-05, "loss": 0.0549, "step": 28980 }, { "epoch": 13.519123134328359, "grad_norm": 0.42746005163677603, "learning_rate": 1.672866807300864e-05, "loss": 0.0521, "step": 28985 }, { "epoch": 13.521455223880597, "grad_norm": 0.4350552787737773, "learning_rate": 1.672105157616535e-05, "loss": 0.0522, "step": 28990 }, { "epoch": 13.523787313432836, "grad_norm": 0.44113740083703656, "learning_rate": 1.6713436682048436e-05, "loss": 0.0503, "step": 28995 }, { "epoch": 13.526119402985074, "grad_norm": 0.45920149373362323, "learning_rate": 1.670582339179012e-05, "loss": 0.0544, "step": 29000 }, { "epoch": 13.528451492537313, "grad_norm": 0.4368426641478583, "learning_rate": 1.669821170652246e-05, "loss": 0.0535, "step": 29005 }, { "epoch": 13.530783582089553, "grad_norm": 0.45581528058879334, "learning_rate": 1.669060162737722e-05, "loss": 0.0529, "step": 29010 }, { "epoch": 13.533115671641792, "grad_norm": 0.4313378971388167, "learning_rate": 1.6682993155485956e-05, "loss": 0.0524, "step": 29015 }, { "epoch": 13.53544776119403, "grad_norm": 0.456580841097419, "learning_rate": 1.667538629197996e-05, "loss": 0.0533, "step": 29020 }, { "epoch": 13.537779850746269, "grad_norm": 0.46328288050334965, "learning_rate": 1.6667781037990317e-05, "loss": 0.0539, "step": 29025 }, { "epoch": 13.540111940298507, "grad_norm": 0.44237442021478424, "learning_rate": 1.666017739464784e-05, "loss": 0.0526, "step": 29030 }, { "epoch": 13.542444029850746, "grad_norm": 0.45876618860374757, "learning_rate": 1.6652575363083145e-05, "loss": 0.0524, "step": 29035 }, { "epoch": 13.544776119402986, "grad_norm": 0.4259302386667209, "learning_rate": 1.664497494442654e-05, "loss": 0.0521, "step": 29040 }, { "epoch": 13.547108208955224, "grad_norm": 0.4389002344932357, "learning_rate": 1.6637376139808174e-05, "loss": 0.0526, "step": 29045 }, { "epoch": 13.549440298507463, "grad_norm": 0.46648377060402807, "learning_rate": 1.6629778950357883e-05, "loss": 0.0524, "step": 29050 }, { "epoch": 13.551772388059701, "grad_norm": 0.4223178419437189, "learning_rate": 1.6622183377205325e-05, "loss": 0.0518, "step": 29055 }, { "epoch": 13.55410447761194, "grad_norm": 0.44343422208880795, "learning_rate": 1.6614589421479876e-05, "loss": 0.0524, "step": 29060 }, { "epoch": 13.556436567164178, "grad_norm": 0.46077066207971407, "learning_rate": 1.660699708431068e-05, "loss": 0.0526, "step": 29065 }, { "epoch": 13.558768656716419, "grad_norm": 0.4221905045740723, "learning_rate": 1.6599406366826648e-05, "loss": 0.0537, "step": 29070 }, { "epoch": 13.561100746268657, "grad_norm": 0.44771954907303996, "learning_rate": 1.659181727015645e-05, "loss": 0.0532, "step": 29075 }, { "epoch": 13.563432835820896, "grad_norm": 0.44926760185786047, "learning_rate": 1.6584229795428514e-05, "loss": 0.0537, "step": 29080 }, { "epoch": 13.565764925373134, "grad_norm": 0.44819210600193077, "learning_rate": 1.6576643943771026e-05, "loss": 0.054, "step": 29085 }, { "epoch": 13.568097014925373, "grad_norm": 0.46594250623761735, "learning_rate": 1.656905971631192e-05, "loss": 0.0516, "step": 29090 }, { "epoch": 13.570429104477611, "grad_norm": 0.45433210298404114, "learning_rate": 1.65614771141789e-05, "loss": 0.0519, "step": 29095 }, { "epoch": 13.572761194029852, "grad_norm": 0.4416467838201528, "learning_rate": 1.655389613849943e-05, "loss": 0.0523, "step": 29100 }, { "epoch": 13.57509328358209, "grad_norm": 0.44762764092675483, "learning_rate": 1.6546316790400726e-05, "loss": 0.0527, "step": 29105 }, { "epoch": 13.577425373134329, "grad_norm": 0.44191486798191926, "learning_rate": 1.653873907100977e-05, "loss": 0.0529, "step": 29110 }, { "epoch": 13.579757462686567, "grad_norm": 0.43288925150082774, "learning_rate": 1.6531162981453286e-05, "loss": 0.0521, "step": 29115 }, { "epoch": 13.582089552238806, "grad_norm": 0.45271666359731116, "learning_rate": 1.6523588522857784e-05, "loss": 0.0541, "step": 29120 }, { "epoch": 13.584421641791044, "grad_norm": 0.43917728278592255, "learning_rate": 1.6516015696349486e-05, "loss": 0.0526, "step": 29125 }, { "epoch": 13.586753731343283, "grad_norm": 0.4850732760659502, "learning_rate": 1.6508444503054432e-05, "loss": 0.0541, "step": 29130 }, { "epoch": 13.589085820895523, "grad_norm": 0.4394890640970089, "learning_rate": 1.6500874944098348e-05, "loss": 0.0542, "step": 29135 }, { "epoch": 13.591417910447761, "grad_norm": 0.4712826095342191, "learning_rate": 1.6493307020606796e-05, "loss": 0.0541, "step": 29140 }, { "epoch": 13.59375, "grad_norm": 0.4688261020453173, "learning_rate": 1.648574073370502e-05, "loss": 0.0543, "step": 29145 }, { "epoch": 13.596082089552239, "grad_norm": 0.44026986881273583, "learning_rate": 1.647817608451807e-05, "loss": 0.0544, "step": 29150 }, { "epoch": 13.598414179104477, "grad_norm": 0.45525905541062817, "learning_rate": 1.647061307417073e-05, "loss": 0.0533, "step": 29155 }, { "epoch": 13.600746268656717, "grad_norm": 0.44727149650026504, "learning_rate": 1.6463051703787557e-05, "loss": 0.0531, "step": 29160 }, { "epoch": 13.603078358208956, "grad_norm": 0.45632920289868184, "learning_rate": 1.645549197449285e-05, "loss": 0.0541, "step": 29165 }, { "epoch": 13.605410447761194, "grad_norm": 0.45471355077088593, "learning_rate": 1.644793388741067e-05, "loss": 0.055, "step": 29170 }, { "epoch": 13.607742537313433, "grad_norm": 0.46786491607686176, "learning_rate": 1.6440377443664816e-05, "loss": 0.055, "step": 29175 }, { "epoch": 13.610074626865671, "grad_norm": 0.4424055757203879, "learning_rate": 1.6432822644378888e-05, "loss": 0.054, "step": 29180 }, { "epoch": 13.61240671641791, "grad_norm": 0.44617220881789826, "learning_rate": 1.6425269490676176e-05, "loss": 0.0532, "step": 29185 }, { "epoch": 13.614738805970148, "grad_norm": 0.45144868824715223, "learning_rate": 1.64177179836798e-05, "loss": 0.0521, "step": 29190 }, { "epoch": 13.617070895522389, "grad_norm": 0.46730960619921985, "learning_rate": 1.641016812451257e-05, "loss": 0.0535, "step": 29195 }, { "epoch": 13.619402985074627, "grad_norm": 0.4558011716774357, "learning_rate": 1.6402619914297087e-05, "loss": 0.0542, "step": 29200 }, { "epoch": 13.621735074626866, "grad_norm": 0.43929514043831264, "learning_rate": 1.6395073354155694e-05, "loss": 0.0529, "step": 29205 }, { "epoch": 13.624067164179104, "grad_norm": 0.44779776038023306, "learning_rate": 1.6387528445210497e-05, "loss": 0.0541, "step": 29210 }, { "epoch": 13.626399253731343, "grad_norm": 0.46697257966430317, "learning_rate": 1.6379985188583352e-05, "loss": 0.0549, "step": 29215 }, { "epoch": 13.628731343283581, "grad_norm": 0.4635205900670319, "learning_rate": 1.6372443585395875e-05, "loss": 0.055, "step": 29220 }, { "epoch": 13.631063432835822, "grad_norm": 0.44656847247223197, "learning_rate": 1.6364903636769403e-05, "loss": 0.0521, "step": 29225 }, { "epoch": 13.63339552238806, "grad_norm": 0.4667053162709333, "learning_rate": 1.6357365343825088e-05, "loss": 0.0545, "step": 29230 }, { "epoch": 13.635727611940299, "grad_norm": 0.4292053426021482, "learning_rate": 1.634982870768378e-05, "loss": 0.0523, "step": 29235 }, { "epoch": 13.638059701492537, "grad_norm": 0.8202309929552586, "learning_rate": 1.634229372946611e-05, "loss": 0.0542, "step": 29240 }, { "epoch": 13.640391791044776, "grad_norm": 0.4498716826969071, "learning_rate": 1.633476041029246e-05, "loss": 0.0543, "step": 29245 }, { "epoch": 13.642723880597014, "grad_norm": 0.4607614957260853, "learning_rate": 1.632722875128296e-05, "loss": 0.0556, "step": 29250 }, { "epoch": 13.645055970149254, "grad_norm": 0.44513276291374976, "learning_rate": 1.6319698753557498e-05, "loss": 0.053, "step": 29255 }, { "epoch": 13.647388059701493, "grad_norm": 0.455180806999829, "learning_rate": 1.6312170418235705e-05, "loss": 0.0532, "step": 29260 }, { "epoch": 13.649720149253731, "grad_norm": 0.4519277055902217, "learning_rate": 1.6304643746436994e-05, "loss": 0.055, "step": 29265 }, { "epoch": 13.65205223880597, "grad_norm": 0.46336457877350706, "learning_rate": 1.6297118739280483e-05, "loss": 0.0543, "step": 29270 }, { "epoch": 13.654384328358208, "grad_norm": 0.45366051533652, "learning_rate": 1.6289595397885074e-05, "loss": 0.0554, "step": 29275 }, { "epoch": 13.656716417910447, "grad_norm": 0.45668515724525643, "learning_rate": 1.6282073723369427e-05, "loss": 0.0522, "step": 29280 }, { "epoch": 13.659048507462687, "grad_norm": 0.4347482094633869, "learning_rate": 1.6274553716851928e-05, "loss": 0.0533, "step": 29285 }, { "epoch": 13.661380597014926, "grad_norm": 0.44457482608173765, "learning_rate": 1.6267035379450744e-05, "loss": 0.0528, "step": 29290 }, { "epoch": 13.663712686567164, "grad_norm": 0.4677732468776169, "learning_rate": 1.625951871228378e-05, "loss": 0.0532, "step": 29295 }, { "epoch": 13.666044776119403, "grad_norm": 0.4604823649409319, "learning_rate": 1.625200371646867e-05, "loss": 0.0533, "step": 29300 }, { "epoch": 13.668376865671641, "grad_norm": 0.46367836816726943, "learning_rate": 1.6244490393122857e-05, "loss": 0.0546, "step": 29305 }, { "epoch": 13.67070895522388, "grad_norm": 0.472094339668436, "learning_rate": 1.6236978743363464e-05, "loss": 0.0553, "step": 29310 }, { "epoch": 13.67304104477612, "grad_norm": 0.4502921893515901, "learning_rate": 1.622946876830744e-05, "loss": 0.0551, "step": 29315 }, { "epoch": 13.675373134328359, "grad_norm": 0.4407713331256173, "learning_rate": 1.622196046907141e-05, "loss": 0.054, "step": 29320 }, { "epoch": 13.677705223880597, "grad_norm": 0.4658710465554957, "learning_rate": 1.62144538467718e-05, "loss": 0.0557, "step": 29325 }, { "epoch": 13.680037313432836, "grad_norm": 0.4603790442611225, "learning_rate": 1.6206948902524783e-05, "loss": 0.0543, "step": 29330 }, { "epoch": 13.682369402985074, "grad_norm": 0.4792091676913343, "learning_rate": 1.6199445637446255e-05, "loss": 0.0562, "step": 29335 }, { "epoch": 13.684701492537313, "grad_norm": 0.46754868108214176, "learning_rate": 1.619194405265189e-05, "loss": 0.0548, "step": 29340 }, { "epoch": 13.687033582089553, "grad_norm": 0.4634807985393242, "learning_rate": 1.618444414925711e-05, "loss": 0.0546, "step": 29345 }, { "epoch": 13.689365671641792, "grad_norm": 0.4448636088864409, "learning_rate": 1.617694592837705e-05, "loss": 0.053, "step": 29350 }, { "epoch": 13.69169776119403, "grad_norm": 0.4553895784491003, "learning_rate": 1.616944939112666e-05, "loss": 0.0539, "step": 29355 }, { "epoch": 13.694029850746269, "grad_norm": 0.47999712678473805, "learning_rate": 1.616195453862057e-05, "loss": 0.0551, "step": 29360 }, { "epoch": 13.696361940298507, "grad_norm": 0.4689533615028954, "learning_rate": 1.615446137197322e-05, "loss": 0.0543, "step": 29365 }, { "epoch": 13.698694029850746, "grad_norm": 0.42410742535162177, "learning_rate": 1.614696989229876e-05, "loss": 0.0526, "step": 29370 }, { "epoch": 13.701026119402986, "grad_norm": 0.441313026163626, "learning_rate": 1.613948010071109e-05, "loss": 0.055, "step": 29375 }, { "epoch": 13.703358208955224, "grad_norm": 0.46534942290551407, "learning_rate": 1.6131991998323893e-05, "loss": 0.0537, "step": 29380 }, { "epoch": 13.705690298507463, "grad_norm": 0.4708070689124286, "learning_rate": 1.612450558625056e-05, "loss": 0.0556, "step": 29385 }, { "epoch": 13.708022388059701, "grad_norm": 0.44945066469889555, "learning_rate": 1.611702086560426e-05, "loss": 0.0548, "step": 29390 }, { "epoch": 13.71035447761194, "grad_norm": 0.4404420065430067, "learning_rate": 1.61095378374979e-05, "loss": 0.0535, "step": 29395 }, { "epoch": 13.712686567164178, "grad_norm": 0.44910240379214533, "learning_rate": 1.6102056503044115e-05, "loss": 0.0538, "step": 29400 }, { "epoch": 13.715018656716419, "grad_norm": 0.4656273074333589, "learning_rate": 1.6094576863355338e-05, "loss": 0.0537, "step": 29405 }, { "epoch": 13.717350746268657, "grad_norm": 0.4537340675459958, "learning_rate": 1.6087098919543696e-05, "loss": 0.055, "step": 29410 }, { "epoch": 13.719682835820896, "grad_norm": 0.45194570688680574, "learning_rate": 1.6079622672721093e-05, "loss": 0.0545, "step": 29415 }, { "epoch": 13.722014925373134, "grad_norm": 0.4624064435126024, "learning_rate": 1.6072148123999182e-05, "loss": 0.0532, "step": 29420 }, { "epoch": 13.724347014925373, "grad_norm": 0.45607024243764543, "learning_rate": 1.6064675274489348e-05, "loss": 0.0545, "step": 29425 }, { "epoch": 13.726679104477611, "grad_norm": 0.44350230553395253, "learning_rate": 1.605720412530274e-05, "loss": 0.0541, "step": 29430 }, { "epoch": 13.729011194029852, "grad_norm": 0.4673061107951683, "learning_rate": 1.6049734677550243e-05, "loss": 0.0543, "step": 29435 }, { "epoch": 13.73134328358209, "grad_norm": 0.4570036736123816, "learning_rate": 1.6042266932342498e-05, "loss": 0.056, "step": 29440 }, { "epoch": 13.733675373134329, "grad_norm": 0.4377781906265606, "learning_rate": 1.6034800890789874e-05, "loss": 0.0539, "step": 29445 }, { "epoch": 13.736007462686567, "grad_norm": 0.45087972858554615, "learning_rate": 1.6027336554002512e-05, "loss": 0.0528, "step": 29450 }, { "epoch": 13.738339552238806, "grad_norm": 0.46627576300229656, "learning_rate": 1.6019873923090277e-05, "loss": 0.0549, "step": 29455 }, { "epoch": 13.740671641791044, "grad_norm": 0.46254741523586335, "learning_rate": 1.60124129991628e-05, "loss": 0.0549, "step": 29460 }, { "epoch": 13.743003731343283, "grad_norm": 0.4480253441507884, "learning_rate": 1.600495378332944e-05, "loss": 0.0555, "step": 29465 }, { "epoch": 13.745335820895523, "grad_norm": 0.45067676756305164, "learning_rate": 1.599749627669933e-05, "loss": 0.0536, "step": 29470 }, { "epoch": 13.747667910447761, "grad_norm": 0.45812000967685024, "learning_rate": 1.59900404803813e-05, "loss": 0.0526, "step": 29475 }, { "epoch": 13.75, "grad_norm": 0.44978827860325993, "learning_rate": 1.5982586395483983e-05, "loss": 0.0531, "step": 29480 }, { "epoch": 13.752332089552239, "grad_norm": 0.46483538802520213, "learning_rate": 1.5975134023115707e-05, "loss": 0.0549, "step": 29485 }, { "epoch": 13.754664179104477, "grad_norm": 0.4318527418617096, "learning_rate": 1.5967683364384595e-05, "loss": 0.0529, "step": 29490 }, { "epoch": 13.756996268656717, "grad_norm": 0.4800622732312427, "learning_rate": 1.5960234420398464e-05, "loss": 0.0569, "step": 29495 }, { "epoch": 13.759328358208956, "grad_norm": 0.45860025182564323, "learning_rate": 1.595278719226491e-05, "loss": 0.0554, "step": 29500 }, { "epoch": 13.761660447761194, "grad_norm": 0.4546358067899339, "learning_rate": 1.5945341681091268e-05, "loss": 0.0562, "step": 29505 }, { "epoch": 13.763992537313433, "grad_norm": 0.43589717415610224, "learning_rate": 1.5937897887984605e-05, "loss": 0.056, "step": 29510 }, { "epoch": 13.766324626865671, "grad_norm": 0.4371456453737992, "learning_rate": 1.593045581405175e-05, "loss": 0.055, "step": 29515 }, { "epoch": 13.76865671641791, "grad_norm": 0.47240775568504484, "learning_rate": 1.5923015460399277e-05, "loss": 0.0544, "step": 29520 }, { "epoch": 13.770988805970148, "grad_norm": 0.4610133697637774, "learning_rate": 1.5915576828133462e-05, "loss": 0.0563, "step": 29525 }, { "epoch": 13.773320895522389, "grad_norm": 0.46930572282288274, "learning_rate": 1.59081399183604e-05, "loss": 0.0558, "step": 29530 }, { "epoch": 13.775652985074627, "grad_norm": 0.4886609307798065, "learning_rate": 1.590070473218585e-05, "loss": 0.0585, "step": 29535 }, { "epoch": 13.777985074626866, "grad_norm": 0.45427919267323325, "learning_rate": 1.589327127071539e-05, "loss": 0.0544, "step": 29540 }, { "epoch": 13.780317164179104, "grad_norm": 0.46184705640950374, "learning_rate": 1.5885839535054277e-05, "loss": 0.0552, "step": 29545 }, { "epoch": 13.782649253731343, "grad_norm": 0.4467942441489382, "learning_rate": 1.587840952630755e-05, "loss": 0.0531, "step": 29550 }, { "epoch": 13.784981343283581, "grad_norm": 0.4586180260657966, "learning_rate": 1.587098124557997e-05, "loss": 0.0546, "step": 29555 }, { "epoch": 13.787313432835822, "grad_norm": 0.4499118223444757, "learning_rate": 1.5863554693976065e-05, "loss": 0.0555, "step": 29560 }, { "epoch": 13.78964552238806, "grad_norm": 0.45951048823423296, "learning_rate": 1.5856129872600083e-05, "loss": 0.0533, "step": 29565 }, { "epoch": 13.791977611940299, "grad_norm": 0.4736423203514559, "learning_rate": 1.584870678255604e-05, "loss": 0.0561, "step": 29570 }, { "epoch": 13.794309701492537, "grad_norm": 0.44981462756632573, "learning_rate": 1.5841285424947643e-05, "loss": 0.055, "step": 29575 }, { "epoch": 13.796641791044776, "grad_norm": 0.48450910954146376, "learning_rate": 1.5833865800878422e-05, "loss": 0.0556, "step": 29580 }, { "epoch": 13.798973880597014, "grad_norm": 0.45172225643825387, "learning_rate": 1.5826447911451568e-05, "loss": 0.0569, "step": 29585 }, { "epoch": 13.801305970149254, "grad_norm": 0.449184097840402, "learning_rate": 1.5819031757770064e-05, "loss": 0.0551, "step": 29590 }, { "epoch": 13.803638059701493, "grad_norm": 0.45631518684743505, "learning_rate": 1.5811617340936622e-05, "loss": 0.0519, "step": 29595 }, { "epoch": 13.805970149253731, "grad_norm": 0.46762730265582764, "learning_rate": 1.580420466205369e-05, "loss": 0.0567, "step": 29600 }, { "epoch": 13.80830223880597, "grad_norm": 0.444093760039626, "learning_rate": 1.5796793722223464e-05, "loss": 0.0545, "step": 29605 }, { "epoch": 13.810634328358208, "grad_norm": 0.46128307584900835, "learning_rate": 1.5789384522547888e-05, "loss": 0.0552, "step": 29610 }, { "epoch": 13.812966417910447, "grad_norm": 0.46441376898272424, "learning_rate": 1.5781977064128627e-05, "loss": 0.0555, "step": 29615 }, { "epoch": 13.815298507462687, "grad_norm": 0.4719855021847703, "learning_rate": 1.577457134806711e-05, "loss": 0.0548, "step": 29620 }, { "epoch": 13.817630597014926, "grad_norm": 0.46741044853680713, "learning_rate": 1.5767167375464488e-05, "loss": 0.0553, "step": 29625 }, { "epoch": 13.819962686567164, "grad_norm": 0.4669585416225662, "learning_rate": 1.5759765147421658e-05, "loss": 0.0549, "step": 29630 }, { "epoch": 13.822294776119403, "grad_norm": 0.4538008873832302, "learning_rate": 1.5752364665039265e-05, "loss": 0.0551, "step": 29635 }, { "epoch": 13.824626865671641, "grad_norm": 0.43326930597515967, "learning_rate": 1.5744965929417693e-05, "loss": 0.0537, "step": 29640 }, { "epoch": 13.82695895522388, "grad_norm": 0.4662291240999933, "learning_rate": 1.5737568941657053e-05, "loss": 0.0567, "step": 29645 }, { "epoch": 13.82929104477612, "grad_norm": 0.44057695382696455, "learning_rate": 1.573017370285722e-05, "loss": 0.0566, "step": 29650 }, { "epoch": 13.831623134328359, "grad_norm": 0.4786863772178007, "learning_rate": 1.57227802141178e-05, "loss": 0.0568, "step": 29655 }, { "epoch": 13.833955223880597, "grad_norm": 0.44768654045309386, "learning_rate": 1.57153884765381e-05, "loss": 0.0539, "step": 29660 }, { "epoch": 13.836287313432836, "grad_norm": 0.4432620425834878, "learning_rate": 1.570799849121724e-05, "loss": 0.0545, "step": 29665 }, { "epoch": 13.838619402985074, "grad_norm": 0.457411101302185, "learning_rate": 1.5700610259254018e-05, "loss": 0.0536, "step": 29670 }, { "epoch": 13.840951492537313, "grad_norm": 0.46179967421030743, "learning_rate": 1.5693223781746996e-05, "loss": 0.0568, "step": 29675 }, { "epoch": 13.843283582089553, "grad_norm": 0.4478838717167941, "learning_rate": 1.5685839059794476e-05, "loss": 0.0552, "step": 29680 }, { "epoch": 13.845615671641792, "grad_norm": 0.4544376187732887, "learning_rate": 1.5678456094494493e-05, "loss": 0.0574, "step": 29685 }, { "epoch": 13.84794776119403, "grad_norm": 0.46471538822940667, "learning_rate": 1.5671074886944823e-05, "loss": 0.0535, "step": 29690 }, { "epoch": 13.850279850746269, "grad_norm": 0.4684887920368361, "learning_rate": 1.5663695438242992e-05, "loss": 0.056, "step": 29695 }, { "epoch": 13.852611940298507, "grad_norm": 0.46883087060511186, "learning_rate": 1.5656317749486225e-05, "loss": 0.0557, "step": 29700 }, { "epoch": 13.854944029850746, "grad_norm": 0.4443067925087165, "learning_rate": 1.5648941821771552e-05, "loss": 0.0555, "step": 29705 }, { "epoch": 13.857276119402986, "grad_norm": 0.48180009369739224, "learning_rate": 1.5641567656195664e-05, "loss": 0.0553, "step": 29710 }, { "epoch": 13.859608208955224, "grad_norm": 0.4660726466650359, "learning_rate": 1.563419525385507e-05, "loss": 0.0548, "step": 29715 }, { "epoch": 13.861940298507463, "grad_norm": 0.459442301124331, "learning_rate": 1.562682461584594e-05, "loss": 0.0567, "step": 29720 }, { "epoch": 13.864272388059701, "grad_norm": 0.4601327240418182, "learning_rate": 1.5619455743264227e-05, "loss": 0.0546, "step": 29725 }, { "epoch": 13.86660447761194, "grad_norm": 0.45244224595560506, "learning_rate": 1.561208863720562e-05, "loss": 0.0558, "step": 29730 }, { "epoch": 13.868936567164178, "grad_norm": 0.466233007351942, "learning_rate": 1.5604723298765532e-05, "loss": 0.0563, "step": 29735 }, { "epoch": 13.871268656716419, "grad_norm": 0.44922971227807473, "learning_rate": 1.559735972903912e-05, "loss": 0.0555, "step": 29740 }, { "epoch": 13.873600746268657, "grad_norm": 0.47224816740164777, "learning_rate": 1.5589997929121282e-05, "loss": 0.0551, "step": 29745 }, { "epoch": 13.875932835820896, "grad_norm": 0.47009689927493054, "learning_rate": 1.5582637900106622e-05, "loss": 0.0554, "step": 29750 }, { "epoch": 13.878264925373134, "grad_norm": 0.45395183541608697, "learning_rate": 1.557527964308954e-05, "loss": 0.0556, "step": 29755 }, { "epoch": 13.880597014925373, "grad_norm": 0.4379706104932858, "learning_rate": 1.5567923159164108e-05, "loss": 0.0565, "step": 29760 }, { "epoch": 13.882929104477611, "grad_norm": 0.46293108904084923, "learning_rate": 1.5560568449424197e-05, "loss": 0.0553, "step": 29765 }, { "epoch": 13.885261194029852, "grad_norm": 0.4353311015322349, "learning_rate": 1.555321551496335e-05, "loss": 0.0554, "step": 29770 }, { "epoch": 13.88759328358209, "grad_norm": 0.47046305516454406, "learning_rate": 1.5545864356874894e-05, "loss": 0.0551, "step": 29775 }, { "epoch": 13.889925373134329, "grad_norm": 0.4454690338467701, "learning_rate": 1.553851497625187e-05, "loss": 0.055, "step": 29780 }, { "epoch": 13.892257462686567, "grad_norm": 0.4494063927955785, "learning_rate": 1.5531167374187068e-05, "loss": 0.0559, "step": 29785 }, { "epoch": 13.894589552238806, "grad_norm": 0.47210561164895304, "learning_rate": 1.5523821551773006e-05, "loss": 0.0543, "step": 29790 }, { "epoch": 13.896921641791044, "grad_norm": 0.4584391838945934, "learning_rate": 1.5516477510101933e-05, "loss": 0.0568, "step": 29795 }, { "epoch": 13.899253731343283, "grad_norm": 0.4926699945602889, "learning_rate": 1.5509135250265835e-05, "loss": 0.0555, "step": 29800 }, { "epoch": 13.901585820895523, "grad_norm": 0.45051613950704783, "learning_rate": 1.5501794773356437e-05, "loss": 0.0542, "step": 29805 }, { "epoch": 13.903917910447761, "grad_norm": 0.4655305005630288, "learning_rate": 1.5494456080465198e-05, "loss": 0.0549, "step": 29810 }, { "epoch": 13.90625, "grad_norm": 0.4867305594044115, "learning_rate": 1.548711917268331e-05, "loss": 0.057, "step": 29815 }, { "epoch": 13.908582089552239, "grad_norm": 0.45216979486856035, "learning_rate": 1.547978405110171e-05, "loss": 0.0552, "step": 29820 }, { "epoch": 13.910914179104477, "grad_norm": 0.4908229072070972, "learning_rate": 1.5472450716811052e-05, "loss": 0.0577, "step": 29825 }, { "epoch": 13.913246268656717, "grad_norm": 0.499959138633529, "learning_rate": 1.5465119170901742e-05, "loss": 0.0585, "step": 29830 }, { "epoch": 13.915578358208956, "grad_norm": 0.44715019449279564, "learning_rate": 1.5457789414463886e-05, "loss": 0.0546, "step": 29835 }, { "epoch": 13.917910447761194, "grad_norm": 0.49820798085035756, "learning_rate": 1.545046144858738e-05, "loss": 0.058, "step": 29840 }, { "epoch": 13.920242537313433, "grad_norm": 0.4781885468794251, "learning_rate": 1.5443135274361792e-05, "loss": 0.0557, "step": 29845 }, { "epoch": 13.922574626865671, "grad_norm": 0.46919246680703536, "learning_rate": 1.543581089287649e-05, "loss": 0.0579, "step": 29850 }, { "epoch": 13.92490671641791, "grad_norm": 0.46789157989860675, "learning_rate": 1.5428488305220506e-05, "loss": 0.0555, "step": 29855 }, { "epoch": 13.927238805970148, "grad_norm": 0.4675262549291341, "learning_rate": 1.5421167512482655e-05, "loss": 0.0562, "step": 29860 }, { "epoch": 13.929570895522389, "grad_norm": 0.4606797888357012, "learning_rate": 1.5413848515751463e-05, "loss": 0.0554, "step": 29865 }, { "epoch": 13.931902985074627, "grad_norm": 0.4510849871767764, "learning_rate": 1.5406531316115197e-05, "loss": 0.0562, "step": 29870 }, { "epoch": 13.934235074626866, "grad_norm": 0.4608476662237576, "learning_rate": 1.539921591466185e-05, "loss": 0.0543, "step": 29875 }, { "epoch": 13.936567164179104, "grad_norm": 0.4666674039786538, "learning_rate": 1.539190231247917e-05, "loss": 0.0571, "step": 29880 }, { "epoch": 13.938899253731343, "grad_norm": 0.45476681790931933, "learning_rate": 1.5384590510654596e-05, "loss": 0.0558, "step": 29885 }, { "epoch": 13.941231343283581, "grad_norm": 0.4565950483780446, "learning_rate": 1.5377280510275342e-05, "loss": 0.0545, "step": 29890 }, { "epoch": 13.943563432835822, "grad_norm": 0.4340988424365748, "learning_rate": 1.536997231242831e-05, "loss": 0.0565, "step": 29895 }, { "epoch": 13.94589552238806, "grad_norm": 0.45073158590205425, "learning_rate": 1.5362665918200193e-05, "loss": 0.0555, "step": 29900 }, { "epoch": 13.948227611940299, "grad_norm": 0.47368267695600913, "learning_rate": 1.5355361328677358e-05, "loss": 0.0571, "step": 29905 }, { "epoch": 13.950559701492537, "grad_norm": 0.4643755941198959, "learning_rate": 1.534805854494593e-05, "loss": 0.052, "step": 29910 }, { "epoch": 13.952891791044776, "grad_norm": 0.44481356759292334, "learning_rate": 1.5340757568091767e-05, "loss": 0.0562, "step": 29915 }, { "epoch": 13.955223880597014, "grad_norm": 0.4395454714697108, "learning_rate": 1.533345839920045e-05, "loss": 0.0557, "step": 29920 }, { "epoch": 13.957555970149254, "grad_norm": 0.4503701787791656, "learning_rate": 1.5326161039357307e-05, "loss": 0.0558, "step": 29925 }, { "epoch": 13.959888059701493, "grad_norm": 0.48422456366855654, "learning_rate": 1.5318865489647383e-05, "loss": 0.0549, "step": 29930 }, { "epoch": 13.962220149253731, "grad_norm": 0.448118462950757, "learning_rate": 1.531157175115543e-05, "loss": 0.0556, "step": 29935 }, { "epoch": 13.96455223880597, "grad_norm": 0.4868744479695637, "learning_rate": 1.5304279824966e-05, "loss": 0.0554, "step": 29940 }, { "epoch": 13.966884328358208, "grad_norm": 0.46335917060078996, "learning_rate": 1.5296989712163302e-05, "loss": 0.0553, "step": 29945 }, { "epoch": 13.969216417910447, "grad_norm": 0.4620583336577286, "learning_rate": 1.528970141383131e-05, "loss": 0.0581, "step": 29950 }, { "epoch": 13.971548507462687, "grad_norm": 0.4349038514244588, "learning_rate": 1.528241493105373e-05, "loss": 0.0575, "step": 29955 }, { "epoch": 13.973880597014926, "grad_norm": 0.4818706821819587, "learning_rate": 1.5275130264913994e-05, "loss": 0.0578, "step": 29960 }, { "epoch": 13.976212686567164, "grad_norm": 0.45758239953284224, "learning_rate": 1.5267847416495257e-05, "loss": 0.0575, "step": 29965 }, { "epoch": 13.978544776119403, "grad_norm": 0.44999666230929314, "learning_rate": 1.5260566386880413e-05, "loss": 0.0573, "step": 29970 }, { "epoch": 13.980876865671641, "grad_norm": 0.45803809049985944, "learning_rate": 1.5253287177152082e-05, "loss": 0.0546, "step": 29975 }, { "epoch": 13.98320895522388, "grad_norm": 0.46207735703485325, "learning_rate": 1.5246009788392606e-05, "loss": 0.0563, "step": 29980 }, { "epoch": 13.98554104477612, "grad_norm": 0.4755923499256169, "learning_rate": 1.5238734221684065e-05, "loss": 0.056, "step": 29985 }, { "epoch": 13.987873134328359, "grad_norm": 0.46063042234425794, "learning_rate": 1.5231460478108268e-05, "loss": 0.0538, "step": 29990 }, { "epoch": 13.990205223880597, "grad_norm": 0.4692103481334803, "learning_rate": 1.5224188558746749e-05, "loss": 0.058, "step": 29995 }, { "epoch": 13.992537313432836, "grad_norm": 0.452031305507622, "learning_rate": 1.5216918464680776e-05, "loss": 0.054, "step": 30000 }, { "epoch": 13.994869402985074, "grad_norm": 0.44418372837183095, "learning_rate": 1.5209650196991352e-05, "loss": 0.0572, "step": 30005 }, { "epoch": 13.997201492537313, "grad_norm": 0.4779777308589974, "learning_rate": 1.520238375675917e-05, "loss": 0.0579, "step": 30010 }, { "epoch": 13.999533582089553, "grad_norm": 0.4561426319812, "learning_rate": 1.5195119145064712e-05, "loss": 0.0551, "step": 30015 }, { "epoch": 14.001865671641792, "grad_norm": 0.3300362714055747, "learning_rate": 1.5187856362988123e-05, "loss": 0.0366, "step": 30020 }, { "epoch": 14.00419776119403, "grad_norm": 0.4038803721615827, "learning_rate": 1.5180595411609347e-05, "loss": 0.0339, "step": 30025 }, { "epoch": 14.006529850746269, "grad_norm": 0.36399508078098874, "learning_rate": 1.5173336292007994e-05, "loss": 0.032, "step": 30030 }, { "epoch": 14.008861940298507, "grad_norm": 0.39908717090933904, "learning_rate": 1.5166079005263425e-05, "loss": 0.0316, "step": 30035 }, { "epoch": 14.011194029850746, "grad_norm": 0.36511940777746166, "learning_rate": 1.5158823552454737e-05, "loss": 0.0313, "step": 30040 }, { "epoch": 14.013526119402986, "grad_norm": 0.3413986820132389, "learning_rate": 1.5151569934660743e-05, "loss": 0.0317, "step": 30045 }, { "epoch": 14.015858208955224, "grad_norm": 0.33652190444522684, "learning_rate": 1.5144318152959985e-05, "loss": 0.0304, "step": 30050 }, { "epoch": 14.018190298507463, "grad_norm": 0.34810363642807524, "learning_rate": 1.513706820843075e-05, "loss": 0.0316, "step": 30055 }, { "epoch": 14.020522388059701, "grad_norm": 0.3463998174519046, "learning_rate": 1.5129820102151e-05, "loss": 0.0299, "step": 30060 }, { "epoch": 14.02285447761194, "grad_norm": 0.3686659721658761, "learning_rate": 1.5122573835198495e-05, "loss": 0.0294, "step": 30065 }, { "epoch": 14.025186567164178, "grad_norm": 0.3541422353537438, "learning_rate": 1.5115329408650658e-05, "loss": 0.0288, "step": 30070 }, { "epoch": 14.027518656716419, "grad_norm": 0.3692743146805594, "learning_rate": 1.5108086823584692e-05, "loss": 0.03, "step": 30075 }, { "epoch": 14.029850746268657, "grad_norm": 0.3660755031367307, "learning_rate": 1.5100846081077479e-05, "loss": 0.0295, "step": 30080 }, { "epoch": 14.032182835820896, "grad_norm": 0.36102927678097374, "learning_rate": 1.5093607182205652e-05, "loss": 0.0291, "step": 30085 }, { "epoch": 14.034514925373134, "grad_norm": 0.37321597765478604, "learning_rate": 1.5086370128045574e-05, "loss": 0.0298, "step": 30090 }, { "epoch": 14.036847014925373, "grad_norm": 0.36910919094850914, "learning_rate": 1.5079134919673316e-05, "loss": 0.0294, "step": 30095 }, { "epoch": 14.039179104477611, "grad_norm": 0.3771910600600667, "learning_rate": 1.5071901558164692e-05, "loss": 0.03, "step": 30100 }, { "epoch": 14.041511194029852, "grad_norm": 0.33994873095076117, "learning_rate": 1.5064670044595236e-05, "loss": 0.0304, "step": 30105 }, { "epoch": 14.04384328358209, "grad_norm": 0.3723529377399018, "learning_rate": 1.5057440380040184e-05, "loss": 0.0287, "step": 30110 }, { "epoch": 14.046175373134329, "grad_norm": 0.3616206060557333, "learning_rate": 1.5050212565574551e-05, "loss": 0.0302, "step": 30115 }, { "epoch": 14.048507462686567, "grad_norm": 0.3399871562594655, "learning_rate": 1.5042986602273017e-05, "loss": 0.0311, "step": 30120 }, { "epoch": 14.050839552238806, "grad_norm": 0.33710646745389306, "learning_rate": 1.503576249121002e-05, "loss": 0.0291, "step": 30125 }, { "epoch": 14.053171641791044, "grad_norm": 0.36874199028522825, "learning_rate": 1.502854023345972e-05, "loss": 0.0303, "step": 30130 }, { "epoch": 14.055503731343284, "grad_norm": 0.36915887503794725, "learning_rate": 1.5021319830095995e-05, "loss": 0.031, "step": 30135 }, { "epoch": 14.057835820895523, "grad_norm": 0.37386413430421034, "learning_rate": 1.5014101282192452e-05, "loss": 0.0289, "step": 30140 }, { "epoch": 14.060167910447761, "grad_norm": 0.3548561284891677, "learning_rate": 1.5006884590822418e-05, "loss": 0.0311, "step": 30145 }, { "epoch": 14.0625, "grad_norm": 0.360528722301664, "learning_rate": 1.4999669757058956e-05, "loss": 0.03, "step": 30150 }, { "epoch": 14.064832089552239, "grad_norm": 0.333525461016171, "learning_rate": 1.4992456781974826e-05, "loss": 0.0286, "step": 30155 }, { "epoch": 14.067164179104477, "grad_norm": 0.35798156980148815, "learning_rate": 1.498524566664253e-05, "loss": 0.0288, "step": 30160 }, { "epoch": 14.069496268656716, "grad_norm": 0.35726474104635747, "learning_rate": 1.4978036412134305e-05, "loss": 0.0297, "step": 30165 }, { "epoch": 14.071828358208956, "grad_norm": 0.3547304758964566, "learning_rate": 1.4970829019522083e-05, "loss": 0.0303, "step": 30170 }, { "epoch": 14.074160447761194, "grad_norm": 0.3612636831763893, "learning_rate": 1.4963623489877546e-05, "loss": 0.029, "step": 30175 }, { "epoch": 14.076492537313433, "grad_norm": 0.36822908852788255, "learning_rate": 1.4956419824272083e-05, "loss": 0.0291, "step": 30180 }, { "epoch": 14.078824626865671, "grad_norm": 0.3925727728222752, "learning_rate": 1.4949218023776806e-05, "loss": 0.03, "step": 30185 }, { "epoch": 14.08115671641791, "grad_norm": 0.36522647410739306, "learning_rate": 1.4942018089462567e-05, "loss": 0.0308, "step": 30190 }, { "epoch": 14.083488805970148, "grad_norm": 0.3621907852923563, "learning_rate": 1.4934820022399901e-05, "loss": 0.0296, "step": 30195 }, { "epoch": 14.085820895522389, "grad_norm": 0.3504293759442738, "learning_rate": 1.4927623823659126e-05, "loss": 0.0299, "step": 30200 }, { "epoch": 14.088152985074627, "grad_norm": 0.3689778153599001, "learning_rate": 1.4920429494310223e-05, "loss": 0.0305, "step": 30205 }, { "epoch": 14.090485074626866, "grad_norm": 0.3792563451683751, "learning_rate": 1.4913237035422926e-05, "loss": 0.0318, "step": 30210 }, { "epoch": 14.092817164179104, "grad_norm": 0.34076956401606134, "learning_rate": 1.4906046448066685e-05, "loss": 0.0302, "step": 30215 }, { "epoch": 14.095149253731343, "grad_norm": 0.36128099742830444, "learning_rate": 1.4898857733310673e-05, "loss": 0.0312, "step": 30220 }, { "epoch": 14.097481343283581, "grad_norm": 0.4002256623463485, "learning_rate": 1.4891670892223781e-05, "loss": 0.0303, "step": 30225 }, { "epoch": 14.099813432835822, "grad_norm": 0.3449950130735075, "learning_rate": 1.4884485925874634e-05, "loss": 0.031, "step": 30230 }, { "epoch": 14.10214552238806, "grad_norm": 0.33689533984970055, "learning_rate": 1.487730283533154e-05, "loss": 0.0298, "step": 30235 }, { "epoch": 14.104477611940299, "grad_norm": 0.3396857313358826, "learning_rate": 1.4870121621662594e-05, "loss": 0.0293, "step": 30240 }, { "epoch": 14.106809701492537, "grad_norm": 0.3949407957242696, "learning_rate": 1.4862942285935536e-05, "loss": 0.0303, "step": 30245 }, { "epoch": 14.109141791044776, "grad_norm": 0.381868510568833, "learning_rate": 1.4855764829217894e-05, "loss": 0.0295, "step": 30250 }, { "epoch": 14.111473880597014, "grad_norm": 0.35923010189208937, "learning_rate": 1.4848589252576872e-05, "loss": 0.0294, "step": 30255 }, { "epoch": 14.113805970149254, "grad_norm": 0.3722546249755808, "learning_rate": 1.4841415557079413e-05, "loss": 0.0316, "step": 30260 }, { "epoch": 14.116138059701493, "grad_norm": 0.3771205745690265, "learning_rate": 1.4834243743792176e-05, "loss": 0.0313, "step": 30265 }, { "epoch": 14.118470149253731, "grad_norm": 0.35294089542096513, "learning_rate": 1.482707381378154e-05, "loss": 0.0291, "step": 30270 }, { "epoch": 14.12080223880597, "grad_norm": 0.3730975833214386, "learning_rate": 1.4819905768113604e-05, "loss": 0.0323, "step": 30275 }, { "epoch": 14.123134328358208, "grad_norm": 0.3874931616522691, "learning_rate": 1.4812739607854199e-05, "loss": 0.0318, "step": 30280 }, { "epoch": 14.125466417910447, "grad_norm": 0.3673452552117257, "learning_rate": 1.4805575334068842e-05, "loss": 0.03, "step": 30285 }, { "epoch": 14.127798507462687, "grad_norm": 0.3668670718431297, "learning_rate": 1.479841294782282e-05, "loss": 0.03, "step": 30290 }, { "epoch": 14.130130597014926, "grad_norm": 0.3665644171199561, "learning_rate": 1.4791252450181081e-05, "loss": 0.0301, "step": 30295 }, { "epoch": 14.132462686567164, "grad_norm": 0.34782610874418907, "learning_rate": 1.4784093842208351e-05, "loss": 0.0297, "step": 30300 }, { "epoch": 14.134794776119403, "grad_norm": 0.39557017835004105, "learning_rate": 1.4776937124969026e-05, "loss": 0.0311, "step": 30305 }, { "epoch": 14.137126865671641, "grad_norm": 0.37852995394417627, "learning_rate": 1.4769782299527252e-05, "loss": 0.0307, "step": 30310 }, { "epoch": 14.13945895522388, "grad_norm": 0.39075361442241296, "learning_rate": 1.4762629366946876e-05, "loss": 0.0314, "step": 30315 }, { "epoch": 14.14179104477612, "grad_norm": 0.36860553243900396, "learning_rate": 1.4755478328291476e-05, "loss": 0.0305, "step": 30320 }, { "epoch": 14.144123134328359, "grad_norm": 0.37500030675324797, "learning_rate": 1.4748329184624341e-05, "loss": 0.0308, "step": 30325 }, { "epoch": 14.146455223880597, "grad_norm": 0.37113648341847466, "learning_rate": 1.4741181937008485e-05, "loss": 0.0294, "step": 30330 }, { "epoch": 14.148787313432836, "grad_norm": 0.3802260773178367, "learning_rate": 1.4734036586506628e-05, "loss": 0.0298, "step": 30335 }, { "epoch": 14.151119402985074, "grad_norm": 0.3690820989943741, "learning_rate": 1.4726893134181214e-05, "loss": 0.0301, "step": 30340 }, { "epoch": 14.153451492537313, "grad_norm": 0.36335005486130245, "learning_rate": 1.4719751581094409e-05, "loss": 0.03, "step": 30345 }, { "epoch": 14.155783582089553, "grad_norm": 0.3750051834309953, "learning_rate": 1.4712611928308095e-05, "loss": 0.0313, "step": 30350 }, { "epoch": 14.158115671641792, "grad_norm": 0.3794552373382977, "learning_rate": 1.4705474176883865e-05, "loss": 0.0308, "step": 30355 }, { "epoch": 14.16044776119403, "grad_norm": 0.3758994928976908, "learning_rate": 1.4698338327883044e-05, "loss": 0.0316, "step": 30360 }, { "epoch": 14.162779850746269, "grad_norm": 0.3899594218064991, "learning_rate": 1.4691204382366666e-05, "loss": 0.0325, "step": 30365 }, { "epoch": 14.165111940298507, "grad_norm": 0.3598725927155688, "learning_rate": 1.4684072341395454e-05, "loss": 0.031, "step": 30370 }, { "epoch": 14.167444029850746, "grad_norm": 0.35270460216724947, "learning_rate": 1.4676942206029909e-05, "loss": 0.0296, "step": 30375 }, { "epoch": 14.169776119402986, "grad_norm": 0.385770590598755, "learning_rate": 1.4669813977330193e-05, "loss": 0.031, "step": 30380 }, { "epoch": 14.172108208955224, "grad_norm": 0.37596923708416086, "learning_rate": 1.466268765635621e-05, "loss": 0.0306, "step": 30385 }, { "epoch": 14.174440298507463, "grad_norm": 0.3781549155433748, "learning_rate": 1.4655563244167572e-05, "loss": 0.0302, "step": 30390 }, { "epoch": 14.176772388059701, "grad_norm": 0.36632250165409613, "learning_rate": 1.4648440741823616e-05, "loss": 0.0302, "step": 30395 }, { "epoch": 14.17910447761194, "grad_norm": 0.3698807583478805, "learning_rate": 1.4641320150383391e-05, "loss": 0.0306, "step": 30400 }, { "epoch": 14.181436567164178, "grad_norm": 0.3692416420890843, "learning_rate": 1.4634201470905665e-05, "loss": 0.0306, "step": 30405 }, { "epoch": 14.183768656716419, "grad_norm": 0.373890644988239, "learning_rate": 1.4627084704448895e-05, "loss": 0.0317, "step": 30410 }, { "epoch": 14.186100746268657, "grad_norm": 0.3865142055935142, "learning_rate": 1.4619969852071305e-05, "loss": 0.0314, "step": 30415 }, { "epoch": 14.188432835820896, "grad_norm": 0.37852016020689694, "learning_rate": 1.461285691483078e-05, "loss": 0.0308, "step": 30420 }, { "epoch": 14.190764925373134, "grad_norm": 0.37008751106728566, "learning_rate": 1.4605745893784973e-05, "loss": 0.0309, "step": 30425 }, { "epoch": 14.193097014925373, "grad_norm": 0.3890406591941415, "learning_rate": 1.4598636789991199e-05, "loss": 0.0315, "step": 30430 }, { "epoch": 14.195429104477611, "grad_norm": 0.3891781716518428, "learning_rate": 1.4591529604506527e-05, "loss": 0.0312, "step": 30435 }, { "epoch": 14.197761194029852, "grad_norm": 0.3731419384474, "learning_rate": 1.458442433838772e-05, "loss": 0.0297, "step": 30440 }, { "epoch": 14.20009328358209, "grad_norm": 0.3827572364613635, "learning_rate": 1.4577320992691269e-05, "loss": 0.0303, "step": 30445 }, { "epoch": 14.202425373134329, "grad_norm": 0.3756783915618007, "learning_rate": 1.4570219568473376e-05, "loss": 0.0311, "step": 30450 }, { "epoch": 14.204757462686567, "grad_norm": 0.396476624945309, "learning_rate": 1.4563120066789953e-05, "loss": 0.0307, "step": 30455 }, { "epoch": 14.207089552238806, "grad_norm": 0.38779128917905414, "learning_rate": 1.4556022488696614e-05, "loss": 0.0319, "step": 30460 }, { "epoch": 14.209421641791044, "grad_norm": 0.38817068112234693, "learning_rate": 1.4548926835248722e-05, "loss": 0.0321, "step": 30465 }, { "epoch": 14.211753731343283, "grad_norm": 0.39298572613181576, "learning_rate": 1.4541833107501312e-05, "loss": 0.0308, "step": 30470 }, { "epoch": 14.214085820895523, "grad_norm": 0.3585584038528542, "learning_rate": 1.453474130650918e-05, "loss": 0.0304, "step": 30475 }, { "epoch": 14.216417910447761, "grad_norm": 0.38887990337519046, "learning_rate": 1.4527651433326786e-05, "loss": 0.0316, "step": 30480 }, { "epoch": 14.21875, "grad_norm": 0.36632020184658487, "learning_rate": 1.4520563489008333e-05, "loss": 0.03, "step": 30485 }, { "epoch": 14.221082089552239, "grad_norm": 0.38936974035516697, "learning_rate": 1.4513477474607729e-05, "loss": 0.033, "step": 30490 }, { "epoch": 14.223414179104477, "grad_norm": 0.3616734836064317, "learning_rate": 1.4506393391178597e-05, "loss": 0.0299, "step": 30495 }, { "epoch": 14.225746268656717, "grad_norm": 0.38393258394249513, "learning_rate": 1.4499311239774277e-05, "loss": 0.0318, "step": 30500 }, { "epoch": 14.228078358208956, "grad_norm": 0.40321736460540103, "learning_rate": 1.449223102144781e-05, "loss": 0.0315, "step": 30505 }, { "epoch": 14.230410447761194, "grad_norm": 0.3708325484318699, "learning_rate": 1.4485152737251972e-05, "loss": 0.0298, "step": 30510 }, { "epoch": 14.232742537313433, "grad_norm": 0.3771785051895369, "learning_rate": 1.4478076388239214e-05, "loss": 0.0306, "step": 30515 }, { "epoch": 14.235074626865671, "grad_norm": 0.35850243346765764, "learning_rate": 1.4471001975461735e-05, "loss": 0.0305, "step": 30520 }, { "epoch": 14.23740671641791, "grad_norm": 0.3985857868984491, "learning_rate": 1.4463929499971424e-05, "loss": 0.0314, "step": 30525 }, { "epoch": 14.239738805970148, "grad_norm": 0.39142801056910714, "learning_rate": 1.4456858962819897e-05, "loss": 0.0319, "step": 30530 }, { "epoch": 14.242070895522389, "grad_norm": 0.3618611333434878, "learning_rate": 1.4449790365058474e-05, "loss": 0.0304, "step": 30535 }, { "epoch": 14.244402985074627, "grad_norm": 0.39743627465276415, "learning_rate": 1.4442723707738199e-05, "loss": 0.0319, "step": 30540 }, { "epoch": 14.246735074626866, "grad_norm": 0.3768944245703683, "learning_rate": 1.4435658991909784e-05, "loss": 0.0319, "step": 30545 }, { "epoch": 14.249067164179104, "grad_norm": 0.37663919012182906, "learning_rate": 1.4428596218623722e-05, "loss": 0.0321, "step": 30550 }, { "epoch": 14.251399253731343, "grad_norm": 0.3867864522524212, "learning_rate": 1.4421535388930146e-05, "loss": 0.0307, "step": 30555 }, { "epoch": 14.253731343283581, "grad_norm": 0.4018124607283776, "learning_rate": 1.4414476503878968e-05, "loss": 0.0325, "step": 30560 }, { "epoch": 14.256063432835822, "grad_norm": 0.3923553259561104, "learning_rate": 1.440741956451975e-05, "loss": 0.0317, "step": 30565 }, { "epoch": 14.25839552238806, "grad_norm": 0.3865818269638369, "learning_rate": 1.4400364571901803e-05, "loss": 0.0311, "step": 30570 }, { "epoch": 14.260727611940299, "grad_norm": 0.38467388701636157, "learning_rate": 1.4393311527074132e-05, "loss": 0.0322, "step": 30575 }, { "epoch": 14.263059701492537, "grad_norm": 0.3943487538810583, "learning_rate": 1.4386260431085457e-05, "loss": 0.0323, "step": 30580 }, { "epoch": 14.265391791044776, "grad_norm": 0.39621977427943517, "learning_rate": 1.4379211284984213e-05, "loss": 0.0329, "step": 30585 }, { "epoch": 14.267723880597014, "grad_norm": 0.38721111572714123, "learning_rate": 1.4372164089818546e-05, "loss": 0.0317, "step": 30590 }, { "epoch": 14.270055970149254, "grad_norm": 0.37801752460551435, "learning_rate": 1.4365118846636283e-05, "loss": 0.0308, "step": 30595 }, { "epoch": 14.272388059701493, "grad_norm": 0.38174212825993603, "learning_rate": 1.4358075556485016e-05, "loss": 0.0318, "step": 30600 }, { "epoch": 14.274720149253731, "grad_norm": 0.38280954304248377, "learning_rate": 1.4351034220411983e-05, "loss": 0.0311, "step": 30605 }, { "epoch": 14.27705223880597, "grad_norm": 0.3999273790626024, "learning_rate": 1.4343994839464192e-05, "loss": 0.032, "step": 30610 }, { "epoch": 14.279384328358208, "grad_norm": 0.38332005163514293, "learning_rate": 1.4336957414688313e-05, "loss": 0.0315, "step": 30615 }, { "epoch": 14.281716417910447, "grad_norm": 0.391548802524801, "learning_rate": 1.4329921947130748e-05, "loss": 0.0336, "step": 30620 }, { "epoch": 14.284048507462687, "grad_norm": 0.4093800776284813, "learning_rate": 1.4322888437837604e-05, "loss": 0.0318, "step": 30625 }, { "epoch": 14.286380597014926, "grad_norm": 0.3977999835835461, "learning_rate": 1.43158568878547e-05, "loss": 0.0301, "step": 30630 }, { "epoch": 14.288712686567164, "grad_norm": 0.43244184957169085, "learning_rate": 1.4308827298227551e-05, "loss": 0.0331, "step": 30635 }, { "epoch": 14.291044776119403, "grad_norm": 0.40384637113519334, "learning_rate": 1.430179967000141e-05, "loss": 0.0328, "step": 30640 }, { "epoch": 14.293376865671641, "grad_norm": 0.3930276393398937, "learning_rate": 1.4294774004221185e-05, "loss": 0.0328, "step": 30645 }, { "epoch": 14.29570895522388, "grad_norm": 0.3736620825900781, "learning_rate": 1.4287750301931557e-05, "loss": 0.0316, "step": 30650 }, { "epoch": 14.29804104477612, "grad_norm": 0.3977600396876721, "learning_rate": 1.4280728564176866e-05, "loss": 0.0317, "step": 30655 }, { "epoch": 14.300373134328359, "grad_norm": 0.39104490700838623, "learning_rate": 1.4273708792001182e-05, "loss": 0.0328, "step": 30660 }, { "epoch": 14.302705223880597, "grad_norm": 0.417979254411365, "learning_rate": 1.4266690986448273e-05, "loss": 0.0316, "step": 30665 }, { "epoch": 14.305037313432836, "grad_norm": 0.41760251754570643, "learning_rate": 1.4259675148561627e-05, "loss": 0.0324, "step": 30670 }, { "epoch": 14.307369402985074, "grad_norm": 0.3906922898638894, "learning_rate": 1.4252661279384427e-05, "loss": 0.0313, "step": 30675 }, { "epoch": 14.309701492537313, "grad_norm": 0.4022473510028239, "learning_rate": 1.424564937995957e-05, "loss": 0.0327, "step": 30680 }, { "epoch": 14.312033582089553, "grad_norm": 0.39356463091240335, "learning_rate": 1.4238639451329666e-05, "loss": 0.0321, "step": 30685 }, { "epoch": 14.314365671641792, "grad_norm": 0.3626022653887245, "learning_rate": 1.4231631494537007e-05, "loss": 0.0318, "step": 30690 }, { "epoch": 14.31669776119403, "grad_norm": 0.4202798824504924, "learning_rate": 1.4224625510623618e-05, "loss": 0.0313, "step": 30695 }, { "epoch": 14.319029850746269, "grad_norm": 0.41060878973469356, "learning_rate": 1.4217621500631222e-05, "loss": 0.0332, "step": 30700 }, { "epoch": 14.321361940298507, "grad_norm": 0.3962150719886968, "learning_rate": 1.4210619465601249e-05, "loss": 0.032, "step": 30705 }, { "epoch": 14.323694029850746, "grad_norm": 0.3942817590496289, "learning_rate": 1.4203619406574833e-05, "loss": 0.0331, "step": 30710 }, { "epoch": 14.326026119402986, "grad_norm": 0.3904762435267592, "learning_rate": 1.4196621324592818e-05, "loss": 0.0325, "step": 30715 }, { "epoch": 14.328358208955224, "grad_norm": 0.4018024462355793, "learning_rate": 1.4189625220695746e-05, "loss": 0.0329, "step": 30720 }, { "epoch": 14.330690298507463, "grad_norm": 0.415075739289987, "learning_rate": 1.4182631095923884e-05, "loss": 0.0308, "step": 30725 }, { "epoch": 14.333022388059701, "grad_norm": 0.40448643575835275, "learning_rate": 1.4175638951317166e-05, "loss": 0.0316, "step": 30730 }, { "epoch": 14.33535447761194, "grad_norm": 0.39583221184801953, "learning_rate": 1.4168648787915289e-05, "loss": 0.0338, "step": 30735 }, { "epoch": 14.337686567164178, "grad_norm": 0.3841655408327475, "learning_rate": 1.41616606067576e-05, "loss": 0.0311, "step": 30740 }, { "epoch": 14.340018656716419, "grad_norm": 0.4118051027357558, "learning_rate": 1.4154674408883179e-05, "loss": 0.0323, "step": 30745 }, { "epoch": 14.342350746268657, "grad_norm": 0.40216730241303444, "learning_rate": 1.4147690195330815e-05, "loss": 0.0324, "step": 30750 }, { "epoch": 14.344682835820896, "grad_norm": 0.40230055378670093, "learning_rate": 1.4140707967138983e-05, "loss": 0.0319, "step": 30755 }, { "epoch": 14.347014925373134, "grad_norm": 0.4104061093092204, "learning_rate": 1.413372772534588e-05, "loss": 0.0334, "step": 30760 }, { "epoch": 14.349347014925373, "grad_norm": 0.41604658378988424, "learning_rate": 1.4126749470989408e-05, "loss": 0.0321, "step": 30765 }, { "epoch": 14.351679104477611, "grad_norm": 0.38628686813382485, "learning_rate": 1.411977320510714e-05, "loss": 0.0328, "step": 30770 }, { "epoch": 14.354011194029852, "grad_norm": 0.40888042236249594, "learning_rate": 1.4112798928736415e-05, "loss": 0.0326, "step": 30775 }, { "epoch": 14.35634328358209, "grad_norm": 0.37552945179819935, "learning_rate": 1.410582664291421e-05, "loss": 0.0324, "step": 30780 }, { "epoch": 14.358675373134329, "grad_norm": 0.3815226676604016, "learning_rate": 1.4098856348677263e-05, "loss": 0.0308, "step": 30785 }, { "epoch": 14.361007462686567, "grad_norm": 0.38605743367058737, "learning_rate": 1.4091888047061974e-05, "loss": 0.0322, "step": 30790 }, { "epoch": 14.363339552238806, "grad_norm": 0.4303952919817968, "learning_rate": 1.4084921739104465e-05, "loss": 0.0333, "step": 30795 }, { "epoch": 14.365671641791044, "grad_norm": 0.41389829666972616, "learning_rate": 1.4077957425840563e-05, "loss": 0.0331, "step": 30800 }, { "epoch": 14.368003731343283, "grad_norm": 0.3983661865427256, "learning_rate": 1.407099510830579e-05, "loss": 0.0339, "step": 30805 }, { "epoch": 14.370335820895523, "grad_norm": 0.3928610914135401, "learning_rate": 1.406403478753538e-05, "loss": 0.0329, "step": 30810 }, { "epoch": 14.372667910447761, "grad_norm": 0.42550658989871587, "learning_rate": 1.4057076464564272e-05, "loss": 0.0342, "step": 30815 }, { "epoch": 14.375, "grad_norm": 0.40999321793954213, "learning_rate": 1.4050120140427081e-05, "loss": 0.0334, "step": 30820 }, { "epoch": 14.377332089552239, "grad_norm": 0.4005737667209963, "learning_rate": 1.4043165816158172e-05, "loss": 0.0332, "step": 30825 }, { "epoch": 14.379664179104477, "grad_norm": 0.3886659273104881, "learning_rate": 1.4036213492791561e-05, "loss": 0.0324, "step": 30830 }, { "epoch": 14.381996268656717, "grad_norm": 0.40143737747672725, "learning_rate": 1.4029263171361018e-05, "loss": 0.0338, "step": 30835 }, { "epoch": 14.384328358208956, "grad_norm": 0.4214835239707941, "learning_rate": 1.4022314852899968e-05, "loss": 0.0339, "step": 30840 }, { "epoch": 14.386660447761194, "grad_norm": 0.4150021041838224, "learning_rate": 1.4015368538441567e-05, "loss": 0.0325, "step": 30845 }, { "epoch": 14.388992537313433, "grad_norm": 0.3973793032877953, "learning_rate": 1.4008424229018668e-05, "loss": 0.0335, "step": 30850 }, { "epoch": 14.391324626865671, "grad_norm": 0.42591973382297993, "learning_rate": 1.4001481925663818e-05, "loss": 0.0327, "step": 30855 }, { "epoch": 14.39365671641791, "grad_norm": 0.42150939026535417, "learning_rate": 1.3994541629409275e-05, "loss": 0.0341, "step": 30860 }, { "epoch": 14.395988805970148, "grad_norm": 0.3935564711585373, "learning_rate": 1.3987603341287003e-05, "loss": 0.0332, "step": 30865 }, { "epoch": 14.398320895522389, "grad_norm": 0.39323782708514693, "learning_rate": 1.398066706232864e-05, "loss": 0.0324, "step": 30870 }, { "epoch": 14.400652985074627, "grad_norm": 0.39522965057998194, "learning_rate": 1.3973732793565553e-05, "loss": 0.0328, "step": 30875 }, { "epoch": 14.402985074626866, "grad_norm": 0.4178193437133593, "learning_rate": 1.3966800536028802e-05, "loss": 0.031, "step": 30880 }, { "epoch": 14.405317164179104, "grad_norm": 0.39781284270954037, "learning_rate": 1.3959870290749148e-05, "loss": 0.0312, "step": 30885 }, { "epoch": 14.407649253731343, "grad_norm": 0.39135359176239476, "learning_rate": 1.395294205875705e-05, "loss": 0.0327, "step": 30890 }, { "epoch": 14.409981343283581, "grad_norm": 0.38630657920860356, "learning_rate": 1.394601584108267e-05, "loss": 0.033, "step": 30895 }, { "epoch": 14.412313432835822, "grad_norm": 0.38303813444139206, "learning_rate": 1.3939091638755882e-05, "loss": 0.0318, "step": 30900 }, { "epoch": 14.41464552238806, "grad_norm": 0.3986880158233868, "learning_rate": 1.3932169452806221e-05, "loss": 0.0338, "step": 30905 }, { "epoch": 14.416977611940299, "grad_norm": 0.4218921764056444, "learning_rate": 1.3925249284262984e-05, "loss": 0.0338, "step": 30910 }, { "epoch": 14.419309701492537, "grad_norm": 0.4198552296065315, "learning_rate": 1.3918331134155104e-05, "loss": 0.0335, "step": 30915 }, { "epoch": 14.421641791044776, "grad_norm": 0.42088009646950436, "learning_rate": 1.3911415003511258e-05, "loss": 0.0342, "step": 30920 }, { "epoch": 14.423973880597014, "grad_norm": 0.40085327782695335, "learning_rate": 1.3904500893359809e-05, "loss": 0.0338, "step": 30925 }, { "epoch": 14.426305970149254, "grad_norm": 0.4194183804683855, "learning_rate": 1.3897588804728818e-05, "loss": 0.0347, "step": 30930 }, { "epoch": 14.428638059701493, "grad_norm": 0.3946136787001949, "learning_rate": 1.3890678738646038e-05, "loss": 0.0342, "step": 30935 }, { "epoch": 14.430970149253731, "grad_norm": 0.3905423763520787, "learning_rate": 1.3883770696138946e-05, "loss": 0.0348, "step": 30940 }, { "epoch": 14.43330223880597, "grad_norm": 0.42544952828130644, "learning_rate": 1.3876864678234674e-05, "loss": 0.0357, "step": 30945 }, { "epoch": 14.435634328358208, "grad_norm": 0.4334313749194491, "learning_rate": 1.3869960685960118e-05, "loss": 0.0346, "step": 30950 }, { "epoch": 14.437966417910447, "grad_norm": 0.40895204224796194, "learning_rate": 1.3863058720341799e-05, "loss": 0.0345, "step": 30955 }, { "epoch": 14.440298507462687, "grad_norm": 0.40700899524137785, "learning_rate": 1.3856158782406007e-05, "loss": 0.0324, "step": 30960 }, { "epoch": 14.442630597014926, "grad_norm": 0.3961606669757416, "learning_rate": 1.3849260873178671e-05, "loss": 0.0333, "step": 30965 }, { "epoch": 14.444962686567164, "grad_norm": 0.40816310160872227, "learning_rate": 1.3842364993685453e-05, "loss": 0.0333, "step": 30970 }, { "epoch": 14.447294776119403, "grad_norm": 0.40438325331531577, "learning_rate": 1.3835471144951705e-05, "loss": 0.0342, "step": 30975 }, { "epoch": 14.449626865671641, "grad_norm": 0.42372466869684644, "learning_rate": 1.3828579328002473e-05, "loss": 0.0331, "step": 30980 }, { "epoch": 14.45195895522388, "grad_norm": 0.40818849649238154, "learning_rate": 1.382168954386251e-05, "loss": 0.0337, "step": 30985 }, { "epoch": 14.45429104477612, "grad_norm": 0.4229326607154136, "learning_rate": 1.3814801793556264e-05, "loss": 0.0338, "step": 30990 }, { "epoch": 14.456623134328359, "grad_norm": 0.4149075721397462, "learning_rate": 1.3807916078107851e-05, "loss": 0.033, "step": 30995 }, { "epoch": 14.458955223880597, "grad_norm": 0.4102622299495281, "learning_rate": 1.3801032398541153e-05, "loss": 0.0327, "step": 31000 }, { "epoch": 14.461287313432836, "grad_norm": 0.41768468561604155, "learning_rate": 1.3794150755879665e-05, "loss": 0.034, "step": 31005 }, { "epoch": 14.463619402985074, "grad_norm": 0.40913464552756446, "learning_rate": 1.3787271151146658e-05, "loss": 0.0341, "step": 31010 }, { "epoch": 14.465951492537313, "grad_norm": 0.40432708734002415, "learning_rate": 1.3780393585365043e-05, "loss": 0.033, "step": 31015 }, { "epoch": 14.468283582089553, "grad_norm": 0.41688591458219015, "learning_rate": 1.3773518059557445e-05, "loss": 0.0326, "step": 31020 }, { "epoch": 14.470615671641792, "grad_norm": 0.41048017544166293, "learning_rate": 1.3766644574746202e-05, "loss": 0.0333, "step": 31025 }, { "epoch": 14.47294776119403, "grad_norm": 0.4086836632279738, "learning_rate": 1.3759773131953321e-05, "loss": 0.0323, "step": 31030 }, { "epoch": 14.475279850746269, "grad_norm": 0.40477787103495566, "learning_rate": 1.3752903732200535e-05, "loss": 0.0333, "step": 31035 }, { "epoch": 14.477611940298507, "grad_norm": 0.4073281779525335, "learning_rate": 1.3746036376509252e-05, "loss": 0.0336, "step": 31040 }, { "epoch": 14.479944029850746, "grad_norm": 0.4060178416686378, "learning_rate": 1.3739171065900577e-05, "loss": 0.0342, "step": 31045 }, { "epoch": 14.482276119402986, "grad_norm": 0.42031089901741525, "learning_rate": 1.3732307801395322e-05, "loss": 0.0346, "step": 31050 }, { "epoch": 14.484608208955224, "grad_norm": 0.4230372957700733, "learning_rate": 1.3725446584013984e-05, "loss": 0.0344, "step": 31055 }, { "epoch": 14.486940298507463, "grad_norm": 0.39208541482314674, "learning_rate": 1.3718587414776756e-05, "loss": 0.0334, "step": 31060 }, { "epoch": 14.489272388059701, "grad_norm": 0.407946080789277, "learning_rate": 1.3711730294703545e-05, "loss": 0.0354, "step": 31065 }, { "epoch": 14.49160447761194, "grad_norm": 0.4207174271923717, "learning_rate": 1.3704875224813928e-05, "loss": 0.034, "step": 31070 }, { "epoch": 14.493936567164178, "grad_norm": 0.41260478936076017, "learning_rate": 1.3698022206127201e-05, "loss": 0.0332, "step": 31075 }, { "epoch": 14.496268656716419, "grad_norm": 0.39932948282394326, "learning_rate": 1.3691171239662315e-05, "loss": 0.0338, "step": 31080 }, { "epoch": 14.498600746268657, "grad_norm": 0.41486323207263326, "learning_rate": 1.3684322326437976e-05, "loss": 0.0338, "step": 31085 }, { "epoch": 14.500932835820896, "grad_norm": 0.38801500787842164, "learning_rate": 1.3677475467472534e-05, "loss": 0.0346, "step": 31090 }, { "epoch": 14.503264925373134, "grad_norm": 0.3931904802787073, "learning_rate": 1.367063066378405e-05, "loss": 0.0335, "step": 31095 }, { "epoch": 14.505597014925373, "grad_norm": 0.42365708399980956, "learning_rate": 1.366378791639028e-05, "loss": 0.0348, "step": 31100 }, { "epoch": 14.507929104477611, "grad_norm": 0.40480891891644205, "learning_rate": 1.3656947226308686e-05, "loss": 0.0341, "step": 31105 }, { "epoch": 14.510261194029852, "grad_norm": 0.4236716927425404, "learning_rate": 1.3650108594556404e-05, "loss": 0.0339, "step": 31110 }, { "epoch": 14.51259328358209, "grad_norm": 0.4190878541535327, "learning_rate": 1.3643272022150286e-05, "loss": 0.0334, "step": 31115 }, { "epoch": 14.514925373134329, "grad_norm": 0.4007729735573883, "learning_rate": 1.3636437510106836e-05, "loss": 0.0333, "step": 31120 }, { "epoch": 14.517257462686567, "grad_norm": 0.44188858515292145, "learning_rate": 1.3629605059442315e-05, "loss": 0.0329, "step": 31125 }, { "epoch": 14.519589552238806, "grad_norm": 0.41378548298496615, "learning_rate": 1.362277467117261e-05, "loss": 0.0337, "step": 31130 }, { "epoch": 14.521921641791044, "grad_norm": 0.4203875622971969, "learning_rate": 1.3615946346313368e-05, "loss": 0.0355, "step": 31135 }, { "epoch": 14.524253731343283, "grad_norm": 0.42779772001459454, "learning_rate": 1.3609120085879872e-05, "loss": 0.0343, "step": 31140 }, { "epoch": 14.526585820895523, "grad_norm": 0.4103412305435608, "learning_rate": 1.360229589088713e-05, "loss": 0.0338, "step": 31145 }, { "epoch": 14.528917910447761, "grad_norm": 0.3815558876840306, "learning_rate": 1.3595473762349825e-05, "loss": 0.0331, "step": 31150 }, { "epoch": 14.53125, "grad_norm": 0.42781191691485043, "learning_rate": 1.3588653701282359e-05, "loss": 0.0348, "step": 31155 }, { "epoch": 14.533582089552239, "grad_norm": 0.41795190064613796, "learning_rate": 1.3581835708698796e-05, "loss": 0.035, "step": 31160 }, { "epoch": 14.535914179104477, "grad_norm": 0.4100082706668444, "learning_rate": 1.357501978561292e-05, "loss": 0.0337, "step": 31165 }, { "epoch": 14.538246268656717, "grad_norm": 0.4192094477989234, "learning_rate": 1.3568205933038164e-05, "loss": 0.034, "step": 31170 }, { "epoch": 14.540578358208956, "grad_norm": 0.4199184643946221, "learning_rate": 1.3561394151987722e-05, "loss": 0.0337, "step": 31175 }, { "epoch": 14.542910447761194, "grad_norm": 0.4301936623293784, "learning_rate": 1.3554584443474405e-05, "loss": 0.0357, "step": 31180 }, { "epoch": 14.545242537313433, "grad_norm": 0.4036515298683883, "learning_rate": 1.3547776808510782e-05, "loss": 0.0344, "step": 31185 }, { "epoch": 14.547574626865671, "grad_norm": 0.42510978844389957, "learning_rate": 1.3540971248109063e-05, "loss": 0.033, "step": 31190 }, { "epoch": 14.54990671641791, "grad_norm": 0.42610034713996764, "learning_rate": 1.3534167763281175e-05, "loss": 0.0347, "step": 31195 }, { "epoch": 14.552238805970148, "grad_norm": 0.4230328296852746, "learning_rate": 1.352736635503873e-05, "loss": 0.034, "step": 31200 }, { "epoch": 14.554570895522389, "grad_norm": 0.453759164950968, "learning_rate": 1.3520567024393033e-05, "loss": 0.0348, "step": 31205 }, { "epoch": 14.556902985074627, "grad_norm": 0.40674159618990074, "learning_rate": 1.3513769772355083e-05, "loss": 0.0338, "step": 31210 }, { "epoch": 14.559235074626866, "grad_norm": 0.4094273037557532, "learning_rate": 1.350697459993556e-05, "loss": 0.0335, "step": 31215 }, { "epoch": 14.561567164179104, "grad_norm": 0.39283350369247894, "learning_rate": 1.3500181508144855e-05, "loss": 0.0354, "step": 31220 }, { "epoch": 14.563899253731343, "grad_norm": 0.42612568265087075, "learning_rate": 1.3493390497993014e-05, "loss": 0.0339, "step": 31225 }, { "epoch": 14.566231343283581, "grad_norm": 0.3969749098299763, "learning_rate": 1.3486601570489809e-05, "loss": 0.0344, "step": 31230 }, { "epoch": 14.568563432835822, "grad_norm": 0.41695676381429814, "learning_rate": 1.3479814726644686e-05, "loss": 0.0341, "step": 31235 }, { "epoch": 14.57089552238806, "grad_norm": 0.3997283272926389, "learning_rate": 1.3473029967466779e-05, "loss": 0.0343, "step": 31240 }, { "epoch": 14.573227611940299, "grad_norm": 0.4257485811678424, "learning_rate": 1.3466247293964922e-05, "loss": 0.0352, "step": 31245 }, { "epoch": 14.575559701492537, "grad_norm": 0.4169647989562604, "learning_rate": 1.3459466707147644e-05, "loss": 0.0347, "step": 31250 }, { "epoch": 14.577891791044776, "grad_norm": 0.4163907727559924, "learning_rate": 1.3452688208023123e-05, "loss": 0.0346, "step": 31255 }, { "epoch": 14.580223880597014, "grad_norm": 0.4468087135304617, "learning_rate": 1.3445911797599293e-05, "loss": 0.0358, "step": 31260 }, { "epoch": 14.582555970149254, "grad_norm": 0.439563473709762, "learning_rate": 1.3439137476883706e-05, "loss": 0.0357, "step": 31265 }, { "epoch": 14.584888059701493, "grad_norm": 0.43689148972416847, "learning_rate": 1.3432365246883671e-05, "loss": 0.0361, "step": 31270 }, { "epoch": 14.587220149253731, "grad_norm": 0.4339350834370352, "learning_rate": 1.3425595108606138e-05, "loss": 0.0354, "step": 31275 }, { "epoch": 14.58955223880597, "grad_norm": 0.4084040663009797, "learning_rate": 1.3418827063057754e-05, "loss": 0.0344, "step": 31280 }, { "epoch": 14.591884328358208, "grad_norm": 0.4074193910378302, "learning_rate": 1.3412061111244877e-05, "loss": 0.0352, "step": 31285 }, { "epoch": 14.594216417910447, "grad_norm": 0.4118800895974343, "learning_rate": 1.3405297254173532e-05, "loss": 0.0338, "step": 31290 }, { "epoch": 14.596548507462687, "grad_norm": 0.4366624000036103, "learning_rate": 1.3398535492849442e-05, "loss": 0.0348, "step": 31295 }, { "epoch": 14.598880597014926, "grad_norm": 0.4221608604423126, "learning_rate": 1.3391775828278023e-05, "loss": 0.0339, "step": 31300 }, { "epoch": 14.601212686567164, "grad_norm": 0.4169554877349917, "learning_rate": 1.338501826146435e-05, "loss": 0.0356, "step": 31305 }, { "epoch": 14.603544776119403, "grad_norm": 0.41792316991840683, "learning_rate": 1.3378262793413237e-05, "loss": 0.0344, "step": 31310 }, { "epoch": 14.605876865671641, "grad_norm": 0.4229267397058532, "learning_rate": 1.3371509425129131e-05, "loss": 0.0335, "step": 31315 }, { "epoch": 14.60820895522388, "grad_norm": 0.41532513329634374, "learning_rate": 1.3364758157616219e-05, "loss": 0.0344, "step": 31320 }, { "epoch": 14.61054104477612, "grad_norm": 0.4181909862553021, "learning_rate": 1.3358008991878329e-05, "loss": 0.0342, "step": 31325 }, { "epoch": 14.612873134328359, "grad_norm": 0.4085447553454273, "learning_rate": 1.3351261928919007e-05, "loss": 0.0359, "step": 31330 }, { "epoch": 14.615205223880597, "grad_norm": 0.39291137652734137, "learning_rate": 1.3344516969741471e-05, "loss": 0.0346, "step": 31335 }, { "epoch": 14.617537313432836, "grad_norm": 0.4218037652878488, "learning_rate": 1.3337774115348639e-05, "loss": 0.0337, "step": 31340 }, { "epoch": 14.619869402985074, "grad_norm": 0.42023126600714567, "learning_rate": 1.3331033366743101e-05, "loss": 0.0354, "step": 31345 }, { "epoch": 14.622201492537313, "grad_norm": 0.4207256099882843, "learning_rate": 1.3324294724927155e-05, "loss": 0.0346, "step": 31350 }, { "epoch": 14.624533582089553, "grad_norm": 0.43138602492176154, "learning_rate": 1.3317558190902752e-05, "loss": 0.0347, "step": 31355 }, { "epoch": 14.626865671641792, "grad_norm": 0.4237554535125759, "learning_rate": 1.3310823765671571e-05, "loss": 0.0328, "step": 31360 }, { "epoch": 14.62919776119403, "grad_norm": 0.4170496175573579, "learning_rate": 1.3304091450234934e-05, "loss": 0.0344, "step": 31365 }, { "epoch": 14.631529850746269, "grad_norm": 0.4176957759851247, "learning_rate": 1.32973612455939e-05, "loss": 0.0338, "step": 31370 }, { "epoch": 14.633861940298507, "grad_norm": 0.4271961042406829, "learning_rate": 1.3290633152749166e-05, "loss": 0.0342, "step": 31375 }, { "epoch": 14.636194029850746, "grad_norm": 0.4039356002134867, "learning_rate": 1.3283907172701135e-05, "loss": 0.0352, "step": 31380 }, { "epoch": 14.638526119402986, "grad_norm": 0.42988400876602656, "learning_rate": 1.3277183306449902e-05, "loss": 0.0367, "step": 31385 }, { "epoch": 14.640858208955224, "grad_norm": 0.4225203297573163, "learning_rate": 1.3270461554995243e-05, "loss": 0.036, "step": 31390 }, { "epoch": 14.643190298507463, "grad_norm": 0.41956126395039556, "learning_rate": 1.3263741919336608e-05, "loss": 0.0342, "step": 31395 }, { "epoch": 14.645522388059701, "grad_norm": 0.41020689829585666, "learning_rate": 1.3257024400473162e-05, "loss": 0.0325, "step": 31400 }, { "epoch": 14.64785447761194, "grad_norm": 0.4257262985228973, "learning_rate": 1.3250308999403715e-05, "loss": 0.0347, "step": 31405 }, { "epoch": 14.650186567164178, "grad_norm": 0.41508911066545506, "learning_rate": 1.3243595717126792e-05, "loss": 0.0331, "step": 31410 }, { "epoch": 14.652518656716419, "grad_norm": 0.43450917145634677, "learning_rate": 1.3236884554640589e-05, "loss": 0.035, "step": 31415 }, { "epoch": 14.654850746268657, "grad_norm": 0.4232713175486915, "learning_rate": 1.3230175512943e-05, "loss": 0.0345, "step": 31420 }, { "epoch": 14.657182835820896, "grad_norm": 0.39317381040882243, "learning_rate": 1.3223468593031585e-05, "loss": 0.035, "step": 31425 }, { "epoch": 14.659514925373134, "grad_norm": 0.4151883329988704, "learning_rate": 1.3216763795903608e-05, "loss": 0.0351, "step": 31430 }, { "epoch": 14.661847014925373, "grad_norm": 0.4200729153931138, "learning_rate": 1.321006112255601e-05, "loss": 0.0353, "step": 31435 }, { "epoch": 14.664179104477611, "grad_norm": 0.4270385572210163, "learning_rate": 1.3203360573985394e-05, "loss": 0.0354, "step": 31440 }, { "epoch": 14.666511194029852, "grad_norm": 0.42229380693094326, "learning_rate": 1.31966621511881e-05, "loss": 0.0346, "step": 31445 }, { "epoch": 14.66884328358209, "grad_norm": 0.41906005954274034, "learning_rate": 1.3189965855160088e-05, "loss": 0.0351, "step": 31450 }, { "epoch": 14.671175373134329, "grad_norm": 0.4318066776009608, "learning_rate": 1.318327168689705e-05, "loss": 0.0359, "step": 31455 }, { "epoch": 14.673507462686567, "grad_norm": 0.4157754157156395, "learning_rate": 1.3176579647394338e-05, "loss": 0.0346, "step": 31460 }, { "epoch": 14.675839552238806, "grad_norm": 0.4222554864447895, "learning_rate": 1.3169889737647001e-05, "loss": 0.0347, "step": 31465 }, { "epoch": 14.678171641791044, "grad_norm": 0.41636658694565365, "learning_rate": 1.3163201958649757e-05, "loss": 0.0357, "step": 31470 }, { "epoch": 14.680503731343283, "grad_norm": 0.4068951455036406, "learning_rate": 1.315651631139703e-05, "loss": 0.0352, "step": 31475 }, { "epoch": 14.682835820895523, "grad_norm": 0.4128332482726942, "learning_rate": 1.314983279688288e-05, "loss": 0.0351, "step": 31480 }, { "epoch": 14.685167910447761, "grad_norm": 0.42208322841922824, "learning_rate": 1.3143151416101118e-05, "loss": 0.0337, "step": 31485 }, { "epoch": 14.6875, "grad_norm": 0.43027611412130656, "learning_rate": 1.3136472170045171e-05, "loss": 0.0349, "step": 31490 }, { "epoch": 14.689832089552239, "grad_norm": 0.4302047621146001, "learning_rate": 1.3129795059708209e-05, "loss": 0.0359, "step": 31495 }, { "epoch": 14.692164179104477, "grad_norm": 0.42626242541148335, "learning_rate": 1.3123120086083026e-05, "loss": 0.0342, "step": 31500 }, { "epoch": 14.694496268656717, "grad_norm": 0.43000461436462245, "learning_rate": 1.3116447250162145e-05, "loss": 0.0354, "step": 31505 }, { "epoch": 14.696828358208956, "grad_norm": 0.42733006374976107, "learning_rate": 1.3109776552937742e-05, "loss": 0.0362, "step": 31510 }, { "epoch": 14.699160447761194, "grad_norm": 0.44218676684431685, "learning_rate": 1.3103107995401689e-05, "loss": 0.0351, "step": 31515 }, { "epoch": 14.701492537313433, "grad_norm": 0.4340560089575121, "learning_rate": 1.3096441578545544e-05, "loss": 0.0357, "step": 31520 }, { "epoch": 14.703824626865671, "grad_norm": 0.4118325693703165, "learning_rate": 1.3089777303360534e-05, "loss": 0.0368, "step": 31525 }, { "epoch": 14.70615671641791, "grad_norm": 0.42783294373702946, "learning_rate": 1.308311517083756e-05, "loss": 0.0357, "step": 31530 }, { "epoch": 14.708488805970148, "grad_norm": 0.41637742744462775, "learning_rate": 1.3076455181967246e-05, "loss": 0.0342, "step": 31535 }, { "epoch": 14.710820895522389, "grad_norm": 0.4250080883070207, "learning_rate": 1.306979733773983e-05, "loss": 0.0348, "step": 31540 }, { "epoch": 14.713152985074627, "grad_norm": 0.38887044748782473, "learning_rate": 1.3063141639145313e-05, "loss": 0.035, "step": 31545 }, { "epoch": 14.715485074626866, "grad_norm": 0.4218507337232924, "learning_rate": 1.3056488087173302e-05, "loss": 0.036, "step": 31550 }, { "epoch": 14.717817164179104, "grad_norm": 0.43288395554716563, "learning_rate": 1.3049836682813127e-05, "loss": 0.0357, "step": 31555 }, { "epoch": 14.720149253731343, "grad_norm": 0.4317405055609363, "learning_rate": 1.3043187427053788e-05, "loss": 0.0357, "step": 31560 }, { "epoch": 14.722481343283581, "grad_norm": 0.4243891872565016, "learning_rate": 1.3036540320883967e-05, "loss": 0.035, "step": 31565 }, { "epoch": 14.724813432835822, "grad_norm": 0.4182643316585439, "learning_rate": 1.3029895365292018e-05, "loss": 0.0336, "step": 31570 }, { "epoch": 14.72714552238806, "grad_norm": 0.39423009610081694, "learning_rate": 1.3023252561266e-05, "loss": 0.0337, "step": 31575 }, { "epoch": 14.729477611940299, "grad_norm": 0.41176254603710744, "learning_rate": 1.3016611909793613e-05, "loss": 0.0353, "step": 31580 }, { "epoch": 14.731809701492537, "grad_norm": 0.4392078480265529, "learning_rate": 1.3009973411862264e-05, "loss": 0.0369, "step": 31585 }, { "epoch": 14.734141791044776, "grad_norm": 0.43607126930062573, "learning_rate": 1.3003337068459037e-05, "loss": 0.0348, "step": 31590 }, { "epoch": 14.736473880597014, "grad_norm": 0.40689025101392595, "learning_rate": 1.29967028805707e-05, "loss": 0.0355, "step": 31595 }, { "epoch": 14.738805970149254, "grad_norm": 0.41196531817396603, "learning_rate": 1.2990070849183678e-05, "loss": 0.0349, "step": 31600 }, { "epoch": 14.741138059701493, "grad_norm": 0.3885658998413892, "learning_rate": 1.2983440975284101e-05, "loss": 0.0352, "step": 31605 }, { "epoch": 14.743470149253731, "grad_norm": 0.42582251737784205, "learning_rate": 1.2976813259857773e-05, "loss": 0.0355, "step": 31610 }, { "epoch": 14.74580223880597, "grad_norm": 0.4206359719509992, "learning_rate": 1.2970187703890149e-05, "loss": 0.0346, "step": 31615 }, { "epoch": 14.748134328358208, "grad_norm": 0.42840230406232954, "learning_rate": 1.2963564308366416e-05, "loss": 0.0358, "step": 31620 }, { "epoch": 14.750466417910447, "grad_norm": 0.4444686917058942, "learning_rate": 1.2956943074271388e-05, "loss": 0.0366, "step": 31625 }, { "epoch": 14.752798507462687, "grad_norm": 0.42337023111033933, "learning_rate": 1.295032400258958e-05, "loss": 0.0353, "step": 31630 }, { "epoch": 14.755130597014926, "grad_norm": 0.42272865206000054, "learning_rate": 1.2943707094305196e-05, "loss": 0.0352, "step": 31635 }, { "epoch": 14.757462686567164, "grad_norm": 0.42205406795061534, "learning_rate": 1.2937092350402097e-05, "loss": 0.0355, "step": 31640 }, { "epoch": 14.759794776119403, "grad_norm": 0.4087341857500635, "learning_rate": 1.2930479771863835e-05, "loss": 0.0352, "step": 31645 }, { "epoch": 14.762126865671641, "grad_norm": 0.4281901562221655, "learning_rate": 1.2923869359673646e-05, "loss": 0.0357, "step": 31650 }, { "epoch": 14.76445895522388, "grad_norm": 0.4210060309444807, "learning_rate": 1.2917261114814414e-05, "loss": 0.0353, "step": 31655 }, { "epoch": 14.76679104477612, "grad_norm": 0.3942441717322778, "learning_rate": 1.2910655038268749e-05, "loss": 0.0347, "step": 31660 }, { "epoch": 14.769123134328359, "grad_norm": 0.43913253011279696, "learning_rate": 1.2904051131018877e-05, "loss": 0.0355, "step": 31665 }, { "epoch": 14.771455223880597, "grad_norm": 0.42922056886700743, "learning_rate": 1.2897449394046773e-05, "loss": 0.0352, "step": 31670 }, { "epoch": 14.773787313432836, "grad_norm": 0.4333356759097451, "learning_rate": 1.2890849828334029e-05, "loss": 0.0353, "step": 31675 }, { "epoch": 14.776119402985074, "grad_norm": 0.4269937631295438, "learning_rate": 1.2884252434861938e-05, "loss": 0.0354, "step": 31680 }, { "epoch": 14.778451492537313, "grad_norm": 0.4312709193230407, "learning_rate": 1.2877657214611477e-05, "loss": 0.0357, "step": 31685 }, { "epoch": 14.780783582089553, "grad_norm": 0.4253512881792104, "learning_rate": 1.2871064168563291e-05, "loss": 0.0357, "step": 31690 }, { "epoch": 14.783115671641792, "grad_norm": 0.44942880928207185, "learning_rate": 1.2864473297697693e-05, "loss": 0.0381, "step": 31695 }, { "epoch": 14.78544776119403, "grad_norm": 0.42359806875339506, "learning_rate": 1.2857884602994706e-05, "loss": 0.0365, "step": 31700 }, { "epoch": 14.787779850746269, "grad_norm": 0.4386800184890531, "learning_rate": 1.2851298085433972e-05, "loss": 0.0368, "step": 31705 }, { "epoch": 14.790111940298507, "grad_norm": 0.43129675678395646, "learning_rate": 1.2844713745994871e-05, "loss": 0.0348, "step": 31710 }, { "epoch": 14.792444029850746, "grad_norm": 0.42640736997370443, "learning_rate": 1.283813158565641e-05, "loss": 0.0364, "step": 31715 }, { "epoch": 14.794776119402986, "grad_norm": 0.4252580290596859, "learning_rate": 1.2831551605397321e-05, "loss": 0.0358, "step": 31720 }, { "epoch": 14.797108208955224, "grad_norm": 0.42097333359477035, "learning_rate": 1.2824973806195955e-05, "loss": 0.037, "step": 31725 }, { "epoch": 14.799440298507463, "grad_norm": 0.4073407519871557, "learning_rate": 1.2818398189030383e-05, "loss": 0.0351, "step": 31730 }, { "epoch": 14.801772388059701, "grad_norm": 0.43642581760698057, "learning_rate": 1.2811824754878331e-05, "loss": 0.036, "step": 31735 }, { "epoch": 14.80410447761194, "grad_norm": 0.42261254161710166, "learning_rate": 1.2805253504717213e-05, "loss": 0.0349, "step": 31740 }, { "epoch": 14.806436567164178, "grad_norm": 0.43408337682030823, "learning_rate": 1.27986844395241e-05, "loss": 0.0368, "step": 31745 }, { "epoch": 14.808768656716419, "grad_norm": 0.43189211745184297, "learning_rate": 1.2792117560275766e-05, "loss": 0.0356, "step": 31750 }, { "epoch": 14.811100746268657, "grad_norm": 0.4267729554380111, "learning_rate": 1.2785552867948625e-05, "loss": 0.0363, "step": 31755 }, { "epoch": 14.813432835820896, "grad_norm": 0.4387911224299674, "learning_rate": 1.2778990363518785e-05, "loss": 0.0362, "step": 31760 }, { "epoch": 14.815764925373134, "grad_norm": 0.4280431908334434, "learning_rate": 1.277243004796204e-05, "loss": 0.0361, "step": 31765 }, { "epoch": 14.818097014925373, "grad_norm": 0.43873106612145163, "learning_rate": 1.2765871922253835e-05, "loss": 0.0351, "step": 31770 }, { "epoch": 14.820429104477611, "grad_norm": 0.4415392875076087, "learning_rate": 1.2759315987369306e-05, "loss": 0.0353, "step": 31775 }, { "epoch": 14.822761194029852, "grad_norm": 0.4167823479047018, "learning_rate": 1.2752762244283255e-05, "loss": 0.0337, "step": 31780 }, { "epoch": 14.82509328358209, "grad_norm": 0.42809614930346834, "learning_rate": 1.2746210693970171e-05, "loss": 0.036, "step": 31785 }, { "epoch": 14.827425373134329, "grad_norm": 0.42043268526348904, "learning_rate": 1.2739661337404185e-05, "loss": 0.0357, "step": 31790 }, { "epoch": 14.829757462686567, "grad_norm": 0.4237384243576174, "learning_rate": 1.2733114175559149e-05, "loss": 0.0351, "step": 31795 }, { "epoch": 14.832089552238806, "grad_norm": 0.44197987496062446, "learning_rate": 1.2726569209408545e-05, "loss": 0.0367, "step": 31800 }, { "epoch": 14.834421641791044, "grad_norm": 0.4128408270795722, "learning_rate": 1.272002643992555e-05, "loss": 0.036, "step": 31805 }, { "epoch": 14.836753731343283, "grad_norm": 0.454022972369671, "learning_rate": 1.2713485868083014e-05, "loss": 0.0368, "step": 31810 }, { "epoch": 14.839085820895523, "grad_norm": 0.4247125032539369, "learning_rate": 1.270694749485346e-05, "loss": 0.0347, "step": 31815 }, { "epoch": 14.841417910447761, "grad_norm": 0.43599118158352956, "learning_rate": 1.2700411321209078e-05, "loss": 0.036, "step": 31820 }, { "epoch": 14.84375, "grad_norm": 0.4181208288084367, "learning_rate": 1.2693877348121742e-05, "loss": 0.0361, "step": 31825 }, { "epoch": 14.846082089552239, "grad_norm": 0.4192693074435536, "learning_rate": 1.2687345576562965e-05, "loss": 0.0352, "step": 31830 }, { "epoch": 14.848414179104477, "grad_norm": 0.43806335444852834, "learning_rate": 1.2680816007503998e-05, "loss": 0.0359, "step": 31835 }, { "epoch": 14.850746268656717, "grad_norm": 0.43807328459478617, "learning_rate": 1.2674288641915688e-05, "loss": 0.0359, "step": 31840 }, { "epoch": 14.853078358208956, "grad_norm": 0.45197492978474696, "learning_rate": 1.2667763480768625e-05, "loss": 0.0362, "step": 31845 }, { "epoch": 14.855410447761194, "grad_norm": 0.42852145780598666, "learning_rate": 1.2661240525033016e-05, "loss": 0.0361, "step": 31850 }, { "epoch": 14.857742537313433, "grad_norm": 0.4156902465034927, "learning_rate": 1.2654719775678764e-05, "loss": 0.0357, "step": 31855 }, { "epoch": 14.860074626865671, "grad_norm": 0.43812087443180286, "learning_rate": 1.264820123367545e-05, "loss": 0.0353, "step": 31860 }, { "epoch": 14.86240671641791, "grad_norm": 0.4263953812661719, "learning_rate": 1.2641684899992314e-05, "loss": 0.0355, "step": 31865 }, { "epoch": 14.864738805970148, "grad_norm": 0.4608911371449193, "learning_rate": 1.2635170775598271e-05, "loss": 0.0368, "step": 31870 }, { "epoch": 14.867070895522389, "grad_norm": 0.43197298938935047, "learning_rate": 1.2628658861461923e-05, "loss": 0.0343, "step": 31875 }, { "epoch": 14.869402985074627, "grad_norm": 0.4683073401307802, "learning_rate": 1.26221491585515e-05, "loss": 0.0364, "step": 31880 }, { "epoch": 14.871735074626866, "grad_norm": 0.43434388692170517, "learning_rate": 1.2615641667834962e-05, "loss": 0.0356, "step": 31885 }, { "epoch": 14.874067164179104, "grad_norm": 0.42791908642377485, "learning_rate": 1.2609136390279886e-05, "loss": 0.0364, "step": 31890 }, { "epoch": 14.876399253731343, "grad_norm": 0.4270042455650096, "learning_rate": 1.2602633326853569e-05, "loss": 0.0357, "step": 31895 }, { "epoch": 14.878731343283581, "grad_norm": 0.4675603398181461, "learning_rate": 1.2596132478522938e-05, "loss": 0.0369, "step": 31900 }, { "epoch": 14.881063432835822, "grad_norm": 0.42154613441760885, "learning_rate": 1.258963384625461e-05, "loss": 0.036, "step": 31905 }, { "epoch": 14.88339552238806, "grad_norm": 0.44655437315978924, "learning_rate": 1.258313743101487e-05, "loss": 0.0353, "step": 31910 }, { "epoch": 14.885727611940299, "grad_norm": 0.43287680123538197, "learning_rate": 1.2576643233769669e-05, "loss": 0.0351, "step": 31915 }, { "epoch": 14.888059701492537, "grad_norm": 0.4246910529907923, "learning_rate": 1.2570151255484639e-05, "loss": 0.036, "step": 31920 }, { "epoch": 14.890391791044776, "grad_norm": 0.4127550426281403, "learning_rate": 1.2563661497125073e-05, "loss": 0.0351, "step": 31925 }, { "epoch": 14.892723880597014, "grad_norm": 0.4147298766749441, "learning_rate": 1.2557173959655932e-05, "loss": 0.0361, "step": 31930 }, { "epoch": 14.895055970149254, "grad_norm": 0.4425501788368654, "learning_rate": 1.2550688644041861e-05, "loss": 0.0359, "step": 31935 }, { "epoch": 14.897388059701493, "grad_norm": 0.4227875446583411, "learning_rate": 1.2544205551247148e-05, "loss": 0.0368, "step": 31940 }, { "epoch": 14.899720149253731, "grad_norm": 0.4326859687167421, "learning_rate": 1.2537724682235776e-05, "loss": 0.0364, "step": 31945 }, { "epoch": 14.90205223880597, "grad_norm": 0.4405680753136494, "learning_rate": 1.253124603797139e-05, "loss": 0.0374, "step": 31950 }, { "epoch": 14.904384328358208, "grad_norm": 0.4377784171708227, "learning_rate": 1.2524769619417297e-05, "loss": 0.0362, "step": 31955 }, { "epoch": 14.906716417910447, "grad_norm": 0.4232300971898163, "learning_rate": 1.251829542753648e-05, "loss": 0.0366, "step": 31960 }, { "epoch": 14.909048507462687, "grad_norm": 0.420991483882026, "learning_rate": 1.2511823463291595e-05, "loss": 0.0361, "step": 31965 }, { "epoch": 14.911380597014926, "grad_norm": 0.436154297550441, "learning_rate": 1.2505353727644958e-05, "loss": 0.0357, "step": 31970 }, { "epoch": 14.913712686567164, "grad_norm": 0.4242727217455418, "learning_rate": 1.2498886221558547e-05, "loss": 0.0369, "step": 31975 }, { "epoch": 14.916044776119403, "grad_norm": 0.423670100924058, "learning_rate": 1.249242094599404e-05, "loss": 0.036, "step": 31980 }, { "epoch": 14.918376865671641, "grad_norm": 0.4234913713697095, "learning_rate": 1.2485957901912736e-05, "loss": 0.0356, "step": 31985 }, { "epoch": 14.92070895522388, "grad_norm": 0.42588606187949424, "learning_rate": 1.2479497090275643e-05, "loss": 0.0358, "step": 31990 }, { "epoch": 14.92304104477612, "grad_norm": 0.42968762550842693, "learning_rate": 1.2473038512043417e-05, "loss": 0.0359, "step": 31995 }, { "epoch": 14.925373134328359, "grad_norm": 0.43303447313470855, "learning_rate": 1.246658216817639e-05, "loss": 0.0359, "step": 32000 }, { "epoch": 14.927705223880597, "grad_norm": 0.4098626739344114, "learning_rate": 1.2460128059634556e-05, "loss": 0.0353, "step": 32005 }, { "epoch": 14.930037313432836, "grad_norm": 0.42605962241105827, "learning_rate": 1.2453676187377588e-05, "loss": 0.0368, "step": 32010 }, { "epoch": 14.932369402985074, "grad_norm": 0.44058979840640106, "learning_rate": 1.2447226552364792e-05, "loss": 0.0366, "step": 32015 }, { "epoch": 14.934701492537313, "grad_norm": 0.41817643169174035, "learning_rate": 1.2440779155555202e-05, "loss": 0.0363, "step": 32020 }, { "epoch": 14.937033582089553, "grad_norm": 0.44168623671771917, "learning_rate": 1.2434333997907448e-05, "loss": 0.0357, "step": 32025 }, { "epoch": 14.939365671641792, "grad_norm": 0.42666645290864663, "learning_rate": 1.24278910803799e-05, "loss": 0.0365, "step": 32030 }, { "epoch": 14.94169776119403, "grad_norm": 0.40682110439163854, "learning_rate": 1.2421450403930529e-05, "loss": 0.0364, "step": 32035 }, { "epoch": 14.944029850746269, "grad_norm": 0.4313538565701426, "learning_rate": 1.2415011969517016e-05, "loss": 0.0362, "step": 32040 }, { "epoch": 14.946361940298507, "grad_norm": 0.4396112830067322, "learning_rate": 1.2408575778096686e-05, "loss": 0.038, "step": 32045 }, { "epoch": 14.948694029850746, "grad_norm": 0.41921490023111013, "learning_rate": 1.2402141830626547e-05, "loss": 0.0377, "step": 32050 }, { "epoch": 14.951026119402986, "grad_norm": 0.42034804706058904, "learning_rate": 1.2395710128063263e-05, "loss": 0.0356, "step": 32055 }, { "epoch": 14.953358208955224, "grad_norm": 0.4089219233990294, "learning_rate": 1.2389280671363175e-05, "loss": 0.0355, "step": 32060 }, { "epoch": 14.955690298507463, "grad_norm": 0.44032486999023956, "learning_rate": 1.2382853461482255e-05, "loss": 0.0351, "step": 32065 }, { "epoch": 14.958022388059701, "grad_norm": 0.4220073995358734, "learning_rate": 1.2376428499376201e-05, "loss": 0.0354, "step": 32070 }, { "epoch": 14.96035447761194, "grad_norm": 0.43359475616237525, "learning_rate": 1.2370005786000314e-05, "loss": 0.0369, "step": 32075 }, { "epoch": 14.962686567164178, "grad_norm": 0.43671931895118166, "learning_rate": 1.2363585322309615e-05, "loss": 0.0349, "step": 32080 }, { "epoch": 14.965018656716419, "grad_norm": 0.4406702822725363, "learning_rate": 1.2357167109258748e-05, "loss": 0.0368, "step": 32085 }, { "epoch": 14.967350746268657, "grad_norm": 0.43527420329580474, "learning_rate": 1.2350751147802047e-05, "loss": 0.0358, "step": 32090 }, { "epoch": 14.969682835820896, "grad_norm": 0.42704872483252193, "learning_rate": 1.23443374388935e-05, "loss": 0.0362, "step": 32095 }, { "epoch": 14.972014925373134, "grad_norm": 0.44348043439724777, "learning_rate": 1.2337925983486768e-05, "loss": 0.0351, "step": 32100 }, { "epoch": 14.974347014925373, "grad_norm": 0.43632886476706234, "learning_rate": 1.2331516782535172e-05, "loss": 0.0355, "step": 32105 }, { "epoch": 14.976679104477611, "grad_norm": 0.43710341402557595, "learning_rate": 1.2325109836991703e-05, "loss": 0.036, "step": 32110 }, { "epoch": 14.979011194029852, "grad_norm": 0.44729334377048807, "learning_rate": 1.2318705147809006e-05, "loss": 0.036, "step": 32115 }, { "epoch": 14.98134328358209, "grad_norm": 0.4280784966636888, "learning_rate": 1.2312302715939394e-05, "loss": 0.0361, "step": 32120 }, { "epoch": 14.983675373134329, "grad_norm": 0.4156801237179036, "learning_rate": 1.2305902542334854e-05, "loss": 0.035, "step": 32125 }, { "epoch": 14.986007462686567, "grad_norm": 0.4436520468806834, "learning_rate": 1.2299504627947029e-05, "loss": 0.0368, "step": 32130 }, { "epoch": 14.988339552238806, "grad_norm": 0.4438553989334471, "learning_rate": 1.2293108973727224e-05, "loss": 0.0353, "step": 32135 }, { "epoch": 14.990671641791044, "grad_norm": 0.42694731783832157, "learning_rate": 1.2286715580626418e-05, "loss": 0.0355, "step": 32140 }, { "epoch": 14.993003731343283, "grad_norm": 0.42629213953289885, "learning_rate": 1.228032444959525e-05, "loss": 0.0355, "step": 32145 }, { "epoch": 14.995335820895523, "grad_norm": 0.42378816014656323, "learning_rate": 1.2273935581584e-05, "loss": 0.0359, "step": 32150 }, { "epoch": 14.997667910447761, "grad_norm": 0.42482290363257, "learning_rate": 1.2267548977542656e-05, "loss": 0.0357, "step": 32155 }, { "epoch": 15.0, "grad_norm": 0.47729763143494325, "learning_rate": 1.2261164638420832e-05, "loss": 0.0364, "step": 32160 }, { "epoch": 15.002332089552239, "grad_norm": 0.32154490588407286, "learning_rate": 1.2254782565167817e-05, "loss": 0.0199, "step": 32165 }, { "epoch": 15.004664179104477, "grad_norm": 0.32409323976108906, "learning_rate": 1.2248402758732568e-05, "loss": 0.0206, "step": 32170 }, { "epoch": 15.006996268656716, "grad_norm": 0.31978693994640245, "learning_rate": 1.22420252200637e-05, "loss": 0.0204, "step": 32175 }, { "epoch": 15.009328358208956, "grad_norm": 0.3326289004891433, "learning_rate": 1.2235649950109492e-05, "loss": 0.0201, "step": 32180 }, { "epoch": 15.011660447761194, "grad_norm": 0.3206187237408453, "learning_rate": 1.2229276949817894e-05, "loss": 0.019, "step": 32185 }, { "epoch": 15.013992537313433, "grad_norm": 0.31081128796131563, "learning_rate": 1.222290622013649e-05, "loss": 0.0203, "step": 32190 }, { "epoch": 15.016324626865671, "grad_norm": 0.31074922807610394, "learning_rate": 1.221653776201257e-05, "loss": 0.0193, "step": 32195 }, { "epoch": 15.01865671641791, "grad_norm": 0.3032506980272697, "learning_rate": 1.2210171576393037e-05, "loss": 0.0195, "step": 32200 }, { "epoch": 15.020988805970148, "grad_norm": 0.33538490196358417, "learning_rate": 1.220380766422451e-05, "loss": 0.0192, "step": 32205 }, { "epoch": 15.023320895522389, "grad_norm": 0.33708442382535814, "learning_rate": 1.2197446026453219e-05, "loss": 0.0193, "step": 32210 }, { "epoch": 15.025652985074627, "grad_norm": 0.33261161728146776, "learning_rate": 1.219108666402509e-05, "loss": 0.02, "step": 32215 }, { "epoch": 15.027985074626866, "grad_norm": 0.3022127313171612, "learning_rate": 1.2184729577885695e-05, "loss": 0.019, "step": 32220 }, { "epoch": 15.030317164179104, "grad_norm": 0.317066005550295, "learning_rate": 1.2178374768980275e-05, "loss": 0.0184, "step": 32225 }, { "epoch": 15.032649253731343, "grad_norm": 0.330377094624126, "learning_rate": 1.2172022238253727e-05, "loss": 0.0187, "step": 32230 }, { "epoch": 15.034981343283581, "grad_norm": 0.333274198937291, "learning_rate": 1.2165671986650618e-05, "loss": 0.019, "step": 32235 }, { "epoch": 15.037313432835822, "grad_norm": 0.32239432553298225, "learning_rate": 1.2159324015115148e-05, "loss": 0.0184, "step": 32240 }, { "epoch": 15.03964552238806, "grad_norm": 0.31446606349115735, "learning_rate": 1.2152978324591233e-05, "loss": 0.0194, "step": 32245 }, { "epoch": 15.041977611940299, "grad_norm": 0.2960420531811023, "learning_rate": 1.2146634916022383e-05, "loss": 0.0187, "step": 32250 }, { "epoch": 15.044309701492537, "grad_norm": 0.3418876907600837, "learning_rate": 1.214029379035183e-05, "loss": 0.0199, "step": 32255 }, { "epoch": 15.046641791044776, "grad_norm": 0.3233930613262029, "learning_rate": 1.2133954948522423e-05, "loss": 0.0187, "step": 32260 }, { "epoch": 15.048973880597014, "grad_norm": 0.322935493339198, "learning_rate": 1.2127618391476688e-05, "loss": 0.0199, "step": 32265 }, { "epoch": 15.051305970149254, "grad_norm": 0.3127859553514359, "learning_rate": 1.2121284120156812e-05, "loss": 0.0185, "step": 32270 }, { "epoch": 15.053638059701493, "grad_norm": 0.3355185595610426, "learning_rate": 1.2114952135504642e-05, "loss": 0.0191, "step": 32275 }, { "epoch": 15.055970149253731, "grad_norm": 0.32174880556177343, "learning_rate": 1.210862243846168e-05, "loss": 0.0184, "step": 32280 }, { "epoch": 15.05830223880597, "grad_norm": 0.2985755950497983, "learning_rate": 1.2102295029969104e-05, "loss": 0.0187, "step": 32285 }, { "epoch": 15.060634328358208, "grad_norm": 0.31101305127828555, "learning_rate": 1.209596991096772e-05, "loss": 0.0196, "step": 32290 }, { "epoch": 15.062966417910447, "grad_norm": 0.3096783672337991, "learning_rate": 1.2089647082398022e-05, "loss": 0.0179, "step": 32295 }, { "epoch": 15.065298507462687, "grad_norm": 0.3367806013151682, "learning_rate": 1.2083326545200154e-05, "loss": 0.0185, "step": 32300 }, { "epoch": 15.067630597014926, "grad_norm": 0.2993851603566439, "learning_rate": 1.2077008300313915e-05, "loss": 0.0184, "step": 32305 }, { "epoch": 15.069962686567164, "grad_norm": 0.29026431380456147, "learning_rate": 1.2070692348678776e-05, "loss": 0.0184, "step": 32310 }, { "epoch": 15.072294776119403, "grad_norm": 0.3207771996633388, "learning_rate": 1.2064378691233851e-05, "loss": 0.0181, "step": 32315 }, { "epoch": 15.074626865671641, "grad_norm": 0.35646651325412543, "learning_rate": 1.205806732891793e-05, "loss": 0.0196, "step": 32320 }, { "epoch": 15.07695895522388, "grad_norm": 0.30717974256129477, "learning_rate": 1.205175826266943e-05, "loss": 0.0197, "step": 32325 }, { "epoch": 15.07929104477612, "grad_norm": 0.305986385534828, "learning_rate": 1.2045451493426483e-05, "loss": 0.0185, "step": 32330 }, { "epoch": 15.081623134328359, "grad_norm": 0.3516339395841, "learning_rate": 1.2039147022126815e-05, "loss": 0.0189, "step": 32335 }, { "epoch": 15.083955223880597, "grad_norm": 0.31882360604377574, "learning_rate": 1.2032844849707853e-05, "loss": 0.0183, "step": 32340 }, { "epoch": 15.086287313432836, "grad_norm": 0.31403578780610364, "learning_rate": 1.2026544977106669e-05, "loss": 0.0191, "step": 32345 }, { "epoch": 15.088619402985074, "grad_norm": 0.315123274487562, "learning_rate": 1.202024740525999e-05, "loss": 0.0187, "step": 32350 }, { "epoch": 15.090951492537313, "grad_norm": 0.3037971262592016, "learning_rate": 1.2013952135104209e-05, "loss": 0.0186, "step": 32355 }, { "epoch": 15.093283582089553, "grad_norm": 0.32041331784872146, "learning_rate": 1.2007659167575377e-05, "loss": 0.0196, "step": 32360 }, { "epoch": 15.095615671641792, "grad_norm": 0.3396663179595824, "learning_rate": 1.2001368503609179e-05, "loss": 0.0182, "step": 32365 }, { "epoch": 15.09794776119403, "grad_norm": 0.3268029210412046, "learning_rate": 1.1995080144141004e-05, "loss": 0.0193, "step": 32370 }, { "epoch": 15.100279850746269, "grad_norm": 0.32387784337228664, "learning_rate": 1.1988794090105842e-05, "loss": 0.0188, "step": 32375 }, { "epoch": 15.102611940298507, "grad_norm": 0.31122801442207026, "learning_rate": 1.1982510342438395e-05, "loss": 0.0186, "step": 32380 }, { "epoch": 15.104944029850746, "grad_norm": 0.3264667124483338, "learning_rate": 1.1976228902072981e-05, "loss": 0.0187, "step": 32385 }, { "epoch": 15.107276119402986, "grad_norm": 0.32427165780308764, "learning_rate": 1.1969949769943587e-05, "loss": 0.0188, "step": 32390 }, { "epoch": 15.109608208955224, "grad_norm": 0.32828665397139384, "learning_rate": 1.196367294698387e-05, "loss": 0.0184, "step": 32395 }, { "epoch": 15.111940298507463, "grad_norm": 0.30662809899232296, "learning_rate": 1.195739843412713e-05, "loss": 0.0198, "step": 32400 }, { "epoch": 15.114272388059701, "grad_norm": 0.31923275300329396, "learning_rate": 1.1951126232306325e-05, "loss": 0.0189, "step": 32405 }, { "epoch": 15.11660447761194, "grad_norm": 0.33494014006159273, "learning_rate": 1.1944856342454078e-05, "loss": 0.0188, "step": 32410 }, { "epoch": 15.118936567164178, "grad_norm": 0.34665713260911035, "learning_rate": 1.1938588765502643e-05, "loss": 0.0195, "step": 32415 }, { "epoch": 15.121268656716419, "grad_norm": 0.32361475434503184, "learning_rate": 1.1932323502383978e-05, "loss": 0.0191, "step": 32420 }, { "epoch": 15.123600746268657, "grad_norm": 0.3275160704300656, "learning_rate": 1.1926060554029636e-05, "loss": 0.019, "step": 32425 }, { "epoch": 15.125932835820896, "grad_norm": 0.34323733325280403, "learning_rate": 1.1919799921370888e-05, "loss": 0.0187, "step": 32430 }, { "epoch": 15.128264925373134, "grad_norm": 0.3255303066290362, "learning_rate": 1.1913541605338605e-05, "loss": 0.0189, "step": 32435 }, { "epoch": 15.130597014925373, "grad_norm": 0.33745105967194555, "learning_rate": 1.1907285606863351e-05, "loss": 0.0188, "step": 32440 }, { "epoch": 15.132929104477611, "grad_norm": 0.3199590750932407, "learning_rate": 1.190103192687533e-05, "loss": 0.0188, "step": 32445 }, { "epoch": 15.135261194029852, "grad_norm": 0.3289937391142411, "learning_rate": 1.1894780566304406e-05, "loss": 0.0186, "step": 32450 }, { "epoch": 15.13759328358209, "grad_norm": 0.3155491672065699, "learning_rate": 1.1888531526080095e-05, "loss": 0.0186, "step": 32455 }, { "epoch": 15.139925373134329, "grad_norm": 0.33203800035066977, "learning_rate": 1.1882284807131576e-05, "loss": 0.0185, "step": 32460 }, { "epoch": 15.142257462686567, "grad_norm": 0.3168160603410372, "learning_rate": 1.1876040410387653e-05, "loss": 0.0191, "step": 32465 }, { "epoch": 15.144589552238806, "grad_norm": 0.3153914841346115, "learning_rate": 1.1869798336776845e-05, "loss": 0.0191, "step": 32470 }, { "epoch": 15.146921641791044, "grad_norm": 0.31275677669882274, "learning_rate": 1.1863558587227256e-05, "loss": 0.0192, "step": 32475 }, { "epoch": 15.149253731343283, "grad_norm": 0.321138048145737, "learning_rate": 1.1857321162666692e-05, "loss": 0.0188, "step": 32480 }, { "epoch": 15.151585820895523, "grad_norm": 0.3524663911758957, "learning_rate": 1.1851086064022596e-05, "loss": 0.0187, "step": 32485 }, { "epoch": 15.153917910447761, "grad_norm": 0.3196819216972694, "learning_rate": 1.1844853292222066e-05, "loss": 0.0179, "step": 32490 }, { "epoch": 15.15625, "grad_norm": 0.32429100227566177, "learning_rate": 1.1838622848191857e-05, "loss": 0.0187, "step": 32495 }, { "epoch": 15.158582089552239, "grad_norm": 0.311356861779091, "learning_rate": 1.1832394732858377e-05, "loss": 0.0191, "step": 32500 }, { "epoch": 15.160914179104477, "grad_norm": 0.3221084096932885, "learning_rate": 1.1826168947147693e-05, "loss": 0.0185, "step": 32505 }, { "epoch": 15.163246268656717, "grad_norm": 0.3011168222642033, "learning_rate": 1.1819945491985504e-05, "loss": 0.0184, "step": 32510 }, { "epoch": 15.165578358208956, "grad_norm": 0.3243035569974553, "learning_rate": 1.1813724368297187e-05, "loss": 0.0199, "step": 32515 }, { "epoch": 15.167910447761194, "grad_norm": 0.32864743057276674, "learning_rate": 1.1807505577007765e-05, "loss": 0.0189, "step": 32520 }, { "epoch": 15.170242537313433, "grad_norm": 0.3239717330145324, "learning_rate": 1.1801289119041909e-05, "loss": 0.0181, "step": 32525 }, { "epoch": 15.172574626865671, "grad_norm": 0.32252046833238124, "learning_rate": 1.179507499532395e-05, "loss": 0.0186, "step": 32530 }, { "epoch": 15.17490671641791, "grad_norm": 0.3295265204787511, "learning_rate": 1.1788863206777875e-05, "loss": 0.0184, "step": 32535 }, { "epoch": 15.177238805970148, "grad_norm": 0.33267091721467074, "learning_rate": 1.1782653754327295e-05, "loss": 0.0194, "step": 32540 }, { "epoch": 15.179570895522389, "grad_norm": 0.3474083830345143, "learning_rate": 1.1776446638895523e-05, "loss": 0.0193, "step": 32545 }, { "epoch": 15.181902985074627, "grad_norm": 0.33206641651723745, "learning_rate": 1.1770241861405475e-05, "loss": 0.0186, "step": 32550 }, { "epoch": 15.184235074626866, "grad_norm": 0.3369554207424315, "learning_rate": 1.1764039422779765e-05, "loss": 0.0191, "step": 32555 }, { "epoch": 15.186567164179104, "grad_norm": 0.3256104851513778, "learning_rate": 1.1757839323940616e-05, "loss": 0.0193, "step": 32560 }, { "epoch": 15.188899253731343, "grad_norm": 0.33543679128235254, "learning_rate": 1.1751641565809931e-05, "loss": 0.0197, "step": 32565 }, { "epoch": 15.191231343283581, "grad_norm": 0.33010564047464597, "learning_rate": 1.1745446149309257e-05, "loss": 0.0198, "step": 32570 }, { "epoch": 15.193563432835822, "grad_norm": 0.3171341698045886, "learning_rate": 1.1739253075359796e-05, "loss": 0.0192, "step": 32575 }, { "epoch": 15.19589552238806, "grad_norm": 0.342498861907379, "learning_rate": 1.1733062344882396e-05, "loss": 0.0197, "step": 32580 }, { "epoch": 15.198227611940299, "grad_norm": 0.325367107870333, "learning_rate": 1.1726873958797565e-05, "loss": 0.0181, "step": 32585 }, { "epoch": 15.200559701492537, "grad_norm": 0.3376881154740737, "learning_rate": 1.1720687918025434e-05, "loss": 0.0187, "step": 32590 }, { "epoch": 15.202891791044776, "grad_norm": 0.32050562642064445, "learning_rate": 1.1714504223485844e-05, "loss": 0.0178, "step": 32595 }, { "epoch": 15.205223880597014, "grad_norm": 0.3245545115079226, "learning_rate": 1.1708322876098215e-05, "loss": 0.0193, "step": 32600 }, { "epoch": 15.207555970149254, "grad_norm": 0.30425256190123323, "learning_rate": 1.1702143876781687e-05, "loss": 0.0191, "step": 32605 }, { "epoch": 15.209888059701493, "grad_norm": 0.3298418339316331, "learning_rate": 1.1695967226454996e-05, "loss": 0.0193, "step": 32610 }, { "epoch": 15.212220149253731, "grad_norm": 0.3320723166749462, "learning_rate": 1.1689792926036555e-05, "loss": 0.0186, "step": 32615 }, { "epoch": 15.21455223880597, "grad_norm": 0.3422500819309081, "learning_rate": 1.1683620976444426e-05, "loss": 0.0182, "step": 32620 }, { "epoch": 15.216884328358208, "grad_norm": 0.3401535881372086, "learning_rate": 1.1677451378596318e-05, "loss": 0.0201, "step": 32625 }, { "epoch": 15.219216417910447, "grad_norm": 0.3338413497669365, "learning_rate": 1.1671284133409592e-05, "loss": 0.0188, "step": 32630 }, { "epoch": 15.221548507462687, "grad_norm": 0.33040707554458265, "learning_rate": 1.1665119241801257e-05, "loss": 0.0188, "step": 32635 }, { "epoch": 15.223880597014926, "grad_norm": 0.34126335077690545, "learning_rate": 1.1658956704687974e-05, "loss": 0.019, "step": 32640 }, { "epoch": 15.226212686567164, "grad_norm": 0.3557167988691129, "learning_rate": 1.165279652298606e-05, "loss": 0.0197, "step": 32645 }, { "epoch": 15.228544776119403, "grad_norm": 0.33368835777836314, "learning_rate": 1.1646638697611462e-05, "loss": 0.0193, "step": 32650 }, { "epoch": 15.230876865671641, "grad_norm": 0.352969845128157, "learning_rate": 1.1640483229479794e-05, "loss": 0.0205, "step": 32655 }, { "epoch": 15.23320895522388, "grad_norm": 0.3482488658380842, "learning_rate": 1.1634330119506317e-05, "loss": 0.0199, "step": 32660 }, { "epoch": 15.23554104477612, "grad_norm": 0.34009126703105874, "learning_rate": 1.162817936860594e-05, "loss": 0.0195, "step": 32665 }, { "epoch": 15.237873134328359, "grad_norm": 0.3147388680038501, "learning_rate": 1.1622030977693221e-05, "loss": 0.0181, "step": 32670 }, { "epoch": 15.240205223880597, "grad_norm": 0.3618810623331399, "learning_rate": 1.1615884947682364e-05, "loss": 0.0195, "step": 32675 }, { "epoch": 15.242537313432836, "grad_norm": 0.3328848475880467, "learning_rate": 1.1609741279487236e-05, "loss": 0.02, "step": 32680 }, { "epoch": 15.244869402985074, "grad_norm": 0.33332479295588463, "learning_rate": 1.1603599974021317e-05, "loss": 0.0192, "step": 32685 }, { "epoch": 15.247201492537313, "grad_norm": 0.33071485793909633, "learning_rate": 1.1597461032197788e-05, "loss": 0.02, "step": 32690 }, { "epoch": 15.249533582089553, "grad_norm": 0.3352149023479188, "learning_rate": 1.1591324454929433e-05, "loss": 0.0199, "step": 32695 }, { "epoch": 15.251865671641792, "grad_norm": 0.3268639866014777, "learning_rate": 1.1585190243128707e-05, "loss": 0.0205, "step": 32700 }, { "epoch": 15.25419776119403, "grad_norm": 0.3129890320695087, "learning_rate": 1.1579058397707707e-05, "loss": 0.0194, "step": 32705 }, { "epoch": 15.256529850746269, "grad_norm": 0.33933574574611225, "learning_rate": 1.1572928919578186e-05, "loss": 0.0191, "step": 32710 }, { "epoch": 15.258861940298507, "grad_norm": 0.3408901453339348, "learning_rate": 1.1566801809651532e-05, "loss": 0.0185, "step": 32715 }, { "epoch": 15.261194029850746, "grad_norm": 0.3666492069120472, "learning_rate": 1.15606770688388e-05, "loss": 0.0197, "step": 32720 }, { "epoch": 15.263526119402986, "grad_norm": 0.3417075640439385, "learning_rate": 1.1554554698050652e-05, "loss": 0.0188, "step": 32725 }, { "epoch": 15.265858208955224, "grad_norm": 0.3400123320880155, "learning_rate": 1.154843469819746e-05, "loss": 0.0191, "step": 32730 }, { "epoch": 15.268190298507463, "grad_norm": 0.34026613132574374, "learning_rate": 1.1542317070189181e-05, "loss": 0.0194, "step": 32735 }, { "epoch": 15.270522388059701, "grad_norm": 0.3489337678219386, "learning_rate": 1.1536201814935473e-05, "loss": 0.0204, "step": 32740 }, { "epoch": 15.27285447761194, "grad_norm": 0.34611774084186625, "learning_rate": 1.1530088933345595e-05, "loss": 0.0192, "step": 32745 }, { "epoch": 15.275186567164178, "grad_norm": 0.3321567675591594, "learning_rate": 1.152397842632848e-05, "loss": 0.0197, "step": 32750 }, { "epoch": 15.277518656716419, "grad_norm": 0.3235319419859057, "learning_rate": 1.1517870294792709e-05, "loss": 0.0187, "step": 32755 }, { "epoch": 15.279850746268657, "grad_norm": 0.3413597294200426, "learning_rate": 1.1511764539646494e-05, "loss": 0.0198, "step": 32760 }, { "epoch": 15.282182835820896, "grad_norm": 0.35153215845150276, "learning_rate": 1.1505661161797707e-05, "loss": 0.0196, "step": 32765 }, { "epoch": 15.284514925373134, "grad_norm": 0.3509050679878391, "learning_rate": 1.1499560162153866e-05, "loss": 0.0193, "step": 32770 }, { "epoch": 15.286847014925373, "grad_norm": 0.31022387364354675, "learning_rate": 1.1493461541622114e-05, "loss": 0.0186, "step": 32775 }, { "epoch": 15.289179104477611, "grad_norm": 0.33677418790036384, "learning_rate": 1.1487365301109281e-05, "loss": 0.0201, "step": 32780 }, { "epoch": 15.291511194029852, "grad_norm": 0.33661509759306074, "learning_rate": 1.1481271441521796e-05, "loss": 0.0193, "step": 32785 }, { "epoch": 15.29384328358209, "grad_norm": 0.3379216434443036, "learning_rate": 1.147517996376578e-05, "loss": 0.02, "step": 32790 }, { "epoch": 15.296175373134329, "grad_norm": 0.32331901209074226, "learning_rate": 1.1469090868746961e-05, "loss": 0.0198, "step": 32795 }, { "epoch": 15.298507462686567, "grad_norm": 0.3808330277209503, "learning_rate": 1.1463004157370735e-05, "loss": 0.0204, "step": 32800 }, { "epoch": 15.300839552238806, "grad_norm": 0.36150311215879866, "learning_rate": 1.1456919830542138e-05, "loss": 0.0198, "step": 32805 }, { "epoch": 15.303171641791044, "grad_norm": 0.3558836858096253, "learning_rate": 1.1450837889165852e-05, "loss": 0.0201, "step": 32810 }, { "epoch": 15.305503731343283, "grad_norm": 0.3500549768984742, "learning_rate": 1.14447583341462e-05, "loss": 0.0197, "step": 32815 }, { "epoch": 15.307835820895523, "grad_norm": 0.35077878457664746, "learning_rate": 1.1438681166387162e-05, "loss": 0.0198, "step": 32820 }, { "epoch": 15.310167910447761, "grad_norm": 0.34954905961951294, "learning_rate": 1.1432606386792341e-05, "loss": 0.0197, "step": 32825 }, { "epoch": 15.3125, "grad_norm": 0.33094855798949074, "learning_rate": 1.1426533996265008e-05, "loss": 0.0201, "step": 32830 }, { "epoch": 15.314832089552239, "grad_norm": 0.34777824546752634, "learning_rate": 1.1420463995708064e-05, "loss": 0.0192, "step": 32835 }, { "epoch": 15.317164179104477, "grad_norm": 0.348454248282685, "learning_rate": 1.1414396386024064e-05, "loss": 0.0201, "step": 32840 }, { "epoch": 15.319496268656717, "grad_norm": 0.3318390256654701, "learning_rate": 1.1408331168115203e-05, "loss": 0.0202, "step": 32845 }, { "epoch": 15.321828358208956, "grad_norm": 0.32716858792672104, "learning_rate": 1.140226834288332e-05, "loss": 0.0196, "step": 32850 }, { "epoch": 15.324160447761194, "grad_norm": 0.3569582351802427, "learning_rate": 1.1396207911229903e-05, "loss": 0.0198, "step": 32855 }, { "epoch": 15.326492537313433, "grad_norm": 0.34587774232872237, "learning_rate": 1.1390149874056065e-05, "loss": 0.02, "step": 32860 }, { "epoch": 15.328824626865671, "grad_norm": 0.35806201831354006, "learning_rate": 1.1384094232262602e-05, "loss": 0.0204, "step": 32865 }, { "epoch": 15.33115671641791, "grad_norm": 0.3334418043330391, "learning_rate": 1.1378040986749912e-05, "loss": 0.0194, "step": 32870 }, { "epoch": 15.333488805970148, "grad_norm": 0.33479294678017146, "learning_rate": 1.137199013841806e-05, "loss": 0.0203, "step": 32875 }, { "epoch": 15.335820895522389, "grad_norm": 0.34279865746727856, "learning_rate": 1.1365941688166747e-05, "loss": 0.0203, "step": 32880 }, { "epoch": 15.338152985074627, "grad_norm": 0.35502747616506586, "learning_rate": 1.1359895636895324e-05, "loss": 0.0205, "step": 32885 }, { "epoch": 15.340485074626866, "grad_norm": 0.335729645064579, "learning_rate": 1.1353851985502777e-05, "loss": 0.0197, "step": 32890 }, { "epoch": 15.342817164179104, "grad_norm": 0.32970503263802226, "learning_rate": 1.1347810734887747e-05, "loss": 0.0202, "step": 32895 }, { "epoch": 15.345149253731343, "grad_norm": 0.35568704704102677, "learning_rate": 1.134177188594849e-05, "loss": 0.0192, "step": 32900 }, { "epoch": 15.347481343283581, "grad_norm": 0.3362441482718854, "learning_rate": 1.1335735439582952e-05, "loss": 0.0194, "step": 32905 }, { "epoch": 15.349813432835822, "grad_norm": 0.358578980936505, "learning_rate": 1.1329701396688669e-05, "loss": 0.0206, "step": 32910 }, { "epoch": 15.35214552238806, "grad_norm": 0.35639650818639396, "learning_rate": 1.132366975816287e-05, "loss": 0.0206, "step": 32915 }, { "epoch": 15.354477611940299, "grad_norm": 0.3579688752666215, "learning_rate": 1.1317640524902383e-05, "loss": 0.0198, "step": 32920 }, { "epoch": 15.356809701492537, "grad_norm": 0.36677886720651653, "learning_rate": 1.1311613697803703e-05, "loss": 0.0196, "step": 32925 }, { "epoch": 15.359141791044776, "grad_norm": 0.3622897912127011, "learning_rate": 1.1305589277762965e-05, "loss": 0.0202, "step": 32930 }, { "epoch": 15.361473880597014, "grad_norm": 0.35884534800746454, "learning_rate": 1.1299567265675939e-05, "loss": 0.0207, "step": 32935 }, { "epoch": 15.363805970149254, "grad_norm": 0.35193311787460607, "learning_rate": 1.129354766243804e-05, "loss": 0.0201, "step": 32940 }, { "epoch": 15.366138059701493, "grad_norm": 0.3477020663445345, "learning_rate": 1.1287530468944332e-05, "loss": 0.0202, "step": 32945 }, { "epoch": 15.368470149253731, "grad_norm": 0.3495921502666025, "learning_rate": 1.1281515686089497e-05, "loss": 0.0202, "step": 32950 }, { "epoch": 15.37080223880597, "grad_norm": 0.34804964602258387, "learning_rate": 1.1275503314767901e-05, "loss": 0.0202, "step": 32955 }, { "epoch": 15.373134328358208, "grad_norm": 0.3381107408792386, "learning_rate": 1.1269493355873498e-05, "loss": 0.0201, "step": 32960 }, { "epoch": 15.375466417910447, "grad_norm": 0.36038971700809774, "learning_rate": 1.126348581029994e-05, "loss": 0.0195, "step": 32965 }, { "epoch": 15.377798507462687, "grad_norm": 0.3645371950817923, "learning_rate": 1.1257480678940469e-05, "loss": 0.0209, "step": 32970 }, { "epoch": 15.380130597014926, "grad_norm": 0.3462272647557636, "learning_rate": 1.1251477962687998e-05, "loss": 0.0199, "step": 32975 }, { "epoch": 15.382462686567164, "grad_norm": 0.34329344636111614, "learning_rate": 1.1245477662435076e-05, "loss": 0.0205, "step": 32980 }, { "epoch": 15.384794776119403, "grad_norm": 0.35324201238587555, "learning_rate": 1.1239479779073885e-05, "loss": 0.0192, "step": 32985 }, { "epoch": 15.387126865671641, "grad_norm": 0.35474782913604025, "learning_rate": 1.123348431349626e-05, "loss": 0.0198, "step": 32990 }, { "epoch": 15.38945895522388, "grad_norm": 0.3586336838477049, "learning_rate": 1.1227491266593668e-05, "loss": 0.0204, "step": 32995 }, { "epoch": 15.39179104477612, "grad_norm": 0.3529680563864717, "learning_rate": 1.1221500639257204e-05, "loss": 0.0197, "step": 33000 }, { "epoch": 15.394123134328359, "grad_norm": 0.35368640868007806, "learning_rate": 1.121551243237764e-05, "loss": 0.0203, "step": 33005 }, { "epoch": 15.396455223880597, "grad_norm": 0.3628275346830205, "learning_rate": 1.1209526646845346e-05, "loss": 0.0202, "step": 33010 }, { "epoch": 15.398787313432836, "grad_norm": 0.3534570477523497, "learning_rate": 1.1203543283550355e-05, "loss": 0.0197, "step": 33015 }, { "epoch": 15.401119402985074, "grad_norm": 0.3756092340196612, "learning_rate": 1.1197562343382341e-05, "loss": 0.0211, "step": 33020 }, { "epoch": 15.403451492537313, "grad_norm": 0.36097868399415883, "learning_rate": 1.119158382723061e-05, "loss": 0.0212, "step": 33025 }, { "epoch": 15.405783582089553, "grad_norm": 0.34095682785056863, "learning_rate": 1.118560773598411e-05, "loss": 0.0205, "step": 33030 }, { "epoch": 15.408115671641792, "grad_norm": 0.35907004814502547, "learning_rate": 1.1179634070531427e-05, "loss": 0.0207, "step": 33035 }, { "epoch": 15.41044776119403, "grad_norm": 0.3488398569889731, "learning_rate": 1.1173662831760798e-05, "loss": 0.0198, "step": 33040 }, { "epoch": 15.412779850746269, "grad_norm": 0.3274923211830803, "learning_rate": 1.1167694020560071e-05, "loss": 0.0197, "step": 33045 }, { "epoch": 15.415111940298507, "grad_norm": 0.3366102138206132, "learning_rate": 1.1161727637816762e-05, "loss": 0.0199, "step": 33050 }, { "epoch": 15.417444029850746, "grad_norm": 0.3487679313566711, "learning_rate": 1.1155763684418013e-05, "loss": 0.0208, "step": 33055 }, { "epoch": 15.419776119402986, "grad_norm": 0.3620905357426344, "learning_rate": 1.1149802161250607e-05, "loss": 0.0202, "step": 33060 }, { "epoch": 15.422108208955224, "grad_norm": 0.34672158621560556, "learning_rate": 1.1143843069200965e-05, "loss": 0.0205, "step": 33065 }, { "epoch": 15.424440298507463, "grad_norm": 0.3561158339646823, "learning_rate": 1.1137886409155158e-05, "loss": 0.0204, "step": 33070 }, { "epoch": 15.426772388059701, "grad_norm": 0.3328099459182572, "learning_rate": 1.1131932181998856e-05, "loss": 0.0207, "step": 33075 }, { "epoch": 15.42910447761194, "grad_norm": 0.3391192328342057, "learning_rate": 1.1125980388617425e-05, "loss": 0.0207, "step": 33080 }, { "epoch": 15.431436567164178, "grad_norm": 0.3568106866443787, "learning_rate": 1.1120031029895816e-05, "loss": 0.0216, "step": 33085 }, { "epoch": 15.433768656716419, "grad_norm": 0.37072849789695933, "learning_rate": 1.1114084106718667e-05, "loss": 0.02, "step": 33090 }, { "epoch": 15.436100746268657, "grad_norm": 0.3463052595651047, "learning_rate": 1.1108139619970207e-05, "loss": 0.02, "step": 33095 }, { "epoch": 15.438432835820896, "grad_norm": 0.36101459195654706, "learning_rate": 1.1102197570534334e-05, "loss": 0.0205, "step": 33100 }, { "epoch": 15.440764925373134, "grad_norm": 0.3533201206063105, "learning_rate": 1.1096257959294572e-05, "loss": 0.0204, "step": 33105 }, { "epoch": 15.443097014925373, "grad_norm": 0.33948654345075446, "learning_rate": 1.1090320787134085e-05, "loss": 0.0203, "step": 33110 }, { "epoch": 15.445429104477611, "grad_norm": 0.33207883810482225, "learning_rate": 1.1084386054935669e-05, "loss": 0.0205, "step": 33115 }, { "epoch": 15.447761194029852, "grad_norm": 0.35826726088723637, "learning_rate": 1.1078453763581776e-05, "loss": 0.0196, "step": 33120 }, { "epoch": 15.45009328358209, "grad_norm": 0.35289168104349067, "learning_rate": 1.1072523913954455e-05, "loss": 0.0202, "step": 33125 }, { "epoch": 15.452425373134329, "grad_norm": 0.3647846690049095, "learning_rate": 1.1066596506935447e-05, "loss": 0.0208, "step": 33130 }, { "epoch": 15.454757462686567, "grad_norm": 0.3626216632076173, "learning_rate": 1.1060671543406074e-05, "loss": 0.0198, "step": 33135 }, { "epoch": 15.457089552238806, "grad_norm": 0.3431430042592521, "learning_rate": 1.1054749024247348e-05, "loss": 0.0198, "step": 33140 }, { "epoch": 15.459421641791044, "grad_norm": 0.3684858481277614, "learning_rate": 1.1048828950339867e-05, "loss": 0.0211, "step": 33145 }, { "epoch": 15.461753731343283, "grad_norm": 0.34745062047792086, "learning_rate": 1.1042911322563903e-05, "loss": 0.0205, "step": 33150 }, { "epoch": 15.464085820895523, "grad_norm": 0.3599640822383092, "learning_rate": 1.1036996141799347e-05, "loss": 0.0206, "step": 33155 }, { "epoch": 15.466417910447761, "grad_norm": 0.35729117828158896, "learning_rate": 1.103108340892573e-05, "loss": 0.0202, "step": 33160 }, { "epoch": 15.46875, "grad_norm": 0.34501817057503015, "learning_rate": 1.1025173124822213e-05, "loss": 0.0201, "step": 33165 }, { "epoch": 15.471082089552239, "grad_norm": 0.36748344908540653, "learning_rate": 1.1019265290367616e-05, "loss": 0.0206, "step": 33170 }, { "epoch": 15.473414179104477, "grad_norm": 0.3745302262080638, "learning_rate": 1.1013359906440353e-05, "loss": 0.0198, "step": 33175 }, { "epoch": 15.475746268656717, "grad_norm": 0.35367237465480095, "learning_rate": 1.100745697391852e-05, "loss": 0.0211, "step": 33180 }, { "epoch": 15.478078358208956, "grad_norm": 0.37757697725529327, "learning_rate": 1.1001556493679812e-05, "loss": 0.0204, "step": 33185 }, { "epoch": 15.480410447761194, "grad_norm": 0.36122659488968417, "learning_rate": 1.099565846660158e-05, "loss": 0.021, "step": 33190 }, { "epoch": 15.482742537313433, "grad_norm": 0.34771390477925307, "learning_rate": 1.0989762893560798e-05, "loss": 0.0211, "step": 33195 }, { "epoch": 15.485074626865671, "grad_norm": 0.3779441158656077, "learning_rate": 1.0983869775434091e-05, "loss": 0.021, "step": 33200 }, { "epoch": 15.48740671641791, "grad_norm": 0.3635475001588831, "learning_rate": 1.0977979113097702e-05, "loss": 0.0208, "step": 33205 }, { "epoch": 15.489738805970148, "grad_norm": 0.3711783899365261, "learning_rate": 1.097209090742752e-05, "loss": 0.0211, "step": 33210 }, { "epoch": 15.492070895522389, "grad_norm": 0.3596854951670339, "learning_rate": 1.096620515929907e-05, "loss": 0.0211, "step": 33215 }, { "epoch": 15.494402985074627, "grad_norm": 0.3570347883661954, "learning_rate": 1.096032186958749e-05, "loss": 0.0206, "step": 33220 }, { "epoch": 15.496735074626866, "grad_norm": 0.34625747038338156, "learning_rate": 1.095444103916758e-05, "loss": 0.0207, "step": 33225 }, { "epoch": 15.499067164179104, "grad_norm": 0.38037616633394855, "learning_rate": 1.0948562668913763e-05, "loss": 0.021, "step": 33230 }, { "epoch": 15.501399253731343, "grad_norm": 0.34833846505699234, "learning_rate": 1.0942686759700092e-05, "loss": 0.0213, "step": 33235 }, { "epoch": 15.503731343283581, "grad_norm": 0.3542007001920025, "learning_rate": 1.0936813312400263e-05, "loss": 0.0204, "step": 33240 }, { "epoch": 15.506063432835822, "grad_norm": 0.36721096521411084, "learning_rate": 1.0930942327887605e-05, "loss": 0.0209, "step": 33245 }, { "epoch": 15.50839552238806, "grad_norm": 0.38079376982667873, "learning_rate": 1.092507380703506e-05, "loss": 0.0204, "step": 33250 }, { "epoch": 15.510727611940299, "grad_norm": 0.37444054477697986, "learning_rate": 1.0919207750715243e-05, "loss": 0.0209, "step": 33255 }, { "epoch": 15.513059701492537, "grad_norm": 0.3642644677955907, "learning_rate": 1.091334415980036e-05, "loss": 0.0204, "step": 33260 }, { "epoch": 15.515391791044776, "grad_norm": 0.3536735817796114, "learning_rate": 1.0907483035162291e-05, "loss": 0.0207, "step": 33265 }, { "epoch": 15.517723880597014, "grad_norm": 0.3810585197078366, "learning_rate": 1.0901624377672513e-05, "loss": 0.0212, "step": 33270 }, { "epoch": 15.520055970149254, "grad_norm": 0.3697076601627023, "learning_rate": 1.0895768188202158e-05, "loss": 0.0213, "step": 33275 }, { "epoch": 15.522388059701493, "grad_norm": 0.36537628820387535, "learning_rate": 1.0889914467621986e-05, "loss": 0.0209, "step": 33280 }, { "epoch": 15.524720149253731, "grad_norm": 0.3645100240082292, "learning_rate": 1.0884063216802388e-05, "loss": 0.0209, "step": 33285 }, { "epoch": 15.52705223880597, "grad_norm": 0.37125824339540997, "learning_rate": 1.0878214436613387e-05, "loss": 0.0215, "step": 33290 }, { "epoch": 15.529384328358208, "grad_norm": 0.37154663306351704, "learning_rate": 1.0872368127924654e-05, "loss": 0.0206, "step": 33295 }, { "epoch": 15.531716417910447, "grad_norm": 0.36209760900576127, "learning_rate": 1.0866524291605452e-05, "loss": 0.0208, "step": 33300 }, { "epoch": 15.534048507462687, "grad_norm": 0.3480443424428528, "learning_rate": 1.0860682928524732e-05, "loss": 0.0213, "step": 33305 }, { "epoch": 15.536380597014926, "grad_norm": 0.36886127788734513, "learning_rate": 1.0854844039551023e-05, "loss": 0.0204, "step": 33310 }, { "epoch": 15.538712686567164, "grad_norm": 0.3621738507744495, "learning_rate": 1.0849007625552539e-05, "loss": 0.0214, "step": 33315 }, { "epoch": 15.541044776119403, "grad_norm": 0.37366631115406546, "learning_rate": 1.0843173687397079e-05, "loss": 0.0213, "step": 33320 }, { "epoch": 15.543376865671641, "grad_norm": 0.34951856826304484, "learning_rate": 1.0837342225952097e-05, "loss": 0.021, "step": 33325 }, { "epoch": 15.54570895522388, "grad_norm": 0.3643272961234599, "learning_rate": 1.0831513242084681e-05, "loss": 0.0206, "step": 33330 }, { "epoch": 15.54804104477612, "grad_norm": 0.3679344311987629, "learning_rate": 1.0825686736661541e-05, "loss": 0.0212, "step": 33335 }, { "epoch": 15.550373134328359, "grad_norm": 0.37972318427419427, "learning_rate": 1.0819862710549025e-05, "loss": 0.0214, "step": 33340 }, { "epoch": 15.552705223880597, "grad_norm": 0.3690561546556947, "learning_rate": 1.0814041164613107e-05, "loss": 0.0208, "step": 33345 }, { "epoch": 15.555037313432836, "grad_norm": 0.3817212891679794, "learning_rate": 1.0808222099719396e-05, "loss": 0.0211, "step": 33350 }, { "epoch": 15.557369402985074, "grad_norm": 0.3697015192049541, "learning_rate": 1.0802405516733138e-05, "loss": 0.0213, "step": 33355 }, { "epoch": 15.559701492537313, "grad_norm": 0.3699696463837916, "learning_rate": 1.0796591416519192e-05, "loss": 0.0218, "step": 33360 }, { "epoch": 15.562033582089553, "grad_norm": 0.3762550678836597, "learning_rate": 1.0790779799942063e-05, "loss": 0.0212, "step": 33365 }, { "epoch": 15.564365671641792, "grad_norm": 0.38859198140000983, "learning_rate": 1.0784970667865882e-05, "loss": 0.0212, "step": 33370 }, { "epoch": 15.56669776119403, "grad_norm": 0.37544314246901483, "learning_rate": 1.0779164021154417e-05, "loss": 0.0213, "step": 33375 }, { "epoch": 15.569029850746269, "grad_norm": 0.36782993625829014, "learning_rate": 1.0773359860671054e-05, "loss": 0.0217, "step": 33380 }, { "epoch": 15.571361940298507, "grad_norm": 0.36732982357537697, "learning_rate": 1.0767558187278817e-05, "loss": 0.0212, "step": 33385 }, { "epoch": 15.573694029850746, "grad_norm": 0.37678683935671126, "learning_rate": 1.0761759001840371e-05, "loss": 0.0215, "step": 33390 }, { "epoch": 15.576026119402986, "grad_norm": 0.3772244528523237, "learning_rate": 1.0755962305217973e-05, "loss": 0.0214, "step": 33395 }, { "epoch": 15.578358208955224, "grad_norm": 0.3683985961581661, "learning_rate": 1.0750168098273569e-05, "loss": 0.0213, "step": 33400 }, { "epoch": 15.580690298507463, "grad_norm": 0.3858355821486702, "learning_rate": 1.074437638186868e-05, "loss": 0.0211, "step": 33405 }, { "epoch": 15.583022388059701, "grad_norm": 0.35453703334862446, "learning_rate": 1.073858715686448e-05, "loss": 0.0214, "step": 33410 }, { "epoch": 15.58535447761194, "grad_norm": 0.36935378202129737, "learning_rate": 1.0732800424121779e-05, "loss": 0.0213, "step": 33415 }, { "epoch": 15.587686567164178, "grad_norm": 0.3472632600003096, "learning_rate": 1.0727016184501e-05, "loss": 0.0204, "step": 33420 }, { "epoch": 15.590018656716419, "grad_norm": 0.35830699570645425, "learning_rate": 1.0721234438862213e-05, "loss": 0.0214, "step": 33425 }, { "epoch": 15.592350746268657, "grad_norm": 0.37585772047464483, "learning_rate": 1.0715455188065112e-05, "loss": 0.0221, "step": 33430 }, { "epoch": 15.594682835820896, "grad_norm": 0.37363086764014014, "learning_rate": 1.0709678432968995e-05, "loss": 0.0216, "step": 33435 }, { "epoch": 15.597014925373134, "grad_norm": 0.36612865518401183, "learning_rate": 1.0703904174432836e-05, "loss": 0.0212, "step": 33440 }, { "epoch": 15.599347014925373, "grad_norm": 0.3877235733864216, "learning_rate": 1.0698132413315188e-05, "loss": 0.0215, "step": 33445 }, { "epoch": 15.601679104477611, "grad_norm": 0.3987511458474937, "learning_rate": 1.069236315047428e-05, "loss": 0.0218, "step": 33450 }, { "epoch": 15.604011194029852, "grad_norm": 0.3622168388386975, "learning_rate": 1.0686596386767928e-05, "loss": 0.021, "step": 33455 }, { "epoch": 15.60634328358209, "grad_norm": 0.35762177332054956, "learning_rate": 1.0680832123053603e-05, "loss": 0.0215, "step": 33460 }, { "epoch": 15.608675373134329, "grad_norm": 0.3812088861193467, "learning_rate": 1.067507036018839e-05, "loss": 0.0222, "step": 33465 }, { "epoch": 15.611007462686567, "grad_norm": 0.35976320209583335, "learning_rate": 1.0669311099029014e-05, "loss": 0.0222, "step": 33470 }, { "epoch": 15.613339552238806, "grad_norm": 0.3881077262316989, "learning_rate": 1.066355434043182e-05, "loss": 0.0212, "step": 33475 }, { "epoch": 15.615671641791044, "grad_norm": 0.34863765602822927, "learning_rate": 1.0657800085252789e-05, "loss": 0.0215, "step": 33480 }, { "epoch": 15.618003731343283, "grad_norm": 0.3726159971754797, "learning_rate": 1.0652048334347503e-05, "loss": 0.0212, "step": 33485 }, { "epoch": 15.620335820895523, "grad_norm": 0.36297067893009455, "learning_rate": 1.064629908857122e-05, "loss": 0.0219, "step": 33490 }, { "epoch": 15.622667910447761, "grad_norm": 0.37636568316046815, "learning_rate": 1.0640552348778772e-05, "loss": 0.0215, "step": 33495 }, { "epoch": 15.625, "grad_norm": 0.3645423457475685, "learning_rate": 1.0634808115824668e-05, "loss": 0.0214, "step": 33500 }, { "epoch": 15.627332089552239, "grad_norm": 0.3708393568315061, "learning_rate": 1.0629066390563002e-05, "loss": 0.0217, "step": 33505 }, { "epoch": 15.629664179104477, "grad_norm": 0.38899043549949425, "learning_rate": 1.062332717384752e-05, "loss": 0.0222, "step": 33510 }, { "epoch": 15.631996268656717, "grad_norm": 0.38727687591179805, "learning_rate": 1.061759046653159e-05, "loss": 0.0214, "step": 33515 }, { "epoch": 15.634328358208956, "grad_norm": 0.3680998170730211, "learning_rate": 1.0611856269468203e-05, "loss": 0.0218, "step": 33520 }, { "epoch": 15.636660447761194, "grad_norm": 0.36170044678465185, "learning_rate": 1.0606124583509983e-05, "loss": 0.0209, "step": 33525 }, { "epoch": 15.638992537313433, "grad_norm": 0.3578763216552786, "learning_rate": 1.0600395409509177e-05, "loss": 0.0211, "step": 33530 }, { "epoch": 15.641324626865671, "grad_norm": 0.3780517645778011, "learning_rate": 1.0594668748317643e-05, "loss": 0.0219, "step": 33535 }, { "epoch": 15.64365671641791, "grad_norm": 0.39674659548567215, "learning_rate": 1.0588944600786907e-05, "loss": 0.022, "step": 33540 }, { "epoch": 15.645988805970148, "grad_norm": 0.3745501406443852, "learning_rate": 1.0583222967768076e-05, "loss": 0.0219, "step": 33545 }, { "epoch": 15.648320895522389, "grad_norm": 0.3654192749181249, "learning_rate": 1.0577503850111903e-05, "loss": 0.0218, "step": 33550 }, { "epoch": 15.650652985074627, "grad_norm": 0.38327056282397143, "learning_rate": 1.0571787248668774e-05, "loss": 0.0213, "step": 33555 }, { "epoch": 15.652985074626866, "grad_norm": 0.38531196716670074, "learning_rate": 1.0566073164288687e-05, "loss": 0.0207, "step": 33560 }, { "epoch": 15.655317164179104, "grad_norm": 0.3695581102854483, "learning_rate": 1.0560361597821273e-05, "loss": 0.0211, "step": 33565 }, { "epoch": 15.657649253731343, "grad_norm": 0.38152961261949375, "learning_rate": 1.0554652550115788e-05, "loss": 0.0223, "step": 33570 }, { "epoch": 15.659981343283581, "grad_norm": 0.37195117167886493, "learning_rate": 1.054894602202112e-05, "loss": 0.0228, "step": 33575 }, { "epoch": 15.662313432835822, "grad_norm": 0.36764372268163314, "learning_rate": 1.0543242014385758e-05, "loss": 0.0206, "step": 33580 }, { "epoch": 15.66464552238806, "grad_norm": 0.3595658232402743, "learning_rate": 1.0537540528057844e-05, "loss": 0.0221, "step": 33585 }, { "epoch": 15.666977611940299, "grad_norm": 0.3790924478054281, "learning_rate": 1.0531841563885134e-05, "loss": 0.0209, "step": 33590 }, { "epoch": 15.669309701492537, "grad_norm": 0.37540255994220334, "learning_rate": 1.0526145122715007e-05, "loss": 0.0209, "step": 33595 }, { "epoch": 15.671641791044776, "grad_norm": 0.3868990414831755, "learning_rate": 1.052045120539447e-05, "loss": 0.022, "step": 33600 }, { "epoch": 15.673973880597014, "grad_norm": 0.38590773931482747, "learning_rate": 1.051475981277016e-05, "loss": 0.0209, "step": 33605 }, { "epoch": 15.676305970149254, "grad_norm": 0.3866176092709366, "learning_rate": 1.050907094568832e-05, "loss": 0.0212, "step": 33610 }, { "epoch": 15.678638059701493, "grad_norm": 0.3913544746213643, "learning_rate": 1.0503384604994846e-05, "loss": 0.0212, "step": 33615 }, { "epoch": 15.680970149253731, "grad_norm": 0.3696061431157034, "learning_rate": 1.0497700791535221e-05, "loss": 0.0217, "step": 33620 }, { "epoch": 15.68330223880597, "grad_norm": 0.39102750253280016, "learning_rate": 1.04920195061546e-05, "loss": 0.0213, "step": 33625 }, { "epoch": 15.685634328358208, "grad_norm": 0.3771734029105715, "learning_rate": 1.0486340749697716e-05, "loss": 0.0208, "step": 33630 }, { "epoch": 15.687966417910447, "grad_norm": 0.37596385105064417, "learning_rate": 1.0480664523008948e-05, "loss": 0.0217, "step": 33635 }, { "epoch": 15.690298507462687, "grad_norm": 0.3578911589835671, "learning_rate": 1.0474990826932301e-05, "loss": 0.0211, "step": 33640 }, { "epoch": 15.692630597014926, "grad_norm": 0.3752951405597153, "learning_rate": 1.0469319662311403e-05, "loss": 0.022, "step": 33645 }, { "epoch": 15.694962686567164, "grad_norm": 0.37470262275104005, "learning_rate": 1.0463651029989492e-05, "loss": 0.0218, "step": 33650 }, { "epoch": 15.697294776119403, "grad_norm": 0.3589380767458744, "learning_rate": 1.0457984930809452e-05, "loss": 0.0213, "step": 33655 }, { "epoch": 15.699626865671641, "grad_norm": 0.3756410533880492, "learning_rate": 1.0452321365613758e-05, "loss": 0.0227, "step": 33660 }, { "epoch": 15.70195895522388, "grad_norm": 0.38341458959016267, "learning_rate": 1.0446660335244551e-05, "loss": 0.0222, "step": 33665 }, { "epoch": 15.70429104477612, "grad_norm": 0.366760353970963, "learning_rate": 1.0441001840543548e-05, "loss": 0.0223, "step": 33670 }, { "epoch": 15.706623134328359, "grad_norm": 0.3657993993818806, "learning_rate": 1.0435345882352144e-05, "loss": 0.0211, "step": 33675 }, { "epoch": 15.708955223880597, "grad_norm": 0.38023751140096407, "learning_rate": 1.0429692461511298e-05, "loss": 0.0216, "step": 33680 }, { "epoch": 15.711287313432836, "grad_norm": 0.3903340348934175, "learning_rate": 1.0424041578861626e-05, "loss": 0.0219, "step": 33685 }, { "epoch": 15.713619402985074, "grad_norm": 0.35276336238464684, "learning_rate": 1.041839323524337e-05, "loss": 0.0209, "step": 33690 }, { "epoch": 15.715951492537313, "grad_norm": 0.3898236961578038, "learning_rate": 1.0412747431496372e-05, "loss": 0.022, "step": 33695 }, { "epoch": 15.718283582089553, "grad_norm": 0.38244604956906864, "learning_rate": 1.0407104168460116e-05, "loss": 0.0218, "step": 33700 }, { "epoch": 15.720615671641792, "grad_norm": 0.3669861196597188, "learning_rate": 1.0401463446973708e-05, "loss": 0.0212, "step": 33705 }, { "epoch": 15.72294776119403, "grad_norm": 0.3724105779830089, "learning_rate": 1.0395825267875846e-05, "loss": 0.0223, "step": 33710 }, { "epoch": 15.725279850746269, "grad_norm": 0.35911442178038944, "learning_rate": 1.0390189632004905e-05, "loss": 0.022, "step": 33715 }, { "epoch": 15.727611940298507, "grad_norm": 0.37075438832999563, "learning_rate": 1.0384556540198825e-05, "loss": 0.0215, "step": 33720 }, { "epoch": 15.729944029850746, "grad_norm": 0.3739727755225605, "learning_rate": 1.0378925993295202e-05, "loss": 0.0221, "step": 33725 }, { "epoch": 15.732276119402986, "grad_norm": 0.3796681812722538, "learning_rate": 1.0373297992131242e-05, "loss": 0.0217, "step": 33730 }, { "epoch": 15.734608208955224, "grad_norm": 0.3746075846647282, "learning_rate": 1.0367672537543777e-05, "loss": 0.0214, "step": 33735 }, { "epoch": 15.736940298507463, "grad_norm": 0.37645278260660914, "learning_rate": 1.0362049630369259e-05, "loss": 0.0216, "step": 33740 }, { "epoch": 15.739272388059701, "grad_norm": 0.40040431882632926, "learning_rate": 1.0356429271443757e-05, "loss": 0.0235, "step": 33745 }, { "epoch": 15.74160447761194, "grad_norm": 0.3866308813480008, "learning_rate": 1.0350811461602974e-05, "loss": 0.0221, "step": 33750 }, { "epoch": 15.743936567164178, "grad_norm": 0.38912266777897, "learning_rate": 1.0345196201682212e-05, "loss": 0.0225, "step": 33755 }, { "epoch": 15.746268656716419, "grad_norm": 0.3821506031007412, "learning_rate": 1.033958349251641e-05, "loss": 0.0227, "step": 33760 }, { "epoch": 15.748600746268657, "grad_norm": 0.35071248885476297, "learning_rate": 1.0333973334940125e-05, "loss": 0.022, "step": 33765 }, { "epoch": 15.750932835820896, "grad_norm": 0.3790282753319865, "learning_rate": 1.0328365729787536e-05, "loss": 0.0214, "step": 33770 }, { "epoch": 15.753264925373134, "grad_norm": 0.36957853880067626, "learning_rate": 1.0322760677892437e-05, "loss": 0.0213, "step": 33775 }, { "epoch": 15.755597014925373, "grad_norm": 0.36937153093974057, "learning_rate": 1.0317158180088254e-05, "loss": 0.021, "step": 33780 }, { "epoch": 15.757929104477611, "grad_norm": 0.3759878493321423, "learning_rate": 1.0311558237208006e-05, "loss": 0.0217, "step": 33785 }, { "epoch": 15.760261194029852, "grad_norm": 0.37848696066613147, "learning_rate": 1.0305960850084373e-05, "loss": 0.0229, "step": 33790 }, { "epoch": 15.76259328358209, "grad_norm": 0.3723281053376561, "learning_rate": 1.030036601954961e-05, "loss": 0.0219, "step": 33795 }, { "epoch": 15.764925373134329, "grad_norm": 0.37681052567087087, "learning_rate": 1.0294773746435638e-05, "loss": 0.0225, "step": 33800 }, { "epoch": 15.767257462686567, "grad_norm": 0.37302414446211607, "learning_rate": 1.028918403157396e-05, "loss": 0.0215, "step": 33805 }, { "epoch": 15.769589552238806, "grad_norm": 0.39089478730148736, "learning_rate": 1.0283596875795718e-05, "loss": 0.0224, "step": 33810 }, { "epoch": 15.771921641791044, "grad_norm": 0.39362799599412296, "learning_rate": 1.0278012279931665e-05, "loss": 0.0219, "step": 33815 }, { "epoch": 15.774253731343283, "grad_norm": 0.36730745481062765, "learning_rate": 1.0272430244812175e-05, "loss": 0.0212, "step": 33820 }, { "epoch": 15.776585820895523, "grad_norm": 0.3957702445340285, "learning_rate": 1.0266850771267253e-05, "loss": 0.0225, "step": 33825 }, { "epoch": 15.778917910447761, "grad_norm": 0.39084639499582685, "learning_rate": 1.0261273860126514e-05, "loss": 0.0219, "step": 33830 }, { "epoch": 15.78125, "grad_norm": 0.37870499935402524, "learning_rate": 1.0255699512219166e-05, "loss": 0.0213, "step": 33835 }, { "epoch": 15.783582089552239, "grad_norm": 0.38054849540744473, "learning_rate": 1.0250127728374098e-05, "loss": 0.0226, "step": 33840 }, { "epoch": 15.785914179104477, "grad_norm": 0.38439532993611597, "learning_rate": 1.0244558509419748e-05, "loss": 0.0212, "step": 33845 }, { "epoch": 15.788246268656717, "grad_norm": 0.3795423956098173, "learning_rate": 1.023899185618423e-05, "loss": 0.0218, "step": 33850 }, { "epoch": 15.790578358208956, "grad_norm": 0.3749177844224587, "learning_rate": 1.023342776949524e-05, "loss": 0.0218, "step": 33855 }, { "epoch": 15.792910447761194, "grad_norm": 0.38603080443897414, "learning_rate": 1.0227866250180105e-05, "loss": 0.0219, "step": 33860 }, { "epoch": 15.795242537313433, "grad_norm": 0.38587972871175935, "learning_rate": 1.022230729906577e-05, "loss": 0.0227, "step": 33865 }, { "epoch": 15.797574626865671, "grad_norm": 0.3931341697923315, "learning_rate": 1.02167509169788e-05, "loss": 0.0225, "step": 33870 }, { "epoch": 15.79990671641791, "grad_norm": 0.36855648171460975, "learning_rate": 1.0211197104745373e-05, "loss": 0.0217, "step": 33875 }, { "epoch": 15.802238805970148, "grad_norm": 0.40014183536165643, "learning_rate": 1.02056458631913e-05, "loss": 0.0222, "step": 33880 }, { "epoch": 15.804570895522389, "grad_norm": 0.3743296365746255, "learning_rate": 1.020009719314197e-05, "loss": 0.0224, "step": 33885 }, { "epoch": 15.806902985074627, "grad_norm": 0.3652820310592652, "learning_rate": 1.0194551095422447e-05, "loss": 0.0229, "step": 33890 }, { "epoch": 15.809235074626866, "grad_norm": 0.35368798194791073, "learning_rate": 1.0189007570857363e-05, "loss": 0.0218, "step": 33895 }, { "epoch": 15.811567164179104, "grad_norm": 0.38667613710188287, "learning_rate": 1.0183466620270996e-05, "loss": 0.0218, "step": 33900 }, { "epoch": 15.813899253731343, "grad_norm": 0.3849687692314246, "learning_rate": 1.0177928244487225e-05, "loss": 0.0218, "step": 33905 }, { "epoch": 15.816231343283581, "grad_norm": 0.3772423300702403, "learning_rate": 1.0172392444329561e-05, "loss": 0.0222, "step": 33910 }, { "epoch": 15.818563432835822, "grad_norm": 0.3819831557194169, "learning_rate": 1.0166859220621122e-05, "loss": 0.0215, "step": 33915 }, { "epoch": 15.82089552238806, "grad_norm": 0.39158622278917526, "learning_rate": 1.0161328574184645e-05, "loss": 0.0221, "step": 33920 }, { "epoch": 15.823227611940299, "grad_norm": 0.35955536158110646, "learning_rate": 1.015580050584249e-05, "loss": 0.0216, "step": 33925 }, { "epoch": 15.825559701492537, "grad_norm": 0.38724289004644197, "learning_rate": 1.0150275016416613e-05, "loss": 0.0228, "step": 33930 }, { "epoch": 15.827891791044776, "grad_norm": 0.3697539756559524, "learning_rate": 1.0144752106728613e-05, "loss": 0.0223, "step": 33935 }, { "epoch": 15.830223880597014, "grad_norm": 0.38021055630283807, "learning_rate": 1.0139231777599689e-05, "loss": 0.0224, "step": 33940 }, { "epoch": 15.832555970149254, "grad_norm": 0.3951513360635005, "learning_rate": 1.0133714029850667e-05, "loss": 0.0226, "step": 33945 }, { "epoch": 15.834888059701493, "grad_norm": 0.39089391708433013, "learning_rate": 1.0128198864301976e-05, "loss": 0.022, "step": 33950 }, { "epoch": 15.837220149253731, "grad_norm": 0.38254337829886315, "learning_rate": 1.0122686281773674e-05, "loss": 0.022, "step": 33955 }, { "epoch": 15.83955223880597, "grad_norm": 0.3530338829470512, "learning_rate": 1.0117176283085419e-05, "loss": 0.0218, "step": 33960 }, { "epoch": 15.841884328358208, "grad_norm": 0.40437398031063987, "learning_rate": 1.0111668869056515e-05, "loss": 0.0217, "step": 33965 }, { "epoch": 15.844216417910447, "grad_norm": 0.3682623358869108, "learning_rate": 1.0106164040505835e-05, "loss": 0.0228, "step": 33970 }, { "epoch": 15.846548507462687, "grad_norm": 0.37092475496112837, "learning_rate": 1.0100661798251923e-05, "loss": 0.0222, "step": 33975 }, { "epoch": 15.848880597014926, "grad_norm": 0.39552186737931105, "learning_rate": 1.009516214311289e-05, "loss": 0.0227, "step": 33980 }, { "epoch": 15.851212686567164, "grad_norm": 0.3881001967092515, "learning_rate": 1.0089665075906485e-05, "loss": 0.0223, "step": 33985 }, { "epoch": 15.853544776119403, "grad_norm": 0.3977223088864541, "learning_rate": 1.0084170597450073e-05, "loss": 0.0218, "step": 33990 }, { "epoch": 15.855876865671641, "grad_norm": 0.4069234526396436, "learning_rate": 1.0078678708560627e-05, "loss": 0.0223, "step": 33995 }, { "epoch": 15.85820895522388, "grad_norm": 0.37051652110944944, "learning_rate": 1.0073189410054742e-05, "loss": 0.0222, "step": 34000 }, { "epoch": 15.86054104477612, "grad_norm": 0.38333459471863146, "learning_rate": 1.0067702702748627e-05, "loss": 0.0222, "step": 34005 }, { "epoch": 15.862873134328359, "grad_norm": 0.3777118806255258, "learning_rate": 1.0062218587458085e-05, "loss": 0.0215, "step": 34010 }, { "epoch": 15.865205223880597, "grad_norm": 0.3805761605292239, "learning_rate": 1.005673706499858e-05, "loss": 0.0215, "step": 34015 }, { "epoch": 15.867537313432836, "grad_norm": 0.3963893521583418, "learning_rate": 1.0051258136185132e-05, "loss": 0.0228, "step": 34020 }, { "epoch": 15.869869402985074, "grad_norm": 0.39155489534230753, "learning_rate": 1.004578180183243e-05, "loss": 0.0218, "step": 34025 }, { "epoch": 15.872201492537313, "grad_norm": 0.3705394741663114, "learning_rate": 1.0040308062754738e-05, "loss": 0.0218, "step": 34030 }, { "epoch": 15.874533582089553, "grad_norm": 0.3985833433164944, "learning_rate": 1.0034836919765953e-05, "loss": 0.0225, "step": 34035 }, { "epoch": 15.876865671641792, "grad_norm": 0.390947978002368, "learning_rate": 1.0029368373679583e-05, "loss": 0.0225, "step": 34040 }, { "epoch": 15.87919776119403, "grad_norm": 0.38254110160432453, "learning_rate": 1.002390242530874e-05, "loss": 0.0221, "step": 34045 }, { "epoch": 15.881529850746269, "grad_norm": 0.3884545666156146, "learning_rate": 1.001843907546617e-05, "loss": 0.0226, "step": 34050 }, { "epoch": 15.883861940298507, "grad_norm": 0.39482988323538565, "learning_rate": 1.0012978324964214e-05, "loss": 0.0224, "step": 34055 }, { "epoch": 15.886194029850746, "grad_norm": 0.36570890165162145, "learning_rate": 1.0007520174614836e-05, "loss": 0.0216, "step": 34060 }, { "epoch": 15.888526119402986, "grad_norm": 0.37202776201647647, "learning_rate": 1.0002064625229613e-05, "loss": 0.0218, "step": 34065 }, { "epoch": 15.890858208955224, "grad_norm": 0.38104613272525706, "learning_rate": 9.996611677619719e-06, "loss": 0.0221, "step": 34070 }, { "epoch": 15.893190298507463, "grad_norm": 0.3754508377459925, "learning_rate": 9.991161332595978e-06, "loss": 0.0216, "step": 34075 }, { "epoch": 15.895522388059701, "grad_norm": 0.3834196896457144, "learning_rate": 9.98571359096878e-06, "loss": 0.0217, "step": 34080 }, { "epoch": 15.89785447761194, "grad_norm": 0.39757846960189225, "learning_rate": 9.980268453548172e-06, "loss": 0.0225, "step": 34085 }, { "epoch": 15.900186567164178, "grad_norm": 0.3912148312067803, "learning_rate": 9.97482592114378e-06, "loss": 0.0227, "step": 34090 }, { "epoch": 15.902518656716419, "grad_norm": 0.3616135010177258, "learning_rate": 9.969385994564862e-06, "loss": 0.0222, "step": 34095 }, { "epoch": 15.904850746268657, "grad_norm": 0.385366616574128, "learning_rate": 9.96394867462028e-06, "loss": 0.0221, "step": 34100 }, { "epoch": 15.907182835820896, "grad_norm": 0.37923049622689636, "learning_rate": 9.958513962118521e-06, "loss": 0.0219, "step": 34105 }, { "epoch": 15.909514925373134, "grad_norm": 0.40417213281054926, "learning_rate": 9.953081857867665e-06, "loss": 0.0222, "step": 34110 }, { "epoch": 15.911847014925373, "grad_norm": 0.39664351879526116, "learning_rate": 9.947652362675418e-06, "loss": 0.0219, "step": 34115 }, { "epoch": 15.914179104477611, "grad_norm": 0.37593629574295334, "learning_rate": 9.94222547734909e-06, "loss": 0.0223, "step": 34120 }, { "epoch": 15.916511194029852, "grad_norm": 0.3899642081164438, "learning_rate": 9.936801202695607e-06, "loss": 0.0225, "step": 34125 }, { "epoch": 15.91884328358209, "grad_norm": 0.37499190215538053, "learning_rate": 9.93137953952151e-06, "loss": 0.0234, "step": 34130 }, { "epoch": 15.921175373134329, "grad_norm": 0.4040501302277636, "learning_rate": 9.925960488632948e-06, "loss": 0.0227, "step": 34135 }, { "epoch": 15.923507462686567, "grad_norm": 0.381077999375381, "learning_rate": 9.92054405083569e-06, "loss": 0.0217, "step": 34140 }, { "epoch": 15.925839552238806, "grad_norm": 0.39134061184293295, "learning_rate": 9.915130226935081e-06, "loss": 0.0217, "step": 34145 }, { "epoch": 15.928171641791044, "grad_norm": 0.36364285840941285, "learning_rate": 9.90971901773614e-06, "loss": 0.0226, "step": 34150 }, { "epoch": 15.930503731343283, "grad_norm": 0.3826065913940817, "learning_rate": 9.904310424043432e-06, "loss": 0.0212, "step": 34155 }, { "epoch": 15.932835820895523, "grad_norm": 0.36876202189366253, "learning_rate": 9.898904446661188e-06, "loss": 0.0219, "step": 34160 }, { "epoch": 15.935167910447761, "grad_norm": 0.3863744898567053, "learning_rate": 9.89350108639321e-06, "loss": 0.0227, "step": 34165 }, { "epoch": 15.9375, "grad_norm": 0.3917785998958983, "learning_rate": 9.888100344042926e-06, "loss": 0.022, "step": 34170 }, { "epoch": 15.939832089552239, "grad_norm": 0.39779581240750195, "learning_rate": 9.88270222041338e-06, "loss": 0.0226, "step": 34175 }, { "epoch": 15.942164179104477, "grad_norm": 0.3915582253221121, "learning_rate": 9.87730671630722e-06, "loss": 0.022, "step": 34180 }, { "epoch": 15.944496268656717, "grad_norm": 0.36694035590262886, "learning_rate": 9.871913832526702e-06, "loss": 0.0214, "step": 34185 }, { "epoch": 15.946828358208956, "grad_norm": 0.40086604326547415, "learning_rate": 9.866523569873708e-06, "loss": 0.0228, "step": 34190 }, { "epoch": 15.949160447761194, "grad_norm": 0.38931348810473154, "learning_rate": 9.861135929149695e-06, "loss": 0.023, "step": 34195 }, { "epoch": 15.951492537313433, "grad_norm": 0.3780773006414413, "learning_rate": 9.855750911155784e-06, "loss": 0.0209, "step": 34200 }, { "epoch": 15.953824626865671, "grad_norm": 0.36783367351140867, "learning_rate": 9.850368516692643e-06, "loss": 0.0224, "step": 34205 }, { "epoch": 15.95615671641791, "grad_norm": 0.4105591567888028, "learning_rate": 9.844988746560615e-06, "loss": 0.0228, "step": 34210 }, { "epoch": 15.958488805970148, "grad_norm": 0.39585414676719755, "learning_rate": 9.839611601559597e-06, "loss": 0.0223, "step": 34215 }, { "epoch": 15.960820895522389, "grad_norm": 0.3888001346624839, "learning_rate": 9.834237082489126e-06, "loss": 0.022, "step": 34220 }, { "epoch": 15.963152985074627, "grad_norm": 0.4074906820539481, "learning_rate": 9.828865190148342e-06, "loss": 0.0229, "step": 34225 }, { "epoch": 15.965485074626866, "grad_norm": 0.38781946753468804, "learning_rate": 9.823495925335995e-06, "loss": 0.0226, "step": 34230 }, { "epoch": 15.967817164179104, "grad_norm": 0.3797277535101792, "learning_rate": 9.81812928885044e-06, "loss": 0.0225, "step": 34235 }, { "epoch": 15.970149253731343, "grad_norm": 0.3787283860531042, "learning_rate": 9.812765281489655e-06, "loss": 0.0217, "step": 34240 }, { "epoch": 15.972481343283581, "grad_norm": 0.38936413317502594, "learning_rate": 9.807403904051194e-06, "loss": 0.0223, "step": 34245 }, { "epoch": 15.974813432835822, "grad_norm": 0.41392747982794026, "learning_rate": 9.802045157332269e-06, "loss": 0.0225, "step": 34250 }, { "epoch": 15.97714552238806, "grad_norm": 0.3941322953253256, "learning_rate": 9.796689042129652e-06, "loss": 0.023, "step": 34255 }, { "epoch": 15.979477611940299, "grad_norm": 0.37497110610831097, "learning_rate": 9.79133555923976e-06, "loss": 0.0217, "step": 34260 }, { "epoch": 15.981809701492537, "grad_norm": 0.365451584821678, "learning_rate": 9.785984709458602e-06, "loss": 0.0224, "step": 34265 }, { "epoch": 15.984141791044776, "grad_norm": 0.404978286326302, "learning_rate": 9.780636493581797e-06, "loss": 0.0226, "step": 34270 }, { "epoch": 15.986473880597014, "grad_norm": 0.3718317233205827, "learning_rate": 9.775290912404569e-06, "loss": 0.022, "step": 34275 }, { "epoch": 15.988805970149254, "grad_norm": 0.4015010178602953, "learning_rate": 9.76994796672176e-06, "loss": 0.0226, "step": 34280 }, { "epoch": 15.991138059701493, "grad_norm": 0.3841527849829499, "learning_rate": 9.764607657327818e-06, "loss": 0.0223, "step": 34285 }, { "epoch": 15.993470149253731, "grad_norm": 0.39089537242082617, "learning_rate": 9.759269985016786e-06, "loss": 0.0218, "step": 34290 }, { "epoch": 15.99580223880597, "grad_norm": 0.36367148572053604, "learning_rate": 9.753934950582333e-06, "loss": 0.0225, "step": 34295 }, { "epoch": 15.998134328358208, "grad_norm": 0.3858191716655979, "learning_rate": 9.748602554817721e-06, "loss": 0.0222, "step": 34300 }, { "epoch": 16.00046641791045, "grad_norm": 0.2556936653497716, "learning_rate": 9.743272798515829e-06, "loss": 0.0209, "step": 34305 }, { "epoch": 16.002798507462686, "grad_norm": 0.24598234616029432, "learning_rate": 9.737945682469145e-06, "loss": 0.0126, "step": 34310 }, { "epoch": 16.005130597014926, "grad_norm": 0.2794270030322318, "learning_rate": 9.732621207469761e-06, "loss": 0.0122, "step": 34315 }, { "epoch": 16.007462686567163, "grad_norm": 0.29722772679216924, "learning_rate": 9.72729937430936e-06, "loss": 0.0125, "step": 34320 }, { "epoch": 16.009794776119403, "grad_norm": 0.2607235804270406, "learning_rate": 9.72198018377927e-06, "loss": 0.0121, "step": 34325 }, { "epoch": 16.012126865671643, "grad_norm": 0.26211443683774144, "learning_rate": 9.716663636670375e-06, "loss": 0.012, "step": 34330 }, { "epoch": 16.01445895522388, "grad_norm": 0.2805937523733126, "learning_rate": 9.71134973377323e-06, "loss": 0.0121, "step": 34335 }, { "epoch": 16.01679104477612, "grad_norm": 0.26448055627099104, "learning_rate": 9.706038475877938e-06, "loss": 0.012, "step": 34340 }, { "epoch": 16.019123134328357, "grad_norm": 0.2514044261264005, "learning_rate": 9.700729863774233e-06, "loss": 0.0117, "step": 34345 }, { "epoch": 16.021455223880597, "grad_norm": 0.27239628973016106, "learning_rate": 9.69542389825146e-06, "loss": 0.0119, "step": 34350 }, { "epoch": 16.023787313432837, "grad_norm": 0.2699833685139524, "learning_rate": 9.690120580098566e-06, "loss": 0.0121, "step": 34355 }, { "epoch": 16.026119402985074, "grad_norm": 0.26457890636086057, "learning_rate": 9.6848199101041e-06, "loss": 0.0119, "step": 34360 }, { "epoch": 16.028451492537314, "grad_norm": 0.28460972370331505, "learning_rate": 9.67952188905623e-06, "loss": 0.0118, "step": 34365 }, { "epoch": 16.03078358208955, "grad_norm": 0.2758265997094567, "learning_rate": 9.674226517742705e-06, "loss": 0.0118, "step": 34370 }, { "epoch": 16.03311567164179, "grad_norm": 0.268327973467905, "learning_rate": 9.668933796950913e-06, "loss": 0.0116, "step": 34375 }, { "epoch": 16.03544776119403, "grad_norm": 0.2666053099968144, "learning_rate": 9.66364372746781e-06, "loss": 0.0118, "step": 34380 }, { "epoch": 16.03777985074627, "grad_norm": 0.2831094434639498, "learning_rate": 9.658356310080007e-06, "loss": 0.012, "step": 34385 }, { "epoch": 16.04011194029851, "grad_norm": 0.2903835587644097, "learning_rate": 9.653071545573667e-06, "loss": 0.0113, "step": 34390 }, { "epoch": 16.042444029850746, "grad_norm": 0.26704627649907525, "learning_rate": 9.647789434734594e-06, "loss": 0.0114, "step": 34395 }, { "epoch": 16.044776119402986, "grad_norm": 0.2592173869189282, "learning_rate": 9.64250997834819e-06, "loss": 0.0114, "step": 34400 }, { "epoch": 16.047108208955223, "grad_norm": 0.27022338005037116, "learning_rate": 9.637233177199452e-06, "loss": 0.0111, "step": 34405 }, { "epoch": 16.049440298507463, "grad_norm": 0.25327008217961566, "learning_rate": 9.631959032072997e-06, "loss": 0.0116, "step": 34410 }, { "epoch": 16.051772388059703, "grad_norm": 0.2665391287445326, "learning_rate": 9.626687543753041e-06, "loss": 0.0114, "step": 34415 }, { "epoch": 16.05410447761194, "grad_norm": 0.2679777238471803, "learning_rate": 9.621418713023389e-06, "loss": 0.0119, "step": 34420 }, { "epoch": 16.05643656716418, "grad_norm": 0.26588609314600303, "learning_rate": 9.616152540667488e-06, "loss": 0.0116, "step": 34425 }, { "epoch": 16.058768656716417, "grad_norm": 0.2536899004892797, "learning_rate": 9.61088902746835e-06, "loss": 0.0109, "step": 34430 }, { "epoch": 16.061100746268657, "grad_norm": 0.2775906429263993, "learning_rate": 9.605628174208617e-06, "loss": 0.0118, "step": 34435 }, { "epoch": 16.063432835820894, "grad_norm": 0.2742816177761332, "learning_rate": 9.60036998167052e-06, "loss": 0.0114, "step": 34440 }, { "epoch": 16.065764925373134, "grad_norm": 0.27014658349912535, "learning_rate": 9.595114450635911e-06, "loss": 0.0117, "step": 34445 }, { "epoch": 16.068097014925375, "grad_norm": 0.26697782150269755, "learning_rate": 9.589861581886232e-06, "loss": 0.0116, "step": 34450 }, { "epoch": 16.07042910447761, "grad_norm": 0.2513150461237461, "learning_rate": 9.584611376202534e-06, "loss": 0.0119, "step": 34455 }, { "epoch": 16.07276119402985, "grad_norm": 0.25348286842817463, "learning_rate": 9.579363834365484e-06, "loss": 0.0116, "step": 34460 }, { "epoch": 16.07509328358209, "grad_norm": 0.26233889184810544, "learning_rate": 9.574118957155321e-06, "loss": 0.0114, "step": 34465 }, { "epoch": 16.07742537313433, "grad_norm": 0.28209239971398237, "learning_rate": 9.568876745351919e-06, "loss": 0.0116, "step": 34470 }, { "epoch": 16.07975746268657, "grad_norm": 0.25868882113750646, "learning_rate": 9.563637199734744e-06, "loss": 0.0119, "step": 34475 }, { "epoch": 16.082089552238806, "grad_norm": 0.2755130854769603, "learning_rate": 9.558400321082863e-06, "loss": 0.0116, "step": 34480 }, { "epoch": 16.084421641791046, "grad_norm": 0.2674106298961892, "learning_rate": 9.553166110174957e-06, "loss": 0.0121, "step": 34485 }, { "epoch": 16.086753731343283, "grad_norm": 0.2674269069079128, "learning_rate": 9.547934567789302e-06, "loss": 0.0121, "step": 34490 }, { "epoch": 16.089085820895523, "grad_norm": 0.2735542240462302, "learning_rate": 9.542705694703763e-06, "loss": 0.0114, "step": 34495 }, { "epoch": 16.09141791044776, "grad_norm": 0.26392127134595517, "learning_rate": 9.537479491695845e-06, "loss": 0.012, "step": 34500 }, { "epoch": 16.09375, "grad_norm": 0.26637173883227444, "learning_rate": 9.532255959542616e-06, "loss": 0.0116, "step": 34505 }, { "epoch": 16.09608208955224, "grad_norm": 0.26329061823247796, "learning_rate": 9.527035099020784e-06, "loss": 0.0112, "step": 34510 }, { "epoch": 16.098414179104477, "grad_norm": 0.27124372573455807, "learning_rate": 9.521816910906626e-06, "loss": 0.0115, "step": 34515 }, { "epoch": 16.100746268656717, "grad_norm": 0.29037236810206374, "learning_rate": 9.516601395976038e-06, "loss": 0.0114, "step": 34520 }, { "epoch": 16.103078358208954, "grad_norm": 0.25578069186529806, "learning_rate": 9.511388555004523e-06, "loss": 0.0114, "step": 34525 }, { "epoch": 16.105410447761194, "grad_norm": 0.24210636512830264, "learning_rate": 9.506178388767176e-06, "loss": 0.0112, "step": 34530 }, { "epoch": 16.10774253731343, "grad_norm": 0.2529112002830143, "learning_rate": 9.5009708980387e-06, "loss": 0.011, "step": 34535 }, { "epoch": 16.11007462686567, "grad_norm": 0.27504285395475386, "learning_rate": 9.495766083593407e-06, "loss": 0.0118, "step": 34540 }, { "epoch": 16.11240671641791, "grad_norm": 0.25453589800014614, "learning_rate": 9.490563946205183e-06, "loss": 0.0114, "step": 34545 }, { "epoch": 16.11473880597015, "grad_norm": 0.279284577772264, "learning_rate": 9.485364486647561e-06, "loss": 0.0117, "step": 34550 }, { "epoch": 16.11707089552239, "grad_norm": 0.2704421052060355, "learning_rate": 9.480167705693624e-06, "loss": 0.0117, "step": 34555 }, { "epoch": 16.119402985074625, "grad_norm": 0.27326692954319315, "learning_rate": 9.474973604116112e-06, "loss": 0.0117, "step": 34560 }, { "epoch": 16.121735074626866, "grad_norm": 0.26167483854450957, "learning_rate": 9.469782182687317e-06, "loss": 0.0113, "step": 34565 }, { "epoch": 16.124067164179106, "grad_norm": 0.2749202168291761, "learning_rate": 9.464593442179162e-06, "loss": 0.0119, "step": 34570 }, { "epoch": 16.126399253731343, "grad_norm": 0.2708091675369306, "learning_rate": 9.459407383363158e-06, "loss": 0.0113, "step": 34575 }, { "epoch": 16.128731343283583, "grad_norm": 0.27357839449642235, "learning_rate": 9.454224007010428e-06, "loss": 0.0116, "step": 34580 }, { "epoch": 16.13106343283582, "grad_norm": 0.26912667472278246, "learning_rate": 9.449043313891692e-06, "loss": 0.0114, "step": 34585 }, { "epoch": 16.13339552238806, "grad_norm": 0.26554531988799135, "learning_rate": 9.443865304777266e-06, "loss": 0.0117, "step": 34590 }, { "epoch": 16.135727611940297, "grad_norm": 0.2659455910574547, "learning_rate": 9.438689980437062e-06, "loss": 0.0116, "step": 34595 }, { "epoch": 16.138059701492537, "grad_norm": 0.27351407573650954, "learning_rate": 9.433517341640621e-06, "loss": 0.0119, "step": 34600 }, { "epoch": 16.140391791044777, "grad_norm": 0.2633889477480214, "learning_rate": 9.428347389157039e-06, "loss": 0.0115, "step": 34605 }, { "epoch": 16.142723880597014, "grad_norm": 0.2660515194177599, "learning_rate": 9.423180123755064e-06, "loss": 0.0115, "step": 34610 }, { "epoch": 16.145055970149254, "grad_norm": 0.24964708120853824, "learning_rate": 9.418015546203002e-06, "loss": 0.0116, "step": 34615 }, { "epoch": 16.14738805970149, "grad_norm": 0.2625179397978101, "learning_rate": 9.41285365726878e-06, "loss": 0.0114, "step": 34620 }, { "epoch": 16.14972014925373, "grad_norm": 0.2527175046646987, "learning_rate": 9.407694457719925e-06, "loss": 0.0113, "step": 34625 }, { "epoch": 16.15205223880597, "grad_norm": 0.28151297042166834, "learning_rate": 9.40253794832356e-06, "loss": 0.012, "step": 34630 }, { "epoch": 16.15438432835821, "grad_norm": 0.2932850072566455, "learning_rate": 9.397384129846404e-06, "loss": 0.0115, "step": 34635 }, { "epoch": 16.15671641791045, "grad_norm": 0.26183836387102294, "learning_rate": 9.39223300305479e-06, "loss": 0.0119, "step": 34640 }, { "epoch": 16.159048507462686, "grad_norm": 0.2540362806683164, "learning_rate": 9.387084568714628e-06, "loss": 0.0118, "step": 34645 }, { "epoch": 16.161380597014926, "grad_norm": 0.2730307951591179, "learning_rate": 9.381938827591447e-06, "loss": 0.0123, "step": 34650 }, { "epoch": 16.163712686567163, "grad_norm": 0.28176101437869505, "learning_rate": 9.376795780450373e-06, "loss": 0.0123, "step": 34655 }, { "epoch": 16.166044776119403, "grad_norm": 0.28354968928619273, "learning_rate": 9.371655428056122e-06, "loss": 0.012, "step": 34660 }, { "epoch": 16.168376865671643, "grad_norm": 0.27515497972430586, "learning_rate": 9.36651777117302e-06, "loss": 0.0117, "step": 34665 }, { "epoch": 16.17070895522388, "grad_norm": 0.2567743824309142, "learning_rate": 9.361382810564984e-06, "loss": 0.0115, "step": 34670 }, { "epoch": 16.17304104477612, "grad_norm": 0.29463005449772883, "learning_rate": 9.35625054699554e-06, "loss": 0.0114, "step": 34675 }, { "epoch": 16.175373134328357, "grad_norm": 0.27358468212491915, "learning_rate": 9.351120981227788e-06, "loss": 0.0121, "step": 34680 }, { "epoch": 16.177705223880597, "grad_norm": 0.2768713641414963, "learning_rate": 9.345994114024472e-06, "loss": 0.0115, "step": 34685 }, { "epoch": 16.180037313432837, "grad_norm": 0.28685586624451975, "learning_rate": 9.34086994614789e-06, "loss": 0.0114, "step": 34690 }, { "epoch": 16.182369402985074, "grad_norm": 0.2621092729963244, "learning_rate": 9.33574847835996e-06, "loss": 0.0118, "step": 34695 }, { "epoch": 16.184701492537314, "grad_norm": 0.2807927995261907, "learning_rate": 9.330629711422196e-06, "loss": 0.0115, "step": 34700 }, { "epoch": 16.18703358208955, "grad_norm": 0.26449131768924183, "learning_rate": 9.325513646095707e-06, "loss": 0.0119, "step": 34705 }, { "epoch": 16.18936567164179, "grad_norm": 0.2734954946786627, "learning_rate": 9.320400283141208e-06, "loss": 0.0123, "step": 34710 }, { "epoch": 16.19169776119403, "grad_norm": 0.28032100176400804, "learning_rate": 9.315289623319012e-06, "loss": 0.0117, "step": 34715 }, { "epoch": 16.19402985074627, "grad_norm": 0.2667263330201546, "learning_rate": 9.310181667389003e-06, "loss": 0.0123, "step": 34720 }, { "epoch": 16.19636194029851, "grad_norm": 0.3054474928056671, "learning_rate": 9.305076416110715e-06, "loss": 0.0125, "step": 34725 }, { "epoch": 16.198694029850746, "grad_norm": 0.26489266900849723, "learning_rate": 9.299973870243222e-06, "loss": 0.0119, "step": 34730 }, { "epoch": 16.201026119402986, "grad_norm": 0.2691802956160623, "learning_rate": 9.294874030545247e-06, "loss": 0.0113, "step": 34735 }, { "epoch": 16.203358208955223, "grad_norm": 0.27832372841254993, "learning_rate": 9.289776897775074e-06, "loss": 0.0116, "step": 34740 }, { "epoch": 16.205690298507463, "grad_norm": 0.2809065375159855, "learning_rate": 9.284682472690599e-06, "loss": 0.0121, "step": 34745 }, { "epoch": 16.208022388059703, "grad_norm": 0.28498861801281783, "learning_rate": 9.279590756049316e-06, "loss": 0.0119, "step": 34750 }, { "epoch": 16.21035447761194, "grad_norm": 0.27624627824242415, "learning_rate": 9.274501748608314e-06, "loss": 0.0116, "step": 34755 }, { "epoch": 16.21268656716418, "grad_norm": 0.29121932222259883, "learning_rate": 9.269415451124283e-06, "loss": 0.0121, "step": 34760 }, { "epoch": 16.215018656716417, "grad_norm": 0.2860668707855997, "learning_rate": 9.2643318643535e-06, "loss": 0.0121, "step": 34765 }, { "epoch": 16.217350746268657, "grad_norm": 0.2852824662550088, "learning_rate": 9.25925098905185e-06, "loss": 0.0115, "step": 34770 }, { "epoch": 16.219682835820894, "grad_norm": 0.29579570877431755, "learning_rate": 9.254172825974823e-06, "loss": 0.0118, "step": 34775 }, { "epoch": 16.222014925373134, "grad_norm": 0.26928686735826546, "learning_rate": 9.249097375877458e-06, "loss": 0.0119, "step": 34780 }, { "epoch": 16.224347014925375, "grad_norm": 0.2660008383869713, "learning_rate": 9.244024639514465e-06, "loss": 0.0117, "step": 34785 }, { "epoch": 16.22667910447761, "grad_norm": 0.2871346968432119, "learning_rate": 9.23895461764009e-06, "loss": 0.012, "step": 34790 }, { "epoch": 16.22901119402985, "grad_norm": 0.28337369140005886, "learning_rate": 9.233887311008197e-06, "loss": 0.012, "step": 34795 }, { "epoch": 16.23134328358209, "grad_norm": 0.26499891551748983, "learning_rate": 9.22882272037225e-06, "loss": 0.0113, "step": 34800 }, { "epoch": 16.23367537313433, "grad_norm": 0.2763982430367619, "learning_rate": 9.223760846485307e-06, "loss": 0.0113, "step": 34805 }, { "epoch": 16.23600746268657, "grad_norm": 0.28936711513192886, "learning_rate": 9.218701690100017e-06, "loss": 0.0117, "step": 34810 }, { "epoch": 16.238339552238806, "grad_norm": 0.272447070319276, "learning_rate": 9.21364525196863e-06, "loss": 0.0121, "step": 34815 }, { "epoch": 16.240671641791046, "grad_norm": 0.27184464701321287, "learning_rate": 9.208591532842995e-06, "loss": 0.0118, "step": 34820 }, { "epoch": 16.243003731343283, "grad_norm": 0.2742301612345057, "learning_rate": 9.203540533474537e-06, "loss": 0.0119, "step": 34825 }, { "epoch": 16.245335820895523, "grad_norm": 0.292035050772396, "learning_rate": 9.198492254614302e-06, "loss": 0.012, "step": 34830 }, { "epoch": 16.24766791044776, "grad_norm": 0.27711356453135894, "learning_rate": 9.193446697012921e-06, "loss": 0.0112, "step": 34835 }, { "epoch": 16.25, "grad_norm": 0.26603145169601244, "learning_rate": 9.188403861420615e-06, "loss": 0.0122, "step": 34840 }, { "epoch": 16.25233208955224, "grad_norm": 0.28307704441474724, "learning_rate": 9.183363748587207e-06, "loss": 0.0119, "step": 34845 }, { "epoch": 16.254664179104477, "grad_norm": 0.2671587657516701, "learning_rate": 9.178326359262124e-06, "loss": 0.0122, "step": 34850 }, { "epoch": 16.256996268656717, "grad_norm": 0.2757787914430995, "learning_rate": 9.173291694194356e-06, "loss": 0.0123, "step": 34855 }, { "epoch": 16.259328358208954, "grad_norm": 0.28181351070376265, "learning_rate": 9.16825975413253e-06, "loss": 0.0118, "step": 34860 }, { "epoch": 16.261660447761194, "grad_norm": 0.2587727212899196, "learning_rate": 9.163230539824829e-06, "loss": 0.0114, "step": 34865 }, { "epoch": 16.263992537313435, "grad_norm": 0.2862415136485921, "learning_rate": 9.158204052019069e-06, "loss": 0.0116, "step": 34870 }, { "epoch": 16.26632462686567, "grad_norm": 0.28293371361070957, "learning_rate": 9.153180291462627e-06, "loss": 0.0121, "step": 34875 }, { "epoch": 16.26865671641791, "grad_norm": 0.2908875150281376, "learning_rate": 9.148159258902488e-06, "loss": 0.0123, "step": 34880 }, { "epoch": 16.27098880597015, "grad_norm": 0.26665742659634817, "learning_rate": 9.143140955085239e-06, "loss": 0.0114, "step": 34885 }, { "epoch": 16.27332089552239, "grad_norm": 0.28371085220766357, "learning_rate": 9.138125380757046e-06, "loss": 0.0121, "step": 34890 }, { "epoch": 16.275652985074625, "grad_norm": 0.25827281708586136, "learning_rate": 9.133112536663682e-06, "loss": 0.0114, "step": 34895 }, { "epoch": 16.277985074626866, "grad_norm": 0.28279296916732355, "learning_rate": 9.128102423550511e-06, "loss": 0.0122, "step": 34900 }, { "epoch": 16.280317164179106, "grad_norm": 0.2576845930783859, "learning_rate": 9.123095042162477e-06, "loss": 0.0119, "step": 34905 }, { "epoch": 16.282649253731343, "grad_norm": 0.3029980552468768, "learning_rate": 9.118090393244147e-06, "loss": 0.0113, "step": 34910 }, { "epoch": 16.284981343283583, "grad_norm": 0.2770477836985061, "learning_rate": 9.113088477539643e-06, "loss": 0.0121, "step": 34915 }, { "epoch": 16.28731343283582, "grad_norm": 0.29748089045728376, "learning_rate": 9.108089295792726e-06, "loss": 0.0126, "step": 34920 }, { "epoch": 16.28964552238806, "grad_norm": 0.27742951832065027, "learning_rate": 9.10309284874671e-06, "loss": 0.0118, "step": 34925 }, { "epoch": 16.291977611940297, "grad_norm": 0.2747157950538654, "learning_rate": 9.098099137144522e-06, "loss": 0.0119, "step": 34930 }, { "epoch": 16.294309701492537, "grad_norm": 0.2796288202588254, "learning_rate": 9.093108161728683e-06, "loss": 0.0121, "step": 34935 }, { "epoch": 16.296641791044777, "grad_norm": 0.3071790199125081, "learning_rate": 9.088119923241295e-06, "loss": 0.012, "step": 34940 }, { "epoch": 16.298973880597014, "grad_norm": 0.2670375817187536, "learning_rate": 9.083134422424073e-06, "loss": 0.0119, "step": 34945 }, { "epoch": 16.301305970149254, "grad_norm": 0.3108037001406882, "learning_rate": 9.07815166001831e-06, "loss": 0.0121, "step": 34950 }, { "epoch": 16.30363805970149, "grad_norm": 0.27488967517231955, "learning_rate": 9.073171636764879e-06, "loss": 0.0123, "step": 34955 }, { "epoch": 16.30597014925373, "grad_norm": 0.3001804196241367, "learning_rate": 9.068194353404288e-06, "loss": 0.012, "step": 34960 }, { "epoch": 16.30830223880597, "grad_norm": 0.27857120697427457, "learning_rate": 9.063219810676593e-06, "loss": 0.0115, "step": 34965 }, { "epoch": 16.31063432835821, "grad_norm": 0.30004486441670164, "learning_rate": 9.058248009321464e-06, "loss": 0.0118, "step": 34970 }, { "epoch": 16.31296641791045, "grad_norm": 0.2638725660850824, "learning_rate": 9.053278950078163e-06, "loss": 0.0118, "step": 34975 }, { "epoch": 16.315298507462686, "grad_norm": 0.2805706066647096, "learning_rate": 9.04831263368554e-06, "loss": 0.0118, "step": 34980 }, { "epoch": 16.317630597014926, "grad_norm": 0.28664287337740796, "learning_rate": 9.043349060882039e-06, "loss": 0.0123, "step": 34985 }, { "epoch": 16.319962686567163, "grad_norm": 0.26461953935296206, "learning_rate": 9.038388232405699e-06, "loss": 0.012, "step": 34990 }, { "epoch": 16.322294776119403, "grad_norm": 0.29284816056987323, "learning_rate": 9.033430148994148e-06, "loss": 0.0122, "step": 34995 }, { "epoch": 16.324626865671643, "grad_norm": 0.2847223690168844, "learning_rate": 9.028474811384597e-06, "loss": 0.0119, "step": 35000 }, { "epoch": 16.32695895522388, "grad_norm": 0.2856685489322488, "learning_rate": 9.023522220313865e-06, "loss": 0.0116, "step": 35005 }, { "epoch": 16.32929104477612, "grad_norm": 0.2761656629663893, "learning_rate": 9.01857237651835e-06, "loss": 0.0118, "step": 35010 }, { "epoch": 16.331623134328357, "grad_norm": 0.3161614542947597, "learning_rate": 9.013625280734047e-06, "loss": 0.0122, "step": 35015 }, { "epoch": 16.333955223880597, "grad_norm": 0.2777515270522393, "learning_rate": 9.008680933696545e-06, "loss": 0.0122, "step": 35020 }, { "epoch": 16.336287313432837, "grad_norm": 0.2755157542315063, "learning_rate": 9.003739336141025e-06, "loss": 0.012, "step": 35025 }, { "epoch": 16.338619402985074, "grad_norm": 0.29400384028667237, "learning_rate": 8.998800488802239e-06, "loss": 0.0118, "step": 35030 }, { "epoch": 16.340951492537314, "grad_norm": 0.2585842480609191, "learning_rate": 8.99386439241457e-06, "loss": 0.0123, "step": 35035 }, { "epoch": 16.34328358208955, "grad_norm": 0.2926021547680174, "learning_rate": 8.98893104771194e-06, "loss": 0.0121, "step": 35040 }, { "epoch": 16.34561567164179, "grad_norm": 0.2867090934350402, "learning_rate": 8.984000455427917e-06, "loss": 0.0118, "step": 35045 }, { "epoch": 16.34794776119403, "grad_norm": 0.29780998659093105, "learning_rate": 8.979072616295616e-06, "loss": 0.0123, "step": 35050 }, { "epoch": 16.35027985074627, "grad_norm": 0.29323348755994727, "learning_rate": 8.974147531047763e-06, "loss": 0.012, "step": 35055 }, { "epoch": 16.35261194029851, "grad_norm": 0.29200908994818947, "learning_rate": 8.969225200416678e-06, "loss": 0.0121, "step": 35060 }, { "epoch": 16.354944029850746, "grad_norm": 0.2870206462450499, "learning_rate": 8.964305625134254e-06, "loss": 0.0119, "step": 35065 }, { "epoch": 16.357276119402986, "grad_norm": 0.27267632335679065, "learning_rate": 8.959388805931993e-06, "loss": 0.0123, "step": 35070 }, { "epoch": 16.359608208955223, "grad_norm": 0.27697914448935834, "learning_rate": 8.954474743540979e-06, "loss": 0.0123, "step": 35075 }, { "epoch": 16.361940298507463, "grad_norm": 0.277671831016508, "learning_rate": 8.94956343869187e-06, "loss": 0.0121, "step": 35080 }, { "epoch": 16.364272388059703, "grad_norm": 0.2790465198404473, "learning_rate": 8.944654892114956e-06, "loss": 0.0118, "step": 35085 }, { "epoch": 16.36660447761194, "grad_norm": 0.3194361824686778, "learning_rate": 8.939749104540065e-06, "loss": 0.0122, "step": 35090 }, { "epoch": 16.36893656716418, "grad_norm": 0.2948092978044304, "learning_rate": 8.934846076696665e-06, "loss": 0.0121, "step": 35095 }, { "epoch": 16.371268656716417, "grad_norm": 0.2957638204856704, "learning_rate": 8.929945809313773e-06, "loss": 0.0121, "step": 35100 }, { "epoch": 16.373600746268657, "grad_norm": 0.28841680891104376, "learning_rate": 8.925048303120012e-06, "loss": 0.012, "step": 35105 }, { "epoch": 16.375932835820894, "grad_norm": 0.3038433594086453, "learning_rate": 8.9201535588436e-06, "loss": 0.0124, "step": 35110 }, { "epoch": 16.378264925373134, "grad_norm": 0.29356061727851, "learning_rate": 8.915261577212337e-06, "loss": 0.0128, "step": 35115 }, { "epoch": 16.380597014925375, "grad_norm": 0.2648737174809742, "learning_rate": 8.910372358953614e-06, "loss": 0.0117, "step": 35120 }, { "epoch": 16.38292910447761, "grad_norm": 0.30402892583847607, "learning_rate": 8.905485904794416e-06, "loss": 0.0121, "step": 35125 }, { "epoch": 16.38526119402985, "grad_norm": 0.2823487347798481, "learning_rate": 8.900602215461297e-06, "loss": 0.0118, "step": 35130 }, { "epoch": 16.38759328358209, "grad_norm": 0.27115559278007445, "learning_rate": 8.895721291680433e-06, "loss": 0.0117, "step": 35135 }, { "epoch": 16.38992537313433, "grad_norm": 0.2902640760463831, "learning_rate": 8.890843134177555e-06, "loss": 0.012, "step": 35140 }, { "epoch": 16.392257462686565, "grad_norm": 0.29916895841989705, "learning_rate": 8.885967743678011e-06, "loss": 0.0121, "step": 35145 }, { "epoch": 16.394589552238806, "grad_norm": 0.29183809349701284, "learning_rate": 8.881095120906716e-06, "loss": 0.0128, "step": 35150 }, { "epoch": 16.396921641791046, "grad_norm": 0.28040650815474866, "learning_rate": 8.876225266588184e-06, "loss": 0.0124, "step": 35155 }, { "epoch": 16.399253731343283, "grad_norm": 0.2808367624181434, "learning_rate": 8.871358181446519e-06, "loss": 0.0124, "step": 35160 }, { "epoch": 16.401585820895523, "grad_norm": 0.29001272940337297, "learning_rate": 8.866493866205407e-06, "loss": 0.0125, "step": 35165 }, { "epoch": 16.40391791044776, "grad_norm": 0.27268935810755823, "learning_rate": 8.861632321588126e-06, "loss": 0.0122, "step": 35170 }, { "epoch": 16.40625, "grad_norm": 0.34840344706179527, "learning_rate": 8.856773548317545e-06, "loss": 0.0118, "step": 35175 }, { "epoch": 16.40858208955224, "grad_norm": 0.26425503461988736, "learning_rate": 8.851917547116111e-06, "loss": 0.0123, "step": 35180 }, { "epoch": 16.410914179104477, "grad_norm": 0.2862608480005972, "learning_rate": 8.847064318705864e-06, "loss": 0.0126, "step": 35185 }, { "epoch": 16.413246268656717, "grad_norm": 0.30667175210802894, "learning_rate": 8.842213863808439e-06, "loss": 0.0123, "step": 35190 }, { "epoch": 16.415578358208954, "grad_norm": 0.30879901210018906, "learning_rate": 8.837366183145044e-06, "loss": 0.0127, "step": 35195 }, { "epoch": 16.417910447761194, "grad_norm": 0.32553269174005567, "learning_rate": 8.83252127743649e-06, "loss": 0.0127, "step": 35200 }, { "epoch": 16.420242537313435, "grad_norm": 0.31951500670265615, "learning_rate": 8.827679147403167e-06, "loss": 0.0128, "step": 35205 }, { "epoch": 16.42257462686567, "grad_norm": 0.30897774290672814, "learning_rate": 8.822839793765056e-06, "loss": 0.0128, "step": 35210 }, { "epoch": 16.42490671641791, "grad_norm": 0.29178697253407826, "learning_rate": 8.818003217241707e-06, "loss": 0.0124, "step": 35215 }, { "epoch": 16.42723880597015, "grad_norm": 0.2942973432427728, "learning_rate": 8.813169418552294e-06, "loss": 0.0122, "step": 35220 }, { "epoch": 16.42957089552239, "grad_norm": 0.2843774102883171, "learning_rate": 8.808338398415544e-06, "loss": 0.0125, "step": 35225 }, { "epoch": 16.431902985074625, "grad_norm": 0.2962990812502746, "learning_rate": 8.803510157549785e-06, "loss": 0.0128, "step": 35230 }, { "epoch": 16.434235074626866, "grad_norm": 0.2759276084469966, "learning_rate": 8.79868469667293e-06, "loss": 0.0127, "step": 35235 }, { "epoch": 16.436567164179106, "grad_norm": 0.2974759881755272, "learning_rate": 8.793862016502477e-06, "loss": 0.0121, "step": 35240 }, { "epoch": 16.438899253731343, "grad_norm": 0.29128014367414723, "learning_rate": 8.789042117755521e-06, "loss": 0.0129, "step": 35245 }, { "epoch": 16.441231343283583, "grad_norm": 0.28962959085283424, "learning_rate": 8.78422500114873e-06, "loss": 0.0121, "step": 35250 }, { "epoch": 16.44356343283582, "grad_norm": 0.2648262981325156, "learning_rate": 8.779410667398352e-06, "loss": 0.0123, "step": 35255 }, { "epoch": 16.44589552238806, "grad_norm": 0.28958118109436914, "learning_rate": 8.774599117220254e-06, "loss": 0.0126, "step": 35260 }, { "epoch": 16.448227611940297, "grad_norm": 0.3036350695930814, "learning_rate": 8.769790351329847e-06, "loss": 0.0125, "step": 35265 }, { "epoch": 16.450559701492537, "grad_norm": 0.29609406757530976, "learning_rate": 8.764984370442166e-06, "loss": 0.0126, "step": 35270 }, { "epoch": 16.452891791044777, "grad_norm": 0.3023338013586167, "learning_rate": 8.7601811752718e-06, "loss": 0.0127, "step": 35275 }, { "epoch": 16.455223880597014, "grad_norm": 0.2998508033603593, "learning_rate": 8.755380766532945e-06, "loss": 0.0125, "step": 35280 }, { "epoch": 16.457555970149254, "grad_norm": 0.3252099086290119, "learning_rate": 8.750583144939373e-06, "loss": 0.0129, "step": 35285 }, { "epoch": 16.45988805970149, "grad_norm": 0.31597470680851597, "learning_rate": 8.745788311204444e-06, "loss": 0.0123, "step": 35290 }, { "epoch": 16.46222014925373, "grad_norm": 0.2749937190513466, "learning_rate": 8.740996266041108e-06, "loss": 0.0124, "step": 35295 }, { "epoch": 16.46455223880597, "grad_norm": 0.29358975790855374, "learning_rate": 8.736207010161899e-06, "loss": 0.0128, "step": 35300 }, { "epoch": 16.46688432835821, "grad_norm": 0.32604549855771797, "learning_rate": 8.731420544278913e-06, "loss": 0.0125, "step": 35305 }, { "epoch": 16.46921641791045, "grad_norm": 0.3135656497382314, "learning_rate": 8.726636869103884e-06, "loss": 0.0126, "step": 35310 }, { "epoch": 16.471548507462686, "grad_norm": 0.2872272872972948, "learning_rate": 8.721855985348068e-06, "loss": 0.0129, "step": 35315 }, { "epoch": 16.473880597014926, "grad_norm": 0.3200196840778924, "learning_rate": 8.71707789372236e-06, "loss": 0.0122, "step": 35320 }, { "epoch": 16.476212686567163, "grad_norm": 0.29618605171981544, "learning_rate": 8.712302594937202e-06, "loss": 0.0126, "step": 35325 }, { "epoch": 16.478544776119403, "grad_norm": 0.30880153438483066, "learning_rate": 8.70753008970264e-06, "loss": 0.0124, "step": 35330 }, { "epoch": 16.480876865671643, "grad_norm": 0.2992989331074897, "learning_rate": 8.7027603787283e-06, "loss": 0.0127, "step": 35335 }, { "epoch": 16.48320895522388, "grad_norm": 0.2985937796813159, "learning_rate": 8.697993462723392e-06, "loss": 0.0124, "step": 35340 }, { "epoch": 16.48554104477612, "grad_norm": 0.31470033475087533, "learning_rate": 8.69322934239671e-06, "loss": 0.0126, "step": 35345 }, { "epoch": 16.487873134328357, "grad_norm": 0.30047668169232755, "learning_rate": 8.688468018456639e-06, "loss": 0.0125, "step": 35350 }, { "epoch": 16.490205223880597, "grad_norm": 0.2988737480266461, "learning_rate": 8.68370949161113e-06, "loss": 0.0127, "step": 35355 }, { "epoch": 16.492537313432837, "grad_norm": 0.30888582892889666, "learning_rate": 8.678953762567739e-06, "loss": 0.0124, "step": 35360 }, { "epoch": 16.494869402985074, "grad_norm": 0.2915140602605641, "learning_rate": 8.674200832033595e-06, "loss": 0.0128, "step": 35365 }, { "epoch": 16.497201492537314, "grad_norm": 0.2912814956757315, "learning_rate": 8.669450700715414e-06, "loss": 0.0126, "step": 35370 }, { "epoch": 16.49953358208955, "grad_norm": 0.3289940915826332, "learning_rate": 8.664703369319496e-06, "loss": 0.0132, "step": 35375 }, { "epoch": 16.50186567164179, "grad_norm": 0.31160414095413624, "learning_rate": 8.659958838551722e-06, "loss": 0.0125, "step": 35380 }, { "epoch": 16.50419776119403, "grad_norm": 0.30553952048544525, "learning_rate": 8.655217109117564e-06, "loss": 0.0129, "step": 35385 }, { "epoch": 16.50652985074627, "grad_norm": 0.30454113046986575, "learning_rate": 8.650478181722055e-06, "loss": 0.0129, "step": 35390 }, { "epoch": 16.50886194029851, "grad_norm": 0.28936470217571064, "learning_rate": 8.64574205706985e-06, "loss": 0.0127, "step": 35395 }, { "epoch": 16.511194029850746, "grad_norm": 0.28358184240069667, "learning_rate": 8.641008735865153e-06, "loss": 0.0122, "step": 35400 }, { "epoch": 16.513526119402986, "grad_norm": 0.30620081483314154, "learning_rate": 8.63627821881176e-06, "loss": 0.0129, "step": 35405 }, { "epoch": 16.515858208955223, "grad_norm": 0.28767515241297303, "learning_rate": 8.631550506613062e-06, "loss": 0.0126, "step": 35410 }, { "epoch": 16.518190298507463, "grad_norm": 0.3041617207195823, "learning_rate": 8.626825599972022e-06, "loss": 0.0127, "step": 35415 }, { "epoch": 16.520522388059703, "grad_norm": 0.286474792734738, "learning_rate": 8.62210349959119e-06, "loss": 0.0124, "step": 35420 }, { "epoch": 16.52285447761194, "grad_norm": 0.3048726684170874, "learning_rate": 8.617384206172696e-06, "loss": 0.0125, "step": 35425 }, { "epoch": 16.52518656716418, "grad_norm": 0.3124292217658407, "learning_rate": 8.612667720418243e-06, "loss": 0.0123, "step": 35430 }, { "epoch": 16.527518656716417, "grad_norm": 0.30026766508294395, "learning_rate": 8.60795404302915e-06, "loss": 0.0129, "step": 35435 }, { "epoch": 16.529850746268657, "grad_norm": 0.2972896684583799, "learning_rate": 8.60324317470627e-06, "loss": 0.0126, "step": 35440 }, { "epoch": 16.532182835820894, "grad_norm": 0.3105743797657726, "learning_rate": 8.598535116150086e-06, "loss": 0.0122, "step": 35445 }, { "epoch": 16.534514925373134, "grad_norm": 0.31356265787955706, "learning_rate": 8.593829868060632e-06, "loss": 0.013, "step": 35450 }, { "epoch": 16.536847014925375, "grad_norm": 0.29708114840217975, "learning_rate": 8.589127431137527e-06, "loss": 0.0127, "step": 35455 }, { "epoch": 16.53917910447761, "grad_norm": 0.30926549256098357, "learning_rate": 8.584427806079988e-06, "loss": 0.0128, "step": 35460 }, { "epoch": 16.54151119402985, "grad_norm": 0.2982187517616922, "learning_rate": 8.579730993586798e-06, "loss": 0.0128, "step": 35465 }, { "epoch": 16.54384328358209, "grad_norm": 0.31432740551551785, "learning_rate": 8.575036994356334e-06, "loss": 0.0126, "step": 35470 }, { "epoch": 16.54617537313433, "grad_norm": 0.30578649174820777, "learning_rate": 8.570345809086543e-06, "loss": 0.0131, "step": 35475 }, { "epoch": 16.548507462686565, "grad_norm": 0.31532187129568634, "learning_rate": 8.565657438474963e-06, "loss": 0.0128, "step": 35480 }, { "epoch": 16.550839552238806, "grad_norm": 0.30208176291030453, "learning_rate": 8.560971883218714e-06, "loss": 0.0125, "step": 35485 }, { "epoch": 16.553171641791046, "grad_norm": 0.30560547098580343, "learning_rate": 8.556289144014474e-06, "loss": 0.0121, "step": 35490 }, { "epoch": 16.555503731343283, "grad_norm": 0.29282139229195364, "learning_rate": 8.551609221558548e-06, "loss": 0.0131, "step": 35495 }, { "epoch": 16.557835820895523, "grad_norm": 0.2957018318159279, "learning_rate": 8.546932116546775e-06, "loss": 0.0128, "step": 35500 }, { "epoch": 16.56016791044776, "grad_norm": 0.289446152493124, "learning_rate": 8.542257829674608e-06, "loss": 0.0129, "step": 35505 }, { "epoch": 16.5625, "grad_norm": 0.33668872458355376, "learning_rate": 8.53758636163706e-06, "loss": 0.0129, "step": 35510 }, { "epoch": 16.56483208955224, "grad_norm": 0.3069084049602637, "learning_rate": 8.53291771312874e-06, "loss": 0.0127, "step": 35515 }, { "epoch": 16.567164179104477, "grad_norm": 0.31840145067808323, "learning_rate": 8.528251884843829e-06, "loss": 0.0129, "step": 35520 }, { "epoch": 16.569496268656717, "grad_norm": 0.30258089568741614, "learning_rate": 8.523588877476089e-06, "loss": 0.013, "step": 35525 }, { "epoch": 16.571828358208954, "grad_norm": 0.2957591967579055, "learning_rate": 8.518928691718872e-06, "loss": 0.0127, "step": 35530 }, { "epoch": 16.574160447761194, "grad_norm": 0.28201374961878256, "learning_rate": 8.514271328265094e-06, "loss": 0.0127, "step": 35535 }, { "epoch": 16.576492537313435, "grad_norm": 0.3204645075027925, "learning_rate": 8.509616787807263e-06, "loss": 0.013, "step": 35540 }, { "epoch": 16.57882462686567, "grad_norm": 0.29497611091771975, "learning_rate": 8.504965071037465e-06, "loss": 0.0125, "step": 35545 }, { "epoch": 16.58115671641791, "grad_norm": 0.3083373915325858, "learning_rate": 8.500316178647366e-06, "loss": 0.0131, "step": 35550 }, { "epoch": 16.58348880597015, "grad_norm": 0.28023804404633984, "learning_rate": 8.495670111328214e-06, "loss": 0.0126, "step": 35555 }, { "epoch": 16.58582089552239, "grad_norm": 0.28972535304349534, "learning_rate": 8.491026869770832e-06, "loss": 0.0126, "step": 35560 }, { "epoch": 16.588152985074625, "grad_norm": 0.31388326902024294, "learning_rate": 8.486386454665621e-06, "loss": 0.0125, "step": 35565 }, { "epoch": 16.590485074626866, "grad_norm": 0.293661044482441, "learning_rate": 8.48174886670258e-06, "loss": 0.0127, "step": 35570 }, { "epoch": 16.592817164179106, "grad_norm": 0.3076955267465346, "learning_rate": 8.477114106571255e-06, "loss": 0.0128, "step": 35575 }, { "epoch": 16.595149253731343, "grad_norm": 0.2985908880318785, "learning_rate": 8.472482174960808e-06, "loss": 0.0134, "step": 35580 }, { "epoch": 16.597481343283583, "grad_norm": 0.3282583550092959, "learning_rate": 8.467853072559953e-06, "loss": 0.0131, "step": 35585 }, { "epoch": 16.59981343283582, "grad_norm": 0.30548294250456165, "learning_rate": 8.463226800056995e-06, "loss": 0.0131, "step": 35590 }, { "epoch": 16.60214552238806, "grad_norm": 0.30011399744114714, "learning_rate": 8.458603358139818e-06, "loss": 0.0128, "step": 35595 }, { "epoch": 16.604477611940297, "grad_norm": 0.29988433402844356, "learning_rate": 8.453982747495881e-06, "loss": 0.0135, "step": 35600 }, { "epoch": 16.606809701492537, "grad_norm": 0.2970007632087743, "learning_rate": 8.449364968812228e-06, "loss": 0.0134, "step": 35605 }, { "epoch": 16.609141791044777, "grad_norm": 0.2999927928452974, "learning_rate": 8.44475002277548e-06, "loss": 0.0131, "step": 35610 }, { "epoch": 16.611473880597014, "grad_norm": 0.27967341836834364, "learning_rate": 8.440137910071821e-06, "loss": 0.013, "step": 35615 }, { "epoch": 16.613805970149254, "grad_norm": 0.3095347093255426, "learning_rate": 8.435528631387052e-06, "loss": 0.0127, "step": 35620 }, { "epoch": 16.61613805970149, "grad_norm": 0.31739841578817557, "learning_rate": 8.430922187406501e-06, "loss": 0.0136, "step": 35625 }, { "epoch": 16.61847014925373, "grad_norm": 0.31279199440383554, "learning_rate": 8.426318578815128e-06, "loss": 0.0131, "step": 35630 }, { "epoch": 16.62080223880597, "grad_norm": 0.3049443647993674, "learning_rate": 8.421717806297431e-06, "loss": 0.013, "step": 35635 }, { "epoch": 16.62313432835821, "grad_norm": 0.30097406138362093, "learning_rate": 8.417119870537503e-06, "loss": 0.0129, "step": 35640 }, { "epoch": 16.62546641791045, "grad_norm": 0.3243010888320004, "learning_rate": 8.41252477221901e-06, "loss": 0.0129, "step": 35645 }, { "epoch": 16.627798507462686, "grad_norm": 0.3118806062151556, "learning_rate": 8.407932512025207e-06, "loss": 0.0137, "step": 35650 }, { "epoch": 16.630130597014926, "grad_norm": 0.3010862836302896, "learning_rate": 8.403343090638914e-06, "loss": 0.0131, "step": 35655 }, { "epoch": 16.632462686567163, "grad_norm": 0.3204825154220431, "learning_rate": 8.398756508742536e-06, "loss": 0.0129, "step": 35660 }, { "epoch": 16.634794776119403, "grad_norm": 0.2999927784433601, "learning_rate": 8.394172767018048e-06, "loss": 0.0131, "step": 35665 }, { "epoch": 16.637126865671643, "grad_norm": 0.3271549490017568, "learning_rate": 8.38959186614702e-06, "loss": 0.0131, "step": 35670 }, { "epoch": 16.63945895522388, "grad_norm": 0.3074960729158605, "learning_rate": 8.385013806810577e-06, "loss": 0.0131, "step": 35675 }, { "epoch": 16.64179104477612, "grad_norm": 0.29786589327074103, "learning_rate": 8.380438589689438e-06, "loss": 0.0128, "step": 35680 }, { "epoch": 16.644123134328357, "grad_norm": 0.30050494775309705, "learning_rate": 8.37586621546389e-06, "loss": 0.0132, "step": 35685 }, { "epoch": 16.646455223880597, "grad_norm": 0.3063072428838579, "learning_rate": 8.371296684813806e-06, "loss": 0.013, "step": 35690 }, { "epoch": 16.648787313432837, "grad_norm": 0.31092378725275177, "learning_rate": 8.36672999841863e-06, "loss": 0.0132, "step": 35695 }, { "epoch": 16.651119402985074, "grad_norm": 0.3144393830320626, "learning_rate": 8.36216615695738e-06, "loss": 0.0134, "step": 35700 }, { "epoch": 16.653451492537314, "grad_norm": 0.3214724575674888, "learning_rate": 8.357605161108663e-06, "loss": 0.0137, "step": 35705 }, { "epoch": 16.65578358208955, "grad_norm": 0.2854841660411959, "learning_rate": 8.353047011550654e-06, "loss": 0.0132, "step": 35710 }, { "epoch": 16.65811567164179, "grad_norm": 0.31572865632506236, "learning_rate": 8.348491708961102e-06, "loss": 0.0134, "step": 35715 }, { "epoch": 16.66044776119403, "grad_norm": 0.32106999079334136, "learning_rate": 8.343939254017336e-06, "loss": 0.0133, "step": 35720 }, { "epoch": 16.66277985074627, "grad_norm": 0.32259557015356216, "learning_rate": 8.339389647396265e-06, "loss": 0.0135, "step": 35725 }, { "epoch": 16.66511194029851, "grad_norm": 0.2970452159110565, "learning_rate": 8.334842889774374e-06, "loss": 0.0127, "step": 35730 }, { "epoch": 16.667444029850746, "grad_norm": 0.33068685327059943, "learning_rate": 8.330298981827719e-06, "loss": 0.0136, "step": 35735 }, { "epoch": 16.669776119402986, "grad_norm": 0.3408502205945471, "learning_rate": 8.325757924231938e-06, "loss": 0.0131, "step": 35740 }, { "epoch": 16.672108208955223, "grad_norm": 0.3240455480186661, "learning_rate": 8.321219717662249e-06, "loss": 0.0129, "step": 35745 }, { "epoch": 16.674440298507463, "grad_norm": 0.3363134028317703, "learning_rate": 8.31668436279342e-06, "loss": 0.0134, "step": 35750 }, { "epoch": 16.676772388059703, "grad_norm": 0.327996312177629, "learning_rate": 8.312151860299835e-06, "loss": 0.0141, "step": 35755 }, { "epoch": 16.67910447761194, "grad_norm": 0.3296233299424349, "learning_rate": 8.307622210855425e-06, "loss": 0.0134, "step": 35760 }, { "epoch": 16.68143656716418, "grad_norm": 0.32554934843937705, "learning_rate": 8.303095415133703e-06, "loss": 0.0131, "step": 35765 }, { "epoch": 16.683768656716417, "grad_norm": 0.316974925239425, "learning_rate": 8.298571473807767e-06, "loss": 0.0131, "step": 35770 }, { "epoch": 16.686100746268657, "grad_norm": 0.29644398505696046, "learning_rate": 8.29405038755028e-06, "loss": 0.0129, "step": 35775 }, { "epoch": 16.688432835820894, "grad_norm": 0.3053523096101135, "learning_rate": 8.289532157033481e-06, "loss": 0.0131, "step": 35780 }, { "epoch": 16.690764925373134, "grad_norm": 0.3014615675552324, "learning_rate": 8.2850167829292e-06, "loss": 0.013, "step": 35785 }, { "epoch": 16.693097014925375, "grad_norm": 0.32233036197838805, "learning_rate": 8.28050426590881e-06, "loss": 0.0134, "step": 35790 }, { "epoch": 16.69542910447761, "grad_norm": 0.32431531772324185, "learning_rate": 8.2759946066433e-06, "loss": 0.0133, "step": 35795 }, { "epoch": 16.69776119402985, "grad_norm": 0.2710498435104712, "learning_rate": 8.271487805803193e-06, "loss": 0.0131, "step": 35800 }, { "epoch": 16.70009328358209, "grad_norm": 0.3151157755160211, "learning_rate": 8.26698386405863e-06, "loss": 0.0132, "step": 35805 }, { "epoch": 16.70242537313433, "grad_norm": 0.3096704117982414, "learning_rate": 8.262482782079281e-06, "loss": 0.0128, "step": 35810 }, { "epoch": 16.704757462686565, "grad_norm": 0.31348240956593554, "learning_rate": 8.25798456053443e-06, "loss": 0.0131, "step": 35815 }, { "epoch": 16.707089552238806, "grad_norm": 0.3189213074584037, "learning_rate": 8.253489200092912e-06, "loss": 0.013, "step": 35820 }, { "epoch": 16.709421641791046, "grad_norm": 0.307601865213875, "learning_rate": 8.248996701423141e-06, "loss": 0.0132, "step": 35825 }, { "epoch": 16.711753731343283, "grad_norm": 0.3382428464370193, "learning_rate": 8.244507065193117e-06, "loss": 0.0132, "step": 35830 }, { "epoch": 16.714085820895523, "grad_norm": 0.3269925363648751, "learning_rate": 8.240020292070408e-06, "loss": 0.013, "step": 35835 }, { "epoch": 16.71641791044776, "grad_norm": 0.2948298835402243, "learning_rate": 8.235536382722133e-06, "loss": 0.0133, "step": 35840 }, { "epoch": 16.71875, "grad_norm": 0.3125893142949479, "learning_rate": 8.231055337815039e-06, "loss": 0.0136, "step": 35845 }, { "epoch": 16.72108208955224, "grad_norm": 0.32416798052125895, "learning_rate": 8.226577158015383e-06, "loss": 0.0129, "step": 35850 }, { "epoch": 16.723414179104477, "grad_norm": 0.3289901941239085, "learning_rate": 8.22210184398905e-06, "loss": 0.0139, "step": 35855 }, { "epoch": 16.725746268656717, "grad_norm": 0.3159172080018601, "learning_rate": 8.217629396401465e-06, "loss": 0.0132, "step": 35860 }, { "epoch": 16.728078358208954, "grad_norm": 0.303146578989104, "learning_rate": 8.21315981591764e-06, "loss": 0.0131, "step": 35865 }, { "epoch": 16.730410447761194, "grad_norm": 0.31593582020944017, "learning_rate": 8.208693103202158e-06, "loss": 0.0133, "step": 35870 }, { "epoch": 16.732742537313435, "grad_norm": 0.31738613437630636, "learning_rate": 8.20422925891918e-06, "loss": 0.0131, "step": 35875 }, { "epoch": 16.73507462686567, "grad_norm": 0.3265117292834825, "learning_rate": 8.199768283732432e-06, "loss": 0.0135, "step": 35880 }, { "epoch": 16.73740671641791, "grad_norm": 0.3057453388121499, "learning_rate": 8.19531017830523e-06, "loss": 0.0133, "step": 35885 }, { "epoch": 16.73973880597015, "grad_norm": 0.30129445385257725, "learning_rate": 8.190854943300436e-06, "loss": 0.0139, "step": 35890 }, { "epoch": 16.74207089552239, "grad_norm": 0.3079704258145521, "learning_rate": 8.18640257938051e-06, "loss": 0.0132, "step": 35895 }, { "epoch": 16.744402985074625, "grad_norm": 0.31489055070528893, "learning_rate": 8.181953087207467e-06, "loss": 0.0132, "step": 35900 }, { "epoch": 16.746735074626866, "grad_norm": 0.32272343440612666, "learning_rate": 8.177506467442915e-06, "loss": 0.0137, "step": 35905 }, { "epoch": 16.749067164179106, "grad_norm": 0.32414533379304583, "learning_rate": 8.17306272074802e-06, "loss": 0.0136, "step": 35910 }, { "epoch": 16.751399253731343, "grad_norm": 0.3283453876675815, "learning_rate": 8.16862184778352e-06, "loss": 0.0133, "step": 35915 }, { "epoch": 16.753731343283583, "grad_norm": 0.32384437572946584, "learning_rate": 8.164183849209741e-06, "loss": 0.0134, "step": 35920 }, { "epoch": 16.75606343283582, "grad_norm": 0.3277302651355488, "learning_rate": 8.159748725686554e-06, "loss": 0.0131, "step": 35925 }, { "epoch": 16.75839552238806, "grad_norm": 0.31098611778873747, "learning_rate": 8.155316477873438e-06, "loss": 0.0133, "step": 35930 }, { "epoch": 16.760727611940297, "grad_norm": 0.33027889093282714, "learning_rate": 8.150887106429412e-06, "loss": 0.0132, "step": 35935 }, { "epoch": 16.763059701492537, "grad_norm": 0.33844920014935653, "learning_rate": 8.146460612013083e-06, "loss": 0.0133, "step": 35940 }, { "epoch": 16.765391791044777, "grad_norm": 0.30625274384545426, "learning_rate": 8.142036995282633e-06, "loss": 0.0129, "step": 35945 }, { "epoch": 16.767723880597014, "grad_norm": 0.31346587192508085, "learning_rate": 8.137616256895811e-06, "loss": 0.0131, "step": 35950 }, { "epoch": 16.770055970149254, "grad_norm": 0.31789168234515613, "learning_rate": 8.133198397509936e-06, "loss": 0.0131, "step": 35955 }, { "epoch": 16.77238805970149, "grad_norm": 0.3061315264685629, "learning_rate": 8.128783417781909e-06, "loss": 0.0132, "step": 35960 }, { "epoch": 16.77472014925373, "grad_norm": 0.32247168462920967, "learning_rate": 8.124371318368176e-06, "loss": 0.0135, "step": 35965 }, { "epoch": 16.77705223880597, "grad_norm": 0.3162351724906923, "learning_rate": 8.119962099924797e-06, "loss": 0.0135, "step": 35970 }, { "epoch": 16.77938432835821, "grad_norm": 0.32414212011187604, "learning_rate": 8.115555763107362e-06, "loss": 0.0134, "step": 35975 }, { "epoch": 16.78171641791045, "grad_norm": 0.3057061157019941, "learning_rate": 8.111152308571065e-06, "loss": 0.0128, "step": 35980 }, { "epoch": 16.784048507462686, "grad_norm": 0.3053551686545957, "learning_rate": 8.10675173697065e-06, "loss": 0.013, "step": 35985 }, { "epoch": 16.786380597014926, "grad_norm": 0.3239111622663403, "learning_rate": 8.10235404896044e-06, "loss": 0.0138, "step": 35990 }, { "epoch": 16.788712686567163, "grad_norm": 0.3151554786909021, "learning_rate": 8.097959245194333e-06, "loss": 0.0135, "step": 35995 }, { "epoch": 16.791044776119403, "grad_norm": 0.32280312161083796, "learning_rate": 8.09356732632579e-06, "loss": 0.013, "step": 36000 }, { "epoch": 16.793376865671643, "grad_norm": 0.3070421781946572, "learning_rate": 8.089178293007848e-06, "loss": 0.013, "step": 36005 }, { "epoch": 16.79570895522388, "grad_norm": 0.31645279269135085, "learning_rate": 8.084792145893122e-06, "loss": 0.0135, "step": 36010 }, { "epoch": 16.79804104477612, "grad_norm": 0.3144389006096287, "learning_rate": 8.08040888563377e-06, "loss": 0.0131, "step": 36015 }, { "epoch": 16.800373134328357, "grad_norm": 0.32167541785729564, "learning_rate": 8.07602851288157e-06, "loss": 0.0136, "step": 36020 }, { "epoch": 16.802705223880597, "grad_norm": 0.31582250244928917, "learning_rate": 8.07165102828781e-06, "loss": 0.0129, "step": 36025 }, { "epoch": 16.805037313432837, "grad_norm": 0.3088436755378812, "learning_rate": 8.067276432503406e-06, "loss": 0.0133, "step": 36030 }, { "epoch": 16.807369402985074, "grad_norm": 0.32879022105803163, "learning_rate": 8.062904726178806e-06, "loss": 0.0136, "step": 36035 }, { "epoch": 16.809701492537314, "grad_norm": 0.3279690368607014, "learning_rate": 8.058535909964041e-06, "loss": 0.0136, "step": 36040 }, { "epoch": 16.81203358208955, "grad_norm": 0.3356567933716082, "learning_rate": 8.054169984508714e-06, "loss": 0.0133, "step": 36045 }, { "epoch": 16.81436567164179, "grad_norm": 0.374616795082665, "learning_rate": 8.049806950461996e-06, "loss": 0.014, "step": 36050 }, { "epoch": 16.81669776119403, "grad_norm": 0.29997361278228174, "learning_rate": 8.045446808472628e-06, "loss": 0.0133, "step": 36055 }, { "epoch": 16.81902985074627, "grad_norm": 0.32135729973491306, "learning_rate": 8.041089559188929e-06, "loss": 0.0134, "step": 36060 }, { "epoch": 16.82136194029851, "grad_norm": 0.32821564412891235, "learning_rate": 8.036735203258766e-06, "loss": 0.0138, "step": 36065 }, { "epoch": 16.823694029850746, "grad_norm": 0.3286991704738585, "learning_rate": 8.032383741329598e-06, "loss": 0.0134, "step": 36070 }, { "epoch": 16.826026119402986, "grad_norm": 0.3222931218968911, "learning_rate": 8.028035174048446e-06, "loss": 0.0135, "step": 36075 }, { "epoch": 16.828358208955223, "grad_norm": 0.31153575800677585, "learning_rate": 8.023689502061897e-06, "loss": 0.0133, "step": 36080 }, { "epoch": 16.830690298507463, "grad_norm": 0.335774580581267, "learning_rate": 8.019346726016116e-06, "loss": 0.0134, "step": 36085 }, { "epoch": 16.833022388059703, "grad_norm": 0.29980994072106254, "learning_rate": 8.015006846556825e-06, "loss": 0.0133, "step": 36090 }, { "epoch": 16.83535447761194, "grad_norm": 0.3320998405833911, "learning_rate": 8.010669864329334e-06, "loss": 0.0144, "step": 36095 }, { "epoch": 16.83768656716418, "grad_norm": 0.2948029806646196, "learning_rate": 8.006335779978494e-06, "loss": 0.0129, "step": 36100 }, { "epoch": 16.840018656716417, "grad_norm": 0.32082171638097434, "learning_rate": 8.00200459414876e-06, "loss": 0.013, "step": 36105 }, { "epoch": 16.842350746268657, "grad_norm": 0.34201457293147536, "learning_rate": 7.997676307484123e-06, "loss": 0.0137, "step": 36110 }, { "epoch": 16.844682835820894, "grad_norm": 0.3443955001791909, "learning_rate": 7.993350920628164e-06, "loss": 0.0136, "step": 36115 }, { "epoch": 16.847014925373134, "grad_norm": 0.33176085849783826, "learning_rate": 7.989028434224028e-06, "loss": 0.0134, "step": 36120 }, { "epoch": 16.849347014925375, "grad_norm": 0.32095762724426047, "learning_rate": 7.984708848914426e-06, "loss": 0.0134, "step": 36125 }, { "epoch": 16.85167910447761, "grad_norm": 0.3141230552139176, "learning_rate": 7.980392165341636e-06, "loss": 0.0137, "step": 36130 }, { "epoch": 16.85401119402985, "grad_norm": 0.33492135647119337, "learning_rate": 7.976078384147515e-06, "loss": 0.0139, "step": 36135 }, { "epoch": 16.85634328358209, "grad_norm": 0.31817133374280043, "learning_rate": 7.971767505973468e-06, "loss": 0.0134, "step": 36140 }, { "epoch": 16.85867537313433, "grad_norm": 0.32344272590392964, "learning_rate": 7.9674595314605e-06, "loss": 0.013, "step": 36145 }, { "epoch": 16.861007462686565, "grad_norm": 0.32538076857110476, "learning_rate": 7.963154461249143e-06, "loss": 0.0137, "step": 36150 }, { "epoch": 16.863339552238806, "grad_norm": 0.3252583570966817, "learning_rate": 7.958852295979542e-06, "loss": 0.0135, "step": 36155 }, { "epoch": 16.865671641791046, "grad_norm": 0.32153323632790753, "learning_rate": 7.95455303629137e-06, "loss": 0.0134, "step": 36160 }, { "epoch": 16.868003731343283, "grad_norm": 0.31961912439558776, "learning_rate": 7.950256682823895e-06, "loss": 0.0139, "step": 36165 }, { "epoch": 16.870335820895523, "grad_norm": 0.3187609416497845, "learning_rate": 7.945963236215944e-06, "loss": 0.0132, "step": 36170 }, { "epoch": 16.87266791044776, "grad_norm": 0.3490921272661752, "learning_rate": 7.941672697105905e-06, "loss": 0.0139, "step": 36175 }, { "epoch": 16.875, "grad_norm": 0.33897054015814687, "learning_rate": 7.937385066131745e-06, "loss": 0.0131, "step": 36180 }, { "epoch": 16.87733208955224, "grad_norm": 0.29515578953764604, "learning_rate": 7.933100343930995e-06, "loss": 0.0133, "step": 36185 }, { "epoch": 16.879664179104477, "grad_norm": 0.3044540293507575, "learning_rate": 7.928818531140748e-06, "loss": 0.0131, "step": 36190 }, { "epoch": 16.881996268656717, "grad_norm": 0.28218078524159085, "learning_rate": 7.924539628397675e-06, "loss": 0.0134, "step": 36195 }, { "epoch": 16.884328358208954, "grad_norm": 0.30261565893516146, "learning_rate": 7.920263636337994e-06, "loss": 0.0135, "step": 36200 }, { "epoch": 16.886660447761194, "grad_norm": 0.3369662737804819, "learning_rate": 7.915990555597522e-06, "loss": 0.0136, "step": 36205 }, { "epoch": 16.888992537313435, "grad_norm": 0.30651456083354506, "learning_rate": 7.911720386811613e-06, "loss": 0.0134, "step": 36210 }, { "epoch": 16.89132462686567, "grad_norm": 0.3229727222452434, "learning_rate": 7.907453130615203e-06, "loss": 0.0138, "step": 36215 }, { "epoch": 16.89365671641791, "grad_norm": 0.34861528401762937, "learning_rate": 7.90318878764279e-06, "loss": 0.0138, "step": 36220 }, { "epoch": 16.89598880597015, "grad_norm": 0.3111473138272947, "learning_rate": 7.898927358528447e-06, "loss": 0.0136, "step": 36225 }, { "epoch": 16.89832089552239, "grad_norm": 0.32564111806944585, "learning_rate": 7.894668843905803e-06, "loss": 0.0138, "step": 36230 }, { "epoch": 16.900652985074625, "grad_norm": 0.33676367770382115, "learning_rate": 7.890413244408059e-06, "loss": 0.0137, "step": 36235 }, { "epoch": 16.902985074626866, "grad_norm": 0.3210581934740294, "learning_rate": 7.886160560667984e-06, "loss": 0.0136, "step": 36240 }, { "epoch": 16.905317164179106, "grad_norm": 0.3307502591184439, "learning_rate": 7.881910793317915e-06, "loss": 0.0136, "step": 36245 }, { "epoch": 16.907649253731343, "grad_norm": 0.3354976443972757, "learning_rate": 7.87766394298974e-06, "loss": 0.0133, "step": 36250 }, { "epoch": 16.909981343283583, "grad_norm": 0.32854668834156325, "learning_rate": 7.873420010314933e-06, "loss": 0.0132, "step": 36255 }, { "epoch": 16.91231343283582, "grad_norm": 0.3293992837497759, "learning_rate": 7.869178995924525e-06, "loss": 0.0135, "step": 36260 }, { "epoch": 16.91464552238806, "grad_norm": 0.33237634279148026, "learning_rate": 7.864940900449109e-06, "loss": 0.0137, "step": 36265 }, { "epoch": 16.916977611940297, "grad_norm": 0.32376597876663243, "learning_rate": 7.860705724518857e-06, "loss": 0.0138, "step": 36270 }, { "epoch": 16.919309701492537, "grad_norm": 0.31989100315717706, "learning_rate": 7.8564734687635e-06, "loss": 0.0137, "step": 36275 }, { "epoch": 16.921641791044777, "grad_norm": 0.3148940834479812, "learning_rate": 7.852244133812332e-06, "loss": 0.0134, "step": 36280 }, { "epoch": 16.923973880597014, "grad_norm": 0.3291795728174081, "learning_rate": 7.8480177202942e-06, "loss": 0.0135, "step": 36285 }, { "epoch": 16.926305970149254, "grad_norm": 0.3144041285833692, "learning_rate": 7.843794228837556e-06, "loss": 0.0136, "step": 36290 }, { "epoch": 16.92863805970149, "grad_norm": 0.3055018766020447, "learning_rate": 7.839573660070373e-06, "loss": 0.0137, "step": 36295 }, { "epoch": 16.93097014925373, "grad_norm": 0.32579177610793536, "learning_rate": 7.83535601462022e-06, "loss": 0.0137, "step": 36300 }, { "epoch": 16.93330223880597, "grad_norm": 0.33250185425447093, "learning_rate": 7.831141293114216e-06, "loss": 0.0135, "step": 36305 }, { "epoch": 16.93563432835821, "grad_norm": 0.29780342846640356, "learning_rate": 7.82692949617905e-06, "loss": 0.0134, "step": 36310 }, { "epoch": 16.93796641791045, "grad_norm": 0.32578560915921945, "learning_rate": 7.822720624440978e-06, "loss": 0.0135, "step": 36315 }, { "epoch": 16.940298507462686, "grad_norm": 0.3414043756073633, "learning_rate": 7.818514678525822e-06, "loss": 0.0139, "step": 36320 }, { "epoch": 16.942630597014926, "grad_norm": 0.33942040634703574, "learning_rate": 7.814311659058951e-06, "loss": 0.0133, "step": 36325 }, { "epoch": 16.944962686567163, "grad_norm": 0.320984713732921, "learning_rate": 7.810111566665333e-06, "loss": 0.0135, "step": 36330 }, { "epoch": 16.947294776119403, "grad_norm": 0.3471207625276748, "learning_rate": 7.805914401969466e-06, "loss": 0.0135, "step": 36335 }, { "epoch": 16.949626865671643, "grad_norm": 0.3355118970546133, "learning_rate": 7.80172016559544e-06, "loss": 0.0137, "step": 36340 }, { "epoch": 16.95195895522388, "grad_norm": 0.3503644170489977, "learning_rate": 7.797528858166891e-06, "loss": 0.0138, "step": 36345 }, { "epoch": 16.95429104477612, "grad_norm": 0.3256089327889439, "learning_rate": 7.793340480307027e-06, "loss": 0.0138, "step": 36350 }, { "epoch": 16.956623134328357, "grad_norm": 0.33366187030452027, "learning_rate": 7.789155032638619e-06, "loss": 0.0136, "step": 36355 }, { "epoch": 16.958955223880597, "grad_norm": 0.3297853634243778, "learning_rate": 7.784972515784004e-06, "loss": 0.0138, "step": 36360 }, { "epoch": 16.961287313432837, "grad_norm": 0.3265168399789917, "learning_rate": 7.780792930365085e-06, "loss": 0.0134, "step": 36365 }, { "epoch": 16.963619402985074, "grad_norm": 0.3191495529012563, "learning_rate": 7.776616277003328e-06, "loss": 0.0137, "step": 36370 }, { "epoch": 16.965951492537314, "grad_norm": 0.3290540108343719, "learning_rate": 7.772442556319747e-06, "loss": 0.0141, "step": 36375 }, { "epoch": 16.96828358208955, "grad_norm": 0.3128645650583725, "learning_rate": 7.768271768934955e-06, "loss": 0.0135, "step": 36380 }, { "epoch": 16.97061567164179, "grad_norm": 0.3291218182961878, "learning_rate": 7.76410391546909e-06, "loss": 0.0135, "step": 36385 }, { "epoch": 16.97294776119403, "grad_norm": 0.32292582749556753, "learning_rate": 7.759938996541886e-06, "loss": 0.0136, "step": 36390 }, { "epoch": 16.97527985074627, "grad_norm": 0.3144880238888245, "learning_rate": 7.755777012772615e-06, "loss": 0.0127, "step": 36395 }, { "epoch": 16.97761194029851, "grad_norm": 0.3069397278040189, "learning_rate": 7.751617964780131e-06, "loss": 0.0134, "step": 36400 }, { "epoch": 16.979944029850746, "grad_norm": 0.33630378204551165, "learning_rate": 7.747461853182842e-06, "loss": 0.0141, "step": 36405 }, { "epoch": 16.982276119402986, "grad_norm": 0.329234127056889, "learning_rate": 7.743308678598722e-06, "loss": 0.0136, "step": 36410 }, { "epoch": 16.984608208955223, "grad_norm": 0.31728337616583907, "learning_rate": 7.73915844164531e-06, "loss": 0.0134, "step": 36415 }, { "epoch": 16.986940298507463, "grad_norm": 0.32783237043367597, "learning_rate": 7.73501114293971e-06, "loss": 0.0138, "step": 36420 }, { "epoch": 16.989272388059703, "grad_norm": 0.32254741448006985, "learning_rate": 7.730866783098576e-06, "loss": 0.014, "step": 36425 }, { "epoch": 16.99160447761194, "grad_norm": 0.32317598019524696, "learning_rate": 7.726725362738141e-06, "loss": 0.0134, "step": 36430 }, { "epoch": 16.99393656716418, "grad_norm": 0.3217081080711385, "learning_rate": 7.722586882474191e-06, "loss": 0.0136, "step": 36435 }, { "epoch": 16.996268656716417, "grad_norm": 0.32198223056692526, "learning_rate": 7.71845134292208e-06, "loss": 0.0134, "step": 36440 }, { "epoch": 16.998600746268657, "grad_norm": 0.3016259397028913, "learning_rate": 7.714318744696728e-06, "loss": 0.0133, "step": 36445 }, { "epoch": 17.000932835820894, "grad_norm": 0.19322308586138415, "learning_rate": 7.710189088412604e-06, "loss": 0.0115, "step": 36450 }, { "epoch": 17.003264925373134, "grad_norm": 0.22967791136901042, "learning_rate": 7.706062374683757e-06, "loss": 0.0081, "step": 36455 }, { "epoch": 17.005597014925375, "grad_norm": 0.23480039850083448, "learning_rate": 7.70193860412378e-06, "loss": 0.0081, "step": 36460 }, { "epoch": 17.00792910447761, "grad_norm": 0.22687456813917936, "learning_rate": 7.697817777345852e-06, "loss": 0.008, "step": 36465 }, { "epoch": 17.01026119402985, "grad_norm": 0.21080611078778963, "learning_rate": 7.693699894962686e-06, "loss": 0.008, "step": 36470 }, { "epoch": 17.01259328358209, "grad_norm": 0.2351115096528289, "learning_rate": 7.689584957586578e-06, "loss": 0.0079, "step": 36475 }, { "epoch": 17.01492537313433, "grad_norm": 0.22657266021918981, "learning_rate": 7.68547296582938e-06, "loss": 0.0083, "step": 36480 }, { "epoch": 17.01725746268657, "grad_norm": 0.21291926628353317, "learning_rate": 7.681363920302506e-06, "loss": 0.0077, "step": 36485 }, { "epoch": 17.019589552238806, "grad_norm": 0.2583570747988229, "learning_rate": 7.67725782161693e-06, "loss": 0.0076, "step": 36490 }, { "epoch": 17.021921641791046, "grad_norm": 0.22729668402536013, "learning_rate": 7.673154670383195e-06, "loss": 0.0077, "step": 36495 }, { "epoch": 17.024253731343283, "grad_norm": 0.21267899234614357, "learning_rate": 7.669054467211388e-06, "loss": 0.0078, "step": 36500 }, { "epoch": 17.026585820895523, "grad_norm": 0.231445034931361, "learning_rate": 7.664957212711187e-06, "loss": 0.0078, "step": 36505 }, { "epoch": 17.02891791044776, "grad_norm": 0.221791870217178, "learning_rate": 7.660862907491795e-06, "loss": 0.0079, "step": 36510 }, { "epoch": 17.03125, "grad_norm": 0.22180606537847117, "learning_rate": 7.656771552162015e-06, "loss": 0.0075, "step": 36515 }, { "epoch": 17.03358208955224, "grad_norm": 0.23201268600797711, "learning_rate": 7.652683147330177e-06, "loss": 0.0082, "step": 36520 }, { "epoch": 17.035914179104477, "grad_norm": 0.22217866705008138, "learning_rate": 7.6485976936042e-06, "loss": 0.0076, "step": 36525 }, { "epoch": 17.038246268656717, "grad_norm": 0.21529572438735955, "learning_rate": 7.644515191591542e-06, "loss": 0.0079, "step": 36530 }, { "epoch": 17.040578358208954, "grad_norm": 0.23258567584607645, "learning_rate": 7.640435641899236e-06, "loss": 0.0079, "step": 36535 }, { "epoch": 17.042910447761194, "grad_norm": 0.22051277739311614, "learning_rate": 7.636359045133873e-06, "loss": 0.0079, "step": 36540 }, { "epoch": 17.04524253731343, "grad_norm": 0.2282743184087084, "learning_rate": 7.632285401901606e-06, "loss": 0.0078, "step": 36545 }, { "epoch": 17.04757462686567, "grad_norm": 0.24168368761358347, "learning_rate": 7.6282147128081364e-06, "loss": 0.0081, "step": 36550 }, { "epoch": 17.04990671641791, "grad_norm": 0.2270721200791876, "learning_rate": 7.624146978458754e-06, "loss": 0.0076, "step": 36555 }, { "epoch": 17.05223880597015, "grad_norm": 0.2289287086504526, "learning_rate": 7.620082199458269e-06, "loss": 0.0078, "step": 36560 }, { "epoch": 17.05457089552239, "grad_norm": 0.21104224908335703, "learning_rate": 7.616020376411098e-06, "loss": 0.0077, "step": 36565 }, { "epoch": 17.056902985074625, "grad_norm": 0.21260172916390258, "learning_rate": 7.611961509921182e-06, "loss": 0.0076, "step": 36570 }, { "epoch": 17.059235074626866, "grad_norm": 0.21489401120534285, "learning_rate": 7.6079056005920375e-06, "loss": 0.0073, "step": 36575 }, { "epoch": 17.061567164179106, "grad_norm": 0.22121634115897132, "learning_rate": 7.603852649026738e-06, "loss": 0.0075, "step": 36580 }, { "epoch": 17.063899253731343, "grad_norm": 0.19381459147094118, "learning_rate": 7.599802655827924e-06, "loss": 0.0075, "step": 36585 }, { "epoch": 17.066231343283583, "grad_norm": 0.23363359961081726, "learning_rate": 7.595755621597788e-06, "loss": 0.0076, "step": 36590 }, { "epoch": 17.06856343283582, "grad_norm": 0.19941383309006436, "learning_rate": 7.591711546938086e-06, "loss": 0.0073, "step": 36595 }, { "epoch": 17.07089552238806, "grad_norm": 0.2121930802924916, "learning_rate": 7.587670432450131e-06, "loss": 0.0075, "step": 36600 }, { "epoch": 17.073227611940297, "grad_norm": 0.2082317099330928, "learning_rate": 7.583632278734798e-06, "loss": 0.0077, "step": 36605 }, { "epoch": 17.075559701492537, "grad_norm": 0.21996487955300814, "learning_rate": 7.57959708639252e-06, "loss": 0.0074, "step": 36610 }, { "epoch": 17.077891791044777, "grad_norm": 0.2245093913215368, "learning_rate": 7.575564856023298e-06, "loss": 0.0077, "step": 36615 }, { "epoch": 17.080223880597014, "grad_norm": 0.1876100204311609, "learning_rate": 7.5715355882266815e-06, "loss": 0.0077, "step": 36620 }, { "epoch": 17.082555970149254, "grad_norm": 0.21384716850783447, "learning_rate": 7.567509283601784e-06, "loss": 0.0073, "step": 36625 }, { "epoch": 17.08488805970149, "grad_norm": 0.21971156370709363, "learning_rate": 7.5634859427472835e-06, "loss": 0.0078, "step": 36630 }, { "epoch": 17.08722014925373, "grad_norm": 0.21708431709496792, "learning_rate": 7.5594655662613995e-06, "loss": 0.0074, "step": 36635 }, { "epoch": 17.08955223880597, "grad_norm": 0.22057298040031217, "learning_rate": 7.5554481547419395e-06, "loss": 0.0074, "step": 36640 }, { "epoch": 17.09188432835821, "grad_norm": 0.21596042426647932, "learning_rate": 7.551433708786243e-06, "loss": 0.0074, "step": 36645 }, { "epoch": 17.09421641791045, "grad_norm": 0.2159188234101408, "learning_rate": 7.547422228991223e-06, "loss": 0.007, "step": 36650 }, { "epoch": 17.096548507462686, "grad_norm": 0.22783662624083661, "learning_rate": 7.543413715953347e-06, "loss": 0.0076, "step": 36655 }, { "epoch": 17.098880597014926, "grad_norm": 0.2256303166097536, "learning_rate": 7.539408170268644e-06, "loss": 0.0075, "step": 36660 }, { "epoch": 17.101212686567163, "grad_norm": 0.23263570994087573, "learning_rate": 7.535405592532703e-06, "loss": 0.0078, "step": 36665 }, { "epoch": 17.103544776119403, "grad_norm": 0.209769862735918, "learning_rate": 7.531405983340668e-06, "loss": 0.0071, "step": 36670 }, { "epoch": 17.105876865671643, "grad_norm": 0.2167826030917257, "learning_rate": 7.527409343287231e-06, "loss": 0.0075, "step": 36675 }, { "epoch": 17.10820895522388, "grad_norm": 0.21236706568016903, "learning_rate": 7.523415672966675e-06, "loss": 0.0074, "step": 36680 }, { "epoch": 17.11054104477612, "grad_norm": 0.2070860712284132, "learning_rate": 7.519424972972797e-06, "loss": 0.0074, "step": 36685 }, { "epoch": 17.112873134328357, "grad_norm": 0.22078110178843838, "learning_rate": 7.515437243898998e-06, "loss": 0.0076, "step": 36690 }, { "epoch": 17.115205223880597, "grad_norm": 0.2125187509718531, "learning_rate": 7.511452486338202e-06, "loss": 0.0076, "step": 36695 }, { "epoch": 17.117537313432837, "grad_norm": 0.20388698532326122, "learning_rate": 7.507470700882905e-06, "loss": 0.0073, "step": 36700 }, { "epoch": 17.119869402985074, "grad_norm": 0.2314656406253905, "learning_rate": 7.503491888125165e-06, "loss": 0.0077, "step": 36705 }, { "epoch": 17.122201492537314, "grad_norm": 0.21491809484540955, "learning_rate": 7.499516048656589e-06, "loss": 0.0074, "step": 36710 }, { "epoch": 17.12453358208955, "grad_norm": 0.21285455270310305, "learning_rate": 7.495543183068349e-06, "loss": 0.0075, "step": 36715 }, { "epoch": 17.12686567164179, "grad_norm": 0.21474201763288017, "learning_rate": 7.491573291951176e-06, "loss": 0.0074, "step": 36720 }, { "epoch": 17.12919776119403, "grad_norm": 0.23175300958262743, "learning_rate": 7.487606375895343e-06, "loss": 0.0077, "step": 36725 }, { "epoch": 17.13152985074627, "grad_norm": 0.20762786577442616, "learning_rate": 7.483642435490706e-06, "loss": 0.0075, "step": 36730 }, { "epoch": 17.13386194029851, "grad_norm": 0.23597242226178625, "learning_rate": 7.479681471326648e-06, "loss": 0.0076, "step": 36735 }, { "epoch": 17.136194029850746, "grad_norm": 0.18954837128202986, "learning_rate": 7.475723483992149e-06, "loss": 0.0073, "step": 36740 }, { "epoch": 17.138526119402986, "grad_norm": 0.20696526156545073, "learning_rate": 7.471768474075706e-06, "loss": 0.0072, "step": 36745 }, { "epoch": 17.140858208955223, "grad_norm": 0.2132141134324873, "learning_rate": 7.467816442165397e-06, "loss": 0.0073, "step": 36750 }, { "epoch": 17.143190298507463, "grad_norm": 0.23201048280753345, "learning_rate": 7.463867388848851e-06, "loss": 0.0075, "step": 36755 }, { "epoch": 17.145522388059703, "grad_norm": 0.21826321240999608, "learning_rate": 7.459921314713253e-06, "loss": 0.0074, "step": 36760 }, { "epoch": 17.14785447761194, "grad_norm": 0.22494759749100765, "learning_rate": 7.4559782203453485e-06, "loss": 0.0074, "step": 36765 }, { "epoch": 17.15018656716418, "grad_norm": 0.1960035209572958, "learning_rate": 7.452038106331442e-06, "loss": 0.0072, "step": 36770 }, { "epoch": 17.152518656716417, "grad_norm": 0.23403944881992897, "learning_rate": 7.448100973257381e-06, "loss": 0.0075, "step": 36775 }, { "epoch": 17.154850746268657, "grad_norm": 0.21064129187641664, "learning_rate": 7.444166821708584e-06, "loss": 0.0075, "step": 36780 }, { "epoch": 17.157182835820894, "grad_norm": 0.20271588595263232, "learning_rate": 7.440235652270024e-06, "loss": 0.0076, "step": 36785 }, { "epoch": 17.159514925373134, "grad_norm": 0.21856074814909504, "learning_rate": 7.436307465526224e-06, "loss": 0.0077, "step": 36790 }, { "epoch": 17.161847014925375, "grad_norm": 0.2181248999743933, "learning_rate": 7.432382262061271e-06, "loss": 0.0075, "step": 36795 }, { "epoch": 17.16417910447761, "grad_norm": 0.21379116809593737, "learning_rate": 7.4284600424588045e-06, "loss": 0.0076, "step": 36800 }, { "epoch": 17.16651119402985, "grad_norm": 0.21794985773917128, "learning_rate": 7.424540807302019e-06, "loss": 0.0074, "step": 36805 }, { "epoch": 17.16884328358209, "grad_norm": 0.1996533886453613, "learning_rate": 7.42062455717367e-06, "loss": 0.0074, "step": 36810 }, { "epoch": 17.17117537313433, "grad_norm": 0.20980457550251247, "learning_rate": 7.4167112926560714e-06, "loss": 0.0073, "step": 36815 }, { "epoch": 17.17350746268657, "grad_norm": 0.21135477684781298, "learning_rate": 7.412801014331075e-06, "loss": 0.0074, "step": 36820 }, { "epoch": 17.175839552238806, "grad_norm": 0.24804988825493146, "learning_rate": 7.408893722780108e-06, "loss": 0.0077, "step": 36825 }, { "epoch": 17.178171641791046, "grad_norm": 0.20850638293860435, "learning_rate": 7.4049894185841476e-06, "loss": 0.0076, "step": 36830 }, { "epoch": 17.180503731343283, "grad_norm": 0.21738437161027074, "learning_rate": 7.401088102323729e-06, "loss": 0.0073, "step": 36835 }, { "epoch": 17.182835820895523, "grad_norm": 0.22689581895403305, "learning_rate": 7.397189774578939e-06, "loss": 0.0074, "step": 36840 }, { "epoch": 17.18516791044776, "grad_norm": 0.23043894294946235, "learning_rate": 7.393294435929424e-06, "loss": 0.0077, "step": 36845 }, { "epoch": 17.1875, "grad_norm": 0.21666159128106732, "learning_rate": 7.389402086954368e-06, "loss": 0.0072, "step": 36850 }, { "epoch": 17.18983208955224, "grad_norm": 0.2326948937850058, "learning_rate": 7.385512728232552e-06, "loss": 0.0074, "step": 36855 }, { "epoch": 17.192164179104477, "grad_norm": 0.21557493250619678, "learning_rate": 7.38162636034226e-06, "loss": 0.0076, "step": 36860 }, { "epoch": 17.194496268656717, "grad_norm": 0.19941638780778245, "learning_rate": 7.37774298386138e-06, "loss": 0.0073, "step": 36865 }, { "epoch": 17.196828358208954, "grad_norm": 0.21801576753689503, "learning_rate": 7.373862599367316e-06, "loss": 0.0077, "step": 36870 }, { "epoch": 17.199160447761194, "grad_norm": 0.22117681951643542, "learning_rate": 7.3699852074370605e-06, "loss": 0.0078, "step": 36875 }, { "epoch": 17.20149253731343, "grad_norm": 0.20654565639172204, "learning_rate": 7.366110808647128e-06, "loss": 0.0076, "step": 36880 }, { "epoch": 17.20382462686567, "grad_norm": 0.22080486858889067, "learning_rate": 7.362239403573614e-06, "loss": 0.0074, "step": 36885 }, { "epoch": 17.20615671641791, "grad_norm": 0.19685160091800166, "learning_rate": 7.3583709927921574e-06, "loss": 0.0075, "step": 36890 }, { "epoch": 17.20848880597015, "grad_norm": 0.22018308574968, "learning_rate": 7.3545055768779526e-06, "loss": 0.0075, "step": 36895 }, { "epoch": 17.21082089552239, "grad_norm": 0.23332216238123205, "learning_rate": 7.350643156405751e-06, "loss": 0.0076, "step": 36900 }, { "epoch": 17.213152985074625, "grad_norm": 0.2073976116270177, "learning_rate": 7.346783731949864e-06, "loss": 0.0077, "step": 36905 }, { "epoch": 17.215485074626866, "grad_norm": 0.21843952767457878, "learning_rate": 7.342927304084132e-06, "loss": 0.0074, "step": 36910 }, { "epoch": 17.217817164179106, "grad_norm": 0.20352423454699464, "learning_rate": 7.339073873381991e-06, "loss": 0.0076, "step": 36915 }, { "epoch": 17.220149253731343, "grad_norm": 0.19960860228231259, "learning_rate": 7.335223440416391e-06, "loss": 0.0076, "step": 36920 }, { "epoch": 17.222481343283583, "grad_norm": 0.22493624750402885, "learning_rate": 7.3313760057598715e-06, "loss": 0.0079, "step": 36925 }, { "epoch": 17.22481343283582, "grad_norm": 0.2230845073132302, "learning_rate": 7.327531569984497e-06, "loss": 0.0076, "step": 36930 }, { "epoch": 17.22714552238806, "grad_norm": 0.21035613645469872, "learning_rate": 7.3236901336619024e-06, "loss": 0.0075, "step": 36935 }, { "epoch": 17.229477611940297, "grad_norm": 0.22648569796984824, "learning_rate": 7.319851697363271e-06, "loss": 0.0075, "step": 36940 }, { "epoch": 17.231809701492537, "grad_norm": 0.24041069767384715, "learning_rate": 7.316016261659342e-06, "loss": 0.008, "step": 36945 }, { "epoch": 17.234141791044777, "grad_norm": 0.20637543276296377, "learning_rate": 7.31218382712041e-06, "loss": 0.0072, "step": 36950 }, { "epoch": 17.236473880597014, "grad_norm": 0.22987622935303156, "learning_rate": 7.308354394316322e-06, "loss": 0.0078, "step": 36955 }, { "epoch": 17.238805970149254, "grad_norm": 0.21913354340066304, "learning_rate": 7.304527963816472e-06, "loss": 0.0076, "step": 36960 }, { "epoch": 17.24113805970149, "grad_norm": 0.22699518755839765, "learning_rate": 7.300704536189819e-06, "loss": 0.0078, "step": 36965 }, { "epoch": 17.24347014925373, "grad_norm": 0.22275296268643968, "learning_rate": 7.2968841120048666e-06, "loss": 0.0078, "step": 36970 }, { "epoch": 17.24580223880597, "grad_norm": 0.21683043300960156, "learning_rate": 7.293066691829676e-06, "loss": 0.0075, "step": 36975 }, { "epoch": 17.24813432835821, "grad_norm": 0.2151285979505528, "learning_rate": 7.289252276231863e-06, "loss": 0.0076, "step": 36980 }, { "epoch": 17.25046641791045, "grad_norm": 0.20771704915096734, "learning_rate": 7.285440865778594e-06, "loss": 0.0073, "step": 36985 }, { "epoch": 17.252798507462686, "grad_norm": 0.2298779917555245, "learning_rate": 7.281632461036594e-06, "loss": 0.008, "step": 36990 }, { "epoch": 17.255130597014926, "grad_norm": 0.21879890228772608, "learning_rate": 7.277827062572121e-06, "loss": 0.0074, "step": 36995 }, { "epoch": 17.257462686567163, "grad_norm": 0.22102508747772476, "learning_rate": 7.27402467095102e-06, "loss": 0.0072, "step": 37000 }, { "epoch": 17.259794776119403, "grad_norm": 0.239399028854807, "learning_rate": 7.2702252867386575e-06, "loss": 0.008, "step": 37005 }, { "epoch": 17.262126865671643, "grad_norm": 0.2358995596063326, "learning_rate": 7.266428910499971e-06, "loss": 0.0075, "step": 37010 }, { "epoch": 17.26445895522388, "grad_norm": 0.22853853559528203, "learning_rate": 7.262635542799444e-06, "loss": 0.0079, "step": 37015 }, { "epoch": 17.26679104477612, "grad_norm": 0.2068820082220521, "learning_rate": 7.258845184201111e-06, "loss": 0.0079, "step": 37020 }, { "epoch": 17.269123134328357, "grad_norm": 0.218156285332045, "learning_rate": 7.255057835268567e-06, "loss": 0.0078, "step": 37025 }, { "epoch": 17.271455223880597, "grad_norm": 0.21131139287372233, "learning_rate": 7.251273496564957e-06, "loss": 0.0078, "step": 37030 }, { "epoch": 17.273787313432837, "grad_norm": 0.2189639316842703, "learning_rate": 7.2474921686529625e-06, "loss": 0.0074, "step": 37035 }, { "epoch": 17.276119402985074, "grad_norm": 0.2158789943447355, "learning_rate": 7.243713852094848e-06, "loss": 0.0076, "step": 37040 }, { "epoch": 17.278451492537314, "grad_norm": 0.21537183792601788, "learning_rate": 7.239938547452394e-06, "loss": 0.0074, "step": 37045 }, { "epoch": 17.28078358208955, "grad_norm": 0.2273674650769446, "learning_rate": 7.2361662552869734e-06, "loss": 0.0076, "step": 37050 }, { "epoch": 17.28311567164179, "grad_norm": 0.20374168772151047, "learning_rate": 7.232396976159475e-06, "loss": 0.0076, "step": 37055 }, { "epoch": 17.28544776119403, "grad_norm": 0.2025232240978587, "learning_rate": 7.228630710630356e-06, "loss": 0.0073, "step": 37060 }, { "epoch": 17.28777985074627, "grad_norm": 0.23954844211099563, "learning_rate": 7.224867459259628e-06, "loss": 0.0073, "step": 37065 }, { "epoch": 17.29011194029851, "grad_norm": 0.24299703159924582, "learning_rate": 7.221107222606851e-06, "loss": 0.0078, "step": 37070 }, { "epoch": 17.292444029850746, "grad_norm": 0.2363113592183746, "learning_rate": 7.217350001231131e-06, "loss": 0.0077, "step": 37075 }, { "epoch": 17.294776119402986, "grad_norm": 0.21063772103850928, "learning_rate": 7.21359579569114e-06, "loss": 0.0077, "step": 37080 }, { "epoch": 17.297108208955223, "grad_norm": 0.2310635683048598, "learning_rate": 7.2098446065450795e-06, "loss": 0.0075, "step": 37085 }, { "epoch": 17.299440298507463, "grad_norm": 0.2248394005061046, "learning_rate": 7.206096434350728e-06, "loss": 0.008, "step": 37090 }, { "epoch": 17.301772388059703, "grad_norm": 0.23595747384812388, "learning_rate": 7.202351279665391e-06, "loss": 0.0076, "step": 37095 }, { "epoch": 17.30410447761194, "grad_norm": 0.22651102296454687, "learning_rate": 7.198609143045948e-06, "loss": 0.0081, "step": 37100 }, { "epoch": 17.30643656716418, "grad_norm": 0.23059003930458466, "learning_rate": 7.194870025048812e-06, "loss": 0.0076, "step": 37105 }, { "epoch": 17.308768656716417, "grad_norm": 0.21191969438921684, "learning_rate": 7.191133926229957e-06, "loss": 0.0079, "step": 37110 }, { "epoch": 17.311100746268657, "grad_norm": 0.2272098536861033, "learning_rate": 7.187400847144904e-06, "loss": 0.0077, "step": 37115 }, { "epoch": 17.313432835820894, "grad_norm": 0.23437949935988817, "learning_rate": 7.183670788348726e-06, "loss": 0.0075, "step": 37120 }, { "epoch": 17.315764925373134, "grad_norm": 0.22264597263164526, "learning_rate": 7.1799437503960465e-06, "loss": 0.0079, "step": 37125 }, { "epoch": 17.318097014925375, "grad_norm": 0.2289207840708099, "learning_rate": 7.176219733841047e-06, "loss": 0.0077, "step": 37130 }, { "epoch": 17.32042910447761, "grad_norm": 0.24120814909862007, "learning_rate": 7.17249873923744e-06, "loss": 0.0077, "step": 37135 }, { "epoch": 17.32276119402985, "grad_norm": 0.2296291499262523, "learning_rate": 7.168780767138512e-06, "loss": 0.0077, "step": 37140 }, { "epoch": 17.32509328358209, "grad_norm": 0.21137174022866342, "learning_rate": 7.165065818097086e-06, "loss": 0.0076, "step": 37145 }, { "epoch": 17.32742537313433, "grad_norm": 0.23168714701466894, "learning_rate": 7.161353892665538e-06, "loss": 0.0077, "step": 37150 }, { "epoch": 17.329757462686565, "grad_norm": 0.24356013469528232, "learning_rate": 7.157644991395801e-06, "loss": 0.0075, "step": 37155 }, { "epoch": 17.332089552238806, "grad_norm": 0.21820626816807873, "learning_rate": 7.1539391148393474e-06, "loss": 0.0074, "step": 37160 }, { "epoch": 17.334421641791046, "grad_norm": 0.23630067480947387, "learning_rate": 7.1502362635472135e-06, "loss": 0.008, "step": 37165 }, { "epoch": 17.336753731343283, "grad_norm": 0.2133015612638126, "learning_rate": 7.146536438069963e-06, "loss": 0.0078, "step": 37170 }, { "epoch": 17.339085820895523, "grad_norm": 0.22618855137235327, "learning_rate": 7.142839638957743e-06, "loss": 0.0073, "step": 37175 }, { "epoch": 17.34141791044776, "grad_norm": 0.2258861842404235, "learning_rate": 7.139145866760217e-06, "loss": 0.0077, "step": 37180 }, { "epoch": 17.34375, "grad_norm": 0.22546052292883226, "learning_rate": 7.1354551220266216e-06, "loss": 0.0084, "step": 37185 }, { "epoch": 17.34608208955224, "grad_norm": 0.24023772518826414, "learning_rate": 7.1317674053057335e-06, "loss": 0.0076, "step": 37190 }, { "epoch": 17.348414179104477, "grad_norm": 0.2329229917484231, "learning_rate": 7.128082717145881e-06, "loss": 0.0077, "step": 37195 }, { "epoch": 17.350746268656717, "grad_norm": 0.24321362042918226, "learning_rate": 7.124401058094938e-06, "loss": 0.0077, "step": 37200 }, { "epoch": 17.353078358208954, "grad_norm": 0.21921529177810695, "learning_rate": 7.120722428700342e-06, "loss": 0.0076, "step": 37205 }, { "epoch": 17.355410447761194, "grad_norm": 0.22774684177077945, "learning_rate": 7.117046829509057e-06, "loss": 0.0077, "step": 37210 }, { "epoch": 17.357742537313435, "grad_norm": 0.24737041850615799, "learning_rate": 7.1133742610676196e-06, "loss": 0.008, "step": 37215 }, { "epoch": 17.36007462686567, "grad_norm": 0.21097252540976694, "learning_rate": 7.109704723922094e-06, "loss": 0.0077, "step": 37220 }, { "epoch": 17.36240671641791, "grad_norm": 0.23636467455440915, "learning_rate": 7.106038218618125e-06, "loss": 0.0077, "step": 37225 }, { "epoch": 17.36473880597015, "grad_norm": 0.2599504130356415, "learning_rate": 7.102374745700866e-06, "loss": 0.008, "step": 37230 }, { "epoch": 17.36707089552239, "grad_norm": 0.2213487107296093, "learning_rate": 7.098714305715051e-06, "loss": 0.0077, "step": 37235 }, { "epoch": 17.369402985074625, "grad_norm": 0.22520333120065475, "learning_rate": 7.0950568992049494e-06, "loss": 0.0079, "step": 37240 }, { "epoch": 17.371735074626866, "grad_norm": 0.24120558676538603, "learning_rate": 7.091402526714383e-06, "loss": 0.0081, "step": 37245 }, { "epoch": 17.374067164179106, "grad_norm": 0.23998700235399173, "learning_rate": 7.087751188786723e-06, "loss": 0.0076, "step": 37250 }, { "epoch": 17.376399253731343, "grad_norm": 0.23289337722938158, "learning_rate": 7.084102885964892e-06, "loss": 0.0076, "step": 37255 }, { "epoch": 17.378731343283583, "grad_norm": 0.22402290996802612, "learning_rate": 7.080457618791344e-06, "loss": 0.0077, "step": 37260 }, { "epoch": 17.38106343283582, "grad_norm": 0.23405176389887547, "learning_rate": 7.076815387808115e-06, "loss": 0.0078, "step": 37265 }, { "epoch": 17.38339552238806, "grad_norm": 0.22463317901155735, "learning_rate": 7.0731761935567495e-06, "loss": 0.0081, "step": 37270 }, { "epoch": 17.385727611940297, "grad_norm": 0.2315011164321993, "learning_rate": 7.0695400365783784e-06, "loss": 0.0075, "step": 37275 }, { "epoch": 17.388059701492537, "grad_norm": 0.23928164518204512, "learning_rate": 7.0659069174136544e-06, "loss": 0.0077, "step": 37280 }, { "epoch": 17.390391791044777, "grad_norm": 0.211755119491256, "learning_rate": 7.062276836602786e-06, "loss": 0.0078, "step": 37285 }, { "epoch": 17.392723880597014, "grad_norm": 0.2180523853640815, "learning_rate": 7.058649794685537e-06, "loss": 0.008, "step": 37290 }, { "epoch": 17.395055970149254, "grad_norm": 0.23081970990497724, "learning_rate": 7.055025792201212e-06, "loss": 0.0078, "step": 37295 }, { "epoch": 17.39738805970149, "grad_norm": 0.23822908696599415, "learning_rate": 7.051404829688663e-06, "loss": 0.0079, "step": 37300 }, { "epoch": 17.39972014925373, "grad_norm": 0.22186326069694048, "learning_rate": 7.047786907686296e-06, "loss": 0.0077, "step": 37305 }, { "epoch": 17.40205223880597, "grad_norm": 0.2330635683986178, "learning_rate": 7.044172026732059e-06, "loss": 0.0077, "step": 37310 }, { "epoch": 17.40438432835821, "grad_norm": 0.2386882043849179, "learning_rate": 7.040560187363447e-06, "loss": 0.0077, "step": 37315 }, { "epoch": 17.40671641791045, "grad_norm": 0.22799040459904954, "learning_rate": 7.036951390117512e-06, "loss": 0.0075, "step": 37320 }, { "epoch": 17.409048507462686, "grad_norm": 0.23326015578465764, "learning_rate": 7.033345635530844e-06, "loss": 0.0077, "step": 37325 }, { "epoch": 17.411380597014926, "grad_norm": 0.2139186071233181, "learning_rate": 7.029742924139586e-06, "loss": 0.0077, "step": 37330 }, { "epoch": 17.413712686567163, "grad_norm": 0.25647429914304115, "learning_rate": 7.0261432564794255e-06, "loss": 0.008, "step": 37335 }, { "epoch": 17.416044776119403, "grad_norm": 0.22563592538177166, "learning_rate": 7.022546633085604e-06, "loss": 0.0078, "step": 37340 }, { "epoch": 17.418376865671643, "grad_norm": 0.23551366679834262, "learning_rate": 7.018953054492889e-06, "loss": 0.0078, "step": 37345 }, { "epoch": 17.42070895522388, "grad_norm": 0.23561438655714662, "learning_rate": 7.015362521235632e-06, "loss": 0.0076, "step": 37350 }, { "epoch": 17.42304104477612, "grad_norm": 0.24331854310005163, "learning_rate": 7.011775033847698e-06, "loss": 0.0081, "step": 37355 }, { "epoch": 17.425373134328357, "grad_norm": 0.23332483126946008, "learning_rate": 7.008190592862514e-06, "loss": 0.0076, "step": 37360 }, { "epoch": 17.427705223880597, "grad_norm": 0.20136064574310472, "learning_rate": 7.004609198813053e-06, "loss": 0.0078, "step": 37365 }, { "epoch": 17.430037313432837, "grad_norm": 0.25725056131465224, "learning_rate": 7.0010308522318355e-06, "loss": 0.008, "step": 37370 }, { "epoch": 17.432369402985074, "grad_norm": 0.22750693296887792, "learning_rate": 6.997455553650924e-06, "loss": 0.0077, "step": 37375 }, { "epoch": 17.434701492537314, "grad_norm": 0.23473683991226948, "learning_rate": 6.9938833036019365e-06, "loss": 0.0079, "step": 37380 }, { "epoch": 17.43703358208955, "grad_norm": 0.25001263418579106, "learning_rate": 6.990314102616022e-06, "loss": 0.0079, "step": 37385 }, { "epoch": 17.43936567164179, "grad_norm": 0.2207172605321858, "learning_rate": 6.9867479512239e-06, "loss": 0.0079, "step": 37390 }, { "epoch": 17.44169776119403, "grad_norm": 0.24033693882181906, "learning_rate": 6.98318484995581e-06, "loss": 0.0083, "step": 37395 }, { "epoch": 17.44402985074627, "grad_norm": 0.24582869372608876, "learning_rate": 6.979624799341565e-06, "loss": 0.0076, "step": 37400 }, { "epoch": 17.44636194029851, "grad_norm": 0.24305498869548545, "learning_rate": 6.976067799910499e-06, "loss": 0.008, "step": 37405 }, { "epoch": 17.448694029850746, "grad_norm": 0.2514830546319348, "learning_rate": 6.972513852191508e-06, "loss": 0.0078, "step": 37410 }, { "epoch": 17.451026119402986, "grad_norm": 0.24317282947095117, "learning_rate": 6.968962956713028e-06, "loss": 0.0081, "step": 37415 }, { "epoch": 17.453358208955223, "grad_norm": 0.2305213964339523, "learning_rate": 6.965415114003046e-06, "loss": 0.0081, "step": 37420 }, { "epoch": 17.455690298507463, "grad_norm": 0.22921010694809626, "learning_rate": 6.96187032458909e-06, "loss": 0.0078, "step": 37425 }, { "epoch": 17.458022388059703, "grad_norm": 0.2446941953627658, "learning_rate": 6.958328588998242e-06, "loss": 0.0083, "step": 37430 }, { "epoch": 17.46035447761194, "grad_norm": 0.24630845617129443, "learning_rate": 6.954789907757112e-06, "loss": 0.0085, "step": 37435 }, { "epoch": 17.46268656716418, "grad_norm": 0.216719730654196, "learning_rate": 6.951254281391881e-06, "loss": 0.008, "step": 37440 }, { "epoch": 17.465018656716417, "grad_norm": 0.2287665532880995, "learning_rate": 6.947721710428251e-06, "loss": 0.0077, "step": 37445 }, { "epoch": 17.467350746268657, "grad_norm": 0.2438992379866128, "learning_rate": 6.944192195391494e-06, "loss": 0.008, "step": 37450 }, { "epoch": 17.469682835820894, "grad_norm": 0.23780474210092512, "learning_rate": 6.9406657368064055e-06, "loss": 0.0079, "step": 37455 }, { "epoch": 17.472014925373134, "grad_norm": 0.24278590723873752, "learning_rate": 6.937142335197338e-06, "loss": 0.0079, "step": 37460 }, { "epoch": 17.474347014925375, "grad_norm": 0.24029583690572884, "learning_rate": 6.93362199108819e-06, "loss": 0.0079, "step": 37465 }, { "epoch": 17.47667910447761, "grad_norm": 0.27411303667604936, "learning_rate": 6.930104705002403e-06, "loss": 0.0082, "step": 37470 }, { "epoch": 17.47901119402985, "grad_norm": 0.2512370136571627, "learning_rate": 6.9265904774629585e-06, "loss": 0.0082, "step": 37475 }, { "epoch": 17.48134328358209, "grad_norm": 0.23522017723755687, "learning_rate": 6.9230793089924005e-06, "loss": 0.0079, "step": 37480 }, { "epoch": 17.48367537313433, "grad_norm": 0.23211897581691904, "learning_rate": 6.919571200112787e-06, "loss": 0.0081, "step": 37485 }, { "epoch": 17.486007462686565, "grad_norm": 0.23152029740429778, "learning_rate": 6.916066151345761e-06, "loss": 0.008, "step": 37490 }, { "epoch": 17.488339552238806, "grad_norm": 0.23745459370769356, "learning_rate": 6.912564163212476e-06, "loss": 0.0078, "step": 37495 }, { "epoch": 17.490671641791046, "grad_norm": 0.2891942860082839, "learning_rate": 6.909065236233644e-06, "loss": 0.008, "step": 37500 }, { "epoch": 17.493003731343283, "grad_norm": 0.2521163880682321, "learning_rate": 6.90556937092953e-06, "loss": 0.008, "step": 37505 }, { "epoch": 17.495335820895523, "grad_norm": 0.2322832921315333, "learning_rate": 6.90207656781993e-06, "loss": 0.0081, "step": 37510 }, { "epoch": 17.49766791044776, "grad_norm": 0.2590028157472264, "learning_rate": 6.898586827424194e-06, "loss": 0.0079, "step": 37515 }, { "epoch": 17.5, "grad_norm": 0.23298544137787822, "learning_rate": 6.8951001502612065e-06, "loss": 0.0078, "step": 37520 }, { "epoch": 17.50233208955224, "grad_norm": 0.2399622498540717, "learning_rate": 6.891616536849416e-06, "loss": 0.0077, "step": 37525 }, { "epoch": 17.504664179104477, "grad_norm": 0.2687128558914863, "learning_rate": 6.888135987706787e-06, "loss": 0.0081, "step": 37530 }, { "epoch": 17.506996268656717, "grad_norm": 0.2467180113941704, "learning_rate": 6.884658503350851e-06, "loss": 0.0082, "step": 37535 }, { "epoch": 17.509328358208954, "grad_norm": 0.2499937629639681, "learning_rate": 6.881184084298675e-06, "loss": 0.008, "step": 37540 }, { "epoch": 17.511660447761194, "grad_norm": 0.2605782751446763, "learning_rate": 6.877712731066875e-06, "loss": 0.0082, "step": 37545 }, { "epoch": 17.513992537313435, "grad_norm": 0.25570104679560385, "learning_rate": 6.874244444171607e-06, "loss": 0.008, "step": 37550 }, { "epoch": 17.51632462686567, "grad_norm": 0.23096710251636654, "learning_rate": 6.870779224128571e-06, "loss": 0.0082, "step": 37555 }, { "epoch": 17.51865671641791, "grad_norm": 0.23937710127563333, "learning_rate": 6.867317071453007e-06, "loss": 0.0079, "step": 37560 }, { "epoch": 17.52098880597015, "grad_norm": 0.2361306337604918, "learning_rate": 6.863857986659716e-06, "loss": 0.0076, "step": 37565 }, { "epoch": 17.52332089552239, "grad_norm": 0.25063718061563495, "learning_rate": 6.860401970263017e-06, "loss": 0.0082, "step": 37570 }, { "epoch": 17.525652985074625, "grad_norm": 0.2468969197648419, "learning_rate": 6.856949022776798e-06, "loss": 0.0081, "step": 37575 }, { "epoch": 17.527985074626866, "grad_norm": 0.2478286123427031, "learning_rate": 6.8534991447144706e-06, "loss": 0.0081, "step": 37580 }, { "epoch": 17.530317164179106, "grad_norm": 0.23767855322910336, "learning_rate": 6.850052336589008e-06, "loss": 0.0079, "step": 37585 }, { "epoch": 17.532649253731343, "grad_norm": 0.2712014301390395, "learning_rate": 6.8466085989129066e-06, "loss": 0.0078, "step": 37590 }, { "epoch": 17.534981343283583, "grad_norm": 0.2444301555447247, "learning_rate": 6.843167932198227e-06, "loss": 0.0084, "step": 37595 }, { "epoch": 17.53731343283582, "grad_norm": 0.2512448971251524, "learning_rate": 6.839730336956554e-06, "loss": 0.0082, "step": 37600 }, { "epoch": 17.53964552238806, "grad_norm": 0.23427535084960358, "learning_rate": 6.836295813699031e-06, "loss": 0.0079, "step": 37605 }, { "epoch": 17.541977611940297, "grad_norm": 0.22189034079771677, "learning_rate": 6.83286436293634e-06, "loss": 0.0079, "step": 37610 }, { "epoch": 17.544309701492537, "grad_norm": 0.26123035597459193, "learning_rate": 6.829435985178708e-06, "loss": 0.0083, "step": 37615 }, { "epoch": 17.546641791044777, "grad_norm": 0.24028784539475143, "learning_rate": 6.826010680935886e-06, "loss": 0.0081, "step": 37620 }, { "epoch": 17.548973880597014, "grad_norm": 0.24840665116751817, "learning_rate": 6.8225884507172005e-06, "loss": 0.0082, "step": 37625 }, { "epoch": 17.551305970149254, "grad_norm": 0.2636352216947196, "learning_rate": 6.819169295031493e-06, "loss": 0.0083, "step": 37630 }, { "epoch": 17.55363805970149, "grad_norm": 0.24805547220828847, "learning_rate": 6.815753214387172e-06, "loss": 0.0081, "step": 37635 }, { "epoch": 17.55597014925373, "grad_norm": 0.2431389923421396, "learning_rate": 6.812340209292164e-06, "loss": 0.0083, "step": 37640 }, { "epoch": 17.55830223880597, "grad_norm": 0.24878078777277066, "learning_rate": 6.808930280253956e-06, "loss": 0.0085, "step": 37645 }, { "epoch": 17.56063432835821, "grad_norm": 0.25504438191576595, "learning_rate": 6.80552342777957e-06, "loss": 0.0083, "step": 37650 }, { "epoch": 17.56296641791045, "grad_norm": 0.2586061316755469, "learning_rate": 6.80211965237557e-06, "loss": 0.0083, "step": 37655 }, { "epoch": 17.565298507462686, "grad_norm": 0.23893441334523532, "learning_rate": 6.79871895454807e-06, "loss": 0.0081, "step": 37660 }, { "epoch": 17.567630597014926, "grad_norm": 0.24050291709573599, "learning_rate": 6.7953213348027235e-06, "loss": 0.0081, "step": 37665 }, { "epoch": 17.569962686567163, "grad_norm": 0.24495037724571342, "learning_rate": 6.791926793644713e-06, "loss": 0.0082, "step": 37670 }, { "epoch": 17.572294776119403, "grad_norm": 0.24192283046780308, "learning_rate": 6.7885353315787825e-06, "loss": 0.0081, "step": 37675 }, { "epoch": 17.574626865671643, "grad_norm": 0.2541663238860533, "learning_rate": 6.785146949109206e-06, "loss": 0.008, "step": 37680 }, { "epoch": 17.57695895522388, "grad_norm": 0.24239514516217922, "learning_rate": 6.781761646739805e-06, "loss": 0.0081, "step": 37685 }, { "epoch": 17.57929104477612, "grad_norm": 0.24854612238582888, "learning_rate": 6.778379424973943e-06, "loss": 0.0081, "step": 37690 }, { "epoch": 17.581623134328357, "grad_norm": 0.23630881173318946, "learning_rate": 6.775000284314523e-06, "loss": 0.0079, "step": 37695 }, { "epoch": 17.583955223880597, "grad_norm": 0.23642949934472135, "learning_rate": 6.771624225263994e-06, "loss": 0.0082, "step": 37700 }, { "epoch": 17.586287313432837, "grad_norm": 0.2570026020012314, "learning_rate": 6.768251248324333e-06, "loss": 0.0084, "step": 37705 }, { "epoch": 17.588619402985074, "grad_norm": 0.23397782030882439, "learning_rate": 6.764881353997082e-06, "loss": 0.0082, "step": 37710 }, { "epoch": 17.590951492537314, "grad_norm": 0.2538908395450224, "learning_rate": 6.761514542783308e-06, "loss": 0.0085, "step": 37715 }, { "epoch": 17.59328358208955, "grad_norm": 0.2693766285559697, "learning_rate": 6.758150815183618e-06, "loss": 0.0082, "step": 37720 }, { "epoch": 17.59561567164179, "grad_norm": 0.24380919710054536, "learning_rate": 6.7547901716981704e-06, "loss": 0.0082, "step": 37725 }, { "epoch": 17.59794776119403, "grad_norm": 0.2473781759187102, "learning_rate": 6.751432612826664e-06, "loss": 0.0085, "step": 37730 }, { "epoch": 17.60027985074627, "grad_norm": 0.24971579618557244, "learning_rate": 6.748078139068327e-06, "loss": 0.0081, "step": 37735 }, { "epoch": 17.60261194029851, "grad_norm": 0.29697418308110346, "learning_rate": 6.7447267509219494e-06, "loss": 0.0083, "step": 37740 }, { "epoch": 17.604944029850746, "grad_norm": 0.2204558280379004, "learning_rate": 6.741378448885838e-06, "loss": 0.0082, "step": 37745 }, { "epoch": 17.607276119402986, "grad_norm": 0.23523388952751825, "learning_rate": 6.738033233457863e-06, "loss": 0.0083, "step": 37750 }, { "epoch": 17.609608208955223, "grad_norm": 0.2614150396953813, "learning_rate": 6.734691105135417e-06, "loss": 0.0083, "step": 37755 }, { "epoch": 17.611940298507463, "grad_norm": 0.24934836453819928, "learning_rate": 6.7313520644154555e-06, "loss": 0.0084, "step": 37760 }, { "epoch": 17.614272388059703, "grad_norm": 0.23964796735357938, "learning_rate": 6.7280161117944495e-06, "loss": 0.0084, "step": 37765 }, { "epoch": 17.61660447761194, "grad_norm": 0.23918739155171373, "learning_rate": 6.724683247768427e-06, "loss": 0.0083, "step": 37770 }, { "epoch": 17.61893656716418, "grad_norm": 0.2561157110264968, "learning_rate": 6.721353472832953e-06, "loss": 0.0081, "step": 37775 }, { "epoch": 17.621268656716417, "grad_norm": 0.24546443549217975, "learning_rate": 6.718026787483131e-06, "loss": 0.0083, "step": 37780 }, { "epoch": 17.623600746268657, "grad_norm": 0.23540315523189612, "learning_rate": 6.714703192213614e-06, "loss": 0.008, "step": 37785 }, { "epoch": 17.625932835820894, "grad_norm": 0.27245154552744966, "learning_rate": 6.7113826875185885e-06, "loss": 0.0083, "step": 37790 }, { "epoch": 17.628264925373134, "grad_norm": 0.2653880970933636, "learning_rate": 6.7080652738917655e-06, "loss": 0.0082, "step": 37795 }, { "epoch": 17.630597014925375, "grad_norm": 0.23353981783651, "learning_rate": 6.704750951826438e-06, "loss": 0.0081, "step": 37800 }, { "epoch": 17.63292910447761, "grad_norm": 0.23012963824566063, "learning_rate": 6.701439721815391e-06, "loss": 0.0079, "step": 37805 }, { "epoch": 17.63526119402985, "grad_norm": 0.23596518616386705, "learning_rate": 6.698131584350989e-06, "loss": 0.0086, "step": 37810 }, { "epoch": 17.63759328358209, "grad_norm": 0.2462821486480908, "learning_rate": 6.69482653992511e-06, "loss": 0.0084, "step": 37815 }, { "epoch": 17.63992537313433, "grad_norm": 0.23320388499801442, "learning_rate": 6.691524589029188e-06, "loss": 0.0084, "step": 37820 }, { "epoch": 17.642257462686565, "grad_norm": 0.2761581375810806, "learning_rate": 6.688225732154189e-06, "loss": 0.0084, "step": 37825 }, { "epoch": 17.644589552238806, "grad_norm": 0.2647233634526641, "learning_rate": 6.684929969790622e-06, "loss": 0.0082, "step": 37830 }, { "epoch": 17.646921641791046, "grad_norm": 0.22427806711818163, "learning_rate": 6.6816373024285365e-06, "loss": 0.0086, "step": 37835 }, { "epoch": 17.649253731343283, "grad_norm": 0.2201130603957964, "learning_rate": 6.6783477305575215e-06, "loss": 0.0082, "step": 37840 }, { "epoch": 17.651585820895523, "grad_norm": 0.2502947146472413, "learning_rate": 6.675061254666702e-06, "loss": 0.0082, "step": 37845 }, { "epoch": 17.65391791044776, "grad_norm": 0.2492034986761709, "learning_rate": 6.671777875244745e-06, "loss": 0.0082, "step": 37850 }, { "epoch": 17.65625, "grad_norm": 0.2517096636945324, "learning_rate": 6.668497592779857e-06, "loss": 0.0085, "step": 37855 }, { "epoch": 17.65858208955224, "grad_norm": 0.25401658018930234, "learning_rate": 6.665220407759788e-06, "loss": 0.0079, "step": 37860 }, { "epoch": 17.660914179104477, "grad_norm": 0.25004669628361553, "learning_rate": 6.661946320671822e-06, "loss": 0.0083, "step": 37865 }, { "epoch": 17.663246268656717, "grad_norm": 0.23963523870653788, "learning_rate": 6.658675332002787e-06, "loss": 0.0084, "step": 37870 }, { "epoch": 17.665578358208954, "grad_norm": 0.24589206749525663, "learning_rate": 6.655407442239047e-06, "loss": 0.0082, "step": 37875 }, { "epoch": 17.667910447761194, "grad_norm": 0.24626363006977384, "learning_rate": 6.652142651866497e-06, "loss": 0.0084, "step": 37880 }, { "epoch": 17.670242537313435, "grad_norm": 0.2604946346758195, "learning_rate": 6.648880961370593e-06, "loss": 0.0084, "step": 37885 }, { "epoch": 17.67257462686567, "grad_norm": 0.25528934755113236, "learning_rate": 6.645622371236314e-06, "loss": 0.0083, "step": 37890 }, { "epoch": 17.67490671641791, "grad_norm": 0.2776983003755349, "learning_rate": 6.642366881948173e-06, "loss": 0.0081, "step": 37895 }, { "epoch": 17.67723880597015, "grad_norm": 0.23708955950082616, "learning_rate": 6.639114493990238e-06, "loss": 0.0079, "step": 37900 }, { "epoch": 17.67957089552239, "grad_norm": 0.2735216482469529, "learning_rate": 6.635865207846106e-06, "loss": 0.0084, "step": 37905 }, { "epoch": 17.681902985074625, "grad_norm": 0.2495738953460516, "learning_rate": 6.6326190239989135e-06, "loss": 0.0082, "step": 37910 }, { "epoch": 17.684235074626866, "grad_norm": 0.2537193952409857, "learning_rate": 6.629375942931345e-06, "loss": 0.0081, "step": 37915 }, { "epoch": 17.686567164179106, "grad_norm": 0.21726732827897147, "learning_rate": 6.626135965125597e-06, "loss": 0.0084, "step": 37920 }, { "epoch": 17.688899253731343, "grad_norm": 0.2560859693276377, "learning_rate": 6.622899091063442e-06, "loss": 0.0086, "step": 37925 }, { "epoch": 17.691231343283583, "grad_norm": 0.2422261406822412, "learning_rate": 6.61966532122616e-06, "loss": 0.0082, "step": 37930 }, { "epoch": 17.69356343283582, "grad_norm": 0.2635714003440469, "learning_rate": 6.6164346560945935e-06, "loss": 0.0084, "step": 37935 }, { "epoch": 17.69589552238806, "grad_norm": 0.26884614597367557, "learning_rate": 6.613207096149099e-06, "loss": 0.0087, "step": 37940 }, { "epoch": 17.698227611940297, "grad_norm": 0.24554427686208286, "learning_rate": 6.609982641869591e-06, "loss": 0.008, "step": 37945 }, { "epoch": 17.700559701492537, "grad_norm": 0.2580928542522933, "learning_rate": 6.606761293735513e-06, "loss": 0.0083, "step": 37950 }, { "epoch": 17.702891791044777, "grad_norm": 0.2579438170322897, "learning_rate": 6.6035430522258455e-06, "loss": 0.0086, "step": 37955 }, { "epoch": 17.705223880597014, "grad_norm": 0.25466829351907083, "learning_rate": 6.600327917819114e-06, "loss": 0.0083, "step": 37960 }, { "epoch": 17.707555970149254, "grad_norm": 0.24603105564103006, "learning_rate": 6.597115890993383e-06, "loss": 0.0083, "step": 37965 }, { "epoch": 17.70988805970149, "grad_norm": 0.271246883191794, "learning_rate": 6.593906972226238e-06, "loss": 0.0084, "step": 37970 }, { "epoch": 17.71222014925373, "grad_norm": 0.23006116182299025, "learning_rate": 6.590701161994828e-06, "loss": 0.0083, "step": 37975 }, { "epoch": 17.71455223880597, "grad_norm": 0.24079416608658133, "learning_rate": 6.587498460775811e-06, "loss": 0.0084, "step": 37980 }, { "epoch": 17.71688432835821, "grad_norm": 0.25846188495969535, "learning_rate": 6.5842988690454135e-06, "loss": 0.0083, "step": 37985 }, { "epoch": 17.71921641791045, "grad_norm": 0.25875044137349634, "learning_rate": 6.581102387279374e-06, "loss": 0.008, "step": 37990 }, { "epoch": 17.721548507462686, "grad_norm": 0.25854735448055777, "learning_rate": 6.577909015952982e-06, "loss": 0.0085, "step": 37995 }, { "epoch": 17.723880597014926, "grad_norm": 0.2667954365728728, "learning_rate": 6.574718755541061e-06, "loss": 0.0084, "step": 38000 }, { "epoch": 17.726212686567163, "grad_norm": 0.2663535993694204, "learning_rate": 6.571531606517972e-06, "loss": 0.0085, "step": 38005 }, { "epoch": 17.728544776119403, "grad_norm": 0.25647913337626727, "learning_rate": 6.568347569357611e-06, "loss": 0.0083, "step": 38010 }, { "epoch": 17.730876865671643, "grad_norm": 0.26651644308123495, "learning_rate": 6.565166644533424e-06, "loss": 0.009, "step": 38015 }, { "epoch": 17.73320895522388, "grad_norm": 0.2764007832120244, "learning_rate": 6.561988832518367e-06, "loss": 0.0083, "step": 38020 }, { "epoch": 17.73554104477612, "grad_norm": 0.25158247443332354, "learning_rate": 6.558814133784966e-06, "loss": 0.0085, "step": 38025 }, { "epoch": 17.737873134328357, "grad_norm": 0.25161437745072557, "learning_rate": 6.555642548805262e-06, "loss": 0.0083, "step": 38030 }, { "epoch": 17.740205223880597, "grad_norm": 0.2615799544509756, "learning_rate": 6.552474078050835e-06, "loss": 0.0087, "step": 38035 }, { "epoch": 17.742537313432837, "grad_norm": 0.2572348017488883, "learning_rate": 6.5493087219928114e-06, "loss": 0.0083, "step": 38040 }, { "epoch": 17.744869402985074, "grad_norm": 0.258094773442168, "learning_rate": 6.54614648110185e-06, "loss": 0.0086, "step": 38045 }, { "epoch": 17.747201492537314, "grad_norm": 0.24921760838203075, "learning_rate": 6.542987355848144e-06, "loss": 0.0083, "step": 38050 }, { "epoch": 17.74953358208955, "grad_norm": 0.25532673762207164, "learning_rate": 6.539831346701426e-06, "loss": 0.0084, "step": 38055 }, { "epoch": 17.75186567164179, "grad_norm": 0.283989369738389, "learning_rate": 6.536678454130965e-06, "loss": 0.0086, "step": 38060 }, { "epoch": 17.75419776119403, "grad_norm": 0.22321501884900719, "learning_rate": 6.53352867860556e-06, "loss": 0.0084, "step": 38065 }, { "epoch": 17.75652985074627, "grad_norm": 0.24385650011560972, "learning_rate": 6.530382020593559e-06, "loss": 0.0081, "step": 38070 }, { "epoch": 17.75886194029851, "grad_norm": 0.23548171719361366, "learning_rate": 6.527238480562838e-06, "loss": 0.0082, "step": 38075 }, { "epoch": 17.761194029850746, "grad_norm": 0.2620554421604714, "learning_rate": 6.52409805898081e-06, "loss": 0.0086, "step": 38080 }, { "epoch": 17.763526119402986, "grad_norm": 0.24024765320056155, "learning_rate": 6.520960756314427e-06, "loss": 0.0082, "step": 38085 }, { "epoch": 17.765858208955223, "grad_norm": 0.2597695260324674, "learning_rate": 6.517826573030178e-06, "loss": 0.0084, "step": 38090 }, { "epoch": 17.768190298507463, "grad_norm": 0.27057647195311235, "learning_rate": 6.514695509594076e-06, "loss": 0.0086, "step": 38095 }, { "epoch": 17.770522388059703, "grad_norm": 0.266955622737018, "learning_rate": 6.511567566471697e-06, "loss": 0.0083, "step": 38100 }, { "epoch": 17.77285447761194, "grad_norm": 0.2654695562961714, "learning_rate": 6.50844274412812e-06, "loss": 0.0083, "step": 38105 }, { "epoch": 17.77518656716418, "grad_norm": 0.27682371386083615, "learning_rate": 6.50532104302799e-06, "loss": 0.0086, "step": 38110 }, { "epoch": 17.777518656716417, "grad_norm": 0.26024021479528736, "learning_rate": 6.5022024636354605e-06, "loss": 0.0087, "step": 38115 }, { "epoch": 17.779850746268657, "grad_norm": 0.2360091809949543, "learning_rate": 6.499087006414245e-06, "loss": 0.0083, "step": 38120 }, { "epoch": 17.782182835820894, "grad_norm": 0.25716023489190687, "learning_rate": 6.495974671827574e-06, "loss": 0.0087, "step": 38125 }, { "epoch": 17.784514925373134, "grad_norm": 0.24443456835839103, "learning_rate": 6.492865460338228e-06, "loss": 0.0086, "step": 38130 }, { "epoch": 17.786847014925375, "grad_norm": 0.24668160837773534, "learning_rate": 6.489759372408514e-06, "loss": 0.0083, "step": 38135 }, { "epoch": 17.78917910447761, "grad_norm": 0.25187968622513907, "learning_rate": 6.4866564085002826e-06, "loss": 0.0086, "step": 38140 }, { "epoch": 17.79151119402985, "grad_norm": 0.2527132574017141, "learning_rate": 6.483556569074904e-06, "loss": 0.0083, "step": 38145 }, { "epoch": 17.79384328358209, "grad_norm": 0.24508892377783426, "learning_rate": 6.480459854593305e-06, "loss": 0.0084, "step": 38150 }, { "epoch": 17.79617537313433, "grad_norm": 0.24514020235876272, "learning_rate": 6.477366265515931e-06, "loss": 0.0087, "step": 38155 }, { "epoch": 17.798507462686565, "grad_norm": 0.2728655597633974, "learning_rate": 6.474275802302776e-06, "loss": 0.0084, "step": 38160 }, { "epoch": 17.800839552238806, "grad_norm": 0.2945608956252544, "learning_rate": 6.471188465413355e-06, "loss": 0.0087, "step": 38165 }, { "epoch": 17.803171641791046, "grad_norm": 0.27318245777570566, "learning_rate": 6.468104255306728e-06, "loss": 0.0085, "step": 38170 }, { "epoch": 17.805503731343283, "grad_norm": 0.2685331083200039, "learning_rate": 6.465023172441489e-06, "loss": 0.0086, "step": 38175 }, { "epoch": 17.807835820895523, "grad_norm": 0.26791311384900374, "learning_rate": 6.461945217275761e-06, "loss": 0.0086, "step": 38180 }, { "epoch": 17.81016791044776, "grad_norm": 0.25664303043307635, "learning_rate": 6.458870390267213e-06, "loss": 0.0086, "step": 38185 }, { "epoch": 17.8125, "grad_norm": 0.23699280477966592, "learning_rate": 6.455798691873042e-06, "loss": 0.0088, "step": 38190 }, { "epoch": 17.81483208955224, "grad_norm": 0.28209699573635355, "learning_rate": 6.45273012254997e-06, "loss": 0.0087, "step": 38195 }, { "epoch": 17.817164179104477, "grad_norm": 0.24334080851733678, "learning_rate": 6.449664682754278e-06, "loss": 0.0086, "step": 38200 }, { "epoch": 17.819496268656717, "grad_norm": 0.2792620641726979, "learning_rate": 6.446602372941756e-06, "loss": 0.0085, "step": 38205 }, { "epoch": 17.821828358208954, "grad_norm": 0.28796500794627083, "learning_rate": 6.443543193567745e-06, "loss": 0.0086, "step": 38210 }, { "epoch": 17.824160447761194, "grad_norm": 0.2770674580995448, "learning_rate": 6.440487145087116e-06, "loss": 0.0088, "step": 38215 }, { "epoch": 17.826492537313435, "grad_norm": 0.23877775249623115, "learning_rate": 6.4374342279542726e-06, "loss": 0.0087, "step": 38220 }, { "epoch": 17.82882462686567, "grad_norm": 0.24627638632200846, "learning_rate": 6.434384442623156e-06, "loss": 0.0088, "step": 38225 }, { "epoch": 17.83115671641791, "grad_norm": 0.2679725617527583, "learning_rate": 6.431337789547239e-06, "loss": 0.0086, "step": 38230 }, { "epoch": 17.83348880597015, "grad_norm": 0.27249659522366493, "learning_rate": 6.428294269179531e-06, "loss": 0.0087, "step": 38235 }, { "epoch": 17.83582089552239, "grad_norm": 0.2662640310072346, "learning_rate": 6.425253881972573e-06, "loss": 0.0086, "step": 38240 }, { "epoch": 17.838152985074625, "grad_norm": 0.2407190024452978, "learning_rate": 6.4222166283784415e-06, "loss": 0.0085, "step": 38245 }, { "epoch": 17.840485074626866, "grad_norm": 0.2693366427546327, "learning_rate": 6.419182508848745e-06, "loss": 0.0084, "step": 38250 }, { "epoch": 17.842817164179106, "grad_norm": 0.27260736403413127, "learning_rate": 6.416151523834632e-06, "loss": 0.0087, "step": 38255 }, { "epoch": 17.845149253731343, "grad_norm": 0.25927603533130117, "learning_rate": 6.4131236737867795e-06, "loss": 0.0083, "step": 38260 }, { "epoch": 17.847481343283583, "grad_norm": 0.26420391070071964, "learning_rate": 6.4100989591554026e-06, "loss": 0.0089, "step": 38265 }, { "epoch": 17.84981343283582, "grad_norm": 0.27387243432183794, "learning_rate": 6.407077380390236e-06, "loss": 0.0089, "step": 38270 }, { "epoch": 17.85214552238806, "grad_norm": 0.2456682623440745, "learning_rate": 6.4040589379405765e-06, "loss": 0.0084, "step": 38275 }, { "epoch": 17.854477611940297, "grad_norm": 0.26137148681527966, "learning_rate": 6.4010436322552204e-06, "loss": 0.0086, "step": 38280 }, { "epoch": 17.856809701492537, "grad_norm": 0.2670846042329129, "learning_rate": 6.39803146378253e-06, "loss": 0.0087, "step": 38285 }, { "epoch": 17.859141791044777, "grad_norm": 0.246447916208616, "learning_rate": 6.395022432970375e-06, "loss": 0.0085, "step": 38290 }, { "epoch": 17.861473880597014, "grad_norm": 0.26568867289446324, "learning_rate": 6.392016540266179e-06, "loss": 0.0087, "step": 38295 }, { "epoch": 17.863805970149254, "grad_norm": 0.24236323873174515, "learning_rate": 6.389013786116878e-06, "loss": 0.0085, "step": 38300 }, { "epoch": 17.86613805970149, "grad_norm": 0.26851083692913297, "learning_rate": 6.3860141709689615e-06, "loss": 0.0084, "step": 38305 }, { "epoch": 17.86847014925373, "grad_norm": 0.2642310292156638, "learning_rate": 6.383017695268441e-06, "loss": 0.0088, "step": 38310 }, { "epoch": 17.87080223880597, "grad_norm": 0.24352728424993034, "learning_rate": 6.380024359460862e-06, "loss": 0.0087, "step": 38315 }, { "epoch": 17.87313432835821, "grad_norm": 0.2550286163499799, "learning_rate": 6.377034163991308e-06, "loss": 0.009, "step": 38320 }, { "epoch": 17.87546641791045, "grad_norm": 0.2758214692426234, "learning_rate": 6.3740471093043914e-06, "loss": 0.0088, "step": 38325 }, { "epoch": 17.877798507462686, "grad_norm": 0.2378977195346546, "learning_rate": 6.3710631958442524e-06, "loss": 0.0086, "step": 38330 }, { "epoch": 17.880130597014926, "grad_norm": 0.262033582824426, "learning_rate": 6.3680824240545835e-06, "loss": 0.0084, "step": 38335 }, { "epoch": 17.882462686567163, "grad_norm": 0.27793937405392005, "learning_rate": 6.365104794378582e-06, "loss": 0.0087, "step": 38340 }, { "epoch": 17.884794776119403, "grad_norm": 0.26148676974823953, "learning_rate": 6.362130307259008e-06, "loss": 0.0086, "step": 38345 }, { "epoch": 17.887126865671643, "grad_norm": 0.24973898104903194, "learning_rate": 6.3591589631381286e-06, "loss": 0.0085, "step": 38350 }, { "epoch": 17.88945895522388, "grad_norm": 0.23595649381782402, "learning_rate": 6.356190762457753e-06, "loss": 0.0086, "step": 38355 }, { "epoch": 17.89179104477612, "grad_norm": 0.2625903235427029, "learning_rate": 6.353225705659234e-06, "loss": 0.0084, "step": 38360 }, { "epoch": 17.894123134328357, "grad_norm": 0.2573599103193759, "learning_rate": 6.350263793183439e-06, "loss": 0.0087, "step": 38365 }, { "epoch": 17.896455223880597, "grad_norm": 0.27246663987603054, "learning_rate": 6.347305025470776e-06, "loss": 0.0088, "step": 38370 }, { "epoch": 17.898787313432837, "grad_norm": 0.23972074223415105, "learning_rate": 6.344349402961194e-06, "loss": 0.0088, "step": 38375 }, { "epoch": 17.901119402985074, "grad_norm": 0.27554952901369284, "learning_rate": 6.341396926094155e-06, "loss": 0.0086, "step": 38380 }, { "epoch": 17.903451492537314, "grad_norm": 0.2594576730163333, "learning_rate": 6.338447595308671e-06, "loss": 0.0085, "step": 38385 }, { "epoch": 17.90578358208955, "grad_norm": 0.2342531828745256, "learning_rate": 6.335501411043274e-06, "loss": 0.0087, "step": 38390 }, { "epoch": 17.90811567164179, "grad_norm": 0.26946817073175605, "learning_rate": 6.3325583737360376e-06, "loss": 0.0085, "step": 38395 }, { "epoch": 17.91044776119403, "grad_norm": 0.2654130341256823, "learning_rate": 6.329618483824559e-06, "loss": 0.0089, "step": 38400 }, { "epoch": 17.91277985074627, "grad_norm": 0.2922881330352032, "learning_rate": 6.326681741745978e-06, "loss": 0.0087, "step": 38405 }, { "epoch": 17.91511194029851, "grad_norm": 0.27288641414332926, "learning_rate": 6.323748147936959e-06, "loss": 0.0083, "step": 38410 }, { "epoch": 17.917444029850746, "grad_norm": 0.24765282493591917, "learning_rate": 6.320817702833689e-06, "loss": 0.0085, "step": 38415 }, { "epoch": 17.919776119402986, "grad_norm": 0.27973315463918624, "learning_rate": 6.317890406871914e-06, "loss": 0.0087, "step": 38420 }, { "epoch": 17.922108208955223, "grad_norm": 0.27012667527129325, "learning_rate": 6.314966260486882e-06, "loss": 0.0088, "step": 38425 }, { "epoch": 17.924440298507463, "grad_norm": 0.2699044994523763, "learning_rate": 6.312045264113388e-06, "loss": 0.0086, "step": 38430 }, { "epoch": 17.926772388059703, "grad_norm": 0.25871269210409925, "learning_rate": 6.309127418185755e-06, "loss": 0.0085, "step": 38435 }, { "epoch": 17.92910447761194, "grad_norm": 0.27349740245225035, "learning_rate": 6.306212723137846e-06, "loss": 0.009, "step": 38440 }, { "epoch": 17.93143656716418, "grad_norm": 0.2869988530815342, "learning_rate": 6.3033011794030416e-06, "loss": 0.0088, "step": 38445 }, { "epoch": 17.933768656716417, "grad_norm": 0.275519222659838, "learning_rate": 6.300392787414265e-06, "loss": 0.0088, "step": 38450 }, { "epoch": 17.936100746268657, "grad_norm": 0.25733807933802216, "learning_rate": 6.297487547603958e-06, "loss": 0.0086, "step": 38455 }, { "epoch": 17.938432835820894, "grad_norm": 0.26464127705026963, "learning_rate": 6.2945854604041135e-06, "loss": 0.0087, "step": 38460 }, { "epoch": 17.940764925373134, "grad_norm": 0.25308748651614477, "learning_rate": 6.291686526246232e-06, "loss": 0.0087, "step": 38465 }, { "epoch": 17.943097014925375, "grad_norm": 0.2630742276575572, "learning_rate": 6.28879074556137e-06, "loss": 0.0089, "step": 38470 }, { "epoch": 17.94542910447761, "grad_norm": 0.2822437917735082, "learning_rate": 6.2858981187800915e-06, "loss": 0.0086, "step": 38475 }, { "epoch": 17.94776119402985, "grad_norm": 0.26734173176112663, "learning_rate": 6.283008646332507e-06, "loss": 0.0088, "step": 38480 }, { "epoch": 17.95009328358209, "grad_norm": 0.2871847923071538, "learning_rate": 6.280122328648254e-06, "loss": 0.0086, "step": 38485 }, { "epoch": 17.95242537313433, "grad_norm": 0.2535789981507266, "learning_rate": 6.277239166156497e-06, "loss": 0.0083, "step": 38490 }, { "epoch": 17.954757462686565, "grad_norm": 0.277761593749561, "learning_rate": 6.274359159285937e-06, "loss": 0.0085, "step": 38495 }, { "epoch": 17.957089552238806, "grad_norm": 0.2616472452656676, "learning_rate": 6.271482308464807e-06, "loss": 0.0087, "step": 38500 }, { "epoch": 17.959421641791046, "grad_norm": 0.27993462646374434, "learning_rate": 6.268608614120858e-06, "loss": 0.0093, "step": 38505 }, { "epoch": 17.961753731343283, "grad_norm": 0.2663104036183189, "learning_rate": 6.265738076681392e-06, "loss": 0.0087, "step": 38510 }, { "epoch": 17.964085820895523, "grad_norm": 0.27337954070631654, "learning_rate": 6.262870696573219e-06, "loss": 0.0087, "step": 38515 }, { "epoch": 17.96641791044776, "grad_norm": 0.296242463785682, "learning_rate": 6.2600064742227e-06, "loss": 0.0089, "step": 38520 }, { "epoch": 17.96875, "grad_norm": 0.29309265977669696, "learning_rate": 6.2571454100557136e-06, "loss": 0.0086, "step": 38525 }, { "epoch": 17.97108208955224, "grad_norm": 0.2505894692164311, "learning_rate": 6.254287504497672e-06, "loss": 0.0088, "step": 38530 }, { "epoch": 17.973414179104477, "grad_norm": 0.3135737390342521, "learning_rate": 6.251432757973519e-06, "loss": 0.0087, "step": 38535 }, { "epoch": 17.975746268656717, "grad_norm": 0.25740254938254015, "learning_rate": 6.248581170907729e-06, "loss": 0.0085, "step": 38540 }, { "epoch": 17.978078358208954, "grad_norm": 0.2615583095661884, "learning_rate": 6.245732743724305e-06, "loss": 0.0086, "step": 38545 }, { "epoch": 17.980410447761194, "grad_norm": 0.27363254329536785, "learning_rate": 6.242887476846785e-06, "loss": 0.009, "step": 38550 }, { "epoch": 17.982742537313435, "grad_norm": 0.2687704176558345, "learning_rate": 6.2400453706982216e-06, "loss": 0.0085, "step": 38555 }, { "epoch": 17.98507462686567, "grad_norm": 0.25858814380225675, "learning_rate": 6.237206425701223e-06, "loss": 0.0086, "step": 38560 }, { "epoch": 17.98740671641791, "grad_norm": 0.25528462060968293, "learning_rate": 6.234370642277903e-06, "loss": 0.0084, "step": 38565 }, { "epoch": 17.98973880597015, "grad_norm": 0.2707817039372987, "learning_rate": 6.231538020849919e-06, "loss": 0.0087, "step": 38570 }, { "epoch": 17.99207089552239, "grad_norm": 0.2710874956606427, "learning_rate": 6.228708561838452e-06, "loss": 0.0088, "step": 38575 }, { "epoch": 17.994402985074625, "grad_norm": 0.2474021905268159, "learning_rate": 6.225882265664218e-06, "loss": 0.0088, "step": 38580 }, { "epoch": 17.996735074626866, "grad_norm": 0.2724678624054538, "learning_rate": 6.223059132747463e-06, "loss": 0.0087, "step": 38585 }, { "epoch": 17.999067164179106, "grad_norm": 0.2629214809333501, "learning_rate": 6.220239163507955e-06, "loss": 0.009, "step": 38590 }, { "epoch": 18.001399253731343, "grad_norm": 0.20138216245147975, "learning_rate": 6.217422358364999e-06, "loss": 0.0072, "step": 38595 }, { "epoch": 18.003731343283583, "grad_norm": 0.20293748928467878, "learning_rate": 6.214608717737426e-06, "loss": 0.0059, "step": 38600 }, { "epoch": 18.00606343283582, "grad_norm": 0.20724013281402448, "learning_rate": 6.211798242043596e-06, "loss": 0.0057, "step": 38605 }, { "epoch": 18.00839552238806, "grad_norm": 0.19963727238674944, "learning_rate": 6.2089909317014e-06, "loss": 0.0059, "step": 38610 }, { "epoch": 18.010727611940297, "grad_norm": 0.19374361311658098, "learning_rate": 6.206186787128262e-06, "loss": 0.0056, "step": 38615 }, { "epoch": 18.013059701492537, "grad_norm": 0.19349989869988363, "learning_rate": 6.2033858087411275e-06, "loss": 0.0054, "step": 38620 }, { "epoch": 18.015391791044777, "grad_norm": 0.19312261911369216, "learning_rate": 6.200587996956478e-06, "loss": 0.0054, "step": 38625 }, { "epoch": 18.017723880597014, "grad_norm": 0.21620020190933661, "learning_rate": 6.197793352190316e-06, "loss": 0.0058, "step": 38630 }, { "epoch": 18.020055970149254, "grad_norm": 0.18413056698455807, "learning_rate": 6.1950018748581865e-06, "loss": 0.0055, "step": 38635 }, { "epoch": 18.02238805970149, "grad_norm": 0.1647685760784043, "learning_rate": 6.192213565375147e-06, "loss": 0.0055, "step": 38640 }, { "epoch": 18.02472014925373, "grad_norm": 0.20772569575934116, "learning_rate": 6.189428424155801e-06, "loss": 0.0057, "step": 38645 }, { "epoch": 18.02705223880597, "grad_norm": 0.16708093679420682, "learning_rate": 6.186646451614265e-06, "loss": 0.0055, "step": 38650 }, { "epoch": 18.02938432835821, "grad_norm": 0.1688759322077456, "learning_rate": 6.1838676481641945e-06, "loss": 0.0055, "step": 38655 }, { "epoch": 18.03171641791045, "grad_norm": 0.18890151213457967, "learning_rate": 6.1810920142187726e-06, "loss": 0.0055, "step": 38660 }, { "epoch": 18.034048507462686, "grad_norm": 0.16939190782446176, "learning_rate": 6.178319550190705e-06, "loss": 0.0054, "step": 38665 }, { "epoch": 18.036380597014926, "grad_norm": 0.1923324732543967, "learning_rate": 6.175550256492235e-06, "loss": 0.0055, "step": 38670 }, { "epoch": 18.038712686567163, "grad_norm": 0.17465981308786177, "learning_rate": 6.172784133535133e-06, "loss": 0.0057, "step": 38675 }, { "epoch": 18.041044776119403, "grad_norm": 0.1782624684337386, "learning_rate": 6.170021181730681e-06, "loss": 0.0055, "step": 38680 }, { "epoch": 18.043376865671643, "grad_norm": 0.20587040327176673, "learning_rate": 6.167261401489721e-06, "loss": 0.0054, "step": 38685 }, { "epoch": 18.04570895522388, "grad_norm": 0.18525953318308103, "learning_rate": 6.164504793222589e-06, "loss": 0.0053, "step": 38690 }, { "epoch": 18.04804104477612, "grad_norm": 0.18283185976281213, "learning_rate": 6.161751357339184e-06, "loss": 0.0053, "step": 38695 }, { "epoch": 18.050373134328357, "grad_norm": 0.15490700804946492, "learning_rate": 6.159001094248904e-06, "loss": 0.0052, "step": 38700 }, { "epoch": 18.052705223880597, "grad_norm": 0.1680192381907752, "learning_rate": 6.156254004360687e-06, "loss": 0.0055, "step": 38705 }, { "epoch": 18.055037313432837, "grad_norm": 0.1906622207471288, "learning_rate": 6.153510088083e-06, "loss": 0.0055, "step": 38710 }, { "epoch": 18.057369402985074, "grad_norm": 0.19889584183756012, "learning_rate": 6.150769345823841e-06, "loss": 0.0055, "step": 38715 }, { "epoch": 18.059701492537314, "grad_norm": 0.17492118849267632, "learning_rate": 6.1480317779907285e-06, "loss": 0.0051, "step": 38720 }, { "epoch": 18.06203358208955, "grad_norm": 0.17978622989078508, "learning_rate": 6.145297384990715e-06, "loss": 0.0054, "step": 38725 }, { "epoch": 18.06436567164179, "grad_norm": 0.1783773241370518, "learning_rate": 6.1425661672303735e-06, "loss": 0.0055, "step": 38730 }, { "epoch": 18.06669776119403, "grad_norm": 0.18302134210698612, "learning_rate": 6.139838125115818e-06, "loss": 0.0054, "step": 38735 }, { "epoch": 18.06902985074627, "grad_norm": 0.16158444143859757, "learning_rate": 6.1371132590526744e-06, "loss": 0.0054, "step": 38740 }, { "epoch": 18.07136194029851, "grad_norm": 0.18415125921339034, "learning_rate": 6.134391569446108e-06, "loss": 0.0054, "step": 38745 }, { "epoch": 18.073694029850746, "grad_norm": 0.1955402879833066, "learning_rate": 6.1316730567008086e-06, "loss": 0.0051, "step": 38750 }, { "epoch": 18.076026119402986, "grad_norm": 0.19845950549439434, "learning_rate": 6.12895772122099e-06, "loss": 0.0054, "step": 38755 }, { "epoch": 18.078358208955223, "grad_norm": 0.16879892256180518, "learning_rate": 6.126245563410399e-06, "loss": 0.0052, "step": 38760 }, { "epoch": 18.080690298507463, "grad_norm": 0.167082888437753, "learning_rate": 6.1235365836723054e-06, "loss": 0.0054, "step": 38765 }, { "epoch": 18.083022388059703, "grad_norm": 0.18593928871537588, "learning_rate": 6.120830782409515e-06, "loss": 0.0053, "step": 38770 }, { "epoch": 18.08535447761194, "grad_norm": 0.18179523590425573, "learning_rate": 6.118128160024346e-06, "loss": 0.0054, "step": 38775 }, { "epoch": 18.08768656716418, "grad_norm": 0.16970782014193106, "learning_rate": 6.115428716918657e-06, "loss": 0.0054, "step": 38780 }, { "epoch": 18.090018656716417, "grad_norm": 0.15664161489787545, "learning_rate": 6.112732453493826e-06, "loss": 0.0054, "step": 38785 }, { "epoch": 18.092350746268657, "grad_norm": 0.17081041689748788, "learning_rate": 6.110039370150765e-06, "loss": 0.0053, "step": 38790 }, { "epoch": 18.094682835820894, "grad_norm": 0.16895521730302507, "learning_rate": 6.107349467289907e-06, "loss": 0.0055, "step": 38795 }, { "epoch": 18.097014925373134, "grad_norm": 0.1903166468601966, "learning_rate": 6.104662745311222e-06, "loss": 0.0053, "step": 38800 }, { "epoch": 18.099347014925375, "grad_norm": 0.18404422173686188, "learning_rate": 6.1019792046141875e-06, "loss": 0.0053, "step": 38805 }, { "epoch": 18.10167910447761, "grad_norm": 0.19419581256238755, "learning_rate": 6.099298845597832e-06, "loss": 0.0053, "step": 38810 }, { "epoch": 18.10401119402985, "grad_norm": 0.16207610703292943, "learning_rate": 6.096621668660686e-06, "loss": 0.0052, "step": 38815 }, { "epoch": 18.10634328358209, "grad_norm": 0.15901358997250714, "learning_rate": 6.093947674200838e-06, "loss": 0.0054, "step": 38820 }, { "epoch": 18.10867537313433, "grad_norm": 0.17159636152647104, "learning_rate": 6.09127686261587e-06, "loss": 0.0054, "step": 38825 }, { "epoch": 18.11100746268657, "grad_norm": 0.192533563548582, "learning_rate": 6.088609234302912e-06, "loss": 0.0054, "step": 38830 }, { "epoch": 18.113339552238806, "grad_norm": 0.16884817563440566, "learning_rate": 6.085944789658615e-06, "loss": 0.0055, "step": 38835 }, { "epoch": 18.115671641791046, "grad_norm": 0.18830716351220722, "learning_rate": 6.083283529079157e-06, "loss": 0.0054, "step": 38840 }, { "epoch": 18.118003731343283, "grad_norm": 0.17904102429013788, "learning_rate": 6.08062545296024e-06, "loss": 0.0054, "step": 38845 }, { "epoch": 18.120335820895523, "grad_norm": 0.19114611448545155, "learning_rate": 6.077970561697095e-06, "loss": 0.0054, "step": 38850 }, { "epoch": 18.12266791044776, "grad_norm": 0.18072928326533538, "learning_rate": 6.075318855684477e-06, "loss": 0.0054, "step": 38855 }, { "epoch": 18.125, "grad_norm": 0.17118622762786762, "learning_rate": 6.072670335316676e-06, "loss": 0.0054, "step": 38860 }, { "epoch": 18.12733208955224, "grad_norm": 0.15480201185447967, "learning_rate": 6.070025000987492e-06, "loss": 0.0051, "step": 38865 }, { "epoch": 18.129664179104477, "grad_norm": 0.16240319699572472, "learning_rate": 6.067382853090269e-06, "loss": 0.0054, "step": 38870 }, { "epoch": 18.131996268656717, "grad_norm": 0.1646456872603462, "learning_rate": 6.064743892017864e-06, "loss": 0.005, "step": 38875 }, { "epoch": 18.134328358208954, "grad_norm": 0.19001317531786122, "learning_rate": 6.062108118162669e-06, "loss": 0.0054, "step": 38880 }, { "epoch": 18.136660447761194, "grad_norm": 0.1826648361534731, "learning_rate": 6.059475531916595e-06, "loss": 0.0053, "step": 38885 }, { "epoch": 18.13899253731343, "grad_norm": 0.16895058081134068, "learning_rate": 6.056846133671083e-06, "loss": 0.0053, "step": 38890 }, { "epoch": 18.14132462686567, "grad_norm": 0.16396444832395388, "learning_rate": 6.054219923817101e-06, "loss": 0.0054, "step": 38895 }, { "epoch": 18.14365671641791, "grad_norm": 0.17232423444486555, "learning_rate": 6.051596902745143e-06, "loss": 0.0053, "step": 38900 }, { "epoch": 18.14598880597015, "grad_norm": 0.17095676713750993, "learning_rate": 6.048977070845219e-06, "loss": 0.0055, "step": 38905 }, { "epoch": 18.14832089552239, "grad_norm": 0.16422584037892848, "learning_rate": 6.0463604285068834e-06, "loss": 0.0052, "step": 38910 }, { "epoch": 18.150652985074625, "grad_norm": 0.15650970020434787, "learning_rate": 6.043746976119201e-06, "loss": 0.0053, "step": 38915 }, { "epoch": 18.152985074626866, "grad_norm": 0.17073933888783366, "learning_rate": 6.0411367140707625e-06, "loss": 0.0053, "step": 38920 }, { "epoch": 18.155317164179106, "grad_norm": 0.18287331369176338, "learning_rate": 6.038529642749697e-06, "loss": 0.0053, "step": 38925 }, { "epoch": 18.157649253731343, "grad_norm": 0.16901708242850155, "learning_rate": 6.035925762543644e-06, "loss": 0.0056, "step": 38930 }, { "epoch": 18.159981343283583, "grad_norm": 0.17686203939344763, "learning_rate": 6.03332507383978e-06, "loss": 0.0054, "step": 38935 }, { "epoch": 18.16231343283582, "grad_norm": 0.16942422670465412, "learning_rate": 6.030727577024802e-06, "loss": 0.0053, "step": 38940 }, { "epoch": 18.16464552238806, "grad_norm": 0.18012872687987108, "learning_rate": 6.028133272484936e-06, "loss": 0.0052, "step": 38945 }, { "epoch": 18.166977611940297, "grad_norm": 0.16951256215021093, "learning_rate": 6.025542160605923e-06, "loss": 0.0056, "step": 38950 }, { "epoch": 18.169309701492537, "grad_norm": 0.17927180323456457, "learning_rate": 6.022954241773038e-06, "loss": 0.0052, "step": 38955 }, { "epoch": 18.171641791044777, "grad_norm": 0.17083382765773222, "learning_rate": 6.020369516371085e-06, "loss": 0.0051, "step": 38960 }, { "epoch": 18.173973880597014, "grad_norm": 0.18356117278825065, "learning_rate": 6.017787984784381e-06, "loss": 0.0053, "step": 38965 }, { "epoch": 18.176305970149254, "grad_norm": 0.15642306860378943, "learning_rate": 6.015209647396781e-06, "loss": 0.0052, "step": 38970 }, { "epoch": 18.17863805970149, "grad_norm": 0.16234840326076672, "learning_rate": 6.012634504591658e-06, "loss": 0.0053, "step": 38975 }, { "epoch": 18.18097014925373, "grad_norm": 0.16148370240484067, "learning_rate": 6.010062556751906e-06, "loss": 0.0053, "step": 38980 }, { "epoch": 18.18330223880597, "grad_norm": 0.16023227830366624, "learning_rate": 6.0074938042599574e-06, "loss": 0.0053, "step": 38985 }, { "epoch": 18.18563432835821, "grad_norm": 0.17248289024430544, "learning_rate": 6.00492824749775e-06, "loss": 0.0055, "step": 38990 }, { "epoch": 18.18796641791045, "grad_norm": 0.17042959230800264, "learning_rate": 6.00236588684677e-06, "loss": 0.0054, "step": 38995 }, { "epoch": 18.190298507462686, "grad_norm": 0.19932572291491202, "learning_rate": 5.999806722688007e-06, "loss": 0.0054, "step": 39000 }, { "epoch": 18.192630597014926, "grad_norm": 0.1842213841560276, "learning_rate": 5.9972507554019895e-06, "loss": 0.0055, "step": 39005 }, { "epoch": 18.194962686567163, "grad_norm": 0.19109555899338848, "learning_rate": 5.994697985368761e-06, "loss": 0.0052, "step": 39010 }, { "epoch": 18.197294776119403, "grad_norm": 0.17643380823582713, "learning_rate": 5.992148412967895e-06, "loss": 0.0054, "step": 39015 }, { "epoch": 18.199626865671643, "grad_norm": 0.1708531427606983, "learning_rate": 5.98960203857849e-06, "loss": 0.0055, "step": 39020 }, { "epoch": 18.20195895522388, "grad_norm": 0.16958609657596238, "learning_rate": 5.987058862579167e-06, "loss": 0.0056, "step": 39025 }, { "epoch": 18.20429104477612, "grad_norm": 0.18864710455370856, "learning_rate": 5.98451888534807e-06, "loss": 0.0054, "step": 39030 }, { "epoch": 18.206623134328357, "grad_norm": 0.190104400970232, "learning_rate": 5.981982107262877e-06, "loss": 0.0055, "step": 39035 }, { "epoch": 18.208955223880597, "grad_norm": 0.16241825980448815, "learning_rate": 5.9794485287007696e-06, "loss": 0.0054, "step": 39040 }, { "epoch": 18.211287313432837, "grad_norm": 0.17598130049821245, "learning_rate": 5.976918150038478e-06, "loss": 0.0054, "step": 39045 }, { "epoch": 18.213619402985074, "grad_norm": 0.16887744594060103, "learning_rate": 5.974390971652237e-06, "loss": 0.0055, "step": 39050 }, { "epoch": 18.215951492537314, "grad_norm": 0.17404560496986585, "learning_rate": 5.971866993917821e-06, "loss": 0.0054, "step": 39055 }, { "epoch": 18.21828358208955, "grad_norm": 0.1821135345081318, "learning_rate": 5.9693462172105165e-06, "loss": 0.0056, "step": 39060 }, { "epoch": 18.22061567164179, "grad_norm": 0.1665958794884969, "learning_rate": 5.966828641905142e-06, "loss": 0.0054, "step": 39065 }, { "epoch": 18.22294776119403, "grad_norm": 0.16429704265846923, "learning_rate": 5.964314268376031e-06, "loss": 0.0055, "step": 39070 }, { "epoch": 18.22527985074627, "grad_norm": 0.17856782496388668, "learning_rate": 5.961803096997056e-06, "loss": 0.0056, "step": 39075 }, { "epoch": 18.22761194029851, "grad_norm": 0.17865069876314446, "learning_rate": 5.959295128141596e-06, "loss": 0.0054, "step": 39080 }, { "epoch": 18.229944029850746, "grad_norm": 0.16548058939537696, "learning_rate": 5.956790362182567e-06, "loss": 0.0055, "step": 39085 }, { "epoch": 18.232276119402986, "grad_norm": 0.1820064083927832, "learning_rate": 5.9542887994923985e-06, "loss": 0.0054, "step": 39090 }, { "epoch": 18.234608208955223, "grad_norm": 0.18519885990153725, "learning_rate": 5.951790440443055e-06, "loss": 0.0054, "step": 39095 }, { "epoch": 18.236940298507463, "grad_norm": 0.1728454080153649, "learning_rate": 5.949295285406015e-06, "loss": 0.0054, "step": 39100 }, { "epoch": 18.239272388059703, "grad_norm": 0.17661210949561412, "learning_rate": 5.946803334752285e-06, "loss": 0.0054, "step": 39105 }, { "epoch": 18.24160447761194, "grad_norm": 0.2569078881263843, "learning_rate": 5.944314588852393e-06, "loss": 0.0054, "step": 39110 }, { "epoch": 18.24393656716418, "grad_norm": 0.18874334643248952, "learning_rate": 5.941829048076392e-06, "loss": 0.0054, "step": 39115 }, { "epoch": 18.246268656716417, "grad_norm": 0.18951198273795328, "learning_rate": 5.93934671279386e-06, "loss": 0.0055, "step": 39120 }, { "epoch": 18.248600746268657, "grad_norm": 0.17623509251460043, "learning_rate": 5.936867583373895e-06, "loss": 0.0054, "step": 39125 }, { "epoch": 18.250932835820894, "grad_norm": 0.1821379336011862, "learning_rate": 5.934391660185121e-06, "loss": 0.0055, "step": 39130 }, { "epoch": 18.253264925373134, "grad_norm": 0.18709244179547996, "learning_rate": 5.931918943595682e-06, "loss": 0.0053, "step": 39135 }, { "epoch": 18.255597014925375, "grad_norm": 0.18379310439215452, "learning_rate": 5.929449433973249e-06, "loss": 0.0053, "step": 39140 }, { "epoch": 18.25792910447761, "grad_norm": 0.17267307831195425, "learning_rate": 5.926983131685012e-06, "loss": 0.0053, "step": 39145 }, { "epoch": 18.26026119402985, "grad_norm": 0.17080315006035687, "learning_rate": 5.924520037097688e-06, "loss": 0.0054, "step": 39150 }, { "epoch": 18.26259328358209, "grad_norm": 0.18749239390672637, "learning_rate": 5.922060150577517e-06, "loss": 0.0052, "step": 39155 }, { "epoch": 18.26492537313433, "grad_norm": 0.18237393303118937, "learning_rate": 5.919603472490263e-06, "loss": 0.0052, "step": 39160 }, { "epoch": 18.267257462686565, "grad_norm": 0.1589320498495801, "learning_rate": 5.917150003201201e-06, "loss": 0.0055, "step": 39165 }, { "epoch": 18.269589552238806, "grad_norm": 0.2053277375555973, "learning_rate": 5.914699743075149e-06, "loss": 0.0055, "step": 39170 }, { "epoch": 18.271921641791046, "grad_norm": 0.16955141524265224, "learning_rate": 5.9122526924764264e-06, "loss": 0.0053, "step": 39175 }, { "epoch": 18.274253731343283, "grad_norm": 0.23282781157817733, "learning_rate": 5.909808851768898e-06, "loss": 0.0052, "step": 39180 }, { "epoch": 18.276585820895523, "grad_norm": 0.17303254432381698, "learning_rate": 5.9073682213159325e-06, "loss": 0.0055, "step": 39185 }, { "epoch": 18.27891791044776, "grad_norm": 0.17687312620436016, "learning_rate": 5.904930801480427e-06, "loss": 0.0053, "step": 39190 }, { "epoch": 18.28125, "grad_norm": 0.2029166988945874, "learning_rate": 5.902496592624808e-06, "loss": 0.0055, "step": 39195 }, { "epoch": 18.28358208955224, "grad_norm": 0.1648344938247004, "learning_rate": 5.900065595111014e-06, "loss": 0.0056, "step": 39200 }, { "epoch": 18.285914179104477, "grad_norm": 0.17105169851544905, "learning_rate": 5.897637809300514e-06, "loss": 0.0055, "step": 39205 }, { "epoch": 18.288246268656717, "grad_norm": 0.187977371112075, "learning_rate": 5.895213235554298e-06, "loss": 0.0055, "step": 39210 }, { "epoch": 18.290578358208954, "grad_norm": 0.19340340184345345, "learning_rate": 5.892791874232868e-06, "loss": 0.0053, "step": 39215 }, { "epoch": 18.292910447761194, "grad_norm": 0.18726643876183174, "learning_rate": 5.890373725696271e-06, "loss": 0.0056, "step": 39220 }, { "epoch": 18.295242537313435, "grad_norm": 0.20090313163067486, "learning_rate": 5.887958790304047e-06, "loss": 0.0055, "step": 39225 }, { "epoch": 18.29757462686567, "grad_norm": 0.16644268343828678, "learning_rate": 5.885547068415289e-06, "loss": 0.0053, "step": 39230 }, { "epoch": 18.29990671641791, "grad_norm": 0.18741850483022537, "learning_rate": 5.883138560388587e-06, "loss": 0.0056, "step": 39235 }, { "epoch": 18.30223880597015, "grad_norm": 0.1708447359114837, "learning_rate": 5.880733266582066e-06, "loss": 0.0053, "step": 39240 }, { "epoch": 18.30457089552239, "grad_norm": 0.18291939963337522, "learning_rate": 5.878331187353371e-06, "loss": 0.0054, "step": 39245 }, { "epoch": 18.306902985074625, "grad_norm": 0.1701299045801517, "learning_rate": 5.875932323059667e-06, "loss": 0.0055, "step": 39250 }, { "epoch": 18.309235074626866, "grad_norm": 0.1975243203741557, "learning_rate": 5.87353667405764e-06, "loss": 0.0057, "step": 39255 }, { "epoch": 18.311567164179106, "grad_norm": 0.16589551370206423, "learning_rate": 5.871144240703507e-06, "loss": 0.0054, "step": 39260 }, { "epoch": 18.313899253731343, "grad_norm": 0.17629437420276023, "learning_rate": 5.86875502335299e-06, "loss": 0.0053, "step": 39265 }, { "epoch": 18.316231343283583, "grad_norm": 0.17945939556549945, "learning_rate": 5.866369022361354e-06, "loss": 0.0053, "step": 39270 }, { "epoch": 18.31856343283582, "grad_norm": 0.19896555394830237, "learning_rate": 5.863986238083367e-06, "loss": 0.0054, "step": 39275 }, { "epoch": 18.32089552238806, "grad_norm": 0.16888860665830513, "learning_rate": 5.8616066708733255e-06, "loss": 0.0056, "step": 39280 }, { "epoch": 18.323227611940297, "grad_norm": 0.15000356945826232, "learning_rate": 5.859230321085049e-06, "loss": 0.0054, "step": 39285 }, { "epoch": 18.325559701492537, "grad_norm": 0.2000806935342354, "learning_rate": 5.856857189071884e-06, "loss": 0.0055, "step": 39290 }, { "epoch": 18.327891791044777, "grad_norm": 0.19460536790598282, "learning_rate": 5.8544872751866845e-06, "loss": 0.0056, "step": 39295 }, { "epoch": 18.330223880597014, "grad_norm": 0.20793456169203436, "learning_rate": 5.852120579781838e-06, "loss": 0.0053, "step": 39300 }, { "epoch": 18.332555970149254, "grad_norm": 0.20159952325070538, "learning_rate": 5.849757103209252e-06, "loss": 0.0055, "step": 39305 }, { "epoch": 18.33488805970149, "grad_norm": 0.18370744880189555, "learning_rate": 5.847396845820349e-06, "loss": 0.0054, "step": 39310 }, { "epoch": 18.33722014925373, "grad_norm": 0.19370323038999984, "learning_rate": 5.845039807966074e-06, "loss": 0.0054, "step": 39315 }, { "epoch": 18.33955223880597, "grad_norm": 0.16435888175788158, "learning_rate": 5.8426859899969034e-06, "loss": 0.0056, "step": 39320 }, { "epoch": 18.34188432835821, "grad_norm": 0.17500865148723657, "learning_rate": 5.84033539226282e-06, "loss": 0.0056, "step": 39325 }, { "epoch": 18.34421641791045, "grad_norm": 0.2018036515006122, "learning_rate": 5.83798801511334e-06, "loss": 0.0054, "step": 39330 }, { "epoch": 18.346548507462686, "grad_norm": 0.1623473858042481, "learning_rate": 5.835643858897498e-06, "loss": 0.0054, "step": 39335 }, { "epoch": 18.348880597014926, "grad_norm": 0.19691930213432687, "learning_rate": 5.833302923963837e-06, "loss": 0.0057, "step": 39340 }, { "epoch": 18.351212686567163, "grad_norm": 0.21540699763677076, "learning_rate": 5.830965210660445e-06, "loss": 0.0053, "step": 39345 }, { "epoch": 18.353544776119403, "grad_norm": 0.17634164117860857, "learning_rate": 5.828630719334905e-06, "loss": 0.0055, "step": 39350 }, { "epoch": 18.355876865671643, "grad_norm": 0.1654233143374166, "learning_rate": 5.826299450334345e-06, "loss": 0.0054, "step": 39355 }, { "epoch": 18.35820895522388, "grad_norm": 0.21234131003433038, "learning_rate": 5.8239714040053936e-06, "loss": 0.0054, "step": 39360 }, { "epoch": 18.36054104477612, "grad_norm": 0.20985084375644142, "learning_rate": 5.821646580694214e-06, "loss": 0.0057, "step": 39365 }, { "epoch": 18.362873134328357, "grad_norm": 0.18405461504403228, "learning_rate": 5.819324980746483e-06, "loss": 0.0055, "step": 39370 }, { "epoch": 18.365205223880597, "grad_norm": 0.20943078885244987, "learning_rate": 5.817006604507401e-06, "loss": 0.0057, "step": 39375 }, { "epoch": 18.367537313432837, "grad_norm": 0.16682116390664362, "learning_rate": 5.814691452321687e-06, "loss": 0.0054, "step": 39380 }, { "epoch": 18.369869402985074, "grad_norm": 0.19101740447079907, "learning_rate": 5.812379524533587e-06, "loss": 0.0055, "step": 39385 }, { "epoch": 18.372201492537314, "grad_norm": 0.19567940275317633, "learning_rate": 5.810070821486854e-06, "loss": 0.0055, "step": 39390 }, { "epoch": 18.37453358208955, "grad_norm": 0.19014501336702305, "learning_rate": 5.8077653435247774e-06, "loss": 0.0056, "step": 39395 }, { "epoch": 18.37686567164179, "grad_norm": 0.17931026914768877, "learning_rate": 5.805463090990154e-06, "loss": 0.0055, "step": 39400 }, { "epoch": 18.37919776119403, "grad_norm": 0.19664966809939047, "learning_rate": 5.803164064225313e-06, "loss": 0.0056, "step": 39405 }, { "epoch": 18.38152985074627, "grad_norm": 0.16640495589190227, "learning_rate": 5.800868263572093e-06, "loss": 0.0058, "step": 39410 }, { "epoch": 18.38386194029851, "grad_norm": 0.20325313460619537, "learning_rate": 5.7985756893718585e-06, "loss": 0.0055, "step": 39415 }, { "epoch": 18.386194029850746, "grad_norm": 0.21345787794075957, "learning_rate": 5.796286341965492e-06, "loss": 0.0057, "step": 39420 }, { "epoch": 18.388526119402986, "grad_norm": 0.19430661127164456, "learning_rate": 5.794000221693403e-06, "loss": 0.0058, "step": 39425 }, { "epoch": 18.390858208955223, "grad_norm": 0.2449370187296035, "learning_rate": 5.7917173288955105e-06, "loss": 0.006, "step": 39430 }, { "epoch": 18.393190298507463, "grad_norm": 0.19336900165469562, "learning_rate": 5.789437663911261e-06, "loss": 0.0058, "step": 39435 }, { "epoch": 18.395522388059703, "grad_norm": 0.18571192313482765, "learning_rate": 5.787161227079613e-06, "loss": 0.0056, "step": 39440 }, { "epoch": 18.39785447761194, "grad_norm": 0.17549808445716156, "learning_rate": 5.7848880187390615e-06, "loss": 0.0058, "step": 39445 }, { "epoch": 18.40018656716418, "grad_norm": 0.17149894873370639, "learning_rate": 5.782618039227603e-06, "loss": 0.0056, "step": 39450 }, { "epoch": 18.402518656716417, "grad_norm": 0.17946480640245713, "learning_rate": 5.7803512888827626e-06, "loss": 0.0056, "step": 39455 }, { "epoch": 18.404850746268657, "grad_norm": 0.183572815796184, "learning_rate": 5.778087768041589e-06, "loss": 0.0055, "step": 39460 }, { "epoch": 18.407182835820894, "grad_norm": 0.22132280810350421, "learning_rate": 5.7758274770406375e-06, "loss": 0.0055, "step": 39465 }, { "epoch": 18.409514925373134, "grad_norm": 0.195136552483781, "learning_rate": 5.7735704162160005e-06, "loss": 0.0057, "step": 39470 }, { "epoch": 18.411847014925375, "grad_norm": 0.17454261260084336, "learning_rate": 5.771316585903276e-06, "loss": 0.0056, "step": 39475 }, { "epoch": 18.41417910447761, "grad_norm": 0.1982026061840734, "learning_rate": 5.769065986437591e-06, "loss": 0.0056, "step": 39480 }, { "epoch": 18.41651119402985, "grad_norm": 0.213830924393314, "learning_rate": 5.766818618153584e-06, "loss": 0.0056, "step": 39485 }, { "epoch": 18.41884328358209, "grad_norm": 0.18376510879230282, "learning_rate": 5.764574481385419e-06, "loss": 0.0055, "step": 39490 }, { "epoch": 18.42117537313433, "grad_norm": 0.19762880242852626, "learning_rate": 5.762333576466778e-06, "loss": 0.0057, "step": 39495 }, { "epoch": 18.423507462686565, "grad_norm": 0.17182595364946768, "learning_rate": 5.7600959037308626e-06, "loss": 0.0056, "step": 39500 }, { "epoch": 18.425839552238806, "grad_norm": 0.1830767502201477, "learning_rate": 5.75786146351039e-06, "loss": 0.0057, "step": 39505 }, { "epoch": 18.428171641791046, "grad_norm": 0.18352668156049684, "learning_rate": 5.755630256137605e-06, "loss": 0.0059, "step": 39510 }, { "epoch": 18.430503731343283, "grad_norm": 0.19627746697222093, "learning_rate": 5.753402281944261e-06, "loss": 0.0056, "step": 39515 }, { "epoch": 18.432835820895523, "grad_norm": 0.20266873710107491, "learning_rate": 5.7511775412616415e-06, "loss": 0.0055, "step": 39520 }, { "epoch": 18.43516791044776, "grad_norm": 0.18419816952626356, "learning_rate": 5.748956034420539e-06, "loss": 0.0056, "step": 39525 }, { "epoch": 18.4375, "grad_norm": 0.19665379572537242, "learning_rate": 5.74673776175128e-06, "loss": 0.0056, "step": 39530 }, { "epoch": 18.43983208955224, "grad_norm": 0.18751026665837292, "learning_rate": 5.744522723583689e-06, "loss": 0.0058, "step": 39535 }, { "epoch": 18.442164179104477, "grad_norm": 0.2220024439889981, "learning_rate": 5.742310920247127e-06, "loss": 0.0056, "step": 39540 }, { "epoch": 18.444496268656717, "grad_norm": 0.2275231835596355, "learning_rate": 5.740102352070463e-06, "loss": 0.0054, "step": 39545 }, { "epoch": 18.446828358208954, "grad_norm": 0.21872583788351435, "learning_rate": 5.737897019382098e-06, "loss": 0.0058, "step": 39550 }, { "epoch": 18.449160447761194, "grad_norm": 0.22290957359571492, "learning_rate": 5.735694922509938e-06, "loss": 0.0056, "step": 39555 }, { "epoch": 18.451492537313435, "grad_norm": 0.17169452418752867, "learning_rate": 5.733496061781418e-06, "loss": 0.0054, "step": 39560 }, { "epoch": 18.45382462686567, "grad_norm": 0.19184144799225555, "learning_rate": 5.73130043752348e-06, "loss": 0.0056, "step": 39565 }, { "epoch": 18.45615671641791, "grad_norm": 0.1941674330938808, "learning_rate": 5.729108050062603e-06, "loss": 0.0058, "step": 39570 }, { "epoch": 18.45848880597015, "grad_norm": 0.1695045014951776, "learning_rate": 5.726918899724759e-06, "loss": 0.0054, "step": 39575 }, { "epoch": 18.46082089552239, "grad_norm": 0.1984944049233875, "learning_rate": 5.7247329868354705e-06, "loss": 0.0059, "step": 39580 }, { "epoch": 18.463152985074625, "grad_norm": 0.18813810231631534, "learning_rate": 5.722550311719753e-06, "loss": 0.0059, "step": 39585 }, { "epoch": 18.465485074626866, "grad_norm": 0.18085820586622134, "learning_rate": 5.720370874702148e-06, "loss": 0.0058, "step": 39590 }, { "epoch": 18.467817164179106, "grad_norm": 0.1953538989730702, "learning_rate": 5.7181946761067205e-06, "loss": 0.0058, "step": 39595 }, { "epoch": 18.470149253731343, "grad_norm": 0.1840290125666973, "learning_rate": 5.716021716257047e-06, "loss": 0.0057, "step": 39600 }, { "epoch": 18.472481343283583, "grad_norm": 0.20690913865999794, "learning_rate": 5.71385199547623e-06, "loss": 0.0055, "step": 39605 }, { "epoch": 18.47481343283582, "grad_norm": 0.21407772316275864, "learning_rate": 5.7116855140868874e-06, "loss": 0.0057, "step": 39610 }, { "epoch": 18.47714552238806, "grad_norm": 0.20038964854622843, "learning_rate": 5.709522272411145e-06, "loss": 0.0056, "step": 39615 }, { "epoch": 18.479477611940297, "grad_norm": 0.19856685087700252, "learning_rate": 5.707362270770665e-06, "loss": 0.0058, "step": 39620 }, { "epoch": 18.481809701492537, "grad_norm": 0.1926909384095925, "learning_rate": 5.705205509486613e-06, "loss": 0.0056, "step": 39625 }, { "epoch": 18.484141791044777, "grad_norm": 0.19351776509604737, "learning_rate": 5.703051988879689e-06, "loss": 0.0057, "step": 39630 }, { "epoch": 18.486473880597014, "grad_norm": 0.1909145409115887, "learning_rate": 5.700901709270088e-06, "loss": 0.0063, "step": 39635 }, { "epoch": 18.488805970149254, "grad_norm": 0.2040287167470097, "learning_rate": 5.698754670977544e-06, "loss": 0.0057, "step": 39640 }, { "epoch": 18.49113805970149, "grad_norm": 0.1895263801283268, "learning_rate": 5.696610874321296e-06, "loss": 0.0056, "step": 39645 }, { "epoch": 18.49347014925373, "grad_norm": 0.22145874348166777, "learning_rate": 5.69447031962011e-06, "loss": 0.0059, "step": 39650 }, { "epoch": 18.49580223880597, "grad_norm": 0.19084713626768446, "learning_rate": 5.6923330071922634e-06, "loss": 0.0058, "step": 39655 }, { "epoch": 18.49813432835821, "grad_norm": 0.19254441750578644, "learning_rate": 5.690198937355561e-06, "loss": 0.0056, "step": 39660 }, { "epoch": 18.50046641791045, "grad_norm": 0.1836980149714967, "learning_rate": 5.68806811042731e-06, "loss": 0.006, "step": 39665 }, { "epoch": 18.502798507462686, "grad_norm": 0.1943394497801656, "learning_rate": 5.685940526724344e-06, "loss": 0.0058, "step": 39670 }, { "epoch": 18.505130597014926, "grad_norm": 0.21677746191444935, "learning_rate": 5.683816186563018e-06, "loss": 0.0061, "step": 39675 }, { "epoch": 18.507462686567163, "grad_norm": 0.19558148220674637, "learning_rate": 5.6816950902592005e-06, "loss": 0.0057, "step": 39680 }, { "epoch": 18.509794776119403, "grad_norm": 0.20135537731743186, "learning_rate": 5.6795772381282785e-06, "loss": 0.0057, "step": 39685 }, { "epoch": 18.512126865671643, "grad_norm": 0.24103661615887217, "learning_rate": 5.6774626304851555e-06, "loss": 0.006, "step": 39690 }, { "epoch": 18.51445895522388, "grad_norm": 0.22177994911697937, "learning_rate": 5.675351267644256e-06, "loss": 0.0059, "step": 39695 }, { "epoch": 18.51679104477612, "grad_norm": 0.2004964603759077, "learning_rate": 5.673243149919512e-06, "loss": 0.0057, "step": 39700 }, { "epoch": 18.519123134328357, "grad_norm": 0.18020942023045244, "learning_rate": 5.671138277624391e-06, "loss": 0.0054, "step": 39705 }, { "epoch": 18.521455223880597, "grad_norm": 0.20739954040171985, "learning_rate": 5.669036651071857e-06, "loss": 0.0058, "step": 39710 }, { "epoch": 18.523787313432837, "grad_norm": 0.20368152682221163, "learning_rate": 5.66693827057441e-06, "loss": 0.006, "step": 39715 }, { "epoch": 18.526119402985074, "grad_norm": 0.19553661261732358, "learning_rate": 5.664843136444054e-06, "loss": 0.0058, "step": 39720 }, { "epoch": 18.528451492537314, "grad_norm": 0.1879597652499224, "learning_rate": 5.662751248992315e-06, "loss": 0.0058, "step": 39725 }, { "epoch": 18.53078358208955, "grad_norm": 0.20034949083526077, "learning_rate": 5.660662608530239e-06, "loss": 0.0057, "step": 39730 }, { "epoch": 18.53311567164179, "grad_norm": 0.18919058295368038, "learning_rate": 5.658577215368389e-06, "loss": 0.0059, "step": 39735 }, { "epoch": 18.53544776119403, "grad_norm": 0.1734486430460456, "learning_rate": 5.6564950698168385e-06, "loss": 0.0056, "step": 39740 }, { "epoch": 18.53777985074627, "grad_norm": 0.19070187508121111, "learning_rate": 5.654416172185187e-06, "loss": 0.0059, "step": 39745 }, { "epoch": 18.54011194029851, "grad_norm": 0.20952828033300974, "learning_rate": 5.652340522782542e-06, "loss": 0.0058, "step": 39750 }, { "epoch": 18.542444029850746, "grad_norm": 0.21441351080126025, "learning_rate": 5.6502681219175355e-06, "loss": 0.0058, "step": 39755 }, { "epoch": 18.544776119402986, "grad_norm": 0.20690347572766524, "learning_rate": 5.648198969898311e-06, "loss": 0.0059, "step": 39760 }, { "epoch": 18.547108208955223, "grad_norm": 0.21598019538572605, "learning_rate": 5.646133067032536e-06, "loss": 0.0058, "step": 39765 }, { "epoch": 18.549440298507463, "grad_norm": 0.18682411022348158, "learning_rate": 5.644070413627386e-06, "loss": 0.0057, "step": 39770 }, { "epoch": 18.551772388059703, "grad_norm": 0.19159009215322303, "learning_rate": 5.642011009989562e-06, "loss": 0.0057, "step": 39775 }, { "epoch": 18.55410447761194, "grad_norm": 0.18229319415496134, "learning_rate": 5.639954856425273e-06, "loss": 0.006, "step": 39780 }, { "epoch": 18.55643656716418, "grad_norm": 0.22271575350410125, "learning_rate": 5.6379019532402554e-06, "loss": 0.0057, "step": 39785 }, { "epoch": 18.558768656716417, "grad_norm": 0.19787186878947183, "learning_rate": 5.6358523007397485e-06, "loss": 0.006, "step": 39790 }, { "epoch": 18.561100746268657, "grad_norm": 0.192980085912607, "learning_rate": 5.633805899228524e-06, "loss": 0.0057, "step": 39795 }, { "epoch": 18.563432835820894, "grad_norm": 0.22284076504797293, "learning_rate": 5.631762749010855e-06, "loss": 0.0058, "step": 39800 }, { "epoch": 18.565764925373134, "grad_norm": 0.21827937029583544, "learning_rate": 5.629722850390544e-06, "loss": 0.0058, "step": 39805 }, { "epoch": 18.568097014925375, "grad_norm": 0.19340338476093175, "learning_rate": 5.6276862036709e-06, "loss": 0.0058, "step": 39810 }, { "epoch": 18.57042910447761, "grad_norm": 0.17985669962513448, "learning_rate": 5.625652809154753e-06, "loss": 0.0058, "step": 39815 }, { "epoch": 18.57276119402985, "grad_norm": 0.22856022690805033, "learning_rate": 5.6236226671444555e-06, "loss": 0.0059, "step": 39820 }, { "epoch": 18.57509328358209, "grad_norm": 0.19687645631074527, "learning_rate": 5.6215957779418624e-06, "loss": 0.0056, "step": 39825 }, { "epoch": 18.57742537313433, "grad_norm": 0.19598839826117523, "learning_rate": 5.619572141848358e-06, "loss": 0.0056, "step": 39830 }, { "epoch": 18.579757462686565, "grad_norm": 0.21071080661490924, "learning_rate": 5.617551759164836e-06, "loss": 0.0059, "step": 39835 }, { "epoch": 18.582089552238806, "grad_norm": 0.18779607726806066, "learning_rate": 5.615534630191708e-06, "loss": 0.0058, "step": 39840 }, { "epoch": 18.584421641791046, "grad_norm": 0.21144278704566988, "learning_rate": 5.613520755228901e-06, "loss": 0.0059, "step": 39845 }, { "epoch": 18.586753731343283, "grad_norm": 0.22329588212990006, "learning_rate": 5.611510134575859e-06, "loss": 0.0058, "step": 39850 }, { "epoch": 18.589085820895523, "grad_norm": 0.1944619729869692, "learning_rate": 5.609502768531541e-06, "loss": 0.0056, "step": 39855 }, { "epoch": 18.59141791044776, "grad_norm": 0.20277661026622407, "learning_rate": 5.607498657394424e-06, "loss": 0.0058, "step": 39860 }, { "epoch": 18.59375, "grad_norm": 0.21616264107414976, "learning_rate": 5.605497801462503e-06, "loss": 0.0059, "step": 39865 }, { "epoch": 18.59608208955224, "grad_norm": 0.19259292533142583, "learning_rate": 5.603500201033285e-06, "loss": 0.0059, "step": 39870 }, { "epoch": 18.598414179104477, "grad_norm": 0.23347197821127583, "learning_rate": 5.601505856403786e-06, "loss": 0.0059, "step": 39875 }, { "epoch": 18.600746268656717, "grad_norm": 0.19784451987548804, "learning_rate": 5.59951476787056e-06, "loss": 0.0058, "step": 39880 }, { "epoch": 18.603078358208954, "grad_norm": 0.21091286380312047, "learning_rate": 5.59752693572965e-06, "loss": 0.0058, "step": 39885 }, { "epoch": 18.605410447761194, "grad_norm": 0.20011916224347348, "learning_rate": 5.595542360276636e-06, "loss": 0.0059, "step": 39890 }, { "epoch": 18.607742537313435, "grad_norm": 0.20267992754563605, "learning_rate": 5.593561041806601e-06, "loss": 0.0059, "step": 39895 }, { "epoch": 18.61007462686567, "grad_norm": 0.17884897091076435, "learning_rate": 5.591582980614151e-06, "loss": 0.006, "step": 39900 }, { "epoch": 18.61240671641791, "grad_norm": 0.1896635042386597, "learning_rate": 5.589608176993401e-06, "loss": 0.006, "step": 39905 }, { "epoch": 18.61473880597015, "grad_norm": 0.20232175290045504, "learning_rate": 5.587636631237991e-06, "loss": 0.0058, "step": 39910 }, { "epoch": 18.61707089552239, "grad_norm": 0.19840414701476025, "learning_rate": 5.585668343641064e-06, "loss": 0.0059, "step": 39915 }, { "epoch": 18.619402985074625, "grad_norm": 0.20894753377374392, "learning_rate": 5.583703314495294e-06, "loss": 0.006, "step": 39920 }, { "epoch": 18.621735074626866, "grad_norm": 0.19912328187225364, "learning_rate": 5.58174154409285e-06, "loss": 0.0059, "step": 39925 }, { "epoch": 18.624067164179106, "grad_norm": 0.2008857057146078, "learning_rate": 5.579783032725441e-06, "loss": 0.0061, "step": 39930 }, { "epoch": 18.626399253731343, "grad_norm": 0.20089876470928703, "learning_rate": 5.577827780684269e-06, "loss": 0.0057, "step": 39935 }, { "epoch": 18.628731343283583, "grad_norm": 0.19499608047263406, "learning_rate": 5.5758757882600706e-06, "loss": 0.0057, "step": 39940 }, { "epoch": 18.63106343283582, "grad_norm": 0.1822205867291809, "learning_rate": 5.573927055743082e-06, "loss": 0.0058, "step": 39945 }, { "epoch": 18.63339552238806, "grad_norm": 0.1918934627864226, "learning_rate": 5.57198158342306e-06, "loss": 0.0061, "step": 39950 }, { "epoch": 18.635727611940297, "grad_norm": 0.18414180059376864, "learning_rate": 5.5700393715892815e-06, "loss": 0.0058, "step": 39955 }, { "epoch": 18.638059701492537, "grad_norm": 0.18970387506954706, "learning_rate": 5.568100420530533e-06, "loss": 0.0057, "step": 39960 }, { "epoch": 18.640391791044777, "grad_norm": 0.20704893643925804, "learning_rate": 5.566164730535119e-06, "loss": 0.0058, "step": 39965 }, { "epoch": 18.642723880597014, "grad_norm": 0.22967481962772351, "learning_rate": 5.5642323018908595e-06, "loss": 0.0058, "step": 39970 }, { "epoch": 18.645055970149254, "grad_norm": 0.21997130687090896, "learning_rate": 5.5623031348850815e-06, "loss": 0.0058, "step": 39975 }, { "epoch": 18.64738805970149, "grad_norm": 0.21158919129752682, "learning_rate": 5.560377229804644e-06, "loss": 0.0058, "step": 39980 }, { "epoch": 18.64972014925373, "grad_norm": 0.21316766741168056, "learning_rate": 5.558454586935901e-06, "loss": 0.0061, "step": 39985 }, { "epoch": 18.65205223880597, "grad_norm": 0.21641940048526384, "learning_rate": 5.556535206564733e-06, "loss": 0.006, "step": 39990 }, { "epoch": 18.65438432835821, "grad_norm": 0.21274605672370717, "learning_rate": 5.554619088976538e-06, "loss": 0.0059, "step": 39995 }, { "epoch": 18.65671641791045, "grad_norm": 0.18834355686178905, "learning_rate": 5.55270623445622e-06, "loss": 0.0058, "step": 40000 }, { "epoch": 18.659048507462686, "grad_norm": 0.2255233133679993, "learning_rate": 5.5507966432882056e-06, "loss": 0.0058, "step": 40005 }, { "epoch": 18.661380597014926, "grad_norm": 0.20656847078323054, "learning_rate": 5.548890315756433e-06, "loss": 0.0058, "step": 40010 }, { "epoch": 18.663712686567163, "grad_norm": 0.2114657273513814, "learning_rate": 5.546987252144351e-06, "loss": 0.006, "step": 40015 }, { "epoch": 18.666044776119403, "grad_norm": 0.21318334411932804, "learning_rate": 5.545087452734928e-06, "loss": 0.0058, "step": 40020 }, { "epoch": 18.668376865671643, "grad_norm": 0.19507449690954642, "learning_rate": 5.543190917810647e-06, "loss": 0.0059, "step": 40025 }, { "epoch": 18.67070895522388, "grad_norm": 0.18591002241380447, "learning_rate": 5.541297647653505e-06, "loss": 0.006, "step": 40030 }, { "epoch": 18.67304104477612, "grad_norm": 0.20363373590631575, "learning_rate": 5.539407642545012e-06, "loss": 0.0059, "step": 40035 }, { "epoch": 18.675373134328357, "grad_norm": 0.22838716359135505, "learning_rate": 5.537520902766193e-06, "loss": 0.0059, "step": 40040 }, { "epoch": 18.677705223880597, "grad_norm": 0.21059003973196752, "learning_rate": 5.535637428597591e-06, "loss": 0.006, "step": 40045 }, { "epoch": 18.680037313432837, "grad_norm": 0.19451539015702954, "learning_rate": 5.533757220319257e-06, "loss": 0.0061, "step": 40050 }, { "epoch": 18.682369402985074, "grad_norm": 0.21039418085041478, "learning_rate": 5.531880278210764e-06, "loss": 0.0058, "step": 40055 }, { "epoch": 18.684701492537314, "grad_norm": 0.20545987403959368, "learning_rate": 5.5300066025511885e-06, "loss": 0.0058, "step": 40060 }, { "epoch": 18.68703358208955, "grad_norm": 0.2057723814115029, "learning_rate": 5.528136193619137e-06, "loss": 0.0061, "step": 40065 }, { "epoch": 18.68936567164179, "grad_norm": 0.18996014724929378, "learning_rate": 5.526269051692717e-06, "loss": 0.0058, "step": 40070 }, { "epoch": 18.69169776119403, "grad_norm": 0.20284869758854787, "learning_rate": 5.524405177049553e-06, "loss": 0.0063, "step": 40075 }, { "epoch": 18.69402985074627, "grad_norm": 0.21889697025858265, "learning_rate": 5.522544569966786e-06, "loss": 0.0061, "step": 40080 }, { "epoch": 18.69636194029851, "grad_norm": 0.20693847113150016, "learning_rate": 5.520687230721073e-06, "loss": 0.0059, "step": 40085 }, { "epoch": 18.698694029850746, "grad_norm": 0.19531284600966614, "learning_rate": 5.518833159588582e-06, "loss": 0.0059, "step": 40090 }, { "epoch": 18.701026119402986, "grad_norm": 0.19809762755068394, "learning_rate": 5.516982356844994e-06, "loss": 0.006, "step": 40095 }, { "epoch": 18.703358208955223, "grad_norm": 0.22170011791749117, "learning_rate": 5.515134822765504e-06, "loss": 0.006, "step": 40100 }, { "epoch": 18.705690298507463, "grad_norm": 0.21461132837104044, "learning_rate": 5.513290557624827e-06, "loss": 0.006, "step": 40105 }, { "epoch": 18.708022388059703, "grad_norm": 0.20910447779604324, "learning_rate": 5.511449561697183e-06, "loss": 0.0057, "step": 40110 }, { "epoch": 18.71035447761194, "grad_norm": 0.2187255974161587, "learning_rate": 5.509611835256317e-06, "loss": 0.0063, "step": 40115 }, { "epoch": 18.71268656716418, "grad_norm": 0.21975072426902229, "learning_rate": 5.507777378575474e-06, "loss": 0.0059, "step": 40120 }, { "epoch": 18.715018656716417, "grad_norm": 0.1942697604142443, "learning_rate": 5.505946191927424e-06, "loss": 0.0058, "step": 40125 }, { "epoch": 18.717350746268657, "grad_norm": 0.22750965487672095, "learning_rate": 5.504118275584444e-06, "loss": 0.0062, "step": 40130 }, { "epoch": 18.719682835820894, "grad_norm": 0.22808598508275155, "learning_rate": 5.5022936298183316e-06, "loss": 0.0061, "step": 40135 }, { "epoch": 18.722014925373134, "grad_norm": 0.21411703018111172, "learning_rate": 5.500472254900392e-06, "loss": 0.0062, "step": 40140 }, { "epoch": 18.724347014925375, "grad_norm": 0.23846440381392556, "learning_rate": 5.49865415110145e-06, "loss": 0.0059, "step": 40145 }, { "epoch": 18.72667910447761, "grad_norm": 0.20285705593629716, "learning_rate": 5.49683931869183e-06, "loss": 0.006, "step": 40150 }, { "epoch": 18.72901119402985, "grad_norm": 0.21711617913313663, "learning_rate": 5.495027757941394e-06, "loss": 0.0062, "step": 40155 }, { "epoch": 18.73134328358209, "grad_norm": 0.22602033910615255, "learning_rate": 5.4932194691194905e-06, "loss": 0.006, "step": 40160 }, { "epoch": 18.73367537313433, "grad_norm": 0.20489891997756243, "learning_rate": 5.491414452495006e-06, "loss": 0.0061, "step": 40165 }, { "epoch": 18.736007462686565, "grad_norm": 0.2095281237690962, "learning_rate": 5.489612708336324e-06, "loss": 0.0062, "step": 40170 }, { "epoch": 18.738339552238806, "grad_norm": 0.2188952346278294, "learning_rate": 5.487814236911344e-06, "loss": 0.006, "step": 40175 }, { "epoch": 18.740671641791046, "grad_norm": 0.2083879563087798, "learning_rate": 5.486019038487483e-06, "loss": 0.0061, "step": 40180 }, { "epoch": 18.743003731343283, "grad_norm": 0.2224063920519559, "learning_rate": 5.484227113331673e-06, "loss": 0.0058, "step": 40185 }, { "epoch": 18.745335820895523, "grad_norm": 0.2154968690067093, "learning_rate": 5.482438461710355e-06, "loss": 0.0062, "step": 40190 }, { "epoch": 18.74766791044776, "grad_norm": 0.2265879382905989, "learning_rate": 5.480653083889483e-06, "loss": 0.006, "step": 40195 }, { "epoch": 18.75, "grad_norm": 0.22060437239340455, "learning_rate": 5.4788709801345244e-06, "loss": 0.0061, "step": 40200 }, { "epoch": 18.75233208955224, "grad_norm": 0.22864097396959493, "learning_rate": 5.477092150710465e-06, "loss": 0.0058, "step": 40205 }, { "epoch": 18.754664179104477, "grad_norm": 0.19934358726119156, "learning_rate": 5.475316595881796e-06, "loss": 0.006, "step": 40210 }, { "epoch": 18.756996268656717, "grad_norm": 0.19232294011605708, "learning_rate": 5.473544315912525e-06, "loss": 0.0058, "step": 40215 }, { "epoch": 18.759328358208954, "grad_norm": 0.20173308062780454, "learning_rate": 5.471775311066177e-06, "loss": 0.006, "step": 40220 }, { "epoch": 18.761660447761194, "grad_norm": 0.2270900148495839, "learning_rate": 5.470009581605784e-06, "loss": 0.0062, "step": 40225 }, { "epoch": 18.763992537313435, "grad_norm": 0.22077587723694608, "learning_rate": 5.468247127793893e-06, "loss": 0.0059, "step": 40230 }, { "epoch": 18.76632462686567, "grad_norm": 0.21790642544836053, "learning_rate": 5.46648794989256e-06, "loss": 0.006, "step": 40235 }, { "epoch": 18.76865671641791, "grad_norm": 0.21424707856887928, "learning_rate": 5.464732048163365e-06, "loss": 0.0059, "step": 40240 }, { "epoch": 18.77098880597015, "grad_norm": 0.21173392723036585, "learning_rate": 5.462979422867388e-06, "loss": 0.006, "step": 40245 }, { "epoch": 18.77332089552239, "grad_norm": 0.20637688473413726, "learning_rate": 5.461230074265233e-06, "loss": 0.0061, "step": 40250 }, { "epoch": 18.775652985074625, "grad_norm": 0.22904075221018994, "learning_rate": 5.459484002617008e-06, "loss": 0.006, "step": 40255 }, { "epoch": 18.777985074626866, "grad_norm": 0.1991854890140179, "learning_rate": 5.4577412081823355e-06, "loss": 0.006, "step": 40260 }, { "epoch": 18.780317164179106, "grad_norm": 0.22268356202663755, "learning_rate": 5.456001691220357e-06, "loss": 0.0061, "step": 40265 }, { "epoch": 18.782649253731343, "grad_norm": 0.20921204686798153, "learning_rate": 5.45426545198972e-06, "loss": 0.0059, "step": 40270 }, { "epoch": 18.784981343283583, "grad_norm": 0.2040715362072147, "learning_rate": 5.452532490748581e-06, "loss": 0.0061, "step": 40275 }, { "epoch": 18.78731343283582, "grad_norm": 0.18931998154888638, "learning_rate": 5.450802807754625e-06, "loss": 0.0058, "step": 40280 }, { "epoch": 18.78964552238806, "grad_norm": 0.23912110126220246, "learning_rate": 5.449076403265029e-06, "loss": 0.0061, "step": 40285 }, { "epoch": 18.791977611940297, "grad_norm": 0.2134258556261425, "learning_rate": 5.4473532775365026e-06, "loss": 0.006, "step": 40290 }, { "epoch": 18.794309701492537, "grad_norm": 0.2035626402856155, "learning_rate": 5.44563343082525e-06, "loss": 0.0059, "step": 40295 }, { "epoch": 18.796641791044777, "grad_norm": 0.21138960920601801, "learning_rate": 5.443916863387002e-06, "loss": 0.0062, "step": 40300 }, { "epoch": 18.798973880597014, "grad_norm": 0.21407249162843492, "learning_rate": 5.4422035754769915e-06, "loss": 0.0061, "step": 40305 }, { "epoch": 18.801305970149254, "grad_norm": 0.244217609372156, "learning_rate": 5.4404935673499685e-06, "loss": 0.0061, "step": 40310 }, { "epoch": 18.80363805970149, "grad_norm": 0.237945700089956, "learning_rate": 5.438786839260197e-06, "loss": 0.0061, "step": 40315 }, { "epoch": 18.80597014925373, "grad_norm": 0.22966854574228537, "learning_rate": 5.437083391461452e-06, "loss": 0.0062, "step": 40320 }, { "epoch": 18.80830223880597, "grad_norm": 0.2331300558002787, "learning_rate": 5.4353832242070155e-06, "loss": 0.0062, "step": 40325 }, { "epoch": 18.81063432835821, "grad_norm": 0.2530046168347387, "learning_rate": 5.43368633774969e-06, "loss": 0.0062, "step": 40330 }, { "epoch": 18.81296641791045, "grad_norm": 0.2234071291153039, "learning_rate": 5.43199273234178e-06, "loss": 0.0062, "step": 40335 }, { "epoch": 18.815298507462686, "grad_norm": 0.22040775987679528, "learning_rate": 5.43030240823512e-06, "loss": 0.0063, "step": 40340 }, { "epoch": 18.817630597014926, "grad_norm": 0.2125831038260644, "learning_rate": 5.428615365681034e-06, "loss": 0.006, "step": 40345 }, { "epoch": 18.819962686567163, "grad_norm": 0.20651607189356883, "learning_rate": 5.426931604930375e-06, "loss": 0.0063, "step": 40350 }, { "epoch": 18.822294776119403, "grad_norm": 0.20871588772584826, "learning_rate": 5.425251126233498e-06, "loss": 0.0061, "step": 40355 }, { "epoch": 18.824626865671643, "grad_norm": 0.21549189761685184, "learning_rate": 5.423573929840277e-06, "loss": 0.006, "step": 40360 }, { "epoch": 18.82695895522388, "grad_norm": 0.2139042554637682, "learning_rate": 5.421900016000093e-06, "loss": 0.006, "step": 40365 }, { "epoch": 18.82929104477612, "grad_norm": 0.23775413108680588, "learning_rate": 5.420229384961847e-06, "loss": 0.0062, "step": 40370 }, { "epoch": 18.831623134328357, "grad_norm": 0.2154098653851488, "learning_rate": 5.418562036973937e-06, "loss": 0.0059, "step": 40375 }, { "epoch": 18.833955223880597, "grad_norm": 0.2400463335087359, "learning_rate": 5.416897972284287e-06, "loss": 0.0061, "step": 40380 }, { "epoch": 18.836287313432837, "grad_norm": 0.26772365413163707, "learning_rate": 5.415237191140326e-06, "loss": 0.0062, "step": 40385 }, { "epoch": 18.838619402985074, "grad_norm": 0.23429715992881112, "learning_rate": 5.413579693788995e-06, "loss": 0.0063, "step": 40390 }, { "epoch": 18.840951492537314, "grad_norm": 0.2143341447546882, "learning_rate": 5.411925480476752e-06, "loss": 0.006, "step": 40395 }, { "epoch": 18.84328358208955, "grad_norm": 0.23262090672138439, "learning_rate": 5.410274551449559e-06, "loss": 0.0062, "step": 40400 }, { "epoch": 18.84561567164179, "grad_norm": 0.2069143287320506, "learning_rate": 5.408626906952895e-06, "loss": 0.006, "step": 40405 }, { "epoch": 18.84794776119403, "grad_norm": 0.2112336808505122, "learning_rate": 5.406982547231746e-06, "loss": 0.0062, "step": 40410 }, { "epoch": 18.85027985074627, "grad_norm": 0.2096643387757859, "learning_rate": 5.40534147253062e-06, "loss": 0.006, "step": 40415 }, { "epoch": 18.85261194029851, "grad_norm": 0.22483311952345617, "learning_rate": 5.403703683093517e-06, "loss": 0.0061, "step": 40420 }, { "epoch": 18.854944029850746, "grad_norm": 0.21769435507535637, "learning_rate": 5.402069179163974e-06, "loss": 0.0061, "step": 40425 }, { "epoch": 18.857276119402986, "grad_norm": 0.21981279288231065, "learning_rate": 5.400437960985017e-06, "loss": 0.0061, "step": 40430 }, { "epoch": 18.859608208955223, "grad_norm": 0.22268016017963785, "learning_rate": 5.398810028799196e-06, "loss": 0.0062, "step": 40435 }, { "epoch": 18.861940298507463, "grad_norm": 0.22171631964175476, "learning_rate": 5.397185382848568e-06, "loss": 0.006, "step": 40440 }, { "epoch": 18.864272388059703, "grad_norm": 0.19708325195890353, "learning_rate": 5.3955640233747e-06, "loss": 0.0062, "step": 40445 }, { "epoch": 18.86660447761194, "grad_norm": 0.20974320642404717, "learning_rate": 5.393945950618678e-06, "loss": 0.006, "step": 40450 }, { "epoch": 18.86893656716418, "grad_norm": 0.21628233136330408, "learning_rate": 5.392331164821091e-06, "loss": 0.006, "step": 40455 }, { "epoch": 18.871268656716417, "grad_norm": 0.2238470274010053, "learning_rate": 5.39071966622204e-06, "loss": 0.0061, "step": 40460 }, { "epoch": 18.873600746268657, "grad_norm": 0.2159063831463692, "learning_rate": 5.3891114550611434e-06, "loss": 0.006, "step": 40465 }, { "epoch": 18.875932835820894, "grad_norm": 0.19969592489178267, "learning_rate": 5.387506531577523e-06, "loss": 0.0063, "step": 40470 }, { "epoch": 18.878264925373134, "grad_norm": 0.2253673837743775, "learning_rate": 5.385904896009821e-06, "loss": 0.0064, "step": 40475 }, { "epoch": 18.880597014925375, "grad_norm": 0.21087576083022871, "learning_rate": 5.384306548596178e-06, "loss": 0.0062, "step": 40480 }, { "epoch": 18.88292910447761, "grad_norm": 0.2102353759296161, "learning_rate": 5.382711489574259e-06, "loss": 0.0061, "step": 40485 }, { "epoch": 18.88526119402985, "grad_norm": 0.19562182600881267, "learning_rate": 5.3811197191812296e-06, "loss": 0.0063, "step": 40490 }, { "epoch": 18.88759328358209, "grad_norm": 0.20945695016923718, "learning_rate": 5.379531237653774e-06, "loss": 0.0062, "step": 40495 }, { "epoch": 18.88992537313433, "grad_norm": 0.20737546204758045, "learning_rate": 5.377946045228084e-06, "loss": 0.006, "step": 40500 }, { "epoch": 18.892257462686565, "grad_norm": 0.21288249231918147, "learning_rate": 5.376364142139862e-06, "loss": 0.0062, "step": 40505 }, { "epoch": 18.894589552238806, "grad_norm": 0.22113599374005188, "learning_rate": 5.374785528624317e-06, "loss": 0.0064, "step": 40510 }, { "epoch": 18.896921641791046, "grad_norm": 0.22368606605302113, "learning_rate": 5.3732102049161845e-06, "loss": 0.006, "step": 40515 }, { "epoch": 18.899253731343283, "grad_norm": 0.21060947486672899, "learning_rate": 5.37163817124969e-06, "loss": 0.0061, "step": 40520 }, { "epoch": 18.901585820895523, "grad_norm": 0.21747157563379385, "learning_rate": 5.370069427858584e-06, "loss": 0.0061, "step": 40525 }, { "epoch": 18.90391791044776, "grad_norm": 0.2059313741125952, "learning_rate": 5.368503974976122e-06, "loss": 0.0061, "step": 40530 }, { "epoch": 18.90625, "grad_norm": 0.23190921685369822, "learning_rate": 5.366941812835075e-06, "loss": 0.006, "step": 40535 }, { "epoch": 18.90858208955224, "grad_norm": 0.21801596373932836, "learning_rate": 5.36538294166772e-06, "loss": 0.0062, "step": 40540 }, { "epoch": 18.910914179104477, "grad_norm": 0.2361033595065695, "learning_rate": 5.363827361705844e-06, "loss": 0.0064, "step": 40545 }, { "epoch": 18.913246268656717, "grad_norm": 0.20606111010855302, "learning_rate": 5.362275073180749e-06, "loss": 0.0062, "step": 40550 }, { "epoch": 18.915578358208954, "grad_norm": 0.22822474006021617, "learning_rate": 5.360726076323246e-06, "loss": 0.0062, "step": 40555 }, { "epoch": 18.917910447761194, "grad_norm": 0.25067088215086214, "learning_rate": 5.3591803713636545e-06, "loss": 0.0063, "step": 40560 }, { "epoch": 18.920242537313435, "grad_norm": 0.22841962820975623, "learning_rate": 5.357637958531805e-06, "loss": 0.0063, "step": 40565 }, { "epoch": 18.92257462686567, "grad_norm": 0.2167243695935079, "learning_rate": 5.3560988380570405e-06, "loss": 0.006, "step": 40570 }, { "epoch": 18.92490671641791, "grad_norm": 0.2298917081792521, "learning_rate": 5.3545630101682155e-06, "loss": 0.0061, "step": 40575 }, { "epoch": 18.92723880597015, "grad_norm": 0.20924197613178092, "learning_rate": 5.353030475093694e-06, "loss": 0.0063, "step": 40580 }, { "epoch": 18.92957089552239, "grad_norm": 0.2137466262101127, "learning_rate": 5.351501233061343e-06, "loss": 0.0061, "step": 40585 }, { "epoch": 18.931902985074625, "grad_norm": 0.20729319391508927, "learning_rate": 5.349975284298552e-06, "loss": 0.006, "step": 40590 }, { "epoch": 18.934235074626866, "grad_norm": 0.21725487522903186, "learning_rate": 5.348452629032209e-06, "loss": 0.0062, "step": 40595 }, { "epoch": 18.936567164179106, "grad_norm": 0.2143692066542562, "learning_rate": 5.346933267488726e-06, "loss": 0.0064, "step": 40600 }, { "epoch": 18.938899253731343, "grad_norm": 0.2534034323082815, "learning_rate": 5.345417199894012e-06, "loss": 0.0062, "step": 40605 }, { "epoch": 18.941231343283583, "grad_norm": 0.22528880114451194, "learning_rate": 5.343904426473493e-06, "loss": 0.0063, "step": 40610 }, { "epoch": 18.94356343283582, "grad_norm": 0.2203880230498759, "learning_rate": 5.342394947452106e-06, "loss": 0.0062, "step": 40615 }, { "epoch": 18.94589552238806, "grad_norm": 0.20339117760838918, "learning_rate": 5.340888763054291e-06, "loss": 0.0061, "step": 40620 }, { "epoch": 18.948227611940297, "grad_norm": 0.22026809789959145, "learning_rate": 5.3393858735040074e-06, "loss": 0.0062, "step": 40625 }, { "epoch": 18.950559701492537, "grad_norm": 0.2301011631778669, "learning_rate": 5.337886279024722e-06, "loss": 0.0062, "step": 40630 }, { "epoch": 18.952891791044777, "grad_norm": 0.231747493550917, "learning_rate": 5.336389979839405e-06, "loss": 0.0063, "step": 40635 }, { "epoch": 18.955223880597014, "grad_norm": 0.2207771227530034, "learning_rate": 5.3348969761705446e-06, "loss": 0.0063, "step": 40640 }, { "epoch": 18.957555970149254, "grad_norm": 0.2263755927205801, "learning_rate": 5.3334072682401365e-06, "loss": 0.0061, "step": 40645 }, { "epoch": 18.95988805970149, "grad_norm": 0.22418562923185378, "learning_rate": 5.331920856269686e-06, "loss": 0.0062, "step": 40650 }, { "epoch": 18.96222014925373, "grad_norm": 0.22587324697729685, "learning_rate": 5.330437740480206e-06, "loss": 0.006, "step": 40655 }, { "epoch": 18.96455223880597, "grad_norm": 0.21237835640661695, "learning_rate": 5.328957921092224e-06, "loss": 0.0061, "step": 40660 }, { "epoch": 18.96688432835821, "grad_norm": 0.23524908021059887, "learning_rate": 5.327481398325775e-06, "loss": 0.006, "step": 40665 }, { "epoch": 18.96921641791045, "grad_norm": 0.20784371138554966, "learning_rate": 5.326008172400402e-06, "loss": 0.0061, "step": 40670 }, { "epoch": 18.971548507462686, "grad_norm": 0.23412805922994953, "learning_rate": 5.324538243535162e-06, "loss": 0.0061, "step": 40675 }, { "epoch": 18.973880597014926, "grad_norm": 0.2064320177942918, "learning_rate": 5.323071611948619e-06, "loss": 0.0061, "step": 40680 }, { "epoch": 18.976212686567163, "grad_norm": 0.20074961651190928, "learning_rate": 5.3216082778588426e-06, "loss": 0.0061, "step": 40685 }, { "epoch": 18.978544776119403, "grad_norm": 0.20984968957206832, "learning_rate": 5.320148241483422e-06, "loss": 0.0063, "step": 40690 }, { "epoch": 18.980876865671643, "grad_norm": 0.22703242368015863, "learning_rate": 5.318691503039448e-06, "loss": 0.0062, "step": 40695 }, { "epoch": 18.98320895522388, "grad_norm": 0.21020407933065932, "learning_rate": 5.317238062743527e-06, "loss": 0.0061, "step": 40700 }, { "epoch": 18.98554104477612, "grad_norm": 0.21439709418440162, "learning_rate": 5.315787920811766e-06, "loss": 0.0064, "step": 40705 }, { "epoch": 18.987873134328357, "grad_norm": 0.2312809849046748, "learning_rate": 5.31434107745979e-06, "loss": 0.0062, "step": 40710 }, { "epoch": 18.990205223880597, "grad_norm": 0.21055137489606954, "learning_rate": 5.312897532902733e-06, "loss": 0.006, "step": 40715 }, { "epoch": 18.992537313432837, "grad_norm": 0.23041684395182446, "learning_rate": 5.311457287355232e-06, "loss": 0.0062, "step": 40720 }, { "epoch": 18.994869402985074, "grad_norm": 0.21672705061980607, "learning_rate": 5.310020341031439e-06, "loss": 0.0063, "step": 40725 }, { "epoch": 18.997201492537314, "grad_norm": 0.2522773278160765, "learning_rate": 5.3085866941450185e-06, "loss": 0.0061, "step": 40730 }, { "epoch": 18.99953358208955, "grad_norm": 0.22104410362620722, "learning_rate": 5.307156346909135e-06, "loss": 0.0062, "step": 40735 }, { "epoch": 19.00186567164179, "grad_norm": 0.14633423018006617, "learning_rate": 5.3057292995364695e-06, "loss": 0.0051, "step": 40740 }, { "epoch": 19.00419776119403, "grad_norm": 0.13298499954275805, "learning_rate": 5.304305552239209e-06, "loss": 0.0045, "step": 40745 }, { "epoch": 19.00652985074627, "grad_norm": 0.1614061327469071, "learning_rate": 5.302885105229052e-06, "loss": 0.0045, "step": 40750 }, { "epoch": 19.00886194029851, "grad_norm": 0.19748865074437688, "learning_rate": 5.301467958717205e-06, "loss": 0.0044, "step": 40755 }, { "epoch": 19.011194029850746, "grad_norm": 0.14523889436214665, "learning_rate": 5.300054112914385e-06, "loss": 0.0044, "step": 40760 }, { "epoch": 19.013526119402986, "grad_norm": 0.15689785160661263, "learning_rate": 5.298643568030817e-06, "loss": 0.0044, "step": 40765 }, { "epoch": 19.015858208955223, "grad_norm": 0.14267257625152388, "learning_rate": 5.297236324276231e-06, "loss": 0.0044, "step": 40770 }, { "epoch": 19.018190298507463, "grad_norm": 0.16637638426526777, "learning_rate": 5.295832381859881e-06, "loss": 0.0044, "step": 40775 }, { "epoch": 19.020522388059703, "grad_norm": 0.13973621578999862, "learning_rate": 5.294431740990509e-06, "loss": 0.0044, "step": 40780 }, { "epoch": 19.02285447761194, "grad_norm": 0.16216453553508378, "learning_rate": 5.293034401876384e-06, "loss": 0.0045, "step": 40785 }, { "epoch": 19.02518656716418, "grad_norm": 0.13291453722792995, "learning_rate": 5.291640364725272e-06, "loss": 0.0044, "step": 40790 }, { "epoch": 19.027518656716417, "grad_norm": 0.15624912018918566, "learning_rate": 5.290249629744457e-06, "loss": 0.0042, "step": 40795 }, { "epoch": 19.029850746268657, "grad_norm": 0.13667111880734736, "learning_rate": 5.288862197140726e-06, "loss": 0.0044, "step": 40800 }, { "epoch": 19.032182835820894, "grad_norm": 0.16667035443932698, "learning_rate": 5.28747806712038e-06, "loss": 0.0044, "step": 40805 }, { "epoch": 19.034514925373134, "grad_norm": 0.15477216552968828, "learning_rate": 5.286097239889219e-06, "loss": 0.0042, "step": 40810 }, { "epoch": 19.036847014925375, "grad_norm": 0.17288887296345037, "learning_rate": 5.284719715652565e-06, "loss": 0.0044, "step": 40815 }, { "epoch": 19.03917910447761, "grad_norm": 0.17199780980812832, "learning_rate": 5.283345494615238e-06, "loss": 0.0046, "step": 40820 }, { "epoch": 19.04151119402985, "grad_norm": 0.12593073451739253, "learning_rate": 5.281974576981579e-06, "loss": 0.0044, "step": 40825 }, { "epoch": 19.04384328358209, "grad_norm": 0.15572587364844806, "learning_rate": 5.280606962955423e-06, "loss": 0.0044, "step": 40830 }, { "epoch": 19.04617537313433, "grad_norm": 0.13923275542381355, "learning_rate": 5.279242652740121e-06, "loss": 0.0043, "step": 40835 }, { "epoch": 19.04850746268657, "grad_norm": 0.15058809110590765, "learning_rate": 5.277881646538537e-06, "loss": 0.0044, "step": 40840 }, { "epoch": 19.050839552238806, "grad_norm": 0.14149667438094082, "learning_rate": 5.276523944553039e-06, "loss": 0.0043, "step": 40845 }, { "epoch": 19.053171641791046, "grad_norm": 0.17079321455868, "learning_rate": 5.275169546985502e-06, "loss": 0.0045, "step": 40850 }, { "epoch": 19.055503731343283, "grad_norm": 0.1548873070681626, "learning_rate": 5.2738184540373165e-06, "loss": 0.0045, "step": 40855 }, { "epoch": 19.057835820895523, "grad_norm": 0.1541025922115986, "learning_rate": 5.272470665909368e-06, "loss": 0.0042, "step": 40860 }, { "epoch": 19.06016791044776, "grad_norm": 0.16464040853475148, "learning_rate": 5.271126182802072e-06, "loss": 0.0043, "step": 40865 }, { "epoch": 19.0625, "grad_norm": 0.1322592844810485, "learning_rate": 5.269785004915328e-06, "loss": 0.0043, "step": 40870 }, { "epoch": 19.06483208955224, "grad_norm": 0.1760820984967981, "learning_rate": 5.268447132448565e-06, "loss": 0.0043, "step": 40875 }, { "epoch": 19.067164179104477, "grad_norm": 0.14853858536449116, "learning_rate": 5.267112565600707e-06, "loss": 0.0042, "step": 40880 }, { "epoch": 19.069496268656717, "grad_norm": 0.16731432710212338, "learning_rate": 5.265781304570194e-06, "loss": 0.0044, "step": 40885 }, { "epoch": 19.071828358208954, "grad_norm": 0.1483814664094707, "learning_rate": 5.26445334955497e-06, "loss": 0.0044, "step": 40890 }, { "epoch": 19.074160447761194, "grad_norm": 0.13081523814722304, "learning_rate": 5.263128700752493e-06, "loss": 0.0043, "step": 40895 }, { "epoch": 19.07649253731343, "grad_norm": 0.13658682666284352, "learning_rate": 5.261807358359719e-06, "loss": 0.0041, "step": 40900 }, { "epoch": 19.07882462686567, "grad_norm": 0.1533237149858407, "learning_rate": 5.260489322573125e-06, "loss": 0.0045, "step": 40905 }, { "epoch": 19.08115671641791, "grad_norm": 0.15704248572826043, "learning_rate": 5.259174593588688e-06, "loss": 0.0045, "step": 40910 }, { "epoch": 19.08348880597015, "grad_norm": 0.16072933405530979, "learning_rate": 5.257863171601895e-06, "loss": 0.0044, "step": 40915 }, { "epoch": 19.08582089552239, "grad_norm": 0.16136847256864847, "learning_rate": 5.25655505680774e-06, "loss": 0.0044, "step": 40920 }, { "epoch": 19.088152985074625, "grad_norm": 0.147855248178987, "learning_rate": 5.255250249400732e-06, "loss": 0.0044, "step": 40925 }, { "epoch": 19.090485074626866, "grad_norm": 0.13798272932614347, "learning_rate": 5.253948749574879e-06, "loss": 0.0043, "step": 40930 }, { "epoch": 19.092817164179106, "grad_norm": 0.1271862041481399, "learning_rate": 5.252650557523707e-06, "loss": 0.0041, "step": 40935 }, { "epoch": 19.095149253731343, "grad_norm": 0.16703134632592037, "learning_rate": 5.2513556734402384e-06, "loss": 0.0044, "step": 40940 }, { "epoch": 19.097481343283583, "grad_norm": 0.14542435480308954, "learning_rate": 5.2500640975170116e-06, "loss": 0.0042, "step": 40945 }, { "epoch": 19.09981343283582, "grad_norm": 0.13760863135041246, "learning_rate": 5.248775829946076e-06, "loss": 0.0049, "step": 40950 }, { "epoch": 19.10214552238806, "grad_norm": 0.1503730799781232, "learning_rate": 5.247490870918979e-06, "loss": 0.0044, "step": 40955 }, { "epoch": 19.104477611940297, "grad_norm": 0.1403136702600131, "learning_rate": 5.2462092206267864e-06, "loss": 0.0041, "step": 40960 }, { "epoch": 19.106809701492537, "grad_norm": 0.16294334255901172, "learning_rate": 5.244930879260062e-06, "loss": 0.0048, "step": 40965 }, { "epoch": 19.109141791044777, "grad_norm": 0.14003958496446764, "learning_rate": 5.243655847008888e-06, "loss": 0.0041, "step": 40970 }, { "epoch": 19.111473880597014, "grad_norm": 0.12821172875331321, "learning_rate": 5.242384124062848e-06, "loss": 0.0043, "step": 40975 }, { "epoch": 19.113805970149254, "grad_norm": 0.1460144472528093, "learning_rate": 5.241115710611033e-06, "loss": 0.0044, "step": 40980 }, { "epoch": 19.11613805970149, "grad_norm": 0.1671438466466865, "learning_rate": 5.239850606842045e-06, "loss": 0.0042, "step": 40985 }, { "epoch": 19.11847014925373, "grad_norm": 0.15450686826999488, "learning_rate": 5.2385888129439934e-06, "loss": 0.0044, "step": 40990 }, { "epoch": 19.12080223880597, "grad_norm": 0.1703216027808762, "learning_rate": 5.237330329104494e-06, "loss": 0.0043, "step": 40995 }, { "epoch": 19.12313432835821, "grad_norm": 0.14115012490270665, "learning_rate": 5.236075155510675e-06, "loss": 0.0041, "step": 41000 }, { "epoch": 19.12546641791045, "grad_norm": 0.15247626602768088, "learning_rate": 5.234823292349164e-06, "loss": 0.0042, "step": 41005 }, { "epoch": 19.127798507462686, "grad_norm": 0.16269496811374648, "learning_rate": 5.2335747398061e-06, "loss": 0.0042, "step": 41010 }, { "epoch": 19.130130597014926, "grad_norm": 0.1837563096943232, "learning_rate": 5.2323294980671375e-06, "loss": 0.0046, "step": 41015 }, { "epoch": 19.132462686567163, "grad_norm": 0.14455427286537517, "learning_rate": 5.231087567317425e-06, "loss": 0.0043, "step": 41020 }, { "epoch": 19.134794776119403, "grad_norm": 0.1459107282147027, "learning_rate": 5.229848947741629e-06, "loss": 0.0044, "step": 41025 }, { "epoch": 19.137126865671643, "grad_norm": 0.1510268070387854, "learning_rate": 5.228613639523922e-06, "loss": 0.0042, "step": 41030 }, { "epoch": 19.13945895522388, "grad_norm": 0.15321626908208574, "learning_rate": 5.2273816428479785e-06, "loss": 0.0046, "step": 41035 }, { "epoch": 19.14179104477612, "grad_norm": 0.1323533698718636, "learning_rate": 5.2261529578969905e-06, "loss": 0.0043, "step": 41040 }, { "epoch": 19.144123134328357, "grad_norm": 0.13823835320276426, "learning_rate": 5.224927584853641e-06, "loss": 0.0044, "step": 41045 }, { "epoch": 19.146455223880597, "grad_norm": 0.14228805147827173, "learning_rate": 5.223705523900145e-06, "loss": 0.0043, "step": 41050 }, { "epoch": 19.148787313432837, "grad_norm": 0.17301815048632954, "learning_rate": 5.2224867752181995e-06, "loss": 0.0043, "step": 41055 }, { "epoch": 19.151119402985074, "grad_norm": 0.17088738075956644, "learning_rate": 5.22127133898903e-06, "loss": 0.0044, "step": 41060 }, { "epoch": 19.153451492537314, "grad_norm": 0.15606331913152804, "learning_rate": 5.220059215393352e-06, "loss": 0.0043, "step": 41065 }, { "epoch": 19.15578358208955, "grad_norm": 0.13741052073102009, "learning_rate": 5.2188504046114005e-06, "loss": 0.0043, "step": 41070 }, { "epoch": 19.15811567164179, "grad_norm": 0.1411943313674725, "learning_rate": 5.217644906822914e-06, "loss": 0.0042, "step": 41075 }, { "epoch": 19.16044776119403, "grad_norm": 0.13529437690607143, "learning_rate": 5.216442722207141e-06, "loss": 0.0041, "step": 41080 }, { "epoch": 19.16277985074627, "grad_norm": 0.14126424598497914, "learning_rate": 5.215243850942831e-06, "loss": 0.0043, "step": 41085 }, { "epoch": 19.16511194029851, "grad_norm": 0.15837534201818487, "learning_rate": 5.214048293208246e-06, "loss": 0.0044, "step": 41090 }, { "epoch": 19.167444029850746, "grad_norm": 0.17282189911001272, "learning_rate": 5.212856049181154e-06, "loss": 0.0045, "step": 41095 }, { "epoch": 19.169776119402986, "grad_norm": 0.14697570489161266, "learning_rate": 5.211667119038829e-06, "loss": 0.0042, "step": 41100 }, { "epoch": 19.172108208955223, "grad_norm": 0.14101133394597443, "learning_rate": 5.210481502958057e-06, "loss": 0.0042, "step": 41105 }, { "epoch": 19.174440298507463, "grad_norm": 0.15565895016092496, "learning_rate": 5.209299201115125e-06, "loss": 0.0045, "step": 41110 }, { "epoch": 19.176772388059703, "grad_norm": 0.14307648133308226, "learning_rate": 5.2081202136858296e-06, "loss": 0.0041, "step": 41115 }, { "epoch": 19.17910447761194, "grad_norm": 0.1668478878440532, "learning_rate": 5.206944540845476e-06, "loss": 0.0044, "step": 41120 }, { "epoch": 19.18143656716418, "grad_norm": 0.13386848448489222, "learning_rate": 5.205772182768876e-06, "loss": 0.0042, "step": 41125 }, { "epoch": 19.183768656716417, "grad_norm": 0.16711805893085785, "learning_rate": 5.204603139630345e-06, "loss": 0.0043, "step": 41130 }, { "epoch": 19.186100746268657, "grad_norm": 0.13943670038463785, "learning_rate": 5.2034374116037146e-06, "loss": 0.0044, "step": 41135 }, { "epoch": 19.188432835820894, "grad_norm": 0.1571368347784043, "learning_rate": 5.202274998862312e-06, "loss": 0.0042, "step": 41140 }, { "epoch": 19.190764925373134, "grad_norm": 0.16350818194589029, "learning_rate": 5.2011159015789775e-06, "loss": 0.0046, "step": 41145 }, { "epoch": 19.193097014925375, "grad_norm": 0.1278098792411466, "learning_rate": 5.199960119926059e-06, "loss": 0.0043, "step": 41150 }, { "epoch": 19.19542910447761, "grad_norm": 0.15419666813602875, "learning_rate": 5.19880765407541e-06, "loss": 0.0045, "step": 41155 }, { "epoch": 19.19776119402985, "grad_norm": 0.17402895678540975, "learning_rate": 5.197658504198392e-06, "loss": 0.0045, "step": 41160 }, { "epoch": 19.20009328358209, "grad_norm": 0.13114634595672556, "learning_rate": 5.19651267046587e-06, "loss": 0.0043, "step": 41165 }, { "epoch": 19.20242537313433, "grad_norm": 0.1416788116909838, "learning_rate": 5.1953701530482215e-06, "loss": 0.0043, "step": 41170 }, { "epoch": 19.20475746268657, "grad_norm": 0.14948692072651892, "learning_rate": 5.194230952115327e-06, "loss": 0.0045, "step": 41175 }, { "epoch": 19.207089552238806, "grad_norm": 0.16425522657907002, "learning_rate": 5.1930950678365715e-06, "loss": 0.0044, "step": 41180 }, { "epoch": 19.209421641791046, "grad_norm": 0.15108402142391744, "learning_rate": 5.191962500380854e-06, "loss": 0.0042, "step": 41185 }, { "epoch": 19.211753731343283, "grad_norm": 0.14624704500784763, "learning_rate": 5.190833249916577e-06, "loss": 0.0044, "step": 41190 }, { "epoch": 19.214085820895523, "grad_norm": 0.1819091072947315, "learning_rate": 5.189707316611646e-06, "loss": 0.0045, "step": 41195 }, { "epoch": 19.21641791044776, "grad_norm": 0.16072554006681533, "learning_rate": 5.188584700633478e-06, "loss": 0.0043, "step": 41200 }, { "epoch": 19.21875, "grad_norm": 0.1444277334935009, "learning_rate": 5.187465402148996e-06, "loss": 0.0045, "step": 41205 }, { "epoch": 19.22108208955224, "grad_norm": 0.14757341573034835, "learning_rate": 5.186349421324627e-06, "loss": 0.0044, "step": 41210 }, { "epoch": 19.223414179104477, "grad_norm": 0.14832345814241396, "learning_rate": 5.185236758326307e-06, "loss": 0.0046, "step": 41215 }, { "epoch": 19.225746268656717, "grad_norm": 0.1557902726556655, "learning_rate": 5.184127413319482e-06, "loss": 0.0041, "step": 41220 }, { "epoch": 19.228078358208954, "grad_norm": 0.17463184043414007, "learning_rate": 5.183021386469096e-06, "loss": 0.0043, "step": 41225 }, { "epoch": 19.230410447761194, "grad_norm": 0.15553759735648628, "learning_rate": 5.181918677939608e-06, "loss": 0.0043, "step": 41230 }, { "epoch": 19.23274253731343, "grad_norm": 0.1478010380508771, "learning_rate": 5.180819287894979e-06, "loss": 0.0043, "step": 41235 }, { "epoch": 19.23507462686567, "grad_norm": 0.1540215166207489, "learning_rate": 5.179723216498677e-06, "loss": 0.0044, "step": 41240 }, { "epoch": 19.23740671641791, "grad_norm": 0.16498850917247998, "learning_rate": 5.178630463913678e-06, "loss": 0.0043, "step": 41245 }, { "epoch": 19.23973880597015, "grad_norm": 0.13930397700088643, "learning_rate": 5.177541030302462e-06, "loss": 0.0043, "step": 41250 }, { "epoch": 19.24207089552239, "grad_norm": 0.1672127904744171, "learning_rate": 5.176454915827022e-06, "loss": 0.0044, "step": 41255 }, { "epoch": 19.244402985074625, "grad_norm": 0.15790552715022352, "learning_rate": 5.17537212064885e-06, "loss": 0.0044, "step": 41260 }, { "epoch": 19.246735074626866, "grad_norm": 0.14663419097367653, "learning_rate": 5.174292644928947e-06, "loss": 0.0045, "step": 41265 }, { "epoch": 19.249067164179106, "grad_norm": 0.15968397319406982, "learning_rate": 5.173216488827822e-06, "loss": 0.0044, "step": 41270 }, { "epoch": 19.251399253731343, "grad_norm": 0.15018232487036806, "learning_rate": 5.172143652505484e-06, "loss": 0.0044, "step": 41275 }, { "epoch": 19.253731343283583, "grad_norm": 0.14048882226217788, "learning_rate": 5.171074136121461e-06, "loss": 0.0044, "step": 41280 }, { "epoch": 19.25606343283582, "grad_norm": 0.14643785678155774, "learning_rate": 5.170007939834775e-06, "loss": 0.0043, "step": 41285 }, { "epoch": 19.25839552238806, "grad_norm": 0.15368977724331398, "learning_rate": 5.168945063803962e-06, "loss": 0.0043, "step": 41290 }, { "epoch": 19.260727611940297, "grad_norm": 0.15867155427800939, "learning_rate": 5.167885508187059e-06, "loss": 0.0044, "step": 41295 }, { "epoch": 19.263059701492537, "grad_norm": 0.1693754145562822, "learning_rate": 5.166829273141612e-06, "loss": 0.0046, "step": 41300 }, { "epoch": 19.265391791044777, "grad_norm": 0.15108390469655422, "learning_rate": 5.165776358824675e-06, "loss": 0.0044, "step": 41305 }, { "epoch": 19.267723880597014, "grad_norm": 0.1957430196379277, "learning_rate": 5.164726765392805e-06, "loss": 0.0044, "step": 41310 }, { "epoch": 19.270055970149254, "grad_norm": 0.17779144583293627, "learning_rate": 5.163680493002067e-06, "loss": 0.0042, "step": 41315 }, { "epoch": 19.27238805970149, "grad_norm": 0.16109050032518876, "learning_rate": 5.162637541808031e-06, "loss": 0.0045, "step": 41320 }, { "epoch": 19.27472014925373, "grad_norm": 0.16159815685291012, "learning_rate": 5.161597911965775e-06, "loss": 0.0045, "step": 41325 }, { "epoch": 19.27705223880597, "grad_norm": 0.16583796424193764, "learning_rate": 5.16056160362988e-06, "loss": 0.0046, "step": 41330 }, { "epoch": 19.27938432835821, "grad_norm": 0.159978150912828, "learning_rate": 5.159528616954435e-06, "loss": 0.0044, "step": 41335 }, { "epoch": 19.28171641791045, "grad_norm": 0.15425981032253425, "learning_rate": 5.158498952093038e-06, "loss": 0.0044, "step": 41340 }, { "epoch": 19.284048507462686, "grad_norm": 0.14402640213212092, "learning_rate": 5.157472609198789e-06, "loss": 0.0045, "step": 41345 }, { "epoch": 19.286380597014926, "grad_norm": 0.17569631118386464, "learning_rate": 5.156449588424295e-06, "loss": 0.0045, "step": 41350 }, { "epoch": 19.288712686567163, "grad_norm": 0.1741710842761033, "learning_rate": 5.155429889921669e-06, "loss": 0.0045, "step": 41355 }, { "epoch": 19.291044776119403, "grad_norm": 0.14453968643426218, "learning_rate": 5.154413513842533e-06, "loss": 0.0044, "step": 41360 }, { "epoch": 19.293376865671643, "grad_norm": 0.16588989063790388, "learning_rate": 5.153400460338007e-06, "loss": 0.0044, "step": 41365 }, { "epoch": 19.29570895522388, "grad_norm": 0.1495119423386117, "learning_rate": 5.152390729558727e-06, "loss": 0.0046, "step": 41370 }, { "epoch": 19.29804104477612, "grad_norm": 0.14712125376963345, "learning_rate": 5.151384321654828e-06, "loss": 0.0043, "step": 41375 }, { "epoch": 19.300373134328357, "grad_norm": 0.16034196355037308, "learning_rate": 5.1503812367759575e-06, "loss": 0.0045, "step": 41380 }, { "epoch": 19.302705223880597, "grad_norm": 0.172119042839543, "learning_rate": 5.149381475071259e-06, "loss": 0.0045, "step": 41385 }, { "epoch": 19.305037313432837, "grad_norm": 0.18016569169392999, "learning_rate": 5.148385036689391e-06, "loss": 0.0047, "step": 41390 }, { "epoch": 19.307369402985074, "grad_norm": 0.1645862150226672, "learning_rate": 5.14739192177851e-06, "loss": 0.0046, "step": 41395 }, { "epoch": 19.309701492537314, "grad_norm": 0.1512595693046788, "learning_rate": 5.146402130486288e-06, "loss": 0.0044, "step": 41400 }, { "epoch": 19.31203358208955, "grad_norm": 0.16089715204231153, "learning_rate": 5.145415662959895e-06, "loss": 0.0045, "step": 41405 }, { "epoch": 19.31436567164179, "grad_norm": 0.17268596890562884, "learning_rate": 5.144432519346011e-06, "loss": 0.0045, "step": 41410 }, { "epoch": 19.31669776119403, "grad_norm": 0.1769692773196846, "learning_rate": 5.143452699790817e-06, "loss": 0.0046, "step": 41415 }, { "epoch": 19.31902985074627, "grad_norm": 0.15784705880857008, "learning_rate": 5.142476204440002e-06, "loss": 0.0045, "step": 41420 }, { "epoch": 19.32136194029851, "grad_norm": 0.14320344085981715, "learning_rate": 5.141503033438769e-06, "loss": 0.0044, "step": 41425 }, { "epoch": 19.323694029850746, "grad_norm": 0.14948697520191398, "learning_rate": 5.140533186931809e-06, "loss": 0.0045, "step": 41430 }, { "epoch": 19.326026119402986, "grad_norm": 0.13694164108912538, "learning_rate": 5.139566665063337e-06, "loss": 0.0044, "step": 41435 }, { "epoch": 19.328358208955223, "grad_norm": 0.15479347750904393, "learning_rate": 5.138603467977062e-06, "loss": 0.0043, "step": 41440 }, { "epoch": 19.330690298507463, "grad_norm": 0.1599121857970993, "learning_rate": 5.137643595816202e-06, "loss": 0.0044, "step": 41445 }, { "epoch": 19.333022388059703, "grad_norm": 0.1801906473532346, "learning_rate": 5.136687048723483e-06, "loss": 0.0044, "step": 41450 }, { "epoch": 19.33535447761194, "grad_norm": 0.17896637963232928, "learning_rate": 5.135733826841131e-06, "loss": 0.0045, "step": 41455 }, { "epoch": 19.33768656716418, "grad_norm": 0.18077424597050445, "learning_rate": 5.134783930310883e-06, "loss": 0.0043, "step": 41460 }, { "epoch": 19.340018656716417, "grad_norm": 0.17219050883213347, "learning_rate": 5.13383735927398e-06, "loss": 0.0045, "step": 41465 }, { "epoch": 19.342350746268657, "grad_norm": 0.16014846080608894, "learning_rate": 5.132894113871167e-06, "loss": 0.0045, "step": 41470 }, { "epoch": 19.344682835820894, "grad_norm": 0.14614274161853635, "learning_rate": 5.131954194242696e-06, "loss": 0.0044, "step": 41475 }, { "epoch": 19.347014925373134, "grad_norm": 0.17364760116689926, "learning_rate": 5.131017600528324e-06, "loss": 0.0045, "step": 41480 }, { "epoch": 19.349347014925375, "grad_norm": 0.16336362912231064, "learning_rate": 5.130084332867315e-06, "loss": 0.0045, "step": 41485 }, { "epoch": 19.35167910447761, "grad_norm": 0.1630164114933879, "learning_rate": 5.129154391398433e-06, "loss": 0.0044, "step": 41490 }, { "epoch": 19.35401119402985, "grad_norm": 0.15691800096842043, "learning_rate": 5.128227776259953e-06, "loss": 0.0043, "step": 41495 }, { "epoch": 19.35634328358209, "grad_norm": 0.1666531048403167, "learning_rate": 5.127304487589658e-06, "loss": 0.0045, "step": 41500 }, { "epoch": 19.35867537313433, "grad_norm": 0.15231215103451637, "learning_rate": 5.126384525524826e-06, "loss": 0.0045, "step": 41505 }, { "epoch": 19.361007462686565, "grad_norm": 0.17596208364893565, "learning_rate": 5.12546789020225e-06, "loss": 0.0047, "step": 41510 }, { "epoch": 19.363339552238806, "grad_norm": 0.18470752658380338, "learning_rate": 5.124554581758225e-06, "loss": 0.0044, "step": 41515 }, { "epoch": 19.365671641791046, "grad_norm": 0.16899439965515203, "learning_rate": 5.123644600328549e-06, "loss": 0.0044, "step": 41520 }, { "epoch": 19.368003731343283, "grad_norm": 0.16787902742749947, "learning_rate": 5.122737946048531e-06, "loss": 0.0044, "step": 41525 }, { "epoch": 19.370335820895523, "grad_norm": 0.1884786850030276, "learning_rate": 5.121834619052979e-06, "loss": 0.0045, "step": 41530 }, { "epoch": 19.37266791044776, "grad_norm": 0.1506306366844952, "learning_rate": 5.1209346194762086e-06, "loss": 0.0045, "step": 41535 }, { "epoch": 19.375, "grad_norm": 0.17720340832202763, "learning_rate": 5.120037947452043e-06, "loss": 0.0045, "step": 41540 }, { "epoch": 19.37733208955224, "grad_norm": 0.1697695122052455, "learning_rate": 5.119144603113809e-06, "loss": 0.0047, "step": 41545 }, { "epoch": 19.379664179104477, "grad_norm": 0.1886811010449865, "learning_rate": 5.118254586594335e-06, "loss": 0.0045, "step": 41550 }, { "epoch": 19.381996268656717, "grad_norm": 0.18366268783440626, "learning_rate": 5.117367898025964e-06, "loss": 0.0045, "step": 41555 }, { "epoch": 19.384328358208954, "grad_norm": 0.15750331777580853, "learning_rate": 5.116484537540532e-06, "loss": 0.0045, "step": 41560 }, { "epoch": 19.386660447761194, "grad_norm": 0.15988732014145537, "learning_rate": 5.115604505269388e-06, "loss": 0.0044, "step": 41565 }, { "epoch": 19.388992537313435, "grad_norm": 0.15287821819823985, "learning_rate": 5.114727801343385e-06, "loss": 0.0046, "step": 41570 }, { "epoch": 19.39132462686567, "grad_norm": 0.16069067381364174, "learning_rate": 5.113854425892884e-06, "loss": 0.0045, "step": 41575 }, { "epoch": 19.39365671641791, "grad_norm": 0.18635039857722377, "learning_rate": 5.11298437904774e-06, "loss": 0.0047, "step": 41580 }, { "epoch": 19.39598880597015, "grad_norm": 0.16952237775932935, "learning_rate": 5.112117660937328e-06, "loss": 0.0046, "step": 41585 }, { "epoch": 19.39832089552239, "grad_norm": 0.17715921428210366, "learning_rate": 5.111254271690516e-06, "loss": 0.0047, "step": 41590 }, { "epoch": 19.400652985074625, "grad_norm": 0.17604709926210782, "learning_rate": 5.110394211435682e-06, "loss": 0.0044, "step": 41595 }, { "epoch": 19.402985074626866, "grad_norm": 0.15871240368432363, "learning_rate": 5.1095374803007115e-06, "loss": 0.0043, "step": 41600 }, { "epoch": 19.405317164179106, "grad_norm": 0.15883697860389748, "learning_rate": 5.1086840784129895e-06, "loss": 0.0045, "step": 41605 }, { "epoch": 19.407649253731343, "grad_norm": 0.1810760784743733, "learning_rate": 5.107834005899409e-06, "loss": 0.0046, "step": 41610 }, { "epoch": 19.409981343283583, "grad_norm": 0.1553905324714659, "learning_rate": 5.106987262886371e-06, "loss": 0.0045, "step": 41615 }, { "epoch": 19.41231343283582, "grad_norm": 0.1760526618489048, "learning_rate": 5.1061438494997726e-06, "loss": 0.0047, "step": 41620 }, { "epoch": 19.41464552238806, "grad_norm": 0.19302030358723807, "learning_rate": 5.105303765865026e-06, "loss": 0.0046, "step": 41625 }, { "epoch": 19.416977611940297, "grad_norm": 0.18203728767322214, "learning_rate": 5.104467012107041e-06, "loss": 0.0044, "step": 41630 }, { "epoch": 19.419309701492537, "grad_norm": 0.1512728980657697, "learning_rate": 5.103633588350236e-06, "loss": 0.0044, "step": 41635 }, { "epoch": 19.421641791044777, "grad_norm": 0.16799816249006394, "learning_rate": 5.102803494718532e-06, "loss": 0.0048, "step": 41640 }, { "epoch": 19.423973880597014, "grad_norm": 0.1902607517110787, "learning_rate": 5.1019767313353564e-06, "loss": 0.0045, "step": 41645 }, { "epoch": 19.426305970149254, "grad_norm": 0.19909405734596283, "learning_rate": 5.101153298323643e-06, "loss": 0.0045, "step": 41650 }, { "epoch": 19.42863805970149, "grad_norm": 0.16323793659835234, "learning_rate": 5.100333195805823e-06, "loss": 0.0047, "step": 41655 }, { "epoch": 19.43097014925373, "grad_norm": 0.18901759076022193, "learning_rate": 5.099516423903844e-06, "loss": 0.0047, "step": 41660 }, { "epoch": 19.43330223880597, "grad_norm": 0.15644113949708208, "learning_rate": 5.098702982739151e-06, "loss": 0.0044, "step": 41665 }, { "epoch": 19.43563432835821, "grad_norm": 0.1674219518937521, "learning_rate": 5.097892872432691e-06, "loss": 0.0046, "step": 41670 }, { "epoch": 19.43796641791045, "grad_norm": 0.17256850878960264, "learning_rate": 5.097086093104924e-06, "loss": 0.0045, "step": 41675 }, { "epoch": 19.440298507462686, "grad_norm": 0.16864066266710503, "learning_rate": 5.096282644875807e-06, "loss": 0.0045, "step": 41680 }, { "epoch": 19.442630597014926, "grad_norm": 0.192174017428889, "learning_rate": 5.095482527864808e-06, "loss": 0.0046, "step": 41685 }, { "epoch": 19.444962686567163, "grad_norm": 0.16564073068094676, "learning_rate": 5.094685742190896e-06, "loss": 0.0046, "step": 41690 }, { "epoch": 19.447294776119403, "grad_norm": 0.1612591203389178, "learning_rate": 5.09389228797254e-06, "loss": 0.0046, "step": 41695 }, { "epoch": 19.449626865671643, "grad_norm": 0.1888938507908295, "learning_rate": 5.093102165327729e-06, "loss": 0.0048, "step": 41700 }, { "epoch": 19.45195895522388, "grad_norm": 0.17157597609266925, "learning_rate": 5.092315374373937e-06, "loss": 0.0045, "step": 41705 }, { "epoch": 19.45429104477612, "grad_norm": 0.1634634988068042, "learning_rate": 5.09153191522816e-06, "loss": 0.0047, "step": 41710 }, { "epoch": 19.456623134328357, "grad_norm": 0.1689972011997282, "learning_rate": 5.090751788006885e-06, "loss": 0.0045, "step": 41715 }, { "epoch": 19.458955223880597, "grad_norm": 0.18327724970109255, "learning_rate": 5.089974992826117e-06, "loss": 0.0045, "step": 41720 }, { "epoch": 19.461287313432837, "grad_norm": 0.14910195919939986, "learning_rate": 5.0892015298013485e-06, "loss": 0.0047, "step": 41725 }, { "epoch": 19.463619402985074, "grad_norm": 0.16245908387312577, "learning_rate": 5.08843139904759e-06, "loss": 0.0048, "step": 41730 }, { "epoch": 19.465951492537314, "grad_norm": 0.1648819922848688, "learning_rate": 5.087664600679356e-06, "loss": 0.0046, "step": 41735 }, { "epoch": 19.46828358208955, "grad_norm": 0.1762017518378467, "learning_rate": 5.086901134810658e-06, "loss": 0.0046, "step": 41740 }, { "epoch": 19.47061567164179, "grad_norm": 0.18549539188212358, "learning_rate": 5.086141001555016e-06, "loss": 0.0048, "step": 41745 }, { "epoch": 19.47294776119403, "grad_norm": 0.16924890987048438, "learning_rate": 5.085384201025457e-06, "loss": 0.0046, "step": 41750 }, { "epoch": 19.47527985074627, "grad_norm": 0.1630385441489055, "learning_rate": 5.084630733334508e-06, "loss": 0.0047, "step": 41755 }, { "epoch": 19.47761194029851, "grad_norm": 0.17850398012425744, "learning_rate": 5.083880598594204e-06, "loss": 0.0048, "step": 41760 }, { "epoch": 19.479944029850746, "grad_norm": 0.17910693640755584, "learning_rate": 5.083133796916081e-06, "loss": 0.0046, "step": 41765 }, { "epoch": 19.482276119402986, "grad_norm": 0.1812902122020394, "learning_rate": 5.082390328411184e-06, "loss": 0.0049, "step": 41770 }, { "epoch": 19.484608208955223, "grad_norm": 0.1956650598170847, "learning_rate": 5.081650193190057e-06, "loss": 0.0047, "step": 41775 }, { "epoch": 19.486940298507463, "grad_norm": 0.17222760125899136, "learning_rate": 5.080913391362749e-06, "loss": 0.0047, "step": 41780 }, { "epoch": 19.489272388059703, "grad_norm": 0.16482521056863256, "learning_rate": 5.080179923038822e-06, "loss": 0.0047, "step": 41785 }, { "epoch": 19.49160447761194, "grad_norm": 0.18989863677192076, "learning_rate": 5.079449788327332e-06, "loss": 0.0046, "step": 41790 }, { "epoch": 19.49393656716418, "grad_norm": 0.16411891136741263, "learning_rate": 5.078722987336843e-06, "loss": 0.0047, "step": 41795 }, { "epoch": 19.496268656716417, "grad_norm": 0.167390262445185, "learning_rate": 5.0779995201754225e-06, "loss": 0.0046, "step": 41800 }, { "epoch": 19.498600746268657, "grad_norm": 0.1928774896482118, "learning_rate": 5.077279386950642e-06, "loss": 0.0048, "step": 41805 }, { "epoch": 19.500932835820894, "grad_norm": 0.16391646921262046, "learning_rate": 5.076562587769584e-06, "loss": 0.0047, "step": 41810 }, { "epoch": 19.503264925373134, "grad_norm": 0.15718699940802888, "learning_rate": 5.0758491227388235e-06, "loss": 0.0049, "step": 41815 }, { "epoch": 19.505597014925375, "grad_norm": 0.18226252218007732, "learning_rate": 5.07513899196445e-06, "loss": 0.0046, "step": 41820 }, { "epoch": 19.50792910447761, "grad_norm": 0.1887689738929634, "learning_rate": 5.074432195552053e-06, "loss": 0.0046, "step": 41825 }, { "epoch": 19.51026119402985, "grad_norm": 0.1661779147362316, "learning_rate": 5.073728733606722e-06, "loss": 0.0046, "step": 41830 }, { "epoch": 19.51259328358209, "grad_norm": 0.1744006460586645, "learning_rate": 5.073028606233059e-06, "loss": 0.0045, "step": 41835 }, { "epoch": 19.51492537313433, "grad_norm": 0.17896019627860885, "learning_rate": 5.072331813535166e-06, "loss": 0.0045, "step": 41840 }, { "epoch": 19.517257462686565, "grad_norm": 0.16701217167520171, "learning_rate": 5.071638355616648e-06, "loss": 0.0047, "step": 41845 }, { "epoch": 19.519589552238806, "grad_norm": 0.19112506904730245, "learning_rate": 5.070948232580618e-06, "loss": 0.0048, "step": 41850 }, { "epoch": 19.521921641791046, "grad_norm": 0.1778343980262795, "learning_rate": 5.070261444529688e-06, "loss": 0.0049, "step": 41855 }, { "epoch": 19.524253731343283, "grad_norm": 0.20574134132818306, "learning_rate": 5.069577991565977e-06, "loss": 0.0049, "step": 41860 }, { "epoch": 19.526585820895523, "grad_norm": 0.19778779502214722, "learning_rate": 5.0688978737911085e-06, "loss": 0.0046, "step": 41865 }, { "epoch": 19.52891791044776, "grad_norm": 0.17969518323667197, "learning_rate": 5.06822109130621e-06, "loss": 0.0046, "step": 41870 }, { "epoch": 19.53125, "grad_norm": 0.17966892142579816, "learning_rate": 5.067547644211914e-06, "loss": 0.0047, "step": 41875 }, { "epoch": 19.53358208955224, "grad_norm": 0.1801442556521113, "learning_rate": 5.066877532608349e-06, "loss": 0.0047, "step": 41880 }, { "epoch": 19.535914179104477, "grad_norm": 0.1774172201528628, "learning_rate": 5.066210756595164e-06, "loss": 0.0047, "step": 41885 }, { "epoch": 19.538246268656717, "grad_norm": 0.16965964025331903, "learning_rate": 5.065547316271494e-06, "loss": 0.0048, "step": 41890 }, { "epoch": 19.540578358208954, "grad_norm": 0.17269849020815883, "learning_rate": 5.064887211735991e-06, "loss": 0.0046, "step": 41895 }, { "epoch": 19.542910447761194, "grad_norm": 0.18241446544524426, "learning_rate": 5.064230443086805e-06, "loss": 0.0046, "step": 41900 }, { "epoch": 19.545242537313435, "grad_norm": 0.1882922278690021, "learning_rate": 5.0635770104215915e-06, "loss": 0.0047, "step": 41905 }, { "epoch": 19.54757462686567, "grad_norm": 0.17110679569486492, "learning_rate": 5.062926913837507e-06, "loss": 0.0046, "step": 41910 }, { "epoch": 19.54990671641791, "grad_norm": 0.19004673985485673, "learning_rate": 5.062280153431218e-06, "loss": 0.0046, "step": 41915 }, { "epoch": 19.55223880597015, "grad_norm": 0.1713869193558569, "learning_rate": 5.06163672929889e-06, "loss": 0.0044, "step": 41920 }, { "epoch": 19.55457089552239, "grad_norm": 0.20127211437345133, "learning_rate": 5.060996641536193e-06, "loss": 0.0047, "step": 41925 }, { "epoch": 19.556902985074625, "grad_norm": 0.18446069193200357, "learning_rate": 5.060359890238305e-06, "loss": 0.0046, "step": 41930 }, { "epoch": 19.559235074626866, "grad_norm": 0.1840236539645985, "learning_rate": 5.059726475499902e-06, "loss": 0.0046, "step": 41935 }, { "epoch": 19.561567164179106, "grad_norm": 0.17342806727948956, "learning_rate": 5.059096397415167e-06, "loss": 0.0047, "step": 41940 }, { "epoch": 19.563899253731343, "grad_norm": 0.17872666443240495, "learning_rate": 5.058469656077789e-06, "loss": 0.0046, "step": 41945 }, { "epoch": 19.566231343283583, "grad_norm": 0.18797267892395583, "learning_rate": 5.057846251580957e-06, "loss": 0.0046, "step": 41950 }, { "epoch": 19.56856343283582, "grad_norm": 0.16165788270580947, "learning_rate": 5.057226184017362e-06, "loss": 0.0048, "step": 41955 }, { "epoch": 19.57089552238806, "grad_norm": 0.16436172140723626, "learning_rate": 5.056609453479208e-06, "loss": 0.0048, "step": 41960 }, { "epoch": 19.573227611940297, "grad_norm": 0.1836826676295278, "learning_rate": 5.055996060058192e-06, "loss": 0.0048, "step": 41965 }, { "epoch": 19.575559701492537, "grad_norm": 0.15875047837239983, "learning_rate": 5.055386003845524e-06, "loss": 0.0048, "step": 41970 }, { "epoch": 19.577891791044777, "grad_norm": 0.186966868269448, "learning_rate": 5.054779284931909e-06, "loss": 0.0048, "step": 41975 }, { "epoch": 19.580223880597014, "grad_norm": 0.1669999125908688, "learning_rate": 5.0541759034075645e-06, "loss": 0.0045, "step": 41980 }, { "epoch": 19.582555970149254, "grad_norm": 0.1890492359249907, "learning_rate": 5.053575859362203e-06, "loss": 0.0047, "step": 41985 }, { "epoch": 19.58488805970149, "grad_norm": 0.17521714905116387, "learning_rate": 5.0529791528850515e-06, "loss": 0.0048, "step": 41990 }, { "epoch": 19.58722014925373, "grad_norm": 0.18498613636658276, "learning_rate": 5.052385784064827e-06, "loss": 0.0049, "step": 41995 }, { "epoch": 19.58955223880597, "grad_norm": 0.20041830495566546, "learning_rate": 5.051795752989764e-06, "loss": 0.0048, "step": 42000 }, { "epoch": 19.59188432835821, "grad_norm": 0.18894532052773938, "learning_rate": 5.051209059747594e-06, "loss": 0.0048, "step": 42005 }, { "epoch": 19.59421641791045, "grad_norm": 0.1942839947549523, "learning_rate": 5.050625704425547e-06, "loss": 0.0048, "step": 42010 }, { "epoch": 19.596548507462686, "grad_norm": 0.172528300159613, "learning_rate": 5.0500456871103686e-06, "loss": 0.0049, "step": 42015 }, { "epoch": 19.598880597014926, "grad_norm": 0.18544022600088844, "learning_rate": 5.049469007888298e-06, "loss": 0.0047, "step": 42020 }, { "epoch": 19.601212686567163, "grad_norm": 0.18727534195557916, "learning_rate": 5.048895666845084e-06, "loss": 0.0048, "step": 42025 }, { "epoch": 19.603544776119403, "grad_norm": 0.18676363147205619, "learning_rate": 5.048325664065975e-06, "loss": 0.0047, "step": 42030 }, { "epoch": 19.605876865671643, "grad_norm": 0.18895644040920612, "learning_rate": 5.047758999635728e-06, "loss": 0.0048, "step": 42035 }, { "epoch": 19.60820895522388, "grad_norm": 0.18981180422811456, "learning_rate": 5.047195673638596e-06, "loss": 0.0048, "step": 42040 }, { "epoch": 19.61054104477612, "grad_norm": 0.17080026504378126, "learning_rate": 5.0466356861583445e-06, "loss": 0.0048, "step": 42045 }, { "epoch": 19.612873134328357, "grad_norm": 0.1820502497614841, "learning_rate": 5.046079037278237e-06, "loss": 0.0048, "step": 42050 }, { "epoch": 19.615205223880597, "grad_norm": 0.18307925313457954, "learning_rate": 5.0455257270810425e-06, "loss": 0.0047, "step": 42055 }, { "epoch": 19.617537313432837, "grad_norm": 0.192932461155424, "learning_rate": 5.044975755649028e-06, "loss": 0.0048, "step": 42060 }, { "epoch": 19.619869402985074, "grad_norm": 0.19212627342165556, "learning_rate": 5.044429123063977e-06, "loss": 0.0049, "step": 42065 }, { "epoch": 19.622201492537314, "grad_norm": 0.18188725311659257, "learning_rate": 5.043885829407164e-06, "loss": 0.0047, "step": 42070 }, { "epoch": 19.62453358208955, "grad_norm": 0.19441721563951214, "learning_rate": 5.043345874759371e-06, "loss": 0.0049, "step": 42075 }, { "epoch": 19.62686567164179, "grad_norm": 0.1730917085843965, "learning_rate": 5.042809259200885e-06, "loss": 0.0047, "step": 42080 }, { "epoch": 19.62919776119403, "grad_norm": 0.19762773107937143, "learning_rate": 5.042275982811495e-06, "loss": 0.005, "step": 42085 }, { "epoch": 19.63152985074627, "grad_norm": 0.17972413654087835, "learning_rate": 5.041746045670495e-06, "loss": 0.0049, "step": 42090 }, { "epoch": 19.63386194029851, "grad_norm": 0.22296853430064753, "learning_rate": 5.041219447856681e-06, "loss": 0.0047, "step": 42095 }, { "epoch": 19.636194029850746, "grad_norm": 0.18632952257734595, "learning_rate": 5.040696189448356e-06, "loss": 0.0048, "step": 42100 }, { "epoch": 19.638526119402986, "grad_norm": 0.17408218079932386, "learning_rate": 5.040176270523318e-06, "loss": 0.0046, "step": 42105 }, { "epoch": 19.640858208955223, "grad_norm": 0.17988783776515627, "learning_rate": 5.039659691158878e-06, "loss": 0.0049, "step": 42110 }, { "epoch": 19.643190298507463, "grad_norm": 0.1885543662666801, "learning_rate": 5.039146451431845e-06, "loss": 0.0047, "step": 42115 }, { "epoch": 19.645522388059703, "grad_norm": 0.1928983044085103, "learning_rate": 5.038636551418533e-06, "loss": 0.005, "step": 42120 }, { "epoch": 19.64785447761194, "grad_norm": 0.18217078948214688, "learning_rate": 5.038129991194761e-06, "loss": 0.0049, "step": 42125 }, { "epoch": 19.65018656716418, "grad_norm": 0.17826845850219492, "learning_rate": 5.0376267708358455e-06, "loss": 0.0048, "step": 42130 }, { "epoch": 19.652518656716417, "grad_norm": 0.18541102223263267, "learning_rate": 5.037126890416614e-06, "loss": 0.0048, "step": 42135 }, { "epoch": 19.654850746268657, "grad_norm": 0.17643535607609967, "learning_rate": 5.036630350011395e-06, "loss": 0.0047, "step": 42140 }, { "epoch": 19.657182835820894, "grad_norm": 0.17043733764842875, "learning_rate": 5.036137149694013e-06, "loss": 0.0047, "step": 42145 }, { "epoch": 19.659514925373134, "grad_norm": 0.17671549576928658, "learning_rate": 5.03564728953781e-06, "loss": 0.0048, "step": 42150 }, { "epoch": 19.661847014925375, "grad_norm": 0.17241936856383538, "learning_rate": 5.035160769615618e-06, "loss": 0.0047, "step": 42155 }, { "epoch": 19.66417910447761, "grad_norm": 0.189307542215631, "learning_rate": 5.034677589999783e-06, "loss": 0.0048, "step": 42160 }, { "epoch": 19.66651119402985, "grad_norm": 0.2101804302144474, "learning_rate": 5.034197750762141e-06, "loss": 0.0048, "step": 42165 }, { "epoch": 19.66884328358209, "grad_norm": 0.1883581043202821, "learning_rate": 5.033721251974047e-06, "loss": 0.0051, "step": 42170 }, { "epoch": 19.67117537313433, "grad_norm": 0.1976904782123455, "learning_rate": 5.03324809370635e-06, "loss": 0.0047, "step": 42175 }, { "epoch": 19.673507462686565, "grad_norm": 0.19480348806009998, "learning_rate": 5.032778276029403e-06, "loss": 0.0048, "step": 42180 }, { "epoch": 19.675839552238806, "grad_norm": 0.19666676178634018, "learning_rate": 5.032311799013064e-06, "loss": 0.0048, "step": 42185 }, { "epoch": 19.678171641791046, "grad_norm": 0.19983586537358156, "learning_rate": 5.031848662726692e-06, "loss": 0.0048, "step": 42190 }, { "epoch": 19.680503731343283, "grad_norm": 0.20643149109659942, "learning_rate": 5.031388867239153e-06, "loss": 0.0048, "step": 42195 }, { "epoch": 19.682835820895523, "grad_norm": 0.20368262528363984, "learning_rate": 5.030932412618815e-06, "loss": 0.005, "step": 42200 }, { "epoch": 19.68516791044776, "grad_norm": 0.1821597092998753, "learning_rate": 5.030479298933544e-06, "loss": 0.0049, "step": 42205 }, { "epoch": 19.6875, "grad_norm": 0.17828983637477885, "learning_rate": 5.030029526250719e-06, "loss": 0.0049, "step": 42210 }, { "epoch": 19.68983208955224, "grad_norm": 0.2196248873780803, "learning_rate": 5.029583094637212e-06, "loss": 0.005, "step": 42215 }, { "epoch": 19.692164179104477, "grad_norm": 0.20537464904492916, "learning_rate": 5.029140004159409e-06, "loss": 0.0051, "step": 42220 }, { "epoch": 19.694496268656717, "grad_norm": 0.19198924562405717, "learning_rate": 5.028700254883189e-06, "loss": 0.0054, "step": 42225 }, { "epoch": 19.696828358208954, "grad_norm": 0.20784393711487592, "learning_rate": 5.028263846873938e-06, "loss": 0.0048, "step": 42230 }, { "epoch": 19.699160447761194, "grad_norm": 0.1988966488088156, "learning_rate": 5.027830780196549e-06, "loss": 0.0049, "step": 42235 }, { "epoch": 19.701492537313435, "grad_norm": 0.20433896815899566, "learning_rate": 5.02740105491541e-06, "loss": 0.005, "step": 42240 }, { "epoch": 19.70382462686567, "grad_norm": 0.19910692981353179, "learning_rate": 5.026974671094422e-06, "loss": 0.0053, "step": 42245 }, { "epoch": 19.70615671641791, "grad_norm": 0.19543534110315136, "learning_rate": 5.026551628796982e-06, "loss": 0.0048, "step": 42250 }, { "epoch": 19.70848880597015, "grad_norm": 0.21369251763642655, "learning_rate": 5.026131928085994e-06, "loss": 0.0048, "step": 42255 }, { "epoch": 19.71082089552239, "grad_norm": 0.18598479362228382, "learning_rate": 5.025715569023859e-06, "loss": 0.0049, "step": 42260 }, { "epoch": 19.713152985074625, "grad_norm": 0.1926293682048647, "learning_rate": 5.025302551672492e-06, "loss": 0.0051, "step": 42265 }, { "epoch": 19.715485074626866, "grad_norm": 0.2019924036509592, "learning_rate": 5.024892876093299e-06, "loss": 0.0049, "step": 42270 }, { "epoch": 19.717817164179106, "grad_norm": 0.18724660184410546, "learning_rate": 5.024486542347199e-06, "loss": 0.0051, "step": 42275 }, { "epoch": 19.720149253731343, "grad_norm": 0.18000931183854643, "learning_rate": 5.024083550494606e-06, "loss": 0.0048, "step": 42280 }, { "epoch": 19.722481343283583, "grad_norm": 0.20435588427574541, "learning_rate": 5.023683900595444e-06, "loss": 0.0049, "step": 42285 }, { "epoch": 19.72481343283582, "grad_norm": 0.17702956941391074, "learning_rate": 5.023287592709136e-06, "loss": 0.005, "step": 42290 }, { "epoch": 19.72714552238806, "grad_norm": 0.20070311721708134, "learning_rate": 5.02289462689461e-06, "loss": 0.0049, "step": 42295 }, { "epoch": 19.729477611940297, "grad_norm": 0.2234219571729545, "learning_rate": 5.0225050032102965e-06, "loss": 0.0049, "step": 42300 }, { "epoch": 19.731809701492537, "grad_norm": 0.18354266027692417, "learning_rate": 5.022118721714127e-06, "loss": 0.0049, "step": 42305 }, { "epoch": 19.734141791044777, "grad_norm": 0.17414502912522759, "learning_rate": 5.021735782463537e-06, "loss": 0.005, "step": 42310 }, { "epoch": 19.736473880597014, "grad_norm": 0.2128323192250199, "learning_rate": 5.02135618551547e-06, "loss": 0.005, "step": 42315 }, { "epoch": 19.738805970149254, "grad_norm": 0.17385303770338012, "learning_rate": 5.020979930926365e-06, "loss": 0.005, "step": 42320 }, { "epoch": 19.74113805970149, "grad_norm": 0.19371694450253318, "learning_rate": 5.02060701875217e-06, "loss": 0.0049, "step": 42325 }, { "epoch": 19.74347014925373, "grad_norm": 0.19706071156993596, "learning_rate": 5.020237449048333e-06, "loss": 0.005, "step": 42330 }, { "epoch": 19.74580223880597, "grad_norm": 0.18046790990804043, "learning_rate": 5.019871221869802e-06, "loss": 0.0051, "step": 42335 }, { "epoch": 19.74813432835821, "grad_norm": 0.21969342487171145, "learning_rate": 5.0195083372710345e-06, "loss": 0.005, "step": 42340 }, { "epoch": 19.75046641791045, "grad_norm": 0.19893771018562406, "learning_rate": 5.019148795305989e-06, "loss": 0.0049, "step": 42345 }, { "epoch": 19.752798507462686, "grad_norm": 0.20036089183365055, "learning_rate": 5.018792596028123e-06, "loss": 0.005, "step": 42350 }, { "epoch": 19.755130597014926, "grad_norm": 0.19993324549900657, "learning_rate": 5.018439739490402e-06, "loss": 0.005, "step": 42355 }, { "epoch": 19.757462686567163, "grad_norm": 0.18054188761288478, "learning_rate": 5.018090225745291e-06, "loss": 0.0048, "step": 42360 }, { "epoch": 19.759794776119403, "grad_norm": 0.1836225896660672, "learning_rate": 5.017744054844761e-06, "loss": 0.0048, "step": 42365 }, { "epoch": 19.762126865671643, "grad_norm": 0.2059438475493961, "learning_rate": 5.017401226840284e-06, "loss": 0.005, "step": 42370 }, { "epoch": 19.76445895522388, "grad_norm": 0.22153450558627935, "learning_rate": 5.017061741782833e-06, "loss": 0.0049, "step": 42375 }, { "epoch": 19.76679104477612, "grad_norm": 0.20187014030996217, "learning_rate": 5.016725599722889e-06, "loss": 0.005, "step": 42380 }, { "epoch": 19.769123134328357, "grad_norm": 0.18467332903181707, "learning_rate": 5.016392800710434e-06, "loss": 0.0051, "step": 42385 }, { "epoch": 19.771455223880597, "grad_norm": 0.20025571933051609, "learning_rate": 5.016063344794947e-06, "loss": 0.0055, "step": 42390 }, { "epoch": 19.773787313432837, "grad_norm": 0.20550044142603127, "learning_rate": 5.015737232025418e-06, "loss": 0.0051, "step": 42395 }, { "epoch": 19.776119402985074, "grad_norm": 0.20291534905164693, "learning_rate": 5.0154144624503365e-06, "loss": 0.0049, "step": 42400 }, { "epoch": 19.778451492537314, "grad_norm": 0.2028249262990562, "learning_rate": 5.015095036117697e-06, "loss": 0.0049, "step": 42405 }, { "epoch": 19.78078358208955, "grad_norm": 0.18231509023399786, "learning_rate": 5.014778953074992e-06, "loss": 0.0051, "step": 42410 }, { "epoch": 19.78311567164179, "grad_norm": 0.20314149485505836, "learning_rate": 5.014466213369223e-06, "loss": 0.0051, "step": 42415 }, { "epoch": 19.78544776119403, "grad_norm": 0.19977520877557323, "learning_rate": 5.014156817046891e-06, "loss": 0.0051, "step": 42420 }, { "epoch": 19.78777985074627, "grad_norm": 0.23951408657392095, "learning_rate": 5.013850764153996e-06, "loss": 0.005, "step": 42425 }, { "epoch": 19.79011194029851, "grad_norm": 0.19993474883634965, "learning_rate": 5.013548054736049e-06, "loss": 0.005, "step": 42430 }, { "epoch": 19.792444029850746, "grad_norm": 0.2086061724859441, "learning_rate": 5.013248688838061e-06, "loss": 0.0051, "step": 42435 }, { "epoch": 19.794776119402986, "grad_norm": 0.19381733329179648, "learning_rate": 5.012952666504542e-06, "loss": 0.005, "step": 42440 }, { "epoch": 19.797108208955223, "grad_norm": 0.2032126125562333, "learning_rate": 5.012659987779512e-06, "loss": 0.0052, "step": 42445 }, { "epoch": 19.799440298507463, "grad_norm": 0.18922215932899086, "learning_rate": 5.012370652706484e-06, "loss": 0.0051, "step": 42450 }, { "epoch": 19.801772388059703, "grad_norm": 0.19907307097181182, "learning_rate": 5.012084661328482e-06, "loss": 0.005, "step": 42455 }, { "epoch": 19.80410447761194, "grad_norm": 0.2029691396419958, "learning_rate": 5.011802013688029e-06, "loss": 0.0051, "step": 42460 }, { "epoch": 19.80643656716418, "grad_norm": 0.17924218131439312, "learning_rate": 5.011522709827154e-06, "loss": 0.005, "step": 42465 }, { "epoch": 19.808768656716417, "grad_norm": 0.2104059190745434, "learning_rate": 5.011246749787385e-06, "loss": 0.005, "step": 42470 }, { "epoch": 19.811100746268657, "grad_norm": 0.1907933273888902, "learning_rate": 5.010974133609758e-06, "loss": 0.0048, "step": 42475 }, { "epoch": 19.813432835820894, "grad_norm": 0.1811183097186428, "learning_rate": 5.010704861334803e-06, "loss": 0.005, "step": 42480 }, { "epoch": 19.815764925373134, "grad_norm": 0.1947215395768381, "learning_rate": 5.010438933002563e-06, "loss": 0.0052, "step": 42485 }, { "epoch": 19.818097014925375, "grad_norm": 0.1915845183280074, "learning_rate": 5.010176348652576e-06, "loss": 0.005, "step": 42490 }, { "epoch": 19.82042910447761, "grad_norm": 0.20398768163586276, "learning_rate": 5.009917108323885e-06, "loss": 0.0051, "step": 42495 }, { "epoch": 19.82276119402985, "grad_norm": 0.21768054497459652, "learning_rate": 5.0096612120550436e-06, "loss": 0.0049, "step": 42500 }, { "epoch": 19.82509328358209, "grad_norm": 0.23459900074164594, "learning_rate": 5.009408659884092e-06, "loss": 0.0052, "step": 42505 }, { "epoch": 19.82742537313433, "grad_norm": 0.21444715459823308, "learning_rate": 5.009159451848587e-06, "loss": 0.005, "step": 42510 }, { "epoch": 19.829757462686565, "grad_norm": 0.20460390186194036, "learning_rate": 5.008913587985581e-06, "loss": 0.0051, "step": 42515 }, { "epoch": 19.832089552238806, "grad_norm": 0.21162854941466003, "learning_rate": 5.008671068331634e-06, "loss": 0.0052, "step": 42520 }, { "epoch": 19.834421641791046, "grad_norm": 0.2089990531184918, "learning_rate": 5.008431892922808e-06, "loss": 0.005, "step": 42525 }, { "epoch": 19.836753731343283, "grad_norm": 0.20329731207821583, "learning_rate": 5.00819606179466e-06, "loss": 0.0052, "step": 42530 }, { "epoch": 19.839085820895523, "grad_norm": 0.22533268319086847, "learning_rate": 5.007963574982264e-06, "loss": 0.0051, "step": 42535 }, { "epoch": 19.84141791044776, "grad_norm": 0.22163766510723298, "learning_rate": 5.007734432520179e-06, "loss": 0.0051, "step": 42540 }, { "epoch": 19.84375, "grad_norm": 0.2129849203817998, "learning_rate": 5.0075086344424855e-06, "loss": 0.0052, "step": 42545 }, { "epoch": 19.84608208955224, "grad_norm": 0.2112990198942378, "learning_rate": 5.0072861807827505e-06, "loss": 0.0052, "step": 42550 }, { "epoch": 19.848414179104477, "grad_norm": 0.19456536933225005, "learning_rate": 5.007067071574053e-06, "loss": 0.0051, "step": 42555 }, { "epoch": 19.850746268656717, "grad_norm": 0.20383777363072608, "learning_rate": 5.0068513068489765e-06, "loss": 0.0051, "step": 42560 }, { "epoch": 19.853078358208954, "grad_norm": 0.21164134704492935, "learning_rate": 5.006638886639597e-06, "loss": 0.0051, "step": 42565 }, { "epoch": 19.855410447761194, "grad_norm": 0.20854051430842874, "learning_rate": 5.0064298109775035e-06, "loss": 0.0052, "step": 42570 }, { "epoch": 19.857742537313435, "grad_norm": 0.2097627259321623, "learning_rate": 5.00622407989378e-06, "loss": 0.0051, "step": 42575 }, { "epoch": 19.86007462686567, "grad_norm": 0.2097991817081593, "learning_rate": 5.006021693419021e-06, "loss": 0.0053, "step": 42580 }, { "epoch": 19.86240671641791, "grad_norm": 0.2152996036547133, "learning_rate": 5.005822651583317e-06, "loss": 0.0051, "step": 42585 }, { "epoch": 19.86473880597015, "grad_norm": 0.2127486595554604, "learning_rate": 5.0056269544162635e-06, "loss": 0.005, "step": 42590 }, { "epoch": 19.86707089552239, "grad_norm": 0.18554310900304505, "learning_rate": 5.005434601946959e-06, "loss": 0.0052, "step": 42595 }, { "epoch": 19.869402985074625, "grad_norm": 0.21773661138179906, "learning_rate": 5.0052455942040045e-06, "loss": 0.0051, "step": 42600 }, { "epoch": 19.871735074626866, "grad_norm": 0.21153690782902956, "learning_rate": 5.005059931215503e-06, "loss": 0.0052, "step": 42605 }, { "epoch": 19.874067164179106, "grad_norm": 0.17804355526371554, "learning_rate": 5.004877613009064e-06, "loss": 0.005, "step": 42610 }, { "epoch": 19.876399253731343, "grad_norm": 0.22716836621720002, "learning_rate": 5.004698639611792e-06, "loss": 0.0051, "step": 42615 }, { "epoch": 19.878731343283583, "grad_norm": 0.19412000816757866, "learning_rate": 5.0045230110503e-06, "loss": 0.0053, "step": 42620 }, { "epoch": 19.88106343283582, "grad_norm": 0.19543878534476622, "learning_rate": 5.004350727350704e-06, "loss": 0.005, "step": 42625 }, { "epoch": 19.88339552238806, "grad_norm": 0.20540439116163617, "learning_rate": 5.00418178853862e-06, "loss": 0.0052, "step": 42630 }, { "epoch": 19.885727611940297, "grad_norm": 0.19569826581186736, "learning_rate": 5.004016194639169e-06, "loss": 0.0051, "step": 42635 }, { "epoch": 19.888059701492537, "grad_norm": 0.19581535261607907, "learning_rate": 5.003853945676969e-06, "loss": 0.0051, "step": 42640 }, { "epoch": 19.890391791044777, "grad_norm": 0.19280734109238035, "learning_rate": 5.0036950416761485e-06, "loss": 0.0051, "step": 42645 }, { "epoch": 19.892723880597014, "grad_norm": 0.22117443011719665, "learning_rate": 5.0035394826603345e-06, "loss": 0.0053, "step": 42650 }, { "epoch": 19.895055970149254, "grad_norm": 0.20724709762189852, "learning_rate": 5.003387268652657e-06, "loss": 0.0051, "step": 42655 }, { "epoch": 19.89738805970149, "grad_norm": 0.24219156100264877, "learning_rate": 5.003238399675746e-06, "loss": 0.0052, "step": 42660 }, { "epoch": 19.89972014925373, "grad_norm": 0.21234898738365904, "learning_rate": 5.003092875751742e-06, "loss": 0.0052, "step": 42665 }, { "epoch": 19.90205223880597, "grad_norm": 0.23060978879542518, "learning_rate": 5.002950696902278e-06, "loss": 0.0052, "step": 42670 }, { "epoch": 19.90438432835821, "grad_norm": 0.18821490932128207, "learning_rate": 5.0028118631485e-06, "loss": 0.005, "step": 42675 }, { "epoch": 19.90671641791045, "grad_norm": 0.19857535304467425, "learning_rate": 5.002676374511046e-06, "loss": 0.0051, "step": 42680 }, { "epoch": 19.909048507462686, "grad_norm": 0.23945456190169218, "learning_rate": 5.002544231010064e-06, "loss": 0.0053, "step": 42685 }, { "epoch": 19.911380597014926, "grad_norm": 0.22702515272800294, "learning_rate": 5.0024154326652044e-06, "loss": 0.0051, "step": 42690 }, { "epoch": 19.913712686567163, "grad_norm": 0.20599742003659421, "learning_rate": 5.002289979495614e-06, "loss": 0.0053, "step": 42695 }, { "epoch": 19.916044776119403, "grad_norm": 0.21318812440595114, "learning_rate": 5.002167871519951e-06, "loss": 0.0051, "step": 42700 }, { "epoch": 19.918376865671643, "grad_norm": 0.2041892918571914, "learning_rate": 5.00204910875637e-06, "loss": 0.0053, "step": 42705 }, { "epoch": 19.92070895522388, "grad_norm": 0.20779898589919088, "learning_rate": 5.001933691222527e-06, "loss": 0.0051, "step": 42710 }, { "epoch": 19.92304104477612, "grad_norm": 0.21364300973756073, "learning_rate": 5.001821618935589e-06, "loss": 0.0051, "step": 42715 }, { "epoch": 19.925373134328357, "grad_norm": 0.20550580214777922, "learning_rate": 5.001712891912217e-06, "loss": 0.0054, "step": 42720 }, { "epoch": 19.927705223880597, "grad_norm": 0.18688898066662468, "learning_rate": 5.001607510168576e-06, "loss": 0.0054, "step": 42725 }, { "epoch": 19.930037313432837, "grad_norm": 0.21737798932408203, "learning_rate": 5.001505473720337e-06, "loss": 0.0054, "step": 42730 }, { "epoch": 19.932369402985074, "grad_norm": 0.1963521763190275, "learning_rate": 5.001406782582673e-06, "loss": 0.0054, "step": 42735 }, { "epoch": 19.934701492537314, "grad_norm": 0.2098878008214197, "learning_rate": 5.001311436770255e-06, "loss": 0.0051, "step": 42740 }, { "epoch": 19.93703358208955, "grad_norm": 0.19233103994161724, "learning_rate": 5.001219436297262e-06, "loss": 0.0051, "step": 42745 }, { "epoch": 19.93936567164179, "grad_norm": 0.23071056783712077, "learning_rate": 5.001130781177377e-06, "loss": 0.0053, "step": 42750 }, { "epoch": 19.94169776119403, "grad_norm": 0.21047838842366715, "learning_rate": 5.0010454714237786e-06, "loss": 0.0051, "step": 42755 }, { "epoch": 19.94402985074627, "grad_norm": 0.21320338512908654, "learning_rate": 5.000963507049151e-06, "loss": 0.0052, "step": 42760 }, { "epoch": 19.94636194029851, "grad_norm": 0.18892048121915297, "learning_rate": 5.000884888065682e-06, "loss": 0.0051, "step": 42765 }, { "epoch": 19.948694029850746, "grad_norm": 0.2113567135073811, "learning_rate": 5.000809614485062e-06, "loss": 0.0055, "step": 42770 }, { "epoch": 19.951026119402986, "grad_norm": 0.22681960234159285, "learning_rate": 5.0007376863184835e-06, "loss": 0.0051, "step": 42775 }, { "epoch": 19.953358208955223, "grad_norm": 0.21249810677793796, "learning_rate": 5.000669103576643e-06, "loss": 0.0053, "step": 42780 }, { "epoch": 19.955690298507463, "grad_norm": 0.21364535124163636, "learning_rate": 5.000603866269734e-06, "loss": 0.0054, "step": 42785 }, { "epoch": 19.958022388059703, "grad_norm": 0.2063183881173079, "learning_rate": 5.000541974407462e-06, "loss": 0.0052, "step": 42790 }, { "epoch": 19.96035447761194, "grad_norm": 0.20008211123008493, "learning_rate": 5.0004834279990245e-06, "loss": 0.0052, "step": 42795 }, { "epoch": 19.96268656716418, "grad_norm": 0.20876612462430374, "learning_rate": 5.000428227053131e-06, "loss": 0.0053, "step": 42800 }, { "epoch": 19.965018656716417, "grad_norm": 0.2290294102380546, "learning_rate": 5.000376371577987e-06, "loss": 0.0052, "step": 42805 }, { "epoch": 19.967350746268657, "grad_norm": 0.218102817936989, "learning_rate": 5.000327861581302e-06, "loss": 0.0053, "step": 42810 }, { "epoch": 19.969682835820894, "grad_norm": 0.20076715779209287, "learning_rate": 5.000282697070291e-06, "loss": 0.0053, "step": 42815 }, { "epoch": 19.972014925373134, "grad_norm": 0.2027749420620175, "learning_rate": 5.000240878051671e-06, "loss": 0.0053, "step": 42820 }, { "epoch": 19.974347014925375, "grad_norm": 0.22918768292317776, "learning_rate": 5.000202404531656e-06, "loss": 0.0052, "step": 42825 }, { "epoch": 19.97667910447761, "grad_norm": 0.22497898870045024, "learning_rate": 5.0001672765159696e-06, "loss": 0.0053, "step": 42830 }, { "epoch": 19.97901119402985, "grad_norm": 0.22913834639868777, "learning_rate": 5.000135494009835e-06, "loss": 0.0054, "step": 42835 }, { "epoch": 19.98134328358209, "grad_norm": 0.21785088693746055, "learning_rate": 5.000107057017976e-06, "loss": 0.0054, "step": 42840 }, { "epoch": 19.98367537313433, "grad_norm": 0.18885028400241566, "learning_rate": 5.000081965544622e-06, "loss": 0.0051, "step": 42845 }, { "epoch": 19.986007462686565, "grad_norm": 0.21111495655448076, "learning_rate": 5.0000602195935046e-06, "loss": 0.0054, "step": 42850 }, { "epoch": 19.988339552238806, "grad_norm": 0.2008148101894371, "learning_rate": 5.000041819167857e-06, "loss": 0.0053, "step": 42855 }, { "epoch": 19.990671641791046, "grad_norm": 0.21420366382945036, "learning_rate": 5.000026764270413e-06, "loss": 0.0051, "step": 42860 }, { "epoch": 19.993003731343283, "grad_norm": 0.2234152347464571, "learning_rate": 5.000015054903415e-06, "loss": 0.0053, "step": 42865 }, { "epoch": 19.995335820895523, "grad_norm": 0.22392160716181395, "learning_rate": 5.0000066910686e-06, "loss": 0.0054, "step": 42870 }, { "epoch": 19.99766791044776, "grad_norm": 0.22686346301564736, "learning_rate": 5.0000016727672125e-06, "loss": 0.0053, "step": 42875 }, { "epoch": 20.0, "grad_norm": 0.23326511326123883, "learning_rate": 5e-06, "loss": 0.0052, "step": 42880 }, { "epoch": 20.0, "step": 42880, "total_flos": 9771570356551680.0, "train_loss": 0.08984405147259249, "train_runtime": 496360.1448, "train_samples_per_second": 1.382, "train_steps_per_second": 0.086 } ], "logging_steps": 5, "max_steps": 42880, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9771570356551680.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }