{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.458598514293791, "eval_steps": 500, "global_step": 208000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016627937429071454, "grad_norm": 16.836948455305052, "learning_rate": 1.6461589624210177e-08, "loss": 2.913, "step": 100 }, { "epoch": 0.0033255874858142908, "grad_norm": 16.982138954714582, "learning_rate": 3.3089457931493186e-08, "loss": 2.9144, "step": 200 }, { "epoch": 0.004988381228721436, "grad_norm": 13.476755399445098, "learning_rate": 4.97173262387762e-08, "loss": 2.811, "step": 300 }, { "epoch": 0.0066511749716285816, "grad_norm": 9.422795168574819, "learning_rate": 6.634519454605921e-08, "loss": 2.7094, "step": 400 }, { "epoch": 0.008313968714535727, "grad_norm": 8.428710094976573, "learning_rate": 8.297306285334222e-08, "loss": 2.6395, "step": 500 }, { "epoch": 0.009976762457442872, "grad_norm": 8.977754455375363, "learning_rate": 9.960093116062521e-08, "loss": 2.5948, "step": 600 }, { "epoch": 0.011639556200350018, "grad_norm": 7.592406249094615, "learning_rate": 1.1622879946790823e-07, "loss": 2.5369, "step": 700 }, { "epoch": 0.013302349943257163, "grad_norm": 8.562179525002133, "learning_rate": 1.3285666777519123e-07, "loss": 2.5284, "step": 800 }, { "epoch": 0.01496514368616431, "grad_norm": 10.127795212811147, "learning_rate": 1.4948453608247425e-07, "loss": 2.4478, "step": 900 }, { "epoch": 0.016627937429071454, "grad_norm": 7.97078368012482, "learning_rate": 1.6611240438975724e-07, "loss": 2.4548, "step": 1000 }, { "epoch": 0.0182907311719786, "grad_norm": 8.540276135706934, "learning_rate": 1.8274027269704026e-07, "loss": 2.4732, "step": 1100 }, { "epoch": 0.019953524914885744, "grad_norm": 9.75014166876878, "learning_rate": 1.9936814100432328e-07, "loss": 2.4013, "step": 1200 }, { "epoch": 0.021616318657792892, "grad_norm": 7.36821361540804, "learning_rate": 2.1599600931160627e-07, "loss": 2.3811, "step": 1300 }, { "epoch": 0.023279112400700037, "grad_norm": 9.928659291241967, "learning_rate": 2.326238776188893e-07, "loss": 2.3791, "step": 1400 }, { "epoch": 0.02494190614360718, "grad_norm": 10.291477019784509, "learning_rate": 2.492517459261723e-07, "loss": 2.3525, "step": 1500 }, { "epoch": 0.026604699886514326, "grad_norm": 7.209560150932202, "learning_rate": 2.658796142334553e-07, "loss": 2.3107, "step": 1600 }, { "epoch": 0.02826749362942147, "grad_norm": 9.465954339233445, "learning_rate": 2.825074825407383e-07, "loss": 2.3064, "step": 1700 }, { "epoch": 0.02993028737232862, "grad_norm": 9.814394858039664, "learning_rate": 2.991353508480213e-07, "loss": 2.2585, "step": 1800 }, { "epoch": 0.03159308111523576, "grad_norm": 7.100149472457117, "learning_rate": 3.157632191553043e-07, "loss": 2.2728, "step": 1900 }, { "epoch": 0.03325587485814291, "grad_norm": 6.30983140901061, "learning_rate": 3.3239108746258735e-07, "loss": 2.2073, "step": 2000 }, { "epoch": 0.03491866860105006, "grad_norm": 7.9341934356625075, "learning_rate": 3.490189557698703e-07, "loss": 2.2062, "step": 2100 }, { "epoch": 0.0365814623439572, "grad_norm": 8.522134206306216, "learning_rate": 3.6564682407715333e-07, "loss": 2.1822, "step": 2200 }, { "epoch": 0.038244256086864346, "grad_norm": 7.163625149799112, "learning_rate": 3.8227469238443635e-07, "loss": 2.1862, "step": 2300 }, { "epoch": 0.03990704982977149, "grad_norm": 7.15040099035335, "learning_rate": 3.989025606917193e-07, "loss": 2.157, "step": 2400 }, { "epoch": 0.041569843572678636, "grad_norm": 8.81292106030734, "learning_rate": 4.155304289990024e-07, "loss": 2.1391, "step": 2500 }, { "epoch": 0.043232637315585784, "grad_norm": 7.6526035991644985, "learning_rate": 4.3215829730628536e-07, "loss": 2.1625, "step": 2600 }, { "epoch": 0.044895431058492925, "grad_norm": 8.039734829373732, "learning_rate": 4.487861656135684e-07, "loss": 2.1441, "step": 2700 }, { "epoch": 0.046558224801400074, "grad_norm": 9.12686523706147, "learning_rate": 4.654140339208514e-07, "loss": 2.1339, "step": 2800 }, { "epoch": 0.048221018544307215, "grad_norm": 9.771638409501342, "learning_rate": 4.820419022281344e-07, "loss": 2.0796, "step": 2900 }, { "epoch": 0.04988381228721436, "grad_norm": 9.730343484323338, "learning_rate": 4.986697705354174e-07, "loss": 2.0863, "step": 3000 }, { "epoch": 0.05154660603012151, "grad_norm": 10.526638676282575, "learning_rate": 5.152976388427004e-07, "loss": 2.0761, "step": 3100 }, { "epoch": 0.05320939977302865, "grad_norm": 8.456881353426784, "learning_rate": 5.319255071499834e-07, "loss": 2.0823, "step": 3200 }, { "epoch": 0.0548721935159358, "grad_norm": 7.781373840455716, "learning_rate": 5.485533754572664e-07, "loss": 2.0355, "step": 3300 }, { "epoch": 0.05653498725884294, "grad_norm": 10.87631150103102, "learning_rate": 5.651812437645495e-07, "loss": 2.0708, "step": 3400 }, { "epoch": 0.05819778100175009, "grad_norm": 10.56993575190311, "learning_rate": 5.818091120718324e-07, "loss": 2.0084, "step": 3500 }, { "epoch": 0.05986057474465724, "grad_norm": 8.33529595581404, "learning_rate": 5.984369803791155e-07, "loss": 1.9898, "step": 3600 }, { "epoch": 0.06152336848756438, "grad_norm": 9.22204030538152, "learning_rate": 6.150648486863985e-07, "loss": 2.0196, "step": 3700 }, { "epoch": 0.06318616223047152, "grad_norm": 7.883632982842793, "learning_rate": 6.316927169936815e-07, "loss": 1.979, "step": 3800 }, { "epoch": 0.06484895597337867, "grad_norm": 8.051956204321538, "learning_rate": 6.483205853009644e-07, "loss": 1.9718, "step": 3900 }, { "epoch": 0.06651174971628582, "grad_norm": 7.261956343828639, "learning_rate": 6.649484536082475e-07, "loss": 1.9839, "step": 4000 }, { "epoch": 0.06817454345919297, "grad_norm": 9.016333108278552, "learning_rate": 6.815763219155305e-07, "loss": 1.9674, "step": 4100 }, { "epoch": 0.06983733720210011, "grad_norm": 8.469039670130794, "learning_rate": 6.982041902228134e-07, "loss": 1.9446, "step": 4200 }, { "epoch": 0.07150013094500725, "grad_norm": 8.39124002637072, "learning_rate": 7.148320585300965e-07, "loss": 1.9328, "step": 4300 }, { "epoch": 0.0731629246879144, "grad_norm": 11.159472370089967, "learning_rate": 7.314599268373795e-07, "loss": 1.9166, "step": 4400 }, { "epoch": 0.07482571843082154, "grad_norm": 8.008702502605772, "learning_rate": 7.480877951446625e-07, "loss": 1.9599, "step": 4500 }, { "epoch": 0.07648851217372869, "grad_norm": 9.714706374163736, "learning_rate": 7.647156634519455e-07, "loss": 1.9175, "step": 4600 }, { "epoch": 0.07815130591663584, "grad_norm": 9.94625410255174, "learning_rate": 7.813435317592285e-07, "loss": 1.9013, "step": 4700 }, { "epoch": 0.07981409965954298, "grad_norm": 9.096036945372928, "learning_rate": 7.979714000665115e-07, "loss": 1.9244, "step": 4800 }, { "epoch": 0.08147689340245012, "grad_norm": 7.750833927815385, "learning_rate": 8.145992683737946e-07, "loss": 1.8377, "step": 4900 }, { "epoch": 0.08313968714535727, "grad_norm": 5.794046459865048, "learning_rate": 8.312271366810775e-07, "loss": 1.8468, "step": 5000 }, { "epoch": 0.08480248088826442, "grad_norm": 7.195257102348483, "learning_rate": 8.478550049883606e-07, "loss": 1.8597, "step": 5100 }, { "epoch": 0.08646527463117157, "grad_norm": 8.933953401301741, "learning_rate": 8.644828732956436e-07, "loss": 1.828, "step": 5200 }, { "epoch": 0.0881280683740787, "grad_norm": 13.066909203722874, "learning_rate": 8.811107416029265e-07, "loss": 1.8624, "step": 5300 }, { "epoch": 0.08979086211698585, "grad_norm": 9.112206039795705, "learning_rate": 8.977386099102096e-07, "loss": 1.8767, "step": 5400 }, { "epoch": 0.091453655859893, "grad_norm": 9.198018756754522, "learning_rate": 9.143664782174926e-07, "loss": 1.8294, "step": 5500 }, { "epoch": 0.09311644960280015, "grad_norm": 6.483424027265559, "learning_rate": 9.309943465247757e-07, "loss": 1.8233, "step": 5600 }, { "epoch": 0.0947792433457073, "grad_norm": 8.728166247750746, "learning_rate": 9.476222148320586e-07, "loss": 1.779, "step": 5700 }, { "epoch": 0.09644203708861443, "grad_norm": 8.234341893651145, "learning_rate": 9.642500831393415e-07, "loss": 1.7874, "step": 5800 }, { "epoch": 0.09810483083152158, "grad_norm": 9.490781470517021, "learning_rate": 9.808779514466245e-07, "loss": 1.7859, "step": 5900 }, { "epoch": 0.09976762457442873, "grad_norm": 10.682280658910871, "learning_rate": 9.975058197539076e-07, "loss": 1.7656, "step": 6000 }, { "epoch": 0.10143041831733587, "grad_norm": 11.583785132292213, "learning_rate": 1.0141336880611905e-06, "loss": 1.8071, "step": 6100 }, { "epoch": 0.10309321206024302, "grad_norm": 8.180832170774583, "learning_rate": 1.0307615563684736e-06, "loss": 1.7854, "step": 6200 }, { "epoch": 0.10475600580315016, "grad_norm": 7.987886808715907, "learning_rate": 1.0473894246757566e-06, "loss": 1.7556, "step": 6300 }, { "epoch": 0.1064187995460573, "grad_norm": 9.035241788710403, "learning_rate": 1.0640172929830397e-06, "loss": 1.746, "step": 6400 }, { "epoch": 0.10808159328896445, "grad_norm": 9.95401364949345, "learning_rate": 1.0806451612903226e-06, "loss": 1.7497, "step": 6500 }, { "epoch": 0.1097443870318716, "grad_norm": 7.679566998681046, "learning_rate": 1.0972730295976056e-06, "loss": 1.7406, "step": 6600 }, { "epoch": 0.11140718077477875, "grad_norm": 9.569078449637152, "learning_rate": 1.1139008979048887e-06, "loss": 1.7683, "step": 6700 }, { "epoch": 0.11306997451768588, "grad_norm": 11.280183352890054, "learning_rate": 1.1305287662121718e-06, "loss": 1.7453, "step": 6800 }, { "epoch": 0.11473276826059303, "grad_norm": 7.219466098096562, "learning_rate": 1.1471566345194546e-06, "loss": 1.708, "step": 6900 }, { "epoch": 0.11639556200350018, "grad_norm": 7.835812516750071, "learning_rate": 1.1637845028267377e-06, "loss": 1.7336, "step": 7000 }, { "epoch": 0.11805835574640733, "grad_norm": 12.380130480569354, "learning_rate": 1.1804123711340208e-06, "loss": 1.7385, "step": 7100 }, { "epoch": 0.11972114948931448, "grad_norm": 10.238856360567828, "learning_rate": 1.1970402394413036e-06, "loss": 1.7086, "step": 7200 }, { "epoch": 0.12138394323222161, "grad_norm": 9.210783248944256, "learning_rate": 1.2136681077485867e-06, "loss": 1.6659, "step": 7300 }, { "epoch": 0.12304673697512876, "grad_norm": 10.481892767730153, "learning_rate": 1.2302959760558698e-06, "loss": 1.7041, "step": 7400 }, { "epoch": 0.12470953071803591, "grad_norm": 8.328533505570952, "learning_rate": 1.2469238443631528e-06, "loss": 1.7071, "step": 7500 }, { "epoch": 0.12637232446094304, "grad_norm": 6.130246443146608, "learning_rate": 1.2635517126704357e-06, "loss": 1.7083, "step": 7600 }, { "epoch": 0.1280351182038502, "grad_norm": 7.777248233591925, "learning_rate": 1.2801795809777188e-06, "loss": 1.7093, "step": 7700 }, { "epoch": 0.12969791194675734, "grad_norm": 6.543635378916948, "learning_rate": 1.2968074492850019e-06, "loss": 1.6582, "step": 7800 }, { "epoch": 0.1313607056896645, "grad_norm": 10.615311963709088, "learning_rate": 1.313435317592285e-06, "loss": 1.6806, "step": 7900 }, { "epoch": 0.13302349943257163, "grad_norm": 8.077790244070703, "learning_rate": 1.3300631858995678e-06, "loss": 1.6574, "step": 8000 }, { "epoch": 0.13468629317547878, "grad_norm": 9.321295919668268, "learning_rate": 1.3466910542068509e-06, "loss": 1.6729, "step": 8100 }, { "epoch": 0.13634908691838593, "grad_norm": 8.558602257275567, "learning_rate": 1.363318922514134e-06, "loss": 1.6643, "step": 8200 }, { "epoch": 0.13801188066129308, "grad_norm": 7.927652331928149, "learning_rate": 1.3799467908214168e-06, "loss": 1.6488, "step": 8300 }, { "epoch": 0.13967467440420023, "grad_norm": 10.068754767303098, "learning_rate": 1.3965746591286999e-06, "loss": 1.6698, "step": 8400 }, { "epoch": 0.14133746814710738, "grad_norm": 8.275808456695732, "learning_rate": 1.413202527435983e-06, "loss": 1.6619, "step": 8500 }, { "epoch": 0.1430002618900145, "grad_norm": 8.819442153981068, "learning_rate": 1.4298303957432658e-06, "loss": 1.6364, "step": 8600 }, { "epoch": 0.14466305563292164, "grad_norm": 7.25115745973657, "learning_rate": 1.4464582640505489e-06, "loss": 1.6545, "step": 8700 }, { "epoch": 0.1463258493758288, "grad_norm": 7.483212630218626, "learning_rate": 1.463086132357832e-06, "loss": 1.6275, "step": 8800 }, { "epoch": 0.14798864311873594, "grad_norm": 14.494936092457923, "learning_rate": 1.4797140006651148e-06, "loss": 1.623, "step": 8900 }, { "epoch": 0.1496514368616431, "grad_norm": 9.351791918318119, "learning_rate": 1.496341868972398e-06, "loss": 1.608, "step": 9000 }, { "epoch": 0.15131423060455024, "grad_norm": 8.053620241614754, "learning_rate": 1.512969737279681e-06, "loss": 1.5981, "step": 9100 }, { "epoch": 0.15297702434745739, "grad_norm": 9.635626174535078, "learning_rate": 1.5295976055869638e-06, "loss": 1.5966, "step": 9200 }, { "epoch": 0.15463981809036453, "grad_norm": 6.990087493823738, "learning_rate": 1.546225473894247e-06, "loss": 1.6032, "step": 9300 }, { "epoch": 0.15630261183327168, "grad_norm": 10.338065359451893, "learning_rate": 1.56285334220153e-06, "loss": 1.6137, "step": 9400 }, { "epoch": 0.15796540557617883, "grad_norm": 8.813718362092441, "learning_rate": 1.5794812105088128e-06, "loss": 1.5779, "step": 9500 }, { "epoch": 0.15962819931908595, "grad_norm": 8.501895128712118, "learning_rate": 1.596109078816096e-06, "loss": 1.5569, "step": 9600 }, { "epoch": 0.1612909930619931, "grad_norm": 7.7822377543624155, "learning_rate": 1.612736947123379e-06, "loss": 1.6054, "step": 9700 }, { "epoch": 0.16295378680490025, "grad_norm": 10.565268273304525, "learning_rate": 1.6293648154306618e-06, "loss": 1.5737, "step": 9800 }, { "epoch": 0.1646165805478074, "grad_norm": 7.640896323821734, "learning_rate": 1.645992683737945e-06, "loss": 1.583, "step": 9900 }, { "epoch": 0.16627937429071454, "grad_norm": 8.874504733690651, "learning_rate": 1.662620552045228e-06, "loss": 1.5849, "step": 10000 }, { "epoch": 0.1679421680336217, "grad_norm": 8.196271357400143, "learning_rate": 1.6792484203525108e-06, "loss": 1.5669, "step": 10100 }, { "epoch": 0.16960496177652884, "grad_norm": 8.756001789361903, "learning_rate": 1.695876288659794e-06, "loss": 1.5445, "step": 10200 }, { "epoch": 0.171267755519436, "grad_norm": 8.889976968955034, "learning_rate": 1.712504156967077e-06, "loss": 1.559, "step": 10300 }, { "epoch": 0.17293054926234314, "grad_norm": 8.017648932766688, "learning_rate": 1.7291320252743598e-06, "loss": 1.5236, "step": 10400 }, { "epoch": 0.17459334300525028, "grad_norm": 8.404927172554027, "learning_rate": 1.745759893581643e-06, "loss": 1.5549, "step": 10500 }, { "epoch": 0.1762561367481574, "grad_norm": 8.56766140283494, "learning_rate": 1.762387761888926e-06, "loss": 1.5495, "step": 10600 }, { "epoch": 0.17791893049106455, "grad_norm": 7.5826246124446355, "learning_rate": 1.7790156301962088e-06, "loss": 1.5718, "step": 10700 }, { "epoch": 0.1795817242339717, "grad_norm": 11.313078897044479, "learning_rate": 1.795643498503492e-06, "loss": 1.5339, "step": 10800 }, { "epoch": 0.18124451797687885, "grad_norm": 10.002000086897128, "learning_rate": 1.812271366810775e-06, "loss": 1.5457, "step": 10900 }, { "epoch": 0.182907311719786, "grad_norm": 10.638640213468758, "learning_rate": 1.8288992351180578e-06, "loss": 1.5194, "step": 11000 }, { "epoch": 0.18457010546269315, "grad_norm": 7.690551701588416, "learning_rate": 1.845527103425341e-06, "loss": 1.5357, "step": 11100 }, { "epoch": 0.1862328992056003, "grad_norm": 9.702246485637074, "learning_rate": 1.862154971732624e-06, "loss": 1.5056, "step": 11200 }, { "epoch": 0.18789569294850744, "grad_norm": 6.817444914791742, "learning_rate": 1.8787828400399068e-06, "loss": 1.5038, "step": 11300 }, { "epoch": 0.1895584866914146, "grad_norm": 10.928476084155317, "learning_rate": 1.89541070834719e-06, "loss": 1.4601, "step": 11400 }, { "epoch": 0.19122128043432174, "grad_norm": 10.778382633801488, "learning_rate": 1.912038576654473e-06, "loss": 1.522, "step": 11500 }, { "epoch": 0.19288407417722886, "grad_norm": 7.133819351349779, "learning_rate": 1.928666444961756e-06, "loss": 1.522, "step": 11600 }, { "epoch": 0.194546867920136, "grad_norm": 8.46784982281041, "learning_rate": 1.945294313269039e-06, "loss": 1.5006, "step": 11700 }, { "epoch": 0.19620966166304316, "grad_norm": 9.772425131958673, "learning_rate": 1.961922181576322e-06, "loss": 1.4951, "step": 11800 }, { "epoch": 0.1978724554059503, "grad_norm": 8.179411189467215, "learning_rate": 1.9785500498836052e-06, "loss": 1.4847, "step": 11900 }, { "epoch": 0.19953524914885745, "grad_norm": 8.693031983279985, "learning_rate": 1.9951779181908883e-06, "loss": 1.4922, "step": 12000 }, { "epoch": 0.2011980428917646, "grad_norm": 7.970897883516274, "learning_rate": 2.011805786498171e-06, "loss": 1.4784, "step": 12100 }, { "epoch": 0.20286083663467175, "grad_norm": 10.9060917593891, "learning_rate": 2.028433654805454e-06, "loss": 1.4666, "step": 12200 }, { "epoch": 0.2045236303775789, "grad_norm": 7.00437417307804, "learning_rate": 2.045061523112737e-06, "loss": 1.4735, "step": 12300 }, { "epoch": 0.20618642412048604, "grad_norm": 8.12967130796718, "learning_rate": 2.06168939142002e-06, "loss": 1.4323, "step": 12400 }, { "epoch": 0.2078492178633932, "grad_norm": 11.151856777489291, "learning_rate": 2.0783172597273033e-06, "loss": 1.4418, "step": 12500 }, { "epoch": 0.2095120116063003, "grad_norm": 8.619045158595066, "learning_rate": 2.0949451280345863e-06, "loss": 1.4766, "step": 12600 }, { "epoch": 0.21117480534920746, "grad_norm": 7.127694165908561, "learning_rate": 2.111572996341869e-06, "loss": 1.47, "step": 12700 }, { "epoch": 0.2128375990921146, "grad_norm": 7.266512656013455, "learning_rate": 2.1282008646491525e-06, "loss": 1.4577, "step": 12800 }, { "epoch": 0.21450039283502176, "grad_norm": 8.331750799799238, "learning_rate": 2.144828732956435e-06, "loss": 1.4919, "step": 12900 }, { "epoch": 0.2161631865779289, "grad_norm": 11.017332972852195, "learning_rate": 2.161456601263718e-06, "loss": 1.4483, "step": 13000 }, { "epoch": 0.21782598032083605, "grad_norm": 8.76796749181763, "learning_rate": 2.1780844695710013e-06, "loss": 1.4311, "step": 13100 }, { "epoch": 0.2194887740637432, "grad_norm": 10.695618737489594, "learning_rate": 2.1947123378782843e-06, "loss": 1.4398, "step": 13200 }, { "epoch": 0.22115156780665035, "grad_norm": 9.41325821972631, "learning_rate": 2.211340206185567e-06, "loss": 1.4295, "step": 13300 }, { "epoch": 0.2228143615495575, "grad_norm": 10.05904433078892, "learning_rate": 2.2279680744928505e-06, "loss": 1.4295, "step": 13400 }, { "epoch": 0.22447715529246465, "grad_norm": 7.260719694253746, "learning_rate": 2.244595942800133e-06, "loss": 1.4358, "step": 13500 }, { "epoch": 0.22613994903537177, "grad_norm": 9.241713499678161, "learning_rate": 2.261223811107416e-06, "loss": 1.4254, "step": 13600 }, { "epoch": 0.22780274277827892, "grad_norm": 9.267152957258256, "learning_rate": 2.2778516794146993e-06, "loss": 1.4354, "step": 13700 }, { "epoch": 0.22946553652118606, "grad_norm": 8.046888006424094, "learning_rate": 2.2944795477219823e-06, "loss": 1.4267, "step": 13800 }, { "epoch": 0.2311283302640932, "grad_norm": 6.6552629445552896, "learning_rate": 2.311107416029265e-06, "loss": 1.4285, "step": 13900 }, { "epoch": 0.23279112400700036, "grad_norm": 8.492428581949287, "learning_rate": 2.3277352843365485e-06, "loss": 1.4165, "step": 14000 }, { "epoch": 0.2344539177499075, "grad_norm": 8.121333415704306, "learning_rate": 2.344363152643831e-06, "loss": 1.4401, "step": 14100 }, { "epoch": 0.23611671149281466, "grad_norm": 7.275290535346962, "learning_rate": 2.360991020951114e-06, "loss": 1.3894, "step": 14200 }, { "epoch": 0.2377795052357218, "grad_norm": 7.738832912861498, "learning_rate": 2.3776188892583973e-06, "loss": 1.4203, "step": 14300 }, { "epoch": 0.23944229897862895, "grad_norm": 6.472892468608509, "learning_rate": 2.3942467575656803e-06, "loss": 1.384, "step": 14400 }, { "epoch": 0.2411050927215361, "grad_norm": 6.350281232815796, "learning_rate": 2.410874625872963e-06, "loss": 1.3813, "step": 14500 }, { "epoch": 0.24276788646444322, "grad_norm": 8.102922726418806, "learning_rate": 2.4275024941802465e-06, "loss": 1.3842, "step": 14600 }, { "epoch": 0.24443068020735037, "grad_norm": 8.114782188891244, "learning_rate": 2.444130362487529e-06, "loss": 1.3872, "step": 14700 }, { "epoch": 0.24609347395025752, "grad_norm": 6.085427083322673, "learning_rate": 2.460758230794812e-06, "loss": 1.4271, "step": 14800 }, { "epoch": 0.24775626769316467, "grad_norm": 10.594950856009243, "learning_rate": 2.4773860991020953e-06, "loss": 1.3663, "step": 14900 }, { "epoch": 0.24941906143607182, "grad_norm": 8.283339614712192, "learning_rate": 2.4940139674093783e-06, "loss": 1.3823, "step": 15000 }, { "epoch": 0.25108185517897896, "grad_norm": 5.944717781747582, "learning_rate": 2.510641835716661e-06, "loss": 1.3597, "step": 15100 }, { "epoch": 0.2527446489218861, "grad_norm": 6.763109543049465, "learning_rate": 2.527269704023944e-06, "loss": 1.3903, "step": 15200 }, { "epoch": 0.25440744266479326, "grad_norm": 8.070433050129433, "learning_rate": 2.5438975723312276e-06, "loss": 1.3571, "step": 15300 }, { "epoch": 0.2560702364077004, "grad_norm": 9.014838147948515, "learning_rate": 2.5605254406385106e-06, "loss": 1.3734, "step": 15400 }, { "epoch": 0.25773303015060756, "grad_norm": 7.3050170163076364, "learning_rate": 2.5771533089457933e-06, "loss": 1.3347, "step": 15500 }, { "epoch": 0.2593958238935147, "grad_norm": 9.021364515486356, "learning_rate": 2.5937811772530764e-06, "loss": 1.4154, "step": 15600 }, { "epoch": 0.26105861763642185, "grad_norm": 7.860056382701673, "learning_rate": 2.6104090455603594e-06, "loss": 1.3663, "step": 15700 }, { "epoch": 0.262721411379329, "grad_norm": 7.3238029484308935, "learning_rate": 2.627036913867642e-06, "loss": 1.3573, "step": 15800 }, { "epoch": 0.26438420512223615, "grad_norm": 6.625085021555603, "learning_rate": 2.6436647821749256e-06, "loss": 1.3486, "step": 15900 }, { "epoch": 0.26604699886514327, "grad_norm": 7.218079859369805, "learning_rate": 2.6602926504822086e-06, "loss": 1.37, "step": 16000 }, { "epoch": 0.2677097926080504, "grad_norm": 6.624047081574884, "learning_rate": 2.6769205187894913e-06, "loss": 1.3697, "step": 16100 }, { "epoch": 0.26937258635095757, "grad_norm": 5.843685934212928, "learning_rate": 2.6935483870967744e-06, "loss": 1.314, "step": 16200 }, { "epoch": 0.2710353800938647, "grad_norm": 9.331197235467867, "learning_rate": 2.7101762554040574e-06, "loss": 1.3269, "step": 16300 }, { "epoch": 0.27269817383677186, "grad_norm": 6.129639612503374, "learning_rate": 2.72680412371134e-06, "loss": 1.3289, "step": 16400 }, { "epoch": 0.274360967579679, "grad_norm": 6.335463579789276, "learning_rate": 2.7434319920186236e-06, "loss": 1.3162, "step": 16500 }, { "epoch": 0.27602376132258616, "grad_norm": 11.181637713251872, "learning_rate": 2.7600598603259066e-06, "loss": 1.3263, "step": 16600 }, { "epoch": 0.2776865550654933, "grad_norm": 8.300811998538336, "learning_rate": 2.7766877286331893e-06, "loss": 1.3344, "step": 16700 }, { "epoch": 0.27934934880840046, "grad_norm": 6.245184527105126, "learning_rate": 2.7933155969404724e-06, "loss": 1.3123, "step": 16800 }, { "epoch": 0.2810121425513076, "grad_norm": 9.013074296692805, "learning_rate": 2.8099434652477554e-06, "loss": 1.3377, "step": 16900 }, { "epoch": 0.28267493629421475, "grad_norm": 9.185763063268965, "learning_rate": 2.826571333555038e-06, "loss": 1.2883, "step": 17000 }, { "epoch": 0.28433773003712187, "grad_norm": 6.385147009363078, "learning_rate": 2.8431992018623216e-06, "loss": 1.3035, "step": 17100 }, { "epoch": 0.286000523780029, "grad_norm": 8.924208124446258, "learning_rate": 2.8598270701696047e-06, "loss": 1.3455, "step": 17200 }, { "epoch": 0.28766331752293617, "grad_norm": 6.080690989382192, "learning_rate": 2.8764549384768873e-06, "loss": 1.2902, "step": 17300 }, { "epoch": 0.2893261112658433, "grad_norm": 12.14946380737574, "learning_rate": 2.8930828067841704e-06, "loss": 1.3331, "step": 17400 }, { "epoch": 0.29098890500875046, "grad_norm": 7.80936464297036, "learning_rate": 2.9097106750914534e-06, "loss": 1.2897, "step": 17500 }, { "epoch": 0.2926516987516576, "grad_norm": 6.588705514457883, "learning_rate": 2.926338543398737e-06, "loss": 1.3394, "step": 17600 }, { "epoch": 0.29431449249456476, "grad_norm": 6.6285242697423365, "learning_rate": 2.9429664117060196e-06, "loss": 1.2631, "step": 17700 }, { "epoch": 0.2959772862374719, "grad_norm": 5.040176566989124, "learning_rate": 2.9595942800133027e-06, "loss": 1.3179, "step": 17800 }, { "epoch": 0.29764007998037906, "grad_norm": 8.133417313115158, "learning_rate": 2.9762221483205857e-06, "loss": 1.3184, "step": 17900 }, { "epoch": 0.2993028737232862, "grad_norm": 6.830238402278381, "learning_rate": 2.9928500166278684e-06, "loss": 1.3085, "step": 18000 }, { "epoch": 0.3009656674661933, "grad_norm": 5.3240952409397435, "learning_rate": 3.0094778849351515e-06, "loss": 1.317, "step": 18100 }, { "epoch": 0.3026284612091005, "grad_norm": 7.26531261319524, "learning_rate": 3.026105753242435e-06, "loss": 1.2905, "step": 18200 }, { "epoch": 0.3042912549520076, "grad_norm": 10.120933536762188, "learning_rate": 3.0427336215497176e-06, "loss": 1.2999, "step": 18300 }, { "epoch": 0.30595404869491477, "grad_norm": 9.162403791983081, "learning_rate": 3.0593614898570007e-06, "loss": 1.2694, "step": 18400 }, { "epoch": 0.3076168424378219, "grad_norm": 6.5109072302479145, "learning_rate": 3.0759893581642837e-06, "loss": 1.3083, "step": 18500 }, { "epoch": 0.30927963618072907, "grad_norm": 6.3633738196883, "learning_rate": 3.0926172264715664e-06, "loss": 1.3042, "step": 18600 }, { "epoch": 0.3109424299236362, "grad_norm": 5.625343295854184, "learning_rate": 3.1092450947788495e-06, "loss": 1.3098, "step": 18700 }, { "epoch": 0.31260522366654336, "grad_norm": 7.234150285886272, "learning_rate": 3.125872963086133e-06, "loss": 1.2703, "step": 18800 }, { "epoch": 0.3142680174094505, "grad_norm": 6.588706577280905, "learning_rate": 3.1425008313934156e-06, "loss": 1.262, "step": 18900 }, { "epoch": 0.31593081115235766, "grad_norm": 6.170502482217438, "learning_rate": 3.1591286997006987e-06, "loss": 1.2975, "step": 19000 }, { "epoch": 0.3175936048952648, "grad_norm": 5.813326246979877, "learning_rate": 3.1757565680079817e-06, "loss": 1.2783, "step": 19100 }, { "epoch": 0.3192563986381719, "grad_norm": 10.802775116787094, "learning_rate": 3.1923844363152644e-06, "loss": 1.2281, "step": 19200 }, { "epoch": 0.3209191923810791, "grad_norm": 6.586974851176334, "learning_rate": 3.2090123046225475e-06, "loss": 1.2647, "step": 19300 }, { "epoch": 0.3225819861239862, "grad_norm": 9.166559762686617, "learning_rate": 3.225640172929831e-06, "loss": 1.2642, "step": 19400 }, { "epoch": 0.3242447798668934, "grad_norm": 7.175469967749592, "learning_rate": 3.2422680412371136e-06, "loss": 1.2658, "step": 19500 }, { "epoch": 0.3259075736098005, "grad_norm": 7.202573075631736, "learning_rate": 3.2588959095443967e-06, "loss": 1.2506, "step": 19600 }, { "epoch": 0.32757036735270767, "grad_norm": 7.258660525906709, "learning_rate": 3.2755237778516798e-06, "loss": 1.2305, "step": 19700 }, { "epoch": 0.3292331610956148, "grad_norm": 7.341002234230417, "learning_rate": 3.2921516461589624e-06, "loss": 1.2288, "step": 19800 }, { "epoch": 0.33089595483852197, "grad_norm": 9.031259932210903, "learning_rate": 3.3087795144662455e-06, "loss": 1.2777, "step": 19900 }, { "epoch": 0.3325587485814291, "grad_norm": 9.023784683784134, "learning_rate": 3.325407382773529e-06, "loss": 1.2449, "step": 20000 }, { "epoch": 0.3342215423243362, "grad_norm": 7.7357914517265325, "learning_rate": 3.342035251080812e-06, "loss": 1.2359, "step": 20100 }, { "epoch": 0.3358843360672434, "grad_norm": 6.72139729734092, "learning_rate": 3.3586631193880947e-06, "loss": 1.2725, "step": 20200 }, { "epoch": 0.3375471298101505, "grad_norm": 7.267577138677613, "learning_rate": 3.3752909876953778e-06, "loss": 1.2484, "step": 20300 }, { "epoch": 0.3392099235530577, "grad_norm": 6.700883467872992, "learning_rate": 3.3919188560026604e-06, "loss": 1.2429, "step": 20400 }, { "epoch": 0.3408727172959648, "grad_norm": 7.863519238815539, "learning_rate": 3.4085467243099435e-06, "loss": 1.227, "step": 20500 }, { "epoch": 0.342535511038872, "grad_norm": 5.384809008541422, "learning_rate": 3.425174592617227e-06, "loss": 1.2254, "step": 20600 }, { "epoch": 0.3441983047817791, "grad_norm": 9.418514532780396, "learning_rate": 3.44180246092451e-06, "loss": 1.2521, "step": 20700 }, { "epoch": 0.3458610985246863, "grad_norm": 6.951554261114451, "learning_rate": 3.4584303292317927e-06, "loss": 1.2444, "step": 20800 }, { "epoch": 0.3475238922675934, "grad_norm": 6.931133368275928, "learning_rate": 3.4750581975390758e-06, "loss": 1.211, "step": 20900 }, { "epoch": 0.34918668601050057, "grad_norm": 6.482244565666992, "learning_rate": 3.4916860658463584e-06, "loss": 1.2455, "step": 21000 }, { "epoch": 0.3508494797534077, "grad_norm": 7.817965886661366, "learning_rate": 3.5083139341536415e-06, "loss": 1.2515, "step": 21100 }, { "epoch": 0.3525122734963148, "grad_norm": 5.824567240461767, "learning_rate": 3.524941802460925e-06, "loss": 1.2692, "step": 21200 }, { "epoch": 0.354175067239222, "grad_norm": 8.54922844096634, "learning_rate": 3.541569670768208e-06, "loss": 1.19, "step": 21300 }, { "epoch": 0.3558378609821291, "grad_norm": 7.17613928714268, "learning_rate": 3.5581975390754907e-06, "loss": 1.2376, "step": 21400 }, { "epoch": 0.3575006547250363, "grad_norm": 6.6433173821723654, "learning_rate": 3.5748254073827738e-06, "loss": 1.1912, "step": 21500 }, { "epoch": 0.3591634484679434, "grad_norm": 6.050726642627391, "learning_rate": 3.591453275690057e-06, "loss": 1.1824, "step": 21600 }, { "epoch": 0.3608262422108506, "grad_norm": 6.433758885698906, "learning_rate": 3.60808114399734e-06, "loss": 1.2372, "step": 21700 }, { "epoch": 0.3624890359537577, "grad_norm": 5.417799695283234, "learning_rate": 3.624709012304623e-06, "loss": 1.1777, "step": 21800 }, { "epoch": 0.3641518296966649, "grad_norm": 6.720065315850671, "learning_rate": 3.641336880611906e-06, "loss": 1.2089, "step": 21900 }, { "epoch": 0.365814623439572, "grad_norm": 7.311289178812438, "learning_rate": 3.6579647489191887e-06, "loss": 1.1884, "step": 22000 }, { "epoch": 0.3674774171824791, "grad_norm": 10.031745685962056, "learning_rate": 3.6745926172264718e-06, "loss": 1.2322, "step": 22100 }, { "epoch": 0.3691402109253863, "grad_norm": 7.531824611023871, "learning_rate": 3.691220485533755e-06, "loss": 1.1991, "step": 22200 }, { "epoch": 0.3708030046682934, "grad_norm": 6.832932634934062, "learning_rate": 3.7078483538410383e-06, "loss": 1.2286, "step": 22300 }, { "epoch": 0.3724657984112006, "grad_norm": 7.6207715976526025, "learning_rate": 3.724476222148321e-06, "loss": 1.1809, "step": 22400 }, { "epoch": 0.3741285921541077, "grad_norm": 5.9568346626570685, "learning_rate": 3.741104090455604e-06, "loss": 1.1995, "step": 22500 }, { "epoch": 0.3757913858970149, "grad_norm": 6.666066996702077, "learning_rate": 3.7577319587628867e-06, "loss": 1.1841, "step": 22600 }, { "epoch": 0.377454179639922, "grad_norm": 7.308874828631156, "learning_rate": 3.7743598270701698e-06, "loss": 1.1977, "step": 22700 }, { "epoch": 0.3791169733828292, "grad_norm": 9.26815708860237, "learning_rate": 3.790987695377453e-06, "loss": 1.2057, "step": 22800 }, { "epoch": 0.3807797671257363, "grad_norm": 6.99004972180746, "learning_rate": 3.8076155636847363e-06, "loss": 1.2373, "step": 22900 }, { "epoch": 0.3824425608686435, "grad_norm": 6.436970712838096, "learning_rate": 3.824243431992019e-06, "loss": 1.1858, "step": 23000 }, { "epoch": 0.3841053546115506, "grad_norm": 6.208724839124004, "learning_rate": 3.840871300299302e-06, "loss": 1.1792, "step": 23100 }, { "epoch": 0.3857681483544577, "grad_norm": 6.175994711944378, "learning_rate": 3.857499168606585e-06, "loss": 1.2028, "step": 23200 }, { "epoch": 0.3874309420973649, "grad_norm": 6.207898304826454, "learning_rate": 3.874127036913868e-06, "loss": 1.1994, "step": 23300 }, { "epoch": 0.389093735840272, "grad_norm": 6.237535168321395, "learning_rate": 3.8907549052211504e-06, "loss": 1.1977, "step": 23400 }, { "epoch": 0.3907565295831792, "grad_norm": 9.80334994166567, "learning_rate": 3.907382773528434e-06, "loss": 1.1926, "step": 23500 }, { "epoch": 0.3924193233260863, "grad_norm": 5.880115356101619, "learning_rate": 3.9240106418357174e-06, "loss": 1.1847, "step": 23600 }, { "epoch": 0.3940821170689935, "grad_norm": 8.556583211149396, "learning_rate": 3.940638510143e-06, "loss": 1.1774, "step": 23700 }, { "epoch": 0.3957449108119006, "grad_norm": 8.008999397461897, "learning_rate": 3.957266378450283e-06, "loss": 1.1826, "step": 23800 }, { "epoch": 0.3974077045548078, "grad_norm": 5.938595079681757, "learning_rate": 3.973894246757566e-06, "loss": 1.1423, "step": 23900 }, { "epoch": 0.3990704982977149, "grad_norm": 6.3627169149828955, "learning_rate": 3.990522115064849e-06, "loss": 1.2011, "step": 24000 }, { "epoch": 0.400733292040622, "grad_norm": 6.66023438556267, "learning_rate": 4.007149983372132e-06, "loss": 1.1473, "step": 24100 }, { "epoch": 0.4023960857835292, "grad_norm": 9.21691878133637, "learning_rate": 4.023777851679415e-06, "loss": 1.1749, "step": 24200 }, { "epoch": 0.4040588795264363, "grad_norm": 7.466366638030643, "learning_rate": 4.040405719986698e-06, "loss": 1.1739, "step": 24300 }, { "epoch": 0.4057216732693435, "grad_norm": 9.983455971755188, "learning_rate": 4.057033588293981e-06, "loss": 1.1488, "step": 24400 }, { "epoch": 0.4073844670122506, "grad_norm": 7.62648738229621, "learning_rate": 4.073661456601264e-06, "loss": 1.1783, "step": 24500 }, { "epoch": 0.4090472607551578, "grad_norm": 7.4283192018772, "learning_rate": 4.0902893249085464e-06, "loss": 1.1347, "step": 24600 }, { "epoch": 0.4107100544980649, "grad_norm": 6.4200605801742565, "learning_rate": 4.10691719321583e-06, "loss": 1.165, "step": 24700 }, { "epoch": 0.4123728482409721, "grad_norm": 6.000797613227262, "learning_rate": 4.1235450615231134e-06, "loss": 1.1914, "step": 24800 }, { "epoch": 0.4140356419838792, "grad_norm": 6.585673733192562, "learning_rate": 4.1401729298303965e-06, "loss": 1.1271, "step": 24900 }, { "epoch": 0.4156984357267864, "grad_norm": 5.176923354693477, "learning_rate": 4.156800798137679e-06, "loss": 1.1201, "step": 25000 }, { "epoch": 0.4173612294696935, "grad_norm": 7.4413826299674195, "learning_rate": 4.173428666444962e-06, "loss": 1.1781, "step": 25100 }, { "epoch": 0.4190240232126006, "grad_norm": 5.743404527023268, "learning_rate": 4.190056534752245e-06, "loss": 1.1283, "step": 25200 }, { "epoch": 0.4206868169555078, "grad_norm": 6.332706180238943, "learning_rate": 4.206684403059528e-06, "loss": 1.1749, "step": 25300 }, { "epoch": 0.4223496106984149, "grad_norm": 7.43627559391023, "learning_rate": 4.223312271366811e-06, "loss": 1.1727, "step": 25400 }, { "epoch": 0.4240124044413221, "grad_norm": 6.183417103853316, "learning_rate": 4.239940139674094e-06, "loss": 1.148, "step": 25500 }, { "epoch": 0.4256751981842292, "grad_norm": 5.9832962201544895, "learning_rate": 4.256568007981377e-06, "loss": 1.1527, "step": 25600 }, { "epoch": 0.4273379919271364, "grad_norm": 7.1589877799910235, "learning_rate": 4.27319587628866e-06, "loss": 1.1646, "step": 25700 }, { "epoch": 0.4290007856700435, "grad_norm": 7.314818877475148, "learning_rate": 4.289823744595943e-06, "loss": 1.1577, "step": 25800 }, { "epoch": 0.4306635794129507, "grad_norm": 6.12432211429683, "learning_rate": 4.306451612903226e-06, "loss": 1.1528, "step": 25900 }, { "epoch": 0.4323263731558578, "grad_norm": 6.461915721932705, "learning_rate": 4.3230794812105095e-06, "loss": 1.1612, "step": 26000 }, { "epoch": 0.43398916689876493, "grad_norm": 7.997063460000282, "learning_rate": 4.3397073495177925e-06, "loss": 1.1438, "step": 26100 }, { "epoch": 0.4356519606416721, "grad_norm": 7.699075345264144, "learning_rate": 4.356335217825075e-06, "loss": 1.1533, "step": 26200 }, { "epoch": 0.43731475438457923, "grad_norm": 5.149471892360013, "learning_rate": 4.372963086132358e-06, "loss": 1.0948, "step": 26300 }, { "epoch": 0.4389775481274864, "grad_norm": 7.743276406727591, "learning_rate": 4.389590954439642e-06, "loss": 1.1401, "step": 26400 }, { "epoch": 0.4406403418703935, "grad_norm": 5.94498588718141, "learning_rate": 4.406218822746924e-06, "loss": 1.0919, "step": 26500 }, { "epoch": 0.4423031356133007, "grad_norm": 5.7759219245975135, "learning_rate": 4.422846691054207e-06, "loss": 1.1399, "step": 26600 }, { "epoch": 0.4439659293562078, "grad_norm": 8.086291529054675, "learning_rate": 4.43947455936149e-06, "loss": 1.089, "step": 26700 }, { "epoch": 0.445628723099115, "grad_norm": 6.133354305185843, "learning_rate": 4.456102427668773e-06, "loss": 1.1384, "step": 26800 }, { "epoch": 0.4472915168420221, "grad_norm": 5.908907457725012, "learning_rate": 4.472730295976056e-06, "loss": 1.1492, "step": 26900 }, { "epoch": 0.4489543105849293, "grad_norm": 8.04242907646122, "learning_rate": 4.489358164283339e-06, "loss": 1.1438, "step": 27000 }, { "epoch": 0.4506171043278364, "grad_norm": 8.101349279614235, "learning_rate": 4.505986032590622e-06, "loss": 1.1156, "step": 27100 }, { "epoch": 0.45227989807074354, "grad_norm": 9.13477456796015, "learning_rate": 4.5226139008979055e-06, "loss": 1.1662, "step": 27200 }, { "epoch": 0.4539426918136507, "grad_norm": 5.886451911462184, "learning_rate": 4.5392417692051885e-06, "loss": 1.0949, "step": 27300 }, { "epoch": 0.45560548555655783, "grad_norm": 9.496396168840416, "learning_rate": 4.555869637512471e-06, "loss": 1.126, "step": 27400 }, { "epoch": 0.457268279299465, "grad_norm": 5.595207422839876, "learning_rate": 4.572497505819754e-06, "loss": 1.1315, "step": 27500 }, { "epoch": 0.45893107304237213, "grad_norm": 7.897594030767635, "learning_rate": 4.589125374127038e-06, "loss": 1.0838, "step": 27600 }, { "epoch": 0.4605938667852793, "grad_norm": 6.761514550643024, "learning_rate": 4.605753242434321e-06, "loss": 1.1353, "step": 27700 }, { "epoch": 0.4622566605281864, "grad_norm": 6.271407060931469, "learning_rate": 4.622381110741603e-06, "loss": 1.1058, "step": 27800 }, { "epoch": 0.4639194542710936, "grad_norm": 5.70894258863696, "learning_rate": 4.639008979048886e-06, "loss": 1.1267, "step": 27900 }, { "epoch": 0.4655822480140007, "grad_norm": 5.263980015126016, "learning_rate": 4.655636847356169e-06, "loss": 1.1168, "step": 28000 }, { "epoch": 0.46724504175690784, "grad_norm": 7.330562099191673, "learning_rate": 4.672264715663452e-06, "loss": 1.0962, "step": 28100 }, { "epoch": 0.468907835499815, "grad_norm": 5.850759931815431, "learning_rate": 4.688892583970735e-06, "loss": 1.0818, "step": 28200 }, { "epoch": 0.47057062924272214, "grad_norm": 6.328723678762731, "learning_rate": 4.705520452278018e-06, "loss": 1.1146, "step": 28300 }, { "epoch": 0.4722334229856293, "grad_norm": 10.474175838879775, "learning_rate": 4.7221483205853015e-06, "loss": 1.1016, "step": 28400 }, { "epoch": 0.47389621672853643, "grad_norm": 6.213730579838414, "learning_rate": 4.7387761888925845e-06, "loss": 1.0977, "step": 28500 }, { "epoch": 0.4755590104714436, "grad_norm": 6.276416456508427, "learning_rate": 4.755404057199868e-06, "loss": 1.0787, "step": 28600 }, { "epoch": 0.47722180421435073, "grad_norm": 7.869007916966236, "learning_rate": 4.77203192550715e-06, "loss": 1.0968, "step": 28700 }, { "epoch": 0.4788845979572579, "grad_norm": 6.624865200475049, "learning_rate": 4.788659793814434e-06, "loss": 1.1365, "step": 28800 }, { "epoch": 0.480547391700165, "grad_norm": 6.527065750932926, "learning_rate": 4.805287662121717e-06, "loss": 1.0999, "step": 28900 }, { "epoch": 0.4822101854430722, "grad_norm": 5.307630783787529, "learning_rate": 4.821915530428999e-06, "loss": 1.1046, "step": 29000 }, { "epoch": 0.4838729791859793, "grad_norm": 5.607757162361482, "learning_rate": 4.838543398736282e-06, "loss": 1.1289, "step": 29100 }, { "epoch": 0.48553577292888644, "grad_norm": 5.880106329568287, "learning_rate": 4.855171267043565e-06, "loss": 1.1265, "step": 29200 }, { "epoch": 0.4871985666717936, "grad_norm": 4.852479761279785, "learning_rate": 4.871799135350848e-06, "loss": 1.0924, "step": 29300 }, { "epoch": 0.48886136041470074, "grad_norm": 5.731228587071771, "learning_rate": 4.888427003658131e-06, "loss": 1.0861, "step": 29400 }, { "epoch": 0.4905241541576079, "grad_norm": 5.760513291891099, "learning_rate": 4.905054871965414e-06, "loss": 1.1109, "step": 29500 }, { "epoch": 0.49218694790051504, "grad_norm": 4.971300881094378, "learning_rate": 4.9216827402726975e-06, "loss": 1.108, "step": 29600 }, { "epoch": 0.4938497416434222, "grad_norm": 8.330665036205216, "learning_rate": 4.9383106085799806e-06, "loss": 1.0995, "step": 29700 }, { "epoch": 0.49551253538632933, "grad_norm": 6.519333357815815, "learning_rate": 4.954938476887264e-06, "loss": 1.0664, "step": 29800 }, { "epoch": 0.4971753291292365, "grad_norm": 5.721180481842605, "learning_rate": 4.971566345194547e-06, "loss": 1.1152, "step": 29900 }, { "epoch": 0.49883812287214363, "grad_norm": 6.536386885992491, "learning_rate": 4.98819421350183e-06, "loss": 1.0796, "step": 30000 }, { "epoch": 0.5005009166150508, "grad_norm": 6.816929083547968, "learning_rate": 5.004822081809112e-06, "loss": 1.0751, "step": 30100 }, { "epoch": 0.5021637103579579, "grad_norm": 9.147920522673395, "learning_rate": 5.021449950116396e-06, "loss": 1.0969, "step": 30200 }, { "epoch": 0.503826504100865, "grad_norm": 6.222511430536641, "learning_rate": 5.038077818423679e-06, "loss": 1.0929, "step": 30300 }, { "epoch": 0.5054892978437722, "grad_norm": 6.461325225341745, "learning_rate": 5.054705686730962e-06, "loss": 1.0778, "step": 30400 }, { "epoch": 0.5071520915866794, "grad_norm": 5.715681207874041, "learning_rate": 5.071333555038245e-06, "loss": 1.1183, "step": 30500 }, { "epoch": 0.5088148853295865, "grad_norm": 6.379766676238729, "learning_rate": 5.087961423345527e-06, "loss": 1.0958, "step": 30600 }, { "epoch": 0.5104776790724936, "grad_norm": 6.420163383713202, "learning_rate": 5.1045892916528104e-06, "loss": 1.106, "step": 30700 }, { "epoch": 0.5121404728154008, "grad_norm": 6.238526633150148, "learning_rate": 5.1212171599600935e-06, "loss": 1.0972, "step": 30800 }, { "epoch": 0.513803266558308, "grad_norm": 6.3440254061500765, "learning_rate": 5.1378450282673766e-06, "loss": 1.0976, "step": 30900 }, { "epoch": 0.5154660603012151, "grad_norm": 5.051899928510722, "learning_rate": 5.15447289657466e-06, "loss": 1.08, "step": 31000 }, { "epoch": 0.5171288540441222, "grad_norm": 5.048655699513924, "learning_rate": 5.171100764881942e-06, "loss": 1.0817, "step": 31100 }, { "epoch": 0.5187916477870294, "grad_norm": 5.169209721087907, "learning_rate": 5.187728633189225e-06, "loss": 1.1042, "step": 31200 }, { "epoch": 0.5204544415299365, "grad_norm": 7.11324469585728, "learning_rate": 5.204356501496508e-06, "loss": 1.0854, "step": 31300 }, { "epoch": 0.5221172352728437, "grad_norm": 4.725286780794923, "learning_rate": 5.220984369803792e-06, "loss": 1.0771, "step": 31400 }, { "epoch": 0.5237800290157508, "grad_norm": 5.1048712702456625, "learning_rate": 5.237612238111075e-06, "loss": 1.0663, "step": 31500 }, { "epoch": 0.525442822758658, "grad_norm": 6.008486629387042, "learning_rate": 5.254240106418358e-06, "loss": 1.0766, "step": 31600 }, { "epoch": 0.5271056165015651, "grad_norm": 6.1863905081201676, "learning_rate": 5.270867974725641e-06, "loss": 1.0615, "step": 31700 }, { "epoch": 0.5287684102444723, "grad_norm": 4.674605323724885, "learning_rate": 5.287495843032923e-06, "loss": 1.083, "step": 31800 }, { "epoch": 0.5304312039873794, "grad_norm": 6.547211589473249, "learning_rate": 5.3041237113402064e-06, "loss": 1.0849, "step": 31900 }, { "epoch": 0.5320939977302865, "grad_norm": 6.303189605710657, "learning_rate": 5.3207515796474895e-06, "loss": 1.0901, "step": 32000 }, { "epoch": 0.5337567914731937, "grad_norm": 4.716347241979849, "learning_rate": 5.337379447954773e-06, "loss": 1.0564, "step": 32100 }, { "epoch": 0.5354195852161008, "grad_norm": 8.796690907219757, "learning_rate": 5.354007316262056e-06, "loss": 1.07, "step": 32200 }, { "epoch": 0.537082378959008, "grad_norm": 6.8324251247565595, "learning_rate": 5.370635184569338e-06, "loss": 1.0692, "step": 32300 }, { "epoch": 0.5387451727019151, "grad_norm": 5.883323367105101, "learning_rate": 5.387263052876621e-06, "loss": 1.0561, "step": 32400 }, { "epoch": 0.5404079664448223, "grad_norm": 4.932452436309797, "learning_rate": 5.403890921183904e-06, "loss": 1.0568, "step": 32500 }, { "epoch": 0.5420707601877294, "grad_norm": 7.361927435293321, "learning_rate": 5.420518789491188e-06, "loss": 1.0864, "step": 32600 }, { "epoch": 0.5437335539306366, "grad_norm": 4.881438433082831, "learning_rate": 5.437146657798471e-06, "loss": 1.0273, "step": 32700 }, { "epoch": 0.5453963476735437, "grad_norm": 4.743637760457526, "learning_rate": 5.453774526105754e-06, "loss": 1.0897, "step": 32800 }, { "epoch": 0.5470591414164508, "grad_norm": 5.473398236417018, "learning_rate": 5.470402394413037e-06, "loss": 1.0424, "step": 32900 }, { "epoch": 0.548721935159358, "grad_norm": 6.501887206452945, "learning_rate": 5.487030262720319e-06, "loss": 1.064, "step": 33000 }, { "epoch": 0.5503847289022651, "grad_norm": 6.963905974460821, "learning_rate": 5.5036581310276025e-06, "loss": 1.0406, "step": 33100 }, { "epoch": 0.5520475226451723, "grad_norm": 5.234260162759803, "learning_rate": 5.5202859993348855e-06, "loss": 1.0505, "step": 33200 }, { "epoch": 0.5537103163880794, "grad_norm": 8.508392394551565, "learning_rate": 5.536913867642169e-06, "loss": 1.0435, "step": 33300 }, { "epoch": 0.5553731101309866, "grad_norm": 5.430443505474462, "learning_rate": 5.553541735949452e-06, "loss": 1.0798, "step": 33400 }, { "epoch": 0.5570359038738937, "grad_norm": 5.560913734725864, "learning_rate": 5.570169604256735e-06, "loss": 1.0485, "step": 33500 }, { "epoch": 0.5586986976168009, "grad_norm": 5.873151733509716, "learning_rate": 5.586797472564017e-06, "loss": 1.0763, "step": 33600 }, { "epoch": 0.560361491359708, "grad_norm": 8.054156309709535, "learning_rate": 5.603425340871301e-06, "loss": 1.0446, "step": 33700 }, { "epoch": 0.5620242851026152, "grad_norm": 5.057928996291734, "learning_rate": 5.620053209178584e-06, "loss": 1.0478, "step": 33800 }, { "epoch": 0.5636870788455223, "grad_norm": 5.094343613252709, "learning_rate": 5.636681077485867e-06, "loss": 1.0721, "step": 33900 }, { "epoch": 0.5653498725884295, "grad_norm": 6.055214138445745, "learning_rate": 5.65330894579315e-06, "loss": 1.1015, "step": 34000 }, { "epoch": 0.5670126663313366, "grad_norm": 7.411164688612714, "learning_rate": 5.669936814100433e-06, "loss": 1.0804, "step": 34100 }, { "epoch": 0.5686754600742437, "grad_norm": 7.759912746223972, "learning_rate": 5.686564682407716e-06, "loss": 1.0771, "step": 34200 }, { "epoch": 0.5703382538171509, "grad_norm": 8.871299640721777, "learning_rate": 5.7031925507149985e-06, "loss": 1.0187, "step": 34300 }, { "epoch": 0.572001047560058, "grad_norm": 7.739684849022581, "learning_rate": 5.7198204190222815e-06, "loss": 1.0415, "step": 34400 }, { "epoch": 0.5736638413029652, "grad_norm": 4.763020358018351, "learning_rate": 5.736448287329565e-06, "loss": 1.05, "step": 34500 }, { "epoch": 0.5753266350458723, "grad_norm": 5.439616519544066, "learning_rate": 5.753076155636848e-06, "loss": 1.054, "step": 34600 }, { "epoch": 0.5769894287887795, "grad_norm": 8.039696149072633, "learning_rate": 5.769704023944131e-06, "loss": 1.048, "step": 34700 }, { "epoch": 0.5786522225316866, "grad_norm": 7.240295672010923, "learning_rate": 5.786331892251413e-06, "loss": 1.0517, "step": 34800 }, { "epoch": 0.5803150162745938, "grad_norm": 8.134677299524787, "learning_rate": 5.802959760558698e-06, "loss": 1.0397, "step": 34900 }, { "epoch": 0.5819778100175009, "grad_norm": 8.03718795943793, "learning_rate": 5.81958762886598e-06, "loss": 1.0317, "step": 35000 }, { "epoch": 0.583640603760408, "grad_norm": 4.157076986685206, "learning_rate": 5.836215497173263e-06, "loss": 1.0585, "step": 35100 }, { "epoch": 0.5853033975033152, "grad_norm": 4.557884057832654, "learning_rate": 5.852843365480546e-06, "loss": 1.0452, "step": 35200 }, { "epoch": 0.5869661912462223, "grad_norm": 4.269276702215842, "learning_rate": 5.869471233787829e-06, "loss": 1.0518, "step": 35300 }, { "epoch": 0.5886289849891295, "grad_norm": 4.161743658862873, "learning_rate": 5.886099102095112e-06, "loss": 1.0501, "step": 35400 }, { "epoch": 0.5902917787320366, "grad_norm": 4.725195709900666, "learning_rate": 5.9027269704023945e-06, "loss": 1.0544, "step": 35500 }, { "epoch": 0.5919545724749438, "grad_norm": 8.538079291489574, "learning_rate": 5.9193548387096776e-06, "loss": 1.044, "step": 35600 }, { "epoch": 0.5936173662178509, "grad_norm": 6.234723954092716, "learning_rate": 5.935982707016961e-06, "loss": 1.0137, "step": 35700 }, { "epoch": 0.5952801599607581, "grad_norm": 5.171653126366826, "learning_rate": 5.952610575324244e-06, "loss": 1.0724, "step": 35800 }, { "epoch": 0.5969429537036652, "grad_norm": 4.392409305156987, "learning_rate": 5.969238443631527e-06, "loss": 1.0724, "step": 35900 }, { "epoch": 0.5986057474465724, "grad_norm": 5.327860557381011, "learning_rate": 5.985866311938809e-06, "loss": 1.0104, "step": 36000 }, { "epoch": 0.6002685411894795, "grad_norm": 6.301433280684925, "learning_rate": 6.002494180246094e-06, "loss": 1.0852, "step": 36100 }, { "epoch": 0.6019313349323866, "grad_norm": 5.312845580817294, "learning_rate": 6.019122048553376e-06, "loss": 1.0626, "step": 36200 }, { "epoch": 0.6035941286752938, "grad_norm": 6.125904966740482, "learning_rate": 6.035749916860659e-06, "loss": 1.0544, "step": 36300 }, { "epoch": 0.605256922418201, "grad_norm": 6.19598652943213, "learning_rate": 6.052377785167942e-06, "loss": 1.0364, "step": 36400 }, { "epoch": 0.6069197161611081, "grad_norm": 5.285050015690505, "learning_rate": 6.069005653475225e-06, "loss": 1.014, "step": 36500 }, { "epoch": 0.6085825099040152, "grad_norm": 5.946484476407859, "learning_rate": 6.085633521782508e-06, "loss": 1.0133, "step": 36600 }, { "epoch": 0.6102453036469224, "grad_norm": 6.072708597234774, "learning_rate": 6.1022613900897905e-06, "loss": 1.0701, "step": 36700 }, { "epoch": 0.6119080973898295, "grad_norm": 4.6515165207107065, "learning_rate": 6.1188892583970736e-06, "loss": 1.0684, "step": 36800 }, { "epoch": 0.6135708911327367, "grad_norm": 6.297252131303459, "learning_rate": 6.135517126704357e-06, "loss": 1.0382, "step": 36900 }, { "epoch": 0.6152336848756438, "grad_norm": 8.019497854527017, "learning_rate": 6.15214499501164e-06, "loss": 1.0296, "step": 37000 }, { "epoch": 0.6168964786185509, "grad_norm": 4.150814891148968, "learning_rate": 6.168772863318923e-06, "loss": 1.0048, "step": 37100 }, { "epoch": 0.6185592723614581, "grad_norm": 6.435685600919684, "learning_rate": 6.185400731626207e-06, "loss": 1.0398, "step": 37200 }, { "epoch": 0.6202220661043653, "grad_norm": 4.699552261275521, "learning_rate": 6.20202859993349e-06, "loss": 1.0082, "step": 37300 }, { "epoch": 0.6218848598472724, "grad_norm": 6.265654323611004, "learning_rate": 6.218656468240772e-06, "loss": 1.0344, "step": 37400 }, { "epoch": 0.6235476535901795, "grad_norm": 5.347032021455777, "learning_rate": 6.235284336548055e-06, "loss": 1.0094, "step": 37500 }, { "epoch": 0.6252104473330867, "grad_norm": 5.255308058156609, "learning_rate": 6.251912204855338e-06, "loss": 1.0394, "step": 37600 }, { "epoch": 0.6268732410759938, "grad_norm": 5.006306481232977, "learning_rate": 6.268540073162621e-06, "loss": 1.0638, "step": 37700 }, { "epoch": 0.628536034818901, "grad_norm": 5.7535848535102945, "learning_rate": 6.285167941469904e-06, "loss": 1.0149, "step": 37800 }, { "epoch": 0.6301988285618081, "grad_norm": 4.091898197379347, "learning_rate": 6.301795809777187e-06, "loss": 1.0401, "step": 37900 }, { "epoch": 0.6318616223047153, "grad_norm": 4.711272707154923, "learning_rate": 6.3184236780844696e-06, "loss": 1.0499, "step": 38000 }, { "epoch": 0.6335244160476224, "grad_norm": 7.43822114972355, "learning_rate": 6.335051546391753e-06, "loss": 1.0462, "step": 38100 }, { "epoch": 0.6351872097905296, "grad_norm": 4.114457089935014, "learning_rate": 6.351679414699036e-06, "loss": 1.0029, "step": 38200 }, { "epoch": 0.6368500035334367, "grad_norm": 4.14499751704086, "learning_rate": 6.368307283006319e-06, "loss": 1.0098, "step": 38300 }, { "epoch": 0.6385127972763438, "grad_norm": 6.053643466404795, "learning_rate": 6.384935151313603e-06, "loss": 1.0283, "step": 38400 }, { "epoch": 0.640175591019251, "grad_norm": 5.591742020139934, "learning_rate": 6.401563019620886e-06, "loss": 1.0248, "step": 38500 }, { "epoch": 0.6418383847621582, "grad_norm": 4.453471465776507, "learning_rate": 6.418190887928169e-06, "loss": 1.036, "step": 38600 }, { "epoch": 0.6435011785050653, "grad_norm": 5.958739060802354, "learning_rate": 6.434818756235451e-06, "loss": 1.0236, "step": 38700 }, { "epoch": 0.6451639722479724, "grad_norm": 7.850592840042609, "learning_rate": 6.451446624542734e-06, "loss": 1.0328, "step": 38800 }, { "epoch": 0.6468267659908796, "grad_norm": 4.877181534561705, "learning_rate": 6.468074492850017e-06, "loss": 1.0186, "step": 38900 }, { "epoch": 0.6484895597337867, "grad_norm": 5.7945889579793946, "learning_rate": 6.4847023611573e-06, "loss": 1.0696, "step": 39000 }, { "epoch": 0.6501523534766939, "grad_norm": 6.129048145872351, "learning_rate": 6.501330229464583e-06, "loss": 1.0404, "step": 39100 }, { "epoch": 0.651815147219601, "grad_norm": 7.028751933021658, "learning_rate": 6.517958097771866e-06, "loss": 1.016, "step": 39200 }, { "epoch": 0.6534779409625081, "grad_norm": 3.9602917535785056, "learning_rate": 6.534585966079149e-06, "loss": 1.0262, "step": 39300 }, { "epoch": 0.6551407347054153, "grad_norm": 4.745403373914077, "learning_rate": 6.551213834386432e-06, "loss": 0.9989, "step": 39400 }, { "epoch": 0.6568035284483225, "grad_norm": 5.131067489491939, "learning_rate": 6.567841702693715e-06, "loss": 1.0006, "step": 39500 }, { "epoch": 0.6584663221912296, "grad_norm": 4.423633657090077, "learning_rate": 6.584469571000999e-06, "loss": 0.9938, "step": 39600 }, { "epoch": 0.6601291159341367, "grad_norm": 5.855972471818852, "learning_rate": 6.601097439308282e-06, "loss": 1.0256, "step": 39700 }, { "epoch": 0.6617919096770439, "grad_norm": 5.667133229001661, "learning_rate": 6.617725307615565e-06, "loss": 1.0219, "step": 39800 }, { "epoch": 0.663454703419951, "grad_norm": 6.662643178189083, "learning_rate": 6.634353175922847e-06, "loss": 1.0027, "step": 39900 }, { "epoch": 0.6651174971628582, "grad_norm": 6.42873175139723, "learning_rate": 6.65098104423013e-06, "loss": 0.9984, "step": 40000 }, { "epoch": 0.6667802909057653, "grad_norm": 6.523069804482123, "learning_rate": 6.667608912537413e-06, "loss": 1.0054, "step": 40100 }, { "epoch": 0.6684430846486724, "grad_norm": 5.590684864594384, "learning_rate": 6.684236780844696e-06, "loss": 0.9852, "step": 40200 }, { "epoch": 0.6701058783915796, "grad_norm": 5.862748484397506, "learning_rate": 6.700864649151979e-06, "loss": 1.0092, "step": 40300 }, { "epoch": 0.6717686721344868, "grad_norm": 4.1204974680346975, "learning_rate": 6.717492517459262e-06, "loss": 1.0203, "step": 40400 }, { "epoch": 0.6734314658773939, "grad_norm": 4.794128012603177, "learning_rate": 6.734120385766545e-06, "loss": 1.0063, "step": 40500 }, { "epoch": 0.675094259620301, "grad_norm": 4.908162664046532, "learning_rate": 6.750748254073828e-06, "loss": 1.0245, "step": 40600 }, { "epoch": 0.6767570533632082, "grad_norm": 6.307286161131397, "learning_rate": 6.767376122381111e-06, "loss": 1.0378, "step": 40700 }, { "epoch": 0.6784198471061154, "grad_norm": 8.071547832734824, "learning_rate": 6.784003990688395e-06, "loss": 1.0211, "step": 40800 }, { "epoch": 0.6800826408490225, "grad_norm": 7.37236906503034, "learning_rate": 6.800631858995678e-06, "loss": 1.0319, "step": 40900 }, { "epoch": 0.6817454345919296, "grad_norm": 5.861971504303635, "learning_rate": 6.817259727302961e-06, "loss": 1.038, "step": 41000 }, { "epoch": 0.6834082283348368, "grad_norm": 4.747172767863784, "learning_rate": 6.833887595610243e-06, "loss": 1.047, "step": 41100 }, { "epoch": 0.685071022077744, "grad_norm": 6.107571921539521, "learning_rate": 6.850515463917526e-06, "loss": 0.9884, "step": 41200 }, { "epoch": 0.6867338158206511, "grad_norm": 4.664564465428331, "learning_rate": 6.867143332224809e-06, "loss": 1.0524, "step": 41300 }, { "epoch": 0.6883966095635582, "grad_norm": 4.049176525380206, "learning_rate": 6.883771200532092e-06, "loss": 0.9997, "step": 41400 }, { "epoch": 0.6900594033064653, "grad_norm": 5.535399178780478, "learning_rate": 6.900399068839375e-06, "loss": 1.0281, "step": 41500 }, { "epoch": 0.6917221970493725, "grad_norm": 4.169034670815082, "learning_rate": 6.9170269371466585e-06, "loss": 1.0003, "step": 41600 }, { "epoch": 0.6933849907922797, "grad_norm": 4.569794977253707, "learning_rate": 6.933654805453941e-06, "loss": 1.0364, "step": 41700 }, { "epoch": 0.6950477845351868, "grad_norm": 4.6367273655935906, "learning_rate": 6.950282673761224e-06, "loss": 1.04, "step": 41800 }, { "epoch": 0.6967105782780939, "grad_norm": 5.56476678722383, "learning_rate": 6.966910542068508e-06, "loss": 1.0219, "step": 41900 }, { "epoch": 0.6983733720210011, "grad_norm": 5.530510418102526, "learning_rate": 6.983538410375791e-06, "loss": 1.0169, "step": 42000 }, { "epoch": 0.7000361657639083, "grad_norm": 5.114431641180085, "learning_rate": 7.000166278683074e-06, "loss": 1.0304, "step": 42100 }, { "epoch": 0.7016989595068154, "grad_norm": 5.952351016263428, "learning_rate": 7.016794146990357e-06, "loss": 1.0173, "step": 42200 }, { "epoch": 0.7033617532497225, "grad_norm": 7.1691012621040295, "learning_rate": 7.03342201529764e-06, "loss": 1.0226, "step": 42300 }, { "epoch": 0.7050245469926296, "grad_norm": 7.3728620788785895, "learning_rate": 7.050049883604922e-06, "loss": 0.9968, "step": 42400 }, { "epoch": 0.7066873407355369, "grad_norm": 4.144069930444911, "learning_rate": 7.066677751912205e-06, "loss": 0.9996, "step": 42500 }, { "epoch": 0.708350134478444, "grad_norm": 4.986045370305048, "learning_rate": 7.083305620219488e-06, "loss": 1.0374, "step": 42600 }, { "epoch": 0.7100129282213511, "grad_norm": 6.281361451972544, "learning_rate": 7.099933488526771e-06, "loss": 0.9935, "step": 42700 }, { "epoch": 0.7116757219642582, "grad_norm": 4.067630840906217, "learning_rate": 7.1165613568340545e-06, "loss": 1.0234, "step": 42800 }, { "epoch": 0.7133385157071654, "grad_norm": 7.097989878107153, "learning_rate": 7.133189225141337e-06, "loss": 1.0216, "step": 42900 }, { "epoch": 0.7150013094500726, "grad_norm": 6.963356090155387, "learning_rate": 7.14981709344862e-06, "loss": 1.0059, "step": 43000 }, { "epoch": 0.7166641031929797, "grad_norm": 7.619096350344966, "learning_rate": 7.166444961755904e-06, "loss": 0.9712, "step": 43100 }, { "epoch": 0.7183268969358868, "grad_norm": 5.733849028616467, "learning_rate": 7.183072830063187e-06, "loss": 1.0119, "step": 43200 }, { "epoch": 0.7199896906787939, "grad_norm": 5.245883695988347, "learning_rate": 7.19970069837047e-06, "loss": 1.0035, "step": 43300 }, { "epoch": 0.7216524844217012, "grad_norm": 5.2277935125789154, "learning_rate": 7.216328566677753e-06, "loss": 1.0069, "step": 43400 }, { "epoch": 0.7233152781646083, "grad_norm": 5.060629030147589, "learning_rate": 7.232956434985036e-06, "loss": 0.9906, "step": 43500 }, { "epoch": 0.7249780719075154, "grad_norm": 5.628969849034644, "learning_rate": 7.249584303292318e-06, "loss": 0.9449, "step": 43600 }, { "epoch": 0.7266408656504225, "grad_norm": 4.224410029148149, "learning_rate": 7.266212171599601e-06, "loss": 1.0318, "step": 43700 }, { "epoch": 0.7283036593933297, "grad_norm": 5.8330007880416925, "learning_rate": 7.282840039906884e-06, "loss": 1.0141, "step": 43800 }, { "epoch": 0.7299664531362369, "grad_norm": 3.691781271717763, "learning_rate": 7.299467908214167e-06, "loss": 0.981, "step": 43900 }, { "epoch": 0.731629246879144, "grad_norm": 5.912078837862132, "learning_rate": 7.3160957765214505e-06, "loss": 0.9795, "step": 44000 }, { "epoch": 0.7332920406220511, "grad_norm": 4.802152386444188, "learning_rate": 7.332723644828733e-06, "loss": 0.9997, "step": 44100 }, { "epoch": 0.7349548343649582, "grad_norm": 5.211537984031411, "learning_rate": 7.349351513136016e-06, "loss": 0.9843, "step": 44200 }, { "epoch": 0.7366176281078655, "grad_norm": 4.151651486560792, "learning_rate": 7.3659793814433e-06, "loss": 0.9944, "step": 44300 }, { "epoch": 0.7382804218507726, "grad_norm": 5.1622698194112715, "learning_rate": 7.382607249750583e-06, "loss": 1.0058, "step": 44400 }, { "epoch": 0.7399432155936797, "grad_norm": 4.9502619742502665, "learning_rate": 7.399235118057866e-06, "loss": 0.9811, "step": 44500 }, { "epoch": 0.7416060093365868, "grad_norm": 5.71173112630753, "learning_rate": 7.415862986365149e-06, "loss": 0.9962, "step": 44600 }, { "epoch": 0.7432688030794941, "grad_norm": 4.155687596802704, "learning_rate": 7.432490854672432e-06, "loss": 0.992, "step": 44700 }, { "epoch": 0.7449315968224012, "grad_norm": 5.475217405746201, "learning_rate": 7.449118722979714e-06, "loss": 0.9758, "step": 44800 }, { "epoch": 0.7465943905653083, "grad_norm": 3.7968041941226236, "learning_rate": 7.465746591286997e-06, "loss": 0.9821, "step": 44900 }, { "epoch": 0.7482571843082154, "grad_norm": 4.680594046982439, "learning_rate": 7.48237445959428e-06, "loss": 0.9762, "step": 45000 }, { "epoch": 0.7499199780511226, "grad_norm": 5.520322428419844, "learning_rate": 7.4990023279015634e-06, "loss": 1.0154, "step": 45100 }, { "epoch": 0.7515827717940298, "grad_norm": 4.803981054980881, "learning_rate": 7.5156301962088465e-06, "loss": 1.046, "step": 45200 }, { "epoch": 0.7532455655369369, "grad_norm": 4.74785919824513, "learning_rate": 7.5322580645161296e-06, "loss": 1.008, "step": 45300 }, { "epoch": 0.754908359279844, "grad_norm": 5.693611830772322, "learning_rate": 7.5488859328234135e-06, "loss": 1.0285, "step": 45400 }, { "epoch": 0.7565711530227511, "grad_norm": 7.412393489522696, "learning_rate": 7.565513801130696e-06, "loss": 0.9946, "step": 45500 }, { "epoch": 0.7582339467656584, "grad_norm": 5.111931806998519, "learning_rate": 7.582141669437979e-06, "loss": 0.9943, "step": 45600 }, { "epoch": 0.7598967405085655, "grad_norm": 5.711925018224354, "learning_rate": 7.598769537745262e-06, "loss": 0.9803, "step": 45700 }, { "epoch": 0.7615595342514726, "grad_norm": 6.515439187840381, "learning_rate": 7.615397406052545e-06, "loss": 0.9758, "step": 45800 }, { "epoch": 0.7632223279943797, "grad_norm": 5.401079548302396, "learning_rate": 7.632025274359827e-06, "loss": 0.9966, "step": 45900 }, { "epoch": 0.764885121737287, "grad_norm": 6.011323148641013, "learning_rate": 7.648653142667111e-06, "loss": 1.0197, "step": 46000 }, { "epoch": 0.7665479154801941, "grad_norm": 3.9142932232073457, "learning_rate": 7.665281010974393e-06, "loss": 0.9754, "step": 46100 }, { "epoch": 0.7682107092231012, "grad_norm": 3.5927830698057672, "learning_rate": 7.681908879281677e-06, "loss": 0.9929, "step": 46200 }, { "epoch": 0.7698735029660083, "grad_norm": 4.058024389343865, "learning_rate": 7.69853674758896e-06, "loss": 0.9654, "step": 46300 }, { "epoch": 0.7715362967089154, "grad_norm": 4.865508711189542, "learning_rate": 7.715164615896242e-06, "loss": 0.9535, "step": 46400 }, { "epoch": 0.7731990904518227, "grad_norm": 5.591286885937322, "learning_rate": 7.731792484203526e-06, "loss": 0.9798, "step": 46500 }, { "epoch": 0.7748618841947298, "grad_norm": 6.911708525535553, "learning_rate": 7.74842035251081e-06, "loss": 0.9654, "step": 46600 }, { "epoch": 0.7765246779376369, "grad_norm": 6.005571629009076, "learning_rate": 7.765048220818092e-06, "loss": 0.9863, "step": 46700 }, { "epoch": 0.778187471680544, "grad_norm": 5.685972108508772, "learning_rate": 7.781676089125376e-06, "loss": 1.0034, "step": 46800 }, { "epoch": 0.7798502654234513, "grad_norm": 5.666135570125074, "learning_rate": 7.798303957432658e-06, "loss": 0.9665, "step": 46900 }, { "epoch": 0.7815130591663584, "grad_norm": 4.893168167313203, "learning_rate": 7.81493182573994e-06, "loss": 0.9883, "step": 47000 }, { "epoch": 0.7831758529092655, "grad_norm": 4.379119120036917, "learning_rate": 7.831559694047224e-06, "loss": 0.972, "step": 47100 }, { "epoch": 0.7848386466521726, "grad_norm": 5.990580011420136, "learning_rate": 7.848187562354506e-06, "loss": 0.9615, "step": 47200 }, { "epoch": 0.7865014403950797, "grad_norm": 5.148524014604785, "learning_rate": 7.86481543066179e-06, "loss": 0.9833, "step": 47300 }, { "epoch": 0.788164234137987, "grad_norm": 5.081708209334499, "learning_rate": 7.881443298969072e-06, "loss": 1.004, "step": 47400 }, { "epoch": 0.7898270278808941, "grad_norm": 6.0788772817689525, "learning_rate": 7.898071167276356e-06, "loss": 1.0121, "step": 47500 }, { "epoch": 0.7914898216238012, "grad_norm": 6.198901801339425, "learning_rate": 7.914699035583639e-06, "loss": 1.0004, "step": 47600 }, { "epoch": 0.7931526153667083, "grad_norm": 6.571118539690258, "learning_rate": 7.93132690389092e-06, "loss": 0.9845, "step": 47700 }, { "epoch": 0.7948154091096156, "grad_norm": 4.2917187440576186, "learning_rate": 7.947954772198205e-06, "loss": 0.9984, "step": 47800 }, { "epoch": 0.7964782028525227, "grad_norm": 6.234166473073519, "learning_rate": 7.964582640505489e-06, "loss": 0.9298, "step": 47900 }, { "epoch": 0.7981409965954298, "grad_norm": 4.87295086001104, "learning_rate": 7.98121050881277e-06, "loss": 0.9414, "step": 48000 }, { "epoch": 0.7998037903383369, "grad_norm": 4.145211258180426, "learning_rate": 7.997838377120055e-06, "loss": 0.9678, "step": 48100 }, { "epoch": 0.801466584081244, "grad_norm": 4.09065135463522, "learning_rate": 8.014466245427337e-06, "loss": 0.9659, "step": 48200 }, { "epoch": 0.8031293778241513, "grad_norm": 4.507326532599483, "learning_rate": 8.03109411373462e-06, "loss": 0.9701, "step": 48300 }, { "epoch": 0.8047921715670584, "grad_norm": 3.8355185950704307, "learning_rate": 8.047721982041903e-06, "loss": 0.9852, "step": 48400 }, { "epoch": 0.8064549653099655, "grad_norm": 3.925063476729925, "learning_rate": 8.064349850349185e-06, "loss": 0.9738, "step": 48500 }, { "epoch": 0.8081177590528726, "grad_norm": 5.81246346063319, "learning_rate": 8.08097771865647e-06, "loss": 0.9887, "step": 48600 }, { "epoch": 0.8097805527957799, "grad_norm": 4.59454425418089, "learning_rate": 8.097605586963751e-06, "loss": 1.0223, "step": 48700 }, { "epoch": 0.811443346538687, "grad_norm": 4.544166699688075, "learning_rate": 8.114233455271034e-06, "loss": 0.9903, "step": 48800 }, { "epoch": 0.8131061402815941, "grad_norm": 4.3302414345284435, "learning_rate": 8.13086132357832e-06, "loss": 0.9596, "step": 48900 }, { "epoch": 0.8147689340245012, "grad_norm": 3.855971431688213, "learning_rate": 8.147489191885602e-06, "loss": 1.014, "step": 49000 }, { "epoch": 0.8164317277674085, "grad_norm": 6.13092945390848, "learning_rate": 8.164117060192884e-06, "loss": 0.9614, "step": 49100 }, { "epoch": 0.8180945215103156, "grad_norm": 6.450184618427879, "learning_rate": 8.180744928500168e-06, "loss": 0.9702, "step": 49200 }, { "epoch": 0.8197573152532227, "grad_norm": 4.476354443935958, "learning_rate": 8.19737279680745e-06, "loss": 0.9623, "step": 49300 }, { "epoch": 0.8214201089961298, "grad_norm": 5.14010986458347, "learning_rate": 8.214000665114734e-06, "loss": 0.9824, "step": 49400 }, { "epoch": 0.823082902739037, "grad_norm": 3.7652742919199103, "learning_rate": 8.230628533422016e-06, "loss": 0.9861, "step": 49500 }, { "epoch": 0.8247456964819442, "grad_norm": 5.236712041001846, "learning_rate": 8.247256401729298e-06, "loss": 0.9941, "step": 49600 }, { "epoch": 0.8264084902248513, "grad_norm": 4.150277056410904, "learning_rate": 8.263884270036582e-06, "loss": 0.9741, "step": 49700 }, { "epoch": 0.8280712839677584, "grad_norm": 6.765359411662175, "learning_rate": 8.280512138343864e-06, "loss": 0.9394, "step": 49800 }, { "epoch": 0.8297340777106655, "grad_norm": 4.264004354979825, "learning_rate": 8.297140006651148e-06, "loss": 0.9646, "step": 49900 }, { "epoch": 0.8313968714535728, "grad_norm": 4.663588619006846, "learning_rate": 8.31376787495843e-06, "loss": 0.9759, "step": 50000 }, { "epoch": 0.8330596651964799, "grad_norm": 4.828332501302462, "learning_rate": 8.330395743265714e-06, "loss": 0.9655, "step": 50100 }, { "epoch": 0.834722458939387, "grad_norm": 4.488089393424456, "learning_rate": 8.347023611572997e-06, "loss": 0.9918, "step": 50200 }, { "epoch": 0.8363852526822941, "grad_norm": 5.1109347290658445, "learning_rate": 8.36365147988028e-06, "loss": 0.9542, "step": 50300 }, { "epoch": 0.8380480464252013, "grad_norm": 5.3615332950696715, "learning_rate": 8.380279348187563e-06, "loss": 0.9762, "step": 50400 }, { "epoch": 0.8397108401681085, "grad_norm": 6.817708485316357, "learning_rate": 8.396907216494847e-06, "loss": 0.9662, "step": 50500 }, { "epoch": 0.8413736339110156, "grad_norm": 4.062828687846936, "learning_rate": 8.413535084802129e-06, "loss": 0.9923, "step": 50600 }, { "epoch": 0.8430364276539227, "grad_norm": 5.79759688679963, "learning_rate": 8.430162953109411e-06, "loss": 0.9965, "step": 50700 }, { "epoch": 0.8446992213968298, "grad_norm": 4.267588042774008, "learning_rate": 8.446790821416695e-06, "loss": 0.9897, "step": 50800 }, { "epoch": 0.8463620151397371, "grad_norm": 4.200002383425093, "learning_rate": 8.463418689723977e-06, "loss": 0.9897, "step": 50900 }, { "epoch": 0.8480248088826442, "grad_norm": 4.5927546252240035, "learning_rate": 8.480046558031261e-06, "loss": 0.9695, "step": 51000 }, { "epoch": 0.8496876026255513, "grad_norm": 5.356275304662748, "learning_rate": 8.496674426338543e-06, "loss": 0.9959, "step": 51100 }, { "epoch": 0.8513503963684584, "grad_norm": 4.196404241886447, "learning_rate": 8.513302294645827e-06, "loss": 0.9659, "step": 51200 }, { "epoch": 0.8530131901113656, "grad_norm": 5.762613942305789, "learning_rate": 8.529930162953111e-06, "loss": 0.9421, "step": 51300 }, { "epoch": 0.8546759838542728, "grad_norm": 4.300051076620565, "learning_rate": 8.546558031260394e-06, "loss": 0.9638, "step": 51400 }, { "epoch": 0.8563387775971799, "grad_norm": 3.547113506347258, "learning_rate": 8.563185899567676e-06, "loss": 0.983, "step": 51500 }, { "epoch": 0.858001571340087, "grad_norm": 6.113822263899743, "learning_rate": 8.57981376787496e-06, "loss": 0.984, "step": 51600 }, { "epoch": 0.8596643650829942, "grad_norm": 4.527448511165878, "learning_rate": 8.596441636182242e-06, "loss": 0.9658, "step": 51700 }, { "epoch": 0.8613271588259014, "grad_norm": 5.152013727127041, "learning_rate": 8.613069504489526e-06, "loss": 0.9677, "step": 51800 }, { "epoch": 0.8629899525688085, "grad_norm": 3.9526286769357983, "learning_rate": 8.629697372796808e-06, "loss": 0.9348, "step": 51900 }, { "epoch": 0.8646527463117156, "grad_norm": 4.78045122091316, "learning_rate": 8.64632524110409e-06, "loss": 0.9973, "step": 52000 }, { "epoch": 0.8663155400546227, "grad_norm": 5.318753525464477, "learning_rate": 8.662953109411374e-06, "loss": 0.9916, "step": 52100 }, { "epoch": 0.8679783337975299, "grad_norm": 3.7989760915230715, "learning_rate": 8.679580977718656e-06, "loss": 0.9661, "step": 52200 }, { "epoch": 0.8696411275404371, "grad_norm": 4.666574429779674, "learning_rate": 8.69620884602594e-06, "loss": 0.9247, "step": 52300 }, { "epoch": 0.8713039212833442, "grad_norm": 4.0380362605412525, "learning_rate": 8.712836714333223e-06, "loss": 0.9561, "step": 52400 }, { "epoch": 0.8729667150262513, "grad_norm": 5.297468890413809, "learning_rate": 8.729464582640506e-06, "loss": 0.9503, "step": 52500 }, { "epoch": 0.8746295087691585, "grad_norm": 3.807885494704066, "learning_rate": 8.74609245094779e-06, "loss": 0.9665, "step": 52600 }, { "epoch": 0.8762923025120657, "grad_norm": 4.475074056475065, "learning_rate": 8.762720319255073e-06, "loss": 0.9503, "step": 52700 }, { "epoch": 0.8779550962549728, "grad_norm": 5.53521210868188, "learning_rate": 8.779348187562355e-06, "loss": 0.955, "step": 52800 }, { "epoch": 0.8796178899978799, "grad_norm": 4.327875752441062, "learning_rate": 8.795976055869639e-06, "loss": 0.9296, "step": 52900 }, { "epoch": 0.881280683740787, "grad_norm": 4.560484460349618, "learning_rate": 8.812603924176921e-06, "loss": 0.9689, "step": 53000 }, { "epoch": 0.8829434774836943, "grad_norm": 5.240086086235895, "learning_rate": 8.829231792484205e-06, "loss": 0.9402, "step": 53100 }, { "epoch": 0.8846062712266014, "grad_norm": 4.254768311608756, "learning_rate": 8.845859660791487e-06, "loss": 0.9225, "step": 53200 }, { "epoch": 0.8862690649695085, "grad_norm": 4.415790502512742, "learning_rate": 8.86248752909877e-06, "loss": 0.9849, "step": 53300 }, { "epoch": 0.8879318587124156, "grad_norm": 7.6213957356947954, "learning_rate": 8.879115397406053e-06, "loss": 0.9416, "step": 53400 }, { "epoch": 0.8895946524553228, "grad_norm": 5.120479221617414, "learning_rate": 8.895743265713336e-06, "loss": 0.9296, "step": 53500 }, { "epoch": 0.89125744619823, "grad_norm": 5.674079269036602, "learning_rate": 8.91237113402062e-06, "loss": 0.9485, "step": 53600 }, { "epoch": 0.8929202399411371, "grad_norm": 3.858490632137535, "learning_rate": 8.928999002327903e-06, "loss": 0.9679, "step": 53700 }, { "epoch": 0.8945830336840442, "grad_norm": 6.135244215454858, "learning_rate": 8.945626870635186e-06, "loss": 0.9604, "step": 53800 }, { "epoch": 0.8962458274269514, "grad_norm": 5.392128798422555, "learning_rate": 8.962254738942468e-06, "loss": 0.9576, "step": 53900 }, { "epoch": 0.8979086211698586, "grad_norm": 3.378058346001929, "learning_rate": 8.978882607249752e-06, "loss": 0.9774, "step": 54000 }, { "epoch": 0.8995714149127657, "grad_norm": 3.420041697032006, "learning_rate": 8.995510475557034e-06, "loss": 0.9191, "step": 54100 }, { "epoch": 0.9012342086556728, "grad_norm": 4.867249393305101, "learning_rate": 9.012138343864318e-06, "loss": 0.9845, "step": 54200 }, { "epoch": 0.90289700239858, "grad_norm": 5.899712925377642, "learning_rate": 9.0287662121716e-06, "loss": 0.9721, "step": 54300 }, { "epoch": 0.9045597961414871, "grad_norm": 4.796843061496119, "learning_rate": 9.045394080478882e-06, "loss": 0.95, "step": 54400 }, { "epoch": 0.9062225898843943, "grad_norm": 3.7374046363391167, "learning_rate": 9.062021948786166e-06, "loss": 0.9431, "step": 54500 }, { "epoch": 0.9078853836273014, "grad_norm": 5.848751359203812, "learning_rate": 9.078649817093448e-06, "loss": 0.9775, "step": 54600 }, { "epoch": 0.9095481773702085, "grad_norm": 4.051464527364664, "learning_rate": 9.095277685400732e-06, "loss": 0.9706, "step": 54700 }, { "epoch": 0.9112109711131157, "grad_norm": 4.047109203397398, "learning_rate": 9.111905553708016e-06, "loss": 0.9765, "step": 54800 }, { "epoch": 0.9128737648560229, "grad_norm": 4.8847362917827795, "learning_rate": 9.128533422015299e-06, "loss": 0.9676, "step": 54900 }, { "epoch": 0.91453655859893, "grad_norm": 3.0466420263144376, "learning_rate": 9.145161290322582e-06, "loss": 0.9282, "step": 55000 }, { "epoch": 0.9161993523418371, "grad_norm": 5.640025402052146, "learning_rate": 9.161789158629865e-06, "loss": 0.9543, "step": 55100 }, { "epoch": 0.9178621460847443, "grad_norm": 3.896085536094126, "learning_rate": 9.178417026937147e-06, "loss": 0.9349, "step": 55200 }, { "epoch": 0.9195249398276514, "grad_norm": 4.944860391568306, "learning_rate": 9.19504489524443e-06, "loss": 0.9746, "step": 55300 }, { "epoch": 0.9211877335705586, "grad_norm": 3.6703867342663257, "learning_rate": 9.211672763551713e-06, "loss": 0.9484, "step": 55400 }, { "epoch": 0.9228505273134657, "grad_norm": 4.753468526116723, "learning_rate": 9.228300631858997e-06, "loss": 0.9492, "step": 55500 }, { "epoch": 0.9245133210563728, "grad_norm": 3.5421453772688474, "learning_rate": 9.24492850016628e-06, "loss": 0.9472, "step": 55600 }, { "epoch": 0.92617611479928, "grad_norm": 4.657631450376159, "learning_rate": 9.261556368473561e-06, "loss": 0.9349, "step": 55700 }, { "epoch": 0.9278389085421872, "grad_norm": 4.832946103867109, "learning_rate": 9.278184236780845e-06, "loss": 0.9631, "step": 55800 }, { "epoch": 0.9295017022850943, "grad_norm": 5.311156999571482, "learning_rate": 9.294812105088128e-06, "loss": 0.9631, "step": 55900 }, { "epoch": 0.9311644960280014, "grad_norm": 3.3133203896464445, "learning_rate": 9.311439973395411e-06, "loss": 0.9567, "step": 56000 }, { "epoch": 0.9328272897709086, "grad_norm": 4.28294184604216, "learning_rate": 9.328067841702695e-06, "loss": 0.9569, "step": 56100 }, { "epoch": 0.9344900835138157, "grad_norm": 4.8363594256061155, "learning_rate": 9.344695710009978e-06, "loss": 0.9463, "step": 56200 }, { "epoch": 0.9361528772567229, "grad_norm": 4.869432379459609, "learning_rate": 9.361323578317262e-06, "loss": 0.9306, "step": 56300 }, { "epoch": 0.93781567099963, "grad_norm": 3.66238380082277, "learning_rate": 9.377951446624544e-06, "loss": 0.9539, "step": 56400 }, { "epoch": 0.9394784647425372, "grad_norm": 4.3308454144307245, "learning_rate": 9.394579314931826e-06, "loss": 0.9222, "step": 56500 }, { "epoch": 0.9411412584854443, "grad_norm": 5.661115891065899, "learning_rate": 9.41120718323911e-06, "loss": 0.9461, "step": 56600 }, { "epoch": 0.9428040522283515, "grad_norm": 4.028838853780231, "learning_rate": 9.427835051546392e-06, "loss": 0.9003, "step": 56700 }, { "epoch": 0.9444668459712586, "grad_norm": 4.843839813690367, "learning_rate": 9.444462919853676e-06, "loss": 0.9311, "step": 56800 }, { "epoch": 0.9461296397141657, "grad_norm": 3.340211891775477, "learning_rate": 9.461090788160958e-06, "loss": 0.9579, "step": 56900 }, { "epoch": 0.9477924334570729, "grad_norm": 5.047237731910924, "learning_rate": 9.47771865646824e-06, "loss": 0.9546, "step": 57000 }, { "epoch": 0.9494552271999801, "grad_norm": 5.519303641039246, "learning_rate": 9.494346524775524e-06, "loss": 0.9478, "step": 57100 }, { "epoch": 0.9511180209428872, "grad_norm": 3.7909345546455877, "learning_rate": 9.510974393082808e-06, "loss": 0.9535, "step": 57200 }, { "epoch": 0.9527808146857943, "grad_norm": 5.8972557958549965, "learning_rate": 9.52760226139009e-06, "loss": 0.9178, "step": 57300 }, { "epoch": 0.9544436084287015, "grad_norm": 4.79128310764228, "learning_rate": 9.544230129697374e-06, "loss": 0.9602, "step": 57400 }, { "epoch": 0.9561064021716086, "grad_norm": 3.88775966984769, "learning_rate": 9.560857998004657e-06, "loss": 0.9387, "step": 57500 }, { "epoch": 0.9577691959145158, "grad_norm": 3.467712736836128, "learning_rate": 9.577485866311939e-06, "loss": 0.95, "step": 57600 }, { "epoch": 0.9594319896574229, "grad_norm": 7.041640446305675, "learning_rate": 9.594113734619223e-06, "loss": 0.9974, "step": 57700 }, { "epoch": 0.96109478340033, "grad_norm": 5.04695547723181, "learning_rate": 9.610741602926505e-06, "loss": 0.9549, "step": 57800 }, { "epoch": 0.9627575771432372, "grad_norm": 3.436597090247969, "learning_rate": 9.627369471233789e-06, "loss": 0.9799, "step": 57900 }, { "epoch": 0.9644203708861444, "grad_norm": 5.301286547874062, "learning_rate": 9.643997339541071e-06, "loss": 0.9624, "step": 58000 }, { "epoch": 0.9660831646290515, "grad_norm": 4.856830647153541, "learning_rate": 9.660625207848353e-06, "loss": 0.9302, "step": 58100 }, { "epoch": 0.9677459583719586, "grad_norm": 5.657986545944047, "learning_rate": 9.677253076155637e-06, "loss": 0.9805, "step": 58200 }, { "epoch": 0.9694087521148658, "grad_norm": 3.9289148444816253, "learning_rate": 9.693880944462921e-06, "loss": 0.8925, "step": 58300 }, { "epoch": 0.9710715458577729, "grad_norm": 3.323735081878265, "learning_rate": 9.710508812770203e-06, "loss": 0.9248, "step": 58400 }, { "epoch": 0.9727343396006801, "grad_norm": 6.066117637513508, "learning_rate": 9.727136681077487e-06, "loss": 0.9598, "step": 58500 }, { "epoch": 0.9743971333435872, "grad_norm": 4.935640255657765, "learning_rate": 9.74376454938477e-06, "loss": 0.9461, "step": 58600 }, { "epoch": 0.9760599270864944, "grad_norm": 4.705783392661752, "learning_rate": 9.760392417692054e-06, "loss": 0.8909, "step": 58700 }, { "epoch": 0.9777227208294015, "grad_norm": 3.72088277641973, "learning_rate": 9.777020285999336e-06, "loss": 0.9231, "step": 58800 }, { "epoch": 0.9793855145723087, "grad_norm": 5.859584932824668, "learning_rate": 9.793648154306618e-06, "loss": 0.9452, "step": 58900 }, { "epoch": 0.9810483083152158, "grad_norm": 3.890567109028243, "learning_rate": 9.810276022613902e-06, "loss": 0.9535, "step": 59000 }, { "epoch": 0.982711102058123, "grad_norm": 5.175375476545309, "learning_rate": 9.826903890921184e-06, "loss": 0.9682, "step": 59100 }, { "epoch": 0.9843738958010301, "grad_norm": 4.44772117810384, "learning_rate": 9.843531759228468e-06, "loss": 0.9471, "step": 59200 }, { "epoch": 0.9860366895439372, "grad_norm": 4.496847718811847, "learning_rate": 9.86015962753575e-06, "loss": 0.9238, "step": 59300 }, { "epoch": 0.9876994832868444, "grad_norm": 3.947825874611403, "learning_rate": 9.876787495843033e-06, "loss": 0.9246, "step": 59400 }, { "epoch": 0.9893622770297515, "grad_norm": 3.1208662443908763, "learning_rate": 9.893415364150316e-06, "loss": 0.9475, "step": 59500 }, { "epoch": 0.9910250707726587, "grad_norm": 4.302385473853177, "learning_rate": 9.9100432324576e-06, "loss": 0.9735, "step": 59600 }, { "epoch": 0.9926878645155658, "grad_norm": 6.0010467301600965, "learning_rate": 9.926671100764883e-06, "loss": 0.9595, "step": 59700 }, { "epoch": 0.994350658258473, "grad_norm": 4.183252593938311, "learning_rate": 9.943298969072166e-06, "loss": 0.9409, "step": 59800 }, { "epoch": 0.9960134520013801, "grad_norm": 4.046969462521464, "learning_rate": 9.959926837379449e-06, "loss": 0.9417, "step": 59900 }, { "epoch": 0.9976762457442873, "grad_norm": 6.077956683975851, "learning_rate": 9.976554705686733e-06, "loss": 0.9162, "step": 60000 }, { "epoch": 0.9993390394871944, "grad_norm": 4.371519809293696, "learning_rate": 9.993182573994015e-06, "loss": 0.9512, "step": 60100 }, { "epoch": 1.0009976762457442, "grad_norm": 4.280263298577012, "learning_rate": 9.999999706821644e-06, "loss": 0.8823, "step": 60200 }, { "epoch": 1.0026604699886514, "grad_norm": 5.046045938386894, "learning_rate": 9.99999787077231e-06, "loss": 0.8362, "step": 60300 }, { "epoch": 1.0043232637315587, "grad_norm": 5.624362745757122, "learning_rate": 9.999994350274676e-06, "loss": 0.8475, "step": 60400 }, { "epoch": 1.0059860574744657, "grad_norm": 3.886200074856264, "learning_rate": 9.999989145329928e-06, "loss": 0.8248, "step": 60500 }, { "epoch": 1.007648851217373, "grad_norm": 7.459029306467848, "learning_rate": 9.999982255939816e-06, "loss": 0.8771, "step": 60600 }, { "epoch": 1.00931164496028, "grad_norm": 3.6851348097093903, "learning_rate": 9.999973682106664e-06, "loss": 0.8349, "step": 60700 }, { "epoch": 1.0109744387031872, "grad_norm": 4.081439786041252, "learning_rate": 9.99996342383336e-06, "loss": 0.8781, "step": 60800 }, { "epoch": 1.0126372324460944, "grad_norm": 4.812576124371472, "learning_rate": 9.999951481123362e-06, "loss": 0.8441, "step": 60900 }, { "epoch": 1.0143000261890014, "grad_norm": 5.268362162503153, "learning_rate": 9.99993785398069e-06, "loss": 0.8517, "step": 61000 }, { "epoch": 1.0159628199319086, "grad_norm": 4.177229931988407, "learning_rate": 9.999922542409934e-06, "loss": 0.8499, "step": 61100 }, { "epoch": 1.0176256136748156, "grad_norm": 5.11683053528435, "learning_rate": 9.999905546416258e-06, "loss": 0.8236, "step": 61200 }, { "epoch": 1.0192884074177229, "grad_norm": 3.9194876668128327, "learning_rate": 9.999886866005381e-06, "loss": 0.869, "step": 61300 }, { "epoch": 1.02095120116063, "grad_norm": 4.714335688931545, "learning_rate": 9.9998665011836e-06, "loss": 0.8549, "step": 61400 }, { "epoch": 1.022613994903537, "grad_norm": 4.128091988107122, "learning_rate": 9.999844451957776e-06, "loss": 0.8246, "step": 61500 }, { "epoch": 1.0242767886464443, "grad_norm": 4.882991823017929, "learning_rate": 9.999820718335336e-06, "loss": 0.8597, "step": 61600 }, { "epoch": 1.0259395823893516, "grad_norm": 5.292204358605255, "learning_rate": 9.999795300324275e-06, "loss": 0.8566, "step": 61700 }, { "epoch": 1.0276023761322586, "grad_norm": 5.527557334637766, "learning_rate": 9.999768197933158e-06, "loss": 0.8333, "step": 61800 }, { "epoch": 1.0292651698751658, "grad_norm": 4.500855654510317, "learning_rate": 9.999739411171113e-06, "loss": 0.8111, "step": 61900 }, { "epoch": 1.0309279636180728, "grad_norm": 3.9215564316622946, "learning_rate": 9.999708940047841e-06, "loss": 0.8296, "step": 62000 }, { "epoch": 1.03259075736098, "grad_norm": 3.9539411225410754, "learning_rate": 9.999676784573605e-06, "loss": 0.8471, "step": 62100 }, { "epoch": 1.0342535511038873, "grad_norm": 5.404865841861741, "learning_rate": 9.999642944759238e-06, "loss": 0.8201, "step": 62200 }, { "epoch": 1.0359163448467943, "grad_norm": 3.196679308412231, "learning_rate": 9.999607420616142e-06, "loss": 0.8783, "step": 62300 }, { "epoch": 1.0375791385897015, "grad_norm": 4.35837914887357, "learning_rate": 9.999570212156283e-06, "loss": 0.8677, "step": 62400 }, { "epoch": 1.0392419323326085, "grad_norm": 4.041693951086595, "learning_rate": 9.999531319392197e-06, "loss": 0.8819, "step": 62500 }, { "epoch": 1.0409047260755158, "grad_norm": 3.33588746268096, "learning_rate": 9.999490742336987e-06, "loss": 0.8543, "step": 62600 }, { "epoch": 1.042567519818423, "grad_norm": 4.631323431756272, "learning_rate": 9.999448481004321e-06, "loss": 0.8133, "step": 62700 }, { "epoch": 1.04423031356133, "grad_norm": 4.361414233663171, "learning_rate": 9.999404535408439e-06, "loss": 0.8417, "step": 62800 }, { "epoch": 1.0458931073042372, "grad_norm": 3.943046868882542, "learning_rate": 9.999358905564145e-06, "loss": 0.8155, "step": 62900 }, { "epoch": 1.0475559010471445, "grad_norm": 4.302842992964677, "learning_rate": 9.99931159148681e-06, "loss": 0.8434, "step": 63000 }, { "epoch": 1.0492186947900515, "grad_norm": 4.849502973337214, "learning_rate": 9.999262593192374e-06, "loss": 0.8517, "step": 63100 }, { "epoch": 1.0508814885329587, "grad_norm": 4.360380295216058, "learning_rate": 9.999211910697345e-06, "loss": 0.8368, "step": 63200 }, { "epoch": 1.0525442822758657, "grad_norm": 4.488315687127207, "learning_rate": 9.999159544018798e-06, "loss": 0.8421, "step": 63300 }, { "epoch": 1.054207076018773, "grad_norm": 4.881341260668819, "learning_rate": 9.999105493174372e-06, "loss": 0.8706, "step": 63400 }, { "epoch": 1.0558698697616802, "grad_norm": 3.5656818785812887, "learning_rate": 9.99904975818228e-06, "loss": 0.8486, "step": 63500 }, { "epoch": 1.0575326635045872, "grad_norm": 3.9194009057982346, "learning_rate": 9.998992339061295e-06, "loss": 0.8712, "step": 63600 }, { "epoch": 1.0591954572474944, "grad_norm": 5.866830382363368, "learning_rate": 9.998933235830763e-06, "loss": 0.8601, "step": 63700 }, { "epoch": 1.0608582509904014, "grad_norm": 3.5205277554766523, "learning_rate": 9.998872448510593e-06, "loss": 0.8699, "step": 63800 }, { "epoch": 1.0625210447333087, "grad_norm": 4.621256553108591, "learning_rate": 9.998809977121266e-06, "loss": 0.8423, "step": 63900 }, { "epoch": 1.064183838476216, "grad_norm": 3.74075900311844, "learning_rate": 9.998745821683829e-06, "loss": 0.8422, "step": 64000 }, { "epoch": 1.065846632219123, "grad_norm": 5.018425382720956, "learning_rate": 9.998679982219892e-06, "loss": 0.8523, "step": 64100 }, { "epoch": 1.0675094259620301, "grad_norm": 3.370031613405687, "learning_rate": 9.998612458751637e-06, "loss": 0.8651, "step": 64200 }, { "epoch": 1.0691722197049374, "grad_norm": 4.207593547882103, "learning_rate": 9.998543251301813e-06, "loss": 0.853, "step": 64300 }, { "epoch": 1.0708350134478444, "grad_norm": 4.6272707400016655, "learning_rate": 9.998472359893734e-06, "loss": 0.8438, "step": 64400 }, { "epoch": 1.0724978071907516, "grad_norm": 3.173643957962903, "learning_rate": 9.998399784551282e-06, "loss": 0.8143, "step": 64500 }, { "epoch": 1.0741606009336586, "grad_norm": 5.286206653390939, "learning_rate": 9.99832552529891e-06, "loss": 0.8169, "step": 64600 }, { "epoch": 1.0758233946765658, "grad_norm": 4.659155868462095, "learning_rate": 9.998249582161632e-06, "loss": 0.8352, "step": 64700 }, { "epoch": 1.077486188419473, "grad_norm": 3.9008288546448706, "learning_rate": 9.998171955165034e-06, "loss": 0.8452, "step": 64800 }, { "epoch": 1.07914898216238, "grad_norm": 6.124317205875885, "learning_rate": 9.998092644335266e-06, "loss": 0.8647, "step": 64900 }, { "epoch": 1.0808117759052873, "grad_norm": 4.185980467245101, "learning_rate": 9.99801164969905e-06, "loss": 0.8542, "step": 65000 }, { "epoch": 1.0824745696481943, "grad_norm": 5.693787285315702, "learning_rate": 9.99792897128367e-06, "loss": 0.8586, "step": 65100 }, { "epoch": 1.0841373633911016, "grad_norm": 3.424600658315801, "learning_rate": 9.997844609116981e-06, "loss": 0.8718, "step": 65200 }, { "epoch": 1.0858001571340088, "grad_norm": 4.020057735846868, "learning_rate": 9.9977585632274e-06, "loss": 0.8654, "step": 65300 }, { "epoch": 1.0874629508769158, "grad_norm": 3.743187970387439, "learning_rate": 9.997670833643922e-06, "loss": 0.8423, "step": 65400 }, { "epoch": 1.089125744619823, "grad_norm": 7.436682489452169, "learning_rate": 9.997581420396094e-06, "loss": 0.8506, "step": 65500 }, { "epoch": 1.09078853836273, "grad_norm": 3.329428044388096, "learning_rate": 9.997490323514046e-06, "loss": 0.8461, "step": 65600 }, { "epoch": 1.0924513321056373, "grad_norm": 4.945965820950513, "learning_rate": 9.997397543028462e-06, "loss": 0.8453, "step": 65700 }, { "epoch": 1.0941141258485445, "grad_norm": 3.842041154875245, "learning_rate": 9.997303078970601e-06, "loss": 0.8318, "step": 65800 }, { "epoch": 1.0957769195914515, "grad_norm": 4.204070532612385, "learning_rate": 9.997206931372287e-06, "loss": 0.8486, "step": 65900 }, { "epoch": 1.0974397133343587, "grad_norm": 3.3891882980047483, "learning_rate": 9.997109100265912e-06, "loss": 0.838, "step": 66000 }, { "epoch": 1.0991025070772658, "grad_norm": 3.589231020435699, "learning_rate": 9.997009585684432e-06, "loss": 0.8041, "step": 66100 }, { "epoch": 1.100765300820173, "grad_norm": 4.982159692993814, "learning_rate": 9.996908387661375e-06, "loss": 0.8424, "step": 66200 }, { "epoch": 1.1024280945630802, "grad_norm": 3.688155310355133, "learning_rate": 9.996805506230831e-06, "loss": 0.8521, "step": 66300 }, { "epoch": 1.1040908883059872, "grad_norm": 3.487974814552195, "learning_rate": 9.996700941427461e-06, "loss": 0.8356, "step": 66400 }, { "epoch": 1.1057536820488945, "grad_norm": 4.077906986613009, "learning_rate": 9.996594693286495e-06, "loss": 0.8294, "step": 66500 }, { "epoch": 1.1074164757918017, "grad_norm": 3.531255139258326, "learning_rate": 9.99648676184372e-06, "loss": 0.8281, "step": 66600 }, { "epoch": 1.1090792695347087, "grad_norm": 4.581810572645151, "learning_rate": 9.996377147135503e-06, "loss": 0.8565, "step": 66700 }, { "epoch": 1.110742063277616, "grad_norm": 3.3019575794842617, "learning_rate": 9.996265849198769e-06, "loss": 0.8549, "step": 66800 }, { "epoch": 1.112404857020523, "grad_norm": 4.472521966806495, "learning_rate": 9.996152868071014e-06, "loss": 0.8063, "step": 66900 }, { "epoch": 1.1140676507634302, "grad_norm": 7.171543420842454, "learning_rate": 9.996038203790299e-06, "loss": 0.875, "step": 67000 }, { "epoch": 1.1157304445063374, "grad_norm": 5.63304713272905, "learning_rate": 9.995921856395256e-06, "loss": 0.8459, "step": 67100 }, { "epoch": 1.1173932382492444, "grad_norm": 4.929145595479717, "learning_rate": 9.99580382592508e-06, "loss": 0.8591, "step": 67200 }, { "epoch": 1.1190560319921516, "grad_norm": 3.246916195888537, "learning_rate": 9.995684112419532e-06, "loss": 0.8118, "step": 67300 }, { "epoch": 1.1207188257350587, "grad_norm": 3.572964618322181, "learning_rate": 9.995562715918948e-06, "loss": 0.885, "step": 67400 }, { "epoch": 1.1223816194779659, "grad_norm": 3.531295511796754, "learning_rate": 9.995439636464217e-06, "loss": 0.8519, "step": 67500 }, { "epoch": 1.1240444132208731, "grad_norm": 4.993699811140537, "learning_rate": 9.99531487409681e-06, "loss": 0.8511, "step": 67600 }, { "epoch": 1.1257072069637801, "grad_norm": 3.443792616764757, "learning_rate": 9.995188428858755e-06, "loss": 0.8184, "step": 67700 }, { "epoch": 1.1273700007066874, "grad_norm": 3.460687506014102, "learning_rate": 9.995060300792652e-06, "loss": 0.8186, "step": 67800 }, { "epoch": 1.1290327944495946, "grad_norm": 4.65188920150722, "learning_rate": 9.994930489941663e-06, "loss": 0.8116, "step": 67900 }, { "epoch": 1.1306955881925016, "grad_norm": 4.221420571725041, "learning_rate": 9.994798996349524e-06, "loss": 0.8169, "step": 68000 }, { "epoch": 1.1323583819354088, "grad_norm": 4.800199035389522, "learning_rate": 9.99466582006053e-06, "loss": 0.8863, "step": 68100 }, { "epoch": 1.1340211756783158, "grad_norm": 4.21311147177404, "learning_rate": 9.99453096111955e-06, "loss": 0.8705, "step": 68200 }, { "epoch": 1.135683969421223, "grad_norm": 3.4105603374938736, "learning_rate": 9.994394419572016e-06, "loss": 0.8456, "step": 68300 }, { "epoch": 1.1373467631641303, "grad_norm": 5.326554515239479, "learning_rate": 9.994256195463923e-06, "loss": 0.8276, "step": 68400 }, { "epoch": 1.1390095569070373, "grad_norm": 4.78126926762809, "learning_rate": 9.994116288841844e-06, "loss": 0.8285, "step": 68500 }, { "epoch": 1.1406723506499445, "grad_norm": 3.6725171120063926, "learning_rate": 9.993974699752908e-06, "loss": 0.8272, "step": 68600 }, { "epoch": 1.1423351443928516, "grad_norm": 4.82354915095916, "learning_rate": 9.993831428244815e-06, "loss": 0.8552, "step": 68700 }, { "epoch": 1.1439979381357588, "grad_norm": 4.167005070901884, "learning_rate": 9.993686474365834e-06, "loss": 0.8598, "step": 68800 }, { "epoch": 1.145660731878666, "grad_norm": 6.379024878692271, "learning_rate": 9.993539838164797e-06, "loss": 0.8308, "step": 68900 }, { "epoch": 1.147323525621573, "grad_norm": 5.054701306303475, "learning_rate": 9.993391519691102e-06, "loss": 0.8482, "step": 69000 }, { "epoch": 1.1489863193644803, "grad_norm": 4.801057804744682, "learning_rate": 9.99324151899472e-06, "loss": 0.8476, "step": 69100 }, { "epoch": 1.1506491131073875, "grad_norm": 4.050514012180359, "learning_rate": 9.993089836126184e-06, "loss": 0.8229, "step": 69200 }, { "epoch": 1.1523119068502945, "grad_norm": 3.4627697168806164, "learning_rate": 9.992936471136592e-06, "loss": 0.8692, "step": 69300 }, { "epoch": 1.1539747005932017, "grad_norm": 3.5955135173216854, "learning_rate": 9.992781424077613e-06, "loss": 0.8759, "step": 69400 }, { "epoch": 1.1556374943361087, "grad_norm": 3.972138904213258, "learning_rate": 9.99262469500148e-06, "loss": 0.8441, "step": 69500 }, { "epoch": 1.157300288079016, "grad_norm": 3.6868684637975546, "learning_rate": 9.992466283960996e-06, "loss": 0.8559, "step": 69600 }, { "epoch": 1.158963081821923, "grad_norm": 4.902423092278565, "learning_rate": 9.992306191009523e-06, "loss": 0.8406, "step": 69700 }, { "epoch": 1.1606258755648302, "grad_norm": 4.344502245794673, "learning_rate": 9.992144416200998e-06, "loss": 0.8355, "step": 69800 }, { "epoch": 1.1622886693077374, "grad_norm": 4.813733027894809, "learning_rate": 9.991980959589921e-06, "loss": 0.8673, "step": 69900 }, { "epoch": 1.1639514630506445, "grad_norm": 3.4288306474478545, "learning_rate": 9.991815821231359e-06, "loss": 0.8658, "step": 70000 }, { "epoch": 1.1656142567935517, "grad_norm": 4.371536558639083, "learning_rate": 9.991649001180945e-06, "loss": 0.8121, "step": 70100 }, { "epoch": 1.167277050536459, "grad_norm": 4.124439105825272, "learning_rate": 9.99148049949488e-06, "loss": 0.8451, "step": 70200 }, { "epoch": 1.168939844279366, "grad_norm": 3.8964711739083313, "learning_rate": 9.991310316229928e-06, "loss": 0.8503, "step": 70300 }, { "epoch": 1.1706026380222732, "grad_norm": 3.843722424225369, "learning_rate": 9.991138451443423e-06, "loss": 0.8234, "step": 70400 }, { "epoch": 1.1722654317651804, "grad_norm": 5.709079144283439, "learning_rate": 9.990964905193265e-06, "loss": 0.8318, "step": 70500 }, { "epoch": 1.1739282255080874, "grad_norm": 3.118861581443254, "learning_rate": 9.990789677537922e-06, "loss": 0.8179, "step": 70600 }, { "epoch": 1.1755910192509946, "grad_norm": 3.2479984493564955, "learning_rate": 9.990612768536424e-06, "loss": 0.8003, "step": 70700 }, { "epoch": 1.1772538129939016, "grad_norm": 4.044786558437745, "learning_rate": 9.990434178248368e-06, "loss": 0.8183, "step": 70800 }, { "epoch": 1.1789166067368089, "grad_norm": 3.6277440772612803, "learning_rate": 9.990253906733923e-06, "loss": 0.8372, "step": 70900 }, { "epoch": 1.1805794004797159, "grad_norm": 3.979681370660199, "learning_rate": 9.990071954053819e-06, "loss": 0.8518, "step": 71000 }, { "epoch": 1.182242194222623, "grad_norm": 4.42890654217439, "learning_rate": 9.989888320269354e-06, "loss": 0.8507, "step": 71100 }, { "epoch": 1.1839049879655303, "grad_norm": 3.3934794631557983, "learning_rate": 9.989703005442393e-06, "loss": 0.8506, "step": 71200 }, { "epoch": 1.1855677817084374, "grad_norm": 3.763330347232676, "learning_rate": 9.989516009635366e-06, "loss": 0.8225, "step": 71300 }, { "epoch": 1.1872305754513446, "grad_norm": 5.045235149417063, "learning_rate": 9.98932733291127e-06, "loss": 0.8291, "step": 71400 }, { "epoch": 1.1888933691942518, "grad_norm": 4.289619553784828, "learning_rate": 9.989136975333669e-06, "loss": 0.7918, "step": 71500 }, { "epoch": 1.1905561629371588, "grad_norm": 3.6450996970706893, "learning_rate": 9.988944936966691e-06, "loss": 0.8264, "step": 71600 }, { "epoch": 1.192218956680066, "grad_norm": 3.7521472727020346, "learning_rate": 9.988751217875032e-06, "loss": 0.8143, "step": 71700 }, { "epoch": 1.193881750422973, "grad_norm": 4.269114505570928, "learning_rate": 9.988555818123956e-06, "loss": 0.8181, "step": 71800 }, { "epoch": 1.1955445441658803, "grad_norm": 4.614308569695154, "learning_rate": 9.98835873777929e-06, "loss": 0.8264, "step": 71900 }, { "epoch": 1.1972073379087875, "grad_norm": 4.235503599414205, "learning_rate": 9.988159976907428e-06, "loss": 0.813, "step": 72000 }, { "epoch": 1.1988701316516945, "grad_norm": 4.462131918005587, "learning_rate": 9.987959535575329e-06, "loss": 0.8119, "step": 72100 }, { "epoch": 1.2005329253946018, "grad_norm": 2.980380996024795, "learning_rate": 9.987757413850523e-06, "loss": 0.8458, "step": 72200 }, { "epoch": 1.2021957191375088, "grad_norm": 3.6129078511604047, "learning_rate": 9.987553611801102e-06, "loss": 0.8313, "step": 72300 }, { "epoch": 1.203858512880416, "grad_norm": 3.2721773409712864, "learning_rate": 9.987348129495721e-06, "loss": 0.7978, "step": 72400 }, { "epoch": 1.2055213066233232, "grad_norm": 4.117924825930048, "learning_rate": 9.98714096700361e-06, "loss": 0.8274, "step": 72500 }, { "epoch": 1.2071841003662303, "grad_norm": 4.911908857517122, "learning_rate": 9.986932124394557e-06, "loss": 0.8236, "step": 72600 }, { "epoch": 1.2088468941091375, "grad_norm": 5.2602976602992495, "learning_rate": 9.98672160173892e-06, "loss": 0.8338, "step": 72700 }, { "epoch": 1.2105096878520447, "grad_norm": 3.686652182573133, "learning_rate": 9.986509399107623e-06, "loss": 0.83, "step": 72800 }, { "epoch": 1.2121724815949517, "grad_norm": 3.9421894918583322, "learning_rate": 9.986295516572153e-06, "loss": 0.8187, "step": 72900 }, { "epoch": 1.213835275337859, "grad_norm": 4.072783711999539, "learning_rate": 9.986079954204563e-06, "loss": 0.837, "step": 73000 }, { "epoch": 1.215498069080766, "grad_norm": 3.542344223719677, "learning_rate": 9.985862712077478e-06, "loss": 0.8231, "step": 73100 }, { "epoch": 1.2171608628236732, "grad_norm": 5.067535798443618, "learning_rate": 9.985643790264083e-06, "loss": 0.8151, "step": 73200 }, { "epoch": 1.2188236565665804, "grad_norm": 4.001519621905467, "learning_rate": 9.98542318883813e-06, "loss": 0.8327, "step": 73300 }, { "epoch": 1.2204864503094874, "grad_norm": 4.657618573127487, "learning_rate": 9.985200907873939e-06, "loss": 0.8224, "step": 73400 }, { "epoch": 1.2221492440523947, "grad_norm": 4.776677346627273, "learning_rate": 9.984976947446392e-06, "loss": 0.8236, "step": 73500 }, { "epoch": 1.2238120377953017, "grad_norm": 4.023591758168419, "learning_rate": 9.984751307630939e-06, "loss": 0.8513, "step": 73600 }, { "epoch": 1.225474831538209, "grad_norm": 4.463271283079919, "learning_rate": 9.984523988503598e-06, "loss": 0.8077, "step": 73700 }, { "epoch": 1.2271376252811161, "grad_norm": 4.291262392912056, "learning_rate": 9.984294990140948e-06, "loss": 0.8084, "step": 73800 }, { "epoch": 1.2288004190240231, "grad_norm": 4.000470771328759, "learning_rate": 9.984064312620137e-06, "loss": 0.819, "step": 73900 }, { "epoch": 1.2304632127669304, "grad_norm": 3.4988296465789706, "learning_rate": 9.98383195601888e-06, "loss": 0.8112, "step": 74000 }, { "epoch": 1.2321260065098376, "grad_norm": 4.61774636253288, "learning_rate": 9.983597920415452e-06, "loss": 0.8372, "step": 74100 }, { "epoch": 1.2337888002527446, "grad_norm": 5.064264266396653, "learning_rate": 9.983362205888701e-06, "loss": 0.8278, "step": 74200 }, { "epoch": 1.2354515939956519, "grad_norm": 3.959230378277228, "learning_rate": 9.983124812518032e-06, "loss": 0.8138, "step": 74300 }, { "epoch": 1.2371143877385589, "grad_norm": 3.6941019337262735, "learning_rate": 9.982885740383425e-06, "loss": 0.8207, "step": 74400 }, { "epoch": 1.238777181481466, "grad_norm": 3.3472842135406573, "learning_rate": 9.98264498956542e-06, "loss": 0.8242, "step": 74500 }, { "epoch": 1.2404399752243733, "grad_norm": 4.09324693905301, "learning_rate": 9.98240256014512e-06, "loss": 0.7926, "step": 74600 }, { "epoch": 1.2421027689672803, "grad_norm": 4.880287265432911, "learning_rate": 9.982158452204203e-06, "loss": 0.8037, "step": 74700 }, { "epoch": 1.2437655627101876, "grad_norm": 3.179665358103396, "learning_rate": 9.981912665824901e-06, "loss": 0.8389, "step": 74800 }, { "epoch": 1.2454283564530946, "grad_norm": 4.199987019523832, "learning_rate": 9.981665201090022e-06, "loss": 0.8259, "step": 74900 }, { "epoch": 1.2470911501960018, "grad_norm": 4.099624539741141, "learning_rate": 9.981416058082929e-06, "loss": 0.8076, "step": 75000 }, { "epoch": 1.248753943938909, "grad_norm": 4.4810491199810425, "learning_rate": 9.98116523688756e-06, "loss": 0.8395, "step": 75100 }, { "epoch": 1.250416737681816, "grad_norm": 3.036830481635786, "learning_rate": 9.98091273758841e-06, "loss": 0.8252, "step": 75200 }, { "epoch": 1.2520795314247233, "grad_norm": 3.7457954304712824, "learning_rate": 9.980658560270547e-06, "loss": 0.8054, "step": 75300 }, { "epoch": 1.2537423251676305, "grad_norm": 3.4152821461000604, "learning_rate": 9.980402705019599e-06, "loss": 0.794, "step": 75400 }, { "epoch": 1.2554051189105375, "grad_norm": 4.798035938265916, "learning_rate": 9.980145171921762e-06, "loss": 0.8342, "step": 75500 }, { "epoch": 1.2570679126534448, "grad_norm": 4.449537501602619, "learning_rate": 9.979885961063795e-06, "loss": 0.8361, "step": 75600 }, { "epoch": 1.2587307063963518, "grad_norm": 4.742547123839023, "learning_rate": 9.979625072533026e-06, "loss": 0.8196, "step": 75700 }, { "epoch": 1.260393500139259, "grad_norm": 3.542425720171049, "learning_rate": 9.979362506417342e-06, "loss": 0.8211, "step": 75800 }, { "epoch": 1.262056293882166, "grad_norm": 4.202860418895215, "learning_rate": 9.979098262805203e-06, "loss": 0.8421, "step": 75900 }, { "epoch": 1.2637190876250732, "grad_norm": 4.383091888467918, "learning_rate": 9.978832341785625e-06, "loss": 0.8097, "step": 76000 }, { "epoch": 1.2653818813679805, "grad_norm": 2.9793107782841193, "learning_rate": 9.978564743448198e-06, "loss": 0.8223, "step": 76100 }, { "epoch": 1.2670446751108875, "grad_norm": 3.6596562295093578, "learning_rate": 9.978295467883074e-06, "loss": 0.789, "step": 76200 }, { "epoch": 1.2687074688537947, "grad_norm": 4.788944969004023, "learning_rate": 9.978024515180967e-06, "loss": 0.817, "step": 76300 }, { "epoch": 1.270370262596702, "grad_norm": 3.0800982975848066, "learning_rate": 9.977751885433156e-06, "loss": 0.8143, "step": 76400 }, { "epoch": 1.272033056339609, "grad_norm": 4.95185581000981, "learning_rate": 9.97747757873149e-06, "loss": 0.858, "step": 76500 }, { "epoch": 1.2736958500825162, "grad_norm": 3.884080447126271, "learning_rate": 9.97720159516838e-06, "loss": 0.8196, "step": 76600 }, { "epoch": 1.2753586438254234, "grad_norm": 5.006362351431296, "learning_rate": 9.976923934836803e-06, "loss": 0.8102, "step": 76700 }, { "epoch": 1.2770214375683304, "grad_norm": 4.198913188558703, "learning_rate": 9.976644597830298e-06, "loss": 0.8036, "step": 76800 }, { "epoch": 1.2786842313112377, "grad_norm": 4.1966096231249495, "learning_rate": 9.976363584242971e-06, "loss": 0.8165, "step": 76900 }, { "epoch": 1.2803470250541447, "grad_norm": 5.568598679418043, "learning_rate": 9.976080894169495e-06, "loss": 0.8133, "step": 77000 }, { "epoch": 1.282009818797052, "grad_norm": 3.6395751373794645, "learning_rate": 9.975796527705102e-06, "loss": 0.8308, "step": 77100 }, { "epoch": 1.283672612539959, "grad_norm": 5.1111732355787955, "learning_rate": 9.975510484945594e-06, "loss": 0.8195, "step": 77200 }, { "epoch": 1.2853354062828661, "grad_norm": 4.429478683404174, "learning_rate": 9.975222765987336e-06, "loss": 0.8341, "step": 77300 }, { "epoch": 1.2869982000257734, "grad_norm": 4.413411814635311, "learning_rate": 9.974933370927256e-06, "loss": 0.829, "step": 77400 }, { "epoch": 1.2886609937686804, "grad_norm": 3.5024827201530133, "learning_rate": 9.974642299862851e-06, "loss": 0.8212, "step": 77500 }, { "epoch": 1.2903237875115876, "grad_norm": 3.4438774530170044, "learning_rate": 9.974349552892177e-06, "loss": 0.8179, "step": 77600 }, { "epoch": 1.2919865812544948, "grad_norm": 3.378099436848102, "learning_rate": 9.97405513011386e-06, "loss": 0.8009, "step": 77700 }, { "epoch": 1.2936493749974018, "grad_norm": 3.560391750952688, "learning_rate": 9.973759031627084e-06, "loss": 0.7939, "step": 77800 }, { "epoch": 1.295312168740309, "grad_norm": 4.070162549650913, "learning_rate": 9.973461257531606e-06, "loss": 0.8191, "step": 77900 }, { "epoch": 1.2969749624832163, "grad_norm": 4.194189859321232, "learning_rate": 9.973161807927743e-06, "loss": 0.829, "step": 78000 }, { "epoch": 1.2986377562261233, "grad_norm": 3.618414757267253, "learning_rate": 9.972860682916375e-06, "loss": 0.8058, "step": 78100 }, { "epoch": 1.3003005499690303, "grad_norm": 4.881538439462496, "learning_rate": 9.972557882598945e-06, "loss": 0.8018, "step": 78200 }, { "epoch": 1.3019633437119376, "grad_norm": 3.9937766903745926, "learning_rate": 9.972253407077467e-06, "loss": 0.8246, "step": 78300 }, { "epoch": 1.3036261374548448, "grad_norm": 4.099528905650479, "learning_rate": 9.971947256454518e-06, "loss": 0.7909, "step": 78400 }, { "epoch": 1.3052889311977518, "grad_norm": 4.389600910607364, "learning_rate": 9.97163943083323e-06, "loss": 0.7908, "step": 78500 }, { "epoch": 1.306951724940659, "grad_norm": 4.706689369716256, "learning_rate": 9.971329930317313e-06, "loss": 0.8105, "step": 78600 }, { "epoch": 1.3086145186835663, "grad_norm": 4.664004880158954, "learning_rate": 9.971018755011031e-06, "loss": 0.7991, "step": 78700 }, { "epoch": 1.3102773124264733, "grad_norm": 3.872116052851584, "learning_rate": 9.970705905019216e-06, "loss": 0.8377, "step": 78800 }, { "epoch": 1.3119401061693805, "grad_norm": 4.366558649766099, "learning_rate": 9.970391380447265e-06, "loss": 0.8045, "step": 78900 }, { "epoch": 1.3136028999122877, "grad_norm": 3.9729681872381737, "learning_rate": 9.970075181401136e-06, "loss": 0.794, "step": 79000 }, { "epoch": 1.3152656936551947, "grad_norm": 3.2165524645881822, "learning_rate": 9.969757307987356e-06, "loss": 0.8259, "step": 79100 }, { "epoch": 1.316928487398102, "grad_norm": 3.7553469471332592, "learning_rate": 9.969437760313014e-06, "loss": 0.829, "step": 79200 }, { "epoch": 1.318591281141009, "grad_norm": 4.766162549524405, "learning_rate": 9.96911653848576e-06, "loss": 0.8079, "step": 79300 }, { "epoch": 1.3202540748839162, "grad_norm": 4.124606597731356, "learning_rate": 9.96879364261381e-06, "loss": 0.8318, "step": 79400 }, { "epoch": 1.3219168686268232, "grad_norm": 4.315056058870725, "learning_rate": 9.968469072805944e-06, "loss": 0.7942, "step": 79500 }, { "epoch": 1.3235796623697305, "grad_norm": 4.318227294483239, "learning_rate": 9.96814282917151e-06, "loss": 0.8307, "step": 79600 }, { "epoch": 1.3252424561126377, "grad_norm": 3.080458030722517, "learning_rate": 9.967814911820413e-06, "loss": 0.8236, "step": 79700 }, { "epoch": 1.3269052498555447, "grad_norm": 3.9862099284772055, "learning_rate": 9.967485320863126e-06, "loss": 0.8302, "step": 79800 }, { "epoch": 1.328568043598452, "grad_norm": 3.6416300634259424, "learning_rate": 9.967154056410684e-06, "loss": 0.8005, "step": 79900 }, { "epoch": 1.3302308373413592, "grad_norm": 3.5964197051710145, "learning_rate": 9.966821118574687e-06, "loss": 0.7826, "step": 80000 }, { "epoch": 1.3318936310842662, "grad_norm": 4.4848980105638185, "learning_rate": 9.966486507467298e-06, "loss": 0.7898, "step": 80100 }, { "epoch": 1.3335564248271734, "grad_norm": 3.5535300701384944, "learning_rate": 9.966150223201245e-06, "loss": 0.7916, "step": 80200 }, { "epoch": 1.3352192185700806, "grad_norm": 6.080567105587135, "learning_rate": 9.965812265889819e-06, "loss": 0.8272, "step": 80300 }, { "epoch": 1.3368820123129876, "grad_norm": 3.7320674968418466, "learning_rate": 9.965472635646873e-06, "loss": 0.7949, "step": 80400 }, { "epoch": 1.3385448060558949, "grad_norm": 3.722843060353167, "learning_rate": 9.965131332586826e-06, "loss": 0.8063, "step": 80500 }, { "epoch": 1.3402075997988019, "grad_norm": 3.320738951402475, "learning_rate": 9.964788356824659e-06, "loss": 0.8323, "step": 80600 }, { "epoch": 1.3418703935417091, "grad_norm": 4.893094979974242, "learning_rate": 9.964443708475918e-06, "loss": 0.7752, "step": 80700 }, { "epoch": 1.3435331872846161, "grad_norm": 5.214677071354442, "learning_rate": 9.96409738765671e-06, "loss": 0.7915, "step": 80800 }, { "epoch": 1.3451959810275234, "grad_norm": 3.8861568990616204, "learning_rate": 9.963749394483708e-06, "loss": 0.8157, "step": 80900 }, { "epoch": 1.3468587747704306, "grad_norm": 3.010353622502149, "learning_rate": 9.963399729074147e-06, "loss": 0.7807, "step": 81000 }, { "epoch": 1.3485215685133376, "grad_norm": 4.26531759597162, "learning_rate": 9.963048391545826e-06, "loss": 0.8141, "step": 81100 }, { "epoch": 1.3501843622562448, "grad_norm": 3.852918990875345, "learning_rate": 9.962695382017107e-06, "loss": 0.8162, "step": 81200 }, { "epoch": 1.351847155999152, "grad_norm": 3.1141048336468065, "learning_rate": 9.962340700606915e-06, "loss": 0.8104, "step": 81300 }, { "epoch": 1.353509949742059, "grad_norm": 3.220594698977834, "learning_rate": 9.961984347434736e-06, "loss": 0.8007, "step": 81400 }, { "epoch": 1.3551727434849663, "grad_norm": 3.7432874164064893, "learning_rate": 9.961626322620627e-06, "loss": 0.8184, "step": 81500 }, { "epoch": 1.3568355372278735, "grad_norm": 2.96539066047192, "learning_rate": 9.9612666262852e-06, "loss": 0.8048, "step": 81600 }, { "epoch": 1.3584983309707805, "grad_norm": 5.309822902460754, "learning_rate": 9.960905258549633e-06, "loss": 0.7714, "step": 81700 }, { "epoch": 1.3601611247136878, "grad_norm": 3.914083658790174, "learning_rate": 9.960542219535668e-06, "loss": 0.7725, "step": 81800 }, { "epoch": 1.3618239184565948, "grad_norm": 3.667553970182072, "learning_rate": 9.960177509365608e-06, "loss": 0.8255, "step": 81900 }, { "epoch": 1.363486712199502, "grad_norm": 4.509014241741513, "learning_rate": 9.959811128162322e-06, "loss": 0.8199, "step": 82000 }, { "epoch": 1.365149505942409, "grad_norm": 3.986985555165724, "learning_rate": 9.959443076049238e-06, "loss": 0.8007, "step": 82100 }, { "epoch": 1.3668122996853163, "grad_norm": 3.06932754861253, "learning_rate": 9.959073353150349e-06, "loss": 0.8003, "step": 82200 }, { "epoch": 1.3684750934282235, "grad_norm": 4.018527105292403, "learning_rate": 9.958701959590212e-06, "loss": 0.7636, "step": 82300 }, { "epoch": 1.3701378871711305, "grad_norm": 4.374507366389912, "learning_rate": 9.958328895493946e-06, "loss": 0.7892, "step": 82400 }, { "epoch": 1.3718006809140377, "grad_norm": 3.0275499078031864, "learning_rate": 9.95795416098723e-06, "loss": 0.7977, "step": 82500 }, { "epoch": 1.373463474656945, "grad_norm": 3.4506987990176854, "learning_rate": 9.957577756196312e-06, "loss": 0.7998, "step": 82600 }, { "epoch": 1.375126268399852, "grad_norm": 4.843915800179339, "learning_rate": 9.957199681247997e-06, "loss": 0.7815, "step": 82700 }, { "epoch": 1.3767890621427592, "grad_norm": 3.507624733162208, "learning_rate": 9.956819936269655e-06, "loss": 0.8117, "step": 82800 }, { "epoch": 1.3784518558856664, "grad_norm": 3.7133811691232754, "learning_rate": 9.956438521389217e-06, "loss": 0.8226, "step": 82900 }, { "epoch": 1.3801146496285734, "grad_norm": 4.456304529715196, "learning_rate": 9.95605543673518e-06, "loss": 0.8582, "step": 83000 }, { "epoch": 1.3817774433714807, "grad_norm": 5.779159372091458, "learning_rate": 9.955670682436598e-06, "loss": 0.7998, "step": 83100 }, { "epoch": 1.3834402371143877, "grad_norm": 3.457481726600979, "learning_rate": 9.955284258623095e-06, "loss": 0.8445, "step": 83200 }, { "epoch": 1.385103030857295, "grad_norm": 4.076990396424398, "learning_rate": 9.95489616542485e-06, "loss": 0.7683, "step": 83300 }, { "epoch": 1.386765824600202, "grad_norm": 4.170805247035064, "learning_rate": 9.954506402972607e-06, "loss": 0.8036, "step": 83400 }, { "epoch": 1.3884286183431092, "grad_norm": 3.717064832800998, "learning_rate": 9.954114971397676e-06, "loss": 0.7919, "step": 83500 }, { "epoch": 1.3900914120860164, "grad_norm": 3.735312261620411, "learning_rate": 9.953721870831925e-06, "loss": 0.7831, "step": 83600 }, { "epoch": 1.3917542058289234, "grad_norm": 3.2305866887763637, "learning_rate": 9.953327101407786e-06, "loss": 0.7636, "step": 83700 }, { "epoch": 1.3934169995718306, "grad_norm": 5.772440547394107, "learning_rate": 9.952930663258251e-06, "loss": 0.7863, "step": 83800 }, { "epoch": 1.3950797933147379, "grad_norm": 3.691510000695068, "learning_rate": 9.952532556516878e-06, "loss": 0.7579, "step": 83900 }, { "epoch": 1.3967425870576449, "grad_norm": 4.948677356669001, "learning_rate": 9.952132781317785e-06, "loss": 0.7681, "step": 84000 }, { "epoch": 1.398405380800552, "grad_norm": 3.183513650700687, "learning_rate": 9.95173133779565e-06, "loss": 0.7696, "step": 84100 }, { "epoch": 1.4000681745434593, "grad_norm": 3.5285046416049166, "learning_rate": 9.951328226085718e-06, "loss": 0.7747, "step": 84200 }, { "epoch": 1.4017309682863663, "grad_norm": 4.226096944062637, "learning_rate": 9.950923446323792e-06, "loss": 0.8082, "step": 84300 }, { "epoch": 1.4033937620292734, "grad_norm": 3.5006440008016075, "learning_rate": 9.950516998646239e-06, "loss": 0.799, "step": 84400 }, { "epoch": 1.4050565557721806, "grad_norm": 3.75172219660176, "learning_rate": 9.950108883189984e-06, "loss": 0.8172, "step": 84500 }, { "epoch": 1.4067193495150878, "grad_norm": 3.6033557839438726, "learning_rate": 9.949699100092521e-06, "loss": 0.7681, "step": 84600 }, { "epoch": 1.4083821432579948, "grad_norm": 4.26892457437623, "learning_rate": 9.949287649491898e-06, "loss": 0.7538, "step": 84700 }, { "epoch": 1.410044937000902, "grad_norm": 4.121435680380036, "learning_rate": 9.948874531526733e-06, "loss": 0.8083, "step": 84800 }, { "epoch": 1.4117077307438093, "grad_norm": 4.284890279652193, "learning_rate": 9.948459746336198e-06, "loss": 0.7999, "step": 84900 }, { "epoch": 1.4133705244867163, "grad_norm": 4.328408029077744, "learning_rate": 9.94804329406003e-06, "loss": 0.7853, "step": 85000 }, { "epoch": 1.4150333182296235, "grad_norm": 3.5704192024689596, "learning_rate": 9.947625174838528e-06, "loss": 0.7812, "step": 85100 }, { "epoch": 1.4166961119725308, "grad_norm": 3.69092386971968, "learning_rate": 9.947205388812554e-06, "loss": 0.7777, "step": 85200 }, { "epoch": 1.4183589057154378, "grad_norm": 3.8876034515733275, "learning_rate": 9.946783936123525e-06, "loss": 0.8047, "step": 85300 }, { "epoch": 1.420021699458345, "grad_norm": 3.665695253926027, "learning_rate": 9.946360816913429e-06, "loss": 0.7942, "step": 85400 }, { "epoch": 1.421684493201252, "grad_norm": 5.07078047694962, "learning_rate": 9.945936031324807e-06, "loss": 0.8202, "step": 85500 }, { "epoch": 1.4233472869441592, "grad_norm": 3.2025322133879133, "learning_rate": 9.945509579500768e-06, "loss": 0.7772, "step": 85600 }, { "epoch": 1.4250100806870662, "grad_norm": 4.50238700247485, "learning_rate": 9.945081461584977e-06, "loss": 0.792, "step": 85700 }, { "epoch": 1.4266728744299735, "grad_norm": 3.962627220237532, "learning_rate": 9.944651677721663e-06, "loss": 0.812, "step": 85800 }, { "epoch": 1.4283356681728807, "grad_norm": 3.389746517763239, "learning_rate": 9.944220228055618e-06, "loss": 0.7961, "step": 85900 }, { "epoch": 1.4299984619157877, "grad_norm": 3.19047444602814, "learning_rate": 9.94378711273219e-06, "loss": 0.7899, "step": 86000 }, { "epoch": 1.431661255658695, "grad_norm": 4.377870060324949, "learning_rate": 9.943352331897291e-06, "loss": 0.7873, "step": 86100 }, { "epoch": 1.4333240494016022, "grad_norm": 4.005558664091523, "learning_rate": 9.942915885697398e-06, "loss": 0.7681, "step": 86200 }, { "epoch": 1.4349868431445092, "grad_norm": 3.454292926538536, "learning_rate": 9.94247777427954e-06, "loss": 0.7927, "step": 86300 }, { "epoch": 1.4366496368874164, "grad_norm": 3.8723074230692833, "learning_rate": 9.942037997791315e-06, "loss": 0.7727, "step": 86400 }, { "epoch": 1.4383124306303237, "grad_norm": 3.8025222014756244, "learning_rate": 9.941596556380882e-06, "loss": 0.7604, "step": 86500 }, { "epoch": 1.4399752243732307, "grad_norm": 3.2367132543067587, "learning_rate": 9.941153450196951e-06, "loss": 0.7739, "step": 86600 }, { "epoch": 1.441638018116138, "grad_norm": 4.0339185702816165, "learning_rate": 9.940708679388808e-06, "loss": 0.7651, "step": 86700 }, { "epoch": 1.443300811859045, "grad_norm": 3.0235387012131323, "learning_rate": 9.940262244106288e-06, "loss": 0.7666, "step": 86800 }, { "epoch": 1.4449636056019521, "grad_norm": 4.72098564979634, "learning_rate": 9.939814144499788e-06, "loss": 0.7671, "step": 86900 }, { "epoch": 1.4466263993448591, "grad_norm": 3.2881982331667143, "learning_rate": 9.939364380720273e-06, "loss": 0.7921, "step": 87000 }, { "epoch": 1.4482891930877664, "grad_norm": 3.74880501638505, "learning_rate": 9.93891295291926e-06, "loss": 0.8028, "step": 87100 }, { "epoch": 1.4499519868306736, "grad_norm": 3.542652846940566, "learning_rate": 9.938459861248833e-06, "loss": 0.7789, "step": 87200 }, { "epoch": 1.4516147805735806, "grad_norm": 3.570755739519519, "learning_rate": 9.938005105861633e-06, "loss": 0.7947, "step": 87300 }, { "epoch": 1.4532775743164879, "grad_norm": 3.623640582672003, "learning_rate": 9.937548686910861e-06, "loss": 0.7703, "step": 87400 }, { "epoch": 1.454940368059395, "grad_norm": 4.588997948333026, "learning_rate": 9.937090604550284e-06, "loss": 0.8043, "step": 87500 }, { "epoch": 1.456603161802302, "grad_norm": 4.427037941222024, "learning_rate": 9.93663085893422e-06, "loss": 0.7802, "step": 87600 }, { "epoch": 1.4582659555452093, "grad_norm": 5.2953056564188605, "learning_rate": 9.936169450217556e-06, "loss": 0.7794, "step": 87700 }, { "epoch": 1.4599287492881166, "grad_norm": 3.392085594156179, "learning_rate": 9.935706378555735e-06, "loss": 0.8037, "step": 87800 }, { "epoch": 1.4615915430310236, "grad_norm": 3.573508216644682, "learning_rate": 9.935241644104761e-06, "loss": 0.7638, "step": 87900 }, { "epoch": 1.4632543367739308, "grad_norm": 3.310903275512932, "learning_rate": 9.9347752470212e-06, "loss": 0.7791, "step": 88000 }, { "epoch": 1.4649171305168378, "grad_norm": 3.644730457506361, "learning_rate": 9.934307187462174e-06, "loss": 0.7865, "step": 88100 }, { "epoch": 1.466579924259745, "grad_norm": 4.578513615159866, "learning_rate": 9.933837465585367e-06, "loss": 0.7804, "step": 88200 }, { "epoch": 1.468242718002652, "grad_norm": 4.1970516493836465, "learning_rate": 9.933366081549024e-06, "loss": 0.7749, "step": 88300 }, { "epoch": 1.4699055117455593, "grad_norm": 4.086272691551801, "learning_rate": 9.932893035511953e-06, "loss": 0.7898, "step": 88400 }, { "epoch": 1.4715683054884665, "grad_norm": 4.7228652079941025, "learning_rate": 9.932418327633515e-06, "loss": 0.7896, "step": 88500 }, { "epoch": 1.4732310992313735, "grad_norm": 3.6143259622343673, "learning_rate": 9.931941958073635e-06, "loss": 0.77, "step": 88600 }, { "epoch": 1.4748938929742808, "grad_norm": 2.814557092886022, "learning_rate": 9.931463926992797e-06, "loss": 0.7783, "step": 88700 }, { "epoch": 1.476556686717188, "grad_norm": 4.132600064594909, "learning_rate": 9.930984234552046e-06, "loss": 0.789, "step": 88800 }, { "epoch": 1.478219480460095, "grad_norm": 4.223105090169592, "learning_rate": 9.930502880912982e-06, "loss": 0.7732, "step": 88900 }, { "epoch": 1.4798822742030022, "grad_norm": 3.218416346798435, "learning_rate": 9.930019866237773e-06, "loss": 0.8083, "step": 89000 }, { "epoch": 1.4815450679459095, "grad_norm": 3.78850201359491, "learning_rate": 9.929535190689137e-06, "loss": 0.7644, "step": 89100 }, { "epoch": 1.4832078616888165, "grad_norm": 4.690078098917524, "learning_rate": 9.92904885443036e-06, "loss": 0.7765, "step": 89200 }, { "epoch": 1.4848706554317235, "grad_norm": 3.333453249456671, "learning_rate": 9.928560857625281e-06, "loss": 0.79, "step": 89300 }, { "epoch": 1.4865334491746307, "grad_norm": 4.197244139646398, "learning_rate": 9.928071200438304e-06, "loss": 0.7538, "step": 89400 }, { "epoch": 1.488196242917538, "grad_norm": 4.190088749325538, "learning_rate": 9.927579883034386e-06, "loss": 0.7844, "step": 89500 }, { "epoch": 1.489859036660445, "grad_norm": 4.609226779765057, "learning_rate": 9.927086905579048e-06, "loss": 0.8012, "step": 89600 }, { "epoch": 1.4915218304033522, "grad_norm": 3.3126148655619505, "learning_rate": 9.926592268238372e-06, "loss": 0.8152, "step": 89700 }, { "epoch": 1.4931846241462594, "grad_norm": 3.092653174763841, "learning_rate": 9.926095971178993e-06, "loss": 0.7697, "step": 89800 }, { "epoch": 1.4948474178891664, "grad_norm": 2.631006787142747, "learning_rate": 9.92559801456811e-06, "loss": 0.7619, "step": 89900 }, { "epoch": 1.4965102116320737, "grad_norm": 3.833896076269967, "learning_rate": 9.925098398573478e-06, "loss": 0.7657, "step": 90000 }, { "epoch": 1.4981730053749809, "grad_norm": 5.968968345215898, "learning_rate": 9.924597123363415e-06, "loss": 0.7544, "step": 90100 }, { "epoch": 1.499835799117888, "grad_norm": 3.3961306160537905, "learning_rate": 9.924094189106793e-06, "loss": 0.7687, "step": 90200 }, { "epoch": 1.501498592860795, "grad_norm": 3.9349016601469153, "learning_rate": 9.923589595973045e-06, "loss": 0.786, "step": 90300 }, { "epoch": 1.5031613866037024, "grad_norm": 3.322763328194784, "learning_rate": 9.923083344132166e-06, "loss": 0.7897, "step": 90400 }, { "epoch": 1.5048241803466094, "grad_norm": 3.1689946759445253, "learning_rate": 9.922575433754707e-06, "loss": 0.757, "step": 90500 }, { "epoch": 1.5064869740895164, "grad_norm": 3.402887453129489, "learning_rate": 9.922065865011774e-06, "loss": 0.7781, "step": 90600 }, { "epoch": 1.5081497678324236, "grad_norm": 4.092730392651357, "learning_rate": 9.92155463807504e-06, "loss": 0.7919, "step": 90700 }, { "epoch": 1.5098125615753308, "grad_norm": 3.236681590000138, "learning_rate": 9.921041753116728e-06, "loss": 0.8009, "step": 90800 }, { "epoch": 1.5114753553182378, "grad_norm": 3.326886439740561, "learning_rate": 9.920527210309629e-06, "loss": 0.7765, "step": 90900 }, { "epoch": 1.513138149061145, "grad_norm": 3.589624246082719, "learning_rate": 9.920011009827083e-06, "loss": 0.7728, "step": 91000 }, { "epoch": 1.5148009428040523, "grad_norm": 3.6352060990798214, "learning_rate": 9.919493151842993e-06, "loss": 0.78, "step": 91100 }, { "epoch": 1.5164637365469593, "grad_norm": 5.6658204598876, "learning_rate": 9.918973636531823e-06, "loss": 0.8099, "step": 91200 }, { "epoch": 1.5181265302898665, "grad_norm": 4.8102305637551614, "learning_rate": 9.918452464068588e-06, "loss": 0.7845, "step": 91300 }, { "epoch": 1.5197893240327738, "grad_norm": 2.974457103976038, "learning_rate": 9.91792963462887e-06, "loss": 0.7958, "step": 91400 }, { "epoch": 1.5214521177756808, "grad_norm": 3.328841064729169, "learning_rate": 9.917405148388801e-06, "loss": 0.7554, "step": 91500 }, { "epoch": 1.5231149115185878, "grad_norm": 3.4903133247739477, "learning_rate": 9.916879005525078e-06, "loss": 0.7948, "step": 91600 }, { "epoch": 1.5247777052614953, "grad_norm": 5.058129340850285, "learning_rate": 9.916351206214953e-06, "loss": 0.8137, "step": 91700 }, { "epoch": 1.5264404990044023, "grad_norm": 3.523313468512379, "learning_rate": 9.915821750636234e-06, "loss": 0.7836, "step": 91800 }, { "epoch": 1.5281032927473093, "grad_norm": 2.8341692592413366, "learning_rate": 9.91529063896729e-06, "loss": 0.7431, "step": 91900 }, { "epoch": 1.5297660864902165, "grad_norm": 2.8936943581663304, "learning_rate": 9.91475787138705e-06, "loss": 0.753, "step": 92000 }, { "epoch": 1.5314288802331237, "grad_norm": 3.9936483613281064, "learning_rate": 9.914223448074995e-06, "loss": 0.8055, "step": 92100 }, { "epoch": 1.5330916739760307, "grad_norm": 3.2495170148806496, "learning_rate": 9.913687369211166e-06, "loss": 0.7748, "step": 92200 }, { "epoch": 1.534754467718938, "grad_norm": 4.17825831053082, "learning_rate": 9.913149634976164e-06, "loss": 0.7897, "step": 92300 }, { "epoch": 1.5364172614618452, "grad_norm": 4.391653767478127, "learning_rate": 9.912610245551148e-06, "loss": 0.7643, "step": 92400 }, { "epoch": 1.5380800552047522, "grad_norm": 4.330592919966566, "learning_rate": 9.912069201117829e-06, "loss": 0.7933, "step": 92500 }, { "epoch": 1.5397428489476594, "grad_norm": 4.28866431310378, "learning_rate": 9.91152650185848e-06, "loss": 0.7589, "step": 92600 }, { "epoch": 1.5414056426905667, "grad_norm": 4.12015251756233, "learning_rate": 9.910982147955937e-06, "loss": 0.7952, "step": 92700 }, { "epoch": 1.5430684364334737, "grad_norm": 4.335101603600038, "learning_rate": 9.910436139593579e-06, "loss": 0.7904, "step": 92800 }, { "epoch": 1.5447312301763807, "grad_norm": 4.113173303517111, "learning_rate": 9.909888476955354e-06, "loss": 0.796, "step": 92900 }, { "epoch": 1.5463940239192882, "grad_norm": 4.651140758807762, "learning_rate": 9.909339160225766e-06, "loss": 0.7938, "step": 93000 }, { "epoch": 1.5480568176621952, "grad_norm": 5.237251231786318, "learning_rate": 9.908788189589868e-06, "loss": 0.7885, "step": 93100 }, { "epoch": 1.5497196114051022, "grad_norm": 5.6337382106317255, "learning_rate": 9.908235565233283e-06, "loss": 0.7683, "step": 93200 }, { "epoch": 1.5513824051480094, "grad_norm": 5.062786857556366, "learning_rate": 9.907681287342183e-06, "loss": 0.7534, "step": 93300 }, { "epoch": 1.5530451988909166, "grad_norm": 3.820293552923005, "learning_rate": 9.907125356103297e-06, "loss": 0.7832, "step": 93400 }, { "epoch": 1.5547079926338236, "grad_norm": 6.090160898491393, "learning_rate": 9.906567771703914e-06, "loss": 0.7692, "step": 93500 }, { "epoch": 1.5563707863767309, "grad_norm": 3.5637119487532787, "learning_rate": 9.906008534331876e-06, "loss": 0.7987, "step": 93600 }, { "epoch": 1.558033580119638, "grad_norm": 5.6694261196670075, "learning_rate": 9.905447644175587e-06, "loss": 0.7931, "step": 93700 }, { "epoch": 1.5596963738625451, "grad_norm": 3.9210545127920637, "learning_rate": 9.904885101424004e-06, "loss": 0.7833, "step": 93800 }, { "epoch": 1.5613591676054523, "grad_norm": 3.0429657861750763, "learning_rate": 9.904320906266643e-06, "loss": 0.7902, "step": 93900 }, { "epoch": 1.5630219613483596, "grad_norm": 3.754229620748171, "learning_rate": 9.903755058893574e-06, "loss": 0.7662, "step": 94000 }, { "epoch": 1.5646847550912666, "grad_norm": 3.3145877875245273, "learning_rate": 9.903187559495426e-06, "loss": 0.7945, "step": 94100 }, { "epoch": 1.5663475488341736, "grad_norm": 4.8926266460137, "learning_rate": 9.902618408263385e-06, "loss": 0.7766, "step": 94200 }, { "epoch": 1.5680103425770808, "grad_norm": 2.596511863899627, "learning_rate": 9.902047605389189e-06, "loss": 0.7568, "step": 94300 }, { "epoch": 1.569673136319988, "grad_norm": 4.004396995659401, "learning_rate": 9.901475151065138e-06, "loss": 0.739, "step": 94400 }, { "epoch": 1.571335930062895, "grad_norm": 4.874817091392248, "learning_rate": 9.900901045484087e-06, "loss": 0.7699, "step": 94500 }, { "epoch": 1.5729987238058023, "grad_norm": 3.7066420138784557, "learning_rate": 9.900325288839444e-06, "loss": 0.7773, "step": 94600 }, { "epoch": 1.5746615175487095, "grad_norm": 3.604759864681537, "learning_rate": 9.899747881325177e-06, "loss": 0.747, "step": 94700 }, { "epoch": 1.5763243112916165, "grad_norm": 3.008379154878687, "learning_rate": 9.899168823135807e-06, "loss": 0.7669, "step": 94800 }, { "epoch": 1.5779871050345238, "grad_norm": 3.0391731758811322, "learning_rate": 9.898588114466417e-06, "loss": 0.7843, "step": 94900 }, { "epoch": 1.579649898777431, "grad_norm": 3.763471553430544, "learning_rate": 9.898005755512636e-06, "loss": 0.7465, "step": 95000 }, { "epoch": 1.581312692520338, "grad_norm": 4.467719160375297, "learning_rate": 9.897421746470658e-06, "loss": 0.7312, "step": 95100 }, { "epoch": 1.5829754862632452, "grad_norm": 3.775229663802923, "learning_rate": 9.89683608753723e-06, "loss": 0.7484, "step": 95200 }, { "epoch": 1.5846382800061525, "grad_norm": 4.706030085521457, "learning_rate": 9.896248778909653e-06, "loss": 0.7744, "step": 95300 }, { "epoch": 1.5863010737490595, "grad_norm": 4.201678645808538, "learning_rate": 9.895659820785789e-06, "loss": 0.7419, "step": 95400 }, { "epoch": 1.5879638674919665, "grad_norm": 4.321149490186043, "learning_rate": 9.895069213364045e-06, "loss": 0.7233, "step": 95500 }, { "epoch": 1.5896266612348737, "grad_norm": 4.375189375828417, "learning_rate": 9.894476956843399e-06, "loss": 0.7853, "step": 95600 }, { "epoch": 1.591289454977781, "grad_norm": 3.3189753710540737, "learning_rate": 9.893883051423369e-06, "loss": 0.7703, "step": 95700 }, { "epoch": 1.592952248720688, "grad_norm": 3.1682198411648774, "learning_rate": 9.893287497304041e-06, "loss": 0.7995, "step": 95800 }, { "epoch": 1.5946150424635952, "grad_norm": 4.282852223226291, "learning_rate": 9.892690294686046e-06, "loss": 0.7753, "step": 95900 }, { "epoch": 1.5962778362065024, "grad_norm": 4.222485299496144, "learning_rate": 9.89209144377058e-06, "loss": 0.7633, "step": 96000 }, { "epoch": 1.5979406299494094, "grad_norm": 2.998118364951193, "learning_rate": 9.891490944759388e-06, "loss": 0.7688, "step": 96100 }, { "epoch": 1.5996034236923167, "grad_norm": 4.187760219766308, "learning_rate": 9.890888797854771e-06, "loss": 0.7737, "step": 96200 }, { "epoch": 1.601266217435224, "grad_norm": 3.5240243437396868, "learning_rate": 9.890285003259588e-06, "loss": 0.7387, "step": 96300 }, { "epoch": 1.602929011178131, "grad_norm": 4.045441369829308, "learning_rate": 9.88967956117725e-06, "loss": 0.7534, "step": 96400 }, { "epoch": 1.604591804921038, "grad_norm": 3.4594610656753293, "learning_rate": 9.889072471811724e-06, "loss": 0.7688, "step": 96500 }, { "epoch": 1.6062545986639454, "grad_norm": 3.729118706443404, "learning_rate": 9.888463735367532e-06, "loss": 0.8119, "step": 96600 }, { "epoch": 1.6079173924068524, "grad_norm": 2.7963888567458115, "learning_rate": 9.887853352049755e-06, "loss": 0.7456, "step": 96700 }, { "epoch": 1.6095801861497594, "grad_norm": 3.586498808565558, "learning_rate": 9.88724132206402e-06, "loss": 0.7495, "step": 96800 }, { "epoch": 1.6112429798926666, "grad_norm": 4.835899309281381, "learning_rate": 9.886627645616515e-06, "loss": 0.7534, "step": 96900 }, { "epoch": 1.6129057736355739, "grad_norm": 4.259340883879215, "learning_rate": 9.886012322913984e-06, "loss": 0.7718, "step": 97000 }, { "epoch": 1.6145685673784809, "grad_norm": 3.0871322552145934, "learning_rate": 9.88539535416372e-06, "loss": 0.7835, "step": 97100 }, { "epoch": 1.616231361121388, "grad_norm": 3.9857591507122274, "learning_rate": 9.884776739573574e-06, "loss": 0.7754, "step": 97200 }, { "epoch": 1.6178941548642953, "grad_norm": 4.439460868021525, "learning_rate": 9.884156479351951e-06, "loss": 0.7571, "step": 97300 }, { "epoch": 1.6195569486072023, "grad_norm": 3.3869795740964155, "learning_rate": 9.88353457370781e-06, "loss": 0.7667, "step": 97400 }, { "epoch": 1.6212197423501096, "grad_norm": 3.6492504950205604, "learning_rate": 9.882911022850667e-06, "loss": 0.7387, "step": 97500 }, { "epoch": 1.6228825360930168, "grad_norm": 4.0750690385275865, "learning_rate": 9.882285826990587e-06, "loss": 0.7711, "step": 97600 }, { "epoch": 1.6245453298359238, "grad_norm": 4.093455240036983, "learning_rate": 9.881658986338194e-06, "loss": 0.7446, "step": 97700 }, { "epoch": 1.6262081235788308, "grad_norm": 3.9284267359307283, "learning_rate": 9.881030501104662e-06, "loss": 0.7895, "step": 97800 }, { "epoch": 1.6278709173217383, "grad_norm": 3.2337031925403843, "learning_rate": 9.880400371501723e-06, "loss": 0.7821, "step": 97900 }, { "epoch": 1.6295337110646453, "grad_norm": 2.379372253841022, "learning_rate": 9.879768597741663e-06, "loss": 0.7588, "step": 98000 }, { "epoch": 1.6311965048075523, "grad_norm": 3.806395651783701, "learning_rate": 9.879135180037316e-06, "loss": 0.7488, "step": 98100 }, { "epoch": 1.6328592985504595, "grad_norm": 3.9272942204803503, "learning_rate": 9.878500118602079e-06, "loss": 0.7553, "step": 98200 }, { "epoch": 1.6345220922933668, "grad_norm": 4.103943325622184, "learning_rate": 9.877863413649893e-06, "loss": 0.7373, "step": 98300 }, { "epoch": 1.6361848860362738, "grad_norm": 4.271193682359871, "learning_rate": 9.87722506539526e-06, "loss": 0.7759, "step": 98400 }, { "epoch": 1.637847679779181, "grad_norm": 3.190211028041944, "learning_rate": 9.876585074053231e-06, "loss": 0.7872, "step": 98500 }, { "epoch": 1.6395104735220882, "grad_norm": 4.217003506434438, "learning_rate": 9.875943439839414e-06, "loss": 0.7245, "step": 98600 }, { "epoch": 1.6411732672649952, "grad_norm": 3.5518388367082747, "learning_rate": 9.87530016296997e-06, "loss": 0.7699, "step": 98700 }, { "epoch": 1.6428360610079025, "grad_norm": 3.789137669888503, "learning_rate": 9.87465524366161e-06, "loss": 0.7662, "step": 98800 }, { "epoch": 1.6444988547508097, "grad_norm": 4.177649719451542, "learning_rate": 9.874008682131602e-06, "loss": 0.775, "step": 98900 }, { "epoch": 1.6461616484937167, "grad_norm": 3.9418553551951248, "learning_rate": 9.873360478597766e-06, "loss": 0.77, "step": 99000 }, { "epoch": 1.6478244422366237, "grad_norm": 3.0068372730566724, "learning_rate": 9.872710633278478e-06, "loss": 0.7805, "step": 99100 }, { "epoch": 1.6494872359795312, "grad_norm": 3.788762889720705, "learning_rate": 9.872059146392658e-06, "loss": 0.7643, "step": 99200 }, { "epoch": 1.6511500297224382, "grad_norm": 3.73263790323488, "learning_rate": 9.87140601815979e-06, "loss": 0.8055, "step": 99300 }, { "epoch": 1.6528128234653452, "grad_norm": 4.125011901955223, "learning_rate": 9.870751248799905e-06, "loss": 0.7982, "step": 99400 }, { "epoch": 1.6544756172082524, "grad_norm": 3.4770021674445406, "learning_rate": 9.870094838533587e-06, "loss": 0.7473, "step": 99500 }, { "epoch": 1.6561384109511597, "grad_norm": 3.576998179115207, "learning_rate": 9.869436787581976e-06, "loss": 0.7567, "step": 99600 }, { "epoch": 1.6578012046940667, "grad_norm": 3.401773266511048, "learning_rate": 9.868777096166762e-06, "loss": 0.8024, "step": 99700 }, { "epoch": 1.659463998436974, "grad_norm": 3.6961384076843804, "learning_rate": 9.868115764510189e-06, "loss": 0.7459, "step": 99800 }, { "epoch": 1.6611267921798811, "grad_norm": 3.64010023307875, "learning_rate": 9.86745279283505e-06, "loss": 0.7654, "step": 99900 }, { "epoch": 1.6627895859227881, "grad_norm": 2.829736483432069, "learning_rate": 9.866788181364696e-06, "loss": 0.7587, "step": 100000 }, { "epoch": 1.6644523796656954, "grad_norm": 3.5192487008929083, "learning_rate": 9.866121930323028e-06, "loss": 0.7796, "step": 100100 }, { "epoch": 1.6661151734086026, "grad_norm": 3.629937742692836, "learning_rate": 9.865454039934496e-06, "loss": 0.7566, "step": 100200 }, { "epoch": 1.6677779671515096, "grad_norm": 5.922875307184434, "learning_rate": 9.864784510424108e-06, "loss": 0.7628, "step": 100300 }, { "epoch": 1.6694407608944166, "grad_norm": 3.29958843275615, "learning_rate": 9.864113342017423e-06, "loss": 0.7334, "step": 100400 }, { "epoch": 1.6711035546373239, "grad_norm": 3.29621995760342, "learning_rate": 9.86344053494055e-06, "loss": 0.7606, "step": 100500 }, { "epoch": 1.672766348380231, "grad_norm": 2.9358900396765764, "learning_rate": 9.862766089420147e-06, "loss": 0.7723, "step": 100600 }, { "epoch": 1.674429142123138, "grad_norm": 2.896378044492428, "learning_rate": 9.862090005683433e-06, "loss": 0.7574, "step": 100700 }, { "epoch": 1.6760919358660453, "grad_norm": 3.5389802988881236, "learning_rate": 9.861412283958171e-06, "loss": 0.7483, "step": 100800 }, { "epoch": 1.6777547296089526, "grad_norm": 4.215025433321102, "learning_rate": 9.860732924472678e-06, "loss": 0.7403, "step": 100900 }, { "epoch": 1.6794175233518596, "grad_norm": 4.055163145724576, "learning_rate": 9.860051927455827e-06, "loss": 0.7609, "step": 101000 }, { "epoch": 1.6810803170947668, "grad_norm": 3.914274018155986, "learning_rate": 9.859369293137035e-06, "loss": 0.7627, "step": 101100 }, { "epoch": 1.682743110837674, "grad_norm": 3.7314335059263692, "learning_rate": 9.858685021746275e-06, "loss": 0.7346, "step": 101200 }, { "epoch": 1.684405904580581, "grad_norm": 4.312072011447376, "learning_rate": 9.857999113514074e-06, "loss": 0.7636, "step": 101300 }, { "epoch": 1.686068698323488, "grad_norm": 3.5707298492114585, "learning_rate": 9.857311568671503e-06, "loss": 0.759, "step": 101400 }, { "epoch": 1.6877314920663955, "grad_norm": 4.184173629292376, "learning_rate": 9.856622387450192e-06, "loss": 0.7574, "step": 101500 }, { "epoch": 1.6893942858093025, "grad_norm": 4.15579068573827, "learning_rate": 9.85593157008232e-06, "loss": 0.7268, "step": 101600 }, { "epoch": 1.6910570795522095, "grad_norm": 3.7596181842722163, "learning_rate": 9.855239116800612e-06, "loss": 0.7839, "step": 101700 }, { "epoch": 1.6927198732951168, "grad_norm": 2.98060814335271, "learning_rate": 9.854545027838354e-06, "loss": 0.7271, "step": 101800 }, { "epoch": 1.694382667038024, "grad_norm": 3.190088044762649, "learning_rate": 9.853849303429373e-06, "loss": 0.7251, "step": 101900 }, { "epoch": 1.696045460780931, "grad_norm": 3.812429371612506, "learning_rate": 9.853151943808054e-06, "loss": 0.7486, "step": 102000 }, { "epoch": 1.6977082545238382, "grad_norm": 3.255335157101712, "learning_rate": 9.852452949209329e-06, "loss": 0.7528, "step": 102100 }, { "epoch": 1.6993710482667455, "grad_norm": 3.4173856311950526, "learning_rate": 9.851752319868682e-06, "loss": 0.7533, "step": 102200 }, { "epoch": 1.7010338420096525, "grad_norm": 4.45193559034386, "learning_rate": 9.85105005602215e-06, "loss": 0.7639, "step": 102300 }, { "epoch": 1.7026966357525597, "grad_norm": 3.4191146561551258, "learning_rate": 9.850346157906317e-06, "loss": 0.7037, "step": 102400 }, { "epoch": 1.704359429495467, "grad_norm": 3.766327535270543, "learning_rate": 9.849640625758318e-06, "loss": 0.771, "step": 102500 }, { "epoch": 1.706022223238374, "grad_norm": 3.4005622015603696, "learning_rate": 9.848933459815842e-06, "loss": 0.7659, "step": 102600 }, { "epoch": 1.707685016981281, "grad_norm": 3.449611500493808, "learning_rate": 9.848224660317123e-06, "loss": 0.7564, "step": 102700 }, { "epoch": 1.7093478107241884, "grad_norm": 5.6821813781652795, "learning_rate": 9.847514227500952e-06, "loss": 0.7728, "step": 102800 }, { "epoch": 1.7110106044670954, "grad_norm": 4.038543855437762, "learning_rate": 9.846802161606662e-06, "loss": 0.7339, "step": 102900 }, { "epoch": 1.7126733982100024, "grad_norm": 3.896742786266056, "learning_rate": 9.846088462874147e-06, "loss": 0.748, "step": 103000 }, { "epoch": 1.7143361919529096, "grad_norm": 2.9373660373520267, "learning_rate": 9.845373131543839e-06, "loss": 0.7641, "step": 103100 }, { "epoch": 1.7159989856958169, "grad_norm": 4.67619393323598, "learning_rate": 9.844656167856728e-06, "loss": 0.742, "step": 103200 }, { "epoch": 1.717661779438724, "grad_norm": 3.7352678287163927, "learning_rate": 9.843937572054352e-06, "loss": 0.7484, "step": 103300 }, { "epoch": 1.7193245731816311, "grad_norm": 2.8581803144933953, "learning_rate": 9.843217344378798e-06, "loss": 0.7529, "step": 103400 }, { "epoch": 1.7209873669245384, "grad_norm": 3.797946023681648, "learning_rate": 9.842495485072703e-06, "loss": 0.7561, "step": 103500 }, { "epoch": 1.7226501606674454, "grad_norm": 3.938845721838936, "learning_rate": 9.841771994379257e-06, "loss": 0.7382, "step": 103600 }, { "epoch": 1.7243129544103526, "grad_norm": 3.842076823931244, "learning_rate": 9.841046872542192e-06, "loss": 0.7538, "step": 103700 }, { "epoch": 1.7259757481532598, "grad_norm": 3.535011028105942, "learning_rate": 9.840320119805798e-06, "loss": 0.7257, "step": 103800 }, { "epoch": 1.7276385418961668, "grad_norm": 2.8322991446814076, "learning_rate": 9.83959173641491e-06, "loss": 0.7679, "step": 103900 }, { "epoch": 1.7293013356390738, "grad_norm": 3.1123975376493522, "learning_rate": 9.83886172261491e-06, "loss": 0.7726, "step": 104000 }, { "epoch": 1.7309641293819813, "grad_norm": 2.632303147355754, "learning_rate": 9.838130078651736e-06, "loss": 0.7402, "step": 104100 }, { "epoch": 1.7326269231248883, "grad_norm": 2.9552849877871186, "learning_rate": 9.837396804771869e-06, "loss": 0.7388, "step": 104200 }, { "epoch": 1.7342897168677953, "grad_norm": 2.806843607801589, "learning_rate": 9.836661901222343e-06, "loss": 0.7295, "step": 104300 }, { "epoch": 1.7359525106107025, "grad_norm": 3.0947644695223873, "learning_rate": 9.835925368250739e-06, "loss": 0.748, "step": 104400 }, { "epoch": 1.7376153043536098, "grad_norm": 3.483735601366882, "learning_rate": 9.835187206105186e-06, "loss": 0.7745, "step": 104500 }, { "epoch": 1.7392780980965168, "grad_norm": 3.7176073146169197, "learning_rate": 9.834447415034364e-06, "loss": 0.753, "step": 104600 }, { "epoch": 1.740940891839424, "grad_norm": 5.287526418311282, "learning_rate": 9.833705995287502e-06, "loss": 0.7222, "step": 104700 }, { "epoch": 1.7426036855823313, "grad_norm": 3.722013829365452, "learning_rate": 9.832962947114376e-06, "loss": 0.7489, "step": 104800 }, { "epoch": 1.7442664793252383, "grad_norm": 3.2919832843294747, "learning_rate": 9.832218270765313e-06, "loss": 0.7581, "step": 104900 }, { "epoch": 1.7459292730681455, "grad_norm": 3.954704521610358, "learning_rate": 9.831471966491184e-06, "loss": 0.7049, "step": 105000 }, { "epoch": 1.7475920668110527, "grad_norm": 4.628720082148064, "learning_rate": 9.830724034543413e-06, "loss": 0.7395, "step": 105100 }, { "epoch": 1.7492548605539597, "grad_norm": 3.4910704639028842, "learning_rate": 9.829974475173972e-06, "loss": 0.7334, "step": 105200 }, { "epoch": 1.7509176542968667, "grad_norm": 3.382390621560173, "learning_rate": 9.829223288635376e-06, "loss": 0.754, "step": 105300 }, { "epoch": 1.752580448039774, "grad_norm": 4.015696714745177, "learning_rate": 9.828470475180696e-06, "loss": 0.7625, "step": 105400 }, { "epoch": 1.7542432417826812, "grad_norm": 3.402964794686038, "learning_rate": 9.827716035063545e-06, "loss": 0.7635, "step": 105500 }, { "epoch": 1.7559060355255882, "grad_norm": 5.360929018453376, "learning_rate": 9.826959968538086e-06, "loss": 0.7551, "step": 105600 }, { "epoch": 1.7575688292684954, "grad_norm": 4.324465434893772, "learning_rate": 9.826202275859032e-06, "loss": 0.7297, "step": 105700 }, { "epoch": 1.7592316230114027, "grad_norm": 4.139905387093327, "learning_rate": 9.825442957281642e-06, "loss": 0.7339, "step": 105800 }, { "epoch": 1.7608944167543097, "grad_norm": 3.2690994146624894, "learning_rate": 9.824682013061719e-06, "loss": 0.7485, "step": 105900 }, { "epoch": 1.762557210497217, "grad_norm": 3.5563463255273664, "learning_rate": 9.82391944345562e-06, "loss": 0.7752, "step": 106000 }, { "epoch": 1.7642200042401242, "grad_norm": 4.1523084194149344, "learning_rate": 9.823155248720249e-06, "loss": 0.7354, "step": 106100 }, { "epoch": 1.7658827979830312, "grad_norm": 3.017980633530474, "learning_rate": 9.822389429113053e-06, "loss": 0.7432, "step": 106200 }, { "epoch": 1.7675455917259382, "grad_norm": 3.1100993354286994, "learning_rate": 9.821621984892027e-06, "loss": 0.7545, "step": 106300 }, { "epoch": 1.7692083854688456, "grad_norm": 4.990297600138578, "learning_rate": 9.820852916315719e-06, "loss": 0.73, "step": 106400 }, { "epoch": 1.7708711792117526, "grad_norm": 4.565806435447591, "learning_rate": 9.820082223643217e-06, "loss": 0.7393, "step": 106500 }, { "epoch": 1.7725339729546596, "grad_norm": 4.017572069148186, "learning_rate": 9.819309907134162e-06, "loss": 0.7291, "step": 106600 }, { "epoch": 1.7741967666975669, "grad_norm": 2.904172393348025, "learning_rate": 9.818535967048739e-06, "loss": 0.7646, "step": 106700 }, { "epoch": 1.775859560440474, "grad_norm": 3.5369586138919007, "learning_rate": 9.817760403647678e-06, "loss": 0.737, "step": 106800 }, { "epoch": 1.7775223541833811, "grad_norm": 3.731598057657563, "learning_rate": 9.816983217192263e-06, "loss": 0.7269, "step": 106900 }, { "epoch": 1.7791851479262883, "grad_norm": 3.571953468118676, "learning_rate": 9.816204407944317e-06, "loss": 0.7303, "step": 107000 }, { "epoch": 1.7808479416691956, "grad_norm": 4.353232589150283, "learning_rate": 9.815423976166212e-06, "loss": 0.7536, "step": 107100 }, { "epoch": 1.7825107354121026, "grad_norm": 3.160971019576106, "learning_rate": 9.814641922120871e-06, "loss": 0.7194, "step": 107200 }, { "epoch": 1.7841735291550098, "grad_norm": 3.568131128822344, "learning_rate": 9.813858246071756e-06, "loss": 0.7274, "step": 107300 }, { "epoch": 1.785836322897917, "grad_norm": 4.674952032341042, "learning_rate": 9.813072948282884e-06, "loss": 0.7255, "step": 107400 }, { "epoch": 1.787499116640824, "grad_norm": 5.9545334838253385, "learning_rate": 9.812286029018811e-06, "loss": 0.7302, "step": 107500 }, { "epoch": 1.789161910383731, "grad_norm": 4.022663030949338, "learning_rate": 9.811497488544642e-06, "loss": 0.737, "step": 107600 }, { "epoch": 1.7908247041266385, "grad_norm": 5.106777240410953, "learning_rate": 9.810707327126028e-06, "loss": 0.7344, "step": 107700 }, { "epoch": 1.7924874978695455, "grad_norm": 4.586099606206273, "learning_rate": 9.809915545029169e-06, "loss": 0.7272, "step": 107800 }, { "epoch": 1.7941502916124525, "grad_norm": 4.232262945927058, "learning_rate": 9.809122142520806e-06, "loss": 0.7555, "step": 107900 }, { "epoch": 1.7958130853553598, "grad_norm": 3.7307633243066154, "learning_rate": 9.808327119868226e-06, "loss": 0.7754, "step": 108000 }, { "epoch": 1.797475879098267, "grad_norm": 2.7792888620790426, "learning_rate": 9.807530477339269e-06, "loss": 0.7425, "step": 108100 }, { "epoch": 1.799138672841174, "grad_norm": 3.7867608182039176, "learning_rate": 9.806732215202313e-06, "loss": 0.7529, "step": 108200 }, { "epoch": 1.8008014665840812, "grad_norm": 3.661757129796452, "learning_rate": 9.805932333726285e-06, "loss": 0.7167, "step": 108300 }, { "epoch": 1.8024642603269885, "grad_norm": 3.3158880152363297, "learning_rate": 9.805130833180656e-06, "loss": 0.757, "step": 108400 }, { "epoch": 1.8041270540698955, "grad_norm": 2.5241405342049217, "learning_rate": 9.804327713835445e-06, "loss": 0.7531, "step": 108500 }, { "epoch": 1.8057898478128027, "grad_norm": 3.732388419145529, "learning_rate": 9.803522975961213e-06, "loss": 0.7171, "step": 108600 }, { "epoch": 1.80745264155571, "grad_norm": 3.0401651300067485, "learning_rate": 9.802716619829069e-06, "loss": 0.7356, "step": 108700 }, { "epoch": 1.809115435298617, "grad_norm": 4.87907848838732, "learning_rate": 9.801908645710665e-06, "loss": 0.7364, "step": 108800 }, { "epoch": 1.810778229041524, "grad_norm": 3.752774094805065, "learning_rate": 9.8010990538782e-06, "loss": 0.7414, "step": 108900 }, { "epoch": 1.8124410227844314, "grad_norm": 3.9869267824228345, "learning_rate": 9.800287844604419e-06, "loss": 0.7164, "step": 109000 }, { "epoch": 1.8141038165273384, "grad_norm": 3.5334100841384135, "learning_rate": 9.799475018162607e-06, "loss": 0.7222, "step": 109100 }, { "epoch": 1.8157666102702454, "grad_norm": 3.5421986219041703, "learning_rate": 9.798660574826598e-06, "loss": 0.7247, "step": 109200 }, { "epoch": 1.8174294040131527, "grad_norm": 3.5757896737807298, "learning_rate": 9.797844514870771e-06, "loss": 0.7258, "step": 109300 }, { "epoch": 1.81909219775606, "grad_norm": 2.523174519928626, "learning_rate": 9.797026838570047e-06, "loss": 0.7412, "step": 109400 }, { "epoch": 1.820754991498967, "grad_norm": 4.333643846152769, "learning_rate": 9.796207546199894e-06, "loss": 0.7466, "step": 109500 }, { "epoch": 1.8224177852418741, "grad_norm": 5.329728948551767, "learning_rate": 9.795386638036322e-06, "loss": 0.7348, "step": 109600 }, { "epoch": 1.8240805789847814, "grad_norm": 3.63511164932097, "learning_rate": 9.794564114355888e-06, "loss": 0.7469, "step": 109700 }, { "epoch": 1.8257433727276884, "grad_norm": 2.862330535352371, "learning_rate": 9.79373997543569e-06, "loss": 0.7499, "step": 109800 }, { "epoch": 1.8274061664705956, "grad_norm": 3.2096487195873222, "learning_rate": 9.792914221553372e-06, "loss": 0.7158, "step": 109900 }, { "epoch": 1.8290689602135028, "grad_norm": 3.467107455624356, "learning_rate": 9.792086852987124e-06, "loss": 0.7435, "step": 110000 }, { "epoch": 1.8307317539564099, "grad_norm": 2.8072148443222895, "learning_rate": 9.791257870015676e-06, "loss": 0.7286, "step": 110100 }, { "epoch": 1.8323945476993169, "grad_norm": 4.373475398953231, "learning_rate": 9.790427272918306e-06, "loss": 0.7154, "step": 110200 }, { "epoch": 1.834057341442224, "grad_norm": 4.415725362536801, "learning_rate": 9.789595061974832e-06, "loss": 0.7581, "step": 110300 }, { "epoch": 1.8357201351851313, "grad_norm": 3.6520288148136957, "learning_rate": 9.788761237465617e-06, "loss": 0.735, "step": 110400 }, { "epoch": 1.8373829289280383, "grad_norm": 3.3981240086645688, "learning_rate": 9.787925799671571e-06, "loss": 0.7518, "step": 110500 }, { "epoch": 1.8390457226709456, "grad_norm": 3.306256336584379, "learning_rate": 9.78708874887414e-06, "loss": 0.7164, "step": 110600 }, { "epoch": 1.8407085164138528, "grad_norm": 3.4455319082524287, "learning_rate": 9.786250085355322e-06, "loss": 0.732, "step": 110700 }, { "epoch": 1.8423713101567598, "grad_norm": 3.1698421718598677, "learning_rate": 9.785409809397653e-06, "loss": 0.7607, "step": 110800 }, { "epoch": 1.844034103899667, "grad_norm": 4.221480447169919, "learning_rate": 9.784567921284212e-06, "loss": 0.7583, "step": 110900 }, { "epoch": 1.8456968976425743, "grad_norm": 4.8531537149966955, "learning_rate": 9.783724421298623e-06, "loss": 0.7254, "step": 111000 }, { "epoch": 1.8473596913854813, "grad_norm": 2.830764174654953, "learning_rate": 9.782879309725053e-06, "loss": 0.7106, "step": 111100 }, { "epoch": 1.8490224851283885, "grad_norm": 3.5171308920146243, "learning_rate": 9.782032586848212e-06, "loss": 0.6832, "step": 111200 }, { "epoch": 1.8506852788712957, "grad_norm": 4.515771471820688, "learning_rate": 9.78118425295335e-06, "loss": 0.7611, "step": 111300 }, { "epoch": 1.8523480726142028, "grad_norm": 3.7692509164043444, "learning_rate": 9.780334308326264e-06, "loss": 0.735, "step": 111400 }, { "epoch": 1.8540108663571098, "grad_norm": 3.33250220544373, "learning_rate": 9.779482753253294e-06, "loss": 0.7266, "step": 111500 }, { "epoch": 1.855673660100017, "grad_norm": 3.7167635876845804, "learning_rate": 9.778629588021315e-06, "loss": 0.7364, "step": 111600 }, { "epoch": 1.8573364538429242, "grad_norm": 3.1194570110501156, "learning_rate": 9.777774812917754e-06, "loss": 0.7439, "step": 111700 }, { "epoch": 1.8589992475858312, "grad_norm": 3.956860548120738, "learning_rate": 9.776918428230573e-06, "loss": 0.7068, "step": 111800 }, { "epoch": 1.8606620413287385, "grad_norm": 3.0209904704490427, "learning_rate": 9.776060434248281e-06, "loss": 0.7644, "step": 111900 }, { "epoch": 1.8623248350716457, "grad_norm": 4.97988676604682, "learning_rate": 9.775200831259927e-06, "loss": 0.7244, "step": 112000 }, { "epoch": 1.8639876288145527, "grad_norm": 2.861515004781934, "learning_rate": 9.774339619555103e-06, "loss": 0.7713, "step": 112100 }, { "epoch": 1.86565042255746, "grad_norm": 3.9719255339101807, "learning_rate": 9.773476799423942e-06, "loss": 0.7336, "step": 112200 }, { "epoch": 1.8673132163003672, "grad_norm": 3.635133874044468, "learning_rate": 9.772612371157118e-06, "loss": 0.7385, "step": 112300 }, { "epoch": 1.8689760100432742, "grad_norm": 3.4447956568724813, "learning_rate": 9.771746335045852e-06, "loss": 0.7128, "step": 112400 }, { "epoch": 1.8706388037861812, "grad_norm": 4.252521645519925, "learning_rate": 9.770878691381896e-06, "loss": 0.7276, "step": 112500 }, { "epoch": 1.8723015975290886, "grad_norm": 3.7390404972226636, "learning_rate": 9.770009440457557e-06, "loss": 0.707, "step": 112600 }, { "epoch": 1.8739643912719957, "grad_norm": 3.732526904301247, "learning_rate": 9.769138582565673e-06, "loss": 0.7131, "step": 112700 }, { "epoch": 1.8756271850149027, "grad_norm": 3.3501079550370636, "learning_rate": 9.76826611799963e-06, "loss": 0.7041, "step": 112800 }, { "epoch": 1.87728997875781, "grad_norm": 3.4859025592326223, "learning_rate": 9.76739204705335e-06, "loss": 0.7119, "step": 112900 }, { "epoch": 1.8789527725007171, "grad_norm": 4.592167263261919, "learning_rate": 9.766516370021297e-06, "loss": 0.7175, "step": 113000 }, { "epoch": 1.8806155662436241, "grad_norm": 3.121166363022421, "learning_rate": 9.765639087198483e-06, "loss": 0.7454, "step": 113100 }, { "epoch": 1.8822783599865314, "grad_norm": 4.412217122012908, "learning_rate": 9.764760198880452e-06, "loss": 0.7573, "step": 113200 }, { "epoch": 1.8839411537294386, "grad_norm": 3.3655026013492724, "learning_rate": 9.763879705363291e-06, "loss": 0.7474, "step": 113300 }, { "epoch": 1.8856039474723456, "grad_norm": 4.213980811746898, "learning_rate": 9.762997606943633e-06, "loss": 0.7431, "step": 113400 }, { "epoch": 1.8872667412152528, "grad_norm": 3.5251951536433874, "learning_rate": 9.762113903918646e-06, "loss": 0.7322, "step": 113500 }, { "epoch": 1.88892953495816, "grad_norm": 2.774426461469435, "learning_rate": 9.76122859658604e-06, "loss": 0.7419, "step": 113600 }, { "epoch": 1.890592328701067, "grad_norm": 3.0323050602722774, "learning_rate": 9.76034168524407e-06, "loss": 0.7398, "step": 113700 }, { "epoch": 1.892255122443974, "grad_norm": 2.878524576634356, "learning_rate": 9.759453170191521e-06, "loss": 0.7275, "step": 113800 }, { "epoch": 1.8939179161868815, "grad_norm": 2.7370492303028007, "learning_rate": 9.75856305172773e-06, "loss": 0.7355, "step": 113900 }, { "epoch": 1.8955807099297886, "grad_norm": 4.183897078283835, "learning_rate": 9.757671330152566e-06, "loss": 0.7575, "step": 114000 }, { "epoch": 1.8972435036726956, "grad_norm": 4.125347811873017, "learning_rate": 9.75677800576644e-06, "loss": 0.7288, "step": 114100 }, { "epoch": 1.8989062974156028, "grad_norm": 3.48608464983815, "learning_rate": 9.755883078870308e-06, "loss": 0.7735, "step": 114200 }, { "epoch": 1.90056909115851, "grad_norm": 3.3091747562105174, "learning_rate": 9.75498654976566e-06, "loss": 0.7442, "step": 114300 }, { "epoch": 1.902231884901417, "grad_norm": 3.4436714018400867, "learning_rate": 9.754088418754523e-06, "loss": 0.7103, "step": 114400 }, { "epoch": 1.9038946786443243, "grad_norm": 3.2452872277198983, "learning_rate": 9.753188686139475e-06, "loss": 0.73, "step": 114500 }, { "epoch": 1.9055574723872315, "grad_norm": 3.752737086252177, "learning_rate": 9.752287352223622e-06, "loss": 0.7505, "step": 114600 }, { "epoch": 1.9072202661301385, "grad_norm": 3.183588857266607, "learning_rate": 9.751384417310618e-06, "loss": 0.7296, "step": 114700 }, { "epoch": 1.9088830598730457, "grad_norm": 2.6423632519150058, "learning_rate": 9.750479881704649e-06, "loss": 0.7163, "step": 114800 }, { "epoch": 1.910545853615953, "grad_norm": 3.3168203673629453, "learning_rate": 9.749573745710446e-06, "loss": 0.7282, "step": 114900 }, { "epoch": 1.91220864735886, "grad_norm": 4.728464957712564, "learning_rate": 9.748666009633276e-06, "loss": 0.7233, "step": 115000 }, { "epoch": 1.913871441101767, "grad_norm": 2.965982864103248, "learning_rate": 9.747756673778946e-06, "loss": 0.7213, "step": 115100 }, { "epoch": 1.9155342348446744, "grad_norm": 2.7311900736505366, "learning_rate": 9.746845738453802e-06, "loss": 0.711, "step": 115200 }, { "epoch": 1.9171970285875815, "grad_norm": 3.33307419633374, "learning_rate": 9.74593320396473e-06, "loss": 0.7173, "step": 115300 }, { "epoch": 1.9188598223304885, "grad_norm": 4.0788930197980475, "learning_rate": 9.745019070619152e-06, "loss": 0.7257, "step": 115400 }, { "epoch": 1.9205226160733957, "grad_norm": 3.6056531078252143, "learning_rate": 9.744103338725034e-06, "loss": 0.7589, "step": 115500 }, { "epoch": 1.922185409816303, "grad_norm": 3.153639457513063, "learning_rate": 9.743186008590871e-06, "loss": 0.7636, "step": 115600 }, { "epoch": 1.92384820355921, "grad_norm": 2.7373736589761615, "learning_rate": 9.742267080525704e-06, "loss": 0.7148, "step": 115700 }, { "epoch": 1.9255109973021172, "grad_norm": 3.3072906507476283, "learning_rate": 9.741346554839114e-06, "loss": 0.7148, "step": 115800 }, { "epoch": 1.9271737910450244, "grad_norm": 3.847913345961657, "learning_rate": 9.740424431841211e-06, "loss": 0.751, "step": 115900 }, { "epoch": 1.9288365847879314, "grad_norm": 3.46375305325164, "learning_rate": 9.739500711842654e-06, "loss": 0.7232, "step": 116000 }, { "epoch": 1.9304993785308386, "grad_norm": 3.85568599502399, "learning_rate": 9.738575395154631e-06, "loss": 0.7104, "step": 116100 }, { "epoch": 1.9321621722737459, "grad_norm": 2.8013527918210346, "learning_rate": 9.737648482088874e-06, "loss": 0.6994, "step": 116200 }, { "epoch": 1.9338249660166529, "grad_norm": 3.491132791227139, "learning_rate": 9.73671997295765e-06, "loss": 0.7301, "step": 116300 }, { "epoch": 1.93548775975956, "grad_norm": 4.336607797849408, "learning_rate": 9.735789868073763e-06, "loss": 0.706, "step": 116400 }, { "epoch": 1.9371505535024671, "grad_norm": 3.0563350639634077, "learning_rate": 9.734858167750561e-06, "loss": 0.6876, "step": 116500 }, { "epoch": 1.9388133472453744, "grad_norm": 2.8702767239395848, "learning_rate": 9.733924872301918e-06, "loss": 0.7124, "step": 116600 }, { "epoch": 1.9404761409882814, "grad_norm": 3.2310228572305477, "learning_rate": 9.732989982042254e-06, "loss": 0.7201, "step": 116700 }, { "epoch": 1.9421389347311886, "grad_norm": 3.0604194948709327, "learning_rate": 9.732053497286524e-06, "loss": 0.7017, "step": 116800 }, { "epoch": 1.9438017284740958, "grad_norm": 3.5230745945049295, "learning_rate": 9.731115418350222e-06, "loss": 0.7065, "step": 116900 }, { "epoch": 1.9454645222170028, "grad_norm": 3.4205711374919985, "learning_rate": 9.730175745549375e-06, "loss": 0.7156, "step": 117000 }, { "epoch": 1.94712731595991, "grad_norm": 3.6014850884114615, "learning_rate": 9.72923447920055e-06, "loss": 0.729, "step": 117100 }, { "epoch": 1.9487901097028173, "grad_norm": 2.712635313004732, "learning_rate": 9.728291619620847e-06, "loss": 0.7238, "step": 117200 }, { "epoch": 1.9504529034457243, "grad_norm": 3.0660762206799967, "learning_rate": 9.727347167127911e-06, "loss": 0.7146, "step": 117300 }, { "epoch": 1.9521156971886313, "grad_norm": 3.663423637313754, "learning_rate": 9.726401122039917e-06, "loss": 0.7273, "step": 117400 }, { "epoch": 1.9537784909315388, "grad_norm": 2.294080773017413, "learning_rate": 9.725453484675576e-06, "loss": 0.7083, "step": 117500 }, { "epoch": 1.9554412846744458, "grad_norm": 4.675093183567287, "learning_rate": 9.724504255354137e-06, "loss": 0.7466, "step": 117600 }, { "epoch": 1.9571040784173528, "grad_norm": 3.508210250249978, "learning_rate": 9.723553434395388e-06, "loss": 0.7416, "step": 117700 }, { "epoch": 1.95876687216026, "grad_norm": 3.3745749536828455, "learning_rate": 9.72260102211965e-06, "loss": 0.7027, "step": 117800 }, { "epoch": 1.9604296659031673, "grad_norm": 4.8889409690767085, "learning_rate": 9.721647018847781e-06, "loss": 0.7462, "step": 117900 }, { "epoch": 1.9620924596460743, "grad_norm": 3.792509910561541, "learning_rate": 9.720691424901175e-06, "loss": 0.7067, "step": 118000 }, { "epoch": 1.9637552533889815, "grad_norm": 2.805179230484485, "learning_rate": 9.719734240601761e-06, "loss": 0.7347, "step": 118100 }, { "epoch": 1.9654180471318887, "grad_norm": 2.989173480093255, "learning_rate": 9.718775466272006e-06, "loss": 0.7181, "step": 118200 }, { "epoch": 1.9670808408747957, "grad_norm": 3.4134048580348906, "learning_rate": 9.71781510223491e-06, "loss": 0.7413, "step": 118300 }, { "epoch": 1.968743634617703, "grad_norm": 3.080077255508235, "learning_rate": 9.71685314881401e-06, "loss": 0.7126, "step": 118400 }, { "epoch": 1.9704064283606102, "grad_norm": 2.8277625731894647, "learning_rate": 9.71588960633338e-06, "loss": 0.7271, "step": 118500 }, { "epoch": 1.9720692221035172, "grad_norm": 3.560111639727041, "learning_rate": 9.714924475117626e-06, "loss": 0.721, "step": 118600 }, { "epoch": 1.9737320158464242, "grad_norm": 4.172871772581491, "learning_rate": 9.713957755491892e-06, "loss": 0.6867, "step": 118700 }, { "epoch": 1.9753948095893317, "grad_norm": 3.1375124519301743, "learning_rate": 9.712989447781853e-06, "loss": 0.7476, "step": 118800 }, { "epoch": 1.9770576033322387, "grad_norm": 4.307707230059423, "learning_rate": 9.712019552313727e-06, "loss": 0.6802, "step": 118900 }, { "epoch": 1.9787203970751457, "grad_norm": 3.092986165583386, "learning_rate": 9.711048069414257e-06, "loss": 0.7304, "step": 119000 }, { "epoch": 1.980383190818053, "grad_norm": 3.8418752747742766, "learning_rate": 9.71007499941073e-06, "loss": 0.7342, "step": 119100 }, { "epoch": 1.9820459845609602, "grad_norm": 3.0318853794140868, "learning_rate": 9.70910034263096e-06, "loss": 0.7257, "step": 119200 }, { "epoch": 1.9837087783038672, "grad_norm": 3.3909301463855184, "learning_rate": 9.7081240994033e-06, "loss": 0.7176, "step": 119300 }, { "epoch": 1.9853715720467744, "grad_norm": 3.7409930132180604, "learning_rate": 9.707146270056636e-06, "loss": 0.7236, "step": 119400 }, { "epoch": 1.9870343657896816, "grad_norm": 3.134129093061777, "learning_rate": 9.706166854920388e-06, "loss": 0.7022, "step": 119500 }, { "epoch": 1.9886971595325886, "grad_norm": 3.2903385067775606, "learning_rate": 9.705185854324514e-06, "loss": 0.7211, "step": 119600 }, { "epoch": 1.9903599532754959, "grad_norm": 3.3175172423625328, "learning_rate": 9.704203268599503e-06, "loss": 0.7332, "step": 119700 }, { "epoch": 1.992022747018403, "grad_norm": 3.453558439507808, "learning_rate": 9.703219098076375e-06, "loss": 0.7105, "step": 119800 }, { "epoch": 1.99368554076131, "grad_norm": 4.142010334602534, "learning_rate": 9.702233343086687e-06, "loss": 0.7246, "step": 119900 }, { "epoch": 1.9953483345042171, "grad_norm": 3.609422032724139, "learning_rate": 9.701246003962534e-06, "loss": 0.6993, "step": 120000 }, { "epoch": 1.9970111282471246, "grad_norm": 3.1047263078082397, "learning_rate": 9.700257081036535e-06, "loss": 0.7036, "step": 120100 }, { "epoch": 1.9986739219900316, "grad_norm": 2.903586950830703, "learning_rate": 9.699266574641852e-06, "loss": 0.6901, "step": 120200 }, { "epoch": 2.0003325587485814, "grad_norm": 3.6170308112492355, "learning_rate": 9.698274485112176e-06, "loss": 0.6648, "step": 120300 }, { "epoch": 2.0019953524914884, "grad_norm": 4.206322252399673, "learning_rate": 9.697280812781732e-06, "loss": 0.5625, "step": 120400 }, { "epoch": 2.003658146234396, "grad_norm": 3.3586155185564284, "learning_rate": 9.696285557985274e-06, "loss": 0.5758, "step": 120500 }, { "epoch": 2.005320939977303, "grad_norm": 2.440141996257732, "learning_rate": 9.695288721058098e-06, "loss": 0.562, "step": 120600 }, { "epoch": 2.00698373372021, "grad_norm": 4.748129010612645, "learning_rate": 9.694290302336028e-06, "loss": 0.5369, "step": 120700 }, { "epoch": 2.0086465274631173, "grad_norm": 4.764662377246201, "learning_rate": 9.693290302155418e-06, "loss": 0.5517, "step": 120800 }, { "epoch": 2.0103093212060243, "grad_norm": 2.7167907209263955, "learning_rate": 9.692288720853159e-06, "loss": 0.5733, "step": 120900 }, { "epoch": 2.0119721149489314, "grad_norm": 4.2891702168801, "learning_rate": 9.691285558766675e-06, "loss": 0.5515, "step": 121000 }, { "epoch": 2.013634908691839, "grad_norm": 4.174888271677108, "learning_rate": 9.690280816233921e-06, "loss": 0.5879, "step": 121100 }, { "epoch": 2.015297702434746, "grad_norm": 3.86460350759774, "learning_rate": 9.689274493593382e-06, "loss": 0.5618, "step": 121200 }, { "epoch": 2.016960496177653, "grad_norm": 4.189924220257442, "learning_rate": 9.688266591184081e-06, "loss": 0.5301, "step": 121300 }, { "epoch": 2.01862328992056, "grad_norm": 4.339044721585196, "learning_rate": 9.687257109345567e-06, "loss": 0.5438, "step": 121400 }, { "epoch": 2.0202860836634673, "grad_norm": 3.1296419906159683, "learning_rate": 9.686246048417928e-06, "loss": 0.5789, "step": 121500 }, { "epoch": 2.0219488774063743, "grad_norm": 3.64519110675239, "learning_rate": 9.685233408741776e-06, "loss": 0.5826, "step": 121600 }, { "epoch": 2.0236116711492813, "grad_norm": 4.472198138555999, "learning_rate": 9.684219190658261e-06, "loss": 0.5617, "step": 121700 }, { "epoch": 2.0252744648921888, "grad_norm": 3.8319020753633226, "learning_rate": 9.683203394509064e-06, "loss": 0.5735, "step": 121800 }, { "epoch": 2.0269372586350958, "grad_norm": 3.767381079940762, "learning_rate": 9.682186020636393e-06, "loss": 0.5883, "step": 121900 }, { "epoch": 2.028600052378003, "grad_norm": 3.020371769102918, "learning_rate": 9.681167069382992e-06, "loss": 0.5975, "step": 122000 }, { "epoch": 2.0302628461209102, "grad_norm": 3.099124662344685, "learning_rate": 9.68014654109214e-06, "loss": 0.5602, "step": 122100 }, { "epoch": 2.0319256398638172, "grad_norm": 2.4725984783872614, "learning_rate": 9.679124436107635e-06, "loss": 0.5709, "step": 122200 }, { "epoch": 2.0335884336067243, "grad_norm": 3.699004022222908, "learning_rate": 9.678100754773819e-06, "loss": 0.5596, "step": 122300 }, { "epoch": 2.0352512273496313, "grad_norm": 2.7157164235393654, "learning_rate": 9.677075497435557e-06, "loss": 0.5639, "step": 122400 }, { "epoch": 2.0369140210925387, "grad_norm": 3.159386094154512, "learning_rate": 9.67604866443825e-06, "loss": 0.5284, "step": 122500 }, { "epoch": 2.0385768148354457, "grad_norm": 3.6879357080715356, "learning_rate": 9.675020256127828e-06, "loss": 0.5666, "step": 122600 }, { "epoch": 2.0402396085783527, "grad_norm": 3.300700573870989, "learning_rate": 9.673990272850746e-06, "loss": 0.5621, "step": 122700 }, { "epoch": 2.04190240232126, "grad_norm": 3.965652150692699, "learning_rate": 9.672958714954e-06, "loss": 0.5582, "step": 122800 }, { "epoch": 2.043565196064167, "grad_norm": 3.7051741539915475, "learning_rate": 9.671925582785112e-06, "loss": 0.5471, "step": 122900 }, { "epoch": 2.045227989807074, "grad_norm": 2.3293166843623436, "learning_rate": 9.67089087669213e-06, "loss": 0.5493, "step": 123000 }, { "epoch": 2.0468907835499817, "grad_norm": 3.0164209953906203, "learning_rate": 9.669854597023637e-06, "loss": 0.5893, "step": 123100 }, { "epoch": 2.0485535772928887, "grad_norm": 3.787760506160578, "learning_rate": 9.668816744128748e-06, "loss": 0.5607, "step": 123200 }, { "epoch": 2.0502163710357957, "grad_norm": 3.592802015857854, "learning_rate": 9.6677773183571e-06, "loss": 0.5713, "step": 123300 }, { "epoch": 2.051879164778703, "grad_norm": 3.942878586798843, "learning_rate": 9.666736320058869e-06, "loss": 0.5577, "step": 123400 }, { "epoch": 2.05354195852161, "grad_norm": 3.0296366325476103, "learning_rate": 9.665693749584757e-06, "loss": 0.5563, "step": 123500 }, { "epoch": 2.055204752264517, "grad_norm": 2.9080487523906555, "learning_rate": 9.664649607285992e-06, "loss": 0.5933, "step": 123600 }, { "epoch": 2.056867546007424, "grad_norm": 4.487290000387383, "learning_rate": 9.663603893514336e-06, "loss": 0.5708, "step": 123700 }, { "epoch": 2.0585303397503316, "grad_norm": 3.6846684492633908, "learning_rate": 9.66255660862208e-06, "loss": 0.565, "step": 123800 }, { "epoch": 2.0601931334932386, "grad_norm": 3.5531161615308364, "learning_rate": 9.661507752962046e-06, "loss": 0.5712, "step": 123900 }, { "epoch": 2.0618559272361456, "grad_norm": 4.159202107759318, "learning_rate": 9.660457326887578e-06, "loss": 0.5698, "step": 124000 }, { "epoch": 2.063518720979053, "grad_norm": 3.493047573072458, "learning_rate": 9.659405330752556e-06, "loss": 0.5602, "step": 124100 }, { "epoch": 2.06518151472196, "grad_norm": 3.302616991766687, "learning_rate": 9.658351764911387e-06, "loss": 0.5762, "step": 124200 }, { "epoch": 2.066844308464867, "grad_norm": 2.868998979249776, "learning_rate": 9.657296629719007e-06, "loss": 0.5711, "step": 124300 }, { "epoch": 2.0685071022077746, "grad_norm": 2.982746898992774, "learning_rate": 9.656239925530881e-06, "loss": 0.5735, "step": 124400 }, { "epoch": 2.0701698959506816, "grad_norm": 3.918747513887849, "learning_rate": 9.655181652702998e-06, "loss": 0.5633, "step": 124500 }, { "epoch": 2.0718326896935886, "grad_norm": 3.488071399356048, "learning_rate": 9.654121811591885e-06, "loss": 0.5588, "step": 124600 }, { "epoch": 2.073495483436496, "grad_norm": 3.5907324172313824, "learning_rate": 9.653060402554586e-06, "loss": 0.5551, "step": 124700 }, { "epoch": 2.075158277179403, "grad_norm": 4.0001288188024695, "learning_rate": 9.651997425948683e-06, "loss": 0.5657, "step": 124800 }, { "epoch": 2.07682107092231, "grad_norm": 3.126355206187341, "learning_rate": 9.65093288213228e-06, "loss": 0.5584, "step": 124900 }, { "epoch": 2.078483864665217, "grad_norm": 3.5480616030028562, "learning_rate": 9.649866771464011e-06, "loss": 0.5731, "step": 125000 }, { "epoch": 2.0801466584081245, "grad_norm": 2.957859171124513, "learning_rate": 9.648799094303041e-06, "loss": 0.5845, "step": 125100 }, { "epoch": 2.0818094521510315, "grad_norm": 3.373521124182474, "learning_rate": 9.647729851009054e-06, "loss": 0.5554, "step": 125200 }, { "epoch": 2.0834722458939385, "grad_norm": 3.3288085312687126, "learning_rate": 9.646659041942271e-06, "loss": 0.5549, "step": 125300 }, { "epoch": 2.085135039636846, "grad_norm": 4.087454734737762, "learning_rate": 9.645586667463436e-06, "loss": 0.54, "step": 125400 }, { "epoch": 2.086797833379753, "grad_norm": 3.2168031502465486, "learning_rate": 9.64451272793382e-06, "loss": 0.5838, "step": 125500 }, { "epoch": 2.08846062712266, "grad_norm": 3.7084229346992226, "learning_rate": 9.643437223715223e-06, "loss": 0.5717, "step": 125600 }, { "epoch": 2.0901234208655675, "grad_norm": 3.943153681446435, "learning_rate": 9.642360155169971e-06, "loss": 0.5802, "step": 125700 }, { "epoch": 2.0917862146084745, "grad_norm": 3.2576833444850366, "learning_rate": 9.64128152266092e-06, "loss": 0.5682, "step": 125800 }, { "epoch": 2.0934490083513815, "grad_norm": 3.397890350924592, "learning_rate": 9.640201326551444e-06, "loss": 0.5657, "step": 125900 }, { "epoch": 2.095111802094289, "grad_norm": 3.840521291251285, "learning_rate": 9.639119567205456e-06, "loss": 0.5668, "step": 126000 }, { "epoch": 2.096774595837196, "grad_norm": 3.8726602037597986, "learning_rate": 9.63803624498739e-06, "loss": 0.5658, "step": 126100 }, { "epoch": 2.098437389580103, "grad_norm": 2.783387217870828, "learning_rate": 9.636951360262202e-06, "loss": 0.5731, "step": 126200 }, { "epoch": 2.10010018332301, "grad_norm": 3.326739818655406, "learning_rate": 9.63586491339538e-06, "loss": 0.5706, "step": 126300 }, { "epoch": 2.1017629770659174, "grad_norm": 3.3883041264605254, "learning_rate": 9.634776904752941e-06, "loss": 0.5778, "step": 126400 }, { "epoch": 2.1034257708088244, "grad_norm": 3.0087555436116626, "learning_rate": 9.633687334701417e-06, "loss": 0.5585, "step": 126500 }, { "epoch": 2.1050885645517314, "grad_norm": 4.048723791895577, "learning_rate": 9.632596203607878e-06, "loss": 0.5811, "step": 126600 }, { "epoch": 2.106751358294639, "grad_norm": 3.4886427801958253, "learning_rate": 9.631503511839912e-06, "loss": 0.5738, "step": 126700 }, { "epoch": 2.108414152037546, "grad_norm": 4.676903876157338, "learning_rate": 9.630409259765638e-06, "loss": 0.5326, "step": 126800 }, { "epoch": 2.110076945780453, "grad_norm": 3.516968570262695, "learning_rate": 9.629313447753698e-06, "loss": 0.5948, "step": 126900 }, { "epoch": 2.1117397395233604, "grad_norm": 3.759760437850574, "learning_rate": 9.62821607617326e-06, "loss": 0.5802, "step": 127000 }, { "epoch": 2.1134025332662674, "grad_norm": 4.071142778023786, "learning_rate": 9.627117145394016e-06, "loss": 0.5561, "step": 127100 }, { "epoch": 2.1150653270091744, "grad_norm": 3.391945709579904, "learning_rate": 9.626016655786185e-06, "loss": 0.5923, "step": 127200 }, { "epoch": 2.1167281207520814, "grad_norm": 3.7983416721212206, "learning_rate": 9.624914607720512e-06, "loss": 0.5811, "step": 127300 }, { "epoch": 2.118390914494989, "grad_norm": 3.384604266827708, "learning_rate": 9.623811001568264e-06, "loss": 0.5656, "step": 127400 }, { "epoch": 2.120053708237896, "grad_norm": 3.735035600955194, "learning_rate": 9.622705837701237e-06, "loss": 0.5846, "step": 127500 }, { "epoch": 2.121716501980803, "grad_norm": 2.854583950649377, "learning_rate": 9.621599116491747e-06, "loss": 0.5909, "step": 127600 }, { "epoch": 2.1233792957237103, "grad_norm": 2.6532819639316574, "learning_rate": 9.620490838312638e-06, "loss": 0.6017, "step": 127700 }, { "epoch": 2.1250420894666173, "grad_norm": 3.0038386918154045, "learning_rate": 9.619381003537275e-06, "loss": 0.6036, "step": 127800 }, { "epoch": 2.1267048832095243, "grad_norm": 3.5493473524439993, "learning_rate": 9.618269612539558e-06, "loss": 0.54, "step": 127900 }, { "epoch": 2.128367676952432, "grad_norm": 5.663043019784249, "learning_rate": 9.617156665693894e-06, "loss": 0.5529, "step": 128000 }, { "epoch": 2.130030470695339, "grad_norm": 3.6077797540414225, "learning_rate": 9.616042163375228e-06, "loss": 0.5729, "step": 128100 }, { "epoch": 2.131693264438246, "grad_norm": 3.8208774922517255, "learning_rate": 9.614926105959024e-06, "loss": 0.5837, "step": 128200 }, { "epoch": 2.1333560581811533, "grad_norm": 3.2877771251611887, "learning_rate": 9.61380849382127e-06, "loss": 0.5563, "step": 128300 }, { "epoch": 2.1350188519240603, "grad_norm": 3.9580219108758263, "learning_rate": 9.61268932733848e-06, "loss": 0.5461, "step": 128400 }, { "epoch": 2.1366816456669673, "grad_norm": 3.189135396154438, "learning_rate": 9.611568606887687e-06, "loss": 0.5735, "step": 128500 }, { "epoch": 2.1383444394098747, "grad_norm": 3.5950450951001027, "learning_rate": 9.610446332846451e-06, "loss": 0.5777, "step": 128600 }, { "epoch": 2.1400072331527817, "grad_norm": 2.82385319269266, "learning_rate": 9.609322505592856e-06, "loss": 0.5607, "step": 128700 }, { "epoch": 2.1416700268956887, "grad_norm": 3.9761463284231247, "learning_rate": 9.608197125505508e-06, "loss": 0.5695, "step": 128800 }, { "epoch": 2.1433328206385958, "grad_norm": 3.2118669286223995, "learning_rate": 9.607070192963532e-06, "loss": 0.5947, "step": 128900 }, { "epoch": 2.144995614381503, "grad_norm": 3.8886317276372724, "learning_rate": 9.605941708346585e-06, "loss": 0.5537, "step": 129000 }, { "epoch": 2.1466584081244102, "grad_norm": 3.121201021577136, "learning_rate": 9.604811672034841e-06, "loss": 0.5769, "step": 129100 }, { "epoch": 2.1483212018673172, "grad_norm": 2.637148459779667, "learning_rate": 9.603680084408996e-06, "loss": 0.5717, "step": 129200 }, { "epoch": 2.1499839956102247, "grad_norm": 3.540984888631948, "learning_rate": 9.602546945850271e-06, "loss": 0.5539, "step": 129300 }, { "epoch": 2.1516467893531317, "grad_norm": 2.906772014253539, "learning_rate": 9.60141225674041e-06, "loss": 0.5634, "step": 129400 }, { "epoch": 2.1533095830960387, "grad_norm": 2.999886573780494, "learning_rate": 9.600276017461675e-06, "loss": 0.57, "step": 129500 }, { "epoch": 2.154972376838946, "grad_norm": 2.8024283999900046, "learning_rate": 9.599138228396859e-06, "loss": 0.5715, "step": 129600 }, { "epoch": 2.156635170581853, "grad_norm": 4.360859819030659, "learning_rate": 9.597998889929265e-06, "loss": 0.5586, "step": 129700 }, { "epoch": 2.15829796432476, "grad_norm": 3.3503695602504555, "learning_rate": 9.596858002442731e-06, "loss": 0.5643, "step": 129800 }, { "epoch": 2.159960758067667, "grad_norm": 3.128796170865448, "learning_rate": 9.595715566321603e-06, "loss": 0.6067, "step": 129900 }, { "epoch": 2.1616235518105746, "grad_norm": 2.7437989207746822, "learning_rate": 9.594571581950765e-06, "loss": 0.5664, "step": 130000 }, { "epoch": 2.1632863455534816, "grad_norm": 4.483005088544571, "learning_rate": 9.593426049715606e-06, "loss": 0.5901, "step": 130100 }, { "epoch": 2.1649491392963887, "grad_norm": 3.4237098924656904, "learning_rate": 9.592278970002047e-06, "loss": 0.5897, "step": 130200 }, { "epoch": 2.166611933039296, "grad_norm": 3.4868822463041655, "learning_rate": 9.591130343196528e-06, "loss": 0.5865, "step": 130300 }, { "epoch": 2.168274726782203, "grad_norm": 3.2011431149892733, "learning_rate": 9.589980169686009e-06, "loss": 0.5766, "step": 130400 }, { "epoch": 2.16993752052511, "grad_norm": 3.885730257178098, "learning_rate": 9.58882844985797e-06, "loss": 0.5719, "step": 130500 }, { "epoch": 2.1716003142680176, "grad_norm": 4.854495365347884, "learning_rate": 9.587675184100419e-06, "loss": 0.5987, "step": 130600 }, { "epoch": 2.1732631080109246, "grad_norm": 4.7523467558823995, "learning_rate": 9.586520372801874e-06, "loss": 0.5752, "step": 130700 }, { "epoch": 2.1749259017538316, "grad_norm": 3.2064458756542518, "learning_rate": 9.58536401635138e-06, "loss": 0.5467, "step": 130800 }, { "epoch": 2.176588695496739, "grad_norm": 4.359060502764875, "learning_rate": 9.584206115138503e-06, "loss": 0.5694, "step": 130900 }, { "epoch": 2.178251489239646, "grad_norm": 4.255219207963976, "learning_rate": 9.583046669553328e-06, "loss": 0.5871, "step": 131000 }, { "epoch": 2.179914282982553, "grad_norm": 3.743071465686546, "learning_rate": 9.581885679986462e-06, "loss": 0.6104, "step": 131100 }, { "epoch": 2.18157707672546, "grad_norm": 2.9756438742870848, "learning_rate": 9.580723146829027e-06, "loss": 0.6057, "step": 131200 }, { "epoch": 2.1832398704683675, "grad_norm": 3.942846626099289, "learning_rate": 9.579559070472672e-06, "loss": 0.5869, "step": 131300 }, { "epoch": 2.1849026642112745, "grad_norm": 5.2165211037425845, "learning_rate": 9.57839345130956e-06, "loss": 0.5873, "step": 131400 }, { "epoch": 2.1865654579541816, "grad_norm": 4.572350477736375, "learning_rate": 9.577226289732378e-06, "loss": 0.5637, "step": 131500 }, { "epoch": 2.188228251697089, "grad_norm": 2.587098453382327, "learning_rate": 9.576057586134328e-06, "loss": 0.5971, "step": 131600 }, { "epoch": 2.189891045439996, "grad_norm": 2.8698840063495186, "learning_rate": 9.574887340909138e-06, "loss": 0.5863, "step": 131700 }, { "epoch": 2.191553839182903, "grad_norm": 3.635005020143186, "learning_rate": 9.573715554451049e-06, "loss": 0.5758, "step": 131800 }, { "epoch": 2.1932166329258105, "grad_norm": 4.9629575645739274, "learning_rate": 9.572542227154826e-06, "loss": 0.58, "step": 131900 }, { "epoch": 2.1948794266687175, "grad_norm": 3.7318619326433535, "learning_rate": 9.571367359415749e-06, "loss": 0.5468, "step": 132000 }, { "epoch": 2.1965422204116245, "grad_norm": 3.9152059390832035, "learning_rate": 9.57019095162962e-06, "loss": 0.5609, "step": 132100 }, { "epoch": 2.1982050141545315, "grad_norm": 4.771758113541121, "learning_rate": 9.569013004192757e-06, "loss": 0.6025, "step": 132200 }, { "epoch": 2.199867807897439, "grad_norm": 3.6217379020759974, "learning_rate": 9.567833517502003e-06, "loss": 0.583, "step": 132300 }, { "epoch": 2.201530601640346, "grad_norm": 3.0886011562705113, "learning_rate": 9.56665249195471e-06, "loss": 0.5964, "step": 132400 }, { "epoch": 2.203193395383253, "grad_norm": 4.649381449838598, "learning_rate": 9.565469927948757e-06, "loss": 0.5945, "step": 132500 }, { "epoch": 2.2048561891261604, "grad_norm": 2.990231589474096, "learning_rate": 9.564285825882536e-06, "loss": 0.5754, "step": 132600 }, { "epoch": 2.2065189828690674, "grad_norm": 3.7752388989019803, "learning_rate": 9.563100186154957e-06, "loss": 0.5913, "step": 132700 }, { "epoch": 2.2081817766119745, "grad_norm": 4.540595103765588, "learning_rate": 9.561913009165455e-06, "loss": 0.5912, "step": 132800 }, { "epoch": 2.209844570354882, "grad_norm": 4.534693760039142, "learning_rate": 9.560724295313973e-06, "loss": 0.591, "step": 132900 }, { "epoch": 2.211507364097789, "grad_norm": 3.0119738738349704, "learning_rate": 9.55953404500098e-06, "loss": 0.5977, "step": 133000 }, { "epoch": 2.213170157840696, "grad_norm": 2.2468436017209283, "learning_rate": 9.558342258627458e-06, "loss": 0.5631, "step": 133100 }, { "epoch": 2.2148329515836034, "grad_norm": 3.6706874584936076, "learning_rate": 9.557148936594906e-06, "loss": 0.5711, "step": 133200 }, { "epoch": 2.2164957453265104, "grad_norm": 5.781734118873615, "learning_rate": 9.555954079305344e-06, "loss": 0.5745, "step": 133300 }, { "epoch": 2.2181585390694174, "grad_norm": 3.056712431283082, "learning_rate": 9.554757687161307e-06, "loss": 0.5805, "step": 133400 }, { "epoch": 2.219821332812325, "grad_norm": 3.295321989457798, "learning_rate": 9.553559760565848e-06, "loss": 0.5667, "step": 133500 }, { "epoch": 2.221484126555232, "grad_norm": 3.4124843678068415, "learning_rate": 9.552360299922533e-06, "loss": 0.6121, "step": 133600 }, { "epoch": 2.223146920298139, "grad_norm": 3.344383981866012, "learning_rate": 9.551159305635451e-06, "loss": 0.6052, "step": 133700 }, { "epoch": 2.224809714041046, "grad_norm": 3.6424533628637783, "learning_rate": 9.549956778109205e-06, "loss": 0.606, "step": 133800 }, { "epoch": 2.2264725077839533, "grad_norm": 3.248902180926933, "learning_rate": 9.548752717748912e-06, "loss": 0.5566, "step": 133900 }, { "epoch": 2.2281353015268603, "grad_norm": 4.255017459593084, "learning_rate": 9.547547124960211e-06, "loss": 0.5559, "step": 134000 }, { "epoch": 2.2297980952697674, "grad_norm": 3.880793191231222, "learning_rate": 9.54634000014925e-06, "loss": 0.5854, "step": 134100 }, { "epoch": 2.231460889012675, "grad_norm": 2.9283156691495242, "learning_rate": 9.5451313437227e-06, "loss": 0.57, "step": 134200 }, { "epoch": 2.233123682755582, "grad_norm": 2.7717293394822025, "learning_rate": 9.543921156087746e-06, "loss": 0.6048, "step": 134300 }, { "epoch": 2.234786476498489, "grad_norm": 4.457158023374438, "learning_rate": 9.542709437652082e-06, "loss": 0.5798, "step": 134400 }, { "epoch": 2.2364492702413963, "grad_norm": 3.043047116809801, "learning_rate": 9.54149618882393e-06, "loss": 0.5765, "step": 134500 }, { "epoch": 2.2381120639843033, "grad_norm": 3.453302582183686, "learning_rate": 9.540281410012018e-06, "loss": 0.5951, "step": 134600 }, { "epoch": 2.2397748577272103, "grad_norm": 2.894152947697887, "learning_rate": 9.539065101625592e-06, "loss": 0.5705, "step": 134700 }, { "epoch": 2.2414376514701173, "grad_norm": 3.352266552168988, "learning_rate": 9.537847264074416e-06, "loss": 0.5937, "step": 134800 }, { "epoch": 2.2431004452130248, "grad_norm": 4.33836536336297, "learning_rate": 9.536627897768767e-06, "loss": 0.5545, "step": 134900 }, { "epoch": 2.2447632389559318, "grad_norm": 3.6864435254723897, "learning_rate": 9.535407003119435e-06, "loss": 0.5764, "step": 135000 }, { "epoch": 2.246426032698839, "grad_norm": 2.709611834723059, "learning_rate": 9.534184580537728e-06, "loss": 0.5923, "step": 135100 }, { "epoch": 2.2480888264417462, "grad_norm": 4.122348289599256, "learning_rate": 9.532960630435468e-06, "loss": 0.5623, "step": 135200 }, { "epoch": 2.2497516201846532, "grad_norm": 4.196185284657727, "learning_rate": 9.531735153224992e-06, "loss": 0.5655, "step": 135300 }, { "epoch": 2.2514144139275603, "grad_norm": 4.440611566755102, "learning_rate": 9.530508149319149e-06, "loss": 0.5843, "step": 135400 }, { "epoch": 2.2530772076704677, "grad_norm": 2.9380273784834565, "learning_rate": 9.529279619131306e-06, "loss": 0.5705, "step": 135500 }, { "epoch": 2.2547400014133747, "grad_norm": 4.466915972990929, "learning_rate": 9.528049563075341e-06, "loss": 0.5906, "step": 135600 }, { "epoch": 2.2564027951562817, "grad_norm": 2.871057618515276, "learning_rate": 9.526817981565648e-06, "loss": 0.5795, "step": 135700 }, { "epoch": 2.258065588899189, "grad_norm": 3.153837401373427, "learning_rate": 9.525584875017132e-06, "loss": 0.5691, "step": 135800 }, { "epoch": 2.259728382642096, "grad_norm": 3.6040483462964152, "learning_rate": 9.524350243845218e-06, "loss": 0.5683, "step": 135900 }, { "epoch": 2.261391176385003, "grad_norm": 4.412084650741116, "learning_rate": 9.523114088465839e-06, "loss": 0.5635, "step": 136000 }, { "epoch": 2.2630539701279107, "grad_norm": 3.634923540409369, "learning_rate": 9.521876409295441e-06, "loss": 0.5519, "step": 136100 }, { "epoch": 2.2647167638708177, "grad_norm": 3.264117323921844, "learning_rate": 9.52063720675099e-06, "loss": 0.5705, "step": 136200 }, { "epoch": 2.2663795576137247, "grad_norm": 2.626910412225938, "learning_rate": 9.519396481249954e-06, "loss": 0.6132, "step": 136300 }, { "epoch": 2.2680423513566317, "grad_norm": 4.710409102867711, "learning_rate": 9.518154233210328e-06, "loss": 0.6069, "step": 136400 }, { "epoch": 2.269705145099539, "grad_norm": 3.374722928282345, "learning_rate": 9.516910463050608e-06, "loss": 0.548, "step": 136500 }, { "epoch": 2.271367938842446, "grad_norm": 4.776062756301114, "learning_rate": 9.51566517118981e-06, "loss": 0.5852, "step": 136600 }, { "epoch": 2.273030732585353, "grad_norm": 4.025652333091454, "learning_rate": 9.514418358047458e-06, "loss": 0.5495, "step": 136700 }, { "epoch": 2.2746935263282606, "grad_norm": 2.553455616902114, "learning_rate": 9.513170024043591e-06, "loss": 0.5793, "step": 136800 }, { "epoch": 2.2763563200711676, "grad_norm": 3.3513844493469205, "learning_rate": 9.511920169598764e-06, "loss": 0.6026, "step": 136900 }, { "epoch": 2.2780191138140746, "grad_norm": 4.548355176897355, "learning_rate": 9.510668795134033e-06, "loss": 0.5725, "step": 137000 }, { "epoch": 2.2796819075569816, "grad_norm": 3.611294098642709, "learning_rate": 9.50941590107098e-06, "loss": 0.5883, "step": 137100 }, { "epoch": 2.281344701299889, "grad_norm": 2.71598993541464, "learning_rate": 9.508161487831689e-06, "loss": 0.5832, "step": 137200 }, { "epoch": 2.283007495042796, "grad_norm": 2.7922755919739006, "learning_rate": 9.506905555838759e-06, "loss": 0.5999, "step": 137300 }, { "epoch": 2.284670288785703, "grad_norm": 3.009807633349081, "learning_rate": 9.5056481055153e-06, "loss": 0.5872, "step": 137400 }, { "epoch": 2.2863330825286106, "grad_norm": 3.5085581842597624, "learning_rate": 9.504389137284937e-06, "loss": 0.5603, "step": 137500 }, { "epoch": 2.2879958762715176, "grad_norm": 3.6164563360530684, "learning_rate": 9.503128651571801e-06, "loss": 0.5679, "step": 137600 }, { "epoch": 2.2896586700144246, "grad_norm": 3.342764462977914, "learning_rate": 9.501866648800537e-06, "loss": 0.561, "step": 137700 }, { "epoch": 2.291321463757332, "grad_norm": 3.3443087882978424, "learning_rate": 9.500603129396304e-06, "loss": 0.5715, "step": 137800 }, { "epoch": 2.292984257500239, "grad_norm": 3.375676452036152, "learning_rate": 9.499338093784763e-06, "loss": 0.601, "step": 137900 }, { "epoch": 2.294647051243146, "grad_norm": 3.213891798287831, "learning_rate": 9.498071542392098e-06, "loss": 0.5937, "step": 138000 }, { "epoch": 2.2963098449860535, "grad_norm": 3.4927752255513616, "learning_rate": 9.496803475644992e-06, "loss": 0.5733, "step": 138100 }, { "epoch": 2.2979726387289605, "grad_norm": 3.4208603129577893, "learning_rate": 9.495533893970647e-06, "loss": 0.5775, "step": 138200 }, { "epoch": 2.2996354324718675, "grad_norm": 3.376905446120309, "learning_rate": 9.494262797796771e-06, "loss": 0.5966, "step": 138300 }, { "epoch": 2.301298226214775, "grad_norm": 4.014478450507154, "learning_rate": 9.492990187551583e-06, "loss": 0.6026, "step": 138400 }, { "epoch": 2.302961019957682, "grad_norm": 5.262606952176788, "learning_rate": 9.491716063663814e-06, "loss": 0.5841, "step": 138500 }, { "epoch": 2.304623813700589, "grad_norm": 3.165110607129799, "learning_rate": 9.490440426562701e-06, "loss": 0.5603, "step": 138600 }, { "epoch": 2.306286607443496, "grad_norm": 5.05527201216723, "learning_rate": 9.489163276677996e-06, "loss": 0.5932, "step": 138700 }, { "epoch": 2.3079494011864035, "grad_norm": 2.3918973197220743, "learning_rate": 9.487884614439955e-06, "loss": 0.5541, "step": 138800 }, { "epoch": 2.3096121949293105, "grad_norm": 2.76817570039615, "learning_rate": 9.486604440279346e-06, "loss": 0.5737, "step": 138900 }, { "epoch": 2.3112749886722175, "grad_norm": 4.781479902704038, "learning_rate": 9.485322754627449e-06, "loss": 0.5849, "step": 139000 }, { "epoch": 2.312937782415125, "grad_norm": 3.4801894131434117, "learning_rate": 9.484039557916051e-06, "loss": 0.5752, "step": 139100 }, { "epoch": 2.314600576158032, "grad_norm": 4.4083866817450925, "learning_rate": 9.482754850577446e-06, "loss": 0.597, "step": 139200 }, { "epoch": 2.316263369900939, "grad_norm": 3.3306738673677234, "learning_rate": 9.48146863304444e-06, "loss": 0.5797, "step": 139300 }, { "epoch": 2.317926163643846, "grad_norm": 2.802548566873447, "learning_rate": 9.480180905750345e-06, "loss": 0.5719, "step": 139400 }, { "epoch": 2.3195889573867534, "grad_norm": 3.885078617956506, "learning_rate": 9.478891669128985e-06, "loss": 0.5645, "step": 139500 }, { "epoch": 2.3212517511296604, "grad_norm": 3.0863960441646197, "learning_rate": 9.477600923614689e-06, "loss": 0.5682, "step": 139600 }, { "epoch": 2.3229145448725674, "grad_norm": 2.8674883844750143, "learning_rate": 9.476308669642298e-06, "loss": 0.5885, "step": 139700 }, { "epoch": 2.324577338615475, "grad_norm": 3.1348047350924175, "learning_rate": 9.475014907647157e-06, "loss": 0.5912, "step": 139800 }, { "epoch": 2.326240132358382, "grad_norm": 3.2937825508612106, "learning_rate": 9.473719638065122e-06, "loss": 0.5583, "step": 139900 }, { "epoch": 2.327902926101289, "grad_norm": 3.1765422803172094, "learning_rate": 9.472422861332559e-06, "loss": 0.6186, "step": 140000 }, { "epoch": 2.3295657198441964, "grad_norm": 3.984555803814265, "learning_rate": 9.471124577886335e-06, "loss": 0.5561, "step": 140100 }, { "epoch": 2.3312285135871034, "grad_norm": 5.040600095215838, "learning_rate": 9.469824788163828e-06, "loss": 0.5567, "step": 140200 }, { "epoch": 2.3328913073300104, "grad_norm": 3.367236983242205, "learning_rate": 9.468523492602928e-06, "loss": 0.5959, "step": 140300 }, { "epoch": 2.334554101072918, "grad_norm": 2.724023363997979, "learning_rate": 9.467220691642025e-06, "loss": 0.5845, "step": 140400 }, { "epoch": 2.336216894815825, "grad_norm": 3.451442569948144, "learning_rate": 9.465916385720021e-06, "loss": 0.5717, "step": 140500 }, { "epoch": 2.337879688558732, "grad_norm": 2.5214385731357383, "learning_rate": 9.464610575276322e-06, "loss": 0.5462, "step": 140600 }, { "epoch": 2.3395424823016393, "grad_norm": 4.180149023964795, "learning_rate": 9.463303260750842e-06, "loss": 0.5926, "step": 140700 }, { "epoch": 2.3412052760445463, "grad_norm": 3.5693439397401194, "learning_rate": 9.461994442584004e-06, "loss": 0.5829, "step": 140800 }, { "epoch": 2.3428680697874533, "grad_norm": 2.950464688365965, "learning_rate": 9.460684121216734e-06, "loss": 0.5937, "step": 140900 }, { "epoch": 2.3445308635303608, "grad_norm": 2.5387674836461005, "learning_rate": 9.459372297090467e-06, "loss": 0.5992, "step": 141000 }, { "epoch": 2.346193657273268, "grad_norm": 4.431861239259893, "learning_rate": 9.458058970647142e-06, "loss": 0.5578, "step": 141100 }, { "epoch": 2.347856451016175, "grad_norm": 3.4372129025559963, "learning_rate": 9.456744142329203e-06, "loss": 0.5819, "step": 141200 }, { "epoch": 2.349519244759082, "grad_norm": 2.4092905758760264, "learning_rate": 9.45542781257961e-06, "loss": 0.589, "step": 141300 }, { "epoch": 2.3511820385019893, "grad_norm": 4.0977563137694695, "learning_rate": 9.454109981841813e-06, "loss": 0.5814, "step": 141400 }, { "epoch": 2.3528448322448963, "grad_norm": 4.26384957109694, "learning_rate": 9.452790650559777e-06, "loss": 0.571, "step": 141500 }, { "epoch": 2.3545076259878033, "grad_norm": 2.462161745997543, "learning_rate": 9.451469819177975e-06, "loss": 0.5735, "step": 141600 }, { "epoch": 2.3561704197307107, "grad_norm": 4.32788429673624, "learning_rate": 9.450147488141379e-06, "loss": 0.5875, "step": 141700 }, { "epoch": 2.3578332134736177, "grad_norm": 4.1875440219218385, "learning_rate": 9.44882365789547e-06, "loss": 0.5868, "step": 141800 }, { "epoch": 2.3594960072165247, "grad_norm": 3.6937168936711444, "learning_rate": 9.447498328886232e-06, "loss": 0.6269, "step": 141900 }, { "epoch": 2.3611588009594318, "grad_norm": 3.1065376482861446, "learning_rate": 9.446171501560155e-06, "loss": 0.601, "step": 142000 }, { "epoch": 2.362821594702339, "grad_norm": 3.1464887177259753, "learning_rate": 9.444843176364236e-06, "loss": 0.564, "step": 142100 }, { "epoch": 2.364484388445246, "grad_norm": 4.163841804569901, "learning_rate": 9.443513353745969e-06, "loss": 0.5971, "step": 142200 }, { "epoch": 2.3661471821881532, "grad_norm": 5.59389792385416, "learning_rate": 9.442182034153363e-06, "loss": 0.5762, "step": 142300 }, { "epoch": 2.3678099759310607, "grad_norm": 4.230020908454227, "learning_rate": 9.44084921803492e-06, "loss": 0.5778, "step": 142400 }, { "epoch": 2.3694727696739677, "grad_norm": 4.430418364787737, "learning_rate": 9.43951490583966e-06, "loss": 0.5842, "step": 142500 }, { "epoch": 2.3711355634168747, "grad_norm": 3.6332314884428776, "learning_rate": 9.438179098017092e-06, "loss": 0.6198, "step": 142600 }, { "epoch": 2.372798357159782, "grad_norm": 4.711287064229402, "learning_rate": 9.436841795017241e-06, "loss": 0.5737, "step": 142700 }, { "epoch": 2.374461150902689, "grad_norm": 3.908359739670058, "learning_rate": 9.43550299729063e-06, "loss": 0.5674, "step": 142800 }, { "epoch": 2.376123944645596, "grad_norm": 4.780817926106279, "learning_rate": 9.434162705288282e-06, "loss": 0.5763, "step": 142900 }, { "epoch": 2.3777867383885036, "grad_norm": 3.994523143561205, "learning_rate": 9.432820919461732e-06, "loss": 0.5685, "step": 143000 }, { "epoch": 2.3794495321314106, "grad_norm": 3.6526534651621265, "learning_rate": 9.431477640263011e-06, "loss": 0.574, "step": 143100 }, { "epoch": 2.3811123258743176, "grad_norm": 2.719399320120872, "learning_rate": 9.43013286814466e-06, "loss": 0.5732, "step": 143200 }, { "epoch": 2.382775119617225, "grad_norm": 3.304707763563539, "learning_rate": 9.428786603559717e-06, "loss": 0.5795, "step": 143300 }, { "epoch": 2.384437913360132, "grad_norm": 3.8303947738799016, "learning_rate": 9.427438846961723e-06, "loss": 0.5938, "step": 143400 }, { "epoch": 2.386100707103039, "grad_norm": 4.42837858211358, "learning_rate": 9.426089598804727e-06, "loss": 0.5769, "step": 143500 }, { "epoch": 2.387763500845946, "grad_norm": 4.310294207322987, "learning_rate": 9.424738859543274e-06, "loss": 0.5764, "step": 143600 }, { "epoch": 2.3894262945888536, "grad_norm": 4.260638069357201, "learning_rate": 9.423386629632416e-06, "loss": 0.5743, "step": 143700 }, { "epoch": 2.3910890883317606, "grad_norm": 3.212715557622254, "learning_rate": 9.422032909527704e-06, "loss": 0.5805, "step": 143800 }, { "epoch": 2.3927518820746676, "grad_norm": 3.6477636397194235, "learning_rate": 9.420677699685193e-06, "loss": 0.6092, "step": 143900 }, { "epoch": 2.394414675817575, "grad_norm": 2.4821428653931545, "learning_rate": 9.419321000561441e-06, "loss": 0.5805, "step": 144000 }, { "epoch": 2.396077469560482, "grad_norm": 5.35483778564145, "learning_rate": 9.417962812613504e-06, "loss": 0.5952, "step": 144100 }, { "epoch": 2.397740263303389, "grad_norm": 3.6361358954295637, "learning_rate": 9.416603136298942e-06, "loss": 0.5723, "step": 144200 }, { "epoch": 2.3994030570462965, "grad_norm": 4.2906306836434975, "learning_rate": 9.415241972075817e-06, "loss": 0.5706, "step": 144300 }, { "epoch": 2.4010658507892035, "grad_norm": 2.9522552185566564, "learning_rate": 9.413879320402691e-06, "loss": 0.5992, "step": 144400 }, { "epoch": 2.4027286445321105, "grad_norm": 2.9942457825527256, "learning_rate": 9.412515181738628e-06, "loss": 0.5815, "step": 144500 }, { "epoch": 2.4043914382750176, "grad_norm": 4.939157826972667, "learning_rate": 9.41114955654319e-06, "loss": 0.5457, "step": 144600 }, { "epoch": 2.406054232017925, "grad_norm": 2.6589325197623785, "learning_rate": 9.409782445276445e-06, "loss": 0.6084, "step": 144700 }, { "epoch": 2.407717025760832, "grad_norm": 4.2808799168855955, "learning_rate": 9.408413848398959e-06, "loss": 0.6148, "step": 144800 }, { "epoch": 2.409379819503739, "grad_norm": 3.6799438993781455, "learning_rate": 9.407043766371795e-06, "loss": 0.5823, "step": 144900 }, { "epoch": 2.4110426132466465, "grad_norm": 3.373934081702259, "learning_rate": 9.405672199656522e-06, "loss": 0.5863, "step": 145000 }, { "epoch": 2.4127054069895535, "grad_norm": 3.044594068669669, "learning_rate": 9.404299148715207e-06, "loss": 0.6018, "step": 145100 }, { "epoch": 2.4143682007324605, "grad_norm": 3.2186310129280105, "learning_rate": 9.402924614010418e-06, "loss": 0.6064, "step": 145200 }, { "epoch": 2.416030994475368, "grad_norm": 3.6891022381790157, "learning_rate": 9.401548596005219e-06, "loss": 0.5783, "step": 145300 }, { "epoch": 2.417693788218275, "grad_norm": 3.1078407339856042, "learning_rate": 9.400171095163177e-06, "loss": 0.5734, "step": 145400 }, { "epoch": 2.419356581961182, "grad_norm": 3.409055524585729, "learning_rate": 9.39879211194836e-06, "loss": 0.5896, "step": 145500 }, { "epoch": 2.4210193757040894, "grad_norm": 3.8705894033582493, "learning_rate": 9.397411646825332e-06, "loss": 0.5779, "step": 145600 }, { "epoch": 2.4226821694469964, "grad_norm": 3.470155339975382, "learning_rate": 9.396029700259157e-06, "loss": 0.556, "step": 145700 }, { "epoch": 2.4243449631899034, "grad_norm": 4.105112753740265, "learning_rate": 9.3946462727154e-06, "loss": 0.5757, "step": 145800 }, { "epoch": 2.426007756932811, "grad_norm": 3.91399704055092, "learning_rate": 9.393261364660122e-06, "loss": 0.5684, "step": 145900 }, { "epoch": 2.427670550675718, "grad_norm": 3.1913790051731334, "learning_rate": 9.391874976559885e-06, "loss": 0.6302, "step": 146000 }, { "epoch": 2.429333344418625, "grad_norm": 3.9122262256579177, "learning_rate": 9.390487108881752e-06, "loss": 0.5859, "step": 146100 }, { "epoch": 2.430996138161532, "grad_norm": 3.7503671926503745, "learning_rate": 9.389097762093276e-06, "loss": 0.6024, "step": 146200 }, { "epoch": 2.4326589319044394, "grad_norm": 4.5489789812451376, "learning_rate": 9.38770693666252e-06, "loss": 0.5525, "step": 146300 }, { "epoch": 2.4343217256473464, "grad_norm": 3.3734944493057886, "learning_rate": 9.38631463305803e-06, "loss": 0.5972, "step": 146400 }, { "epoch": 2.4359845193902534, "grad_norm": 3.3089972813172253, "learning_rate": 9.38492085174887e-06, "loss": 0.6334, "step": 146500 }, { "epoch": 2.437647313133161, "grad_norm": 3.9806490112678588, "learning_rate": 9.383525593204584e-06, "loss": 0.6167, "step": 146600 }, { "epoch": 2.439310106876068, "grad_norm": 4.018327137487482, "learning_rate": 9.38212885789522e-06, "loss": 0.5818, "step": 146700 }, { "epoch": 2.440972900618975, "grad_norm": 3.2813736599634775, "learning_rate": 9.380730646291325e-06, "loss": 0.5778, "step": 146800 }, { "epoch": 2.442635694361882, "grad_norm": 4.080223129162738, "learning_rate": 9.379330958863943e-06, "loss": 0.5595, "step": 146900 }, { "epoch": 2.4442984881047893, "grad_norm": 3.4437546892017386, "learning_rate": 9.377929796084615e-06, "loss": 0.611, "step": 147000 }, { "epoch": 2.4459612818476963, "grad_norm": 4.108154168932564, "learning_rate": 9.376527158425378e-06, "loss": 0.578, "step": 147100 }, { "epoch": 2.4476240755906034, "grad_norm": 4.8148225228776935, "learning_rate": 9.375123046358764e-06, "loss": 0.5776, "step": 147200 }, { "epoch": 2.449286869333511, "grad_norm": 4.352947408358977, "learning_rate": 9.373717460357807e-06, "loss": 0.6093, "step": 147300 }, { "epoch": 2.450949663076418, "grad_norm": 3.3973117787198484, "learning_rate": 9.372310400896034e-06, "loss": 0.5784, "step": 147400 }, { "epoch": 2.452612456819325, "grad_norm": 3.2917404151621876, "learning_rate": 9.370901868447468e-06, "loss": 0.5819, "step": 147500 }, { "epoch": 2.4542752505622323, "grad_norm": 4.28623669996688, "learning_rate": 9.369491863486631e-06, "loss": 0.6067, "step": 147600 }, { "epoch": 2.4559380443051393, "grad_norm": 2.904114267110331, "learning_rate": 9.368080386488536e-06, "loss": 0.55, "step": 147700 }, { "epoch": 2.4576008380480463, "grad_norm": 3.186799106602535, "learning_rate": 9.366667437928698e-06, "loss": 0.5656, "step": 147800 }, { "epoch": 2.4592636317909538, "grad_norm": 3.43845039047919, "learning_rate": 9.365253018283123e-06, "loss": 0.5834, "step": 147900 }, { "epoch": 2.4609264255338608, "grad_norm": 2.742776520678141, "learning_rate": 9.363837128028318e-06, "loss": 0.5657, "step": 148000 }, { "epoch": 2.4625892192767678, "grad_norm": 3.6931878823813484, "learning_rate": 9.362419767641278e-06, "loss": 0.5853, "step": 148100 }, { "epoch": 2.4642520130196752, "grad_norm": 3.0561648290661663, "learning_rate": 9.3610009375995e-06, "loss": 0.5866, "step": 148200 }, { "epoch": 2.4659148067625822, "grad_norm": 4.176889820842016, "learning_rate": 9.359580638380971e-06, "loss": 0.5662, "step": 148300 }, { "epoch": 2.4675776005054892, "grad_norm": 3.081465490763675, "learning_rate": 9.358158870464177e-06, "loss": 0.5634, "step": 148400 }, { "epoch": 2.4692403942483963, "grad_norm": 3.701846161834131, "learning_rate": 9.356735634328097e-06, "loss": 0.5747, "step": 148500 }, { "epoch": 2.4709031879913037, "grad_norm": 2.2534024555842516, "learning_rate": 9.355310930452207e-06, "loss": 0.5661, "step": 148600 }, { "epoch": 2.4725659817342107, "grad_norm": 2.74471755565665, "learning_rate": 9.35388475931647e-06, "loss": 0.5765, "step": 148700 }, { "epoch": 2.4742287754771177, "grad_norm": 3.994312121621282, "learning_rate": 9.352457121401351e-06, "loss": 0.5839, "step": 148800 }, { "epoch": 2.475891569220025, "grad_norm": 2.384371291472118, "learning_rate": 9.351028017187807e-06, "loss": 0.5911, "step": 148900 }, { "epoch": 2.477554362962932, "grad_norm": 3.1230969808543434, "learning_rate": 9.34959744715729e-06, "loss": 0.5694, "step": 149000 }, { "epoch": 2.479217156705839, "grad_norm": 3.947123395566001, "learning_rate": 9.348165411791743e-06, "loss": 0.5746, "step": 149100 }, { "epoch": 2.4808799504487467, "grad_norm": 3.209218504526132, "learning_rate": 9.346731911573601e-06, "loss": 0.56, "step": 149200 }, { "epoch": 2.4825427441916537, "grad_norm": 3.6421302161650293, "learning_rate": 9.345296946985802e-06, "loss": 0.5997, "step": 149300 }, { "epoch": 2.4842055379345607, "grad_norm": 3.5367366987484967, "learning_rate": 9.343860518511767e-06, "loss": 0.5418, "step": 149400 }, { "epoch": 2.4858683316774677, "grad_norm": 2.8856821206460035, "learning_rate": 9.342422626635414e-06, "loss": 0.6169, "step": 149500 }, { "epoch": 2.487531125420375, "grad_norm": 3.1313292051387482, "learning_rate": 9.340983271841155e-06, "loss": 0.5928, "step": 149600 }, { "epoch": 2.489193919163282, "grad_norm": 3.179087480221559, "learning_rate": 9.339542454613895e-06, "loss": 0.5754, "step": 149700 }, { "epoch": 2.490856712906189, "grad_norm": 3.5176825051529175, "learning_rate": 9.338100175439029e-06, "loss": 0.5773, "step": 149800 }, { "epoch": 2.4925195066490966, "grad_norm": 3.2929189110302186, "learning_rate": 9.336656434802447e-06, "loss": 0.6069, "step": 149900 }, { "epoch": 2.4941823003920036, "grad_norm": 3.857493102333727, "learning_rate": 9.335211233190529e-06, "loss": 0.6103, "step": 150000 }, { "epoch": 2.4958450941349106, "grad_norm": 3.206433043429714, "learning_rate": 9.333764571090149e-06, "loss": 0.5898, "step": 150100 }, { "epoch": 2.497507887877818, "grad_norm": 4.787693797984458, "learning_rate": 9.332316448988675e-06, "loss": 0.5913, "step": 150200 }, { "epoch": 2.499170681620725, "grad_norm": 2.8034971997549265, "learning_rate": 9.330866867373962e-06, "loss": 0.58, "step": 150300 }, { "epoch": 2.500833475363632, "grad_norm": 3.322460747711808, "learning_rate": 9.329415826734362e-06, "loss": 0.5949, "step": 150400 }, { "epoch": 2.5024962691065396, "grad_norm": 2.802755492028114, "learning_rate": 9.327963327558713e-06, "loss": 0.5815, "step": 150500 }, { "epoch": 2.5041590628494466, "grad_norm": 4.0028039349732, "learning_rate": 9.32650937033635e-06, "loss": 0.6015, "step": 150600 }, { "epoch": 2.5058218565923536, "grad_norm": 3.9291006498243655, "learning_rate": 9.325053955557093e-06, "loss": 0.5983, "step": 150700 }, { "epoch": 2.507484650335261, "grad_norm": 4.424547372937454, "learning_rate": 9.323597083711258e-06, "loss": 0.5975, "step": 150800 }, { "epoch": 2.509147444078168, "grad_norm": 5.534061582729553, "learning_rate": 9.32213875528965e-06, "loss": 0.6026, "step": 150900 }, { "epoch": 2.510810237821075, "grad_norm": 2.635699308076873, "learning_rate": 9.320678970783565e-06, "loss": 0.5815, "step": 151000 }, { "epoch": 2.5124730315639825, "grad_norm": 2.939451355374091, "learning_rate": 9.319217730684792e-06, "loss": 0.5814, "step": 151100 }, { "epoch": 2.5141358253068895, "grad_norm": 3.7848558052793586, "learning_rate": 9.317755035485604e-06, "loss": 0.5685, "step": 151200 }, { "epoch": 2.5157986190497965, "grad_norm": 3.6588753383284396, "learning_rate": 9.31629088567877e-06, "loss": 0.6108, "step": 151300 }, { "epoch": 2.5174614127927035, "grad_norm": 3.028179282811651, "learning_rate": 9.314825281757547e-06, "loss": 0.571, "step": 151400 }, { "epoch": 2.5191242065356105, "grad_norm": 2.746399551622778, "learning_rate": 9.313358224215682e-06, "loss": 0.5619, "step": 151500 }, { "epoch": 2.520787000278518, "grad_norm": 5.780828554476531, "learning_rate": 9.311889713547414e-06, "loss": 0.5732, "step": 151600 }, { "epoch": 2.522449794021425, "grad_norm": 2.9881559663942934, "learning_rate": 9.310419750247463e-06, "loss": 0.5868, "step": 151700 }, { "epoch": 2.524112587764332, "grad_norm": 2.9724086075683585, "learning_rate": 9.308948334811053e-06, "loss": 0.5727, "step": 151800 }, { "epoch": 2.5257753815072395, "grad_norm": 3.683408338473065, "learning_rate": 9.307475467733879e-06, "loss": 0.5993, "step": 151900 }, { "epoch": 2.5274381752501465, "grad_norm": 4.109036748654433, "learning_rate": 9.306001149512145e-06, "loss": 0.5708, "step": 152000 }, { "epoch": 2.5291009689930535, "grad_norm": 6.039196641207457, "learning_rate": 9.304525380642528e-06, "loss": 0.5753, "step": 152100 }, { "epoch": 2.530763762735961, "grad_norm": 3.3925114788152237, "learning_rate": 9.3030481616222e-06, "loss": 0.6046, "step": 152200 }, { "epoch": 2.532426556478868, "grad_norm": 3.680048780660819, "learning_rate": 9.301569492948822e-06, "loss": 0.5521, "step": 152300 }, { "epoch": 2.534089350221775, "grad_norm": 3.0021431406930033, "learning_rate": 9.300089375120542e-06, "loss": 0.5919, "step": 152400 }, { "epoch": 2.5357521439646824, "grad_norm": 4.299932200698165, "learning_rate": 9.298607808635995e-06, "loss": 0.5586, "step": 152500 }, { "epoch": 2.5374149377075894, "grad_norm": 3.6567380967477248, "learning_rate": 9.29712479399431e-06, "loss": 0.5943, "step": 152600 }, { "epoch": 2.5390777314504964, "grad_norm": 3.3269883874473987, "learning_rate": 9.295640331695094e-06, "loss": 0.595, "step": 152700 }, { "epoch": 2.540740525193404, "grad_norm": 4.102927873492433, "learning_rate": 9.294154422238452e-06, "loss": 0.5862, "step": 152800 }, { "epoch": 2.542403318936311, "grad_norm": 3.973521412524965, "learning_rate": 9.292667066124972e-06, "loss": 0.5678, "step": 152900 }, { "epoch": 2.544066112679218, "grad_norm": 3.3057839731162946, "learning_rate": 9.291178263855723e-06, "loss": 0.5544, "step": 153000 }, { "epoch": 2.5457289064221253, "grad_norm": 3.098534823582393, "learning_rate": 9.289688015932272e-06, "loss": 0.5711, "step": 153100 }, { "epoch": 2.5473917001650324, "grad_norm": 3.425264666677207, "learning_rate": 9.288196322856668e-06, "loss": 0.6089, "step": 153200 }, { "epoch": 2.5490544939079394, "grad_norm": 3.025214787143506, "learning_rate": 9.286703185131448e-06, "loss": 0.5874, "step": 153300 }, { "epoch": 2.550717287650847, "grad_norm": 3.5784936760764374, "learning_rate": 9.285208603259632e-06, "loss": 0.5667, "step": 153400 }, { "epoch": 2.552380081393754, "grad_norm": 3.426211875177637, "learning_rate": 9.283712577744732e-06, "loss": 0.5685, "step": 153500 }, { "epoch": 2.554042875136661, "grad_norm": 3.3300978098683207, "learning_rate": 9.282215109090742e-06, "loss": 0.5948, "step": 153600 }, { "epoch": 2.555705668879568, "grad_norm": 3.343520088617618, "learning_rate": 9.280716197802144e-06, "loss": 0.5805, "step": 153700 }, { "epoch": 2.5573684626224753, "grad_norm": 2.94401614856652, "learning_rate": 9.279215844383908e-06, "loss": 0.5844, "step": 153800 }, { "epoch": 2.5590312563653823, "grad_norm": 6.340770992482069, "learning_rate": 9.277714049341486e-06, "loss": 0.5888, "step": 153900 }, { "epoch": 2.5606940501082893, "grad_norm": 2.7205399544308406, "learning_rate": 9.276210813180817e-06, "loss": 0.6025, "step": 154000 }, { "epoch": 2.5623568438511963, "grad_norm": 3.9240888783652377, "learning_rate": 9.274706136408326e-06, "loss": 0.5746, "step": 154100 }, { "epoch": 2.564019637594104, "grad_norm": 3.222810625776081, "learning_rate": 9.273200019530928e-06, "loss": 0.6319, "step": 154200 }, { "epoch": 2.565682431337011, "grad_norm": 3.494913355295289, "learning_rate": 9.27169246305601e-06, "loss": 0.5602, "step": 154300 }, { "epoch": 2.567345225079918, "grad_norm": 3.4377053444429806, "learning_rate": 9.270183467491458e-06, "loss": 0.5915, "step": 154400 }, { "epoch": 2.5690080188228253, "grad_norm": 2.322632869982996, "learning_rate": 9.268673033345637e-06, "loss": 0.5918, "step": 154500 }, { "epoch": 2.5706708125657323, "grad_norm": 4.069896205427323, "learning_rate": 9.267161161127396e-06, "loss": 0.5844, "step": 154600 }, { "epoch": 2.5723336063086393, "grad_norm": 4.0085064197059594, "learning_rate": 9.265647851346068e-06, "loss": 0.5939, "step": 154700 }, { "epoch": 2.5739964000515467, "grad_norm": 3.290951307170892, "learning_rate": 9.264133104511474e-06, "loss": 0.5948, "step": 154800 }, { "epoch": 2.5756591937944537, "grad_norm": 2.223862735724408, "learning_rate": 9.262616921133914e-06, "loss": 0.5902, "step": 154900 }, { "epoch": 2.5773219875373607, "grad_norm": 3.6116722180491223, "learning_rate": 9.261099301724177e-06, "loss": 0.5861, "step": 155000 }, { "epoch": 2.578984781280268, "grad_norm": 3.222588676365531, "learning_rate": 9.259580246793534e-06, "loss": 0.5847, "step": 155100 }, { "epoch": 2.580647575023175, "grad_norm": 3.9885805653111372, "learning_rate": 9.258059756853737e-06, "loss": 0.5814, "step": 155200 }, { "epoch": 2.582310368766082, "grad_norm": 2.5487746807178473, "learning_rate": 9.256537832417024e-06, "loss": 0.5663, "step": 155300 }, { "epoch": 2.5839731625089897, "grad_norm": 3.70303492614333, "learning_rate": 9.255014473996117e-06, "loss": 0.6184, "step": 155400 }, { "epoch": 2.5856359562518967, "grad_norm": 3.569794056780094, "learning_rate": 9.25348968210422e-06, "loss": 0.5992, "step": 155500 }, { "epoch": 2.5872987499948037, "grad_norm": 3.29388494783942, "learning_rate": 9.251963457255018e-06, "loss": 0.5786, "step": 155600 }, { "epoch": 2.588961543737711, "grad_norm": 3.291948641708227, "learning_rate": 9.250435799962682e-06, "loss": 0.5691, "step": 155700 }, { "epoch": 2.590624337480618, "grad_norm": 3.4837830044568925, "learning_rate": 9.248906710741863e-06, "loss": 0.6143, "step": 155800 }, { "epoch": 2.592287131223525, "grad_norm": 4.464884791630766, "learning_rate": 9.247376190107697e-06, "loss": 0.5817, "step": 155900 }, { "epoch": 2.5939499249664326, "grad_norm": 5.057482082941682, "learning_rate": 9.2458442385758e-06, "loss": 0.5925, "step": 156000 }, { "epoch": 2.5956127187093396, "grad_norm": 4.109979343457313, "learning_rate": 9.244310856662273e-06, "loss": 0.5929, "step": 156100 }, { "epoch": 2.5972755124522466, "grad_norm": 4.2509610214025075, "learning_rate": 9.242776044883693e-06, "loss": 0.5934, "step": 156200 }, { "epoch": 2.5989383061951536, "grad_norm": 2.496263045268177, "learning_rate": 9.241239803757125e-06, "loss": 0.6173, "step": 156300 }, { "epoch": 2.6006010999380607, "grad_norm": 3.410114771084597, "learning_rate": 9.239702133800112e-06, "loss": 0.5972, "step": 156400 }, { "epoch": 2.602263893680968, "grad_norm": 3.2504554801178767, "learning_rate": 9.238163035530679e-06, "loss": 0.5989, "step": 156500 }, { "epoch": 2.603926687423875, "grad_norm": 3.326418922798411, "learning_rate": 9.236622509467334e-06, "loss": 0.6053, "step": 156600 }, { "epoch": 2.605589481166782, "grad_norm": 2.656426387743378, "learning_rate": 9.235080556129062e-06, "loss": 0.5466, "step": 156700 }, { "epoch": 2.6072522749096896, "grad_norm": 3.4282962402504746, "learning_rate": 9.233537176035334e-06, "loss": 0.5884, "step": 156800 }, { "epoch": 2.6089150686525966, "grad_norm": 4.830549944435231, "learning_rate": 9.231992369706097e-06, "loss": 0.5853, "step": 156900 }, { "epoch": 2.6105778623955036, "grad_norm": 2.863047857835896, "learning_rate": 9.230446137661783e-06, "loss": 0.5587, "step": 157000 }, { "epoch": 2.612240656138411, "grad_norm": 2.802119160236031, "learning_rate": 9.2288984804233e-06, "loss": 0.5999, "step": 157100 }, { "epoch": 2.613903449881318, "grad_norm": 2.9638858691242076, "learning_rate": 9.227349398512037e-06, "loss": 0.5749, "step": 157200 }, { "epoch": 2.615566243624225, "grad_norm": 3.539555844227561, "learning_rate": 9.225798892449866e-06, "loss": 0.5599, "step": 157300 }, { "epoch": 2.6172290373671325, "grad_norm": 3.705604787728183, "learning_rate": 9.224246962759135e-06, "loss": 0.6054, "step": 157400 }, { "epoch": 2.6188918311100395, "grad_norm": 3.4677354005150405, "learning_rate": 9.222693609962676e-06, "loss": 0.5725, "step": 157500 }, { "epoch": 2.6205546248529465, "grad_norm": 3.599358197255506, "learning_rate": 9.221138834583795e-06, "loss": 0.5954, "step": 157600 }, { "epoch": 2.622217418595854, "grad_norm": 2.9566167345729313, "learning_rate": 9.21958263714628e-06, "loss": 0.5536, "step": 157700 }, { "epoch": 2.623880212338761, "grad_norm": 3.7239229086287833, "learning_rate": 9.218025018174399e-06, "loss": 0.6089, "step": 157800 }, { "epoch": 2.625543006081668, "grad_norm": 2.800454583739947, "learning_rate": 9.216465978192899e-06, "loss": 0.5887, "step": 157900 }, { "epoch": 2.6272057998245755, "grad_norm": 2.6353771147117238, "learning_rate": 9.214905517727003e-06, "loss": 0.5963, "step": 158000 }, { "epoch": 2.6288685935674825, "grad_norm": 2.9594502850052766, "learning_rate": 9.213343637302412e-06, "loss": 0.62, "step": 158100 }, { "epoch": 2.6305313873103895, "grad_norm": 3.170459425183372, "learning_rate": 9.211780337445313e-06, "loss": 0.5844, "step": 158200 }, { "epoch": 2.632194181053297, "grad_norm": 5.944736372377693, "learning_rate": 9.210215618682363e-06, "loss": 0.5713, "step": 158300 }, { "epoch": 2.633856974796204, "grad_norm": 3.2272618847589927, "learning_rate": 9.2086494815407e-06, "loss": 0.5884, "step": 158400 }, { "epoch": 2.635519768539111, "grad_norm": 3.0627148669924527, "learning_rate": 9.207081926547936e-06, "loss": 0.5996, "step": 158500 }, { "epoch": 2.637182562282018, "grad_norm": 3.9988072146103666, "learning_rate": 9.205512954232171e-06, "loss": 0.5507, "step": 158600 }, { "epoch": 2.6388453560249254, "grad_norm": 3.93148283884591, "learning_rate": 9.203942565121973e-06, "loss": 0.5801, "step": 158700 }, { "epoch": 2.6405081497678324, "grad_norm": 2.749354489130315, "learning_rate": 9.202370759746388e-06, "loss": 0.5998, "step": 158800 }, { "epoch": 2.6421709435107394, "grad_norm": 3.725675623806561, "learning_rate": 9.200797538634943e-06, "loss": 0.5948, "step": 158900 }, { "epoch": 2.6438337372536465, "grad_norm": 3.6056491334829834, "learning_rate": 9.199222902317641e-06, "loss": 0.5879, "step": 159000 }, { "epoch": 2.645496530996554, "grad_norm": 2.3558294579816903, "learning_rate": 9.197646851324959e-06, "loss": 0.5691, "step": 159100 }, { "epoch": 2.647159324739461, "grad_norm": 3.61222233558821, "learning_rate": 9.196069386187854e-06, "loss": 0.5833, "step": 159200 }, { "epoch": 2.648822118482368, "grad_norm": 3.49883213839362, "learning_rate": 9.194490507437756e-06, "loss": 0.6047, "step": 159300 }, { "epoch": 2.6504849122252754, "grad_norm": 3.104782672588254, "learning_rate": 9.192910215606576e-06, "loss": 0.6013, "step": 159400 }, { "epoch": 2.6521477059681824, "grad_norm": 3.1990183055831887, "learning_rate": 9.191328511226696e-06, "loss": 0.5747, "step": 159500 }, { "epoch": 2.6538104997110894, "grad_norm": 2.818550788791299, "learning_rate": 9.189745394830977e-06, "loss": 0.5896, "step": 159600 }, { "epoch": 2.655473293453997, "grad_norm": 3.8081186896815167, "learning_rate": 9.188160866952753e-06, "loss": 0.6007, "step": 159700 }, { "epoch": 2.657136087196904, "grad_norm": 3.2359733438209024, "learning_rate": 9.18657492812584e-06, "loss": 0.5915, "step": 159800 }, { "epoch": 2.658798880939811, "grad_norm": 2.446295125835061, "learning_rate": 9.184987578884519e-06, "loss": 0.5702, "step": 159900 }, { "epoch": 2.6604616746827183, "grad_norm": 3.8679337461635455, "learning_rate": 9.183398819763554e-06, "loss": 0.5844, "step": 160000 }, { "epoch": 2.6621244684256253, "grad_norm": 2.574683132752975, "learning_rate": 9.181808651298183e-06, "loss": 0.5645, "step": 160100 }, { "epoch": 2.6637872621685323, "grad_norm": 3.657161180980428, "learning_rate": 9.180217074024116e-06, "loss": 0.5958, "step": 160200 }, { "epoch": 2.66545005591144, "grad_norm": 2.926903169592543, "learning_rate": 9.178624088477541e-06, "loss": 0.5935, "step": 160300 }, { "epoch": 2.667112849654347, "grad_norm": 4.946176905038814, "learning_rate": 9.177029695195115e-06, "loss": 0.5855, "step": 160400 }, { "epoch": 2.668775643397254, "grad_norm": 5.689723671880836, "learning_rate": 9.175433894713976e-06, "loss": 0.5989, "step": 160500 }, { "epoch": 2.6704384371401613, "grad_norm": 2.9129421024514115, "learning_rate": 9.173836687571733e-06, "loss": 0.5944, "step": 160600 }, { "epoch": 2.6721012308830683, "grad_norm": 3.926244267330042, "learning_rate": 9.172238074306467e-06, "loss": 0.5654, "step": 160700 }, { "epoch": 2.6737640246259753, "grad_norm": 3.3544259400691523, "learning_rate": 9.170638055456735e-06, "loss": 0.5854, "step": 160800 }, { "epoch": 2.6754268183688827, "grad_norm": 2.9778923294680046, "learning_rate": 9.169036631561567e-06, "loss": 0.5737, "step": 160900 }, { "epoch": 2.6770896121117898, "grad_norm": 3.5173900202364643, "learning_rate": 9.167433803160467e-06, "loss": 0.5984, "step": 161000 }, { "epoch": 2.6787524058546968, "grad_norm": 4.104455931853696, "learning_rate": 9.165829570793413e-06, "loss": 0.5745, "step": 161100 }, { "epoch": 2.6804151995976038, "grad_norm": 3.2378710348590234, "learning_rate": 9.164223935000852e-06, "loss": 0.5721, "step": 161200 }, { "epoch": 2.6820779933405112, "grad_norm": 3.58304846612646, "learning_rate": 9.162616896323707e-06, "loss": 0.5822, "step": 161300 }, { "epoch": 2.6837407870834182, "grad_norm": 4.1458215701128065, "learning_rate": 9.161008455303372e-06, "loss": 0.5855, "step": 161400 }, { "epoch": 2.6854035808263252, "grad_norm": 2.6907336827841903, "learning_rate": 9.159398612481716e-06, "loss": 0.6272, "step": 161500 }, { "epoch": 2.6870663745692323, "grad_norm": 2.936489849108127, "learning_rate": 9.157787368401078e-06, "loss": 0.5959, "step": 161600 }, { "epoch": 2.6887291683121397, "grad_norm": 3.47279509111582, "learning_rate": 9.15617472360427e-06, "loss": 0.5835, "step": 161700 }, { "epoch": 2.6903919620550467, "grad_norm": 2.8198617595414457, "learning_rate": 9.154560678634574e-06, "loss": 0.5819, "step": 161800 }, { "epoch": 2.6920547557979537, "grad_norm": 3.170723666969279, "learning_rate": 9.152945234035749e-06, "loss": 0.611, "step": 161900 }, { "epoch": 2.693717549540861, "grad_norm": 4.03680196468221, "learning_rate": 9.151328390352018e-06, "loss": 0.5838, "step": 162000 }, { "epoch": 2.695380343283768, "grad_norm": 3.346111077386709, "learning_rate": 9.149710148128082e-06, "loss": 0.5972, "step": 162100 }, { "epoch": 2.697043137026675, "grad_norm": 2.49637652041409, "learning_rate": 9.148090507909106e-06, "loss": 0.5676, "step": 162200 }, { "epoch": 2.6987059307695827, "grad_norm": 3.4524738354028788, "learning_rate": 9.146469470240735e-06, "loss": 0.5997, "step": 162300 }, { "epoch": 2.7003687245124897, "grad_norm": 2.922691998578699, "learning_rate": 9.144847035669079e-06, "loss": 0.6105, "step": 162400 }, { "epoch": 2.7020315182553967, "grad_norm": 2.9895704542744874, "learning_rate": 9.143223204740718e-06, "loss": 0.5782, "step": 162500 }, { "epoch": 2.703694311998304, "grad_norm": 3.2151290531999517, "learning_rate": 9.141597978002705e-06, "loss": 0.5662, "step": 162600 }, { "epoch": 2.705357105741211, "grad_norm": 2.933586540923926, "learning_rate": 9.139971356002564e-06, "loss": 0.572, "step": 162700 }, { "epoch": 2.707019899484118, "grad_norm": 3.6103776829150935, "learning_rate": 9.138343339288284e-06, "loss": 0.5974, "step": 162800 }, { "epoch": 2.7086826932270256, "grad_norm": 2.5414583239083455, "learning_rate": 9.13671392840833e-06, "loss": 0.5796, "step": 162900 }, { "epoch": 2.7103454869699326, "grad_norm": 4.2307886103525005, "learning_rate": 9.135083123911632e-06, "loss": 0.5803, "step": 163000 }, { "epoch": 2.7120082807128396, "grad_norm": 2.861366287206757, "learning_rate": 9.133450926347591e-06, "loss": 0.5813, "step": 163100 }, { "epoch": 2.713671074455747, "grad_norm": 2.949573373005335, "learning_rate": 9.13181733626608e-06, "loss": 0.6062, "step": 163200 }, { "epoch": 2.715333868198654, "grad_norm": 2.2731833147898532, "learning_rate": 9.130182354217439e-06, "loss": 0.5809, "step": 163300 }, { "epoch": 2.716996661941561, "grad_norm": 4.412475640259958, "learning_rate": 9.128545980752474e-06, "loss": 0.5836, "step": 163400 }, { "epoch": 2.718659455684468, "grad_norm": 3.402529774285965, "learning_rate": 9.126908216422465e-06, "loss": 0.6263, "step": 163500 }, { "epoch": 2.7203222494273755, "grad_norm": 3.6403895038358174, "learning_rate": 9.125269061779157e-06, "loss": 0.5963, "step": 163600 }, { "epoch": 2.7219850431702826, "grad_norm": 2.5868625521862425, "learning_rate": 9.123628517374765e-06, "loss": 0.5845, "step": 163700 }, { "epoch": 2.7236478369131896, "grad_norm": 3.315887858515708, "learning_rate": 9.12198658376197e-06, "loss": 0.5961, "step": 163800 }, { "epoch": 2.7253106306560966, "grad_norm": 3.3032470017042965, "learning_rate": 9.120343261493925e-06, "loss": 0.5826, "step": 163900 }, { "epoch": 2.726973424399004, "grad_norm": 3.306829924688429, "learning_rate": 9.11869855112425e-06, "loss": 0.5927, "step": 164000 }, { "epoch": 2.728636218141911, "grad_norm": 3.0937884916866003, "learning_rate": 9.117052453207025e-06, "loss": 0.5542, "step": 164100 }, { "epoch": 2.730299011884818, "grad_norm": 3.3732236713686152, "learning_rate": 9.11540496829681e-06, "loss": 0.5812, "step": 164200 }, { "epoch": 2.7319618056277255, "grad_norm": 3.469111017429254, "learning_rate": 9.11375609694862e-06, "loss": 0.5959, "step": 164300 }, { "epoch": 2.7336245993706325, "grad_norm": 2.8234350257183722, "learning_rate": 9.11210583971795e-06, "loss": 0.5881, "step": 164400 }, { "epoch": 2.7352873931135395, "grad_norm": 3.1162032900233423, "learning_rate": 9.110454197160748e-06, "loss": 0.5523, "step": 164500 }, { "epoch": 2.736950186856447, "grad_norm": 3.5412524593073678, "learning_rate": 9.108801169833441e-06, "loss": 0.5547, "step": 164600 }, { "epoch": 2.738612980599354, "grad_norm": 4.545310038102694, "learning_rate": 9.107146758292912e-06, "loss": 0.589, "step": 164700 }, { "epoch": 2.740275774342261, "grad_norm": 4.546170499884451, "learning_rate": 9.105490963096521e-06, "loss": 0.6019, "step": 164800 }, { "epoch": 2.7419385680851684, "grad_norm": 2.8911505514765103, "learning_rate": 9.103833784802083e-06, "loss": 0.5744, "step": 164900 }, { "epoch": 2.7436013618280755, "grad_norm": 3.140214568412663, "learning_rate": 9.102175223967889e-06, "loss": 0.5722, "step": 165000 }, { "epoch": 2.7452641555709825, "grad_norm": 4.171955096819655, "learning_rate": 9.100515281152686e-06, "loss": 0.601, "step": 165100 }, { "epoch": 2.74692694931389, "grad_norm": 3.034522352572949, "learning_rate": 9.098853956915699e-06, "loss": 0.555, "step": 165200 }, { "epoch": 2.748589743056797, "grad_norm": 3.649823017589684, "learning_rate": 9.097191251816604e-06, "loss": 0.5736, "step": 165300 }, { "epoch": 2.750252536799704, "grad_norm": 3.8026043267723812, "learning_rate": 9.095527166415554e-06, "loss": 0.6397, "step": 165400 }, { "epoch": 2.7519153305426114, "grad_norm": 3.161478257735579, "learning_rate": 9.09386170127316e-06, "loss": 0.5617, "step": 165500 }, { "epoch": 2.7535781242855184, "grad_norm": 4.087411360097381, "learning_rate": 9.092194856950503e-06, "loss": 0.5822, "step": 165600 }, { "epoch": 2.7552409180284254, "grad_norm": 3.0103723470938286, "learning_rate": 9.090526634009123e-06, "loss": 0.6157, "step": 165700 }, { "epoch": 2.756903711771333, "grad_norm": 3.3842559235997127, "learning_rate": 9.08885703301103e-06, "loss": 0.5722, "step": 165800 }, { "epoch": 2.75856650551424, "grad_norm": 3.111961462196214, "learning_rate": 9.087186054518691e-06, "loss": 0.5633, "step": 165900 }, { "epoch": 2.760229299257147, "grad_norm": 3.353163157828044, "learning_rate": 9.085513699095047e-06, "loss": 0.5822, "step": 166000 }, { "epoch": 2.761892093000054, "grad_norm": 3.6842048972869814, "learning_rate": 9.083839967303494e-06, "loss": 0.5789, "step": 166100 }, { "epoch": 2.7635548867429613, "grad_norm": 4.450400196352232, "learning_rate": 9.082164859707897e-06, "loss": 0.5788, "step": 166200 }, { "epoch": 2.7652176804858684, "grad_norm": 3.697627064552897, "learning_rate": 9.08048837687258e-06, "loss": 0.5999, "step": 166300 }, { "epoch": 2.7668804742287754, "grad_norm": 5.914602968035506, "learning_rate": 9.078810519362335e-06, "loss": 0.5478, "step": 166400 }, { "epoch": 2.7685432679716824, "grad_norm": 2.77567788831181, "learning_rate": 9.077131287742416e-06, "loss": 0.5902, "step": 166500 }, { "epoch": 2.77020606171459, "grad_norm": 5.3394807107105295, "learning_rate": 9.075450682578539e-06, "loss": 0.6009, "step": 166600 }, { "epoch": 2.771868855457497, "grad_norm": 2.8519461783748787, "learning_rate": 9.07376870443688e-06, "loss": 0.5772, "step": 166700 }, { "epoch": 2.773531649200404, "grad_norm": 3.777420243141979, "learning_rate": 9.072085353884082e-06, "loss": 0.5659, "step": 166800 }, { "epoch": 2.7751944429433113, "grad_norm": 2.967900363848339, "learning_rate": 9.070400631487249e-06, "loss": 0.5996, "step": 166900 }, { "epoch": 2.7768572366862183, "grad_norm": 3.0726045292951536, "learning_rate": 9.068714537813948e-06, "loss": 0.589, "step": 167000 }, { "epoch": 2.7785200304291253, "grad_norm": 3.5499884883196833, "learning_rate": 9.067027073432204e-06, "loss": 0.6099, "step": 167100 }, { "epoch": 2.7801828241720328, "grad_norm": 2.287390319688794, "learning_rate": 9.065338238910506e-06, "loss": 0.585, "step": 167200 }, { "epoch": 2.78184561791494, "grad_norm": 3.435294950390635, "learning_rate": 9.06364803481781e-06, "loss": 0.5731, "step": 167300 }, { "epoch": 2.783508411657847, "grad_norm": 3.0193866564262253, "learning_rate": 9.061956461723523e-06, "loss": 0.5813, "step": 167400 }, { "epoch": 2.7851712054007542, "grad_norm": 5.927320882377203, "learning_rate": 9.060263520197521e-06, "loss": 0.5893, "step": 167500 }, { "epoch": 2.7868339991436613, "grad_norm": 3.417747405339399, "learning_rate": 9.05856921081014e-06, "loss": 0.5733, "step": 167600 }, { "epoch": 2.7884967928865683, "grad_norm": 3.3279924730403616, "learning_rate": 9.056873534132174e-06, "loss": 0.5784, "step": 167700 }, { "epoch": 2.7901595866294757, "grad_norm": 4.039246994923229, "learning_rate": 9.055176490734881e-06, "loss": 0.6025, "step": 167800 }, { "epoch": 2.7918223803723827, "grad_norm": 2.9794833874821562, "learning_rate": 9.053478081189974e-06, "loss": 0.547, "step": 167900 }, { "epoch": 2.7934851741152897, "grad_norm": 4.043346490714273, "learning_rate": 9.051778306069633e-06, "loss": 0.5663, "step": 168000 }, { "epoch": 2.795147967858197, "grad_norm": 3.5800882098107887, "learning_rate": 9.050077165946494e-06, "loss": 0.6052, "step": 168100 }, { "epoch": 2.796810761601104, "grad_norm": 3.0036323777719898, "learning_rate": 9.048374661393653e-06, "loss": 0.5903, "step": 168200 }, { "epoch": 2.798473555344011, "grad_norm": 2.9058904522232156, "learning_rate": 9.046670792984668e-06, "loss": 0.5716, "step": 168300 }, { "epoch": 2.8001363490869187, "grad_norm": 3.6564908371992186, "learning_rate": 9.044965561293553e-06, "loss": 0.5498, "step": 168400 }, { "epoch": 2.8017991428298257, "grad_norm": 2.9728416090707213, "learning_rate": 9.043258966894786e-06, "loss": 0.5994, "step": 168500 }, { "epoch": 2.8034619365727327, "grad_norm": 2.6596154700568673, "learning_rate": 9.041551010363298e-06, "loss": 0.5853, "step": 168600 }, { "epoch": 2.8051247303156397, "grad_norm": 3.60765333979242, "learning_rate": 9.039841692274485e-06, "loss": 0.5736, "step": 168700 }, { "epoch": 2.8067875240585467, "grad_norm": 3.206261074454202, "learning_rate": 9.038131013204197e-06, "loss": 0.5772, "step": 168800 }, { "epoch": 2.808450317801454, "grad_norm": 3.701819733782826, "learning_rate": 9.036418973728744e-06, "loss": 0.5623, "step": 168900 }, { "epoch": 2.810113111544361, "grad_norm": 3.2913037763507913, "learning_rate": 9.034705574424896e-06, "loss": 0.5977, "step": 169000 }, { "epoch": 2.811775905287268, "grad_norm": 3.3235084852255943, "learning_rate": 9.032990815869877e-06, "loss": 0.589, "step": 169100 }, { "epoch": 2.8134386990301756, "grad_norm": 3.6060112610851043, "learning_rate": 9.031274698641377e-06, "loss": 0.596, "step": 169200 }, { "epoch": 2.8151014927730826, "grad_norm": 3.1613899287334806, "learning_rate": 9.029557223317534e-06, "loss": 0.5802, "step": 169300 }, { "epoch": 2.8167642865159896, "grad_norm": 5.387267123878764, "learning_rate": 9.02783839047695e-06, "loss": 0.5623, "step": 169400 }, { "epoch": 2.818427080258897, "grad_norm": 2.3608484251952406, "learning_rate": 9.026118200698681e-06, "loss": 0.5859, "step": 169500 }, { "epoch": 2.820089874001804, "grad_norm": 3.3719352613699214, "learning_rate": 9.02439665456224e-06, "loss": 0.5795, "step": 169600 }, { "epoch": 2.821752667744711, "grad_norm": 3.449030507303671, "learning_rate": 9.022673752647602e-06, "loss": 0.5704, "step": 169700 }, { "epoch": 2.8234154614876186, "grad_norm": 3.629584992838947, "learning_rate": 9.020949495535194e-06, "loss": 0.5844, "step": 169800 }, { "epoch": 2.8250782552305256, "grad_norm": 2.5368736480149776, "learning_rate": 9.019223883805898e-06, "loss": 0.5726, "step": 169900 }, { "epoch": 2.8267410489734326, "grad_norm": 3.587666491393064, "learning_rate": 9.017496918041058e-06, "loss": 0.5721, "step": 170000 }, { "epoch": 2.82840384271634, "grad_norm": 3.7099137972116853, "learning_rate": 9.01576859882247e-06, "loss": 0.5819, "step": 170100 }, { "epoch": 2.830066636459247, "grad_norm": 3.090224351842192, "learning_rate": 9.014038926732387e-06, "loss": 0.5951, "step": 170200 }, { "epoch": 2.831729430202154, "grad_norm": 2.762183024977617, "learning_rate": 9.012307902353519e-06, "loss": 0.5785, "step": 170300 }, { "epoch": 2.8333922239450615, "grad_norm": 3.9773406342500635, "learning_rate": 9.010575526269027e-06, "loss": 0.5702, "step": 170400 }, { "epoch": 2.8350550176879685, "grad_norm": 6.121169751815687, "learning_rate": 9.008841799062535e-06, "loss": 0.566, "step": 170500 }, { "epoch": 2.8367178114308755, "grad_norm": 3.651626411583912, "learning_rate": 9.007106721318117e-06, "loss": 0.5772, "step": 170600 }, { "epoch": 2.838380605173783, "grad_norm": 4.073932962006073, "learning_rate": 9.005370293620302e-06, "loss": 0.5858, "step": 170700 }, { "epoch": 2.84004339891669, "grad_norm": 4.418866806452721, "learning_rate": 9.003632516554075e-06, "loss": 0.5297, "step": 170800 }, { "epoch": 2.841706192659597, "grad_norm": 3.448914053535341, "learning_rate": 9.001893390704874e-06, "loss": 0.5818, "step": 170900 }, { "epoch": 2.843368986402504, "grad_norm": 4.121124190584532, "learning_rate": 9.000152916658596e-06, "loss": 0.5289, "step": 171000 }, { "epoch": 2.8450317801454115, "grad_norm": 3.421894357231888, "learning_rate": 8.998411095001588e-06, "loss": 0.5661, "step": 171100 }, { "epoch": 2.8466945738883185, "grad_norm": 3.459465620324796, "learning_rate": 8.99666792632065e-06, "loss": 0.5966, "step": 171200 }, { "epoch": 2.8483573676312255, "grad_norm": 2.6664658586741674, "learning_rate": 8.99492341120304e-06, "loss": 0.5905, "step": 171300 }, { "epoch": 2.8500201613741325, "grad_norm": 4.428553194977593, "learning_rate": 8.993177550236464e-06, "loss": 0.5715, "step": 171400 }, { "epoch": 2.85168295511704, "grad_norm": 3.7378778497338665, "learning_rate": 8.99143034400909e-06, "loss": 0.5816, "step": 171500 }, { "epoch": 2.853345748859947, "grad_norm": 3.578211241276745, "learning_rate": 8.98968179310953e-06, "loss": 0.6093, "step": 171600 }, { "epoch": 2.855008542602854, "grad_norm": 4.697924316081436, "learning_rate": 8.987931898126854e-06, "loss": 0.608, "step": 171700 }, { "epoch": 2.8566713363457614, "grad_norm": 3.270036623654067, "learning_rate": 8.986180659650583e-06, "loss": 0.5789, "step": 171800 }, { "epoch": 2.8583341300886684, "grad_norm": 2.365527654373908, "learning_rate": 8.984428078270693e-06, "loss": 0.5723, "step": 171900 }, { "epoch": 2.8599969238315754, "grad_norm": 3.8851668356012636, "learning_rate": 8.98267415457761e-06, "loss": 0.5694, "step": 172000 }, { "epoch": 2.861659717574483, "grad_norm": 3.7043838247360665, "learning_rate": 8.980918889162213e-06, "loss": 0.5863, "step": 172100 }, { "epoch": 2.86332251131739, "grad_norm": 3.7743107934322935, "learning_rate": 8.979162282615832e-06, "loss": 0.5596, "step": 172200 }, { "epoch": 2.864985305060297, "grad_norm": 3.4062586066125595, "learning_rate": 8.977404335530251e-06, "loss": 0.5933, "step": 172300 }, { "epoch": 2.8666480988032044, "grad_norm": 3.567081800128531, "learning_rate": 8.975645048497706e-06, "loss": 0.5789, "step": 172400 }, { "epoch": 2.8683108925461114, "grad_norm": 2.8677170482941277, "learning_rate": 8.973884422110879e-06, "loss": 0.5837, "step": 172500 }, { "epoch": 2.8699736862890184, "grad_norm": 3.7471352403089813, "learning_rate": 8.97212245696291e-06, "loss": 0.5813, "step": 172600 }, { "epoch": 2.871636480031926, "grad_norm": 3.9764891918445584, "learning_rate": 8.970359153647388e-06, "loss": 0.5689, "step": 172700 }, { "epoch": 2.873299273774833, "grad_norm": 3.2042656532741955, "learning_rate": 8.968594512758348e-06, "loss": 0.5991, "step": 172800 }, { "epoch": 2.87496206751774, "grad_norm": 2.952354253040721, "learning_rate": 8.966828534890283e-06, "loss": 0.5588, "step": 172900 }, { "epoch": 2.8766248612606473, "grad_norm": 2.6052906920194356, "learning_rate": 8.965061220638132e-06, "loss": 0.6174, "step": 173000 }, { "epoch": 2.8782876550035543, "grad_norm": 2.3699182922194164, "learning_rate": 8.963292570597285e-06, "loss": 0.5823, "step": 173100 }, { "epoch": 2.8799504487464613, "grad_norm": 4.221152602935893, "learning_rate": 8.96152258536358e-06, "loss": 0.5664, "step": 173200 }, { "epoch": 2.881613242489369, "grad_norm": 3.117586246686879, "learning_rate": 8.95975126553331e-06, "loss": 0.5659, "step": 173300 }, { "epoch": 2.883276036232276, "grad_norm": 3.233497302219846, "learning_rate": 8.957978611703214e-06, "loss": 0.5866, "step": 173400 }, { "epoch": 2.884938829975183, "grad_norm": 2.4962780707431014, "learning_rate": 8.956204624470482e-06, "loss": 0.5705, "step": 173500 }, { "epoch": 2.88660162371809, "grad_norm": 3.923185674542617, "learning_rate": 8.954429304432748e-06, "loss": 0.5654, "step": 173600 }, { "epoch": 2.888264417460997, "grad_norm": 3.0219748977986693, "learning_rate": 8.952652652188104e-06, "loss": 0.5688, "step": 173700 }, { "epoch": 2.8899272112039043, "grad_norm": 2.7495843413536485, "learning_rate": 8.950874668335082e-06, "loss": 0.5815, "step": 173800 }, { "epoch": 2.8915900049468113, "grad_norm": 3.0802527022189388, "learning_rate": 8.94909535347267e-06, "loss": 0.5957, "step": 173900 }, { "epoch": 2.8932527986897183, "grad_norm": 3.5765752761428744, "learning_rate": 8.947314708200302e-06, "loss": 0.5549, "step": 174000 }, { "epoch": 2.8949155924326258, "grad_norm": 2.5115366725974173, "learning_rate": 8.945532733117853e-06, "loss": 0.5767, "step": 174100 }, { "epoch": 2.8965783861755328, "grad_norm": 3.8750457340157682, "learning_rate": 8.943749428825657e-06, "loss": 0.5944, "step": 174200 }, { "epoch": 2.8982411799184398, "grad_norm": 3.878223658019258, "learning_rate": 8.941964795924492e-06, "loss": 0.5773, "step": 174300 }, { "epoch": 2.8999039736613472, "grad_norm": 3.2282620228534094, "learning_rate": 8.94017883501558e-06, "loss": 0.5959, "step": 174400 }, { "epoch": 2.9015667674042542, "grad_norm": 4.055408368041787, "learning_rate": 8.938391546700593e-06, "loss": 0.6004, "step": 174500 }, { "epoch": 2.9032295611471612, "grad_norm": 4.252890551171671, "learning_rate": 8.936602931581652e-06, "loss": 0.5926, "step": 174600 }, { "epoch": 2.9048923548900687, "grad_norm": 4.553004221191615, "learning_rate": 8.934812990261322e-06, "loss": 0.5768, "step": 174700 }, { "epoch": 2.9065551486329757, "grad_norm": 2.540090789561342, "learning_rate": 8.933021723342616e-06, "loss": 0.5826, "step": 174800 }, { "epoch": 2.9082179423758827, "grad_norm": 3.0060147125707615, "learning_rate": 8.931229131428996e-06, "loss": 0.5461, "step": 174900 }, { "epoch": 2.90988073611879, "grad_norm": 4.859286877320861, "learning_rate": 8.929435215124363e-06, "loss": 0.569, "step": 175000 }, { "epoch": 2.911543529861697, "grad_norm": 3.2256266447123254, "learning_rate": 8.927639975033072e-06, "loss": 0.5829, "step": 175100 }, { "epoch": 2.913206323604604, "grad_norm": 3.3230074011583586, "learning_rate": 8.925843411759922e-06, "loss": 0.5745, "step": 175200 }, { "epoch": 2.9148691173475116, "grad_norm": 4.675492895660088, "learning_rate": 8.924045525910156e-06, "loss": 0.5757, "step": 175300 }, { "epoch": 2.9165319110904186, "grad_norm": 5.742645199804665, "learning_rate": 8.922246318089462e-06, "loss": 0.5693, "step": 175400 }, { "epoch": 2.9181947048333257, "grad_norm": 3.1878479675872744, "learning_rate": 8.920445788903978e-06, "loss": 0.6284, "step": 175500 }, { "epoch": 2.919857498576233, "grad_norm": 2.898960045322078, "learning_rate": 8.91864393896028e-06, "loss": 0.5936, "step": 175600 }, { "epoch": 2.92152029231914, "grad_norm": 3.182533121523528, "learning_rate": 8.916840768865394e-06, "loss": 0.5774, "step": 175700 }, { "epoch": 2.923183086062047, "grad_norm": 2.6682278040630933, "learning_rate": 8.915036279226792e-06, "loss": 0.5862, "step": 175800 }, { "epoch": 2.924845879804954, "grad_norm": 2.5094932566653525, "learning_rate": 8.913230470652387e-06, "loss": 0.5604, "step": 175900 }, { "epoch": 2.9265086735478616, "grad_norm": 2.9085036749350786, "learning_rate": 8.911423343750536e-06, "loss": 0.5759, "step": 176000 }, { "epoch": 2.9281714672907686, "grad_norm": 4.414601151771383, "learning_rate": 8.909614899130044e-06, "loss": 0.5846, "step": 176100 }, { "epoch": 2.9298342610336756, "grad_norm": 3.7192099051142833, "learning_rate": 8.907805137400154e-06, "loss": 0.5762, "step": 176200 }, { "epoch": 2.9314970547765826, "grad_norm": 5.559183505178442, "learning_rate": 8.90599405917056e-06, "loss": 0.5745, "step": 176300 }, { "epoch": 2.93315984851949, "grad_norm": 4.893971812775231, "learning_rate": 8.904181665051395e-06, "loss": 0.5728, "step": 176400 }, { "epoch": 2.934822642262397, "grad_norm": 3.5376322470765147, "learning_rate": 8.902367955653233e-06, "loss": 0.5884, "step": 176500 }, { "epoch": 2.936485436005304, "grad_norm": 5.010326862446793, "learning_rate": 8.9005529315871e-06, "loss": 0.6117, "step": 176600 }, { "epoch": 2.9381482297482115, "grad_norm": 3.0482958315560706, "learning_rate": 8.898736593464453e-06, "loss": 0.6018, "step": 176700 }, { "epoch": 2.9398110234911186, "grad_norm": 3.351359183815121, "learning_rate": 8.8969189418972e-06, "loss": 0.5992, "step": 176800 }, { "epoch": 2.9414738172340256, "grad_norm": 3.0580737369663087, "learning_rate": 8.895099977497691e-06, "loss": 0.5682, "step": 176900 }, { "epoch": 2.943136610976933, "grad_norm": 3.282589582416521, "learning_rate": 8.893279700878714e-06, "loss": 0.5709, "step": 177000 }, { "epoch": 2.94479940471984, "grad_norm": 3.5899031157533456, "learning_rate": 8.891458112653506e-06, "loss": 0.5893, "step": 177100 }, { "epoch": 2.946462198462747, "grad_norm": 3.3338792113442177, "learning_rate": 8.889635213435736e-06, "loss": 0.5728, "step": 177200 }, { "epoch": 2.9481249922056545, "grad_norm": 2.4662950562680583, "learning_rate": 8.887811003839522e-06, "loss": 0.5286, "step": 177300 }, { "epoch": 2.9497877859485615, "grad_norm": 3.1261052615688065, "learning_rate": 8.885985484479425e-06, "loss": 0.5699, "step": 177400 }, { "epoch": 2.9514505796914685, "grad_norm": 4.645528079542554, "learning_rate": 8.884158655970438e-06, "loss": 0.5597, "step": 177500 }, { "epoch": 2.953113373434376, "grad_norm": 3.944967569443682, "learning_rate": 8.882330518928006e-06, "loss": 0.5617, "step": 177600 }, { "epoch": 2.954776167177283, "grad_norm": 4.2351803430315025, "learning_rate": 8.880501073968008e-06, "loss": 0.563, "step": 177700 }, { "epoch": 2.95643896092019, "grad_norm": 3.4528007848625912, "learning_rate": 8.878670321706764e-06, "loss": 0.5888, "step": 177800 }, { "epoch": 2.9581017546630974, "grad_norm": 2.702520875595441, "learning_rate": 8.876838262761037e-06, "loss": 0.6059, "step": 177900 }, { "epoch": 2.9597645484060044, "grad_norm": 3.5922935504747326, "learning_rate": 8.875004897748028e-06, "loss": 0.5779, "step": 178000 }, { "epoch": 2.9614273421489115, "grad_norm": 3.9193498780951983, "learning_rate": 8.873170227285382e-06, "loss": 0.5689, "step": 178100 }, { "epoch": 2.963090135891819, "grad_norm": 5.152347719640892, "learning_rate": 8.871334251991178e-06, "loss": 0.5897, "step": 178200 }, { "epoch": 2.964752929634726, "grad_norm": 3.7193837763199933, "learning_rate": 8.869496972483935e-06, "loss": 0.6042, "step": 178300 }, { "epoch": 2.966415723377633, "grad_norm": 3.822376748286899, "learning_rate": 8.867658389382621e-06, "loss": 0.5967, "step": 178400 }, { "epoch": 2.96807851712054, "grad_norm": 2.7507679648913848, "learning_rate": 8.865818503306628e-06, "loss": 0.5461, "step": 178500 }, { "epoch": 2.969741310863447, "grad_norm": 3.3732276687875946, "learning_rate": 8.8639773148758e-06, "loss": 0.5786, "step": 178600 }, { "epoch": 2.9714041046063544, "grad_norm": 2.8541157676195237, "learning_rate": 8.862134824710413e-06, "loss": 0.5695, "step": 178700 }, { "epoch": 2.9730668983492614, "grad_norm": 2.9937704173374042, "learning_rate": 8.860291033431183e-06, "loss": 0.6048, "step": 178800 }, { "epoch": 2.9747296920921684, "grad_norm": 2.3380527324261378, "learning_rate": 8.858445941659265e-06, "loss": 0.6029, "step": 178900 }, { "epoch": 2.976392485835076, "grad_norm": 3.538399757314013, "learning_rate": 8.85659955001625e-06, "loss": 0.5757, "step": 179000 }, { "epoch": 2.978055279577983, "grad_norm": 2.795739676463927, "learning_rate": 8.854751859124171e-06, "loss": 0.5945, "step": 179100 }, { "epoch": 2.97971807332089, "grad_norm": 3.964266587653929, "learning_rate": 8.852902869605496e-06, "loss": 0.548, "step": 179200 }, { "epoch": 2.9813808670637973, "grad_norm": 3.9550460680552586, "learning_rate": 8.851052582083128e-06, "loss": 0.5514, "step": 179300 }, { "epoch": 2.9830436608067044, "grad_norm": 3.6559049071096177, "learning_rate": 8.849200997180411e-06, "loss": 0.5659, "step": 179400 }, { "epoch": 2.9847064545496114, "grad_norm": 3.1800780587279545, "learning_rate": 8.847348115521129e-06, "loss": 0.5737, "step": 179500 }, { "epoch": 2.986369248292519, "grad_norm": 3.614150426711556, "learning_rate": 8.845493937729492e-06, "loss": 0.5553, "step": 179600 }, { "epoch": 2.988032042035426, "grad_norm": 3.2024029602770385, "learning_rate": 8.843638464430159e-06, "loss": 0.5667, "step": 179700 }, { "epoch": 2.989694835778333, "grad_norm": 3.1241704267392207, "learning_rate": 8.841781696248218e-06, "loss": 0.5587, "step": 179800 }, { "epoch": 2.9913576295212403, "grad_norm": 2.4562252162063585, "learning_rate": 8.839923633809197e-06, "loss": 0.5874, "step": 179900 }, { "epoch": 2.9930204232641473, "grad_norm": 2.7587404680220327, "learning_rate": 8.838064277739055e-06, "loss": 0.5514, "step": 180000 }, { "epoch": 2.9946832170070543, "grad_norm": 2.469696622663351, "learning_rate": 8.836203628664193e-06, "loss": 0.5804, "step": 180100 }, { "epoch": 2.9963460107499618, "grad_norm": 4.981389816137938, "learning_rate": 8.834341687211444e-06, "loss": 0.5612, "step": 180200 }, { "epoch": 2.9980088044928688, "grad_norm": 3.7828618680750914, "learning_rate": 8.832478454008076e-06, "loss": 0.5788, "step": 180300 }, { "epoch": 2.999671598235776, "grad_norm": 3.318708940245835, "learning_rate": 8.830613929681794e-06, "loss": 0.5723, "step": 180400 }, { "epoch": 3.0013302349943256, "grad_norm": 2.2536150998360016, "learning_rate": 8.82874811486074e-06, "loss": 0.4475, "step": 180500 }, { "epoch": 3.002993028737233, "grad_norm": 3.4332351048707066, "learning_rate": 8.826881010173482e-06, "loss": 0.4126, "step": 180600 }, { "epoch": 3.00465582248014, "grad_norm": 2.9836030767643553, "learning_rate": 8.825012616249033e-06, "loss": 0.4048, "step": 180700 }, { "epoch": 3.006318616223047, "grad_norm": 3.0894283536659923, "learning_rate": 8.823142933716834e-06, "loss": 0.4226, "step": 180800 }, { "epoch": 3.007981409965954, "grad_norm": 3.940000584575768, "learning_rate": 8.821271963206764e-06, "loss": 0.4082, "step": 180900 }, { "epoch": 3.0096442037088615, "grad_norm": 3.1643634499084583, "learning_rate": 8.81939970534913e-06, "loss": 0.3854, "step": 181000 }, { "epoch": 3.0113069974517686, "grad_norm": 4.676959084123542, "learning_rate": 8.817526160774683e-06, "loss": 0.4302, "step": 181100 }, { "epoch": 3.0129697911946756, "grad_norm": 2.856259542163876, "learning_rate": 8.815651330114595e-06, "loss": 0.4203, "step": 181200 }, { "epoch": 3.014632584937583, "grad_norm": 2.7246172432970996, "learning_rate": 8.813775214000479e-06, "loss": 0.427, "step": 181300 }, { "epoch": 3.01629537868049, "grad_norm": 3.391352308610929, "learning_rate": 8.811897813064381e-06, "loss": 0.4108, "step": 181400 }, { "epoch": 3.017958172423397, "grad_norm": 2.823887021548535, "learning_rate": 8.810019127938776e-06, "loss": 0.4124, "step": 181500 }, { "epoch": 3.0196209661663045, "grad_norm": 4.058058293573884, "learning_rate": 8.808139159256576e-06, "loss": 0.4306, "step": 181600 }, { "epoch": 3.0212837599092115, "grad_norm": 4.33566924488492, "learning_rate": 8.806257907651122e-06, "loss": 0.4101, "step": 181700 }, { "epoch": 3.0229465536521185, "grad_norm": 2.797916826702092, "learning_rate": 8.804375373756188e-06, "loss": 0.4182, "step": 181800 }, { "epoch": 3.024609347395026, "grad_norm": 3.9163049886737835, "learning_rate": 8.802491558205984e-06, "loss": 0.4112, "step": 181900 }, { "epoch": 3.026272141137933, "grad_norm": 2.7325176625361323, "learning_rate": 8.800606461635143e-06, "loss": 0.4194, "step": 182000 }, { "epoch": 3.02793493488084, "grad_norm": 3.5918727937577586, "learning_rate": 8.798720084678738e-06, "loss": 0.4124, "step": 182100 }, { "epoch": 3.029597728623747, "grad_norm": 3.635896074284655, "learning_rate": 8.796832427972271e-06, "loss": 0.417, "step": 182200 }, { "epoch": 3.0312605223666544, "grad_norm": 3.140177821937068, "learning_rate": 8.79494349215167e-06, "loss": 0.4283, "step": 182300 }, { "epoch": 3.0329233161095615, "grad_norm": 4.343031635149152, "learning_rate": 8.793053277853302e-06, "loss": 0.4108, "step": 182400 }, { "epoch": 3.0345861098524685, "grad_norm": 3.64926950499295, "learning_rate": 8.79116178571396e-06, "loss": 0.4012, "step": 182500 }, { "epoch": 3.036248903595376, "grad_norm": 3.5092749916503307, "learning_rate": 8.78926901637087e-06, "loss": 0.4071, "step": 182600 }, { "epoch": 3.037911697338283, "grad_norm": 3.15452774575917, "learning_rate": 8.787374970461684e-06, "loss": 0.4174, "step": 182700 }, { "epoch": 3.03957449108119, "grad_norm": 4.027158625256367, "learning_rate": 8.785479648624487e-06, "loss": 0.4061, "step": 182800 }, { "epoch": 3.0412372848240974, "grad_norm": 6.106086216231872, "learning_rate": 8.783583051497794e-06, "loss": 0.4232, "step": 182900 }, { "epoch": 3.0429000785670044, "grad_norm": 4.11534026109937, "learning_rate": 8.781685179720552e-06, "loss": 0.4351, "step": 183000 }, { "epoch": 3.0445628723099114, "grad_norm": 2.730019095514776, "learning_rate": 8.77978603393213e-06, "loss": 0.4016, "step": 183100 }, { "epoch": 3.046225666052819, "grad_norm": 3.5820025003697964, "learning_rate": 8.777885614772332e-06, "loss": 0.4113, "step": 183200 }, { "epoch": 3.047888459795726, "grad_norm": 3.5683122044012707, "learning_rate": 8.775983922881394e-06, "loss": 0.4041, "step": 183300 }, { "epoch": 3.049551253538633, "grad_norm": 3.014092104274671, "learning_rate": 8.774080958899974e-06, "loss": 0.4191, "step": 183400 }, { "epoch": 3.05121404728154, "grad_norm": 4.5151204602952, "learning_rate": 8.772176723469158e-06, "loss": 0.402, "step": 183500 }, { "epoch": 3.0528768410244473, "grad_norm": 4.222307641764165, "learning_rate": 8.770271217230466e-06, "loss": 0.4217, "step": 183600 }, { "epoch": 3.0545396347673544, "grad_norm": 2.8525333518266334, "learning_rate": 8.768364440825846e-06, "loss": 0.4238, "step": 183700 }, { "epoch": 3.0562024285102614, "grad_norm": 4.023941233944651, "learning_rate": 8.76645639489767e-06, "loss": 0.4244, "step": 183800 }, { "epoch": 3.057865222253169, "grad_norm": 4.694723383104588, "learning_rate": 8.764547080088736e-06, "loss": 0.4454, "step": 183900 }, { "epoch": 3.059528015996076, "grad_norm": 3.1345786330437475, "learning_rate": 8.762636497042276e-06, "loss": 0.4268, "step": 184000 }, { "epoch": 3.061190809738983, "grad_norm": 4.553561208125061, "learning_rate": 8.760724646401946e-06, "loss": 0.403, "step": 184100 }, { "epoch": 3.0628536034818903, "grad_norm": 3.281616844707741, "learning_rate": 8.758811528811829e-06, "loss": 0.4181, "step": 184200 }, { "epoch": 3.0645163972247973, "grad_norm": 3.7309090794385114, "learning_rate": 8.756897144916432e-06, "loss": 0.4226, "step": 184300 }, { "epoch": 3.0661791909677043, "grad_norm": 3.607716739867412, "learning_rate": 8.754981495360695e-06, "loss": 0.4277, "step": 184400 }, { "epoch": 3.0678419847106113, "grad_norm": 2.6417911990250156, "learning_rate": 8.753064580789978e-06, "loss": 0.4119, "step": 184500 }, { "epoch": 3.0695047784535188, "grad_norm": 4.1671471973684735, "learning_rate": 8.751146401850073e-06, "loss": 0.4088, "step": 184600 }, { "epoch": 3.0711675721964258, "grad_norm": 2.943784424054827, "learning_rate": 8.749226959187191e-06, "loss": 0.4201, "step": 184700 }, { "epoch": 3.072830365939333, "grad_norm": 4.5659868829257375, "learning_rate": 8.747306253447975e-06, "loss": 0.426, "step": 184800 }, { "epoch": 3.0744931596822402, "grad_norm": 4.456528910236368, "learning_rate": 8.745384285279494e-06, "loss": 0.3999, "step": 184900 }, { "epoch": 3.0761559534251472, "grad_norm": 2.69060901447555, "learning_rate": 8.743461055329234e-06, "loss": 0.4057, "step": 185000 }, { "epoch": 3.0778187471680543, "grad_norm": 2.6565761328883415, "learning_rate": 8.741536564245114e-06, "loss": 0.3982, "step": 185100 }, { "epoch": 3.0794815409109617, "grad_norm": 2.6262222059022644, "learning_rate": 8.739610812675476e-06, "loss": 0.4499, "step": 185200 }, { "epoch": 3.0811443346538687, "grad_norm": 3.279447200814324, "learning_rate": 8.737683801269086e-06, "loss": 0.4173, "step": 185300 }, { "epoch": 3.0828071283967757, "grad_norm": 3.941736851192146, "learning_rate": 8.735755530675134e-06, "loss": 0.4137, "step": 185400 }, { "epoch": 3.084469922139683, "grad_norm": 3.632984472668215, "learning_rate": 8.733826001543233e-06, "loss": 0.3816, "step": 185500 }, { "epoch": 3.08613271588259, "grad_norm": 3.186302715121371, "learning_rate": 8.731895214523425e-06, "loss": 0.4346, "step": 185600 }, { "epoch": 3.087795509625497, "grad_norm": 2.848560477910532, "learning_rate": 8.729963170266172e-06, "loss": 0.429, "step": 185700 }, { "epoch": 3.089458303368404, "grad_norm": 3.7961577821915164, "learning_rate": 8.728029869422357e-06, "loss": 0.4192, "step": 185800 }, { "epoch": 3.0911210971113117, "grad_norm": 3.6184188163709003, "learning_rate": 8.726095312643292e-06, "loss": 0.4134, "step": 185900 }, { "epoch": 3.0927838908542187, "grad_norm": 3.752608779243144, "learning_rate": 8.724159500580707e-06, "loss": 0.4035, "step": 186000 }, { "epoch": 3.0944466845971257, "grad_norm": 3.8806473116126097, "learning_rate": 8.72222243388676e-06, "loss": 0.396, "step": 186100 }, { "epoch": 3.096109478340033, "grad_norm": 2.491462284226917, "learning_rate": 8.720284113214029e-06, "loss": 0.4201, "step": 186200 }, { "epoch": 3.09777227208294, "grad_norm": 3.2680091286443598, "learning_rate": 8.718344539215512e-06, "loss": 0.418, "step": 186300 }, { "epoch": 3.099435065825847, "grad_norm": 2.9724741199373197, "learning_rate": 8.716403712544635e-06, "loss": 0.4342, "step": 186400 }, { "epoch": 3.1010978595687546, "grad_norm": 2.855332633442923, "learning_rate": 8.71446163385524e-06, "loss": 0.4143, "step": 186500 }, { "epoch": 3.1027606533116616, "grad_norm": 4.162770229249731, "learning_rate": 8.712518303801595e-06, "loss": 0.4114, "step": 186600 }, { "epoch": 3.1044234470545686, "grad_norm": 2.688146038748901, "learning_rate": 8.710573723038386e-06, "loss": 0.4082, "step": 186700 }, { "epoch": 3.106086240797476, "grad_norm": 3.089028563597293, "learning_rate": 8.708627892220727e-06, "loss": 0.3923, "step": 186800 }, { "epoch": 3.107749034540383, "grad_norm": 4.377678705820261, "learning_rate": 8.706680812004143e-06, "loss": 0.4001, "step": 186900 }, { "epoch": 3.10941182828329, "grad_norm": 3.965461048380571, "learning_rate": 8.704732483044588e-06, "loss": 0.4361, "step": 187000 }, { "epoch": 3.111074622026197, "grad_norm": 3.1552349886667153, "learning_rate": 8.702782905998434e-06, "loss": 0.4311, "step": 187100 }, { "epoch": 3.1127374157691046, "grad_norm": 4.062312819640847, "learning_rate": 8.700832081522475e-06, "loss": 0.4144, "step": 187200 }, { "epoch": 3.1144002095120116, "grad_norm": 3.204320636270935, "learning_rate": 8.698880010273923e-06, "loss": 0.4449, "step": 187300 }, { "epoch": 3.1160630032549186, "grad_norm": 4.0376395860286465, "learning_rate": 8.69692669291041e-06, "loss": 0.416, "step": 187400 }, { "epoch": 3.117725796997826, "grad_norm": 3.311440776267003, "learning_rate": 8.694972130089987e-06, "loss": 0.4083, "step": 187500 }, { "epoch": 3.119388590740733, "grad_norm": 3.298160578078309, "learning_rate": 8.69301632247113e-06, "loss": 0.4361, "step": 187600 }, { "epoch": 3.12105138448364, "grad_norm": 2.8223396084705965, "learning_rate": 8.691059270712731e-06, "loss": 0.4182, "step": 187700 }, { "epoch": 3.1227141782265475, "grad_norm": 3.589552799936944, "learning_rate": 8.689100975474097e-06, "loss": 0.4189, "step": 187800 }, { "epoch": 3.1243769719694545, "grad_norm": 2.8963802386924797, "learning_rate": 8.68714143741496e-06, "loss": 0.4389, "step": 187900 }, { "epoch": 3.1260397657123615, "grad_norm": 2.6421669745212775, "learning_rate": 8.685180657195469e-06, "loss": 0.4505, "step": 188000 }, { "epoch": 3.127702559455269, "grad_norm": 3.942967759007455, "learning_rate": 8.68321863547619e-06, "loss": 0.4222, "step": 188100 }, { "epoch": 3.129365353198176, "grad_norm": 3.0352316131807506, "learning_rate": 8.681255372918106e-06, "loss": 0.4002, "step": 188200 }, { "epoch": 3.131028146941083, "grad_norm": 3.4132231849738623, "learning_rate": 8.679290870182625e-06, "loss": 0.4321, "step": 188300 }, { "epoch": 3.13269094068399, "grad_norm": 3.70943562677321, "learning_rate": 8.677325127931563e-06, "loss": 0.4279, "step": 188400 }, { "epoch": 3.1343537344268975, "grad_norm": 2.8227526531837785, "learning_rate": 8.675358146827161e-06, "loss": 0.4014, "step": 188500 }, { "epoch": 3.1360165281698045, "grad_norm": 2.537272495235499, "learning_rate": 8.673389927532074e-06, "loss": 0.4058, "step": 188600 }, { "epoch": 3.1376793219127115, "grad_norm": 3.860880049316033, "learning_rate": 8.671420470709378e-06, "loss": 0.3922, "step": 188700 }, { "epoch": 3.139342115655619, "grad_norm": 4.268005304836198, "learning_rate": 8.669449777022556e-06, "loss": 0.4247, "step": 188800 }, { "epoch": 3.141004909398526, "grad_norm": 2.6943916588969583, "learning_rate": 8.667477847135523e-06, "loss": 0.4167, "step": 188900 }, { "epoch": 3.142667703141433, "grad_norm": 3.926561555293632, "learning_rate": 8.665504681712598e-06, "loss": 0.4151, "step": 189000 }, { "epoch": 3.1443304968843404, "grad_norm": 3.655757176859639, "learning_rate": 8.66353028141852e-06, "loss": 0.4478, "step": 189100 }, { "epoch": 3.1459932906272474, "grad_norm": 3.323181086190111, "learning_rate": 8.661554646918441e-06, "loss": 0.4067, "step": 189200 }, { "epoch": 3.1476560843701544, "grad_norm": 3.6570831607638388, "learning_rate": 8.659577778877942e-06, "loss": 0.4389, "step": 189300 }, { "epoch": 3.1493188781130614, "grad_norm": 3.506587037626475, "learning_rate": 8.657599677963e-06, "loss": 0.4295, "step": 189400 }, { "epoch": 3.150981671855969, "grad_norm": 3.4078868180519915, "learning_rate": 8.655620344840022e-06, "loss": 0.4354, "step": 189500 }, { "epoch": 3.152644465598876, "grad_norm": 3.147226430114259, "learning_rate": 8.653639780175822e-06, "loss": 0.4444, "step": 189600 }, { "epoch": 3.154307259341783, "grad_norm": 3.709367226669021, "learning_rate": 8.651657984637633e-06, "loss": 0.4251, "step": 189700 }, { "epoch": 3.1559700530846904, "grad_norm": 4.675836554717505, "learning_rate": 8.649674958893104e-06, "loss": 0.4239, "step": 189800 }, { "epoch": 3.1576328468275974, "grad_norm": 2.967450388224673, "learning_rate": 8.647690703610296e-06, "loss": 0.4204, "step": 189900 }, { "epoch": 3.1592956405705044, "grad_norm": 2.4027225477612486, "learning_rate": 8.645705219457681e-06, "loss": 0.4273, "step": 190000 }, { "epoch": 3.160958434313412, "grad_norm": 3.5282103601073405, "learning_rate": 8.643718507104152e-06, "loss": 0.4359, "step": 190100 }, { "epoch": 3.162621228056319, "grad_norm": 3.666533696355856, "learning_rate": 8.64173056721901e-06, "loss": 0.4328, "step": 190200 }, { "epoch": 3.164284021799226, "grad_norm": 3.510089775624312, "learning_rate": 8.639741400471973e-06, "loss": 0.4248, "step": 190300 }, { "epoch": 3.1659468155421333, "grad_norm": 3.119760820242221, "learning_rate": 8.637751007533169e-06, "loss": 0.4277, "step": 190400 }, { "epoch": 3.1676096092850403, "grad_norm": 3.876528631436766, "learning_rate": 8.635759389073142e-06, "loss": 0.4261, "step": 190500 }, { "epoch": 3.1692724030279473, "grad_norm": 3.0748210748187637, "learning_rate": 8.63376654576285e-06, "loss": 0.4355, "step": 190600 }, { "epoch": 3.170935196770855, "grad_norm": 2.7269576351993714, "learning_rate": 8.631772478273656e-06, "loss": 0.4395, "step": 190700 }, { "epoch": 3.172597990513762, "grad_norm": 2.9926538949735586, "learning_rate": 8.629777187277349e-06, "loss": 0.424, "step": 190800 }, { "epoch": 3.174260784256669, "grad_norm": 3.172101740695987, "learning_rate": 8.627780673446116e-06, "loss": 0.4247, "step": 190900 }, { "epoch": 3.175923577999576, "grad_norm": 3.021965520026039, "learning_rate": 8.625782937452565e-06, "loss": 0.402, "step": 191000 }, { "epoch": 3.1775863717424833, "grad_norm": 2.401519299153696, "learning_rate": 8.62378397996971e-06, "loss": 0.4359, "step": 191100 }, { "epoch": 3.1792491654853903, "grad_norm": 2.9964578943412117, "learning_rate": 8.621783801670985e-06, "loss": 0.442, "step": 191200 }, { "epoch": 3.1809119592282973, "grad_norm": 4.053286079596666, "learning_rate": 8.619782403230223e-06, "loss": 0.4341, "step": 191300 }, { "epoch": 3.1825747529712047, "grad_norm": 2.0869928485225424, "learning_rate": 8.617779785321679e-06, "loss": 0.4298, "step": 191400 }, { "epoch": 3.1842375467141117, "grad_norm": 3.1233304095034353, "learning_rate": 8.615775948620014e-06, "loss": 0.437, "step": 191500 }, { "epoch": 3.1859003404570188, "grad_norm": 3.3122900378134403, "learning_rate": 8.6137708938003e-06, "loss": 0.4284, "step": 191600 }, { "epoch": 3.187563134199926, "grad_norm": 3.2147619595088526, "learning_rate": 8.611764621538017e-06, "loss": 0.4159, "step": 191700 }, { "epoch": 3.189225927942833, "grad_norm": 2.674674166396048, "learning_rate": 8.609757132509063e-06, "loss": 0.4325, "step": 191800 }, { "epoch": 3.1908887216857402, "grad_norm": 3.591070167287651, "learning_rate": 8.607748427389735e-06, "loss": 0.4379, "step": 191900 }, { "epoch": 3.1925515154286472, "grad_norm": 4.558662830019176, "learning_rate": 8.605738506856747e-06, "loss": 0.4355, "step": 192000 }, { "epoch": 3.1942143091715547, "grad_norm": 4.341680009065942, "learning_rate": 8.603727371587222e-06, "loss": 0.4215, "step": 192100 }, { "epoch": 3.1958771029144617, "grad_norm": 3.722310383491162, "learning_rate": 8.601715022258691e-06, "loss": 0.4351, "step": 192200 }, { "epoch": 3.1975398966573687, "grad_norm": 3.634104404930771, "learning_rate": 8.599701459549092e-06, "loss": 0.4082, "step": 192300 }, { "epoch": 3.199202690400276, "grad_norm": 4.6154063004828, "learning_rate": 8.597686684136777e-06, "loss": 0.4106, "step": 192400 }, { "epoch": 3.200865484143183, "grad_norm": 2.723660983415498, "learning_rate": 8.5956706967005e-06, "loss": 0.3903, "step": 192500 }, { "epoch": 3.20252827788609, "grad_norm": 3.0214390451569026, "learning_rate": 8.593653497919428e-06, "loss": 0.4115, "step": 192600 }, { "epoch": 3.2041910716289976, "grad_norm": 4.011795989167877, "learning_rate": 8.591635088473135e-06, "loss": 0.4496, "step": 192700 }, { "epoch": 3.2058538653719046, "grad_norm": 3.057904548047888, "learning_rate": 8.589615469041602e-06, "loss": 0.4312, "step": 192800 }, { "epoch": 3.2075166591148117, "grad_norm": 2.691627016169654, "learning_rate": 8.58759464030522e-06, "loss": 0.4352, "step": 192900 }, { "epoch": 3.209179452857719, "grad_norm": 4.206844530409429, "learning_rate": 8.58557260294478e-06, "loss": 0.4363, "step": 193000 }, { "epoch": 3.210842246600626, "grad_norm": 3.7829200721561884, "learning_rate": 8.583549357641494e-06, "loss": 0.4305, "step": 193100 }, { "epoch": 3.212505040343533, "grad_norm": 2.752393492197729, "learning_rate": 8.581524905076966e-06, "loss": 0.4332, "step": 193200 }, { "epoch": 3.21416783408644, "grad_norm": 4.145684512940028, "learning_rate": 8.579499245933216e-06, "loss": 0.4168, "step": 193300 }, { "epoch": 3.2158306278293476, "grad_norm": 3.604042928104094, "learning_rate": 8.577472380892668e-06, "loss": 0.4407, "step": 193400 }, { "epoch": 3.2174934215722546, "grad_norm": 3.8113271695728654, "learning_rate": 8.575444310638154e-06, "loss": 0.4518, "step": 193500 }, { "epoch": 3.2191562153151616, "grad_norm": 3.9103445528758938, "learning_rate": 8.573415035852905e-06, "loss": 0.4316, "step": 193600 }, { "epoch": 3.220819009058069, "grad_norm": 3.716658178417669, "learning_rate": 8.571384557220569e-06, "loss": 0.425, "step": 193700 }, { "epoch": 3.222481802800976, "grad_norm": 3.582948133481535, "learning_rate": 8.569352875425188e-06, "loss": 0.4381, "step": 193800 }, { "epoch": 3.224144596543883, "grad_norm": 3.122242863902653, "learning_rate": 8.56731999115122e-06, "loss": 0.4555, "step": 193900 }, { "epoch": 3.2258073902867905, "grad_norm": 5.18957595638227, "learning_rate": 8.56528590508352e-06, "loss": 0.4358, "step": 194000 }, { "epoch": 3.2274701840296975, "grad_norm": 2.43018896402642, "learning_rate": 8.56325061790735e-06, "loss": 0.4447, "step": 194100 }, { "epoch": 3.2291329777726046, "grad_norm": 4.299096969983994, "learning_rate": 8.561214130308379e-06, "loss": 0.4296, "step": 194200 }, { "epoch": 3.2307957715155116, "grad_norm": 3.6630297951986344, "learning_rate": 8.55917644297268e-06, "loss": 0.4573, "step": 194300 }, { "epoch": 3.232458565258419, "grad_norm": 3.520873114377564, "learning_rate": 8.55713755658673e-06, "loss": 0.4108, "step": 194400 }, { "epoch": 3.234121359001326, "grad_norm": 3.7373395533864517, "learning_rate": 8.555097471837403e-06, "loss": 0.42, "step": 194500 }, { "epoch": 3.235784152744233, "grad_norm": 3.7234772866432446, "learning_rate": 8.553056189411987e-06, "loss": 0.4505, "step": 194600 }, { "epoch": 3.2374469464871405, "grad_norm": 3.2370397116717857, "learning_rate": 8.551013709998172e-06, "loss": 0.4452, "step": 194700 }, { "epoch": 3.2391097402300475, "grad_norm": 2.8378010048088407, "learning_rate": 8.548970034284043e-06, "loss": 0.4168, "step": 194800 }, { "epoch": 3.2407725339729545, "grad_norm": 3.3721747988465336, "learning_rate": 8.546925162958097e-06, "loss": 0.4361, "step": 194900 }, { "epoch": 3.242435327715862, "grad_norm": 4.459130298203037, "learning_rate": 8.544879096709229e-06, "loss": 0.4269, "step": 195000 }, { "epoch": 3.244098121458769, "grad_norm": 2.8903791920818374, "learning_rate": 8.542831836226737e-06, "loss": 0.4433, "step": 195100 }, { "epoch": 3.245760915201676, "grad_norm": 3.3528270832721647, "learning_rate": 8.540783382200326e-06, "loss": 0.4329, "step": 195200 }, { "epoch": 3.2474237089445834, "grad_norm": 3.203340417006741, "learning_rate": 8.538733735320093e-06, "loss": 0.4091, "step": 195300 }, { "epoch": 3.2490865026874904, "grad_norm": 3.3444187560731766, "learning_rate": 8.53668289627655e-06, "loss": 0.4509, "step": 195400 }, { "epoch": 3.2507492964303975, "grad_norm": 2.5222512116412257, "learning_rate": 8.534630865760598e-06, "loss": 0.4507, "step": 195500 }, { "epoch": 3.252412090173305, "grad_norm": 3.4708831499692305, "learning_rate": 8.532577644463548e-06, "loss": 0.4378, "step": 195600 }, { "epoch": 3.254074883916212, "grad_norm": 3.083474873954241, "learning_rate": 8.530523233077111e-06, "loss": 0.4657, "step": 195700 }, { "epoch": 3.255737677659119, "grad_norm": 5.14915046737342, "learning_rate": 8.528467632293393e-06, "loss": 0.4341, "step": 195800 }, { "epoch": 3.257400471402026, "grad_norm": 4.097218656785237, "learning_rate": 8.526410842804907e-06, "loss": 0.4398, "step": 195900 }, { "epoch": 3.2590632651449334, "grad_norm": 2.4075326460944644, "learning_rate": 8.524352865304566e-06, "loss": 0.4381, "step": 196000 }, { "epoch": 3.2607260588878404, "grad_norm": 3.8253566718601735, "learning_rate": 8.522293700485677e-06, "loss": 0.4235, "step": 196100 }, { "epoch": 3.2623888526307474, "grad_norm": 4.273135455103274, "learning_rate": 8.52023334904196e-06, "loss": 0.4473, "step": 196200 }, { "epoch": 3.264051646373655, "grad_norm": 3.83869974937846, "learning_rate": 8.518171811667517e-06, "loss": 0.4448, "step": 196300 }, { "epoch": 3.265714440116562, "grad_norm": 2.543576708521957, "learning_rate": 8.516109089056864e-06, "loss": 0.4271, "step": 196400 }, { "epoch": 3.267377233859469, "grad_norm": 3.668555133893202, "learning_rate": 8.514045181904911e-06, "loss": 0.41, "step": 196500 }, { "epoch": 3.269040027602376, "grad_norm": 2.8741196823320485, "learning_rate": 8.511980090906967e-06, "loss": 0.4204, "step": 196600 }, { "epoch": 3.2707028213452833, "grad_norm": 3.22052526617322, "learning_rate": 8.50991381675874e-06, "loss": 0.4315, "step": 196700 }, { "epoch": 3.2723656150881903, "grad_norm": 3.20741167091432, "learning_rate": 8.507846360156335e-06, "loss": 0.4293, "step": 196800 }, { "epoch": 3.2740284088310974, "grad_norm": 3.662488560481595, "learning_rate": 8.505777721796257e-06, "loss": 0.4259, "step": 196900 }, { "epoch": 3.275691202574005, "grad_norm": 3.7274626135723143, "learning_rate": 8.503707902375414e-06, "loss": 0.4061, "step": 197000 }, { "epoch": 3.277353996316912, "grad_norm": 2.952439498950459, "learning_rate": 8.501636902591102e-06, "loss": 0.4317, "step": 197100 }, { "epoch": 3.279016790059819, "grad_norm": 4.985015711443434, "learning_rate": 8.49956472314102e-06, "loss": 0.4301, "step": 197200 }, { "epoch": 3.2806795838027263, "grad_norm": 5.668691405094465, "learning_rate": 8.497491364723268e-06, "loss": 0.4118, "step": 197300 }, { "epoch": 3.2823423775456333, "grad_norm": 2.840144428762032, "learning_rate": 8.495416828036337e-06, "loss": 0.4348, "step": 197400 }, { "epoch": 3.2840051712885403, "grad_norm": 3.588433600356091, "learning_rate": 8.493341113779114e-06, "loss": 0.4477, "step": 197500 }, { "epoch": 3.2856679650314478, "grad_norm": 2.2772068016931066, "learning_rate": 8.491264222650892e-06, "loss": 0.3983, "step": 197600 }, { "epoch": 3.2873307587743548, "grad_norm": 4.180492009594509, "learning_rate": 8.48918615535135e-06, "loss": 0.408, "step": 197700 }, { "epoch": 3.2889935525172618, "grad_norm": 2.9509105859430704, "learning_rate": 8.48710691258057e-06, "loss": 0.4377, "step": 197800 }, { "epoch": 3.2906563462601692, "grad_norm": 2.3311353778136334, "learning_rate": 8.485026495039026e-06, "loss": 0.4221, "step": 197900 }, { "epoch": 3.2923191400030762, "grad_norm": 3.2174566501530313, "learning_rate": 8.482944903427591e-06, "loss": 0.441, "step": 198000 }, { "epoch": 3.2939819337459832, "grad_norm": 3.946966629295538, "learning_rate": 8.480862138447532e-06, "loss": 0.4446, "step": 198100 }, { "epoch": 3.2956447274888907, "grad_norm": 3.5164683420946794, "learning_rate": 8.47877820080051e-06, "loss": 0.4231, "step": 198200 }, { "epoch": 3.2973075212317977, "grad_norm": 6.355411887703506, "learning_rate": 8.476693091188581e-06, "loss": 0.4356, "step": 198300 }, { "epoch": 3.2989703149747047, "grad_norm": 3.712680099701989, "learning_rate": 8.474606810314201e-06, "loss": 0.432, "step": 198400 }, { "epoch": 3.3006331087176117, "grad_norm": 3.8261703800678615, "learning_rate": 8.472519358880215e-06, "loss": 0.4391, "step": 198500 }, { "epoch": 3.302295902460519, "grad_norm": 3.7301489337331235, "learning_rate": 8.470430737589863e-06, "loss": 0.4506, "step": 198600 }, { "epoch": 3.303958696203426, "grad_norm": 3.0511069150922423, "learning_rate": 8.468340947146783e-06, "loss": 0.4634, "step": 198700 }, { "epoch": 3.305621489946333, "grad_norm": 3.9386769174567706, "learning_rate": 8.466249988255e-06, "loss": 0.4467, "step": 198800 }, { "epoch": 3.3072842836892407, "grad_norm": 3.5422955982797992, "learning_rate": 8.46415786161894e-06, "loss": 0.4193, "step": 198900 }, { "epoch": 3.3089470774321477, "grad_norm": 3.064879666097579, "learning_rate": 8.462064567943417e-06, "loss": 0.4463, "step": 199000 }, { "epoch": 3.3106098711750547, "grad_norm": 3.145909911645261, "learning_rate": 8.459970107933644e-06, "loss": 0.4445, "step": 199100 }, { "epoch": 3.3122726649179617, "grad_norm": 4.663164178992454, "learning_rate": 8.457874482295218e-06, "loss": 0.4241, "step": 199200 }, { "epoch": 3.313935458660869, "grad_norm": 3.263553339524578, "learning_rate": 8.455777691734139e-06, "loss": 0.4328, "step": 199300 }, { "epoch": 3.315598252403776, "grad_norm": 2.3213070078531173, "learning_rate": 8.45367973695679e-06, "loss": 0.4663, "step": 199400 }, { "epoch": 3.317261046146683, "grad_norm": 3.067563997507016, "learning_rate": 8.451580618669953e-06, "loss": 0.4161, "step": 199500 }, { "epoch": 3.3189238398895906, "grad_norm": 3.174281159182411, "learning_rate": 8.4494803375808e-06, "loss": 0.4225, "step": 199600 }, { "epoch": 3.3205866336324976, "grad_norm": 3.5048487255855476, "learning_rate": 8.447378894396891e-06, "loss": 0.4439, "step": 199700 }, { "epoch": 3.3222494273754046, "grad_norm": 2.602645537927013, "learning_rate": 8.445276289826185e-06, "loss": 0.4473, "step": 199800 }, { "epoch": 3.323912221118312, "grad_norm": 3.28385674290515, "learning_rate": 8.443172524577026e-06, "loss": 0.4405, "step": 199900 }, { "epoch": 3.325575014861219, "grad_norm": 3.3361784547221647, "learning_rate": 8.44106759935815e-06, "loss": 0.4586, "step": 200000 }, { "epoch": 3.327237808604126, "grad_norm": 2.750660704722319, "learning_rate": 8.438961514878687e-06, "loss": 0.4198, "step": 200100 }, { "epoch": 3.3289006023470336, "grad_norm": 3.4677948945637116, "learning_rate": 8.436854271848156e-06, "loss": 0.4349, "step": 200200 }, { "epoch": 3.3305633960899406, "grad_norm": 3.5933143680435977, "learning_rate": 8.434745870976462e-06, "loss": 0.4266, "step": 200300 }, { "epoch": 3.3322261898328476, "grad_norm": 3.871259439512274, "learning_rate": 8.432636312973907e-06, "loss": 0.4316, "step": 200400 }, { "epoch": 3.333888983575755, "grad_norm": 4.840220684696205, "learning_rate": 8.430525598551179e-06, "loss": 0.478, "step": 200500 }, { "epoch": 3.335551777318662, "grad_norm": 3.201341531968915, "learning_rate": 8.428413728419354e-06, "loss": 0.4429, "step": 200600 }, { "epoch": 3.337214571061569, "grad_norm": 3.724755308024204, "learning_rate": 8.4263007032899e-06, "loss": 0.4395, "step": 200700 }, { "epoch": 3.338877364804476, "grad_norm": 4.367118956465079, "learning_rate": 8.424186523874678e-06, "loss": 0.4526, "step": 200800 }, { "epoch": 3.3405401585473835, "grad_norm": 3.102474393588992, "learning_rate": 8.422071190885928e-06, "loss": 0.418, "step": 200900 }, { "epoch": 3.3422029522902905, "grad_norm": 5.93517262445002, "learning_rate": 8.419954705036288e-06, "loss": 0.4284, "step": 201000 }, { "epoch": 3.3438657460331975, "grad_norm": 4.080391738401063, "learning_rate": 8.417837067038777e-06, "loss": 0.4415, "step": 201100 }, { "epoch": 3.345528539776105, "grad_norm": 3.0338980743196915, "learning_rate": 8.415718277606807e-06, "loss": 0.4135, "step": 201200 }, { "epoch": 3.347191333519012, "grad_norm": 3.865942054585576, "learning_rate": 8.41359833745418e-06, "loss": 0.4485, "step": 201300 }, { "epoch": 3.348854127261919, "grad_norm": 3.340210282375979, "learning_rate": 8.411477247295078e-06, "loss": 0.4604, "step": 201400 }, { "epoch": 3.350516921004826, "grad_norm": 3.6094769968439935, "learning_rate": 8.409355007844075e-06, "loss": 0.4459, "step": 201500 }, { "epoch": 3.3521797147477335, "grad_norm": 4.6558861394679525, "learning_rate": 8.407231619816133e-06, "loss": 0.4386, "step": 201600 }, { "epoch": 3.3538425084906405, "grad_norm": 5.378108338393358, "learning_rate": 8.405107083926602e-06, "loss": 0.4477, "step": 201700 }, { "epoch": 3.3555053022335475, "grad_norm": 3.740451105313156, "learning_rate": 8.402981400891213e-06, "loss": 0.4139, "step": 201800 }, { "epoch": 3.357168095976455, "grad_norm": 3.192042558106382, "learning_rate": 8.400854571426085e-06, "loss": 0.4333, "step": 201900 }, { "epoch": 3.358830889719362, "grad_norm": 4.5826860491320645, "learning_rate": 8.398726596247731e-06, "loss": 0.4326, "step": 202000 }, { "epoch": 3.360493683462269, "grad_norm": 4.7375129110891026, "learning_rate": 8.39659747607304e-06, "loss": 0.4474, "step": 202100 }, { "epoch": 3.3621564772051764, "grad_norm": 2.9425605037776297, "learning_rate": 8.394467211619293e-06, "loss": 0.4525, "step": 202200 }, { "epoch": 3.3638192709480834, "grad_norm": 3.25446525610724, "learning_rate": 8.392335803604155e-06, "loss": 0.4371, "step": 202300 }, { "epoch": 3.3654820646909904, "grad_norm": 2.9993454313367196, "learning_rate": 8.390203252745671e-06, "loss": 0.433, "step": 202400 }, { "epoch": 3.367144858433898, "grad_norm": 2.6960222253490125, "learning_rate": 8.388069559762281e-06, "loss": 0.4681, "step": 202500 }, { "epoch": 3.368807652176805, "grad_norm": 3.341467526403397, "learning_rate": 8.3859347253728e-06, "loss": 0.4492, "step": 202600 }, { "epoch": 3.370470445919712, "grad_norm": 3.2092443935821398, "learning_rate": 8.383798750296437e-06, "loss": 0.4282, "step": 202700 }, { "epoch": 3.3721332396626194, "grad_norm": 3.036173902250312, "learning_rate": 8.381661635252775e-06, "loss": 0.4527, "step": 202800 }, { "epoch": 3.3737960334055264, "grad_norm": 3.558421939827158, "learning_rate": 8.379523380961789e-06, "loss": 0.4695, "step": 202900 }, { "epoch": 3.3754588271484334, "grad_norm": 3.2009673700324566, "learning_rate": 8.377383988143838e-06, "loss": 0.4288, "step": 203000 }, { "epoch": 3.377121620891341, "grad_norm": 2.9549183998356248, "learning_rate": 8.375243457519654e-06, "loss": 0.4412, "step": 203100 }, { "epoch": 3.378784414634248, "grad_norm": 4.122107720979962, "learning_rate": 8.373101789810365e-06, "loss": 0.4387, "step": 203200 }, { "epoch": 3.380447208377155, "grad_norm": 2.874805548504132, "learning_rate": 8.370958985737477e-06, "loss": 0.4528, "step": 203300 }, { "epoch": 3.382110002120062, "grad_norm": 2.825831980631311, "learning_rate": 8.36881504602288e-06, "loss": 0.4478, "step": 203400 }, { "epoch": 3.3837727958629693, "grad_norm": 4.127471259279281, "learning_rate": 8.366669971388842e-06, "loss": 0.4567, "step": 203500 }, { "epoch": 3.3854355896058763, "grad_norm": 4.8946461917945845, "learning_rate": 8.364523762558017e-06, "loss": 0.4482, "step": 203600 }, { "epoch": 3.3870983833487833, "grad_norm": 2.7938589343395135, "learning_rate": 8.362376420253444e-06, "loss": 0.4285, "step": 203700 }, { "epoch": 3.388761177091691, "grad_norm": 2.9070639970128327, "learning_rate": 8.360227945198539e-06, "loss": 0.4557, "step": 203800 }, { "epoch": 3.390423970834598, "grad_norm": 3.341386501397015, "learning_rate": 8.3580783381171e-06, "loss": 0.454, "step": 203900 }, { "epoch": 3.392086764577505, "grad_norm": 4.198723770272761, "learning_rate": 8.35592759973331e-06, "loss": 0.439, "step": 204000 }, { "epoch": 3.393749558320412, "grad_norm": 3.5305408810437826, "learning_rate": 8.353775730771729e-06, "loss": 0.4446, "step": 204100 }, { "epoch": 3.3954123520633193, "grad_norm": 2.9493604631265344, "learning_rate": 8.3516227319573e-06, "loss": 0.4221, "step": 204200 }, { "epoch": 3.3970751458062263, "grad_norm": 4.311289967896452, "learning_rate": 8.349468604015347e-06, "loss": 0.4398, "step": 204300 }, { "epoch": 3.3987379395491333, "grad_norm": 3.4104264143635064, "learning_rate": 8.347313347671576e-06, "loss": 0.4516, "step": 204400 }, { "epoch": 3.4004007332920407, "grad_norm": 3.3917597739829692, "learning_rate": 8.345156963652066e-06, "loss": 0.4564, "step": 204500 }, { "epoch": 3.4020635270349477, "grad_norm": 4.147614789142947, "learning_rate": 8.342999452683284e-06, "loss": 0.4238, "step": 204600 }, { "epoch": 3.4037263207778548, "grad_norm": 3.25695902976395, "learning_rate": 8.340840815492072e-06, "loss": 0.4279, "step": 204700 }, { "epoch": 3.405389114520762, "grad_norm": 3.232446413941761, "learning_rate": 8.338681052805655e-06, "loss": 0.4604, "step": 204800 }, { "epoch": 3.407051908263669, "grad_norm": 3.292733942111995, "learning_rate": 8.336520165351633e-06, "loss": 0.4367, "step": 204900 }, { "epoch": 3.4087147020065762, "grad_norm": 2.8199574284030713, "learning_rate": 8.334358153857988e-06, "loss": 0.4198, "step": 205000 }, { "epoch": 3.4103774957494837, "grad_norm": 4.584700279986647, "learning_rate": 8.33219501905308e-06, "loss": 0.4526, "step": 205100 }, { "epoch": 3.4120402894923907, "grad_norm": 4.122207311378825, "learning_rate": 8.330030761665643e-06, "loss": 0.4381, "step": 205200 }, { "epoch": 3.4137030832352977, "grad_norm": 3.5536318816884402, "learning_rate": 8.327865382424801e-06, "loss": 0.4541, "step": 205300 }, { "epoch": 3.415365876978205, "grad_norm": 3.38157944247793, "learning_rate": 8.32569888206004e-06, "loss": 0.4268, "step": 205400 }, { "epoch": 3.417028670721112, "grad_norm": 2.819129577530832, "learning_rate": 8.323531261301237e-06, "loss": 0.4536, "step": 205500 }, { "epoch": 3.418691464464019, "grad_norm": 2.850412805869241, "learning_rate": 8.32136252087864e-06, "loss": 0.4356, "step": 205600 }, { "epoch": 3.420354258206926, "grad_norm": 2.5654098380199395, "learning_rate": 8.319192661522875e-06, "loss": 0.4275, "step": 205700 }, { "epoch": 3.4220170519498336, "grad_norm": 3.822694783267703, "learning_rate": 8.317021683964945e-06, "loss": 0.4377, "step": 205800 }, { "epoch": 3.4236798456927406, "grad_norm": 4.084810886597552, "learning_rate": 8.314849588936234e-06, "loss": 0.455, "step": 205900 }, { "epoch": 3.4253426394356477, "grad_norm": 3.351842385765125, "learning_rate": 8.312676377168493e-06, "loss": 0.4278, "step": 206000 }, { "epoch": 3.427005433178555, "grad_norm": 3.3940947337447533, "learning_rate": 8.310502049393859e-06, "loss": 0.4508, "step": 206100 }, { "epoch": 3.428668226921462, "grad_norm": 2.777957938013167, "learning_rate": 8.308326606344838e-06, "loss": 0.4445, "step": 206200 }, { "epoch": 3.430331020664369, "grad_norm": 3.5102992671688695, "learning_rate": 8.306150048754319e-06, "loss": 0.4494, "step": 206300 }, { "epoch": 3.4319938144072766, "grad_norm": 3.2980660040263277, "learning_rate": 8.303972377355557e-06, "loss": 0.4525, "step": 206400 }, { "epoch": 3.4336566081501836, "grad_norm": 3.9345405021384545, "learning_rate": 8.30179359288219e-06, "loss": 0.4433, "step": 206500 }, { "epoch": 3.4353194018930906, "grad_norm": 2.6846217081161816, "learning_rate": 8.299613696068228e-06, "loss": 0.4408, "step": 206600 }, { "epoch": 3.4369821956359976, "grad_norm": 2.9984864319714637, "learning_rate": 8.297432687648056e-06, "loss": 0.4634, "step": 206700 }, { "epoch": 3.438644989378905, "grad_norm": 4.13783971755982, "learning_rate": 8.295250568356433e-06, "loss": 0.4448, "step": 206800 }, { "epoch": 3.440307783121812, "grad_norm": 2.7617547158924003, "learning_rate": 8.29306733892849e-06, "loss": 0.4431, "step": 206900 }, { "epoch": 3.441970576864719, "grad_norm": 4.92058783929245, "learning_rate": 8.290883000099744e-06, "loss": 0.4379, "step": 207000 }, { "epoch": 3.4436333706076265, "grad_norm": 4.019396000540159, "learning_rate": 8.288697552606066e-06, "loss": 0.4444, "step": 207100 }, { "epoch": 3.4452961643505335, "grad_norm": 2.8429807471522683, "learning_rate": 8.286510997183715e-06, "loss": 0.4484, "step": 207200 }, { "epoch": 3.4469589580934406, "grad_norm": 3.612951063778096, "learning_rate": 8.28432333456932e-06, "loss": 0.4306, "step": 207300 }, { "epoch": 3.448621751836348, "grad_norm": 4.0419502660424085, "learning_rate": 8.282134565499881e-06, "loss": 0.4437, "step": 207400 }, { "epoch": 3.450284545579255, "grad_norm": 3.065715562354169, "learning_rate": 8.279944690712775e-06, "loss": 0.4362, "step": 207500 }, { "epoch": 3.451947339322162, "grad_norm": 2.702436977019126, "learning_rate": 8.277753710945746e-06, "loss": 0.4591, "step": 207600 }, { "epoch": 3.4536101330650695, "grad_norm": 3.1008409820530036, "learning_rate": 8.27556162693691e-06, "loss": 0.4398, "step": 207700 }, { "epoch": 3.4552729268079765, "grad_norm": 3.416537282784962, "learning_rate": 8.273368439424762e-06, "loss": 0.4217, "step": 207800 }, { "epoch": 3.4569357205508835, "grad_norm": 3.1506560650112494, "learning_rate": 8.271174149148165e-06, "loss": 0.4415, "step": 207900 }, { "epoch": 3.458598514293791, "grad_norm": 2.5042419599188697, "learning_rate": 8.268978756846349e-06, "loss": 0.4615, "step": 208000 } ], "logging_steps": 100, "max_steps": 601400, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4791983591522304.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }