{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 32276, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015491386788945346, "grad_norm": 7.1502055693895565, "learning_rate": 1.2391573729863694e-08, "loss": 1.1088, "step": 5 }, { "epoch": 0.0003098277357789069, "grad_norm": 7.285541140080153, "learning_rate": 2.788104089219331e-08, "loss": 1.2214, "step": 10 }, { "epoch": 0.0004647416036683604, "grad_norm": 5.442876446260737, "learning_rate": 4.337050805452293e-08, "loss": 1.113, "step": 15 }, { "epoch": 0.0006196554715578138, "grad_norm": 10.374377138567125, "learning_rate": 5.885997521685254e-08, "loss": 1.1804, "step": 20 }, { "epoch": 0.0007745693394472673, "grad_norm": 14.08474962799312, "learning_rate": 7.434944237918216e-08, "loss": 1.1039, "step": 25 }, { "epoch": 0.0009294832073367208, "grad_norm": 6.652134421727645, "learning_rate": 8.983890954151179e-08, "loss": 1.235, "step": 30 }, { "epoch": 0.0010843970752261742, "grad_norm": 6.496629964543396, "learning_rate": 1.053283767038414e-07, "loss": 1.1638, "step": 35 }, { "epoch": 0.0012393109431156277, "grad_norm": 6.238181771200748, "learning_rate": 1.2081784386617102e-07, "loss": 1.1402, "step": 40 }, { "epoch": 0.0013942248110050811, "grad_norm": 13.373659564861685, "learning_rate": 1.3630731102850064e-07, "loss": 1.1976, "step": 45 }, { "epoch": 0.0015491386788945346, "grad_norm": 7.510778906982287, "learning_rate": 1.5179677819083023e-07, "loss": 1.3535, "step": 50 }, { "epoch": 0.001704052546783988, "grad_norm": 7.007447188764632, "learning_rate": 1.6728624535315985e-07, "loss": 1.2683, "step": 55 }, { "epoch": 0.0018589664146734415, "grad_norm": 6.8365191085405375, "learning_rate": 1.827757125154895e-07, "loss": 1.1724, "step": 60 }, { "epoch": 0.002013880282562895, "grad_norm": 8.67387964117302, "learning_rate": 1.982651796778191e-07, "loss": 1.2859, "step": 65 }, { "epoch": 0.0021687941504523484, "grad_norm": 5.780633107701592, "learning_rate": 2.1375464684014872e-07, "loss": 1.1063, "step": 70 }, { "epoch": 0.002323708018341802, "grad_norm": 20.450955130843273, "learning_rate": 2.2924411400247831e-07, "loss": 1.1003, "step": 75 }, { "epoch": 0.0024786218862312553, "grad_norm": 6.072553644271577, "learning_rate": 2.4473358116480796e-07, "loss": 1.1366, "step": 80 }, { "epoch": 0.002633535754120709, "grad_norm": 8.272987143842586, "learning_rate": 2.6022304832713755e-07, "loss": 1.0293, "step": 85 }, { "epoch": 0.0027884496220101623, "grad_norm": 4.617965195261736, "learning_rate": 2.757125154894672e-07, "loss": 1.141, "step": 90 }, { "epoch": 0.0029433634898996157, "grad_norm": 5.554023688311453, "learning_rate": 2.912019826517968e-07, "loss": 1.0237, "step": 95 }, { "epoch": 0.003098277357789069, "grad_norm": 5.591713941725516, "learning_rate": 3.066914498141264e-07, "loss": 1.1335, "step": 100 }, { "epoch": 0.0032531912256785226, "grad_norm": 4.5703942310207655, "learning_rate": 3.22180916976456e-07, "loss": 1.0452, "step": 105 }, { "epoch": 0.003408105093567976, "grad_norm": 8.13842035271025, "learning_rate": 3.3767038413878566e-07, "loss": 1.0393, "step": 110 }, { "epoch": 0.0035630189614574296, "grad_norm": 3.655969417362317, "learning_rate": 3.531598513011153e-07, "loss": 1.0662, "step": 115 }, { "epoch": 0.003717932829346883, "grad_norm": 4.249367486592905, "learning_rate": 3.686493184634449e-07, "loss": 1.0024, "step": 120 }, { "epoch": 0.0038728466972363365, "grad_norm": 4.234937679554841, "learning_rate": 3.8413878562577453e-07, "loss": 0.9698, "step": 125 }, { "epoch": 0.00402776056512579, "grad_norm": 4.220036087886395, "learning_rate": 3.9962825278810407e-07, "loss": 1.1144, "step": 130 }, { "epoch": 0.004182674433015243, "grad_norm": 3.175405897811266, "learning_rate": 4.151177199504337e-07, "loss": 0.8917, "step": 135 }, { "epoch": 0.004337588300904697, "grad_norm": 3.083417494313278, "learning_rate": 4.3060718711276335e-07, "loss": 1.0065, "step": 140 }, { "epoch": 0.00449250216879415, "grad_norm": 4.711839462958908, "learning_rate": 4.4609665427509294e-07, "loss": 0.9267, "step": 145 }, { "epoch": 0.004647416036683604, "grad_norm": 3.3474408502404756, "learning_rate": 4.615861214374226e-07, "loss": 1.0406, "step": 150 }, { "epoch": 0.004802329904573057, "grad_norm": 12.609590963458666, "learning_rate": 4.770755885997523e-07, "loss": 1.0012, "step": 155 }, { "epoch": 0.004957243772462511, "grad_norm": 4.058913975974338, "learning_rate": 4.925650557620818e-07, "loss": 0.9897, "step": 160 }, { "epoch": 0.005112157640351964, "grad_norm": 4.136283307650433, "learning_rate": 5.080545229244115e-07, "loss": 1.0268, "step": 165 }, { "epoch": 0.005267071508241418, "grad_norm": 4.881589934299079, "learning_rate": 5.23543990086741e-07, "loss": 1.0483, "step": 170 }, { "epoch": 0.005421985376130871, "grad_norm": 3.343387026960209, "learning_rate": 5.390334572490706e-07, "loss": 0.9886, "step": 175 }, { "epoch": 0.0055768992440203245, "grad_norm": 4.68472650009946, "learning_rate": 5.545229244114003e-07, "loss": 0.9965, "step": 180 }, { "epoch": 0.005731813111909778, "grad_norm": 4.9299051779612855, "learning_rate": 5.700123915737299e-07, "loss": 1.0091, "step": 185 }, { "epoch": 0.0058867269797992314, "grad_norm": 6.7326316882429476, "learning_rate": 5.855018587360596e-07, "loss": 0.9499, "step": 190 }, { "epoch": 0.006041640847688685, "grad_norm": 4.3164408597914266, "learning_rate": 6.009913258983891e-07, "loss": 1.0296, "step": 195 }, { "epoch": 0.006196554715578138, "grad_norm": 3.710456909213671, "learning_rate": 6.164807930607188e-07, "loss": 0.8716, "step": 200 }, { "epoch": 0.006351468583467592, "grad_norm": 4.656877085486324, "learning_rate": 6.319702602230483e-07, "loss": 1.0162, "step": 205 }, { "epoch": 0.006506382451357045, "grad_norm": 6.159171126704424, "learning_rate": 6.47459727385378e-07, "loss": 1.0029, "step": 210 }, { "epoch": 0.006661296319246499, "grad_norm": 3.7525787718012045, "learning_rate": 6.629491945477076e-07, "loss": 0.9854, "step": 215 }, { "epoch": 0.006816210187135952, "grad_norm": 3.3432537269582423, "learning_rate": 6.784386617100372e-07, "loss": 0.9772, "step": 220 }, { "epoch": 0.006971124055025406, "grad_norm": 4.356528832994933, "learning_rate": 6.939281288723669e-07, "loss": 0.9976, "step": 225 }, { "epoch": 0.007126037922914859, "grad_norm": 3.4414033012167105, "learning_rate": 7.094175960346965e-07, "loss": 1.0102, "step": 230 }, { "epoch": 0.007280951790804313, "grad_norm": 4.067386454457795, "learning_rate": 7.24907063197026e-07, "loss": 0.9528, "step": 235 }, { "epoch": 0.007435865658693766, "grad_norm": 6.903771834017331, "learning_rate": 7.403965303593556e-07, "loss": 0.9821, "step": 240 }, { "epoch": 0.0075907795265832195, "grad_norm": 4.986992551134776, "learning_rate": 7.558859975216853e-07, "loss": 0.952, "step": 245 }, { "epoch": 0.007745693394472673, "grad_norm": 3.5027114353153417, "learning_rate": 7.71375464684015e-07, "loss": 1.0262, "step": 250 }, { "epoch": 0.007900607262362127, "grad_norm": 4.0279619927390105, "learning_rate": 7.868649318463445e-07, "loss": 0.8735, "step": 255 }, { "epoch": 0.00805552113025158, "grad_norm": 5.042929704951095, "learning_rate": 8.023543990086743e-07, "loss": 1.0326, "step": 260 }, { "epoch": 0.008210434998141034, "grad_norm": 3.877981370372933, "learning_rate": 8.178438661710038e-07, "loss": 0.9602, "step": 265 }, { "epoch": 0.008365348866030487, "grad_norm": 3.4014402109424986, "learning_rate": 8.333333333333333e-07, "loss": 0.8951, "step": 270 }, { "epoch": 0.008520262733919941, "grad_norm": 4.322148736930424, "learning_rate": 8.488228004956631e-07, "loss": 0.8263, "step": 275 }, { "epoch": 0.008675176601809394, "grad_norm": 4.695598377438286, "learning_rate": 8.643122676579926e-07, "loss": 0.9311, "step": 280 }, { "epoch": 0.008830090469698848, "grad_norm": 2.8768961676775793, "learning_rate": 8.798017348203223e-07, "loss": 0.9144, "step": 285 }, { "epoch": 0.0089850043375883, "grad_norm": 4.0953621328676855, "learning_rate": 8.952912019826519e-07, "loss": 0.9667, "step": 290 }, { "epoch": 0.009139918205477755, "grad_norm": 3.356025051012859, "learning_rate": 9.107806691449815e-07, "loss": 0.9631, "step": 295 }, { "epoch": 0.009294832073367208, "grad_norm": 3.908498318167403, "learning_rate": 9.262701363073111e-07, "loss": 0.9497, "step": 300 }, { "epoch": 0.009449745941256662, "grad_norm": 4.025676198168436, "learning_rate": 9.417596034696406e-07, "loss": 0.9563, "step": 305 }, { "epoch": 0.009604659809146114, "grad_norm": 3.051904321925762, "learning_rate": 9.572490706319703e-07, "loss": 0.934, "step": 310 }, { "epoch": 0.009759573677035569, "grad_norm": 2.510977817641766, "learning_rate": 9.727385377943e-07, "loss": 0.8492, "step": 315 }, { "epoch": 0.009914487544925021, "grad_norm": 4.471017186972991, "learning_rate": 9.882280049566295e-07, "loss": 0.8756, "step": 320 }, { "epoch": 0.010069401412814476, "grad_norm": 4.092095408935038, "learning_rate": 1.0037174721189593e-06, "loss": 0.9547, "step": 325 }, { "epoch": 0.010224315280703928, "grad_norm": 4.543478194153528, "learning_rate": 1.0192069392812888e-06, "loss": 0.9606, "step": 330 }, { "epoch": 0.010379229148593383, "grad_norm": 3.4196142010439865, "learning_rate": 1.0346964064436184e-06, "loss": 0.9234, "step": 335 }, { "epoch": 0.010534143016482835, "grad_norm": 3.6034121588634505, "learning_rate": 1.0501858736059481e-06, "loss": 0.8924, "step": 340 }, { "epoch": 0.01068905688437229, "grad_norm": 3.95351493399197, "learning_rate": 1.0656753407682777e-06, "loss": 0.8508, "step": 345 }, { "epoch": 0.010843970752261742, "grad_norm": 5.528082529650105, "learning_rate": 1.0811648079306072e-06, "loss": 0.9516, "step": 350 }, { "epoch": 0.010998884620151196, "grad_norm": 3.49064340349476, "learning_rate": 1.096654275092937e-06, "loss": 0.9351, "step": 355 }, { "epoch": 0.011153798488040649, "grad_norm": 4.241092532852551, "learning_rate": 1.1121437422552665e-06, "loss": 0.9298, "step": 360 }, { "epoch": 0.011308712355930103, "grad_norm": 3.587675142548451, "learning_rate": 1.127633209417596e-06, "loss": 0.9592, "step": 365 }, { "epoch": 0.011463626223819556, "grad_norm": 5.050760764206234, "learning_rate": 1.1431226765799258e-06, "loss": 0.8118, "step": 370 }, { "epoch": 0.01161854009170901, "grad_norm": 3.3604765439299538, "learning_rate": 1.1586121437422553e-06, "loss": 0.8497, "step": 375 }, { "epoch": 0.011773453959598463, "grad_norm": 3.789983534337695, "learning_rate": 1.174101610904585e-06, "loss": 0.8322, "step": 380 }, { "epoch": 0.011928367827487917, "grad_norm": 3.9650734904401936, "learning_rate": 1.1895910780669146e-06, "loss": 0.8357, "step": 385 }, { "epoch": 0.01208328169537737, "grad_norm": 3.5302831305939244, "learning_rate": 1.2050805452292443e-06, "loss": 0.8933, "step": 390 }, { "epoch": 0.012238195563266824, "grad_norm": 3.9214384132021256, "learning_rate": 1.2205700123915739e-06, "loss": 0.8419, "step": 395 }, { "epoch": 0.012393109431156277, "grad_norm": 6.34757348829879, "learning_rate": 1.2360594795539034e-06, "loss": 0.8541, "step": 400 }, { "epoch": 0.012548023299045731, "grad_norm": 3.0096040417139935, "learning_rate": 1.2515489467162332e-06, "loss": 0.9286, "step": 405 }, { "epoch": 0.012702937166935184, "grad_norm": 8.72721983773612, "learning_rate": 1.2670384138785627e-06, "loss": 0.86, "step": 410 }, { "epoch": 0.012857851034824638, "grad_norm": 2.9412702473067687, "learning_rate": 1.2825278810408922e-06, "loss": 0.9237, "step": 415 }, { "epoch": 0.01301276490271409, "grad_norm": 4.659578970424144, "learning_rate": 1.2980173482032218e-06, "loss": 0.9603, "step": 420 }, { "epoch": 0.013167678770603545, "grad_norm": 3.683365386015848, "learning_rate": 1.3135068153655513e-06, "loss": 0.8757, "step": 425 }, { "epoch": 0.013322592638492997, "grad_norm": 3.5744345474415815, "learning_rate": 1.3289962825278813e-06, "loss": 0.9108, "step": 430 }, { "epoch": 0.013477506506382452, "grad_norm": 3.3495611139670807, "learning_rate": 1.3444857496902108e-06, "loss": 0.9088, "step": 435 }, { "epoch": 0.013632420374271904, "grad_norm": 3.0960870944713923, "learning_rate": 1.3599752168525403e-06, "loss": 0.8661, "step": 440 }, { "epoch": 0.013787334242161359, "grad_norm": 3.590522491856707, "learning_rate": 1.37546468401487e-06, "loss": 0.8932, "step": 445 }, { "epoch": 0.013942248110050811, "grad_norm": 13.856073404399028, "learning_rate": 1.3909541511771996e-06, "loss": 0.895, "step": 450 }, { "epoch": 0.014097161977940266, "grad_norm": 3.88783248013948, "learning_rate": 1.4064436183395292e-06, "loss": 0.9176, "step": 455 }, { "epoch": 0.014252075845829718, "grad_norm": 2.590757635322331, "learning_rate": 1.421933085501859e-06, "loss": 0.8353, "step": 460 }, { "epoch": 0.014406989713719173, "grad_norm": 3.1709844752638405, "learning_rate": 1.4374225526641887e-06, "loss": 0.8455, "step": 465 }, { "epoch": 0.014561903581608625, "grad_norm": 3.421753963324821, "learning_rate": 1.4529120198265182e-06, "loss": 0.9337, "step": 470 }, { "epoch": 0.01471681744949808, "grad_norm": 3.2447236552447793, "learning_rate": 1.4684014869888477e-06, "loss": 0.9445, "step": 475 }, { "epoch": 0.014871731317387532, "grad_norm": 5.253303982842093, "learning_rate": 1.4838909541511773e-06, "loss": 0.9255, "step": 480 }, { "epoch": 0.015026645185276986, "grad_norm": 3.2504976607327536, "learning_rate": 1.4993804213135068e-06, "loss": 0.9265, "step": 485 }, { "epoch": 0.015181559053166439, "grad_norm": 3.507539485306519, "learning_rate": 1.5148698884758364e-06, "loss": 0.9625, "step": 490 }, { "epoch": 0.015336472921055893, "grad_norm": 3.3599930952585426, "learning_rate": 1.5303593556381663e-06, "loss": 0.9757, "step": 495 }, { "epoch": 0.015491386788945346, "grad_norm": 3.5936617578074417, "learning_rate": 1.5458488228004958e-06, "loss": 0.8374, "step": 500 }, { "epoch": 0.0156463006568348, "grad_norm": 2.4763477219068872, "learning_rate": 1.5613382899628254e-06, "loss": 0.9029, "step": 505 }, { "epoch": 0.015801214524724255, "grad_norm": 2.8224202069833404, "learning_rate": 1.576827757125155e-06, "loss": 0.8533, "step": 510 }, { "epoch": 0.015956128392613705, "grad_norm": 3.0442463187540434, "learning_rate": 1.5923172242874847e-06, "loss": 0.8255, "step": 515 }, { "epoch": 0.01611104226050316, "grad_norm": 3.3010719809197244, "learning_rate": 1.6078066914498142e-06, "loss": 0.8641, "step": 520 }, { "epoch": 0.016265956128392614, "grad_norm": 2.906494273346089, "learning_rate": 1.623296158612144e-06, "loss": 0.866, "step": 525 }, { "epoch": 0.01642086999628207, "grad_norm": 3.458814007029948, "learning_rate": 1.6387856257744735e-06, "loss": 0.8484, "step": 530 }, { "epoch": 0.01657578386417152, "grad_norm": 3.5476707620966215, "learning_rate": 1.6542750929368032e-06, "loss": 0.8952, "step": 535 }, { "epoch": 0.016730697732060974, "grad_norm": 10.244424898213408, "learning_rate": 1.6697645600991328e-06, "loss": 0.8385, "step": 540 }, { "epoch": 0.016885611599950428, "grad_norm": 3.4846127389648194, "learning_rate": 1.6852540272614623e-06, "loss": 0.9094, "step": 545 }, { "epoch": 0.017040525467839882, "grad_norm": 2.6157166007516484, "learning_rate": 1.7007434944237919e-06, "loss": 0.8691, "step": 550 }, { "epoch": 0.017195439335729333, "grad_norm": 2.654541773772655, "learning_rate": 1.7162329615861214e-06, "loss": 0.8488, "step": 555 }, { "epoch": 0.017350353203618787, "grad_norm": 2.59035080029755, "learning_rate": 1.7317224287484514e-06, "loss": 0.8155, "step": 560 }, { "epoch": 0.017505267071508242, "grad_norm": 4.468695474897117, "learning_rate": 1.7472118959107809e-06, "loss": 0.9476, "step": 565 }, { "epoch": 0.017660180939397696, "grad_norm": 5.207310081758458, "learning_rate": 1.7627013630731104e-06, "loss": 0.7995, "step": 570 }, { "epoch": 0.017815094807287147, "grad_norm": 4.644744487378227, "learning_rate": 1.77819083023544e-06, "loss": 0.8397, "step": 575 }, { "epoch": 0.0179700086751766, "grad_norm": 7.137126324594951, "learning_rate": 1.7936802973977697e-06, "loss": 0.9389, "step": 580 }, { "epoch": 0.018124922543066056, "grad_norm": 3.2699868638262495, "learning_rate": 1.8091697645600993e-06, "loss": 0.8297, "step": 585 }, { "epoch": 0.01827983641095551, "grad_norm": 4.953924809740286, "learning_rate": 1.824659231722429e-06, "loss": 0.8273, "step": 590 }, { "epoch": 0.01843475027884496, "grad_norm": 3.869473875096369, "learning_rate": 1.8401486988847585e-06, "loss": 0.8321, "step": 595 }, { "epoch": 0.018589664146734415, "grad_norm": 4.32485088358261, "learning_rate": 1.8556381660470883e-06, "loss": 0.8081, "step": 600 }, { "epoch": 0.01874457801462387, "grad_norm": 4.735377528051087, "learning_rate": 1.8711276332094178e-06, "loss": 0.7999, "step": 605 }, { "epoch": 0.018899491882513324, "grad_norm": 4.47894256759768, "learning_rate": 1.8866171003717474e-06, "loss": 0.9685, "step": 610 }, { "epoch": 0.019054405750402775, "grad_norm": 5.249895824864087, "learning_rate": 1.902106567534077e-06, "loss": 0.8643, "step": 615 }, { "epoch": 0.01920931961829223, "grad_norm": 3.0395259586315224, "learning_rate": 1.9175960346964066e-06, "loss": 0.8971, "step": 620 }, { "epoch": 0.019364233486181683, "grad_norm": 2.910893516125444, "learning_rate": 1.9330855018587364e-06, "loss": 0.8522, "step": 625 }, { "epoch": 0.019519147354071138, "grad_norm": 3.896684619617823, "learning_rate": 1.9485749690210657e-06, "loss": 0.8579, "step": 630 }, { "epoch": 0.01967406122196059, "grad_norm": 3.158835405221959, "learning_rate": 1.9640644361833955e-06, "loss": 0.8033, "step": 635 }, { "epoch": 0.019828975089850043, "grad_norm": 2.832476526032089, "learning_rate": 1.9795539033457252e-06, "loss": 0.8386, "step": 640 }, { "epoch": 0.019983888957739497, "grad_norm": 2.8511154098702614, "learning_rate": 1.9950433705080545e-06, "loss": 0.8533, "step": 645 }, { "epoch": 0.02013880282562895, "grad_norm": 2.602750506232658, "learning_rate": 2.0105328376703843e-06, "loss": 0.8522, "step": 650 }, { "epoch": 0.020293716693518402, "grad_norm": 3.5325023986893784, "learning_rate": 2.0260223048327136e-06, "loss": 0.899, "step": 655 }, { "epoch": 0.020448630561407857, "grad_norm": 3.531406073185907, "learning_rate": 2.041511771995044e-06, "loss": 0.8482, "step": 660 }, { "epoch": 0.02060354442929731, "grad_norm": 9.413317640235492, "learning_rate": 2.057001239157373e-06, "loss": 0.8298, "step": 665 }, { "epoch": 0.020758458297186765, "grad_norm": 6.616762935808326, "learning_rate": 2.072490706319703e-06, "loss": 0.932, "step": 670 }, { "epoch": 0.020913372165076216, "grad_norm": 6.13838805866849, "learning_rate": 2.087980173482032e-06, "loss": 0.8315, "step": 675 }, { "epoch": 0.02106828603296567, "grad_norm": 3.297474296829918, "learning_rate": 2.103469640644362e-06, "loss": 0.8854, "step": 680 }, { "epoch": 0.021223199900855125, "grad_norm": 2.836039563361812, "learning_rate": 2.1189591078066917e-06, "loss": 0.9065, "step": 685 }, { "epoch": 0.02137811376874458, "grad_norm": 3.348366787145468, "learning_rate": 2.1344485749690214e-06, "loss": 0.8535, "step": 690 }, { "epoch": 0.02153302763663403, "grad_norm": 3.045088823557142, "learning_rate": 2.1499380421313508e-06, "loss": 0.868, "step": 695 }, { "epoch": 0.021687941504523484, "grad_norm": 3.1392433078843354, "learning_rate": 2.1654275092936805e-06, "loss": 0.9141, "step": 700 }, { "epoch": 0.02184285537241294, "grad_norm": 2.919542689348482, "learning_rate": 2.1809169764560103e-06, "loss": 0.7904, "step": 705 }, { "epoch": 0.021997769240302393, "grad_norm": 4.015591630463191, "learning_rate": 2.1964064436183396e-06, "loss": 0.8497, "step": 710 }, { "epoch": 0.022152683108191844, "grad_norm": 3.3919109517944226, "learning_rate": 2.2118959107806693e-06, "loss": 0.8853, "step": 715 }, { "epoch": 0.022307596976081298, "grad_norm": 3.1784888886425073, "learning_rate": 2.2273853779429987e-06, "loss": 0.8428, "step": 720 }, { "epoch": 0.022462510843970752, "grad_norm": 3.5064957605034746, "learning_rate": 2.242874845105329e-06, "loss": 0.856, "step": 725 }, { "epoch": 0.022617424711860207, "grad_norm": 3.1942454879435322, "learning_rate": 2.258364312267658e-06, "loss": 0.8608, "step": 730 }, { "epoch": 0.022772338579749658, "grad_norm": 3.827291869828899, "learning_rate": 2.273853779429988e-06, "loss": 0.8717, "step": 735 }, { "epoch": 0.022927252447639112, "grad_norm": 5.428836504606775, "learning_rate": 2.2893432465923172e-06, "loss": 0.8846, "step": 740 }, { "epoch": 0.023082166315528566, "grad_norm": 3.111899923957158, "learning_rate": 2.304832713754647e-06, "loss": 0.8392, "step": 745 }, { "epoch": 0.02323708018341802, "grad_norm": 3.1880577944843504, "learning_rate": 2.3203221809169767e-06, "loss": 0.8706, "step": 750 }, { "epoch": 0.02339199405130747, "grad_norm": 3.507348496649617, "learning_rate": 2.3358116480793065e-06, "loss": 0.9467, "step": 755 }, { "epoch": 0.023546907919196926, "grad_norm": 3.290767783291904, "learning_rate": 2.351301115241636e-06, "loss": 0.8065, "step": 760 }, { "epoch": 0.02370182178708638, "grad_norm": 2.8646632209795984, "learning_rate": 2.3667905824039656e-06, "loss": 0.8501, "step": 765 }, { "epoch": 0.023856735654975834, "grad_norm": 3.9252517500690383, "learning_rate": 2.3822800495662953e-06, "loss": 0.9116, "step": 770 }, { "epoch": 0.024011649522865285, "grad_norm": 3.2073997236687486, "learning_rate": 2.3977695167286246e-06, "loss": 0.8228, "step": 775 }, { "epoch": 0.02416656339075474, "grad_norm": 3.1191887834250807, "learning_rate": 2.4132589838909544e-06, "loss": 0.8654, "step": 780 }, { "epoch": 0.024321477258644194, "grad_norm": 2.7702309634481637, "learning_rate": 2.4287484510532837e-06, "loss": 0.8008, "step": 785 }, { "epoch": 0.024476391126533648, "grad_norm": 3.439772726147364, "learning_rate": 2.444237918215614e-06, "loss": 0.7413, "step": 790 }, { "epoch": 0.0246313049944231, "grad_norm": 4.340062822936703, "learning_rate": 2.459727385377943e-06, "loss": 0.9403, "step": 795 }, { "epoch": 0.024786218862312553, "grad_norm": 3.674228555127088, "learning_rate": 2.475216852540273e-06, "loss": 0.8856, "step": 800 }, { "epoch": 0.024941132730202008, "grad_norm": 6.206829801628653, "learning_rate": 2.4907063197026023e-06, "loss": 0.7993, "step": 805 }, { "epoch": 0.025096046598091462, "grad_norm": 4.555390860598623, "learning_rate": 2.5061957868649324e-06, "loss": 0.8529, "step": 810 }, { "epoch": 0.025250960465980913, "grad_norm": 3.0697050031209607, "learning_rate": 2.5216852540272618e-06, "loss": 0.8308, "step": 815 }, { "epoch": 0.025405874333870367, "grad_norm": 2.9080100342587136, "learning_rate": 2.5371747211895915e-06, "loss": 0.87, "step": 820 }, { "epoch": 0.02556078820175982, "grad_norm": 6.911069156554785, "learning_rate": 2.552664188351921e-06, "loss": 0.8778, "step": 825 }, { "epoch": 0.025715702069649276, "grad_norm": 3.498241918950015, "learning_rate": 2.5681536555142506e-06, "loss": 0.8078, "step": 830 }, { "epoch": 0.025870615937538727, "grad_norm": 5.3180976136859615, "learning_rate": 2.5836431226765803e-06, "loss": 0.861, "step": 835 }, { "epoch": 0.02602552980542818, "grad_norm": 3.2494922294341673, "learning_rate": 2.5991325898389097e-06, "loss": 0.8898, "step": 840 }, { "epoch": 0.026180443673317635, "grad_norm": 3.709720529589958, "learning_rate": 2.6146220570012394e-06, "loss": 0.8156, "step": 845 }, { "epoch": 0.02633535754120709, "grad_norm": 3.098977359339887, "learning_rate": 2.6301115241635687e-06, "loss": 0.817, "step": 850 }, { "epoch": 0.02649027140909654, "grad_norm": 2.4197562331290254, "learning_rate": 2.6456009913258985e-06, "loss": 0.8573, "step": 855 }, { "epoch": 0.026645185276985995, "grad_norm": 3.4674678280402205, "learning_rate": 2.661090458488228e-06, "loss": 0.7894, "step": 860 }, { "epoch": 0.02680009914487545, "grad_norm": 3.679308730955473, "learning_rate": 2.6765799256505576e-06, "loss": 0.8836, "step": 865 }, { "epoch": 0.026955013012764904, "grad_norm": 3.298458229300346, "learning_rate": 2.6920693928128877e-06, "loss": 0.9398, "step": 870 }, { "epoch": 0.027109926880654354, "grad_norm": 2.5388017687465623, "learning_rate": 2.7075588599752175e-06, "loss": 0.9055, "step": 875 }, { "epoch": 0.02726484074854381, "grad_norm": 3.206985855541687, "learning_rate": 2.723048327137547e-06, "loss": 0.8804, "step": 880 }, { "epoch": 0.027419754616433263, "grad_norm": 2.8400542913022253, "learning_rate": 2.7385377942998766e-06, "loss": 0.8003, "step": 885 }, { "epoch": 0.027574668484322717, "grad_norm": 2.9153945127576097, "learning_rate": 2.754027261462206e-06, "loss": 0.8242, "step": 890 }, { "epoch": 0.02772958235221217, "grad_norm": 3.0384486463143423, "learning_rate": 2.7695167286245356e-06, "loss": 0.8495, "step": 895 }, { "epoch": 0.027884496220101623, "grad_norm": 4.092289353218504, "learning_rate": 2.785006195786865e-06, "loss": 0.9124, "step": 900 }, { "epoch": 0.028039410087991077, "grad_norm": 3.630995679956298, "learning_rate": 2.8004956629491947e-06, "loss": 0.8992, "step": 905 }, { "epoch": 0.02819432395588053, "grad_norm": 2.6569758624035655, "learning_rate": 2.8159851301115245e-06, "loss": 0.9324, "step": 910 }, { "epoch": 0.028349237823769986, "grad_norm": 3.6170289070781423, "learning_rate": 2.8314745972738538e-06, "loss": 0.9211, "step": 915 }, { "epoch": 0.028504151691659436, "grad_norm": 2.920732471812002, "learning_rate": 2.8469640644361835e-06, "loss": 0.7725, "step": 920 }, { "epoch": 0.02865906555954889, "grad_norm": 3.64427165531814, "learning_rate": 2.862453531598513e-06, "loss": 0.9275, "step": 925 }, { "epoch": 0.028813979427438345, "grad_norm": 4.477865546711906, "learning_rate": 2.8779429987608426e-06, "loss": 0.8311, "step": 930 }, { "epoch": 0.0289688932953278, "grad_norm": 3.4444566509959613, "learning_rate": 2.8934324659231728e-06, "loss": 0.7418, "step": 935 }, { "epoch": 0.02912380716321725, "grad_norm": 4.222592444204683, "learning_rate": 2.908921933085502e-06, "loss": 0.8649, "step": 940 }, { "epoch": 0.029278721031106705, "grad_norm": 3.000468689916493, "learning_rate": 2.924411400247832e-06, "loss": 0.8293, "step": 945 }, { "epoch": 0.02943363489899616, "grad_norm": 3.0643977913898555, "learning_rate": 2.9399008674101616e-06, "loss": 0.8292, "step": 950 }, { "epoch": 0.029588548766885613, "grad_norm": 2.8626009945197026, "learning_rate": 2.955390334572491e-06, "loss": 0.8081, "step": 955 }, { "epoch": 0.029743462634775064, "grad_norm": 3.0503726349346323, "learning_rate": 2.9708798017348207e-06, "loss": 0.8326, "step": 960 }, { "epoch": 0.02989837650266452, "grad_norm": 3.622418653456788, "learning_rate": 2.98636926889715e-06, "loss": 0.8094, "step": 965 }, { "epoch": 0.030053290370553973, "grad_norm": 5.393929359351496, "learning_rate": 3.0018587360594797e-06, "loss": 0.8173, "step": 970 }, { "epoch": 0.030208204238443427, "grad_norm": 3.47909785308864, "learning_rate": 3.0173482032218095e-06, "loss": 0.8478, "step": 975 }, { "epoch": 0.030363118106332878, "grad_norm": 2.8685279788171503, "learning_rate": 3.032837670384139e-06, "loss": 0.7843, "step": 980 }, { "epoch": 0.030518031974222332, "grad_norm": 2.843167738113205, "learning_rate": 3.0483271375464686e-06, "loss": 0.8902, "step": 985 }, { "epoch": 0.030672945842111787, "grad_norm": 4.149068387948018, "learning_rate": 3.063816604708798e-06, "loss": 0.8479, "step": 990 }, { "epoch": 0.03082785971000124, "grad_norm": 3.129776103156325, "learning_rate": 3.0793060718711276e-06, "loss": 0.7646, "step": 995 }, { "epoch": 0.030982773577890692, "grad_norm": 3.7643817443092256, "learning_rate": 3.094795539033458e-06, "loss": 0.8884, "step": 1000 }, { "epoch": 0.031137687445780146, "grad_norm": 3.478467011698403, "learning_rate": 3.110285006195787e-06, "loss": 0.814, "step": 1005 }, { "epoch": 0.0312926013136696, "grad_norm": 3.4143216354218127, "learning_rate": 3.125774473358117e-06, "loss": 0.7667, "step": 1010 }, { "epoch": 0.031447515181559055, "grad_norm": 2.8795235823077623, "learning_rate": 3.1412639405204466e-06, "loss": 0.8748, "step": 1015 }, { "epoch": 0.03160242904944851, "grad_norm": 3.8773425028042428, "learning_rate": 3.156753407682776e-06, "loss": 0.8092, "step": 1020 }, { "epoch": 0.03175734291733796, "grad_norm": 3.028288783429358, "learning_rate": 3.1722428748451057e-06, "loss": 0.8106, "step": 1025 }, { "epoch": 0.03191225678522741, "grad_norm": 3.9610639307711724, "learning_rate": 3.187732342007435e-06, "loss": 0.8792, "step": 1030 }, { "epoch": 0.032067170653116865, "grad_norm": 6.589982170675244, "learning_rate": 3.203221809169765e-06, "loss": 0.8103, "step": 1035 }, { "epoch": 0.03222208452100632, "grad_norm": 2.78084887294494, "learning_rate": 3.2187112763320945e-06, "loss": 0.8394, "step": 1040 }, { "epoch": 0.032376998388895774, "grad_norm": 2.679966098911917, "learning_rate": 3.234200743494424e-06, "loss": 0.7206, "step": 1045 }, { "epoch": 0.03253191225678523, "grad_norm": 3.671311150939833, "learning_rate": 3.2496902106567536e-06, "loss": 0.8099, "step": 1050 }, { "epoch": 0.03268682612467468, "grad_norm": 3.4621534653201955, "learning_rate": 3.265179677819083e-06, "loss": 0.8407, "step": 1055 }, { "epoch": 0.03284173999256414, "grad_norm": 2.9903136974655564, "learning_rate": 3.2806691449814127e-06, "loss": 0.811, "step": 1060 }, { "epoch": 0.03299665386045359, "grad_norm": 3.0217945929172756, "learning_rate": 3.296158612143743e-06, "loss": 0.8364, "step": 1065 }, { "epoch": 0.03315156772834304, "grad_norm": 3.8655991038643283, "learning_rate": 3.311648079306072e-06, "loss": 0.8504, "step": 1070 }, { "epoch": 0.03330648159623249, "grad_norm": 2.9929093517405105, "learning_rate": 3.327137546468402e-06, "loss": 0.8729, "step": 1075 }, { "epoch": 0.03346139546412195, "grad_norm": 2.960170920105308, "learning_rate": 3.3426270136307317e-06, "loss": 0.9178, "step": 1080 }, { "epoch": 0.0336163093320114, "grad_norm": 3.0127088631383083, "learning_rate": 3.358116480793061e-06, "loss": 0.779, "step": 1085 }, { "epoch": 0.033771223199900856, "grad_norm": 3.352792810523269, "learning_rate": 3.3736059479553908e-06, "loss": 0.8706, "step": 1090 }, { "epoch": 0.03392613706779031, "grad_norm": 3.4897785753351873, "learning_rate": 3.38909541511772e-06, "loss": 0.7432, "step": 1095 }, { "epoch": 0.034081050935679764, "grad_norm": 2.965220840106614, "learning_rate": 3.40458488228005e-06, "loss": 0.8679, "step": 1100 }, { "epoch": 0.03423596480356922, "grad_norm": 3.5422175897182444, "learning_rate": 3.4200743494423796e-06, "loss": 0.8622, "step": 1105 }, { "epoch": 0.034390878671458666, "grad_norm": 4.254454939629083, "learning_rate": 3.435563816604709e-06, "loss": 0.8648, "step": 1110 }, { "epoch": 0.03454579253934812, "grad_norm": 3.768193508290591, "learning_rate": 3.4510532837670387e-06, "loss": 0.8325, "step": 1115 }, { "epoch": 0.034700706407237575, "grad_norm": 3.5318601521072512, "learning_rate": 3.466542750929368e-06, "loss": 0.8741, "step": 1120 }, { "epoch": 0.03485562027512703, "grad_norm": 2.8926645494030554, "learning_rate": 3.4820322180916977e-06, "loss": 0.8142, "step": 1125 }, { "epoch": 0.035010534143016483, "grad_norm": 2.89288898218319, "learning_rate": 3.497521685254028e-06, "loss": 0.7844, "step": 1130 }, { "epoch": 0.03516544801090594, "grad_norm": 3.203958642183019, "learning_rate": 3.5130111524163572e-06, "loss": 0.8045, "step": 1135 }, { "epoch": 0.03532036187879539, "grad_norm": 3.0722204559856863, "learning_rate": 3.528500619578687e-06, "loss": 0.7811, "step": 1140 }, { "epoch": 0.035475275746684846, "grad_norm": 4.085484406084257, "learning_rate": 3.5439900867410167e-06, "loss": 0.921, "step": 1145 }, { "epoch": 0.035630189614574294, "grad_norm": 3.4640435741424542, "learning_rate": 3.559479553903346e-06, "loss": 0.8064, "step": 1150 }, { "epoch": 0.03578510348246375, "grad_norm": 2.6085951615147738, "learning_rate": 3.574969021065676e-06, "loss": 0.7933, "step": 1155 }, { "epoch": 0.0359400173503532, "grad_norm": 4.352501076783243, "learning_rate": 3.590458488228005e-06, "loss": 0.8635, "step": 1160 }, { "epoch": 0.03609493121824266, "grad_norm": 3.088781513936566, "learning_rate": 3.605947955390335e-06, "loss": 0.8321, "step": 1165 }, { "epoch": 0.03624984508613211, "grad_norm": 3.0368703647460356, "learning_rate": 3.621437422552664e-06, "loss": 0.7901, "step": 1170 }, { "epoch": 0.036404758954021565, "grad_norm": 2.6955379184017123, "learning_rate": 3.636926889714994e-06, "loss": 0.8472, "step": 1175 }, { "epoch": 0.03655967282191102, "grad_norm": 4.111712850163494, "learning_rate": 3.6524163568773237e-06, "loss": 0.8799, "step": 1180 }, { "epoch": 0.036714586689800474, "grad_norm": 3.2198257858910835, "learning_rate": 3.667905824039653e-06, "loss": 0.7955, "step": 1185 }, { "epoch": 0.03686950055768992, "grad_norm": 4.362254685971314, "learning_rate": 3.6833952912019828e-06, "loss": 0.8213, "step": 1190 }, { "epoch": 0.037024414425579376, "grad_norm": 3.23241128767789, "learning_rate": 3.698884758364313e-06, "loss": 0.8046, "step": 1195 }, { "epoch": 0.03717932829346883, "grad_norm": 2.717585488855344, "learning_rate": 3.7143742255266423e-06, "loss": 0.8537, "step": 1200 }, { "epoch": 0.037334242161358284, "grad_norm": 2.7319973783923848, "learning_rate": 3.729863692688972e-06, "loss": 0.8407, "step": 1205 }, { "epoch": 0.03748915602924774, "grad_norm": 2.992053118968365, "learning_rate": 3.7453531598513013e-06, "loss": 0.8073, "step": 1210 }, { "epoch": 0.03764406989713719, "grad_norm": 2.9546022124648004, "learning_rate": 3.760842627013631e-06, "loss": 0.7757, "step": 1215 }, { "epoch": 0.03779898376502665, "grad_norm": 3.6689943626639954, "learning_rate": 3.776332094175961e-06, "loss": 0.8604, "step": 1220 }, { "epoch": 0.0379538976329161, "grad_norm": 3.385443064168972, "learning_rate": 3.79182156133829e-06, "loss": 0.8318, "step": 1225 }, { "epoch": 0.03810881150080555, "grad_norm": 3.5684081284225995, "learning_rate": 3.80731102850062e-06, "loss": 0.8183, "step": 1230 }, { "epoch": 0.038263725368695004, "grad_norm": 2.5006076729306908, "learning_rate": 3.82280049566295e-06, "loss": 0.7822, "step": 1235 }, { "epoch": 0.03841863923658446, "grad_norm": 2.8063487078183282, "learning_rate": 3.838289962825279e-06, "loss": 0.8539, "step": 1240 }, { "epoch": 0.03857355310447391, "grad_norm": 2.7290608186200433, "learning_rate": 3.853779429987608e-06, "loss": 0.8594, "step": 1245 }, { "epoch": 0.038728466972363366, "grad_norm": 3.1499164730870293, "learning_rate": 3.869268897149938e-06, "loss": 0.8222, "step": 1250 }, { "epoch": 0.03888338084025282, "grad_norm": 2.9938764655957, "learning_rate": 3.884758364312268e-06, "loss": 0.892, "step": 1255 }, { "epoch": 0.039038294708142275, "grad_norm": 3.038637724295859, "learning_rate": 3.9002478314745976e-06, "loss": 0.8411, "step": 1260 }, { "epoch": 0.03919320857603173, "grad_norm": 3.707241559313066, "learning_rate": 3.915737298636927e-06, "loss": 0.7946, "step": 1265 }, { "epoch": 0.03934812244392118, "grad_norm": 3.164354554637033, "learning_rate": 3.931226765799257e-06, "loss": 0.7483, "step": 1270 }, { "epoch": 0.03950303631181063, "grad_norm": 3.028061515222985, "learning_rate": 3.946716232961587e-06, "loss": 0.8084, "step": 1275 }, { "epoch": 0.039657950179700086, "grad_norm": 3.005446315340669, "learning_rate": 3.9622057001239166e-06, "loss": 0.8927, "step": 1280 }, { "epoch": 0.03981286404758954, "grad_norm": 3.376037976283865, "learning_rate": 3.9776951672862455e-06, "loss": 0.7905, "step": 1285 }, { "epoch": 0.039967777915478994, "grad_norm": 2.925839938562806, "learning_rate": 3.993184634448575e-06, "loss": 0.9303, "step": 1290 }, { "epoch": 0.04012269178336845, "grad_norm": 3.4152630418068197, "learning_rate": 4.008674101610905e-06, "loss": 0.7605, "step": 1295 }, { "epoch": 0.0402776056512579, "grad_norm": 2.9085390497687693, "learning_rate": 4.024163568773235e-06, "loss": 0.7686, "step": 1300 }, { "epoch": 0.04043251951914736, "grad_norm": 3.1596939423407457, "learning_rate": 4.039653035935564e-06, "loss": 0.8101, "step": 1305 }, { "epoch": 0.040587433387036805, "grad_norm": 3.732729519554439, "learning_rate": 4.055142503097893e-06, "loss": 0.8629, "step": 1310 }, { "epoch": 0.04074234725492626, "grad_norm": 3.703411688136309, "learning_rate": 4.070631970260223e-06, "loss": 0.9008, "step": 1315 }, { "epoch": 0.04089726112281571, "grad_norm": 3.295576160772179, "learning_rate": 4.086121437422553e-06, "loss": 0.8207, "step": 1320 }, { "epoch": 0.04105217499070517, "grad_norm": 2.5424686383457966, "learning_rate": 4.101610904584883e-06, "loss": 0.8254, "step": 1325 }, { "epoch": 0.04120708885859462, "grad_norm": 3.2460717329216577, "learning_rate": 4.117100371747212e-06, "loss": 0.8801, "step": 1330 }, { "epoch": 0.041362002726484076, "grad_norm": 3.8214657061846107, "learning_rate": 4.132589838909542e-06, "loss": 0.7918, "step": 1335 }, { "epoch": 0.04151691659437353, "grad_norm": 4.406461869863772, "learning_rate": 4.148079306071872e-06, "loss": 0.827, "step": 1340 }, { "epoch": 0.041671830462262985, "grad_norm": 3.6765133025115273, "learning_rate": 4.163568773234201e-06, "loss": 0.8412, "step": 1345 }, { "epoch": 0.04182674433015243, "grad_norm": 2.9017758802208466, "learning_rate": 4.1790582403965305e-06, "loss": 0.7897, "step": 1350 }, { "epoch": 0.04198165819804189, "grad_norm": 3.2337700905834694, "learning_rate": 4.19454770755886e-06, "loss": 0.7665, "step": 1355 }, { "epoch": 0.04213657206593134, "grad_norm": 3.237056881166711, "learning_rate": 4.21003717472119e-06, "loss": 0.8099, "step": 1360 }, { "epoch": 0.042291485933820795, "grad_norm": 3.684508365236262, "learning_rate": 4.22552664188352e-06, "loss": 0.8874, "step": 1365 }, { "epoch": 0.04244639980171025, "grad_norm": 2.8289276638155534, "learning_rate": 4.241016109045849e-06, "loss": 0.8284, "step": 1370 }, { "epoch": 0.042601313669599704, "grad_norm": 3.505874242925955, "learning_rate": 4.256505576208178e-06, "loss": 0.8264, "step": 1375 }, { "epoch": 0.04275622753748916, "grad_norm": 2.9216046018791073, "learning_rate": 4.271995043370508e-06, "loss": 0.712, "step": 1380 }, { "epoch": 0.04291114140537861, "grad_norm": 3.3055974447226397, "learning_rate": 4.287484510532838e-06, "loss": 0.7385, "step": 1385 }, { "epoch": 0.04306605527326806, "grad_norm": 3.2917336673096145, "learning_rate": 4.302973977695168e-06, "loss": 0.7898, "step": 1390 }, { "epoch": 0.043220969141157514, "grad_norm": 3.296506055401993, "learning_rate": 4.318463444857497e-06, "loss": 0.7646, "step": 1395 }, { "epoch": 0.04337588300904697, "grad_norm": 3.54735662035021, "learning_rate": 4.333952912019827e-06, "loss": 0.8193, "step": 1400 }, { "epoch": 0.04353079687693642, "grad_norm": 4.326519875278304, "learning_rate": 4.349442379182157e-06, "loss": 0.8111, "step": 1405 }, { "epoch": 0.04368571074482588, "grad_norm": 3.933403947714215, "learning_rate": 4.364931846344486e-06, "loss": 0.8527, "step": 1410 }, { "epoch": 0.04384062461271533, "grad_norm": 4.050353073251353, "learning_rate": 4.3804213135068155e-06, "loss": 0.8434, "step": 1415 }, { "epoch": 0.043995538480604786, "grad_norm": 2.9658820700964745, "learning_rate": 4.395910780669145e-06, "loss": 0.8015, "step": 1420 }, { "epoch": 0.04415045234849424, "grad_norm": 3.276676753599336, "learning_rate": 4.411400247831475e-06, "loss": 0.8837, "step": 1425 }, { "epoch": 0.04430536621638369, "grad_norm": 3.4111685379175394, "learning_rate": 4.426889714993805e-06, "loss": 0.788, "step": 1430 }, { "epoch": 0.04446028008427314, "grad_norm": 3.008514603746073, "learning_rate": 4.442379182156134e-06, "loss": 0.8564, "step": 1435 }, { "epoch": 0.044615193952162596, "grad_norm": 3.216436938528114, "learning_rate": 4.4578686493184634e-06, "loss": 0.8501, "step": 1440 }, { "epoch": 0.04477010782005205, "grad_norm": 3.2048185840995695, "learning_rate": 4.473358116480793e-06, "loss": 0.8388, "step": 1445 }, { "epoch": 0.044925021687941505, "grad_norm": 2.570191948473344, "learning_rate": 4.488847583643123e-06, "loss": 0.8071, "step": 1450 }, { "epoch": 0.04507993555583096, "grad_norm": 2.7457116710545475, "learning_rate": 4.504337050805453e-06, "loss": 0.8036, "step": 1455 }, { "epoch": 0.045234849423720414, "grad_norm": 3.4060792161301405, "learning_rate": 4.5198265179677824e-06, "loss": 0.8234, "step": 1460 }, { "epoch": 0.04538976329160987, "grad_norm": 2.6844184901042807, "learning_rate": 4.535315985130112e-06, "loss": 0.7247, "step": 1465 }, { "epoch": 0.045544677159499315, "grad_norm": 5.014362902415591, "learning_rate": 4.550805452292442e-06, "loss": 0.7856, "step": 1470 }, { "epoch": 0.04569959102738877, "grad_norm": 2.8600145630852736, "learning_rate": 4.566294919454771e-06, "loss": 0.7897, "step": 1475 }, { "epoch": 0.045854504895278224, "grad_norm": 5.888002871270434, "learning_rate": 4.581784386617101e-06, "loss": 0.8362, "step": 1480 }, { "epoch": 0.04600941876316768, "grad_norm": 2.674304502669238, "learning_rate": 4.59727385377943e-06, "loss": 0.7691, "step": 1485 }, { "epoch": 0.04616433263105713, "grad_norm": 2.8506270737747976, "learning_rate": 4.61276332094176e-06, "loss": 0.7946, "step": 1490 }, { "epoch": 0.04631924649894659, "grad_norm": 3.4535083307554815, "learning_rate": 4.62825278810409e-06, "loss": 0.7267, "step": 1495 }, { "epoch": 0.04647416036683604, "grad_norm": 3.6679698092086976, "learning_rate": 4.643742255266419e-06, "loss": 0.8135, "step": 1500 }, { "epoch": 0.046629074234725496, "grad_norm": 2.5919771628571358, "learning_rate": 4.6592317224287485e-06, "loss": 0.875, "step": 1505 }, { "epoch": 0.04678398810261494, "grad_norm": 3.2068891656515177, "learning_rate": 4.674721189591078e-06, "loss": 0.7905, "step": 1510 }, { "epoch": 0.0469389019705044, "grad_norm": 3.8856371032790316, "learning_rate": 4.690210656753408e-06, "loss": 0.8508, "step": 1515 }, { "epoch": 0.04709381583839385, "grad_norm": 4.187990452191108, "learning_rate": 4.705700123915738e-06, "loss": 0.8734, "step": 1520 }, { "epoch": 0.047248729706283306, "grad_norm": 2.3183204269465905, "learning_rate": 4.7211895910780675e-06, "loss": 0.8715, "step": 1525 }, { "epoch": 0.04740364357417276, "grad_norm": 2.808481730359632, "learning_rate": 4.736679058240397e-06, "loss": 0.8173, "step": 1530 }, { "epoch": 0.047558557442062215, "grad_norm": 2.8873566316318313, "learning_rate": 4.752168525402727e-06, "loss": 0.8575, "step": 1535 }, { "epoch": 0.04771347130995167, "grad_norm": 3.048257189883081, "learning_rate": 4.767657992565056e-06, "loss": 0.8106, "step": 1540 }, { "epoch": 0.04786838517784112, "grad_norm": 2.961685664999941, "learning_rate": 4.783147459727386e-06, "loss": 0.8007, "step": 1545 }, { "epoch": 0.04802329904573057, "grad_norm": 2.5061487694727047, "learning_rate": 4.798636926889715e-06, "loss": 0.8115, "step": 1550 }, { "epoch": 0.048178212913620025, "grad_norm": 4.451371774632417, "learning_rate": 4.814126394052045e-06, "loss": 0.7487, "step": 1555 }, { "epoch": 0.04833312678150948, "grad_norm": 2.6156634420743745, "learning_rate": 4.829615861214375e-06, "loss": 0.8618, "step": 1560 }, { "epoch": 0.048488040649398934, "grad_norm": 2.709311169926272, "learning_rate": 4.845105328376704e-06, "loss": 0.8153, "step": 1565 }, { "epoch": 0.04864295451728839, "grad_norm": 2.784666682738858, "learning_rate": 4.8605947955390335e-06, "loss": 0.786, "step": 1570 }, { "epoch": 0.04879786838517784, "grad_norm": 2.6737590725346854, "learning_rate": 4.876084262701363e-06, "loss": 0.8441, "step": 1575 }, { "epoch": 0.048952782253067297, "grad_norm": 2.702806383752254, "learning_rate": 4.891573729863693e-06, "loss": 0.7792, "step": 1580 }, { "epoch": 0.04910769612095675, "grad_norm": 3.3181276867125735, "learning_rate": 4.907063197026023e-06, "loss": 0.8546, "step": 1585 }, { "epoch": 0.0492626099888462, "grad_norm": 2.94472693204023, "learning_rate": 4.9225526641883525e-06, "loss": 0.7495, "step": 1590 }, { "epoch": 0.04941752385673565, "grad_norm": 3.497666085991972, "learning_rate": 4.938042131350682e-06, "loss": 0.8741, "step": 1595 }, { "epoch": 0.04957243772462511, "grad_norm": 2.642323963808556, "learning_rate": 4.953531598513012e-06, "loss": 0.8105, "step": 1600 }, { "epoch": 0.04972735159251456, "grad_norm": 2.7666997057987612, "learning_rate": 4.969021065675341e-06, "loss": 0.8804, "step": 1605 }, { "epoch": 0.049882265460404016, "grad_norm": 2.6267999849645323, "learning_rate": 4.984510532837671e-06, "loss": 0.8628, "step": 1610 }, { "epoch": 0.05003717932829347, "grad_norm": 2.956259485583459, "learning_rate": 5e-06, "loss": 0.9639, "step": 1615 }, { "epoch": 0.050192093196182924, "grad_norm": 4.055322914400143, "learning_rate": 4.999184658534995e-06, "loss": 0.8438, "step": 1620 }, { "epoch": 0.05034700706407238, "grad_norm": 3.624428442940413, "learning_rate": 4.998369317069989e-06, "loss": 0.8048, "step": 1625 }, { "epoch": 0.050501920931961826, "grad_norm": 4.613101346131987, "learning_rate": 4.997553975604984e-06, "loss": 0.8292, "step": 1630 }, { "epoch": 0.05065683479985128, "grad_norm": 7.13239905982628, "learning_rate": 4.996738634139978e-06, "loss": 0.8974, "step": 1635 }, { "epoch": 0.050811748667740735, "grad_norm": 2.807110929672211, "learning_rate": 4.995923292674973e-06, "loss": 0.809, "step": 1640 }, { "epoch": 0.05096666253563019, "grad_norm": 3.6870355650459277, "learning_rate": 4.995107951209967e-06, "loss": 0.9035, "step": 1645 }, { "epoch": 0.05112157640351964, "grad_norm": 7.636026660521979, "learning_rate": 4.994292609744962e-06, "loss": 0.805, "step": 1650 }, { "epoch": 0.0512764902714091, "grad_norm": 2.9488659554056658, "learning_rate": 4.993477268279956e-06, "loss": 0.882, "step": 1655 }, { "epoch": 0.05143140413929855, "grad_norm": 2.808215942682858, "learning_rate": 4.9926619268149505e-06, "loss": 0.8404, "step": 1660 }, { "epoch": 0.051586318007188006, "grad_norm": 2.7157515881157823, "learning_rate": 4.991846585349945e-06, "loss": 0.7645, "step": 1665 }, { "epoch": 0.051741231875077454, "grad_norm": 3.433242668634347, "learning_rate": 4.9910312438849395e-06, "loss": 0.841, "step": 1670 }, { "epoch": 0.05189614574296691, "grad_norm": 2.9421708340272112, "learning_rate": 4.990215902419934e-06, "loss": 0.8111, "step": 1675 }, { "epoch": 0.05205105961085636, "grad_norm": 3.3006627494283154, "learning_rate": 4.989400560954928e-06, "loss": 0.8388, "step": 1680 }, { "epoch": 0.05220597347874582, "grad_norm": 2.402379886736944, "learning_rate": 4.988585219489923e-06, "loss": 0.7092, "step": 1685 }, { "epoch": 0.05236088734663527, "grad_norm": 2.77935644095284, "learning_rate": 4.987769878024917e-06, "loss": 0.8389, "step": 1690 }, { "epoch": 0.052515801214524725, "grad_norm": 2.8304370338334786, "learning_rate": 4.986954536559912e-06, "loss": 0.8592, "step": 1695 }, { "epoch": 0.05267071508241418, "grad_norm": 3.608771826855995, "learning_rate": 4.986139195094906e-06, "loss": 0.8316, "step": 1700 }, { "epoch": 0.052825628950303634, "grad_norm": 3.0638901131808667, "learning_rate": 4.985323853629901e-06, "loss": 0.8914, "step": 1705 }, { "epoch": 0.05298054281819308, "grad_norm": 2.987544120349669, "learning_rate": 4.984508512164895e-06, "loss": 0.87, "step": 1710 }, { "epoch": 0.053135456686082536, "grad_norm": 4.018180319788528, "learning_rate": 4.98369317069989e-06, "loss": 0.8022, "step": 1715 }, { "epoch": 0.05329037055397199, "grad_norm": 3.367746749898724, "learning_rate": 4.982877829234884e-06, "loss": 0.9112, "step": 1720 }, { "epoch": 0.053445284421861444, "grad_norm": 2.802036697992754, "learning_rate": 4.9820624877698785e-06, "loss": 0.8805, "step": 1725 }, { "epoch": 0.0536001982897509, "grad_norm": 4.137094775709408, "learning_rate": 4.981247146304873e-06, "loss": 0.848, "step": 1730 }, { "epoch": 0.05375511215764035, "grad_norm": 3.1240677204919107, "learning_rate": 4.9804318048398674e-06, "loss": 0.8404, "step": 1735 }, { "epoch": 0.05391002602552981, "grad_norm": 3.407565842076697, "learning_rate": 4.979616463374862e-06, "loss": 0.8649, "step": 1740 }, { "epoch": 0.05406493989341926, "grad_norm": 3.587037620460203, "learning_rate": 4.978801121909856e-06, "loss": 0.8006, "step": 1745 }, { "epoch": 0.05421985376130871, "grad_norm": 2.919788786537585, "learning_rate": 4.977985780444851e-06, "loss": 0.738, "step": 1750 }, { "epoch": 0.05437476762919816, "grad_norm": 2.717976650634877, "learning_rate": 4.977170438979845e-06, "loss": 0.8118, "step": 1755 }, { "epoch": 0.05452968149708762, "grad_norm": 3.819789861424081, "learning_rate": 4.97635509751484e-06, "loss": 0.8707, "step": 1760 }, { "epoch": 0.05468459536497707, "grad_norm": 2.465035283789656, "learning_rate": 4.975539756049834e-06, "loss": 0.794, "step": 1765 }, { "epoch": 0.054839509232866526, "grad_norm": 2.5898358291998527, "learning_rate": 4.974724414584829e-06, "loss": 0.7994, "step": 1770 }, { "epoch": 0.05499442310075598, "grad_norm": 3.380654515705737, "learning_rate": 4.973909073119823e-06, "loss": 0.8405, "step": 1775 }, { "epoch": 0.055149336968645435, "grad_norm": 2.777826352812253, "learning_rate": 4.9730937316548176e-06, "loss": 0.7877, "step": 1780 }, { "epoch": 0.05530425083653489, "grad_norm": 3.132133128328451, "learning_rate": 4.972278390189812e-06, "loss": 0.8227, "step": 1785 }, { "epoch": 0.05545916470442434, "grad_norm": 5.115469388530754, "learning_rate": 4.9714630487248065e-06, "loss": 0.8278, "step": 1790 }, { "epoch": 0.05561407857231379, "grad_norm": 4.036696813158882, "learning_rate": 4.970647707259801e-06, "loss": 0.8442, "step": 1795 }, { "epoch": 0.055768992440203245, "grad_norm": 2.756454943190547, "learning_rate": 4.969832365794795e-06, "loss": 0.8126, "step": 1800 }, { "epoch": 0.0559239063080927, "grad_norm": 2.8803902293541648, "learning_rate": 4.96901702432979e-06, "loss": 0.7786, "step": 1805 }, { "epoch": 0.056078820175982154, "grad_norm": 3.211470681733631, "learning_rate": 4.968201682864784e-06, "loss": 0.9058, "step": 1810 }, { "epoch": 0.05623373404387161, "grad_norm": 2.979718349328635, "learning_rate": 4.967386341399779e-06, "loss": 0.8422, "step": 1815 }, { "epoch": 0.05638864791176106, "grad_norm": 2.8756996337579643, "learning_rate": 4.966570999934773e-06, "loss": 0.8317, "step": 1820 }, { "epoch": 0.05654356177965052, "grad_norm": 3.152708853728837, "learning_rate": 4.965755658469768e-06, "loss": 0.8446, "step": 1825 }, { "epoch": 0.05669847564753997, "grad_norm": 3.5514240287513714, "learning_rate": 4.964940317004762e-06, "loss": 0.828, "step": 1830 }, { "epoch": 0.05685338951542942, "grad_norm": 4.197709933125799, "learning_rate": 4.964124975539757e-06, "loss": 0.7975, "step": 1835 }, { "epoch": 0.05700830338331887, "grad_norm": 3.1281947182099605, "learning_rate": 4.963309634074751e-06, "loss": 0.7288, "step": 1840 }, { "epoch": 0.05716321725120833, "grad_norm": 3.362504534520526, "learning_rate": 4.9624942926097455e-06, "loss": 0.887, "step": 1845 }, { "epoch": 0.05731813111909778, "grad_norm": 3.0079825315200117, "learning_rate": 4.96167895114474e-06, "loss": 0.8317, "step": 1850 }, { "epoch": 0.057473044986987236, "grad_norm": 2.8789492892460613, "learning_rate": 4.9608636096797344e-06, "loss": 0.9111, "step": 1855 }, { "epoch": 0.05762795885487669, "grad_norm": 3.167439169687481, "learning_rate": 4.960048268214729e-06, "loss": 0.7832, "step": 1860 }, { "epoch": 0.057782872722766145, "grad_norm": 3.5920435931716788, "learning_rate": 4.959232926749723e-06, "loss": 0.8247, "step": 1865 }, { "epoch": 0.0579377865906556, "grad_norm": 3.444067594893881, "learning_rate": 4.958417585284718e-06, "loss": 0.7852, "step": 1870 }, { "epoch": 0.058092700458545046, "grad_norm": 2.9614287491642446, "learning_rate": 4.957602243819712e-06, "loss": 0.9175, "step": 1875 }, { "epoch": 0.0582476143264345, "grad_norm": 2.8105004467632875, "learning_rate": 4.956786902354707e-06, "loss": 0.8014, "step": 1880 }, { "epoch": 0.058402528194323955, "grad_norm": 3.2833747055528604, "learning_rate": 4.9559715608897e-06, "loss": 0.7955, "step": 1885 }, { "epoch": 0.05855744206221341, "grad_norm": 2.5875631686108638, "learning_rate": 4.955156219424696e-06, "loss": 0.8166, "step": 1890 }, { "epoch": 0.058712355930102864, "grad_norm": 2.5975719167492564, "learning_rate": 4.954340877959689e-06, "loss": 0.8135, "step": 1895 }, { "epoch": 0.05886726979799232, "grad_norm": 3.0891803865214613, "learning_rate": 4.953525536494685e-06, "loss": 0.84, "step": 1900 }, { "epoch": 0.05902218366588177, "grad_norm": 2.7920109395688337, "learning_rate": 4.952710195029679e-06, "loss": 0.8434, "step": 1905 }, { "epoch": 0.05917709753377123, "grad_norm": 3.02068917368157, "learning_rate": 4.9518948535646735e-06, "loss": 0.8366, "step": 1910 }, { "epoch": 0.059332011401660674, "grad_norm": 3.1058787196280306, "learning_rate": 4.951079512099668e-06, "loss": 0.7629, "step": 1915 }, { "epoch": 0.05948692526955013, "grad_norm": 2.910592714426578, "learning_rate": 4.950264170634662e-06, "loss": 0.8244, "step": 1920 }, { "epoch": 0.05964183913743958, "grad_norm": 2.526022875437933, "learning_rate": 4.949448829169657e-06, "loss": 0.827, "step": 1925 }, { "epoch": 0.05979675300532904, "grad_norm": 3.6276887836933223, "learning_rate": 4.948633487704651e-06, "loss": 0.8862, "step": 1930 }, { "epoch": 0.05995166687321849, "grad_norm": 2.6861659286001895, "learning_rate": 4.947818146239646e-06, "loss": 0.8855, "step": 1935 }, { "epoch": 0.060106580741107946, "grad_norm": 3.05693886814625, "learning_rate": 4.94700280477464e-06, "loss": 0.8024, "step": 1940 }, { "epoch": 0.0602614946089974, "grad_norm": 2.7978523187590096, "learning_rate": 4.946187463309635e-06, "loss": 0.8943, "step": 1945 }, { "epoch": 0.060416408476886854, "grad_norm": 3.182024238297607, "learning_rate": 4.945372121844629e-06, "loss": 0.7927, "step": 1950 }, { "epoch": 0.0605713223447763, "grad_norm": 2.9250781048788834, "learning_rate": 4.944556780379624e-06, "loss": 0.755, "step": 1955 }, { "epoch": 0.060726236212665756, "grad_norm": 2.6271685228875095, "learning_rate": 4.943741438914617e-06, "loss": 0.8049, "step": 1960 }, { "epoch": 0.06088115008055521, "grad_norm": 4.840146720925739, "learning_rate": 4.9429260974496125e-06, "loss": 0.8037, "step": 1965 }, { "epoch": 0.061036063948444665, "grad_norm": 3.6517421040199296, "learning_rate": 4.942110755984606e-06, "loss": 0.797, "step": 1970 }, { "epoch": 0.06119097781633412, "grad_norm": 4.152212508004564, "learning_rate": 4.9412954145196015e-06, "loss": 0.7925, "step": 1975 }, { "epoch": 0.06134589168422357, "grad_norm": 3.520562282926508, "learning_rate": 4.940480073054595e-06, "loss": 0.7381, "step": 1980 }, { "epoch": 0.06150080555211303, "grad_norm": 4.6983709913860485, "learning_rate": 4.93966473158959e-06, "loss": 0.695, "step": 1985 }, { "epoch": 0.06165571942000248, "grad_norm": 4.1946092046984695, "learning_rate": 4.938849390124584e-06, "loss": 0.819, "step": 1990 }, { "epoch": 0.06181063328789193, "grad_norm": 3.491982306871215, "learning_rate": 4.938034048659579e-06, "loss": 0.8523, "step": 1995 }, { "epoch": 0.061965547155781384, "grad_norm": 3.662553100019726, "learning_rate": 4.937218707194574e-06, "loss": 0.7822, "step": 2000 }, { "epoch": 0.06212046102367084, "grad_norm": 3.281929994261276, "learning_rate": 4.936403365729568e-06, "loss": 0.7471, "step": 2005 }, { "epoch": 0.06227537489156029, "grad_norm": 2.9295380711858185, "learning_rate": 4.935588024264563e-06, "loss": 0.807, "step": 2010 }, { "epoch": 0.06243028875944975, "grad_norm": 4.605931792295588, "learning_rate": 4.934772682799557e-06, "loss": 0.8301, "step": 2015 }, { "epoch": 0.0625852026273392, "grad_norm": 2.3136461547887928, "learning_rate": 4.933957341334552e-06, "loss": 0.7227, "step": 2020 }, { "epoch": 0.06274011649522865, "grad_norm": 2.916062468421234, "learning_rate": 4.933141999869546e-06, "loss": 0.7774, "step": 2025 }, { "epoch": 0.06289503036311811, "grad_norm": 2.9708482108291103, "learning_rate": 4.9323266584045405e-06, "loss": 0.8167, "step": 2030 }, { "epoch": 0.06304994423100756, "grad_norm": 3.177443833438093, "learning_rate": 4.931511316939535e-06, "loss": 0.7765, "step": 2035 }, { "epoch": 0.06320485809889702, "grad_norm": 2.6905173150419492, "learning_rate": 4.9306959754745294e-06, "loss": 0.7805, "step": 2040 }, { "epoch": 0.06335977196678647, "grad_norm": 3.5008463394035645, "learning_rate": 4.929880634009523e-06, "loss": 0.8187, "step": 2045 }, { "epoch": 0.06351468583467593, "grad_norm": 4.030669240843702, "learning_rate": 4.929065292544518e-06, "loss": 0.7838, "step": 2050 }, { "epoch": 0.06366959970256537, "grad_norm": 3.353251322348195, "learning_rate": 4.928249951079512e-06, "loss": 0.8136, "step": 2055 }, { "epoch": 0.06382451357045482, "grad_norm": 2.9518484731190933, "learning_rate": 4.927434609614507e-06, "loss": 0.7888, "step": 2060 }, { "epoch": 0.06397942743834428, "grad_norm": 3.7646981203923406, "learning_rate": 4.926619268149501e-06, "loss": 0.7596, "step": 2065 }, { "epoch": 0.06413434130623373, "grad_norm": 2.72573243205493, "learning_rate": 4.925803926684496e-06, "loss": 0.7365, "step": 2070 }, { "epoch": 0.06428925517412319, "grad_norm": 3.099138484176321, "learning_rate": 4.92498858521949e-06, "loss": 0.8162, "step": 2075 }, { "epoch": 0.06444416904201264, "grad_norm": 3.1859297800064486, "learning_rate": 4.924173243754485e-06, "loss": 0.84, "step": 2080 }, { "epoch": 0.0645990829099021, "grad_norm": 4.222619548684509, "learning_rate": 4.923357902289479e-06, "loss": 0.7904, "step": 2085 }, { "epoch": 0.06475399677779155, "grad_norm": 2.8558389307018697, "learning_rate": 4.922542560824474e-06, "loss": 0.9258, "step": 2090 }, { "epoch": 0.064908910645681, "grad_norm": 2.668561679314024, "learning_rate": 4.9217272193594685e-06, "loss": 0.695, "step": 2095 }, { "epoch": 0.06506382451357046, "grad_norm": 3.135654857543494, "learning_rate": 4.920911877894463e-06, "loss": 0.8217, "step": 2100 }, { "epoch": 0.0652187383814599, "grad_norm": 3.6614491641266786, "learning_rate": 4.920096536429457e-06, "loss": 0.8776, "step": 2105 }, { "epoch": 0.06537365224934936, "grad_norm": 3.4376648208224942, "learning_rate": 4.919281194964452e-06, "loss": 0.833, "step": 2110 }, { "epoch": 0.06552856611723881, "grad_norm": 3.1341962736064035, "learning_rate": 4.918465853499446e-06, "loss": 0.8134, "step": 2115 }, { "epoch": 0.06568347998512827, "grad_norm": 2.9466981137264026, "learning_rate": 4.91765051203444e-06, "loss": 0.7285, "step": 2120 }, { "epoch": 0.06583839385301772, "grad_norm": 2.9300078983120783, "learning_rate": 4.916835170569435e-06, "loss": 0.8404, "step": 2125 }, { "epoch": 0.06599330772090718, "grad_norm": 5.246947608402578, "learning_rate": 4.916019829104429e-06, "loss": 0.777, "step": 2130 }, { "epoch": 0.06614822158879663, "grad_norm": 3.2674924190240575, "learning_rate": 4.915204487639424e-06, "loss": 0.8036, "step": 2135 }, { "epoch": 0.06630313545668608, "grad_norm": 2.671286850351431, "learning_rate": 4.914389146174418e-06, "loss": 0.8411, "step": 2140 }, { "epoch": 0.06645804932457554, "grad_norm": 3.1062760662534465, "learning_rate": 4.913573804709413e-06, "loss": 0.7764, "step": 2145 }, { "epoch": 0.06661296319246499, "grad_norm": 2.7497537332251323, "learning_rate": 4.912758463244407e-06, "loss": 0.8033, "step": 2150 }, { "epoch": 0.06676787706035445, "grad_norm": 3.2305986107678364, "learning_rate": 4.911943121779402e-06, "loss": 0.8043, "step": 2155 }, { "epoch": 0.0669227909282439, "grad_norm": 2.875794347230891, "learning_rate": 4.911127780314396e-06, "loss": 0.7934, "step": 2160 }, { "epoch": 0.06707770479613336, "grad_norm": 3.6639753587744712, "learning_rate": 4.910312438849391e-06, "loss": 0.8161, "step": 2165 }, { "epoch": 0.0672326186640228, "grad_norm": 2.9378054621013736, "learning_rate": 4.9094970973843845e-06, "loss": 0.8989, "step": 2170 }, { "epoch": 0.06738753253191225, "grad_norm": 4.049680945472019, "learning_rate": 4.90868175591938e-06, "loss": 0.7773, "step": 2175 }, { "epoch": 0.06754244639980171, "grad_norm": 2.9795777910282446, "learning_rate": 4.9078664144543734e-06, "loss": 0.8338, "step": 2180 }, { "epoch": 0.06769736026769116, "grad_norm": 2.611269746097714, "learning_rate": 4.907051072989369e-06, "loss": 0.834, "step": 2185 }, { "epoch": 0.06785227413558062, "grad_norm": 3.4421089874528037, "learning_rate": 4.906235731524363e-06, "loss": 0.8058, "step": 2190 }, { "epoch": 0.06800718800347007, "grad_norm": 3.3293467359779396, "learning_rate": 4.905420390059357e-06, "loss": 0.8601, "step": 2195 }, { "epoch": 0.06816210187135953, "grad_norm": 3.7332412457374247, "learning_rate": 4.904605048594352e-06, "loss": 0.7852, "step": 2200 }, { "epoch": 0.06831701573924898, "grad_norm": 2.955266892845022, "learning_rate": 4.903789707129346e-06, "loss": 0.8155, "step": 2205 }, { "epoch": 0.06847192960713844, "grad_norm": 5.204003137391139, "learning_rate": 4.902974365664341e-06, "loss": 0.7777, "step": 2210 }, { "epoch": 0.06862684347502788, "grad_norm": 2.9734422501148323, "learning_rate": 4.902159024199335e-06, "loss": 0.8521, "step": 2215 }, { "epoch": 0.06878175734291733, "grad_norm": 2.818271286536019, "learning_rate": 4.90134368273433e-06, "loss": 0.8562, "step": 2220 }, { "epoch": 0.0689366712108068, "grad_norm": 3.2899963993603074, "learning_rate": 4.9005283412693236e-06, "loss": 0.7732, "step": 2225 }, { "epoch": 0.06909158507869624, "grad_norm": 3.0443377834244343, "learning_rate": 4.899712999804319e-06, "loss": 0.8308, "step": 2230 }, { "epoch": 0.0692464989465857, "grad_norm": 3.4686439883583806, "learning_rate": 4.8988976583393125e-06, "loss": 0.8233, "step": 2235 }, { "epoch": 0.06940141281447515, "grad_norm": 2.617650116067831, "learning_rate": 4.898082316874308e-06, "loss": 0.8298, "step": 2240 }, { "epoch": 0.06955632668236461, "grad_norm": 2.884830814820101, "learning_rate": 4.897266975409301e-06, "loss": 0.8283, "step": 2245 }, { "epoch": 0.06971124055025406, "grad_norm": 2.447751408738041, "learning_rate": 4.896451633944297e-06, "loss": 0.7752, "step": 2250 }, { "epoch": 0.0698661544181435, "grad_norm": 2.8022009960489207, "learning_rate": 4.89563629247929e-06, "loss": 0.8112, "step": 2255 }, { "epoch": 0.07002106828603297, "grad_norm": 2.867642773839468, "learning_rate": 4.894820951014286e-06, "loss": 0.7999, "step": 2260 }, { "epoch": 0.07017598215392241, "grad_norm": 3.2142270265455535, "learning_rate": 4.894005609549279e-06, "loss": 0.7394, "step": 2265 }, { "epoch": 0.07033089602181188, "grad_norm": 3.434745755100372, "learning_rate": 4.893190268084274e-06, "loss": 0.7926, "step": 2270 }, { "epoch": 0.07048580988970132, "grad_norm": 3.1424694137861304, "learning_rate": 4.892374926619268e-06, "loss": 0.8936, "step": 2275 }, { "epoch": 0.07064072375759078, "grad_norm": 2.5104097354932753, "learning_rate": 4.891559585154263e-06, "loss": 0.7923, "step": 2280 }, { "epoch": 0.07079563762548023, "grad_norm": 2.620717994847216, "learning_rate": 4.890744243689258e-06, "loss": 0.827, "step": 2285 }, { "epoch": 0.07095055149336969, "grad_norm": 3.046419314691063, "learning_rate": 4.8899289022242515e-06, "loss": 0.7348, "step": 2290 }, { "epoch": 0.07110546536125914, "grad_norm": 2.9449828975286105, "learning_rate": 4.889113560759247e-06, "loss": 0.7091, "step": 2295 }, { "epoch": 0.07126037922914859, "grad_norm": 2.5911688534789525, "learning_rate": 4.8882982192942405e-06, "loss": 0.8715, "step": 2300 }, { "epoch": 0.07141529309703805, "grad_norm": 3.5019063960009222, "learning_rate": 4.887482877829236e-06, "loss": 0.7661, "step": 2305 }, { "epoch": 0.0715702069649275, "grad_norm": 2.9257283064579362, "learning_rate": 4.886667536364229e-06, "loss": 0.9038, "step": 2310 }, { "epoch": 0.07172512083281696, "grad_norm": 3.112902718298353, "learning_rate": 4.885852194899225e-06, "loss": 0.8079, "step": 2315 }, { "epoch": 0.0718800347007064, "grad_norm": 2.8202246761753766, "learning_rate": 4.885036853434218e-06, "loss": 0.867, "step": 2320 }, { "epoch": 0.07203494856859587, "grad_norm": 2.8120217611117515, "learning_rate": 4.884221511969214e-06, "loss": 0.8341, "step": 2325 }, { "epoch": 0.07218986243648531, "grad_norm": 4.445951058903666, "learning_rate": 4.883406170504207e-06, "loss": 0.761, "step": 2330 }, { "epoch": 0.07234477630437476, "grad_norm": 4.931819386020321, "learning_rate": 4.8825908290392025e-06, "loss": 0.9468, "step": 2335 }, { "epoch": 0.07249969017226422, "grad_norm": 3.0157210289030116, "learning_rate": 4.881775487574196e-06, "loss": 0.8462, "step": 2340 }, { "epoch": 0.07265460404015367, "grad_norm": 2.8166131628442606, "learning_rate": 4.880960146109191e-06, "loss": 0.732, "step": 2345 }, { "epoch": 0.07280951790804313, "grad_norm": 3.5996763429655263, "learning_rate": 4.880144804644185e-06, "loss": 0.8485, "step": 2350 }, { "epoch": 0.07296443177593258, "grad_norm": 2.754801777525978, "learning_rate": 4.8793294631791795e-06, "loss": 0.7642, "step": 2355 }, { "epoch": 0.07311934564382204, "grad_norm": 4.005092850623661, "learning_rate": 4.878514121714174e-06, "loss": 0.7877, "step": 2360 }, { "epoch": 0.07327425951171149, "grad_norm": 3.3232554270674384, "learning_rate": 4.8776987802491684e-06, "loss": 0.8573, "step": 2365 }, { "epoch": 0.07342917337960095, "grad_norm": 2.812042594802079, "learning_rate": 4.876883438784164e-06, "loss": 0.8061, "step": 2370 }, { "epoch": 0.0735840872474904, "grad_norm": 2.9991185539705607, "learning_rate": 4.876068097319157e-06, "loss": 0.7519, "step": 2375 }, { "epoch": 0.07373900111537984, "grad_norm": 2.3979599558198332, "learning_rate": 4.875252755854153e-06, "loss": 0.7835, "step": 2380 }, { "epoch": 0.0738939149832693, "grad_norm": 3.3942819562346678, "learning_rate": 4.874437414389146e-06, "loss": 0.7653, "step": 2385 }, { "epoch": 0.07404882885115875, "grad_norm": 2.563137720277763, "learning_rate": 4.8736220729241416e-06, "loss": 0.7559, "step": 2390 }, { "epoch": 0.07420374271904821, "grad_norm": 2.760367296249115, "learning_rate": 4.872806731459135e-06, "loss": 0.7604, "step": 2395 }, { "epoch": 0.07435865658693766, "grad_norm": 3.259642154007994, "learning_rate": 4.8719913899941305e-06, "loss": 0.8393, "step": 2400 }, { "epoch": 0.07451357045482712, "grad_norm": 2.797416430060911, "learning_rate": 4.871176048529124e-06, "loss": 0.7585, "step": 2405 }, { "epoch": 0.07466848432271657, "grad_norm": 2.6826474090971755, "learning_rate": 4.870360707064119e-06, "loss": 0.7746, "step": 2410 }, { "epoch": 0.07482339819060602, "grad_norm": 4.507344224748453, "learning_rate": 4.869545365599113e-06, "loss": 0.8414, "step": 2415 }, { "epoch": 0.07497831205849548, "grad_norm": 3.1726511378553073, "learning_rate": 4.8687300241341075e-06, "loss": 0.8021, "step": 2420 }, { "epoch": 0.07513322592638493, "grad_norm": 4.591285719948168, "learning_rate": 4.867914682669102e-06, "loss": 0.7907, "step": 2425 }, { "epoch": 0.07528813979427439, "grad_norm": 4.049453371193074, "learning_rate": 4.867099341204096e-06, "loss": 0.7694, "step": 2430 }, { "epoch": 0.07544305366216383, "grad_norm": 2.7634347926300324, "learning_rate": 4.866283999739091e-06, "loss": 0.8273, "step": 2435 }, { "epoch": 0.0755979675300533, "grad_norm": 3.878848054630207, "learning_rate": 4.865468658274085e-06, "loss": 0.7244, "step": 2440 }, { "epoch": 0.07575288139794274, "grad_norm": 3.3122136811429495, "learning_rate": 4.86465331680908e-06, "loss": 0.7793, "step": 2445 }, { "epoch": 0.0759077952658322, "grad_norm": 2.5802683886125983, "learning_rate": 4.863837975344074e-06, "loss": 0.7266, "step": 2450 }, { "epoch": 0.07606270913372165, "grad_norm": 3.7788113401135246, "learning_rate": 4.863022633879069e-06, "loss": 0.8303, "step": 2455 }, { "epoch": 0.0762176230016111, "grad_norm": 2.7586026268376633, "learning_rate": 4.862207292414063e-06, "loss": 0.7391, "step": 2460 }, { "epoch": 0.07637253686950056, "grad_norm": 3.5178846239475834, "learning_rate": 4.8613919509490585e-06, "loss": 0.798, "step": 2465 }, { "epoch": 0.07652745073739001, "grad_norm": 3.5427800541403736, "learning_rate": 4.860576609484052e-06, "loss": 0.7927, "step": 2470 }, { "epoch": 0.07668236460527947, "grad_norm": 4.092266396077662, "learning_rate": 4.859761268019047e-06, "loss": 0.8543, "step": 2475 }, { "epoch": 0.07683727847316892, "grad_norm": 2.8378303037117822, "learning_rate": 4.858945926554041e-06, "loss": 0.8819, "step": 2480 }, { "epoch": 0.07699219234105838, "grad_norm": 3.206289138943002, "learning_rate": 4.858130585089036e-06, "loss": 0.8267, "step": 2485 }, { "epoch": 0.07714710620894782, "grad_norm": 2.86522958003329, "learning_rate": 4.85731524362403e-06, "loss": 0.7762, "step": 2490 }, { "epoch": 0.07730202007683727, "grad_norm": 4.11274780684594, "learning_rate": 4.856499902159024e-06, "loss": 0.7562, "step": 2495 }, { "epoch": 0.07745693394472673, "grad_norm": 4.291520219684717, "learning_rate": 4.855684560694019e-06, "loss": 0.7776, "step": 2500 }, { "epoch": 0.07761184781261618, "grad_norm": 3.111557984040223, "learning_rate": 4.854869219229013e-06, "loss": 0.868, "step": 2505 }, { "epoch": 0.07776676168050564, "grad_norm": 2.9168071019196953, "learning_rate": 4.854053877764008e-06, "loss": 0.7367, "step": 2510 }, { "epoch": 0.07792167554839509, "grad_norm": 9.953661288542536, "learning_rate": 4.853238536299002e-06, "loss": 0.7493, "step": 2515 }, { "epoch": 0.07807658941628455, "grad_norm": 2.9569277338825883, "learning_rate": 4.852423194833997e-06, "loss": 0.7927, "step": 2520 }, { "epoch": 0.078231503284174, "grad_norm": 2.9988127517972316, "learning_rate": 4.851607853368991e-06, "loss": 0.8602, "step": 2525 }, { "epoch": 0.07838641715206346, "grad_norm": 3.0537944327939046, "learning_rate": 4.850792511903986e-06, "loss": 0.8326, "step": 2530 }, { "epoch": 0.0785413310199529, "grad_norm": 3.1613752601240774, "learning_rate": 4.84997717043898e-06, "loss": 0.8652, "step": 2535 }, { "epoch": 0.07869624488784235, "grad_norm": 3.7289064170822352, "learning_rate": 4.8491618289739745e-06, "loss": 0.7344, "step": 2540 }, { "epoch": 0.07885115875573181, "grad_norm": 3.3986073455669104, "learning_rate": 4.848346487508969e-06, "loss": 0.8822, "step": 2545 }, { "epoch": 0.07900607262362126, "grad_norm": 2.5180795577290005, "learning_rate": 4.847531146043963e-06, "loss": 0.8256, "step": 2550 }, { "epoch": 0.07916098649151072, "grad_norm": 3.1371994218136483, "learning_rate": 4.846715804578958e-06, "loss": 0.8647, "step": 2555 }, { "epoch": 0.07931590035940017, "grad_norm": 2.2803124952968323, "learning_rate": 4.845900463113953e-06, "loss": 0.7998, "step": 2560 }, { "epoch": 0.07947081422728963, "grad_norm": 3.1662656445535884, "learning_rate": 4.845085121648947e-06, "loss": 0.7104, "step": 2565 }, { "epoch": 0.07962572809517908, "grad_norm": 2.9553914807095127, "learning_rate": 4.844269780183941e-06, "loss": 0.843, "step": 2570 }, { "epoch": 0.07978064196306853, "grad_norm": 3.1123206842634157, "learning_rate": 4.843454438718936e-06, "loss": 0.7704, "step": 2575 }, { "epoch": 0.07993555583095799, "grad_norm": 6.972208839345867, "learning_rate": 4.84263909725393e-06, "loss": 0.8054, "step": 2580 }, { "epoch": 0.08009046969884744, "grad_norm": 2.7441146721500838, "learning_rate": 4.841823755788925e-06, "loss": 0.808, "step": 2585 }, { "epoch": 0.0802453835667369, "grad_norm": 3.127789366914498, "learning_rate": 4.841008414323919e-06, "loss": 0.8431, "step": 2590 }, { "epoch": 0.08040029743462634, "grad_norm": 3.15696106821864, "learning_rate": 4.8401930728589135e-06, "loss": 0.8065, "step": 2595 }, { "epoch": 0.0805552113025158, "grad_norm": 2.720325406722442, "learning_rate": 4.839377731393908e-06, "loss": 0.7599, "step": 2600 }, { "epoch": 0.08071012517040525, "grad_norm": 3.311473675377248, "learning_rate": 4.8385623899289025e-06, "loss": 0.8055, "step": 2605 }, { "epoch": 0.08086503903829471, "grad_norm": 3.5665958700249623, "learning_rate": 4.837747048463897e-06, "loss": 0.7648, "step": 2610 }, { "epoch": 0.08101995290618416, "grad_norm": 4.262685614552365, "learning_rate": 4.836931706998891e-06, "loss": 0.8157, "step": 2615 }, { "epoch": 0.08117486677407361, "grad_norm": 3.5494999354201404, "learning_rate": 4.836116365533886e-06, "loss": 0.8006, "step": 2620 }, { "epoch": 0.08132978064196307, "grad_norm": 2.879625935217788, "learning_rate": 4.83530102406888e-06, "loss": 0.7644, "step": 2625 }, { "epoch": 0.08148469450985252, "grad_norm": 3.4460105531267704, "learning_rate": 4.834485682603875e-06, "loss": 0.7835, "step": 2630 }, { "epoch": 0.08163960837774198, "grad_norm": 3.0817531285542996, "learning_rate": 4.833670341138869e-06, "loss": 0.7501, "step": 2635 }, { "epoch": 0.08179452224563143, "grad_norm": 2.761397720500759, "learning_rate": 4.832854999673864e-06, "loss": 0.78, "step": 2640 }, { "epoch": 0.08194943611352089, "grad_norm": 6.062419742213908, "learning_rate": 4.832039658208858e-06, "loss": 0.7826, "step": 2645 }, { "epoch": 0.08210434998141034, "grad_norm": 2.623661246899907, "learning_rate": 4.831224316743853e-06, "loss": 0.7387, "step": 2650 }, { "epoch": 0.08225926384929978, "grad_norm": 2.3463552009673485, "learning_rate": 4.830408975278847e-06, "loss": 0.8008, "step": 2655 }, { "epoch": 0.08241417771718924, "grad_norm": 2.3718746241773347, "learning_rate": 4.8295936338138415e-06, "loss": 0.7942, "step": 2660 }, { "epoch": 0.08256909158507869, "grad_norm": 2.24097048146972, "learning_rate": 4.828778292348836e-06, "loss": 0.7659, "step": 2665 }, { "epoch": 0.08272400545296815, "grad_norm": 2.7090798798070437, "learning_rate": 4.8279629508838304e-06, "loss": 0.8028, "step": 2670 }, { "epoch": 0.0828789193208576, "grad_norm": 3.0822253655753067, "learning_rate": 4.827147609418825e-06, "loss": 0.8315, "step": 2675 }, { "epoch": 0.08303383318874706, "grad_norm": 3.3383323713875566, "learning_rate": 4.826332267953819e-06, "loss": 0.8085, "step": 2680 }, { "epoch": 0.08318874705663651, "grad_norm": 3.7428707169218898, "learning_rate": 4.825516926488814e-06, "loss": 0.807, "step": 2685 }, { "epoch": 0.08334366092452597, "grad_norm": 3.948232304070687, "learning_rate": 4.824701585023808e-06, "loss": 0.8597, "step": 2690 }, { "epoch": 0.08349857479241542, "grad_norm": 3.3523345151307677, "learning_rate": 4.823886243558803e-06, "loss": 0.7922, "step": 2695 }, { "epoch": 0.08365348866030486, "grad_norm": 3.354226883264201, "learning_rate": 4.823070902093797e-06, "loss": 0.8272, "step": 2700 }, { "epoch": 0.08380840252819433, "grad_norm": 3.3860441893517104, "learning_rate": 4.822255560628792e-06, "loss": 0.8284, "step": 2705 }, { "epoch": 0.08396331639608377, "grad_norm": 3.176152556479033, "learning_rate": 4.821440219163786e-06, "loss": 0.8186, "step": 2710 }, { "epoch": 0.08411823026397323, "grad_norm": 2.83092552708419, "learning_rate": 4.8206248776987806e-06, "loss": 0.7842, "step": 2715 }, { "epoch": 0.08427314413186268, "grad_norm": 3.4279057091709486, "learning_rate": 4.819809536233775e-06, "loss": 0.8324, "step": 2720 }, { "epoch": 0.08442805799975214, "grad_norm": 3.1796270295042026, "learning_rate": 4.8189941947687695e-06, "loss": 0.8927, "step": 2725 }, { "epoch": 0.08458297186764159, "grad_norm": 3.116935033938853, "learning_rate": 4.818178853303764e-06, "loss": 0.7757, "step": 2730 }, { "epoch": 0.08473788573553105, "grad_norm": 2.636334024233499, "learning_rate": 4.817363511838758e-06, "loss": 0.7678, "step": 2735 }, { "epoch": 0.0848927996034205, "grad_norm": 3.3272217759611378, "learning_rate": 4.816548170373753e-06, "loss": 0.8648, "step": 2740 }, { "epoch": 0.08504771347130995, "grad_norm": 2.5277976793682333, "learning_rate": 4.815732828908747e-06, "loss": 0.7809, "step": 2745 }, { "epoch": 0.08520262733919941, "grad_norm": 2.7351567491498283, "learning_rate": 4.814917487443742e-06, "loss": 0.8412, "step": 2750 }, { "epoch": 0.08535754120708886, "grad_norm": 3.0934271267686126, "learning_rate": 4.814102145978736e-06, "loss": 0.8201, "step": 2755 }, { "epoch": 0.08551245507497832, "grad_norm": 2.2837072214368908, "learning_rate": 4.813286804513731e-06, "loss": 0.696, "step": 2760 }, { "epoch": 0.08566736894286776, "grad_norm": 3.659169896317865, "learning_rate": 4.812471463048725e-06, "loss": 0.8048, "step": 2765 }, { "epoch": 0.08582228281075722, "grad_norm": 2.465536348427789, "learning_rate": 4.81165612158372e-06, "loss": 0.8079, "step": 2770 }, { "epoch": 0.08597719667864667, "grad_norm": 3.0478660183584494, "learning_rate": 4.810840780118714e-06, "loss": 0.7783, "step": 2775 }, { "epoch": 0.08613211054653612, "grad_norm": 2.7236567765940083, "learning_rate": 4.8100254386537085e-06, "loss": 0.7368, "step": 2780 }, { "epoch": 0.08628702441442558, "grad_norm": 3.1553046038046513, "learning_rate": 4.809210097188703e-06, "loss": 0.8113, "step": 2785 }, { "epoch": 0.08644193828231503, "grad_norm": 3.6929740292702746, "learning_rate": 4.8083947557236975e-06, "loss": 0.7565, "step": 2790 }, { "epoch": 0.08659685215020449, "grad_norm": 2.6627815974114046, "learning_rate": 4.807579414258692e-06, "loss": 0.7984, "step": 2795 }, { "epoch": 0.08675176601809394, "grad_norm": 3.364319144975152, "learning_rate": 4.806764072793686e-06, "loss": 0.8046, "step": 2800 }, { "epoch": 0.0869066798859834, "grad_norm": 2.484937943495124, "learning_rate": 4.805948731328681e-06, "loss": 0.7592, "step": 2805 }, { "epoch": 0.08706159375387285, "grad_norm": 3.842242170824601, "learning_rate": 4.805133389863675e-06, "loss": 0.7943, "step": 2810 }, { "epoch": 0.08721650762176231, "grad_norm": 2.8717015729778335, "learning_rate": 4.80431804839867e-06, "loss": 0.7763, "step": 2815 }, { "epoch": 0.08737142148965175, "grad_norm": 2.325924948812831, "learning_rate": 4.803502706933664e-06, "loss": 0.7546, "step": 2820 }, { "epoch": 0.0875263353575412, "grad_norm": 3.1081313491172993, "learning_rate": 4.802687365468659e-06, "loss": 0.8455, "step": 2825 }, { "epoch": 0.08768124922543066, "grad_norm": 3.7702725056047726, "learning_rate": 4.801872024003653e-06, "loss": 0.904, "step": 2830 }, { "epoch": 0.08783616309332011, "grad_norm": 2.839266020275429, "learning_rate": 4.801056682538648e-06, "loss": 0.7989, "step": 2835 }, { "epoch": 0.08799107696120957, "grad_norm": 2.3047371801937446, "learning_rate": 4.800241341073642e-06, "loss": 0.7936, "step": 2840 }, { "epoch": 0.08814599082909902, "grad_norm": 2.852055256890056, "learning_rate": 4.7994259996086365e-06, "loss": 0.8, "step": 2845 }, { "epoch": 0.08830090469698848, "grad_norm": 2.6584949067691195, "learning_rate": 4.798610658143631e-06, "loss": 0.7938, "step": 2850 }, { "epoch": 0.08845581856487793, "grad_norm": 3.4168061935725604, "learning_rate": 4.7977953166786254e-06, "loss": 0.8617, "step": 2855 }, { "epoch": 0.08861073243276738, "grad_norm": 2.4937930610780312, "learning_rate": 4.79697997521362e-06, "loss": 0.7201, "step": 2860 }, { "epoch": 0.08876564630065684, "grad_norm": 2.838678034539256, "learning_rate": 4.796164633748614e-06, "loss": 0.8239, "step": 2865 }, { "epoch": 0.08892056016854628, "grad_norm": 2.3795155941144035, "learning_rate": 4.795349292283609e-06, "loss": 0.8001, "step": 2870 }, { "epoch": 0.08907547403643575, "grad_norm": 2.9320551259968743, "learning_rate": 4.794533950818603e-06, "loss": 0.832, "step": 2875 }, { "epoch": 0.08923038790432519, "grad_norm": 2.694247845279839, "learning_rate": 4.793718609353598e-06, "loss": 0.8332, "step": 2880 }, { "epoch": 0.08938530177221465, "grad_norm": 3.087698159528504, "learning_rate": 4.792903267888592e-06, "loss": 0.7786, "step": 2885 }, { "epoch": 0.0895402156401041, "grad_norm": 3.7307800881072835, "learning_rate": 4.792087926423587e-06, "loss": 0.7774, "step": 2890 }, { "epoch": 0.08969512950799356, "grad_norm": 3.614399805196134, "learning_rate": 4.791272584958581e-06, "loss": 0.7892, "step": 2895 }, { "epoch": 0.08985004337588301, "grad_norm": 2.254445401504469, "learning_rate": 4.7904572434935756e-06, "loss": 0.7303, "step": 2900 }, { "epoch": 0.09000495724377246, "grad_norm": 2.821869997245641, "learning_rate": 4.78964190202857e-06, "loss": 0.8337, "step": 2905 }, { "epoch": 0.09015987111166192, "grad_norm": 3.2905128565426676, "learning_rate": 4.7888265605635645e-06, "loss": 0.804, "step": 2910 }, { "epoch": 0.09031478497955137, "grad_norm": 2.422644311038107, "learning_rate": 4.788011219098559e-06, "loss": 0.8065, "step": 2915 }, { "epoch": 0.09046969884744083, "grad_norm": 2.526075638718423, "learning_rate": 4.787195877633553e-06, "loss": 0.7837, "step": 2920 }, { "epoch": 0.09062461271533027, "grad_norm": 2.2901952268824726, "learning_rate": 4.786380536168548e-06, "loss": 0.7703, "step": 2925 }, { "epoch": 0.09077952658321974, "grad_norm": 2.793957404716192, "learning_rate": 4.785565194703542e-06, "loss": 0.75, "step": 2930 }, { "epoch": 0.09093444045110918, "grad_norm": 3.8553777424239133, "learning_rate": 4.784749853238537e-06, "loss": 0.8517, "step": 2935 }, { "epoch": 0.09108935431899863, "grad_norm": 3.531403291546277, "learning_rate": 4.783934511773531e-06, "loss": 0.8123, "step": 2940 }, { "epoch": 0.09124426818688809, "grad_norm": 2.8984882401381724, "learning_rate": 4.783119170308526e-06, "loss": 0.7764, "step": 2945 }, { "epoch": 0.09139918205477754, "grad_norm": 3.8588648454795558, "learning_rate": 4.78230382884352e-06, "loss": 0.8126, "step": 2950 }, { "epoch": 0.091554095922667, "grad_norm": 2.9203723299697177, "learning_rate": 4.781488487378515e-06, "loss": 0.8622, "step": 2955 }, { "epoch": 0.09170900979055645, "grad_norm": 2.7191331673243067, "learning_rate": 4.780673145913509e-06, "loss": 0.7585, "step": 2960 }, { "epoch": 0.09186392365844591, "grad_norm": 3.2886032143037633, "learning_rate": 4.7798578044485035e-06, "loss": 0.8891, "step": 2965 }, { "epoch": 0.09201883752633536, "grad_norm": 3.923431890428007, "learning_rate": 4.779042462983498e-06, "loss": 0.7884, "step": 2970 }, { "epoch": 0.09217375139422482, "grad_norm": 4.103319550866087, "learning_rate": 4.7782271215184924e-06, "loss": 0.8407, "step": 2975 }, { "epoch": 0.09232866526211427, "grad_norm": 4.326675529259192, "learning_rate": 4.777411780053487e-06, "loss": 0.8889, "step": 2980 }, { "epoch": 0.09248357913000371, "grad_norm": 2.667772296297073, "learning_rate": 4.776596438588481e-06, "loss": 0.7873, "step": 2985 }, { "epoch": 0.09263849299789317, "grad_norm": 4.076633068377855, "learning_rate": 4.775781097123476e-06, "loss": 0.827, "step": 2990 }, { "epoch": 0.09279340686578262, "grad_norm": 3.049457760649418, "learning_rate": 4.77496575565847e-06, "loss": 0.7684, "step": 2995 }, { "epoch": 0.09294832073367208, "grad_norm": 3.2189704027912, "learning_rate": 4.774150414193465e-06, "loss": 0.8105, "step": 3000 }, { "epoch": 0.09310323460156153, "grad_norm": 2.7120597608398387, "learning_rate": 4.773335072728459e-06, "loss": 0.7758, "step": 3005 }, { "epoch": 0.09325814846945099, "grad_norm": 3.7011160750907925, "learning_rate": 4.772519731263454e-06, "loss": 0.7801, "step": 3010 }, { "epoch": 0.09341306233734044, "grad_norm": 3.1602635779400368, "learning_rate": 4.771704389798448e-06, "loss": 0.8139, "step": 3015 }, { "epoch": 0.09356797620522989, "grad_norm": 3.238136660201136, "learning_rate": 4.7708890483334426e-06, "loss": 0.8116, "step": 3020 }, { "epoch": 0.09372289007311935, "grad_norm": 2.28192125724255, "learning_rate": 4.770073706868437e-06, "loss": 0.7985, "step": 3025 }, { "epoch": 0.0938778039410088, "grad_norm": 3.242929951129433, "learning_rate": 4.7692583654034315e-06, "loss": 0.7974, "step": 3030 }, { "epoch": 0.09403271780889826, "grad_norm": 3.3477957008272776, "learning_rate": 4.768443023938426e-06, "loss": 0.7648, "step": 3035 }, { "epoch": 0.0941876316767877, "grad_norm": 3.212748117537195, "learning_rate": 4.76762768247342e-06, "loss": 0.7323, "step": 3040 }, { "epoch": 0.09434254554467716, "grad_norm": 2.683689737037953, "learning_rate": 4.766812341008415e-06, "loss": 0.7023, "step": 3045 }, { "epoch": 0.09449745941256661, "grad_norm": 4.934595130924173, "learning_rate": 4.765996999543409e-06, "loss": 0.7861, "step": 3050 }, { "epoch": 0.09465237328045607, "grad_norm": 3.0691611917127077, "learning_rate": 4.765181658078404e-06, "loss": 0.832, "step": 3055 }, { "epoch": 0.09480728714834552, "grad_norm": 3.100833168200613, "learning_rate": 4.764366316613398e-06, "loss": 0.7438, "step": 3060 }, { "epoch": 0.09496220101623497, "grad_norm": 2.7620079996868125, "learning_rate": 4.763550975148393e-06, "loss": 0.8084, "step": 3065 }, { "epoch": 0.09511711488412443, "grad_norm": 2.90315170929386, "learning_rate": 4.762735633683387e-06, "loss": 0.739, "step": 3070 }, { "epoch": 0.09527202875201388, "grad_norm": 2.726805473881834, "learning_rate": 4.761920292218382e-06, "loss": 0.7482, "step": 3075 }, { "epoch": 0.09542694261990334, "grad_norm": 4.130764959339218, "learning_rate": 4.761104950753376e-06, "loss": 0.7656, "step": 3080 }, { "epoch": 0.09558185648779279, "grad_norm": 4.874915251103555, "learning_rate": 4.7602896092883705e-06, "loss": 0.8429, "step": 3085 }, { "epoch": 0.09573677035568225, "grad_norm": 2.9209157895030193, "learning_rate": 4.759474267823365e-06, "loss": 0.8446, "step": 3090 }, { "epoch": 0.0958916842235717, "grad_norm": 2.9144242466423624, "learning_rate": 4.7586589263583595e-06, "loss": 0.7885, "step": 3095 }, { "epoch": 0.09604659809146114, "grad_norm": 3.028948252610044, "learning_rate": 4.757843584893354e-06, "loss": 0.8083, "step": 3100 }, { "epoch": 0.0962015119593506, "grad_norm": 2.8315969610320972, "learning_rate": 4.757028243428348e-06, "loss": 0.7845, "step": 3105 }, { "epoch": 0.09635642582724005, "grad_norm": 4.205357229399791, "learning_rate": 4.756212901963342e-06, "loss": 0.8584, "step": 3110 }, { "epoch": 0.09651133969512951, "grad_norm": 2.72178240924005, "learning_rate": 4.755397560498337e-06, "loss": 0.8429, "step": 3115 }, { "epoch": 0.09666625356301896, "grad_norm": 2.4131231893645166, "learning_rate": 4.754582219033331e-06, "loss": 0.7908, "step": 3120 }, { "epoch": 0.09682116743090842, "grad_norm": 3.8112266529990473, "learning_rate": 4.753766877568326e-06, "loss": 0.808, "step": 3125 }, { "epoch": 0.09697608129879787, "grad_norm": 2.764833160303927, "learning_rate": 4.752951536103321e-06, "loss": 0.7948, "step": 3130 }, { "epoch": 0.09713099516668733, "grad_norm": 3.407216469698545, "learning_rate": 4.752136194638315e-06, "loss": 0.7627, "step": 3135 }, { "epoch": 0.09728590903457678, "grad_norm": 2.675116933515371, "learning_rate": 4.75132085317331e-06, "loss": 0.7144, "step": 3140 }, { "epoch": 0.09744082290246622, "grad_norm": 2.583955802024396, "learning_rate": 4.750505511708304e-06, "loss": 0.7298, "step": 3145 }, { "epoch": 0.09759573677035568, "grad_norm": 3.1050044332332316, "learning_rate": 4.7496901702432985e-06, "loss": 0.7568, "step": 3150 }, { "epoch": 0.09775065063824513, "grad_norm": 3.0972293422969037, "learning_rate": 4.748874828778293e-06, "loss": 0.7207, "step": 3155 }, { "epoch": 0.09790556450613459, "grad_norm": 3.348239800912128, "learning_rate": 4.7480594873132874e-06, "loss": 0.8949, "step": 3160 }, { "epoch": 0.09806047837402404, "grad_norm": 2.9856741287525628, "learning_rate": 4.747244145848282e-06, "loss": 0.8035, "step": 3165 }, { "epoch": 0.0982153922419135, "grad_norm": 2.7941289181564866, "learning_rate": 4.746428804383276e-06, "loss": 0.8068, "step": 3170 }, { "epoch": 0.09837030610980295, "grad_norm": 2.5237659397400183, "learning_rate": 4.745613462918271e-06, "loss": 0.8002, "step": 3175 }, { "epoch": 0.0985252199776924, "grad_norm": 2.754871666674811, "learning_rate": 4.744798121453265e-06, "loss": 0.8373, "step": 3180 }, { "epoch": 0.09868013384558186, "grad_norm": 2.8875538774633, "learning_rate": 4.743982779988259e-06, "loss": 0.7491, "step": 3185 }, { "epoch": 0.0988350477134713, "grad_norm": 2.788359594452915, "learning_rate": 4.743167438523254e-06, "loss": 0.7184, "step": 3190 }, { "epoch": 0.09898996158136077, "grad_norm": 4.263658004428424, "learning_rate": 4.742352097058248e-06, "loss": 0.8529, "step": 3195 }, { "epoch": 0.09914487544925021, "grad_norm": 3.3214806291669037, "learning_rate": 4.741536755593243e-06, "loss": 0.784, "step": 3200 }, { "epoch": 0.09929978931713968, "grad_norm": 3.2477020969999515, "learning_rate": 4.740721414128237e-06, "loss": 0.7575, "step": 3205 }, { "epoch": 0.09945470318502912, "grad_norm": 2.410805233394219, "learning_rate": 4.739906072663232e-06, "loss": 0.7154, "step": 3210 }, { "epoch": 0.09960961705291858, "grad_norm": 2.7994526842250367, "learning_rate": 4.739090731198226e-06, "loss": 0.7755, "step": 3215 }, { "epoch": 0.09976453092080803, "grad_norm": 3.108082227065868, "learning_rate": 4.738275389733221e-06, "loss": 0.8877, "step": 3220 }, { "epoch": 0.09991944478869748, "grad_norm": 5.716738382581319, "learning_rate": 4.737460048268215e-06, "loss": 0.7958, "step": 3225 }, { "epoch": 0.10007435865658694, "grad_norm": 2.9996139175491643, "learning_rate": 4.73664470680321e-06, "loss": 0.7803, "step": 3230 }, { "epoch": 0.10022927252447639, "grad_norm": 2.466600752113433, "learning_rate": 4.735829365338204e-06, "loss": 0.7792, "step": 3235 }, { "epoch": 0.10038418639236585, "grad_norm": 2.436729377697789, "learning_rate": 4.735014023873199e-06, "loss": 0.736, "step": 3240 }, { "epoch": 0.1005391002602553, "grad_norm": 2.979169519450376, "learning_rate": 4.734198682408193e-06, "loss": 0.746, "step": 3245 }, { "epoch": 0.10069401412814476, "grad_norm": 3.44746813472607, "learning_rate": 4.733383340943188e-06, "loss": 0.8679, "step": 3250 }, { "epoch": 0.1008489279960342, "grad_norm": 3.724968170736257, "learning_rate": 4.732567999478182e-06, "loss": 0.712, "step": 3255 }, { "epoch": 0.10100384186392365, "grad_norm": 3.378109497116171, "learning_rate": 4.731752658013176e-06, "loss": 0.8105, "step": 3260 }, { "epoch": 0.10115875573181311, "grad_norm": 8.17981384515224, "learning_rate": 4.730937316548171e-06, "loss": 0.838, "step": 3265 }, { "epoch": 0.10131366959970256, "grad_norm": 3.125449572291468, "learning_rate": 4.730121975083165e-06, "loss": 0.8282, "step": 3270 }, { "epoch": 0.10146858346759202, "grad_norm": 3.1714043171003308, "learning_rate": 4.72930663361816e-06, "loss": 0.7222, "step": 3275 }, { "epoch": 0.10162349733548147, "grad_norm": 3.4855720546450404, "learning_rate": 4.728491292153154e-06, "loss": 0.8733, "step": 3280 }, { "epoch": 0.10177841120337093, "grad_norm": 3.376504949707857, "learning_rate": 4.727675950688149e-06, "loss": 0.8493, "step": 3285 }, { "epoch": 0.10193332507126038, "grad_norm": 3.5211115859074384, "learning_rate": 4.7268606092231425e-06, "loss": 0.8133, "step": 3290 }, { "epoch": 0.10208823893914984, "grad_norm": 3.213076603988383, "learning_rate": 4.726045267758138e-06, "loss": 0.8096, "step": 3295 }, { "epoch": 0.10224315280703929, "grad_norm": 2.935888336772498, "learning_rate": 4.7252299262931314e-06, "loss": 0.8313, "step": 3300 }, { "epoch": 0.10239806667492873, "grad_norm": 2.6960639278249294, "learning_rate": 4.724414584828127e-06, "loss": 0.7783, "step": 3305 }, { "epoch": 0.1025529805428182, "grad_norm": 3.317659780686857, "learning_rate": 4.72359924336312e-06, "loss": 0.8861, "step": 3310 }, { "epoch": 0.10270789441070764, "grad_norm": 2.959024675998564, "learning_rate": 4.722783901898116e-06, "loss": 0.8162, "step": 3315 }, { "epoch": 0.1028628082785971, "grad_norm": 3.0068969968188415, "learning_rate": 4.72196856043311e-06, "loss": 0.8261, "step": 3320 }, { "epoch": 0.10301772214648655, "grad_norm": 4.439768469803273, "learning_rate": 4.721153218968105e-06, "loss": 0.7932, "step": 3325 }, { "epoch": 0.10317263601437601, "grad_norm": 3.204328943662207, "learning_rate": 4.720337877503099e-06, "loss": 0.8412, "step": 3330 }, { "epoch": 0.10332754988226546, "grad_norm": 3.3664847514703453, "learning_rate": 4.719522536038093e-06, "loss": 0.8028, "step": 3335 }, { "epoch": 0.10348246375015491, "grad_norm": 4.428223315820863, "learning_rate": 4.718707194573088e-06, "loss": 0.7896, "step": 3340 }, { "epoch": 0.10363737761804437, "grad_norm": 4.178646079581713, "learning_rate": 4.7178918531080816e-06, "loss": 0.8227, "step": 3345 }, { "epoch": 0.10379229148593382, "grad_norm": 3.7061098656581213, "learning_rate": 4.717076511643077e-06, "loss": 0.8358, "step": 3350 }, { "epoch": 0.10394720535382328, "grad_norm": 2.6604145661519483, "learning_rate": 4.7162611701780705e-06, "loss": 0.7988, "step": 3355 }, { "epoch": 0.10410211922171272, "grad_norm": 2.5960137202653137, "learning_rate": 4.715445828713066e-06, "loss": 0.8129, "step": 3360 }, { "epoch": 0.10425703308960219, "grad_norm": 3.9360631800507497, "learning_rate": 4.714630487248059e-06, "loss": 0.8211, "step": 3365 }, { "epoch": 0.10441194695749163, "grad_norm": 2.9726590536153643, "learning_rate": 4.713815145783055e-06, "loss": 0.7705, "step": 3370 }, { "epoch": 0.1045668608253811, "grad_norm": 2.738278928307161, "learning_rate": 4.712999804318048e-06, "loss": 0.7191, "step": 3375 }, { "epoch": 0.10472177469327054, "grad_norm": 4.228280012010342, "learning_rate": 4.712184462853044e-06, "loss": 0.8186, "step": 3380 }, { "epoch": 0.10487668856115999, "grad_norm": 3.1168364018548935, "learning_rate": 4.711369121388037e-06, "loss": 0.835, "step": 3385 }, { "epoch": 0.10503160242904945, "grad_norm": 2.789701051441153, "learning_rate": 4.7105537799230325e-06, "loss": 0.7746, "step": 3390 }, { "epoch": 0.1051865162969389, "grad_norm": 3.0899748553449515, "learning_rate": 4.709738438458026e-06, "loss": 0.8011, "step": 3395 }, { "epoch": 0.10534143016482836, "grad_norm": 2.7819874844234183, "learning_rate": 4.7089230969930215e-06, "loss": 0.7957, "step": 3400 }, { "epoch": 0.1054963440327178, "grad_norm": 2.739589228265412, "learning_rate": 4.708107755528015e-06, "loss": 0.7237, "step": 3405 }, { "epoch": 0.10565125790060727, "grad_norm": 2.4781656932283056, "learning_rate": 4.70729241406301e-06, "loss": 0.8192, "step": 3410 }, { "epoch": 0.10580617176849672, "grad_norm": 2.601989497232389, "learning_rate": 4.706477072598005e-06, "loss": 0.8735, "step": 3415 }, { "epoch": 0.10596108563638616, "grad_norm": 3.460130930983784, "learning_rate": 4.7056617311329985e-06, "loss": 0.828, "step": 3420 }, { "epoch": 0.10611599950427562, "grad_norm": 2.2308934697275844, "learning_rate": 4.704846389667994e-06, "loss": 0.7862, "step": 3425 }, { "epoch": 0.10627091337216507, "grad_norm": 2.7584470274033817, "learning_rate": 4.704031048202987e-06, "loss": 0.7477, "step": 3430 }, { "epoch": 0.10642582724005453, "grad_norm": 2.6534991527961913, "learning_rate": 4.703215706737983e-06, "loss": 0.7998, "step": 3435 }, { "epoch": 0.10658074110794398, "grad_norm": 6.801599039394142, "learning_rate": 4.702400365272976e-06, "loss": 0.7915, "step": 3440 }, { "epoch": 0.10673565497583344, "grad_norm": 3.0162276975918347, "learning_rate": 4.701585023807972e-06, "loss": 0.8384, "step": 3445 }, { "epoch": 0.10689056884372289, "grad_norm": 2.5563560494843807, "learning_rate": 4.700769682342965e-06, "loss": 0.7787, "step": 3450 }, { "epoch": 0.10704548271161235, "grad_norm": 2.8084289729986214, "learning_rate": 4.6999543408779605e-06, "loss": 0.7767, "step": 3455 }, { "epoch": 0.1072003965795018, "grad_norm": 2.9019058794224284, "learning_rate": 4.699138999412954e-06, "loss": 0.8358, "step": 3460 }, { "epoch": 0.10735531044739124, "grad_norm": 3.53105402766897, "learning_rate": 4.6983236579479494e-06, "loss": 0.6932, "step": 3465 }, { "epoch": 0.1075102243152807, "grad_norm": 3.6191073253459374, "learning_rate": 4.697508316482943e-06, "loss": 0.7798, "step": 3470 }, { "epoch": 0.10766513818317015, "grad_norm": 3.1685564513999966, "learning_rate": 4.696692975017938e-06, "loss": 0.7998, "step": 3475 }, { "epoch": 0.10782005205105961, "grad_norm": 3.14963337706882, "learning_rate": 4.695877633552932e-06, "loss": 0.7336, "step": 3480 }, { "epoch": 0.10797496591894906, "grad_norm": 2.840277371239466, "learning_rate": 4.695062292087927e-06, "loss": 0.7351, "step": 3485 }, { "epoch": 0.10812987978683852, "grad_norm": 2.167102004259278, "learning_rate": 4.694246950622921e-06, "loss": 0.7852, "step": 3490 }, { "epoch": 0.10828479365472797, "grad_norm": 3.6349065129728872, "learning_rate": 4.693431609157915e-06, "loss": 0.722, "step": 3495 }, { "epoch": 0.10843970752261742, "grad_norm": 2.5111466374035816, "learning_rate": 4.69261626769291e-06, "loss": 0.8157, "step": 3500 }, { "epoch": 0.10859462139050688, "grad_norm": 3.79141763447468, "learning_rate": 4.691800926227904e-06, "loss": 0.7421, "step": 3505 }, { "epoch": 0.10874953525839633, "grad_norm": 2.585373434046736, "learning_rate": 4.6909855847628996e-06, "loss": 0.8215, "step": 3510 }, { "epoch": 0.10890444912628579, "grad_norm": 3.2960997394233846, "learning_rate": 4.690170243297893e-06, "loss": 0.7692, "step": 3515 }, { "epoch": 0.10905936299417524, "grad_norm": 3.535573077912255, "learning_rate": 4.6893549018328885e-06, "loss": 0.7544, "step": 3520 }, { "epoch": 0.1092142768620647, "grad_norm": 2.675735172695889, "learning_rate": 4.688539560367882e-06, "loss": 0.7916, "step": 3525 }, { "epoch": 0.10936919072995414, "grad_norm": 3.3663488539345425, "learning_rate": 4.687724218902877e-06, "loss": 0.754, "step": 3530 }, { "epoch": 0.1095241045978436, "grad_norm": 2.6109881533586345, "learning_rate": 4.686908877437871e-06, "loss": 0.7937, "step": 3535 }, { "epoch": 0.10967901846573305, "grad_norm": 3.110170474630572, "learning_rate": 4.686093535972866e-06, "loss": 0.8301, "step": 3540 }, { "epoch": 0.1098339323336225, "grad_norm": 2.999863628970889, "learning_rate": 4.68527819450786e-06, "loss": 0.8226, "step": 3545 }, { "epoch": 0.10998884620151196, "grad_norm": 3.5017402384388143, "learning_rate": 4.684462853042855e-06, "loss": 0.734, "step": 3550 }, { "epoch": 0.11014376006940141, "grad_norm": 3.164180129478369, "learning_rate": 4.683647511577849e-06, "loss": 0.8874, "step": 3555 }, { "epoch": 0.11029867393729087, "grad_norm": 2.738595198461231, "learning_rate": 4.682832170112844e-06, "loss": 0.8143, "step": 3560 }, { "epoch": 0.11045358780518032, "grad_norm": 4.19924475347032, "learning_rate": 4.682016828647838e-06, "loss": 0.7483, "step": 3565 }, { "epoch": 0.11060850167306978, "grad_norm": 2.8205634589106348, "learning_rate": 4.681201487182832e-06, "loss": 0.7854, "step": 3570 }, { "epoch": 0.11076341554095923, "grad_norm": 2.8259746119509797, "learning_rate": 4.680386145717827e-06, "loss": 0.7796, "step": 3575 }, { "epoch": 0.11091832940884867, "grad_norm": 3.7009858521387065, "learning_rate": 4.679570804252821e-06, "loss": 0.7526, "step": 3580 }, { "epoch": 0.11107324327673813, "grad_norm": 2.6563735955626746, "learning_rate": 4.678755462787816e-06, "loss": 0.8289, "step": 3585 }, { "epoch": 0.11122815714462758, "grad_norm": 2.7406022310183613, "learning_rate": 4.67794012132281e-06, "loss": 0.7617, "step": 3590 }, { "epoch": 0.11138307101251704, "grad_norm": 2.9593599531524792, "learning_rate": 4.6771247798578045e-06, "loss": 0.8085, "step": 3595 }, { "epoch": 0.11153798488040649, "grad_norm": 3.9700654660369055, "learning_rate": 4.676309438392799e-06, "loss": 0.7704, "step": 3600 }, { "epoch": 0.11169289874829595, "grad_norm": 3.2316258164795593, "learning_rate": 4.675494096927794e-06, "loss": 0.8267, "step": 3605 }, { "epoch": 0.1118478126161854, "grad_norm": 3.639005488853124, "learning_rate": 4.674678755462788e-06, "loss": 0.7443, "step": 3610 }, { "epoch": 0.11200272648407486, "grad_norm": 3.3608830008611172, "learning_rate": 4.673863413997783e-06, "loss": 0.7992, "step": 3615 }, { "epoch": 0.11215764035196431, "grad_norm": 3.0538197829216327, "learning_rate": 4.673048072532777e-06, "loss": 0.8547, "step": 3620 }, { "epoch": 0.11231255421985376, "grad_norm": 2.7293600863480827, "learning_rate": 4.672232731067772e-06, "loss": 0.798, "step": 3625 }, { "epoch": 0.11246746808774322, "grad_norm": 3.1497618074021605, "learning_rate": 4.671417389602766e-06, "loss": 0.7819, "step": 3630 }, { "epoch": 0.11262238195563266, "grad_norm": 2.6364052663535866, "learning_rate": 4.670602048137761e-06, "loss": 0.7814, "step": 3635 }, { "epoch": 0.11277729582352213, "grad_norm": 3.0321207552969622, "learning_rate": 4.669786706672755e-06, "loss": 0.8616, "step": 3640 }, { "epoch": 0.11293220969141157, "grad_norm": 2.779183940669347, "learning_rate": 4.668971365207749e-06, "loss": 0.7309, "step": 3645 }, { "epoch": 0.11308712355930103, "grad_norm": 2.391909883364008, "learning_rate": 4.668156023742744e-06, "loss": 0.8715, "step": 3650 }, { "epoch": 0.11324203742719048, "grad_norm": 2.6382811792753276, "learning_rate": 4.667340682277738e-06, "loss": 0.7511, "step": 3655 }, { "epoch": 0.11339695129507994, "grad_norm": 3.174019182757369, "learning_rate": 4.6665253408127325e-06, "loss": 0.8229, "step": 3660 }, { "epoch": 0.11355186516296939, "grad_norm": 2.6100979657815193, "learning_rate": 4.665709999347727e-06, "loss": 0.752, "step": 3665 }, { "epoch": 0.11370677903085884, "grad_norm": 2.8584048971346254, "learning_rate": 4.664894657882721e-06, "loss": 0.768, "step": 3670 }, { "epoch": 0.1138616928987483, "grad_norm": 2.932859540664257, "learning_rate": 4.664079316417716e-06, "loss": 0.768, "step": 3675 }, { "epoch": 0.11401660676663775, "grad_norm": 2.788158164459821, "learning_rate": 4.66326397495271e-06, "loss": 0.7666, "step": 3680 }, { "epoch": 0.11417152063452721, "grad_norm": 2.5833675026176364, "learning_rate": 4.662448633487705e-06, "loss": 0.7361, "step": 3685 }, { "epoch": 0.11432643450241665, "grad_norm": 2.5157167116911907, "learning_rate": 4.6616332920227e-06, "loss": 0.845, "step": 3690 }, { "epoch": 0.11448134837030612, "grad_norm": 2.4837858404185447, "learning_rate": 4.660817950557694e-06, "loss": 0.7593, "step": 3695 }, { "epoch": 0.11463626223819556, "grad_norm": 3.0960184935499413, "learning_rate": 4.660002609092689e-06, "loss": 0.7313, "step": 3700 }, { "epoch": 0.11479117610608501, "grad_norm": 2.751953235811583, "learning_rate": 4.659187267627683e-06, "loss": 0.7608, "step": 3705 }, { "epoch": 0.11494608997397447, "grad_norm": 2.4635322427181285, "learning_rate": 4.658371926162678e-06, "loss": 0.8032, "step": 3710 }, { "epoch": 0.11510100384186392, "grad_norm": 2.580594662294029, "learning_rate": 4.6575565846976715e-06, "loss": 0.7707, "step": 3715 }, { "epoch": 0.11525591770975338, "grad_norm": 3.1314510579457115, "learning_rate": 4.656741243232666e-06, "loss": 0.7235, "step": 3720 }, { "epoch": 0.11541083157764283, "grad_norm": 2.8217698345281526, "learning_rate": 4.6559259017676605e-06, "loss": 0.8025, "step": 3725 }, { "epoch": 0.11556574544553229, "grad_norm": 2.7913428454883307, "learning_rate": 4.655110560302655e-06, "loss": 0.848, "step": 3730 }, { "epoch": 0.11572065931342174, "grad_norm": 2.9019044806032435, "learning_rate": 4.654295218837649e-06, "loss": 0.7074, "step": 3735 }, { "epoch": 0.1158755731813112, "grad_norm": 2.939221098858861, "learning_rate": 4.653479877372644e-06, "loss": 0.7553, "step": 3740 }, { "epoch": 0.11603048704920065, "grad_norm": 2.338382800949623, "learning_rate": 4.652664535907638e-06, "loss": 0.7498, "step": 3745 }, { "epoch": 0.11618540091709009, "grad_norm": 2.9035275329215855, "learning_rate": 4.651849194442633e-06, "loss": 0.8033, "step": 3750 }, { "epoch": 0.11634031478497955, "grad_norm": 2.645826600039351, "learning_rate": 4.651033852977627e-06, "loss": 0.8226, "step": 3755 }, { "epoch": 0.116495228652869, "grad_norm": 2.927089469465198, "learning_rate": 4.650218511512622e-06, "loss": 0.7655, "step": 3760 }, { "epoch": 0.11665014252075846, "grad_norm": 4.680089631104443, "learning_rate": 4.649403170047616e-06, "loss": 0.7591, "step": 3765 }, { "epoch": 0.11680505638864791, "grad_norm": 3.444649083743403, "learning_rate": 4.648587828582611e-06, "loss": 0.8118, "step": 3770 }, { "epoch": 0.11695997025653737, "grad_norm": 3.8357750358442044, "learning_rate": 4.647772487117605e-06, "loss": 0.7162, "step": 3775 }, { "epoch": 0.11711488412442682, "grad_norm": 3.608135217275193, "learning_rate": 4.6469571456525995e-06, "loss": 0.7943, "step": 3780 }, { "epoch": 0.11726979799231627, "grad_norm": 2.975624908865606, "learning_rate": 4.646141804187595e-06, "loss": 0.7628, "step": 3785 }, { "epoch": 0.11742471186020573, "grad_norm": 3.4022165017768047, "learning_rate": 4.6453264627225884e-06, "loss": 0.7792, "step": 3790 }, { "epoch": 0.11757962572809517, "grad_norm": 2.6137016051203927, "learning_rate": 4.644511121257583e-06, "loss": 0.7409, "step": 3795 }, { "epoch": 0.11773453959598464, "grad_norm": 4.681823859201187, "learning_rate": 4.643695779792577e-06, "loss": 0.8025, "step": 3800 }, { "epoch": 0.11788945346387408, "grad_norm": 3.278576875469406, "learning_rate": 4.642880438327572e-06, "loss": 0.7841, "step": 3805 }, { "epoch": 0.11804436733176354, "grad_norm": 2.6816132622388547, "learning_rate": 4.642065096862566e-06, "loss": 0.7724, "step": 3810 }, { "epoch": 0.11819928119965299, "grad_norm": 2.943823821330906, "learning_rate": 4.641249755397561e-06, "loss": 0.7824, "step": 3815 }, { "epoch": 0.11835419506754245, "grad_norm": 2.6279178504374037, "learning_rate": 4.640434413932555e-06, "loss": 0.7759, "step": 3820 }, { "epoch": 0.1185091089354319, "grad_norm": 2.635599233992334, "learning_rate": 4.63961907246755e-06, "loss": 0.7955, "step": 3825 }, { "epoch": 0.11866402280332135, "grad_norm": 3.1800497178153835, "learning_rate": 4.638803731002544e-06, "loss": 0.7697, "step": 3830 }, { "epoch": 0.11881893667121081, "grad_norm": 3.0088171475598924, "learning_rate": 4.6379883895375386e-06, "loss": 0.7371, "step": 3835 }, { "epoch": 0.11897385053910026, "grad_norm": 3.726970878711169, "learning_rate": 4.637173048072533e-06, "loss": 0.7353, "step": 3840 }, { "epoch": 0.11912876440698972, "grad_norm": 2.9079582279076406, "learning_rate": 4.6363577066075275e-06, "loss": 0.8573, "step": 3845 }, { "epoch": 0.11928367827487917, "grad_norm": 3.1784290972544587, "learning_rate": 4.635542365142522e-06, "loss": 0.7075, "step": 3850 }, { "epoch": 0.11943859214276863, "grad_norm": 3.211298661129267, "learning_rate": 4.634727023677516e-06, "loss": 0.7472, "step": 3855 }, { "epoch": 0.11959350601065807, "grad_norm": 3.1580499189849127, "learning_rate": 4.633911682212511e-06, "loss": 0.7772, "step": 3860 }, { "epoch": 0.11974841987854752, "grad_norm": 3.955815846519093, "learning_rate": 4.633096340747505e-06, "loss": 0.7598, "step": 3865 }, { "epoch": 0.11990333374643698, "grad_norm": 2.8948515622568185, "learning_rate": 4.6322809992825e-06, "loss": 0.8044, "step": 3870 }, { "epoch": 0.12005824761432643, "grad_norm": 9.477965000470668, "learning_rate": 4.631465657817494e-06, "loss": 0.7512, "step": 3875 }, { "epoch": 0.12021316148221589, "grad_norm": 2.1293857205598967, "learning_rate": 4.630650316352489e-06, "loss": 0.8203, "step": 3880 }, { "epoch": 0.12036807535010534, "grad_norm": 2.8924061680066617, "learning_rate": 4.629834974887483e-06, "loss": 0.7668, "step": 3885 }, { "epoch": 0.1205229892179948, "grad_norm": 2.720013420921353, "learning_rate": 4.629019633422478e-06, "loss": 0.8138, "step": 3890 }, { "epoch": 0.12067790308588425, "grad_norm": 2.3501215237933204, "learning_rate": 4.628204291957472e-06, "loss": 0.7924, "step": 3895 }, { "epoch": 0.12083281695377371, "grad_norm": 2.8071067801840046, "learning_rate": 4.6273889504924665e-06, "loss": 0.8204, "step": 3900 }, { "epoch": 0.12098773082166316, "grad_norm": 2.717525261889446, "learning_rate": 4.626573609027461e-06, "loss": 0.7523, "step": 3905 }, { "epoch": 0.1211426446895526, "grad_norm": 2.8069744076216296, "learning_rate": 4.6257582675624555e-06, "loss": 0.891, "step": 3910 }, { "epoch": 0.12129755855744206, "grad_norm": 2.605164972255893, "learning_rate": 4.62494292609745e-06, "loss": 0.7758, "step": 3915 }, { "epoch": 0.12145247242533151, "grad_norm": 2.464322794688569, "learning_rate": 4.624127584632444e-06, "loss": 0.795, "step": 3920 }, { "epoch": 0.12160738629322097, "grad_norm": 3.7153938485493314, "learning_rate": 4.623312243167439e-06, "loss": 0.8117, "step": 3925 }, { "epoch": 0.12176230016111042, "grad_norm": 3.2870930426355187, "learning_rate": 4.622496901702433e-06, "loss": 0.8503, "step": 3930 }, { "epoch": 0.12191721402899988, "grad_norm": 3.097762551337673, "learning_rate": 4.621681560237428e-06, "loss": 0.7743, "step": 3935 }, { "epoch": 0.12207212789688933, "grad_norm": 6.346254133059192, "learning_rate": 4.620866218772422e-06, "loss": 0.8323, "step": 3940 }, { "epoch": 0.12222704176477878, "grad_norm": 2.432039471729066, "learning_rate": 4.620050877307417e-06, "loss": 0.743, "step": 3945 }, { "epoch": 0.12238195563266824, "grad_norm": 2.7974365248758395, "learning_rate": 4.619235535842411e-06, "loss": 0.9041, "step": 3950 }, { "epoch": 0.12253686950055769, "grad_norm": 5.395463076310122, "learning_rate": 4.618420194377406e-06, "loss": 0.722, "step": 3955 }, { "epoch": 0.12269178336844715, "grad_norm": 3.105413449043781, "learning_rate": 4.6176048529124e-06, "loss": 0.8611, "step": 3960 }, { "epoch": 0.1228466972363366, "grad_norm": 2.6229495949484534, "learning_rate": 4.6167895114473945e-06, "loss": 0.6569, "step": 3965 }, { "epoch": 0.12300161110422606, "grad_norm": 2.9204531732158716, "learning_rate": 4.615974169982389e-06, "loss": 0.8036, "step": 3970 }, { "epoch": 0.1231565249721155, "grad_norm": 3.9495344202667058, "learning_rate": 4.615158828517383e-06, "loss": 0.719, "step": 3975 }, { "epoch": 0.12331143884000496, "grad_norm": 2.5824569218680407, "learning_rate": 4.614343487052378e-06, "loss": 0.7541, "step": 3980 }, { "epoch": 0.12346635270789441, "grad_norm": 3.025612997615449, "learning_rate": 4.613528145587372e-06, "loss": 0.6674, "step": 3985 }, { "epoch": 0.12362126657578386, "grad_norm": 3.0888130054372303, "learning_rate": 4.612712804122367e-06, "loss": 0.8226, "step": 3990 }, { "epoch": 0.12377618044367332, "grad_norm": 3.8711671329459505, "learning_rate": 4.611897462657361e-06, "loss": 0.8127, "step": 3995 }, { "epoch": 0.12393109431156277, "grad_norm": 5.0275386319265625, "learning_rate": 4.611082121192356e-06, "loss": 0.7983, "step": 4000 }, { "epoch": 0.12408600817945223, "grad_norm": 2.5203447548741003, "learning_rate": 4.61026677972735e-06, "loss": 0.8139, "step": 4005 }, { "epoch": 0.12424092204734168, "grad_norm": 5.3847882936116, "learning_rate": 4.609451438262345e-06, "loss": 0.6913, "step": 4010 }, { "epoch": 0.12439583591523114, "grad_norm": 2.7898923059696346, "learning_rate": 4.608636096797339e-06, "loss": 0.8767, "step": 4015 }, { "epoch": 0.12455074978312058, "grad_norm": 3.0279291990431525, "learning_rate": 4.6078207553323336e-06, "loss": 0.7995, "step": 4020 }, { "epoch": 0.12470566365101003, "grad_norm": 3.9623634523959312, "learning_rate": 4.607005413867328e-06, "loss": 0.8185, "step": 4025 }, { "epoch": 0.1248605775188995, "grad_norm": 3.7239210305857497, "learning_rate": 4.6061900724023225e-06, "loss": 0.9275, "step": 4030 }, { "epoch": 0.12501549138678894, "grad_norm": 2.583836819681966, "learning_rate": 4.605374730937317e-06, "loss": 0.811, "step": 4035 }, { "epoch": 0.1251704052546784, "grad_norm": 3.1261918270385567, "learning_rate": 4.604559389472311e-06, "loss": 0.8277, "step": 4040 }, { "epoch": 0.12532531912256786, "grad_norm": 3.2425770216202383, "learning_rate": 4.603744048007306e-06, "loss": 0.9322, "step": 4045 }, { "epoch": 0.1254802329904573, "grad_norm": 2.79289867125266, "learning_rate": 4.6029287065423e-06, "loss": 0.807, "step": 4050 }, { "epoch": 0.12563514685834676, "grad_norm": 2.5012081529971235, "learning_rate": 4.602113365077295e-06, "loss": 0.7653, "step": 4055 }, { "epoch": 0.12579006072623622, "grad_norm": 3.2023631667569843, "learning_rate": 4.601298023612289e-06, "loss": 0.7245, "step": 4060 }, { "epoch": 0.12594497459412565, "grad_norm": 3.0604653783589164, "learning_rate": 4.600482682147284e-06, "loss": 0.8377, "step": 4065 }, { "epoch": 0.12609988846201511, "grad_norm": 3.241628480381823, "learning_rate": 4.599667340682278e-06, "loss": 0.7706, "step": 4070 }, { "epoch": 0.12625480232990458, "grad_norm": 3.1991912192992222, "learning_rate": 4.598851999217273e-06, "loss": 0.7945, "step": 4075 }, { "epoch": 0.12640971619779404, "grad_norm": 2.9994370545623052, "learning_rate": 4.598036657752267e-06, "loss": 0.8438, "step": 4080 }, { "epoch": 0.12656463006568347, "grad_norm": 2.635815295697649, "learning_rate": 4.5972213162872615e-06, "loss": 0.7695, "step": 4085 }, { "epoch": 0.12671954393357293, "grad_norm": 3.2664666780116107, "learning_rate": 4.596405974822256e-06, "loss": 0.7655, "step": 4090 }, { "epoch": 0.1268744578014624, "grad_norm": 2.4131547523136456, "learning_rate": 4.5955906333572504e-06, "loss": 0.7849, "step": 4095 }, { "epoch": 0.12702937166935185, "grad_norm": 3.1990179748412073, "learning_rate": 4.594775291892245e-06, "loss": 0.8533, "step": 4100 }, { "epoch": 0.1271842855372413, "grad_norm": 3.1037526362961643, "learning_rate": 4.593959950427239e-06, "loss": 0.8259, "step": 4105 }, { "epoch": 0.12733919940513075, "grad_norm": 2.741039757134674, "learning_rate": 4.593144608962234e-06, "loss": 0.8007, "step": 4110 }, { "epoch": 0.1274941132730202, "grad_norm": 3.0182476572657206, "learning_rate": 4.592329267497228e-06, "loss": 0.8557, "step": 4115 }, { "epoch": 0.12764902714090964, "grad_norm": 2.6933123911527255, "learning_rate": 4.591513926032223e-06, "loss": 0.8367, "step": 4120 }, { "epoch": 0.1278039410087991, "grad_norm": 2.411589959500512, "learning_rate": 4.590698584567217e-06, "loss": 0.8613, "step": 4125 }, { "epoch": 0.12795885487668857, "grad_norm": 3.14367519965807, "learning_rate": 4.589883243102212e-06, "loss": 0.9062, "step": 4130 }, { "epoch": 0.12811376874457803, "grad_norm": 3.1855076778982436, "learning_rate": 4.589067901637206e-06, "loss": 0.8265, "step": 4135 }, { "epoch": 0.12826868261246746, "grad_norm": 3.2056357173162087, "learning_rate": 4.5882525601722006e-06, "loss": 0.8109, "step": 4140 }, { "epoch": 0.12842359648035692, "grad_norm": 3.196108506852224, "learning_rate": 4.587437218707195e-06, "loss": 0.7577, "step": 4145 }, { "epoch": 0.12857851034824638, "grad_norm": 3.383705084678584, "learning_rate": 4.5866218772421895e-06, "loss": 0.7982, "step": 4150 }, { "epoch": 0.12873342421613582, "grad_norm": 2.972221814544113, "learning_rate": 4.585806535777184e-06, "loss": 0.7867, "step": 4155 }, { "epoch": 0.12888833808402528, "grad_norm": 2.6435749045768566, "learning_rate": 4.584991194312178e-06, "loss": 0.7301, "step": 4160 }, { "epoch": 0.12904325195191474, "grad_norm": 2.7461029358907663, "learning_rate": 4.584175852847173e-06, "loss": 0.7166, "step": 4165 }, { "epoch": 0.1291981658198042, "grad_norm": 2.965398236408742, "learning_rate": 4.583360511382167e-06, "loss": 0.8228, "step": 4170 }, { "epoch": 0.12935307968769363, "grad_norm": 4.359037531050025, "learning_rate": 4.582545169917162e-06, "loss": 0.8437, "step": 4175 }, { "epoch": 0.1295079935555831, "grad_norm": 2.391980432061279, "learning_rate": 4.581729828452156e-06, "loss": 0.7885, "step": 4180 }, { "epoch": 0.12966290742347256, "grad_norm": 2.3132206638147745, "learning_rate": 4.580914486987151e-06, "loss": 0.8129, "step": 4185 }, { "epoch": 0.129817821291362, "grad_norm": 2.226546900242879, "learning_rate": 4.580099145522145e-06, "loss": 0.7321, "step": 4190 }, { "epoch": 0.12997273515925145, "grad_norm": 2.8724158260392056, "learning_rate": 4.57928380405714e-06, "loss": 0.7866, "step": 4195 }, { "epoch": 0.1301276490271409, "grad_norm": 3.957388071544943, "learning_rate": 4.578468462592134e-06, "loss": 0.7897, "step": 4200 }, { "epoch": 0.13028256289503037, "grad_norm": 2.7134564196832685, "learning_rate": 4.5776531211271285e-06, "loss": 0.7649, "step": 4205 }, { "epoch": 0.1304374767629198, "grad_norm": 2.715102005763708, "learning_rate": 4.576837779662123e-06, "loss": 0.7799, "step": 4210 }, { "epoch": 0.13059239063080927, "grad_norm": 2.8383106122037405, "learning_rate": 4.5760224381971175e-06, "loss": 0.8147, "step": 4215 }, { "epoch": 0.13074730449869873, "grad_norm": 2.3386866860923856, "learning_rate": 4.575207096732112e-06, "loss": 0.6984, "step": 4220 }, { "epoch": 0.13090221836658816, "grad_norm": 2.899327058201931, "learning_rate": 4.574391755267106e-06, "loss": 0.7526, "step": 4225 }, { "epoch": 0.13105713223447762, "grad_norm": 2.982647048719208, "learning_rate": 4.573576413802101e-06, "loss": 0.7516, "step": 4230 }, { "epoch": 0.13121204610236709, "grad_norm": 3.2435542943701536, "learning_rate": 4.572761072337095e-06, "loss": 0.7864, "step": 4235 }, { "epoch": 0.13136695997025655, "grad_norm": 2.9989804402677254, "learning_rate": 4.57194573087209e-06, "loss": 0.7827, "step": 4240 }, { "epoch": 0.13152187383814598, "grad_norm": 2.875741438950897, "learning_rate": 4.571130389407084e-06, "loss": 0.7291, "step": 4245 }, { "epoch": 0.13167678770603544, "grad_norm": 3.349148686487195, "learning_rate": 4.570315047942079e-06, "loss": 0.7835, "step": 4250 }, { "epoch": 0.1318317015739249, "grad_norm": 3.496344796272357, "learning_rate": 4.569499706477073e-06, "loss": 0.8253, "step": 4255 }, { "epoch": 0.13198661544181436, "grad_norm": 3.2389278086181506, "learning_rate": 4.568684365012068e-06, "loss": 0.8309, "step": 4260 }, { "epoch": 0.1321415293097038, "grad_norm": 2.625717290982775, "learning_rate": 4.567869023547062e-06, "loss": 0.8115, "step": 4265 }, { "epoch": 0.13229644317759326, "grad_norm": 3.644303062280959, "learning_rate": 4.5670536820820565e-06, "loss": 0.7424, "step": 4270 }, { "epoch": 0.13245135704548272, "grad_norm": 3.17722057908234, "learning_rate": 4.566238340617051e-06, "loss": 0.7977, "step": 4275 }, { "epoch": 0.13260627091337215, "grad_norm": 2.526074528393867, "learning_rate": 4.5654229991520454e-06, "loss": 0.7583, "step": 4280 }, { "epoch": 0.13276118478126162, "grad_norm": 3.4221939382297504, "learning_rate": 4.56460765768704e-06, "loss": 0.8455, "step": 4285 }, { "epoch": 0.13291609864915108, "grad_norm": 3.1424391328068415, "learning_rate": 4.563792316222034e-06, "loss": 0.7815, "step": 4290 }, { "epoch": 0.13307101251704054, "grad_norm": 2.7529350790380356, "learning_rate": 4.562976974757029e-06, "loss": 0.792, "step": 4295 }, { "epoch": 0.13322592638492997, "grad_norm": 2.4292284748820934, "learning_rate": 4.562161633292023e-06, "loss": 0.762, "step": 4300 }, { "epoch": 0.13338084025281943, "grad_norm": 5.198881617699958, "learning_rate": 4.561346291827018e-06, "loss": 0.9033, "step": 4305 }, { "epoch": 0.1335357541207089, "grad_norm": 3.8828337004474793, "learning_rate": 4.560530950362012e-06, "loss": 0.8197, "step": 4310 }, { "epoch": 0.13369066798859833, "grad_norm": 3.240537771409239, "learning_rate": 4.559715608897007e-06, "loss": 0.7907, "step": 4315 }, { "epoch": 0.1338455818564878, "grad_norm": 3.0090572674757956, "learning_rate": 4.558900267432001e-06, "loss": 0.8003, "step": 4320 }, { "epoch": 0.13400049572437725, "grad_norm": 4.282988972519109, "learning_rate": 4.5580849259669956e-06, "loss": 0.7328, "step": 4325 }, { "epoch": 0.1341554095922667, "grad_norm": 4.199818292219312, "learning_rate": 4.55726958450199e-06, "loss": 0.8217, "step": 4330 }, { "epoch": 0.13431032346015614, "grad_norm": 3.120458750092222, "learning_rate": 4.556454243036984e-06, "loss": 0.8618, "step": 4335 }, { "epoch": 0.1344652373280456, "grad_norm": 2.602861198879518, "learning_rate": 4.555638901571979e-06, "loss": 0.7825, "step": 4340 }, { "epoch": 0.13462015119593507, "grad_norm": 2.8029228675935123, "learning_rate": 4.5548235601069725e-06, "loss": 0.8326, "step": 4345 }, { "epoch": 0.1347750650638245, "grad_norm": 3.18419319960565, "learning_rate": 4.554008218641968e-06, "loss": 0.795, "step": 4350 }, { "epoch": 0.13492997893171396, "grad_norm": 2.9918049386298677, "learning_rate": 4.553192877176962e-06, "loss": 0.8107, "step": 4355 }, { "epoch": 0.13508489279960342, "grad_norm": 3.770689839685593, "learning_rate": 4.552377535711957e-06, "loss": 0.7343, "step": 4360 }, { "epoch": 0.13523980666749288, "grad_norm": 4.611228046552848, "learning_rate": 4.551562194246951e-06, "loss": 0.8067, "step": 4365 }, { "epoch": 0.13539472053538232, "grad_norm": 2.9895862338071693, "learning_rate": 4.550746852781946e-06, "loss": 0.7049, "step": 4370 }, { "epoch": 0.13554963440327178, "grad_norm": 2.9875883353499564, "learning_rate": 4.54993151131694e-06, "loss": 0.8472, "step": 4375 }, { "epoch": 0.13570454827116124, "grad_norm": 3.8357381903104963, "learning_rate": 4.549116169851935e-06, "loss": 0.7242, "step": 4380 }, { "epoch": 0.13585946213905067, "grad_norm": 3.2986377492254233, "learning_rate": 4.548300828386929e-06, "loss": 0.7399, "step": 4385 }, { "epoch": 0.13601437600694014, "grad_norm": 4.281842649519941, "learning_rate": 4.5474854869219235e-06, "loss": 0.817, "step": 4390 }, { "epoch": 0.1361692898748296, "grad_norm": 3.8028132776023833, "learning_rate": 4.546670145456918e-06, "loss": 0.7677, "step": 4395 }, { "epoch": 0.13632420374271906, "grad_norm": 2.8311043777281033, "learning_rate": 4.5458548039919124e-06, "loss": 0.7839, "step": 4400 }, { "epoch": 0.1364791176106085, "grad_norm": 3.0134129440067463, "learning_rate": 4.545039462526907e-06, "loss": 0.7984, "step": 4405 }, { "epoch": 0.13663403147849795, "grad_norm": 2.7035717327069744, "learning_rate": 4.5442241210619005e-06, "loss": 0.9102, "step": 4410 }, { "epoch": 0.1367889453463874, "grad_norm": 2.5068814641981736, "learning_rate": 4.543408779596896e-06, "loss": 0.8155, "step": 4415 }, { "epoch": 0.13694385921427688, "grad_norm": 2.7039413182160215, "learning_rate": 4.5425934381318894e-06, "loss": 0.6965, "step": 4420 }, { "epoch": 0.1370987730821663, "grad_norm": 2.55090875408851, "learning_rate": 4.541778096666885e-06, "loss": 0.7395, "step": 4425 }, { "epoch": 0.13725368695005577, "grad_norm": 3.469033924416139, "learning_rate": 4.540962755201878e-06, "loss": 0.7764, "step": 4430 }, { "epoch": 0.13740860081794523, "grad_norm": 3.383101048404671, "learning_rate": 4.540147413736874e-06, "loss": 0.7948, "step": 4435 }, { "epoch": 0.13756351468583466, "grad_norm": 6.14407295301112, "learning_rate": 4.539332072271867e-06, "loss": 0.7984, "step": 4440 }, { "epoch": 0.13771842855372413, "grad_norm": 2.498266321743773, "learning_rate": 4.538516730806863e-06, "loss": 0.7292, "step": 4445 }, { "epoch": 0.1378733424216136, "grad_norm": 2.914190781272276, "learning_rate": 4.537701389341857e-06, "loss": 0.7832, "step": 4450 }, { "epoch": 0.13802825628950305, "grad_norm": 2.891253880628552, "learning_rate": 4.5368860478768515e-06, "loss": 0.6833, "step": 4455 }, { "epoch": 0.13818317015739248, "grad_norm": 2.2883573142908773, "learning_rate": 4.536070706411846e-06, "loss": 0.733, "step": 4460 }, { "epoch": 0.13833808402528194, "grad_norm": 3.2004075122618407, "learning_rate": 4.53525536494684e-06, "loss": 0.8332, "step": 4465 }, { "epoch": 0.1384929978931714, "grad_norm": 2.621256409562311, "learning_rate": 4.534440023481835e-06, "loss": 0.7828, "step": 4470 }, { "epoch": 0.13864791176106084, "grad_norm": 2.5279232219055703, "learning_rate": 4.533624682016829e-06, "loss": 0.7649, "step": 4475 }, { "epoch": 0.1388028256289503, "grad_norm": 3.329837853821965, "learning_rate": 4.532809340551824e-06, "loss": 0.8028, "step": 4480 }, { "epoch": 0.13895773949683976, "grad_norm": 3.7075619833201556, "learning_rate": 4.531993999086817e-06, "loss": 0.8259, "step": 4485 }, { "epoch": 0.13911265336472922, "grad_norm": 2.8290481998757255, "learning_rate": 4.531178657621813e-06, "loss": 0.7311, "step": 4490 }, { "epoch": 0.13926756723261866, "grad_norm": 4.805998143218684, "learning_rate": 4.530363316156806e-06, "loss": 0.832, "step": 4495 }, { "epoch": 0.13942248110050812, "grad_norm": 2.6441087185020273, "learning_rate": 4.529547974691802e-06, "loss": 0.7408, "step": 4500 }, { "epoch": 0.13957739496839758, "grad_norm": 2.3899620237404444, "learning_rate": 4.528732633226795e-06, "loss": 0.7543, "step": 4505 }, { "epoch": 0.139732308836287, "grad_norm": 3.379963067409091, "learning_rate": 4.5279172917617905e-06, "loss": 0.8019, "step": 4510 }, { "epoch": 0.13988722270417647, "grad_norm": 2.9274173000222268, "learning_rate": 4.527101950296784e-06, "loss": 0.7369, "step": 4515 }, { "epoch": 0.14004213657206593, "grad_norm": 3.57718585753366, "learning_rate": 4.5262866088317795e-06, "loss": 0.7896, "step": 4520 }, { "epoch": 0.1401970504399554, "grad_norm": 2.7191058408128557, "learning_rate": 4.525471267366773e-06, "loss": 0.7829, "step": 4525 }, { "epoch": 0.14035196430784483, "grad_norm": 3.7394608138558425, "learning_rate": 4.524655925901768e-06, "loss": 0.8458, "step": 4530 }, { "epoch": 0.1405068781757343, "grad_norm": 2.898531445095009, "learning_rate": 4.523840584436762e-06, "loss": 0.7408, "step": 4535 }, { "epoch": 0.14066179204362375, "grad_norm": 4.003082557740242, "learning_rate": 4.523025242971757e-06, "loss": 0.7646, "step": 4540 }, { "epoch": 0.1408167059115132, "grad_norm": 2.7972437664629717, "learning_rate": 4.522209901506752e-06, "loss": 0.7792, "step": 4545 }, { "epoch": 0.14097161977940265, "grad_norm": 3.146968448691527, "learning_rate": 4.521394560041746e-06, "loss": 0.7686, "step": 4550 }, { "epoch": 0.1411265336472921, "grad_norm": 3.0656250497720094, "learning_rate": 4.520579218576741e-06, "loss": 0.8515, "step": 4555 }, { "epoch": 0.14128144751518157, "grad_norm": 3.2504486947828597, "learning_rate": 4.519763877111734e-06, "loss": 0.8508, "step": 4560 }, { "epoch": 0.141436361383071, "grad_norm": 2.5596837256391676, "learning_rate": 4.51894853564673e-06, "loss": 0.7567, "step": 4565 }, { "epoch": 0.14159127525096046, "grad_norm": 2.9198839299877375, "learning_rate": 4.518133194181723e-06, "loss": 0.7555, "step": 4570 }, { "epoch": 0.14174618911884992, "grad_norm": 3.1669321073655703, "learning_rate": 4.5173178527167185e-06, "loss": 0.7749, "step": 4575 }, { "epoch": 0.14190110298673939, "grad_norm": 2.4731141037065525, "learning_rate": 4.516502511251712e-06, "loss": 0.8026, "step": 4580 }, { "epoch": 0.14205601685462882, "grad_norm": 4.919003451473519, "learning_rate": 4.5156871697867074e-06, "loss": 0.7401, "step": 4585 }, { "epoch": 0.14221093072251828, "grad_norm": 2.8879336151231234, "learning_rate": 4.514871828321701e-06, "loss": 0.808, "step": 4590 }, { "epoch": 0.14236584459040774, "grad_norm": 2.8092099674851365, "learning_rate": 4.514056486856696e-06, "loss": 0.6932, "step": 4595 }, { "epoch": 0.14252075845829718, "grad_norm": 2.312078772403807, "learning_rate": 4.51324114539169e-06, "loss": 0.7383, "step": 4600 }, { "epoch": 0.14267567232618664, "grad_norm": 4.096659116544071, "learning_rate": 4.512425803926685e-06, "loss": 0.7439, "step": 4605 }, { "epoch": 0.1428305861940761, "grad_norm": 2.7555908254771655, "learning_rate": 4.511610462461679e-06, "loss": 0.7564, "step": 4610 }, { "epoch": 0.14298550006196556, "grad_norm": 2.9289314056632296, "learning_rate": 4.510795120996674e-06, "loss": 0.7365, "step": 4615 }, { "epoch": 0.143140413929855, "grad_norm": 4.4852593871127615, "learning_rate": 4.509979779531668e-06, "loss": 0.8081, "step": 4620 }, { "epoch": 0.14329532779774445, "grad_norm": 2.7059738205366277, "learning_rate": 4.509164438066663e-06, "loss": 0.7617, "step": 4625 }, { "epoch": 0.14345024166563392, "grad_norm": 3.2701537846386404, "learning_rate": 4.508349096601657e-06, "loss": 0.7743, "step": 4630 }, { "epoch": 0.14360515553352335, "grad_norm": 2.6404674137108017, "learning_rate": 4.507533755136651e-06, "loss": 0.7585, "step": 4635 }, { "epoch": 0.1437600694014128, "grad_norm": 3.269717754198127, "learning_rate": 4.5067184136716465e-06, "loss": 0.7713, "step": 4640 }, { "epoch": 0.14391498326930227, "grad_norm": 3.115259508321054, "learning_rate": 4.50590307220664e-06, "loss": 0.6827, "step": 4645 }, { "epoch": 0.14406989713719173, "grad_norm": 4.663498857080817, "learning_rate": 4.505087730741635e-06, "loss": 0.7658, "step": 4650 }, { "epoch": 0.14422481100508117, "grad_norm": 2.893990593080853, "learning_rate": 4.504272389276629e-06, "loss": 0.8228, "step": 4655 }, { "epoch": 0.14437972487297063, "grad_norm": 3.0293287097714736, "learning_rate": 4.503457047811624e-06, "loss": 0.86, "step": 4660 }, { "epoch": 0.1445346387408601, "grad_norm": 2.5393741337277356, "learning_rate": 4.502641706346618e-06, "loss": 0.7903, "step": 4665 }, { "epoch": 0.14468955260874952, "grad_norm": 2.7041505511527246, "learning_rate": 4.501826364881613e-06, "loss": 0.7804, "step": 4670 }, { "epoch": 0.14484446647663898, "grad_norm": 2.6196273707975677, "learning_rate": 4.501011023416607e-06, "loss": 0.7528, "step": 4675 }, { "epoch": 0.14499938034452844, "grad_norm": 3.0525499831833565, "learning_rate": 4.500195681951602e-06, "loss": 0.8157, "step": 4680 }, { "epoch": 0.1451542942124179, "grad_norm": 3.6731119181939453, "learning_rate": 4.499380340486596e-06, "loss": 0.848, "step": 4685 }, { "epoch": 0.14530920808030734, "grad_norm": 3.4906785425698796, "learning_rate": 4.498564999021591e-06, "loss": 0.8036, "step": 4690 }, { "epoch": 0.1454641219481968, "grad_norm": 2.464687489547642, "learning_rate": 4.497749657556585e-06, "loss": 0.7442, "step": 4695 }, { "epoch": 0.14561903581608626, "grad_norm": 2.6943365447198304, "learning_rate": 4.49693431609158e-06, "loss": 0.7043, "step": 4700 }, { "epoch": 0.14577394968397572, "grad_norm": 2.937776461871057, "learning_rate": 4.496118974626574e-06, "loss": 0.784, "step": 4705 }, { "epoch": 0.14592886355186516, "grad_norm": 3.1236716733567884, "learning_rate": 4.495303633161568e-06, "loss": 0.7764, "step": 4710 }, { "epoch": 0.14608377741975462, "grad_norm": 2.76442872763958, "learning_rate": 4.4944882916965625e-06, "loss": 0.7537, "step": 4715 }, { "epoch": 0.14623869128764408, "grad_norm": 3.1750132009474683, "learning_rate": 4.493672950231557e-06, "loss": 0.7295, "step": 4720 }, { "epoch": 0.1463936051555335, "grad_norm": 2.446076972471536, "learning_rate": 4.4928576087665514e-06, "loss": 0.7221, "step": 4725 }, { "epoch": 0.14654851902342297, "grad_norm": 2.642514287413422, "learning_rate": 4.492042267301546e-06, "loss": 0.7527, "step": 4730 }, { "epoch": 0.14670343289131244, "grad_norm": 4.087236786413229, "learning_rate": 4.491226925836541e-06, "loss": 0.8009, "step": 4735 }, { "epoch": 0.1468583467592019, "grad_norm": 4.236180928161451, "learning_rate": 4.490411584371535e-06, "loss": 0.8291, "step": 4740 }, { "epoch": 0.14701326062709133, "grad_norm": 2.904325463188587, "learning_rate": 4.48959624290653e-06, "loss": 0.8134, "step": 4745 }, { "epoch": 0.1471681744949808, "grad_norm": 2.9616286002295733, "learning_rate": 4.488780901441524e-06, "loss": 0.8599, "step": 4750 }, { "epoch": 0.14732308836287025, "grad_norm": 2.6319304965004697, "learning_rate": 4.487965559976519e-06, "loss": 0.8094, "step": 4755 }, { "epoch": 0.14747800223075969, "grad_norm": 4.07742968558367, "learning_rate": 4.487150218511513e-06, "loss": 0.8251, "step": 4760 }, { "epoch": 0.14763291609864915, "grad_norm": 2.759313188624533, "learning_rate": 4.486334877046508e-06, "loss": 0.7509, "step": 4765 }, { "epoch": 0.1477878299665386, "grad_norm": 3.155971032987636, "learning_rate": 4.4855195355815016e-06, "loss": 0.735, "step": 4770 }, { "epoch": 0.14794274383442807, "grad_norm": 3.182772857016518, "learning_rate": 4.484704194116497e-06, "loss": 0.7629, "step": 4775 }, { "epoch": 0.1480976577023175, "grad_norm": 2.4195653351751365, "learning_rate": 4.4838888526514905e-06, "loss": 0.7036, "step": 4780 }, { "epoch": 0.14825257157020696, "grad_norm": 2.775332523335011, "learning_rate": 4.483073511186485e-06, "loss": 0.7672, "step": 4785 }, { "epoch": 0.14840748543809643, "grad_norm": 4.6580652991722165, "learning_rate": 4.482258169721479e-06, "loss": 0.8265, "step": 4790 }, { "epoch": 0.14856239930598586, "grad_norm": 3.1183327219581094, "learning_rate": 4.481442828256474e-06, "loss": 0.6981, "step": 4795 }, { "epoch": 0.14871731317387532, "grad_norm": 3.0031480189775546, "learning_rate": 4.480627486791468e-06, "loss": 0.7628, "step": 4800 }, { "epoch": 0.14887222704176478, "grad_norm": 3.7862167957458195, "learning_rate": 4.479812145326463e-06, "loss": 0.7045, "step": 4805 }, { "epoch": 0.14902714090965424, "grad_norm": 4.043437062690894, "learning_rate": 4.478996803861457e-06, "loss": 0.8104, "step": 4810 }, { "epoch": 0.14918205477754368, "grad_norm": 2.419434448637485, "learning_rate": 4.478181462396452e-06, "loss": 0.7344, "step": 4815 }, { "epoch": 0.14933696864543314, "grad_norm": 3.5416140464428354, "learning_rate": 4.477366120931446e-06, "loss": 0.7905, "step": 4820 }, { "epoch": 0.1494918825133226, "grad_norm": 2.309395662170973, "learning_rate": 4.476550779466441e-06, "loss": 0.7535, "step": 4825 }, { "epoch": 0.14964679638121203, "grad_norm": 3.7712402286663096, "learning_rate": 4.475735438001436e-06, "loss": 0.773, "step": 4830 }, { "epoch": 0.1498017102491015, "grad_norm": 3.290237349014747, "learning_rate": 4.4749200965364295e-06, "loss": 0.775, "step": 4835 }, { "epoch": 0.14995662411699096, "grad_norm": 2.8381123950280034, "learning_rate": 4.474104755071425e-06, "loss": 0.7455, "step": 4840 }, { "epoch": 0.15011153798488042, "grad_norm": 3.531360992465647, "learning_rate": 4.4732894136064185e-06, "loss": 0.8675, "step": 4845 }, { "epoch": 0.15026645185276985, "grad_norm": 2.5490614176847184, "learning_rate": 4.472474072141414e-06, "loss": 0.7074, "step": 4850 }, { "epoch": 0.1504213657206593, "grad_norm": 2.7703540891167955, "learning_rate": 4.471658730676407e-06, "loss": 0.7503, "step": 4855 }, { "epoch": 0.15057627958854877, "grad_norm": 2.710583630125612, "learning_rate": 4.470843389211403e-06, "loss": 0.8428, "step": 4860 }, { "epoch": 0.15073119345643823, "grad_norm": 5.823514165158324, "learning_rate": 4.470028047746396e-06, "loss": 0.7159, "step": 4865 }, { "epoch": 0.15088610732432767, "grad_norm": 2.981151551231262, "learning_rate": 4.469212706281391e-06, "loss": 0.7575, "step": 4870 }, { "epoch": 0.15104102119221713, "grad_norm": 2.6482936505311, "learning_rate": 4.468397364816385e-06, "loss": 0.8399, "step": 4875 }, { "epoch": 0.1511959350601066, "grad_norm": 4.143014853095826, "learning_rate": 4.46758202335138e-06, "loss": 0.7764, "step": 4880 }, { "epoch": 0.15135084892799602, "grad_norm": 3.5767531769105316, "learning_rate": 4.466766681886374e-06, "loss": 0.7901, "step": 4885 }, { "epoch": 0.15150576279588548, "grad_norm": 2.659197429540348, "learning_rate": 4.465951340421369e-06, "loss": 0.8473, "step": 4890 }, { "epoch": 0.15166067666377495, "grad_norm": 3.1818096470168435, "learning_rate": 4.465135998956363e-06, "loss": 0.8025, "step": 4895 }, { "epoch": 0.1518155905316644, "grad_norm": 4.0049192141744125, "learning_rate": 4.4643206574913575e-06, "loss": 0.7822, "step": 4900 }, { "epoch": 0.15197050439955384, "grad_norm": 2.525842049753675, "learning_rate": 4.463505316026352e-06, "loss": 0.7036, "step": 4905 }, { "epoch": 0.1521254182674433, "grad_norm": 3.082845826282703, "learning_rate": 4.4626899745613464e-06, "loss": 0.7945, "step": 4910 }, { "epoch": 0.15228033213533276, "grad_norm": 2.9528498660114963, "learning_rate": 4.461874633096341e-06, "loss": 0.7102, "step": 4915 }, { "epoch": 0.1524352460032222, "grad_norm": 2.7238406708707608, "learning_rate": 4.461059291631335e-06, "loss": 0.7713, "step": 4920 }, { "epoch": 0.15259015987111166, "grad_norm": 3.5152563316781005, "learning_rate": 4.460243950166331e-06, "loss": 0.7167, "step": 4925 }, { "epoch": 0.15274507373900112, "grad_norm": 3.0327223967760952, "learning_rate": 4.459428608701324e-06, "loss": 0.7872, "step": 4930 }, { "epoch": 0.15289998760689058, "grad_norm": 2.8011688902161724, "learning_rate": 4.4586132672363196e-06, "loss": 0.7554, "step": 4935 }, { "epoch": 0.15305490147478001, "grad_norm": 3.5030835722643667, "learning_rate": 4.457797925771313e-06, "loss": 0.7922, "step": 4940 }, { "epoch": 0.15320981534266948, "grad_norm": 2.517788999938251, "learning_rate": 4.456982584306308e-06, "loss": 0.6681, "step": 4945 }, { "epoch": 0.15336472921055894, "grad_norm": 3.7211835196742618, "learning_rate": 4.456167242841302e-06, "loss": 0.7683, "step": 4950 }, { "epoch": 0.15351964307844837, "grad_norm": 4.6535180525952455, "learning_rate": 4.4553519013762966e-06, "loss": 0.7227, "step": 4955 }, { "epoch": 0.15367455694633783, "grad_norm": 2.238888438927339, "learning_rate": 4.454536559911291e-06, "loss": 0.7406, "step": 4960 }, { "epoch": 0.1538294708142273, "grad_norm": 3.269766325514787, "learning_rate": 4.4537212184462855e-06, "loss": 0.7689, "step": 4965 }, { "epoch": 0.15398438468211675, "grad_norm": 2.51366931565056, "learning_rate": 4.45290587698128e-06, "loss": 0.8015, "step": 4970 }, { "epoch": 0.1541392985500062, "grad_norm": 2.587032224838206, "learning_rate": 4.452090535516274e-06, "loss": 0.8043, "step": 4975 }, { "epoch": 0.15429421241789565, "grad_norm": 2.8065973553537784, "learning_rate": 4.451275194051269e-06, "loss": 0.7126, "step": 4980 }, { "epoch": 0.1544491262857851, "grad_norm": 3.0831930591197385, "learning_rate": 4.450459852586263e-06, "loss": 0.8605, "step": 4985 }, { "epoch": 0.15460404015367454, "grad_norm": 3.5085289983286847, "learning_rate": 4.449644511121258e-06, "loss": 0.7288, "step": 4990 }, { "epoch": 0.154758954021564, "grad_norm": 3.1554871250218115, "learning_rate": 4.448829169656252e-06, "loss": 0.7335, "step": 4995 }, { "epoch": 0.15491386788945347, "grad_norm": 3.529311781375021, "learning_rate": 4.448013828191247e-06, "loss": 0.8361, "step": 5000 }, { "epoch": 0.15506878175734293, "grad_norm": 3.144176642625326, "learning_rate": 4.447198486726241e-06, "loss": 0.7809, "step": 5005 }, { "epoch": 0.15522369562523236, "grad_norm": 2.1511748044298273, "learning_rate": 4.4463831452612365e-06, "loss": 0.7331, "step": 5010 }, { "epoch": 0.15537860949312182, "grad_norm": 2.766400885148497, "learning_rate": 4.44556780379623e-06, "loss": 0.8694, "step": 5015 }, { "epoch": 0.15553352336101128, "grad_norm": 3.254540124211028, "learning_rate": 4.4447524623312245e-06, "loss": 0.7697, "step": 5020 }, { "epoch": 0.15568843722890074, "grad_norm": 3.1104658753974808, "learning_rate": 4.443937120866219e-06, "loss": 0.8581, "step": 5025 }, { "epoch": 0.15584335109679018, "grad_norm": 2.976617634478191, "learning_rate": 4.4431217794012134e-06, "loss": 0.7024, "step": 5030 }, { "epoch": 0.15599826496467964, "grad_norm": 3.706088052991768, "learning_rate": 4.442306437936208e-06, "loss": 0.7521, "step": 5035 }, { "epoch": 0.1561531788325691, "grad_norm": 2.8761187684137757, "learning_rate": 4.441491096471202e-06, "loss": 0.6959, "step": 5040 }, { "epoch": 0.15630809270045853, "grad_norm": 2.257795295311228, "learning_rate": 4.440675755006197e-06, "loss": 0.7909, "step": 5045 }, { "epoch": 0.156463006568348, "grad_norm": 5.026983305280436, "learning_rate": 4.439860413541191e-06, "loss": 0.7201, "step": 5050 }, { "epoch": 0.15661792043623746, "grad_norm": 3.457801182072693, "learning_rate": 4.439045072076186e-06, "loss": 0.7711, "step": 5055 }, { "epoch": 0.15677283430412692, "grad_norm": 4.7062749956477665, "learning_rate": 4.43822973061118e-06, "loss": 0.7284, "step": 5060 }, { "epoch": 0.15692774817201635, "grad_norm": 3.060627052648756, "learning_rate": 4.437414389146175e-06, "loss": 0.8415, "step": 5065 }, { "epoch": 0.1570826620399058, "grad_norm": 3.267993832618697, "learning_rate": 4.436599047681169e-06, "loss": 0.8429, "step": 5070 }, { "epoch": 0.15723757590779527, "grad_norm": 2.8926074695297235, "learning_rate": 4.435783706216164e-06, "loss": 0.7161, "step": 5075 }, { "epoch": 0.1573924897756847, "grad_norm": 4.844514749944251, "learning_rate": 4.434968364751158e-06, "loss": 0.8176, "step": 5080 }, { "epoch": 0.15754740364357417, "grad_norm": 3.586708262293216, "learning_rate": 4.4341530232861525e-06, "loss": 0.7735, "step": 5085 }, { "epoch": 0.15770231751146363, "grad_norm": 3.6298349862977783, "learning_rate": 4.433337681821147e-06, "loss": 0.7388, "step": 5090 }, { "epoch": 0.1578572313793531, "grad_norm": 2.843871008849915, "learning_rate": 4.432522340356141e-06, "loss": 0.7662, "step": 5095 }, { "epoch": 0.15801214524724252, "grad_norm": 3.831553723441914, "learning_rate": 4.431706998891136e-06, "loss": 0.7595, "step": 5100 }, { "epoch": 0.15816705911513199, "grad_norm": 2.9751096248231232, "learning_rate": 4.43089165742613e-06, "loss": 0.8587, "step": 5105 }, { "epoch": 0.15832197298302145, "grad_norm": 2.9991686304480454, "learning_rate": 4.430076315961125e-06, "loss": 0.7381, "step": 5110 }, { "epoch": 0.15847688685091088, "grad_norm": 2.788960612015319, "learning_rate": 4.429260974496119e-06, "loss": 0.8146, "step": 5115 }, { "epoch": 0.15863180071880034, "grad_norm": 2.79330745712249, "learning_rate": 4.428445633031114e-06, "loss": 0.766, "step": 5120 }, { "epoch": 0.1587867145866898, "grad_norm": 3.0838507274737132, "learning_rate": 4.427630291566108e-06, "loss": 0.7721, "step": 5125 }, { "epoch": 0.15894162845457926, "grad_norm": 2.971927625419794, "learning_rate": 4.426814950101103e-06, "loss": 0.8539, "step": 5130 }, { "epoch": 0.1590965423224687, "grad_norm": 2.849768668576067, "learning_rate": 4.425999608636097e-06, "loss": 0.7672, "step": 5135 }, { "epoch": 0.15925145619035816, "grad_norm": 2.6397877569390666, "learning_rate": 4.4251842671710915e-06, "loss": 0.7228, "step": 5140 }, { "epoch": 0.15940637005824762, "grad_norm": 2.7655310766975494, "learning_rate": 4.424368925706086e-06, "loss": 0.8287, "step": 5145 }, { "epoch": 0.15956128392613705, "grad_norm": 5.439054395092519, "learning_rate": 4.4235535842410805e-06, "loss": 0.746, "step": 5150 }, { "epoch": 0.15971619779402652, "grad_norm": 2.7341184515664008, "learning_rate": 4.422738242776075e-06, "loss": 0.8038, "step": 5155 }, { "epoch": 0.15987111166191598, "grad_norm": 2.514908009582635, "learning_rate": 4.421922901311069e-06, "loss": 0.7549, "step": 5160 }, { "epoch": 0.16002602552980544, "grad_norm": 2.8566274337663113, "learning_rate": 4.421107559846064e-06, "loss": 0.7567, "step": 5165 }, { "epoch": 0.16018093939769487, "grad_norm": 3.5763257237293917, "learning_rate": 4.420292218381058e-06, "loss": 0.8166, "step": 5170 }, { "epoch": 0.16033585326558433, "grad_norm": 2.449691046911433, "learning_rate": 4.419476876916053e-06, "loss": 0.6901, "step": 5175 }, { "epoch": 0.1604907671334738, "grad_norm": 3.3599885366746416, "learning_rate": 4.418661535451047e-06, "loss": 0.7715, "step": 5180 }, { "epoch": 0.16064568100136326, "grad_norm": 2.673686962922106, "learning_rate": 4.417846193986042e-06, "loss": 0.804, "step": 5185 }, { "epoch": 0.1608005948692527, "grad_norm": 2.1548834069611513, "learning_rate": 4.417030852521036e-06, "loss": 0.8402, "step": 5190 }, { "epoch": 0.16095550873714215, "grad_norm": 2.972864853723168, "learning_rate": 4.416215511056031e-06, "loss": 0.7266, "step": 5195 }, { "epoch": 0.1611104226050316, "grad_norm": 2.472652809174208, "learning_rate": 4.415400169591025e-06, "loss": 0.7693, "step": 5200 }, { "epoch": 0.16126533647292104, "grad_norm": 2.574309439785317, "learning_rate": 4.4145848281260195e-06, "loss": 0.757, "step": 5205 }, { "epoch": 0.1614202503408105, "grad_norm": 5.159660869261577, "learning_rate": 4.413769486661014e-06, "loss": 0.76, "step": 5210 }, { "epoch": 0.16157516420869997, "grad_norm": 2.863916060657096, "learning_rate": 4.4129541451960084e-06, "loss": 0.8213, "step": 5215 }, { "epoch": 0.16173007807658943, "grad_norm": 2.487750755970028, "learning_rate": 4.412138803731003e-06, "loss": 0.7709, "step": 5220 }, { "epoch": 0.16188499194447886, "grad_norm": 2.9567885758738846, "learning_rate": 4.411323462265997e-06, "loss": 0.7687, "step": 5225 }, { "epoch": 0.16203990581236832, "grad_norm": 3.252294538989296, "learning_rate": 4.410508120800992e-06, "loss": 0.7376, "step": 5230 }, { "epoch": 0.16219481968025778, "grad_norm": 2.6962281558586945, "learning_rate": 4.409692779335986e-06, "loss": 0.8635, "step": 5235 }, { "epoch": 0.16234973354814722, "grad_norm": 3.2763306421031997, "learning_rate": 4.408877437870981e-06, "loss": 0.8371, "step": 5240 }, { "epoch": 0.16250464741603668, "grad_norm": 2.948823864061083, "learning_rate": 4.408062096405975e-06, "loss": 0.7641, "step": 5245 }, { "epoch": 0.16265956128392614, "grad_norm": 3.0716042722456827, "learning_rate": 4.40724675494097e-06, "loss": 0.8436, "step": 5250 }, { "epoch": 0.1628144751518156, "grad_norm": 2.985779731678258, "learning_rate": 4.406431413475964e-06, "loss": 0.8283, "step": 5255 }, { "epoch": 0.16296938901970504, "grad_norm": 3.305214874629326, "learning_rate": 4.4056160720109586e-06, "loss": 0.8251, "step": 5260 }, { "epoch": 0.1631243028875945, "grad_norm": 3.0251346666850285, "learning_rate": 4.404800730545953e-06, "loss": 0.7444, "step": 5265 }, { "epoch": 0.16327921675548396, "grad_norm": 2.15089776695814, "learning_rate": 4.4039853890809475e-06, "loss": 0.7967, "step": 5270 }, { "epoch": 0.1634341306233734, "grad_norm": 2.601500356542074, "learning_rate": 4.403170047615942e-06, "loss": 0.6868, "step": 5275 }, { "epoch": 0.16358904449126285, "grad_norm": 2.8304180380552095, "learning_rate": 4.402354706150936e-06, "loss": 0.666, "step": 5280 }, { "epoch": 0.16374395835915231, "grad_norm": 2.7936943745608094, "learning_rate": 4.401539364685931e-06, "loss": 0.7782, "step": 5285 }, { "epoch": 0.16389887222704178, "grad_norm": 3.510362156889563, "learning_rate": 4.400724023220925e-06, "loss": 0.8211, "step": 5290 }, { "epoch": 0.1640537860949312, "grad_norm": 2.5936131907576825, "learning_rate": 4.39990868175592e-06, "loss": 0.7676, "step": 5295 }, { "epoch": 0.16420869996282067, "grad_norm": 2.7103507029146714, "learning_rate": 4.399093340290914e-06, "loss": 0.8507, "step": 5300 }, { "epoch": 0.16436361383071013, "grad_norm": 2.637921361943342, "learning_rate": 4.398277998825909e-06, "loss": 0.7976, "step": 5305 }, { "epoch": 0.16451852769859956, "grad_norm": 2.654007164198866, "learning_rate": 4.397462657360903e-06, "loss": 0.8172, "step": 5310 }, { "epoch": 0.16467344156648903, "grad_norm": 2.383891285896202, "learning_rate": 4.396647315895898e-06, "loss": 0.7848, "step": 5315 }, { "epoch": 0.1648283554343785, "grad_norm": 3.6784706876022644, "learning_rate": 4.395831974430892e-06, "loss": 0.8046, "step": 5320 }, { "epoch": 0.16498326930226795, "grad_norm": 2.762253432540864, "learning_rate": 4.3950166329658865e-06, "loss": 0.7621, "step": 5325 }, { "epoch": 0.16513818317015738, "grad_norm": 3.5952600185641597, "learning_rate": 4.394201291500881e-06, "loss": 0.6899, "step": 5330 }, { "epoch": 0.16529309703804684, "grad_norm": 5.018987496068969, "learning_rate": 4.3933859500358755e-06, "loss": 0.6842, "step": 5335 }, { "epoch": 0.1654480109059363, "grad_norm": 2.5154644134421886, "learning_rate": 4.39257060857087e-06, "loss": 0.7501, "step": 5340 }, { "epoch": 0.16560292477382577, "grad_norm": 3.358719465740722, "learning_rate": 4.391755267105864e-06, "loss": 0.8159, "step": 5345 }, { "epoch": 0.1657578386417152, "grad_norm": 2.6932317275695628, "learning_rate": 4.390939925640859e-06, "loss": 0.7507, "step": 5350 }, { "epoch": 0.16591275250960466, "grad_norm": 2.6492810857046316, "learning_rate": 4.390124584175853e-06, "loss": 0.7361, "step": 5355 }, { "epoch": 0.16606766637749412, "grad_norm": 2.8271477278169117, "learning_rate": 4.389309242710848e-06, "loss": 0.7949, "step": 5360 }, { "epoch": 0.16622258024538356, "grad_norm": 3.2528982108765807, "learning_rate": 4.388493901245842e-06, "loss": 0.7236, "step": 5365 }, { "epoch": 0.16637749411327302, "grad_norm": 2.397398024127316, "learning_rate": 4.387678559780837e-06, "loss": 0.656, "step": 5370 }, { "epoch": 0.16653240798116248, "grad_norm": 3.1886413138428775, "learning_rate": 4.386863218315831e-06, "loss": 0.8081, "step": 5375 }, { "epoch": 0.16668732184905194, "grad_norm": 2.391369547546912, "learning_rate": 4.386047876850826e-06, "loss": 0.7797, "step": 5380 }, { "epoch": 0.16684223571694137, "grad_norm": 2.6033987841255293, "learning_rate": 4.38523253538582e-06, "loss": 0.7269, "step": 5385 }, { "epoch": 0.16699714958483083, "grad_norm": 3.1509058541002033, "learning_rate": 4.3844171939208145e-06, "loss": 0.7827, "step": 5390 }, { "epoch": 0.1671520634527203, "grad_norm": 2.6108250218954216, "learning_rate": 4.383601852455809e-06, "loss": 0.7183, "step": 5395 }, { "epoch": 0.16730697732060973, "grad_norm": 2.968745082474728, "learning_rate": 4.382786510990803e-06, "loss": 0.7534, "step": 5400 }, { "epoch": 0.1674618911884992, "grad_norm": 2.7662738897009054, "learning_rate": 4.381971169525798e-06, "loss": 0.7506, "step": 5405 }, { "epoch": 0.16761680505638865, "grad_norm": 2.5957002100901145, "learning_rate": 4.381155828060792e-06, "loss": 0.6352, "step": 5410 }, { "epoch": 0.1677717189242781, "grad_norm": 4.166429994392841, "learning_rate": 4.380340486595787e-06, "loss": 0.7571, "step": 5415 }, { "epoch": 0.16792663279216755, "grad_norm": 2.4987347006593903, "learning_rate": 4.379525145130781e-06, "loss": 0.7605, "step": 5420 }, { "epoch": 0.168081546660057, "grad_norm": 2.4244994569930753, "learning_rate": 4.378709803665776e-06, "loss": 0.7163, "step": 5425 }, { "epoch": 0.16823646052794647, "grad_norm": 4.84999396582533, "learning_rate": 4.37789446220077e-06, "loss": 0.7859, "step": 5430 }, { "epoch": 0.1683913743958359, "grad_norm": 2.8480144302092234, "learning_rate": 4.377079120735765e-06, "loss": 0.8193, "step": 5435 }, { "epoch": 0.16854628826372536, "grad_norm": 3.3082568789001563, "learning_rate": 4.376263779270759e-06, "loss": 0.8468, "step": 5440 }, { "epoch": 0.16870120213161482, "grad_norm": 6.623437151895753, "learning_rate": 4.3754484378057536e-06, "loss": 0.6768, "step": 5445 }, { "epoch": 0.16885611599950429, "grad_norm": 2.5014595541055953, "learning_rate": 4.374633096340748e-06, "loss": 0.7416, "step": 5450 }, { "epoch": 0.16901102986739372, "grad_norm": 3.6382968399888065, "learning_rate": 4.3738177548757425e-06, "loss": 0.8196, "step": 5455 }, { "epoch": 0.16916594373528318, "grad_norm": 2.9994564596466997, "learning_rate": 4.373002413410737e-06, "loss": 0.7726, "step": 5460 }, { "epoch": 0.16932085760317264, "grad_norm": 2.855693884939994, "learning_rate": 4.372187071945731e-06, "loss": 0.7834, "step": 5465 }, { "epoch": 0.1694757714710621, "grad_norm": 2.3984859945137327, "learning_rate": 4.371371730480726e-06, "loss": 0.7322, "step": 5470 }, { "epoch": 0.16963068533895154, "grad_norm": 2.9002376192309747, "learning_rate": 4.3705563890157195e-06, "loss": 0.7642, "step": 5475 }, { "epoch": 0.169785599206841, "grad_norm": 3.5470189351872534, "learning_rate": 4.369741047550715e-06, "loss": 0.7458, "step": 5480 }, { "epoch": 0.16994051307473046, "grad_norm": 3.5272641078662996, "learning_rate": 4.368925706085709e-06, "loss": 0.7591, "step": 5485 }, { "epoch": 0.1700954269426199, "grad_norm": 3.2789573590509375, "learning_rate": 4.368110364620704e-06, "loss": 0.8289, "step": 5490 }, { "epoch": 0.17025034081050935, "grad_norm": 3.2507862494256514, "learning_rate": 4.367295023155698e-06, "loss": 0.7415, "step": 5495 }, { "epoch": 0.17040525467839882, "grad_norm": 3.042261010549591, "learning_rate": 4.366479681690693e-06, "loss": 0.6794, "step": 5500 }, { "epoch": 0.17056016854628828, "grad_norm": 3.0441170392729484, "learning_rate": 4.365664340225687e-06, "loss": 0.802, "step": 5505 }, { "epoch": 0.1707150824141777, "grad_norm": 3.068583750521985, "learning_rate": 4.3648489987606815e-06, "loss": 0.8123, "step": 5510 }, { "epoch": 0.17086999628206717, "grad_norm": 3.3431821744712242, "learning_rate": 4.364033657295676e-06, "loss": 0.7436, "step": 5515 }, { "epoch": 0.17102491014995663, "grad_norm": 3.146531447880595, "learning_rate": 4.3632183158306704e-06, "loss": 0.8518, "step": 5520 }, { "epoch": 0.17117982401784607, "grad_norm": 3.028181382582069, "learning_rate": 4.362402974365665e-06, "loss": 0.818, "step": 5525 }, { "epoch": 0.17133473788573553, "grad_norm": 2.5109900565236205, "learning_rate": 4.361587632900659e-06, "loss": 0.8293, "step": 5530 }, { "epoch": 0.171489651753625, "grad_norm": 2.455315801545713, "learning_rate": 4.360772291435654e-06, "loss": 0.7109, "step": 5535 }, { "epoch": 0.17164456562151445, "grad_norm": 4.747454903437599, "learning_rate": 4.359956949970648e-06, "loss": 0.7798, "step": 5540 }, { "epoch": 0.17179947948940388, "grad_norm": 2.100577649738655, "learning_rate": 4.359141608505643e-06, "loss": 0.8247, "step": 5545 }, { "epoch": 0.17195439335729334, "grad_norm": 2.5813865540958933, "learning_rate": 4.358326267040636e-06, "loss": 0.7836, "step": 5550 }, { "epoch": 0.1721093072251828, "grad_norm": 3.009997055337116, "learning_rate": 4.357510925575632e-06, "loss": 0.7491, "step": 5555 }, { "epoch": 0.17226422109307224, "grad_norm": 2.742089863193226, "learning_rate": 4.356695584110625e-06, "loss": 0.8101, "step": 5560 }, { "epoch": 0.1724191349609617, "grad_norm": 3.239812651264049, "learning_rate": 4.3558802426456206e-06, "loss": 0.8572, "step": 5565 }, { "epoch": 0.17257404882885116, "grad_norm": 2.8301286766348124, "learning_rate": 4.355064901180614e-06, "loss": 0.8001, "step": 5570 }, { "epoch": 0.17272896269674062, "grad_norm": 4.285469792257762, "learning_rate": 4.3542495597156095e-06, "loss": 0.6794, "step": 5575 }, { "epoch": 0.17288387656463006, "grad_norm": 4.187424392243814, "learning_rate": 4.353434218250604e-06, "loss": 0.7731, "step": 5580 }, { "epoch": 0.17303879043251952, "grad_norm": 2.4096467820007423, "learning_rate": 4.352618876785598e-06, "loss": 0.718, "step": 5585 }, { "epoch": 0.17319370430040898, "grad_norm": 2.740244733808198, "learning_rate": 4.351803535320593e-06, "loss": 0.7077, "step": 5590 }, { "epoch": 0.1733486181682984, "grad_norm": 3.6143235365026234, "learning_rate": 4.350988193855587e-06, "loss": 0.7544, "step": 5595 }, { "epoch": 0.17350353203618787, "grad_norm": 3.0961231003560004, "learning_rate": 4.350172852390582e-06, "loss": 0.8031, "step": 5600 }, { "epoch": 0.17365844590407734, "grad_norm": 2.599819075128163, "learning_rate": 4.349357510925576e-06, "loss": 0.7234, "step": 5605 }, { "epoch": 0.1738133597719668, "grad_norm": 2.4312728367624374, "learning_rate": 4.348542169460571e-06, "loss": 0.659, "step": 5610 }, { "epoch": 0.17396827363985623, "grad_norm": 3.0403929641496914, "learning_rate": 4.347726827995565e-06, "loss": 0.836, "step": 5615 }, { "epoch": 0.1741231875077457, "grad_norm": 2.312291571103329, "learning_rate": 4.34691148653056e-06, "loss": 0.722, "step": 5620 }, { "epoch": 0.17427810137563515, "grad_norm": 3.525762747165484, "learning_rate": 4.346096145065554e-06, "loss": 0.6804, "step": 5625 }, { "epoch": 0.17443301524352461, "grad_norm": 2.143732270375045, "learning_rate": 4.3452808036005485e-06, "loss": 0.7546, "step": 5630 }, { "epoch": 0.17458792911141405, "grad_norm": 2.6989853012802034, "learning_rate": 4.344465462135542e-06, "loss": 0.7544, "step": 5635 }, { "epoch": 0.1747428429793035, "grad_norm": 3.2439633275706155, "learning_rate": 4.3436501206705375e-06, "loss": 0.6925, "step": 5640 }, { "epoch": 0.17489775684719297, "grad_norm": 3.176404364504209, "learning_rate": 4.342834779205531e-06, "loss": 0.7334, "step": 5645 }, { "epoch": 0.1750526707150824, "grad_norm": 2.3948145331467696, "learning_rate": 4.342019437740526e-06, "loss": 0.7637, "step": 5650 }, { "epoch": 0.17520758458297186, "grad_norm": 2.4006200028063627, "learning_rate": 4.34120409627552e-06, "loss": 0.8439, "step": 5655 }, { "epoch": 0.17536249845086133, "grad_norm": 2.9668054582752807, "learning_rate": 4.340388754810515e-06, "loss": 0.7357, "step": 5660 }, { "epoch": 0.1755174123187508, "grad_norm": 3.7390510238181394, "learning_rate": 4.339573413345509e-06, "loss": 0.8933, "step": 5665 }, { "epoch": 0.17567232618664022, "grad_norm": 2.912943589930505, "learning_rate": 4.338758071880504e-06, "loss": 0.7448, "step": 5670 }, { "epoch": 0.17582724005452968, "grad_norm": 6.139487539917326, "learning_rate": 4.337942730415499e-06, "loss": 0.7802, "step": 5675 }, { "epoch": 0.17598215392241914, "grad_norm": 2.7024167119136107, "learning_rate": 4.337127388950493e-06, "loss": 0.7282, "step": 5680 }, { "epoch": 0.17613706779030858, "grad_norm": 3.393339882960223, "learning_rate": 4.336312047485488e-06, "loss": 0.8332, "step": 5685 }, { "epoch": 0.17629198165819804, "grad_norm": 2.3959302974967143, "learning_rate": 4.335496706020482e-06, "loss": 0.6814, "step": 5690 }, { "epoch": 0.1764468955260875, "grad_norm": 2.8184611753163793, "learning_rate": 4.3346813645554765e-06, "loss": 0.7362, "step": 5695 }, { "epoch": 0.17660180939397696, "grad_norm": 2.9982350219620635, "learning_rate": 4.333866023090471e-06, "loss": 0.7621, "step": 5700 }, { "epoch": 0.1767567232618664, "grad_norm": 2.76458626575282, "learning_rate": 4.3330506816254654e-06, "loss": 0.7364, "step": 5705 }, { "epoch": 0.17691163712975586, "grad_norm": 2.709602347276792, "learning_rate": 4.332235340160459e-06, "loss": 0.7786, "step": 5710 }, { "epoch": 0.17706655099764532, "grad_norm": 3.110364527424863, "learning_rate": 4.331419998695454e-06, "loss": 0.806, "step": 5715 }, { "epoch": 0.17722146486553475, "grad_norm": 2.8471232242707303, "learning_rate": 4.330604657230448e-06, "loss": 0.7704, "step": 5720 }, { "epoch": 0.1773763787334242, "grad_norm": 2.35496454943872, "learning_rate": 4.329789315765443e-06, "loss": 0.7295, "step": 5725 }, { "epoch": 0.17753129260131367, "grad_norm": 3.466860703518759, "learning_rate": 4.328973974300437e-06, "loss": 0.857, "step": 5730 }, { "epoch": 0.17768620646920313, "grad_norm": 3.588308813555836, "learning_rate": 4.328158632835432e-06, "loss": 0.7529, "step": 5735 }, { "epoch": 0.17784112033709257, "grad_norm": 2.576584521200645, "learning_rate": 4.327343291370426e-06, "loss": 0.7184, "step": 5740 }, { "epoch": 0.17799603420498203, "grad_norm": 3.3839733408671755, "learning_rate": 4.326527949905421e-06, "loss": 0.8317, "step": 5745 }, { "epoch": 0.1781509480728715, "grad_norm": 2.8489572100686065, "learning_rate": 4.325712608440415e-06, "loss": 0.7811, "step": 5750 }, { "epoch": 0.17830586194076092, "grad_norm": 2.3016969676850816, "learning_rate": 4.32489726697541e-06, "loss": 0.8077, "step": 5755 }, { "epoch": 0.17846077580865038, "grad_norm": 2.6473969426032435, "learning_rate": 4.324081925510404e-06, "loss": 0.7326, "step": 5760 }, { "epoch": 0.17861568967653985, "grad_norm": 2.3514810728766906, "learning_rate": 4.323266584045399e-06, "loss": 0.7253, "step": 5765 }, { "epoch": 0.1787706035444293, "grad_norm": 2.530963839325224, "learning_rate": 4.322451242580393e-06, "loss": 0.7626, "step": 5770 }, { "epoch": 0.17892551741231874, "grad_norm": 3.806338295545008, "learning_rate": 4.321635901115388e-06, "loss": 0.7717, "step": 5775 }, { "epoch": 0.1790804312802082, "grad_norm": 2.453175524352636, "learning_rate": 4.320820559650382e-06, "loss": 0.746, "step": 5780 }, { "epoch": 0.17923534514809766, "grad_norm": 2.5338263184362546, "learning_rate": 4.320005218185376e-06, "loss": 0.7403, "step": 5785 }, { "epoch": 0.17939025901598712, "grad_norm": 4.7157155230798224, "learning_rate": 4.319189876720371e-06, "loss": 0.8018, "step": 5790 }, { "epoch": 0.17954517288387656, "grad_norm": 2.279234508154891, "learning_rate": 4.318374535255365e-06, "loss": 0.7172, "step": 5795 }, { "epoch": 0.17970008675176602, "grad_norm": 3.324015869184283, "learning_rate": 4.31755919379036e-06, "loss": 0.7651, "step": 5800 }, { "epoch": 0.17985500061965548, "grad_norm": 2.875874761430461, "learning_rate": 4.316743852325354e-06, "loss": 0.7918, "step": 5805 }, { "epoch": 0.18000991448754491, "grad_norm": 2.799047827516752, "learning_rate": 4.315928510860349e-06, "loss": 0.8567, "step": 5810 }, { "epoch": 0.18016482835543438, "grad_norm": 3.5994752080833035, "learning_rate": 4.315113169395343e-06, "loss": 0.8005, "step": 5815 }, { "epoch": 0.18031974222332384, "grad_norm": 3.131332831279646, "learning_rate": 4.314297827930338e-06, "loss": 0.8345, "step": 5820 }, { "epoch": 0.1804746560912133, "grad_norm": 2.5037049179512714, "learning_rate": 4.313482486465332e-06, "loss": 0.769, "step": 5825 }, { "epoch": 0.18062956995910273, "grad_norm": 3.2519297331905714, "learning_rate": 4.312667145000327e-06, "loss": 0.749, "step": 5830 }, { "epoch": 0.1807844838269922, "grad_norm": 2.568149592331942, "learning_rate": 4.3118518035353205e-06, "loss": 0.8047, "step": 5835 }, { "epoch": 0.18093939769488165, "grad_norm": 2.295998710413109, "learning_rate": 4.311036462070316e-06, "loss": 0.7115, "step": 5840 }, { "epoch": 0.1810943115627711, "grad_norm": 3.1568418301685695, "learning_rate": 4.3102211206053094e-06, "loss": 0.7315, "step": 5845 }, { "epoch": 0.18124922543066055, "grad_norm": 2.519370797939504, "learning_rate": 4.309405779140305e-06, "loss": 0.7175, "step": 5850 }, { "epoch": 0.18140413929855, "grad_norm": 3.7255813195382825, "learning_rate": 4.308590437675298e-06, "loss": 0.8089, "step": 5855 }, { "epoch": 0.18155905316643947, "grad_norm": 2.938225237425983, "learning_rate": 4.307775096210293e-06, "loss": 0.7684, "step": 5860 }, { "epoch": 0.1817139670343289, "grad_norm": 2.4908234666847635, "learning_rate": 4.306959754745288e-06, "loss": 0.755, "step": 5865 }, { "epoch": 0.18186888090221837, "grad_norm": 2.374582585949276, "learning_rate": 4.306144413280282e-06, "loss": 0.752, "step": 5870 }, { "epoch": 0.18202379477010783, "grad_norm": 2.2054101483221276, "learning_rate": 4.305329071815277e-06, "loss": 0.7166, "step": 5875 }, { "epoch": 0.18217870863799726, "grad_norm": 2.8163116382688194, "learning_rate": 4.304513730350271e-06, "loss": 0.7628, "step": 5880 }, { "epoch": 0.18233362250588672, "grad_norm": 2.103861122273291, "learning_rate": 4.303698388885266e-06, "loss": 0.6412, "step": 5885 }, { "epoch": 0.18248853637377618, "grad_norm": 2.800528987056633, "learning_rate": 4.3028830474202596e-06, "loss": 0.7638, "step": 5890 }, { "epoch": 0.18264345024166564, "grad_norm": 2.598661739741656, "learning_rate": 4.302067705955255e-06, "loss": 0.678, "step": 5895 }, { "epoch": 0.18279836410955508, "grad_norm": 2.7132221824440763, "learning_rate": 4.3012523644902485e-06, "loss": 0.7796, "step": 5900 }, { "epoch": 0.18295327797744454, "grad_norm": 2.618950433672976, "learning_rate": 4.300437023025244e-06, "loss": 0.7245, "step": 5905 }, { "epoch": 0.183108191845334, "grad_norm": 3.638330467142296, "learning_rate": 4.299621681560237e-06, "loss": 0.7643, "step": 5910 }, { "epoch": 0.18326310571322343, "grad_norm": 4.335623344756079, "learning_rate": 4.298806340095233e-06, "loss": 0.8537, "step": 5915 }, { "epoch": 0.1834180195811129, "grad_norm": 3.2034907391049217, "learning_rate": 4.297990998630226e-06, "loss": 0.7107, "step": 5920 }, { "epoch": 0.18357293344900236, "grad_norm": 2.7536244654304443, "learning_rate": 4.297175657165222e-06, "loss": 0.8058, "step": 5925 }, { "epoch": 0.18372784731689182, "grad_norm": 2.294886387327097, "learning_rate": 4.296360315700215e-06, "loss": 0.7471, "step": 5930 }, { "epoch": 0.18388276118478125, "grad_norm": 3.031575265544021, "learning_rate": 4.29554497423521e-06, "loss": 0.7215, "step": 5935 }, { "epoch": 0.1840376750526707, "grad_norm": 2.3850471196327554, "learning_rate": 4.294729632770204e-06, "loss": 0.6967, "step": 5940 }, { "epoch": 0.18419258892056017, "grad_norm": 2.3945875687647664, "learning_rate": 4.293914291305199e-06, "loss": 0.6706, "step": 5945 }, { "epoch": 0.18434750278844964, "grad_norm": 2.595632941802879, "learning_rate": 4.293098949840193e-06, "loss": 0.7621, "step": 5950 }, { "epoch": 0.18450241665633907, "grad_norm": 3.395380038404286, "learning_rate": 4.2922836083751875e-06, "loss": 0.7387, "step": 5955 }, { "epoch": 0.18465733052422853, "grad_norm": 2.81147524416885, "learning_rate": 4.291468266910183e-06, "loss": 0.7223, "step": 5960 }, { "epoch": 0.184812244392118, "grad_norm": 2.533954429031754, "learning_rate": 4.2906529254451765e-06, "loss": 0.8133, "step": 5965 }, { "epoch": 0.18496715826000742, "grad_norm": 2.517400480633623, "learning_rate": 4.289837583980172e-06, "loss": 0.74, "step": 5970 }, { "epoch": 0.1851220721278969, "grad_norm": 3.5230604544836184, "learning_rate": 4.289022242515165e-06, "loss": 0.7732, "step": 5975 }, { "epoch": 0.18527698599578635, "grad_norm": 2.3486813965684847, "learning_rate": 4.288206901050161e-06, "loss": 0.7629, "step": 5980 }, { "epoch": 0.1854318998636758, "grad_norm": 3.2954241165643325, "learning_rate": 4.287391559585154e-06, "loss": 0.7123, "step": 5985 }, { "epoch": 0.18558681373156524, "grad_norm": 2.865841095022122, "learning_rate": 4.28657621812015e-06, "loss": 0.7678, "step": 5990 }, { "epoch": 0.1857417275994547, "grad_norm": 3.164953960056925, "learning_rate": 4.285760876655143e-06, "loss": 0.78, "step": 5995 }, { "epoch": 0.18589664146734416, "grad_norm": 2.869991795212917, "learning_rate": 4.2849455351901385e-06, "loss": 0.742, "step": 6000 }, { "epoch": 0.1860515553352336, "grad_norm": 2.203081482121405, "learning_rate": 4.284130193725132e-06, "loss": 0.759, "step": 6005 }, { "epoch": 0.18620646920312306, "grad_norm": 2.7195684882133344, "learning_rate": 4.283314852260127e-06, "loss": 0.7435, "step": 6010 }, { "epoch": 0.18636138307101252, "grad_norm": 2.9607039570874067, "learning_rate": 4.282499510795121e-06, "loss": 0.8116, "step": 6015 }, { "epoch": 0.18651629693890198, "grad_norm": 2.869896505576161, "learning_rate": 4.2816841693301155e-06, "loss": 0.7921, "step": 6020 }, { "epoch": 0.18667121080679142, "grad_norm": 2.774543887169059, "learning_rate": 4.28086882786511e-06, "loss": 0.7187, "step": 6025 }, { "epoch": 0.18682612467468088, "grad_norm": 2.726875775336164, "learning_rate": 4.2800534864001044e-06, "loss": 0.805, "step": 6030 }, { "epoch": 0.18698103854257034, "grad_norm": 2.5442231884081976, "learning_rate": 4.279238144935099e-06, "loss": 0.7535, "step": 6035 }, { "epoch": 0.18713595241045977, "grad_norm": 2.454716050251383, "learning_rate": 4.278422803470093e-06, "loss": 0.7346, "step": 6040 }, { "epoch": 0.18729086627834923, "grad_norm": 3.047629557551625, "learning_rate": 4.277607462005088e-06, "loss": 0.7803, "step": 6045 }, { "epoch": 0.1874457801462387, "grad_norm": 2.6107861820253, "learning_rate": 4.276792120540082e-06, "loss": 0.8037, "step": 6050 }, { "epoch": 0.18760069401412816, "grad_norm": 2.6193666313961965, "learning_rate": 4.2759767790750776e-06, "loss": 0.8122, "step": 6055 }, { "epoch": 0.1877556078820176, "grad_norm": 2.4526492270943967, "learning_rate": 4.275161437610071e-06, "loss": 0.7505, "step": 6060 }, { "epoch": 0.18791052174990705, "grad_norm": 2.6029873436350135, "learning_rate": 4.2743460961450665e-06, "loss": 0.7137, "step": 6065 }, { "epoch": 0.1880654356177965, "grad_norm": 2.335253767755644, "learning_rate": 4.27353075468006e-06, "loss": 0.7739, "step": 6070 }, { "epoch": 0.18822034948568594, "grad_norm": 2.962884693554328, "learning_rate": 4.272715413215055e-06, "loss": 0.7092, "step": 6075 }, { "epoch": 0.1883752633535754, "grad_norm": 2.4449123364032315, "learning_rate": 4.271900071750049e-06, "loss": 0.7198, "step": 6080 }, { "epoch": 0.18853017722146487, "grad_norm": 3.4659846968841728, "learning_rate": 4.2710847302850435e-06, "loss": 0.7663, "step": 6085 }, { "epoch": 0.18868509108935433, "grad_norm": 2.5532347500692136, "learning_rate": 4.270269388820038e-06, "loss": 0.7986, "step": 6090 }, { "epoch": 0.18884000495724376, "grad_norm": 3.2459257679584907, "learning_rate": 4.269454047355032e-06, "loss": 0.792, "step": 6095 }, { "epoch": 0.18899491882513322, "grad_norm": 4.909883155349975, "learning_rate": 4.268638705890027e-06, "loss": 0.8884, "step": 6100 }, { "epoch": 0.18914983269302268, "grad_norm": 3.034823425014693, "learning_rate": 4.267823364425021e-06, "loss": 0.7636, "step": 6105 }, { "epoch": 0.18930474656091215, "grad_norm": 2.981909998622553, "learning_rate": 4.267008022960016e-06, "loss": 0.81, "step": 6110 }, { "epoch": 0.18945966042880158, "grad_norm": 2.726830264801564, "learning_rate": 4.26619268149501e-06, "loss": 0.7817, "step": 6115 }, { "epoch": 0.18961457429669104, "grad_norm": 2.356802479590097, "learning_rate": 4.265377340030005e-06, "loss": 0.7707, "step": 6120 }, { "epoch": 0.1897694881645805, "grad_norm": 2.9264295176999253, "learning_rate": 4.264561998564999e-06, "loss": 0.797, "step": 6125 }, { "epoch": 0.18992440203246994, "grad_norm": 2.8739918185150524, "learning_rate": 4.263746657099994e-06, "loss": 0.7526, "step": 6130 }, { "epoch": 0.1900793159003594, "grad_norm": 2.7006391509226018, "learning_rate": 4.262931315634988e-06, "loss": 0.7343, "step": 6135 }, { "epoch": 0.19023422976824886, "grad_norm": 2.505817678638681, "learning_rate": 4.2621159741699825e-06, "loss": 0.7633, "step": 6140 }, { "epoch": 0.19038914363613832, "grad_norm": 2.6594258544171163, "learning_rate": 4.261300632704977e-06, "loss": 0.6987, "step": 6145 }, { "epoch": 0.19054405750402775, "grad_norm": 2.901655063473049, "learning_rate": 4.260485291239972e-06, "loss": 0.8029, "step": 6150 }, { "epoch": 0.19069897137191721, "grad_norm": 2.4123329825267956, "learning_rate": 4.259669949774966e-06, "loss": 0.7903, "step": 6155 }, { "epoch": 0.19085388523980668, "grad_norm": 2.516344632234148, "learning_rate": 4.25885460830996e-06, "loss": 0.7372, "step": 6160 }, { "epoch": 0.1910087991076961, "grad_norm": 3.1079418428103636, "learning_rate": 4.258039266844955e-06, "loss": 0.7462, "step": 6165 }, { "epoch": 0.19116371297558557, "grad_norm": 2.4798490113534433, "learning_rate": 4.257223925379949e-06, "loss": 0.713, "step": 6170 }, { "epoch": 0.19131862684347503, "grad_norm": 3.0257058441969207, "learning_rate": 4.256408583914944e-06, "loss": 0.8, "step": 6175 }, { "epoch": 0.1914735407113645, "grad_norm": 5.081031249185892, "learning_rate": 4.255593242449938e-06, "loss": 0.8106, "step": 6180 }, { "epoch": 0.19162845457925393, "grad_norm": 3.345615393836603, "learning_rate": 4.254777900984933e-06, "loss": 0.748, "step": 6185 }, { "epoch": 0.1917833684471434, "grad_norm": 2.700521414468274, "learning_rate": 4.253962559519927e-06, "loss": 0.8357, "step": 6190 }, { "epoch": 0.19193828231503285, "grad_norm": 3.139061295434219, "learning_rate": 4.2531472180549216e-06, "loss": 0.7221, "step": 6195 }, { "epoch": 0.19209319618292228, "grad_norm": 2.7602639676565466, "learning_rate": 4.252331876589916e-06, "loss": 0.7408, "step": 6200 }, { "epoch": 0.19224811005081174, "grad_norm": 3.2594353182372178, "learning_rate": 4.2515165351249105e-06, "loss": 0.7449, "step": 6205 }, { "epoch": 0.1924030239187012, "grad_norm": 3.166046131949625, "learning_rate": 4.250701193659905e-06, "loss": 0.7301, "step": 6210 }, { "epoch": 0.19255793778659067, "grad_norm": 3.2921608562822766, "learning_rate": 4.249885852194899e-06, "loss": 0.8746, "step": 6215 }, { "epoch": 0.1927128516544801, "grad_norm": 3.0196083070468207, "learning_rate": 4.249070510729894e-06, "loss": 0.7896, "step": 6220 }, { "epoch": 0.19286776552236956, "grad_norm": 3.084644423985564, "learning_rate": 4.248255169264888e-06, "loss": 0.7466, "step": 6225 }, { "epoch": 0.19302267939025902, "grad_norm": 4.151380517129842, "learning_rate": 4.247439827799883e-06, "loss": 0.758, "step": 6230 }, { "epoch": 0.19317759325814846, "grad_norm": 2.5212471354489323, "learning_rate": 4.246624486334877e-06, "loss": 0.7066, "step": 6235 }, { "epoch": 0.19333250712603792, "grad_norm": 2.731513599188183, "learning_rate": 4.245809144869872e-06, "loss": 0.705, "step": 6240 }, { "epoch": 0.19348742099392738, "grad_norm": 2.741356034597897, "learning_rate": 4.244993803404866e-06, "loss": 0.7954, "step": 6245 }, { "epoch": 0.19364233486181684, "grad_norm": 2.5596614439709993, "learning_rate": 4.244178461939861e-06, "loss": 0.7296, "step": 6250 }, { "epoch": 0.19379724872970627, "grad_norm": 2.8827264778514956, "learning_rate": 4.243363120474855e-06, "loss": 0.7759, "step": 6255 }, { "epoch": 0.19395216259759573, "grad_norm": 3.5853260635909425, "learning_rate": 4.2425477790098495e-06, "loss": 0.8135, "step": 6260 }, { "epoch": 0.1941070764654852, "grad_norm": 2.635101645264584, "learning_rate": 4.241732437544844e-06, "loss": 0.814, "step": 6265 }, { "epoch": 0.19426199033337466, "grad_norm": 2.9473068967042213, "learning_rate": 4.2409170960798385e-06, "loss": 0.7061, "step": 6270 }, { "epoch": 0.1944169042012641, "grad_norm": 2.6353786393681413, "learning_rate": 4.240101754614833e-06, "loss": 0.7174, "step": 6275 }, { "epoch": 0.19457181806915355, "grad_norm": 2.1681159250827515, "learning_rate": 4.239286413149827e-06, "loss": 0.7584, "step": 6280 }, { "epoch": 0.194726731937043, "grad_norm": 2.981251475625493, "learning_rate": 4.238471071684822e-06, "loss": 0.8373, "step": 6285 }, { "epoch": 0.19488164580493245, "grad_norm": 2.562361101136116, "learning_rate": 4.237655730219816e-06, "loss": 0.8457, "step": 6290 }, { "epoch": 0.1950365596728219, "grad_norm": 2.206383730811549, "learning_rate": 4.236840388754811e-06, "loss": 0.689, "step": 6295 }, { "epoch": 0.19519147354071137, "grad_norm": 2.559481113636516, "learning_rate": 4.236025047289805e-06, "loss": 0.7361, "step": 6300 }, { "epoch": 0.19534638740860083, "grad_norm": 2.6689029844560332, "learning_rate": 4.2352097058248e-06, "loss": 0.734, "step": 6305 }, { "epoch": 0.19550130127649026, "grad_norm": 2.7058004030215397, "learning_rate": 4.234394364359794e-06, "loss": 0.7349, "step": 6310 }, { "epoch": 0.19565621514437972, "grad_norm": 2.5054193327260434, "learning_rate": 4.233579022894789e-06, "loss": 0.7316, "step": 6315 }, { "epoch": 0.19581112901226919, "grad_norm": 3.9367968792622636, "learning_rate": 4.232763681429783e-06, "loss": 0.8237, "step": 6320 }, { "epoch": 0.19596604288015862, "grad_norm": 2.9972447162431424, "learning_rate": 4.2319483399647775e-06, "loss": 0.7526, "step": 6325 }, { "epoch": 0.19612095674804808, "grad_norm": 4.530190217177766, "learning_rate": 4.231132998499772e-06, "loss": 0.7366, "step": 6330 }, { "epoch": 0.19627587061593754, "grad_norm": 2.9966195182914275, "learning_rate": 4.2303176570347664e-06, "loss": 0.8256, "step": 6335 }, { "epoch": 0.196430784483827, "grad_norm": 2.8583969599380348, "learning_rate": 4.229502315569761e-06, "loss": 0.801, "step": 6340 }, { "epoch": 0.19658569835171644, "grad_norm": 4.86783397360561, "learning_rate": 4.228686974104755e-06, "loss": 0.7493, "step": 6345 }, { "epoch": 0.1967406122196059, "grad_norm": 3.0043339919720284, "learning_rate": 4.22787163263975e-06, "loss": 0.7717, "step": 6350 }, { "epoch": 0.19689552608749536, "grad_norm": 3.021053709007017, "learning_rate": 4.227056291174744e-06, "loss": 0.7663, "step": 6355 }, { "epoch": 0.1970504399553848, "grad_norm": 2.8378134064273506, "learning_rate": 4.226240949709739e-06, "loss": 0.8004, "step": 6360 }, { "epoch": 0.19720535382327425, "grad_norm": 2.492439804377685, "learning_rate": 4.225425608244733e-06, "loss": 0.7098, "step": 6365 }, { "epoch": 0.19736026769116372, "grad_norm": 3.38415558114578, "learning_rate": 4.224610266779728e-06, "loss": 0.712, "step": 6370 }, { "epoch": 0.19751518155905318, "grad_norm": 2.4052669109864033, "learning_rate": 4.223794925314722e-06, "loss": 0.704, "step": 6375 }, { "epoch": 0.1976700954269426, "grad_norm": 2.711908251744757, "learning_rate": 4.2229795838497166e-06, "loss": 0.7747, "step": 6380 }, { "epoch": 0.19782500929483207, "grad_norm": 2.42490347487531, "learning_rate": 4.222164242384711e-06, "loss": 0.7278, "step": 6385 }, { "epoch": 0.19797992316272153, "grad_norm": 4.099877860691753, "learning_rate": 4.2213489009197055e-06, "loss": 0.7528, "step": 6390 }, { "epoch": 0.198134837030611, "grad_norm": 2.4922562367871346, "learning_rate": 4.2205335594547e-06, "loss": 0.7666, "step": 6395 }, { "epoch": 0.19828975089850043, "grad_norm": 3.7290559473277534, "learning_rate": 4.219718217989694e-06, "loss": 0.8102, "step": 6400 }, { "epoch": 0.1984446647663899, "grad_norm": 4.1948960869422836, "learning_rate": 4.218902876524689e-06, "loss": 0.7121, "step": 6405 }, { "epoch": 0.19859957863427935, "grad_norm": 2.4264085085431897, "learning_rate": 4.218087535059683e-06, "loss": 0.7522, "step": 6410 }, { "epoch": 0.19875449250216878, "grad_norm": 2.186795270990692, "learning_rate": 4.217272193594678e-06, "loss": 0.8345, "step": 6415 }, { "epoch": 0.19890940637005824, "grad_norm": 2.3987482658136488, "learning_rate": 4.216456852129672e-06, "loss": 0.7442, "step": 6420 }, { "epoch": 0.1990643202379477, "grad_norm": 2.339226263016675, "learning_rate": 4.215641510664667e-06, "loss": 0.7783, "step": 6425 }, { "epoch": 0.19921923410583717, "grad_norm": 3.3305909581253212, "learning_rate": 4.214826169199661e-06, "loss": 0.7867, "step": 6430 }, { "epoch": 0.1993741479737266, "grad_norm": 2.9452020264396888, "learning_rate": 4.214010827734656e-06, "loss": 0.7502, "step": 6435 }, { "epoch": 0.19952906184161606, "grad_norm": 3.8771246272043536, "learning_rate": 4.21319548626965e-06, "loss": 0.7231, "step": 6440 }, { "epoch": 0.19968397570950552, "grad_norm": 2.520087192821889, "learning_rate": 4.2123801448046445e-06, "loss": 0.81, "step": 6445 }, { "epoch": 0.19983888957739496, "grad_norm": 3.2756655876347187, "learning_rate": 4.211564803339639e-06, "loss": 0.7327, "step": 6450 }, { "epoch": 0.19999380344528442, "grad_norm": 3.386167572970496, "learning_rate": 4.2107494618746334e-06, "loss": 0.7201, "step": 6455 }, { "epoch": 0.20014871731317388, "grad_norm": 2.630160676368733, "learning_rate": 4.209934120409628e-06, "loss": 0.7458, "step": 6460 }, { "epoch": 0.20030363118106334, "grad_norm": 3.3620726841120896, "learning_rate": 4.209118778944622e-06, "loss": 0.841, "step": 6465 }, { "epoch": 0.20045854504895277, "grad_norm": 3.4431259217672463, "learning_rate": 4.208303437479617e-06, "loss": 0.7374, "step": 6470 }, { "epoch": 0.20061345891684224, "grad_norm": 3.183026199591362, "learning_rate": 4.207488096014611e-06, "loss": 0.753, "step": 6475 }, { "epoch": 0.2007683727847317, "grad_norm": 2.628312347114581, "learning_rate": 4.206672754549606e-06, "loss": 0.8063, "step": 6480 }, { "epoch": 0.20092328665262113, "grad_norm": 2.858633412753414, "learning_rate": 4.2058574130846e-06, "loss": 0.6966, "step": 6485 }, { "epoch": 0.2010782005205106, "grad_norm": 2.877018796075857, "learning_rate": 4.205042071619595e-06, "loss": 0.7282, "step": 6490 }, { "epoch": 0.20123311438840005, "grad_norm": 2.8603706736561625, "learning_rate": 4.204226730154589e-06, "loss": 0.7634, "step": 6495 }, { "epoch": 0.20138802825628951, "grad_norm": 2.7728315517879993, "learning_rate": 4.203411388689584e-06, "loss": 0.777, "step": 6500 }, { "epoch": 0.20154294212417895, "grad_norm": 2.6283066485571127, "learning_rate": 4.202596047224578e-06, "loss": 0.7589, "step": 6505 }, { "epoch": 0.2016978559920684, "grad_norm": 2.8220033232894974, "learning_rate": 4.2017807057595725e-06, "loss": 0.7654, "step": 6510 }, { "epoch": 0.20185276985995787, "grad_norm": 3.6622119413572687, "learning_rate": 4.200965364294567e-06, "loss": 0.7578, "step": 6515 }, { "epoch": 0.2020076837278473, "grad_norm": 3.3950018437827807, "learning_rate": 4.200150022829561e-06, "loss": 0.7891, "step": 6520 }, { "epoch": 0.20216259759573676, "grad_norm": 2.3240521558763914, "learning_rate": 4.199334681364556e-06, "loss": 0.761, "step": 6525 }, { "epoch": 0.20231751146362623, "grad_norm": 2.963432025241065, "learning_rate": 4.19851933989955e-06, "loss": 0.7792, "step": 6530 }, { "epoch": 0.2024724253315157, "grad_norm": 2.928523207095532, "learning_rate": 4.197703998434545e-06, "loss": 0.8753, "step": 6535 }, { "epoch": 0.20262733919940512, "grad_norm": 2.6919186869954475, "learning_rate": 4.196888656969539e-06, "loss": 0.8486, "step": 6540 }, { "epoch": 0.20278225306729458, "grad_norm": 3.7642659867593866, "learning_rate": 4.196073315504534e-06, "loss": 0.6952, "step": 6545 }, { "epoch": 0.20293716693518404, "grad_norm": 3.5658611646800416, "learning_rate": 4.195257974039528e-06, "loss": 0.7523, "step": 6550 }, { "epoch": 0.2030920808030735, "grad_norm": 2.542210251286996, "learning_rate": 4.194442632574523e-06, "loss": 0.7833, "step": 6555 }, { "epoch": 0.20324699467096294, "grad_norm": 2.9385005459911504, "learning_rate": 4.193627291109517e-06, "loss": 0.7842, "step": 6560 }, { "epoch": 0.2034019085388524, "grad_norm": 3.051257010306594, "learning_rate": 4.1928119496445115e-06, "loss": 0.7199, "step": 6565 }, { "epoch": 0.20355682240674186, "grad_norm": 2.4524698122552255, "learning_rate": 4.191996608179506e-06, "loss": 0.7629, "step": 6570 }, { "epoch": 0.2037117362746313, "grad_norm": 2.83490185809297, "learning_rate": 4.1911812667145005e-06, "loss": 0.6972, "step": 6575 }, { "epoch": 0.20386665014252076, "grad_norm": 2.734833540141071, "learning_rate": 4.190365925249495e-06, "loss": 0.7695, "step": 6580 }, { "epoch": 0.20402156401041022, "grad_norm": 2.922393273811847, "learning_rate": 4.189550583784489e-06, "loss": 0.8044, "step": 6585 }, { "epoch": 0.20417647787829968, "grad_norm": 2.4772889953247414, "learning_rate": 4.188735242319484e-06, "loss": 0.7648, "step": 6590 }, { "epoch": 0.2043313917461891, "grad_norm": 3.7921486270537996, "learning_rate": 4.187919900854478e-06, "loss": 0.7481, "step": 6595 }, { "epoch": 0.20448630561407857, "grad_norm": 2.522984228197576, "learning_rate": 4.187104559389473e-06, "loss": 0.7516, "step": 6600 }, { "epoch": 0.20464121948196803, "grad_norm": 2.491431004048215, "learning_rate": 4.186289217924467e-06, "loss": 0.8023, "step": 6605 }, { "epoch": 0.20479613334985747, "grad_norm": 2.495684905724102, "learning_rate": 4.185473876459462e-06, "loss": 0.7473, "step": 6610 }, { "epoch": 0.20495104721774693, "grad_norm": 2.7669001666074515, "learning_rate": 4.184658534994456e-06, "loss": 0.7086, "step": 6615 }, { "epoch": 0.2051059610856364, "grad_norm": 3.262634675066312, "learning_rate": 4.183843193529451e-06, "loss": 0.8734, "step": 6620 }, { "epoch": 0.20526087495352585, "grad_norm": 2.8676703241852266, "learning_rate": 4.183027852064445e-06, "loss": 0.8204, "step": 6625 }, { "epoch": 0.20541578882141528, "grad_norm": 2.9866774877512694, "learning_rate": 4.1822125105994395e-06, "loss": 0.7274, "step": 6630 }, { "epoch": 0.20557070268930475, "grad_norm": 4.38464266817254, "learning_rate": 4.181397169134434e-06, "loss": 0.7538, "step": 6635 }, { "epoch": 0.2057256165571942, "grad_norm": 2.7106241217413936, "learning_rate": 4.1805818276694284e-06, "loss": 0.7949, "step": 6640 }, { "epoch": 0.20588053042508364, "grad_norm": 2.87065488630285, "learning_rate": 4.179766486204423e-06, "loss": 0.6725, "step": 6645 }, { "epoch": 0.2060354442929731, "grad_norm": 3.126882342458352, "learning_rate": 4.178951144739417e-06, "loss": 0.7774, "step": 6650 }, { "epoch": 0.20619035816086256, "grad_norm": 3.519329703367183, "learning_rate": 4.178135803274412e-06, "loss": 0.7027, "step": 6655 }, { "epoch": 0.20634527202875202, "grad_norm": 2.254216273234946, "learning_rate": 4.177320461809406e-06, "loss": 0.7536, "step": 6660 }, { "epoch": 0.20650018589664146, "grad_norm": 2.5258953313995507, "learning_rate": 4.176505120344401e-06, "loss": 0.8543, "step": 6665 }, { "epoch": 0.20665509976453092, "grad_norm": 2.7859037571493332, "learning_rate": 4.175689778879395e-06, "loss": 0.7897, "step": 6670 }, { "epoch": 0.20681001363242038, "grad_norm": 2.8198089123704824, "learning_rate": 4.17487443741439e-06, "loss": 0.8451, "step": 6675 }, { "epoch": 0.20696492750030981, "grad_norm": 2.6027890293948093, "learning_rate": 4.174059095949384e-06, "loss": 0.7323, "step": 6680 }, { "epoch": 0.20711984136819928, "grad_norm": 2.8030377025476296, "learning_rate": 4.1732437544843786e-06, "loss": 0.8014, "step": 6685 }, { "epoch": 0.20727475523608874, "grad_norm": 2.6458179059800906, "learning_rate": 4.172428413019373e-06, "loss": 0.7853, "step": 6690 }, { "epoch": 0.2074296691039782, "grad_norm": 2.6688072937990586, "learning_rate": 4.1716130715543675e-06, "loss": 0.8132, "step": 6695 }, { "epoch": 0.20758458297186763, "grad_norm": 2.89580637256327, "learning_rate": 4.170797730089361e-06, "loss": 0.8479, "step": 6700 }, { "epoch": 0.2077394968397571, "grad_norm": 3.1278625642316067, "learning_rate": 4.169982388624356e-06, "loss": 0.7925, "step": 6705 }, { "epoch": 0.20789441070764655, "grad_norm": 2.265537636382249, "learning_rate": 4.169167047159351e-06, "loss": 0.7343, "step": 6710 }, { "epoch": 0.20804932457553602, "grad_norm": 3.211505036601466, "learning_rate": 4.168351705694345e-06, "loss": 0.7466, "step": 6715 }, { "epoch": 0.20820423844342545, "grad_norm": 3.0759428393537473, "learning_rate": 4.16753636422934e-06, "loss": 0.6797, "step": 6720 }, { "epoch": 0.2083591523113149, "grad_norm": 2.826308010149381, "learning_rate": 4.166721022764334e-06, "loss": 0.7039, "step": 6725 }, { "epoch": 0.20851406617920437, "grad_norm": 2.310719257065342, "learning_rate": 4.165905681299329e-06, "loss": 0.696, "step": 6730 }, { "epoch": 0.2086689800470938, "grad_norm": 3.2001722491004836, "learning_rate": 4.165090339834323e-06, "loss": 0.7654, "step": 6735 }, { "epoch": 0.20882389391498327, "grad_norm": 2.1859044805047882, "learning_rate": 4.164274998369318e-06, "loss": 0.6917, "step": 6740 }, { "epoch": 0.20897880778287273, "grad_norm": 2.4849102111600443, "learning_rate": 4.163459656904312e-06, "loss": 0.7137, "step": 6745 }, { "epoch": 0.2091337216507622, "grad_norm": 2.5150890016818845, "learning_rate": 4.1626443154393065e-06, "loss": 0.7544, "step": 6750 }, { "epoch": 0.20928863551865162, "grad_norm": 2.7652279397022372, "learning_rate": 4.161828973974301e-06, "loss": 0.7174, "step": 6755 }, { "epoch": 0.20944354938654108, "grad_norm": 3.542008475481419, "learning_rate": 4.1610136325092955e-06, "loss": 0.7856, "step": 6760 }, { "epoch": 0.20959846325443054, "grad_norm": 2.3813518059891896, "learning_rate": 4.16019829104429e-06, "loss": 0.7159, "step": 6765 }, { "epoch": 0.20975337712231998, "grad_norm": 2.456257912985434, "learning_rate": 4.159382949579284e-06, "loss": 0.8033, "step": 6770 }, { "epoch": 0.20990829099020944, "grad_norm": 3.750833892009264, "learning_rate": 4.158567608114278e-06, "loss": 0.8167, "step": 6775 }, { "epoch": 0.2100632048580989, "grad_norm": 2.918408933417208, "learning_rate": 4.157752266649273e-06, "loss": 0.8601, "step": 6780 }, { "epoch": 0.21021811872598836, "grad_norm": 2.2706779480716115, "learning_rate": 4.156936925184267e-06, "loss": 0.794, "step": 6785 }, { "epoch": 0.2103730325938778, "grad_norm": 3.186515139918632, "learning_rate": 4.156121583719262e-06, "loss": 0.7871, "step": 6790 }, { "epoch": 0.21052794646176726, "grad_norm": 2.9505079386299156, "learning_rate": 4.155306242254256e-06, "loss": 0.7012, "step": 6795 }, { "epoch": 0.21068286032965672, "grad_norm": 4.561986673528028, "learning_rate": 4.154490900789251e-06, "loss": 0.8184, "step": 6800 }, { "epoch": 0.21083777419754615, "grad_norm": 3.9312348605817147, "learning_rate": 4.153675559324246e-06, "loss": 0.7446, "step": 6805 }, { "epoch": 0.2109926880654356, "grad_norm": 2.5533743046716775, "learning_rate": 4.15286021785924e-06, "loss": 0.7569, "step": 6810 }, { "epoch": 0.21114760193332507, "grad_norm": 2.9200793251590493, "learning_rate": 4.1520448763942345e-06, "loss": 0.7478, "step": 6815 }, { "epoch": 0.21130251580121454, "grad_norm": 2.891999311892523, "learning_rate": 4.151229534929229e-06, "loss": 0.773, "step": 6820 }, { "epoch": 0.21145742966910397, "grad_norm": 2.519058744244231, "learning_rate": 4.1504141934642234e-06, "loss": 0.7728, "step": 6825 }, { "epoch": 0.21161234353699343, "grad_norm": 2.631784516613136, "learning_rate": 4.149598851999218e-06, "loss": 0.7348, "step": 6830 }, { "epoch": 0.2117672574048829, "grad_norm": 2.956302138575019, "learning_rate": 4.148783510534212e-06, "loss": 0.8083, "step": 6835 }, { "epoch": 0.21192217127277233, "grad_norm": 2.8535547394017358, "learning_rate": 4.147968169069207e-06, "loss": 0.7579, "step": 6840 }, { "epoch": 0.2120770851406618, "grad_norm": 3.823575214015422, "learning_rate": 4.147152827604201e-06, "loss": 0.7972, "step": 6845 }, { "epoch": 0.21223199900855125, "grad_norm": 4.799961644004689, "learning_rate": 4.146337486139195e-06, "loss": 0.8143, "step": 6850 }, { "epoch": 0.2123869128764407, "grad_norm": 2.320345884978964, "learning_rate": 4.14552214467419e-06, "loss": 0.7116, "step": 6855 }, { "epoch": 0.21254182674433014, "grad_norm": 2.6747981623250867, "learning_rate": 4.144706803209184e-06, "loss": 0.7785, "step": 6860 }, { "epoch": 0.2126967406122196, "grad_norm": 3.2844732334497353, "learning_rate": 4.143891461744179e-06, "loss": 0.7672, "step": 6865 }, { "epoch": 0.21285165448010906, "grad_norm": 2.766273569538639, "learning_rate": 4.143076120279173e-06, "loss": 0.771, "step": 6870 }, { "epoch": 0.21300656834799853, "grad_norm": 2.4842803050324815, "learning_rate": 4.142260778814168e-06, "loss": 0.7213, "step": 6875 }, { "epoch": 0.21316148221588796, "grad_norm": 3.0657296699520735, "learning_rate": 4.141445437349162e-06, "loss": 0.7733, "step": 6880 }, { "epoch": 0.21331639608377742, "grad_norm": 3.0093896883604665, "learning_rate": 4.140630095884157e-06, "loss": 0.809, "step": 6885 }, { "epoch": 0.21347130995166688, "grad_norm": 3.577919184919868, "learning_rate": 4.1398147544191505e-06, "loss": 0.7488, "step": 6890 }, { "epoch": 0.21362622381955632, "grad_norm": 3.720055329075739, "learning_rate": 4.138999412954146e-06, "loss": 0.6696, "step": 6895 }, { "epoch": 0.21378113768744578, "grad_norm": 4.523349618062322, "learning_rate": 4.13818407148914e-06, "loss": 0.8138, "step": 6900 }, { "epoch": 0.21393605155533524, "grad_norm": 2.7493922788576253, "learning_rate": 4.137368730024135e-06, "loss": 0.7858, "step": 6905 }, { "epoch": 0.2140909654232247, "grad_norm": 3.605582871406352, "learning_rate": 4.136553388559129e-06, "loss": 0.7796, "step": 6910 }, { "epoch": 0.21424587929111413, "grad_norm": 3.9963056744892476, "learning_rate": 4.135738047094124e-06, "loss": 0.7907, "step": 6915 }, { "epoch": 0.2144007931590036, "grad_norm": 2.850584507301773, "learning_rate": 4.134922705629118e-06, "loss": 0.717, "step": 6920 }, { "epoch": 0.21455570702689306, "grad_norm": 2.595576961491219, "learning_rate": 4.134107364164112e-06, "loss": 0.7329, "step": 6925 }, { "epoch": 0.2147106208947825, "grad_norm": 2.8892946483477653, "learning_rate": 4.133292022699107e-06, "loss": 0.6584, "step": 6930 }, { "epoch": 0.21486553476267195, "grad_norm": 2.478350153658545, "learning_rate": 4.132476681234101e-06, "loss": 0.735, "step": 6935 }, { "epoch": 0.2150204486305614, "grad_norm": 3.510004682364688, "learning_rate": 4.131661339769096e-06, "loss": 0.8153, "step": 6940 }, { "epoch": 0.21517536249845087, "grad_norm": 3.268354299571415, "learning_rate": 4.13084599830409e-06, "loss": 0.7322, "step": 6945 }, { "epoch": 0.2153302763663403, "grad_norm": 2.504649541846655, "learning_rate": 4.130030656839085e-06, "loss": 0.7669, "step": 6950 }, { "epoch": 0.21548519023422977, "grad_norm": 2.4485111188336433, "learning_rate": 4.1292153153740785e-06, "loss": 0.7823, "step": 6955 }, { "epoch": 0.21564010410211923, "grad_norm": 2.457671425641937, "learning_rate": 4.128399973909074e-06, "loss": 0.7125, "step": 6960 }, { "epoch": 0.21579501797000866, "grad_norm": 2.7293001988432577, "learning_rate": 4.1275846324440674e-06, "loss": 0.6647, "step": 6965 }, { "epoch": 0.21594993183789812, "grad_norm": 2.8559363713322297, "learning_rate": 4.126769290979063e-06, "loss": 0.6923, "step": 6970 }, { "epoch": 0.21610484570578758, "grad_norm": 2.6385080868533755, "learning_rate": 4.125953949514056e-06, "loss": 0.7401, "step": 6975 }, { "epoch": 0.21625975957367705, "grad_norm": 2.821712786098711, "learning_rate": 4.125138608049052e-06, "loss": 0.6695, "step": 6980 }, { "epoch": 0.21641467344156648, "grad_norm": 3.7593957609682294, "learning_rate": 4.124323266584045e-06, "loss": 0.7062, "step": 6985 }, { "epoch": 0.21656958730945594, "grad_norm": 3.3633706222307946, "learning_rate": 4.1235079251190406e-06, "loss": 0.6944, "step": 6990 }, { "epoch": 0.2167245011773454, "grad_norm": 2.4616802700380633, "learning_rate": 4.122692583654035e-06, "loss": 0.853, "step": 6995 }, { "epoch": 0.21687941504523484, "grad_norm": 2.1970970810482284, "learning_rate": 4.1218772421890295e-06, "loss": 0.671, "step": 7000 }, { "epoch": 0.2170343289131243, "grad_norm": 2.621740582651703, "learning_rate": 4.121061900724024e-06, "loss": 0.6755, "step": 7005 }, { "epoch": 0.21718924278101376, "grad_norm": 2.920740443769776, "learning_rate": 4.1202465592590176e-06, "loss": 0.7769, "step": 7010 }, { "epoch": 0.21734415664890322, "grad_norm": 2.4660382957932105, "learning_rate": 4.119431217794013e-06, "loss": 0.7514, "step": 7015 }, { "epoch": 0.21749907051679265, "grad_norm": 2.6617399806713253, "learning_rate": 4.1186158763290065e-06, "loss": 0.6961, "step": 7020 }, { "epoch": 0.21765398438468211, "grad_norm": 3.7741144472178023, "learning_rate": 4.117800534864002e-06, "loss": 0.8308, "step": 7025 }, { "epoch": 0.21780889825257158, "grad_norm": 2.605471672451316, "learning_rate": 4.116985193398995e-06, "loss": 0.6901, "step": 7030 }, { "epoch": 0.21796381212046104, "grad_norm": 2.4961471470706744, "learning_rate": 4.116169851933991e-06, "loss": 0.7797, "step": 7035 }, { "epoch": 0.21811872598835047, "grad_norm": 2.6941592964804433, "learning_rate": 4.115354510468984e-06, "loss": 0.7483, "step": 7040 }, { "epoch": 0.21827363985623993, "grad_norm": 3.0016296356033063, "learning_rate": 4.11453916900398e-06, "loss": 0.7992, "step": 7045 }, { "epoch": 0.2184285537241294, "grad_norm": 3.6146924002031815, "learning_rate": 4.113723827538973e-06, "loss": 0.7025, "step": 7050 }, { "epoch": 0.21858346759201883, "grad_norm": 2.2674016002915667, "learning_rate": 4.1129084860739685e-06, "loss": 0.7333, "step": 7055 }, { "epoch": 0.2187383814599083, "grad_norm": 2.8677871183620223, "learning_rate": 4.112093144608962e-06, "loss": 0.7422, "step": 7060 }, { "epoch": 0.21889329532779775, "grad_norm": 2.439171074822472, "learning_rate": 4.1112778031439575e-06, "loss": 0.8746, "step": 7065 }, { "epoch": 0.2190482091956872, "grad_norm": 3.7185083093213365, "learning_rate": 4.110462461678951e-06, "loss": 0.7576, "step": 7070 }, { "epoch": 0.21920312306357664, "grad_norm": 2.9497347328694152, "learning_rate": 4.109647120213946e-06, "loss": 0.7534, "step": 7075 }, { "epoch": 0.2193580369314661, "grad_norm": 2.391660862114072, "learning_rate": 4.10883177874894e-06, "loss": 0.7259, "step": 7080 }, { "epoch": 0.21951295079935557, "grad_norm": 2.3902428405267973, "learning_rate": 4.1080164372839345e-06, "loss": 0.7767, "step": 7085 }, { "epoch": 0.219667864667245, "grad_norm": 2.4452566999103453, "learning_rate": 4.10720109581893e-06, "loss": 0.7169, "step": 7090 }, { "epoch": 0.21982277853513446, "grad_norm": 2.34486821751047, "learning_rate": 4.106385754353923e-06, "loss": 0.7948, "step": 7095 }, { "epoch": 0.21997769240302392, "grad_norm": 2.0236611787407512, "learning_rate": 4.105570412888919e-06, "loss": 0.6994, "step": 7100 }, { "epoch": 0.22013260627091338, "grad_norm": 3.5184474503017813, "learning_rate": 4.104755071423912e-06, "loss": 0.6827, "step": 7105 }, { "epoch": 0.22028752013880282, "grad_norm": 3.443185525836317, "learning_rate": 4.103939729958908e-06, "loss": 0.7576, "step": 7110 }, { "epoch": 0.22044243400669228, "grad_norm": 2.4685360317066536, "learning_rate": 4.103124388493901e-06, "loss": 0.7701, "step": 7115 }, { "epoch": 0.22059734787458174, "grad_norm": 3.4702105931694867, "learning_rate": 4.1023090470288965e-06, "loss": 0.7664, "step": 7120 }, { "epoch": 0.22075226174247117, "grad_norm": 2.565972180267018, "learning_rate": 4.10149370556389e-06, "loss": 0.7438, "step": 7125 }, { "epoch": 0.22090717561036063, "grad_norm": 4.290784037579915, "learning_rate": 4.1006783640988854e-06, "loss": 0.7941, "step": 7130 }, { "epoch": 0.2210620894782501, "grad_norm": 3.134007415727662, "learning_rate": 4.099863022633879e-06, "loss": 0.7374, "step": 7135 }, { "epoch": 0.22121700334613956, "grad_norm": 3.2772399375243078, "learning_rate": 4.099047681168874e-06, "loss": 0.7371, "step": 7140 }, { "epoch": 0.221371917214029, "grad_norm": 3.6418306784732035, "learning_rate": 4.098232339703868e-06, "loss": 0.7513, "step": 7145 }, { "epoch": 0.22152683108191845, "grad_norm": 2.5133250419959716, "learning_rate": 4.097416998238863e-06, "loss": 0.8105, "step": 7150 }, { "epoch": 0.2216817449498079, "grad_norm": 2.4414357901906554, "learning_rate": 4.096601656773857e-06, "loss": 0.82, "step": 7155 }, { "epoch": 0.22183665881769735, "grad_norm": 3.609904679685894, "learning_rate": 4.095786315308851e-06, "loss": 0.7431, "step": 7160 }, { "epoch": 0.2219915726855868, "grad_norm": 2.6515064154091545, "learning_rate": 4.094970973843846e-06, "loss": 0.7491, "step": 7165 }, { "epoch": 0.22214648655347627, "grad_norm": 3.0956654645167254, "learning_rate": 4.09415563237884e-06, "loss": 0.7276, "step": 7170 }, { "epoch": 0.22230140042136573, "grad_norm": 2.2908580326169097, "learning_rate": 4.093340290913835e-06, "loss": 0.7439, "step": 7175 }, { "epoch": 0.22245631428925516, "grad_norm": 2.7239946033052704, "learning_rate": 4.092524949448829e-06, "loss": 0.7557, "step": 7180 }, { "epoch": 0.22261122815714462, "grad_norm": 4.534514450785281, "learning_rate": 4.0917096079838245e-06, "loss": 0.7963, "step": 7185 }, { "epoch": 0.2227661420250341, "grad_norm": 3.515611036154439, "learning_rate": 4.090894266518818e-06, "loss": 0.8088, "step": 7190 }, { "epoch": 0.22292105589292355, "grad_norm": 2.486766041302042, "learning_rate": 4.090078925053813e-06, "loss": 0.7702, "step": 7195 }, { "epoch": 0.22307596976081298, "grad_norm": 3.071857453860059, "learning_rate": 4.089263583588807e-06, "loss": 0.7213, "step": 7200 }, { "epoch": 0.22323088362870244, "grad_norm": 2.4197976913216217, "learning_rate": 4.088448242123802e-06, "loss": 0.7739, "step": 7205 }, { "epoch": 0.2233857974965919, "grad_norm": 2.2723907692630108, "learning_rate": 4.087632900658796e-06, "loss": 0.7242, "step": 7210 }, { "epoch": 0.22354071136448134, "grad_norm": 2.7074124294164794, "learning_rate": 4.086817559193791e-06, "loss": 0.7992, "step": 7215 }, { "epoch": 0.2236956252323708, "grad_norm": 2.616352735115512, "learning_rate": 4.086002217728785e-06, "loss": 0.7167, "step": 7220 }, { "epoch": 0.22385053910026026, "grad_norm": 2.472574867337568, "learning_rate": 4.08518687626378e-06, "loss": 0.6763, "step": 7225 }, { "epoch": 0.22400545296814972, "grad_norm": 2.4730126264521792, "learning_rate": 4.084371534798774e-06, "loss": 0.7567, "step": 7230 }, { "epoch": 0.22416036683603915, "grad_norm": 3.569799076235403, "learning_rate": 4.083556193333768e-06, "loss": 0.7168, "step": 7235 }, { "epoch": 0.22431528070392862, "grad_norm": 4.432887142280187, "learning_rate": 4.082740851868763e-06, "loss": 0.7309, "step": 7240 }, { "epoch": 0.22447019457181808, "grad_norm": 2.706962380175738, "learning_rate": 4.081925510403757e-06, "loss": 0.7408, "step": 7245 }, { "epoch": 0.2246251084397075, "grad_norm": 2.810190581292827, "learning_rate": 4.081110168938752e-06, "loss": 0.7184, "step": 7250 }, { "epoch": 0.22478002230759697, "grad_norm": 2.5441414361143946, "learning_rate": 4.080294827473746e-06, "loss": 0.7551, "step": 7255 }, { "epoch": 0.22493493617548643, "grad_norm": 2.717406021630738, "learning_rate": 4.0794794860087405e-06, "loss": 0.7818, "step": 7260 }, { "epoch": 0.2250898500433759, "grad_norm": 2.120058643697333, "learning_rate": 4.078664144543735e-06, "loss": 0.7558, "step": 7265 }, { "epoch": 0.22524476391126533, "grad_norm": 2.9166856179867175, "learning_rate": 4.0778488030787294e-06, "loss": 0.7804, "step": 7270 }, { "epoch": 0.2253996777791548, "grad_norm": 2.72489277795081, "learning_rate": 4.077033461613724e-06, "loss": 0.7067, "step": 7275 }, { "epoch": 0.22555459164704425, "grad_norm": 2.7327860780259208, "learning_rate": 4.076218120148719e-06, "loss": 0.7637, "step": 7280 }, { "epoch": 0.22570950551493368, "grad_norm": 2.3571887273165637, "learning_rate": 4.075402778683713e-06, "loss": 0.7616, "step": 7285 }, { "epoch": 0.22586441938282315, "grad_norm": 2.2479971805413634, "learning_rate": 4.074587437218708e-06, "loss": 0.7207, "step": 7290 }, { "epoch": 0.2260193332507126, "grad_norm": 4.189136034558924, "learning_rate": 4.073772095753702e-06, "loss": 0.7824, "step": 7295 }, { "epoch": 0.22617424711860207, "grad_norm": 2.7665070052278424, "learning_rate": 4.072956754288697e-06, "loss": 0.763, "step": 7300 }, { "epoch": 0.2263291609864915, "grad_norm": 3.494102987871959, "learning_rate": 4.072141412823691e-06, "loss": 0.7543, "step": 7305 }, { "epoch": 0.22648407485438096, "grad_norm": 2.238736506775003, "learning_rate": 4.071326071358685e-06, "loss": 0.801, "step": 7310 }, { "epoch": 0.22663898872227042, "grad_norm": 4.163694202615216, "learning_rate": 4.0705107298936796e-06, "loss": 0.8556, "step": 7315 }, { "epoch": 0.22679390259015988, "grad_norm": 2.642931566000518, "learning_rate": 4.069695388428674e-06, "loss": 0.7406, "step": 7320 }, { "epoch": 0.22694881645804932, "grad_norm": 4.807726237462002, "learning_rate": 4.0688800469636685e-06, "loss": 0.7504, "step": 7325 }, { "epoch": 0.22710373032593878, "grad_norm": 2.666995099614276, "learning_rate": 4.068064705498663e-06, "loss": 0.7942, "step": 7330 }, { "epoch": 0.22725864419382824, "grad_norm": 2.6084521854016454, "learning_rate": 4.067249364033657e-06, "loss": 0.7862, "step": 7335 }, { "epoch": 0.22741355806171767, "grad_norm": 2.7247415191676643, "learning_rate": 4.066434022568652e-06, "loss": 0.7691, "step": 7340 }, { "epoch": 0.22756847192960714, "grad_norm": 2.4653793303879685, "learning_rate": 4.065618681103646e-06, "loss": 0.8284, "step": 7345 }, { "epoch": 0.2277233857974966, "grad_norm": 3.982721305777081, "learning_rate": 4.064803339638641e-06, "loss": 0.7239, "step": 7350 }, { "epoch": 0.22787829966538606, "grad_norm": 2.694023959730302, "learning_rate": 4.063987998173635e-06, "loss": 0.7781, "step": 7355 }, { "epoch": 0.2280332135332755, "grad_norm": 2.2444471665821797, "learning_rate": 4.06317265670863e-06, "loss": 0.7007, "step": 7360 }, { "epoch": 0.22818812740116495, "grad_norm": 2.706250851967311, "learning_rate": 4.062357315243624e-06, "loss": 0.7833, "step": 7365 }, { "epoch": 0.22834304126905441, "grad_norm": 2.707578515325793, "learning_rate": 4.061541973778619e-06, "loss": 0.7646, "step": 7370 }, { "epoch": 0.22849795513694385, "grad_norm": 2.2682386068533624, "learning_rate": 4.060726632313614e-06, "loss": 0.6765, "step": 7375 }, { "epoch": 0.2286528690048333, "grad_norm": 2.3497721319573683, "learning_rate": 4.0599112908486075e-06, "loss": 0.7597, "step": 7380 }, { "epoch": 0.22880778287272277, "grad_norm": 2.9368963810548125, "learning_rate": 4.059095949383602e-06, "loss": 0.719, "step": 7385 }, { "epoch": 0.22896269674061223, "grad_norm": 2.9019747452326046, "learning_rate": 4.0582806079185965e-06, "loss": 0.7717, "step": 7390 }, { "epoch": 0.22911761060850167, "grad_norm": 2.3952404685524713, "learning_rate": 4.057465266453591e-06, "loss": 0.7812, "step": 7395 }, { "epoch": 0.22927252447639113, "grad_norm": 2.453251265953789, "learning_rate": 4.056649924988585e-06, "loss": 0.7474, "step": 7400 }, { "epoch": 0.2294274383442806, "grad_norm": 2.3928178657419386, "learning_rate": 4.05583458352358e-06, "loss": 0.7732, "step": 7405 }, { "epoch": 0.22958235221217002, "grad_norm": 3.410296769418617, "learning_rate": 4.055019242058574e-06, "loss": 0.9138, "step": 7410 }, { "epoch": 0.22973726608005948, "grad_norm": 3.3359264362602397, "learning_rate": 4.054203900593569e-06, "loss": 0.708, "step": 7415 }, { "epoch": 0.22989217994794894, "grad_norm": 2.75020212019616, "learning_rate": 4.053388559128563e-06, "loss": 0.693, "step": 7420 }, { "epoch": 0.2300470938158384, "grad_norm": 2.6565452072529583, "learning_rate": 4.052573217663558e-06, "loss": 0.7026, "step": 7425 }, { "epoch": 0.23020200768372784, "grad_norm": 3.313793964927385, "learning_rate": 4.051757876198552e-06, "loss": 0.7524, "step": 7430 }, { "epoch": 0.2303569215516173, "grad_norm": 3.0770391770251595, "learning_rate": 4.050942534733547e-06, "loss": 0.709, "step": 7435 }, { "epoch": 0.23051183541950676, "grad_norm": 2.9132267231631603, "learning_rate": 4.050127193268541e-06, "loss": 0.6906, "step": 7440 }, { "epoch": 0.2306667492873962, "grad_norm": 2.8515207416129393, "learning_rate": 4.0493118518035355e-06, "loss": 0.7368, "step": 7445 }, { "epoch": 0.23082166315528566, "grad_norm": 2.8572425192014745, "learning_rate": 4.04849651033853e-06, "loss": 0.7255, "step": 7450 }, { "epoch": 0.23097657702317512, "grad_norm": 2.810423484004944, "learning_rate": 4.0476811688735244e-06, "loss": 0.6785, "step": 7455 }, { "epoch": 0.23113149089106458, "grad_norm": 4.041776415257052, "learning_rate": 4.046865827408519e-06, "loss": 0.7738, "step": 7460 }, { "epoch": 0.231286404758954, "grad_norm": 3.3559143199308075, "learning_rate": 4.046050485943513e-06, "loss": 0.7044, "step": 7465 }, { "epoch": 0.23144131862684347, "grad_norm": 2.396377610971588, "learning_rate": 4.045235144478508e-06, "loss": 0.6795, "step": 7470 }, { "epoch": 0.23159623249473293, "grad_norm": 2.513247077122532, "learning_rate": 4.044419803013502e-06, "loss": 0.6888, "step": 7475 }, { "epoch": 0.2317511463626224, "grad_norm": 2.3896379682825337, "learning_rate": 4.043604461548497e-06, "loss": 0.7408, "step": 7480 }, { "epoch": 0.23190606023051183, "grad_norm": 2.723353865724839, "learning_rate": 4.042789120083491e-06, "loss": 0.7763, "step": 7485 }, { "epoch": 0.2320609740984013, "grad_norm": 2.5537001610890377, "learning_rate": 4.041973778618486e-06, "loss": 0.7218, "step": 7490 }, { "epoch": 0.23221588796629075, "grad_norm": 2.5341387605825894, "learning_rate": 4.04115843715348e-06, "loss": 0.7513, "step": 7495 }, { "epoch": 0.23237080183418019, "grad_norm": 2.672804996529566, "learning_rate": 4.0403430956884746e-06, "loss": 0.7085, "step": 7500 }, { "epoch": 0.23252571570206965, "grad_norm": 2.3810383809268165, "learning_rate": 4.039527754223469e-06, "loss": 0.6892, "step": 7505 }, { "epoch": 0.2326806295699591, "grad_norm": 2.071681503545533, "learning_rate": 4.0387124127584635e-06, "loss": 0.7914, "step": 7510 }, { "epoch": 0.23283554343784857, "grad_norm": 2.681958352616718, "learning_rate": 4.037897071293458e-06, "loss": 0.7157, "step": 7515 }, { "epoch": 0.232990457305738, "grad_norm": 2.452338305585355, "learning_rate": 4.037081729828452e-06, "loss": 0.6808, "step": 7520 }, { "epoch": 0.23314537117362746, "grad_norm": 2.9577952673168295, "learning_rate": 4.036266388363447e-06, "loss": 0.8112, "step": 7525 }, { "epoch": 0.23330028504151692, "grad_norm": 2.171096508588843, "learning_rate": 4.035451046898441e-06, "loss": 0.6885, "step": 7530 }, { "epoch": 0.23345519890940636, "grad_norm": 3.2102593461315827, "learning_rate": 4.034635705433436e-06, "loss": 0.7327, "step": 7535 }, { "epoch": 0.23361011277729582, "grad_norm": 2.5326789984013667, "learning_rate": 4.03382036396843e-06, "loss": 0.7322, "step": 7540 }, { "epoch": 0.23376502664518528, "grad_norm": 2.4886591033005643, "learning_rate": 4.033005022503425e-06, "loss": 0.7779, "step": 7545 }, { "epoch": 0.23391994051307474, "grad_norm": 2.7110163225689172, "learning_rate": 4.032189681038419e-06, "loss": 0.6991, "step": 7550 }, { "epoch": 0.23407485438096418, "grad_norm": 2.107106610884122, "learning_rate": 4.031374339573414e-06, "loss": 0.6913, "step": 7555 }, { "epoch": 0.23422976824885364, "grad_norm": 4.050855137986849, "learning_rate": 4.030558998108408e-06, "loss": 0.7399, "step": 7560 }, { "epoch": 0.2343846821167431, "grad_norm": 4.3075113349835625, "learning_rate": 4.0297436566434025e-06, "loss": 0.7771, "step": 7565 }, { "epoch": 0.23453959598463253, "grad_norm": 3.092694482796232, "learning_rate": 4.028928315178397e-06, "loss": 0.7334, "step": 7570 }, { "epoch": 0.234694509852522, "grad_norm": 2.334607919169634, "learning_rate": 4.0281129737133914e-06, "loss": 0.6794, "step": 7575 }, { "epoch": 0.23484942372041145, "grad_norm": 4.642987572323297, "learning_rate": 4.027297632248386e-06, "loss": 0.738, "step": 7580 }, { "epoch": 0.23500433758830092, "grad_norm": 2.861814188751204, "learning_rate": 4.02648229078338e-06, "loss": 0.7574, "step": 7585 }, { "epoch": 0.23515925145619035, "grad_norm": 3.424499024586531, "learning_rate": 4.025666949318375e-06, "loss": 0.6821, "step": 7590 }, { "epoch": 0.2353141653240798, "grad_norm": 2.335629636675943, "learning_rate": 4.024851607853369e-06, "loss": 0.7395, "step": 7595 }, { "epoch": 0.23546907919196927, "grad_norm": 2.7105600346720724, "learning_rate": 4.024036266388364e-06, "loss": 0.7081, "step": 7600 }, { "epoch": 0.2356239930598587, "grad_norm": 2.5652782202198723, "learning_rate": 4.023220924923358e-06, "loss": 0.735, "step": 7605 }, { "epoch": 0.23577890692774817, "grad_norm": 2.1315554798143883, "learning_rate": 4.022405583458353e-06, "loss": 0.7434, "step": 7610 }, { "epoch": 0.23593382079563763, "grad_norm": 3.411540834150471, "learning_rate": 4.021590241993347e-06, "loss": 0.7869, "step": 7615 }, { "epoch": 0.2360887346635271, "grad_norm": 2.6473257548930706, "learning_rate": 4.020774900528342e-06, "loss": 0.7926, "step": 7620 }, { "epoch": 0.23624364853141652, "grad_norm": 2.112530153318386, "learning_rate": 4.019959559063336e-06, "loss": 0.7372, "step": 7625 }, { "epoch": 0.23639856239930598, "grad_norm": 2.5345829909541147, "learning_rate": 4.0191442175983305e-06, "loss": 0.7709, "step": 7630 }, { "epoch": 0.23655347626719544, "grad_norm": 2.3725946040146892, "learning_rate": 4.018328876133325e-06, "loss": 0.6475, "step": 7635 }, { "epoch": 0.2367083901350849, "grad_norm": 2.6463775257990076, "learning_rate": 4.017513534668319e-06, "loss": 0.6783, "step": 7640 }, { "epoch": 0.23686330400297434, "grad_norm": 2.296973876115891, "learning_rate": 4.016698193203314e-06, "loss": 0.7135, "step": 7645 }, { "epoch": 0.2370182178708638, "grad_norm": 2.9032072123588555, "learning_rate": 4.015882851738308e-06, "loss": 0.7728, "step": 7650 }, { "epoch": 0.23717313173875326, "grad_norm": 2.582472843719539, "learning_rate": 4.015067510273303e-06, "loss": 0.7135, "step": 7655 }, { "epoch": 0.2373280456066427, "grad_norm": 2.9351248333974502, "learning_rate": 4.014252168808297e-06, "loss": 0.7271, "step": 7660 }, { "epoch": 0.23748295947453216, "grad_norm": 2.2858536379146295, "learning_rate": 4.013436827343292e-06, "loss": 0.742, "step": 7665 }, { "epoch": 0.23763787334242162, "grad_norm": 4.072714585878023, "learning_rate": 4.012621485878286e-06, "loss": 0.7027, "step": 7670 }, { "epoch": 0.23779278721031108, "grad_norm": 2.7428436077497294, "learning_rate": 4.011806144413281e-06, "loss": 0.7504, "step": 7675 }, { "epoch": 0.2379477010782005, "grad_norm": 3.171061912699437, "learning_rate": 4.010990802948275e-06, "loss": 0.7452, "step": 7680 }, { "epoch": 0.23810261494608997, "grad_norm": 2.548717667216393, "learning_rate": 4.0101754614832695e-06, "loss": 0.7984, "step": 7685 }, { "epoch": 0.23825752881397944, "grad_norm": 2.513335372323551, "learning_rate": 4.009360120018264e-06, "loss": 0.7679, "step": 7690 }, { "epoch": 0.23841244268186887, "grad_norm": 2.5296274615357777, "learning_rate": 4.0085447785532585e-06, "loss": 0.7445, "step": 7695 }, { "epoch": 0.23856735654975833, "grad_norm": 3.508848484573184, "learning_rate": 4.007729437088253e-06, "loss": 0.7235, "step": 7700 }, { "epoch": 0.2387222704176478, "grad_norm": 2.875350763615326, "learning_rate": 4.006914095623247e-06, "loss": 0.853, "step": 7705 }, { "epoch": 0.23887718428553725, "grad_norm": 2.2801868601635653, "learning_rate": 4.006098754158242e-06, "loss": 0.7731, "step": 7710 }, { "epoch": 0.2390320981534267, "grad_norm": 2.284263666242802, "learning_rate": 4.005283412693236e-06, "loss": 0.7062, "step": 7715 }, { "epoch": 0.23918701202131615, "grad_norm": 2.936956611404008, "learning_rate": 4.004468071228231e-06, "loss": 0.8258, "step": 7720 }, { "epoch": 0.2393419258892056, "grad_norm": 3.026978599548529, "learning_rate": 4.003652729763225e-06, "loss": 0.8047, "step": 7725 }, { "epoch": 0.23949683975709504, "grad_norm": 6.328605420428572, "learning_rate": 4.00283738829822e-06, "loss": 0.8102, "step": 7730 }, { "epoch": 0.2396517536249845, "grad_norm": 2.536815687261922, "learning_rate": 4.002022046833214e-06, "loss": 0.8045, "step": 7735 }, { "epoch": 0.23980666749287396, "grad_norm": 3.5972466198719255, "learning_rate": 4.001206705368209e-06, "loss": 0.7509, "step": 7740 }, { "epoch": 0.23996158136076343, "grad_norm": 3.266879084195895, "learning_rate": 4.000391363903203e-06, "loss": 0.785, "step": 7745 }, { "epoch": 0.24011649522865286, "grad_norm": 2.6934958981125154, "learning_rate": 3.9995760224381975e-06, "loss": 0.7097, "step": 7750 }, { "epoch": 0.24027140909654232, "grad_norm": 2.5625401858354286, "learning_rate": 3.998760680973192e-06, "loss": 0.7076, "step": 7755 }, { "epoch": 0.24042632296443178, "grad_norm": 3.844543160261918, "learning_rate": 3.9979453395081864e-06, "loss": 0.8164, "step": 7760 }, { "epoch": 0.24058123683232122, "grad_norm": 2.4546654543586013, "learning_rate": 3.997129998043181e-06, "loss": 0.7686, "step": 7765 }, { "epoch": 0.24073615070021068, "grad_norm": 2.620831182010525, "learning_rate": 3.996314656578175e-06, "loss": 0.7627, "step": 7770 }, { "epoch": 0.24089106456810014, "grad_norm": 3.607482299824993, "learning_rate": 3.99549931511317e-06, "loss": 0.7624, "step": 7775 }, { "epoch": 0.2410459784359896, "grad_norm": 2.3952585820592946, "learning_rate": 3.994683973648164e-06, "loss": 0.7495, "step": 7780 }, { "epoch": 0.24120089230387903, "grad_norm": 3.302452808961849, "learning_rate": 3.993868632183159e-06, "loss": 0.7099, "step": 7785 }, { "epoch": 0.2413558061717685, "grad_norm": 2.2005533462328133, "learning_rate": 3.993053290718153e-06, "loss": 0.7805, "step": 7790 }, { "epoch": 0.24151072003965796, "grad_norm": 3.9664339747949056, "learning_rate": 3.992237949253148e-06, "loss": 0.7773, "step": 7795 }, { "epoch": 0.24166563390754742, "grad_norm": 3.255232276667875, "learning_rate": 3.991422607788142e-06, "loss": 0.7344, "step": 7800 }, { "epoch": 0.24182054777543685, "grad_norm": 3.138475359689449, "learning_rate": 3.9906072663231366e-06, "loss": 0.7541, "step": 7805 }, { "epoch": 0.2419754616433263, "grad_norm": 3.327632916779526, "learning_rate": 3.989791924858131e-06, "loss": 0.736, "step": 7810 }, { "epoch": 0.24213037551121577, "grad_norm": 2.9935095664766798, "learning_rate": 3.9889765833931255e-06, "loss": 0.7454, "step": 7815 }, { "epoch": 0.2422852893791052, "grad_norm": 3.4353282558957523, "learning_rate": 3.98816124192812e-06, "loss": 0.7716, "step": 7820 }, { "epoch": 0.24244020324699467, "grad_norm": 2.2566961489695525, "learning_rate": 3.987345900463114e-06, "loss": 0.689, "step": 7825 }, { "epoch": 0.24259511711488413, "grad_norm": 2.179628962169955, "learning_rate": 3.986530558998109e-06, "loss": 0.7583, "step": 7830 }, { "epoch": 0.2427500309827736, "grad_norm": 4.8187730219848515, "learning_rate": 3.985715217533103e-06, "loss": 0.873, "step": 7835 }, { "epoch": 0.24290494485066302, "grad_norm": 2.0612360293543674, "learning_rate": 3.984899876068098e-06, "loss": 0.7143, "step": 7840 }, { "epoch": 0.24305985871855249, "grad_norm": 2.830777414023961, "learning_rate": 3.984084534603092e-06, "loss": 0.6776, "step": 7845 }, { "epoch": 0.24321477258644195, "grad_norm": 2.4577209488704144, "learning_rate": 3.983269193138087e-06, "loss": 0.6934, "step": 7850 }, { "epoch": 0.24336968645433138, "grad_norm": 2.6034502058120252, "learning_rate": 3.982453851673081e-06, "loss": 0.7816, "step": 7855 }, { "epoch": 0.24352460032222084, "grad_norm": 6.029308262300204, "learning_rate": 3.981638510208076e-06, "loss": 0.7755, "step": 7860 }, { "epoch": 0.2436795141901103, "grad_norm": 3.122702315577363, "learning_rate": 3.98082316874307e-06, "loss": 0.7476, "step": 7865 }, { "epoch": 0.24383442805799976, "grad_norm": 2.580240828818631, "learning_rate": 3.9800078272780645e-06, "loss": 0.6761, "step": 7870 }, { "epoch": 0.2439893419258892, "grad_norm": 2.8589171490856042, "learning_rate": 3.979192485813059e-06, "loss": 0.6023, "step": 7875 }, { "epoch": 0.24414425579377866, "grad_norm": 2.455061119300849, "learning_rate": 3.9783771443480535e-06, "loss": 0.7105, "step": 7880 }, { "epoch": 0.24429916966166812, "grad_norm": 2.9967078237521214, "learning_rate": 3.977561802883048e-06, "loss": 0.746, "step": 7885 }, { "epoch": 0.24445408352955755, "grad_norm": 3.07393536301856, "learning_rate": 3.976746461418042e-06, "loss": 0.7253, "step": 7890 }, { "epoch": 0.24460899739744701, "grad_norm": 2.681288974796031, "learning_rate": 3.975931119953037e-06, "loss": 0.7062, "step": 7895 }, { "epoch": 0.24476391126533648, "grad_norm": 2.967699774986546, "learning_rate": 3.975115778488031e-06, "loss": 0.6943, "step": 7900 }, { "epoch": 0.24491882513322594, "grad_norm": 2.4390247252465866, "learning_rate": 3.974300437023026e-06, "loss": 0.7192, "step": 7905 }, { "epoch": 0.24507373900111537, "grad_norm": 2.9557779792173955, "learning_rate": 3.97348509555802e-06, "loss": 0.7979, "step": 7910 }, { "epoch": 0.24522865286900483, "grad_norm": 2.5802143090729666, "learning_rate": 3.972669754093015e-06, "loss": 0.7419, "step": 7915 }, { "epoch": 0.2453835667368943, "grad_norm": 2.4180124419203666, "learning_rate": 3.971854412628009e-06, "loss": 0.7437, "step": 7920 }, { "epoch": 0.24553848060478373, "grad_norm": 2.5351972202431408, "learning_rate": 3.971039071163003e-06, "loss": 0.7419, "step": 7925 }, { "epoch": 0.2456933944726732, "grad_norm": 2.9329452611852327, "learning_rate": 3.970223729697998e-06, "loss": 0.7093, "step": 7930 }, { "epoch": 0.24584830834056265, "grad_norm": 3.278438282730836, "learning_rate": 3.9694083882329925e-06, "loss": 0.7688, "step": 7935 }, { "epoch": 0.2460032222084521, "grad_norm": 2.7612671313730224, "learning_rate": 3.968593046767987e-06, "loss": 0.5866, "step": 7940 }, { "epoch": 0.24615813607634154, "grad_norm": 2.5160931073056263, "learning_rate": 3.967777705302981e-06, "loss": 0.7269, "step": 7945 }, { "epoch": 0.246313049944231, "grad_norm": 2.3314695831822676, "learning_rate": 3.966962363837976e-06, "loss": 0.8022, "step": 7950 }, { "epoch": 0.24646796381212047, "grad_norm": 2.7115724767654004, "learning_rate": 3.96614702237297e-06, "loss": 0.7154, "step": 7955 }, { "epoch": 0.24662287768000993, "grad_norm": 2.8805169513003968, "learning_rate": 3.965331680907965e-06, "loss": 0.7718, "step": 7960 }, { "epoch": 0.24677779154789936, "grad_norm": 3.0057043423865384, "learning_rate": 3.964516339442959e-06, "loss": 0.7435, "step": 7965 }, { "epoch": 0.24693270541578882, "grad_norm": 2.5316803541084427, "learning_rate": 3.963700997977954e-06, "loss": 0.6566, "step": 7970 }, { "epoch": 0.24708761928367828, "grad_norm": 2.7587535753603594, "learning_rate": 3.962885656512948e-06, "loss": 0.7297, "step": 7975 }, { "epoch": 0.24724253315156772, "grad_norm": 2.479613354259577, "learning_rate": 3.962070315047943e-06, "loss": 0.8003, "step": 7980 }, { "epoch": 0.24739744701945718, "grad_norm": 3.282700509974491, "learning_rate": 3.961254973582937e-06, "loss": 0.7799, "step": 7985 }, { "epoch": 0.24755236088734664, "grad_norm": 2.8901988235667995, "learning_rate": 3.9604396321179316e-06, "loss": 0.6801, "step": 7990 }, { "epoch": 0.2477072747552361, "grad_norm": 3.182302699395792, "learning_rate": 3.959624290652926e-06, "loss": 0.8007, "step": 7995 }, { "epoch": 0.24786218862312553, "grad_norm": 2.7572442315667467, "learning_rate": 3.95880894918792e-06, "loss": 0.721, "step": 8000 }, { "epoch": 0.248017102491015, "grad_norm": 2.750509678464447, "learning_rate": 3.957993607722915e-06, "loss": 0.7516, "step": 8005 }, { "epoch": 0.24817201635890446, "grad_norm": 3.8222164312594864, "learning_rate": 3.9571782662579085e-06, "loss": 0.6844, "step": 8010 }, { "epoch": 0.2483269302267939, "grad_norm": 2.6883504690580313, "learning_rate": 3.956362924792904e-06, "loss": 0.7967, "step": 8015 }, { "epoch": 0.24848184409468335, "grad_norm": 1.99561971239162, "learning_rate": 3.9555475833278975e-06, "loss": 0.6835, "step": 8020 }, { "epoch": 0.2486367579625728, "grad_norm": 2.498720144769522, "learning_rate": 3.954732241862893e-06, "loss": 0.7188, "step": 8025 }, { "epoch": 0.24879167183046227, "grad_norm": 4.949770564286487, "learning_rate": 3.953916900397887e-06, "loss": 0.7906, "step": 8030 }, { "epoch": 0.2489465856983517, "grad_norm": 5.238830286802781, "learning_rate": 3.953101558932882e-06, "loss": 0.7364, "step": 8035 }, { "epoch": 0.24910149956624117, "grad_norm": 4.018480154494701, "learning_rate": 3.952286217467876e-06, "loss": 0.7661, "step": 8040 }, { "epoch": 0.24925641343413063, "grad_norm": 4.057530653069806, "learning_rate": 3.951470876002871e-06, "loss": 0.7767, "step": 8045 }, { "epoch": 0.24941132730202006, "grad_norm": 3.83184182629974, "learning_rate": 3.950655534537865e-06, "loss": 0.6582, "step": 8050 }, { "epoch": 0.24956624116990953, "grad_norm": 2.843115030409565, "learning_rate": 3.9498401930728595e-06, "loss": 0.7238, "step": 8055 }, { "epoch": 0.249721155037799, "grad_norm": 2.708758356333351, "learning_rate": 3.949024851607854e-06, "loss": 0.7302, "step": 8060 }, { "epoch": 0.24987606890568845, "grad_norm": 2.353215208034813, "learning_rate": 3.9482095101428484e-06, "loss": 0.7385, "step": 8065 }, { "epoch": 0.2500309827735779, "grad_norm": 2.999469913367751, "learning_rate": 3.947394168677843e-06, "loss": 0.7604, "step": 8070 }, { "epoch": 0.25018589664146734, "grad_norm": 2.648416229000828, "learning_rate": 3.9465788272128365e-06, "loss": 0.7279, "step": 8075 }, { "epoch": 0.2503408105093568, "grad_norm": 2.6456418574680383, "learning_rate": 3.945763485747832e-06, "loss": 0.6885, "step": 8080 }, { "epoch": 0.25049572437724626, "grad_norm": 2.6466653004471996, "learning_rate": 3.9449481442828254e-06, "loss": 0.7053, "step": 8085 }, { "epoch": 0.2506506382451357, "grad_norm": 4.843997115802743, "learning_rate": 3.944132802817821e-06, "loss": 0.7166, "step": 8090 }, { "epoch": 0.25080555211302513, "grad_norm": 2.5558818629708124, "learning_rate": 3.943317461352814e-06, "loss": 0.7386, "step": 8095 }, { "epoch": 0.2509604659809146, "grad_norm": 2.8651723616451563, "learning_rate": 3.94250211988781e-06, "loss": 0.7217, "step": 8100 }, { "epoch": 0.25111537984880405, "grad_norm": 2.8055757832302546, "learning_rate": 3.941686778422803e-06, "loss": 0.7654, "step": 8105 }, { "epoch": 0.2512702937166935, "grad_norm": 2.4140494109181434, "learning_rate": 3.9408714369577986e-06, "loss": 0.7849, "step": 8110 }, { "epoch": 0.251425207584583, "grad_norm": 2.8758495174993883, "learning_rate": 3.940056095492792e-06, "loss": 0.8023, "step": 8115 }, { "epoch": 0.25158012145247244, "grad_norm": 2.20680404280677, "learning_rate": 3.9392407540277875e-06, "loss": 0.7618, "step": 8120 }, { "epoch": 0.2517350353203619, "grad_norm": 2.346956364943166, "learning_rate": 3.938425412562782e-06, "loss": 0.7175, "step": 8125 }, { "epoch": 0.2518899491882513, "grad_norm": 3.2565178418751763, "learning_rate": 3.937610071097776e-06, "loss": 0.625, "step": 8130 }, { "epoch": 0.25204486305614077, "grad_norm": 2.416214783019695, "learning_rate": 3.936794729632771e-06, "loss": 0.7299, "step": 8135 }, { "epoch": 0.25219977692403023, "grad_norm": 2.6890803232778198, "learning_rate": 3.935979388167765e-06, "loss": 0.7057, "step": 8140 }, { "epoch": 0.2523546907919197, "grad_norm": 2.8556395146927187, "learning_rate": 3.93516404670276e-06, "loss": 0.7361, "step": 8145 }, { "epoch": 0.25250960465980915, "grad_norm": 2.6425032888222524, "learning_rate": 3.934348705237753e-06, "loss": 0.7046, "step": 8150 }, { "epoch": 0.2526645185276986, "grad_norm": 2.9258244506733972, "learning_rate": 3.933533363772749e-06, "loss": 0.8152, "step": 8155 }, { "epoch": 0.2528194323955881, "grad_norm": 3.5404568306307245, "learning_rate": 3.932718022307742e-06, "loss": 0.6939, "step": 8160 }, { "epoch": 0.25297434626347753, "grad_norm": 2.779354485103514, "learning_rate": 3.931902680842738e-06, "loss": 0.6826, "step": 8165 }, { "epoch": 0.25312926013136694, "grad_norm": 3.1985504947630683, "learning_rate": 3.931087339377731e-06, "loss": 0.8384, "step": 8170 }, { "epoch": 0.2532841739992564, "grad_norm": 2.601713706563802, "learning_rate": 3.9302719979127265e-06, "loss": 0.6856, "step": 8175 }, { "epoch": 0.25343908786714586, "grad_norm": 2.5066783040241463, "learning_rate": 3.92945665644772e-06, "loss": 0.7599, "step": 8180 }, { "epoch": 0.2535940017350353, "grad_norm": 3.399072344755018, "learning_rate": 3.9286413149827155e-06, "loss": 0.7214, "step": 8185 }, { "epoch": 0.2537489156029248, "grad_norm": 2.591087489987889, "learning_rate": 3.927825973517709e-06, "loss": 0.7521, "step": 8190 }, { "epoch": 0.25390382947081425, "grad_norm": 2.688650475120889, "learning_rate": 3.927010632052704e-06, "loss": 0.7445, "step": 8195 }, { "epoch": 0.2540587433387037, "grad_norm": 2.839667507129447, "learning_rate": 3.926195290587698e-06, "loss": 0.6805, "step": 8200 }, { "epoch": 0.2542136572065931, "grad_norm": 3.1496209179882757, "learning_rate": 3.925379949122693e-06, "loss": 0.7593, "step": 8205 }, { "epoch": 0.2543685710744826, "grad_norm": 2.9293644437824065, "learning_rate": 3.924564607657687e-06, "loss": 0.7465, "step": 8210 }, { "epoch": 0.25452348494237204, "grad_norm": 3.0952914285706337, "learning_rate": 3.923749266192682e-06, "loss": 0.7095, "step": 8215 }, { "epoch": 0.2546783988102615, "grad_norm": 2.6099142564576576, "learning_rate": 3.922933924727677e-06, "loss": 0.7415, "step": 8220 }, { "epoch": 0.25483331267815096, "grad_norm": 2.4494034041956643, "learning_rate": 3.92211858326267e-06, "loss": 0.6224, "step": 8225 }, { "epoch": 0.2549882265460404, "grad_norm": 2.568695506060627, "learning_rate": 3.921303241797666e-06, "loss": 0.7048, "step": 8230 }, { "epoch": 0.2551431404139299, "grad_norm": 4.235473930805889, "learning_rate": 3.920487900332659e-06, "loss": 0.7283, "step": 8235 }, { "epoch": 0.2552980542818193, "grad_norm": 2.6729854910559285, "learning_rate": 3.9196725588676545e-06, "loss": 0.8002, "step": 8240 }, { "epoch": 0.25545296814970875, "grad_norm": 2.8663210839253077, "learning_rate": 3.918857217402648e-06, "loss": 0.7334, "step": 8245 }, { "epoch": 0.2556078820175982, "grad_norm": 2.607497939801493, "learning_rate": 3.9180418759376434e-06, "loss": 0.8, "step": 8250 }, { "epoch": 0.25576279588548767, "grad_norm": 4.064025046148329, "learning_rate": 3.917226534472637e-06, "loss": 0.7411, "step": 8255 }, { "epoch": 0.25591770975337713, "grad_norm": 2.7112539297659675, "learning_rate": 3.916411193007632e-06, "loss": 0.7246, "step": 8260 }, { "epoch": 0.2560726236212666, "grad_norm": 2.1800402164975106, "learning_rate": 3.915595851542626e-06, "loss": 0.6676, "step": 8265 }, { "epoch": 0.25622753748915605, "grad_norm": 2.628442335617829, "learning_rate": 3.914780510077621e-06, "loss": 0.7489, "step": 8270 }, { "epoch": 0.25638245135704546, "grad_norm": 2.5383416873283817, "learning_rate": 3.913965168612615e-06, "loss": 0.7937, "step": 8275 }, { "epoch": 0.2565373652249349, "grad_norm": 2.4857246330922846, "learning_rate": 3.91314982714761e-06, "loss": 0.713, "step": 8280 }, { "epoch": 0.2566922790928244, "grad_norm": 3.364117703950874, "learning_rate": 3.912334485682604e-06, "loss": 0.709, "step": 8285 }, { "epoch": 0.25684719296071384, "grad_norm": 2.9748991011777286, "learning_rate": 3.911519144217599e-06, "loss": 0.7258, "step": 8290 }, { "epoch": 0.2570021068286033, "grad_norm": 2.7105047312259756, "learning_rate": 3.910703802752593e-06, "loss": 0.7083, "step": 8295 }, { "epoch": 0.25715702069649277, "grad_norm": 2.7683782655510245, "learning_rate": 3.909888461287587e-06, "loss": 0.7421, "step": 8300 }, { "epoch": 0.2573119345643822, "grad_norm": 2.9411908855144238, "learning_rate": 3.909073119822582e-06, "loss": 0.7769, "step": 8305 }, { "epoch": 0.25746684843227163, "grad_norm": 2.730648676145683, "learning_rate": 3.908257778357576e-06, "loss": 0.7084, "step": 8310 }, { "epoch": 0.2576217623001611, "grad_norm": 2.2393460222475277, "learning_rate": 3.907442436892571e-06, "loss": 0.7429, "step": 8315 }, { "epoch": 0.25777667616805056, "grad_norm": 2.5045746902849295, "learning_rate": 3.906627095427565e-06, "loss": 0.7499, "step": 8320 }, { "epoch": 0.25793159003594, "grad_norm": 2.5482792428968772, "learning_rate": 3.90581175396256e-06, "loss": 0.7457, "step": 8325 }, { "epoch": 0.2580865039038295, "grad_norm": 2.3100777309939944, "learning_rate": 3.904996412497554e-06, "loss": 0.8024, "step": 8330 }, { "epoch": 0.25824141777171894, "grad_norm": 3.613378145102757, "learning_rate": 3.904181071032549e-06, "loss": 0.7581, "step": 8335 }, { "epoch": 0.2583963316396084, "grad_norm": 2.8543703485985152, "learning_rate": 3.903365729567543e-06, "loss": 0.8086, "step": 8340 }, { "epoch": 0.2585512455074978, "grad_norm": 2.7735737521681063, "learning_rate": 3.902550388102538e-06, "loss": 0.8521, "step": 8345 }, { "epoch": 0.25870615937538727, "grad_norm": 4.627360714181205, "learning_rate": 3.901735046637532e-06, "loss": 0.7657, "step": 8350 }, { "epoch": 0.25886107324327673, "grad_norm": 2.6621095577892366, "learning_rate": 3.900919705172527e-06, "loss": 0.762, "step": 8355 }, { "epoch": 0.2590159871111662, "grad_norm": 3.6844429912499357, "learning_rate": 3.900104363707521e-06, "loss": 0.7905, "step": 8360 }, { "epoch": 0.25917090097905565, "grad_norm": 2.389385438650573, "learning_rate": 3.899289022242516e-06, "loss": 0.698, "step": 8365 }, { "epoch": 0.2593258148469451, "grad_norm": 2.9104158780826843, "learning_rate": 3.89847368077751e-06, "loss": 0.8119, "step": 8370 }, { "epoch": 0.2594807287148346, "grad_norm": 4.25522622385487, "learning_rate": 3.897658339312504e-06, "loss": 0.767, "step": 8375 }, { "epoch": 0.259635642582724, "grad_norm": 2.931529382797452, "learning_rate": 3.8968429978474985e-06, "loss": 0.6928, "step": 8380 }, { "epoch": 0.25979055645061344, "grad_norm": 5.031143490634951, "learning_rate": 3.896027656382493e-06, "loss": 0.7651, "step": 8385 }, { "epoch": 0.2599454703185029, "grad_norm": 2.627995561311182, "learning_rate": 3.8952123149174874e-06, "loss": 0.7408, "step": 8390 }, { "epoch": 0.26010038418639236, "grad_norm": 2.8811151951636176, "learning_rate": 3.894396973452482e-06, "loss": 0.7208, "step": 8395 }, { "epoch": 0.2602552980542818, "grad_norm": 2.85157263508092, "learning_rate": 3.893581631987476e-06, "loss": 0.7243, "step": 8400 }, { "epoch": 0.2604102119221713, "grad_norm": 2.497522535765262, "learning_rate": 3.892766290522471e-06, "loss": 0.7681, "step": 8405 }, { "epoch": 0.26056512579006075, "grad_norm": 2.5150568274004423, "learning_rate": 3.891950949057466e-06, "loss": 0.7043, "step": 8410 }, { "epoch": 0.26072003965795015, "grad_norm": 3.1765075235169378, "learning_rate": 3.89113560759246e-06, "loss": 0.6388, "step": 8415 }, { "epoch": 0.2608749535258396, "grad_norm": 2.835005852972779, "learning_rate": 3.890320266127455e-06, "loss": 0.7578, "step": 8420 }, { "epoch": 0.2610298673937291, "grad_norm": 2.695719970082287, "learning_rate": 3.889504924662449e-06, "loss": 0.738, "step": 8425 }, { "epoch": 0.26118478126161854, "grad_norm": 2.7518793943050937, "learning_rate": 3.888689583197444e-06, "loss": 0.7855, "step": 8430 }, { "epoch": 0.261339695129508, "grad_norm": 3.2347758707393233, "learning_rate": 3.8878742417324376e-06, "loss": 0.7099, "step": 8435 }, { "epoch": 0.26149460899739746, "grad_norm": 2.7351889788631083, "learning_rate": 3.887058900267433e-06, "loss": 0.6903, "step": 8440 }, { "epoch": 0.2616495228652869, "grad_norm": 3.027799539107644, "learning_rate": 3.8862435588024265e-06, "loss": 0.6701, "step": 8445 }, { "epoch": 0.2618044367331763, "grad_norm": 2.8434878747154486, "learning_rate": 3.885428217337422e-06, "loss": 0.6643, "step": 8450 }, { "epoch": 0.2619593506010658, "grad_norm": 2.932449727160189, "learning_rate": 3.884612875872415e-06, "loss": 0.8431, "step": 8455 }, { "epoch": 0.26211426446895525, "grad_norm": 2.5907738875129724, "learning_rate": 3.88379753440741e-06, "loss": 0.8448, "step": 8460 }, { "epoch": 0.2622691783368447, "grad_norm": 3.68224670928995, "learning_rate": 3.882982192942404e-06, "loss": 0.773, "step": 8465 }, { "epoch": 0.26242409220473417, "grad_norm": 3.2936957775575237, "learning_rate": 3.882166851477399e-06, "loss": 0.806, "step": 8470 }, { "epoch": 0.26257900607262363, "grad_norm": 3.305542742039858, "learning_rate": 3.881351510012393e-06, "loss": 0.7376, "step": 8475 }, { "epoch": 0.2627339199405131, "grad_norm": 2.4381394563911067, "learning_rate": 3.880536168547388e-06, "loss": 0.7173, "step": 8480 }, { "epoch": 0.26288883380840256, "grad_norm": 3.1535050393824458, "learning_rate": 3.879720827082382e-06, "loss": 0.7533, "step": 8485 }, { "epoch": 0.26304374767629196, "grad_norm": 3.0767697964259764, "learning_rate": 3.878905485617377e-06, "loss": 0.727, "step": 8490 }, { "epoch": 0.2631986615441814, "grad_norm": 3.101380124552947, "learning_rate": 3.878090144152371e-06, "loss": 0.7165, "step": 8495 }, { "epoch": 0.2633535754120709, "grad_norm": 3.7361422347917164, "learning_rate": 3.8772748026873655e-06, "loss": 0.8027, "step": 8500 }, { "epoch": 0.26350848927996035, "grad_norm": 2.778933510362988, "learning_rate": 3.876459461222361e-06, "loss": 0.769, "step": 8505 }, { "epoch": 0.2636634031478498, "grad_norm": 3.375550933875312, "learning_rate": 3.8756441197573545e-06, "loss": 0.7972, "step": 8510 }, { "epoch": 0.26381831701573927, "grad_norm": 2.3694027798546924, "learning_rate": 3.87482877829235e-06, "loss": 0.64, "step": 8515 }, { "epoch": 0.26397323088362873, "grad_norm": 2.63306796382953, "learning_rate": 3.874013436827343e-06, "loss": 0.7082, "step": 8520 }, { "epoch": 0.26412814475151813, "grad_norm": 2.4341735446262263, "learning_rate": 3.873198095362339e-06, "loss": 0.7437, "step": 8525 }, { "epoch": 0.2642830586194076, "grad_norm": 4.052453151884556, "learning_rate": 3.872382753897332e-06, "loss": 0.7179, "step": 8530 }, { "epoch": 0.26443797248729706, "grad_norm": 2.8964006911060802, "learning_rate": 3.871567412432327e-06, "loss": 0.7438, "step": 8535 }, { "epoch": 0.2645928863551865, "grad_norm": 3.2783411380779035, "learning_rate": 3.870752070967321e-06, "loss": 0.7917, "step": 8540 }, { "epoch": 0.264747800223076, "grad_norm": 2.9616986334027007, "learning_rate": 3.869936729502316e-06, "loss": 0.686, "step": 8545 }, { "epoch": 0.26490271409096544, "grad_norm": 2.99850562549522, "learning_rate": 3.86912138803731e-06, "loss": 0.8111, "step": 8550 }, { "epoch": 0.2650576279588549, "grad_norm": 2.482624250411589, "learning_rate": 3.868306046572305e-06, "loss": 0.7096, "step": 8555 }, { "epoch": 0.2652125418267443, "grad_norm": 3.6112164665371345, "learning_rate": 3.867490705107299e-06, "loss": 0.8372, "step": 8560 }, { "epoch": 0.26536745569463377, "grad_norm": 2.704250193265035, "learning_rate": 3.8666753636422935e-06, "loss": 0.8387, "step": 8565 }, { "epoch": 0.26552236956252323, "grad_norm": 2.889598048908403, "learning_rate": 3.865860022177288e-06, "loss": 0.7386, "step": 8570 }, { "epoch": 0.2656772834304127, "grad_norm": 3.2589057313946808, "learning_rate": 3.865044680712282e-06, "loss": 0.753, "step": 8575 }, { "epoch": 0.26583219729830215, "grad_norm": 2.4051040499011105, "learning_rate": 3.864229339247277e-06, "loss": 0.6798, "step": 8580 }, { "epoch": 0.2659871111661916, "grad_norm": 3.1003904148789516, "learning_rate": 3.863413997782271e-06, "loss": 0.7, "step": 8585 }, { "epoch": 0.2661420250340811, "grad_norm": 2.774709927176312, "learning_rate": 3.862598656317266e-06, "loss": 0.716, "step": 8590 }, { "epoch": 0.2662969389019705, "grad_norm": 2.904486097159884, "learning_rate": 3.86178331485226e-06, "loss": 0.7777, "step": 8595 }, { "epoch": 0.26645185276985994, "grad_norm": 3.62398105330797, "learning_rate": 3.8609679733872556e-06, "loss": 0.789, "step": 8600 }, { "epoch": 0.2666067666377494, "grad_norm": 2.9843526659844275, "learning_rate": 3.860152631922249e-06, "loss": 0.7258, "step": 8605 }, { "epoch": 0.26676168050563887, "grad_norm": 3.110544896948684, "learning_rate": 3.859337290457244e-06, "loss": 0.8024, "step": 8610 }, { "epoch": 0.2669165943735283, "grad_norm": 3.983772133139551, "learning_rate": 3.858521948992238e-06, "loss": 0.7326, "step": 8615 }, { "epoch": 0.2670715082414178, "grad_norm": 2.6227542031760374, "learning_rate": 3.8577066075272326e-06, "loss": 0.7442, "step": 8620 }, { "epoch": 0.26722642210930725, "grad_norm": 4.210967936825249, "learning_rate": 3.856891266062227e-06, "loss": 0.7735, "step": 8625 }, { "epoch": 0.26738133597719665, "grad_norm": 3.40312224578618, "learning_rate": 3.8560759245972215e-06, "loss": 0.7748, "step": 8630 }, { "epoch": 0.2675362498450861, "grad_norm": 2.7823652176712654, "learning_rate": 3.855260583132216e-06, "loss": 0.7739, "step": 8635 }, { "epoch": 0.2676911637129756, "grad_norm": 3.0802451136137434, "learning_rate": 3.85444524166721e-06, "loss": 0.6927, "step": 8640 }, { "epoch": 0.26784607758086504, "grad_norm": 3.3305493884905957, "learning_rate": 3.853629900202205e-06, "loss": 0.6938, "step": 8645 }, { "epoch": 0.2680009914487545, "grad_norm": 3.545604690513145, "learning_rate": 3.852814558737199e-06, "loss": 0.7583, "step": 8650 }, { "epoch": 0.26815590531664396, "grad_norm": 2.9941471284144683, "learning_rate": 3.851999217272194e-06, "loss": 0.8157, "step": 8655 }, { "epoch": 0.2683108191845334, "grad_norm": 2.3430317397001628, "learning_rate": 3.851183875807188e-06, "loss": 0.7778, "step": 8660 }, { "epoch": 0.26846573305242283, "grad_norm": 4.189537119169044, "learning_rate": 3.850368534342183e-06, "loss": 0.8284, "step": 8665 }, { "epoch": 0.2686206469203123, "grad_norm": 2.212346180194587, "learning_rate": 3.849553192877177e-06, "loss": 0.6372, "step": 8670 }, { "epoch": 0.26877556078820175, "grad_norm": 2.2773515458949065, "learning_rate": 3.848737851412172e-06, "loss": 0.7046, "step": 8675 }, { "epoch": 0.2689304746560912, "grad_norm": 2.796636955140069, "learning_rate": 3.847922509947166e-06, "loss": 0.8186, "step": 8680 }, { "epoch": 0.2690853885239807, "grad_norm": 2.9775753530505362, "learning_rate": 3.8471071684821605e-06, "loss": 0.7136, "step": 8685 }, { "epoch": 0.26924030239187013, "grad_norm": 2.5382233684333384, "learning_rate": 3.846291827017155e-06, "loss": 0.7435, "step": 8690 }, { "epoch": 0.2693952162597596, "grad_norm": 2.6548191718576937, "learning_rate": 3.8454764855521494e-06, "loss": 0.7347, "step": 8695 }, { "epoch": 0.269550130127649, "grad_norm": 3.537602281899176, "learning_rate": 3.844661144087144e-06, "loss": 0.8142, "step": 8700 }, { "epoch": 0.26970504399553846, "grad_norm": 2.2922070719124634, "learning_rate": 3.843845802622138e-06, "loss": 0.7071, "step": 8705 }, { "epoch": 0.2698599578634279, "grad_norm": 3.0981040187010556, "learning_rate": 3.843030461157133e-06, "loss": 0.7214, "step": 8710 }, { "epoch": 0.2700148717313174, "grad_norm": 2.5175243836342163, "learning_rate": 3.842215119692127e-06, "loss": 0.8023, "step": 8715 }, { "epoch": 0.27016978559920685, "grad_norm": 3.1900405958526807, "learning_rate": 3.841399778227122e-06, "loss": 0.7743, "step": 8720 }, { "epoch": 0.2703246994670963, "grad_norm": 2.4859137904235356, "learning_rate": 3.840584436762116e-06, "loss": 0.7504, "step": 8725 }, { "epoch": 0.27047961333498577, "grad_norm": 3.611590103057864, "learning_rate": 3.839769095297111e-06, "loss": 0.8184, "step": 8730 }, { "epoch": 0.2706345272028752, "grad_norm": 2.567699908505314, "learning_rate": 3.838953753832105e-06, "loss": 0.71, "step": 8735 }, { "epoch": 0.27078944107076464, "grad_norm": 2.265942389898583, "learning_rate": 3.8381384123670996e-06, "loss": 0.7092, "step": 8740 }, { "epoch": 0.2709443549386541, "grad_norm": 2.5498713387336487, "learning_rate": 3.837323070902094e-06, "loss": 0.6952, "step": 8745 }, { "epoch": 0.27109926880654356, "grad_norm": 2.2229611917261756, "learning_rate": 3.8365077294370885e-06, "loss": 0.7586, "step": 8750 }, { "epoch": 0.271254182674433, "grad_norm": 2.4319307546409963, "learning_rate": 3.835692387972083e-06, "loss": 0.7542, "step": 8755 }, { "epoch": 0.2714090965423225, "grad_norm": 3.514058421536036, "learning_rate": 3.834877046507077e-06, "loss": 0.7343, "step": 8760 }, { "epoch": 0.27156401041021194, "grad_norm": 3.2439476310202426, "learning_rate": 3.834061705042072e-06, "loss": 0.7682, "step": 8765 }, { "epoch": 0.27171892427810135, "grad_norm": 2.7695956375491164, "learning_rate": 3.833246363577066e-06, "loss": 0.713, "step": 8770 }, { "epoch": 0.2718738381459908, "grad_norm": 2.3704770509278434, "learning_rate": 3.832431022112061e-06, "loss": 0.6755, "step": 8775 }, { "epoch": 0.27202875201388027, "grad_norm": 2.9698485913950115, "learning_rate": 3.831615680647055e-06, "loss": 0.7468, "step": 8780 }, { "epoch": 0.27218366588176973, "grad_norm": 2.299604090154999, "learning_rate": 3.83080033918205e-06, "loss": 0.6583, "step": 8785 }, { "epoch": 0.2723385797496592, "grad_norm": 3.413475844338282, "learning_rate": 3.829984997717044e-06, "loss": 0.7696, "step": 8790 }, { "epoch": 0.27249349361754865, "grad_norm": 3.2679373925448876, "learning_rate": 3.829169656252039e-06, "loss": 0.7636, "step": 8795 }, { "epoch": 0.2726484074854381, "grad_norm": 2.282250079622702, "learning_rate": 3.828354314787033e-06, "loss": 0.7871, "step": 8800 }, { "epoch": 0.2728033213533276, "grad_norm": 2.894671142656334, "learning_rate": 3.8275389733220275e-06, "loss": 0.8423, "step": 8805 }, { "epoch": 0.272958235221217, "grad_norm": 4.84087746661593, "learning_rate": 3.826723631857022e-06, "loss": 0.7172, "step": 8810 }, { "epoch": 0.27311314908910644, "grad_norm": 2.465595484802092, "learning_rate": 3.8259082903920165e-06, "loss": 0.7221, "step": 8815 }, { "epoch": 0.2732680629569959, "grad_norm": 2.741429400869263, "learning_rate": 3.825092948927011e-06, "loss": 0.7341, "step": 8820 }, { "epoch": 0.27342297682488537, "grad_norm": 3.432402579763835, "learning_rate": 3.824277607462005e-06, "loss": 0.8002, "step": 8825 }, { "epoch": 0.2735778906927748, "grad_norm": 2.7594258135534084, "learning_rate": 3.823462265997e-06, "loss": 0.765, "step": 8830 }, { "epoch": 0.2737328045606643, "grad_norm": 2.4577364603401923, "learning_rate": 3.822646924531994e-06, "loss": 0.7668, "step": 8835 }, { "epoch": 0.27388771842855375, "grad_norm": 2.620597716511247, "learning_rate": 3.821831583066989e-06, "loss": 0.7179, "step": 8840 }, { "epoch": 0.27404263229644316, "grad_norm": 2.3374655448144144, "learning_rate": 3.821016241601983e-06, "loss": 0.6989, "step": 8845 }, { "epoch": 0.2741975461643326, "grad_norm": 2.1768811318133423, "learning_rate": 3.820200900136978e-06, "loss": 0.7203, "step": 8850 }, { "epoch": 0.2743524600322221, "grad_norm": 2.8324989564331515, "learning_rate": 3.819385558671972e-06, "loss": 0.7001, "step": 8855 }, { "epoch": 0.27450737390011154, "grad_norm": 3.123564291941681, "learning_rate": 3.818570217206967e-06, "loss": 0.6738, "step": 8860 }, { "epoch": 0.274662287768001, "grad_norm": 2.115377127716704, "learning_rate": 3.817754875741961e-06, "loss": 0.7405, "step": 8865 }, { "epoch": 0.27481720163589046, "grad_norm": 3.1534780910345632, "learning_rate": 3.8169395342769555e-06, "loss": 0.8204, "step": 8870 }, { "epoch": 0.2749721155037799, "grad_norm": 2.4541445572883656, "learning_rate": 3.81612419281195e-06, "loss": 0.6729, "step": 8875 }, { "epoch": 0.27512702937166933, "grad_norm": 2.855543578648507, "learning_rate": 3.8153088513469444e-06, "loss": 0.757, "step": 8880 }, { "epoch": 0.2752819432395588, "grad_norm": 2.4148115916599555, "learning_rate": 3.8144935098819393e-06, "loss": 0.723, "step": 8885 }, { "epoch": 0.27543685710744825, "grad_norm": 2.80714862631165, "learning_rate": 3.8136781684169333e-06, "loss": 0.7328, "step": 8890 }, { "epoch": 0.2755917709753377, "grad_norm": 2.79812829087707, "learning_rate": 3.812862826951928e-06, "loss": 0.6808, "step": 8895 }, { "epoch": 0.2757466848432272, "grad_norm": 2.275009085589606, "learning_rate": 3.8120474854869223e-06, "loss": 0.6547, "step": 8900 }, { "epoch": 0.27590159871111664, "grad_norm": 2.249954420204924, "learning_rate": 3.8112321440219167e-06, "loss": 0.6955, "step": 8905 }, { "epoch": 0.2760565125790061, "grad_norm": 2.3263761123595397, "learning_rate": 3.810416802556911e-06, "loss": 0.809, "step": 8910 }, { "epoch": 0.2762114264468955, "grad_norm": 2.482020941637356, "learning_rate": 3.8096014610919056e-06, "loss": 0.6546, "step": 8915 }, { "epoch": 0.27636634031478496, "grad_norm": 3.3576381010264655, "learning_rate": 3.8087861196269e-06, "loss": 0.721, "step": 8920 }, { "epoch": 0.2765212541826744, "grad_norm": 2.518059848149723, "learning_rate": 3.8079707781618946e-06, "loss": 0.6692, "step": 8925 }, { "epoch": 0.2766761680505639, "grad_norm": 2.33291674062038, "learning_rate": 3.8071554366968886e-06, "loss": 0.8366, "step": 8930 }, { "epoch": 0.27683108191845335, "grad_norm": 2.665720357948644, "learning_rate": 3.8063400952318835e-06, "loss": 0.7167, "step": 8935 }, { "epoch": 0.2769859957863428, "grad_norm": 3.3647520029311675, "learning_rate": 3.8055247537668775e-06, "loss": 0.767, "step": 8940 }, { "epoch": 0.27714090965423227, "grad_norm": 2.48295895823448, "learning_rate": 3.8047094123018724e-06, "loss": 0.7773, "step": 8945 }, { "epoch": 0.2772958235221217, "grad_norm": 3.036215756680506, "learning_rate": 3.8038940708368664e-06, "loss": 0.768, "step": 8950 }, { "epoch": 0.27745073739001114, "grad_norm": 2.607695879568836, "learning_rate": 3.8030787293718613e-06, "loss": 0.7621, "step": 8955 }, { "epoch": 0.2776056512579006, "grad_norm": 2.499535008398855, "learning_rate": 3.8022633879068554e-06, "loss": 0.7406, "step": 8960 }, { "epoch": 0.27776056512579006, "grad_norm": 2.6132680918182025, "learning_rate": 3.8014480464418502e-06, "loss": 0.7504, "step": 8965 }, { "epoch": 0.2779154789936795, "grad_norm": 4.512166873566018, "learning_rate": 3.8006327049768447e-06, "loss": 0.775, "step": 8970 }, { "epoch": 0.278070392861569, "grad_norm": 2.3414455617767937, "learning_rate": 3.799817363511839e-06, "loss": 0.6723, "step": 8975 }, { "epoch": 0.27822530672945844, "grad_norm": 2.408355615834613, "learning_rate": 3.7990020220468336e-06, "loss": 0.6543, "step": 8980 }, { "epoch": 0.27838022059734785, "grad_norm": 2.9594610274614808, "learning_rate": 3.798186680581828e-06, "loss": 0.6895, "step": 8985 }, { "epoch": 0.2785351344652373, "grad_norm": 2.2650110816469016, "learning_rate": 3.7973713391168225e-06, "loss": 0.7178, "step": 8990 }, { "epoch": 0.27869004833312677, "grad_norm": 2.8403484900138785, "learning_rate": 3.796555997651817e-06, "loss": 0.6877, "step": 8995 }, { "epoch": 0.27884496220101623, "grad_norm": 3.0716038715526452, "learning_rate": 3.7957406561868114e-06, "loss": 0.734, "step": 9000 }, { "epoch": 0.2789998760689057, "grad_norm": 2.8721453258540337, "learning_rate": 3.794925314721806e-06, "loss": 0.7143, "step": 9005 }, { "epoch": 0.27915478993679516, "grad_norm": 3.2484257881421885, "learning_rate": 3.7941099732568004e-06, "loss": 0.7874, "step": 9010 }, { "epoch": 0.2793097038046846, "grad_norm": 3.3443400598837605, "learning_rate": 3.7932946317917944e-06, "loss": 0.8136, "step": 9015 }, { "epoch": 0.279464617672574, "grad_norm": 3.7215557496423552, "learning_rate": 3.7924792903267893e-06, "loss": 0.7447, "step": 9020 }, { "epoch": 0.2796195315404635, "grad_norm": 2.5914149778741504, "learning_rate": 3.7916639488617833e-06, "loss": 0.7357, "step": 9025 }, { "epoch": 0.27977444540835295, "grad_norm": 2.6845868660874523, "learning_rate": 3.790848607396778e-06, "loss": 0.7576, "step": 9030 }, { "epoch": 0.2799293592762424, "grad_norm": 2.6367232733756425, "learning_rate": 3.7900332659317722e-06, "loss": 0.6716, "step": 9035 }, { "epoch": 0.28008427314413187, "grad_norm": 2.7470527412006946, "learning_rate": 3.789217924466767e-06, "loss": 0.7257, "step": 9040 }, { "epoch": 0.28023918701202133, "grad_norm": 2.216251781944987, "learning_rate": 3.788402583001761e-06, "loss": 0.7463, "step": 9045 }, { "epoch": 0.2803941008799108, "grad_norm": 3.5758215387717067, "learning_rate": 3.787587241536756e-06, "loss": 0.74, "step": 9050 }, { "epoch": 0.2805490147478002, "grad_norm": 3.1399794988441863, "learning_rate": 3.78677190007175e-06, "loss": 0.7473, "step": 9055 }, { "epoch": 0.28070392861568966, "grad_norm": 3.0436544654695004, "learning_rate": 3.785956558606745e-06, "loss": 0.7313, "step": 9060 }, { "epoch": 0.2808588424835791, "grad_norm": 2.747993354241998, "learning_rate": 3.7851412171417394e-06, "loss": 0.7048, "step": 9065 }, { "epoch": 0.2810137563514686, "grad_norm": 2.9944192591097947, "learning_rate": 3.784325875676734e-06, "loss": 0.7323, "step": 9070 }, { "epoch": 0.28116867021935804, "grad_norm": 2.7920262138156033, "learning_rate": 3.7835105342117283e-06, "loss": 0.7779, "step": 9075 }, { "epoch": 0.2813235840872475, "grad_norm": 2.447958665885653, "learning_rate": 3.782695192746723e-06, "loss": 0.6366, "step": 9080 }, { "epoch": 0.28147849795513696, "grad_norm": 2.633448446312138, "learning_rate": 3.7818798512817173e-06, "loss": 0.6925, "step": 9085 }, { "epoch": 0.2816334118230264, "grad_norm": 2.380264976446425, "learning_rate": 3.7810645098167113e-06, "loss": 0.6892, "step": 9090 }, { "epoch": 0.28178832569091583, "grad_norm": 2.508998113145267, "learning_rate": 3.780249168351706e-06, "loss": 0.7158, "step": 9095 }, { "epoch": 0.2819432395588053, "grad_norm": 2.378907777045125, "learning_rate": 3.7794338268867e-06, "loss": 0.8004, "step": 9100 }, { "epoch": 0.28209815342669475, "grad_norm": 3.163654193564536, "learning_rate": 3.778618485421695e-06, "loss": 0.7319, "step": 9105 }, { "epoch": 0.2822530672945842, "grad_norm": 3.340830000571225, "learning_rate": 3.777803143956689e-06, "loss": 0.783, "step": 9110 }, { "epoch": 0.2824079811624737, "grad_norm": 2.8907670199154194, "learning_rate": 3.776987802491684e-06, "loss": 0.7708, "step": 9115 }, { "epoch": 0.28256289503036314, "grad_norm": 3.1595268386251316, "learning_rate": 3.776172461026678e-06, "loss": 0.7025, "step": 9120 }, { "epoch": 0.2827178088982526, "grad_norm": 2.8365991526994723, "learning_rate": 3.775357119561673e-06, "loss": 0.7348, "step": 9125 }, { "epoch": 0.282872722766142, "grad_norm": 2.2982209221459327, "learning_rate": 3.774541778096667e-06, "loss": 0.7528, "step": 9130 }, { "epoch": 0.28302763663403147, "grad_norm": 2.1411169794065246, "learning_rate": 3.773726436631662e-06, "loss": 0.7017, "step": 9135 }, { "epoch": 0.2831825505019209, "grad_norm": 2.3521308998795067, "learning_rate": 3.772911095166656e-06, "loss": 0.7194, "step": 9140 }, { "epoch": 0.2833374643698104, "grad_norm": 3.074908651049176, "learning_rate": 3.7720957537016508e-06, "loss": 0.769, "step": 9145 }, { "epoch": 0.28349237823769985, "grad_norm": 2.5930025986794374, "learning_rate": 3.771280412236645e-06, "loss": 0.7366, "step": 9150 }, { "epoch": 0.2836472921055893, "grad_norm": 2.8308882119563523, "learning_rate": 3.7704650707716397e-06, "loss": 0.7479, "step": 9155 }, { "epoch": 0.28380220597347877, "grad_norm": 2.3532331313794574, "learning_rate": 3.769649729306634e-06, "loss": 0.7224, "step": 9160 }, { "epoch": 0.2839571198413682, "grad_norm": 2.9606385743604067, "learning_rate": 3.768834387841628e-06, "loss": 0.6902, "step": 9165 }, { "epoch": 0.28411203370925764, "grad_norm": 3.715969969069691, "learning_rate": 3.768019046376623e-06, "loss": 0.7928, "step": 9170 }, { "epoch": 0.2842669475771471, "grad_norm": 2.712956743346871, "learning_rate": 3.767203704911617e-06, "loss": 0.7131, "step": 9175 }, { "epoch": 0.28442186144503656, "grad_norm": 2.729976787498431, "learning_rate": 3.766388363446612e-06, "loss": 0.7305, "step": 9180 }, { "epoch": 0.284576775312926, "grad_norm": 2.67287949084008, "learning_rate": 3.765573021981606e-06, "loss": 0.7688, "step": 9185 }, { "epoch": 0.2847316891808155, "grad_norm": 2.4282353860895025, "learning_rate": 3.764757680516601e-06, "loss": 0.6735, "step": 9190 }, { "epoch": 0.28488660304870494, "grad_norm": 3.0987690605952705, "learning_rate": 3.763942339051595e-06, "loss": 0.7456, "step": 9195 }, { "epoch": 0.28504151691659435, "grad_norm": 2.244601605680167, "learning_rate": 3.76312699758659e-06, "loss": 0.7663, "step": 9200 }, { "epoch": 0.2851964307844838, "grad_norm": 2.326972020087724, "learning_rate": 3.762311656121584e-06, "loss": 0.6916, "step": 9205 }, { "epoch": 0.2853513446523733, "grad_norm": 2.82569745629956, "learning_rate": 3.7614963146565787e-06, "loss": 0.7682, "step": 9210 }, { "epoch": 0.28550625852026273, "grad_norm": 2.751295948545117, "learning_rate": 3.7606809731915728e-06, "loss": 0.7268, "step": 9215 }, { "epoch": 0.2856611723881522, "grad_norm": 3.0183579106062854, "learning_rate": 3.7598656317265676e-06, "loss": 0.7695, "step": 9220 }, { "epoch": 0.28581608625604166, "grad_norm": 3.1368500025049784, "learning_rate": 3.7590502902615617e-06, "loss": 0.7671, "step": 9225 }, { "epoch": 0.2859710001239311, "grad_norm": 2.783634034609448, "learning_rate": 3.7582349487965566e-06, "loss": 0.7842, "step": 9230 }, { "epoch": 0.2861259139918205, "grad_norm": 2.7826519523669884, "learning_rate": 3.7574196073315506e-06, "loss": 0.7181, "step": 9235 }, { "epoch": 0.28628082785971, "grad_norm": 2.4093109161353135, "learning_rate": 3.756604265866545e-06, "loss": 0.6936, "step": 9240 }, { "epoch": 0.28643574172759945, "grad_norm": 2.5765172795840754, "learning_rate": 3.7557889244015395e-06, "loss": 0.7372, "step": 9245 }, { "epoch": 0.2865906555954889, "grad_norm": 3.455432109932045, "learning_rate": 3.754973582936534e-06, "loss": 0.7476, "step": 9250 }, { "epoch": 0.28674556946337837, "grad_norm": 3.0567411774855966, "learning_rate": 3.754158241471529e-06, "loss": 0.7456, "step": 9255 }, { "epoch": 0.28690048333126783, "grad_norm": 2.658099401380439, "learning_rate": 3.753342900006523e-06, "loss": 0.7959, "step": 9260 }, { "epoch": 0.2870553971991573, "grad_norm": 3.170071496861152, "learning_rate": 3.7525275585415178e-06, "loss": 0.7068, "step": 9265 }, { "epoch": 0.2872103110670467, "grad_norm": 4.067527941141243, "learning_rate": 3.751712217076512e-06, "loss": 0.7151, "step": 9270 }, { "epoch": 0.28736522493493616, "grad_norm": 3.701646029213038, "learning_rate": 3.7508968756115067e-06, "loss": 0.6928, "step": 9275 }, { "epoch": 0.2875201388028256, "grad_norm": 2.641952368726213, "learning_rate": 3.7500815341465007e-06, "loss": 0.7219, "step": 9280 }, { "epoch": 0.2876750526707151, "grad_norm": 2.557438222185808, "learning_rate": 3.7492661926814956e-06, "loss": 0.639, "step": 9285 }, { "epoch": 0.28782996653860454, "grad_norm": 2.591288178091283, "learning_rate": 3.7484508512164897e-06, "loss": 0.7538, "step": 9290 }, { "epoch": 0.287984880406494, "grad_norm": 2.824481105390821, "learning_rate": 3.7476355097514845e-06, "loss": 0.746, "step": 9295 }, { "epoch": 0.28813979427438347, "grad_norm": 3.352046036534597, "learning_rate": 3.7468201682864786e-06, "loss": 0.7681, "step": 9300 }, { "epoch": 0.28829470814227287, "grad_norm": 3.5773733508527736, "learning_rate": 3.7460048268214735e-06, "loss": 0.8458, "step": 9305 }, { "epoch": 0.28844962201016233, "grad_norm": 2.235823600351463, "learning_rate": 3.7451894853564675e-06, "loss": 0.6942, "step": 9310 }, { "epoch": 0.2886045358780518, "grad_norm": 2.718401412486879, "learning_rate": 3.744374143891462e-06, "loss": 0.7844, "step": 9315 }, { "epoch": 0.28875944974594125, "grad_norm": 3.3499665405624275, "learning_rate": 3.7435588024264564e-06, "loss": 0.7903, "step": 9320 }, { "epoch": 0.2889143636138307, "grad_norm": 2.1984439258917345, "learning_rate": 3.742743460961451e-06, "loss": 0.6748, "step": 9325 }, { "epoch": 0.2890692774817202, "grad_norm": 2.335346906632131, "learning_rate": 3.7419281194964453e-06, "loss": 0.7402, "step": 9330 }, { "epoch": 0.28922419134960964, "grad_norm": 2.7223700019250647, "learning_rate": 3.7411127780314398e-06, "loss": 0.7704, "step": 9335 }, { "epoch": 0.28937910521749904, "grad_norm": 3.5088100933566824, "learning_rate": 3.7402974365664342e-06, "loss": 0.742, "step": 9340 }, { "epoch": 0.2895340190853885, "grad_norm": 2.8675422710482796, "learning_rate": 3.7394820951014287e-06, "loss": 0.726, "step": 9345 }, { "epoch": 0.28968893295327797, "grad_norm": 2.9905163019302416, "learning_rate": 3.7386667536364236e-06, "loss": 0.7006, "step": 9350 }, { "epoch": 0.28984384682116743, "grad_norm": 3.3483091943909415, "learning_rate": 3.7378514121714176e-06, "loss": 0.6602, "step": 9355 }, { "epoch": 0.2899987606890569, "grad_norm": 2.361328478843863, "learning_rate": 3.7370360707064125e-06, "loss": 0.7293, "step": 9360 }, { "epoch": 0.29015367455694635, "grad_norm": 2.48449386524388, "learning_rate": 3.7362207292414065e-06, "loss": 0.7041, "step": 9365 }, { "epoch": 0.2903085884248358, "grad_norm": 2.6166002689980528, "learning_rate": 3.7354053877764014e-06, "loss": 0.7434, "step": 9370 }, { "epoch": 0.2904635022927252, "grad_norm": 3.294267705120911, "learning_rate": 3.7345900463113955e-06, "loss": 0.777, "step": 9375 }, { "epoch": 0.2906184161606147, "grad_norm": 2.343384349016672, "learning_rate": 3.7337747048463903e-06, "loss": 0.7489, "step": 9380 }, { "epoch": 0.29077333002850414, "grad_norm": 2.5953991157066403, "learning_rate": 3.7329593633813844e-06, "loss": 0.752, "step": 9385 }, { "epoch": 0.2909282438963936, "grad_norm": 3.2091542105389164, "learning_rate": 3.732144021916379e-06, "loss": 0.6247, "step": 9390 }, { "epoch": 0.29108315776428306, "grad_norm": 2.8053930362113197, "learning_rate": 3.7313286804513733e-06, "loss": 0.6699, "step": 9395 }, { "epoch": 0.2912380716321725, "grad_norm": 2.7876137386176403, "learning_rate": 3.7305133389863678e-06, "loss": 0.677, "step": 9400 }, { "epoch": 0.291392985500062, "grad_norm": 2.381067005460194, "learning_rate": 3.7296979975213622e-06, "loss": 0.7176, "step": 9405 }, { "epoch": 0.29154789936795145, "grad_norm": 2.7504033473542577, "learning_rate": 3.7288826560563567e-06, "loss": 0.7307, "step": 9410 }, { "epoch": 0.29170281323584085, "grad_norm": 3.3863452266037455, "learning_rate": 3.728067314591351e-06, "loss": 0.7394, "step": 9415 }, { "epoch": 0.2918577271037303, "grad_norm": 2.41192873066751, "learning_rate": 3.7272519731263456e-06, "loss": 0.7576, "step": 9420 }, { "epoch": 0.2920126409716198, "grad_norm": 2.7404470167531367, "learning_rate": 3.72643663166134e-06, "loss": 0.7301, "step": 9425 }, { "epoch": 0.29216755483950924, "grad_norm": 2.6298904514319124, "learning_rate": 3.7256212901963345e-06, "loss": 0.7275, "step": 9430 }, { "epoch": 0.2923224687073987, "grad_norm": 2.5346992227231384, "learning_rate": 3.7248059487313285e-06, "loss": 0.6911, "step": 9435 }, { "epoch": 0.29247738257528816, "grad_norm": 2.4087050736399767, "learning_rate": 3.7239906072663234e-06, "loss": 0.7362, "step": 9440 }, { "epoch": 0.2926322964431776, "grad_norm": 4.428555380807439, "learning_rate": 3.7231752658013183e-06, "loss": 0.7683, "step": 9445 }, { "epoch": 0.292787210311067, "grad_norm": 2.7543556498612416, "learning_rate": 3.7223599243363123e-06, "loss": 0.7428, "step": 9450 }, { "epoch": 0.2929421241789565, "grad_norm": 2.494588688191468, "learning_rate": 3.7215445828713072e-06, "loss": 0.7108, "step": 9455 }, { "epoch": 0.29309703804684595, "grad_norm": 3.7589143401225544, "learning_rate": 3.7207292414063013e-06, "loss": 0.7137, "step": 9460 }, { "epoch": 0.2932519519147354, "grad_norm": 4.55147104711264, "learning_rate": 3.7199138999412957e-06, "loss": 0.7613, "step": 9465 }, { "epoch": 0.29340686578262487, "grad_norm": 3.049847665720729, "learning_rate": 3.71909855847629e-06, "loss": 0.674, "step": 9470 }, { "epoch": 0.29356177965051433, "grad_norm": 2.7102504323314687, "learning_rate": 3.7182832170112846e-06, "loss": 0.6283, "step": 9475 }, { "epoch": 0.2937166935184038, "grad_norm": 2.258002432162687, "learning_rate": 3.717467875546279e-06, "loss": 0.7198, "step": 9480 }, { "epoch": 0.2938716073862932, "grad_norm": 5.680641228936299, "learning_rate": 3.7166525340812736e-06, "loss": 0.7319, "step": 9485 }, { "epoch": 0.29402652125418266, "grad_norm": 2.1848587505620234, "learning_rate": 3.715837192616268e-06, "loss": 0.7228, "step": 9490 }, { "epoch": 0.2941814351220721, "grad_norm": 2.9928026558640086, "learning_rate": 3.7150218511512625e-06, "loss": 0.7806, "step": 9495 }, { "epoch": 0.2943363489899616, "grad_norm": 2.453230524275333, "learning_rate": 3.714206509686257e-06, "loss": 0.6779, "step": 9500 }, { "epoch": 0.29449126285785104, "grad_norm": 3.2651540295106614, "learning_rate": 3.7133911682212514e-06, "loss": 0.7743, "step": 9505 }, { "epoch": 0.2946461767257405, "grad_norm": 2.777880851862352, "learning_rate": 3.7125758267562454e-06, "loss": 0.7373, "step": 9510 }, { "epoch": 0.29480109059362997, "grad_norm": 2.698919046364163, "learning_rate": 3.7117604852912403e-06, "loss": 0.7471, "step": 9515 }, { "epoch": 0.29495600446151937, "grad_norm": 2.781605673031922, "learning_rate": 3.7109451438262343e-06, "loss": 0.7145, "step": 9520 }, { "epoch": 0.29511091832940883, "grad_norm": 2.6704660317344553, "learning_rate": 3.7101298023612292e-06, "loss": 0.7618, "step": 9525 }, { "epoch": 0.2952658321972983, "grad_norm": 3.631341309951115, "learning_rate": 3.7093144608962233e-06, "loss": 0.7301, "step": 9530 }, { "epoch": 0.29542074606518776, "grad_norm": 3.8344714322323163, "learning_rate": 3.708499119431218e-06, "loss": 0.7342, "step": 9535 }, { "epoch": 0.2955756599330772, "grad_norm": 2.5114983986130017, "learning_rate": 3.7076837779662126e-06, "loss": 0.7115, "step": 9540 }, { "epoch": 0.2957305738009667, "grad_norm": 2.724051780835128, "learning_rate": 3.706868436501207e-06, "loss": 0.7005, "step": 9545 }, { "epoch": 0.29588548766885614, "grad_norm": 2.45466835902362, "learning_rate": 3.7060530950362015e-06, "loss": 0.6863, "step": 9550 }, { "epoch": 0.29604040153674555, "grad_norm": 2.2315181734251013, "learning_rate": 3.705237753571196e-06, "loss": 0.748, "step": 9555 }, { "epoch": 0.296195315404635, "grad_norm": 5.206739156583997, "learning_rate": 3.7044224121061904e-06, "loss": 0.8295, "step": 9560 }, { "epoch": 0.29635022927252447, "grad_norm": 4.182113506111645, "learning_rate": 3.703607070641185e-06, "loss": 0.7223, "step": 9565 }, { "epoch": 0.29650514314041393, "grad_norm": 2.2353368183329274, "learning_rate": 3.7027917291761794e-06, "loss": 0.7364, "step": 9570 }, { "epoch": 0.2966600570083034, "grad_norm": 3.094182043862017, "learning_rate": 3.701976387711174e-06, "loss": 0.7343, "step": 9575 }, { "epoch": 0.29681497087619285, "grad_norm": 2.8839583944435527, "learning_rate": 3.7011610462461683e-06, "loss": 0.7527, "step": 9580 }, { "epoch": 0.2969698847440823, "grad_norm": 2.6325903651763145, "learning_rate": 3.7003457047811623e-06, "loss": 0.786, "step": 9585 }, { "epoch": 0.2971247986119717, "grad_norm": 3.2582485386719626, "learning_rate": 3.699530363316157e-06, "loss": 0.7155, "step": 9590 }, { "epoch": 0.2972797124798612, "grad_norm": 2.4375393488811503, "learning_rate": 3.6987150218511512e-06, "loss": 0.7901, "step": 9595 }, { "epoch": 0.29743462634775064, "grad_norm": 2.7920683807653934, "learning_rate": 3.697899680386146e-06, "loss": 0.7495, "step": 9600 }, { "epoch": 0.2975895402156401, "grad_norm": 2.2479246231639647, "learning_rate": 3.69708433892114e-06, "loss": 0.7814, "step": 9605 }, { "epoch": 0.29774445408352956, "grad_norm": 2.802214283888634, "learning_rate": 3.696268997456135e-06, "loss": 0.7846, "step": 9610 }, { "epoch": 0.297899367951419, "grad_norm": 3.0304076257830213, "learning_rate": 3.695453655991129e-06, "loss": 0.7564, "step": 9615 }, { "epoch": 0.2980542818193085, "grad_norm": 2.7185860444345136, "learning_rate": 3.694638314526124e-06, "loss": 0.724, "step": 9620 }, { "epoch": 0.2982091956871979, "grad_norm": 4.49021294741384, "learning_rate": 3.693822973061118e-06, "loss": 0.7872, "step": 9625 }, { "epoch": 0.29836410955508735, "grad_norm": 2.635303399814187, "learning_rate": 3.693007631596113e-06, "loss": 0.7415, "step": 9630 }, { "epoch": 0.2985190234229768, "grad_norm": 2.9782337224409483, "learning_rate": 3.6921922901311073e-06, "loss": 0.7869, "step": 9635 }, { "epoch": 0.2986739372908663, "grad_norm": 3.4762174134522303, "learning_rate": 3.691376948666102e-06, "loss": 0.7602, "step": 9640 }, { "epoch": 0.29882885115875574, "grad_norm": 2.829738383709011, "learning_rate": 3.6905616072010962e-06, "loss": 0.7873, "step": 9645 }, { "epoch": 0.2989837650266452, "grad_norm": 3.026275982636133, "learning_rate": 3.6897462657360907e-06, "loss": 0.7217, "step": 9650 }, { "epoch": 0.29913867889453466, "grad_norm": 3.082434117453375, "learning_rate": 3.688930924271085e-06, "loss": 0.7107, "step": 9655 }, { "epoch": 0.29929359276242407, "grad_norm": 2.9295248012348516, "learning_rate": 3.688115582806079e-06, "loss": 0.7991, "step": 9660 }, { "epoch": 0.2994485066303135, "grad_norm": 2.4609347243771813, "learning_rate": 3.687300241341074e-06, "loss": 0.7274, "step": 9665 }, { "epoch": 0.299603420498203, "grad_norm": 2.7302321112531014, "learning_rate": 3.686484899876068e-06, "loss": 0.7612, "step": 9670 }, { "epoch": 0.29975833436609245, "grad_norm": 2.7585330964679886, "learning_rate": 3.685669558411063e-06, "loss": 0.8035, "step": 9675 }, { "epoch": 0.2999132482339819, "grad_norm": 2.552159555245994, "learning_rate": 3.684854216946057e-06, "loss": 0.7125, "step": 9680 }, { "epoch": 0.30006816210187137, "grad_norm": 2.820348874073129, "learning_rate": 3.684038875481052e-06, "loss": 0.6914, "step": 9685 }, { "epoch": 0.30022307596976083, "grad_norm": 3.3308687518184295, "learning_rate": 3.683223534016046e-06, "loss": 0.732, "step": 9690 }, { "epoch": 0.30037798983765024, "grad_norm": 2.9477720113025785, "learning_rate": 3.682408192551041e-06, "loss": 0.6898, "step": 9695 }, { "epoch": 0.3005329037055397, "grad_norm": 2.5729705540941192, "learning_rate": 3.681592851086035e-06, "loss": 0.7482, "step": 9700 }, { "epoch": 0.30068781757342916, "grad_norm": 2.5554502833346775, "learning_rate": 3.6807775096210298e-06, "loss": 0.7144, "step": 9705 }, { "epoch": 0.3008427314413186, "grad_norm": 2.6383969979622397, "learning_rate": 3.679962168156024e-06, "loss": 0.7763, "step": 9710 }, { "epoch": 0.3009976453092081, "grad_norm": 3.34909235266264, "learning_rate": 3.6791468266910187e-06, "loss": 0.7146, "step": 9715 }, { "epoch": 0.30115255917709755, "grad_norm": 2.934280154718233, "learning_rate": 3.6783314852260127e-06, "loss": 0.7834, "step": 9720 }, { "epoch": 0.301307473044987, "grad_norm": 2.7205299432261048, "learning_rate": 3.6775161437610076e-06, "loss": 0.6964, "step": 9725 }, { "epoch": 0.30146238691287647, "grad_norm": 2.436263516145441, "learning_rate": 3.676700802296002e-06, "loss": 0.7447, "step": 9730 }, { "epoch": 0.3016173007807659, "grad_norm": 3.1493726617740214, "learning_rate": 3.675885460830996e-06, "loss": 0.7066, "step": 9735 }, { "epoch": 0.30177221464865533, "grad_norm": 3.5066568366566293, "learning_rate": 3.675070119365991e-06, "loss": 0.7614, "step": 9740 }, { "epoch": 0.3019271285165448, "grad_norm": 3.647337931854857, "learning_rate": 3.674254777900985e-06, "loss": 0.7087, "step": 9745 }, { "epoch": 0.30208204238443426, "grad_norm": 3.2040368544995297, "learning_rate": 3.67343943643598e-06, "loss": 0.7218, "step": 9750 }, { "epoch": 0.3022369562523237, "grad_norm": 3.36605095313676, "learning_rate": 3.672624094970974e-06, "loss": 0.7573, "step": 9755 }, { "epoch": 0.3023918701202132, "grad_norm": 2.9176228223701344, "learning_rate": 3.671808753505969e-06, "loss": 0.756, "step": 9760 }, { "epoch": 0.30254678398810264, "grad_norm": 2.3990639088636097, "learning_rate": 3.670993412040963e-06, "loss": 0.7158, "step": 9765 }, { "epoch": 0.30270169785599205, "grad_norm": 2.514719673273362, "learning_rate": 3.6701780705759577e-06, "loss": 0.7181, "step": 9770 }, { "epoch": 0.3028566117238815, "grad_norm": 2.5263086890845177, "learning_rate": 3.6693627291109518e-06, "loss": 0.7541, "step": 9775 }, { "epoch": 0.30301152559177097, "grad_norm": 2.941967917544809, "learning_rate": 3.6685473876459466e-06, "loss": 0.7452, "step": 9780 }, { "epoch": 0.30316643945966043, "grad_norm": 2.928962309347921, "learning_rate": 3.6677320461809407e-06, "loss": 0.8105, "step": 9785 }, { "epoch": 0.3033213533275499, "grad_norm": 2.6084173483902853, "learning_rate": 3.6669167047159356e-06, "loss": 0.7174, "step": 9790 }, { "epoch": 0.30347626719543935, "grad_norm": 2.483595531592936, "learning_rate": 3.6661013632509296e-06, "loss": 0.768, "step": 9795 }, { "epoch": 0.3036311810633288, "grad_norm": 3.3385881178627868, "learning_rate": 3.6652860217859245e-06, "loss": 0.723, "step": 9800 }, { "epoch": 0.3037860949312182, "grad_norm": 2.753901873947156, "learning_rate": 3.6644706803209185e-06, "loss": 0.8429, "step": 9805 }, { "epoch": 0.3039410087991077, "grad_norm": 3.5310052643239573, "learning_rate": 3.663655338855913e-06, "loss": 0.697, "step": 9810 }, { "epoch": 0.30409592266699714, "grad_norm": 2.613400565538126, "learning_rate": 3.6628399973909074e-06, "loss": 0.7746, "step": 9815 }, { "epoch": 0.3042508365348866, "grad_norm": 3.0660621348256645, "learning_rate": 3.662024655925902e-06, "loss": 0.7668, "step": 9820 }, { "epoch": 0.30440575040277607, "grad_norm": 5.161450757567439, "learning_rate": 3.6612093144608968e-06, "loss": 0.6976, "step": 9825 }, { "epoch": 0.3045606642706655, "grad_norm": 2.0929070700478016, "learning_rate": 3.660393972995891e-06, "loss": 0.6779, "step": 9830 }, { "epoch": 0.304715578138555, "grad_norm": 2.558824571276169, "learning_rate": 3.6595786315308857e-06, "loss": 0.7281, "step": 9835 }, { "epoch": 0.3048704920064444, "grad_norm": 1.9260073373397497, "learning_rate": 3.6587632900658797e-06, "loss": 0.6903, "step": 9840 }, { "epoch": 0.30502540587433385, "grad_norm": 2.883009496378845, "learning_rate": 3.6579479486008746e-06, "loss": 0.7256, "step": 9845 }, { "epoch": 0.3051803197422233, "grad_norm": 2.554656215850843, "learning_rate": 3.6571326071358686e-06, "loss": 0.7434, "step": 9850 }, { "epoch": 0.3053352336101128, "grad_norm": 2.004143245207792, "learning_rate": 3.6563172656708635e-06, "loss": 0.7453, "step": 9855 }, { "epoch": 0.30549014747800224, "grad_norm": 2.924535578539942, "learning_rate": 3.6555019242058576e-06, "loss": 0.6957, "step": 9860 }, { "epoch": 0.3056450613458917, "grad_norm": 4.162800144303281, "learning_rate": 3.6546865827408525e-06, "loss": 0.7048, "step": 9865 }, { "epoch": 0.30579997521378116, "grad_norm": 2.3601795957108536, "learning_rate": 3.6538712412758465e-06, "loss": 0.647, "step": 9870 }, { "epoch": 0.30595488908167057, "grad_norm": 3.8227541723389464, "learning_rate": 3.6530558998108414e-06, "loss": 0.6464, "step": 9875 }, { "epoch": 0.30610980294956003, "grad_norm": 2.2421579133354252, "learning_rate": 3.6522405583458354e-06, "loss": 0.7068, "step": 9880 }, { "epoch": 0.3062647168174495, "grad_norm": 3.376374247704328, "learning_rate": 3.65142521688083e-06, "loss": 0.7585, "step": 9885 }, { "epoch": 0.30641963068533895, "grad_norm": 2.8659866878441096, "learning_rate": 3.6506098754158243e-06, "loss": 0.6655, "step": 9890 }, { "epoch": 0.3065745445532284, "grad_norm": 3.7353725385031495, "learning_rate": 3.6497945339508188e-06, "loss": 0.7214, "step": 9895 }, { "epoch": 0.3067294584211179, "grad_norm": 2.6702844553841825, "learning_rate": 3.6489791924858132e-06, "loss": 0.7954, "step": 9900 }, { "epoch": 0.30688437228900733, "grad_norm": 2.427200392420839, "learning_rate": 3.6481638510208077e-06, "loss": 0.7227, "step": 9905 }, { "epoch": 0.30703928615689674, "grad_norm": 3.4522734691864434, "learning_rate": 3.647348509555802e-06, "loss": 0.7885, "step": 9910 }, { "epoch": 0.3071942000247862, "grad_norm": 2.2864001599661266, "learning_rate": 3.6465331680907966e-06, "loss": 0.6931, "step": 9915 }, { "epoch": 0.30734911389267566, "grad_norm": 2.4539546671502905, "learning_rate": 3.6457178266257915e-06, "loss": 0.697, "step": 9920 }, { "epoch": 0.3075040277605651, "grad_norm": 2.3281368807914977, "learning_rate": 3.6449024851607855e-06, "loss": 0.706, "step": 9925 }, { "epoch": 0.3076589416284546, "grad_norm": 3.0393540056291757, "learning_rate": 3.6440871436957804e-06, "loss": 0.7356, "step": 9930 }, { "epoch": 0.30781385549634405, "grad_norm": 3.4177697965525873, "learning_rate": 3.6432718022307745e-06, "loss": 0.7444, "step": 9935 }, { "epoch": 0.3079687693642335, "grad_norm": 4.174787392367597, "learning_rate": 3.6424564607657693e-06, "loss": 0.7331, "step": 9940 }, { "epoch": 0.3081236832321229, "grad_norm": 2.3074475053598786, "learning_rate": 3.6416411193007634e-06, "loss": 0.6691, "step": 9945 }, { "epoch": 0.3082785971000124, "grad_norm": 2.8597916129672947, "learning_rate": 3.6408257778357583e-06, "loss": 0.7241, "step": 9950 }, { "epoch": 0.30843351096790184, "grad_norm": 2.507048938653754, "learning_rate": 3.6400104363707523e-06, "loss": 0.7977, "step": 9955 }, { "epoch": 0.3085884248357913, "grad_norm": 2.4057750869720214, "learning_rate": 3.6391950949057468e-06, "loss": 0.7209, "step": 9960 }, { "epoch": 0.30874333870368076, "grad_norm": 2.762601888120641, "learning_rate": 3.638379753440741e-06, "loss": 0.7286, "step": 9965 }, { "epoch": 0.3088982525715702, "grad_norm": 2.467698971971578, "learning_rate": 3.6375644119757357e-06, "loss": 0.6862, "step": 9970 }, { "epoch": 0.3090531664394597, "grad_norm": 5.999228813406483, "learning_rate": 3.63674907051073e-06, "loss": 0.6444, "step": 9975 }, { "epoch": 0.3092080803073491, "grad_norm": 2.54165858684686, "learning_rate": 3.6359337290457246e-06, "loss": 0.7314, "step": 9980 }, { "epoch": 0.30936299417523855, "grad_norm": 2.6434409168936597, "learning_rate": 3.635118387580719e-06, "loss": 0.7068, "step": 9985 }, { "epoch": 0.309517908043128, "grad_norm": 2.822583955743681, "learning_rate": 3.6343030461157135e-06, "loss": 0.7508, "step": 9990 }, { "epoch": 0.30967282191101747, "grad_norm": 3.0723802421563655, "learning_rate": 3.633487704650708e-06, "loss": 0.7798, "step": 9995 }, { "epoch": 0.30982773577890693, "grad_norm": 2.4611018771955906, "learning_rate": 3.6326723631857024e-06, "loss": 0.7514, "step": 10000 }, { "epoch": 0.3099826496467964, "grad_norm": 2.5776861526523343, "learning_rate": 3.6318570217206965e-06, "loss": 0.6688, "step": 10005 }, { "epoch": 0.31013756351468585, "grad_norm": 2.475227858954842, "learning_rate": 3.6310416802556913e-06, "loss": 0.6477, "step": 10010 }, { "epoch": 0.3102924773825753, "grad_norm": 2.2862212586442396, "learning_rate": 3.6302263387906862e-06, "loss": 0.6981, "step": 10015 }, { "epoch": 0.3104473912504647, "grad_norm": 3.104071151906772, "learning_rate": 3.6294109973256803e-06, "loss": 0.6795, "step": 10020 }, { "epoch": 0.3106023051183542, "grad_norm": 3.3007772753389264, "learning_rate": 3.628595655860675e-06, "loss": 0.6724, "step": 10025 }, { "epoch": 0.31075721898624364, "grad_norm": 3.2509182662116354, "learning_rate": 3.627780314395669e-06, "loss": 0.7109, "step": 10030 }, { "epoch": 0.3109121328541331, "grad_norm": 2.6390936064969, "learning_rate": 3.626964972930664e-06, "loss": 0.7611, "step": 10035 }, { "epoch": 0.31106704672202257, "grad_norm": 3.186288963600813, "learning_rate": 3.626149631465658e-06, "loss": 0.7461, "step": 10040 }, { "epoch": 0.31122196058991203, "grad_norm": 3.7531098659835544, "learning_rate": 3.6253342900006526e-06, "loss": 0.7605, "step": 10045 }, { "epoch": 0.3113768744578015, "grad_norm": 2.527413817132097, "learning_rate": 3.624518948535647e-06, "loss": 0.7037, "step": 10050 }, { "epoch": 0.3115317883256909, "grad_norm": 2.565308200997408, "learning_rate": 3.6237036070706415e-06, "loss": 0.7532, "step": 10055 }, { "epoch": 0.31168670219358036, "grad_norm": 3.1277262932388856, "learning_rate": 3.622888265605636e-06, "loss": 0.7575, "step": 10060 }, { "epoch": 0.3118416160614698, "grad_norm": 2.751176418028874, "learning_rate": 3.6220729241406304e-06, "loss": 0.7561, "step": 10065 }, { "epoch": 0.3119965299293593, "grad_norm": 2.468750324061455, "learning_rate": 3.621257582675625e-06, "loss": 0.7322, "step": 10070 }, { "epoch": 0.31215144379724874, "grad_norm": 2.8594415514647378, "learning_rate": 3.6204422412106193e-06, "loss": 0.7193, "step": 10075 }, { "epoch": 0.3123063576651382, "grad_norm": 3.159570438499813, "learning_rate": 3.6196268997456133e-06, "loss": 0.7925, "step": 10080 }, { "epoch": 0.31246127153302766, "grad_norm": 3.083546223691494, "learning_rate": 3.6188115582806082e-06, "loss": 0.7964, "step": 10085 }, { "epoch": 0.31261618540091707, "grad_norm": 2.3373221806355353, "learning_rate": 3.6179962168156023e-06, "loss": 0.6919, "step": 10090 }, { "epoch": 0.31277109926880653, "grad_norm": 2.7573056620577066, "learning_rate": 3.617180875350597e-06, "loss": 0.7904, "step": 10095 }, { "epoch": 0.312926013136696, "grad_norm": 2.2420457138460455, "learning_rate": 3.616365533885591e-06, "loss": 0.7322, "step": 10100 }, { "epoch": 0.31308092700458545, "grad_norm": 4.733324607141138, "learning_rate": 3.615550192420586e-06, "loss": 0.7476, "step": 10105 }, { "epoch": 0.3132358408724749, "grad_norm": 2.259436681783457, "learning_rate": 3.614734850955581e-06, "loss": 0.8373, "step": 10110 }, { "epoch": 0.3133907547403644, "grad_norm": 2.3471377142555925, "learning_rate": 3.613919509490575e-06, "loss": 0.7043, "step": 10115 }, { "epoch": 0.31354566860825384, "grad_norm": 2.3837887392339514, "learning_rate": 3.6131041680255694e-06, "loss": 0.7588, "step": 10120 }, { "epoch": 0.31370058247614324, "grad_norm": 2.483679838966951, "learning_rate": 3.612288826560564e-06, "loss": 0.7012, "step": 10125 }, { "epoch": 0.3138554963440327, "grad_norm": 2.92201778262051, "learning_rate": 3.6114734850955584e-06, "loss": 0.8069, "step": 10130 }, { "epoch": 0.31401041021192216, "grad_norm": 2.4667503975255647, "learning_rate": 3.610658143630553e-06, "loss": 0.7842, "step": 10135 }, { "epoch": 0.3141653240798116, "grad_norm": 2.5459094696076474, "learning_rate": 3.6098428021655473e-06, "loss": 0.7079, "step": 10140 }, { "epoch": 0.3143202379477011, "grad_norm": 2.2231924421905935, "learning_rate": 3.6090274607005417e-06, "loss": 0.7346, "step": 10145 }, { "epoch": 0.31447515181559055, "grad_norm": 2.8812585135257756, "learning_rate": 3.608212119235536e-06, "loss": 0.7243, "step": 10150 }, { "epoch": 0.31463006568348, "grad_norm": 2.8238159998407832, "learning_rate": 3.6073967777705302e-06, "loss": 0.5737, "step": 10155 }, { "epoch": 0.3147849795513694, "grad_norm": 3.9180098110220376, "learning_rate": 3.606581436305525e-06, "loss": 0.7884, "step": 10160 }, { "epoch": 0.3149398934192589, "grad_norm": 2.7175173552990386, "learning_rate": 3.605766094840519e-06, "loss": 0.7212, "step": 10165 }, { "epoch": 0.31509480728714834, "grad_norm": 2.6102257251435503, "learning_rate": 3.604950753375514e-06, "loss": 0.7807, "step": 10170 }, { "epoch": 0.3152497211550378, "grad_norm": 2.5293222971730365, "learning_rate": 3.604135411910508e-06, "loss": 0.7326, "step": 10175 }, { "epoch": 0.31540463502292726, "grad_norm": 3.1223370091213725, "learning_rate": 3.603320070445503e-06, "loss": 0.7853, "step": 10180 }, { "epoch": 0.3155595488908167, "grad_norm": 2.6123678678163538, "learning_rate": 3.602504728980497e-06, "loss": 0.8291, "step": 10185 }, { "epoch": 0.3157144627587062, "grad_norm": 3.801833933450655, "learning_rate": 3.601689387515492e-06, "loss": 0.7923, "step": 10190 }, { "epoch": 0.3158693766265956, "grad_norm": 2.573014362179669, "learning_rate": 3.600874046050486e-06, "loss": 0.804, "step": 10195 }, { "epoch": 0.31602429049448505, "grad_norm": 2.386160978014513, "learning_rate": 3.600058704585481e-06, "loss": 0.7062, "step": 10200 }, { "epoch": 0.3161792043623745, "grad_norm": 3.305818709339036, "learning_rate": 3.5992433631204752e-06, "loss": 0.6922, "step": 10205 }, { "epoch": 0.31633411823026397, "grad_norm": 2.6999890283329537, "learning_rate": 3.5984280216554697e-06, "loss": 0.7431, "step": 10210 }, { "epoch": 0.31648903209815343, "grad_norm": 3.1056881627772768, "learning_rate": 3.597612680190464e-06, "loss": 0.7805, "step": 10215 }, { "epoch": 0.3166439459660429, "grad_norm": 2.3997318426297087, "learning_rate": 3.5967973387254586e-06, "loss": 0.7076, "step": 10220 }, { "epoch": 0.31679885983393236, "grad_norm": 2.848408703540176, "learning_rate": 3.595981997260453e-06, "loss": 0.7387, "step": 10225 }, { "epoch": 0.31695377370182176, "grad_norm": 2.682018006452229, "learning_rate": 3.595166655795447e-06, "loss": 0.7043, "step": 10230 }, { "epoch": 0.3171086875697112, "grad_norm": 2.6847569530592197, "learning_rate": 3.594351314330442e-06, "loss": 0.856, "step": 10235 }, { "epoch": 0.3172636014376007, "grad_norm": 2.6726584908806457, "learning_rate": 3.593535972865436e-06, "loss": 0.6978, "step": 10240 }, { "epoch": 0.31741851530549015, "grad_norm": 2.872193507224582, "learning_rate": 3.592720631400431e-06, "loss": 0.6968, "step": 10245 }, { "epoch": 0.3175734291733796, "grad_norm": 2.7405232663398404, "learning_rate": 3.591905289935425e-06, "loss": 0.7195, "step": 10250 }, { "epoch": 0.31772834304126907, "grad_norm": 3.084000708122685, "learning_rate": 3.59108994847042e-06, "loss": 0.7306, "step": 10255 }, { "epoch": 0.31788325690915853, "grad_norm": 2.3973208665979633, "learning_rate": 3.590274607005414e-06, "loss": 0.7127, "step": 10260 }, { "epoch": 0.31803817077704793, "grad_norm": 3.2160210575299826, "learning_rate": 3.5894592655404088e-06, "loss": 0.7662, "step": 10265 }, { "epoch": 0.3181930846449374, "grad_norm": 3.1333160283382058, "learning_rate": 3.588643924075403e-06, "loss": 0.7397, "step": 10270 }, { "epoch": 0.31834799851282686, "grad_norm": 2.872508268946, "learning_rate": 3.5878285826103977e-06, "loss": 0.7588, "step": 10275 }, { "epoch": 0.3185029123807163, "grad_norm": 2.911995759859247, "learning_rate": 3.5870132411453917e-06, "loss": 0.6754, "step": 10280 }, { "epoch": 0.3186578262486058, "grad_norm": 2.3337503433903506, "learning_rate": 3.5861978996803866e-06, "loss": 0.7011, "step": 10285 }, { "epoch": 0.31881274011649524, "grad_norm": 2.492514740464281, "learning_rate": 3.585382558215381e-06, "loss": 0.7455, "step": 10290 }, { "epoch": 0.3189676539843847, "grad_norm": 2.4112840756883136, "learning_rate": 3.5845672167503755e-06, "loss": 0.74, "step": 10295 }, { "epoch": 0.3191225678522741, "grad_norm": 2.2798932573379065, "learning_rate": 3.58375187528537e-06, "loss": 0.7435, "step": 10300 }, { "epoch": 0.31927748172016357, "grad_norm": 2.605964636540268, "learning_rate": 3.582936533820364e-06, "loss": 0.6972, "step": 10305 }, { "epoch": 0.31943239558805303, "grad_norm": 4.139746223549636, "learning_rate": 3.582121192355359e-06, "loss": 0.6932, "step": 10310 }, { "epoch": 0.3195873094559425, "grad_norm": 2.605124993973739, "learning_rate": 3.581305850890353e-06, "loss": 0.7497, "step": 10315 }, { "epoch": 0.31974222332383195, "grad_norm": 2.693825444890214, "learning_rate": 3.580490509425348e-06, "loss": 0.6895, "step": 10320 }, { "epoch": 0.3198971371917214, "grad_norm": 2.399011936048288, "learning_rate": 3.579675167960342e-06, "loss": 0.7098, "step": 10325 }, { "epoch": 0.3200520510596109, "grad_norm": 5.865093110167051, "learning_rate": 3.5788598264953367e-06, "loss": 0.7151, "step": 10330 }, { "epoch": 0.32020696492750034, "grad_norm": 2.846447453422827, "learning_rate": 3.5780444850303308e-06, "loss": 0.6735, "step": 10335 }, { "epoch": 0.32036187879538974, "grad_norm": 2.955711627171982, "learning_rate": 3.5772291435653256e-06, "loss": 0.7316, "step": 10340 }, { "epoch": 0.3205167926632792, "grad_norm": 3.0758580841092473, "learning_rate": 3.5764138021003197e-06, "loss": 0.8233, "step": 10345 }, { "epoch": 0.32067170653116867, "grad_norm": 2.6025380646527116, "learning_rate": 3.5755984606353146e-06, "loss": 0.6341, "step": 10350 }, { "epoch": 0.3208266203990581, "grad_norm": 2.569365282823467, "learning_rate": 3.5747831191703086e-06, "loss": 0.7509, "step": 10355 }, { "epoch": 0.3209815342669476, "grad_norm": 2.723777946427814, "learning_rate": 3.5739677777053035e-06, "loss": 0.9231, "step": 10360 }, { "epoch": 0.32113644813483705, "grad_norm": 3.5259905824410547, "learning_rate": 3.5731524362402975e-06, "loss": 0.7615, "step": 10365 }, { "epoch": 0.3212913620027265, "grad_norm": 2.9989550107255223, "learning_rate": 3.5723370947752924e-06, "loss": 0.7232, "step": 10370 }, { "epoch": 0.3214462758706159, "grad_norm": 2.424945409245488, "learning_rate": 3.5715217533102864e-06, "loss": 0.7289, "step": 10375 }, { "epoch": 0.3216011897385054, "grad_norm": 2.6009264306514277, "learning_rate": 3.5707064118452813e-06, "loss": 0.6979, "step": 10380 }, { "epoch": 0.32175610360639484, "grad_norm": 3.1509869732767886, "learning_rate": 3.5698910703802758e-06, "loss": 0.823, "step": 10385 }, { "epoch": 0.3219110174742843, "grad_norm": 2.6464034181813623, "learning_rate": 3.56907572891527e-06, "loss": 0.7086, "step": 10390 }, { "epoch": 0.32206593134217376, "grad_norm": 2.5563693001434435, "learning_rate": 3.5682603874502647e-06, "loss": 0.7782, "step": 10395 }, { "epoch": 0.3222208452100632, "grad_norm": 2.794049197270955, "learning_rate": 3.5674450459852587e-06, "loss": 0.6853, "step": 10400 }, { "epoch": 0.3223757590779527, "grad_norm": 2.9443718133014536, "learning_rate": 3.5666297045202536e-06, "loss": 0.7904, "step": 10405 }, { "epoch": 0.3225306729458421, "grad_norm": 4.7343684066872385, "learning_rate": 3.5658143630552476e-06, "loss": 0.8151, "step": 10410 }, { "epoch": 0.32268558681373155, "grad_norm": 1.9451031831149037, "learning_rate": 3.5649990215902425e-06, "loss": 0.6716, "step": 10415 }, { "epoch": 0.322840500681621, "grad_norm": 2.831422463685274, "learning_rate": 3.5641836801252366e-06, "loss": 0.6977, "step": 10420 }, { "epoch": 0.3229954145495105, "grad_norm": 2.5905040013978544, "learning_rate": 3.5633683386602314e-06, "loss": 0.7248, "step": 10425 }, { "epoch": 0.32315032841739993, "grad_norm": 2.834675358371188, "learning_rate": 3.5625529971952255e-06, "loss": 0.7104, "step": 10430 }, { "epoch": 0.3233052422852894, "grad_norm": 3.342082165089633, "learning_rate": 3.5617376557302204e-06, "loss": 0.7135, "step": 10435 }, { "epoch": 0.32346015615317886, "grad_norm": 2.566395609668346, "learning_rate": 3.5609223142652144e-06, "loss": 0.6787, "step": 10440 }, { "epoch": 0.32361507002106826, "grad_norm": 2.9045777350605926, "learning_rate": 3.5601069728002093e-06, "loss": 0.7434, "step": 10445 }, { "epoch": 0.3237699838889577, "grad_norm": 2.7529913437819573, "learning_rate": 3.5592916313352033e-06, "loss": 0.7275, "step": 10450 }, { "epoch": 0.3239248977568472, "grad_norm": 2.135164702282977, "learning_rate": 3.558476289870198e-06, "loss": 0.8268, "step": 10455 }, { "epoch": 0.32407981162473665, "grad_norm": 2.2631021853781017, "learning_rate": 3.5576609484051922e-06, "loss": 0.6729, "step": 10460 }, { "epoch": 0.3242347254926261, "grad_norm": 2.2668815577127206, "learning_rate": 3.5568456069401867e-06, "loss": 0.6791, "step": 10465 }, { "epoch": 0.32438963936051557, "grad_norm": 2.372626685394702, "learning_rate": 3.556030265475181e-06, "loss": 0.7044, "step": 10470 }, { "epoch": 0.32454455322840503, "grad_norm": 2.4297806481488897, "learning_rate": 3.5552149240101756e-06, "loss": 0.7152, "step": 10475 }, { "epoch": 0.32469946709629444, "grad_norm": 2.4716551676479614, "learning_rate": 3.5543995825451705e-06, "loss": 0.6855, "step": 10480 }, { "epoch": 0.3248543809641839, "grad_norm": 2.2514520863728276, "learning_rate": 3.5535842410801645e-06, "loss": 0.7294, "step": 10485 }, { "epoch": 0.32500929483207336, "grad_norm": 2.9594884658269103, "learning_rate": 3.5527688996151594e-06, "loss": 0.6847, "step": 10490 }, { "epoch": 0.3251642086999628, "grad_norm": 3.657535718436277, "learning_rate": 3.5519535581501535e-06, "loss": 0.7949, "step": 10495 }, { "epoch": 0.3253191225678523, "grad_norm": 2.862417383681578, "learning_rate": 3.5511382166851483e-06, "loss": 0.713, "step": 10500 }, { "epoch": 0.32547403643574174, "grad_norm": 3.102053738162179, "learning_rate": 3.5503228752201424e-06, "loss": 0.6877, "step": 10505 }, { "epoch": 0.3256289503036312, "grad_norm": 2.628531596305015, "learning_rate": 3.5495075337551373e-06, "loss": 0.6718, "step": 10510 }, { "epoch": 0.3257838641715206, "grad_norm": 3.4606924485470665, "learning_rate": 3.5486921922901313e-06, "loss": 0.7584, "step": 10515 }, { "epoch": 0.32593877803941007, "grad_norm": 1.9263835712428283, "learning_rate": 3.547876850825126e-06, "loss": 0.6747, "step": 10520 }, { "epoch": 0.32609369190729953, "grad_norm": 2.949098641846432, "learning_rate": 3.54706150936012e-06, "loss": 0.6926, "step": 10525 }, { "epoch": 0.326248605775189, "grad_norm": 2.370238415124707, "learning_rate": 3.546246167895115e-06, "loss": 0.7521, "step": 10530 }, { "epoch": 0.32640351964307845, "grad_norm": 2.6525975369287593, "learning_rate": 3.545430826430109e-06, "loss": 0.6957, "step": 10535 }, { "epoch": 0.3265584335109679, "grad_norm": 2.4266229179852288, "learning_rate": 3.5446154849651036e-06, "loss": 0.652, "step": 10540 }, { "epoch": 0.3267133473788574, "grad_norm": 3.0465401855996275, "learning_rate": 3.543800143500098e-06, "loss": 0.7277, "step": 10545 }, { "epoch": 0.3268682612467468, "grad_norm": 2.396804697579797, "learning_rate": 3.5429848020350925e-06, "loss": 0.7784, "step": 10550 }, { "epoch": 0.32702317511463624, "grad_norm": 2.4635112318284933, "learning_rate": 3.542169460570087e-06, "loss": 0.7136, "step": 10555 }, { "epoch": 0.3271780889825257, "grad_norm": 2.671842867885052, "learning_rate": 3.5413541191050814e-06, "loss": 0.7371, "step": 10560 }, { "epoch": 0.32733300285041517, "grad_norm": 3.3232524291275287, "learning_rate": 3.540538777640076e-06, "loss": 0.7898, "step": 10565 }, { "epoch": 0.32748791671830463, "grad_norm": 2.8116804970486213, "learning_rate": 3.5397234361750703e-06, "loss": 0.7554, "step": 10570 }, { "epoch": 0.3276428305861941, "grad_norm": 2.4717476511337684, "learning_rate": 3.5389080947100652e-06, "loss": 0.712, "step": 10575 }, { "epoch": 0.32779774445408355, "grad_norm": 2.169913932823033, "learning_rate": 3.5380927532450593e-06, "loss": 0.7118, "step": 10580 }, { "epoch": 0.32795265832197296, "grad_norm": 2.521910168335435, "learning_rate": 3.537277411780054e-06, "loss": 0.7214, "step": 10585 }, { "epoch": 0.3281075721898624, "grad_norm": 1.954128987303488, "learning_rate": 3.536462070315048e-06, "loss": 0.6155, "step": 10590 }, { "epoch": 0.3282624860577519, "grad_norm": 2.6012415326503437, "learning_rate": 3.535646728850043e-06, "loss": 0.7298, "step": 10595 }, { "epoch": 0.32841739992564134, "grad_norm": 2.4776149234901457, "learning_rate": 3.534831387385037e-06, "loss": 0.7105, "step": 10600 }, { "epoch": 0.3285723137935308, "grad_norm": 2.53308086253722, "learning_rate": 3.534016045920032e-06, "loss": 0.6704, "step": 10605 }, { "epoch": 0.32872722766142026, "grad_norm": 2.8936411110530877, "learning_rate": 3.533200704455026e-06, "loss": 0.7325, "step": 10610 }, { "epoch": 0.3288821415293097, "grad_norm": 2.440759974081941, "learning_rate": 3.5323853629900205e-06, "loss": 0.6629, "step": 10615 }, { "epoch": 0.32903705539719913, "grad_norm": 4.014084729338176, "learning_rate": 3.531570021525015e-06, "loss": 0.6376, "step": 10620 }, { "epoch": 0.3291919692650886, "grad_norm": 2.225803622976974, "learning_rate": 3.5307546800600094e-06, "loss": 0.767, "step": 10625 }, { "epoch": 0.32934688313297805, "grad_norm": 2.706115361928797, "learning_rate": 3.529939338595004e-06, "loss": 0.7046, "step": 10630 }, { "epoch": 0.3295017970008675, "grad_norm": 2.6225504009042107, "learning_rate": 3.5291239971299983e-06, "loss": 0.704, "step": 10635 }, { "epoch": 0.329656710868757, "grad_norm": 2.679010147181377, "learning_rate": 3.5283086556649928e-06, "loss": 0.6691, "step": 10640 }, { "epoch": 0.32981162473664644, "grad_norm": 4.505394447891572, "learning_rate": 3.5274933141999872e-06, "loss": 0.7733, "step": 10645 }, { "epoch": 0.3299665386045359, "grad_norm": 2.532334319374387, "learning_rate": 3.5266779727349813e-06, "loss": 0.7607, "step": 10650 }, { "epoch": 0.33012145247242536, "grad_norm": 4.311065926506735, "learning_rate": 3.525862631269976e-06, "loss": 0.7249, "step": 10655 }, { "epoch": 0.33027636634031476, "grad_norm": 2.8675090069135165, "learning_rate": 3.52504728980497e-06, "loss": 0.6935, "step": 10660 }, { "epoch": 0.3304312802082042, "grad_norm": 2.7631158082311873, "learning_rate": 3.524231948339965e-06, "loss": 0.7229, "step": 10665 }, { "epoch": 0.3305861940760937, "grad_norm": 2.8709030334669916, "learning_rate": 3.52341660687496e-06, "loss": 0.7573, "step": 10670 }, { "epoch": 0.33074110794398315, "grad_norm": 4.562516890413725, "learning_rate": 3.522601265409954e-06, "loss": 0.7102, "step": 10675 }, { "epoch": 0.3308960218118726, "grad_norm": 2.3844602431800186, "learning_rate": 3.521785923944949e-06, "loss": 0.6598, "step": 10680 }, { "epoch": 0.33105093567976207, "grad_norm": 3.014622740389309, "learning_rate": 3.520970582479943e-06, "loss": 0.7322, "step": 10685 }, { "epoch": 0.33120584954765153, "grad_norm": 3.032115732938376, "learning_rate": 3.5201552410149374e-06, "loss": 0.6982, "step": 10690 }, { "epoch": 0.33136076341554094, "grad_norm": 2.711638263795399, "learning_rate": 3.519339899549932e-06, "loss": 0.7304, "step": 10695 }, { "epoch": 0.3315156772834304, "grad_norm": 3.0172155772121503, "learning_rate": 3.5185245580849263e-06, "loss": 0.7094, "step": 10700 }, { "epoch": 0.33167059115131986, "grad_norm": 3.1280662792756084, "learning_rate": 3.5177092166199207e-06, "loss": 0.722, "step": 10705 }, { "epoch": 0.3318255050192093, "grad_norm": 2.8320677372040777, "learning_rate": 3.516893875154915e-06, "loss": 0.6369, "step": 10710 }, { "epoch": 0.3319804188870988, "grad_norm": 2.587211466616818, "learning_rate": 3.5160785336899097e-06, "loss": 0.7187, "step": 10715 }, { "epoch": 0.33213533275498824, "grad_norm": 2.5301217687967275, "learning_rate": 3.515263192224904e-06, "loss": 0.7437, "step": 10720 }, { "epoch": 0.3322902466228777, "grad_norm": 2.5108189060592676, "learning_rate": 3.5144478507598986e-06, "loss": 0.7088, "step": 10725 }, { "epoch": 0.3324451604907671, "grad_norm": 2.655095251052647, "learning_rate": 3.513632509294893e-06, "loss": 0.7176, "step": 10730 }, { "epoch": 0.33260007435865657, "grad_norm": 2.3722539281567596, "learning_rate": 3.512817167829887e-06, "loss": 0.6767, "step": 10735 }, { "epoch": 0.33275498822654603, "grad_norm": 2.514587622528881, "learning_rate": 3.512001826364882e-06, "loss": 0.687, "step": 10740 }, { "epoch": 0.3329099020944355, "grad_norm": 2.4377242028757387, "learning_rate": 3.511186484899876e-06, "loss": 0.623, "step": 10745 }, { "epoch": 0.33306481596232496, "grad_norm": 2.6551615788826277, "learning_rate": 3.510371143434871e-06, "loss": 0.6972, "step": 10750 }, { "epoch": 0.3332197298302144, "grad_norm": 2.368384950906272, "learning_rate": 3.509555801969865e-06, "loss": 0.6707, "step": 10755 }, { "epoch": 0.3333746436981039, "grad_norm": 2.467797043324463, "learning_rate": 3.5087404605048598e-06, "loss": 0.7842, "step": 10760 }, { "epoch": 0.3335295575659933, "grad_norm": 2.4714514831897954, "learning_rate": 3.5079251190398542e-06, "loss": 0.7801, "step": 10765 }, { "epoch": 0.33368447143388275, "grad_norm": 2.3461572539132334, "learning_rate": 3.5071097775748487e-06, "loss": 0.7043, "step": 10770 }, { "epoch": 0.3338393853017722, "grad_norm": 2.6808863033872763, "learning_rate": 3.506294436109843e-06, "loss": 0.6711, "step": 10775 }, { "epoch": 0.33399429916966167, "grad_norm": 2.417507979396965, "learning_rate": 3.5054790946448376e-06, "loss": 0.7064, "step": 10780 }, { "epoch": 0.33414921303755113, "grad_norm": 2.6073316117247742, "learning_rate": 3.504663753179832e-06, "loss": 0.7825, "step": 10785 }, { "epoch": 0.3343041269054406, "grad_norm": 2.085971339360494, "learning_rate": 3.5038484117148265e-06, "loss": 0.7693, "step": 10790 }, { "epoch": 0.33445904077333005, "grad_norm": 2.706245077527218, "learning_rate": 3.503033070249821e-06, "loss": 0.6604, "step": 10795 }, { "epoch": 0.33461395464121946, "grad_norm": 2.6894894067751967, "learning_rate": 3.5022177287848155e-06, "loss": 0.7318, "step": 10800 }, { "epoch": 0.3347688685091089, "grad_norm": 2.429408276741977, "learning_rate": 3.50140238731981e-06, "loss": 0.6919, "step": 10805 }, { "epoch": 0.3349237823769984, "grad_norm": 2.7460782761726894, "learning_rate": 3.500587045854804e-06, "loss": 0.7035, "step": 10810 }, { "epoch": 0.33507869624488784, "grad_norm": 2.6656882673983304, "learning_rate": 3.499771704389799e-06, "loss": 0.7292, "step": 10815 }, { "epoch": 0.3352336101127773, "grad_norm": 2.452029870863252, "learning_rate": 3.498956362924793e-06, "loss": 0.7294, "step": 10820 }, { "epoch": 0.33538852398066676, "grad_norm": 2.423395831003077, "learning_rate": 3.4981410214597878e-06, "loss": 0.6303, "step": 10825 }, { "epoch": 0.3355434378485562, "grad_norm": 2.4486193920605785, "learning_rate": 3.497325679994782e-06, "loss": 0.7178, "step": 10830 }, { "epoch": 0.33569835171644563, "grad_norm": 3.1655718492542726, "learning_rate": 3.4965103385297767e-06, "loss": 0.7299, "step": 10835 }, { "epoch": 0.3358532655843351, "grad_norm": 2.1131926353544825, "learning_rate": 3.4956949970647707e-06, "loss": 0.6741, "step": 10840 }, { "epoch": 0.33600817945222455, "grad_norm": 2.378646306585948, "learning_rate": 3.4948796555997656e-06, "loss": 0.7185, "step": 10845 }, { "epoch": 0.336163093320114, "grad_norm": 2.925671440400636, "learning_rate": 3.4940643141347596e-06, "loss": 0.6955, "step": 10850 }, { "epoch": 0.3363180071880035, "grad_norm": 2.809891735450385, "learning_rate": 3.4932489726697545e-06, "loss": 0.7388, "step": 10855 }, { "epoch": 0.33647292105589294, "grad_norm": 2.9623863696893897, "learning_rate": 3.492433631204749e-06, "loss": 0.7719, "step": 10860 }, { "epoch": 0.3366278349237824, "grad_norm": 2.8475447146357555, "learning_rate": 3.4916182897397434e-06, "loss": 0.7385, "step": 10865 }, { "epoch": 0.3367827487916718, "grad_norm": 3.476914362078129, "learning_rate": 3.490802948274738e-06, "loss": 0.6799, "step": 10870 }, { "epoch": 0.33693766265956127, "grad_norm": 2.533272922551067, "learning_rate": 3.4899876068097323e-06, "loss": 0.7461, "step": 10875 }, { "epoch": 0.3370925765274507, "grad_norm": 3.005633699916325, "learning_rate": 3.489172265344727e-06, "loss": 0.6776, "step": 10880 }, { "epoch": 0.3372474903953402, "grad_norm": 2.5997167657718028, "learning_rate": 3.488356923879721e-06, "loss": 0.732, "step": 10885 }, { "epoch": 0.33740240426322965, "grad_norm": 2.5233985807453867, "learning_rate": 3.4875415824147157e-06, "loss": 0.7651, "step": 10890 }, { "epoch": 0.3375573181311191, "grad_norm": 2.795719029829067, "learning_rate": 3.4867262409497098e-06, "loss": 0.704, "step": 10895 }, { "epoch": 0.33771223199900857, "grad_norm": 3.2708963856157762, "learning_rate": 3.4859108994847046e-06, "loss": 0.7278, "step": 10900 }, { "epoch": 0.337867145866898, "grad_norm": 2.7707232725959936, "learning_rate": 3.4850955580196987e-06, "loss": 0.6804, "step": 10905 }, { "epoch": 0.33802205973478744, "grad_norm": 1.9922664637673502, "learning_rate": 3.4842802165546936e-06, "loss": 0.6913, "step": 10910 }, { "epoch": 0.3381769736026769, "grad_norm": 2.413438866493051, "learning_rate": 3.4834648750896876e-06, "loss": 0.6451, "step": 10915 }, { "epoch": 0.33833188747056636, "grad_norm": 2.506934613289813, "learning_rate": 3.4826495336246825e-06, "loss": 0.7015, "step": 10920 }, { "epoch": 0.3384868013384558, "grad_norm": 3.3973705078883207, "learning_rate": 3.4818341921596765e-06, "loss": 0.7692, "step": 10925 }, { "epoch": 0.3386417152063453, "grad_norm": 3.0138734130154474, "learning_rate": 3.4810188506946714e-06, "loss": 0.7702, "step": 10930 }, { "epoch": 0.33879662907423475, "grad_norm": 3.696206962058697, "learning_rate": 3.4802035092296654e-06, "loss": 0.7253, "step": 10935 }, { "epoch": 0.3389515429421242, "grad_norm": 2.9715677143104493, "learning_rate": 3.4793881677646603e-06, "loss": 0.6936, "step": 10940 }, { "epoch": 0.3391064568100136, "grad_norm": 3.3533726983472087, "learning_rate": 3.4785728262996544e-06, "loss": 0.7322, "step": 10945 }, { "epoch": 0.3392613706779031, "grad_norm": 2.63300601050237, "learning_rate": 3.4777574848346492e-06, "loss": 0.68, "step": 10950 }, { "epoch": 0.33941628454579253, "grad_norm": 3.219194238417535, "learning_rate": 3.4769421433696437e-06, "loss": 0.7214, "step": 10955 }, { "epoch": 0.339571198413682, "grad_norm": 3.2636679160690307, "learning_rate": 3.4761268019046377e-06, "loss": 0.7587, "step": 10960 }, { "epoch": 0.33972611228157146, "grad_norm": 2.3208177093651603, "learning_rate": 3.4753114604396326e-06, "loss": 0.7391, "step": 10965 }, { "epoch": 0.3398810261494609, "grad_norm": 2.551504273903647, "learning_rate": 3.4744961189746266e-06, "loss": 0.7654, "step": 10970 }, { "epoch": 0.3400359400173504, "grad_norm": 2.450300306699054, "learning_rate": 3.4736807775096215e-06, "loss": 0.7287, "step": 10975 }, { "epoch": 0.3401908538852398, "grad_norm": 2.5126921666320863, "learning_rate": 3.4728654360446156e-06, "loss": 0.6903, "step": 10980 }, { "epoch": 0.34034576775312925, "grad_norm": 3.8232484131693982, "learning_rate": 3.4720500945796104e-06, "loss": 0.6124, "step": 10985 }, { "epoch": 0.3405006816210187, "grad_norm": 2.937966872547711, "learning_rate": 3.4712347531146045e-06, "loss": 0.77, "step": 10990 }, { "epoch": 0.34065559548890817, "grad_norm": 3.4911520381346763, "learning_rate": 3.4704194116495994e-06, "loss": 0.7209, "step": 10995 }, { "epoch": 0.34081050935679763, "grad_norm": 2.3531434361124814, "learning_rate": 3.4696040701845934e-06, "loss": 0.6091, "step": 11000 }, { "epoch": 0.3409654232246871, "grad_norm": 2.4142252257323813, "learning_rate": 3.4687887287195883e-06, "loss": 0.7082, "step": 11005 }, { "epoch": 0.34112033709257655, "grad_norm": 2.4531089686020726, "learning_rate": 3.4679733872545823e-06, "loss": 0.7157, "step": 11010 }, { "epoch": 0.34127525096046596, "grad_norm": 2.6354109733658064, "learning_rate": 3.467158045789577e-06, "loss": 0.7117, "step": 11015 }, { "epoch": 0.3414301648283554, "grad_norm": 2.116887906326309, "learning_rate": 3.4663427043245712e-06, "loss": 0.7666, "step": 11020 }, { "epoch": 0.3415850786962449, "grad_norm": 5.328441898567655, "learning_rate": 3.465527362859566e-06, "loss": 0.7752, "step": 11025 }, { "epoch": 0.34173999256413434, "grad_norm": 2.4598530199926762, "learning_rate": 3.46471202139456e-06, "loss": 0.7883, "step": 11030 }, { "epoch": 0.3418949064320238, "grad_norm": 2.630465662316378, "learning_rate": 3.4638966799295546e-06, "loss": 0.7055, "step": 11035 }, { "epoch": 0.34204982029991327, "grad_norm": 2.3763768381894717, "learning_rate": 3.463081338464549e-06, "loss": 0.8124, "step": 11040 }, { "epoch": 0.3422047341678027, "grad_norm": 4.805835201499556, "learning_rate": 3.4622659969995435e-06, "loss": 0.6374, "step": 11045 }, { "epoch": 0.34235964803569213, "grad_norm": 2.0208734134424833, "learning_rate": 3.4614506555345384e-06, "loss": 0.6345, "step": 11050 }, { "epoch": 0.3425145619035816, "grad_norm": 2.927001232760166, "learning_rate": 3.4606353140695325e-06, "loss": 0.7021, "step": 11055 }, { "epoch": 0.34266947577147105, "grad_norm": 2.7411978108952844, "learning_rate": 3.4598199726045273e-06, "loss": 0.7796, "step": 11060 }, { "epoch": 0.3428243896393605, "grad_norm": 3.0536075848505972, "learning_rate": 3.4590046311395214e-06, "loss": 0.7051, "step": 11065 }, { "epoch": 0.34297930350725, "grad_norm": 3.186263129207867, "learning_rate": 3.4581892896745163e-06, "loss": 0.7823, "step": 11070 }, { "epoch": 0.34313421737513944, "grad_norm": 2.6801748006457298, "learning_rate": 3.4573739482095103e-06, "loss": 0.8269, "step": 11075 }, { "epoch": 0.3432891312430289, "grad_norm": 2.8509049844749015, "learning_rate": 3.456558606744505e-06, "loss": 0.7138, "step": 11080 }, { "epoch": 0.3434440451109183, "grad_norm": 3.2786184172905855, "learning_rate": 3.455743265279499e-06, "loss": 0.7346, "step": 11085 }, { "epoch": 0.34359895897880777, "grad_norm": 3.19224174215693, "learning_rate": 3.454927923814494e-06, "loss": 0.6739, "step": 11090 }, { "epoch": 0.34375387284669723, "grad_norm": 3.7539436295989406, "learning_rate": 3.454112582349488e-06, "loss": 0.7118, "step": 11095 }, { "epoch": 0.3439087867145867, "grad_norm": 2.448411839040356, "learning_rate": 3.453297240884483e-06, "loss": 0.7159, "step": 11100 }, { "epoch": 0.34406370058247615, "grad_norm": 2.607016509932094, "learning_rate": 3.452481899419477e-06, "loss": 0.7193, "step": 11105 }, { "epoch": 0.3442186144503656, "grad_norm": 2.863804243876847, "learning_rate": 3.4516665579544715e-06, "loss": 0.7108, "step": 11110 }, { "epoch": 0.3443735283182551, "grad_norm": 3.9015286680145236, "learning_rate": 3.450851216489466e-06, "loss": 0.6616, "step": 11115 }, { "epoch": 0.3445284421861445, "grad_norm": 2.569040052743634, "learning_rate": 3.4500358750244604e-06, "loss": 0.698, "step": 11120 }, { "epoch": 0.34468335605403394, "grad_norm": 2.2790938276752204, "learning_rate": 3.449220533559455e-06, "loss": 0.7218, "step": 11125 }, { "epoch": 0.3448382699219234, "grad_norm": 2.789138256355003, "learning_rate": 3.4484051920944493e-06, "loss": 0.714, "step": 11130 }, { "epoch": 0.34499318378981286, "grad_norm": 3.1160816432403666, "learning_rate": 3.447589850629444e-06, "loss": 0.6983, "step": 11135 }, { "epoch": 0.3451480976577023, "grad_norm": 3.882721470825175, "learning_rate": 3.4467745091644383e-06, "loss": 0.7083, "step": 11140 }, { "epoch": 0.3453030115255918, "grad_norm": 2.8060854365092722, "learning_rate": 3.445959167699433e-06, "loss": 0.7731, "step": 11145 }, { "epoch": 0.34545792539348125, "grad_norm": 2.1556529146179737, "learning_rate": 3.445143826234427e-06, "loss": 0.7595, "step": 11150 }, { "epoch": 0.34561283926137065, "grad_norm": 2.808366341501472, "learning_rate": 3.444328484769422e-06, "loss": 0.6895, "step": 11155 }, { "epoch": 0.3457677531292601, "grad_norm": 2.4735400704098334, "learning_rate": 3.443513143304416e-06, "loss": 0.6646, "step": 11160 }, { "epoch": 0.3459226669971496, "grad_norm": 2.4095669788793894, "learning_rate": 3.442697801839411e-06, "loss": 0.715, "step": 11165 }, { "epoch": 0.34607758086503904, "grad_norm": 2.4948800528938984, "learning_rate": 3.441882460374405e-06, "loss": 0.7259, "step": 11170 }, { "epoch": 0.3462324947329285, "grad_norm": 2.776832402846379, "learning_rate": 3.4410671189094e-06, "loss": 0.6771, "step": 11175 }, { "epoch": 0.34638740860081796, "grad_norm": 3.526892273251418, "learning_rate": 3.440251777444394e-06, "loss": 0.7948, "step": 11180 }, { "epoch": 0.3465423224687074, "grad_norm": 2.744588621767311, "learning_rate": 3.4394364359793884e-06, "loss": 0.658, "step": 11185 }, { "epoch": 0.3466972363365968, "grad_norm": 2.3520957220914185, "learning_rate": 3.438621094514383e-06, "loss": 0.7072, "step": 11190 }, { "epoch": 0.3468521502044863, "grad_norm": 5.510438528763299, "learning_rate": 3.4378057530493773e-06, "loss": 0.7593, "step": 11195 }, { "epoch": 0.34700706407237575, "grad_norm": 2.541723904381172, "learning_rate": 3.4369904115843718e-06, "loss": 0.6927, "step": 11200 }, { "epoch": 0.3471619779402652, "grad_norm": 2.858716374364299, "learning_rate": 3.4361750701193662e-06, "loss": 0.7188, "step": 11205 }, { "epoch": 0.34731689180815467, "grad_norm": 3.369572790112571, "learning_rate": 3.4353597286543607e-06, "loss": 0.703, "step": 11210 }, { "epoch": 0.34747180567604413, "grad_norm": 2.7886706050837, "learning_rate": 3.434544387189355e-06, "loss": 0.7237, "step": 11215 }, { "epoch": 0.3476267195439336, "grad_norm": 2.5515284655731993, "learning_rate": 3.4337290457243496e-06, "loss": 0.8098, "step": 11220 }, { "epoch": 0.347781633411823, "grad_norm": 2.679129986444899, "learning_rate": 3.432913704259344e-06, "loss": 0.6951, "step": 11225 }, { "epoch": 0.34793654727971246, "grad_norm": 2.63009382731859, "learning_rate": 3.432098362794338e-06, "loss": 0.6294, "step": 11230 }, { "epoch": 0.3480914611476019, "grad_norm": 2.3934661246696516, "learning_rate": 3.431283021329333e-06, "loss": 0.7136, "step": 11235 }, { "epoch": 0.3482463750154914, "grad_norm": 2.608389471975305, "learning_rate": 3.430467679864328e-06, "loss": 0.7346, "step": 11240 }, { "epoch": 0.34840128888338084, "grad_norm": 2.530209331959009, "learning_rate": 3.429652338399322e-06, "loss": 0.6906, "step": 11245 }, { "epoch": 0.3485562027512703, "grad_norm": 2.5556342484810215, "learning_rate": 3.4288369969343168e-06, "loss": 0.7401, "step": 11250 }, { "epoch": 0.34871111661915977, "grad_norm": 3.301851369713711, "learning_rate": 3.428021655469311e-06, "loss": 0.6746, "step": 11255 }, { "epoch": 0.34886603048704923, "grad_norm": 3.1117701986792596, "learning_rate": 3.4272063140043053e-06, "loss": 0.7162, "step": 11260 }, { "epoch": 0.34902094435493863, "grad_norm": 2.8133000874646883, "learning_rate": 3.4263909725392997e-06, "loss": 0.6869, "step": 11265 }, { "epoch": 0.3491758582228281, "grad_norm": 2.89721875768549, "learning_rate": 3.425575631074294e-06, "loss": 0.7092, "step": 11270 }, { "epoch": 0.34933077209071756, "grad_norm": 2.5375516512862055, "learning_rate": 3.4247602896092887e-06, "loss": 0.7714, "step": 11275 }, { "epoch": 0.349485685958607, "grad_norm": 2.709920882924235, "learning_rate": 3.423944948144283e-06, "loss": 0.7851, "step": 11280 }, { "epoch": 0.3496405998264965, "grad_norm": 2.616100828946647, "learning_rate": 3.4231296066792776e-06, "loss": 0.6508, "step": 11285 }, { "epoch": 0.34979551369438594, "grad_norm": 2.660756197853669, "learning_rate": 3.422314265214272e-06, "loss": 0.6906, "step": 11290 }, { "epoch": 0.3499504275622754, "grad_norm": 2.3377714639108933, "learning_rate": 3.4214989237492665e-06, "loss": 0.6965, "step": 11295 }, { "epoch": 0.3501053414301648, "grad_norm": 2.756641572494019, "learning_rate": 3.420683582284261e-06, "loss": 0.7803, "step": 11300 }, { "epoch": 0.35026025529805427, "grad_norm": 2.688369974461714, "learning_rate": 3.419868240819255e-06, "loss": 0.7196, "step": 11305 }, { "epoch": 0.35041516916594373, "grad_norm": 2.9281459298793044, "learning_rate": 3.41905289935425e-06, "loss": 0.7087, "step": 11310 }, { "epoch": 0.3505700830338332, "grad_norm": 2.641862711437835, "learning_rate": 3.418237557889244e-06, "loss": 0.7576, "step": 11315 }, { "epoch": 0.35072499690172265, "grad_norm": 2.673166438198359, "learning_rate": 3.4174222164242388e-06, "loss": 0.7018, "step": 11320 }, { "epoch": 0.3508799107696121, "grad_norm": 2.6547465585417624, "learning_rate": 3.416606874959233e-06, "loss": 0.6884, "step": 11325 }, { "epoch": 0.3510348246375016, "grad_norm": 2.3297589885210765, "learning_rate": 3.4157915334942277e-06, "loss": 0.7489, "step": 11330 }, { "epoch": 0.351189738505391, "grad_norm": 2.636839730942574, "learning_rate": 3.414976192029222e-06, "loss": 0.7257, "step": 11335 }, { "epoch": 0.35134465237328044, "grad_norm": 2.412805247126126, "learning_rate": 3.4141608505642166e-06, "loss": 0.7369, "step": 11340 }, { "epoch": 0.3514995662411699, "grad_norm": 2.4484731096209678, "learning_rate": 3.413345509099211e-06, "loss": 0.7009, "step": 11345 }, { "epoch": 0.35165448010905936, "grad_norm": 2.460436356843927, "learning_rate": 3.4125301676342055e-06, "loss": 0.749, "step": 11350 }, { "epoch": 0.3518093939769488, "grad_norm": 2.935445512762515, "learning_rate": 3.4117148261692e-06, "loss": 0.7851, "step": 11355 }, { "epoch": 0.3519643078448383, "grad_norm": 2.4971987300664042, "learning_rate": 3.4108994847041945e-06, "loss": 0.7182, "step": 11360 }, { "epoch": 0.35211922171272775, "grad_norm": 2.6646945989620656, "learning_rate": 3.410084143239189e-06, "loss": 0.6959, "step": 11365 }, { "epoch": 0.35227413558061715, "grad_norm": 4.377208850162593, "learning_rate": 3.4092688017741834e-06, "loss": 0.7249, "step": 11370 }, { "epoch": 0.3524290494485066, "grad_norm": 2.776463749867663, "learning_rate": 3.408453460309178e-06, "loss": 0.7345, "step": 11375 }, { "epoch": 0.3525839633163961, "grad_norm": 3.0500774462524056, "learning_rate": 3.407638118844172e-06, "loss": 0.6674, "step": 11380 }, { "epoch": 0.35273887718428554, "grad_norm": 2.360209070474476, "learning_rate": 3.4068227773791668e-06, "loss": 0.7924, "step": 11385 }, { "epoch": 0.352893791052175, "grad_norm": 2.7820237944185675, "learning_rate": 3.4060074359141608e-06, "loss": 0.7439, "step": 11390 }, { "epoch": 0.35304870492006446, "grad_norm": 2.4271119331553983, "learning_rate": 3.4051920944491557e-06, "loss": 0.7059, "step": 11395 }, { "epoch": 0.3532036187879539, "grad_norm": 2.74794323049327, "learning_rate": 3.4043767529841497e-06, "loss": 0.7768, "step": 11400 }, { "epoch": 0.3533585326558433, "grad_norm": 2.7520696887153355, "learning_rate": 3.4035614115191446e-06, "loss": 0.7641, "step": 11405 }, { "epoch": 0.3535134465237328, "grad_norm": 2.8308876045469917, "learning_rate": 3.4027460700541386e-06, "loss": 0.6656, "step": 11410 }, { "epoch": 0.35366836039162225, "grad_norm": 2.894969029179216, "learning_rate": 3.4019307285891335e-06, "loss": 0.7586, "step": 11415 }, { "epoch": 0.3538232742595117, "grad_norm": 2.9322581347035515, "learning_rate": 3.4011153871241275e-06, "loss": 0.7358, "step": 11420 }, { "epoch": 0.35397818812740117, "grad_norm": 2.7427956273336376, "learning_rate": 3.4003000456591224e-06, "loss": 0.7911, "step": 11425 }, { "epoch": 0.35413310199529063, "grad_norm": 2.731261777493357, "learning_rate": 3.399484704194117e-06, "loss": 0.7182, "step": 11430 }, { "epoch": 0.3542880158631801, "grad_norm": 2.3661500417226256, "learning_rate": 3.3986693627291113e-06, "loss": 0.7697, "step": 11435 }, { "epoch": 0.3544429297310695, "grad_norm": 2.3731299075276624, "learning_rate": 3.397854021264106e-06, "loss": 0.7717, "step": 11440 }, { "epoch": 0.35459784359895896, "grad_norm": 3.0371742356277625, "learning_rate": 3.3970386797991003e-06, "loss": 0.6997, "step": 11445 }, { "epoch": 0.3547527574668484, "grad_norm": 2.3541903670978312, "learning_rate": 3.3962233383340947e-06, "loss": 0.7713, "step": 11450 }, { "epoch": 0.3549076713347379, "grad_norm": 3.459573754210836, "learning_rate": 3.3954079968690888e-06, "loss": 0.8073, "step": 11455 }, { "epoch": 0.35506258520262735, "grad_norm": 2.590998204578844, "learning_rate": 3.3945926554040836e-06, "loss": 0.7358, "step": 11460 }, { "epoch": 0.3552174990705168, "grad_norm": 2.8371279148919553, "learning_rate": 3.3937773139390777e-06, "loss": 0.781, "step": 11465 }, { "epoch": 0.35537241293840627, "grad_norm": 2.30396240623628, "learning_rate": 3.3929619724740726e-06, "loss": 0.6693, "step": 11470 }, { "epoch": 0.3555273268062957, "grad_norm": 2.172887430880666, "learning_rate": 3.3921466310090666e-06, "loss": 0.7129, "step": 11475 }, { "epoch": 0.35568224067418514, "grad_norm": 3.3625189430159805, "learning_rate": 3.3913312895440615e-06, "loss": 0.7627, "step": 11480 }, { "epoch": 0.3558371545420746, "grad_norm": 2.852453482965898, "learning_rate": 3.3905159480790555e-06, "loss": 0.7319, "step": 11485 }, { "epoch": 0.35599206840996406, "grad_norm": 2.101037372886932, "learning_rate": 3.3897006066140504e-06, "loss": 0.6667, "step": 11490 }, { "epoch": 0.3561469822778535, "grad_norm": 4.503043775837993, "learning_rate": 3.3888852651490444e-06, "loss": 0.7193, "step": 11495 }, { "epoch": 0.356301896145743, "grad_norm": 2.1286167658008726, "learning_rate": 3.3880699236840393e-06, "loss": 0.6496, "step": 11500 }, { "epoch": 0.35645681001363244, "grad_norm": 2.8898476969400924, "learning_rate": 3.3872545822190333e-06, "loss": 0.7476, "step": 11505 }, { "epoch": 0.35661172388152185, "grad_norm": 2.7137311680623992, "learning_rate": 3.3864392407540282e-06, "loss": 0.675, "step": 11510 }, { "epoch": 0.3567666377494113, "grad_norm": 2.525506252330811, "learning_rate": 3.3856238992890223e-06, "loss": 0.6913, "step": 11515 }, { "epoch": 0.35692155161730077, "grad_norm": 3.289611667040906, "learning_rate": 3.384808557824017e-06, "loss": 0.8043, "step": 11520 }, { "epoch": 0.35707646548519023, "grad_norm": 2.7918419667026586, "learning_rate": 3.3839932163590116e-06, "loss": 0.7565, "step": 11525 }, { "epoch": 0.3572313793530797, "grad_norm": 2.373845159604717, "learning_rate": 3.3831778748940056e-06, "loss": 0.7296, "step": 11530 }, { "epoch": 0.35738629322096915, "grad_norm": 2.6030408178873663, "learning_rate": 3.3823625334290005e-06, "loss": 0.7442, "step": 11535 }, { "epoch": 0.3575412070888586, "grad_norm": 2.826486152745588, "learning_rate": 3.3815471919639946e-06, "loss": 0.7693, "step": 11540 }, { "epoch": 0.357696120956748, "grad_norm": 2.8776507393075508, "learning_rate": 3.3807318504989894e-06, "loss": 0.7308, "step": 11545 }, { "epoch": 0.3578510348246375, "grad_norm": 2.860095529237627, "learning_rate": 3.3799165090339835e-06, "loss": 0.7526, "step": 11550 }, { "epoch": 0.35800594869252694, "grad_norm": 2.2321360374273174, "learning_rate": 3.3791011675689784e-06, "loss": 0.6419, "step": 11555 }, { "epoch": 0.3581608625604164, "grad_norm": 2.542994375458767, "learning_rate": 3.3782858261039724e-06, "loss": 0.6991, "step": 11560 }, { "epoch": 0.35831577642830587, "grad_norm": 3.320763566045848, "learning_rate": 3.3774704846389673e-06, "loss": 0.8771, "step": 11565 }, { "epoch": 0.3584706902961953, "grad_norm": 2.6181026035909056, "learning_rate": 3.3766551431739613e-06, "loss": 0.8032, "step": 11570 }, { "epoch": 0.3586256041640848, "grad_norm": 2.4545887000821733, "learning_rate": 3.375839801708956e-06, "loss": 0.6915, "step": 11575 }, { "epoch": 0.35878051803197425, "grad_norm": 3.035338668889623, "learning_rate": 3.3750244602439502e-06, "loss": 0.7007, "step": 11580 }, { "epoch": 0.35893543189986366, "grad_norm": 2.748358411413495, "learning_rate": 3.374209118778945e-06, "loss": 0.6934, "step": 11585 }, { "epoch": 0.3590903457677531, "grad_norm": 2.6771590139273127, "learning_rate": 3.373393777313939e-06, "loss": 0.7079, "step": 11590 }, { "epoch": 0.3592452596356426, "grad_norm": 2.7130047080014243, "learning_rate": 3.372578435848934e-06, "loss": 0.7479, "step": 11595 }, { "epoch": 0.35940017350353204, "grad_norm": 3.588664670422809, "learning_rate": 3.371763094383928e-06, "loss": 0.8052, "step": 11600 }, { "epoch": 0.3595550873714215, "grad_norm": 2.5311661594745547, "learning_rate": 3.3709477529189225e-06, "loss": 0.7437, "step": 11605 }, { "epoch": 0.35971000123931096, "grad_norm": 3.642599254518804, "learning_rate": 3.3701324114539174e-06, "loss": 0.7326, "step": 11610 }, { "epoch": 0.3598649151072004, "grad_norm": 2.8165311592405957, "learning_rate": 3.3693170699889114e-06, "loss": 0.725, "step": 11615 }, { "epoch": 0.36001982897508983, "grad_norm": 2.4104784399332546, "learning_rate": 3.3685017285239063e-06, "loss": 0.6933, "step": 11620 }, { "epoch": 0.3601747428429793, "grad_norm": 2.6098473402674345, "learning_rate": 3.3676863870589004e-06, "loss": 0.8217, "step": 11625 }, { "epoch": 0.36032965671086875, "grad_norm": 2.929141433255645, "learning_rate": 3.3668710455938952e-06, "loss": 0.7411, "step": 11630 }, { "epoch": 0.3604845705787582, "grad_norm": 2.778633904255767, "learning_rate": 3.3660557041288893e-06, "loss": 0.7527, "step": 11635 }, { "epoch": 0.3606394844466477, "grad_norm": 3.030550138422364, "learning_rate": 3.365240362663884e-06, "loss": 0.8374, "step": 11640 }, { "epoch": 0.36079439831453713, "grad_norm": 2.7717647778348553, "learning_rate": 3.364425021198878e-06, "loss": 0.786, "step": 11645 }, { "epoch": 0.3609493121824266, "grad_norm": 2.145955551832641, "learning_rate": 3.363609679733873e-06, "loss": 0.7516, "step": 11650 }, { "epoch": 0.361104226050316, "grad_norm": 2.8876991044986178, "learning_rate": 3.362794338268867e-06, "loss": 0.76, "step": 11655 }, { "epoch": 0.36125913991820546, "grad_norm": 3.4024173773636397, "learning_rate": 3.361978996803862e-06, "loss": 0.7997, "step": 11660 }, { "epoch": 0.3614140537860949, "grad_norm": 2.616766258321926, "learning_rate": 3.361163655338856e-06, "loss": 0.7519, "step": 11665 }, { "epoch": 0.3615689676539844, "grad_norm": 2.5860799351887604, "learning_rate": 3.360348313873851e-06, "loss": 0.789, "step": 11670 }, { "epoch": 0.36172388152187385, "grad_norm": 2.3227817613655426, "learning_rate": 3.359532972408845e-06, "loss": 0.7289, "step": 11675 }, { "epoch": 0.3618787953897633, "grad_norm": 2.657485040319202, "learning_rate": 3.3587176309438394e-06, "loss": 0.7058, "step": 11680 }, { "epoch": 0.36203370925765277, "grad_norm": 2.6715796434401926, "learning_rate": 3.357902289478834e-06, "loss": 0.6836, "step": 11685 }, { "epoch": 0.3621886231255422, "grad_norm": 2.4992492810111857, "learning_rate": 3.3570869480138283e-06, "loss": 0.6228, "step": 11690 }, { "epoch": 0.36234353699343164, "grad_norm": 3.1805699832434233, "learning_rate": 3.356271606548823e-06, "loss": 0.7449, "step": 11695 }, { "epoch": 0.3624984508613211, "grad_norm": 2.867345852903758, "learning_rate": 3.3554562650838173e-06, "loss": 0.7351, "step": 11700 }, { "epoch": 0.36265336472921056, "grad_norm": 4.301143745228383, "learning_rate": 3.354640923618812e-06, "loss": 0.6398, "step": 11705 }, { "epoch": 0.3628082785971, "grad_norm": 2.910013265272147, "learning_rate": 3.353825582153806e-06, "loss": 0.6715, "step": 11710 }, { "epoch": 0.3629631924649895, "grad_norm": 2.674980047143184, "learning_rate": 3.353010240688801e-06, "loss": 0.717, "step": 11715 }, { "epoch": 0.36311810633287894, "grad_norm": 2.4300618138403722, "learning_rate": 3.352194899223795e-06, "loss": 0.6876, "step": 11720 }, { "epoch": 0.36327302020076835, "grad_norm": 2.9597241319702277, "learning_rate": 3.35137955775879e-06, "loss": 0.6496, "step": 11725 }, { "epoch": 0.3634279340686578, "grad_norm": 2.896431549719106, "learning_rate": 3.350564216293784e-06, "loss": 0.694, "step": 11730 }, { "epoch": 0.36358284793654727, "grad_norm": 2.900549695221637, "learning_rate": 3.349748874828779e-06, "loss": 0.8021, "step": 11735 }, { "epoch": 0.36373776180443673, "grad_norm": 2.471837178071977, "learning_rate": 3.348933533363773e-06, "loss": 0.7822, "step": 11740 }, { "epoch": 0.3638926756723262, "grad_norm": 2.684208022441595, "learning_rate": 3.348118191898768e-06, "loss": 0.6823, "step": 11745 }, { "epoch": 0.36404758954021565, "grad_norm": 2.907917009523349, "learning_rate": 3.347302850433762e-06, "loss": 0.7105, "step": 11750 }, { "epoch": 0.3642025034081051, "grad_norm": 3.3821284749044023, "learning_rate": 3.3464875089687563e-06, "loss": 0.7512, "step": 11755 }, { "epoch": 0.3643574172759945, "grad_norm": 2.4423016649486335, "learning_rate": 3.3456721675037508e-06, "loss": 0.6935, "step": 11760 }, { "epoch": 0.364512331143884, "grad_norm": 2.3509585923774217, "learning_rate": 3.3448568260387452e-06, "loss": 0.8078, "step": 11765 }, { "epoch": 0.36466724501177344, "grad_norm": 2.817798082019803, "learning_rate": 3.3440414845737397e-06, "loss": 0.7251, "step": 11770 }, { "epoch": 0.3648221588796629, "grad_norm": 3.254383953367764, "learning_rate": 3.343226143108734e-06, "loss": 0.7276, "step": 11775 }, { "epoch": 0.36497707274755237, "grad_norm": 2.7859242888183577, "learning_rate": 3.3424108016437286e-06, "loss": 0.6982, "step": 11780 }, { "epoch": 0.36513198661544183, "grad_norm": 2.444988310317583, "learning_rate": 3.341595460178723e-06, "loss": 0.7389, "step": 11785 }, { "epoch": 0.3652869004833313, "grad_norm": 2.7746504197318997, "learning_rate": 3.3407801187137175e-06, "loss": 0.7367, "step": 11790 }, { "epoch": 0.3654418143512207, "grad_norm": 2.603894906177484, "learning_rate": 3.339964777248712e-06, "loss": 0.717, "step": 11795 }, { "epoch": 0.36559672821911016, "grad_norm": 2.6777040229474545, "learning_rate": 3.339149435783707e-06, "loss": 0.8886, "step": 11800 }, { "epoch": 0.3657516420869996, "grad_norm": 2.244232030947677, "learning_rate": 3.338334094318701e-06, "loss": 0.6858, "step": 11805 }, { "epoch": 0.3659065559548891, "grad_norm": 2.739544007246071, "learning_rate": 3.3375187528536958e-06, "loss": 0.7266, "step": 11810 }, { "epoch": 0.36606146982277854, "grad_norm": 2.6558305218944196, "learning_rate": 3.33670341138869e-06, "loss": 0.7476, "step": 11815 }, { "epoch": 0.366216383690668, "grad_norm": 2.7174872188194357, "learning_rate": 3.3358880699236847e-06, "loss": 0.6499, "step": 11820 }, { "epoch": 0.36637129755855746, "grad_norm": 3.036011430005047, "learning_rate": 3.3350727284586787e-06, "loss": 0.6884, "step": 11825 }, { "epoch": 0.36652621142644687, "grad_norm": 2.9258940072255477, "learning_rate": 3.3342573869936736e-06, "loss": 0.8491, "step": 11830 }, { "epoch": 0.36668112529433633, "grad_norm": 2.8584901726921004, "learning_rate": 3.3334420455286676e-06, "loss": 0.679, "step": 11835 }, { "epoch": 0.3668360391622258, "grad_norm": 2.9889787690351635, "learning_rate": 3.332626704063662e-06, "loss": 0.708, "step": 11840 }, { "epoch": 0.36699095303011525, "grad_norm": 2.741149949479352, "learning_rate": 3.3318113625986566e-06, "loss": 0.6819, "step": 11845 }, { "epoch": 0.3671458668980047, "grad_norm": 2.5552699326763215, "learning_rate": 3.330996021133651e-06, "loss": 0.7428, "step": 11850 }, { "epoch": 0.3673007807658942, "grad_norm": 3.0094985348934613, "learning_rate": 3.3301806796686455e-06, "loss": 0.7826, "step": 11855 }, { "epoch": 0.36745569463378364, "grad_norm": 3.404921899806415, "learning_rate": 3.32936533820364e-06, "loss": 0.7042, "step": 11860 }, { "epoch": 0.3676106085016731, "grad_norm": 2.4610635501529528, "learning_rate": 3.3285499967386344e-06, "loss": 0.7395, "step": 11865 }, { "epoch": 0.3677655223695625, "grad_norm": 3.4990565342200535, "learning_rate": 3.327734655273629e-06, "loss": 0.7854, "step": 11870 }, { "epoch": 0.36792043623745196, "grad_norm": 2.434919640944274, "learning_rate": 3.326919313808623e-06, "loss": 0.7484, "step": 11875 }, { "epoch": 0.3680753501053414, "grad_norm": 3.509397796824874, "learning_rate": 3.3261039723436178e-06, "loss": 0.7033, "step": 11880 }, { "epoch": 0.3682302639732309, "grad_norm": 2.618546613150439, "learning_rate": 3.325288630878612e-06, "loss": 0.6837, "step": 11885 }, { "epoch": 0.36838517784112035, "grad_norm": 3.34824068322968, "learning_rate": 3.3244732894136067e-06, "loss": 0.7001, "step": 11890 }, { "epoch": 0.3685400917090098, "grad_norm": 2.4465265608100384, "learning_rate": 3.3236579479486016e-06, "loss": 0.6513, "step": 11895 }, { "epoch": 0.36869500557689927, "grad_norm": 4.123386655504776, "learning_rate": 3.3228426064835956e-06, "loss": 0.7509, "step": 11900 }, { "epoch": 0.3688499194447887, "grad_norm": 2.7403340246627903, "learning_rate": 3.3220272650185905e-06, "loss": 0.6161, "step": 11905 }, { "epoch": 0.36900483331267814, "grad_norm": 3.0670261737883933, "learning_rate": 3.3212119235535845e-06, "loss": 0.7689, "step": 11910 }, { "epoch": 0.3691597471805676, "grad_norm": 2.5200848996367804, "learning_rate": 3.320396582088579e-06, "loss": 0.7043, "step": 11915 }, { "epoch": 0.36931466104845706, "grad_norm": 2.3877648290130806, "learning_rate": 3.3195812406235735e-06, "loss": 0.7512, "step": 11920 }, { "epoch": 0.3694695749163465, "grad_norm": 3.1691506959084363, "learning_rate": 3.318765899158568e-06, "loss": 0.7687, "step": 11925 }, { "epoch": 0.369624488784236, "grad_norm": 3.3444319055401768, "learning_rate": 3.3179505576935624e-06, "loss": 0.7299, "step": 11930 }, { "epoch": 0.36977940265212544, "grad_norm": 2.560087564651585, "learning_rate": 3.317135216228557e-06, "loss": 0.7602, "step": 11935 }, { "epoch": 0.36993431652001485, "grad_norm": 2.363583958239512, "learning_rate": 3.3163198747635513e-06, "loss": 0.6644, "step": 11940 }, { "epoch": 0.3700892303879043, "grad_norm": 3.3853003179215606, "learning_rate": 3.3155045332985458e-06, "loss": 0.7645, "step": 11945 }, { "epoch": 0.3702441442557938, "grad_norm": 2.7283337244443038, "learning_rate": 3.3146891918335398e-06, "loss": 0.7114, "step": 11950 }, { "epoch": 0.37039905812368323, "grad_norm": 2.767460518373948, "learning_rate": 3.3138738503685347e-06, "loss": 0.7074, "step": 11955 }, { "epoch": 0.3705539719915727, "grad_norm": 4.15722573362726, "learning_rate": 3.3130585089035287e-06, "loss": 0.7544, "step": 11960 }, { "epoch": 0.37070888585946216, "grad_norm": 2.9907742102638064, "learning_rate": 3.3122431674385236e-06, "loss": 0.7122, "step": 11965 }, { "epoch": 0.3708637997273516, "grad_norm": 2.4941921446997997, "learning_rate": 3.3114278259735176e-06, "loss": 0.7549, "step": 11970 }, { "epoch": 0.371018713595241, "grad_norm": 2.19951586738151, "learning_rate": 3.3106124845085125e-06, "loss": 0.6671, "step": 11975 }, { "epoch": 0.3711736274631305, "grad_norm": 2.4785719358693155, "learning_rate": 3.3097971430435065e-06, "loss": 0.702, "step": 11980 }, { "epoch": 0.37132854133101995, "grad_norm": 2.8761870503957625, "learning_rate": 3.3089818015785014e-06, "loss": 0.669, "step": 11985 }, { "epoch": 0.3714834551989094, "grad_norm": 2.681657396808427, "learning_rate": 3.308166460113496e-06, "loss": 0.8157, "step": 11990 }, { "epoch": 0.37163836906679887, "grad_norm": 2.3339716345141213, "learning_rate": 3.3073511186484903e-06, "loss": 0.6695, "step": 11995 }, { "epoch": 0.37179328293468833, "grad_norm": 2.925305772108646, "learning_rate": 3.306535777183485e-06, "loss": 0.749, "step": 12000 }, { "epoch": 0.3719481968025778, "grad_norm": 2.97711358045656, "learning_rate": 3.3057204357184793e-06, "loss": 0.7377, "step": 12005 }, { "epoch": 0.3721031106704672, "grad_norm": 2.854326184877621, "learning_rate": 3.3049050942534737e-06, "loss": 0.7544, "step": 12010 }, { "epoch": 0.37225802453835666, "grad_norm": 2.263704976235837, "learning_rate": 3.304089752788468e-06, "loss": 0.751, "step": 12015 }, { "epoch": 0.3724129384062461, "grad_norm": 3.120602377198888, "learning_rate": 3.3032744113234626e-06, "loss": 0.7321, "step": 12020 }, { "epoch": 0.3725678522741356, "grad_norm": 2.589975014501125, "learning_rate": 3.3024590698584567e-06, "loss": 0.6113, "step": 12025 }, { "epoch": 0.37272276614202504, "grad_norm": 2.3310303412012887, "learning_rate": 3.3016437283934516e-06, "loss": 0.6717, "step": 12030 }, { "epoch": 0.3728776800099145, "grad_norm": 3.1528615804462348, "learning_rate": 3.3008283869284456e-06, "loss": 0.8149, "step": 12035 }, { "epoch": 0.37303259387780396, "grad_norm": 3.1276754179695843, "learning_rate": 3.3000130454634405e-06, "loss": 0.6874, "step": 12040 }, { "epoch": 0.37318750774569337, "grad_norm": 2.840176887107036, "learning_rate": 3.2991977039984345e-06, "loss": 0.7307, "step": 12045 }, { "epoch": 0.37334242161358283, "grad_norm": 3.695011771102282, "learning_rate": 3.2983823625334294e-06, "loss": 0.6854, "step": 12050 }, { "epoch": 0.3734973354814723, "grad_norm": 2.93800876552409, "learning_rate": 3.2975670210684234e-06, "loss": 0.7074, "step": 12055 }, { "epoch": 0.37365224934936175, "grad_norm": 2.297796357416328, "learning_rate": 3.2967516796034183e-06, "loss": 0.7735, "step": 12060 }, { "epoch": 0.3738071632172512, "grad_norm": 2.389177221962259, "learning_rate": 3.2959363381384123e-06, "loss": 0.749, "step": 12065 }, { "epoch": 0.3739620770851407, "grad_norm": 2.4438160889894034, "learning_rate": 3.2951209966734072e-06, "loss": 0.6885, "step": 12070 }, { "epoch": 0.37411699095303014, "grad_norm": 4.398313361733504, "learning_rate": 3.2943056552084013e-06, "loss": 0.6795, "step": 12075 }, { "epoch": 0.37427190482091954, "grad_norm": 2.5669350197455123, "learning_rate": 3.293490313743396e-06, "loss": 0.7323, "step": 12080 }, { "epoch": 0.374426818688809, "grad_norm": 3.4215024376044494, "learning_rate": 3.2926749722783906e-06, "loss": 0.7081, "step": 12085 }, { "epoch": 0.37458173255669847, "grad_norm": 2.691113557536336, "learning_rate": 3.291859630813385e-06, "loss": 0.6792, "step": 12090 }, { "epoch": 0.3747366464245879, "grad_norm": 2.657251375304297, "learning_rate": 3.2910442893483795e-06, "loss": 0.6849, "step": 12095 }, { "epoch": 0.3748915602924774, "grad_norm": 3.1188663909862053, "learning_rate": 3.2902289478833736e-06, "loss": 0.636, "step": 12100 }, { "epoch": 0.37504647416036685, "grad_norm": 2.95392801375011, "learning_rate": 3.2894136064183684e-06, "loss": 0.706, "step": 12105 }, { "epoch": 0.3752013880282563, "grad_norm": 3.0923168628498283, "learning_rate": 3.2885982649533625e-06, "loss": 0.6182, "step": 12110 }, { "epoch": 0.3753563018961457, "grad_norm": 2.7648345322725367, "learning_rate": 3.2877829234883574e-06, "loss": 0.673, "step": 12115 }, { "epoch": 0.3755112157640352, "grad_norm": 4.520414025400638, "learning_rate": 3.2869675820233514e-06, "loss": 0.8012, "step": 12120 }, { "epoch": 0.37566612963192464, "grad_norm": 2.941264565254632, "learning_rate": 3.2861522405583463e-06, "loss": 0.6761, "step": 12125 }, { "epoch": 0.3758210434998141, "grad_norm": 2.946244404032324, "learning_rate": 3.2853368990933403e-06, "loss": 0.6666, "step": 12130 }, { "epoch": 0.37597595736770356, "grad_norm": 2.3298468598839315, "learning_rate": 3.284521557628335e-06, "loss": 0.7645, "step": 12135 }, { "epoch": 0.376130871235593, "grad_norm": 4.225850700562701, "learning_rate": 3.2837062161633292e-06, "loss": 0.6509, "step": 12140 }, { "epoch": 0.3762857851034825, "grad_norm": 2.8993550297558284, "learning_rate": 3.282890874698324e-06, "loss": 0.7165, "step": 12145 }, { "epoch": 0.3764406989713719, "grad_norm": 2.785848839806777, "learning_rate": 3.282075533233318e-06, "loss": 0.5939, "step": 12150 }, { "epoch": 0.37659561283926135, "grad_norm": 2.740136429825916, "learning_rate": 3.281260191768313e-06, "loss": 0.7289, "step": 12155 }, { "epoch": 0.3767505267071508, "grad_norm": 2.59978686293555, "learning_rate": 3.280444850303307e-06, "loss": 0.6666, "step": 12160 }, { "epoch": 0.3769054405750403, "grad_norm": 2.8694718482300274, "learning_rate": 3.279629508838302e-06, "loss": 0.7251, "step": 12165 }, { "epoch": 0.37706035444292973, "grad_norm": 2.6751943133733045, "learning_rate": 3.278814167373296e-06, "loss": 0.761, "step": 12170 }, { "epoch": 0.3772152683108192, "grad_norm": 2.6376311483607124, "learning_rate": 3.277998825908291e-06, "loss": 0.661, "step": 12175 }, { "epoch": 0.37737018217870866, "grad_norm": 2.643701149351783, "learning_rate": 3.2771834844432853e-06, "loss": 0.75, "step": 12180 }, { "epoch": 0.3775250960465981, "grad_norm": 2.972558959173322, "learning_rate": 3.2763681429782794e-06, "loss": 0.7383, "step": 12185 }, { "epoch": 0.3776800099144875, "grad_norm": 2.626422348118744, "learning_rate": 3.2755528015132742e-06, "loss": 0.7087, "step": 12190 }, { "epoch": 0.377834923782377, "grad_norm": 3.757701313547967, "learning_rate": 3.2747374600482683e-06, "loss": 0.7921, "step": 12195 }, { "epoch": 0.37798983765026645, "grad_norm": 2.446505994482027, "learning_rate": 3.273922118583263e-06, "loss": 0.6399, "step": 12200 }, { "epoch": 0.3781447515181559, "grad_norm": 2.9942508786525774, "learning_rate": 3.273106777118257e-06, "loss": 0.7727, "step": 12205 }, { "epoch": 0.37829966538604537, "grad_norm": 2.363337296542284, "learning_rate": 3.272291435653252e-06, "loss": 0.7244, "step": 12210 }, { "epoch": 0.37845457925393483, "grad_norm": 3.3534360979602016, "learning_rate": 3.271476094188246e-06, "loss": 0.6929, "step": 12215 }, { "epoch": 0.3786094931218243, "grad_norm": 3.2906086571767985, "learning_rate": 3.270660752723241e-06, "loss": 0.744, "step": 12220 }, { "epoch": 0.3787644069897137, "grad_norm": 2.3127095498084325, "learning_rate": 3.269845411258235e-06, "loss": 0.6993, "step": 12225 }, { "epoch": 0.37891932085760316, "grad_norm": 2.7386839123197677, "learning_rate": 3.26903006979323e-06, "loss": 0.7391, "step": 12230 }, { "epoch": 0.3790742347254926, "grad_norm": 3.3659308698553105, "learning_rate": 3.268214728328224e-06, "loss": 0.7232, "step": 12235 }, { "epoch": 0.3792291485933821, "grad_norm": 2.5922357492048493, "learning_rate": 3.267399386863219e-06, "loss": 0.6802, "step": 12240 }, { "epoch": 0.37938406246127154, "grad_norm": 2.5652747156090467, "learning_rate": 3.266584045398213e-06, "loss": 0.8409, "step": 12245 }, { "epoch": 0.379538976329161, "grad_norm": 2.575488490231671, "learning_rate": 3.2657687039332078e-06, "loss": 0.7382, "step": 12250 }, { "epoch": 0.37969389019705047, "grad_norm": 3.0987093289717715, "learning_rate": 3.264953362468202e-06, "loss": 0.7108, "step": 12255 }, { "epoch": 0.37984880406493987, "grad_norm": 3.743166513458705, "learning_rate": 3.2641380210031963e-06, "loss": 0.7059, "step": 12260 }, { "epoch": 0.38000371793282933, "grad_norm": 2.7524941902599793, "learning_rate": 3.2633226795381907e-06, "loss": 0.6531, "step": 12265 }, { "epoch": 0.3801586318007188, "grad_norm": 2.864645843008098, "learning_rate": 3.262507338073185e-06, "loss": 0.6865, "step": 12270 }, { "epoch": 0.38031354566860825, "grad_norm": 3.2641972191804745, "learning_rate": 3.26169199660818e-06, "loss": 0.8179, "step": 12275 }, { "epoch": 0.3804684595364977, "grad_norm": 2.344409165054654, "learning_rate": 3.260876655143174e-06, "loss": 0.7488, "step": 12280 }, { "epoch": 0.3806233734043872, "grad_norm": 2.0959730189606107, "learning_rate": 3.260061313678169e-06, "loss": 0.6189, "step": 12285 }, { "epoch": 0.38077828727227664, "grad_norm": 2.584013503303283, "learning_rate": 3.259245972213163e-06, "loss": 0.696, "step": 12290 }, { "epoch": 0.38093320114016604, "grad_norm": 3.414891803453454, "learning_rate": 3.258430630748158e-06, "loss": 0.7086, "step": 12295 }, { "epoch": 0.3810881150080555, "grad_norm": 3.676168726152558, "learning_rate": 3.257615289283152e-06, "loss": 0.7371, "step": 12300 }, { "epoch": 0.38124302887594497, "grad_norm": 3.331097708111289, "learning_rate": 3.256799947818147e-06, "loss": 0.7347, "step": 12305 }, { "epoch": 0.38139794274383443, "grad_norm": 2.4359641669384424, "learning_rate": 3.255984606353141e-06, "loss": 0.6387, "step": 12310 }, { "epoch": 0.3815528566117239, "grad_norm": 2.334876012302572, "learning_rate": 3.2551692648881357e-06, "loss": 0.693, "step": 12315 }, { "epoch": 0.38170777047961335, "grad_norm": 3.8861321220837675, "learning_rate": 3.2543539234231298e-06, "loss": 0.7305, "step": 12320 }, { "epoch": 0.3818626843475028, "grad_norm": 2.7782663035409594, "learning_rate": 3.2535385819581246e-06, "loss": 0.7272, "step": 12325 }, { "epoch": 0.3820175982153922, "grad_norm": 2.942647298602944, "learning_rate": 3.2527232404931187e-06, "loss": 0.6504, "step": 12330 }, { "epoch": 0.3821725120832817, "grad_norm": 2.45630963999116, "learning_rate": 3.251907899028113e-06, "loss": 0.694, "step": 12335 }, { "epoch": 0.38232742595117114, "grad_norm": 2.513279655563005, "learning_rate": 3.2510925575631076e-06, "loss": 0.7241, "step": 12340 }, { "epoch": 0.3824823398190606, "grad_norm": 3.4071768476084716, "learning_rate": 3.250277216098102e-06, "loss": 0.729, "step": 12345 }, { "epoch": 0.38263725368695006, "grad_norm": 2.7285102973847697, "learning_rate": 3.2494618746330965e-06, "loss": 0.7143, "step": 12350 }, { "epoch": 0.3827921675548395, "grad_norm": 2.726797093459626, "learning_rate": 3.248646533168091e-06, "loss": 0.7159, "step": 12355 }, { "epoch": 0.382947081422729, "grad_norm": 3.3309220381510123, "learning_rate": 3.2478311917030854e-06, "loss": 0.7298, "step": 12360 }, { "epoch": 0.3831019952906184, "grad_norm": 2.8687404653038806, "learning_rate": 3.24701585023808e-06, "loss": 0.7134, "step": 12365 }, { "epoch": 0.38325690915850785, "grad_norm": 2.502777570725272, "learning_rate": 3.2462005087730748e-06, "loss": 0.7095, "step": 12370 }, { "epoch": 0.3834118230263973, "grad_norm": 3.3528238143848186, "learning_rate": 3.245385167308069e-06, "loss": 0.6952, "step": 12375 }, { "epoch": 0.3835667368942868, "grad_norm": 2.419107260489693, "learning_rate": 3.2445698258430637e-06, "loss": 0.719, "step": 12380 }, { "epoch": 0.38372165076217624, "grad_norm": 2.576770119762887, "learning_rate": 3.2437544843780577e-06, "loss": 0.7434, "step": 12385 }, { "epoch": 0.3838765646300657, "grad_norm": 2.6693631093254844, "learning_rate": 3.2429391429130526e-06, "loss": 0.7691, "step": 12390 }, { "epoch": 0.38403147849795516, "grad_norm": 2.5673995778897334, "learning_rate": 3.2421238014480466e-06, "loss": 0.7416, "step": 12395 }, { "epoch": 0.38418639236584456, "grad_norm": 2.438011164434322, "learning_rate": 3.2413084599830415e-06, "loss": 0.65, "step": 12400 }, { "epoch": 0.384341306233734, "grad_norm": 2.220939986749875, "learning_rate": 3.2404931185180356e-06, "loss": 0.623, "step": 12405 }, { "epoch": 0.3844962201016235, "grad_norm": 3.2046573620700034, "learning_rate": 3.23967777705303e-06, "loss": 0.7137, "step": 12410 }, { "epoch": 0.38465113396951295, "grad_norm": 2.5527386078307175, "learning_rate": 3.2388624355880245e-06, "loss": 0.6697, "step": 12415 }, { "epoch": 0.3848060478374024, "grad_norm": 2.381595108298338, "learning_rate": 3.238047094123019e-06, "loss": 0.6864, "step": 12420 }, { "epoch": 0.38496096170529187, "grad_norm": 2.473897299824356, "learning_rate": 3.2372317526580134e-06, "loss": 0.7704, "step": 12425 }, { "epoch": 0.38511587557318133, "grad_norm": 5.715205768854749, "learning_rate": 3.236416411193008e-06, "loss": 0.7438, "step": 12430 }, { "epoch": 0.38527078944107074, "grad_norm": 2.7485760802275676, "learning_rate": 3.2356010697280023e-06, "loss": 0.7109, "step": 12435 }, { "epoch": 0.3854257033089602, "grad_norm": 2.505947379355649, "learning_rate": 3.2347857282629968e-06, "loss": 0.6927, "step": 12440 }, { "epoch": 0.38558061717684966, "grad_norm": 2.757740257713225, "learning_rate": 3.233970386797991e-06, "loss": 0.7834, "step": 12445 }, { "epoch": 0.3857355310447391, "grad_norm": 2.404206038843786, "learning_rate": 3.2331550453329857e-06, "loss": 0.713, "step": 12450 }, { "epoch": 0.3858904449126286, "grad_norm": 3.4047418576037027, "learning_rate": 3.2323397038679797e-06, "loss": 0.6792, "step": 12455 }, { "epoch": 0.38604535878051804, "grad_norm": 2.412284159913371, "learning_rate": 3.2315243624029746e-06, "loss": 0.7219, "step": 12460 }, { "epoch": 0.3862002726484075, "grad_norm": 2.4570381712620253, "learning_rate": 3.2307090209379695e-06, "loss": 0.6541, "step": 12465 }, { "epoch": 0.3863551865162969, "grad_norm": 2.3543503525773484, "learning_rate": 3.2298936794729635e-06, "loss": 0.6378, "step": 12470 }, { "epoch": 0.3865101003841864, "grad_norm": 2.835804553312304, "learning_rate": 3.2290783380079584e-06, "loss": 0.7104, "step": 12475 }, { "epoch": 0.38666501425207583, "grad_norm": 3.119196198212217, "learning_rate": 3.2282629965429525e-06, "loss": 0.7776, "step": 12480 }, { "epoch": 0.3868199281199653, "grad_norm": 3.1833288918595044, "learning_rate": 3.227447655077947e-06, "loss": 0.6713, "step": 12485 }, { "epoch": 0.38697484198785476, "grad_norm": 2.6634959018889903, "learning_rate": 3.2266323136129414e-06, "loss": 0.7476, "step": 12490 }, { "epoch": 0.3871297558557442, "grad_norm": 2.8980514589553654, "learning_rate": 3.225816972147936e-06, "loss": 0.7717, "step": 12495 }, { "epoch": 0.3872846697236337, "grad_norm": 2.699845232054519, "learning_rate": 3.2250016306829303e-06, "loss": 0.6618, "step": 12500 }, { "epoch": 0.38743958359152314, "grad_norm": 2.591064552573838, "learning_rate": 3.2241862892179247e-06, "loss": 0.7001, "step": 12505 }, { "epoch": 0.38759449745941255, "grad_norm": 2.137768012873491, "learning_rate": 3.223370947752919e-06, "loss": 0.7451, "step": 12510 }, { "epoch": 0.387749411327302, "grad_norm": 2.1126508419985415, "learning_rate": 3.2225556062879137e-06, "loss": 0.6699, "step": 12515 }, { "epoch": 0.38790432519519147, "grad_norm": 3.405870539107483, "learning_rate": 3.221740264822908e-06, "loss": 0.7544, "step": 12520 }, { "epoch": 0.38805923906308093, "grad_norm": 2.249710758775008, "learning_rate": 3.2209249233579026e-06, "loss": 0.765, "step": 12525 }, { "epoch": 0.3882141529309704, "grad_norm": 2.0884312971031167, "learning_rate": 3.2201095818928966e-06, "loss": 0.7811, "step": 12530 }, { "epoch": 0.38836906679885985, "grad_norm": 2.5912936434989877, "learning_rate": 3.2192942404278915e-06, "loss": 0.7247, "step": 12535 }, { "epoch": 0.3885239806667493, "grad_norm": 2.541797855762961, "learning_rate": 3.2184788989628855e-06, "loss": 0.6652, "step": 12540 }, { "epoch": 0.3886788945346387, "grad_norm": 2.715006903410247, "learning_rate": 3.2176635574978804e-06, "loss": 0.7464, "step": 12545 }, { "epoch": 0.3888338084025282, "grad_norm": 2.6585823819180825, "learning_rate": 3.2168482160328745e-06, "loss": 0.6576, "step": 12550 }, { "epoch": 0.38898872227041764, "grad_norm": 2.7062002767965443, "learning_rate": 3.2160328745678693e-06, "loss": 0.7462, "step": 12555 }, { "epoch": 0.3891436361383071, "grad_norm": 2.4167985315168026, "learning_rate": 3.215217533102864e-06, "loss": 0.7266, "step": 12560 }, { "epoch": 0.38929855000619656, "grad_norm": 3.078900699431057, "learning_rate": 3.2144021916378583e-06, "loss": 0.7358, "step": 12565 }, { "epoch": 0.389453463874086, "grad_norm": 2.668030766973122, "learning_rate": 3.2135868501728527e-06, "loss": 0.7855, "step": 12570 }, { "epoch": 0.3896083777419755, "grad_norm": 2.67137837220138, "learning_rate": 3.212771508707847e-06, "loss": 0.7378, "step": 12575 }, { "epoch": 0.3897632916098649, "grad_norm": 2.1266769908271925, "learning_rate": 3.2119561672428416e-06, "loss": 0.6937, "step": 12580 }, { "epoch": 0.38991820547775435, "grad_norm": 2.1630304220018974, "learning_rate": 3.211140825777836e-06, "loss": 0.6906, "step": 12585 }, { "epoch": 0.3900731193456438, "grad_norm": 4.828917134488313, "learning_rate": 3.2103254843128306e-06, "loss": 0.8266, "step": 12590 }, { "epoch": 0.3902280332135333, "grad_norm": 3.3840416027126152, "learning_rate": 3.209510142847825e-06, "loss": 0.7463, "step": 12595 }, { "epoch": 0.39038294708142274, "grad_norm": 2.6910101312474874, "learning_rate": 3.2086948013828195e-06, "loss": 0.7007, "step": 12600 }, { "epoch": 0.3905378609493122, "grad_norm": 2.4987048473903113, "learning_rate": 3.2078794599178135e-06, "loss": 0.7822, "step": 12605 }, { "epoch": 0.39069277481720166, "grad_norm": 3.227491275164398, "learning_rate": 3.2070641184528084e-06, "loss": 0.6457, "step": 12610 }, { "epoch": 0.39084768868509107, "grad_norm": 2.6016746114528426, "learning_rate": 3.2062487769878024e-06, "loss": 0.6941, "step": 12615 }, { "epoch": 0.3910026025529805, "grad_norm": 2.688681862005434, "learning_rate": 3.2054334355227973e-06, "loss": 0.6702, "step": 12620 }, { "epoch": 0.39115751642087, "grad_norm": 2.505608105122038, "learning_rate": 3.2046180940577913e-06, "loss": 0.6692, "step": 12625 }, { "epoch": 0.39131243028875945, "grad_norm": 2.4463541922739327, "learning_rate": 3.2038027525927862e-06, "loss": 0.6848, "step": 12630 }, { "epoch": 0.3914673441566489, "grad_norm": 2.840826717446628, "learning_rate": 3.2029874111277803e-06, "loss": 0.8025, "step": 12635 }, { "epoch": 0.39162225802453837, "grad_norm": 2.503583874470555, "learning_rate": 3.202172069662775e-06, "loss": 0.6753, "step": 12640 }, { "epoch": 0.39177717189242783, "grad_norm": 3.8661952019709362, "learning_rate": 3.201356728197769e-06, "loss": 0.7051, "step": 12645 }, { "epoch": 0.39193208576031724, "grad_norm": 2.8666226164329487, "learning_rate": 3.200541386732764e-06, "loss": 0.6824, "step": 12650 }, { "epoch": 0.3920869996282067, "grad_norm": 2.379493768270431, "learning_rate": 3.1997260452677585e-06, "loss": 0.6337, "step": 12655 }, { "epoch": 0.39224191349609616, "grad_norm": 2.6372040343115306, "learning_rate": 3.198910703802753e-06, "loss": 0.6982, "step": 12660 }, { "epoch": 0.3923968273639856, "grad_norm": 2.447211437668814, "learning_rate": 3.1980953623377474e-06, "loss": 0.705, "step": 12665 }, { "epoch": 0.3925517412318751, "grad_norm": 2.4425537240008364, "learning_rate": 3.197280020872742e-06, "loss": 0.7647, "step": 12670 }, { "epoch": 0.39270665509976455, "grad_norm": 2.4610093149090093, "learning_rate": 3.1964646794077364e-06, "loss": 0.6884, "step": 12675 }, { "epoch": 0.392861568967654, "grad_norm": 2.7008972367677795, "learning_rate": 3.1956493379427304e-06, "loss": 0.7112, "step": 12680 }, { "epoch": 0.3930164828355434, "grad_norm": 2.893067268599089, "learning_rate": 3.1948339964777253e-06, "loss": 0.7283, "step": 12685 }, { "epoch": 0.3931713967034329, "grad_norm": 2.4570705573168645, "learning_rate": 3.1940186550127193e-06, "loss": 0.744, "step": 12690 }, { "epoch": 0.39332631057132234, "grad_norm": 2.7876412068072907, "learning_rate": 3.193203313547714e-06, "loss": 0.8833, "step": 12695 }, { "epoch": 0.3934812244392118, "grad_norm": 3.3744727626713527, "learning_rate": 3.1923879720827082e-06, "loss": 0.6609, "step": 12700 }, { "epoch": 0.39363613830710126, "grad_norm": 2.6067001972515635, "learning_rate": 3.191572630617703e-06, "loss": 0.7136, "step": 12705 }, { "epoch": 0.3937910521749907, "grad_norm": 3.855027787072935, "learning_rate": 3.190757289152697e-06, "loss": 0.6893, "step": 12710 }, { "epoch": 0.3939459660428802, "grad_norm": 5.297382186288328, "learning_rate": 3.189941947687692e-06, "loss": 0.716, "step": 12715 }, { "epoch": 0.3941008799107696, "grad_norm": 4.453418585032165, "learning_rate": 3.189126606222686e-06, "loss": 0.645, "step": 12720 }, { "epoch": 0.39425579377865905, "grad_norm": 2.442982693340018, "learning_rate": 3.188311264757681e-06, "loss": 0.6833, "step": 12725 }, { "epoch": 0.3944107076465485, "grad_norm": 2.488079516807807, "learning_rate": 3.187495923292675e-06, "loss": 0.716, "step": 12730 }, { "epoch": 0.39456562151443797, "grad_norm": 3.962368350075495, "learning_rate": 3.18668058182767e-06, "loss": 0.7085, "step": 12735 }, { "epoch": 0.39472053538232743, "grad_norm": 3.4591856253654334, "learning_rate": 3.185865240362664e-06, "loss": 0.7634, "step": 12740 }, { "epoch": 0.3948754492502169, "grad_norm": 2.6819770212602383, "learning_rate": 3.1850498988976588e-06, "loss": 0.727, "step": 12745 }, { "epoch": 0.39503036311810635, "grad_norm": 2.4711879391990754, "learning_rate": 3.1842345574326532e-06, "loss": 0.6701, "step": 12750 }, { "epoch": 0.39518527698599576, "grad_norm": 2.815995458628415, "learning_rate": 3.1834192159676473e-06, "loss": 0.7025, "step": 12755 }, { "epoch": 0.3953401908538852, "grad_norm": 2.717770815634263, "learning_rate": 3.182603874502642e-06, "loss": 0.7145, "step": 12760 }, { "epoch": 0.3954951047217747, "grad_norm": 2.194250236315806, "learning_rate": 3.181788533037636e-06, "loss": 0.6738, "step": 12765 }, { "epoch": 0.39565001858966414, "grad_norm": 3.3668434207915685, "learning_rate": 3.180973191572631e-06, "loss": 0.7577, "step": 12770 }, { "epoch": 0.3958049324575536, "grad_norm": 3.49671752725937, "learning_rate": 3.180157850107625e-06, "loss": 0.6293, "step": 12775 }, { "epoch": 0.39595984632544307, "grad_norm": 2.634598113135197, "learning_rate": 3.17934250864262e-06, "loss": 0.7115, "step": 12780 }, { "epoch": 0.3961147601933325, "grad_norm": 3.1059156076764025, "learning_rate": 3.178527167177614e-06, "loss": 0.7422, "step": 12785 }, { "epoch": 0.396269674061222, "grad_norm": 2.5499299827494744, "learning_rate": 3.177711825712609e-06, "loss": 0.6544, "step": 12790 }, { "epoch": 0.3964245879291114, "grad_norm": 2.6740353924401736, "learning_rate": 3.176896484247603e-06, "loss": 0.7609, "step": 12795 }, { "epoch": 0.39657950179700086, "grad_norm": 2.845973992476201, "learning_rate": 3.176081142782598e-06, "loss": 0.7278, "step": 12800 }, { "epoch": 0.3967344156648903, "grad_norm": 2.7201541357543064, "learning_rate": 3.175265801317592e-06, "loss": 0.7184, "step": 12805 }, { "epoch": 0.3968893295327798, "grad_norm": 2.924018697969892, "learning_rate": 3.1744504598525868e-06, "loss": 0.7372, "step": 12810 }, { "epoch": 0.39704424340066924, "grad_norm": 3.8010702078189214, "learning_rate": 3.173635118387581e-06, "loss": 0.7161, "step": 12815 }, { "epoch": 0.3971991572685587, "grad_norm": 3.3535465731908563, "learning_rate": 3.1728197769225757e-06, "loss": 0.6872, "step": 12820 }, { "epoch": 0.39735407113644816, "grad_norm": 3.973821305093208, "learning_rate": 3.1720044354575697e-06, "loss": 0.7453, "step": 12825 }, { "epoch": 0.39750898500433757, "grad_norm": 2.729869323843595, "learning_rate": 3.171189093992564e-06, "loss": 0.69, "step": 12830 }, { "epoch": 0.39766389887222703, "grad_norm": 2.510412725321038, "learning_rate": 3.1703737525275586e-06, "loss": 0.7622, "step": 12835 }, { "epoch": 0.3978188127401165, "grad_norm": 2.7423877173301863, "learning_rate": 3.169558411062553e-06, "loss": 0.7296, "step": 12840 }, { "epoch": 0.39797372660800595, "grad_norm": 3.210410165296419, "learning_rate": 3.168743069597548e-06, "loss": 0.7815, "step": 12845 }, { "epoch": 0.3981286404758954, "grad_norm": 2.3613424044591174, "learning_rate": 3.167927728132542e-06, "loss": 0.753, "step": 12850 }, { "epoch": 0.3982835543437849, "grad_norm": 3.346555712625965, "learning_rate": 3.167112386667537e-06, "loss": 0.6912, "step": 12855 }, { "epoch": 0.39843846821167433, "grad_norm": 2.9877440390839203, "learning_rate": 3.166297045202531e-06, "loss": 0.7054, "step": 12860 }, { "epoch": 0.39859338207956374, "grad_norm": 2.3156179263864556, "learning_rate": 3.165481703737526e-06, "loss": 0.6964, "step": 12865 }, { "epoch": 0.3987482959474532, "grad_norm": 3.343900407408983, "learning_rate": 3.16466636227252e-06, "loss": 0.7737, "step": 12870 }, { "epoch": 0.39890320981534266, "grad_norm": 2.8793983077007015, "learning_rate": 3.1638510208075147e-06, "loss": 0.7612, "step": 12875 }, { "epoch": 0.3990581236832321, "grad_norm": 2.4660424497418734, "learning_rate": 3.1630356793425088e-06, "loss": 0.7351, "step": 12880 }, { "epoch": 0.3992130375511216, "grad_norm": 3.166623330550365, "learning_rate": 3.1622203378775036e-06, "loss": 0.7163, "step": 12885 }, { "epoch": 0.39936795141901105, "grad_norm": 3.473023376864374, "learning_rate": 3.1614049964124977e-06, "loss": 0.6908, "step": 12890 }, { "epoch": 0.3995228652869005, "grad_norm": 2.3315427950169165, "learning_rate": 3.1605896549474926e-06, "loss": 0.7243, "step": 12895 }, { "epoch": 0.3996777791547899, "grad_norm": 2.2884093967420123, "learning_rate": 3.1597743134824866e-06, "loss": 0.6699, "step": 12900 }, { "epoch": 0.3998326930226794, "grad_norm": 2.655389867441325, "learning_rate": 3.158958972017481e-06, "loss": 0.7688, "step": 12905 }, { "epoch": 0.39998760689056884, "grad_norm": 2.782107306250648, "learning_rate": 3.1581436305524755e-06, "loss": 0.6835, "step": 12910 }, { "epoch": 0.4001425207584583, "grad_norm": 3.8900110770860503, "learning_rate": 3.15732828908747e-06, "loss": 0.7209, "step": 12915 }, { "epoch": 0.40029743462634776, "grad_norm": 2.2840598154213736, "learning_rate": 3.1565129476224644e-06, "loss": 0.6897, "step": 12920 }, { "epoch": 0.4004523484942372, "grad_norm": 2.3631884769681633, "learning_rate": 3.155697606157459e-06, "loss": 0.7113, "step": 12925 }, { "epoch": 0.4006072623621267, "grad_norm": 2.517199019364982, "learning_rate": 3.1548822646924538e-06, "loss": 0.8404, "step": 12930 }, { "epoch": 0.4007621762300161, "grad_norm": 2.8364607067269647, "learning_rate": 3.154066923227448e-06, "loss": 0.6673, "step": 12935 }, { "epoch": 0.40091709009790555, "grad_norm": 2.0601186479757247, "learning_rate": 3.1532515817624427e-06, "loss": 0.7532, "step": 12940 }, { "epoch": 0.401072003965795, "grad_norm": 2.3269011079841913, "learning_rate": 3.1524362402974367e-06, "loss": 0.7336, "step": 12945 }, { "epoch": 0.40122691783368447, "grad_norm": 2.64621798402325, "learning_rate": 3.1516208988324316e-06, "loss": 0.6787, "step": 12950 }, { "epoch": 0.40138183170157393, "grad_norm": 2.6824141021441004, "learning_rate": 3.1508055573674256e-06, "loss": 0.6961, "step": 12955 }, { "epoch": 0.4015367455694634, "grad_norm": 3.5652162251510915, "learning_rate": 3.1499902159024205e-06, "loss": 0.6657, "step": 12960 }, { "epoch": 0.40169165943735285, "grad_norm": 2.91490083615343, "learning_rate": 3.1491748744374146e-06, "loss": 0.6647, "step": 12965 }, { "epoch": 0.40184657330524226, "grad_norm": 2.5106061941107227, "learning_rate": 3.1483595329724094e-06, "loss": 0.6557, "step": 12970 }, { "epoch": 0.4020014871731317, "grad_norm": 2.770185820674694, "learning_rate": 3.1475441915074035e-06, "loss": 0.731, "step": 12975 }, { "epoch": 0.4021564010410212, "grad_norm": 2.9560745054981408, "learning_rate": 3.146728850042398e-06, "loss": 0.654, "step": 12980 }, { "epoch": 0.40231131490891064, "grad_norm": 2.9146319674458616, "learning_rate": 3.1459135085773924e-06, "loss": 0.7265, "step": 12985 }, { "epoch": 0.4024662287768001, "grad_norm": 2.2884378402341445, "learning_rate": 3.145098167112387e-06, "loss": 0.6588, "step": 12990 }, { "epoch": 0.40262114264468957, "grad_norm": 2.4244692667652745, "learning_rate": 3.1442828256473813e-06, "loss": 0.6927, "step": 12995 }, { "epoch": 0.40277605651257903, "grad_norm": 3.827895215898755, "learning_rate": 3.1434674841823758e-06, "loss": 0.7358, "step": 13000 }, { "epoch": 0.40293097038046843, "grad_norm": 2.428995398124668, "learning_rate": 3.1426521427173702e-06, "loss": 0.6595, "step": 13005 }, { "epoch": 0.4030858842483579, "grad_norm": 3.7274029893271243, "learning_rate": 3.1418368012523647e-06, "loss": 0.6137, "step": 13010 }, { "epoch": 0.40324079811624736, "grad_norm": 2.877505925475462, "learning_rate": 3.141021459787359e-06, "loss": 0.7318, "step": 13015 }, { "epoch": 0.4033957119841368, "grad_norm": 2.6392203557104295, "learning_rate": 3.1402061183223536e-06, "loss": 0.6729, "step": 13020 }, { "epoch": 0.4035506258520263, "grad_norm": 2.9452540939788032, "learning_rate": 3.1393907768573485e-06, "loss": 0.6648, "step": 13025 }, { "epoch": 0.40370553971991574, "grad_norm": 3.0766583858449925, "learning_rate": 3.1385754353923425e-06, "loss": 0.6585, "step": 13030 }, { "epoch": 0.4038604535878052, "grad_norm": 2.256792195710369, "learning_rate": 3.1377600939273374e-06, "loss": 0.6958, "step": 13035 }, { "epoch": 0.4040153674556946, "grad_norm": 2.706350880468362, "learning_rate": 3.1369447524623315e-06, "loss": 0.7059, "step": 13040 }, { "epoch": 0.40417028132358407, "grad_norm": 2.450004579900893, "learning_rate": 3.1361294109973263e-06, "loss": 0.7589, "step": 13045 }, { "epoch": 0.40432519519147353, "grad_norm": 2.907606868751571, "learning_rate": 3.1353140695323204e-06, "loss": 0.7748, "step": 13050 }, { "epoch": 0.404480109059363, "grad_norm": 2.386986018827967, "learning_rate": 3.134498728067315e-06, "loss": 0.6943, "step": 13055 }, { "epoch": 0.40463502292725245, "grad_norm": 3.9049044810869007, "learning_rate": 3.1336833866023093e-06, "loss": 0.7246, "step": 13060 }, { "epoch": 0.4047899367951419, "grad_norm": 2.672051131229668, "learning_rate": 3.1328680451373037e-06, "loss": 0.6646, "step": 13065 }, { "epoch": 0.4049448506630314, "grad_norm": 2.878415119189296, "learning_rate": 3.132052703672298e-06, "loss": 0.6945, "step": 13070 }, { "epoch": 0.4050997645309208, "grad_norm": 3.207649664980733, "learning_rate": 3.1312373622072927e-06, "loss": 0.6958, "step": 13075 }, { "epoch": 0.40525467839881024, "grad_norm": 3.021756780408962, "learning_rate": 3.130422020742287e-06, "loss": 0.7683, "step": 13080 }, { "epoch": 0.4054095922666997, "grad_norm": 2.2835825259835887, "learning_rate": 3.1296066792772816e-06, "loss": 0.6817, "step": 13085 }, { "epoch": 0.40556450613458916, "grad_norm": 2.9854504386511507, "learning_rate": 3.128791337812276e-06, "loss": 0.6535, "step": 13090 }, { "epoch": 0.4057194200024786, "grad_norm": 2.1499407295459103, "learning_rate": 3.1279759963472705e-06, "loss": 0.7143, "step": 13095 }, { "epoch": 0.4058743338703681, "grad_norm": 2.501035020586829, "learning_rate": 3.1271606548822645e-06, "loss": 0.6867, "step": 13100 }, { "epoch": 0.40602924773825755, "grad_norm": 3.358586944062689, "learning_rate": 3.1263453134172594e-06, "loss": 0.7698, "step": 13105 }, { "epoch": 0.406184161606147, "grad_norm": 2.373529870178291, "learning_rate": 3.1255299719522535e-06, "loss": 0.7111, "step": 13110 }, { "epoch": 0.4063390754740364, "grad_norm": 2.369970590698376, "learning_rate": 3.1247146304872483e-06, "loss": 0.7763, "step": 13115 }, { "epoch": 0.4064939893419259, "grad_norm": 2.502307151457754, "learning_rate": 3.1238992890222432e-06, "loss": 0.6853, "step": 13120 }, { "epoch": 0.40664890320981534, "grad_norm": 2.8138855384451693, "learning_rate": 3.1230839475572373e-06, "loss": 0.6815, "step": 13125 }, { "epoch": 0.4068038170777048, "grad_norm": 2.5689381555193305, "learning_rate": 3.1222686060922317e-06, "loss": 0.6963, "step": 13130 }, { "epoch": 0.40695873094559426, "grad_norm": 3.4689153916434723, "learning_rate": 3.121453264627226e-06, "loss": 0.7263, "step": 13135 }, { "epoch": 0.4071136448134837, "grad_norm": 3.1251376165726765, "learning_rate": 3.1206379231622206e-06, "loss": 0.7375, "step": 13140 }, { "epoch": 0.4072685586813732, "grad_norm": 3.254586142353053, "learning_rate": 3.119822581697215e-06, "loss": 0.7183, "step": 13145 }, { "epoch": 0.4074234725492626, "grad_norm": 3.4051598136756107, "learning_rate": 3.1190072402322096e-06, "loss": 0.6758, "step": 13150 }, { "epoch": 0.40757838641715205, "grad_norm": 2.905330345072755, "learning_rate": 3.118191898767204e-06, "loss": 0.8427, "step": 13155 }, { "epoch": 0.4077333002850415, "grad_norm": 2.644214821429436, "learning_rate": 3.1173765573021985e-06, "loss": 0.6913, "step": 13160 }, { "epoch": 0.407888214152931, "grad_norm": 2.2494584305136107, "learning_rate": 3.116561215837193e-06, "loss": 0.6944, "step": 13165 }, { "epoch": 0.40804312802082043, "grad_norm": 2.7106794746419283, "learning_rate": 3.1157458743721874e-06, "loss": 0.6163, "step": 13170 }, { "epoch": 0.4081980418887099, "grad_norm": 3.365653578638173, "learning_rate": 3.1149305329071814e-06, "loss": 0.7214, "step": 13175 }, { "epoch": 0.40835295575659936, "grad_norm": 2.464389157474503, "learning_rate": 3.1141151914421763e-06, "loss": 0.7221, "step": 13180 }, { "epoch": 0.40850786962448876, "grad_norm": 3.1188154412180897, "learning_rate": 3.1132998499771703e-06, "loss": 0.7384, "step": 13185 }, { "epoch": 0.4086627834923782, "grad_norm": 3.279728355325564, "learning_rate": 3.1124845085121652e-06, "loss": 0.6191, "step": 13190 }, { "epoch": 0.4088176973602677, "grad_norm": 2.889983069919303, "learning_rate": 3.1116691670471593e-06, "loss": 0.6766, "step": 13195 }, { "epoch": 0.40897261122815715, "grad_norm": 2.6339959795083323, "learning_rate": 3.110853825582154e-06, "loss": 0.733, "step": 13200 }, { "epoch": 0.4091275250960466, "grad_norm": 2.4655585899677965, "learning_rate": 3.110038484117148e-06, "loss": 0.7254, "step": 13205 }, { "epoch": 0.40928243896393607, "grad_norm": 2.181305538859242, "learning_rate": 3.109223142652143e-06, "loss": 0.707, "step": 13210 }, { "epoch": 0.40943735283182553, "grad_norm": 2.6432212337044936, "learning_rate": 3.1084078011871375e-06, "loss": 0.7079, "step": 13215 }, { "epoch": 0.40959226669971494, "grad_norm": 4.642310493268193, "learning_rate": 3.107592459722132e-06, "loss": 0.6757, "step": 13220 }, { "epoch": 0.4097471805676044, "grad_norm": 3.199005037854095, "learning_rate": 3.1067771182571264e-06, "loss": 0.7344, "step": 13225 }, { "epoch": 0.40990209443549386, "grad_norm": 2.8376500510080565, "learning_rate": 3.105961776792121e-06, "loss": 0.6938, "step": 13230 }, { "epoch": 0.4100570083033833, "grad_norm": 2.4929706427512146, "learning_rate": 3.1051464353271154e-06, "loss": 0.6296, "step": 13235 }, { "epoch": 0.4102119221712728, "grad_norm": 4.0464916721888216, "learning_rate": 3.10433109386211e-06, "loss": 0.8114, "step": 13240 }, { "epoch": 0.41036683603916224, "grad_norm": 3.2533869022265267, "learning_rate": 3.1035157523971043e-06, "loss": 0.7392, "step": 13245 }, { "epoch": 0.4105217499070517, "grad_norm": 3.0726950346914927, "learning_rate": 3.1027004109320983e-06, "loss": 0.7263, "step": 13250 }, { "epoch": 0.4106766637749411, "grad_norm": 2.690938291841008, "learning_rate": 3.101885069467093e-06, "loss": 0.7005, "step": 13255 }, { "epoch": 0.41083157764283057, "grad_norm": 3.084784886998979, "learning_rate": 3.1010697280020872e-06, "loss": 0.777, "step": 13260 }, { "epoch": 0.41098649151072003, "grad_norm": 2.090494689872752, "learning_rate": 3.100254386537082e-06, "loss": 0.7173, "step": 13265 }, { "epoch": 0.4111414053786095, "grad_norm": 2.8832637958333898, "learning_rate": 3.099439045072076e-06, "loss": 0.8061, "step": 13270 }, { "epoch": 0.41129631924649895, "grad_norm": 2.413548490420166, "learning_rate": 3.098623703607071e-06, "loss": 0.6619, "step": 13275 }, { "epoch": 0.4114512331143884, "grad_norm": 2.8584524858412017, "learning_rate": 3.097808362142065e-06, "loss": 0.7376, "step": 13280 }, { "epoch": 0.4116061469822779, "grad_norm": 2.6342861477293082, "learning_rate": 3.09699302067706e-06, "loss": 0.6937, "step": 13285 }, { "epoch": 0.4117610608501673, "grad_norm": 2.3298404410183413, "learning_rate": 3.096177679212054e-06, "loss": 0.6375, "step": 13290 }, { "epoch": 0.41191597471805674, "grad_norm": 2.964436487934177, "learning_rate": 3.095362337747049e-06, "loss": 0.7463, "step": 13295 }, { "epoch": 0.4120708885859462, "grad_norm": 2.6696099172252032, "learning_rate": 3.094546996282043e-06, "loss": 0.803, "step": 13300 }, { "epoch": 0.41222580245383567, "grad_norm": 3.0066121403339925, "learning_rate": 3.0937316548170378e-06, "loss": 0.7959, "step": 13305 }, { "epoch": 0.4123807163217251, "grad_norm": 2.7204994091831214, "learning_rate": 3.0929163133520322e-06, "loss": 0.6648, "step": 13310 }, { "epoch": 0.4125356301896146, "grad_norm": 2.5582976549847536, "learning_rate": 3.0921009718870267e-06, "loss": 0.671, "step": 13315 }, { "epoch": 0.41269054405750405, "grad_norm": 2.7947053997879294, "learning_rate": 3.091285630422021e-06, "loss": 0.7144, "step": 13320 }, { "epoch": 0.41284545792539346, "grad_norm": 3.56846227222114, "learning_rate": 3.090470288957015e-06, "loss": 0.8392, "step": 13325 }, { "epoch": 0.4130003717932829, "grad_norm": 2.389016875793821, "learning_rate": 3.08965494749201e-06, "loss": 0.7025, "step": 13330 }, { "epoch": 0.4131552856611724, "grad_norm": 2.8193018204181177, "learning_rate": 3.088839606027004e-06, "loss": 0.6736, "step": 13335 }, { "epoch": 0.41331019952906184, "grad_norm": 2.5183852984294743, "learning_rate": 3.088024264561999e-06, "loss": 0.711, "step": 13340 }, { "epoch": 0.4134651133969513, "grad_norm": 2.948061065834547, "learning_rate": 3.087208923096993e-06, "loss": 0.7271, "step": 13345 }, { "epoch": 0.41362002726484076, "grad_norm": 2.4807945024800477, "learning_rate": 3.086393581631988e-06, "loss": 0.6436, "step": 13350 }, { "epoch": 0.4137749411327302, "grad_norm": 3.2079926512760326, "learning_rate": 3.085578240166982e-06, "loss": 0.6591, "step": 13355 }, { "epoch": 0.41392985500061963, "grad_norm": 2.6449752542716953, "learning_rate": 3.084762898701977e-06, "loss": 0.7435, "step": 13360 }, { "epoch": 0.4140847688685091, "grad_norm": 3.0610781661099904, "learning_rate": 3.083947557236971e-06, "loss": 0.742, "step": 13365 }, { "epoch": 0.41423968273639855, "grad_norm": 4.720759289008389, "learning_rate": 3.0831322157719658e-06, "loss": 0.7014, "step": 13370 }, { "epoch": 0.414394596604288, "grad_norm": 2.26205633281474, "learning_rate": 3.0823168743069598e-06, "loss": 0.6357, "step": 13375 }, { "epoch": 0.4145495104721775, "grad_norm": 2.308011000930861, "learning_rate": 3.0815015328419547e-06, "loss": 0.6831, "step": 13380 }, { "epoch": 0.41470442434006693, "grad_norm": 2.9446417826516558, "learning_rate": 3.0806861913769487e-06, "loss": 0.6572, "step": 13385 }, { "epoch": 0.4148593382079564, "grad_norm": 2.4403685076028525, "learning_rate": 3.0798708499119436e-06, "loss": 0.6818, "step": 13390 }, { "epoch": 0.4150142520758458, "grad_norm": 4.191317268301402, "learning_rate": 3.0790555084469376e-06, "loss": 0.7402, "step": 13395 }, { "epoch": 0.41516916594373526, "grad_norm": 2.1368195487508155, "learning_rate": 3.078240166981932e-06, "loss": 0.6265, "step": 13400 }, { "epoch": 0.4153240798116247, "grad_norm": 3.993933635550467, "learning_rate": 3.077424825516927e-06, "loss": 0.7162, "step": 13405 }, { "epoch": 0.4154789936795142, "grad_norm": 2.107623612781436, "learning_rate": 3.076609484051921e-06, "loss": 0.772, "step": 13410 }, { "epoch": 0.41563390754740365, "grad_norm": 3.2120770194898554, "learning_rate": 3.075794142586916e-06, "loss": 0.6844, "step": 13415 }, { "epoch": 0.4157888214152931, "grad_norm": 2.4197844177204386, "learning_rate": 3.07497880112191e-06, "loss": 0.7352, "step": 13420 }, { "epoch": 0.41594373528318257, "grad_norm": 2.602325327395922, "learning_rate": 3.074163459656905e-06, "loss": 0.69, "step": 13425 }, { "epoch": 0.41609864915107203, "grad_norm": 2.906716613223132, "learning_rate": 3.073348118191899e-06, "loss": 0.7212, "step": 13430 }, { "epoch": 0.41625356301896144, "grad_norm": 3.0609052487550636, "learning_rate": 3.0725327767268937e-06, "loss": 0.7087, "step": 13435 }, { "epoch": 0.4164084768868509, "grad_norm": 2.6907011931569578, "learning_rate": 3.0717174352618878e-06, "loss": 0.7479, "step": 13440 }, { "epoch": 0.41656339075474036, "grad_norm": 2.715652455961831, "learning_rate": 3.0709020937968826e-06, "loss": 0.7365, "step": 13445 }, { "epoch": 0.4167183046226298, "grad_norm": 2.4783556168548673, "learning_rate": 3.0700867523318767e-06, "loss": 0.6766, "step": 13450 }, { "epoch": 0.4168732184905193, "grad_norm": 2.2899020323731647, "learning_rate": 3.0692714108668716e-06, "loss": 0.718, "step": 13455 }, { "epoch": 0.41702813235840874, "grad_norm": 2.0483774469543663, "learning_rate": 3.0684560694018656e-06, "loss": 0.6392, "step": 13460 }, { "epoch": 0.4171830462262982, "grad_norm": 2.6508723417076614, "learning_rate": 3.0676407279368605e-06, "loss": 0.8128, "step": 13465 }, { "epoch": 0.4173379600941876, "grad_norm": 2.7766580481440655, "learning_rate": 3.0668253864718545e-06, "loss": 0.7887, "step": 13470 }, { "epoch": 0.41749287396207707, "grad_norm": 2.5154716221791302, "learning_rate": 3.066010045006849e-06, "loss": 0.6777, "step": 13475 }, { "epoch": 0.41764778782996653, "grad_norm": 2.5301514163119294, "learning_rate": 3.0651947035418434e-06, "loss": 0.7375, "step": 13480 }, { "epoch": 0.417802701697856, "grad_norm": 2.453226811883419, "learning_rate": 3.064379362076838e-06, "loss": 0.6874, "step": 13485 }, { "epoch": 0.41795761556574546, "grad_norm": 2.3509175763303167, "learning_rate": 3.0635640206118323e-06, "loss": 0.7378, "step": 13490 }, { "epoch": 0.4181125294336349, "grad_norm": 2.869587543281522, "learning_rate": 3.062748679146827e-06, "loss": 0.7116, "step": 13495 }, { "epoch": 0.4182674433015244, "grad_norm": 3.092064325290953, "learning_rate": 3.0619333376818217e-06, "loss": 0.7622, "step": 13500 }, { "epoch": 0.4184223571694138, "grad_norm": 2.67220781958524, "learning_rate": 3.0611179962168157e-06, "loss": 0.7059, "step": 13505 }, { "epoch": 0.41857727103730324, "grad_norm": 2.2992511182764703, "learning_rate": 3.0603026547518106e-06, "loss": 0.8534, "step": 13510 }, { "epoch": 0.4187321849051927, "grad_norm": 3.1658839569410833, "learning_rate": 3.0594873132868046e-06, "loss": 0.6872, "step": 13515 }, { "epoch": 0.41888709877308217, "grad_norm": 2.2665886166479745, "learning_rate": 3.0586719718217995e-06, "loss": 0.7246, "step": 13520 }, { "epoch": 0.41904201264097163, "grad_norm": 3.6858290878558724, "learning_rate": 3.0578566303567936e-06, "loss": 0.7028, "step": 13525 }, { "epoch": 0.4191969265088611, "grad_norm": 2.8549409378690562, "learning_rate": 3.0570412888917884e-06, "loss": 0.6677, "step": 13530 }, { "epoch": 0.41935184037675055, "grad_norm": 2.1821737498341442, "learning_rate": 3.0562259474267825e-06, "loss": 0.7021, "step": 13535 }, { "epoch": 0.41950675424463996, "grad_norm": 2.6515894690519075, "learning_rate": 3.0554106059617774e-06, "loss": 0.7308, "step": 13540 }, { "epoch": 0.4196616681125294, "grad_norm": 2.421525336441829, "learning_rate": 3.0545952644967714e-06, "loss": 0.8361, "step": 13545 }, { "epoch": 0.4198165819804189, "grad_norm": 4.241127752355062, "learning_rate": 3.0537799230317663e-06, "loss": 0.7876, "step": 13550 }, { "epoch": 0.41997149584830834, "grad_norm": 2.737227616506851, "learning_rate": 3.0529645815667603e-06, "loss": 0.7104, "step": 13555 }, { "epoch": 0.4201264097161978, "grad_norm": 2.8079268175042946, "learning_rate": 3.0521492401017548e-06, "loss": 0.7742, "step": 13560 }, { "epoch": 0.42028132358408726, "grad_norm": 2.3307910701448185, "learning_rate": 3.0513338986367492e-06, "loss": 0.7196, "step": 13565 }, { "epoch": 0.4204362374519767, "grad_norm": 3.680414707715909, "learning_rate": 3.0505185571717437e-06, "loss": 0.6568, "step": 13570 }, { "epoch": 0.42059115131986613, "grad_norm": 2.330464448792759, "learning_rate": 3.049703215706738e-06, "loss": 0.7163, "step": 13575 }, { "epoch": 0.4207460651877556, "grad_norm": 2.57132563417102, "learning_rate": 3.0488878742417326e-06, "loss": 0.6838, "step": 13580 }, { "epoch": 0.42090097905564505, "grad_norm": 3.453175467750815, "learning_rate": 3.048072532776727e-06, "loss": 0.6819, "step": 13585 }, { "epoch": 0.4210558929235345, "grad_norm": 2.4819127063056823, "learning_rate": 3.0472571913117215e-06, "loss": 0.6646, "step": 13590 }, { "epoch": 0.421210806791424, "grad_norm": 3.043390585786319, "learning_rate": 3.0464418498467164e-06, "loss": 0.7218, "step": 13595 }, { "epoch": 0.42136572065931344, "grad_norm": 2.5322647470183717, "learning_rate": 3.0456265083817104e-06, "loss": 0.689, "step": 13600 }, { "epoch": 0.4215206345272029, "grad_norm": 2.7070752669406346, "learning_rate": 3.0448111669167053e-06, "loss": 0.7874, "step": 13605 }, { "epoch": 0.4216755483950923, "grad_norm": 2.6445168443332228, "learning_rate": 3.0439958254516994e-06, "loss": 0.7087, "step": 13610 }, { "epoch": 0.42183046226298176, "grad_norm": 2.7663849192292194, "learning_rate": 3.0431804839866942e-06, "loss": 0.6925, "step": 13615 }, { "epoch": 0.4219853761308712, "grad_norm": 3.4968032001523404, "learning_rate": 3.0423651425216883e-06, "loss": 0.6916, "step": 13620 }, { "epoch": 0.4221402899987607, "grad_norm": 3.8749620464461785, "learning_rate": 3.041549801056683e-06, "loss": 0.7396, "step": 13625 }, { "epoch": 0.42229520386665015, "grad_norm": 2.618789841589781, "learning_rate": 3.040734459591677e-06, "loss": 0.6521, "step": 13630 }, { "epoch": 0.4224501177345396, "grad_norm": 2.6105343941960264, "learning_rate": 3.0399191181266717e-06, "loss": 0.8507, "step": 13635 }, { "epoch": 0.42260503160242907, "grad_norm": 3.067790102210866, "learning_rate": 3.039103776661666e-06, "loss": 0.6895, "step": 13640 }, { "epoch": 0.4227599454703185, "grad_norm": 3.921460607842243, "learning_rate": 3.0382884351966606e-06, "loss": 0.6729, "step": 13645 }, { "epoch": 0.42291485933820794, "grad_norm": 2.498669505424626, "learning_rate": 3.037473093731655e-06, "loss": 0.7389, "step": 13650 }, { "epoch": 0.4230697732060974, "grad_norm": 2.9353854930093495, "learning_rate": 3.0366577522666495e-06, "loss": 0.6801, "step": 13655 }, { "epoch": 0.42322468707398686, "grad_norm": 3.9909747836197123, "learning_rate": 3.035842410801644e-06, "loss": 0.809, "step": 13660 }, { "epoch": 0.4233796009418763, "grad_norm": 2.8775240521598553, "learning_rate": 3.0350270693366384e-06, "loss": 0.6916, "step": 13665 }, { "epoch": 0.4235345148097658, "grad_norm": 3.0682025449893273, "learning_rate": 3.0342117278716325e-06, "loss": 0.7079, "step": 13670 }, { "epoch": 0.42368942867765524, "grad_norm": 2.4698222613992655, "learning_rate": 3.0333963864066273e-06, "loss": 0.7023, "step": 13675 }, { "epoch": 0.42384434254554465, "grad_norm": 2.856240436212485, "learning_rate": 3.0325810449416214e-06, "loss": 0.8436, "step": 13680 }, { "epoch": 0.4239992564134341, "grad_norm": 2.7290103189379793, "learning_rate": 3.0317657034766163e-06, "loss": 0.7276, "step": 13685 }, { "epoch": 0.4241541702813236, "grad_norm": 1.9718976720774013, "learning_rate": 3.030950362011611e-06, "loss": 0.6815, "step": 13690 }, { "epoch": 0.42430908414921303, "grad_norm": 2.5496222083800753, "learning_rate": 3.030135020546605e-06, "loss": 0.7063, "step": 13695 }, { "epoch": 0.4244639980171025, "grad_norm": 3.0394795819999656, "learning_rate": 3.0293196790816e-06, "loss": 0.7368, "step": 13700 }, { "epoch": 0.42461891188499196, "grad_norm": 2.918640259275741, "learning_rate": 3.028504337616594e-06, "loss": 0.6772, "step": 13705 }, { "epoch": 0.4247738257528814, "grad_norm": 2.3922837098301484, "learning_rate": 3.0276889961515885e-06, "loss": 0.7597, "step": 13710 }, { "epoch": 0.4249287396207709, "grad_norm": 2.8553815218342176, "learning_rate": 3.026873654686583e-06, "loss": 0.6667, "step": 13715 }, { "epoch": 0.4250836534886603, "grad_norm": 3.054478088664403, "learning_rate": 3.0260583132215775e-06, "loss": 0.6746, "step": 13720 }, { "epoch": 0.42523856735654975, "grad_norm": 2.6178302268938634, "learning_rate": 3.025242971756572e-06, "loss": 0.6721, "step": 13725 }, { "epoch": 0.4253934812244392, "grad_norm": 3.016831368746078, "learning_rate": 3.0244276302915664e-06, "loss": 0.8261, "step": 13730 }, { "epoch": 0.42554839509232867, "grad_norm": 2.531434379029286, "learning_rate": 3.023612288826561e-06, "loss": 0.7741, "step": 13735 }, { "epoch": 0.42570330896021813, "grad_norm": 2.6871657816507386, "learning_rate": 3.0227969473615553e-06, "loss": 0.7105, "step": 13740 }, { "epoch": 0.4258582228281076, "grad_norm": 3.3760264567714913, "learning_rate": 3.0219816058965493e-06, "loss": 0.7141, "step": 13745 }, { "epoch": 0.42601313669599705, "grad_norm": 3.5034114182640126, "learning_rate": 3.0211662644315442e-06, "loss": 0.6831, "step": 13750 }, { "epoch": 0.42616805056388646, "grad_norm": 2.1269854411889746, "learning_rate": 3.0203509229665383e-06, "loss": 0.6799, "step": 13755 }, { "epoch": 0.4263229644317759, "grad_norm": 3.2507165837277827, "learning_rate": 3.019535581501533e-06, "loss": 0.7381, "step": 13760 }, { "epoch": 0.4264778782996654, "grad_norm": 2.506334467562182, "learning_rate": 3.018720240036527e-06, "loss": 0.6177, "step": 13765 }, { "epoch": 0.42663279216755484, "grad_norm": 3.0556754673904414, "learning_rate": 3.017904898571522e-06, "loss": 0.6989, "step": 13770 }, { "epoch": 0.4267877060354443, "grad_norm": 2.3751550228971565, "learning_rate": 3.017089557106516e-06, "loss": 0.689, "step": 13775 }, { "epoch": 0.42694261990333376, "grad_norm": 2.845158838904477, "learning_rate": 3.016274215641511e-06, "loss": 0.6833, "step": 13780 }, { "epoch": 0.4270975337712232, "grad_norm": 2.190850258115353, "learning_rate": 3.0154588741765054e-06, "loss": 0.6775, "step": 13785 }, { "epoch": 0.42725244763911263, "grad_norm": 2.523119154593874, "learning_rate": 3.0146435327115e-06, "loss": 0.7081, "step": 13790 }, { "epoch": 0.4274073615070021, "grad_norm": 2.7613404617157524, "learning_rate": 3.0138281912464944e-06, "loss": 0.6571, "step": 13795 }, { "epoch": 0.42756227537489155, "grad_norm": 3.2679279821310927, "learning_rate": 3.013012849781489e-06, "loss": 0.6575, "step": 13800 }, { "epoch": 0.427717189242781, "grad_norm": 2.6270349363340433, "learning_rate": 3.0121975083164833e-06, "loss": 0.7069, "step": 13805 }, { "epoch": 0.4278721031106705, "grad_norm": 2.6573530550097737, "learning_rate": 3.0113821668514777e-06, "loss": 0.7466, "step": 13810 }, { "epoch": 0.42802701697855994, "grad_norm": 2.586714825950716, "learning_rate": 3.010566825386472e-06, "loss": 0.7147, "step": 13815 }, { "epoch": 0.4281819308464494, "grad_norm": 2.4643093299807672, "learning_rate": 3.0097514839214662e-06, "loss": 0.6353, "step": 13820 }, { "epoch": 0.4283368447143388, "grad_norm": 2.4530907283003955, "learning_rate": 3.008936142456461e-06, "loss": 0.6253, "step": 13825 }, { "epoch": 0.42849175858222827, "grad_norm": 2.7685311504132484, "learning_rate": 3.008120800991455e-06, "loss": 0.7223, "step": 13830 }, { "epoch": 0.4286466724501177, "grad_norm": 2.9991378540551072, "learning_rate": 3.00730545952645e-06, "loss": 0.7309, "step": 13835 }, { "epoch": 0.4288015863180072, "grad_norm": 2.9092645083947133, "learning_rate": 3.006490118061444e-06, "loss": 0.7246, "step": 13840 }, { "epoch": 0.42895650018589665, "grad_norm": 3.676667492691187, "learning_rate": 3.005674776596439e-06, "loss": 0.7791, "step": 13845 }, { "epoch": 0.4291114140537861, "grad_norm": 2.9227212416971957, "learning_rate": 3.004859435131433e-06, "loss": 0.6873, "step": 13850 }, { "epoch": 0.4292663279216756, "grad_norm": 2.484684449725232, "learning_rate": 3.004044093666428e-06, "loss": 0.6988, "step": 13855 }, { "epoch": 0.429421241789565, "grad_norm": 2.5436724331899505, "learning_rate": 3.003228752201422e-06, "loss": 0.6472, "step": 13860 }, { "epoch": 0.42957615565745444, "grad_norm": 2.3436119041650247, "learning_rate": 3.0024134107364168e-06, "loss": 0.7336, "step": 13865 }, { "epoch": 0.4297310695253439, "grad_norm": 3.6672347751172345, "learning_rate": 3.001598069271411e-06, "loss": 0.6499, "step": 13870 }, { "epoch": 0.42988598339323336, "grad_norm": 2.6415208083286705, "learning_rate": 3.0007827278064057e-06, "loss": 0.702, "step": 13875 }, { "epoch": 0.4300408972611228, "grad_norm": 3.2069558656890655, "learning_rate": 2.9999673863414e-06, "loss": 0.7734, "step": 13880 }, { "epoch": 0.4301958111290123, "grad_norm": 3.748280747272982, "learning_rate": 2.9991520448763946e-06, "loss": 0.7353, "step": 13885 }, { "epoch": 0.43035072499690175, "grad_norm": 2.4989709838136402, "learning_rate": 2.998336703411389e-06, "loss": 0.6776, "step": 13890 }, { "epoch": 0.43050563886479115, "grad_norm": 2.8706038791125716, "learning_rate": 2.997521361946383e-06, "loss": 0.7547, "step": 13895 }, { "epoch": 0.4306605527326806, "grad_norm": 2.5531565722402423, "learning_rate": 2.996706020481378e-06, "loss": 0.7319, "step": 13900 }, { "epoch": 0.4308154666005701, "grad_norm": 3.1230460300154292, "learning_rate": 2.995890679016372e-06, "loss": 0.6422, "step": 13905 }, { "epoch": 0.43097038046845954, "grad_norm": 3.008948935844224, "learning_rate": 2.995075337551367e-06, "loss": 0.6627, "step": 13910 }, { "epoch": 0.431125294336349, "grad_norm": 2.9482616573937954, "learning_rate": 2.994259996086361e-06, "loss": 0.7251, "step": 13915 }, { "epoch": 0.43128020820423846, "grad_norm": 3.173298019224595, "learning_rate": 2.993444654621356e-06, "loss": 0.7202, "step": 13920 }, { "epoch": 0.4314351220721279, "grad_norm": 3.0971657301960156, "learning_rate": 2.99262931315635e-06, "loss": 0.6384, "step": 13925 }, { "epoch": 0.4315900359400173, "grad_norm": 2.5750102308244873, "learning_rate": 2.9918139716913448e-06, "loss": 0.7025, "step": 13930 }, { "epoch": 0.4317449498079068, "grad_norm": 3.246412925504511, "learning_rate": 2.9909986302263388e-06, "loss": 0.7989, "step": 13935 }, { "epoch": 0.43189986367579625, "grad_norm": 3.3741418182919993, "learning_rate": 2.9901832887613337e-06, "loss": 0.7488, "step": 13940 }, { "epoch": 0.4320547775436857, "grad_norm": 3.7878194964334226, "learning_rate": 2.9893679472963277e-06, "loss": 0.7399, "step": 13945 }, { "epoch": 0.43220969141157517, "grad_norm": 2.498802837327564, "learning_rate": 2.9885526058313226e-06, "loss": 0.6979, "step": 13950 }, { "epoch": 0.43236460527946463, "grad_norm": 3.785975170193065, "learning_rate": 2.9877372643663166e-06, "loss": 0.7183, "step": 13955 }, { "epoch": 0.4325195191473541, "grad_norm": 2.807752598103349, "learning_rate": 2.9869219229013115e-06, "loss": 0.7619, "step": 13960 }, { "epoch": 0.4326744330152435, "grad_norm": 2.779751910430678, "learning_rate": 2.9861065814363055e-06, "loss": 0.7322, "step": 13965 }, { "epoch": 0.43282934688313296, "grad_norm": 2.418105729011725, "learning_rate": 2.9852912399713004e-06, "loss": 0.5781, "step": 13970 }, { "epoch": 0.4329842607510224, "grad_norm": 2.5292218534478947, "learning_rate": 2.984475898506295e-06, "loss": 0.7218, "step": 13975 }, { "epoch": 0.4331391746189119, "grad_norm": 2.4769485070851816, "learning_rate": 2.983660557041289e-06, "loss": 0.7511, "step": 13980 }, { "epoch": 0.43329408848680134, "grad_norm": 3.142786999220823, "learning_rate": 2.982845215576284e-06, "loss": 0.6938, "step": 13985 }, { "epoch": 0.4334490023546908, "grad_norm": 3.060727215609716, "learning_rate": 2.982029874111278e-06, "loss": 0.7843, "step": 13990 }, { "epoch": 0.43360391622258027, "grad_norm": 2.2665693761382446, "learning_rate": 2.9812145326462727e-06, "loss": 0.6749, "step": 13995 }, { "epoch": 0.43375883009046967, "grad_norm": 2.404166617530157, "learning_rate": 2.9803991911812668e-06, "loss": 0.6581, "step": 14000 }, { "epoch": 0.43391374395835913, "grad_norm": 1.967877209893341, "learning_rate": 2.9795838497162616e-06, "loss": 0.6589, "step": 14005 }, { "epoch": 0.4340686578262486, "grad_norm": 2.0759968769606885, "learning_rate": 2.9787685082512557e-06, "loss": 0.6901, "step": 14010 }, { "epoch": 0.43422357169413806, "grad_norm": 2.5911182569046787, "learning_rate": 2.9779531667862506e-06, "loss": 0.6731, "step": 14015 }, { "epoch": 0.4343784855620275, "grad_norm": 2.3727426822767153, "learning_rate": 2.9771378253212446e-06, "loss": 0.7722, "step": 14020 }, { "epoch": 0.434533399429917, "grad_norm": 2.7256162783267173, "learning_rate": 2.9763224838562395e-06, "loss": 0.6955, "step": 14025 }, { "epoch": 0.43468831329780644, "grad_norm": 2.644026435078925, "learning_rate": 2.9755071423912335e-06, "loss": 0.7216, "step": 14030 }, { "epoch": 0.4348432271656959, "grad_norm": 2.6693954962793245, "learning_rate": 2.9746918009262284e-06, "loss": 0.7146, "step": 14035 }, { "epoch": 0.4349981410335853, "grad_norm": 3.185051260359796, "learning_rate": 2.9738764594612224e-06, "loss": 0.7289, "step": 14040 }, { "epoch": 0.43515305490147477, "grad_norm": 2.5796889655419806, "learning_rate": 2.9730611179962173e-06, "loss": 0.6796, "step": 14045 }, { "epoch": 0.43530796876936423, "grad_norm": 2.8683500763130003, "learning_rate": 2.9722457765312113e-06, "loss": 0.7458, "step": 14050 }, { "epoch": 0.4354628826372537, "grad_norm": 2.324356020921709, "learning_rate": 2.971430435066206e-06, "loss": 0.7315, "step": 14055 }, { "epoch": 0.43561779650514315, "grad_norm": 2.39011978593857, "learning_rate": 2.9706150936012003e-06, "loss": 0.6367, "step": 14060 }, { "epoch": 0.4357727103730326, "grad_norm": 3.008232215692002, "learning_rate": 2.9697997521361947e-06, "loss": 0.7508, "step": 14065 }, { "epoch": 0.4359276242409221, "grad_norm": 2.8581243442162902, "learning_rate": 2.9689844106711896e-06, "loss": 0.7114, "step": 14070 }, { "epoch": 0.4360825381088115, "grad_norm": 2.4756701615117653, "learning_rate": 2.9681690692061836e-06, "loss": 0.7095, "step": 14075 }, { "epoch": 0.43623745197670094, "grad_norm": 3.1611872580447047, "learning_rate": 2.9673537277411785e-06, "loss": 0.7611, "step": 14080 }, { "epoch": 0.4363923658445904, "grad_norm": 2.207753578150126, "learning_rate": 2.9665383862761726e-06, "loss": 0.7388, "step": 14085 }, { "epoch": 0.43654727971247986, "grad_norm": 2.3575750982263752, "learning_rate": 2.9657230448111674e-06, "loss": 0.6902, "step": 14090 }, { "epoch": 0.4367021935803693, "grad_norm": 4.552226127072209, "learning_rate": 2.9649077033461615e-06, "loss": 0.7083, "step": 14095 }, { "epoch": 0.4368571074482588, "grad_norm": 4.5456634828199824, "learning_rate": 2.9640923618811564e-06, "loss": 0.7403, "step": 14100 }, { "epoch": 0.43701202131614825, "grad_norm": 2.5977325839323866, "learning_rate": 2.9632770204161504e-06, "loss": 0.6738, "step": 14105 }, { "epoch": 0.43716693518403765, "grad_norm": 2.5269430562324278, "learning_rate": 2.9624616789511453e-06, "loss": 0.6528, "step": 14110 }, { "epoch": 0.4373218490519271, "grad_norm": 3.5162783526912063, "learning_rate": 2.9616463374861393e-06, "loss": 0.68, "step": 14115 }, { "epoch": 0.4374767629198166, "grad_norm": 2.7282878408028948, "learning_rate": 2.960830996021134e-06, "loss": 0.6669, "step": 14120 }, { "epoch": 0.43763167678770604, "grad_norm": 2.867318978809138, "learning_rate": 2.9600156545561282e-06, "loss": 0.8135, "step": 14125 }, { "epoch": 0.4377865906555955, "grad_norm": 2.304415541278577, "learning_rate": 2.9592003130911227e-06, "loss": 0.662, "step": 14130 }, { "epoch": 0.43794150452348496, "grad_norm": 2.596142417724934, "learning_rate": 2.958384971626117e-06, "loss": 0.7546, "step": 14135 }, { "epoch": 0.4380964183913744, "grad_norm": 2.403920198544352, "learning_rate": 2.9575696301611116e-06, "loss": 0.6968, "step": 14140 }, { "epoch": 0.4382513322592638, "grad_norm": 2.1657146977991872, "learning_rate": 2.956754288696106e-06, "loss": 0.6463, "step": 14145 }, { "epoch": 0.4384062461271533, "grad_norm": 4.071644930305252, "learning_rate": 2.9559389472311005e-06, "loss": 0.7194, "step": 14150 }, { "epoch": 0.43856115999504275, "grad_norm": 2.5427780760888106, "learning_rate": 2.955123605766095e-06, "loss": 0.75, "step": 14155 }, { "epoch": 0.4387160738629322, "grad_norm": 3.0284224139867812, "learning_rate": 2.9543082643010894e-06, "loss": 0.716, "step": 14160 }, { "epoch": 0.43887098773082167, "grad_norm": 2.7312949823153874, "learning_rate": 2.9534929228360843e-06, "loss": 0.677, "step": 14165 }, { "epoch": 0.43902590159871113, "grad_norm": 2.2637613420296816, "learning_rate": 2.9526775813710784e-06, "loss": 0.7005, "step": 14170 }, { "epoch": 0.4391808154666006, "grad_norm": 2.369503796156928, "learning_rate": 2.9518622399060732e-06, "loss": 0.7297, "step": 14175 }, { "epoch": 0.43933572933449, "grad_norm": 2.8561517075927214, "learning_rate": 2.9510468984410673e-06, "loss": 0.7046, "step": 14180 }, { "epoch": 0.43949064320237946, "grad_norm": 2.7747993795738357, "learning_rate": 2.950231556976062e-06, "loss": 0.6851, "step": 14185 }, { "epoch": 0.4396455570702689, "grad_norm": 2.560189608454841, "learning_rate": 2.949416215511056e-06, "loss": 0.6486, "step": 14190 }, { "epoch": 0.4398004709381584, "grad_norm": 3.769290438240271, "learning_rate": 2.948600874046051e-06, "loss": 0.669, "step": 14195 }, { "epoch": 0.43995538480604784, "grad_norm": 2.3200282941819514, "learning_rate": 2.947785532581045e-06, "loss": 0.6757, "step": 14200 }, { "epoch": 0.4401102986739373, "grad_norm": 2.486426326584074, "learning_rate": 2.9469701911160396e-06, "loss": 0.6805, "step": 14205 }, { "epoch": 0.44026521254182677, "grad_norm": 2.6648887512744146, "learning_rate": 2.946154849651034e-06, "loss": 0.7356, "step": 14210 }, { "epoch": 0.4404201264097162, "grad_norm": 2.901719639326456, "learning_rate": 2.9453395081860285e-06, "loss": 0.8012, "step": 14215 }, { "epoch": 0.44057504027760563, "grad_norm": 2.8457352495447688, "learning_rate": 2.944524166721023e-06, "loss": 0.6897, "step": 14220 }, { "epoch": 0.4407299541454951, "grad_norm": 2.451974976830542, "learning_rate": 2.9437088252560174e-06, "loss": 0.7069, "step": 14225 }, { "epoch": 0.44088486801338456, "grad_norm": 2.8606611208928894, "learning_rate": 2.942893483791012e-06, "loss": 0.7496, "step": 14230 }, { "epoch": 0.441039781881274, "grad_norm": 2.6112308863570517, "learning_rate": 2.9420781423260063e-06, "loss": 0.7163, "step": 14235 }, { "epoch": 0.4411946957491635, "grad_norm": 2.430944761481588, "learning_rate": 2.9412628008610004e-06, "loss": 0.7368, "step": 14240 }, { "epoch": 0.44134960961705294, "grad_norm": 2.630418896936784, "learning_rate": 2.9404474593959953e-06, "loss": 0.7079, "step": 14245 }, { "epoch": 0.44150452348494235, "grad_norm": 2.2791436636432305, "learning_rate": 2.93963211793099e-06, "loss": 0.7051, "step": 14250 }, { "epoch": 0.4416594373528318, "grad_norm": 3.0479949174857905, "learning_rate": 2.938816776465984e-06, "loss": 0.7254, "step": 14255 }, { "epoch": 0.44181435122072127, "grad_norm": 4.276831151809134, "learning_rate": 2.938001435000979e-06, "loss": 0.6068, "step": 14260 }, { "epoch": 0.44196926508861073, "grad_norm": 2.693739124106962, "learning_rate": 2.937186093535973e-06, "loss": 0.7684, "step": 14265 }, { "epoch": 0.4421241789565002, "grad_norm": 2.6718242811586386, "learning_rate": 2.936370752070968e-06, "loss": 0.723, "step": 14270 }, { "epoch": 0.44227909282438965, "grad_norm": 2.5621596462708123, "learning_rate": 2.935555410605962e-06, "loss": 0.6867, "step": 14275 }, { "epoch": 0.4424340066922791, "grad_norm": 2.541227589310366, "learning_rate": 2.9347400691409565e-06, "loss": 0.696, "step": 14280 }, { "epoch": 0.4425889205601685, "grad_norm": 2.7220444544571043, "learning_rate": 2.933924727675951e-06, "loss": 0.7676, "step": 14285 }, { "epoch": 0.442743834428058, "grad_norm": 2.747859622161901, "learning_rate": 2.9331093862109454e-06, "loss": 0.7383, "step": 14290 }, { "epoch": 0.44289874829594744, "grad_norm": 2.443138608410603, "learning_rate": 2.93229404474594e-06, "loss": 0.6682, "step": 14295 }, { "epoch": 0.4430536621638369, "grad_norm": 2.68285080900823, "learning_rate": 2.9314787032809343e-06, "loss": 0.7321, "step": 14300 }, { "epoch": 0.44320857603172636, "grad_norm": 2.3649664648909416, "learning_rate": 2.9306633618159288e-06, "loss": 0.6715, "step": 14305 }, { "epoch": 0.4433634898996158, "grad_norm": 3.775128292423668, "learning_rate": 2.9298480203509232e-06, "loss": 0.6823, "step": 14310 }, { "epoch": 0.4435184037675053, "grad_norm": 3.1768773014400256, "learning_rate": 2.9290326788859177e-06, "loss": 0.7194, "step": 14315 }, { "epoch": 0.4436733176353947, "grad_norm": 2.357419691186681, "learning_rate": 2.928217337420912e-06, "loss": 0.7181, "step": 14320 }, { "epoch": 0.44382823150328415, "grad_norm": 2.440112653173972, "learning_rate": 2.927401995955906e-06, "loss": 0.6339, "step": 14325 }, { "epoch": 0.4439831453711736, "grad_norm": 2.5610471473491017, "learning_rate": 2.926586654490901e-06, "loss": 0.7574, "step": 14330 }, { "epoch": 0.4441380592390631, "grad_norm": 2.4169262536426883, "learning_rate": 2.925771313025895e-06, "loss": 0.7727, "step": 14335 }, { "epoch": 0.44429297310695254, "grad_norm": 2.630568143449849, "learning_rate": 2.92495597156089e-06, "loss": 0.7312, "step": 14340 }, { "epoch": 0.444447886974842, "grad_norm": 2.9709598938366963, "learning_rate": 2.924140630095885e-06, "loss": 0.7874, "step": 14345 }, { "epoch": 0.44460280084273146, "grad_norm": 2.543069718803447, "learning_rate": 2.923325288630879e-06, "loss": 0.6474, "step": 14350 }, { "epoch": 0.4447577147106209, "grad_norm": 2.5865866308543266, "learning_rate": 2.9225099471658734e-06, "loss": 0.7511, "step": 14355 }, { "epoch": 0.4449126285785103, "grad_norm": 2.815000772352454, "learning_rate": 2.921694605700868e-06, "loss": 0.6999, "step": 14360 }, { "epoch": 0.4450675424463998, "grad_norm": 2.5330640422316635, "learning_rate": 2.9208792642358623e-06, "loss": 0.7547, "step": 14365 }, { "epoch": 0.44522245631428925, "grad_norm": 2.8160909517321855, "learning_rate": 2.9200639227708567e-06, "loss": 0.7613, "step": 14370 }, { "epoch": 0.4453773701821787, "grad_norm": 2.378793177219393, "learning_rate": 2.919248581305851e-06, "loss": 0.7627, "step": 14375 }, { "epoch": 0.4455322840500682, "grad_norm": 1.8948017147664693, "learning_rate": 2.9184332398408456e-06, "loss": 0.6443, "step": 14380 }, { "epoch": 0.44568719791795763, "grad_norm": 2.7100895684661293, "learning_rate": 2.91761789837584e-06, "loss": 0.7729, "step": 14385 }, { "epoch": 0.4458421117858471, "grad_norm": 2.3534503236292683, "learning_rate": 2.9168025569108346e-06, "loss": 0.7338, "step": 14390 }, { "epoch": 0.4459970256537365, "grad_norm": 2.6946375589385214, "learning_rate": 2.915987215445829e-06, "loss": 0.7539, "step": 14395 }, { "epoch": 0.44615193952162596, "grad_norm": 2.5598022082845886, "learning_rate": 2.915171873980823e-06, "loss": 0.6229, "step": 14400 }, { "epoch": 0.4463068533895154, "grad_norm": 2.906504867271012, "learning_rate": 2.914356532515818e-06, "loss": 0.7272, "step": 14405 }, { "epoch": 0.4464617672574049, "grad_norm": 2.7986947726525955, "learning_rate": 2.913541191050812e-06, "loss": 0.6605, "step": 14410 }, { "epoch": 0.44661668112529435, "grad_norm": 3.2698449297855188, "learning_rate": 2.912725849585807e-06, "loss": 0.7314, "step": 14415 }, { "epoch": 0.4467715949931838, "grad_norm": 2.584177955550036, "learning_rate": 2.911910508120801e-06, "loss": 0.7134, "step": 14420 }, { "epoch": 0.44692650886107327, "grad_norm": 2.6528119024816093, "learning_rate": 2.9110951666557958e-06, "loss": 0.6949, "step": 14425 }, { "epoch": 0.4470814227289627, "grad_norm": 4.444948323472493, "learning_rate": 2.91027982519079e-06, "loss": 0.7462, "step": 14430 }, { "epoch": 0.44723633659685214, "grad_norm": 2.995681682066266, "learning_rate": 2.9094644837257847e-06, "loss": 0.6822, "step": 14435 }, { "epoch": 0.4473912504647416, "grad_norm": 2.183394306529823, "learning_rate": 2.908649142260779e-06, "loss": 0.6657, "step": 14440 }, { "epoch": 0.44754616433263106, "grad_norm": 2.9640139883992944, "learning_rate": 2.9078338007957736e-06, "loss": 0.6883, "step": 14445 }, { "epoch": 0.4477010782005205, "grad_norm": 3.0221536449664628, "learning_rate": 2.907018459330768e-06, "loss": 0.7811, "step": 14450 }, { "epoch": 0.44785599206841, "grad_norm": 2.968251549803907, "learning_rate": 2.9062031178657625e-06, "loss": 0.8182, "step": 14455 }, { "epoch": 0.44801090593629944, "grad_norm": 2.672426188575714, "learning_rate": 2.905387776400757e-06, "loss": 0.7182, "step": 14460 }, { "epoch": 0.44816581980418885, "grad_norm": 2.4647326703842514, "learning_rate": 2.9045724349357515e-06, "loss": 0.6698, "step": 14465 }, { "epoch": 0.4483207336720783, "grad_norm": 2.20093061679051, "learning_rate": 2.903757093470746e-06, "loss": 0.6758, "step": 14470 }, { "epoch": 0.44847564753996777, "grad_norm": 1.877141705525952, "learning_rate": 2.90294175200574e-06, "loss": 0.6886, "step": 14475 }, { "epoch": 0.44863056140785723, "grad_norm": 2.50553941739409, "learning_rate": 2.902126410540735e-06, "loss": 0.6329, "step": 14480 }, { "epoch": 0.4487854752757467, "grad_norm": 2.4025718956661324, "learning_rate": 2.901311069075729e-06, "loss": 0.6603, "step": 14485 }, { "epoch": 0.44894038914363615, "grad_norm": 2.4173234671916246, "learning_rate": 2.9004957276107237e-06, "loss": 0.7935, "step": 14490 }, { "epoch": 0.4490953030115256, "grad_norm": 2.902285611425599, "learning_rate": 2.8996803861457178e-06, "loss": 0.6297, "step": 14495 }, { "epoch": 0.449250216879415, "grad_norm": 2.6146549663075986, "learning_rate": 2.8988650446807127e-06, "loss": 0.673, "step": 14500 }, { "epoch": 0.4494051307473045, "grad_norm": 2.4136281321608135, "learning_rate": 2.8980497032157067e-06, "loss": 0.7009, "step": 14505 }, { "epoch": 0.44956004461519394, "grad_norm": 3.155791848186233, "learning_rate": 2.8972343617507016e-06, "loss": 0.6791, "step": 14510 }, { "epoch": 0.4497149584830834, "grad_norm": 3.150171032206265, "learning_rate": 2.8964190202856956e-06, "loss": 0.7055, "step": 14515 }, { "epoch": 0.44986987235097287, "grad_norm": 2.349852131446419, "learning_rate": 2.8956036788206905e-06, "loss": 0.6886, "step": 14520 }, { "epoch": 0.4500247862188623, "grad_norm": 2.7426408458114673, "learning_rate": 2.8947883373556845e-06, "loss": 0.7088, "step": 14525 }, { "epoch": 0.4501797000867518, "grad_norm": 2.9646511093240964, "learning_rate": 2.8939729958906794e-06, "loss": 0.7419, "step": 14530 }, { "epoch": 0.4503346139546412, "grad_norm": 3.3772603549169755, "learning_rate": 2.893157654425674e-06, "loss": 0.7687, "step": 14535 }, { "epoch": 0.45048952782253066, "grad_norm": 2.3489822454651677, "learning_rate": 2.8923423129606683e-06, "loss": 0.6763, "step": 14540 }, { "epoch": 0.4506444416904201, "grad_norm": 3.1642021654960293, "learning_rate": 2.891526971495663e-06, "loss": 0.6522, "step": 14545 }, { "epoch": 0.4507993555583096, "grad_norm": 3.7797250015281136, "learning_rate": 2.890711630030657e-06, "loss": 0.6885, "step": 14550 }, { "epoch": 0.45095426942619904, "grad_norm": 3.143074704195062, "learning_rate": 2.8898962885656517e-06, "loss": 0.7051, "step": 14555 }, { "epoch": 0.4511091832940885, "grad_norm": 2.592453852234965, "learning_rate": 2.8890809471006458e-06, "loss": 0.6879, "step": 14560 }, { "epoch": 0.45126409716197796, "grad_norm": 2.3777733967911296, "learning_rate": 2.8882656056356406e-06, "loss": 0.7233, "step": 14565 }, { "epoch": 0.45141901102986737, "grad_norm": 3.755197672132471, "learning_rate": 2.8874502641706347e-06, "loss": 0.702, "step": 14570 }, { "epoch": 0.45157392489775683, "grad_norm": 3.6379746604374024, "learning_rate": 2.8866349227056296e-06, "loss": 0.6456, "step": 14575 }, { "epoch": 0.4517288387656463, "grad_norm": 2.8163615689758896, "learning_rate": 2.8858195812406236e-06, "loss": 0.7387, "step": 14580 }, { "epoch": 0.45188375263353575, "grad_norm": 2.418392910613692, "learning_rate": 2.8850042397756185e-06, "loss": 0.7679, "step": 14585 }, { "epoch": 0.4520386665014252, "grad_norm": 3.3319192803004145, "learning_rate": 2.8841888983106125e-06, "loss": 0.6974, "step": 14590 }, { "epoch": 0.4521935803693147, "grad_norm": 2.5191328713052434, "learning_rate": 2.8833735568456074e-06, "loss": 0.6605, "step": 14595 }, { "epoch": 0.45234849423720414, "grad_norm": 2.5246096750904075, "learning_rate": 2.8825582153806014e-06, "loss": 0.7189, "step": 14600 }, { "epoch": 0.45250340810509354, "grad_norm": 2.9285379390249844, "learning_rate": 2.8817428739155963e-06, "loss": 0.6728, "step": 14605 }, { "epoch": 0.452658321972983, "grad_norm": 2.765260762711987, "learning_rate": 2.8809275324505903e-06, "loss": 0.7199, "step": 14610 }, { "epoch": 0.45281323584087246, "grad_norm": 2.879213062397481, "learning_rate": 2.8801121909855852e-06, "loss": 0.6899, "step": 14615 }, { "epoch": 0.4529681497087619, "grad_norm": 2.439357625985174, "learning_rate": 2.8792968495205793e-06, "loss": 0.7103, "step": 14620 }, { "epoch": 0.4531230635766514, "grad_norm": 2.210958293534389, "learning_rate": 2.8784815080555737e-06, "loss": 0.6611, "step": 14625 }, { "epoch": 0.45327797744454085, "grad_norm": 2.4507160826480527, "learning_rate": 2.8776661665905686e-06, "loss": 0.7245, "step": 14630 }, { "epoch": 0.4534328913124303, "grad_norm": 3.2453434023827885, "learning_rate": 2.8768508251255626e-06, "loss": 0.6915, "step": 14635 }, { "epoch": 0.45358780518031977, "grad_norm": 3.12391090770406, "learning_rate": 2.8760354836605575e-06, "loss": 0.6276, "step": 14640 }, { "epoch": 0.4537427190482092, "grad_norm": 3.3483689845541473, "learning_rate": 2.8752201421955516e-06, "loss": 0.6713, "step": 14645 }, { "epoch": 0.45389763291609864, "grad_norm": 2.841533832840971, "learning_rate": 2.8744048007305464e-06, "loss": 0.6764, "step": 14650 }, { "epoch": 0.4540525467839881, "grad_norm": 2.2853862159226592, "learning_rate": 2.8735894592655405e-06, "loss": 0.7212, "step": 14655 }, { "epoch": 0.45420746065187756, "grad_norm": 2.654944171004821, "learning_rate": 2.8727741178005354e-06, "loss": 0.7168, "step": 14660 }, { "epoch": 0.454362374519767, "grad_norm": 2.621611405187504, "learning_rate": 2.8719587763355294e-06, "loss": 0.7119, "step": 14665 }, { "epoch": 0.4545172883876565, "grad_norm": 3.0569118955226187, "learning_rate": 2.8711434348705243e-06, "loss": 0.6902, "step": 14670 }, { "epoch": 0.45467220225554594, "grad_norm": 3.0973777427642397, "learning_rate": 2.8703280934055183e-06, "loss": 0.6935, "step": 14675 }, { "epoch": 0.45482711612343535, "grad_norm": 2.4256332161900387, "learning_rate": 2.869512751940513e-06, "loss": 0.6037, "step": 14680 }, { "epoch": 0.4549820299913248, "grad_norm": 3.0737643964640133, "learning_rate": 2.8686974104755072e-06, "loss": 0.7421, "step": 14685 }, { "epoch": 0.45513694385921427, "grad_norm": 6.130620037979537, "learning_rate": 2.867882069010502e-06, "loss": 0.7977, "step": 14690 }, { "epoch": 0.45529185772710373, "grad_norm": 2.903589707865167, "learning_rate": 2.867066727545496e-06, "loss": 0.665, "step": 14695 }, { "epoch": 0.4554467715949932, "grad_norm": 2.895983230904174, "learning_rate": 2.8662513860804906e-06, "loss": 0.7516, "step": 14700 }, { "epoch": 0.45560168546288266, "grad_norm": 2.4013678655853354, "learning_rate": 2.865436044615485e-06, "loss": 0.7042, "step": 14705 }, { "epoch": 0.4557565993307721, "grad_norm": 2.959475204496317, "learning_rate": 2.8646207031504795e-06, "loss": 0.7219, "step": 14710 }, { "epoch": 0.4559115131986615, "grad_norm": 2.3277457308892626, "learning_rate": 2.863805361685474e-06, "loss": 0.6629, "step": 14715 }, { "epoch": 0.456066427066551, "grad_norm": 2.9044487692807524, "learning_rate": 2.8629900202204684e-06, "loss": 0.7076, "step": 14720 }, { "epoch": 0.45622134093444044, "grad_norm": 4.129377080193363, "learning_rate": 2.8621746787554633e-06, "loss": 0.7695, "step": 14725 }, { "epoch": 0.4563762548023299, "grad_norm": 2.856115698781389, "learning_rate": 2.8613593372904574e-06, "loss": 0.7112, "step": 14730 }, { "epoch": 0.45653116867021937, "grad_norm": 2.6125391512921268, "learning_rate": 2.8605439958254522e-06, "loss": 0.6952, "step": 14735 }, { "epoch": 0.45668608253810883, "grad_norm": 2.5034614530830552, "learning_rate": 2.8597286543604463e-06, "loss": 0.7059, "step": 14740 }, { "epoch": 0.4568409964059983, "grad_norm": 2.8227392388352253, "learning_rate": 2.858913312895441e-06, "loss": 0.7284, "step": 14745 }, { "epoch": 0.4569959102738877, "grad_norm": 2.18363267636365, "learning_rate": 2.858097971430435e-06, "loss": 0.7195, "step": 14750 }, { "epoch": 0.45715082414177716, "grad_norm": 2.4578685939952525, "learning_rate": 2.85728262996543e-06, "loss": 0.6745, "step": 14755 }, { "epoch": 0.4573057380096666, "grad_norm": 3.496981922170526, "learning_rate": 2.856467288500424e-06, "loss": 0.7541, "step": 14760 }, { "epoch": 0.4574606518775561, "grad_norm": 2.8338402327218777, "learning_rate": 2.855651947035419e-06, "loss": 0.6759, "step": 14765 }, { "epoch": 0.45761556574544554, "grad_norm": 2.4371985735632857, "learning_rate": 2.854836605570413e-06, "loss": 0.7195, "step": 14770 }, { "epoch": 0.457770479613335, "grad_norm": 2.3108172509254814, "learning_rate": 2.8540212641054075e-06, "loss": 0.7311, "step": 14775 }, { "epoch": 0.45792539348122446, "grad_norm": 4.049986943134284, "learning_rate": 2.853205922640402e-06, "loss": 0.7121, "step": 14780 }, { "epoch": 0.45808030734911387, "grad_norm": 2.5566322896276197, "learning_rate": 2.8523905811753964e-06, "loss": 0.6955, "step": 14785 }, { "epoch": 0.45823522121700333, "grad_norm": 3.068839474285069, "learning_rate": 2.851575239710391e-06, "loss": 0.7787, "step": 14790 }, { "epoch": 0.4583901350848928, "grad_norm": 4.3850609569355905, "learning_rate": 2.8507598982453853e-06, "loss": 0.7301, "step": 14795 }, { "epoch": 0.45854504895278225, "grad_norm": 2.595504089203859, "learning_rate": 2.84994455678038e-06, "loss": 0.672, "step": 14800 }, { "epoch": 0.4586999628206717, "grad_norm": 4.4522545894469845, "learning_rate": 2.8491292153153742e-06, "loss": 0.7043, "step": 14805 }, { "epoch": 0.4588548766885612, "grad_norm": 6.368837277287551, "learning_rate": 2.8483138738503687e-06, "loss": 0.7502, "step": 14810 }, { "epoch": 0.45900979055645064, "grad_norm": 2.251707317427817, "learning_rate": 2.847498532385363e-06, "loss": 0.7287, "step": 14815 }, { "epoch": 0.45916470442434004, "grad_norm": 3.037613302190073, "learning_rate": 2.846683190920358e-06, "loss": 0.6725, "step": 14820 }, { "epoch": 0.4593196182922295, "grad_norm": 2.9184124670626432, "learning_rate": 2.845867849455352e-06, "loss": 0.6348, "step": 14825 }, { "epoch": 0.45947453216011896, "grad_norm": 2.5726383218570104, "learning_rate": 2.845052507990347e-06, "loss": 0.7437, "step": 14830 }, { "epoch": 0.4596294460280084, "grad_norm": 2.68873284891003, "learning_rate": 2.844237166525341e-06, "loss": 0.716, "step": 14835 }, { "epoch": 0.4597843598958979, "grad_norm": 2.4620541242454728, "learning_rate": 2.843421825060336e-06, "loss": 0.7117, "step": 14840 }, { "epoch": 0.45993927376378735, "grad_norm": 2.615292736770012, "learning_rate": 2.84260648359533e-06, "loss": 0.7283, "step": 14845 }, { "epoch": 0.4600941876316768, "grad_norm": 3.6895927380045106, "learning_rate": 2.8417911421303244e-06, "loss": 0.683, "step": 14850 }, { "epoch": 0.4602491014995662, "grad_norm": 2.681313913308637, "learning_rate": 2.840975800665319e-06, "loss": 0.7299, "step": 14855 }, { "epoch": 0.4604040153674557, "grad_norm": 3.373883966041823, "learning_rate": 2.8401604592003133e-06, "loss": 0.6775, "step": 14860 }, { "epoch": 0.46055892923534514, "grad_norm": 3.3356418316073957, "learning_rate": 2.8393451177353078e-06, "loss": 0.6563, "step": 14865 }, { "epoch": 0.4607138431032346, "grad_norm": 2.5363643316297755, "learning_rate": 2.8385297762703022e-06, "loss": 0.6682, "step": 14870 }, { "epoch": 0.46086875697112406, "grad_norm": 2.462757179173649, "learning_rate": 2.8377144348052967e-06, "loss": 0.8199, "step": 14875 }, { "epoch": 0.4610236708390135, "grad_norm": 2.5093105894958314, "learning_rate": 2.836899093340291e-06, "loss": 0.6768, "step": 14880 }, { "epoch": 0.461178584706903, "grad_norm": 2.7882142389697293, "learning_rate": 2.8360837518752856e-06, "loss": 0.6277, "step": 14885 }, { "epoch": 0.4613334985747924, "grad_norm": 2.8681445972132855, "learning_rate": 2.83526841041028e-06, "loss": 0.7421, "step": 14890 }, { "epoch": 0.46148841244268185, "grad_norm": 2.8985326471328494, "learning_rate": 2.834453068945274e-06, "loss": 0.7054, "step": 14895 }, { "epoch": 0.4616433263105713, "grad_norm": 2.516728173329944, "learning_rate": 2.833637727480269e-06, "loss": 0.7283, "step": 14900 }, { "epoch": 0.4617982401784608, "grad_norm": 2.363474229149989, "learning_rate": 2.832822386015263e-06, "loss": 0.6782, "step": 14905 }, { "epoch": 0.46195315404635023, "grad_norm": 2.9032501651342133, "learning_rate": 2.832007044550258e-06, "loss": 0.7139, "step": 14910 }, { "epoch": 0.4621080679142397, "grad_norm": 2.533674005588855, "learning_rate": 2.8311917030852528e-06, "loss": 0.7356, "step": 14915 }, { "epoch": 0.46226298178212916, "grad_norm": 2.943887887855059, "learning_rate": 2.830376361620247e-06, "loss": 0.7705, "step": 14920 }, { "epoch": 0.46241789565001856, "grad_norm": 2.3328304059404292, "learning_rate": 2.8295610201552413e-06, "loss": 0.656, "step": 14925 }, { "epoch": 0.462572809517908, "grad_norm": 2.3138717688503543, "learning_rate": 2.8287456786902357e-06, "loss": 0.6091, "step": 14930 }, { "epoch": 0.4627277233857975, "grad_norm": 2.744339721216924, "learning_rate": 2.82793033722523e-06, "loss": 0.7262, "step": 14935 }, { "epoch": 0.46288263725368695, "grad_norm": 3.5658602336641443, "learning_rate": 2.8271149957602246e-06, "loss": 0.7244, "step": 14940 }, { "epoch": 0.4630375511215764, "grad_norm": 1.784272534235271, "learning_rate": 2.826299654295219e-06, "loss": 0.6941, "step": 14945 }, { "epoch": 0.46319246498946587, "grad_norm": 2.3659571861050384, "learning_rate": 2.8254843128302136e-06, "loss": 0.6871, "step": 14950 }, { "epoch": 0.46334737885735533, "grad_norm": 2.6309751939524686, "learning_rate": 2.824668971365208e-06, "loss": 0.7489, "step": 14955 }, { "epoch": 0.4635022927252448, "grad_norm": 2.2107640084991584, "learning_rate": 2.8238536299002025e-06, "loss": 0.7059, "step": 14960 }, { "epoch": 0.4636572065931342, "grad_norm": 3.9508538197808742, "learning_rate": 2.823038288435197e-06, "loss": 0.6914, "step": 14965 }, { "epoch": 0.46381212046102366, "grad_norm": 2.8403926994862534, "learning_rate": 2.822222946970191e-06, "loss": 0.6963, "step": 14970 }, { "epoch": 0.4639670343289131, "grad_norm": 2.7874605866671263, "learning_rate": 2.821407605505186e-06, "loss": 0.6878, "step": 14975 }, { "epoch": 0.4641219481968026, "grad_norm": 2.725283070195321, "learning_rate": 2.82059226404018e-06, "loss": 0.7083, "step": 14980 }, { "epoch": 0.46427686206469204, "grad_norm": 4.878379125152868, "learning_rate": 2.8197769225751748e-06, "loss": 0.7257, "step": 14985 }, { "epoch": 0.4644317759325815, "grad_norm": 2.2794935137487182, "learning_rate": 2.818961581110169e-06, "loss": 0.7338, "step": 14990 }, { "epoch": 0.46458668980047096, "grad_norm": 2.2364092166308773, "learning_rate": 2.8181462396451637e-06, "loss": 0.7278, "step": 14995 }, { "epoch": 0.46474160366836037, "grad_norm": 2.5784926090317914, "learning_rate": 2.8173308981801577e-06, "loss": 0.6858, "step": 15000 }, { "epoch": 0.46489651753624983, "grad_norm": 2.698942981533712, "learning_rate": 2.8165155567151526e-06, "loss": 0.664, "step": 15005 }, { "epoch": 0.4650514314041393, "grad_norm": 2.580586793476587, "learning_rate": 2.815700215250147e-06, "loss": 0.761, "step": 15010 }, { "epoch": 0.46520634527202875, "grad_norm": 2.57505635207317, "learning_rate": 2.8148848737851415e-06, "loss": 0.6934, "step": 15015 }, { "epoch": 0.4653612591399182, "grad_norm": 3.0152578007522717, "learning_rate": 2.814069532320136e-06, "loss": 0.7897, "step": 15020 }, { "epoch": 0.4655161730078077, "grad_norm": 3.1741606003617013, "learning_rate": 2.8132541908551305e-06, "loss": 0.7732, "step": 15025 }, { "epoch": 0.46567108687569714, "grad_norm": 2.6012761087471445, "learning_rate": 2.812438849390125e-06, "loss": 0.6922, "step": 15030 }, { "epoch": 0.46582600074358654, "grad_norm": 2.616175988051008, "learning_rate": 2.8116235079251194e-06, "loss": 0.6766, "step": 15035 }, { "epoch": 0.465980914611476, "grad_norm": 2.9595671052229955, "learning_rate": 2.810808166460114e-06, "loss": 0.6936, "step": 15040 }, { "epoch": 0.46613582847936547, "grad_norm": 2.9308534628281793, "learning_rate": 2.809992824995108e-06, "loss": 0.7344, "step": 15045 }, { "epoch": 0.4662907423472549, "grad_norm": 2.7247983970973886, "learning_rate": 2.8091774835301027e-06, "loss": 0.6973, "step": 15050 }, { "epoch": 0.4664456562151444, "grad_norm": 2.20563997883458, "learning_rate": 2.8083621420650968e-06, "loss": 0.7506, "step": 15055 }, { "epoch": 0.46660057008303385, "grad_norm": 2.854161596754073, "learning_rate": 2.8075468006000917e-06, "loss": 0.6567, "step": 15060 }, { "epoch": 0.4667554839509233, "grad_norm": 2.6993761401695604, "learning_rate": 2.8067314591350857e-06, "loss": 0.6508, "step": 15065 }, { "epoch": 0.4669103978188127, "grad_norm": 3.104129380758062, "learning_rate": 2.8059161176700806e-06, "loss": 0.6904, "step": 15070 }, { "epoch": 0.4670653116867022, "grad_norm": 2.9510309400581796, "learning_rate": 2.8051007762050746e-06, "loss": 0.8076, "step": 15075 }, { "epoch": 0.46722022555459164, "grad_norm": 2.3206321766005886, "learning_rate": 2.8042854347400695e-06, "loss": 0.7333, "step": 15080 }, { "epoch": 0.4673751394224811, "grad_norm": 4.048749471034624, "learning_rate": 2.8034700932750635e-06, "loss": 0.7227, "step": 15085 }, { "epoch": 0.46753005329037056, "grad_norm": 2.4111114817872616, "learning_rate": 2.8026547518100584e-06, "loss": 0.7396, "step": 15090 }, { "epoch": 0.46768496715826, "grad_norm": 3.832500455552073, "learning_rate": 2.8018394103450525e-06, "loss": 0.7368, "step": 15095 }, { "epoch": 0.4678398810261495, "grad_norm": 3.086671093657988, "learning_rate": 2.8010240688800473e-06, "loss": 0.6861, "step": 15100 }, { "epoch": 0.4679947948940389, "grad_norm": 2.105019263349876, "learning_rate": 2.800208727415042e-06, "loss": 0.7019, "step": 15105 }, { "epoch": 0.46814970876192835, "grad_norm": 3.0294826373032566, "learning_rate": 2.7993933859500363e-06, "loss": 0.8097, "step": 15110 }, { "epoch": 0.4683046226298178, "grad_norm": 2.8418061324553756, "learning_rate": 2.7985780444850307e-06, "loss": 0.635, "step": 15115 }, { "epoch": 0.4684595364977073, "grad_norm": 2.4990074149284145, "learning_rate": 2.7977627030200248e-06, "loss": 0.6131, "step": 15120 }, { "epoch": 0.46861445036559674, "grad_norm": 2.945506027667919, "learning_rate": 2.7969473615550196e-06, "loss": 0.7103, "step": 15125 }, { "epoch": 0.4687693642334862, "grad_norm": 2.8268288314624876, "learning_rate": 2.7961320200900137e-06, "loss": 0.7476, "step": 15130 }, { "epoch": 0.46892427810137566, "grad_norm": 2.1504671086294476, "learning_rate": 2.7953166786250086e-06, "loss": 0.6755, "step": 15135 }, { "epoch": 0.46907919196926506, "grad_norm": 2.762308687506683, "learning_rate": 2.7945013371600026e-06, "loss": 0.762, "step": 15140 }, { "epoch": 0.4692341058371545, "grad_norm": 2.4660405568624717, "learning_rate": 2.7936859956949975e-06, "loss": 0.6925, "step": 15145 }, { "epoch": 0.469389019705044, "grad_norm": 2.7390371232387847, "learning_rate": 2.7928706542299915e-06, "loss": 0.6358, "step": 15150 }, { "epoch": 0.46954393357293345, "grad_norm": 2.2375438943910066, "learning_rate": 2.7920553127649864e-06, "loss": 0.6823, "step": 15155 }, { "epoch": 0.4696988474408229, "grad_norm": 2.714399599983898, "learning_rate": 2.7912399712999804e-06, "loss": 0.7034, "step": 15160 }, { "epoch": 0.46985376130871237, "grad_norm": 2.8839198369138574, "learning_rate": 2.7904246298349753e-06, "loss": 0.6974, "step": 15165 }, { "epoch": 0.47000867517660183, "grad_norm": 2.863995247643269, "learning_rate": 2.7896092883699693e-06, "loss": 0.6728, "step": 15170 }, { "epoch": 0.47016358904449124, "grad_norm": 3.713779880843263, "learning_rate": 2.7887939469049642e-06, "loss": 0.6375, "step": 15175 }, { "epoch": 0.4703185029123807, "grad_norm": 3.230515646729585, "learning_rate": 2.7879786054399583e-06, "loss": 0.7133, "step": 15180 }, { "epoch": 0.47047341678027016, "grad_norm": 2.8273998890947882, "learning_rate": 2.787163263974953e-06, "loss": 0.6041, "step": 15185 }, { "epoch": 0.4706283306481596, "grad_norm": 2.8842487573310347, "learning_rate": 2.786347922509947e-06, "loss": 0.7168, "step": 15190 }, { "epoch": 0.4707832445160491, "grad_norm": 2.59473905585187, "learning_rate": 2.7855325810449416e-06, "loss": 0.6511, "step": 15195 }, { "epoch": 0.47093815838393854, "grad_norm": 2.3849304219704783, "learning_rate": 2.7847172395799365e-06, "loss": 0.7603, "step": 15200 }, { "epoch": 0.471093072251828, "grad_norm": 2.6552012052037517, "learning_rate": 2.7839018981149306e-06, "loss": 0.724, "step": 15205 }, { "epoch": 0.4712479861197174, "grad_norm": 3.395983607064755, "learning_rate": 2.7830865566499254e-06, "loss": 0.6579, "step": 15210 }, { "epoch": 0.47140289998760687, "grad_norm": 3.500041250896869, "learning_rate": 2.7822712151849195e-06, "loss": 0.7367, "step": 15215 }, { "epoch": 0.47155781385549633, "grad_norm": 2.262355944045136, "learning_rate": 2.7814558737199144e-06, "loss": 0.7007, "step": 15220 }, { "epoch": 0.4717127277233858, "grad_norm": 2.2090577698615634, "learning_rate": 2.7806405322549084e-06, "loss": 0.7375, "step": 15225 }, { "epoch": 0.47186764159127526, "grad_norm": 2.860680924125809, "learning_rate": 2.7798251907899033e-06, "loss": 0.6411, "step": 15230 }, { "epoch": 0.4720225554591647, "grad_norm": 2.2970086934704272, "learning_rate": 2.7790098493248973e-06, "loss": 0.655, "step": 15235 }, { "epoch": 0.4721774693270542, "grad_norm": 3.008176977634337, "learning_rate": 2.778194507859892e-06, "loss": 0.7109, "step": 15240 }, { "epoch": 0.47233238319494364, "grad_norm": 2.9349832657717987, "learning_rate": 2.7773791663948862e-06, "loss": 0.6717, "step": 15245 }, { "epoch": 0.47248729706283304, "grad_norm": 3.0833417973290644, "learning_rate": 2.776563824929881e-06, "loss": 0.7391, "step": 15250 }, { "epoch": 0.4726422109307225, "grad_norm": 2.312418716830519, "learning_rate": 2.775748483464875e-06, "loss": 0.6484, "step": 15255 }, { "epoch": 0.47279712479861197, "grad_norm": 2.7704430441818992, "learning_rate": 2.77493314199987e-06, "loss": 0.7121, "step": 15260 }, { "epoch": 0.47295203866650143, "grad_norm": 3.3515253529250657, "learning_rate": 2.774117800534864e-06, "loss": 0.7083, "step": 15265 }, { "epoch": 0.4731069525343909, "grad_norm": 2.9975829403024603, "learning_rate": 2.7733024590698585e-06, "loss": 0.7318, "step": 15270 }, { "epoch": 0.47326186640228035, "grad_norm": 3.1208429042613837, "learning_rate": 2.772487117604853e-06, "loss": 0.7307, "step": 15275 }, { "epoch": 0.4734167802701698, "grad_norm": 3.3166403952739154, "learning_rate": 2.7716717761398474e-06, "loss": 0.7276, "step": 15280 }, { "epoch": 0.4735716941380592, "grad_norm": 2.6718835611603198, "learning_rate": 2.770856434674842e-06, "loss": 0.7551, "step": 15285 }, { "epoch": 0.4737266080059487, "grad_norm": 2.595624572189836, "learning_rate": 2.7700410932098364e-06, "loss": 0.7382, "step": 15290 }, { "epoch": 0.47388152187383814, "grad_norm": 2.7521701544935318, "learning_rate": 2.7692257517448312e-06, "loss": 0.7125, "step": 15295 }, { "epoch": 0.4740364357417276, "grad_norm": 2.9233290441226476, "learning_rate": 2.7684104102798253e-06, "loss": 0.6648, "step": 15300 }, { "epoch": 0.47419134960961706, "grad_norm": 3.0873091754364212, "learning_rate": 2.76759506881482e-06, "loss": 0.6533, "step": 15305 }, { "epoch": 0.4743462634775065, "grad_norm": 2.690814800456911, "learning_rate": 2.766779727349814e-06, "loss": 0.6647, "step": 15310 }, { "epoch": 0.474501177345396, "grad_norm": 2.7660888618589126, "learning_rate": 2.765964385884809e-06, "loss": 0.7446, "step": 15315 }, { "epoch": 0.4746560912132854, "grad_norm": 2.372308488021734, "learning_rate": 2.765149044419803e-06, "loss": 0.6871, "step": 15320 }, { "epoch": 0.47481100508117485, "grad_norm": 3.620903557877288, "learning_rate": 2.764333702954798e-06, "loss": 0.7562, "step": 15325 }, { "epoch": 0.4749659189490643, "grad_norm": 2.858408811453041, "learning_rate": 2.763518361489792e-06, "loss": 0.7376, "step": 15330 }, { "epoch": 0.4751208328169538, "grad_norm": 2.6820268467077777, "learning_rate": 2.762703020024787e-06, "loss": 0.6891, "step": 15335 }, { "epoch": 0.47527574668484324, "grad_norm": 3.0712033461288715, "learning_rate": 2.761887678559781e-06, "loss": 0.7479, "step": 15340 }, { "epoch": 0.4754306605527327, "grad_norm": 2.37399455349314, "learning_rate": 2.761072337094776e-06, "loss": 0.6889, "step": 15345 }, { "epoch": 0.47558557442062216, "grad_norm": 3.6085633476404384, "learning_rate": 2.76025699562977e-06, "loss": 0.7158, "step": 15350 }, { "epoch": 0.47574048828851156, "grad_norm": 2.524947705076108, "learning_rate": 2.7594416541647643e-06, "loss": 0.7435, "step": 15355 }, { "epoch": 0.475895402156401, "grad_norm": 2.7719290857386487, "learning_rate": 2.7586263126997588e-06, "loss": 0.6747, "step": 15360 }, { "epoch": 0.4760503160242905, "grad_norm": 2.575105557737504, "learning_rate": 2.7578109712347532e-06, "loss": 0.6492, "step": 15365 }, { "epoch": 0.47620522989217995, "grad_norm": 2.629764654391383, "learning_rate": 2.7569956297697477e-06, "loss": 0.7912, "step": 15370 }, { "epoch": 0.4763601437600694, "grad_norm": 2.934519822029675, "learning_rate": 2.756180288304742e-06, "loss": 0.6779, "step": 15375 }, { "epoch": 0.47651505762795887, "grad_norm": 4.873769718549405, "learning_rate": 2.7553649468397366e-06, "loss": 0.8018, "step": 15380 }, { "epoch": 0.47666997149584833, "grad_norm": 3.4972853688233383, "learning_rate": 2.754549605374731e-06, "loss": 0.6796, "step": 15385 }, { "epoch": 0.47682488536373774, "grad_norm": 2.52122698023555, "learning_rate": 2.753734263909726e-06, "loss": 0.7382, "step": 15390 }, { "epoch": 0.4769797992316272, "grad_norm": 2.7186153713250607, "learning_rate": 2.75291892244472e-06, "loss": 0.6089, "step": 15395 }, { "epoch": 0.47713471309951666, "grad_norm": 2.7906815901006374, "learning_rate": 2.752103580979715e-06, "loss": 0.763, "step": 15400 }, { "epoch": 0.4772896269674061, "grad_norm": 2.7156693677921937, "learning_rate": 2.751288239514709e-06, "loss": 0.7211, "step": 15405 }, { "epoch": 0.4774445408352956, "grad_norm": 2.883190875863044, "learning_rate": 2.750472898049704e-06, "loss": 0.7842, "step": 15410 }, { "epoch": 0.47759945470318504, "grad_norm": 2.297569501806405, "learning_rate": 2.749657556584698e-06, "loss": 0.7264, "step": 15415 }, { "epoch": 0.4777543685710745, "grad_norm": 4.231659985276002, "learning_rate": 2.7488422151196927e-06, "loss": 0.6787, "step": 15420 }, { "epoch": 0.4779092824389639, "grad_norm": 3.1940774032538064, "learning_rate": 2.7480268736546868e-06, "loss": 0.6799, "step": 15425 }, { "epoch": 0.4780641963068534, "grad_norm": 2.7172310848732915, "learning_rate": 2.7472115321896812e-06, "loss": 0.6778, "step": 15430 }, { "epoch": 0.47821911017474283, "grad_norm": 2.5647868787060744, "learning_rate": 2.7463961907246757e-06, "loss": 0.7354, "step": 15435 }, { "epoch": 0.4783740240426323, "grad_norm": 2.9420766558352853, "learning_rate": 2.74558084925967e-06, "loss": 0.6995, "step": 15440 }, { "epoch": 0.47852893791052176, "grad_norm": 1.983091804001487, "learning_rate": 2.7447655077946646e-06, "loss": 0.6663, "step": 15445 }, { "epoch": 0.4786838517784112, "grad_norm": 2.5421194396915223, "learning_rate": 2.743950166329659e-06, "loss": 0.7185, "step": 15450 }, { "epoch": 0.4788387656463007, "grad_norm": 3.027132627628868, "learning_rate": 2.7431348248646535e-06, "loss": 0.7403, "step": 15455 }, { "epoch": 0.4789936795141901, "grad_norm": 2.7490490403566006, "learning_rate": 2.742319483399648e-06, "loss": 0.7299, "step": 15460 }, { "epoch": 0.47914859338207955, "grad_norm": 3.2802127811829527, "learning_rate": 2.741504141934642e-06, "loss": 0.6812, "step": 15465 }, { "epoch": 0.479303507249969, "grad_norm": 2.753682061367291, "learning_rate": 2.740688800469637e-06, "loss": 0.7181, "step": 15470 }, { "epoch": 0.47945842111785847, "grad_norm": 2.9274916760079917, "learning_rate": 2.739873459004631e-06, "loss": 0.7303, "step": 15475 }, { "epoch": 0.47961333498574793, "grad_norm": 2.947978223319988, "learning_rate": 2.739058117539626e-06, "loss": 0.6907, "step": 15480 }, { "epoch": 0.4797682488536374, "grad_norm": 2.5288362495331893, "learning_rate": 2.7382427760746207e-06, "loss": 0.7394, "step": 15485 }, { "epoch": 0.47992316272152685, "grad_norm": 5.074011057272216, "learning_rate": 2.7374274346096147e-06, "loss": 0.7184, "step": 15490 }, { "epoch": 0.48007807658941626, "grad_norm": 2.3506659571708486, "learning_rate": 2.7366120931446096e-06, "loss": 0.7192, "step": 15495 }, { "epoch": 0.4802329904573057, "grad_norm": 2.691883742852137, "learning_rate": 2.7357967516796036e-06, "loss": 0.7123, "step": 15500 }, { "epoch": 0.4803879043251952, "grad_norm": 2.564618130570533, "learning_rate": 2.734981410214598e-06, "loss": 0.6147, "step": 15505 }, { "epoch": 0.48054281819308464, "grad_norm": 3.313464947252274, "learning_rate": 2.7341660687495926e-06, "loss": 0.7784, "step": 15510 }, { "epoch": 0.4806977320609741, "grad_norm": 2.4095656754172436, "learning_rate": 2.733350727284587e-06, "loss": 0.6496, "step": 15515 }, { "epoch": 0.48085264592886356, "grad_norm": 3.301851980719741, "learning_rate": 2.7325353858195815e-06, "loss": 0.6868, "step": 15520 }, { "epoch": 0.481007559796753, "grad_norm": 7.704501164921775, "learning_rate": 2.731720044354576e-06, "loss": 0.7086, "step": 15525 }, { "epoch": 0.48116247366464243, "grad_norm": 2.1437735711210335, "learning_rate": 2.7309047028895704e-06, "loss": 0.6878, "step": 15530 }, { "epoch": 0.4813173875325319, "grad_norm": 2.8103066963603154, "learning_rate": 2.730089361424565e-06, "loss": 0.6687, "step": 15535 }, { "epoch": 0.48147230140042135, "grad_norm": 3.2225396079921143, "learning_rate": 2.729274019959559e-06, "loss": 0.6882, "step": 15540 }, { "epoch": 0.4816272152683108, "grad_norm": 2.713605108149701, "learning_rate": 2.7284586784945538e-06, "loss": 0.7388, "step": 15545 }, { "epoch": 0.4817821291362003, "grad_norm": 2.352909238733934, "learning_rate": 2.727643337029548e-06, "loss": 0.6946, "step": 15550 }, { "epoch": 0.48193704300408974, "grad_norm": 2.9687652123153616, "learning_rate": 2.7268279955645427e-06, "loss": 0.6683, "step": 15555 }, { "epoch": 0.4820919568719792, "grad_norm": 2.989939093878162, "learning_rate": 2.7260126540995367e-06, "loss": 0.6679, "step": 15560 }, { "epoch": 0.48224687073986866, "grad_norm": 2.4039851002440424, "learning_rate": 2.7251973126345316e-06, "loss": 0.6745, "step": 15565 }, { "epoch": 0.48240178460775807, "grad_norm": 3.017366275093779, "learning_rate": 2.7243819711695265e-06, "loss": 0.7417, "step": 15570 }, { "epoch": 0.4825566984756475, "grad_norm": 3.6117563232184224, "learning_rate": 2.7235666297045205e-06, "loss": 0.7431, "step": 15575 }, { "epoch": 0.482711612343537, "grad_norm": 4.077828968133597, "learning_rate": 2.722751288239515e-06, "loss": 0.6965, "step": 15580 }, { "epoch": 0.48286652621142645, "grad_norm": 2.4780538072327407, "learning_rate": 2.7219359467745094e-06, "loss": 0.6657, "step": 15585 }, { "epoch": 0.4830214400793159, "grad_norm": 2.4276477406784602, "learning_rate": 2.721120605309504e-06, "loss": 0.713, "step": 15590 }, { "epoch": 0.4831763539472054, "grad_norm": 2.9455282269716365, "learning_rate": 2.7203052638444984e-06, "loss": 0.6913, "step": 15595 }, { "epoch": 0.48333126781509483, "grad_norm": 2.8673050996020155, "learning_rate": 2.719489922379493e-06, "loss": 0.7134, "step": 15600 }, { "epoch": 0.48348618168298424, "grad_norm": 2.3745199960556027, "learning_rate": 2.7186745809144873e-06, "loss": 0.6761, "step": 15605 }, { "epoch": 0.4836410955508737, "grad_norm": 3.4915522580166005, "learning_rate": 2.7178592394494817e-06, "loss": 0.7437, "step": 15610 }, { "epoch": 0.48379600941876316, "grad_norm": 2.4418981987543122, "learning_rate": 2.7170438979844758e-06, "loss": 0.7253, "step": 15615 }, { "epoch": 0.4839509232866526, "grad_norm": 2.5777278881959966, "learning_rate": 2.7162285565194707e-06, "loss": 0.7086, "step": 15620 }, { "epoch": 0.4841058371545421, "grad_norm": 2.8046413137155377, "learning_rate": 2.7154132150544647e-06, "loss": 0.7142, "step": 15625 }, { "epoch": 0.48426075102243155, "grad_norm": 2.7207717981028603, "learning_rate": 2.7145978735894596e-06, "loss": 0.6944, "step": 15630 }, { "epoch": 0.484415664890321, "grad_norm": 2.7791634530662526, "learning_rate": 2.7137825321244536e-06, "loss": 0.645, "step": 15635 }, { "epoch": 0.4845705787582104, "grad_norm": 2.8177049048740614, "learning_rate": 2.7129671906594485e-06, "loss": 0.7183, "step": 15640 }, { "epoch": 0.4847254926260999, "grad_norm": 3.204871734323002, "learning_rate": 2.7121518491944425e-06, "loss": 0.7021, "step": 15645 }, { "epoch": 0.48488040649398934, "grad_norm": 2.956429741257082, "learning_rate": 2.7113365077294374e-06, "loss": 0.7314, "step": 15650 }, { "epoch": 0.4850353203618788, "grad_norm": 2.167091709111447, "learning_rate": 2.7105211662644315e-06, "loss": 0.6925, "step": 15655 }, { "epoch": 0.48519023422976826, "grad_norm": 2.292045165994776, "learning_rate": 2.7097058247994263e-06, "loss": 0.7545, "step": 15660 }, { "epoch": 0.4853451480976577, "grad_norm": 2.512340139559416, "learning_rate": 2.708890483334421e-06, "loss": 0.8024, "step": 15665 }, { "epoch": 0.4855000619655472, "grad_norm": 2.6856813446639505, "learning_rate": 2.7080751418694153e-06, "loss": 0.7053, "step": 15670 }, { "epoch": 0.4856549758334366, "grad_norm": 3.11878180905822, "learning_rate": 2.7072598004044097e-06, "loss": 0.7127, "step": 15675 }, { "epoch": 0.48580988970132605, "grad_norm": 2.6566415064153213, "learning_rate": 2.706444458939404e-06, "loss": 0.7041, "step": 15680 }, { "epoch": 0.4859648035692155, "grad_norm": 2.37327862403568, "learning_rate": 2.7056291174743986e-06, "loss": 0.661, "step": 15685 }, { "epoch": 0.48611971743710497, "grad_norm": 2.3962204017842224, "learning_rate": 2.7048137760093927e-06, "loss": 0.6854, "step": 15690 }, { "epoch": 0.48627463130499443, "grad_norm": 2.3625900003714175, "learning_rate": 2.7039984345443875e-06, "loss": 0.6594, "step": 15695 }, { "epoch": 0.4864295451728839, "grad_norm": 2.4744334409928364, "learning_rate": 2.7031830930793816e-06, "loss": 0.6896, "step": 15700 }, { "epoch": 0.48658445904077335, "grad_norm": 2.3005365548261616, "learning_rate": 2.7023677516143765e-06, "loss": 0.6885, "step": 15705 }, { "epoch": 0.48673937290866276, "grad_norm": 3.535847974598457, "learning_rate": 2.7015524101493705e-06, "loss": 0.765, "step": 15710 }, { "epoch": 0.4868942867765522, "grad_norm": 2.5375178741220012, "learning_rate": 2.7007370686843654e-06, "loss": 0.7481, "step": 15715 }, { "epoch": 0.4870492006444417, "grad_norm": 3.9489847924636554, "learning_rate": 2.6999217272193594e-06, "loss": 0.7399, "step": 15720 }, { "epoch": 0.48720411451233114, "grad_norm": 2.6629130038816555, "learning_rate": 2.6991063857543543e-06, "loss": 0.7067, "step": 15725 }, { "epoch": 0.4873590283802206, "grad_norm": 3.0725144045420296, "learning_rate": 2.6982910442893483e-06, "loss": 0.7152, "step": 15730 }, { "epoch": 0.48751394224811007, "grad_norm": 3.2230149650633866, "learning_rate": 2.6974757028243432e-06, "loss": 0.6723, "step": 15735 }, { "epoch": 0.4876688561159995, "grad_norm": 3.0824803943556636, "learning_rate": 2.6966603613593373e-06, "loss": 0.7821, "step": 15740 }, { "epoch": 0.48782376998388893, "grad_norm": 3.0897273891248442, "learning_rate": 2.695845019894332e-06, "loss": 0.7641, "step": 15745 }, { "epoch": 0.4879786838517784, "grad_norm": 2.2751125857756467, "learning_rate": 2.695029678429326e-06, "loss": 0.6472, "step": 15750 }, { "epoch": 0.48813359771966786, "grad_norm": 2.925241614988877, "learning_rate": 2.694214336964321e-06, "loss": 0.6974, "step": 15755 }, { "epoch": 0.4882885115875573, "grad_norm": 2.267892443374752, "learning_rate": 2.6933989954993155e-06, "loss": 0.6755, "step": 15760 }, { "epoch": 0.4884434254554468, "grad_norm": 2.6861281866885838, "learning_rate": 2.69258365403431e-06, "loss": 0.6837, "step": 15765 }, { "epoch": 0.48859833932333624, "grad_norm": 2.847149856985284, "learning_rate": 2.6917683125693044e-06, "loss": 0.6847, "step": 15770 }, { "epoch": 0.4887532531912257, "grad_norm": 2.7463785531794835, "learning_rate": 2.6909529711042985e-06, "loss": 0.7271, "step": 15775 }, { "epoch": 0.4889081670591151, "grad_norm": 3.2806968170098543, "learning_rate": 2.6901376296392934e-06, "loss": 0.769, "step": 15780 }, { "epoch": 0.48906308092700457, "grad_norm": 2.5937297958078926, "learning_rate": 2.6893222881742874e-06, "loss": 0.7735, "step": 15785 }, { "epoch": 0.48921799479489403, "grad_norm": 3.9921630322009864, "learning_rate": 2.6885069467092823e-06, "loss": 0.71, "step": 15790 }, { "epoch": 0.4893729086627835, "grad_norm": 2.476536543531912, "learning_rate": 2.6876916052442763e-06, "loss": 0.7286, "step": 15795 }, { "epoch": 0.48952782253067295, "grad_norm": 2.184323297380938, "learning_rate": 2.686876263779271e-06, "loss": 0.7022, "step": 15800 }, { "epoch": 0.4896827363985624, "grad_norm": 3.163720684672886, "learning_rate": 2.6860609223142652e-06, "loss": 0.7036, "step": 15805 }, { "epoch": 0.4898376502664519, "grad_norm": 1.9375092591454004, "learning_rate": 2.68524558084926e-06, "loss": 0.5956, "step": 15810 }, { "epoch": 0.4899925641343413, "grad_norm": 2.5180034195490166, "learning_rate": 2.684430239384254e-06, "loss": 0.6357, "step": 15815 }, { "epoch": 0.49014747800223074, "grad_norm": 2.896745453340977, "learning_rate": 2.683614897919249e-06, "loss": 0.7169, "step": 15820 }, { "epoch": 0.4903023918701202, "grad_norm": 3.272391929686873, "learning_rate": 2.682799556454243e-06, "loss": 0.6817, "step": 15825 }, { "epoch": 0.49045730573800966, "grad_norm": 2.5871085870860235, "learning_rate": 2.681984214989238e-06, "loss": 0.6353, "step": 15830 }, { "epoch": 0.4906122196058991, "grad_norm": 2.5252155940643792, "learning_rate": 2.681168873524232e-06, "loss": 0.6834, "step": 15835 }, { "epoch": 0.4907671334737886, "grad_norm": 2.3888435654699047, "learning_rate": 2.680353532059227e-06, "loss": 0.7246, "step": 15840 }, { "epoch": 0.49092204734167805, "grad_norm": 2.4745466533175637, "learning_rate": 2.679538190594221e-06, "loss": 0.7193, "step": 15845 }, { "epoch": 0.49107696120956745, "grad_norm": 2.841237181823049, "learning_rate": 2.6787228491292154e-06, "loss": 0.6707, "step": 15850 }, { "epoch": 0.4912318750774569, "grad_norm": 2.7973176941470874, "learning_rate": 2.6779075076642102e-06, "loss": 0.7407, "step": 15855 }, { "epoch": 0.4913867889453464, "grad_norm": 4.267553961305535, "learning_rate": 2.6770921661992043e-06, "loss": 0.6912, "step": 15860 }, { "epoch": 0.49154170281323584, "grad_norm": 2.062527454371141, "learning_rate": 2.676276824734199e-06, "loss": 0.7571, "step": 15865 }, { "epoch": 0.4916966166811253, "grad_norm": 2.326735424001839, "learning_rate": 2.675461483269193e-06, "loss": 0.7037, "step": 15870 }, { "epoch": 0.49185153054901476, "grad_norm": 2.3948746176652094, "learning_rate": 2.674646141804188e-06, "loss": 0.7492, "step": 15875 }, { "epoch": 0.4920064444169042, "grad_norm": 2.573116982943579, "learning_rate": 2.673830800339182e-06, "loss": 0.6939, "step": 15880 }, { "epoch": 0.4921613582847937, "grad_norm": 2.887638656188618, "learning_rate": 2.673015458874177e-06, "loss": 0.6912, "step": 15885 }, { "epoch": 0.4923162721526831, "grad_norm": 2.2553943881469407, "learning_rate": 2.672200117409171e-06, "loss": 0.7497, "step": 15890 }, { "epoch": 0.49247118602057255, "grad_norm": 2.603648451322513, "learning_rate": 2.671384775944166e-06, "loss": 0.7974, "step": 15895 }, { "epoch": 0.492626099888462, "grad_norm": 2.7644405238833727, "learning_rate": 2.67056943447916e-06, "loss": 0.7064, "step": 15900 }, { "epoch": 0.49278101375635147, "grad_norm": 3.6186385237616094, "learning_rate": 2.669754093014155e-06, "loss": 0.7639, "step": 15905 }, { "epoch": 0.49293592762424093, "grad_norm": 2.6026357269779665, "learning_rate": 2.668938751549149e-06, "loss": 0.6632, "step": 15910 }, { "epoch": 0.4930908414921304, "grad_norm": 2.1362662668516883, "learning_rate": 2.6681234100841438e-06, "loss": 0.7032, "step": 15915 }, { "epoch": 0.49324575536001986, "grad_norm": 3.5054072607043905, "learning_rate": 2.6673080686191378e-06, "loss": 0.6592, "step": 15920 }, { "epoch": 0.49340066922790926, "grad_norm": 2.43595481762166, "learning_rate": 2.6664927271541322e-06, "loss": 0.6134, "step": 15925 }, { "epoch": 0.4935555830957987, "grad_norm": 3.522466740756712, "learning_rate": 2.6656773856891267e-06, "loss": 0.8114, "step": 15930 }, { "epoch": 0.4937104969636882, "grad_norm": 3.393131582318712, "learning_rate": 2.664862044224121e-06, "loss": 0.6815, "step": 15935 }, { "epoch": 0.49386541083157764, "grad_norm": 2.4884588062812933, "learning_rate": 2.6640467027591156e-06, "loss": 0.7479, "step": 15940 }, { "epoch": 0.4940203246994671, "grad_norm": 2.9858185681852927, "learning_rate": 2.66323136129411e-06, "loss": 0.7435, "step": 15945 }, { "epoch": 0.49417523856735657, "grad_norm": 2.405375664614744, "learning_rate": 2.662416019829105e-06, "loss": 0.6646, "step": 15950 }, { "epoch": 0.49433015243524603, "grad_norm": 2.798491108275006, "learning_rate": 2.661600678364099e-06, "loss": 0.7103, "step": 15955 }, { "epoch": 0.49448506630313543, "grad_norm": 2.5421997995427037, "learning_rate": 2.660785336899094e-06, "loss": 0.6028, "step": 15960 }, { "epoch": 0.4946399801710249, "grad_norm": 2.909154798154921, "learning_rate": 2.659969995434088e-06, "loss": 0.6734, "step": 15965 }, { "epoch": 0.49479489403891436, "grad_norm": 3.80098386265206, "learning_rate": 2.659154653969083e-06, "loss": 0.706, "step": 15970 }, { "epoch": 0.4949498079068038, "grad_norm": 3.125503918068725, "learning_rate": 2.658339312504077e-06, "loss": 0.7315, "step": 15975 }, { "epoch": 0.4951047217746933, "grad_norm": 2.686668923960575, "learning_rate": 2.6575239710390717e-06, "loss": 0.6551, "step": 15980 }, { "epoch": 0.49525963564258274, "grad_norm": 2.108044401177533, "learning_rate": 2.6567086295740658e-06, "loss": 0.7241, "step": 15985 }, { "epoch": 0.4954145495104722, "grad_norm": 9.785906024125707, "learning_rate": 2.6558932881090606e-06, "loss": 0.6925, "step": 15990 }, { "epoch": 0.4955694633783616, "grad_norm": 2.581613181671664, "learning_rate": 2.6550779466440547e-06, "loss": 0.6906, "step": 15995 }, { "epoch": 0.49572437724625107, "grad_norm": 2.5403902447906472, "learning_rate": 2.654262605179049e-06, "loss": 0.6348, "step": 16000 }, { "epoch": 0.49587929111414053, "grad_norm": 3.370829721146519, "learning_rate": 2.6534472637140436e-06, "loss": 0.6944, "step": 16005 }, { "epoch": 0.49603420498203, "grad_norm": 2.4219087445926464, "learning_rate": 2.652631922249038e-06, "loss": 0.6967, "step": 16010 }, { "epoch": 0.49618911884991945, "grad_norm": 2.6818579371366402, "learning_rate": 2.6518165807840325e-06, "loss": 0.684, "step": 16015 }, { "epoch": 0.4963440327178089, "grad_norm": 3.0002917047171676, "learning_rate": 2.651001239319027e-06, "loss": 0.6458, "step": 16020 }, { "epoch": 0.4964989465856984, "grad_norm": 2.347519264228077, "learning_rate": 2.6501858978540214e-06, "loss": 0.7901, "step": 16025 }, { "epoch": 0.4966538604535878, "grad_norm": 4.218302501497192, "learning_rate": 2.649370556389016e-06, "loss": 0.7792, "step": 16030 }, { "epoch": 0.49680877432147724, "grad_norm": 3.274882620012785, "learning_rate": 2.64855521492401e-06, "loss": 0.7122, "step": 16035 }, { "epoch": 0.4969636881893667, "grad_norm": 2.4094009072636218, "learning_rate": 2.647739873459005e-06, "loss": 0.7014, "step": 16040 }, { "epoch": 0.49711860205725616, "grad_norm": 3.3280249157993302, "learning_rate": 2.6469245319939997e-06, "loss": 0.7874, "step": 16045 }, { "epoch": 0.4972735159251456, "grad_norm": 2.940134618323839, "learning_rate": 2.6461091905289937e-06, "loss": 0.7303, "step": 16050 }, { "epoch": 0.4974284297930351, "grad_norm": 2.3679751490795846, "learning_rate": 2.6452938490639886e-06, "loss": 0.6536, "step": 16055 }, { "epoch": 0.49758334366092455, "grad_norm": 2.4837795752774476, "learning_rate": 2.6444785075989826e-06, "loss": 0.7442, "step": 16060 }, { "epoch": 0.49773825752881395, "grad_norm": 2.7172897079659823, "learning_rate": 2.6436631661339775e-06, "loss": 0.7699, "step": 16065 }, { "epoch": 0.4978931713967034, "grad_norm": 2.4749835026431817, "learning_rate": 2.6428478246689716e-06, "loss": 0.6983, "step": 16070 }, { "epoch": 0.4980480852645929, "grad_norm": 2.7825966075468274, "learning_rate": 2.642032483203966e-06, "loss": 0.7124, "step": 16075 }, { "epoch": 0.49820299913248234, "grad_norm": 2.966759015953364, "learning_rate": 2.6412171417389605e-06, "loss": 0.7531, "step": 16080 }, { "epoch": 0.4983579130003718, "grad_norm": 2.7508250476402, "learning_rate": 2.640401800273955e-06, "loss": 0.73, "step": 16085 }, { "epoch": 0.49851282686826126, "grad_norm": 2.57197691215971, "learning_rate": 2.6395864588089494e-06, "loss": 0.6964, "step": 16090 }, { "epoch": 0.4986677407361507, "grad_norm": 2.667752563919801, "learning_rate": 2.638771117343944e-06, "loss": 0.6968, "step": 16095 }, { "epoch": 0.4988226546040401, "grad_norm": 2.9638176054025407, "learning_rate": 2.6379557758789383e-06, "loss": 0.7334, "step": 16100 }, { "epoch": 0.4989775684719296, "grad_norm": 2.112565699245795, "learning_rate": 2.6371404344139328e-06, "loss": 0.6753, "step": 16105 }, { "epoch": 0.49913248233981905, "grad_norm": 3.2686689649584872, "learning_rate": 2.6363250929489272e-06, "loss": 0.7037, "step": 16110 }, { "epoch": 0.4992873962077085, "grad_norm": 2.5265394307564293, "learning_rate": 2.6355097514839217e-06, "loss": 0.6473, "step": 16115 }, { "epoch": 0.499442310075598, "grad_norm": 2.2987379973495035, "learning_rate": 2.6346944100189157e-06, "loss": 0.6623, "step": 16120 }, { "epoch": 0.49959722394348743, "grad_norm": 2.634874857710183, "learning_rate": 2.6338790685539106e-06, "loss": 0.706, "step": 16125 }, { "epoch": 0.4997521378113769, "grad_norm": 2.515137917253068, "learning_rate": 2.6330637270889046e-06, "loss": 0.756, "step": 16130 }, { "epoch": 0.4999070516792663, "grad_norm": 2.2849239567971953, "learning_rate": 2.6322483856238995e-06, "loss": 0.7532, "step": 16135 }, { "epoch": 0.5000619655471558, "grad_norm": 2.7332886106833656, "learning_rate": 2.6314330441588944e-06, "loss": 0.7087, "step": 16140 }, { "epoch": 0.5002168794150452, "grad_norm": 2.7071281540781404, "learning_rate": 2.6306177026938884e-06, "loss": 0.7328, "step": 16145 }, { "epoch": 0.5003717932829347, "grad_norm": 2.5077325912676587, "learning_rate": 2.629802361228883e-06, "loss": 0.6759, "step": 16150 }, { "epoch": 0.5005267071508241, "grad_norm": 2.729271242337346, "learning_rate": 2.6289870197638774e-06, "loss": 0.7269, "step": 16155 }, { "epoch": 0.5006816210187136, "grad_norm": 2.4232841434069403, "learning_rate": 2.628171678298872e-06, "loss": 0.6721, "step": 16160 }, { "epoch": 0.5008365348866031, "grad_norm": 3.833730795630698, "learning_rate": 2.6273563368338663e-06, "loss": 0.7719, "step": 16165 }, { "epoch": 0.5009914487544925, "grad_norm": 2.645000864382605, "learning_rate": 2.6265409953688607e-06, "loss": 0.68, "step": 16170 }, { "epoch": 0.501146362622382, "grad_norm": 2.2641804565993833, "learning_rate": 2.625725653903855e-06, "loss": 0.6956, "step": 16175 }, { "epoch": 0.5013012764902715, "grad_norm": 3.4741673880666135, "learning_rate": 2.6249103124388497e-06, "loss": 0.7267, "step": 16180 }, { "epoch": 0.5014561903581609, "grad_norm": 3.9016028816063244, "learning_rate": 2.624094970973844e-06, "loss": 0.717, "step": 16185 }, { "epoch": 0.5016111042260503, "grad_norm": 2.1144328901213343, "learning_rate": 2.6232796295088386e-06, "loss": 0.6802, "step": 16190 }, { "epoch": 0.5017660180939397, "grad_norm": 2.122928032637293, "learning_rate": 2.6224642880438326e-06, "loss": 0.6558, "step": 16195 }, { "epoch": 0.5019209319618292, "grad_norm": 3.064165817084924, "learning_rate": 2.6216489465788275e-06, "loss": 0.7203, "step": 16200 }, { "epoch": 0.5020758458297186, "grad_norm": 2.61924723657099, "learning_rate": 2.6208336051138215e-06, "loss": 0.659, "step": 16205 }, { "epoch": 0.5022307596976081, "grad_norm": 3.225215541861902, "learning_rate": 2.6200182636488164e-06, "loss": 0.7392, "step": 16210 }, { "epoch": 0.5023856735654976, "grad_norm": 2.6827674412658378, "learning_rate": 2.6192029221838105e-06, "loss": 0.6623, "step": 16215 }, { "epoch": 0.502540587433387, "grad_norm": 4.468391566094449, "learning_rate": 2.6183875807188053e-06, "loss": 0.6967, "step": 16220 }, { "epoch": 0.5026955013012765, "grad_norm": 2.4705574475052705, "learning_rate": 2.6175722392537994e-06, "loss": 0.6205, "step": 16225 }, { "epoch": 0.502850415169166, "grad_norm": 2.780207771859933, "learning_rate": 2.6167568977887943e-06, "loss": 0.6969, "step": 16230 }, { "epoch": 0.5030053290370554, "grad_norm": 2.4290703502457376, "learning_rate": 2.6159415563237887e-06, "loss": 0.7657, "step": 16235 }, { "epoch": 0.5031602429049449, "grad_norm": 2.6408958408510297, "learning_rate": 2.615126214858783e-06, "loss": 0.6322, "step": 16240 }, { "epoch": 0.5033151567728343, "grad_norm": 3.1930697071556406, "learning_rate": 2.6143108733937776e-06, "loss": 0.7743, "step": 16245 }, { "epoch": 0.5034700706407238, "grad_norm": 2.5649834379997154, "learning_rate": 2.613495531928772e-06, "loss": 0.6593, "step": 16250 }, { "epoch": 0.5036249845086133, "grad_norm": 2.90340752637588, "learning_rate": 2.6126801904637665e-06, "loss": 0.7439, "step": 16255 }, { "epoch": 0.5037798983765026, "grad_norm": 3.817608004455777, "learning_rate": 2.611864848998761e-06, "loss": 0.6675, "step": 16260 }, { "epoch": 0.5039348122443921, "grad_norm": 2.788101072562362, "learning_rate": 2.6110495075337555e-06, "loss": 0.6959, "step": 16265 }, { "epoch": 0.5040897261122815, "grad_norm": 2.393152325394426, "learning_rate": 2.6102341660687495e-06, "loss": 0.7355, "step": 16270 }, { "epoch": 0.504244639980171, "grad_norm": 2.9926562252311406, "learning_rate": 2.6094188246037444e-06, "loss": 0.6693, "step": 16275 }, { "epoch": 0.5043995538480605, "grad_norm": 3.0918943100325094, "learning_rate": 2.6086034831387384e-06, "loss": 0.6257, "step": 16280 }, { "epoch": 0.5045544677159499, "grad_norm": 2.3746293468535136, "learning_rate": 2.6077881416737333e-06, "loss": 0.7368, "step": 16285 }, { "epoch": 0.5047093815838394, "grad_norm": 2.6717038762449623, "learning_rate": 2.6069728002087273e-06, "loss": 0.6924, "step": 16290 }, { "epoch": 0.5048642954517288, "grad_norm": 2.2974960159478273, "learning_rate": 2.6061574587437222e-06, "loss": 0.6978, "step": 16295 }, { "epoch": 0.5050192093196183, "grad_norm": 2.3538733559295673, "learning_rate": 2.6053421172787163e-06, "loss": 0.6499, "step": 16300 }, { "epoch": 0.5051741231875078, "grad_norm": 2.88123525743553, "learning_rate": 2.604526775813711e-06, "loss": 0.6325, "step": 16305 }, { "epoch": 0.5053290370553972, "grad_norm": 2.6032132736332922, "learning_rate": 2.603711434348705e-06, "loss": 0.6879, "step": 16310 }, { "epoch": 0.5054839509232867, "grad_norm": 2.692647618885346, "learning_rate": 2.6028960928837e-06, "loss": 0.7636, "step": 16315 }, { "epoch": 0.5056388647911761, "grad_norm": 2.4968965738207176, "learning_rate": 2.602080751418694e-06, "loss": 0.6472, "step": 16320 }, { "epoch": 0.5057937786590656, "grad_norm": 2.239490427189635, "learning_rate": 2.601265409953689e-06, "loss": 0.7806, "step": 16325 }, { "epoch": 0.5059486925269551, "grad_norm": 3.8135629117310565, "learning_rate": 2.6004500684886834e-06, "loss": 0.6499, "step": 16330 }, { "epoch": 0.5061036063948444, "grad_norm": 3.0893176165195517, "learning_rate": 2.599634727023678e-06, "loss": 0.6742, "step": 16335 }, { "epoch": 0.5062585202627339, "grad_norm": 2.5943796790336773, "learning_rate": 2.5988193855586724e-06, "loss": 0.7043, "step": 16340 }, { "epoch": 0.5064134341306233, "grad_norm": 2.8724788626809272, "learning_rate": 2.5980040440936664e-06, "loss": 0.7596, "step": 16345 }, { "epoch": 0.5065683479985128, "grad_norm": 3.013837665976191, "learning_rate": 2.5971887026286613e-06, "loss": 0.6353, "step": 16350 }, { "epoch": 0.5067232618664023, "grad_norm": 2.5094016502874976, "learning_rate": 2.5963733611636553e-06, "loss": 0.6593, "step": 16355 }, { "epoch": 0.5068781757342917, "grad_norm": 2.9878561976385023, "learning_rate": 2.59555801969865e-06, "loss": 0.6339, "step": 16360 }, { "epoch": 0.5070330896021812, "grad_norm": 2.8563255073939984, "learning_rate": 2.5947426782336442e-06, "loss": 0.6952, "step": 16365 }, { "epoch": 0.5071880034700706, "grad_norm": 3.1074009201866186, "learning_rate": 2.593927336768639e-06, "loss": 0.6545, "step": 16370 }, { "epoch": 0.5073429173379601, "grad_norm": 2.7383214754317673, "learning_rate": 2.593111995303633e-06, "loss": 0.6844, "step": 16375 }, { "epoch": 0.5074978312058496, "grad_norm": 2.778467674894624, "learning_rate": 2.592296653838628e-06, "loss": 0.7511, "step": 16380 }, { "epoch": 0.507652745073739, "grad_norm": 2.076508664666337, "learning_rate": 2.591481312373622e-06, "loss": 0.6586, "step": 16385 }, { "epoch": 0.5078076589416285, "grad_norm": 3.2310688569510178, "learning_rate": 2.590665970908617e-06, "loss": 0.7254, "step": 16390 }, { "epoch": 0.507962572809518, "grad_norm": 2.8842218018001473, "learning_rate": 2.589850629443611e-06, "loss": 0.6553, "step": 16395 }, { "epoch": 0.5081174866774074, "grad_norm": 2.8520899604381986, "learning_rate": 2.589035287978606e-06, "loss": 0.6977, "step": 16400 }, { "epoch": 0.5082724005452968, "grad_norm": 2.5663909056981686, "learning_rate": 2.5882199465136e-06, "loss": 0.7215, "step": 16405 }, { "epoch": 0.5084273144131862, "grad_norm": 2.0737558402978857, "learning_rate": 2.5874046050485948e-06, "loss": 0.7158, "step": 16410 }, { "epoch": 0.5085822282810757, "grad_norm": 3.3976545077114366, "learning_rate": 2.586589263583589e-06, "loss": 0.6962, "step": 16415 }, { "epoch": 0.5087371421489651, "grad_norm": 2.231951392264551, "learning_rate": 2.5857739221185833e-06, "loss": 0.6913, "step": 16420 }, { "epoch": 0.5088920560168546, "grad_norm": 2.8034794694480163, "learning_rate": 2.584958580653578e-06, "loss": 0.7303, "step": 16425 }, { "epoch": 0.5090469698847441, "grad_norm": 2.694429386353077, "learning_rate": 2.584143239188572e-06, "loss": 0.634, "step": 16430 }, { "epoch": 0.5092018837526335, "grad_norm": 2.539481364989245, "learning_rate": 2.583327897723567e-06, "loss": 0.7041, "step": 16435 }, { "epoch": 0.509356797620523, "grad_norm": 3.4121624240281894, "learning_rate": 2.582512556258561e-06, "loss": 0.7173, "step": 16440 }, { "epoch": 0.5095117114884125, "grad_norm": 2.303481114279192, "learning_rate": 2.581697214793556e-06, "loss": 0.6182, "step": 16445 }, { "epoch": 0.5096666253563019, "grad_norm": 2.751942501854114, "learning_rate": 2.58088187332855e-06, "loss": 0.7311, "step": 16450 }, { "epoch": 0.5098215392241914, "grad_norm": 2.104377834736048, "learning_rate": 2.580066531863545e-06, "loss": 0.6311, "step": 16455 }, { "epoch": 0.5099764530920808, "grad_norm": 2.7475153231927743, "learning_rate": 2.579251190398539e-06, "loss": 0.7383, "step": 16460 }, { "epoch": 0.5101313669599703, "grad_norm": 2.7864212208051518, "learning_rate": 2.578435848933534e-06, "loss": 0.6869, "step": 16465 }, { "epoch": 0.5102862808278598, "grad_norm": 5.5003699874116885, "learning_rate": 2.577620507468528e-06, "loss": 0.6884, "step": 16470 }, { "epoch": 0.5104411946957491, "grad_norm": 2.6630610807335295, "learning_rate": 2.5768051660035227e-06, "loss": 0.7456, "step": 16475 }, { "epoch": 0.5105961085636386, "grad_norm": 2.389275586590967, "learning_rate": 2.5759898245385168e-06, "loss": 0.6977, "step": 16480 }, { "epoch": 0.510751022431528, "grad_norm": 3.543022734943667, "learning_rate": 2.5751744830735117e-06, "loss": 0.7507, "step": 16485 }, { "epoch": 0.5109059362994175, "grad_norm": 2.2160213502695263, "learning_rate": 2.5743591416085057e-06, "loss": 0.6523, "step": 16490 }, { "epoch": 0.511060850167307, "grad_norm": 2.9947957898081956, "learning_rate": 2.5735438001435e-06, "loss": 0.6812, "step": 16495 }, { "epoch": 0.5112157640351964, "grad_norm": 3.013192440591699, "learning_rate": 2.5727284586784946e-06, "loss": 0.6828, "step": 16500 }, { "epoch": 0.5113706779030859, "grad_norm": 3.147462330509901, "learning_rate": 2.571913117213489e-06, "loss": 0.7291, "step": 16505 }, { "epoch": 0.5115255917709753, "grad_norm": 2.8624603809386806, "learning_rate": 2.5710977757484835e-06, "loss": 0.6652, "step": 16510 }, { "epoch": 0.5116805056388648, "grad_norm": 3.497387402667296, "learning_rate": 2.570282434283478e-06, "loss": 0.7146, "step": 16515 }, { "epoch": 0.5118354195067543, "grad_norm": 3.09510620075894, "learning_rate": 2.569467092818473e-06, "loss": 0.6794, "step": 16520 }, { "epoch": 0.5119903333746437, "grad_norm": 2.407490868583995, "learning_rate": 2.568651751353467e-06, "loss": 0.7294, "step": 16525 }, { "epoch": 0.5121452472425332, "grad_norm": 2.512454575101413, "learning_rate": 2.567836409888462e-06, "loss": 0.7393, "step": 16530 }, { "epoch": 0.5123001611104226, "grad_norm": 2.6597232154885524, "learning_rate": 2.567021068423456e-06, "loss": 0.7591, "step": 16535 }, { "epoch": 0.5124550749783121, "grad_norm": 2.4963166360844116, "learning_rate": 2.5662057269584507e-06, "loss": 0.6641, "step": 16540 }, { "epoch": 0.5126099888462015, "grad_norm": 3.1260741935263083, "learning_rate": 2.5653903854934448e-06, "loss": 0.7168, "step": 16545 }, { "epoch": 0.5127649027140909, "grad_norm": 5.094861417349134, "learning_rate": 2.5645750440284396e-06, "loss": 0.6981, "step": 16550 }, { "epoch": 0.5129198165819804, "grad_norm": 2.4955066227370812, "learning_rate": 2.5637597025634337e-06, "loss": 0.7182, "step": 16555 }, { "epoch": 0.5130747304498698, "grad_norm": 2.6001872751772885, "learning_rate": 2.5629443610984286e-06, "loss": 0.7927, "step": 16560 }, { "epoch": 0.5132296443177593, "grad_norm": 2.3595503110211467, "learning_rate": 2.5621290196334226e-06, "loss": 0.7211, "step": 16565 }, { "epoch": 0.5133845581856488, "grad_norm": 3.9209219827067034, "learning_rate": 2.561313678168417e-06, "loss": 0.7825, "step": 16570 }, { "epoch": 0.5135394720535382, "grad_norm": 3.365919616518665, "learning_rate": 2.5604983367034115e-06, "loss": 0.6576, "step": 16575 }, { "epoch": 0.5136943859214277, "grad_norm": 2.207227373773411, "learning_rate": 2.559682995238406e-06, "loss": 0.7085, "step": 16580 }, { "epoch": 0.5138492997893171, "grad_norm": 2.725786463095645, "learning_rate": 2.5588676537734004e-06, "loss": 0.7467, "step": 16585 }, { "epoch": 0.5140042136572066, "grad_norm": 2.3045410711365673, "learning_rate": 2.558052312308395e-06, "loss": 0.763, "step": 16590 }, { "epoch": 0.5141591275250961, "grad_norm": 2.5335482999570353, "learning_rate": 2.5572369708433893e-06, "loss": 0.7126, "step": 16595 }, { "epoch": 0.5143140413929855, "grad_norm": 3.1006616410920476, "learning_rate": 2.556421629378384e-06, "loss": 0.7583, "step": 16600 }, { "epoch": 0.514468955260875, "grad_norm": 2.7125339345401955, "learning_rate": 2.5556062879133783e-06, "loss": 0.7559, "step": 16605 }, { "epoch": 0.5146238691287645, "grad_norm": 3.157374849158329, "learning_rate": 2.5547909464483727e-06, "loss": 0.7407, "step": 16610 }, { "epoch": 0.5147787829966538, "grad_norm": 2.4854465798654335, "learning_rate": 2.5539756049833676e-06, "loss": 0.7013, "step": 16615 }, { "epoch": 0.5149336968645433, "grad_norm": 2.92578188617319, "learning_rate": 2.5531602635183616e-06, "loss": 0.7249, "step": 16620 }, { "epoch": 0.5150886107324327, "grad_norm": 5.6839884634097455, "learning_rate": 2.5523449220533565e-06, "loss": 0.6851, "step": 16625 }, { "epoch": 0.5152435246003222, "grad_norm": 2.9234100636996776, "learning_rate": 2.5515295805883506e-06, "loss": 0.6, "step": 16630 }, { "epoch": 0.5153984384682117, "grad_norm": 4.075882961372896, "learning_rate": 2.5507142391233454e-06, "loss": 0.6721, "step": 16635 }, { "epoch": 0.5155533523361011, "grad_norm": 2.759315535919965, "learning_rate": 2.5498988976583395e-06, "loss": 0.7382, "step": 16640 }, { "epoch": 0.5157082662039906, "grad_norm": 2.5474351179805006, "learning_rate": 2.549083556193334e-06, "loss": 0.7197, "step": 16645 }, { "epoch": 0.51586318007188, "grad_norm": 2.9439288059481736, "learning_rate": 2.5482682147283284e-06, "loss": 0.6439, "step": 16650 }, { "epoch": 0.5160180939397695, "grad_norm": 2.5811105163251176, "learning_rate": 2.547452873263323e-06, "loss": 0.7176, "step": 16655 }, { "epoch": 0.516173007807659, "grad_norm": 2.325776612608292, "learning_rate": 2.5466375317983173e-06, "loss": 0.6995, "step": 16660 }, { "epoch": 0.5163279216755484, "grad_norm": 4.530216972744629, "learning_rate": 2.5458221903333118e-06, "loss": 0.6714, "step": 16665 }, { "epoch": 0.5164828355434379, "grad_norm": 3.9542767280771804, "learning_rate": 2.5450068488683062e-06, "loss": 0.6866, "step": 16670 }, { "epoch": 0.5166377494113273, "grad_norm": 3.1908520398290583, "learning_rate": 2.5441915074033007e-06, "loss": 0.7836, "step": 16675 }, { "epoch": 0.5167926632792168, "grad_norm": 2.5834705504796065, "learning_rate": 2.543376165938295e-06, "loss": 0.6802, "step": 16680 }, { "epoch": 0.5169475771471063, "grad_norm": 2.4702825831029354, "learning_rate": 2.5425608244732896e-06, "loss": 0.6983, "step": 16685 }, { "epoch": 0.5171024910149956, "grad_norm": 2.1490910901951468, "learning_rate": 2.5417454830082836e-06, "loss": 0.7104, "step": 16690 }, { "epoch": 0.5172574048828851, "grad_norm": 7.353945367638327, "learning_rate": 2.5409301415432785e-06, "loss": 0.6646, "step": 16695 }, { "epoch": 0.5174123187507745, "grad_norm": 4.169442072730674, "learning_rate": 2.5401148000782726e-06, "loss": 0.7168, "step": 16700 }, { "epoch": 0.517567232618664, "grad_norm": 4.072803532603621, "learning_rate": 2.5392994586132674e-06, "loss": 0.6796, "step": 16705 }, { "epoch": 0.5177221464865535, "grad_norm": 2.7566874238155186, "learning_rate": 2.5384841171482623e-06, "loss": 0.7074, "step": 16710 }, { "epoch": 0.5178770603544429, "grad_norm": 2.886165364983662, "learning_rate": 2.5376687756832564e-06, "loss": 0.6836, "step": 16715 }, { "epoch": 0.5180319742223324, "grad_norm": 3.054619398833079, "learning_rate": 2.536853434218251e-06, "loss": 0.7037, "step": 16720 }, { "epoch": 0.5181868880902218, "grad_norm": 2.6600125899551337, "learning_rate": 2.5360380927532453e-06, "loss": 0.6674, "step": 16725 }, { "epoch": 0.5183418019581113, "grad_norm": 2.6452532228207213, "learning_rate": 2.5352227512882397e-06, "loss": 0.6836, "step": 16730 }, { "epoch": 0.5184967158260008, "grad_norm": 2.7690757409007154, "learning_rate": 2.534407409823234e-06, "loss": 0.6447, "step": 16735 }, { "epoch": 0.5186516296938902, "grad_norm": 2.387897535559262, "learning_rate": 2.5335920683582287e-06, "loss": 0.6337, "step": 16740 }, { "epoch": 0.5188065435617797, "grad_norm": 2.885719308186068, "learning_rate": 2.532776726893223e-06, "loss": 0.7497, "step": 16745 }, { "epoch": 0.5189614574296691, "grad_norm": 2.3761163163246684, "learning_rate": 2.5319613854282176e-06, "loss": 0.6149, "step": 16750 }, { "epoch": 0.5191163712975586, "grad_norm": 2.996716944080471, "learning_rate": 2.531146043963212e-06, "loss": 0.7229, "step": 16755 }, { "epoch": 0.519271285165448, "grad_norm": 3.24607434020834, "learning_rate": 2.5303307024982065e-06, "loss": 0.6521, "step": 16760 }, { "epoch": 0.5194261990333374, "grad_norm": 2.812408933505673, "learning_rate": 2.5295153610332005e-06, "loss": 0.621, "step": 16765 }, { "epoch": 0.5195811129012269, "grad_norm": 2.5583901122677872, "learning_rate": 2.5287000195681954e-06, "loss": 0.692, "step": 16770 }, { "epoch": 0.5197360267691163, "grad_norm": 2.241164560185444, "learning_rate": 2.5278846781031894e-06, "loss": 0.7747, "step": 16775 }, { "epoch": 0.5198909406370058, "grad_norm": 2.4732015740649502, "learning_rate": 2.5270693366381843e-06, "loss": 0.7253, "step": 16780 }, { "epoch": 0.5200458545048953, "grad_norm": 2.631589604366174, "learning_rate": 2.5262539951731784e-06, "loss": 0.7178, "step": 16785 }, { "epoch": 0.5202007683727847, "grad_norm": 2.6968578953237663, "learning_rate": 2.5254386537081732e-06, "loss": 0.7372, "step": 16790 }, { "epoch": 0.5203556822406742, "grad_norm": 3.3882425016187594, "learning_rate": 2.5246233122431673e-06, "loss": 0.7229, "step": 16795 }, { "epoch": 0.5205105961085637, "grad_norm": 3.4527208590570355, "learning_rate": 2.523807970778162e-06, "loss": 0.6639, "step": 16800 }, { "epoch": 0.5206655099764531, "grad_norm": 2.521152065391674, "learning_rate": 2.5229926293131566e-06, "loss": 0.6826, "step": 16805 }, { "epoch": 0.5208204238443426, "grad_norm": 3.820453543565802, "learning_rate": 2.522177287848151e-06, "loss": 0.728, "step": 16810 }, { "epoch": 0.520975337712232, "grad_norm": 2.503291838232579, "learning_rate": 2.5213619463831455e-06, "loss": 0.7291, "step": 16815 }, { "epoch": 0.5211302515801215, "grad_norm": 2.5691451933409053, "learning_rate": 2.52054660491814e-06, "loss": 0.7644, "step": 16820 }, { "epoch": 0.521285165448011, "grad_norm": 2.3554178204691154, "learning_rate": 2.5197312634531345e-06, "loss": 0.7252, "step": 16825 }, { "epoch": 0.5214400793159003, "grad_norm": 2.3736733420978946, "learning_rate": 2.518915921988129e-06, "loss": 0.6668, "step": 16830 }, { "epoch": 0.5215949931837898, "grad_norm": 3.1340377266169193, "learning_rate": 2.5181005805231234e-06, "loss": 0.7864, "step": 16835 }, { "epoch": 0.5217499070516792, "grad_norm": 2.657480131902431, "learning_rate": 2.5172852390581174e-06, "loss": 0.7223, "step": 16840 }, { "epoch": 0.5219048209195687, "grad_norm": 2.5206396453926967, "learning_rate": 2.5164698975931123e-06, "loss": 0.6657, "step": 16845 }, { "epoch": 0.5220597347874582, "grad_norm": 2.8834494610046115, "learning_rate": 2.5156545561281063e-06, "loss": 0.7663, "step": 16850 }, { "epoch": 0.5222146486553476, "grad_norm": 3.155093900845621, "learning_rate": 2.5148392146631012e-06, "loss": 0.6786, "step": 16855 }, { "epoch": 0.5223695625232371, "grad_norm": 3.1235474786871413, "learning_rate": 2.5140238731980953e-06, "loss": 0.6527, "step": 16860 }, { "epoch": 0.5225244763911265, "grad_norm": 2.521388244790305, "learning_rate": 2.51320853173309e-06, "loss": 0.6945, "step": 16865 }, { "epoch": 0.522679390259016, "grad_norm": 3.869835034655082, "learning_rate": 2.512393190268084e-06, "loss": 0.7897, "step": 16870 }, { "epoch": 0.5228343041269055, "grad_norm": 2.333914624269199, "learning_rate": 2.511577848803079e-06, "loss": 0.6682, "step": 16875 }, { "epoch": 0.5229892179947949, "grad_norm": 2.608213464964397, "learning_rate": 2.510762507338073e-06, "loss": 0.7368, "step": 16880 }, { "epoch": 0.5231441318626844, "grad_norm": 3.0177754238425516, "learning_rate": 2.509947165873068e-06, "loss": 0.6657, "step": 16885 }, { "epoch": 0.5232990457305738, "grad_norm": 2.6907511117509824, "learning_rate": 2.5091318244080624e-06, "loss": 0.6771, "step": 16890 }, { "epoch": 0.5234539595984633, "grad_norm": 5.617715652809345, "learning_rate": 2.508316482943057e-06, "loss": 0.7076, "step": 16895 }, { "epoch": 0.5236088734663527, "grad_norm": 4.284088736249448, "learning_rate": 2.5075011414780514e-06, "loss": 0.7008, "step": 16900 }, { "epoch": 0.5237637873342421, "grad_norm": 2.6928426878160576, "learning_rate": 2.506685800013046e-06, "loss": 0.6741, "step": 16905 }, { "epoch": 0.5239187012021316, "grad_norm": 3.145605527769759, "learning_rate": 2.5058704585480403e-06, "loss": 0.6649, "step": 16910 }, { "epoch": 0.524073615070021, "grad_norm": 2.689807569898256, "learning_rate": 2.5050551170830343e-06, "loss": 0.6456, "step": 16915 }, { "epoch": 0.5242285289379105, "grad_norm": 2.8972875296577234, "learning_rate": 2.504239775618029e-06, "loss": 0.6556, "step": 16920 }, { "epoch": 0.5243834428058, "grad_norm": 2.4277592200185696, "learning_rate": 2.5034244341530232e-06, "loss": 0.7494, "step": 16925 }, { "epoch": 0.5245383566736894, "grad_norm": 2.1335129117034413, "learning_rate": 2.502609092688018e-06, "loss": 0.6992, "step": 16930 }, { "epoch": 0.5246932705415789, "grad_norm": 2.4957274659449813, "learning_rate": 2.501793751223012e-06, "loss": 0.6372, "step": 16935 }, { "epoch": 0.5248481844094683, "grad_norm": 3.08605577010964, "learning_rate": 2.500978409758007e-06, "loss": 0.7216, "step": 16940 }, { "epoch": 0.5250030982773578, "grad_norm": 2.386266929725488, "learning_rate": 2.500163068293001e-06, "loss": 0.7233, "step": 16945 }, { "epoch": 0.5251580121452473, "grad_norm": 2.177935291390344, "learning_rate": 2.499347726827996e-06, "loss": 0.6861, "step": 16950 }, { "epoch": 0.5253129260131367, "grad_norm": 2.286768702375574, "learning_rate": 2.4985323853629904e-06, "loss": 0.674, "step": 16955 }, { "epoch": 0.5254678398810262, "grad_norm": 3.0580229266621557, "learning_rate": 2.497717043897985e-06, "loss": 0.6823, "step": 16960 }, { "epoch": 0.5256227537489156, "grad_norm": 2.4962648740042597, "learning_rate": 2.4969017024329793e-06, "loss": 0.7103, "step": 16965 }, { "epoch": 0.5257776676168051, "grad_norm": 2.750450838199545, "learning_rate": 2.4960863609679738e-06, "loss": 0.6752, "step": 16970 }, { "epoch": 0.5259325814846945, "grad_norm": 2.1795803131925244, "learning_rate": 2.4952710195029682e-06, "loss": 0.692, "step": 16975 }, { "epoch": 0.5260874953525839, "grad_norm": 2.670180780740084, "learning_rate": 2.4944556780379627e-06, "loss": 0.7182, "step": 16980 }, { "epoch": 0.5262424092204734, "grad_norm": 3.313308248660262, "learning_rate": 2.493640336572957e-06, "loss": 0.6585, "step": 16985 }, { "epoch": 0.5263973230883628, "grad_norm": 3.372776974124129, "learning_rate": 2.492824995107951e-06, "loss": 0.6875, "step": 16990 }, { "epoch": 0.5265522369562523, "grad_norm": 5.630470588638943, "learning_rate": 2.4920096536429456e-06, "loss": 0.7871, "step": 16995 }, { "epoch": 0.5267071508241418, "grad_norm": 2.859168103446891, "learning_rate": 2.49119431217794e-06, "loss": 0.7446, "step": 17000 }, { "epoch": 0.5268620646920312, "grad_norm": 2.232252421424273, "learning_rate": 2.4903789707129346e-06, "loss": 0.642, "step": 17005 }, { "epoch": 0.5270169785599207, "grad_norm": 2.021021823670356, "learning_rate": 2.489563629247929e-06, "loss": 0.6822, "step": 17010 }, { "epoch": 0.5271718924278102, "grad_norm": 2.5391651432915143, "learning_rate": 2.4887482877829235e-06, "loss": 0.7456, "step": 17015 }, { "epoch": 0.5273268062956996, "grad_norm": 2.7951255879792054, "learning_rate": 2.487932946317918e-06, "loss": 0.7283, "step": 17020 }, { "epoch": 0.5274817201635891, "grad_norm": 2.5779263291802885, "learning_rate": 2.4871176048529124e-06, "loss": 0.6715, "step": 17025 }, { "epoch": 0.5276366340314785, "grad_norm": 3.03837292115594, "learning_rate": 2.486302263387907e-06, "loss": 0.7582, "step": 17030 }, { "epoch": 0.527791547899368, "grad_norm": 2.567048023806685, "learning_rate": 2.4854869219229017e-06, "loss": 0.6808, "step": 17035 }, { "epoch": 0.5279464617672575, "grad_norm": 3.1793037109817, "learning_rate": 2.484671580457896e-06, "loss": 0.8299, "step": 17040 }, { "epoch": 0.5281013756351468, "grad_norm": 2.080770254158952, "learning_rate": 2.4838562389928907e-06, "loss": 0.7359, "step": 17045 }, { "epoch": 0.5282562895030363, "grad_norm": 2.365391610732072, "learning_rate": 2.483040897527885e-06, "loss": 0.6936, "step": 17050 }, { "epoch": 0.5284112033709257, "grad_norm": 2.5908384442842873, "learning_rate": 2.4822255560628796e-06, "loss": 0.6979, "step": 17055 }, { "epoch": 0.5285661172388152, "grad_norm": 2.468665255666085, "learning_rate": 2.481410214597874e-06, "loss": 0.6561, "step": 17060 }, { "epoch": 0.5287210311067047, "grad_norm": 2.4719854545072812, "learning_rate": 2.480594873132868e-06, "loss": 0.7103, "step": 17065 }, { "epoch": 0.5288759449745941, "grad_norm": 2.3523515548639287, "learning_rate": 2.4797795316678625e-06, "loss": 0.7437, "step": 17070 }, { "epoch": 0.5290308588424836, "grad_norm": 3.5013665074752005, "learning_rate": 2.478964190202857e-06, "loss": 0.7244, "step": 17075 }, { "epoch": 0.529185772710373, "grad_norm": 3.5248045800735506, "learning_rate": 2.4781488487378515e-06, "loss": 0.6773, "step": 17080 }, { "epoch": 0.5293406865782625, "grad_norm": 3.5209708222396023, "learning_rate": 2.477333507272846e-06, "loss": 0.7588, "step": 17085 }, { "epoch": 0.529495600446152, "grad_norm": 2.3104490613488218, "learning_rate": 2.4765181658078404e-06, "loss": 0.6729, "step": 17090 }, { "epoch": 0.5296505143140414, "grad_norm": 2.181476287771051, "learning_rate": 2.475702824342835e-06, "loss": 0.686, "step": 17095 }, { "epoch": 0.5298054281819309, "grad_norm": 2.5163158099336647, "learning_rate": 2.4748874828778293e-06, "loss": 0.7048, "step": 17100 }, { "epoch": 0.5299603420498203, "grad_norm": 2.329156729175159, "learning_rate": 2.4740721414128238e-06, "loss": 0.6281, "step": 17105 }, { "epoch": 0.5301152559177098, "grad_norm": 3.2974703743511875, "learning_rate": 2.473256799947818e-06, "loss": 0.7315, "step": 17110 }, { "epoch": 0.5302701697855992, "grad_norm": 3.095251951227981, "learning_rate": 2.4724414584828127e-06, "loss": 0.7185, "step": 17115 }, { "epoch": 0.5304250836534886, "grad_norm": 2.5949706834784583, "learning_rate": 2.471626117017807e-06, "loss": 0.6534, "step": 17120 }, { "epoch": 0.5305799975213781, "grad_norm": 2.5813131823244047, "learning_rate": 2.4708107755528016e-06, "loss": 0.7427, "step": 17125 }, { "epoch": 0.5307349113892675, "grad_norm": 2.809087608592903, "learning_rate": 2.4699954340877965e-06, "loss": 0.6328, "step": 17130 }, { "epoch": 0.530889825257157, "grad_norm": 2.3181845330052697, "learning_rate": 2.469180092622791e-06, "loss": 0.6058, "step": 17135 }, { "epoch": 0.5310447391250465, "grad_norm": 2.4061973899082396, "learning_rate": 2.4683647511577854e-06, "loss": 0.6778, "step": 17140 }, { "epoch": 0.5311996529929359, "grad_norm": 2.331499914463009, "learning_rate": 2.4675494096927794e-06, "loss": 0.6882, "step": 17145 }, { "epoch": 0.5313545668608254, "grad_norm": 2.669409833599894, "learning_rate": 2.466734068227774e-06, "loss": 0.6466, "step": 17150 }, { "epoch": 0.5315094807287148, "grad_norm": 2.713209872911716, "learning_rate": 2.4659187267627683e-06, "loss": 0.7153, "step": 17155 }, { "epoch": 0.5316643945966043, "grad_norm": 2.686106608613764, "learning_rate": 2.465103385297763e-06, "loss": 0.6713, "step": 17160 }, { "epoch": 0.5318193084644938, "grad_norm": 3.51848262903604, "learning_rate": 2.4642880438327573e-06, "loss": 0.6354, "step": 17165 }, { "epoch": 0.5319742223323832, "grad_norm": 2.4935476712471334, "learning_rate": 2.4634727023677517e-06, "loss": 0.6911, "step": 17170 }, { "epoch": 0.5321291362002727, "grad_norm": 3.566262281105985, "learning_rate": 2.462657360902746e-06, "loss": 0.6441, "step": 17175 }, { "epoch": 0.5322840500681622, "grad_norm": 2.397197152802202, "learning_rate": 2.4618420194377406e-06, "loss": 0.6946, "step": 17180 }, { "epoch": 0.5324389639360515, "grad_norm": 2.9627716729404994, "learning_rate": 2.461026677972735e-06, "loss": 0.6705, "step": 17185 }, { "epoch": 0.532593877803941, "grad_norm": 2.528846881825659, "learning_rate": 2.4602113365077296e-06, "loss": 0.7641, "step": 17190 }, { "epoch": 0.5327487916718304, "grad_norm": 3.028469727384156, "learning_rate": 2.459395995042724e-06, "loss": 0.708, "step": 17195 }, { "epoch": 0.5329037055397199, "grad_norm": 2.3158588751478346, "learning_rate": 2.4585806535777185e-06, "loss": 0.7206, "step": 17200 }, { "epoch": 0.5330586194076093, "grad_norm": 4.1368008418703, "learning_rate": 2.457765312112713e-06, "loss": 0.6959, "step": 17205 }, { "epoch": 0.5332135332754988, "grad_norm": 2.7564880485485244, "learning_rate": 2.4569499706477074e-06, "loss": 0.6695, "step": 17210 }, { "epoch": 0.5333684471433883, "grad_norm": 3.85491466230566, "learning_rate": 2.456134629182702e-06, "loss": 0.7079, "step": 17215 }, { "epoch": 0.5335233610112777, "grad_norm": 2.2221268823672737, "learning_rate": 2.4553192877176963e-06, "loss": 0.6212, "step": 17220 }, { "epoch": 0.5336782748791672, "grad_norm": 2.3951846209324565, "learning_rate": 2.4545039462526908e-06, "loss": 0.7632, "step": 17225 }, { "epoch": 0.5338331887470567, "grad_norm": 2.3476439933980537, "learning_rate": 2.4536886047876852e-06, "loss": 0.6551, "step": 17230 }, { "epoch": 0.5339881026149461, "grad_norm": 3.5499953485279363, "learning_rate": 2.4528732633226797e-06, "loss": 0.7169, "step": 17235 }, { "epoch": 0.5341430164828356, "grad_norm": 2.931985371128824, "learning_rate": 2.452057921857674e-06, "loss": 0.6778, "step": 17240 }, { "epoch": 0.534297930350725, "grad_norm": 2.5406141091235463, "learning_rate": 2.4512425803926686e-06, "loss": 0.7168, "step": 17245 }, { "epoch": 0.5344528442186145, "grad_norm": 2.6042602687109326, "learning_rate": 2.450427238927663e-06, "loss": 0.6963, "step": 17250 }, { "epoch": 0.534607758086504, "grad_norm": 2.4873846496948886, "learning_rate": 2.4496118974626575e-06, "loss": 0.6474, "step": 17255 }, { "epoch": 0.5347626719543933, "grad_norm": 2.234883008495322, "learning_rate": 2.448796555997652e-06, "loss": 0.6387, "step": 17260 }, { "epoch": 0.5349175858222828, "grad_norm": 2.5161057325227856, "learning_rate": 2.4479812145326464e-06, "loss": 0.6563, "step": 17265 }, { "epoch": 0.5350724996901722, "grad_norm": 3.0155823459687228, "learning_rate": 2.447165873067641e-06, "loss": 0.7264, "step": 17270 }, { "epoch": 0.5352274135580617, "grad_norm": 2.5825794039659646, "learning_rate": 2.4463505316026354e-06, "loss": 0.6565, "step": 17275 }, { "epoch": 0.5353823274259512, "grad_norm": 2.7767683008787714, "learning_rate": 2.44553519013763e-06, "loss": 0.6578, "step": 17280 }, { "epoch": 0.5355372412938406, "grad_norm": 3.194011772776228, "learning_rate": 2.4447198486726243e-06, "loss": 0.773, "step": 17285 }, { "epoch": 0.5356921551617301, "grad_norm": 2.9524969477942866, "learning_rate": 2.4439045072076187e-06, "loss": 0.6552, "step": 17290 }, { "epoch": 0.5358470690296195, "grad_norm": 2.7023951996177247, "learning_rate": 2.443089165742613e-06, "loss": 0.6963, "step": 17295 }, { "epoch": 0.536001982897509, "grad_norm": 2.2767976888992303, "learning_rate": 2.4422738242776077e-06, "loss": 0.6684, "step": 17300 }, { "epoch": 0.5361568967653985, "grad_norm": 2.625489154616222, "learning_rate": 2.441458482812602e-06, "loss": 0.6901, "step": 17305 }, { "epoch": 0.5363118106332879, "grad_norm": 2.856918056058143, "learning_rate": 2.4406431413475966e-06, "loss": 0.7919, "step": 17310 }, { "epoch": 0.5364667245011774, "grad_norm": 2.3109030638812356, "learning_rate": 2.439827799882591e-06, "loss": 0.7203, "step": 17315 }, { "epoch": 0.5366216383690668, "grad_norm": 2.2063279940718283, "learning_rate": 2.4390124584175855e-06, "loss": 0.615, "step": 17320 }, { "epoch": 0.5367765522369563, "grad_norm": 3.1280069166477658, "learning_rate": 2.43819711695258e-06, "loss": 0.651, "step": 17325 }, { "epoch": 0.5369314661048457, "grad_norm": 2.313745679289381, "learning_rate": 2.4373817754875744e-06, "loss": 0.6599, "step": 17330 }, { "epoch": 0.5370863799727351, "grad_norm": 3.2964441804472364, "learning_rate": 2.436566434022569e-06, "loss": 0.6385, "step": 17335 }, { "epoch": 0.5372412938406246, "grad_norm": 2.490402865691008, "learning_rate": 2.4357510925575633e-06, "loss": 0.7307, "step": 17340 }, { "epoch": 0.537396207708514, "grad_norm": 3.320959137363298, "learning_rate": 2.4349357510925578e-06, "loss": 0.6531, "step": 17345 }, { "epoch": 0.5375511215764035, "grad_norm": 2.767866747929055, "learning_rate": 2.4341204096275522e-06, "loss": 0.6637, "step": 17350 }, { "epoch": 0.537706035444293, "grad_norm": 2.384891813677763, "learning_rate": 2.4333050681625467e-06, "loss": 0.7189, "step": 17355 }, { "epoch": 0.5378609493121824, "grad_norm": 2.9585275184706585, "learning_rate": 2.432489726697541e-06, "loss": 0.7441, "step": 17360 }, { "epoch": 0.5380158631800719, "grad_norm": 2.8716092867607346, "learning_rate": 2.4316743852325356e-06, "loss": 0.6978, "step": 17365 }, { "epoch": 0.5381707770479613, "grad_norm": 2.4168180991900177, "learning_rate": 2.43085904376753e-06, "loss": 0.5903, "step": 17370 }, { "epoch": 0.5383256909158508, "grad_norm": 4.683563530406104, "learning_rate": 2.4300437023025245e-06, "loss": 0.7501, "step": 17375 }, { "epoch": 0.5384806047837403, "grad_norm": 2.5608239718944867, "learning_rate": 2.429228360837519e-06, "loss": 0.6613, "step": 17380 }, { "epoch": 0.5386355186516297, "grad_norm": 3.104871760458121, "learning_rate": 2.4284130193725135e-06, "loss": 0.6273, "step": 17385 }, { "epoch": 0.5387904325195192, "grad_norm": 2.184186110903754, "learning_rate": 2.427597677907508e-06, "loss": 0.633, "step": 17390 }, { "epoch": 0.5389453463874087, "grad_norm": 3.3841730144137787, "learning_rate": 2.4267823364425024e-06, "loss": 0.6221, "step": 17395 }, { "epoch": 0.539100260255298, "grad_norm": 2.079860642771214, "learning_rate": 2.425966994977497e-06, "loss": 0.6761, "step": 17400 }, { "epoch": 0.5392551741231875, "grad_norm": 2.4509171274761883, "learning_rate": 2.4251516535124913e-06, "loss": 0.6396, "step": 17405 }, { "epoch": 0.5394100879910769, "grad_norm": 2.2943229100083777, "learning_rate": 2.4243363120474858e-06, "loss": 0.7812, "step": 17410 }, { "epoch": 0.5395650018589664, "grad_norm": 2.4923124448336447, "learning_rate": 2.4235209705824802e-06, "loss": 0.7362, "step": 17415 }, { "epoch": 0.5397199157268558, "grad_norm": 2.4625042959079306, "learning_rate": 2.4227056291174747e-06, "loss": 0.6955, "step": 17420 }, { "epoch": 0.5398748295947453, "grad_norm": 2.456048713651526, "learning_rate": 2.421890287652469e-06, "loss": 0.665, "step": 17425 }, { "epoch": 0.5400297434626348, "grad_norm": 3.0717420086788825, "learning_rate": 2.4210749461874636e-06, "loss": 0.6159, "step": 17430 }, { "epoch": 0.5401846573305242, "grad_norm": 2.6105424066141274, "learning_rate": 2.420259604722458e-06, "loss": 0.7053, "step": 17435 }, { "epoch": 0.5403395711984137, "grad_norm": 4.344883322755144, "learning_rate": 2.4194442632574525e-06, "loss": 0.6969, "step": 17440 }, { "epoch": 0.5404944850663032, "grad_norm": 2.745339524748999, "learning_rate": 2.418628921792447e-06, "loss": 0.7472, "step": 17445 }, { "epoch": 0.5406493989341926, "grad_norm": 2.699969993388491, "learning_rate": 2.4178135803274414e-06, "loss": 0.695, "step": 17450 }, { "epoch": 0.5408043128020821, "grad_norm": 2.960474581062604, "learning_rate": 2.416998238862436e-06, "loss": 0.6372, "step": 17455 }, { "epoch": 0.5409592266699715, "grad_norm": 2.7219760799674173, "learning_rate": 2.4161828973974303e-06, "loss": 0.7094, "step": 17460 }, { "epoch": 0.541114140537861, "grad_norm": 2.485021733961237, "learning_rate": 2.415367555932425e-06, "loss": 0.7019, "step": 17465 }, { "epoch": 0.5412690544057503, "grad_norm": 2.4737375753154454, "learning_rate": 2.4145522144674193e-06, "loss": 0.7051, "step": 17470 }, { "epoch": 0.5414239682736398, "grad_norm": 2.713008054861029, "learning_rate": 2.4137368730024137e-06, "loss": 0.6832, "step": 17475 }, { "epoch": 0.5415788821415293, "grad_norm": 2.398407621824164, "learning_rate": 2.412921531537408e-06, "loss": 0.6488, "step": 17480 }, { "epoch": 0.5417337960094187, "grad_norm": 2.440312073717055, "learning_rate": 2.4121061900724022e-06, "loss": 0.6856, "step": 17485 }, { "epoch": 0.5418887098773082, "grad_norm": 2.8614569609729004, "learning_rate": 2.4112908486073967e-06, "loss": 0.7338, "step": 17490 }, { "epoch": 0.5420436237451977, "grad_norm": 2.80763383225423, "learning_rate": 2.410475507142391e-06, "loss": 0.7461, "step": 17495 }, { "epoch": 0.5421985376130871, "grad_norm": 3.056363584182235, "learning_rate": 2.4096601656773856e-06, "loss": 0.7549, "step": 17500 }, { "epoch": 0.5423534514809766, "grad_norm": 2.753448252241469, "learning_rate": 2.4088448242123805e-06, "loss": 0.7542, "step": 17505 }, { "epoch": 0.542508365348866, "grad_norm": 2.9853110030862258, "learning_rate": 2.408029482747375e-06, "loss": 0.7555, "step": 17510 }, { "epoch": 0.5426632792167555, "grad_norm": 3.349062039118412, "learning_rate": 2.4072141412823694e-06, "loss": 0.7471, "step": 17515 }, { "epoch": 0.542818193084645, "grad_norm": 2.441584276080025, "learning_rate": 2.406398799817364e-06, "loss": 0.6394, "step": 17520 }, { "epoch": 0.5429731069525344, "grad_norm": 2.9175482171996694, "learning_rate": 2.4055834583523583e-06, "loss": 0.7488, "step": 17525 }, { "epoch": 0.5431280208204239, "grad_norm": 2.5411826206086356, "learning_rate": 2.4047681168873528e-06, "loss": 0.5656, "step": 17530 }, { "epoch": 0.5432829346883133, "grad_norm": 2.1663574019710015, "learning_rate": 2.4039527754223472e-06, "loss": 0.695, "step": 17535 }, { "epoch": 0.5434378485562027, "grad_norm": 2.746871322998651, "learning_rate": 2.4031374339573417e-06, "loss": 0.7608, "step": 17540 }, { "epoch": 0.5435927624240922, "grad_norm": 2.7838713080894992, "learning_rate": 2.402322092492336e-06, "loss": 0.7427, "step": 17545 }, { "epoch": 0.5437476762919816, "grad_norm": 2.882919987415403, "learning_rate": 2.4015067510273306e-06, "loss": 0.7186, "step": 17550 }, { "epoch": 0.5439025901598711, "grad_norm": 2.616487823761528, "learning_rate": 2.400691409562325e-06, "loss": 0.6697, "step": 17555 }, { "epoch": 0.5440575040277605, "grad_norm": 3.607301851680637, "learning_rate": 2.3998760680973195e-06, "loss": 0.6687, "step": 17560 }, { "epoch": 0.54421241789565, "grad_norm": 2.713003703607556, "learning_rate": 2.3990607266323136e-06, "loss": 0.7149, "step": 17565 }, { "epoch": 0.5443673317635395, "grad_norm": 3.086899838799038, "learning_rate": 2.398245385167308e-06, "loss": 0.7486, "step": 17570 }, { "epoch": 0.5445222456314289, "grad_norm": 3.109230047665327, "learning_rate": 2.3974300437023025e-06, "loss": 0.6727, "step": 17575 }, { "epoch": 0.5446771594993184, "grad_norm": 2.0016791034381347, "learning_rate": 2.396614702237297e-06, "loss": 0.6224, "step": 17580 }, { "epoch": 0.5448320733672078, "grad_norm": 2.9495599157504055, "learning_rate": 2.3957993607722914e-06, "loss": 0.6986, "step": 17585 }, { "epoch": 0.5449869872350973, "grad_norm": 3.141139153377539, "learning_rate": 2.394984019307286e-06, "loss": 0.7478, "step": 17590 }, { "epoch": 0.5451419011029868, "grad_norm": 2.629190128775607, "learning_rate": 2.3941686778422803e-06, "loss": 0.6119, "step": 17595 }, { "epoch": 0.5452968149708762, "grad_norm": 2.9999865848925853, "learning_rate": 2.393353336377275e-06, "loss": 0.6551, "step": 17600 }, { "epoch": 0.5454517288387657, "grad_norm": 3.5784211889227824, "learning_rate": 2.3925379949122697e-06, "loss": 0.6658, "step": 17605 }, { "epoch": 0.5456066427066552, "grad_norm": 2.718214790786109, "learning_rate": 2.391722653447264e-06, "loss": 0.6245, "step": 17610 }, { "epoch": 0.5457615565745445, "grad_norm": 3.0210053090919327, "learning_rate": 2.3909073119822586e-06, "loss": 0.686, "step": 17615 }, { "epoch": 0.545916470442434, "grad_norm": 2.968063693410191, "learning_rate": 2.390091970517253e-06, "loss": 0.7272, "step": 17620 }, { "epoch": 0.5460713843103234, "grad_norm": 2.5725638003607396, "learning_rate": 2.3892766290522475e-06, "loss": 0.6353, "step": 17625 }, { "epoch": 0.5462262981782129, "grad_norm": 2.6132656303300483, "learning_rate": 2.388461287587242e-06, "loss": 0.7151, "step": 17630 }, { "epoch": 0.5463812120461023, "grad_norm": 2.459040976064975, "learning_rate": 2.3876459461222364e-06, "loss": 0.6406, "step": 17635 }, { "epoch": 0.5465361259139918, "grad_norm": 2.529545914651731, "learning_rate": 2.3868306046572305e-06, "loss": 0.6812, "step": 17640 }, { "epoch": 0.5466910397818813, "grad_norm": 2.686772007738979, "learning_rate": 2.386015263192225e-06, "loss": 0.6188, "step": 17645 }, { "epoch": 0.5468459536497707, "grad_norm": 2.3304636558581766, "learning_rate": 2.3851999217272194e-06, "loss": 0.6765, "step": 17650 }, { "epoch": 0.5470008675176602, "grad_norm": 2.020116081045486, "learning_rate": 2.384384580262214e-06, "loss": 0.6434, "step": 17655 }, { "epoch": 0.5471557813855497, "grad_norm": 2.610556903794248, "learning_rate": 2.3835692387972083e-06, "loss": 0.6361, "step": 17660 }, { "epoch": 0.5473106952534391, "grad_norm": 2.5752278104225983, "learning_rate": 2.3827538973322027e-06, "loss": 0.6683, "step": 17665 }, { "epoch": 0.5474656091213286, "grad_norm": 3.0250904323757646, "learning_rate": 2.381938555867197e-06, "loss": 0.6908, "step": 17670 }, { "epoch": 0.547620522989218, "grad_norm": 2.187280154448827, "learning_rate": 2.3811232144021917e-06, "loss": 0.6569, "step": 17675 }, { "epoch": 0.5477754368571075, "grad_norm": 2.2102000108490674, "learning_rate": 2.380307872937186e-06, "loss": 0.7498, "step": 17680 }, { "epoch": 0.5479303507249969, "grad_norm": 3.114115393800408, "learning_rate": 2.3794925314721806e-06, "loss": 0.7881, "step": 17685 }, { "epoch": 0.5480852645928863, "grad_norm": 2.5887438406633767, "learning_rate": 2.378677190007175e-06, "loss": 0.7106, "step": 17690 }, { "epoch": 0.5482401784607758, "grad_norm": 2.797856438179245, "learning_rate": 2.37786184854217e-06, "loss": 0.6971, "step": 17695 }, { "epoch": 0.5483950923286652, "grad_norm": 2.4016992838196014, "learning_rate": 2.3770465070771644e-06, "loss": 0.7225, "step": 17700 }, { "epoch": 0.5485500061965547, "grad_norm": 2.735242186381103, "learning_rate": 2.376231165612159e-06, "loss": 0.6526, "step": 17705 }, { "epoch": 0.5487049200644442, "grad_norm": 4.546094736701548, "learning_rate": 2.3754158241471533e-06, "loss": 0.65, "step": 17710 }, { "epoch": 0.5488598339323336, "grad_norm": 2.6337499771111994, "learning_rate": 2.3746004826821473e-06, "loss": 0.6544, "step": 17715 }, { "epoch": 0.5490147478002231, "grad_norm": 3.1922573544432065, "learning_rate": 2.373785141217142e-06, "loss": 0.6683, "step": 17720 }, { "epoch": 0.5491696616681125, "grad_norm": 2.4543096258850707, "learning_rate": 2.3729697997521363e-06, "loss": 0.7074, "step": 17725 }, { "epoch": 0.549324575536002, "grad_norm": 3.015435305094397, "learning_rate": 2.3721544582871307e-06, "loss": 0.6271, "step": 17730 }, { "epoch": 0.5494794894038915, "grad_norm": 2.5352613974083376, "learning_rate": 2.371339116822125e-06, "loss": 0.6986, "step": 17735 }, { "epoch": 0.5496344032717809, "grad_norm": 2.2923715244012635, "learning_rate": 2.3705237753571196e-06, "loss": 0.6713, "step": 17740 }, { "epoch": 0.5497893171396704, "grad_norm": 3.2466187244888425, "learning_rate": 2.369708433892114e-06, "loss": 0.6989, "step": 17745 }, { "epoch": 0.5499442310075598, "grad_norm": 2.6576695843201, "learning_rate": 2.3688930924271086e-06, "loss": 0.7469, "step": 17750 }, { "epoch": 0.5500991448754492, "grad_norm": 2.8117111918419244, "learning_rate": 2.368077750962103e-06, "loss": 0.7532, "step": 17755 }, { "epoch": 0.5502540587433387, "grad_norm": 3.2892299937018152, "learning_rate": 2.3672624094970975e-06, "loss": 0.7284, "step": 17760 }, { "epoch": 0.5504089726112281, "grad_norm": 2.544420026052673, "learning_rate": 2.366447068032092e-06, "loss": 0.7993, "step": 17765 }, { "epoch": 0.5505638864791176, "grad_norm": 2.7201721080699284, "learning_rate": 2.3656317265670864e-06, "loss": 0.7569, "step": 17770 }, { "epoch": 0.550718800347007, "grad_norm": 2.5443723882191187, "learning_rate": 2.364816385102081e-06, "loss": 0.7271, "step": 17775 }, { "epoch": 0.5508737142148965, "grad_norm": 3.1876800361721953, "learning_rate": 2.3640010436370753e-06, "loss": 0.6456, "step": 17780 }, { "epoch": 0.551028628082786, "grad_norm": 3.0461725636299453, "learning_rate": 2.3631857021720698e-06, "loss": 0.7108, "step": 17785 }, { "epoch": 0.5511835419506754, "grad_norm": 2.583047855397003, "learning_rate": 2.3623703607070642e-06, "loss": 0.6525, "step": 17790 }, { "epoch": 0.5513384558185649, "grad_norm": 2.755908924272554, "learning_rate": 2.3615550192420587e-06, "loss": 0.7833, "step": 17795 }, { "epoch": 0.5514933696864543, "grad_norm": 2.439547320848455, "learning_rate": 2.360739677777053e-06, "loss": 0.6818, "step": 17800 }, { "epoch": 0.5516482835543438, "grad_norm": 2.5244801390184524, "learning_rate": 2.3599243363120476e-06, "loss": 0.7198, "step": 17805 }, { "epoch": 0.5518031974222333, "grad_norm": 2.493432375925996, "learning_rate": 2.359108994847042e-06, "loss": 0.6809, "step": 17810 }, { "epoch": 0.5519581112901227, "grad_norm": 2.5295555563835235, "learning_rate": 2.3582936533820365e-06, "loss": 0.7346, "step": 17815 }, { "epoch": 0.5521130251580122, "grad_norm": 3.1665757813135937, "learning_rate": 2.357478311917031e-06, "loss": 0.7121, "step": 17820 }, { "epoch": 0.5522679390259015, "grad_norm": 2.414999823865433, "learning_rate": 2.3566629704520254e-06, "loss": 0.7084, "step": 17825 }, { "epoch": 0.552422852893791, "grad_norm": 3.038722758217354, "learning_rate": 2.35584762898702e-06, "loss": 0.6193, "step": 17830 }, { "epoch": 0.5525777667616805, "grad_norm": 2.659753277114282, "learning_rate": 2.3550322875220144e-06, "loss": 0.6898, "step": 17835 }, { "epoch": 0.5527326806295699, "grad_norm": 3.0827203217961623, "learning_rate": 2.354216946057009e-06, "loss": 0.6772, "step": 17840 }, { "epoch": 0.5528875944974594, "grad_norm": 2.7340653693855463, "learning_rate": 2.3534016045920033e-06, "loss": 0.749, "step": 17845 }, { "epoch": 0.5530425083653489, "grad_norm": 3.3898206094811867, "learning_rate": 2.3525862631269977e-06, "loss": 0.6824, "step": 17850 }, { "epoch": 0.5531974222332383, "grad_norm": 2.3603271668939407, "learning_rate": 2.351770921661992e-06, "loss": 0.6776, "step": 17855 }, { "epoch": 0.5533523361011278, "grad_norm": 3.739408910516398, "learning_rate": 2.3509555801969867e-06, "loss": 0.6615, "step": 17860 }, { "epoch": 0.5535072499690172, "grad_norm": 2.539135748196286, "learning_rate": 2.350140238731981e-06, "loss": 0.6418, "step": 17865 }, { "epoch": 0.5536621638369067, "grad_norm": 2.8025798925324414, "learning_rate": 2.3493248972669756e-06, "loss": 0.7891, "step": 17870 }, { "epoch": 0.5538170777047962, "grad_norm": 2.819351079955825, "learning_rate": 2.34850955580197e-06, "loss": 0.718, "step": 17875 }, { "epoch": 0.5539719915726856, "grad_norm": 2.795448338514103, "learning_rate": 2.3476942143369645e-06, "loss": 0.6582, "step": 17880 }, { "epoch": 0.5541269054405751, "grad_norm": 3.1285597323075685, "learning_rate": 2.346878872871959e-06, "loss": 0.7337, "step": 17885 }, { "epoch": 0.5542818193084645, "grad_norm": 2.539787516696019, "learning_rate": 2.3460635314069534e-06, "loss": 0.6885, "step": 17890 }, { "epoch": 0.554436733176354, "grad_norm": 2.5697384181882024, "learning_rate": 2.345248189941948e-06, "loss": 0.6668, "step": 17895 }, { "epoch": 0.5545916470442434, "grad_norm": 2.721195117947141, "learning_rate": 2.3444328484769423e-06, "loss": 0.7373, "step": 17900 }, { "epoch": 0.5547465609121328, "grad_norm": 2.3525435086584543, "learning_rate": 2.3436175070119368e-06, "loss": 0.6827, "step": 17905 }, { "epoch": 0.5549014747800223, "grad_norm": 2.98316315291025, "learning_rate": 2.3428021655469312e-06, "loss": 0.649, "step": 17910 }, { "epoch": 0.5550563886479117, "grad_norm": 2.5847218207845404, "learning_rate": 2.3419868240819257e-06, "loss": 0.6397, "step": 17915 }, { "epoch": 0.5552113025158012, "grad_norm": 2.664638364021315, "learning_rate": 2.34117148261692e-06, "loss": 0.6848, "step": 17920 }, { "epoch": 0.5553662163836907, "grad_norm": 4.0073304654447455, "learning_rate": 2.3403561411519146e-06, "loss": 0.6976, "step": 17925 }, { "epoch": 0.5555211302515801, "grad_norm": 2.3339866355949717, "learning_rate": 2.339540799686909e-06, "loss": 0.7053, "step": 17930 }, { "epoch": 0.5556760441194696, "grad_norm": 3.3949930987981416, "learning_rate": 2.3387254582219035e-06, "loss": 0.6998, "step": 17935 }, { "epoch": 0.555830957987359, "grad_norm": 2.486335136889185, "learning_rate": 2.337910116756898e-06, "loss": 0.6089, "step": 17940 }, { "epoch": 0.5559858718552485, "grad_norm": 3.0118650698445566, "learning_rate": 2.3370947752918925e-06, "loss": 0.7216, "step": 17945 }, { "epoch": 0.556140785723138, "grad_norm": 2.541944914612885, "learning_rate": 2.336279433826887e-06, "loss": 0.6802, "step": 17950 }, { "epoch": 0.5562956995910274, "grad_norm": 2.941309771273109, "learning_rate": 2.3354640923618814e-06, "loss": 0.6759, "step": 17955 }, { "epoch": 0.5564506134589169, "grad_norm": 3.9186234481742, "learning_rate": 2.334648750896876e-06, "loss": 0.747, "step": 17960 }, { "epoch": 0.5566055273268063, "grad_norm": 3.6018448820344884, "learning_rate": 2.3338334094318703e-06, "loss": 0.6515, "step": 17965 }, { "epoch": 0.5567604411946957, "grad_norm": 2.466565109630661, "learning_rate": 2.3330180679668648e-06, "loss": 0.7017, "step": 17970 }, { "epoch": 0.5569153550625852, "grad_norm": 2.4366531272474012, "learning_rate": 2.3322027265018592e-06, "loss": 0.6951, "step": 17975 }, { "epoch": 0.5570702689304746, "grad_norm": 3.2328866530037157, "learning_rate": 2.3313873850368537e-06, "loss": 0.728, "step": 17980 }, { "epoch": 0.5572251827983641, "grad_norm": 3.864967824756453, "learning_rate": 2.330572043571848e-06, "loss": 0.8064, "step": 17985 }, { "epoch": 0.5573800966662535, "grad_norm": 2.939614320427385, "learning_rate": 2.3297567021068426e-06, "loss": 0.6743, "step": 17990 }, { "epoch": 0.557535010534143, "grad_norm": 2.3977955393956214, "learning_rate": 2.328941360641837e-06, "loss": 0.7093, "step": 17995 }, { "epoch": 0.5576899244020325, "grad_norm": 2.702599242611414, "learning_rate": 2.3281260191768315e-06, "loss": 0.7103, "step": 18000 }, { "epoch": 0.5578448382699219, "grad_norm": 2.5129645953977717, "learning_rate": 2.327310677711826e-06, "loss": 0.6521, "step": 18005 }, { "epoch": 0.5579997521378114, "grad_norm": 2.6551572857019687, "learning_rate": 2.3264953362468204e-06, "loss": 0.7754, "step": 18010 }, { "epoch": 0.5581546660057009, "grad_norm": 2.3198932167213804, "learning_rate": 2.325679994781815e-06, "loss": 0.7048, "step": 18015 }, { "epoch": 0.5583095798735903, "grad_norm": 2.315035852186575, "learning_rate": 2.3248646533168093e-06, "loss": 0.7374, "step": 18020 }, { "epoch": 0.5584644937414798, "grad_norm": 2.440095732264671, "learning_rate": 2.324049311851804e-06, "loss": 0.5958, "step": 18025 }, { "epoch": 0.5586194076093692, "grad_norm": 2.200053785938478, "learning_rate": 2.3232339703867983e-06, "loss": 0.6561, "step": 18030 }, { "epoch": 0.5587743214772587, "grad_norm": 2.5312481143551064, "learning_rate": 2.3224186289217927e-06, "loss": 0.6722, "step": 18035 }, { "epoch": 0.558929235345148, "grad_norm": 2.833401255757857, "learning_rate": 2.321603287456787e-06, "loss": 0.6555, "step": 18040 }, { "epoch": 0.5590841492130375, "grad_norm": 2.494180745280814, "learning_rate": 2.3207879459917816e-06, "loss": 0.6675, "step": 18045 }, { "epoch": 0.559239063080927, "grad_norm": 2.8134967909085327, "learning_rate": 2.319972604526776e-06, "loss": 0.6866, "step": 18050 }, { "epoch": 0.5593939769488164, "grad_norm": 3.339696566746957, "learning_rate": 2.3191572630617706e-06, "loss": 0.696, "step": 18055 }, { "epoch": 0.5595488908167059, "grad_norm": 2.537599946564249, "learning_rate": 2.3183419215967646e-06, "loss": 0.7605, "step": 18060 }, { "epoch": 0.5597038046845954, "grad_norm": 2.646569681257756, "learning_rate": 2.317526580131759e-06, "loss": 0.6396, "step": 18065 }, { "epoch": 0.5598587185524848, "grad_norm": 2.286781858458803, "learning_rate": 2.316711238666754e-06, "loss": 0.5718, "step": 18070 }, { "epoch": 0.5600136324203743, "grad_norm": 2.8713043774276312, "learning_rate": 2.3158958972017484e-06, "loss": 0.6948, "step": 18075 }, { "epoch": 0.5601685462882637, "grad_norm": 3.4726163621877473, "learning_rate": 2.315080555736743e-06, "loss": 0.7082, "step": 18080 }, { "epoch": 0.5603234601561532, "grad_norm": 2.6210042331754413, "learning_rate": 2.3142652142717373e-06, "loss": 0.671, "step": 18085 }, { "epoch": 0.5604783740240427, "grad_norm": 2.5903342736225308, "learning_rate": 2.3134498728067318e-06, "loss": 0.7302, "step": 18090 }, { "epoch": 0.5606332878919321, "grad_norm": 2.3934796540180177, "learning_rate": 2.3126345313417262e-06, "loss": 0.6904, "step": 18095 }, { "epoch": 0.5607882017598216, "grad_norm": 2.9686648023200863, "learning_rate": 2.3118191898767207e-06, "loss": 0.6937, "step": 18100 }, { "epoch": 0.560943115627711, "grad_norm": 2.4892456307828863, "learning_rate": 2.311003848411715e-06, "loss": 0.6839, "step": 18105 }, { "epoch": 0.5610980294956004, "grad_norm": 2.2260515418283635, "learning_rate": 2.3101885069467096e-06, "loss": 0.7174, "step": 18110 }, { "epoch": 0.5612529433634899, "grad_norm": 2.2728758943044176, "learning_rate": 2.309373165481704e-06, "loss": 0.7383, "step": 18115 }, { "epoch": 0.5614078572313793, "grad_norm": 2.9475696199594204, "learning_rate": 2.3085578240166985e-06, "loss": 0.655, "step": 18120 }, { "epoch": 0.5615627710992688, "grad_norm": 2.905370503134499, "learning_rate": 2.307742482551693e-06, "loss": 0.6878, "step": 18125 }, { "epoch": 0.5617176849671582, "grad_norm": 4.347617235914215, "learning_rate": 2.3069271410866874e-06, "loss": 0.6845, "step": 18130 }, { "epoch": 0.5618725988350477, "grad_norm": 3.231667015946832, "learning_rate": 2.3061117996216815e-06, "loss": 0.611, "step": 18135 }, { "epoch": 0.5620275127029372, "grad_norm": 2.9806209053449875, "learning_rate": 2.305296458156676e-06, "loss": 0.6474, "step": 18140 }, { "epoch": 0.5621824265708266, "grad_norm": 3.0068379831247762, "learning_rate": 2.3044811166916704e-06, "loss": 0.7268, "step": 18145 }, { "epoch": 0.5623373404387161, "grad_norm": 2.3844988030692877, "learning_rate": 2.303665775226665e-06, "loss": 0.699, "step": 18150 }, { "epoch": 0.5624922543066055, "grad_norm": 3.1774232487342102, "learning_rate": 2.3028504337616593e-06, "loss": 0.6878, "step": 18155 }, { "epoch": 0.562647168174495, "grad_norm": 2.750928610539289, "learning_rate": 2.3020350922966538e-06, "loss": 0.6711, "step": 18160 }, { "epoch": 0.5628020820423845, "grad_norm": 2.882408242735662, "learning_rate": 2.3012197508316487e-06, "loss": 0.6343, "step": 18165 }, { "epoch": 0.5629569959102739, "grad_norm": 3.085374292279955, "learning_rate": 2.300404409366643e-06, "loss": 0.6375, "step": 18170 }, { "epoch": 0.5631119097781634, "grad_norm": 2.7007834002342728, "learning_rate": 2.2995890679016376e-06, "loss": 0.6519, "step": 18175 }, { "epoch": 0.5632668236460528, "grad_norm": 3.316271748140405, "learning_rate": 2.298773726436632e-06, "loss": 0.7163, "step": 18180 }, { "epoch": 0.5634217375139422, "grad_norm": 2.5956835743660442, "learning_rate": 2.2979583849716265e-06, "loss": 0.6586, "step": 18185 }, { "epoch": 0.5635766513818317, "grad_norm": 2.487776549755664, "learning_rate": 2.297143043506621e-06, "loss": 0.6579, "step": 18190 }, { "epoch": 0.5637315652497211, "grad_norm": 4.070847596826337, "learning_rate": 2.2963277020416154e-06, "loss": 0.6439, "step": 18195 }, { "epoch": 0.5638864791176106, "grad_norm": 2.667030400732893, "learning_rate": 2.29551236057661e-06, "loss": 0.6875, "step": 18200 }, { "epoch": 0.5640413929855, "grad_norm": 3.126605123731389, "learning_rate": 2.2946970191116043e-06, "loss": 0.717, "step": 18205 }, { "epoch": 0.5641963068533895, "grad_norm": 2.7409388817453118, "learning_rate": 2.293881677646599e-06, "loss": 0.7076, "step": 18210 }, { "epoch": 0.564351220721279, "grad_norm": 2.308762928991863, "learning_rate": 2.293066336181593e-06, "loss": 0.7439, "step": 18215 }, { "epoch": 0.5645061345891684, "grad_norm": 2.5502615601783014, "learning_rate": 2.2922509947165873e-06, "loss": 0.6869, "step": 18220 }, { "epoch": 0.5646610484570579, "grad_norm": 3.197045941453803, "learning_rate": 2.2914356532515817e-06, "loss": 0.6405, "step": 18225 }, { "epoch": 0.5648159623249474, "grad_norm": 2.8187560210603806, "learning_rate": 2.290620311786576e-06, "loss": 0.718, "step": 18230 }, { "epoch": 0.5649708761928368, "grad_norm": 2.5257494996524508, "learning_rate": 2.2898049703215707e-06, "loss": 0.6712, "step": 18235 }, { "epoch": 0.5651257900607263, "grad_norm": 2.1386777647615567, "learning_rate": 2.288989628856565e-06, "loss": 0.7113, "step": 18240 }, { "epoch": 0.5652807039286157, "grad_norm": 2.5701224132366876, "learning_rate": 2.2881742873915596e-06, "loss": 0.7266, "step": 18245 }, { "epoch": 0.5654356177965052, "grad_norm": 2.6596485479292657, "learning_rate": 2.287358945926554e-06, "loss": 0.6235, "step": 18250 }, { "epoch": 0.5655905316643945, "grad_norm": 2.828910573390588, "learning_rate": 2.2865436044615485e-06, "loss": 0.658, "step": 18255 }, { "epoch": 0.565745445532284, "grad_norm": 2.5008774612526823, "learning_rate": 2.2857282629965434e-06, "loss": 0.7195, "step": 18260 }, { "epoch": 0.5659003594001735, "grad_norm": 2.2239678251388706, "learning_rate": 2.284912921531538e-06, "loss": 0.7307, "step": 18265 }, { "epoch": 0.5660552732680629, "grad_norm": 3.3794369571551193, "learning_rate": 2.2840975800665323e-06, "loss": 0.6784, "step": 18270 }, { "epoch": 0.5662101871359524, "grad_norm": 3.8528614078218446, "learning_rate": 2.2832822386015268e-06, "loss": 0.6671, "step": 18275 }, { "epoch": 0.5663651010038419, "grad_norm": 2.236757596484272, "learning_rate": 2.2824668971365212e-06, "loss": 0.741, "step": 18280 }, { "epoch": 0.5665200148717313, "grad_norm": 3.4035230877611093, "learning_rate": 2.2816515556715157e-06, "loss": 0.6389, "step": 18285 }, { "epoch": 0.5666749287396208, "grad_norm": 2.631006460544015, "learning_rate": 2.2808362142065097e-06, "loss": 0.6079, "step": 18290 }, { "epoch": 0.5668298426075102, "grad_norm": 2.7661093621965818, "learning_rate": 2.280020872741504e-06, "loss": 0.6975, "step": 18295 }, { "epoch": 0.5669847564753997, "grad_norm": 1.9096024815821542, "learning_rate": 2.2792055312764986e-06, "loss": 0.6213, "step": 18300 }, { "epoch": 0.5671396703432892, "grad_norm": 2.598960279845188, "learning_rate": 2.278390189811493e-06, "loss": 0.7434, "step": 18305 }, { "epoch": 0.5672945842111786, "grad_norm": 2.697812460724292, "learning_rate": 2.2775748483464876e-06, "loss": 0.6747, "step": 18310 }, { "epoch": 0.5674494980790681, "grad_norm": 2.0441532038680283, "learning_rate": 2.276759506881482e-06, "loss": 0.5764, "step": 18315 }, { "epoch": 0.5676044119469575, "grad_norm": 2.9813875180579132, "learning_rate": 2.2759441654164765e-06, "loss": 0.6735, "step": 18320 }, { "epoch": 0.5677593258148469, "grad_norm": 2.7719042881839444, "learning_rate": 2.275128823951471e-06, "loss": 0.6703, "step": 18325 }, { "epoch": 0.5679142396827364, "grad_norm": 2.1668922164191238, "learning_rate": 2.2743134824864654e-06, "loss": 0.7185, "step": 18330 }, { "epoch": 0.5680691535506258, "grad_norm": 2.829801807569982, "learning_rate": 2.27349814102146e-06, "loss": 0.6716, "step": 18335 }, { "epoch": 0.5682240674185153, "grad_norm": 1.90635055776463, "learning_rate": 2.2726827995564543e-06, "loss": 0.6919, "step": 18340 }, { "epoch": 0.5683789812864047, "grad_norm": 2.4315330054552633, "learning_rate": 2.2718674580914488e-06, "loss": 0.6789, "step": 18345 }, { "epoch": 0.5685338951542942, "grad_norm": 2.893334271038875, "learning_rate": 2.2710521166264432e-06, "loss": 0.6961, "step": 18350 }, { "epoch": 0.5686888090221837, "grad_norm": 3.2344296132792216, "learning_rate": 2.270236775161438e-06, "loss": 0.7294, "step": 18355 }, { "epoch": 0.5688437228900731, "grad_norm": 2.6138283837249108, "learning_rate": 2.2694214336964326e-06, "loss": 0.704, "step": 18360 }, { "epoch": 0.5689986367579626, "grad_norm": 2.384593706348268, "learning_rate": 2.2686060922314266e-06, "loss": 0.6121, "step": 18365 }, { "epoch": 0.569153550625852, "grad_norm": 3.104435300891245, "learning_rate": 2.267790750766421e-06, "loss": 0.6451, "step": 18370 }, { "epoch": 0.5693084644937415, "grad_norm": 3.312081048117644, "learning_rate": 2.2669754093014155e-06, "loss": 0.7247, "step": 18375 }, { "epoch": 0.569463378361631, "grad_norm": 3.1813076313495445, "learning_rate": 2.26616006783641e-06, "loss": 0.6824, "step": 18380 }, { "epoch": 0.5696182922295204, "grad_norm": 2.7476366215052335, "learning_rate": 2.2653447263714044e-06, "loss": 0.6167, "step": 18385 }, { "epoch": 0.5697732060974099, "grad_norm": 3.493485700337425, "learning_rate": 2.264529384906399e-06, "loss": 0.6728, "step": 18390 }, { "epoch": 0.5699281199652992, "grad_norm": 2.3760276681977364, "learning_rate": 2.2637140434413934e-06, "loss": 0.615, "step": 18395 }, { "epoch": 0.5700830338331887, "grad_norm": 4.4574472312475875, "learning_rate": 2.262898701976388e-06, "loss": 0.7037, "step": 18400 }, { "epoch": 0.5702379477010782, "grad_norm": 2.4605224577978153, "learning_rate": 2.2620833605113823e-06, "loss": 0.6697, "step": 18405 }, { "epoch": 0.5703928615689676, "grad_norm": 3.09098693605138, "learning_rate": 2.2612680190463767e-06, "loss": 0.7114, "step": 18410 }, { "epoch": 0.5705477754368571, "grad_norm": 2.706712012024685, "learning_rate": 2.260452677581371e-06, "loss": 0.6427, "step": 18415 }, { "epoch": 0.5707026893047465, "grad_norm": 3.5010775762037776, "learning_rate": 2.2596373361163657e-06, "loss": 0.6575, "step": 18420 }, { "epoch": 0.570857603172636, "grad_norm": 2.962879305994361, "learning_rate": 2.25882199465136e-06, "loss": 0.748, "step": 18425 }, { "epoch": 0.5710125170405255, "grad_norm": 3.096627662869357, "learning_rate": 2.2580066531863546e-06, "loss": 0.7002, "step": 18430 }, { "epoch": 0.5711674309084149, "grad_norm": 2.889844958640162, "learning_rate": 2.257191311721349e-06, "loss": 0.7156, "step": 18435 }, { "epoch": 0.5713223447763044, "grad_norm": 2.9597388296040705, "learning_rate": 2.2563759702563435e-06, "loss": 0.6474, "step": 18440 }, { "epoch": 0.5714772586441939, "grad_norm": 3.1984653780680903, "learning_rate": 2.255560628791338e-06, "loss": 0.6193, "step": 18445 }, { "epoch": 0.5716321725120833, "grad_norm": 2.720700843270717, "learning_rate": 2.2547452873263324e-06, "loss": 0.7239, "step": 18450 }, { "epoch": 0.5717870863799728, "grad_norm": 2.536842844290221, "learning_rate": 2.253929945861327e-06, "loss": 0.6743, "step": 18455 }, { "epoch": 0.5719420002478622, "grad_norm": 2.8398114496823785, "learning_rate": 2.2531146043963213e-06, "loss": 0.5684, "step": 18460 }, { "epoch": 0.5720969141157516, "grad_norm": 2.4715851609177526, "learning_rate": 2.2522992629313158e-06, "loss": 0.6785, "step": 18465 }, { "epoch": 0.572251827983641, "grad_norm": 2.734496375627323, "learning_rate": 2.2514839214663102e-06, "loss": 0.5828, "step": 18470 }, { "epoch": 0.5724067418515305, "grad_norm": 2.392219492881473, "learning_rate": 2.2506685800013047e-06, "loss": 0.604, "step": 18475 }, { "epoch": 0.57256165571942, "grad_norm": 3.0115555268114327, "learning_rate": 2.249853238536299e-06, "loss": 0.6853, "step": 18480 }, { "epoch": 0.5727165695873094, "grad_norm": 2.4771394725994975, "learning_rate": 2.2490378970712936e-06, "loss": 0.7282, "step": 18485 }, { "epoch": 0.5728714834551989, "grad_norm": 3.680074084095745, "learning_rate": 2.248222555606288e-06, "loss": 0.6932, "step": 18490 }, { "epoch": 0.5730263973230884, "grad_norm": 2.4661675562265026, "learning_rate": 2.2474072141412825e-06, "loss": 0.6992, "step": 18495 }, { "epoch": 0.5731813111909778, "grad_norm": 2.9618031907879208, "learning_rate": 2.246591872676277e-06, "loss": 0.6773, "step": 18500 }, { "epoch": 0.5733362250588673, "grad_norm": 3.1347976526909926, "learning_rate": 2.2457765312112715e-06, "loss": 0.6541, "step": 18505 }, { "epoch": 0.5734911389267567, "grad_norm": 2.795860279200649, "learning_rate": 2.244961189746266e-06, "loss": 0.5901, "step": 18510 }, { "epoch": 0.5736460527946462, "grad_norm": 2.3614084260348362, "learning_rate": 2.2441458482812604e-06, "loss": 0.6502, "step": 18515 }, { "epoch": 0.5738009666625357, "grad_norm": 2.5699477919374605, "learning_rate": 2.243330506816255e-06, "loss": 0.6821, "step": 18520 }, { "epoch": 0.5739558805304251, "grad_norm": 3.0289193166717037, "learning_rate": 2.2425151653512493e-06, "loss": 0.7417, "step": 18525 }, { "epoch": 0.5741107943983146, "grad_norm": 3.1428414483875446, "learning_rate": 2.2416998238862438e-06, "loss": 0.7702, "step": 18530 }, { "epoch": 0.574265708266204, "grad_norm": 2.267785342677074, "learning_rate": 2.240884482421238e-06, "loss": 0.6512, "step": 18535 }, { "epoch": 0.5744206221340934, "grad_norm": 2.218913131093005, "learning_rate": 2.2400691409562327e-06, "loss": 0.7618, "step": 18540 }, { "epoch": 0.5745755360019829, "grad_norm": 2.4253128612431367, "learning_rate": 2.239253799491227e-06, "loss": 0.6985, "step": 18545 }, { "epoch": 0.5747304498698723, "grad_norm": 2.3749533153059703, "learning_rate": 2.2384384580262216e-06, "loss": 0.677, "step": 18550 }, { "epoch": 0.5748853637377618, "grad_norm": 2.6790192894946205, "learning_rate": 2.237623116561216e-06, "loss": 0.7099, "step": 18555 }, { "epoch": 0.5750402776056512, "grad_norm": 3.119343819850448, "learning_rate": 2.2368077750962105e-06, "loss": 0.6568, "step": 18560 }, { "epoch": 0.5751951914735407, "grad_norm": 2.971491539560694, "learning_rate": 2.235992433631205e-06, "loss": 0.7453, "step": 18565 }, { "epoch": 0.5753501053414302, "grad_norm": 3.192070402686928, "learning_rate": 2.2351770921661994e-06, "loss": 0.6929, "step": 18570 }, { "epoch": 0.5755050192093196, "grad_norm": 2.7041065170394925, "learning_rate": 2.234361750701194e-06, "loss": 0.6532, "step": 18575 }, { "epoch": 0.5756599330772091, "grad_norm": 2.8970311386274417, "learning_rate": 2.2335464092361883e-06, "loss": 0.8241, "step": 18580 }, { "epoch": 0.5758148469450985, "grad_norm": 2.753525429763718, "learning_rate": 2.232731067771183e-06, "loss": 0.6856, "step": 18585 }, { "epoch": 0.575969760812988, "grad_norm": 3.3277878158871643, "learning_rate": 2.2319157263061773e-06, "loss": 0.7407, "step": 18590 }, { "epoch": 0.5761246746808775, "grad_norm": 2.6641621419870467, "learning_rate": 2.2311003848411717e-06, "loss": 0.6902, "step": 18595 }, { "epoch": 0.5762795885487669, "grad_norm": 3.188527288529295, "learning_rate": 2.230285043376166e-06, "loss": 0.6602, "step": 18600 }, { "epoch": 0.5764345024166564, "grad_norm": 2.7040058626412264, "learning_rate": 2.2294697019111606e-06, "loss": 0.7548, "step": 18605 }, { "epoch": 0.5765894162845457, "grad_norm": 3.3146930800787593, "learning_rate": 2.228654360446155e-06, "loss": 0.7194, "step": 18610 }, { "epoch": 0.5767443301524352, "grad_norm": 3.1251996120980565, "learning_rate": 2.2278390189811496e-06, "loss": 0.6456, "step": 18615 }, { "epoch": 0.5768992440203247, "grad_norm": 2.4916414904092092, "learning_rate": 2.227023677516144e-06, "loss": 0.7475, "step": 18620 }, { "epoch": 0.5770541578882141, "grad_norm": 3.6271415967310254, "learning_rate": 2.2262083360511385e-06, "loss": 0.7367, "step": 18625 }, { "epoch": 0.5772090717561036, "grad_norm": 2.671557312755965, "learning_rate": 2.225392994586133e-06, "loss": 0.6733, "step": 18630 }, { "epoch": 0.577363985623993, "grad_norm": 2.9682051032028944, "learning_rate": 2.2245776531211274e-06, "loss": 0.7629, "step": 18635 }, { "epoch": 0.5775188994918825, "grad_norm": 2.4985308939424957, "learning_rate": 2.223762311656122e-06, "loss": 0.6875, "step": 18640 }, { "epoch": 0.577673813359772, "grad_norm": 2.1654884965015375, "learning_rate": 2.2229469701911163e-06, "loss": 0.696, "step": 18645 }, { "epoch": 0.5778287272276614, "grad_norm": 2.96417086407002, "learning_rate": 2.2221316287261108e-06, "loss": 0.7042, "step": 18650 }, { "epoch": 0.5779836410955509, "grad_norm": 2.91698927159859, "learning_rate": 2.2213162872611052e-06, "loss": 0.7148, "step": 18655 }, { "epoch": 0.5781385549634404, "grad_norm": 3.180866373295808, "learning_rate": 2.2205009457960997e-06, "loss": 0.7579, "step": 18660 }, { "epoch": 0.5782934688313298, "grad_norm": 2.2692651416637664, "learning_rate": 2.219685604331094e-06, "loss": 0.6106, "step": 18665 }, { "epoch": 0.5784483826992193, "grad_norm": 2.2914074611254054, "learning_rate": 2.2188702628660886e-06, "loss": 0.7121, "step": 18670 }, { "epoch": 0.5786032965671087, "grad_norm": 2.4140493270409693, "learning_rate": 2.218054921401083e-06, "loss": 0.6147, "step": 18675 }, { "epoch": 0.5787582104349981, "grad_norm": 2.9490094390830084, "learning_rate": 2.2172395799360775e-06, "loss": 0.7279, "step": 18680 }, { "epoch": 0.5789131243028875, "grad_norm": 2.5877558638946367, "learning_rate": 2.216424238471072e-06, "loss": 0.6861, "step": 18685 }, { "epoch": 0.579068038170777, "grad_norm": 2.7050712000309285, "learning_rate": 2.2156088970060664e-06, "loss": 0.752, "step": 18690 }, { "epoch": 0.5792229520386665, "grad_norm": 2.806522441065996, "learning_rate": 2.214793555541061e-06, "loss": 0.774, "step": 18695 }, { "epoch": 0.5793778659065559, "grad_norm": 2.9826444320810257, "learning_rate": 2.2139782140760554e-06, "loss": 0.653, "step": 18700 }, { "epoch": 0.5795327797744454, "grad_norm": 2.460816591176066, "learning_rate": 2.21316287261105e-06, "loss": 0.7168, "step": 18705 }, { "epoch": 0.5796876936423349, "grad_norm": 2.535071693080727, "learning_rate": 2.212347531146044e-06, "loss": 0.6517, "step": 18710 }, { "epoch": 0.5798426075102243, "grad_norm": 3.8429964024597942, "learning_rate": 2.2115321896810383e-06, "loss": 0.6967, "step": 18715 }, { "epoch": 0.5799975213781138, "grad_norm": 2.516708563365469, "learning_rate": 2.2107168482160328e-06, "loss": 0.7326, "step": 18720 }, { "epoch": 0.5801524352460032, "grad_norm": 1.9644458077216533, "learning_rate": 2.2099015067510272e-06, "loss": 0.5603, "step": 18725 }, { "epoch": 0.5803073491138927, "grad_norm": 3.1148757738973964, "learning_rate": 2.209086165286022e-06, "loss": 0.6373, "step": 18730 }, { "epoch": 0.5804622629817822, "grad_norm": 2.631298482925049, "learning_rate": 2.2082708238210166e-06, "loss": 0.7334, "step": 18735 }, { "epoch": 0.5806171768496716, "grad_norm": 2.52591163131851, "learning_rate": 2.207455482356011e-06, "loss": 0.6024, "step": 18740 }, { "epoch": 0.5807720907175611, "grad_norm": 2.5415478778746716, "learning_rate": 2.2066401408910055e-06, "loss": 0.7168, "step": 18745 }, { "epoch": 0.5809270045854504, "grad_norm": 2.3889596342412043, "learning_rate": 2.205824799426e-06, "loss": 0.7044, "step": 18750 }, { "epoch": 0.5810819184533399, "grad_norm": 2.457299735253565, "learning_rate": 2.2050094579609944e-06, "loss": 0.6512, "step": 18755 }, { "epoch": 0.5812368323212294, "grad_norm": 2.526468929446189, "learning_rate": 2.204194116495989e-06, "loss": 0.6581, "step": 18760 }, { "epoch": 0.5813917461891188, "grad_norm": 3.7284829342574617, "learning_rate": 2.2033787750309833e-06, "loss": 0.7158, "step": 18765 }, { "epoch": 0.5815466600570083, "grad_norm": 3.231182409073111, "learning_rate": 2.202563433565978e-06, "loss": 0.6573, "step": 18770 }, { "epoch": 0.5817015739248977, "grad_norm": 2.5255557224725473, "learning_rate": 2.2017480921009722e-06, "loss": 0.6786, "step": 18775 }, { "epoch": 0.5818564877927872, "grad_norm": 2.318866269708049, "learning_rate": 2.2009327506359667e-06, "loss": 0.7206, "step": 18780 }, { "epoch": 0.5820114016606767, "grad_norm": 2.276687272542749, "learning_rate": 2.2001174091709607e-06, "loss": 0.6699, "step": 18785 }, { "epoch": 0.5821663155285661, "grad_norm": 2.5415066977442007, "learning_rate": 2.199302067705955e-06, "loss": 0.6699, "step": 18790 }, { "epoch": 0.5823212293964556, "grad_norm": 2.9231712975270443, "learning_rate": 2.1984867262409497e-06, "loss": 0.7106, "step": 18795 }, { "epoch": 0.582476143264345, "grad_norm": 3.33529536706819, "learning_rate": 2.197671384775944e-06, "loss": 0.6581, "step": 18800 }, { "epoch": 0.5826310571322345, "grad_norm": 2.3367074803798915, "learning_rate": 2.1968560433109386e-06, "loss": 0.6947, "step": 18805 }, { "epoch": 0.582785971000124, "grad_norm": 2.9294002204601384, "learning_rate": 2.196040701845933e-06, "loss": 0.6406, "step": 18810 }, { "epoch": 0.5829408848680134, "grad_norm": 2.4912023275175126, "learning_rate": 2.1952253603809275e-06, "loss": 0.7017, "step": 18815 }, { "epoch": 0.5830957987359029, "grad_norm": 3.30558829717883, "learning_rate": 2.194410018915922e-06, "loss": 0.6624, "step": 18820 }, { "epoch": 0.5832507126037922, "grad_norm": 2.440835271575489, "learning_rate": 2.193594677450917e-06, "loss": 0.6989, "step": 18825 }, { "epoch": 0.5834056264716817, "grad_norm": 2.404429660206303, "learning_rate": 2.1927793359859113e-06, "loss": 0.6933, "step": 18830 }, { "epoch": 0.5835605403395712, "grad_norm": 3.0372742424449832, "learning_rate": 2.1919639945209058e-06, "loss": 0.7054, "step": 18835 }, { "epoch": 0.5837154542074606, "grad_norm": 3.4549503703100557, "learning_rate": 2.1911486530559002e-06, "loss": 0.7054, "step": 18840 }, { "epoch": 0.5838703680753501, "grad_norm": 2.57336536015316, "learning_rate": 2.1903333115908947e-06, "loss": 0.7052, "step": 18845 }, { "epoch": 0.5840252819432395, "grad_norm": 3.664875535984369, "learning_rate": 2.189517970125889e-06, "loss": 0.6356, "step": 18850 }, { "epoch": 0.584180195811129, "grad_norm": 2.2319917429019864, "learning_rate": 2.1887026286608836e-06, "loss": 0.6759, "step": 18855 }, { "epoch": 0.5843351096790185, "grad_norm": 3.5694536198665845, "learning_rate": 2.1878872871958776e-06, "loss": 0.7644, "step": 18860 }, { "epoch": 0.5844900235469079, "grad_norm": 3.0590969011560687, "learning_rate": 2.187071945730872e-06, "loss": 0.6331, "step": 18865 }, { "epoch": 0.5846449374147974, "grad_norm": 2.1468358555947136, "learning_rate": 2.1862566042658665e-06, "loss": 0.6498, "step": 18870 }, { "epoch": 0.5847998512826869, "grad_norm": 2.4370569196759773, "learning_rate": 2.185441262800861e-06, "loss": 0.725, "step": 18875 }, { "epoch": 0.5849547651505763, "grad_norm": 2.744486606742887, "learning_rate": 2.1846259213358555e-06, "loss": 0.6652, "step": 18880 }, { "epoch": 0.5851096790184658, "grad_norm": 2.7478524859970412, "learning_rate": 2.18381057987085e-06, "loss": 0.6117, "step": 18885 }, { "epoch": 0.5852645928863552, "grad_norm": 2.9938894504379268, "learning_rate": 2.1829952384058444e-06, "loss": 0.6989, "step": 18890 }, { "epoch": 0.5854195067542446, "grad_norm": 2.5591064328383637, "learning_rate": 2.182179896940839e-06, "loss": 0.714, "step": 18895 }, { "epoch": 0.585574420622134, "grad_norm": 2.7179364371638677, "learning_rate": 2.1813645554758333e-06, "loss": 0.6137, "step": 18900 }, { "epoch": 0.5857293344900235, "grad_norm": 2.3460435456995214, "learning_rate": 2.1805492140108278e-06, "loss": 0.7006, "step": 18905 }, { "epoch": 0.585884248357913, "grad_norm": 2.369355904897061, "learning_rate": 2.1797338725458222e-06, "loss": 0.687, "step": 18910 }, { "epoch": 0.5860391622258024, "grad_norm": 2.800372131676568, "learning_rate": 2.1789185310808167e-06, "loss": 0.7107, "step": 18915 }, { "epoch": 0.5861940760936919, "grad_norm": 2.5046436617083114, "learning_rate": 2.1781031896158116e-06, "loss": 0.7303, "step": 18920 }, { "epoch": 0.5863489899615814, "grad_norm": 3.0508371743224587, "learning_rate": 2.177287848150806e-06, "loss": 0.6838, "step": 18925 }, { "epoch": 0.5865039038294708, "grad_norm": 2.351714865863493, "learning_rate": 2.1764725066858005e-06, "loss": 0.6936, "step": 18930 }, { "epoch": 0.5866588176973603, "grad_norm": 2.485672095188709, "learning_rate": 2.175657165220795e-06, "loss": 0.6395, "step": 18935 }, { "epoch": 0.5868137315652497, "grad_norm": 3.1314826110109903, "learning_rate": 2.174841823755789e-06, "loss": 0.6424, "step": 18940 }, { "epoch": 0.5869686454331392, "grad_norm": 2.422145055422998, "learning_rate": 2.1740264822907834e-06, "loss": 0.6544, "step": 18945 }, { "epoch": 0.5871235593010287, "grad_norm": 2.4706891496724803, "learning_rate": 2.173211140825778e-06, "loss": 0.6862, "step": 18950 }, { "epoch": 0.5872784731689181, "grad_norm": 3.3593611536086434, "learning_rate": 2.1723957993607724e-06, "loss": 0.6669, "step": 18955 }, { "epoch": 0.5874333870368076, "grad_norm": 2.602111572803373, "learning_rate": 2.171580457895767e-06, "loss": 0.6869, "step": 18960 }, { "epoch": 0.5875883009046969, "grad_norm": 2.6170368724447357, "learning_rate": 2.1707651164307613e-06, "loss": 0.7425, "step": 18965 }, { "epoch": 0.5877432147725864, "grad_norm": 2.2318778849370746, "learning_rate": 2.1699497749657557e-06, "loss": 0.6575, "step": 18970 }, { "epoch": 0.5878981286404759, "grad_norm": 3.4004119018699845, "learning_rate": 2.16913443350075e-06, "loss": 0.7414, "step": 18975 }, { "epoch": 0.5880530425083653, "grad_norm": 3.1199674479100303, "learning_rate": 2.1683190920357446e-06, "loss": 0.6931, "step": 18980 }, { "epoch": 0.5882079563762548, "grad_norm": 2.7580970301563044, "learning_rate": 2.167503750570739e-06, "loss": 0.6321, "step": 18985 }, { "epoch": 0.5883628702441442, "grad_norm": 2.7030156541278503, "learning_rate": 2.1666884091057336e-06, "loss": 0.6666, "step": 18990 }, { "epoch": 0.5885177841120337, "grad_norm": 2.1575776147866397, "learning_rate": 2.165873067640728e-06, "loss": 0.6317, "step": 18995 }, { "epoch": 0.5886726979799232, "grad_norm": 2.1901284722471908, "learning_rate": 2.1650577261757225e-06, "loss": 0.6795, "step": 19000 }, { "epoch": 0.5888276118478126, "grad_norm": 2.1865520638952702, "learning_rate": 2.164242384710717e-06, "loss": 0.662, "step": 19005 }, { "epoch": 0.5889825257157021, "grad_norm": 2.601941933650103, "learning_rate": 2.1634270432457114e-06, "loss": 0.6348, "step": 19010 }, { "epoch": 0.5891374395835915, "grad_norm": 2.1878200546301305, "learning_rate": 2.162611701780706e-06, "loss": 0.6701, "step": 19015 }, { "epoch": 0.589292353451481, "grad_norm": 3.072418531615749, "learning_rate": 2.1617963603157003e-06, "loss": 0.7104, "step": 19020 }, { "epoch": 0.5894472673193705, "grad_norm": 3.2019991236016123, "learning_rate": 2.1609810188506948e-06, "loss": 0.7485, "step": 19025 }, { "epoch": 0.5896021811872599, "grad_norm": 2.4891450047986767, "learning_rate": 2.1601656773856892e-06, "loss": 0.6752, "step": 19030 }, { "epoch": 0.5897570950551493, "grad_norm": 2.2656400005438226, "learning_rate": 2.1593503359206837e-06, "loss": 0.6766, "step": 19035 }, { "epoch": 0.5899120089230387, "grad_norm": 3.3774498897084237, "learning_rate": 2.158534994455678e-06, "loss": 0.7527, "step": 19040 }, { "epoch": 0.5900669227909282, "grad_norm": 2.7215027270914147, "learning_rate": 2.1577196529906726e-06, "loss": 0.612, "step": 19045 }, { "epoch": 0.5902218366588177, "grad_norm": 2.140013579337898, "learning_rate": 2.156904311525667e-06, "loss": 0.7221, "step": 19050 }, { "epoch": 0.5903767505267071, "grad_norm": 2.499150148961044, "learning_rate": 2.1560889700606615e-06, "loss": 0.6807, "step": 19055 }, { "epoch": 0.5905316643945966, "grad_norm": 3.1237802242151256, "learning_rate": 2.155273628595656e-06, "loss": 0.7844, "step": 19060 }, { "epoch": 0.590686578262486, "grad_norm": 2.9616572448247847, "learning_rate": 2.1544582871306505e-06, "loss": 0.6346, "step": 19065 }, { "epoch": 0.5908414921303755, "grad_norm": 4.049982022013961, "learning_rate": 2.153642945665645e-06, "loss": 0.68, "step": 19070 }, { "epoch": 0.590996405998265, "grad_norm": 2.329226768441293, "learning_rate": 2.1528276042006394e-06, "loss": 0.685, "step": 19075 }, { "epoch": 0.5911513198661544, "grad_norm": 2.724367765924062, "learning_rate": 2.152012262735634e-06, "loss": 0.7415, "step": 19080 }, { "epoch": 0.5913062337340439, "grad_norm": 2.898164008696323, "learning_rate": 2.1511969212706283e-06, "loss": 0.6243, "step": 19085 }, { "epoch": 0.5914611476019334, "grad_norm": 2.3292708797771224, "learning_rate": 2.1503815798056228e-06, "loss": 0.7224, "step": 19090 }, { "epoch": 0.5916160614698228, "grad_norm": 2.4127498377762633, "learning_rate": 2.149566238340617e-06, "loss": 0.6794, "step": 19095 }, { "epoch": 0.5917709753377123, "grad_norm": 2.5509008096344403, "learning_rate": 2.1487508968756117e-06, "loss": 0.6474, "step": 19100 }, { "epoch": 0.5919258892056017, "grad_norm": 3.134193977256001, "learning_rate": 2.147935555410606e-06, "loss": 0.6699, "step": 19105 }, { "epoch": 0.5920808030734911, "grad_norm": 2.8868856417507094, "learning_rate": 2.1471202139456006e-06, "loss": 0.7239, "step": 19110 }, { "epoch": 0.5922357169413806, "grad_norm": 2.3611201793695997, "learning_rate": 2.146304872480595e-06, "loss": 0.6829, "step": 19115 }, { "epoch": 0.59239063080927, "grad_norm": 3.115465137406719, "learning_rate": 2.1454895310155895e-06, "loss": 0.6341, "step": 19120 }, { "epoch": 0.5925455446771595, "grad_norm": 2.61561622011835, "learning_rate": 2.144674189550584e-06, "loss": 0.7351, "step": 19125 }, { "epoch": 0.5927004585450489, "grad_norm": 2.511165490388606, "learning_rate": 2.1438588480855784e-06, "loss": 0.696, "step": 19130 }, { "epoch": 0.5928553724129384, "grad_norm": 2.8604773840896174, "learning_rate": 2.143043506620573e-06, "loss": 0.6406, "step": 19135 }, { "epoch": 0.5930102862808279, "grad_norm": 2.308148910684349, "learning_rate": 2.1422281651555673e-06, "loss": 0.6255, "step": 19140 }, { "epoch": 0.5931652001487173, "grad_norm": 2.834480679697188, "learning_rate": 2.141412823690562e-06, "loss": 0.6149, "step": 19145 }, { "epoch": 0.5933201140166068, "grad_norm": 2.4067959669411843, "learning_rate": 2.1405974822255563e-06, "loss": 0.6077, "step": 19150 }, { "epoch": 0.5934750278844962, "grad_norm": 2.4395098587907036, "learning_rate": 2.1397821407605507e-06, "loss": 0.7033, "step": 19155 }, { "epoch": 0.5936299417523857, "grad_norm": 2.5703787089329926, "learning_rate": 2.138966799295545e-06, "loss": 0.6828, "step": 19160 }, { "epoch": 0.5937848556202752, "grad_norm": 2.897850316800759, "learning_rate": 2.1381514578305396e-06, "loss": 0.6371, "step": 19165 }, { "epoch": 0.5939397694881646, "grad_norm": 2.734048792558428, "learning_rate": 2.137336116365534e-06, "loss": 0.7358, "step": 19170 }, { "epoch": 0.5940946833560541, "grad_norm": 2.8729399478474495, "learning_rate": 2.1365207749005286e-06, "loss": 0.7059, "step": 19175 }, { "epoch": 0.5942495972239434, "grad_norm": 3.0360613552912232, "learning_rate": 2.135705433435523e-06, "loss": 0.6704, "step": 19180 }, { "epoch": 0.5944045110918329, "grad_norm": 2.919691635501608, "learning_rate": 2.1348900919705175e-06, "loss": 0.6646, "step": 19185 }, { "epoch": 0.5945594249597224, "grad_norm": 2.738525495128748, "learning_rate": 2.134074750505512e-06, "loss": 0.7414, "step": 19190 }, { "epoch": 0.5947143388276118, "grad_norm": 2.2175317545231237, "learning_rate": 2.1332594090405064e-06, "loss": 0.7489, "step": 19195 }, { "epoch": 0.5948692526955013, "grad_norm": 2.582431347534493, "learning_rate": 2.132444067575501e-06, "loss": 0.7003, "step": 19200 }, { "epoch": 0.5950241665633907, "grad_norm": 2.66400216795283, "learning_rate": 2.1316287261104953e-06, "loss": 0.6784, "step": 19205 }, { "epoch": 0.5951790804312802, "grad_norm": 2.8173930872499575, "learning_rate": 2.1308133846454898e-06, "loss": 0.7048, "step": 19210 }, { "epoch": 0.5953339942991697, "grad_norm": 3.6516842776144287, "learning_rate": 2.1299980431804842e-06, "loss": 0.7123, "step": 19215 }, { "epoch": 0.5954889081670591, "grad_norm": 4.594303884746544, "learning_rate": 2.1291827017154787e-06, "loss": 0.7226, "step": 19220 }, { "epoch": 0.5956438220349486, "grad_norm": 2.5583667477876753, "learning_rate": 2.128367360250473e-06, "loss": 0.6976, "step": 19225 }, { "epoch": 0.595798735902838, "grad_norm": 2.6335998186187175, "learning_rate": 2.1275520187854676e-06, "loss": 0.6615, "step": 19230 }, { "epoch": 0.5959536497707275, "grad_norm": 3.2058064501798245, "learning_rate": 2.126736677320462e-06, "loss": 0.7194, "step": 19235 }, { "epoch": 0.596108563638617, "grad_norm": 2.7798473049645858, "learning_rate": 2.1259213358554565e-06, "loss": 0.7222, "step": 19240 }, { "epoch": 0.5962634775065064, "grad_norm": 2.972509409578911, "learning_rate": 2.125105994390451e-06, "loss": 0.6633, "step": 19245 }, { "epoch": 0.5964183913743958, "grad_norm": 2.4024564219815137, "learning_rate": 2.1242906529254454e-06, "loss": 0.6432, "step": 19250 }, { "epoch": 0.5965733052422852, "grad_norm": 2.2816767446650847, "learning_rate": 2.12347531146044e-06, "loss": 0.6333, "step": 19255 }, { "epoch": 0.5967282191101747, "grad_norm": 2.8107844915321474, "learning_rate": 2.1226599699954344e-06, "loss": 0.6393, "step": 19260 }, { "epoch": 0.5968831329780642, "grad_norm": 2.425973799549895, "learning_rate": 2.121844628530429e-06, "loss": 0.6853, "step": 19265 }, { "epoch": 0.5970380468459536, "grad_norm": 2.69217038404827, "learning_rate": 2.1210292870654233e-06, "loss": 0.6804, "step": 19270 }, { "epoch": 0.5971929607138431, "grad_norm": 2.607356167486677, "learning_rate": 2.1202139456004177e-06, "loss": 0.7307, "step": 19275 }, { "epoch": 0.5973478745817326, "grad_norm": 2.913154850556315, "learning_rate": 2.119398604135412e-06, "loss": 0.6704, "step": 19280 }, { "epoch": 0.597502788449622, "grad_norm": 2.1308949593012962, "learning_rate": 2.1185832626704062e-06, "loss": 0.6764, "step": 19285 }, { "epoch": 0.5976577023175115, "grad_norm": 2.9348277019642666, "learning_rate": 2.1177679212054007e-06, "loss": 0.6698, "step": 19290 }, { "epoch": 0.5978126161854009, "grad_norm": 2.4270652699228195, "learning_rate": 2.1169525797403956e-06, "loss": 0.6482, "step": 19295 }, { "epoch": 0.5979675300532904, "grad_norm": 2.5626509646777613, "learning_rate": 2.11613723827539e-06, "loss": 0.6807, "step": 19300 }, { "epoch": 0.5981224439211799, "grad_norm": 2.588807664590295, "learning_rate": 2.1153218968103845e-06, "loss": 0.7326, "step": 19305 }, { "epoch": 0.5982773577890693, "grad_norm": 3.940783198167326, "learning_rate": 2.114506555345379e-06, "loss": 0.6868, "step": 19310 }, { "epoch": 0.5984322716569588, "grad_norm": 2.776089072945623, "learning_rate": 2.1136912138803734e-06, "loss": 0.7302, "step": 19315 }, { "epoch": 0.5985871855248481, "grad_norm": 2.3427868454463012, "learning_rate": 2.112875872415368e-06, "loss": 0.6934, "step": 19320 }, { "epoch": 0.5987420993927376, "grad_norm": 2.23474586640177, "learning_rate": 2.1120605309503623e-06, "loss": 0.6265, "step": 19325 }, { "epoch": 0.598897013260627, "grad_norm": 2.45421335118253, "learning_rate": 2.1112451894853568e-06, "loss": 0.6513, "step": 19330 }, { "epoch": 0.5990519271285165, "grad_norm": 2.888994300487293, "learning_rate": 2.1104298480203512e-06, "loss": 0.6267, "step": 19335 }, { "epoch": 0.599206840996406, "grad_norm": 2.8327782409421545, "learning_rate": 2.1096145065553457e-06, "loss": 0.7072, "step": 19340 }, { "epoch": 0.5993617548642954, "grad_norm": 3.396253458922749, "learning_rate": 2.10879916509034e-06, "loss": 0.6248, "step": 19345 }, { "epoch": 0.5995166687321849, "grad_norm": 2.9843304362686127, "learning_rate": 2.1079838236253346e-06, "loss": 0.7342, "step": 19350 }, { "epoch": 0.5996715826000744, "grad_norm": 2.6159671086413017, "learning_rate": 2.107168482160329e-06, "loss": 0.6549, "step": 19355 }, { "epoch": 0.5998264964679638, "grad_norm": 2.3457497379250225, "learning_rate": 2.106353140695323e-06, "loss": 0.6031, "step": 19360 }, { "epoch": 0.5999814103358533, "grad_norm": 3.3044588152977123, "learning_rate": 2.1055377992303176e-06, "loss": 0.678, "step": 19365 }, { "epoch": 0.6001363242037427, "grad_norm": 3.4330819835088566, "learning_rate": 2.104722457765312e-06, "loss": 0.7291, "step": 19370 }, { "epoch": 0.6002912380716322, "grad_norm": 2.569715968786479, "learning_rate": 2.1039071163003065e-06, "loss": 0.6436, "step": 19375 }, { "epoch": 0.6004461519395217, "grad_norm": 2.8376849467190723, "learning_rate": 2.103091774835301e-06, "loss": 0.6263, "step": 19380 }, { "epoch": 0.6006010658074111, "grad_norm": 2.8374807047560537, "learning_rate": 2.1022764333702954e-06, "loss": 0.6441, "step": 19385 }, { "epoch": 0.6007559796753005, "grad_norm": 2.352679019069344, "learning_rate": 2.1014610919052903e-06, "loss": 0.6779, "step": 19390 }, { "epoch": 0.6009108935431899, "grad_norm": 2.3957753382897624, "learning_rate": 2.1006457504402848e-06, "loss": 0.6864, "step": 19395 }, { "epoch": 0.6010658074110794, "grad_norm": 2.8775659970021166, "learning_rate": 2.0998304089752792e-06, "loss": 0.7027, "step": 19400 }, { "epoch": 0.6012207212789689, "grad_norm": 2.098175208576315, "learning_rate": 2.0990150675102737e-06, "loss": 0.6284, "step": 19405 }, { "epoch": 0.6013756351468583, "grad_norm": 2.1132334539498654, "learning_rate": 2.098199726045268e-06, "loss": 0.6578, "step": 19410 }, { "epoch": 0.6015305490147478, "grad_norm": 2.3858574370391015, "learning_rate": 2.0973843845802626e-06, "loss": 0.7394, "step": 19415 }, { "epoch": 0.6016854628826372, "grad_norm": 2.6019815266949258, "learning_rate": 2.096569043115257e-06, "loss": 0.7682, "step": 19420 }, { "epoch": 0.6018403767505267, "grad_norm": 3.029110103631117, "learning_rate": 2.0957537016502515e-06, "loss": 0.6628, "step": 19425 }, { "epoch": 0.6019952906184162, "grad_norm": 2.538802273595332, "learning_rate": 2.094938360185246e-06, "loss": 0.6973, "step": 19430 }, { "epoch": 0.6021502044863056, "grad_norm": 2.4006664335717947, "learning_rate": 2.09412301872024e-06, "loss": 0.6555, "step": 19435 }, { "epoch": 0.6023051183541951, "grad_norm": 2.328969669294531, "learning_rate": 2.0933076772552345e-06, "loss": 0.6957, "step": 19440 }, { "epoch": 0.6024600322220846, "grad_norm": 3.112491487277411, "learning_rate": 2.092492335790229e-06, "loss": 0.6498, "step": 19445 }, { "epoch": 0.602614946089974, "grad_norm": 3.2331145627480105, "learning_rate": 2.0916769943252234e-06, "loss": 0.6887, "step": 19450 }, { "epoch": 0.6027698599578635, "grad_norm": 2.4805186825325323, "learning_rate": 2.090861652860218e-06, "loss": 0.6777, "step": 19455 }, { "epoch": 0.6029247738257529, "grad_norm": 2.6037418285158846, "learning_rate": 2.0900463113952123e-06, "loss": 0.6701, "step": 19460 }, { "epoch": 0.6030796876936423, "grad_norm": 2.9237019290230633, "learning_rate": 2.0892309699302068e-06, "loss": 0.7032, "step": 19465 }, { "epoch": 0.6032346015615317, "grad_norm": 2.7231193983010353, "learning_rate": 2.0884156284652012e-06, "loss": 0.6802, "step": 19470 }, { "epoch": 0.6033895154294212, "grad_norm": 2.6603051151099053, "learning_rate": 2.0876002870001957e-06, "loss": 0.727, "step": 19475 }, { "epoch": 0.6035444292973107, "grad_norm": 2.1623880473808685, "learning_rate": 2.08678494553519e-06, "loss": 0.6704, "step": 19480 }, { "epoch": 0.6036993431652001, "grad_norm": 3.49324427139112, "learning_rate": 2.085969604070185e-06, "loss": 0.68, "step": 19485 }, { "epoch": 0.6038542570330896, "grad_norm": 2.7636180709775555, "learning_rate": 2.0851542626051795e-06, "loss": 0.6789, "step": 19490 }, { "epoch": 0.604009170900979, "grad_norm": 2.3561051178297068, "learning_rate": 2.084338921140174e-06, "loss": 0.6598, "step": 19495 }, { "epoch": 0.6041640847688685, "grad_norm": 2.6817804965669696, "learning_rate": 2.0835235796751684e-06, "loss": 0.6273, "step": 19500 }, { "epoch": 0.604318998636758, "grad_norm": 2.4172690020717242, "learning_rate": 2.082708238210163e-06, "loss": 0.6932, "step": 19505 }, { "epoch": 0.6044739125046474, "grad_norm": 2.599529277613198, "learning_rate": 2.081892896745157e-06, "loss": 0.6478, "step": 19510 }, { "epoch": 0.6046288263725369, "grad_norm": 2.43622075126927, "learning_rate": 2.0810775552801514e-06, "loss": 0.6942, "step": 19515 }, { "epoch": 0.6047837402404264, "grad_norm": 2.8770682650617445, "learning_rate": 2.080262213815146e-06, "loss": 0.7192, "step": 19520 }, { "epoch": 0.6049386541083158, "grad_norm": 2.9868578089215205, "learning_rate": 2.0794468723501403e-06, "loss": 0.6402, "step": 19525 }, { "epoch": 0.6050935679762053, "grad_norm": 2.749175772558714, "learning_rate": 2.0786315308851347e-06, "loss": 0.7182, "step": 19530 }, { "epoch": 0.6052484818440946, "grad_norm": 2.863036099917348, "learning_rate": 2.077816189420129e-06, "loss": 0.6633, "step": 19535 }, { "epoch": 0.6054033957119841, "grad_norm": 3.14205332823731, "learning_rate": 2.0770008479551236e-06, "loss": 0.6931, "step": 19540 }, { "epoch": 0.6055583095798736, "grad_norm": 3.1121252644926725, "learning_rate": 2.076185506490118e-06, "loss": 0.6611, "step": 19545 }, { "epoch": 0.605713223447763, "grad_norm": 2.654884764255307, "learning_rate": 2.0753701650251126e-06, "loss": 0.5719, "step": 19550 }, { "epoch": 0.6058681373156525, "grad_norm": 2.8161110732349655, "learning_rate": 2.074554823560107e-06, "loss": 0.6833, "step": 19555 }, { "epoch": 0.6060230511835419, "grad_norm": 3.5229973348198094, "learning_rate": 2.0737394820951015e-06, "loss": 0.7027, "step": 19560 }, { "epoch": 0.6061779650514314, "grad_norm": 2.015142747470069, "learning_rate": 2.072924140630096e-06, "loss": 0.6142, "step": 19565 }, { "epoch": 0.6063328789193209, "grad_norm": 2.45896668216393, "learning_rate": 2.0721087991650904e-06, "loss": 0.7009, "step": 19570 }, { "epoch": 0.6064877927872103, "grad_norm": 3.0516226812634324, "learning_rate": 2.071293457700085e-06, "loss": 0.7615, "step": 19575 }, { "epoch": 0.6066427066550998, "grad_norm": 4.0253459349713046, "learning_rate": 2.0704781162350797e-06, "loss": 0.5917, "step": 19580 }, { "epoch": 0.6067976205229892, "grad_norm": 3.4367867657309477, "learning_rate": 2.0696627747700738e-06, "loss": 0.6231, "step": 19585 }, { "epoch": 0.6069525343908787, "grad_norm": 2.7932266382153945, "learning_rate": 2.0688474333050682e-06, "loss": 0.705, "step": 19590 }, { "epoch": 0.6071074482587682, "grad_norm": 2.6670318187707767, "learning_rate": 2.0680320918400627e-06, "loss": 0.6866, "step": 19595 }, { "epoch": 0.6072623621266576, "grad_norm": 3.356071300519811, "learning_rate": 2.067216750375057e-06, "loss": 0.7053, "step": 19600 }, { "epoch": 0.607417275994547, "grad_norm": 2.8808676914738185, "learning_rate": 2.0664014089100516e-06, "loss": 0.6656, "step": 19605 }, { "epoch": 0.6075721898624364, "grad_norm": 6.694031887411088, "learning_rate": 2.065586067445046e-06, "loss": 0.6779, "step": 19610 }, { "epoch": 0.6077271037303259, "grad_norm": 2.682980112376493, "learning_rate": 2.0647707259800405e-06, "loss": 0.6512, "step": 19615 }, { "epoch": 0.6078820175982154, "grad_norm": 3.2546852963711217, "learning_rate": 2.063955384515035e-06, "loss": 0.6788, "step": 19620 }, { "epoch": 0.6080369314661048, "grad_norm": 2.4441186838492093, "learning_rate": 2.0631400430500295e-06, "loss": 0.6165, "step": 19625 }, { "epoch": 0.6081918453339943, "grad_norm": 2.575707322096113, "learning_rate": 2.062324701585024e-06, "loss": 0.668, "step": 19630 }, { "epoch": 0.6083467592018837, "grad_norm": 2.357832021756158, "learning_rate": 2.0615093601200184e-06, "loss": 0.6467, "step": 19635 }, { "epoch": 0.6085016730697732, "grad_norm": 3.3401793778152253, "learning_rate": 2.060694018655013e-06, "loss": 0.6296, "step": 19640 }, { "epoch": 0.6086565869376627, "grad_norm": 3.5551368128048275, "learning_rate": 2.0598786771900073e-06, "loss": 0.6574, "step": 19645 }, { "epoch": 0.6088115008055521, "grad_norm": 5.64320056315181, "learning_rate": 2.0590633357250017e-06, "loss": 0.698, "step": 19650 }, { "epoch": 0.6089664146734416, "grad_norm": 2.98703188808185, "learning_rate": 2.058247994259996e-06, "loss": 0.7016, "step": 19655 }, { "epoch": 0.609121328541331, "grad_norm": 2.8247924482428157, "learning_rate": 2.0574326527949907e-06, "loss": 0.6434, "step": 19660 }, { "epoch": 0.6092762424092205, "grad_norm": 2.925280302634056, "learning_rate": 2.056617311329985e-06, "loss": 0.6894, "step": 19665 }, { "epoch": 0.60943115627711, "grad_norm": 2.7696241800074595, "learning_rate": 2.0558019698649796e-06, "loss": 0.6632, "step": 19670 }, { "epoch": 0.6095860701449993, "grad_norm": 2.5245826588135585, "learning_rate": 2.054986628399974e-06, "loss": 0.7067, "step": 19675 }, { "epoch": 0.6097409840128888, "grad_norm": 2.4392400224374056, "learning_rate": 2.0541712869349685e-06, "loss": 0.6595, "step": 19680 }, { "epoch": 0.6098958978807782, "grad_norm": 3.589883123964701, "learning_rate": 2.053355945469963e-06, "loss": 0.689, "step": 19685 }, { "epoch": 0.6100508117486677, "grad_norm": 2.529675230628082, "learning_rate": 2.0525406040049574e-06, "loss": 0.7671, "step": 19690 }, { "epoch": 0.6102057256165572, "grad_norm": 2.46128937761634, "learning_rate": 2.051725262539952e-06, "loss": 0.6367, "step": 19695 }, { "epoch": 0.6103606394844466, "grad_norm": 2.618339710637137, "learning_rate": 2.0509099210749463e-06, "loss": 0.6767, "step": 19700 }, { "epoch": 0.6105155533523361, "grad_norm": 2.640074091467605, "learning_rate": 2.050094579609941e-06, "loss": 0.7466, "step": 19705 }, { "epoch": 0.6106704672202256, "grad_norm": 2.5286455723956704, "learning_rate": 2.0492792381449353e-06, "loss": 0.7067, "step": 19710 }, { "epoch": 0.610825381088115, "grad_norm": 2.9648333387720225, "learning_rate": 2.0484638966799297e-06, "loss": 0.6548, "step": 19715 }, { "epoch": 0.6109802949560045, "grad_norm": 3.5137572369139285, "learning_rate": 2.047648555214924e-06, "loss": 0.7628, "step": 19720 }, { "epoch": 0.6111352088238939, "grad_norm": 2.1980716932975555, "learning_rate": 2.0468332137499186e-06, "loss": 0.6961, "step": 19725 }, { "epoch": 0.6112901226917834, "grad_norm": 3.45265592344536, "learning_rate": 2.046017872284913e-06, "loss": 0.6812, "step": 19730 }, { "epoch": 0.6114450365596729, "grad_norm": 2.499244158474081, "learning_rate": 2.0452025308199076e-06, "loss": 0.6351, "step": 19735 }, { "epoch": 0.6115999504275623, "grad_norm": 2.63630728912007, "learning_rate": 2.044387189354902e-06, "loss": 0.7035, "step": 19740 }, { "epoch": 0.6117548642954518, "grad_norm": 3.0249899082056593, "learning_rate": 2.0435718478898965e-06, "loss": 0.6354, "step": 19745 }, { "epoch": 0.6119097781633411, "grad_norm": 2.5812940489957787, "learning_rate": 2.042756506424891e-06, "loss": 0.7071, "step": 19750 }, { "epoch": 0.6120646920312306, "grad_norm": 2.680411422130089, "learning_rate": 2.0419411649598854e-06, "loss": 0.7487, "step": 19755 }, { "epoch": 0.6122196058991201, "grad_norm": 2.418875196052468, "learning_rate": 2.04112582349488e-06, "loss": 0.618, "step": 19760 }, { "epoch": 0.6123745197670095, "grad_norm": 3.462234775598949, "learning_rate": 2.0403104820298743e-06, "loss": 0.7186, "step": 19765 }, { "epoch": 0.612529433634899, "grad_norm": 2.960394216035935, "learning_rate": 2.0394951405648688e-06, "loss": 0.686, "step": 19770 }, { "epoch": 0.6126843475027884, "grad_norm": 2.259289825898902, "learning_rate": 2.0386797990998632e-06, "loss": 0.7252, "step": 19775 }, { "epoch": 0.6128392613706779, "grad_norm": 3.808855350390732, "learning_rate": 2.0378644576348577e-06, "loss": 0.7035, "step": 19780 }, { "epoch": 0.6129941752385674, "grad_norm": 2.4852532106726435, "learning_rate": 2.037049116169852e-06, "loss": 0.6752, "step": 19785 }, { "epoch": 0.6131490891064568, "grad_norm": 2.305314535053489, "learning_rate": 2.0362337747048466e-06, "loss": 0.75, "step": 19790 }, { "epoch": 0.6133040029743463, "grad_norm": 2.6112883709186003, "learning_rate": 2.035418433239841e-06, "loss": 0.6271, "step": 19795 }, { "epoch": 0.6134589168422357, "grad_norm": 2.4679214773081264, "learning_rate": 2.0346030917748355e-06, "loss": 0.6666, "step": 19800 }, { "epoch": 0.6136138307101252, "grad_norm": 3.435076697846408, "learning_rate": 2.03378775030983e-06, "loss": 0.6827, "step": 19805 }, { "epoch": 0.6137687445780147, "grad_norm": 3.1500478271164067, "learning_rate": 2.0329724088448244e-06, "loss": 0.6815, "step": 19810 }, { "epoch": 0.6139236584459041, "grad_norm": 3.1991002361736562, "learning_rate": 2.032157067379819e-06, "loss": 0.6629, "step": 19815 }, { "epoch": 0.6140785723137935, "grad_norm": 3.58133608488886, "learning_rate": 2.0313417259148134e-06, "loss": 0.6802, "step": 19820 }, { "epoch": 0.6142334861816829, "grad_norm": 2.55393362908436, "learning_rate": 2.030526384449808e-06, "loss": 0.7121, "step": 19825 }, { "epoch": 0.6143884000495724, "grad_norm": 2.7941959607620275, "learning_rate": 2.0297110429848023e-06, "loss": 0.7321, "step": 19830 }, { "epoch": 0.6145433139174619, "grad_norm": 2.782916580519968, "learning_rate": 2.0288957015197967e-06, "loss": 0.6411, "step": 19835 }, { "epoch": 0.6146982277853513, "grad_norm": 1.7661968054092712, "learning_rate": 2.028080360054791e-06, "loss": 0.6238, "step": 19840 }, { "epoch": 0.6148531416532408, "grad_norm": 2.824512761908852, "learning_rate": 2.0272650185897857e-06, "loss": 0.7014, "step": 19845 }, { "epoch": 0.6150080555211302, "grad_norm": 2.4805870971874704, "learning_rate": 2.02644967712478e-06, "loss": 0.7134, "step": 19850 }, { "epoch": 0.6151629693890197, "grad_norm": 4.06139025060895, "learning_rate": 2.025634335659774e-06, "loss": 0.6226, "step": 19855 }, { "epoch": 0.6153178832569092, "grad_norm": 2.797653462250315, "learning_rate": 2.024818994194769e-06, "loss": 0.7082, "step": 19860 }, { "epoch": 0.6154727971247986, "grad_norm": 2.9740248997417815, "learning_rate": 2.0240036527297635e-06, "loss": 0.7272, "step": 19865 }, { "epoch": 0.6156277109926881, "grad_norm": 2.202897983076995, "learning_rate": 2.023188311264758e-06, "loss": 0.6254, "step": 19870 }, { "epoch": 0.6157826248605776, "grad_norm": 3.433512719073647, "learning_rate": 2.0223729697997524e-06, "loss": 0.6981, "step": 19875 }, { "epoch": 0.615937538728467, "grad_norm": 2.848182562290374, "learning_rate": 2.021557628334747e-06, "loss": 0.8071, "step": 19880 }, { "epoch": 0.6160924525963565, "grad_norm": 2.587713481967843, "learning_rate": 2.0207422868697413e-06, "loss": 0.6815, "step": 19885 }, { "epoch": 0.6162473664642458, "grad_norm": 2.805051531796075, "learning_rate": 2.0199269454047358e-06, "loss": 0.685, "step": 19890 }, { "epoch": 0.6164022803321353, "grad_norm": 2.1814253922585283, "learning_rate": 2.0191116039397302e-06, "loss": 0.6978, "step": 19895 }, { "epoch": 0.6165571942000247, "grad_norm": 2.427552961649913, "learning_rate": 2.0182962624747247e-06, "loss": 0.5865, "step": 19900 }, { "epoch": 0.6167121080679142, "grad_norm": 3.626677191632376, "learning_rate": 2.017480921009719e-06, "loss": 0.7391, "step": 19905 }, { "epoch": 0.6168670219358037, "grad_norm": 2.653500564584695, "learning_rate": 2.0166655795447136e-06, "loss": 0.6801, "step": 19910 }, { "epoch": 0.6170219358036931, "grad_norm": 2.381908409082974, "learning_rate": 2.015850238079708e-06, "loss": 0.6644, "step": 19915 }, { "epoch": 0.6171768496715826, "grad_norm": 2.4015478563095667, "learning_rate": 2.0150348966147025e-06, "loss": 0.6975, "step": 19920 }, { "epoch": 0.6173317635394721, "grad_norm": 2.6452219479604655, "learning_rate": 2.014219555149697e-06, "loss": 0.69, "step": 19925 }, { "epoch": 0.6174866774073615, "grad_norm": 2.739323089959695, "learning_rate": 2.013404213684691e-06, "loss": 0.7119, "step": 19930 }, { "epoch": 0.617641591275251, "grad_norm": 2.3766312312708284, "learning_rate": 2.0125888722196855e-06, "loss": 0.7341, "step": 19935 }, { "epoch": 0.6177965051431404, "grad_norm": 2.695357373504293, "learning_rate": 2.01177353075468e-06, "loss": 0.6961, "step": 19940 }, { "epoch": 0.6179514190110299, "grad_norm": 2.8471071612401424, "learning_rate": 2.0109581892896744e-06, "loss": 0.648, "step": 19945 }, { "epoch": 0.6181063328789194, "grad_norm": 2.5341375903882244, "learning_rate": 2.010142847824669e-06, "loss": 0.6372, "step": 19950 }, { "epoch": 0.6182612467468088, "grad_norm": 3.6265941461858198, "learning_rate": 2.0093275063596638e-06, "loss": 0.6852, "step": 19955 }, { "epoch": 0.6184161606146982, "grad_norm": 3.3846347918883675, "learning_rate": 2.0085121648946582e-06, "loss": 0.7077, "step": 19960 }, { "epoch": 0.6185710744825876, "grad_norm": 3.2682784372344837, "learning_rate": 2.0076968234296527e-06, "loss": 0.7491, "step": 19965 }, { "epoch": 0.6187259883504771, "grad_norm": 2.547088751838461, "learning_rate": 2.006881481964647e-06, "loss": 0.7037, "step": 19970 }, { "epoch": 0.6188809022183666, "grad_norm": 2.8066627787928957, "learning_rate": 2.0060661404996416e-06, "loss": 0.7241, "step": 19975 }, { "epoch": 0.619035816086256, "grad_norm": 3.0198454279430362, "learning_rate": 2.005250799034636e-06, "loss": 0.7544, "step": 19980 }, { "epoch": 0.6191907299541455, "grad_norm": 5.041569478651037, "learning_rate": 2.0044354575696305e-06, "loss": 0.7223, "step": 19985 }, { "epoch": 0.6193456438220349, "grad_norm": 2.478837249358492, "learning_rate": 2.003620116104625e-06, "loss": 0.685, "step": 19990 }, { "epoch": 0.6195005576899244, "grad_norm": 2.861039423871038, "learning_rate": 2.0028047746396194e-06, "loss": 0.6088, "step": 19995 }, { "epoch": 0.6196554715578139, "grad_norm": 2.5102074578023954, "learning_rate": 2.001989433174614e-06, "loss": 0.635, "step": 20000 }, { "epoch": 0.6198103854257033, "grad_norm": 3.2759495187583134, "learning_rate": 2.0011740917096083e-06, "loss": 0.6355, "step": 20005 }, { "epoch": 0.6199652992935928, "grad_norm": 3.5741329436520304, "learning_rate": 2.0003587502446024e-06, "loss": 0.7191, "step": 20010 }, { "epoch": 0.6201202131614822, "grad_norm": 2.6965001193005427, "learning_rate": 1.999543408779597e-06, "loss": 0.7223, "step": 20015 }, { "epoch": 0.6202751270293717, "grad_norm": 2.7567209544787534, "learning_rate": 1.9987280673145913e-06, "loss": 0.6793, "step": 20020 }, { "epoch": 0.6204300408972612, "grad_norm": 5.117253169528067, "learning_rate": 1.9979127258495858e-06, "loss": 0.7009, "step": 20025 }, { "epoch": 0.6205849547651506, "grad_norm": 2.3474582064764316, "learning_rate": 1.9970973843845802e-06, "loss": 0.6643, "step": 20030 }, { "epoch": 0.62073986863304, "grad_norm": 3.0079035642603937, "learning_rate": 1.9962820429195747e-06, "loss": 0.7193, "step": 20035 }, { "epoch": 0.6208947825009294, "grad_norm": 2.9700919862618456, "learning_rate": 1.995466701454569e-06, "loss": 0.667, "step": 20040 }, { "epoch": 0.6210496963688189, "grad_norm": 2.628659646997819, "learning_rate": 1.9946513599895636e-06, "loss": 0.6806, "step": 20045 }, { "epoch": 0.6212046102367084, "grad_norm": 2.551287683415146, "learning_rate": 1.9938360185245585e-06, "loss": 0.6556, "step": 20050 }, { "epoch": 0.6213595241045978, "grad_norm": 4.381657433346019, "learning_rate": 1.993020677059553e-06, "loss": 0.6533, "step": 20055 }, { "epoch": 0.6215144379724873, "grad_norm": 2.260129258248574, "learning_rate": 1.9922053355945474e-06, "loss": 0.6506, "step": 20060 }, { "epoch": 0.6216693518403767, "grad_norm": 2.2284320642855957, "learning_rate": 1.991389994129542e-06, "loss": 0.625, "step": 20065 }, { "epoch": 0.6218242657082662, "grad_norm": 2.5117849815356625, "learning_rate": 1.9905746526645363e-06, "loss": 0.6953, "step": 20070 }, { "epoch": 0.6219791795761557, "grad_norm": 2.6483748152003597, "learning_rate": 1.9897593111995308e-06, "loss": 0.7024, "step": 20075 }, { "epoch": 0.6221340934440451, "grad_norm": 2.3273336914417526, "learning_rate": 1.9889439697345252e-06, "loss": 0.6338, "step": 20080 }, { "epoch": 0.6222890073119346, "grad_norm": 2.4284734619633577, "learning_rate": 1.9881286282695193e-06, "loss": 0.6896, "step": 20085 }, { "epoch": 0.6224439211798241, "grad_norm": 2.7084452161439563, "learning_rate": 1.9873132868045137e-06, "loss": 0.7041, "step": 20090 }, { "epoch": 0.6225988350477135, "grad_norm": 2.7540950896402507, "learning_rate": 1.986497945339508e-06, "loss": 0.7497, "step": 20095 }, { "epoch": 0.622753748915603, "grad_norm": 2.3431374624154038, "learning_rate": 1.9856826038745026e-06, "loss": 0.6242, "step": 20100 }, { "epoch": 0.6229086627834923, "grad_norm": 3.1573087768337813, "learning_rate": 1.984867262409497e-06, "loss": 0.7055, "step": 20105 }, { "epoch": 0.6230635766513818, "grad_norm": 2.977277084670943, "learning_rate": 1.9840519209444916e-06, "loss": 0.709, "step": 20110 }, { "epoch": 0.6232184905192713, "grad_norm": 2.3660112626522087, "learning_rate": 1.983236579479486e-06, "loss": 0.6028, "step": 20115 }, { "epoch": 0.6233734043871607, "grad_norm": 2.514171730930001, "learning_rate": 1.9824212380144805e-06, "loss": 0.6701, "step": 20120 }, { "epoch": 0.6235283182550502, "grad_norm": 2.541049560265193, "learning_rate": 1.981605896549475e-06, "loss": 0.6127, "step": 20125 }, { "epoch": 0.6236832321229396, "grad_norm": 2.791694737847188, "learning_rate": 1.9807905550844694e-06, "loss": 0.6407, "step": 20130 }, { "epoch": 0.6238381459908291, "grad_norm": 2.1962795886866777, "learning_rate": 1.979975213619464e-06, "loss": 0.6823, "step": 20135 }, { "epoch": 0.6239930598587186, "grad_norm": 2.8620367407866585, "learning_rate": 1.9791598721544583e-06, "loss": 0.7149, "step": 20140 }, { "epoch": 0.624147973726608, "grad_norm": 2.882408547101549, "learning_rate": 1.978344530689453e-06, "loss": 0.5815, "step": 20145 }, { "epoch": 0.6243028875944975, "grad_norm": 2.8249067882365417, "learning_rate": 1.9775291892244477e-06, "loss": 0.6807, "step": 20150 }, { "epoch": 0.6244578014623869, "grad_norm": 2.98223813048277, "learning_rate": 1.976713847759442e-06, "loss": 0.7035, "step": 20155 }, { "epoch": 0.6246127153302764, "grad_norm": 2.8367015615144124, "learning_rate": 1.975898506294436e-06, "loss": 0.6574, "step": 20160 }, { "epoch": 0.6247676291981659, "grad_norm": 2.7235474614437725, "learning_rate": 1.9750831648294306e-06, "loss": 0.7159, "step": 20165 }, { "epoch": 0.6249225430660553, "grad_norm": 2.606740162607963, "learning_rate": 1.974267823364425e-06, "loss": 0.6584, "step": 20170 }, { "epoch": 0.6250774569339447, "grad_norm": 3.093408733110295, "learning_rate": 1.9734524818994195e-06, "loss": 0.6639, "step": 20175 }, { "epoch": 0.6252323708018341, "grad_norm": 2.6810279132914046, "learning_rate": 1.972637140434414e-06, "loss": 0.6033, "step": 20180 }, { "epoch": 0.6253872846697236, "grad_norm": 2.913444523835083, "learning_rate": 1.9718217989694085e-06, "loss": 0.7599, "step": 20185 }, { "epoch": 0.6255421985376131, "grad_norm": 2.832423647098813, "learning_rate": 1.971006457504403e-06, "loss": 0.7007, "step": 20190 }, { "epoch": 0.6256971124055025, "grad_norm": 2.529456889404142, "learning_rate": 1.9701911160393974e-06, "loss": 0.7401, "step": 20195 }, { "epoch": 0.625852026273392, "grad_norm": 3.3807857605682163, "learning_rate": 1.969375774574392e-06, "loss": 0.6893, "step": 20200 }, { "epoch": 0.6260069401412814, "grad_norm": 4.226089127136974, "learning_rate": 1.9685604331093863e-06, "loss": 0.7345, "step": 20205 }, { "epoch": 0.6261618540091709, "grad_norm": 2.5728633939205694, "learning_rate": 1.9677450916443807e-06, "loss": 0.6842, "step": 20210 }, { "epoch": 0.6263167678770604, "grad_norm": 2.8508986494095594, "learning_rate": 1.966929750179375e-06, "loss": 0.7099, "step": 20215 }, { "epoch": 0.6264716817449498, "grad_norm": 4.123327620798467, "learning_rate": 1.9661144087143697e-06, "loss": 0.7735, "step": 20220 }, { "epoch": 0.6266265956128393, "grad_norm": 2.806089460942558, "learning_rate": 1.965299067249364e-06, "loss": 0.6798, "step": 20225 }, { "epoch": 0.6267815094807287, "grad_norm": 2.1931945984559986, "learning_rate": 1.9644837257843586e-06, "loss": 0.7511, "step": 20230 }, { "epoch": 0.6269364233486182, "grad_norm": 2.695508840910795, "learning_rate": 1.963668384319353e-06, "loss": 0.5803, "step": 20235 }, { "epoch": 0.6270913372165077, "grad_norm": 2.6131632305669457, "learning_rate": 1.9628530428543475e-06, "loss": 0.5514, "step": 20240 }, { "epoch": 0.627246251084397, "grad_norm": 2.5801988124665627, "learning_rate": 1.962037701389342e-06, "loss": 0.6769, "step": 20245 }, { "epoch": 0.6274011649522865, "grad_norm": 3.242073476173872, "learning_rate": 1.9612223599243364e-06, "loss": 0.6511, "step": 20250 }, { "epoch": 0.6275560788201759, "grad_norm": 2.7509946562607728, "learning_rate": 1.960407018459331e-06, "loss": 0.6669, "step": 20255 }, { "epoch": 0.6277109926880654, "grad_norm": 2.6651535384015568, "learning_rate": 1.9595916769943253e-06, "loss": 0.6625, "step": 20260 }, { "epoch": 0.6278659065559549, "grad_norm": 2.6767417896829246, "learning_rate": 1.95877633552932e-06, "loss": 0.6903, "step": 20265 }, { "epoch": 0.6280208204238443, "grad_norm": 2.4961771871774445, "learning_rate": 1.9579609940643143e-06, "loss": 0.6682, "step": 20270 }, { "epoch": 0.6281757342917338, "grad_norm": 2.8736787650951716, "learning_rate": 1.9571456525993087e-06, "loss": 0.6529, "step": 20275 }, { "epoch": 0.6283306481596233, "grad_norm": 2.7065084418905165, "learning_rate": 1.956330311134303e-06, "loss": 0.6815, "step": 20280 }, { "epoch": 0.6284855620275127, "grad_norm": 2.736924132142187, "learning_rate": 1.9555149696692976e-06, "loss": 0.6862, "step": 20285 }, { "epoch": 0.6286404758954022, "grad_norm": 2.4189908356029846, "learning_rate": 1.954699628204292e-06, "loss": 0.6136, "step": 20290 }, { "epoch": 0.6287953897632916, "grad_norm": 2.992855528154387, "learning_rate": 1.9538842867392866e-06, "loss": 0.7114, "step": 20295 }, { "epoch": 0.6289503036311811, "grad_norm": 2.863188270551665, "learning_rate": 1.953068945274281e-06, "loss": 0.6978, "step": 20300 }, { "epoch": 0.6291052174990706, "grad_norm": 4.014004421698518, "learning_rate": 1.9522536038092755e-06, "loss": 0.7017, "step": 20305 }, { "epoch": 0.62926013136696, "grad_norm": 3.3727448976732637, "learning_rate": 1.95143826234427e-06, "loss": 0.6549, "step": 20310 }, { "epoch": 0.6294150452348494, "grad_norm": 4.037476039247873, "learning_rate": 1.9506229208792644e-06, "loss": 0.7107, "step": 20315 }, { "epoch": 0.6295699591027388, "grad_norm": 2.3605537305029887, "learning_rate": 1.949807579414259e-06, "loss": 0.7657, "step": 20320 }, { "epoch": 0.6297248729706283, "grad_norm": 3.259381145043484, "learning_rate": 1.9489922379492533e-06, "loss": 0.6571, "step": 20325 }, { "epoch": 0.6298797868385178, "grad_norm": 3.0215194832996968, "learning_rate": 1.9481768964842478e-06, "loss": 0.7482, "step": 20330 }, { "epoch": 0.6300347007064072, "grad_norm": 3.223016135983444, "learning_rate": 1.9473615550192422e-06, "loss": 0.5862, "step": 20335 }, { "epoch": 0.6301896145742967, "grad_norm": 3.5325298392751265, "learning_rate": 1.9465462135542367e-06, "loss": 0.7453, "step": 20340 }, { "epoch": 0.6303445284421861, "grad_norm": 3.9220736726109626, "learning_rate": 1.945730872089231e-06, "loss": 0.7263, "step": 20345 }, { "epoch": 0.6304994423100756, "grad_norm": 2.885141837607181, "learning_rate": 1.9449155306242256e-06, "loss": 0.6505, "step": 20350 }, { "epoch": 0.6306543561779651, "grad_norm": 2.798047351746353, "learning_rate": 1.94410018915922e-06, "loss": 0.6573, "step": 20355 }, { "epoch": 0.6308092700458545, "grad_norm": 2.544788710815215, "learning_rate": 1.9432848476942145e-06, "loss": 0.7203, "step": 20360 }, { "epoch": 0.630964183913744, "grad_norm": 3.2675821710404245, "learning_rate": 1.942469506229209e-06, "loss": 0.6801, "step": 20365 }, { "epoch": 0.6311190977816334, "grad_norm": 2.6443367232058828, "learning_rate": 1.9416541647642034e-06, "loss": 0.6499, "step": 20370 }, { "epoch": 0.6312740116495229, "grad_norm": 2.651728848773795, "learning_rate": 1.940838823299198e-06, "loss": 0.6478, "step": 20375 }, { "epoch": 0.6314289255174124, "grad_norm": 3.705590309006627, "learning_rate": 1.9400234818341924e-06, "loss": 0.6927, "step": 20380 }, { "epoch": 0.6315838393853018, "grad_norm": 2.6095476915341456, "learning_rate": 1.939208140369187e-06, "loss": 0.6931, "step": 20385 }, { "epoch": 0.6317387532531912, "grad_norm": 2.979412511088387, "learning_rate": 1.9383927989041813e-06, "loss": 0.6606, "step": 20390 }, { "epoch": 0.6318936671210806, "grad_norm": 2.5349038126852994, "learning_rate": 1.9375774574391757e-06, "loss": 0.7077, "step": 20395 }, { "epoch": 0.6320485809889701, "grad_norm": 2.6620048274891444, "learning_rate": 1.93676211597417e-06, "loss": 0.6953, "step": 20400 }, { "epoch": 0.6322034948568596, "grad_norm": 3.2350016645010236, "learning_rate": 1.9359467745091647e-06, "loss": 0.7455, "step": 20405 }, { "epoch": 0.632358408724749, "grad_norm": 2.1940191275846352, "learning_rate": 1.935131433044159e-06, "loss": 0.6825, "step": 20410 }, { "epoch": 0.6325133225926385, "grad_norm": 3.6655230055060795, "learning_rate": 1.9343160915791536e-06, "loss": 0.6379, "step": 20415 }, { "epoch": 0.6326682364605279, "grad_norm": 5.171721473298671, "learning_rate": 1.933500750114148e-06, "loss": 0.7143, "step": 20420 }, { "epoch": 0.6328231503284174, "grad_norm": 2.8388443181753966, "learning_rate": 1.9326854086491425e-06, "loss": 0.6313, "step": 20425 }, { "epoch": 0.6329780641963069, "grad_norm": 2.9310472714287004, "learning_rate": 1.931870067184137e-06, "loss": 0.6838, "step": 20430 }, { "epoch": 0.6331329780641963, "grad_norm": 2.6097408597574447, "learning_rate": 1.9310547257191314e-06, "loss": 0.5993, "step": 20435 }, { "epoch": 0.6332878919320858, "grad_norm": 2.4377969407803777, "learning_rate": 1.930239384254126e-06, "loss": 0.5874, "step": 20440 }, { "epoch": 0.6334428057999753, "grad_norm": 2.3451154435572943, "learning_rate": 1.9294240427891203e-06, "loss": 0.6211, "step": 20445 }, { "epoch": 0.6335977196678647, "grad_norm": 3.4487327422395158, "learning_rate": 1.9286087013241148e-06, "loss": 0.7282, "step": 20450 }, { "epoch": 0.6337526335357542, "grad_norm": 2.425942490897352, "learning_rate": 1.9277933598591092e-06, "loss": 0.6723, "step": 20455 }, { "epoch": 0.6339075474036435, "grad_norm": 3.786399264777132, "learning_rate": 1.9269780183941037e-06, "loss": 0.5937, "step": 20460 }, { "epoch": 0.634062461271533, "grad_norm": 2.7848332838364747, "learning_rate": 1.926162676929098e-06, "loss": 0.6801, "step": 20465 }, { "epoch": 0.6342173751394224, "grad_norm": 2.6114970538771693, "learning_rate": 1.9253473354640926e-06, "loss": 0.6813, "step": 20470 }, { "epoch": 0.6343722890073119, "grad_norm": 2.9616502293193028, "learning_rate": 1.924531993999087e-06, "loss": 0.6712, "step": 20475 }, { "epoch": 0.6345272028752014, "grad_norm": 2.765170847162054, "learning_rate": 1.9237166525340815e-06, "loss": 0.7013, "step": 20480 }, { "epoch": 0.6346821167430908, "grad_norm": 2.826185732979867, "learning_rate": 1.922901311069076e-06, "loss": 0.6622, "step": 20485 }, { "epoch": 0.6348370306109803, "grad_norm": 2.436295613885058, "learning_rate": 1.9220859696040705e-06, "loss": 0.6978, "step": 20490 }, { "epoch": 0.6349919444788698, "grad_norm": 2.817311846521876, "learning_rate": 1.921270628139065e-06, "loss": 0.7085, "step": 20495 }, { "epoch": 0.6351468583467592, "grad_norm": 2.3451356246461064, "learning_rate": 1.9204552866740594e-06, "loss": 0.6501, "step": 20500 }, { "epoch": 0.6353017722146487, "grad_norm": 2.3172173581632722, "learning_rate": 1.9196399452090534e-06, "loss": 0.6806, "step": 20505 }, { "epoch": 0.6354566860825381, "grad_norm": 2.845461388056342, "learning_rate": 1.918824603744048e-06, "loss": 0.6599, "step": 20510 }, { "epoch": 0.6356115999504276, "grad_norm": 2.2636469514769852, "learning_rate": 1.9180092622790423e-06, "loss": 0.648, "step": 20515 }, { "epoch": 0.6357665138183171, "grad_norm": 3.0393231782177197, "learning_rate": 1.917193920814037e-06, "loss": 0.6838, "step": 20520 }, { "epoch": 0.6359214276862065, "grad_norm": 2.825473075066841, "learning_rate": 1.9163785793490317e-06, "loss": 0.665, "step": 20525 }, { "epoch": 0.6360763415540959, "grad_norm": 2.4803687291319387, "learning_rate": 1.915563237884026e-06, "loss": 0.6426, "step": 20530 }, { "epoch": 0.6362312554219853, "grad_norm": 2.909342919789866, "learning_rate": 1.9147478964190206e-06, "loss": 0.6492, "step": 20535 }, { "epoch": 0.6363861692898748, "grad_norm": 3.0789769812734233, "learning_rate": 1.913932554954015e-06, "loss": 0.6774, "step": 20540 }, { "epoch": 0.6365410831577643, "grad_norm": 3.0568461121440973, "learning_rate": 1.9131172134890095e-06, "loss": 0.6967, "step": 20545 }, { "epoch": 0.6366959970256537, "grad_norm": 2.6901059878838707, "learning_rate": 1.912301872024004e-06, "loss": 0.6528, "step": 20550 }, { "epoch": 0.6368509108935432, "grad_norm": 2.551624953023842, "learning_rate": 1.9114865305589984e-06, "loss": 0.669, "step": 20555 }, { "epoch": 0.6370058247614326, "grad_norm": 2.8198537720606556, "learning_rate": 1.910671189093993e-06, "loss": 0.7167, "step": 20560 }, { "epoch": 0.6371607386293221, "grad_norm": 2.098542429115461, "learning_rate": 1.9098558476289873e-06, "loss": 0.6668, "step": 20565 }, { "epoch": 0.6373156524972116, "grad_norm": 2.929584259964265, "learning_rate": 1.909040506163982e-06, "loss": 0.6528, "step": 20570 }, { "epoch": 0.637470566365101, "grad_norm": 4.367735131879772, "learning_rate": 1.9082251646989763e-06, "loss": 0.674, "step": 20575 }, { "epoch": 0.6376254802329905, "grad_norm": 2.515646631234775, "learning_rate": 1.9074098232339703e-06, "loss": 0.6861, "step": 20580 }, { "epoch": 0.6377803941008799, "grad_norm": 2.5135791542094763, "learning_rate": 1.906594481768965e-06, "loss": 0.7197, "step": 20585 }, { "epoch": 0.6379353079687694, "grad_norm": 2.68767532803342, "learning_rate": 1.9057791403039594e-06, "loss": 0.6673, "step": 20590 }, { "epoch": 0.6380902218366589, "grad_norm": 3.70873850159697, "learning_rate": 1.9049637988389539e-06, "loss": 0.5621, "step": 20595 }, { "epoch": 0.6382451357045482, "grad_norm": 3.021213067089858, "learning_rate": 1.9041484573739483e-06, "loss": 0.6588, "step": 20600 }, { "epoch": 0.6384000495724377, "grad_norm": 2.612357421695438, "learning_rate": 1.9033331159089428e-06, "loss": 0.6992, "step": 20605 }, { "epoch": 0.6385549634403271, "grad_norm": 2.414217378485306, "learning_rate": 1.902517774443937e-06, "loss": 0.6734, "step": 20610 }, { "epoch": 0.6387098773082166, "grad_norm": 3.296961651956864, "learning_rate": 1.901702432978932e-06, "loss": 0.708, "step": 20615 }, { "epoch": 0.6388647911761061, "grad_norm": 2.763002968795973, "learning_rate": 1.9008870915139264e-06, "loss": 0.709, "step": 20620 }, { "epoch": 0.6390197050439955, "grad_norm": 2.4073867330516694, "learning_rate": 1.9000717500489209e-06, "loss": 0.7332, "step": 20625 }, { "epoch": 0.639174618911885, "grad_norm": 2.5697663267678257, "learning_rate": 1.899256408583915e-06, "loss": 0.73, "step": 20630 }, { "epoch": 0.6393295327797744, "grad_norm": 2.883187293032513, "learning_rate": 1.8984410671189096e-06, "loss": 0.6419, "step": 20635 }, { "epoch": 0.6394844466476639, "grad_norm": 3.203765239043105, "learning_rate": 1.897625725653904e-06, "loss": 0.6259, "step": 20640 }, { "epoch": 0.6396393605155534, "grad_norm": 2.6610313779761734, "learning_rate": 1.8968103841888985e-06, "loss": 0.6073, "step": 20645 }, { "epoch": 0.6397942743834428, "grad_norm": 2.482416673415541, "learning_rate": 1.895995042723893e-06, "loss": 0.6521, "step": 20650 }, { "epoch": 0.6399491882513323, "grad_norm": 3.497557894839911, "learning_rate": 1.8951797012588874e-06, "loss": 0.6698, "step": 20655 }, { "epoch": 0.6401041021192218, "grad_norm": 2.2155111569946415, "learning_rate": 1.8943643597938819e-06, "loss": 0.6739, "step": 20660 }, { "epoch": 0.6402590159871112, "grad_norm": 2.6919310356323862, "learning_rate": 1.8935490183288763e-06, "loss": 0.695, "step": 20665 }, { "epoch": 0.6404139298550007, "grad_norm": 2.1526933675533124, "learning_rate": 1.8927336768638708e-06, "loss": 0.686, "step": 20670 }, { "epoch": 0.64056884372289, "grad_norm": 2.3723728650313087, "learning_rate": 1.8919183353988652e-06, "loss": 0.6906, "step": 20675 }, { "epoch": 0.6407237575907795, "grad_norm": 2.411307630574409, "learning_rate": 1.8911029939338597e-06, "loss": 0.6745, "step": 20680 }, { "epoch": 0.640878671458669, "grad_norm": 2.507234160932325, "learning_rate": 1.890287652468854e-06, "loss": 0.6716, "step": 20685 }, { "epoch": 0.6410335853265584, "grad_norm": 2.6745392966316226, "learning_rate": 1.8894723110038484e-06, "loss": 0.618, "step": 20690 }, { "epoch": 0.6411884991944479, "grad_norm": 2.343142464221811, "learning_rate": 1.8886569695388429e-06, "loss": 0.6559, "step": 20695 }, { "epoch": 0.6413434130623373, "grad_norm": 2.6126277042091495, "learning_rate": 1.8878416280738373e-06, "loss": 0.6008, "step": 20700 }, { "epoch": 0.6414983269302268, "grad_norm": 2.5784854410023397, "learning_rate": 1.8870262866088318e-06, "loss": 0.6522, "step": 20705 }, { "epoch": 0.6416532407981163, "grad_norm": 2.4852105478846243, "learning_rate": 1.8862109451438264e-06, "loss": 0.6525, "step": 20710 }, { "epoch": 0.6418081546660057, "grad_norm": 3.212966512215674, "learning_rate": 1.885395603678821e-06, "loss": 0.6525, "step": 20715 }, { "epoch": 0.6419630685338952, "grad_norm": 2.8214224363210496, "learning_rate": 1.8845802622138154e-06, "loss": 0.7028, "step": 20720 }, { "epoch": 0.6421179824017846, "grad_norm": 2.3685741672244625, "learning_rate": 1.8837649207488098e-06, "loss": 0.6422, "step": 20725 }, { "epoch": 0.6422728962696741, "grad_norm": 2.727835691047735, "learning_rate": 1.8829495792838043e-06, "loss": 0.7915, "step": 20730 }, { "epoch": 0.6424278101375636, "grad_norm": 2.494554330140216, "learning_rate": 1.8821342378187987e-06, "loss": 0.6685, "step": 20735 }, { "epoch": 0.642582724005453, "grad_norm": 3.089718274500581, "learning_rate": 1.8813188963537932e-06, "loss": 0.7244, "step": 20740 }, { "epoch": 0.6427376378733424, "grad_norm": 2.365315701394471, "learning_rate": 1.8805035548887877e-06, "loss": 0.5932, "step": 20745 }, { "epoch": 0.6428925517412318, "grad_norm": 2.1949483329269506, "learning_rate": 1.8796882134237821e-06, "loss": 0.6054, "step": 20750 }, { "epoch": 0.6430474656091213, "grad_norm": 3.6740676269544865, "learning_rate": 1.8788728719587766e-06, "loss": 0.6905, "step": 20755 }, { "epoch": 0.6432023794770108, "grad_norm": 2.981174886672874, "learning_rate": 1.8780575304937708e-06, "loss": 0.6921, "step": 20760 }, { "epoch": 0.6433572933449002, "grad_norm": 2.7854172348156907, "learning_rate": 1.8772421890287653e-06, "loss": 0.7177, "step": 20765 }, { "epoch": 0.6435122072127897, "grad_norm": 2.3263283000433526, "learning_rate": 1.8764268475637597e-06, "loss": 0.696, "step": 20770 }, { "epoch": 0.6436671210806791, "grad_norm": 2.366785830115644, "learning_rate": 1.8756115060987542e-06, "loss": 0.6677, "step": 20775 }, { "epoch": 0.6438220349485686, "grad_norm": 3.4907850569095538, "learning_rate": 1.8747961646337487e-06, "loss": 0.686, "step": 20780 }, { "epoch": 0.6439769488164581, "grad_norm": 2.264562377833289, "learning_rate": 1.8739808231687431e-06, "loss": 0.6995, "step": 20785 }, { "epoch": 0.6441318626843475, "grad_norm": 2.374403261381638, "learning_rate": 1.8731654817037376e-06, "loss": 0.6367, "step": 20790 }, { "epoch": 0.644286776552237, "grad_norm": 2.6140452141137183, "learning_rate": 1.872350140238732e-06, "loss": 0.6916, "step": 20795 }, { "epoch": 0.6444416904201264, "grad_norm": 3.7595531739669505, "learning_rate": 1.8715347987737265e-06, "loss": 0.6409, "step": 20800 }, { "epoch": 0.6445966042880159, "grad_norm": 2.9870946818111683, "learning_rate": 1.8707194573087212e-06, "loss": 0.6566, "step": 20805 }, { "epoch": 0.6447515181559054, "grad_norm": 2.2820393922443105, "learning_rate": 1.8699041158437156e-06, "loss": 0.6687, "step": 20810 }, { "epoch": 0.6449064320237947, "grad_norm": 2.8356048732884958, "learning_rate": 1.86908877437871e-06, "loss": 0.7595, "step": 20815 }, { "epoch": 0.6450613458916842, "grad_norm": 2.141044449505828, "learning_rate": 1.8682734329137045e-06, "loss": 0.6373, "step": 20820 }, { "epoch": 0.6452162597595736, "grad_norm": 2.7465382638119453, "learning_rate": 1.867458091448699e-06, "loss": 0.6772, "step": 20825 }, { "epoch": 0.6453711736274631, "grad_norm": 3.4380874723133705, "learning_rate": 1.8666427499836935e-06, "loss": 0.7525, "step": 20830 }, { "epoch": 0.6455260874953526, "grad_norm": 2.189459741967796, "learning_rate": 1.865827408518688e-06, "loss": 0.6927, "step": 20835 }, { "epoch": 0.645681001363242, "grad_norm": 2.5930167176008903, "learning_rate": 1.8650120670536822e-06, "loss": 0.7292, "step": 20840 }, { "epoch": 0.6458359152311315, "grad_norm": 2.4098425712875664, "learning_rate": 1.8641967255886766e-06, "loss": 0.6583, "step": 20845 }, { "epoch": 0.645990829099021, "grad_norm": 2.306945297863509, "learning_rate": 1.863381384123671e-06, "loss": 0.696, "step": 20850 }, { "epoch": 0.6461457429669104, "grad_norm": 2.477438097875345, "learning_rate": 1.8625660426586655e-06, "loss": 0.6377, "step": 20855 }, { "epoch": 0.6463006568347999, "grad_norm": 3.3770270209730118, "learning_rate": 1.86175070119366e-06, "loss": 0.7044, "step": 20860 }, { "epoch": 0.6464555707026893, "grad_norm": 3.173736035335028, "learning_rate": 1.8609353597286545e-06, "loss": 0.684, "step": 20865 }, { "epoch": 0.6466104845705788, "grad_norm": 2.865594497532092, "learning_rate": 1.860120018263649e-06, "loss": 0.6576, "step": 20870 }, { "epoch": 0.6467653984384683, "grad_norm": 2.1637811289706472, "learning_rate": 1.8593046767986434e-06, "loss": 0.6892, "step": 20875 }, { "epoch": 0.6469203123063577, "grad_norm": 2.791448289685198, "learning_rate": 1.8584893353336378e-06, "loss": 0.7942, "step": 20880 }, { "epoch": 0.6470752261742471, "grad_norm": 2.8957226136673353, "learning_rate": 1.8576739938686323e-06, "loss": 0.6979, "step": 20885 }, { "epoch": 0.6472301400421365, "grad_norm": 3.05951685824305, "learning_rate": 1.8568586524036268e-06, "loss": 0.6083, "step": 20890 }, { "epoch": 0.647385053910026, "grad_norm": 2.8611824679058033, "learning_rate": 1.856043310938621e-06, "loss": 0.6281, "step": 20895 }, { "epoch": 0.6475399677779154, "grad_norm": 3.715870685248373, "learning_rate": 1.8552279694736159e-06, "loss": 0.6774, "step": 20900 }, { "epoch": 0.6476948816458049, "grad_norm": 3.306096083229585, "learning_rate": 1.8544126280086104e-06, "loss": 0.7052, "step": 20905 }, { "epoch": 0.6478497955136944, "grad_norm": 2.4665106998876474, "learning_rate": 1.8535972865436048e-06, "loss": 0.6643, "step": 20910 }, { "epoch": 0.6480047093815838, "grad_norm": 2.926017725602062, "learning_rate": 1.852781945078599e-06, "loss": 0.6773, "step": 20915 }, { "epoch": 0.6481596232494733, "grad_norm": 2.595219025091953, "learning_rate": 1.8519666036135935e-06, "loss": 0.7178, "step": 20920 }, { "epoch": 0.6483145371173628, "grad_norm": 3.0418744298544675, "learning_rate": 1.851151262148588e-06, "loss": 0.6396, "step": 20925 }, { "epoch": 0.6484694509852522, "grad_norm": 3.2288511805461875, "learning_rate": 1.8503359206835824e-06, "loss": 0.6783, "step": 20930 }, { "epoch": 0.6486243648531417, "grad_norm": 2.3776728234389264, "learning_rate": 1.849520579218577e-06, "loss": 0.6726, "step": 20935 }, { "epoch": 0.6487792787210311, "grad_norm": 2.5205533326302696, "learning_rate": 1.8487052377535714e-06, "loss": 0.6927, "step": 20940 }, { "epoch": 0.6489341925889206, "grad_norm": 3.5224575754354097, "learning_rate": 1.8478898962885658e-06, "loss": 0.6743, "step": 20945 }, { "epoch": 0.6490891064568101, "grad_norm": 2.458905179645258, "learning_rate": 1.8470745548235603e-06, "loss": 0.6385, "step": 20950 }, { "epoch": 0.6492440203246995, "grad_norm": 2.8710575219536896, "learning_rate": 1.8462592133585547e-06, "loss": 0.621, "step": 20955 }, { "epoch": 0.6493989341925889, "grad_norm": 4.5991803139932, "learning_rate": 1.8454438718935492e-06, "loss": 0.701, "step": 20960 }, { "epoch": 0.6495538480604783, "grad_norm": 2.883466514218647, "learning_rate": 1.8446285304285436e-06, "loss": 0.7094, "step": 20965 }, { "epoch": 0.6497087619283678, "grad_norm": 2.223777532703536, "learning_rate": 1.8438131889635381e-06, "loss": 0.7256, "step": 20970 }, { "epoch": 0.6498636757962573, "grad_norm": 2.850844111539934, "learning_rate": 1.8429978474985324e-06, "loss": 0.7259, "step": 20975 }, { "epoch": 0.6500185896641467, "grad_norm": 2.6214807961145636, "learning_rate": 1.8421825060335268e-06, "loss": 0.6945, "step": 20980 }, { "epoch": 0.6501735035320362, "grad_norm": 2.1099656025303526, "learning_rate": 1.8413671645685213e-06, "loss": 0.6828, "step": 20985 }, { "epoch": 0.6503284173999256, "grad_norm": 2.469445070295574, "learning_rate": 1.8405518231035157e-06, "loss": 0.603, "step": 20990 }, { "epoch": 0.6504833312678151, "grad_norm": 2.436549861576443, "learning_rate": 1.8397364816385104e-06, "loss": 0.6778, "step": 20995 }, { "epoch": 0.6506382451357046, "grad_norm": 3.883591561039509, "learning_rate": 1.8389211401735049e-06, "loss": 0.6005, "step": 21000 }, { "epoch": 0.650793159003594, "grad_norm": 2.68298906825914, "learning_rate": 1.8381057987084993e-06, "loss": 0.6151, "step": 21005 }, { "epoch": 0.6509480728714835, "grad_norm": 4.641021089465784, "learning_rate": 1.8372904572434938e-06, "loss": 0.6923, "step": 21010 }, { "epoch": 0.651102986739373, "grad_norm": 2.5623760548009837, "learning_rate": 1.8364751157784882e-06, "loss": 0.7277, "step": 21015 }, { "epoch": 0.6512579006072624, "grad_norm": 2.5305619494403575, "learning_rate": 1.8356597743134827e-06, "loss": 0.7189, "step": 21020 }, { "epoch": 0.6514128144751519, "grad_norm": 2.423377936818959, "learning_rate": 1.8348444328484772e-06, "loss": 0.6699, "step": 21025 }, { "epoch": 0.6515677283430412, "grad_norm": 2.650398780830643, "learning_rate": 1.8340290913834716e-06, "loss": 0.6805, "step": 21030 }, { "epoch": 0.6517226422109307, "grad_norm": 2.85636227749616, "learning_rate": 1.833213749918466e-06, "loss": 0.6554, "step": 21035 }, { "epoch": 0.6518775560788201, "grad_norm": 3.584188438132724, "learning_rate": 1.8323984084534605e-06, "loss": 0.7072, "step": 21040 }, { "epoch": 0.6520324699467096, "grad_norm": 2.818605065029499, "learning_rate": 1.831583066988455e-06, "loss": 0.7332, "step": 21045 }, { "epoch": 0.6521873838145991, "grad_norm": 2.479327535378753, "learning_rate": 1.8307677255234492e-06, "loss": 0.6112, "step": 21050 }, { "epoch": 0.6523422976824885, "grad_norm": 2.38714140987546, "learning_rate": 1.8299523840584437e-06, "loss": 0.6642, "step": 21055 }, { "epoch": 0.652497211550378, "grad_norm": 2.527326675236166, "learning_rate": 1.8291370425934382e-06, "loss": 0.6072, "step": 21060 }, { "epoch": 0.6526521254182674, "grad_norm": 2.654222602090546, "learning_rate": 1.8283217011284326e-06, "loss": 0.6642, "step": 21065 }, { "epoch": 0.6528070392861569, "grad_norm": 3.09621876631744, "learning_rate": 1.827506359663427e-06, "loss": 0.727, "step": 21070 }, { "epoch": 0.6529619531540464, "grad_norm": 2.3555697409851897, "learning_rate": 1.8266910181984215e-06, "loss": 0.6268, "step": 21075 }, { "epoch": 0.6531168670219358, "grad_norm": 2.5990071412622595, "learning_rate": 1.825875676733416e-06, "loss": 0.5605, "step": 21080 }, { "epoch": 0.6532717808898253, "grad_norm": 3.7758122361666926, "learning_rate": 1.8250603352684105e-06, "loss": 0.5418, "step": 21085 }, { "epoch": 0.6534266947577148, "grad_norm": 3.4209120593374327, "learning_rate": 1.8242449938034051e-06, "loss": 0.7385, "step": 21090 }, { "epoch": 0.6535816086256042, "grad_norm": 2.731545341481185, "learning_rate": 1.8234296523383996e-06, "loss": 0.63, "step": 21095 }, { "epoch": 0.6537365224934936, "grad_norm": 2.3324817926405803, "learning_rate": 1.822614310873394e-06, "loss": 0.6511, "step": 21100 }, { "epoch": 0.653891436361383, "grad_norm": 2.6510156916452363, "learning_rate": 1.8217989694083885e-06, "loss": 0.7092, "step": 21105 }, { "epoch": 0.6540463502292725, "grad_norm": 2.494713991665441, "learning_rate": 1.820983627943383e-06, "loss": 0.6754, "step": 21110 }, { "epoch": 0.654201264097162, "grad_norm": 3.125544788629029, "learning_rate": 1.8201682864783774e-06, "loss": 0.668, "step": 21115 }, { "epoch": 0.6543561779650514, "grad_norm": 2.8098465726450153, "learning_rate": 1.8193529450133719e-06, "loss": 0.642, "step": 21120 }, { "epoch": 0.6545110918329409, "grad_norm": 2.929370191181412, "learning_rate": 1.8185376035483661e-06, "loss": 0.6923, "step": 21125 }, { "epoch": 0.6546660057008303, "grad_norm": 2.4245781576895107, "learning_rate": 1.8177222620833606e-06, "loss": 0.6215, "step": 21130 }, { "epoch": 0.6548209195687198, "grad_norm": 2.6601073256092436, "learning_rate": 1.816906920618355e-06, "loss": 0.7313, "step": 21135 }, { "epoch": 0.6549758334366093, "grad_norm": 2.851589663653134, "learning_rate": 1.8160915791533495e-06, "loss": 0.6235, "step": 21140 }, { "epoch": 0.6551307473044987, "grad_norm": 3.6368072644503173, "learning_rate": 1.815276237688344e-06, "loss": 0.6671, "step": 21145 }, { "epoch": 0.6552856611723882, "grad_norm": 2.8275360828713496, "learning_rate": 1.8144608962233384e-06, "loss": 0.6983, "step": 21150 }, { "epoch": 0.6554405750402776, "grad_norm": 2.3967389350022907, "learning_rate": 1.8136455547583329e-06, "loss": 0.6928, "step": 21155 }, { "epoch": 0.6555954889081671, "grad_norm": 2.628835664770768, "learning_rate": 1.8128302132933273e-06, "loss": 0.6901, "step": 21160 }, { "epoch": 0.6557504027760566, "grad_norm": 2.906321342986852, "learning_rate": 1.8120148718283218e-06, "loss": 0.6627, "step": 21165 }, { "epoch": 0.6559053166439459, "grad_norm": 2.679680270744138, "learning_rate": 1.8111995303633163e-06, "loss": 0.675, "step": 21170 }, { "epoch": 0.6560602305118354, "grad_norm": 3.462981701437735, "learning_rate": 1.8103841888983107e-06, "loss": 0.6675, "step": 21175 }, { "epoch": 0.6562151443797248, "grad_norm": 2.520787209049471, "learning_rate": 1.8095688474333054e-06, "loss": 0.6792, "step": 21180 }, { "epoch": 0.6563700582476143, "grad_norm": 2.539919443039954, "learning_rate": 1.8087535059682999e-06, "loss": 0.6617, "step": 21185 }, { "epoch": 0.6565249721155038, "grad_norm": 2.679455253017271, "learning_rate": 1.8079381645032943e-06, "loss": 0.6874, "step": 21190 }, { "epoch": 0.6566798859833932, "grad_norm": 2.542329439339367, "learning_rate": 1.8071228230382888e-06, "loss": 0.6494, "step": 21195 }, { "epoch": 0.6568347998512827, "grad_norm": 2.48923971492643, "learning_rate": 1.806307481573283e-06, "loss": 0.7559, "step": 21200 }, { "epoch": 0.6569897137191721, "grad_norm": 3.1163951952421245, "learning_rate": 1.8054921401082775e-06, "loss": 0.6894, "step": 21205 }, { "epoch": 0.6571446275870616, "grad_norm": 2.3580940227506724, "learning_rate": 1.804676798643272e-06, "loss": 0.6734, "step": 21210 }, { "epoch": 0.6572995414549511, "grad_norm": 3.093066734984161, "learning_rate": 1.8038614571782664e-06, "loss": 0.7145, "step": 21215 }, { "epoch": 0.6574544553228405, "grad_norm": 2.509923682791586, "learning_rate": 1.8030461157132609e-06, "loss": 0.6104, "step": 21220 }, { "epoch": 0.65760936919073, "grad_norm": 2.6062177518179417, "learning_rate": 1.8022307742482553e-06, "loss": 0.6396, "step": 21225 }, { "epoch": 0.6577642830586194, "grad_norm": 3.1448570366526987, "learning_rate": 1.8014154327832498e-06, "loss": 0.7061, "step": 21230 }, { "epoch": 0.6579191969265089, "grad_norm": 2.0357061071625986, "learning_rate": 1.8006000913182442e-06, "loss": 0.6487, "step": 21235 }, { "epoch": 0.6580741107943983, "grad_norm": 3.816996687682627, "learning_rate": 1.7997847498532387e-06, "loss": 0.6899, "step": 21240 }, { "epoch": 0.6582290246622877, "grad_norm": 2.6242956885314634, "learning_rate": 1.7989694083882331e-06, "loss": 0.6966, "step": 21245 }, { "epoch": 0.6583839385301772, "grad_norm": 2.788742007116289, "learning_rate": 1.7981540669232276e-06, "loss": 0.7116, "step": 21250 }, { "epoch": 0.6585388523980666, "grad_norm": 3.205989992239285, "learning_rate": 1.797338725458222e-06, "loss": 0.6766, "step": 21255 }, { "epoch": 0.6586937662659561, "grad_norm": 3.673680165839357, "learning_rate": 1.7965233839932163e-06, "loss": 0.6746, "step": 21260 }, { "epoch": 0.6588486801338456, "grad_norm": 2.1763667472465795, "learning_rate": 1.7957080425282108e-06, "loss": 0.705, "step": 21265 }, { "epoch": 0.659003594001735, "grad_norm": 2.8050905208846495, "learning_rate": 1.7948927010632052e-06, "loss": 0.6269, "step": 21270 }, { "epoch": 0.6591585078696245, "grad_norm": 2.543459910542077, "learning_rate": 1.7940773595982e-06, "loss": 0.6212, "step": 21275 }, { "epoch": 0.659313421737514, "grad_norm": 3.260293201642153, "learning_rate": 1.7932620181331944e-06, "loss": 0.6521, "step": 21280 }, { "epoch": 0.6594683356054034, "grad_norm": 1.9380734967197237, "learning_rate": 1.7924466766681888e-06, "loss": 0.6603, "step": 21285 }, { "epoch": 0.6596232494732929, "grad_norm": 2.713992483874449, "learning_rate": 1.7916313352031833e-06, "loss": 0.6743, "step": 21290 }, { "epoch": 0.6597781633411823, "grad_norm": 2.683075631761161, "learning_rate": 1.7908159937381777e-06, "loss": 0.6306, "step": 21295 }, { "epoch": 0.6599330772090718, "grad_norm": 2.624091712963178, "learning_rate": 1.7900006522731722e-06, "loss": 0.634, "step": 21300 }, { "epoch": 0.6600879910769613, "grad_norm": 2.5793800277243633, "learning_rate": 1.7891853108081667e-06, "loss": 0.6604, "step": 21305 }, { "epoch": 0.6602429049448507, "grad_norm": 2.7960254815034467, "learning_rate": 1.7883699693431611e-06, "loss": 0.658, "step": 21310 }, { "epoch": 0.6603978188127401, "grad_norm": 2.6653502892575065, "learning_rate": 1.7875546278781556e-06, "loss": 0.6554, "step": 21315 }, { "epoch": 0.6605527326806295, "grad_norm": 2.4459840463710694, "learning_rate": 1.78673928641315e-06, "loss": 0.6977, "step": 21320 }, { "epoch": 0.660707646548519, "grad_norm": 3.021071792059433, "learning_rate": 1.7859239449481445e-06, "loss": 0.6984, "step": 21325 }, { "epoch": 0.6608625604164085, "grad_norm": 2.516267771505506, "learning_rate": 1.785108603483139e-06, "loss": 0.7058, "step": 21330 }, { "epoch": 0.6610174742842979, "grad_norm": 2.531782792598737, "learning_rate": 1.7842932620181332e-06, "loss": 0.5813, "step": 21335 }, { "epoch": 0.6611723881521874, "grad_norm": 2.380262441740487, "learning_rate": 1.7834779205531277e-06, "loss": 0.6642, "step": 21340 }, { "epoch": 0.6613273020200768, "grad_norm": 2.6303115784466717, "learning_rate": 1.7826625790881221e-06, "loss": 0.6387, "step": 21345 }, { "epoch": 0.6614822158879663, "grad_norm": 3.894921047520297, "learning_rate": 1.7818472376231166e-06, "loss": 0.6049, "step": 21350 }, { "epoch": 0.6616371297558558, "grad_norm": 2.851802321843781, "learning_rate": 1.781031896158111e-06, "loss": 0.7154, "step": 21355 }, { "epoch": 0.6617920436237452, "grad_norm": 2.7805530131522747, "learning_rate": 1.7802165546931055e-06, "loss": 0.6796, "step": 21360 }, { "epoch": 0.6619469574916347, "grad_norm": 2.351476443617808, "learning_rate": 1.7794012132281e-06, "loss": 0.7461, "step": 21365 }, { "epoch": 0.6621018713595241, "grad_norm": 3.1034087975529423, "learning_rate": 1.7785858717630946e-06, "loss": 0.6993, "step": 21370 }, { "epoch": 0.6622567852274136, "grad_norm": 2.337989104049611, "learning_rate": 1.777770530298089e-06, "loss": 0.6163, "step": 21375 }, { "epoch": 0.6624116990953031, "grad_norm": 3.14304945676613, "learning_rate": 1.7769551888330835e-06, "loss": 0.678, "step": 21380 }, { "epoch": 0.6625666129631924, "grad_norm": 3.0302148645997606, "learning_rate": 1.776139847368078e-06, "loss": 0.7396, "step": 21385 }, { "epoch": 0.6627215268310819, "grad_norm": 3.966112316183657, "learning_rate": 1.7753245059030725e-06, "loss": 0.7367, "step": 21390 }, { "epoch": 0.6628764406989713, "grad_norm": 2.5631994807534553, "learning_rate": 1.774509164438067e-06, "loss": 0.6497, "step": 21395 }, { "epoch": 0.6630313545668608, "grad_norm": 2.579547680553353, "learning_rate": 1.7736938229730614e-06, "loss": 0.7161, "step": 21400 }, { "epoch": 0.6631862684347503, "grad_norm": 2.5260293012320028, "learning_rate": 1.7728784815080558e-06, "loss": 0.7109, "step": 21405 }, { "epoch": 0.6633411823026397, "grad_norm": 2.288339859051784, "learning_rate": 1.77206314004305e-06, "loss": 0.6214, "step": 21410 }, { "epoch": 0.6634960961705292, "grad_norm": 2.860529061282644, "learning_rate": 1.7712477985780445e-06, "loss": 0.696, "step": 21415 }, { "epoch": 0.6636510100384186, "grad_norm": 2.531742961443251, "learning_rate": 1.770432457113039e-06, "loss": 0.7334, "step": 21420 }, { "epoch": 0.6638059239063081, "grad_norm": 4.0357736049889175, "learning_rate": 1.7696171156480335e-06, "loss": 0.68, "step": 21425 }, { "epoch": 0.6639608377741976, "grad_norm": 2.817324033911764, "learning_rate": 1.768801774183028e-06, "loss": 0.6043, "step": 21430 }, { "epoch": 0.664115751642087, "grad_norm": 3.7780024020355008, "learning_rate": 1.7679864327180224e-06, "loss": 0.659, "step": 21435 }, { "epoch": 0.6642706655099765, "grad_norm": 2.387914862435268, "learning_rate": 1.7671710912530168e-06, "loss": 0.635, "step": 21440 }, { "epoch": 0.664425579377866, "grad_norm": 2.4745328071586763, "learning_rate": 1.7663557497880113e-06, "loss": 0.6179, "step": 21445 }, { "epoch": 0.6645804932457554, "grad_norm": 2.6835459407711033, "learning_rate": 1.7655404083230058e-06, "loss": 0.6492, "step": 21450 }, { "epoch": 0.6647354071136448, "grad_norm": 2.3376071160087175, "learning_rate": 1.7647250668580002e-06, "loss": 0.6855, "step": 21455 }, { "epoch": 0.6648903209815342, "grad_norm": 2.7716332525430367, "learning_rate": 1.7639097253929947e-06, "loss": 0.7006, "step": 21460 }, { "epoch": 0.6650452348494237, "grad_norm": 2.4724778782351855, "learning_rate": 1.7630943839279893e-06, "loss": 0.7285, "step": 21465 }, { "epoch": 0.6652001487173131, "grad_norm": 2.661993684657793, "learning_rate": 1.7622790424629838e-06, "loss": 0.6866, "step": 21470 }, { "epoch": 0.6653550625852026, "grad_norm": 3.1610800465570033, "learning_rate": 1.7614637009979783e-06, "loss": 0.6352, "step": 21475 }, { "epoch": 0.6655099764530921, "grad_norm": 2.4154103925359545, "learning_rate": 1.7606483595329727e-06, "loss": 0.6729, "step": 21480 }, { "epoch": 0.6656648903209815, "grad_norm": 2.40651792911429, "learning_rate": 1.759833018067967e-06, "loss": 0.6262, "step": 21485 }, { "epoch": 0.665819804188871, "grad_norm": 2.7269784456366715, "learning_rate": 1.7590176766029614e-06, "loss": 0.6711, "step": 21490 }, { "epoch": 0.6659747180567605, "grad_norm": 2.3726486088404717, "learning_rate": 1.7582023351379559e-06, "loss": 0.6929, "step": 21495 }, { "epoch": 0.6661296319246499, "grad_norm": 21.38876538768976, "learning_rate": 1.7573869936729504e-06, "loss": 0.6414, "step": 21500 }, { "epoch": 0.6662845457925394, "grad_norm": 2.3951144862285725, "learning_rate": 1.7565716522079448e-06, "loss": 0.6604, "step": 21505 }, { "epoch": 0.6664394596604288, "grad_norm": 2.8708206637682805, "learning_rate": 1.7557563107429393e-06, "loss": 0.7737, "step": 21510 }, { "epoch": 0.6665943735283183, "grad_norm": 3.1476516149229425, "learning_rate": 1.7549409692779337e-06, "loss": 0.6853, "step": 21515 }, { "epoch": 0.6667492873962078, "grad_norm": 3.050999757386862, "learning_rate": 1.7541256278129282e-06, "loss": 0.6447, "step": 21520 }, { "epoch": 0.6669042012640971, "grad_norm": 3.034369801616806, "learning_rate": 1.7533102863479226e-06, "loss": 0.6209, "step": 21525 }, { "epoch": 0.6670591151319866, "grad_norm": 2.716076836244515, "learning_rate": 1.7524949448829171e-06, "loss": 0.6687, "step": 21530 }, { "epoch": 0.667214028999876, "grad_norm": 2.414859943146664, "learning_rate": 1.7516796034179116e-06, "loss": 0.6927, "step": 21535 }, { "epoch": 0.6673689428677655, "grad_norm": 2.9323271961694877, "learning_rate": 1.750864261952906e-06, "loss": 0.7191, "step": 21540 }, { "epoch": 0.667523856735655, "grad_norm": 2.408354390704182, "learning_rate": 1.7500489204879003e-06, "loss": 0.6637, "step": 21545 }, { "epoch": 0.6676787706035444, "grad_norm": 3.775991749104408, "learning_rate": 1.7492335790228947e-06, "loss": 0.6536, "step": 21550 }, { "epoch": 0.6678336844714339, "grad_norm": 2.2741955182921716, "learning_rate": 1.7484182375578892e-06, "loss": 0.658, "step": 21555 }, { "epoch": 0.6679885983393233, "grad_norm": 2.837488747252198, "learning_rate": 1.747602896092884e-06, "loss": 0.6895, "step": 21560 }, { "epoch": 0.6681435122072128, "grad_norm": 2.69159156556629, "learning_rate": 1.7467875546278783e-06, "loss": 0.6981, "step": 21565 }, { "epoch": 0.6682984260751023, "grad_norm": 2.2323556248841134, "learning_rate": 1.7459722131628728e-06, "loss": 0.6238, "step": 21570 }, { "epoch": 0.6684533399429917, "grad_norm": 3.5277082143399445, "learning_rate": 1.7451568716978672e-06, "loss": 0.6881, "step": 21575 }, { "epoch": 0.6686082538108812, "grad_norm": 3.006157160044551, "learning_rate": 1.7443415302328617e-06, "loss": 0.6868, "step": 21580 }, { "epoch": 0.6687631676787706, "grad_norm": 2.892816701702562, "learning_rate": 1.7435261887678562e-06, "loss": 0.7722, "step": 21585 }, { "epoch": 0.6689180815466601, "grad_norm": 2.669886238635448, "learning_rate": 1.7427108473028506e-06, "loss": 0.6715, "step": 21590 }, { "epoch": 0.6690729954145496, "grad_norm": 4.027753785985114, "learning_rate": 1.741895505837845e-06, "loss": 0.6111, "step": 21595 }, { "epoch": 0.6692279092824389, "grad_norm": 3.0527401569778867, "learning_rate": 1.7410801643728395e-06, "loss": 0.6446, "step": 21600 }, { "epoch": 0.6693828231503284, "grad_norm": 2.702361334274407, "learning_rate": 1.740264822907834e-06, "loss": 0.7016, "step": 21605 }, { "epoch": 0.6695377370182178, "grad_norm": 2.803549671533653, "learning_rate": 1.7394494814428285e-06, "loss": 0.6724, "step": 21610 }, { "epoch": 0.6696926508861073, "grad_norm": 2.519011826319048, "learning_rate": 1.738634139977823e-06, "loss": 0.6821, "step": 21615 }, { "epoch": 0.6698475647539968, "grad_norm": 2.568228826124198, "learning_rate": 1.7378187985128172e-06, "loss": 0.7561, "step": 21620 }, { "epoch": 0.6700024786218862, "grad_norm": 2.9215654205457824, "learning_rate": 1.7370034570478116e-06, "loss": 0.6567, "step": 21625 }, { "epoch": 0.6701573924897757, "grad_norm": 2.8580114395046747, "learning_rate": 1.736188115582806e-06, "loss": 0.6476, "step": 21630 }, { "epoch": 0.6703123063576651, "grad_norm": 2.554511067254803, "learning_rate": 1.7353727741178005e-06, "loss": 0.6273, "step": 21635 }, { "epoch": 0.6704672202255546, "grad_norm": 3.1424229030235367, "learning_rate": 1.734557432652795e-06, "loss": 0.7109, "step": 21640 }, { "epoch": 0.6706221340934441, "grad_norm": 2.826550419303558, "learning_rate": 1.7337420911877895e-06, "loss": 0.6847, "step": 21645 }, { "epoch": 0.6707770479613335, "grad_norm": 2.200437786784366, "learning_rate": 1.732926749722784e-06, "loss": 0.7192, "step": 21650 }, { "epoch": 0.670931961829223, "grad_norm": 2.5734454104802977, "learning_rate": 1.7321114082577786e-06, "loss": 0.6305, "step": 21655 }, { "epoch": 0.6710868756971125, "grad_norm": 2.408771273922213, "learning_rate": 1.731296066792773e-06, "loss": 0.6463, "step": 21660 }, { "epoch": 0.6712417895650019, "grad_norm": 2.6751622328215916, "learning_rate": 1.7304807253277675e-06, "loss": 0.6436, "step": 21665 }, { "epoch": 0.6713967034328913, "grad_norm": 3.1607801391885686, "learning_rate": 1.729665383862762e-06, "loss": 0.6677, "step": 21670 }, { "epoch": 0.6715516173007807, "grad_norm": 2.6350262943413862, "learning_rate": 1.7288500423977564e-06, "loss": 0.6277, "step": 21675 }, { "epoch": 0.6717065311686702, "grad_norm": 2.4850656636446407, "learning_rate": 1.7280347009327509e-06, "loss": 0.6977, "step": 21680 }, { "epoch": 0.6718614450365596, "grad_norm": 2.73529606717639, "learning_rate": 1.7272193594677453e-06, "loss": 0.7551, "step": 21685 }, { "epoch": 0.6720163589044491, "grad_norm": 2.4213665843894976, "learning_rate": 1.7264040180027398e-06, "loss": 0.6977, "step": 21690 }, { "epoch": 0.6721712727723386, "grad_norm": 2.4531626279254155, "learning_rate": 1.7255886765377343e-06, "loss": 0.7494, "step": 21695 }, { "epoch": 0.672326186640228, "grad_norm": 2.471009909895987, "learning_rate": 1.7247733350727285e-06, "loss": 0.6496, "step": 21700 }, { "epoch": 0.6724811005081175, "grad_norm": 2.668800228656849, "learning_rate": 1.723957993607723e-06, "loss": 0.6947, "step": 21705 }, { "epoch": 0.672636014376007, "grad_norm": 2.7034759692090775, "learning_rate": 1.7231426521427174e-06, "loss": 0.7554, "step": 21710 }, { "epoch": 0.6727909282438964, "grad_norm": 2.9405757162510295, "learning_rate": 1.7223273106777119e-06, "loss": 0.6542, "step": 21715 }, { "epoch": 0.6729458421117859, "grad_norm": 2.592787251008673, "learning_rate": 1.7215119692127063e-06, "loss": 0.6369, "step": 21720 }, { "epoch": 0.6731007559796753, "grad_norm": 2.4044152567490342, "learning_rate": 1.7206966277477008e-06, "loss": 0.6496, "step": 21725 }, { "epoch": 0.6732556698475648, "grad_norm": 2.3930723383927672, "learning_rate": 1.7198812862826953e-06, "loss": 0.7214, "step": 21730 }, { "epoch": 0.6734105837154543, "grad_norm": 3.2716454362236562, "learning_rate": 1.7190659448176897e-06, "loss": 0.6107, "step": 21735 }, { "epoch": 0.6735654975833436, "grad_norm": 2.551252200484895, "learning_rate": 1.7182506033526842e-06, "loss": 0.6554, "step": 21740 }, { "epoch": 0.6737204114512331, "grad_norm": 3.8403879633752007, "learning_rate": 1.7174352618876786e-06, "loss": 0.6348, "step": 21745 }, { "epoch": 0.6738753253191225, "grad_norm": 3.2597972359785534, "learning_rate": 1.7166199204226733e-06, "loss": 0.7235, "step": 21750 }, { "epoch": 0.674030239187012, "grad_norm": 3.1069377265607825, "learning_rate": 1.7158045789576678e-06, "loss": 0.7174, "step": 21755 }, { "epoch": 0.6741851530549015, "grad_norm": 2.6126600259603383, "learning_rate": 1.7149892374926622e-06, "loss": 0.6352, "step": 21760 }, { "epoch": 0.6743400669227909, "grad_norm": 2.701857693309224, "learning_rate": 1.7141738960276567e-06, "loss": 0.6441, "step": 21765 }, { "epoch": 0.6744949807906804, "grad_norm": 2.3010740377919956, "learning_rate": 1.7133585545626511e-06, "loss": 0.6623, "step": 21770 }, { "epoch": 0.6746498946585698, "grad_norm": 3.011079756517945, "learning_rate": 1.7125432130976454e-06, "loss": 0.7243, "step": 21775 }, { "epoch": 0.6748048085264593, "grad_norm": 2.3988496783601625, "learning_rate": 1.7117278716326399e-06, "loss": 0.6991, "step": 21780 }, { "epoch": 0.6749597223943488, "grad_norm": 2.801800294083154, "learning_rate": 1.7109125301676343e-06, "loss": 0.745, "step": 21785 }, { "epoch": 0.6751146362622382, "grad_norm": 2.988868155525815, "learning_rate": 1.7100971887026288e-06, "loss": 0.7518, "step": 21790 }, { "epoch": 0.6752695501301277, "grad_norm": 2.462822536701288, "learning_rate": 1.7092818472376232e-06, "loss": 0.6031, "step": 21795 }, { "epoch": 0.6754244639980171, "grad_norm": 2.4539356939441737, "learning_rate": 1.7084665057726177e-06, "loss": 0.5979, "step": 21800 }, { "epoch": 0.6755793778659066, "grad_norm": 2.4539810433657294, "learning_rate": 1.7076511643076121e-06, "loss": 0.685, "step": 21805 }, { "epoch": 0.675734291733796, "grad_norm": 3.612929397540533, "learning_rate": 1.7068358228426066e-06, "loss": 0.6407, "step": 21810 }, { "epoch": 0.6758892056016854, "grad_norm": 2.3689387589145228, "learning_rate": 1.706020481377601e-06, "loss": 0.7105, "step": 21815 }, { "epoch": 0.6760441194695749, "grad_norm": 2.5251973210072, "learning_rate": 1.7052051399125955e-06, "loss": 0.6753, "step": 21820 }, { "epoch": 0.6761990333374643, "grad_norm": 3.1821174427290235, "learning_rate": 1.70438979844759e-06, "loss": 0.6826, "step": 21825 }, { "epoch": 0.6763539472053538, "grad_norm": 3.1299218081955353, "learning_rate": 1.7035744569825842e-06, "loss": 0.6894, "step": 21830 }, { "epoch": 0.6765088610732433, "grad_norm": 2.296590096509736, "learning_rate": 1.7027591155175787e-06, "loss": 0.6231, "step": 21835 }, { "epoch": 0.6766637749411327, "grad_norm": 2.5310846592757406, "learning_rate": 1.7019437740525736e-06, "loss": 0.6137, "step": 21840 }, { "epoch": 0.6768186888090222, "grad_norm": 2.7071738594204993, "learning_rate": 1.701128432587568e-06, "loss": 0.6425, "step": 21845 }, { "epoch": 0.6769736026769116, "grad_norm": 2.675803788184429, "learning_rate": 1.7003130911225623e-06, "loss": 0.6442, "step": 21850 }, { "epoch": 0.6771285165448011, "grad_norm": 2.2213487290038323, "learning_rate": 1.6994977496575567e-06, "loss": 0.628, "step": 21855 }, { "epoch": 0.6772834304126906, "grad_norm": 2.793231230331935, "learning_rate": 1.6986824081925512e-06, "loss": 0.7588, "step": 21860 }, { "epoch": 0.67743834428058, "grad_norm": 2.210026380418301, "learning_rate": 1.6978670667275457e-06, "loss": 0.7105, "step": 21865 }, { "epoch": 0.6775932581484695, "grad_norm": 3.609212395847262, "learning_rate": 1.6970517252625401e-06, "loss": 0.7107, "step": 21870 }, { "epoch": 0.677748172016359, "grad_norm": 3.0793331239266175, "learning_rate": 1.6962363837975346e-06, "loss": 0.7445, "step": 21875 }, { "epoch": 0.6779030858842484, "grad_norm": 2.824971973067289, "learning_rate": 1.695421042332529e-06, "loss": 0.7135, "step": 21880 }, { "epoch": 0.6780579997521378, "grad_norm": 2.536378676092954, "learning_rate": 1.6946057008675235e-06, "loss": 0.5521, "step": 21885 }, { "epoch": 0.6782129136200272, "grad_norm": 2.3439162421319955, "learning_rate": 1.693790359402518e-06, "loss": 0.6445, "step": 21890 }, { "epoch": 0.6783678274879167, "grad_norm": 2.892548593906391, "learning_rate": 1.6929750179375124e-06, "loss": 0.7432, "step": 21895 }, { "epoch": 0.6785227413558061, "grad_norm": 3.246574822520322, "learning_rate": 1.6921596764725069e-06, "loss": 0.5964, "step": 21900 }, { "epoch": 0.6786776552236956, "grad_norm": 2.8793360577394878, "learning_rate": 1.6913443350075013e-06, "loss": 0.7966, "step": 21905 }, { "epoch": 0.6788325690915851, "grad_norm": 3.6904563983058942, "learning_rate": 1.6905289935424956e-06, "loss": 0.7108, "step": 21910 }, { "epoch": 0.6789874829594745, "grad_norm": 2.8569969002004276, "learning_rate": 1.68971365207749e-06, "loss": 0.7274, "step": 21915 }, { "epoch": 0.679142396827364, "grad_norm": 4.9282106982573435, "learning_rate": 1.6888983106124845e-06, "loss": 0.6367, "step": 21920 }, { "epoch": 0.6792973106952535, "grad_norm": 3.169962141123223, "learning_rate": 1.688082969147479e-06, "loss": 0.6804, "step": 21925 }, { "epoch": 0.6794522245631429, "grad_norm": 2.4478577647814768, "learning_rate": 1.6872676276824734e-06, "loss": 0.6438, "step": 21930 }, { "epoch": 0.6796071384310324, "grad_norm": 2.206767386241759, "learning_rate": 1.686452286217468e-06, "loss": 0.6057, "step": 21935 }, { "epoch": 0.6797620522989218, "grad_norm": 2.949467326284599, "learning_rate": 1.6856369447524625e-06, "loss": 0.6394, "step": 21940 }, { "epoch": 0.6799169661668113, "grad_norm": 2.420333291393433, "learning_rate": 1.684821603287457e-06, "loss": 0.6414, "step": 21945 }, { "epoch": 0.6800718800347008, "grad_norm": 2.5407065845345853, "learning_rate": 1.6840062618224515e-06, "loss": 0.6308, "step": 21950 }, { "epoch": 0.6802267939025901, "grad_norm": 2.239133567110068, "learning_rate": 1.683190920357446e-06, "loss": 0.6977, "step": 21955 }, { "epoch": 0.6803817077704796, "grad_norm": 3.3342589698921428, "learning_rate": 1.6823755788924404e-06, "loss": 0.6841, "step": 21960 }, { "epoch": 0.680536621638369, "grad_norm": 3.318441297107581, "learning_rate": 1.6815602374274348e-06, "loss": 0.7604, "step": 21965 }, { "epoch": 0.6806915355062585, "grad_norm": 3.0399466572405434, "learning_rate": 1.6807448959624293e-06, "loss": 0.6437, "step": 21970 }, { "epoch": 0.680846449374148, "grad_norm": 2.6398239314028205, "learning_rate": 1.6799295544974238e-06, "loss": 0.6903, "step": 21975 }, { "epoch": 0.6810013632420374, "grad_norm": 2.7892835815698147, "learning_rate": 1.6791142130324182e-06, "loss": 0.694, "step": 21980 }, { "epoch": 0.6811562771099269, "grad_norm": 2.429296775725166, "learning_rate": 1.6782988715674125e-06, "loss": 0.7083, "step": 21985 }, { "epoch": 0.6813111909778163, "grad_norm": 2.695123535549737, "learning_rate": 1.677483530102407e-06, "loss": 0.6609, "step": 21990 }, { "epoch": 0.6814661048457058, "grad_norm": 2.349283513096034, "learning_rate": 1.6766681886374014e-06, "loss": 0.6739, "step": 21995 }, { "epoch": 0.6816210187135953, "grad_norm": 2.278630300830965, "learning_rate": 1.6758528471723958e-06, "loss": 0.6919, "step": 22000 }, { "epoch": 0.6817759325814847, "grad_norm": 2.913703654653176, "learning_rate": 1.6750375057073903e-06, "loss": 0.7236, "step": 22005 }, { "epoch": 0.6819308464493742, "grad_norm": 2.3429137947300096, "learning_rate": 1.6742221642423848e-06, "loss": 0.6383, "step": 22010 }, { "epoch": 0.6820857603172636, "grad_norm": 2.941060670240184, "learning_rate": 1.6734068227773792e-06, "loss": 0.6566, "step": 22015 }, { "epoch": 0.6822406741851531, "grad_norm": 2.5473678194994225, "learning_rate": 1.6725914813123737e-06, "loss": 0.6792, "step": 22020 }, { "epoch": 0.6823955880530425, "grad_norm": 2.890357178399481, "learning_rate": 1.6717761398473681e-06, "loss": 0.593, "step": 22025 }, { "epoch": 0.6825505019209319, "grad_norm": 2.55018871534697, "learning_rate": 1.6709607983823628e-06, "loss": 0.649, "step": 22030 }, { "epoch": 0.6827054157888214, "grad_norm": 2.7571092741939025, "learning_rate": 1.6701454569173573e-06, "loss": 0.691, "step": 22035 }, { "epoch": 0.6828603296567108, "grad_norm": 2.1206582452321783, "learning_rate": 1.6693301154523517e-06, "loss": 0.6409, "step": 22040 }, { "epoch": 0.6830152435246003, "grad_norm": 2.649791384868864, "learning_rate": 1.6685147739873462e-06, "loss": 0.6293, "step": 22045 }, { "epoch": 0.6831701573924898, "grad_norm": 2.644205802358605, "learning_rate": 1.6676994325223406e-06, "loss": 0.6874, "step": 22050 }, { "epoch": 0.6833250712603792, "grad_norm": 2.5819889164433305, "learning_rate": 1.666884091057335e-06, "loss": 0.6263, "step": 22055 }, { "epoch": 0.6834799851282687, "grad_norm": 2.9182926128273996, "learning_rate": 1.6660687495923293e-06, "loss": 0.6147, "step": 22060 }, { "epoch": 0.6836348989961581, "grad_norm": 2.490305356305125, "learning_rate": 1.6652534081273238e-06, "loss": 0.6446, "step": 22065 }, { "epoch": 0.6837898128640476, "grad_norm": 2.710216011873999, "learning_rate": 1.6644380666623183e-06, "loss": 0.7406, "step": 22070 }, { "epoch": 0.6839447267319371, "grad_norm": 3.1670812558718446, "learning_rate": 1.6636227251973127e-06, "loss": 0.6291, "step": 22075 }, { "epoch": 0.6840996405998265, "grad_norm": 2.5112019672764214, "learning_rate": 1.6628073837323072e-06, "loss": 0.6306, "step": 22080 }, { "epoch": 0.684254554467716, "grad_norm": 2.343940588994788, "learning_rate": 1.6619920422673016e-06, "loss": 0.6649, "step": 22085 }, { "epoch": 0.6844094683356055, "grad_norm": 2.4168468843434954, "learning_rate": 1.661176700802296e-06, "loss": 0.6586, "step": 22090 }, { "epoch": 0.6845643822034948, "grad_norm": 3.634431955352942, "learning_rate": 1.6603613593372906e-06, "loss": 0.6612, "step": 22095 }, { "epoch": 0.6847192960713843, "grad_norm": 2.4853380535540386, "learning_rate": 1.659546017872285e-06, "loss": 0.6621, "step": 22100 }, { "epoch": 0.6848742099392737, "grad_norm": 2.3840270357863473, "learning_rate": 1.6587306764072795e-06, "loss": 0.6263, "step": 22105 }, { "epoch": 0.6850291238071632, "grad_norm": 2.667582502592733, "learning_rate": 1.657915334942274e-06, "loss": 0.6212, "step": 22110 }, { "epoch": 0.6851840376750526, "grad_norm": 2.4524888278272217, "learning_rate": 1.6570999934772684e-06, "loss": 0.6074, "step": 22115 }, { "epoch": 0.6853389515429421, "grad_norm": 2.7368828777305065, "learning_rate": 1.6562846520122626e-06, "loss": 0.7393, "step": 22120 }, { "epoch": 0.6854938654108316, "grad_norm": 3.335575811953213, "learning_rate": 1.6554693105472575e-06, "loss": 0.6955, "step": 22125 }, { "epoch": 0.685648779278721, "grad_norm": 2.9752371257181043, "learning_rate": 1.654653969082252e-06, "loss": 0.6314, "step": 22130 }, { "epoch": 0.6858036931466105, "grad_norm": 2.6112731293125626, "learning_rate": 1.6538386276172462e-06, "loss": 0.7138, "step": 22135 }, { "epoch": 0.6859586070145, "grad_norm": 2.577798103484358, "learning_rate": 1.6530232861522407e-06, "loss": 0.737, "step": 22140 }, { "epoch": 0.6861135208823894, "grad_norm": 2.9910238358804433, "learning_rate": 1.6522079446872352e-06, "loss": 0.6386, "step": 22145 }, { "epoch": 0.6862684347502789, "grad_norm": 4.934234061430589, "learning_rate": 1.6513926032222296e-06, "loss": 0.6399, "step": 22150 }, { "epoch": 0.6864233486181683, "grad_norm": 3.1215756392327876, "learning_rate": 1.650577261757224e-06, "loss": 0.6986, "step": 22155 }, { "epoch": 0.6865782624860578, "grad_norm": 2.5617088144341893, "learning_rate": 1.6497619202922185e-06, "loss": 0.6829, "step": 22160 }, { "epoch": 0.6867331763539472, "grad_norm": 2.995262217703063, "learning_rate": 1.648946578827213e-06, "loss": 0.6664, "step": 22165 }, { "epoch": 0.6868880902218366, "grad_norm": 2.4899917712705246, "learning_rate": 1.6481312373622075e-06, "loss": 0.5814, "step": 22170 }, { "epoch": 0.6870430040897261, "grad_norm": 2.1987487165180952, "learning_rate": 1.647315895897202e-06, "loss": 0.6728, "step": 22175 }, { "epoch": 0.6871979179576155, "grad_norm": 2.310640887150929, "learning_rate": 1.6465005544321964e-06, "loss": 0.6547, "step": 22180 }, { "epoch": 0.687352831825505, "grad_norm": 2.8131943707370515, "learning_rate": 1.6456852129671908e-06, "loss": 0.697, "step": 22185 }, { "epoch": 0.6875077456933945, "grad_norm": 2.8042801136245097, "learning_rate": 1.6448698715021853e-06, "loss": 0.6722, "step": 22190 }, { "epoch": 0.6876626595612839, "grad_norm": 2.1617867050779593, "learning_rate": 1.6440545300371795e-06, "loss": 0.7004, "step": 22195 }, { "epoch": 0.6878175734291734, "grad_norm": 3.57139438760735, "learning_rate": 1.643239188572174e-06, "loss": 0.6805, "step": 22200 }, { "epoch": 0.6879724872970628, "grad_norm": 2.3545117310981705, "learning_rate": 1.6424238471071685e-06, "loss": 0.6568, "step": 22205 }, { "epoch": 0.6881274011649523, "grad_norm": 2.6364441150485365, "learning_rate": 1.641608505642163e-06, "loss": 0.673, "step": 22210 }, { "epoch": 0.6882823150328418, "grad_norm": 2.1344452930163498, "learning_rate": 1.6407931641771574e-06, "loss": 0.7067, "step": 22215 }, { "epoch": 0.6884372289007312, "grad_norm": 3.2374737670704268, "learning_rate": 1.639977822712152e-06, "loss": 0.7256, "step": 22220 }, { "epoch": 0.6885921427686207, "grad_norm": 2.5989289631821206, "learning_rate": 1.6391624812471465e-06, "loss": 0.7217, "step": 22225 }, { "epoch": 0.6887470566365101, "grad_norm": 3.1574234270549977, "learning_rate": 1.638347139782141e-06, "loss": 0.702, "step": 22230 }, { "epoch": 0.6889019705043996, "grad_norm": 2.6269943802070825, "learning_rate": 1.6375317983171354e-06, "loss": 0.7089, "step": 22235 }, { "epoch": 0.689056884372289, "grad_norm": 2.39807950823122, "learning_rate": 1.6367164568521299e-06, "loss": 0.6491, "step": 22240 }, { "epoch": 0.6892117982401784, "grad_norm": 2.863507578703484, "learning_rate": 1.6359011153871243e-06, "loss": 0.6287, "step": 22245 }, { "epoch": 0.6893667121080679, "grad_norm": 2.2954985047829592, "learning_rate": 1.6350857739221188e-06, "loss": 0.6682, "step": 22250 }, { "epoch": 0.6895216259759573, "grad_norm": 2.7636815324437123, "learning_rate": 1.6342704324571133e-06, "loss": 0.6337, "step": 22255 }, { "epoch": 0.6896765398438468, "grad_norm": 2.426362260360609, "learning_rate": 1.6334550909921077e-06, "loss": 0.7712, "step": 22260 }, { "epoch": 0.6898314537117363, "grad_norm": 2.4419849701571366, "learning_rate": 1.6326397495271022e-06, "loss": 0.6526, "step": 22265 }, { "epoch": 0.6899863675796257, "grad_norm": 6.305019890184574, "learning_rate": 1.6318244080620964e-06, "loss": 0.6931, "step": 22270 }, { "epoch": 0.6901412814475152, "grad_norm": 3.0884558540831275, "learning_rate": 1.6310090665970909e-06, "loss": 0.6922, "step": 22275 }, { "epoch": 0.6902961953154046, "grad_norm": 2.390896078337737, "learning_rate": 1.6301937251320853e-06, "loss": 0.6775, "step": 22280 }, { "epoch": 0.6904511091832941, "grad_norm": 2.4271878038676102, "learning_rate": 1.6293783836670798e-06, "loss": 0.6549, "step": 22285 }, { "epoch": 0.6906060230511836, "grad_norm": 2.750104254377919, "learning_rate": 1.6285630422020743e-06, "loss": 0.6923, "step": 22290 }, { "epoch": 0.690760936919073, "grad_norm": 3.5160630417341823, "learning_rate": 1.6277477007370687e-06, "loss": 0.6818, "step": 22295 }, { "epoch": 0.6909158507869625, "grad_norm": 2.7376100860873316, "learning_rate": 1.6269323592720632e-06, "loss": 0.6756, "step": 22300 }, { "epoch": 0.691070764654852, "grad_norm": 2.8220812517337595, "learning_rate": 1.6261170178070576e-06, "loss": 0.6593, "step": 22305 }, { "epoch": 0.6912256785227413, "grad_norm": 2.584006766426167, "learning_rate": 1.625301676342052e-06, "loss": 0.6504, "step": 22310 }, { "epoch": 0.6913805923906308, "grad_norm": 2.86347973011523, "learning_rate": 1.6244863348770468e-06, "loss": 0.7167, "step": 22315 }, { "epoch": 0.6915355062585202, "grad_norm": 3.5865296883391364, "learning_rate": 1.6236709934120412e-06, "loss": 0.6676, "step": 22320 }, { "epoch": 0.6916904201264097, "grad_norm": 4.137073524531055, "learning_rate": 1.6228556519470357e-06, "loss": 0.6679, "step": 22325 }, { "epoch": 0.6918453339942991, "grad_norm": 2.2382889122911864, "learning_rate": 1.6220403104820301e-06, "loss": 0.5872, "step": 22330 }, { "epoch": 0.6920002478621886, "grad_norm": 3.2888196022266256, "learning_rate": 1.6212249690170246e-06, "loss": 0.6301, "step": 22335 }, { "epoch": 0.6921551617300781, "grad_norm": 2.6988911018097954, "learning_rate": 1.620409627552019e-06, "loss": 0.6776, "step": 22340 }, { "epoch": 0.6923100755979675, "grad_norm": 2.8733841922248686, "learning_rate": 1.6195942860870133e-06, "loss": 0.6345, "step": 22345 }, { "epoch": 0.692464989465857, "grad_norm": 3.2800819925889155, "learning_rate": 1.6187789446220078e-06, "loss": 0.5923, "step": 22350 }, { "epoch": 0.6926199033337465, "grad_norm": 2.5648351587093505, "learning_rate": 1.6179636031570022e-06, "loss": 0.6945, "step": 22355 }, { "epoch": 0.6927748172016359, "grad_norm": 2.5645841362436728, "learning_rate": 1.6171482616919967e-06, "loss": 0.6694, "step": 22360 }, { "epoch": 0.6929297310695254, "grad_norm": 2.6745468338177556, "learning_rate": 1.6163329202269911e-06, "loss": 0.6632, "step": 22365 }, { "epoch": 0.6930846449374148, "grad_norm": 2.7630497256317077, "learning_rate": 1.6155175787619856e-06, "loss": 0.6648, "step": 22370 }, { "epoch": 0.6932395588053043, "grad_norm": 2.5763039688378258, "learning_rate": 1.61470223729698e-06, "loss": 0.6656, "step": 22375 }, { "epoch": 0.6933944726731937, "grad_norm": 2.7900261477895913, "learning_rate": 1.6138868958319745e-06, "loss": 0.6158, "step": 22380 }, { "epoch": 0.6935493865410831, "grad_norm": 2.3944899102950052, "learning_rate": 1.613071554366969e-06, "loss": 0.6892, "step": 22385 }, { "epoch": 0.6937043004089726, "grad_norm": 2.5988512474158045, "learning_rate": 1.6122562129019634e-06, "loss": 0.6722, "step": 22390 }, { "epoch": 0.693859214276862, "grad_norm": 3.211815467224754, "learning_rate": 1.611440871436958e-06, "loss": 0.62, "step": 22395 }, { "epoch": 0.6940141281447515, "grad_norm": 2.317623541280542, "learning_rate": 1.6106255299719524e-06, "loss": 0.6784, "step": 22400 }, { "epoch": 0.694169042012641, "grad_norm": 2.987087611684539, "learning_rate": 1.6098101885069466e-06, "loss": 0.6822, "step": 22405 }, { "epoch": 0.6943239558805304, "grad_norm": 3.0973772005537628, "learning_rate": 1.6089948470419415e-06, "loss": 0.6453, "step": 22410 }, { "epoch": 0.6944788697484199, "grad_norm": 3.6317848469276575, "learning_rate": 1.608179505576936e-06, "loss": 0.7331, "step": 22415 }, { "epoch": 0.6946337836163093, "grad_norm": 2.3739504000781215, "learning_rate": 1.6073641641119304e-06, "loss": 0.6223, "step": 22420 }, { "epoch": 0.6947886974841988, "grad_norm": 3.647145971931868, "learning_rate": 1.6065488226469247e-06, "loss": 0.7291, "step": 22425 }, { "epoch": 0.6949436113520883, "grad_norm": 3.78919984757666, "learning_rate": 1.6057334811819191e-06, "loss": 0.6777, "step": 22430 }, { "epoch": 0.6950985252199777, "grad_norm": 2.4465941075968107, "learning_rate": 1.6049181397169136e-06, "loss": 0.6345, "step": 22435 }, { "epoch": 0.6952534390878672, "grad_norm": 2.7702579462562684, "learning_rate": 1.604102798251908e-06, "loss": 0.7535, "step": 22440 }, { "epoch": 0.6954083529557566, "grad_norm": 2.0228519754298038, "learning_rate": 1.6032874567869025e-06, "loss": 0.6562, "step": 22445 }, { "epoch": 0.695563266823646, "grad_norm": 2.8372326062844926, "learning_rate": 1.602472115321897e-06, "loss": 0.6351, "step": 22450 }, { "epoch": 0.6957181806915355, "grad_norm": 2.616192743448126, "learning_rate": 1.6016567738568914e-06, "loss": 0.6105, "step": 22455 }, { "epoch": 0.6958730945594249, "grad_norm": 3.058264711487051, "learning_rate": 1.6008414323918859e-06, "loss": 0.6877, "step": 22460 }, { "epoch": 0.6960280084273144, "grad_norm": 2.4536583345819487, "learning_rate": 1.6000260909268803e-06, "loss": 0.6583, "step": 22465 }, { "epoch": 0.6961829222952038, "grad_norm": 2.0768014102706824, "learning_rate": 1.5992107494618748e-06, "loss": 0.6547, "step": 22470 }, { "epoch": 0.6963378361630933, "grad_norm": 3.643264771732303, "learning_rate": 1.5983954079968692e-06, "loss": 0.7126, "step": 22475 }, { "epoch": 0.6964927500309828, "grad_norm": 3.80888784929012, "learning_rate": 1.5975800665318635e-06, "loss": 0.6146, "step": 22480 }, { "epoch": 0.6966476638988722, "grad_norm": 3.372817344805883, "learning_rate": 1.596764725066858e-06, "loss": 0.7071, "step": 22485 }, { "epoch": 0.6968025777667617, "grad_norm": 2.671582414682265, "learning_rate": 1.5959493836018524e-06, "loss": 0.6927, "step": 22490 }, { "epoch": 0.6969574916346511, "grad_norm": 2.1999475485098934, "learning_rate": 1.5951340421368469e-06, "loss": 0.7021, "step": 22495 }, { "epoch": 0.6971124055025406, "grad_norm": 2.7020707319030257, "learning_rate": 1.5943187006718415e-06, "loss": 0.7194, "step": 22500 }, { "epoch": 0.6972673193704301, "grad_norm": 2.5007491930985686, "learning_rate": 1.593503359206836e-06, "loss": 0.7485, "step": 22505 }, { "epoch": 0.6974222332383195, "grad_norm": 2.3274250084609345, "learning_rate": 1.5926880177418305e-06, "loss": 0.6114, "step": 22510 }, { "epoch": 0.697577147106209, "grad_norm": 2.8405514077317053, "learning_rate": 1.591872676276825e-06, "loss": 0.6243, "step": 22515 }, { "epoch": 0.6977320609740985, "grad_norm": 4.166510736629593, "learning_rate": 1.5910573348118194e-06, "loss": 0.6719, "step": 22520 }, { "epoch": 0.6978869748419878, "grad_norm": 2.3381797410893466, "learning_rate": 1.5902419933468138e-06, "loss": 0.6797, "step": 22525 }, { "epoch": 0.6980418887098773, "grad_norm": 2.728615823113517, "learning_rate": 1.5894266518818083e-06, "loss": 0.6028, "step": 22530 }, { "epoch": 0.6981968025777667, "grad_norm": 3.7208756503644946, "learning_rate": 1.5886113104168028e-06, "loss": 0.6794, "step": 22535 }, { "epoch": 0.6983517164456562, "grad_norm": 2.8087506562866036, "learning_rate": 1.5877959689517972e-06, "loss": 0.7289, "step": 22540 }, { "epoch": 0.6985066303135457, "grad_norm": 2.6157372801373215, "learning_rate": 1.5869806274867917e-06, "loss": 0.6786, "step": 22545 }, { "epoch": 0.6986615441814351, "grad_norm": 2.540253397539527, "learning_rate": 1.5861652860217861e-06, "loss": 0.6806, "step": 22550 }, { "epoch": 0.6988164580493246, "grad_norm": 3.0720104191676625, "learning_rate": 1.5853499445567804e-06, "loss": 0.6465, "step": 22555 }, { "epoch": 0.698971371917214, "grad_norm": 2.8276450275322946, "learning_rate": 1.5845346030917748e-06, "loss": 0.6874, "step": 22560 }, { "epoch": 0.6991262857851035, "grad_norm": 2.3466908364302137, "learning_rate": 1.5837192616267693e-06, "loss": 0.6745, "step": 22565 }, { "epoch": 0.699281199652993, "grad_norm": 2.3103844817718526, "learning_rate": 1.5829039201617638e-06, "loss": 0.6684, "step": 22570 }, { "epoch": 0.6994361135208824, "grad_norm": 2.6923064669923336, "learning_rate": 1.5820885786967582e-06, "loss": 0.6405, "step": 22575 }, { "epoch": 0.6995910273887719, "grad_norm": 3.075155271257282, "learning_rate": 1.5812732372317527e-06, "loss": 0.632, "step": 22580 }, { "epoch": 0.6997459412566613, "grad_norm": 2.7697451292355244, "learning_rate": 1.5804578957667471e-06, "loss": 0.6411, "step": 22585 }, { "epoch": 0.6999008551245508, "grad_norm": 5.809242618266345, "learning_rate": 1.5796425543017416e-06, "loss": 0.7268, "step": 22590 }, { "epoch": 0.7000557689924402, "grad_norm": 2.6065234569016744, "learning_rate": 1.5788272128367363e-06, "loss": 0.6506, "step": 22595 }, { "epoch": 0.7002106828603296, "grad_norm": 2.6990553070933365, "learning_rate": 1.5780118713717307e-06, "loss": 0.6955, "step": 22600 }, { "epoch": 0.7003655967282191, "grad_norm": 2.345437497901311, "learning_rate": 1.5771965299067252e-06, "loss": 0.7667, "step": 22605 }, { "epoch": 0.7005205105961085, "grad_norm": 3.3549743515132615, "learning_rate": 1.5763811884417196e-06, "loss": 0.6808, "step": 22610 }, { "epoch": 0.700675424463998, "grad_norm": 3.6158574110451935, "learning_rate": 1.575565846976714e-06, "loss": 0.653, "step": 22615 }, { "epoch": 0.7008303383318875, "grad_norm": 2.587083166990627, "learning_rate": 1.5747505055117086e-06, "loss": 0.6521, "step": 22620 }, { "epoch": 0.7009852521997769, "grad_norm": 3.0816269354793095, "learning_rate": 1.573935164046703e-06, "loss": 0.7058, "step": 22625 }, { "epoch": 0.7011401660676664, "grad_norm": 3.7126089692338358, "learning_rate": 1.5731198225816975e-06, "loss": 0.6673, "step": 22630 }, { "epoch": 0.7012950799355558, "grad_norm": 2.8100055657109264, "learning_rate": 1.5723044811166917e-06, "loss": 0.6852, "step": 22635 }, { "epoch": 0.7014499938034453, "grad_norm": 2.561987199803191, "learning_rate": 1.5714891396516862e-06, "loss": 0.6749, "step": 22640 }, { "epoch": 0.7016049076713348, "grad_norm": 3.44579049534149, "learning_rate": 1.5706737981866806e-06, "loss": 0.6971, "step": 22645 }, { "epoch": 0.7017598215392242, "grad_norm": 3.6705947911798815, "learning_rate": 1.569858456721675e-06, "loss": 0.7426, "step": 22650 }, { "epoch": 0.7019147354071137, "grad_norm": 3.596836572445215, "learning_rate": 1.5690431152566696e-06, "loss": 0.6213, "step": 22655 }, { "epoch": 0.7020696492750031, "grad_norm": 3.0247910912251443, "learning_rate": 1.568227773791664e-06, "loss": 0.6227, "step": 22660 }, { "epoch": 0.7022245631428925, "grad_norm": 2.5119330157926054, "learning_rate": 1.5674124323266585e-06, "loss": 0.7057, "step": 22665 }, { "epoch": 0.702379477010782, "grad_norm": 2.563987888400399, "learning_rate": 1.566597090861653e-06, "loss": 0.6447, "step": 22670 }, { "epoch": 0.7025343908786714, "grad_norm": 2.3098924530663165, "learning_rate": 1.5657817493966474e-06, "loss": 0.7149, "step": 22675 }, { "epoch": 0.7026893047465609, "grad_norm": 3.7936881829265374, "learning_rate": 1.5649664079316419e-06, "loss": 0.6692, "step": 22680 }, { "epoch": 0.7028442186144503, "grad_norm": 2.9111389101508625, "learning_rate": 1.5641510664666363e-06, "loss": 0.6669, "step": 22685 }, { "epoch": 0.7029991324823398, "grad_norm": 2.701661704180139, "learning_rate": 1.563335725001631e-06, "loss": 0.6936, "step": 22690 }, { "epoch": 0.7031540463502293, "grad_norm": 2.625367586726295, "learning_rate": 1.5625203835366254e-06, "loss": 0.7413, "step": 22695 }, { "epoch": 0.7033089602181187, "grad_norm": 2.7914585825616984, "learning_rate": 1.56170504207162e-06, "loss": 0.6795, "step": 22700 }, { "epoch": 0.7034638740860082, "grad_norm": 3.8113602147478765, "learning_rate": 1.5608897006066144e-06, "loss": 0.7241, "step": 22705 }, { "epoch": 0.7036187879538977, "grad_norm": 2.8990793861733435, "learning_rate": 1.5600743591416086e-06, "loss": 0.698, "step": 22710 }, { "epoch": 0.7037737018217871, "grad_norm": 3.2269639629415225, "learning_rate": 1.559259017676603e-06, "loss": 0.6818, "step": 22715 }, { "epoch": 0.7039286156896766, "grad_norm": 3.3151169127286644, "learning_rate": 1.5584436762115975e-06, "loss": 0.768, "step": 22720 }, { "epoch": 0.704083529557566, "grad_norm": 2.304028203936202, "learning_rate": 1.557628334746592e-06, "loss": 0.6744, "step": 22725 }, { "epoch": 0.7042384434254555, "grad_norm": 2.488575446089663, "learning_rate": 1.5568129932815864e-06, "loss": 0.7145, "step": 22730 }, { "epoch": 0.7043933572933448, "grad_norm": 2.6457300519786404, "learning_rate": 1.555997651816581e-06, "loss": 0.6816, "step": 22735 }, { "epoch": 0.7045482711612343, "grad_norm": 2.646660105539845, "learning_rate": 1.5551823103515754e-06, "loss": 0.6727, "step": 22740 }, { "epoch": 0.7047031850291238, "grad_norm": 2.80678215409197, "learning_rate": 1.5543669688865698e-06, "loss": 0.609, "step": 22745 }, { "epoch": 0.7048580988970132, "grad_norm": 2.4401963432767504, "learning_rate": 1.5535516274215643e-06, "loss": 0.6105, "step": 22750 }, { "epoch": 0.7050130127649027, "grad_norm": 2.832147259278161, "learning_rate": 1.5527362859565587e-06, "loss": 0.6574, "step": 22755 }, { "epoch": 0.7051679266327922, "grad_norm": 2.6411081149869533, "learning_rate": 1.5519209444915532e-06, "loss": 0.6248, "step": 22760 }, { "epoch": 0.7053228405006816, "grad_norm": 2.458665582406169, "learning_rate": 1.5511056030265477e-06, "loss": 0.61, "step": 22765 }, { "epoch": 0.7054777543685711, "grad_norm": 2.541274930688411, "learning_rate": 1.550290261561542e-06, "loss": 0.6276, "step": 22770 }, { "epoch": 0.7056326682364605, "grad_norm": 2.8454148856194483, "learning_rate": 1.5494749200965364e-06, "loss": 0.7191, "step": 22775 }, { "epoch": 0.70578758210435, "grad_norm": 3.0836510634014402, "learning_rate": 1.5486595786315308e-06, "loss": 0.7122, "step": 22780 }, { "epoch": 0.7059424959722395, "grad_norm": 3.7646791997750366, "learning_rate": 1.5478442371665255e-06, "loss": 0.6366, "step": 22785 }, { "epoch": 0.7060974098401289, "grad_norm": 2.9438575310321027, "learning_rate": 1.54702889570152e-06, "loss": 0.67, "step": 22790 }, { "epoch": 0.7062523237080184, "grad_norm": 2.4338372444098857, "learning_rate": 1.5462135542365144e-06, "loss": 0.6392, "step": 22795 }, { "epoch": 0.7064072375759078, "grad_norm": 3.089321819045335, "learning_rate": 1.5453982127715089e-06, "loss": 0.6187, "step": 22800 }, { "epoch": 0.7065621514437973, "grad_norm": 2.433982121285972, "learning_rate": 1.5445828713065033e-06, "loss": 0.6456, "step": 22805 }, { "epoch": 0.7067170653116867, "grad_norm": 2.2170973119984625, "learning_rate": 1.5437675298414978e-06, "loss": 0.668, "step": 22810 }, { "epoch": 0.7068719791795761, "grad_norm": 2.256610108980676, "learning_rate": 1.5429521883764923e-06, "loss": 0.6381, "step": 22815 }, { "epoch": 0.7070268930474656, "grad_norm": 3.7368450343085193, "learning_rate": 1.5421368469114867e-06, "loss": 0.5974, "step": 22820 }, { "epoch": 0.707181806915355, "grad_norm": 2.5238038606195032, "learning_rate": 1.5413215054464812e-06, "loss": 0.6604, "step": 22825 }, { "epoch": 0.7073367207832445, "grad_norm": 2.7863034311752437, "learning_rate": 1.5405061639814756e-06, "loss": 0.6783, "step": 22830 }, { "epoch": 0.707491634651134, "grad_norm": 3.229723893849162, "learning_rate": 1.53969082251647e-06, "loss": 0.7007, "step": 22835 }, { "epoch": 0.7076465485190234, "grad_norm": 2.9961121184136323, "learning_rate": 1.5388754810514645e-06, "loss": 0.6764, "step": 22840 }, { "epoch": 0.7078014623869129, "grad_norm": 2.4255243996025024, "learning_rate": 1.5380601395864588e-06, "loss": 0.678, "step": 22845 }, { "epoch": 0.7079563762548023, "grad_norm": 2.1106405382012774, "learning_rate": 1.5372447981214533e-06, "loss": 0.6291, "step": 22850 }, { "epoch": 0.7081112901226918, "grad_norm": 3.049180717547846, "learning_rate": 1.5364294566564477e-06, "loss": 0.6348, "step": 22855 }, { "epoch": 0.7082662039905813, "grad_norm": 2.9537302053547094, "learning_rate": 1.5356141151914422e-06, "loss": 0.6804, "step": 22860 }, { "epoch": 0.7084211178584707, "grad_norm": 2.536917701892819, "learning_rate": 1.5347987737264366e-06, "loss": 0.6747, "step": 22865 }, { "epoch": 0.7085760317263602, "grad_norm": 2.651092887370261, "learning_rate": 1.533983432261431e-06, "loss": 0.6355, "step": 22870 }, { "epoch": 0.7087309455942497, "grad_norm": 2.3026668025469723, "learning_rate": 1.5331680907964256e-06, "loss": 0.7504, "step": 22875 }, { "epoch": 0.708885859462139, "grad_norm": 2.9049433895850236, "learning_rate": 1.5323527493314202e-06, "loss": 0.6706, "step": 22880 }, { "epoch": 0.7090407733300285, "grad_norm": 3.536481010398399, "learning_rate": 1.5315374078664147e-06, "loss": 0.7546, "step": 22885 }, { "epoch": 0.7091956871979179, "grad_norm": 3.390748901630505, "learning_rate": 1.5307220664014091e-06, "loss": 0.7047, "step": 22890 }, { "epoch": 0.7093506010658074, "grad_norm": 2.727132842820443, "learning_rate": 1.5299067249364036e-06, "loss": 0.6346, "step": 22895 }, { "epoch": 0.7095055149336968, "grad_norm": 2.8417744234623807, "learning_rate": 1.529091383471398e-06, "loss": 0.5768, "step": 22900 }, { "epoch": 0.7096604288015863, "grad_norm": 4.337802003386231, "learning_rate": 1.5282760420063925e-06, "loss": 0.695, "step": 22905 }, { "epoch": 0.7098153426694758, "grad_norm": 2.1973899389219533, "learning_rate": 1.527460700541387e-06, "loss": 0.6486, "step": 22910 }, { "epoch": 0.7099702565373652, "grad_norm": 2.9568549277026595, "learning_rate": 1.5266453590763814e-06, "loss": 0.6947, "step": 22915 }, { "epoch": 0.7101251704052547, "grad_norm": 2.3738024646205895, "learning_rate": 1.5258300176113757e-06, "loss": 0.6968, "step": 22920 }, { "epoch": 0.7102800842731442, "grad_norm": 3.7763053227820818, "learning_rate": 1.5250146761463701e-06, "loss": 0.709, "step": 22925 }, { "epoch": 0.7104349981410336, "grad_norm": 2.4025146016266086, "learning_rate": 1.5241993346813646e-06, "loss": 0.6562, "step": 22930 }, { "epoch": 0.7105899120089231, "grad_norm": 2.2897068847816326, "learning_rate": 1.523383993216359e-06, "loss": 0.6713, "step": 22935 }, { "epoch": 0.7107448258768125, "grad_norm": 3.0318786246487788, "learning_rate": 1.5225686517513535e-06, "loss": 0.6804, "step": 22940 }, { "epoch": 0.710899739744702, "grad_norm": 3.0197307268318543, "learning_rate": 1.521753310286348e-06, "loss": 0.6858, "step": 22945 }, { "epoch": 0.7110546536125913, "grad_norm": 2.548128425285608, "learning_rate": 1.5209379688213424e-06, "loss": 0.6596, "step": 22950 }, { "epoch": 0.7112095674804808, "grad_norm": 5.233339953387174, "learning_rate": 1.520122627356337e-06, "loss": 0.7583, "step": 22955 }, { "epoch": 0.7113644813483703, "grad_norm": 2.556987104167139, "learning_rate": 1.5193072858913314e-06, "loss": 0.6976, "step": 22960 }, { "epoch": 0.7115193952162597, "grad_norm": 2.6515659707015615, "learning_rate": 1.5184919444263258e-06, "loss": 0.694, "step": 22965 }, { "epoch": 0.7116743090841492, "grad_norm": 2.7438988862172358, "learning_rate": 1.5176766029613203e-06, "loss": 0.6978, "step": 22970 }, { "epoch": 0.7118292229520387, "grad_norm": 1.9137750218422311, "learning_rate": 1.516861261496315e-06, "loss": 0.6494, "step": 22975 }, { "epoch": 0.7119841368199281, "grad_norm": 3.187215939881711, "learning_rate": 1.5160459200313094e-06, "loss": 0.6586, "step": 22980 }, { "epoch": 0.7121390506878176, "grad_norm": 2.7576666961982963, "learning_rate": 1.5152305785663039e-06, "loss": 0.6463, "step": 22985 }, { "epoch": 0.712293964555707, "grad_norm": 2.204879213574103, "learning_rate": 1.5144152371012983e-06, "loss": 0.6226, "step": 22990 }, { "epoch": 0.7124488784235965, "grad_norm": 2.631833665003405, "learning_rate": 1.5135998956362926e-06, "loss": 0.5969, "step": 22995 }, { "epoch": 0.712603792291486, "grad_norm": 4.109412231785641, "learning_rate": 1.512784554171287e-06, "loss": 0.6726, "step": 23000 }, { "epoch": 0.7127587061593754, "grad_norm": 2.717364809111916, "learning_rate": 1.5119692127062815e-06, "loss": 0.6552, "step": 23005 }, { "epoch": 0.7129136200272649, "grad_norm": 2.3434320016908208, "learning_rate": 1.511153871241276e-06, "loss": 0.7124, "step": 23010 }, { "epoch": 0.7130685338951543, "grad_norm": 2.9295279365300204, "learning_rate": 1.5103385297762704e-06, "loss": 0.7243, "step": 23015 }, { "epoch": 0.7132234477630437, "grad_norm": 2.462778966159627, "learning_rate": 1.5095231883112649e-06, "loss": 0.6928, "step": 23020 }, { "epoch": 0.7133783616309332, "grad_norm": 2.4779775591632673, "learning_rate": 1.5087078468462593e-06, "loss": 0.6724, "step": 23025 }, { "epoch": 0.7135332754988226, "grad_norm": 2.416461949178111, "learning_rate": 1.5078925053812538e-06, "loss": 0.6146, "step": 23030 }, { "epoch": 0.7136881893667121, "grad_norm": 3.416627318821514, "learning_rate": 1.5070771639162482e-06, "loss": 0.621, "step": 23035 }, { "epoch": 0.7138431032346015, "grad_norm": 2.3704678176301046, "learning_rate": 1.5062618224512427e-06, "loss": 0.6735, "step": 23040 }, { "epoch": 0.713998017102491, "grad_norm": 2.502065947476252, "learning_rate": 1.5054464809862372e-06, "loss": 0.6846, "step": 23045 }, { "epoch": 0.7141529309703805, "grad_norm": 3.5595542721298954, "learning_rate": 1.5046311395212316e-06, "loss": 0.7049, "step": 23050 }, { "epoch": 0.7143078448382699, "grad_norm": 2.4547360163932783, "learning_rate": 1.5038157980562259e-06, "loss": 0.6603, "step": 23055 }, { "epoch": 0.7144627587061594, "grad_norm": 2.553038815629252, "learning_rate": 1.5030004565912203e-06, "loss": 0.6854, "step": 23060 }, { "epoch": 0.7146176725740488, "grad_norm": 2.6816239634003294, "learning_rate": 1.5021851151262148e-06, "loss": 0.6566, "step": 23065 }, { "epoch": 0.7147725864419383, "grad_norm": 2.1799901976856213, "learning_rate": 1.5013697736612095e-06, "loss": 0.6423, "step": 23070 }, { "epoch": 0.7149275003098278, "grad_norm": 2.268985053601391, "learning_rate": 1.500554432196204e-06, "loss": 0.7116, "step": 23075 }, { "epoch": 0.7150824141777172, "grad_norm": 2.383344947176779, "learning_rate": 1.4997390907311984e-06, "loss": 0.6583, "step": 23080 }, { "epoch": 0.7152373280456067, "grad_norm": 10.368327774185062, "learning_rate": 1.4989237492661928e-06, "loss": 0.6402, "step": 23085 }, { "epoch": 0.715392241913496, "grad_norm": 3.2420123899125888, "learning_rate": 1.4981084078011873e-06, "loss": 0.6874, "step": 23090 }, { "epoch": 0.7155471557813855, "grad_norm": 2.4119024154179085, "learning_rate": 1.4972930663361818e-06, "loss": 0.6156, "step": 23095 }, { "epoch": 0.715702069649275, "grad_norm": 2.857757660128447, "learning_rate": 1.4964777248711762e-06, "loss": 0.5889, "step": 23100 }, { "epoch": 0.7158569835171644, "grad_norm": 2.731341442294841, "learning_rate": 1.4956623834061707e-06, "loss": 0.7168, "step": 23105 }, { "epoch": 0.7160118973850539, "grad_norm": 2.6744813768700624, "learning_rate": 1.4948470419411651e-06, "loss": 0.7044, "step": 23110 }, { "epoch": 0.7161668112529433, "grad_norm": 2.127748528381767, "learning_rate": 1.4940317004761596e-06, "loss": 0.7124, "step": 23115 }, { "epoch": 0.7163217251208328, "grad_norm": 3.12983876710154, "learning_rate": 1.493216359011154e-06, "loss": 0.6816, "step": 23120 }, { "epoch": 0.7164766389887223, "grad_norm": 2.8673198220031977, "learning_rate": 1.4924010175461485e-06, "loss": 0.6429, "step": 23125 }, { "epoch": 0.7166315528566117, "grad_norm": 3.7155045969475498, "learning_rate": 1.4915856760811428e-06, "loss": 0.6371, "step": 23130 }, { "epoch": 0.7167864667245012, "grad_norm": 2.7779644227399682, "learning_rate": 1.4907703346161372e-06, "loss": 0.6823, "step": 23135 }, { "epoch": 0.7169413805923907, "grad_norm": 2.891332284821908, "learning_rate": 1.4899549931511317e-06, "loss": 0.671, "step": 23140 }, { "epoch": 0.7170962944602801, "grad_norm": 2.0953150631876856, "learning_rate": 1.4891396516861261e-06, "loss": 0.6715, "step": 23145 }, { "epoch": 0.7172512083281696, "grad_norm": 2.544485445296116, "learning_rate": 1.4883243102211206e-06, "loss": 0.6949, "step": 23150 }, { "epoch": 0.717406122196059, "grad_norm": 3.665917103337775, "learning_rate": 1.487508968756115e-06, "loss": 0.6851, "step": 23155 }, { "epoch": 0.7175610360639485, "grad_norm": 2.8100603487722213, "learning_rate": 1.4866936272911097e-06, "loss": 0.7097, "step": 23160 }, { "epoch": 0.7177159499318378, "grad_norm": 2.7922036232331586, "learning_rate": 1.4858782858261042e-06, "loss": 0.6421, "step": 23165 }, { "epoch": 0.7178708637997273, "grad_norm": 2.553483722727413, "learning_rate": 1.4850629443610986e-06, "loss": 0.6206, "step": 23170 }, { "epoch": 0.7180257776676168, "grad_norm": 2.4396160075179196, "learning_rate": 1.484247602896093e-06, "loss": 0.6879, "step": 23175 }, { "epoch": 0.7181806915355062, "grad_norm": 2.6806259753597477, "learning_rate": 1.4834322614310876e-06, "loss": 0.6488, "step": 23180 }, { "epoch": 0.7183356054033957, "grad_norm": 2.0831088268294553, "learning_rate": 1.482616919966082e-06, "loss": 0.6741, "step": 23185 }, { "epoch": 0.7184905192712852, "grad_norm": 2.7726537096701063, "learning_rate": 1.4818015785010765e-06, "loss": 0.6707, "step": 23190 }, { "epoch": 0.7186454331391746, "grad_norm": 2.680375313710102, "learning_rate": 1.480986237036071e-06, "loss": 0.6795, "step": 23195 }, { "epoch": 0.7188003470070641, "grad_norm": 3.268451773106772, "learning_rate": 1.4801708955710654e-06, "loss": 0.5948, "step": 23200 }, { "epoch": 0.7189552608749535, "grad_norm": 2.489226659355643, "learning_rate": 1.4793555541060596e-06, "loss": 0.6493, "step": 23205 }, { "epoch": 0.719110174742843, "grad_norm": 5.706720411097765, "learning_rate": 1.478540212641054e-06, "loss": 0.6939, "step": 23210 }, { "epoch": 0.7192650886107325, "grad_norm": 2.1339732576267085, "learning_rate": 1.4777248711760486e-06, "loss": 0.5855, "step": 23215 }, { "epoch": 0.7194200024786219, "grad_norm": 2.8616961221620207, "learning_rate": 1.476909529711043e-06, "loss": 0.6314, "step": 23220 }, { "epoch": 0.7195749163465114, "grad_norm": 3.5420000876223594, "learning_rate": 1.4760941882460375e-06, "loss": 0.6881, "step": 23225 }, { "epoch": 0.7197298302144008, "grad_norm": 3.02995558938077, "learning_rate": 1.475278846781032e-06, "loss": 0.7284, "step": 23230 }, { "epoch": 0.7198847440822902, "grad_norm": 2.9046773774032166, "learning_rate": 1.4744635053160264e-06, "loss": 0.6331, "step": 23235 }, { "epoch": 0.7200396579501797, "grad_norm": 2.5585444835511564, "learning_rate": 1.4736481638510209e-06, "loss": 0.5915, "step": 23240 }, { "epoch": 0.7201945718180691, "grad_norm": 7.348771297036619, "learning_rate": 1.4728328223860153e-06, "loss": 0.7269, "step": 23245 }, { "epoch": 0.7203494856859586, "grad_norm": 2.320071104637098, "learning_rate": 1.4720174809210098e-06, "loss": 0.6059, "step": 23250 }, { "epoch": 0.720504399553848, "grad_norm": 2.2187581110602523, "learning_rate": 1.4712021394560044e-06, "loss": 0.6611, "step": 23255 }, { "epoch": 0.7206593134217375, "grad_norm": 2.693995870883058, "learning_rate": 1.470386797990999e-06, "loss": 0.7036, "step": 23260 }, { "epoch": 0.720814227289627, "grad_norm": 2.320851247984674, "learning_rate": 1.4695714565259934e-06, "loss": 0.7033, "step": 23265 }, { "epoch": 0.7209691411575164, "grad_norm": 1.9942739546409516, "learning_rate": 1.4687561150609878e-06, "loss": 0.6846, "step": 23270 }, { "epoch": 0.7211240550254059, "grad_norm": 2.3152695794924063, "learning_rate": 1.4679407735959823e-06, "loss": 0.6465, "step": 23275 }, { "epoch": 0.7212789688932953, "grad_norm": 2.8888512010187877, "learning_rate": 1.4671254321309767e-06, "loss": 0.7063, "step": 23280 }, { "epoch": 0.7214338827611848, "grad_norm": 2.3751931370313777, "learning_rate": 1.466310090665971e-06, "loss": 0.649, "step": 23285 }, { "epoch": 0.7215887966290743, "grad_norm": 2.9446591482136597, "learning_rate": 1.4654947492009654e-06, "loss": 0.6699, "step": 23290 }, { "epoch": 0.7217437104969637, "grad_norm": 2.2757487549492006, "learning_rate": 1.46467940773596e-06, "loss": 0.5725, "step": 23295 }, { "epoch": 0.7218986243648532, "grad_norm": 2.8689327402804476, "learning_rate": 1.4638640662709544e-06, "loss": 0.625, "step": 23300 }, { "epoch": 0.7220535382327425, "grad_norm": 2.73930955297329, "learning_rate": 1.4630487248059488e-06, "loss": 0.7418, "step": 23305 }, { "epoch": 0.722208452100632, "grad_norm": 2.9147169794406973, "learning_rate": 1.4622333833409433e-06, "loss": 0.7456, "step": 23310 }, { "epoch": 0.7223633659685215, "grad_norm": 3.0866028402026715, "learning_rate": 1.4614180418759377e-06, "loss": 0.5997, "step": 23315 }, { "epoch": 0.7225182798364109, "grad_norm": 2.1972746920577713, "learning_rate": 1.4606027004109322e-06, "loss": 0.6894, "step": 23320 }, { "epoch": 0.7226731937043004, "grad_norm": 2.4721819205753586, "learning_rate": 1.4597873589459267e-06, "loss": 0.6523, "step": 23325 }, { "epoch": 0.7228281075721898, "grad_norm": 3.061721956401552, "learning_rate": 1.4589720174809211e-06, "loss": 0.6424, "step": 23330 }, { "epoch": 0.7229830214400793, "grad_norm": 2.9147327522921147, "learning_rate": 1.4581566760159156e-06, "loss": 0.6896, "step": 23335 }, { "epoch": 0.7231379353079688, "grad_norm": 2.6799221002986835, "learning_rate": 1.4573413345509098e-06, "loss": 0.677, "step": 23340 }, { "epoch": 0.7232928491758582, "grad_norm": 2.261679035611389, "learning_rate": 1.4565259930859043e-06, "loss": 0.6265, "step": 23345 }, { "epoch": 0.7234477630437477, "grad_norm": 2.802805722729318, "learning_rate": 1.4557106516208992e-06, "loss": 0.6781, "step": 23350 }, { "epoch": 0.7236026769116372, "grad_norm": 2.8813402031680937, "learning_rate": 1.4548953101558936e-06, "loss": 0.6998, "step": 23355 }, { "epoch": 0.7237575907795266, "grad_norm": 2.800787365260423, "learning_rate": 1.4540799686908879e-06, "loss": 0.6566, "step": 23360 }, { "epoch": 0.7239125046474161, "grad_norm": 2.450175872803382, "learning_rate": 1.4532646272258823e-06, "loss": 0.753, "step": 23365 }, { "epoch": 0.7240674185153055, "grad_norm": 2.206909673698674, "learning_rate": 1.4524492857608768e-06, "loss": 0.6185, "step": 23370 }, { "epoch": 0.7242223323831949, "grad_norm": 2.184225627347916, "learning_rate": 1.4516339442958713e-06, "loss": 0.6238, "step": 23375 }, { "epoch": 0.7243772462510844, "grad_norm": 2.7447133944473383, "learning_rate": 1.4508186028308657e-06, "loss": 0.6763, "step": 23380 }, { "epoch": 0.7245321601189738, "grad_norm": 2.605605906027187, "learning_rate": 1.4500032613658602e-06, "loss": 0.7529, "step": 23385 }, { "epoch": 0.7246870739868633, "grad_norm": 2.213095529707615, "learning_rate": 1.4491879199008546e-06, "loss": 0.7001, "step": 23390 }, { "epoch": 0.7248419878547527, "grad_norm": 2.428385072999925, "learning_rate": 1.448372578435849e-06, "loss": 0.6552, "step": 23395 }, { "epoch": 0.7249969017226422, "grad_norm": 3.3451099383133944, "learning_rate": 1.4475572369708435e-06, "loss": 0.6798, "step": 23400 }, { "epoch": 0.7251518155905317, "grad_norm": 2.5318768533537073, "learning_rate": 1.446741895505838e-06, "loss": 0.7168, "step": 23405 }, { "epoch": 0.7253067294584211, "grad_norm": 3.2408696742584424, "learning_rate": 1.4459265540408325e-06, "loss": 0.7687, "step": 23410 }, { "epoch": 0.7254616433263106, "grad_norm": 2.480525473252471, "learning_rate": 1.4451112125758267e-06, "loss": 0.6026, "step": 23415 }, { "epoch": 0.7256165571942, "grad_norm": 2.3968239916987195, "learning_rate": 1.4442958711108212e-06, "loss": 0.6254, "step": 23420 }, { "epoch": 0.7257714710620895, "grad_norm": 2.393330498777566, "learning_rate": 1.4434805296458156e-06, "loss": 0.7547, "step": 23425 }, { "epoch": 0.725926384929979, "grad_norm": 2.3557596885875673, "learning_rate": 1.44266518818081e-06, "loss": 0.6269, "step": 23430 }, { "epoch": 0.7260812987978684, "grad_norm": 3.5766399631972257, "learning_rate": 1.4418498467158045e-06, "loss": 0.6589, "step": 23435 }, { "epoch": 0.7262362126657579, "grad_norm": 2.5193737687146656, "learning_rate": 1.441034505250799e-06, "loss": 0.6634, "step": 23440 }, { "epoch": 0.7263911265336473, "grad_norm": 2.636780144415589, "learning_rate": 1.4402191637857937e-06, "loss": 0.6969, "step": 23445 }, { "epoch": 0.7265460404015367, "grad_norm": 2.538733607878441, "learning_rate": 1.4394038223207881e-06, "loss": 0.6739, "step": 23450 }, { "epoch": 0.7267009542694262, "grad_norm": 2.6310152421179236, "learning_rate": 1.4385884808557826e-06, "loss": 0.6687, "step": 23455 }, { "epoch": 0.7268558681373156, "grad_norm": 2.3256223514535215, "learning_rate": 1.437773139390777e-06, "loss": 0.7136, "step": 23460 }, { "epoch": 0.7270107820052051, "grad_norm": 3.4206330516063685, "learning_rate": 1.4369577979257715e-06, "loss": 0.6998, "step": 23465 }, { "epoch": 0.7271656958730945, "grad_norm": 2.5858932791521343, "learning_rate": 1.436142456460766e-06, "loss": 0.645, "step": 23470 }, { "epoch": 0.727320609740984, "grad_norm": 3.3742831164648153, "learning_rate": 1.4353271149957604e-06, "loss": 0.634, "step": 23475 }, { "epoch": 0.7274755236088735, "grad_norm": 2.4085767622569003, "learning_rate": 1.4345117735307549e-06, "loss": 0.7052, "step": 23480 }, { "epoch": 0.7276304374767629, "grad_norm": 2.4106005474656858, "learning_rate": 1.4336964320657494e-06, "loss": 0.6808, "step": 23485 }, { "epoch": 0.7277853513446524, "grad_norm": 2.4841250375133153, "learning_rate": 1.4328810906007438e-06, "loss": 0.6746, "step": 23490 }, { "epoch": 0.7279402652125418, "grad_norm": 2.610105305402267, "learning_rate": 1.432065749135738e-06, "loss": 0.6562, "step": 23495 }, { "epoch": 0.7280951790804313, "grad_norm": 4.238500044466912, "learning_rate": 1.4312504076707325e-06, "loss": 0.6361, "step": 23500 }, { "epoch": 0.7282500929483208, "grad_norm": 3.2247289155412107, "learning_rate": 1.430435066205727e-06, "loss": 0.6052, "step": 23505 }, { "epoch": 0.7284050068162102, "grad_norm": 2.833379701872867, "learning_rate": 1.4296197247407214e-06, "loss": 0.6859, "step": 23510 }, { "epoch": 0.7285599206840997, "grad_norm": 2.5503852135396996, "learning_rate": 1.428804383275716e-06, "loss": 0.6375, "step": 23515 }, { "epoch": 0.728714834551989, "grad_norm": 2.7832142197014766, "learning_rate": 1.4279890418107104e-06, "loss": 0.6896, "step": 23520 }, { "epoch": 0.7288697484198785, "grad_norm": 2.6030196011387803, "learning_rate": 1.4271737003457048e-06, "loss": 0.694, "step": 23525 }, { "epoch": 0.729024662287768, "grad_norm": 2.7223935957743643, "learning_rate": 1.4263583588806993e-06, "loss": 0.7463, "step": 23530 }, { "epoch": 0.7291795761556574, "grad_norm": 3.054102009180003, "learning_rate": 1.4255430174156937e-06, "loss": 0.6884, "step": 23535 }, { "epoch": 0.7293344900235469, "grad_norm": 3.0145737250725055, "learning_rate": 1.4247276759506884e-06, "loss": 0.7135, "step": 23540 }, { "epoch": 0.7294894038914363, "grad_norm": 2.7987127376148235, "learning_rate": 1.4239123344856829e-06, "loss": 0.5882, "step": 23545 }, { "epoch": 0.7296443177593258, "grad_norm": 3.2084411991018946, "learning_rate": 1.4230969930206773e-06, "loss": 0.6417, "step": 23550 }, { "epoch": 0.7297992316272153, "grad_norm": 2.2072689306495565, "learning_rate": 1.4222816515556718e-06, "loss": 0.6836, "step": 23555 }, { "epoch": 0.7299541454951047, "grad_norm": 4.941568457459569, "learning_rate": 1.4214663100906662e-06, "loss": 0.693, "step": 23560 }, { "epoch": 0.7301090593629942, "grad_norm": 2.964636568847831, "learning_rate": 1.4206509686256607e-06, "loss": 0.697, "step": 23565 }, { "epoch": 0.7302639732308837, "grad_norm": 2.0808033001413473, "learning_rate": 1.419835627160655e-06, "loss": 0.6099, "step": 23570 }, { "epoch": 0.7304188870987731, "grad_norm": 3.314270401369273, "learning_rate": 1.4190202856956494e-06, "loss": 0.5688, "step": 23575 }, { "epoch": 0.7305738009666626, "grad_norm": 2.5777675657279926, "learning_rate": 1.4182049442306439e-06, "loss": 0.6686, "step": 23580 }, { "epoch": 0.730728714834552, "grad_norm": 2.4757742239464693, "learning_rate": 1.4173896027656383e-06, "loss": 0.6904, "step": 23585 }, { "epoch": 0.7308836287024414, "grad_norm": 2.944734077595793, "learning_rate": 1.4165742613006328e-06, "loss": 0.6969, "step": 23590 }, { "epoch": 0.7310385425703309, "grad_norm": 2.350357898585607, "learning_rate": 1.4157589198356272e-06, "loss": 0.6776, "step": 23595 }, { "epoch": 0.7311934564382203, "grad_norm": 3.1396856615835045, "learning_rate": 1.4149435783706217e-06, "loss": 0.6402, "step": 23600 }, { "epoch": 0.7313483703061098, "grad_norm": 2.818514835939829, "learning_rate": 1.4141282369056162e-06, "loss": 0.6696, "step": 23605 }, { "epoch": 0.7315032841739992, "grad_norm": 2.435850087185161, "learning_rate": 1.4133128954406106e-06, "loss": 0.6207, "step": 23610 }, { "epoch": 0.7316581980418887, "grad_norm": 2.714447148109492, "learning_rate": 1.412497553975605e-06, "loss": 0.6652, "step": 23615 }, { "epoch": 0.7318131119097782, "grad_norm": 2.7704257906267, "learning_rate": 1.4116822125105995e-06, "loss": 0.703, "step": 23620 }, { "epoch": 0.7319680257776676, "grad_norm": 2.4164596825783904, "learning_rate": 1.4108668710455938e-06, "loss": 0.6562, "step": 23625 }, { "epoch": 0.7321229396455571, "grad_norm": 2.3201932812668913, "learning_rate": 1.4100515295805882e-06, "loss": 0.7127, "step": 23630 }, { "epoch": 0.7322778535134465, "grad_norm": 4.02247280172233, "learning_rate": 1.4092361881155831e-06, "loss": 0.6074, "step": 23635 }, { "epoch": 0.732432767381336, "grad_norm": 2.840182830475063, "learning_rate": 1.4084208466505776e-06, "loss": 0.6989, "step": 23640 }, { "epoch": 0.7325876812492255, "grad_norm": 2.130219394246613, "learning_rate": 1.4076055051855718e-06, "loss": 0.6375, "step": 23645 }, { "epoch": 0.7327425951171149, "grad_norm": 2.2267449594596176, "learning_rate": 1.4067901637205663e-06, "loss": 0.7701, "step": 23650 }, { "epoch": 0.7328975089850044, "grad_norm": 2.8939996715926566, "learning_rate": 1.4059748222555608e-06, "loss": 0.6197, "step": 23655 }, { "epoch": 0.7330524228528937, "grad_norm": 3.3712258882985413, "learning_rate": 1.4051594807905552e-06, "loss": 0.6501, "step": 23660 }, { "epoch": 0.7332073367207832, "grad_norm": 2.2965854103867813, "learning_rate": 1.4043441393255497e-06, "loss": 0.7272, "step": 23665 }, { "epoch": 0.7333622505886727, "grad_norm": 2.530833927984712, "learning_rate": 1.4035287978605441e-06, "loss": 0.7158, "step": 23670 }, { "epoch": 0.7335171644565621, "grad_norm": 2.6624793909170834, "learning_rate": 1.4027134563955386e-06, "loss": 0.6454, "step": 23675 }, { "epoch": 0.7336720783244516, "grad_norm": 3.025982702595492, "learning_rate": 1.401898114930533e-06, "loss": 0.6815, "step": 23680 }, { "epoch": 0.733826992192341, "grad_norm": 3.1102918657092244, "learning_rate": 1.4010827734655275e-06, "loss": 0.6962, "step": 23685 }, { "epoch": 0.7339819060602305, "grad_norm": 3.1554614806299424, "learning_rate": 1.400267432000522e-06, "loss": 0.6959, "step": 23690 }, { "epoch": 0.73413681992812, "grad_norm": 2.499559312471564, "learning_rate": 1.3994520905355164e-06, "loss": 0.6334, "step": 23695 }, { "epoch": 0.7342917337960094, "grad_norm": 3.7085454555965645, "learning_rate": 1.3986367490705109e-06, "loss": 0.6771, "step": 23700 }, { "epoch": 0.7344466476638989, "grad_norm": 3.235832696228077, "learning_rate": 1.3978214076055051e-06, "loss": 0.6989, "step": 23705 }, { "epoch": 0.7346015615317883, "grad_norm": 2.69484503715222, "learning_rate": 1.3970060661404996e-06, "loss": 0.6311, "step": 23710 }, { "epoch": 0.7347564753996778, "grad_norm": 2.6146673908964697, "learning_rate": 1.396190724675494e-06, "loss": 0.684, "step": 23715 }, { "epoch": 0.7349113892675673, "grad_norm": 3.2126428909661304, "learning_rate": 1.3953753832104885e-06, "loss": 0.6438, "step": 23720 }, { "epoch": 0.7350663031354567, "grad_norm": 2.529629474831588, "learning_rate": 1.394560041745483e-06, "loss": 0.7134, "step": 23725 }, { "epoch": 0.7352212170033462, "grad_norm": 2.8978328029187064, "learning_rate": 1.3937447002804776e-06, "loss": 0.6969, "step": 23730 }, { "epoch": 0.7353761308712355, "grad_norm": 3.1336102824093093, "learning_rate": 1.392929358815472e-06, "loss": 0.657, "step": 23735 }, { "epoch": 0.735531044739125, "grad_norm": 2.597566861512655, "learning_rate": 1.3921140173504666e-06, "loss": 0.6606, "step": 23740 }, { "epoch": 0.7356859586070145, "grad_norm": 2.869017550689319, "learning_rate": 1.391298675885461e-06, "loss": 0.6511, "step": 23745 }, { "epoch": 0.7358408724749039, "grad_norm": 4.050781238495579, "learning_rate": 1.3904833344204555e-06, "loss": 0.6617, "step": 23750 }, { "epoch": 0.7359957863427934, "grad_norm": 3.5610829297762603, "learning_rate": 1.38966799295545e-06, "loss": 0.681, "step": 23755 }, { "epoch": 0.7361507002106829, "grad_norm": 2.5045441391401617, "learning_rate": 1.3888526514904444e-06, "loss": 0.6923, "step": 23760 }, { "epoch": 0.7363056140785723, "grad_norm": 3.135406909695778, "learning_rate": 1.3880373100254389e-06, "loss": 0.7262, "step": 23765 }, { "epoch": 0.7364605279464618, "grad_norm": 2.877373955520933, "learning_rate": 1.3872219685604333e-06, "loss": 0.7089, "step": 23770 }, { "epoch": 0.7366154418143512, "grad_norm": 2.672038301232311, "learning_rate": 1.3864066270954278e-06, "loss": 0.6126, "step": 23775 }, { "epoch": 0.7367703556822407, "grad_norm": 2.938908369606691, "learning_rate": 1.385591285630422e-06, "loss": 0.6768, "step": 23780 }, { "epoch": 0.7369252695501302, "grad_norm": 4.236446466701316, "learning_rate": 1.3847759441654165e-06, "loss": 0.6948, "step": 23785 }, { "epoch": 0.7370801834180196, "grad_norm": 3.4191263473717295, "learning_rate": 1.383960602700411e-06, "loss": 0.6983, "step": 23790 }, { "epoch": 0.7372350972859091, "grad_norm": 2.94507177639493, "learning_rate": 1.3831452612354054e-06, "loss": 0.72, "step": 23795 }, { "epoch": 0.7373900111537985, "grad_norm": 2.334371352414032, "learning_rate": 1.3823299197703999e-06, "loss": 0.6433, "step": 23800 }, { "epoch": 0.7375449250216879, "grad_norm": 3.869105224059488, "learning_rate": 1.3815145783053943e-06, "loss": 0.6071, "step": 23805 }, { "epoch": 0.7376998388895774, "grad_norm": 3.238118227591847, "learning_rate": 1.3806992368403888e-06, "loss": 0.6951, "step": 23810 }, { "epoch": 0.7378547527574668, "grad_norm": 2.4553918598296587, "learning_rate": 1.3798838953753832e-06, "loss": 0.7546, "step": 23815 }, { "epoch": 0.7380096666253563, "grad_norm": 2.570445894809217, "learning_rate": 1.379068553910378e-06, "loss": 0.5611, "step": 23820 }, { "epoch": 0.7381645804932457, "grad_norm": 2.7256598669435776, "learning_rate": 1.3782532124453724e-06, "loss": 0.6907, "step": 23825 }, { "epoch": 0.7383194943611352, "grad_norm": 2.824150530359978, "learning_rate": 1.3774378709803668e-06, "loss": 0.6806, "step": 23830 }, { "epoch": 0.7384744082290247, "grad_norm": 2.724768857498958, "learning_rate": 1.3766225295153613e-06, "loss": 0.7055, "step": 23835 }, { "epoch": 0.7386293220969141, "grad_norm": 2.499707412566405, "learning_rate": 1.3758071880503557e-06, "loss": 0.6655, "step": 23840 }, { "epoch": 0.7387842359648036, "grad_norm": 2.8478900163535354, "learning_rate": 1.3749918465853502e-06, "loss": 0.6462, "step": 23845 }, { "epoch": 0.738939149832693, "grad_norm": 3.066043119325396, "learning_rate": 1.3741765051203447e-06, "loss": 0.6936, "step": 23850 }, { "epoch": 0.7390940637005825, "grad_norm": 2.681541135862967, "learning_rate": 1.373361163655339e-06, "loss": 0.6961, "step": 23855 }, { "epoch": 0.739248977568472, "grad_norm": 3.310174924090078, "learning_rate": 1.3725458221903334e-06, "loss": 0.6164, "step": 23860 }, { "epoch": 0.7394038914363614, "grad_norm": 3.7230537835391937, "learning_rate": 1.3717304807253278e-06, "loss": 0.6554, "step": 23865 }, { "epoch": 0.7395588053042509, "grad_norm": 2.3092235263376275, "learning_rate": 1.3709151392603223e-06, "loss": 0.7262, "step": 23870 }, { "epoch": 0.7397137191721402, "grad_norm": 2.82508411175743, "learning_rate": 1.3700997977953167e-06, "loss": 0.5855, "step": 23875 }, { "epoch": 0.7398686330400297, "grad_norm": 2.4645703946833333, "learning_rate": 1.3692844563303112e-06, "loss": 0.6972, "step": 23880 }, { "epoch": 0.7400235469079192, "grad_norm": 2.6620839422663574, "learning_rate": 1.3684691148653057e-06, "loss": 0.619, "step": 23885 }, { "epoch": 0.7401784607758086, "grad_norm": 2.9129458496169685, "learning_rate": 1.3676537734003001e-06, "loss": 0.7127, "step": 23890 }, { "epoch": 0.7403333746436981, "grad_norm": 2.2982493675466844, "learning_rate": 1.3668384319352946e-06, "loss": 0.7251, "step": 23895 }, { "epoch": 0.7404882885115875, "grad_norm": 3.086765248695479, "learning_rate": 1.366023090470289e-06, "loss": 0.6595, "step": 23900 }, { "epoch": 0.740643202379477, "grad_norm": 2.466995392161572, "learning_rate": 1.3652077490052835e-06, "loss": 0.6969, "step": 23905 }, { "epoch": 0.7407981162473665, "grad_norm": 3.945003474387983, "learning_rate": 1.364392407540278e-06, "loss": 0.6457, "step": 23910 }, { "epoch": 0.7409530301152559, "grad_norm": 2.231965215515961, "learning_rate": 1.3635770660752726e-06, "loss": 0.6767, "step": 23915 }, { "epoch": 0.7411079439831454, "grad_norm": 2.0316141357035966, "learning_rate": 1.362761724610267e-06, "loss": 0.6129, "step": 23920 }, { "epoch": 0.7412628578510349, "grad_norm": 2.241276388188354, "learning_rate": 1.3619463831452615e-06, "loss": 0.5949, "step": 23925 }, { "epoch": 0.7414177717189243, "grad_norm": 3.3552402934879995, "learning_rate": 1.3611310416802558e-06, "loss": 0.679, "step": 23930 }, { "epoch": 0.7415726855868138, "grad_norm": 3.007888796884897, "learning_rate": 1.3603157002152502e-06, "loss": 0.7043, "step": 23935 }, { "epoch": 0.7417275994547032, "grad_norm": 4.297395932629347, "learning_rate": 1.3595003587502447e-06, "loss": 0.7065, "step": 23940 }, { "epoch": 0.7418825133225926, "grad_norm": 2.9407460131671406, "learning_rate": 1.3586850172852392e-06, "loss": 0.7112, "step": 23945 }, { "epoch": 0.742037427190482, "grad_norm": 2.9135446968692023, "learning_rate": 1.3578696758202336e-06, "loss": 0.6687, "step": 23950 }, { "epoch": 0.7421923410583715, "grad_norm": 2.387773653280935, "learning_rate": 1.357054334355228e-06, "loss": 0.648, "step": 23955 }, { "epoch": 0.742347254926261, "grad_norm": 2.4346610779647784, "learning_rate": 1.3562389928902225e-06, "loss": 0.6256, "step": 23960 }, { "epoch": 0.7425021687941504, "grad_norm": 3.261475138846108, "learning_rate": 1.355423651425217e-06, "loss": 0.6461, "step": 23965 }, { "epoch": 0.7426570826620399, "grad_norm": 3.03658347388897, "learning_rate": 1.3546083099602115e-06, "loss": 0.7008, "step": 23970 }, { "epoch": 0.7428119965299294, "grad_norm": 2.874022023473976, "learning_rate": 1.353792968495206e-06, "loss": 0.7185, "step": 23975 }, { "epoch": 0.7429669103978188, "grad_norm": 2.4512228906539493, "learning_rate": 1.3529776270302004e-06, "loss": 0.6346, "step": 23980 }, { "epoch": 0.7431218242657083, "grad_norm": 2.641334343542898, "learning_rate": 1.3521622855651948e-06, "loss": 0.7084, "step": 23985 }, { "epoch": 0.7432767381335977, "grad_norm": 2.640128742047724, "learning_rate": 1.351346944100189e-06, "loss": 0.6765, "step": 23990 }, { "epoch": 0.7434316520014872, "grad_norm": 2.567203313540013, "learning_rate": 1.3505316026351835e-06, "loss": 0.6997, "step": 23995 }, { "epoch": 0.7435865658693767, "grad_norm": 2.636851813166, "learning_rate": 1.349716261170178e-06, "loss": 0.6971, "step": 24000 }, { "epoch": 0.7437414797372661, "grad_norm": 2.827533735742477, "learning_rate": 1.3489009197051725e-06, "loss": 0.642, "step": 24005 }, { "epoch": 0.7438963936051556, "grad_norm": 2.4956483174677055, "learning_rate": 1.3480855782401671e-06, "loss": 0.6402, "step": 24010 }, { "epoch": 0.7440513074730449, "grad_norm": 3.146918844031982, "learning_rate": 1.3472702367751616e-06, "loss": 0.6348, "step": 24015 }, { "epoch": 0.7442062213409344, "grad_norm": 2.651185821753874, "learning_rate": 1.346454895310156e-06, "loss": 0.6226, "step": 24020 }, { "epoch": 0.7443611352088239, "grad_norm": 2.772107000069493, "learning_rate": 1.3456395538451505e-06, "loss": 0.6513, "step": 24025 }, { "epoch": 0.7445160490767133, "grad_norm": 3.4955772355333643, "learning_rate": 1.344824212380145e-06, "loss": 0.6609, "step": 24030 }, { "epoch": 0.7446709629446028, "grad_norm": 2.4835878820815047, "learning_rate": 1.3440088709151394e-06, "loss": 0.6218, "step": 24035 }, { "epoch": 0.7448258768124922, "grad_norm": 3.278463148041174, "learning_rate": 1.3431935294501339e-06, "loss": 0.6927, "step": 24040 }, { "epoch": 0.7449807906803817, "grad_norm": 2.452428229317557, "learning_rate": 1.3423781879851283e-06, "loss": 0.6291, "step": 24045 }, { "epoch": 0.7451357045482712, "grad_norm": 2.379780851966142, "learning_rate": 1.3415628465201228e-06, "loss": 0.698, "step": 24050 }, { "epoch": 0.7452906184161606, "grad_norm": 3.2271527736934704, "learning_rate": 1.3407475050551173e-06, "loss": 0.6616, "step": 24055 }, { "epoch": 0.7454455322840501, "grad_norm": 2.8120410755006833, "learning_rate": 1.3399321635901117e-06, "loss": 0.6609, "step": 24060 }, { "epoch": 0.7456004461519395, "grad_norm": 2.60383013553364, "learning_rate": 1.339116822125106e-06, "loss": 0.5941, "step": 24065 }, { "epoch": 0.745755360019829, "grad_norm": 2.7240825690984476, "learning_rate": 1.3383014806601004e-06, "loss": 0.6575, "step": 24070 }, { "epoch": 0.7459102738877185, "grad_norm": 2.7845820997895854, "learning_rate": 1.3374861391950949e-06, "loss": 0.6434, "step": 24075 }, { "epoch": 0.7460651877556079, "grad_norm": 3.24998199598859, "learning_rate": 1.3366707977300894e-06, "loss": 0.6744, "step": 24080 }, { "epoch": 0.7462201016234974, "grad_norm": 2.147854943302697, "learning_rate": 1.3358554562650838e-06, "loss": 0.6356, "step": 24085 }, { "epoch": 0.7463750154913867, "grad_norm": 2.8895090713522467, "learning_rate": 1.3350401148000783e-06, "loss": 0.6656, "step": 24090 }, { "epoch": 0.7465299293592762, "grad_norm": 2.3586393156686967, "learning_rate": 1.3342247733350727e-06, "loss": 0.7029, "step": 24095 }, { "epoch": 0.7466848432271657, "grad_norm": 2.649343848651465, "learning_rate": 1.3334094318700672e-06, "loss": 0.6634, "step": 24100 }, { "epoch": 0.7468397570950551, "grad_norm": 2.2338571358192576, "learning_rate": 1.3325940904050619e-06, "loss": 0.6025, "step": 24105 }, { "epoch": 0.7469946709629446, "grad_norm": 3.0977652082614364, "learning_rate": 1.3317787489400563e-06, "loss": 0.6837, "step": 24110 }, { "epoch": 0.747149584830834, "grad_norm": 2.3200380524761437, "learning_rate": 1.3309634074750508e-06, "loss": 0.6789, "step": 24115 }, { "epoch": 0.7473044986987235, "grad_norm": 3.8413392645941213, "learning_rate": 1.3301480660100452e-06, "loss": 0.6449, "step": 24120 }, { "epoch": 0.747459412566613, "grad_norm": 2.8958729213284293, "learning_rate": 1.3293327245450397e-06, "loss": 0.6011, "step": 24125 }, { "epoch": 0.7476143264345024, "grad_norm": 2.8491016518735366, "learning_rate": 1.3285173830800342e-06, "loss": 0.6065, "step": 24130 }, { "epoch": 0.7477692403023919, "grad_norm": 2.2969346786641798, "learning_rate": 1.3277020416150286e-06, "loss": 0.6079, "step": 24135 }, { "epoch": 0.7479241541702814, "grad_norm": 3.2439074343279235, "learning_rate": 1.3268867001500229e-06, "loss": 0.6635, "step": 24140 }, { "epoch": 0.7480790680381708, "grad_norm": 4.005910836576014, "learning_rate": 1.3260713586850173e-06, "loss": 0.6861, "step": 24145 }, { "epoch": 0.7482339819060603, "grad_norm": 2.4445760631525126, "learning_rate": 1.3252560172200118e-06, "loss": 0.776, "step": 24150 }, { "epoch": 0.7483888957739497, "grad_norm": 2.720837633122896, "learning_rate": 1.3244406757550062e-06, "loss": 0.6854, "step": 24155 }, { "epoch": 0.7485438096418391, "grad_norm": 2.392924154970151, "learning_rate": 1.3236253342900007e-06, "loss": 0.6629, "step": 24160 }, { "epoch": 0.7486987235097285, "grad_norm": 3.2438946449869617, "learning_rate": 1.3228099928249952e-06, "loss": 0.6793, "step": 24165 }, { "epoch": 0.748853637377618, "grad_norm": 1.9366283330538148, "learning_rate": 1.3219946513599896e-06, "loss": 0.5163, "step": 24170 }, { "epoch": 0.7490085512455075, "grad_norm": 2.2192377903095886, "learning_rate": 1.321179309894984e-06, "loss": 0.6424, "step": 24175 }, { "epoch": 0.7491634651133969, "grad_norm": 2.7658935939978413, "learning_rate": 1.3203639684299785e-06, "loss": 0.6409, "step": 24180 }, { "epoch": 0.7493183789812864, "grad_norm": 3.227124898660567, "learning_rate": 1.319548626964973e-06, "loss": 0.6713, "step": 24185 }, { "epoch": 0.7494732928491759, "grad_norm": 2.86332384933109, "learning_rate": 1.3187332854999675e-06, "loss": 0.7389, "step": 24190 }, { "epoch": 0.7496282067170653, "grad_norm": 2.2287802399998613, "learning_rate": 1.317917944034962e-06, "loss": 0.6881, "step": 24195 }, { "epoch": 0.7497831205849548, "grad_norm": 2.801163799408493, "learning_rate": 1.3171026025699566e-06, "loss": 0.5857, "step": 24200 }, { "epoch": 0.7499380344528442, "grad_norm": 4.756655870748995, "learning_rate": 1.316287261104951e-06, "loss": 0.6344, "step": 24205 }, { "epoch": 0.7500929483207337, "grad_norm": 2.542513061388491, "learning_rate": 1.3154719196399455e-06, "loss": 0.5963, "step": 24210 }, { "epoch": 0.7502478621886232, "grad_norm": 2.4138689121161137, "learning_rate": 1.31465657817494e-06, "loss": 0.6408, "step": 24215 }, { "epoch": 0.7504027760565126, "grad_norm": 2.2413984890581444, "learning_rate": 1.3138412367099342e-06, "loss": 0.692, "step": 24220 }, { "epoch": 0.7505576899244021, "grad_norm": 2.8732603860916903, "learning_rate": 1.3130258952449287e-06, "loss": 0.6415, "step": 24225 }, { "epoch": 0.7507126037922914, "grad_norm": 3.129878951458669, "learning_rate": 1.3122105537799231e-06, "loss": 0.6752, "step": 24230 }, { "epoch": 0.7508675176601809, "grad_norm": 2.938555845548512, "learning_rate": 1.3113952123149176e-06, "loss": 0.7078, "step": 24235 }, { "epoch": 0.7510224315280704, "grad_norm": 2.1089371524519707, "learning_rate": 1.310579870849912e-06, "loss": 0.5494, "step": 24240 }, { "epoch": 0.7511773453959598, "grad_norm": 3.137361132205042, "learning_rate": 1.3097645293849065e-06, "loss": 0.6589, "step": 24245 }, { "epoch": 0.7513322592638493, "grad_norm": 2.3614541162926352, "learning_rate": 1.308949187919901e-06, "loss": 0.6882, "step": 24250 }, { "epoch": 0.7514871731317387, "grad_norm": 2.621579065257565, "learning_rate": 1.3081338464548954e-06, "loss": 0.7319, "step": 24255 }, { "epoch": 0.7516420869996282, "grad_norm": 2.4344397359218184, "learning_rate": 1.3073185049898899e-06, "loss": 0.6762, "step": 24260 }, { "epoch": 0.7517970008675177, "grad_norm": 3.3016740892247207, "learning_rate": 1.3065031635248843e-06, "loss": 0.6844, "step": 24265 }, { "epoch": 0.7519519147354071, "grad_norm": 3.1994111189310104, "learning_rate": 1.3056878220598788e-06, "loss": 0.7432, "step": 24270 }, { "epoch": 0.7521068286032966, "grad_norm": 2.3824478755519736, "learning_rate": 1.304872480594873e-06, "loss": 0.7125, "step": 24275 }, { "epoch": 0.752261742471186, "grad_norm": 2.373339848501158, "learning_rate": 1.3040571391298675e-06, "loss": 0.6673, "step": 24280 }, { "epoch": 0.7524166563390755, "grad_norm": 2.616549774529697, "learning_rate": 1.303241797664862e-06, "loss": 0.6083, "step": 24285 }, { "epoch": 0.752571570206965, "grad_norm": 2.2540217886436817, "learning_rate": 1.3024264561998564e-06, "loss": 0.6039, "step": 24290 }, { "epoch": 0.7527264840748544, "grad_norm": 2.4183258757902237, "learning_rate": 1.301611114734851e-06, "loss": 0.6651, "step": 24295 }, { "epoch": 0.7528813979427438, "grad_norm": 2.656839567598488, "learning_rate": 1.3007957732698456e-06, "loss": 0.5986, "step": 24300 }, { "epoch": 0.7530363118106332, "grad_norm": 2.386850105338026, "learning_rate": 1.29998043180484e-06, "loss": 0.6727, "step": 24305 }, { "epoch": 0.7531912256785227, "grad_norm": 2.1416057897617806, "learning_rate": 1.2991650903398345e-06, "loss": 0.6135, "step": 24310 }, { "epoch": 0.7533461395464122, "grad_norm": 2.555086539986011, "learning_rate": 1.298349748874829e-06, "loss": 0.6722, "step": 24315 }, { "epoch": 0.7535010534143016, "grad_norm": 2.392873229500813, "learning_rate": 1.2975344074098234e-06, "loss": 0.6589, "step": 24320 }, { "epoch": 0.7536559672821911, "grad_norm": 2.6874876928888893, "learning_rate": 1.2967190659448178e-06, "loss": 0.6926, "step": 24325 }, { "epoch": 0.7538108811500805, "grad_norm": 2.744739109594639, "learning_rate": 1.2959037244798123e-06, "loss": 0.7849, "step": 24330 }, { "epoch": 0.75396579501797, "grad_norm": 2.6470286408968935, "learning_rate": 1.2950883830148068e-06, "loss": 0.6877, "step": 24335 }, { "epoch": 0.7541207088858595, "grad_norm": 3.1497920889493183, "learning_rate": 1.2942730415498012e-06, "loss": 0.7474, "step": 24340 }, { "epoch": 0.7542756227537489, "grad_norm": 3.0785212767876273, "learning_rate": 1.2934577000847957e-06, "loss": 0.7052, "step": 24345 }, { "epoch": 0.7544305366216384, "grad_norm": 2.3805921175051874, "learning_rate": 1.2926423586197901e-06, "loss": 0.7201, "step": 24350 }, { "epoch": 0.7545854504895279, "grad_norm": 2.848791648618633, "learning_rate": 1.2918270171547844e-06, "loss": 0.6707, "step": 24355 }, { "epoch": 0.7547403643574173, "grad_norm": 3.1441798320412375, "learning_rate": 1.2910116756897789e-06, "loss": 0.7143, "step": 24360 }, { "epoch": 0.7548952782253068, "grad_norm": 2.4863074379925716, "learning_rate": 1.2901963342247733e-06, "loss": 0.6722, "step": 24365 }, { "epoch": 0.7550501920931962, "grad_norm": 3.4320591691846496, "learning_rate": 1.2893809927597678e-06, "loss": 0.6404, "step": 24370 }, { "epoch": 0.7552051059610856, "grad_norm": 2.584807484249111, "learning_rate": 1.2885656512947622e-06, "loss": 0.6356, "step": 24375 }, { "epoch": 0.755360019828975, "grad_norm": 3.8589490065219736, "learning_rate": 1.2877503098297567e-06, "loss": 0.6857, "step": 24380 }, { "epoch": 0.7555149336968645, "grad_norm": 3.0043332854219846, "learning_rate": 1.2869349683647511e-06, "loss": 0.6748, "step": 24385 }, { "epoch": 0.755669847564754, "grad_norm": 3.1341720379359406, "learning_rate": 1.2861196268997458e-06, "loss": 0.6742, "step": 24390 }, { "epoch": 0.7558247614326434, "grad_norm": 2.6810241422917436, "learning_rate": 1.2853042854347403e-06, "loss": 0.6579, "step": 24395 }, { "epoch": 0.7559796753005329, "grad_norm": 2.229982124712599, "learning_rate": 1.2844889439697347e-06, "loss": 0.6788, "step": 24400 }, { "epoch": 0.7561345891684224, "grad_norm": 2.3710007534711135, "learning_rate": 1.2836736025047292e-06, "loss": 0.6338, "step": 24405 }, { "epoch": 0.7562895030363118, "grad_norm": 2.7787623174181735, "learning_rate": 1.2828582610397237e-06, "loss": 0.6997, "step": 24410 }, { "epoch": 0.7564444169042013, "grad_norm": 2.122398058904802, "learning_rate": 1.2820429195747181e-06, "loss": 0.6378, "step": 24415 }, { "epoch": 0.7565993307720907, "grad_norm": 2.8438674306525598, "learning_rate": 1.2812275781097126e-06, "loss": 0.6502, "step": 24420 }, { "epoch": 0.7567542446399802, "grad_norm": 3.9144360278666626, "learning_rate": 1.280412236644707e-06, "loss": 0.6369, "step": 24425 }, { "epoch": 0.7569091585078697, "grad_norm": 4.035299824763812, "learning_rate": 1.2795968951797013e-06, "loss": 0.7263, "step": 24430 }, { "epoch": 0.7570640723757591, "grad_norm": 2.4544921110883475, "learning_rate": 1.2787815537146957e-06, "loss": 0.6441, "step": 24435 }, { "epoch": 0.7572189862436486, "grad_norm": 2.470993597438397, "learning_rate": 1.2779662122496902e-06, "loss": 0.6109, "step": 24440 }, { "epoch": 0.7573739001115379, "grad_norm": 2.6626050150280265, "learning_rate": 1.2771508707846847e-06, "loss": 0.6437, "step": 24445 }, { "epoch": 0.7575288139794274, "grad_norm": 2.254075686348064, "learning_rate": 1.2763355293196791e-06, "loss": 0.5982, "step": 24450 }, { "epoch": 0.7576837278473169, "grad_norm": 2.957157126063553, "learning_rate": 1.2755201878546736e-06, "loss": 0.6797, "step": 24455 }, { "epoch": 0.7578386417152063, "grad_norm": 2.9433904336477994, "learning_rate": 1.274704846389668e-06, "loss": 0.739, "step": 24460 }, { "epoch": 0.7579935555830958, "grad_norm": 2.780019909485868, "learning_rate": 1.2738895049246625e-06, "loss": 0.6571, "step": 24465 }, { "epoch": 0.7581484694509852, "grad_norm": 2.443184474467377, "learning_rate": 1.273074163459657e-06, "loss": 0.6655, "step": 24470 }, { "epoch": 0.7583033833188747, "grad_norm": 2.965051295881764, "learning_rate": 1.2722588219946514e-06, "loss": 0.6807, "step": 24475 }, { "epoch": 0.7584582971867642, "grad_norm": 2.5052628404624024, "learning_rate": 1.271443480529646e-06, "loss": 0.7101, "step": 24480 }, { "epoch": 0.7586132110546536, "grad_norm": 2.4923953044464113, "learning_rate": 1.2706281390646405e-06, "loss": 0.5967, "step": 24485 }, { "epoch": 0.7587681249225431, "grad_norm": 3.0853475950732308, "learning_rate": 1.269812797599635e-06, "loss": 0.6939, "step": 24490 }, { "epoch": 0.7589230387904325, "grad_norm": 2.9373884995762487, "learning_rate": 1.2689974561346295e-06, "loss": 0.7189, "step": 24495 }, { "epoch": 0.759077952658322, "grad_norm": 2.69760543786747, "learning_rate": 1.268182114669624e-06, "loss": 0.6339, "step": 24500 }, { "epoch": 0.7592328665262115, "grad_norm": 2.6850644314680685, "learning_rate": 1.2673667732046182e-06, "loss": 0.6384, "step": 24505 }, { "epoch": 0.7593877803941009, "grad_norm": 3.4636418590307665, "learning_rate": 1.2665514317396126e-06, "loss": 0.6412, "step": 24510 }, { "epoch": 0.7595426942619903, "grad_norm": 2.977060473944135, "learning_rate": 1.265736090274607e-06, "loss": 0.6337, "step": 24515 }, { "epoch": 0.7596976081298797, "grad_norm": 5.527647957218463, "learning_rate": 1.2649207488096015e-06, "loss": 0.6463, "step": 24520 }, { "epoch": 0.7598525219977692, "grad_norm": 2.722565100372201, "learning_rate": 1.264105407344596e-06, "loss": 0.6833, "step": 24525 }, { "epoch": 0.7600074358656587, "grad_norm": 2.4668518415100653, "learning_rate": 1.2632900658795905e-06, "loss": 0.6109, "step": 24530 }, { "epoch": 0.7601623497335481, "grad_norm": 5.074236125012033, "learning_rate": 1.262474724414585e-06, "loss": 0.6328, "step": 24535 }, { "epoch": 0.7603172636014376, "grad_norm": 2.7870184466864663, "learning_rate": 1.2616593829495794e-06, "loss": 0.7349, "step": 24540 }, { "epoch": 0.760472177469327, "grad_norm": 2.1395769694342066, "learning_rate": 1.2608440414845738e-06, "loss": 0.6992, "step": 24545 }, { "epoch": 0.7606270913372165, "grad_norm": 2.7550756859680288, "learning_rate": 1.2600287000195683e-06, "loss": 0.705, "step": 24550 }, { "epoch": 0.760782005205106, "grad_norm": 4.900241448359159, "learning_rate": 1.2592133585545628e-06, "loss": 0.7411, "step": 24555 }, { "epoch": 0.7609369190729954, "grad_norm": 2.619717052913018, "learning_rate": 1.2583980170895572e-06, "loss": 0.6887, "step": 24560 }, { "epoch": 0.7610918329408849, "grad_norm": 2.6497924564076434, "learning_rate": 1.2575826756245515e-06, "loss": 0.7011, "step": 24565 }, { "epoch": 0.7612467468087744, "grad_norm": 2.391401991049395, "learning_rate": 1.256767334159546e-06, "loss": 0.6661, "step": 24570 }, { "epoch": 0.7614016606766638, "grad_norm": 2.770182297896785, "learning_rate": 1.2559519926945408e-06, "loss": 0.7007, "step": 24575 }, { "epoch": 0.7615565745445533, "grad_norm": 2.948206817799923, "learning_rate": 1.255136651229535e-06, "loss": 0.658, "step": 24580 }, { "epoch": 0.7617114884124426, "grad_norm": 2.794410962572946, "learning_rate": 1.2543213097645295e-06, "loss": 0.7076, "step": 24585 }, { "epoch": 0.7618664022803321, "grad_norm": 2.631582055551625, "learning_rate": 1.253505968299524e-06, "loss": 0.6921, "step": 24590 }, { "epoch": 0.7620213161482216, "grad_norm": 2.9346124683108408, "learning_rate": 1.2526906268345184e-06, "loss": 0.6605, "step": 24595 }, { "epoch": 0.762176230016111, "grad_norm": 3.0040293479350244, "learning_rate": 1.2518752853695129e-06, "loss": 0.6631, "step": 24600 }, { "epoch": 0.7623311438840005, "grad_norm": 2.9504747093171466, "learning_rate": 1.2510599439045073e-06, "loss": 0.6551, "step": 24605 }, { "epoch": 0.7624860577518899, "grad_norm": 3.8618100299818434, "learning_rate": 1.2502446024395018e-06, "loss": 0.7265, "step": 24610 }, { "epoch": 0.7626409716197794, "grad_norm": 3.177258511549278, "learning_rate": 1.2494292609744963e-06, "loss": 0.7283, "step": 24615 }, { "epoch": 0.7627958854876689, "grad_norm": 3.8389704966606373, "learning_rate": 1.2486139195094907e-06, "loss": 0.6502, "step": 24620 }, { "epoch": 0.7629507993555583, "grad_norm": 2.6112837579368615, "learning_rate": 1.2477985780444852e-06, "loss": 0.6439, "step": 24625 }, { "epoch": 0.7631057132234478, "grad_norm": 2.5747501897490443, "learning_rate": 1.2469832365794796e-06, "loss": 0.6849, "step": 24630 }, { "epoch": 0.7632606270913372, "grad_norm": 3.185710074486082, "learning_rate": 1.246167895114474e-06, "loss": 0.7147, "step": 24635 }, { "epoch": 0.7634155409592267, "grad_norm": 2.1348644261790217, "learning_rate": 1.2453525536494683e-06, "loss": 0.5889, "step": 24640 }, { "epoch": 0.7635704548271162, "grad_norm": 2.9007710099668294, "learning_rate": 1.244537212184463e-06, "loss": 0.7536, "step": 24645 }, { "epoch": 0.7637253686950056, "grad_norm": 2.190123966585471, "learning_rate": 1.2437218707194575e-06, "loss": 0.6836, "step": 24650 }, { "epoch": 0.7638802825628951, "grad_norm": 2.537836682071782, "learning_rate": 1.242906529254452e-06, "loss": 0.6775, "step": 24655 }, { "epoch": 0.7640351964307844, "grad_norm": 2.6158311606348343, "learning_rate": 1.2420911877894464e-06, "loss": 0.6292, "step": 24660 }, { "epoch": 0.7641901102986739, "grad_norm": 3.1968892624154774, "learning_rate": 1.2412758463244409e-06, "loss": 0.6304, "step": 24665 }, { "epoch": 0.7643450241665634, "grad_norm": 2.968693831321875, "learning_rate": 1.2404605048594353e-06, "loss": 0.6247, "step": 24670 }, { "epoch": 0.7644999380344528, "grad_norm": 3.769296133914613, "learning_rate": 1.2396451633944296e-06, "loss": 0.6651, "step": 24675 }, { "epoch": 0.7646548519023423, "grad_norm": 2.7219819312880515, "learning_rate": 1.238829821929424e-06, "loss": 0.5792, "step": 24680 }, { "epoch": 0.7648097657702317, "grad_norm": 2.837661083067102, "learning_rate": 1.2380144804644185e-06, "loss": 0.7453, "step": 24685 }, { "epoch": 0.7649646796381212, "grad_norm": 2.04982466916493, "learning_rate": 1.237199138999413e-06, "loss": 0.6524, "step": 24690 }, { "epoch": 0.7651195935060107, "grad_norm": 2.776891463089921, "learning_rate": 1.2363837975344076e-06, "loss": 0.7114, "step": 24695 }, { "epoch": 0.7652745073739001, "grad_norm": 2.020928982121137, "learning_rate": 1.235568456069402e-06, "loss": 0.6252, "step": 24700 }, { "epoch": 0.7654294212417896, "grad_norm": 2.4676468846785102, "learning_rate": 1.2347531146043965e-06, "loss": 0.646, "step": 24705 }, { "epoch": 0.765584335109679, "grad_norm": 3.1347747996853714, "learning_rate": 1.233937773139391e-06, "loss": 0.5739, "step": 24710 }, { "epoch": 0.7657392489775685, "grad_norm": 3.717426625633753, "learning_rate": 1.2331224316743852e-06, "loss": 0.721, "step": 24715 }, { "epoch": 0.765894162845458, "grad_norm": 2.5996204573564836, "learning_rate": 1.2323070902093797e-06, "loss": 0.689, "step": 24720 }, { "epoch": 0.7660490767133474, "grad_norm": 3.0350884681527597, "learning_rate": 1.2314917487443742e-06, "loss": 0.7353, "step": 24725 }, { "epoch": 0.7662039905812368, "grad_norm": 3.067504807505462, "learning_rate": 1.2306764072793686e-06, "loss": 0.7162, "step": 24730 }, { "epoch": 0.7663589044491262, "grad_norm": 2.6465186573175044, "learning_rate": 1.229861065814363e-06, "loss": 0.6198, "step": 24735 }, { "epoch": 0.7665138183170157, "grad_norm": 2.8885460146112094, "learning_rate": 1.2290457243493577e-06, "loss": 0.6687, "step": 24740 }, { "epoch": 0.7666687321849052, "grad_norm": 3.1594652036391944, "learning_rate": 1.2282303828843522e-06, "loss": 0.6128, "step": 24745 }, { "epoch": 0.7668236460527946, "grad_norm": 5.511312097925785, "learning_rate": 1.2274150414193467e-06, "loss": 0.7464, "step": 24750 }, { "epoch": 0.7669785599206841, "grad_norm": 3.517190059325142, "learning_rate": 1.226599699954341e-06, "loss": 0.6522, "step": 24755 }, { "epoch": 0.7671334737885735, "grad_norm": 2.3163117465474015, "learning_rate": 1.2257843584893354e-06, "loss": 0.6805, "step": 24760 }, { "epoch": 0.767288387656463, "grad_norm": 2.546600246156703, "learning_rate": 1.2249690170243298e-06, "loss": 0.6509, "step": 24765 }, { "epoch": 0.7674433015243525, "grad_norm": 2.940380197232815, "learning_rate": 1.2241536755593243e-06, "loss": 0.6409, "step": 24770 }, { "epoch": 0.7675982153922419, "grad_norm": 2.6875216910513853, "learning_rate": 1.2233383340943187e-06, "loss": 0.6851, "step": 24775 }, { "epoch": 0.7677531292601314, "grad_norm": 2.8507150519350204, "learning_rate": 1.2225229926293132e-06, "loss": 0.6304, "step": 24780 }, { "epoch": 0.7679080431280209, "grad_norm": 2.785978698673827, "learning_rate": 1.2217076511643077e-06, "loss": 0.6805, "step": 24785 }, { "epoch": 0.7680629569959103, "grad_norm": 4.579220199475182, "learning_rate": 1.2208923096993021e-06, "loss": 0.654, "step": 24790 }, { "epoch": 0.7682178708637998, "grad_norm": 7.324219355492813, "learning_rate": 1.2200769682342966e-06, "loss": 0.635, "step": 24795 }, { "epoch": 0.7683727847316891, "grad_norm": 2.4619641777273613, "learning_rate": 1.219261626769291e-06, "loss": 0.6772, "step": 24800 }, { "epoch": 0.7685276985995786, "grad_norm": 2.053290357052023, "learning_rate": 1.2184462853042855e-06, "loss": 0.6924, "step": 24805 }, { "epoch": 0.768682612467468, "grad_norm": 2.206220391608593, "learning_rate": 1.21763094383928e-06, "loss": 0.6421, "step": 24810 }, { "epoch": 0.7688375263353575, "grad_norm": 3.536897724014761, "learning_rate": 1.2168156023742744e-06, "loss": 0.7109, "step": 24815 }, { "epoch": 0.768992440203247, "grad_norm": 2.2066932887502686, "learning_rate": 1.2160002609092689e-06, "loss": 0.6447, "step": 24820 }, { "epoch": 0.7691473540711364, "grad_norm": 2.130603944782472, "learning_rate": 1.2151849194442633e-06, "loss": 0.6081, "step": 24825 }, { "epoch": 0.7693022679390259, "grad_norm": 2.3630162481353274, "learning_rate": 1.2143695779792578e-06, "loss": 0.6376, "step": 24830 }, { "epoch": 0.7694571818069154, "grad_norm": 3.0149910611224087, "learning_rate": 1.2135542365142523e-06, "loss": 0.6986, "step": 24835 }, { "epoch": 0.7696120956748048, "grad_norm": 3.134313853042442, "learning_rate": 1.2127388950492467e-06, "loss": 0.6669, "step": 24840 }, { "epoch": 0.7697670095426943, "grad_norm": 2.8488287301456525, "learning_rate": 1.2119235535842412e-06, "loss": 0.6486, "step": 24845 }, { "epoch": 0.7699219234105837, "grad_norm": 3.15628961806411, "learning_rate": 1.2111082121192356e-06, "loss": 0.7072, "step": 24850 }, { "epoch": 0.7700768372784732, "grad_norm": 2.533620839648205, "learning_rate": 1.21029287065423e-06, "loss": 0.719, "step": 24855 }, { "epoch": 0.7702317511463627, "grad_norm": 2.5153859373352843, "learning_rate": 1.2094775291892246e-06, "loss": 0.7019, "step": 24860 }, { "epoch": 0.7703866650142521, "grad_norm": 2.5862283111474613, "learning_rate": 1.208662187724219e-06, "loss": 0.7317, "step": 24865 }, { "epoch": 0.7705415788821415, "grad_norm": 3.0076165762420755, "learning_rate": 1.2078468462592135e-06, "loss": 0.6364, "step": 24870 }, { "epoch": 0.7706964927500309, "grad_norm": 2.8471869223706237, "learning_rate": 1.207031504794208e-06, "loss": 0.726, "step": 24875 }, { "epoch": 0.7708514066179204, "grad_norm": 2.671049302767378, "learning_rate": 1.2062161633292024e-06, "loss": 0.6943, "step": 24880 }, { "epoch": 0.7710063204858099, "grad_norm": 2.353698902721607, "learning_rate": 1.2054008218641968e-06, "loss": 0.6122, "step": 24885 }, { "epoch": 0.7711612343536993, "grad_norm": 2.7235286305279036, "learning_rate": 1.2045854803991913e-06, "loss": 0.6529, "step": 24890 }, { "epoch": 0.7713161482215888, "grad_norm": 2.518512539153936, "learning_rate": 1.2037701389341858e-06, "loss": 0.6859, "step": 24895 }, { "epoch": 0.7714710620894782, "grad_norm": 2.49497579586053, "learning_rate": 1.2029547974691802e-06, "loss": 0.6296, "step": 24900 }, { "epoch": 0.7716259759573677, "grad_norm": 2.622042166189354, "learning_rate": 1.2021394560041747e-06, "loss": 0.6404, "step": 24905 }, { "epoch": 0.7717808898252572, "grad_norm": 2.7963127195197566, "learning_rate": 1.2013241145391691e-06, "loss": 0.6706, "step": 24910 }, { "epoch": 0.7719358036931466, "grad_norm": 2.996997799283476, "learning_rate": 1.2005087730741636e-06, "loss": 0.753, "step": 24915 }, { "epoch": 0.7720907175610361, "grad_norm": 2.9043813355326127, "learning_rate": 1.199693431609158e-06, "loss": 0.6503, "step": 24920 }, { "epoch": 0.7722456314289255, "grad_norm": 3.1278566920785122, "learning_rate": 1.1988780901441523e-06, "loss": 0.5907, "step": 24925 }, { "epoch": 0.772400545296815, "grad_norm": 2.655378040357253, "learning_rate": 1.198062748679147e-06, "loss": 0.6498, "step": 24930 }, { "epoch": 0.7725554591647045, "grad_norm": 2.2014572170146, "learning_rate": 1.1972474072141414e-06, "loss": 0.7147, "step": 24935 }, { "epoch": 0.7727103730325938, "grad_norm": 2.7835147616549896, "learning_rate": 1.196432065749136e-06, "loss": 0.7015, "step": 24940 }, { "epoch": 0.7728652869004833, "grad_norm": 2.807104124517542, "learning_rate": 1.1956167242841304e-06, "loss": 0.6809, "step": 24945 }, { "epoch": 0.7730202007683727, "grad_norm": 3.116080816852237, "learning_rate": 1.1948013828191248e-06, "loss": 0.654, "step": 24950 }, { "epoch": 0.7731751146362622, "grad_norm": 2.74221644056766, "learning_rate": 1.1939860413541193e-06, "loss": 0.6669, "step": 24955 }, { "epoch": 0.7733300285041517, "grad_norm": 3.19090716510582, "learning_rate": 1.1931706998891137e-06, "loss": 0.6744, "step": 24960 }, { "epoch": 0.7734849423720411, "grad_norm": 2.5845123924413564, "learning_rate": 1.192355358424108e-06, "loss": 0.6636, "step": 24965 }, { "epoch": 0.7736398562399306, "grad_norm": 2.765076633189691, "learning_rate": 1.1915400169591024e-06, "loss": 0.6691, "step": 24970 }, { "epoch": 0.77379477010782, "grad_norm": 3.0981354204810163, "learning_rate": 1.1907246754940971e-06, "loss": 0.7298, "step": 24975 }, { "epoch": 0.7739496839757095, "grad_norm": 3.1519161029635705, "learning_rate": 1.1899093340290916e-06, "loss": 0.7312, "step": 24980 }, { "epoch": 0.774104597843599, "grad_norm": 2.4733548576466147, "learning_rate": 1.189093992564086e-06, "loss": 0.5876, "step": 24985 }, { "epoch": 0.7742595117114884, "grad_norm": 2.569804460247831, "learning_rate": 1.1882786510990805e-06, "loss": 0.642, "step": 24990 }, { "epoch": 0.7744144255793779, "grad_norm": 4.922808431737593, "learning_rate": 1.187463309634075e-06, "loss": 0.7272, "step": 24995 }, { "epoch": 0.7745693394472674, "grad_norm": 3.4042702899952144, "learning_rate": 1.1866479681690692e-06, "loss": 0.7079, "step": 25000 }, { "epoch": 0.7747242533151568, "grad_norm": 2.4460641860587704, "learning_rate": 1.1858326267040637e-06, "loss": 0.6401, "step": 25005 }, { "epoch": 0.7748791671830463, "grad_norm": 2.5110564010794576, "learning_rate": 1.1850172852390581e-06, "loss": 0.6739, "step": 25010 }, { "epoch": 0.7750340810509356, "grad_norm": 2.196383279959754, "learning_rate": 1.1842019437740526e-06, "loss": 0.679, "step": 25015 }, { "epoch": 0.7751889949188251, "grad_norm": 3.0994667652502894, "learning_rate": 1.183386602309047e-06, "loss": 0.6685, "step": 25020 }, { "epoch": 0.7753439087867146, "grad_norm": 2.468937141185303, "learning_rate": 1.1825712608440417e-06, "loss": 0.704, "step": 25025 }, { "epoch": 0.775498822654604, "grad_norm": 2.4669142306014638, "learning_rate": 1.1817559193790362e-06, "loss": 0.6659, "step": 25030 }, { "epoch": 0.7756537365224935, "grad_norm": 2.6870057875648565, "learning_rate": 1.1809405779140306e-06, "loss": 0.6628, "step": 25035 }, { "epoch": 0.7758086503903829, "grad_norm": 3.094643854558254, "learning_rate": 1.1801252364490249e-06, "loss": 0.7273, "step": 25040 }, { "epoch": 0.7759635642582724, "grad_norm": 2.342454226568964, "learning_rate": 1.1793098949840193e-06, "loss": 0.6962, "step": 25045 }, { "epoch": 0.7761184781261619, "grad_norm": 3.955393605535403, "learning_rate": 1.1784945535190138e-06, "loss": 0.7156, "step": 25050 }, { "epoch": 0.7762733919940513, "grad_norm": 2.8284815491833304, "learning_rate": 1.1776792120540082e-06, "loss": 0.6596, "step": 25055 }, { "epoch": 0.7764283058619408, "grad_norm": 3.6474054165055434, "learning_rate": 1.1768638705890027e-06, "loss": 0.6719, "step": 25060 }, { "epoch": 0.7765832197298302, "grad_norm": 2.854515883478379, "learning_rate": 1.1760485291239972e-06, "loss": 0.6709, "step": 25065 }, { "epoch": 0.7767381335977197, "grad_norm": 3.4954886450197806, "learning_rate": 1.1752331876589918e-06, "loss": 0.736, "step": 25070 }, { "epoch": 0.7768930474656092, "grad_norm": 2.216608920677337, "learning_rate": 1.1744178461939863e-06, "loss": 0.6688, "step": 25075 }, { "epoch": 0.7770479613334986, "grad_norm": 2.56128152690863, "learning_rate": 1.1736025047289805e-06, "loss": 0.6555, "step": 25080 }, { "epoch": 0.777202875201388, "grad_norm": 2.839596496258949, "learning_rate": 1.172787163263975e-06, "loss": 0.6106, "step": 25085 }, { "epoch": 0.7773577890692774, "grad_norm": 3.5607060602326768, "learning_rate": 1.1719718217989695e-06, "loss": 0.6362, "step": 25090 }, { "epoch": 0.7775127029371669, "grad_norm": 4.968082057320078, "learning_rate": 1.171156480333964e-06, "loss": 0.6264, "step": 25095 }, { "epoch": 0.7776676168050564, "grad_norm": 2.153085716578855, "learning_rate": 1.1703411388689584e-06, "loss": 0.6318, "step": 25100 }, { "epoch": 0.7778225306729458, "grad_norm": 2.878301234821489, "learning_rate": 1.1695257974039528e-06, "loss": 0.6729, "step": 25105 }, { "epoch": 0.7779774445408353, "grad_norm": 3.185349646841353, "learning_rate": 1.1687104559389473e-06, "loss": 0.6711, "step": 25110 }, { "epoch": 0.7781323584087247, "grad_norm": 2.434828778890885, "learning_rate": 1.1678951144739418e-06, "loss": 0.5801, "step": 25115 }, { "epoch": 0.7782872722766142, "grad_norm": 2.1395467765249947, "learning_rate": 1.1670797730089362e-06, "loss": 0.7352, "step": 25120 }, { "epoch": 0.7784421861445037, "grad_norm": 2.455812254273302, "learning_rate": 1.1662644315439307e-06, "loss": 0.6332, "step": 25125 }, { "epoch": 0.7785971000123931, "grad_norm": 3.2508351020791952, "learning_rate": 1.1654490900789251e-06, "loss": 0.6725, "step": 25130 }, { "epoch": 0.7787520138802826, "grad_norm": 2.819090514062204, "learning_rate": 1.1646337486139196e-06, "loss": 0.6947, "step": 25135 }, { "epoch": 0.778906927748172, "grad_norm": 5.829969127344234, "learning_rate": 1.163818407148914e-06, "loss": 0.7158, "step": 25140 }, { "epoch": 0.7790618416160615, "grad_norm": 3.747191626937666, "learning_rate": 1.1630030656839085e-06, "loss": 0.6597, "step": 25145 }, { "epoch": 0.779216755483951, "grad_norm": 2.6043542643570037, "learning_rate": 1.162187724218903e-06, "loss": 0.6403, "step": 25150 }, { "epoch": 0.7793716693518403, "grad_norm": 2.4131204372689417, "learning_rate": 1.1613723827538974e-06, "loss": 0.7076, "step": 25155 }, { "epoch": 0.7795265832197298, "grad_norm": 2.3607363892303166, "learning_rate": 1.1605570412888919e-06, "loss": 0.6008, "step": 25160 }, { "epoch": 0.7796814970876192, "grad_norm": 2.989631648660627, "learning_rate": 1.1597416998238863e-06, "loss": 0.6489, "step": 25165 }, { "epoch": 0.7798364109555087, "grad_norm": 2.9735135848275664, "learning_rate": 1.1589263583588808e-06, "loss": 0.6379, "step": 25170 }, { "epoch": 0.7799913248233982, "grad_norm": 2.634170203542536, "learning_rate": 1.1581110168938753e-06, "loss": 0.6542, "step": 25175 }, { "epoch": 0.7801462386912876, "grad_norm": 2.736594732564341, "learning_rate": 1.1572956754288697e-06, "loss": 0.6841, "step": 25180 }, { "epoch": 0.7803011525591771, "grad_norm": 4.116042859118889, "learning_rate": 1.1564803339638642e-06, "loss": 0.6824, "step": 25185 }, { "epoch": 0.7804560664270666, "grad_norm": 2.3616216394139946, "learning_rate": 1.1556649924988586e-06, "loss": 0.7168, "step": 25190 }, { "epoch": 0.780610980294956, "grad_norm": 3.2179258909537003, "learning_rate": 1.154849651033853e-06, "loss": 0.6648, "step": 25195 }, { "epoch": 0.7807658941628455, "grad_norm": 2.706759372015871, "learning_rate": 1.1540343095688476e-06, "loss": 0.674, "step": 25200 }, { "epoch": 0.7809208080307349, "grad_norm": 2.1285743529127563, "learning_rate": 1.153218968103842e-06, "loss": 0.6306, "step": 25205 }, { "epoch": 0.7810757218986244, "grad_norm": 2.932785504767128, "learning_rate": 1.1524036266388363e-06, "loss": 0.6736, "step": 25210 }, { "epoch": 0.7812306357665139, "grad_norm": 2.416327195878457, "learning_rate": 1.151588285173831e-06, "loss": 0.6326, "step": 25215 }, { "epoch": 0.7813855496344033, "grad_norm": 2.9916876088238573, "learning_rate": 1.1507729437088254e-06, "loss": 0.6728, "step": 25220 }, { "epoch": 0.7815404635022927, "grad_norm": 3.2111362337721903, "learning_rate": 1.1499576022438199e-06, "loss": 0.6388, "step": 25225 }, { "epoch": 0.7816953773701821, "grad_norm": 2.793508349839582, "learning_rate": 1.1491422607788143e-06, "loss": 0.6612, "step": 25230 }, { "epoch": 0.7818502912380716, "grad_norm": 2.82147930744665, "learning_rate": 1.1483269193138088e-06, "loss": 0.6766, "step": 25235 }, { "epoch": 0.782005205105961, "grad_norm": 3.0380095655035326, "learning_rate": 1.1475115778488032e-06, "loss": 0.5963, "step": 25240 }, { "epoch": 0.7821601189738505, "grad_norm": 3.3662879772808623, "learning_rate": 1.1466962363837977e-06, "loss": 0.6844, "step": 25245 }, { "epoch": 0.78231503284174, "grad_norm": 2.5531558541419277, "learning_rate": 1.145880894918792e-06, "loss": 0.6659, "step": 25250 }, { "epoch": 0.7824699467096294, "grad_norm": 2.585297628351479, "learning_rate": 1.1450655534537864e-06, "loss": 0.6465, "step": 25255 }, { "epoch": 0.7826248605775189, "grad_norm": 2.9447001728753803, "learning_rate": 1.144250211988781e-06, "loss": 0.6713, "step": 25260 }, { "epoch": 0.7827797744454084, "grad_norm": 2.6097651272900473, "learning_rate": 1.1434348705237755e-06, "loss": 0.6409, "step": 25265 }, { "epoch": 0.7829346883132978, "grad_norm": 2.8560145715807943, "learning_rate": 1.14261952905877e-06, "loss": 0.6193, "step": 25270 }, { "epoch": 0.7830896021811873, "grad_norm": 2.423966452470546, "learning_rate": 1.1418041875937644e-06, "loss": 0.6152, "step": 25275 }, { "epoch": 0.7832445160490767, "grad_norm": 2.38471913903088, "learning_rate": 1.140988846128759e-06, "loss": 0.6356, "step": 25280 }, { "epoch": 0.7833994299169662, "grad_norm": 2.2108269329863863, "learning_rate": 1.1401735046637534e-06, "loss": 0.7205, "step": 25285 }, { "epoch": 0.7835543437848557, "grad_norm": 2.2580885862112576, "learning_rate": 1.1393581631987476e-06, "loss": 0.6962, "step": 25290 }, { "epoch": 0.7837092576527451, "grad_norm": 2.6139513101863545, "learning_rate": 1.138542821733742e-06, "loss": 0.6241, "step": 25295 }, { "epoch": 0.7838641715206345, "grad_norm": 2.3727839436935514, "learning_rate": 1.1377274802687365e-06, "loss": 0.6438, "step": 25300 }, { "epoch": 0.7840190853885239, "grad_norm": 2.785311596563675, "learning_rate": 1.1369121388037312e-06, "loss": 0.6534, "step": 25305 }, { "epoch": 0.7841739992564134, "grad_norm": 3.503433501602721, "learning_rate": 1.1360967973387257e-06, "loss": 0.6348, "step": 25310 }, { "epoch": 0.7843289131243029, "grad_norm": 2.6671645446580166, "learning_rate": 1.1352814558737201e-06, "loss": 0.6557, "step": 25315 }, { "epoch": 0.7844838269921923, "grad_norm": 2.7022435173238306, "learning_rate": 1.1344661144087146e-06, "loss": 0.6091, "step": 25320 }, { "epoch": 0.7846387408600818, "grad_norm": 3.346631393790569, "learning_rate": 1.1336507729437088e-06, "loss": 0.7229, "step": 25325 }, { "epoch": 0.7847936547279712, "grad_norm": 3.013101298299288, "learning_rate": 1.1328354314787033e-06, "loss": 0.6168, "step": 25330 }, { "epoch": 0.7849485685958607, "grad_norm": 2.689220576772039, "learning_rate": 1.1320200900136977e-06, "loss": 0.6437, "step": 25335 }, { "epoch": 0.7851034824637502, "grad_norm": 3.061085119537716, "learning_rate": 1.1312047485486922e-06, "loss": 0.6726, "step": 25340 }, { "epoch": 0.7852583963316396, "grad_norm": 2.705573779787796, "learning_rate": 1.1303894070836867e-06, "loss": 0.6271, "step": 25345 }, { "epoch": 0.7854133101995291, "grad_norm": 2.068443910082805, "learning_rate": 1.1295740656186811e-06, "loss": 0.6548, "step": 25350 }, { "epoch": 0.7855682240674186, "grad_norm": 2.3522535081800937, "learning_rate": 1.1287587241536758e-06, "loss": 0.6123, "step": 25355 }, { "epoch": 0.785723137935308, "grad_norm": 3.084727903253739, "learning_rate": 1.1279433826886703e-06, "loss": 0.6709, "step": 25360 }, { "epoch": 0.7858780518031975, "grad_norm": 2.616289519771823, "learning_rate": 1.1271280412236645e-06, "loss": 0.6208, "step": 25365 }, { "epoch": 0.7860329656710868, "grad_norm": 3.525600478924648, "learning_rate": 1.126312699758659e-06, "loss": 0.6578, "step": 25370 }, { "epoch": 0.7861878795389763, "grad_norm": 2.6275021796631566, "learning_rate": 1.1254973582936534e-06, "loss": 0.6922, "step": 25375 }, { "epoch": 0.7863427934068657, "grad_norm": 5.235476056729769, "learning_rate": 1.1246820168286479e-06, "loss": 0.6903, "step": 25380 }, { "epoch": 0.7864977072747552, "grad_norm": 2.642140362470058, "learning_rate": 1.1238666753636423e-06, "loss": 0.662, "step": 25385 }, { "epoch": 0.7866526211426447, "grad_norm": 2.482734441480005, "learning_rate": 1.1230513338986368e-06, "loss": 0.5888, "step": 25390 }, { "epoch": 0.7868075350105341, "grad_norm": 2.2313938320645272, "learning_rate": 1.1222359924336313e-06, "loss": 0.6831, "step": 25395 }, { "epoch": 0.7869624488784236, "grad_norm": 2.6718517746254205, "learning_rate": 1.1214206509686257e-06, "loss": 0.645, "step": 25400 }, { "epoch": 0.787117362746313, "grad_norm": 3.3584708335795805, "learning_rate": 1.1206053095036202e-06, "loss": 0.6067, "step": 25405 }, { "epoch": 0.7872722766142025, "grad_norm": 2.619077085110801, "learning_rate": 1.1197899680386146e-06, "loss": 0.5738, "step": 25410 }, { "epoch": 0.787427190482092, "grad_norm": 2.7491817429760257, "learning_rate": 1.118974626573609e-06, "loss": 0.677, "step": 25415 }, { "epoch": 0.7875821043499814, "grad_norm": 3.233856779585338, "learning_rate": 1.1181592851086035e-06, "loss": 0.7403, "step": 25420 }, { "epoch": 0.7877370182178709, "grad_norm": 2.7395626729594587, "learning_rate": 1.117343943643598e-06, "loss": 0.6553, "step": 25425 }, { "epoch": 0.7878919320857604, "grad_norm": 2.382121195094325, "learning_rate": 1.1165286021785925e-06, "loss": 0.7042, "step": 25430 }, { "epoch": 0.7880468459536498, "grad_norm": 2.4424119917570213, "learning_rate": 1.115713260713587e-06, "loss": 0.6339, "step": 25435 }, { "epoch": 0.7882017598215392, "grad_norm": 3.3364252977284963, "learning_rate": 1.1148979192485814e-06, "loss": 0.6763, "step": 25440 }, { "epoch": 0.7883566736894286, "grad_norm": 3.3487291049826244, "learning_rate": 1.1140825777835758e-06, "loss": 0.6082, "step": 25445 }, { "epoch": 0.7885115875573181, "grad_norm": 2.252216633240912, "learning_rate": 1.1132672363185703e-06, "loss": 0.6293, "step": 25450 }, { "epoch": 0.7886665014252076, "grad_norm": 3.107952793730147, "learning_rate": 1.1124518948535648e-06, "loss": 0.6742, "step": 25455 }, { "epoch": 0.788821415293097, "grad_norm": 2.631686597415445, "learning_rate": 1.1116365533885592e-06, "loss": 0.6393, "step": 25460 }, { "epoch": 0.7889763291609865, "grad_norm": 2.799272606137213, "learning_rate": 1.1108212119235537e-06, "loss": 0.5812, "step": 25465 }, { "epoch": 0.7891312430288759, "grad_norm": 3.6770578916458114, "learning_rate": 1.1100058704585481e-06, "loss": 0.652, "step": 25470 }, { "epoch": 0.7892861568967654, "grad_norm": 2.743887207281788, "learning_rate": 1.1091905289935426e-06, "loss": 0.7453, "step": 25475 }, { "epoch": 0.7894410707646549, "grad_norm": 2.8302115929700644, "learning_rate": 1.108375187528537e-06, "loss": 0.6807, "step": 25480 }, { "epoch": 0.7895959846325443, "grad_norm": 3.3788852432134995, "learning_rate": 1.1075598460635315e-06, "loss": 0.725, "step": 25485 }, { "epoch": 0.7897508985004338, "grad_norm": 2.434371975679316, "learning_rate": 1.106744504598526e-06, "loss": 0.7025, "step": 25490 }, { "epoch": 0.7899058123683232, "grad_norm": 2.6834067436317195, "learning_rate": 1.1059291631335204e-06, "loss": 0.6882, "step": 25495 }, { "epoch": 0.7900607262362127, "grad_norm": 4.1593989353274585, "learning_rate": 1.105113821668515e-06, "loss": 0.7571, "step": 25500 }, { "epoch": 0.7902156401041022, "grad_norm": 3.0229021853830926, "learning_rate": 1.1042984802035094e-06, "loss": 0.6627, "step": 25505 }, { "epoch": 0.7903705539719915, "grad_norm": 3.267336776073539, "learning_rate": 1.1034831387385038e-06, "loss": 0.6035, "step": 25510 }, { "epoch": 0.790525467839881, "grad_norm": 2.1734201049064685, "learning_rate": 1.1026677972734983e-06, "loss": 0.6791, "step": 25515 }, { "epoch": 0.7906803817077704, "grad_norm": 2.474669816447947, "learning_rate": 1.1018524558084927e-06, "loss": 0.6478, "step": 25520 }, { "epoch": 0.7908352955756599, "grad_norm": 2.5778116340826416, "learning_rate": 1.1010371143434872e-06, "loss": 0.6189, "step": 25525 }, { "epoch": 0.7909902094435494, "grad_norm": 3.2043075039682507, "learning_rate": 1.1002217728784816e-06, "loss": 0.674, "step": 25530 }, { "epoch": 0.7911451233114388, "grad_norm": 2.6960560099537743, "learning_rate": 1.099406431413476e-06, "loss": 0.6268, "step": 25535 }, { "epoch": 0.7913000371793283, "grad_norm": 2.6440508688531543, "learning_rate": 1.0985910899484704e-06, "loss": 0.6708, "step": 25540 }, { "epoch": 0.7914549510472177, "grad_norm": 2.6327885719078026, "learning_rate": 1.097775748483465e-06, "loss": 0.6328, "step": 25545 }, { "epoch": 0.7916098649151072, "grad_norm": 2.639894259196701, "learning_rate": 1.0969604070184595e-06, "loss": 0.6979, "step": 25550 }, { "epoch": 0.7917647787829967, "grad_norm": 2.8584016821883442, "learning_rate": 1.096145065553454e-06, "loss": 0.7037, "step": 25555 }, { "epoch": 0.7919196926508861, "grad_norm": 2.6209816744425316, "learning_rate": 1.0953297240884484e-06, "loss": 0.6202, "step": 25560 }, { "epoch": 0.7920746065187756, "grad_norm": 2.814929208823012, "learning_rate": 1.0945143826234429e-06, "loss": 0.6958, "step": 25565 }, { "epoch": 0.792229520386665, "grad_norm": 3.303247936406212, "learning_rate": 1.0936990411584373e-06, "loss": 0.6822, "step": 25570 }, { "epoch": 0.7923844342545545, "grad_norm": 3.4662568107504956, "learning_rate": 1.0928836996934316e-06, "loss": 0.6278, "step": 25575 }, { "epoch": 0.792539348122444, "grad_norm": 4.137696342881055, "learning_rate": 1.092068358228426e-06, "loss": 0.6279, "step": 25580 }, { "epoch": 0.7926942619903333, "grad_norm": 3.0716091325133275, "learning_rate": 1.0912530167634205e-06, "loss": 0.5661, "step": 25585 }, { "epoch": 0.7928491758582228, "grad_norm": 3.5185187161047518, "learning_rate": 1.0904376752984152e-06, "loss": 0.6642, "step": 25590 }, { "epoch": 0.7930040897261122, "grad_norm": 1.932443163728751, "learning_rate": 1.0896223338334096e-06, "loss": 0.6184, "step": 25595 }, { "epoch": 0.7931590035940017, "grad_norm": 2.508762970838161, "learning_rate": 1.088806992368404e-06, "loss": 0.6626, "step": 25600 }, { "epoch": 0.7933139174618912, "grad_norm": 2.6079875757395796, "learning_rate": 1.0879916509033985e-06, "loss": 0.6465, "step": 25605 }, { "epoch": 0.7934688313297806, "grad_norm": 3.1642776388489255, "learning_rate": 1.087176309438393e-06, "loss": 0.6867, "step": 25610 }, { "epoch": 0.7936237451976701, "grad_norm": 2.6729979892817854, "learning_rate": 1.0863609679733872e-06, "loss": 0.5924, "step": 25615 }, { "epoch": 0.7937786590655596, "grad_norm": 3.1038424431525917, "learning_rate": 1.0855456265083817e-06, "loss": 0.6711, "step": 25620 }, { "epoch": 0.793933572933449, "grad_norm": 3.1479626243723424, "learning_rate": 1.0847302850433762e-06, "loss": 0.6531, "step": 25625 }, { "epoch": 0.7940884868013385, "grad_norm": 2.1918869208184546, "learning_rate": 1.0839149435783706e-06, "loss": 0.6505, "step": 25630 }, { "epoch": 0.7942434006692279, "grad_norm": 2.376986557429786, "learning_rate": 1.0830996021133653e-06, "loss": 0.6932, "step": 25635 }, { "epoch": 0.7943983145371174, "grad_norm": 3.708193543243347, "learning_rate": 1.0822842606483598e-06, "loss": 0.6116, "step": 25640 }, { "epoch": 0.7945532284050069, "grad_norm": 7.8816745495423675, "learning_rate": 1.0814689191833542e-06, "loss": 0.642, "step": 25645 }, { "epoch": 0.7947081422728963, "grad_norm": 2.5302704863816343, "learning_rate": 1.0806535777183485e-06, "loss": 0.6992, "step": 25650 }, { "epoch": 0.7948630561407857, "grad_norm": 2.045022745433592, "learning_rate": 1.079838236253343e-06, "loss": 0.6314, "step": 25655 }, { "epoch": 0.7950179700086751, "grad_norm": 2.5978034056580244, "learning_rate": 1.0790228947883374e-06, "loss": 0.658, "step": 25660 }, { "epoch": 0.7951728838765646, "grad_norm": 2.6261525548344147, "learning_rate": 1.0782075533233318e-06, "loss": 0.6792, "step": 25665 }, { "epoch": 0.7953277977444541, "grad_norm": 2.562399747818008, "learning_rate": 1.0773922118583263e-06, "loss": 0.7714, "step": 25670 }, { "epoch": 0.7954827116123435, "grad_norm": 2.7127473316015513, "learning_rate": 1.0765768703933208e-06, "loss": 0.6464, "step": 25675 }, { "epoch": 0.795637625480233, "grad_norm": 2.9557748956733794, "learning_rate": 1.0757615289283152e-06, "loss": 0.6193, "step": 25680 }, { "epoch": 0.7957925393481224, "grad_norm": 2.7945954386535847, "learning_rate": 1.0749461874633099e-06, "loss": 0.6586, "step": 25685 }, { "epoch": 0.7959474532160119, "grad_norm": 3.252583419698434, "learning_rate": 1.0741308459983041e-06, "loss": 0.7113, "step": 25690 }, { "epoch": 0.7961023670839014, "grad_norm": 2.632188024093406, "learning_rate": 1.0733155045332986e-06, "loss": 0.6296, "step": 25695 }, { "epoch": 0.7962572809517908, "grad_norm": 2.293751363567034, "learning_rate": 1.072500163068293e-06, "loss": 0.6663, "step": 25700 }, { "epoch": 0.7964121948196803, "grad_norm": 2.439398550085947, "learning_rate": 1.0716848216032875e-06, "loss": 0.6205, "step": 25705 }, { "epoch": 0.7965671086875697, "grad_norm": 2.79785825637931, "learning_rate": 1.070869480138282e-06, "loss": 0.6991, "step": 25710 }, { "epoch": 0.7967220225554592, "grad_norm": 2.767350236516239, "learning_rate": 1.0700541386732764e-06, "loss": 0.5659, "step": 25715 }, { "epoch": 0.7968769364233487, "grad_norm": 2.6771411162727974, "learning_rate": 1.0692387972082709e-06, "loss": 0.7374, "step": 25720 }, { "epoch": 0.797031850291238, "grad_norm": 2.884245310160666, "learning_rate": 1.0684234557432653e-06, "loss": 0.6335, "step": 25725 }, { "epoch": 0.7971867641591275, "grad_norm": 3.410626814773982, "learning_rate": 1.0676081142782598e-06, "loss": 0.6965, "step": 25730 }, { "epoch": 0.7973416780270169, "grad_norm": 2.679536151567673, "learning_rate": 1.0667927728132543e-06, "loss": 0.6142, "step": 25735 }, { "epoch": 0.7974965918949064, "grad_norm": 2.651629903352605, "learning_rate": 1.0659774313482487e-06, "loss": 0.6111, "step": 25740 }, { "epoch": 0.7976515057627959, "grad_norm": 3.90226062935224, "learning_rate": 1.0651620898832432e-06, "loss": 0.6494, "step": 25745 }, { "epoch": 0.7978064196306853, "grad_norm": 3.314663606375296, "learning_rate": 1.0643467484182376e-06, "loss": 0.6643, "step": 25750 }, { "epoch": 0.7979613334985748, "grad_norm": 2.452105167354767, "learning_rate": 1.063531406953232e-06, "loss": 0.5836, "step": 25755 }, { "epoch": 0.7981162473664642, "grad_norm": 2.6023326072401685, "learning_rate": 1.0627160654882266e-06, "loss": 0.6676, "step": 25760 }, { "epoch": 0.7982711612343537, "grad_norm": 2.3960381271635236, "learning_rate": 1.061900724023221e-06, "loss": 0.6444, "step": 25765 }, { "epoch": 0.7984260751022432, "grad_norm": 2.2759488957472045, "learning_rate": 1.0610853825582155e-06, "loss": 0.6671, "step": 25770 }, { "epoch": 0.7985809889701326, "grad_norm": 2.849442097270934, "learning_rate": 1.06027004109321e-06, "loss": 0.6522, "step": 25775 }, { "epoch": 0.7987359028380221, "grad_norm": 2.5077960542043303, "learning_rate": 1.0594546996282044e-06, "loss": 0.6519, "step": 25780 }, { "epoch": 0.7988908167059116, "grad_norm": 2.907983310344871, "learning_rate": 1.0586393581631989e-06, "loss": 0.6168, "step": 25785 }, { "epoch": 0.799045730573801, "grad_norm": 2.5016785488317557, "learning_rate": 1.0578240166981933e-06, "loss": 0.6114, "step": 25790 }, { "epoch": 0.7992006444416904, "grad_norm": 2.3497923561979386, "learning_rate": 1.0570086752331878e-06, "loss": 0.6304, "step": 25795 }, { "epoch": 0.7993555583095798, "grad_norm": 2.957685625120283, "learning_rate": 1.0561933337681822e-06, "loss": 0.6898, "step": 25800 }, { "epoch": 0.7995104721774693, "grad_norm": 2.9169675121170435, "learning_rate": 1.0553779923031767e-06, "loss": 0.6554, "step": 25805 }, { "epoch": 0.7996653860453588, "grad_norm": 2.400186229110103, "learning_rate": 1.0545626508381711e-06, "loss": 0.6021, "step": 25810 }, { "epoch": 0.7998202999132482, "grad_norm": 2.7931865794716315, "learning_rate": 1.0537473093731656e-06, "loss": 0.7072, "step": 25815 }, { "epoch": 0.7999752137811377, "grad_norm": 3.4027284659249952, "learning_rate": 1.05293196790816e-06, "loss": 0.6149, "step": 25820 }, { "epoch": 0.8001301276490271, "grad_norm": 3.3609016028186054, "learning_rate": 1.0521166264431545e-06, "loss": 0.68, "step": 25825 }, { "epoch": 0.8002850415169166, "grad_norm": 2.9584878141272943, "learning_rate": 1.051301284978149e-06, "loss": 0.7012, "step": 25830 }, { "epoch": 0.8004399553848061, "grad_norm": 2.732263030891977, "learning_rate": 1.0504859435131434e-06, "loss": 0.6483, "step": 25835 }, { "epoch": 0.8005948692526955, "grad_norm": 3.9801951895158854, "learning_rate": 1.049670602048138e-06, "loss": 0.6773, "step": 25840 }, { "epoch": 0.800749783120585, "grad_norm": 3.053704646223756, "learning_rate": 1.0488552605831324e-06, "loss": 0.698, "step": 25845 }, { "epoch": 0.8009046969884744, "grad_norm": 2.4908209114615465, "learning_rate": 1.0480399191181268e-06, "loss": 0.7127, "step": 25850 }, { "epoch": 0.8010596108563639, "grad_norm": 2.416258467296732, "learning_rate": 1.0472245776531213e-06, "loss": 0.5816, "step": 25855 }, { "epoch": 0.8012145247242534, "grad_norm": 2.6781017727562215, "learning_rate": 1.0464092361881155e-06, "loss": 0.728, "step": 25860 }, { "epoch": 0.8013694385921427, "grad_norm": 2.597286595850902, "learning_rate": 1.04559389472311e-06, "loss": 0.6547, "step": 25865 }, { "epoch": 0.8015243524600322, "grad_norm": 3.0868530774297764, "learning_rate": 1.0447785532581044e-06, "loss": 0.6527, "step": 25870 }, { "epoch": 0.8016792663279216, "grad_norm": 2.0513059257361346, "learning_rate": 1.0439632117930991e-06, "loss": 0.6033, "step": 25875 }, { "epoch": 0.8018341801958111, "grad_norm": 2.5029389499677004, "learning_rate": 1.0431478703280936e-06, "loss": 0.6986, "step": 25880 }, { "epoch": 0.8019890940637006, "grad_norm": 2.795329857466798, "learning_rate": 1.042332528863088e-06, "loss": 0.695, "step": 25885 }, { "epoch": 0.80214400793159, "grad_norm": 3.1216058716665716, "learning_rate": 1.0415171873980825e-06, "loss": 0.6588, "step": 25890 }, { "epoch": 0.8022989217994795, "grad_norm": 2.98238724335887, "learning_rate": 1.040701845933077e-06, "loss": 0.7017, "step": 25895 }, { "epoch": 0.8024538356673689, "grad_norm": 3.0585960283190503, "learning_rate": 1.0398865044680712e-06, "loss": 0.6824, "step": 25900 }, { "epoch": 0.8026087495352584, "grad_norm": 2.63489804580098, "learning_rate": 1.0390711630030657e-06, "loss": 0.6476, "step": 25905 }, { "epoch": 0.8027636634031479, "grad_norm": 2.6043420966228696, "learning_rate": 1.0382558215380601e-06, "loss": 0.6317, "step": 25910 }, { "epoch": 0.8029185772710373, "grad_norm": 3.491632563280826, "learning_rate": 1.0374404800730546e-06, "loss": 0.6712, "step": 25915 }, { "epoch": 0.8030734911389268, "grad_norm": 2.7214897318982416, "learning_rate": 1.0366251386080492e-06, "loss": 0.6535, "step": 25920 }, { "epoch": 0.8032284050068162, "grad_norm": 3.23740665677746, "learning_rate": 1.0358097971430437e-06, "loss": 0.6791, "step": 25925 }, { "epoch": 0.8033833188747057, "grad_norm": 3.1192496460893175, "learning_rate": 1.0349944556780382e-06, "loss": 0.6137, "step": 25930 }, { "epoch": 0.8035382327425952, "grad_norm": 2.80210578548082, "learning_rate": 1.0341791142130324e-06, "loss": 0.6973, "step": 25935 }, { "epoch": 0.8036931466104845, "grad_norm": 2.611324401394407, "learning_rate": 1.0333637727480269e-06, "loss": 0.6826, "step": 25940 }, { "epoch": 0.803848060478374, "grad_norm": 2.8127509271643385, "learning_rate": 1.0325484312830213e-06, "loss": 0.6325, "step": 25945 }, { "epoch": 0.8040029743462634, "grad_norm": 3.264575979522763, "learning_rate": 1.0317330898180158e-06, "loss": 0.6574, "step": 25950 }, { "epoch": 0.8041578882141529, "grad_norm": 2.3392629825698465, "learning_rate": 1.0309177483530103e-06, "loss": 0.7369, "step": 25955 }, { "epoch": 0.8043128020820424, "grad_norm": 2.78916737132183, "learning_rate": 1.0301024068880047e-06, "loss": 0.6957, "step": 25960 }, { "epoch": 0.8044677159499318, "grad_norm": 2.2341308900445127, "learning_rate": 1.0292870654229994e-06, "loss": 0.6308, "step": 25965 }, { "epoch": 0.8046226298178213, "grad_norm": 2.533000130615074, "learning_rate": 1.0284717239579938e-06, "loss": 0.7177, "step": 25970 }, { "epoch": 0.8047775436857108, "grad_norm": 2.504900965373983, "learning_rate": 1.027656382492988e-06, "loss": 0.6512, "step": 25975 }, { "epoch": 0.8049324575536002, "grad_norm": 2.454515168902768, "learning_rate": 1.0268410410279825e-06, "loss": 0.7036, "step": 25980 }, { "epoch": 0.8050873714214897, "grad_norm": 4.233487822602659, "learning_rate": 1.026025699562977e-06, "loss": 0.6836, "step": 25985 }, { "epoch": 0.8052422852893791, "grad_norm": 2.496257152958056, "learning_rate": 1.0252103580979715e-06, "loss": 0.7163, "step": 25990 }, { "epoch": 0.8053971991572686, "grad_norm": 2.6573514172240387, "learning_rate": 1.024395016632966e-06, "loss": 0.6524, "step": 25995 }, { "epoch": 0.8055521130251581, "grad_norm": 2.4904373700761955, "learning_rate": 1.0235796751679604e-06, "loss": 0.6102, "step": 26000 }, { "epoch": 0.8057070268930475, "grad_norm": 4.170536892294696, "learning_rate": 1.0227643337029548e-06, "loss": 0.6996, "step": 26005 }, { "epoch": 0.8058619407609369, "grad_norm": 2.673273117998475, "learning_rate": 1.0219489922379493e-06, "loss": 0.6245, "step": 26010 }, { "epoch": 0.8060168546288263, "grad_norm": 2.8770888256346696, "learning_rate": 1.0211336507729438e-06, "loss": 0.7386, "step": 26015 }, { "epoch": 0.8061717684967158, "grad_norm": 2.946772813695558, "learning_rate": 1.0203183093079382e-06, "loss": 0.7579, "step": 26020 }, { "epoch": 0.8063266823646053, "grad_norm": 4.212627769019926, "learning_rate": 1.0195029678429327e-06, "loss": 0.7515, "step": 26025 }, { "epoch": 0.8064815962324947, "grad_norm": 3.374466137721325, "learning_rate": 1.0186876263779271e-06, "loss": 0.6925, "step": 26030 }, { "epoch": 0.8066365101003842, "grad_norm": 2.9953235260057234, "learning_rate": 1.0178722849129216e-06, "loss": 0.6469, "step": 26035 }, { "epoch": 0.8067914239682736, "grad_norm": 2.767748491135608, "learning_rate": 1.017056943447916e-06, "loss": 0.672, "step": 26040 }, { "epoch": 0.8069463378361631, "grad_norm": 2.127406897627401, "learning_rate": 1.0162416019829105e-06, "loss": 0.7246, "step": 26045 }, { "epoch": 0.8071012517040526, "grad_norm": 2.659864774582185, "learning_rate": 1.015426260517905e-06, "loss": 0.6681, "step": 26050 }, { "epoch": 0.807256165571942, "grad_norm": 2.2123758179303366, "learning_rate": 1.0146109190528994e-06, "loss": 0.6732, "step": 26055 }, { "epoch": 0.8074110794398315, "grad_norm": 3.8250257287968124, "learning_rate": 1.0137955775878939e-06, "loss": 0.7317, "step": 26060 }, { "epoch": 0.8075659933077209, "grad_norm": 2.6500961701102645, "learning_rate": 1.0129802361228884e-06, "loss": 0.7215, "step": 26065 }, { "epoch": 0.8077209071756104, "grad_norm": 2.976222939206924, "learning_rate": 1.0121648946578828e-06, "loss": 0.7005, "step": 26070 }, { "epoch": 0.8078758210434999, "grad_norm": 3.2062066762745953, "learning_rate": 1.0113495531928773e-06, "loss": 0.6024, "step": 26075 }, { "epoch": 0.8080307349113892, "grad_norm": 2.782781484214325, "learning_rate": 1.0105342117278717e-06, "loss": 0.6524, "step": 26080 }, { "epoch": 0.8081856487792787, "grad_norm": 3.0041862973686984, "learning_rate": 1.0097188702628662e-06, "loss": 0.6853, "step": 26085 }, { "epoch": 0.8083405626471681, "grad_norm": 2.29530572212185, "learning_rate": 1.0089035287978606e-06, "loss": 0.6604, "step": 26090 }, { "epoch": 0.8084954765150576, "grad_norm": 2.5095289156310536, "learning_rate": 1.0080881873328551e-06, "loss": 0.6433, "step": 26095 }, { "epoch": 0.8086503903829471, "grad_norm": 3.5019837293849356, "learning_rate": 1.0072728458678496e-06, "loss": 0.6835, "step": 26100 }, { "epoch": 0.8088053042508365, "grad_norm": 2.2303411122291044, "learning_rate": 1.006457504402844e-06, "loss": 0.6299, "step": 26105 }, { "epoch": 0.808960218118726, "grad_norm": 2.160800476489701, "learning_rate": 1.0056421629378385e-06, "loss": 0.6131, "step": 26110 }, { "epoch": 0.8091151319866154, "grad_norm": 2.581737536100035, "learning_rate": 1.004826821472833e-06, "loss": 0.6322, "step": 26115 }, { "epoch": 0.8092700458545049, "grad_norm": 2.3620731164787543, "learning_rate": 1.0040114800078274e-06, "loss": 0.6845, "step": 26120 }, { "epoch": 0.8094249597223944, "grad_norm": 3.360071536409706, "learning_rate": 1.0031961385428219e-06, "loss": 0.6954, "step": 26125 }, { "epoch": 0.8095798735902838, "grad_norm": 3.060857032207938, "learning_rate": 1.0023807970778163e-06, "loss": 0.67, "step": 26130 }, { "epoch": 0.8097347874581733, "grad_norm": 6.495116610787, "learning_rate": 1.0015654556128108e-06, "loss": 0.7105, "step": 26135 }, { "epoch": 0.8098897013260627, "grad_norm": 2.4537526230607685, "learning_rate": 1.0007501141478052e-06, "loss": 0.6972, "step": 26140 }, { "epoch": 0.8100446151939522, "grad_norm": 2.5832123124907684, "learning_rate": 9.999347726827997e-07, "loss": 0.6574, "step": 26145 }, { "epoch": 0.8101995290618416, "grad_norm": 3.8526802493024497, "learning_rate": 9.99119431217794e-07, "loss": 0.6162, "step": 26150 }, { "epoch": 0.810354442929731, "grad_norm": 2.21870792242421, "learning_rate": 9.983040897527886e-07, "loss": 0.6676, "step": 26155 }, { "epoch": 0.8105093567976205, "grad_norm": 2.1636917613537427, "learning_rate": 9.97488748287783e-07, "loss": 0.6885, "step": 26160 }, { "epoch": 0.81066427066551, "grad_norm": 2.5951537992268445, "learning_rate": 9.966734068227775e-07, "loss": 0.7433, "step": 26165 }, { "epoch": 0.8108191845333994, "grad_norm": 2.3851491722472806, "learning_rate": 9.95858065357772e-07, "loss": 0.6967, "step": 26170 }, { "epoch": 0.8109740984012889, "grad_norm": 3.073769667485626, "learning_rate": 9.950427238927665e-07, "loss": 0.6155, "step": 26175 }, { "epoch": 0.8111290122691783, "grad_norm": 2.8185439492701123, "learning_rate": 9.94227382427761e-07, "loss": 0.6622, "step": 26180 }, { "epoch": 0.8112839261370678, "grad_norm": 2.77602919287359, "learning_rate": 9.934120409627552e-07, "loss": 0.6018, "step": 26185 }, { "epoch": 0.8114388400049573, "grad_norm": 2.4686235286560434, "learning_rate": 9.925966994977496e-07, "loss": 0.7156, "step": 26190 }, { "epoch": 0.8115937538728467, "grad_norm": 2.559599909267231, "learning_rate": 9.91781358032744e-07, "loss": 0.7001, "step": 26195 }, { "epoch": 0.8117486677407362, "grad_norm": 4.4364488031658365, "learning_rate": 9.909660165677385e-07, "loss": 0.6944, "step": 26200 }, { "epoch": 0.8119035816086256, "grad_norm": 2.5815266462542947, "learning_rate": 9.901506751027332e-07, "loss": 0.6207, "step": 26205 }, { "epoch": 0.8120584954765151, "grad_norm": 2.670795237970737, "learning_rate": 9.893353336377277e-07, "loss": 0.6069, "step": 26210 }, { "epoch": 0.8122134093444046, "grad_norm": 3.552122414886237, "learning_rate": 9.885199921727221e-07, "loss": 0.6177, "step": 26215 }, { "epoch": 0.812368323212294, "grad_norm": 3.269978883936804, "learning_rate": 9.877046507077166e-07, "loss": 0.6714, "step": 26220 }, { "epoch": 0.8125232370801834, "grad_norm": 3.0570911996380206, "learning_rate": 9.868893092427108e-07, "loss": 0.6936, "step": 26225 }, { "epoch": 0.8126781509480728, "grad_norm": 2.1782170337770106, "learning_rate": 9.860739677777053e-07, "loss": 0.6607, "step": 26230 }, { "epoch": 0.8128330648159623, "grad_norm": 3.072658325654333, "learning_rate": 9.852586263126998e-07, "loss": 0.7379, "step": 26235 }, { "epoch": 0.8129879786838518, "grad_norm": 2.7462596391487, "learning_rate": 9.844432848476942e-07, "loss": 0.6624, "step": 26240 }, { "epoch": 0.8131428925517412, "grad_norm": 3.2740380545587064, "learning_rate": 9.836279433826887e-07, "loss": 0.6299, "step": 26245 }, { "epoch": 0.8132978064196307, "grad_norm": 2.4122054963602197, "learning_rate": 9.828126019176833e-07, "loss": 0.5965, "step": 26250 }, { "epoch": 0.8134527202875201, "grad_norm": 2.7792298220760685, "learning_rate": 9.819972604526778e-07, "loss": 0.6017, "step": 26255 }, { "epoch": 0.8136076341554096, "grad_norm": 2.868997740951342, "learning_rate": 9.81181918987672e-07, "loss": 0.619, "step": 26260 }, { "epoch": 0.8137625480232991, "grad_norm": 3.0969637259950957, "learning_rate": 9.803665775226665e-07, "loss": 0.7006, "step": 26265 }, { "epoch": 0.8139174618911885, "grad_norm": 2.897656310207861, "learning_rate": 9.79551236057661e-07, "loss": 0.6482, "step": 26270 }, { "epoch": 0.814072375759078, "grad_norm": 2.377227957012317, "learning_rate": 9.787358945926554e-07, "loss": 0.6928, "step": 26275 }, { "epoch": 0.8142272896269674, "grad_norm": 2.305483800885338, "learning_rate": 9.779205531276499e-07, "loss": 0.699, "step": 26280 }, { "epoch": 0.8143822034948569, "grad_norm": 2.8637421371418785, "learning_rate": 9.771052116626443e-07, "loss": 0.6261, "step": 26285 }, { "epoch": 0.8145371173627464, "grad_norm": 2.8589692371519897, "learning_rate": 9.762898701976388e-07, "loss": 0.6717, "step": 26290 }, { "epoch": 0.8146920312306357, "grad_norm": 2.3988418491102608, "learning_rate": 9.754745287326335e-07, "loss": 0.7355, "step": 26295 }, { "epoch": 0.8148469450985252, "grad_norm": 2.577722896765845, "learning_rate": 9.746591872676277e-07, "loss": 0.6893, "step": 26300 }, { "epoch": 0.8150018589664146, "grad_norm": 2.9262736465121226, "learning_rate": 9.738438458026222e-07, "loss": 0.673, "step": 26305 }, { "epoch": 0.8151567728343041, "grad_norm": 3.665590269481224, "learning_rate": 9.730285043376166e-07, "loss": 0.6662, "step": 26310 }, { "epoch": 0.8153116867021936, "grad_norm": 3.7772493274456913, "learning_rate": 9.72213162872611e-07, "loss": 0.6381, "step": 26315 }, { "epoch": 0.815466600570083, "grad_norm": 2.5925406775462214, "learning_rate": 9.713978214076056e-07, "loss": 0.6022, "step": 26320 }, { "epoch": 0.8156215144379725, "grad_norm": 2.237279323538581, "learning_rate": 9.705824799426e-07, "loss": 0.6052, "step": 26325 }, { "epoch": 0.815776428305862, "grad_norm": 2.7817544312975158, "learning_rate": 9.697671384775945e-07, "loss": 0.6137, "step": 26330 }, { "epoch": 0.8159313421737514, "grad_norm": 4.35800008956254, "learning_rate": 9.68951797012589e-07, "loss": 0.7278, "step": 26335 }, { "epoch": 0.8160862560416409, "grad_norm": 3.8428059385402245, "learning_rate": 9.681364555475834e-07, "loss": 0.6268, "step": 26340 }, { "epoch": 0.8162411699095303, "grad_norm": 2.8455069091210268, "learning_rate": 9.673211140825779e-07, "loss": 0.676, "step": 26345 }, { "epoch": 0.8163960837774198, "grad_norm": 2.8238832549444837, "learning_rate": 9.665057726175723e-07, "loss": 0.6564, "step": 26350 }, { "epoch": 0.8165509976453093, "grad_norm": 4.593073918014872, "learning_rate": 9.656904311525668e-07, "loss": 0.6943, "step": 26355 }, { "epoch": 0.8167059115131987, "grad_norm": 2.4040245329932035, "learning_rate": 9.648750896875612e-07, "loss": 0.669, "step": 26360 }, { "epoch": 0.8168608253810881, "grad_norm": 3.2189126274938564, "learning_rate": 9.640597482225557e-07, "loss": 0.6462, "step": 26365 }, { "epoch": 0.8170157392489775, "grad_norm": 3.7793025878449473, "learning_rate": 9.632444067575501e-07, "loss": 0.6848, "step": 26370 }, { "epoch": 0.817170653116867, "grad_norm": 2.5349337663037614, "learning_rate": 9.624290652925446e-07, "loss": 0.6351, "step": 26375 }, { "epoch": 0.8173255669847564, "grad_norm": 2.974378818997168, "learning_rate": 9.61613723827539e-07, "loss": 0.6597, "step": 26380 }, { "epoch": 0.8174804808526459, "grad_norm": 2.3327698690777523, "learning_rate": 9.607983823625335e-07, "loss": 0.6906, "step": 26385 }, { "epoch": 0.8176353947205354, "grad_norm": 2.386523570778067, "learning_rate": 9.59983040897528e-07, "loss": 0.6516, "step": 26390 }, { "epoch": 0.8177903085884248, "grad_norm": 3.3960535094672126, "learning_rate": 9.591676994325224e-07, "loss": 0.7264, "step": 26395 }, { "epoch": 0.8179452224563143, "grad_norm": 2.6878063440977766, "learning_rate": 9.58352357967517e-07, "loss": 0.7733, "step": 26400 }, { "epoch": 0.8181001363242038, "grad_norm": 3.324698128560545, "learning_rate": 9.575370165025114e-07, "loss": 0.6286, "step": 26405 }, { "epoch": 0.8182550501920932, "grad_norm": 3.082755657662406, "learning_rate": 9.567216750375058e-07, "loss": 0.6339, "step": 26410 }, { "epoch": 0.8184099640599827, "grad_norm": 2.183970609619091, "learning_rate": 9.559063335725003e-07, "loss": 0.6252, "step": 26415 }, { "epoch": 0.8185648779278721, "grad_norm": 2.932898467297843, "learning_rate": 9.550909921074947e-07, "loss": 0.5926, "step": 26420 }, { "epoch": 0.8187197917957616, "grad_norm": 2.736379928225597, "learning_rate": 9.542756506424892e-07, "loss": 0.6732, "step": 26425 }, { "epoch": 0.8188747056636511, "grad_norm": 5.126979745983065, "learning_rate": 9.534603091774836e-07, "loss": 0.6635, "step": 26430 }, { "epoch": 0.8190296195315404, "grad_norm": 2.4978564875778053, "learning_rate": 9.52644967712478e-07, "loss": 0.681, "step": 26435 }, { "epoch": 0.8191845333994299, "grad_norm": 2.3959238799375977, "learning_rate": 9.518296262474726e-07, "loss": 0.5735, "step": 26440 }, { "epoch": 0.8193394472673193, "grad_norm": 4.756847910905989, "learning_rate": 9.51014284782467e-07, "loss": 0.664, "step": 26445 }, { "epoch": 0.8194943611352088, "grad_norm": 3.5559656859147406, "learning_rate": 9.501989433174615e-07, "loss": 0.6008, "step": 26450 }, { "epoch": 0.8196492750030983, "grad_norm": 2.612039200562355, "learning_rate": 9.49383601852456e-07, "loss": 0.6132, "step": 26455 }, { "epoch": 0.8198041888709877, "grad_norm": 2.5360410949639434, "learning_rate": 9.485682603874503e-07, "loss": 0.5738, "step": 26460 }, { "epoch": 0.8199591027388772, "grad_norm": 2.6104505825970503, "learning_rate": 9.477529189224448e-07, "loss": 0.6379, "step": 26465 }, { "epoch": 0.8201140166067666, "grad_norm": 2.263135424521146, "learning_rate": 9.469375774574392e-07, "loss": 0.6073, "step": 26470 }, { "epoch": 0.8202689304746561, "grad_norm": 2.23217371940376, "learning_rate": 9.461222359924337e-07, "loss": 0.6411, "step": 26475 }, { "epoch": 0.8204238443425456, "grad_norm": 2.3611251409366227, "learning_rate": 9.453068945274281e-07, "loss": 0.7038, "step": 26480 }, { "epoch": 0.820578758210435, "grad_norm": 2.390576471792431, "learning_rate": 9.444915530624227e-07, "loss": 0.6182, "step": 26485 }, { "epoch": 0.8207336720783245, "grad_norm": 4.068866808362362, "learning_rate": 9.436762115974172e-07, "loss": 0.629, "step": 26490 }, { "epoch": 0.8208885859462139, "grad_norm": 3.5034852140665187, "learning_rate": 9.428608701324115e-07, "loss": 0.7414, "step": 26495 }, { "epoch": 0.8210434998141034, "grad_norm": 2.6431784190399643, "learning_rate": 9.42045528667406e-07, "loss": 0.6697, "step": 26500 }, { "epoch": 0.8211984136819929, "grad_norm": 2.7040678008403596, "learning_rate": 9.412301872024004e-07, "loss": 0.6559, "step": 26505 }, { "epoch": 0.8213533275498822, "grad_norm": 2.8162137337178907, "learning_rate": 9.404148457373949e-07, "loss": 0.6484, "step": 26510 }, { "epoch": 0.8215082414177717, "grad_norm": 3.187021850999425, "learning_rate": 9.395995042723894e-07, "loss": 0.699, "step": 26515 }, { "epoch": 0.8216631552856611, "grad_norm": 2.696880232206354, "learning_rate": 9.387841628073838e-07, "loss": 0.6435, "step": 26520 }, { "epoch": 0.8218180691535506, "grad_norm": 2.653283633890602, "learning_rate": 9.379688213423782e-07, "loss": 0.6897, "step": 26525 }, { "epoch": 0.8219729830214401, "grad_norm": 2.314559720241342, "learning_rate": 9.371534798773726e-07, "loss": 0.726, "step": 26530 }, { "epoch": 0.8221278968893295, "grad_norm": 2.4573083228318966, "learning_rate": 9.363381384123672e-07, "loss": 0.6739, "step": 26535 }, { "epoch": 0.822282810757219, "grad_norm": 2.913648397069744, "learning_rate": 9.355227969473617e-07, "loss": 0.635, "step": 26540 }, { "epoch": 0.8224377246251084, "grad_norm": 2.0672441051160257, "learning_rate": 9.347074554823561e-07, "loss": 0.619, "step": 26545 }, { "epoch": 0.8225926384929979, "grad_norm": 2.360156270636243, "learning_rate": 9.338921140173506e-07, "loss": 0.6124, "step": 26550 }, { "epoch": 0.8227475523608874, "grad_norm": 2.715031429263799, "learning_rate": 9.33076772552345e-07, "loss": 0.6281, "step": 26555 }, { "epoch": 0.8229024662287768, "grad_norm": 2.518371365657636, "learning_rate": 9.322614310873395e-07, "loss": 0.6038, "step": 26560 }, { "epoch": 0.8230573800966663, "grad_norm": 2.796151470216414, "learning_rate": 9.314460896223338e-07, "loss": 0.6593, "step": 26565 }, { "epoch": 0.8232122939645558, "grad_norm": 2.349103587533317, "learning_rate": 9.306307481573283e-07, "loss": 0.6264, "step": 26570 }, { "epoch": 0.8233672078324452, "grad_norm": 2.441558423033992, "learning_rate": 9.298154066923228e-07, "loss": 0.5906, "step": 26575 }, { "epoch": 0.8235221217003346, "grad_norm": 2.714080539525568, "learning_rate": 9.290000652273173e-07, "loss": 0.65, "step": 26580 }, { "epoch": 0.823677035568224, "grad_norm": 2.4542196877879605, "learning_rate": 9.281847237623118e-07, "loss": 0.6743, "step": 26585 }, { "epoch": 0.8238319494361135, "grad_norm": 2.664425545672804, "learning_rate": 9.273693822973062e-07, "loss": 0.6403, "step": 26590 }, { "epoch": 0.823986863304003, "grad_norm": 3.9722244253770445, "learning_rate": 9.265540408323007e-07, "loss": 0.7446, "step": 26595 }, { "epoch": 0.8241417771718924, "grad_norm": 2.5284891817842823, "learning_rate": 9.257386993672951e-07, "loss": 0.7367, "step": 26600 }, { "epoch": 0.8242966910397819, "grad_norm": 2.9492228482349265, "learning_rate": 9.249233579022895e-07, "loss": 0.7016, "step": 26605 }, { "epoch": 0.8244516049076713, "grad_norm": 2.8375603065337156, "learning_rate": 9.24108016437284e-07, "loss": 0.6431, "step": 26610 }, { "epoch": 0.8246065187755608, "grad_norm": 3.2232753411560244, "learning_rate": 9.232926749722784e-07, "loss": 0.6658, "step": 26615 }, { "epoch": 0.8247614326434503, "grad_norm": 3.416628717764639, "learning_rate": 9.224773335072729e-07, "loss": 0.6305, "step": 26620 }, { "epoch": 0.8249163465113397, "grad_norm": 2.720457776094197, "learning_rate": 9.216619920422675e-07, "loss": 0.5994, "step": 26625 }, { "epoch": 0.8250712603792292, "grad_norm": 2.635432435891455, "learning_rate": 9.208466505772619e-07, "loss": 0.6423, "step": 26630 }, { "epoch": 0.8252261742471186, "grad_norm": 2.693378171198696, "learning_rate": 9.200313091122564e-07, "loss": 0.7352, "step": 26635 }, { "epoch": 0.8253810881150081, "grad_norm": 2.491566304989675, "learning_rate": 9.192159676472507e-07, "loss": 0.6807, "step": 26640 }, { "epoch": 0.8255360019828976, "grad_norm": 2.9128865483176445, "learning_rate": 9.184006261822452e-07, "loss": 0.6363, "step": 26645 }, { "epoch": 0.8256909158507869, "grad_norm": 2.664686301857978, "learning_rate": 9.175852847172396e-07, "loss": 0.6801, "step": 26650 }, { "epoch": 0.8258458297186764, "grad_norm": 2.249425029757507, "learning_rate": 9.167699432522341e-07, "loss": 0.6057, "step": 26655 }, { "epoch": 0.8260007435865658, "grad_norm": 2.301695113902853, "learning_rate": 9.159546017872286e-07, "loss": 0.645, "step": 26660 }, { "epoch": 0.8261556574544553, "grad_norm": 2.0747877680944544, "learning_rate": 9.15139260322223e-07, "loss": 0.629, "step": 26665 }, { "epoch": 0.8263105713223448, "grad_norm": 2.956411245742692, "learning_rate": 9.143239188572174e-07, "loss": 0.6658, "step": 26670 }, { "epoch": 0.8264654851902342, "grad_norm": 2.8249590863627363, "learning_rate": 9.135085773922119e-07, "loss": 0.6477, "step": 26675 }, { "epoch": 0.8266203990581237, "grad_norm": 2.4782249041071207, "learning_rate": 9.126932359272064e-07, "loss": 0.6225, "step": 26680 }, { "epoch": 0.8267753129260131, "grad_norm": 2.628590798579799, "learning_rate": 9.118778944622009e-07, "loss": 0.6451, "step": 26685 }, { "epoch": 0.8269302267939026, "grad_norm": 2.4232352902748056, "learning_rate": 9.110625529971953e-07, "loss": 0.7118, "step": 26690 }, { "epoch": 0.8270851406617921, "grad_norm": 3.2635472455314827, "learning_rate": 9.102472115321898e-07, "loss": 0.7098, "step": 26695 }, { "epoch": 0.8272400545296815, "grad_norm": 2.71168017614939, "learning_rate": 9.094318700671842e-07, "loss": 0.6452, "step": 26700 }, { "epoch": 0.827394968397571, "grad_norm": 2.3545457291742147, "learning_rate": 9.086165286021786e-07, "loss": 0.7174, "step": 26705 }, { "epoch": 0.8275498822654604, "grad_norm": 3.1229761593878598, "learning_rate": 9.07801187137173e-07, "loss": 0.7101, "step": 26710 }, { "epoch": 0.8277047961333499, "grad_norm": 2.8691712554443023, "learning_rate": 9.069858456721675e-07, "loss": 0.6319, "step": 26715 }, { "epoch": 0.8278597100012393, "grad_norm": 3.253072441519378, "learning_rate": 9.061705042071621e-07, "loss": 0.6999, "step": 26720 }, { "epoch": 0.8280146238691287, "grad_norm": 2.3481474194685146, "learning_rate": 9.053551627421565e-07, "loss": 0.661, "step": 26725 }, { "epoch": 0.8281695377370182, "grad_norm": 2.605795283890029, "learning_rate": 9.04539821277151e-07, "loss": 0.7023, "step": 26730 }, { "epoch": 0.8283244516049076, "grad_norm": 2.57154177306733, "learning_rate": 9.037244798121455e-07, "loss": 0.6399, "step": 26735 }, { "epoch": 0.8284793654727971, "grad_norm": 2.324742765238053, "learning_rate": 9.029091383471399e-07, "loss": 0.5797, "step": 26740 }, { "epoch": 0.8286342793406866, "grad_norm": 2.7938683169901437, "learning_rate": 9.020937968821343e-07, "loss": 0.6048, "step": 26745 }, { "epoch": 0.828789193208576, "grad_norm": 2.9922054074825843, "learning_rate": 9.012784554171287e-07, "loss": 0.6908, "step": 26750 }, { "epoch": 0.8289441070764655, "grad_norm": 3.2214424634830507, "learning_rate": 9.004631139521232e-07, "loss": 0.6543, "step": 26755 }, { "epoch": 0.829099020944355, "grad_norm": 2.932013387697456, "learning_rate": 8.996477724871176e-07, "loss": 0.7169, "step": 26760 }, { "epoch": 0.8292539348122444, "grad_norm": 3.263989019385275, "learning_rate": 8.988324310221121e-07, "loss": 0.6999, "step": 26765 }, { "epoch": 0.8294088486801339, "grad_norm": 2.779929090214024, "learning_rate": 8.980170895571067e-07, "loss": 0.6951, "step": 26770 }, { "epoch": 0.8295637625480233, "grad_norm": 2.3630272949566593, "learning_rate": 8.972017480921011e-07, "loss": 0.7342, "step": 26775 }, { "epoch": 0.8297186764159128, "grad_norm": 3.391766401909657, "learning_rate": 8.963864066270955e-07, "loss": 0.6748, "step": 26780 }, { "epoch": 0.8298735902838023, "grad_norm": 2.5529114000071793, "learning_rate": 8.955710651620899e-07, "loss": 0.5833, "step": 26785 }, { "epoch": 0.8300285041516916, "grad_norm": 2.42894349059675, "learning_rate": 8.947557236970844e-07, "loss": 0.5274, "step": 26790 }, { "epoch": 0.8301834180195811, "grad_norm": 2.869437247356613, "learning_rate": 8.939403822320789e-07, "loss": 0.6834, "step": 26795 }, { "epoch": 0.8303383318874705, "grad_norm": 2.924587962855452, "learning_rate": 8.931250407670733e-07, "loss": 0.6049, "step": 26800 }, { "epoch": 0.83049324575536, "grad_norm": 2.8855247967709188, "learning_rate": 8.923096993020678e-07, "loss": 0.721, "step": 26805 }, { "epoch": 0.8306481596232494, "grad_norm": 2.82002610971679, "learning_rate": 8.914943578370621e-07, "loss": 0.5354, "step": 26810 }, { "epoch": 0.8308030734911389, "grad_norm": 2.808845805769195, "learning_rate": 8.906790163720568e-07, "loss": 0.6994, "step": 26815 }, { "epoch": 0.8309579873590284, "grad_norm": 3.4564435759881182, "learning_rate": 8.898636749070511e-07, "loss": 0.716, "step": 26820 }, { "epoch": 0.8311129012269178, "grad_norm": 2.486622958048941, "learning_rate": 8.890483334420456e-07, "loss": 0.6686, "step": 26825 }, { "epoch": 0.8312678150948073, "grad_norm": 2.388801278745262, "learning_rate": 8.882329919770401e-07, "loss": 0.7097, "step": 26830 }, { "epoch": 0.8314227289626968, "grad_norm": 3.23773257587824, "learning_rate": 8.874176505120345e-07, "loss": 0.7038, "step": 26835 }, { "epoch": 0.8315776428305862, "grad_norm": 2.529984883807622, "learning_rate": 8.86602309047029e-07, "loss": 0.6334, "step": 26840 }, { "epoch": 0.8317325566984757, "grad_norm": 3.194115311313005, "learning_rate": 8.857869675820234e-07, "loss": 0.7254, "step": 26845 }, { "epoch": 0.8318874705663651, "grad_norm": 2.5569489632453326, "learning_rate": 8.849716261170178e-07, "loss": 0.6785, "step": 26850 }, { "epoch": 0.8320423844342546, "grad_norm": 2.0777617804652597, "learning_rate": 8.841562846520123e-07, "loss": 0.6821, "step": 26855 }, { "epoch": 0.8321972983021441, "grad_norm": 2.148919097044753, "learning_rate": 8.833409431870067e-07, "loss": 0.7351, "step": 26860 }, { "epoch": 0.8323522121700334, "grad_norm": 2.450832617056621, "learning_rate": 8.825256017220013e-07, "loss": 0.6767, "step": 26865 }, { "epoch": 0.8325071260379229, "grad_norm": 3.1886813377822416, "learning_rate": 8.817102602569957e-07, "loss": 0.6424, "step": 26870 }, { "epoch": 0.8326620399058123, "grad_norm": 3.335628842384296, "learning_rate": 8.808949187919902e-07, "loss": 0.6439, "step": 26875 }, { "epoch": 0.8328169537737018, "grad_norm": 3.1536669811135627, "learning_rate": 8.800795773269847e-07, "loss": 0.6983, "step": 26880 }, { "epoch": 0.8329718676415913, "grad_norm": 2.4441720130384725, "learning_rate": 8.792642358619791e-07, "loss": 0.6217, "step": 26885 }, { "epoch": 0.8331267815094807, "grad_norm": 2.2942873612844914, "learning_rate": 8.784488943969735e-07, "loss": 0.6314, "step": 26890 }, { "epoch": 0.8332816953773702, "grad_norm": 2.699619887742222, "learning_rate": 8.776335529319679e-07, "loss": 0.6852, "step": 26895 }, { "epoch": 0.8334366092452596, "grad_norm": 2.5806637897275846, "learning_rate": 8.768182114669624e-07, "loss": 0.6983, "step": 26900 }, { "epoch": 0.8335915231131491, "grad_norm": 2.2486030109160877, "learning_rate": 8.760028700019568e-07, "loss": 0.6561, "step": 26905 }, { "epoch": 0.8337464369810386, "grad_norm": 2.6608745462589973, "learning_rate": 8.751875285369514e-07, "loss": 0.7105, "step": 26910 }, { "epoch": 0.833901350848928, "grad_norm": 2.7548422207120153, "learning_rate": 8.743721870719459e-07, "loss": 0.6973, "step": 26915 }, { "epoch": 0.8340562647168175, "grad_norm": 2.658274033859709, "learning_rate": 8.735568456069403e-07, "loss": 0.6695, "step": 26920 }, { "epoch": 0.834211178584707, "grad_norm": 3.657657192390016, "learning_rate": 8.727415041419347e-07, "loss": 0.7153, "step": 26925 }, { "epoch": 0.8343660924525964, "grad_norm": 2.631159211942071, "learning_rate": 8.719261626769291e-07, "loss": 0.6649, "step": 26930 }, { "epoch": 0.8345210063204858, "grad_norm": 2.8437523042291573, "learning_rate": 8.711108212119236e-07, "loss": 0.6813, "step": 26935 }, { "epoch": 0.8346759201883752, "grad_norm": 2.812512586481204, "learning_rate": 8.702954797469181e-07, "loss": 0.7107, "step": 26940 }, { "epoch": 0.8348308340562647, "grad_norm": 2.2180099236420987, "learning_rate": 8.694801382819125e-07, "loss": 0.6493, "step": 26945 }, { "epoch": 0.8349857479241541, "grad_norm": 2.5925848768312147, "learning_rate": 8.68664796816907e-07, "loss": 0.638, "step": 26950 }, { "epoch": 0.8351406617920436, "grad_norm": 2.3872095852761244, "learning_rate": 8.678494553519015e-07, "loss": 0.7065, "step": 26955 }, { "epoch": 0.8352955756599331, "grad_norm": 2.5219395154713617, "learning_rate": 8.67034113886896e-07, "loss": 0.6086, "step": 26960 }, { "epoch": 0.8354504895278225, "grad_norm": 3.2969230978842883, "learning_rate": 8.662187724218904e-07, "loss": 0.6629, "step": 26965 }, { "epoch": 0.835605403395712, "grad_norm": 2.4423572711933055, "learning_rate": 8.654034309568848e-07, "loss": 0.6797, "step": 26970 }, { "epoch": 0.8357603172636014, "grad_norm": 2.656255636874233, "learning_rate": 8.645880894918793e-07, "loss": 0.6003, "step": 26975 }, { "epoch": 0.8359152311314909, "grad_norm": 2.432294863021519, "learning_rate": 8.637727480268737e-07, "loss": 0.5905, "step": 26980 }, { "epoch": 0.8360701449993804, "grad_norm": 4.14419448775876, "learning_rate": 8.629574065618682e-07, "loss": 0.6777, "step": 26985 }, { "epoch": 0.8362250588672698, "grad_norm": 2.6414404089337933, "learning_rate": 8.621420650968627e-07, "loss": 0.6727, "step": 26990 }, { "epoch": 0.8363799727351593, "grad_norm": 2.6251047749558385, "learning_rate": 8.61326723631857e-07, "loss": 0.6448, "step": 26995 }, { "epoch": 0.8365348866030488, "grad_norm": 2.47422732329888, "learning_rate": 8.605113821668515e-07, "loss": 0.6631, "step": 27000 }, { "epoch": 0.8366898004709381, "grad_norm": 2.650295828042756, "learning_rate": 8.59696040701846e-07, "loss": 0.6903, "step": 27005 }, { "epoch": 0.8368447143388276, "grad_norm": 2.715633357676813, "learning_rate": 8.588806992368405e-07, "loss": 0.6662, "step": 27010 }, { "epoch": 0.836999628206717, "grad_norm": 2.339809332082143, "learning_rate": 8.58065357771835e-07, "loss": 0.6022, "step": 27015 }, { "epoch": 0.8371545420746065, "grad_norm": 3.346090536490413, "learning_rate": 8.572500163068294e-07, "loss": 0.6502, "step": 27020 }, { "epoch": 0.837309455942496, "grad_norm": 3.074754782643699, "learning_rate": 8.564346748418239e-07, "loss": 0.6555, "step": 27025 }, { "epoch": 0.8374643698103854, "grad_norm": 3.335728124689347, "learning_rate": 8.556193333768182e-07, "loss": 0.623, "step": 27030 }, { "epoch": 0.8376192836782749, "grad_norm": 2.475488607440758, "learning_rate": 8.548039919118127e-07, "loss": 0.682, "step": 27035 }, { "epoch": 0.8377741975461643, "grad_norm": 3.2269004107140855, "learning_rate": 8.539886504468071e-07, "loss": 0.6506, "step": 27040 }, { "epoch": 0.8379291114140538, "grad_norm": 2.5771727967347293, "learning_rate": 8.531733089818016e-07, "loss": 0.6448, "step": 27045 }, { "epoch": 0.8380840252819433, "grad_norm": 2.984618347119394, "learning_rate": 8.523579675167962e-07, "loss": 0.6473, "step": 27050 }, { "epoch": 0.8382389391498327, "grad_norm": 2.8308136706203704, "learning_rate": 8.515426260517906e-07, "loss": 0.7647, "step": 27055 }, { "epoch": 0.8383938530177222, "grad_norm": 3.0063886755446383, "learning_rate": 8.507272845867851e-07, "loss": 0.6821, "step": 27060 }, { "epoch": 0.8385487668856116, "grad_norm": 2.5329483357976605, "learning_rate": 8.499119431217795e-07, "loss": 0.6471, "step": 27065 }, { "epoch": 0.8387036807535011, "grad_norm": 3.160506601890016, "learning_rate": 8.490966016567739e-07, "loss": 0.7218, "step": 27070 }, { "epoch": 0.8388585946213905, "grad_norm": 3.0711539945785273, "learning_rate": 8.482812601917684e-07, "loss": 0.7184, "step": 27075 }, { "epoch": 0.8390135084892799, "grad_norm": 2.694884615534064, "learning_rate": 8.474659187267628e-07, "loss": 0.6875, "step": 27080 }, { "epoch": 0.8391684223571694, "grad_norm": 2.5781152968375767, "learning_rate": 8.466505772617573e-07, "loss": 0.7016, "step": 27085 }, { "epoch": 0.8393233362250588, "grad_norm": 2.5425747238169367, "learning_rate": 8.458352357967517e-07, "loss": 0.7536, "step": 27090 }, { "epoch": 0.8394782500929483, "grad_norm": 3.258083565791127, "learning_rate": 8.450198943317462e-07, "loss": 0.7047, "step": 27095 }, { "epoch": 0.8396331639608378, "grad_norm": 3.5009773251894747, "learning_rate": 8.442045528667408e-07, "loss": 0.6336, "step": 27100 }, { "epoch": 0.8397880778287272, "grad_norm": 2.6708798520152888, "learning_rate": 8.433892114017351e-07, "loss": 0.7435, "step": 27105 }, { "epoch": 0.8399429916966167, "grad_norm": 2.5295521823546987, "learning_rate": 8.425738699367296e-07, "loss": 0.6177, "step": 27110 }, { "epoch": 0.8400979055645061, "grad_norm": 2.9674450577273235, "learning_rate": 8.41758528471724e-07, "loss": 0.6714, "step": 27115 }, { "epoch": 0.8402528194323956, "grad_norm": 2.422696073558406, "learning_rate": 8.409431870067185e-07, "loss": 0.6754, "step": 27120 }, { "epoch": 0.8404077333002851, "grad_norm": 2.2123454534431843, "learning_rate": 8.401278455417129e-07, "loss": 0.6536, "step": 27125 }, { "epoch": 0.8405626471681745, "grad_norm": 2.719791261834908, "learning_rate": 8.393125040767074e-07, "loss": 0.6924, "step": 27130 }, { "epoch": 0.840717561036064, "grad_norm": 2.324613103994927, "learning_rate": 8.384971626117018e-07, "loss": 0.7115, "step": 27135 }, { "epoch": 0.8408724749039534, "grad_norm": 2.584796748382112, "learning_rate": 8.376818211466962e-07, "loss": 0.693, "step": 27140 }, { "epoch": 0.8410273887718429, "grad_norm": 2.783226456772049, "learning_rate": 8.368664796816908e-07, "loss": 0.6348, "step": 27145 }, { "epoch": 0.8411823026397323, "grad_norm": 2.69982736443665, "learning_rate": 8.360511382166852e-07, "loss": 0.6173, "step": 27150 }, { "epoch": 0.8413372165076217, "grad_norm": 3.007443004914251, "learning_rate": 8.352357967516797e-07, "loss": 0.6582, "step": 27155 }, { "epoch": 0.8414921303755112, "grad_norm": 3.199449580869729, "learning_rate": 8.344204552866742e-07, "loss": 0.723, "step": 27160 }, { "epoch": 0.8416470442434006, "grad_norm": 2.779405940230231, "learning_rate": 8.336051138216686e-07, "loss": 0.7161, "step": 27165 }, { "epoch": 0.8418019581112901, "grad_norm": 2.8919765391634074, "learning_rate": 8.327897723566631e-07, "loss": 0.6413, "step": 27170 }, { "epoch": 0.8419568719791796, "grad_norm": 2.8280177778553153, "learning_rate": 8.319744308916574e-07, "loss": 0.688, "step": 27175 }, { "epoch": 0.842111785847069, "grad_norm": 3.1333547548498326, "learning_rate": 8.311590894266519e-07, "loss": 0.605, "step": 27180 }, { "epoch": 0.8422666997149585, "grad_norm": 2.844835023494326, "learning_rate": 8.303437479616463e-07, "loss": 0.683, "step": 27185 }, { "epoch": 0.842421613582848, "grad_norm": 2.0296497179487574, "learning_rate": 8.295284064966408e-07, "loss": 0.6066, "step": 27190 }, { "epoch": 0.8425765274507374, "grad_norm": 2.8768263762695647, "learning_rate": 8.287130650316354e-07, "loss": 0.6828, "step": 27195 }, { "epoch": 0.8427314413186269, "grad_norm": 3.314447535184254, "learning_rate": 8.278977235666298e-07, "loss": 0.6548, "step": 27200 }, { "epoch": 0.8428863551865163, "grad_norm": 2.7560219680845606, "learning_rate": 8.270823821016243e-07, "loss": 0.7032, "step": 27205 }, { "epoch": 0.8430412690544058, "grad_norm": 2.6822186191147592, "learning_rate": 8.262670406366186e-07, "loss": 0.6021, "step": 27210 }, { "epoch": 0.8431961829222953, "grad_norm": 2.644823446166381, "learning_rate": 8.254516991716131e-07, "loss": 0.6837, "step": 27215 }, { "epoch": 0.8433510967901846, "grad_norm": 2.552587666245917, "learning_rate": 8.246363577066076e-07, "loss": 0.6582, "step": 27220 }, { "epoch": 0.8435060106580741, "grad_norm": 2.903949210351587, "learning_rate": 8.23821016241602e-07, "loss": 0.6885, "step": 27225 }, { "epoch": 0.8436609245259635, "grad_norm": 2.566346140908453, "learning_rate": 8.230056747765965e-07, "loss": 0.7148, "step": 27230 }, { "epoch": 0.843815838393853, "grad_norm": 2.5535343877190457, "learning_rate": 8.221903333115909e-07, "loss": 0.6601, "step": 27235 }, { "epoch": 0.8439707522617425, "grad_norm": 2.4809588080071494, "learning_rate": 8.213749918465855e-07, "loss": 0.6589, "step": 27240 }, { "epoch": 0.8441256661296319, "grad_norm": 2.5841990777567143, "learning_rate": 8.2055965038158e-07, "loss": 0.702, "step": 27245 }, { "epoch": 0.8442805799975214, "grad_norm": 2.969015855162042, "learning_rate": 8.197443089165743e-07, "loss": 0.7105, "step": 27250 }, { "epoch": 0.8444354938654108, "grad_norm": 2.7357829948563475, "learning_rate": 8.189289674515688e-07, "loss": 0.6265, "step": 27255 }, { "epoch": 0.8445904077333003, "grad_norm": 2.8950924077556954, "learning_rate": 8.181136259865632e-07, "loss": 0.6907, "step": 27260 }, { "epoch": 0.8447453216011898, "grad_norm": 2.536441355416867, "learning_rate": 8.172982845215577e-07, "loss": 0.5915, "step": 27265 }, { "epoch": 0.8449002354690792, "grad_norm": 3.3616681104345987, "learning_rate": 8.164829430565522e-07, "loss": 0.6822, "step": 27270 }, { "epoch": 0.8450551493369687, "grad_norm": 2.739675033930924, "learning_rate": 8.156676015915466e-07, "loss": 0.6388, "step": 27275 }, { "epoch": 0.8452100632048581, "grad_norm": 2.393299896678931, "learning_rate": 8.14852260126541e-07, "loss": 0.5819, "step": 27280 }, { "epoch": 0.8453649770727476, "grad_norm": 2.014084883160572, "learning_rate": 8.140369186615356e-07, "loss": 0.6221, "step": 27285 }, { "epoch": 0.845519890940637, "grad_norm": 3.683170979408243, "learning_rate": 8.1322157719653e-07, "loss": 0.68, "step": 27290 }, { "epoch": 0.8456748048085264, "grad_norm": 2.411060569716681, "learning_rate": 8.124062357315244e-07, "loss": 0.6432, "step": 27295 }, { "epoch": 0.8458297186764159, "grad_norm": 2.1515742353068004, "learning_rate": 8.115908942665189e-07, "loss": 0.5532, "step": 27300 }, { "epoch": 0.8459846325443053, "grad_norm": 2.4551377458302874, "learning_rate": 8.107755528015134e-07, "loss": 0.6799, "step": 27305 }, { "epoch": 0.8461395464121948, "grad_norm": 2.725232109483922, "learning_rate": 8.099602113365078e-07, "loss": 0.6209, "step": 27310 }, { "epoch": 0.8462944602800843, "grad_norm": 1.9210554611866935, "learning_rate": 8.091448698715022e-07, "loss": 0.6046, "step": 27315 }, { "epoch": 0.8464493741479737, "grad_norm": 2.6623539991218346, "learning_rate": 8.083295284064966e-07, "loss": 0.6372, "step": 27320 }, { "epoch": 0.8466042880158632, "grad_norm": 2.5677632425662336, "learning_rate": 8.075141869414911e-07, "loss": 0.6148, "step": 27325 }, { "epoch": 0.8467592018837526, "grad_norm": 4.307462657081225, "learning_rate": 8.066988454764856e-07, "loss": 0.7119, "step": 27330 }, { "epoch": 0.8469141157516421, "grad_norm": 3.127662379928689, "learning_rate": 8.058835040114801e-07, "loss": 0.6605, "step": 27335 }, { "epoch": 0.8470690296195316, "grad_norm": 3.2904561701700734, "learning_rate": 8.050681625464746e-07, "loss": 0.6441, "step": 27340 }, { "epoch": 0.847223943487421, "grad_norm": 2.803712958694768, "learning_rate": 8.04252821081469e-07, "loss": 0.6705, "step": 27345 }, { "epoch": 0.8473788573553105, "grad_norm": 2.56957151224809, "learning_rate": 8.034374796164635e-07, "loss": 0.677, "step": 27350 }, { "epoch": 0.8475337712232, "grad_norm": 3.458747138505282, "learning_rate": 8.026221381514579e-07, "loss": 0.618, "step": 27355 }, { "epoch": 0.8476886850910893, "grad_norm": 2.748069507366582, "learning_rate": 8.018067966864523e-07, "loss": 0.6561, "step": 27360 }, { "epoch": 0.8478435989589788, "grad_norm": 3.739995027473032, "learning_rate": 8.009914552214468e-07, "loss": 0.7541, "step": 27365 }, { "epoch": 0.8479985128268682, "grad_norm": 3.301950640183201, "learning_rate": 8.001761137564412e-07, "loss": 0.6504, "step": 27370 }, { "epoch": 0.8481534266947577, "grad_norm": 2.993964425765295, "learning_rate": 7.993607722914357e-07, "loss": 0.6431, "step": 27375 }, { "epoch": 0.8483083405626471, "grad_norm": 2.7073369766577735, "learning_rate": 7.985454308264303e-07, "loss": 0.6966, "step": 27380 }, { "epoch": 0.8484632544305366, "grad_norm": 2.654788242791425, "learning_rate": 7.977300893614247e-07, "loss": 0.7252, "step": 27385 }, { "epoch": 0.8486181682984261, "grad_norm": 2.7724499827133573, "learning_rate": 7.969147478964192e-07, "loss": 0.7126, "step": 27390 }, { "epoch": 0.8487730821663155, "grad_norm": 2.6433992629340954, "learning_rate": 7.960994064314135e-07, "loss": 0.6438, "step": 27395 }, { "epoch": 0.848927996034205, "grad_norm": 2.845903835120517, "learning_rate": 7.95284064966408e-07, "loss": 0.7014, "step": 27400 }, { "epoch": 0.8490829099020945, "grad_norm": 4.04059563883947, "learning_rate": 7.944687235014024e-07, "loss": 0.6969, "step": 27405 }, { "epoch": 0.8492378237699839, "grad_norm": 2.8958532325715822, "learning_rate": 7.936533820363969e-07, "loss": 0.6727, "step": 27410 }, { "epoch": 0.8493927376378734, "grad_norm": 2.5325046895636687, "learning_rate": 7.928380405713914e-07, "loss": 0.5809, "step": 27415 }, { "epoch": 0.8495476515057628, "grad_norm": 2.3764470965413214, "learning_rate": 7.920226991063857e-07, "loss": 0.6766, "step": 27420 }, { "epoch": 0.8497025653736523, "grad_norm": 2.5917259234085432, "learning_rate": 7.912073576413802e-07, "loss": 0.6529, "step": 27425 }, { "epoch": 0.8498574792415418, "grad_norm": 2.616211984307749, "learning_rate": 7.903920161763747e-07, "loss": 0.6085, "step": 27430 }, { "epoch": 0.8500123931094311, "grad_norm": 2.1516072048293804, "learning_rate": 7.895766747113692e-07, "loss": 0.6421, "step": 27435 }, { "epoch": 0.8501673069773206, "grad_norm": 2.6563091333355495, "learning_rate": 7.887613332463637e-07, "loss": 0.5618, "step": 27440 }, { "epoch": 0.85032222084521, "grad_norm": 2.843929453590438, "learning_rate": 7.879459917813581e-07, "loss": 0.674, "step": 27445 }, { "epoch": 0.8504771347130995, "grad_norm": 3.120846413456794, "learning_rate": 7.871306503163526e-07, "loss": 0.6904, "step": 27450 }, { "epoch": 0.850632048580989, "grad_norm": 2.4040755451320632, "learning_rate": 7.86315308851347e-07, "loss": 0.6961, "step": 27455 }, { "epoch": 0.8507869624488784, "grad_norm": 2.4974223961197337, "learning_rate": 7.854999673863414e-07, "loss": 0.5678, "step": 27460 }, { "epoch": 0.8509418763167679, "grad_norm": 3.030336879109949, "learning_rate": 7.846846259213358e-07, "loss": 0.6773, "step": 27465 }, { "epoch": 0.8510967901846573, "grad_norm": 2.7322010654125024, "learning_rate": 7.838692844563303e-07, "loss": 0.7873, "step": 27470 }, { "epoch": 0.8512517040525468, "grad_norm": 2.6926510109571673, "learning_rate": 7.830539429913249e-07, "loss": 0.6558, "step": 27475 }, { "epoch": 0.8514066179204363, "grad_norm": 2.505842862075245, "learning_rate": 7.822386015263193e-07, "loss": 0.6488, "step": 27480 }, { "epoch": 0.8515615317883257, "grad_norm": 2.7098354002030187, "learning_rate": 7.814232600613138e-07, "loss": 0.6403, "step": 27485 }, { "epoch": 0.8517164456562152, "grad_norm": 2.9018229647886535, "learning_rate": 7.806079185963082e-07, "loss": 0.6322, "step": 27490 }, { "epoch": 0.8518713595241046, "grad_norm": 2.7693705552710326, "learning_rate": 7.797925771313027e-07, "loss": 0.6488, "step": 27495 }, { "epoch": 0.8520262733919941, "grad_norm": 2.813713794952479, "learning_rate": 7.789772356662971e-07, "loss": 0.5923, "step": 27500 }, { "epoch": 0.8521811872598835, "grad_norm": 2.5197309803414116, "learning_rate": 7.781618942012915e-07, "loss": 0.7102, "step": 27505 }, { "epoch": 0.8523361011277729, "grad_norm": 3.208285997896772, "learning_rate": 7.77346552736286e-07, "loss": 0.601, "step": 27510 }, { "epoch": 0.8524910149956624, "grad_norm": 3.0142888178896294, "learning_rate": 7.765312112712804e-07, "loss": 0.7131, "step": 27515 }, { "epoch": 0.8526459288635518, "grad_norm": 2.429256765204619, "learning_rate": 7.757158698062749e-07, "loss": 0.6794, "step": 27520 }, { "epoch": 0.8528008427314413, "grad_norm": 3.475486828038993, "learning_rate": 7.749005283412695e-07, "loss": 0.7164, "step": 27525 }, { "epoch": 0.8529557565993308, "grad_norm": 2.8718716015881074, "learning_rate": 7.740851868762639e-07, "loss": 0.6546, "step": 27530 }, { "epoch": 0.8531106704672202, "grad_norm": 3.84723263593517, "learning_rate": 7.732698454112583e-07, "loss": 0.6707, "step": 27535 }, { "epoch": 0.8532655843351097, "grad_norm": 2.6422333737732435, "learning_rate": 7.724545039462527e-07, "loss": 0.6868, "step": 27540 }, { "epoch": 0.8534204982029991, "grad_norm": 2.8222654746461315, "learning_rate": 7.716391624812472e-07, "loss": 0.641, "step": 27545 }, { "epoch": 0.8535754120708886, "grad_norm": 2.0097822426717005, "learning_rate": 7.708238210162417e-07, "loss": 0.6757, "step": 27550 }, { "epoch": 0.8537303259387781, "grad_norm": 3.0169123956503756, "learning_rate": 7.700084795512361e-07, "loss": 0.6309, "step": 27555 }, { "epoch": 0.8538852398066675, "grad_norm": 2.409277599099234, "learning_rate": 7.691931380862306e-07, "loss": 0.686, "step": 27560 }, { "epoch": 0.854040153674557, "grad_norm": 2.578826271290935, "learning_rate": 7.683777966212249e-07, "loss": 0.6979, "step": 27565 }, { "epoch": 0.8541950675424465, "grad_norm": 2.678092567459301, "learning_rate": 7.675624551562196e-07, "loss": 0.5814, "step": 27570 }, { "epoch": 0.8543499814103358, "grad_norm": 2.524737122835549, "learning_rate": 7.66747113691214e-07, "loss": 0.5723, "step": 27575 }, { "epoch": 0.8545048952782253, "grad_norm": 3.6461583814578704, "learning_rate": 7.659317722262084e-07, "loss": 0.611, "step": 27580 }, { "epoch": 0.8546598091461147, "grad_norm": 2.4977616762412826, "learning_rate": 7.651164307612029e-07, "loss": 0.6989, "step": 27585 }, { "epoch": 0.8548147230140042, "grad_norm": 2.6107745403814744, "learning_rate": 7.643010892961973e-07, "loss": 0.697, "step": 27590 }, { "epoch": 0.8549696368818936, "grad_norm": 2.5229246024347014, "learning_rate": 7.634857478311918e-07, "loss": 0.6635, "step": 27595 }, { "epoch": 0.8551245507497831, "grad_norm": 2.4372653782163813, "learning_rate": 7.626704063661862e-07, "loss": 0.6483, "step": 27600 }, { "epoch": 0.8552794646176726, "grad_norm": 3.39867872624241, "learning_rate": 7.618550649011806e-07, "loss": 0.6435, "step": 27605 }, { "epoch": 0.855434378485562, "grad_norm": 2.1086529238390432, "learning_rate": 7.610397234361751e-07, "loss": 0.6298, "step": 27610 }, { "epoch": 0.8555892923534515, "grad_norm": 2.683641821470691, "learning_rate": 7.602243819711696e-07, "loss": 0.6706, "step": 27615 }, { "epoch": 0.855744206221341, "grad_norm": 2.668760801686805, "learning_rate": 7.594090405061641e-07, "loss": 0.6207, "step": 27620 }, { "epoch": 0.8558991200892304, "grad_norm": 3.360164308647224, "learning_rate": 7.585936990411585e-07, "loss": 0.6441, "step": 27625 }, { "epoch": 0.8560540339571199, "grad_norm": 3.1793454383769437, "learning_rate": 7.57778357576153e-07, "loss": 0.7145, "step": 27630 }, { "epoch": 0.8562089478250093, "grad_norm": 2.195334158452135, "learning_rate": 7.569630161111475e-07, "loss": 0.609, "step": 27635 }, { "epoch": 0.8563638616928988, "grad_norm": 2.6738090458468315, "learning_rate": 7.561476746461418e-07, "loss": 0.6454, "step": 27640 }, { "epoch": 0.8565187755607881, "grad_norm": 2.430974054402034, "learning_rate": 7.553323331811363e-07, "loss": 0.6529, "step": 27645 }, { "epoch": 0.8566736894286776, "grad_norm": 2.7432536450412113, "learning_rate": 7.545169917161307e-07, "loss": 0.6268, "step": 27650 }, { "epoch": 0.8568286032965671, "grad_norm": 2.0577899551993712, "learning_rate": 7.537016502511252e-07, "loss": 0.6189, "step": 27655 }, { "epoch": 0.8569835171644565, "grad_norm": 4.095840082608159, "learning_rate": 7.528863087861196e-07, "loss": 0.6396, "step": 27660 }, { "epoch": 0.857138431032346, "grad_norm": 2.5323457994773286, "learning_rate": 7.520709673211142e-07, "loss": 0.7037, "step": 27665 }, { "epoch": 0.8572933449002355, "grad_norm": 2.541868868513928, "learning_rate": 7.512556258561087e-07, "loss": 0.6405, "step": 27670 }, { "epoch": 0.8574482587681249, "grad_norm": 2.417374447603616, "learning_rate": 7.504402843911031e-07, "loss": 0.5849, "step": 27675 }, { "epoch": 0.8576031726360144, "grad_norm": 4.433146067617524, "learning_rate": 7.496249429260975e-07, "loss": 0.8345, "step": 27680 }, { "epoch": 0.8577580865039038, "grad_norm": 2.611955098468851, "learning_rate": 7.488096014610919e-07, "loss": 0.6373, "step": 27685 }, { "epoch": 0.8579130003717933, "grad_norm": 3.0037464541416394, "learning_rate": 7.479942599960864e-07, "loss": 0.6639, "step": 27690 }, { "epoch": 0.8580679142396828, "grad_norm": 2.643002315629722, "learning_rate": 7.471789185310809e-07, "loss": 0.697, "step": 27695 }, { "epoch": 0.8582228281075722, "grad_norm": 2.255670331061852, "learning_rate": 7.463635770660753e-07, "loss": 0.6242, "step": 27700 }, { "epoch": 0.8583777419754617, "grad_norm": 2.8463245888480238, "learning_rate": 7.455482356010698e-07, "loss": 0.6766, "step": 27705 }, { "epoch": 0.8585326558433511, "grad_norm": 2.6544687314503124, "learning_rate": 7.447328941360643e-07, "loss": 0.6442, "step": 27710 }, { "epoch": 0.8586875697112405, "grad_norm": 2.496229683068221, "learning_rate": 7.439175526710588e-07, "loss": 0.6105, "step": 27715 }, { "epoch": 0.85884248357913, "grad_norm": 3.9227375849650525, "learning_rate": 7.431022112060532e-07, "loss": 0.6952, "step": 27720 }, { "epoch": 0.8589973974470194, "grad_norm": 2.316745517809695, "learning_rate": 7.422868697410476e-07, "loss": 0.6784, "step": 27725 }, { "epoch": 0.8591523113149089, "grad_norm": 4.597787010052603, "learning_rate": 7.414715282760421e-07, "loss": 0.6372, "step": 27730 }, { "epoch": 0.8593072251827983, "grad_norm": 3.237996946517901, "learning_rate": 7.406561868110365e-07, "loss": 0.6374, "step": 27735 }, { "epoch": 0.8594621390506878, "grad_norm": 2.074001206336912, "learning_rate": 7.39840845346031e-07, "loss": 0.6205, "step": 27740 }, { "epoch": 0.8596170529185773, "grad_norm": 2.3499602921276646, "learning_rate": 7.390255038810253e-07, "loss": 0.6544, "step": 27745 }, { "epoch": 0.8597719667864667, "grad_norm": 3.6661559912041293, "learning_rate": 7.382101624160198e-07, "loss": 0.691, "step": 27750 }, { "epoch": 0.8599268806543562, "grad_norm": 4.535116492432501, "learning_rate": 7.373948209510143e-07, "loss": 0.7333, "step": 27755 }, { "epoch": 0.8600817945222456, "grad_norm": 2.4960777739265287, "learning_rate": 7.365794794860088e-07, "loss": 0.68, "step": 27760 }, { "epoch": 0.8602367083901351, "grad_norm": 2.426390716143891, "learning_rate": 7.357641380210033e-07, "loss": 0.6208, "step": 27765 }, { "epoch": 0.8603916222580246, "grad_norm": 2.6290849770879237, "learning_rate": 7.349487965559977e-07, "loss": 0.6471, "step": 27770 }, { "epoch": 0.860546536125914, "grad_norm": 2.1231910519840165, "learning_rate": 7.341334550909922e-07, "loss": 0.6801, "step": 27775 }, { "epoch": 0.8607014499938035, "grad_norm": 3.619642716246525, "learning_rate": 7.333181136259867e-07, "loss": 0.6881, "step": 27780 }, { "epoch": 0.860856363861693, "grad_norm": 2.5008374366940034, "learning_rate": 7.32502772160981e-07, "loss": 0.712, "step": 27785 }, { "epoch": 0.8610112777295823, "grad_norm": 2.5952273603356746, "learning_rate": 7.316874306959755e-07, "loss": 0.673, "step": 27790 }, { "epoch": 0.8611661915974718, "grad_norm": 2.5618374019270385, "learning_rate": 7.308720892309699e-07, "loss": 0.6599, "step": 27795 }, { "epoch": 0.8613211054653612, "grad_norm": 3.258786470219016, "learning_rate": 7.300567477659644e-07, "loss": 0.6584, "step": 27800 }, { "epoch": 0.8614760193332507, "grad_norm": 2.6198208088115473, "learning_rate": 7.29241406300959e-07, "loss": 0.5634, "step": 27805 }, { "epoch": 0.8616309332011401, "grad_norm": 3.1143704294002776, "learning_rate": 7.284260648359534e-07, "loss": 0.6724, "step": 27810 }, { "epoch": 0.8617858470690296, "grad_norm": 2.351705379613561, "learning_rate": 7.276107233709479e-07, "loss": 0.6607, "step": 27815 }, { "epoch": 0.8619407609369191, "grad_norm": 2.57576774257152, "learning_rate": 7.267953819059423e-07, "loss": 0.6249, "step": 27820 }, { "epoch": 0.8620956748048085, "grad_norm": 2.3870470637628225, "learning_rate": 7.259800404409367e-07, "loss": 0.6611, "step": 27825 }, { "epoch": 0.862250588672698, "grad_norm": 3.1931267974398, "learning_rate": 7.251646989759312e-07, "loss": 0.6932, "step": 27830 }, { "epoch": 0.8624055025405875, "grad_norm": 4.994967217208362, "learning_rate": 7.243493575109256e-07, "loss": 0.7122, "step": 27835 }, { "epoch": 0.8625604164084769, "grad_norm": 2.719129006017126, "learning_rate": 7.235340160459201e-07, "loss": 0.6403, "step": 27840 }, { "epoch": 0.8627153302763664, "grad_norm": 2.252522483381991, "learning_rate": 7.227186745809145e-07, "loss": 0.6157, "step": 27845 }, { "epoch": 0.8628702441442558, "grad_norm": 2.423258879383203, "learning_rate": 7.219033331159089e-07, "loss": 0.6495, "step": 27850 }, { "epoch": 0.8630251580121453, "grad_norm": 2.248837170021354, "learning_rate": 7.210879916509036e-07, "loss": 0.6448, "step": 27855 }, { "epoch": 0.8631800718800346, "grad_norm": 2.359467215135131, "learning_rate": 7.202726501858979e-07, "loss": 0.5985, "step": 27860 }, { "epoch": 0.8633349857479241, "grad_norm": 2.882977154326312, "learning_rate": 7.194573087208924e-07, "loss": 0.6474, "step": 27865 }, { "epoch": 0.8634898996158136, "grad_norm": 2.406916440931398, "learning_rate": 7.186419672558868e-07, "loss": 0.6148, "step": 27870 }, { "epoch": 0.863644813483703, "grad_norm": 2.9693115667099574, "learning_rate": 7.178266257908813e-07, "loss": 0.6451, "step": 27875 }, { "epoch": 0.8637997273515925, "grad_norm": 2.7128783706228443, "learning_rate": 7.170112843258757e-07, "loss": 0.6112, "step": 27880 }, { "epoch": 0.863954641219482, "grad_norm": 2.982447946513176, "learning_rate": 7.161959428608702e-07, "loss": 0.6378, "step": 27885 }, { "epoch": 0.8641095550873714, "grad_norm": 2.9878934011576526, "learning_rate": 7.153806013958646e-07, "loss": 0.57, "step": 27890 }, { "epoch": 0.8642644689552609, "grad_norm": 3.1417594529024466, "learning_rate": 7.14565259930859e-07, "loss": 0.7104, "step": 27895 }, { "epoch": 0.8644193828231503, "grad_norm": 2.252297359474742, "learning_rate": 7.137499184658536e-07, "loss": 0.5603, "step": 27900 }, { "epoch": 0.8645742966910398, "grad_norm": 3.122345555722821, "learning_rate": 7.12934577000848e-07, "loss": 0.6682, "step": 27905 }, { "epoch": 0.8647292105589293, "grad_norm": 3.640899768516225, "learning_rate": 7.121192355358425e-07, "loss": 0.6594, "step": 27910 }, { "epoch": 0.8648841244268187, "grad_norm": 2.619677138909908, "learning_rate": 7.11303894070837e-07, "loss": 0.6849, "step": 27915 }, { "epoch": 0.8650390382947082, "grad_norm": 2.8907202165911885, "learning_rate": 7.104885526058314e-07, "loss": 0.6521, "step": 27920 }, { "epoch": 0.8651939521625976, "grad_norm": 2.73344300337766, "learning_rate": 7.096732111408259e-07, "loss": 0.5991, "step": 27925 }, { "epoch": 0.865348866030487, "grad_norm": 2.6912899755075252, "learning_rate": 7.088578696758202e-07, "loss": 0.7427, "step": 27930 }, { "epoch": 0.8655037798983765, "grad_norm": 3.6100464051595647, "learning_rate": 7.080425282108147e-07, "loss": 0.765, "step": 27935 }, { "epoch": 0.8656586937662659, "grad_norm": 2.3246973339688526, "learning_rate": 7.072271867458091e-07, "loss": 0.6354, "step": 27940 }, { "epoch": 0.8658136076341554, "grad_norm": 2.851421578023879, "learning_rate": 7.064118452808037e-07, "loss": 0.6559, "step": 27945 }, { "epoch": 0.8659685215020448, "grad_norm": 3.9531954867829446, "learning_rate": 7.055965038157982e-07, "loss": 0.6466, "step": 27950 }, { "epoch": 0.8661234353699343, "grad_norm": 2.740176436062633, "learning_rate": 7.047811623507926e-07, "loss": 0.6224, "step": 27955 }, { "epoch": 0.8662783492378238, "grad_norm": 2.2492850098782617, "learning_rate": 7.039658208857871e-07, "loss": 0.697, "step": 27960 }, { "epoch": 0.8664332631057132, "grad_norm": 2.5358357892890875, "learning_rate": 7.031504794207814e-07, "loss": 0.6253, "step": 27965 }, { "epoch": 0.8665881769736027, "grad_norm": 3.245030612519347, "learning_rate": 7.023351379557759e-07, "loss": 0.6385, "step": 27970 }, { "epoch": 0.8667430908414921, "grad_norm": 2.797201238527762, "learning_rate": 7.015197964907704e-07, "loss": 0.6853, "step": 27975 }, { "epoch": 0.8668980047093816, "grad_norm": 2.506379826786097, "learning_rate": 7.007044550257648e-07, "loss": 0.619, "step": 27980 }, { "epoch": 0.8670529185772711, "grad_norm": 2.6068818448241076, "learning_rate": 6.998891135607593e-07, "loss": 0.6275, "step": 27985 }, { "epoch": 0.8672078324451605, "grad_norm": 2.8147389692774114, "learning_rate": 6.990737720957537e-07, "loss": 0.6387, "step": 27990 }, { "epoch": 0.86736274631305, "grad_norm": 2.304387440619959, "learning_rate": 6.982584306307483e-07, "loss": 0.6844, "step": 27995 }, { "epoch": 0.8675176601809393, "grad_norm": 2.1365281834834393, "learning_rate": 6.974430891657428e-07, "loss": 0.631, "step": 28000 }, { "epoch": 0.8676725740488288, "grad_norm": 4.965952298452953, "learning_rate": 6.966277477007371e-07, "loss": 0.6317, "step": 28005 }, { "epoch": 0.8678274879167183, "grad_norm": 2.489663631672295, "learning_rate": 6.958124062357316e-07, "loss": 0.6675, "step": 28010 }, { "epoch": 0.8679824017846077, "grad_norm": 2.716905580426534, "learning_rate": 6.94997064770726e-07, "loss": 0.6658, "step": 28015 }, { "epoch": 0.8681373156524972, "grad_norm": 2.064240121558852, "learning_rate": 6.941817233057205e-07, "loss": 0.596, "step": 28020 }, { "epoch": 0.8682922295203866, "grad_norm": 3.175655067132419, "learning_rate": 6.93366381840715e-07, "loss": 0.6357, "step": 28025 }, { "epoch": 0.8684471433882761, "grad_norm": 2.70984483272361, "learning_rate": 6.925510403757094e-07, "loss": 0.7027, "step": 28030 }, { "epoch": 0.8686020572561656, "grad_norm": 2.5075196022699267, "learning_rate": 6.917356989107038e-07, "loss": 0.6378, "step": 28035 }, { "epoch": 0.868756971124055, "grad_norm": 2.5084203092696864, "learning_rate": 6.909203574456984e-07, "loss": 0.6875, "step": 28040 }, { "epoch": 0.8689118849919445, "grad_norm": 2.5181118859678286, "learning_rate": 6.901050159806928e-07, "loss": 0.6153, "step": 28045 }, { "epoch": 0.869066798859834, "grad_norm": 3.105989392516881, "learning_rate": 6.892896745156872e-07, "loss": 0.6424, "step": 28050 }, { "epoch": 0.8692217127277234, "grad_norm": 3.47592728095797, "learning_rate": 6.884743330506817e-07, "loss": 0.6669, "step": 28055 }, { "epoch": 0.8693766265956129, "grad_norm": 3.3238905667339256, "learning_rate": 6.876589915856762e-07, "loss": 0.6669, "step": 28060 }, { "epoch": 0.8695315404635023, "grad_norm": 2.3186837903176505, "learning_rate": 6.868436501206706e-07, "loss": 0.6709, "step": 28065 }, { "epoch": 0.8696864543313918, "grad_norm": 3.5667659314399036, "learning_rate": 6.86028308655665e-07, "loss": 0.6945, "step": 28070 }, { "epoch": 0.8698413681992812, "grad_norm": 2.5838169624602565, "learning_rate": 6.852129671906594e-07, "loss": 0.6305, "step": 28075 }, { "epoch": 0.8699962820671706, "grad_norm": 2.9864292460858146, "learning_rate": 6.843976257256539e-07, "loss": 0.6609, "step": 28080 }, { "epoch": 0.8701511959350601, "grad_norm": 2.2048037473402147, "learning_rate": 6.835822842606484e-07, "loss": 0.6483, "step": 28085 }, { "epoch": 0.8703061098029495, "grad_norm": 2.528016553274632, "learning_rate": 6.827669427956429e-07, "loss": 0.6311, "step": 28090 }, { "epoch": 0.870461023670839, "grad_norm": 2.6289535683549206, "learning_rate": 6.819516013306374e-07, "loss": 0.6568, "step": 28095 }, { "epoch": 0.8706159375387285, "grad_norm": 2.8637608604130107, "learning_rate": 6.811362598656318e-07, "loss": 0.6773, "step": 28100 }, { "epoch": 0.8707708514066179, "grad_norm": 2.777474512283653, "learning_rate": 6.803209184006263e-07, "loss": 0.678, "step": 28105 }, { "epoch": 0.8709257652745074, "grad_norm": 2.0907284627920015, "learning_rate": 6.795055769356206e-07, "loss": 0.6046, "step": 28110 }, { "epoch": 0.8710806791423968, "grad_norm": 2.3364211720829124, "learning_rate": 6.786902354706151e-07, "loss": 0.7313, "step": 28115 }, { "epoch": 0.8712355930102863, "grad_norm": 2.701208862949119, "learning_rate": 6.778748940056096e-07, "loss": 0.6035, "step": 28120 }, { "epoch": 0.8713905068781758, "grad_norm": 3.162998307459556, "learning_rate": 6.77059552540604e-07, "loss": 0.7064, "step": 28125 }, { "epoch": 0.8715454207460652, "grad_norm": 2.582862619894364, "learning_rate": 6.762442110755985e-07, "loss": 0.6816, "step": 28130 }, { "epoch": 0.8717003346139547, "grad_norm": 2.1582875293270907, "learning_rate": 6.75428869610593e-07, "loss": 0.6278, "step": 28135 }, { "epoch": 0.8718552484818441, "grad_norm": 2.44769885901872, "learning_rate": 6.746135281455875e-07, "loss": 0.6377, "step": 28140 }, { "epoch": 0.8720101623497335, "grad_norm": 2.546727522899472, "learning_rate": 6.73798186680582e-07, "loss": 0.6385, "step": 28145 }, { "epoch": 0.872165076217623, "grad_norm": 2.957403089388725, "learning_rate": 6.729828452155763e-07, "loss": 0.6468, "step": 28150 }, { "epoch": 0.8723199900855124, "grad_norm": 2.767970314057428, "learning_rate": 6.721675037505708e-07, "loss": 0.6397, "step": 28155 }, { "epoch": 0.8724749039534019, "grad_norm": 2.702742191371529, "learning_rate": 6.713521622855652e-07, "loss": 0.7285, "step": 28160 }, { "epoch": 0.8726298178212913, "grad_norm": 3.0404986430723935, "learning_rate": 6.705368208205597e-07, "loss": 0.7712, "step": 28165 }, { "epoch": 0.8727847316891808, "grad_norm": 2.076888711862015, "learning_rate": 6.697214793555542e-07, "loss": 0.6136, "step": 28170 }, { "epoch": 0.8729396455570703, "grad_norm": 2.1685132553286897, "learning_rate": 6.689061378905485e-07, "loss": 0.6755, "step": 28175 }, { "epoch": 0.8730945594249597, "grad_norm": 2.2459227973290656, "learning_rate": 6.68090796425543e-07, "loss": 0.591, "step": 28180 }, { "epoch": 0.8732494732928492, "grad_norm": 2.9399686541774352, "learning_rate": 6.672754549605375e-07, "loss": 0.6645, "step": 28185 }, { "epoch": 0.8734043871607386, "grad_norm": 2.216243918444446, "learning_rate": 6.66460113495532e-07, "loss": 0.6465, "step": 28190 }, { "epoch": 0.8735593010286281, "grad_norm": 2.2235780017952216, "learning_rate": 6.656447720305265e-07, "loss": 0.5906, "step": 28195 }, { "epoch": 0.8737142148965176, "grad_norm": 2.466816627733377, "learning_rate": 6.648294305655209e-07, "loss": 0.601, "step": 28200 }, { "epoch": 0.873869128764407, "grad_norm": 2.6657301538367544, "learning_rate": 6.640140891005154e-07, "loss": 0.68, "step": 28205 }, { "epoch": 0.8740240426322965, "grad_norm": 2.3742681433388846, "learning_rate": 6.631987476355098e-07, "loss": 0.606, "step": 28210 }, { "epoch": 0.8741789565001858, "grad_norm": 2.77744620628045, "learning_rate": 6.623834061705042e-07, "loss": 0.6545, "step": 28215 }, { "epoch": 0.8743338703680753, "grad_norm": 2.918841037058976, "learning_rate": 6.615680647054986e-07, "loss": 0.6041, "step": 28220 }, { "epoch": 0.8744887842359648, "grad_norm": 2.344118112006429, "learning_rate": 6.607527232404931e-07, "loss": 0.7631, "step": 28225 }, { "epoch": 0.8746436981038542, "grad_norm": 2.6820830879831288, "learning_rate": 6.599373817754877e-07, "loss": 0.7257, "step": 28230 }, { "epoch": 0.8747986119717437, "grad_norm": 3.0052282514022894, "learning_rate": 6.591220403104821e-07, "loss": 0.6771, "step": 28235 }, { "epoch": 0.8749535258396332, "grad_norm": 4.3246482269739195, "learning_rate": 6.583066988454766e-07, "loss": 0.6416, "step": 28240 }, { "epoch": 0.8751084397075226, "grad_norm": 3.031133597110576, "learning_rate": 6.57491357380471e-07, "loss": 0.6421, "step": 28245 }, { "epoch": 0.8752633535754121, "grad_norm": 2.4528499528440255, "learning_rate": 6.566760159154655e-07, "loss": 0.6671, "step": 28250 }, { "epoch": 0.8754182674433015, "grad_norm": 2.7719515879117074, "learning_rate": 6.558606744504599e-07, "loss": 0.6575, "step": 28255 }, { "epoch": 0.875573181311191, "grad_norm": 3.5269017023688103, "learning_rate": 6.550453329854543e-07, "loss": 0.6898, "step": 28260 }, { "epoch": 0.8757280951790805, "grad_norm": 2.564569153577497, "learning_rate": 6.542299915204488e-07, "loss": 0.6404, "step": 28265 }, { "epoch": 0.8758830090469699, "grad_norm": 2.0760125260350053, "learning_rate": 6.534146500554432e-07, "loss": 0.5772, "step": 28270 }, { "epoch": 0.8760379229148594, "grad_norm": 2.7602511431797936, "learning_rate": 6.525993085904378e-07, "loss": 0.652, "step": 28275 }, { "epoch": 0.8761928367827488, "grad_norm": 2.5306437400793804, "learning_rate": 6.517839671254323e-07, "loss": 0.6553, "step": 28280 }, { "epoch": 0.8763477506506382, "grad_norm": 2.359043489195726, "learning_rate": 6.509686256604267e-07, "loss": 0.614, "step": 28285 }, { "epoch": 0.8765026645185277, "grad_norm": 2.4449914935765884, "learning_rate": 6.501532841954211e-07, "loss": 0.6978, "step": 28290 }, { "epoch": 0.8766575783864171, "grad_norm": 2.9150232410465655, "learning_rate": 6.493379427304155e-07, "loss": 0.6445, "step": 28295 }, { "epoch": 0.8768124922543066, "grad_norm": 3.0107516642589, "learning_rate": 6.4852260126541e-07, "loss": 0.6166, "step": 28300 }, { "epoch": 0.876967406122196, "grad_norm": 2.8780650362115927, "learning_rate": 6.477072598004044e-07, "loss": 0.7532, "step": 28305 }, { "epoch": 0.8771223199900855, "grad_norm": 2.4626789128414552, "learning_rate": 6.468919183353989e-07, "loss": 0.6258, "step": 28310 }, { "epoch": 0.877277233857975, "grad_norm": 2.8020572646073103, "learning_rate": 6.460765768703934e-07, "loss": 0.6533, "step": 28315 }, { "epoch": 0.8774321477258644, "grad_norm": 2.5386512156539585, "learning_rate": 6.452612354053877e-07, "loss": 0.6823, "step": 28320 }, { "epoch": 0.8775870615937539, "grad_norm": 2.6013261748255947, "learning_rate": 6.444458939403824e-07, "loss": 0.6249, "step": 28325 }, { "epoch": 0.8777419754616433, "grad_norm": 2.9159451274703776, "learning_rate": 6.436305524753767e-07, "loss": 0.6782, "step": 28330 }, { "epoch": 0.8778968893295328, "grad_norm": 3.590545002294046, "learning_rate": 6.428152110103712e-07, "loss": 0.7018, "step": 28335 }, { "epoch": 0.8780518031974223, "grad_norm": 2.5201486180237356, "learning_rate": 6.419998695453657e-07, "loss": 0.6602, "step": 28340 }, { "epoch": 0.8782067170653117, "grad_norm": 2.7350531421568745, "learning_rate": 6.411845280803601e-07, "loss": 0.6677, "step": 28345 }, { "epoch": 0.8783616309332012, "grad_norm": 2.787812165198692, "learning_rate": 6.403691866153546e-07, "loss": 0.7094, "step": 28350 }, { "epoch": 0.8785165448010906, "grad_norm": 2.349153916408854, "learning_rate": 6.39553845150349e-07, "loss": 0.689, "step": 28355 }, { "epoch": 0.87867145866898, "grad_norm": 2.4720528308423546, "learning_rate": 6.387385036853434e-07, "loss": 0.638, "step": 28360 }, { "epoch": 0.8788263725368695, "grad_norm": 2.22499137100728, "learning_rate": 6.379231622203379e-07, "loss": 0.7245, "step": 28365 }, { "epoch": 0.8789812864047589, "grad_norm": 2.63130302332438, "learning_rate": 6.371078207553324e-07, "loss": 0.6971, "step": 28370 }, { "epoch": 0.8791362002726484, "grad_norm": 2.504947945660723, "learning_rate": 6.362924792903269e-07, "loss": 0.6345, "step": 28375 }, { "epoch": 0.8792911141405378, "grad_norm": 3.063842254374719, "learning_rate": 6.354771378253213e-07, "loss": 0.7091, "step": 28380 }, { "epoch": 0.8794460280084273, "grad_norm": 2.945086084203825, "learning_rate": 6.346617963603158e-07, "loss": 0.6824, "step": 28385 }, { "epoch": 0.8796009418763168, "grad_norm": 3.34231379371623, "learning_rate": 6.338464548953103e-07, "loss": 0.5737, "step": 28390 }, { "epoch": 0.8797558557442062, "grad_norm": 2.928910535381423, "learning_rate": 6.330311134303046e-07, "loss": 0.7163, "step": 28395 }, { "epoch": 0.8799107696120957, "grad_norm": 3.6021502527868865, "learning_rate": 6.322157719652991e-07, "loss": 0.688, "step": 28400 }, { "epoch": 0.8800656834799852, "grad_norm": 3.5668661153808, "learning_rate": 6.314004305002935e-07, "loss": 0.6826, "step": 28405 }, { "epoch": 0.8802205973478746, "grad_norm": 2.3588905072214414, "learning_rate": 6.30585089035288e-07, "loss": 0.6163, "step": 28410 }, { "epoch": 0.8803755112157641, "grad_norm": 2.6025099736133717, "learning_rate": 6.297697475702824e-07, "loss": 0.7322, "step": 28415 }, { "epoch": 0.8805304250836535, "grad_norm": 2.474443631996112, "learning_rate": 6.28954406105277e-07, "loss": 0.6481, "step": 28420 }, { "epoch": 0.880685338951543, "grad_norm": 3.291340589186006, "learning_rate": 6.281390646402715e-07, "loss": 0.6655, "step": 28425 }, { "epoch": 0.8808402528194323, "grad_norm": 2.4869199338029397, "learning_rate": 6.273237231752659e-07, "loss": 0.6054, "step": 28430 }, { "epoch": 0.8809951666873218, "grad_norm": 2.557292870349077, "learning_rate": 6.265083817102603e-07, "loss": 0.6314, "step": 28435 }, { "epoch": 0.8811500805552113, "grad_norm": 2.4290367170029112, "learning_rate": 6.256930402452547e-07, "loss": 0.6435, "step": 28440 }, { "epoch": 0.8813049944231007, "grad_norm": 3.183711329655212, "learning_rate": 6.248776987802492e-07, "loss": 0.6496, "step": 28445 }, { "epoch": 0.8814599082909902, "grad_norm": 2.5565596172815437, "learning_rate": 6.240623573152437e-07, "loss": 0.6137, "step": 28450 }, { "epoch": 0.8816148221588797, "grad_norm": 3.1551062848477005, "learning_rate": 6.232470158502381e-07, "loss": 0.611, "step": 28455 }, { "epoch": 0.8817697360267691, "grad_norm": 2.956829476765574, "learning_rate": 6.224316743852326e-07, "loss": 0.6925, "step": 28460 }, { "epoch": 0.8819246498946586, "grad_norm": 2.8293115356207843, "learning_rate": 6.21616332920227e-07, "loss": 0.7171, "step": 28465 }, { "epoch": 0.882079563762548, "grad_norm": 2.0055441008366985, "learning_rate": 6.208009914552215e-07, "loss": 0.6552, "step": 28470 }, { "epoch": 0.8822344776304375, "grad_norm": 2.926346195380591, "learning_rate": 6.19985649990216e-07, "loss": 0.6203, "step": 28475 }, { "epoch": 0.882389391498327, "grad_norm": 2.496766785152243, "learning_rate": 6.191703085252104e-07, "loss": 0.6651, "step": 28480 }, { "epoch": 0.8825443053662164, "grad_norm": 2.884450535337123, "learning_rate": 6.183549670602049e-07, "loss": 0.6055, "step": 28485 }, { "epoch": 0.8826992192341059, "grad_norm": 2.7319354396896105, "learning_rate": 6.175396255951993e-07, "loss": 0.7446, "step": 28490 }, { "epoch": 0.8828541331019953, "grad_norm": 4.377588545083524, "learning_rate": 6.167242841301938e-07, "loss": 0.6749, "step": 28495 }, { "epoch": 0.8830090469698847, "grad_norm": 5.24722423188279, "learning_rate": 6.159089426651882e-07, "loss": 0.7094, "step": 28500 }, { "epoch": 0.8831639608377742, "grad_norm": 2.2101246438371547, "learning_rate": 6.150936012001827e-07, "loss": 0.6393, "step": 28505 }, { "epoch": 0.8833188747056636, "grad_norm": 2.544974657537719, "learning_rate": 6.142782597351772e-07, "loss": 0.6577, "step": 28510 }, { "epoch": 0.8834737885735531, "grad_norm": 3.54366424855318, "learning_rate": 6.134629182701716e-07, "loss": 0.6832, "step": 28515 }, { "epoch": 0.8836287024414425, "grad_norm": 2.1520356490421606, "learning_rate": 6.12647576805166e-07, "loss": 0.6161, "step": 28520 }, { "epoch": 0.883783616309332, "grad_norm": 2.9656305300241477, "learning_rate": 6.118322353401605e-07, "loss": 0.6614, "step": 28525 }, { "epoch": 0.8839385301772215, "grad_norm": 2.3300263205860263, "learning_rate": 6.11016893875155e-07, "loss": 0.6583, "step": 28530 }, { "epoch": 0.8840934440451109, "grad_norm": 2.7023142958088293, "learning_rate": 6.102015524101495e-07, "loss": 0.6304, "step": 28535 }, { "epoch": 0.8842483579130004, "grad_norm": 2.3793264763132593, "learning_rate": 6.093862109451438e-07, "loss": 0.6516, "step": 28540 }, { "epoch": 0.8844032717808898, "grad_norm": 3.4988429834001744, "learning_rate": 6.085708694801383e-07, "loss": 0.7958, "step": 28545 }, { "epoch": 0.8845581856487793, "grad_norm": 2.719209934586644, "learning_rate": 6.077555280151328e-07, "loss": 0.6511, "step": 28550 }, { "epoch": 0.8847130995166688, "grad_norm": 3.6587806419465925, "learning_rate": 6.069401865501273e-07, "loss": 0.7221, "step": 28555 }, { "epoch": 0.8848680133845582, "grad_norm": 3.3138691280714547, "learning_rate": 6.061248450851217e-07, "loss": 0.7146, "step": 28560 }, { "epoch": 0.8850229272524477, "grad_norm": 2.301905012292336, "learning_rate": 6.053095036201161e-07, "loss": 0.6624, "step": 28565 }, { "epoch": 0.885177841120337, "grad_norm": 2.8250476841970324, "learning_rate": 6.044941621551107e-07, "loss": 0.6539, "step": 28570 }, { "epoch": 0.8853327549882265, "grad_norm": 2.6042146461027347, "learning_rate": 6.03678820690105e-07, "loss": 0.6765, "step": 28575 }, { "epoch": 0.885487668856116, "grad_norm": 2.348383721097165, "learning_rate": 6.028634792250995e-07, "loss": 0.6557, "step": 28580 }, { "epoch": 0.8856425827240054, "grad_norm": 3.035183883382766, "learning_rate": 6.02048137760094e-07, "loss": 0.7252, "step": 28585 }, { "epoch": 0.8857974965918949, "grad_norm": 2.3759159100067957, "learning_rate": 6.012327962950884e-07, "loss": 0.6122, "step": 28590 }, { "epoch": 0.8859524104597843, "grad_norm": 2.2119857581374087, "learning_rate": 6.004174548300829e-07, "loss": 0.6196, "step": 28595 }, { "epoch": 0.8861073243276738, "grad_norm": 2.607316098520702, "learning_rate": 5.996021133650773e-07, "loss": 0.7122, "step": 28600 }, { "epoch": 0.8862622381955633, "grad_norm": 2.6671206127812725, "learning_rate": 5.987867719000718e-07, "loss": 0.626, "step": 28605 }, { "epoch": 0.8864171520634527, "grad_norm": 4.1996811530804505, "learning_rate": 5.979714304350662e-07, "loss": 0.6573, "step": 28610 }, { "epoch": 0.8865720659313422, "grad_norm": 2.8858614960534026, "learning_rate": 5.971560889700607e-07, "loss": 0.6416, "step": 28615 }, { "epoch": 0.8867269797992317, "grad_norm": 2.237806347130692, "learning_rate": 5.963407475050552e-07, "loss": 0.6676, "step": 28620 }, { "epoch": 0.8868818936671211, "grad_norm": 2.872064035928695, "learning_rate": 5.955254060400496e-07, "loss": 0.659, "step": 28625 }, { "epoch": 0.8870368075350106, "grad_norm": 2.587054794563248, "learning_rate": 5.947100645750441e-07, "loss": 0.6321, "step": 28630 }, { "epoch": 0.8871917214029, "grad_norm": 2.583941960975945, "learning_rate": 5.938947231100385e-07, "loss": 0.6485, "step": 28635 }, { "epoch": 0.8873466352707894, "grad_norm": 2.278465078807836, "learning_rate": 5.93079381645033e-07, "loss": 0.6644, "step": 28640 }, { "epoch": 0.8875015491386788, "grad_norm": 2.6745944330650904, "learning_rate": 5.922640401800275e-07, "loss": 0.6193, "step": 28645 }, { "epoch": 0.8876564630065683, "grad_norm": 2.7657693458634838, "learning_rate": 5.914486987150219e-07, "loss": 0.7015, "step": 28650 }, { "epoch": 0.8878113768744578, "grad_norm": 2.8607767306915033, "learning_rate": 5.906333572500164e-07, "loss": 0.5913, "step": 28655 }, { "epoch": 0.8879662907423472, "grad_norm": 2.3376503814032104, "learning_rate": 5.898180157850108e-07, "loss": 0.632, "step": 28660 }, { "epoch": 0.8881212046102367, "grad_norm": 2.49584451599261, "learning_rate": 5.890026743200053e-07, "loss": 0.6474, "step": 28665 }, { "epoch": 0.8882761184781262, "grad_norm": 2.887479544861868, "learning_rate": 5.881873328549998e-07, "loss": 0.6971, "step": 28670 }, { "epoch": 0.8884310323460156, "grad_norm": 1.8812442556465343, "learning_rate": 5.873719913899942e-07, "loss": 0.6963, "step": 28675 }, { "epoch": 0.8885859462139051, "grad_norm": 3.1584213023873087, "learning_rate": 5.865566499249887e-07, "loss": 0.5683, "step": 28680 }, { "epoch": 0.8887408600817945, "grad_norm": 2.7755668232526376, "learning_rate": 5.85741308459983e-07, "loss": 0.6559, "step": 28685 }, { "epoch": 0.888895773949684, "grad_norm": 2.7181874227511664, "learning_rate": 5.849259669949776e-07, "loss": 0.6515, "step": 28690 }, { "epoch": 0.8890506878175735, "grad_norm": 2.947137709176452, "learning_rate": 5.84110625529972e-07, "loss": 0.6349, "step": 28695 }, { "epoch": 0.8892056016854629, "grad_norm": 3.1797406483437785, "learning_rate": 5.832952840649664e-07, "loss": 0.6424, "step": 28700 }, { "epoch": 0.8893605155533524, "grad_norm": 2.2992008775134245, "learning_rate": 5.824799425999609e-07, "loss": 0.6654, "step": 28705 }, { "epoch": 0.8895154294212418, "grad_norm": 2.627400824885471, "learning_rate": 5.816646011349553e-07, "loss": 0.6546, "step": 28710 }, { "epoch": 0.8896703432891312, "grad_norm": 2.5322718233536214, "learning_rate": 5.808492596699499e-07, "loss": 0.7019, "step": 28715 }, { "epoch": 0.8898252571570207, "grad_norm": 3.63113019943499, "learning_rate": 5.800339182049442e-07, "loss": 0.5952, "step": 28720 }, { "epoch": 0.8899801710249101, "grad_norm": 2.241616023187694, "learning_rate": 5.792185767399387e-07, "loss": 0.6445, "step": 28725 }, { "epoch": 0.8901350848927996, "grad_norm": 3.247654081130123, "learning_rate": 5.784032352749332e-07, "loss": 0.7353, "step": 28730 }, { "epoch": 0.890289998760689, "grad_norm": 2.59139973425972, "learning_rate": 5.775878938099277e-07, "loss": 0.6534, "step": 28735 }, { "epoch": 0.8904449126285785, "grad_norm": 2.2589793523953143, "learning_rate": 5.767725523449221e-07, "loss": 0.6852, "step": 28740 }, { "epoch": 0.890599826496468, "grad_norm": 2.813943460275412, "learning_rate": 5.759572108799165e-07, "loss": 0.6592, "step": 28745 }, { "epoch": 0.8907547403643574, "grad_norm": 2.991079908260542, "learning_rate": 5.75141869414911e-07, "loss": 0.6912, "step": 28750 }, { "epoch": 0.8909096542322469, "grad_norm": 2.9485653867938115, "learning_rate": 5.743265279499055e-07, "loss": 0.6669, "step": 28755 }, { "epoch": 0.8910645681001363, "grad_norm": 2.6437549512907568, "learning_rate": 5.735111864848999e-07, "loss": 0.6833, "step": 28760 }, { "epoch": 0.8912194819680258, "grad_norm": 2.6648078286114134, "learning_rate": 5.726958450198944e-07, "loss": 0.6991, "step": 28765 }, { "epoch": 0.8913743958359153, "grad_norm": 2.336509402005104, "learning_rate": 5.718805035548888e-07, "loss": 0.6943, "step": 28770 }, { "epoch": 0.8915293097038047, "grad_norm": 3.611521631843218, "learning_rate": 5.710651620898833e-07, "loss": 0.7799, "step": 28775 }, { "epoch": 0.8916842235716942, "grad_norm": 2.6888566424283358, "learning_rate": 5.702498206248777e-07, "loss": 0.643, "step": 28780 }, { "epoch": 0.8918391374395835, "grad_norm": 2.479750431810428, "learning_rate": 5.694344791598722e-07, "loss": 0.6676, "step": 28785 }, { "epoch": 0.891994051307473, "grad_norm": 2.1192139279224187, "learning_rate": 5.686191376948667e-07, "loss": 0.5991, "step": 28790 }, { "epoch": 0.8921489651753625, "grad_norm": 2.709145972400748, "learning_rate": 5.678037962298611e-07, "loss": 0.5789, "step": 28795 }, { "epoch": 0.8923038790432519, "grad_norm": 2.4366776581730605, "learning_rate": 5.669884547648556e-07, "loss": 0.6423, "step": 28800 }, { "epoch": 0.8924587929111414, "grad_norm": 2.3042747885712647, "learning_rate": 5.6617311329985e-07, "loss": 0.7022, "step": 28805 }, { "epoch": 0.8926137067790308, "grad_norm": 3.3542141360881828, "learning_rate": 5.653577718348445e-07, "loss": 0.6388, "step": 28810 }, { "epoch": 0.8927686206469203, "grad_norm": 1.6773130930341096, "learning_rate": 5.64542430369839e-07, "loss": 0.597, "step": 28815 }, { "epoch": 0.8929235345148098, "grad_norm": 2.5640801187331124, "learning_rate": 5.637270889048334e-07, "loss": 0.7162, "step": 28820 }, { "epoch": 0.8930784483826992, "grad_norm": 3.3365318307331164, "learning_rate": 5.629117474398278e-07, "loss": 0.7635, "step": 28825 }, { "epoch": 0.8932333622505887, "grad_norm": 2.248331752845666, "learning_rate": 5.620964059748223e-07, "loss": 0.6254, "step": 28830 }, { "epoch": 0.8933882761184782, "grad_norm": 3.0157507819385088, "learning_rate": 5.612810645098168e-07, "loss": 0.663, "step": 28835 }, { "epoch": 0.8935431899863676, "grad_norm": 2.576699463624045, "learning_rate": 5.604657230448113e-07, "loss": 0.5944, "step": 28840 }, { "epoch": 0.8936981038542571, "grad_norm": 2.4370266843829533, "learning_rate": 5.596503815798056e-07, "loss": 0.6725, "step": 28845 }, { "epoch": 0.8938530177221465, "grad_norm": 3.2851935994867714, "learning_rate": 5.588350401148001e-07, "loss": 0.6915, "step": 28850 }, { "epoch": 0.8940079315900359, "grad_norm": 2.66859639470865, "learning_rate": 5.580196986497946e-07, "loss": 0.6647, "step": 28855 }, { "epoch": 0.8941628454579253, "grad_norm": 3.485267011923944, "learning_rate": 5.572043571847891e-07, "loss": 0.6718, "step": 28860 }, { "epoch": 0.8943177593258148, "grad_norm": 2.6203594544293187, "learning_rate": 5.563890157197834e-07, "loss": 0.6065, "step": 28865 }, { "epoch": 0.8944726731937043, "grad_norm": 2.3753966231200327, "learning_rate": 5.555736742547779e-07, "loss": 0.6675, "step": 28870 }, { "epoch": 0.8946275870615937, "grad_norm": 3.0566059503647978, "learning_rate": 5.547583327897724e-07, "loss": 0.6412, "step": 28875 }, { "epoch": 0.8947825009294832, "grad_norm": 2.4473406925159957, "learning_rate": 5.539429913247669e-07, "loss": 0.5838, "step": 28880 }, { "epoch": 0.8949374147973727, "grad_norm": 3.6555311594977487, "learning_rate": 5.531276498597613e-07, "loss": 0.6792, "step": 28885 }, { "epoch": 0.8950923286652621, "grad_norm": 2.3100960739869976, "learning_rate": 5.523123083947557e-07, "loss": 0.5951, "step": 28890 }, { "epoch": 0.8952472425331516, "grad_norm": 2.6576394872810196, "learning_rate": 5.514969669297502e-07, "loss": 0.5709, "step": 28895 }, { "epoch": 0.895402156401041, "grad_norm": 3.9490910465694284, "learning_rate": 5.506816254647447e-07, "loss": 0.6651, "step": 28900 }, { "epoch": 0.8955570702689305, "grad_norm": 2.8661499022105215, "learning_rate": 5.498662839997391e-07, "loss": 0.6323, "step": 28905 }, { "epoch": 0.89571198413682, "grad_norm": 2.853187550364711, "learning_rate": 5.490509425347336e-07, "loss": 0.7515, "step": 28910 }, { "epoch": 0.8958668980047094, "grad_norm": 2.850091384982753, "learning_rate": 5.48235601069728e-07, "loss": 0.6585, "step": 28915 }, { "epoch": 0.8960218118725989, "grad_norm": 3.6740896460865016, "learning_rate": 5.474202596047225e-07, "loss": 0.6649, "step": 28920 }, { "epoch": 0.8961767257404882, "grad_norm": 2.901571583242009, "learning_rate": 5.46604918139717e-07, "loss": 0.6838, "step": 28925 }, { "epoch": 0.8963316396083777, "grad_norm": 2.286590550246795, "learning_rate": 5.457895766747114e-07, "loss": 0.5931, "step": 28930 }, { "epoch": 0.8964865534762672, "grad_norm": 2.8603603789448373, "learning_rate": 5.449742352097059e-07, "loss": 0.653, "step": 28935 }, { "epoch": 0.8966414673441566, "grad_norm": 2.4289603791007264, "learning_rate": 5.441588937447003e-07, "loss": 0.6677, "step": 28940 }, { "epoch": 0.8967963812120461, "grad_norm": 2.3425251461396677, "learning_rate": 5.433435522796948e-07, "loss": 0.6865, "step": 28945 }, { "epoch": 0.8969512950799355, "grad_norm": 4.490634327387394, "learning_rate": 5.425282108146893e-07, "loss": 0.6092, "step": 28950 }, { "epoch": 0.897106208947825, "grad_norm": 2.273323917534121, "learning_rate": 5.417128693496837e-07, "loss": 0.6793, "step": 28955 }, { "epoch": 0.8972611228157145, "grad_norm": 3.905900208367151, "learning_rate": 5.408975278846782e-07, "loss": 0.6904, "step": 28960 }, { "epoch": 0.8974160366836039, "grad_norm": 3.9011629405418073, "learning_rate": 5.400821864196726e-07, "loss": 0.7104, "step": 28965 }, { "epoch": 0.8975709505514934, "grad_norm": 2.864902961819272, "learning_rate": 5.392668449546671e-07, "loss": 0.6823, "step": 28970 }, { "epoch": 0.8977258644193828, "grad_norm": 3.1802424053147362, "learning_rate": 5.384515034896615e-07, "loss": 0.6265, "step": 28975 }, { "epoch": 0.8978807782872723, "grad_norm": 2.6585648809619773, "learning_rate": 5.37636162024656e-07, "loss": 0.6646, "step": 28980 }, { "epoch": 0.8980356921551618, "grad_norm": 4.277550523547858, "learning_rate": 5.368208205596505e-07, "loss": 0.6642, "step": 28985 }, { "epoch": 0.8981906060230512, "grad_norm": 2.5776245961063506, "learning_rate": 5.360054790946448e-07, "loss": 0.6447, "step": 28990 }, { "epoch": 0.8983455198909407, "grad_norm": 3.7536894239043286, "learning_rate": 5.351901376296394e-07, "loss": 0.5665, "step": 28995 }, { "epoch": 0.89850043375883, "grad_norm": 3.1595399833101148, "learning_rate": 5.343747961646338e-07, "loss": 0.6181, "step": 29000 }, { "epoch": 0.8986553476267195, "grad_norm": 2.679831377921623, "learning_rate": 5.335594546996282e-07, "loss": 0.6216, "step": 29005 }, { "epoch": 0.898810261494609, "grad_norm": 2.4938838333404423, "learning_rate": 5.327441132346227e-07, "loss": 0.6567, "step": 29010 }, { "epoch": 0.8989651753624984, "grad_norm": 3.59850717380749, "learning_rate": 5.319287717696171e-07, "loss": 0.7188, "step": 29015 }, { "epoch": 0.8991200892303879, "grad_norm": 3.5277903603145804, "learning_rate": 5.311134303046117e-07, "loss": 0.6092, "step": 29020 }, { "epoch": 0.8992750030982773, "grad_norm": 2.587206571723664, "learning_rate": 5.30298088839606e-07, "loss": 0.673, "step": 29025 }, { "epoch": 0.8994299169661668, "grad_norm": 3.2882721243313298, "learning_rate": 5.294827473746005e-07, "loss": 0.6375, "step": 29030 }, { "epoch": 0.8995848308340563, "grad_norm": 2.2190273518600883, "learning_rate": 5.28667405909595e-07, "loss": 0.6928, "step": 29035 }, { "epoch": 0.8997397447019457, "grad_norm": 2.419959619299544, "learning_rate": 5.278520644445894e-07, "loss": 0.6592, "step": 29040 }, { "epoch": 0.8998946585698352, "grad_norm": 2.431448609450018, "learning_rate": 5.270367229795839e-07, "loss": 0.5948, "step": 29045 }, { "epoch": 0.9000495724377247, "grad_norm": 3.1234945419507585, "learning_rate": 5.262213815145783e-07, "loss": 0.6319, "step": 29050 }, { "epoch": 0.9002044863056141, "grad_norm": 2.4998530091736004, "learning_rate": 5.254060400495728e-07, "loss": 0.6766, "step": 29055 }, { "epoch": 0.9003594001735036, "grad_norm": 2.537745252451248, "learning_rate": 5.245906985845672e-07, "loss": 0.6782, "step": 29060 }, { "epoch": 0.900514314041393, "grad_norm": 2.712230324574828, "learning_rate": 5.237753571195617e-07, "loss": 0.7596, "step": 29065 }, { "epoch": 0.9006692279092824, "grad_norm": 3.0921345839754917, "learning_rate": 5.229600156545562e-07, "loss": 0.7824, "step": 29070 }, { "epoch": 0.9008241417771718, "grad_norm": 2.4911491014907177, "learning_rate": 5.221446741895506e-07, "loss": 0.5798, "step": 29075 }, { "epoch": 0.9009790556450613, "grad_norm": 3.1188056831457978, "learning_rate": 5.213293327245451e-07, "loss": 0.6195, "step": 29080 }, { "epoch": 0.9011339695129508, "grad_norm": 2.5792874730911626, "learning_rate": 5.205139912595395e-07, "loss": 0.6333, "step": 29085 }, { "epoch": 0.9012888833808402, "grad_norm": 2.9990851783263004, "learning_rate": 5.19698649794534e-07, "loss": 0.7056, "step": 29090 }, { "epoch": 0.9014437972487297, "grad_norm": 3.352811315956958, "learning_rate": 5.188833083295285e-07, "loss": 0.758, "step": 29095 }, { "epoch": 0.9015987111166192, "grad_norm": 2.7023146147604615, "learning_rate": 5.180679668645229e-07, "loss": 0.6061, "step": 29100 }, { "epoch": 0.9017536249845086, "grad_norm": 2.8159283180987824, "learning_rate": 5.172526253995174e-07, "loss": 0.6239, "step": 29105 }, { "epoch": 0.9019085388523981, "grad_norm": 2.851892131785361, "learning_rate": 5.164372839345117e-07, "loss": 0.693, "step": 29110 }, { "epoch": 0.9020634527202875, "grad_norm": 2.819830737235417, "learning_rate": 5.156219424695063e-07, "loss": 0.6016, "step": 29115 }, { "epoch": 0.902218366588177, "grad_norm": 2.4479564316007805, "learning_rate": 5.148066010045008e-07, "loss": 0.6315, "step": 29120 }, { "epoch": 0.9023732804560665, "grad_norm": 2.3662726482259124, "learning_rate": 5.139912595394952e-07, "loss": 0.582, "step": 29125 }, { "epoch": 0.9025281943239559, "grad_norm": 2.825321958537276, "learning_rate": 5.131759180744896e-07, "loss": 0.6835, "step": 29130 }, { "epoch": 0.9026831081918454, "grad_norm": 3.8860801992240077, "learning_rate": 5.123605766094841e-07, "loss": 0.7025, "step": 29135 }, { "epoch": 0.9028380220597347, "grad_norm": 2.256083739514183, "learning_rate": 5.115452351444786e-07, "loss": 0.6162, "step": 29140 }, { "epoch": 0.9029929359276242, "grad_norm": 2.1654648357989723, "learning_rate": 5.10729893679473e-07, "loss": 0.6876, "step": 29145 }, { "epoch": 0.9031478497955137, "grad_norm": 2.3597967475649093, "learning_rate": 5.099145522144674e-07, "loss": 0.629, "step": 29150 }, { "epoch": 0.9033027636634031, "grad_norm": 2.722168570160654, "learning_rate": 5.090992107494619e-07, "loss": 0.7094, "step": 29155 }, { "epoch": 0.9034576775312926, "grad_norm": 2.5739388769835254, "learning_rate": 5.082838692844564e-07, "loss": 0.6322, "step": 29160 }, { "epoch": 0.903612591399182, "grad_norm": 2.907288937005935, "learning_rate": 5.074685278194509e-07, "loss": 0.696, "step": 29165 }, { "epoch": 0.9037675052670715, "grad_norm": 2.8710355393492595, "learning_rate": 5.066531863544452e-07, "loss": 0.6142, "step": 29170 }, { "epoch": 0.903922419134961, "grad_norm": 2.316476767608357, "learning_rate": 5.058378448894397e-07, "loss": 0.6201, "step": 29175 }, { "epoch": 0.9040773330028504, "grad_norm": 2.8816435801974203, "learning_rate": 5.050225034244342e-07, "loss": 0.6678, "step": 29180 }, { "epoch": 0.9042322468707399, "grad_norm": 3.2928129107081485, "learning_rate": 5.042071619594287e-07, "loss": 0.5697, "step": 29185 }, { "epoch": 0.9043871607386293, "grad_norm": 2.9918750641335197, "learning_rate": 5.033918204944231e-07, "loss": 0.6763, "step": 29190 }, { "epoch": 0.9045420746065188, "grad_norm": 2.268483926529696, "learning_rate": 5.025764790294175e-07, "loss": 0.6592, "step": 29195 }, { "epoch": 0.9046969884744083, "grad_norm": 2.903005046813323, "learning_rate": 5.01761137564412e-07, "loss": 0.5876, "step": 29200 }, { "epoch": 0.9048519023422977, "grad_norm": 3.0619533238800463, "learning_rate": 5.009457960994065e-07, "loss": 0.6658, "step": 29205 }, { "epoch": 0.9050068162101871, "grad_norm": 2.277557997182056, "learning_rate": 5.001304546344009e-07, "loss": 0.6155, "step": 29210 }, { "epoch": 0.9051617300780765, "grad_norm": 2.739652687973796, "learning_rate": 4.993151131693954e-07, "loss": 0.6743, "step": 29215 }, { "epoch": 0.905316643945966, "grad_norm": 2.502780058485154, "learning_rate": 4.984997717043898e-07, "loss": 0.6495, "step": 29220 }, { "epoch": 0.9054715578138555, "grad_norm": 2.962550828273929, "learning_rate": 4.976844302393843e-07, "loss": 0.6036, "step": 29225 }, { "epoch": 0.9056264716817449, "grad_norm": 2.5050760465305477, "learning_rate": 4.968690887743788e-07, "loss": 0.5672, "step": 29230 }, { "epoch": 0.9057813855496344, "grad_norm": 2.8149396539655784, "learning_rate": 4.960537473093732e-07, "loss": 0.6169, "step": 29235 }, { "epoch": 0.9059362994175238, "grad_norm": 3.01811925451465, "learning_rate": 4.952384058443677e-07, "loss": 0.671, "step": 29240 }, { "epoch": 0.9060912132854133, "grad_norm": 2.7643211556428846, "learning_rate": 4.944230643793621e-07, "loss": 0.6305, "step": 29245 }, { "epoch": 0.9062461271533028, "grad_norm": 3.0186917114684535, "learning_rate": 4.936077229143566e-07, "loss": 0.71, "step": 29250 }, { "epoch": 0.9064010410211922, "grad_norm": 2.8200876014360596, "learning_rate": 4.92792381449351e-07, "loss": 0.6947, "step": 29255 }, { "epoch": 0.9065559548890817, "grad_norm": 2.5631989697328326, "learning_rate": 4.919770399843455e-07, "loss": 0.6639, "step": 29260 }, { "epoch": 0.9067108687569712, "grad_norm": 2.5471000426117083, "learning_rate": 4.9116169851934e-07, "loss": 0.7304, "step": 29265 }, { "epoch": 0.9068657826248606, "grad_norm": 2.3082473602113445, "learning_rate": 4.903463570543344e-07, "loss": 0.646, "step": 29270 }, { "epoch": 0.9070206964927501, "grad_norm": 2.461940908944687, "learning_rate": 4.895310155893288e-07, "loss": 0.7036, "step": 29275 }, { "epoch": 0.9071756103606395, "grad_norm": 2.7963252529906613, "learning_rate": 4.887156741243233e-07, "loss": 0.6155, "step": 29280 }, { "epoch": 0.9073305242285289, "grad_norm": 3.4224751745936546, "learning_rate": 4.879003326593178e-07, "loss": 0.6024, "step": 29285 }, { "epoch": 0.9074854380964184, "grad_norm": 5.367135470750747, "learning_rate": 4.870849911943123e-07, "loss": 0.6273, "step": 29290 }, { "epoch": 0.9076403519643078, "grad_norm": 3.0018105507357355, "learning_rate": 4.862696497293066e-07, "loss": 0.7605, "step": 29295 }, { "epoch": 0.9077952658321973, "grad_norm": 4.538405326626304, "learning_rate": 4.854543082643012e-07, "loss": 0.6547, "step": 29300 }, { "epoch": 0.9079501797000867, "grad_norm": 3.619279387362666, "learning_rate": 4.846389667992956e-07, "loss": 0.6436, "step": 29305 }, { "epoch": 0.9081050935679762, "grad_norm": 2.3497666681449485, "learning_rate": 4.838236253342901e-07, "loss": 0.6351, "step": 29310 }, { "epoch": 0.9082600074358657, "grad_norm": 4.123278854588435, "learning_rate": 4.830082838692845e-07, "loss": 0.6959, "step": 29315 }, { "epoch": 0.9084149213037551, "grad_norm": 2.238614483875399, "learning_rate": 4.821929424042789e-07, "loss": 0.6864, "step": 29320 }, { "epoch": 0.9085698351716446, "grad_norm": 2.478338944804345, "learning_rate": 4.813776009392735e-07, "loss": 0.5897, "step": 29325 }, { "epoch": 0.908724749039534, "grad_norm": 2.427725697134511, "learning_rate": 4.805622594742678e-07, "loss": 0.6814, "step": 29330 }, { "epoch": 0.9088796629074235, "grad_norm": 2.2963294403627588, "learning_rate": 4.797469180092623e-07, "loss": 0.6667, "step": 29335 }, { "epoch": 0.909034576775313, "grad_norm": 3.1990786410733993, "learning_rate": 4.789315765442567e-07, "loss": 0.6511, "step": 29340 }, { "epoch": 0.9091894906432024, "grad_norm": 3.0356519671356845, "learning_rate": 4.781162350792512e-07, "loss": 0.7304, "step": 29345 }, { "epoch": 0.9093444045110919, "grad_norm": 3.431443723661517, "learning_rate": 4.773008936142457e-07, "loss": 0.6431, "step": 29350 }, { "epoch": 0.9094993183789812, "grad_norm": 3.096225101539097, "learning_rate": 4.764855521492402e-07, "loss": 0.6162, "step": 29355 }, { "epoch": 0.9096542322468707, "grad_norm": 3.0397558301620884, "learning_rate": 4.756702106842346e-07, "loss": 0.5717, "step": 29360 }, { "epoch": 0.9098091461147602, "grad_norm": 2.6726823997876052, "learning_rate": 4.7485486921922904e-07, "loss": 0.6367, "step": 29365 }, { "epoch": 0.9099640599826496, "grad_norm": 2.864140422795398, "learning_rate": 4.740395277542235e-07, "loss": 0.7101, "step": 29370 }, { "epoch": 0.9101189738505391, "grad_norm": 2.443266367083815, "learning_rate": 4.7322418628921796e-07, "loss": 0.6551, "step": 29375 }, { "epoch": 0.9102738877184285, "grad_norm": 2.5399050810722215, "learning_rate": 4.724088448242124e-07, "loss": 0.6017, "step": 29380 }, { "epoch": 0.910428801586318, "grad_norm": 3.021649279344778, "learning_rate": 4.715935033592069e-07, "loss": 0.6639, "step": 29385 }, { "epoch": 0.9105837154542075, "grad_norm": 2.628939233319431, "learning_rate": 4.707781618942013e-07, "loss": 0.6943, "step": 29390 }, { "epoch": 0.9107386293220969, "grad_norm": 2.1627332352323383, "learning_rate": 4.699628204291958e-07, "loss": 0.6507, "step": 29395 }, { "epoch": 0.9108935431899864, "grad_norm": 4.3059656773098824, "learning_rate": 4.6914747896419026e-07, "loss": 0.668, "step": 29400 }, { "epoch": 0.9110484570578758, "grad_norm": 2.6232450348780403, "learning_rate": 4.683321374991847e-07, "loss": 0.6646, "step": 29405 }, { "epoch": 0.9112033709257653, "grad_norm": 3.639027828451219, "learning_rate": 4.675167960341791e-07, "loss": 0.603, "step": 29410 }, { "epoch": 0.9113582847936548, "grad_norm": 2.652718650508682, "learning_rate": 4.667014545691736e-07, "loss": 0.6686, "step": 29415 }, { "epoch": 0.9115131986615442, "grad_norm": 2.7258409081924917, "learning_rate": 4.658861131041681e-07, "loss": 0.6507, "step": 29420 }, { "epoch": 0.9116681125294336, "grad_norm": 4.266806908089846, "learning_rate": 4.6507077163916255e-07, "loss": 0.6453, "step": 29425 }, { "epoch": 0.911823026397323, "grad_norm": 2.7659302069510354, "learning_rate": 4.6425543017415696e-07, "loss": 0.6383, "step": 29430 }, { "epoch": 0.9119779402652125, "grad_norm": 2.9548702753512477, "learning_rate": 4.634400887091514e-07, "loss": 0.6413, "step": 29435 }, { "epoch": 0.912132854133102, "grad_norm": 3.246061306913716, "learning_rate": 4.626247472441459e-07, "loss": 0.7369, "step": 29440 }, { "epoch": 0.9122877680009914, "grad_norm": 2.2375122203939815, "learning_rate": 4.618094057791404e-07, "loss": 0.6289, "step": 29445 }, { "epoch": 0.9124426818688809, "grad_norm": 2.3025221589359264, "learning_rate": 4.609940643141348e-07, "loss": 0.5923, "step": 29450 }, { "epoch": 0.9125975957367704, "grad_norm": 2.817362619450808, "learning_rate": 4.6017872284912925e-07, "loss": 0.6487, "step": 29455 }, { "epoch": 0.9127525096046598, "grad_norm": 2.3769504599625066, "learning_rate": 4.593633813841237e-07, "loss": 0.7254, "step": 29460 }, { "epoch": 0.9129074234725493, "grad_norm": 2.764091044165748, "learning_rate": 4.585480399191182e-07, "loss": 0.6972, "step": 29465 }, { "epoch": 0.9130623373404387, "grad_norm": 3.6410123105225938, "learning_rate": 4.5773269845411263e-07, "loss": 0.5727, "step": 29470 }, { "epoch": 0.9132172512083282, "grad_norm": 2.5912426914451463, "learning_rate": 4.569173569891071e-07, "loss": 0.6635, "step": 29475 }, { "epoch": 0.9133721650762177, "grad_norm": 2.592588723283196, "learning_rate": 4.561020155241015e-07, "loss": 0.6141, "step": 29480 }, { "epoch": 0.9135270789441071, "grad_norm": 4.716849818717751, "learning_rate": 4.5528667405909595e-07, "loss": 0.7111, "step": 29485 }, { "epoch": 0.9136819928119966, "grad_norm": 2.234676141605445, "learning_rate": 4.5447133259409047e-07, "loss": 0.603, "step": 29490 }, { "epoch": 0.9138369066798859, "grad_norm": 2.5259476393145266, "learning_rate": 4.536559911290849e-07, "loss": 0.7286, "step": 29495 }, { "epoch": 0.9139918205477754, "grad_norm": 2.398222594315409, "learning_rate": 4.5284064966407933e-07, "loss": 0.6701, "step": 29500 }, { "epoch": 0.9141467344156649, "grad_norm": 2.0574788326029667, "learning_rate": 4.520253081990738e-07, "loss": 0.6117, "step": 29505 }, { "epoch": 0.9143016482835543, "grad_norm": 2.691360448246278, "learning_rate": 4.5120996673406825e-07, "loss": 0.6959, "step": 29510 }, { "epoch": 0.9144565621514438, "grad_norm": 2.7086416833740286, "learning_rate": 4.5039462526906276e-07, "loss": 0.6282, "step": 29515 }, { "epoch": 0.9146114760193332, "grad_norm": 3.0612893790839966, "learning_rate": 4.4957928380405717e-07, "loss": 0.6643, "step": 29520 }, { "epoch": 0.9147663898872227, "grad_norm": 2.9196302449188454, "learning_rate": 4.4876394233905163e-07, "loss": 0.6589, "step": 29525 }, { "epoch": 0.9149213037551122, "grad_norm": 2.6580027015928263, "learning_rate": 4.479486008740461e-07, "loss": 0.662, "step": 29530 }, { "epoch": 0.9150762176230016, "grad_norm": 2.71966837996242, "learning_rate": 4.471332594090405e-07, "loss": 0.7679, "step": 29535 }, { "epoch": 0.9152311314908911, "grad_norm": 2.77147771028353, "learning_rate": 4.46317917944035e-07, "loss": 0.6482, "step": 29540 }, { "epoch": 0.9153860453587805, "grad_norm": 2.10006144409714, "learning_rate": 4.4550257647902946e-07, "loss": 0.6155, "step": 29545 }, { "epoch": 0.91554095922667, "grad_norm": 2.868504812015983, "learning_rate": 4.446872350140239e-07, "loss": 0.5554, "step": 29550 }, { "epoch": 0.9156958730945595, "grad_norm": 4.126521351829811, "learning_rate": 4.4387189354901833e-07, "loss": 0.7327, "step": 29555 }, { "epoch": 0.9158507869624489, "grad_norm": 2.380128870738178, "learning_rate": 4.4305655208401284e-07, "loss": 0.6907, "step": 29560 }, { "epoch": 0.9160057008303383, "grad_norm": 2.6734457412618675, "learning_rate": 4.422412106190073e-07, "loss": 0.7002, "step": 29565 }, { "epoch": 0.9161606146982277, "grad_norm": 2.8532755779540646, "learning_rate": 4.4142586915400176e-07, "loss": 0.6308, "step": 29570 }, { "epoch": 0.9163155285661172, "grad_norm": 2.5291294680555216, "learning_rate": 4.4061052768899617e-07, "loss": 0.6036, "step": 29575 }, { "epoch": 0.9164704424340067, "grad_norm": 2.838173510385855, "learning_rate": 4.397951862239906e-07, "loss": 0.5913, "step": 29580 }, { "epoch": 0.9166253563018961, "grad_norm": 2.8348711771405575, "learning_rate": 4.3897984475898514e-07, "loss": 0.5783, "step": 29585 }, { "epoch": 0.9167802701697856, "grad_norm": 2.630411776541012, "learning_rate": 4.3816450329397954e-07, "loss": 0.6906, "step": 29590 }, { "epoch": 0.916935184037675, "grad_norm": 2.7695144844650623, "learning_rate": 4.37349161828974e-07, "loss": 0.5601, "step": 29595 }, { "epoch": 0.9170900979055645, "grad_norm": 3.1793346750552196, "learning_rate": 4.3653382036396846e-07, "loss": 0.6864, "step": 29600 }, { "epoch": 0.917245011773454, "grad_norm": 2.9930992661467974, "learning_rate": 4.3571847889896287e-07, "loss": 0.6344, "step": 29605 }, { "epoch": 0.9173999256413434, "grad_norm": 2.4900032833965438, "learning_rate": 4.349031374339574e-07, "loss": 0.5878, "step": 29610 }, { "epoch": 0.9175548395092329, "grad_norm": 2.6016889288854217, "learning_rate": 4.3408779596895184e-07, "loss": 0.6861, "step": 29615 }, { "epoch": 0.9177097533771224, "grad_norm": 2.5310009462732728, "learning_rate": 4.332724545039463e-07, "loss": 0.6512, "step": 29620 }, { "epoch": 0.9178646672450118, "grad_norm": 2.8731353733433225, "learning_rate": 4.324571130389407e-07, "loss": 0.711, "step": 29625 }, { "epoch": 0.9180195811129013, "grad_norm": 2.574812726312412, "learning_rate": 4.316417715739352e-07, "loss": 0.7417, "step": 29630 }, { "epoch": 0.9181744949807907, "grad_norm": 2.1372511624807005, "learning_rate": 4.308264301089297e-07, "loss": 0.6728, "step": 29635 }, { "epoch": 0.9183294088486801, "grad_norm": 2.4944058282343438, "learning_rate": 4.3001108864392413e-07, "loss": 0.6103, "step": 29640 }, { "epoch": 0.9184843227165695, "grad_norm": 2.51216589679615, "learning_rate": 4.2919574717891854e-07, "loss": 0.6401, "step": 29645 }, { "epoch": 0.918639236584459, "grad_norm": 2.9486422457689865, "learning_rate": 4.28380405713913e-07, "loss": 0.7003, "step": 29650 }, { "epoch": 0.9187941504523485, "grad_norm": 2.6673240483912157, "learning_rate": 4.275650642489075e-07, "loss": 0.6602, "step": 29655 }, { "epoch": 0.9189490643202379, "grad_norm": 3.15988503854348, "learning_rate": 4.2674972278390197e-07, "loss": 0.5804, "step": 29660 }, { "epoch": 0.9191039781881274, "grad_norm": 2.41099995269697, "learning_rate": 4.259343813188964e-07, "loss": 0.7185, "step": 29665 }, { "epoch": 0.9192588920560169, "grad_norm": 2.591375509553596, "learning_rate": 4.2511903985389084e-07, "loss": 0.6646, "step": 29670 }, { "epoch": 0.9194138059239063, "grad_norm": 2.572516593695783, "learning_rate": 4.243036983888853e-07, "loss": 0.6083, "step": 29675 }, { "epoch": 0.9195687197917958, "grad_norm": 2.366366522199376, "learning_rate": 4.234883569238798e-07, "loss": 0.6157, "step": 29680 }, { "epoch": 0.9197236336596852, "grad_norm": 2.788618847635024, "learning_rate": 4.226730154588742e-07, "loss": 0.7077, "step": 29685 }, { "epoch": 0.9198785475275747, "grad_norm": 2.7431024843803815, "learning_rate": 4.2185767399386867e-07, "loss": 0.6982, "step": 29690 }, { "epoch": 0.9200334613954642, "grad_norm": 3.2850363521562618, "learning_rate": 4.210423325288631e-07, "loss": 0.7139, "step": 29695 }, { "epoch": 0.9201883752633536, "grad_norm": 3.3001746173189326, "learning_rate": 4.2022699106385754e-07, "loss": 0.6445, "step": 29700 }, { "epoch": 0.9203432891312431, "grad_norm": 2.1379297927793477, "learning_rate": 4.1941164959885205e-07, "loss": 0.7372, "step": 29705 }, { "epoch": 0.9204982029991324, "grad_norm": 2.597795693843738, "learning_rate": 4.185963081338465e-07, "loss": 0.6019, "step": 29710 }, { "epoch": 0.9206531168670219, "grad_norm": 2.5424586121502597, "learning_rate": 4.177809666688409e-07, "loss": 0.6926, "step": 29715 }, { "epoch": 0.9208080307349114, "grad_norm": 2.651697150528861, "learning_rate": 4.169656252038354e-07, "loss": 0.6347, "step": 29720 }, { "epoch": 0.9209629446028008, "grad_norm": 3.7094200229013325, "learning_rate": 4.161502837388299e-07, "loss": 0.6123, "step": 29725 }, { "epoch": 0.9211178584706903, "grad_norm": 2.6424058450948373, "learning_rate": 4.1533494227382435e-07, "loss": 0.6567, "step": 29730 }, { "epoch": 0.9212727723385797, "grad_norm": 2.414984309395466, "learning_rate": 4.1451960080881875e-07, "loss": 0.6993, "step": 29735 }, { "epoch": 0.9214276862064692, "grad_norm": 2.5413952216898044, "learning_rate": 4.137042593438132e-07, "loss": 0.6682, "step": 29740 }, { "epoch": 0.9215826000743587, "grad_norm": 3.0716098276071424, "learning_rate": 4.1288891787880767e-07, "loss": 0.6392, "step": 29745 }, { "epoch": 0.9217375139422481, "grad_norm": 3.6435700965859255, "learning_rate": 4.120735764138022e-07, "loss": 0.7294, "step": 29750 }, { "epoch": 0.9218924278101376, "grad_norm": 2.075452490027155, "learning_rate": 4.112582349487966e-07, "loss": 0.6857, "step": 29755 }, { "epoch": 0.922047341678027, "grad_norm": 2.6364085525148564, "learning_rate": 4.1044289348379105e-07, "loss": 0.645, "step": 29760 }, { "epoch": 0.9222022555459165, "grad_norm": 2.6785433397224714, "learning_rate": 4.096275520187855e-07, "loss": 0.5506, "step": 29765 }, { "epoch": 0.922357169413806, "grad_norm": 3.3747540575492425, "learning_rate": 4.088122105537799e-07, "loss": 0.6311, "step": 29770 }, { "epoch": 0.9225120832816954, "grad_norm": 2.560951104678476, "learning_rate": 4.079968690887744e-07, "loss": 0.6273, "step": 29775 }, { "epoch": 0.9226669971495848, "grad_norm": 2.5342809980739553, "learning_rate": 4.071815276237689e-07, "loss": 0.602, "step": 29780 }, { "epoch": 0.9228219110174742, "grad_norm": 2.7397075295375197, "learning_rate": 4.0636618615876334e-07, "loss": 0.7783, "step": 29785 }, { "epoch": 0.9229768248853637, "grad_norm": 2.9510410318665974, "learning_rate": 4.0555084469375775e-07, "loss": 0.6468, "step": 29790 }, { "epoch": 0.9231317387532532, "grad_norm": 3.1529026713882775, "learning_rate": 4.0473550322875226e-07, "loss": 0.6295, "step": 29795 }, { "epoch": 0.9232866526211426, "grad_norm": 2.36603428477206, "learning_rate": 4.039201617637467e-07, "loss": 0.6835, "step": 29800 }, { "epoch": 0.9234415664890321, "grad_norm": 2.072695731917544, "learning_rate": 4.031048202987411e-07, "loss": 0.6035, "step": 29805 }, { "epoch": 0.9235964803569215, "grad_norm": 2.9893036689942343, "learning_rate": 4.022894788337356e-07, "loss": 0.7426, "step": 29810 }, { "epoch": 0.923751394224811, "grad_norm": 2.447352541169412, "learning_rate": 4.0147413736873004e-07, "loss": 0.6296, "step": 29815 }, { "epoch": 0.9239063080927005, "grad_norm": 2.668934553627414, "learning_rate": 4.0065879590372456e-07, "loss": 0.6699, "step": 29820 }, { "epoch": 0.9240612219605899, "grad_norm": 2.385518680538775, "learning_rate": 3.9984345443871896e-07, "loss": 0.6163, "step": 29825 }, { "epoch": 0.9242161358284794, "grad_norm": 2.5880617917968656, "learning_rate": 3.990281129737134e-07, "loss": 0.6145, "step": 29830 }, { "epoch": 0.9243710496963689, "grad_norm": 4.178531606810247, "learning_rate": 3.982127715087079e-07, "loss": 0.6441, "step": 29835 }, { "epoch": 0.9245259635642583, "grad_norm": 2.586759542912875, "learning_rate": 3.973974300437023e-07, "loss": 0.6393, "step": 29840 }, { "epoch": 0.9246808774321478, "grad_norm": 2.69682409029252, "learning_rate": 3.965820885786968e-07, "loss": 0.586, "step": 29845 }, { "epoch": 0.9248357913000371, "grad_norm": 2.830309432684121, "learning_rate": 3.9576674711369126e-07, "loss": 0.6711, "step": 29850 }, { "epoch": 0.9249907051679266, "grad_norm": 2.843214200280989, "learning_rate": 3.949514056486857e-07, "loss": 0.6136, "step": 29855 }, { "epoch": 0.925145619035816, "grad_norm": 3.026317229066577, "learning_rate": 3.941360641836801e-07, "loss": 0.6541, "step": 29860 }, { "epoch": 0.9253005329037055, "grad_norm": 2.4704443858930456, "learning_rate": 3.933207227186746e-07, "loss": 0.5929, "step": 29865 }, { "epoch": 0.925455446771595, "grad_norm": 3.048687360978743, "learning_rate": 3.925053812536691e-07, "loss": 0.7253, "step": 29870 }, { "epoch": 0.9256103606394844, "grad_norm": 2.660710953753187, "learning_rate": 3.9169003978866355e-07, "loss": 0.6612, "step": 29875 }, { "epoch": 0.9257652745073739, "grad_norm": 2.662582378943316, "learning_rate": 3.9087469832365796e-07, "loss": 0.606, "step": 29880 }, { "epoch": 0.9259201883752634, "grad_norm": 2.1650038560045695, "learning_rate": 3.900593568586524e-07, "loss": 0.6074, "step": 29885 }, { "epoch": 0.9260751022431528, "grad_norm": 3.799149371880499, "learning_rate": 3.8924401539364693e-07, "loss": 0.6347, "step": 29890 }, { "epoch": 0.9262300161110423, "grad_norm": 2.5847539625716496, "learning_rate": 3.884286739286414e-07, "loss": 0.6083, "step": 29895 }, { "epoch": 0.9263849299789317, "grad_norm": 2.5975391443280147, "learning_rate": 3.876133324636358e-07, "loss": 0.6392, "step": 29900 }, { "epoch": 0.9265398438468212, "grad_norm": 2.333788274807979, "learning_rate": 3.8679799099863026e-07, "loss": 0.6831, "step": 29905 }, { "epoch": 0.9266947577147107, "grad_norm": 3.1953499831186827, "learning_rate": 3.8598264953362466e-07, "loss": 0.6922, "step": 29910 }, { "epoch": 0.9268496715826001, "grad_norm": 3.003725381611021, "learning_rate": 3.851673080686192e-07, "loss": 0.5839, "step": 29915 }, { "epoch": 0.9270045854504896, "grad_norm": 2.6518600997358193, "learning_rate": 3.8435196660361363e-07, "loss": 0.6702, "step": 29920 }, { "epoch": 0.9271594993183789, "grad_norm": 2.999351164212627, "learning_rate": 3.835366251386081e-07, "loss": 0.6422, "step": 29925 }, { "epoch": 0.9273144131862684, "grad_norm": 2.3252314403211964, "learning_rate": 3.827212836736025e-07, "loss": 0.6384, "step": 29930 }, { "epoch": 0.9274693270541579, "grad_norm": 3.2429438518851668, "learning_rate": 3.8190594220859696e-07, "loss": 0.7002, "step": 29935 }, { "epoch": 0.9276242409220473, "grad_norm": 2.5361385841730244, "learning_rate": 3.8109060074359147e-07, "loss": 0.6066, "step": 29940 }, { "epoch": 0.9277791547899368, "grad_norm": 2.3724913662067126, "learning_rate": 3.8027525927858593e-07, "loss": 0.6489, "step": 29945 }, { "epoch": 0.9279340686578262, "grad_norm": 3.0416067203288093, "learning_rate": 3.7945991781358034e-07, "loss": 0.7518, "step": 29950 }, { "epoch": 0.9280889825257157, "grad_norm": 3.0799489914459275, "learning_rate": 3.786445763485748e-07, "loss": 0.7268, "step": 29955 }, { "epoch": 0.9282438963936052, "grad_norm": 2.000391545909553, "learning_rate": 3.778292348835693e-07, "loss": 0.6441, "step": 29960 }, { "epoch": 0.9283988102614946, "grad_norm": 2.572089932345012, "learning_rate": 3.7701389341856377e-07, "loss": 0.6931, "step": 29965 }, { "epoch": 0.9285537241293841, "grad_norm": 2.19486662737595, "learning_rate": 3.7619855195355817e-07, "loss": 0.6037, "step": 29970 }, { "epoch": 0.9287086379972735, "grad_norm": 2.642864742212506, "learning_rate": 3.7538321048855263e-07, "loss": 0.6192, "step": 29975 }, { "epoch": 0.928863551865163, "grad_norm": 2.5541402678443594, "learning_rate": 3.745678690235471e-07, "loss": 0.7063, "step": 29980 }, { "epoch": 0.9290184657330525, "grad_norm": 2.789641901321372, "learning_rate": 3.737525275585416e-07, "loss": 0.6447, "step": 29985 }, { "epoch": 0.9291733796009419, "grad_norm": 2.747605120709773, "learning_rate": 3.72937186093536e-07, "loss": 0.6493, "step": 29990 }, { "epoch": 0.9293282934688313, "grad_norm": 2.137530948246484, "learning_rate": 3.7212184462853047e-07, "loss": 0.6272, "step": 29995 }, { "epoch": 0.9294832073367207, "grad_norm": 2.7980709479095243, "learning_rate": 3.713065031635249e-07, "loss": 0.6773, "step": 30000 }, { "epoch": 0.9296381212046102, "grad_norm": 2.646767454704883, "learning_rate": 3.7049116169851933e-07, "loss": 0.7188, "step": 30005 }, { "epoch": 0.9297930350724997, "grad_norm": 2.739880560048276, "learning_rate": 3.6967582023351384e-07, "loss": 0.6405, "step": 30010 }, { "epoch": 0.9299479489403891, "grad_norm": 2.969596336050028, "learning_rate": 3.688604787685083e-07, "loss": 0.6934, "step": 30015 }, { "epoch": 0.9301028628082786, "grad_norm": 2.486394480787061, "learning_rate": 3.680451373035027e-07, "loss": 0.6657, "step": 30020 }, { "epoch": 0.930257776676168, "grad_norm": 1.987316585492585, "learning_rate": 3.6722979583849717e-07, "loss": 0.5814, "step": 30025 }, { "epoch": 0.9304126905440575, "grad_norm": 2.429196072310317, "learning_rate": 3.6641445437349163e-07, "loss": 0.6745, "step": 30030 }, { "epoch": 0.930567604411947, "grad_norm": 2.437680496833149, "learning_rate": 3.6559911290848614e-07, "loss": 0.67, "step": 30035 }, { "epoch": 0.9307225182798364, "grad_norm": 2.7025792059130773, "learning_rate": 3.6478377144348055e-07, "loss": 0.6314, "step": 30040 }, { "epoch": 0.9308774321477259, "grad_norm": 4.337630100407594, "learning_rate": 3.63968429978475e-07, "loss": 0.6254, "step": 30045 }, { "epoch": 0.9310323460156154, "grad_norm": 2.264984912026214, "learning_rate": 3.6315308851346946e-07, "loss": 0.6665, "step": 30050 }, { "epoch": 0.9311872598835048, "grad_norm": 2.697401099981177, "learning_rate": 3.62337747048464e-07, "loss": 0.7028, "step": 30055 }, { "epoch": 0.9313421737513943, "grad_norm": 2.472801838940025, "learning_rate": 3.615224055834584e-07, "loss": 0.6633, "step": 30060 }, { "epoch": 0.9314970876192836, "grad_norm": 2.8558241656053482, "learning_rate": 3.6070706411845284e-07, "loss": 0.6445, "step": 30065 }, { "epoch": 0.9316520014871731, "grad_norm": 2.673034332323307, "learning_rate": 3.598917226534473e-07, "loss": 0.6497, "step": 30070 }, { "epoch": 0.9318069153550625, "grad_norm": 2.975371223041257, "learning_rate": 3.590763811884417e-07, "loss": 0.6148, "step": 30075 }, { "epoch": 0.931961829222952, "grad_norm": 2.6956785178415097, "learning_rate": 3.582610397234362e-07, "loss": 0.6747, "step": 30080 }, { "epoch": 0.9321167430908415, "grad_norm": 4.460169707784335, "learning_rate": 3.574456982584307e-07, "loss": 0.6321, "step": 30085 }, { "epoch": 0.9322716569587309, "grad_norm": 2.9147684011777835, "learning_rate": 3.5663035679342514e-07, "loss": 0.7496, "step": 30090 }, { "epoch": 0.9324265708266204, "grad_norm": 2.434939740261513, "learning_rate": 3.5581501532841954e-07, "loss": 0.5965, "step": 30095 }, { "epoch": 0.9325814846945099, "grad_norm": 2.698044156503164, "learning_rate": 3.54999673863414e-07, "loss": 0.695, "step": 30100 }, { "epoch": 0.9327363985623993, "grad_norm": 3.0498255443560005, "learning_rate": 3.541843323984085e-07, "loss": 0.6828, "step": 30105 }, { "epoch": 0.9328913124302888, "grad_norm": 2.118165228289471, "learning_rate": 3.533689909334029e-07, "loss": 0.6169, "step": 30110 }, { "epoch": 0.9330462262981782, "grad_norm": 3.4365005882424113, "learning_rate": 3.525536494683974e-07, "loss": 0.7048, "step": 30115 }, { "epoch": 0.9332011401660677, "grad_norm": 3.489141684313873, "learning_rate": 3.5173830800339184e-07, "loss": 0.6207, "step": 30120 }, { "epoch": 0.9333560540339572, "grad_norm": 2.909406387706419, "learning_rate": 3.5092296653838635e-07, "loss": 0.6504, "step": 30125 }, { "epoch": 0.9335109679018466, "grad_norm": 2.664746333944914, "learning_rate": 3.5010762507338076e-07, "loss": 0.6322, "step": 30130 }, { "epoch": 0.933665881769736, "grad_norm": 2.6375619962993393, "learning_rate": 3.492922836083752e-07, "loss": 0.6514, "step": 30135 }, { "epoch": 0.9338207956376254, "grad_norm": 3.2186568327497516, "learning_rate": 3.484769421433697e-07, "loss": 0.6676, "step": 30140 }, { "epoch": 0.9339757095055149, "grad_norm": 2.3517752141790167, "learning_rate": 3.476616006783641e-07, "loss": 0.6412, "step": 30145 }, { "epoch": 0.9341306233734044, "grad_norm": 2.4852097437466885, "learning_rate": 3.468462592133586e-07, "loss": 0.6151, "step": 30150 }, { "epoch": 0.9342855372412938, "grad_norm": 2.895310067569968, "learning_rate": 3.4603091774835305e-07, "loss": 0.7117, "step": 30155 }, { "epoch": 0.9344404511091833, "grad_norm": 3.4878886082609246, "learning_rate": 3.452155762833475e-07, "loss": 0.6477, "step": 30160 }, { "epoch": 0.9345953649770727, "grad_norm": 2.634002703034323, "learning_rate": 3.444002348183419e-07, "loss": 0.7307, "step": 30165 }, { "epoch": 0.9347502788449622, "grad_norm": 2.7998696632680313, "learning_rate": 3.435848933533364e-07, "loss": 0.6206, "step": 30170 }, { "epoch": 0.9349051927128517, "grad_norm": 2.4806301991309345, "learning_rate": 3.427695518883309e-07, "loss": 0.626, "step": 30175 }, { "epoch": 0.9350601065807411, "grad_norm": 2.8063697322363494, "learning_rate": 3.4195421042332535e-07, "loss": 0.714, "step": 30180 }, { "epoch": 0.9352150204486306, "grad_norm": 2.7260250023852794, "learning_rate": 3.4113886895831975e-07, "loss": 0.6053, "step": 30185 }, { "epoch": 0.93536993431652, "grad_norm": 3.9652775952076857, "learning_rate": 3.403235274933142e-07, "loss": 0.7838, "step": 30190 }, { "epoch": 0.9355248481844095, "grad_norm": 2.721467572569529, "learning_rate": 3.3950818602830867e-07, "loss": 0.7064, "step": 30195 }, { "epoch": 0.935679762052299, "grad_norm": 2.70216138517962, "learning_rate": 3.386928445633032e-07, "loss": 0.6865, "step": 30200 }, { "epoch": 0.9358346759201884, "grad_norm": 2.556017576073894, "learning_rate": 3.378775030982976e-07, "loss": 0.6847, "step": 30205 }, { "epoch": 0.9359895897880778, "grad_norm": 2.9835029868595435, "learning_rate": 3.3706216163329205e-07, "loss": 0.5793, "step": 30210 }, { "epoch": 0.9361445036559672, "grad_norm": 2.6743275121768795, "learning_rate": 3.362468201682865e-07, "loss": 0.6405, "step": 30215 }, { "epoch": 0.9362994175238567, "grad_norm": 2.2205044746373406, "learning_rate": 3.3543147870328097e-07, "loss": 0.6369, "step": 30220 }, { "epoch": 0.9364543313917462, "grad_norm": 3.066250018981387, "learning_rate": 3.3461613723827543e-07, "loss": 0.6411, "step": 30225 }, { "epoch": 0.9366092452596356, "grad_norm": 3.9905152490133284, "learning_rate": 3.338007957732699e-07, "loss": 0.6339, "step": 30230 }, { "epoch": 0.9367641591275251, "grad_norm": 2.4633464261486453, "learning_rate": 3.329854543082643e-07, "loss": 0.6252, "step": 30235 }, { "epoch": 0.9369190729954145, "grad_norm": 2.8801173684762165, "learning_rate": 3.3217011284325875e-07, "loss": 0.642, "step": 30240 }, { "epoch": 0.937073986863304, "grad_norm": 3.3479602731415907, "learning_rate": 3.3135477137825326e-07, "loss": 0.6365, "step": 30245 }, { "epoch": 0.9372289007311935, "grad_norm": 2.27624943310664, "learning_rate": 3.305394299132477e-07, "loss": 0.639, "step": 30250 }, { "epoch": 0.9373838145990829, "grad_norm": 3.118283188163038, "learning_rate": 3.2972408844824213e-07, "loss": 0.6115, "step": 30255 }, { "epoch": 0.9375387284669724, "grad_norm": 4.251262320809837, "learning_rate": 3.289087469832366e-07, "loss": 0.607, "step": 30260 }, { "epoch": 0.9376936423348619, "grad_norm": 2.8035017009973595, "learning_rate": 3.2809340551823105e-07, "loss": 0.6942, "step": 30265 }, { "epoch": 0.9378485562027513, "grad_norm": 2.721458771818262, "learning_rate": 3.2727806405322556e-07, "loss": 0.6448, "step": 30270 }, { "epoch": 0.9380034700706408, "grad_norm": 2.8383720050782606, "learning_rate": 3.2646272258821997e-07, "loss": 0.6547, "step": 30275 }, { "epoch": 0.9381583839385301, "grad_norm": 2.5714550379598102, "learning_rate": 3.256473811232144e-07, "loss": 0.6355, "step": 30280 }, { "epoch": 0.9383132978064196, "grad_norm": 2.7622856165249647, "learning_rate": 3.248320396582089e-07, "loss": 0.6568, "step": 30285 }, { "epoch": 0.938468211674309, "grad_norm": 2.7496703460766128, "learning_rate": 3.240166981932034e-07, "loss": 0.5894, "step": 30290 }, { "epoch": 0.9386231255421985, "grad_norm": 3.0409801329139503, "learning_rate": 3.232013567281978e-07, "loss": 0.6109, "step": 30295 }, { "epoch": 0.938778039410088, "grad_norm": 2.801507739553363, "learning_rate": 3.2238601526319226e-07, "loss": 0.6325, "step": 30300 }, { "epoch": 0.9389329532779774, "grad_norm": 2.7381959846543866, "learning_rate": 3.215706737981867e-07, "loss": 0.6371, "step": 30305 }, { "epoch": 0.9390878671458669, "grad_norm": 2.3643953834504265, "learning_rate": 3.2075533233318113e-07, "loss": 0.6496, "step": 30310 }, { "epoch": 0.9392427810137564, "grad_norm": 2.5885659112264547, "learning_rate": 3.1993999086817564e-07, "loss": 0.6705, "step": 30315 }, { "epoch": 0.9393976948816458, "grad_norm": 2.3382941654853115, "learning_rate": 3.191246494031701e-07, "loss": 0.6188, "step": 30320 }, { "epoch": 0.9395526087495353, "grad_norm": 2.85616284341637, "learning_rate": 3.183093079381645e-07, "loss": 0.7051, "step": 30325 }, { "epoch": 0.9397075226174247, "grad_norm": 2.3467914429895487, "learning_rate": 3.1749396647315896e-07, "loss": 0.5917, "step": 30330 }, { "epoch": 0.9398624364853142, "grad_norm": 2.541327394000737, "learning_rate": 3.166786250081534e-07, "loss": 0.6485, "step": 30335 }, { "epoch": 0.9400173503532037, "grad_norm": 2.6250094835613966, "learning_rate": 3.1586328354314793e-07, "loss": 0.624, "step": 30340 }, { "epoch": 0.9401722642210931, "grad_norm": 2.62742097157659, "learning_rate": 3.1504794207814234e-07, "loss": 0.6709, "step": 30345 }, { "epoch": 0.9403271780889825, "grad_norm": 3.197527479217142, "learning_rate": 3.142326006131368e-07, "loss": 0.619, "step": 30350 }, { "epoch": 0.9404820919568719, "grad_norm": 2.4673706613961515, "learning_rate": 3.1341725914813126e-07, "loss": 0.6671, "step": 30355 }, { "epoch": 0.9406370058247614, "grad_norm": 2.5432257392356554, "learning_rate": 3.1260191768312567e-07, "loss": 0.6025, "step": 30360 }, { "epoch": 0.9407919196926509, "grad_norm": 2.720299360006603, "learning_rate": 3.117865762181202e-07, "loss": 0.5786, "step": 30365 }, { "epoch": 0.9409468335605403, "grad_norm": 2.6185932331541224, "learning_rate": 3.1097123475311464e-07, "loss": 0.6388, "step": 30370 }, { "epoch": 0.9411017474284298, "grad_norm": 2.8160309827768275, "learning_rate": 3.101558932881091e-07, "loss": 0.5955, "step": 30375 }, { "epoch": 0.9412566612963192, "grad_norm": 2.9674343454156125, "learning_rate": 3.0934055182310355e-07, "loss": 0.6427, "step": 30380 }, { "epoch": 0.9414115751642087, "grad_norm": 2.4882240785952208, "learning_rate": 3.08525210358098e-07, "loss": 0.6806, "step": 30385 }, { "epoch": 0.9415664890320982, "grad_norm": 2.058374470604461, "learning_rate": 3.077098688930924e-07, "loss": 0.6524, "step": 30390 }, { "epoch": 0.9417214028999876, "grad_norm": 2.814352065439632, "learning_rate": 3.0689452742808693e-07, "loss": 0.6939, "step": 30395 }, { "epoch": 0.9418763167678771, "grad_norm": 3.1346353604082573, "learning_rate": 3.0607918596308134e-07, "loss": 0.6783, "step": 30400 }, { "epoch": 0.9420312306357665, "grad_norm": 3.7466438753802187, "learning_rate": 3.0526384449807585e-07, "loss": 0.6595, "step": 30405 }, { "epoch": 0.942186144503656, "grad_norm": 3.116909181568274, "learning_rate": 3.0444850303307026e-07, "loss": 0.7282, "step": 30410 }, { "epoch": 0.9423410583715455, "grad_norm": 2.799016820612313, "learning_rate": 3.0363316156806477e-07, "loss": 0.7222, "step": 30415 }, { "epoch": 0.9424959722394348, "grad_norm": 3.918392192094032, "learning_rate": 3.028178201030592e-07, "loss": 0.6554, "step": 30420 }, { "epoch": 0.9426508861073243, "grad_norm": 3.2196213376113834, "learning_rate": 3.0200247863805363e-07, "loss": 0.6379, "step": 30425 }, { "epoch": 0.9428057999752137, "grad_norm": 2.7092855780926723, "learning_rate": 3.011871371730481e-07, "loss": 0.6561, "step": 30430 }, { "epoch": 0.9429607138431032, "grad_norm": 3.062213920776691, "learning_rate": 3.0037179570804255e-07, "loss": 0.6763, "step": 30435 }, { "epoch": 0.9431156277109927, "grad_norm": 2.4693540082120653, "learning_rate": 2.99556454243037e-07, "loss": 0.6054, "step": 30440 }, { "epoch": 0.9432705415788821, "grad_norm": 2.315341722620283, "learning_rate": 2.9874111277803147e-07, "loss": 0.6748, "step": 30445 }, { "epoch": 0.9434254554467716, "grad_norm": 2.551117909184723, "learning_rate": 2.9792577131302593e-07, "loss": 0.6341, "step": 30450 }, { "epoch": 0.943580369314661, "grad_norm": 2.876985951892894, "learning_rate": 2.971104298480204e-07, "loss": 0.7198, "step": 30455 }, { "epoch": 0.9437352831825505, "grad_norm": 2.834998010878716, "learning_rate": 2.962950883830148e-07, "loss": 0.5932, "step": 30460 }, { "epoch": 0.94389019705044, "grad_norm": 2.3197827865420884, "learning_rate": 2.954797469180093e-07, "loss": 0.5487, "step": 30465 }, { "epoch": 0.9440451109183294, "grad_norm": 2.9224049785214845, "learning_rate": 2.946644054530037e-07, "loss": 0.6955, "step": 30470 }, { "epoch": 0.9442000247862189, "grad_norm": 2.451457427861608, "learning_rate": 2.938490639879982e-07, "loss": 0.6349, "step": 30475 }, { "epoch": 0.9443549386541084, "grad_norm": 2.0448206148042174, "learning_rate": 2.9303372252299263e-07, "loss": 0.5775, "step": 30480 }, { "epoch": 0.9445098525219978, "grad_norm": 3.268775098992321, "learning_rate": 2.9221838105798714e-07, "loss": 0.6315, "step": 30485 }, { "epoch": 0.9446647663898873, "grad_norm": 3.4246150053612268, "learning_rate": 2.9140303959298155e-07, "loss": 0.6327, "step": 30490 }, { "epoch": 0.9448196802577766, "grad_norm": 2.79821785699283, "learning_rate": 2.90587698127976e-07, "loss": 0.5824, "step": 30495 }, { "epoch": 0.9449745941256661, "grad_norm": 3.1609809357292904, "learning_rate": 2.8977235666297047e-07, "loss": 0.6203, "step": 30500 }, { "epoch": 0.9451295079935556, "grad_norm": 3.194780438392983, "learning_rate": 2.8895701519796493e-07, "loss": 0.6006, "step": 30505 }, { "epoch": 0.945284421861445, "grad_norm": 3.8241750219770156, "learning_rate": 2.881416737329594e-07, "loss": 0.7132, "step": 30510 }, { "epoch": 0.9454393357293345, "grad_norm": 2.5914993889031073, "learning_rate": 2.8732633226795385e-07, "loss": 0.6732, "step": 30515 }, { "epoch": 0.9455942495972239, "grad_norm": 2.5973607443684634, "learning_rate": 2.865109908029483e-07, "loss": 0.6814, "step": 30520 }, { "epoch": 0.9457491634651134, "grad_norm": 2.4512053319346365, "learning_rate": 2.8569564933794276e-07, "loss": 0.6596, "step": 30525 }, { "epoch": 0.9459040773330029, "grad_norm": 3.1053908242497084, "learning_rate": 2.848803078729372e-07, "loss": 0.6838, "step": 30530 }, { "epoch": 0.9460589912008923, "grad_norm": 2.4743450126741857, "learning_rate": 2.840649664079317e-07, "loss": 0.6424, "step": 30535 }, { "epoch": 0.9462139050687818, "grad_norm": 2.7190627508092335, "learning_rate": 2.832496249429261e-07, "loss": 0.6789, "step": 30540 }, { "epoch": 0.9463688189366712, "grad_norm": 4.763721719406776, "learning_rate": 2.824342834779206e-07, "loss": 0.5955, "step": 30545 }, { "epoch": 0.9465237328045607, "grad_norm": 4.077417345336929, "learning_rate": 2.81618942012915e-07, "loss": 0.6034, "step": 30550 }, { "epoch": 0.9466786466724502, "grad_norm": 2.5084359325176475, "learning_rate": 2.8080360054790947e-07, "loss": 0.6621, "step": 30555 }, { "epoch": 0.9468335605403396, "grad_norm": 3.4766198632012437, "learning_rate": 2.799882590829039e-07, "loss": 0.7088, "step": 30560 }, { "epoch": 0.946988474408229, "grad_norm": 2.938535742197722, "learning_rate": 2.791729176178984e-07, "loss": 0.6124, "step": 30565 }, { "epoch": 0.9471433882761184, "grad_norm": 2.888142415883009, "learning_rate": 2.7835757615289284e-07, "loss": 0.6344, "step": 30570 }, { "epoch": 0.9472983021440079, "grad_norm": 2.230829316695861, "learning_rate": 2.775422346878873e-07, "loss": 0.6483, "step": 30575 }, { "epoch": 0.9474532160118974, "grad_norm": 2.97759031531184, "learning_rate": 2.7672689322288176e-07, "loss": 0.6036, "step": 30580 }, { "epoch": 0.9476081298797868, "grad_norm": 3.377217596727129, "learning_rate": 2.759115517578762e-07, "loss": 0.6929, "step": 30585 }, { "epoch": 0.9477630437476763, "grad_norm": 2.13392690479144, "learning_rate": 2.750962102928707e-07, "loss": 0.6215, "step": 30590 }, { "epoch": 0.9479179576155657, "grad_norm": 2.8458057488739747, "learning_rate": 2.7428086882786514e-07, "loss": 0.6906, "step": 30595 }, { "epoch": 0.9480728714834552, "grad_norm": 2.9917282580702733, "learning_rate": 2.734655273628596e-07, "loss": 0.7404, "step": 30600 }, { "epoch": 0.9482277853513447, "grad_norm": 2.93174050758522, "learning_rate": 2.7265018589785406e-07, "loss": 0.7244, "step": 30605 }, { "epoch": 0.9483826992192341, "grad_norm": 2.3116355005133493, "learning_rate": 2.718348444328485e-07, "loss": 0.6147, "step": 30610 }, { "epoch": 0.9485376130871236, "grad_norm": 2.19539163857213, "learning_rate": 2.71019502967843e-07, "loss": 0.6561, "step": 30615 }, { "epoch": 0.948692526955013, "grad_norm": 2.3166593823207142, "learning_rate": 2.7020416150283743e-07, "loss": 0.6277, "step": 30620 }, { "epoch": 0.9488474408229025, "grad_norm": 3.0020061359230166, "learning_rate": 2.6938882003783184e-07, "loss": 0.5486, "step": 30625 }, { "epoch": 0.949002354690792, "grad_norm": 2.7675497910293516, "learning_rate": 2.6857347857282635e-07, "loss": 0.6766, "step": 30630 }, { "epoch": 0.9491572685586813, "grad_norm": 2.8407461299623926, "learning_rate": 2.6775813710782076e-07, "loss": 0.7263, "step": 30635 }, { "epoch": 0.9493121824265708, "grad_norm": 3.5531349137948203, "learning_rate": 2.669427956428152e-07, "loss": 0.676, "step": 30640 }, { "epoch": 0.9494670962944602, "grad_norm": 3.745454716753459, "learning_rate": 2.661274541778097e-07, "loss": 0.6478, "step": 30645 }, { "epoch": 0.9496220101623497, "grad_norm": 2.5779599724812114, "learning_rate": 2.6531211271280414e-07, "loss": 0.685, "step": 30650 }, { "epoch": 0.9497769240302392, "grad_norm": 2.0803276084391897, "learning_rate": 2.644967712477986e-07, "loss": 0.6316, "step": 30655 }, { "epoch": 0.9499318378981286, "grad_norm": 2.6442156323550914, "learning_rate": 2.6368142978279305e-07, "loss": 0.6491, "step": 30660 }, { "epoch": 0.9500867517660181, "grad_norm": 2.45188480737573, "learning_rate": 2.628660883177875e-07, "loss": 0.684, "step": 30665 }, { "epoch": 0.9502416656339076, "grad_norm": 3.0417277158637455, "learning_rate": 2.6205074685278197e-07, "loss": 0.6801, "step": 30670 }, { "epoch": 0.950396579501797, "grad_norm": 2.469470099799005, "learning_rate": 2.6123540538777643e-07, "loss": 0.621, "step": 30675 }, { "epoch": 0.9505514933696865, "grad_norm": 2.5482942497327277, "learning_rate": 2.604200639227709e-07, "loss": 0.7045, "step": 30680 }, { "epoch": 0.9507064072375759, "grad_norm": 2.749229114358848, "learning_rate": 2.5960472245776535e-07, "loss": 0.7075, "step": 30685 }, { "epoch": 0.9508613211054654, "grad_norm": 3.2884739009004846, "learning_rate": 2.587893809927598e-07, "loss": 0.6426, "step": 30690 }, { "epoch": 0.9510162349733549, "grad_norm": 2.217532301814083, "learning_rate": 2.579740395277542e-07, "loss": 0.6508, "step": 30695 }, { "epoch": 0.9511711488412443, "grad_norm": 2.835991347114941, "learning_rate": 2.5715869806274873e-07, "loss": 0.6864, "step": 30700 }, { "epoch": 0.9513260627091337, "grad_norm": 2.8655743533967066, "learning_rate": 2.5634335659774313e-07, "loss": 0.6239, "step": 30705 }, { "epoch": 0.9514809765770231, "grad_norm": 2.2683195270305485, "learning_rate": 2.5552801513273764e-07, "loss": 0.6296, "step": 30710 }, { "epoch": 0.9516358904449126, "grad_norm": 2.0286783840999743, "learning_rate": 2.5471267366773205e-07, "loss": 0.6017, "step": 30715 }, { "epoch": 0.951790804312802, "grad_norm": 2.7300363778664383, "learning_rate": 2.538973322027265e-07, "loss": 0.6868, "step": 30720 }, { "epoch": 0.9519457181806915, "grad_norm": 2.568229242910183, "learning_rate": 2.5308199073772097e-07, "loss": 0.6164, "step": 30725 }, { "epoch": 0.952100632048581, "grad_norm": 3.72490441958618, "learning_rate": 2.5226664927271543e-07, "loss": 0.6692, "step": 30730 }, { "epoch": 0.9522555459164704, "grad_norm": 2.895001774006036, "learning_rate": 2.514513078077099e-07, "loss": 0.6786, "step": 30735 }, { "epoch": 0.9524104597843599, "grad_norm": 2.5155586992065087, "learning_rate": 2.5063596634270435e-07, "loss": 0.6674, "step": 30740 }, { "epoch": 0.9525653736522494, "grad_norm": 2.269462932388905, "learning_rate": 2.498206248776988e-07, "loss": 0.6863, "step": 30745 }, { "epoch": 0.9527202875201388, "grad_norm": 2.417486312109032, "learning_rate": 2.4900528341269326e-07, "loss": 0.587, "step": 30750 }, { "epoch": 0.9528752013880283, "grad_norm": 3.199129991269672, "learning_rate": 2.4818994194768767e-07, "loss": 0.7169, "step": 30755 }, { "epoch": 0.9530301152559177, "grad_norm": 2.3887706354132052, "learning_rate": 2.473746004826822e-07, "loss": 0.6495, "step": 30760 }, { "epoch": 0.9531850291238072, "grad_norm": 2.4569192735632277, "learning_rate": 2.465592590176766e-07, "loss": 0.6848, "step": 30765 }, { "epoch": 0.9533399429916967, "grad_norm": 3.1099622712223978, "learning_rate": 2.457439175526711e-07, "loss": 0.6323, "step": 30770 }, { "epoch": 0.953494856859586, "grad_norm": 2.3495616722916512, "learning_rate": 2.449285760876655e-07, "loss": 0.6017, "step": 30775 }, { "epoch": 0.9536497707274755, "grad_norm": 2.6871903761172775, "learning_rate": 2.4411323462266e-07, "loss": 0.6461, "step": 30780 }, { "epoch": 0.9538046845953649, "grad_norm": 2.720730918627384, "learning_rate": 2.432978931576544e-07, "loss": 0.6407, "step": 30785 }, { "epoch": 0.9539595984632544, "grad_norm": 2.7692422011698006, "learning_rate": 2.424825516926489e-07, "loss": 0.6549, "step": 30790 }, { "epoch": 0.9541145123311439, "grad_norm": 2.5433169378750744, "learning_rate": 2.4166721022764334e-07, "loss": 0.647, "step": 30795 }, { "epoch": 0.9542694261990333, "grad_norm": 2.731693454402691, "learning_rate": 2.408518687626378e-07, "loss": 0.636, "step": 30800 }, { "epoch": 0.9544243400669228, "grad_norm": 3.7271707060480503, "learning_rate": 2.4003652729763226e-07, "loss": 0.6932, "step": 30805 }, { "epoch": 0.9545792539348122, "grad_norm": 2.6355455723727923, "learning_rate": 2.392211858326267e-07, "loss": 0.6238, "step": 30810 }, { "epoch": 0.9547341678027017, "grad_norm": 2.603996361053804, "learning_rate": 2.384058443676212e-07, "loss": 0.6693, "step": 30815 }, { "epoch": 0.9548890816705912, "grad_norm": 2.4170334732262377, "learning_rate": 2.3759050290261564e-07, "loss": 0.6444, "step": 30820 }, { "epoch": 0.9550439955384806, "grad_norm": 2.688540820984469, "learning_rate": 2.3677516143761007e-07, "loss": 0.6395, "step": 30825 }, { "epoch": 0.9551989094063701, "grad_norm": 3.3451545504328974, "learning_rate": 2.3595981997260456e-07, "loss": 0.7108, "step": 30830 }, { "epoch": 0.9553538232742596, "grad_norm": 3.199832592886158, "learning_rate": 2.35144478507599e-07, "loss": 0.6275, "step": 30835 }, { "epoch": 0.955508737142149, "grad_norm": 2.307904062830733, "learning_rate": 2.3432913704259348e-07, "loss": 0.6495, "step": 30840 }, { "epoch": 0.9556636510100385, "grad_norm": 3.5861399222032433, "learning_rate": 2.335137955775879e-07, "loss": 0.6844, "step": 30845 }, { "epoch": 0.9558185648779278, "grad_norm": 2.768002252088757, "learning_rate": 2.326984541125824e-07, "loss": 0.6886, "step": 30850 }, { "epoch": 0.9559734787458173, "grad_norm": 2.565496734115969, "learning_rate": 2.3188311264757683e-07, "loss": 0.6572, "step": 30855 }, { "epoch": 0.9561283926137067, "grad_norm": 3.427263102395327, "learning_rate": 2.3106777118257126e-07, "loss": 0.6489, "step": 30860 }, { "epoch": 0.9562833064815962, "grad_norm": 4.8815880057068055, "learning_rate": 2.3025242971756575e-07, "loss": 0.6397, "step": 30865 }, { "epoch": 0.9564382203494857, "grad_norm": 2.0668906859565004, "learning_rate": 2.2943708825256018e-07, "loss": 0.5479, "step": 30870 }, { "epoch": 0.9565931342173751, "grad_norm": 2.651510220124164, "learning_rate": 2.2862174678755466e-07, "loss": 0.683, "step": 30875 }, { "epoch": 0.9567480480852646, "grad_norm": 2.771343535001805, "learning_rate": 2.278064053225491e-07, "loss": 0.6835, "step": 30880 }, { "epoch": 0.956902961953154, "grad_norm": 2.9104807174731544, "learning_rate": 2.2699106385754353e-07, "loss": 0.6112, "step": 30885 }, { "epoch": 0.9570578758210435, "grad_norm": 3.1728532613349807, "learning_rate": 2.2617572239253801e-07, "loss": 0.7014, "step": 30890 }, { "epoch": 0.957212789688933, "grad_norm": 2.2978056642832505, "learning_rate": 2.2536038092753245e-07, "loss": 0.5759, "step": 30895 }, { "epoch": 0.9573677035568224, "grad_norm": 2.66092412895382, "learning_rate": 2.2454503946252693e-07, "loss": 0.6542, "step": 30900 }, { "epoch": 0.9575226174247119, "grad_norm": 2.7976253560484263, "learning_rate": 2.2372969799752137e-07, "loss": 0.6789, "step": 30905 }, { "epoch": 0.9576775312926014, "grad_norm": 2.148534154682608, "learning_rate": 2.2291435653251585e-07, "loss": 0.6817, "step": 30910 }, { "epoch": 0.9578324451604908, "grad_norm": 2.991561966982517, "learning_rate": 2.2209901506751028e-07, "loss": 0.6122, "step": 30915 }, { "epoch": 0.9579873590283802, "grad_norm": 2.8195924530848364, "learning_rate": 2.2128367360250474e-07, "loss": 0.6425, "step": 30920 }, { "epoch": 0.9581422728962696, "grad_norm": 2.8970435539768085, "learning_rate": 2.204683321374992e-07, "loss": 0.7024, "step": 30925 }, { "epoch": 0.9582971867641591, "grad_norm": 3.581745904418852, "learning_rate": 2.1965299067249366e-07, "loss": 0.6261, "step": 30930 }, { "epoch": 0.9584521006320486, "grad_norm": 3.4250031148297864, "learning_rate": 2.1883764920748812e-07, "loss": 0.7338, "step": 30935 }, { "epoch": 0.958607014499938, "grad_norm": 3.177173711213831, "learning_rate": 2.1802230774248255e-07, "loss": 0.6456, "step": 30940 }, { "epoch": 0.9587619283678275, "grad_norm": 2.6597953999819937, "learning_rate": 2.1720696627747704e-07, "loss": 0.6494, "step": 30945 }, { "epoch": 0.9589168422357169, "grad_norm": 3.686735579050153, "learning_rate": 2.1639162481247147e-07, "loss": 0.5905, "step": 30950 }, { "epoch": 0.9590717561036064, "grad_norm": 3.968174747859255, "learning_rate": 2.1557628334746593e-07, "loss": 0.6488, "step": 30955 }, { "epoch": 0.9592266699714959, "grad_norm": 2.669753590142479, "learning_rate": 2.147609418824604e-07, "loss": 0.6385, "step": 30960 }, { "epoch": 0.9593815838393853, "grad_norm": 2.3651148077061617, "learning_rate": 2.1394560041745485e-07, "loss": 0.6639, "step": 30965 }, { "epoch": 0.9595364977072748, "grad_norm": 2.342985992678725, "learning_rate": 2.131302589524493e-07, "loss": 0.6779, "step": 30970 }, { "epoch": 0.9596914115751642, "grad_norm": 2.4866005249609215, "learning_rate": 2.1231491748744377e-07, "loss": 0.7615, "step": 30975 }, { "epoch": 0.9598463254430537, "grad_norm": 3.2264305845981918, "learning_rate": 2.1149957602243823e-07, "loss": 0.6294, "step": 30980 }, { "epoch": 0.9600012393109432, "grad_norm": 2.862970095119132, "learning_rate": 2.1068423455743266e-07, "loss": 0.6536, "step": 30985 }, { "epoch": 0.9601561531788325, "grad_norm": 2.8046137984387594, "learning_rate": 2.0986889309242712e-07, "loss": 0.6311, "step": 30990 }, { "epoch": 0.960311067046722, "grad_norm": 2.4248400178017, "learning_rate": 2.0905355162742158e-07, "loss": 0.6315, "step": 30995 }, { "epoch": 0.9604659809146114, "grad_norm": 2.5060824154540478, "learning_rate": 2.0823821016241604e-07, "loss": 0.6515, "step": 31000 }, { "epoch": 0.9606208947825009, "grad_norm": 3.103293076626594, "learning_rate": 2.074228686974105e-07, "loss": 0.6722, "step": 31005 }, { "epoch": 0.9607758086503904, "grad_norm": 3.5816497290560014, "learning_rate": 2.0660752723240495e-07, "loss": 0.6454, "step": 31010 }, { "epoch": 0.9609307225182798, "grad_norm": 2.8620313970138866, "learning_rate": 2.057921857673994e-07, "loss": 0.6563, "step": 31015 }, { "epoch": 0.9610856363861693, "grad_norm": 3.0953955558952604, "learning_rate": 2.0497684430239387e-07, "loss": 0.6138, "step": 31020 }, { "epoch": 0.9612405502540587, "grad_norm": 3.545797417116031, "learning_rate": 2.041615028373883e-07, "loss": 0.7185, "step": 31025 }, { "epoch": 0.9613954641219482, "grad_norm": 3.7266108345606646, "learning_rate": 2.033461613723828e-07, "loss": 0.6601, "step": 31030 }, { "epoch": 0.9615503779898377, "grad_norm": 4.184191225428226, "learning_rate": 2.0253081990737722e-07, "loss": 0.6246, "step": 31035 }, { "epoch": 0.9617052918577271, "grad_norm": 4.884620426345044, "learning_rate": 2.0171547844237168e-07, "loss": 0.6392, "step": 31040 }, { "epoch": 0.9618602057256166, "grad_norm": 2.709688215966512, "learning_rate": 2.0090013697736614e-07, "loss": 0.6739, "step": 31045 }, { "epoch": 0.962015119593506, "grad_norm": 2.704900376374912, "learning_rate": 2.0008479551236057e-07, "loss": 0.6679, "step": 31050 }, { "epoch": 0.9621700334613955, "grad_norm": 2.7849540628984872, "learning_rate": 1.9926945404735506e-07, "loss": 0.6664, "step": 31055 }, { "epoch": 0.9623249473292849, "grad_norm": 2.2185523843203914, "learning_rate": 1.984541125823495e-07, "loss": 0.5879, "step": 31060 }, { "epoch": 0.9624798611971743, "grad_norm": 2.75853310781113, "learning_rate": 1.9763877111734398e-07, "loss": 0.625, "step": 31065 }, { "epoch": 0.9626347750650638, "grad_norm": 3.323316838789909, "learning_rate": 1.968234296523384e-07, "loss": 0.7282, "step": 31070 }, { "epoch": 0.9627896889329532, "grad_norm": 2.3989328491633843, "learning_rate": 1.960080881873329e-07, "loss": 0.6561, "step": 31075 }, { "epoch": 0.9629446028008427, "grad_norm": 2.6801036849624125, "learning_rate": 1.9519274672232733e-07, "loss": 0.7821, "step": 31080 }, { "epoch": 0.9630995166687322, "grad_norm": 2.663764750850585, "learning_rate": 1.9437740525732176e-07, "loss": 0.6291, "step": 31085 }, { "epoch": 0.9632544305366216, "grad_norm": 2.679736147158197, "learning_rate": 1.9356206379231625e-07, "loss": 0.6615, "step": 31090 }, { "epoch": 0.9634093444045111, "grad_norm": 2.9480236973731877, "learning_rate": 1.9274672232731068e-07, "loss": 0.6427, "step": 31095 }, { "epoch": 0.9635642582724006, "grad_norm": 3.3050549719107645, "learning_rate": 1.9193138086230516e-07, "loss": 0.6302, "step": 31100 }, { "epoch": 0.96371917214029, "grad_norm": 2.5698570865532484, "learning_rate": 1.911160393972996e-07, "loss": 0.6874, "step": 31105 }, { "epoch": 0.9638740860081795, "grad_norm": 2.4949645022050073, "learning_rate": 1.9030069793229408e-07, "loss": 0.585, "step": 31110 }, { "epoch": 0.9640289998760689, "grad_norm": 2.509343421414211, "learning_rate": 1.8948535646728852e-07, "loss": 0.7021, "step": 31115 }, { "epoch": 0.9641839137439584, "grad_norm": 2.6347640124494838, "learning_rate": 1.8867001500228295e-07, "loss": 0.6232, "step": 31120 }, { "epoch": 0.9643388276118479, "grad_norm": 2.9126341118029986, "learning_rate": 1.8785467353727743e-07, "loss": 0.6653, "step": 31125 }, { "epoch": 0.9644937414797373, "grad_norm": 2.6137000040316383, "learning_rate": 1.8703933207227187e-07, "loss": 0.7148, "step": 31130 }, { "epoch": 0.9646486553476267, "grad_norm": 2.9478603055218815, "learning_rate": 1.8622399060726635e-07, "loss": 0.6674, "step": 31135 }, { "epoch": 0.9648035692155161, "grad_norm": 3.0013145200508027, "learning_rate": 1.8540864914226078e-07, "loss": 0.6516, "step": 31140 }, { "epoch": 0.9649584830834056, "grad_norm": 3.014124036734987, "learning_rate": 1.8459330767725527e-07, "loss": 0.637, "step": 31145 }, { "epoch": 0.965113396951295, "grad_norm": 2.6621798334600117, "learning_rate": 1.837779662122497e-07, "loss": 0.6321, "step": 31150 }, { "epoch": 0.9652683108191845, "grad_norm": 2.5484725970166537, "learning_rate": 1.8296262474724414e-07, "loss": 0.7263, "step": 31155 }, { "epoch": 0.965423224687074, "grad_norm": 2.939611829009823, "learning_rate": 1.8214728328223862e-07, "loss": 0.6836, "step": 31160 }, { "epoch": 0.9655781385549634, "grad_norm": 2.885873865680615, "learning_rate": 1.8133194181723305e-07, "loss": 0.6283, "step": 31165 }, { "epoch": 0.9657330524228529, "grad_norm": 4.026478137525267, "learning_rate": 1.8051660035222754e-07, "loss": 0.6559, "step": 31170 }, { "epoch": 0.9658879662907424, "grad_norm": 2.789430430644598, "learning_rate": 1.7970125888722197e-07, "loss": 0.6467, "step": 31175 }, { "epoch": 0.9660428801586318, "grad_norm": 4.25556077163631, "learning_rate": 1.7888591742221646e-07, "loss": 0.6554, "step": 31180 }, { "epoch": 0.9661977940265213, "grad_norm": 3.476921334182381, "learning_rate": 1.780705759572109e-07, "loss": 0.6285, "step": 31185 }, { "epoch": 0.9663527078944107, "grad_norm": 2.718343152432783, "learning_rate": 1.7725523449220535e-07, "loss": 0.6782, "step": 31190 }, { "epoch": 0.9665076217623002, "grad_norm": 2.7210565668710482, "learning_rate": 1.764398930271998e-07, "loss": 0.6103, "step": 31195 }, { "epoch": 0.9666625356301897, "grad_norm": 3.105414759353777, "learning_rate": 1.7562455156219424e-07, "loss": 0.7059, "step": 31200 }, { "epoch": 0.966817449498079, "grad_norm": 3.7023117559798995, "learning_rate": 1.7480921009718873e-07, "loss": 0.6136, "step": 31205 }, { "epoch": 0.9669723633659685, "grad_norm": 2.43747364232466, "learning_rate": 1.7399386863218316e-07, "loss": 0.658, "step": 31210 }, { "epoch": 0.9671272772338579, "grad_norm": 2.344546801277716, "learning_rate": 1.7317852716717762e-07, "loss": 0.675, "step": 31215 }, { "epoch": 0.9672821911017474, "grad_norm": 2.8339250575142056, "learning_rate": 1.7236318570217208e-07, "loss": 0.6154, "step": 31220 }, { "epoch": 0.9674371049696369, "grad_norm": 2.4542808022867, "learning_rate": 1.7154784423716654e-07, "loss": 0.6101, "step": 31225 }, { "epoch": 0.9675920188375263, "grad_norm": 3.1207869124018934, "learning_rate": 1.70732502772161e-07, "loss": 0.7076, "step": 31230 }, { "epoch": 0.9677469327054158, "grad_norm": 2.861366253266944, "learning_rate": 1.6991716130715546e-07, "loss": 0.6484, "step": 31235 }, { "epoch": 0.9679018465733052, "grad_norm": 2.9536494637490023, "learning_rate": 1.6910181984214991e-07, "loss": 0.6814, "step": 31240 }, { "epoch": 0.9680567604411947, "grad_norm": 2.8203665938543403, "learning_rate": 1.6828647837714437e-07, "loss": 0.7158, "step": 31245 }, { "epoch": 0.9682116743090842, "grad_norm": 2.927999517589271, "learning_rate": 1.674711369121388e-07, "loss": 0.699, "step": 31250 }, { "epoch": 0.9683665881769736, "grad_norm": 2.8287114249012197, "learning_rate": 1.6665579544713327e-07, "loss": 0.6924, "step": 31255 }, { "epoch": 0.9685215020448631, "grad_norm": 2.410865752764168, "learning_rate": 1.6584045398212772e-07, "loss": 0.5921, "step": 31260 }, { "epoch": 0.9686764159127526, "grad_norm": 4.749962690893085, "learning_rate": 1.6502511251712218e-07, "loss": 0.6454, "step": 31265 }, { "epoch": 0.968831329780642, "grad_norm": 2.784830389283282, "learning_rate": 1.6420977105211664e-07, "loss": 0.6336, "step": 31270 }, { "epoch": 0.9689862436485314, "grad_norm": 3.033221642427878, "learning_rate": 1.633944295871111e-07, "loss": 0.6773, "step": 31275 }, { "epoch": 0.9691411575164208, "grad_norm": 2.406459470680981, "learning_rate": 1.6257908812210556e-07, "loss": 0.6815, "step": 31280 }, { "epoch": 0.9692960713843103, "grad_norm": 6.155314738381247, "learning_rate": 1.617637466571e-07, "loss": 0.6153, "step": 31285 }, { "epoch": 0.9694509852521997, "grad_norm": 2.517360685851932, "learning_rate": 1.6094840519209448e-07, "loss": 0.6735, "step": 31290 }, { "epoch": 0.9696058991200892, "grad_norm": 4.007492536420191, "learning_rate": 1.601330637270889e-07, "loss": 0.6577, "step": 31295 }, { "epoch": 0.9697608129879787, "grad_norm": 2.4465550307854484, "learning_rate": 1.5931772226208337e-07, "loss": 0.6888, "step": 31300 }, { "epoch": 0.9699157268558681, "grad_norm": 3.6819751418272886, "learning_rate": 1.5850238079707783e-07, "loss": 0.6467, "step": 31305 }, { "epoch": 0.9700706407237576, "grad_norm": 2.5812535557159326, "learning_rate": 1.576870393320723e-07, "loss": 0.6834, "step": 31310 }, { "epoch": 0.970225554591647, "grad_norm": 3.1098551638893523, "learning_rate": 1.5687169786706675e-07, "loss": 0.6567, "step": 31315 }, { "epoch": 0.9703804684595365, "grad_norm": 2.7557362470562494, "learning_rate": 1.560563564020612e-07, "loss": 0.6519, "step": 31320 }, { "epoch": 0.970535382327426, "grad_norm": 2.903671139266342, "learning_rate": 1.5524101493705567e-07, "loss": 0.6688, "step": 31325 }, { "epoch": 0.9706902961953154, "grad_norm": 2.6976489213456847, "learning_rate": 1.544256734720501e-07, "loss": 0.7157, "step": 31330 }, { "epoch": 0.9708452100632049, "grad_norm": 2.3056328221332376, "learning_rate": 1.5361033200704456e-07, "loss": 0.6575, "step": 31335 }, { "epoch": 0.9710001239310944, "grad_norm": 2.825774563083986, "learning_rate": 1.5279499054203902e-07, "loss": 0.6283, "step": 31340 }, { "epoch": 0.9711550377989837, "grad_norm": 2.365107933672826, "learning_rate": 1.5197964907703348e-07, "loss": 0.6002, "step": 31345 }, { "epoch": 0.9713099516668732, "grad_norm": 2.3284911544403806, "learning_rate": 1.5116430761202794e-07, "loss": 0.7096, "step": 31350 }, { "epoch": 0.9714648655347626, "grad_norm": 2.6051297739966395, "learning_rate": 1.503489661470224e-07, "loss": 0.6487, "step": 31355 }, { "epoch": 0.9716197794026521, "grad_norm": 3.1261722866057435, "learning_rate": 1.4953362468201685e-07, "loss": 0.6162, "step": 31360 }, { "epoch": 0.9717746932705416, "grad_norm": 3.89559005461476, "learning_rate": 1.4871828321701129e-07, "loss": 0.7456, "step": 31365 }, { "epoch": 0.971929607138431, "grad_norm": 3.0460208927497945, "learning_rate": 1.4790294175200575e-07, "loss": 0.6472, "step": 31370 }, { "epoch": 0.9720845210063205, "grad_norm": 2.576137811488391, "learning_rate": 1.470876002870002e-07, "loss": 0.6715, "step": 31375 }, { "epoch": 0.9722394348742099, "grad_norm": 2.6143911348981805, "learning_rate": 1.4627225882199466e-07, "loss": 0.6739, "step": 31380 }, { "epoch": 0.9723943487420994, "grad_norm": 2.352999072657871, "learning_rate": 1.4545691735698912e-07, "loss": 0.5757, "step": 31385 }, { "epoch": 0.9725492626099889, "grad_norm": 2.0451085032926613, "learning_rate": 1.4464157589198358e-07, "loss": 0.5995, "step": 31390 }, { "epoch": 0.9727041764778783, "grad_norm": 3.16681847630082, "learning_rate": 1.4382623442697804e-07, "loss": 0.6559, "step": 31395 }, { "epoch": 0.9728590903457678, "grad_norm": 2.9786319276180104, "learning_rate": 1.4301089296197247e-07, "loss": 0.6931, "step": 31400 }, { "epoch": 0.9730140042136572, "grad_norm": 3.8086467172736245, "learning_rate": 1.4219555149696693e-07, "loss": 0.6616, "step": 31405 }, { "epoch": 0.9731689180815467, "grad_norm": 2.8687684612719915, "learning_rate": 1.413802100319614e-07, "loss": 0.7118, "step": 31410 }, { "epoch": 0.9733238319494362, "grad_norm": 2.7691055444830006, "learning_rate": 1.4056486856695585e-07, "loss": 0.6864, "step": 31415 }, { "epoch": 0.9734787458173255, "grad_norm": 2.7513498833038565, "learning_rate": 1.397495271019503e-07, "loss": 0.6396, "step": 31420 }, { "epoch": 0.973633659685215, "grad_norm": 2.194276322719706, "learning_rate": 1.3893418563694477e-07, "loss": 0.6057, "step": 31425 }, { "epoch": 0.9737885735531044, "grad_norm": 3.3148200127756455, "learning_rate": 1.381188441719392e-07, "loss": 0.6794, "step": 31430 }, { "epoch": 0.9739434874209939, "grad_norm": 3.8757796506943087, "learning_rate": 1.3730350270693366e-07, "loss": 0.6172, "step": 31435 }, { "epoch": 0.9740984012888834, "grad_norm": 2.579585496499453, "learning_rate": 1.3648816124192812e-07, "loss": 0.5674, "step": 31440 }, { "epoch": 0.9742533151567728, "grad_norm": 3.746647770752161, "learning_rate": 1.3567281977692258e-07, "loss": 0.6314, "step": 31445 }, { "epoch": 0.9744082290246623, "grad_norm": 2.619994865206916, "learning_rate": 1.3485747831191704e-07, "loss": 0.6568, "step": 31450 }, { "epoch": 0.9745631428925517, "grad_norm": 3.788322414804284, "learning_rate": 1.340421368469115e-07, "loss": 0.668, "step": 31455 }, { "epoch": 0.9747180567604412, "grad_norm": 2.8649765271045555, "learning_rate": 1.3322679538190596e-07, "loss": 0.6887, "step": 31460 }, { "epoch": 0.9748729706283307, "grad_norm": 2.4096009491050667, "learning_rate": 1.324114539169004e-07, "loss": 0.6543, "step": 31465 }, { "epoch": 0.9750278844962201, "grad_norm": 3.024880033546654, "learning_rate": 1.3159611245189485e-07, "loss": 0.653, "step": 31470 }, { "epoch": 0.9751827983641096, "grad_norm": 3.085273908563673, "learning_rate": 1.307807709868893e-07, "loss": 0.731, "step": 31475 }, { "epoch": 0.975337712231999, "grad_norm": 3.495326476932105, "learning_rate": 1.2996542952188377e-07, "loss": 0.7193, "step": 31480 }, { "epoch": 0.9754926260998885, "grad_norm": 2.568386301683636, "learning_rate": 1.2915008805687823e-07, "loss": 0.6232, "step": 31485 }, { "epoch": 0.9756475399677779, "grad_norm": 4.2150451450022866, "learning_rate": 1.2833474659187269e-07, "loss": 0.7273, "step": 31490 }, { "epoch": 0.9758024538356673, "grad_norm": 2.5968182786714933, "learning_rate": 1.2751940512686714e-07, "loss": 0.7572, "step": 31495 }, { "epoch": 0.9759573677035568, "grad_norm": 2.357593500809346, "learning_rate": 1.267040636618616e-07, "loss": 0.6479, "step": 31500 }, { "epoch": 0.9761122815714462, "grad_norm": 2.2751564038238588, "learning_rate": 1.2588872219685606e-07, "loss": 0.6999, "step": 31505 }, { "epoch": 0.9762671954393357, "grad_norm": 4.255799750327212, "learning_rate": 1.2507338073185052e-07, "loss": 0.6498, "step": 31510 }, { "epoch": 0.9764221093072252, "grad_norm": 2.847789802940729, "learning_rate": 1.2425803926684495e-07, "loss": 0.646, "step": 31515 }, { "epoch": 0.9765770231751146, "grad_norm": 2.702711414335099, "learning_rate": 1.2344269780183941e-07, "loss": 0.6754, "step": 31520 }, { "epoch": 0.9767319370430041, "grad_norm": 3.2554034589162844, "learning_rate": 1.2262735633683387e-07, "loss": 0.6323, "step": 31525 }, { "epoch": 0.9768868509108936, "grad_norm": 4.281088744458597, "learning_rate": 1.2181201487182833e-07, "loss": 0.7171, "step": 31530 }, { "epoch": 0.977041764778783, "grad_norm": 2.9042978104146857, "learning_rate": 1.209966734068228e-07, "loss": 0.6319, "step": 31535 }, { "epoch": 0.9771966786466725, "grad_norm": 3.0897854797977953, "learning_rate": 1.2018133194181725e-07, "loss": 0.6966, "step": 31540 }, { "epoch": 0.9773515925145619, "grad_norm": 2.6404438882351524, "learning_rate": 1.193659904768117e-07, "loss": 0.5417, "step": 31545 }, { "epoch": 0.9775065063824514, "grad_norm": 1.9415976153196823, "learning_rate": 1.1855064901180615e-07, "loss": 0.6513, "step": 31550 }, { "epoch": 0.9776614202503409, "grad_norm": 3.391160168814616, "learning_rate": 1.1773530754680061e-07, "loss": 0.6462, "step": 31555 }, { "epoch": 0.9778163341182302, "grad_norm": 2.505082903348405, "learning_rate": 1.1691996608179507e-07, "loss": 0.6436, "step": 31560 }, { "epoch": 0.9779712479861197, "grad_norm": 2.4384072180105387, "learning_rate": 1.1610462461678952e-07, "loss": 0.6191, "step": 31565 }, { "epoch": 0.9781261618540091, "grad_norm": 2.796236226144855, "learning_rate": 1.1528928315178398e-07, "loss": 0.6387, "step": 31570 }, { "epoch": 0.9782810757218986, "grad_norm": 2.839924511571478, "learning_rate": 1.1447394168677844e-07, "loss": 0.5941, "step": 31575 }, { "epoch": 0.9784359895897881, "grad_norm": 3.241423900179697, "learning_rate": 1.1365860022177288e-07, "loss": 0.6642, "step": 31580 }, { "epoch": 0.9785909034576775, "grad_norm": 3.1333238773653243, "learning_rate": 1.1284325875676734e-07, "loss": 0.6518, "step": 31585 }, { "epoch": 0.978745817325567, "grad_norm": 4.069503337333774, "learning_rate": 1.120279172917618e-07, "loss": 0.6046, "step": 31590 }, { "epoch": 0.9789007311934564, "grad_norm": 3.074618091184679, "learning_rate": 1.1121257582675625e-07, "loss": 0.6742, "step": 31595 }, { "epoch": 0.9790556450613459, "grad_norm": 2.2851823991763798, "learning_rate": 1.103972343617507e-07, "loss": 0.6369, "step": 31600 }, { "epoch": 0.9792105589292354, "grad_norm": 2.21621355934729, "learning_rate": 1.0958189289674517e-07, "loss": 0.6572, "step": 31605 }, { "epoch": 0.9793654727971248, "grad_norm": 2.95986722564244, "learning_rate": 1.0876655143173962e-07, "loss": 0.682, "step": 31610 }, { "epoch": 0.9795203866650143, "grad_norm": 2.8938264255799866, "learning_rate": 1.0795120996673408e-07, "loss": 0.6428, "step": 31615 }, { "epoch": 0.9796753005329037, "grad_norm": 2.370893201405932, "learning_rate": 1.0713586850172854e-07, "loss": 0.6561, "step": 31620 }, { "epoch": 0.9798302144007932, "grad_norm": 2.47061523753149, "learning_rate": 1.06320527036723e-07, "loss": 0.6681, "step": 31625 }, { "epoch": 0.9799851282686826, "grad_norm": 2.8028424063566564, "learning_rate": 1.0550518557171743e-07, "loss": 0.7031, "step": 31630 }, { "epoch": 0.980140042136572, "grad_norm": 2.3453326579729397, "learning_rate": 1.046898441067119e-07, "loss": 0.716, "step": 31635 }, { "epoch": 0.9802949560044615, "grad_norm": 2.093206059964019, "learning_rate": 1.0387450264170635e-07, "loss": 0.6805, "step": 31640 }, { "epoch": 0.9804498698723509, "grad_norm": 2.9737560472096343, "learning_rate": 1.0305916117670081e-07, "loss": 0.6844, "step": 31645 }, { "epoch": 0.9806047837402404, "grad_norm": 2.9496435302293023, "learning_rate": 1.0224381971169527e-07, "loss": 0.6743, "step": 31650 }, { "epoch": 0.9807596976081299, "grad_norm": 2.889702563535447, "learning_rate": 1.0142847824668973e-07, "loss": 0.5971, "step": 31655 }, { "epoch": 0.9809146114760193, "grad_norm": 3.963794509599309, "learning_rate": 1.0061313678168416e-07, "loss": 0.6906, "step": 31660 }, { "epoch": 0.9810695253439088, "grad_norm": 2.986288044975828, "learning_rate": 9.979779531667862e-08, "loss": 0.6452, "step": 31665 }, { "epoch": 0.9812244392117982, "grad_norm": 2.495040280083125, "learning_rate": 9.898245385167308e-08, "loss": 0.7164, "step": 31670 }, { "epoch": 0.9813793530796877, "grad_norm": 3.0566775832802646, "learning_rate": 9.816711238666754e-08, "loss": 0.6238, "step": 31675 }, { "epoch": 0.9815342669475772, "grad_norm": 2.6117514305842793, "learning_rate": 9.7351770921662e-08, "loss": 0.6065, "step": 31680 }, { "epoch": 0.9816891808154666, "grad_norm": 2.640776948412809, "learning_rate": 9.653642945665646e-08, "loss": 0.6839, "step": 31685 }, { "epoch": 0.9818440946833561, "grad_norm": 2.4686429495785824, "learning_rate": 9.572108799165092e-08, "loss": 0.651, "step": 31690 }, { "epoch": 0.9819990085512456, "grad_norm": 2.3073349718385336, "learning_rate": 9.490574652664536e-08, "loss": 0.6422, "step": 31695 }, { "epoch": 0.9821539224191349, "grad_norm": 3.145410322666235, "learning_rate": 9.409040506163982e-08, "loss": 0.5727, "step": 31700 }, { "epoch": 0.9823088362870244, "grad_norm": 2.8741692017020903, "learning_rate": 9.327506359663428e-08, "loss": 0.6497, "step": 31705 }, { "epoch": 0.9824637501549138, "grad_norm": 3.3104345755240328, "learning_rate": 9.245972213162873e-08, "loss": 0.6862, "step": 31710 }, { "epoch": 0.9826186640228033, "grad_norm": 2.434380208067471, "learning_rate": 9.164438066662319e-08, "loss": 0.593, "step": 31715 }, { "epoch": 0.9827735778906928, "grad_norm": 2.821088301694663, "learning_rate": 9.082903920161765e-08, "loss": 0.5973, "step": 31720 }, { "epoch": 0.9829284917585822, "grad_norm": 2.4251415492627455, "learning_rate": 9.00136977366121e-08, "loss": 0.7054, "step": 31725 }, { "epoch": 0.9830834056264717, "grad_norm": 2.866592830359721, "learning_rate": 8.919835627160655e-08, "loss": 0.6775, "step": 31730 }, { "epoch": 0.9832383194943611, "grad_norm": 3.5414068916071204, "learning_rate": 8.838301480660101e-08, "loss": 0.6543, "step": 31735 }, { "epoch": 0.9833932333622506, "grad_norm": 2.4365163091436344, "learning_rate": 8.756767334159547e-08, "loss": 0.6097, "step": 31740 }, { "epoch": 0.9835481472301401, "grad_norm": 2.5853478500368356, "learning_rate": 8.675233187658993e-08, "loss": 0.617, "step": 31745 }, { "epoch": 0.9837030610980295, "grad_norm": 2.961565703928924, "learning_rate": 8.593699041158439e-08, "loss": 0.6551, "step": 31750 }, { "epoch": 0.983857974965919, "grad_norm": 2.2633766139116847, "learning_rate": 8.512164894657885e-08, "loss": 0.6891, "step": 31755 }, { "epoch": 0.9840128888338084, "grad_norm": 3.360362148107893, "learning_rate": 8.430630748157328e-08, "loss": 0.6692, "step": 31760 }, { "epoch": 0.9841678027016979, "grad_norm": 2.5356436912685867, "learning_rate": 8.349096601656774e-08, "loss": 0.6422, "step": 31765 }, { "epoch": 0.9843227165695874, "grad_norm": 3.2520395884681212, "learning_rate": 8.26756245515622e-08, "loss": 0.6786, "step": 31770 }, { "epoch": 0.9844776304374767, "grad_norm": 3.1900612328503937, "learning_rate": 8.186028308655666e-08, "loss": 0.6759, "step": 31775 }, { "epoch": 0.9846325443053662, "grad_norm": 2.9352858091132936, "learning_rate": 8.104494162155112e-08, "loss": 0.6077, "step": 31780 }, { "epoch": 0.9847874581732556, "grad_norm": 2.970092833082034, "learning_rate": 8.022960015654557e-08, "loss": 0.6283, "step": 31785 }, { "epoch": 0.9849423720411451, "grad_norm": 2.7783988204438685, "learning_rate": 7.941425869154003e-08, "loss": 0.7113, "step": 31790 }, { "epoch": 0.9850972859090346, "grad_norm": 3.0635249256086055, "learning_rate": 7.859891722653447e-08, "loss": 0.5976, "step": 31795 }, { "epoch": 0.985252199776924, "grad_norm": 2.759400712784427, "learning_rate": 7.778357576152894e-08, "loss": 0.6742, "step": 31800 }, { "epoch": 0.9854071136448135, "grad_norm": 2.9769714952785375, "learning_rate": 7.696823429652338e-08, "loss": 0.6571, "step": 31805 }, { "epoch": 0.9855620275127029, "grad_norm": 2.4639456166350984, "learning_rate": 7.615289283151784e-08, "loss": 0.6501, "step": 31810 }, { "epoch": 0.9857169413805924, "grad_norm": 2.6126807166584327, "learning_rate": 7.53375513665123e-08, "loss": 0.7439, "step": 31815 }, { "epoch": 0.9858718552484819, "grad_norm": 2.9214133377472455, "learning_rate": 7.452220990150675e-08, "loss": 0.6353, "step": 31820 }, { "epoch": 0.9860267691163713, "grad_norm": 2.4186091595018384, "learning_rate": 7.370686843650121e-08, "loss": 0.6605, "step": 31825 }, { "epoch": 0.9861816829842608, "grad_norm": 2.768980305156183, "learning_rate": 7.289152697149567e-08, "loss": 0.6527, "step": 31830 }, { "epoch": 0.9863365968521502, "grad_norm": 3.1109897873550105, "learning_rate": 7.207618550649013e-08, "loss": 0.7237, "step": 31835 }, { "epoch": 0.9864915107200397, "grad_norm": 3.3227652799704126, "learning_rate": 7.126084404148459e-08, "loss": 0.6145, "step": 31840 }, { "epoch": 0.9866464245879291, "grad_norm": 2.5774239256853053, "learning_rate": 7.044550257647903e-08, "loss": 0.6773, "step": 31845 }, { "epoch": 0.9868013384558185, "grad_norm": 2.691325125906663, "learning_rate": 6.963016111147349e-08, "loss": 0.6708, "step": 31850 }, { "epoch": 0.986956252323708, "grad_norm": 3.827792654379482, "learning_rate": 6.881481964646795e-08, "loss": 0.732, "step": 31855 }, { "epoch": 0.9871111661915974, "grad_norm": 3.149605273733546, "learning_rate": 6.799947818146241e-08, "loss": 0.7029, "step": 31860 }, { "epoch": 0.9872660800594869, "grad_norm": 2.897334188796466, "learning_rate": 6.718413671645687e-08, "loss": 0.6357, "step": 31865 }, { "epoch": 0.9874209939273764, "grad_norm": 2.587594354830957, "learning_rate": 6.636879525145131e-08, "loss": 0.6855, "step": 31870 }, { "epoch": 0.9875759077952658, "grad_norm": 2.768876089072076, "learning_rate": 6.555345378644577e-08, "loss": 0.5599, "step": 31875 }, { "epoch": 0.9877308216631553, "grad_norm": 2.4731083180295923, "learning_rate": 6.473811232144023e-08, "loss": 0.7105, "step": 31880 }, { "epoch": 0.9878857355310448, "grad_norm": 2.408887900437404, "learning_rate": 6.392277085643468e-08, "loss": 0.6583, "step": 31885 }, { "epoch": 0.9880406493989342, "grad_norm": 2.591472174372957, "learning_rate": 6.310742939142914e-08, "loss": 0.7042, "step": 31890 }, { "epoch": 0.9881955632668237, "grad_norm": 2.2044755534170055, "learning_rate": 6.22920879264236e-08, "loss": 0.6643, "step": 31895 }, { "epoch": 0.9883504771347131, "grad_norm": 2.8123307402829534, "learning_rate": 6.147674646141804e-08, "loss": 0.6896, "step": 31900 }, { "epoch": 0.9885053910026026, "grad_norm": 3.4202742173148617, "learning_rate": 6.06614049964125e-08, "loss": 0.7117, "step": 31905 }, { "epoch": 0.9886603048704921, "grad_norm": 2.8369119313008446, "learning_rate": 5.984606353140696e-08, "loss": 0.6548, "step": 31910 }, { "epoch": 0.9888152187383814, "grad_norm": 3.191628023846625, "learning_rate": 5.903072206640142e-08, "loss": 0.7293, "step": 31915 }, { "epoch": 0.9889701326062709, "grad_norm": 2.735871954692738, "learning_rate": 5.8215380601395865e-08, "loss": 0.661, "step": 31920 }, { "epoch": 0.9891250464741603, "grad_norm": 2.6097997418211887, "learning_rate": 5.7400039136390324e-08, "loss": 0.6829, "step": 31925 }, { "epoch": 0.9892799603420498, "grad_norm": 2.99234151633204, "learning_rate": 5.658469767138478e-08, "loss": 0.6686, "step": 31930 }, { "epoch": 0.9894348742099393, "grad_norm": 2.8143074133824113, "learning_rate": 5.5769356206379236e-08, "loss": 0.6281, "step": 31935 }, { "epoch": 0.9895897880778287, "grad_norm": 2.6428994576235256, "learning_rate": 5.4954014741373695e-08, "loss": 0.6995, "step": 31940 }, { "epoch": 0.9897447019457182, "grad_norm": 2.3549644226410096, "learning_rate": 5.413867327636815e-08, "loss": 0.6422, "step": 31945 }, { "epoch": 0.9898996158136076, "grad_norm": 4.085848892749291, "learning_rate": 5.33233318113626e-08, "loss": 0.6174, "step": 31950 }, { "epoch": 0.9900545296814971, "grad_norm": 3.3578257712971764, "learning_rate": 5.250799034635706e-08, "loss": 0.5984, "step": 31955 }, { "epoch": 0.9902094435493866, "grad_norm": 2.6689876029990294, "learning_rate": 5.169264888135152e-08, "loss": 0.7286, "step": 31960 }, { "epoch": 0.990364357417276, "grad_norm": 2.565437323932627, "learning_rate": 5.087730741634598e-08, "loss": 0.6993, "step": 31965 }, { "epoch": 0.9905192712851655, "grad_norm": 2.82057432826121, "learning_rate": 5.006196595134042e-08, "loss": 0.7283, "step": 31970 }, { "epoch": 0.9906741851530549, "grad_norm": 2.50177916064835, "learning_rate": 4.924662448633488e-08, "loss": 0.6231, "step": 31975 }, { "epoch": 0.9908290990209444, "grad_norm": 2.189735477371157, "learning_rate": 4.843128302132934e-08, "loss": 0.6702, "step": 31980 }, { "epoch": 0.9909840128888338, "grad_norm": 2.5331606846495425, "learning_rate": 4.761594155632379e-08, "loss": 0.5933, "step": 31985 }, { "epoch": 0.9911389267567232, "grad_norm": 2.630049153199961, "learning_rate": 4.6800600091318246e-08, "loss": 0.6289, "step": 31990 }, { "epoch": 0.9912938406246127, "grad_norm": 2.8923717838280045, "learning_rate": 4.5985258626312705e-08, "loss": 0.6807, "step": 31995 }, { "epoch": 0.9914487544925021, "grad_norm": 2.274652927929109, "learning_rate": 4.516991716130716e-08, "loss": 0.6642, "step": 32000 }, { "epoch": 0.9916036683603916, "grad_norm": 2.648971727024812, "learning_rate": 4.435457569630162e-08, "loss": 0.6803, "step": 32005 }, { "epoch": 0.9917585822282811, "grad_norm": 2.2236457982302533, "learning_rate": 4.353923423129607e-08, "loss": 0.6461, "step": 32010 }, { "epoch": 0.9919134960961705, "grad_norm": 2.2321022815503273, "learning_rate": 4.272389276629052e-08, "loss": 0.5979, "step": 32015 }, { "epoch": 0.99206840996406, "grad_norm": 3.477847580946346, "learning_rate": 4.190855130128498e-08, "loss": 0.6626, "step": 32020 }, { "epoch": 0.9922233238319494, "grad_norm": 2.4603419367128225, "learning_rate": 4.109320983627944e-08, "loss": 0.6946, "step": 32025 }, { "epoch": 0.9923782376998389, "grad_norm": 2.5675313826967177, "learning_rate": 4.02778683712739e-08, "loss": 0.644, "step": 32030 }, { "epoch": 0.9925331515677284, "grad_norm": 2.737564449891628, "learning_rate": 3.9462526906268345e-08, "loss": 0.6391, "step": 32035 }, { "epoch": 0.9926880654356178, "grad_norm": 2.477483800476631, "learning_rate": 3.8647185441262804e-08, "loss": 0.7029, "step": 32040 }, { "epoch": 0.9928429793035073, "grad_norm": 3.162356596044711, "learning_rate": 3.783184397625726e-08, "loss": 0.6188, "step": 32045 }, { "epoch": 0.9929978931713968, "grad_norm": 2.479436687976172, "learning_rate": 3.7016502511251716e-08, "loss": 0.6402, "step": 32050 }, { "epoch": 0.9931528070392862, "grad_norm": 2.811156655297782, "learning_rate": 3.620116104624617e-08, "loss": 0.6535, "step": 32055 }, { "epoch": 0.9933077209071756, "grad_norm": 2.697676876838464, "learning_rate": 3.538581958124063e-08, "loss": 0.5682, "step": 32060 }, { "epoch": 0.993462634775065, "grad_norm": 2.7875165232687418, "learning_rate": 3.457047811623508e-08, "loss": 0.6232, "step": 32065 }, { "epoch": 0.9936175486429545, "grad_norm": 2.4511443112892914, "learning_rate": 3.375513665122954e-08, "loss": 0.6231, "step": 32070 }, { "epoch": 0.993772462510844, "grad_norm": 3.0369525172085505, "learning_rate": 3.293979518622399e-08, "loss": 0.6354, "step": 32075 }, { "epoch": 0.9939273763787334, "grad_norm": 2.4363639103219055, "learning_rate": 3.212445372121845e-08, "loss": 0.6658, "step": 32080 }, { "epoch": 0.9940822902466229, "grad_norm": 2.6824352151032924, "learning_rate": 3.1309112256212903e-08, "loss": 0.6733, "step": 32085 }, { "epoch": 0.9942372041145123, "grad_norm": 2.647730932429341, "learning_rate": 3.049377079120736e-08, "loss": 0.6413, "step": 32090 }, { "epoch": 0.9943921179824018, "grad_norm": 3.02570544770544, "learning_rate": 2.9678429326201815e-08, "loss": 0.6129, "step": 32095 }, { "epoch": 0.9945470318502913, "grad_norm": 3.3295974603361396, "learning_rate": 2.8863087861196274e-08, "loss": 0.6503, "step": 32100 }, { "epoch": 0.9947019457181807, "grad_norm": 2.692552840320135, "learning_rate": 2.8047746396190727e-08, "loss": 0.7225, "step": 32105 }, { "epoch": 0.9948568595860702, "grad_norm": 3.5715693361278156, "learning_rate": 2.723240493118518e-08, "loss": 0.595, "step": 32110 }, { "epoch": 0.9950117734539596, "grad_norm": 2.74385426493013, "learning_rate": 2.6417063466179638e-08, "loss": 0.7088, "step": 32115 }, { "epoch": 0.9951666873218491, "grad_norm": 2.0350642749995833, "learning_rate": 2.5601722001174094e-08, "loss": 0.6245, "step": 32120 }, { "epoch": 0.9953216011897386, "grad_norm": 2.8142456699833325, "learning_rate": 2.478638053616855e-08, "loss": 0.6328, "step": 32125 }, { "epoch": 0.9954765150576279, "grad_norm": 2.80522723219563, "learning_rate": 2.3971039071163006e-08, "loss": 0.6538, "step": 32130 }, { "epoch": 0.9956314289255174, "grad_norm": 2.9771904521469277, "learning_rate": 2.3155697606157458e-08, "loss": 0.6251, "step": 32135 }, { "epoch": 0.9957863427934068, "grad_norm": 2.50194744111959, "learning_rate": 2.2340356141151917e-08, "loss": 0.6586, "step": 32140 }, { "epoch": 0.9959412566612963, "grad_norm": 2.700292094747342, "learning_rate": 2.152501467614637e-08, "loss": 0.6403, "step": 32145 }, { "epoch": 0.9960961705291858, "grad_norm": 2.555688619733629, "learning_rate": 2.070967321114083e-08, "loss": 0.7169, "step": 32150 }, { "epoch": 0.9962510843970752, "grad_norm": 2.5870650635422154, "learning_rate": 1.989433174613528e-08, "loss": 0.7252, "step": 32155 }, { "epoch": 0.9964059982649647, "grad_norm": 2.686019622643334, "learning_rate": 1.907899028112974e-08, "loss": 0.7359, "step": 32160 }, { "epoch": 0.9965609121328541, "grad_norm": 2.9918050996950316, "learning_rate": 1.8263648816124196e-08, "loss": 0.7162, "step": 32165 }, { "epoch": 0.9967158260007436, "grad_norm": 3.2000809892041433, "learning_rate": 1.744830735111865e-08, "loss": 0.6603, "step": 32170 }, { "epoch": 0.9968707398686331, "grad_norm": 2.8765022518306216, "learning_rate": 1.6632965886113105e-08, "loss": 0.5867, "step": 32175 }, { "epoch": 0.9970256537365225, "grad_norm": 3.2216831480070884, "learning_rate": 1.581762442110756e-08, "loss": 0.6631, "step": 32180 }, { "epoch": 0.997180567604412, "grad_norm": 2.9114821126528154, "learning_rate": 1.5002282956102016e-08, "loss": 0.7026, "step": 32185 }, { "epoch": 0.9973354814723014, "grad_norm": 2.1869836453640614, "learning_rate": 1.418694149109647e-08, "loss": 0.5904, "step": 32190 }, { "epoch": 0.9974903953401909, "grad_norm": 5.554585192873027, "learning_rate": 1.3371600026090928e-08, "loss": 0.5632, "step": 32195 }, { "epoch": 0.9976453092080803, "grad_norm": 3.328214748461546, "learning_rate": 1.2556258561085384e-08, "loss": 0.6987, "step": 32200 }, { "epoch": 0.9978002230759697, "grad_norm": 4.475876979452419, "learning_rate": 1.174091709607984e-08, "loss": 0.6143, "step": 32205 }, { "epoch": 0.9979551369438592, "grad_norm": 2.40698241521225, "learning_rate": 1.0925575631074295e-08, "loss": 0.6402, "step": 32210 }, { "epoch": 0.9981100508117486, "grad_norm": 2.614367415322672, "learning_rate": 1.011023416606875e-08, "loss": 0.7257, "step": 32215 }, { "epoch": 0.9982649646796381, "grad_norm": 2.964337881956991, "learning_rate": 9.294892701063205e-09, "loss": 0.6463, "step": 32220 }, { "epoch": 0.9984198785475276, "grad_norm": 2.586006172681066, "learning_rate": 8.479551236057661e-09, "loss": 0.6124, "step": 32225 }, { "epoch": 0.998574792415417, "grad_norm": 3.1593967398047895, "learning_rate": 7.664209771052117e-09, "loss": 0.6071, "step": 32230 }, { "epoch": 0.9987297062833065, "grad_norm": 2.9975486306459045, "learning_rate": 6.848868306046573e-09, "loss": 0.6353, "step": 32235 }, { "epoch": 0.998884620151196, "grad_norm": 2.6428152085216214, "learning_rate": 6.033526841041029e-09, "loss": 0.688, "step": 32240 }, { "epoch": 0.9990395340190854, "grad_norm": 3.2412458015720302, "learning_rate": 5.218185376035484e-09, "loss": 0.5933, "step": 32245 }, { "epoch": 0.9991944478869749, "grad_norm": 2.6207712644604357, "learning_rate": 4.4028439110299394e-09, "loss": 0.634, "step": 32250 }, { "epoch": 0.9993493617548643, "grad_norm": 5.86333880573301, "learning_rate": 3.5875024460243953e-09, "loss": 0.6851, "step": 32255 }, { "epoch": 0.9995042756227538, "grad_norm": 2.4952963802100214, "learning_rate": 2.772160981018851e-09, "loss": 0.6264, "step": 32260 }, { "epoch": 0.9996591894906433, "grad_norm": 2.517992720275179, "learning_rate": 1.9568195160133065e-09, "loss": 0.6488, "step": 32265 }, { "epoch": 0.9998141033585326, "grad_norm": 2.79159858642405, "learning_rate": 1.1414780510077623e-09, "loss": 0.5822, "step": 32270 }, { "epoch": 0.9999690172264221, "grad_norm": 2.584582574720945, "learning_rate": 3.2613658600221773e-10, "loss": 0.681, "step": 32275 }, { "epoch": 1.0, "step": 32276, "total_flos": 6154185126182912.0, "train_loss": 0.7142781505809259, "train_runtime": 64597.5242, "train_samples_per_second": 15.989, "train_steps_per_second": 0.5 } ], "logging_steps": 5, "max_steps": 32276, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6154185126182912.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }